vargai 0.4.0-alpha108 → 0.4.0-alpha111

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/package.json +4 -1
  2. package/src/ai-sdk/generate-video.ts +14 -8
  3. package/src/ai-sdk/providers/editly/backends/types.ts +4 -0
  4. package/src/ai-sdk/providers/editly/layers.ts +39 -19
  5. package/src/ai-sdk/providers/editly/rendi/index.ts +214 -3
  6. package/src/ai-sdk/providers/fal.ts +133 -2
  7. package/src/ai-sdk/providers/model-rules.ts +18 -0
  8. package/src/ai-sdk/providers/varg.ts +7 -4
  9. package/src/core/registry/resolver.ts +4 -0
  10. package/src/core/schema/types.ts +65 -0
  11. package/src/definitions/actions/video.ts +24 -4
  12. package/src/definitions/models/elevenlabs.ts +14 -1
  13. package/src/definitions/models/flux.ts +17 -1
  14. package/src/definitions/models/heygen.ts +20 -1
  15. package/src/definitions/models/index.ts +68 -2
  16. package/src/definitions/models/kling.ts +326 -1
  17. package/src/definitions/models/llama.ts +13 -1
  18. package/src/definitions/models/ltx-a2v.ts +17 -1
  19. package/src/definitions/models/nano-banana-2.ts +23 -1
  20. package/src/definitions/models/nano-banana-pro.ts +17 -1
  21. package/src/definitions/models/omnihuman.ts +13 -1
  22. package/src/definitions/models/phota.ts +29 -1
  23. package/src/definitions/models/qwen-image-2.ts +14 -1
  24. package/src/definitions/models/recraft-v4.ts +13 -1
  25. package/src/definitions/models/reve.ts +13 -1
  26. package/src/definitions/models/seedance.ts +21 -1
  27. package/src/definitions/models/sonauto.ts +13 -1
  28. package/src/definitions/models/soul.ts +13 -1
  29. package/src/definitions/models/veed-fabric.ts +16 -1
  30. package/src/definitions/models/wan.ts +23 -3
  31. package/src/definitions/models/whisper.ts +25 -1
  32. package/src/providers/fal.ts +97 -0
  33. package/src/react/elements.ts +81 -0
  34. package/src/react/index.ts +8 -0
  35. package/src/react/renderers/burn-captions.ts +83 -19
  36. package/src/react/renderers/captions.ts +292 -25
  37. package/src/react/renderers/emoji.ts +256 -0
  38. package/src/react/renderers/fonts.ts +509 -0
  39. package/src/react/renderers/progress.ts +12 -1
  40. package/src/react/renderers/render.ts +83 -4
  41. package/src/react/renderers/video.ts +24 -3
  42. package/src/react/resolve.ts +295 -4
  43. package/src/react/resolved-element.ts +13 -6
  44. package/src/react/types.ts +87 -4
  45. package/src/speech/map-segments.ts +2 -1
  46. package/src/speech/parse-alignment.ts +111 -6
  47. package/src/speech/word-segmenter.ts +172 -0
package/package.json CHANGED
@@ -28,6 +28,7 @@
28
28
  "@commitlint/config-conventional": "^20.0.0",
29
29
  "@size-limit/preset-small-lib": "^11.2.0",
30
30
  "@types/bun": "latest",
31
+ "@types/opentype.js": "^1.3.9",
31
32
  "@types/react": "^19.2.7",
32
33
  "husky": "^9.1.7",
33
34
  "lint-staged": "^16.2.7"
@@ -58,9 +59,11 @@
58
59
  "ai": "^6.0.26",
59
60
  "apify-client": "^2.20.0",
60
61
  "citty": "^0.1.6",
62
+ "fflate": "^0.8.2",
61
63
  "fluent-ffmpeg": "^2.1.3",
62
64
  "groq-sdk": "^0.36.0",
63
65
  "ink": "^6.5.1",
66
+ "opentype.js": "^1.3.4",
64
67
  "p-limit": "^6.2.0",
65
68
  "p-map": "^7.0.4",
66
69
  "react": "^19.2.0",
@@ -104,7 +107,7 @@
104
107
  "license": "Apache-2.0",
105
108
  "author": "varg.ai <hello@varg.ai> (https://varg.ai)",
106
109
  "sideEffects": false,
107
- "version": "0.4.0-alpha108",
110
+ "version": "0.4.0-alpha111",
108
111
  "exports": {
109
112
  ".": "./src/index.ts",
110
113
  "./ai": "./src/ai-sdk/index.ts",
@@ -10,9 +10,9 @@ export type GenerateVideoPrompt =
10
10
  | string
11
11
  | {
12
12
  text?: string;
13
- images?: Array<DataContent>;
14
- audio?: DataContent;
15
- video?: DataContent;
13
+ images?: DataContent | Array<DataContent>;
14
+ audio?: DataContent | Array<DataContent>;
15
+ video?: DataContent | Array<DataContent>;
16
16
  };
17
17
 
18
18
  export interface GenerateVideoOptions {
@@ -76,6 +76,12 @@ function toUint8Array(data: DataContent): Uint8Array {
76
76
  return data;
77
77
  }
78
78
 
79
+ /** Normalize singular or array to array */
80
+ function toArray<T>(value: T | T[] | undefined): T[] {
81
+ if (value == null) return [];
82
+ return Array.isArray(value) ? value : [value];
83
+ }
84
+
79
85
  function normalizePrompt(prompt: GenerateVideoPrompt): {
80
86
  prompt: string | undefined;
81
87
  files: ImageModelV3File[] | undefined;
@@ -86,7 +92,7 @@ function normalizePrompt(prompt: GenerateVideoPrompt): {
86
92
 
87
93
  const files: ImageModelV3File[] = [];
88
94
 
89
- for (const img of prompt.images ?? []) {
95
+ for (const img of toArray(prompt.images)) {
90
96
  files.push({
91
97
  type: "file",
92
98
  mediaType: "image/png",
@@ -94,19 +100,19 @@ function normalizePrompt(prompt: GenerateVideoPrompt): {
94
100
  });
95
101
  }
96
102
 
97
- if (prompt.audio) {
103
+ for (const aud of toArray(prompt.audio)) {
98
104
  files.push({
99
105
  type: "file",
100
106
  mediaType: "audio/mpeg",
101
- data: toUint8Array(prompt.audio),
107
+ data: toUint8Array(aud),
102
108
  });
103
109
  }
104
110
 
105
- if (prompt.video) {
111
+ for (const vid of toArray(prompt.video)) {
106
112
  files.push({
107
113
  type: "file",
108
114
  mediaType: "video/mp4",
109
- data: toUint8Array(prompt.video),
115
+ data: toUint8Array(vid),
110
116
  });
111
117
  }
112
118
 
@@ -47,6 +47,10 @@ export interface FFmpegRunOptions {
47
47
  verbose?: boolean;
48
48
  /** Max execution time in seconds (used by cloud backends like Rendi, ignored by local) */
49
49
  timeoutSeconds?: number;
50
+ /** Extra files (e.g. fonts) to include alongside inputs.
51
+ * When present, cloud backends like Rendi use compressed folder mode
52
+ * (input_compressed_folder) to bundle all files together. */
53
+ auxiliaryFiles?: { url: string; fileName: string }[];
50
54
  }
51
55
 
52
56
  export type FFmpegOutput =
@@ -43,12 +43,20 @@ function getCropPositionExpr(position: CropPosition | undefined): {
43
43
  }
44
44
 
45
45
  function escapeDrawText(text: string): string {
46
- return text
47
- .replace(/\\/g, "\\\\")
48
- .replace(/'/g, "'\\''")
49
- .replace(/:/g, "\\:")
50
- .replace(/\[/g, "\\[")
51
- .replace(/\]/g, "\\]");
46
+ return (
47
+ text
48
+ .replace(/\\/g, "\\\\")
49
+ .replace(/'/g, "'\\''")
50
+ .replace(/:/g, "\\:")
51
+ .replace(/\[/g, "\\[")
52
+ .replace(/\]/g, "\\]")
53
+ // Replace straight double quotes with typographic curly quotes.
54
+ // Straight " breaks Rendi's command parser (the -filter_complex value is
55
+ // wrapped in double quotes, so an unescaped " inside it terminates the
56
+ // argument and causes ffmpeg to interpret the next word as a file path).
57
+ .replace(/\u201C|\u201D/g, "\u201C") // normalise any existing curly quotes
58
+ .replace(/"/g, "\u201C")
59
+ );
52
60
  }
53
61
 
54
62
  function parseSize(val: number | string | undefined, base: number): number {
@@ -161,11 +169,15 @@ export function getVideoFilter(
161
169
  };
162
170
  }
163
171
 
164
- let scaleFilter = `scale=${width}:${height}:force_original_aspect_ratio=decrease`;
165
- if (layer.resizeMode === "cover") {
166
- scaleFilter = `scale=${width}:${height}:force_original_aspect_ratio=increase,crop=${width}:${height}`;
172
+ let scaleFilter: string;
173
+ if (layer.resizeMode === "contain") {
174
+ scaleFilter = `scale=${width}:${height}:force_original_aspect_ratio=decrease`;
167
175
  } else if (layer.resizeMode === "stretch") {
168
176
  scaleFilter = `scale=${width}:${height}`;
177
+ } else {
178
+ // Default ("cover" or undefined): scale up to fill canvas, crop excess
179
+ const { x, y } = getCropPositionExpr(layer.cropPosition);
180
+ scaleFilter = `scale=${width}:${height}:force_original_aspect_ratio=increase,crop=${width}:${height}:${x}:${y}`;
169
181
  }
170
182
 
171
183
  filters.push(scaleFilter);
@@ -219,11 +231,15 @@ export function getVideoFilterWithTrim(
219
231
  filters.push("fps=30");
220
232
  filters.push("settb=1/30");
221
233
  } else {
222
- let scaleFilter = `scale=${width}:${height}:force_original_aspect_ratio=decrease`;
223
- if (layer.resizeMode === "cover") {
224
- scaleFilter = `scale=${width}:${height}:force_original_aspect_ratio=increase,crop=${width}:${height}`;
234
+ let scaleFilter: string;
235
+ if (layer.resizeMode === "contain") {
236
+ scaleFilter = `scale=${width}:${height}:force_original_aspect_ratio=decrease`;
225
237
  } else if (layer.resizeMode === "stretch") {
226
238
  scaleFilter = `scale=${width}:${height}`;
239
+ } else {
240
+ // Default ("cover" or undefined): scale up to fill canvas, crop excess
241
+ const { x, y } = getCropPositionExpr(layer.cropPosition);
242
+ scaleFilter = `scale=${width}:${height}:force_original_aspect_ratio=increase,crop=${width}:${height}:${x}:${y}`;
227
243
  }
228
244
 
229
245
  filters.push(scaleFilter);
@@ -386,11 +402,14 @@ export function getImageFilter(
386
402
  };
387
403
  }
388
404
 
389
- let scaleFilter = `scale=${width}:${height}:force_original_aspect_ratio=decrease`;
390
- if (layer.resizeMode === "cover") {
391
- scaleFilter = `scale=${width}:${height}:force_original_aspect_ratio=increase,crop=${width}:${height}`;
405
+ let scaleFilter: string;
406
+ if (layer.resizeMode === "contain") {
407
+ scaleFilter = `scale=${width}:${height}:force_original_aspect_ratio=decrease`;
392
408
  } else if (layer.resizeMode === "stretch") {
393
409
  scaleFilter = `scale=${width}:${height}`;
410
+ } else {
411
+ // Default ("cover" or undefined): scale up to fill canvas, crop excess
412
+ scaleFilter = `scale=${width}:${height}:force_original_aspect_ratio=increase,crop=${width}:${height}`;
394
413
  }
395
414
  filters.push(scaleFilter);
396
415
  filters.push(`pad=${width}:${height}:(ow-iw)/2:(oh-ih)/2:black`);
@@ -532,13 +551,14 @@ export function getImageOverlayFilter(
532
551
  let scaleExpr: string;
533
552
  if (!hasExplicitHeight) {
534
553
  scaleExpr = `scale=${targetWidth}:-2`;
535
- } else if (layer.resizeMode === "cover") {
536
- const { x, y } = getCropPositionExpr(layer.cropPosition);
537
- scaleExpr = `scale=${targetWidth}:${targetHeight}:force_original_aspect_ratio=increase,crop=${targetWidth}:${targetHeight}:${x}:${y}`;
554
+ } else if (layer.resizeMode === "contain") {
555
+ scaleExpr = `scale=${targetWidth}:${targetHeight}:force_original_aspect_ratio=decrease,pad=${targetWidth}:${targetHeight}:(ow-iw)/2:(oh-ih)/2:black`;
538
556
  } else if (layer.resizeMode === "stretch") {
539
557
  scaleExpr = `scale=${targetWidth}:${targetHeight}`;
540
558
  } else {
541
- scaleExpr = `scale=${targetWidth}:${targetHeight}:force_original_aspect_ratio=decrease,pad=${targetWidth}:${targetHeight}:(ow-iw)/2:(oh-ih)/2:black`;
559
+ // Default ("cover" or undefined): scale up to fill, crop excess
560
+ const { x, y } = getCropPositionExpr(layer.cropPosition);
561
+ scaleExpr = `scale=${targetWidth}:${targetHeight}:force_original_aspect_ratio=increase,crop=${targetWidth}:${targetHeight}:${x}:${y}`;
542
562
  }
543
563
 
544
564
  const zoomDir = layer.zoomDirection ?? null;
@@ -1,3 +1,4 @@
1
+ import { zipSync } from "fflate";
1
2
  import sharp from "sharp";
2
3
  import { File } from "../../../file";
3
4
  import type { StorageProvider } from "../../../storage/types";
@@ -128,6 +129,11 @@ export class RendiBackend implements FFmpegBackend {
128
129
  }
129
130
 
130
131
  async run(options: FFmpegRunOptions): Promise<FFmpegRunResult> {
132
+ // When auxiliary files (e.g. fonts) are present, use compressed folder mode
133
+ if (options.auxiliaryFiles && options.auxiliaryFiles.length > 0) {
134
+ return this.runWithCompressedFolder(options);
135
+ }
136
+
131
137
  let {
132
138
  inputs,
133
139
  filterComplex,
@@ -287,6 +293,194 @@ export class RendiBackend implements FFmpegBackend {
287
293
  throw new Error("Rendi command timed out");
288
294
  }
289
295
 
296
+ /**
297
+ * Run an FFmpeg command using Rendi's input_compressed_folder mode.
298
+ *
299
+ * Used when auxiliary files (e.g. fonts for subtitle rendering) need to be
300
+ * bundled alongside regular inputs. Creates a ZIP containing all input files
301
+ * and auxiliary files, uploads it to storage, and submits to Rendi with
302
+ * `input_compressed_folder` instead of `input_files`.
303
+ *
304
+ * Inside the ZIP, all files are at the root level. The ffmpeg command
305
+ * references files by their bare filenames (not placeholders).
306
+ */
307
+ private async runWithCompressedFolder(
308
+ options: FFmpegRunOptions,
309
+ ): Promise<FFmpegRunResult> {
310
+ const {
311
+ inputs,
312
+ videoFilter,
313
+ filterComplex,
314
+ outputArgs = [],
315
+ outputPath,
316
+ verbose,
317
+ auxiliaryFiles = [],
318
+ } = options;
319
+
320
+ // 1. Resolve all input files to URLs
321
+ const inputEntries: { fileName: string; url: string }[] = [];
322
+ for (const input of inputs ?? []) {
323
+ const path = this.getInputPath(input);
324
+ const url = await this.resolvePath(path);
325
+ // Extract filename from URL or path
326
+ const fileName =
327
+ url.split("/").pop()?.split("?")[0] ?? `input_${inputEntries.length}`;
328
+ inputEntries.push({ fileName, url });
329
+ }
330
+
331
+ // 2. Download all files (inputs + auxiliary) into memory
332
+ const zipContents: Record<string, Uint8Array> = {};
333
+
334
+ const downloadTasks = [
335
+ ...inputEntries.map(async (entry) => {
336
+ const res = await fetch(entry.url);
337
+ if (!res.ok)
338
+ throw new Error(
339
+ `Failed to download input ${entry.fileName}: ${res.status}`,
340
+ );
341
+ zipContents[entry.fileName] = new Uint8Array(await res.arrayBuffer());
342
+ }),
343
+ ...auxiliaryFiles.map(async (file) => {
344
+ const res = await fetch(file.url);
345
+ if (!res.ok)
346
+ throw new Error(
347
+ `Failed to download auxiliary file ${file.fileName}: ${res.status}`,
348
+ );
349
+ zipContents[file.fileName] = new Uint8Array(await res.arrayBuffer());
350
+ }),
351
+ ];
352
+
353
+ await Promise.all(downloadTasks);
354
+
355
+ if (verbose) {
356
+ const totalSize = Object.values(zipContents).reduce(
357
+ (sum, buf) => sum + buf.length,
358
+ 0,
359
+ );
360
+ console.log(
361
+ `[rendi] creating ZIP with ${Object.keys(zipContents).length} files (${(totalSize / 1024 / 1024).toFixed(1)} MB)`,
362
+ );
363
+ }
364
+
365
+ // 3. Create ZIP
366
+ const zipData = zipSync(zipContents, { level: 1 }); // fast compression
367
+
368
+ // 4. Upload ZIP to storage
369
+ const zipKey = `internal/rendi-compressed-${Date.now()}.zip`;
370
+ const zipUrl = await this.storage.upload(
371
+ zipData,
372
+ zipKey,
373
+ "application/zip",
374
+ );
375
+
376
+ if (verbose) {
377
+ console.log(
378
+ `[rendi] uploaded ZIP (${(zipData.length / 1024 / 1024).toFixed(1)} MB) -> ${zipUrl}`,
379
+ );
380
+ }
381
+
382
+ // 5. Build ffmpeg command using bare filenames (not {{in_X}} placeholders)
383
+ const inputArgs: string[] = [];
384
+ for (const [i, input] of (inputs ?? []).entries()) {
385
+ if (typeof input !== "string" && "options" in input && input.options) {
386
+ inputArgs.push(...input.options);
387
+ }
388
+ inputArgs.push("-i", inputEntries[i]!.fileName);
389
+ }
390
+
391
+ const filterArgs: string[] = [];
392
+ if (filterComplex) {
393
+ filterArgs.push("-filter_complex", filterComplex);
394
+ }
395
+ if (videoFilter) {
396
+ // For compressed folder mode, the video filter references files by
397
+ // their bare filenames (already resolved in the working directory)
398
+ filterArgs.push("-vf", videoFilter);
399
+ }
400
+
401
+ const processedOutputArgs = outputArgs.filter((arg) => arg !== "-y");
402
+
403
+ const commandParts = [
404
+ ...inputArgs,
405
+ ...filterArgs,
406
+ ...processedOutputArgs,
407
+ "{{out_1}}",
408
+ ];
409
+ const ffmpegCommand = this.buildCommandString(commandParts);
410
+ const outputFilename = outputPath?.split("/").pop() ?? "output.mp4";
411
+
412
+ if (verbose) {
413
+ console.log("[rendi] input_compressed_folder:", zipUrl);
414
+ console.log("[rendi] ffmpeg_command:", ffmpegCommand);
415
+ }
416
+
417
+ // 6. Submit to Rendi with input_compressed_folder
418
+ const submitResponse = await fetch(`${RENDI_API_BASE}/run-ffmpeg-command`, {
419
+ method: "POST",
420
+ headers: {
421
+ "X-API-KEY": this.apiKey,
422
+ "Content-Type": "application/json",
423
+ },
424
+ body: JSON.stringify({
425
+ input_compressed_folder: zipUrl,
426
+ output_files: { out_1: outputFilename },
427
+ ffmpeg_command: ffmpegCommand,
428
+ max_command_run_seconds:
429
+ options.timeoutSeconds ?? this.maxCommandRunSeconds,
430
+ }),
431
+ });
432
+
433
+ if (!submitResponse.ok) {
434
+ const errorText = await submitResponse.text();
435
+ throw new Error(
436
+ `Rendi submit failed: ${submitResponse.status} - ${errorText}`,
437
+ );
438
+ }
439
+
440
+ const { command_id } =
441
+ (await submitResponse.json()) as RendiCommandResponse;
442
+
443
+ if (verbose) {
444
+ console.log("[rendi] command_id:", command_id);
445
+ }
446
+
447
+ // 7. Poll for completion (same as standard run)
448
+ let attempts = 0;
449
+ while (attempts < MAX_POLL_ATTEMPTS) {
450
+ const statusResponse = await fetch(
451
+ `${RENDI_API_BASE}/commands/${command_id}`,
452
+ {
453
+ headers: { "X-API-KEY": this.apiKey },
454
+ },
455
+ );
456
+
457
+ if (!statusResponse.ok) {
458
+ throw new Error(`Rendi poll failed: ${statusResponse.status}`);
459
+ }
460
+
461
+ const status = (await statusResponse.json()) as RendiStatusResponse;
462
+
463
+ if (status.status === "SUCCESS") {
464
+ const outputFile = status.output_files?.out_1;
465
+ if (!outputFile?.storage_url) {
466
+ throw new Error("Rendi completed but no output URL");
467
+ }
468
+ return { output: { type: "url", url: outputFile.storage_url } };
469
+ }
470
+
471
+ if (status.status === "FAILED") {
472
+ throw new Error(
473
+ `Rendi command failed: ${status.error_message ?? "unknown error"}`,
474
+ );
475
+ }
476
+
477
+ await this.sleep(POLL_INTERVAL_MS);
478
+ attempts++;
479
+ }
480
+
481
+ throw new Error("Rendi command timed out");
482
+ }
483
+
290
484
  async resolvePath(input: FilePath): Promise<string> {
291
485
  if (input instanceof File) {
292
486
  return input.upload(this.storage);
@@ -326,13 +520,30 @@ export class RendiBackend implements FFmpegBackend {
326
520
  private buildCommandString(args: string[]): string {
327
521
  return args
328
522
  .map((arg) => {
523
+ // Flags (e.g. -i, -filter_complex) and output placeholders pass through
329
524
  if (arg.startsWith("-") || arg.startsWith("{{")) {
330
525
  return arg;
331
526
  }
332
- if (arg.includes(" ") || arg.includes(":") || arg.includes("'")) {
333
- return `"${arg.replace(/"/g, '\\"')}"`;
527
+ // For values that need quoting (spaces, colons, single-quotes etc.):
528
+ // Rendi's server-side parser splits the command string like a POSIX
529
+ // shell. The old approach wrapped values in "..." and tried to escape
530
+ // inner " with \", but Rendi's parser does NOT reliably honour \"
531
+ // inside double-quoted strings — any literal " in user text (e.g.
532
+ // drawtext titles) would terminate the quoted arg and cause the next
533
+ // word to be treated as an output path.
534
+ //
535
+ // Defence-in-depth: replace any surviving straight " with the
536
+ // typographic curly-quote equivalent (the primary escaping happens in
537
+ // escapeDrawText, but filter strings can also come from other sources).
538
+ const sanitised = arg.replace(/"/g, "\u201C");
539
+ if (
540
+ sanitised.includes(" ") ||
541
+ sanitised.includes(":") ||
542
+ sanitised.includes("'")
543
+ ) {
544
+ return `"${sanitised}"`;
334
545
  }
335
- return arg;
546
+ return sanitised;
336
547
  })
337
548
  .join(" ");
338
549
  }
@@ -107,6 +107,11 @@ const VIDEO_MODELS: Record<string, { t2v: string; i2v: string }> = {
107
107
  t2v: "fal-ai/kling-video/o3/standard/text-to-video",
108
108
  i2v: "fal-ai/kling-video/o3/standard/image-to-video",
109
109
  },
110
+ // Kling O3 4K - native 4K output (i2v only, t2v falls back to pro)
111
+ "kling-v3-4k-image-to-video": {
112
+ t2v: "fal-ai/kling-video/o3/pro/text-to-video",
113
+ i2v: "fal-ai/kling-video/o3/4k/image-to-video",
114
+ },
110
115
  // Kling v2.6 - with native audio generation
111
116
  "kling-v2.6": {
112
117
  t2v: "fal-ai/kling-video/v2.6/pro/text-to-video",
@@ -163,8 +168,25 @@ const VIDEO_EDIT_MODELS: Record<string, string> = {
163
168
  "sora-2-remix": "fal-ai/sora-2/video-to-video/remix",
164
169
  };
165
170
 
171
+ // Reference-to-video models - images/elements + prompt → video with character consistency
172
+ const REFERENCE_VIDEO_MODELS: Record<string, string> = {
173
+ "kling-v3-pro-reference-to-video":
174
+ "fal-ai/kling-video/o3/pro/reference-to-video",
175
+ "kling-v3-4k-reference-to-video":
176
+ "fal-ai/kling-video/o3/4k/reference-to-video",
177
+ };
178
+
179
+ // Video-to-video reference models - reference video + prompt → new video preserving motion/camera
180
+ const V2V_REFERENCE_MODELS: Record<string, string> = {
181
+ "kling-v3-standard-v2v-reference":
182
+ "fal-ai/kling-video/o3/standard/video-to-video/reference",
183
+ };
184
+
166
185
  // Motion control models - video-to-video with motion transfer
167
186
  const MOTION_CONTROL_MODELS: Record<string, string> = {
187
+ "kling-v3-pro-motion-control": "fal-ai/kling-video/v3/pro/motion-control",
188
+ "kling-v3-standard-motion-control":
189
+ "fal-ai/kling-video/v3/standard/motion-control",
168
190
  "kling-v2.6-motion": "fal-ai/kling-video/v2.6/pro/motion-control",
169
191
  "kling-v2.6-motion-standard":
170
192
  "fal-ai/kling-video/v2.6/standard/motion-control",
@@ -520,8 +542,12 @@ class FalVideoModel implements VideoModelV3 {
520
542
  const isMotionControl = MOTION_CONTROL_MODELS[this.modelId] !== undefined;
521
543
  const isVideoEdit = VIDEO_EDIT_MODELS[this.modelId] !== undefined;
522
544
  const isVideoUpscale = VIDEO_UPSCALE_MODELS[this.modelId] !== undefined;
545
+ const isReferenceVideo = REFERENCE_VIDEO_MODELS[this.modelId] !== undefined;
546
+ const isV2VReference = V2V_REFERENCE_MODELS[this.modelId] !== undefined;
523
547
  const isKlingV3 =
524
- this.modelId === "kling-v3" || this.modelId === "kling-v3-standard";
548
+ this.modelId === "kling-v3" ||
549
+ this.modelId === "kling-v3-standard" ||
550
+ this.modelId === "kling-v3-4k-image-to-video";
525
551
  const isKlingV26 = this.modelId === "kling-v2.6";
526
552
  const isLtx2 = this.modelId === "ltx-2-19b-distilled";
527
553
  const isGrokImagine = this.modelId === "grok-imagine";
@@ -537,7 +563,11 @@ class FalVideoModel implements VideoModelV3 {
537
563
  ? this.resolveVideoEditEndpoint()
538
564
  : isVideoUpscale
539
565
  ? this.resolveVideoUpscaleEndpoint()
540
- : this.resolveEndpoint(hasImageInput ?? false);
566
+ : isReferenceVideo
567
+ ? this.resolveReferenceVideoEndpoint()
568
+ : isV2VReference
569
+ ? this.resolveV2VReferenceEndpoint()
570
+ : this.resolveEndpoint(hasImageInput ?? false);
541
571
 
542
572
  const input: Record<string, unknown> = {
543
573
  ...(providerOptions?.fal ?? {}),
@@ -600,6 +630,11 @@ class FalVideoModel implements VideoModelV3 {
600
630
  if (input.keep_original_sound === undefined) {
601
631
  input.keep_original_sound = true;
602
632
  }
633
+
634
+ // Pass aspect ratio so the provider returns the correct output dimensions
635
+ if (aspectRatio && !input.aspect_ratio) {
636
+ input.aspect_ratio = aspectRatio;
637
+ }
603
638
  } else if (isVideoEdit) {
604
639
  // Video edit: video input + prompt for editing instruction
605
640
  input.prompt = prompt;
@@ -625,6 +660,86 @@ class FalVideoModel implements VideoModelV3 {
625
660
  if (videoFile) {
626
661
  input.video_url = await fileToUrl(videoFile);
627
662
  }
663
+ } else if (isReferenceVideo) {
664
+ // Reference-to-video: prompt + optional start/end images + reference images
665
+ // Elements and multi_prompt are passed via providerOptions.fal
666
+ if (prompt) {
667
+ input.prompt = prompt;
668
+ }
669
+
670
+ if (files) {
671
+ const imageFiles = files.filter((f) =>
672
+ getMediaType(f)?.startsWith("image/"),
673
+ );
674
+ // First image → start_image_url, second → end_image_url
675
+ if (imageFiles[0]) {
676
+ input.start_image_url = await fileToUrl(imageFiles[0]);
677
+ }
678
+ if (imageFiles[1]) {
679
+ input.end_image_url = await fileToUrl(imageFiles[1]);
680
+ }
681
+ // Additional images (3+) → image_urls for style/appearance reference
682
+ if (imageFiles.length > 2) {
683
+ const additionalUrls: string[] = [];
684
+ for (let i = 2; i < imageFiles.length; i++) {
685
+ additionalUrls.push(await fileToUrl(imageFiles[i]!));
686
+ }
687
+ input.image_urls = additionalUrls;
688
+ }
689
+ }
690
+
691
+ // Duration as string integer for Kling O3
692
+ const normalized = normalizeProviderInput(this.modelId, { duration });
693
+ input.duration = normalized.duration;
694
+
695
+ if (!input.aspect_ratio) {
696
+ input.aspect_ratio = aspectRatio ?? "16:9";
697
+ }
698
+
699
+ // Default to generating audio
700
+ if (input.generate_audio === undefined) {
701
+ input.generate_audio = true;
702
+ }
703
+ } else if (isV2VReference) {
704
+ // Video-to-video reference: reference video + prompt → new video preserving motion/camera
705
+ // Elements and image_urls are passed via providerOptions.fal
706
+ if (prompt) {
707
+ input.prompt = prompt;
708
+ }
709
+
710
+ const videoFile = files?.find((f) =>
711
+ getMediaType(f)?.startsWith("video/"),
712
+ );
713
+ if (videoFile) {
714
+ input.video_url = await fileToUrl(videoFile);
715
+ }
716
+
717
+ // Reference images from file inputs (for style/appearance)
718
+ if (files) {
719
+ const imageFiles = files.filter((f) =>
720
+ getMediaType(f)?.startsWith("image/"),
721
+ );
722
+ if (imageFiles.length > 0) {
723
+ const imageUrls: string[] = [];
724
+ for (const imgFile of imageFiles) {
725
+ imageUrls.push(await fileToUrl(imgFile));
726
+ }
727
+ input.image_urls = imageUrls;
728
+ }
729
+ }
730
+
731
+ // Duration as string integer for Kling O3
732
+ const normalized = normalizeProviderInput(this.modelId, { duration });
733
+ input.duration = normalized.duration;
734
+
735
+ if (!input.aspect_ratio) {
736
+ input.aspect_ratio = aspectRatio ?? "auto";
737
+ }
738
+
739
+ // Default to keeping original audio from reference video
740
+ if (input.keep_audio === undefined) {
741
+ input.keep_audio = true;
742
+ }
628
743
  } else {
629
744
  // Standard video generation
630
745
  input.prompt = prompt;
@@ -825,6 +940,22 @@ class FalVideoModel implements VideoModelV3 {
825
940
 
826
941
  return VIDEO_UPSCALE_MODELS[this.modelId] ?? this.modelId;
827
942
  }
943
+
944
+ private resolveReferenceVideoEndpoint(): string {
945
+ if (this.modelId.startsWith("raw:")) {
946
+ return this.modelId.slice(4);
947
+ }
948
+
949
+ return REFERENCE_VIDEO_MODELS[this.modelId] ?? this.modelId;
950
+ }
951
+
952
+ private resolveV2VReferenceEndpoint(): string {
953
+ if (this.modelId.startsWith("raw:")) {
954
+ return this.modelId.slice(4);
955
+ }
956
+
957
+ return V2V_REFERENCE_MODELS[this.modelId] ?? this.modelId;
958
+ }
828
959
  }
829
960
 
830
961
  class FalImageModel implements ImageModelV3 {
@@ -70,6 +70,24 @@ const ModelDurationRules: Record<string, z.ZodType> = {
70
70
  "kling-v3": z.object({ duration: stringIntDuration(3, 15, 5) }),
71
71
  "kling-v3-standard": z.object({ duration: stringIntDuration(3, 15, 5) }),
72
72
 
73
+ // Kling O3 4K: same rules as v3
74
+ "kling-v3-4k-image-to-video": z.object({
75
+ duration: stringIntDuration(3, 15, 5),
76
+ }),
77
+
78
+ // Kling O3 reference-to-video: same duration range
79
+ "kling-v3-pro-reference-to-video": z.object({
80
+ duration: stringIntDuration(3, 15, 5),
81
+ }),
82
+ "kling-v3-4k-reference-to-video": z.object({
83
+ duration: stringIntDuration(3, 15, 5),
84
+ }),
85
+
86
+ // Kling O3 video-to-video reference: same duration range
87
+ "kling-v3-standard-v2v-reference": z.object({
88
+ duration: stringIntDuration(3, 15, 5),
89
+ }),
90
+
73
91
  // Kling v2.6: same rules as v3
74
92
  "kling-v2.6": z.object({ duration: stringIntDuration(3, 15, 5) }),
75
93