npm - vargai - Versions diffs - 0.4.0-alpha61 → 0.4.0-alpha63 - Mend

vargai 0.4.0-alpha61 → 0.4.0-alpha63

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (18)

package/examples/grok-imagine-ai-sdk.tsx +9 -3
package/package.json +1 -1
package/src/ai-sdk/providers/editly/index.ts +7 -1
package/src/ai-sdk/providers/fal.ts +39 -2
package/src/definitions/actions/sync.ts +54 -12
package/src/definitions/models/index.ts +18 -0
package/src/definitions/models/nano-banana-2.ts +115 -0
package/src/definitions/models/omnihuman.ts +71 -0
package/src/definitions/models/qwen-image-2.ts +113 -0
package/src/definitions/models/recraft-v4.ts +94 -0
package/src/definitions/models/reve.ts +66 -0
package/src/definitions/models/veed-fabric.ts +49 -0
package/src/providers/fal.ts +102 -0
package/src/react/examples/veed-fabric-long-talking-head.tsx +75 -0
package/src/react/examples/veed-fabric-react-test.tsx +60 -0
package/src/react/renderers/packshot.ts +18 -1
package/src/react/renderers/render.ts +39 -13
package/src/react/types.ts +12 -1

package/examples/grok-imagine-ai-sdk.tsx CHANGED Viewed

@@ -40,7 +40,9 @@ async function testGrokTextToVideo() {
   // Save the video
   const outputPath = join(import.meta.dir, "../output/grok-t2v-test.mp4");
-  await writeFile(outputPath, result.videos[0]!);
+  const firstVideo = result.videos[0];
+  if (!firstVideo) throw new Error("No video returned from model");
+  await writeFile(outputPath, firstVideo);
   console.log(`Video saved to: ${outputPath}`);
   return outputPath;
@@ -88,7 +90,9 @@ async function testGrokImageToVideo() {
   // Save the video
   const outputPath = join(import.meta.dir, "../output/grok-i2v-test.mp4");
-  await writeFile(outputPath, result.videos[0]!);
+  const firstVideo = result.videos[0];
+  if (!firstVideo) throw new Error("No video returned from model");
+  await writeFile(outputPath, firstVideo);
   console.log(`Video saved to: ${outputPath}`);
   return outputPath;
@@ -136,7 +140,9 @@ async function testGrokEditVideo() {
   // Save the video
   const outputPath = join(import.meta.dir, "../output/grok-edit-test.mp4");
-  await writeFile(outputPath, result.videos[0]!);
+  const firstVideo = result.videos[0];
+  if (!firstVideo) throw new Error("No video returned from model");
+  await writeFile(outputPath, firstVideo);
   console.log(`Video saved to: ${outputPath}`);
   return outputPath;

package/package.json CHANGED Viewed

@@ -70,7 +70,7 @@
     "zod": "^4.2.1"
   },
   "sideEffects": false,
-  "version": "0.4.0-alpha61",
+  "version": "0.4.0-alpha63",
   "exports": {
     ".": "./src/index.ts",
     "./ai": "./src/ai-sdk/index.ts",

package/src/ai-sdk/providers/editly/index.ts CHANGED Viewed

@@ -269,6 +269,12 @@ function buildBaseClipFilter(
     const layer = clipLocalOverlays[i];
     if (!layer) continue;
+    if (!baseLabel) {
+      throw new Error(
+        `Clip ${clipIndex} is missing a base layer for overlay placement — ensure it has at least one visual layer (video, image, or fill-color)`,
+      );
+    }
     const overlayFilter = getVideoFilter(
       layer,
       inputIdx,
@@ -283,7 +289,7 @@ function buildBaseClipFilter(
     const outputLabel = `clip${clipIndex}ov${i}`;
     const positionFilter = getOverlayFilter(
-      baseLabel!,
+      baseLabel,
       overlayFilter.outputLabel,
       layer,
       width,

package/src/ai-sdk/providers/fal.ts CHANGED Viewed

@@ -164,6 +164,8 @@ const LIPSYNC_MODELS: Record<string, string> = {
   "sync-v2": "fal-ai/sync-lipsync",
   "sync-v2-pro": "fal-ai/sync-lipsync/v2",
   lipsync: "fal-ai/sync-lipsync",
+  "omnihuman-v1.5": "fal-ai/bytedance/omnihuman/v1.5",
+  "veed-fabric-1.0": "veed/fabric-1.0",
 };
 const IMAGE_MODELS: Record<string, string> = {
@@ -173,9 +175,20 @@ const IMAGE_MODELS: Record<string, string> = {
   "recraft-v3": "fal-ai/recraft/v3/text-to-image",
   "nano-banana-pro": "fal-ai/nano-banana-pro",
   "nano-banana-pro/edit": "fal-ai/nano-banana-pro/edit",
+  "nano-banana-2": "fal-ai/nano-banana-2/edit",
+  "nano-banana-2/edit": "fal-ai/nano-banana-2/edit",
   "seedream-v4.5/edit": "fal-ai/bytedance/seedream/v4.5/edit",
+  // Qwen Image 2 - text-to-image and image-to-image editing (standard + pro)
+  "qwen-image-2": "fal-ai/qwen-image-2/text-to-image",
+  "qwen-image-2/edit": "fal-ai/qwen-image-2/edit",
+  "qwen-image-2-pro": "fal-ai/qwen-image-2/pro/text-to-image",
+  "qwen-image-2-pro/edit": "fal-ai/qwen-image-2/pro/edit",
   // Qwen Image Edit 2511 Multiple Angles - camera angle adjustment
   "qwen-angles": "fal-ai/qwen-image-edit-2511-multiple-angles",
+  // Recraft V4 Pro - text-to-image
+  "recraft-v4-pro": "fal-ai/recraft/v4/pro/text-to-image",
+  // Reve - image editing
+  "reve/edit": "fal-ai/reve/edit",
 };
 // Models that use image_size instead of aspect_ratio
@@ -184,11 +197,19 @@ const IMAGE_SIZE_MODELS = new Set([
   "flux-dev",
   "flux-pro",
   "seedream-v4.5/edit",
+  "qwen-image-2",
+  "qwen-image-2/edit",
+  "qwen-image-2-pro",
+  "qwen-image-2-pro/edit",
+  "recraft-v4-pro",
 ]);
 // Qwen Angles model - image-to-image with camera angle adjustment
 const QWEN_ANGLES_MODEL = "qwen-angles";
+// Models that use singular image_url instead of image_urls array
+const SINGULAR_IMAGE_URL_MODELS = new Set(["reve/edit"]);
 // Map aspect ratio to image_size for Qwen Angles (base dimension 1024)
 const ASPECT_RATIO_TO_QWEN_SIZE: Record<
   string,
@@ -474,20 +495,30 @@ class FalVideoModel implements VideoModelV3 {
     };
     if (isLipsync) {
-      // Lipsync: video + audio input
+      // Lipsync: either (video + audio) or (image + audio), depending on model
       const videoFile = files?.find((f) =>
         getMediaType(f)?.startsWith("video/"),
       );
+      const imageFile = files?.find((f) =>
+        getMediaType(f)?.startsWith("image/"),
+      );
       const audioFile = files?.find((f) =>
         getMediaType(f)?.startsWith("audio/"),
       );
       if (videoFile) {
         input.video_url = await fileToUrl(videoFile);
+      } else if (imageFile) {
+        input.image_url = await fileToUrl(imageFile);
       }
       if (audioFile) {
         input.audio_url = await fileToUrl(audioFile);
       }
+      // OmniHuman supports an optional prompt
+      if (prompt && this.modelId === "omnihuman-v1.5") {
+        input.prompt = prompt;
+      }
     } else if (isMotionControl) {
       // Motion control: image + reference video input
       if (prompt) {
@@ -836,7 +867,13 @@ class FalImageModel implements ImageModelV3 {
         modelId: this.modelId,
         fileHashes,
       });
-      input.image_urls = await pMap(files, fileToUrl, { concurrency: 2 });
+      const imageUrls = await pMap(files, fileToUrl, { concurrency: 2 });
+      // Reve uses singular image_url instead of image_urls array
+      if (SINGULAR_IMAGE_URL_MODELS.has(this.modelId)) {
+        input.image_url = imageUrls[0];
+      } else {
+        input.image_urls = imageUrls;
+      }
     }
     if (isQwenAngles && !input.image_urls) {

package/src/definitions/actions/sync.ts CHANGED Viewed

@@ -15,6 +15,11 @@ import { ffmpegProvider } from "../../providers/ffmpeg";
 // Input schema with Zod
 const syncInputSchema = z.object({
+  model: z
+    .enum(["wan-25", "omnihuman-v1.5", "veed-fabric-1.0"])
+    .optional()
+    .default("wan-25")
+    .describe("Lip sync / avatar backend model"),
   image: filePathSchema.describe("Input image"),
   audio: filePathSchema.describe("Audio file"),
   prompt: z.string().describe("Description of the scene"),
@@ -40,13 +45,14 @@ export const definition: ActionDefinition<typeof schema> = {
   schema,
   routes: [],
   execute: async (inputs) => {
-    const { image, audio, prompt, duration, resolution } = inputs;
-    return lipsync({ image, audio, prompt, duration, resolution });
+    const { model, image, audio, prompt, duration, resolution } = inputs;
+    return lipsync({ model, image, audio, prompt, duration, resolution });
   },
 };
 // Types
 export interface LipsyncOptions {
+  model?: "wan-25" | "omnihuman-v1.5" | "veed-fabric-1.0";
   image: string;
   audio: string;
   prompt: string;
@@ -65,20 +71,56 @@ export interface Wav2LipOptions {
 }
 /**
- * Generate lip-synced video using Wan-25
+ * Generate lip-synced / avatar video using selected backend.
  */
 export async function lipsync(options: LipsyncOptions): Promise<LipsyncResult> {
-  const { image, audio, prompt, duration = "5", resolution = "480p" } = options;
+  const {
+    model = "wan-25",
+    image,
+    audio,
+    prompt,
+    duration = "5",
+    resolution = "480p",
+  } = options;
-  console.log("[sync] generating lip-synced video with wan-25...");
+  console.log(`[sync] generating lip-synced video with ${model}...`);
-  const result = await falProvider.wan25({
-    imageUrl: image,
-    audioUrl: audio,
-    prompt,
-    duration,
-    resolution,
-  });
+  if (model === "omnihuman-v1.5" && resolution === "480p") {
+    console.warn(
+      "[sync] omnihuman-v1.5 does not support 480p; using 720p instead",
+    );
+  }
+  if (model === "veed-fabric-1.0" && resolution === "1080p") {
+    console.warn(
+      "[sync] veed-fabric-1.0 does not support 1080p; using 720p instead",
+    );
+  }
+  const result =
+    model === "omnihuman-v1.5"
+      ? await falProvider.omnihuman15({
+          imageUrl: image,
+          audioUrl: audio,
+          prompt,
+          resolution: (resolution === "480p" ? "720p" : resolution) as
+            | "720p"
+            | "1080p",
+        })
+      : model === "veed-fabric-1.0"
+        ? await falProvider.veedFabric10({
+            imageUrl: image,
+            audioUrl: audio,
+            resolution: (resolution === "1080p" ? "720p" : resolution) as
+              | "480p"
+              | "720p",
+          })
+        : await falProvider.wan25({
+            imageUrl: image,
+            audioUrl: audio,
+            prompt,
+            duration,
+            resolution,
+          });
   const videoUrl = result.data?.video?.url;
   if (!videoUrl) {

package/src/definitions/models/index.ts CHANGED Viewed

@@ -6,9 +6,15 @@ export { definition as elevenlabsTts } from "./elevenlabs";
 export { definition as flux } from "./flux";
 export { definition as kling } from "./kling";
 export { definition as llama } from "./llama";
+export { definition as nanoBanana2 } from "./nano-banana-2";
 export { definition as nanoBananaPro } from "./nano-banana-pro";
+export { definition as omnihuman } from "./omnihuman";
+export { definition as qwenImage2 } from "./qwen-image-2";
+export { definition as recraftV4 } from "./recraft-v4";
+export { definition as reve } from "./reve";
 export { definition as sonauto } from "./sonauto";
 export { definition as soul } from "./soul";
+export { definition as veedFabric } from "./veed-fabric";
 export { definition as wan } from "./wan";
 export { definition as whisper } from "./whisper";
@@ -17,9 +23,15 @@ import { definition as elevenlabsDefinition } from "./elevenlabs";
 import { definition as fluxDefinition } from "./flux";
 import { definition as klingDefinition } from "./kling";
 import { definition as llamaDefinition } from "./llama";
+import { definition as nanoBanana2Definition } from "./nano-banana-2";
 import { definition as nanoBananaProDefinition } from "./nano-banana-pro";
+import { definition as omnihumanDefinition } from "./omnihuman";
+import { definition as qwenImage2Definition } from "./qwen-image-2";
+import { definition as recraftV4Definition } from "./recraft-v4";
+import { definition as reveDefinition } from "./reve";
 import { definition as sonautoDefinition } from "./sonauto";
 import { definition as soulDefinition } from "./soul";
+import { definition as veedFabricDefinition } from "./veed-fabric";
 import { definition as wanDefinition } from "./wan";
 import { definition as whisperDefinition } from "./whisper";
@@ -27,7 +39,13 @@ export const allModels = [
   klingDefinition,
   fluxDefinition,
   nanoBananaProDefinition,
+  nanoBanana2Definition,
+  qwenImage2Definition,
+  recraftV4Definition,
+  reveDefinition,
   wanDefinition,
+  omnihumanDefinition,
+  veedFabricDefinition,
   whisperDefinition,
   elevenlabsDefinition,
   soulDefinition,

package/src/definitions/models/nano-banana-2.ts ADDED Viewed

@@ -0,0 +1,115 @@
+/**
+ * Nano Banana 2 image editing model (Google's next-gen image generation/editing)
+ * Edit-only model requiring image_urls input
+ */
+import { z } from "zod";
+import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
+// Nano Banana 2 resolution options (includes 0.5K unlike nano-banana-pro)
+const nanoBanana2ResolutionSchema = z.enum(["0.5K", "1K", "2K", "4K"]);
+// Nano Banana 2 aspect ratio options (supports "auto" unlike nano-banana-pro)
+const nanoBanana2AspectRatioSchema = z.enum([
+  "auto",
+  "21:9",
+  "16:9",
+  "3:2",
+  "4:3",
+  "5:4",
+  "1:1",
+  "4:5",
+  "3:4",
+  "2:3",
+  "9:16",
+]);
+// Output format options
+const nanoBanana2OutputFormatSchema = z.enum(["png", "jpeg", "webp"]);
+// Safety tolerance level (string enum "1"-"6", unlike nano-banana-pro's semantic filter)
+const nanoBanana2SafetyToleranceSchema = z.enum(["1", "2", "3", "4", "5", "6"]);
+// Input schema with Zod
+const nanoBanana2InputSchema = z.object({
+  prompt: z.string().describe("Text description for image editing"),
+  image_urls: z
+    .array(z.string().url())
+    .describe(
+      "Input image URLs for image-to-image editing. Required for this model.",
+    ),
+  resolution: nanoBanana2ResolutionSchema
+    .default("1K")
+    .describe(
+      "Output resolution: 0.5K (512px), 1K (1024px), 2K (2048px), or 4K",
+    ),
+  aspect_ratio: nanoBanana2AspectRatioSchema
+    .default("auto")
+    .describe("Output aspect ratio. 'auto' preserves input aspect ratio."),
+  output_format: nanoBanana2OutputFormatSchema
+    .default("png")
+    .describe("Output image format"),
+  safety_tolerance: nanoBanana2SafetyToleranceSchema
+    .default("4")
+    .describe("Safety tolerance level: 1 (most strict) to 6 (least strict)"),
+  num_images: z
+    .number()
+    .int()
+    .min(1)
+    .max(4)
+    .default(1)
+    .describe("Number of images to generate (1-4)"),
+  seed: z
+    .number()
+    .int()
+    .optional()
+    .describe("Seed for the random number generator"),
+  limit_generations: z
+    .boolean()
+    .default(true)
+    .describe(
+      "Limit generations from each round of prompting to 1. May affect quality.",
+    ),
+  enable_web_search: z
+    .boolean()
+    .default(false)
+    .describe(
+      "Enable web search to use latest information for image generation",
+    ),
+});
+// Output schema with Zod
+const nanoBanana2OutputSchema = z.object({
+  images: z.array(
+    z.object({
+      url: z.string(),
+      file_name: z.string().optional(),
+      content_type: z.string().optional(),
+    }),
+  ),
+  description: z.string().optional(),
+});
+// Schema object for the definition
+const schema: ZodSchema<
+  typeof nanoBanana2InputSchema,
+  typeof nanoBanana2OutputSchema
+> = {
+  input: nanoBanana2InputSchema,
+  output: nanoBanana2OutputSchema,
+};
+export const definition: ModelDefinition<typeof schema> = {
+  type: "model",
+  name: "nano-banana-2",
+  description:
+    "Google Nano Banana 2 - next-gen image editing model. Requires image_urls for all operations.",
+  providers: ["fal"],
+  defaultProvider: "fal",
+  providerModels: {
+    fal: "fal-ai/nano-banana-2/edit",
+  },
+  schema,
+};
+export default definition;

package/src/definitions/models/omnihuman.ts ADDED Viewed

@@ -0,0 +1,71 @@
+/**
+ * Bytedance OmniHuman v1.5
+ * Image + audio -> video (full-body human animation)
+ */
+import { z } from "zod";
+import { urlSchema } from "../../core/schema/shared";
+import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
+const omnihumanResolutionSchema = z
+  .enum(["720p", "1080p"])
+  .describe("Output resolution");
+// Input schema with Zod
+const omnihumanInputSchema = z.object({
+  prompt: z
+    .string()
+    .optional()
+    .describe("The text prompt used to guide the video generation"),
+  image_url: urlSchema.describe(
+    "The URL of the image used to generate the video",
+  ),
+  audio_url: urlSchema.describe(
+    "The URL of the audio file to generate the video",
+  ),
+  turbo_mode: z
+    .boolean()
+    .optional()
+    .default(false)
+    .describe("Faster generation with slight quality trade-off"),
+  resolution: omnihumanResolutionSchema
+    .optional()
+    .default("1080p")
+    .describe(
+      "The resolution of the generated video. 720p generation is faster and higher in quality",
+    ),
+});
+// Output schema with Zod
+const omnihumanOutputSchema = z.object({
+  video: z.object({
+    url: z.string(),
+  }),
+  duration: z
+    .number()
+    .optional()
+    .describe("Duration of audio input/video output as used for billing"),
+});
+const schema: ZodSchema<
+  typeof omnihumanInputSchema,
+  typeof omnihumanOutputSchema
+> = {
+  input: omnihumanInputSchema,
+  output: omnihumanOutputSchema,
+};
+export const definition: ModelDefinition<typeof schema> = {
+  type: "model",
+  name: "omnihuman",
+  description:
+    "OmniHuman v1.5 - generate a vivid talking video from an image and an audio file",
+  providers: ["fal"],
+  defaultProvider: "fal",
+  providerModels: {
+    fal: "fal-ai/bytedance/omnihuman/v1.5",
+  },
+  schema,
+};
+export default definition;

package/src/definitions/models/qwen-image-2.ts ADDED Viewed

@@ -0,0 +1,113 @@
+/**
+ * Qwen Image 2 generation and editing model
+ * Next-generation unified generation-and-editing model from Alibaba
+ * Supports both text-to-image and image-to-image editing
+ * Available in standard and pro tiers
+ */
+import { z } from "zod";
+import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
+// Image size can be an enum string or an object with width/height
+const qwenImage2ImageSizeSchema = z.union([
+  z.enum([
+    "square_hd",
+    "square",
+    "landscape_4_3",
+    "landscape_16_9",
+    "portrait_4_3",
+    "portrait_16_9",
+  ]),
+  z.object({
+    width: z.number().int().min(512).max(2048),
+    height: z.number().int().min(512).max(2048),
+  }),
+]);
+// Output format options
+const qwenImage2OutputFormatSchema = z.enum(["png", "jpeg", "webp"]);
+// Input schema with Zod
+const qwenImage2InputSchema = z.object({
+  prompt: z
+    .string()
+    .describe(
+      "Text description for generation or editing. Supports Chinese and English.",
+    ),
+  negative_prompt: z
+    .string()
+    .default("")
+    .describe("Content to avoid in the generated image. Max 500 characters."),
+  image_size: qwenImage2ImageSizeSchema
+    .optional()
+    .describe(
+      "Output image size. Can be an enum (e.g. 'square_hd') or {width, height} object. Pixels must be between 512x512 and 2048x2048.",
+    ),
+  image_urls: z
+    .array(z.string().url())
+    .optional()
+    .describe(
+      "Reference images for editing (1-6 images). Order matters: reference as 'image 1', 'image 2' in prompt. Required for /edit endpoints.",
+    ),
+  enable_prompt_expansion: z
+    .boolean()
+    .default(true)
+    .describe("Enable LLM prompt optimization for better results"),
+  seed: z
+    .number()
+    .int()
+    .min(0)
+    .max(2147483647)
+    .optional()
+    .describe("Random seed for reproducibility"),
+  enable_safety_checker: z
+    .boolean()
+    .default(true)
+    .describe("Enable content moderation for input and output"),
+  num_images: z
+    .number()
+    .int()
+    .min(1)
+    .max(6)
+    .default(1)
+    .describe("Number of images to generate (1-4 for t2i, 1-6 for edit)"),
+  output_format: qwenImage2OutputFormatSchema
+    .default("png")
+    .describe("Output image format"),
+});
+// Output schema with Zod
+const qwenImage2OutputSchema = z.object({
+  images: z.array(
+    z.object({
+      url: z.string(),
+      file_name: z.string().optional(),
+      content_type: z.string().optional(),
+    }),
+  ),
+  seed: z.number().int().optional(),
+});
+// Schema object for the definition
+const schema: ZodSchema<
+  typeof qwenImage2InputSchema,
+  typeof qwenImage2OutputSchema
+> = {
+  input: qwenImage2InputSchema,
+  output: qwenImage2OutputSchema,
+};
+export const definition: ModelDefinition<typeof schema> = {
+  type: "model",
+  name: "qwen-image-2",
+  description:
+    "Qwen Image 2.0 - next-gen unified generation-and-editing model. Supports text-to-image and image-to-image editing in standard and pro tiers.",
+  providers: ["fal"],
+  defaultProvider: "fal",
+  providerModels: {
+    fal: "fal-ai/qwen-image-2/text-to-image",
+  },
+  schema,
+};
+export default definition;

package/src/definitions/models/recraft-v4.ts ADDED Viewed

@@ -0,0 +1,94 @@
+/**
+ * Recraft V4 Pro image generation model
+ * Built for brand systems and production-ready workflows
+ * Text-to-image only
+ */
+import { z } from "zod";
+import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
+// Image size can be an enum string or an object with width/height
+const recraftV4ImageSizeSchema = z.union([
+  z.enum([
+    "square_hd",
+    "square",
+    "landscape_4_3",
+    "landscape_16_9",
+    "portrait_4_3",
+    "portrait_16_9",
+  ]),
+  z.object({
+    width: z.number().int(),
+    height: z.number().int(),
+  }),
+]);
+// RGB color schema
+const rgbColorSchema = z.object({
+  r: z.number().int().min(0).max(255),
+  g: z.number().int().min(0).max(255),
+  b: z.number().int().min(0).max(255),
+});
+// Output format - Recraft V4 outputs webp by default
+const recraftV4OutputFormatSchema = z.enum(["png", "jpeg", "webp"]);
+// Input schema with Zod
+const recraftV4InputSchema = z.object({
+  prompt: z.string().describe("Text description for image generation"),
+  image_size: recraftV4ImageSizeSchema
+    .default("square_hd")
+    .describe(
+      "Output image size. Can be an enum (e.g. 'landscape_16_9') or {width, height} object.",
+    ),
+  colors: z
+    .array(rgbColorSchema)
+    .default([])
+    .describe("Array of preferable RGB colors for the generated image"),
+  background_color: rgbColorSchema
+    .optional()
+    .describe("Preferable background color of the generated image"),
+  enable_safety_checker: z
+    .boolean()
+    .default(true)
+    .describe("Enable content safety checker"),
+  output_format: recraftV4OutputFormatSchema
+    .optional()
+    .describe("Output image format"),
+});
+// Output schema with Zod
+const recraftV4OutputSchema = z.object({
+  images: z.array(
+    z.object({
+      url: z.string(),
+      file_name: z.string().optional(),
+      file_size: z.number().optional(),
+      content_type: z.string().optional(),
+    }),
+  ),
+});
+// Schema object for the definition
+const schema: ZodSchema<
+  typeof recraftV4InputSchema,
+  typeof recraftV4OutputSchema
+> = {
+  input: recraftV4InputSchema,
+  output: recraftV4OutputSchema,
+};
+export const definition: ModelDefinition<typeof schema> = {
+  type: "model",
+  name: "recraft-v4-pro",
+  description:
+    "Recraft V4 Pro - professional text-to-image model built for brand systems and production-ready workflows. Strong composition, refined lighting, realistic materials.",
+  providers: ["fal"],
+  defaultProvider: "fal",
+  providerModels: {
+    fal: "fal-ai/recraft/v4/pro/text-to-image",
+  },
+  schema,
+};
+export default definition;

package/src/definitions/models/reve.ts ADDED Viewed

@@ -0,0 +1,66 @@
+/**
+ * Reve image editing model
+ * Upload an existing image and transform it via a text prompt
+ * Edit-only model using singular image_url (not image_urls array)
+ */
+import { z } from "zod";
+import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
+// Output format options
+const reveOutputFormatSchema = z.enum(["png", "jpeg", "webp"]);
+// Input schema with Zod
+const reveInputSchema = z.object({
+  prompt: z
+    .string()
+    .describe("Text description of how to edit the provided image"),
+  image_url: z
+    .string()
+    .url()
+    .describe(
+      "URL of the reference image to edit. Supports PNG, JPEG, WebP, AVIF, and HEIF formats.",
+    ),
+  num_images: z
+    .number()
+    .int()
+    .min(1)
+    .max(4)
+    .default(1)
+    .describe("Number of images to generate (1-4)"),
+  output_format: reveOutputFormatSchema
+    .default("png")
+    .describe("Output image format"),
+});
+// Output schema with Zod
+const reveOutputSchema = z.object({
+  images: z.array(
+    z.object({
+      url: z.string(),
+      file_name: z.string().optional(),
+      content_type: z.string().optional(),
+    }),
+  ),
+});
+// Schema object for the definition
+const schema: ZodSchema<typeof reveInputSchema, typeof reveOutputSchema> = {
+  input: reveInputSchema,
+  output: reveOutputSchema,
+};
+export const definition: ModelDefinition<typeof schema> = {
+  type: "model",
+  name: "reve",
+  description:
+    "Reve edit model - upload an existing image and transform it via a text prompt. Uses singular image_url input.",
+  providers: ["fal"],
+  defaultProvider: "fal",
+  providerModels: {
+    fal: "fal-ai/reve/edit",
+  },
+  schema,
+};
+export default definition;

package/src/definitions/models/veed-fabric.ts ADDED Viewed

@@ -0,0 +1,49 @@
+/**
+ * VEED Fabric 1.0
+ * Image + audio -> talking video
+ */
+import { z } from "zod";
+import { urlSchema } from "../../core/schema/shared";
+import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
+const fabricResolutionSchema = z
+  .enum(["480p", "720p"])
+  .describe("Output resolution");
+// Input schema with Zod
+const veedFabricInputSchema = z.object({
+  image_url: urlSchema.describe("Input image URL"),
+  audio_url: urlSchema.describe("Input audio URL"),
+  resolution: fabricResolutionSchema.describe("Output resolution"),
+});
+// Output schema with Zod
+const veedFabricOutputSchema = z.object({
+  video: z.object({
+    content_type: z.string().optional(),
+    url: z.string().url(),
+  }),
+});
+const schema: ZodSchema<
+  typeof veedFabricInputSchema,
+  typeof veedFabricOutputSchema
+> = {
+  input: veedFabricInputSchema,
+  output: veedFabricOutputSchema,
+};
+export const definition: ModelDefinition<typeof schema> = {
+  type: "model",
+  name: "veed-fabric",
+  description: "VEED Fabric 1.0 - turn an image into a talking video",
+  providers: ["fal"],
+  defaultProvider: "fal",
+  providerModels: {
+    fal: "veed/fabric-1.0",
+  },
+  schema,
+};
+export default definition;

package/src/providers/fal.ts CHANGED Viewed

@@ -54,6 +54,23 @@ export class FalProvider extends BaseProvider {
         return "fal-ai/nano-banana-pro/edit";
       }
     }
+    // Nano Banana 2: always route to /edit endpoint (edit-only model)
+    if (model === "fal-ai/nano-banana-2") {
+      return "fal-ai/nano-banana-2/edit";
+    }
+    // Qwen Image 2: route to /edit endpoint when image_urls are provided
+    if (model === "fal-ai/qwen-image-2/text-to-image") {
+      const imageUrls = inputs.image_urls as string[] | undefined;
+      if (imageUrls && imageUrls.length > 0) {
+        return "fal-ai/qwen-image-2/edit";
+      }
+    }
+    if (model === "fal-ai/qwen-image-2/pro/text-to-image") {
+      const imageUrls = inputs.image_urls as string[] | undefined;
+      if (imageUrls && imageUrls.length > 0) {
+        return "fal-ai/qwen-image-2/pro/edit";
+      }
+    }
     return model;
   }
@@ -332,6 +349,86 @@ export class FalProvider extends BaseProvider {
     return result;
   }
+  async omnihuman15(args: {
+    imageUrl: string;
+    audioUrl: string;
+    prompt?: string;
+    turboMode?: boolean;
+    resolution?: "720p" | "1080p";
+  }) {
+    const modelId: string = "fal-ai/bytedance/omnihuman/v1.5";
+    console.log(`[fal] starting omnihuman v1.5: ${modelId}`);
+    const imageUrl = await ensureUrl(args.imageUrl, (buffer) =>
+      this.uploadFile(buffer),
+    );
+    const audioUrl = await ensureUrl(args.audioUrl, (buffer) =>
+      this.uploadFile(buffer),
+    );
+    const input: Record<string, unknown> = {
+      ...(args.prompt ? { prompt: args.prompt } : {}),
+      image_url: imageUrl,
+      audio_url: audioUrl,
+      turbo_mode: args.turboMode ?? false,
+      resolution: args.resolution ?? "1080p",
+    };
+    const result = await fal.subscribe(modelId, {
+      input,
+      logs: true,
+      onQueueUpdate: (update) => {
+        if (update.status === "IN_PROGRESS") {
+          console.log(
+            `[fal] ${update.logs?.map((l) => l.message).join(" ") || "processing..."}`,
+          );
+        }
+      },
+    });
+    console.log("[fal] completed!");
+    return result;
+  }
+  async veedFabric10(args: {
+    imageUrl: string;
+    audioUrl: string;
+    resolution: "480p" | "720p";
+  }) {
+    const modelId: string = "veed/fabric-1.0";
+    console.log(`[fal] starting veed fabric 1.0: ${modelId}`);
+    const imageUrl = await ensureUrl(args.imageUrl, (buffer) =>
+      this.uploadFile(buffer),
+    );
+    const audioUrl = await ensureUrl(args.audioUrl, (buffer) =>
+      this.uploadFile(buffer),
+    );
+    const input: Record<string, unknown> = {
+      image_url: imageUrl,
+      audio_url: audioUrl,
+      resolution: args.resolution,
+    };
+    const result = await fal.subscribe(modelId, {
+      input,
+      logs: true,
+      onQueueUpdate: (update) => {
+        if (update.status === "IN_PROGRESS") {
+          console.log(
+            `[fal] ${update.logs?.map((l) => l.message).join(" ") || "processing..."}`,
+          );
+        }
+      },
+    });
+    console.log("[fal] completed!");
+    return result;
+  }
   async textToMusic(args: {
     prompt?: string;
     tags?: string[];
@@ -584,5 +681,10 @@ export const imageToImage = (
 ) => falProvider.imageToImage(args);
 export const wan25 = (args: Parameters<FalProvider["wan25"]>[0]) =>
   falProvider.wan25(args);
+export const omnihuman15 = (args: Parameters<FalProvider["omnihuman15"]>[0]) =>
+  falProvider.omnihuman15(args);
+export const veedFabric10 = (
+  args: Parameters<FalProvider["veedFabric10"]>[0],
+) => falProvider.veedFabric10(args);
 export const textToMusic = (args: Parameters<FalProvider["textToMusic"]>[0]) =>
   falProvider.textToMusic(args);

package/src/react/examples/veed-fabric-long-talking-head.tsx ADDED Viewed

@@ -0,0 +1,75 @@
+/**
+ * Longer talking head demo (VEED Fabric 1.0):
+ * - character image from nano-banana-pro
+ * - voice from ElevenLabs
+ * - talking video from veed/fabric-1.0 (image + audio)
+ *
+ * Run: bun run src/react/examples/veed-fabric-long-talking-head.tsx
+ * Output: output/veed-fabric-long-talking-head.mp4
+ */
+import { elevenlabs, fal } from "../../ai-sdk";
+import { Clip, Image, Render, render, Speech, Video } from "..";
+// Voiceover text; passed to `Speech` below as its children.
+const SCRIPT =
+  "Hey, I am Nova. In this quick demo, you will hear a clean voiceover, and see a talking avatar generated from a single portrait. We are using VEED Fabric for image-to-video lipsync, and ElevenLabs for the voice.";
+// Character portrait: fal image model "nano-banana-pro" at a 9:16 aspect ratio.
+const portrait = Image({
+  prompt:
+    "Ultra-realistic studio portrait of Nova, a confident friendly product designer in her early 30s, warm smile, expressive eyes, subtle freckles, natural makeup, shoulder-length dark auburn hair, modern minimal wardrobe, cinematic softbox lighting, shallow depth of field, clean neutral background, high-end camera look",
+  model: fal.imageModel("nano-banana-pro"),
+  aspectRatio: "9:16",
+});
+// Spoken audio for SCRIPT: ElevenLabs speech model "eleven_v3", voice "adam".
+const voiceover = Speech({
+  model: elevenlabs.speechModel("eleven_v3"),
+  voice: "adam",
+  children: SCRIPT,
+});
+// Talking-head video: fal video model "veed-fabric-1.0" fed with the portrait
+// and the voiceover, requested at 720p through the fal provider options.
+// NOTE(review): `keepAudio` presumably keeps the audio track in the rendered
+// output - confirm against the Video props.
+const talking = Video({
+  model: fal.videoModel("veed-fabric-1.0"),
+  keepAudio: true,
+  prompt: {
+    images: [portrait],
+    audio: voiceover,
+  },
+  providerOptions: {
+    fal: {
+      resolution: "720p",
+    },
+  },
+});
+// Render tree: a 1080x1920 render holding one clip (duration "auto") that
+// wraps the talking-head video.
+const demo = (
+  <Render width={1080} height={1920}>
+    <Clip duration="auto">{talking}</Clip>
+  </Render>
+);
+/**
+ * Check that the required API keys are present, render `demo` to disk, and
+ * log the size of the resulting video. Exits with code 1 when a key is missing.
+ */
+async function main() {
+  const outputPath = "output/veed-fabric-long-talking-head.mp4";
+  const { FAL_API_KEY, FAL_KEY, ELEVENLABS_API_KEY } = process.env;
+
+  // Either fal variable is enough.
+  if (!(FAL_API_KEY || FAL_KEY)) {
+    console.error("ERROR: FAL_API_KEY/FAL_KEY not found in environment");
+    process.exit(1);
+  }
+  if (!ELEVENLABS_API_KEY) {
+    console.error("ERROR: ELEVENLABS_API_KEY not found in environment");
+    process.exit(1);
+  }
+
+  const result = await render(demo, {
+    output: outputPath,
+    cache: ".cache/ai-veed-fabric-long-talking-head",
+  });
+
+  const sizeMb = (result.video.byteLength / 1024 / 1024).toFixed(2);
+  console.log(`ok: ${outputPath} (${sizeMb} MB)`);
+}
+// Entry guard: main() runs only when `import.meta.main` is truthy (under Bun,
+// when this file is the entry point). Any rejection from main() is logged and
+// the process exits with code 1.
+if (import.meta.main) {
+  main().catch((err) => {
+    console.error(err);
+    process.exit(1);
+  });
+}

package/src/react/examples/veed-fabric-react-test.tsx ADDED Viewed

@@ -0,0 +1,60 @@
+/**
+ * VEED Fabric 1.0 React syntax test
+ *
+ * Uses a local image + local audio file to generate a talking video.
+ *
+ * Run: bun run src/react/examples/veed-fabric-react-test.tsx
+ * Output: output/veed-fabric-react-test-<resolution>.mp4 (720p unless FABRIC_RESOLUTION is set)
+ */
+import { fal } from "../../ai-sdk/providers/fal";
+import { Clip, Render, render, Video } from "..";
+// Local input files fed to the model as the image and audio prompt.
+const IMAGE_PATH = "output/garry-tan-image.png";
+const AUDIO_PATH = "output/garry-tan-voice.mp3";
+/**
+ * Output resolution, read from the FABRIC_RESOLUTION environment variable.
+ *
+ * The value is validated at runtime instead of being cast, because it is
+ * interpolated into the output/cache paths and forwarded to the provider.
+ * An unset or empty variable falls back to "720p".
+ *
+ * @throws Error when FABRIC_RESOLUTION is set to anything other than
+ *   "480p" or "720p".
+ */
+const RESOLUTION: "480p" | "720p" = (() => {
+  const requested = process.env.FABRIC_RESOLUTION;
+  if (requested === undefined || requested === "") return "720p";
+  if (requested === "480p" || requested === "720p") return requested;
+  throw new Error(
+    `FABRIC_RESOLUTION must be "480p" or "720p", got "${requested}"`,
+  );
+})();
+// Render tree: a 720x1280 render with a single clip (duration={5}, presumably
+// seconds - confirm) containing one Video. The Video uses the fal
+// "veed-fabric-1.0" model, takes the local image and audio paths as its
+// prompt, and passes RESOLUTION through the fal provider options.
+const video = (
+  <Render width={720} height={1280}>
+    <Clip duration={5}>
+      <Video
+        model={fal.videoModel("veed-fabric-1.0")}
+        keepAudio
+        prompt={{
+          images: [IMAGE_PATH],
+          audio: AUDIO_PATH,
+        }}
+        providerOptions={{
+          fal: {
+            resolution: RESOLUTION,
+          },
+        }}
+      />
+    </Clip>
+  </Render>
+);
+/**
+ * Check that a fal API key is present, render `video` to a
+ * resolution-specific output file, and log the size of the result.
+ * Exits with code 1 when no fal key is set.
+ */
+async function main() {
+  const { FAL_API_KEY, FAL_KEY } = process.env;
+  // Either fal variable is enough.
+  if (!(FAL_API_KEY || FAL_KEY)) {
+    console.error("ERROR: FAL_API_KEY/FAL_KEY not found in environment");
+    process.exit(1);
+  }
+
+  const outputPath = `output/veed-fabric-react-test-${RESOLUTION}.mp4`;
+  const result = await render(video, {
+    output: outputPath,
+    cache: `.cache/ai-veed-fabric-${RESOLUTION}-keepaudio`,
+  });
+
+  const sizeMb = (result.video.byteLength / 1024 / 1024).toFixed(2);
+  console.log(`ok: ${outputPath} (${sizeMb} MB)`);
+}
+// Entry guard: main() runs only when `import.meta.main` is truthy (under Bun,
+// when this file is the entry point). Any rejection from main() is logged and
+// the process exits with code 1.
+if (import.meta.main) {
+  main().catch((err) => {
+    console.error(err);
+    process.exit(1);
+  });
+}

package/src/react/renderers/packshot.ts CHANGED Viewed

@@ -11,11 +11,13 @@ import type {
   PositionObject,
   SizeValue,
   TitleLayer,
+  VideoLayer,
 } from "../../ai-sdk/providers/editly/types";
 import type { PackshotProps, VargElement } from "../types";
 import type { RenderContext } from "./context";
 import { renderImage } from "./image";
 import { createBlinkingButton } from "./packshot/blinking-button";
+import { renderVideo } from "./video";
 /**
  * Resolve an FFmpegOutput to a string path/URL via the backend.
@@ -118,8 +120,23 @@ export async function renderPackshot(
         type: "fill-color" as const,
         color: props.background,
       });
+    } else if (props.background.type === "video") {
+      const bgFile = await renderVideo(
+        props.background as VargElement<"video">,
+        ctx,
+      );
+      const bgPath = await ctx.backend.resolvePath(bgFile);
+      const videoLayer: VideoLayer = {
+        type: "video",
+        path: bgPath,
+        resizeMode: "cover",
+      };
+      layers.push(videoLayer);
     } else {
-      const bgFile = await renderImage(props.background, ctx);
+      const bgFile = await renderImage(
+        props.background as VargElement<"image">,
+        ctx,
+      );
       const bgPath = await ctx.backend.resolvePath(bgFile);
       layers.push({
         type: "image" as const,

package/src/react/renderers/render.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 import type { ImageModelV3 } from "@ai-sdk/provider";
 import { generateImage, wrapImageModel } from "ai";
+import pMap from "p-map";
 import { type CacheStorage, withCache } from "../../ai-sdk/cache";
 import type { File, File as VargFile } from "../../ai-sdk/file";
 import { fileCache } from "../../ai-sdk/file-cache";
@@ -9,7 +10,6 @@ import {
   placeholderFallbackMiddleware,
   wrapVideoModel,
 } from "../../ai-sdk/middleware";
 import { editly, localBackend } from "../../ai-sdk/providers/editly";
 import type {
   AudioTrack,
@@ -236,15 +236,42 @@ export async function renderRoot(
     }
   }
-  const clipResults = await Promise.allSettled(
-    clipElements.map((clipElement) => renderClip(clipElement, ctx)),
+  const concurrency =
+    options.concurrency === undefined
+      ? Number.POSITIVE_INFINITY
+      : options.concurrency;
+  if (
+    concurrency !== Number.POSITIVE_INFINITY &&
+    (!Number.isInteger(concurrency) || concurrency < 1)
+  ) {
+    throw new Error("render option `concurrency` must be a positive integer");
+  }
+  const clipResults = await pMap(
+    clipElements,
+    async (clipElement, i) => {
+      try {
+        return {
+          status: "fulfilled" as const,
+          value: await renderClip(clipElement, ctx),
+          index: i,
+        };
+      } catch (reason) {
+        return {
+          status: "rejected" as const,
+          reason: reason as Error,
+          index: i,
+        };
+      }
+    },
+    { concurrency },
   );
-  const failures = clipResults
-    .map((r, i) =>
-      r.status === "rejected" ? { index: i, reason: r.reason } : null,
-    )
-    .filter(Boolean) as { index: number; reason: Error }[];
+  const failures = clipResults.filter(
+    (r): r is Extract<typeof r, { status: "rejected" }> =>
+      r.status === "rejected",
+  );
   if (failures.length > 0) {
     const successCount = clipResults.length - failures.length;
@@ -266,11 +293,10 @@ export async function renderRoot(
     );
   }
-  const renderedClips = clipResults.map(
-    (r) =>
-      (r as PromiseFulfilledResult<Awaited<ReturnType<typeof renderClip>>>)
-        .value,
-  );
+  const renderedClips = clipResults.map((r) => {
+    if (r.status !== "fulfilled") throw new Error("unexpected");
+    return r.value;
+  });
   const clips: Clip[] = [];
   let currentTime = 0;

package/src/react/types.ts CHANGED Viewed

@@ -209,7 +209,16 @@ export interface SwipeProps extends BaseProps {
 }
 export interface PackshotProps extends BaseProps {
-  background?: VargElement<"image"> | string;
+  /**
+   * Packshot background.
+   *
+   * - `string` — treated as a solid fill color (e.g. `"#000000"`).
+   * - `VargElement<"image">` — a generated or static image, rendered and
+   *   used as a full-bleed cover background.
+   * - `VargElement<"video">` — a generated or static video, rendered and
+   *   used as a looping full-bleed cover background.
+   */
+  background?: VargElement<"image"> | VargElement<"video"> | string;
   logo?: string;
   /**
    * Logo position on screen.
@@ -276,6 +285,8 @@ export interface RenderOptions {
   defaults?: DefaultModels;
   backend?: FFmpegBackend;
   storage?: StorageProvider;
+  /** Max concurrent clip renders; must be a positive integer. Defaults to unlimited. */
+  concurrency?: number;
 }
 // Re-export from file module for convenience