vargai 0.4.0-alpha61 → 0.4.0-alpha62

This diff shows the published contents of the two package versions as they appear in their public registry and is provided for informational purposes only.
@@ -40,7 +40,9 @@ async function testGrokTextToVideo() {
 
  // Save the video
  const outputPath = join(import.meta.dir, "../output/grok-t2v-test.mp4");
- await writeFile(outputPath, result.videos[0]!);
+ const firstVideo = result.videos[0];
+ if (!firstVideo) throw new Error("No video returned from model");
+ await writeFile(outputPath, firstVideo);
  console.log(`Video saved to: ${outputPath}`);
 
  return outputPath;
@@ -88,7 +90,9 @@ async function testGrokImageToVideo() {
 
  // Save the video
  const outputPath = join(import.meta.dir, "../output/grok-i2v-test.mp4");
- await writeFile(outputPath, result.videos[0]!);
+ const firstVideo = result.videos[0];
+ if (!firstVideo) throw new Error("No video returned from model");
+ await writeFile(outputPath, firstVideo);
  console.log(`Video saved to: ${outputPath}`);
 
  return outputPath;
@@ -136,7 +140,9 @@ async function testGrokEditVideo() {
 
  // Save the video
  const outputPath = join(import.meta.dir, "../output/grok-edit-test.mp4");
- await writeFile(outputPath, result.videos[0]!);
+ const firstVideo = result.videos[0];
+ if (!firstVideo) throw new Error("No video returned from model");
+ await writeFile(outputPath, firstVideo);
  console.log(`Video saved to: ${outputPath}`);
 
  return outputPath;
package/package.json CHANGED
@@ -70,7 +70,7 @@
  "zod": "^4.2.1"
  },
  "sideEffects": false,
- "version": "0.4.0-alpha61",
+ "version": "0.4.0-alpha62",
  "exports": {
  ".": "./src/index.ts",
  "./ai": "./src/ai-sdk/index.ts",
@@ -269,6 +269,12 @@ function buildBaseClipFilter(
  const layer = clipLocalOverlays[i];
  if (!layer) continue;
 
+ if (!baseLabel) {
+ throw new Error(
+ `Clip ${clipIndex} is missing a base layer for overlay placement — ensure it has at least one visual layer (video, image, or fill-color)`,
+ );
+ }
+
  const overlayFilter = getVideoFilter(
  layer,
  inputIdx,
@@ -283,7 +289,7 @@ function buildBaseClipFilter(
 
  const outputLabel = `clip${clipIndex}ov${i}`;
  const positionFilter = getOverlayFilter(
- baseLabel!,
+ baseLabel,
  overlayFilter.outputLabel,
  layer,
  width,
@@ -164,6 +164,8 @@ const LIPSYNC_MODELS: Record<string, string> = {
  "sync-v2": "fal-ai/sync-lipsync",
  "sync-v2-pro": "fal-ai/sync-lipsync/v2",
  lipsync: "fal-ai/sync-lipsync",
+ "omnihuman-v1.5": "fal-ai/bytedance/omnihuman/v1.5",
+ "veed-fabric-1.0": "veed/fabric-1.0",
  };
 
  const IMAGE_MODELS: Record<string, string> = {
@@ -474,20 +476,30 @@ class FalVideoModel implements VideoModelV3 {
  };
 
  if (isLipsync) {
- // Lipsync: video + audio input
+ // Lipsync: either (video + audio) or (image + audio), depending on model
  const videoFile = files?.find((f) =>
  getMediaType(f)?.startsWith("video/"),
  );
+ const imageFile = files?.find((f) =>
+ getMediaType(f)?.startsWith("image/"),
+ );
  const audioFile = files?.find((f) =>
  getMediaType(f)?.startsWith("audio/"),
  );
 
  if (videoFile) {
  input.video_url = await fileToUrl(videoFile);
+ } else if (imageFile) {
+ input.image_url = await fileToUrl(imageFile);
  }
  if (audioFile) {
  input.audio_url = await fileToUrl(audioFile);
  }
+
+ // OmniHuman supports an optional prompt
+ if (prompt && this.modelId === "omnihuman-v1.5") {
+ input.prompt = prompt;
+ }
  } else if (isMotionControl) {
  // Motion control: image + reference video input
  if (prompt) {
@@ -15,6 +15,11 @@ import { ffmpegProvider } from "../../providers/ffmpeg";
 
  // Input schema with Zod
  const syncInputSchema = z.object({
+ model: z
+ .enum(["wan-25", "omnihuman-v1.5", "veed-fabric-1.0"])
+ .optional()
+ .default("wan-25")
+ .describe("Lip sync / avatar backend model"),
  image: filePathSchema.describe("Input image"),
  audio: filePathSchema.describe("Audio file"),
  prompt: z.string().describe("Description of the scene"),
@@ -40,13 +45,14 @@ export const definition: ActionDefinition<typeof schema> = {
  schema,
  routes: [],
  execute: async (inputs) => {
- const { image, audio, prompt, duration, resolution } = inputs;
- return lipsync({ image, audio, prompt, duration, resolution });
+ const { model, image, audio, prompt, duration, resolution } = inputs;
+ return lipsync({ model, image, audio, prompt, duration, resolution });
  },
  };
 
  // Types
  export interface LipsyncOptions {
+ model?: "wan-25" | "omnihuman-v1.5" | "veed-fabric-1.0";
  image: string;
  audio: string;
  prompt: string;
@@ -65,20 +71,56 @@ export interface Wav2LipOptions {
  }
 
  /**
- * Generate lip-synced video using Wan-25
+ * Generate lip-synced / avatar video using selected backend.
  */
  export async function lipsync(options: LipsyncOptions): Promise<LipsyncResult> {
- const { image, audio, prompt, duration = "5", resolution = "480p" } = options;
+ const {
+ model = "wan-25",
+ image,
+ audio,
+ prompt,
+ duration = "5",
+ resolution = "480p",
+ } = options;
 
- console.log("[sync] generating lip-synced video with wan-25...");
+ console.log(`[sync] generating lip-synced video with ${model}...`);
 
- const result = await falProvider.wan25({
- imageUrl: image,
- audioUrl: audio,
- prompt,
- duration,
- resolution,
- });
+ if (model === "omnihuman-v1.5" && resolution === "480p") {
+ console.warn(
+ "[sync] omnihuman-v1.5 does not support 480p; using 720p instead",
+ );
+ }
+ if (model === "veed-fabric-1.0" && resolution === "1080p") {
+ console.warn(
+ "[sync] veed-fabric-1.0 does not support 1080p; using 720p instead",
+ );
+ }
+
+ const result =
+ model === "omnihuman-v1.5"
+ ? await falProvider.omnihuman15({
+ imageUrl: image,
+ audioUrl: audio,
+ prompt,
+ resolution: (resolution === "480p" ? "720p" : resolution) as
+ | "720p"
+ | "1080p",
+ })
+ : model === "veed-fabric-1.0"
+ ? await falProvider.veedFabric10({
+ imageUrl: image,
+ audioUrl: audio,
+ resolution: (resolution === "1080p" ? "720p" : resolution) as
+ | "480p"
+ | "720p",
+ })
+ : await falProvider.wan25({
+ imageUrl: image,
+ audioUrl: audio,
+ prompt,
+ duration,
+ resolution,
+ });
 
  const videoUrl = result.data?.video?.url;
  if (!videoUrl) {
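
For orientation, a minimal sketch of calling the updated lipsync() helper with the new backend selector. This is not taken from the package: the import path and file paths are placeholders, and omitting model keeps the previous behaviour because the schema defaults to "wan-25".

import { lipsync } from "./sync"; // hypothetical import path for the sync action module

// Route the same image + audio through the new OmniHuman backend.
// "portrait.png" and "voiceover.mp3" are placeholder local files.
const result = await lipsync({
  model: "omnihuman-v1.5",
  image: "portrait.png",
  audio: "voiceover.mp3",
  prompt: "A person speaking directly to camera",
  resolution: "720p", // passing "480p" would be bumped to 720p with a warning
});
console.log(result);
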
@@ -7,8 +7,10 @@ export { definition as flux } from "./flux";
  export { definition as kling } from "./kling";
  export { definition as llama } from "./llama";
  export { definition as nanoBananaPro } from "./nano-banana-pro";
+ export { definition as omnihuman } from "./omnihuman";
  export { definition as sonauto } from "./sonauto";
  export { definition as soul } from "./soul";
+ export { definition as veedFabric } from "./veed-fabric";
  export { definition as wan } from "./wan";
  export { definition as whisper } from "./whisper";
 
@@ -18,8 +20,10 @@ import { definition as fluxDefinition } from "./flux";
  import { definition as klingDefinition } from "./kling";
  import { definition as llamaDefinition } from "./llama";
  import { definition as nanoBananaProDefinition } from "./nano-banana-pro";
+ import { definition as omnihumanDefinition } from "./omnihuman";
  import { definition as sonautoDefinition } from "./sonauto";
  import { definition as soulDefinition } from "./soul";
+ import { definition as veedFabricDefinition } from "./veed-fabric";
  import { definition as wanDefinition } from "./wan";
  import { definition as whisperDefinition } from "./whisper";
 
@@ -28,6 +32,8 @@ export const allModels = [
  fluxDefinition,
  nanoBananaProDefinition,
  wanDefinition,
+ omnihumanDefinition,
+ veedFabricDefinition,
  whisperDefinition,
  elevenlabsDefinition,
  soulDefinition,
@@ -0,0 +1,71 @@
+ /**
+ * Bytedance OmniHuman v1.5
+ * Image + audio -> video (full-body human animation)
+ */
+
+ import { z } from "zod";
+ import { urlSchema } from "../../core/schema/shared";
+ import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
+
+ const omnihumanResolutionSchema = z
+ .enum(["720p", "1080p"])
+ .describe("Output resolution");
+
+ // Input schema with Zod
+ const omnihumanInputSchema = z.object({
+ prompt: z
+ .string()
+ .optional()
+ .describe("The text prompt used to guide the video generation"),
+ image_url: urlSchema.describe(
+ "The URL of the image used to generate the video",
+ ),
+ audio_url: urlSchema.describe(
+ "The URL of the audio file to generate the video",
+ ),
+ turbo_mode: z
+ .boolean()
+ .optional()
+ .default(false)
+ .describe("Faster generation with slight quality trade-off"),
+ resolution: omnihumanResolutionSchema
+ .optional()
+ .default("1080p")
+ .describe(
+ "The resolution of the generated video. 720p generation is faster and higher in quality",
+ ),
+ });
+
+ // Output schema with Zod
+ const omnihumanOutputSchema = z.object({
+ video: z.object({
+ url: z.string(),
+ }),
+ duration: z
+ .number()
+ .optional()
+ .describe("Duration of audio input/video output as used for billing"),
+ });
+
+ const schema: ZodSchema<
+ typeof omnihumanInputSchema,
+ typeof omnihumanOutputSchema
+ > = {
+ input: omnihumanInputSchema,
+ output: omnihumanOutputSchema,
+ };
+
+ export const definition: ModelDefinition<typeof schema> = {
+ type: "model",
+ name: "omnihuman",
+ description:
+ "OmniHuman v1.5 - generate a vivid talking video from an image and an audio file",
+ providers: ["fal"],
+ defaultProvider: "fal",
+ providerModels: {
+ fal: "fal-ai/bytedance/omnihuman/v1.5",
+ },
+ schema,
+ };
+
+ export default definition;
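
To illustrate the defaults above, a small sketch that parses a minimal payload through the definition's input schema; the import path and URLs are placeholders, not part of the diff.

import omnihuman from "./omnihuman"; // hypothetical relative path to this new module

// Only the two required URLs are supplied; Zod fills in the defaults.
const parsed = omnihuman.schema.input.parse({
  image_url: "https://example.com/portrait.png", // placeholder
  audio_url: "https://example.com/voice.mp3", // placeholder
});
console.log(parsed.turbo_mode, parsed.resolution); // false "1080p"
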
@@ -0,0 +1,49 @@
+ /**
+ * VEED Fabric 1.0
+ * Image + audio -> talking video
+ */
+
+ import { z } from "zod";
+ import { urlSchema } from "../../core/schema/shared";
+ import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
+
+ const fabricResolutionSchema = z
+ .enum(["480p", "720p"])
+ .describe("Output resolution");
+
+ // Input schema with Zod
+ const veedFabricInputSchema = z.object({
+ image_url: urlSchema.describe("Input image URL"),
+ audio_url: urlSchema.describe("Input audio URL"),
+ resolution: fabricResolutionSchema.describe("Output resolution"),
+ });
+
+ // Output schema with Zod
+ const veedFabricOutputSchema = z.object({
+ video: z.object({
+ content_type: z.string().optional(),
+ url: z.string().url(),
+ }),
+ });
+
+ const schema: ZodSchema<
+ typeof veedFabricInputSchema,
+ typeof veedFabricOutputSchema
+ > = {
+ input: veedFabricInputSchema,
+ output: veedFabricOutputSchema,
+ };
+
+ export const definition: ModelDefinition<typeof schema> = {
+ type: "model",
+ name: "veed-fabric",
+ description: "VEED Fabric 1.0 - turn an image into a talking video",
+ providers: ["fal"],
+ defaultProvider: "fal",
+ providerModels: {
+ fal: "veed/fabric-1.0",
+ },
+ schema,
+ };
+
+ export default definition;
@@ -332,6 +332,86 @@ export class FalProvider extends BaseProvider {
  return result;
  }
 
+ async omnihuman15(args: {
+ imageUrl: string;
+ audioUrl: string;
+ prompt?: string;
+ turboMode?: boolean;
+ resolution?: "720p" | "1080p";
+ }) {
+ const modelId: string = "fal-ai/bytedance/omnihuman/v1.5";
+
+ console.log(`[fal] starting omnihuman v1.5: ${modelId}`);
+
+ const imageUrl = await ensureUrl(args.imageUrl, (buffer) =>
+ this.uploadFile(buffer),
+ );
+ const audioUrl = await ensureUrl(args.audioUrl, (buffer) =>
+ this.uploadFile(buffer),
+ );
+
+ const input: Record<string, unknown> = {
+ ...(args.prompt ? { prompt: args.prompt } : {}),
+ image_url: imageUrl,
+ audio_url: audioUrl,
+ turbo_mode: args.turboMode ?? false,
+ resolution: args.resolution ?? "1080p",
+ };
+
+ const result = await fal.subscribe(modelId, {
+ input,
+ logs: true,
+ onQueueUpdate: (update) => {
+ if (update.status === "IN_PROGRESS") {
+ console.log(
+ `[fal] ${update.logs?.map((l) => l.message).join(" ") || "processing..."}`,
+ );
+ }
+ },
+ });
+
+ console.log("[fal] completed!");
+ return result;
+ }
+
+ async veedFabric10(args: {
+ imageUrl: string;
+ audioUrl: string;
+ resolution: "480p" | "720p";
+ }) {
+ const modelId: string = "veed/fabric-1.0";
+
+ console.log(`[fal] starting veed fabric 1.0: ${modelId}`);
+
+ const imageUrl = await ensureUrl(args.imageUrl, (buffer) =>
+ this.uploadFile(buffer),
+ );
+ const audioUrl = await ensureUrl(args.audioUrl, (buffer) =>
+ this.uploadFile(buffer),
+ );
+
+ const input: Record<string, unknown> = {
+ image_url: imageUrl,
+ audio_url: audioUrl,
+ resolution: args.resolution,
+ };
+
+ const result = await fal.subscribe(modelId, {
+ input,
+ logs: true,
+ onQueueUpdate: (update) => {
+ if (update.status === "IN_PROGRESS") {
+ console.log(
+ `[fal] ${update.logs?.map((l) => l.message).join(" ") || "processing..."}`,
+ );
+ }
+ },
+ });
+
+ console.log("[fal] completed!");
+ return result;
+ }
+
 
  async textToMusic(args: {
  prompt?: string;
@@ -584,5 +664,10 @@ export const imageToImage = (
  ) => falProvider.imageToImage(args);
  export const wan25 = (args: Parameters<FalProvider["wan25"]>[0]) =>
  falProvider.wan25(args);
+ export const omnihuman15 = (args: Parameters<FalProvider["omnihuman15"]>[0]) =>
+ falProvider.omnihuman15(args);
+ export const veedFabric10 = (
+ args: Parameters<FalProvider["veedFabric10"]>[0],
+ ) => falProvider.veedFabric10(args);
  export const textToMusic = (args: Parameters<FalProvider["textToMusic"]>[0]) =>
  falProvider.textToMusic(args);
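
A brief sketch of calling the two new provider wrappers directly; the import path and media URLs are placeholders. Both methods route their inputs through ensureUrl, so values that are not already URLs appear to be uploaded first.

import { omnihuman15, veedFabric10 } from "./providers/fal"; // hypothetical import path

// OmniHuman v1.5: image + audio plus an optional prompt, 720p or 1080p output.
const human = await omnihuman15({
  imageUrl: "https://example.com/portrait.png", // placeholder
  audioUrl: "https://example.com/voice.mp3", // placeholder
  prompt: "Talking calmly to camera",
  resolution: "720p",
});

// VEED Fabric 1.0: image + audio only, 480p or 720p output.
const fabric = await veedFabric10({
  imageUrl: "https://example.com/portrait.png",
  audioUrl: "https://example.com/voice.mp3",
  resolution: "480p",
});

// Per the lipsync() change above, the video URL is read from result.data?.video?.url.
console.log(human.data?.video?.url, fabric.data?.video?.url);
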
@@ -0,0 +1,75 @@
+ /**
+ * Longer talking head demo (VEED Fabric 1.0):
+ * - character image from nano-banana-pro
+ * - voice from ElevenLabs
+ * - talking video from veed/fabric-1.0 (image + audio)
+ *
+ * Run: bun run src/react/examples/veed-fabric-long-talking-head.tsx
+ * Output: output/veed-fabric-long-talking-head.mp4
+ */
+
+ import { elevenlabs, fal } from "../../ai-sdk";
+ import { Clip, Image, Render, render, Speech, Video } from "..";
+
+ const SCRIPT =
+ "Hey, I am Nova. In this quick demo, you will hear a clean voiceover, and see a talking avatar generated from a single portrait. We are using VEED Fabric for image-to-video lipsync, and ElevenLabs for the voice.";
+
+ const portrait = Image({
+ prompt:
+ "Ultra-realistic studio portrait of Nova, a confident friendly product designer in her early 30s, warm smile, expressive eyes, subtle freckles, natural makeup, shoulder-length dark auburn hair, modern minimal wardrobe, cinematic softbox lighting, shallow depth of field, clean neutral background, high-end camera look",
+ model: fal.imageModel("nano-banana-pro"),
+ aspectRatio: "9:16",
+ });
+
+ const voiceover = Speech({
+ model: elevenlabs.speechModel("eleven_v3"),
+ voice: "adam",
+ children: SCRIPT,
+ });
+
+ const talking = Video({
+ model: fal.videoModel("veed-fabric-1.0"),
+ keepAudio: true,
+ prompt: {
+ images: [portrait],
+ audio: voiceover,
+ },
+ providerOptions: {
+ fal: {
+ resolution: "720p",
+ },
+ },
+ });
+
+ const demo = (
+ <Render width={1080} height={1920}>
+ <Clip duration="auto">{talking}</Clip>
+ </Render>
+ );
+
+ async function main() {
+ if (!process.env.FAL_API_KEY && !process.env.FAL_KEY) {
+ console.error("ERROR: FAL_API_KEY/FAL_KEY not found in environment");
+ process.exit(1);
+ }
+ if (!process.env.ELEVENLABS_API_KEY) {
+ console.error("ERROR: ELEVENLABS_API_KEY not found in environment");
+ process.exit(1);
+ }
+
+ const result = await render(demo, {
+ output: "output/veed-fabric-long-talking-head.mp4",
+ cache: ".cache/ai-veed-fabric-long-talking-head",
+ });
+
+ console.log(
+ `ok: output/veed-fabric-long-talking-head.mp4 (${(result.video.byteLength / 1024 / 1024).toFixed(2)} MB)`,
+ );
+ }
+
+ if (import.meta.main) {
+ main().catch((err) => {
+ console.error(err);
+ process.exit(1);
+ });
+ }
@@ -0,0 +1,60 @@
+ /**
+ * VEED Fabric 1.0 React syntax test
+ *
+ * Uses a local image + local audio file to generate a talking video.
+ *
+ * Run: bun run src/react/examples/veed-fabric-react-test.tsx
+ * Output: output/veed-fabric-react-test.mp4
+ */
+
+ import { fal } from "../../ai-sdk/providers/fal";
+ import { Clip, Render, render, Video } from "..";
+
+ const IMAGE_PATH = "output/garry-tan-image.png";
+ const AUDIO_PATH = "output/garry-tan-voice.mp3";
+
+ const RESOLUTION =
+ (process.env.FABRIC_RESOLUTION as "480p" | "720p" | undefined) ?? "720p";
+
+ const video = (
+ <Render width={720} height={1280}>
+ <Clip duration={5}>
+ <Video
+ model={fal.videoModel("veed-fabric-1.0")}
+ keepAudio
+ prompt={{
+ images: [IMAGE_PATH],
+ audio: AUDIO_PATH,
+ }}
+ providerOptions={{
+ fal: {
+ resolution: RESOLUTION,
+ },
+ }}
+ />
+ </Clip>
+ </Render>
+ );
+
+ async function main() {
+ if (!process.env.FAL_API_KEY && !process.env.FAL_KEY) {
+ console.error("ERROR: FAL_API_KEY/FAL_KEY not found in environment");
+ process.exit(1);
+ }
+
+ const result = await render(video, {
+ output: `output/veed-fabric-react-test-${RESOLUTION}.mp4`,
+ cache: `.cache/ai-veed-fabric-${RESOLUTION}-keepaudio`,
+ });
+
+ console.log(
+ `ok: output/veed-fabric-react-test-${RESOLUTION}.mp4 (${(result.video.byteLength / 1024 / 1024).toFixed(2)} MB)`,
+ );
+ }
+
+ if (import.meta.main) {
+ main().catch((err) => {
+ console.error(err);
+ process.exit(1);
+ });
+ }