npm - vargai - Versions diffs - 0.4.0-alpha96 → 0.4.0-alpha97 - Mend

vargai 0.4.0-alpha96 → 0.4.0-alpha97

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/package.json +1 -1
package/src/react/renderers/talking-head.ts +6 -2
package/src/react/resolve.ts +3 -1

package/package.json CHANGED Viewed

@@ -104,7 +104,7 @@
   "license": "Apache-2.0",
   "author": "varg.ai <hello@varg.ai> (https://varg.ai)",
   "sideEffects": false,
-  "version": "0.4.0-alpha96",
+  "version": "0.4.0-alpha97",
   "exports": {
     ".": "./src/index.ts",
     "./ai": "./src/ai-sdk/index.ts",

package/src/react/renderers/talking-head.ts CHANGED Viewed

@@ -54,12 +54,16 @@ export async function renderTalkingHead(
   const characterImageData = await characterFile.arrayBuffer();
   const speechAudioData = await speechFile.arrayBuffer();
-  // Create a synthetic video element for the lipsync generation
+  // Create a synthetic video element for the lipsync generation.
+  // Lipsync models (sync-v2-pro, etc.) require `video_url`, not `image_url`,
+  // so we pass the character image as the `video` input. The fal provider will
+  // upload it and set `video_url` in the API request. Fal.ai accepts image
+  // files as the video input for lipsync — it treats them as single-frame video.
   const videoElement: VargElement<"video"> = {
     type: "video",
     props: {
       prompt: {
-        images: [characterImageData],
+        video: characterImageData,
         audio: speechAudioData,
       },
       model: lipsyncModel,

package/src/react/resolve.ts CHANGED Viewed

@@ -808,10 +808,12 @@ export async function resolveTalkingHeadElement(
   const lipsyncModel = props.lipsyncModel ?? model;
   const generateVideo = getCachedGenerateVideo();
+  // Lipsync models require `video_url`, not `image_url`, so pass the
+  // character image as the `video` input (fal accepts images as video input).
   const { video } = await generateVideo({
     model: lipsyncModel as Parameters<typeof generateVideoRaw>[0]["model"],
     prompt: {
-      images: [characterBytes],
+      video: characterBytes,
       audio: speechBytes,
     },
     duration: 0, // duration determined by audio length