vargai 0.4.0-alpha96 → 0.4.0-alpha97
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -104,7 +104,7 @@
|
|
|
104
104
|
"license": "Apache-2.0",
|
|
105
105
|
"author": "varg.ai <hello@varg.ai> (https://varg.ai)",
|
|
106
106
|
"sideEffects": false,
|
|
107
|
-
"version": "0.4.0-alpha96",
|
|
107
|
+
"version": "0.4.0-alpha97",
|
|
108
108
|
"exports": {
|
|
109
109
|
".": "./src/index.ts",
|
|
110
110
|
"./ai": "./src/ai-sdk/index.ts",
|
|
@@ -54,12 +54,16 @@ export async function renderTalkingHead(
|
|
|
54
54
|
const characterImageData = await characterFile.arrayBuffer();
|
|
55
55
|
const speechAudioData = await speechFile.arrayBuffer();
|
|
56
56
|
|
|
57
|
-
// Create a synthetic video element for the lipsync generation
|
|
57
|
+
// Create a synthetic video element for the lipsync generation.
|
|
58
|
+
// Lipsync models (sync-v2-pro, etc.) require `video_url`, not `image_url`,
|
|
59
|
+
// so we pass the character image as the `video` input. The fal provider will
|
|
60
|
+
// upload it and set `video_url` in the API request. Fal.ai accepts image
|
|
61
|
+
// files as the video input for lipsync — it treats them as single-frame video.
|
|
58
62
|
const videoElement: VargElement<"video"> = {
|
|
59
63
|
type: "video",
|
|
60
64
|
props: {
|
|
61
65
|
prompt: {
|
|
62
|
-
|
|
66
|
+
video: characterImageData,
|
|
63
67
|
audio: speechAudioData,
|
|
64
68
|
},
|
|
65
69
|
model: lipsyncModel,
|
package/src/react/resolve.ts
CHANGED
|
@@ -808,10 +808,12 @@ export async function resolveTalkingHeadElement(
|
|
|
808
808
|
const lipsyncModel = props.lipsyncModel ?? model;
|
|
809
809
|
const generateVideo = getCachedGenerateVideo();
|
|
810
810
|
|
|
811
|
+
// Lipsync models require `video_url`, not `image_url`, so pass the
|
|
812
|
+
// character image as the `video` input (fal accepts images as video input).
|
|
811
813
|
const { video } = await generateVideo({
|
|
812
814
|
model: lipsyncModel as Parameters<typeof generateVideoRaw>[0]["model"],
|
|
813
815
|
prompt: {
|
|
814
|
-
|
|
816
|
+
video: characterBytes,
|
|
815
817
|
audio: speechBytes,
|
|
816
818
|
},
|
|
817
819
|
duration: 0, // duration determined by audio length
|