vargai 0.4.0-alpha61 → 0.4.0-alpha62
This diff shows the content of publicly released package versions as published to their respective public registries. It is provided for informational purposes only and reflects the changes between the two versions.
- package/examples/grok-imagine-ai-sdk.tsx +9 -3
- package/package.json +1 -1
- package/src/ai-sdk/providers/editly/index.ts +7 -1
- package/src/ai-sdk/providers/fal.ts +13 -1
- package/src/definitions/actions/sync.ts +54 -12
- package/src/definitions/models/index.ts +6 -0
- package/src/definitions/models/omnihuman.ts +71 -0
- package/src/definitions/models/veed-fabric.ts +49 -0
- package/src/providers/fal.ts +85 -0
- package/src/react/examples/veed-fabric-long-talking-head.tsx +75 -0
- package/src/react/examples/veed-fabric-react-test.tsx +60 -0
package/examples/grok-imagine-ai-sdk.tsx
CHANGED

@@ -40,7 +40,9 @@ async function testGrokTextToVideo() {
 
   // Save the video
   const outputPath = join(import.meta.dir, "../output/grok-t2v-test.mp4");
-
+  const firstVideo = result.videos[0];
+  if (!firstVideo) throw new Error("No video returned from model");
+  await writeFile(outputPath, firstVideo);
   console.log(`Video saved to: ${outputPath}`);
 
   return outputPath;

@@ -88,7 +90,9 @@ async function testGrokImageToVideo() {
 
   // Save the video
   const outputPath = join(import.meta.dir, "../output/grok-i2v-test.mp4");
-
+  const firstVideo = result.videos[0];
+  if (!firstVideo) throw new Error("No video returned from model");
+  await writeFile(outputPath, firstVideo);
   console.log(`Video saved to: ${outputPath}`);
 
   return outputPath;

@@ -136,7 +140,9 @@ async function testGrokEditVideo() {
 
   // Save the video
   const outputPath = join(import.meta.dir, "../output/grok-edit-test.mp4");
-
+  const firstVideo = result.videos[0];
+  if (!firstVideo) throw new Error("No video returned from model");
+  await writeFile(outputPath, firstVideo);
   console.log(`Video saved to: ${outputPath}`);
 
   return outputPath;
package/package.json
CHANGED

package/src/ai-sdk/providers/editly/index.ts
CHANGED

@@ -269,6 +269,12 @@ function buildBaseClipFilter(
     const layer = clipLocalOverlays[i];
     if (!layer) continue;
 
+    if (!baseLabel) {
+      throw new Error(
+        `Clip ${clipIndex} is missing a base layer for overlay placement — ensure it has at least one visual layer (video, image, or fill-color)`,
+      );
+    }
+
     const overlayFilter = getVideoFilter(
       layer,
       inputIdx,

@@ -283,7 +289,7 @@ function buildBaseClipFilter(
 
     const outputLabel = `clip${clipIndex}ov${i}`;
     const positionFilter = getOverlayFilter(
-      baseLabel
+      baseLabel,
       overlayFilter.outputLabel,
       layer,
       width,
package/src/ai-sdk/providers/fal.ts
CHANGED

@@ -164,6 +164,8 @@ const LIPSYNC_MODELS: Record<string, string> = {
   "sync-v2": "fal-ai/sync-lipsync",
   "sync-v2-pro": "fal-ai/sync-lipsync/v2",
   lipsync: "fal-ai/sync-lipsync",
+  "omnihuman-v1.5": "fal-ai/bytedance/omnihuman/v1.5",
+  "veed-fabric-1.0": "veed/fabric-1.0",
 };
 
 const IMAGE_MODELS: Record<string, string> = {

@@ -474,20 +476,30 @@ class FalVideoModel implements VideoModelV3 {
     };
 
     if (isLipsync) {
-      // Lipsync: video + audio
+      // Lipsync: either (video + audio) or (image + audio), depending on model
       const videoFile = files?.find((f) =>
         getMediaType(f)?.startsWith("video/"),
       );
+      const imageFile = files?.find((f) =>
+        getMediaType(f)?.startsWith("image/"),
+      );
       const audioFile = files?.find((f) =>
         getMediaType(f)?.startsWith("audio/"),
       );
 
       if (videoFile) {
         input.video_url = await fileToUrl(videoFile);
+      } else if (imageFile) {
+        input.image_url = await fileToUrl(imageFile);
       }
       if (audioFile) {
         input.audio_url = await fileToUrl(audioFile);
       }
+
+      // OmniHuman supports an optional prompt
+      if (prompt && this.modelId === "omnihuman-v1.5") {
+        input.prompt = prompt;
+      }
     } else if (isMotionControl) {
       // Motion control: image + reference video input
       if (prompt) {
package/src/definitions/actions/sync.ts
CHANGED

@@ -15,6 +15,11 @@ import { ffmpegProvider } from "../../providers/ffmpeg";
 
 // Input schema with Zod
 const syncInputSchema = z.object({
+  model: z
+    .enum(["wan-25", "omnihuman-v1.5", "veed-fabric-1.0"])
+    .optional()
+    .default("wan-25")
+    .describe("Lip sync / avatar backend model"),
   image: filePathSchema.describe("Input image"),
   audio: filePathSchema.describe("Audio file"),
   prompt: z.string().describe("Description of the scene"),

@@ -40,13 +45,14 @@ export const definition: ActionDefinition<typeof schema> = {
   schema,
   routes: [],
   execute: async (inputs) => {
-    const { image, audio, prompt, duration, resolution } = inputs;
-    return lipsync({ image, audio, prompt, duration, resolution });
+    const { model, image, audio, prompt, duration, resolution } = inputs;
+    return lipsync({ model, image, audio, prompt, duration, resolution });
   },
 };
 
 // Types
 export interface LipsyncOptions {
+  model?: "wan-25" | "omnihuman-v1.5" | "veed-fabric-1.0";
   image: string;
   audio: string;
   prompt: string;

@@ -65,20 +71,56 @@ export interface Wav2LipOptions {
 }
 
 /**
- * Generate lip-synced video using
+ * Generate lip-synced / avatar video using selected backend.
  */
 export async function lipsync(options: LipsyncOptions): Promise<LipsyncResult> {
-  const {
+  const {
+    model = "wan-25",
+    image,
+    audio,
+    prompt,
+    duration = "5",
+    resolution = "480p",
+  } = options;
 
-  console.log(
+  console.log(`[sync] generating lip-synced video with ${model}...`);
 
-
-
-
-
-
-
-
+  if (model === "omnihuman-v1.5" && resolution === "480p") {
+    console.warn(
+      "[sync] omnihuman-v1.5 does not support 480p; using 720p instead",
+    );
+  }
+  if (model === "veed-fabric-1.0" && resolution === "1080p") {
+    console.warn(
+      "[sync] veed-fabric-1.0 does not support 1080p; using 720p instead",
+    );
+  }
+
+  const result =
+    model === "omnihuman-v1.5"
+      ? await falProvider.omnihuman15({
+          imageUrl: image,
+          audioUrl: audio,
+          prompt,
+          resolution: (resolution === "480p" ? "720p" : resolution) as
+            | "720p"
+            | "1080p",
+        })
+      : model === "veed-fabric-1.0"
+        ? await falProvider.veedFabric10({
+            imageUrl: image,
+            audioUrl: audio,
+            resolution: (resolution === "1080p" ? "720p" : resolution) as
+              | "480p"
+              | "720p",
+          })
+        : await falProvider.wan25({
+            imageUrl: image,
+            audioUrl: audio,
+            prompt,
+            duration,
+            resolution,
+          });
 
   const videoUrl = result.data?.video?.url;
   if (!videoUrl) {
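
Note (not part of the diff): a minimal usage sketch of the updated sync action, assuming lipsync is importable from the module above and using placeholder file paths. The new optional model field selects the backend and defaults to wan-25; unsupported resolutions are coerced as the console.warn calls above describe.

    // Sketch only; the import path and asset paths are assumptions, not part of this release.
    import { lipsync } from "./src/definitions/actions/sync";

    const result = await lipsync({
      model: "veed-fabric-1.0", // or "omnihuman-v1.5"; omit to fall back to "wan-25"
      image: "assets/portrait.png", // placeholder path
      audio: "assets/voiceover.mp3", // placeholder path
      prompt: "A friendly presenter speaking to camera",
      resolution: "720p", // "1080p" would be coerced to "720p" for veed-fabric-1.0
    });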
package/src/definitions/models/index.ts
CHANGED

@@ -7,8 +7,10 @@ export { definition as flux } from "./flux";
 export { definition as kling } from "./kling";
 export { definition as llama } from "./llama";
 export { definition as nanoBananaPro } from "./nano-banana-pro";
+export { definition as omnihuman } from "./omnihuman";
 export { definition as sonauto } from "./sonauto";
 export { definition as soul } from "./soul";
+export { definition as veedFabric } from "./veed-fabric";
 export { definition as wan } from "./wan";
 export { definition as whisper } from "./whisper";
 

@@ -18,8 +20,10 @@ import { definition as fluxDefinition } from "./flux";
 import { definition as klingDefinition } from "./kling";
 import { definition as llamaDefinition } from "./llama";
 import { definition as nanoBananaProDefinition } from "./nano-banana-pro";
+import { definition as omnihumanDefinition } from "./omnihuman";
 import { definition as sonautoDefinition } from "./sonauto";
 import { definition as soulDefinition } from "./soul";
+import { definition as veedFabricDefinition } from "./veed-fabric";
 import { definition as wanDefinition } from "./wan";
 import { definition as whisperDefinition } from "./whisper";
 

@@ -28,6 +32,8 @@ export const allModels = [
   fluxDefinition,
   nanoBananaProDefinition,
   wanDefinition,
+  omnihumanDefinition,
+  veedFabricDefinition,
   whisperDefinition,
   elevenlabsDefinition,
   soulDefinition,
package/src/definitions/models/omnihuman.ts
ADDED

@@ -0,0 +1,71 @@
+/**
+ * Bytedance OmniHuman v1.5
+ * Image + audio -> video (full-body human animation)
+ */
+
+import { z } from "zod";
+import { urlSchema } from "../../core/schema/shared";
+import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
+
+const omnihumanResolutionSchema = z
+  .enum(["720p", "1080p"])
+  .describe("Output resolution");
+
+// Input schema with Zod
+const omnihumanInputSchema = z.object({
+  prompt: z
+    .string()
+    .optional()
+    .describe("The text prompt used to guide the video generation"),
+  image_url: urlSchema.describe(
+    "The URL of the image used to generate the video",
+  ),
+  audio_url: urlSchema.describe(
+    "The URL of the audio file to generate the video",
+  ),
+  turbo_mode: z
+    .boolean()
+    .optional()
+    .default(false)
+    .describe("Faster generation with slight quality trade-off"),
+  resolution: omnihumanResolutionSchema
+    .optional()
+    .default("1080p")
+    .describe(
+      "The resolution of the generated video. 720p generation is faster and higher in quality",
+    ),
+});
+
+// Output schema with Zod
+const omnihumanOutputSchema = z.object({
+  video: z.object({
+    url: z.string(),
+  }),
+  duration: z
+    .number()
+    .optional()
+    .describe("Duration of audio input/video output as used for billing"),
+});
+
+const schema: ZodSchema<
+  typeof omnihumanInputSchema,
+  typeof omnihumanOutputSchema
+> = {
+  input: omnihumanInputSchema,
+  output: omnihumanOutputSchema,
+};
+
+export const definition: ModelDefinition<typeof schema> = {
+  type: "model",
+  name: "omnihuman",
+  description:
+    "OmniHuman v1.5 - generate a vivid talking video from an image and an audio file",
+  providers: ["fal"],
+  defaultProvider: "fal",
+  providerModels: {
+    fal: "fal-ai/bytedance/omnihuman/v1.5",
+  },
+  schema,
+};
+
+export default definition;
package/src/definitions/models/veed-fabric.ts
ADDED

@@ -0,0 +1,49 @@
+/**
+ * VEED Fabric 1.0
+ * Image + audio -> talking video
+ */
+
+import { z } from "zod";
+import { urlSchema } from "../../core/schema/shared";
+import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
+
+const fabricResolutionSchema = z
+  .enum(["480p", "720p"])
+  .describe("Output resolution");
+
+// Input schema with Zod
+const veedFabricInputSchema = z.object({
+  image_url: urlSchema.describe("Input image URL"),
+  audio_url: urlSchema.describe("Input audio URL"),
+  resolution: fabricResolutionSchema.describe("Output resolution"),
+});
+
+// Output schema with Zod
+const veedFabricOutputSchema = z.object({
+  video: z.object({
+    content_type: z.string().optional(),
+    url: z.string().url(),
+  }),
+});
+
+const schema: ZodSchema<
+  typeof veedFabricInputSchema,
+  typeof veedFabricOutputSchema
+> = {
+  input: veedFabricInputSchema,
+  output: veedFabricOutputSchema,
+};
+
+export const definition: ModelDefinition<typeof schema> = {
+  type: "model",
+  name: "veed-fabric",
+  description: "VEED Fabric 1.0 - turn an image into a talking video",
+  providers: ["fal"],
+  defaultProvider: "fal",
+  providerModels: {
+    fal: "veed/fabric-1.0",
+  },
+  schema,
+};
+
+export default definition;
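
Illustrative sketch (not from the diff): once registered in allModels above, the two new definitions can be resolved by name; the import path and loose typing are assumptions.

    // Sketch only; assumes allModels is exported from the models index shown earlier.
    import { allModels } from "./src/definitions/models";

    const fabric = allModels.find((m) => m.name === "veed-fabric");
    const omni = allModels.find((m) => m.name === "omnihuman");

    console.log(fabric?.providerModels.fal); // "veed/fabric-1.0"
    console.log(omni?.providerModels.fal); // "fal-ai/bytedance/omnihuman/v1.5"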
package/src/providers/fal.ts
CHANGED

@@ -332,6 +332,86 @@ export class FalProvider extends BaseProvider {
     return result;
   }
 
+  async omnihuman15(args: {
+    imageUrl: string;
+    audioUrl: string;
+    prompt?: string;
+    turboMode?: boolean;
+    resolution?: "720p" | "1080p";
+  }) {
+    const modelId: string = "fal-ai/bytedance/omnihuman/v1.5";
+
+    console.log(`[fal] starting omnihuman v1.5: ${modelId}`);
+
+    const imageUrl = await ensureUrl(args.imageUrl, (buffer) =>
+      this.uploadFile(buffer),
+    );
+    const audioUrl = await ensureUrl(args.audioUrl, (buffer) =>
+      this.uploadFile(buffer),
+    );
+
+    const input: Record<string, unknown> = {
+      ...(args.prompt ? { prompt: args.prompt } : {}),
+      image_url: imageUrl,
+      audio_url: audioUrl,
+      turbo_mode: args.turboMode ?? false,
+      resolution: args.resolution ?? "1080p",
+    };
+
+    const result = await fal.subscribe(modelId, {
+      input,
+      logs: true,
+      onQueueUpdate: (update) => {
+        if (update.status === "IN_PROGRESS") {
+          console.log(
+            `[fal] ${update.logs?.map((l) => l.message).join(" ") || "processing..."}`,
+          );
+        }
+      },
+    });
+
+    console.log("[fal] completed!");
+    return result;
+  }
+
+  async veedFabric10(args: {
+    imageUrl: string;
+    audioUrl: string;
+    resolution: "480p" | "720p";
+  }) {
+    const modelId: string = "veed/fabric-1.0";
+
+    console.log(`[fal] starting veed fabric 1.0: ${modelId}`);
+
+    const imageUrl = await ensureUrl(args.imageUrl, (buffer) =>
+      this.uploadFile(buffer),
+    );
+    const audioUrl = await ensureUrl(args.audioUrl, (buffer) =>
+      this.uploadFile(buffer),
+    );
+
+    const input: Record<string, unknown> = {
+      image_url: imageUrl,
+      audio_url: audioUrl,
+      resolution: args.resolution,
+    };
+
+    const result = await fal.subscribe(modelId, {
+      input,
+      logs: true,
+      onQueueUpdate: (update) => {
+        if (update.status === "IN_PROGRESS") {
+          console.log(
+            `[fal] ${update.logs?.map((l) => l.message).join(" ") || "processing..."}`,
+          );
+        }
+      },
+    });
+
+    console.log("[fal] completed!");
+    return result;
+  }
+
   async textToMusic(args: {
     prompt?: string;
     tags?: string[];

@@ -584,5 +664,10 @@ export const imageToImage = (
 ) => falProvider.imageToImage(args);
 export const wan25 = (args: Parameters<FalProvider["wan25"]>[0]) =>
   falProvider.wan25(args);
+export const omnihuman15 = (args: Parameters<FalProvider["omnihuman15"]>[0]) =>
+  falProvider.omnihuman15(args);
+export const veedFabric10 = (
+  args: Parameters<FalProvider["veedFabric10"]>[0],
+) => falProvider.veedFabric10(args);
 export const textToMusic = (args: Parameters<FalProvider["textToMusic"]>[0]) =>
   falProvider.textToMusic(args);
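
For reference (not part of the diff), a hedged sketch of the new provider helpers exported above; the URLs are placeholders and the import path is an assumption.

    // Sketch only; the import path and URLs below are assumptions.
    import { omnihuman15, veedFabric10 } from "./src/providers/fal";

    const omniResult = await omnihuman15({
      imageUrl: "https://example.com/portrait.png",
      audioUrl: "https://example.com/speech.mp3",
      prompt: "Natural gestures, subtle head movement",
      resolution: "720p",
    });

    const fabricResult = await veedFabric10({
      imageUrl: "https://example.com/portrait.png",
      audioUrl: "https://example.com/speech.mp3",
      resolution: "480p",
    });

    // Both helpers return the raw fal.subscribe result; the sync action above reads
    // the output as result.data?.video?.url.
    console.log(omniResult.data?.video?.url, fabricResult.data?.video?.url);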
package/src/react/examples/veed-fabric-long-talking-head.tsx
ADDED

@@ -0,0 +1,75 @@
+/**
+ * Longer talking head demo (VEED Fabric 1.0):
+ * - character image from nano-banana-pro
+ * - voice from ElevenLabs
+ * - talking video from veed/fabric-1.0 (image + audio)
+ *
+ * Run: bun run src/react/examples/veed-fabric-long-talking-head.tsx
+ * Output: output/veed-fabric-long-talking-head.mp4
+ */
+
+import { elevenlabs, fal } from "../../ai-sdk";
+import { Clip, Image, Render, render, Speech, Video } from "..";
+
+const SCRIPT =
+  "Hey, I am Nova. In this quick demo, you will hear a clean voiceover, and see a talking avatar generated from a single portrait. We are using VEED Fabric for image-to-video lipsync, and ElevenLabs for the voice.";
+
+const portrait = Image({
+  prompt:
+    "Ultra-realistic studio portrait of Nova, a confident friendly product designer in her early 30s, warm smile, expressive eyes, subtle freckles, natural makeup, shoulder-length dark auburn hair, modern minimal wardrobe, cinematic softbox lighting, shallow depth of field, clean neutral background, high-end camera look",
+  model: fal.imageModel("nano-banana-pro"),
+  aspectRatio: "9:16",
+});
+
+const voiceover = Speech({
+  model: elevenlabs.speechModel("eleven_v3"),
+  voice: "adam",
+  children: SCRIPT,
+});
+
+const talking = Video({
+  model: fal.videoModel("veed-fabric-1.0"),
+  keepAudio: true,
+  prompt: {
+    images: [portrait],
+    audio: voiceover,
+  },
+  providerOptions: {
+    fal: {
+      resolution: "720p",
+    },
+  },
+});
+
+const demo = (
+  <Render width={1080} height={1920}>
+    <Clip duration="auto">{talking}</Clip>
+  </Render>
+);
+
+async function main() {
+  if (!process.env.FAL_API_KEY && !process.env.FAL_KEY) {
+    console.error("ERROR: FAL_API_KEY/FAL_KEY not found in environment");
+    process.exit(1);
+  }
+  if (!process.env.ELEVENLABS_API_KEY) {
+    console.error("ERROR: ELEVENLABS_API_KEY not found in environment");
+    process.exit(1);
+  }
+
+  const result = await render(demo, {
+    output: "output/veed-fabric-long-talking-head.mp4",
+    cache: ".cache/ai-veed-fabric-long-talking-head",
+  });
+
+  console.log(
+    `ok: output/veed-fabric-long-talking-head.mp4 (${(result.video.byteLength / 1024 / 1024).toFixed(2)} MB)`,
+  );
+}
+
+if (import.meta.main) {
+  main().catch((err) => {
+    console.error(err);
+    process.exit(1);
+  });
+}
package/src/react/examples/veed-fabric-react-test.tsx
ADDED

@@ -0,0 +1,60 @@
+/**
+ * VEED Fabric 1.0 React syntax test
+ *
+ * Uses a local image + local audio file to generate a talking video.
+ *
+ * Run: bun run src/react/examples/veed-fabric-react-test.tsx
+ * Output: output/veed-fabric-react-test.mp4
+ */
+
+import { fal } from "../../ai-sdk/providers/fal";
+import { Clip, Render, render, Video } from "..";
+
+const IMAGE_PATH = "output/garry-tan-image.png";
+const AUDIO_PATH = "output/garry-tan-voice.mp3";
+
+const RESOLUTION =
+  (process.env.FABRIC_RESOLUTION as "480p" | "720p" | undefined) ?? "720p";
+
+const video = (
+  <Render width={720} height={1280}>
+    <Clip duration={5}>
+      <Video
+        model={fal.videoModel("veed-fabric-1.0")}
+        keepAudio
+        prompt={{
+          images: [IMAGE_PATH],
+          audio: AUDIO_PATH,
+        }}
+        providerOptions={{
+          fal: {
+            resolution: RESOLUTION,
+          },
+        }}
+      />
+    </Clip>
+  </Render>
+);
+
+async function main() {
+  if (!process.env.FAL_API_KEY && !process.env.FAL_KEY) {
+    console.error("ERROR: FAL_API_KEY/FAL_KEY not found in environment");
+    process.exit(1);
+  }
+
+  const result = await render(video, {
+    output: `output/veed-fabric-react-test-${RESOLUTION}.mp4`,
+    cache: `.cache/ai-veed-fabric-${RESOLUTION}-keepaudio`,
+  });
+
+  console.log(
+    `ok: output/veed-fabric-react-test-${RESOLUTION}.mp4 (${(result.video.byteLength / 1024 / 1024).toFixed(2)} MB)`,
+  );
+}
+
+if (import.meta.main) {
+  main().catch((err) => {
+    console.error(err);
+    process.exit(1);
+  });
+}