vargai 0.4.0-alpha61 → 0.4.0-alpha63
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/examples/grok-imagine-ai-sdk.tsx +9 -3
- package/package.json +1 -1
- package/src/ai-sdk/providers/editly/index.ts +7 -1
- package/src/ai-sdk/providers/fal.ts +39 -2
- package/src/definitions/actions/sync.ts +54 -12
- package/src/definitions/models/index.ts +18 -0
- package/src/definitions/models/nano-banana-2.ts +115 -0
- package/src/definitions/models/omnihuman.ts +71 -0
- package/src/definitions/models/qwen-image-2.ts +113 -0
- package/src/definitions/models/recraft-v4.ts +94 -0
- package/src/definitions/models/reve.ts +66 -0
- package/src/definitions/models/veed-fabric.ts +49 -0
- package/src/providers/fal.ts +102 -0
- package/src/react/examples/veed-fabric-long-talking-head.tsx +75 -0
- package/src/react/examples/veed-fabric-react-test.tsx +60 -0
- package/src/react/renderers/packshot.ts +18 -1
- package/src/react/renderers/render.ts +39 -13
- package/src/react/types.ts +12 -1
|
@@ -40,7 +40,9 @@ async function testGrokTextToVideo() {
|
|
|
40
40
|
|
|
41
41
|
// Save the video
|
|
42
42
|
const outputPath = join(import.meta.dir, "../output/grok-t2v-test.mp4");
|
|
43
|
-
|
|
43
|
+
const firstVideo = result.videos[0];
|
|
44
|
+
if (!firstVideo) throw new Error("No video returned from model");
|
|
45
|
+
await writeFile(outputPath, firstVideo);
|
|
44
46
|
console.log(`Video saved to: ${outputPath}`);
|
|
45
47
|
|
|
46
48
|
return outputPath;
|
|
@@ -88,7 +90,9 @@ async function testGrokImageToVideo() {
|
|
|
88
90
|
|
|
89
91
|
// Save the video
|
|
90
92
|
const outputPath = join(import.meta.dir, "../output/grok-i2v-test.mp4");
|
|
91
|
-
|
|
93
|
+
const firstVideo = result.videos[0];
|
|
94
|
+
if (!firstVideo) throw new Error("No video returned from model");
|
|
95
|
+
await writeFile(outputPath, firstVideo);
|
|
92
96
|
console.log(`Video saved to: ${outputPath}`);
|
|
93
97
|
|
|
94
98
|
return outputPath;
|
|
@@ -136,7 +140,9 @@ async function testGrokEditVideo() {
|
|
|
136
140
|
|
|
137
141
|
// Save the video
|
|
138
142
|
const outputPath = join(import.meta.dir, "../output/grok-edit-test.mp4");
|
|
139
|
-
|
|
143
|
+
const firstVideo = result.videos[0];
|
|
144
|
+
if (!firstVideo) throw new Error("No video returned from model");
|
|
145
|
+
await writeFile(outputPath, firstVideo);
|
|
140
146
|
console.log(`Video saved to: ${outputPath}`);
|
|
141
147
|
|
|
142
148
|
return outputPath;
|
package/package.json
CHANGED
|
@@ -269,6 +269,12 @@ function buildBaseClipFilter(
|
|
|
269
269
|
const layer = clipLocalOverlays[i];
|
|
270
270
|
if (!layer) continue;
|
|
271
271
|
|
|
272
|
+
if (!baseLabel) {
|
|
273
|
+
throw new Error(
|
|
274
|
+
`Clip ${clipIndex} is missing a base layer for overlay placement — ensure it has at least one visual layer (video, image, or fill-color)`,
|
|
275
|
+
);
|
|
276
|
+
}
|
|
277
|
+
|
|
272
278
|
const overlayFilter = getVideoFilter(
|
|
273
279
|
layer,
|
|
274
280
|
inputIdx,
|
|
@@ -283,7 +289,7 @@ function buildBaseClipFilter(
|
|
|
283
289
|
|
|
284
290
|
const outputLabel = `clip${clipIndex}ov${i}`;
|
|
285
291
|
const positionFilter = getOverlayFilter(
|
|
286
|
-
baseLabel
|
|
292
|
+
baseLabel,
|
|
287
293
|
overlayFilter.outputLabel,
|
|
288
294
|
layer,
|
|
289
295
|
width,
|
|
@@ -164,6 +164,8 @@ const LIPSYNC_MODELS: Record<string, string> = {
|
|
|
164
164
|
"sync-v2": "fal-ai/sync-lipsync",
|
|
165
165
|
"sync-v2-pro": "fal-ai/sync-lipsync/v2",
|
|
166
166
|
lipsync: "fal-ai/sync-lipsync",
|
|
167
|
+
"omnihuman-v1.5": "fal-ai/bytedance/omnihuman/v1.5",
|
|
168
|
+
"veed-fabric-1.0": "veed/fabric-1.0",
|
|
167
169
|
};
|
|
168
170
|
|
|
169
171
|
const IMAGE_MODELS: Record<string, string> = {
|
|
@@ -173,9 +175,20 @@ const IMAGE_MODELS: Record<string, string> = {
|
|
|
173
175
|
"recraft-v3": "fal-ai/recraft/v3/text-to-image",
|
|
174
176
|
"nano-banana-pro": "fal-ai/nano-banana-pro",
|
|
175
177
|
"nano-banana-pro/edit": "fal-ai/nano-banana-pro/edit",
|
|
178
|
+
"nano-banana-2": "fal-ai/nano-banana-2/edit",
|
|
179
|
+
"nano-banana-2/edit": "fal-ai/nano-banana-2/edit",
|
|
176
180
|
"seedream-v4.5/edit": "fal-ai/bytedance/seedream/v4.5/edit",
|
|
181
|
+
// Qwen Image 2 - text-to-image and image-to-image editing (standard + pro)
|
|
182
|
+
"qwen-image-2": "fal-ai/qwen-image-2/text-to-image",
|
|
183
|
+
"qwen-image-2/edit": "fal-ai/qwen-image-2/edit",
|
|
184
|
+
"qwen-image-2-pro": "fal-ai/qwen-image-2/pro/text-to-image",
|
|
185
|
+
"qwen-image-2-pro/edit": "fal-ai/qwen-image-2/pro/edit",
|
|
177
186
|
// Qwen Image Edit 2511 Multiple Angles - camera angle adjustment
|
|
178
187
|
"qwen-angles": "fal-ai/qwen-image-edit-2511-multiple-angles",
|
|
188
|
+
// Recraft V4 Pro - text-to-image
|
|
189
|
+
"recraft-v4-pro": "fal-ai/recraft/v4/pro/text-to-image",
|
|
190
|
+
// Reve - image editing
|
|
191
|
+
"reve/edit": "fal-ai/reve/edit",
|
|
179
192
|
};
|
|
180
193
|
|
|
181
194
|
// Models that use image_size instead of aspect_ratio
|
|
@@ -184,11 +197,19 @@ const IMAGE_SIZE_MODELS = new Set([
|
|
|
184
197
|
"flux-dev",
|
|
185
198
|
"flux-pro",
|
|
186
199
|
"seedream-v4.5/edit",
|
|
200
|
+
"qwen-image-2",
|
|
201
|
+
"qwen-image-2/edit",
|
|
202
|
+
"qwen-image-2-pro",
|
|
203
|
+
"qwen-image-2-pro/edit",
|
|
204
|
+
"recraft-v4-pro",
|
|
187
205
|
]);
|
|
188
206
|
|
|
189
207
|
// Qwen Angles model - image-to-image with camera angle adjustment
|
|
190
208
|
const QWEN_ANGLES_MODEL = "qwen-angles";
|
|
191
209
|
|
|
210
|
+
// Models that use singular image_url instead of image_urls array
|
|
211
|
+
const SINGULAR_IMAGE_URL_MODELS = new Set(["reve/edit"]);
|
|
212
|
+
|
|
192
213
|
// Map aspect ratio to image_size for Qwen Angles (base dimension 1024)
|
|
193
214
|
const ASPECT_RATIO_TO_QWEN_SIZE: Record<
|
|
194
215
|
string,
|
|
@@ -474,20 +495,30 @@ class FalVideoModel implements VideoModelV3 {
|
|
|
474
495
|
};
|
|
475
496
|
|
|
476
497
|
if (isLipsync) {
|
|
477
|
-
// Lipsync: video + audio
|
|
498
|
+
// Lipsync: either (video + audio) or (image + audio), depending on model
|
|
478
499
|
const videoFile = files?.find((f) =>
|
|
479
500
|
getMediaType(f)?.startsWith("video/"),
|
|
480
501
|
);
|
|
502
|
+
const imageFile = files?.find((f) =>
|
|
503
|
+
getMediaType(f)?.startsWith("image/"),
|
|
504
|
+
);
|
|
481
505
|
const audioFile = files?.find((f) =>
|
|
482
506
|
getMediaType(f)?.startsWith("audio/"),
|
|
483
507
|
);
|
|
484
508
|
|
|
485
509
|
if (videoFile) {
|
|
486
510
|
input.video_url = await fileToUrl(videoFile);
|
|
511
|
+
} else if (imageFile) {
|
|
512
|
+
input.image_url = await fileToUrl(imageFile);
|
|
487
513
|
}
|
|
488
514
|
if (audioFile) {
|
|
489
515
|
input.audio_url = await fileToUrl(audioFile);
|
|
490
516
|
}
|
|
517
|
+
|
|
518
|
+
// OmniHuman supports an optional prompt
|
|
519
|
+
if (prompt && this.modelId === "omnihuman-v1.5") {
|
|
520
|
+
input.prompt = prompt;
|
|
521
|
+
}
|
|
491
522
|
} else if (isMotionControl) {
|
|
492
523
|
// Motion control: image + reference video input
|
|
493
524
|
if (prompt) {
|
|
@@ -836,7 +867,13 @@ class FalImageModel implements ImageModelV3 {
|
|
|
836
867
|
modelId: this.modelId,
|
|
837
868
|
fileHashes,
|
|
838
869
|
});
|
|
839
|
-
|
|
870
|
+
const imageUrls = await pMap(files, fileToUrl, { concurrency: 2 });
|
|
871
|
+
// Reve uses singular image_url instead of image_urls array
|
|
872
|
+
if (SINGULAR_IMAGE_URL_MODELS.has(this.modelId)) {
|
|
873
|
+
input.image_url = imageUrls[0];
|
|
874
|
+
} else {
|
|
875
|
+
input.image_urls = imageUrls;
|
|
876
|
+
}
|
|
840
877
|
}
|
|
841
878
|
|
|
842
879
|
if (isQwenAngles && !input.image_urls) {
|
|
@@ -15,6 +15,11 @@ import { ffmpegProvider } from "../../providers/ffmpeg";
|
|
|
15
15
|
|
|
16
16
|
// Input schema with Zod
|
|
17
17
|
const syncInputSchema = z.object({
|
|
18
|
+
model: z
|
|
19
|
+
.enum(["wan-25", "omnihuman-v1.5", "veed-fabric-1.0"])
|
|
20
|
+
.optional()
|
|
21
|
+
.default("wan-25")
|
|
22
|
+
.describe("Lip sync / avatar backend model"),
|
|
18
23
|
image: filePathSchema.describe("Input image"),
|
|
19
24
|
audio: filePathSchema.describe("Audio file"),
|
|
20
25
|
prompt: z.string().describe("Description of the scene"),
|
|
@@ -40,13 +45,14 @@ export const definition: ActionDefinition<typeof schema> = {
|
|
|
40
45
|
schema,
|
|
41
46
|
routes: [],
|
|
42
47
|
execute: async (inputs) => {
|
|
43
|
-
const { image, audio, prompt, duration, resolution } = inputs;
|
|
44
|
-
return lipsync({ image, audio, prompt, duration, resolution });
|
|
48
|
+
const { model, image, audio, prompt, duration, resolution } = inputs;
|
|
49
|
+
return lipsync({ model, image, audio, prompt, duration, resolution });
|
|
45
50
|
},
|
|
46
51
|
};
|
|
47
52
|
|
|
48
53
|
// Types
|
|
49
54
|
export interface LipsyncOptions {
|
|
55
|
+
model?: "wan-25" | "omnihuman-v1.5" | "veed-fabric-1.0";
|
|
50
56
|
image: string;
|
|
51
57
|
audio: string;
|
|
52
58
|
prompt: string;
|
|
@@ -65,20 +71,56 @@ export interface Wav2LipOptions {
|
|
|
65
71
|
}
|
|
66
72
|
|
|
67
73
|
/**
|
|
68
|
-
* Generate lip-synced video using
|
|
74
|
+
* Generate lip-synced / avatar video using selected backend.
|
|
69
75
|
*/
|
|
70
76
|
export async function lipsync(options: LipsyncOptions): Promise<LipsyncResult> {
|
|
71
|
-
const {
|
|
77
|
+
const {
|
|
78
|
+
model = "wan-25",
|
|
79
|
+
image,
|
|
80
|
+
audio,
|
|
81
|
+
prompt,
|
|
82
|
+
duration = "5",
|
|
83
|
+
resolution = "480p",
|
|
84
|
+
} = options;
|
|
72
85
|
|
|
73
|
-
console.log(
|
|
86
|
+
console.log(`[sync] generating lip-synced video with ${model}...`);
|
|
74
87
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
88
|
+
if (model === "omnihuman-v1.5" && resolution === "480p") {
|
|
89
|
+
console.warn(
|
|
90
|
+
"[sync] omnihuman-v1.5 does not support 480p; using 720p instead",
|
|
91
|
+
);
|
|
92
|
+
}
|
|
93
|
+
if (model === "veed-fabric-1.0" && resolution === "1080p") {
|
|
94
|
+
console.warn(
|
|
95
|
+
"[sync] veed-fabric-1.0 does not support 1080p; using 720p instead",
|
|
96
|
+
);
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
const result =
|
|
100
|
+
model === "omnihuman-v1.5"
|
|
101
|
+
? await falProvider.omnihuman15({
|
|
102
|
+
imageUrl: image,
|
|
103
|
+
audioUrl: audio,
|
|
104
|
+
prompt,
|
|
105
|
+
resolution: (resolution === "480p" ? "720p" : resolution) as
|
|
106
|
+
| "720p"
|
|
107
|
+
| "1080p",
|
|
108
|
+
})
|
|
109
|
+
: model === "veed-fabric-1.0"
|
|
110
|
+
? await falProvider.veedFabric10({
|
|
111
|
+
imageUrl: image,
|
|
112
|
+
audioUrl: audio,
|
|
113
|
+
resolution: (resolution === "1080p" ? "720p" : resolution) as
|
|
114
|
+
| "480p"
|
|
115
|
+
| "720p",
|
|
116
|
+
})
|
|
117
|
+
: await falProvider.wan25({
|
|
118
|
+
imageUrl: image,
|
|
119
|
+
audioUrl: audio,
|
|
120
|
+
prompt,
|
|
121
|
+
duration,
|
|
122
|
+
resolution,
|
|
123
|
+
});
|
|
82
124
|
|
|
83
125
|
const videoUrl = result.data?.video?.url;
|
|
84
126
|
if (!videoUrl) {
|
|
@@ -6,9 +6,15 @@ export { definition as elevenlabsTts } from "./elevenlabs";
|
|
|
6
6
|
export { definition as flux } from "./flux";
|
|
7
7
|
export { definition as kling } from "./kling";
|
|
8
8
|
export { definition as llama } from "./llama";
|
|
9
|
+
export { definition as nanoBanana2 } from "./nano-banana-2";
|
|
9
10
|
export { definition as nanoBananaPro } from "./nano-banana-pro";
|
|
11
|
+
export { definition as omnihuman } from "./omnihuman";
|
|
12
|
+
export { definition as qwenImage2 } from "./qwen-image-2";
|
|
13
|
+
export { definition as recraftV4 } from "./recraft-v4";
|
|
14
|
+
export { definition as reve } from "./reve";
|
|
10
15
|
export { definition as sonauto } from "./sonauto";
|
|
11
16
|
export { definition as soul } from "./soul";
|
|
17
|
+
export { definition as veedFabric } from "./veed-fabric";
|
|
12
18
|
export { definition as wan } from "./wan";
|
|
13
19
|
export { definition as whisper } from "./whisper";
|
|
14
20
|
|
|
@@ -17,9 +23,15 @@ import { definition as elevenlabsDefinition } from "./elevenlabs";
|
|
|
17
23
|
import { definition as fluxDefinition } from "./flux";
|
|
18
24
|
import { definition as klingDefinition } from "./kling";
|
|
19
25
|
import { definition as llamaDefinition } from "./llama";
|
|
26
|
+
import { definition as nanoBanana2Definition } from "./nano-banana-2";
|
|
20
27
|
import { definition as nanoBananaProDefinition } from "./nano-banana-pro";
|
|
28
|
+
import { definition as omnihumanDefinition } from "./omnihuman";
|
|
29
|
+
import { definition as qwenImage2Definition } from "./qwen-image-2";
|
|
30
|
+
import { definition as recraftV4Definition } from "./recraft-v4";
|
|
31
|
+
import { definition as reveDefinition } from "./reve";
|
|
21
32
|
import { definition as sonautoDefinition } from "./sonauto";
|
|
22
33
|
import { definition as soulDefinition } from "./soul";
|
|
34
|
+
import { definition as veedFabricDefinition } from "./veed-fabric";
|
|
23
35
|
import { definition as wanDefinition } from "./wan";
|
|
24
36
|
import { definition as whisperDefinition } from "./whisper";
|
|
25
37
|
|
|
@@ -27,7 +39,13 @@ export const allModels = [
|
|
|
27
39
|
klingDefinition,
|
|
28
40
|
fluxDefinition,
|
|
29
41
|
nanoBananaProDefinition,
|
|
42
|
+
nanoBanana2Definition,
|
|
43
|
+
qwenImage2Definition,
|
|
44
|
+
recraftV4Definition,
|
|
45
|
+
reveDefinition,
|
|
30
46
|
wanDefinition,
|
|
47
|
+
omnihumanDefinition,
|
|
48
|
+
veedFabricDefinition,
|
|
31
49
|
whisperDefinition,
|
|
32
50
|
elevenlabsDefinition,
|
|
33
51
|
soulDefinition,
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Nano Banana 2 image editing model (Google's next-gen image generation/editing)
|
|
3
|
+
* Edit-only model requiring image_urls input
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { z } from "zod";
|
|
7
|
+
import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
|
|
8
|
+
|
|
9
|
+
// Nano Banana 2 resolution options (includes 0.5K unlike nano-banana-pro)
|
|
10
|
+
const nanoBanana2ResolutionSchema = z.enum(["0.5K", "1K", "2K", "4K"]);
|
|
11
|
+
|
|
12
|
+
// Nano Banana 2 aspect ratio options (supports "auto" unlike nano-banana-pro)
|
|
13
|
+
const nanoBanana2AspectRatioSchema = z.enum([
|
|
14
|
+
"auto",
|
|
15
|
+
"21:9",
|
|
16
|
+
"16:9",
|
|
17
|
+
"3:2",
|
|
18
|
+
"4:3",
|
|
19
|
+
"5:4",
|
|
20
|
+
"1:1",
|
|
21
|
+
"4:5",
|
|
22
|
+
"3:4",
|
|
23
|
+
"2:3",
|
|
24
|
+
"9:16",
|
|
25
|
+
]);
|
|
26
|
+
|
|
27
|
+
// Output format options
|
|
28
|
+
const nanoBanana2OutputFormatSchema = z.enum(["png", "jpeg", "webp"]);
|
|
29
|
+
|
|
30
|
+
// Safety tolerance level (string enum "1"-"6", unlike nano-banana-pro's semantic filter)
|
|
31
|
+
const nanoBanana2SafetyToleranceSchema = z.enum(["1", "2", "3", "4", "5", "6"]);
|
|
32
|
+
|
|
33
|
+
// Input schema with Zod
|
|
34
|
+
const nanoBanana2InputSchema = z.object({
|
|
35
|
+
prompt: z.string().describe("Text description for image editing"),
|
|
36
|
+
image_urls: z
|
|
37
|
+
.array(z.string().url())
|
|
38
|
+
.describe(
|
|
39
|
+
"Input image URLs for image-to-image editing. Required for this model.",
|
|
40
|
+
),
|
|
41
|
+
resolution: nanoBanana2ResolutionSchema
|
|
42
|
+
.default("1K")
|
|
43
|
+
.describe(
|
|
44
|
+
"Output resolution: 0.5K (512px), 1K (1024px), 2K (2048px), or 4K",
|
|
45
|
+
),
|
|
46
|
+
aspect_ratio: nanoBanana2AspectRatioSchema
|
|
47
|
+
.default("auto")
|
|
48
|
+
.describe("Output aspect ratio. 'auto' preserves input aspect ratio."),
|
|
49
|
+
output_format: nanoBanana2OutputFormatSchema
|
|
50
|
+
.default("png")
|
|
51
|
+
.describe("Output image format"),
|
|
52
|
+
safety_tolerance: nanoBanana2SafetyToleranceSchema
|
|
53
|
+
.default("4")
|
|
54
|
+
.describe("Safety tolerance level: 1 (most strict) to 6 (least strict)"),
|
|
55
|
+
num_images: z
|
|
56
|
+
.number()
|
|
57
|
+
.int()
|
|
58
|
+
.min(1)
|
|
59
|
+
.max(4)
|
|
60
|
+
.default(1)
|
|
61
|
+
.describe("Number of images to generate (1-4)"),
|
|
62
|
+
seed: z
|
|
63
|
+
.number()
|
|
64
|
+
.int()
|
|
65
|
+
.optional()
|
|
66
|
+
.describe("Seed for the random number generator"),
|
|
67
|
+
limit_generations: z
|
|
68
|
+
.boolean()
|
|
69
|
+
.default(true)
|
|
70
|
+
.describe(
|
|
71
|
+
"Limit generations from each round of prompting to 1. May affect quality.",
|
|
72
|
+
),
|
|
73
|
+
enable_web_search: z
|
|
74
|
+
.boolean()
|
|
75
|
+
.default(false)
|
|
76
|
+
.describe(
|
|
77
|
+
"Enable web search to use latest information for image generation",
|
|
78
|
+
),
|
|
79
|
+
});
|
|
80
|
+
|
|
81
|
+
// Output schema with Zod
|
|
82
|
+
const nanoBanana2OutputSchema = z.object({
|
|
83
|
+
images: z.array(
|
|
84
|
+
z.object({
|
|
85
|
+
url: z.string(),
|
|
86
|
+
file_name: z.string().optional(),
|
|
87
|
+
content_type: z.string().optional(),
|
|
88
|
+
}),
|
|
89
|
+
),
|
|
90
|
+
description: z.string().optional(),
|
|
91
|
+
});
|
|
92
|
+
|
|
93
|
+
// Schema object for the definition
|
|
94
|
+
const schema: ZodSchema<
|
|
95
|
+
typeof nanoBanana2InputSchema,
|
|
96
|
+
typeof nanoBanana2OutputSchema
|
|
97
|
+
> = {
|
|
98
|
+
input: nanoBanana2InputSchema,
|
|
99
|
+
output: nanoBanana2OutputSchema,
|
|
100
|
+
};
|
|
101
|
+
|
|
102
|
+
export const definition: ModelDefinition<typeof schema> = {
|
|
103
|
+
type: "model",
|
|
104
|
+
name: "nano-banana-2",
|
|
105
|
+
description:
|
|
106
|
+
"Google Nano Banana 2 - next-gen image editing model. Requires image_urls for all operations.",
|
|
107
|
+
providers: ["fal"],
|
|
108
|
+
defaultProvider: "fal",
|
|
109
|
+
providerModels: {
|
|
110
|
+
fal: "fal-ai/nano-banana-2/edit",
|
|
111
|
+
},
|
|
112
|
+
schema,
|
|
113
|
+
};
|
|
114
|
+
|
|
115
|
+
export default definition;
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bytedance OmniHuman v1.5
|
|
3
|
+
* Image + audio -> video (full-body human animation)
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { z } from "zod";
|
|
7
|
+
import { urlSchema } from "../../core/schema/shared";
|
|
8
|
+
import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
|
|
9
|
+
|
|
10
|
+
const omnihumanResolutionSchema = z
|
|
11
|
+
.enum(["720p", "1080p"])
|
|
12
|
+
.describe("Output resolution");
|
|
13
|
+
|
|
14
|
+
// Input schema with Zod
|
|
15
|
+
const omnihumanInputSchema = z.object({
|
|
16
|
+
prompt: z
|
|
17
|
+
.string()
|
|
18
|
+
.optional()
|
|
19
|
+
.describe("The text prompt used to guide the video generation"),
|
|
20
|
+
image_url: urlSchema.describe(
|
|
21
|
+
"The URL of the image used to generate the video",
|
|
22
|
+
),
|
|
23
|
+
audio_url: urlSchema.describe(
|
|
24
|
+
"The URL of the audio file to generate the video",
|
|
25
|
+
),
|
|
26
|
+
turbo_mode: z
|
|
27
|
+
.boolean()
|
|
28
|
+
.optional()
|
|
29
|
+
.default(false)
|
|
30
|
+
.describe("Faster generation with slight quality trade-off"),
|
|
31
|
+
resolution: omnihumanResolutionSchema
|
|
32
|
+
.optional()
|
|
33
|
+
.default("1080p")
|
|
34
|
+
.describe(
|
|
35
|
+
"The resolution of the generated video. 720p generation is faster and higher in quality",
|
|
36
|
+
),
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
// Output schema with Zod
|
|
40
|
+
const omnihumanOutputSchema = z.object({
|
|
41
|
+
video: z.object({
|
|
42
|
+
url: z.string(),
|
|
43
|
+
}),
|
|
44
|
+
duration: z
|
|
45
|
+
.number()
|
|
46
|
+
.optional()
|
|
47
|
+
.describe("Duration of audio input/video output as used for billing"),
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
const schema: ZodSchema<
|
|
51
|
+
typeof omnihumanInputSchema,
|
|
52
|
+
typeof omnihumanOutputSchema
|
|
53
|
+
> = {
|
|
54
|
+
input: omnihumanInputSchema,
|
|
55
|
+
output: omnihumanOutputSchema,
|
|
56
|
+
};
|
|
57
|
+
|
|
58
|
+
export const definition: ModelDefinition<typeof schema> = {
|
|
59
|
+
type: "model",
|
|
60
|
+
name: "omnihuman",
|
|
61
|
+
description:
|
|
62
|
+
"OmniHuman v1.5 - generate a vivid talking video from an image and an audio file",
|
|
63
|
+
providers: ["fal"],
|
|
64
|
+
defaultProvider: "fal",
|
|
65
|
+
providerModels: {
|
|
66
|
+
fal: "fal-ai/bytedance/omnihuman/v1.5",
|
|
67
|
+
},
|
|
68
|
+
schema,
|
|
69
|
+
};
|
|
70
|
+
|
|
71
|
+
export default definition;
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Qwen Image 2 generation and editing model
|
|
3
|
+
* Next-generation unified generation-and-editing model from Alibaba
|
|
4
|
+
* Supports both text-to-image and image-to-image editing
|
|
5
|
+
* Available in standard and pro tiers
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { z } from "zod";
|
|
9
|
+
import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
|
|
10
|
+
|
|
11
|
+
// Image size can be an enum string or an object with width/height
|
|
12
|
+
const qwenImage2ImageSizeSchema = z.union([
|
|
13
|
+
z.enum([
|
|
14
|
+
"square_hd",
|
|
15
|
+
"square",
|
|
16
|
+
"landscape_4_3",
|
|
17
|
+
"landscape_16_9",
|
|
18
|
+
"portrait_4_3",
|
|
19
|
+
"portrait_16_9",
|
|
20
|
+
]),
|
|
21
|
+
z.object({
|
|
22
|
+
width: z.number().int().min(512).max(2048),
|
|
23
|
+
height: z.number().int().min(512).max(2048),
|
|
24
|
+
}),
|
|
25
|
+
]);
|
|
26
|
+
|
|
27
|
+
// Output format options
|
|
28
|
+
const qwenImage2OutputFormatSchema = z.enum(["png", "jpeg", "webp"]);
|
|
29
|
+
|
|
30
|
+
// Input schema with Zod
|
|
31
|
+
const qwenImage2InputSchema = z.object({
|
|
32
|
+
prompt: z
|
|
33
|
+
.string()
|
|
34
|
+
.describe(
|
|
35
|
+
"Text description for generation or editing. Supports Chinese and English.",
|
|
36
|
+
),
|
|
37
|
+
negative_prompt: z
|
|
38
|
+
.string()
|
|
39
|
+
.default("")
|
|
40
|
+
.describe("Content to avoid in the generated image. Max 500 characters."),
|
|
41
|
+
image_size: qwenImage2ImageSizeSchema
|
|
42
|
+
.optional()
|
|
43
|
+
.describe(
|
|
44
|
+
"Output image size. Can be an enum (e.g. 'square_hd') or {width, height} object. Pixels must be between 512x512 and 2048x2048.",
|
|
45
|
+
),
|
|
46
|
+
image_urls: z
|
|
47
|
+
.array(z.string().url())
|
|
48
|
+
.optional()
|
|
49
|
+
.describe(
|
|
50
|
+
"Reference images for editing (1-6 images). Order matters: reference as 'image 1', 'image 2' in prompt. Required for /edit endpoints.",
|
|
51
|
+
),
|
|
52
|
+
enable_prompt_expansion: z
|
|
53
|
+
.boolean()
|
|
54
|
+
.default(true)
|
|
55
|
+
.describe("Enable LLM prompt optimization for better results"),
|
|
56
|
+
seed: z
|
|
57
|
+
.number()
|
|
58
|
+
.int()
|
|
59
|
+
.min(0)
|
|
60
|
+
.max(2147483647)
|
|
61
|
+
.optional()
|
|
62
|
+
.describe("Random seed for reproducibility"),
|
|
63
|
+
enable_safety_checker: z
|
|
64
|
+
.boolean()
|
|
65
|
+
.default(true)
|
|
66
|
+
.describe("Enable content moderation for input and output"),
|
|
67
|
+
num_images: z
|
|
68
|
+
.number()
|
|
69
|
+
.int()
|
|
70
|
+
.min(1)
|
|
71
|
+
.max(6)
|
|
72
|
+
.default(1)
|
|
73
|
+
.describe("Number of images to generate (1-4 for t2i, 1-6 for edit)"),
|
|
74
|
+
output_format: qwenImage2OutputFormatSchema
|
|
75
|
+
.default("png")
|
|
76
|
+
.describe("Output image format"),
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
// Output schema with Zod
|
|
80
|
+
const qwenImage2OutputSchema = z.object({
|
|
81
|
+
images: z.array(
|
|
82
|
+
z.object({
|
|
83
|
+
url: z.string(),
|
|
84
|
+
file_name: z.string().optional(),
|
|
85
|
+
content_type: z.string().optional(),
|
|
86
|
+
}),
|
|
87
|
+
),
|
|
88
|
+
seed: z.number().int().optional(),
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
// Schema object for the definition
|
|
92
|
+
const schema: ZodSchema<
|
|
93
|
+
typeof qwenImage2InputSchema,
|
|
94
|
+
typeof qwenImage2OutputSchema
|
|
95
|
+
> = {
|
|
96
|
+
input: qwenImage2InputSchema,
|
|
97
|
+
output: qwenImage2OutputSchema,
|
|
98
|
+
};
|
|
99
|
+
|
|
100
|
+
export const definition: ModelDefinition<typeof schema> = {
|
|
101
|
+
type: "model",
|
|
102
|
+
name: "qwen-image-2",
|
|
103
|
+
description:
|
|
104
|
+
"Qwen Image 2.0 - next-gen unified generation-and-editing model. Supports text-to-image and image-to-image editing in standard and pro tiers.",
|
|
105
|
+
providers: ["fal"],
|
|
106
|
+
defaultProvider: "fal",
|
|
107
|
+
providerModels: {
|
|
108
|
+
fal: "fal-ai/qwen-image-2/text-to-image",
|
|
109
|
+
},
|
|
110
|
+
schema,
|
|
111
|
+
};
|
|
112
|
+
|
|
113
|
+
export default definition;
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Recraft V4 Pro image generation model
|
|
3
|
+
* Built for brand systems and production-ready workflows
|
|
4
|
+
* Text-to-image only
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { z } from "zod";
|
|
8
|
+
import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
|
|
9
|
+
|
|
10
|
+
// Image size can be an enum string or an object with width/height
|
|
11
|
+
const recraftV4ImageSizeSchema = z.union([
|
|
12
|
+
z.enum([
|
|
13
|
+
"square_hd",
|
|
14
|
+
"square",
|
|
15
|
+
"landscape_4_3",
|
|
16
|
+
"landscape_16_9",
|
|
17
|
+
"portrait_4_3",
|
|
18
|
+
"portrait_16_9",
|
|
19
|
+
]),
|
|
20
|
+
z.object({
|
|
21
|
+
width: z.number().int(),
|
|
22
|
+
height: z.number().int(),
|
|
23
|
+
}),
|
|
24
|
+
]);
|
|
25
|
+
|
|
26
|
+
// RGB color schema
|
|
27
|
+
const rgbColorSchema = z.object({
|
|
28
|
+
r: z.number().int().min(0).max(255),
|
|
29
|
+
g: z.number().int().min(0).max(255),
|
|
30
|
+
b: z.number().int().min(0).max(255),
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
// Output format - Recraft V4 outputs webp by default
|
|
34
|
+
const recraftV4OutputFormatSchema = z.enum(["png", "jpeg", "webp"]);
|
|
35
|
+
|
|
36
|
+
// Input schema with Zod
|
|
37
|
+
const recraftV4InputSchema = z.object({
|
|
38
|
+
prompt: z.string().describe("Text description for image generation"),
|
|
39
|
+
image_size: recraftV4ImageSizeSchema
|
|
40
|
+
.default("square_hd")
|
|
41
|
+
.describe(
|
|
42
|
+
"Output image size. Can be an enum (e.g. 'landscape_16_9') or {width, height} object.",
|
|
43
|
+
),
|
|
44
|
+
colors: z
|
|
45
|
+
.array(rgbColorSchema)
|
|
46
|
+
.default([])
|
|
47
|
+
.describe("Array of preferable RGB colors for the generated image"),
|
|
48
|
+
background_color: rgbColorSchema
|
|
49
|
+
.optional()
|
|
50
|
+
.describe("Preferable background color of the generated image"),
|
|
51
|
+
enable_safety_checker: z
|
|
52
|
+
.boolean()
|
|
53
|
+
.default(true)
|
|
54
|
+
.describe("Enable content safety checker"),
|
|
55
|
+
output_format: recraftV4OutputFormatSchema
|
|
56
|
+
.optional()
|
|
57
|
+
.describe("Output image format"),
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
// Output schema with Zod
|
|
61
|
+
const recraftV4OutputSchema = z.object({
|
|
62
|
+
images: z.array(
|
|
63
|
+
z.object({
|
|
64
|
+
url: z.string(),
|
|
65
|
+
file_name: z.string().optional(),
|
|
66
|
+
file_size: z.number().optional(),
|
|
67
|
+
content_type: z.string().optional(),
|
|
68
|
+
}),
|
|
69
|
+
),
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
// Schema object for the definition
|
|
73
|
+
const schema: ZodSchema<
|
|
74
|
+
typeof recraftV4InputSchema,
|
|
75
|
+
typeof recraftV4OutputSchema
|
|
76
|
+
> = {
|
|
77
|
+
input: recraftV4InputSchema,
|
|
78
|
+
output: recraftV4OutputSchema,
|
|
79
|
+
};
|
|
80
|
+
|
|
81
|
+
export const definition: ModelDefinition<typeof schema> = {
|
|
82
|
+
type: "model",
|
|
83
|
+
name: "recraft-v4-pro",
|
|
84
|
+
description:
|
|
85
|
+
"Recraft V4 Pro - professional text-to-image model built for brand systems and production-ready workflows. Strong composition, refined lighting, realistic materials.",
|
|
86
|
+
providers: ["fal"],
|
|
87
|
+
defaultProvider: "fal",
|
|
88
|
+
providerModels: {
|
|
89
|
+
fal: "fal-ai/recraft/v4/pro/text-to-image",
|
|
90
|
+
},
|
|
91
|
+
schema,
|
|
92
|
+
};
|
|
93
|
+
|
|
94
|
+
export default definition;
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Reve image editing model
|
|
3
|
+
* Upload an existing image and transform it via a text prompt
|
|
4
|
+
* Edit-only model using singular image_url (not image_urls array)
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { z } from "zod";
|
|
8
|
+
import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
|
|
9
|
+
|
|
10
|
+
// Output format options
|
|
11
|
+
const reveOutputFormatSchema = z.enum(["png", "jpeg", "webp"]);
|
|
12
|
+
|
|
13
|
+
// Input schema with Zod
|
|
14
|
+
const reveInputSchema = z.object({
|
|
15
|
+
prompt: z
|
|
16
|
+
.string()
|
|
17
|
+
.describe("Text description of how to edit the provided image"),
|
|
18
|
+
image_url: z
|
|
19
|
+
.string()
|
|
20
|
+
.url()
|
|
21
|
+
.describe(
|
|
22
|
+
"URL of the reference image to edit. Supports PNG, JPEG, WebP, AVIF, and HEIF formats.",
|
|
23
|
+
),
|
|
24
|
+
num_images: z
|
|
25
|
+
.number()
|
|
26
|
+
.int()
|
|
27
|
+
.min(1)
|
|
28
|
+
.max(4)
|
|
29
|
+
.default(1)
|
|
30
|
+
.describe("Number of images to generate (1-4)"),
|
|
31
|
+
output_format: reveOutputFormatSchema
|
|
32
|
+
.default("png")
|
|
33
|
+
.describe("Output image format"),
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
// Output schema with Zod
|
|
37
|
+
const reveOutputSchema = z.object({
|
|
38
|
+
images: z.array(
|
|
39
|
+
z.object({
|
|
40
|
+
url: z.string(),
|
|
41
|
+
file_name: z.string().optional(),
|
|
42
|
+
content_type: z.string().optional(),
|
|
43
|
+
}),
|
|
44
|
+
),
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
// Schema object for the definition
|
|
48
|
+
const schema: ZodSchema<typeof reveInputSchema, typeof reveOutputSchema> = {
|
|
49
|
+
input: reveInputSchema,
|
|
50
|
+
output: reveOutputSchema,
|
|
51
|
+
};
|
|
52
|
+
|
|
53
|
+
export const definition: ModelDefinition<typeof schema> = {
|
|
54
|
+
type: "model",
|
|
55
|
+
name: "reve",
|
|
56
|
+
description:
|
|
57
|
+
"Reve edit model - upload an existing image and transform it via a text prompt. Uses singular image_url input.",
|
|
58
|
+
providers: ["fal"],
|
|
59
|
+
defaultProvider: "fal",
|
|
60
|
+
providerModels: {
|
|
61
|
+
fal: "fal-ai/reve/edit",
|
|
62
|
+
},
|
|
63
|
+
schema,
|
|
64
|
+
};
|
|
65
|
+
|
|
66
|
+
export default definition;
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* VEED Fabric 1.0
|
|
3
|
+
* Image + audio -> talking video
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { z } from "zod";
|
|
7
|
+
import { urlSchema } from "../../core/schema/shared";
|
|
8
|
+
import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
|
|
9
|
+
|
|
10
|
+
const fabricResolutionSchema = z
|
|
11
|
+
.enum(["480p", "720p"])
|
|
12
|
+
.describe("Output resolution");
|
|
13
|
+
|
|
14
|
+
// Input schema with Zod
|
|
15
|
+
const veedFabricInputSchema = z.object({
|
|
16
|
+
image_url: urlSchema.describe("Input image URL"),
|
|
17
|
+
audio_url: urlSchema.describe("Input audio URL"),
|
|
18
|
+
resolution: fabricResolutionSchema.describe("Output resolution"),
|
|
19
|
+
});
|
|
20
|
+
|
|
21
|
+
// Output schema with Zod
|
|
22
|
+
const veedFabricOutputSchema = z.object({
|
|
23
|
+
video: z.object({
|
|
24
|
+
content_type: z.string().optional(),
|
|
25
|
+
url: z.string().url(),
|
|
26
|
+
}),
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
const schema: ZodSchema<
|
|
30
|
+
typeof veedFabricInputSchema,
|
|
31
|
+
typeof veedFabricOutputSchema
|
|
32
|
+
> = {
|
|
33
|
+
input: veedFabricInputSchema,
|
|
34
|
+
output: veedFabricOutputSchema,
|
|
35
|
+
};
|
|
36
|
+
|
|
37
|
+
export const definition: ModelDefinition<typeof schema> = {
|
|
38
|
+
type: "model",
|
|
39
|
+
name: "veed-fabric",
|
|
40
|
+
description: "VEED Fabric 1.0 - turn an image into a talking video",
|
|
41
|
+
providers: ["fal"],
|
|
42
|
+
defaultProvider: "fal",
|
|
43
|
+
providerModels: {
|
|
44
|
+
fal: "veed/fabric-1.0",
|
|
45
|
+
},
|
|
46
|
+
schema,
|
|
47
|
+
};
|
|
48
|
+
|
|
49
|
+
export default definition;
|
package/src/providers/fal.ts
CHANGED
|
@@ -54,6 +54,23 @@ export class FalProvider extends BaseProvider {
|
|
|
54
54
|
return "fal-ai/nano-banana-pro/edit";
|
|
55
55
|
}
|
|
56
56
|
}
|
|
57
|
+
// Nano Banana 2: always route to /edit endpoint (edit-only model)
|
|
58
|
+
if (model === "fal-ai/nano-banana-2") {
|
|
59
|
+
return "fal-ai/nano-banana-2/edit";
|
|
60
|
+
}
|
|
61
|
+
// Qwen Image 2: route to /edit endpoint when image_urls are provided
|
|
62
|
+
if (model === "fal-ai/qwen-image-2/text-to-image") {
|
|
63
|
+
const imageUrls = inputs.image_urls as string[] | undefined;
|
|
64
|
+
if (imageUrls && imageUrls.length > 0) {
|
|
65
|
+
return "fal-ai/qwen-image-2/edit";
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
if (model === "fal-ai/qwen-image-2/pro/text-to-image") {
|
|
69
|
+
const imageUrls = inputs.image_urls as string[] | undefined;
|
|
70
|
+
if (imageUrls && imageUrls.length > 0) {
|
|
71
|
+
return "fal-ai/qwen-image-2/pro/edit";
|
|
72
|
+
}
|
|
73
|
+
}
|
|
57
74
|
return model;
|
|
58
75
|
}
|
|
59
76
|
|
|
@@ -332,6 +349,86 @@ export class FalProvider extends BaseProvider {
|
|
|
332
349
|
return result;
|
|
333
350
|
}
|
|
334
351
|
|
|
352
|
+
async omnihuman15(args: {
|
|
353
|
+
imageUrl: string;
|
|
354
|
+
audioUrl: string;
|
|
355
|
+
prompt?: string;
|
|
356
|
+
turboMode?: boolean;
|
|
357
|
+
resolution?: "720p" | "1080p";
|
|
358
|
+
}) {
|
|
359
|
+
const modelId: string = "fal-ai/bytedance/omnihuman/v1.5";
|
|
360
|
+
|
|
361
|
+
console.log(`[fal] starting omnihuman v1.5: ${modelId}`);
|
|
362
|
+
|
|
363
|
+
const imageUrl = await ensureUrl(args.imageUrl, (buffer) =>
|
|
364
|
+
this.uploadFile(buffer),
|
|
365
|
+
);
|
|
366
|
+
const audioUrl = await ensureUrl(args.audioUrl, (buffer) =>
|
|
367
|
+
this.uploadFile(buffer),
|
|
368
|
+
);
|
|
369
|
+
|
|
370
|
+
const input: Record<string, unknown> = {
|
|
371
|
+
...(args.prompt ? { prompt: args.prompt } : {}),
|
|
372
|
+
image_url: imageUrl,
|
|
373
|
+
audio_url: audioUrl,
|
|
374
|
+
turbo_mode: args.turboMode ?? false,
|
|
375
|
+
resolution: args.resolution ?? "1080p",
|
|
376
|
+
};
|
|
377
|
+
|
|
378
|
+
const result = await fal.subscribe(modelId, {
|
|
379
|
+
input,
|
|
380
|
+
logs: true,
|
|
381
|
+
onQueueUpdate: (update) => {
|
|
382
|
+
if (update.status === "IN_PROGRESS") {
|
|
383
|
+
console.log(
|
|
384
|
+
`[fal] ${update.logs?.map((l) => l.message).join(" ") || "processing..."}`,
|
|
385
|
+
);
|
|
386
|
+
}
|
|
387
|
+
},
|
|
388
|
+
});
|
|
389
|
+
|
|
390
|
+
console.log("[fal] completed!");
|
|
391
|
+
return result;
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
async veedFabric10(args: {
|
|
395
|
+
imageUrl: string;
|
|
396
|
+
audioUrl: string;
|
|
397
|
+
resolution: "480p" | "720p";
|
|
398
|
+
}) {
|
|
399
|
+
const modelId: string = "veed/fabric-1.0";
|
|
400
|
+
|
|
401
|
+
console.log(`[fal] starting veed fabric 1.0: ${modelId}`);
|
|
402
|
+
|
|
403
|
+
const imageUrl = await ensureUrl(args.imageUrl, (buffer) =>
|
|
404
|
+
this.uploadFile(buffer),
|
|
405
|
+
);
|
|
406
|
+
const audioUrl = await ensureUrl(args.audioUrl, (buffer) =>
|
|
407
|
+
this.uploadFile(buffer),
|
|
408
|
+
);
|
|
409
|
+
|
|
410
|
+
const input: Record<string, unknown> = {
|
|
411
|
+
image_url: imageUrl,
|
|
412
|
+
audio_url: audioUrl,
|
|
413
|
+
resolution: args.resolution,
|
|
414
|
+
};
|
|
415
|
+
|
|
416
|
+
const result = await fal.subscribe(modelId, {
|
|
417
|
+
input,
|
|
418
|
+
logs: true,
|
|
419
|
+
onQueueUpdate: (update) => {
|
|
420
|
+
if (update.status === "IN_PROGRESS") {
|
|
421
|
+
console.log(
|
|
422
|
+
`[fal] ${update.logs?.map((l) => l.message).join(" ") || "processing..."}`,
|
|
423
|
+
);
|
|
424
|
+
}
|
|
425
|
+
},
|
|
426
|
+
});
|
|
427
|
+
|
|
428
|
+
console.log("[fal] completed!");
|
|
429
|
+
return result;
|
|
430
|
+
}
|
|
431
|
+
|
|
335
432
|
async textToMusic(args: {
|
|
336
433
|
prompt?: string;
|
|
337
434
|
tags?: string[];
|
|
@@ -584,5 +681,10 @@ export const imageToImage = (
|
|
|
584
681
|
) => falProvider.imageToImage(args);
|
|
585
682
|
export const wan25 = (args: Parameters<FalProvider["wan25"]>[0]) =>
|
|
586
683
|
falProvider.wan25(args);
|
|
684
|
+
export const omnihuman15 = (args: Parameters<FalProvider["omnihuman15"]>[0]) =>
|
|
685
|
+
falProvider.omnihuman15(args);
|
|
686
|
+
export const veedFabric10 = (
|
|
687
|
+
args: Parameters<FalProvider["veedFabric10"]>[0],
|
|
688
|
+
) => falProvider.veedFabric10(args);
|
|
587
689
|
export const textToMusic = (args: Parameters<FalProvider["textToMusic"]>[0]) =>
|
|
588
690
|
falProvider.textToMusic(args);
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Longer talking head demo (VEED Fabric 1.0):
|
|
3
|
+
* - character image from nano-banana-pro
|
|
4
|
+
* - voice from ElevenLabs
|
|
5
|
+
* - talking video from veed/fabric-1.0 (image + audio)
|
|
6
|
+
*
|
|
7
|
+
* Run: bun run src/react/examples/veed-fabric-long-talking-head.tsx
|
|
8
|
+
* Output: output/veed-fabric-long-talking-head.mp4
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { elevenlabs, fal } from "../../ai-sdk";
|
|
12
|
+
import { Clip, Image, Render, render, Speech, Video } from "..";
|
|
13
|
+
|
|
14
|
+
const SCRIPT =
|
|
15
|
+
"Hey, I am Nova. In this quick demo, you will hear a clean voiceover, and see a talking avatar generated from a single portrait. We are using VEED Fabric for image-to-video lipsync, and ElevenLabs for the voice.";
|
|
16
|
+
|
|
17
|
+
const portrait = Image({
|
|
18
|
+
prompt:
|
|
19
|
+
"Ultra-realistic studio portrait of Nova, a confident friendly product designer in her early 30s, warm smile, expressive eyes, subtle freckles, natural makeup, shoulder-length dark auburn hair, modern minimal wardrobe, cinematic softbox lighting, shallow depth of field, clean neutral background, high-end camera look",
|
|
20
|
+
model: fal.imageModel("nano-banana-pro"),
|
|
21
|
+
aspectRatio: "9:16",
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
const voiceover = Speech({
|
|
25
|
+
model: elevenlabs.speechModel("eleven_v3"),
|
|
26
|
+
voice: "adam",
|
|
27
|
+
children: SCRIPT,
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
const talking = Video({
|
|
31
|
+
model: fal.videoModel("veed-fabric-1.0"),
|
|
32
|
+
keepAudio: true,
|
|
33
|
+
prompt: {
|
|
34
|
+
images: [portrait],
|
|
35
|
+
audio: voiceover,
|
|
36
|
+
},
|
|
37
|
+
providerOptions: {
|
|
38
|
+
fal: {
|
|
39
|
+
resolution: "720p",
|
|
40
|
+
},
|
|
41
|
+
},
|
|
42
|
+
});
|
|
43
|
+
|
|
44
|
+
const demo = (
|
|
45
|
+
<Render width={1080} height={1920}>
|
|
46
|
+
<Clip duration="auto">{talking}</Clip>
|
|
47
|
+
</Render>
|
|
48
|
+
);
|
|
49
|
+
|
|
50
|
+
async function main() {
|
|
51
|
+
if (!process.env.FAL_API_KEY && !process.env.FAL_KEY) {
|
|
52
|
+
console.error("ERROR: FAL_API_KEY/FAL_KEY not found in environment");
|
|
53
|
+
process.exit(1);
|
|
54
|
+
}
|
|
55
|
+
if (!process.env.ELEVENLABS_API_KEY) {
|
|
56
|
+
console.error("ERROR: ELEVENLABS_API_KEY not found in environment");
|
|
57
|
+
process.exit(1);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
const result = await render(demo, {
|
|
61
|
+
output: "output/veed-fabric-long-talking-head.mp4",
|
|
62
|
+
cache: ".cache/ai-veed-fabric-long-talking-head",
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
console.log(
|
|
66
|
+
`ok: output/veed-fabric-long-talking-head.mp4 (${(result.video.byteLength / 1024 / 1024).toFixed(2)} MB)`,
|
|
67
|
+
);
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
if (import.meta.main) {
|
|
71
|
+
main().catch((err) => {
|
|
72
|
+
console.error(err);
|
|
73
|
+
process.exit(1);
|
|
74
|
+
});
|
|
75
|
+
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* VEED Fabric 1.0 React syntax test
|
|
3
|
+
*
|
|
4
|
+
* Uses a local image + local audio file to generate a talking video.
|
|
5
|
+
*
|
|
6
|
+
* Run: bun run src/react/examples/veed-fabric-react-test.tsx
|
|
7
|
+
* Output: output/veed-fabric-react-test-<resolution>.mp4 (resolution defaults to 720p; override with FABRIC_RESOLUTION=480p)
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { fal } from "../../ai-sdk/providers/fal";
|
|
11
|
+
import { Clip, Render, render, Video } from "..";
|
|
12
|
+
|
|
13
|
+
const IMAGE_PATH = "output/garry-tan-image.png";
|
|
14
|
+
const AUDIO_PATH = "output/garry-tan-voice.mp3";
|
|
15
|
+
|
|
16
|
+
const RESOLUTION =
|
|
17
|
+
(process.env.FABRIC_RESOLUTION as "480p" | "720p" | undefined) ?? "720p";
|
|
18
|
+
|
|
19
|
+
const video = (
|
|
20
|
+
<Render width={720} height={1280}>
|
|
21
|
+
<Clip duration={5}>
|
|
22
|
+
<Video
|
|
23
|
+
model={fal.videoModel("veed-fabric-1.0")}
|
|
24
|
+
keepAudio
|
|
25
|
+
prompt={{
|
|
26
|
+
images: [IMAGE_PATH],
|
|
27
|
+
audio: AUDIO_PATH,
|
|
28
|
+
}}
|
|
29
|
+
providerOptions={{
|
|
30
|
+
fal: {
|
|
31
|
+
resolution: RESOLUTION,
|
|
32
|
+
},
|
|
33
|
+
}}
|
|
34
|
+
/>
|
|
35
|
+
</Clip>
|
|
36
|
+
</Render>
|
|
37
|
+
);
|
|
38
|
+
|
|
39
|
+
async function main() {
|
|
40
|
+
if (!process.env.FAL_API_KEY && !process.env.FAL_KEY) {
|
|
41
|
+
console.error("ERROR: FAL_API_KEY/FAL_KEY not found in environment");
|
|
42
|
+
process.exit(1);
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
const result = await render(video, {
|
|
46
|
+
output: `output/veed-fabric-react-test-${RESOLUTION}.mp4`,
|
|
47
|
+
cache: `.cache/ai-veed-fabric-${RESOLUTION}-keepaudio`,
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
console.log(
|
|
51
|
+
`ok: output/veed-fabric-react-test-${RESOLUTION}.mp4 (${(result.video.byteLength / 1024 / 1024).toFixed(2)} MB)`,
|
|
52
|
+
);
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
if (import.meta.main) {
|
|
56
|
+
main().catch((err) => {
|
|
57
|
+
console.error(err);
|
|
58
|
+
process.exit(1);
|
|
59
|
+
});
|
|
60
|
+
}
|
|
@@ -11,11 +11,13 @@ import type {
|
|
|
11
11
|
PositionObject,
|
|
12
12
|
SizeValue,
|
|
13
13
|
TitleLayer,
|
|
14
|
+
VideoLayer,
|
|
14
15
|
} from "../../ai-sdk/providers/editly/types";
|
|
15
16
|
import type { PackshotProps, VargElement } from "../types";
|
|
16
17
|
import type { RenderContext } from "./context";
|
|
17
18
|
import { renderImage } from "./image";
|
|
18
19
|
import { createBlinkingButton } from "./packshot/blinking-button";
|
|
20
|
+
import { renderVideo } from "./video";
|
|
19
21
|
|
|
20
22
|
/**
|
|
21
23
|
* Resolve an FFmpegOutput to a string path/URL via the backend.
|
|
@@ -118,8 +120,23 @@ export async function renderPackshot(
|
|
|
118
120
|
type: "fill-color" as const,
|
|
119
121
|
color: props.background,
|
|
120
122
|
});
|
|
123
|
+
} else if (props.background.type === "video") {
|
|
124
|
+
const bgFile = await renderVideo(
|
|
125
|
+
props.background as VargElement<"video">,
|
|
126
|
+
ctx,
|
|
127
|
+
);
|
|
128
|
+
const bgPath = await ctx.backend.resolvePath(bgFile);
|
|
129
|
+
const videoLayer: VideoLayer = {
|
|
130
|
+
type: "video",
|
|
131
|
+
path: bgPath,
|
|
132
|
+
resizeMode: "cover",
|
|
133
|
+
};
|
|
134
|
+
layers.push(videoLayer);
|
|
121
135
|
} else {
|
|
122
|
-
const bgFile = await renderImage(
|
|
136
|
+
const bgFile = await renderImage(
|
|
137
|
+
props.background as VargElement<"image">,
|
|
138
|
+
ctx,
|
|
139
|
+
);
|
|
123
140
|
const bgPath = await ctx.backend.resolvePath(bgFile);
|
|
124
141
|
layers.push({
|
|
125
142
|
type: "image" as const,
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { ImageModelV3 } from "@ai-sdk/provider";
|
|
2
2
|
import { generateImage, wrapImageModel } from "ai";
|
|
3
|
+
import pMap from "p-map";
|
|
3
4
|
import { type CacheStorage, withCache } from "../../ai-sdk/cache";
|
|
4
5
|
import type { File, File as VargFile } from "../../ai-sdk/file";
|
|
5
6
|
import { fileCache } from "../../ai-sdk/file-cache";
|
|
@@ -9,7 +10,6 @@ import {
|
|
|
9
10
|
placeholderFallbackMiddleware,
|
|
10
11
|
wrapVideoModel,
|
|
11
12
|
} from "../../ai-sdk/middleware";
|
|
12
|
-
|
|
13
13
|
import { editly, localBackend } from "../../ai-sdk/providers/editly";
|
|
14
14
|
import type {
|
|
15
15
|
AudioTrack,
|
|
@@ -236,15 +236,42 @@ export async function renderRoot(
|
|
|
236
236
|
}
|
|
237
237
|
}
|
|
238
238
|
|
|
239
|
-
const
|
|
240
|
-
|
|
239
|
+
const concurrency =
|
|
240
|
+
options.concurrency === undefined
|
|
241
|
+
? Number.POSITIVE_INFINITY
|
|
242
|
+
: options.concurrency;
|
|
243
|
+
|
|
244
|
+
if (
|
|
245
|
+
concurrency !== Number.POSITIVE_INFINITY &&
|
|
246
|
+
(!Number.isInteger(concurrency) || concurrency < 1)
|
|
247
|
+
) {
|
|
248
|
+
throw new Error("render option `concurrency` must be a positive integer");
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
const clipResults = await pMap(
|
|
252
|
+
clipElements,
|
|
253
|
+
async (clipElement, i) => {
|
|
254
|
+
try {
|
|
255
|
+
return {
|
|
256
|
+
status: "fulfilled" as const,
|
|
257
|
+
value: await renderClip(clipElement, ctx),
|
|
258
|
+
index: i,
|
|
259
|
+
};
|
|
260
|
+
} catch (reason) {
|
|
261
|
+
return {
|
|
262
|
+
status: "rejected" as const,
|
|
263
|
+
reason: reason as Error,
|
|
264
|
+
index: i,
|
|
265
|
+
};
|
|
266
|
+
}
|
|
267
|
+
},
|
|
268
|
+
{ concurrency },
|
|
241
269
|
);
|
|
242
270
|
|
|
243
|
-
const failures = clipResults
|
|
244
|
-
|
|
245
|
-
r.status === "rejected"
|
|
246
|
-
|
|
247
|
-
.filter(Boolean) as { index: number; reason: Error }[];
|
|
271
|
+
const failures = clipResults.filter(
|
|
272
|
+
(r): r is Extract<typeof r, { status: "rejected" }> =>
|
|
273
|
+
r.status === "rejected",
|
|
274
|
+
);
|
|
248
275
|
|
|
249
276
|
if (failures.length > 0) {
|
|
250
277
|
const successCount = clipResults.length - failures.length;
|
|
@@ -266,11 +293,10 @@ export async function renderRoot(
|
|
|
266
293
|
);
|
|
267
294
|
}
|
|
268
295
|
|
|
269
|
-
const renderedClips = clipResults.map(
|
|
270
|
-
(r)
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
);
|
|
296
|
+
const renderedClips = clipResults.map((r) => {
|
|
297
|
+
if (r.status !== "fulfilled") throw new Error("unexpected");
|
|
298
|
+
return r.value;
|
|
299
|
+
});
|
|
274
300
|
|
|
275
301
|
const clips: Clip[] = [];
|
|
276
302
|
let currentTime = 0;
|
package/src/react/types.ts
CHANGED
|
@@ -209,7 +209,16 @@ export interface SwipeProps extends BaseProps {
|
|
|
209
209
|
}
|
|
210
210
|
|
|
211
211
|
export interface PackshotProps extends BaseProps {
|
|
212
|
-
|
|
212
|
+
/**
|
|
213
|
+
* Packshot background.
|
|
214
|
+
*
|
|
215
|
+
* - `string` — treated as a solid fill color (e.g. `"#000000"`).
|
|
216
|
+
* - `VargElement<"image">` — a generated or static image, rendered and
|
|
217
|
+
* used as a full-bleed cover background.
|
|
218
|
+
* - `VargElement<"video">` — a generated or static video, rendered and
|
|
219
|
+
* used as a looping full-bleed cover background.
|
|
220
|
+
*/
|
|
221
|
+
background?: VargElement<"image"> | VargElement<"video"> | string;
|
|
213
222
|
logo?: string;
|
|
214
223
|
/**
|
|
215
224
|
* Logo position on screen.
|
|
@@ -276,6 +285,8 @@ export interface RenderOptions {
|
|
|
276
285
|
defaults?: DefaultModels;
|
|
277
286
|
backend?: FFmpegBackend;
|
|
278
287
|
storage?: StorageProvider;
|
|
288
|
+
/** Max concurrent clip renders, as a positive integer (other values make render throw). Defaults to unlimited. */
|
|
289
|
+
concurrency?: number;
|
|
279
290
|
}
|
|
280
291
|
|
|
281
292
|
// Re-export from file module for convenience
|