vargai 0.4.0-alpha94 → 0.4.0-alpha96
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -104,7 +104,7 @@
|
|
|
104
104
|
"license": "Apache-2.0",
|
|
105
105
|
"author": "varg.ai <hello@varg.ai> (https://varg.ai)",
|
|
106
106
|
"sideEffects": false,
|
|
107
|
-
"version": "0.4.0-alpha94",
|
|
107
|
+
"version": "0.4.0-alpha96",
|
|
108
108
|
"exports": {
|
|
109
109
|
".": "./src/index.ts",
|
|
110
110
|
"./ai": "./src/ai-sdk/index.ts",
|
|
@@ -85,6 +85,52 @@ const CREDIT_PACKAGES = [
|
|
|
85
85
|
},
|
|
86
86
|
];
|
|
87
87
|
|
|
88
|
+
// Common disposable email domains for fast client-side rejection.
// The server enforces a comprehensive 55k+ domain blocklist (mailchecker);
// this is just for instant UX feedback on the most common offenders.
// Entries must be lowercase and without a trailing dot, because lookups
// are performed on a normalized host name.
const DISPOSABLE_DOMAINS = new Set([
  "guerrillamail.com",
  "guerrillamailblock.com",
  "guerrillamail.net",
  "guerrillamail.org",
  "guerrillamail.de",
  "grr.la",
  "sharklasers.com",
  "guerrilla.ml",
  "yopmail.com",
  "yopmail.fr",
  "yopmail.net",
  "tempmail.com",
  "temp-mail.org",
  "temp-mail.io",
  "mailinator.com",
  "mailinator2.com",
  "throwaway.email",
  "trashmail.com",
  "trashmail.net",
  "trashmail.me",
  "10minutemail.com",
  "10minutemail.net",
  "dispostable.com",
  "maildrop.cc",
  "fakeinbox.com",
  "mailnesia.com",
  "tempail.com",
  "tempr.email",
  "discard.email",
  "discardmail.com",
  "mohmal.com",
  "burpcollaborator.net",
]);

/**
 * Report whether `domain` is one of the known disposable email domains or a
 * subdomain of one (e.g. `mail.yopmail.com`).
 *
 * The input is normalized before matching: surrounding whitespace is trimmed,
 * the name is lower-cased, and trailing root dots are removed, so callers do
 * not have to pre-normalize.
 *
 * @param domain - Host part of an email address (the text after `@`).
 * @returns `true` when the domain or any of its parent domains is blocklisted.
 */
function isDisposableDomain(domain: string): boolean {
  const normalized = domain.trim().toLowerCase().replace(/\.+$/, "");
  if (normalized === "") return false;

  // Walk from the full host up through each parent domain. This matches
  // exactly what a `.${entry}` suffix test would, but needs one set lookup
  // per label instead of a scan over the whole blocklist.
  let candidate = normalized;
  for (;;) {
    if (DISPOSABLE_DOMAINS.has(candidate)) return true;
    const dot = candidate.indexOf(".");
    if (dot === -1) return false;
    candidate = candidate.slice(dot + 1);
  }
}
|
|
133
|
+
|
|
88
134
|
/**
 * Format an amount given in cents as a US-dollar string.
 *
 * Whole-dollar amounts are shown without decimals ("$5", "$1,000").
 * Amounts with a fractional dollar part are padded to at least two decimals
 * so they read as currency ("$10.50" rather than "$10.5").
 *
 * @param cents - Amount in cents.
 * @returns The amount prefixed with `$`, using en-US digit grouping.
 */
function formatCents(cents: number): string {
  const dollars = cents / 100;
  // Only force decimals when there is a fractional part, so whole-dollar
  // prices keep their compact form.
  const minimumFractionDigits = Number.isInteger(dollars) ? 0 : 2;
  return `$${dollars.toLocaleString("en-US", { minimumFractionDigits })}`;
}
|
|
@@ -228,6 +274,17 @@ async function loginWithEmail(): Promise<LoginResult | null> {
|
|
|
228
274
|
return null;
|
|
229
275
|
}
|
|
230
276
|
|
|
277
|
+
// Quick client-side check for common disposable email domains.
|
|
278
|
+
// The server enforces a comprehensive 55k+ domain blocklist (mailchecker);
|
|
279
|
+
// this is just for faster UX feedback on the most common offenders.
|
|
280
|
+
const domain = email.split("@")[1]?.toLowerCase();
|
|
281
|
+
if (domain && isDisposableDomain(domain)) {
|
|
282
|
+
log.error(
|
|
283
|
+
"Disposable email addresses are not allowed. Please use a permanent email address.",
|
|
284
|
+
);
|
|
285
|
+
return null;
|
|
286
|
+
}
|
|
287
|
+
|
|
231
288
|
// Send OTP
|
|
232
289
|
console.log();
|
|
233
290
|
process.stdout.write(
|
package/src/react/elements.ts
CHANGED
|
@@ -2,6 +2,7 @@ import {
|
|
|
2
2
|
resolveImageElement,
|
|
3
3
|
resolveMusicElement,
|
|
4
4
|
resolveSpeechElement,
|
|
5
|
+
resolveTalkingHeadElement,
|
|
5
6
|
resolveVideoElement,
|
|
6
7
|
} from "./resolve";
|
|
7
8
|
import type { ResolvedElement } from "./resolved-element";
|
|
@@ -141,11 +142,14 @@ export function Speech(
|
|
|
141
142
|
|
|
142
143
|
/**
 * Create a `talking-head` element from `props`.
 *
 * The element is created without children and wrapped with `makeThenable`,
 * whose callback delegates to `resolveTalkingHeadElement` using the
 * element's own props.
 */
export function TalkingHead(
  props: TalkingHeadProps,
): VargElement<"talking-head"> & PromiseLike<ResolvedElement<"talking-head">> {
  const lazyElement = createElement(
    "talking-head",
    props as Record<string, unknown>,
    undefined,
  );

  return makeThenable(lazyElement, (pending) => {
    const pendingProps = pending.props as unknown as TalkingHeadProps;
    return resolveTalkingHeadElement(pending, pendingProps);
  });
}
|
|
151
155
|
|
|
@@ -26,6 +26,7 @@ import { renderSlider } from "./slider";
|
|
|
26
26
|
import { renderSpeech } from "./speech";
|
|
27
27
|
import { renderSubtitle } from "./subtitle";
|
|
28
28
|
import { renderSwipe } from "./swipe";
|
|
29
|
+
import { renderTalkingHead } from "./talking-head";
|
|
29
30
|
import { renderTitle } from "./title";
|
|
30
31
|
import { resolvePath } from "./utils";
|
|
31
32
|
import { renderVideo } from "./video";
|
|
@@ -151,6 +152,27 @@ async function renderClipLayers(
|
|
|
151
152
|
break;
|
|
152
153
|
}
|
|
153
154
|
|
|
155
|
+
case "talking-head": {
|
|
156
|
+
pending.push({
|
|
157
|
+
type: "async",
|
|
158
|
+
promise: renderTalkingHead(
|
|
159
|
+
element as VargElement<"talking-head">,
|
|
160
|
+
ctx,
|
|
161
|
+
)
|
|
162
|
+
.then((file) => ctx.backend.resolvePath(file))
|
|
163
|
+
.then(
|
|
164
|
+
(path) =>
|
|
165
|
+
({
|
|
166
|
+
type: "video",
|
|
167
|
+
path,
|
|
168
|
+
resizeMode: "cover",
|
|
169
|
+
mixVolume: 1,
|
|
170
|
+
}) as VideoLayer,
|
|
171
|
+
),
|
|
172
|
+
});
|
|
173
|
+
break;
|
|
174
|
+
}
|
|
175
|
+
|
|
154
176
|
case "music": {
|
|
155
177
|
const props = element.props as MusicProps;
|
|
156
178
|
pending.push({
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import type { File } from "../../ai-sdk/file";
|
|
2
|
+
import { ResolvedElement } from "../resolved-element";
|
|
3
|
+
import type { TalkingHeadProps, VargElement } from "../types";
|
|
4
|
+
import type { RenderContext } from "./context";
|
|
5
|
+
import { renderImage } from "./image";
|
|
6
|
+
import { renderSpeech } from "./speech";
|
|
7
|
+
import { renderVideo } from "./video";
|
|
8
|
+
|
|
9
|
+
/**
 * Render a TalkingHead element into a video file.
 *
 * Pipeline:
 * 1. Resolve the character image from the `image` prop (VargElement or ResolvedElement)
 * 2. Resolve the speech audio from the `audio` prop (VargElement or ResolvedElement)
 * 3. Generate a lipsync video by rendering a synthetic `video` element whose
 *    prompt carries the image and audio bytes
 *
 * The result is a video File suitable for use as a VideoLayer.
 *
 * @param element - The talking-head element; if it is already a
 *   ResolvedElement its pre-generated file is reused.
 * @param ctx - Render context; receives reused files in `generatedFiles` and
 *   supplies `defaults.video` as a model fallback.
 * @returns The generated (or reused) video file.
 * @throws Error when no model is available (`lipsyncModel`, `model`, or
 *   `defaults.video`), or when the `image` or `audio` prop is missing.
 */
export async function renderTalkingHead(
  element: VargElement<"talking-head">,
  ctx: RenderContext,
): Promise<File> {
  // If already resolved via `await TalkingHead(...)`, reuse the pre-generated file
  if (element instanceof ResolvedElement) {
    ctx.generatedFiles.push(element.meta.file);
    return element.meta.file;
  }

  const props = element.props as TalkingHeadProps;

  // `lipsyncModel` overrides `model`, so supplying it alone is sufficient;
  // only fail when none of the three sources provides a model.
  const lipsyncModel =
    props.lipsyncModel ?? props.model ?? ctx.defaults?.video;
  if (!lipsyncModel) {
    throw new Error(
      "TalkingHead requires 'model' prop (or set defaults.video in render options)",
    );
  }

  if (!props.image) {
    throw new Error("TalkingHead requires 'image' prop (an Image element)");
  }

  if (!props.audio) {
    throw new Error("TalkingHead requires 'audio' prop (a Speech element)");
  }

  // Step 1 & 2: Resolve character image and speech audio in parallel
  const [characterFile, speechFile] = await Promise.all([
    resolveImageProp(props.image, ctx),
    resolveAudioProp(props.audio, ctx),
  ]);

  // The two byte reads are independent, so run them concurrently as well.
  const [characterImageData, speechAudioData] = await Promise.all([
    characterFile.arrayBuffer(),
    speechFile.arrayBuffer(),
  ]);

  // Step 3: Generate lipsync video (image + audio → video) by delegating to
  // the regular video renderer with a synthetic video element.
  const videoElement: VargElement<"video"> = {
    type: "video",
    props: {
      prompt: {
        images: [characterImageData],
        audio: speechAudioData,
      },
      model: lipsyncModel,
      keepAudio: true,
      providerOptions: { fal: { resolution: props.resolution ?? "720p" } },
    },
    children: [],
  };

  return renderVideo(videoElement, ctx);
}
|
|
74
|
+
|
|
75
|
+
/**
 * Turn the `image` prop into a File.
 *
 * A lazy element is rendered through `renderImage`. An element that is
 * already a ResolvedElement contributes its existing file, which is also
 * recorded in `ctx.generatedFiles`.
 */
async function resolveImageProp(
  image: VargElement<"image">,
  ctx: RenderContext,
): Promise<File> {
  if (!(image instanceof ResolvedElement)) {
    return renderImage(image, ctx);
  }

  const existingFile = image.meta.file;
  ctx.generatedFiles.push(existingFile);
  return existingFile;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Turn the `audio` prop into a File.
 *
 * A lazy element is rendered through `renderSpeech`. An element that is
 * already a ResolvedElement contributes its existing file, which is also
 * recorded in `ctx.generatedFiles`.
 */
async function resolveAudioProp(
  audio: VargElement<"speech">,
  ctx: RenderContext,
): Promise<File> {
  if (!(audio instanceof ResolvedElement)) {
    return renderSpeech(audio, ctx);
  }

  const existingFile = audio.meta.file;
  ctx.generatedFiles.push(existingFile);
  return existingFile;
}
|
package/src/react/resolve.ts
CHANGED
|
@@ -37,6 +37,7 @@ import type {
|
|
|
37
37
|
ImageProps,
|
|
38
38
|
MusicProps,
|
|
39
39
|
SpeechProps,
|
|
40
|
+
TalkingHeadProps,
|
|
40
41
|
VargElement,
|
|
41
42
|
} from "./types";
|
|
42
43
|
|
|
@@ -748,3 +749,95 @@ export async function resolveMusicElement(
|
|
|
748
749
|
duration,
|
|
749
750
|
});
|
|
750
751
|
}
|
|
752
|
+
|
|
753
|
+
// ---------------------------------------------------------------------------
|
|
754
|
+
// TalkingHead
|
|
755
|
+
// ---------------------------------------------------------------------------
|
|
756
|
+
/**
 * Resolve a TalkingHead element by combining an image and a speech clip into
 * a lipsync video. Returns a ResolvedElement<"talking-head"> wrapping the
 * final video.
 *
 * Pipeline:
 * 1. Resolve the image from `image` prop (generate or reuse pre-resolved)
 * 2. Resolve the speech from `audio` prop (generate or reuse pre-resolved)
 * 3. Generate lipsync video from image + audio via `lipsyncModel ?? model`
 *
 * Steps 1 and 2 are independent and run concurrently.
 *
 * @param element - The talking-head element being resolved.
 * @param props - The element's props.
 * @returns A ResolvedElement carrying the generated video file and its
 *   probed duration.
 * @throws Error when neither `lipsyncModel` nor `model` is set, or when the
 *   `image` or `audio` prop is missing.
 */
export async function resolveTalkingHeadElement(
  element: VargElement<"talking-head">,
  props: TalkingHeadProps,
): Promise<ResolvedElement<"talking-head">> {
  const { image, audio } = props;

  // `lipsyncModel` overrides `model`, so supplying it alone is sufficient.
  const lipsyncModel = props.lipsyncModel ?? props.model;
  if (!lipsyncModel) {
    throw new Error(
      "await TalkingHead() requires 'model' prop for lipsync video generation",
    );
  }

  if (!image) {
    throw new Error(
      "await TalkingHead() requires 'image' prop (an Image element).",
    );
  }

  if (!audio) {
    throw new Error(
      "await TalkingHead() requires 'audio' prop (a Speech element).",
    );
  }

  // Steps 1 & 2: a ResolvedElement is used as-is; a lazy element is resolved
  // through its own resolver. Both inputs are independent, so resolve them
  // concurrently (the render path does the same).
  const [resolvedImage, resolvedSpeech] = await Promise.all([
    image instanceof ResolvedElement
      ? image
      : resolveImageElement(image, image.props as ImageProps),
    audio instanceof ResolvedElement
      ? audio
      : resolveSpeechElement(audio, audio.props as SpeechProps),
  ]);

  const [characterBuffer, speechBuffer] = await Promise.all([
    resolvedImage.file.arrayBuffer(),
    resolvedSpeech.file.arrayBuffer(),
  ]);
  const characterBytes = new Uint8Array(characterBuffer);
  const speechBytes = new Uint8Array(speechBuffer);

  // Step 3: Generate lipsync video (image + audio → video)
  // NOTE(review): `props.resolution` is not forwarded here, whereas
  // renderTalkingHead passes it via providerOptions.fal — confirm whether
  // this path should honor it too.
  const generateVideo = getCachedGenerateVideo();

  const { video } = await generateVideo({
    model: lipsyncModel as Parameters<typeof generateVideoRaw>[0]["model"],
    prompt: {
      images: [characterBytes],
      audio: speechBytes,
    },
    duration: 0, // duration determined by audio length
  });

  const mediaType = video.mimeType ?? "video/mp4";
  const modelId =
    typeof lipsyncModel === "string" ? lipsyncModel : lipsyncModel.modelId;

  // `||` rather than `??`: an empty text label should also fall back to the
  // generic label instead of producing the prompt "talking-head: ".
  const promptLabel =
    getTextContent(element.children) || "talking-head lipsync";

  const file = File.fromGenerated({
    uint8Array: video.uint8Array,
    mediaType,
    url: (video as { url?: string }).url,
  }).withMetadata({
    type: "video",
    model: modelId,
    prompt: `talking-head: ${promptLabel.slice(0, 100)}`,
  });

  const duration = await probeDuration(file);

  return new ResolvedElement(element, {
    file,
    duration,
  });
}
|
package/src/react/types.ts
CHANGED
|
@@ -198,16 +198,20 @@ export interface SpeechProps extends BaseProps, VolumeProps {
|
|
|
198
198
|
}
|
|
199
199
|
|
|
200
200
|
/** Props accepted by the `TalkingHead` element. */
export interface TalkingHeadProps extends BaseProps {
  /**
   * Image element (lazy or already resolved) used as the character face.
   * Optional in the type, but rendering/resolving throws when it is missing.
   */
  image?: VargElement<"image">;
  /**
   * Speech element (lazy or already resolved) used as the audio track.
   * Optional in the type, but rendering/resolving throws when it is missing.
   */
  audio?: VargElement<"speech">;
  /** Lipsync video model (e.g. fal.videoModel("sync-v2-pro")). */
  model?: VideoModelV3;
  /** Separate lipsync model override (defaults to `model`). */
  lipsyncModel?: VideoModelV3;
  /** Video resolution for lipsync generation (default: "720p"). */
  resolution?: "480p" | "720p" | "1080p";
  /** Placement, either a `Position` value or explicit edge offsets. */
  position?:
    | Position
    | { left?: string; right?: string; top?: string; bottom?: string };
  /** Explicit width and height. */
  size?: { width: string; height: string };
}
|
|
212
216
|
|
|
213
217
|
export interface TitleProps extends BaseProps {
|