vargai 0.4.0-alpha101 → 0.4.0-alpha102

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -104,7 +104,7 @@
104
104
  "license": "Apache-2.0",
105
105
  "author": "varg.ai <hello@varg.ai> (https://varg.ai)",
106
106
  "sideEffects": false,
107
- "version": "0.4.0-alpha101",
107
+ "version": "0.4.0-alpha102",
108
108
  "exports": {
109
109
  ".": "./src/index.ts",
110
110
  "./ai": "./src/ai-sdk/index.ts",
@@ -78,6 +78,12 @@ export {
78
78
  type GoogleProviderSettings,
79
79
  google,
80
80
  } from "./providers/google";
81
+ export {
82
+ createHeyGen,
83
+ type HeyGenProvider,
84
+ type HeyGenProviderSettings,
85
+ heygen,
86
+ } from "./providers/heygen";
81
87
  export {
82
88
  createHiggsfield,
83
89
  type HiggsfieldImageModelSettings,
@@ -0,0 +1,436 @@
1
+ /**
2
+ * HeyGen AI SDK provider for avatar video generation.
3
+ *
4
+ * Exposes heygen.videoModel("avatar-iv") for use in JSX composition:
5
+ *
6
+ * import { heygen } from "vargai/ai-sdk";
7
+ *
8
+ * const talking = Video({
9
+ * prompt: { text: "Hello world", images: [portrait] },
10
+ * model: heygen.videoModel("avatar-iv"),
11
+ * providerOptions: {
12
+ * heygen: { voice_id: "abc123", talking_style: "expressive" }
13
+ * },
14
+ * });
15
+ */
16
+
17
+ import {
18
+ type EmbeddingModelV3,
19
+ type ImageModelV3,
20
+ type LanguageModelV3,
21
+ NoSuchModelError,
22
+ type ProviderV3,
23
+ type SharedV3Warning,
24
+ type SpeechModelV3,
25
+ } from "@ai-sdk/provider";
26
+ import type {
27
+ VideoModelV3,
28
+ VideoModelV3CallOptions,
29
+ VideoModelV3File,
30
+ } from "../video-model";
31
+
32
+ const HEYGEN_API_BASE = "https://api.heygen.com";
33
+ const HEYGEN_UPLOAD_BASE = "https://upload.heygen.com";
34
+
35
+ // ---------------------------------------------------------------------------
36
+ // HeyGen response types
37
+ // ---------------------------------------------------------------------------
38
+
39
/**
 * `data` object of HeyGen's video status response, limited to the fields
 * declared for this module.
 */
interface HeyGenVideoStatusData {
  id: string;
  /** Job state; the poller lower-cases it and looks for "completed" / "failed". */
  status: string;
  /** Download location of the rendered video; required once the job is completed. */
  video_url?: string;
  // NOTE(review): unit is not established in this file (presumably seconds) — confirm.
  duration?: number;
  /** Failure detail, interpolated into the error thrown when the job has failed. */
  error?: string | null;
}
46
+
47
+ // ---------------------------------------------------------------------------
48
+ // Helpers
49
+ // ---------------------------------------------------------------------------
50
+
51
/**
 * Read the media type off a file part.
 *
 * @param file - File part supplied with the generation call.
 * @returns The `mediaType` value, or `undefined` when the property is missing
 *   or falsy (e.g. an empty string).
 */
function getMediaType(file: VideoModelV3File): string | undefined {
  if (!("mediaType" in file)) return undefined;
  return file.mediaType || undefined;
}
55
+
56
/**
 * Turn a file part into raw bytes.
 *
 * @param file - File part whose `data` is either a `Uint8Array` or a base64 string.
 * @returns The bytes as-is for a `Uint8Array`, or the base64-decoded buffer for a string.
 * @throws Error when the part has no `data` property or `data` is of another type.
 */
async function fileToBytes(file: VideoModelV3File): Promise<Uint8Array> {
  const payload = "data" in file ? file.data : undefined;

  if (payload instanceof Uint8Array) return payload;
  if (typeof payload === "string") return Buffer.from(payload, "base64");

  throw new Error("HeyGen: file has no data");
}
63
+
64
/**
 * POST raw bytes to HeyGen's `/v1/asset` upload endpoint.
 *
 * @param apiKey - Sent as the `X-Api-Key` header.
 * @param data - Request body, sent unmodified.
 * @param contentType - Sent as the `Content-Type` header.
 * @returns The `data.id` field of the JSON response.
 * @throws Error when the response status is not OK or no id is present.
 */
async function uploadAssetToHeyGen(
  apiKey: string,
  data: Uint8Array,
  contentType: string,
): Promise<string> {
  const endpoint = `${HEYGEN_UPLOAD_BASE}/v1/asset`;
  const headers = { "X-Api-Key": apiKey, "Content-Type": contentType };
  const response = await fetch(endpoint, { method: "POST", headers, body: data });

  if (!response.ok) {
    const detail = await response.text();
    throw new Error(`HeyGen asset upload failed (${response.status}): ${detail}`);
  }

  const parsed = (await response.json()) as { data?: { id?: string } };
  const id = parsed.data?.id;
  if (id) return id;

  throw new Error("HeyGen asset upload returned no asset id");
}
93
+
94
/**
 * POST an image to HeyGen's `/v1/talking_photo` upload endpoint so it can be
 * referenced as a `talking_photo` character in a scene.
 *
 * @param apiKey - Sent as the `X-Api-Key` header.
 * @param data - Image bytes, sent as the request body.
 * @param contentType - Sent as the `Content-Type` header.
 * @returns The `data.talking_photo_id` field of the JSON response.
 * @throws Error when the response status is not OK or no id is present.
 */
async function uploadTalkingPhoto(
  apiKey: string,
  data: Uint8Array,
  contentType: string,
): Promise<string> {
  const endpoint = `${HEYGEN_UPLOAD_BASE}/v1/talking_photo`;
  const headers = { "X-Api-Key": apiKey, "Content-Type": contentType };
  const response = await fetch(endpoint, { method: "POST", headers, body: data });

  if (!response.ok) {
    const detail = await response.text();
    throw new Error(
      `HeyGen talking photo upload failed (${response.status}): ${detail}`,
    );
  }

  const parsed = (await response.json()) as {
    data?: { talking_photo_id?: string };
  };
  const id = parsed.data?.talking_photo_id;
  if (id) return id;

  throw new Error("HeyGen talking photo upload returned no talking_photo_id");
}
127
+
128
/**
 * Normalise the `background` option into a scene background object.
 *
 * - A string starting with `#` becomes `{ type: "color", value }`.
 * - Any other non-empty string is treated as an image URL with `fit: "cover"`.
 * - A non-null object is passed through untouched.
 * - Everything else (including empty string, null, numbers) yields `undefined`.
 *
 * @param bg - Raw option value.
 * @returns The background object, or `undefined` when none applies.
 */
function buildBackground(bg: unknown): Record<string, unknown> | undefined {
  switch (typeof bg) {
    case "string":
      if (bg === "") return undefined;
      return bg.startsWith("#")
        ? { type: "color", value: bg }
        : { type: "image", url: bg, fit: "cover" };
    case "object":
      return bg === null ? undefined : (bg as Record<string, unknown>);
    default:
      return undefined;
  }
}
141
+
142
/**
 * Wait for `ms` milliseconds, rejecting early if `signal` aborts.
 * Clears the timer and the abort listener whichever way it settles.
 */
function sleepUnlessAborted(ms: number, signal?: AbortSignal): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    if (signal?.aborted) {
      reject(new Error("HeyGen: aborted"));
      return;
    }
    const onAbort = () => {
      clearTimeout(timer);
      reject(new Error("HeyGen: aborted"));
    };
    const timer = setTimeout(() => {
      signal?.removeEventListener("abort", onAbort);
      resolve();
    }, ms);
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}

/**
 * Poll HeyGen's video status endpoint until the job completes or fails.
 *
 * Polls every 5 seconds for at most 10 minutes. Any status other than
 * "completed" or "failed" (compared case-insensitively) keeps the loop going.
 *
 * @param apiKey - Sent as the `X-Api-Key` header.
 * @param videoId - Job id returned by the generate call.
 * @param signal - Optional abort signal; honoured before each request, during
 *   the request, and during the wait between polls.
 * @returns The status payload of the completed job (always has `video_url`).
 * @throws Error on abort, non-OK status responses, a completed job without
 *   `video_url`, a failed job, or timeout.
 */
async function pollVideoStatus(
  apiKey: string,
  videoId: string,
  signal?: AbortSignal,
): Promise<HeyGenVideoStatusData> {
  const maxWait = 600_000; // 10 minutes
  const pollInterval = 5_000; // 5 seconds
  const start = Date.now();

  // The id ends up in a query string, so it must be percent-encoded.
  const statusUrl = `${HEYGEN_API_BASE}/v1/video_status.get?video_id=${encodeURIComponent(videoId)}`;

  while (Date.now() - start < maxWait) {
    if (signal?.aborted) throw new Error("HeyGen: aborted");

    const res = await fetch(statusUrl, {
      headers: {
        "X-Api-Key": apiKey,
        Accept: "application/json",
      },
      signal,
    });

    if (!res.ok) {
      throw new Error(`HeyGen status check failed (${res.status})`);
    }

    const body = (await res.json()) as {
      data?: HeyGenVideoStatusData;
    };
    const status = body.data?.status?.toLowerCase();

    if (status === "completed") {
      if (!body.data?.video_url) {
        throw new Error("HeyGen video completed but no video_url in response");
      }
      return body.data;
    }

    if (status === "failed") {
      throw new Error(
        `HeyGen video generation failed: ${body.data?.error ?? "unknown error"}`,
      );
    }

    // Abort-aware wait: a cancelled call stops immediately instead of
    // sitting out the rest of the poll interval.
    await sleepUnlessAborted(pollInterval, signal);
  }

  throw new Error(`HeyGen video generation timed out after ${maxWait / 1000}s`);
}
195
+
196
+ // ---------------------------------------------------------------------------
197
+ // Video model
198
+ // ---------------------------------------------------------------------------
199
+
200
/**
 * HeyGen video model backed by the Studio V2 endpoint
 * (`POST /v2/video/generate`).
 *
 * One call uploads any image/audio inputs, submits a single scene, polls the
 * job until it finishes, then downloads and returns the video bytes.
 */
class HeyGenVideoModel implements VideoModelV3 {
  readonly specificationVersion = "v3" as const;
  readonly provider = "heygen";
  readonly modelId: string;
  readonly maxVideosPerCall = 1;

  private apiKey: string;

  /**
   * @param modelId - Identifier reported back in the response metadata.
   * @param apiKey - HeyGen API key sent with every request.
   */
  constructor(modelId: string, apiKey: string) {
    this.modelId = modelId;
    this.apiKey = apiKey;
  }

  /**
   * Generate one talking-avatar video.
   *
   * Options read from `providerOptions.heygen`: `avatar_id`,
   * `talking_photo_id`, `voice_id`, `avatar_style`, `talking_style`,
   * `use_avatar_iv_model`, `matting`, `speed`, `emotion`, `background`,
   * `aspect_ratio`, `callback_url`, `title`, `caption`.
   *
   * Character resolution: `avatar_id` wins, then `talking_photo_id`, then the
   * first `image/*` file (uploaded as a talking photo).
   * Voice resolution: the first `audio/*` file wins (uploaded as an asset),
   * otherwise `prompt` + `voice_id` are used as a text script.
   *
   * @param options - Call options (prompt, files, provider options, abort signal).
   * @returns The video bytes, collected warnings and response metadata.
   * @throws Error when an upload, the submit call, polling or the download fails.
   */
  async doGenerate(options: VideoModelV3CallOptions) {
    const { prompt, files, providerOptions, abortSignal } = options;
    const warnings: SharedV3Warning[] = [];

    const heygenOpts = (providerOptions?.heygen ?? {}) as Record<
      string,
      unknown
    >;

    // ---- Resolve character source ----
    const avatarId = heygenOpts.avatar_id as string | undefined;
    const talkingPhotoId = heygenOpts.talking_photo_id as string | undefined;
    const voiceId = heygenOpts.voice_id as string | undefined;

    // If an image file is provided and no avatar/talking_photo specified,
    // upload it as a talking photo for use in Studio V2
    let resolvedTalkingPhotoId = talkingPhotoId;
    if (!avatarId && !talkingPhotoId) {
      const imageFile = files?.find((f) =>
        getMediaType(f)?.startsWith("image/"),
      );
      if (imageFile) {
        const bytes = await fileToBytes(imageFile);
        const contentType = getMediaType(imageFile) ?? "image/jpeg";
        resolvedTalkingPhotoId = await uploadTalkingPhoto(
          this.apiKey,
          bytes,
          contentType,
        );
      }
    }

    // Upload audio file if present (external audio mode)
    let audioAssetId: string | undefined;
    const audioFile = files?.find((f) => getMediaType(f)?.startsWith("audio/"));
    if (audioFile) {
      const audioBytes = await fileToBytes(audioFile);
      const audioContentType = getMediaType(audioFile) ?? "audio/mpeg";
      audioAssetId = await uploadAssetToHeyGen(
        this.apiKey,
        audioBytes,
        audioContentType,
      );
    }

    if (prompt && voiceId === undefined && !audioFile) {
      warnings.push({
        type: "other",
        message:
          "HeyGen requires voice_id when using script mode. Pass it via providerOptions.heygen.voice_id",
      });
    }

    // ---- Always use Studio V2 (POST /v2/video/generate) ----
    // Works for both pre-registered avatars and uploaded talking photos.

    // Build character object
    const character: Record<string, unknown> = {};
    if (avatarId) {
      character.type = "avatar";
      character.avatar_id = avatarId;
      character.avatar_style = (heygenOpts.avatar_style as string) ?? "normal";
    } else if (resolvedTalkingPhotoId) {
      character.type = "talking_photo";
      character.talking_photo_id = resolvedTalkingPhotoId;
      if (heygenOpts.talking_style)
        character.talking_style = heygenOpts.talking_style;
      if (heygenOpts.use_avatar_iv_model)
        character.use_avatar_iv_model = heygenOpts.use_avatar_iv_model;
      if (heygenOpts.matting) character.matting = heygenOpts.matting;
    }

    // Build voice object
    const voice: Record<string, unknown> = {};
    if (audioAssetId) {
      voice.type = "audio";
      voice.audio_asset_id = audioAssetId;
    } else if (prompt && voiceId) {
      voice.type = "text";
      voice.input_text = prompt;
      voice.voice_id = voiceId;
      if (heygenOpts.speed) voice.speed = heygenOpts.speed;
      if (heygenOpts.emotion) voice.emotion = heygenOpts.emotion;
    }

    // Build background object
    const background = buildBackground(heygenOpts.background);

    // Build scene
    const scene: Record<string, unknown> = { character, voice };
    if (background) scene.background = background;

    // Aspect ratio → dimension
    const aspectRatio =
      (heygenOpts.aspect_ratio as string | undefined) ?? options.aspectRatio;
    // Only two layouts are mapped; tell the caller when a different ratio was
    // requested instead of silently rendering landscape.
    if (aspectRatio != null && aspectRatio !== "16:9" && aspectRatio !== "9:16") {
      warnings.push({
        type: "other",
        message: `HeyGen provider only maps 16:9 and 9:16 aspect ratios; "${aspectRatio}" was rendered as 16:9`,
      });
    }
    const dim =
      aspectRatio === "9:16"
        ? { width: 720, height: 1280 }
        : { width: 1280, height: 720 };

    const studioPayload: Record<string, unknown> = {
      video_inputs: [scene],
      dimension: dim,
    };
    if (heygenOpts.callback_url)
      studioPayload.callback_url = heygenOpts.callback_url;
    if (heygenOpts.title) studioPayload.title = heygenOpts.title;
    if (heygenOpts.caption) studioPayload.caption = heygenOpts.caption;

    const submitUrl = `${HEYGEN_API_BASE}/v2/video/generate`;
    const submitBody = JSON.stringify(studioPayload);

    // ---- Submit ----
    const submitRes = await fetch(submitUrl, {
      method: "POST",
      headers: {
        "X-Api-Key": this.apiKey,
        "Content-Type": "application/json",
        Accept: "application/json",
      },
      body: submitBody,
      signal: abortSignal,
    });

    if (!submitRes.ok) {
      const errorText = await submitRes.text();
      throw new Error(
        `HeyGen video generation failed (${submitRes.status}): ${errorText}`,
      );
    }

    // The id is accepted either nested under `data` or at the top level.
    const submitData = (await submitRes.json()) as {
      data?: { video_id?: string };
      video_id?: string;
    };
    const videoId = submitData.data?.video_id ?? submitData.video_id;
    if (!videoId) throw new Error("HeyGen returned no video_id");

    // ---- Poll for completion ----
    const statusData = await pollVideoStatus(this.apiKey, videoId, abortSignal);

    // Explicit guard instead of a non-null assertion on the URL.
    const videoUrl = statusData.video_url;
    if (!videoUrl) {
      throw new Error("HeyGen video completed but no video_url in response");
    }

    // ---- Download video ----
    const videoRes = await fetch(videoUrl, {
      signal: abortSignal,
    });
    if (!videoRes.ok) {
      throw new Error(`Failed to download HeyGen video (${videoRes.status})`);
    }
    const videoBytes = new Uint8Array(await videoRes.arrayBuffer());

    return {
      videos: [videoBytes],
      warnings,
      response: {
        timestamp: new Date(),
        modelId: this.modelId,
        headers: undefined,
      },
    };
  }
}
374
+
375
+ // ---------------------------------------------------------------------------
376
+ // Provider factory
377
+ // ---------------------------------------------------------------------------
378
+
379
/** Settings accepted by `createHeyGen`. */
export interface HeyGenProviderSettings {
  /** HeyGen API key; when omitted, `process.env.HEYGEN_API_KEY` is used. */
  apiKey?: string;
}
382
+
383
/** Provider surface returned by `createHeyGen`: a `ProviderV3` plus video models. */
export interface HeyGenProvider extends ProviderV3 {
  /** Create a video model; `createHeyGen` defaults `modelId` to "avatar-iv". */
  videoModel(modelId?: string): VideoModelV3;
}
386
+
387
/**
 * Build a HeyGen provider instance.
 *
 * Only video models are available; requesting a language, embedding, image or
 * speech model throws `NoSuchModelError`.
 *
 * @param settings - Optional settings; `apiKey` falls back to
 *   `process.env.HEYGEN_API_KEY`.
 * @returns A provider whose `videoModel()` defaults to "avatar-iv".
 * @throws Error when no API key can be resolved.
 */
export function createHeyGen(
  settings: HeyGenProviderSettings = {},
): HeyGenProvider {
  const apiKey = settings.apiKey ?? process.env.HEYGEN_API_KEY;
  if (!apiKey) throw new Error("HEYGEN_API_KEY not set");

  // Shared rejection for every model family this provider does not offer.
  const reject = (
    modelId: string,
    modelType: "languageModel" | "embeddingModel" | "imageModel" | "speechModel",
  ): never => {
    throw new NoSuchModelError({ modelId, modelType });
  };

  const provider: HeyGenProvider = {
    specificationVersion: "v3",
    videoModel: (modelId = "avatar-iv") => new HeyGenVideoModel(modelId, apiKey),
    languageModel: (modelId: string): LanguageModelV3 =>
      reject(modelId, "languageModel"),
    embeddingModel: (modelId: string): EmbeddingModelV3 =>
      reject(modelId, "embeddingModel"),
    imageModel: (modelId: string): ImageModelV3 => reject(modelId, "imageModel"),
    speechModel: (modelId: string): SpeechModelV3 =>
      reject(modelId, "speechModel"),
  };
  return provider;
}
426
+
427
// Default provider, created on first property access rather than at import
// time, so a missing HEYGEN_API_KEY only throws once the provider is used.
let _heygen: HeyGenProvider | undefined;
export const heygen = new Proxy({} as HeyGenProvider, {
  get(_target, key) {
    _heygen ??= createHeyGen();
    return _heygen[key as keyof HeyGenProvider];
  },
});
@@ -0,0 +1,61 @@
1
+ /**
2
+ * HeyGen avatar video model
3
+ * Generates talking avatar videos from script + voice + image/avatar
4
+ */
5
+
6
+ import { z } from "zod";
7
+ import type { ModelDefinition, ZodSchema } from "../../core/schema/types";
8
+
9
// Input accepted by the "heygen-avatar" model. `script` and `voice_id` are
// required; every other field is optional.
// NOTE(review): the HeyGen provider's submit() reads `script`, `voice_id`,
// `avatar_id` and `aspect_ratio` from its inputs; confirm how `image_url`,
// `motion_prompt`, `expressiveness` and `resolution` are meant to be forwarded.
const heygenInputSchema = z.object({
  script: z.string().describe("Script text for the avatar to speak"),
  voice_id: z.string().describe("HeyGen voice ID"),
  avatar_id: z.string().optional().describe("Pre-registered HeyGen avatar ID"),
  image_url: z
    .string()
    .optional()
    .describe("Image URL to animate (alternative to avatar_id)"),
  motion_prompt: z
    .string()
    .optional()
    .describe("Natural language motion control prompt"),
  // Defaults to "medium" when omitted.
  expressiveness: z
    .enum(["low", "medium", "high"])
    .optional()
    .default("medium")
    .describe("Expressiveness level of the avatar"),
  // Defaults to "16:9" when omitted.
  aspect_ratio: z
    .enum(["16:9", "9:16"])
    .optional()
    .default("16:9")
    .describe("Video aspect ratio"),
  // Defaults to "1080p" when omitted.
  resolution: z
    .enum(["720p", "1080p"])
    .optional()
    .default("1080p")
    .describe("Video resolution"),
});

// Result shape: a required video URL plus an optional duration.
const heygenOutputSchema = z.object({
  videoUrl: z.string(),
  duration: z.number().optional(),
});

// Pairs the input and output schemas for the model definition below.
const schema: ZodSchema<typeof heygenInputSchema, typeof heygenOutputSchema> = {
  input: heygenInputSchema,
  output: heygenOutputSchema,
};

/**
 * Model definition for "heygen-avatar": served only by the "heygen" provider,
 * where it maps to the provider model id "avatar-iv".
 */
export const definition: ModelDefinition<typeof schema> = {
  type: "model",
  name: "heygen-avatar",
  description:
    "HeyGen Avatar IV model for generating talking avatar videos from script and voice",
  providers: ["heygen"],
  defaultProvider: "heygen",
  providerModels: {
    heygen: "avatar-iv",
  },
  schema,
};

export default definition;
@@ -4,6 +4,7 @@
4
4
 
5
5
  export { definition as elevenlabsTts } from "./elevenlabs";
6
6
  export { definition as flux } from "./flux";
7
+ export { definition as heygenAvatar } from "./heygen";
7
8
  export { definition as kling } from "./kling";
8
9
  export { definition as llama } from "./llama";
9
10
  export { definition as ltxA2v } from "./ltx-a2v";
@@ -31,6 +32,7 @@ export { definition as whisper } from "./whisper";
31
32
  // All model definitions for auto-loading
32
33
  import { definition as elevenlabsDefinition } from "./elevenlabs";
33
34
  import { definition as fluxDefinition } from "./flux";
35
+ import { definition as heygenAvatarDefinition } from "./heygen";
34
36
  import { definition as klingDefinition } from "./kling";
35
37
  import { definition as llamaDefinition } from "./llama";
36
38
  import { definition as ltxA2vDefinition } from "./ltx-a2v";
@@ -77,4 +79,5 @@ export const allModels = [
77
79
  seedance2FastPreviewDefinition,
78
80
  sonautoDefinition,
79
81
  llamaDefinition,
82
+ heygenAvatarDefinition,
80
83
  ];
@@ -0,0 +1,480 @@
1
+ /**
2
+ * HeyGen provider for AI avatar video generation
3
+ * Supports text-to-video with talking avatars, photo-to-video lipsync,
4
+ * and pre-built avatar catalog.
5
+ *
6
+ * HeyGen API docs: https://docs.heygen.com/reference
7
+ */
8
+
9
+ import type { JobStatusUpdate, ProviderConfig } from "../core/schema/types";
10
+ import { BaseProvider } from "./base";
11
+
12
+ const HEYGEN_API_BASE = "https://api.heygen.com";
13
+ const HEYGEN_UPLOAD_BASE = "https://upload.heygen.com";
14
+
15
+ // ---------------------------------------------------------------------------
16
+ // Response types
17
+ // ---------------------------------------------------------------------------
18
+
19
/** `data` payload of the video generate response, as read by `submit`. */
interface HeyGenVideoResponse {
  /** Id of the submitted video; returned by `submit` as the job id. */
  video_id: string;
  status?: string;
}
23
+
24
/** JSON body of the video status endpoint, as consumed by `getStatus` and `getResult`. */
interface HeyGenVideoStatusResponse {
  code: number;
  data: {
    id: string;
    /** Job state; `getStatus` lower-cases it before mapping to a job status. */
    status: string;
    /** Present once a video is available; surfaced as the job output URL. */
    video_url?: string;
    video_url_caption?: string;
    thumbnail_url?: string;
    gif_url?: string;
    caption_url?: string;
    // NOTE(review): unit not established in this file (presumably seconds) — confirm.
    duration?: number;
    /** Failure detail; forwarded as the job error by `getStatus`. */
    error?: string | null;
    callback_id?: string;
    created_at?: number;
  };
  message: string;
}
41
+
42
/** One entry of the voice list returned by `listVoices`. */
interface HeyGenVoice {
  /** Id to pass as `voice_id` when submitting a script. */
  voice_id: string;
  name: string;
  language: string;
  gender: string;
  preview_audio?: string;
  support_pause?: boolean;
  emotion_support?: boolean;
  support_interactive_avatar?: boolean;
  support_locale?: boolean;
}
53
+
54
/** One avatar entry returned by `listAvatars`. */
interface HeyGenAvatar {
  /** Id to pass as `avatar_id` when submitting a video. */
  avatar_id: string;
  avatar_name: string;
  gender?: string;
  preview_image_url?: string;
  preview_video_url?: string;
  tags?: string[];
}
62
+
63
/** One talking-photo entry returned by `listAvatars`. */
interface HeyGenTalkingPhoto {
  /** Id to pass as `talking_photo_id` when submitting a video. */
  talking_photo_id: string;
  talking_photo_name: string;
  preview_image_url?: string;
}
68
+
69
/** JSON body of the asset upload endpoint, as consumed by `uploadAsset`. */
interface HeyGenAssetUploadResponse {
  code: number;
  data: {
    /** Asset id; returned to callers as `assetId`. */
    id: string;
    name: string;
    file_type: string;
    /** Asset URL; returned to callers as `url`. */
    url: string;
    image_key?: string;
  };
}
79
+
80
+ // ---------------------------------------------------------------------------
81
+ // Provider
82
+ // ---------------------------------------------------------------------------
83
+
84
/**
 * HeyGen provider: submits avatar videos through Studio V2
 * (`POST /v2/video/generate`), reports job status, and exposes helpers for
 * listing voices/avatars, uploading media and checking quota.
 */
export class HeyGenProvider extends BaseProvider {
  readonly name = "heygen";
  private apiKey: string;

  /**
   * @param config - Optional provider config. `timeout` defaults to 10 minutes;
   *   `apiKey` falls back to `process.env.HEYGEN_API_KEY`.
   */
  constructor(config?: ProviderConfig) {
    super({
      timeout: 600000, // 10 minutes default (avatar videos can take 30-120s)
      ...config,
    });
    this.apiKey = config?.apiKey || process.env.HEYGEN_API_KEY || "";
  }

  // ---- Internal helpers ----

  /**
   * Build request headers with the API key.
   *
   * The key is re-resolved from the environment on every call because the
   * exported singleton is constructed at import time, possibly before the
   * environment variable is populated.
   *
   * @throws Error when no API key is available, instead of sending a request
   *   with an empty key.
   */
  private heygenHeaders(
    extra: Record<string, string> = {},
  ): Record<string, string> {
    const apiKey = this.apiKey || process.env.HEYGEN_API_KEY;
    if (!apiKey) throw new Error("HEYGEN_API_KEY not set");
    return { "X-Api-Key": apiKey, ...extra };
  }

  /**
   * Fetch the raw status payload for a video job.
   *
   * @param jobId - Video id; percent-encoded before it is placed in the query string.
   * @param failureLabel - Text used in the error message for non-OK responses.
   */
  private async fetchVideoStatus(
    jobId: string,
    failureLabel: string,
  ): Promise<HeyGenVideoStatusResponse> {
    const res = await fetch(
      `${HEYGEN_API_BASE}/v1/video_status.get?video_id=${encodeURIComponent(jobId)}`,
      {
        method: "GET",
        headers: this.heygenHeaders({ Accept: "application/json" }),
      },
    );

    if (!res.ok) {
      throw new Error(`heygen ${failureLabel} failed (${res.status})`);
    }

    return (await res.json()) as HeyGenVideoStatusResponse;
  }

  /**
   * Download an image and register it as a talking photo.
   *
   * @returns The resulting `talking_photo_id`.
   * @throws Error when the download or the upload fails.
   */
  private async talkingPhotoFromUrl(imageUrl: string): Promise<string> {
    const res = await fetch(imageUrl);
    if (!res.ok) {
      throw new Error(`heygen image download failed (${res.status})`);
    }
    // Strip any parameters (e.g. "; charset=...") from the content type.
    const contentType =
      res.headers.get("content-type")?.split(";")[0]?.trim() || "image/jpeg";
    return this.uploadTalkingPhoto(await res.arrayBuffer(), contentType);
  }

  // ---- Provider interface ----

  /**
   * Submit a video generation job.
   *
   * Inputs read: `script`, `voice_id`, `avatar_id`, `talking_photo_id`,
   * `image_url`, `audio_url`, `audio_asset_id`, `aspect_ratio`,
   * `callback_url`, `title`, `background`, `avatar_style`, `talking_style`,
   * `matting`, `speed`, `emotion`.
   *
   * Character: `avatar_id` wins, then `talking_photo_id`, then `image_url`
   * (downloaded and uploaded as a talking photo).
   * Voice: `script` + `voice_id` wins, then `audio_url`, then `audio_asset_id`.
   *
   * NOTE(review): `motion_prompt`, `expressiveness` and `resolution` are passed
   * by createAvatarVideo but are not part of the payload built here — confirm
   * the intended mapping.
   *
   * @returns The HeyGen `video_id`, used as the job id.
   * @throws Error when no API key is set, the request fails, or no id is returned.
   */
  async submit(
    _model: string,
    inputs: Record<string, unknown>,
    _config?: ProviderConfig,
  ): Promise<string> {
    const script = inputs.script as string | undefined;
    const voiceId = inputs.voice_id as string | undefined;
    const avatarId = inputs.avatar_id as string | undefined;
    let talkingPhotoId = inputs.talking_photo_id as string | undefined;
    const imageUrl = inputs.image_url as string | undefined;
    const audioUrl = inputs.audio_url as string | undefined;
    const audioAssetId = inputs.audio_asset_id as string | undefined;
    const aspectRatio = inputs.aspect_ratio as string | undefined;
    const callbackUrl = inputs.callback_url as string | undefined;
    const title = inputs.title as string | undefined;
    const background = inputs.background as
      | Record<string, unknown>
      | string
      | undefined;

    // Fail before any network traffic when the key is missing.
    const headers = this.heygenHeaders({
      "Content-Type": "application/json",
      Accept: "application/json",
    });

    // An image URL is only usable once registered as a talking photo; without
    // this step an image-only request would carry an empty character.
    if (!avatarId && !talkingPhotoId && imageUrl) {
      talkingPhotoId = await this.talkingPhotoFromUrl(imageUrl);
    }

    // Always use Studio V2 (POST /v2/video/generate)
    // Works for avatars, talking photos, and uploaded images

    // Build character
    const character: Record<string, unknown> = {};
    if (avatarId) {
      character.type = "avatar";
      character.avatar_id = avatarId;
      character.avatar_style = (inputs.avatar_style as string) ?? "normal";
    } else if (talkingPhotoId) {
      character.type = "talking_photo";
      character.talking_photo_id = talkingPhotoId;
      if (inputs.talking_style) character.talking_style = inputs.talking_style;
      if (inputs.matting) character.matting = inputs.matting;
    }

    // Build voice
    const voice: Record<string, unknown> = {};
    if (script && voiceId) {
      voice.type = "text";
      voice.input_text = script;
      voice.voice_id = voiceId;
      if (inputs.speed) voice.speed = inputs.speed;
      if (inputs.emotion) voice.emotion = inputs.emotion;
    } else if (audioUrl) {
      voice.type = "audio";
      voice.audio_url = audioUrl;
    } else if (audioAssetId) {
      voice.type = "audio";
      voice.audio_asset_id = audioAssetId;
    }

    // Build background: "#..." is a colour, any other string an image URL,
    // an object is passed through as-is.
    let bg: Record<string, unknown> | undefined;
    if (typeof background === "string") {
      bg = background.startsWith("#")
        ? { type: "color", value: background }
        : { type: "image", url: background, fit: "cover" };
    } else if (background) {
      bg = background;
    }

    // Build scene
    const scene: Record<string, unknown> = { character, voice };
    if (bg) scene.background = bg;

    // Dimension from aspect ratio (anything but "9:16" renders landscape)
    const dim =
      aspectRatio === "9:16"
        ? { width: 720, height: 1280 }
        : { width: 1280, height: 720 };

    const payload: Record<string, unknown> = {
      video_inputs: [scene],
      dimension: dim,
    };
    if (callbackUrl) payload.callback_url = callbackUrl;
    if (title) payload.title = title;

    console.log("[heygen] submitting via Studio V2...");

    const response = await fetch(`${HEYGEN_API_BASE}/v2/video/generate`, {
      method: "POST",
      headers,
      body: JSON.stringify(payload),
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(
        `heygen submit failed (${response.status}): ${errorText}`,
      );
    }

    // The id is accepted either nested under `data` or at the top level.
    const data = (await response.json()) as {
      data?: HeyGenVideoResponse;
      video_id?: string;
      error?: unknown;
    };
    const videoId = data.data?.video_id ?? data.video_id;

    if (!videoId) {
      throw new Error("no video_id in heygen response");
    }

    console.log(`[heygen] video submitted: ${videoId}`);
    return videoId;
  }

  /**
   * Report the current state of a job.
   *
   * HeyGen states map as: pending/waiting → "queued", processing →
   * "processing", completed → "completed", failed → "failed"; anything else
   * is reported as "processing".
   *
   * @returns Status plus, when a video URL is present, `{ url, duration }` as output.
   */
  async getStatus(jobId: string): Promise<JobStatusUpdate> {
    const body = await this.fetchVideoStatus(jobId, "status check");
    const status = body.data?.status?.toLowerCase();

    const statusMap: Record<string, JobStatusUpdate["status"]> = {
      pending: "queued",
      waiting: "queued",
      processing: "processing",
      completed: "completed",
      failed: "failed",
    };

    return {
      status:
        (status !== undefined ? statusMap[status] : undefined) ?? "processing",
      output: body.data?.video_url
        ? { url: body.data.video_url, duration: body.data.duration }
        : undefined,
      error: body.data?.error ?? undefined,
    };
  }

  /**
   * Fetch the raw `data` object of the status response for a job.
   */
  async getResult(jobId: string): Promise<unknown> {
    const body = await this.fetchVideoStatus(jobId, "result fetch");
    return body.data;
  }

  // ---- Convenience methods ----

  /**
   * Generate an avatar video from script text and wait for it to finish.
   * Provide either `avatarId` or `imageUrl` as the character source.
   *
   * @returns `{ video: { url, duration } }` for the completed video.
   * @throws Error when submission or waiting fails, or no URL is returned.
   */
  async createAvatarVideo(args: {
    script: string;
    voiceId: string;
    avatarId?: string;
    imageUrl?: string;
    motionPrompt?: string;
    expressiveness?: "low" | "medium" | "high";
    aspectRatio?: "16:9" | "9:16";
    resolution?: "720p" | "1080p";
  }) {
    console.log("[heygen] starting avatar video generation...");

    const videoId = await this.submit("avatar-iv", {
      script: args.script,
      voice_id: args.voiceId,
      ...(args.avatarId ? { avatar_id: args.avatarId } : {}),
      ...(args.imageUrl ? { image_url: args.imageUrl } : {}),
      ...(args.motionPrompt ? { motion_prompt: args.motionPrompt } : {}),
      ...(args.expressiveness ? { expressiveness: args.expressiveness } : {}),
      ...(args.aspectRatio ? { aspect_ratio: args.aspectRatio } : {}),
      ...(args.resolution ? { resolution: args.resolution } : {}),
    });

    const result = (await this.waitForCompletion(videoId, {
      maxWait: this.config.timeout ?? 600000,
      pollInterval: 5000,
    })) as { url?: string; duration?: number };

    if (!result?.url) {
      throw new Error("heygen video completed but no video URL");
    }

    console.log("[heygen] avatar video completed!");
    return { video: { url: result.url, duration: result.duration } };
  }

  /**
   * List available HeyGen voices.
   *
   * @returns The `data.voices` array of the response, or an empty array.
   */
  async listVoices(): Promise<HeyGenVoice[]> {
    console.log("[heygen] fetching voices...");

    const res = await fetch(`${HEYGEN_API_BASE}/v2/voices`, {
      headers: this.heygenHeaders({ Accept: "application/json" }),
    });

    if (!res.ok) {
      const errorText = await res.text();
      throw new Error(
        `heygen list voices failed (${res.status}): ${errorText}`,
      );
    }

    const data = (await res.json()) as {
      data?: { voices?: HeyGenVoice[] };
      error?: unknown;
    };
    const voices = data.data?.voices ?? [];
    console.log(`[heygen] found ${voices.length} voices`);
    return voices;
  }

  /**
   * List available HeyGen avatars and talking photos.
   *
   * @returns Both lists; each defaults to an empty array when absent.
   */
  async listAvatars(): Promise<{
    avatars: HeyGenAvatar[];
    talkingPhotos: HeyGenTalkingPhoto[];
  }> {
    console.log("[heygen] fetching avatars...");

    const res = await fetch(`${HEYGEN_API_BASE}/v2/avatars`, {
      headers: this.heygenHeaders({ Accept: "application/json" }),
    });

    if (!res.ok) {
      const errorText = await res.text();
      throw new Error(
        `heygen list avatars failed (${res.status}): ${errorText}`,
      );
    }

    const data = (await res.json()) as {
      data?: {
        avatars?: HeyGenAvatar[];
        talking_photos?: HeyGenTalkingPhoto[];
      };
      error?: unknown;
    };

    const avatars = data.data?.avatars ?? [];
    const talkingPhotos = data.data?.talking_photos ?? [];
    console.log(
      `[heygen] found ${avatars.length} avatars, ${talkingPhotos.length} talking photos`,
    );
    return { avatars, talkingPhotos };
  }

  /**
   * Upload an image as a talking photo to HeyGen.
   * Returns a talking_photo_id that can be used in Studio V2 video generation.
   *
   * @param file - Image bytes.
   * @param contentType - Sent as the `Content-Type` header; defaults to "image/jpeg".
   */
  async uploadTalkingPhoto(
    file: Uint8Array | ArrayBuffer,
    contentType = "image/jpeg",
  ): Promise<string> {
    console.log("[heygen] uploading talking photo...");

    const body = file instanceof Uint8Array ? file : new Uint8Array(file);

    const res = await fetch(`${HEYGEN_UPLOAD_BASE}/v1/talking_photo`, {
      method: "POST",
      headers: this.heygenHeaders({ "Content-Type": contentType }),
      body,
    });

    if (!res.ok) {
      const errorText = await res.text();
      throw new Error(
        `heygen talking photo upload failed (${res.status}): ${errorText}`,
      );
    }

    const data = (await res.json()) as {
      data?: { talking_photo_id?: string };
    };
    const talkingPhotoId = data.data?.talking_photo_id;

    if (!talkingPhotoId) {
      throw new Error("no talking_photo_id in heygen upload response");
    }

    console.log(`[heygen] uploaded talking photo: ${talkingPhotoId}`);
    return talkingPhotoId;
  }

  /**
   * Upload an asset (image/audio/video) to HeyGen.
   * Note: uses upload.heygen.com, not api.heygen.com.
   *
   * @returns The asset id and its URL (empty string when the response has none).
   */
  async uploadAsset(
    file: Uint8Array | ArrayBuffer,
    contentType: string,
  ): Promise<{ assetId: string; url: string }> {
    console.log(`[heygen] uploading asset (${contentType})...`);

    const body = file instanceof Uint8Array ? file : new Uint8Array(file);

    const res = await fetch(`${HEYGEN_UPLOAD_BASE}/v1/asset`, {
      method: "POST",
      headers: this.heygenHeaders({ "Content-Type": contentType }),
      body,
    });

    if (!res.ok) {
      const errorText = await res.text();
      throw new Error(
        `heygen asset upload failed (${res.status}): ${errorText}`,
      );
    }

    const data = (await res.json()) as HeyGenAssetUploadResponse;
    const assetId = data.data?.id;
    const url = data.data?.url;

    if (!assetId) {
      throw new Error("no asset id in heygen upload response");
    }

    console.log(`[heygen] uploaded asset: ${assetId}`);
    return { assetId, url: url ?? "" };
  }

  /**
   * Get remaining API quota.
   *
   * @returns `data.remaining_quota` from the response, or 0 when absent.
   */
  async getRemainingQuota(): Promise<number> {
    const res = await fetch(`${HEYGEN_API_BASE}/v2/user/remaining_quota`, {
      headers: this.heygenHeaders({ Accept: "application/json" }),
    });

    if (!res.ok) {
      throw new Error(`heygen quota check failed (${res.status})`);
    }

    const data = (await res.json()) as {
      data?: { remaining_quota?: number };
    };
    return data.data?.remaining_quota ?? 0;
  }
}
478
+
479
// Shared provider instance, constructed with default configuration when this
// module is first imported.
export const heygenProvider = new HeyGenProvider();
@@ -86,6 +86,8 @@ export {
86
86
  listModels,
87
87
  transcribeAudio,
88
88
  } from "./groq";
89
+ // HeyGen provider (AI avatar video generation)
90
+ export { HeyGenProvider, heygenProvider } from "./heygen";
89
91
  // Higgsfield provider (Soul image generation)
90
92
  export {
91
93
  BatchSize,
@@ -129,6 +131,7 @@ import { falProvider } from "./fal";
129
131
  import { ffmpegProvider } from "./ffmpeg";
130
132
  import { fireworksProvider } from "./fireworks";
131
133
  import { groqProvider } from "./groq";
134
+ import { heygenProvider } from "./heygen";
132
135
  import { higgsfieldProvider } from "./higgsfield";
133
136
  import { piapiProvider } from "./piapi";
134
137
  import { replicateProvider } from "./replicate";
@@ -142,6 +145,7 @@ providers.register(elevenlabsProvider);
142
145
  providers.register(groqProvider);
143
146
  providers.register(fireworksProvider);
144
147
  providers.register(higgsfieldProvider);
148
+ providers.register(heygenProvider);
145
149
  providers.register(piapiProvider);
146
150
  providers.register(ffmpegProvider);
147
151
  providers.register(storageProvider);