@mixio-pro/kalaasetu-mcp 1.0.6 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -2
- package/package.json +3 -1
- package/src/storage/gcs.ts +116 -0
- package/src/storage/index.ts +11 -8
- package/src/storage/interface.ts +1 -1
- package/src/storage/local.ts +0 -2
- package/src/tools/gemini.ts +55 -27
- package/src/tools/hunyuan-avatar.ts +83 -25
- package/src/tools/image-to-video.ts +14 -9
- package/src/tools/infinitalk.ts +87 -27
- package/src/storage/payload.ts +0 -46
package/README.md
CHANGED
|
@@ -68,7 +68,9 @@ Add to your Cursor settings (`~/.cursor/config.json` or via Settings → MCP):
|
|
|
68
68
|
"env": {
|
|
69
69
|
"GEMINI_API_KEY": "your-gemini-api-key",
|
|
70
70
|
"FAL_KEY": "your-fal-api-key",
|
|
71
|
-
"PERPLEXITY_API_KEY": "your-perplexity-api-key"
|
|
71
|
+
"PERPLEXITY_API_KEY": "your-perplexity-api-key",
|
|
72
|
+
"STORAGE_PROVIDER":"gcs",
|
|
73
|
+
"GCS_BUCKET":"your-gcs-bucket-name"
|
|
72
74
|
}
|
|
73
75
|
}
|
|
74
76
|
}
|
|
@@ -88,7 +90,9 @@ Add to your OpenCode MCP configuration:
|
|
|
88
90
|
"environment": {
|
|
89
91
|
"GEMINI_API_KEY": "your-gemini-api-key",
|
|
90
92
|
"FAL_KEY": "your-fal-api-key",
|
|
91
|
-
"PERPLEXITY_API_KEY": "your-perplexity-api-key"
|
|
93
|
+
"PERPLEXITY_API_KEY": "your-perplexity-api-key",
|
|
94
|
+
"STORAGE_PROVIDER":"gcs",
|
|
95
|
+
"GCS_BUCKET":"your-bucket-name"
|
|
92
96
|
}
|
|
93
97
|
}
|
|
94
98
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mixio-pro/kalaasetu-mcp",
|
|
3
|
-
"version": "1.0.6",
|
|
3
|
+
"version": "1.0.8",
|
|
4
4
|
"description": "A powerful Model Context Protocol server providing AI tools for content generation and analysis",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"module": "src/index.ts",
|
|
@@ -49,8 +49,10 @@
|
|
|
49
49
|
"dependencies": {
|
|
50
50
|
"@fal-ai/client": "^1.7.2",
|
|
51
51
|
"@google/genai": "^1.28.0",
|
|
52
|
+
"@types/node": "^24.10.1",
|
|
52
53
|
"@types/wav": "^1.0.4",
|
|
53
54
|
"fastmcp": "^3.22.0",
|
|
55
|
+
"form-data": "^4.0.5",
|
|
54
56
|
"google-auth-library": "^10.5.0",
|
|
55
57
|
"wav": "^1.0.2",
|
|
56
58
|
"zod": "^4.1.12"
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import { GoogleAuth } from "google-auth-library";
|
|
2
|
+
import type { StorageProvider } from "./interface";
|
|
3
|
+
import * as path from "path";
|
|
4
|
+
|
|
5
|
+
/**
 * StorageProvider implementation that stores objects in a Google Cloud
 * Storage bucket through the GCS JSON API, authenticating with
 * google-auth-library using the `cloud-platform` scope.
 *
 * Every path handed to this provider is reduced to its base name, so any
 * directory components are discarded and all objects live at the bucket root.
 */
export class GCSStorageProvider implements StorageProvider {
  private readonly bucket: string;
  private readonly auth: GoogleAuth;

  /**
   * @param bucket Name of the GCS bucket that objects are read from and written to.
   */
  constructor(bucket: string) {
    this.bucket = bucket;
    this.auth = new GoogleAuth({
      scopes: ["https://www.googleapis.com/auth/cloud-platform"],
    });
  }

  /**
   * Logs the configured bucket and probes for credentials.
   * A credential failure is only reported as a warning; it never rejects.
   */
  async init(): Promise<void> {
    // Log to stderr, matching getStorage() in index.ts.
    // NOTE(review): stdout is presumably the MCP stdio protocol channel, so
    // console.log here could corrupt the message stream — confirm transport.
    console.error(
      `Initializing GCS Storage Provider with bucket: ${this.bucket}`
    );
    // Verify we can get credentials
    try {
      await this.auth.getClient();
    } catch (error) {
      console.warn(`Warning: Could not initialize GCS client: ${error}`);
    }
  }

  /**
   * Obtains an OAuth access token from the auth client.
   * @throws Error when the client returns no token.
   */
  private async getAccessToken(): Promise<string> {
    const client = await this.auth.getClient();
    const token = await client.getAccessToken();
    if (!token.token) {
      throw new Error("Failed to get GCS access token");
    }
    return token.token;
  }

  /** Maps a caller-supplied path to the object name (base name only). */
  private toObjectName(filePath: string): string {
    return path.basename(filePath);
  }

  /** JSON API URL of an object's resource (metadata unless `?alt=media` is appended). */
  private objectResourceUrl(objectName: string): string {
    return `https://storage.googleapis.com/storage/v1/b/${
      this.bucket
    }/o/${encodeURIComponent(objectName)}`;
  }

  /**
   * Direct `storage.googleapis.com/<bucket>/<object>` URL for an object.
   * The object name is percent-encoded so names containing spaces, `#`, `?`
   * or `%` still produce a usable link.
   */
  private objectUrl(objectName: string): string {
    return `https://storage.googleapis.com/${this.bucket}/${encodeURIComponent(
      objectName
    )}`;
  }

  /** Builds the bearer-token Authorization header for a request. */
  private async authHeaders(): Promise<Record<string, string>> {
    const token = await this.getAccessToken();
    return { Authorization: `Bearer ${token}` };
  }

  /**
   * Downloads an object's content.
   * @param filePath Path whose base name identifies the object.
   * @returns The object's bytes.
   * @throws Error when the HTTP response is not OK.
   */
  async readFile(filePath: string): Promise<Buffer> {
    const url = `${this.objectResourceUrl(
      this.toObjectName(filePath)
    )}?alt=media`;

    const response = await fetch(url, {
      headers: await this.authHeaders(),
    });

    if (!response.ok) {
      throw new Error(
        `Failed to read file from GCS: ${response.status} ${response.statusText}`
      );
    }

    const arrayBuffer = await response.arrayBuffer();
    return Buffer.from(arrayBuffer);
  }

  /**
   * Uploads data as an object using a single-request media upload.
   * @param filePath Path whose base name becomes the object name.
   * @param data Content to upload; strings are converted with Buffer.from.
   * @returns The direct object URL.
   *   NOTE(review): this URL is only reachable anonymously if the bucket or
   *   object allows public reads — nothing here configures that.
   * @throws Error with the status and response body when the upload fails.
   */
  async writeFile(filePath: string, data: Buffer | string): Promise<string> {
    const objectName = this.toObjectName(filePath);
    const buffer = Buffer.isBuffer(data) ? data : Buffer.from(data);

    // Upload using JSON API
    const url = `https://storage.googleapis.com/upload/storage/v1/b/${
      this.bucket
    }/o?uploadType=media&name=${encodeURIComponent(objectName)}`;

    const response = await fetch(url, {
      method: "POST",
      headers: {
        ...(await this.authHeaders()),
        "Content-Type": "application/octet-stream",
        "Content-Length": buffer.length.toString(),
      },
      body: buffer,
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(
        `Failed to upload to GCS: ${response.status} ${errorText}`
      );
    }

    return this.objectUrl(objectName);
  }

  /**
   * Checks whether an object exists by requesting its metadata.
   * Best-effort: any failure (auth, network, non-OK status) yields `false`.
   */
  async exists(filePath: string): Promise<boolean> {
    try {
      const response = await fetch(
        this.objectResourceUrl(this.toObjectName(filePath)),
        {
          method: "GET",
          headers: await this.authHeaders(),
        }
      );

      return response.ok;
    } catch {
      return false;
    }
  }

  /**
   * Returns the direct object URL for a path without contacting GCS; the
   * object is not checked for existence or public readability.
   */
  async getPublicUrl(filePath: string): Promise<string> {
    return this.objectUrl(this.toObjectName(filePath));
  }
}
|
package/src/storage/index.ts
CHANGED
|
@@ -1,24 +1,27 @@
|
|
|
1
1
|
import type { StorageProvider } from "./interface";
|
|
2
2
|
import { LocalStorageProvider } from "./local";
|
|
3
|
-
import {
|
|
3
|
+
import { GCSStorageProvider } from "./gcs";
|
|
4
4
|
|
|
5
5
|
let storageInstance: StorageProvider | null = null;
|
|
6
6
|
|
|
7
7
|
export function getStorage(): StorageProvider {
|
|
8
8
|
if (!storageInstance) {
|
|
9
9
|
const type = process.env.STORAGE_PROVIDER || "local";
|
|
10
|
-
console.error(`Initializing storage provider: ${type}`);
|
|
10
|
+
console.error(`Initializing storage provider: ${type}`);
|
|
11
11
|
|
|
12
|
-
if (type === "
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
12
|
+
if (type === "gcs") {
|
|
13
|
+
const bucket = process.env.GCS_BUCKET;
|
|
14
|
+
|
|
15
|
+
if (!bucket) {
|
|
16
|
+
throw new Error("GCS_BUCKET is required when using gcs storage");
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
storageInstance = new GCSStorageProvider(bucket);
|
|
17
20
|
} else {
|
|
18
21
|
storageInstance = new LocalStorageProvider(process.cwd());
|
|
19
22
|
}
|
|
20
23
|
|
|
21
|
-
// Initialize async
|
|
24
|
+
// Initialize async
|
|
22
25
|
storageInstance
|
|
23
26
|
.init()
|
|
24
27
|
.catch((err) => console.error("Failed to init storage:", err));
|
package/src/storage/interface.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
export interface StorageProvider {
|
|
2
2
|
init(): Promise<void>;
|
|
3
3
|
readFile(path: string): Promise<Buffer>;
|
|
4
|
-
writeFile(path: string, data: Buffer | string): Promise<string>; // Returns
|
|
4
|
+
writeFile(path: string, data: Buffer | string): Promise<string>; // Returns public URL
|
|
5
5
|
exists(path: string): Promise<boolean>;
|
|
6
6
|
getPublicUrl(path: string): Promise<string>;
|
|
7
7
|
}
|
package/src/storage/local.ts
CHANGED
|
@@ -14,7 +14,6 @@ export class LocalStorageProvider implements StorageProvider {
|
|
|
14
14
|
}
|
|
15
15
|
|
|
16
16
|
async readFile(filePath: string): Promise<Buffer> {
|
|
17
|
-
// Handle absolute paths by checking if it starts with basePath or just use it if it exists
|
|
18
17
|
let fullPath = filePath;
|
|
19
18
|
if (!path.isAbsolute(filePath)) {
|
|
20
19
|
fullPath = path.resolve(this.basePath, filePath);
|
|
@@ -45,7 +44,6 @@ export class LocalStorageProvider implements StorageProvider {
|
|
|
45
44
|
}
|
|
46
45
|
|
|
47
46
|
async getPublicUrl(filePath: string): Promise<string> {
|
|
48
|
-
// For local, we just return the absolute path
|
|
49
47
|
let fullPath = filePath;
|
|
50
48
|
if (!path.isAbsolute(filePath)) {
|
|
51
49
|
fullPath = path.resolve(this.basePath, filePath);
|
package/src/tools/gemini.ts
CHANGED
|
@@ -164,7 +164,7 @@ async function processVideoInput(
|
|
|
164
164
|
}
|
|
165
165
|
|
|
166
166
|
export const geminiTextToImage = {
|
|
167
|
-
name: "
|
|
167
|
+
name: "generateImage",
|
|
168
168
|
description:
|
|
169
169
|
"Generate images from text prompts using Gemini 2.5 Flash Image model",
|
|
170
170
|
parameters: z.object({
|
|
@@ -194,30 +194,41 @@ export const geminiTextToImage = {
|
|
|
194
194
|
},
|
|
195
195
|
});
|
|
196
196
|
|
|
197
|
-
|
|
197
|
+
const images = [];
|
|
198
|
+
let textResponse = "";
|
|
199
|
+
|
|
198
200
|
if (response.candidates && response.candidates[0]?.content?.parts) {
|
|
199
201
|
for (const part of response.candidates[0].content.parts) {
|
|
200
202
|
if (part.text) {
|
|
201
|
-
|
|
203
|
+
textResponse += part.text;
|
|
202
204
|
} else if (part.inlineData?.data) {
|
|
203
205
|
const imageData = part.inlineData.data;
|
|
204
206
|
if (args.output_path) {
|
|
205
207
|
const storage = getStorage();
|
|
206
|
-
await storage.writeFile(
|
|
208
|
+
const url = await storage.writeFile(
|
|
207
209
|
args.output_path,
|
|
208
210
|
Buffer.from(imageData, "base64")
|
|
209
211
|
);
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
)}...`;
|
|
212
|
+
images.push({
|
|
213
|
+
url,
|
|
214
|
+
filename: args.output_path,
|
|
215
|
+
mimeType: "image/png",
|
|
216
|
+
});
|
|
216
217
|
}
|
|
217
218
|
}
|
|
218
219
|
}
|
|
219
220
|
}
|
|
220
|
-
|
|
221
|
+
|
|
222
|
+
if (images.length > 0) {
|
|
223
|
+
return JSON.stringify({
|
|
224
|
+
images,
|
|
225
|
+
message: textResponse || "Image generated successfully",
|
|
226
|
+
});
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
return (
|
|
230
|
+
textResponse || "Image generation completed but no response received"
|
|
231
|
+
);
|
|
221
232
|
} catch (error: any) {
|
|
222
233
|
throw new Error(`Image generation failed: ${error.message}`);
|
|
223
234
|
}
|
|
@@ -225,7 +236,7 @@ export const geminiTextToImage = {
|
|
|
225
236
|
};
|
|
226
237
|
|
|
227
238
|
export const geminiEditImage = {
|
|
228
|
-
name: "
|
|
239
|
+
name: "editImage",
|
|
229
240
|
description:
|
|
230
241
|
"Edit existing images with text instructions using Gemini 2.5 Flash Image Preview",
|
|
231
242
|
parameters: z.object({
|
|
@@ -261,30 +272,39 @@ export const geminiEditImage = {
|
|
|
261
272
|
contents: contents,
|
|
262
273
|
});
|
|
263
274
|
|
|
264
|
-
|
|
275
|
+
const images = [];
|
|
276
|
+
let textResponse = "";
|
|
277
|
+
|
|
265
278
|
if (response.candidates && response.candidates[0]?.content?.parts) {
|
|
266
279
|
for (const part of response.candidates[0].content.parts) {
|
|
267
280
|
if (part.text) {
|
|
268
|
-
|
|
281
|
+
textResponse += part.text;
|
|
269
282
|
} else if (part.inlineData?.data) {
|
|
270
283
|
const imageData = part.inlineData.data;
|
|
271
284
|
if (args.output_path) {
|
|
272
285
|
const storage = getStorage();
|
|
273
|
-
await storage.writeFile(
|
|
286
|
+
const url = await storage.writeFile(
|
|
274
287
|
args.output_path,
|
|
275
288
|
Buffer.from(imageData, "base64")
|
|
276
289
|
);
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
)}...`;
|
|
290
|
+
images.push({
|
|
291
|
+
url,
|
|
292
|
+
filename: args.output_path,
|
|
293
|
+
mimeType: "image/png",
|
|
294
|
+
});
|
|
283
295
|
}
|
|
284
296
|
}
|
|
285
297
|
}
|
|
286
298
|
}
|
|
287
|
-
|
|
299
|
+
|
|
300
|
+
if (images.length > 0) {
|
|
301
|
+
return JSON.stringify({
|
|
302
|
+
images,
|
|
303
|
+
message: textResponse || "Image edited successfully",
|
|
304
|
+
});
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
return textResponse || "Image editing completed but no response received";
|
|
288
308
|
} catch (error: any) {
|
|
289
309
|
throw new Error(`Image editing failed: ${error.message}`);
|
|
290
310
|
}
|
|
@@ -292,7 +312,7 @@ export const geminiEditImage = {
|
|
|
292
312
|
};
|
|
293
313
|
|
|
294
314
|
export const geminiAnalyzeImages = {
|
|
295
|
-
name: "
|
|
315
|
+
name: "analyzeImages",
|
|
296
316
|
description:
|
|
297
317
|
"Analyze and describe images using Gemini 2.5 Pro with advanced multimodal understanding",
|
|
298
318
|
parameters: z.object({
|
|
@@ -358,7 +378,7 @@ export const geminiAnalyzeImages = {
|
|
|
358
378
|
};
|
|
359
379
|
|
|
360
380
|
export const geminiSingleSpeakerTts = {
|
|
361
|
-
name: "
|
|
381
|
+
name: "generateSpeech",
|
|
362
382
|
description:
|
|
363
383
|
"Generate single speaker voice audio from text using Gemini 2.5 Pro Preview TTS model",
|
|
364
384
|
parameters: z.object({
|
|
@@ -407,9 +427,17 @@ export const geminiSingleSpeakerTts = {
|
|
|
407
427
|
// Generate output filename if not provided
|
|
408
428
|
const outputPath = args.output_path || `voice_output_${Date.now()}.wav`;
|
|
409
429
|
|
|
410
|
-
|
|
430
|
+
const storage = getStorage();
|
|
431
|
+
const url = await storage.writeFile(outputPath, audioBuffer);
|
|
411
432
|
|
|
412
|
-
return
|
|
433
|
+
return JSON.stringify({
|
|
434
|
+
audio: {
|
|
435
|
+
url,
|
|
436
|
+
filename: outputPath,
|
|
437
|
+
mimeType: "audio/wav",
|
|
438
|
+
},
|
|
439
|
+
message: "Audio generated successfully",
|
|
440
|
+
});
|
|
413
441
|
} catch (error: any) {
|
|
414
442
|
throw new Error(`Voice generation failed: ${error.message}`);
|
|
415
443
|
}
|
|
@@ -417,7 +445,7 @@ export const geminiSingleSpeakerTts = {
|
|
|
417
445
|
};
|
|
418
446
|
|
|
419
447
|
export const geminiAnalyzeVideos = {
|
|
420
|
-
name: "
|
|
448
|
+
name: "analyzeVideos",
|
|
421
449
|
description:
|
|
422
450
|
"Analyze and understand video content using Gemini 2.5 Flash model. Intelligently handles YouTube URLs and local videos (files <20MB processed inline, ≥20MB uploaded via File API). Supports timestamp queries, clipping, and custom frame rates with default 5 FPS for local videos to optimize processing.",
|
|
423
451
|
parameters: z.object({
|
|
@@ -5,10 +5,12 @@ import { callFalModel } from "../utils/fal.utils";
|
|
|
5
5
|
* Calculate number of frames based on audio duration at 25 FPS
|
|
6
6
|
* Adds 1 second buffer to ensure complete audio coverage
|
|
7
7
|
*/
|
|
8
|
-
function calculateFramesFromAudioDuration(
|
|
8
|
+
function calculateFramesFromAudioDuration(
  audioDurationSeconds: number
): number {
  // Frame budget accepted for this model and the fixed playback rate.
  const minFrames = 129;
  const maxFrames = 401;
  const framesPerSecond = 25;

  // Pad the audio by one second so the video fully covers it, then convert
  // the padded duration to a whole number of frames.
  const requested = Math.round((audioDurationSeconds + 1) * framesPerSecond);

  // Clamp into [minFrames, maxFrames].
  const capped = Math.min(maxFrames, requested);
  return Math.max(minFrames, capped);
}
|
|
@@ -18,17 +20,52 @@ function calculateFramesFromAudioDuration(audioDurationSeconds: number): number
|
|
|
18
20
|
*/
|
|
19
21
|
export const hunyuanAvatar = {
|
|
20
22
|
name: "hunyuan_avatar",
|
|
21
|
-
description:
|
|
23
|
+
description:
|
|
24
|
+
"Generate high-fidelity audio-driven human animation videos using FAL AI Hunyuan Avatar. Creates realistic talking avatar animations from an image and audio file.",
|
|
22
25
|
parameters: z.object({
|
|
23
|
-
image_url: z
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
26
|
+
image_url: z
|
|
27
|
+
.string()
|
|
28
|
+
.describe("Public URL of the reference image for the avatar."),
|
|
29
|
+
audio_url: z
|
|
30
|
+
.string()
|
|
31
|
+
.describe("Public URL of the audio file to drive the animation."),
|
|
32
|
+
audio_duration_seconds: z
|
|
33
|
+
.number()
|
|
34
|
+
.optional()
|
|
35
|
+
.describe(
|
|
36
|
+
"Duration of the audio in seconds. If provided, will automatically calculate optimal frames (audio duration + 1 second buffer at 25 FPS)."
|
|
37
|
+
),
|
|
38
|
+
text: z
|
|
39
|
+
.string()
|
|
40
|
+
.optional()
|
|
41
|
+
.describe(
|
|
42
|
+
"Text prompt describing the scene. Default: 'A cat is singing.'"
|
|
43
|
+
),
|
|
44
|
+
num_frames: z
|
|
45
|
+
.number()
|
|
46
|
+
.optional()
|
|
47
|
+
.describe(
|
|
48
|
+
"Number of video frames to generate at 25 FPS. Range: 129 to 401. If not provided and audio_duration_seconds is given, will be calculated automatically. Default: 129"
|
|
49
|
+
),
|
|
50
|
+
num_inference_steps: z
|
|
51
|
+
.number()
|
|
52
|
+
.optional()
|
|
53
|
+
.describe(
|
|
54
|
+
"Number of inference steps for sampling. Higher values give better quality but take longer. Range: 30 to 50. Default: 30"
|
|
55
|
+
),
|
|
56
|
+
turbo_mode: z
|
|
57
|
+
.boolean()
|
|
58
|
+
.optional()
|
|
59
|
+
.describe(
|
|
60
|
+
"If true, the video will be generated faster with no noticeable degradation in visual quality. Default: true"
|
|
61
|
+
),
|
|
30
62
|
seed: z.number().optional().describe("Random seed for generation."),
|
|
31
|
-
fal_key: z
|
|
63
|
+
fal_key: z
|
|
64
|
+
.string()
|
|
65
|
+
.optional()
|
|
66
|
+
.describe(
|
|
67
|
+
"FAL API key. If not provided, will use FAL_KEY environment variable."
|
|
68
|
+
),
|
|
32
69
|
}),
|
|
33
70
|
execute: async (args: {
|
|
34
71
|
image_url: string;
|
|
@@ -43,17 +80,28 @@ export const hunyuanAvatar = {
|
|
|
43
80
|
}) => {
|
|
44
81
|
// Calculate frames from audio duration if provided and num_frames not specified
|
|
45
82
|
let calculatedFrames = args.num_frames;
|
|
46
|
-
if (
|
|
47
|
-
|
|
83
|
+
if (
|
|
84
|
+
args.audio_duration_seconds !== undefined &&
|
|
85
|
+
args.num_frames === undefined
|
|
86
|
+
) {
|
|
87
|
+
calculatedFrames = calculateFramesFromAudioDuration(
|
|
88
|
+
args.audio_duration_seconds
|
|
89
|
+
);
|
|
48
90
|
}
|
|
49
91
|
|
|
50
92
|
// Validate num_frames range if provided
|
|
51
|
-
if (
|
|
93
|
+
if (
|
|
94
|
+
calculatedFrames !== undefined &&
|
|
95
|
+
(calculatedFrames < 129 || calculatedFrames > 401)
|
|
96
|
+
) {
|
|
52
97
|
throw new Error("num_frames must be between 129 and 401");
|
|
53
98
|
}
|
|
54
99
|
|
|
55
100
|
// Validate num_inference_steps range if provided
|
|
56
|
-
if (
|
|
101
|
+
if (
|
|
102
|
+
args.num_inference_steps !== undefined &&
|
|
103
|
+
(args.num_inference_steps < 30 || args.num_inference_steps > 50)
|
|
104
|
+
) {
|
|
57
105
|
throw new Error("num_inference_steps must be between 30 and 50");
|
|
58
106
|
}
|
|
59
107
|
|
|
@@ -80,23 +128,33 @@ export const hunyuanAvatar = {
|
|
|
80
128
|
input.seed = args.seed;
|
|
81
129
|
}
|
|
82
130
|
|
|
83
|
-
const result = await callFalModel("fal-ai/hunyuan-avatar", input, {
|
|
131
|
+
const result = await callFalModel("fal-ai/hunyuan-avatar", input, {
|
|
132
|
+
falKey: args.fal_key,
|
|
133
|
+
});
|
|
84
134
|
|
|
85
135
|
// Extract video data from the response
|
|
86
136
|
const videoData = result.data?.video;
|
|
87
137
|
|
|
88
138
|
if (!videoData || !videoData.url) {
|
|
89
|
-
throw new Error(
|
|
139
|
+
throw new Error(
|
|
140
|
+
`No video data in completed response: ${JSON.stringify(result.data)}`
|
|
141
|
+
);
|
|
90
142
|
}
|
|
91
143
|
|
|
92
144
|
const videoUrl = videoData.url;
|
|
93
|
-
const
|
|
94
|
-
? `\nFile: ${videoData.file_name} (${(videoData.file_size / 1024 / 1024).toFixed(2)} MB)`
|
|
95
|
-
: "";
|
|
96
|
-
const requestIdInfo = result.requestId ? `\nRequest ID: ${result.requestId}` : "";
|
|
97
|
-
|
|
145
|
+
const fileName = videoData.file_name || "hunyuan_avatar.mp4";
|
|
98
146
|
|
|
99
|
-
return
|
|
100
|
-
|
|
147
|
+
return JSON.stringify({
|
|
148
|
+
videos: [
|
|
149
|
+
{
|
|
150
|
+
url: videoUrl,
|
|
151
|
+
filename: fileName,
|
|
152
|
+
mimeType: "video/mp4",
|
|
153
|
+
filesize: videoData.file_size,
|
|
154
|
+
},
|
|
155
|
+
],
|
|
156
|
+
message: "Hunyuan Avatar video generated successfully",
|
|
157
|
+
requestId: result.requestId,
|
|
158
|
+
});
|
|
101
159
|
},
|
|
102
|
-
};
|
|
160
|
+
};
|
|
@@ -64,7 +64,7 @@ async function fileToBase64(
|
|
|
64
64
|
}
|
|
65
65
|
|
|
66
66
|
export const imageToVideo = {
|
|
67
|
-
name: "
|
|
67
|
+
name: "generateVideoi2v",
|
|
68
68
|
description:
|
|
69
69
|
"Generate videos from an image as starting first frame using Vertex Veo models (predictLongRunning + fetchPredictOperation).",
|
|
70
70
|
parameters: z.object({
|
|
@@ -283,7 +283,8 @@ export const imageToVideo = {
|
|
|
283
283
|
|
|
284
284
|
const resp = current.response || current;
|
|
285
285
|
// Decode from response.videos[].bytesBase64Encoded only
|
|
286
|
-
const
|
|
286
|
+
const videos: Array<{ url: string; filename: string; mimeType: string }> =
|
|
287
|
+
[];
|
|
287
288
|
const saveVideo = async (base64: string, index: number) => {
|
|
288
289
|
if (!base64) return;
|
|
289
290
|
const filePath = args.output_path
|
|
@@ -291,14 +292,15 @@ export const imageToVideo = {
|
|
|
291
292
|
? args.output_path
|
|
292
293
|
: args.output_path.replace(/\.mp4$/i, `_${index}.mp4`)
|
|
293
294
|
: `video_output_${Date.now()}${index === 0 ? "" : "_" + index}.mp4`;
|
|
294
|
-
// For storage provider, we use the path as is (relative or absolute)
|
|
295
|
-
// If using LocalStorage, it handles resolving.
|
|
296
|
-
// If using Payload, it handles the key.
|
|
297
295
|
|
|
298
296
|
const buf = Buffer.from(base64, "base64");
|
|
299
297
|
const storage = getStorage();
|
|
300
|
-
await storage.writeFile(filePath, buf);
|
|
301
|
-
|
|
298
|
+
const url = await storage.writeFile(filePath, buf);
|
|
299
|
+
videos.push({
|
|
300
|
+
url,
|
|
301
|
+
filename: filePath,
|
|
302
|
+
mimeType: "video/mp4",
|
|
303
|
+
});
|
|
302
304
|
};
|
|
303
305
|
|
|
304
306
|
if (Array.isArray(resp?.videos) && resp.videos.length > 0) {
|
|
@@ -309,8 +311,11 @@ export const imageToVideo = {
|
|
|
309
311
|
}
|
|
310
312
|
}
|
|
311
313
|
}
|
|
312
|
-
if (
|
|
313
|
-
return
|
|
314
|
+
if (videos.length > 0) {
|
|
315
|
+
return JSON.stringify({
|
|
316
|
+
videos,
|
|
317
|
+
message: "Video(s) generated successfully",
|
|
318
|
+
});
|
|
314
319
|
}
|
|
315
320
|
|
|
316
321
|
// If nothing saved, return a concise summary plus head/tail snippets of JSON
|
package/src/tools/infinitalk.ts
CHANGED
|
@@ -5,10 +5,12 @@ import { callFalModel } from "../utils/fal.utils";
|
|
|
5
5
|
* Calculate number of frames based on audio duration at 25 FPS
|
|
6
6
|
* Adds 1 second buffer to ensure complete audio coverage
|
|
7
7
|
*/
|
|
8
|
-
function calculateFramesFromAudioDuration(
|
|
8
|
+
function calculateFramesFromAudioDuration(
  audioDurationSeconds: number
): number {
  // Frame budget accepted for this model and the fixed playback rate.
  const minFrames = 41;
  const maxFrames = 721;
  const framesPerSecond = 25;

  // Pad the audio by one second so the video fully covers it, then convert
  // the padded duration to a whole number of frames.
  const requested = Math.round((audioDurationSeconds + 1) * framesPerSecond);

  // Clamp into [minFrames, maxFrames].
  const capped = Math.min(maxFrames, requested);
  return Math.max(minFrames, capped);
}
|
|
@@ -18,17 +20,56 @@ function calculateFramesFromAudioDuration(audioDurationSeconds: number): number
|
|
|
18
20
|
*/
|
|
19
21
|
export const infinitalk = {
|
|
20
22
|
name: "infinitalk",
|
|
21
|
-
description:
|
|
23
|
+
description:
|
|
24
|
+
"Generate a talking avatar video from an image and audio file using FAL AI Infinitalk. The avatar lip-syncs to the provided audio with natural facial expressions.",
|
|
22
25
|
parameters: z.object({
|
|
23
|
-
image_url: z
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
26
|
+
image_url: z
|
|
27
|
+
.string()
|
|
28
|
+
.describe(
|
|
29
|
+
"Public URL of the input image. If the input image does not match the chosen aspect ratio, it is resized and center cropped."
|
|
30
|
+
),
|
|
31
|
+
audio_url: z
|
|
32
|
+
.string()
|
|
33
|
+
.describe("The Public URL of the audio file for lip-sync generation."),
|
|
34
|
+
audio_duration_seconds: z
|
|
35
|
+
.number()
|
|
36
|
+
.optional()
|
|
37
|
+
.describe(
|
|
38
|
+
"Duration of the audio in seconds. If provided, will automatically calculate optimal frames (audio duration + 1 second buffer at 25 FPS)."
|
|
39
|
+
),
|
|
40
|
+
prompt: z
|
|
41
|
+
.string()
|
|
42
|
+
.describe(
|
|
43
|
+
"The text prompt to guide video generation (e.g., 'A woman with colorful hair talking on a podcast')"
|
|
44
|
+
),
|
|
45
|
+
num_frames: z
|
|
46
|
+
.number()
|
|
47
|
+
.optional()
|
|
48
|
+
.describe(
|
|
49
|
+
"Number of frames to generate. Must be between 41 to 721. If not provided and audio_duration_seconds is given, will be calculated automatically. Default: 145"
|
|
50
|
+
),
|
|
51
|
+
resolution: z
|
|
52
|
+
.enum(["480p", "720p"])
|
|
53
|
+
.optional()
|
|
54
|
+
.describe("Resolution of the video to generate. Default: '480p'"),
|
|
55
|
+
seed: z
|
|
56
|
+
.number()
|
|
57
|
+
.optional()
|
|
58
|
+
.describe(
|
|
59
|
+
"Random seed for reproducibility. If not provided, a random seed is chosen. Default: 42"
|
|
60
|
+
),
|
|
61
|
+
acceleration: z
|
|
62
|
+
.enum(["none", "regular", "high"])
|
|
63
|
+
.optional()
|
|
64
|
+
.describe(
|
|
65
|
+
"The acceleration level to use for generation. Default: 'regular'"
|
|
66
|
+
),
|
|
67
|
+
fal_key: z
|
|
68
|
+
.string()
|
|
69
|
+
.optional()
|
|
70
|
+
.describe(
|
|
71
|
+
"FAL API key. If not provided, will use FAL_KEY environment variable."
|
|
72
|
+
),
|
|
32
73
|
}),
|
|
33
74
|
execute: async (args: {
|
|
34
75
|
image_url: string;
|
|
@@ -43,12 +84,20 @@ export const infinitalk = {
|
|
|
43
84
|
}) => {
|
|
44
85
|
// Calculate frames from audio duration if provided and num_frames not specified
|
|
45
86
|
let calculatedFrames = args.num_frames;
|
|
46
|
-
if (
|
|
47
|
-
|
|
87
|
+
if (
|
|
88
|
+
args.audio_duration_seconds !== undefined &&
|
|
89
|
+
args.num_frames === undefined
|
|
90
|
+
) {
|
|
91
|
+
calculatedFrames = calculateFramesFromAudioDuration(
|
|
92
|
+
args.audio_duration_seconds
|
|
93
|
+
);
|
|
48
94
|
}
|
|
49
95
|
|
|
50
96
|
// Validate num_frames range if provided
|
|
51
|
-
if (
|
|
97
|
+
if (
|
|
98
|
+
calculatedFrames !== undefined &&
|
|
99
|
+
(calculatedFrames < 41 || calculatedFrames > 721)
|
|
100
|
+
) {
|
|
52
101
|
throw new Error("num_frames must be between 41 and 721");
|
|
53
102
|
}
|
|
54
103
|
|
|
@@ -63,9 +112,9 @@ export const infinitalk = {
|
|
|
63
112
|
if (calculatedFrames !== undefined) {
|
|
64
113
|
input.num_frames = calculatedFrames;
|
|
65
114
|
}
|
|
66
|
-
|
|
67
|
-
input.resolution = args.resolution ||
|
|
68
|
-
|
|
115
|
+
|
|
116
|
+
input.resolution = args.resolution || "480p";
|
|
117
|
+
|
|
69
118
|
if (args.seed !== undefined) {
|
|
70
119
|
input.seed = args.seed;
|
|
71
120
|
}
|
|
@@ -73,24 +122,35 @@ export const infinitalk = {
|
|
|
73
122
|
input.acceleration = args.acceleration;
|
|
74
123
|
}
|
|
75
124
|
|
|
76
|
-
const result = await callFalModel("fal-ai/infinitalk", input, {
|
|
125
|
+
const result = await callFalModel("fal-ai/infinitalk", input, {
|
|
126
|
+
falKey: args.fal_key,
|
|
127
|
+
});
|
|
77
128
|
|
|
78
129
|
// Extract video data from the response
|
|
79
130
|
const videoData = result.data?.video;
|
|
80
131
|
const seed = result.data?.seed;
|
|
81
132
|
|
|
82
133
|
if (!videoData || !videoData.url) {
|
|
83
|
-
throw new Error(
|
|
134
|
+
throw new Error(
|
|
135
|
+
`No video data in completed response: ${JSON.stringify(result.data)}`
|
|
136
|
+
);
|
|
84
137
|
}
|
|
85
138
|
|
|
86
139
|
const videoUrl = videoData.url;
|
|
87
|
-
const
|
|
88
|
-
? `\nFile: ${videoData.file_name} (${(videoData.file_size / 1024 / 1024).toFixed(2)} MB)`
|
|
89
|
-
: "";
|
|
90
|
-
const seedInfo = seed !== undefined ? `\nSeed: ${seed}` : "";
|
|
91
|
-
const requestIdInfo = result.requestId ? `\nRequest ID: ${result.requestId}` : "";
|
|
140
|
+
const fileName = videoData.file_name || "infinitalk.mp4";
|
|
92
141
|
|
|
93
|
-
return
|
|
94
|
-
|
|
142
|
+
return JSON.stringify({
|
|
143
|
+
videos: [
|
|
144
|
+
{
|
|
145
|
+
url: videoUrl,
|
|
146
|
+
filename: fileName,
|
|
147
|
+
mimeType: "video/mp4",
|
|
148
|
+
filesize: videoData.file_size,
|
|
149
|
+
},
|
|
150
|
+
],
|
|
151
|
+
message: "Infinitalk video generated successfully",
|
|
152
|
+
seed: seed,
|
|
153
|
+
requestId: result.requestId,
|
|
154
|
+
});
|
|
95
155
|
},
|
|
96
156
|
};
|
package/src/storage/payload.ts
DELETED
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
import type { StorageProvider } from "./interface";
|
|
2
|
-
|
|
3
|
-
export class PayloadStorageProvider implements StorageProvider {
|
|
4
|
-
private apiUrl: string;
|
|
5
|
-
private apiKey: string;
|
|
6
|
-
private collection: string;
|
|
7
|
-
|
|
8
|
-
constructor(apiUrl: string, apiKey: string, collection: string = "media") {
|
|
9
|
-
this.apiUrl = apiUrl;
|
|
10
|
-
this.apiKey = apiKey;
|
|
11
|
-
this.collection = collection;
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
async init(): Promise<void> {
|
|
15
|
-
console.log("Initializing Payload Storage Provider...");
|
|
16
|
-
// TODO: Verify connection to Payload CMS
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
async readFile(filePath: string): Promise<Buffer> {
|
|
20
|
-
// TODO: Implement fetching file from Payload CMS
|
|
21
|
-
// 1. Search for file by filename or ID
|
|
22
|
-
// 2. Download the file buffer
|
|
23
|
-
console.log(`[Payload] Reading file: ${filePath}`);
|
|
24
|
-
throw new Error("PayloadStorageProvider.readFile not implemented yet.");
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
async writeFile(filePath: string, data: Buffer | string): Promise<string> {
|
|
28
|
-
// TODO: Implement uploading file to Payload CMS
|
|
29
|
-
// 1. Create FormData
|
|
30
|
-
// 2. POST to /api/{collection}
|
|
31
|
-
console.log(`[Payload] Writing file: ${filePath}`);
|
|
32
|
-
throw new Error("PayloadStorageProvider.writeFile not implemented yet.");
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
async exists(filePath: string): Promise<boolean> {
|
|
36
|
-
// TODO: Check if file exists in Payload
|
|
37
|
-
console.log(`[Payload] Checking existence: ${filePath}`);
|
|
38
|
-
return false;
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
async getPublicUrl(filePath: string): Promise<string> {
|
|
42
|
-
// TODO: Return the public URL of the file in Payload
|
|
43
|
-
console.log(`[Payload] Getting public URL: ${filePath}`);
|
|
44
|
-
return `${this.apiUrl}/${this.collection}/${filePath}`;
|
|
45
|
-
}
|
|
46
|
-
}
|