@mixio-pro/kalaasetu-mcp 1.0.7 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mixio-pro/kalaasetu-mcp",
3
- "version": "1.0.7",
3
+ "version": "1.0.9",
4
4
  "description": "A powerful Model Context Protocol server providing AI tools for content generation and analysis",
5
5
  "type": "module",
6
6
  "module": "src/index.ts",
@@ -10,6 +10,7 @@ import * as os from "os";
10
10
  import * as wav from "wav";
11
11
  import { PassThrough } from "stream";
12
12
  import { getStorage } from "../storage";
13
+ import { generateTimestampedFilename } from "../utils/filename";
13
14
 
14
15
  const ai = new GoogleGenAI({
15
16
  apiKey: process.env.GEMINI_API_KEY || "",
@@ -194,30 +195,44 @@ export const geminiTextToImage = {
194
195
  },
195
196
  });
196
197
 
197
- let result = "";
198
+ const images = [];
199
+ let textResponse = "";
200
+
198
201
  if (response.candidates && response.candidates[0]?.content?.parts) {
199
202
  for (const part of response.candidates[0].content.parts) {
200
203
  if (part.text) {
201
- result += part.text;
204
+ textResponse += part.text;
202
205
  } else if (part.inlineData?.data) {
203
206
  const imageData = part.inlineData.data;
204
207
  if (args.output_path) {
205
208
  const storage = getStorage();
209
+ const timestampedPath = generateTimestampedFilename(
210
+ args.output_path
211
+ );
206
212
  const url = await storage.writeFile(
207
- args.output_path,
213
+ timestampedPath,
208
214
  Buffer.from(imageData, "base64")
209
215
  );
210
- result += `\nImage saved to: ${url}`;
211
- } else {
212
- result += `\nGenerated image (base64): ${imageData.substring(
213
- 0,
214
- 100
215
- )}...`;
216
+ images.push({
217
+ url,
218
+ filename: timestampedPath,
219
+ mimeType: "image/png",
220
+ });
216
221
  }
217
222
  }
218
223
  }
219
224
  }
220
- return result || "Image generation completed but no response received";
225
+
226
+ if (images.length > 0) {
227
+ return JSON.stringify({
228
+ images,
229
+ message: textResponse || "Image generated successfully",
230
+ });
231
+ }
232
+
233
+ return (
234
+ textResponse || "Image generation completed but no response received"
235
+ );
221
236
  } catch (error: any) {
222
237
  throw new Error(`Image generation failed: ${error.message}`);
223
238
  }
@@ -261,30 +276,42 @@ export const geminiEditImage = {
261
276
  contents: contents,
262
277
  });
263
278
 
264
- let result = "";
279
+ const images = [];
280
+ let textResponse = "";
281
+
265
282
  if (response.candidates && response.candidates[0]?.content?.parts) {
266
283
  for (const part of response.candidates[0].content.parts) {
267
284
  if (part.text) {
268
- result += part.text;
285
+ textResponse += part.text;
269
286
  } else if (part.inlineData?.data) {
270
287
  const imageData = part.inlineData.data;
271
288
  if (args.output_path) {
272
289
  const storage = getStorage();
290
+ const timestampedPath = generateTimestampedFilename(
291
+ args.output_path
292
+ );
273
293
  const url = await storage.writeFile(
274
- args.output_path,
294
+ timestampedPath,
275
295
  Buffer.from(imageData, "base64")
276
296
  );
277
- result += `\nEdited image saved to: ${url}`;
278
- } else {
279
- result += `\nEdited image (base64): ${imageData.substring(
280
- 0,
281
- 100
282
- )}...`;
297
+ images.push({
298
+ url,
299
+ filename: timestampedPath,
300
+ mimeType: "image/png",
301
+ });
283
302
  }
284
303
  }
285
304
  }
286
305
  }
287
- return result || "Image editing completed but no response received";
306
+
307
+ if (images.length > 0) {
308
+ return JSON.stringify({
309
+ images,
310
+ message: textResponse || "Image edited successfully",
311
+ });
312
+ }
313
+
314
+ return textResponse || "Image editing completed but no response received";
288
315
  } catch (error: any) {
289
316
  throw new Error(`Image editing failed: ${error.message}`);
290
317
  }
@@ -405,12 +432,20 @@ export const geminiSingleSpeakerTts = {
405
432
  const audioBuffer = Buffer.from(data, "base64");
406
433
 
407
434
  // Generate output filename if not provided
408
- const outputPath = args.output_path || `voice_output_${Date.now()}.wav`;
435
+ const outputPath = args.output_path || "voice_output.wav";
436
+ const timestampedPath = generateTimestampedFilename(outputPath);
409
437
 
410
438
  const storage = getStorage();
411
- const url = await storage.writeFile(outputPath, audioBuffer);
439
+ const url = await storage.writeFile(timestampedPath, audioBuffer);
412
440
 
413
- return `Audio generated successfully: ${url}`;
441
+ return JSON.stringify({
442
+ audio: {
443
+ url,
444
+ filename: outputPath,
445
+ mimeType: "audio/wav",
446
+ },
447
+ message: "Audio generated successfully",
448
+ });
414
449
  } catch (error: any) {
415
450
  throw new Error(`Voice generation failed: ${error.message}`);
416
451
  }
@@ -5,10 +5,12 @@ import { callFalModel } from "../utils/fal.utils";
5
5
  * Calculate number of frames based on audio duration at 25 FPS
6
6
  * Adds 1 second buffer to ensure complete audio coverage
7
7
  */
8
- function calculateFramesFromAudioDuration(audioDurationSeconds: number): number {
8
+ function calculateFramesFromAudioDuration(
9
+ audioDurationSeconds: number
10
+ ): number {
9
11
  const totalDuration = audioDurationSeconds + 1; // Add 1 second buffer
10
12
  const frames = Math.round(totalDuration * 25); // 25 FPS
11
-
13
+
12
14
  // Clamp to valid range (129-401 frames)
13
15
  return Math.max(129, Math.min(401, frames));
14
16
  }
@@ -18,17 +20,52 @@ function calculateFramesFromAudioDuration(audioDurationSeconds: number): number
18
20
  */
19
21
  export const hunyuanAvatar = {
20
22
  name: "hunyuan_avatar",
21
- description: "Generate high-fidelity audio-driven human animation videos using FAL AI Hunyuan Avatar. Creates realistic talking avatar animations from an image and audio file.",
23
+ description:
24
+ "Generate high-fidelity audio-driven human animation videos using FAL AI Hunyuan Avatar. Creates realistic talking avatar animations from an image and audio file.",
22
25
  parameters: z.object({
23
- image_url: z.string().describe("Public URL of the reference image for the avatar."),
24
- audio_url: z.string().describe("Public URL of the audio file to drive the animation."),
25
- audio_duration_seconds: z.number().optional().describe("Duration of the audio in seconds. If provided, will automatically calculate optimal frames (audio duration + 1 second buffer at 25 FPS)."),
26
- text: z.string().optional().describe("Text prompt describing the scene. Default: 'A cat is singing.'"),
27
- num_frames: z.number().optional().describe("Number of video frames to generate at 25 FPS. Range: 129 to 401. If not provided and audio_duration_seconds is given, will be calculated automatically. Default: 129"),
28
- num_inference_steps: z.number().optional().describe("Number of inference steps for sampling. Higher values give better quality but take longer. Range: 30 to 50. Default: 30"),
29
- turbo_mode: z.boolean().optional().describe("If true, the video will be generated faster with no noticeable degradation in visual quality. Default: true"),
26
+ image_url: z
27
+ .string()
28
+ .describe("Public URL of the reference image for the avatar."),
29
+ audio_url: z
30
+ .string()
31
+ .describe("Public URL of the audio file to drive the animation."),
32
+ audio_duration_seconds: z
33
+ .number()
34
+ .optional()
35
+ .describe(
36
+ "Duration of the audio in seconds. If provided, will automatically calculate optimal frames (audio duration + 1 second buffer at 25 FPS)."
37
+ ),
38
+ text: z
39
+ .string()
40
+ .optional()
41
+ .describe(
42
+ "Text prompt describing the scene. Default: 'A cat is singing.'"
43
+ ),
44
+ num_frames: z
45
+ .number()
46
+ .optional()
47
+ .describe(
48
+ "Number of video frames to generate at 25 FPS. Range: 129 to 401. If not provided and audio_duration_seconds is given, will be calculated automatically. Default: 129"
49
+ ),
50
+ num_inference_steps: z
51
+ .number()
52
+ .optional()
53
+ .describe(
54
+ "Number of inference steps for sampling. Higher values give better quality but take longer. Range: 30 to 50. Default: 30"
55
+ ),
56
+ turbo_mode: z
57
+ .boolean()
58
+ .optional()
59
+ .describe(
60
+ "If true, the video will be generated faster with no noticeable degradation in visual quality. Default: true"
61
+ ),
30
62
  seed: z.number().optional().describe("Random seed for generation."),
31
- fal_key: z.string().optional().describe("FAL API key. If not provided, will use FAL_KEY environment variable."),
63
+ fal_key: z
64
+ .string()
65
+ .optional()
66
+ .describe(
67
+ "FAL API key. If not provided, will use FAL_KEY environment variable."
68
+ ),
32
69
  }),
33
70
  execute: async (args: {
34
71
  image_url: string;
@@ -43,17 +80,28 @@ export const hunyuanAvatar = {
43
80
  }) => {
44
81
  // Calculate frames from audio duration if provided and num_frames not specified
45
82
  let calculatedFrames = args.num_frames;
46
- if (args.audio_duration_seconds !== undefined && args.num_frames === undefined) {
47
- calculatedFrames = calculateFramesFromAudioDuration(args.audio_duration_seconds);
83
+ if (
84
+ args.audio_duration_seconds !== undefined &&
85
+ args.num_frames === undefined
86
+ ) {
87
+ calculatedFrames = calculateFramesFromAudioDuration(
88
+ args.audio_duration_seconds
89
+ );
48
90
  }
49
91
 
50
92
  // Validate num_frames range if provided
51
- if (calculatedFrames !== undefined && (calculatedFrames < 129 || calculatedFrames > 401)) {
93
+ if (
94
+ calculatedFrames !== undefined &&
95
+ (calculatedFrames < 129 || calculatedFrames > 401)
96
+ ) {
52
97
  throw new Error("num_frames must be between 129 and 401");
53
98
  }
54
99
 
55
100
  // Validate num_inference_steps range if provided
56
- if (args.num_inference_steps !== undefined && (args.num_inference_steps < 30 || args.num_inference_steps > 50)) {
101
+ if (
102
+ args.num_inference_steps !== undefined &&
103
+ (args.num_inference_steps < 30 || args.num_inference_steps > 50)
104
+ ) {
57
105
  throw new Error("num_inference_steps must be between 30 and 50");
58
106
  }
59
107
 
@@ -80,23 +128,33 @@ export const hunyuanAvatar = {
80
128
  input.seed = args.seed;
81
129
  }
82
130
 
83
- const result = await callFalModel("fal-ai/hunyuan-avatar", input, { falKey: args.fal_key });
131
+ const result = await callFalModel("fal-ai/hunyuan-avatar", input, {
132
+ falKey: args.fal_key,
133
+ });
84
134
 
85
135
  // Extract video data from the response
86
136
  const videoData = result.data?.video;
87
137
 
88
138
  if (!videoData || !videoData.url) {
89
- throw new Error(`No video data in completed response: ${JSON.stringify(result.data)}`);
139
+ throw new Error(
140
+ `No video data in completed response: ${JSON.stringify(result.data)}`
141
+ );
90
142
  }
91
143
 
92
144
  const videoUrl = videoData.url;
93
- const fileDetails = videoData.file_name && videoData.file_size !== undefined
94
- ? `\nFile: ${videoData.file_name} (${(videoData.file_size / 1024 / 1024).toFixed(2)} MB)`
95
- : "";
96
- const requestIdInfo = result.requestId ? `\nRequest ID: ${result.requestId}` : "";
97
-
145
+ const fileName = videoData.file_name || "hunyuan_avatar.mp4";
98
146
 
99
- return videoUrl
100
- // return `✅ Hunyuan Avatar video generated successfully!\n\nVideo URL: ${videoUrl}${fileDetails}${requestIdInfo}`;
147
+ return JSON.stringify({
148
+ videos: [
149
+ {
150
+ url: videoUrl,
151
+ filename: fileName,
152
+ mimeType: "video/mp4",
153
+ filesize: videoData.file_size,
154
+ },
155
+ ],
156
+ message: "Hunyuan Avatar video generated successfully",
157
+ requestId: result.requestId,
158
+ });
101
159
  },
102
- };
160
+ };
@@ -4,6 +4,7 @@ import { exec } from "child_process";
4
4
  import * as path from "path";
5
5
  import { z } from "zod";
6
6
  import { getStorage } from "../storage";
7
+ import { generateTimestampedFilename } from "../utils/filename";
7
8
 
8
9
  async function wait(ms: number): Promise<void> {
9
10
  return new Promise((resolve) => setTimeout(resolve, ms));
@@ -283,19 +284,26 @@ export const imageToVideo = {
283
284
 
284
285
  const resp = current.response || current;
285
286
  // Decode from response.videos[].bytesBase64Encoded only
286
- const outputs: string[] = [];
287
+ const videos: Array<{ url: string; filename: string; mimeType: string }> =
288
+ [];
287
289
  const saveVideo = async (base64: string, index: number) => {
288
290
  if (!base64) return;
289
- const filePath = args.output_path
291
+ const baseFilename = args.output_path
290
292
  ? index === 0
291
293
  ? args.output_path
292
294
  : args.output_path.replace(/\.mp4$/i, `_${index}.mp4`)
293
- : `video_output_${Date.now()}${index === 0 ? "" : "_" + index}.mp4`;
295
+ : `video_output${index > 0 ? `_${index}` : ""}.mp4`;
296
+
297
+ const filePath = generateTimestampedFilename(baseFilename);
294
298
 
295
299
  const buf = Buffer.from(base64, "base64");
296
300
  const storage = getStorage();
297
301
  const url = await storage.writeFile(filePath, buf);
298
- outputs.push(url);
302
+ videos.push({
303
+ url,
304
+ filename: filePath,
305
+ mimeType: "video/mp4",
306
+ });
299
307
  };
300
308
 
301
309
  if (Array.isArray(resp?.videos) && resp.videos.length > 0) {
@@ -306,8 +314,11 @@ export const imageToVideo = {
306
314
  }
307
315
  }
308
316
  }
309
- if (outputs.length > 0) {
310
- return `Video(s) saved to: ${outputs.join(", ")}`;
317
+ if (videos.length > 0) {
318
+ return JSON.stringify({
319
+ videos,
320
+ message: "Video(s) generated successfully",
321
+ });
311
322
  }
312
323
 
313
324
  // If nothing saved, return a concise summary plus head/tail snippets of JSON
@@ -5,10 +5,12 @@ import { callFalModel } from "../utils/fal.utils";
5
5
  * Calculate number of frames based on audio duration at 25 FPS
6
6
  * Adds 1 second buffer to ensure complete audio coverage
7
7
  */
8
- function calculateFramesFromAudioDuration(audioDurationSeconds: number): number {
8
+ function calculateFramesFromAudioDuration(
9
+ audioDurationSeconds: number
10
+ ): number {
9
11
  const totalDuration = audioDurationSeconds + 1; // Add 1 second buffer
10
12
  const frames = Math.round(totalDuration * 25); // 25 FPS
11
-
13
+
12
14
  // Clamp to valid range (41-721 frames)
13
15
  return Math.max(41, Math.min(721, frames));
14
16
  }
@@ -18,17 +20,56 @@ function calculateFramesFromAudioDuration(audioDurationSeconds: number): number
18
20
  */
19
21
  export const infinitalk = {
20
22
  name: "infinitalk",
21
- description: "Generate a talking avatar video from an image and audio file using FAL AI Infinitalk. The avatar lip-syncs to the provided audio with natural facial expressions.",
23
+ description:
24
+ "Generate a talking avatar video from an image and audio file using FAL AI Infinitalk. The avatar lip-syncs to the provided audio with natural facial expressions.",
22
25
  parameters: z.object({
23
- image_url: z.string().describe("Public URL of the input image. If the input image does not match the chosen aspect ratio, it is resized and center cropped."),
24
- audio_url: z.string().describe("The Public URL of the audio file for lip-sync generation."),
25
- audio_duration_seconds: z.number().optional().describe("Duration of the audio in seconds. If provided, will automatically calculate optimal frames (audio duration + 1 second buffer at 25 FPS)."),
26
- prompt: z.string().describe("The text prompt to guide video generation (e.g., 'A woman with colorful hair talking on a podcast')"),
27
- num_frames: z.number().optional().describe("Number of frames to generate. Must be between 41 to 721. If not provided and audio_duration_seconds is given, will be calculated automatically. Default: 145"),
28
- resolution: z.enum(["480p", "720p"]).optional().describe("Resolution of the video to generate. Default: '480p'"),
29
- seed: z.number().optional().describe("Random seed for reproducibility. If not provided, a random seed is chosen. Default: 42"),
30
- acceleration: z.enum(["none", "regular", "high"]).optional().describe("The acceleration level to use for generation. Default: 'regular'"),
31
- fal_key: z.string().optional().describe("FAL API key. If not provided, will use FAL_KEY environment variable."),
26
+ image_url: z
27
+ .string()
28
+ .describe(
29
+ "Public URL of the input image. If the input image does not match the chosen aspect ratio, it is resized and center cropped."
30
+ ),
31
+ audio_url: z
32
+ .string()
33
+ .describe("The Public URL of the audio file for lip-sync generation."),
34
+ audio_duration_seconds: z
35
+ .number()
36
+ .optional()
37
+ .describe(
38
+ "Duration of the audio in seconds. If provided, will automatically calculate optimal frames (audio duration + 1 second buffer at 25 FPS)."
39
+ ),
40
+ prompt: z
41
+ .string()
42
+ .describe(
43
+ "The text prompt to guide video generation (e.g., 'A woman with colorful hair talking on a podcast')"
44
+ ),
45
+ num_frames: z
46
+ .number()
47
+ .optional()
48
+ .describe(
49
+ "Number of frames to generate. Must be between 41 to 721. If not provided and audio_duration_seconds is given, will be calculated automatically. Default: 145"
50
+ ),
51
+ resolution: z
52
+ .enum(["480p", "720p"])
53
+ .optional()
54
+ .describe("Resolution of the video to generate. Default: '480p'"),
55
+ seed: z
56
+ .number()
57
+ .optional()
58
+ .describe(
59
+ "Random seed for reproducibility. If not provided, a random seed is chosen. Default: 42"
60
+ ),
61
+ acceleration: z
62
+ .enum(["none", "regular", "high"])
63
+ .optional()
64
+ .describe(
65
+ "The acceleration level to use for generation. Default: 'regular'"
66
+ ),
67
+ fal_key: z
68
+ .string()
69
+ .optional()
70
+ .describe(
71
+ "FAL API key. If not provided, will use FAL_KEY environment variable."
72
+ ),
32
73
  }),
33
74
  execute: async (args: {
34
75
  image_url: string;
@@ -43,12 +84,20 @@ export const infinitalk = {
43
84
  }) => {
44
85
  // Calculate frames from audio duration if provided and num_frames not specified
45
86
  let calculatedFrames = args.num_frames;
46
- if (args.audio_duration_seconds !== undefined && args.num_frames === undefined) {
47
- calculatedFrames = calculateFramesFromAudioDuration(args.audio_duration_seconds);
87
+ if (
88
+ args.audio_duration_seconds !== undefined &&
89
+ args.num_frames === undefined
90
+ ) {
91
+ calculatedFrames = calculateFramesFromAudioDuration(
92
+ args.audio_duration_seconds
93
+ );
48
94
  }
49
95
 
50
96
  // Validate num_frames range if provided
51
- if (calculatedFrames !== undefined && (calculatedFrames < 41 || calculatedFrames > 721)) {
97
+ if (
98
+ calculatedFrames !== undefined &&
99
+ (calculatedFrames < 41 || calculatedFrames > 721)
100
+ ) {
52
101
  throw new Error("num_frames must be between 41 and 721");
53
102
  }
54
103
 
@@ -63,9 +112,9 @@ export const infinitalk = {
63
112
  if (calculatedFrames !== undefined) {
64
113
  input.num_frames = calculatedFrames;
65
114
  }
66
-
67
- input.resolution = args.resolution || '480p';
68
-
115
+
116
+ input.resolution = args.resolution || "480p";
117
+
69
118
  if (args.seed !== undefined) {
70
119
  input.seed = args.seed;
71
120
  }
@@ -73,24 +122,35 @@ export const infinitalk = {
73
122
  input.acceleration = args.acceleration;
74
123
  }
75
124
 
76
- const result = await callFalModel("fal-ai/infinitalk", input, { falKey: args.fal_key });
125
+ const result = await callFalModel("fal-ai/infinitalk", input, {
126
+ falKey: args.fal_key,
127
+ });
77
128
 
78
129
  // Extract video data from the response
79
130
  const videoData = result.data?.video;
80
131
  const seed = result.data?.seed;
81
132
 
82
133
  if (!videoData || !videoData.url) {
83
- throw new Error(`No video data in completed response: ${JSON.stringify(result.data)}`);
134
+ throw new Error(
135
+ `No video data in completed response: ${JSON.stringify(result.data)}`
136
+ );
84
137
  }
85
138
 
86
139
  const videoUrl = videoData.url;
87
- const fileDetails = videoData.file_name && videoData.file_size !== undefined
88
- ? `\nFile: ${videoData.file_name} (${(videoData.file_size / 1024 / 1024).toFixed(2)} MB)`
89
- : "";
90
- const seedInfo = seed !== undefined ? `\nSeed: ${seed}` : "";
91
- const requestIdInfo = result.requestId ? `\nRequest ID: ${result.requestId}` : "";
140
+ const fileName = videoData.file_name || "infinitalk.mp4";
92
141
 
93
- return videoUrl
94
- // return `✅ Infinitalk video generated successfully!\n\nVideo URL: ${videoUrl}${fileDetails}${seedInfo}${requestIdInfo}`;
142
+ return JSON.stringify({
143
+ videos: [
144
+ {
145
+ url: videoUrl,
146
+ filename: fileName,
147
+ mimeType: "video/mp4",
148
+ filesize: videoData.file_size,
149
+ },
150
+ ],
151
+ message: "Infinitalk video generated successfully",
152
+ seed: seed,
153
+ requestId: result.requestId,
154
+ });
95
155
  },
96
156
  };
@@ -0,0 +1,22 @@
/**
 * Prefix the file-name portion of a path with a UTC timestamp so that
 * successive writes of the same logical name land in different files.
 *
 * Output shape: `[directory/]YYYYMMDD_HHmmss_filename.ext`
 *
 * Only the final path segment is stamped. Any leading directory part
 * (separated by `/` or `\`) is kept verbatim, so `out/img.png` becomes
 * `out/20250102_030405_img.png` rather than creating a new
 * `20250102_030405_out/` directory.
 *
 * The timestamp has one-second resolution; two calls with the same name
 * inside the same second produce the same result.
 *
 * @param basename File name or path to stamp. May include directories.
 * @param now Instant used for the timestamp; defaults to the current time.
 * @returns The path with the timestamp inserted before the file name.
 */
export function generateTimestampedFilename(
  basename: string,
  now: Date = new Date()
): string {
  // "2025-01-02T03:04:05.678Z" -> "20250102_030405" (always UTC).
  const timestamp = now
    .toISOString()
    .replace(/[-:]/g, "")
    .replace(/\.\d{3}Z$/, "")
    .replace("T", "_");

  // Split on the last separator of either flavour; -1 (no separator)
  // yields an empty directory and the whole input as the file name.
  const lastSeparator = Math.max(
    basename.lastIndexOf("/"),
    basename.lastIndexOf("\\")
  );
  const directory = basename.slice(0, lastSeparator + 1);
  const fileName = basename.slice(lastSeparator + 1);

  // The extension needs no special handling: the stamp is a prefix, so
  // whatever follows the last dot stays at the end of the name.
  return `${directory}${timestamp}_${fileName}`;
}