npm - @mixio-pro/kalaasetu-mcp - Versions diffs - 1.0.7 → 1.0.9 - Mend

@mixio-pro/kalaasetu-mcp 1.0.7 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/package.json +1 -1
package/src/tools/gemini.ts +58 -23
package/src/tools/hunyuan-avatar.ts +83 -25
package/src/tools/image-to-video.ts +17 -6
package/src/tools/infinitalk.ts +87 -27
package/src/utils/filename.ts +22 -0

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@mixio-pro/kalaasetu-mcp",
-  "version": "1.0.7",
+  "version": "1.0.9",
   "description": "A powerful Model Context Protocol server providing AI tools for content generation and analysis",
   "type": "module",
   "module": "src/index.ts",

package/src/tools/gemini.ts CHANGED

@@ -10,6 +10,7 @@ import * as os from "os";
 import * as wav from "wav";
 import { PassThrough } from "stream";
 import { getStorage } from "../storage";
+import { generateTimestampedFilename } from "../utils/filename";
 const ai = new GoogleGenAI({
   apiKey: process.env.GEMINI_API_KEY || "",
@@ -194,30 +195,44 @@ export const geminiTextToImage = {
         },
       });
-      let result = "";
+      const images = [];
+      let textResponse = "";
       if (response.candidates && response.candidates[0]?.content?.parts) {
         for (const part of response.candidates[0].content.parts) {
           if (part.text) {
-            result += part.text;
+            textResponse += part.text;
           } else if (part.inlineData?.data) {
             const imageData = part.inlineData.data;
             if (args.output_path) {
               const storage = getStorage();
+              const timestampedPath = generateTimestampedFilename(
+                args.output_path
+              );
               const url = await storage.writeFile(
-                args.output_path,
+                timestampedPath,
                 Buffer.from(imageData, "base64")
               );
-              result += `\nImage saved to: ${url}`;
-            } else {
-              result += `\nGenerated image (base64): ${imageData.substring(
-                0,
-                100
-              )}...`;
+              images.push({
+                url,
+                filename: timestampedPath,
+                mimeType: "image/png",
+              });
             }
           }
         }
       }
-      return result || "Image generation completed but no response received";
+      if (images.length > 0) {
+        return JSON.stringify({
+          images,
+          message: textResponse || "Image generated successfully",
+        });
+      }
+      return (
+        textResponse || "Image generation completed but no response received"
+      );
     } catch (error: any) {
       throw new Error(`Image generation failed: ${error.message}`);
     }
@@ -261,30 +276,42 @@ export const geminiEditImage = {
         contents: contents,
       });
-      let result = "";
+      const images = [];
+      let textResponse = "";
       if (response.candidates && response.candidates[0]?.content?.parts) {
         for (const part of response.candidates[0].content.parts) {
           if (part.text) {
-            result += part.text;
+            textResponse += part.text;
           } else if (part.inlineData?.data) {
             const imageData = part.inlineData.data;
             if (args.output_path) {
               const storage = getStorage();
+              const timestampedPath = generateTimestampedFilename(
+                args.output_path
+              );
               const url = await storage.writeFile(
-                args.output_path,
+                timestampedPath,
                 Buffer.from(imageData, "base64")
               );
-              result += `\nEdited image saved to: ${url}`;
-            } else {
-              result += `\nEdited image (base64): ${imageData.substring(
-                0,
-                100
-              )}...`;
+              images.push({
+                url,
+                filename: timestampedPath,
+                mimeType: "image/png",
+              });
             }
           }
         }
       }
-      return result || "Image editing completed but no response received";
+      if (images.length > 0) {
+        return JSON.stringify({
+          images,
+          message: textResponse || "Image edited successfully",
+        });
+      }
+      return textResponse || "Image editing completed but no response received";
     } catch (error: any) {
       throw new Error(`Image editing failed: ${error.message}`);
     }
@@ -405,12 +432,20 @@ export const geminiSingleSpeakerTts = {
       const audioBuffer = Buffer.from(data, "base64");
       // Generate output filename if not provided
-      const outputPath = args.output_path || `voice_output_${Date.now()}.wav`;
+      const outputPath = args.output_path || "voice_output.wav";
+      const timestampedPath = generateTimestampedFilename(outputPath);
       const storage = getStorage();
-      const url = await storage.writeFile(outputPath, audioBuffer);
+      const url = await storage.writeFile(timestampedPath, audioBuffer);
-      return `Audio generated successfully: ${url}`;
+      return JSON.stringify({
+        audio: {
+          url,
+          filename: outputPath,
+          mimeType: "audio/wav",
+        },
+        message: "Audio generated successfully",
+      });
     } catch (error: any) {
       throw new Error(`Voice generation failed: ${error.message}`);
     }

package/src/tools/hunyuan-avatar.ts CHANGED

@@ -5,10 +5,12 @@ import { callFalModel } from "../utils/fal.utils";
  * Calculate number of frames based on audio duration at 25 FPS
  * Adds 1 second buffer to ensure complete audio coverage
  */
-function calculateFramesFromAudioDuration(audioDurationSeconds: number): number {
+function calculateFramesFromAudioDuration(
+  audioDurationSeconds: number
+): number {
   const totalDuration = audioDurationSeconds + 1; // Add 1 second buffer
   const frames = Math.round(totalDuration * 25); // 25 FPS
   // Clamp to valid range (129-401 frames)
   return Math.max(129, Math.min(401, frames));
 }
@@ -18,17 +20,52 @@ function calculateFramesFromAudioDuration(audioDurationSeconds: number): number
  */
 export const hunyuanAvatar = {
   name: "hunyuan_avatar",
-  description: "Generate high-fidelity audio-driven human animation videos using FAL AI Hunyuan Avatar. Creates realistic talking avatar animations from an image and audio file.",
+  description:
+    "Generate high-fidelity audio-driven human animation videos using FAL AI Hunyuan Avatar. Creates realistic talking avatar animations from an image and audio file.",
   parameters: z.object({
-    image_url: z.string().describe("Public URL of the reference image for the avatar."),
-    audio_url: z.string().describe("Public URL of the audio file to drive the animation."),
-    audio_duration_seconds: z.number().optional().describe("Duration of the audio in seconds. If provided, will automatically calculate optimal frames (audio duration + 1 second buffer at 25 FPS)."),
-    text: z.string().optional().describe("Text prompt describing the scene. Default: 'A cat is singing.'"),
-    num_frames: z.number().optional().describe("Number of video frames to generate at 25 FPS. Range: 129 to 401. If not provided and audio_duration_seconds is given, will be calculated automatically. Default: 129"),
-    num_inference_steps: z.number().optional().describe("Number of inference steps for sampling. Higher values give better quality but take longer. Range: 30 to 50. Default: 30"),
-    turbo_mode: z.boolean().optional().describe("If true, the video will be generated faster with no noticeable degradation in visual quality. Default: true"),
+    image_url: z
+      .string()
+      .describe("Public URL of the reference image for the avatar."),
+    audio_url: z
+      .string()
+      .describe("Public URL of the audio file to drive the animation."),
+    audio_duration_seconds: z
+      .number()
+      .optional()
+      .describe(
+        "Duration of the audio in seconds. If provided, will automatically calculate optimal frames (audio duration + 1 second buffer at 25 FPS)."
+      ),
+    text: z
+      .string()
+      .optional()
+      .describe(
+        "Text prompt describing the scene. Default: 'A cat is singing.'"
+      ),
+    num_frames: z
+      .number()
+      .optional()
+      .describe(
+        "Number of video frames to generate at 25 FPS. Range: 129 to 401. If not provided and audio_duration_seconds is given, will be calculated automatically. Default: 129"
+      ),
+    num_inference_steps: z
+      .number()
+      .optional()
+      .describe(
+        "Number of inference steps for sampling. Higher values give better quality but take longer. Range: 30 to 50. Default: 30"
+      ),
+    turbo_mode: z
+      .boolean()
+      .optional()
+      .describe(
+        "If true, the video will be generated faster with no noticeable degradation in visual quality. Default: true"
+      ),
     seed: z.number().optional().describe("Random seed for generation."),
-    fal_key: z.string().optional().describe("FAL API key. If not provided, will use FAL_KEY environment variable."),
+    fal_key: z
+      .string()
+      .optional()
+      .describe(
+        "FAL API key. If not provided, will use FAL_KEY environment variable."
+      ),
   }),
   execute: async (args: {
     image_url: string;
@@ -43,17 +80,28 @@ export const hunyuanAvatar = {
   }) => {
     // Calculate frames from audio duration if provided and num_frames not specified
     let calculatedFrames = args.num_frames;
-    if (args.audio_duration_seconds !== undefined && args.num_frames === undefined) {
-      calculatedFrames = calculateFramesFromAudioDuration(args.audio_duration_seconds);
+    if (
+      args.audio_duration_seconds !== undefined &&
+      args.num_frames === undefined
+    ) {
+      calculatedFrames = calculateFramesFromAudioDuration(
+        args.audio_duration_seconds
+      );
     }
     // Validate num_frames range if provided
-    if (calculatedFrames !== undefined && (calculatedFrames < 129 || calculatedFrames > 401)) {
+    if (
+      calculatedFrames !== undefined &&
+      (calculatedFrames < 129 || calculatedFrames > 401)
+    ) {
       throw new Error("num_frames must be between 129 and 401");
     }
     // Validate num_inference_steps range if provided
-    if (args.num_inference_steps !== undefined && (args.num_inference_steps < 30 || args.num_inference_steps > 50)) {
+    if (
+      args.num_inference_steps !== undefined &&
+      (args.num_inference_steps < 30 || args.num_inference_steps > 50)
+    ) {
       throw new Error("num_inference_steps must be between 30 and 50");
     }
@@ -80,23 +128,33 @@ export const hunyuanAvatar = {
       input.seed = args.seed;
     }
-    const result = await callFalModel("fal-ai/hunyuan-avatar", input, { falKey: args.fal_key });
+    const result = await callFalModel("fal-ai/hunyuan-avatar", input, {
+      falKey: args.fal_key,
+    });
     // Extract video data from the response
     const videoData = result.data?.video;
     if (!videoData || !videoData.url) {
-      throw new Error(`No video data in completed response: ${JSON.stringify(result.data)}`);
+      throw new Error(
+        `No video data in completed response: ${JSON.stringify(result.data)}`
+      );
     }
     const videoUrl = videoData.url;
-    const fileDetails = videoData.file_name && videoData.file_size !== undefined
-      ? `\nFile: ${videoData.file_name} (${(videoData.file_size / 1024 / 1024).toFixed(2)} MB)`
-      : "";
-    const requestIdInfo = result.requestId ? `\nRequest ID: ${result.requestId}` : "";
+    const fileName = videoData.file_name || "hunyuan_avatar.mp4";
-    return videoUrl
-    // return `✅ Hunyuan Avatar video generated successfully!\n\nVideo URL: ${videoUrl}${fileDetails}${requestIdInfo}`;
+    return JSON.stringify({
+      videos: [
+        {
+          url: videoUrl,
+          filename: fileName,
+          mimeType: "video/mp4",
+          filesize: videoData.file_size,
+        },
+      ],
+      message: "Hunyuan Avatar video generated successfully",
+      requestId: result.requestId,
+    });
   },
-};
+};

package/src/tools/image-to-video.ts CHANGED

@@ -4,6 +4,7 @@ import { exec } from "child_process";
 import * as path from "path";
 import { z } from "zod";
 import { getStorage } from "../storage";
+import { generateTimestampedFilename } from "../utils/filename";
 async function wait(ms: number): Promise<void> {
   return new Promise((resolve) => setTimeout(resolve, ms));
@@ -283,19 +284,26 @@ export const imageToVideo = {
     const resp = current.response || current;
     // Decode from response.videos[].bytesBase64Encoded only
-    const outputs: string[] = [];
+    const videos: Array<{ url: string; filename: string; mimeType: string }> =
+      [];
     const saveVideo = async (base64: string, index: number) => {
       if (!base64) return;
-      const filePath = args.output_path
+      const baseFilename = args.output_path
         ? index === 0
           ? args.output_path
           : args.output_path.replace(/\.mp4$/i, `_${index}.mp4`)
-        : `video_output_${Date.now()}${index === 0 ? "" : "_" + index}.mp4`;
+        : `video_output${index > 0 ? `_${index}` : ""}.mp4`;
+      const filePath = generateTimestampedFilename(baseFilename);
       const buf = Buffer.from(base64, "base64");
       const storage = getStorage();
       const url = await storage.writeFile(filePath, buf);
-      outputs.push(url);
+      videos.push({
+        url,
+        filename: filePath,
+        mimeType: "video/mp4",
+      });
     };
     if (Array.isArray(resp?.videos) && resp.videos.length > 0) {
@@ -306,8 +314,11 @@ export const imageToVideo = {
         }
       }
     }
-    if (outputs.length > 0) {
-      return `Video(s) saved to: ${outputs.join(", ")}`;
+    if (videos.length > 0) {
+      return JSON.stringify({
+        videos,
+        message: "Video(s) generated successfully",
+      });
     }
     // If nothing saved, return a concise summary plus head/tail snippets of JSON

package/src/tools/infinitalk.ts CHANGED

@@ -5,10 +5,12 @@ import { callFalModel } from "../utils/fal.utils";
  * Calculate number of frames based on audio duration at 25 FPS
  * Adds 1 second buffer to ensure complete audio coverage
  */
-function calculateFramesFromAudioDuration(audioDurationSeconds: number): number {
+function calculateFramesFromAudioDuration(
+  audioDurationSeconds: number
+): number {
   const totalDuration = audioDurationSeconds + 1; // Add 1 second buffer
   const frames = Math.round(totalDuration * 25); // 25 FPS
   // Clamp to valid range (41-721 frames)
   return Math.max(41, Math.min(721, frames));
 }
@@ -18,17 +20,56 @@ function calculateFramesFromAudioDuration(audioDurationSeconds: number): number
  */
 export const infinitalk = {
   name: "infinitalk",
-  description: "Generate a talking avatar video from an image and audio file using FAL AI Infinitalk. The avatar lip-syncs to the provided audio with natural facial expressions.",
+  description:
+    "Generate a talking avatar video from an image and audio file using FAL AI Infinitalk. The avatar lip-syncs to the provided audio with natural facial expressions.",
   parameters: z.object({
-    image_url: z.string().describe("Public URL of the input image. If the input image does not match the chosen aspect ratio, it is resized and center cropped."),
-    audio_url: z.string().describe("The Public URL of the audio file for lip-sync generation."),
-    audio_duration_seconds: z.number().optional().describe("Duration of the audio in seconds. If provided, will automatically calculate optimal frames (audio duration + 1 second buffer at 25 FPS)."),
-    prompt: z.string().describe("The text prompt to guide video generation (e.g., 'A woman with colorful hair talking on a podcast')"),
-    num_frames: z.number().optional().describe("Number of frames to generate. Must be between 41 to 721. If not provided and audio_duration_seconds is given, will be calculated automatically. Default: 145"),
-    resolution: z.enum(["480p", "720p"]).optional().describe("Resolution of the video to generate. Default: '480p'"),
-    seed: z.number().optional().describe("Random seed for reproducibility. If not provided, a random seed is chosen. Default: 42"),
-    acceleration: z.enum(["none", "regular", "high"]).optional().describe("The acceleration level to use for generation. Default: 'regular'"),
-    fal_key: z.string().optional().describe("FAL API key. If not provided, will use FAL_KEY environment variable."),
+    image_url: z
+      .string()
+      .describe(
+        "Public URL of the input image. If the input image does not match the chosen aspect ratio, it is resized and center cropped."
+      ),
+    audio_url: z
+      .string()
+      .describe("The Public URL of the audio file for lip-sync generation."),
+    audio_duration_seconds: z
+      .number()
+      .optional()
+      .describe(
+        "Duration of the audio in seconds. If provided, will automatically calculate optimal frames (audio duration + 1 second buffer at 25 FPS)."
+      ),
+    prompt: z
+      .string()
+      .describe(
+        "The text prompt to guide video generation (e.g., 'A woman with colorful hair talking on a podcast')"
+      ),
+    num_frames: z
+      .number()
+      .optional()
+      .describe(
+        "Number of frames to generate. Must be between 41 to 721. If not provided and audio_duration_seconds is given, will be calculated automatically. Default: 145"
+      ),
+    resolution: z
+      .enum(["480p", "720p"])
+      .optional()
+      .describe("Resolution of the video to generate. Default: '480p'"),
+    seed: z
+      .number()
+      .optional()
+      .describe(
+        "Random seed for reproducibility. If not provided, a random seed is chosen. Default: 42"
+      ),
+    acceleration: z
+      .enum(["none", "regular", "high"])
+      .optional()
+      .describe(
+        "The acceleration level to use for generation. Default: 'regular'"
+      ),
+    fal_key: z
+      .string()
+      .optional()
+      .describe(
+        "FAL API key. If not provided, will use FAL_KEY environment variable."
+      ),
   }),
   execute: async (args: {
     image_url: string;
@@ -43,12 +84,20 @@ export const infinitalk = {
   }) => {
     // Calculate frames from audio duration if provided and num_frames not specified
     let calculatedFrames = args.num_frames;
-    if (args.audio_duration_seconds !== undefined && args.num_frames === undefined) {
-      calculatedFrames = calculateFramesFromAudioDuration(args.audio_duration_seconds);
+    if (
+      args.audio_duration_seconds !== undefined &&
+      args.num_frames === undefined
+    ) {
+      calculatedFrames = calculateFramesFromAudioDuration(
+        args.audio_duration_seconds
+      );
     }
     // Validate num_frames range if provided
-    if (calculatedFrames !== undefined && (calculatedFrames < 41 || calculatedFrames > 721)) {
+    if (
+      calculatedFrames !== undefined &&
+      (calculatedFrames < 41 || calculatedFrames > 721)
+    ) {
       throw new Error("num_frames must be between 41 and 721");
     }
@@ -63,9 +112,9 @@ export const infinitalk = {
     if (calculatedFrames !== undefined) {
       input.num_frames = calculatedFrames;
     }
-    input.resolution = args.resolution || '480p';
+    input.resolution = args.resolution || "480p";
     if (args.seed !== undefined) {
       input.seed = args.seed;
     }
@@ -73,24 +122,35 @@ export const infinitalk = {
       input.acceleration = args.acceleration;
     }
-    const result = await callFalModel("fal-ai/infinitalk", input, { falKey: args.fal_key });
+    const result = await callFalModel("fal-ai/infinitalk", input, {
+      falKey: args.fal_key,
+    });
     // Extract video data from the response
     const videoData = result.data?.video;
     const seed = result.data?.seed;
     if (!videoData || !videoData.url) {
-      throw new Error(`No video data in completed response: ${JSON.stringify(result.data)}`);
+      throw new Error(
+        `No video data in completed response: ${JSON.stringify(result.data)}`
+      );
     }
     const videoUrl = videoData.url;
-    const fileDetails = videoData.file_name && videoData.file_size !== undefined
-      ? `\nFile: ${videoData.file_name} (${(videoData.file_size / 1024 / 1024).toFixed(2)} MB)`
-      : "";
-    const seedInfo = seed !== undefined ? `\nSeed: ${seed}` : "";
-    const requestIdInfo = result.requestId ? `\nRequest ID: ${result.requestId}` : "";
+    const fileName = videoData.file_name || "infinitalk.mp4";
-    return videoUrl
-    // return `✅ Infinitalk video generated successfully!\n\nVideo URL: ${videoUrl}${fileDetails}${seedInfo}${requestIdInfo}`;
+    return JSON.stringify({
+      videos: [
+        {
+          url: videoUrl,
+          filename: fileName,
+          mimeType: "video/mp4",
+          filesize: videoData.file_size,
+        },
+      ],
+      message: "Infinitalk video generated successfully",
+      seed: seed,
+      requestId: result.requestId,
+    });
   },
 };

package/src/utils/filename.ts ADDED

@@ -0,0 +1,22 @@
+/**
+ * Generate a timestamped filename to avoid conflicts
+ * Format: YYYYMMDD_HHmmss_filename.ext
+ */
+export function generateTimestampedFilename(basename: string): string {
+  const now = new Date();
+  const timestamp = now
+    .toISOString()
+    .replace(/[-:]/g, "")
+    .replace(/\.\d{3}Z$/, "")
+    .replace("T", "_");
+  // Extract extension if present
+  const lastDot = basename.lastIndexOf(".");
+  if (lastDot > 0) {
+    const name = basename.substring(0, lastDot);
+    const ext = basename.substring(lastDot);
+    return `${timestamp}_${name}${ext}`;
+  }
+  return `${timestamp}_${basename}`;
+}