npm - @mixio-pro/kalaasetu-mcp - Versions diffs - 2.1.2-beta → 2.1.3-beta - Mend

@mixio-pro/kalaasetu-mcp 2.1.2-beta → 2.1.3-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/package.json +1 -1
package/src/index.ts +2 -0
package/src/tools/generate-image.ts +178 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@mixio-pro/kalaasetu-mcp",
-  "version": "2.1.2-beta",
+  "version": "2.1.3-beta",
   "description": "A powerful Model Context Protocol server providing AI tools for content generation and analysis",
   "type": "module",
   "module": "src/index.ts",

package/src/index.ts CHANGED Viewed

@@ -8,6 +8,7 @@ import {
 } from "./tools/fal";
 import { createAllFalTools } from "./tools/fal/dynamic-tools";
 import { geminiEditImage, geminiTextToImage } from "./tools/gemini";
+import { generateImageConversational } from "./tools/generate-image";
 import { imageToVideo } from "./tools/image-to-video";
 import { getGenerationStatus } from "./tools/get-status";
@@ -30,6 +31,7 @@ async function main() {
   // server.addTool(geminiTextToImage);
   // server.addTool(geminiEditImage);
   server.addTool(imageToVideo);
+  server.addTool(generateImageConversational);
   // 3. Add Discovery Tools
   server.addTool(falListPresets);

package/src/tools/generate-image.ts ADDED Viewed

@@ -0,0 +1,178 @@
+import { z } from "zod";
+import { GoogleGenAI } from "@google/genai";
+import * as fs from "fs";
+import { getStorage } from "../storage";
+import { generateTimestampedFilename } from "../utils/filename";
+import { safeToolExecute } from "../utils/tool-wrapper";
+import { ensureLocalFile } from "../utils/url-file";
+import { resolveEnhancer } from "../utils/prompt-enhancer-presets";
+// Module-level Gemini client used by the tool below. The key is read from
+// GEMINI_API_KEY once, when this module is loaded; an unset variable yields
+// an empty-string key rather than an error at import time.
+const geminiApiKey = process.env.GEMINI_API_KEY || "";
+const ai = new GoogleGenAI({ apiKey: geminiApiKey });
+/**
+ * Identifies an image format from its leading magic bytes.
+ *
+ * Recognises JPEG, PNG, GIF and WebP. Anything else falls back to
+ * "image/jpeg", which is the value this module previously sent for every file.
+ */
+function detectImageMimeType(bytes: Buffer): string {
+  const hasSignature = (signature: readonly number[], offset = 0): boolean =>
+    bytes.length >= offset + signature.length &&
+    signature.every((byte, i) => bytes[offset + i] === byte);
+  if (hasSignature([0xff, 0xd8, 0xff])) return "image/jpeg";
+  if (hasSignature([0x89, 0x50, 0x4e, 0x47])) return "image/png";
+  // "GIF8" covers both GIF87a and GIF89a.
+  if (hasSignature([0x47, 0x49, 0x46, 0x38])) return "image/gif";
+  // WebP is a RIFF container: "RIFF" <4-byte size> "WEBP".
+  if (
+    hasSignature([0x52, 0x49, 0x46, 0x46]) &&
+    hasSignature([0x57, 0x45, 0x42, 0x50], 8)
+  ) {
+    return "image/webp";
+  }
+  return "image/jpeg";
+}
+/**
+ * Loads an image and wraps it as an inline-data part (base64 payload plus
+ * MIME type) suitable for a Gemini `generateContent` request.
+ *
+ * @param filePath Path or URL of the image; resolved through `ensureLocalFile`.
+ * @returns An object of the form `{ inlineData: { data, mimeType } }`.
+ */
+async function fileToGenerativePart(filePath: string) {
+  const fileResult = await ensureLocalFile(filePath);
+  try {
+    // Temp results are read straight from disk; everything else goes through
+    // the configured storage backend.
+    // NOTE(review): presumably `isTemp` marks a downloaded copy — confirm in utils/url-file.
+    const rawBytes = fileResult.isTemp
+      ? await fs.promises.readFile(fileResult.path)
+      : await getStorage().readFile(fileResult.path);
+    const imageBytes = Buffer.from(rawBytes);
+    return {
+      inlineData: {
+        data: imageBytes.toString("base64"),
+        // Sniff the real format instead of labelling every image as JPEG.
+        mimeType: detectImageMimeType(imageBytes),
+      },
+    };
+  } finally {
+    // Always release whatever ensureLocalFile set up, even when reading fails.
+    fileResult.cleanup();
+  }
+}
+/**
+ * Derives a distinct output path for the Nth image of a single response.
+ *
+ * Index 0 returns `outputPath` unchanged. Later indices get `_<index>`
+ * inserted before the file extension (or appended when there is none), so
+ * several images from one response never overwrite each other.
+ */
+function indexedOutputPath(outputPath: string, index: number): string {
+  if (index === 0) return outputPath;
+  const lastSeparator = Math.max(
+    outputPath.lastIndexOf("/"),
+    outputPath.lastIndexOf("\\"),
+  );
+  const lastDot = outputPath.lastIndexOf(".");
+  // Only treat the dot as an extension separator when it sits inside the file
+  // name and is not the leading dot of a dotfile.
+  if (lastDot > lastSeparator + 1) {
+    return `${outputPath.slice(0, lastDot)}_${index}${outputPath.slice(lastDot)}`;
+  }
+  return `${outputPath}_${index}`;
+}
+/**
+ * Tool definition: generates images with Gemini from a text prompt, optionally
+ * preceded by reference images and their descriptions.
+ *
+ * `execute` builds one list of parts (description text, reference image, ...,
+ * final prompt), calls `ai.models.generateContent`, stores every returned
+ * image through the storage backend, and returns either a JSON string
+ * (`{ url, images, message }`) or, when no image came back, the model's text
+ * or a fixed fallback message.
+ *
+ * NOTE(review): the description string mentions "Imagen 3" while the request
+ * below targets "gemini-3-pro-image-preview" — confirm which wording is intended.
+ */
+export const generateImageConversational = {
+  name: "generateImageConversational",
+  description:
+    "Generate high-quality images from text prompts using Google's Imagen 3 model via Gemini. " +
+    "This tool supports conversational history with reference images and their descriptions. " +
+    "Use this tool when you need to provide specific reference images with context/descriptions to guide the generation.",
+  parameters: z.object({
+    prompt: z
+      .string()
+      .describe("Detailed text description of the image to generate."),
+    aspect_ratio: z
+      .string()
+      .optional()
+      .describe(
+        "Supported ratios: 1:1, 3:4, 4:3, 9:16, or 16:9. Default is 9:16.",
+      ),
+    output_path: z
+      .string()
+      .optional()
+      .describe(
+        "Optional: specific local path or filename to save the image. " +
+          "If omitted, a timestamped filename is generated automatically.",
+      ),
+    reference_images: z
+      .array(
+        z.object({
+          path: z
+            .string()
+            .describe("Local path or URL to the reference image."),
+          description: z
+            .string()
+            .optional()
+            .describe("Optional description of the reference image."),
+        }),
+      )
+      .optional()
+      .describe(
+        "Optional: List of reference images with optional descriptions to guide the style or content.",
+      ),
+    enhancer_preset: z
+      .string()
+      .optional()
+      .describe(
+        "Optional: Name of a prompt enhancer preset to apply (e.g., 'cinematic', 'photorealistic', 'anime'). " +
+          "Automatically enhances the prompt with professional style modifiers.",
+      ),
+  }),
+  timeoutMs: 300000,
+  execute: async (args: {
+    prompt: string;
+    aspect_ratio?: string;
+    output_path?: string;
+    reference_images?: { path: string; description?: string }[];
+    enhancer_preset?: string;
+  }) => {
+    return safeToolExecute(
+      async () => {
+        try {
+          // Apply prompt enhancement only when a preset is named and it
+          // actually carries transformations.
+          let enhancedPrompt = args.prompt;
+          if (args.enhancer_preset) {
+            const enhancer = resolveEnhancer(args.enhancer_preset);
+            if (enhancer.hasTransformations()) {
+              enhancedPrompt = enhancer.enhance(args.prompt);
+            }
+          }
+          const contents: Array<
+            { text: string } | { inlineData: { data: string; mimeType: string } }
+          > = [];
+          // Each reference image is preceded by its description (when given)
+          // so the model sees the text next to the image it refers to.
+          if (Array.isArray(args.reference_images)) {
+            for (const refImg of args.reference_images) {
+              if (refImg.description) {
+                contents.push({ text: refImg.description });
+              }
+              contents.push(await fileToGenerativePart(refImg.path));
+            }
+          }
+          // The main prompt always comes last.
+          contents.push({ text: enhancedPrompt });
+          const response = await ai.models.generateContent({
+            model: "gemini-3-pro-image-preview",
+            contents: contents, // Pass contents directly which is interpreted as User role parts
+            config: {
+              responseModalities: ["TEXT", "IMAGE"],
+              imageConfig: {
+                aspectRatio: args.aspect_ratio || "9:16",
+              },
+            },
+          });
+          const images = [];
+          let textResponse = "";
+          // Only the first candidate is inspected.
+          const parts = response.candidates?.[0]?.content?.parts ?? [];
+          for (const part of parts) {
+            if (part.text) {
+              textResponse += part.text;
+            } else if (part.inlineData?.data) {
+              // Every returned image is persisted. Images after the first get
+              // an index suffix so they cannot overwrite an earlier one that
+              // shares the same base path.
+              const basePath =
+                args.output_path ||
+                generateTimestampedFilename("generated_image.png");
+              const outputPath = indexedOutputPath(basePath, images.length);
+              const url = await getStorage().writeFile(
+                outputPath,
+                Buffer.from(part.inlineData.data, "base64"),
+              );
+              images.push({
+                url,
+                filename: outputPath,
+                // Report the type the API returned; fall back to PNG.
+                mimeType: part.inlineData.mimeType || "image/png",
+              });
+            }
+          }
+          if (images.length > 0) {
+            return JSON.stringify({
+              url: images[0]?.url,
+              images,
+              message: textResponse || "Image generated successfully",
+            });
+          }
+          return (
+            textResponse ||
+            "Image generation completed but no image was produced"
+          );
+        } catch (error: unknown) {
+          // Non-Error throwables have no `.message`; stringify them instead
+          // of reporting "undefined".
+          const reason = error instanceof Error ? error.message : String(error);
+          throw new Error(`Image generation failed: ${reason}`);
+        }
+      },
+      "gemini-generateImageConversational",
+      { toolName: "generateImageConversational" },
+    );
+  },
+};