npm - @r16t/multimodal-mcp - Versions diffs - 1.1.2 → 1.2.3 - Mend

@r16t/multimodal-mcp 1.1.2 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/README.md +12 -9
package/build/providers/google.d.ts +2 -1
package/build/providers/google.js +69 -13
package/build/providers/openai.d.ts +2 -1
package/build/providers/openai.js +24 -3
package/build/providers/registry.d.ts +1 -0
package/build/providers/registry.js +3 -0
package/build/providers/types.d.ts +10 -0
package/build/providers/xai.d.ts +2 -1
package/build/providers/xai.js +40 -0
package/build/read-media-file.d.ts +4 -0
package/build/read-media-file.js +21 -0
package/build/server.js +11 -1
package/build/tools/edit-image.d.ts +21 -0
package/build/tools/edit-image.js +49 -0
package/build/tools/generate-video.d.ts +1 -0
package/build/tools/generate-video.js +10 -0
package/build/tools/list-providers.js +2 -0
package/package.json +6 -3

package/README.md CHANGED Viewed

@@ -17,13 +17,13 @@ Set the API key for at least one provider. Most users only need one — add more
 ```bash
 # Using OpenAI
-claude mcp add multimodal-mcp -e OPENAI_API_KEY=sk-... -- npx @r16t/multimodal-mcp
+claude mcp add multimodal-mcp -e OPENAI_API_KEY=sk-... -- npx @r16t/multimodal-mcp@latest
 # Or using xAI
-# claude mcp add multimodal-mcp -e XAI_API_KEY=xai-... -- npx @r16t/multimodal-mcp
+# claude mcp add multimodal-mcp -e XAI_API_KEY=xai-... -- npx @r16t/multimodal-mcp@latest
 # Or using Gemini
-# claude mcp add multimodal-mcp -e GEMINI_API_KEY=AIza... -- npx @r16t/multimodal-mcp
+# claude mcp add multimodal-mcp -e GEMINI_API_KEY=AIza... -- npx @r16t/multimodal-mcp@latest
 ```
 Using a different editor? See [setup instructions](#editor-setup) for Claude Desktop, Cursor, VS Code, Windsurf, and Cline.
@@ -36,7 +36,7 @@ Using a different editor? See [setup instructions](#editor-setup) for Claude Des
 | `XAI_API_KEY` | At least one provider key | xAI API key — enables image and video generation via grok-imagine-image and grok-imagine-video |
 | `GEMINI_API_KEY` | At least one provider key | Gemini API key — enables image, video, and audio generation via imagen-4, veo-3.1, and gemini-2.5-flash-preview-tts |
 | `GOOGLE_API_KEY` | — | Alias for `GEMINI_API_KEY`; either name is accepted |
-| `MEDIA_OUTPUT_DIR` | No | Directory for saved media files. Defaults to the system temp directory |
+| `MEDIA_OUTPUT_DIR` | No | Directory for saved media files. Defaults to the current working directory |
 ## Available Tools
@@ -50,6 +50,7 @@ Generate an image from a text prompt.
 | `provider` | string | No | Provider to use: `openai`, `xai`, `google`. Auto-selects if omitted |
 | `aspectRatio` | string | No | Aspect ratio: `1:1`, `16:9`, `9:16`, `4:3`, `3:4` |
 | `quality` | string | No | Quality level: `low`, `standard`, `high` |
+| `outputDirectory` | string | No | Directory to save the generated file. Absolute or relative path. Defaults to `MEDIA_OUTPUT_DIR` or cwd |
 | `providerOptions` | object | No | Provider-specific parameters passed through directly |
 ### `generate_video`
@@ -63,6 +64,7 @@ Generate a video from a text prompt. Video generation is asynchronous and may ta
 | `duration` | number | No | Video duration in seconds (provider limits apply) |
 | `aspectRatio` | string | No | Aspect ratio: `16:9`, `9:16`, `1:1` |
 | `resolution` | string | No | Resolution: `480p`, `720p`, `1080p` |
+| `outputDirectory` | string | No | Directory to save the generated file. Absolute or relative path. Defaults to `MEDIA_OUTPUT_DIR` or cwd |
 | `providerOptions` | object | No | Provider-specific parameters passed through directly |
 ### `generate_audio`
@@ -76,6 +78,7 @@ Generate audio (text-to-speech) from text. Audio generation is synchronous.
 | `voice` | string | No | Voice name (provider-specific). OpenAI: `alloy`, `ash`, `coral`, `echo`, `fable`, `nova`, `onyx`, `sage`, `shimmer`. Google: `Kore`, `Charon`, `Fenrir`, `Aoede`, `Puck`, etc. |
 | `speed` | number | No | Speech speed multiplier (OpenAI only): `0.25` to `4.0` |
 | `format` | string | No | Output format (OpenAI only): `mp3`, `opus`, `aac`, `flac`, `wav`, `pcm` |
+| `outputDirectory` | string | No | Directory to save the generated file. Absolute or relative path. Defaults to `MEDIA_OUTPUT_DIR` or cwd |
 | `providerOptions` | object | No | Provider-specific parameters passed through directly |
 ### `list_providers`
@@ -162,7 +165,7 @@ Add to `~/Library/Application Support/Claude/claude_desktop_config.json`:
   "mcpServers": {
     "multimodal-mcp": {
       "command": "npx",
-      "args": ["@r16t/multimodal-mcp"],
+      "args": ["@r16t/multimodal-mcp@latest"],
       "env": {
         "OPENAI_API_KEY": "sk-..."
       }
@@ -180,7 +183,7 @@ Add to `.cursor/mcp.json` in your project root (or `~/.cursor/mcp.json` globally
   "mcpServers": {
     "multimodal-mcp": {
       "command": "npx",
-      "args": ["@r16t/multimodal-mcp"],
+      "args": ["@r16t/multimodal-mcp@latest"],
       "env": {
         "OPENAI_API_KEY": "sk-..."
       }
@@ -198,7 +201,7 @@ Add to `.vscode/mcp.json` in your project root:
   "servers": {
     "multimodal-mcp": {
       "command": "npx",
-      "args": ["@r16t/multimodal-mcp"],
+      "args": ["@r16t/multimodal-mcp@latest"],
       "env": {
         "OPENAI_API_KEY": "sk-..."
       }
@@ -216,7 +219,7 @@ Add to `~/.codeium/windsurf/mcp_config.json`:
   "mcpServers": {
     "multimodal-mcp": {
       "command": "npx",
-      "args": ["@r16t/multimodal-mcp"],
+      "args": ["@r16t/multimodal-mcp@latest"],
       "env": {
         "OPENAI_API_KEY": "sk-..."
       }
@@ -234,7 +237,7 @@ Add to `~/Library/Application Support/Code/User/globalStorage/saoudrizwan.claude
   "mcpServers": {
     "multimodal-mcp": {
       "command": "npx",
-      "args": ["@r16t/multimodal-mcp"],
+      "args": ["@r16t/multimodal-mcp@latest"],
       "env": {
         "OPENAI_API_KEY": "sk-..."
       }

package/build/providers/google.d.ts CHANGED Viewed

@@ -1,10 +1,11 @@
-import type { MediaProvider, ProviderCapabilities, ImageParams, VideoParams, AudioParams, GeneratedMedia } from "./types.js";
+import type { MediaProvider, ProviderCapabilities, ImageParams, EditImageParams, VideoParams, AudioParams, GeneratedMedia } from "./types.js";
 export declare class GoogleProvider implements MediaProvider {
     readonly name = "google";
     readonly capabilities: ProviderCapabilities;
     private apiKey;
     constructor(apiKey: string);
     generateImage(params: ImageParams): Promise<GeneratedMedia>;
+    editImage(params: EditImageParams): Promise<GeneratedMedia>;
     generateVideo(params: VideoParams): Promise<GeneratedMedia>;
     generateAudio(params: AudioParams): Promise<GeneratedMedia>;
 }

package/build/providers/google.js CHANGED Viewed

@@ -4,6 +4,7 @@ export class GoogleProvider {
     name = "google";
     capabilities = {
         supportsImageGeneration: true,
+        supportsImageEditing: true,
         supportsVideoGeneration: true,
         supportsAudioGeneration: true,
         supportedImageAspectRatios: ["1:1", "16:9", "9:16", "4:3", "3:4"],
@@ -17,12 +18,13 @@ export class GoogleProvider {
         this.apiKey = apiKey;
     }
     async generateImage(params) {
-        const response = await fetch(`${GEMINI_BASE_URL}/models/imagen-4:generateImages?key=${this.apiKey}`, {
+        const response = await fetch(`${GEMINI_BASE_URL}/models/imagen-4.0-generate-001:predict?key=${this.apiKey}`, {
             method: "POST",
             headers: { "Content-Type": "application/json" },
             body: JSON.stringify({
-                prompt: params.prompt,
-                config: {
+                instances: [{ prompt: params.prompt }],
+                parameters: {
+                    sampleCount: 1,
                     aspectRatio: params.aspectRatio,
                     ...params.providerOptions,
                 },
@@ -36,22 +38,76 @@ export class GoogleProvider {
         return {
             data: Buffer.from(base64, "base64"),
             mimeType: "image/png",
-            metadata: { model: "imagen-4", provider: "google" },
+            metadata: { model: "imagen-4.0-generate-001", provider: "google" },
         };
     }
-    async generateVideo(params) {
-        const submitResponse = await fetch(`${GEMINI_BASE_URL}/models/veo-3.1:predictLongRunning?key=${this.apiKey}`, {
+    async editImage(params) {
+        const base64Image = params.imageData.toString("base64");
+        const response = await fetch(`${GEMINI_BASE_URL}/models/gemini-2.5-flash-preview-image:generateContent?key=${this.apiKey}`, {
             method: "POST",
             headers: { "Content-Type": "application/json" },
             body: JSON.stringify({
-                prompt: params.prompt,
-                config: {
-                    aspectRatio: params.aspectRatio,
-                    durationSeconds: params.duration,
+                contents: [{
+                        parts: [
+                            { text: params.prompt },
+                            {
+                                inlineData: {
+                                    mimeType: params.imageMimeType,
+                                    data: base64Image,
+                                },
+                            },
+                        ],
+                    }],
+                generationConfig: {
+                    responseModalities: ["IMAGE"],
                     ...params.providerOptions,
                 },
             }),
         });
+        if (!response.ok) {
+            throw new Error(`Google image editing failed: ${response.status}`);
+        }
+        const result = (await response.json());
+        const imagePart = result.candidates[0]?.content?.parts?.find((part) => part.inlineData !== undefined);
+        if (!imagePart?.inlineData) {
+            throw new Error("Google image editing returned no image data");
+        }
+        return {
+            data: Buffer.from(imagePart.inlineData.data, "base64"),
+            mimeType: imagePart.inlineData.mimeType || "image/png",
+            metadata: {
+                model: "gemini-2.5-flash-preview-image",
+                provider: "google",
+                operation: "edit",
+            },
+        };
+    }
+    async generateVideo(params) {
+        const instance = {
+            prompt: params.prompt,
+        };
+        if (params.imageData) {
+            const base64Image = params.imageData.toString("base64");
+            instance.image = {
+                inlineData: {
+                    mimeType: params.imageMimeType ?? "image/png",
+                    data: base64Image,
+                },
+            };
+        }
+        const requestBody = {
+            instances: [instance],
+            parameters: {
+                aspectRatio: params.aspectRatio,
+                durationSeconds: params.duration,
+                ...params.providerOptions,
+            },
+        };
+        const submitResponse = await fetch(`${GEMINI_BASE_URL}/models/veo-3.1-generate-preview:predictLongRunning?key=${this.apiKey}`, {
+            method: "POST",
+            headers: { "Content-Type": "application/json" },
+            body: JSON.stringify(requestBody),
+        });
         if (!submitResponse.ok) {
             throw new Error(`Google video generation failed: ${submitResponse.status}`);
         }
@@ -66,7 +122,7 @@ export class GoogleProvider {
         return {
             data,
             mimeType: "video/mp4",
-            metadata: { model: "veo-3.1", provider: "google", operationName: operation.name },
+            metadata: { model: "veo-3.1-generate-preview", provider: "google", operationName: operation.name },
         };
     }
     async generateAudio(params) {
@@ -78,8 +134,8 @@ export class GoogleProvider {
             body: JSON.stringify({
                 contents: [{ parts: [{ text: params.text }] }],
                 generationConfig: {
-                    response_modalities: ["AUDIO"],
-                    speech_config: {
+                    responseModalities: ["AUDIO"],
+                    speechConfig: {
                         voiceConfig: {
                             prebuiltVoiceConfig: { voiceName: voice },
                         },

package/build/providers/openai.d.ts CHANGED Viewed

@@ -1,10 +1,11 @@
-import type { MediaProvider, ProviderCapabilities, ImageParams, VideoParams, AudioParams, GeneratedMedia } from "./types.js";
+import type { MediaProvider, ProviderCapabilities, ImageParams, EditImageParams, VideoParams, AudioParams, GeneratedMedia } from "./types.js";
 export declare class OpenAIProvider implements MediaProvider {
     readonly name = "openai";
     readonly capabilities: ProviderCapabilities;
     private client;
     constructor(apiKey: string);
     generateImage(params: ImageParams): Promise<GeneratedMedia>;
+    editImage(params: EditImageParams): Promise<GeneratedMedia>;
     generateVideo(params: VideoParams): Promise<GeneratedMedia>;
     generateAudio(params: AudioParams): Promise<GeneratedMedia>;
     private audioFormatToMimeType;

package/build/providers/openai.js CHANGED Viewed

@@ -11,6 +11,7 @@ export class OpenAIProvider {
     name = "openai";
     capabilities = {
         supportsImageGeneration: true,
+        supportsImageEditing: true,
         supportsVideoGeneration: true,
         supportsAudioGeneration: true,
         supportedImageAspectRatios: ["1:1", "16:9", "9:16", "4:3", "3:4"],
@@ -39,14 +40,34 @@ export class OpenAIProvider {
             metadata: { model: "gpt-image-1", provider: "openai" },
         };
     }
+    async editImage(params) {
+        const imageFile = new File([new Uint8Array(params.imageData)], "input.png", { type: params.imageMimeType });
+        const response = await this.client.images.edit({
+            model: "gpt-image-1",
+            image: imageFile,
+            prompt: params.prompt,
+            ...params.providerOptions,
+        });
+        const base64Data = response.data[0].b64_json;
+        return {
+            data: Buffer.from(base64Data, "base64"),
+            mimeType: "image/png",
+            metadata: { model: "gpt-image-1", provider: "openai", operation: "edit" },
+        };
+    }
     async generateVideo(params) {
         const videos = this.client.videos;
-        const job = await videos.create({
+        const createParams = {
             model: "sora-2",
             prompt: params.prompt,
-            duration: params.duration,
+            seconds: String(params.duration),
             ...params.providerOptions,
-        });
+        };
+        if (params.imageData) {
+            const imageFile = new File([new Uint8Array(params.imageData)], "first-frame.png", { type: params.imageMimeType ?? "image/png" });
+            createParams.input_reference = imageFile;
+        }
+        const job = await videos.create(createParams);
         const result = await pollForCompletion(() => videos.retrieve(job.id), (status) => status.status === "completed", { timeoutMs: 600_000, intervalMs: 5_000 });
         const videoUrl = result.url;
         const videoResponse = await fetch(videoUrl);

package/build/providers/registry.d.ts CHANGED Viewed

@@ -4,6 +4,7 @@ export declare class ProviderRegistry {
     register(provider: MediaProvider): void;
     getProvider(name?: string): MediaProvider | undefined;
     getImageProviders(): MediaProvider[];
+    getImageEditProviders(): MediaProvider[];
     getVideoProviders(): MediaProvider[];
     getAudioProviders(): MediaProvider[];
     listCapabilities(): ProviderInfo[];

package/build/providers/registry.js CHANGED Viewed

@@ -13,6 +13,9 @@ export class ProviderRegistry {
     getImageProviders() {
         return [...this.providers.values()].filter((p) => p.capabilities.supportsImageGeneration);
     }
+    getImageEditProviders() {
+        return [...this.providers.values()].filter((p) => p.capabilities.supportsImageEditing);
+    }
     getVideoProviders() {
         return [...this.providers.values()].filter((p) => p.capabilities.supportsVideoGeneration);
     }

package/build/providers/types.d.ts CHANGED Viewed

@@ -2,11 +2,13 @@ export interface MediaProvider {
     readonly name: string;
     readonly capabilities: ProviderCapabilities;
     generateImage(params: ImageParams): Promise<GeneratedMedia>;
+    editImage(params: EditImageParams): Promise<GeneratedMedia>;
     generateVideo(params: VideoParams): Promise<GeneratedMedia>;
     generateAudio(params: AudioParams): Promise<GeneratedMedia>;
 }
 export interface ProviderCapabilities {
     supportsImageGeneration: boolean;
+    supportsImageEditing: boolean;
     supportsVideoGeneration: boolean;
     supportsAudioGeneration: boolean;
     supportedImageAspectRatios: string[];
@@ -21,11 +23,19 @@ export interface ImageParams {
     quality: string;
     providerOptions?: Record<string, unknown>;
 }
+export interface EditImageParams {
+    imageData: Buffer;
+    imageMimeType: string;
+    prompt: string;
+    providerOptions?: Record<string, unknown>;
+}
 export interface VideoParams {
     prompt: string;
     duration: number;
     aspectRatio: string;
     resolution: string;
+    imageData?: Buffer;
+    imageMimeType?: string;
     providerOptions?: Record<string, unknown>;
 }
 export interface AudioParams {

package/build/providers/xai.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { MediaProvider, ProviderCapabilities, ImageParams, VideoParams, AudioParams, GeneratedMedia } from "./types.js";
+import type { MediaProvider, ProviderCapabilities, ImageParams, EditImageParams, VideoParams, AudioParams, GeneratedMedia } from "./types.js";
 export declare class XAIProvider implements MediaProvider {
     readonly name = "xai";
     readonly capabilities: ProviderCapabilities;
@@ -6,6 +6,7 @@ export declare class XAIProvider implements MediaProvider {
     private apiKey;
     constructor(apiKey: string);
     generateImage(params: ImageParams): Promise<GeneratedMedia>;
+    editImage(params: EditImageParams): Promise<GeneratedMedia>;
     generateVideo(params: VideoParams): Promise<GeneratedMedia>;
     generateAudio(_params: AudioParams): Promise<GeneratedMedia>;
 }

package/build/providers/xai.js CHANGED Viewed

@@ -7,6 +7,7 @@ export class XAIProvider {
     name = "xai";
     capabilities = {
         supportsImageGeneration: true,
+        supportsImageEditing: true,
         supportsVideoGeneration: true,
         supportsAudioGeneration: false,
         supportedImageAspectRatios: ["1:1", "16:9", "9:16", "4:3", "3:4"],
@@ -39,6 +40,45 @@ export class XAIProvider {
             metadata: { model: IMAGE_MODEL, provider: "xai" },
         };
     }
+    async editImage(params) {
+        const base64Data = params.imageData.toString("base64");
+        const dataUri = `data:${params.imageMimeType};base64,${base64Data}`;
+        const response = await fetch(`${XAI_BASE_URL}/images/edits`, {
+            method: "POST",
+            headers: {
+                "Content-Type": "application/json",
+                Authorization: `Bearer ${this.apiKey}`,
+            },
+            body: JSON.stringify({
+                model: IMAGE_MODEL,
+                prompt: params.prompt,
+                image: { url: dataUri, type: "image_url" },
+                ...params.providerOptions,
+            }),
+        });
+        if (!response.ok) {
+            throw new Error(`xAI image editing failed: ${response.status}`);
+        }
+        const result = (await response.json());
+        const imageResult = result.data?.[0];
+        if (imageResult?.b64_json) {
+            return {
+                data: Buffer.from(imageResult.b64_json, "base64"),
+                mimeType: "image/png",
+                metadata: { model: IMAGE_MODEL, provider: "xai", operation: "edit" },
+            };
+        }
+        if (imageResult?.url) {
+            const imageResponse = await fetch(imageResult.url);
+            const data = Buffer.from(await imageResponse.arrayBuffer());
+            return {
+                data,
+                mimeType: "image/png",
+                metadata: { model: IMAGE_MODEL, provider: "xai", operation: "edit" },
+            };
+        }
+        throw new Error("xAI image editing returned no data");
+    }
     async generateVideo(params) {
         const submitResponse = await fetch(`${XAI_BASE_URL}/videos/generations`, {
             method: "POST",

package/build/read-media-file.d.ts ADDED Viewed

@@ -0,0 +1,4 @@
+export declare function readMediaFile(filePath: string): Promise<{
+    data: Buffer;
+    mimeType: string;
+}>;

package/build/read-media-file.js ADDED Viewed

@@ -0,0 +1,21 @@
+import { readFile } from "node:fs/promises";
+import { extname, resolve } from "node:path";
+const EXTENSION_TO_MIME = {
+    ".png": "image/png",
+    ".jpg": "image/jpeg",
+    ".jpeg": "image/jpeg",
+    ".webp": "image/webp",
+    ".gif": "image/gif",
+    ".mp4": "video/mp4",
+};
+export async function readMediaFile(filePath) {
+    const absolutePath = resolve(filePath);
+    const extension = extname(absolutePath).toLowerCase();
+    const mimeType = EXTENSION_TO_MIME[extension];
+    if (!mimeType) {
+        const supported = Object.keys(EXTENSION_TO_MIME).join(", ");
+        throw new Error(`Unsupported file extension "${extension}". Supported: ${supported}`);
+    }
+    const data = await readFile(absolutePath);
+    return { data: Buffer.from(data), mimeType };
+}

package/build/server.js CHANGED Viewed

@@ -6,6 +6,7 @@ import { XAIProvider } from "./providers/xai.js";
 import { GoogleProvider } from "./providers/google.js";
 import { FileManager } from "./file-manager.js";
 import { buildGenerateImageHandler } from "./tools/generate-image.js";
+import { buildEditImageHandler } from "./tools/edit-image.js";
 import { buildGenerateVideoHandler } from "./tools/generate-video.js";
 import { buildGenerateAudioHandler } from "./tools/generate-audio.js";
 import { buildListProvidersHandler } from "./tools/list-providers.js";
@@ -25,6 +26,7 @@ export function createServer(config) {
         console.error("[server] Registered Google provider");
     }
     const generateImageHandler = buildGenerateImageHandler(registry, fileManager);
+    const editImageHandler = buildEditImageHandler(registry, fileManager);
     const generateVideoHandler = buildGenerateVideoHandler(registry, fileManager);
     const generateAudioHandler = buildGenerateAudioHandler(registry, fileManager);
     const listProvidersHandler = buildListProvidersHandler(registry);
@@ -38,12 +40,20 @@ export function createServer(config) {
         outputDirectory: z.string().optional().describe("Directory to save the generated file. Supports absolute or relative paths (resolved from cwd). Defaults to MEDIA_OUTPUT_DIR env var or cwd."),
         providerOptions: z.record(z.string(), z.unknown()).optional().describe("Provider-specific parameters passed through directly"),
     }, async (params) => generateImageHandler(params));
-    server.tool("generate_video", `Generate a video from a text prompt using AI. Available providers: ${providerNames}`, {
+    server.tool("edit_image", `Edit an existing image using AI. Provide the path to an image and a text prompt describing the desired edits. Available providers: ${providerNames}`, {
+        imagePath: z.string().describe("Absolute path to the source image file to edit"),
+        prompt: z.string().describe("Text description of the edits to apply to the image"),
+        provider: z.string().optional().describe("Provider to use: openai, xai, google. Auto-selects if omitted."),
+        outputDirectory: z.string().optional().describe("Directory to save the edited file. Supports absolute or relative paths (resolved from cwd). Defaults to MEDIA_OUTPUT_DIR env var or cwd."),
+        providerOptions: z.record(z.string(), z.unknown()).optional().describe("Provider-specific parameters passed through directly"),
+    }, async (params) => editImageHandler(params));
+    server.tool("generate_video", `Generate a video from a text prompt using AI. Optionally provide an image as the first frame. Available providers: ${providerNames}`, {
         prompt: z.string().describe("Text description of the video to generate"),
         provider: z.string().optional().describe("Provider to use: openai, xai, google. Auto-selects if omitted."),
         duration: z.number().optional().describe("Video duration in seconds (provider limits apply)"),
         aspectRatio: z.string().optional().describe("Aspect ratio: 16:9, 9:16, 1:1"),
         resolution: z.string().optional().describe("Resolution: 480p, 720p, 1080p"),
+        imagePath: z.string().optional().describe("Path to an image to use as the first frame of the video (OpenAI and Google only)"),
         outputDirectory: z.string().optional().describe("Directory to save the generated file. Supports absolute or relative paths (resolved from cwd). Defaults to MEDIA_OUTPUT_DIR env var or cwd."),
         providerOptions: z.record(z.string(), z.unknown()).optional().describe("Provider-specific parameters passed through directly"),
     }, async (params) => generateVideoHandler(params));

package/build/tools/edit-image.d.ts ADDED Viewed

@@ -0,0 +1,21 @@
+import type { ProviderRegistry } from "../providers/registry.js";
+import type { FileManager } from "../file-manager.js";
+export declare function buildEditImageHandler(registry: ProviderRegistry, fileManager: FileManager): (params: {
+    imagePath: string;
+    prompt: string;
+    provider?: string;
+    outputDirectory?: string;
+    providerOptions?: Record<string, unknown>;
+}) => Promise<{
+    isError: true;
+    content: {
+        type: "text";
+        text: string;
+    }[];
+} | {
+    content: {
+        type: "text";
+        text: string;
+    }[];
+    isError?: undefined;
+}>;

package/build/tools/edit-image.js ADDED Viewed

@@ -0,0 +1,49 @@
+import { readMediaFile } from "../read-media-file.js";
+import { sanitizeError } from "../errors.js";
+export function buildEditImageHandler(registry, fileManager) {
+    return async (params) => {
+        const provider = params.provider
+            ? registry.getProvider(params.provider)
+            : registry.getImageEditProviders()[0];
+        if (!provider) {
+            const availableNames = registry.getImageEditProviders().map((p) => p.name).join(", ");
+            const text = params.provider
+                ? `Provider "${params.provider}" is not configured. Available providers: ${availableNames || "none"}`
+                : "No image editing provider available. Configure one of: OPENAI_API_KEY, XAI_API_KEY, GEMINI_API_KEY";
+            return {
+                isError: true,
+                content: [{ type: "text", text }],
+            };
+        }
+        if (!provider.capabilities.supportsImageEditing) {
+            const availableNames = registry.getImageEditProviders().map((p) => p.name).join(", ");
+            return {
+                isError: true,
+                content: [{
+                        type: "text",
+                        text: `Provider "${provider.name}" does not support image editing. Available: ${availableNames || "none"}`,
+                    }],
+            };
+        }
+        try {
+            const { data, mimeType } = await readMediaFile(params.imagePath);
+            const media = await provider.editImage({
+                imageData: data,
+                imageMimeType: mimeType,
+                prompt: params.prompt,
+                providerOptions: params.providerOptions,
+            });
+            const filePath = await fileManager.save(media, "image", params.outputDirectory);
+            return {
+                content: [{ type: "text", text: `Edited image saved to ${filePath}` }],
+            };
+        }
+        catch (error) {
+            const message = sanitizeError(error);
+            return {
+                isError: true,
+                content: [{ type: "text", text: `Image editing failed: ${message}` }],
+            };
+        }
+    };
+}

package/build/tools/generate-video.d.ts CHANGED Viewed

@@ -6,6 +6,7 @@ export declare function buildGenerateVideoHandler(registry: ProviderRegistry, fi
     duration?: number;
     aspectRatio?: string;
     resolution?: string;
+    imagePath?: string;
     outputDirectory?: string;
     providerOptions?: Record<string, unknown>;
 }) => Promise<{

package/build/tools/generate-video.js CHANGED Viewed

@@ -1,3 +1,4 @@
+import { readMediaFile } from "../read-media-file.js";
 import { sanitizeError } from "../errors.js";
 export function buildGenerateVideoHandler(registry, fileManager) {
     return async (params) => {
@@ -13,11 +14,20 @@ export function buildGenerateVideoHandler(registry, fileManager) {
             };
         }
         try {
+            let imageData;
+            let imageMimeType;
+            if (params.imagePath) {
+                const file = await readMediaFile(params.imagePath);
+                imageData = file.data;
+                imageMimeType = file.mimeType;
+            }
             const media = await provider.generateVideo({
                 prompt: params.prompt,
                 duration: params.duration ?? 5,
                 aspectRatio: params.aspectRatio ?? "16:9",
                 resolution: params.resolution ?? "720p",
+                imageData,
+                imageMimeType,
                 providerOptions: params.providerOptions,
             });
             const filePath = await fileManager.save(media, "video", params.outputDirectory);

package/build/tools/list-providers.js CHANGED Viewed

@@ -13,6 +13,8 @@ export function buildListProvidersHandler(registry) {
             const caps = [];
             if (p.capabilities.supportsImageGeneration)
                 caps.push("image");
+            if (p.capabilities.supportsImageEditing)
+                caps.push("image editing");
             if (p.capabilities.supportsVideoGeneration)
                 caps.push("video");
             if (p.capabilities.supportsAudioGeneration)

package/package.json CHANGED Viewed

@@ -1,12 +1,11 @@
 {
   "name": "@r16t/multimodal-mcp",
-  "version": "1.1.2",
+  "version": "1.2.3",
   "description": "Multi-provider media generation MCP server",
   "type": "module",
   "main": "build/index.js",
   "bin": {
-    "multimodal-mcp": "build/index.js",
-    "@r16t/multimodal-mcp": "build/index.js"
+    "multimodal-mcp": "build/index.js"
   },
   "files": [
     "build/"
@@ -19,6 +18,10 @@
     "lint": "eslint src --fix",
     "typecheck": "tsc --noEmit"
   },
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/rsmdt/multimodal-mcp"
+  },
   "license": "MIT",
   "keywords": [
     "mcp",