@r16t/multimodal-mcp 1.0.0 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Rudolf S.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md CHANGED
@@ -1,65 +1,41 @@
1
1
  # multimodal-mcp
2
2
 
3
- Multi-provider media generation MCP server. Generate images, videos, and audio from text prompts using OpenAI, xAI, and Google through a single unified interface.
3
+ Multi-provider media generation MCP server. Generate images, videos, and audio from text prompts using OpenAI, xAI, and Gemini through a single unified interface.
4
4
 
5
5
  ## Features
6
6
 
7
- - 🎨 **Image Generation** — Generate images via OpenAI (gpt-image-1), xAI (grok-imagine-image), or Google (imagen-4)
8
- - 🎬 **Video Generation** — Generate videos via OpenAI (sora-2), xAI (grok-imagine-video), or Google (veo-3.1)
9
- - 🔊 **Audio Generation** — Text-to-speech via OpenAI (tts-1) or Google (gemini-2.5-flash-preview-tts)
7
+ - 🎨 **Image Generation** — Generate images via OpenAI (gpt-image-1), xAI (grok-imagine-image), or Gemini (imagen-4)
8
+ - 🎬 **Video Generation** — Generate videos via OpenAI (sora-2), xAI (grok-imagine-video), or Gemini (veo-3.1)
9
+ - 🔊 **Audio Generation** — Text-to-speech via OpenAI (tts-1) or Gemini (gemini-2.5-flash-preview-tts)
10
10
  - 🔄 **Auto-Discovery** — Automatically detects configured providers from environment variables
11
11
  - 🎯 **Provider Selection** — Auto-select or explicitly choose a provider per request
12
12
  - 📁 **File Output** — Saves all generated media to disk with descriptive filenames
13
13
 
14
14
  ## Quick Start
15
15
 
16
- ### Claude Desktop
17
-
18
- Add to your Claude Desktop config (`~/Library/Application Support/Claude/claude_desktop_config.json`):
19
-
20
- ```json
21
- {
22
- "mcpServers": {
23
- "multimodal-mcp": {
24
- "command": "npx",
25
- "args": ["@r16t/multimodal-mcp"],
26
- "env": {
27
- "OPENAI_API_KEY": "sk-...",
28
- "XAI_API_KEY": "xai-...",
29
- "GOOGLE_API_KEY": "AIza...",
30
- "MEDIA_OUTPUT_DIR": "/tmp/media"
31
- }
32
- }
33
- }
34
- }
35
- ```
16
+ Set the API key for at least one provider. Most users only need one — add more to access additional providers.
36
17
 
37
- You only need to set keys for the providers you want to use. At least one is required.
18
+ ```bash
19
+ # Using OpenAI
20
+ claude mcp add multimodal-mcp -e OPENAI_API_KEY=sk-... -- npx @r16t/multimodal-mcp
38
21
 
39
- ### Cursor / Other MCP Clients
22
+ # Or using xAI
23
+ # claude mcp add multimodal-mcp -e XAI_API_KEY=xai-... -- npx @r16t/multimodal-mcp
40
24
 
41
- ```json
42
- {
43
- "mcpServers": {
44
- "multimodal-mcp": {
45
- "command": "npx",
46
- "args": ["@r16t/multimodal-mcp"],
47
- "env": {
48
- "OPENAI_API_KEY": "sk-..."
49
- }
50
- }
51
- }
52
- }
25
+ # Or using Gemini
26
+ # claude mcp add multimodal-mcp -e GEMINI_API_KEY=AIza... -- npx @r16t/multimodal-mcp
53
27
  ```
54
28
 
29
+ Using a different editor? See [setup instructions](#editor-setup) for Claude Desktop, Cursor, VS Code, Windsurf, and Cline.
30
+
55
31
  ## Environment Variables
56
32
 
57
33
  | Variable | Required | Description |
58
34
  |----------|----------|-------------|
59
35
  | `OPENAI_API_KEY` | At least one provider key | OpenAI API key — enables image, video, and audio generation via gpt-image-1, sora-2, and tts-1 |
60
36
  | `XAI_API_KEY` | At least one provider key | xAI API key — enables image and video generation via grok-imagine-image and grok-imagine-video |
61
- | `GOOGLE_API_KEY` | At least one provider key | Google API key — enables image, video, and audio generation via imagen-4, veo-3.1, and gemini-2.5-flash-preview-tts |
62
- | `GEMINI_API_KEY` | — | Alias for `GOOGLE_API_KEY`; either name is accepted |
37
+ | `GEMINI_API_KEY` | At least one provider key | Gemini API key — enables image, video, and audio generation via imagen-4, veo-3.1, and gemini-2.5-flash-preview-tts |
38
+ | `GOOGLE_API_KEY` | — | Alias for `GEMINI_API_KEY`; either name is accepted |
63
39
  | `MEDIA_OUTPUT_DIR` | No | Directory for saved media files. Defaults to the current working directory; can be overridden per request with the `outputDirectory` tool parameter |
64
40
 
65
41
  ## Available Tools
@@ -112,7 +88,7 @@ List all configured media generation providers and their capabilities. Takes no
112
88
  |----------|:-----:|:-----:|:-----:|-------------|-------------|-------------|
113
89
  | OpenAI | ✅ | ✅ | ✅ | gpt-image-1 | sora-2 | tts-1 |
114
90
  | xAI | ✅ | ✅ | — | grok-imagine-image | grok-imagine-video | — |
115
- | Google | ✅ | ✅ | ✅ | imagen-4 | veo-3.1 | gemini-2.5-flash-preview-tts |
91
+ | Gemini | ✅ | ✅ | ✅ | imagen-4 | veo-3.1 | gemini-2.5-flash-preview-tts |
116
92
 
117
93
  ### Image Aspect Ratios
118
94
 
@@ -120,7 +96,7 @@ List all configured media generation providers and their capabilities. Takes no
120
96
  |----------|:---:|:----:|:----:|:---:|:---:|
121
97
  | OpenAI | ✅ | ✅ | ✅ | ✅ | ✅ |
122
98
  | xAI | ✅ | ✅ | ✅ | ✅ | ✅ |
123
- | Google | ✅ | ✅ | ✅ | ✅ | ✅ |
99
+ | Gemini | ✅ | ✅ | ✅ | ✅ | ✅ |
124
100
 
125
101
  ### Video Aspect Ratios & Resolutions
126
102
 
@@ -128,14 +104,14 @@ List all configured media generation providers and their capabilities. Takes no
128
104
  |----------|:----:|:----:|:---:|:----:|:----:|:-----:|
129
105
  | OpenAI | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
130
106
  | xAI | ✅ | ✅ | ✅ | — | ✅ | ✅ |
131
- | Google | ✅ | ✅ | — | — | ✅ | ✅ |
107
+ | Gemini | ✅ | ✅ | — | — | ✅ | ✅ |
132
108
 
133
109
  ### Audio Formats
134
110
 
135
111
  | Provider | mp3 | opus | aac | flac | wav | pcm |
136
112
  |----------|:---:|:----:|:---:|:----:|:---:|:---:|
137
113
  | OpenAI | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
138
- | Google | — | — | — | — | ✅ | — |
114
+ | Gemini | — | — | — | — | ✅ | — |
139
115
 
140
116
  ## Troubleshooting
141
117
 
@@ -145,11 +121,11 @@ List all configured media generation providers and their capabilities. Takes no
145
121
  [config] No provider API keys detected
146
122
  ```
147
123
 
148
- Set at least one of `OPENAI_API_KEY`, `XAI_API_KEY`, or `GOOGLE_API_KEY` in the MCP server's `env` block.
124
+ Set at least one of `OPENAI_API_KEY`, `XAI_API_KEY`, or `GEMINI_API_KEY` in the MCP server's `env` block.
149
125
 
150
126
  ### Provider not available for requested media type
151
127
 
152
- All three providers support image and video generation. Audio generation (text-to-speech) is supported by OpenAI and Google. xAI does not currently offer a standalone TTS API. If you specify a `provider` that isn't configured (no API key) or doesn't support the requested media type, you'll receive an error. Omit the `provider` parameter to auto-select from configured providers.
128
+ All three providers support image and video generation. Audio generation (text-to-speech) is supported by OpenAI and Gemini. xAI does not currently offer a standalone TTS API. If you specify a `provider` that isn't configured (no API key) or doesn't support the requested media type, you'll receive an error. Omit the `provider` parameter to auto-select from configured providers.
153
129
 
154
130
  ### Video generation timeout
155
131
 
@@ -159,9 +135,9 @@ Video generation polls for up to 10 minutes. If your video hasn't completed in t
159
135
 
160
136
  This indicates the xAI API returned an empty response. Check that your `XAI_API_KEY` is valid and that your prompt does not violate xAI content policies.
161
137
 
162
- ### Google image/video generation failed: 403
138
+ ### Gemini image/video generation failed: 403
163
139
 
164
- Verify your `GOOGLE_API_KEY` has the Generative Language API enabled in Google Cloud Console.
140
+ Verify that the Google Cloud project associated with your `GEMINI_API_KEY` has the Generative Language API enabled in Google Cloud Console.
165
141
 
166
142
  ## Development
167
143
 
@@ -173,6 +149,100 @@ npm run typecheck # Type-check without emitting
173
149
  npm run dev # Watch mode for TypeScript compilation
174
150
  ```
175
151
 
152
+ ## Editor Setup
153
+
154
+ Replace `OPENAI_API_KEY` with the key variable for your provider of choice (`XAI_API_KEY` or `GEMINI_API_KEY`). You can set multiple keys to enable multiple providers.
155
+
156
+ ### Claude Desktop
157
+
158
+ Add to `~/Library/Application Support/Claude/claude_desktop_config.json`:
159
+
160
+ ```json
161
+ {
162
+ "mcpServers": {
163
+ "multimodal-mcp": {
164
+ "command": "npx",
165
+ "args": ["@r16t/multimodal-mcp"],
166
+ "env": {
167
+ "OPENAI_API_KEY": "sk-..."
168
+ }
169
+ }
170
+ }
171
+ }
172
+ ```
173
+
174
+ ### Cursor
175
+
176
+ Add to `.cursor/mcp.json` in your project root (or `~/.cursor/mcp.json` globally):
177
+
178
+ ```json
179
+ {
180
+ "mcpServers": {
181
+ "multimodal-mcp": {
182
+ "command": "npx",
183
+ "args": ["@r16t/multimodal-mcp"],
184
+ "env": {
185
+ "OPENAI_API_KEY": "sk-..."
186
+ }
187
+ }
188
+ }
189
+ }
190
+ ```
191
+
192
+ ### VS Code (GitHub Copilot)
193
+
194
+ Add to `.vscode/mcp.json` in your project root:
195
+
196
+ ```json
197
+ {
198
+ "servers": {
199
+ "multimodal-mcp": {
200
+ "command": "npx",
201
+ "args": ["@r16t/multimodal-mcp"],
202
+ "env": {
203
+ "OPENAI_API_KEY": "sk-..."
204
+ }
205
+ }
206
+ }
207
+ }
208
+ ```
209
+
210
+ ### Windsurf
211
+
212
+ Add to `~/.codeium/windsurf/mcp_config.json`:
213
+
214
+ ```json
215
+ {
216
+ "mcpServers": {
217
+ "multimodal-mcp": {
218
+ "command": "npx",
219
+ "args": ["@r16t/multimodal-mcp"],
220
+ "env": {
221
+ "OPENAI_API_KEY": "sk-..."
222
+ }
223
+ }
224
+ }
225
+ }
226
+ ```
227
+
228
+ ### Cline
229
+
230
+ Add to `~/Library/Application Support/Code/User/globalStorage/saoudrizwan.claude-dev/settings/cline_mcp_settings.json`:
231
+
232
+ ```json
233
+ {
234
+ "mcpServers": {
235
+ "multimodal-mcp": {
236
+ "command": "npx",
237
+ "args": ["@r16t/multimodal-mcp"],
238
+ "env": {
239
+ "OPENAI_API_KEY": "sk-..."
240
+ }
241
+ }
242
+ }
243
+ }
244
+ ```
245
+
176
246
  ## License
177
247
 
178
248
  MIT
package/build/config.js CHANGED
@@ -1,20 +1,19 @@
1
1
  import { z } from "zod";
2
- import { tmpdir } from "node:os";
3
2
  const configSchema = z.object({
4
3
  openaiApiKey: z.string().optional(),
5
4
  xaiApiKey: z.string().optional(),
6
5
  googleApiKey: z.string().optional(),
7
6
  outputDirectory: z.string(),
8
7
  });
9
- function resolveGoogleKey() {
10
- return process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY || undefined;
8
+ function resolveGeminiKey() {
9
+ return process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY || undefined;
11
10
  }
12
11
  export function loadConfig() {
13
12
  const config = configSchema.parse({
14
13
  openaiApiKey: process.env.OPENAI_API_KEY || undefined,
15
14
  xaiApiKey: process.env.XAI_API_KEY || undefined,
16
- googleApiKey: resolveGoogleKey(),
17
- outputDirectory: process.env.MEDIA_OUTPUT_DIR || tmpdir(),
15
+ googleApiKey: resolveGeminiKey(),
16
+ outputDirectory: process.env.MEDIA_OUTPUT_DIR || process.cwd(),
18
17
  });
19
18
  const detected = [];
20
19
  if (config.openaiApiKey)
@@ -22,7 +21,7 @@ export function loadConfig() {
22
21
  if (config.xaiApiKey)
23
22
  detected.push("xAI");
24
23
  if (config.googleApiKey)
25
- detected.push("Google");
24
+ detected.push("Gemini");
26
25
  if (detected.length > 0) {
27
26
  console.error(`[config] Detected providers: ${detected.join(", ")}`);
28
27
  }
@@ -2,6 +2,6 @@ import type { GeneratedMedia } from "./providers/types.js";
2
2
  export declare class FileManager {
3
3
  private readonly outputDirectory;
4
4
  constructor(outputDirectory: string);
5
- save(media: GeneratedMedia, type: "image" | "video" | "audio"): Promise<string>;
5
+ save(media: GeneratedMedia, type: "image" | "video" | "audio", outputDirectory?: string): Promise<string>;
6
6
  private getExtension;
7
7
  }
@@ -6,14 +6,17 @@ export class FileManager {
6
6
  constructor(outputDirectory) {
7
7
  this.outputDirectory = resolve(outputDirectory);
8
8
  }
9
- async save(media, type) {
10
- await mkdir(this.outputDirectory, { recursive: true });
9
+ async save(media, type, outputDirectory) {
10
+ const targetDirectory = outputDirectory
11
+ ? resolve(outputDirectory)
12
+ : this.outputDirectory;
13
+ await mkdir(targetDirectory, { recursive: true });
11
14
  const extension = this.getExtension(type, media.mimeType);
12
15
  const provider = media.metadata.provider || "unknown";
13
16
  const timestamp = Date.now();
14
17
  const random = randomBytes(4).toString("hex");
15
18
  const filename = `${type}-${timestamp}-${provider}-${random}.${extension}`;
16
- const filePath = join(this.outputDirectory, filename);
19
+ const filePath = join(targetDirectory, filename);
17
20
  await writeFile(filePath, media.data);
18
21
  return filePath;
19
22
  }
@@ -28,8 +28,8 @@ export class OpenAIProvider {
28
28
  model: "gpt-image-1",
29
29
  prompt: params.prompt,
30
30
  size: this.mapAspectRatioToSize(params.aspectRatio),
31
- quality: params.quality === "high" ? "hd" : "standard",
32
- response_format: "b64_json",
31
+ quality: params.quality === "high" ? "high" : params.quality === "low" ? "low" : "medium",
32
+ output_format: "png",
33
33
  ...params.providerOptions,
34
34
  });
35
35
  const base64Data = response.data[0].b64_json;
package/build/server.js CHANGED
@@ -35,6 +35,7 @@ export function createServer(config) {
35
35
  provider: z.string().optional().describe("Provider to use: openai, xai, google. Auto-selects if omitted."),
36
36
  aspectRatio: z.string().optional().describe("Aspect ratio: 1:1, 16:9, 9:16, 4:3, 3:4"),
37
37
  quality: z.string().optional().describe("Quality level: low, standard, high"),
38
+ outputDirectory: z.string().optional().describe("Directory to save the generated file. Supports absolute or relative paths (resolved from cwd). Defaults to MEDIA_OUTPUT_DIR env var or cwd."),
38
39
  providerOptions: z.record(z.string(), z.unknown()).optional().describe("Provider-specific parameters passed through directly"),
39
40
  }, async (params) => generateImageHandler(params));
40
41
  server.tool("generate_video", `Generate a video from a text prompt using AI. Available providers: ${providerNames}`, {
@@ -43,6 +44,7 @@ export function createServer(config) {
43
44
  duration: z.number().optional().describe("Video duration in seconds (provider limits apply)"),
44
45
  aspectRatio: z.string().optional().describe("Aspect ratio: 16:9, 9:16, 1:1"),
45
46
  resolution: z.string().optional().describe("Resolution: 480p, 720p, 1080p"),
47
+ outputDirectory: z.string().optional().describe("Directory to save the generated file. Supports absolute or relative paths (resolved from cwd). Defaults to MEDIA_OUTPUT_DIR env var or cwd."),
46
48
  providerOptions: z.record(z.string(), z.unknown()).optional().describe("Provider-specific parameters passed through directly"),
47
49
  }, async (params) => generateVideoHandler(params));
48
50
  server.tool("generate_audio", `Generate audio (text-to-speech) from text using AI. Available providers: ${providerNames}`, {
@@ -51,6 +53,7 @@ export function createServer(config) {
51
53
  voice: z.string().optional().describe("Voice name (provider-specific). OpenAI: alloy, ash, coral, echo, fable, nova, onyx, sage, shimmer. Google: Kore, Charon, Fenrir, Aoede, Puck, etc."),
52
54
  speed: z.number().optional().describe("Speech speed multiplier (OpenAI only): 0.25 to 4.0"),
53
55
  format: z.string().optional().describe("Output format (OpenAI only): mp3, opus, aac, flac, wav, pcm"),
56
+ outputDirectory: z.string().optional().describe("Directory to save the generated file. Supports absolute or relative paths (resolved from cwd). Defaults to MEDIA_OUTPUT_DIR env var or cwd."),
54
57
  providerOptions: z.record(z.string(), z.unknown()).optional().describe("Provider-specific parameters passed through directly"),
55
58
  }, async (params) => generateAudioHandler(params));
56
59
  server.tool("list_providers", "List all configured media generation providers and their capabilities", async () => listProvidersHandler());
@@ -6,6 +6,7 @@ export declare function buildGenerateAudioHandler(registry: ProviderRegistry, fi
6
6
  voice?: string;
7
7
  speed?: number;
8
8
  format?: string;
9
+ outputDirectory?: string;
9
10
  providerOptions?: Record<string, unknown>;
10
11
  }) => Promise<{
11
12
  isError: true;
@@ -8,7 +8,7 @@ export function buildGenerateAudioHandler(registry, fileManager) {
8
8
  const available = registry.getAudioProviders().map((p) => p.name).join(", ") || "none";
9
9
  const text = params.provider
10
10
  ? `Provider "${params.provider}" is not configured or does not support audio. Available audio providers: ${available}`
11
- : "No audio provider available. Configure one of: OPENAI_API_KEY, GOOGLE_API_KEY";
11
+ : "No audio provider available. Configure one of: OPENAI_API_KEY, GEMINI_API_KEY";
12
12
  return {
13
13
  isError: true,
14
14
  content: [{ type: "text", text }],
@@ -32,7 +32,7 @@ export function buildGenerateAudioHandler(registry, fileManager) {
32
32
  format: params.format,
33
33
  providerOptions: params.providerOptions,
34
34
  });
35
- const filePath = await fileManager.save(media, "audio");
35
+ const filePath = await fileManager.save(media, "audio", params.outputDirectory);
36
36
  return {
37
37
  content: [{ type: "text", text: `Audio saved to ${filePath}` }],
38
38
  };
@@ -5,6 +5,7 @@ export declare function buildGenerateImageHandler(registry: ProviderRegistry, fi
5
5
  provider?: string;
6
6
  aspectRatio?: string;
7
7
  quality?: string;
8
+ outputDirectory?: string;
8
9
  providerOptions?: Record<string, unknown>;
9
10
  }) => Promise<{
10
11
  isError: true;
@@ -6,7 +6,7 @@ export function buildGenerateImageHandler(registry, fileManager) {
6
6
  const availableNames = registry.getImageProviders().map((p) => p.name).join(", ");
7
7
  const text = params.provider
8
8
  ? `Provider "${params.provider}" is not configured. Available providers: ${availableNames || "none"}`
9
- : "No image provider available. Configure one of: OPENAI_API_KEY, XAI_API_KEY, GOOGLE_API_KEY";
9
+ : "No image provider available. Configure one of: OPENAI_API_KEY, XAI_API_KEY, GEMINI_API_KEY";
10
10
  return {
11
11
  isError: true,
12
12
  content: [{ type: "text", text }],
@@ -19,7 +19,7 @@ export function buildGenerateImageHandler(registry, fileManager) {
19
19
  quality: params.quality ?? "standard",
20
20
  providerOptions: params.providerOptions,
21
21
  });
22
- const filePath = await fileManager.save(media, "image");
22
+ const filePath = await fileManager.save(media, "image", params.outputDirectory);
23
23
  return {
24
24
  content: [{ type: "text", text: `Image saved to ${filePath}` }],
25
25
  };
@@ -6,6 +6,7 @@ export declare function buildGenerateVideoHandler(registry: ProviderRegistry, fi
6
6
  duration?: number;
7
7
  aspectRatio?: string;
8
8
  resolution?: string;
9
+ outputDirectory?: string;
9
10
  providerOptions?: Record<string, unknown>;
10
11
  }) => Promise<{
11
12
  isError: true;
@@ -6,7 +6,7 @@ export function buildGenerateVideoHandler(registry, fileManager) {
6
6
  const available = registry.getVideoProviders().map((p) => p.name).join(", ") || "none";
7
7
  const text = params.provider
8
8
  ? `Provider "${params.provider}" is not configured. Available providers: ${available}`
9
- : "No video provider available. Configure one of: OPENAI_API_KEY, XAI_API_KEY, GOOGLE_API_KEY";
9
+ : "No video provider available. Configure one of: OPENAI_API_KEY, XAI_API_KEY, GEMINI_API_KEY";
10
10
  return {
11
11
  isError: true,
12
12
  content: [{ type: "text", text }],
@@ -20,7 +20,7 @@ export function buildGenerateVideoHandler(registry, fileManager) {
20
20
  resolution: params.resolution ?? "720p",
21
21
  providerOptions: params.providerOptions,
22
22
  });
23
- const filePath = await fileManager.save(media, "video");
23
+ const filePath = await fileManager.save(media, "video", params.outputDirectory);
24
24
  return {
25
25
  content: [{ type: "text", text: `Video saved to ${filePath}` }],
26
26
  };
@@ -5,7 +5,7 @@ export function buildListProvidersHandler(registry) {
5
5
  return {
6
6
  content: [{
7
7
  type: "text",
8
- text: "No providers configured. Set one or more API keys: OPENAI_API_KEY, XAI_API_KEY, GOOGLE_API_KEY",
8
+ text: "No providers configured. Set one or more API keys: OPENAI_API_KEY, XAI_API_KEY, GEMINI_API_KEY",
9
9
  }],
10
10
  };
11
11
  }
@@ -15,6 +15,8 @@ export function buildListProvidersHandler(registry) {
15
15
  caps.push("image");
16
16
  if (p.capabilities.supportsVideoGeneration)
17
17
  caps.push("video");
18
+ if (p.capabilities.supportsAudioGeneration)
19
+ caps.push("audio");
18
20
  return `- ${p.name}: ${caps.join(", ")}`;
19
21
  });
20
22
  return {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@r16t/multimodal-mcp",
3
- "version": "1.0.0",
3
+ "version": "1.1.2",
4
4
  "description": "Multi-provider media generation MCP server",
5
5
  "type": "module",
6
6
  "main": "build/index.js",