@r16t/multimodal-mcp 1.0.0 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +118 -48
- package/build/config.js +5 -6
- package/build/file-manager.d.ts +1 -1
- package/build/file-manager.js +6 -3
- package/build/providers/openai.js +2 -2
- package/build/server.js +3 -0
- package/build/tools/generate-audio.d.ts +1 -0
- package/build/tools/generate-audio.js +2 -2
- package/build/tools/generate-image.d.ts +1 -0
- package/build/tools/generate-image.js +2 -2
- package/build/tools/generate-video.d.ts +1 -0
- package/build/tools/generate-video.js +2 -2
- package/build/tools/list-providers.js +3 -1
- package/package.json +1 -1
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Rudolf S.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -1,65 +1,41 @@
|
|
|
1
1
|
# multimodal-mcp
|
|
2
2
|
|
|
3
|
-
Multi-provider media generation MCP server. Generate images, videos, and audio from text prompts using OpenAI, xAI, and
|
|
3
|
+
Multi-provider media generation MCP server. Generate images, videos, and audio from text prompts using OpenAI, xAI, and Gemini through a single unified interface.
|
|
4
4
|
|
|
5
5
|
## Features
|
|
6
6
|
|
|
7
|
-
- 🎨 **Image Generation** — Generate images via OpenAI (gpt-image-1), xAI (grok-imagine-image), or
|
|
8
|
-
- 🎬 **Video Generation** — Generate videos via OpenAI (sora-2), xAI (grok-imagine-video), or
|
|
9
|
-
- 🔊 **Audio Generation** — Text-to-speech via OpenAI (tts-1) or
|
|
7
|
+
- 🎨 **Image Generation** — Generate images via OpenAI (gpt-image-1), xAI (grok-imagine-image), or Gemini (imagen-4)
|
|
8
|
+
- 🎬 **Video Generation** — Generate videos via OpenAI (sora-2), xAI (grok-imagine-video), or Gemini (veo-3.1)
|
|
9
|
+
- 🔊 **Audio Generation** — Text-to-speech via OpenAI (tts-1) or Gemini (gemini-2.5-flash-preview-tts)
|
|
10
10
|
- 🔄 **Auto-Discovery** — Automatically detects configured providers from environment variables
|
|
11
11
|
- 🎯 **Provider Selection** — Auto-select a provider or explicitly choose one per request
|
|
12
12
|
- 📁 **File Output** — Saves all generated media to disk with descriptive filenames
|
|
13
13
|
|
|
14
14
|
## Quick Start
|
|
15
15
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
Add to your Claude Desktop config (`~/Library/Application Support/Claude/claude_desktop_config.json`):
|
|
19
|
-
|
|
20
|
-
```json
|
|
21
|
-
{
|
|
22
|
-
"mcpServers": {
|
|
23
|
-
"multimodal-mcp": {
|
|
24
|
-
"command": "npx",
|
|
25
|
-
"args": ["@r16t/multimodal-mcp"],
|
|
26
|
-
"env": {
|
|
27
|
-
"OPENAI_API_KEY": "sk-...",
|
|
28
|
-
"XAI_API_KEY": "xai-...",
|
|
29
|
-
"GOOGLE_API_KEY": "AIza...",
|
|
30
|
-
"MEDIA_OUTPUT_DIR": "/tmp/media"
|
|
31
|
-
}
|
|
32
|
-
}
|
|
33
|
-
}
|
|
34
|
-
}
|
|
35
|
-
```
|
|
16
|
+
Set the API key for at least one provider. Most users only need one — add more to access additional providers.
|
|
36
17
|
|
|
37
|
-
|
|
18
|
+
```bash
|
|
19
|
+
# Using OpenAI
|
|
20
|
+
claude mcp add multimodal-mcp -e OPENAI_API_KEY=sk-... -- npx @r16t/multimodal-mcp
|
|
38
21
|
|
|
39
|
-
|
|
22
|
+
# Or using xAI
|
|
23
|
+
# claude mcp add multimodal-mcp -e XAI_API_KEY=xai-... -- npx @r16t/multimodal-mcp
|
|
40
24
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
"mcpServers": {
|
|
44
|
-
"multimodal-mcp": {
|
|
45
|
-
"command": "npx",
|
|
46
|
-
"args": ["@r16t/multimodal-mcp"],
|
|
47
|
-
"env": {
|
|
48
|
-
"OPENAI_API_KEY": "sk-..."
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
}
|
|
52
|
-
}
|
|
25
|
+
# Or using Gemini
|
|
26
|
+
# claude mcp add multimodal-mcp -e GEMINI_API_KEY=AIza... -- npx @r16t/multimodal-mcp
|
|
53
27
|
```
|
|
54
28
|
|
|
29
|
+
Using a different editor? See [setup instructions](#editor-setup) for Claude Desktop, Cursor, VS Code, Windsurf, and Cline.
|
|
30
|
+
|
|
55
31
|
## Environment Variables
|
|
56
32
|
|
|
57
33
|
| Variable | Required | Description |
|
|
58
34
|
|----------|----------|-------------|
|
|
59
35
|
| `OPENAI_API_KEY` | At least one provider key | OpenAI API key — enables image, video, and audio generation via gpt-image-1, sora-2, and tts-1 |
|
|
60
36
|
| `XAI_API_KEY` | At least one provider key | xAI API key — enables image and video generation via grok-imagine-image and grok-imagine-video |
|
|
61
|
-
| `
|
|
62
|
-
| `
|
|
37
|
+
| `GEMINI_API_KEY` | At least one provider key | Gemini API key — enables image, video, and audio generation via imagen-4, veo-3.1, and gemini-2.5-flash-preview-tts |
|
|
38
|
+
| `GOOGLE_API_KEY` | — | Alias for `GEMINI_API_KEY`; either name is accepted |
|
|
63
39
|
| `MEDIA_OUTPUT_DIR` | No | Directory for saved media files. Defaults to the system temp directory |
|
|
64
40
|
|
|
65
41
|
## Available Tools
|
|
@@ -112,7 +88,7 @@ List all configured media generation providers and their capabilities. Takes no
|
|
|
112
88
|
|----------|:-----:|:-----:|:-----:|-------------|-------------|-------------|
|
|
113
89
|
| OpenAI | ✅ | ✅ | ✅ | gpt-image-1 | sora-2 | tts-1 |
|
|
114
90
|
| xAI | ✅ | ✅ | — | grok-imagine-image | grok-imagine-video | — |
|
|
115
|
-
|
|
|
91
|
+
| Gemini | ✅ | ✅ | ✅ | imagen-4 | veo-3.1 | gemini-2.5-flash-preview-tts |
|
|
116
92
|
|
|
117
93
|
### Image Aspect Ratios
|
|
118
94
|
|
|
@@ -120,7 +96,7 @@ List all configured media generation providers and their capabilities. Takes no
|
|
|
120
96
|
|----------|:---:|:----:|:----:|:---:|:---:|
|
|
121
97
|
| OpenAI | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
122
98
|
| xAI | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
123
|
-
|
|
|
99
|
+
| Gemini | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
124
100
|
|
|
125
101
|
### Video Aspect Ratios & Resolutions
|
|
126
102
|
|
|
@@ -128,14 +104,14 @@ List all configured media generation providers and their capabilities. Takes no
|
|
|
128
104
|
|----------|:----:|:----:|:---:|:----:|:----:|:-----:|
|
|
129
105
|
| OpenAI | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
130
106
|
| xAI | ✅ | ✅ | ✅ | — | ✅ | ✅ |
|
|
131
|
-
|
|
|
107
|
+
| Gemini | ✅ | ✅ | — | — | ✅ | ✅ |
|
|
132
108
|
|
|
133
109
|
### Audio Formats
|
|
134
110
|
|
|
135
111
|
| Provider | mp3 | opus | aac | flac | wav | pcm |
|
|
136
112
|
|----------|:---:|:----:|:---:|:----:|:---:|:---:|
|
|
137
113
|
| OpenAI | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
138
|
-
|
|
|
114
|
+
| Gemini | — | — | — | — | ✅ | — |
|
|
139
115
|
|
|
140
116
|
## Troubleshooting
|
|
141
117
|
|
|
@@ -145,11 +121,11 @@ List all configured media generation providers and their capabilities. Takes no
|
|
|
145
121
|
[config] No provider API keys detected
|
|
146
122
|
```
|
|
147
123
|
|
|
148
|
-
Set at least one of `OPENAI_API_KEY`, `XAI_API_KEY`, or `
|
|
124
|
+
Set at least one of `OPENAI_API_KEY`, `XAI_API_KEY`, or `GEMINI_API_KEY` in the MCP server's `env` block.
|
|
149
125
|
|
|
150
126
|
### Provider not available for requested media type
|
|
151
127
|
|
|
152
|
-
All three providers support image and video generation. Audio generation (text-to-speech) is supported by OpenAI and
|
|
128
|
+
All three providers support image and video generation. Audio generation (text-to-speech) is supported by OpenAI and Gemini. xAI does not currently offer a standalone TTS API. If you specify a `provider` that isn't configured (no API key) or doesn't support the requested media type, you'll receive an error. Omit the `provider` parameter to auto-select from configured providers.
|
|
153
129
|
|
|
154
130
|
### Video generation timeout
|
|
155
131
|
|
|
@@ -159,9 +135,9 @@ Video generation polls for up to 10 minutes. If your video hasn't completed in t
|
|
|
159
135
|
|
|
160
136
|
This indicates the xAI API returned an empty response. Check that your `XAI_API_KEY` is valid and that your prompt does not violate xAI content policies.
|
|
161
137
|
|
|
162
|
-
###
|
|
138
|
+
### Gemini image/video generation failed: 403
|
|
163
139
|
|
|
164
|
-
Verify your `
|
|
140
|
+
Verify that the Google Cloud project associated with your `GEMINI_API_KEY` has the Generative Language API enabled in Google Cloud Console.
|
|
165
141
|
|
|
166
142
|
## Development
|
|
167
143
|
|
|
@@ -173,6 +149,100 @@ npm run typecheck # Type-check without emitting
|
|
|
173
149
|
npm run dev # Watch mode for TypeScript compilation
|
|
174
150
|
```
|
|
175
151
|
|
|
152
|
+
## Editor Setup
|
|
153
|
+
|
|
154
|
+
Replace `OPENAI_API_KEY` with the key variable for your provider of choice (`XAI_API_KEY` or `GEMINI_API_KEY`). You can set multiple keys to enable multiple providers.
|
|
155
|
+
|
|
156
|
+
### Claude Desktop
|
|
157
|
+
|
|
158
|
+
Add to `~/Library/Application Support/Claude/claude_desktop_config.json`:
|
|
159
|
+
|
|
160
|
+
```json
|
|
161
|
+
{
|
|
162
|
+
"mcpServers": {
|
|
163
|
+
"multimodal-mcp": {
|
|
164
|
+
"command": "npx",
|
|
165
|
+
"args": ["@r16t/multimodal-mcp"],
|
|
166
|
+
"env": {
|
|
167
|
+
"OPENAI_API_KEY": "sk-..."
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
### Cursor
|
|
175
|
+
|
|
176
|
+
Add to `.cursor/mcp.json` in your project root (or `~/.cursor/mcp.json` globally):
|
|
177
|
+
|
|
178
|
+
```json
|
|
179
|
+
{
|
|
180
|
+
"mcpServers": {
|
|
181
|
+
"multimodal-mcp": {
|
|
182
|
+
"command": "npx",
|
|
183
|
+
"args": ["@r16t/multimodal-mcp"],
|
|
184
|
+
"env": {
|
|
185
|
+
"OPENAI_API_KEY": "sk-..."
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
### VS Code (GitHub Copilot)
|
|
193
|
+
|
|
194
|
+
Add to `.vscode/mcp.json` in your project root:
|
|
195
|
+
|
|
196
|
+
```json
|
|
197
|
+
{
|
|
198
|
+
"servers": {
|
|
199
|
+
"multimodal-mcp": {
|
|
200
|
+
"command": "npx",
|
|
201
|
+
"args": ["@r16t/multimodal-mcp"],
|
|
202
|
+
"env": {
|
|
203
|
+
"OPENAI_API_KEY": "sk-..."
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### Windsurf
|
|
211
|
+
|
|
212
|
+
Add to `~/.codeium/windsurf/mcp_config.json`:
|
|
213
|
+
|
|
214
|
+
```json
|
|
215
|
+
{
|
|
216
|
+
"mcpServers": {
|
|
217
|
+
"multimodal-mcp": {
|
|
218
|
+
"command": "npx",
|
|
219
|
+
"args": ["@r16t/multimodal-mcp"],
|
|
220
|
+
"env": {
|
|
221
|
+
"OPENAI_API_KEY": "sk-..."
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
### Cline
|
|
229
|
+
|
|
230
|
+
Add to `~/Library/Application Support/Code/User/globalStorage/saoudrizwan.claude-dev/settings/cline_mcp_settings.json`:
|
|
231
|
+
|
|
232
|
+
```json
|
|
233
|
+
{
|
|
234
|
+
"mcpServers": {
|
|
235
|
+
"multimodal-mcp": {
|
|
236
|
+
"command": "npx",
|
|
237
|
+
"args": ["@r16t/multimodal-mcp"],
|
|
238
|
+
"env": {
|
|
239
|
+
"OPENAI_API_KEY": "sk-..."
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
```
|
|
245
|
+
|
|
176
246
|
## License
|
|
177
247
|
|
|
178
248
|
MIT
|
package/build/config.js
CHANGED
|
@@ -1,20 +1,19 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
|
-
import { tmpdir } from "node:os";
|
|
3
2
|
const configSchema = z.object({
|
|
4
3
|
openaiApiKey: z.string().optional(),
|
|
5
4
|
xaiApiKey: z.string().optional(),
|
|
6
5
|
googleApiKey: z.string().optional(),
|
|
7
6
|
outputDirectory: z.string(),
|
|
8
7
|
});
|
|
9
|
-
function
|
|
10
|
-
return process.env.
|
|
8
|
+
function resolveGeminiKey() {
|
|
9
|
+
return process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY || undefined;
|
|
11
10
|
}
|
|
12
11
|
export function loadConfig() {
|
|
13
12
|
const config = configSchema.parse({
|
|
14
13
|
openaiApiKey: process.env.OPENAI_API_KEY || undefined,
|
|
15
14
|
xaiApiKey: process.env.XAI_API_KEY || undefined,
|
|
16
|
-
googleApiKey:
|
|
17
|
-
outputDirectory: process.env.MEDIA_OUTPUT_DIR ||
|
|
15
|
+
googleApiKey: resolveGeminiKey(),
|
|
16
|
+
outputDirectory: process.env.MEDIA_OUTPUT_DIR || process.cwd(),
|
|
18
17
|
});
|
|
19
18
|
const detected = [];
|
|
20
19
|
if (config.openaiApiKey)
|
|
@@ -22,7 +21,7 @@ export function loadConfig() {
|
|
|
22
21
|
if (config.xaiApiKey)
|
|
23
22
|
detected.push("xAI");
|
|
24
23
|
if (config.googleApiKey)
|
|
25
|
-
detected.push("
|
|
24
|
+
detected.push("Gemini");
|
|
26
25
|
if (detected.length > 0) {
|
|
27
26
|
console.error(`[config] Detected providers: ${detected.join(", ")}`);
|
|
28
27
|
}
|
package/build/file-manager.d.ts
CHANGED
|
@@ -2,6 +2,6 @@ import type { GeneratedMedia } from "./providers/types.js";
|
|
|
2
2
|
export declare class FileManager {
|
|
3
3
|
private readonly outputDirectory;
|
|
4
4
|
constructor(outputDirectory: string);
|
|
5
|
-
save(media: GeneratedMedia, type: "image" | "video" | "audio"): Promise<string>;
|
|
5
|
+
save(media: GeneratedMedia, type: "image" | "video" | "audio", outputDirectory?: string): Promise<string>;
|
|
6
6
|
private getExtension;
|
|
7
7
|
}
|
package/build/file-manager.js
CHANGED
|
@@ -6,14 +6,17 @@ export class FileManager {
|
|
|
6
6
|
constructor(outputDirectory) {
|
|
7
7
|
this.outputDirectory = resolve(outputDirectory);
|
|
8
8
|
}
|
|
9
|
-
async save(media, type) {
|
|
10
|
-
|
|
9
|
+
async save(media, type, outputDirectory) {
|
|
10
|
+
const targetDirectory = outputDirectory
|
|
11
|
+
? resolve(outputDirectory)
|
|
12
|
+
: this.outputDirectory;
|
|
13
|
+
await mkdir(targetDirectory, { recursive: true });
|
|
11
14
|
const extension = this.getExtension(type, media.mimeType);
|
|
12
15
|
const provider = media.metadata.provider || "unknown";
|
|
13
16
|
const timestamp = Date.now();
|
|
14
17
|
const random = randomBytes(4).toString("hex");
|
|
15
18
|
const filename = `${type}-${timestamp}-${provider}-${random}.${extension}`;
|
|
16
|
-
const filePath = join(
|
|
19
|
+
const filePath = join(targetDirectory, filename);
|
|
17
20
|
await writeFile(filePath, media.data);
|
|
18
21
|
return filePath;
|
|
19
22
|
}
|
|
@@ -28,8 +28,8 @@ export class OpenAIProvider {
|
|
|
28
28
|
model: "gpt-image-1",
|
|
29
29
|
prompt: params.prompt,
|
|
30
30
|
size: this.mapAspectRatioToSize(params.aspectRatio),
|
|
31
|
-
quality: params.quality === "high" ? "
|
|
32
|
-
|
|
31
|
+
quality: params.quality === "high" ? "high" : params.quality === "low" ? "low" : "medium",
|
|
32
|
+
output_format: "png",
|
|
33
33
|
...params.providerOptions,
|
|
34
34
|
});
|
|
35
35
|
const base64Data = response.data[0].b64_json;
|
package/build/server.js
CHANGED
|
@@ -35,6 +35,7 @@ export function createServer(config) {
|
|
|
35
35
|
provider: z.string().optional().describe("Provider to use: openai, xai, google. Auto-selects if omitted."),
|
|
36
36
|
aspectRatio: z.string().optional().describe("Aspect ratio: 1:1, 16:9, 9:16, 4:3, 3:4"),
|
|
37
37
|
quality: z.string().optional().describe("Quality level: low, standard, high"),
|
|
38
|
+
outputDirectory: z.string().optional().describe("Directory to save the generated file. Supports absolute or relative paths (resolved from cwd). Defaults to MEDIA_OUTPUT_DIR env var or cwd."),
|
|
38
39
|
providerOptions: z.record(z.string(), z.unknown()).optional().describe("Provider-specific parameters passed through directly"),
|
|
39
40
|
}, async (params) => generateImageHandler(params));
|
|
40
41
|
server.tool("generate_video", `Generate a video from a text prompt using AI. Available providers: ${providerNames}`, {
|
|
@@ -43,6 +44,7 @@ export function createServer(config) {
|
|
|
43
44
|
duration: z.number().optional().describe("Video duration in seconds (provider limits apply)"),
|
|
44
45
|
aspectRatio: z.string().optional().describe("Aspect ratio: 16:9, 9:16, 1:1"),
|
|
45
46
|
resolution: z.string().optional().describe("Resolution: 480p, 720p, 1080p"),
|
|
47
|
+
outputDirectory: z.string().optional().describe("Directory to save the generated file. Supports absolute or relative paths (resolved from cwd). Defaults to MEDIA_OUTPUT_DIR env var or cwd."),
|
|
46
48
|
providerOptions: z.record(z.string(), z.unknown()).optional().describe("Provider-specific parameters passed through directly"),
|
|
47
49
|
}, async (params) => generateVideoHandler(params));
|
|
48
50
|
server.tool("generate_audio", `Generate audio (text-to-speech) from text using AI. Available providers: ${providerNames}`, {
|
|
@@ -51,6 +53,7 @@ export function createServer(config) {
|
|
|
51
53
|
voice: z.string().optional().describe("Voice name (provider-specific). OpenAI: alloy, ash, coral, echo, fable, nova, onyx, sage, shimmer. Google: Kore, Charon, Fenrir, Aoede, Puck, etc."),
|
|
52
54
|
speed: z.number().optional().describe("Speech speed multiplier (OpenAI only): 0.25 to 4.0"),
|
|
53
55
|
format: z.string().optional().describe("Output format (OpenAI only): mp3, opus, aac, flac, wav, pcm"),
|
|
56
|
+
outputDirectory: z.string().optional().describe("Directory to save the generated file. Supports absolute or relative paths (resolved from cwd). Defaults to MEDIA_OUTPUT_DIR env var or cwd."),
|
|
54
57
|
providerOptions: z.record(z.string(), z.unknown()).optional().describe("Provider-specific parameters passed through directly"),
|
|
55
58
|
}, async (params) => generateAudioHandler(params));
|
|
56
59
|
server.tool("list_providers", "List all configured media generation providers and their capabilities", async () => listProvidersHandler());
|
|
@@ -8,7 +8,7 @@ export function buildGenerateAudioHandler(registry, fileManager) {
|
|
|
8
8
|
const available = registry.getAudioProviders().map((p) => p.name).join(", ") || "none";
|
|
9
9
|
const text = params.provider
|
|
10
10
|
? `Provider "${params.provider}" is not configured or does not support audio. Available audio providers: ${available}`
|
|
11
|
-
: "No audio provider available. Configure one of: OPENAI_API_KEY,
|
|
11
|
+
: "No audio provider available. Configure one of: OPENAI_API_KEY, GEMINI_API_KEY";
|
|
12
12
|
return {
|
|
13
13
|
isError: true,
|
|
14
14
|
content: [{ type: "text", text }],
|
|
@@ -32,7 +32,7 @@ export function buildGenerateAudioHandler(registry, fileManager) {
|
|
|
32
32
|
format: params.format,
|
|
33
33
|
providerOptions: params.providerOptions,
|
|
34
34
|
});
|
|
35
|
-
const filePath = await fileManager.save(media, "audio");
|
|
35
|
+
const filePath = await fileManager.save(media, "audio", params.outputDirectory);
|
|
36
36
|
return {
|
|
37
37
|
content: [{ type: "text", text: `Audio saved to ${filePath}` }],
|
|
38
38
|
};
|
|
@@ -6,7 +6,7 @@ export function buildGenerateImageHandler(registry, fileManager) {
|
|
|
6
6
|
const availableNames = registry.getImageProviders().map((p) => p.name).join(", ");
|
|
7
7
|
const text = params.provider
|
|
8
8
|
? `Provider "${params.provider}" is not configured. Available providers: ${availableNames || "none"}`
|
|
9
|
-
: "No image provider available. Configure one of: OPENAI_API_KEY, XAI_API_KEY,
|
|
9
|
+
: "No image provider available. Configure one of: OPENAI_API_KEY, XAI_API_KEY, GEMINI_API_KEY";
|
|
10
10
|
return {
|
|
11
11
|
isError: true,
|
|
12
12
|
content: [{ type: "text", text }],
|
|
@@ -19,7 +19,7 @@ export function buildGenerateImageHandler(registry, fileManager) {
|
|
|
19
19
|
quality: params.quality ?? "standard",
|
|
20
20
|
providerOptions: params.providerOptions,
|
|
21
21
|
});
|
|
22
|
-
const filePath = await fileManager.save(media, "image");
|
|
22
|
+
const filePath = await fileManager.save(media, "image", params.outputDirectory);
|
|
23
23
|
return {
|
|
24
24
|
content: [{ type: "text", text: `Image saved to ${filePath}` }],
|
|
25
25
|
};
|
|
@@ -6,7 +6,7 @@ export function buildGenerateVideoHandler(registry, fileManager) {
|
|
|
6
6
|
const available = registry.getVideoProviders().map((p) => p.name).join(", ") || "none";
|
|
7
7
|
const text = params.provider
|
|
8
8
|
? `Provider "${params.provider}" is not configured. Available providers: ${available}`
|
|
9
|
-
: "No video provider available. Configure one of: OPENAI_API_KEY, XAI_API_KEY,
|
|
9
|
+
: "No video provider available. Configure one of: OPENAI_API_KEY, XAI_API_KEY, GEMINI_API_KEY";
|
|
10
10
|
return {
|
|
11
11
|
isError: true,
|
|
12
12
|
content: [{ type: "text", text }],
|
|
@@ -20,7 +20,7 @@ export function buildGenerateVideoHandler(registry, fileManager) {
|
|
|
20
20
|
resolution: params.resolution ?? "720p",
|
|
21
21
|
providerOptions: params.providerOptions,
|
|
22
22
|
});
|
|
23
|
-
const filePath = await fileManager.save(media, "video");
|
|
23
|
+
const filePath = await fileManager.save(media, "video", params.outputDirectory);
|
|
24
24
|
return {
|
|
25
25
|
content: [{ type: "text", text: `Video saved to ${filePath}` }],
|
|
26
26
|
};
|
|
@@ -5,7 +5,7 @@ export function buildListProvidersHandler(registry) {
|
|
|
5
5
|
return {
|
|
6
6
|
content: [{
|
|
7
7
|
type: "text",
|
|
8
|
-
text: "No providers configured. Set one or more API keys: OPENAI_API_KEY, XAI_API_KEY,
|
|
8
|
+
text: "No providers configured. Set one or more API keys: OPENAI_API_KEY, XAI_API_KEY, GEMINI_API_KEY",
|
|
9
9
|
}],
|
|
10
10
|
};
|
|
11
11
|
}
|
|
@@ -15,6 +15,8 @@ export function buildListProvidersHandler(registry) {
|
|
|
15
15
|
caps.push("image");
|
|
16
16
|
if (p.capabilities.supportsVideoGeneration)
|
|
17
17
|
caps.push("video");
|
|
18
|
+
if (p.capabilities.supportsAudioGeneration)
|
|
19
|
+
caps.push("audio");
|
|
18
20
|
return `- ${p.name}: ${caps.join(", ")}`;
|
|
19
21
|
});
|
|
20
22
|
return {
|