@r16t/multimodal-mcp 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/build/errors.js +2 -0
- package/build/providers/bfl.d.ts +3 -0
- package/build/providers/bfl.js +62 -11
- package/build/providers/elevenlabs.js +2 -2
- package/build/providers/registry.js +1 -1
- package/build/providers/types.d.ts +1 -1
- package/build/tools/list-providers.js +7 -7
- package/build/tools/transcribe-audio.js +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -19,19 +19,19 @@ Set the API key for at least one provider. Most users only need one — add more
|
|
|
19
19
|
|
|
20
20
|
```bash
|
|
21
21
|
# Using OpenAI
|
|
22
|
-
claude mcp add multimodal-mcp -e OPENAI_API_KEY=sk-... -- npx @r16t/multimodal-mcp@latest
|
|
22
|
+
claude mcp add multimodal-mcp -e OPENAI_API_KEY=sk-... -- npx -y @r16t/multimodal-mcp@latest
|
|
23
23
|
|
|
24
24
|
# Or using xAI
|
|
25
|
-
# claude mcp add multimodal-mcp -e XAI_API_KEY=xai-... -- npx @r16t/multimodal-mcp@latest
|
|
25
|
+
# claude mcp add multimodal-mcp -e XAI_API_KEY=xai-... -- npx -y @r16t/multimodal-mcp@latest
|
|
26
26
|
|
|
27
27
|
# Or using Gemini
|
|
28
|
-
# claude mcp add multimodal-mcp -e GEMINI_API_KEY=AIza... -- npx @r16t/multimodal-mcp@latest
|
|
28
|
+
# claude mcp add multimodal-mcp -e GEMINI_API_KEY=AIza... -- npx -y @r16t/multimodal-mcp@latest
|
|
29
29
|
|
|
30
30
|
# Or using ElevenLabs (audio + transcription)
|
|
31
|
-
# claude mcp add multimodal-mcp -e ELEVENLABS_API_KEY=xi-... -- npx @r16t/multimodal-mcp@latest
|
|
31
|
+
# claude mcp add multimodal-mcp -e ELEVENLABS_API_KEY=xi-... -- npx -y @r16t/multimodal-mcp@latest
|
|
32
32
|
|
|
33
33
|
# Or using BFL/FLUX (images)
|
|
34
|
-
# claude mcp add multimodal-mcp -e BFL_API_KEY=... -- npx @r16t/multimodal-mcp@latest
|
|
34
|
+
# claude mcp add multimodal-mcp -e BFL_API_KEY=... -- npx -y @r16t/multimodal-mcp@latest
|
|
35
35
|
```
|
|
36
36
|
|
|
37
37
|
Using a different editor? See [setup instructions](#editor-setup) for Claude Desktop, Cursor, VS Code, Windsurf, and Cline.
|
package/build/errors.js
CHANGED
|
@@ -4,6 +4,8 @@ const API_KEY_PATTERNS = [
|
|
|
4
4
|
/AIzaSy[a-zA-Z0-9_-]{10,}/g,
|
|
5
5
|
/key=[a-zA-Z0-9_-]{20,}/g,
|
|
6
6
|
/xi-[a-zA-Z0-9_-]{10,}/g,
|
|
7
|
+
/\b[a-f0-9]{32}\b/g,
|
|
8
|
+
/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi,
|
|
7
9
|
];
|
|
8
10
|
export function sanitizeError(error) {
|
|
9
11
|
let message;
|
package/build/providers/bfl.d.ts
CHANGED
package/build/providers/bfl.js
CHANGED
|
@@ -1,7 +1,14 @@
|
|
|
1
1
|
import { pollForCompletion } from "./polling.js";
|
|
2
|
-
const BFL_BASE_URL = "https://api.bfl.
|
|
2
|
+
const BFL_BASE_URL = "https://api.bfl.ai/v1";
|
|
3
3
|
const IMAGE_MODEL = "flux-pro-1.1";
|
|
4
4
|
const EDIT_MODEL = "flux-kontext-pro";
|
|
5
|
+
const ALLOWED_MODELS = new Set([
|
|
6
|
+
"flux-pro-1.1",
|
|
7
|
+
"flux-pro-1.1-ultra",
|
|
8
|
+
"flux-dev",
|
|
9
|
+
"flux-kontext-pro",
|
|
10
|
+
"flux-kontext-max",
|
|
11
|
+
]);
|
|
5
12
|
const ASPECT_RATIO_MAP = {
|
|
6
13
|
"1:1": { width: 1024, height: 1024 },
|
|
7
14
|
"16:9": { width: 1344, height: 768 },
|
|
@@ -9,6 +16,15 @@ const ASPECT_RATIO_MAP = {
|
|
|
9
16
|
"4:3": { width: 1152, height: 896 },
|
|
10
17
|
"3:4": { width: 896, height: 1152 },
|
|
11
18
|
};
|
|
19
|
+
const ALLOWED_POLL_HOSTS = new Set([
|
|
20
|
+
"api.bfl.ai",
|
|
21
|
+
"api.bfl.ml",
|
|
22
|
+
]);
|
|
23
|
+
const ALLOWED_DOWNLOAD_HOSTS = new Set([
|
|
24
|
+
"delivery-bfl.ai",
|
|
25
|
+
"cdn.bfl.ai",
|
|
26
|
+
"cdn.bfl.ml",
|
|
27
|
+
]);
|
|
12
28
|
export class BFLProvider {
|
|
13
29
|
name = "bfl";
|
|
14
30
|
capabilities = {
|
|
@@ -32,16 +48,22 @@ export class BFLProvider {
|
|
|
32
48
|
const modelName = model ?? IMAGE_MODEL;
|
|
33
49
|
const { width, height } = this.mapAspectRatio(params.aspectRatio);
|
|
34
50
|
const task = await this.submitTask(modelName, { prompt: params.prompt, width, height, ...options });
|
|
35
|
-
const result = await this.pollTask(task.
|
|
36
|
-
|
|
51
|
+
const result = await this.pollTask(task.polling_url);
|
|
52
|
+
const sample = result.result?.sample;
|
|
53
|
+
if (!sample)
|
|
54
|
+
throw new Error("BFL returned Ready status with no result sample");
|
|
55
|
+
return this.downloadResult(sample, modelName);
|
|
37
56
|
}
|
|
38
57
|
async editImage(params) {
|
|
39
58
|
const { model, ...options } = params.providerOptions ?? {};
|
|
40
59
|
const modelName = model ?? EDIT_MODEL;
|
|
41
60
|
const input_image = params.imageData.toString("base64");
|
|
42
61
|
const task = await this.submitTask(modelName, { prompt: params.prompt, input_image, ...options });
|
|
43
|
-
const result = await this.pollTask(task.
|
|
44
|
-
|
|
62
|
+
const result = await this.pollTask(task.polling_url);
|
|
63
|
+
const sample = result.result?.sample;
|
|
64
|
+
if (!sample)
|
|
65
|
+
throw new Error("BFL returned Ready status with no result sample");
|
|
66
|
+
return this.downloadResult(sample, modelName);
|
|
45
67
|
}
|
|
46
68
|
async generateVideo(_params) {
|
|
47
69
|
throw new Error("BFL does not support video generation");
|
|
@@ -50,25 +72,37 @@ export class BFLProvider {
|
|
|
50
72
|
throw new Error("BFL does not support audio generation");
|
|
51
73
|
}
|
|
52
74
|
async submitTask(model, body) {
|
|
75
|
+
this.validateModel(model);
|
|
53
76
|
const response = await fetch(`${BFL_BASE_URL}/${model}`, {
|
|
54
77
|
method: "POST",
|
|
55
|
-
headers: { "Content-Type": "application/json", "
|
|
78
|
+
headers: { "Content-Type": "application/json", "x-key": this.apiKey },
|
|
56
79
|
body: JSON.stringify(body),
|
|
57
80
|
});
|
|
58
81
|
if (!response.ok) {
|
|
59
82
|
throw new Error(`BFL task submission failed: ${response.status}`);
|
|
60
83
|
}
|
|
61
|
-
|
|
84
|
+
const task = await response.json();
|
|
85
|
+
this.validatePollingUrl(task.polling_url);
|
|
86
|
+
return task;
|
|
62
87
|
}
|
|
63
|
-
async pollTask(
|
|
88
|
+
async pollTask(pollingUrl) {
|
|
64
89
|
return pollForCompletion(async () => {
|
|
65
|
-
const response = await fetch(
|
|
66
|
-
headers: { "
|
|
90
|
+
const response = await fetch(pollingUrl, {
|
|
91
|
+
headers: { "x-key": this.apiKey },
|
|
67
92
|
});
|
|
93
|
+
if (!response.ok) {
|
|
94
|
+
throw new Error(`BFL poll failed: ${response.status}`);
|
|
95
|
+
}
|
|
68
96
|
return response.json();
|
|
69
|
-
}, (result) =>
|
|
97
|
+
}, (result) => {
|
|
98
|
+
if (result.status === "Error" || result.status === "Failed") {
|
|
99
|
+
throw new Error(`BFL task failed with status: ${result.status}`);
|
|
100
|
+
}
|
|
101
|
+
return result.status === "Ready";
|
|
102
|
+
}, { timeoutMs: 300_000, intervalMs: 3_000 });
|
|
70
103
|
}
|
|
71
104
|
async downloadResult(url, model) {
|
|
105
|
+
this.validateDownloadUrl(url);
|
|
72
106
|
const response = await fetch(url);
|
|
73
107
|
if (!response.ok) {
|
|
74
108
|
throw new Error(`BFL image download failed: ${response.status}`);
|
|
@@ -77,6 +111,23 @@ export class BFLProvider {
|
|
|
77
111
|
const data = Buffer.from(await response.arrayBuffer());
|
|
78
112
|
return { data, mimeType, metadata: { model, provider: "bfl" } };
|
|
79
113
|
}
|
|
114
|
+
validateModel(model) {
|
|
115
|
+
if (!ALLOWED_MODELS.has(model)) {
|
|
116
|
+
throw new Error(`Unknown BFL model: ${model}. Allowed: ${[...ALLOWED_MODELS].join(", ")}`);
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
validatePollingUrl(url) {
|
|
120
|
+
const parsed = new URL(url);
|
|
121
|
+
if (!ALLOWED_POLL_HOSTS.has(parsed.hostname)) {
|
|
122
|
+
throw new Error(`Unexpected BFL polling host: ${parsed.hostname}`);
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
validateDownloadUrl(url) {
|
|
126
|
+
const parsed = new URL(url);
|
|
127
|
+
if (!ALLOWED_DOWNLOAD_HOSTS.has(parsed.hostname)) {
|
|
128
|
+
throw new Error(`Unexpected BFL download host: ${parsed.hostname}`);
|
|
129
|
+
}
|
|
130
|
+
}
|
|
80
131
|
mapAspectRatio(ratio) {
|
|
81
132
|
const dimensions = ASPECT_RATIO_MAP[ratio];
|
|
82
133
|
if (!dimensions) {
|
package/build/providers/elevenlabs.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
const BASE_URL = "https://api.elevenlabs.io/v1";
|
|
2
2
|
const DEFAULT_VOICE_ID = "JBFqnCBsd6RMkjVDRZzb";
|
|
3
3
|
const DEFAULT_TTS_MODEL = "eleven_flash_v2_5";
|
|
4
|
-
const TRANSCRIPTION_MODEL = "
|
|
4
|
+
const TRANSCRIPTION_MODEL = "scribe_v2";
|
|
5
5
|
export class ElevenLabsProvider {
|
|
6
6
|
name = "elevenlabs";
|
|
7
7
|
capabilities = {
|
|
@@ -79,7 +79,7 @@ export class ElevenLabsProvider {
|
|
|
79
79
|
}
|
|
80
80
|
async generateSoundEffect(params) {
|
|
81
81
|
const filtered = Object.fromEntries(Object.entries(params.providerOptions ?? {}).filter(([k]) => k !== "mode"));
|
|
82
|
-
const response = await fetch(`${BASE_URL}/
|
|
82
|
+
const response = await fetch(`${BASE_URL}/sound-generation`, {
|
|
83
83
|
method: "POST",
|
|
84
84
|
headers: { "Content-Type": "application/json", "xi-api-key": this.apiKey },
|
|
85
85
|
body: JSON.stringify({ text: params.text, ...filtered }),
|
package/build/providers/registry.js
CHANGED
|
@@ -23,7 +23,7 @@ export class ProviderRegistry {
|
|
|
23
23
|
return [...this.providers.values()].filter((p) => p.capabilities.supportsAudioGeneration);
|
|
24
24
|
}
|
|
25
25
|
getTranscriptionProviders() {
|
|
26
|
-
return [...this.providers.values()].filter((p) => p.
|
|
26
|
+
return [...this.providers.values()].filter((p) => typeof p.transcribeAudio === "function");
|
|
27
27
|
}
|
|
28
28
|
listCapabilities() {
|
|
29
29
|
return [...this.providers.values()].map((p) => ({
|
package/build/providers/types.d.ts
CHANGED
|
@@ -12,7 +12,7 @@ export interface ProviderCapabilities {
|
|
|
12
12
|
supportsImageEditing: boolean;
|
|
13
13
|
supportsVideoGeneration: boolean;
|
|
14
14
|
supportsAudioGeneration: boolean;
|
|
15
|
-
supportsTranscription
|
|
15
|
+
supportsTranscription?: boolean;
|
|
16
16
|
supportedImageAspectRatios: string[];
|
|
17
17
|
supportedVideoAspectRatios: string[];
|
|
18
18
|
supportedVideoResolutions: string[];
|
package/build/tools/list-providers.js
CHANGED
|
@@ -10,18 +10,18 @@ export function buildListProvidersHandler(registry) {
|
|
|
10
10
|
};
|
|
11
11
|
}
|
|
12
12
|
const lines = providers.map((p) => {
|
|
13
|
-
const
|
|
13
|
+
const capabilityLabels = [];
|
|
14
14
|
if (p.capabilities.supportsImageGeneration)
|
|
15
|
-
|
|
15
|
+
capabilityLabels.push("image");
|
|
16
16
|
if (p.capabilities.supportsImageEditing)
|
|
17
|
-
|
|
17
|
+
capabilityLabels.push("image editing");
|
|
18
18
|
if (p.capabilities.supportsVideoGeneration)
|
|
19
|
-
|
|
19
|
+
capabilityLabels.push("video");
|
|
20
20
|
if (p.capabilities.supportsAudioGeneration)
|
|
21
|
-
|
|
21
|
+
capabilityLabels.push("audio");
|
|
22
22
|
if (p.capabilities.supportsTranscription)
|
|
23
|
-
|
|
24
|
-
return `- ${p.name}: ${
|
|
23
|
+
capabilityLabels.push("transcription");
|
|
24
|
+
return `- ${p.name}: ${capabilityLabels.join(", ")}`;
|
|
25
25
|
});
|
|
26
26
|
return {
|
|
27
27
|
content: [{
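package/build/tools/transcribe-audio.js
CHANGED
|
@@ -15,7 +15,7 @@ export function buildTranscribeAudioHandler(registry) {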
@@ -15,7 +15,7 @@ export function buildTranscribeAudioHandler(registry) {
|
|
|
15
15
|
content: [{ type: "text", text }],
|
|
16
16
|
};
|
|
17
17
|
}
|
|
18
|
-
if (!provider.
|
|
18
|
+
if (!provider.transcribeAudio) {
|
|
19
19
|
const available = registry.getTranscriptionProviders().map((p) => p.name).join(", ") || "none";
|
|
20
20
|
return {
|
|
21
21
|
isError: true,
|