@r16t/multimodal-mcp 1.3.0 → 1.4.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -19,19 +19,19 @@ Set the API key for at least one provider. Most users only need one — add more
19
19
 
20
20
  ```bash
21
21
  # Using OpenAI
22
- claude mcp add multimodal-mcp -e OPENAI_API_KEY=sk-... -- npx @r16t/multimodal-mcp@latest
22
+ claude mcp add multimodal-mcp -e OPENAI_API_KEY=sk-... -- npx -y @r16t/multimodal-mcp@latest
23
23
 
24
24
  # Or using xAI
25
- # claude mcp add multimodal-mcp -e XAI_API_KEY=xai-... -- npx @r16t/multimodal-mcp@latest
25
+ # claude mcp add multimodal-mcp -e XAI_API_KEY=xai-... -- npx -y @r16t/multimodal-mcp@latest
26
26
 
27
27
  # Or using Gemini
28
- # claude mcp add multimodal-mcp -e GEMINI_API_KEY=AIza... -- npx @r16t/multimodal-mcp@latest
28
+ # claude mcp add multimodal-mcp -e GEMINI_API_KEY=AIza... -- npx -y @r16t/multimodal-mcp@latest
29
29
 
30
30
  # Or using ElevenLabs (audio + transcription)
31
- # claude mcp add multimodal-mcp -e ELEVENLABS_API_KEY=xi-... -- npx @r16t/multimodal-mcp@latest
31
+ # claude mcp add multimodal-mcp -e ELEVENLABS_API_KEY=xi-... -- npx -y @r16t/multimodal-mcp@latest
32
32
 
33
33
  # Or using BFL/FLUX (images)
34
- # claude mcp add multimodal-mcp -e BFL_API_KEY=... -- npx @r16t/multimodal-mcp@latest
34
+ # claude mcp add multimodal-mcp -e BFL_API_KEY=... -- npx -y @r16t/multimodal-mcp@latest
35
35
  ```
36
36
 
37
37
  Using a different editor? See [setup instructions](#editor-setup) for Claude Desktop, Cursor, VS Code, Windsurf, and Cline.
package/build/errors.js CHANGED
@@ -4,6 +4,8 @@ const API_KEY_PATTERNS = [
4
4
  /AIzaSy[a-zA-Z0-9_-]{10,}/g,
5
5
  /key=[a-zA-Z0-9_-]{20,}/g,
6
6
  /xi-[a-zA-Z0-9_-]{10,}/g,
7
+ /\b[a-f0-9]{32}\b/g,
8
+ /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi,
7
9
  ];
8
10
  export function sanitizeError(error) {
9
11
  let message;
@@ -11,5 +11,8 @@ export declare class BFLProvider implements MediaProvider {
11
11
  private submitTask;
12
12
  private pollTask;
13
13
  private downloadResult;
14
+ private validateModel;
15
+ private validatePollingUrl;
16
+ private validateDownloadUrl;
14
17
  private mapAspectRatio;
15
18
  }
@@ -1,7 +1,14 @@
1
1
  import { pollForCompletion } from "./polling.js";
2
- const BFL_BASE_URL = "https://api.bfl.ml/v1";
2
+ const BFL_BASE_URL = "https://api.bfl.ai/v1";
3
3
  const IMAGE_MODEL = "flux-pro-1.1";
4
4
  const EDIT_MODEL = "flux-kontext-pro";
5
+ const ALLOWED_MODELS = new Set([
6
+ "flux-pro-1.1",
7
+ "flux-pro-1.1-ultra",
8
+ "flux-dev",
9
+ "flux-kontext-pro",
10
+ "flux-kontext-max",
11
+ ]);
5
12
  const ASPECT_RATIO_MAP = {
6
13
  "1:1": { width: 1024, height: 1024 },
7
14
  "16:9": { width: 1344, height: 768 },
@@ -9,6 +16,15 @@ const ASPECT_RATIO_MAP = {
9
16
  "4:3": { width: 1152, height: 896 },
10
17
  "3:4": { width: 896, height: 1152 },
11
18
  };
19
+ const ALLOWED_POLL_HOSTS = new Set([
20
+ "api.bfl.ai",
21
+ "api.bfl.ml",
22
+ ]);
23
+ const ALLOWED_DOWNLOAD_HOSTS = new Set([
24
+ "delivery-bfl.ai",
25
+ "cdn.bfl.ai",
26
+ "cdn.bfl.ml",
27
+ ]);
12
28
  export class BFLProvider {
13
29
  name = "bfl";
14
30
  capabilities = {
@@ -32,16 +48,22 @@ export class BFLProvider {
32
48
  const modelName = model ?? IMAGE_MODEL;
33
49
  const { width, height } = this.mapAspectRatio(params.aspectRatio);
34
50
  const task = await this.submitTask(modelName, { prompt: params.prompt, width, height, ...options });
35
- const result = await this.pollTask(task.id);
36
- return this.downloadResult(result.result.sample, modelName);
51
+ const result = await this.pollTask(task.polling_url);
52
+ const sample = result.result?.sample;
53
+ if (!sample)
54
+ throw new Error("BFL returned Ready status with no result sample");
55
+ return this.downloadResult(sample, modelName);
37
56
  }
38
57
  async editImage(params) {
39
58
  const { model, ...options } = params.providerOptions ?? {};
40
59
  const modelName = model ?? EDIT_MODEL;
41
60
  const input_image = params.imageData.toString("base64");
42
61
  const task = await this.submitTask(modelName, { prompt: params.prompt, input_image, ...options });
43
- const result = await this.pollTask(task.id);
44
- return this.downloadResult(result.result.sample, modelName);
62
+ const result = await this.pollTask(task.polling_url);
63
+ const sample = result.result?.sample;
64
+ if (!sample)
65
+ throw new Error("BFL returned Ready status with no result sample");
66
+ return this.downloadResult(sample, modelName);
45
67
  }
46
68
  async generateVideo(_params) {
47
69
  throw new Error("BFL does not support video generation");
@@ -50,25 +72,37 @@ export class BFLProvider {
50
72
  throw new Error("BFL does not support audio generation");
51
73
  }
52
74
  async submitTask(model, body) {
75
+ this.validateModel(model);
53
76
  const response = await fetch(`${BFL_BASE_URL}/${model}`, {
54
77
  method: "POST",
55
- headers: { "Content-Type": "application/json", "X-Key": this.apiKey },
78
+ headers: { "Content-Type": "application/json", "x-key": this.apiKey },
56
79
  body: JSON.stringify(body),
57
80
  });
58
81
  if (!response.ok) {
59
82
  throw new Error(`BFL task submission failed: ${response.status}`);
60
83
  }
61
- return response.json();
84
+ const task = await response.json();
85
+ this.validatePollingUrl(task.polling_url);
86
+ return task;
62
87
  }
63
- async pollTask(taskId) {
88
+ async pollTask(pollingUrl) {
64
89
  return pollForCompletion(async () => {
65
- const response = await fetch(`${BFL_BASE_URL}/get_result?id=${taskId}`, {
66
- headers: { "X-Key": this.apiKey },
90
+ const response = await fetch(pollingUrl, {
91
+ headers: { "x-key": this.apiKey },
67
92
  });
93
+ if (!response.ok) {
94
+ throw new Error(`BFL poll failed: ${response.status}`);
95
+ }
68
96
  return response.json();
69
- }, (result) => result.status === "Ready", { timeoutMs: 300_000, intervalMs: 3_000 });
97
+ }, (result) => {
98
+ if (result.status === "Error" || result.status === "Failed") {
99
+ throw new Error(`BFL task failed with status: ${result.status}`);
100
+ }
101
+ return result.status === "Ready";
102
+ }, { timeoutMs: 300_000, intervalMs: 3_000 });
70
103
  }
71
104
  async downloadResult(url, model) {
105
+ this.validateDownloadUrl(url);
72
106
  const response = await fetch(url);
73
107
  if (!response.ok) {
74
108
  throw new Error(`BFL image download failed: ${response.status}`);
@@ -77,6 +111,23 @@ export class BFLProvider {
77
111
  const data = Buffer.from(await response.arrayBuffer());
78
112
  return { data, mimeType, metadata: { model, provider: "bfl" } };
79
113
  }
114
+ validateModel(model) {
115
+ if (!ALLOWED_MODELS.has(model)) {
116
+ throw new Error(`Unknown BFL model: ${model}. Allowed: ${[...ALLOWED_MODELS].join(", ")}`);
117
+ }
118
+ }
119
+ validatePollingUrl(url) {
120
+ const parsed = new URL(url);
121
+ if (!ALLOWED_POLL_HOSTS.has(parsed.hostname)) {
122
+ throw new Error(`Unexpected BFL polling host: ${parsed.hostname}`);
123
+ }
124
+ }
125
+ validateDownloadUrl(url) {
126
+ const parsed = new URL(url);
127
+ if (!ALLOWED_DOWNLOAD_HOSTS.has(parsed.hostname)) {
128
+ throw new Error(`Unexpected BFL download host: ${parsed.hostname}`);
129
+ }
130
+ }
80
131
  mapAspectRatio(ratio) {
81
132
  const dimensions = ASPECT_RATIO_MAP[ratio];
82
133
  if (!dimensions) {
@@ -1,7 +1,7 @@
1
1
  const BASE_URL = "https://api.elevenlabs.io/v1";
2
2
  const DEFAULT_VOICE_ID = "JBFqnCBsd6RMkjVDRZzb";
3
3
  const DEFAULT_TTS_MODEL = "eleven_flash_v2_5";
4
- const TRANSCRIPTION_MODEL = "scribe_v1";
4
+ const TRANSCRIPTION_MODEL = "scribe_v2";
5
5
  export class ElevenLabsProvider {
6
6
  name = "elevenlabs";
7
7
  capabilities = {
@@ -79,7 +79,7 @@ export class ElevenLabsProvider {
79
79
  }
80
80
  async generateSoundEffect(params) {
81
81
  const filtered = Object.fromEntries(Object.entries(params.providerOptions ?? {}).filter(([k]) => k !== "mode"));
82
- const response = await fetch(`${BASE_URL}/text-to-sound-effects`, {
82
+ const response = await fetch(`${BASE_URL}/sound-generation`, {
83
83
  method: "POST",
84
84
  headers: { "Content-Type": "application/json", "xi-api-key": this.apiKey },
85
85
  body: JSON.stringify({ text: params.text, ...filtered }),
@@ -23,7 +23,7 @@ export class ProviderRegistry {
23
23
  return [...this.providers.values()].filter((p) => p.capabilities.supportsAudioGeneration);
24
24
  }
25
25
  getTranscriptionProviders() {
26
- return [...this.providers.values()].filter((p) => p.capabilities.supportsTranscription);
26
+ return [...this.providers.values()].filter((p) => typeof p.transcribeAudio === "function");
27
27
  }
28
28
  listCapabilities() {
29
29
  return [...this.providers.values()].map((p) => ({
@@ -12,7 +12,7 @@ export interface ProviderCapabilities {
12
12
  supportsImageEditing: boolean;
13
13
  supportsVideoGeneration: boolean;
14
14
  supportsAudioGeneration: boolean;
15
- supportsTranscription: boolean;
15
+ supportsTranscription?: boolean;
16
16
  supportedImageAspectRatios: string[];
17
17
  supportedVideoAspectRatios: string[];
18
18
  supportedVideoResolutions: string[];
@@ -10,18 +10,18 @@ export function buildListProvidersHandler(registry) {
10
10
  };
11
11
  }
12
12
  const lines = providers.map((p) => {
13
- const caps = [];
13
+ const capabilityLabels = [];
14
14
  if (p.capabilities.supportsImageGeneration)
15
- caps.push("image");
15
+ capabilityLabels.push("image");
16
16
  if (p.capabilities.supportsImageEditing)
17
- caps.push("image editing");
17
+ capabilityLabels.push("image editing");
18
18
  if (p.capabilities.supportsVideoGeneration)
19
- caps.push("video");
19
+ capabilityLabels.push("video");
20
20
  if (p.capabilities.supportsAudioGeneration)
21
- caps.push("audio");
21
+ capabilityLabels.push("audio");
22
22
  if (p.capabilities.supportsTranscription)
23
- caps.push("transcription");
24
- return `- ${p.name}: ${caps.join(", ")}`;
23
+ capabilityLabels.push("transcription");
24
+ return `- ${p.name}: ${capabilityLabels.join(", ")}`;
25
25
  });
26
26
  return {
27
27
  content: [{
@@ -15,7 +15,7 @@ export function buildTranscribeAudioHandler(registry) {
15
15
  content: [{ type: "text", text }],
16
16
  };
17
17
  }
18
- if (!provider.capabilities.supportsTranscription || !provider.transcribeAudio) {
18
+ if (!provider.transcribeAudio) {
19
19
  const available = registry.getTranscriptionProviders().map((p) => p.name).join(", ") || "none";
20
20
  return {
21
21
  isError: true,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@r16t/multimodal-mcp",
3
- "version": "1.3.0",
3
+ "version": "1.4.0",
4
4
  "mcpName": "io.github.rsmdt/multimodal",
5
5
  "description": "Multi-provider media generation MCP server",
6
6
  "type": "module",