@reaatech/media-pipeline-mcp-elevenlabs 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +178 -0
- package/dist/index.cjs +233 -0
- package/dist/index.d.cts +32 -0
- package/dist/index.d.ts +32 -0
- package/dist/index.js +205 -0
- package/package.json +49 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Media Pipeline MCP Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# @reaatech/media-pipeline-mcp-elevenlabs
|
|
2
|
+
|
|
3
|
+
[npm package](https://www.npmjs.com/package/@reaatech/media-pipeline-mcp-elevenlabs)
|
|
4
|
+
[MIT License](https://github.com/reaatech/media-pipeline-mcp/blob/main/LICENSE)
|
|
5
|
+
[CI status](https://github.com/reaatech/media-pipeline-mcp/actions/workflows/ci.yml)
|
|
6
|
+
|
|
7
|
+
> **Status:** Pre-1.0 — APIs may change in minor versions. Pin to a specific version in production.
|
|
8
|
+
|
|
9
|
+
ElevenLabs provider for the media pipeline framework. Delivers high-quality text-to-speech synthesis with configurable voice selection, speaking speed, voice stability tuning, similarity boost, and style exaggeration. Supports multiple output formats and native audio-byte streaming.
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
npm install @reaatech/media-pipeline-mcp-elevenlabs
|
|
15
|
+
# or
|
|
16
|
+
pnpm add @reaatech/media-pipeline-mcp-elevenlabs
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Feature Overview
|
|
20
|
+
|
|
21
|
+
- High-quality TTS with `eleven_monolingual_v1`, `eleven_multilingual_v2`, and `eleven_turbo_v2` models
|
|
22
|
+
- Named voice selection (Rachel, Josh, Daniel, Charlotte) plus custom voice IDs
|
|
23
|
+
- Voice tuning sent with every request using built-in defaults: stability (0-1), similarity boost (0-1), style exaggeration (0-1)
|
|
24
|
+
- Speaking speed control: a `speed` other than `1` wraps the text in a `<speak rate="…%">` tag
|
|
25
|
+
- Multiple output formats: MP3, WAV, OGG, FLAC, AAC
|
|
26
|
+
- Streaming support for TTS audio bytes (`supportsStreaming`)
|
|
27
|
+
- Character-count-based cost estimation
|
|
28
|
+
|
|
29
|
+
## Quick Start
|
|
30
|
+
|
|
31
|
+
```typescript
|
|
32
|
+
import { ElevenLabsProvider } from "@reaatech/media-pipeline-mcp-elevenlabs";
|
|
33
|
+
|
|
34
|
+
const provider = new ElevenLabsProvider({ apiKey: process.env.ELEVENLABS_API_KEY! });
|
|
35
|
+
|
|
36
|
+
const audio = await provider.execute({
|
|
37
|
+
operation: "audio.tts",
|
|
38
|
+
params: {
|
|
39
|
+
text: "Welcome to our media pipeline. This audio was generated with ElevenLabs.",
|
|
40
|
+
voice: "Rachel",
|
|
41
|
+
speed: 1.0,
|
|
42
|
+
model: "eleven_turbo_v2",
|
|
43
|
+
},
|
|
44
|
+
config: {},
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
// Save or pipe the audio
|
|
48
|
+
import { writeFileSync } from "node:fs";
|
|
49
|
+
writeFileSync("output.mp3", audio.data);
|
|
50
|
+
console.log(`Generated ${audio.metadata.characterCount} chars in ${audio.metadata.duration}s`);
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Supported Operations
|
|
54
|
+
|
|
55
|
+
| Operation | Default Model | Description | Output Format |
|
|
56
|
+
|-----------|---------------|-------------|---------------|
|
|
57
|
+
| `audio.tts` | `eleven_monolingual_v1` | Text-to-speech with voice and parameter control | Audio bytes in `mp3`, `wav`, `ogg`, `flac`, or `aac` |
|
|
58
|
+
|
|
59
|
+
## Configuration Parameters
|
|
60
|
+
|
|
61
|
+
### `audio.tts`
|
|
62
|
+
|
|
63
|
+
| Parameter | Type | Default | Description |
|
|
64
|
+
|-----------|------|---------|-------------|
|
|
65
|
+
| `text` | `string` | *required* | Text to convert to speech |
|
|
66
|
+
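| `voice` | `string` | `"Rachel"` | Built-in preset (`default`, `male-narrator`, `female-narrator`, `british-male`, `british-female` — resolving to Rachel, Josh, Rachel, Daniel, Charlotte respectively), a key of the configured `voices` map, or a custom voice ID |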
|
|
67
|
+
| `speed` | `number` | `1.0` | Speaking rate multiplier; any value other than `1` wraps the text in `<speak rate="N%">` (e.g. `1.5` → `150%`) |
|
|
68
|
+
| `model` | `string` | `"eleven_monolingual_v1"` | TTS model ID |
|
|
69
|
+
| `response_format` | `string` | `"mp3"` | Output audio format: `mp3`, `wav`, `ogg`, `flac`, `aac` |
|
|
70
|
+
|
|
71
|
+
### Voice Tuning (internal defaults)
|
|
72
|
+
|
|
73
|
+
The provider applies these voice settings automatically on every request:
|
|
74
|
+
|
|
75
|
+
| Parameter | Default | Description |
|
|
76
|
+
|-----------|---------|-------------|
|
|
77
|
+
| `stability` | `0.5` | Voice stability (0 = more variable, 1 = more consistent) |
|
|
78
|
+
| `similarity_boost` | `0.75` | Speaker similarity to target voice (0-1) |
|
|
79
|
+
| `style` | `0.0` | Style exaggeration (0-1) |
|
|
80
|
+
| `use_speaker_boost` | `true` | Enhance speaker clarity |
|
|
81
|
+
|
|
82
|
+
## API Reference
|
|
83
|
+
|
|
84
|
+
### `ElevenLabsProvider`
|
|
85
|
+
|
|
86
|
+
```typescript
|
|
87
|
+
class ElevenLabsProvider extends MediaProvider {
|
|
88
|
+
constructor(config: ElevenLabsProviderConfig)
|
|
89
|
+
|
|
90
|
+
healthCheck(): Promise<ProviderHealth>
|
|
91
|
+
estimateCost(input: ProviderInput): Promise<CostEstimate>
|
|
92
|
+
execute(input: ProviderInput): Promise<ProviderOutput>
|
|
93
|
+
}
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### `ElevenLabsProviderConfig`
|
|
97
|
+
|
|
98
|
+
```typescript
|
|
99
|
+
interface ElevenLabsProviderConfig {
|
|
100
|
+
apiKey: string;
|
|
101
|
+
voices?: {
|
|
102
|
+
default?: string;
|
|
103
|
+
[voiceName: string]: string | undefined;
|
|
104
|
+
};
|
|
105
|
+
model?: string; // Default model ID
|
|
106
|
+
timeout?: number; // Request timeout in ms
|
|
107
|
+
}
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Factory Function
|
|
111
|
+
|
|
112
|
+
```typescript
|
|
113
|
+
import { defineElevenLabsProvider } from "@reaatech/media-pipeline-mcp-elevenlabs";
|
|
114
|
+
|
|
115
|
+
const provider = defineElevenLabsProvider({ apiKey: process.env.ELEVENLABS_API_KEY! });
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Voice Resolution Logic
|
|
119
|
+
|
|
120
|
+
Voice parameters are resolved in this order:
|
|
121
|
+
1. If a custom `voices` map is configured, the name is looked up there first
|
|
122
|
+
2. If the value starts with `voice_` or is exactly 20 characters, it's treated as a raw voice ID
|
|
123
|
+
3. If the name matches a built-in preset, that voice ID is used
|
|
124
|
+
4. Falls back to `"Rachel"`
|
|
125
|
+
|
|
126
|
+
### Key Methods
|
|
127
|
+
|
|
128
|
+
| Method | Returns | Description |
|
|
129
|
+
|--------|---------|-------------|
|
|
130
|
+
| `healthCheck()` | `ProviderHealth` | Validates API key by fetching `/v1/voices` from the ElevenLabs API |
|
|
131
|
+
| `estimateCost(input)` | `CostEstimate` | Estimates cost based on text character count × per-character rate |
|
|
132
|
+
| `execute(input)` | `ProviderOutput` | Synthesizes audio and returns raw audio bytes with metadata |
|
|
133
|
+
|
|
134
|
+
### Non-Retryable Errors
|
|
135
|
+
|
|
136
|
+
The provider classifies these errors as non-retryable: authentication failed, invalid API key, permission denied, insufficient credits, voice not found, invalid voice ID.
|
|
137
|
+
|
|
138
|
+
## Cost Estimation
|
|
139
|
+
|
|
140
|
+
### Per-Character Pricing
|
|
141
|
+
|
|
142
|
+
| Model | Cost / Character |
|
|
143
|
+
|-------|-----------------|
|
|
144
|
+
| `eleven_turbo_v2` | $0.0002 |
|
|
145
|
+
| `eleven_monolingual_v1` | $0.0003 |
|
|
146
|
+
| `eleven_multilingual_v2` | $0.0005 |
|
|
147
|
+
|
|
148
|
+
### Example Estimates
|
|
149
|
+
|
|
150
|
+
| Text Length | Model | Est. Cost |
|
|
151
|
+
|------------|-------|-----------|
|
|
152
|
+
| 100 chars | `eleven_turbo_v2` | $0.02 |
|
|
153
|
+
| 100 chars | `eleven_monolingual_v1` | $0.03 |
|
|
154
|
+
| 500 chars | `eleven_multilingual_v2` | $0.25 |
|
|
155
|
+
|
|
156
|
+
## Cache Configuration
|
|
157
|
+
|
|
158
|
+
The provider exposes `static cacheConfig` with deterministic and non-deterministic parameters.
|
|
159
|
+
|
|
160
|
+
**Deterministic parameters:** `text`, `voice_id`, `voice`, `model`, `voice_settings`
|
|
161
|
+
|
|
162
|
+
**Non-deterministic parameters:** (none)
|
|
163
|
+
|
|
164
|
+
The `normalize()` function trims and collapses whitespace in `text`, and preserves voice settings as-is. All parameters are deterministic, so identical text + voice + model combinations will produce matching cache keys.
|
|
165
|
+
|
|
166
|
+
## Health Check
|
|
167
|
+
|
|
168
|
+
The health check sends a GET request to `https://api.elevenlabs.io/v1/voices` using the `xi-api-key` header. Returns `{ healthy: true, latency: <ms> }` on a 2xx response, or `{ healthy: false, latency: <ms>, error: "<message>" }` on failure (non-2xx status or a network error).
|
|
169
|
+
|
|
170
|
+
## Related Packages
|
|
171
|
+
|
|
172
|
+
- [`@reaatech/media-pipeline-mcp-provider-core`](https://www.npmjs.com/package/@reaatech/media-pipeline-mcp-provider-core) — Base provider class
|
|
173
|
+
- [`@reaatech/media-pipeline-mcp-server`](https://www.npmjs.com/package/@reaatech/media-pipeline-mcp-server) — MCP server
|
|
174
|
+
- [`@reaatech/media-pipeline-mcp-openai`](https://www.npmjs.com/package/@reaatech/media-pipeline-mcp-openai) — Alternative TTS provider (TTS-1)
|
|
175
|
+
|
|
176
|
+
## License
|
|
177
|
+
|
|
178
|
+
[MIT](https://github.com/reaatech/media-pipeline-mcp/blob/main/LICENSE)
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// CommonJS interop helpers emitted by the build: they assemble the module's
// export object out of lazy getters.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

// Defines every key of `all` on `target` as an enumerable accessor whose
// getter is the function stored under that key.
var __export = (target, all) => {
  for (const exportName in all) {
    __defProp(target, exportName, { get: all[exportName], enumerable: true });
  }
};

// Mirrors the own properties of `from` onto `to` as live getters, skipping
// keys `to` already owns and the `except` key. `desc` is scratch storage only.
var __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (key === except || __hasOwnProp.call(to, key)) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      // Keep the source property's enumerability (enumerable when no descriptor is found).
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

// Copies `mod` onto a fresh object flagged with a non-enumerable `__esModule: true`.
var __toCommonJS = (mod) => {
  const tagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(tagged, mod);
};
|
|
19
|
+
|
|
20
|
+
// src/index.ts
|
|
21
|
+
var index_exports = {};
|
|
22
|
+
__export(index_exports, {
|
|
23
|
+
ElevenLabsProvider: () => ElevenLabsProvider,
|
|
24
|
+
defineElevenLabsProvider: () => defineElevenLabsProvider
|
|
25
|
+
});
|
|
26
|
+
module.exports = __toCommonJS(index_exports);
|
|
27
|
+
|
|
28
|
+
// src/elevenlabs-provider.ts
|
|
29
|
+
var import_media_pipeline_mcp_provider_core = require("@reaatech/media-pipeline-mcp-provider-core");
|
|
30
|
+
|
|
31
|
+
// src/pricing.json
|
|
32
|
+
// Pricing table inlined from src/pricing.json: operation -> model -> rate.
// `perUnit` is the per-character rate; `expectedDurationMs` is the duration
// hint reported alongside a cost estimate.
var pricing_default = {
  "audio.tts": {
    eleven_monolingual_v1: {
      input: { perUnit: 0.0003, unit: "per character" },
      expectedDurationMs: 2000
    },
    eleven_multilingual_v2: {
      input: { perUnit: 0.0005, unit: "per character" },
      expectedDurationMs: 3000
    },
    eleven_turbo_v2: {
      input: { perUnit: 0.0002, unit: "per character" },
      expectedDurationMs: 1000
    }
  }
};
|
|
48
|
+
|
|
49
|
+
// src/elevenlabs-provider.ts
|
|
50
|
+
/**
 * ElevenLabs text-to-speech provider. Handles the single `audio.tts` operation
 * by POSTing to `${baseUrl}/text-to-speech/{voice}` and returning the response
 * bytes together with cost and timing metadata.
 */
var ElevenLabsProvider = class extends import_media_pipeline_mcp_provider_core.MediaProvider {
  /**
   * Cache-key description for this provider.
   *
   * `speed` and `response_format` are listed because execute() reads both:
   * speed changes the text sent to the API and response_format changes the
   * reported MIME type, so requests differing only in those must not collide.
   * NOTE(review): assumes the cache key is derived from these parameters and
   * the normalize() output — confirm against provider-core.
   */
  static cacheConfig = {
    deterministicParams: [
      "text",
      "voice_id",
      "voice",
      "model",
      "voice_settings",
      "speed",
      "response_format"
    ],
    nonDeterministicParams: [],
    normalize: (inputs) => {
      const normalized = {};
      if (inputs.text !== void 0) {
        // Trim and collapse whitespace runs so equivalent texts normalize identically.
        normalized.text = String(inputs.text).trim().replace(/\s+/g, " ");
      }
      // Every other parameter is carried over untouched when present.
      for (const key of ["voice", "voice_id", "model", "voice_settings", "speed", "response_format"]) {
        if (inputs[key] !== void 0) {
          normalized[key] = inputs[key];
        }
      }
      return normalized;
    }
  };
  // §0.6 — elevenlabs streams TTS bytes natively. No native webhook surface.
  supportsStreaming = /* @__PURE__ */ new Set(["audio.tts"]);
  supportsWebhooks = false;
  name = "elevenlabs";
  supportedOperations = ["audio.tts"];
  config;
  baseUrl = "https://api.elevenlabs.io/v1";
  // Built-in presets (alias -> voice).
  // NOTE(review): the values are voice *names* and are placed in the request
  // path as-is, while the text-to-speech endpoint documents that path segment
  // as a voice ID — confirm, or map these presets to voice IDs.
  defaultVoices = {
    default: "Rachel",
    // Professional female voice
    "male-narrator": "Josh",
    "female-narrator": "Rachel",
    "british-male": "Daniel",
    "british-female": "Charlotte"
  };

  /**
   * @param config - provider configuration; `apiKey` is sent as the
   *   `xi-api-key` header, `voices` extends/overrides voice resolution,
   *   `model` is the default model ID and `timeout` the request timeout in ms.
   */
  constructor(config) {
    super();
    this.config = config;
  }

  /**
   * Returns `init` with an abort signal attached when a positive
   * `config.timeout` (milliseconds) is configured; otherwise returns `init`
   * unchanged so requests behave exactly as they do without a timeout.
   */
  withTimeout(init) {
    const { timeout } = this.config;
    if (!Number.isFinite(timeout) || timeout <= 0) {
      return init;
    }
    // AbortSignal.timeout() only accepts whole milliseconds.
    return { ...init, signal: AbortSignal.timeout(Math.ceil(timeout)) };
  }

  /**
   * Probes the API by fetching `${baseUrl}/voices` with the configured key.
   * Never rejects: failures are reported as `{ healthy: false, latency, error }`.
   * @returns `{ healthy, latency }` with `latency` in milliseconds
   */
  async healthCheck() {
    const startTime = Date.now();
    try {
      const response = await fetch(
        `${this.baseUrl}/voices`,
        this.withTimeout({
          headers: {
            "xi-api-key": this.config.apiKey
          }
        })
      );
      if (!response.ok) {
        throw new Error(`Health check failed: ${response.statusText}`);
      }
      return {
        healthy: true,
        latency: Date.now() - startTime
      };
    } catch (error) {
      return {
        healthy: false,
        latency: Date.now() - startTime,
        error: error instanceof Error ? error.message : String(error)
      };
    }
  }

  /**
   * Estimates cost as `text.length` × the per-character rate of the model.
   * The model is `params.model`, then `config.model`, then
   * `eleven_monolingual_v1`; unknown models are priced as
   * `eleven_monolingual_v1`. Operations without pricing cost 0.
   * @returns `{ costUsd, currency, estimatedDurationMs }`
   */
  async estimateCost(input) {
    // Own-property lookups only: keys such as "constructor" must not match
    // members inherited from Object.prototype.
    const opPricing = Object.hasOwn(pricing_default, input.operation) ? pricing_default[input.operation] : void 0;
    if (!opPricing) {
      return { costUsd: 0, currency: "USD" };
    }
    const model = input.params.model || this.config.model || "eleven_monolingual_v1";
    const entry = Object.hasOwn(opPricing, model) ? opPricing[model] : opPricing.eleven_monolingual_v1;
    // A non-string text has no character count; price it as empty instead of yielding NaN.
    const text = typeof input.params.text === "string" ? input.params.text : "";
    const costUsd = text.length * (entry?.input.perUnit ?? 3e-4);
    return { costUsd, currency: "USD", estimatedDurationMs: entry?.expectedDurationMs };
  }

  /**
   * Synthesizes `params.text` and returns the audio bytes with metadata.
   *
   * Reads `params.voice`, `params.speed` (default 1), `params.response_format`
   * (default "mp3", used only to pick the reported MIME type) and
   * `params.model` (falls back to `config.model`, then `eleven_monolingual_v1`).
   * @returns `{ data, mimeType, costUsd, durationMs, metadata }`
   * @throws {Error} prefixed with "ElevenLabs provider error: " (original
   *   error in `cause`) for unsupported operations, missing text, HTTP errors
   *   and network/timeout failures.
   */
  async execute(input) {
    const startTime = Date.now();
    try {
      if (input.operation !== "audio.tts") {
        throw new Error(`Unsupported operation: ${input.operation}`);
      }
      const text = input.params.text;
      const voice = this.resolveVoice(input.params.voice);
      const speed = input.params.speed || 1;
      const format = input.params.response_format || "mp3";
      const model = input.params.model || this.config.model || "eleven_monolingual_v1";
      if (typeof text !== "string" || text.length === 0) {
        throw new Error("Text is required for TTS");
      }
      const requestBody = {
        // A non-default speed is expressed by wrapping the text in <speak rate="N%">.
        text: speed !== 1 ? `<speak rate="${speed * 100}%">${text}</speak>` : text,
        model_id: model,
        voice_settings: {
          stability: 0.5,
          similarity_boost: 0.75,
          style: 0,
          use_speaker_boost: true
        }
      };
      // The voice may come from caller input, so encode it as one path segment.
      const response = await fetch(
        `${this.baseUrl}/text-to-speech/${encodeURIComponent(voice)}`,
        this.withTimeout({
          method: "POST",
          headers: {
            "xi-api-key": this.config.apiKey,
            "Content-Type": "application/json"
          },
          body: JSON.stringify(requestBody)
        })
      );
      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`ElevenLabs API error: ${response.status} - ${errorText}`);
      }
      const arrayBuffer = await response.arrayBuffer();
      const data = Buffer.from(arrayBuffer);
      const mimeType = this.getMimeType(format);
      const duration = this.estimateDuration(text);
      const cost = (await this.estimateCost(input)).costUsd;
      return {
        data,
        mimeType,
        costUsd: cost,
        durationMs: Date.now() - startTime,
        metadata: {
          model,
          operation: input.operation,
          voice,
          duration,
          speed,
          format,
          characterCount: text.length
        }
      };
    } catch (error) {
      throw new Error(`ElevenLabs provider error: ${error.message}`, { cause: error });
    }
  }

  /**
   * Resolves a requested voice to the value placed in the request path:
   * 1. an entry of `config.voices`,
   * 2. the value itself when it starts with "voice_" or is 20 characters long,
   * 3. a built-in preset,
   * 4. otherwise the default (`config.voices.default`, else the built-in default).
   * A missing or non-string voice resolves straight to the default.
   */
  resolveVoice(voiceName) {
    const customVoices = this.config.voices;
    const fallback = customVoices?.default || this.defaultVoices.default;
    if (typeof voiceName !== "string" || voiceName === "") {
      return fallback;
    }
    if (customVoices && Object.hasOwn(customVoices, voiceName) && customVoices[voiceName]) {
      return customVoices[voiceName];
    }
    if (voiceName.startsWith("voice_") || voiceName.length === 20) {
      return voiceName;
    }
    if (Object.hasOwn(this.defaultVoices, voiceName)) {
      return this.defaultVoices[voiceName];
    }
    return fallback;
  }

  /** Maps a format name to its MIME type; unknown formats map to "audio/mpeg". */
  getMimeType(format) {
    const mimeTypes = {
      mp3: "audio/mpeg",
      wav: "audio/wav",
      ogg: "audio/ogg",
      flac: "audio/flac",
      aac: "audio/aac"
    };
    return Object.hasOwn(mimeTypes, format) ? mimeTypes[format] : "audio/mpeg";
  }

  /** Rough speech duration in whole seconds, assuming 12.5 characters per second. */
  estimateDuration(text) {
    const charsPerSecond = 12.5;
    return Math.ceil(text.length / charsPerSecond);
  }

  /**
   * Reports whether the error message contains (case-insensitively) one of
   * the phrases treated as permanent failures. Errors without a message are
   * treated as retryable.
   */
  isNonRetryableError(error) {
    const nonRetryableMessages = [
      "authentication failed",
      "invalid api key",
      "permission denied",
      "insufficient credits",
      "voice not found",
      "invalid voice id"
    ];
    const message = String(error?.message ?? "").toLowerCase();
    return nonRetryableMessages.some((msg) => message.includes(msg));
  }
};
|
|
226
|
+
/**
 * Factory wrapper around the ElevenLabsProvider constructor.
 * @param config - provider configuration, handed to the constructor unchanged
 * @returns a new ElevenLabsProvider instance
 */
function defineElevenLabsProvider(config) {
  const provider = new ElevenLabsProvider(config);
  return provider;
}
|
|
229
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
230
|
+
0 && (module.exports = {
|
|
231
|
+
ElevenLabsProvider,
|
|
232
|
+
defineElevenLabsProvider
|
|
233
|
+
});
|
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { MediaProvider, ProviderCacheConfig, ProviderHealth, ProviderInput, CostEstimate, ProviderOutput } from '@reaatech/media-pipeline-mcp-provider-core';
|
|
2
|
+
|
|
3
|
+
/** Options accepted by the ElevenLabsProvider constructor. */
interface ElevenLabsProviderConfig {
    /** API key sent with every request as the `xi-api-key` header. */
    apiKey: string;
    /** Default TTS model ID. */
    model?: string;
    /** Request timeout in milliseconds. */
    timeout?: number;
    /** Custom voice map consulted before the built-in presets. */
    voices?: {
        default?: string;
        [voiceName: string]: string | undefined;
    };
}
|
|
12
|
+
/** ElevenLabs text-to-speech provider for the `audio.tts` operation. */
declare class ElevenLabsProvider extends MediaProvider {
    /** Cache-key configuration: deterministic parameter list plus input normalizer. */
    static cacheConfig: ProviderCacheConfig;
    readonly name = "elevenlabs";
    readonly supportedOperations: string[];
    /** Operations whose audio bytes can be streamed. */
    readonly supportsStreaming: Set<string>;
    readonly supportsWebhooks = false;
    private config;
    private baseUrl;
    private defaultVoices;
    constructor(config: ElevenLabsProviderConfig);
    /** Probes the API with the configured key; failures are reported in the result. */
    healthCheck(): Promise<ProviderHealth>;
    /** Estimates cost from the text's character count and the model's per-character rate. */
    estimateCost(input: ProviderInput): Promise<CostEstimate>;
    /** Synthesizes speech for `input.params.text` and returns the audio bytes with metadata. */
    execute(input: ProviderInput): Promise<ProviderOutput>;
    /** Reports whether the error message matches a known permanent-failure phrase. */
    protected isNonRetryableError(error: Error): boolean;
    private resolveVoice;
    private getMimeType;
    private estimateDuration;
}
|
|
30
|
+
/** Creates an ElevenLabsProvider from the given configuration. */
declare function defineElevenLabsProvider(config: ElevenLabsProviderConfig): ElevenLabsProvider;
|
|
31
|
+
|
|
32
|
+
export { ElevenLabsProvider, type ElevenLabsProviderConfig, defineElevenLabsProvider };
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { MediaProvider, ProviderCacheConfig, ProviderHealth, ProviderInput, CostEstimate, ProviderOutput } from '@reaatech/media-pipeline-mcp-provider-core';
|
|
2
|
+
|
|
3
|
+
/** Options accepted by the ElevenLabsProvider constructor. */
interface ElevenLabsProviderConfig {
    /** API key sent with every request as the `xi-api-key` header. */
    apiKey: string;
    /** Default TTS model ID. */
    model?: string;
    /** Request timeout in milliseconds. */
    timeout?: number;
    /** Custom voice map consulted before the built-in presets. */
    voices?: {
        default?: string;
        [voiceName: string]: string | undefined;
    };
}
|
|
12
|
+
/** ElevenLabs text-to-speech provider for the `audio.tts` operation. */
declare class ElevenLabsProvider extends MediaProvider {
    /** Cache-key configuration: deterministic parameter list plus input normalizer. */
    static cacheConfig: ProviderCacheConfig;
    readonly name = "elevenlabs";
    readonly supportedOperations: string[];
    /** Operations whose audio bytes can be streamed. */
    readonly supportsStreaming: Set<string>;
    readonly supportsWebhooks = false;
    private config;
    private baseUrl;
    private defaultVoices;
    constructor(config: ElevenLabsProviderConfig);
    /** Probes the API with the configured key; failures are reported in the result. */
    healthCheck(): Promise<ProviderHealth>;
    /** Estimates cost from the text's character count and the model's per-character rate. */
    estimateCost(input: ProviderInput): Promise<CostEstimate>;
    /** Synthesizes speech for `input.params.text` and returns the audio bytes with metadata. */
    execute(input: ProviderInput): Promise<ProviderOutput>;
    /** Reports whether the error message matches a known permanent-failure phrase. */
    protected isNonRetryableError(error: Error): boolean;
    private resolveVoice;
    private getMimeType;
    private estimateDuration;
}
|
|
30
|
+
/** Creates an ElevenLabsProvider from the given configuration. */
declare function defineElevenLabsProvider(config: ElevenLabsProviderConfig): ElevenLabsProvider;
|
|
31
|
+
|
|
32
|
+
export { ElevenLabsProvider, type ElevenLabsProviderConfig, defineElevenLabsProvider };
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
// src/elevenlabs-provider.ts
|
|
2
|
+
import { MediaProvider } from "@reaatech/media-pipeline-mcp-provider-core";
|
|
3
|
+
|
|
4
|
+
// src/pricing.json
|
|
5
|
+
// Pricing table inlined from src/pricing.json: operation -> model -> rate.
// `perUnit` is the per-character rate; `expectedDurationMs` is the duration
// hint reported alongside a cost estimate.
var pricing_default = {
  "audio.tts": {
    eleven_monolingual_v1: {
      input: { perUnit: 0.0003, unit: "per character" },
      expectedDurationMs: 2000
    },
    eleven_multilingual_v2: {
      input: { perUnit: 0.0005, unit: "per character" },
      expectedDurationMs: 3000
    },
    eleven_turbo_v2: {
      input: { perUnit: 0.0002, unit: "per character" },
      expectedDurationMs: 1000
    }
  }
};
|
|
21
|
+
|
|
22
|
+
// src/elevenlabs-provider.ts
|
|
23
|
+
/**
 * ElevenLabs text-to-speech provider. Handles the single `audio.tts` operation
 * by POSTing to `${baseUrl}/text-to-speech/{voice}` and returning the response
 * bytes together with cost and timing metadata.
 */
var ElevenLabsProvider = class extends MediaProvider {
  /**
   * Cache-key description for this provider.
   *
   * `speed` and `response_format` are listed because execute() reads both:
   * speed changes the text sent to the API and response_format changes the
   * reported MIME type, so requests differing only in those must not collide.
   * NOTE(review): assumes the cache key is derived from these parameters and
   * the normalize() output — confirm against provider-core.
   */
  static cacheConfig = {
    deterministicParams: [
      "text",
      "voice_id",
      "voice",
      "model",
      "voice_settings",
      "speed",
      "response_format"
    ],
    nonDeterministicParams: [],
    normalize: (inputs) => {
      const normalized = {};
      if (inputs.text !== void 0) {
        // Trim and collapse whitespace runs so equivalent texts normalize identically.
        normalized.text = String(inputs.text).trim().replace(/\s+/g, " ");
      }
      // Every other parameter is carried over untouched when present.
      for (const key of ["voice", "voice_id", "model", "voice_settings", "speed", "response_format"]) {
        if (inputs[key] !== void 0) {
          normalized[key] = inputs[key];
        }
      }
      return normalized;
    }
  };
  // §0.6 — elevenlabs streams TTS bytes natively. No native webhook surface.
  supportsStreaming = /* @__PURE__ */ new Set(["audio.tts"]);
  supportsWebhooks = false;
  name = "elevenlabs";
  supportedOperations = ["audio.tts"];
  config;
  baseUrl = "https://api.elevenlabs.io/v1";
  // Built-in presets (alias -> voice).
  // NOTE(review): the values are voice *names* and are placed in the request
  // path as-is, while the text-to-speech endpoint documents that path segment
  // as a voice ID — confirm, or map these presets to voice IDs.
  defaultVoices = {
    default: "Rachel",
    // Professional female voice
    "male-narrator": "Josh",
    "female-narrator": "Rachel",
    "british-male": "Daniel",
    "british-female": "Charlotte"
  };

  /**
   * @param config - provider configuration; `apiKey` is sent as the
   *   `xi-api-key` header, `voices` extends/overrides voice resolution,
   *   `model` is the default model ID and `timeout` the request timeout in ms.
   */
  constructor(config) {
    super();
    this.config = config;
  }

  /**
   * Returns `init` with an abort signal attached when a positive
   * `config.timeout` (milliseconds) is configured; otherwise returns `init`
   * unchanged so requests behave exactly as they do without a timeout.
   */
  withTimeout(init) {
    const { timeout } = this.config;
    if (!Number.isFinite(timeout) || timeout <= 0) {
      return init;
    }
    // AbortSignal.timeout() only accepts whole milliseconds.
    return { ...init, signal: AbortSignal.timeout(Math.ceil(timeout)) };
  }

  /**
   * Probes the API by fetching `${baseUrl}/voices` with the configured key.
   * Never rejects: failures are reported as `{ healthy: false, latency, error }`.
   * @returns `{ healthy, latency }` with `latency` in milliseconds
   */
  async healthCheck() {
    const startTime = Date.now();
    try {
      const response = await fetch(
        `${this.baseUrl}/voices`,
        this.withTimeout({
          headers: {
            "xi-api-key": this.config.apiKey
          }
        })
      );
      if (!response.ok) {
        throw new Error(`Health check failed: ${response.statusText}`);
      }
      return {
        healthy: true,
        latency: Date.now() - startTime
      };
    } catch (error) {
      return {
        healthy: false,
        latency: Date.now() - startTime,
        error: error instanceof Error ? error.message : String(error)
      };
    }
  }

  /**
   * Estimates cost as `text.length` × the per-character rate of the model.
   * The model is `params.model`, then `config.model`, then
   * `eleven_monolingual_v1`; unknown models are priced as
   * `eleven_monolingual_v1`. Operations without pricing cost 0.
   * @returns `{ costUsd, currency, estimatedDurationMs }`
   */
  async estimateCost(input) {
    // Own-property lookups only: keys such as "constructor" must not match
    // members inherited from Object.prototype.
    const opPricing = Object.hasOwn(pricing_default, input.operation) ? pricing_default[input.operation] : void 0;
    if (!opPricing) {
      return { costUsd: 0, currency: "USD" };
    }
    const model = input.params.model || this.config.model || "eleven_monolingual_v1";
    const entry = Object.hasOwn(opPricing, model) ? opPricing[model] : opPricing.eleven_monolingual_v1;
    // A non-string text has no character count; price it as empty instead of yielding NaN.
    const text = typeof input.params.text === "string" ? input.params.text : "";
    const costUsd = text.length * (entry?.input.perUnit ?? 3e-4);
    return { costUsd, currency: "USD", estimatedDurationMs: entry?.expectedDurationMs };
  }

  /**
   * Synthesizes `params.text` and returns the audio bytes with metadata.
   *
   * Reads `params.voice`, `params.speed` (default 1), `params.response_format`
   * (default "mp3", used only to pick the reported MIME type) and
   * `params.model` (falls back to `config.model`, then `eleven_monolingual_v1`).
   * @returns `{ data, mimeType, costUsd, durationMs, metadata }`
   * @throws {Error} prefixed with "ElevenLabs provider error: " (original
   *   error in `cause`) for unsupported operations, missing text, HTTP errors
   *   and network/timeout failures.
   */
  async execute(input) {
    const startTime = Date.now();
    try {
      if (input.operation !== "audio.tts") {
        throw new Error(`Unsupported operation: ${input.operation}`);
      }
      const text = input.params.text;
      const voice = this.resolveVoice(input.params.voice);
      const speed = input.params.speed || 1;
      const format = input.params.response_format || "mp3";
      const model = input.params.model || this.config.model || "eleven_monolingual_v1";
      if (typeof text !== "string" || text.length === 0) {
        throw new Error("Text is required for TTS");
      }
      const requestBody = {
        // A non-default speed is expressed by wrapping the text in <speak rate="N%">.
        text: speed !== 1 ? `<speak rate="${speed * 100}%">${text}</speak>` : text,
        model_id: model,
        voice_settings: {
          stability: 0.5,
          similarity_boost: 0.75,
          style: 0,
          use_speaker_boost: true
        }
      };
      // The voice may come from caller input, so encode it as one path segment.
      const response = await fetch(
        `${this.baseUrl}/text-to-speech/${encodeURIComponent(voice)}`,
        this.withTimeout({
          method: "POST",
          headers: {
            "xi-api-key": this.config.apiKey,
            "Content-Type": "application/json"
          },
          body: JSON.stringify(requestBody)
        })
      );
      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`ElevenLabs API error: ${response.status} - ${errorText}`);
      }
      const arrayBuffer = await response.arrayBuffer();
      const data = Buffer.from(arrayBuffer);
      const mimeType = this.getMimeType(format);
      const duration = this.estimateDuration(text);
      const cost = (await this.estimateCost(input)).costUsd;
      return {
        data,
        mimeType,
        costUsd: cost,
        durationMs: Date.now() - startTime,
        metadata: {
          model,
          operation: input.operation,
          voice,
          duration,
          speed,
          format,
          characterCount: text.length
        }
      };
    } catch (error) {
      throw new Error(`ElevenLabs provider error: ${error.message}`, { cause: error });
    }
  }

  /**
   * Resolves a requested voice to the value placed in the request path:
   * 1. an entry of `config.voices`,
   * 2. the value itself when it starts with "voice_" or is 20 characters long,
   * 3. a built-in preset,
   * 4. otherwise the default (`config.voices.default`, else the built-in default).
   * A missing or non-string voice resolves straight to the default.
   */
  resolveVoice(voiceName) {
    const customVoices = this.config.voices;
    const fallback = customVoices?.default || this.defaultVoices.default;
    if (typeof voiceName !== "string" || voiceName === "") {
      return fallback;
    }
    if (customVoices && Object.hasOwn(customVoices, voiceName) && customVoices[voiceName]) {
      return customVoices[voiceName];
    }
    if (voiceName.startsWith("voice_") || voiceName.length === 20) {
      return voiceName;
    }
    if (Object.hasOwn(this.defaultVoices, voiceName)) {
      return this.defaultVoices[voiceName];
    }
    return fallback;
  }

  /** Maps a format name to its MIME type; unknown formats map to "audio/mpeg". */
  getMimeType(format) {
    const mimeTypes = {
      mp3: "audio/mpeg",
      wav: "audio/wav",
      ogg: "audio/ogg",
      flac: "audio/flac",
      aac: "audio/aac"
    };
    return Object.hasOwn(mimeTypes, format) ? mimeTypes[format] : "audio/mpeg";
  }

  /** Rough speech duration in whole seconds, assuming 12.5 characters per second. */
  estimateDuration(text) {
    const charsPerSecond = 12.5;
    return Math.ceil(text.length / charsPerSecond);
  }

  /**
   * Reports whether the error message contains (case-insensitively) one of
   * the phrases treated as permanent failures. Errors without a message are
   * treated as retryable.
   */
  isNonRetryableError(error) {
    const nonRetryableMessages = [
      "authentication failed",
      "invalid api key",
      "permission denied",
      "insufficient credits",
      "voice not found",
      "invalid voice id"
    ];
    const message = String(error?.message ?? "").toLowerCase();
    return nonRetryableMessages.some((msg) => message.includes(msg));
  }
};
|
|
199
|
+
/**
 * Factory wrapper around the ElevenLabsProvider constructor.
 * @param config - provider configuration, handed to the constructor unchanged
 * @returns a new ElevenLabsProvider instance
 */
function defineElevenLabsProvider(config) {
  const provider = new ElevenLabsProvider(config);
  return provider;
}
|
|
202
|
+
export {
|
|
203
|
+
ElevenLabsProvider,
|
|
204
|
+
defineElevenLabsProvider
|
|
205
|
+
};
|
package/package.json
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@reaatech/media-pipeline-mcp-elevenlabs",
|
|
3
|
+
"version": "0.3.0",
|
|
4
|
+
"description": "ElevenLabs provider — high-quality text-to-speech with voice selection, speed control, and voice tuning",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"author": "Rick Somers <rick@reaatech.com> (https://reaatech.com)",
|
|
7
|
+
"repository": {
|
|
8
|
+
"type": "git",
|
|
9
|
+
"url": "https://github.com/reaatech/media-pipeline-mcp.git",
|
|
10
|
+
"directory": "packages/elevenlabs"
|
|
11
|
+
},
|
|
12
|
+
"homepage": "https://github.com/reaatech/media-pipeline-mcp/tree/main/packages/elevenlabs#readme",
|
|
13
|
+
"bugs": {
|
|
14
|
+
"url": "https://github.com/reaatech/media-pipeline-mcp/issues"
|
|
15
|
+
},
|
|
16
|
+
"type": "module",
|
|
17
|
+
"main": "./dist/index.cjs",
|
|
18
|
+
"module": "./dist/index.js",
|
|
19
|
+
"types": "./dist/index.d.ts",
|
|
20
|
+
"exports": {
|
|
21
|
+
".": {
|
|
22
|
+
"types": "./dist/index.d.ts",
|
|
23
|
+
"import": "./dist/index.js",
|
|
24
|
+
"require": "./dist/index.cjs"
|
|
25
|
+
}
|
|
26
|
+
},
|
|
27
|
+
"files": [
|
|
28
|
+
"dist"
|
|
29
|
+
],
|
|
30
|
+
"publishConfig": {
|
|
31
|
+
"access": "public"
|
|
32
|
+
},
|
|
33
|
+
"dependencies": {
|
|
34
|
+
"@11labs/client": "^0.1.0",
|
|
35
|
+
"@reaatech/media-pipeline-mcp-provider-core": "0.3.0"
|
|
36
|
+
},
|
|
37
|
+
"devDependencies": {
|
|
38
|
+
"@types/node": "^20.11.0",
|
|
39
|
+
"tsup": "^8.4.0",
|
|
40
|
+
"typescript": "^5.8.3",
|
|
41
|
+
"vitest": "^3.1.1"
|
|
42
|
+
},
|
|
43
|
+
"scripts": {
|
|
44
|
+
"build": "tsup src/index.ts --format cjs,esm --dts --clean",
|
|
45
|
+
"test": "vitest run",
|
|
46
|
+
"test:coverage": "vitest run --coverage",
|
|
47
|
+
"clean": "rm -rf dist"
|
|
48
|
+
}
|
|
49
|
+
}
|