@mastra/voice-elevenlabs 0.12.0-beta.0 → 0.12.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,163 @@
1
+ # Voice API Reference
2
+
3
+ > API reference for voice - 2 entries
4
+
5
+
6
+ ---
7
+
8
+ ## Reference: ElevenLabs
9
+
10
+ > Documentation for the ElevenLabs voice implementation, offering high-quality text-to-speech capabilities with multiple voice models and natural-sounding synthesis.
11
+
12
+ The ElevenLabs voice implementation in Mastra provides high-quality text-to-speech (TTS) and speech-to-text (STT) capabilities using the ElevenLabs API.
13
+
14
+ ## Usage Example
15
+
16
+ ```typescript
17
+ import { ElevenLabsVoice } from "@mastra/voice-elevenlabs";
18
+
19
+ // Initialize with default configuration (uses ELEVENLABS_API_KEY environment variable)
20
+ const voice = new ElevenLabsVoice();
21
+
22
+ // Initialize with custom configuration
23
+ const customVoice = new ElevenLabsVoice({
24
+ speechModel: {
25
+ name: "eleven_multilingual_v2",
26
+ apiKey: "your-api-key",
27
+ },
28
+ speaker: "custom-speaker-id",
29
+ });
30
+
31
+ // Text-to-Speech
32
+ const audioStream = await voice.speak("Hello, world!");
33
+
34
+ // Get available speakers
35
+ const speakers = await voice.getSpeakers();
36
+ ```
37
+
38
+ ## Constructor Parameters
39
+
40
+ ### ElevenLabsVoiceConfig
41
+
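+ The configuration shape below mirrors the package's bundled type definitions; the `ElevenLabsVoiceOptions` name is illustrative (the constructor declares these options inline):
+
+ ```typescript
+ type ElevenLabsModel =
+   | "eleven_multilingual_v2"
+   | "eleven_flash_v2_5"
+   | "eleven_flash_v2"
+   | "eleven_multilingual_sts_v2"
+   | "eleven_english_sts_v2"
+   | "scribe_v1";
+
+ interface ElevenLabsVoiceConfig {
+   name?: ElevenLabsModel; // model to use; defaults differ for speech and listening
+   apiKey?: string;        // if omitted, ELEVENLABS_API_KEY from the environment is used
+ }
+
+ // Illustrative name for the inline constructor options type
+ interface ElevenLabsVoiceOptions {
+   speechModel?: ElevenLabsVoiceConfig;    // default model: "eleven_multilingual_v2"
+   listeningModel?: ElevenLabsVoiceConfig; // default model: "scribe_v1"
+   speaker?: string;                       // default: "9BWtsMINqrJLrRacOk9x" (Aria)
+ }
+ ```
+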
42
+ ## Methods
43
+
44
+ ### speak()
45
+
46
+ Converts text to speech using the configured speech model and voice.
47
+
48
+ Returns: `Promise<NodeJS.ReadableStream>`
49
+
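+ A minimal sketch of consuming the returned stream; the `outputFormat` option mirrors the JSDoc added in this release (e.g. `ulaw_8000`, `pcm_16000`, `mp3_44100_128`) and can be omitted to use ElevenLabs' default format:
+
+ ```typescript
+ import { createWriteStream } from "node:fs";
+ import { ElevenLabsVoice } from "@mastra/voice-elevenlabs";
+
+ const voice = new ElevenLabsVoice();
+
+ // speak() resolves to a NodeJS.ReadableStream of encoded audio
+ const audio = await voice.speak("Hello from Mastra!", {
+   speaker: "9BWtsMINqrJLrRacOk9x", // Aria, the default speaker
+   outputFormat: "mp3_44100_128",   // optional; falls back to the API default
+ });
+
+ // Write the audio to disk as it streams in
+ audio.pipe(createWriteStream("hello.mp3"));
+ ```
+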
50
+ ### getSpeakers()
51
+
52
+ Returns an array of available voice options. Each voice object contains a `voiceId`, `name`, `language`, and `gender` (language and gender fall back to `en` and `neutral` when the ElevenLabs voice has no labels).
53
+
54
+ ### listen()
55
+
56
+ Converts audio input to text using the ElevenLabs Speech-to-Text API.
57
+
58
+ The options object supports the transcription properties `language_code`, `tag_audio_events`, `num_speakers`, and `filetype`, plus request-level options such as `timeoutInSeconds`, `maxRetries`, and `abortSignal` (see the sketch below).
59
+
60
+ Returns: `Promise<string>` - A Promise that resolves to the transcribed text
61
+
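+ A minimal transcription sketch, assuming a local audio file; option names follow the `ElevenLabsListenOptions` type in the package source:
+
+ ```typescript
+ import { createReadStream } from "node:fs";
+ import { ElevenLabsVoice } from "@mastra/voice-elevenlabs";
+
+ const voice = new ElevenLabsVoice();
+
+ // listen() buffers the stream and sends it to the Scribe speech-to-text model
+ const transcript = await voice.listen(createReadStream("meeting.mp3"), {
+   filetype: "mp3",     // container format of the input audio
+   language_code: "en", // ISO language hint
+   num_speakers: 2,     // expected number of speakers in the recording
+ });
+
+ console.log(transcript);
+ ```
+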
62
+ ## Important Notes
63
+
64
+ 1. An ElevenLabs API key is required. Set it via the `ELEVENLABS_API_KEY` environment variable or pass it in the constructor.
65
+ 2. The default speaker is set to Aria (ID: '9BWtsMINqrJLrRacOk9x').
66
+ 3. Speech-to-text is supported through the ElevenLabs Scribe model (`scribe_v1`), which `listen()` uses by default.
67
+ 4. Available speakers can be retrieved using the `getSpeakers()` method, which returns detailed information about each voice including language and gender.
68
+
69
+ ---
70
+
71
+ ## Reference: voice.getSpeakers()
72
+
73
+ > Documentation for the getSpeakers() method available in voice providers, which retrieves available voice options.
74
+
75
+ The `getSpeakers()` method retrieves a list of available voice options (speakers) from the voice provider. This allows applications to present users with voice choices or programmatically select the most appropriate voice for different contexts.
76
+
77
+ ## Usage Example
78
+
79
+ ```typescript
80
+ import { OpenAIVoice } from "@mastra/voice-openai";
81
+ import { ElevenLabsVoice } from "@mastra/voice-elevenlabs";
82
+
83
+ // Initialize voice providers
84
+ const openaiVoice = new OpenAIVoice();
85
+ const elevenLabsVoice = new ElevenLabsVoice({
86
+ speechModel: { apiKey: process.env.ELEVENLABS_API_KEY },
87
+ });
88
+
89
+ // Get available speakers from OpenAI
90
+ const openaiSpeakers = await openaiVoice.getSpeakers();
91
+ console.log("OpenAI voices:", openaiSpeakers);
92
+ // Example output: [{ voiceId: "alloy" }, { voiceId: "echo" }, { voiceId: "fable" }, ...]
93
+
94
+ // Get available speakers from ElevenLabs
95
+ const elevenLabsSpeakers = await elevenLabsVoice.getSpeakers();
96
+ console.log("ElevenLabs voices:", elevenLabsSpeakers);
97
+ // Example output: [{ voiceId: "21m00Tcm4TlvDq8ikWAM", name: "Rachel" }, ...]
98
+
99
+ // Use a specific voice for speech
100
+ const text = "Hello, this is a test of different voices.";
101
+ await openaiVoice.speak(text, { speaker: openaiSpeakers[2].voiceId });
102
+ await elevenLabsVoice.speak(text, { speaker: elevenLabsSpeakers[0].voiceId });
103
+ ```
104
+
105
+ ## Parameters
106
+
107
+ This method does not accept any parameters.
108
+
109
+ ## Return Value
+
+ Returns a `Promise` that resolves to an array of voice objects. Every entry includes a `voiceId`; any additional metadata is provider-specific.
110
+
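+ A hedged sketch of the shape applications can rely on; per the notes below, only `voiceId` is guaranteed, and the `Speaker` name and example values are illustrative:
+
+ ```typescript
+ // Only voiceId is guaranteed across providers
+ interface Speaker {
+   voiceId: string;
+   [metadata: string]: unknown;
+ }
+
+ // Illustrative ElevenLabs entry, which adds name, language, and gender
+ const rachel: Speaker = {
+   voiceId: "21m00Tcm4TlvDq8ikWAM",
+   name: "Rachel",
+   language: "en",
+   gender: "female",
+ };
+ ```
+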
111
+ ## Provider-Specific Metadata
112
+
113
+ Different voice providers return different metadata for their voices:
114
+
115
+ **OpenAI:**
116
+
117
+
118
+
119
+
120
+ **OpenAI Realtime:**
121
+
122
+
123
+
124
+ **Deepgram:**
125
+
126
+
127
+
128
+ **ElevenLabs:**
129
+
130
+
131
+
132
+ **Google:**
133
+
134
+
135
+
136
+ **Azure:**
137
+
138
+
139
+
140
+ **Murf:**
141
+
142
+
143
+
144
+ **PlayAI:**
145
+
146
+
147
+
148
+ **Speechify:**
149
+
150
+
151
+
152
+ **Sarvam:**
153
+
154
+
155
+
156
+
157
+ ## Notes
158
+
159
+ - The available voices vary significantly between providers
160
+ - Some providers may require authentication to retrieve the full list of voices
161
+ - The default implementation returns an empty array if the provider doesn't support this method
162
+ - For performance reasons, consider caching the results if you need to display the list frequently (see the caching sketch after this list)
163
+ - The `voiceId` property is guaranteed to be present for all providers, but additional metadata varies
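+
+ A minimal caching sketch, as referenced in the notes above; it assumes the voice list is stable for the lifetime of a provider instance:
+
+ ```typescript
+ import { ElevenLabsVoice } from "@mastra/voice-elevenlabs";
+
+ // Hypothetical helper: memoize the getSpeakers() promise per provider instance
+ const speakerCache = new WeakMap<ElevenLabsVoice, ReturnType<ElevenLabsVoice["getSpeakers"]>>();
+
+ function getCachedSpeakers(voice: ElevenLabsVoice) {
+   let speakers = speakerCache.get(voice);
+   if (!speakers) {
+     speakers = voice.getSpeakers();
+     speakerCache.set(voice, speakers);
+   }
+   return speakers;
+ }
+
+ const voice = new ElevenLabsVoice();
+ const speakers = await getCachedSpeakers(voice); // later calls reuse the same promise
+ ```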
package/dist/index.cjs CHANGED
@@ -73,6 +73,7 @@ var ElevenLabsVoice = class extends voice.MastraVoice {
73
73
  * @param {string | NodeJS.ReadableStream} input - The text to be converted to speech or a stream containing audio data.
74
74
  * @param {Object} [options] - Optional parameters for the speech generation.
75
75
  * @param {string} [options.speaker] - The ID of the speaker to use for the speech. If not provided, the default speaker will be used.
76
+ * @param {string} [options.outputFormat] - The audio output format (e.g., 'ulaw_8000', 'pcm_16000', 'mp3_44100_128'). If not provided, defaults to ElevenLabs' default format.
76
77
  *
77
78
  * @returns {Promise<NodeJS.ReadableStream>} A promise that resolves to a readable stream of the generated speech.
78
79
  *
@@ -91,7 +92,8 @@ var ElevenLabsVoice = class extends voice.MastraVoice {
91
92
  text,
92
93
  voice: speaker,
93
94
  model_id: this.speechModel?.name,
94
- stream: true
95
+ stream: true,
96
+ ...options?.outputFormat && { output_format: options.outputFormat }
95
97
  });
96
98
  }
97
99
  /**
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts"],"names":["MastraVoice","ElevenLabsClient"],"mappings":";;;;;;AAkCO,IAAM,eAAA,GAAN,cAA8BA,iBAAA,CAAY;AAAA,EACvC,MAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWR,WAAA,CAAY;AAAA,IACV,WAAA;AAAA,IACA,cAAA;AAAA,IACA;AAAA,GACF,GAAuG,EAAC,EAAG;AACzG,IAAA,MAAM,MAAA,GAAS,WAAA,EAAa,MAAA,IAAU,OAAA,CAAQ,GAAA,CAAI,kBAAA;AAClD,IAAA,KAAA,CAAM;AAAA,MACJ,WAAA,EAAa;AAAA,QACX,IAAA,EAAM,aAAa,IAAA,IAAQ,wBAAA;AAAA,QAC3B,QAAQ,WAAA,EAAa;AAAA,OACvB;AAAA,MACA,cAAA,EAAgB;AAAA,QACd,IAAA,EAAM,gBAAgB,IAAA,IAAQ,WAAA;AAAA,QAC9B,QAAQ,cAAA,EAAgB;AAAA,OAC1B;AAAA,MACA;AAAA,KACD,CAAA;AAED,IAAA,IAAI,CAAC,MAAA,EAAQ;AACX,MAAA,MAAM,IAAI,MAAM,+BAA+B,CAAA;AAAA,IACjD;AAEA,IAAA,IAAA,CAAK,MAAA,GAAS,IAAIC,2BAAA,CAAiB;AAAA,MACjC;AAAA,KACD,CAAA;AAED,IAAA,IAAA,CAAK,UAAU,OAAA,IAAW,sBAAA;AAAA,EAC5B;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,WAAA,GAAc;AAClB,IAAA,MAAM,MAAA,GAAS,MAAM,IAAA,CAAK,MAAA,CAAO,OAAO,MAAA,EAAO;AAC/C,IAAA,OACE,MAAA,EAAQ,MAAA,EAAQ,GAAA,CAAI,CAAA,KAAA,MAAU;AAAA,MAC5B,SAAS,KAAA,CAAM,QAAA;AAAA,MACf,MAAM,KAAA,CAAM,IAAA;AAAA,MACZ,QAAA,EAAU,KAAA,CAAM,MAAA,EAAQ,QAAA,IAAY,IAAA;AAAA,MACpC,MAAA,EAAQ,KAAA,CAAM,MAAA,EAAQ,MAAA,IAAU;AAAA,KAClC,CAAE,KAAK,EAAC;AAAA,EAEZ;AAAA,EAEA,MAAc,eAAe,MAAA,EAAgD;AAC3E,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,MAAA,EAAQ;AAChC,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,OAAO,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,OAAO,CAAA;AAAA,EAC/C;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAaA,MAAM,KAAA,CAAM,KAAA,EAAuC,OAAA,EAAgE;AACjH,IAAA,MAAM,OAAA,GAAU,OAAA,EAAS,OAAA,IAAW,IAAA,CAAK,OAAA;AACzC,IAAA,IAAI,CAAC,OAAA,EAAS;AACZ,MAAA,MAAM,IAAI,MAAM,sBAAsB,CAAA;AAAA,IACxC;AAEA,IAAA,IAAI,CAAC,IAAA,CAAK,WAAA,EAAa,IAAA,EAAM;AAC3B,MAAA,MAAM,IAAI,MAAM,2BAA2B,CAAA;AAAA,IAC7C;AACA,IAAA,MAAM,IAAA,GAAO,OAAO,KAAA,KAAU,QAAA,GAAW,QAAQ,MAAM,IAAA,CAAK,eAAe,KAAK,CAAA;AAEhF,IAAA,OAAO,MAAM,IAAA,CAAK,MAAA,CAAO,QAAA,CAAS;AAAA,MAChC,IAAA;AAAA,MACA,KAAA,EAAO,OAAA;AAAA,MACP,QAAA,EAAU,KAAK,WAAA,EAAa,IAAA;AAAA,MAC5B,MAAA,EAAQ;AAAA,KACT,CAAA;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,WAAA,GAAc;AAClB,IAAA,OAAO,EAAE,SAAS,IAAA,EAAK;AAAA,EACzB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAkBA,MAAM,MAAA,CAAO,KAAA,EAA8B,OAAA,EAAoD;AAC7F,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,KAAA,EAAO;AAC/B,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,MAAM,MAAA,GAAS,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA;AAEnC,IAAA,MAAM,EAAE,eAAe,gBAAA,EAAkB,YAAA,EAAc,UAAU,GAAG,cAAA,EAAe,GAAI,OAAA,IAAW,EAAC;AAEnG,IAAA,MAAM,IAAA,GAAO,IAAI,IAAA,CAAK,CAAC,MAAM,CAAA,EAAG,CAAA,MAAA,EAAS,QAAA,IAAY,KAAK,CAAA,CAAE,CAAA;AAE5D,IAAA,MAAM,aAAA,GAAgB,MAAM,IAAA,CAAK,MAAA,CAAO,YAAA,CAAa,OAAA;AAAA,MACnD;AAAA,QACE,IAAA;AAAA,QACA,QAAA,EAAU,KAAK,cAAA,EAAgB,IAAA;AAAA,QAC/B,aAAA;AAAA,QACA,gBAAA;AAAA,QACA;AAAA,OACF;AAAA,MACA;AAAA,KACF;AAEA,IAAA,OAAO,aAAA,CAAc,IAAA;AAAA,EACvB;AACF","file":"index.cjs","sourcesContent":["import { MastraVoice } from '@mastra/core/voice';\nimport { ElevenLabsClient } from 'elevenlabs';\n\ntype ElevenLabsModel =\n | 'eleven_multilingual_v2'\n | 'eleven_flash_v2_5'\n | 'eleven_flash_v2'\n | 'eleven_multilingual_sts_v2'\n | 'eleven_english_sts_v2'\n | 'scribe_v1';\n\ninterface ElevenLabsVoiceConfig {\n name?: ElevenLabsModel;\n apiKey?: string;\n}\n\ninterface 
SpeechToTextOptions {\n language_code?: string;\n tag_audio_events?: boolean;\n num_speakers?: number;\n filetype?: string;\n}\n\ninterface RequestOptions {\n timeoutInSeconds?: number;\n maxRetries?: number;\n abortSignal?: AbortSignal;\n apiKey?: string | undefined;\n headers?: Record<string, string>;\n}\n\n// Combined options type\ntype ElevenLabsListenOptions = SpeechToTextOptions & RequestOptions;\n\nexport class ElevenLabsVoice extends MastraVoice {\n private client: ElevenLabsClient;\n\n /**\n * Creates an instance of the ElevenLabsVoice class.\n *\n * @param {Object} options - The options for the voice configuration.\n * @param {ElevenLabsVoiceConfig} [options.speechModel] - The configuration for the speech model, including the model name and API key.\n * @param {string} [options.speaker] - The ID of the speaker to use. If not provided, a default speaker will be used.\n *\n * @throws {Error} If the ELEVENLABS_API_KEY is not set in the environment variables.\n */\n constructor({\n speechModel,\n listeningModel,\n speaker,\n }: { speechModel?: ElevenLabsVoiceConfig; listeningModel?: ElevenLabsVoiceConfig; speaker?: string } = {}) {\n const apiKey = speechModel?.apiKey ?? process.env.ELEVENLABS_API_KEY;\n super({\n speechModel: {\n name: speechModel?.name ?? 'eleven_multilingual_v2',\n apiKey: speechModel?.apiKey,\n },\n listeningModel: {\n name: listeningModel?.name ?? 'scribe_v1',\n apiKey: listeningModel?.apiKey,\n },\n speaker,\n });\n\n if (!apiKey) {\n throw new Error('ELEVENLABS_API_KEY is not set');\n }\n\n this.client = new ElevenLabsClient({\n apiKey,\n });\n\n this.speaker = speaker || '9BWtsMINqrJLrRacOk9x'; // Aria is the default speaker\n }\n\n /**\n * Retrieves a list of available speakers from the Eleven Labs API.\n * Each speaker includes their ID, name, language, and gender.\n *\n * @returns {Promise<Array<{ voiceId: string, name: string, language: string, gender: string }>>}\n * A promise that resolves to an array of speaker objects.\n */\n async getSpeakers() {\n const voices = await this.client.voices.getAll();\n return (\n voices?.voices?.map(voice => ({\n voiceId: voice.voice_id,\n name: voice.name,\n language: voice.labels?.language || 'en',\n gender: voice.labels?.gender || 'neutral',\n })) ?? []\n );\n }\n\n private async streamToString(stream: NodeJS.ReadableStream): Promise<string> {\n const chunks: Buffer[] = [];\n for await (const chunk of stream) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n return Buffer.concat(chunks).toString('utf-8');\n }\n\n /**\n * Converts text or audio input into speech using the Eleven Labs API.\n *\n * @param {string | NodeJS.ReadableStream} input - The text to be converted to speech or a stream containing audio data.\n * @param {Object} [options] - Optional parameters for the speech generation.\n * @param {string} [options.speaker] - The ID of the speaker to use for the speech. 
If not provided, the default speaker will be used.\n *\n * @returns {Promise<NodeJS.ReadableStream>} A promise that resolves to a readable stream of the generated speech.\n *\n * @throws {Error} If no speaker is specified or if no speech model is set.\n */\n async speak(input: string | NodeJS.ReadableStream, options?: { speaker?: string }): Promise<NodeJS.ReadableStream> {\n const speaker = options?.speaker || this.speaker;\n if (!speaker) {\n throw new Error('No speaker specified');\n }\n\n if (!this.speechModel?.name) {\n throw new Error('No speech model specified');\n }\n const text = typeof input === 'string' ? input : await this.streamToString(input);\n\n return await this.client.generate({\n text,\n voice: speaker,\n model_id: this.speechModel?.name as ElevenLabsModel,\n stream: true,\n });\n }\n\n /**\n * Checks if listening capabilities are enabled.\n *\n * @returns {Promise<{ enabled: boolean }>}\n */\n async getListener() {\n return { enabled: true };\n }\n\n /**\n * Converts audio input to text using ElevenLabs Speech-to-Text API.\n *\n * @param input - A readable stream containing the audio data to transcribe\n * @param options - Configuration options for the transcription\n * @param options.language_code - ISO language code (e.g., 'en', 'fr', 'es')\n * @param options.tag_audio_events - Whether to tag audio events like [MUSIC], [LAUGHTER], etc.\n * @param options.num_speakers - Number of speakers to detect in the audio\n * @param options.filetype - Audio file format (e.g., 'mp3', 'wav', 'ogg')\n * @param options.timeoutInSeconds - Request timeout in seconds\n * @param options.maxRetries - Maximum number of retry attempts\n * @param options.abortSignal - Signal to abort the request\n *\n * @returns A Promise that resolves to the transcribed text\n *\n */\n async listen(input: NodeJS.ReadableStream, options?: ElevenLabsListenOptions): Promise<string> {\n const chunks: Buffer[] = [];\n for await (const chunk of input) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n const buffer = Buffer.concat(chunks);\n\n const { language_code, tag_audio_events, num_speakers, filetype, ...requestOptions } = options || {};\n\n const file = new File([buffer], `audio.${filetype || 'mp3'}`);\n\n const transcription = await this.client.speechToText.convert(\n {\n file: file,\n model_id: this.listeningModel?.name as ElevenLabsModel,\n language_code,\n tag_audio_events,\n num_speakers,\n },\n requestOptions,\n );\n\n return transcription.text;\n }\n}\n"]}
1
+ {"version":3,"sources":["../src/index.ts"],"names":["MastraVoice","ElevenLabsClient"],"mappings":";;;;;;AAkCO,IAAM,eAAA,GAAN,cAA8BA,iBAAA,CAAY;AAAA,EACvC,MAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWR,WAAA,CAAY;AAAA,IACV,WAAA;AAAA,IACA,cAAA;AAAA,IACA;AAAA,GACF,GAAuG,EAAC,EAAG;AACzG,IAAA,MAAM,MAAA,GAAS,WAAA,EAAa,MAAA,IAAU,OAAA,CAAQ,GAAA,CAAI,kBAAA;AAClD,IAAA,KAAA,CAAM;AAAA,MACJ,WAAA,EAAa;AAAA,QACX,IAAA,EAAM,aAAa,IAAA,IAAQ,wBAAA;AAAA,QAC3B,QAAQ,WAAA,EAAa;AAAA,OACvB;AAAA,MACA,cAAA,EAAgB;AAAA,QACd,IAAA,EAAM,gBAAgB,IAAA,IAAQ,WAAA;AAAA,QAC9B,QAAQ,cAAA,EAAgB;AAAA,OAC1B;AAAA,MACA;AAAA,KACD,CAAA;AAED,IAAA,IAAI,CAAC,MAAA,EAAQ;AACX,MAAA,MAAM,IAAI,MAAM,+BAA+B,CAAA;AAAA,IACjD;AAEA,IAAA,IAAA,CAAK,MAAA,GAAS,IAAIC,2BAAA,CAAiB;AAAA,MACjC;AAAA,KACD,CAAA;AAED,IAAA,IAAA,CAAK,UAAU,OAAA,IAAW,sBAAA;AAAA,EAC5B;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,WAAA,GAAc;AAClB,IAAA,MAAM,MAAA,GAAS,MAAM,IAAA,CAAK,MAAA,CAAO,OAAO,MAAA,EAAO;AAC/C,IAAA,OACE,MAAA,EAAQ,MAAA,EAAQ,GAAA,CAAI,CAAA,KAAA,MAAU;AAAA,MAC5B,SAAS,KAAA,CAAM,QAAA;AAAA,MACf,MAAM,KAAA,CAAM,IAAA;AAAA,MACZ,QAAA,EAAU,KAAA,CAAM,MAAA,EAAQ,QAAA,IAAY,IAAA;AAAA,MACpC,MAAA,EAAQ,KAAA,CAAM,MAAA,EAAQ,MAAA,IAAU;AAAA,KAClC,CAAE,KAAK,EAAC;AAAA,EAEZ;AAAA,EAEA,MAAc,eAAe,MAAA,EAAgD;AAC3E,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,MAAA,EAAQ;AAChC,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,OAAO,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,OAAO,CAAA;AAAA,EAC/C;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAcA,MAAM,KAAA,CACJ,KAAA,EACA,OAAA,EAKgC;AAChC,IAAA,MAAM,OAAA,GAAU,OAAA,EAAS,OAAA,IAAW,IAAA,CAAK,OAAA;AACzC,IAAA,IAAI,CAAC,OAAA,EAAS;AACZ,MAAA,MAAM,IAAI,MAAM,sBAAsB,CAAA;AAAA,IACxC;AAEA,IAAA,IAAI,CAAC,IAAA,CAAK,WAAA,EAAa,IAAA,EAAM;AAC3B,MAAA,MAAM,IAAI,MAAM,2BAA2B,CAAA;AAAA,IAC7C;AACA,IAAA,MAAM,IAAA,GAAO,OAAO,KAAA,KAAU,QAAA,GAAW,QAAQ,MAAM,IAAA,CAAK,eAAe,KAAK,CAAA;AAEhF,IAAA,OAAO,MAAM,IAAA,CAAK,MAAA,CAAO,QAAA,CAAS;AAAA,MAChC,IAAA;AAAA,MACA,KAAA,EAAO,OAAA;AAAA,MACP,QAAA,EAAU,KAAK,WAAA,EAAa,IAAA;AAAA,MAC5B,MAAA,EAAQ,IAAA;AAAA,MACR,GAAI,OAAA,EAAS,YAAA,IAAgB,EAAE,aAAA,EAAe,QAAQ,YAAA;AAAoB,KAC3E,CAAA;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,WAAA,GAAc;AAClB,IAAA,OAAO,EAAE,SAAS,IAAA,EAAK;AAAA,EACzB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAkBA,MAAM,MAAA,CAAO,KAAA,EAA8B,OAAA,EAAoD;AAC7F,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,KAAA,EAAO;AAC/B,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,MAAM,MAAA,GAAS,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA;AAEnC,IAAA,MAAM,EAAE,eAAe,gBAAA,EAAkB,YAAA,EAAc,UAAU,GAAG,cAAA,EAAe,GAAI,OAAA,IAAW,EAAC;AAEnG,IAAA,MAAM,IAAA,GAAO,IAAI,IAAA,CAAK,CAAC,MAAM,CAAA,EAAG,CAAA,MAAA,EAAS,QAAA,IAAY,KAAK,CAAA,CAAE,CAAA;AAE5D,IAAA,MAAM,aAAA,GAAgB,MAAM,IAAA,CAAK,MAAA,CAAO,YAAA,CAAa,OAAA;AAAA,MACnD;AAAA,QACE,IAAA;AAAA,QACA,QAAA,EAAU,KAAK,cAAA,EAAgB,IAAA;AAAA,QAC/B,aAAA;AAAA,QACA,gBAAA;AAAA,QACA;AAAA,OACF;AAAA,MACA;AAAA,KACF;AAEA,IAAA,OAAO,aAAA,CAAc,IAAA;AAAA,EACvB;AACF","file":"index.cjs","sourcesContent":["import { MastraVoice } from '@mastra/core/voice';\nimport { ElevenLabsClient } from 'elevenlabs';\n\ntype ElevenLabsModel =\n | 'eleven_multilingual_v2'\n | 'eleven_flash_v2_5'\n | 'eleven_flash_v2'\n | 'eleven_multilingual_sts_v2'\n | 'eleven_english_sts_v2'\n | 'scribe_v1';\n\ninterface ElevenLabsVoiceConfig {\n 
name?: ElevenLabsModel;\n apiKey?: string;\n}\n\ninterface SpeechToTextOptions {\n language_code?: string;\n tag_audio_events?: boolean;\n num_speakers?: number;\n filetype?: string;\n}\n\ninterface RequestOptions {\n timeoutInSeconds?: number;\n maxRetries?: number;\n abortSignal?: AbortSignal;\n apiKey?: string | undefined;\n headers?: Record<string, string>;\n}\n\n// Combined options type\ntype ElevenLabsListenOptions = SpeechToTextOptions & RequestOptions;\n\nexport class ElevenLabsVoice extends MastraVoice {\n private client: ElevenLabsClient;\n\n /**\n * Creates an instance of the ElevenLabsVoice class.\n *\n * @param {Object} options - The options for the voice configuration.\n * @param {ElevenLabsVoiceConfig} [options.speechModel] - The configuration for the speech model, including the model name and API key.\n * @param {string} [options.speaker] - The ID of the speaker to use. If not provided, a default speaker will be used.\n *\n * @throws {Error} If the ELEVENLABS_API_KEY is not set in the environment variables.\n */\n constructor({\n speechModel,\n listeningModel,\n speaker,\n }: { speechModel?: ElevenLabsVoiceConfig; listeningModel?: ElevenLabsVoiceConfig; speaker?: string } = {}) {\n const apiKey = speechModel?.apiKey ?? process.env.ELEVENLABS_API_KEY;\n super({\n speechModel: {\n name: speechModel?.name ?? 'eleven_multilingual_v2',\n apiKey: speechModel?.apiKey,\n },\n listeningModel: {\n name: listeningModel?.name ?? 'scribe_v1',\n apiKey: listeningModel?.apiKey,\n },\n speaker,\n });\n\n if (!apiKey) {\n throw new Error('ELEVENLABS_API_KEY is not set');\n }\n\n this.client = new ElevenLabsClient({\n apiKey,\n });\n\n this.speaker = speaker || '9BWtsMINqrJLrRacOk9x'; // Aria is the default speaker\n }\n\n /**\n * Retrieves a list of available speakers from the Eleven Labs API.\n * Each speaker includes their ID, name, language, and gender.\n *\n * @returns {Promise<Array<{ voiceId: string, name: string, language: string, gender: string }>>}\n * A promise that resolves to an array of speaker objects.\n */\n async getSpeakers() {\n const voices = await this.client.voices.getAll();\n return (\n voices?.voices?.map(voice => ({\n voiceId: voice.voice_id,\n name: voice.name,\n language: voice.labels?.language || 'en',\n gender: voice.labels?.gender || 'neutral',\n })) ?? []\n );\n }\n\n private async streamToString(stream: NodeJS.ReadableStream): Promise<string> {\n const chunks: Buffer[] = [];\n for await (const chunk of stream) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n return Buffer.concat(chunks).toString('utf-8');\n }\n\n /**\n * Converts text or audio input into speech using the Eleven Labs API.\n *\n * @param {string | NodeJS.ReadableStream} input - The text to be converted to speech or a stream containing audio data.\n * @param {Object} [options] - Optional parameters for the speech generation.\n * @param {string} [options.speaker] - The ID of the speaker to use for the speech. If not provided, the default speaker will be used.\n * @param {string} [options.outputFormat] - The audio output format (e.g., 'ulaw_8000', 'pcm_16000', 'mp3_44100_128'). 
If not provided, defaults to ElevenLabs' default format.\n *\n * @returns {Promise<NodeJS.ReadableStream>} A promise that resolves to a readable stream of the generated speech.\n *\n * @throws {Error} If no speaker is specified or if no speech model is set.\n */\n async speak(\n input: string | NodeJS.ReadableStream,\n options?: {\n speaker?: string;\n outputFormat?: string;\n [key: string]: any;\n },\n ): Promise<NodeJS.ReadableStream> {\n const speaker = options?.speaker || this.speaker;\n if (!speaker) {\n throw new Error('No speaker specified');\n }\n\n if (!this.speechModel?.name) {\n throw new Error('No speech model specified');\n }\n const text = typeof input === 'string' ? input : await this.streamToString(input);\n\n return await this.client.generate({\n text,\n voice: speaker,\n model_id: this.speechModel?.name as ElevenLabsModel,\n stream: true,\n ...(options?.outputFormat && { output_format: options.outputFormat as any }),\n });\n }\n\n /**\n * Checks if listening capabilities are enabled.\n *\n * @returns {Promise<{ enabled: boolean }>}\n */\n async getListener() {\n return { enabled: true };\n }\n\n /**\n * Converts audio input to text using ElevenLabs Speech-to-Text API.\n *\n * @param input - A readable stream containing the audio data to transcribe\n * @param options - Configuration options for the transcription\n * @param options.language_code - ISO language code (e.g., 'en', 'fr', 'es')\n * @param options.tag_audio_events - Whether to tag audio events like [MUSIC], [LAUGHTER], etc.\n * @param options.num_speakers - Number of speakers to detect in the audio\n * @param options.filetype - Audio file format (e.g., 'mp3', 'wav', 'ogg')\n * @param options.timeoutInSeconds - Request timeout in seconds\n * @param options.maxRetries - Maximum number of retry attempts\n * @param options.abortSignal - Signal to abort the request\n *\n * @returns A Promise that resolves to the transcribed text\n *\n */\n async listen(input: NodeJS.ReadableStream, options?: ElevenLabsListenOptions): Promise<string> {\n const chunks: Buffer[] = [];\n for await (const chunk of input) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n const buffer = Buffer.concat(chunks);\n\n const { language_code, tag_audio_events, num_speakers, filetype, ...requestOptions } = options || {};\n\n const file = new File([buffer], `audio.${filetype || 'mp3'}`);\n\n const transcription = await this.client.speechToText.convert(\n {\n file: file,\n model_id: this.listeningModel?.name as ElevenLabsModel,\n language_code,\n tag_audio_events,\n num_speakers,\n },\n requestOptions,\n );\n\n return transcription.text;\n }\n}\n"]}
package/dist/index.d.ts CHANGED
@@ -54,6 +54,7 @@ export declare class ElevenLabsVoice extends MastraVoice {
54
54
  * @param {string | NodeJS.ReadableStream} input - The text to be converted to speech or a stream containing audio data.
55
55
  * @param {Object} [options] - Optional parameters for the speech generation.
56
56
  * @param {string} [options.speaker] - The ID of the speaker to use for the speech. If not provided, the default speaker will be used.
57
+ * @param {string} [options.outputFormat] - The audio output format (e.g., 'ulaw_8000', 'pcm_16000', 'mp3_44100_128'). If not provided, defaults to ElevenLabs' default format.
57
58
  *
58
59
  * @returns {Promise<NodeJS.ReadableStream>} A promise that resolves to a readable stream of the generated speech.
59
60
  *
@@ -61,6 +62,8 @@ export declare class ElevenLabsVoice extends MastraVoice {
61
62
  */
62
63
  speak(input: string | NodeJS.ReadableStream, options?: {
63
64
  speaker?: string;
65
+ outputFormat?: string;
66
+ [key: string]: any;
64
67
  }): Promise<NodeJS.ReadableStream>;
65
68
  /**
66
69
  * Checks if listening capabilities are enabled.
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAGjD,KAAK,eAAe,GAChB,wBAAwB,GACxB,mBAAmB,GACnB,iBAAiB,GACjB,4BAA4B,GAC5B,uBAAuB,GACvB,WAAW,CAAC;AAEhB,UAAU,qBAAqB;IAC7B,IAAI,CAAC,EAAE,eAAe,CAAC;IACvB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,UAAU,mBAAmB;IAC3B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,UAAU,cAAc;IACtB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAClC;AAGD,KAAK,uBAAuB,GAAG,mBAAmB,GAAG,cAAc,CAAC;AAEpE,qBAAa,eAAgB,SAAQ,WAAW;IAC9C,OAAO,CAAC,MAAM,CAAmB;IAEjC;;;;;;;;OAQG;gBACS,EACV,WAAW,EACX,cAAc,EACd,OAAO,GACR,GAAE;QAAE,WAAW,CAAC,EAAE,qBAAqB,CAAC;QAAC,cAAc,CAAC,EAAE,qBAAqB,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAA;KAAO;IAyBzG;;;;;;OAMG;IACG,WAAW;;;;;;YAYH,cAAc;IAY5B;;;;;;;;;;OAUG;IACG,KAAK,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAAC,cAAc,EAAE,OAAO,CAAC,EAAE;QAAE,OAAO,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,OAAO,CAAC,MAAM,CAAC,cAAc,CAAC;IAmBlH;;;;OAIG;IACG,WAAW;;;IAIjB;;;;;;;;;;;;;;;OAeG;IACG,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,cAAc,EAAE,OAAO,CAAC,EAAE,uBAAuB,GAAG,OAAO,CAAC,MAAM,CAAC;CA4B/F"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAGjD,KAAK,eAAe,GAChB,wBAAwB,GACxB,mBAAmB,GACnB,iBAAiB,GACjB,4BAA4B,GAC5B,uBAAuB,GACvB,WAAW,CAAC;AAEhB,UAAU,qBAAqB;IAC7B,IAAI,CAAC,EAAE,eAAe,CAAC;IACvB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,UAAU,mBAAmB;IAC3B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,UAAU,cAAc;IACtB,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAClC;AAGD,KAAK,uBAAuB,GAAG,mBAAmB,GAAG,cAAc,CAAC;AAEpE,qBAAa,eAAgB,SAAQ,WAAW;IAC9C,OAAO,CAAC,MAAM,CAAmB;IAEjC;;;;;;;;OAQG;gBACS,EACV,WAAW,EACX,cAAc,EACd,OAAO,GACR,GAAE;QAAE,WAAW,CAAC,EAAE,qBAAqB,CAAC;QAAC,cAAc,CAAC,EAAE,qBAAqB,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAA;KAAO;IAyBzG;;;;;;OAMG;IACG,WAAW;;;;;;YAYH,cAAc;IAY5B;;;;;;;;;;;OAWG;IACG,KAAK,CACT,KAAK,EAAE,MAAM,GAAG,MAAM,CAAC,cAAc,EACrC,OAAO,CAAC,EAAE;QACR,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAC;KACpB,GACA,OAAO,CAAC,MAAM,CAAC,cAAc,CAAC;IAoBjC;;;;OAIG;IACG,WAAW;;;IAIjB;;;;;;;;;;;;;;;OAeG;IACG,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,cAAc,EAAE,OAAO,CAAC,EAAE,uBAAuB,GAAG,OAAO,CAAC,MAAM,CAAC;CA4B/F"}
package/dist/index.js CHANGED
@@ -71,6 +71,7 @@ var ElevenLabsVoice = class extends MastraVoice {
71
71
  * @param {string | NodeJS.ReadableStream} input - The text to be converted to speech or a stream containing audio data.
72
72
  * @param {Object} [options] - Optional parameters for the speech generation.
73
73
  * @param {string} [options.speaker] - The ID of the speaker to use for the speech. If not provided, the default speaker will be used.
74
+ * @param {string} [options.outputFormat] - The audio output format (e.g., 'ulaw_8000', 'pcm_16000', 'mp3_44100_128'). If not provided, defaults to ElevenLabs' default format.
74
75
  *
75
76
  * @returns {Promise<NodeJS.ReadableStream>} A promise that resolves to a readable stream of the generated speech.
76
77
  *
@@ -89,7 +90,8 @@ var ElevenLabsVoice = class extends MastraVoice {
89
90
  text,
90
91
  voice: speaker,
91
92
  model_id: this.speechModel?.name,
92
- stream: true
93
+ stream: true,
94
+ ...options?.outputFormat && { output_format: options.outputFormat }
93
95
  });
94
96
  }
95
97
  /**
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts"],"names":[],"mappings":";;;;AAkCO,IAAM,eAAA,GAAN,cAA8B,WAAA,CAAY;AAAA,EACvC,MAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWR,WAAA,CAAY;AAAA,IACV,WAAA;AAAA,IACA,cAAA;AAAA,IACA;AAAA,GACF,GAAuG,EAAC,EAAG;AACzG,IAAA,MAAM,MAAA,GAAS,WAAA,EAAa,MAAA,IAAU,OAAA,CAAQ,GAAA,CAAI,kBAAA;AAClD,IAAA,KAAA,CAAM;AAAA,MACJ,WAAA,EAAa;AAAA,QACX,IAAA,EAAM,aAAa,IAAA,IAAQ,wBAAA;AAAA,QAC3B,QAAQ,WAAA,EAAa;AAAA,OACvB;AAAA,MACA,cAAA,EAAgB;AAAA,QACd,IAAA,EAAM,gBAAgB,IAAA,IAAQ,WAAA;AAAA,QAC9B,QAAQ,cAAA,EAAgB;AAAA,OAC1B;AAAA,MACA;AAAA,KACD,CAAA;AAED,IAAA,IAAI,CAAC,MAAA,EAAQ;AACX,MAAA,MAAM,IAAI,MAAM,+BAA+B,CAAA;AAAA,IACjD;AAEA,IAAA,IAAA,CAAK,MAAA,GAAS,IAAI,gBAAA,CAAiB;AAAA,MACjC;AAAA,KACD,CAAA;AAED,IAAA,IAAA,CAAK,UAAU,OAAA,IAAW,sBAAA;AAAA,EAC5B;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,WAAA,GAAc;AAClB,IAAA,MAAM,MAAA,GAAS,MAAM,IAAA,CAAK,MAAA,CAAO,OAAO,MAAA,EAAO;AAC/C,IAAA,OACE,MAAA,EAAQ,MAAA,EAAQ,GAAA,CAAI,CAAA,KAAA,MAAU;AAAA,MAC5B,SAAS,KAAA,CAAM,QAAA;AAAA,MACf,MAAM,KAAA,CAAM,IAAA;AAAA,MACZ,QAAA,EAAU,KAAA,CAAM,MAAA,EAAQ,QAAA,IAAY,IAAA;AAAA,MACpC,MAAA,EAAQ,KAAA,CAAM,MAAA,EAAQ,MAAA,IAAU;AAAA,KAClC,CAAE,KAAK,EAAC;AAAA,EAEZ;AAAA,EAEA,MAAc,eAAe,MAAA,EAAgD;AAC3E,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,MAAA,EAAQ;AAChC,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,OAAO,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,OAAO,CAAA;AAAA,EAC/C;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAaA,MAAM,KAAA,CAAM,KAAA,EAAuC,OAAA,EAAgE;AACjH,IAAA,MAAM,OAAA,GAAU,OAAA,EAAS,OAAA,IAAW,IAAA,CAAK,OAAA;AACzC,IAAA,IAAI,CAAC,OAAA,EAAS;AACZ,MAAA,MAAM,IAAI,MAAM,sBAAsB,CAAA;AAAA,IACxC;AAEA,IAAA,IAAI,CAAC,IAAA,CAAK,WAAA,EAAa,IAAA,EAAM;AAC3B,MAAA,MAAM,IAAI,MAAM,2BAA2B,CAAA;AAAA,IAC7C;AACA,IAAA,MAAM,IAAA,GAAO,OAAO,KAAA,KAAU,QAAA,GAAW,QAAQ,MAAM,IAAA,CAAK,eAAe,KAAK,CAAA;AAEhF,IAAA,OAAO,MAAM,IAAA,CAAK,MAAA,CAAO,QAAA,CAAS;AAAA,MAChC,IAAA;AAAA,MACA,KAAA,EAAO,OAAA;AAAA,MACP,QAAA,EAAU,KAAK,WAAA,EAAa,IAAA;AAAA,MAC5B,MAAA,EAAQ;AAAA,KACT,CAAA;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,WAAA,GAAc;AAClB,IAAA,OAAO,EAAE,SAAS,IAAA,EAAK;AAAA,EACzB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAkBA,MAAM,MAAA,CAAO,KAAA,EAA8B,OAAA,EAAoD;AAC7F,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,KAAA,EAAO;AAC/B,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,MAAM,MAAA,GAAS,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA;AAEnC,IAAA,MAAM,EAAE,eAAe,gBAAA,EAAkB,YAAA,EAAc,UAAU,GAAG,cAAA,EAAe,GAAI,OAAA,IAAW,EAAC;AAEnG,IAAA,MAAM,IAAA,GAAO,IAAI,IAAA,CAAK,CAAC,MAAM,CAAA,EAAG,CAAA,MAAA,EAAS,QAAA,IAAY,KAAK,CAAA,CAAE,CAAA;AAE5D,IAAA,MAAM,aAAA,GAAgB,MAAM,IAAA,CAAK,MAAA,CAAO,YAAA,CAAa,OAAA;AAAA,MACnD;AAAA,QACE,IAAA;AAAA,QACA,QAAA,EAAU,KAAK,cAAA,EAAgB,IAAA;AAAA,QAC/B,aAAA;AAAA,QACA,gBAAA;AAAA,QACA;AAAA,OACF;AAAA,MACA;AAAA,KACF;AAEA,IAAA,OAAO,aAAA,CAAc,IAAA;AAAA,EACvB;AACF","file":"index.js","sourcesContent":["import { MastraVoice } from '@mastra/core/voice';\nimport { ElevenLabsClient } from 'elevenlabs';\n\ntype ElevenLabsModel =\n | 'eleven_multilingual_v2'\n | 'eleven_flash_v2_5'\n | 'eleven_flash_v2'\n | 'eleven_multilingual_sts_v2'\n | 'eleven_english_sts_v2'\n | 'scribe_v1';\n\ninterface ElevenLabsVoiceConfig {\n name?: ElevenLabsModel;\n apiKey?: string;\n}\n\ninterface SpeechToTextOptions {\n language_code?: string;\n 
tag_audio_events?: boolean;\n num_speakers?: number;\n filetype?: string;\n}\n\ninterface RequestOptions {\n timeoutInSeconds?: number;\n maxRetries?: number;\n abortSignal?: AbortSignal;\n apiKey?: string | undefined;\n headers?: Record<string, string>;\n}\n\n// Combined options type\ntype ElevenLabsListenOptions = SpeechToTextOptions & RequestOptions;\n\nexport class ElevenLabsVoice extends MastraVoice {\n private client: ElevenLabsClient;\n\n /**\n * Creates an instance of the ElevenLabsVoice class.\n *\n * @param {Object} options - The options for the voice configuration.\n * @param {ElevenLabsVoiceConfig} [options.speechModel] - The configuration for the speech model, including the model name and API key.\n * @param {string} [options.speaker] - The ID of the speaker to use. If not provided, a default speaker will be used.\n *\n * @throws {Error} If the ELEVENLABS_API_KEY is not set in the environment variables.\n */\n constructor({\n speechModel,\n listeningModel,\n speaker,\n }: { speechModel?: ElevenLabsVoiceConfig; listeningModel?: ElevenLabsVoiceConfig; speaker?: string } = {}) {\n const apiKey = speechModel?.apiKey ?? process.env.ELEVENLABS_API_KEY;\n super({\n speechModel: {\n name: speechModel?.name ?? 'eleven_multilingual_v2',\n apiKey: speechModel?.apiKey,\n },\n listeningModel: {\n name: listeningModel?.name ?? 'scribe_v1',\n apiKey: listeningModel?.apiKey,\n },\n speaker,\n });\n\n if (!apiKey) {\n throw new Error('ELEVENLABS_API_KEY is not set');\n }\n\n this.client = new ElevenLabsClient({\n apiKey,\n });\n\n this.speaker = speaker || '9BWtsMINqrJLrRacOk9x'; // Aria is the default speaker\n }\n\n /**\n * Retrieves a list of available speakers from the Eleven Labs API.\n * Each speaker includes their ID, name, language, and gender.\n *\n * @returns {Promise<Array<{ voiceId: string, name: string, language: string, gender: string }>>}\n * A promise that resolves to an array of speaker objects.\n */\n async getSpeakers() {\n const voices = await this.client.voices.getAll();\n return (\n voices?.voices?.map(voice => ({\n voiceId: voice.voice_id,\n name: voice.name,\n language: voice.labels?.language || 'en',\n gender: voice.labels?.gender || 'neutral',\n })) ?? []\n );\n }\n\n private async streamToString(stream: NodeJS.ReadableStream): Promise<string> {\n const chunks: Buffer[] = [];\n for await (const chunk of stream) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n return Buffer.concat(chunks).toString('utf-8');\n }\n\n /**\n * Converts text or audio input into speech using the Eleven Labs API.\n *\n * @param {string | NodeJS.ReadableStream} input - The text to be converted to speech or a stream containing audio data.\n * @param {Object} [options] - Optional parameters for the speech generation.\n * @param {string} [options.speaker] - The ID of the speaker to use for the speech. If not provided, the default speaker will be used.\n *\n * @returns {Promise<NodeJS.ReadableStream>} A promise that resolves to a readable stream of the generated speech.\n *\n * @throws {Error} If no speaker is specified or if no speech model is set.\n */\n async speak(input: string | NodeJS.ReadableStream, options?: { speaker?: string }): Promise<NodeJS.ReadableStream> {\n const speaker = options?.speaker || this.speaker;\n if (!speaker) {\n throw new Error('No speaker specified');\n }\n\n if (!this.speechModel?.name) {\n throw new Error('No speech model specified');\n }\n const text = typeof input === 'string' ? 
input : await this.streamToString(input);\n\n return await this.client.generate({\n text,\n voice: speaker,\n model_id: this.speechModel?.name as ElevenLabsModel,\n stream: true,\n });\n }\n\n /**\n * Checks if listening capabilities are enabled.\n *\n * @returns {Promise<{ enabled: boolean }>}\n */\n async getListener() {\n return { enabled: true };\n }\n\n /**\n * Converts audio input to text using ElevenLabs Speech-to-Text API.\n *\n * @param input - A readable stream containing the audio data to transcribe\n * @param options - Configuration options for the transcription\n * @param options.language_code - ISO language code (e.g., 'en', 'fr', 'es')\n * @param options.tag_audio_events - Whether to tag audio events like [MUSIC], [LAUGHTER], etc.\n * @param options.num_speakers - Number of speakers to detect in the audio\n * @param options.filetype - Audio file format (e.g., 'mp3', 'wav', 'ogg')\n * @param options.timeoutInSeconds - Request timeout in seconds\n * @param options.maxRetries - Maximum number of retry attempts\n * @param options.abortSignal - Signal to abort the request\n *\n * @returns A Promise that resolves to the transcribed text\n *\n */\n async listen(input: NodeJS.ReadableStream, options?: ElevenLabsListenOptions): Promise<string> {\n const chunks: Buffer[] = [];\n for await (const chunk of input) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n const buffer = Buffer.concat(chunks);\n\n const { language_code, tag_audio_events, num_speakers, filetype, ...requestOptions } = options || {};\n\n const file = new File([buffer], `audio.${filetype || 'mp3'}`);\n\n const transcription = await this.client.speechToText.convert(\n {\n file: file,\n model_id: this.listeningModel?.name as ElevenLabsModel,\n language_code,\n tag_audio_events,\n num_speakers,\n },\n requestOptions,\n );\n\n return transcription.text;\n }\n}\n"]}
1
+ {"version":3,"sources":["../src/index.ts"],"names":[],"mappings":";;;;AAkCO,IAAM,eAAA,GAAN,cAA8B,WAAA,CAAY;AAAA,EACvC,MAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWR,WAAA,CAAY;AAAA,IACV,WAAA;AAAA,IACA,cAAA;AAAA,IACA;AAAA,GACF,GAAuG,EAAC,EAAG;AACzG,IAAA,MAAM,MAAA,GAAS,WAAA,EAAa,MAAA,IAAU,OAAA,CAAQ,GAAA,CAAI,kBAAA;AAClD,IAAA,KAAA,CAAM;AAAA,MACJ,WAAA,EAAa;AAAA,QACX,IAAA,EAAM,aAAa,IAAA,IAAQ,wBAAA;AAAA,QAC3B,QAAQ,WAAA,EAAa;AAAA,OACvB;AAAA,MACA,cAAA,EAAgB;AAAA,QACd,IAAA,EAAM,gBAAgB,IAAA,IAAQ,WAAA;AAAA,QAC9B,QAAQ,cAAA,EAAgB;AAAA,OAC1B;AAAA,MACA;AAAA,KACD,CAAA;AAED,IAAA,IAAI,CAAC,MAAA,EAAQ;AACX,MAAA,MAAM,IAAI,MAAM,+BAA+B,CAAA;AAAA,IACjD;AAEA,IAAA,IAAA,CAAK,MAAA,GAAS,IAAI,gBAAA,CAAiB;AAAA,MACjC;AAAA,KACD,CAAA;AAED,IAAA,IAAA,CAAK,UAAU,OAAA,IAAW,sBAAA;AAAA,EAC5B;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,WAAA,GAAc;AAClB,IAAA,MAAM,MAAA,GAAS,MAAM,IAAA,CAAK,MAAA,CAAO,OAAO,MAAA,EAAO;AAC/C,IAAA,OACE,MAAA,EAAQ,MAAA,EAAQ,GAAA,CAAI,CAAA,KAAA,MAAU;AAAA,MAC5B,SAAS,KAAA,CAAM,QAAA;AAAA,MACf,MAAM,KAAA,CAAM,IAAA;AAAA,MACZ,QAAA,EAAU,KAAA,CAAM,MAAA,EAAQ,QAAA,IAAY,IAAA;AAAA,MACpC,MAAA,EAAQ,KAAA,CAAM,MAAA,EAAQ,MAAA,IAAU;AAAA,KAClC,CAAE,KAAK,EAAC;AAAA,EAEZ;AAAA,EAEA,MAAc,eAAe,MAAA,EAAgD;AAC3E,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,MAAA,EAAQ;AAChC,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,OAAO,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,OAAO,CAAA;AAAA,EAC/C;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAcA,MAAM,KAAA,CACJ,KAAA,EACA,OAAA,EAKgC;AAChC,IAAA,MAAM,OAAA,GAAU,OAAA,EAAS,OAAA,IAAW,IAAA,CAAK,OAAA;AACzC,IAAA,IAAI,CAAC,OAAA,EAAS;AACZ,MAAA,MAAM,IAAI,MAAM,sBAAsB,CAAA;AAAA,IACxC;AAEA,IAAA,IAAI,CAAC,IAAA,CAAK,WAAA,EAAa,IAAA,EAAM;AAC3B,MAAA,MAAM,IAAI,MAAM,2BAA2B,CAAA;AAAA,IAC7C;AACA,IAAA,MAAM,IAAA,GAAO,OAAO,KAAA,KAAU,QAAA,GAAW,QAAQ,MAAM,IAAA,CAAK,eAAe,KAAK,CAAA;AAEhF,IAAA,OAAO,MAAM,IAAA,CAAK,MAAA,CAAO,QAAA,CAAS;AAAA,MAChC,IAAA;AAAA,MACA,KAAA,EAAO,OAAA;AAAA,MACP,QAAA,EAAU,KAAK,WAAA,EAAa,IAAA;AAAA,MAC5B,MAAA,EAAQ,IAAA;AAAA,MACR,GAAI,OAAA,EAAS,YAAA,IAAgB,EAAE,aAAA,EAAe,QAAQ,YAAA;AAAoB,KAC3E,CAAA;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,WAAA,GAAc;AAClB,IAAA,OAAO,EAAE,SAAS,IAAA,EAAK;AAAA,EACzB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAkBA,MAAM,MAAA,CAAO,KAAA,EAA8B,OAAA,EAAoD;AAC7F,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,KAAA,EAAO;AAC/B,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,MAAM,MAAA,GAAS,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA;AAEnC,IAAA,MAAM,EAAE,eAAe,gBAAA,EAAkB,YAAA,EAAc,UAAU,GAAG,cAAA,EAAe,GAAI,OAAA,IAAW,EAAC;AAEnG,IAAA,MAAM,IAAA,GAAO,IAAI,IAAA,CAAK,CAAC,MAAM,CAAA,EAAG,CAAA,MAAA,EAAS,QAAA,IAAY,KAAK,CAAA,CAAE,CAAA;AAE5D,IAAA,MAAM,aAAA,GAAgB,MAAM,IAAA,CAAK,MAAA,CAAO,YAAA,CAAa,OAAA;AAAA,MACnD;AAAA,QACE,IAAA;AAAA,QACA,QAAA,EAAU,KAAK,cAAA,EAAgB,IAAA;AAAA,QAC/B,aAAA;AAAA,QACA,gBAAA;AAAA,QACA;AAAA,OACF;AAAA,MACA;AAAA,KACF;AAEA,IAAA,OAAO,aAAA,CAAc,IAAA;AAAA,EACvB;AACF","file":"index.js","sourcesContent":["import { MastraVoice } from '@mastra/core/voice';\nimport { ElevenLabsClient } from 'elevenlabs';\n\ntype ElevenLabsModel =\n | 'eleven_multilingual_v2'\n | 'eleven_flash_v2_5'\n | 'eleven_flash_v2'\n | 'eleven_multilingual_sts_v2'\n | 'eleven_english_sts_v2'\n | 'scribe_v1';\n\ninterface ElevenLabsVoiceConfig {\n name?: ElevenLabsModel;\n apiKey?: 
string;\n}\n\ninterface SpeechToTextOptions {\n language_code?: string;\n tag_audio_events?: boolean;\n num_speakers?: number;\n filetype?: string;\n}\n\ninterface RequestOptions {\n timeoutInSeconds?: number;\n maxRetries?: number;\n abortSignal?: AbortSignal;\n apiKey?: string | undefined;\n headers?: Record<string, string>;\n}\n\n// Combined options type\ntype ElevenLabsListenOptions = SpeechToTextOptions & RequestOptions;\n\nexport class ElevenLabsVoice extends MastraVoice {\n private client: ElevenLabsClient;\n\n /**\n * Creates an instance of the ElevenLabsVoice class.\n *\n * @param {Object} options - The options for the voice configuration.\n * @param {ElevenLabsVoiceConfig} [options.speechModel] - The configuration for the speech model, including the model name and API key.\n * @param {string} [options.speaker] - The ID of the speaker to use. If not provided, a default speaker will be used.\n *\n * @throws {Error} If the ELEVENLABS_API_KEY is not set in the environment variables.\n */\n constructor({\n speechModel,\n listeningModel,\n speaker,\n }: { speechModel?: ElevenLabsVoiceConfig; listeningModel?: ElevenLabsVoiceConfig; speaker?: string } = {}) {\n const apiKey = speechModel?.apiKey ?? process.env.ELEVENLABS_API_KEY;\n super({\n speechModel: {\n name: speechModel?.name ?? 'eleven_multilingual_v2',\n apiKey: speechModel?.apiKey,\n },\n listeningModel: {\n name: listeningModel?.name ?? 'scribe_v1',\n apiKey: listeningModel?.apiKey,\n },\n speaker,\n });\n\n if (!apiKey) {\n throw new Error('ELEVENLABS_API_KEY is not set');\n }\n\n this.client = new ElevenLabsClient({\n apiKey,\n });\n\n this.speaker = speaker || '9BWtsMINqrJLrRacOk9x'; // Aria is the default speaker\n }\n\n /**\n * Retrieves a list of available speakers from the Eleven Labs API.\n * Each speaker includes their ID, name, language, and gender.\n *\n * @returns {Promise<Array<{ voiceId: string, name: string, language: string, gender: string }>>}\n * A promise that resolves to an array of speaker objects.\n */\n async getSpeakers() {\n const voices = await this.client.voices.getAll();\n return (\n voices?.voices?.map(voice => ({\n voiceId: voice.voice_id,\n name: voice.name,\n language: voice.labels?.language || 'en',\n gender: voice.labels?.gender || 'neutral',\n })) ?? []\n );\n }\n\n private async streamToString(stream: NodeJS.ReadableStream): Promise<string> {\n const chunks: Buffer[] = [];\n for await (const chunk of stream) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n return Buffer.concat(chunks).toString('utf-8');\n }\n\n /**\n * Converts text or audio input into speech using the Eleven Labs API.\n *\n * @param {string | NodeJS.ReadableStream} input - The text to be converted to speech or a stream containing audio data.\n * @param {Object} [options] - Optional parameters for the speech generation.\n * @param {string} [options.speaker] - The ID of the speaker to use for the speech. If not provided, the default speaker will be used.\n * @param {string} [options.outputFormat] - The audio output format (e.g., 'ulaw_8000', 'pcm_16000', 'mp3_44100_128'). 
If not provided, defaults to ElevenLabs' default format.\n *\n * @returns {Promise<NodeJS.ReadableStream>} A promise that resolves to a readable stream of the generated speech.\n *\n * @throws {Error} If no speaker is specified or if no speech model is set.\n */\n async speak(\n input: string | NodeJS.ReadableStream,\n options?: {\n speaker?: string;\n outputFormat?: string;\n [key: string]: any;\n },\n ): Promise<NodeJS.ReadableStream> {\n const speaker = options?.speaker || this.speaker;\n if (!speaker) {\n throw new Error('No speaker specified');\n }\n\n if (!this.speechModel?.name) {\n throw new Error('No speech model specified');\n }\n const text = typeof input === 'string' ? input : await this.streamToString(input);\n\n return await this.client.generate({\n text,\n voice: speaker,\n model_id: this.speechModel?.name as ElevenLabsModel,\n stream: true,\n ...(options?.outputFormat && { output_format: options.outputFormat as any }),\n });\n }\n\n /**\n * Checks if listening capabilities are enabled.\n *\n * @returns {Promise<{ enabled: boolean }>}\n */\n async getListener() {\n return { enabled: true };\n }\n\n /**\n * Converts audio input to text using ElevenLabs Speech-to-Text API.\n *\n * @param input - A readable stream containing the audio data to transcribe\n * @param options - Configuration options for the transcription\n * @param options.language_code - ISO language code (e.g., 'en', 'fr', 'es')\n * @param options.tag_audio_events - Whether to tag audio events like [MUSIC], [LAUGHTER], etc.\n * @param options.num_speakers - Number of speakers to detect in the audio\n * @param options.filetype - Audio file format (e.g., 'mp3', 'wav', 'ogg')\n * @param options.timeoutInSeconds - Request timeout in seconds\n * @param options.maxRetries - Maximum number of retry attempts\n * @param options.abortSignal - Signal to abort the request\n *\n * @returns A Promise that resolves to the transcribed text\n *\n */\n async listen(input: NodeJS.ReadableStream, options?: ElevenLabsListenOptions): Promise<string> {\n const chunks: Buffer[] = [];\n for await (const chunk of input) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n const buffer = Buffer.concat(chunks);\n\n const { language_code, tag_audio_events, num_speakers, filetype, ...requestOptions } = options || {};\n\n const file = new File([buffer], `audio.${filetype || 'mp3'}`);\n\n const transcription = await this.client.speechToText.convert(\n {\n file: file,\n model_id: this.listeningModel?.name as ElevenLabsModel,\n language_code,\n tag_audio_events,\n num_speakers,\n },\n requestOptions,\n );\n\n return transcription.text;\n }\n}\n"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mastra/voice-elevenlabs",
3
- "version": "0.12.0-beta.0",
3
+ "version": "0.12.0-beta.2",
4
4
  "description": "Mastra ElevenLabs voice integration",
5
5
  "type": "module",
6
6
  "files": [
@@ -27,15 +27,16 @@
27
27
  "elevenlabs": "^1.59.0"
28
28
  },
29
29
  "devDependencies": {
30
- "@microsoft/api-extractor": "^7.52.8",
31
- "@types/node": "^20.19.0",
30
+ "@types/node": "22.13.17",
31
+ "@vitest/coverage-v8": "4.0.12",
32
+ "@vitest/ui": "4.0.12",
32
33
  "eslint": "^9.37.0",
33
34
  "tsup": "^8.5.0",
34
- "typescript": "^5.8.3",
35
- "vitest": "^3.2.4",
35
+ "typescript": "^5.9.3",
36
+ "vitest": "4.0.16",
37
+ "@internal/types-builder": "0.0.28",
36
38
  "@internal/lint": "0.0.53",
37
- "@mastra/core": "1.0.0-beta.0",
38
- "@internal/types-builder": "0.0.28"
39
+ "@mastra/core": "1.0.0-beta.20"
39
40
  },
40
41
  "peerDependencies": {
41
42
  "@mastra/core": ">=1.0.0-0 <2.0.0-0",
@@ -55,6 +56,7 @@
55
56
  },
56
57
  "scripts": {
57
58
  "build": "tsup --silent --config tsup.config.ts",
59
+ "postbuild": "pnpx tsx ../../scripts/generate-package-docs.ts voice/elevenlabs",
58
60
  "build:watch": "tsup build --watch && tsc -p tsconfig.build.json",
59
61
  "test": "vitest run",
60
62
  "lint": "eslint ."