@mastra/voice-openai 0.12.0-beta.1 → 0.12.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,568 @@
1
+ # Voice API Reference
2
+
3
+ > API reference for voice - 5 entries
4
+
5
+
6
+ ---
7
+
8
+ ## Reference: CompositeVoice
9
+
10
+ > Documentation for the CompositeVoice class, which enables combining multiple voice providers for flexible text-to-speech and speech-to-text operations.
11
+
12
+ The CompositeVoice class allows you to combine different voice providers for text-to-speech and speech-to-text operations. This is particularly useful when you want to use the best provider for each operation - for example, using OpenAI for speech-to-text and PlayAI for text-to-speech.
13
+
14
+ CompositeVoice supports both Mastra voice providers and AI SDK model providers.
15
+
16
+ ## Constructor Parameters
17
+
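+ A minimal sketch of the constructor shape, inferred from the usage examples below; the exact parameter types live in `@mastra/core/voice`:
+
+ ```typescript
+ import { CompositeVoice } from "@mastra/core/voice";
+ import { OpenAIVoice } from "@mastra/voice-openai";
+
+ // `input` handles speech-to-text (listening); `output` handles
+ // text-to-speech (speaking). Both are optional, but calling a method
+ // whose provider is missing throws an error (see the notes below).
+ const voice = new CompositeVoice({
+   input: new OpenAIVoice(),
+   output: new OpenAIVoice(),
+ });
+ ```
+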
18
+ ## Methods
19
+
20
+ ### speak()
21
+
22
+ Converts text to speech using the configured speaking provider.
23
+
24
+ Notes:
25
+
26
+ - If no speaking provider is configured, this method will throw an error
27
+ - Options are passed through to the configured speaking provider
28
+ - Returns a stream of audio data
29
+
30
+ ### listen()
31
+
32
+ Converts speech to text using the configured listening provider.
33
+
34
+ Notes:
35
+
36
+ - If no listening provider is configured, this method will throw an error
37
+ - Options are passed through to the configured listening provider
38
+ - Returns either a string or a stream of transcribed text, depending on the provider
39
+
40
+ ### getSpeakers()
41
+
42
+ Returns a list of available voices from the speaking provider.
43
+
44
+ Notes:
45
+
46
+ - Returns voices from the speaking provider only
47
+ - If no speaking provider is configured, returns an empty array
48
+ - Each voice object will have at least a `voiceId` property
49
+ - Additional voice properties depend on the speaking provider
50
+
51
+ ## Usage Examples
52
+
53
+ ### Using Mastra Voice Providers
54
+
55
+ ```typescript
56
+ import { CompositeVoice } from "@mastra/core/voice";
57
+ import { OpenAIVoice } from "@mastra/voice-openai";
58
+ import { PlayAIVoice } from "@mastra/voice-playai";
59
+
60
+ // Create voice providers
61
+ const openai = new OpenAIVoice();
62
+ const playai = new PlayAIVoice();
63
+
64
+ // Use OpenAI for listening (speech-to-text) and PlayAI for speaking (text-to-speech)
65
+ const voice = new CompositeVoice({
66
+ input: openai,
67
+ output: playai,
68
+ });
69
+
70
+ // Convert speech to text using OpenAI
71
+ const text = await voice.listen(audioStream);
72
+
73
+ // Convert text to speech using PlayAI
74
+ const audio = await voice.speak("Hello, world!");
75
+ ```
76
+
77
+ ### Using AI SDK Model Providers
78
+
79
+ You can pass AI SDK transcription and speech models directly to CompositeVoice:
80
+
81
+ ```typescript
82
+ import { CompositeVoice } from "@mastra/core/voice";
83
+ import { openai } from "@ai-sdk/openai";
84
+ import { elevenlabs } from "@ai-sdk/elevenlabs";
85
+
86
+ // Use AI SDK models directly - they will be auto-wrapped
87
+ const voice = new CompositeVoice({
88
+ input: openai.transcription('whisper-1'), // AI SDK transcription
89
+ output: elevenlabs.speech('eleven_turbo_v2'), // AI SDK speech
90
+ });
91
+
92
+ // Works the same way as with Mastra providers
93
+ const text = await voice.listen(audioStream);
94
+ const audio = await voice.speak("Hello from AI SDK!");
95
+ ```
96
+
97
+ ### Mix and Match
98
+
99
+ You can combine Mastra providers with AI SDK models:
100
+
101
+ ```typescript
102
+ import { CompositeVoice } from "@mastra/core/voice";
103
+ import { PlayAIVoice } from "@mastra/voice-playai";
104
+ import { groq } from "@ai-sdk/groq";
105
+
106
+ const voice = new CompositeVoice({
107
+ input: groq.transcription('whisper-large-v3'), // AI SDK for STT
108
+ output: new PlayAIVoice(), // Mastra for TTS
109
+ });
110
+ ```
111
+
112
+ ---
113
+
114
+ ## Reference: OpenAI
115
+
116
+ > Documentation for the OpenAIVoice class, providing text-to-speech and speech-to-text capabilities.
117
+
118
+ The OpenAIVoice class in Mastra provides text-to-speech and speech-to-text capabilities using OpenAI's models.
119
+
120
+ ## Usage Example
121
+
122
+ ```typescript
123
+ import { OpenAIVoice } from "@mastra/voice-openai";
124
+
125
+ // Initialize with default configuration using environment variables
126
+ const voice = new OpenAIVoice();
127
+
128
+ // Or initialize with specific configuration
129
+ const voiceWithConfig = new OpenAIVoice({
130
+ speechModel: {
131
+ name: "tts-1-hd",
132
+ apiKey: "your-openai-api-key",
133
+ },
134
+ listeningModel: {
135
+ name: "whisper-1",
136
+ apiKey: "your-openai-api-key",
137
+ },
138
+ speaker: "alloy", // Default voice
139
+ });
140
+
141
+ // Convert text to speech
142
+ const audioStream = await voice.speak("Hello, how can I help you?", {
143
+ speaker: "nova", // Override default voice
144
+ speed: 1.2, // Adjust speech speed
145
+ });
146
+
147
+ // Convert speech to text
148
+ const text = await voice.listen(audioStream, {
149
+ filetype: "mp3",
150
+ });
151
+ ```
152
+
153
+ ## Configuration
154
+
155
+ ### Constructor Options
156
+
157
+ ### OpenAIConfig
158
+
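+ The `OpenAIConfig` shape, as it appears in this package's `src/index.ts` (reproduced from the bundled source map later in this diff):
+
+ ```typescript
+ import type { ClientOptions } from "openai";
+
+ type OpenAIModel = "tts-1" | "tts-1-hd" | "whisper-1";
+
+ export interface OpenAIConfig {
+   name?: OpenAIModel; // defaults to "tts-1" for speech, "whisper-1" for listening
+   apiKey?: string; // falls back to the OPENAI_API_KEY environment variable
+   options?: Omit<ClientOptions, "apiKey">; // extra OpenAI client options
+ }
+ ```
+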
159
+ ## Methods
160
+
161
+ ### speak()
162
+
163
+ Converts text to speech using OpenAI's text-to-speech models.
164
+
165
+ Returns: `Promise<NodeJS.ReadableStream>`
166
+
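+ The shape of `speak()`, per this package's source (expressed here as a standalone type for illustration):
+
+ ```typescript
+ // Accepts a string or a text stream (buffered to a string before synthesis);
+ // throws if the resulting text is empty. Unlisted option keys are forwarded
+ // to the OpenAI speech API.
+ type Speak = (
+   input: string | NodeJS.ReadableStream,
+   options?: {
+     speaker?: string; // voice id; defaults to the configured speaker
+     speed?: number; // defaults to 1.0
+     [key: string]: any;
+   },
+ ) => Promise<NodeJS.ReadableStream>;
+ ```
+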
167
+ ### listen()
168
+
169
+ Transcribes audio using OpenAI's Whisper model.
170
+
171
+ Returns: `Promise<string>`
172
+
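+ The shape of `listen()`, per this package's source (expressed here as a standalone type for illustration):
+
+ ```typescript
+ // The audio stream is buffered and uploaded as a file named after `filetype`
+ // (default "mp3"). Unlisted option keys are forwarded to the transcription API.
+ type Listen = (
+   audioStream: NodeJS.ReadableStream,
+   options?: {
+     filetype?: "mp3" | "mp4" | "mpeg" | "mpga" | "m4a" | "wav" | "webm";
+     [key: string]: any;
+   },
+ ) => Promise<string>;
+ ```
+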
173
+ ### getSpeakers()
174
+
175
+ Returns an array of available voice options, where each voice object contains at least a `voiceId`:
176
+
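+ ```typescript
+ // Assuming `voice` is an OpenAIVoice instance; this is the full list
+ // returned by this version of the package, per its source.
+ const speakers = await voice.getSpeakers();
+ // [
+ //   { voiceId: "alloy" }, { voiceId: "echo" },  { voiceId: "fable" },
+ //   { voiceId: "onyx" },  { voiceId: "nova" },  { voiceId: "shimmer" },
+ //   { voiceId: "ash" },   { voiceId: "coral" }, { voiceId: "sage" },
+ // ]
+ ```
+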
177
+ ## Notes
178
+
179
+ - API keys can be provided via constructor options or the `OPENAI_API_KEY` environment variable
180
+ - The `tts-1-hd` model provides higher quality audio but may have slower processing times
181
+ - Speech recognition supports multiple audio formats including mp3, wav, and webm
182
+
183
+ ---
184
+
185
+ ## Reference: voice.getSpeakers()
186
+
187
+ > Documentation for the getSpeakers() method available in voice providers, which retrieves available voice options.
188
+
189
+ The `getSpeakers()` method retrieves a list of available voice options (speakers) from the voice provider. This allows applications to present users with voice choices or programmatically select the most appropriate voice for different contexts.
190
+
191
+ ## Usage Example
192
+
193
+ ```typescript
194
+ import { OpenAIVoice } from "@mastra/voice-openai";
195
+ import { ElevenLabsVoice } from "@mastra/voice-elevenlabs";
196
+
197
+ // Initialize voice providers
198
+ const openaiVoice = new OpenAIVoice();
199
+ const elevenLabsVoice = new ElevenLabsVoice({
200
+ apiKey: process.env.ELEVENLABS_API_KEY,
201
+ });
202
+
203
+ // Get available speakers from OpenAI
204
+ const openaiSpeakers = await openaiVoice.getSpeakers();
205
+ console.log("OpenAI voices:", openaiSpeakers);
206
+ // Example output: [{ voiceId: "alloy" }, { voiceId: "echo" }, { voiceId: "fable" }, ...]
207
+
208
+ // Get available speakers from ElevenLabs
209
+ const elevenLabsSpeakers = await elevenLabsVoice.getSpeakers();
210
+ console.log("ElevenLabs voices:", elevenLabsSpeakers);
211
+ // Example output: [{ voiceId: "21m00Tcm4TlvDq8ikWAM", name: "Rachel" }, ...]
212
+
213
+ // Use a specific voice for speech
214
+ const text = "Hello, this is a test of different voices.";
215
+ await openaiVoice.speak(text, { speaker: openaiSpeakers[2].voiceId });
216
+ await elevenLabsVoice.speak(text, { speaker: elevenLabsSpeakers[0].voiceId });
217
+ ```
218
+
219
+ ## Parameters
220
+
221
+ This method does not accept any parameters.
222
+
223
+ ## Return Value
224
+
+ Returns a `Promise` resolving to an array of voice objects. Each object contains at least a `voiceId` string; any additional metadata varies by provider (see below).
+
225
+ ## Provider-Specific Metadata
226
+
227
+ Different voice providers return different metadata for their voices:
228
+
229
+ **OpenAI:**
+
+ **OpenAI Realtime:**
+
+ **Deepgram:**
+
+ **ElevenLabs:**
+
+ **Google:**
+
+ **Azure:**
+
+ **Murf:**
+
+ **PlayAI:**
+
+ **Speechify:**
+
+ **Sarvam:**
+
271
+ ## Notes
272
+
273
+ - The available voices vary significantly between providers
274
+ - Some providers may require authentication to retrieve the full list of voices
275
+ - The default implementation returns an empty array if the provider doesn't support this method
276
+ - For performance reasons, consider caching the results if you need to display the list frequently
277
+ - The `voiceId` property is guaranteed to be present for all providers, but additional metadata varies
278
+
279
+ ---
280
+
281
+ ## Reference: voice.listen()
282
+
283
+ > Documentation for the listen() method available in all Mastra voice providers, which converts speech to text.
284
+
285
+ The `listen()` method is a core function available in all Mastra voice providers that converts speech to text. It takes an audio stream as input and returns the transcribed text.
286
+
287
+ ## Parameters
288
+
+ `listen()` takes an audio stream (`NodeJS.ReadableStream`) as its first argument, plus an optional provider-specific options object.
+
289
+ ## Return Value
290
+
291
+ Returns one of the following (a handling sketch follows the list):
292
+
293
+ - `Promise<string>`: A promise that resolves to the transcribed text
294
+ - `Promise<NodeJS.ReadableStream>`: A promise that resolves to a stream of transcribed text (for streaming transcription)
295
+ - `Promise<void>`: For real-time providers that emit 'writing' events instead of returning text directly
296
+
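+ A sketch of handling each shape, assuming `voice` and `audioStream` are already set up:
+
+ ```typescript
+ const result = await voice.listen(audioStream);
+
+ if (typeof result === "string") {
+   // Simple transcription: the full text at once
+   console.log(result);
+ } else if (result) {
+   // Streaming transcription: consume text chunks as they arrive
+   for await (const chunk of result) {
+     process.stdout.write(chunk.toString());
+   }
+ }
+ // Realtime providers resolve with no value and emit 'writing' events instead
+ ```
+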
297
+ ## Provider-Specific Options
298
+
299
+ Each voice provider may support additional options specific to their implementation. Here are some examples:
300
+
301
+ ### OpenAI
302
+
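+ For this package, the recognized options look like the following (the interface name is illustrative; the package declares the shape inline):
+
+ ```typescript
+ // Unlisted keys (e.g. `language`, `prompt`) are forwarded to
+ // OpenAI's transcription API as-is.
+ interface OpenAIListenOptions {
+   filetype?: "mp3" | "mp4" | "mpeg" | "mpga" | "m4a" | "wav" | "webm";
+   [key: string]: any;
+ }
+ ```
+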
303
+ ### Google
304
+
305
+ ### Deepgram
306
+
307
+ ## Usage Example
308
+
309
+ ```typescript
310
+ import { OpenAIVoice } from "@mastra/voice-openai";
311
+ import { getMicrophoneStream } from "@mastra/node-audio";
312
+ import { createReadStream } from "fs";
313
+ import path from "path";
314
+
315
+ // Initialize a voice provider
316
+ const voice = new OpenAIVoice({
317
+ listeningModel: {
318
+ name: "whisper-1",
319
+ apiKey: process.env.OPENAI_API_KEY,
320
+ },
321
+ });
322
+
323
+ // Basic usage with a file stream
324
+ const audioFilePath = path.join(process.cwd(), "audio.mp3");
325
+ const audioStream = createReadStream(audioFilePath);
326
+ const transcript = await voice.listen(audioStream, {
327
+ filetype: "mp3",
328
+ });
329
+ console.log("Transcribed text:", transcript);
330
+
331
+ // Using a microphone stream
332
+ const microphoneStream = getMicrophoneStream(); // Assume this function gets audio input
333
+ const transcription = await voice.listen(microphoneStream);
334
+
335
+ // With provider-specific options (use a fresh stream; the first one was already consumed)
336
+ const transcriptWithOptions = await voice.listen(createReadStream(audioFilePath), {
337
+ language: "en",
338
+ prompt: "This is a conversation about artificial intelligence.",
339
+ });
340
+ ```
341
+
342
+ ## Using with CompositeVoice
343
+
344
+ When using `CompositeVoice`, the `listen()` method delegates to the configured listening provider:
345
+
346
+ ```typescript
347
+ import { CompositeVoice } from "@mastra/core/voice";
348
+ import { OpenAIVoice } from "@mastra/voice-openai";
349
+ import { PlayAIVoice } from "@mastra/voice-playai";
350
+
351
+ const voice = new CompositeVoice({
352
+ input: new OpenAIVoice(),
353
+ output: new PlayAIVoice(),
354
+ });
355
+
356
+ // This will use the OpenAIVoice provider
357
+ const transcript = await voice.listen(audioStream);
358
+ ```
359
+
360
+ ### Using AI SDK Model Providers
361
+
362
+ You can also use AI SDK transcription models directly with `CompositeVoice`:
363
+
364
+ ```typescript
365
+ import { CompositeVoice } from "@mastra/core/voice";
366
+ import { openai } from "@ai-sdk/openai";
367
+ import { PlayAIVoice } from "@mastra/voice-playai";
368
+
369
+ // Use AI SDK transcription models
370
+ const voice = new CompositeVoice({
371
+ input: openai.transcription('whisper-1'), // AI SDK model
372
+ output: new PlayAIVoice(), // Mastra provider
373
+ });
374
+
375
+ // Works the same way
376
+ const transcript = await voice.listen(audioStream);
377
+
378
+ // Provider-specific options can be passed through
379
+ const transcriptWithOptions = await voice.listen(audioStream, {
380
+ providerOptions: {
381
+ openai: {
382
+ language: 'en',
383
+ prompt: 'This is about AI',
384
+ }
385
+ }
386
+ });
387
+ ```
388
+
389
+ See the [CompositeVoice reference](https://mastra.ai/reference/v1/voice/composite-voice) for more details on AI SDK integration.
390
+
391
+ ## Realtime Voice Providers
392
+
393
+ When using realtime voice providers like `OpenAIRealtimeVoice`, the `listen()` method behaves differently:
394
+
395
+ - Instead of returning transcribed text, it emits 'writing' events with the transcribed text
396
+ - You need to register an event listener to receive the transcription
397
+
398
+ ```typescript
399
+ import { OpenAIRealtimeVoice } from "@mastra/voice-openai-realtime";
400
+ import { getMicrophoneStream } from "@mastra/node-audio";
401
+
402
+ const voice = new OpenAIRealtimeVoice();
403
+ await voice.connect();
404
+
405
+ // Register event listener for transcription
406
+ voice.on("writing", ({ text, role }) => {
407
+ console.log(`${role}: ${text}`);
408
+ });
409
+
410
+ // This will emit 'writing' events instead of returning text
411
+ const microphoneStream = getMicrophoneStream();
412
+ await voice.listen(microphoneStream);
413
+ ```
414
+
415
+ ## Notes
416
+
417
+ - Not all voice providers support speech-to-text functionality (e.g., PlayAI, Speechify)
418
+ - The behavior of `listen()` may vary slightly between providers, but all implementations follow the same basic interface
419
+ - When using a realtime voice provider, the method might not return text directly but instead emit a 'writing' event
420
+ - The audio format supported depends on the provider. Common formats include MP3, WAV, and M4A
421
+ - Some providers support streaming transcription, where text is returned as it's transcribed
422
+ - For best performance, consider closing or ending the audio stream when you're done with it
423
+
424
+ ## Related Methods
425
+
426
+ - [voice.speak()](./voice.speak) - Converts text to speech
427
+ - [voice.send()](./voice.send) - Sends audio data to the voice provider in real-time
428
+ - [voice.on()](./voice.on) - Registers an event listener for voice events
429
+
430
+ ---
431
+
432
+ ## Reference: voice.speak()
433
+
434
+ > Documentation for the speak() method available in all Mastra voice providers, which converts text to speech.
435
+
436
+ The `speak()` method is a core function available in all Mastra voice providers that converts text to speech. It takes text input and returns an audio stream that can be played or saved.
437
+
438
+ ## Parameters
439
+
+ `speak()` takes the text to synthesize (a `string` or a readable text stream) as its first argument, plus an optional provider-specific options object (for example `speaker` or `speed`).
+
440
+ ## Return Value
441
+
442
+ Returns a `Promise<NodeJS.ReadableStream | void>` where:
443
+
444
+ - `NodeJS.ReadableStream`: A stream of audio data that can be played or saved
445
+ - `void`: When using a realtime voice provider that emits audio through events instead of returning it directly
446
+
447
+ ## Provider-Specific Options
448
+
449
+ Each voice provider may support additional options specific to their implementation. Here are some examples:
450
+
451
+ ### OpenAI
452
+
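+ For this package, the handled options look like the following (the interface name is illustrative; the package declares the shape inline):
+
+ ```typescript
+ // `responseFormat` maps to the API's `response_format` (default "mp3");
+ // unlisted keys are forwarded to OpenAI's speech API as-is.
+ interface OpenAISpeakOptions {
+   speaker?: string; // voice id; defaults to the configured speaker
+   speed?: number; // defaults to 1.0
+   responseFormat?: string;
+   [key: string]: any;
+ }
+ ```
+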
453
+ ### ElevenLabs
454
+
455
+ ### Google
456
+
457
+ ### Murf
458
+
459
+ ## Usage Example
460
+
461
+ ```typescript
462
+ import { OpenAIVoice } from "@mastra/voice-openai";
+ import { Readable } from "stream";
463
+ // Initialize a voice provider
464
+ const voice = new OpenAIVoice({
465
+ speaker: "alloy", // Default voice
466
+ });
467
+ // Basic usage with default settings
468
+ const audioStream = await voice.speak("Hello, world!");
469
+ // Using a different voice for this specific request
470
+ const audioStreamWithDifferentVoice = await voice.speak("Hello again!", {
471
+ speaker: "nova",
472
+ });
473
+ // Using provider-specific options
474
+ const audioStreamWithOptions = await voice.speak("Hello with options!", {
475
+ speaker: "echo",
476
+ speed: 1.2, // OpenAI-specific option
477
+ });
478
+ // Using a text stream as input
480
+ const textStream = Readable.from(["Hello", " from", " a", " stream!"]);
481
+ const audioStreamFromTextStream = await voice.speak(textStream);
482
+ ```
483
+
484
+ ## Using with CompositeVoice
485
+
486
+ When using `CompositeVoice`, the `speak()` method delegates to the configured speaking provider:
487
+
488
+ ```typescript
489
+ import { CompositeVoice } from "@mastra/core/voice";
490
+ import { OpenAIVoice } from "@mastra/voice-openai";
491
+ import { PlayAIVoice } from "@mastra/voice-playai";
492
+
493
+ const voice = new CompositeVoice({
494
+ output: new PlayAIVoice(),
495
+ input: new OpenAIVoice(),
496
+ });
497
+
498
+ // This will use the PlayAIVoice provider
499
+ const audioStream = await voice.speak("Hello, world!");
500
+ ```
501
+
502
+ ### Using AI SDK Model Providers
503
+
504
+ You can also use AI SDK speech models directly with `CompositeVoice`:
505
+
506
+ ```typescript
507
+ import { CompositeVoice } from "@mastra/core/voice";
508
+ import { openai } from "@ai-sdk/openai";
509
+ import { elevenlabs } from "@ai-sdk/elevenlabs";
510
+
511
+ // Use AI SDK speech models
512
+ const voice = new CompositeVoice({
513
+ output: elevenlabs.speech('eleven_turbo_v2'), // AI SDK model
514
+ input: openai.transcription('whisper-1'), // AI SDK model
515
+ });
516
+
517
+ // Works the same way
518
+ const audioStream = await voice.speak("Hello from AI SDK!");
519
+
520
+ // Provider-specific options can be passed through
521
+ const audioWithOptions = await voice.speak("Hello with options!", {
522
+ speaker: 'Rachel', // ElevenLabs voice
523
+ providerOptions: {
524
+ elevenlabs: {
525
+ stability: 0.5,
526
+ similarity_boost: 0.75,
527
+ }
528
+ }
529
+ });
530
+ ```
531
+
532
+ See the [CompositeVoice reference](https://mastra.ai/reference/v1/voice/composite-voice) for more details on AI SDK integration.
533
+
534
+ ## Realtime Voice Providers
535
+
536
+ When using realtime voice providers like `OpenAIRealtimeVoice`, the `speak()` method behaves differently:
537
+
538
+ - Instead of returning an audio stream, it emits a 'speaker' event with the audio data
539
+ - You need to register an event listener to receive the audio chunks
540
+
541
+ ```typescript
542
+ import { OpenAIRealtimeVoice } from "@mastra/voice-openai-realtime";
543
+ import Speaker from "@mastra/node-speaker";
544
+
545
+ const speaker = new Speaker({
546
+   sampleRate: 24000, // Audio sample rate in Hz - matches the 24 kHz audio emitted by OpenAI realtime models
547
+ channels: 1, // Mono audio output (as opposed to stereo which would be 2)
548
+ bitDepth: 16, // Bit depth for audio quality - CD quality standard (16-bit resolution)
549
+ });
550
+
551
+ const voice = new OpenAIRealtimeVoice();
552
+ await voice.connect();
553
+ // Register event listener for audio chunks
554
+ voice.on("speaker", (stream) => {
555
+   // Handle the audio stream (e.g., play it or save it)
556
+ stream.pipe(speaker);
557
+ });
558
+ // This will emit 'speaker' events instead of returning a stream
559
+ await voice.speak("Hello, this is realtime speech!");
560
+ ```
561
+
562
+ ## Notes
563
+
564
+ - The behavior of `speak()` may vary slightly between providers, but all implementations follow the same basic interface.
565
+ - When using a realtime voice provider, the method might not return an audio stream directly but instead emit a 'speaker' event.
566
+ - If a text stream is provided as input, the provider will typically convert it to a string before processing.
567
+ - The audio format of the returned stream depends on the provider. Common formats include MP3, WAV, and OGG.
568
+ - For best performance, consider closing or ending the audio stream when you're done with it.
package/dist/index.cjs.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts"],"names":["MastraVoice","OpenAI","PassThrough"],"mappings":";;;;;;;;;;;AA6BO,IAAM,WAAA,GAAN,cAA0BA,iBAAA,CAAY;AAAA,EAC3C,YAAA;AAAA,EACA,eAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,WAAA,CAAY;AAAA,IACV,cAAA;AAAA,IACA,WAAA;AAAA,IACA;AAAA,GACF,GAII,EAAC,EAAG;AACN,IAAA,MAAM,aAAA,GAAgB,QAAQ,GAAA,CAAI,cAAA;AAClC,IAAA,MAAM,kBAAA,GAAqB;AAAA,MACzB,IAAA,EAAM,OAAA;AAAA,MACN,MAAA,EAAQ;AAAA,KACV;AACA,IAAA,MAAM,qBAAA,GAAwB;AAAA,MAC5B,IAAA,EAAM,WAAA;AAAA,MACN,MAAA,EAAQ;AAAA,KACV;AAEA,IAAA,KAAA,CAAM;AAAA,MACJ,WAAA,EAAa;AAAA,QACX,IAAA,EAAM,WAAA,EAAa,IAAA,IAAQ,kBAAA,CAAmB,IAAA;AAAA,QAC9C,MAAA,EAAQ,WAAA,EAAa,MAAA,IAAU,kBAAA,CAAmB;AAAA,OACpD;AAAA,MACA,cAAA,EAAgB;AAAA,QACd,IAAA,EAAM,cAAA,EAAgB,IAAA,IAAQ,qBAAA,CAAsB,IAAA;AAAA,QACpD,MAAA,EAAQ,cAAA,EAAgB,MAAA,IAAU,qBAAA,CAAsB;AAAA,OAC1D;AAAA,MACA,SAAS,OAAA,IAAW;AAAA,KACrB,CAAA;AAED,IAAA,MAAM,YAAA,GAAe,aAAa,MAAA,IAAU,aAAA;AAC5C,IAAA,IAAI,CAAC,YAAA,EAAc;AACjB,MAAA,MAAM,IAAI,MAAM,sCAAsC,CAAA;AAAA,IACxD;AACA,IAAA,IAAA,CAAK,YAAA,GAAe,IAAIC,uBAAA,CAAO;AAAA,MAC7B,MAAA,EAAQ,YAAA;AAAA,MACR,GAAG,WAAA,EAAa;AAAA,KACjB,CAAA;AAED,IAAA,MAAM,eAAA,GAAkB,gBAAgB,MAAA,IAAU,aAAA;AAClD,IAAA,IAAI,CAAC,eAAA,EAAiB;AACpB,MAAA,MAAM,IAAI,MAAM,yCAAyC,CAAA;AAAA,IAC3D;AACA,IAAA,IAAA,CAAK,eAAA,GAAkB,IAAIA,uBAAA,CAAO;AAAA,MAChC,MAAA,EAAQ,eAAA;AAAA,MACR,GAAG,cAAA,EAAgB;AAAA,KACpB,CAAA;AAED,IAAA,IAAI,CAAC,IAAA,CAAK,YAAA,IAAgB,CAAC,KAAK,eAAA,EAAiB;AAC/C,MAAA,MAAM,IAAI,MAAM,0FAA0F,CAAA;AAAA,IAC5G;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,WAAA,GAA0D;AAC9D,IAAA,IAAI,CAAC,KAAK,WAAA,EAAa;AACrB,MAAA,MAAM,IAAI,MAAM,6BAA6B,CAAA;AAAA,IAC/C;AAEA,IAAA,OAAO;AAAA,MACL,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,SAAA,EAAU;AAAA,MACrB,EAAE,SAAS,KAAA,EAAM;AAAA,MACjB,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA;AAAO,KACpB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAM,KAAA,CACJ,KAAA,EACA,OAAA,EAKgC;AAChC,IAAA,IAAI,CAAC,KAAK,YAAA,EAAc;AACtB,MAAA,MAAM,IAAI,MAAM,6BAA6B,CAAA;AAAA,IAC/C;AAEA,IAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,MAAA,MAAM,SAAmB,EAAC;AAC1B,MAAA,WAAA,MAAiB,SAAS,KAAA,EAAO;AAC/B,QAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,UAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,QAChC,CAAA,MAAO;AACL,UAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,QACnB;AAAA,MACF;AACA,MAAA,KAAA,GAAQ,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,OAAO,CAAA;AAAA,IAChD;AAEA,IAAA,IAAI,KAAA,CAAM,IAAA,EAAK,CAAE,MAAA,KAAW,CAAA,EAAG;AAC7B,MAAA,MAAM,IAAI,MAAM,qBAAqB,CAAA;AAAA,IACvC;AAEA,IAAA,MAAM,EAAE,SAAS,cAAA,EAAgB,KAAA,EAAO,GAAG,YAAA,EAAa,GAAI,WAAW,EAAC;AAExE,IAAA,MAAM,WAAW,MAAM,IAAA,CAAK,YAAA,CAAc,KAAA,CAAM,OAAO,MAAA,CAAO;AAAA,MAC5D,KAAA,EAAO,IAAA,CAAK,WAAA,EAAa,IAAA,IAAQ,OAAA;AAAA,MACjC,KAAA,EAAQ,WAAW,IAAA,CAAK,OAAA;AAAA,MACxB,iBAAiB,cAAA,IAAkB,KAAA;AAAA,MACnC,KAAA;AAAA,MACA,OAAO,KAAA,IAAS,CAAA;AAAA,MAChB,GAAG;AAAA,KACJ,CAAA;AAED,IAAA,MAAM,WAAA,GAAc,IAAIC,kBAAA,EAAY;AACpC,IAAA,MAAM,SAAS,MAAA,CAAO,IAAA,CAAK,MAAM,QAAA,CAAS,aAAa,CAAA;AACvD,IAAA,WAAA,CAAY,IAAI,MAAM,CAAA;AACtB,IAAA,OAAO,WAAA;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,WAAA,GAAc;AAClB,IAAA,IAAI,CAAC,KAAK,eAAA,EAAiB;AACzB,MAAA,OAAO,EAAE,SAAS,KAAA,EAAM;AAAA,IAC1B;AACA,IAAA,OAAO,EAAE,SAAS,IAAA,EAAK;AAAA,EACzB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAM,MAAA,CACJ,WAAA,EACA,OAAA,EAIiB;AACjB,IAAA,IAAI,CAAC,KAAK,eAAA,EAAiB;AACzB,MAAA,MAAM,IAAI,MAAM,gCAAgC,CAAA;AAAA,IAClD;AAEA,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,WAAA,EAAa;AACrC,MAAA,IAAI,O
AAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,MAAM,WAAA,GAAc,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA;AAExC,IAAA,MAAM,EAAE,QAAA,EAAU,GAAG,YAAA,EAAa,GAAI,WAAW,EAAC;AAClD,IAAA,MAAM,IAAA,GAAO,IAAI,IAAA,CAAK,CAAC,WAAW,CAAA,EAAG,CAAA,MAAA,EAAS,QAAA,IAAY,KAAK,CAAA,CAAE,CAAA;AAEjE,IAAA,MAAM,WAAW,MAAM,IAAA,CAAK,eAAA,CAAiB,KAAA,CAAM,eAAe,MAAA,CAAO;AAAA,MACvE,KAAA,EAAO,IAAA,CAAK,cAAA,EAAgB,IAAA,IAAQ,WAAA;AAAA,MACpC,IAAA;AAAA,MACA,GAAG;AAAA,KACJ,CAAA;AAED,IAAA,OAAO,QAAA,CAAS,IAAA;AAAA,EAClB;AACF","file":"index.cjs","sourcesContent":["import { PassThrough } from 'stream';\n\nimport { MastraVoice } from '@mastra/core/voice';\nimport OpenAI from 'openai';\nimport type { ClientOptions } from 'openai';\n\ntype OpenAIVoiceId = 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer' | 'ash' | 'coral' | 'sage';\ntype OpenAIModel = 'tts-1' | 'tts-1-hd' | 'whisper-1';\n\nexport interface OpenAIConfig {\n name?: OpenAIModel;\n apiKey?: string;\n options?: Omit<ClientOptions, 'apiKey'>;\n}\n\nexport interface OpenAIVoiceConfig {\n speech?: {\n model: 'tts-1' | 'tts-1-hd';\n apiKey?: string;\n speaker?: OpenAIVoiceId;\n options?: Omit<ClientOptions, 'apiKey'>;\n };\n listening?: {\n model: 'whisper-1';\n apiKey?: string;\n options?: Omit<ClientOptions, 'apiKey'>;\n };\n}\n\nexport class OpenAIVoice extends MastraVoice {\n speechClient?: OpenAI;\n listeningClient?: OpenAI;\n\n /**\n * Constructs an instance of OpenAIVoice with optional configurations for speech and listening models.\n *\n * @param {Object} [config] - Configuration options for the OpenAIVoice instance.\n * @param {OpenAIConfig} [config.listeningModel] - Configuration for the listening model, including model name and API key.\n * @param {OpenAIConfig} [config.speechModel] - Configuration for the speech model, including model name and API key.\n * @param {string} [config.speaker] - The default speaker's voice to use for speech synthesis.\n * @throws {Error} - Throws an error if no API key is provided for either the speech or listening model.\n */\n constructor({\n listeningModel,\n speechModel,\n speaker,\n }: {\n listeningModel?: OpenAIConfig;\n speechModel?: OpenAIConfig;\n speaker?: string;\n } = {}) {\n const defaultApiKey = process.env.OPENAI_API_KEY;\n const defaultSpeechModel = {\n name: 'tts-1',\n apiKey: defaultApiKey,\n };\n const defaultListeningModel = {\n name: 'whisper-1',\n apiKey: defaultApiKey,\n };\n\n super({\n speechModel: {\n name: speechModel?.name ?? defaultSpeechModel.name,\n apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey,\n },\n listeningModel: {\n name: listeningModel?.name ?? defaultListeningModel.name,\n apiKey: listeningModel?.apiKey ?? defaultListeningModel.apiKey,\n },\n speaker: speaker ?? 
'alloy',\n });\n\n const speechApiKey = speechModel?.apiKey || defaultApiKey;\n if (!speechApiKey) {\n throw new Error('No API key provided for speech model');\n }\n this.speechClient = new OpenAI({\n apiKey: speechApiKey,\n ...speechModel?.options,\n });\n\n const listeningApiKey = listeningModel?.apiKey || defaultApiKey;\n if (!listeningApiKey) {\n throw new Error('No API key provided for listening model');\n }\n this.listeningClient = new OpenAI({\n apiKey: listeningApiKey,\n ...listeningModel?.options,\n });\n\n if (!this.speechClient && !this.listeningClient) {\n throw new Error('At least one of OPENAI_API_KEY, speechModel.apiKey, or listeningModel.apiKey must be set');\n }\n }\n\n /**\n * Retrieves a list of available speakers for the speech model.\n *\n * @returns {Promise<Array<{ voiceId: OpenAIVoiceId }>>} - A promise that resolves to an array of objects,\n * each containing a `voiceId` representing an available speaker.\n * @throws {Error} - Throws an error if the speech model is not configured.\n */\n async getSpeakers(): Promise<Array<{ voiceId: OpenAIVoiceId }>> {\n if (!this.speechModel) {\n throw new Error('Speech model not configured');\n }\n\n return [\n { voiceId: 'alloy' },\n { voiceId: 'echo' },\n { voiceId: 'fable' },\n { voiceId: 'onyx' },\n { voiceId: 'nova' },\n { voiceId: 'shimmer' },\n { voiceId: 'ash' },\n { voiceId: 'coral' },\n { voiceId: 'sage' },\n ];\n }\n\n /**\n * Converts text or audio input into speech using the configured speech model.\n *\n * @param {string | NodeJS.ReadableStream} input - The text or audio stream to be converted into speech.\n * @param {Object} [options] - Optional parameters for the speech synthesis.\n * @param {string} [options.speaker] - The speaker's voice to use for the speech synthesis.\n * @param {number} [options.speed] - The speed at which the speech should be synthesized.\n * @returns {Promise<NodeJS.ReadableStream>} - A promise that resolves to a readable stream of the synthesized audio.\n * @throws {Error} - Throws an error if the speech model is not configured or if the input text is empty.\n */\n async speak(\n input: string | NodeJS.ReadableStream,\n options?: {\n speaker?: string;\n speed?: number;\n [key: string]: any;\n },\n ): Promise<NodeJS.ReadableStream> {\n if (!this.speechClient) {\n throw new Error('Speech model not configured');\n }\n\n if (typeof input !== 'string') {\n const chunks: Buffer[] = [];\n for await (const chunk of input) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n input = Buffer.concat(chunks).toString('utf-8');\n }\n\n if (input.trim().length === 0) {\n throw new Error('Input text is empty');\n }\n\n const { speaker, responseFormat, speed, ...otherOptions } = options || {};\n\n const response = await this.speechClient!.audio.speech.create({\n model: this.speechModel?.name ?? 'tts-1',\n voice: (speaker ?? this.speaker) as OpenAIVoiceId,\n response_format: responseFormat ?? 
'mp3',\n input,\n speed: speed || 1.0,\n ...otherOptions,\n });\n\n const passThrough = new PassThrough();\n const buffer = Buffer.from(await response.arrayBuffer());\n passThrough.end(buffer);\n return passThrough;\n }\n\n /**\n * Checks if listening capabilities are enabled.\n *\n * @returns {Promise<{ enabled: boolean }>}\n */\n async getListener() {\n if (!this.listeningClient) {\n return { enabled: false };\n }\n return { enabled: true };\n }\n\n /**\n * Transcribes audio from a given stream using the configured listening model.\n *\n * @param {NodeJS.ReadableStream} audioStream - The audio stream to be transcribed.\n * @param {Object} [options] - Optional parameters for the transcription.\n * @param {string} [options.filetype] - The file type of the audio stream.\n * Supported types include 'mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm'.\n * @returns {Promise<string>} - A promise that resolves to the transcribed text.\n * @throws {Error} - Throws an error if the listening model is not configured.\n */\n async listen(\n audioStream: NodeJS.ReadableStream,\n options?: {\n filetype?: 'mp3' | 'mp4' | 'mpeg' | 'mpga' | 'm4a' | 'wav' | 'webm';\n [key: string]: any;\n },\n ): Promise<string> {\n if (!this.listeningClient) {\n throw new Error('Listening model not configured');\n }\n\n const chunks: Buffer[] = [];\n for await (const chunk of audioStream) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n const audioBuffer = Buffer.concat(chunks);\n\n const { filetype, ...otherOptions } = options || {};\n const file = new File([audioBuffer], `audio.${filetype || 'mp3'}`);\n\n const response = await this.listeningClient!.audio.transcriptions.create({\n model: this.listeningModel?.name || 'whisper-1',\n file: file as any,\n ...otherOptions,\n });\n\n return response.text;\n }\n}\n"]}
1
+ {"version":3,"sources":["../src/index.ts"],"names":["MastraVoice","OpenAI","PassThrough"],"mappings":";;;;;;;;;;;AA6BO,IAAM,WAAA,GAAN,cAA0BA,iBAAA,CAAY;AAAA,EAC3C,YAAA;AAAA,EACA,eAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,WAAA,CAAY;AAAA,IACV,cAAA;AAAA,IACA,WAAA;AAAA,IACA;AAAA,GACF,GAII,EAAC,EAAG;AACN,IAAA,MAAM,aAAA,GAAgB,QAAQ,GAAA,CAAI,cAAA;AAClC,IAAA,MAAM,kBAAA,GAAqB;AAAA,MACzB,IAAA,EAAM,OAAA;AAAA,MACN,MAAA,EAAQ;AAAA,KACV;AACA,IAAA,MAAM,qBAAA,GAAwB;AAAA,MAC5B,IAAA,EAAM,WAAA;AAAA,MACN,MAAA,EAAQ;AAAA,KACV;AAEA,IAAA,KAAA,CAAM;AAAA,MACJ,WAAA,EAAa;AAAA,QACX,IAAA,EAAM,WAAA,EAAa,IAAA,IAAQ,kBAAA,CAAmB,IAAA;AAAA,QAC9C,MAAA,EAAQ,WAAA,EAAa,MAAA,IAAU,kBAAA,CAAmB;AAAA,OACpD;AAAA,MACA,cAAA,EAAgB;AAAA,QACd,IAAA,EAAM,cAAA,EAAgB,IAAA,IAAQ,qBAAA,CAAsB,IAAA;AAAA,QACpD,MAAA,EAAQ,cAAA,EAAgB,MAAA,IAAU,qBAAA,CAAsB;AAAA,OAC1D;AAAA,MACA,SAAS,OAAA,IAAW;AAAA,KACrB,CAAA;AAED,IAAA,MAAM,YAAA,GAAe,aAAa,MAAA,IAAU,aAAA;AAC5C,IAAA,IAAI,CAAC,YAAA,EAAc;AACjB,MAAA,MAAM,IAAI,MAAM,sCAAsC,CAAA;AAAA,IACxD;AACA,IAAA,IAAA,CAAK,YAAA,GAAe,IAAIC,uBAAA,CAAO;AAAA,MAC7B,MAAA,EAAQ,YAAA;AAAA,MACR,GAAG,WAAA,EAAa;AAAA,KACjB,CAAA;AAED,IAAA,MAAM,eAAA,GAAkB,gBAAgB,MAAA,IAAU,aAAA;AAClD,IAAA,IAAI,CAAC,eAAA,EAAiB;AACpB,MAAA,MAAM,IAAI,MAAM,yCAAyC,CAAA;AAAA,IAC3D;AACA,IAAA,IAAA,CAAK,eAAA,GAAkB,IAAIA,uBAAA,CAAO;AAAA,MAChC,MAAA,EAAQ,eAAA;AAAA,MACR,GAAG,cAAA,EAAgB;AAAA,KACpB,CAAA;AAED,IAAA,IAAI,CAAC,IAAA,CAAK,YAAA,IAAgB,CAAC,KAAK,eAAA,EAAiB;AAC/C,MAAA,MAAM,IAAI,MAAM,0FAA0F,CAAA;AAAA,IAC5G;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,WAAA,GAA0D;AAC9D,IAAA,IAAI,CAAC,KAAK,WAAA,EAAa;AACrB,MAAA,MAAM,IAAI,MAAM,6BAA6B,CAAA;AAAA,IAC/C;AAEA,IAAA,OAAO;AAAA,MACL,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,SAAA,EAAU;AAAA,MACrB,EAAE,SAAS,KAAA,EAAM;AAAA,MACjB,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA;AAAO,KACpB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAM,KAAA,CACJ,KAAA,EACA,OAAA,EAKgC;AAChC,IAAA,IAAI,CAAC,KAAK,YAAA,EAAc;AACtB,MAAA,MAAM,IAAI,MAAM,6BAA6B,CAAA;AAAA,IAC/C;AAEA,IAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,MAAA,MAAM,SAAmB,EAAC;AAC1B,MAAA,WAAA,MAAiB,SAAS,KAAA,EAAO;AAC/B,QAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,UAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,QAChC,CAAA,MAAO;AACL,UAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,QACnB;AAAA,MACF;AACA,MAAA,KAAA,GAAQ,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,OAAO,CAAA;AAAA,IAChD;AAEA,IAAA,IAAI,KAAA,CAAM,IAAA,EAAK,CAAE,MAAA,KAAW,CAAA,EAAG;AAC7B,MAAA,MAAM,IAAI,MAAM,qBAAqB,CAAA;AAAA,IACvC;AAEA,IAAA,MAAM,EAAE,SAAS,cAAA,EAAgB,KAAA,EAAO,GAAG,YAAA,EAAa,GAAI,WAAW,EAAC;AAExE,IAAA,MAAM,WAAW,MAAM,IAAA,CAAK,YAAA,CAAc,KAAA,CAAM,OAAO,MAAA,CAAO;AAAA,MAC5D,KAAA,EAAO,IAAA,CAAK,WAAA,EAAa,IAAA,IAAQ,OAAA;AAAA,MACjC,KAAA,EAAQ,WAAW,IAAA,CAAK,OAAA;AAAA,MACxB,iBAAiB,cAAA,IAAkB,KAAA;AAAA,MACnC,KAAA;AAAA,MACA,OAAO,KAAA,IAAS,CAAA;AAAA,MAChB,GAAG;AAAA,KACJ,CAAA;AAED,IAAA,MAAM,WAAA,GAAc,IAAIC,kBAAA,EAAY;AACpC,IAAA,MAAM,SAAS,MAAA,CAAO,IAAA,CAAK,MAAM,QAAA,CAAS,aAAa,CAAA;AACvD,IAAA,WAAA,CAAY,IAAI,MAAM,CAAA;AACtB,IAAA,OAAO,WAAA;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,WAAA,GAAc;AAClB,IAAA,IAAI,CAAC,KAAK,eAAA,EAAiB;AACzB,MAAA,OAAO,EAAE,SAAS,KAAA,EAAM;AAAA,IAC1B;AACA,IAAA,OAAO,EAAE,SAAS,IAAA,EAAK;AAAA,EACzB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAM,MAAA,CACJ,WAAA,EACA,OAAA,EAIiB;AACjB,IAAA,IAAI,CAAC,KAAK,eAAA,EAAiB;AACzB,MAAA,MAAM,IAAI,MAAM,gCAAgC,CAAA;AAAA,IAClD;AAEA,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,WAAA,EAAa;AACrC,MAAA,IAAI,O
AAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,MAAM,WAAA,GAAc,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA;AAExC,IAAA,MAAM,EAAE,QAAA,EAAU,GAAG,YAAA,EAAa,GAAI,WAAW,EAAC;AAClD,IAAA,MAAM,IAAA,GAAO,IAAI,IAAA,CAAK,CAAC,WAAW,CAAA,EAAG,CAAA,MAAA,EAAS,QAAA,IAAY,KAAK,CAAA,CAAE,CAAA;AAEjE,IAAA,MAAM,WAAW,MAAM,IAAA,CAAK,eAAA,CAAiB,KAAA,CAAM,eAAe,MAAA,CAAO;AAAA,MACvE,KAAA,EAAO,IAAA,CAAK,cAAA,EAAgB,IAAA,IAAQ,WAAA;AAAA,MACpC,IAAA;AAAA,MACA,GAAG;AAAA,KACJ,CAAA;AAED,IAAA,OAAO,QAAA,CAAS,IAAA;AAAA,EAClB;AACF","file":"index.cjs","sourcesContent":["import { PassThrough } from 'node:stream';\n\nimport { MastraVoice } from '@mastra/core/voice';\nimport OpenAI from 'openai';\nimport type { ClientOptions } from 'openai';\n\ntype OpenAIVoiceId = 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer' | 'ash' | 'coral' | 'sage';\ntype OpenAIModel = 'tts-1' | 'tts-1-hd' | 'whisper-1';\n\nexport interface OpenAIConfig {\n name?: OpenAIModel;\n apiKey?: string;\n options?: Omit<ClientOptions, 'apiKey'>;\n}\n\nexport interface OpenAIVoiceConfig {\n speech?: {\n model: 'tts-1' | 'tts-1-hd';\n apiKey?: string;\n speaker?: OpenAIVoiceId;\n options?: Omit<ClientOptions, 'apiKey'>;\n };\n listening?: {\n model: 'whisper-1';\n apiKey?: string;\n options?: Omit<ClientOptions, 'apiKey'>;\n };\n}\n\nexport class OpenAIVoice extends MastraVoice {\n speechClient?: OpenAI;\n listeningClient?: OpenAI;\n\n /**\n * Constructs an instance of OpenAIVoice with optional configurations for speech and listening models.\n *\n * @param {Object} [config] - Configuration options for the OpenAIVoice instance.\n * @param {OpenAIConfig} [config.listeningModel] - Configuration for the listening model, including model name and API key.\n * @param {OpenAIConfig} [config.speechModel] - Configuration for the speech model, including model name and API key.\n * @param {string} [config.speaker] - The default speaker's voice to use for speech synthesis.\n * @throws {Error} - Throws an error if no API key is provided for either the speech or listening model.\n */\n constructor({\n listeningModel,\n speechModel,\n speaker,\n }: {\n listeningModel?: OpenAIConfig;\n speechModel?: OpenAIConfig;\n speaker?: string;\n } = {}) {\n const defaultApiKey = process.env.OPENAI_API_KEY;\n const defaultSpeechModel = {\n name: 'tts-1',\n apiKey: defaultApiKey,\n };\n const defaultListeningModel = {\n name: 'whisper-1',\n apiKey: defaultApiKey,\n };\n\n super({\n speechModel: {\n name: speechModel?.name ?? defaultSpeechModel.name,\n apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey,\n },\n listeningModel: {\n name: listeningModel?.name ?? defaultListeningModel.name,\n apiKey: listeningModel?.apiKey ?? defaultListeningModel.apiKey,\n },\n speaker: speaker ?? 
'alloy',\n });\n\n const speechApiKey = speechModel?.apiKey || defaultApiKey;\n if (!speechApiKey) {\n throw new Error('No API key provided for speech model');\n }\n this.speechClient = new OpenAI({\n apiKey: speechApiKey,\n ...speechModel?.options,\n });\n\n const listeningApiKey = listeningModel?.apiKey || defaultApiKey;\n if (!listeningApiKey) {\n throw new Error('No API key provided for listening model');\n }\n this.listeningClient = new OpenAI({\n apiKey: listeningApiKey,\n ...listeningModel?.options,\n });\n\n if (!this.speechClient && !this.listeningClient) {\n throw new Error('At least one of OPENAI_API_KEY, speechModel.apiKey, or listeningModel.apiKey must be set');\n }\n }\n\n /**\n * Retrieves a list of available speakers for the speech model.\n *\n * @returns {Promise<Array<{ voiceId: OpenAIVoiceId }>>} - A promise that resolves to an array of objects,\n * each containing a `voiceId` representing an available speaker.\n * @throws {Error} - Throws an error if the speech model is not configured.\n */\n async getSpeakers(): Promise<Array<{ voiceId: OpenAIVoiceId }>> {\n if (!this.speechModel) {\n throw new Error('Speech model not configured');\n }\n\n return [\n { voiceId: 'alloy' },\n { voiceId: 'echo' },\n { voiceId: 'fable' },\n { voiceId: 'onyx' },\n { voiceId: 'nova' },\n { voiceId: 'shimmer' },\n { voiceId: 'ash' },\n { voiceId: 'coral' },\n { voiceId: 'sage' },\n ];\n }\n\n /**\n * Converts text or audio input into speech using the configured speech model.\n *\n * @param {string | NodeJS.ReadableStream} input - The text or audio stream to be converted into speech.\n * @param {Object} [options] - Optional parameters for the speech synthesis.\n * @param {string} [options.speaker] - The speaker's voice to use for the speech synthesis.\n * @param {number} [options.speed] - The speed at which the speech should be synthesized.\n * @returns {Promise<NodeJS.ReadableStream>} - A promise that resolves to a readable stream of the synthesized audio.\n * @throws {Error} - Throws an error if the speech model is not configured or if the input text is empty.\n */\n async speak(\n input: string | NodeJS.ReadableStream,\n options?: {\n speaker?: string;\n speed?: number;\n [key: string]: any;\n },\n ): Promise<NodeJS.ReadableStream> {\n if (!this.speechClient) {\n throw new Error('Speech model not configured');\n }\n\n if (typeof input !== 'string') {\n const chunks: Buffer[] = [];\n for await (const chunk of input) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n input = Buffer.concat(chunks).toString('utf-8');\n }\n\n if (input.trim().length === 0) {\n throw new Error('Input text is empty');\n }\n\n const { speaker, responseFormat, speed, ...otherOptions } = options || {};\n\n const response = await this.speechClient!.audio.speech.create({\n model: this.speechModel?.name ?? 'tts-1',\n voice: (speaker ?? this.speaker) as OpenAIVoiceId,\n response_format: responseFormat ?? 
'mp3',\n input,\n speed: speed || 1.0,\n ...otherOptions,\n });\n\n const passThrough = new PassThrough();\n const buffer = Buffer.from(await response.arrayBuffer());\n passThrough.end(buffer);\n return passThrough;\n }\n\n /**\n * Checks if listening capabilities are enabled.\n *\n * @returns {Promise<{ enabled: boolean }>}\n */\n async getListener() {\n if (!this.listeningClient) {\n return { enabled: false };\n }\n return { enabled: true };\n }\n\n /**\n * Transcribes audio from a given stream using the configured listening model.\n *\n * @param {NodeJS.ReadableStream} audioStream - The audio stream to be transcribed.\n * @param {Object} [options] - Optional parameters for the transcription.\n * @param {string} [options.filetype] - The file type of the audio stream.\n * Supported types include 'mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm'.\n * @returns {Promise<string>} - A promise that resolves to the transcribed text.\n * @throws {Error} - Throws an error if the listening model is not configured.\n */\n async listen(\n audioStream: NodeJS.ReadableStream,\n options?: {\n filetype?: 'mp3' | 'mp4' | 'mpeg' | 'mpga' | 'm4a' | 'wav' | 'webm';\n [key: string]: any;\n },\n ): Promise<string> {\n if (!this.listeningClient) {\n throw new Error('Listening model not configured');\n }\n\n const chunks: Buffer[] = [];\n for await (const chunk of audioStream) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n const audioBuffer = Buffer.concat(chunks);\n\n const { filetype, ...otherOptions } = options || {};\n const file = new File([audioBuffer], `audio.${filetype || 'mp3'}`);\n\n const response = await this.listeningClient!.audio.transcriptions.create({\n model: this.listeningModel?.name || 'whisper-1',\n file: file as any,\n ...otherOptions,\n });\n\n return response.text;\n }\n}\n"]}
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts"],"names":[],"mappings":";;;;;AA6BO,IAAM,WAAA,GAAN,cAA0B,WAAA,CAAY;AAAA,EAC3C,YAAA;AAAA,EACA,eAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,WAAA,CAAY;AAAA,IACV,cAAA;AAAA,IACA,WAAA;AAAA,IACA;AAAA,GACF,GAII,EAAC,EAAG;AACN,IAAA,MAAM,aAAA,GAAgB,QAAQ,GAAA,CAAI,cAAA;AAClC,IAAA,MAAM,kBAAA,GAAqB;AAAA,MACzB,IAAA,EAAM,OAAA;AAAA,MACN,MAAA,EAAQ;AAAA,KACV;AACA,IAAA,MAAM,qBAAA,GAAwB;AAAA,MAC5B,IAAA,EAAM,WAAA;AAAA,MACN,MAAA,EAAQ;AAAA,KACV;AAEA,IAAA,KAAA,CAAM;AAAA,MACJ,WAAA,EAAa;AAAA,QACX,IAAA,EAAM,WAAA,EAAa,IAAA,IAAQ,kBAAA,CAAmB,IAAA;AAAA,QAC9C,MAAA,EAAQ,WAAA,EAAa,MAAA,IAAU,kBAAA,CAAmB;AAAA,OACpD;AAAA,MACA,cAAA,EAAgB;AAAA,QACd,IAAA,EAAM,cAAA,EAAgB,IAAA,IAAQ,qBAAA,CAAsB,IAAA;AAAA,QACpD,MAAA,EAAQ,cAAA,EAAgB,MAAA,IAAU,qBAAA,CAAsB;AAAA,OAC1D;AAAA,MACA,SAAS,OAAA,IAAW;AAAA,KACrB,CAAA;AAED,IAAA,MAAM,YAAA,GAAe,aAAa,MAAA,IAAU,aAAA;AAC5C,IAAA,IAAI,CAAC,YAAA,EAAc;AACjB,MAAA,MAAM,IAAI,MAAM,sCAAsC,CAAA;AAAA,IACxD;AACA,IAAA,IAAA,CAAK,YAAA,GAAe,IAAI,MAAA,CAAO;AAAA,MAC7B,MAAA,EAAQ,YAAA;AAAA,MACR,GAAG,WAAA,EAAa;AAAA,KACjB,CAAA;AAED,IAAA,MAAM,eAAA,GAAkB,gBAAgB,MAAA,IAAU,aAAA;AAClD,IAAA,IAAI,CAAC,eAAA,EAAiB;AACpB,MAAA,MAAM,IAAI,MAAM,yCAAyC,CAAA;AAAA,IAC3D;AACA,IAAA,IAAA,CAAK,eAAA,GAAkB,IAAI,MAAA,CAAO;AAAA,MAChC,MAAA,EAAQ,eAAA;AAAA,MACR,GAAG,cAAA,EAAgB;AAAA,KACpB,CAAA;AAED,IAAA,IAAI,CAAC,IAAA,CAAK,YAAA,IAAgB,CAAC,KAAK,eAAA,EAAiB;AAC/C,MAAA,MAAM,IAAI,MAAM,0FAA0F,CAAA;AAAA,IAC5G;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,WAAA,GAA0D;AAC9D,IAAA,IAAI,CAAC,KAAK,WAAA,EAAa;AACrB,MAAA,MAAM,IAAI,MAAM,6BAA6B,CAAA;AAAA,IAC/C;AAEA,IAAA,OAAO;AAAA,MACL,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,SAAA,EAAU;AAAA,MACrB,EAAE,SAAS,KAAA,EAAM;AAAA,MACjB,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA;AAAO,KACpB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAM,KAAA,CACJ,KAAA,EACA,OAAA,EAKgC;AAChC,IAAA,IAAI,CAAC,KAAK,YAAA,EAAc;AACtB,MAAA,MAAM,IAAI,MAAM,6BAA6B,CAAA;AAAA,IAC/C;AAEA,IAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,MAAA,MAAM,SAAmB,EAAC;AAC1B,MAAA,WAAA,MAAiB,SAAS,KAAA,EAAO;AAC/B,QAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,UAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,QAChC,CAAA,MAAO;AACL,UAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,QACnB;AAAA,MACF;AACA,MAAA,KAAA,GAAQ,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,OAAO,CAAA;AAAA,IAChD;AAEA,IAAA,IAAI,KAAA,CAAM,IAAA,EAAK,CAAE,MAAA,KAAW,CAAA,EAAG;AAC7B,MAAA,MAAM,IAAI,MAAM,qBAAqB,CAAA;AAAA,IACvC;AAEA,IAAA,MAAM,EAAE,SAAS,cAAA,EAAgB,KAAA,EAAO,GAAG,YAAA,EAAa,GAAI,WAAW,EAAC;AAExE,IAAA,MAAM,WAAW,MAAM,IAAA,CAAK,YAAA,CAAc,KAAA,CAAM,OAAO,MAAA,CAAO;AAAA,MAC5D,KAAA,EAAO,IAAA,CAAK,WAAA,EAAa,IAAA,IAAQ,OAAA;AAAA,MACjC,KAAA,EAAQ,WAAW,IAAA,CAAK,OAAA;AAAA,MACxB,iBAAiB,cAAA,IAAkB,KAAA;AAAA,MACnC,KAAA;AAAA,MACA,OAAO,KAAA,IAAS,CAAA;AAAA,MAChB,GAAG;AAAA,KACJ,CAAA;AAED,IAAA,MAAM,WAAA,GAAc,IAAI,WAAA,EAAY;AACpC,IAAA,MAAM,SAAS,MAAA,CAAO,IAAA,CAAK,MAAM,QAAA,CAAS,aAAa,CAAA;AACvD,IAAA,WAAA,CAAY,IAAI,MAAM,CAAA;AACtB,IAAA,OAAO,WAAA;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,WAAA,GAAc;AAClB,IAAA,IAAI,CAAC,KAAK,eAAA,EAAiB;AACzB,MAAA,OAAO,EAAE,SAAS,KAAA,EAAM;AAAA,IAC1B;AACA,IAAA,OAAO,EAAE,SAAS,IAAA,EAAK;AAAA,EACzB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAM,MAAA,CACJ,WAAA,EACA,OAAA,EAIiB;AACjB,IAAA,IAAI,CAAC,KAAK,eAAA,EAAiB;AACzB,MAAA,MAAM,IAAI,MAAM,gCAAgC,CAAA;AAAA,IAClD;AAEA,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,WAAA,EAAa;AACrC,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,
MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,MAAM,WAAA,GAAc,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA;AAExC,IAAA,MAAM,EAAE,QAAA,EAAU,GAAG,YAAA,EAAa,GAAI,WAAW,EAAC;AAClD,IAAA,MAAM,IAAA,GAAO,IAAI,IAAA,CAAK,CAAC,WAAW,CAAA,EAAG,CAAA,MAAA,EAAS,QAAA,IAAY,KAAK,CAAA,CAAE,CAAA;AAEjE,IAAA,MAAM,WAAW,MAAM,IAAA,CAAK,eAAA,CAAiB,KAAA,CAAM,eAAe,MAAA,CAAO;AAAA,MACvE,KAAA,EAAO,IAAA,CAAK,cAAA,EAAgB,IAAA,IAAQ,WAAA;AAAA,MACpC,IAAA;AAAA,MACA,GAAG;AAAA,KACJ,CAAA;AAED,IAAA,OAAO,QAAA,CAAS,IAAA;AAAA,EAClB;AACF","file":"index.js","sourcesContent":["import { PassThrough } from 'stream';\n\nimport { MastraVoice } from '@mastra/core/voice';\nimport OpenAI from 'openai';\nimport type { ClientOptions } from 'openai';\n\ntype OpenAIVoiceId = 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer' | 'ash' | 'coral' | 'sage';\ntype OpenAIModel = 'tts-1' | 'tts-1-hd' | 'whisper-1';\n\nexport interface OpenAIConfig {\n name?: OpenAIModel;\n apiKey?: string;\n options?: Omit<ClientOptions, 'apiKey'>;\n}\n\nexport interface OpenAIVoiceConfig {\n speech?: {\n model: 'tts-1' | 'tts-1-hd';\n apiKey?: string;\n speaker?: OpenAIVoiceId;\n options?: Omit<ClientOptions, 'apiKey'>;\n };\n listening?: {\n model: 'whisper-1';\n apiKey?: string;\n options?: Omit<ClientOptions, 'apiKey'>;\n };\n}\n\nexport class OpenAIVoice extends MastraVoice {\n speechClient?: OpenAI;\n listeningClient?: OpenAI;\n\n /**\n * Constructs an instance of OpenAIVoice with optional configurations for speech and listening models.\n *\n * @param {Object} [config] - Configuration options for the OpenAIVoice instance.\n * @param {OpenAIConfig} [config.listeningModel] - Configuration for the listening model, including model name and API key.\n * @param {OpenAIConfig} [config.speechModel] - Configuration for the speech model, including model name and API key.\n * @param {string} [config.speaker] - The default speaker's voice to use for speech synthesis.\n * @throws {Error} - Throws an error if no API key is provided for either the speech or listening model.\n */\n constructor({\n listeningModel,\n speechModel,\n speaker,\n }: {\n listeningModel?: OpenAIConfig;\n speechModel?: OpenAIConfig;\n speaker?: string;\n } = {}) {\n const defaultApiKey = process.env.OPENAI_API_KEY;\n const defaultSpeechModel = {\n name: 'tts-1',\n apiKey: defaultApiKey,\n };\n const defaultListeningModel = {\n name: 'whisper-1',\n apiKey: defaultApiKey,\n };\n\n super({\n speechModel: {\n name: speechModel?.name ?? defaultSpeechModel.name,\n apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey,\n },\n listeningModel: {\n name: listeningModel?.name ?? defaultListeningModel.name,\n apiKey: listeningModel?.apiKey ?? defaultListeningModel.apiKey,\n },\n speaker: speaker ?? 
'alloy',\n });\n\n const speechApiKey = speechModel?.apiKey || defaultApiKey;\n if (!speechApiKey) {\n throw new Error('No API key provided for speech model');\n }\n this.speechClient = new OpenAI({\n apiKey: speechApiKey,\n ...speechModel?.options,\n });\n\n const listeningApiKey = listeningModel?.apiKey || defaultApiKey;\n if (!listeningApiKey) {\n throw new Error('No API key provided for listening model');\n }\n this.listeningClient = new OpenAI({\n apiKey: listeningApiKey,\n ...listeningModel?.options,\n });\n\n if (!this.speechClient && !this.listeningClient) {\n throw new Error('At least one of OPENAI_API_KEY, speechModel.apiKey, or listeningModel.apiKey must be set');\n }\n }\n\n /**\n * Retrieves a list of available speakers for the speech model.\n *\n * @returns {Promise<Array<{ voiceId: OpenAIVoiceId }>>} - A promise that resolves to an array of objects,\n * each containing a `voiceId` representing an available speaker.\n * @throws {Error} - Throws an error if the speech model is not configured.\n */\n async getSpeakers(): Promise<Array<{ voiceId: OpenAIVoiceId }>> {\n if (!this.speechModel) {\n throw new Error('Speech model not configured');\n }\n\n return [\n { voiceId: 'alloy' },\n { voiceId: 'echo' },\n { voiceId: 'fable' },\n { voiceId: 'onyx' },\n { voiceId: 'nova' },\n { voiceId: 'shimmer' },\n { voiceId: 'ash' },\n { voiceId: 'coral' },\n { voiceId: 'sage' },\n ];\n }\n\n /**\n * Converts text or audio input into speech using the configured speech model.\n *\n * @param {string | NodeJS.ReadableStream} input - The text or audio stream to be converted into speech.\n * @param {Object} [options] - Optional parameters for the speech synthesis.\n * @param {string} [options.speaker] - The speaker's voice to use for the speech synthesis.\n * @param {number} [options.speed] - The speed at which the speech should be synthesized.\n * @returns {Promise<NodeJS.ReadableStream>} - A promise that resolves to a readable stream of the synthesized audio.\n * @throws {Error} - Throws an error if the speech model is not configured or if the input text is empty.\n */\n async speak(\n input: string | NodeJS.ReadableStream,\n options?: {\n speaker?: string;\n speed?: number;\n [key: string]: any;\n },\n ): Promise<NodeJS.ReadableStream> {\n if (!this.speechClient) {\n throw new Error('Speech model not configured');\n }\n\n if (typeof input !== 'string') {\n const chunks: Buffer[] = [];\n for await (const chunk of input) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n input = Buffer.concat(chunks).toString('utf-8');\n }\n\n if (input.trim().length === 0) {\n throw new Error('Input text is empty');\n }\n\n const { speaker, responseFormat, speed, ...otherOptions } = options || {};\n\n const response = await this.speechClient!.audio.speech.create({\n model: this.speechModel?.name ?? 'tts-1',\n voice: (speaker ?? this.speaker) as OpenAIVoiceId,\n response_format: responseFormat ?? 
'mp3',\n input,\n speed: speed || 1.0,\n ...otherOptions,\n });\n\n const passThrough = new PassThrough();\n const buffer = Buffer.from(await response.arrayBuffer());\n passThrough.end(buffer);\n return passThrough;\n }\n\n /**\n * Checks if listening capabilities are enabled.\n *\n * @returns {Promise<{ enabled: boolean }>}\n */\n async getListener() {\n if (!this.listeningClient) {\n return { enabled: false };\n }\n return { enabled: true };\n }\n\n /**\n * Transcribes audio from a given stream using the configured listening model.\n *\n * @param {NodeJS.ReadableStream} audioStream - The audio stream to be transcribed.\n * @param {Object} [options] - Optional parameters for the transcription.\n * @param {string} [options.filetype] - The file type of the audio stream.\n * Supported types include 'mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm'.\n * @returns {Promise<string>} - A promise that resolves to the transcribed text.\n * @throws {Error} - Throws an error if the listening model is not configured.\n */\n async listen(\n audioStream: NodeJS.ReadableStream,\n options?: {\n filetype?: 'mp3' | 'mp4' | 'mpeg' | 'mpga' | 'm4a' | 'wav' | 'webm';\n [key: string]: any;\n },\n ): Promise<string> {\n if (!this.listeningClient) {\n throw new Error('Listening model not configured');\n }\n\n const chunks: Buffer[] = [];\n for await (const chunk of audioStream) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n const audioBuffer = Buffer.concat(chunks);\n\n const { filetype, ...otherOptions } = options || {};\n const file = new File([audioBuffer], `audio.${filetype || 'mp3'}`);\n\n const response = await this.listeningClient!.audio.transcriptions.create({\n model: this.listeningModel?.name || 'whisper-1',\n file: file as any,\n ...otherOptions,\n });\n\n return response.text;\n }\n}\n"]}
1
+ {"version":3,"sources":["../src/index.ts"],"names":[],"mappings":";;;;;AA6BO,IAAM,WAAA,GAAN,cAA0B,WAAA,CAAY;AAAA,EAC3C,YAAA;AAAA,EACA,eAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,WAAA,CAAY;AAAA,IACV,cAAA;AAAA,IACA,WAAA;AAAA,IACA;AAAA,GACF,GAII,EAAC,EAAG;AACN,IAAA,MAAM,aAAA,GAAgB,QAAQ,GAAA,CAAI,cAAA;AAClC,IAAA,MAAM,kBAAA,GAAqB;AAAA,MACzB,IAAA,EAAM,OAAA;AAAA,MACN,MAAA,EAAQ;AAAA,KACV;AACA,IAAA,MAAM,qBAAA,GAAwB;AAAA,MAC5B,IAAA,EAAM,WAAA;AAAA,MACN,MAAA,EAAQ;AAAA,KACV;AAEA,IAAA,KAAA,CAAM;AAAA,MACJ,WAAA,EAAa;AAAA,QACX,IAAA,EAAM,WAAA,EAAa,IAAA,IAAQ,kBAAA,CAAmB,IAAA;AAAA,QAC9C,MAAA,EAAQ,WAAA,EAAa,MAAA,IAAU,kBAAA,CAAmB;AAAA,OACpD;AAAA,MACA,cAAA,EAAgB;AAAA,QACd,IAAA,EAAM,cAAA,EAAgB,IAAA,IAAQ,qBAAA,CAAsB,IAAA;AAAA,QACpD,MAAA,EAAQ,cAAA,EAAgB,MAAA,IAAU,qBAAA,CAAsB;AAAA,OAC1D;AAAA,MACA,SAAS,OAAA,IAAW;AAAA,KACrB,CAAA;AAED,IAAA,MAAM,YAAA,GAAe,aAAa,MAAA,IAAU,aAAA;AAC5C,IAAA,IAAI,CAAC,YAAA,EAAc;AACjB,MAAA,MAAM,IAAI,MAAM,sCAAsC,CAAA;AAAA,IACxD;AACA,IAAA,IAAA,CAAK,YAAA,GAAe,IAAI,MAAA,CAAO;AAAA,MAC7B,MAAA,EAAQ,YAAA;AAAA,MACR,GAAG,WAAA,EAAa;AAAA,KACjB,CAAA;AAED,IAAA,MAAM,eAAA,GAAkB,gBAAgB,MAAA,IAAU,aAAA;AAClD,IAAA,IAAI,CAAC,eAAA,EAAiB;AACpB,MAAA,MAAM,IAAI,MAAM,yCAAyC,CAAA;AAAA,IAC3D;AACA,IAAA,IAAA,CAAK,eAAA,GAAkB,IAAI,MAAA,CAAO;AAAA,MAChC,MAAA,EAAQ,eAAA;AAAA,MACR,GAAG,cAAA,EAAgB;AAAA,KACpB,CAAA;AAED,IAAA,IAAI,CAAC,IAAA,CAAK,YAAA,IAAgB,CAAC,KAAK,eAAA,EAAiB;AAC/C,MAAA,MAAM,IAAI,MAAM,0FAA0F,CAAA;AAAA,IAC5G;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,WAAA,GAA0D;AAC9D,IAAA,IAAI,CAAC,KAAK,WAAA,EAAa;AACrB,MAAA,MAAM,IAAI,MAAM,6BAA6B,CAAA;AAAA,IAC/C;AAEA,IAAA,OAAO;AAAA,MACL,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,SAAA,EAAU;AAAA,MACrB,EAAE,SAAS,KAAA,EAAM;AAAA,MACjB,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA;AAAO,KACpB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAM,KAAA,CACJ,KAAA,EACA,OAAA,EAKgC;AAChC,IAAA,IAAI,CAAC,KAAK,YAAA,EAAc;AACtB,MAAA,MAAM,IAAI,MAAM,6BAA6B,CAAA;AAAA,IAC/C;AAEA,IAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,MAAA,MAAM,SAAmB,EAAC;AAC1B,MAAA,WAAA,MAAiB,SAAS,KAAA,EAAO;AAC/B,QAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,UAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,QAChC,CAAA,MAAO;AACL,UAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,QACnB;AAAA,MACF;AACA,MAAA,KAAA,GAAQ,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,OAAO,CAAA;AAAA,IAChD;AAEA,IAAA,IAAI,KAAA,CAAM,IAAA,EAAK,CAAE,MAAA,KAAW,CAAA,EAAG;AAC7B,MAAA,MAAM,IAAI,MAAM,qBAAqB,CAAA;AAAA,IACvC;AAEA,IAAA,MAAM,EAAE,SAAS,cAAA,EAAgB,KAAA,EAAO,GAAG,YAAA,EAAa,GAAI,WAAW,EAAC;AAExE,IAAA,MAAM,WAAW,MAAM,IAAA,CAAK,YAAA,CAAc,KAAA,CAAM,OAAO,MAAA,CAAO;AAAA,MAC5D,KAAA,EAAO,IAAA,CAAK,WAAA,EAAa,IAAA,IAAQ,OAAA;AAAA,MACjC,KAAA,EAAQ,WAAW,IAAA,CAAK,OAAA;AAAA,MACxB,iBAAiB,cAAA,IAAkB,KAAA;AAAA,MACnC,KAAA;AAAA,MACA,OAAO,KAAA,IAAS,CAAA;AAAA,MAChB,GAAG;AAAA,KACJ,CAAA;AAED,IAAA,MAAM,WAAA,GAAc,IAAI,WAAA,EAAY;AACpC,IAAA,MAAM,SAAS,MAAA,CAAO,IAAA,CAAK,MAAM,QAAA,CAAS,aAAa,CAAA;AACvD,IAAA,WAAA,CAAY,IAAI,MAAM,CAAA;AACtB,IAAA,OAAO,WAAA;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,WAAA,GAAc;AAClB,IAAA,IAAI,CAAC,KAAK,eAAA,EAAiB;AACzB,MAAA,OAAO,EAAE,SAAS,KAAA,EAAM;AAAA,IAC1B;AACA,IAAA,OAAO,EAAE,SAAS,IAAA,EAAK;AAAA,EACzB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAM,MAAA,CACJ,WAAA,EACA,OAAA,EAIiB;AACjB,IAAA,IAAI,CAAC,KAAK,eAAA,EAAiB;AACzB,MAAA,MAAM,IAAI,MAAM,gCAAgC,CAAA;AAAA,IAClD;AAEA,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,WAAA,EAAa;AACrC,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,
MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,MAAM,WAAA,GAAc,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA;AAExC,IAAA,MAAM,EAAE,QAAA,EAAU,GAAG,YAAA,EAAa,GAAI,WAAW,EAAC;AAClD,IAAA,MAAM,IAAA,GAAO,IAAI,IAAA,CAAK,CAAC,WAAW,CAAA,EAAG,CAAA,MAAA,EAAS,QAAA,IAAY,KAAK,CAAA,CAAE,CAAA;AAEjE,IAAA,MAAM,WAAW,MAAM,IAAA,CAAK,eAAA,CAAiB,KAAA,CAAM,eAAe,MAAA,CAAO;AAAA,MACvE,KAAA,EAAO,IAAA,CAAK,cAAA,EAAgB,IAAA,IAAQ,WAAA;AAAA,MACpC,IAAA;AAAA,MACA,GAAG;AAAA,KACJ,CAAA;AAED,IAAA,OAAO,QAAA,CAAS,IAAA;AAAA,EAClB;AACF","file":"index.js","sourcesContent":["import { PassThrough } from 'node:stream';\n\nimport { MastraVoice } from '@mastra/core/voice';\nimport OpenAI from 'openai';\nimport type { ClientOptions } from 'openai';\n\ntype OpenAIVoiceId = 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer' | 'ash' | 'coral' | 'sage';\ntype OpenAIModel = 'tts-1' | 'tts-1-hd' | 'whisper-1';\n\nexport interface OpenAIConfig {\n name?: OpenAIModel;\n apiKey?: string;\n options?: Omit<ClientOptions, 'apiKey'>;\n}\n\nexport interface OpenAIVoiceConfig {\n speech?: {\n model: 'tts-1' | 'tts-1-hd';\n apiKey?: string;\n speaker?: OpenAIVoiceId;\n options?: Omit<ClientOptions, 'apiKey'>;\n };\n listening?: {\n model: 'whisper-1';\n apiKey?: string;\n options?: Omit<ClientOptions, 'apiKey'>;\n };\n}\n\nexport class OpenAIVoice extends MastraVoice {\n speechClient?: OpenAI;\n listeningClient?: OpenAI;\n\n /**\n * Constructs an instance of OpenAIVoice with optional configurations for speech and listening models.\n *\n * @param {Object} [config] - Configuration options for the OpenAIVoice instance.\n * @param {OpenAIConfig} [config.listeningModel] - Configuration for the listening model, including model name and API key.\n * @param {OpenAIConfig} [config.speechModel] - Configuration for the speech model, including model name and API key.\n * @param {string} [config.speaker] - The default speaker's voice to use for speech synthesis.\n * @throws {Error} - Throws an error if no API key is provided for either the speech or listening model.\n */\n constructor({\n listeningModel,\n speechModel,\n speaker,\n }: {\n listeningModel?: OpenAIConfig;\n speechModel?: OpenAIConfig;\n speaker?: string;\n } = {}) {\n const defaultApiKey = process.env.OPENAI_API_KEY;\n const defaultSpeechModel = {\n name: 'tts-1',\n apiKey: defaultApiKey,\n };\n const defaultListeningModel = {\n name: 'whisper-1',\n apiKey: defaultApiKey,\n };\n\n super({\n speechModel: {\n name: speechModel?.name ?? defaultSpeechModel.name,\n apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey,\n },\n listeningModel: {\n name: listeningModel?.name ?? defaultListeningModel.name,\n apiKey: listeningModel?.apiKey ?? defaultListeningModel.apiKey,\n },\n speaker: speaker ?? 
'alloy',\n });\n\n const speechApiKey = speechModel?.apiKey || defaultApiKey;\n if (!speechApiKey) {\n throw new Error('No API key provided for speech model');\n }\n this.speechClient = new OpenAI({\n apiKey: speechApiKey,\n ...speechModel?.options,\n });\n\n const listeningApiKey = listeningModel?.apiKey || defaultApiKey;\n if (!listeningApiKey) {\n throw new Error('No API key provided for listening model');\n }\n this.listeningClient = new OpenAI({\n apiKey: listeningApiKey,\n ...listeningModel?.options,\n });\n\n if (!this.speechClient && !this.listeningClient) {\n throw new Error('At least one of OPENAI_API_KEY, speechModel.apiKey, or listeningModel.apiKey must be set');\n }\n }\n\n /**\n * Retrieves a list of available speakers for the speech model.\n *\n * @returns {Promise<Array<{ voiceId: OpenAIVoiceId }>>} - A promise that resolves to an array of objects,\n * each containing a `voiceId` representing an available speaker.\n * @throws {Error} - Throws an error if the speech model is not configured.\n */\n async getSpeakers(): Promise<Array<{ voiceId: OpenAIVoiceId }>> {\n if (!this.speechModel) {\n throw new Error('Speech model not configured');\n }\n\n return [\n { voiceId: 'alloy' },\n { voiceId: 'echo' },\n { voiceId: 'fable' },\n { voiceId: 'onyx' },\n { voiceId: 'nova' },\n { voiceId: 'shimmer' },\n { voiceId: 'ash' },\n { voiceId: 'coral' },\n { voiceId: 'sage' },\n ];\n }\n\n /**\n * Converts text or audio input into speech using the configured speech model.\n *\n * @param {string | NodeJS.ReadableStream} input - The text or audio stream to be converted into speech.\n * @param {Object} [options] - Optional parameters for the speech synthesis.\n * @param {string} [options.speaker] - The speaker's voice to use for the speech synthesis.\n * @param {number} [options.speed] - The speed at which the speech should be synthesized.\n * @returns {Promise<NodeJS.ReadableStream>} - A promise that resolves to a readable stream of the synthesized audio.\n * @throws {Error} - Throws an error if the speech model is not configured or if the input text is empty.\n */\n async speak(\n input: string | NodeJS.ReadableStream,\n options?: {\n speaker?: string;\n speed?: number;\n [key: string]: any;\n },\n ): Promise<NodeJS.ReadableStream> {\n if (!this.speechClient) {\n throw new Error('Speech model not configured');\n }\n\n if (typeof input !== 'string') {\n const chunks: Buffer[] = [];\n for await (const chunk of input) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n input = Buffer.concat(chunks).toString('utf-8');\n }\n\n if (input.trim().length === 0) {\n throw new Error('Input text is empty');\n }\n\n const { speaker, responseFormat, speed, ...otherOptions } = options || {};\n\n const response = await this.speechClient!.audio.speech.create({\n model: this.speechModel?.name ?? 'tts-1',\n voice: (speaker ?? this.speaker) as OpenAIVoiceId,\n response_format: responseFormat ?? 
'mp3',\n input,\n speed: speed || 1.0,\n ...otherOptions,\n });\n\n const passThrough = new PassThrough();\n const buffer = Buffer.from(await response.arrayBuffer());\n passThrough.end(buffer);\n return passThrough;\n }\n\n /**\n * Checks if listening capabilities are enabled.\n *\n * @returns {Promise<{ enabled: boolean }>}\n */\n async getListener() {\n if (!this.listeningClient) {\n return { enabled: false };\n }\n return { enabled: true };\n }\n\n /**\n * Transcribes audio from a given stream using the configured listening model.\n *\n * @param {NodeJS.ReadableStream} audioStream - The audio stream to be transcribed.\n * @param {Object} [options] - Optional parameters for the transcription.\n * @param {string} [options.filetype] - The file type of the audio stream.\n * Supported types include 'mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm'.\n * @returns {Promise<string>} - A promise that resolves to the transcribed text.\n * @throws {Error} - Throws an error if the listening model is not configured.\n */\n async listen(\n audioStream: NodeJS.ReadableStream,\n options?: {\n filetype?: 'mp3' | 'mp4' | 'mpeg' | 'mpga' | 'm4a' | 'wav' | 'webm';\n [key: string]: any;\n },\n ): Promise<string> {\n if (!this.listeningClient) {\n throw new Error('Listening model not configured');\n }\n\n const chunks: Buffer[] = [];\n for await (const chunk of audioStream) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n const audioBuffer = Buffer.concat(chunks);\n\n const { filetype, ...otherOptions } = options || {};\n const file = new File([audioBuffer], `audio.${filetype || 'mp3'}`);\n\n const response = await this.listeningClient!.audio.transcriptions.create({\n model: this.listeningModel?.name || 'whisper-1',\n file: file as any,\n ...otherOptions,\n });\n\n return response.text;\n }\n}\n"]}
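For orientation, here is a minimal usage sketch of the `OpenAIVoice` class embedded above. It assumes `OPENAI_API_KEY` is set in the environment (the constructor's fallback for both clients) and an ESM context with top-level `await`; the file names are illustrative only, not part of the package:

```typescript
import { createReadStream, createWriteStream } from 'node:fs';

import { OpenAIVoice } from '@mastra/voice-openai';

// Both clients fall back to process.env.OPENAI_API_KEY when no
// per-model apiKey is passed; a missing key throws at construction time.
const voice = new OpenAIVoice({
  speechModel: { name: 'tts-1-hd' },
  speaker: 'nova', // one of the nine voice IDs returned by getSpeakers()
});

// speak() resolves to a NodeJS.ReadableStream of audio; the response
// format defaults to 'mp3' unless options.responseFormat overrides it.
const audio = await voice.speak('Hello from Mastra!', { speed: 1.1 });
audio.pipe(createWriteStream('hello.mp3')); // illustrative output path

// listen() buffers the stream, wraps it in a File named from `filetype`,
// and returns the Whisper transcription text.
const transcript = await voice.listen(createReadStream('hello.mp3'), {
  filetype: 'mp3',
});
console.log(transcript);
```

Because the class implements both `speak()` and `listen()`, a single instance can serve as both the speech and transcription provider.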