@mastra/voice-openai 0.12.0-beta.1 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/dist/docs/README.md +32 -0
- package/dist/docs/SKILL.md +33 -0
- package/dist/docs/SOURCE_MAP.json +6 -0
- package/dist/docs/agents/01-adding-voice.md +352 -0
- package/dist/docs/voice/01-overview.md +1019 -0
- package/dist/docs/voice/02-text-to-speech.md +87 -0
- package/dist/docs/voice/03-speech-to-text.md +83 -0
- package/dist/docs/voice/04-reference.md +568 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.js.map +1 -1
- package/package.json +9 -9
package/dist/docs/voice/04-reference.md
ADDED
@@ -0,0 +1,568 @@

# Voice API Reference

> API reference for voice - 5 entries

---

## Reference: CompositeVoice

> Documentation for the CompositeVoice class, which enables combining multiple voice providers for flexible text-to-speech and speech-to-text operations.

The CompositeVoice class allows you to combine different voice providers for text-to-speech and speech-to-text operations. This is particularly useful when you want to use the best provider for each operation - for example, using OpenAI for speech-to-text and PlayAI for text-to-speech.

CompositeVoice supports both Mastra voice providers and AI SDK model providers.

## Constructor Parameters

As the usage examples below show, the constructor accepts a configuration object with an `input` provider (used for `listen()`) and an `output` provider (used for `speak()`); each can be a Mastra voice provider or an AI SDK model.

## Methods

### speak()

Converts text to speech using the configured speaking provider.

Notes:

- If no speaking provider is configured, this method will throw an error
- Options are passed through to the configured speaking provider
- Returns a stream of audio data

### listen()

Converts speech to text using the configured listening provider.

Notes:

- If no listening provider is configured, this method will throw an error
- Options are passed through to the configured listening provider
- Returns either a string or a stream of transcribed text, depending on the provider

### getSpeakers()

Returns a list of available voices from the speaking provider.

Notes:

- Returns voices from the speaking provider only
- If no speaking provider is configured, returns an empty array
- Each voice object will have at least a voiceId property
- Additional voice properties depend on the speaking provider
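For example, a minimal sketch of listing voices through a CompositeVoice, assuming the same OpenAI/PlayAI pairing used in the usage examples below:

```typescript
import { CompositeVoice } from "@mastra/core/voice";
import { OpenAIVoice } from "@mastra/voice-openai";
import { PlayAIVoice } from "@mastra/voice-playai";

const voice = new CompositeVoice({
  input: new OpenAIVoice(),  // listening (speech-to-text) provider
  output: new PlayAIVoice(), // speaking (text-to-speech) provider
});

// getSpeakers() queries only the speaking provider (PlayAI here).
// Every entry has at least a voiceId; other fields vary by provider.
const speakers = await voice.getSpeakers();
console.log(speakers.map((s) => s.voiceId));
```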
## Usage Examples

### Using Mastra Voice Providers

```typescript
import { CompositeVoice } from "@mastra/core/voice";
import { OpenAIVoice } from "@mastra/voice-openai";
import { PlayAIVoice } from "@mastra/voice-playai";

// Create voice providers
const openai = new OpenAIVoice();
const playai = new PlayAIVoice();

// Use OpenAI for listening (speech-to-text) and PlayAI for speaking (text-to-speech)
const voice = new CompositeVoice({
  input: openai,
  output: playai,
});

// Convert speech to text using OpenAI
const text = await voice.listen(audioStream);

// Convert text to speech using PlayAI
const audio = await voice.speak("Hello, world!");
```

### Using AI SDK Model Providers

You can pass AI SDK transcription and speech models directly to CompositeVoice:

```typescript
import { CompositeVoice } from "@mastra/core/voice";
import { openai } from "@ai-sdk/openai";
import { elevenlabs } from "@ai-sdk/elevenlabs";

// Use AI SDK models directly - they will be auto-wrapped
const voice = new CompositeVoice({
  input: openai.transcription('whisper-1'), // AI SDK transcription
  output: elevenlabs.speech('eleven_turbo_v2'), // AI SDK speech
});

// Works the same way as with Mastra providers
const text = await voice.listen(audioStream);
const audio = await voice.speak("Hello from AI SDK!");
```

### Mix and Match

You can combine Mastra providers with AI SDK models:

```typescript
import { CompositeVoice } from "@mastra/core/voice";
import { PlayAIVoice } from "@mastra/voice-playai";
import { groq } from "@ai-sdk/groq";

const voice = new CompositeVoice({
  input: groq.transcription('whisper-large-v3'), // AI SDK for STT
  output: new PlayAIVoice(), // Mastra for TTS
});
```
---

## Reference: OpenAI

> Documentation for the OpenAIVoice class, providing text-to-speech and speech-to-text capabilities.

The OpenAIVoice class in Mastra provides text-to-speech and speech-to-text capabilities using OpenAI's models.

## Usage Example

```typescript
import { OpenAIVoice } from "@mastra/voice-openai";

// Initialize with default configuration using environment variables
const voice = new OpenAIVoice();

// Or initialize with specific configuration
const voiceWithConfig = new OpenAIVoice({
  speechModel: {
    name: "tts-1-hd",
    apiKey: "your-openai-api-key",
  },
  listeningModel: {
    name: "whisper-1",
    apiKey: "your-openai-api-key",
  },
  speaker: "alloy", // Default voice
});

// Convert text to speech
const audioStream = await voice.speak("Hello, how can I help you?", {
  speaker: "nova", // Override default voice
  speed: 1.2, // Adjust speech speed
});

// Convert speech to text
const text = await voice.listen(audioStream, {
  filetype: "mp3",
});
```

## Configuration

### Constructor Options

The constructor accepts an optional configuration object with `speechModel` (an `OpenAIConfig` for text-to-speech), `listeningModel` (an `OpenAIConfig` for speech-to-text), and `speaker` (the default voice ID).

### OpenAIConfig

An `OpenAIConfig` has optional `name` (model name: `tts-1`, `tts-1-hd`, or `whisper-1`), `apiKey`, and `options` (additional OpenAI client options) fields.

## Methods

### speak()

Converts text to speech using OpenAI's text-to-speech models.

Returns: `Promise<NodeJS.ReadableStream>`

### listen()

Transcribes audio using OpenAI's Whisper model.

Returns: `Promise<string>`

### getSpeakers()

Returns an array of available voice options, where each entry has a `voiceId` property.
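A short sketch of the call and the shape of the result (these are the voice IDs shipped in this version of the package):

```typescript
import { OpenAIVoice } from "@mastra/voice-openai";

const voice = new OpenAIVoice();

const speakers = await voice.getSpeakers();
// [
//   { voiceId: "alloy" }, { voiceId: "echo" }, { voiceId: "fable" },
//   { voiceId: "onyx" }, { voiceId: "nova" }, { voiceId: "shimmer" },
//   { voiceId: "ash" }, { voiceId: "coral" }, { voiceId: "sage" }
// ]
```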
## Notes

- API keys can be provided via constructor options or the `OPENAI_API_KEY` environment variable
- The `tts-1-hd` model provides higher quality audio but may have slower processing times
- Speech recognition supports multiple audio formats including mp3, wav, and webm

---

## Reference: voice.getSpeakers()

> Documentation for the getSpeakers() method available in voice providers, which retrieves available voice options.

The `getSpeakers()` method retrieves a list of available voice options (speakers) from the voice provider. This allows applications to present users with voice choices or programmatically select the most appropriate voice for different contexts.

## Usage Example

```typescript
import { OpenAIVoice } from "@mastra/voice-openai";
import { ElevenLabsVoice } from "@mastra/voice-elevenlabs";

// Initialize voice providers
const openaiVoice = new OpenAIVoice();
const elevenLabsVoice = new ElevenLabsVoice({
  apiKey: process.env.ELEVENLABS_API_KEY,
});

// Get available speakers from OpenAI
const openaiSpeakers = await openaiVoice.getSpeakers();
console.log("OpenAI voices:", openaiSpeakers);
// Example output: [{ voiceId: "alloy" }, { voiceId: "echo" }, { voiceId: "fable" }, ...]

// Get available speakers from ElevenLabs
const elevenLabsSpeakers = await elevenLabsVoice.getSpeakers();
console.log("ElevenLabs voices:", elevenLabsSpeakers);
// Example output: [{ voiceId: "21m00Tcm4TlvDq8ikWAM", name: "Rachel" }, ...]

// Use a specific voice for speech
const text = "Hello, this is a test of different voices.";
await openaiVoice.speak(text, { speaker: openaiSpeakers[2].voiceId });
await elevenLabsVoice.speak(text, { speaker: elevenLabsSpeakers[0].voiceId });
```

## Parameters

This method does not accept any parameters.

## Return Value

Returns a promise that resolves to an array of voice objects. Every entry has at least a `voiceId` property; any additional metadata is provider-specific.

## Provider-Specific Metadata

Different voice providers return different metadata for their voices:

- **OpenAI**
- **OpenAI Realtime**
- **Deepgram**
- **ElevenLabs**
- **Google**
- **Azure**
- **Murf**
- **PlayAI**
- **Speechify**
- **Sarvam**
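Since only `voiceId` is guaranteed across providers, display code should treat the rest of the metadata as optional. A minimal sketch (the `name` check mirrors the ElevenLabs example above; field names for other providers will differ):

```typescript
import { ElevenLabsVoice } from "@mastra/voice-elevenlabs";

const voice = new ElevenLabsVoice({ apiKey: process.env.ELEVENLABS_API_KEY });

// Only voiceId is guaranteed; everything else is provider-specific.
const speakers: Array<{ voiceId: string; [key: string]: unknown }> = await voice.getSpeakers();

for (const s of speakers) {
  const name = s.name;
  const label = typeof name === "string" ? `${s.voiceId} (${name})` : s.voiceId;
  console.log(label);
}
```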
## Notes

- The available voices vary significantly between providers
- Some providers may require authentication to retrieve the full list of voices
- The default implementation returns an empty array if the provider doesn't support this method
- For performance reasons, consider caching the results if you need to display the list frequently
- The `voiceId` property is guaranteed to be present for all providers, but additional metadata varies

---

## Reference: voice.listen()

> Documentation for the listen() method available in all Mastra voice providers, which converts speech to text.

The `listen()` method is a core function available in all Mastra voice providers that converts speech to text. It takes an audio stream as input and returns the transcribed text.

## Parameters

Takes the audio stream to transcribe (a `NodeJS.ReadableStream`) and an optional options object whose fields are provider-specific (for example, `filetype` for OpenAI).

## Return Value

Returns one of the following:

- `Promise<string>`: A promise that resolves to the transcribed text
- `Promise<NodeJS.ReadableStream>`: A promise that resolves to a stream of transcribed text (for streaming transcription)
- `Promise<void>`: For real-time providers that emit 'writing' events instead of returning text directly
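Because the resolved value can be either a string or a stream, provider-agnostic code can normalize it before use. A small sketch (plain Node.js stream handling, not part of the Mastra API; the realtime `void` case is covered separately below):

```typescript
// Collect listen()'s result into a single string, whether the provider
// returned the transcript directly or as a readable stream of text.
async function toText(result: string | NodeJS.ReadableStream): Promise<string> {
  if (typeof result === "string") return result;

  const chunks: Buffer[] = [];
  for await (const chunk of result) {
    chunks.push(typeof chunk === "string" ? Buffer.from(chunk) : (chunk as Buffer));
  }
  return Buffer.concat(chunks).toString("utf-8");
}

// const transcript = await toText(await voice.listen(audioStream));
```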
## Provider-Specific Options

Each voice provider may support additional options specific to its implementation. Here are some examples:

### OpenAI

- `filetype` - audio format of the input stream: `mp3`, `mp4`, `mpeg`, `mpga`, `m4a`, `wav`, or `webm` (defaults to `mp3`)

### Google

### Deepgram

## Usage Example

```typescript
import { OpenAIVoice } from "@mastra/voice-openai";
import { getMicrophoneStream } from "@mastra/node-audio";
import { createReadStream } from "fs";
import path from "path";

// Initialize a voice provider
const voice = new OpenAIVoice({
  listeningModel: {
    name: "whisper-1",
    apiKey: process.env.OPENAI_API_KEY,
  },
});

// Basic usage with a file stream
const audioFilePath = path.join(process.cwd(), "audio.mp3");
const audioStream = createReadStream(audioFilePath);
const transcript = await voice.listen(audioStream, {
  filetype: "mp3",
});
console.log("Transcribed text:", transcript);

// Using a microphone stream
const microphoneStream = getMicrophoneStream(); // Assume this function gets audio input
const transcription = await voice.listen(microphoneStream);

// With provider-specific options
const transcriptWithOptions = await voice.listen(audioStream, {
  language: "en",
  prompt: "This is a conversation about artificial intelligence.",
});
```

## Using with CompositeVoice

When using `CompositeVoice`, the `listen()` method delegates to the configured listening provider:

```typescript
import { CompositeVoice } from "@mastra/core/voice";
import { OpenAIVoice } from "@mastra/voice-openai";
import { PlayAIVoice } from "@mastra/voice-playai";

const voice = new CompositeVoice({
  input: new OpenAIVoice(),
  output: new PlayAIVoice(),
});

// This will use the OpenAIVoice provider
const transcript = await voice.listen(audioStream);
```

### Using AI SDK Model Providers

You can also use AI SDK transcription models directly with `CompositeVoice`:

```typescript
import { CompositeVoice } from "@mastra/core/voice";
import { openai } from "@ai-sdk/openai";
import { PlayAIVoice } from "@mastra/voice-playai";

// Use AI SDK transcription models
const voice = new CompositeVoice({
  input: openai.transcription('whisper-1'), // AI SDK model
  output: new PlayAIVoice(), // Mastra provider
});

// Works the same way
const transcript = await voice.listen(audioStream);

// Provider-specific options can be passed through
const transcriptWithOptions = await voice.listen(audioStream, {
  providerOptions: {
    openai: {
      language: 'en',
      prompt: 'This is about AI',
    }
  }
});
```

See the [CompositeVoice reference](https://mastra.ai/reference/v1/voice/composite-voice) for more details on AI SDK integration.

## Realtime Voice Providers

When using realtime voice providers like `OpenAIRealtimeVoice`, the `listen()` method behaves differently:

- Instead of returning transcribed text, it emits 'writing' events with the transcribed text
- You need to register an event listener to receive the transcription

```typescript
import { OpenAIRealtimeVoice } from "@mastra/voice-openai-realtime";
import { getMicrophoneStream } from "@mastra/node-audio";

const voice = new OpenAIRealtimeVoice();
await voice.connect();

// Register event listener for transcription
voice.on("writing", ({ text, role }) => {
  console.log(`${role}: ${text}`);
});

// This will emit 'writing' events instead of returning text
const microphoneStream = getMicrophoneStream();
await voice.listen(microphoneStream);
```

## Notes

- Not all voice providers support speech-to-text functionality (e.g., PlayAI, Speechify)
- The behavior of `listen()` may vary slightly between providers, but all implementations follow the same basic interface
- When using a realtime voice provider, the method might not return text directly but instead emit a 'writing' event
- The audio format supported depends on the provider. Common formats include MP3, WAV, and M4A
- Some providers support streaming transcription, where text is returned as it's transcribed
- For best performance, consider closing or ending the audio stream when you're done with it (see the sketch below)
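For that last point, a file-backed stream can simply be destroyed once transcription finishes. A minimal sketch:

```typescript
import { createReadStream } from "fs";
import { OpenAIVoice } from "@mastra/voice-openai";

const voice = new OpenAIVoice();
const audioStream = createReadStream("audio.mp3");

try {
  const transcript = await voice.listen(audioStream, { filetype: "mp3" });
  console.log(transcript);
} finally {
  // Release the underlying file handle even if transcription fails.
  audioStream.destroy();
}
```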
## Related Methods

- [voice.speak()](./voice.speak) - Converts text to speech
- [voice.send()](./voice.send) - Sends audio data to the voice provider in real-time
- [voice.on()](./voice.on) - Registers an event listener for voice events

---

## Reference: voice.speak()

> Documentation for the speak() method available in all Mastra voice providers, which converts text to speech.

The `speak()` method is a core function available in all Mastra voice providers that converts text to speech. It takes text input and returns an audio stream that can be played or saved.

## Parameters

Takes the text to synthesize (a string or a readable stream of text) and an optional options object with `speaker` (voice ID) plus any provider-specific fields.

## Return Value

Returns a `Promise<NodeJS.ReadableStream | void>` where:

- `NodeJS.ReadableStream`: A stream of audio data that can be played or saved
- `void`: When using a realtime voice provider that emits audio through events instead of returning it directly
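In the stream case the result is an ordinary Node.js readable stream, so it can be piped anywhere a stream is accepted. A minimal sketch that saves the audio to disk (with OpenAI the stream is MP3 by default; adjust the extension for other providers or formats):

```typescript
import { createWriteStream } from "fs";
import { pipeline } from "stream/promises";
import { OpenAIVoice } from "@mastra/voice-openai";

const voice = new OpenAIVoice();
const audioStream = await voice.speak("Saving this sentence to a file.");

// Guard against the void case (realtime providers), then write the audio out.
if (audioStream) {
  await pipeline(audioStream, createWriteStream("output.mp3"));
}
```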
## Provider-Specific Options

Each voice provider may support additional options specific to its implementation. Here are some examples:

### OpenAI

- `speed` - playback speed (defaults to `1.0`)
- `responseFormat` - audio format of the returned stream (defaults to `mp3`)

### ElevenLabs

### Google

### Murf

## Usage Example

```typescript
import { OpenAIVoice } from "@mastra/voice-openai";
import { Readable } from "stream";

// Initialize a voice provider
const voice = new OpenAIVoice({
  speaker: "alloy", // Default voice
});

// Basic usage with default settings
const audioStream = await voice.speak("Hello, world!");

// Using a different voice for this specific request
const audioStreamWithDifferentVoice = await voice.speak("Hello again!", {
  speaker: "nova",
});

// Using provider-specific options
const audioStreamWithOptions = await voice.speak("Hello with options!", {
  speaker: "echo",
  speed: 1.2, // OpenAI-specific option
});

// Using a text stream as input
const textStream = Readable.from(["Hello", " from", " a", " stream!"]);
const audioStreamFromTextStream = await voice.speak(textStream);
```

## Using with CompositeVoice

When using `CompositeVoice`, the `speak()` method delegates to the configured speaking provider:

```typescript
import { CompositeVoice } from "@mastra/core/voice";
import { OpenAIVoice } from "@mastra/voice-openai";
import { PlayAIVoice } from "@mastra/voice-playai";

const voice = new CompositeVoice({
  output: new PlayAIVoice(),
  input: new OpenAIVoice(),
});

// This will use the PlayAIVoice provider
const audioStream = await voice.speak("Hello, world!");
```

### Using AI SDK Model Providers

You can also use AI SDK speech models directly with `CompositeVoice`:

```typescript
import { CompositeVoice } from "@mastra/core/voice";
import { openai } from "@ai-sdk/openai";
import { elevenlabs } from "@ai-sdk/elevenlabs";

// Use AI SDK speech models
const voice = new CompositeVoice({
  output: elevenlabs.speech('eleven_turbo_v2'), // AI SDK model
  input: openai.transcription('whisper-1'), // AI SDK model
});

// Works the same way
const audioStream = await voice.speak("Hello from AI SDK!");

// Provider-specific options can be passed through
const audioWithOptions = await voice.speak("Hello with options!", {
  speaker: 'Rachel', // ElevenLabs voice
  providerOptions: {
    elevenlabs: {
      stability: 0.5,
      similarity_boost: 0.75,
    }
  }
});
```

See the [CompositeVoice reference](https://mastra.ai/reference/v1/voice/composite-voice) for more details on AI SDK integration.

## Realtime Voice Providers

When using realtime voice providers like `OpenAIRealtimeVoice`, the `speak()` method behaves differently:

- Instead of returning an audio stream, it emits a 'speaking' event with the audio data
- You need to register an event listener to receive the audio chunks

```typescript
import { OpenAIRealtimeVoice } from "@mastra/voice-openai-realtime";
import Speaker from "@mastra/node-speaker";

const speaker = new Speaker({
  sampleRate: 24100, // Audio sample rate in Hz - standard for high-quality audio on MacBook Pro
  channels: 1, // Mono audio output (as opposed to stereo which would be 2)
  bitDepth: 16, // Bit depth for audio quality - CD quality standard (16-bit resolution)
});

const voice = new OpenAIRealtimeVoice();
await voice.connect();

// Register event listener for audio chunks
voice.on("speaker", (stream) => {
  // Handle audio chunk (e.g., play it or save it)
  stream.pipe(speaker);
});

// This will emit 'speaking' events instead of returning a stream
await voice.speak("Hello, this is realtime speech!");
```

## Notes

- The behavior of `speak()` may vary slightly between providers, but all implementations follow the same basic interface.
- When using a realtime voice provider, the method might not return an audio stream directly but instead emit a 'speaking' event.
- If a text stream is provided as input, the provider will typically convert it to a string before processing.
- The audio format of the returned stream depends on the provider. Common formats include MP3, WAV, and OGG.
- For best performance, consider closing or ending the audio stream when you're done with it.
package/dist/index.cjs.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"sources":["../src/index.ts"],"names":["MastraVoice","OpenAI","PassThrough"],"mappings":";;;;;;;;;;;AA6BO,IAAM,WAAA,GAAN,cAA0BA,iBAAA,CAAY;AAAA,EAC3C,YAAA;AAAA,EACA,eAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,WAAA,CAAY;AAAA,IACV,cAAA;AAAA,IACA,WAAA;AAAA,IACA;AAAA,GACF,GAII,EAAC,EAAG;AACN,IAAA,MAAM,aAAA,GAAgB,QAAQ,GAAA,CAAI,cAAA;AAClC,IAAA,MAAM,kBAAA,GAAqB;AAAA,MACzB,IAAA,EAAM,OAAA;AAAA,MACN,MAAA,EAAQ;AAAA,KACV;AACA,IAAA,MAAM,qBAAA,GAAwB;AAAA,MAC5B,IAAA,EAAM,WAAA;AAAA,MACN,MAAA,EAAQ;AAAA,KACV;AAEA,IAAA,KAAA,CAAM;AAAA,MACJ,WAAA,EAAa;AAAA,QACX,IAAA,EAAM,WAAA,EAAa,IAAA,IAAQ,kBAAA,CAAmB,IAAA;AAAA,QAC9C,MAAA,EAAQ,WAAA,EAAa,MAAA,IAAU,kBAAA,CAAmB;AAAA,OACpD;AAAA,MACA,cAAA,EAAgB;AAAA,QACd,IAAA,EAAM,cAAA,EAAgB,IAAA,IAAQ,qBAAA,CAAsB,IAAA;AAAA,QACpD,MAAA,EAAQ,cAAA,EAAgB,MAAA,IAAU,qBAAA,CAAsB;AAAA,OAC1D;AAAA,MACA,SAAS,OAAA,IAAW;AAAA,KACrB,CAAA;AAED,IAAA,MAAM,YAAA,GAAe,aAAa,MAAA,IAAU,aAAA;AAC5C,IAAA,IAAI,CAAC,YAAA,EAAc;AACjB,MAAA,MAAM,IAAI,MAAM,sCAAsC,CAAA;AAAA,IACxD;AACA,IAAA,IAAA,CAAK,YAAA,GAAe,IAAIC,uBAAA,CAAO;AAAA,MAC7B,MAAA,EAAQ,YAAA;AAAA,MACR,GAAG,WAAA,EAAa;AAAA,KACjB,CAAA;AAED,IAAA,MAAM,eAAA,GAAkB,gBAAgB,MAAA,IAAU,aAAA;AAClD,IAAA,IAAI,CAAC,eAAA,EAAiB;AACpB,MAAA,MAAM,IAAI,MAAM,yCAAyC,CAAA;AAAA,IAC3D;AACA,IAAA,IAAA,CAAK,eAAA,GAAkB,IAAIA,uBAAA,CAAO;AAAA,MAChC,MAAA,EAAQ,eAAA;AAAA,MACR,GAAG,cAAA,EAAgB;AAAA,KACpB,CAAA;AAED,IAAA,IAAI,CAAC,IAAA,CAAK,YAAA,IAAgB,CAAC,KAAK,eAAA,EAAiB;AAC/C,MAAA,MAAM,IAAI,MAAM,0FAA0F,CAAA;AAAA,IAC5G;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,WAAA,GAA0D;AAC9D,IAAA,IAAI,CAAC,KAAK,WAAA,EAAa;AACrB,MAAA,MAAM,IAAI,MAAM,6BAA6B,CAAA;AAAA,IAC/C;AAEA,IAAA,OAAO;AAAA,MACL,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,SAAA,EAAU;AAAA,MACrB,EAAE,SAAS,KAAA,EAAM;AAAA,MACjB,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA;AAAO,KACpB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAM,KAAA,CACJ,KAAA,EACA,OAAA,EAKgC;AAChC,IAAA,IAAI,CAAC,KAAK,YAAA,EAAc;AACtB,MAAA,MAAM,IAAI,MAAM,6BAA6B,CAAA;AAAA,IAC/C;AAEA,IAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,MAAA,MAAM,SAAmB,EAAC;AAC1B,MAAA,WAAA,MAAiB,SAAS,KAAA,EAAO;AAC/B,QAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,UAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,QAChC,CAAA,MAAO;AACL,UAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,QACnB;AAAA,MACF;AACA,MAAA,KAAA,GAAQ,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,OAAO,CAAA;AAAA,IAChD;AAEA,IAAA,IAAI,KAAA,CAAM,IAAA,EAAK,CAAE,MAAA,KAAW,CAAA,EAAG;AAC7B,MAAA,MAAM,IAAI,MAAM,qBAAqB,CAAA;AAAA,IACvC;AAEA,IAAA,MAAM,EAAE,SAAS,cAAA,EAAgB,KAAA,EAAO,GAAG,YAAA,EAAa,GAAI,WAAW,EAAC;AAExE,IAAA,MAAM,WAAW,MAAM,IAAA,CAAK,YAAA,CAAc,KAAA,CAAM,OAAO,MAAA,CAAO;AAAA,MAC5D,KAAA,EAAO,IAAA,CAAK,WAAA,EAAa,IAAA,IAAQ,OAAA;AAAA,MACjC,KAAA,EAAQ,WAAW,IAAA,CAAK,OAAA;AAAA,MACxB,iBAAiB,cAAA,IAAkB,KAAA;AAAA,MACnC,KAAA;AAAA,MACA,OAAO,KAAA,IAAS,CAAA;AAAA,MAChB,GAAG;AAAA,KACJ,CAAA;AAED,IAAA,MAAM,WAAA,GAAc,IAAIC,kBAAA,EAAY;AACpC,IAAA,MAAM,SAAS,MAAA,CAAO,IAAA,CAAK,MAAM,QAAA,CAAS,aAAa,CAAA;AACvD,IAAA,WAAA,CAAY,IAAI,MAAM,CAAA;AACtB,IAAA,OAAO,WAAA;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,WAAA,GAAc;AAClB,IAAA,IAAI,CAAC,KAAK,eAAA,EAAiB;AACzB,MAAA,OAAO,EAAE,SAAS,KAAA,EAAM;AAAA,IAC1B;AACA,IAAA,OAAO,EAAE,SAAS,IAAA,EAAK;AAAA,EACzB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAM,MAAA,CACJ,WAAA,EACA,OAAA,EAIiB;AACjB,IAAA,IAAI,CAAC,KAAK,eAAA,EAAiB;AACzB,MAAA,MAAM,IAAI,MAAM,gCAAgC,CAAA;AAAA,IAClD;AAEA,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,WAAA,EAAa;AACrC,MAAA,IAAI,OAA
O,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,MAAM,WAAA,GAAc,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA;AAExC,IAAA,MAAM,EAAE,QAAA,EAAU,GAAG,YAAA,EAAa,GAAI,WAAW,EAAC;AAClD,IAAA,MAAM,IAAA,GAAO,IAAI,IAAA,CAAK,CAAC,WAAW,CAAA,EAAG,CAAA,MAAA,EAAS,QAAA,IAAY,KAAK,CAAA,CAAE,CAAA;AAEjE,IAAA,MAAM,WAAW,MAAM,IAAA,CAAK,eAAA,CAAiB,KAAA,CAAM,eAAe,MAAA,CAAO;AAAA,MACvE,KAAA,EAAO,IAAA,CAAK,cAAA,EAAgB,IAAA,IAAQ,WAAA;AAAA,MACpC,IAAA;AAAA,MACA,GAAG;AAAA,KACJ,CAAA;AAED,IAAA,OAAO,QAAA,CAAS,IAAA;AAAA,EAClB;AACF","file":"index.cjs","sourcesContent":["import { PassThrough } from 'stream';\n\nimport { MastraVoice } from '@mastra/core/voice';\nimport OpenAI from 'openai';\nimport type { ClientOptions } from 'openai';\n\ntype OpenAIVoiceId = 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer' | 'ash' | 'coral' | 'sage';\ntype OpenAIModel = 'tts-1' | 'tts-1-hd' | 'whisper-1';\n\nexport interface OpenAIConfig {\n name?: OpenAIModel;\n apiKey?: string;\n options?: Omit<ClientOptions, 'apiKey'>;\n}\n\nexport interface OpenAIVoiceConfig {\n speech?: {\n model: 'tts-1' | 'tts-1-hd';\n apiKey?: string;\n speaker?: OpenAIVoiceId;\n options?: Omit<ClientOptions, 'apiKey'>;\n };\n listening?: {\n model: 'whisper-1';\n apiKey?: string;\n options?: Omit<ClientOptions, 'apiKey'>;\n };\n}\n\nexport class OpenAIVoice extends MastraVoice {\n speechClient?: OpenAI;\n listeningClient?: OpenAI;\n\n /**\n * Constructs an instance of OpenAIVoice with optional configurations for speech and listening models.\n *\n * @param {Object} [config] - Configuration options for the OpenAIVoice instance.\n * @param {OpenAIConfig} [config.listeningModel] - Configuration for the listening model, including model name and API key.\n * @param {OpenAIConfig} [config.speechModel] - Configuration for the speech model, including model name and API key.\n * @param {string} [config.speaker] - The default speaker's voice to use for speech synthesis.\n * @throws {Error} - Throws an error if no API key is provided for either the speech or listening model.\n */\n constructor({\n listeningModel,\n speechModel,\n speaker,\n }: {\n listeningModel?: OpenAIConfig;\n speechModel?: OpenAIConfig;\n speaker?: string;\n } = {}) {\n const defaultApiKey = process.env.OPENAI_API_KEY;\n const defaultSpeechModel = {\n name: 'tts-1',\n apiKey: defaultApiKey,\n };\n const defaultListeningModel = {\n name: 'whisper-1',\n apiKey: defaultApiKey,\n };\n\n super({\n speechModel: {\n name: speechModel?.name ?? defaultSpeechModel.name,\n apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey,\n },\n listeningModel: {\n name: listeningModel?.name ?? defaultListeningModel.name,\n apiKey: listeningModel?.apiKey ?? defaultListeningModel.apiKey,\n },\n speaker: speaker ?? 
'alloy',\n });\n\n const speechApiKey = speechModel?.apiKey || defaultApiKey;\n if (!speechApiKey) {\n throw new Error('No API key provided for speech model');\n }\n this.speechClient = new OpenAI({\n apiKey: speechApiKey,\n ...speechModel?.options,\n });\n\n const listeningApiKey = listeningModel?.apiKey || defaultApiKey;\n if (!listeningApiKey) {\n throw new Error('No API key provided for listening model');\n }\n this.listeningClient = new OpenAI({\n apiKey: listeningApiKey,\n ...listeningModel?.options,\n });\n\n if (!this.speechClient && !this.listeningClient) {\n throw new Error('At least one of OPENAI_API_KEY, speechModel.apiKey, or listeningModel.apiKey must be set');\n }\n }\n\n /**\n * Retrieves a list of available speakers for the speech model.\n *\n * @returns {Promise<Array<{ voiceId: OpenAIVoiceId }>>} - A promise that resolves to an array of objects,\n * each containing a `voiceId` representing an available speaker.\n * @throws {Error} - Throws an error if the speech model is not configured.\n */\n async getSpeakers(): Promise<Array<{ voiceId: OpenAIVoiceId }>> {\n if (!this.speechModel) {\n throw new Error('Speech model not configured');\n }\n\n return [\n { voiceId: 'alloy' },\n { voiceId: 'echo' },\n { voiceId: 'fable' },\n { voiceId: 'onyx' },\n { voiceId: 'nova' },\n { voiceId: 'shimmer' },\n { voiceId: 'ash' },\n { voiceId: 'coral' },\n { voiceId: 'sage' },\n ];\n }\n\n /**\n * Converts text or audio input into speech using the configured speech model.\n *\n * @param {string | NodeJS.ReadableStream} input - The text or audio stream to be converted into speech.\n * @param {Object} [options] - Optional parameters for the speech synthesis.\n * @param {string} [options.speaker] - The speaker's voice to use for the speech synthesis.\n * @param {number} [options.speed] - The speed at which the speech should be synthesized.\n * @returns {Promise<NodeJS.ReadableStream>} - A promise that resolves to a readable stream of the synthesized audio.\n * @throws {Error} - Throws an error if the speech model is not configured or if the input text is empty.\n */\n async speak(\n input: string | NodeJS.ReadableStream,\n options?: {\n speaker?: string;\n speed?: number;\n [key: string]: any;\n },\n ): Promise<NodeJS.ReadableStream> {\n if (!this.speechClient) {\n throw new Error('Speech model not configured');\n }\n\n if (typeof input !== 'string') {\n const chunks: Buffer[] = [];\n for await (const chunk of input) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n input = Buffer.concat(chunks).toString('utf-8');\n }\n\n if (input.trim().length === 0) {\n throw new Error('Input text is empty');\n }\n\n const { speaker, responseFormat, speed, ...otherOptions } = options || {};\n\n const response = await this.speechClient!.audio.speech.create({\n model: this.speechModel?.name ?? 'tts-1',\n voice: (speaker ?? this.speaker) as OpenAIVoiceId,\n response_format: responseFormat ?? 
'mp3',\n input,\n speed: speed || 1.0,\n ...otherOptions,\n });\n\n const passThrough = new PassThrough();\n const buffer = Buffer.from(await response.arrayBuffer());\n passThrough.end(buffer);\n return passThrough;\n }\n\n /**\n * Checks if listening capabilities are enabled.\n *\n * @returns {Promise<{ enabled: boolean }>}\n */\n async getListener() {\n if (!this.listeningClient) {\n return { enabled: false };\n }\n return { enabled: true };\n }\n\n /**\n * Transcribes audio from a given stream using the configured listening model.\n *\n * @param {NodeJS.ReadableStream} audioStream - The audio stream to be transcribed.\n * @param {Object} [options] - Optional parameters for the transcription.\n * @param {string} [options.filetype] - The file type of the audio stream.\n * Supported types include 'mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm'.\n * @returns {Promise<string>} - A promise that resolves to the transcribed text.\n * @throws {Error} - Throws an error if the listening model is not configured.\n */\n async listen(\n audioStream: NodeJS.ReadableStream,\n options?: {\n filetype?: 'mp3' | 'mp4' | 'mpeg' | 'mpga' | 'm4a' | 'wav' | 'webm';\n [key: string]: any;\n },\n ): Promise<string> {\n if (!this.listeningClient) {\n throw new Error('Listening model not configured');\n }\n\n const chunks: Buffer[] = [];\n for await (const chunk of audioStream) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n const audioBuffer = Buffer.concat(chunks);\n\n const { filetype, ...otherOptions } = options || {};\n const file = new File([audioBuffer], `audio.${filetype || 'mp3'}`);\n\n const response = await this.listeningClient!.audio.transcriptions.create({\n model: this.listeningModel?.name || 'whisper-1',\n file: file as any,\n ...otherOptions,\n });\n\n return response.text;\n }\n}\n"]}
+
{"version":3,"sources":["../src/index.ts"],"names":["MastraVoice","OpenAI","PassThrough"],"mappings":";;;;;;;;;;;AA6BO,IAAM,WAAA,GAAN,cAA0BA,iBAAA,CAAY;AAAA,EAC3C,YAAA;AAAA,EACA,eAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,WAAA,CAAY;AAAA,IACV,cAAA;AAAA,IACA,WAAA;AAAA,IACA;AAAA,GACF,GAII,EAAC,EAAG;AACN,IAAA,MAAM,aAAA,GAAgB,QAAQ,GAAA,CAAI,cAAA;AAClC,IAAA,MAAM,kBAAA,GAAqB;AAAA,MACzB,IAAA,EAAM,OAAA;AAAA,MACN,MAAA,EAAQ;AAAA,KACV;AACA,IAAA,MAAM,qBAAA,GAAwB;AAAA,MAC5B,IAAA,EAAM,WAAA;AAAA,MACN,MAAA,EAAQ;AAAA,KACV;AAEA,IAAA,KAAA,CAAM;AAAA,MACJ,WAAA,EAAa;AAAA,QACX,IAAA,EAAM,WAAA,EAAa,IAAA,IAAQ,kBAAA,CAAmB,IAAA;AAAA,QAC9C,MAAA,EAAQ,WAAA,EAAa,MAAA,IAAU,kBAAA,CAAmB;AAAA,OACpD;AAAA,MACA,cAAA,EAAgB;AAAA,QACd,IAAA,EAAM,cAAA,EAAgB,IAAA,IAAQ,qBAAA,CAAsB,IAAA;AAAA,QACpD,MAAA,EAAQ,cAAA,EAAgB,MAAA,IAAU,qBAAA,CAAsB;AAAA,OAC1D;AAAA,MACA,SAAS,OAAA,IAAW;AAAA,KACrB,CAAA;AAED,IAAA,MAAM,YAAA,GAAe,aAAa,MAAA,IAAU,aAAA;AAC5C,IAAA,IAAI,CAAC,YAAA,EAAc;AACjB,MAAA,MAAM,IAAI,MAAM,sCAAsC,CAAA;AAAA,IACxD;AACA,IAAA,IAAA,CAAK,YAAA,GAAe,IAAIC,uBAAA,CAAO;AAAA,MAC7B,MAAA,EAAQ,YAAA;AAAA,MACR,GAAG,WAAA,EAAa;AAAA,KACjB,CAAA;AAED,IAAA,MAAM,eAAA,GAAkB,gBAAgB,MAAA,IAAU,aAAA;AAClD,IAAA,IAAI,CAAC,eAAA,EAAiB;AACpB,MAAA,MAAM,IAAI,MAAM,yCAAyC,CAAA;AAAA,IAC3D;AACA,IAAA,IAAA,CAAK,eAAA,GAAkB,IAAIA,uBAAA,CAAO;AAAA,MAChC,MAAA,EAAQ,eAAA;AAAA,MACR,GAAG,cAAA,EAAgB;AAAA,KACpB,CAAA;AAED,IAAA,IAAI,CAAC,IAAA,CAAK,YAAA,IAAgB,CAAC,KAAK,eAAA,EAAiB;AAC/C,MAAA,MAAM,IAAI,MAAM,0FAA0F,CAAA;AAAA,IAC5G;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,WAAA,GAA0D;AAC9D,IAAA,IAAI,CAAC,KAAK,WAAA,EAAa;AACrB,MAAA,MAAM,IAAI,MAAM,6BAA6B,CAAA;AAAA,IAC/C;AAEA,IAAA,OAAO;AAAA,MACL,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,SAAA,EAAU;AAAA,MACrB,EAAE,SAAS,KAAA,EAAM;AAAA,MACjB,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA;AAAO,KACpB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAM,KAAA,CACJ,KAAA,EACA,OAAA,EAKgC;AAChC,IAAA,IAAI,CAAC,KAAK,YAAA,EAAc;AACtB,MAAA,MAAM,IAAI,MAAM,6BAA6B,CAAA;AAAA,IAC/C;AAEA,IAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,MAAA,MAAM,SAAmB,EAAC;AAC1B,MAAA,WAAA,MAAiB,SAAS,KAAA,EAAO;AAC/B,QAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,UAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,QAChC,CAAA,MAAO;AACL,UAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,QACnB;AAAA,MACF;AACA,MAAA,KAAA,GAAQ,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,OAAO,CAAA;AAAA,IAChD;AAEA,IAAA,IAAI,KAAA,CAAM,IAAA,EAAK,CAAE,MAAA,KAAW,CAAA,EAAG;AAC7B,MAAA,MAAM,IAAI,MAAM,qBAAqB,CAAA;AAAA,IACvC;AAEA,IAAA,MAAM,EAAE,SAAS,cAAA,EAAgB,KAAA,EAAO,GAAG,YAAA,EAAa,GAAI,WAAW,EAAC;AAExE,IAAA,MAAM,WAAW,MAAM,IAAA,CAAK,YAAA,CAAc,KAAA,CAAM,OAAO,MAAA,CAAO;AAAA,MAC5D,KAAA,EAAO,IAAA,CAAK,WAAA,EAAa,IAAA,IAAQ,OAAA;AAAA,MACjC,KAAA,EAAQ,WAAW,IAAA,CAAK,OAAA;AAAA,MACxB,iBAAiB,cAAA,IAAkB,KAAA;AAAA,MACnC,KAAA;AAAA,MACA,OAAO,KAAA,IAAS,CAAA;AAAA,MAChB,GAAG;AAAA,KACJ,CAAA;AAED,IAAA,MAAM,WAAA,GAAc,IAAIC,kBAAA,EAAY;AACpC,IAAA,MAAM,SAAS,MAAA,CAAO,IAAA,CAAK,MAAM,QAAA,CAAS,aAAa,CAAA;AACvD,IAAA,WAAA,CAAY,IAAI,MAAM,CAAA;AACtB,IAAA,OAAO,WAAA;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,WAAA,GAAc;AAClB,IAAA,IAAI,CAAC,KAAK,eAAA,EAAiB;AACzB,MAAA,OAAO,EAAE,SAAS,KAAA,EAAM;AAAA,IAC1B;AACA,IAAA,OAAO,EAAE,SAAS,IAAA,EAAK;AAAA,EACzB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAM,MAAA,CACJ,WAAA,EACA,OAAA,EAIiB;AACjB,IAAA,IAAI,CAAC,KAAK,eAAA,EAAiB;AACzB,MAAA,MAAM,IAAI,MAAM,gCAAgC,CAAA;AAAA,IAClD;AAEA,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,WAAA,EAAa;AACrC,MAAA,IAAI,OAA
O,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,MAAM,WAAA,GAAc,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA;AAExC,IAAA,MAAM,EAAE,QAAA,EAAU,GAAG,YAAA,EAAa,GAAI,WAAW,EAAC;AAClD,IAAA,MAAM,IAAA,GAAO,IAAI,IAAA,CAAK,CAAC,WAAW,CAAA,EAAG,CAAA,MAAA,EAAS,QAAA,IAAY,KAAK,CAAA,CAAE,CAAA;AAEjE,IAAA,MAAM,WAAW,MAAM,IAAA,CAAK,eAAA,CAAiB,KAAA,CAAM,eAAe,MAAA,CAAO;AAAA,MACvE,KAAA,EAAO,IAAA,CAAK,cAAA,EAAgB,IAAA,IAAQ,WAAA;AAAA,MACpC,IAAA;AAAA,MACA,GAAG;AAAA,KACJ,CAAA;AAED,IAAA,OAAO,QAAA,CAAS,IAAA;AAAA,EAClB;AACF","file":"index.cjs","sourcesContent":["import { PassThrough } from 'node:stream';\n\nimport { MastraVoice } from '@mastra/core/voice';\nimport OpenAI from 'openai';\nimport type { ClientOptions } from 'openai';\n\ntype OpenAIVoiceId = 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer' | 'ash' | 'coral' | 'sage';\ntype OpenAIModel = 'tts-1' | 'tts-1-hd' | 'whisper-1';\n\nexport interface OpenAIConfig {\n name?: OpenAIModel;\n apiKey?: string;\n options?: Omit<ClientOptions, 'apiKey'>;\n}\n\nexport interface OpenAIVoiceConfig {\n speech?: {\n model: 'tts-1' | 'tts-1-hd';\n apiKey?: string;\n speaker?: OpenAIVoiceId;\n options?: Omit<ClientOptions, 'apiKey'>;\n };\n listening?: {\n model: 'whisper-1';\n apiKey?: string;\n options?: Omit<ClientOptions, 'apiKey'>;\n };\n}\n\nexport class OpenAIVoice extends MastraVoice {\n speechClient?: OpenAI;\n listeningClient?: OpenAI;\n\n /**\n * Constructs an instance of OpenAIVoice with optional configurations for speech and listening models.\n *\n * @param {Object} [config] - Configuration options for the OpenAIVoice instance.\n * @param {OpenAIConfig} [config.listeningModel] - Configuration for the listening model, including model name and API key.\n * @param {OpenAIConfig} [config.speechModel] - Configuration for the speech model, including model name and API key.\n * @param {string} [config.speaker] - The default speaker's voice to use for speech synthesis.\n * @throws {Error} - Throws an error if no API key is provided for either the speech or listening model.\n */\n constructor({\n listeningModel,\n speechModel,\n speaker,\n }: {\n listeningModel?: OpenAIConfig;\n speechModel?: OpenAIConfig;\n speaker?: string;\n } = {}) {\n const defaultApiKey = process.env.OPENAI_API_KEY;\n const defaultSpeechModel = {\n name: 'tts-1',\n apiKey: defaultApiKey,\n };\n const defaultListeningModel = {\n name: 'whisper-1',\n apiKey: defaultApiKey,\n };\n\n super({\n speechModel: {\n name: speechModel?.name ?? defaultSpeechModel.name,\n apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey,\n },\n listeningModel: {\n name: listeningModel?.name ?? defaultListeningModel.name,\n apiKey: listeningModel?.apiKey ?? defaultListeningModel.apiKey,\n },\n speaker: speaker ?? 
'alloy',\n });\n\n const speechApiKey = speechModel?.apiKey || defaultApiKey;\n if (!speechApiKey) {\n throw new Error('No API key provided for speech model');\n }\n this.speechClient = new OpenAI({\n apiKey: speechApiKey,\n ...speechModel?.options,\n });\n\n const listeningApiKey = listeningModel?.apiKey || defaultApiKey;\n if (!listeningApiKey) {\n throw new Error('No API key provided for listening model');\n }\n this.listeningClient = new OpenAI({\n apiKey: listeningApiKey,\n ...listeningModel?.options,\n });\n\n if (!this.speechClient && !this.listeningClient) {\n throw new Error('At least one of OPENAI_API_KEY, speechModel.apiKey, or listeningModel.apiKey must be set');\n }\n }\n\n /**\n * Retrieves a list of available speakers for the speech model.\n *\n * @returns {Promise<Array<{ voiceId: OpenAIVoiceId }>>} - A promise that resolves to an array of objects,\n * each containing a `voiceId` representing an available speaker.\n * @throws {Error} - Throws an error if the speech model is not configured.\n */\n async getSpeakers(): Promise<Array<{ voiceId: OpenAIVoiceId }>> {\n if (!this.speechModel) {\n throw new Error('Speech model not configured');\n }\n\n return [\n { voiceId: 'alloy' },\n { voiceId: 'echo' },\n { voiceId: 'fable' },\n { voiceId: 'onyx' },\n { voiceId: 'nova' },\n { voiceId: 'shimmer' },\n { voiceId: 'ash' },\n { voiceId: 'coral' },\n { voiceId: 'sage' },\n ];\n }\n\n /**\n * Converts text or audio input into speech using the configured speech model.\n *\n * @param {string | NodeJS.ReadableStream} input - The text or audio stream to be converted into speech.\n * @param {Object} [options] - Optional parameters for the speech synthesis.\n * @param {string} [options.speaker] - The speaker's voice to use for the speech synthesis.\n * @param {number} [options.speed] - The speed at which the speech should be synthesized.\n * @returns {Promise<NodeJS.ReadableStream>} - A promise that resolves to a readable stream of the synthesized audio.\n * @throws {Error} - Throws an error if the speech model is not configured or if the input text is empty.\n */\n async speak(\n input: string | NodeJS.ReadableStream,\n options?: {\n speaker?: string;\n speed?: number;\n [key: string]: any;\n },\n ): Promise<NodeJS.ReadableStream> {\n if (!this.speechClient) {\n throw new Error('Speech model not configured');\n }\n\n if (typeof input !== 'string') {\n const chunks: Buffer[] = [];\n for await (const chunk of input) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n input = Buffer.concat(chunks).toString('utf-8');\n }\n\n if (input.trim().length === 0) {\n throw new Error('Input text is empty');\n }\n\n const { speaker, responseFormat, speed, ...otherOptions } = options || {};\n\n const response = await this.speechClient!.audio.speech.create({\n model: this.speechModel?.name ?? 'tts-1',\n voice: (speaker ?? this.speaker) as OpenAIVoiceId,\n response_format: responseFormat ?? 
'mp3',\n input,\n speed: speed || 1.0,\n ...otherOptions,\n });\n\n const passThrough = new PassThrough();\n const buffer = Buffer.from(await response.arrayBuffer());\n passThrough.end(buffer);\n return passThrough;\n }\n\n /**\n * Checks if listening capabilities are enabled.\n *\n * @returns {Promise<{ enabled: boolean }>}\n */\n async getListener() {\n if (!this.listeningClient) {\n return { enabled: false };\n }\n return { enabled: true };\n }\n\n /**\n * Transcribes audio from a given stream using the configured listening model.\n *\n * @param {NodeJS.ReadableStream} audioStream - The audio stream to be transcribed.\n * @param {Object} [options] - Optional parameters for the transcription.\n * @param {string} [options.filetype] - The file type of the audio stream.\n * Supported types include 'mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm'.\n * @returns {Promise<string>} - A promise that resolves to the transcribed text.\n * @throws {Error} - Throws an error if the listening model is not configured.\n */\n async listen(\n audioStream: NodeJS.ReadableStream,\n options?: {\n filetype?: 'mp3' | 'mp4' | 'mpeg' | 'mpga' | 'm4a' | 'wav' | 'webm';\n [key: string]: any;\n },\n ): Promise<string> {\n if (!this.listeningClient) {\n throw new Error('Listening model not configured');\n }\n\n const chunks: Buffer[] = [];\n for await (const chunk of audioStream) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n const audioBuffer = Buffer.concat(chunks);\n\n const { filetype, ...otherOptions } = options || {};\n const file = new File([audioBuffer], `audio.${filetype || 'mp3'}`);\n\n const response = await this.listeningClient!.audio.transcriptions.create({\n model: this.listeningModel?.name || 'whisper-1',\n file: file as any,\n ...otherOptions,\n });\n\n return response.text;\n }\n}\n"]}
package/dist/index.js.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"sources":["../src/index.ts"],"names":[],"mappings":";;;;;AA6BO,IAAM,WAAA,GAAN,cAA0B,WAAA,CAAY;AAAA,EAC3C,YAAA;AAAA,EACA,eAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,WAAA,CAAY;AAAA,IACV,cAAA;AAAA,IACA,WAAA;AAAA,IACA;AAAA,GACF,GAII,EAAC,EAAG;AACN,IAAA,MAAM,aAAA,GAAgB,QAAQ,GAAA,CAAI,cAAA;AAClC,IAAA,MAAM,kBAAA,GAAqB;AAAA,MACzB,IAAA,EAAM,OAAA;AAAA,MACN,MAAA,EAAQ;AAAA,KACV;AACA,IAAA,MAAM,qBAAA,GAAwB;AAAA,MAC5B,IAAA,EAAM,WAAA;AAAA,MACN,MAAA,EAAQ;AAAA,KACV;AAEA,IAAA,KAAA,CAAM;AAAA,MACJ,WAAA,EAAa;AAAA,QACX,IAAA,EAAM,WAAA,EAAa,IAAA,IAAQ,kBAAA,CAAmB,IAAA;AAAA,QAC9C,MAAA,EAAQ,WAAA,EAAa,MAAA,IAAU,kBAAA,CAAmB;AAAA,OACpD;AAAA,MACA,cAAA,EAAgB;AAAA,QACd,IAAA,EAAM,cAAA,EAAgB,IAAA,IAAQ,qBAAA,CAAsB,IAAA;AAAA,QACpD,MAAA,EAAQ,cAAA,EAAgB,MAAA,IAAU,qBAAA,CAAsB;AAAA,OAC1D;AAAA,MACA,SAAS,OAAA,IAAW;AAAA,KACrB,CAAA;AAED,IAAA,MAAM,YAAA,GAAe,aAAa,MAAA,IAAU,aAAA;AAC5C,IAAA,IAAI,CAAC,YAAA,EAAc;AACjB,MAAA,MAAM,IAAI,MAAM,sCAAsC,CAAA;AAAA,IACxD;AACA,IAAA,IAAA,CAAK,YAAA,GAAe,IAAI,MAAA,CAAO;AAAA,MAC7B,MAAA,EAAQ,YAAA;AAAA,MACR,GAAG,WAAA,EAAa;AAAA,KACjB,CAAA;AAED,IAAA,MAAM,eAAA,GAAkB,gBAAgB,MAAA,IAAU,aAAA;AAClD,IAAA,IAAI,CAAC,eAAA,EAAiB;AACpB,MAAA,MAAM,IAAI,MAAM,yCAAyC,CAAA;AAAA,IAC3D;AACA,IAAA,IAAA,CAAK,eAAA,GAAkB,IAAI,MAAA,CAAO;AAAA,MAChC,MAAA,EAAQ,eAAA;AAAA,MACR,GAAG,cAAA,EAAgB;AAAA,KACpB,CAAA;AAED,IAAA,IAAI,CAAC,IAAA,CAAK,YAAA,IAAgB,CAAC,KAAK,eAAA,EAAiB;AAC/C,MAAA,MAAM,IAAI,MAAM,0FAA0F,CAAA;AAAA,IAC5G;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,WAAA,GAA0D;AAC9D,IAAA,IAAI,CAAC,KAAK,WAAA,EAAa;AACrB,MAAA,MAAM,IAAI,MAAM,6BAA6B,CAAA;AAAA,IAC/C;AAEA,IAAA,OAAO;AAAA,MACL,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,SAAA,EAAU;AAAA,MACrB,EAAE,SAAS,KAAA,EAAM;AAAA,MACjB,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA;AAAO,KACpB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAM,KAAA,CACJ,KAAA,EACA,OAAA,EAKgC;AAChC,IAAA,IAAI,CAAC,KAAK,YAAA,EAAc;AACtB,MAAA,MAAM,IAAI,MAAM,6BAA6B,CAAA;AAAA,IAC/C;AAEA,IAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,MAAA,MAAM,SAAmB,EAAC;AAC1B,MAAA,WAAA,MAAiB,SAAS,KAAA,EAAO;AAC/B,QAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,UAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,QAChC,CAAA,MAAO;AACL,UAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,QACnB;AAAA,MACF;AACA,MAAA,KAAA,GAAQ,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,OAAO,CAAA;AAAA,IAChD;AAEA,IAAA,IAAI,KAAA,CAAM,IAAA,EAAK,CAAE,MAAA,KAAW,CAAA,EAAG;AAC7B,MAAA,MAAM,IAAI,MAAM,qBAAqB,CAAA;AAAA,IACvC;AAEA,IAAA,MAAM,EAAE,SAAS,cAAA,EAAgB,KAAA,EAAO,GAAG,YAAA,EAAa,GAAI,WAAW,EAAC;AAExE,IAAA,MAAM,WAAW,MAAM,IAAA,CAAK,YAAA,CAAc,KAAA,CAAM,OAAO,MAAA,CAAO;AAAA,MAC5D,KAAA,EAAO,IAAA,CAAK,WAAA,EAAa,IAAA,IAAQ,OAAA;AAAA,MACjC,KAAA,EAAQ,WAAW,IAAA,CAAK,OAAA;AAAA,MACxB,iBAAiB,cAAA,IAAkB,KAAA;AAAA,MACnC,KAAA;AAAA,MACA,OAAO,KAAA,IAAS,CAAA;AAAA,MAChB,GAAG;AAAA,KACJ,CAAA;AAED,IAAA,MAAM,WAAA,GAAc,IAAI,WAAA,EAAY;AACpC,IAAA,MAAM,SAAS,MAAA,CAAO,IAAA,CAAK,MAAM,QAAA,CAAS,aAAa,CAAA;AACvD,IAAA,WAAA,CAAY,IAAI,MAAM,CAAA;AACtB,IAAA,OAAO,WAAA;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,WAAA,GAAc;AAClB,IAAA,IAAI,CAAC,KAAK,eAAA,EAAiB;AACzB,MAAA,OAAO,EAAE,SAAS,KAAA,EAAM;AAAA,IAC1B;AACA,IAAA,OAAO,EAAE,SAAS,IAAA,EAAK;AAAA,EACzB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAM,MAAA,CACJ,WAAA,EACA,OAAA,EAIiB;AACjB,IAAA,IAAI,CAAC,KAAK,eAAA,EAAiB;AACzB,MAAA,MAAM,IAAI,MAAM,gCAAgC,CAAA;AAAA,IAClD;AAEA,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,WAAA,EAAa;AACrC,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MA
AA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,MAAM,WAAA,GAAc,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA;AAExC,IAAA,MAAM,EAAE,QAAA,EAAU,GAAG,YAAA,EAAa,GAAI,WAAW,EAAC;AAClD,IAAA,MAAM,IAAA,GAAO,IAAI,IAAA,CAAK,CAAC,WAAW,CAAA,EAAG,CAAA,MAAA,EAAS,QAAA,IAAY,KAAK,CAAA,CAAE,CAAA;AAEjE,IAAA,MAAM,WAAW,MAAM,IAAA,CAAK,eAAA,CAAiB,KAAA,CAAM,eAAe,MAAA,CAAO;AAAA,MACvE,KAAA,EAAO,IAAA,CAAK,cAAA,EAAgB,IAAA,IAAQ,WAAA;AAAA,MACpC,IAAA;AAAA,MACA,GAAG;AAAA,KACJ,CAAA;AAED,IAAA,OAAO,QAAA,CAAS,IAAA;AAAA,EAClB;AACF","file":"index.js","sourcesContent":["import { PassThrough } from 'stream';\n\nimport { MastraVoice } from '@mastra/core/voice';\nimport OpenAI from 'openai';\nimport type { ClientOptions } from 'openai';\n\ntype OpenAIVoiceId = 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer' | 'ash' | 'coral' | 'sage';\ntype OpenAIModel = 'tts-1' | 'tts-1-hd' | 'whisper-1';\n\nexport interface OpenAIConfig {\n name?: OpenAIModel;\n apiKey?: string;\n options?: Omit<ClientOptions, 'apiKey'>;\n}\n\nexport interface OpenAIVoiceConfig {\n speech?: {\n model: 'tts-1' | 'tts-1-hd';\n apiKey?: string;\n speaker?: OpenAIVoiceId;\n options?: Omit<ClientOptions, 'apiKey'>;\n };\n listening?: {\n model: 'whisper-1';\n apiKey?: string;\n options?: Omit<ClientOptions, 'apiKey'>;\n };\n}\n\nexport class OpenAIVoice extends MastraVoice {\n speechClient?: OpenAI;\n listeningClient?: OpenAI;\n\n /**\n * Constructs an instance of OpenAIVoice with optional configurations for speech and listening models.\n *\n * @param {Object} [config] - Configuration options for the OpenAIVoice instance.\n * @param {OpenAIConfig} [config.listeningModel] - Configuration for the listening model, including model name and API key.\n * @param {OpenAIConfig} [config.speechModel] - Configuration for the speech model, including model name and API key.\n * @param {string} [config.speaker] - The default speaker's voice to use for speech synthesis.\n * @throws {Error} - Throws an error if no API key is provided for either the speech or listening model.\n */\n constructor({\n listeningModel,\n speechModel,\n speaker,\n }: {\n listeningModel?: OpenAIConfig;\n speechModel?: OpenAIConfig;\n speaker?: string;\n } = {}) {\n const defaultApiKey = process.env.OPENAI_API_KEY;\n const defaultSpeechModel = {\n name: 'tts-1',\n apiKey: defaultApiKey,\n };\n const defaultListeningModel = {\n name: 'whisper-1',\n apiKey: defaultApiKey,\n };\n\n super({\n speechModel: {\n name: speechModel?.name ?? defaultSpeechModel.name,\n apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey,\n },\n listeningModel: {\n name: listeningModel?.name ?? defaultListeningModel.name,\n apiKey: listeningModel?.apiKey ?? defaultListeningModel.apiKey,\n },\n speaker: speaker ?? 
'alloy',\n });\n\n const speechApiKey = speechModel?.apiKey || defaultApiKey;\n if (!speechApiKey) {\n throw new Error('No API key provided for speech model');\n }\n this.speechClient = new OpenAI({\n apiKey: speechApiKey,\n ...speechModel?.options,\n });\n\n const listeningApiKey = listeningModel?.apiKey || defaultApiKey;\n if (!listeningApiKey) {\n throw new Error('No API key provided for listening model');\n }\n this.listeningClient = new OpenAI({\n apiKey: listeningApiKey,\n ...listeningModel?.options,\n });\n\n if (!this.speechClient && !this.listeningClient) {\n throw new Error('At least one of OPENAI_API_KEY, speechModel.apiKey, or listeningModel.apiKey must be set');\n }\n }\n\n /**\n * Retrieves a list of available speakers for the speech model.\n *\n * @returns {Promise<Array<{ voiceId: OpenAIVoiceId }>>} - A promise that resolves to an array of objects,\n * each containing a `voiceId` representing an available speaker.\n * @throws {Error} - Throws an error if the speech model is not configured.\n */\n async getSpeakers(): Promise<Array<{ voiceId: OpenAIVoiceId }>> {\n if (!this.speechModel) {\n throw new Error('Speech model not configured');\n }\n\n return [\n { voiceId: 'alloy' },\n { voiceId: 'echo' },\n { voiceId: 'fable' },\n { voiceId: 'onyx' },\n { voiceId: 'nova' },\n { voiceId: 'shimmer' },\n { voiceId: 'ash' },\n { voiceId: 'coral' },\n { voiceId: 'sage' },\n ];\n }\n\n /**\n * Converts text or audio input into speech using the configured speech model.\n *\n * @param {string | NodeJS.ReadableStream} input - The text or audio stream to be converted into speech.\n * @param {Object} [options] - Optional parameters for the speech synthesis.\n * @param {string} [options.speaker] - The speaker's voice to use for the speech synthesis.\n * @param {number} [options.speed] - The speed at which the speech should be synthesized.\n * @returns {Promise<NodeJS.ReadableStream>} - A promise that resolves to a readable stream of the synthesized audio.\n * @throws {Error} - Throws an error if the speech model is not configured or if the input text is empty.\n */\n async speak(\n input: string | NodeJS.ReadableStream,\n options?: {\n speaker?: string;\n speed?: number;\n [key: string]: any;\n },\n ): Promise<NodeJS.ReadableStream> {\n if (!this.speechClient) {\n throw new Error('Speech model not configured');\n }\n\n if (typeof input !== 'string') {\n const chunks: Buffer[] = [];\n for await (const chunk of input) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n input = Buffer.concat(chunks).toString('utf-8');\n }\n\n if (input.trim().length === 0) {\n throw new Error('Input text is empty');\n }\n\n const { speaker, responseFormat, speed, ...otherOptions } = options || {};\n\n const response = await this.speechClient!.audio.speech.create({\n model: this.speechModel?.name ?? 'tts-1',\n voice: (speaker ?? this.speaker) as OpenAIVoiceId,\n response_format: responseFormat ?? 
'mp3',\n input,\n speed: speed || 1.0,\n ...otherOptions,\n });\n\n const passThrough = new PassThrough();\n const buffer = Buffer.from(await response.arrayBuffer());\n passThrough.end(buffer);\n return passThrough;\n }\n\n /**\n * Checks if listening capabilities are enabled.\n *\n * @returns {Promise<{ enabled: boolean }>}\n */\n async getListener() {\n if (!this.listeningClient) {\n return { enabled: false };\n }\n return { enabled: true };\n }\n\n /**\n * Transcribes audio from a given stream using the configured listening model.\n *\n * @param {NodeJS.ReadableStream} audioStream - The audio stream to be transcribed.\n * @param {Object} [options] - Optional parameters for the transcription.\n * @param {string} [options.filetype] - The file type of the audio stream.\n * Supported types include 'mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm'.\n * @returns {Promise<string>} - A promise that resolves to the transcribed text.\n * @throws {Error} - Throws an error if the listening model is not configured.\n */\n async listen(\n audioStream: NodeJS.ReadableStream,\n options?: {\n filetype?: 'mp3' | 'mp4' | 'mpeg' | 'mpga' | 'm4a' | 'wav' | 'webm';\n [key: string]: any;\n },\n ): Promise<string> {\n if (!this.listeningClient) {\n throw new Error('Listening model not configured');\n }\n\n const chunks: Buffer[] = [];\n for await (const chunk of audioStream) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n const audioBuffer = Buffer.concat(chunks);\n\n const { filetype, ...otherOptions } = options || {};\n const file = new File([audioBuffer], `audio.${filetype || 'mp3'}`);\n\n const response = await this.listeningClient!.audio.transcriptions.create({\n model: this.listeningModel?.name || 'whisper-1',\n file: file as any,\n ...otherOptions,\n });\n\n return response.text;\n }\n}\n"]}
|
|
1
|
+
{"version":3,"sources":["../src/index.ts"],"names":[],"mappings":";;;;;AA6BO,IAAM,WAAA,GAAN,cAA0B,WAAA,CAAY;AAAA,EAC3C,YAAA;AAAA,EACA,eAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,WAAA,CAAY;AAAA,IACV,cAAA;AAAA,IACA,WAAA;AAAA,IACA;AAAA,GACF,GAII,EAAC,EAAG;AACN,IAAA,MAAM,aAAA,GAAgB,QAAQ,GAAA,CAAI,cAAA;AAClC,IAAA,MAAM,kBAAA,GAAqB;AAAA,MACzB,IAAA,EAAM,OAAA;AAAA,MACN,MAAA,EAAQ;AAAA,KACV;AACA,IAAA,MAAM,qBAAA,GAAwB;AAAA,MAC5B,IAAA,EAAM,WAAA;AAAA,MACN,MAAA,EAAQ;AAAA,KACV;AAEA,IAAA,KAAA,CAAM;AAAA,MACJ,WAAA,EAAa;AAAA,QACX,IAAA,EAAM,WAAA,EAAa,IAAA,IAAQ,kBAAA,CAAmB,IAAA;AAAA,QAC9C,MAAA,EAAQ,WAAA,EAAa,MAAA,IAAU,kBAAA,CAAmB;AAAA,OACpD;AAAA,MACA,cAAA,EAAgB;AAAA,QACd,IAAA,EAAM,cAAA,EAAgB,IAAA,IAAQ,qBAAA,CAAsB,IAAA;AAAA,QACpD,MAAA,EAAQ,cAAA,EAAgB,MAAA,IAAU,qBAAA,CAAsB;AAAA,OAC1D;AAAA,MACA,SAAS,OAAA,IAAW;AAAA,KACrB,CAAA;AAED,IAAA,MAAM,YAAA,GAAe,aAAa,MAAA,IAAU,aAAA;AAC5C,IAAA,IAAI,CAAC,YAAA,EAAc;AACjB,MAAA,MAAM,IAAI,MAAM,sCAAsC,CAAA;AAAA,IACxD;AACA,IAAA,IAAA,CAAK,YAAA,GAAe,IAAI,MAAA,CAAO;AAAA,MAC7B,MAAA,EAAQ,YAAA;AAAA,MACR,GAAG,WAAA,EAAa;AAAA,KACjB,CAAA;AAED,IAAA,MAAM,eAAA,GAAkB,gBAAgB,MAAA,IAAU,aAAA;AAClD,IAAA,IAAI,CAAC,eAAA,EAAiB;AACpB,MAAA,MAAM,IAAI,MAAM,yCAAyC,CAAA;AAAA,IAC3D;AACA,IAAA,IAAA,CAAK,eAAA,GAAkB,IAAI,MAAA,CAAO;AAAA,MAChC,MAAA,EAAQ,eAAA;AAAA,MACR,GAAG,cAAA,EAAgB;AAAA,KACpB,CAAA;AAED,IAAA,IAAI,CAAC,IAAA,CAAK,YAAA,IAAgB,CAAC,KAAK,eAAA,EAAiB;AAC/C,MAAA,MAAM,IAAI,MAAM,0FAA0F,CAAA;AAAA,IAC5G;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,WAAA,GAA0D;AAC9D,IAAA,IAAI,CAAC,KAAK,WAAA,EAAa;AACrB,MAAA,MAAM,IAAI,MAAM,6BAA6B,CAAA;AAAA,IAC/C;AAEA,IAAA,OAAO;AAAA,MACL,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,MAAA,EAAO;AAAA,MAClB,EAAE,SAAS,SAAA,EAAU;AAAA,MACrB,EAAE,SAAS,KAAA,EAAM;AAAA,MACjB,EAAE,SAAS,OAAA,EAAQ;AAAA,MACnB,EAAE,SAAS,MAAA;AAAO,KACpB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAM,KAAA,CACJ,KAAA,EACA,OAAA,EAKgC;AAChC,IAAA,IAAI,CAAC,KAAK,YAAA,EAAc;AACtB,MAAA,MAAM,IAAI,MAAM,6BAA6B,CAAA;AAAA,IAC/C;AAEA,IAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,MAAA,MAAM,SAAmB,EAAC;AAC1B,MAAA,WAAA,MAAiB,SAAS,KAAA,EAAO;AAC/B,QAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,UAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,QAChC,CAAA,MAAO;AACL,UAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,QACnB;AAAA,MACF;AACA,MAAA,KAAA,GAAQ,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,OAAO,CAAA;AAAA,IAChD;AAEA,IAAA,IAAI,KAAA,CAAM,IAAA,EAAK,CAAE,MAAA,KAAW,CAAA,EAAG;AAC7B,MAAA,MAAM,IAAI,MAAM,qBAAqB,CAAA;AAAA,IACvC;AAEA,IAAA,MAAM,EAAE,SAAS,cAAA,EAAgB,KAAA,EAAO,GAAG,YAAA,EAAa,GAAI,WAAW,EAAC;AAExE,IAAA,MAAM,WAAW,MAAM,IAAA,CAAK,YAAA,CAAc,KAAA,CAAM,OAAO,MAAA,CAAO;AAAA,MAC5D,KAAA,EAAO,IAAA,CAAK,WAAA,EAAa,IAAA,IAAQ,OAAA;AAAA,MACjC,KAAA,EAAQ,WAAW,IAAA,CAAK,OAAA;AAAA,MACxB,iBAAiB,cAAA,IAAkB,KAAA;AAAA,MACnC,KAAA;AAAA,MACA,OAAO,KAAA,IAAS,CAAA;AAAA,MAChB,GAAG;AAAA,KACJ,CAAA;AAED,IAAA,MAAM,WAAA,GAAc,IAAI,WAAA,EAAY;AACpC,IAAA,MAAM,SAAS,MAAA,CAAO,IAAA,CAAK,MAAM,QAAA,CAAS,aAAa,CAAA;AACvD,IAAA,WAAA,CAAY,IAAI,MAAM,CAAA;AACtB,IAAA,OAAO,WAAA;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,WAAA,GAAc;AAClB,IAAA,IAAI,CAAC,KAAK,eAAA,EAAiB;AACzB,MAAA,OAAO,EAAE,SAAS,KAAA,EAAM;AAAA,IAC1B;AACA,IAAA,OAAO,EAAE,SAAS,IAAA,EAAK;AAAA,EACzB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,MAAM,MAAA,CACJ,WAAA,EACA,OAAA,EAIiB;AACjB,IAAA,IAAI,CAAC,KAAK,eAAA,EAAiB;AACzB,MAAA,MAAM,IAAI,MAAM,gCAAgC,CAAA;AAAA,IAClD;AAEA,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,WAAA,EAAa;AACrC,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MA
AA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,MAAM,WAAA,GAAc,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA;AAExC,IAAA,MAAM,EAAE,QAAA,EAAU,GAAG,YAAA,EAAa,GAAI,WAAW,EAAC;AAClD,IAAA,MAAM,IAAA,GAAO,IAAI,IAAA,CAAK,CAAC,WAAW,CAAA,EAAG,CAAA,MAAA,EAAS,QAAA,IAAY,KAAK,CAAA,CAAE,CAAA;AAEjE,IAAA,MAAM,WAAW,MAAM,IAAA,CAAK,eAAA,CAAiB,KAAA,CAAM,eAAe,MAAA,CAAO;AAAA,MACvE,KAAA,EAAO,IAAA,CAAK,cAAA,EAAgB,IAAA,IAAQ,WAAA;AAAA,MACpC,IAAA;AAAA,MACA,GAAG;AAAA,KACJ,CAAA;AAED,IAAA,OAAO,QAAA,CAAS,IAAA;AAAA,EAClB;AACF","file":"index.js","sourcesContent":["import { PassThrough } from 'node:stream';\n\nimport { MastraVoice } from '@mastra/core/voice';\nimport OpenAI from 'openai';\nimport type { ClientOptions } from 'openai';\n\ntype OpenAIVoiceId = 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer' | 'ash' | 'coral' | 'sage';\ntype OpenAIModel = 'tts-1' | 'tts-1-hd' | 'whisper-1';\n\nexport interface OpenAIConfig {\n name?: OpenAIModel;\n apiKey?: string;\n options?: Omit<ClientOptions, 'apiKey'>;\n}\n\nexport interface OpenAIVoiceConfig {\n speech?: {\n model: 'tts-1' | 'tts-1-hd';\n apiKey?: string;\n speaker?: OpenAIVoiceId;\n options?: Omit<ClientOptions, 'apiKey'>;\n };\n listening?: {\n model: 'whisper-1';\n apiKey?: string;\n options?: Omit<ClientOptions, 'apiKey'>;\n };\n}\n\nexport class OpenAIVoice extends MastraVoice {\n speechClient?: OpenAI;\n listeningClient?: OpenAI;\n\n /**\n * Constructs an instance of OpenAIVoice with optional configurations for speech and listening models.\n *\n * @param {Object} [config] - Configuration options for the OpenAIVoice instance.\n * @param {OpenAIConfig} [config.listeningModel] - Configuration for the listening model, including model name and API key.\n * @param {OpenAIConfig} [config.speechModel] - Configuration for the speech model, including model name and API key.\n * @param {string} [config.speaker] - The default speaker's voice to use for speech synthesis.\n * @throws {Error} - Throws an error if no API key is provided for either the speech or listening model.\n */\n constructor({\n listeningModel,\n speechModel,\n speaker,\n }: {\n listeningModel?: OpenAIConfig;\n speechModel?: OpenAIConfig;\n speaker?: string;\n } = {}) {\n const defaultApiKey = process.env.OPENAI_API_KEY;\n const defaultSpeechModel = {\n name: 'tts-1',\n apiKey: defaultApiKey,\n };\n const defaultListeningModel = {\n name: 'whisper-1',\n apiKey: defaultApiKey,\n };\n\n super({\n speechModel: {\n name: speechModel?.name ?? defaultSpeechModel.name,\n apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey,\n },\n listeningModel: {\n name: listeningModel?.name ?? defaultListeningModel.name,\n apiKey: listeningModel?.apiKey ?? defaultListeningModel.apiKey,\n },\n speaker: speaker ?? 
'alloy',\n });\n\n const speechApiKey = speechModel?.apiKey || defaultApiKey;\n if (!speechApiKey) {\n throw new Error('No API key provided for speech model');\n }\n this.speechClient = new OpenAI({\n apiKey: speechApiKey,\n ...speechModel?.options,\n });\n\n const listeningApiKey = listeningModel?.apiKey || defaultApiKey;\n if (!listeningApiKey) {\n throw new Error('No API key provided for listening model');\n }\n this.listeningClient = new OpenAI({\n apiKey: listeningApiKey,\n ...listeningModel?.options,\n });\n\n if (!this.speechClient && !this.listeningClient) {\n throw new Error('At least one of OPENAI_API_KEY, speechModel.apiKey, or listeningModel.apiKey must be set');\n }\n }\n\n /**\n * Retrieves a list of available speakers for the speech model.\n *\n * @returns {Promise<Array<{ voiceId: OpenAIVoiceId }>>} - A promise that resolves to an array of objects,\n * each containing a `voiceId` representing an available speaker.\n * @throws {Error} - Throws an error if the speech model is not configured.\n */\n async getSpeakers(): Promise<Array<{ voiceId: OpenAIVoiceId }>> {\n if (!this.speechModel) {\n throw new Error('Speech model not configured');\n }\n\n return [\n { voiceId: 'alloy' },\n { voiceId: 'echo' },\n { voiceId: 'fable' },\n { voiceId: 'onyx' },\n { voiceId: 'nova' },\n { voiceId: 'shimmer' },\n { voiceId: 'ash' },\n { voiceId: 'coral' },\n { voiceId: 'sage' },\n ];\n }\n\n /**\n * Converts text or audio input into speech using the configured speech model.\n *\n * @param {string | NodeJS.ReadableStream} input - The text or audio stream to be converted into speech.\n * @param {Object} [options] - Optional parameters for the speech synthesis.\n * @param {string} [options.speaker] - The speaker's voice to use for the speech synthesis.\n * @param {number} [options.speed] - The speed at which the speech should be synthesized.\n * @returns {Promise<NodeJS.ReadableStream>} - A promise that resolves to a readable stream of the synthesized audio.\n * @throws {Error} - Throws an error if the speech model is not configured or if the input text is empty.\n */\n async speak(\n input: string | NodeJS.ReadableStream,\n options?: {\n speaker?: string;\n speed?: number;\n [key: string]: any;\n },\n ): Promise<NodeJS.ReadableStream> {\n if (!this.speechClient) {\n throw new Error('Speech model not configured');\n }\n\n if (typeof input !== 'string') {\n const chunks: Buffer[] = [];\n for await (const chunk of input) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n input = Buffer.concat(chunks).toString('utf-8');\n }\n\n if (input.trim().length === 0) {\n throw new Error('Input text is empty');\n }\n\n const { speaker, responseFormat, speed, ...otherOptions } = options || {};\n\n const response = await this.speechClient!.audio.speech.create({\n model: this.speechModel?.name ?? 'tts-1',\n voice: (speaker ?? this.speaker) as OpenAIVoiceId,\n response_format: responseFormat ?? 
'mp3',\n input,\n speed: speed || 1.0,\n ...otherOptions,\n });\n\n const passThrough = new PassThrough();\n const buffer = Buffer.from(await response.arrayBuffer());\n passThrough.end(buffer);\n return passThrough;\n }\n\n /**\n * Checks if listening capabilities are enabled.\n *\n * @returns {Promise<{ enabled: boolean }>}\n */\n async getListener() {\n if (!this.listeningClient) {\n return { enabled: false };\n }\n return { enabled: true };\n }\n\n /**\n * Transcribes audio from a given stream using the configured listening model.\n *\n * @param {NodeJS.ReadableStream} audioStream - The audio stream to be transcribed.\n * @param {Object} [options] - Optional parameters for the transcription.\n * @param {string} [options.filetype] - The file type of the audio stream.\n * Supported types include 'mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm'.\n * @returns {Promise<string>} - A promise that resolves to the transcribed text.\n * @throws {Error} - Throws an error if the listening model is not configured.\n */\n async listen(\n audioStream: NodeJS.ReadableStream,\n options?: {\n filetype?: 'mp3' | 'mp4' | 'mpeg' | 'mpga' | 'm4a' | 'wav' | 'webm';\n [key: string]: any;\n },\n ): Promise<string> {\n if (!this.listeningClient) {\n throw new Error('Listening model not configured');\n }\n\n const chunks: Buffer[] = [];\n for await (const chunk of audioStream) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n const audioBuffer = Buffer.concat(chunks);\n\n const { filetype, ...otherOptions } = options || {};\n const file = new File([audioBuffer], `audio.${filetype || 'mp3'}`);\n\n const response = await this.listeningClient!.audio.transcriptions.create({\n model: this.listeningModel?.name || 'whisper-1',\n file: file as any,\n ...otherOptions,\n });\n\n return response.text;\n }\n}\n"]}
|
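The `index.js.map` hunk above embeds the same `OpenAIVoice` source in its `sourcesContent`. For orientation only, here is a minimal round-trip sketch derived from that embedded source rather than from the rendered docs; the file names are illustrative and `OPENAI_API_KEY` is assumed to be set:

```typescript
import { createReadStream, createWriteStream } from "node:fs";
import { pipeline } from "node:stream/promises";
import { OpenAIVoice } from "@mastra/voice-openai";

// Explicit configuration mirroring the defaults in the embedded source.
const voice = new OpenAIVoice({
  speechModel: { name: "tts-1" },
  listeningModel: { name: "whisper-1" },
  speaker: "alloy",
});

// speak() resolves to a readable stream of synthesized audio (mp3 by default).
const audio = await voice.speak("Hello from Mastra!", { speed: 1.0 });
await pipeline(audio, createWriteStream("hello.mp3"));

// listen() buffers the audio stream and resolves to the transcribed text.
const text = await voice.listen(createReadStream("hello.mp3"), { filetype: "mp3" });
console.log(text);
```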