@mastra/voice-google-gemini-live 0.0.0-fix-local-pkg-cwd-20251224015404 → 0.0.0-fix-11640-persist-workflow-events-to-memory-20260106220027

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1019 @@
1
+ > Overview of voice capabilities in Mastra, including text-to-speech, speech-to-text, and real-time speech-to-speech interactions.
2
+
3
+ # Voice in Mastra
4
+
5
+ Mastra's Voice system provides a unified interface for voice interactions, enabling text-to-speech (TTS), speech-to-text (STT), and real-time speech-to-speech (STS) capabilities in your applications.
6
+
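+ Most providers expose the same `speak()` and `listen()` methods, so you can swap one provider for another without changing your application code. Below is a minimal sketch that uses the OpenAI provider on its own (it assumes `OPENAI_API_KEY` is set in your environment):
+
+ ```typescript
+ import { OpenAIVoice } from "@mastra/voice-openai";
+
+ // Every Mastra voice provider exposes the same unified surface
+ const voice = new OpenAIVoice();
+
+ // Text to speech: returns an audio stream
+ const audio = await voice.speak("Hello from Mastra!");
+
+ // Speech to text: accepts an audio stream and returns a transcript
+ const transcript = await voice.listen(audio);
+ console.log(transcript);
+ ```
+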
7
+ ## Adding Voice to Agents
8
+
9
+ To learn how to integrate voice capabilities into your agents, check out the [Adding Voice to Agents](https://mastra.ai/docs/v1/agents/adding-voice) documentation. This section covers how to use both single and multiple voice providers, as well as real-time interactions.
10
+
11
+ ```typescript
12
+ import { Agent } from "@mastra/core/agent";
13
+ import { OpenAIVoice } from "@mastra/voice-openai";
14
+
15
+ // Create an agent with OpenAI voice enabled for TTS
17
+ const voiceAgent = new Agent({
18
+ id: "voice-agent",
19
+ name: "Voice Agent",
20
+ instructions:
21
+ "You are a voice assistant that can help users with their tasks.",
22
+ model: "openai/gpt-5.1",
23
+ voice: new OpenAIVoice(),
24
+ });
25
+ ```
26
+
27
+ You can then use the following voice capabilities:
28
+
29
+ ### Text to Speech (TTS)
30
+
31
+ Turn your agent's responses into natural-sounding speech using Mastra's TTS capabilities.
32
+ Choose from multiple providers like OpenAI, ElevenLabs, and more.
33
+
34
+ For detailed configuration options and advanced features, check out our [Text-to-Speech guide](./text-to-speech).
35
+
36
+ **openai:**
37
+
38
+ ```typescript
39
+ import { Agent } from "@mastra/core/agent";
40
+ import { OpenAIVoice } from "@mastra/voice-openai";
41
+ import { playAudio } from "@mastra/node-audio";
42
+
43
+ const voiceAgent = new Agent({
44
+ id: "voice-agent",
45
+ name: "Voice Agent",
46
+ instructions:
47
+ "You are a voice assistant that can help users with their tasks.",
48
+ model: "openai/gpt-5.1",
49
+ voice: new OpenAIVoice(),
50
+ });
51
+
52
+ const { text } = await voiceAgent.generate("What color is the sky?");
53
+
54
+ // Convert text to speech and return an audio stream
55
+ const audioStream = await voiceAgent.voice.speak(text, {
56
+ speaker: "default", // Optional: specify a speaker
57
+ responseFormat: "wav", // Optional: specify a response format
58
+ });
59
+
60
+ playAudio(audioStream);
61
+ ```
62
+
63
+ Visit the [OpenAI Voice Reference](https://mastra.ai/reference/v1/voice/openai) for more information on the OpenAI voice provider.
64
+
65
+
66
+ **azure:**
67
+
68
+ ```typescript
69
+ import { Agent } from "@mastra/core/agent";
70
+ import { AzureVoice } from "@mastra/voice-azure";
71
+ import { playAudio } from "@mastra/node-audio";
72
+
73
+ const voiceAgent = new Agent({
74
+ id: "voice-agent",
75
+ name: "Voice Agent",
76
+ instructions:
77
+ "You are a voice assistant that can help users with their tasks.",
78
+ model: "openai/gpt-5.1",
79
+ voice: new AzureVoice(),
80
+ });
81
+
82
+ const { text } = await voiceAgent.generate("What color is the sky?");
83
+
84
+ // Convert text to speech and return an audio stream
85
+ const audioStream = await voiceAgent.voice.speak(text, {
86
+ speaker: "en-US-JennyNeural", // Optional: specify a speaker
87
+ });
88
+
89
+ playAudio(audioStream);
90
+ ```
91
+
92
+ Visit the [Azure Voice Reference](https://mastra.ai/reference/v1/voice/azure) for more information on the Azure voice provider.
93
+
94
+
95
+ **elevenlabs:**
96
+
97
+ ```typescript
98
+ import { Agent } from "@mastra/core/agent";
99
+ import { ElevenLabsVoice } from "@mastra/voice-elevenlabs";
100
+ import { playAudio } from "@mastra/node-audio";
101
+
102
+ const voiceAgent = new Agent({
103
+ id: "voice-agent",
104
+ name: "Voice Agent",
105
+ instructions:
106
+ "You are a voice assistant that can help users with their tasks.",
107
+ model: "openai/gpt-5.1",
108
+ voice: new ElevenLabsVoice(),
109
+ });
110
+
111
+ const { text } = await voiceAgent.generate("What color is the sky?");
112
+
113
+ // Convert text to speech and return an audio stream
114
+ const audioStream = await voiceAgent.voice.speak(text, {
115
+ speaker: "default", // Optional: specify a speaker
116
+ });
117
+
118
+ playAudio(audioStream);
119
+ ```
120
+
121
+ Visit the [ElevenLabs Voice Reference](https://mastra.ai/reference/v1/voice/elevenlabs) for more information on the ElevenLabs voice provider.
122
+
123
+
124
+ **playai:**
125
+
126
+ ```typescript
127
+ import { Agent } from "@mastra/core/agent";
128
+ import { PlayAIVoice } from "@mastra/voice-playai";
129
+ import { playAudio } from "@mastra/node-audio";
130
+
131
+ const voiceAgent = new Agent({
132
+ id: "voice-agent",
133
+ name: "Voice Agent",
134
+ instructions:
135
+ "You are a voice assistant that can help users with their tasks.",
136
+ model: "openai/gpt-5.1",
137
+ voice: new PlayAIVoice(),
138
+ });
139
+
140
+ const { text } = await voiceAgent.generate("What color is the sky?");
141
+
142
+ // Convert text to speech and return an audio stream
143
+ const audioStream = await voiceAgent.voice.speak(text, {
144
+ speaker: "default", // Optional: specify a speaker
145
+ });
146
+
147
+ playAudio(audioStream);
148
+ ```
149
+
150
+ Visit the [PlayAI Voice Reference](https://mastra.ai/reference/v1/voice/playai) for more information on the PlayAI voice provider.
151
+
152
+
153
+ **google:**
154
+
155
+ ```typescript
156
+ import { Agent } from "@mastra/core/agent";
157
+ import { GoogleVoice } from "@mastra/voice-google";
158
+ import { playAudio } from "@mastra/node-audio";
159
+
160
+ const voiceAgent = new Agent({
161
+ id: "voice-agent",
162
+ name: "Voice Agent",
163
+ instructions:
164
+ "You are a voice assistant that can help users with their tasks.",
165
+ model: "openai/gpt-5.1",
166
+ voice: new GoogleVoice(),
167
+ });
168
+
169
+ const { text } = await voiceAgent.generate("What color is the sky?");
170
+
171
+ // Convert text to speech and return an audio stream
172
+ const audioStream = await voiceAgent.voice.speak(text, {
173
+ speaker: "en-US-Studio-O", // Optional: specify a speaker
174
+ });
175
+
176
+ playAudio(audioStream);
177
+ ```
178
+
179
+ Visit the [Google Voice Reference](https://mastra.ai/reference/v1/voice/google) for more information on the Google voice provider.
180
+
181
+
182
+ **cloudflare:**
183
+
184
+ ```typescript
185
+ import { Agent } from "@mastra/core/agent";
186
+ import { CloudflareVoice } from "@mastra/voice-cloudflare";
187
+ import { playAudio } from "@mastra/node-audio";
188
+
189
+ const voiceAgent = new Agent({
190
+ id: "voice-agent",
191
+ name: "Voice Agent",
192
+ instructions:
193
+ "You are a voice assistant that can help users with their tasks.",
194
+ model: "openai/gpt-5.1",
195
+ voice: new CloudflareVoice(),
196
+ });
197
+
198
+ const { text } = await voiceAgent.generate("What color is the sky?");
199
+
200
+ // Convert text to speech and return an audio stream
201
+ const audioStream = await voiceAgent.voice.speak(text, {
202
+ speaker: "default", // Optional: specify a speaker
203
+ });
204
+
205
+ playAudio(audioStream);
206
+ ```
207
+
208
+ Visit the [Cloudflare Voice Reference](https://mastra.ai/reference/v1/voice/cloudflare) for more information on the Cloudflare voice provider.
209
+
210
+
211
+ **deepgram:**
212
+
213
+ ```typescript
214
+ import { Agent } from "@mastra/core/agent";
215
+ import { DeepgramVoice } from "@mastra/voice-deepgram";
216
+ import { playAudio } from "@mastra/node-audio";
217
+
218
+ const voiceAgent = new Agent({
219
+ id: "voice-agent",
220
+ name: "Voice Agent",
221
+ instructions:
222
+ "You are a voice assistant that can help users with their tasks.",
223
+ model: "openai/gpt-5.1",
224
+ voice: new DeepgramVoice(),
225
+ });
226
+
227
+ const { text } = await voiceAgent.generate("What color is the sky?");
228
+
229
+ // Convert text to speech and return an audio stream
230
+ const audioStream = await voiceAgent.voice.speak(text, {
231
+ speaker: "aura-english-us", // Optional: specify a speaker
232
+ });
233
+
234
+ playAudio(audioStream);
235
+ ```
236
+
237
+ Visit the [Deepgram Voice Reference](https://mastra.ai/reference/v1/voice/deepgram) for more information on the Deepgram voice provider.
238
+
239
+
240
+ **speechify:**
241
+
242
+ ```typescript
243
+ import { Agent } from "@mastra/core/agent";
244
+ import { SpeechifyVoice } from "@mastra/voice-speechify";
245
+ import { playAudio } from "@mastra/node-audio";
246
+
247
+ const voiceAgent = new Agent({
248
+ id: "voice-agent",
249
+ name: "Voice Agent",
250
+ instructions:
251
+ "You are a voice assistant that can help users with their tasks.",
252
+ model: "openai/gpt-5.1",
253
+ voice: new SpeechifyVoice(),
254
+ });
255
+
256
+ const { text } = await voiceAgent.generate("What color is the sky?");
257
+
258
+ // Convert text to speech and return an audio stream
259
+ const audioStream = await voiceAgent.voice.speak(text, {
260
+ speaker: "matthew", // Optional: specify a speaker
261
+ });
262
+
263
+ playAudio(audioStream);
264
+ ```
265
+
266
+ Visit the [Speechify Voice Reference](https://mastra.ai/reference/v1/voice/speechify) for more information on the Speechify voice provider.
267
+
268
+
269
+ **sarvam:**
270
+
271
+ ```typescript
272
+ import { Agent } from "@mastra/core/agent";
273
+ import { SarvamVoice } from "@mastra/voice-sarvam";
274
+ import { playAudio } from "@mastra/node-audio";
275
+
276
+ const voiceAgent = new Agent({
277
+ id: "voice-agent",
278
+ name: "Voice Agent",
279
+ instructions:
280
+ "You are a voice assistant that can help users with their tasks.",
281
+ model: "openai/gpt-5.1",
282
+ voice: new SarvamVoice(),
283
+ });
284
+
285
+ const { text } = await voiceAgent.generate("What color is the sky?");
286
+
287
+ // Convert text to speech and return an audio stream
288
+ const audioStream = await voiceAgent.voice.speak(text, {
289
+ speaker: "default", // Optional: specify a speaker
290
+ });
291
+
292
+ playAudio(audioStream);
293
+ ```
294
+
295
+ Visit the [Sarvam Voice Reference](https://mastra.ai/reference/v1/voice/sarvam) for more information on the Sarvam voice provider.
296
+
297
+
298
+ **murf:**
299
+
300
+ ```typescript
301
+ import { Agent } from "@mastra/core/agent";
302
+ import { MurfVoice } from "@mastra/voice-murf";
303
+ import { playAudio } from "@mastra/node-audio";
304
+
305
+ const voiceAgent = new Agent({
306
+ id: "voice-agent",
307
+ name: "Voice Agent",
308
+ instructions:
309
+ "You are a voice assistant that can help users with their tasks.",
310
+ model: "openai/gpt-5.1",
311
+ voice: new MurfVoice(),
312
+ });
313
+
314
+ const { text } = await voiceAgent.generate("What color is the sky?");
315
+
316
+ // Convert text to speech and return an audio stream
317
+ const audioStream = await voiceAgent.voice.speak(text, {
318
+ speaker: "default", // Optional: specify a speaker
319
+ });
320
+
321
+ playAudio(audioStream);
322
+ ```
323
+
324
+ Visit the [Murf Voice Reference](https://mastra.ai/reference/v1/voice/murf) for more information on the Murf voice provider.
325
+
326
+
327
+
328
+ ### Speech to Text (STT)
329
+
330
+ Transcribe spoken content using various providers like OpenAI, ElevenLabs, and more. For detailed configuration options and advanced features, check out [Speech to Text](./speech-to-text).
331
+
332
+ You can download a sample audio file from [here](https://github.com/mastra-ai/realtime-voice-demo/raw/refs/heads/main/how_can_i_help_you.mp3).
333
+
334
+ <br />
335
+ <AudioPlayback audio="https://github.com/mastra-ai/realtime-voice-demo/raw/refs/heads/main/how_can_i_help_you.mp3" />
336
+
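+ The examples below read the clip from `./how_can_i_help_you.mp3`. If you would rather fetch it in code than download it by hand, a small Node script along these lines works (assumes Node 18+ for the built-in `fetch`):
+
+ ```typescript
+ import { writeFile } from "fs/promises";
+
+ // Save the sample clip next to your script so the examples below can read it
+ const url =
+   "https://github.com/mastra-ai/realtime-voice-demo/raw/refs/heads/main/how_can_i_help_you.mp3";
+ const response = await fetch(url);
+ await writeFile("./how_can_i_help_you.mp3", Buffer.from(await response.arrayBuffer()));
+ ```
+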
337
+ **openai:**
338
+
339
+ ```typescript
340
+ import { Agent } from "@mastra/core/agent";
341
+ import { OpenAIVoice } from "@mastra/voice-openai";
342
+ import { createReadStream } from "fs";
343
+
344
+ const voiceAgent = new Agent({
345
+ id: "voice-agent",
346
+ name: "Voice Agent",
347
+ instructions:
348
+ "You are a voice assistant that can help users with their tasks.",
349
+ model: "openai/gpt-5.1",
350
+ voice: new OpenAIVoice(),
351
+ });
352
+
353
+ // Read the sample audio file from disk
354
+ const audioStream = createReadStream("./how_can_i_help_you.mp3");
355
+
356
+ // Convert audio to text
357
+ const transcript = await voiceAgent.voice.listen(audioStream);
358
+ console.log(`User said: ${transcript}`);
359
+
360
+ // Generate a response based on the transcript
361
+ const { text } = await voiceAgent.generate(transcript);
362
+ ```
363
+
364
+ Visit the [OpenAI Voice Reference](https://mastra.ai/reference/v1/voice/openai) for more information on the OpenAI voice provider.
365
+
366
+
367
+ **azure:**
368
+
369
+ ```typescript
371
+ import { Agent } from "@mastra/core/agent";
372
+ import { AzureVoice } from "@mastra/voice-azure";
373
+ import { createReadStream } from "fs";
374
+
375
+ const voiceAgent = new Agent({
376
+ id: "voice-agent",
377
+ name: "Voice Agent",
378
+ instructions:
379
+ "You are a voice assistant that can help users with their tasks.",
380
+ model: "openai/gpt-5.1",
381
+ voice: new AzureVoice(),
382
+ });
383
+
384
+ // Read the sample audio file from disk
385
+ const audioStream = createReadStream("./how_can_i_help_you.mp3");
386
+
387
+ // Convert audio to text
388
+ const transcript = await voiceAgent.voice.listen(audioStream);
389
+ console.log(`User said: ${transcript}`);
390
+
391
+ // Generate a response based on the transcript
392
+ const { text } = await voiceAgent.generate(transcript);
393
+ ```
394
+
395
+ Visit the [Azure Voice Reference](https://mastra.ai/reference/v1/voice/azure) for more information on the Azure voice provider.
396
+
397
+
398
+ **elevenlabs:**
399
+
400
+ ```typescript
401
+ import { Agent } from "@mastra/core/agent";
402
+ import { ElevenLabsVoice } from "@mastra/voice-elevenlabs";
403
+ import { createReadStream } from "fs";
404
+
405
+ const voiceAgent = new Agent({
406
+ id: "voice-agent",
407
+ name: "Voice Agent",
408
+ instructions:
409
+ "You are a voice assistant that can help users with their tasks.",
410
+ model: "openai/gpt-5.1",
411
+ voice: new ElevenLabsVoice(),
412
+ });
413
+
414
+ // Read the sample audio file from disk
415
+ const audioStream = createReadStream("./how_can_i_help_you.mp3");
416
+
417
+ // Convert audio to text
418
+ const transcript = await voiceAgent.voice.listen(audioStream);
419
+ console.log(`User said: ${transcript}`);
420
+
421
+ // Generate a response based on the transcript
422
+ const { text } = await voiceAgent.generate(transcript);
423
+ ```
424
+
425
+ Visit the [ElevenLabs Voice Reference](https://mastra.ai/reference/v1/voice/elevenlabs) for more information on the ElevenLabs voice provider.
426
+
427
+
428
+ **google:**
429
+
430
+ ```typescript
431
+ import { Agent } from "@mastra/core/agent";
432
+ import { GoogleVoice } from "@mastra/voice-google";
433
+ import { createReadStream } from "fs";
434
+
435
+ const voiceAgent = new Agent({
436
+ id: "voice-agent",
437
+ name: "Voice Agent",
438
+ instructions:
439
+ "You are a voice assistant that can help users with their tasks.",
440
+ model: "openai/gpt-5.1",
441
+ voice: new GoogleVoice(),
442
+ });
443
+
444
+ // Read the sample audio file from disk
445
+ const audioStream = createReadStream("./how_can_i_help_you.mp3");
446
+
447
+ // Convert audio to text
448
+ const transcript = await voiceAgent.voice.listen(audioStream);
449
+ console.log(`User said: ${transcript}`);
450
+
451
+ // Generate a response based on the transcript
452
+ const { text } = await voiceAgent.generate(transcript);
453
+ ```
454
+
455
+ Visit the [Google Voice Reference](https://mastra.ai/reference/v1/voice/google) for more information on the Google voice provider.
456
+
457
+
458
+ **cloudflare:**
459
+
460
+ ```typescript
461
+ import { Agent } from "@mastra/core/agent";
462
+ import { CloudflareVoice } from "@mastra/voice-cloudflare";
463
+ import { createReadStream } from "fs";
464
+
465
+ const voiceAgent = new Agent({
466
+ id: "voice-agent",
467
+ name: "Voice Agent",
468
+ instructions:
469
+ "You are a voice assistant that can help users with their tasks.",
470
+ model: "openai/gpt-5.1",
471
+ voice: new CloudflareVoice(),
472
+ });
473
+
474
+ // Read the sample audio file from disk
475
+ const audioStream = createReadStream("./how_can_i_help_you.mp3");
476
+
477
+ // Convert audio to text
478
+ const transcript = await voiceAgent.voice.listen(audioStream);
479
+ console.log(`User said: ${transcript}`);
480
+
481
+ // Generate a response based on the transcript
482
+ const { text } = await voiceAgent.generate(transcript);
483
+ ```
484
+
485
+ Visit the [Cloudflare Voice Reference](https://mastra.ai/reference/v1/voice/cloudflare) for more information on the Cloudflare voice provider.
486
+
487
+
488
+ **deepgram:**
489
+
490
+ ```typescript
491
+ import { Agent } from "@mastra/core/agent";
492
+ import { DeepgramVoice } from "@mastra/voice-deepgram";
493
+ import { createReadStream } from "fs";
494
+
495
+ const voiceAgent = new Agent({
496
+ id: "voice-agent",
497
+ name: "Voice Agent",
498
+ instructions:
499
+ "You are a voice assistant that can help users with their tasks.",
500
+ model: "openai/gpt-5.1",
501
+ voice: new DeepgramVoice(),
502
+ });
503
+
504
+ // Read the sample audio file from disk
505
+ const audioStream = createReadStream("./how_can_i_help_you.mp3");
506
+
507
+ // Convert audio to text
508
+ const transcript = await voiceAgent.voice.listen(audioStream);
509
+ console.log(`User said: ${transcript}`);
510
+
511
+ // Generate a response based on the transcript
512
+ const { text } = await voiceAgent.generate(transcript);
513
+ ```
514
+
515
+ Visit the [Deepgram Voice Reference](https://mastra.ai/reference/v1/voice/deepgram) for more information on the Deepgram voice provider.
516
+
517
+
518
+ **sarvam:**
519
+
520
+ ```typescript
521
+ import { Agent } from "@mastra/core/agent";
522
+ import { SarvamVoice } from "@mastra/voice-sarvam";
523
+ import { createReadStream } from "fs";
524
+
525
+ const voiceAgent = new Agent({
526
+ id: "voice-agent",
527
+ name: "Voice Agent",
528
+ instructions:
529
+ "You are a voice assistant that can help users with their tasks.",
530
+ model: "openai/gpt-5.1",
531
+ voice: new SarvamVoice(),
532
+ });
533
+
534
+ // Read the sample audio file from disk
535
+ const audioStream = createReadStream("./how_can_i_help_you.mp3");
536
+
537
+ // Convert audio to text
538
+ const transcript = await voiceAgent.voice.listen(audioStream);
539
+ console.log(`User said: ${transcript}`);
540
+
541
+ // Generate a response based on the transcript
542
+ const { text } = await voiceAgent.generate(transcript);
543
+ ```
544
+
545
+ Visit the [Sarvam Voice Reference](https://mastra.ai/reference/v1/voice/sarvam) for more information on the Sarvam voice provider.
546
+
547
+
548
+
549
+ ### Speech to Speech (STS)
550
+
551
+ Create conversational experiences with speech-to-speech capabilities. The unified API enables real-time voice interactions between users and AI agents.
552
+ For detailed configuration options and advanced features, check out [Speech to Speech](./speech-to-speech).
553
+
554
+ **openai:**
555
+
556
+ ```typescript
557
+ import { Agent } from "@mastra/core/agent";
558
+ import { playAudio, getMicrophoneStream } from "@mastra/node-audio";
559
+ import { OpenAIRealtimeVoice } from "@mastra/voice-openai-realtime";
560
+
561
+ const voiceAgent = new Agent({
562
+ id: "voice-agent",
563
+ name: "Voice Agent",
564
+ instructions:
565
+ "You are a voice assistant that can help users with their tasks.",
566
+ model: "openai/gpt-5.1",
567
+ voice: new OpenAIRealtimeVoice(),
568
+ });
569
+
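+ // Establish the realtime connection before using speak() or send()
+ // (same pattern as the Gemini Live example below)
+ await voiceAgent.voice.connect();
+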
570
+ // Listen for agent audio responses
571
+ voiceAgent.voice.on("speaker", ({ audio }) => {
572
+ playAudio(audio);
573
+ });
574
+
575
+ // Initiate the conversation
576
+ await voiceAgent.voice.speak("How can I help you today?");
577
+
578
+ // Send continuous audio from the microphone
579
+ const micStream = getMicrophoneStream();
580
+ await voiceAgent.voice.send(micStream);
581
+ ```
582
+
583
+ Visit the [OpenAI Realtime Voice Reference](https://mastra.ai/reference/v1/voice/openai-realtime) for more information on the OpenAI realtime voice provider.
584
+
585
+
586
+ **google:**
587
+
588
+ ```typescript
589
+ import { Agent } from "@mastra/core/agent";
590
+ import { playAudio, getMicrophoneStream } from "@mastra/node-audio";
591
+ import { GeminiLiveVoice } from "@mastra/voice-google-gemini-live";
592
+
593
+ const voiceAgent = new Agent({
594
+ id: "voice-agent",
595
+ name: "Voice Agent",
596
+ instructions:
597
+ "You are a voice assistant that can help users with their tasks.",
598
+ model: "openai/gpt-5.1",
599
+ voice: new GeminiLiveVoice({
600
+ // Live API mode
601
+ apiKey: process.env.GOOGLE_API_KEY,
602
+ model: "gemini-2.0-flash-exp",
603
+ speaker: "Puck",
604
+ debug: true,
605
+ // Vertex AI alternative:
606
+ // vertexAI: true,
607
+ // project: 'your-gcp-project',
608
+ // location: 'us-central1',
609
+ // serviceAccountKeyFile: '/path/to/service-account.json',
610
+ }),
611
+ });
612
+
613
+ // Connect before using speak/send
614
+ await voiceAgent.voice.connect();
615
+
616
+ // Listen for agent audio responses
617
+ voiceAgent.voice.on("speaker", ({ audio }) => {
618
+ playAudio(audio);
619
+ });
620
+
621
+ // Listen for text responses and transcriptions
622
+ voiceAgent.voice.on("writing", ({ text, role }) => {
623
+ console.log(`${role}: ${text}`);
624
+ });
625
+
626
+ // Initiate the conversation
627
+ await voiceAgent.voice.speak("How can I help you today?");
628
+
629
+ // Send continuous audio from the microphone
630
+ const micStream = getMicrophoneStream();
631
+ await voiceAgent.voice.send(micStream);
632
+ ```
633
+
634
+ Visit the [Google Gemini Live Reference](https://mastra.ai/reference/v1/voice/google-gemini-live) for more information on the Google Gemini Live voice provider.
635
+
636
+
637
+
638
+ ## Voice Configuration
639
+
640
+ Each voice provider can be configured with different models and options. Below are the detailed configuration options for all supported providers:
641
+
642
+ **openai:**
643
+
644
+ ```typescript
645
+ // OpenAI Voice Configuration
646
+ const voice = new OpenAIVoice({
647
+ speechModel: {
648
+ name: "gpt-3.5-turbo", // Example model name
649
+ apiKey: process.env.OPENAI_API_KEY,
650
+ language: "en-US", // Language code
651
+ voiceType: "neural", // Type of voice model
652
+ },
653
+ listeningModel: {
654
+ name: "whisper-1", // Example model name
655
+ apiKey: process.env.OPENAI_API_KEY,
656
+ language: "en-US", // Language code
657
+ format: "wav", // Audio format
658
+ },
659
+ speaker: "alloy", // Example speaker name
660
+ });
661
+ ```
662
+
663
+ Visit the [OpenAI Voice Reference](https://mastra.ai/reference/v1/voice/openai) for more information on the OpenAI voice provider.
664
+
665
+
666
+ **azure:**
667
+
668
+ ```typescript
669
+ // Azure Voice Configuration
670
+ const voice = new AzureVoice({
671
+ speechModel: {
672
+ name: "en-US-JennyNeural", // Example model name
673
+ apiKey: process.env.AZURE_SPEECH_KEY,
674
+ region: process.env.AZURE_SPEECH_REGION,
675
+ language: "en-US", // Language code
676
+ style: "cheerful", // Voice style
677
+ pitch: "+0Hz", // Pitch adjustment
678
+ rate: "1.0", // Speech rate
679
+ },
680
+ listeningModel: {
681
+ name: "en-US", // Example model name
682
+ apiKey: process.env.AZURE_SPEECH_KEY,
683
+ region: process.env.AZURE_SPEECH_REGION,
684
+ format: "simple", // Output format
685
+ },
686
+ });
687
+ ```
688
+
689
+ Visit the [Azure Voice Reference](https://mastra.ai/reference/v1/voice/azure) for more information on the Azure voice provider.
690
+
691
+
692
+ **elevenlabs:**
693
+
694
+ ```typescript
695
+ // ElevenLabs Voice Configuration
696
+ const voice = new ElevenLabsVoice({
697
+ speechModel: {
698
+ voiceId: "your-voice-id", // Example voice ID
699
+ model: "eleven_multilingual_v2", // Example model name
700
+ apiKey: process.env.ELEVENLABS_API_KEY,
701
+ language: "en", // Language code
702
+ emotion: "neutral", // Emotion setting
703
+ },
704
+ // ElevenLabs may not have a separate listening model
705
+ });
706
+ ```
707
+
708
+ Visit the [ElevenLabs Voice Reference](https://mastra.ai/reference/v1/voice/elevenlabs) for more information on the ElevenLabs voice provider.
709
+
710
+
711
+ **playai:**
712
+
713
+ ```typescript
714
+ // PlayAI Voice Configuration
715
+ const voice = new PlayAIVoice({
716
+ speechModel: {
717
+ name: "playai-voice", // Example model name
718
+ speaker: "emma", // Example speaker name
719
+ apiKey: process.env.PLAYAI_API_KEY,
720
+ language: "en-US", // Language code
721
+ speed: 1.0, // Speech speed
722
+ },
723
+ // PlayAI may not have a separate listening model
724
+ });
725
+ ```
726
+
727
+ Visit the [PlayAI Voice Reference](https://mastra.ai/reference/v1/voice/playai) for more information on the PlayAI voice provider.
728
+
729
+
730
+ **google:**
731
+
732
+ ```typescript
733
+ // Google Voice Configuration
734
+ const voice = new GoogleVoice({
735
+ speechModel: {
736
+ name: "en-US-Studio-O", // Example model name
737
+ apiKey: process.env.GOOGLE_API_KEY,
738
+ languageCode: "en-US", // Language code
739
+ gender: "FEMALE", // Voice gender
740
+ speakingRate: 1.0, // Speaking rate
741
+ },
742
+ listeningModel: {
743
+ name: "en-US", // Example model name
744
+ sampleRateHertz: 16000, // Sample rate
745
+ },
746
+ });
747
+ ```
748
+
749
+ Visit the [Google Voice Reference](https://mastra.ai/reference/v1/voice/google) for more information on the Google voice provider.
750
+
751
+
752
+ **cloudflare:**
753
+
754
+ ```typescript
755
+ // Cloudflare Voice Configuration
756
+ const voice = new CloudflareVoice({
757
+ speechModel: {
758
+ name: "cloudflare-voice", // Example model name
759
+ accountId: process.env.CLOUDFLARE_ACCOUNT_ID,
760
+ apiToken: process.env.CLOUDFLARE_API_TOKEN,
761
+ language: "en-US", // Language code
762
+ format: "mp3", // Audio format
763
+ },
764
+ // Cloudflare may not have a separate listening model
765
+ });
766
+ ```
767
+
768
+ Visit the [Cloudflare Voice Reference](https://mastra.ai/reference/v1/voice/cloudflare) for more information on the Cloudflare voice provider.
769
+
770
+
771
+ **deepgram:**
772
+
773
+ ```typescript
774
+ // Deepgram Voice Configuration
775
+ const voice = new DeepgramVoice({
776
+ speechModel: {
777
+ name: "nova-2", // Example model name
778
+ speaker: "aura-english-us", // Example speaker name
779
+ apiKey: process.env.DEEPGRAM_API_KEY,
780
+ language: "en-US", // Language code
781
+ tone: "formal", // Tone setting
782
+ },
783
+ listeningModel: {
784
+ name: "nova-2", // Example model name
785
+ format: "flac", // Audio format
786
+ },
787
+ });
788
+ ```
789
+
790
+ Visit the [Deepgram Voice Reference](https://mastra.ai/reference/v1/voice/deepgram) for more information on the Deepgram voice provider.
791
+
792
+
793
+ **speechify:**
794
+
795
+ ```typescript
796
+ // Speechify Voice Configuration
797
+ const voice = new SpeechifyVoice({
798
+ speechModel: {
799
+ name: "speechify-voice", // Example model name
800
+ speaker: "matthew", // Example speaker name
801
+ apiKey: process.env.SPEECHIFY_API_KEY,
802
+ language: "en-US", // Language code
803
+ speed: 1.0, // Speech speed
804
+ },
805
+ // Speechify may not have a separate listening model
806
+ });
807
+ ```
808
+
809
+ Visit the [Speechify Voice Reference](https://mastra.ai/reference/v1/voice/speechify) for more information on the Speechify voice provider.
810
+
811
+
812
+ **sarvam:**
813
+
814
+ ```typescript
815
+ // Sarvam Voice Configuration
816
+ const voice = new SarvamVoice({
817
+ speechModel: {
818
+ name: "sarvam-voice", // Example model name
819
+ apiKey: process.env.SARVAM_API_KEY,
820
+ language: "en-IN", // Language code
821
+ style: "conversational", // Style setting
822
+ },
823
+ // Sarvam may not have a separate listening model
824
+ });
825
+ ```
826
+
827
+ Visit the [Sarvam Voice Reference](https://mastra.ai/reference/v1/voice/sarvam) for more information on the Sarvam voice provider.
828
+
829
+
830
+ **murf:**
831
+
832
+ ```typescript
833
+ // Murf Voice Configuration
834
+ const voice = new MurfVoice({
835
+ speechModel: {
836
+ name: "murf-voice", // Example model name
837
+ apiKey: process.env.MURF_API_KEY,
838
+ language: "en-US", // Language code
839
+ emotion: "happy", // Emotion setting
840
+ },
841
+ // Murf may not have a separate listening model
842
+ });
843
+ ```
844
+
845
+ Visit the [Murf Voice Reference](https://mastra.ai/reference/v1/voice/murf) for more information on the Murf voice provider.
846
+
847
+
848
+ **openai-realtime:**
849
+
850
+ ```typescript
851
+ // OpenAI Realtime Voice Configuration
852
+ const voice = new OpenAIRealtimeVoice({
853
+ speechModel: {
854
+ name: "gpt-3.5-turbo", // Example model name
855
+ apiKey: process.env.OPENAI_API_KEY,
856
+ language: "en-US", // Language code
857
+ },
858
+ listeningModel: {
859
+ name: "whisper-1", // Example model name
860
+ apiKey: process.env.OPENAI_API_KEY,
861
+ format: "ogg", // Audio format
862
+ },
863
+ speaker: "alloy", // Example speaker name
864
+ });
865
+ ```
866
+
867
+ For more information on the OpenAI Realtime voice provider, refer to the [OpenAI Realtime Voice Reference](https://mastra.ai/reference/v1/voice/openai-realtime).
868
+
869
+
870
+ **google-gemini-live:**
871
+
872
+ ```typescript
873
+ // Google Gemini Live Voice Configuration
874
+ const voice = new GeminiLiveVoice({
875
+ speechModel: {
876
+ name: "gemini-2.0-flash-exp", // Example model name
877
+ apiKey: process.env.GOOGLE_API_KEY,
878
+ },
879
+ speaker: "Puck", // Example speaker name
880
+ // Google Gemini Live is a realtime bidirectional API without separate speech and listening models
881
+ });
882
+ ```
883
+
884
+ Visit the [Google Gemini Live Reference](https://mastra.ai/reference/v1/voice/google-gemini-live) for more information on the Google Gemini Live voice provider.
885
+
886
+
887
+ **aisdk:**
888
+
889
+ ```typescript
890
+ // AI SDK Voice Configuration
891
+ import { Agent } from "@mastra/core/agent";
+ import { CompositeVoice } from "@mastra/core/voice";
892
+ import { openai } from "@ai-sdk/openai";
893
+ import { elevenlabs } from "@ai-sdk/elevenlabs";
894
+
895
+ // Use AI SDK models directly - no need to install separate packages
896
+ const voice = new CompositeVoice({
897
+ input: openai.transcription('whisper-1'), // AI SDK transcription
898
+ output: elevenlabs.speech('eleven_turbo_v2'), // AI SDK speech
899
+ });
900
+
901
+ // Works seamlessly with your agent
902
+ const voiceAgent = new Agent({
903
+ id: "aisdk-voice-agent",
904
+ name: "AI SDK Voice Agent",
905
+ instructions: "You are a helpful assistant with voice capabilities.",
906
+ model: "openai/gpt-5.1",
907
+ voice,
908
+ });
909
+ ```
910
+
911
+
912
+ ### Using Multiple Voice Providers
913
+
914
+ This example demonstrates how to create and use two different voice providers in Mastra: OpenAI for speech-to-text (STT) and PlayAI for text-to-speech (TTS).
915
+
916
+ Start by creating instances of the voice providers with any necessary configuration.
917
+
918
+ ```typescript
919
+ import { OpenAIVoice } from "@mastra/voice-openai";
920
+ import { PlayAIVoice } from "@mastra/voice-playai";
921
+ import { CompositeVoice } from "@mastra/core/voice";
922
+ import { playAudio, getMicrophoneStream } from "@mastra/node-audio";
923
+
924
+ // Initialize OpenAI voice for STT
925
+ const input = new OpenAIVoice({
926
+ listeningModel: {
927
+ name: "whisper-1",
928
+ apiKey: process.env.OPENAI_API_KEY,
929
+ },
930
+ });
931
+
932
+ // Initialize PlayAI voice for TTS
933
+ const output = new PlayAIVoice({
934
+ speechModel: {
935
+ name: "playai-voice",
936
+ apiKey: process.env.PLAYAI_API_KEY,
937
+ },
938
+ });
939
+
940
+ // Combine the providers using CompositeVoice
941
+ const voice = new CompositeVoice({
942
+ input,
943
+ output,
944
+ });
945
+
946
+ // Implement voice interactions using the combined voice provider
947
+ const audioStream = getMicrophoneStream(); // Assume this function gets audio input
948
+ const transcript = await voice.listen(audioStream);
949
+
950
+ // Log the transcribed text
951
+ console.log("Transcribed text:", transcript);
952
+
953
+ // Convert text to speech
954
+ const responseAudio = await voice.speak(`You said: ${transcript}`, {
955
+ speaker: "default", // Optional: specify a speaker,
956
+ responseFormat: "wav", // Optional: specify a response format
957
+ });
958
+
959
+ // Play the audio response
960
+ playAudio(responseAudio);
961
+ ```
962
+
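+ The combined provider plugs into an agent exactly like a single provider; pass it as `voice` (a short sketch reusing the `voice` instance created above):
+
+ ```typescript
+ import { Agent } from "@mastra/core/agent";
+
+ // Attach the combined STT/TTS provider to an agent
+ const voiceAgent = new Agent({
+   id: "voice-agent",
+   name: "Voice Agent",
+   instructions:
+     "You are a voice assistant that can help users with their tasks.",
+   model: "openai/gpt-5.1",
+   voice,
+ });
+ ```
+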
963
+ ### Using AI SDK Model Providers
964
+
965
+ You can also use AI SDK models directly with `CompositeVoice`:
966
+
967
+ ```typescript
968
+ import { CompositeVoice } from "@mastra/core/voice";
969
+ import { openai } from "@ai-sdk/openai";
970
+ import { elevenlabs } from "@ai-sdk/elevenlabs";
971
+ import { playAudio, getMicrophoneStream } from "@mastra/node-audio";
972
+
973
+ // Use AI SDK models directly - no provider setup needed
974
+ const voice = new CompositeVoice({
975
+ input: openai.transcription('whisper-1'), // AI SDK transcription
976
+ output: elevenlabs.speech('eleven_turbo_v2'), // AI SDK speech
977
+ });
978
+
979
+ // Works the same way as Mastra providers
980
+ const audioStream = getMicrophoneStream();
981
+ const transcript = await voice.listen(audioStream);
982
+
983
+ console.log("Transcribed text:", transcript);
984
+
985
+ // Convert text to speech
986
+ const responseAudio = await voice.speak(`You said: ${transcript}`, {
987
+ speaker: "Rachel", // ElevenLabs voice
988
+ });
989
+
990
+ playAudio(responseAudio);
991
+ ```
992
+
993
+ You can also mix AI SDK models with Mastra providers:
994
+
995
+ ```typescript
996
+ import { CompositeVoice } from "@mastra/core/voice";
997
+ import { PlayAIVoice } from "@mastra/voice-playai";
998
+ import { groq } from "@ai-sdk/groq";
999
+
1000
+ const voice = new CompositeVoice({
1001
+ input: groq.transcription('whisper-large-v3'), // AI SDK for STT
1002
+ output: new PlayAIVoice(), // Mastra provider for TTS
1003
+ });
1004
+ ```
1005
+
1006
+ For more information on the CompositeVoice, refer to the [CompositeVoice Reference](https://mastra.ai/reference/v1/voice/composite-voice).
1007
+
1008
+ ## More Resources
1009
+
1010
+ - [CompositeVoice](https://mastra.ai/reference/v1/voice/composite-voice)
1011
+ - [MastraVoice](https://mastra.ai/reference/v1/voice/mastra-voice)
1012
+ - [OpenAI Voice](https://mastra.ai/reference/v1/voice/openai)
1013
+ - [OpenAI Realtime Voice](https://mastra.ai/reference/v1/voice/openai-realtime)
1014
+ - [Azure Voice](https://mastra.ai/reference/v1/voice/azure)
1015
+ - [Google Voice](https://mastra.ai/reference/v1/voice/google)
1016
+ - [Google Gemini Live Voice](https://mastra.ai/reference/v1/voice/google-gemini-live)
1017
+ - [Deepgram Voice](https://mastra.ai/reference/v1/voice/deepgram)
1018
+ - [PlayAI Voice](https://mastra.ai/reference/v1/voice/playai)
1019
+ - [Voice Examples](https://github.com/mastra-ai/voice-examples)