@mastra/voice-cloudflare 0.12.1 → 0.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/dist/_types/@internal_voice/dist/_types/@internal_ai-sdk-v5/dist/index.d.ts +8888 -0
- package/dist/_types/@internal_voice/dist/_types/@internal_core/dist/base/index.d.ts +31 -0
- package/dist/_types/@internal_voice/dist/_types/@internal_core/dist/logger/index.d.ts +217 -0
- package/dist/_types/@internal_voice/dist/_types/@internal_core/dist/request-context/index.d.ts +147 -0
- package/dist/_types/@internal_voice/dist/_types/@internal_core/dist/types/index.d.ts +3 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/ZodError.d.ts +164 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/errors.d.ts +5 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/external.d.ts +6 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/enumUtil.d.ts +8 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/errorUtil.d.ts +9 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/parseUtil.d.ts +78 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/partialUtil.d.ts +8 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/typeAliases.d.ts +2 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/util.d.ts +85 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/index.d.cts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/index.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/locales/en.d.ts +3 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/standard-schema.d.ts +102 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/types.d.ts +1034 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/checks.d.ts +1 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/coerce.d.ts +17 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/compat.d.ts +50 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/errors.d.ts +30 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/external.d.ts +16 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/from-json-schema.d.ts +12 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/index.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/iso.d.ts +22 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/parse.d.ts +31 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/schemas.d.ts +767 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/api.d.ts +325 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/checks.d.ts +278 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/core.d.ts +70 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/doc.d.ts +14 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/errors.d.ts +221 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/index.d.ts +16 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/json-schema-generator.d.ts +65 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/json-schema-processors.d.ts +49 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/json-schema.d.ts +88 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/parse.d.ts +49 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/regexes.d.ts +85 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/registries.d.ts +35 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/schemas.d.ts +1184 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/standard-schema.d.ts +126 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/to-json-schema.d.ts +114 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/util.d.ts +200 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/versions.d.ts +5 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/index.d.cts +3 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ar.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/az.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/be.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/bg.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ca.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/cs.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/da.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/de.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/el.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/en.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/eo.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/es.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/fa.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/fi.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/fr-CA.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/fr.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/he.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/hr.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/hu.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/hy.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/id.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/index.d.ts +52 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/is.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/it.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ja.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ka.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/kh.d.ts +5 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/km.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ko.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/lt.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/mk.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ms.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/nl.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/no.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ota.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/pl.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ps.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/pt.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ro.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ru.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/sl.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/sv.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ta.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/th.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/tr.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ua.d.ts +5 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/uk.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ur.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/uz.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/vi.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/yo.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/zh-CN.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/zh-TW.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/index.d.ts +16 -0
- package/dist/_types/@internal_voice/dist/voice/aisdk/index.d.ts +3 -0
- package/dist/_types/@internal_voice/dist/voice/aisdk/speech.d.ts +23 -0
- package/dist/_types/@internal_voice/dist/voice/aisdk/transcription.d.ts +22 -0
- package/dist/_types/@internal_voice/dist/voice/composite-voice.d.ts +72 -0
- package/dist/_types/@internal_voice/dist/voice/default-voice.d.ts +13 -0
- package/dist/_types/@internal_voice/dist/voice/index.d.ts +5 -0
- package/dist/_types/@internal_voice/dist/voice/voice.d.ts +172 -0
- package/dist/docs/SKILL.md +1 -1
- package/dist/docs/assets/SOURCE_MAP.json +1 -1
- package/dist/docs/references/docs-agents-adding-voice.md +55 -23
- package/dist/docs/references/docs-voice-overview.md +317 -26
- package/dist/docs/references/reference-voice-cloudflare.md +3 -3
- package/dist/index.cjs +259 -3
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +258 -2
- package/dist/index.js.map +1 -1
- package/package.json +11 -12
|
@@ -20,7 +20,7 @@ export const agent = new Agent({
|
|
|
20
20
|
id: 'voice-agent',
|
|
21
21
|
name: 'Voice Agent',
|
|
22
22
|
instructions: `You are a helpful assistant with both STT and TTS capabilities.`,
|
|
23
|
-
model: 'openai/gpt-5.
|
|
23
|
+
model: 'openai/gpt-5.5',
|
|
24
24
|
voice,
|
|
25
25
|
})
|
|
26
26
|
|
|
@@ -109,7 +109,7 @@ export const agent = new Agent({
|
|
|
109
109
|
id: 'speech-to-speech-agent',
|
|
110
110
|
name: 'Speech-to-Speech Agent',
|
|
111
111
|
instructions: `You are a helpful assistant with speech-to-speech capabilities.`,
|
|
112
|
-
model: 'openai/gpt-5.
|
|
112
|
+
model: 'openai/gpt-5.5',
|
|
113
113
|
tools: {
|
|
114
114
|
// Tools configured on Agent are passed to voice provider
|
|
115
115
|
search,
|
|
@@ -132,6 +132,37 @@ agent.voice.send(microphoneStream)
|
|
|
132
132
|
agent.voice.close()
|
|
133
133
|
```
|
|
134
134
|
|
|
135
|
+
### Per-session voice for concurrent sessions
|
|
136
|
+
|
|
137
|
+
A static `voice` instance is shared across every request. For one-shot text-to-speech this is fine, but realtime and speech-to-speech providers store one WebSocket, one set of tools, and one request context per instance. If you deploy a single agent that handles several live sessions at once, a shared instance lets one session overwrite another session's tools, instructions, and request context.
|
|
138
|
+
|
|
139
|
+
To give each session its own voice, provide `voice` as a resolver. Mastra runs the resolver on every `getVoice()` call and returns a fresh, session-owned instance:
|
|
140
|
+
|
|
141
|
+
```typescript
|
|
142
|
+
import { Agent } from '@mastra/core/agent'
|
|
143
|
+
import { OpenAIRealtimeVoice } from '@mastra/voice-openai-realtime'
|
|
144
|
+
|
|
145
|
+
export const agent = new Agent({
|
|
146
|
+
id: 'support-line',
|
|
147
|
+
name: 'Support Line',
|
|
148
|
+
instructions: ({ requestContext }) => `Help user ${requestContext.get('user')}.`,
|
|
149
|
+
model: 'openai/gpt-5.5',
|
|
150
|
+
voice: ({ requestContext }) => new OpenAIRealtimeVoice({ apiKey: requestContext.get('apiKey') }),
|
|
151
|
+
})
|
|
152
|
+
|
|
153
|
+
// Each concurrent session resolves its own voice instance
|
|
154
|
+
const voice = await agent.getVoice({ requestContext })
|
|
155
|
+
await voice.connect()
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
When you use a resolver:
|
|
159
|
+
|
|
160
|
+
- Each call to `getVoice()` returns a new instance, so concurrent sessions never share state.
|
|
161
|
+
- Mastra does not add tools or instructions to a resolver instance. Configure those inside the resolver or on the provider.
|
|
162
|
+
- You own the lifecycle of the returned instance, so call `disconnect()` or `close()` when the session ends.
|
|
163
|
+
|
|
164
|
+
The `agent.voice` getter has no request context, so it throws when `voice` is a resolver. Use `agent.getVoice({ requestContext })` instead.
|
|
165
|
+
|
|
135
166
|
### Event System
|
|
136
167
|
|
|
137
168
|
The realtime voice provider emits several events you can listen for:
|
|
@@ -209,7 +240,7 @@ export const convertToText = async (input: string | NodeJS.ReadableStream): Prom
|
|
|
209
240
|
export const hybridVoiceAgent = new Agent({
|
|
210
241
|
id: 'hybrid-voice-agent',
|
|
211
242
|
name: 'Hybrid Voice Agent',
|
|
212
|
-
model: 'openai/gpt-5.
|
|
243
|
+
model: 'openai/gpt-5.5',
|
|
213
244
|
instructions: 'You can speak and listen using different providers.',
|
|
214
245
|
voice: new CompositeVoice({
|
|
215
246
|
input: new OpenAIVoice(),
|
|
@@ -221,7 +252,7 @@ export const unifiedVoiceAgent = new Agent({
|
|
|
221
252
|
id: 'unified-voice-agent',
|
|
222
253
|
name: 'Unified Voice Agent',
|
|
223
254
|
instructions: 'You are an agent with both STT and TTS capabilities.',
|
|
224
|
-
model: 'openai/gpt-5.
|
|
255
|
+
model: 'openai/gpt-5.5',
|
|
225
256
|
voice: new OpenAIVoice(),
|
|
226
257
|
})
|
|
227
258
|
|
|
@@ -263,7 +294,7 @@ export const agent = new Agent({
|
|
|
263
294
|
id: 'voice-agent',
|
|
264
295
|
name: 'Voice Agent',
|
|
265
296
|
instructions: `You are a helpful assistant with both STT and TTS capabilities.`,
|
|
266
|
-
model: 'openai/gpt-5.
|
|
297
|
+
model: 'openai/gpt-5.5',
|
|
267
298
|
|
|
268
299
|
// Create a composite voice using OpenAI for listening and PlayAI for speaking
|
|
269
300
|
voice: new CompositeVoice({
|
|
@@ -288,7 +319,7 @@ export const agent = new Agent({
|
|
|
288
319
|
id: 'aisdk-voice-agent',
|
|
289
320
|
name: 'AI SDK Voice Agent',
|
|
290
321
|
instructions: `You are a helpful assistant with voice capabilities.`,
|
|
291
|
-
model: 'openai/gpt-5.
|
|
322
|
+
model: 'openai/gpt-5.5',
|
|
292
323
|
|
|
293
324
|
// Pass AI SDK models directly to CompositeVoice
|
|
294
325
|
voice: new CompositeVoice({
|
|
@@ -327,23 +358,24 @@ For the complete list of supported AI SDK providers and their capabilities:
|
|
|
327
358
|
|
|
328
359
|
Mastra supports multiple voice providers for text-to-speech (TTS) and speech-to-text (STT) capabilities:
|
|
329
360
|
|
|
330
|
-
| Provider | Package | Features
|
|
331
|
-
| --------------- | ------------------------------- |
|
|
332
|
-
| OpenAI | `@mastra/voice-openai` | TTS, STT
|
|
333
|
-
| OpenAI Realtime | `@mastra/voice-openai-realtime` | Realtime speech-to-speech
|
|
334
|
-
|
|
|
335
|
-
|
|
|
336
|
-
|
|
|
337
|
-
|
|
|
338
|
-
|
|
|
339
|
-
|
|
|
340
|
-
|
|
|
341
|
-
|
|
|
342
|
-
|
|
|
361
|
+
| Provider | Package | Features | Reference |
|
|
362
|
+
| --------------- | ------------------------------- | ----------------------------------------- | ------------------------------------------------------------------ |
|
|
363
|
+
| OpenAI | `@mastra/voice-openai` | TTS, STT | [Documentation](https://mastra.ai/reference/voice/openai) |
|
|
364
|
+
| OpenAI Realtime | `@mastra/voice-openai-realtime` | Realtime speech-to-speech | [Documentation](https://mastra.ai/reference/voice/openai-realtime) |
|
|
365
|
+
| AWS Nova Sonic | `@mastra/voice-aws-nova-sonic` | Realtime speech-to-speech via AWS Bedrock | [Documentation](https://mastra.ai/reference/voice/aws-nova-sonic) |
|
|
366
|
+
| ElevenLabs | `@mastra/voice-elevenlabs` | High-quality TTS | [Documentation](https://mastra.ai/reference/voice/elevenlabs) |
|
|
367
|
+
| PlayAI | `@mastra/voice-playai` | TTS | [Documentation](https://mastra.ai/reference/voice/playai) |
|
|
368
|
+
| Google | `@mastra/voice-google` | TTS, STT | [Documentation](https://mastra.ai/reference/voice/google) |
|
|
369
|
+
| Deepgram | `@mastra/voice-deepgram` | STT | [Documentation](https://mastra.ai/reference/voice/deepgram) |
|
|
370
|
+
| Murf | `@mastra/voice-murf` | TTS | [Documentation](https://mastra.ai/reference/voice/murf) |
|
|
371
|
+
| Speechify | `@mastra/voice-speechify` | TTS | [Documentation](https://mastra.ai/reference/voice/speechify) |
|
|
372
|
+
| Sarvam | `@mastra/voice-sarvam` | TTS, STT | [Documentation](https://mastra.ai/reference/voice/sarvam) |
|
|
373
|
+
| Azure | `@mastra/voice-azure` | TTS, STT | [Documentation](https://mastra.ai/reference/voice/azure) |
|
|
374
|
+
| Cloudflare | `@mastra/voice-cloudflare` | TTS | [Documentation](https://mastra.ai/reference/voice/cloudflare) |
|
|
343
375
|
|
|
344
376
|
## Next steps
|
|
345
377
|
|
|
346
|
-
- [Voice API Reference](https://mastra.ai/reference/voice/mastra-voice)
|
|
347
|
-
- [Text to Speech Examples](https://github.com/mastra-ai/voice-examples/tree/main/text-to-speech)
|
|
348
|
-
- [Speech to Text Examples](https://github.com/mastra-ai/voice-examples/tree/main/speech-to-text)
|
|
349
|
-
- [Speech to Speech Examples](https://github.com/mastra-ai/voice-examples/tree/main/speech-to-speech)
|
|
378
|
+
- [Voice API Reference](https://mastra.ai/reference/voice/mastra-voice): Detailed API documentation for voice capabilities
|
|
379
|
+
- [Text to Speech Examples](https://github.com/mastra-ai/voice-examples/tree/main/text-to-speech): Interactive story generator and other TTS implementations
|
|
380
|
+
- [Speech to Text Examples](https://github.com/mastra-ai/voice-examples/tree/main/speech-to-text): Voice memo app and other STT implementations
|
|
381
|
+
- [Speech to Speech Examples](https://github.com/mastra-ai/voice-examples/tree/main/speech-to-speech): Real-time voice conversation with call analysis
|
|
@@ -16,7 +16,7 @@ const voiceAgent = new Agent({
|
|
|
16
16
|
id: 'voice-agent',
|
|
17
17
|
name: 'Voice Agent',
|
|
18
18
|
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
19
|
-
model: 'openai/gpt-5.
|
|
19
|
+
model: 'openai/gpt-5.5',
|
|
20
20
|
voice: new OpenAIVoice(),
|
|
21
21
|
})
|
|
22
22
|
```
|
|
@@ -40,7 +40,7 @@ const voiceAgent = new Agent({
|
|
|
40
40
|
id: 'voice-agent',
|
|
41
41
|
name: 'Voice Agent',
|
|
42
42
|
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
43
|
-
model: 'openai/gpt-5.
|
|
43
|
+
model: 'openai/gpt-5.5',
|
|
44
44
|
voice: new OpenAIVoice(),
|
|
45
45
|
})
|
|
46
46
|
|
|
@@ -68,7 +68,7 @@ const voiceAgent = new Agent({
|
|
|
68
68
|
id: 'voice-agent',
|
|
69
69
|
name: 'Voice Agent',
|
|
70
70
|
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
71
|
-
model: 'openai/gpt-5.
|
|
71
|
+
model: 'openai/gpt-5.5',
|
|
72
72
|
voice: new AzureVoice(),
|
|
73
73
|
})
|
|
74
74
|
|
|
@@ -95,7 +95,7 @@ const voiceAgent = new Agent({
|
|
|
95
95
|
id: 'voice-agent',
|
|
96
96
|
name: 'Voice Agent',
|
|
97
97
|
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
98
|
-
model: 'openai/gpt-5.
|
|
98
|
+
model: 'openai/gpt-5.5',
|
|
99
99
|
voice: new ElevenLabsVoice(),
|
|
100
100
|
})
|
|
101
101
|
|
|
@@ -122,7 +122,7 @@ const voiceAgent = new Agent({
|
|
|
122
122
|
id: 'voice-agent',
|
|
123
123
|
name: 'Voice Agent',
|
|
124
124
|
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
125
|
-
model: 'openai/gpt-5.
|
|
125
|
+
model: 'openai/gpt-5.5',
|
|
126
126
|
voice: new PlayAIVoice(),
|
|
127
127
|
})
|
|
128
128
|
|
|
@@ -149,7 +149,7 @@ const voiceAgent = new Agent({
|
|
|
149
149
|
id: 'voice-agent',
|
|
150
150
|
name: 'Voice Agent',
|
|
151
151
|
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
152
|
-
model: 'openai/gpt-5.
|
|
152
|
+
model: 'openai/gpt-5.5',
|
|
153
153
|
voice: new GoogleVoice(),
|
|
154
154
|
})
|
|
155
155
|
|
|
@@ -176,7 +176,7 @@ const voiceAgent = new Agent({
|
|
|
176
176
|
id: 'voice-agent',
|
|
177
177
|
name: 'Voice Agent',
|
|
178
178
|
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
179
|
-
model: 'openai/gpt-5.
|
|
179
|
+
model: 'openai/gpt-5.5',
|
|
180
180
|
voice: new CloudflareVoice(),
|
|
181
181
|
})
|
|
182
182
|
|
|
@@ -203,7 +203,7 @@ const voiceAgent = new Agent({
|
|
|
203
203
|
id: 'voice-agent',
|
|
204
204
|
name: 'Voice Agent',
|
|
205
205
|
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
206
|
-
model: 'openai/gpt-5.
|
|
206
|
+
model: 'openai/gpt-5.5',
|
|
207
207
|
voice: new DeepgramVoice(),
|
|
208
208
|
})
|
|
209
209
|
|
|
@@ -219,6 +219,33 @@ playAudio(audioStream)
|
|
|
219
219
|
|
|
220
220
|
Visit the [Deepgram Voice Reference](https://mastra.ai/reference/voice/deepgram) for more information on the Deepgram voice provider.
|
|
221
221
|
|
|
222
|
+
**Inworld**:
|
|
223
|
+
|
|
224
|
+
```typescript
|
|
225
|
+
import { Agent } from '@mastra/core/agent'
|
|
226
|
+
import { InworldVoice } from '@mastra/voice-inworld'
|
|
227
|
+
import { playAudio } from '@mastra/node-audio'
|
|
228
|
+
|
|
229
|
+
const voiceAgent = new Agent({
|
|
230
|
+
id: 'voice-agent',
|
|
231
|
+
name: 'Voice Agent',
|
|
232
|
+
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
233
|
+
model: 'openai/gpt-5.5',
|
|
234
|
+
voice: new InworldVoice(),
|
|
235
|
+
})
|
|
236
|
+
|
|
237
|
+
const { text } = await voiceAgent.generate('What color is the sky?')
|
|
238
|
+
|
|
239
|
+
// Convert text to speech to an Audio Stream
|
|
240
|
+
const audioStream = await voiceAgent.voice.speak(text, {
|
|
241
|
+
speaker: 'Dennis', // Optional: specify a speaker
|
|
242
|
+
})
|
|
243
|
+
|
|
244
|
+
playAudio(audioStream)
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
Visit the [Inworld Voice Reference](https://mastra.ai/reference/voice/inworld) for more information on the Inworld voice provider.
|
|
248
|
+
|
|
222
249
|
**Speechify**:
|
|
223
250
|
|
|
224
251
|
```typescript
|
|
@@ -230,7 +257,7 @@ const voiceAgent = new Agent({
|
|
|
230
257
|
id: 'voice-agent',
|
|
231
258
|
name: 'Voice Agent',
|
|
232
259
|
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
233
|
-
model: 'openai/gpt-5.
|
|
260
|
+
model: 'openai/gpt-5.5',
|
|
234
261
|
voice: new SpeechifyVoice(),
|
|
235
262
|
})
|
|
236
263
|
|
|
@@ -257,7 +284,7 @@ const voiceAgent = new Agent({
|
|
|
257
284
|
id: 'voice-agent',
|
|
258
285
|
name: 'Voice Agent',
|
|
259
286
|
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
260
|
-
model: 'openai/gpt-5.
|
|
287
|
+
model: 'openai/gpt-5.5',
|
|
261
288
|
voice: new SarvamVoice(),
|
|
262
289
|
})
|
|
263
290
|
|
|
@@ -265,7 +292,7 @@ const { text } = await voiceAgent.generate('What color is the sky?')
|
|
|
265
292
|
|
|
266
293
|
// Convert text to speech to an Audio Stream
|
|
267
294
|
const audioStream = await voiceAgent.voice.speak(text, {
|
|
268
|
-
speaker: '
|
|
295
|
+
speaker: 'shubh', // Optional: specify a bulbul:v3 speaker
|
|
269
296
|
})
|
|
270
297
|
|
|
271
298
|
playAudio(audioStream)
|
|
@@ -284,7 +311,7 @@ const voiceAgent = new Agent({
|
|
|
284
311
|
id: 'voice-agent',
|
|
285
312
|
name: 'Voice Agent',
|
|
286
313
|
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
287
|
-
model: 'openai/gpt-5.
|
|
314
|
+
model: 'openai/gpt-5.5',
|
|
288
315
|
voice: new MurfVoice(),
|
|
289
316
|
})
|
|
290
317
|
|
|
@@ -319,7 +346,7 @@ const voiceAgent = new Agent({
|
|
|
319
346
|
id: 'voice-agent',
|
|
320
347
|
name: 'Voice Agent',
|
|
321
348
|
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
322
|
-
model: 'openai/gpt-5.
|
|
349
|
+
model: 'openai/gpt-5.5',
|
|
323
350
|
voice: new OpenAIVoice(),
|
|
324
351
|
})
|
|
325
352
|
|
|
@@ -348,7 +375,7 @@ const voiceAgent = new Agent({
|
|
|
348
375
|
id: 'voice-agent',
|
|
349
376
|
name: 'Voice Agent',
|
|
350
377
|
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
351
|
-
model: 'openai/gpt-5.
|
|
378
|
+
model: 'openai/gpt-5.5',
|
|
352
379
|
voice: new AzureVoice(),
|
|
353
380
|
})
|
|
354
381
|
|
|
@@ -376,7 +403,7 @@ const voiceAgent = new Agent({
|
|
|
376
403
|
id: 'voice-agent',
|
|
377
404
|
name: 'Voice Agent',
|
|
378
405
|
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
379
|
-
model: 'openai/gpt-5.
|
|
406
|
+
model: 'openai/gpt-5.5',
|
|
380
407
|
voice: new ElevenLabsVoice(),
|
|
381
408
|
})
|
|
382
409
|
|
|
@@ -404,7 +431,7 @@ const voiceAgent = new Agent({
|
|
|
404
431
|
id: 'voice-agent',
|
|
405
432
|
name: 'Voice Agent',
|
|
406
433
|
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
407
|
-
model: 'openai/gpt-5.
|
|
434
|
+
model: 'openai/gpt-5.5',
|
|
408
435
|
voice: new GoogleVoice(),
|
|
409
436
|
})
|
|
410
437
|
|
|
@@ -432,7 +459,7 @@ const voiceAgent = new Agent({
|
|
|
432
459
|
id: 'voice-agent',
|
|
433
460
|
name: 'Voice Agent',
|
|
434
461
|
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
435
|
-
model: 'openai/gpt-5.
|
|
462
|
+
model: 'openai/gpt-5.5',
|
|
436
463
|
voice: new CloudflareVoice(),
|
|
437
464
|
})
|
|
438
465
|
|
|
@@ -460,7 +487,7 @@ const voiceAgent = new Agent({
|
|
|
460
487
|
id: 'voice-agent',
|
|
461
488
|
name: 'Voice Agent',
|
|
462
489
|
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
463
|
-
model: 'openai/gpt-5.
|
|
490
|
+
model: 'openai/gpt-5.5',
|
|
464
491
|
voice: new DeepgramVoice(),
|
|
465
492
|
})
|
|
466
493
|
|
|
@@ -477,6 +504,34 @@ const { text } = await voiceAgent.generate(transcript)
|
|
|
477
504
|
|
|
478
505
|
Visit the [Deepgram Voice Reference](https://mastra.ai/reference/voice/deepgram) for more information on the Deepgram voice provider.
|
|
479
506
|
|
|
507
|
+
**Inworld**:
|
|
508
|
+
|
|
509
|
+
```typescript
|
|
510
|
+
import { Agent } from '@mastra/core/agent'
|
|
511
|
+
import { InworldVoice } from '@mastra/voice-inworld'
|
|
512
|
+
import { createReadStream } from 'fs'
|
|
513
|
+
|
|
514
|
+
const voiceAgent = new Agent({
|
|
515
|
+
id: 'voice-agent',
|
|
516
|
+
name: 'Voice Agent',
|
|
517
|
+
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
518
|
+
model: 'openai/gpt-5.5',
|
|
519
|
+
voice: new InworldVoice(),
|
|
520
|
+
})
|
|
521
|
+
|
|
522
|
+
// Use an audio file from a local path
|
|
523
|
+
const audioStream = await createReadStream('./how_can_i_help_you.mp3')
|
|
524
|
+
|
|
525
|
+
// Convert audio to text
|
|
526
|
+
const transcript = await voiceAgent.voice.listen(audioStream)
|
|
527
|
+
console.log(`User said: ${transcript}`)
|
|
528
|
+
|
|
529
|
+
// Generate a response based on the transcript
|
|
530
|
+
const { text } = await voiceAgent.generate(transcript)
|
|
531
|
+
```
|
|
532
|
+
|
|
533
|
+
Visit the [Inworld Voice Reference](https://mastra.ai/reference/voice/inworld) for more information on the Inworld voice provider.
|
|
534
|
+
|
|
480
535
|
**Sarvam**:
|
|
481
536
|
|
|
482
537
|
```typescript
|
|
@@ -488,7 +543,7 @@ const voiceAgent = new Agent({
|
|
|
488
543
|
id: 'voice-agent',
|
|
489
544
|
name: 'Voice Agent',
|
|
490
545
|
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
491
|
-
model: 'openai/gpt-5.
|
|
546
|
+
model: 'openai/gpt-5.5',
|
|
492
547
|
voice: new SarvamVoice(),
|
|
493
548
|
})
|
|
494
549
|
|
|
@@ -520,7 +575,7 @@ const voiceAgent = new Agent({
|
|
|
520
575
|
id: 'voice-agent',
|
|
521
576
|
name: 'Voice Agent',
|
|
522
577
|
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
523
|
-
model: 'openai/gpt-5.
|
|
578
|
+
model: 'openai/gpt-5.5',
|
|
524
579
|
voice: new OpenAIRealtimeVoice(),
|
|
525
580
|
})
|
|
526
581
|
|
|
@@ -550,7 +605,7 @@ const voiceAgent = new Agent({
|
|
|
550
605
|
id: 'voice-agent',
|
|
551
606
|
name: 'Voice Agent',
|
|
552
607
|
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
553
|
-
model: 'openai/gpt-5.
|
|
608
|
+
model: 'openai/gpt-5.5',
|
|
554
609
|
voice: new GeminiLiveVoice({
|
|
555
610
|
// Live API mode
|
|
556
611
|
apiKey: process.env.GOOGLE_API_KEY,
|
|
@@ -588,6 +643,134 @@ await voiceAgent.voice.send(micStream)
|
|
|
588
643
|
|
|
589
644
|
Visit the [Google Gemini Live Reference](https://mastra.ai/reference/voice/google-gemini-live) for more information on the Google Gemini Live voice provider.
|
|
590
645
|
|
|
646
|
+
**AWS Nova Sonic**:
|
|
647
|
+
|
|
648
|
+
```typescript
|
|
649
|
+
import { Agent } from '@mastra/core/agent'
|
|
650
|
+
import { playAudio, getMicrophoneStream } from '@mastra/node-audio'
|
|
651
|
+
import { NovaSonicVoice } from '@mastra/voice-aws-nova-sonic'
|
|
652
|
+
|
|
653
|
+
const voiceAgent = new Agent({
|
|
654
|
+
id: 'voice-agent',
|
|
655
|
+
name: 'Voice Agent',
|
|
656
|
+
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
657
|
+
model: 'openai/gpt-5.5',
|
|
658
|
+
voice: new NovaSonicVoice({
|
|
659
|
+
region: 'us-east-1',
|
|
660
|
+
speaker: 'matthew',
|
|
661
|
+
// Static credentials are optional. The default AWS credential
|
|
662
|
+
// provider chain is used when none are passed.
|
|
663
|
+
}),
|
|
664
|
+
})
|
|
665
|
+
|
|
666
|
+
// Connect before using speak/send
|
|
667
|
+
await voiceAgent.voice.connect()
|
|
668
|
+
|
|
669
|
+
// Listen for assistant audio (Int16Array PCM)
|
|
670
|
+
voiceAgent.voice.on('speaking', ({ audioData }) => {
|
|
671
|
+
if (audioData) playAudio(audioData)
|
|
672
|
+
})
|
|
673
|
+
|
|
674
|
+
// Listen for transcribed text
|
|
675
|
+
voiceAgent.voice.on('writing', ({ text, role }) => {
|
|
676
|
+
console.log(`${role}: ${text}`)
|
|
677
|
+
})
|
|
678
|
+
|
|
679
|
+
// Initiate the conversation
|
|
680
|
+
await voiceAgent.voice.speak('How can I help you today?')
|
|
681
|
+
|
|
682
|
+
// Send continuous audio from the microphone
|
|
683
|
+
const micStream = getMicrophoneStream()
|
|
684
|
+
await voiceAgent.voice.send(micStream)
|
|
685
|
+
```
|
|
686
|
+
|
|
687
|
+
Visit the [AWS Nova Sonic Reference](https://mastra.ai/reference/voice/aws-nova-sonic) for more information on the AWS Nova Sonic voice provider.
|
|
688
|
+
|
|
689
|
+
**Inworld Realtime**:
|
|
690
|
+
|
|
691
|
+
```typescript
|
|
692
|
+
import { Agent } from '@mastra/core/agent'
|
|
693
|
+
import { playAudio, getMicrophoneStream } from '@mastra/node-audio'
|
|
694
|
+
import { InworldRealtimeVoice } from '@mastra/voice-inworld'
|
|
695
|
+
|
|
696
|
+
const voiceAgent = new Agent({
|
|
697
|
+
id: 'voice-agent',
|
|
698
|
+
name: 'Voice Agent',
|
|
699
|
+
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
700
|
+
model: 'openai/gpt-5.5',
|
|
701
|
+
voice: new InworldRealtimeVoice({
|
|
702
|
+
apiKey: process.env.INWORLD_API_KEY,
|
|
703
|
+
model: 'inworld/models/gemma-4-26b-a4b-it',
|
|
704
|
+
speaker: 'Sarah',
|
|
705
|
+
}),
|
|
706
|
+
})
|
|
707
|
+
|
|
708
|
+
// Connect before using speak/send
|
|
709
|
+
await voiceAgent.voice.connect()
|
|
710
|
+
|
|
711
|
+
// Listen for agent audio (PCM stream)
|
|
712
|
+
voiceAgent.voice.on('speaker', stream => {
|
|
713
|
+
playAudio(stream)
|
|
714
|
+
})
|
|
715
|
+
|
|
716
|
+
// Listen for text responses and transcriptions
|
|
717
|
+
voiceAgent.voice.on('writing', ({ text, role }) => {
|
|
718
|
+
console.log(`${role}: ${text}`)
|
|
719
|
+
})
|
|
720
|
+
|
|
721
|
+
// Initiate the conversation
|
|
722
|
+
await voiceAgent.voice.speak('How can I help you today?')
|
|
723
|
+
|
|
724
|
+
// Send continuous audio from the microphone
|
|
725
|
+
const micStream = getMicrophoneStream()
|
|
726
|
+
await voiceAgent.voice.send(micStream)
|
|
727
|
+
```
|
|
728
|
+
|
|
729
|
+
Visit the [Inworld Realtime Reference](https://mastra.ai/reference/voice/inworld-realtime) for more information on the Inworld Realtime voice provider.
|
|
730
|
+
|
|
731
|
+
**xAI Realtime**:
|
|
732
|
+
|
|
733
|
+
```typescript
|
|
734
|
+
import { Agent } from '@mastra/core/agent'
|
|
735
|
+
import { playAudio, getMicrophoneStream } from '@mastra/node-audio'
|
|
736
|
+
import { XAIRealtimeVoice } from '@mastra/voice-xai-realtime'
|
|
737
|
+
|
|
738
|
+
const voiceAgent = new Agent({
|
|
739
|
+
id: 'voice-agent',
|
|
740
|
+
name: 'Voice Agent',
|
|
741
|
+
instructions: 'You are a voice assistant that can help users with their tasks.',
|
|
742
|
+
model: 'xai/grok-4.3',
|
|
743
|
+
voice: new XAIRealtimeVoice({
|
|
744
|
+
apiKey: process.env.XAI_API_KEY,
|
|
745
|
+
model: 'grok-voice-think-fast-1.0',
|
|
746
|
+
speaker: 'eve',
|
|
747
|
+
turnDetection: { type: 'server_vad' },
|
|
748
|
+
}),
|
|
749
|
+
})
|
|
750
|
+
|
|
751
|
+
// Connect before using speak/send
|
|
752
|
+
await voiceAgent.voice.connect()
|
|
753
|
+
|
|
754
|
+
// Listen for agent audio responses
|
|
755
|
+
voiceAgent.voice.on('speaker', audioStream => {
|
|
756
|
+
playAudio(audioStream)
|
|
757
|
+
})
|
|
758
|
+
|
|
759
|
+
// Listen for text responses and transcriptions
|
|
760
|
+
voiceAgent.voice.on('writing', ({ text, role }) => {
|
|
761
|
+
console.log(`${role}: ${text}`)
|
|
762
|
+
})
|
|
763
|
+
|
|
764
|
+
// Initiate the conversation
|
|
765
|
+
await voiceAgent.voice.speak('How can I help you today?')
|
|
766
|
+
|
|
767
|
+
// Send continuous audio from the microphone
|
|
768
|
+
const micStream = getMicrophoneStream()
|
|
769
|
+
await voiceAgent.voice.send(micStream)
|
|
770
|
+
```
|
|
771
|
+
|
|
772
|
+
Visit the [xAI Realtime Voice Reference](https://mastra.ai/reference/voice/xai-realtime) for more information on the xAI Realtime voice provider.
|
|
773
|
+
|
|
591
774
|
## Voice configuration
|
|
592
775
|
|
|
593
776
|
Each voice provider can be configured with different models and options. Below are the detailed configuration options for all supported providers:
|
|
@@ -736,6 +919,34 @@ const voice = new DeepgramVoice({
|
|
|
736
919
|
|
|
737
920
|
Visit the [Deepgram Voice Reference](https://mastra.ai/reference/voice/deepgram) for more information on the Deepgram voice provider.
|
|
738
921
|
|
|
922
|
+
**Inworld**:
|
|
923
|
+
|
|
924
|
+
```typescript
|
|
925
|
+
// Inworld Voice Configuration
|
|
926
|
+
const voice = new InworldVoice({
|
|
927
|
+
speechModel: {
|
|
928
|
+
name: 'inworld-tts-2',
|
|
929
|
+
apiKey: process.env.INWORLD_API_KEY,
|
|
930
|
+
},
|
|
931
|
+
listeningModel: {
|
|
932
|
+
name: 'groq/whisper-large-v3',
|
|
933
|
+
apiKey: process.env.INWORLD_API_KEY,
|
|
934
|
+
},
|
|
935
|
+
speaker: 'Dennis',
|
|
936
|
+
audioEncoding: 'MP3',
|
|
937
|
+
sampleRateHertz: 48000,
|
|
938
|
+
language: 'en-US',
|
|
939
|
+
})
|
|
940
|
+
|
|
941
|
+
// Per-call options: `deliveryMode` is honored only by `inworld-tts-2`.
|
|
942
|
+
const audioStream = await voice.speak('Hello!', {
|
|
943
|
+
deliveryMode: 'BALANCED', // 'STABLE' | 'BALANCED' | 'CREATIVE'
|
|
944
|
+
language: 'en-US', // BCP-47 per-call override
|
|
945
|
+
})
|
|
946
|
+
```
|
|
947
|
+
|
|
948
|
+
Visit the [Inworld Voice Reference](https://mastra.ai/reference/voice/inworld) for more information on the Inworld voice provider.
|
|
949
|
+
|
|
739
950
|
**Speechify**:
|
|
740
951
|
|
|
741
952
|
```typescript
|
|
@@ -760,12 +971,15 @@ Visit the [Speechify Voice Reference](https://mastra.ai/reference/voice/speechif
|
|
|
760
971
|
// Sarvam Voice Configuration
|
|
761
972
|
const voice = new SarvamVoice({
|
|
762
973
|
speechModel: {
|
|
763
|
-
|
|
974
|
+
model: 'bulbul:v3', // TTS model (bulbul:v2 or bulbul:v3)
|
|
764
975
|
apiKey: process.env.SARVAM_API_KEY,
|
|
765
|
-
language: 'en-IN', //
|
|
766
|
-
style: 'conversational', // Style setting
|
|
976
|
+
language: 'en-IN', // BCP-47 language code
|
|
767
977
|
},
|
|
768
|
-
|
|
978
|
+
listeningModel: {
|
|
979
|
+
model: 'saarika:v2.5', // STT model (saarika:v2.5 or saaras:v3)
|
|
980
|
+
apiKey: process.env.SARVAM_API_KEY,
|
|
981
|
+
},
|
|
982
|
+
speaker: 'shubh', // Default bulbul:v3 speaker
|
|
769
983
|
})
|
|
770
984
|
```
|
|
771
985
|
|
|
@@ -809,6 +1023,38 @@ const voice = new OpenAIRealtimeVoice({
|
|
|
809
1023
|
|
|
810
1024
|
For more information on the OpenAI Realtime voice provider, refer to the [OpenAI Realtime Voice Reference](https://mastra.ai/reference/voice/openai-realtime).
|
|
811
1025
|
|
|
1026
|
+
**xAI Realtime**:
|
|
1027
|
+
|
|
1028
|
+
```typescript
|
|
1029
|
+
// xAI Realtime Voice Configuration
|
|
1030
|
+
const voice = new XAIRealtimeVoice({
|
|
1031
|
+
apiKey: process.env.XAI_API_KEY,
|
|
1032
|
+
model: 'grok-voice-think-fast-1.0',
|
|
1033
|
+
speaker: 'eve',
|
|
1034
|
+
instructions: 'You are a concise voice assistant.',
|
|
1035
|
+
turnDetection: {
|
|
1036
|
+
type: 'server_vad',
|
|
1037
|
+
threshold: 0.85,
|
|
1038
|
+
silence_duration_ms: 1000,
|
|
1039
|
+
prefix_padding_ms: 333,
|
|
1040
|
+
},
|
|
1041
|
+
audio: {
|
|
1042
|
+
input: { format: { type: 'audio/pcm', rate: 24000 } },
|
|
1043
|
+
output: { format: { type: 'audio/pcm', rate: 24000 } },
|
|
1044
|
+
},
|
|
1045
|
+
serverTools: [
|
|
1046
|
+
{ type: 'web_search' },
|
|
1047
|
+
{
|
|
1048
|
+
type: 'mcp',
|
|
1049
|
+
server_url: 'https://mcp.example.com/mcp',
|
|
1050
|
+
server_label: 'business-tools',
|
|
1051
|
+
},
|
|
1052
|
+
],
|
|
1053
|
+
})
|
|
1054
|
+
```
|
|
1055
|
+
|
|
1056
|
+
Visit the [xAI Realtime Voice Reference](https://mastra.ai/reference/voice/xai-realtime) for more information on the xAI Realtime voice provider.
|
|
1057
|
+
|
|
812
1058
|
**Google Gemini Live**:
|
|
813
1059
|
|
|
814
1060
|
```typescript
|
|
@@ -825,6 +1071,48 @@ const voice = new GeminiLiveVoice({
|
|
|
825
1071
|
|
|
826
1072
|
Visit the [Google Gemini Live Reference](https://mastra.ai/reference/voice/google-gemini-live) for more information on the Google Gemini Live voice provider.
|
|
827
1073
|
|
|
1074
|
+
**AWS Nova Sonic**:
|
|
1075
|
+
|
|
1076
|
+
```typescript
|
|
1077
|
+
// AWS Nova Sonic Voice Configuration
|
|
1078
|
+
const voice = new NovaSonicVoice({
|
|
1079
|
+
region: 'us-east-1',
|
|
1080
|
+
speaker: 'matthew',
|
|
1081
|
+
sessionConfig: {
|
|
1082
|
+
inferenceConfiguration: {
|
|
1083
|
+
temperature: 0.7,
|
|
1084
|
+
maxTokens: 1024,
|
|
1085
|
+
},
|
|
1086
|
+
turnDetectionConfiguration: {
|
|
1087
|
+
endpointingSensitivity: 'MEDIUM',
|
|
1088
|
+
},
|
|
1089
|
+
},
|
|
1090
|
+
// AWS Nova Sonic is a realtime bidirectional API without separate speech and listening models
|
|
1091
|
+
})
|
|
1092
|
+
```
|
|
1093
|
+
|
|
1094
|
+
Visit the [AWS Nova Sonic Reference](https://mastra.ai/reference/voice/aws-nova-sonic) for more information on the AWS Nova Sonic voice provider.
|
|
1095
|
+
|
|
1096
|
+
**Inworld Realtime**:
|
|
1097
|
+
|
|
1098
|
+
```typescript
|
|
1099
|
+
// Inworld Realtime Voice Configuration
|
|
1100
|
+
const voice = new InworldRealtimeVoice({
|
|
1101
|
+
apiKey: process.env.INWORLD_API_KEY,
|
|
1102
|
+
model: 'inworld/models/gemma-4-26b-a4b-it',
|
|
1103
|
+
speaker: 'Sarah',
|
|
1104
|
+
// Typed Inworld realtime knobs (semantic VAD, playback speed, MCP tool routing, ...)
|
|
1105
|
+
session: {
|
|
1106
|
+
audio: {
|
|
1107
|
+
output: { speed: 1.1 },
|
|
1108
|
+
input: { turn_detection: { type: 'semantic_vad', eagerness: 'high' } },
|
|
1109
|
+
},
|
|
1110
|
+
},
|
|
1111
|
+
})
|
|
1112
|
+
```
|
|
1113
|
+
|
|
1114
|
+
Visit the [Inworld Realtime Reference](https://mastra.ai/reference/voice/inworld-realtime) for more information on the Inworld Realtime voice provider.
|
|
1115
|
+
|
|
828
1116
|
**AI SDK**:
|
|
829
1117
|
|
|
830
1118
|
```typescript
|
|
@@ -844,7 +1132,7 @@ const voiceAgent = new Agent({
|
|
|
844
1132
|
id: 'aisdk-voice-agent',
|
|
845
1133
|
name: 'AI SDK Voice Agent',
|
|
846
1134
|
instructions: 'You are a helpful assistant with voice capabilities.',
|
|
847
|
-
model: 'openai/gpt-5.
|
|
1135
|
+
model: 'openai/gpt-5.5',
|
|
848
1136
|
voice,
|
|
849
1137
|
})
|
|
850
1138
|
```
|
|
@@ -951,9 +1239,12 @@ For more information on the CompositeVoice, refer to the [CompositeVoice Referen
|
|
|
951
1239
|
- [MastraVoice](https://mastra.ai/reference/voice/mastra-voice)
|
|
952
1240
|
- [OpenAI Voice](https://mastra.ai/reference/voice/openai)
|
|
953
1241
|
- [OpenAI Realtime Voice](https://mastra.ai/reference/voice/openai-realtime)
|
|
1242
|
+
- [xAI Realtime Voice](https://mastra.ai/reference/voice/xai-realtime)
|
|
954
1243
|
- [Azure Voice](https://mastra.ai/reference/voice/azure)
|
|
955
1244
|
- [Google Voice](https://mastra.ai/reference/voice/google)
|
|
956
1245
|
- [Google Gemini Live Voice](https://mastra.ai/reference/voice/google-gemini-live)
|
|
1246
|
+
- [AWS Nova Sonic Voice](https://mastra.ai/reference/voice/aws-nova-sonic)
|
|
957
1247
|
- [Deepgram Voice](https://mastra.ai/reference/voice/deepgram)
|
|
1248
|
+
- [Inworld Voice](https://mastra.ai/reference/voice/inworld)
|
|
958
1249
|
- [PlayAI Voice](https://mastra.ai/reference/voice/playai)
|
|
959
1250
|
- [Voice Examples](https://github.com/mastra-ai/voice-examples)
|