@mastra/voice-cloudflare 0.12.1 → 0.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. package/CHANGELOG.md +16 -0
  2. package/dist/_types/@internal_voice/dist/_types/@internal_ai-sdk-v5/dist/index.d.ts +8888 -0
  3. package/dist/_types/@internal_voice/dist/_types/@internal_core/dist/base/index.d.ts +31 -0
  4. package/dist/_types/@internal_voice/dist/_types/@internal_core/dist/logger/index.d.ts +217 -0
  5. package/dist/_types/@internal_voice/dist/_types/@internal_core/dist/request-context/index.d.ts +147 -0
  6. package/dist/_types/@internal_voice/dist/_types/@internal_core/dist/types/index.d.ts +3 -0
  7. package/dist/_types/@internal_voice/dist/_types/zod/v3/ZodError.d.ts +164 -0
  8. package/dist/_types/@internal_voice/dist/_types/zod/v3/errors.d.ts +5 -0
  9. package/dist/_types/@internal_voice/dist/_types/zod/v3/external.d.ts +6 -0
  10. package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/enumUtil.d.ts +8 -0
  11. package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/errorUtil.d.ts +9 -0
  12. package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/parseUtil.d.ts +78 -0
  13. package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/partialUtil.d.ts +8 -0
  14. package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/typeAliases.d.ts +2 -0
  15. package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/util.d.ts +85 -0
  16. package/dist/_types/@internal_voice/dist/_types/zod/v3/index.d.cts +4 -0
  17. package/dist/_types/@internal_voice/dist/_types/zod/v3/index.d.ts +4 -0
  18. package/dist/_types/@internal_voice/dist/_types/zod/v3/locales/en.d.ts +3 -0
  19. package/dist/_types/@internal_voice/dist/_types/zod/v3/standard-schema.d.ts +102 -0
  20. package/dist/_types/@internal_voice/dist/_types/zod/v3/types.d.ts +1034 -0
  21. package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/checks.d.ts +1 -0
  22. package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/coerce.d.ts +17 -0
  23. package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/compat.d.ts +50 -0
  24. package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/errors.d.ts +30 -0
  25. package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/external.d.ts +16 -0
  26. package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/from-json-schema.d.ts +12 -0
  27. package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/index.d.ts +4 -0
  28. package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/iso.d.ts +22 -0
  29. package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/parse.d.ts +31 -0
  30. package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/schemas.d.ts +767 -0
  31. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/api.d.ts +325 -0
  32. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/checks.d.ts +278 -0
  33. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/core.d.ts +70 -0
  34. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/doc.d.ts +14 -0
  35. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/errors.d.ts +221 -0
  36. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/index.d.ts +16 -0
  37. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/json-schema-generator.d.ts +65 -0
  38. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/json-schema-processors.d.ts +49 -0
  39. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/json-schema.d.ts +88 -0
  40. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/parse.d.ts +49 -0
  41. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/regexes.d.ts +85 -0
  42. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/registries.d.ts +35 -0
  43. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/schemas.d.ts +1184 -0
  44. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/standard-schema.d.ts +126 -0
  45. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/to-json-schema.d.ts +114 -0
  46. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/util.d.ts +200 -0
  47. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/versions.d.ts +5 -0
  48. package/dist/_types/@internal_voice/dist/_types/zod/v4/index.d.cts +3 -0
  49. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ar.d.ts +4 -0
  50. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/az.d.ts +4 -0
  51. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/be.d.ts +4 -0
  52. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/bg.d.ts +4 -0
  53. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ca.d.ts +4 -0
  54. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/cs.d.ts +4 -0
  55. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/da.d.ts +4 -0
  56. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/de.d.ts +4 -0
  57. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/el.d.ts +4 -0
  58. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/en.d.ts +4 -0
  59. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/eo.d.ts +4 -0
  60. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/es.d.ts +4 -0
  61. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/fa.d.ts +4 -0
  62. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/fi.d.ts +4 -0
  63. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/fr-CA.d.ts +4 -0
  64. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/fr.d.ts +4 -0
  65. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/he.d.ts +4 -0
  66. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/hr.d.ts +4 -0
  67. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/hu.d.ts +4 -0
  68. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/hy.d.ts +4 -0
  69. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/id.d.ts +4 -0
  70. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/index.d.ts +52 -0
  71. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/is.d.ts +4 -0
  72. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/it.d.ts +4 -0
  73. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ja.d.ts +4 -0
  74. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ka.d.ts +4 -0
  75. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/kh.d.ts +5 -0
  76. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/km.d.ts +4 -0
  77. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ko.d.ts +4 -0
  78. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/lt.d.ts +4 -0
  79. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/mk.d.ts +4 -0
  80. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ms.d.ts +4 -0
  81. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/nl.d.ts +4 -0
  82. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/no.d.ts +4 -0
  83. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ota.d.ts +4 -0
  84. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/pl.d.ts +4 -0
  85. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ps.d.ts +4 -0
  86. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/pt.d.ts +4 -0
  87. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ro.d.ts +4 -0
  88. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ru.d.ts +4 -0
  89. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/sl.d.ts +4 -0
  90. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/sv.d.ts +4 -0
  91. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ta.d.ts +4 -0
  92. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/th.d.ts +4 -0
  93. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/tr.d.ts +4 -0
  94. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ua.d.ts +5 -0
  95. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/uk.d.ts +4 -0
  96. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ur.d.ts +4 -0
  97. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/uz.d.ts +4 -0
  98. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/vi.d.ts +4 -0
  99. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/yo.d.ts +4 -0
  100. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/zh-CN.d.ts +4 -0
  101. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/zh-TW.d.ts +4 -0
  102. package/dist/_types/@internal_voice/dist/index.d.ts +16 -0
  103. package/dist/_types/@internal_voice/dist/voice/aisdk/index.d.ts +3 -0
  104. package/dist/_types/@internal_voice/dist/voice/aisdk/speech.d.ts +23 -0
  105. package/dist/_types/@internal_voice/dist/voice/aisdk/transcription.d.ts +22 -0
  106. package/dist/_types/@internal_voice/dist/voice/composite-voice.d.ts +72 -0
  107. package/dist/_types/@internal_voice/dist/voice/default-voice.d.ts +13 -0
  108. package/dist/_types/@internal_voice/dist/voice/index.d.ts +5 -0
  109. package/dist/_types/@internal_voice/dist/voice/voice.d.ts +172 -0
  110. package/dist/docs/SKILL.md +1 -1
  111. package/dist/docs/assets/SOURCE_MAP.json +1 -1
  112. package/dist/docs/references/docs-agents-adding-voice.md +55 -23
  113. package/dist/docs/references/docs-voice-overview.md +317 -26
  114. package/dist/docs/references/reference-voice-cloudflare.md +3 -3
  115. package/dist/index.cjs +259 -3
  116. package/dist/index.cjs.map +1 -1
  117. package/dist/index.d.ts +1 -1
  118. package/dist/index.d.ts.map +1 -1
  119. package/dist/index.js +258 -2
  120. package/dist/index.js.map +1 -1
  121. package/package.json +11 -12
@@ -20,7 +20,7 @@ export const agent = new Agent({
20
20
  id: 'voice-agent',
21
21
  name: 'Voice Agent',
22
22
  instructions: `You are a helpful assistant with both STT and TTS capabilities.`,
23
- model: 'openai/gpt-5.4',
23
+ model: 'openai/gpt-5.5',
24
24
  voice,
25
25
  })
26
26
 
@@ -109,7 +109,7 @@ export const agent = new Agent({
109
109
  id: 'speech-to-speech-agent',
110
110
  name: 'Speech-to-Speech Agent',
111
111
  instructions: `You are a helpful assistant with speech-to-speech capabilities.`,
112
- model: 'openai/gpt-5.4',
112
+ model: 'openai/gpt-5.5',
113
113
  tools: {
114
114
  // Tools configured on Agent are passed to voice provider
115
115
  search,
@@ -132,6 +132,37 @@ agent.voice.send(microphoneStream)
132
132
  agent.voice.close()
133
133
  ```
134
134
 
135
+ ### Per-session voice for concurrent sessions
136
+
137
+ A static `voice` instance is shared across every request. For one-shot text-to-speech this is fine, but realtime and speech-to-speech providers store one WebSocket, one set of tools, and one request context per instance. If you deploy a single agent that handles several live sessions at once, a shared instance lets one session overwrite another session's tools, instructions, and request context.
138
+
139
+ To give each session its own voice, provide `voice` as a resolver. Mastra runs the resolver on every `getVoice()` call and returns a fresh, session-owned instance:
140
+
141
+ ```typescript
142
+ import { Agent } from '@mastra/core/agent'
143
+ import { OpenAIRealtimeVoice } from '@mastra/voice-openai-realtime'
144
+
145
+ export const agent = new Agent({
146
+ id: 'support-line',
147
+ name: 'Support Line',
148
+ instructions: ({ requestContext }) => `Help user ${requestContext.get('user')}.`,
149
+ model: 'openai/gpt-5.5',
150
+ voice: ({ requestContext }) => new OpenAIRealtimeVoice({ apiKey: requestContext.get('apiKey') }),
151
+ })
152
+
153
+ // Each concurrent session resolves its own voice instance
154
+ const voice = await agent.getVoice({ requestContext })
155
+ await voice.connect()
156
+ ```
157
+
158
+ When you use a resolver:
159
+
160
+ - Each call to `getVoice()` returns a new instance, so concurrent sessions never share state.
161
+ - Mastra does not add tools or instructions to an instance returned by a resolver. Configure those inside the resolver or on the provider.
162
+ - You own the lifecycle of the returned instance, so call `disconnect()` or `close()` when the session ends.
163
+
164
+ The `agent.voice` getter has no request context, so it throws when `voice` is a resolver. Use `agent.getVoice({ requestContext })` instead.
165
+
135
166
  ### Event System
136
167
 
137
168
  The realtime voice provider emits several events you can listen for:
@@ -209,7 +240,7 @@ export const convertToText = async (input: string | NodeJS.ReadableStream): Prom
209
240
  export const hybridVoiceAgent = new Agent({
210
241
  id: 'hybrid-voice-agent',
211
242
  name: 'Hybrid Voice Agent',
212
- model: 'openai/gpt-5.4',
243
+ model: 'openai/gpt-5.5',
213
244
  instructions: 'You can speak and listen using different providers.',
214
245
  voice: new CompositeVoice({
215
246
  input: new OpenAIVoice(),
@@ -221,7 +252,7 @@ export const unifiedVoiceAgent = new Agent({
221
252
  id: 'unified-voice-agent',
222
253
  name: 'Unified Voice Agent',
223
254
  instructions: 'You are an agent with both STT and TTS capabilities.',
224
- model: 'openai/gpt-5.4',
255
+ model: 'openai/gpt-5.5',
225
256
  voice: new OpenAIVoice(),
226
257
  })
227
258
 
@@ -263,7 +294,7 @@ export const agent = new Agent({
263
294
  id: 'voice-agent',
264
295
  name: 'Voice Agent',
265
296
  instructions: `You are a helpful assistant with both STT and TTS capabilities.`,
266
- model: 'openai/gpt-5.4',
297
+ model: 'openai/gpt-5.5',
267
298
 
268
299
  // Create a composite voice using OpenAI for listening and PlayAI for speaking
269
300
  voice: new CompositeVoice({
@@ -288,7 +319,7 @@ export const agent = new Agent({
288
319
  id: 'aisdk-voice-agent',
289
320
  name: 'AI SDK Voice Agent',
290
321
  instructions: `You are a helpful assistant with voice capabilities.`,
291
- model: 'openai/gpt-5.4',
322
+ model: 'openai/gpt-5.5',
292
323
 
293
324
  // Pass AI SDK models directly to CompositeVoice
294
325
  voice: new CompositeVoice({
@@ -327,23 +358,24 @@ For the complete list of supported AI SDK providers and their capabilities:
327
358
 
328
359
  Mastra supports multiple voice providers for text-to-speech (TTS) and speech-to-text (STT) capabilities:
329
360
 
330
- | Provider | Package | Features | Reference |
331
- | --------------- | ------------------------------- | ------------------------- | ------------------------------------------------------------------ |
332
- | OpenAI | `@mastra/voice-openai` | TTS, STT | [Documentation](https://mastra.ai/reference/voice/openai) |
333
- | OpenAI Realtime | `@mastra/voice-openai-realtime` | Realtime speech-to-speech | [Documentation](https://mastra.ai/reference/voice/openai-realtime) |
334
- | ElevenLabs | `@mastra/voice-elevenlabs` | High-quality TTS | [Documentation](https://mastra.ai/reference/voice/elevenlabs) |
335
- | PlayAI | `@mastra/voice-playai` | TTS | [Documentation](https://mastra.ai/reference/voice/playai) |
336
- | Google | `@mastra/voice-google` | TTS, STT | [Documentation](https://mastra.ai/reference/voice/google) |
337
- | Deepgram | `@mastra/voice-deepgram` | STT | [Documentation](https://mastra.ai/reference/voice/deepgram) |
338
- | Murf | `@mastra/voice-murf` | TTS | [Documentation](https://mastra.ai/reference/voice/murf) |
339
- | Speechify | `@mastra/voice-speechify` | TTS | [Documentation](https://mastra.ai/reference/voice/speechify) |
340
- | Sarvam | `@mastra/voice-sarvam` | TTS, STT | [Documentation](https://mastra.ai/reference/voice/sarvam) |
341
- | Azure | `@mastra/voice-azure` | TTS, STT | [Documentation](https://mastra.ai/reference/voice/mastra-voice) |
342
- | Cloudflare | `@mastra/voice-cloudflare` | TTS | [Documentation](https://mastra.ai/reference/voice/mastra-voice) |
361
+ | Provider | Package | Features | Reference |
362
+ | --------------- | ------------------------------- | ----------------------------------------- | ------------------------------------------------------------------ |
363
+ | OpenAI | `@mastra/voice-openai` | TTS, STT | [Documentation](https://mastra.ai/reference/voice/openai) |
364
+ | OpenAI Realtime | `@mastra/voice-openai-realtime` | Realtime speech-to-speech | [Documentation](https://mastra.ai/reference/voice/openai-realtime) |
365
+ | AWS Nova Sonic | `@mastra/voice-aws-nova-sonic` | Realtime speech-to-speech via AWS Bedrock | [Documentation](https://mastra.ai/reference/voice/aws-nova-sonic) |
366
+ | ElevenLabs | `@mastra/voice-elevenlabs` | High-quality TTS | [Documentation](https://mastra.ai/reference/voice/elevenlabs) |
367
+ | PlayAI | `@mastra/voice-playai` | TTS | [Documentation](https://mastra.ai/reference/voice/playai) |
368
+ | Google | `@mastra/voice-google` | TTS, STT | [Documentation](https://mastra.ai/reference/voice/google) |
369
+ | Deepgram | `@mastra/voice-deepgram` | STT | [Documentation](https://mastra.ai/reference/voice/deepgram) |
370
+ | Murf | `@mastra/voice-murf` | TTS | [Documentation](https://mastra.ai/reference/voice/murf) |
371
+ | Speechify | `@mastra/voice-speechify` | TTS | [Documentation](https://mastra.ai/reference/voice/speechify) |
372
+ | Sarvam | `@mastra/voice-sarvam` | TTS, STT | [Documentation](https://mastra.ai/reference/voice/sarvam) |
373
+ | Azure | `@mastra/voice-azure` | TTS, STT | [Documentation](https://mastra.ai/reference/voice/mastra-voice) |
374
+ | Cloudflare | `@mastra/voice-cloudflare` | TTS | [Documentation](https://mastra.ai/reference/voice/cloudflare) |
343
375
 
344
376
  ## Next steps
345
377
 
346
- - [Voice API Reference](https://mastra.ai/reference/voice/mastra-voice) - Detailed API documentation for voice capabilities
347
- - [Text to Speech Examples](https://github.com/mastra-ai/voice-examples/tree/main/text-to-speech) - Interactive story generator and other TTS implementations
348
- - [Speech to Text Examples](https://github.com/mastra-ai/voice-examples/tree/main/speech-to-text) - Voice memo app and other STT implementations
349
- - [Speech to Speech Examples](https://github.com/mastra-ai/voice-examples/tree/main/speech-to-speech) - Real-time voice conversation with call analysis
378
+ - [Voice API Reference](https://mastra.ai/reference/voice/mastra-voice): Detailed API documentation for voice capabilities
379
+ - [Text to Speech Examples](https://github.com/mastra-ai/voice-examples/tree/main/text-to-speech): Interactive story generator and other TTS implementations
380
+ - [Speech to Text Examples](https://github.com/mastra-ai/voice-examples/tree/main/speech-to-text): Voice memo app and other STT implementations
381
+ - [Speech to Speech Examples](https://github.com/mastra-ai/voice-examples/tree/main/speech-to-speech): Real-time voice conversation with call analysis
@@ -16,7 +16,7 @@ const voiceAgent = new Agent({
16
16
  id: 'voice-agent',
17
17
  name: 'Voice Agent',
18
18
  instructions: 'You are a voice assistant that can help users with their tasks.',
19
- model: 'openai/gpt-5.4',
19
+ model: 'openai/gpt-5.5',
20
20
  voice: new OpenAIVoice(),
21
21
  })
22
22
  ```
@@ -40,7 +40,7 @@ const voiceAgent = new Agent({
40
40
  id: 'voice-agent',
41
41
  name: 'Voice Agent',
42
42
  instructions: 'You are a voice assistant that can help users with their tasks.',
43
- model: 'openai/gpt-5.4',
43
+ model: 'openai/gpt-5.5',
44
44
  voice: new OpenAIVoice(),
45
45
  })
46
46
 
@@ -68,7 +68,7 @@ const voiceAgent = new Agent({
68
68
  id: 'voice-agent',
69
69
  name: 'Voice Agent',
70
70
  instructions: 'You are a voice assistant that can help users with their tasks.',
71
- model: 'openai/gpt-5.4',
71
+ model: 'openai/gpt-5.5',
72
72
  voice: new AzureVoice(),
73
73
  })
74
74
 
@@ -95,7 +95,7 @@ const voiceAgent = new Agent({
95
95
  id: 'voice-agent',
96
96
  name: 'Voice Agent',
97
97
  instructions: 'You are a voice assistant that can help users with their tasks.',
98
- model: 'openai/gpt-5.4',
98
+ model: 'openai/gpt-5.5',
99
99
  voice: new ElevenLabsVoice(),
100
100
  })
101
101
 
@@ -122,7 +122,7 @@ const voiceAgent = new Agent({
122
122
  id: 'voice-agent',
123
123
  name: 'Voice Agent',
124
124
  instructions: 'You are a voice assistant that can help users with their tasks.',
125
- model: 'openai/gpt-5.4',
125
+ model: 'openai/gpt-5.5',
126
126
  voice: new PlayAIVoice(),
127
127
  })
128
128
 
@@ -149,7 +149,7 @@ const voiceAgent = new Agent({
149
149
  id: 'voice-agent',
150
150
  name: 'Voice Agent',
151
151
  instructions: 'You are a voice assistant that can help users with their tasks.',
152
- model: 'openai/gpt-5.4',
152
+ model: 'openai/gpt-5.5',
153
153
  voice: new GoogleVoice(),
154
154
  })
155
155
 
@@ -176,7 +176,7 @@ const voiceAgent = new Agent({
176
176
  id: 'voice-agent',
177
177
  name: 'Voice Agent',
178
178
  instructions: 'You are a voice assistant that can help users with their tasks.',
179
- model: 'openai/gpt-5.4',
179
+ model: 'openai/gpt-5.5',
180
180
  voice: new CloudflareVoice(),
181
181
  })
182
182
 
@@ -203,7 +203,7 @@ const voiceAgent = new Agent({
203
203
  id: 'voice-agent',
204
204
  name: 'Voice Agent',
205
205
  instructions: 'You are a voice assistant that can help users with their tasks.',
206
- model: 'openai/gpt-5.4',
206
+ model: 'openai/gpt-5.5',
207
207
  voice: new DeepgramVoice(),
208
208
  })
209
209
 
@@ -219,6 +219,33 @@ playAudio(audioStream)
219
219
 
220
220
  Visit the [Deepgram Voice Reference](https://mastra.ai/reference/voice/deepgram) for more information on the Deepgram voice provider.
221
221
 
222
+ **Inworld**:
223
+
224
+ ```typescript
225
+ import { Agent } from '@mastra/core/agent'
226
+ import { InworldVoice } from '@mastra/voice-inworld'
227
+ import { playAudio } from '@mastra/node-audio'
228
+
229
+ const voiceAgent = new Agent({
230
+ id: 'voice-agent',
231
+ name: 'Voice Agent',
232
+ instructions: 'You are a voice assistant that can help users with their tasks.',
233
+ model: 'openai/gpt-5.5',
234
+ voice: new InworldVoice(),
235
+ })
236
+
237
+ const { text } = await voiceAgent.generate('What color is the sky?')
238
+
239
+ // Convert text to speech to an Audio Stream
240
+ const audioStream = await voiceAgent.voice.speak(text, {
241
+ speaker: 'Dennis', // Optional: specify a speaker
242
+ })
243
+
244
+ playAudio(audioStream)
245
+ ```
246
+
247
+ Visit the [Inworld Voice Reference](https://mastra.ai/reference/voice/inworld) for more information on the Inworld voice provider.
248
+
222
249
  **Speechify**:
223
250
 
224
251
  ```typescript
@@ -230,7 +257,7 @@ const voiceAgent = new Agent({
230
257
  id: 'voice-agent',
231
258
  name: 'Voice Agent',
232
259
  instructions: 'You are a voice assistant that can help users with their tasks.',
233
- model: 'openai/gpt-5.4',
260
+ model: 'openai/gpt-5.5',
234
261
  voice: new SpeechifyVoice(),
235
262
  })
236
263
 
@@ -257,7 +284,7 @@ const voiceAgent = new Agent({
257
284
  id: 'voice-agent',
258
285
  name: 'Voice Agent',
259
286
  instructions: 'You are a voice assistant that can help users with their tasks.',
260
- model: 'openai/gpt-5.4',
287
+ model: 'openai/gpt-5.5',
261
288
  voice: new SarvamVoice(),
262
289
  })
263
290
 
@@ -265,7 +292,7 @@ const { text } = await voiceAgent.generate('What color is the sky?')
265
292
 
266
293
  // Convert text to speech to an Audio Stream
267
294
  const audioStream = await voiceAgent.voice.speak(text, {
268
- speaker: 'default', // Optional: specify a speaker
295
+ speaker: 'shubh', // Optional: specify a bulbul:v3 speaker
269
296
  })
270
297
 
271
298
  playAudio(audioStream)
@@ -284,7 +311,7 @@ const voiceAgent = new Agent({
284
311
  id: 'voice-agent',
285
312
  name: 'Voice Agent',
286
313
  instructions: 'You are a voice assistant that can help users with their tasks.',
287
- model: 'openai/gpt-5.4',
314
+ model: 'openai/gpt-5.5',
288
315
  voice: new MurfVoice(),
289
316
  })
290
317
 
@@ -319,7 +346,7 @@ const voiceAgent = new Agent({
319
346
  id: 'voice-agent',
320
347
  name: 'Voice Agent',
321
348
  instructions: 'You are a voice assistant that can help users with their tasks.',
322
- model: 'openai/gpt-5.4',
349
+ model: 'openai/gpt-5.5',
323
350
  voice: new OpenAIVoice(),
324
351
  })
325
352
 
@@ -348,7 +375,7 @@ const voiceAgent = new Agent({
348
375
  id: 'voice-agent',
349
376
  name: 'Voice Agent',
350
377
  instructions: 'You are a voice assistant that can help users with their tasks.',
351
- model: 'openai/gpt-5.4',
378
+ model: 'openai/gpt-5.5',
352
379
  voice: new AzureVoice(),
353
380
  })
354
381
 
@@ -376,7 +403,7 @@ const voiceAgent = new Agent({
376
403
  id: 'voice-agent',
377
404
  name: 'Voice Agent',
378
405
  instructions: 'You are a voice assistant that can help users with their tasks.',
379
- model: 'openai/gpt-5.4',
406
+ model: 'openai/gpt-5.5',
380
407
  voice: new ElevenLabsVoice(),
381
408
  })
382
409
 
@@ -404,7 +431,7 @@ const voiceAgent = new Agent({
404
431
  id: 'voice-agent',
405
432
  name: 'Voice Agent',
406
433
  instructions: 'You are a voice assistant that can help users with their tasks.',
407
- model: 'openai/gpt-5.4',
434
+ model: 'openai/gpt-5.5',
408
435
  voice: new GoogleVoice(),
409
436
  })
410
437
 
@@ -432,7 +459,7 @@ const voiceAgent = new Agent({
432
459
  id: 'voice-agent',
433
460
  name: 'Voice Agent',
434
461
  instructions: 'You are a voice assistant that can help users with their tasks.',
435
- model: 'openai/gpt-5.4',
462
+ model: 'openai/gpt-5.5',
436
463
  voice: new CloudflareVoice(),
437
464
  })
438
465
 
@@ -460,7 +487,7 @@ const voiceAgent = new Agent({
460
487
  id: 'voice-agent',
461
488
  name: 'Voice Agent',
462
489
  instructions: 'You are a voice assistant that can help users with their tasks.',
463
- model: 'openai/gpt-5.4',
490
+ model: 'openai/gpt-5.5',
464
491
  voice: new DeepgramVoice(),
465
492
  })
466
493
 
@@ -477,6 +504,34 @@ const { text } = await voiceAgent.generate(transcript)
477
504
 
478
505
  Visit the [Deepgram Voice Reference](https://mastra.ai/reference/voice/deepgram) for more information on the Deepgram voice provider.
479
506
 
507
+ **Inworld**:
508
+
509
+ ```typescript
510
+ import { Agent } from '@mastra/core/agent'
511
+ import { InworldVoice } from '@mastra/voice-inworld'
512
+ import { createReadStream } from 'fs'
513
+
514
+ const voiceAgent = new Agent({
515
+ id: 'voice-agent',
516
+ name: 'Voice Agent',
517
+ instructions: 'You are a voice assistant that can help users with their tasks.',
518
+ model: 'openai/gpt-5.5',
519
+ voice: new InworldVoice(),
520
+ })
521
+
522
+ // Use an audio file from the local filesystem
523
+ const audioStream = await createReadStream('./how_can_i_help_you.mp3')
524
+
525
+ // Convert audio to text
526
+ const transcript = await voiceAgent.voice.listen(audioStream)
527
+ console.log(`User said: ${transcript}`)
528
+
529
+ // Generate a response based on the transcript
530
+ const { text } = await voiceAgent.generate(transcript)
531
+ ```
532
+
533
+ Visit the [Inworld Voice Reference](https://mastra.ai/reference/voice/inworld) for more information on the Inworld voice provider.
534
+
480
535
  **Sarvam**:
481
536
 
482
537
  ```typescript
@@ -488,7 +543,7 @@ const voiceAgent = new Agent({
488
543
  id: 'voice-agent',
489
544
  name: 'Voice Agent',
490
545
  instructions: 'You are a voice assistant that can help users with their tasks.',
491
- model: 'openai/gpt-5.4',
546
+ model: 'openai/gpt-5.5',
492
547
  voice: new SarvamVoice(),
493
548
  })
494
549
 
@@ -520,7 +575,7 @@ const voiceAgent = new Agent({
520
575
  id: 'voice-agent',
521
576
  name: 'Voice Agent',
522
577
  instructions: 'You are a voice assistant that can help users with their tasks.',
523
- model: 'openai/gpt-5.4',
578
+ model: 'openai/gpt-5.5',
524
579
  voice: new OpenAIRealtimeVoice(),
525
580
  })
526
581
 
@@ -550,7 +605,7 @@ const voiceAgent = new Agent({
550
605
  id: 'voice-agent',
551
606
  name: 'Voice Agent',
552
607
  instructions: 'You are a voice assistant that can help users with their tasks.',
553
- model: 'openai/gpt-5.4',
608
+ model: 'openai/gpt-5.5',
554
609
  voice: new GeminiLiveVoice({
555
610
  // Live API mode
556
611
  apiKey: process.env.GOOGLE_API_KEY,
@@ -588,6 +643,134 @@ await voiceAgent.voice.send(micStream)
588
643
 
589
644
  Visit the [Google Gemini Live Reference](https://mastra.ai/reference/voice/google-gemini-live) for more information on the Google Gemini Live voice provider.
590
645
 
646
+ **AWS Nova Sonic**:
647
+
648
+ ```typescript
649
+ import { Agent } from '@mastra/core/agent'
650
+ import { playAudio, getMicrophoneStream } from '@mastra/node-audio'
651
+ import { NovaSonicVoice } from '@mastra/voice-aws-nova-sonic'
652
+
653
+ const voiceAgent = new Agent({
654
+ id: 'voice-agent',
655
+ name: 'Voice Agent',
656
+ instructions: 'You are a voice assistant that can help users with their tasks.',
657
+ model: 'openai/gpt-5.5',
658
+ voice: new NovaSonicVoice({
659
+ region: 'us-east-1',
660
+ speaker: 'matthew',
661
+ // Static credentials are optional. The default AWS credential
662
+ // provider chain is used when none are passed.
663
+ }),
664
+ })
665
+
666
+ // Connect before using speak/send
667
+ await voiceAgent.voice.connect()
668
+
669
+ // Listen for assistant audio (Int16Array PCM)
670
+ voiceAgent.voice.on('speaking', ({ audioData }) => {
671
+ if (audioData) playAudio(audioData)
672
+ })
673
+
674
+ // Listen for transcribed text
675
+ voiceAgent.voice.on('writing', ({ text, role }) => {
676
+ console.log(`${role}: ${text}`)
677
+ })
678
+
679
+ // Initiate the conversation
680
+ await voiceAgent.voice.speak('How can I help you today?')
681
+
682
+ // Send continuous audio from the microphone
683
+ const micStream = getMicrophoneStream()
684
+ await voiceAgent.voice.send(micStream)
685
+ ```
686
+
687
+ Visit the [AWS Nova Sonic Reference](https://mastra.ai/reference/voice/aws-nova-sonic) for more information on the AWS Nova Sonic voice provider.
688
+
689
+ **Inworld Realtime**:
690
+
691
+ ```typescript
692
+ import { Agent } from '@mastra/core/agent'
693
+ import { playAudio, getMicrophoneStream } from '@mastra/node-audio'
694
+ import { InworldRealtimeVoice } from '@mastra/voice-inworld'
695
+
696
+ const voiceAgent = new Agent({
697
+ id: 'voice-agent',
698
+ name: 'Voice Agent',
699
+ instructions: 'You are a voice assistant that can help users with their tasks.',
700
+ model: 'openai/gpt-5.5',
701
+ voice: new InworldRealtimeVoice({
702
+ apiKey: process.env.INWORLD_API_KEY,
703
+ model: 'inworld/models/gemma-4-26b-a4b-it',
704
+ speaker: 'Sarah',
705
+ }),
706
+ })
707
+
708
+ // Connect before using speak/send
709
+ await voiceAgent.voice.connect()
710
+
711
+ // Listen for agent audio (PCM stream)
712
+ voiceAgent.voice.on('speaker', stream => {
713
+ playAudio(stream)
714
+ })
715
+
716
+ // Listen for text responses and transcriptions
717
+ voiceAgent.voice.on('writing', ({ text, role }) => {
718
+ console.log(`${role}: ${text}`)
719
+ })
720
+
721
+ // Initiate the conversation
722
+ await voiceAgent.voice.speak('How can I help you today?')
723
+
724
+ // Send continuous audio from the microphone
725
+ const micStream = getMicrophoneStream()
726
+ await voiceAgent.voice.send(micStream)
727
+ ```
728
+
729
+ Visit the [Inworld Realtime Reference](https://mastra.ai/reference/voice/inworld-realtime) for more information on the Inworld Realtime voice provider.
730
+
731
+ **xAI**:
732
+
733
+ ```typescript
734
+ import { Agent } from '@mastra/core/agent'
735
+ import { playAudio, getMicrophoneStream } from '@mastra/node-audio'
736
+ import { XAIRealtimeVoice } from '@mastra/voice-xai-realtime'
737
+
738
+ const voiceAgent = new Agent({
739
+ id: 'voice-agent',
740
+ name: 'Voice Agent',
741
+ instructions: 'You are a voice assistant that can help users with their tasks.',
742
+ model: 'xai/grok-4.3',
743
+ voice: new XAIRealtimeVoice({
744
+ apiKey: process.env.XAI_API_KEY,
745
+ model: 'grok-voice-think-fast-1.0',
746
+ speaker: 'eve',
747
+ turnDetection: { type: 'server_vad' },
748
+ }),
749
+ })
750
+
751
+ // Connect before using speak/send
752
+ await voiceAgent.voice.connect()
753
+
754
+ // Listen for agent audio responses
755
+ voiceAgent.voice.on('speaker', audioStream => {
756
+ playAudio(audioStream)
757
+ })
758
+
759
+ // Listen for text responses and transcriptions
760
+ voiceAgent.voice.on('writing', ({ text, role }) => {
761
+ console.log(`${role}: ${text}`)
762
+ })
763
+
764
+ // Initiate the conversation
765
+ await voiceAgent.voice.speak('How can I help you today?')
766
+
767
+ // Send continuous audio from the microphone
768
+ const micStream = getMicrophoneStream()
769
+ await voiceAgent.voice.send(micStream)
770
+ ```
771
+
772
+ Visit the [xAI Realtime Voice Reference](https://mastra.ai/reference/voice/xai-realtime) for more information on the xAI voice provider.
773
+
591
774
  ## Voice configuration
592
775
 
593
776
  Each voice provider can be configured with different models and options. Below are the detailed configuration options for all supported providers:
@@ -736,6 +919,34 @@ const voice = new DeepgramVoice({
736
919
 
737
920
  Visit the [Deepgram Voice Reference](https://mastra.ai/reference/voice/deepgram) for more information on the Deepgram voice provider.
738
921
 
922
+ **Inworld**:
923
+
924
+ ```typescript
925
+ // Inworld Voice Configuration
926
+ const voice = new InworldVoice({
927
+ speechModel: {
928
+ name: 'inworld-tts-2',
929
+ apiKey: process.env.INWORLD_API_KEY,
930
+ },
931
+ listeningModel: {
932
+ name: 'groq/whisper-large-v3',
933
+ apiKey: process.env.INWORLD_API_KEY,
934
+ },
935
+ speaker: 'Dennis',
936
+ audioEncoding: 'MP3',
937
+ sampleRateHertz: 48000,
938
+ language: 'en-US',
939
+ })
940
+
941
+ // Per-call options: `deliveryMode` is honored only by `inworld-tts-2`.
942
+ const audioStream = await voice.speak('Hello!', {
943
+ deliveryMode: 'BALANCED', // 'STABLE' | 'BALANCED' | 'CREATIVE'
944
+ language: 'en-US', // BCP-47 per-call override
945
+ })
946
+ ```
947
+
948
+ Visit the [Inworld Voice Reference](https://mastra.ai/reference/voice/inworld) for more information on the Inworld voice provider.
949
+
739
950
  **Speechify**:
740
951
 
741
952
  ```typescript
@@ -760,12 +971,15 @@ Visit the [Speechify Voice Reference](https://mastra.ai/reference/voice/speechif
760
971
  // Sarvam Voice Configuration
761
972
  const voice = new SarvamVoice({
762
973
  speechModel: {
763
- name: 'sarvam-voice', // Example model name
974
+ model: 'bulbul:v3', // TTS model (bulbul:v2 or bulbul:v3)
764
975
  apiKey: process.env.SARVAM_API_KEY,
765
- language: 'en-IN', // Language code
766
- style: 'conversational', // Style setting
976
+ language: 'en-IN', // BCP-47 language code
767
977
  },
768
- // Sarvam may not have a separate listening model
978
+ listeningModel: {
979
+ model: 'saarika:v2.5', // STT model (saarika:v2.5 or saaras:v3)
980
+ apiKey: process.env.SARVAM_API_KEY,
981
+ },
982
+ speaker: 'shubh', // Default bulbul:v3 speaker
769
983
  })
770
984
  ```
771
985
 
@@ -809,6 +1023,38 @@ const voice = new OpenAIRealtimeVoice({
809
1023
 
810
1024
  For more information on the OpenAI Realtime voice provider, refer to the [OpenAI Realtime Voice Reference](https://mastra.ai/reference/voice/openai-realtime).
811
1025
 
1026
+ **xAI Realtime**:
1027
+
1028
+ ```typescript
1029
+ // xAI Realtime Voice Configuration
1030
+ const voice = new XAIRealtimeVoice({
1031
+ apiKey: process.env.XAI_API_KEY,
1032
+ model: 'grok-voice-think-fast-1.0',
1033
+ speaker: 'eve',
1034
+ instructions: 'You are a concise voice assistant.',
1035
+ turnDetection: {
1036
+ type: 'server_vad',
1037
+ threshold: 0.85,
1038
+ silence_duration_ms: 1000,
1039
+ prefix_padding_ms: 333,
1040
+ },
1041
+ audio: {
1042
+ input: { format: { type: 'audio/pcm', rate: 24000 } },
1043
+ output: { format: { type: 'audio/pcm', rate: 24000 } },
1044
+ },
1045
+ serverTools: [
1046
+ { type: 'web_search' },
1047
+ {
1048
+ type: 'mcp',
1049
+ server_url: 'https://mcp.example.com/mcp',
1050
+ server_label: 'business-tools',
1051
+ },
1052
+ ],
1053
+ })
1054
+ ```
1055
+
1056
+ Visit the [xAI Realtime Voice Reference](https://mastra.ai/reference/voice/xai-realtime) for more information on the xAI Realtime voice provider.
1057
+
812
1058
  **Google Gemini Live**:
813
1059
 
814
1060
  ```typescript
@@ -825,6 +1071,48 @@ const voice = new GeminiLiveVoice({
825
1071
 
826
1072
  Visit the [Google Gemini Live Reference](https://mastra.ai/reference/voice/google-gemini-live) for more information on the Google Gemini Live voice provider.
827
1073
 
1074
+ **AWS Nova Sonic**:
1075
+
1076
+ ```typescript
1077
+ // AWS Nova Sonic Voice Configuration
1078
+ const voice = new NovaSonicVoice({
1079
+ region: 'us-east-1',
1080
+ speaker: 'matthew',
1081
+ sessionConfig: {
1082
+ inferenceConfiguration: {
1083
+ temperature: 0.7,
1084
+ maxTokens: 1024,
1085
+ },
1086
+ turnDetectionConfiguration: {
1087
+ endpointingSensitivity: 'MEDIUM',
1088
+ },
1089
+ },
1090
+ // AWS Nova Sonic is a realtime bidirectional API without separate speech and listening models
1091
+ })
1092
+ ```
1093
+
1094
+ Visit the [AWS Nova Sonic Reference](https://mastra.ai/reference/voice/aws-nova-sonic) for more information on the AWS Nova Sonic voice provider.
1095
+
1096
+ **Inworld Realtime**:
1097
+
1098
+ ```typescript
1099
+ // Inworld Realtime Voice Configuration
1100
+ const voice = new InworldRealtimeVoice({
1101
+ apiKey: process.env.INWORLD_API_KEY,
1102
+ model: 'inworld/models/gemma-4-26b-a4b-it',
1103
+ speaker: 'Sarah',
1104
+ // Typed Inworld realtime knobs (semantic VAD, playback speed, MCP tool routing, ...)
1105
+ session: {
1106
+ audio: {
1107
+ output: { speed: 1.1 },
1108
+ input: { turn_detection: { type: 'semantic_vad', eagerness: 'high' } },
1109
+ },
1110
+ },
1111
+ })
1112
+ ```
1113
+
1114
+ Visit the [Inworld Realtime Reference](https://mastra.ai/reference/voice/inworld-realtime) for more information on the Inworld Realtime voice provider.
1115
+
828
1116
  **AI SDK**:
829
1117
 
830
1118
  ```typescript
@@ -844,7 +1132,7 @@ const voiceAgent = new Agent({
844
1132
  id: 'aisdk-voice-agent',
845
1133
  name: 'AI SDK Voice Agent',
846
1134
  instructions: 'You are a helpful assistant with voice capabilities.',
847
- model: 'openai/gpt-5.4',
1135
+ model: 'openai/gpt-5.5',
848
1136
  voice,
849
1137
  })
850
1138
  ```
@@ -951,9 +1239,12 @@ For more information on the CompositeVoice, refer to the [CompositeVoice Referen
951
1239
  - [MastraVoice](https://mastra.ai/reference/voice/mastra-voice)
952
1240
  - [OpenAI Voice](https://mastra.ai/reference/voice/openai)
953
1241
  - [OpenAI Realtime Voice](https://mastra.ai/reference/voice/openai-realtime)
1242
+ - [xAI Realtime Voice](https://mastra.ai/reference/voice/xai-realtime)
954
1243
  - [Azure Voice](https://mastra.ai/reference/voice/azure)
955
1244
  - [Google Voice](https://mastra.ai/reference/voice/google)
956
1245
  - [Google Gemini Live Voice](https://mastra.ai/reference/voice/google-gemini-live)
1246
+ - [AWS Nova Sonic Voice](https://mastra.ai/reference/voice/aws-nova-sonic)
957
1247
  - [Deepgram Voice](https://mastra.ai/reference/voice/deepgram)
1248
+ - [Inworld Voice](https://mastra.ai/reference/voice/inworld)
958
1249
  - [PlayAI Voice](https://mastra.ai/reference/voice/playai)
959
1250
  - [Voice Examples](https://github.com/mastra-ai/voice-examples)