@mastra/voice-openai 0.12.1 → 0.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. package/CHANGELOG.md +16 -0
  2. package/LICENSE.md +15 -0
  3. package/dist/_types/@internal_voice/dist/_types/@internal_ai-sdk-v5/dist/index.d.ts +8888 -0
  4. package/dist/_types/@internal_voice/dist/_types/@internal_core/dist/base/index.d.ts +31 -0
  5. package/dist/_types/@internal_voice/dist/_types/@internal_core/dist/logger/index.d.ts +217 -0
  6. package/dist/_types/@internal_voice/dist/_types/@internal_core/dist/request-context/index.d.ts +147 -0
  7. package/dist/_types/@internal_voice/dist/_types/@internal_core/dist/types/index.d.ts +3 -0
  8. package/dist/_types/@internal_voice/dist/_types/zod/v3/ZodError.d.ts +164 -0
  9. package/dist/_types/@internal_voice/dist/_types/zod/v3/errors.d.ts +5 -0
  10. package/dist/_types/@internal_voice/dist/_types/zod/v3/external.d.ts +6 -0
  11. package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/enumUtil.d.ts +8 -0
  12. package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/errorUtil.d.ts +9 -0
  13. package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/parseUtil.d.ts +78 -0
  14. package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/partialUtil.d.ts +8 -0
  15. package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/typeAliases.d.ts +2 -0
  16. package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/util.d.ts +85 -0
  17. package/dist/_types/@internal_voice/dist/_types/zod/v3/index.d.cts +4 -0
  18. package/dist/_types/@internal_voice/dist/_types/zod/v3/index.d.ts +4 -0
  19. package/dist/_types/@internal_voice/dist/_types/zod/v3/locales/en.d.ts +3 -0
  20. package/dist/_types/@internal_voice/dist/_types/zod/v3/standard-schema.d.ts +102 -0
  21. package/dist/_types/@internal_voice/dist/_types/zod/v3/types.d.ts +1034 -0
  22. package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/checks.d.ts +1 -0
  23. package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/coerce.d.ts +17 -0
  24. package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/compat.d.ts +50 -0
  25. package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/errors.d.ts +30 -0
  26. package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/external.d.ts +16 -0
  27. package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/from-json-schema.d.ts +12 -0
  28. package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/index.d.ts +4 -0
  29. package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/iso.d.ts +22 -0
  30. package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/parse.d.ts +31 -0
  31. package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/schemas.d.ts +767 -0
  32. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/api.d.ts +325 -0
  33. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/checks.d.ts +278 -0
  34. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/core.d.ts +70 -0
  35. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/doc.d.ts +14 -0
  36. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/errors.d.ts +221 -0
  37. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/index.d.ts +16 -0
  38. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/json-schema-generator.d.ts +65 -0
  39. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/json-schema-processors.d.ts +49 -0
  40. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/json-schema.d.ts +88 -0
  41. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/parse.d.ts +49 -0
  42. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/regexes.d.ts +85 -0
  43. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/registries.d.ts +35 -0
  44. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/schemas.d.ts +1184 -0
  45. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/standard-schema.d.ts +126 -0
  46. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/to-json-schema.d.ts +114 -0
  47. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/util.d.ts +200 -0
  48. package/dist/_types/@internal_voice/dist/_types/zod/v4/core/versions.d.ts +5 -0
  49. package/dist/_types/@internal_voice/dist/_types/zod/v4/index.d.cts +3 -0
  50. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ar.d.ts +4 -0
  51. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/az.d.ts +4 -0
  52. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/be.d.ts +4 -0
  53. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/bg.d.ts +4 -0
  54. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ca.d.ts +4 -0
  55. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/cs.d.ts +4 -0
  56. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/da.d.ts +4 -0
  57. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/de.d.ts +4 -0
  58. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/el.d.ts +4 -0
  59. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/en.d.ts +4 -0
  60. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/eo.d.ts +4 -0
  61. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/es.d.ts +4 -0
  62. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/fa.d.ts +4 -0
  63. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/fi.d.ts +4 -0
  64. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/fr-CA.d.ts +4 -0
  65. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/fr.d.ts +4 -0
  66. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/he.d.ts +4 -0
  67. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/hr.d.ts +4 -0
  68. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/hu.d.ts +4 -0
  69. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/hy.d.ts +4 -0
  70. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/id.d.ts +4 -0
  71. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/index.d.ts +52 -0
  72. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/is.d.ts +4 -0
  73. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/it.d.ts +4 -0
  74. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ja.d.ts +4 -0
  75. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ka.d.ts +4 -0
  76. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/kh.d.ts +5 -0
  77. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/km.d.ts +4 -0
  78. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ko.d.ts +4 -0
  79. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/lt.d.ts +4 -0
  80. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/mk.d.ts +4 -0
  81. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ms.d.ts +4 -0
  82. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/nl.d.ts +4 -0
  83. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/no.d.ts +4 -0
  84. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ota.d.ts +4 -0
  85. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/pl.d.ts +4 -0
  86. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ps.d.ts +4 -0
  87. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/pt.d.ts +4 -0
  88. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ro.d.ts +4 -0
  89. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ru.d.ts +4 -0
  90. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/sl.d.ts +4 -0
  91. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/sv.d.ts +4 -0
  92. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ta.d.ts +4 -0
  93. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/th.d.ts +4 -0
  94. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/tr.d.ts +4 -0
  95. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ua.d.ts +5 -0
  96. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/uk.d.ts +4 -0
  97. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ur.d.ts +4 -0
  98. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/uz.d.ts +4 -0
  99. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/vi.d.ts +4 -0
  100. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/yo.d.ts +4 -0
  101. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/zh-CN.d.ts +4 -0
  102. package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/zh-TW.d.ts +4 -0
  103. package/dist/_types/@internal_voice/dist/index.d.ts +16 -0
  104. package/dist/_types/@internal_voice/dist/voice/aisdk/index.d.ts +3 -0
  105. package/dist/_types/@internal_voice/dist/voice/aisdk/speech.d.ts +23 -0
  106. package/dist/_types/@internal_voice/dist/voice/aisdk/transcription.d.ts +22 -0
  107. package/dist/_types/@internal_voice/dist/voice/composite-voice.d.ts +72 -0
  108. package/dist/_types/@internal_voice/dist/voice/default-voice.d.ts +13 -0
  109. package/dist/_types/@internal_voice/dist/voice/index.d.ts +5 -0
  110. package/dist/_types/@internal_voice/dist/voice/voice.d.ts +172 -0
  111. package/dist/docs/SKILL.md +2 -2
  112. package/dist/docs/assets/SOURCE_MAP.json +1 -1
  113. package/dist/docs/references/docs-agents-adding-voice.md +186 -158
  114. package/dist/docs/references/docs-voice-overview.md +650 -379
  115. package/dist/docs/references/docs-voice-speech-to-text.md +27 -28
  116. package/dist/docs/references/docs-voice-text-to-speech.md +28 -29
  117. package/dist/docs/references/reference-voice-composite-voice.md +37 -37
  118. package/dist/docs/references/reference-voice-openai.md +36 -30
  119. package/dist/docs/references/reference-voice-voice.getSpeakers.md +45 -45
  120. package/dist/docs/references/reference-voice-voice.listen.md +64 -58
  121. package/dist/docs/references/reference-voice-voice.speak.md +65 -55
  122. package/dist/index.cjs +260 -2
  123. package/dist/index.cjs.map +1 -1
  124. package/dist/index.d.ts +1 -1
  125. package/dist/index.d.ts.map +1 -1
  126. package/dist/index.js +259 -1
  127. package/dist/index.js.map +1 -1
  128. package/package.json +10 -11
@@ -4,11 +4,11 @@ The `listen()` method is a core function available in all Mastra voice providers
4
4
 
5
5
  ## Parameters
6
6
 
7
- **audioStream:** (`NodeJS.ReadableStream`): Audio stream to transcribe. This can be a file stream or a microphone stream.
7
+ **audioStream** (`NodeJS.ReadableStream`): Audio stream to transcribe. This can be a file stream or a microphone stream.
8
8
 
9
- **options?:** (`object`): Provider-specific options for speech recognition
9
+ **options** (`object`): Provider-specific options for speech recognition
10
10
 
11
- ## Return Value
11
+ ## Return value
12
12
 
13
13
  Returns one of the following:
14
14
 
@@ -16,81 +16,87 @@ Returns one of the following:
16
16
  - `Promise<NodeJS.ReadableStream>`: A promise that resolves to a stream of transcribed text (for streaming transcription)
17
17
  - `Promise<void>`: For real-time providers that emit 'writing' events instead of returning text directly
18
18
 
19
- ## Provider-Specific Options
19
+ ## Provider-specific options
20
20
 
21
21
  Each voice provider may support additional options specific to their implementation. Here are some examples:
22
22
 
23
23
  ### OpenAI
24
24
 
25
- **options.filetype?:** (`string`): Audio file format (e.g., 'mp3', 'wav', 'm4a') (Default: `'mp3'`)
25
+ **options** (`Options`): Configuration options.
26
26
 
27
- **options.prompt?:** (`string`): Text to guide the model's transcription
27
+ **options.filetype** (`string`): Audio file format (e.g., 'mp3', 'wav', 'm4a')
28
28
 
29
- **options.language?:** (`string`): Language code (e.g., 'en', 'fr', 'de')
29
+ **options.prompt** (`string`): Text to guide the model's transcription
30
+
31
+ **options.language** (`string`): Language code (e.g., 'en', 'fr', 'de')
30
32
 
31
33
  ### Google
32
34
 
33
- **options.stream?:** (`boolean`): Whether to use streaming recognition (Default: `false`)
35
+ **options** (`Options`): Configuration options.
36
+
37
+ **options.stream** (`boolean`): Whether to use streaming recognition
34
38
 
35
- **options.config?:** (`object`): Recognition configuration from Google Cloud Speech-to-Text API (Default: `{ encoding: 'LINEAR16', languageCode: 'en-US' }`)
39
+ **options.config** (`object`): Recognition configuration from Google Cloud Speech-to-Text API
36
40
 
37
41
  ### Deepgram
38
42
 
39
- **options.model?:** (`string`): Deepgram model to use for transcription (Default: `'nova-2'`)
43
+ **options** (`Options`): Configuration options.
40
44
 
41
- **options.language?:** (`string`): Language code for transcription (Default: `'en'`)
45
+ **options.model** (`string`): Deepgram model to use for transcription
42
46
 
43
- ## Usage Example
47
+ **options.language** (`string`): Language code for transcription
48
+
49
+ ## Usage example
44
50
 
45
51
  ```typescript
46
- import { OpenAIVoice } from "@mastra/voice-openai";
47
- import { getMicrophoneStream } from "@mastra/node-audio";
48
- import { createReadStream } from "fs";
49
- import path from "path";
52
+ import { OpenAIVoice } from '@mastra/voice-openai'
53
+ import { getMicrophoneStream } from '@mastra/node-audio'
54
+ import { createReadStream } from 'fs'
55
+ import path from 'path'
50
56
 
51
57
  // Initialize a voice provider
52
58
  const voice = new OpenAIVoice({
53
59
  listeningModel: {
54
- name: "whisper-1",
60
+ name: 'whisper-1',
55
61
  apiKey: process.env.OPENAI_API_KEY,
56
62
  },
57
- });
63
+ })
58
64
 
59
65
  // Basic usage with a file stream
60
- const audioFilePath = path.join(process.cwd(), "audio.mp3");
61
- const audioStream = createReadStream(audioFilePath);
66
+ const audioFilePath = path.join(process.cwd(), 'audio.mp3')
67
+ const audioStream = createReadStream(audioFilePath)
62
68
  const transcript = await voice.listen(audioStream, {
63
- filetype: "mp3",
64
- });
65
- console.log("Transcribed text:", transcript);
69
+ filetype: 'mp3',
70
+ })
71
+ console.log('Transcribed text:', transcript)
66
72
 
67
73
  // Using a microphone stream
68
- const microphoneStream = getMicrophoneStream(); // Assume this function gets audio input
69
- const transcription = await voice.listen(microphoneStream);
74
+ const microphoneStream = getMicrophoneStream() // Assume this function gets audio input
75
+ const transcription = await voice.listen(microphoneStream)
70
76
 
71
77
  // With provider-specific options
72
78
  const transcriptWithOptions = await voice.listen(audioStream, {
73
- language: "en",
74
- prompt: "This is a conversation about artificial intelligence.",
75
- });
79
+ language: 'en',
80
+ prompt: 'This is a conversation about artificial intelligence.',
81
+ })
76
82
  ```
77
83
 
78
- ## Using with CompositeVoice
84
+ ## Using with `CompositeVoice`
79
85
 
80
86
  When using `CompositeVoice`, the `listen()` method delegates to the configured listening provider:
81
87
 
82
88
  ```typescript
83
- import { CompositeVoice } from "@mastra/core/voice";
84
- import { OpenAIVoice } from "@mastra/voice-openai";
85
- import { PlayAIVoice } from "@mastra/voice-playai";
89
+ import { CompositeVoice } from '@mastra/core/voice'
90
+ import { OpenAIVoice } from '@mastra/voice-openai'
91
+ import { PlayAIVoice } from '@mastra/voice-playai'
86
92
 
87
93
  const voice = new CompositeVoice({
88
94
  input: new OpenAIVoice(),
89
95
  output: new PlayAIVoice(),
90
- });
96
+ })
91
97
 
92
98
  // This will use the OpenAIVoice provider
93
- const transcript = await voice.listen(audioStream);
99
+ const transcript = await voice.listen(audioStream)
94
100
  ```
95
101
 
96
102
  ### Using AI SDK Model Providers
@@ -98,18 +104,18 @@ const transcript = await voice.listen(audioStream);
98
104
  You can also use AI SDK transcription models directly with `CompositeVoice`:
99
105
 
100
106
  ```typescript
101
- import { CompositeVoice } from "@mastra/core/voice";
102
- import { openai } from "@ai-sdk/openai";
103
- import { groq } from "@ai-sdk/groq";
107
+ import { CompositeVoice } from '@mastra/core/voice'
108
+ import { openai } from '@ai-sdk/openai'
109
+ import { groq } from '@ai-sdk/groq'
104
110
 
105
111
  // Use AI SDK transcription models
106
112
  const voice = new CompositeVoice({
107
- input: openai.transcription('whisper-1'), // AI SDK model
108
- output: new PlayAIVoice(), // Mastra provider
109
- });
113
+ input: openai.transcription('whisper-1'), // AI SDK model
114
+ output: new PlayAIVoice(), // Mastra provider
115
+ })
110
116
 
111
117
  // Works the same way
112
- const transcript = await voice.listen(audioStream);
118
+ const transcript = await voice.listen(audioStream)
113
119
 
114
120
  // Provider-specific options can be passed through
115
121
  const transcriptWithOptions = await voice.listen(audioStream, {
@@ -117,14 +123,14 @@ const transcriptWithOptions = await voice.listen(audioStream, {
117
123
  openai: {
118
124
  language: 'en',
119
125
  prompt: 'This is about AI',
120
- }
121
- }
122
- });
126
+ },
127
+ },
128
+ })
123
129
  ```
124
130
 
125
131
  See the [CompositeVoice reference](https://mastra.ai/reference/voice/composite-voice) for more details on AI SDK integration.
126
132
 
127
- ## Realtime Voice Providers
133
+ ## Realtime voice providers
128
134
 
129
135
  When using realtime voice providers like `OpenAIRealtimeVoice`, the `listen()` method behaves differently:
130
136
 
@@ -132,20 +138,20 @@ When using realtime voice providers like `OpenAIRealtimeVoice`, the `listen()` m
132
138
  - You need to register an event listener to receive the transcription
133
139
 
134
140
  ```typescript
135
- import { OpenAIRealtimeVoice } from "@mastra/voice-openai-realtime";
136
- import { getMicrophoneStream } from "@mastra/node-audio";
141
+ import { OpenAIRealtimeVoice } from '@mastra/voice-openai-realtime'
142
+ import { getMicrophoneStream } from '@mastra/node-audio'
137
143
 
138
- const voice = new OpenAIRealtimeVoice();
139
- await voice.connect();
144
+ const voice = new OpenAIRealtimeVoice()
145
+ await voice.connect()
140
146
 
141
147
  // Register event listener for transcription
142
- voice.on("writing", ({ text, role }) => {
143
- console.log(`${role}: ${text}`);
144
- });
148
+ voice.on('writing', ({ text, role }) => {
149
+ console.log(`${role}: ${text}`)
150
+ })
145
151
 
146
152
  // This will emit 'writing' events instead of returning text
147
- const microphoneStream = getMicrophoneStream();
148
- await voice.listen(microphoneStream);
153
+ const microphoneStream = getMicrophoneStream()
154
+ await voice.listen(microphoneStream)
149
155
  ```
150
156
 
151
157
  ## Notes
@@ -157,8 +163,8 @@ await voice.listen(microphoneStream);
157
163
  - Some providers support streaming transcription, where text is returned as it's transcribed
158
164
  - For best performance, consider closing or ending the audio stream when you're done with it
159
165
 
160
- ## Related Methods
166
+ ## Related methods
161
167
 
162
- - [voice.speak()](https://mastra.ai/reference/voice/voice.speak) - Converts text to speech
163
- - [voice.send()](https://mastra.ai/reference/voice/voice.send) - Sends audio data to the voice provider in real-time
164
- - [voice.on()](https://mastra.ai/reference/voice/voice.on) - Registers an event listener for voice events
168
+ - [voice.speak()](https://mastra.ai/reference/voice/voice.speak): Converts text to speech
169
+ - [voice.send()](https://mastra.ai/reference/voice/voice.send): Sends audio data to the voice provider in real-time
170
+ - [voice.on()](https://mastra.ai/reference/voice/voice.on): Registers an event listener for voice events
@@ -4,88 +4,98 @@ The `speak()` method is a core function available in all Mastra voice providers
4
4
 
5
5
  ## Parameters
6
6
 
7
- **input:** (`string | NodeJS.ReadableStream`): Text to convert to speech. Can be a string or a readable stream of text.
7
+ **input** (`string | NodeJS.ReadableStream`): Text to convert to speech. Can be a string or a readable stream of text.
8
8
 
9
- **options?:** (`object`): Options for speech synthesis
9
+ **options** (`object`): Options for speech synthesis
10
10
 
11
- **options.speaker?:** (`string`): Voice ID to use for this specific request. Overrides the default speaker set in the constructor.
11
+ **options.speaker** (`string`): Voice ID to use for this specific request. Overrides the default speaker set in the constructor.
12
12
 
13
- ## Return Value
13
+ ## Return value
14
14
 
15
15
  Returns a `Promise<NodeJS.ReadableStream | void>` where:
16
16
 
17
17
  - `NodeJS.ReadableStream`: A stream of audio data that can be played or saved
18
18
  - `void`: When using a realtime voice provider that emits audio through events instead of returning it directly
19
19
 
20
- ## Provider-Specific Options
20
+ ## Provider-specific options
21
21
 
22
22
  Each voice provider may support additional options specific to their implementation. Here are some examples:
23
23
 
24
24
  ### OpenAI
25
25
 
26
- **options.speed?:** (`number`): Speech speed multiplier. Values between 0.25 and 4.0 are supported. (Default: `1.0`)
26
+ **options** (`Options`): Configuration options.
27
+
28
+ **options.speed** (`number`): Speech speed multiplier. Values between 0.25 and 4.0 are supported.
27
29
 
28
30
  ### ElevenLabs
29
31
 
30
- **options.stability?:** (`number`): Voice stability. Higher values result in more stable, less expressive speech. (Default: `0.5`)
32
+ **options** (`Options`): Configuration options.
33
+
34
+ **options.stability** (`number`): Voice stability. Higher values result in more stable, less expressive speech.
31
35
 
32
- **options.similarity\_boost?:** (`number`): Voice clarity and similarity to the original voice. (Default: `0.75`)
36
+ **options.similarity\_boost** (`number`): Voice clarity and similarity to the original voice.
33
37
 
34
38
  ### Google
35
39
 
36
- **options.languageCode?:** (`string`): Language code for the voice (e.g., 'en-US').
40
+ **options** (`Options`): Configuration options.
41
+
42
+ **options.languageCode** (`string`): Language code for the voice (e.g., 'en-US').
37
43
 
38
- **options.audioConfig?:** (`object`): Audio configuration options from Google Cloud Text-to-Speech API. (Default: `{ audioEncoding: 'LINEAR16' }`)
44
+ **options.audioConfig** (`object`): Audio configuration options from Google Cloud Text-to-Speech API.
39
45
 
40
46
  ### Murf
41
47
 
42
- **options.properties.rate?:** (`number`): Speech rate multiplier.
48
+ **options** (`Options`): Configuration options.
49
+
50
+ **options.properties** (`object`): properties configuration.
51
+
52
+ **options.properties.rate** (`number`): Speech rate multiplier.
43
53
 
44
- **options.properties.pitch?:** (`number`): Voice pitch adjustment.
54
+ **options.properties.pitch** (`number`): Voice pitch adjustment.
45
55
 
46
- **options.properties.format?:** (`'MP3' | 'WAV' | 'FLAC' | 'ALAW' | 'ULAW'`): Output audio format.
56
+ **options.properties.format** (`'MP3' | 'WAV' | 'FLAC' | 'ALAW' | 'ULAW'`): Output audio format.
47
57
 
48
- ## Usage Example
58
+ ## Usage example
49
59
 
50
60
  ```typescript
51
- import { OpenAIVoice } from "@mastra/voice-openai";
61
+ import { OpenAIVoice } from '@mastra/voice-openai'
52
62
  // Initialize a voice provider
53
63
  const voice = new OpenAIVoice({
54
- speaker: "alloy", // Default voice
55
- });
64
+ speaker: 'alloy', // Default voice
65
+ })
56
66
  // Basic usage with default settings
57
- const audioStream = await voice.speak("Hello, world!");
67
+ const audioStream = await voice.speak('Hello, world!')
58
68
  // Using a different voice for this specific request
59
- const audioStreamWithDifferentVoice = await voice.speak("Hello again!", {
60
- speaker: "nova",
61
- });
69
+ const audioStreamWithDifferentVoice = await voice.speak('Hello again!', {
70
+ speaker: 'nova',
71
+ })
62
72
  // Using provider-specific options
63
- const audioStreamWithOptions = await voice.speak("Hello with options!", {
64
- speaker: "echo",
73
+ const audioStreamWithOptions = await voice.speak('Hello with options!', {
74
+ speaker: 'echo',
65
75
  speed: 1.2, // OpenAI-specific option
66
- });
76
+ })
67
77
  // Using a text stream as input
68
- import { Readable } from "stream";
69
- const textStream = Readable.from(["Hello", " from", " a", " stream!"]);
70
- const audioStreamFromTextStream = await voice.speak(textStream);
78
+ import { Readable } from 'stream'
79
+ const textStream = Readable.from(['Hello', ' from', ' a', ' stream!'])
80
+ const audioStreamFromTextStream = await voice.speak(textStream)
71
81
  ```
72
82
 
73
- ## Using with CompositeVoice
83
+ ## Using with `CompositeVoice`
74
84
 
75
85
  When using `CompositeVoice`, the `speak()` method delegates to the configured speaking provider:
76
86
 
77
87
  ```typescript
78
- import { CompositeVoice } from "@mastra/core/voice";
79
- import { OpenAIVoice } from "@mastra/voice-openai";
80
- import { PlayAIVoice } from "@mastra/voice-playai";
88
+ import { CompositeVoice } from '@mastra/core/voice'
89
+ import { OpenAIVoice } from '@mastra/voice-openai'
90
+ import { PlayAIVoice } from '@mastra/voice-playai'
81
91
 
82
92
  const voice = new CompositeVoice({
83
93
  output: new PlayAIVoice(),
84
94
  input: new OpenAIVoice(),
85
- });
95
+ })
86
96
 
87
97
  // This will use the PlayAIVoice provider
88
- const audioStream = await voice.speak("Hello, world!");
98
+ const audioStream = await voice.speak('Hello, world!')
89
99
  ```
90
100
 
91
101
  ### Using AI SDK Model Providers
@@ -93,34 +103,34 @@ const audioStream = await voice.speak("Hello, world!");
93
103
  You can also use AI SDK speech models directly with `CompositeVoice`:
94
104
 
95
105
  ```typescript
96
- import { CompositeVoice } from "@mastra/core/voice";
97
- import { openai } from "@ai-sdk/openai";
98
- import { elevenlabs } from "@ai-sdk/elevenlabs";
106
+ import { CompositeVoice } from '@mastra/core/voice'
107
+ import { openai } from '@ai-sdk/openai'
108
+ import { elevenlabs } from '@ai-sdk/elevenlabs'
99
109
 
100
110
  // Use AI SDK speech models
101
111
  const voice = new CompositeVoice({
102
- output: elevenlabs.speech('eleven_turbo_v2'), // AI SDK model
103
- input: openai.transcription('whisper-1'), // AI SDK model
104
- });
112
+ output: elevenlabs.speech('eleven_turbo_v2'), // AI SDK model
113
+ input: openai.transcription('whisper-1'), // AI SDK model
114
+ })
105
115
 
106
116
  // Works the same way
107
- const audioStream = await voice.speak("Hello from AI SDK!");
117
+ const audioStream = await voice.speak('Hello from AI SDK!')
108
118
 
109
119
  // Provider-specific options can be passed through
110
- const audioWithOptions = await voice.speak("Hello with options!", {
111
- speaker: 'Rachel', // ElevenLabs voice
120
+ const audioWithOptions = await voice.speak('Hello with options!', {
121
+ speaker: 'Rachel', // ElevenLabs voice
112
122
  providerOptions: {
113
123
  elevenlabs: {
114
124
  stability: 0.5,
115
125
  similarity_boost: 0.75,
116
- }
117
- }
118
- });
126
+ },
127
+ },
128
+ })
119
129
  ```
120
130
 
121
131
  See the [CompositeVoice reference](https://mastra.ai/reference/voice/composite-voice) for more details on AI SDK integration.
122
132
 
123
- ## Realtime Voice Providers
133
+ ## Realtime voice providers
124
134
 
125
135
  When using realtime voice providers like `OpenAIRealtimeVoice`, the `speak()` method behaves differently:
126
136
 
@@ -128,24 +138,24 @@ When using realtime voice providers like `OpenAIRealtimeVoice`, the `speak()` me
128
138
  - You need to register an event listener to receive the audio chunks
129
139
 
130
140
  ```typescript
131
- import { OpenAIRealtimeVoice } from "@mastra/voice-openai-realtime";
132
- import Speaker from "@mastra/node-speaker";
141
+ import { OpenAIRealtimeVoice } from '@mastra/voice-openai-realtime'
142
+ import Speaker from '@mastra/node-speaker'
133
143
 
134
144
  const speaker = new Speaker({
135
145
  sampleRate: 24100, // Audio sample rate in Hz - standard for high-quality audio on MacBook Pro
136
146
  channels: 1, // Mono audio output (as opposed to stereo which would be 2)
137
147
  bitDepth: 16, // Bit depth for audio quality - CD quality standard (16-bit resolution)
138
- });
148
+ })
139
149
 
140
- const voice = new OpenAIRealtimeVoice();
141
- await voice.connect();
150
+ const voice = new OpenAIRealtimeVoice()
151
+ await voice.connect()
142
152
  // Register event listener for audio chunks
143
- voice.on("speaker", (stream) => {
153
+ voice.on('speaker', stream => {
144
154
  // Handle audio chunk (e.g., play it or save it)
145
- stream.pipe(speaker);
146
- });
155
+ stream.pipe(speaker)
156
+ })
147
157
  // This will emit 'speaking' events instead of returning a stream
148
- await voice.speak("Hello, this is realtime speech!");
158
+ await voice.speak('Hello, this is realtime speech!')
149
159
  ```
150
160
 
151
161
  ## Notes