@mastra/mcp-docs-server 0.0.0-commonjs-20250414101718

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (254) hide show
  1. package/.docs/organized/changelogs/%40mastra%2Fastra.md +302 -0
  2. package/.docs/organized/changelogs/%40mastra%2Fchroma.md +302 -0
  3. package/.docs/organized/changelogs/%40mastra%2Fclickhouse.md +161 -0
  4. package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +302 -0
  5. package/.docs/organized/changelogs/%40mastra%2Fcloudflare.md +110 -0
  6. package/.docs/organized/changelogs/%40mastra%2Fcore.md +302 -0
  7. package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +302 -0
  8. package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +302 -0
  9. package/.docs/organized/changelogs/%40mastra%2Fdeployer-vercel.md +302 -0
  10. package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +302 -0
  11. package/.docs/organized/changelogs/%40mastra%2Fevals.md +302 -0
  12. package/.docs/organized/changelogs/%40mastra%2Ffirecrawl.md +302 -0
  13. package/.docs/organized/changelogs/%40mastra%2Fgithub.md +302 -0
  14. package/.docs/organized/changelogs/%40mastra%2Floggers.md +302 -0
  15. package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +302 -0
  16. package/.docs/organized/changelogs/%40mastra%2Fmcp-registry-registry.md +26 -0
  17. package/.docs/organized/changelogs/%40mastra%2Fmcp.md +302 -0
  18. package/.docs/organized/changelogs/%40mastra%2Fmem0.md +196 -0
  19. package/.docs/organized/changelogs/%40mastra%2Fmemory.md +302 -0
  20. package/.docs/organized/changelogs/%40mastra%2Fpg.md +302 -0
  21. package/.docs/organized/changelogs/%40mastra%2Fpinecone.md +302 -0
  22. package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +302 -0
  23. package/.docs/organized/changelogs/%40mastra%2Fqdrant.md +302 -0
  24. package/.docs/organized/changelogs/%40mastra%2Frag.md +302 -0
  25. package/.docs/organized/changelogs/%40mastra%2Fragie.md +302 -0
  26. package/.docs/organized/changelogs/%40mastra%2Fserver.md +302 -0
  27. package/.docs/organized/changelogs/%40mastra%2Fspeech-azure.md +302 -0
  28. package/.docs/organized/changelogs/%40mastra%2Fspeech-deepgram.md +302 -0
  29. package/.docs/organized/changelogs/%40mastra%2Fspeech-elevenlabs.md +302 -0
  30. package/.docs/organized/changelogs/%40mastra%2Fspeech-google.md +302 -0
  31. package/.docs/organized/changelogs/%40mastra%2Fspeech-ibm.md +302 -0
  32. package/.docs/organized/changelogs/%40mastra%2Fspeech-murf.md +302 -0
  33. package/.docs/organized/changelogs/%40mastra%2Fspeech-openai.md +302 -0
  34. package/.docs/organized/changelogs/%40mastra%2Fspeech-playai.md +302 -0
  35. package/.docs/organized/changelogs/%40mastra%2Fspeech-replicate.md +302 -0
  36. package/.docs/organized/changelogs/%40mastra%2Fspeech-speechify.md +302 -0
  37. package/.docs/organized/changelogs/%40mastra%2Fturbopuffer.md +302 -0
  38. package/.docs/organized/changelogs/%40mastra%2Fupstash.md +302 -0
  39. package/.docs/organized/changelogs/%40mastra%2Fvectorize.md +302 -0
  40. package/.docs/organized/changelogs/%40mastra%2Fvoice-azure.md +250 -0
  41. package/.docs/organized/changelogs/%40mastra%2Fvoice-cloudflare.md +250 -0
  42. package/.docs/organized/changelogs/%40mastra%2Fvoice-deepgram.md +302 -0
  43. package/.docs/organized/changelogs/%40mastra%2Fvoice-elevenlabs.md +302 -0
  44. package/.docs/organized/changelogs/%40mastra%2Fvoice-google.md +302 -0
  45. package/.docs/organized/changelogs/%40mastra%2Fvoice-murf.md +302 -0
  46. package/.docs/organized/changelogs/%40mastra%2Fvoice-openai-realtime.md +302 -0
  47. package/.docs/organized/changelogs/%40mastra%2Fvoice-openai.md +302 -0
  48. package/.docs/organized/changelogs/%40mastra%2Fvoice-playai.md +302 -0
  49. package/.docs/organized/changelogs/%40mastra%2Fvoice-sarvam.md +302 -0
  50. package/.docs/organized/changelogs/%40mastra%2Fvoice-speechify.md +302 -0
  51. package/.docs/organized/changelogs/create-mastra.md +302 -0
  52. package/.docs/organized/changelogs/mastra.md +302 -0
  53. package/.docs/organized/code-examples/agent-network.md +282 -0
  54. package/.docs/organized/code-examples/agent.md +388 -0
  55. package/.docs/organized/code-examples/ai-sdk-useChat.md +378 -0
  56. package/.docs/organized/code-examples/assistant-ui.md +37 -0
  57. package/.docs/organized/code-examples/bird-checker-with-express.md +235 -0
  58. package/.docs/organized/code-examples/bird-checker-with-nextjs-and-eval.md +360 -0
  59. package/.docs/organized/code-examples/bird-checker-with-nextjs.md +250 -0
  60. package/.docs/organized/code-examples/client-side-tools.md +69 -0
  61. package/.docs/organized/code-examples/crypto-chatbot.md +96 -0
  62. package/.docs/organized/code-examples/fireworks-r1.md +159 -0
  63. package/.docs/organized/code-examples/mcp-registry-registry.md +63 -0
  64. package/.docs/organized/code-examples/memory-todo-agent.md +164 -0
  65. package/.docs/organized/code-examples/memory-with-context.md +167 -0
  66. package/.docs/organized/code-examples/memory-with-libsql.md +204 -0
  67. package/.docs/organized/code-examples/memory-with-mem0.md +121 -0
  68. package/.docs/organized/code-examples/memory-with-pg.md +224 -0
  69. package/.docs/organized/code-examples/memory-with-upstash.md +268 -0
  70. package/.docs/organized/code-examples/quick-start.md +129 -0
  71. package/.docs/organized/code-examples/stock-price-tool.md +124 -0
  72. package/.docs/organized/code-examples/weather-agent.md +353 -0
  73. package/.docs/organized/code-examples/workflow-ai-recruiter.md +159 -0
  74. package/.docs/organized/code-examples/workflow-with-inline-steps.md +111 -0
  75. package/.docs/organized/code-examples/workflow-with-memory.md +393 -0
  76. package/.docs/organized/code-examples/workflow-with-separate-steps.md +131 -0
  77. package/.docs/raw/agents/adding-tools.mdx +317 -0
  78. package/.docs/raw/agents/adding-voice.mdx +175 -0
  79. package/.docs/raw/agents/agent-memory.mdx +62 -0
  80. package/.docs/raw/agents/mcp-guide.mdx +215 -0
  81. package/.docs/raw/agents/overview.mdx +303 -0
  82. package/.docs/raw/community/discord.mdx +12 -0
  83. package/.docs/raw/community/licensing.mdx +63 -0
  84. package/.docs/raw/deployment/client.mdx +120 -0
  85. package/.docs/raw/deployment/deployment.mdx +127 -0
  86. package/.docs/raw/deployment/server.mdx +282 -0
  87. package/.docs/raw/evals/custom-eval.mdx +22 -0
  88. package/.docs/raw/evals/overview.mdx +95 -0
  89. package/.docs/raw/evals/running-in-ci.mdx +81 -0
  90. package/.docs/raw/evals/textual-evals.mdx +54 -0
  91. package/.docs/raw/faq/index.mdx +63 -0
  92. package/.docs/raw/frameworks/ai-sdk.mdx +296 -0
  93. package/.docs/raw/frameworks/next-js.mdx +238 -0
  94. package/.docs/raw/getting-started/installation.mdx +442 -0
  95. package/.docs/raw/getting-started/mcp-docs-server.mdx +141 -0
  96. package/.docs/raw/getting-started/project-structure.mdx +80 -0
  97. package/.docs/raw/index.mdx +22 -0
  98. package/.docs/raw/integrations/index.mdx +213 -0
  99. package/.docs/raw/local-dev/add-to-existing-project.mdx +48 -0
  100. package/.docs/raw/local-dev/creating-a-new-project.mdx +54 -0
  101. package/.docs/raw/local-dev/mastra-dev.mdx +108 -0
  102. package/.docs/raw/memory/memory-processors.mdx +131 -0
  103. package/.docs/raw/memory/overview.mdx +119 -0
  104. package/.docs/raw/memory/semantic-recall.mdx +122 -0
  105. package/.docs/raw/memory/working-memory.mdx +87 -0
  106. package/.docs/raw/observability/logging.mdx +38 -0
  107. package/.docs/raw/observability/nextjs-tracing.mdx +108 -0
  108. package/.docs/raw/observability/tracing.mdx +115 -0
  109. package/.docs/raw/rag/chunking-and-embedding.mdx +156 -0
  110. package/.docs/raw/rag/overview.mdx +85 -0
  111. package/.docs/raw/rag/retrieval.mdx +365 -0
  112. package/.docs/raw/rag/vector-databases.mdx +340 -0
  113. package/.docs/raw/reference/agents/createTool.mdx +229 -0
  114. package/.docs/raw/reference/agents/generate.mdx +334 -0
  115. package/.docs/raw/reference/agents/getAgent.mdx +54 -0
  116. package/.docs/raw/reference/agents/stream.mdx +369 -0
  117. package/.docs/raw/reference/cli/build.mdx +55 -0
  118. package/.docs/raw/reference/cli/dev.mdx +134 -0
  119. package/.docs/raw/reference/cli/init.mdx +43 -0
  120. package/.docs/raw/reference/client-js/agents.mdx +107 -0
  121. package/.docs/raw/reference/client-js/error-handling.mdx +38 -0
  122. package/.docs/raw/reference/client-js/logs.mdx +24 -0
  123. package/.docs/raw/reference/client-js/memory.mdx +97 -0
  124. package/.docs/raw/reference/client-js/telemetry.mdx +20 -0
  125. package/.docs/raw/reference/client-js/tools.mdx +44 -0
  126. package/.docs/raw/reference/client-js/vectors.mdx +79 -0
  127. package/.docs/raw/reference/client-js/workflows.mdx +136 -0
  128. package/.docs/raw/reference/core/mastra-class.mdx +232 -0
  129. package/.docs/raw/reference/deployer/cloudflare.mdx +207 -0
  130. package/.docs/raw/reference/deployer/deployer.mdx +159 -0
  131. package/.docs/raw/reference/deployer/netlify.mdx +109 -0
  132. package/.docs/raw/reference/deployer/vercel.mdx +117 -0
  133. package/.docs/raw/reference/evals/answer-relevancy.mdx +186 -0
  134. package/.docs/raw/reference/evals/bias.mdx +186 -0
  135. package/.docs/raw/reference/evals/completeness.mdx +174 -0
  136. package/.docs/raw/reference/evals/content-similarity.mdx +183 -0
  137. package/.docs/raw/reference/evals/context-position.mdx +190 -0
  138. package/.docs/raw/reference/evals/context-precision.mdx +189 -0
  139. package/.docs/raw/reference/evals/context-relevancy.mdx +188 -0
  140. package/.docs/raw/reference/evals/contextual-recall.mdx +191 -0
  141. package/.docs/raw/reference/evals/faithfulness.mdx +193 -0
  142. package/.docs/raw/reference/evals/hallucination.mdx +219 -0
  143. package/.docs/raw/reference/evals/keyword-coverage.mdx +176 -0
  144. package/.docs/raw/reference/evals/prompt-alignment.mdx +238 -0
  145. package/.docs/raw/reference/evals/summarization.mdx +205 -0
  146. package/.docs/raw/reference/evals/textual-difference.mdx +161 -0
  147. package/.docs/raw/reference/evals/tone-consistency.mdx +181 -0
  148. package/.docs/raw/reference/evals/toxicity.mdx +165 -0
  149. package/.docs/raw/reference/index.mdx +12 -0
  150. package/.docs/raw/reference/memory/Memory.mdx +212 -0
  151. package/.docs/raw/reference/memory/createThread.mdx +95 -0
  152. package/.docs/raw/reference/memory/getThreadById.mdx +46 -0
  153. package/.docs/raw/reference/memory/getThreadsByResourceId.mdx +48 -0
  154. package/.docs/raw/reference/memory/query.mdx +167 -0
  155. package/.docs/raw/reference/networks/agent-network.mdx +159 -0
  156. package/.docs/raw/reference/observability/create-logger.mdx +106 -0
  157. package/.docs/raw/reference/observability/logger.mdx +55 -0
  158. package/.docs/raw/reference/observability/otel-config.mdx +120 -0
  159. package/.docs/raw/reference/observability/providers/braintrust.mdx +40 -0
  160. package/.docs/raw/reference/observability/providers/dash0.mdx +40 -0
  161. package/.docs/raw/reference/observability/providers/index.mdx +16 -0
  162. package/.docs/raw/reference/observability/providers/laminar.mdx +41 -0
  163. package/.docs/raw/reference/observability/providers/langfuse.mdx +51 -0
  164. package/.docs/raw/reference/observability/providers/langsmith.mdx +48 -0
  165. package/.docs/raw/reference/observability/providers/langwatch.mdx +45 -0
  166. package/.docs/raw/reference/observability/providers/new-relic.mdx +40 -0
  167. package/.docs/raw/reference/observability/providers/signoz.mdx +40 -0
  168. package/.docs/raw/reference/observability/providers/traceloop.mdx +40 -0
  169. package/.docs/raw/reference/rag/astra.mdx +258 -0
  170. package/.docs/raw/reference/rag/chroma.mdx +281 -0
  171. package/.docs/raw/reference/rag/chunk.mdx +235 -0
  172. package/.docs/raw/reference/rag/document.mdx +127 -0
  173. package/.docs/raw/reference/rag/embeddings.mdx +160 -0
  174. package/.docs/raw/reference/rag/extract-params.mdx +226 -0
  175. package/.docs/raw/reference/rag/graph-rag.mdx +182 -0
  176. package/.docs/raw/reference/rag/libsql.mdx +357 -0
  177. package/.docs/raw/reference/rag/metadata-filters.mdx +298 -0
  178. package/.docs/raw/reference/rag/pg.mdx +477 -0
  179. package/.docs/raw/reference/rag/pinecone.mdx +281 -0
  180. package/.docs/raw/reference/rag/qdrant.mdx +236 -0
  181. package/.docs/raw/reference/rag/rerank.mdx +212 -0
  182. package/.docs/raw/reference/rag/turbopuffer.mdx +249 -0
  183. package/.docs/raw/reference/rag/upstash.mdx +247 -0
  184. package/.docs/raw/reference/rag/vectorize.mdx +298 -0
  185. package/.docs/raw/reference/storage/libsql.mdx +74 -0
  186. package/.docs/raw/reference/storage/postgresql.mdx +48 -0
  187. package/.docs/raw/reference/storage/upstash.mdx +86 -0
  188. package/.docs/raw/reference/tools/client.mdx +207 -0
  189. package/.docs/raw/reference/tools/document-chunker-tool.mdx +141 -0
  190. package/.docs/raw/reference/tools/graph-rag-tool.mdx +154 -0
  191. package/.docs/raw/reference/tools/mcp-configuration.mdx +206 -0
  192. package/.docs/raw/reference/tools/vector-query-tool.mdx +212 -0
  193. package/.docs/raw/reference/voice/composite-voice.mdx +140 -0
  194. package/.docs/raw/reference/voice/deepgram.mdx +164 -0
  195. package/.docs/raw/reference/voice/elevenlabs.mdx +216 -0
  196. package/.docs/raw/reference/voice/google.mdx +198 -0
  197. package/.docs/raw/reference/voice/mastra-voice.mdx +394 -0
  198. package/.docs/raw/reference/voice/murf.mdx +251 -0
  199. package/.docs/raw/reference/voice/openai-realtime.mdx +431 -0
  200. package/.docs/raw/reference/voice/openai.mdx +168 -0
  201. package/.docs/raw/reference/voice/playai.mdx +159 -0
  202. package/.docs/raw/reference/voice/sarvam.mdx +260 -0
  203. package/.docs/raw/reference/voice/speechify.mdx +145 -0
  204. package/.docs/raw/reference/voice/voice.answer.mdx +122 -0
  205. package/.docs/raw/reference/voice/voice.connect.mdx +124 -0
  206. package/.docs/raw/reference/voice/voice.listen.mdx +195 -0
  207. package/.docs/raw/reference/voice/voice.on.mdx +189 -0
  208. package/.docs/raw/reference/voice/voice.send.mdx +118 -0
  209. package/.docs/raw/reference/voice/voice.speak.mdx +203 -0
  210. package/.docs/raw/reference/workflows/after.mdx +88 -0
  211. package/.docs/raw/reference/workflows/afterEvent.mdx +76 -0
  212. package/.docs/raw/reference/workflows/commit.mdx +37 -0
  213. package/.docs/raw/reference/workflows/createRun.mdx +77 -0
  214. package/.docs/raw/reference/workflows/else.mdx +72 -0
  215. package/.docs/raw/reference/workflows/events.mdx +305 -0
  216. package/.docs/raw/reference/workflows/execute.mdx +110 -0
  217. package/.docs/raw/reference/workflows/if.mdx +107 -0
  218. package/.docs/raw/reference/workflows/resume.mdx +155 -0
  219. package/.docs/raw/reference/workflows/resumeWithEvent.mdx +133 -0
  220. package/.docs/raw/reference/workflows/snapshots.mdx +207 -0
  221. package/.docs/raw/reference/workflows/start.mdx +84 -0
  222. package/.docs/raw/reference/workflows/step-class.mdx +100 -0
  223. package/.docs/raw/reference/workflows/step-condition.mdx +134 -0
  224. package/.docs/raw/reference/workflows/step-function.mdx +92 -0
  225. package/.docs/raw/reference/workflows/step-options.mdx +69 -0
  226. package/.docs/raw/reference/workflows/step-retries.mdx +203 -0
  227. package/.docs/raw/reference/workflows/suspend.mdx +70 -0
  228. package/.docs/raw/reference/workflows/then.mdx +74 -0
  229. package/.docs/raw/reference/workflows/until.mdx +165 -0
  230. package/.docs/raw/reference/workflows/watch.mdx +118 -0
  231. package/.docs/raw/reference/workflows/while.mdx +168 -0
  232. package/.docs/raw/reference/workflows/workflow.mdx +233 -0
  233. package/.docs/raw/storage/overview.mdx +378 -0
  234. package/.docs/raw/voice/overview.mdx +135 -0
  235. package/.docs/raw/voice/speech-to-text.mdx +45 -0
  236. package/.docs/raw/voice/text-to-speech.mdx +52 -0
  237. package/.docs/raw/voice/voice-to-voice.mdx +310 -0
  238. package/.docs/raw/workflows/control-flow.mdx +778 -0
  239. package/.docs/raw/workflows/dynamic-workflows.mdx +236 -0
  240. package/.docs/raw/workflows/error-handling.mdx +183 -0
  241. package/.docs/raw/workflows/nested-workflows.mdx +352 -0
  242. package/.docs/raw/workflows/overview.mdx +203 -0
  243. package/.docs/raw/workflows/steps.mdx +108 -0
  244. package/.docs/raw/workflows/suspend-and-resume.mdx +404 -0
  245. package/.docs/raw/workflows/variables.mdx +313 -0
  246. package/LICENSE.md +46 -0
  247. package/README.md +129 -0
  248. package/dist/_tsup-dts-rollup.d.ts +149 -0
  249. package/dist/chunk-QWYMT5LP.js +194 -0
  250. package/dist/prepare-docs/prepare.d.ts +1 -0
  251. package/dist/prepare-docs/prepare.js +1 -0
  252. package/dist/stdio.d.ts +1 -0
  253. package/dist/stdio.js +518 -0
  254. package/package.json +60 -0
@@ -0,0 +1,431 @@
1
+ ---
2
+ title: "Reference: OpenAI Realtime Voice | Voice Providers | Mastra Docs"
3
+ description: "Documentation for the OpenAIRealtimeVoice class, providing real-time text-to-speech and speech-to-text capabilities via WebSockets."
4
+ ---
5
+
6
+ # OpenAI Realtime Voice
7
+
8
+ The OpenAIRealtimeVoice class provides real-time voice interaction capabilities using OpenAI's WebSocket-based API. It supports real-time speech-to-speech, voice activity detection, and event-based audio streaming.
9
+
10
+ ## Usage Example
11
+
12
+ ```typescript
13
+ import { OpenAIRealtimeVoice } from "@mastra/voice-openai-realtime";
14
+
15
+ // Initialize with default configuration using environment variables
16
+ const voice = new OpenAIRealtimeVoice();
17
+
18
+ // Or initialize with specific configuration
19
+ const voiceWithConfig = new OpenAIRealtimeVoice({
20
+ chatModel: {
21
+ apiKey: 'your-openai-api-key',
22
+ model: 'gpt-4o-mini-realtime-preview-2024-12-17',
23
+ options: {
24
+ sessionConfig: {
25
+ turn_detection: {
26
+ type: 'server_vad',
27
+ threshold: 0.6,
28
+ silence_duration_ms: 1200
29
+ }
30
+ }
31
+ }
32
+ },
33
+ speaker: 'alloy' // Default voice
34
+ });
35
+
36
+ // Establish connection
37
+ await voice.connect();
38
+
39
+ // Set up event listeners
40
+ voice.on('speaking', ({ audio }) => {
41
+ // Handle audio data (Int16Array, PCM format by default)
42
+ playAudio(audio);
43
+ });
44
+
45
+ voice.on('writing', ({ text, role }) => {
46
+ // Handle transcribed text
47
+ console.log(`${role}: ${text}`);
48
+ });
49
+
50
+ // Convert text to speech
51
+ await voice.speak('Hello, how can I help you today?', {
52
+ speaker: 'echo' // Override default voice
53
+ });
54
+
55
+ // Process audio input
56
+ const microphoneStream = getMicrophoneStream();
57
+ await voice.send(microphoneStream);
58
+
59
+ // When done, disconnect
60
+ voice.close();
61
+ ```
62
+
63
+ ## Configuration
64
+
65
+ ### Constructor Options
66
+
67
+ <PropertiesTable
68
+ content={[
69
+ {
70
+ name: "chatModel",
71
+ type: "object",
72
+ description: "Configuration for the OpenAI realtime model.",
73
+ isOptional: true,
74
+ defaultValue: "{}",
75
+ },
76
+ {
77
+ name: "speaker",
78
+ type: "string",
79
+ description: "Default voice ID for speech synthesis.",
80
+ isOptional: true,
81
+ defaultValue: "'alloy'",
82
+ },
83
+ ]}
84
+ />
85
+
86
+ ### chatModel
87
+
88
+ <PropertiesTable
89
+ content={[
90
+ {
91
+ name: "model",
92
+ type: "string",
93
+ description: "The model ID to use for real-time voice interactions.",
94
+ isOptional: true,
95
+ defaultValue: "'gpt-4o-mini-realtime-preview-2024-12-17'",
96
+ },
97
+ {
98
+ name: "apiKey",
99
+ type: "string",
100
+ description: "OpenAI API key. Falls back to OPENAI_API_KEY environment variable.",
101
+ isOptional: true,
102
+ },
103
+ {
104
+ name: "tools",
105
+ type: "ToolsInput",
106
+ description: "Tools configuration for extending model capabilities. When OpenAIRealtimeVoice is added to an Agent, any tools configured for the Agent will automatically be available to the voice interface.",
107
+ isOptional: true,
108
+ },
109
+ {
110
+ name: "options",
111
+ type: "object",
112
+ description: "Additional options for the realtime client.",
113
+ isOptional: true,
114
+ },
115
+ ]}
116
+ />
117
+
118
+ ### options
119
+
120
+ <PropertiesTable
121
+ content={[
122
+ {
123
+ name: "sessionConfig",
124
+ type: "Realtime.SessionConfig",
125
+ description: "Configuration for the realtime session.",
126
+ isOptional: true,
127
+ },
128
+ {
129
+ name: "url",
130
+ type: "string",
131
+ description: "Custom WebSocket URL.",
132
+ isOptional: true,
133
+ },
134
+ {
135
+ name: "dangerouslyAllowAPIKeyInBrowser",
136
+ type: "boolean",
137
+ description: "Whether to allow API key in browser environments.",
138
+ isOptional: true,
139
+ defaultValue: "false",
140
+ },
141
+ {
142
+ name: "debug",
143
+ type: "boolean",
144
+ description: "Enable debug logging.",
145
+ isOptional: true,
146
+ defaultValue: "false",
147
+ },
148
+ ]}
149
+ />
150
+
151
+ ### Voice Activity Detection (VAD) Configuration
152
+
153
+ <PropertiesTable
154
+ content={[
155
+ {
156
+ name: "type",
157
+ type: "string",
158
+ description: "Type of VAD to use. Server-side VAD provides better accuracy.",
159
+ isOptional: true,
160
+ defaultValue: "'server_vad'",
161
+ },
162
+ {
163
+ name: "threshold",
164
+ type: "number",
165
+ description: "Speech detection sensitivity (0.0-1.0).",
166
+ isOptional: true,
167
+ defaultValue: "0.5",
168
+ },
169
+ {
170
+ name: "prefix_padding_ms",
171
+ type: "number",
172
+ description: "Milliseconds of audio to include before speech is detected.",
173
+ isOptional: true,
174
+ defaultValue: "1000",
175
+ },
176
+ {
177
+ name: "silence_duration_ms",
178
+ type: "number",
179
+ description: "Milliseconds of silence before ending a turn.",
180
+ isOptional: true,
181
+ defaultValue: "1000",
182
+ },
183
+ ]}
184
+ />
185
+
186
+ ## Methods
187
+
188
+ ### connect()
189
+
190
+ Establishes a connection to the OpenAI realtime service. Must be called before using speak, listen, or send functions.
191
+
192
+ <PropertiesTable
193
+ content={[
194
+ {
195
+ name: "returns",
196
+ type: "Promise<void>",
197
+ description: "Promise that resolves when the connection is established.",
198
+ },
199
+ ]}
200
+ />
201
+
202
+ ### speak()
203
+
204
+ Emits a speaking event using the configured voice model. Can accept either a string or a readable stream as input.
205
+
206
+ <PropertiesTable
207
+ content={[
208
+ {
209
+ name: "input",
210
+ type: "string | NodeJS.ReadableStream",
211
+ description: "Text or text stream to convert to speech.",
212
+ isOptional: false,
213
+ },
214
+ {
215
+ name: "options.speaker",
216
+ type: "string",
217
+ description: "Voice ID to use for this specific speech request.",
218
+ isOptional: true,
219
+ defaultValue: "Constructor's speaker value",
220
+ },
221
+ ]}
222
+ />
223
+
224
+ Returns: `Promise<void>`
225
+
226
+ ### listen()
227
+
228
+ Processes audio input for speech recognition. Takes a readable stream of audio data and emits a 'writing' event with the transcribed text.
229
+
230
+ <PropertiesTable
231
+ content={[
232
+ {
233
+ name: "audioData",
234
+ type: "NodeJS.ReadableStream",
235
+ description: "Audio stream to transcribe.",
236
+ isOptional: false,
237
+ },
238
+ ]}
239
+ />
240
+
241
+ Returns: `Promise<void>`
242
+
243
+ ### send()
244
+
245
+ Streams audio data in real-time to the OpenAI service for continuous audio streaming scenarios like live microphone input.
246
+
247
+ <PropertiesTable
248
+ content={[
249
+ {
250
+ name: "audioData",
251
+ type: "NodeJS.ReadableStream",
252
+ description: "Audio stream to send to the service.",
253
+ isOptional: false,
254
+ },
255
+ ]}
256
+ />
257
+
258
+ Returns: `Promise<void>`
259
+
260
+ ### updateConfig()
261
+
262
+ Updates the session configuration for the voice instance. This can be used to modify voice settings, turn detection, and other parameters.
263
+
264
+ <PropertiesTable
265
+ content={[
266
+ {
267
+ name: "sessionConfig",
268
+ type: "Realtime.SessionConfig",
269
+ description: "New session configuration to apply.",
270
+ isOptional: false,
271
+ },
272
+ ]}
273
+ />
274
+
275
+ Returns: `void`
276
+
277
+ ### addTools()
278
+
279
+ Adds a set of tools to the voice instance. Tools allow the model to perform additional actions during conversations. When OpenAIRealtimeVoice is added to an Agent, any tools configured for the Agent will automatically be available to the voice interface.
280
+
281
+ <PropertiesTable
282
+ content={[
283
+ {
284
+ name: "tools",
285
+ type: "ToolsInput",
286
+ description: "Tools configuration to equip.",
287
+ isOptional: true,
288
+ },
289
+ ]}
290
+ />
291
+
292
+ Returns: `void`
293
+
294
+ ### close()
295
+
296
+ Disconnects from the OpenAI realtime session and cleans up resources. Should be called when you're done with the voice instance.
297
+
298
+ Returns: `void`
299
+
300
+ ### getSpeakers()
301
+
302
+ Returns a list of available voice speakers.
303
+
304
+ Returns: `Promise<Array<{ voiceId: string; [key: string]: any }>>`
305
+
306
+ ### on()
307
+
308
+ Registers an event listener for voice events.
309
+
310
+ <PropertiesTable
311
+ content={[
312
+ {
313
+ name: "event",
314
+ type: "string",
315
+ description: "Name of the event to listen for.",
316
+ isOptional: false,
317
+ },
318
+ {
319
+ name: "callback",
320
+ type: "Function",
321
+ description: "Function to call when the event occurs.",
322
+ isOptional: false,
323
+ },
324
+ ]}
325
+ />
326
+
327
+ Returns: `void`
328
+
329
+ ### off()
330
+
331
+ Removes a previously registered event listener.
332
+
333
+ <PropertiesTable
334
+ content={[
335
+ {
336
+ name: "event",
337
+ type: "string",
338
+ description: "Name of the event to stop listening to.",
339
+ isOptional: false,
340
+ },
341
+ {
342
+ name: "callback",
343
+ type: "Function",
344
+ description: "The specific callback function to remove.",
345
+ isOptional: false,
346
+ },
347
+ ]}
348
+ />
349
+
350
+ Returns: `void`
351
+
352
+ ## Events
353
+
354
+ The OpenAIRealtimeVoice class emits the following events:
355
+
356
+ <PropertiesTable
357
+ content={[
358
+ {
359
+ name: "speaking",
360
+ type: "event",
361
+ description: "Emitted when audio data is received from the model. Callback receives { audio: Int16Array }.",
362
+ },
363
+ {
364
+ name: "writing",
365
+ type: "event",
366
+ description: "Emitted when transcribed text is available. Callback receives { text: string, role: string }.",
367
+ },
368
+ {
369
+ name: "error",
370
+ type: "event",
371
+ description: "Emitted when an error occurs. Callback receives the error object.",
372
+ },
373
+ ]}
374
+ />
375
+
376
+ ### OpenAI Realtime Events
377
+
378
+ You can also listen to [OpenAI Realtime utility events](https://github.com/openai/openai-realtime-api-beta#reference-client-utility-events) by prefixing with 'openAIRealtime:':
379
+
380
+ <PropertiesTable
381
+ content={[
382
+ {
383
+ name: "openAIRealtime:conversation.created",
384
+ type: "event",
385
+ description: "Emitted when a new conversation is created.",
386
+ },
387
+ {
388
+ name: "openAIRealtime:conversation.interrupted",
389
+ type: "event",
390
+ description: "Emitted when a conversation is interrupted.",
391
+ },
392
+ {
393
+ name: "openAIRealtime:conversation.updated",
394
+ type: "event",
395
+ description: "Emitted when a conversation is updated.",
396
+ },
397
+ {
398
+ name: "openAIRealtime:conversation.item.appended",
399
+ type: "event",
400
+ description: "Emitted when an item is appended to the conversation.",
401
+ },
402
+ {
403
+ name: "openAIRealtime:conversation.item.completed",
404
+ type: "event",
405
+ description: "Emitted when an item in the conversation is completed.",
406
+ },
407
+ ]}
408
+ />
409
+
410
+ ## Available Voices
411
+
412
+ The following voice options are available:
413
+
414
+ - `alloy`: Neutral and balanced
415
+ - `ash`: Clear and precise
416
+ - `ballad`: Melodic and smooth
417
+ - `coral`: Warm and friendly
418
+ - `echo`: Resonant and deep
419
+ - `sage`: Calm and thoughtful
420
+ - `shimmer`: Bright and energetic
421
+ - `verse`: Versatile and expressive
422
+
423
+ ## Notes
424
+
425
+ - API keys can be provided via constructor options or the `OPENAI_API_KEY` environment variable
426
+ - The OpenAI Realtime Voice API uses WebSockets for real-time communication
427
+ - Server-side Voice Activity Detection (VAD) provides better accuracy for speech detection
428
+ - All audio data is processed in Int16Array format
429
+ - The voice instance must be connected with `connect()` before using other methods
430
+ - Always call `close()` when done to properly clean up resources
431
+ - Memory management is handled by the OpenAI Realtime API
@@ -0,0 +1,168 @@
1
+ ---
2
+ title: "Reference: OpenAI Voice | Voice Providers | Mastra Docs"
3
+ description: "Documentation for the OpenAIVoice class, providing text-to-speech and speech-to-text capabilities."
4
+ ---
5
+
6
+ # OpenAI
7
+
8
+ The OpenAIVoice class in Mastra provides text-to-speech and speech-to-text capabilities using OpenAI's models.
9
+
10
+ ## Usage Example
11
+
12
+ ```typescript
13
+ import { OpenAIVoice } from '@mastra/voice-openai';
14
+
15
+ // Initialize with default configuration using environment variables
16
+ const voice = new OpenAIVoice();
17
+
18
+ // Or initialize with specific configuration
19
+ const voiceWithConfig = new OpenAIVoice({
20
+ speechModel: {
21
+ name: 'tts-1-hd',
22
+ apiKey: 'your-openai-api-key'
23
+ },
24
+ listeningModel: {
25
+ name: 'whisper-1',
26
+ apiKey: 'your-openai-api-key'
27
+ },
28
+ speaker: 'alloy' // Default voice
29
+ });
30
+
31
+ // Convert text to speech
32
+ const audioStream = await voice.speak('Hello, how can I help you?', {
33
+ speaker: 'nova', // Override default voice
34
+ speed: 1.2 // Adjust speech speed
35
+ });
36
+
37
+ // Convert speech to text
38
+ const text = await voice.listen(audioStream, {
39
+ filetype: 'mp3'
40
+ });
41
+ ```
42
+
43
+ ## Configuration
44
+
45
+ ### Constructor Options
46
+
47
+ <PropertiesTable
48
+ content={[
49
+ {
50
+ name: "speechModel",
51
+ type: "OpenAIConfig",
52
+ description: "Configuration for text-to-speech synthesis.",
53
+ isOptional: true,
54
+ defaultValue: "{ name: 'tts-1' }",
55
+ },
56
+ {
57
+ name: "listeningModel",
58
+ type: "OpenAIConfig",
59
+ description: "Configuration for speech-to-text recognition.",
60
+ isOptional: true,
61
+ defaultValue: "{ name: 'whisper-1' }",
62
+ },
63
+ {
64
+ name: "speaker",
65
+ type: "OpenAIVoiceId",
66
+ description: "Default voice ID for speech synthesis.",
67
+ isOptional: true,
68
+ defaultValue: "'alloy'",
69
+ },
70
+ ]}
71
+ />
72
+
73
+ ### OpenAIConfig
74
+
75
+ <PropertiesTable
76
+ content={[
77
+ {
78
+ name: "name",
79
+ type: "'tts-1' | 'tts-1-hd' | 'whisper-1'",
80
+ description: "Model name. Use 'tts-1-hd' for higher quality audio.",
81
+ isOptional: true,
82
+ },
83
+ {
84
+ name: "apiKey",
85
+ type: "string",
86
+ description: "OpenAI API key. Falls back to OPENAI_API_KEY environment variable.",
87
+ isOptional: true,
88
+ },
89
+ ]}
90
+ />
91
+
92
+ ## Methods
93
+
94
+ ### speak()
95
+
96
+ Converts text to speech using OpenAI's text-to-speech models.
97
+
98
+ <PropertiesTable
99
+ content={[
100
+ {
101
+ name: "input",
102
+ type: "string | NodeJS.ReadableStream",
103
+ description: "Text or text stream to convert to speech.",
104
+ isOptional: false,
105
+ },
106
+ {
107
+ name: "options.speaker",
108
+ type: "OpenAIVoiceId",
109
+ description: "Voice ID to use for speech synthesis.",
110
+ isOptional: true,
111
+ defaultValue: "Constructor's speaker value",
112
+ },
113
+ {
114
+ name: "options.speed",
115
+ type: "number",
116
+ description: "Speech speed multiplier.",
117
+ isOptional: true,
118
+ defaultValue: "1.0",
119
+ },
120
+ ]}
121
+ />
122
+
123
+ Returns: `Promise<NodeJS.ReadableStream>`
124
+
125
+ ### listen()
126
+
127
+ Transcribes audio using OpenAI's Whisper model.
128
+
129
+ <PropertiesTable
130
+ content={[
131
+ {
132
+ name: "audioStream",
133
+ type: "NodeJS.ReadableStream",
134
+ description: "Audio stream to transcribe.",
135
+ isOptional: false,
136
+ },
137
+ {
138
+ name: "options.filetype",
139
+ type: "string",
140
+ description: "Audio format of the input stream.",
141
+ isOptional: true,
142
+ defaultValue: "'mp3'",
143
+ },
144
+ ]}
145
+ />
146
+
147
+ Returns: `Promise<string>`
148
+
149
+ ### getSpeakers()
150
+
151
+ Returns an array of available voice options, where each entry contains:
152
+
153
+ <PropertiesTable
154
+ content={[
155
+ {
156
+ name: "voiceId",
157
+ type: "string",
158
+ description: "Unique identifier for the voice",
159
+ isOptional: false,
160
+ },
161
+ ]}
162
+ />
163
+
164
+ ## Notes
165
+
166
+ - API keys can be provided via constructor options or the `OPENAI_API_KEY` environment variable
167
+ - The `tts-1-hd` model provides higher quality audio but may have slower processing times
168
+ - Speech recognition supports multiple audio formats including mp3, wav, and webm