@mastra/mcp-docs-server 0.0.3 → 0.0.4-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/.docs/organized/changelogs/%40mastra%2Fastra.md +20 -20
  2. package/.docs/organized/changelogs/%40mastra%2Fchroma.md +20 -20
  3. package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +22 -22
  4. package/.docs/organized/changelogs/%40mastra%2Fcomposio.md +19 -19
  5. package/.docs/organized/changelogs/%40mastra%2Fcore.md +17 -17
  6. package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +25 -25
  7. package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +25 -25
  8. package/.docs/organized/changelogs/%40mastra%2Fdeployer-vercel.md +25 -25
  9. package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +23 -23
  10. package/.docs/organized/changelogs/%40mastra%2Fevals.md +19 -19
  11. package/.docs/organized/changelogs/%40mastra%2Ffirecrawl.md +21 -21
  12. package/.docs/organized/changelogs/%40mastra%2Fgithub.md +19 -19
  13. package/.docs/organized/changelogs/%40mastra%2Floggers.md +19 -19
  14. package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +18 -0
  15. package/.docs/organized/changelogs/%40mastra%2Fmcp.md +19 -19
  16. package/.docs/organized/changelogs/%40mastra%2Fmemory.md +19 -19
  17. package/.docs/organized/changelogs/%40mastra%2Fpg.md +19 -19
  18. package/.docs/organized/changelogs/%40mastra%2Fpinecone.md +19 -19
  19. package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +26 -26
  20. package/.docs/organized/changelogs/%40mastra%2Fqdrant.md +20 -20
  21. package/.docs/organized/changelogs/%40mastra%2Frag.md +19 -19
  22. package/.docs/organized/changelogs/%40mastra%2Fragie.md +19 -19
  23. package/.docs/organized/changelogs/%40mastra%2Fspeech-azure.md +19 -19
  24. package/.docs/organized/changelogs/%40mastra%2Fspeech-deepgram.md +19 -19
  25. package/.docs/organized/changelogs/%40mastra%2Fspeech-elevenlabs.md +19 -19
  26. package/.docs/organized/changelogs/%40mastra%2Fspeech-google.md +19 -19
  27. package/.docs/organized/changelogs/%40mastra%2Fspeech-ibm.md +19 -19
  28. package/.docs/organized/changelogs/%40mastra%2Fspeech-murf.md +19 -19
  29. package/.docs/organized/changelogs/%40mastra%2Fspeech-openai.md +19 -19
  30. package/.docs/organized/changelogs/%40mastra%2Fspeech-playai.md +19 -19
  31. package/.docs/organized/changelogs/%40mastra%2Fspeech-replicate.md +19 -19
  32. package/.docs/organized/changelogs/%40mastra%2Fspeech-speechify.md +19 -19
  33. package/.docs/organized/changelogs/%40mastra%2Fstabilityai.md +19 -19
  34. package/.docs/organized/changelogs/%40mastra%2Fturbopuffer.md +18 -0
  35. package/.docs/organized/changelogs/%40mastra%2Fupstash.md +20 -20
  36. package/.docs/organized/changelogs/%40mastra%2Fvectorize.md +19 -19
  37. package/.docs/organized/changelogs/%40mastra%2Fvoice-deepgram.md +19 -19
  38. package/.docs/organized/changelogs/%40mastra%2Fvoice-elevenlabs.md +19 -19
  39. package/.docs/organized/changelogs/%40mastra%2Fvoice-google.md +19 -19
  40. package/.docs/organized/changelogs/%40mastra%2Fvoice-murf.md +19 -19
  41. package/.docs/organized/changelogs/%40mastra%2Fvoice-openai-realtime.md +18 -0
  42. package/.docs/organized/changelogs/%40mastra%2Fvoice-openai.md +19 -19
  43. package/.docs/organized/changelogs/%40mastra%2Fvoice-playai.md +19 -19
  44. package/.docs/organized/changelogs/%40mastra%2Fvoice-sarvam.md +19 -0
  45. package/.docs/organized/changelogs/%40mastra%2Fvoice-speechify.md +19 -19
  46. package/.docs/organized/changelogs/create-mastra.md +16 -16
  47. package/.docs/organized/changelogs/mastra.md +29 -29
  48. package/.docs/organized/code-examples/ai-sdk-useChat.md +2 -1
  49. package/.docs/raw/agents/02-adding-tools.mdx +6 -0
  50. package/.docs/raw/agents/02a-mcp-guide.mdx +192 -0
  51. package/.docs/raw/agents/03-adding-voice.mdx +8 -8
  52. package/.docs/raw/evals/00-overview.mdx +2 -2
  53. package/.docs/raw/evals/03-running-in-ci.mdx +7 -4
  54. package/.docs/raw/getting-started/mcp-docs-server.mdx +5 -2
  55. package/.docs/raw/guides/04-research-assistant.mdx +273 -0
  56. package/.docs/raw/local-dev/mastra-dev.mdx +2 -2
  57. package/.docs/raw/rag/overview.mdx +3 -3
  58. package/.docs/raw/rag/retrieval.mdx +7 -4
  59. package/.docs/raw/rag/vector-databases.mdx +107 -40
  60. package/.docs/raw/reference/client-js/workflows.mdx +1 -0
  61. package/.docs/raw/reference/rag/libsql.mdx +3 -3
  62. package/.docs/raw/reference/tools/client.mdx +1 -1
  63. package/.docs/raw/reference/tools/vector-query-tool.mdx +1 -1
  64. package/.docs/raw/reference/voice/sarvam.mdx +260 -0
  65. package/.docs/raw/reference/workflows/snapshots.mdx +204 -0
  66. package/.docs/raw/voice/overview.mdx +135 -0
  67. package/.docs/raw/voice/speech-to-text.mdx +45 -0
  68. package/.docs/raw/voice/text-to-speech.mdx +52 -0
  69. package/.docs/raw/voice/voice-to-voice.mdx +310 -0
  70. package/.docs/raw/workflows/dynamic-workflows.mdx +4 -0
  71. package/.docs/raw/workflows/steps.mdx +12 -2
  72. package/.docs/raw/workflows/suspend-and-resume.mdx +71 -1
  73. package/.docs/raw/workflows/variables.mdx +23 -3
  74. package/package.json +2 -2
@@ -0,0 +1,260 @@
1
+ ---
2
+ title: "Reference: Sarvam Voice | Voice Providers | Mastra Docs"
3
+ description: "Documentation for the Sarvam class, providing text-to-speech and speech-to-text capabilities."
4
+ ---
5
+
6
+ # Sarvam
7
+
8
+ The SarvamVoice class in Mastra provides text-to-speech and speech-to-text capabilities using Sarvam AI models.
9
+
10
+ ## Usage Example
11
+
12
+ ```typescript
13
+ import { SarvamVoice } from "@mastra/voice-sarvam";
14
+
15
+ // Initialize with default configuration using environment variables
16
+ const voice = new SarvamVoice();
17
+
18
+ // Or initialize with specific configuration
19
+ const voiceWithConfig = new SarvamVoice({
20
+ speechModel: {
21
+ model: "bulbul:v1",
22
+ apiKey: process.env.SARVAM_API_KEY!,
23
+ language: "en-IN",
24
+ properties: {
25
+ pitch: 0,
26
+ pace: 1.65,
27
+ loudness: 1.5,
28
+ speech_sample_rate: 8000,
29
+ enable_preprocessing: false,
30
+ eng_interpolation_wt: 123,
31
+ },
32
+ },
33
+ listeningModel: {
34
+ model: "saarika:v2",
35
+ apiKey: process.env.SARVAM_API_KEY!,
36
+ languageCode: "en-IN",
37
+ filetype: "wav",
38
+ },
39
+ speaker: "meera", // Default voice
40
+ });
41
+
42
+
43
+ // Convert text to speech
44
+ const audioStream = await voice.speak("Hello, how can I help you?");
45
+
46
+
47
+ // Convert speech to text
48
+ const text = await voice.listen(audioStream, {
49
+ filetype: "wav",
50
+ });
51
+ ```
52
+
53
+ ### Sarvam API Docs
54
+
55
+ https://docs.sarvam.ai/api-reference-docs/endpoints/text-to-speech
56
+
57
+ ## Configuration
58
+
59
+ ### Constructor Options
60
+
61
+ <PropertiesTable
62
+ content={[
63
+ {
64
+ name: "speechModel",
65
+ type: "SarvamVoiceConfig",
66
+ description: "Configuration for text-to-speech synthesis.",
67
+ isOptional: true,
68
+ defaultValue: "{ model: 'bulbul:v1', language: 'en-IN' }",
69
+ },
70
+ {
71
+ name: "speaker",
72
+ type: "SarvamVoiceId",
73
+ description:
74
+ "The speaker to be used for the output audio. If not provided, Meera will be used as default. Available options: meera, pavithra, maitreyi, arvind, amol, amartya, diya, neel, misha, vian, arjun, maya",
75
+ isOptional: true,
76
+ defaultValue: "'meera'",
77
+ },
78
+ {
79
+ name: "listeningModel",
80
+ type: "SarvamListenOptions",
81
+ description: "Configuration for speech-to-text recognition.",
82
+ isOptional: true,
83
+ defaultValue: "{ model: 'saarika:v2', languageCode: 'unknown' }",
84
+ },
85
+ ]}
86
+ />
87
+
88
+ ### SarvamVoiceConfig
89
+
90
+ <PropertiesTable
91
+ content={[
92
+ {
93
+ name: "apiKey",
94
+ type: "string",
95
+ description:
96
+ "Sarvam API key. Falls back to SARVAM_API_KEY environment variable.",
97
+ isOptional: true,
98
+ },
99
+ {
100
+ name: "model",
101
+ type: "SarvamTTSModel",
102
+ description: "Specifies the model to use for text-to-speech conversion.",
103
+ isOptional: true,
104
+ defaultValue: "'bulbul:v1'",
105
+ },
106
+ {
107
+ name: "language",
108
+ type: "SarvamTTSLanguage",
109
+ description:
110
+ "Target language for speech synthesis. Available options: hi-IN, bn-IN, kn-IN, ml-IN, mr-IN, od-IN, pa-IN, ta-IN, te-IN, en-IN, gu-IN",
111
+ isOptional: false,
112
+ defaultValue: "'en-IN'",
113
+ },
114
+ {
115
+ name: "properties",
116
+ type: "object",
117
+ description: "Additional voice properties for customization.",
118
+ isOptional: true,
119
+ },
120
+ {
121
+ name: "properties.pitch",
122
+ type: "number",
123
+ description:
124
+ "Controls the pitch of the audio. Lower values result in a deeper voice, while higher values make it sharper. The suitable range is between -0.75 and 0.75.",
125
+ isOptional: true,
126
+ },
127
+ {
128
+ name: "properties.pace",
129
+ type: "number",
130
+ description:
131
+ "Controls the speed of the audio. Lower values result in slower speech, while higher values make it faster. The suitable range is between 0.5 and 2.0. Default is 1.0. Required range: 0.3 <= x <= 3",
132
+ isOptional: true,
133
+ },
134
+ {
135
+ name: "properties.loudness",
136
+ type: "number",
137
+ description:
138
+ "Controls the loudness of the audio. Lower values result in quieter audio, while higher values make it louder. The suitable range is between 0.3 and 3.0. Required range: 0 <= x <= 3",
139
+ isOptional: true,
140
+ },
141
+ {
142
+ name: "properties.speech_sample_rate",
143
+ type: "8000 | 16000 | 22050",
144
+ description: "Audio sample rate in Hz.",
145
+ isOptional: true,
146
+ },
147
+ {
148
+ name: "properties.enable_preprocessing",
149
+ type: "boolean",
150
+ description:
151
+ "Controls whether normalization of English words and numeric entities (e.g., numbers, dates) is performed. Set to true for better handling of mixed-language text. Default is false.",
152
+ isOptional: true,
153
+ },
154
+ {
155
+ name: "properties.eng_interpolation_wt",
156
+ type: "number",
157
+ description: "Weight for interpolating with English speaker at encoder.",
158
+ isOptional: true,
159
+ },
160
+ ]}
161
+ />
162
+
163
+ ### SarvamListenOptions
164
+
165
+ <PropertiesTable
166
+ content={[
167
+ {
168
+ name: "apiKey",
169
+ type: "string",
170
+ description:
171
+ "Sarvam API key. Falls back to SARVAM_API_KEY environment variable.",
172
+ isOptional: true,
173
+ },
174
+ {
175
+ name: "model",
176
+ type: "SarvamSTTModel",
177
+ description:
178
+ "Specifies the model to use for speech-to-text conversion. Note: the default model is saarika:v2. Available options: saarika:v1, saarika:v2, saarika:flash",
179
+ isOptional: true,
180
+ defaultValue: "'saarika:v2'",
181
+ },
182
+ {
183
+ name: "languageCode",
184
+ type: "SarvamSTTLanguage",
185
+ description:
186
+ "Specifies the language of the input audio. This parameter is required to ensure accurate transcription. For the saarika:v1 model, this parameter is mandatory. For the saarika:v2 model, it is optional. unknown: Use this when the language is not known; the API will detect it automatically. Note that the saarika:v1 model does not support the unknown language code. Available options: unknown, hi-IN, bn-IN, kn-IN, ml-IN, mr-IN, od-IN, pa-IN, ta-IN, te-IN, en-IN, gu-IN",
187
+ isOptional: true,
188
+ defaultValue: "'unknown'",
189
+ },
190
+ {
191
+ name: "filetype",
192
+ type: "'mp3' | 'wav'",
193
+ description: "Audio format of the input stream.",
194
+ isOptional: true,
195
+ },
196
+ ]}
197
+ />
198
+
199
+ ## Methods
200
+
201
+ ### speak()
202
+
203
+ Converts text to speech using Sarvam's text-to-speech models.
204
+
205
+ <PropertiesTable
206
+ content={[
207
+ {
208
+ name: "input",
209
+ type: "string | NodeJS.ReadableStream",
210
+ description: "Text or text stream to convert to speech.",
211
+ isOptional: false,
212
+ },
213
+ {
214
+ name: "options.speaker",
215
+ type: "SarvamVoiceId",
216
+ description: "Voice ID to use for speech synthesis.",
217
+ isOptional: true,
218
+ defaultValue: "Constructor's speaker value",
219
+ },
220
+ ]}
221
+ />
222
+
223
+ Returns: `Promise<NodeJS.ReadableStream>`
224
+
225
+ ### listen()
226
+
227
+ Transcribes audio using Sarvam's speech recognition models.
228
+
229
+ <PropertiesTable
230
+ content={[
231
+ {
232
+ name: "input",
233
+ type: "NodeJS.ReadableStream",
234
+ description: "Audio stream to transcribe.",
235
+ isOptional: false,
236
+ },
237
+ {
238
+ name: "options",
239
+ type: "SarvamListenOptions",
240
+ description: "Configuration options for speech recognition.",
241
+ isOptional: true,
242
+ },
243
+ ]}
244
+ />
245
+
246
+ Returns: `Promise<string>`
247
+
248
+ ### getSpeakers()
249
+
250
+ Returns an array of available voice options.
251
+
252
+ Returns: `Promise<Array<{voiceId: SarvamVoiceId}>>`
253
+
254
+ ## Notes
255
+
256
+ - API key can be provided via constructor options or the `SARVAM_API_KEY` environment variable
257
+ - If no API key is provided, the constructor will throw an error
258
+ - The service communicates with the Sarvam AI API at `https://api.sarvam.ai`
259
+ - Audio is returned as a stream containing binary audio data
260
+ - Speech recognition supports mp3 and wav audio formats
@@ -0,0 +1,204 @@
1
+ ---
2
+ title: "Reference: Snapshots | Workflow State Persistence | Mastra Docs"
3
+ description: "Technical reference on snapshots in Mastra - the serialized workflow state that enables suspend and resume functionality"
4
+ ---
5
+
6
+ # Snapshots
7
+
8
+ In Mastra, a snapshot is a serializable representation of a workflow's complete execution state at a specific point in time. Snapshots capture all the information needed to resume a workflow from exactly where it left off, including:
9
+
10
+ - The current state of each step in the workflow
11
+ - The outputs of completed steps
12
+ - The execution path taken through the workflow
13
+ - Any suspended steps and their metadata
14
+ - The remaining retry attempts for each step
15
+ - Additional contextual data needed to resume execution
16
+
17
+ Snapshots are automatically created and managed by Mastra whenever a workflow is suspended, and are persisted to the configured storage system.
18
+
19
+ ## The Role of Snapshots in Suspend and Resume
20
+
21
+ Snapshots are the key mechanism enabling Mastra's suspend and resume capabilities. When a workflow step calls `await suspend()`:
22
+
23
+ 1. The workflow execution is paused at that exact point
24
+ 2. The current state of the workflow is captured as a snapshot
25
+ 3. The snapshot is persisted to storage
26
+ 4. The workflow step is marked as "suspended" with a status of `'suspended'`
27
+ 5. Later, when `resume()` is called on the suspended step, the snapshot is retrieved
28
+ 6. The workflow execution resumes from exactly where it left off
29
+
30
+ This mechanism provides a powerful way to implement human-in-the-loop workflows, handle rate limiting, wait for external resources, and implement complex branching workflows that may need to pause for extended periods.
31
+
32
+ ## Snapshot Anatomy
33
+
34
+ A Mastra workflow snapshot consists of several key components:
35
+
36
+ ```typescript
37
+ export interface WorkflowRunState {
38
+ // Core state info
39
+ value: Record<string, string>; // Current state machine value
40
+ context: { // Workflow context
41
+ steps: Record<string, { // Step execution results
42
+ status: 'success' | 'failed' | 'suspended' | 'waiting' | 'skipped';
43
+ payload?: any; // Step-specific data
44
+ error?: string; // Error info if failed
45
+ }>;
46
+ triggerData: Record<string, any>; // Initial trigger data
47
+ attempts: Record<string, number>; // Remaining retry attempts
48
+ inputData: Record<string, any>; // Initial input data
49
+ };
50
+
51
+ activePaths: Array<{ // Currently active execution paths
52
+ stepPath: string[];
53
+ stepId: string;
54
+ status: string;
55
+ }>;
56
+
57
+ // Metadata
58
+ runId: string; // Unique run identifier
59
+ timestamp: number; // Time snapshot was created
60
+
61
+ // For nested workflows and suspended steps
62
+ childStates?: Record<string, WorkflowRunState>; // Child workflow states
63
+ suspendedSteps?: Record<string, string>; // Mapping of suspended steps
64
+ }
65
+ ```
66
+
67
+ ## How Snapshots Are Saved and Retrieved
68
+
69
+ Mastra persists snapshots to the configured storage system. By default, snapshots are saved to a LibSQL database, but can be configured to use other storage providers like Upstash.
70
+ The snapshots are stored in the `workflow_snapshots` table and identified uniquely by the `run_id` for the associated run when using libsql.
71
+ Utilizing a persistence layer allows for the snapshots to be persisted across workflow runs, allowing for advanced human-in-the-loop functionality.
72
+
73
+ Read more about [libsql storage](../storage/libsql.mdx) and [upstash storage](../storage/upstash.mdx) here.
74
+
75
+ ### Saving Snapshots
76
+
77
+ When a workflow is suspended, Mastra automatically persists the workflow snapshot with these steps:
78
+
79
+ 1. The `suspend()` function in a step execution triggers the snapshot process
80
+ 2. The `WorkflowInstance.suspend()` method records the suspended machine
81
+ 3. `persistWorkflowSnapshot()` is called to save the current state
82
+ 4. The snapshot is serialized and stored in the configured database in the `workflow_snapshots` table
83
+ 5. The storage record includes the workflow name, run ID, and the serialized snapshot
84
+
85
+
86
+ ### Retrieving Snapshots
87
+
88
+ When a workflow is resumed, Mastra retrieves the persisted snapshot with these steps:
89
+
90
+ 1. The `resume()` method is called with a specific step ID
91
+ 2. The snapshot is loaded from storage using `loadWorkflowSnapshot()`
92
+ 3. The snapshot is parsed and prepared for resumption
93
+ 4. The workflow execution is recreated with the snapshot state
94
+ 5. The suspended step is resumed, and execution continues
95
+
96
+ ## Storage Options for Snapshots
97
+
98
+ Mastra provides multiple storage options for persisting snapshots.
99
+
100
+ A `storage` instance is configured on the `Mastra` class, and is used to setup a snapshot persistence layer for all workflows registered on the `Mastra` instance.
101
+ This means that storage is shared across all workflows registered with the same `Mastra` instance.
102
+
103
+ ### LibSQL (Default)
104
+
105
+ The default storage option is LibSQL, a SQLite-compatible database:
106
+
107
+ ```typescript
108
+ import { Mastra } from '@mastra/core/mastra';
109
+ import { DefaultStorage } from '@mastra/core/storage/libsql';
110
+
111
+ const mastra = new Mastra({
112
+ storage: new DefaultStorage({
113
+ config: {
114
+ url: "file:storage.db", // Local file-based database
115
+ // For production:
116
+ // url: process.env.DATABASE_URL,
117
+ // authToken: process.env.DATABASE_AUTH_TOKEN,
118
+ }
119
+ }),
120
+ workflows: {
121
+ weatherWorkflow,
122
+ travelWorkflow,
123
+ }
124
+ });
125
+ ```
126
+
127
+ ### Upstash (Redis-Compatible)
128
+
129
+ For serverless environments:
130
+
131
+ ```typescript
132
+ import { Mastra } from '@mastra/core/mastra';
133
+ import { UpstashStore } from "@mastra/upstash";
134
+
135
+ const mastra = new Mastra({
136
+ storage: new UpstashStore({
137
+ url: process.env.UPSTASH_URL,
138
+ token: process.env.UPSTASH_TOKEN,
139
+ }),
140
+ workflows: {
141
+ weatherWorkflow,
142
+ travelWorkflow,
143
+ }
144
+ });
145
+ ```
146
+
147
+ ## Best Practices for Working with Snapshots
148
+
149
+ 1. **Ensure Serializability**: Any data that needs to be included in the snapshot must be serializable (convertible to JSON).
150
+
151
+ 2. **Minimize Snapshot Size**: Avoid storing large data objects directly in the workflow context. Instead, store references to them (like IDs) and retrieve the data when needed.
152
+
153
+ 3. **Handle Resume Context Carefully**: When resuming a workflow, carefully consider what context to provide. This will be merged with the existing snapshot data.
154
+
155
+ 4. **Set Up Proper Monitoring**: Implement monitoring for suspended workflows, especially long-running ones, to ensure they are properly resumed.
156
+
157
+ 5. **Consider Storage Scaling**: For applications with many suspended workflows, ensure your storage solution is appropriately scaled.
158
+
159
+ ## Advanced Snapshot Patterns
160
+
161
+ ### Custom Snapshot Metadata
162
+
163
+ When suspending a workflow, you can include custom metadata that can help when resuming:
164
+
165
+ ```typescript
166
+ await suspend({
167
+ reason: "Waiting for customer approval",
168
+ requiredApprovers: ["manager", "finance"],
169
+ requestedBy: currentUser,
170
+ urgency: "high",
171
+ expires: new Date(Date.now() + 7 * 24 * 60 * 60 * 1000)
172
+ });
173
+ ```
174
+
175
+ This metadata is stored with the snapshot and available when resuming.
176
+
177
+ ### Conditional Resumption
178
+
179
+ You can implement conditional logic based on the suspend payload when resuming:
180
+
181
+ ```typescript
182
+ run.watch(async ({ context, activePaths }) => {
183
+ for (const path of activePaths) {
184
+ const approvalStep = context.steps?.approval;
185
+ if (approvalStep?.status === 'suspended') {
186
+ const payload = approvalStep.suspendPayload;
187
+
188
+ if (payload.urgency === "high" && currentUser.role === "manager") {
189
+ await resume({
190
+ stepId: 'approval',
191
+ context: { approved: true, approver: currentUser.id },
192
+ });
193
+ }
194
+ }
195
+ }
196
+ });
197
+ ```
198
+
199
+ ## Related
200
+
201
+ - [Suspend Function Reference](./suspend.mdx)
202
+ - [Resume Function Reference](./resume.mdx)
203
+ - [Watch Function Reference](./watch.mdx)
204
+ - [Suspend and Resume Guide](../../workflows/suspend-and-resume.mdx)
@@ -0,0 +1,135 @@
1
+ ---
2
+ title: Voice in Mastra | Mastra Docs
3
+ description: Overview of voice capabilities in Mastra, including text-to-speech, speech-to-text, and real-time voice-to-voice interactions.
4
+ ---
5
+
6
+ # Voice in Mastra
7
+
8
+ Mastra's Voice system provides a unified interface for voice interactions, enabling text-to-speech (TTS), speech-to-text (STT), and real-time voice-to-voice capabilities in your applications.
9
+
10
+ ## Key Features
11
+
12
+ - Standardized API across different voice providers
13
+ - Support for multiple voice services
14
+ - Voice-to-voice interactions using events for continuous audio streaming
15
+ - Composable voice providers for mixing TTS and STT services
16
+
17
+ ## Adding Voice to Agents
18
+
19
+ To learn how to integrate voice capabilities into your agents, check out the [Adding Voice to Agents](../agents/03-adding-voice.mdx) documentation. This section covers how to use both single and multiple voice providers, as well as real-time interactions.
20
+
21
+
22
+ ## Example of Using a Single Voice Provider
23
+
24
+ ```typescript
25
+ import { OpenAIVoice } from "@mastra/voice-openai";
26
+
27
+ // Initialize OpenAI voice for TTS
28
+ const voice = new OpenAIVoice({
29
+ speechModel: {
30
+ name: "tts-1-hd", // Specify the TTS model
31
+ apiKey: process.env.OPENAI_API_KEY, // Your OpenAI API key
32
+ },
33
+ });
34
+
35
+ // Convert text to speech
36
+ const audioStream = await voice.speak("Hello! How can I assist you today?", {
37
+ speaker: "default", // Optional: specify a speaker
38
+ });
39
+
40
+ // Play the audio response
41
+ playAudio(audioStream);
42
+ ```
43
+
44
+ ## Example of Using Multiple Voice Providers
45
+ This example demonstrates how to create and use two different voice providers in Mastra: OpenAI for speech-to-text (STT) and PlayAI for text-to-speech (TTS).
46
+
47
+ Start by creating instances of the voice providers with any necessary configuration.
48
+
49
+ ```typescript
50
+ import { OpenAIVoice } from "@mastra/voice-openai";
51
+ import { PlayAIVoice } from "@mastra/voice-playai";
52
+ import { CompositeVoice } from "@mastra/core/voice";
53
+
54
+ // Initialize OpenAI voice for STT
55
+ const listeningProvider = new OpenAIVoice({
56
+ listeningModel: {
57
+ name: "whisper-1",
58
+ apiKey: process.env.OPENAI_API_KEY,
59
+ },
60
+ });
61
+
62
+ // Initialize PlayAI voice for TTS
63
+ const speakingProvider = new PlayAIVoice({
64
+ speechModel: {
65
+ name: "playai-voice",
66
+ apiKey: process.env.PLAYAI_API_KEY,
67
+ },
68
+ });
69
+
70
+ // Combine the providers using CompositeVoice
71
+ const voice = new CompositeVoice({
72
+ listeningProvider,
73
+ speakingProvider,
74
+ });
75
+
76
+ // Implement voice interactions using the combined voice provider
77
+ const audioStream = getMicrophoneStream(); // Assume this function gets audio input
78
+ const transcript = await voice.listen(audioStream);
79
+
80
+ // Log the transcribed text
81
+ console.log("Transcribed text:", transcript);
82
+
83
+ // Convert text to speech
84
+ const responseAudio = await voice.speak(`You said: ${transcript}`, {
85
+ speaker: "default", // Optional: specify a speaker
86
+ });
87
+
88
+ // Play the audio response
89
+ playAudio(responseAudio);
90
+ ```
91
+
92
+ ## Real-time Capabilities
93
+
94
+ Many voice providers support real-time speech-to-speech interactions through WebSocket connections, enabling:
95
+
96
+ - Live voice conversations with AI
97
+ - Streaming transcription
98
+ - Real-time text-to-speech synthesis
99
+ - Tool usage during conversations
100
+
101
+
102
+ ## Voice Configuration
103
+
104
+ Voice providers can be configured with different models and options:
105
+
106
+ ```typescript
107
+ const voice = new OpenAIVoice({
108
+ speechModel: {
109
+ name: "tts-1-hd",
110
+ apiKey: process.env.OPENAI_API_KEY
111
+ },
112
+ listeningModel: {
113
+ name: "whisper-1"
114
+ },
115
+ speaker: "alloy"
116
+ });
117
+ ```
118
+
119
+ ## Available Voice Providers
120
+
121
+ Mastra supports a variety of voice providers, including:
122
+
123
+ - OpenAI
124
+ - PlayAI
125
+ - Murf
126
+ - ElevenLabs
127
+ - [More](https://github.com/mastra-ai/mastra/tree/main/voice)
128
+
129
+ ## More Resources
130
+
131
+ - [CompositeVoice](../reference/voice/composite-voice.mdx)
132
+ - [MastraVoice](../reference/voice/mastra-voice.mdx)
133
+ - [OpenAI Voice](../reference/voice/openai.mdx)
134
+ - [PlayAI Voice](../reference/voice/playai.mdx)
135
+ - [Voice Examples](../../examples/voice/)
@@ -0,0 +1,45 @@
1
+ ---
2
+ title: Speech-to-Text (STT) in Mastra | Mastra Docs
3
+ description: Overview of Speech-to-Text capabilities in Mastra, including configuration, usage, and integration with voice providers.
4
+ ---
5
+
6
+ # Speech-to-Text (STT)
7
+
8
+ Speech-to-Text (STT) in Mastra provides a standardized interface for converting audio input into text across multiple service providers. This section covers STT configuration and usage. Check out the [Adding Voice to Agents](../agents/03-adding-voice.mdx) documentation to learn how to use STT in an agent.
9
+
10
+ ## Speech Configuration
11
+
12
+ To use STT in Mastra, you need to provide a `listeningModel` configuration when initializing the voice provider. This configuration includes parameters such as:
13
+
14
+ - **`name`**: The specific STT model to use.
15
+ - **`apiKey`**: Your API key for authentication.
16
+ - **Provider-specific options**: Additional options that may be required or supported by the specific voice provider.
17
+
18
+ **Note**: All of these parameters are optional. You can use the default settings provided by the voice provider, which will depend on the specific provider you are using.
19
+
20
+ ### Example Configuration
21
+
22
+ ```typescript
23
+ const voice = new OpenAIVoice({
24
+ listeningModel: {
25
+ name: "whisper-1",
26
+ apiKey: process.env.OPENAI_API_KEY,
27
+ },
28
+ });
29
+
30
+ // If using default settings the configuration can be simplified to:
31
+ const voice = new OpenAIVoice();
32
+ ```
33
+
34
+ ## Using the Listen Method
35
+
36
+ The primary method for STT is the `listen()` method, which converts spoken audio into text. Here's how to use it:
37
+
38
+ ```typescript
39
+ const audioStream = getMicrophoneStream(); // Assume this function gets audio input
40
+ const transcript = await voice.listen(audioStream, {
41
+ filetype: "m4a", // Optional: specify the audio file type
42
+ });
43
+ ```
44
+
45
+ **Note**: If you are using a voice-to-voice provider, such as `OpenAIRealtimeVoice`, the `listen()` method will emit a "writing" event instead of returning a transcript directly.
@@ -0,0 +1,52 @@
1
+ ---
2
+ title: Text-to-Speech (TTS) in Mastra | Mastra Docs
3
+ description: Overview of Text-to-Speech capabilities in Mastra, including configuration, usage, and integration with voice providers.
4
+ ---
5
+
6
+ # Text-to-Speech (TTS)
7
+
8
+ Text-to-Speech (TTS) in Mastra offers a unified API for synthesizing spoken audio from text using various provider services. This section explains TTS configuration options and implementation methods. For integrating TTS capabilities with agents, refer to the [Adding Voice to Agents](../agents/03-adding-voice.mdx) documentation.
9
+
10
+ ## Speech Configuration
11
+
12
+ To use TTS in Mastra, you need to provide a `speechModel` configuration when initializing the voice provider. This configuration includes parameters such as:
13
+
14
+ - **`name`**: The specific TTS model to use.
15
+ - **`apiKey`**: Your API key for authentication.
16
+ - **Provider-specific options**: Additional options that may be required or supported by the specific voice provider.
17
+
18
+ The **`speaker`** option is specified separately and allows you to select different voices for speech synthesis.
19
+
20
+ **Note**: All of these parameters are optional. You can use the default settings provided by the voice provider, which will depend on the specific provider you are using.
21
+
22
+ ### Example Configuration
23
+
24
+ ```typescript
25
+ const voice = new OpenAIVoice({
26
+ speechModel: {
27
+ name: "tts-1-hd",
28
+ apiKey: process.env.OPENAI_API_KEY
29
+ },
30
+ speaker: "alloy",
31
+ });
32
+
33
+ // If using default settings the configuration can be simplified to:
34
+ const voice = new OpenAIVoice();
35
+ ```
36
+
37
+ ## Using the Speak Method
38
+
39
+ The primary method for TTS is the `speak()` method, which converts text to speech. This method can accept options that allow you to specify the speaker and other provider-specific options. Here's how to use it:
40
+
41
+ ```typescript
42
+ const readableStream = await voice.speak("Hello, world!", {
43
+ speaker: "default", // Optional: specify a speaker
44
+ properties: {
45
+ speed: 1.0, // Optional: adjust speech speed
46
+ pitch: "default", // Optional: specify pitch if supported
47
+ },
48
+ });
49
+ ```
50
+
51
+ **Note**: If you are using a voice-to-voice provider, such as `OpenAIRealtimeVoice`, the `speak()` method will emit a "speaking" event instead of returning a readable stream.
52
+