@mastra/mcp-docs-server 1.0.0-beta.3 → 1.0.0-beta.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/organized/changelogs/%40mastra%2Fai-sdk.md +30 -30
- package/.docs/organized/changelogs/%40mastra%2Fauth.md +6 -0
- package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Fcore.md +45 -45
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloud.md +9 -9
- package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +9 -9
- package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Freact.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fserver.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-google-gemini-live.md +10 -10
- package/.docs/organized/changelogs/create-mastra.md +7 -7
- package/.docs/organized/changelogs/mastra.md +11 -11
- package/.docs/raw/agents/adding-voice.mdx +49 -0
- package/.docs/raw/reference/agents/generate.mdx +11 -92
- package/.docs/raw/reference/agents/network.mdx +3 -85
- package/.docs/raw/reference/streaming/agents/stream.mdx +3 -92
- package/.docs/raw/reference/voice/composite-voice.mdx +71 -28
- package/.docs/raw/reference/voice/voice.listen.mdx +86 -52
- package/.docs/raw/reference/voice/voice.speak.mdx +75 -40
- package/.docs/raw/voice/overview.mdx +67 -0
- package/.docs/raw/workflows/overview.mdx +1 -1
- package/CHANGELOG.md +7 -0
- package/package.json +3 -3
|
@@ -3,6 +3,8 @@ title: "Reference: Agent.generate() | Agents"
|
|
|
3
3
|
description: "Documentation for the `Agent.generate()` method in Mastra agents, which enables non-streaming generation of responses with enhanced capabilities."
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
+
import { MODEL_SETTINGS_OBJECT } from "@site/src/components/ModelSettingsProperties";
|
|
7
|
+
|
|
6
8
|
# Agent.generate()
|
|
7
9
|
|
|
8
10
|
The `.generate()` method enables non-streaming response generation from an agent, with enhanced capabilities and flexible output formats. It accepts messages and optional generation options, supporting both Mastra’s native format and AI SDK v5 compatibility.
|
|
@@ -17,6 +19,14 @@ const mastraResult = await agent.generate("message for agent");
|
|
|
17
19
|
const aiSdkResult = await agent.generate("message for agent", {
|
|
18
20
|
format: "aisdk",
|
|
19
21
|
});
|
|
22
|
+
|
|
23
|
+
// With model settings (e.g., limiting output tokens)
|
|
24
|
+
const limitedResult = await agent.generate("Write a short poem about coding", {
|
|
25
|
+
modelSettings: {
|
|
26
|
+
maxOutputTokens: 50,
|
|
27
|
+
temperature: 0.7,
|
|
28
|
+
},
|
|
29
|
+
});
|
|
20
30
|
```
|
|
21
31
|
|
|
22
32
|
:::info
|
|
@@ -392,91 +402,7 @@ const aiSdkResult = await agent.generate("message for agent", {
|
|
|
392
402
|
},
|
|
393
403
|
],
|
|
394
404
|
},
|
|
395
|
-
|
|
396
|
-
name: "modelSettings",
|
|
397
|
-
type: "CallSettings",
|
|
398
|
-
isOptional: true,
|
|
399
|
-
description: "Model-specific settings like temperature, topP, etc.",
|
|
400
|
-
properties: [
|
|
401
|
-
{
|
|
402
|
-
parameters: [
|
|
403
|
-
{
|
|
404
|
-
name: "temperature",
|
|
405
|
-
type: "number",
|
|
406
|
-
isOptional: true,
|
|
407
|
-
description:
|
|
408
|
-
"Controls randomness in generation (0-2). Higher values make output more random.",
|
|
409
|
-
},
|
|
410
|
-
],
|
|
411
|
-
},
|
|
412
|
-
{
|
|
413
|
-
parameters: [
|
|
414
|
-
{
|
|
415
|
-
name: "maxRetries",
|
|
416
|
-
type: "number",
|
|
417
|
-
isOptional: true,
|
|
418
|
-
description:
|
|
419
|
-
"Maximum number of retry attempts for failed requests.",
|
|
420
|
-
},
|
|
421
|
-
],
|
|
422
|
-
},
|
|
423
|
-
{
|
|
424
|
-
parameters: [
|
|
425
|
-
{
|
|
426
|
-
name: "topP",
|
|
427
|
-
type: "number",
|
|
428
|
-
isOptional: true,
|
|
429
|
-
description:
|
|
430
|
-
"Nucleus sampling parameter (0-1). Controls diversity of generated text.",
|
|
431
|
-
},
|
|
432
|
-
],
|
|
433
|
-
},
|
|
434
|
-
{
|
|
435
|
-
parameters: [
|
|
436
|
-
{
|
|
437
|
-
name: "topK",
|
|
438
|
-
type: "number",
|
|
439
|
-
isOptional: true,
|
|
440
|
-
description:
|
|
441
|
-
"Top-k sampling parameter. Limits vocabulary to k most likely tokens.",
|
|
442
|
-
},
|
|
443
|
-
],
|
|
444
|
-
},
|
|
445
|
-
{
|
|
446
|
-
parameters: [
|
|
447
|
-
{
|
|
448
|
-
name: "presencePenalty",
|
|
449
|
-
type: "number",
|
|
450
|
-
isOptional: true,
|
|
451
|
-
description:
|
|
452
|
-
"Penalty for token presence (-2 to 2). Reduces repetition.",
|
|
453
|
-
},
|
|
454
|
-
],
|
|
455
|
-
},
|
|
456
|
-
{
|
|
457
|
-
parameters: [
|
|
458
|
-
{
|
|
459
|
-
name: "frequencyPenalty",
|
|
460
|
-
type: "number",
|
|
461
|
-
isOptional: true,
|
|
462
|
-
description:
|
|
463
|
-
"Penalty for token frequency (-2 to 2). Reduces repetition of frequent tokens.",
|
|
464
|
-
},
|
|
465
|
-
],
|
|
466
|
-
},
|
|
467
|
-
{
|
|
468
|
-
parameters: [
|
|
469
|
-
{
|
|
470
|
-
name: "stopSequences",
|
|
471
|
-
type: "string[]",
|
|
472
|
-
isOptional: true,
|
|
473
|
-
description:
|
|
474
|
-
"Array of strings that will stop generation when encountered.",
|
|
475
|
-
},
|
|
476
|
-
],
|
|
477
|
-
},
|
|
478
|
-
],
|
|
479
|
-
},
|
|
405
|
+
MODEL_SETTINGS_OBJECT,
|
|
480
406
|
{
|
|
481
407
|
name: "threadId",
|
|
482
408
|
type: "string",
|
|
@@ -652,13 +578,6 @@ const aiSdkResult = await agent.generate("message for agent", {
|
|
|
652
578
|
},
|
|
653
579
|
],
|
|
654
580
|
},
|
|
655
|
-
{
|
|
656
|
-
name: "maxTokens",
|
|
657
|
-
type: "number",
|
|
658
|
-
isOptional: true,
|
|
659
|
-
description:
|
|
660
|
-
"Conditions for stopping execution (e.g., step count, token limit).",
|
|
661
|
-
},
|
|
662
581
|
]}
|
|
663
582
|
/>
|
|
664
583
|
|
|
@@ -3,6 +3,8 @@ title: "Reference: Agent.network() | Agents"
|
|
|
3
3
|
description: "Documentation for the `Agent.network()` method in Mastra agents, which enables multi-agent collaboration and routing."
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
+
import { MODEL_SETTINGS_OBJECT } from "@site/src/components/ModelSettingsProperties";
|
|
7
|
+
|
|
6
8
|
# Agent.network()
|
|
7
9
|
|
|
8
10
|
:::caution Experimental Feature
|
|
@@ -208,91 +210,7 @@ await agent.network(`
|
|
|
208
210
|
},
|
|
209
211
|
],
|
|
210
212
|
},
|
|
211
|
-
|
|
212
|
-
name: "modelSettings",
|
|
213
|
-
type: "CallSettings",
|
|
214
|
-
isOptional: true,
|
|
215
|
-
description:
|
|
216
|
-
"Model-specific settings like temperature, maxTokens, topP, etc. These are passed to the underlying language model.",
|
|
217
|
-
properties: [
|
|
218
|
-
{
|
|
219
|
-
parameters: [
|
|
220
|
-
{
|
|
221
|
-
name: "temperature",
|
|
222
|
-
type: "number",
|
|
223
|
-
isOptional: true,
|
|
224
|
-
description:
|
|
225
|
-
"Controls randomness in the model's output. Higher values (e.g., 0.8) make the output more random, lower values (e.g., 0.2) make it more focused and deterministic.",
|
|
226
|
-
},
|
|
227
|
-
],
|
|
228
|
-
},
|
|
229
|
-
{
|
|
230
|
-
parameters: [
|
|
231
|
-
{
|
|
232
|
-
name: "maxRetries",
|
|
233
|
-
type: "number",
|
|
234
|
-
isOptional: true,
|
|
235
|
-
description: "Maximum number of retries for failed requests.",
|
|
236
|
-
},
|
|
237
|
-
],
|
|
238
|
-
},
|
|
239
|
-
{
|
|
240
|
-
parameters: [
|
|
241
|
-
{
|
|
242
|
-
name: "topP",
|
|
243
|
-
type: "number",
|
|
244
|
-
isOptional: true,
|
|
245
|
-
description:
|
|
246
|
-
"Nucleus sampling. This is a number between 0 and 1. It is recommended to set either temperature or topP, but not both.",
|
|
247
|
-
},
|
|
248
|
-
],
|
|
249
|
-
},
|
|
250
|
-
{
|
|
251
|
-
parameters: [
|
|
252
|
-
{
|
|
253
|
-
name: "topK",
|
|
254
|
-
type: "number",
|
|
255
|
-
isOptional: true,
|
|
256
|
-
description:
|
|
257
|
-
"Only sample from the top K options for each subsequent token. Used to remove 'long tail' low probability responses.",
|
|
258
|
-
},
|
|
259
|
-
],
|
|
260
|
-
},
|
|
261
|
-
{
|
|
262
|
-
parameters: [
|
|
263
|
-
{
|
|
264
|
-
name: "presencePenalty",
|
|
265
|
-
type: "number",
|
|
266
|
-
isOptional: true,
|
|
267
|
-
description:
|
|
268
|
-
"Presence penalty setting. It affects the likelihood of the model to repeat information that is already in the prompt. A number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).",
|
|
269
|
-
},
|
|
270
|
-
],
|
|
271
|
-
},
|
|
272
|
-
{
|
|
273
|
-
parameters: [
|
|
274
|
-
{
|
|
275
|
-
name: "frequencyPenalty",
|
|
276
|
-
type: "number",
|
|
277
|
-
isOptional: true,
|
|
278
|
-
description:
|
|
279
|
-
"Frequency penalty setting. It affects the likelihood of the model to repeatedly use the same words or phrases. A number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).",
|
|
280
|
-
},
|
|
281
|
-
],
|
|
282
|
-
},
|
|
283
|
-
{
|
|
284
|
-
parameters: [
|
|
285
|
-
{
|
|
286
|
-
name: "stopSequences",
|
|
287
|
-
type: "string[]",
|
|
288
|
-
isOptional: true,
|
|
289
|
-
description:
|
|
290
|
-
"Stop sequences. If set, the model will stop generating text when one of the stop sequences is generated.",
|
|
291
|
-
},
|
|
292
|
-
],
|
|
293
|
-
},
|
|
294
|
-
],
|
|
295
|
-
},
|
|
213
|
+
MODEL_SETTINGS_OBJECT,
|
|
296
214
|
{
|
|
297
215
|
name: "runId",
|
|
298
216
|
type: "string",
|
|
@@ -3,6 +3,8 @@ title: "Reference: Agent.stream() | Streaming"
|
|
|
3
3
|
description: "Documentation for the `Agent.stream()` method in Mastra agents, which enables real-time streaming of responses with enhanced capabilities."
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
+
import { MODEL_SETTINGS_OBJECT } from "@site/src/components/ModelSettingsProperties";
|
|
7
|
+
|
|
6
8
|
# Agent.stream()
|
|
7
9
|
|
|
8
10
|
The `.stream()` method enables real-time streaming of responses from an agent with enhanced capabilities and format flexibility. This method accepts messages and optional streaming options, providing a next-generation streaming experience with support for both Mastra's native format and AI SDK v5 compatibility.
|
|
@@ -396,91 +398,7 @@ const aiSdkStream = await agent.stream("message for agent", {
|
|
|
396
398
|
},
|
|
397
399
|
],
|
|
398
400
|
},
|
|
399
|
-
|
|
400
|
-
name: "modelSettings",
|
|
401
|
-
type: "CallSettings",
|
|
402
|
-
isOptional: true,
|
|
403
|
-
description:
|
|
404
|
-
"Model-specific settings like temperature, maxTokens, topP, etc. These are passed to the underlying language model.",
|
|
405
|
-
properties: [
|
|
406
|
-
{
|
|
407
|
-
parameters: [
|
|
408
|
-
{
|
|
409
|
-
name: "temperature",
|
|
410
|
-
type: "number",
|
|
411
|
-
isOptional: true,
|
|
412
|
-
description:
|
|
413
|
-
"Controls randomness in the model's output. Higher values (e.g., 0.8) make the output more random, lower values (e.g., 0.2) make it more focused and deterministic.",
|
|
414
|
-
},
|
|
415
|
-
],
|
|
416
|
-
},
|
|
417
|
-
{
|
|
418
|
-
parameters: [
|
|
419
|
-
{
|
|
420
|
-
name: "maxRetries",
|
|
421
|
-
type: "number",
|
|
422
|
-
isOptional: true,
|
|
423
|
-
description: "Maximum number of retries for failed requests.",
|
|
424
|
-
},
|
|
425
|
-
],
|
|
426
|
-
},
|
|
427
|
-
{
|
|
428
|
-
parameters: [
|
|
429
|
-
{
|
|
430
|
-
name: "topP",
|
|
431
|
-
type: "number",
|
|
432
|
-
isOptional: true,
|
|
433
|
-
description:
|
|
434
|
-
"Nucleus sampling. This is a number between 0 and 1. It is recommended to set either temperature or topP, but not both.",
|
|
435
|
-
},
|
|
436
|
-
],
|
|
437
|
-
},
|
|
438
|
-
{
|
|
439
|
-
parameters: [
|
|
440
|
-
{
|
|
441
|
-
name: "topK",
|
|
442
|
-
type: "number",
|
|
443
|
-
isOptional: true,
|
|
444
|
-
description:
|
|
445
|
-
"Only sample from the top K options for each subsequent token. Used to remove 'long tail' low probability responses.",
|
|
446
|
-
},
|
|
447
|
-
],
|
|
448
|
-
},
|
|
449
|
-
{
|
|
450
|
-
parameters: [
|
|
451
|
-
{
|
|
452
|
-
name: "presencePenalty",
|
|
453
|
-
type: "number",
|
|
454
|
-
isOptional: true,
|
|
455
|
-
description:
|
|
456
|
-
"Presence penalty setting. It affects the likelihood of the model to repeat information that is already in the prompt. A number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).",
|
|
457
|
-
},
|
|
458
|
-
],
|
|
459
|
-
},
|
|
460
|
-
{
|
|
461
|
-
parameters: [
|
|
462
|
-
{
|
|
463
|
-
name: "frequencyPenalty",
|
|
464
|
-
type: "number",
|
|
465
|
-
isOptional: true,
|
|
466
|
-
description:
|
|
467
|
-
"Frequency penalty setting. It affects the likelihood of the model to repeatedly use the same words or phrases. A number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).",
|
|
468
|
-
},
|
|
469
|
-
],
|
|
470
|
-
},
|
|
471
|
-
{
|
|
472
|
-
parameters: [
|
|
473
|
-
{
|
|
474
|
-
name: "stopSequences",
|
|
475
|
-
type: "string[]",
|
|
476
|
-
isOptional: true,
|
|
477
|
-
description:
|
|
478
|
-
"Stop sequences. If set, the model will stop generating text when one of the stop sequences is generated.",
|
|
479
|
-
},
|
|
480
|
-
],
|
|
481
|
-
},
|
|
482
|
-
],
|
|
483
|
-
},
|
|
401
|
+
MODEL_SETTINGS_OBJECT,
|
|
484
402
|
{
|
|
485
403
|
name: "threadId",
|
|
486
404
|
type: "string",
|
|
@@ -661,13 +579,6 @@ const aiSdkStream = await agent.stream("message for agent", {
|
|
|
661
579
|
},
|
|
662
580
|
],
|
|
663
581
|
},
|
|
664
|
-
{
|
|
665
|
-
name: "maxTokens",
|
|
666
|
-
type: "number",
|
|
667
|
-
isOptional: true,
|
|
668
|
-
description:
|
|
669
|
-
"Condition(s) that determine when to stop the agent's execution. Can be a single condition or array of conditions.",
|
|
670
|
-
},
|
|
671
582
|
]}
|
|
672
583
|
/>
|
|
673
584
|
|
|
@@ -7,31 +7,7 @@ description: "Documentation for the CompositeVoice class, which enables combinin
|
|
|
7
7
|
|
|
8
8
|
The CompositeVoice class allows you to combine different voice providers for text-to-speech and speech-to-text operations. This is particularly useful when you want to use the best provider for each operation - for example, using OpenAI for speech-to-text and PlayAI for text-to-speech.
|
|
9
9
|
|
|
10
|
-
CompositeVoice
|
|
11
|
-
|
|
12
|
-
## Usage Example
|
|
13
|
-
|
|
14
|
-
```typescript
|
|
15
|
-
import { CompositeVoice } from "@mastra/core/voice";
|
|
16
|
-
import { OpenAIVoice } from "@mastra/voice-openai";
|
|
17
|
-
import { PlayAIVoice } from "@mastra/voice-playai";
|
|
18
|
-
|
|
19
|
-
// Create voice providers
|
|
20
|
-
const openai = new OpenAIVoice();
|
|
21
|
-
const playai = new PlayAIVoice();
|
|
22
|
-
|
|
23
|
-
// Use OpenAI for listening (speech-to-text) and PlayAI for speaking (text-to-speech)
|
|
24
|
-
const voice = new CompositeVoice({
|
|
25
|
-
input: openai,
|
|
26
|
-
output: playai,
|
|
27
|
-
});
|
|
28
|
-
|
|
29
|
-
// Convert speech to text using OpenAI
|
|
30
|
-
const text = await voice.listen(audioStream);
|
|
31
|
-
|
|
32
|
-
// Convert text to speech using PlayAI
|
|
33
|
-
const audio = await voice.speak("Hello, world!");
|
|
34
|
-
```
|
|
10
|
+
CompositeVoice supports both Mastra voice providers and AI SDK model providers
|
|
35
11
|
|
|
36
12
|
## Constructor Parameters
|
|
37
13
|
|
|
@@ -45,14 +21,20 @@ const audio = await voice.speak("Hello, world!");
|
|
|
45
21
|
},
|
|
46
22
|
{
|
|
47
23
|
name: "config.input",
|
|
48
|
-
type: "MastraVoice",
|
|
49
|
-
description: "Voice provider to use for speech-to-text operations",
|
|
24
|
+
type: "MastraVoice | TranscriptionModel",
|
|
25
|
+
description: "Voice provider or AI SDK transcription model to use for speech-to-text operations. AI SDK models are automatically wrapped.",
|
|
50
26
|
isOptional: true,
|
|
51
27
|
},
|
|
52
28
|
{
|
|
53
29
|
name: "config.output",
|
|
30
|
+
type: "MastraVoice | SpeechModel",
|
|
31
|
+
description: "Voice provider or AI SDK speech model to use for text-to-speech operations. AI SDK models are automatically wrapped.",
|
|
32
|
+
isOptional: true,
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
name: "config.realtime",
|
|
54
36
|
type: "MastraVoice",
|
|
55
|
-
description: "Voice provider to use for text-to-speech operations",
|
|
37
|
+
description: "Voice provider to use for real-time speech-to-speech operations",
|
|
56
38
|
isOptional: true,
|
|
57
39
|
},
|
|
58
40
|
]}
|
|
@@ -142,3 +124,64 @@ Notes:
|
|
|
142
124
|
- If no speaking provider is configured, returns an empty array
|
|
143
125
|
- Each voice object will have at least a voiceId property
|
|
144
126
|
- Additional voice properties depend on the speaking provider
|
|
127
|
+
|
|
128
|
+
## Usage Examples
|
|
129
|
+
|
|
130
|
+
### Using Mastra Voice Providers
|
|
131
|
+
|
|
132
|
+
```typescript
|
|
133
|
+
import { CompositeVoice } from "@mastra/core/voice";
|
|
134
|
+
import { OpenAIVoice } from "@mastra/voice-openai";
|
|
135
|
+
import { PlayAIVoice } from "@mastra/voice-playai";
|
|
136
|
+
|
|
137
|
+
// Create voice providers
|
|
138
|
+
const openai = new OpenAIVoice();
|
|
139
|
+
const playai = new PlayAIVoice();
|
|
140
|
+
|
|
141
|
+
// Use OpenAI for listening (speech-to-text) and PlayAI for speaking (text-to-speech)
|
|
142
|
+
const voice = new CompositeVoice({
|
|
143
|
+
input: openai,
|
|
144
|
+
output: playai,
|
|
145
|
+
});
|
|
146
|
+
|
|
147
|
+
// Convert speech to text using OpenAI
|
|
148
|
+
const text = await voice.listen(audioStream);
|
|
149
|
+
|
|
150
|
+
// Convert text to speech using PlayAI
|
|
151
|
+
const audio = await voice.speak("Hello, world!");
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### Using AI SDK Model Providers
|
|
155
|
+
|
|
156
|
+
You can pass AI SDK transcription and speech models directly to CompositeVoice:
|
|
157
|
+
|
|
158
|
+
```typescript
|
|
159
|
+
import { CompositeVoice } from "@mastra/core/voice";
|
|
160
|
+
import { openai } from "@ai-sdk/openai";
|
|
161
|
+
import { elevenlabs } from "@ai-sdk/elevenlabs";
|
|
162
|
+
|
|
163
|
+
// Use AI SDK models directly - they will be auto-wrapped
|
|
164
|
+
const voice = new CompositeVoice({
|
|
165
|
+
input: openai.transcription('whisper-1'), // AI SDK transcription
|
|
166
|
+
output: elevenlabs.speech('eleven_turbo_v2'), // AI SDK speech
|
|
167
|
+
});
|
|
168
|
+
|
|
169
|
+
// Works the same way as with Mastra providers
|
|
170
|
+
const text = await voice.listen(audioStream);
|
|
171
|
+
const audio = await voice.speak("Hello from AI SDK!");
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
### Mix and Match
|
|
175
|
+
|
|
176
|
+
You can combine Mastra providers with AI SDK models:
|
|
177
|
+
|
|
178
|
+
```typescript
|
|
179
|
+
import { CompositeVoice } from "@mastra/core/voice";
|
|
180
|
+
import { PlayAIVoice } from "@mastra/voice-playai";
|
|
181
|
+
import { groq } from "@ai-sdk/groq";
|
|
182
|
+
|
|
183
|
+
const voice = new CompositeVoice({
|
|
184
|
+
input: groq.transcription('whisper-large-v3'), // AI SDK for STT
|
|
185
|
+
output: new PlayAIVoice(), // Mastra for TTS
|
|
186
|
+
});
|
|
187
|
+
```
|
|
@@ -7,41 +7,6 @@ description: "Documentation for the listen() method available in all Mastra voic
|
|
|
7
7
|
|
|
8
8
|
The `listen()` method is a core function available in all Mastra voice providers that converts speech to text. It takes an audio stream as input and returns the transcribed text.
|
|
9
9
|
|
|
10
|
-
## Usage Example
|
|
11
|
-
|
|
12
|
-
```typescript
|
|
13
|
-
import { OpenAIVoice } from "@mastra/voice-openai";
|
|
14
|
-
import { getMicrophoneStream } from "@mastra/node-audio";
|
|
15
|
-
import { createReadStream } from "fs";
|
|
16
|
-
import path from "path";
|
|
17
|
-
|
|
18
|
-
// Initialize a voice provider
|
|
19
|
-
const voice = new OpenAIVoice({
|
|
20
|
-
listeningModel: {
|
|
21
|
-
name: "whisper-1",
|
|
22
|
-
apiKey: process.env.OPENAI_API_KEY,
|
|
23
|
-
},
|
|
24
|
-
});
|
|
25
|
-
|
|
26
|
-
// Basic usage with a file stream
|
|
27
|
-
const audioFilePath = path.join(process.cwd(), "audio.mp3");
|
|
28
|
-
const audioStream = createReadStream(audioFilePath);
|
|
29
|
-
const transcript = await voice.listen(audioStream, {
|
|
30
|
-
filetype: "mp3",
|
|
31
|
-
});
|
|
32
|
-
console.log("Transcribed text:", transcript);
|
|
33
|
-
|
|
34
|
-
// Using a microphone stream
|
|
35
|
-
const microphoneStream = getMicrophoneStream(); // Assume this function gets audio input
|
|
36
|
-
const transcription = await voice.listen(microphoneStream);
|
|
37
|
-
|
|
38
|
-
// With provider-specific options
|
|
39
|
-
const transcriptWithOptions = await voice.listen(audioStream, {
|
|
40
|
-
language: "en",
|
|
41
|
-
prompt: "This is a conversation about artificial intelligence.",
|
|
42
|
-
});
|
|
43
|
-
```
|
|
44
|
-
|
|
45
10
|
## Parameters
|
|
46
11
|
|
|
47
12
|
<PropertiesTable
|
|
@@ -143,30 +108,42 @@ Each voice provider may support additional options specific to their implementat
|
|
|
143
108
|
]}
|
|
144
109
|
/>
|
|
145
110
|
|
|
146
|
-
## Realtime Voice Providers
|
|
147
|
-
|
|
148
|
-
When using realtime voice providers like `OpenAIRealtimeVoice`, the `listen()` method behaves differently:
|
|
149
|
-
|
|
150
|
-
- Instead of returning transcribed text, it emits 'writing' events with the transcribed text
|
|
151
|
-
- You need to register an event listener to receive the transcription
|
|
111
|
+
## Usage Example
|
|
152
112
|
|
|
153
113
|
```typescript
|
|
154
|
-
import { OpenAIRealtimeVoice } from "@mastra/voice-openai-realtime";
|
|
114
|
+
import { OpenAIVoice } from "@mastra/voice-openai";
|
|
155
115
|
import { getMicrophoneStream } from "@mastra/node-audio";
|
|
116
|
+
import { createReadStream } from "fs";
|
|
117
|
+
import path from "path";
|
|
156
118
|
|
|
157
|
-
|
|
158
|
-
|
|
119
|
+
// Initialize a voice provider
|
|
120
|
+
const voice = new OpenAIVoice({
|
|
121
|
+
listeningModel: {
|
|
122
|
+
name: "whisper-1",
|
|
123
|
+
apiKey: process.env.OPENAI_API_KEY,
|
|
124
|
+
},
|
|
125
|
+
});
|
|
159
126
|
|
|
160
|
-
// Register event listener for transcription
|
|
161
|
-
|
|
162
|
-
|
|
127
|
+
// Basic usage with a file stream
|
|
128
|
+
const audioFilePath = path.join(process.cwd(), "audio.mp3");
|
|
129
|
+
const audioStream = createReadStream(audioFilePath);
|
|
130
|
+
const transcript = await voice.listen(audioStream, {
|
|
131
|
+
filetype: "mp3",
|
|
163
132
|
});
|
|
133
|
+
console.log("Transcribed text:", transcript);
|
|
164
134
|
|
|
165
|
-
// This will emit 'writing' events instead of returning text
|
|
166
|
-
const microphoneStream = getMicrophoneStream();
|
|
167
|
-
await voice.listen(microphoneStream);
|
|
135
|
+
// Using a microphone stream
|
|
136
|
+
const microphoneStream = getMicrophoneStream(); // Assume this function gets audio input
|
|
137
|
+
const transcription = await voice.listen(microphoneStream);
|
|
138
|
+
|
|
139
|
+
// With provider-specific options
|
|
140
|
+
const transcriptWithOptions = await voice.listen(audioStream, {
|
|
141
|
+
language: "en",
|
|
142
|
+
prompt: "This is a conversation about artificial intelligence.",
|
|
143
|
+
});
|
|
168
144
|
```
|
|
169
145
|
|
|
146
|
+
|
|
170
147
|
## Using with CompositeVoice
|
|
171
148
|
|
|
172
149
|
When using `CompositeVoice`, the `listen()` method delegates to the configured listening provider:
|
|
@@ -177,14 +154,70 @@ import { OpenAIVoice } from "@mastra/voice-openai";
|
|
|
177
154
|
import { PlayAIVoice } from "@mastra/voice-playai";
|
|
178
155
|
|
|
179
156
|
const voice = new CompositeVoice({
|
|
180
|
-
|
|
181
|
-
|
|
157
|
+
input: new OpenAIVoice(),
|
|
158
|
+
output: new PlayAIVoice(),
|
|
182
159
|
});
|
|
183
160
|
|
|
184
161
|
// This will use the OpenAIVoice provider
|
|
185
162
|
const transcript = await voice.listen(audioStream);
|
|
186
163
|
```
|
|
187
164
|
|
|
165
|
+
### Using AI SDK Model Providers
|
|
166
|
+
|
|
167
|
+
You can also use AI SDK transcription models directly with `CompositeVoice`:
|
|
168
|
+
|
|
169
|
+
```typescript
|
|
170
|
+
import { CompositeVoice } from "@mastra/core/voice";
|
|
171
|
+
import { openai } from "@ai-sdk/openai";
|
|
172
|
+
import { groq } from "@ai-sdk/groq";
|
|
173
|
+
|
|
174
|
+
// Use AI SDK transcription models
|
|
175
|
+
const voice = new CompositeVoice({
|
|
176
|
+
input: openai.transcription('whisper-1'), // AI SDK model
|
|
177
|
+
output: new PlayAIVoice(), // Mastra provider
|
|
178
|
+
});
|
|
179
|
+
|
|
180
|
+
// Works the same way
|
|
181
|
+
const transcript = await voice.listen(audioStream);
|
|
182
|
+
|
|
183
|
+
// Provider-specific options can be passed through
|
|
184
|
+
const transcriptWithOptions = await voice.listen(audioStream, {
|
|
185
|
+
providerOptions: {
|
|
186
|
+
openai: {
|
|
187
|
+
language: 'en',
|
|
188
|
+
prompt: 'This is about AI',
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
});
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
See the [CompositeVoice reference](/reference/v1/voice/composite-voice) for more details on AI SDK integration.
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
## Realtime Voice Providers
|
|
198
|
+
|
|
199
|
+
When using realtime voice providers like `OpenAIRealtimeVoice`, the `listen()` method behaves differently:
|
|
200
|
+
|
|
201
|
+
- Instead of returning transcribed text, it emits 'writing' events with the transcribed text
|
|
202
|
+
- You need to register an event listener to receive the transcription
|
|
203
|
+
|
|
204
|
+
```typescript
|
|
205
|
+
import { OpenAIRealtimeVoice } from "@mastra/voice-openai-realtime";
|
|
206
|
+
import { getMicrophoneStream } from "@mastra/node-audio";
|
|
207
|
+
|
|
208
|
+
const voice = new OpenAIRealtimeVoice();
|
|
209
|
+
await voice.connect();
|
|
210
|
+
|
|
211
|
+
// Register event listener for transcription
|
|
212
|
+
voice.on("writing", ({ text, role }) => {
|
|
213
|
+
console.log(`${role}: ${text}`);
|
|
214
|
+
});
|
|
215
|
+
|
|
216
|
+
// This will emit 'writing' events instead of returning text
|
|
217
|
+
const microphoneStream = getMicrophoneStream();
|
|
218
|
+
await voice.listen(microphoneStream);
|
|
219
|
+
```
|
|
220
|
+
|
|
188
221
|
## Notes
|
|
189
222
|
|
|
190
223
|
- Not all voice providers support speech-to-text functionality (e.g., PlayAI, Speechify)
|
|
@@ -194,6 +227,7 @@ const transcript = await voice.listen(audioStream);
|
|
|
194
227
|
- Some providers support streaming transcription, where text is returned as it's transcribed
|
|
195
228
|
- For best performance, consider closing or ending the audio stream when you're done with it
|
|
196
229
|
|
|
230
|
+
|
|
197
231
|
## Related Methods
|
|
198
232
|
|
|
199
233
|
- [voice.speak()](./voice.speak) - Converts text to speech
|