@mastra/mcp-docs-server 1.1.25-alpha.7 → 1.1.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/docs/agents/processors.md +33 -0
- package/.docs/docs/mcp/overview.md +17 -0
- package/.docs/docs/memory/message-history.md +6 -0
- package/.docs/docs/memory/observational-memory.md +6 -0
- package/.docs/docs/voice/overview.md +8 -5
- package/.docs/guides/build-your-ui/ai-sdk-ui.md +32 -0
- package/.docs/guides/deployment/mastra-platform.md +2 -0
- package/.docs/models/gateways/openrouter.md +2 -8
- package/.docs/models/gateways/vercel.md +1 -1
- package/.docs/models/index.md +1 -1
- package/.docs/models/providers/anthropic.md +12 -2
- package/.docs/models/providers/fireworks-ai.md +2 -1
- package/.docs/models/providers/google.md +5 -1
- package/.docs/models/providers/inception.md +9 -11
- package/.docs/models/providers/kilo.md +2 -2
- package/.docs/models/providers/nano-gpt.md +1 -2
- package/.docs/models/providers/nvidia.md +2 -1
- package/.docs/models/providers/openai.md +4 -0
- package/.docs/models/providers/opencode-go.md +4 -2
- package/.docs/models/providers/opencode.md +4 -2
- package/.docs/models/providers/poe.md +5 -16
- package/.docs/models/providers/xai.md +4 -0
- package/.docs/reference/ai-sdk/handle-chat-stream.md +17 -0
- package/.docs/reference/ai-sdk/to-ai-sdk-stream.md +15 -0
- package/.docs/reference/cli/mastra.md +100 -1
- package/.docs/reference/index.md +1 -0
- package/.docs/reference/processors/prefill-error-handler.md +70 -0
- package/.docs/reference/processors/processor-interface.md +110 -12
- package/.docs/reference/tools/create-tool.md +30 -0
- package/.docs/reference/tools/mcp-client.md +45 -0
- package/.docs/reference/voice/sarvam.md +29 -23
- package/CHANGELOG.md +16 -0
- package/package.json +6 -6
|
@@ -11,6 +11,8 @@ You can use individual [`Processor`](https://mastra.ai/reference/processors/proc
|
|
|
11
11
|
|
|
12
12
|
Some processors implement both input and output logic and can be used in either array depending on where the transformation should occur.
|
|
13
13
|
|
|
14
|
+
Some built-in processors also persist hidden system reminder messages using `<system-reminder>...</system-reminder>` text plus `metadata.systemReminder`. These reminders stay available in raw memory history and retry/prompt reconstruction paths, but standard UI-facing message conversions and default memory recall hide them unless you explicitly opt in.
|
|
15
|
+
|
|
14
16
|
## When to use processors
|
|
15
17
|
|
|
16
18
|
Use processors to:
|
|
@@ -536,6 +538,37 @@ The retry mechanism:
|
|
|
536
538
|
- Tracks retry count via the `retryCount` parameter
|
|
537
539
|
- Respects `maxProcessorRetries` limit on the agent
|
|
538
540
|
|
|
541
|
+
## API error handling
|
|
542
|
+
|
|
543
|
+
The `processAPIError` method handles LLM API rejections — errors where the API rejects the request (such as 400 or 422 status codes) rather than network or server failures. This lets you modify the request and retry when the API rejects the message format.
|
|
544
|
+
|
|
545
|
+
```typescript
|
|
546
|
+
import { APICallError } from '@ai-sdk/provider'
|
|
547
|
+
import type { Processor, ProcessAPIErrorArgs, ProcessAPIErrorResult } from '@mastra/core/processors'
|
|
548
|
+
|
|
549
|
+
export class ContextLengthHandler implements Processor {
|
|
550
|
+
id = 'context-length-handler'
|
|
551
|
+
|
|
552
|
+
processAPIError({
|
|
553
|
+
error,
|
|
554
|
+
messageList,
|
|
555
|
+
retryCount,
|
|
556
|
+
}: ProcessAPIErrorArgs): ProcessAPIErrorResult | void {
|
|
557
|
+
if (retryCount > 0) return
|
|
558
|
+
|
|
559
|
+
if (APICallError.isInstance(error) && error.message.includes('context length exceeded')) {
|
|
560
|
+
const messages = messageList.get.all.db()
|
|
561
|
+
if (messages.length > 4) {
|
|
562
|
+
messageList.removeByIds([messages[1]!.id, messages[2]!.id])
|
|
563
|
+
return { retry: true }
|
|
564
|
+
}
|
|
565
|
+
}
|
|
566
|
+
}
|
|
567
|
+
}
|
|
568
|
+
```
|
|
569
|
+
|
|
570
|
+
Mastra includes a built-in [`PrefillErrorHandler`](https://mastra.ai/reference/processors/prefill-error-handler) that automatically handles the Anthropic "assistant message prefill" error. This processor is auto-injected and requires no configuration.
|
|
571
|
+
|
|
539
572
|
## Related documentation
|
|
540
573
|
|
|
541
574
|
- [Guardrails](https://mastra.ai/docs/agents/guardrails): Security and validation processors
|
|
@@ -89,6 +89,23 @@ export const testAgent = new Agent({
|
|
|
89
89
|
|
|
90
90
|
> **Info:** Visit [Agent Class](https://mastra.ai/reference/agents/agent) for a full list of configuration options.
|
|
91
91
|
|
|
92
|
+
## Tool approval
|
|
93
|
+
|
|
94
|
+
You can require human approval before MCP tools are executed by setting `requireToolApproval` on a server definition. This integrates with the existing [human-in-the-loop](https://mastra.ai/docs/workflows/human-in-the-loop) approval flow.
|
|
95
|
+
|
|
96
|
+
```typescript
|
|
97
|
+
export const mcp = new MCPClient({
|
|
98
|
+
servers: {
|
|
99
|
+
github: {
|
|
100
|
+
url: new URL('http://localhost:3000/mcp'),
|
|
101
|
+
requireToolApproval: true,
|
|
102
|
+
},
|
|
103
|
+
},
|
|
104
|
+
})
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
You can also pass a function that decides dynamically, on a per-call basis, whether approval is required. See the [MCPClient reference](https://mastra.ai/reference/tools/mcp-client) for the full API.
|
|
108
|
+
|
|
92
109
|
## Configuring `MCPServer`
|
|
93
110
|
|
|
94
111
|
To expose agents, tools, and workflows from your Mastra application to external systems over HTTP(S), use the `MCPServer` class. This makes them accessible to any system or agent that supports the protocol.
|
|
@@ -6,6 +6,12 @@ You can also retrieve message history to display past conversations in your UI.
|
|
|
6
6
|
|
|
7
7
|
> **Info:** Each message belongs to a thread (the conversation) and a resource (the user or entity it's associated with). See [Threads and resources](https://mastra.ai/docs/memory/storage) for more detail.
|
|
8
8
|
|
|
9
|
+
> **Warning:** When you use memory with a client application, send **only the new message** from the client instead of the full conversation history.
|
|
10
|
+
>
|
|
11
|
+
> Sending the full history is redundant because Mastra loads messages from storage, and it can cause message ordering bugs when client-side timestamps conflict with stored timestamps.
|
|
12
|
+
>
|
|
13
|
+
> For an AI SDK example, see [Using Mastra Memory](https://mastra.ai/guides/build-your-ui/ai-sdk-ui).
|
|
14
|
+
|
|
9
15
|
## Getting started
|
|
10
16
|
|
|
11
17
|
Install the Mastra memory module along with a [storage adapter](https://mastra.ai/docs/memory/storage) for your database. The examples below use `@mastra/libsql`, which stores data locally in a `mastra.db` file.
|
|
@@ -38,6 +38,12 @@ const memory = new Memory({
|
|
|
38
38
|
|
|
39
39
|
See [configuration options](https://mastra.ai/reference/memory/observational-memory) for full API details.
|
|
40
40
|
|
|
41
|
+
> **Warning:** When you use OM with a client application, send **only the new message** from the client instead of the full conversation history.
|
|
42
|
+
>
|
|
43
|
+
> Observational memory still relies on stored conversation history. Sending the full history is redundant and can cause message ordering bugs when client-side timestamps conflict with stored timestamps.
|
|
44
|
+
>
|
|
45
|
+
> For an AI SDK example, see [Using Mastra Memory](https://mastra.ai/guides/build-your-ui/ai-sdk-ui).
|
|
46
|
+
|
|
41
47
|
> **Note:** OM currently only supports `@mastra/pg`, `@mastra/libsql`, and `@mastra/mongodb` storage adapters. It uses background agents for managing memory. When using `observationalMemory: true`, the default model is `google/gemini-2.5-flash`. When passing a config object, a `model` must be explicitly set.
|
|
42
48
|
|
|
43
49
|
## Benefits
|
|
@@ -265,7 +265,7 @@ const { text } = await voiceAgent.generate('What color is the sky?')
|
|
|
265
265
|
|
|
266
266
|
// Convert text to speech to an Audio Stream
|
|
267
267
|
const audioStream = await voiceAgent.voice.speak(text, {
|
|
268
|
-
speaker: '
|
|
268
|
+
speaker: 'shubh', // Optional: specify a bulbul:v3 speaker
|
|
269
269
|
})
|
|
270
270
|
|
|
271
271
|
playAudio(audioStream)
|
|
@@ -760,12 +760,15 @@ Visit the [Speechify Voice Reference](https://mastra.ai/reference/voice/speechif
|
|
|
760
760
|
// Sarvam Voice Configuration
|
|
761
761
|
const voice = new SarvamVoice({
|
|
762
762
|
speechModel: {
|
|
763
|
-
|
|
763
|
+
model: 'bulbul:v3', // TTS model (bulbul:v2 or bulbul:v3)
|
|
764
|
+
apiKey: process.env.SARVAM_API_KEY,
|
|
765
|
+
language: 'en-IN', // BCP-47 language code
|
|
766
|
+
},
|
|
767
|
+
listeningModel: {
|
|
768
|
+
model: 'saarika:v2.5', // STT model (saarika:v2.5 or saaras:v3)
|
|
764
769
|
apiKey: process.env.SARVAM_API_KEY,
|
|
765
|
-
language: 'en-IN', // Language code
|
|
766
|
-
style: 'conversational', // Style setting
|
|
767
770
|
},
|
|
768
|
-
|
|
771
|
+
speaker: 'shubh', // Default bulbul:v3 speaker
|
|
769
772
|
})
|
|
770
773
|
```
|
|
771
774
|
|
|
@@ -238,6 +238,38 @@ export default function Chat() {
|
|
|
238
238
|
|
|
239
239
|
Use [`prepareSendMessagesRequest`](https://ai-sdk.dev/docs/reference/ai-sdk-ui/use-chat#transport.default-chat-transport.prepare-send-messages-request) to customize the request sent to the chat route, for example to pass additional configuration to the agent.
|
|
240
240
|
|
|
241
|
+
## Using Mastra Memory
|
|
242
|
+
|
|
243
|
+
When your agent has [memory](https://mastra.ai/docs/memory/overview) configured, Mastra loads conversation history from storage on the server. Send only the new message from the client instead of the full conversation history.
|
|
244
|
+
|
|
245
|
+
Sending the full history is redundant and can cause message ordering bugs because client-side timestamps can conflict with the timestamps stored in your database.
|
|
246
|
+
|
|
247
|
+
```typescript
|
|
248
|
+
import { useChat } from '@ai-sdk/react'
|
|
249
|
+
import { DefaultChatTransport } from 'ai'
|
|
250
|
+
|
|
251
|
+
const { messages, sendMessage } = useChat({
|
|
252
|
+
transport: new DefaultChatTransport({
|
|
253
|
+
api: 'http://localhost:4111/chat/weatherAgent',
|
|
254
|
+
prepareSendMessagesRequest({ messages }) {
|
|
255
|
+
return {
|
|
256
|
+
body: {
|
|
257
|
+
messages: [messages[messages.length - 1]],
|
|
258
|
+
threadId: 'user-thread-123',
|
|
259
|
+
resourceId: 'user-123',
|
|
260
|
+
},
|
|
261
|
+
}
|
|
262
|
+
},
|
|
263
|
+
}),
|
|
264
|
+
})
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
Set `threadId` and `resourceId` from your app's own state, such as URL params, auth context, or your database.
|
|
268
|
+
|
|
269
|
+
See [Message history](https://mastra.ai/docs/memory/message-history) for more on how Mastra memory loads and stores messages.
|
|
270
|
+
|
|
271
|
+
[`chatRoute()`](https://mastra.ai/reference/ai-sdk/chat-route) and [`handleChatStream()`](https://mastra.ai/reference/ai-sdk/handle-chat-stream) already work with memory. Configure the client to send only the new message and include the thread and resource identifiers.
|
|
272
|
+
|
|
241
273
|
### `useCompletion()`
|
|
242
274
|
|
|
243
275
|
The `useCompletion()` hook handles single-turn completions between your frontend and a Mastra agent, allowing you to send a prompt and receive a streamed response over HTTP.
|
|
@@ -182,6 +182,8 @@ The CLI reads `organizationId` and `projectId` from `.mastra-project.json` by de
|
|
|
182
182
|
## Related
|
|
183
183
|
|
|
184
184
|
- [CLI reference: `mastra server deploy`](https://mastra.ai/reference/cli/mastra)
|
|
185
|
+
- [CLI reference: `mastra server pause`](https://mastra.ai/reference/cli/mastra)
|
|
186
|
+
- [CLI reference: `mastra server restart`](https://mastra.ai/reference/cli/mastra)
|
|
185
187
|
- [CLI reference: `mastra studio deploy`](https://mastra.ai/reference/cli/mastra)
|
|
186
188
|
- [CLI reference: `mastra auth tokens`](https://mastra.ai/reference/cli/mastra)
|
|
187
189
|
- [Mastra platform overview](https://mastra.ai/docs/mastra-platform/overview)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# OpenRouter
|
|
2
2
|
|
|
3
|
-
OpenRouter aggregates models from multiple providers with enhanced features like rate limiting and failover. Access
|
|
3
|
+
OpenRouter aggregates models from multiple providers with enhanced features like rate limiting and failover. Access 170 models through Mastra's model router.
|
|
4
4
|
|
|
5
5
|
Learn more in the [OpenRouter documentation](https://openrouter.ai/models).
|
|
6
6
|
|
|
@@ -46,7 +46,6 @@ ANTHROPIC_API_KEY=ant-...
|
|
|
46
46
|
| `anthropic/claude-sonnet-4.6` |
|
|
47
47
|
| `arcee-ai/trinity-large-preview:free` |
|
|
48
48
|
| `arcee-ai/trinity-large-thinking` |
|
|
49
|
-
| `arcee-ai/trinity-mini:free` |
|
|
50
49
|
| `black-forest-labs/flux.2-flex` |
|
|
51
50
|
| `black-forest-labs/flux.2-klein-4b` |
|
|
52
51
|
| `black-forest-labs/flux.2-max` |
|
|
@@ -88,9 +87,8 @@ ANTHROPIC_API_KEY=ant-...
|
|
|
88
87
|
| `google/gemma-4-26b-a4b-it:free` |
|
|
89
88
|
| `google/gemma-4-31b-it` |
|
|
90
89
|
| `google/gemma-4-31b-it:free` |
|
|
91
|
-
| `inception/mercury` |
|
|
92
90
|
| `inception/mercury-2` |
|
|
93
|
-
| `inception/mercury-
|
|
91
|
+
| `inception/mercury-edit-2` |
|
|
94
92
|
| `liquid/lfm-2.5-1.2b-instruct:free` |
|
|
95
93
|
| `liquid/lfm-2.5-1.2b-thinking:free` |
|
|
96
94
|
| `meta-llama/llama-3.2-11b-vision-instruct` |
|
|
@@ -117,7 +115,6 @@ ANTHROPIC_API_KEY=ant-...
|
|
|
117
115
|
| `moonshotai/kimi-k2-0905` |
|
|
118
116
|
| `moonshotai/kimi-k2-0905:exacto` |
|
|
119
117
|
| `moonshotai/kimi-k2-thinking` |
|
|
120
|
-
| `moonshotai/kimi-k2:free` |
|
|
121
118
|
| `moonshotai/kimi-k2.5` |
|
|
122
119
|
| `nousresearch/hermes-3-llama-3.1-405b:free` |
|
|
123
120
|
| `nousresearch/hermes-4-405b` |
|
|
@@ -168,15 +165,12 @@ ANTHROPIC_API_KEY=ant-...
|
|
|
168
165
|
| `qwen/qwen3-235b-a22b-thinking-2507` |
|
|
169
166
|
| `qwen/qwen3-30b-a3b-instruct-2507` |
|
|
170
167
|
| `qwen/qwen3-30b-a3b-thinking-2507` |
|
|
171
|
-
| `qwen/qwen3-4b:free` |
|
|
172
168
|
| `qwen/qwen3-coder` |
|
|
173
169
|
| `qwen/qwen3-coder-30b-a3b-instruct` |
|
|
174
170
|
| `qwen/qwen3-coder-flash` |
|
|
175
171
|
| `qwen/qwen3-coder:exacto` |
|
|
176
|
-
| `qwen/qwen3-coder:free` |
|
|
177
172
|
| `qwen/qwen3-max` |
|
|
178
173
|
| `qwen/qwen3-next-80b-a3b-instruct` |
|
|
179
|
-
| `qwen/qwen3-next-80b-a3b-instruct:free` |
|
|
180
174
|
| `qwen/qwen3-next-80b-a3b-thinking` |
|
|
181
175
|
| `qwen/qwen3.5-397b-a17b` |
|
|
182
176
|
| `qwen/qwen3.5-flash-02-23` |
|
|
@@ -119,7 +119,7 @@ ANTHROPIC_API_KEY=ant-...
|
|
|
119
119
|
| `google/text-embedding-005` |
|
|
120
120
|
| `google/text-multilingual-embedding-002` |
|
|
121
121
|
| `inception/mercury-2` |
|
|
122
|
-
| `inception/mercury-
|
|
122
|
+
| `inception/mercury-edit-2` |
|
|
123
123
|
| `kwaipilot/kat-coder-pro-v1` |
|
|
124
124
|
| `kwaipilot/kat-coder-pro-v2` |
|
|
125
125
|
| `meituan/longcat-flash-chat` |
|
package/.docs/models/index.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Model Providers
|
|
2
2
|
|
|
3
|
-
Mastra provides a unified interface for working with LLMs across multiple providers, giving you access to
|
|
3
|
+
Mastra provides a unified interface for working with LLMs across multiple providers, giving you access to 3596 models from 99 providers through a single API.
|
|
4
4
|
|
|
5
5
|
## Features
|
|
6
6
|
|
|
@@ -114,13 +114,23 @@ const response = await agent.generate("Hello!", {
|
|
|
114
114
|
|
|
115
115
|
**cacheControl** (`{ type: "ephemeral"; ttl?: "5m" | "1h" | undefined; } | undefined`)
|
|
116
116
|
|
|
117
|
+
**metadata** (`{ userId?: string | undefined; } | undefined`)
|
|
118
|
+
|
|
119
|
+
**mcpServers** (`{ type: "url"; name: string; url: string; authorizationToken?: string | null | undefined; toolConfiguration?: { enabled?: boolean | null | undefined; allowedTools?: string[] | null | undefined; } | null | undefined; }[] | undefined`)
|
|
120
|
+
|
|
117
121
|
**container** (`{ id?: string | undefined; skills?: { type: "anthropic" | "custom"; skillId: string; version?: string | undefined; }[] | undefined; } | undefined`)
|
|
118
122
|
|
|
123
|
+
**toolStreaming** (`boolean | undefined`)
|
|
124
|
+
|
|
119
125
|
**effort** (`"low" | "medium" | "high" | "max" | undefined`)
|
|
120
126
|
|
|
121
|
-
**speed** (`"fast" | undefined`)
|
|
127
|
+
**speed** (`"fast" | "standard" | undefined`)
|
|
128
|
+
|
|
129
|
+
**inferenceGeo** (`"us" | "global" | undefined`)
|
|
130
|
+
|
|
131
|
+
**anthropicBeta** (`string[] | undefined`)
|
|
122
132
|
|
|
123
|
-
**contextManagement** (`{ edits: ({ type: "
|
|
133
|
+
**contextManagement** (`{ edits: ({ type: "clear_tool_uses_20250919"; trigger?: { type: "input_tokens"; value: number; } | { type: "tool_uses"; value: number; } | undefined; keep?: { type: "tool_uses"; value: number; } | undefined; clearAtLeast?: { ...; } | undefined; clearToolInputs?: boolean | undefined; excludeTools?: string[] | undefin...`)
|
|
124
134
|
|
|
125
135
|
## Direct provider installation
|
|
126
136
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Fireworks AI
|
|
2
2
|
|
|
3
|
-
Access
|
|
3
|
+
Access 17 Fireworks AI models through Mastra's model router. Authentication is handled automatically using the `FIREWORKS_API_KEY` environment variable.
|
|
4
4
|
|
|
5
5
|
Learn more in the [Fireworks AI documentation](https://fireworks.ai/docs/).
|
|
6
6
|
|
|
@@ -48,6 +48,7 @@ for await (const chunk of stream) {
|
|
|
48
48
|
| `fireworks-ai/accounts/fireworks/models/kimi-k2p5` | 256K | | | | | | $0.60 | $3 |
|
|
49
49
|
| `fireworks-ai/accounts/fireworks/models/minimax-m2p1` | 200K | | | | | | $0.30 | $1 |
|
|
50
50
|
| `fireworks-ai/accounts/fireworks/models/minimax-m2p5` | 197K | | | | | | $0.30 | $1 |
|
|
51
|
+
| `fireworks-ai/accounts/fireworks/models/minimax-m2p7` | 197K | | | | | | $0.30 | $1 |
|
|
51
52
|
| `fireworks-ai/accounts/fireworks/models/qwen3p6-plus` | 128K | | | | | | $0.50 | $3 |
|
|
52
53
|
| `fireworks-ai/accounts/fireworks/routers/kimi-k2p5-turbo` | 256K | | | | | | — | — |
|
|
53
54
|
|
|
@@ -137,10 +137,14 @@ const response = await agent.generate("Hello!", {
|
|
|
137
137
|
|
|
138
138
|
**mediaResolution** (`"MEDIA_RESOLUTION_UNSPECIFIED" | "MEDIA_RESOLUTION_LOW" | "MEDIA_RESOLUTION_MEDIUM" | "MEDIA_RESOLUTION_HIGH" | undefined`)
|
|
139
139
|
|
|
140
|
-
**imageConfig** (`{ aspectRatio?: "1:1" | "2:3" | "3:2" | "3:4" | "4:3" | "4:5" | "5:4" | "9:16" | "16:9" | "21:9" | undefined; imageSize?: "1K" | "2K" | "4K" | undefined; } | undefined`)
|
|
140
|
+
**imageConfig** (`{ aspectRatio?: "1:1" | "2:3" | "3:2" | "3:4" | "4:3" | "4:5" | "5:4" | "9:16" | "16:9" | "21:9" | "1:8" | "8:1" | "1:4" | "4:1" | undefined; imageSize?: "1K" | "2K" | "4K" | "512" | undefined; } | undefined`)
|
|
141
141
|
|
|
142
142
|
**retrievalConfig** (`{ latLng?: { latitude: number; longitude: number; } | undefined; } | undefined`)
|
|
143
143
|
|
|
144
|
+
**streamFunctionCallArguments** (`boolean | undefined`)
|
|
145
|
+
|
|
146
|
+
**serviceTier** (`"standard" | "flex" | "priority" | undefined`)
|
|
147
|
+
|
|
144
148
|
## Direct provider installation
|
|
145
149
|
|
|
146
150
|
This provider can also be installed directly as a standalone package, which can be used instead of the Mastra model router string. View the [package documentation](https://www.npmjs.com/package/@ai-sdk/google) for more details.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Inception
|
|
2
2
|
|
|
3
|
-
Access
|
|
3
|
+
Access 2 Inception models through Mastra's model router. Authentication is handled automatically using the `INCEPTION_API_KEY` environment variable.
|
|
4
4
|
|
|
5
5
|
Learn more in the [Inception documentation](https://platform.inceptionlabs.ai/docs).
|
|
6
6
|
|
|
@@ -15,7 +15,7 @@ const agent = new Agent({
|
|
|
15
15
|
id: "my-agent",
|
|
16
16
|
name: "My Agent",
|
|
17
17
|
instructions: "You are a helpful assistant",
|
|
18
|
-
model: "inception/mercury"
|
|
18
|
+
model: "inception/mercury-2"
|
|
19
19
|
});
|
|
20
20
|
|
|
21
21
|
// Generate a response
|
|
@@ -32,12 +32,10 @@ for await (const chunk of stream) {
|
|
|
32
32
|
|
|
33
33
|
## Models
|
|
34
34
|
|
|
35
|
-
| Model
|
|
36
|
-
|
|
|
37
|
-
| `inception/mercury`
|
|
38
|
-
| `inception/mercury-2`
|
|
39
|
-
| `inception/mercury-coder` | 128K | | | | | | $0.25 | $1 |
|
|
40
|
-
| `inception/mercury-edit` | 128K | | | | | | $0.25 | $0.75 |
|
|
35
|
+
| Model | Context | Tools | Reasoning | Image | Audio | Video | Input $/1M | Output $/1M |
|
|
36
|
+
| -------------------------- | ------- | ----- | --------- | ----- | ----- | ----- | ---------- | ----------- |
|
|
37
|
+
| `inception/mercury-2` | 128K | | | | | | $0.25 | $0.75 |
|
|
38
|
+
| `inception/mercury-edit-2` | 128K | | | | | | $0.25 | $0.75 |
|
|
41
39
|
|
|
42
40
|
## Advanced configuration
|
|
43
41
|
|
|
@@ -49,7 +47,7 @@ const agent = new Agent({
|
|
|
49
47
|
name: "custom-agent",
|
|
50
48
|
model: {
|
|
51
49
|
url: "https://api.inceptionlabs.ai/v1/",
|
|
52
|
-
id: "inception/mercury",
|
|
50
|
+
id: "inception/mercury-2",
|
|
53
51
|
apiKey: process.env.INCEPTION_API_KEY,
|
|
54
52
|
headers: {
|
|
55
53
|
"X-Custom-Header": "value"
|
|
@@ -67,8 +65,8 @@ const agent = new Agent({
|
|
|
67
65
|
model: ({ requestContext }) => {
|
|
68
66
|
const useAdvanced = requestContext.task === "complex";
|
|
69
67
|
return useAdvanced
|
|
70
|
-
? "inception/mercury-edit"
|
|
71
|
-
: "inception/mercury";
|
|
68
|
+
? "inception/mercury-edit-2"
|
|
69
|
+
: "inception/mercury-2";
|
|
72
70
|
}
|
|
73
71
|
});
|
|
74
72
|
```
|
|
@@ -127,9 +127,8 @@ for await (const chunk of stream) {
|
|
|
127
127
|
| `kilo/google/lyria-3-pro-preview` | 1.0M | | | | | | — | — |
|
|
128
128
|
| `kilo/gryphe/mythomax-l2-13b` | 4K | | | | | | $0.06 | $0.06 |
|
|
129
129
|
| `kilo/ibm-granite/granite-4.0-h-micro` | 131K | | | | | | $0.02 | $0.11 |
|
|
130
|
-
| `kilo/inception/mercury` | 128K | | | | | | $0.25 | $0.75 |
|
|
131
130
|
| `kilo/inception/mercury-2` | 128K | | | | | | $0.25 | $0.75 |
|
|
132
|
-
| `kilo/inception/mercury-
|
|
131
|
+
| `kilo/inception/mercury-edit-2` | 128K | | | | | | $0.25 | $0.75 |
|
|
133
132
|
| `kilo/inflection/inflection-3-pi` | 8K | | | | | | $3 | $10 |
|
|
134
133
|
| `kilo/inflection/inflection-3-productivity` | 8K | | | | | | $3 | $10 |
|
|
135
134
|
| `kilo/kilo-auto/balanced` | 205K | | | | | | $0.60 | $3 |
|
|
@@ -268,6 +267,7 @@ for await (const chunk of stream) {
|
|
|
268
267
|
| `kilo/openai/o4-mini-high` | 200K | | | | | | $1 | $4 |
|
|
269
268
|
| `kilo/openrouter/auto` | 2.0M | | | | | | — | — |
|
|
270
269
|
| `kilo/openrouter/bodybuilder` | 128K | | | | | | — | — |
|
|
270
|
+
| `kilo/openrouter/elephant-alpha` | 262K | | | | | | — | — |
|
|
271
271
|
| `kilo/openrouter/free` | 200K | | | | | | — | — |
|
|
272
272
|
| `kilo/perplexity/sonar` | 127K | | | | | | $1 | $1 |
|
|
273
273
|
| `kilo/perplexity/sonar-deep-research` | 128K | | | | | | $2 | $8 |
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# NanoGPT
|
|
2
2
|
|
|
3
|
-
Access
|
|
3
|
+
Access 518 NanoGPT models through Mastra's model router. Authentication is handled automatically using the `NANO_GPT_API_KEY` environment variable.
|
|
4
4
|
|
|
5
5
|
Learn more in the [NanoGPT documentation](https://docs.nano-gpt.com).
|
|
6
6
|
|
|
@@ -319,7 +319,6 @@ for await (const chunk of stream) {
|
|
|
319
319
|
| `nano-gpt/meganova-ai/manta-mini-1.0` | 8K | | | | | | $0.02 | $0.16 |
|
|
320
320
|
| `nano-gpt/meganova-ai/manta-pro-1.0` | 33K | | | | | | $0.06 | $0.50 |
|
|
321
321
|
| `nano-gpt/meituan-longcat/LongCat-Flash-Chat-FP8` | 128K | | | | | | $0.15 | $0.70 |
|
|
322
|
-
| `nano-gpt/mercury-coder-small` | 33K | | | | | | $0.25 | $1 |
|
|
323
322
|
| `nano-gpt/Meta-Llama-3-1-8B-Instruct-FP8` | 128K | | | | | | $0.02 | $0.03 |
|
|
324
323
|
| `nano-gpt/meta-llama/llama-3.1-8b-instruct` | 131K | | | | | | $0.05 | $0.05 |
|
|
325
324
|
| `nano-gpt/meta-llama/llama-3.2-3b-instruct` | 131K | | | | | | $0.03 | $0.05 |
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Nvidia
|
|
2
2
|
|
|
3
|
-
Access
|
|
3
|
+
Access 76 Nvidia models through Mastra's model router. Authentication is handled automatically using the `NVIDIA_API_KEY` environment variable.
|
|
4
4
|
|
|
5
5
|
Learn more in the [Nvidia documentation](https://docs.api.nvidia.com/nim/).
|
|
6
6
|
|
|
@@ -71,6 +71,7 @@ for await (const chunk of stream) {
|
|
|
71
71
|
| `nvidia/microsoft/phi-4-mini-instruct` | 131K | | | | | | — | — |
|
|
72
72
|
| `nvidia/minimaxai/minimax-m2.1` | 205K | | | | | | — | — |
|
|
73
73
|
| `nvidia/minimaxai/minimax-m2.5` | 205K | | | | | | — | — |
|
|
74
|
+
| `nvidia/minimaxai/minimax-m2.7` | 205K | | | | | | $0.30 | $1 |
|
|
74
75
|
| `nvidia/mistralai/codestral-22b-instruct-v0.1` | 128K | | | | | | — | — |
|
|
75
76
|
| `nvidia/mistralai/devstral-2-123b-instruct-2512` | 262K | | | | | | — | — |
|
|
76
77
|
| `nvidia/mistralai/mamba-codestral-7b-v0.1` | 128K | | | | | | — | — |
|
|
@@ -171,6 +171,10 @@ const response = await agent.generate("Hello!", {
|
|
|
171
171
|
|
|
172
172
|
**user** (`string | null | undefined`)
|
|
173
173
|
|
|
174
|
+
**systemMessageMode** (`"remove" | "system" | "developer" | undefined`)
|
|
175
|
+
|
|
176
|
+
**forceReasoning** (`boolean | undefined`)
|
|
177
|
+
|
|
174
178
|
## Direct provider installation
|
|
175
179
|
|
|
176
180
|
This provider can also be installed directly as a standalone package, which can be used instead of the Mastra model router string. View the [package documentation](https://www.npmjs.com/package/@ai-sdk/openai) for more details.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# OpenCode Go
|
|
2
2
|
|
|
3
|
-
Access
|
|
3
|
+
Access 9 OpenCode Go models through Mastra's model router. Authentication is handled automatically using the `OPENCODE_API_KEY` environment variable.
|
|
4
4
|
|
|
5
5
|
Learn more in the [OpenCode Go documentation](https://opencode.ai/docs/zen).
|
|
6
6
|
|
|
@@ -41,6 +41,8 @@ for await (const chunk of stream) {
|
|
|
41
41
|
| `opencode-go/mimo-v2-pro` | 1.0M | | | | | | $1 | $3 |
|
|
42
42
|
| `opencode-go/minimax-m2.5` | 205K | | | | | | $0.30 | $1 |
|
|
43
43
|
| `opencode-go/minimax-m2.7` | 205K | | | | | | $0.30 | $1 |
|
|
44
|
+
| `opencode-go/qwen3.5-plus` | 262K | | | | | | $0.20 | $1 |
|
|
45
|
+
| `opencode-go/qwen3.6-plus` | 262K | | | | | | $0.50 | $3 |
|
|
44
46
|
|
|
45
47
|
## Advanced configuration
|
|
46
48
|
|
|
@@ -70,7 +72,7 @@ const agent = new Agent({
|
|
|
70
72
|
model: ({ requestContext }) => {
|
|
71
73
|
const useAdvanced = requestContext.task === "complex";
|
|
72
74
|
return useAdvanced
|
|
73
|
-
? "opencode-go/
|
|
75
|
+
? "opencode-go/qwen3.6-plus"
|
|
74
76
|
: "opencode-go/glm-5";
|
|
75
77
|
}
|
|
76
78
|
});
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# OpenCode Zen
|
|
2
2
|
|
|
3
|
-
Access
|
|
3
|
+
Access 34 OpenCode Zen models through Mastra's model router. Authentication is handled automatically using the `OPENCODE_API_KEY` environment variable.
|
|
4
4
|
|
|
5
5
|
Learn more in the [OpenCode Zen documentation](https://opencode.ai/docs/zen).
|
|
6
6
|
|
|
@@ -66,6 +66,8 @@ for await (const chunk of stream) {
|
|
|
66
66
|
| `opencode/minimax-m2.5` | 205K | | | | | | $0.30 | $1 |
|
|
67
67
|
| `opencode/minimax-m2.5-free` | 205K | | | | | | — | — |
|
|
68
68
|
| `opencode/nemotron-3-super-free` | 205K | | | | | | — | — |
|
|
69
|
+
| `opencode/qwen3.5-plus` | 262K | | | | | | $0.20 | $1 |
|
|
70
|
+
| `opencode/qwen3.6-plus` | 262K | | | | | | $0.50 | $3 |
|
|
69
71
|
|
|
70
72
|
## Advanced configuration
|
|
71
73
|
|
|
@@ -95,7 +97,7 @@ const agent = new Agent({
|
|
|
95
97
|
model: ({ requestContext }) => {
|
|
96
98
|
const useAdvanced = requestContext.task === "complex";
|
|
97
99
|
return useAdvanced
|
|
98
|
-
? "opencode/
|
|
100
|
+
? "opencode/qwen3.6-plus"
|
|
99
101
|
: "opencode/big-pickle";
|
|
100
102
|
}
|
|
101
103
|
});
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Poe
|
|
2
2
|
|
|
3
|
-
Access
|
|
3
|
+
Access 117 Poe models through Mastra's model router. Authentication is handled automatically using the `POE_API_KEY` environment variable.
|
|
4
4
|
|
|
5
5
|
Learn more in the [Poe documentation](https://creator.poe.com/docs/external-applications/openai-compatible-api).
|
|
6
6
|
|
|
@@ -41,17 +41,12 @@ for await (const chunk of stream) {
|
|
|
41
41
|
| `poe/anthropic/claude-opus-4.1` | 197K | | | | | | $13 | $64 |
|
|
42
42
|
| `poe/anthropic/claude-opus-4.5` | 197K | | | | | | $4 | $21 |
|
|
43
43
|
| `poe/anthropic/claude-opus-4.6` | 983K | | | | | | $4 | $21 |
|
|
44
|
-
| `poe/anthropic/claude-sonnet-3.5` | 189K | | | | | | $3 | $13 |
|
|
45
|
-
| `poe/anthropic/claude-sonnet-3.5-june` | 189K | | | | | | $3 | $13 |
|
|
46
44
|
| `poe/anthropic/claude-sonnet-3.7` | 197K | | | | | | $3 | $13 |
|
|
47
45
|
| `poe/anthropic/claude-sonnet-4` | 983K | | | | | | $3 | $13 |
|
|
48
46
|
| `poe/anthropic/claude-sonnet-4.5` | 983K | | | | | | $3 | $13 |
|
|
49
47
|
| `poe/anthropic/claude-sonnet-4.6` | 983K | | | | | | $3 | $13 |
|
|
50
|
-
| `poe/cerebras/gpt-oss-120b-cs` |
|
|
51
|
-
| `poe/cerebras/llama-3.1-8b-cs` |
|
|
52
|
-
| `poe/cerebras/llama-3.3-70b-cs` | — | | | | | | — | — |
|
|
53
|
-
| `poe/cerebras/qwen3-235b-2507-cs` | — | | | | | | — | — |
|
|
54
|
-
| `poe/cerebras/qwen3-32b-cs` | — | | | | | | — | — |
|
|
48
|
+
| `poe/cerebras/gpt-oss-120b-cs` | 128K | | | | | | $0.35 | $0.75 |
|
|
49
|
+
| `poe/cerebras/llama-3.1-8b-cs` | 128K | | | | | | $0.10 | $0.10 |
|
|
55
50
|
| `poe/elevenlabs/elevenlabs-music` | 2K | | | | | | — | — |
|
|
56
51
|
| `poe/elevenlabs/elevenlabs-v2.5-turbo` | 128K | | | | | | — | — |
|
|
57
52
|
| `poe/elevenlabs/elevenlabs-v3` | 128K | | | | | | — | — |
|
|
@@ -62,10 +57,8 @@ for await (const chunk of stream) {
|
|
|
62
57
|
| `poe/google/gemini-2.5-flash-lite` | 1.0M | | | | | | $0.07 | $0.28 |
|
|
63
58
|
| `poe/google/gemini-2.5-pro` | 1.1M | | | | | | $0.87 | $7 |
|
|
64
59
|
| `poe/google/gemini-3-flash` | 1.0M | | | | | | $0.40 | $2 |
|
|
65
|
-
| `poe/google/gemini-3-pro` | 1.0M | | | | | | $2 | $10 |
|
|
66
60
|
| `poe/google/gemini-3.1-flash-lite` | 1.0M | | | | | | $0.25 | $2 |
|
|
67
61
|
| `poe/google/gemini-3.1-pro` | 1.0M | | | | | | $2 | $12 |
|
|
68
|
-
| `poe/google/gemini-deep-research` | 1.0M | | | | | | $2 | $10 |
|
|
69
62
|
| `poe/google/gemma-4-31b` | 262K | | | | | | — | — |
|
|
70
63
|
| `poe/google/imagen-3` | 480 | | | | | | — | — |
|
|
71
64
|
| `poe/google/imagen-3-fast` | 480 | | | | | | — | — |
|
|
@@ -88,20 +81,16 @@ for await (const chunk of stream) {
|
|
|
88
81
|
| `poe/novita/deepseek-v3.2` | 128K | | | | | | $0.27 | $0.40 |
|
|
89
82
|
| `poe/novita/glm-4.6` | — | | | | | | — | — |
|
|
90
83
|
| `poe/novita/glm-4.6v` | 131K | | | | | | — | — |
|
|
91
|
-
| `poe/novita/glm-4.7` | 205K | | | | | | — | — |
|
|
92
84
|
| `poe/novita/glm-4.7-flash` | 200K | | | | | | — | — |
|
|
93
85
|
| `poe/novita/glm-4.7-n` | 205K | | | | | | — | — |
|
|
94
|
-
| `poe/novita/glm-5` | 205K | | | | | |
|
|
86
|
+
| `poe/novita/glm-5` | 205K | | | | | | $1 | $3 |
|
|
95
87
|
| `poe/novita/kimi-k2-thinking` | 256K | | | | | | — | — |
|
|
96
|
-
| `poe/novita/kimi-k2.5` |
|
|
88
|
+
| `poe/novita/kimi-k2.5` | 128K | | | | | | $0.60 | $3 |
|
|
97
89
|
| `poe/novita/minimax-m2.1` | 205K | | | | | | — | — |
|
|
98
|
-
| `poe/openai/chatgpt-4o-latest` | 128K | | | | | | $5 | $14 |
|
|
99
90
|
| `poe/openai/dall-e-3` | 800 | | | | | | — | — |
|
|
100
91
|
| `poe/openai/gpt-3.5-turbo` | 16K | | | | | | $0.45 | $1 |
|
|
101
92
|
| `poe/openai/gpt-3.5-turbo-instruct` | 4K | | | | | | $1 | $2 |
|
|
102
93
|
| `poe/openai/gpt-3.5-turbo-raw` | 5K | | | | | | $0.45 | $1 |
|
|
103
|
-
| `poe/openai/gpt-4-classic` | 8K | | | | | | $27 | $54 |
|
|
104
|
-
| `poe/openai/gpt-4-classic-0314` | 8K | | | | | | $27 | $54 |
|
|
105
94
|
| `poe/openai/gpt-4-turbo` | 128K | | | | | | $9 | $27 |
|
|
106
95
|
| `poe/openai/gpt-4.1` | 1.0M | | | | | | $2 | $7 |
|
|
107
96
|
| `poe/openai/gpt-4.1-mini` | 1.0M | | | | | | $0.36 | $1 |
|
|
@@ -109,6 +109,10 @@ const response = await agent.generate("Hello!", {
|
|
|
109
109
|
|
|
110
110
|
**reasoningEffort** (`"low" | "high" | undefined`)
|
|
111
111
|
|
|
112
|
+
**logprobs** (`boolean | undefined`)
|
|
113
|
+
|
|
114
|
+
**topLogprobs** (`number | undefined`)
|
|
115
|
+
|
|
112
116
|
**parallel\_function\_calling** (`boolean | undefined`)
|
|
113
117
|
|
|
114
118
|
**searchParameters** (`{ mode: "off" | "auto" | "on"; returnCitations?: boolean | undefined; fromDate?: string | undefined; toDate?: string | undefined; maxSearchResults?: number | undefined; sources?: ({ ...; } | ... 2 more ... | { ...; })[] | undefined; } | undefined`)
|
|
@@ -8,6 +8,23 @@ Framework-agnostic handler for streaming agent chat in AI SDK-compatible format.
|
|
|
8
8
|
|
|
9
9
|
Use [`chatRoute()`](https://mastra.ai/reference/ai-sdk/chat-route) if you want to create a chat route inside a Mastra server.
|
|
10
10
|
|
|
11
|
+
## Structured output in UI streams
|
|
12
|
+
|
|
13
|
+
When you pass `structuredOutput` to the underlying agent execution, the final structured output object is emitted in the AI SDK-compatible UI stream as a custom data part:
|
|
14
|
+
|
|
15
|
+
```json
|
|
16
|
+
{
|
|
17
|
+
"type": "data-structured-output",
|
|
18
|
+
"data": {
|
|
19
|
+
"object": {}
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
The `object` field contains your full structured output value. Mastra emits this event for the final structured output object only. Partial structured output chunks are not exposed in the UI stream.
|
|
25
|
+
|
|
26
|
+
Read this event with AI SDK UI's custom data handling, such as `onData`, or render it from message data parts.
|
|
27
|
+
|
|
11
28
|
## Usage example
|
|
12
29
|
|
|
13
30
|
Next.js App Router example:
|
|
@@ -6,6 +6,21 @@ This is useful when building custom streaming endpoints outside Mastra's provide
|
|
|
6
6
|
|
|
7
7
|
`toAISdkStream()` keeps the existing AI SDK v5/default behavior. If your app is typed against AI SDK v6, pass `version: 'v6'` in the options object.
|
|
8
8
|
|
|
9
|
+
## Structured output in UI streams
|
|
10
|
+
|
|
11
|
+
When the source agent stream includes a final structured output object, `toAISdkStream()` emits it as a custom AI SDK UI data part:
|
|
12
|
+
|
|
13
|
+
```json
|
|
14
|
+
{
|
|
15
|
+
"type": "data-structured-output",
|
|
16
|
+
"data": {
|
|
17
|
+
"object": {}
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
The `object` field contains your full structured output value. This maps Mastra's final structured output chunk into the AI SDK UI stream. Partial structured output chunks are not emitted.
|
|
23
|
+
|
|
9
24
|
## Usage example
|
|
10
25
|
|
|
11
26
|
Next.js App Router example:
|