@mastra/mcp-docs-server 1.1.15 → 1.1.16-alpha.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/docs/memory/observational-memory.md +36 -0
- package/.docs/docs/observability/tracing/exporters/datadog.md +132 -2
- package/.docs/docs/server/middleware.md +13 -1
- package/.docs/docs/server/server-adapters.md +3 -2
- package/.docs/docs/workspace/skills.md +23 -0
- package/.docs/guides/migrations/upgrade-to-v1/agent.md +23 -0
- package/.docs/models/gateways/openrouter.md +4 -43
- package/.docs/models/gateways/vercel.md +6 -1
- package/.docs/models/index.md +22 -2
- package/.docs/models/providers/baseten.md +1 -1
- package/.docs/models/providers/cortecs.md +6 -1
- package/.docs/models/providers/fastrouter.md +3 -2
- package/.docs/models/providers/fireworks-ai.md +3 -2
- package/.docs/models/providers/nano-gpt.md +2 -1
- package/.docs/models/providers/nvidia.md +2 -1
- package/.docs/models/providers/vivgrid.md +13 -12
- package/.docs/models/providers/vultr.md +1 -2
- package/.docs/models/providers/xai.md +27 -27
- package/.docs/models/providers/zenmux.md +90 -73
- package/.docs/reference/memory/observational-memory.md +42 -3
- package/.docs/reference/server/express-adapter.md +23 -0
- package/.docs/reference/server/fastify-adapter.md +28 -0
- package/.docs/reference/server/hono-adapter.md +22 -0
- package/.docs/reference/server/koa-adapter.md +23 -0
- package/.docs/reference/server/mastra-server.md +3 -2
- package/.docs/reference/tools/create-tool.md +1 -1
- package/.docs/reference/workspace/workspace-class.md +13 -1
- package/CHANGELOG.md +37 -0
- package/package.json +5 -5
|
@@ -137,6 +137,42 @@ const memory = new Memory({
|
|
|
137
137
|
|
|
138
138
|
See [model configuration](https://mastra.ai/reference/memory/observational-memory) for using different models per agent.
|
|
139
139
|
|
|
140
|
+
### Token-tiered model selection
|
|
141
|
+
|
|
142
|
+
You can use `ModelByInputTokens` to specify different Observer or Reflector models based on input token count. OM selects the matching model tier at runtime from the configured `upTo` thresholds.
|
|
143
|
+
|
|
144
|
+
```typescript
|
|
145
|
+
import { Memory, ModelByInputTokens } from '@mastra/memory'
|
|
146
|
+
|
|
147
|
+
const memory = new Memory({
|
|
148
|
+
options: {
|
|
149
|
+
observationalMemory: {
|
|
150
|
+
observation: {
|
|
151
|
+
model: new ModelByInputTokens({
|
|
152
|
+
upTo: {
|
|
153
|
+
10_000: 'google/gemini-2.5-flash', // Fast and cheap for small inputs
|
|
154
|
+
40_000: 'openai/gpt-4o', // Stronger for medium inputs
|
|
155
|
+
1_000_000: 'openai/gpt-4.5', // Most capable for very large inputs
|
|
156
|
+
},
|
|
157
|
+
}),
|
|
158
|
+
},
|
|
159
|
+
reflection: {
|
|
160
|
+
model: new ModelByInputTokens({
|
|
161
|
+
upTo: {
|
|
162
|
+
20_000: 'google/gemini-2.5-flash',
|
|
163
|
+
80_000: 'openai/gpt-4o',
|
|
164
|
+
},
|
|
165
|
+
}),
|
|
166
|
+
},
|
|
167
|
+
},
|
|
168
|
+
},
|
|
169
|
+
})
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
The `upTo` keys are inclusive upper bounds. OM computes the actual input token count for the Observer or Reflector call, resolves the matching tier directly, and uses that concrete model for the run.
|
|
173
|
+
|
|
174
|
+
If the input exceeds the largest configured threshold, an error is thrown — ensure your thresholds cover the full range of possible input sizes, or use a model with a sufficiently large context window at the highest tier.
|
|
175
|
+
|
|
140
176
|
## Scopes
|
|
141
177
|
|
|
142
178
|
### Thread scope (default)
|
|
@@ -145,6 +145,135 @@ Mastra span types are automatically mapped to Datadog LLMObs span kinds:
|
|
|
145
145
|
|
|
146
146
|
Other or future Mastra span types default to 'task' when mapped, unless otherwise specified.
|
|
147
147
|
|
|
148
|
+
## Application Performance Monitoring
|
|
149
|
+
|
|
150
|
+
The sections above cover Mastra's [LLM Observability](https://docs.datadoghq.com/llm_observability/) integration. To trace your Mastra HTTP server routes (request latency, error tracking, service maps), use `dd-trace` directly for Datadog Application Performance Monitoring (APM).
|
|
151
|
+
|
|
152
|
+
### Prerequisites
|
|
153
|
+
|
|
154
|
+
1. **Datadog Agent**: Install a [Datadog Agent](https://docs.datadoghq.com/agent/) on the same host as your application, or on a host that is reachable over the network. By default, the agent receives traces from `dd-trace` on `localhost:8126` and forwards them to Datadog. Follow the [agent installation guide](https://docs.datadoghq.com/agent/) to set it up.
|
|
155
|
+
|
|
156
|
+
2. **dd-trace package**: Install the tracing library in your project:
|
|
157
|
+
|
|
158
|
+
**npm**:
|
|
159
|
+
|
|
160
|
+
```bash
|
|
161
|
+
npm install dd-trace
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
**pnpm**:
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
pnpm add dd-trace
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
**Yarn**:
|
|
171
|
+
|
|
172
|
+
```bash
|
|
173
|
+
yarn add dd-trace
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
**Bun**:
|
|
177
|
+
|
|
178
|
+
```bash
|
|
179
|
+
bun add dd-trace
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
> **Note:** APM traces always route through the Datadog Agent. This is different from LLM Observability, which supports agentless mode (direct HTTPS to Datadog).
|
|
183
|
+
|
|
184
|
+
### APM only
|
|
185
|
+
|
|
186
|
+
Import and initialize `dd-trace` at the top of your entry file, before any other imports:
|
|
187
|
+
|
|
188
|
+
```typescript
|
|
189
|
+
import tracer from 'dd-trace'
|
|
190
|
+
|
|
191
|
+
tracer.init({
|
|
192
|
+
service: process.env.DD_SERVICE || 'my-mastra-app',
|
|
193
|
+
env: process.env.DD_ENV || 'production',
|
|
194
|
+
version: process.env.DD_VERSION,
|
|
195
|
+
})
|
|
196
|
+
|
|
197
|
+
import { Mastra } from '@mastra/core'
|
|
198
|
+
|
|
199
|
+
export const mastra = new Mastra({
|
|
200
|
+
bundler: {
|
|
201
|
+
externals: [
|
|
202
|
+
'dd-trace',
|
|
203
|
+
'@datadog/native-metrics',
|
|
204
|
+
'@datadog/native-appsec',
|
|
205
|
+
'@datadog/native-iast-taint-tracking',
|
|
206
|
+
'@datadog/pprof',
|
|
207
|
+
],
|
|
208
|
+
},
|
|
209
|
+
})
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
Set the tracer metadata environment variables:
|
|
213
|
+
|
|
214
|
+
```bash
|
|
215
|
+
DD_SERVICE=my-mastra-app
|
|
216
|
+
DD_ENV=production
|
|
217
|
+
DD_VERSION=1.0.0
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
`dd-trace` auto-instruments popular HTTP frameworks, including those supported by Mastra's [server adapters](https://mastra.ai/docs/server/server-adapters). Inbound requests, outbound HTTP calls, and database queries appear as APM traces in Datadog.
|
|
221
|
+
|
|
222
|
+
### APM and LLM Observability
|
|
223
|
+
|
|
224
|
+
Import and initialize `dd-trace` before creating the Mastra instance. The `DatadogExporter` detects the existing tracer and skips re-initialization, adding LLM Observability on top of your APM setup:
|
|
225
|
+
|
|
226
|
+
```typescript
|
|
227
|
+
import tracer from 'dd-trace'
|
|
228
|
+
|
|
229
|
+
tracer.init({
|
|
230
|
+
service: process.env.DD_SERVICE || 'my-mastra-app',
|
|
231
|
+
env: process.env.DD_ENV || 'production',
|
|
232
|
+
version: process.env.DD_VERSION,
|
|
233
|
+
})
|
|
234
|
+
|
|
235
|
+
import { Mastra } from '@mastra/core'
|
|
236
|
+
import { Observability } from '@mastra/observability'
|
|
237
|
+
import { DatadogExporter } from '@mastra/datadog'
|
|
238
|
+
|
|
239
|
+
export const mastra = new Mastra({
|
|
240
|
+
observability: new Observability({
|
|
241
|
+
configs: {
|
|
242
|
+
datadog: {
|
|
243
|
+
serviceName: 'my-mastra-app',
|
|
244
|
+
exporters: [
|
|
245
|
+
new DatadogExporter({
|
|
246
|
+
mlApp: process.env.DD_LLMOBS_ML_APP!,
|
|
247
|
+
apiKey: process.env.DD_API_KEY!,
|
|
248
|
+
}),
|
|
249
|
+
],
|
|
250
|
+
},
|
|
251
|
+
},
|
|
252
|
+
}),
|
|
253
|
+
bundler: {
|
|
254
|
+
externals: [
|
|
255
|
+
'dd-trace',
|
|
256
|
+
'@datadog/native-metrics',
|
|
257
|
+
'@datadog/native-appsec',
|
|
258
|
+
'@datadog/native-iast-taint-tracking',
|
|
259
|
+
'@datadog/pprof',
|
|
260
|
+
],
|
|
261
|
+
},
|
|
262
|
+
})
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
```bash
|
|
266
|
+
DD_SERVICE=my-mastra-app
|
|
267
|
+
DD_ENV=production
|
|
268
|
+
DD_VERSION=1.0.0
|
|
269
|
+
DD_API_KEY=your-datadog-api-key
|
|
270
|
+
DD_LLMOBS_ML_APP=my-llm-app
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
Server routes appear as APM traces and LLM calls appear as LLM Observability spans, all under the same service in Datadog.
|
|
274
|
+
|
|
275
|
+
> **Note:** Import and initialize `dd-trace` before all other modules. This allows its auto-instrumentation to patch HTTP, database, and framework libraries at load time.
|
|
276
|
+
|
|
148
277
|
## Troubleshooting
|
|
149
278
|
|
|
150
279
|
### Native module ABI mismatch
|
|
@@ -183,5 +312,6 @@ export const mastra = new Mastra({
|
|
|
183
312
|
|
|
184
313
|
## Related
|
|
185
314
|
|
|
186
|
-
- [Tracing
|
|
187
|
-
- [Datadog LLM Observability
|
|
315
|
+
- [Tracing overview](https://mastra.ai/docs/observability/tracing/overview)
|
|
316
|
+
- [Datadog LLM Observability documentation](https://docs.datadoghq.com/llm_observability/)
|
|
317
|
+
- [Datadog APM documentation](https://docs.datadoghq.com/tracing/)
|
|
@@ -104,6 +104,7 @@ Mastra provides reserved context keys that, when set by middleware, take precede
|
|
|
104
104
|
```typescript
|
|
105
105
|
import { Mastra } from '@mastra/core'
|
|
106
106
|
import { MASTRA_RESOURCE_ID_KEY } from '@mastra/core/request-context'
|
|
107
|
+
import { getAuthenticatedUser } from '@mastra/server/auth'
|
|
107
108
|
|
|
108
109
|
export const mastra = new Mastra({
|
|
109
110
|
server: {
|
|
@@ -117,8 +118,17 @@ export const mastra = new Mastra({
|
|
|
117
118
|
{
|
|
118
119
|
path: '/api/*',
|
|
119
120
|
handler: async (c, next) => {
|
|
121
|
+
const token = c.req.header('Authorization')
|
|
122
|
+
if (!token) {
|
|
123
|
+
return c.json({ error: 'Unauthorized' }, 401)
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
const user = await getAuthenticatedUser<{ id: string }>({
|
|
127
|
+
mastra: c.get('mastra'),
|
|
128
|
+
token,
|
|
129
|
+
request: c.req.raw,
|
|
130
|
+
})
|
|
120
131
|
const requestContext = c.get('requestContext')
|
|
121
|
-
const user = requestContext.get('user')
|
|
122
132
|
|
|
123
133
|
if (!user) {
|
|
124
134
|
return c.json({ error: 'Unauthorized' }, 401)
|
|
@@ -136,6 +146,8 @@ export const mastra = new Mastra({
|
|
|
136
146
|
})
|
|
137
147
|
```
|
|
138
148
|
|
|
149
|
+
`server.middleware` runs before Mastra's per-route auth checks. When middleware needs the authenticated user, call `getAuthenticatedUser()` to resolve it from the configured auth provider without changing the default route auth flow.
|
|
150
|
+
|
|
139
151
|
With this middleware, the server automatically:
|
|
140
152
|
|
|
141
153
|
- **Filters thread listing** to only return threads owned by the user
|
|
@@ -341,7 +341,7 @@ app.listen(port, () => {
|
|
|
341
341
|
Calling `init()` runs three steps in order. Understanding this flow helps when you need to insert your own middleware at specific points.
|
|
342
342
|
|
|
343
343
|
1. `registerContextMiddleware()`: Attaches the Mastra instance, request context, tools, and abort signal to every request. This makes Mastra available to all subsequent middleware and route handlers.
|
|
344
|
-
2. `registerAuthMiddleware()`:
|
|
344
|
+
2. `registerAuthMiddleware()`: Runs the adapter auth hook during initialization. Official adapters enforce auth inline when Mastra registers built-in routes and `registerApiRoute()` routes, so raw framework routes should use the adapter's exported `createAuthMiddleware()` helper when they need Mastra auth.
|
|
345
345
|
3. `registerRoutes()`: Registers all Mastra API routes for agents, workflows, and other features. Also registers MCP routes if MCP servers are configured.
|
|
346
346
|
|
|
347
347
|
### Manual initialization
|
|
@@ -359,7 +359,6 @@ server.registerContextMiddleware();
|
|
|
359
359
|
// Middleware that needs Mastra context
|
|
360
360
|
app.use(customMiddleware);
|
|
361
361
|
|
|
362
|
-
server.registerAuthMiddleware();
|
|
363
362
|
await server.registerRoutes();
|
|
364
363
|
|
|
365
364
|
// Routes after Mastra
|
|
@@ -374,6 +373,8 @@ You can add your own routes to the app alongside Mastra's routes.
|
|
|
374
373
|
|
|
375
374
|
- Routes added **before** `init()` won't have Mastra context available.
|
|
376
375
|
- Routes added **after** `init()` have access to the Mastra context (the Mastra instance, request context, authenticated user, etc.).
|
|
376
|
+
- When you want Mastra-managed auth and route metadata such as `requiresAuth`, prefer `registerApiRoute()`.
|
|
377
|
+
- When you mount routes directly on the framework app, use the adapter's exported `createAuthMiddleware()` helper if those routes need Mastra auth.
|
|
377
378
|
|
|
378
379
|
> **Info:** See "Adding custom routes" in the [Express](https://mastra.ai/reference/server/express-adapter) and [Hono](https://mastra.ai/reference/server/hono-adapter) adapter references for more information.
|
|
379
380
|
|
|
@@ -127,6 +127,29 @@ The agent has three skill tools:
|
|
|
127
127
|
|
|
128
128
|
This design is stateless — there is no activation state to track. If the skill instructions leave the conversation context (due to context window limits or compaction), the agent can call `skill` again to reload them.
|
|
129
129
|
|
|
130
|
+
## Same-named skills
|
|
131
|
+
|
|
132
|
+
When multiple skill directories contain a skill with the same name, all of them are discovered and listed. The agent sees every skill in its system message, along with each skill's path and source type, so it can tell them apart.
|
|
133
|
+
|
|
134
|
+
When the agent activates a skill by name, tie-breaking determines which one is returned:
|
|
135
|
+
|
|
136
|
+
1. **Source-type priority**: local skills take precedence over managed (`.mastra/`) skills, which take precedence over external (`node_modules/`) skills.
|
|
137
|
+
2. **Unresolvable conflicts throw**: if two skills share the same name _and_ the same source type (for example, two local skills both named `brand-guidelines`), `get()` throws an error. Rename one or move it to a different source type to resolve the conflict.
|
|
138
|
+
3. **Path escape hatch**: the agent can pass a skill's full path instead of its name to activate a specific skill, bypassing tie-breaking entirely.
|
|
139
|
+
|
|
140
|
+
```typescript
|
|
141
|
+
const workspace = new Workspace({
|
|
142
|
+
filesystem: new LocalFilesystem({ basePath: './workspace' }),
|
|
143
|
+
skills: [
|
|
144
|
+
'node_modules/@myorg/skills', // external: provides "brand-guidelines"
|
|
145
|
+
'/skills', // local: also provides "brand-guidelines"
|
|
146
|
+
],
|
|
147
|
+
})
|
|
148
|
+
|
|
149
|
+
// get('brand-guidelines') returns the local copy (local > external)
|
|
150
|
+
// get('node_modules/@myorg/skills/brand-guidelines') returns the external copy
|
|
151
|
+
```
|
|
152
|
+
|
|
130
153
|
## Skill search
|
|
131
154
|
|
|
132
155
|
If BM25 or vector search is enabled on the workspace, skills are automatically indexed. Agents can search across skill content to find relevant instructions.
|
|
@@ -140,6 +140,29 @@ To migrate, update processor method names.
|
|
|
140
140
|
> npx @mastra/codemod@latest v1/agent-processor-methods .
|
|
141
141
|
> ```
|
|
142
142
|
|
|
143
|
+
### Zod v3 and v4 structured output schemas remain supported
|
|
144
|
+
|
|
145
|
+
Mastra v1 continues to accept both Zod v3 and Zod v4 schemas in public agent APIs that take structured output schemas. This includes methods such as `agent.generateLegacy()` and `agent.streamLegacy()`, as well as the related option types.
|
|
146
|
+
|
|
147
|
+
If you already pass Zod schemas to agent APIs, no migration is required for Zod version compatibility. Keep your existing schema imports:
|
|
148
|
+
|
|
149
|
+
```ts
|
|
150
|
+
import { z as z3 } from 'zod/v3'
|
|
151
|
+
import { z as z4 } from 'zod/v4'
|
|
152
|
+
|
|
153
|
+
await agent.generateLegacy({
|
|
154
|
+
prompt: 'Summarize this ticket',
|
|
155
|
+
output: z3.object({ summary: z3.string() }),
|
|
156
|
+
})
|
|
157
|
+
|
|
158
|
+
await agent.streamLegacy({
|
|
159
|
+
prompt: 'Extract contact info',
|
|
160
|
+
output: z4.object({ email: z4.string().email() }),
|
|
161
|
+
})
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Only update your imports if you want to standardize on one Zod version across your application.
|
|
165
|
+
|
|
143
166
|
### Default options method renames for AI SDK versions
|
|
144
167
|
|
|
145
168
|
Default options methods have been renamed to clarify legacy (AI SDK v4) vs new (AI SDK v5+) APIs. This change helps developers understand which AI SDK version they're targeting.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# OpenRouter
|
|
2
2
|
|
|
3
|
-
OpenRouter aggregates models from multiple providers with enhanced features like rate limiting and failover. Access
|
|
3
|
+
OpenRouter aggregates models from multiple providers with enhanced features like rate limiting and failover. Access 164 models through Mastra's model router.
|
|
4
4
|
|
|
5
5
|
Learn more in the [OpenRouter documentation](https://openrouter.ai/models).
|
|
6
6
|
|
|
@@ -13,7 +13,7 @@ const agent = new Agent({
|
|
|
13
13
|
id: "my-agent",
|
|
14
14
|
name: "My Agent",
|
|
15
15
|
instructions: "You are a helpful assistant",
|
|
16
|
-
model: "openrouter/
|
|
16
|
+
model: "openrouter/anthropic/claude-3.5-haiku"
|
|
17
17
|
});
|
|
18
18
|
```
|
|
19
19
|
|
|
@@ -34,7 +34,6 @@ ANTHROPIC_API_KEY=ant-...
|
|
|
34
34
|
|
|
35
35
|
| Model |
|
|
36
36
|
| --------------------------------------------------------------- |
|
|
37
|
-
| `allenai/molmo-2-8b:free` |
|
|
38
37
|
| `anthropic/claude-3.5-haiku` |
|
|
39
38
|
| `anthropic/claude-3.7-sonnet` |
|
|
40
39
|
| `anthropic/claude-haiku-4.5` |
|
|
@@ -53,23 +52,14 @@ ANTHROPIC_API_KEY=ant-...
|
|
|
53
52
|
| `black-forest-labs/flux.2-pro` |
|
|
54
53
|
| `bytedance-seed/seedream-4.5` |
|
|
55
54
|
| `cognitivecomputations/dolphin-mistral-24b-venice-edition:free` |
|
|
56
|
-
| `cognitivecomputations/dolphin3.0-mistral-24b` |
|
|
57
|
-
| `cognitivecomputations/dolphin3.0-r1-mistral-24b` |
|
|
58
55
|
| `deepseek/deepseek-chat-v3-0324` |
|
|
59
56
|
| `deepseek/deepseek-chat-v3.1` |
|
|
60
|
-
| `deepseek/deepseek-r1-0528-qwen3-8b:free` |
|
|
61
|
-
| `deepseek/deepseek-r1-0528:free` |
|
|
62
57
|
| `deepseek/deepseek-r1-distill-llama-70b` |
|
|
63
|
-
| `deepseek/deepseek-r1-distill-qwen-14b` |
|
|
64
|
-
| `deepseek/deepseek-r1:free` |
|
|
65
|
-
| `deepseek/deepseek-v3-base:free` |
|
|
66
58
|
| `deepseek/deepseek-v3.1-terminus` |
|
|
67
59
|
| `deepseek/deepseek-v3.1-terminus:exacto` |
|
|
68
60
|
| `deepseek/deepseek-v3.2` |
|
|
69
61
|
| `deepseek/deepseek-v3.2-speciale` |
|
|
70
|
-
| `featherless/qwerky-72b` |
|
|
71
62
|
| `google/gemini-2.0-flash-001` |
|
|
72
|
-
| `google/gemini-2.0-flash-exp:free` |
|
|
73
63
|
| `google/gemini-2.5-flash` |
|
|
74
64
|
| `google/gemini-2.5-flash-lite` |
|
|
75
65
|
| `google/gemini-2.5-flash-lite-preview-09-2025` |
|
|
@@ -95,15 +85,11 @@ ANTHROPIC_API_KEY=ant-...
|
|
|
95
85
|
| `inception/mercury` |
|
|
96
86
|
| `inception/mercury-2` |
|
|
97
87
|
| `inception/mercury-coder` |
|
|
98
|
-
| `kwaipilot/kat-coder-pro:free` |
|
|
99
88
|
| `liquid/lfm-2.5-1.2b-instruct:free` |
|
|
100
89
|
| `liquid/lfm-2.5-1.2b-thinking:free` |
|
|
101
|
-
| `meta-llama/llama-3.1-405b-instruct:free` |
|
|
102
90
|
| `meta-llama/llama-3.2-11b-vision-instruct` |
|
|
103
91
|
| `meta-llama/llama-3.2-3b-instruct:free` |
|
|
104
92
|
| `meta-llama/llama-3.3-70b-instruct:free` |
|
|
105
|
-
| `meta-llama/llama-4-scout:free` |
|
|
106
|
-
| `microsoft/mai-ds-r1:free` |
|
|
107
93
|
| `minimax/minimax-01` |
|
|
108
94
|
| `minimax/minimax-m1` |
|
|
109
95
|
| `minimax/minimax-m2` |
|
|
@@ -112,30 +98,25 @@ ANTHROPIC_API_KEY=ant-...
|
|
|
112
98
|
| `minimax/minimax-m2.7` |
|
|
113
99
|
| `mistralai/codestral-2508` |
|
|
114
100
|
| `mistralai/devstral-2512` |
|
|
115
|
-
| `mistralai/devstral-2512:free` |
|
|
116
101
|
| `mistralai/devstral-medium-2507` |
|
|
117
102
|
| `mistralai/devstral-small-2505` |
|
|
118
|
-
| `mistralai/devstral-small-2505:free` |
|
|
119
103
|
| `mistralai/devstral-small-2507` |
|
|
120
|
-
| `mistralai/mistral-7b-instruct:free` |
|
|
121
104
|
| `mistralai/mistral-medium-3` |
|
|
122
105
|
| `mistralai/mistral-medium-3.1` |
|
|
123
|
-
| `mistralai/mistral-nemo:free` |
|
|
124
106
|
| `mistralai/mistral-small-3.1-24b-instruct` |
|
|
125
107
|
| `mistralai/mistral-small-3.2-24b-instruct` |
|
|
126
|
-
| `mistralai/mistral-small-3.2-24b-instruct:free` |
|
|
127
|
-
| `moonshotai/kimi-dev-72b:free` |
|
|
128
108
|
| `moonshotai/kimi-k2` |
|
|
129
109
|
| `moonshotai/kimi-k2-0905` |
|
|
130
110
|
| `moonshotai/kimi-k2-0905:exacto` |
|
|
131
111
|
| `moonshotai/kimi-k2-thinking` |
|
|
132
112
|
| `moonshotai/kimi-k2:free` |
|
|
133
113
|
| `moonshotai/kimi-k2.5` |
|
|
134
|
-
| `nousresearch/deephermes-3-llama-3-8b-preview` |
|
|
135
114
|
| `nousresearch/hermes-3-llama-3.1-405b:free` |
|
|
136
115
|
| `nousresearch/hermes-4-405b` |
|
|
137
116
|
| `nousresearch/hermes-4-70b` |
|
|
138
117
|
| `nvidia/nemotron-3-nano-30b-a3b:free` |
|
|
118
|
+
| `nvidia/nemotron-3-super-120b-a12b` |
|
|
119
|
+
| `nvidia/nemotron-3-super-120b-a12b-free` |
|
|
139
120
|
| `nvidia/nemotron-nano-12b-v2-vl:free` |
|
|
140
121
|
| `nvidia/nemotron-nano-9b-v2` |
|
|
141
122
|
| `nvidia/nemotron-nano-9b-v2:free` |
|
|
@@ -170,29 +151,15 @@ ANTHROPIC_API_KEY=ant-...
|
|
|
170
151
|
| `openai/gpt-oss-20b:free` |
|
|
171
152
|
| `openai/gpt-oss-safeguard-20b` |
|
|
172
153
|
| `openai/o4-mini` |
|
|
173
|
-
| `openrouter/aurora-alpha` |
|
|
174
154
|
| `openrouter/free` |
|
|
175
|
-
| `openrouter/healer-alpha` |
|
|
176
|
-
| `openrouter/hunter-alpha` |
|
|
177
|
-
| `openrouter/sherlock-dash-alpha` |
|
|
178
|
-
| `openrouter/sherlock-think-alpha` |
|
|
179
155
|
| `prime-intellect/intellect-3` |
|
|
180
156
|
| `qwen/qwen-2.5-coder-32b-instruct` |
|
|
181
|
-
| `qwen/qwen-2.5-vl-7b-instruct:free` |
|
|
182
|
-
| `qwen/qwen2.5-vl-32b-instruct:free` |
|
|
183
157
|
| `qwen/qwen2.5-vl-72b-instruct` |
|
|
184
|
-
| `qwen/qwen2.5-vl-72b-instruct:free` |
|
|
185
|
-
| `qwen/qwen3-14b:free` |
|
|
186
158
|
| `qwen/qwen3-235b-a22b-07-25` |
|
|
187
|
-
| `qwen/qwen3-235b-a22b-07-25:free` |
|
|
188
159
|
| `qwen/qwen3-235b-a22b-thinking-2507` |
|
|
189
|
-
| `qwen/qwen3-235b-a22b:free` |
|
|
190
160
|
| `qwen/qwen3-30b-a3b-instruct-2507` |
|
|
191
161
|
| `qwen/qwen3-30b-a3b-thinking-2507` |
|
|
192
|
-
| `qwen/qwen3-30b-a3b:free` |
|
|
193
|
-
| `qwen/qwen3-32b:free` |
|
|
194
162
|
| `qwen/qwen3-4b:free` |
|
|
195
|
-
| `qwen/qwen3-8b:free` |
|
|
196
163
|
| `qwen/qwen3-coder` |
|
|
197
164
|
| `qwen/qwen3-coder-30b-a3b-instruct` |
|
|
198
165
|
| `qwen/qwen3-coder-flash` |
|
|
@@ -204,17 +171,11 @@ ANTHROPIC_API_KEY=ant-...
|
|
|
204
171
|
| `qwen/qwen3-next-80b-a3b-thinking` |
|
|
205
172
|
| `qwen/qwen3.5-397b-a17b` |
|
|
206
173
|
| `qwen/qwen3.5-plus-02-15` |
|
|
207
|
-
| `qwen/qwq-32b:free` |
|
|
208
|
-
| `rekaai/reka-flash-3` |
|
|
209
|
-
| `sarvamai/sarvam-m:free` |
|
|
210
174
|
| `sourceful/riverflow-v2-fast-preview` |
|
|
211
175
|
| `sourceful/riverflow-v2-max-preview` |
|
|
212
176
|
| `sourceful/riverflow-v2-standard-preview` |
|
|
213
177
|
| `stepfun/step-3.5-flash` |
|
|
214
178
|
| `stepfun/step-3.5-flash:free` |
|
|
215
|
-
| `thudm/glm-z1-32b:free` |
|
|
216
|
-
| `tngtech/deepseek-r1t2-chimera:free` |
|
|
217
|
-
| `tngtech/tng-r1t-chimera:free` |
|
|
218
179
|
| `x-ai/grok-3` |
|
|
219
180
|
| `x-ai/grok-3-beta` |
|
|
220
181
|
| `x-ai/grok-3-mini` |
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Vercel
|
|
2
2
|
|
|
3
|
-
Vercel aggregates models from multiple providers with enhanced features like rate limiting and failover. Access
|
|
3
|
+
Vercel aggregates models from multiple providers with enhanced features like rate limiting and failover. Access 224 models through Mastra's model router.
|
|
4
4
|
|
|
5
5
|
Learn more in the [Vercel documentation](https://ai-sdk.dev/providers/ai-sdk-providers).
|
|
6
6
|
|
|
@@ -108,6 +108,7 @@ ANTHROPIC_API_KEY=ant-...
|
|
|
108
108
|
| `google/gemini-3.1-flash-lite-preview` |
|
|
109
109
|
| `google/gemini-3.1-pro-preview` |
|
|
110
110
|
| `google/gemini-embedding-001` |
|
|
111
|
+
| `google/gemini-embedding-2` |
|
|
111
112
|
| `google/imagen-4.0-fast-generate-001` |
|
|
112
113
|
| `google/imagen-4.0-generate-001` |
|
|
113
114
|
| `google/imagen-4.0-ultra-generate-001` |
|
|
@@ -235,13 +236,17 @@ ANTHROPIC_API_KEY=ant-...
|
|
|
235
236
|
| `xai/grok-4-fast-reasoning` |
|
|
236
237
|
| `xai/grok-4.1-fast-non-reasoning` |
|
|
237
238
|
| `xai/grok-4.1-fast-reasoning` |
|
|
239
|
+
| `xai/grok-4.20-multi-agent` |
|
|
238
240
|
| `xai/grok-4.20-multi-agent-beta` |
|
|
241
|
+
| `xai/grok-4.20-non-reasoning` |
|
|
239
242
|
| `xai/grok-4.20-non-reasoning-beta` |
|
|
243
|
+
| `xai/grok-4.20-reasoning` |
|
|
240
244
|
| `xai/grok-4.20-reasoning-beta` |
|
|
241
245
|
| `xai/grok-code-fast-1` |
|
|
242
246
|
| `xai/grok-imagine-image` |
|
|
243
247
|
| `xai/grok-imagine-image-pro` |
|
|
244
248
|
| `xiaomi/mimo-v2-flash` |
|
|
249
|
+
| `xiaomi/mimo-v2-pro` |
|
|
245
250
|
| `zai/glm-4.5` |
|
|
246
251
|
| `zai/glm-4.5-air` |
|
|
247
252
|
| `zai/glm-4.5v` |
|
package/.docs/models/index.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Model Providers
|
|
2
2
|
|
|
3
|
-
Mastra provides a unified interface for working with LLMs across multiple providers, giving you access to
|
|
3
|
+
Mastra provides a unified interface for working with LLMs across multiple providers, giving you access to 3388 models from 94 providers through a single API.
|
|
4
4
|
|
|
5
5
|
## Features
|
|
6
6
|
|
|
@@ -232,7 +232,11 @@ Your users never experience the disruption - the response comes back with the sa
|
|
|
232
232
|
|
|
233
233
|
Mastra also supports local models like `gpt-oss`, `Qwen3`, `DeepSeek` and many more that you run on your own hardware. The application running your local model needs to provide an OpenAI-compatible API server for Mastra to connect to. We recommend using [LMStudio](https://lmstudio.ai/) (see [Running the LMStudio server](https://lmstudio.ai/docs/developer/core/server)).
|
|
234
234
|
|
|
235
|
-
For
|
|
235
|
+
For custom OpenAI-compatible endpoints, `id` is the routing form that Mastra sends through the model router.
|
|
236
|
+
|
|
237
|
+
Use `provider/model` when the remote behaves like a direct provider and expects a bare model name such as `llama3.2`.
|
|
238
|
+
|
|
239
|
+
Use `gateway/provider/model` when the remote behaves like a model gateway and the upstream model namespace includes the provider, such as `mastra/google/gemini-2.5-flash` or `openrouter/google/gemini-2.5-flash`.
|
|
236
240
|
|
|
237
241
|
For the `url`, it's **important** that you pass the base URL of the OpenAI-compatible endpoint in Mastra's `model` setting, not the URL of an individual chat endpoint.
|
|
238
242
|
|
|
@@ -250,6 +254,22 @@ const agent = new Agent({
|
|
|
250
254
|
})
|
|
251
255
|
```
|
|
252
256
|
|
|
257
|
+
If the remote behaves like a model gateway, include the gateway prefix in `id`:
|
|
258
|
+
|
|
259
|
+
```typescript
|
|
260
|
+
import { Agent } from "@mastra/core/agent";
|
|
261
|
+
|
|
262
|
+
const agent = new Agent({
|
|
263
|
+
id: "my-agent",
|
|
264
|
+
name: "My Agent",
|
|
265
|
+
instructions: "You are a helpful assistant",
|
|
266
|
+
model: {
|
|
267
|
+
id: "mastra/google/gemini-2.5-flash",
|
|
268
|
+
url: "http://your-custom-openai-compatible-endpoint.com/v1"
|
|
269
|
+
}
|
|
270
|
+
})
|
|
271
|
+
```
|
|
272
|
+
|
|
253
273
|
### Example: LMStudio
|
|
254
274
|
|
|
255
275
|
After starting the LMStudio server, the local server is available at `http://localhost:1234` and it provides endpoints like `/v1/models`, `/v1/chat/completions`, etc. The `url` will be `http://localhost:1234/v1`. For the `id`, use `lmstudio/${modelId}`, where `modelId` is the model identifier displayed in the LMStudio interface.
|
|
@@ -38,7 +38,7 @@ for await (const chunk of stream) {
|
|
|
38
38
|
| `baseten/deepseek-ai/DeepSeek-V3.1` | 164K | | | | | | $0.50 | $2 |
|
|
39
39
|
| `baseten/MiniMaxAI/MiniMax-M2.5` | 204K | | | | | | $0.30 | $1 |
|
|
40
40
|
| `baseten/moonshotai/Kimi-K2.5` | 262K | | | | | | $0.60 | $3 |
|
|
41
|
-
| `baseten/nvidia/Nemotron-
|
|
41
|
+
| `baseten/nvidia/Nemotron-120B-A12B` | 262K | | | | | | $0.30 | $0.75 |
|
|
42
42
|
| `baseten/openai/gpt-oss-120b` | 128K | | | | | | $0.10 | $0.50 |
|
|
43
43
|
| `baseten/zai-org/GLM-4.6` | 200K | | | | | | $0.60 | $2 |
|
|
44
44
|
| `baseten/zai-org/GLM-4.7` | 205K | | | | | | $0.60 | $2 |
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Cortecs
|
|
2
2
|
|
|
3
|
-
Access
|
|
3
|
+
Access 28 Cortecs models through Mastra's model router. Authentication is handled automatically using the `CORTECS_API_KEY` environment variable.
|
|
4
4
|
|
|
5
5
|
Learn more in the [Cortecs documentation](https://cortecs.ai).
|
|
6
6
|
|
|
@@ -35,6 +35,10 @@ for await (const chunk of stream) {
|
|
|
35
35
|
| Model | Context | Tools | Reasoning | Image | Audio | Video | Input $/1M | Output $/1M |
|
|
36
36
|
| ---------------------------------------- | ------- | ----- | --------- | ----- | ----- | ----- | ---------- | ----------- |
|
|
37
37
|
| `cortecs/claude-4-5-sonnet` | 200K | | | | | | $3 | $16 |
|
|
38
|
+
| `cortecs/claude-4-6-sonnet` | 1.0M | | | | | | $4 | $18 |
|
|
39
|
+
| `cortecs/claude-haiku-4-5` | 200K | | | | | | $1 | $5 |
|
|
40
|
+
| `cortecs/claude-opus4-5` | 200K | | | | | | $6 | $30 |
|
|
41
|
+
| `cortecs/claude-opus4-6` | 1.0M | | | | | | $6 | $30 |
|
|
38
42
|
| `cortecs/claude-sonnet-4` | 200K | | | | | | $3 | $17 |
|
|
39
43
|
| `cortecs/deepseek-v3-0324` | 128K | | | | | | $0.55 | $2 |
|
|
40
44
|
| `cortecs/devstral-2512` | 262K | | | | | | — | — |
|
|
@@ -53,6 +57,7 @@ for await (const chunk of stream) {
|
|
|
53
57
|
| `cortecs/llama-3.1-405b-instruct` | 128K | | | | | | — | — |
|
|
54
58
|
| `cortecs/minimax-m2` | 400K | | | | | | $0.39 | $2 |
|
|
55
59
|
| `cortecs/minimax-m2.1` | 196K | | | | | | $0.34 | $1 |
|
|
60
|
+
| `cortecs/minimax-m2.5` | 197K | | | | | | $0.32 | $1 |
|
|
56
61
|
| `cortecs/nova-pro-v1` | 300K | | | | | | $1 | $4 |
|
|
57
62
|
| `cortecs/qwen3-32b` | 16K | | | | | | $0.10 | $0.33 |
|
|
58
63
|
| `cortecs/qwen3-coder-480b-a35b-instruct` | 262K | | | | | | $0.44 | $2 |
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# FastRouter
|
|
2
2
|
|
|
3
|
-
Access
|
|
3
|
+
Access 15 FastRouter models through Mastra's model router. Authentication is handled automatically using the `FASTROUTER_API_KEY` environment variable.
|
|
4
4
|
|
|
5
5
|
Learn more in the [FastRouter documentation](https://fastrouter.ai/models).
|
|
6
6
|
|
|
@@ -48,6 +48,7 @@ for await (const chunk of stream) {
|
|
|
48
48
|
| `fastrouter/openai/gpt-oss-20b` | 131K | | | | | | $0.05 | $0.20 |
|
|
49
49
|
| `fastrouter/qwen/qwen3-coder` | 262K | | | | | | $0.30 | $1 |
|
|
50
50
|
| `fastrouter/x-ai/grok-4` | 256K | | | | | | $3 | $15 |
|
|
51
|
+
| `fastrouter/z-ai/glm-5` | 205K | | | | | | $0.95 | $3 |
|
|
51
52
|
|
|
52
53
|
## Advanced configuration
|
|
53
54
|
|
|
@@ -77,7 +78,7 @@ const agent = new Agent({
|
|
|
77
78
|
model: ({ requestContext }) => {
|
|
78
79
|
const useAdvanced = requestContext.task === "complex";
|
|
79
80
|
return useAdvanced
|
|
80
|
-
? "fastrouter/
|
|
81
|
+
? "fastrouter/z-ai/glm-5"
|
|
81
82
|
: "fastrouter/anthropic/claude-opus-4.1";
|
|
82
83
|
}
|
|
83
84
|
});
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Fireworks AI
|
|
2
2
|
|
|
3
|
-
Access
|
|
3
|
+
Access 14 Fireworks AI models through Mastra's model router. Authentication is handled automatically using the `FIREWORKS_API_KEY` environment variable.
|
|
4
4
|
|
|
5
5
|
Learn more in the [Fireworks AI documentation](https://fireworks.ai/docs/).
|
|
6
6
|
|
|
@@ -47,6 +47,7 @@ for await (const chunk of stream) {
|
|
|
47
47
|
| `fireworks-ai/accounts/fireworks/models/kimi-k2p5` | 256K | | | | | | $0.60 | $3 |
|
|
48
48
|
| `fireworks-ai/accounts/fireworks/models/minimax-m2p1` | 200K | | | | | | $0.30 | $1 |
|
|
49
49
|
| `fireworks-ai/accounts/fireworks/models/minimax-m2p5` | 197K | | | | | | $0.30 | $1 |
|
|
50
|
+
| `fireworks-ai/accounts/fireworks/routers/kimi-k2p5-turbo` | 256K | | | | | | — | — |
|
|
50
51
|
|
|
51
52
|
## Advanced configuration
|
|
52
53
|
|
|
@@ -76,7 +77,7 @@ const agent = new Agent({
|
|
|
76
77
|
model: ({ requestContext }) => {
|
|
77
78
|
const useAdvanced = requestContext.task === "complex";
|
|
78
79
|
return useAdvanced
|
|
79
|
-
? "fireworks-ai/accounts/fireworks/
|
|
80
|
+
? "fireworks-ai/accounts/fireworks/routers/kimi-k2p5-turbo"
|
|
80
81
|
: "fireworks-ai/accounts/fireworks/models/deepseek-v3p1";
|
|
81
82
|
}
|
|
82
83
|
});
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# NanoGPT
|
|
2
2
|
|
|
3
|
-
Access
|
|
3
|
+
Access 517 NanoGPT models through Mastra's model router. Authentication is handled automatically using the `NANO_GPT_API_KEY` environment variable.
|
|
4
4
|
|
|
5
5
|
Learn more in the [NanoGPT documentation](https://docs.nano-gpt.com).
|
|
6
6
|
|
|
@@ -335,6 +335,7 @@ for await (const chunk of stream) {
|
|
|
335
335
|
| `nano-gpt/minimax/minimax-m2-her` | 66K | | | | | | $0.30 | $1 |
|
|
336
336
|
| `nano-gpt/minimax/minimax-m2.1` | 200K | | | | | | $0.33 | $1 |
|
|
337
337
|
| `nano-gpt/minimax/minimax-m2.5` | 205K | | | | | | $0.30 | $1 |
|
|
338
|
+
| `nano-gpt/minimax/minimax-m2.7` | 205K | | | | | | $0.30 | $1 |
|
|
338
339
|
| `nano-gpt/MiniMaxAI/MiniMax-M1-80k` | 1.0M | | | | | | $0.61 | $2 |
|
|
339
340
|
| `nano-gpt/miromind-ai/mirothinker-v1.5-235b` | 33K | | | | | | $0.30 | $1 |
|
|
340
341
|
| `nano-gpt/Mistral-Nemo-12B-Instruct-2407` | 16K | | | | | | $0.01 | $0.01 |
|