@mastra/mcp-docs-server 1.1.39-alpha.8 → 1.1.40-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/.docs/docs/agents/acp.md +238 -0
  2. package/.docs/docs/agents/agent-approval.md +2 -0
  3. package/.docs/docs/agents/background-tasks.md +9 -6
  4. package/.docs/docs/agents/response-caching.md +2 -0
  5. package/.docs/docs/agents/signals.md +29 -3
  6. package/.docs/docs/evals/evals-with-memory.md +146 -0
  7. package/.docs/docs/evals/running-in-ci.md +1 -0
  8. package/.docs/docs/memory/multi-user-threads.md +206 -0
  9. package/.docs/docs/memory/observational-memory.md +53 -17
  10. package/.docs/docs/memory/overview.md +1 -0
  11. package/.docs/docs/memory/working-memory.md +1 -1
  12. package/.docs/models/gateways/netlify.md +2 -1
  13. package/.docs/models/gateways/openrouter.md +2 -1
  14. package/.docs/models/gateways/vercel.md +1 -2
  15. package/.docs/models/index.md +1 -1
  16. package/.docs/models/providers/cloudflare-workers-ai.md +33 -14
  17. package/.docs/models/providers/deepinfra.md +2 -2
  18. package/.docs/models/providers/fireworks-ai.md +23 -22
  19. package/.docs/models/providers/google.md +29 -46
  20. package/.docs/models/providers/llmgateway.md +186 -191
  21. package/.docs/models/providers/opencode.md +3 -2
  22. package/.docs/models/providers/orcarouter.md +2 -2
  23. package/.docs/models/providers/poe.md +2 -1
  24. package/.docs/models/providers/routing-run.md +27 -40
  25. package/.docs/models/providers/scaleway.md +2 -1
  26. package/.docs/models/providers/the-grid-ai.md +15 -9
  27. package/.docs/models/providers/xai.md +11 -18
  28. package/.docs/reference/agents/agent.md +13 -5
  29. package/.docs/reference/agents/channels.md +4 -2
  30. package/.docs/reference/client-js/agents.md +1 -1
  31. package/.docs/reference/configuration.md +1 -1
  32. package/.docs/reference/memory/observational-memory.md +5 -3
  33. package/.docs/reference/server/register-api-route.md +1 -1
  34. package/.docs/reference/storage/convex.md +74 -12
  35. package/.docs/reference/tools/mcp-client.md +27 -2
  36. package/.docs/reference/vectors/convex.md +129 -7
  37. package/CHANGELOG.md +73 -0
  38. package/package.json +6 -6
@@ -0,0 +1,206 @@
1
+ # Multi-user threads
2
+
3
+ A single Mastra thread can be shared by multiple users, each with their own name and functional role. You carry speaker identity in the message body so the agent can tell users apart while reading from a single shared thread.
4
+
5
+ ## When to use multi-user threads
6
+
7
+ Use multi-user threads when several people collaborate on the same subject through one agent:
8
+
9
+ - Collaborative documents with editors, reviewers, and approvers
10
+ - Group chats where one assistant serves many participants
11
+ - Multi-stakeholder reviews where different roles have different authority
12
+
13
+ ## Share one `resourceId` across all participants
14
+
15
+ A thread belongs to exactly one `resourceId`, so all participants on a shared thread need to pass the same value. Instead of using a user id (the default for single-user apps), key `resourceId` on the conversation itself — for example `doc_${docId}` for a shared document, or `room_${roomId}` for a group chat. With everyone pointing at the same `resourceId`, they read and write the same history.
16
+
17
+ ## Tag each user message with the speaker's identity
18
+
19
+ The model needs to know who's talking on every turn. Since the message body is the one place that survives into history and back into context, wrap each user message in a small `<turn>` tag with the speaker's id, name, and role. The tag stays attached to the message, so when prior turns are recalled the model still sees who said what.
20
+
21
+ Build the tag with a small helper. The example below is one way to do it — copy it into your project and adapt it to the shape of your user data:
22
+
23
+ ```typescript
24
+ export type Speaker = {
25
+ id: string
26
+ name: string
27
+ role: string
28
+ }
29
+
30
+ function escapeAttr(value: string) {
31
+ return value
32
+ .replace(/&/g, '&amp;')
33
+ .replace(/"/g, '&quot;')
34
+ .replace(/</g, '&lt;')
35
+ .replace(/>/g, '&gt;')
36
+ }
37
+
38
+ export function asUserTurn(speaker: Speaker, text: string) {
39
+ const id = escapeAttr(speaker.id)
40
+ const name = escapeAttr(speaker.name)
41
+ const role = escapeAttr(speaker.role)
42
+ return {
43
+ role: 'user' as const,
44
+ content: `<turn author_id="${id}" author_name="${name}" functional_role="${role}">
45
+ ${text}
46
+ </turn>`,
47
+ }
48
+ }
49
+ ```
50
+
51
+ Teach the agent how to read the `<turn>` tag in its instructions. The agent must have `memory` configured so it can be called with a `thread` and `resource`:
52
+
53
+ ```typescript
54
+ import { Agent } from '@mastra/core/agent'
55
+ import { Memory } from '@mastra/memory'
56
+ import { LibSQLStore } from '@mastra/libsql'
57
+
58
+ const memory = new Memory({
59
+ storage: new LibSQLStore({ url: 'file:./collab.db' }),
60
+ options: {
61
+ lastMessages: 20,
62
+ },
63
+ })
64
+
65
+ export const collabAgent = new Agent({
66
+ id: 'collab',
67
+ name: 'CollabAgent',
68
+ model: 'openai/gpt-5.4-mini',
69
+ memory,
70
+ instructions: `
71
+ You are a collaborative document assistant. Multiple users talk to you in the SAME thread.
72
+
73
+ Every user message is wrapped in a <turn> tag carrying the user's identity:
74
+
75
+ <turn author_id="u_alice" author_name="Alice" functional_role="editor">
76
+ ...message text...
77
+ </turn>
78
+
79
+ Rules:
80
+ 1. Address users by their author_name.
81
+ 2. Respect functional_role: editors propose changes, reviewers approve.
82
+ 3. When attributing past statements, read author_name from the surrounding <turn> tag.
83
+ 4. Do not echo the <turn> tags back at users.
84
+ `.trim(),
85
+ })
86
+ ```
87
+
88
+ Call the agent with the wrapped message. Every participant shares the same `thread` and `resource`:
89
+
90
+ ```typescript
91
+ import { asUserTurn } from './identity'
92
+
93
+ const docResourceId = 'doc_42'
94
+ const docThreadId = 'doc_42'
95
+
96
+ const alice = { id: 'u_alice', name: 'Alice', role: 'editor' }
97
+ const bob = { id: 'u_bob', name: 'Bob', role: 'reviewer' }
98
+
99
+ await collabAgent.generate([asUserTurn(alice, 'My favorite color is teal.')], {
100
+ memory: { thread: docThreadId, resource: docResourceId },
101
+ })
102
+
103
+ await collabAgent.generate([asUserTurn(bob, 'I want QA sign-off before publish.')], {
104
+ memory: { thread: docThreadId, resource: docResourceId },
105
+ })
106
+ ```
107
+
108
+ The `<turn>` tag persists in the message body, so when history is recalled on later turns the model still sees who said what.
109
+
110
+ ## Combining with memory layers
111
+
112
+ The user-tagging pattern composes with every memory layer. Pick the layer based on how long the conversation needs to remember per-user facts:
113
+
114
+ - **Short conversations** (a single session, or a thread small enough to fit in `lastMessages`), or when you need a verbatim record of who said what: use [message history alone](#message-history-alone). The user tags in history are enough; no extra memory layer needed.
115
+ - **Long-running threads** (conversations that outgrow `lastMessages`, where you need per-user facts to survive history eviction): use [observational memory](#with-observational-memory-recommended).
116
+ - **Need a structured participants list, or your storage adapter doesn't support OM** (OM requires LibSQL, PG, or MongoDB): use [working memory](#with-working-memory).
117
+
118
+ We recommend using observational memory or working memory, not both — they cover overlapping needs, and running both at once adds latency and token cost without much benefit.
119
+
120
+ ### Message history alone
121
+
122
+ For short conversations, or when you need a verbatim record of who said what, the user tags in history are enough. `lastMessages` brings prior turns back into context with their attribution intact:
123
+
124
+ ```typescript
125
+ import { Memory } from '@mastra/memory'
126
+ import { LibSQLStore } from '@mastra/libsql'
127
+
128
+ const memory = new Memory({
129
+ storage: new LibSQLStore({ url: 'file:./collab.db' }),
130
+ options: {
131
+ lastMessages: 20,
132
+ },
133
+ })
134
+ ```
135
+
136
+ The model reads identity from the `<turn>` tag on the current message and from prior tagged messages brought back through `lastMessages`.
137
+
138
+ ### With observational memory (recommended)
139
+
140
+ [Observational Memory](https://mastra.ai/docs/memory/observational-memory) (OM) extracts per-user facts into a background log without burning the agent's tool budget. The default Observer model reads `<turn>` tags natively and produces named attribution like `Alice stated her favorite color is teal.` and `Bob asked for QA sign-off before publish.`
141
+
142
+ Prefer OM over working memory for multi-user threads when your storage supports it. OM extracts facts automatically, scales to any number of participants, and doesn't need template upkeep. Enable it with no overrides:
143
+
144
+ ```typescript
145
+ import { Memory } from '@mastra/memory'
146
+ import { LibSQLStore } from '@mastra/libsql'
147
+
148
+ const memory = new Memory({
149
+ storage: new LibSQLStore({ url: 'file:./collab.db' }),
150
+ options: {
151
+ lastMessages: 20,
152
+ observationalMemory: true,
153
+ },
154
+ })
155
+ ```
156
+
157
+ OM requires a storage adapter that supports it: `@mastra/libsql`, `@mastra/pg`, or `@mastra/mongodb`.
158
+
159
+ > **Note:** If you switch the Observer to a weaker model and see facts collapse to a generic `User`, use [`observation.instruction`](https://mastra.ai/reference/memory/observational-memory) to teach the Observer how to read the `<turn>` tag.
160
+
161
+ ### With working memory
162
+
163
+ Use working memory when OM isn't an option — for example, when your storage adapter doesn't support OM, or when you need a structured, deterministic participants list the agent can read and write on every turn.
164
+
165
+ The default [working memory](https://mastra.ai/docs/memory/working-memory) template assumes one user per thread ("First Name", "Last Name", etc.). For multi-user threads, provide a template with a participants list:
166
+
167
+ ```typescript
168
+ import { Memory } from '@mastra/memory'
169
+ import { LibSQLStore } from '@mastra/libsql'
170
+
171
+ const memory = new Memory({
172
+ storage: new LibSQLStore({ url: 'file:./collab.db' }),
173
+ options: {
174
+ lastMessages: 20,
175
+ workingMemory: {
176
+ enabled: true,
177
+ scope: 'thread',
178
+ template: `# Document Collaboration State
179
+
180
+ ## Participants
181
+ <!-- One entry per known collaborator. Use author_id as the stable key. -->
182
+ <!-- - **<author_name>** (<author_id>, <functional_role>): <their position> -->
183
+
184
+ ## Open Questions
185
+
186
+ ## Decisions
187
+ `,
188
+ },
189
+ },
190
+ })
191
+ ```
192
+
193
+ Set `scope: 'thread'` so the participants list belongs to the document, not to any individual user. Add one instruction telling the agent to append new participants to the list whenever a new `author_id` shows up in a `<turn>`.
194
+
195
+ For more on templates, see [Custom templates](https://mastra.ai/docs/memory/working-memory).
196
+
197
+ ## Security
198
+
199
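+ Set the `speaker` from your authenticated request context, never from the request body. If a client can choose its own `author_id`, one user can impersonate another. Use [Request Context](https://mastra.ai/docs/server/request-context) to read the verified user from your auth layer and build the `<turn>` tag on the server before calling the agent. The message text itself is still user-controlled and is placed inside the tag body as-is, so also strip or escape any literal `<turn` or `</turn>` sequences in it; otherwise a user could close their own tag early and forge a turn that appears to come from someone else.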
200
+
201
+ ## Related
202
+
203
+ - [Working memory](https://mastra.ai/docs/memory/working-memory)
204
+ - [Observational memory](https://mastra.ai/docs/memory/observational-memory)
205
+ - [Share memory between agents](https://mastra.ai/docs/memory/overview)
206
+ - [`Memory` reference](https://mastra.ai/reference/memory/memory-class)
@@ -88,7 +88,7 @@ const memory = new Memory({
88
88
  options: {
89
89
  observationalMemory: {
90
90
  model: 'google/gemini-2.5-flash',
91
- activateAfterIdle: '5m',
91
+ activateAfterIdle: 'auto',
92
92
  activateOnProviderChange: true,
93
93
  },
94
94
  },
@@ -144,6 +144,28 @@ OM uses fast local token estimation for this thresholding work. Text is estimate
144
144
 
145
145
  The Observer can also see attachments in the history it reviews. OM keeps readable placeholders like `[Image #1: reference-board.png]` or `[File #1: floorplan.pdf]` in the transcript for readability, and forwards the actual attachment parts alongside the text. Image-like `file` parts are upgraded to image inputs for the Observer when possible, while non-image attachments are forwarded as file parts with normalized token counting. This applies to both normal thread observation and batched resource-scope observation.
146
146
 
147
+ If your Observer model is text-only or its API rejects multimodal input, set `observation.observeAttachments` to `false` to drop attachments before they reach the Observer. The readable placeholders (`[Image #1: ...]`, `[File #1: ...]`) are kept in the transcript so the Observer can still reason about what was shared without receiving the binary payload. The same filter applies to tool results that contain image or file parts:
148
+
149
+ ```typescript
150
+ new Agent({
151
+ name: 'assistant',
152
+ instructions: 'You are a helpful assistant.',
153
+ model: 'openai/gpt-5-mini',
154
+ memory: new Memory({
155
+ options: {
156
+ observationalMemory: {
157
+ observation: {
158
+ model: 'deepseek/deepseek-reasoner',
159
+ observeAttachments: false,
160
+ },
161
+ },
162
+ },
163
+ }),
164
+ })
165
+ ```
166
+
167
+ Instead of `false`, you can also set `observation.observeAttachments` to an allowlist of MIME type globs (for example `['image/*']`) to forward only the attachment types the Observer can handle.
168
+
147
169
  ```md
148
170
  Date: 2026-01-15
149
171
 
@@ -444,35 +466,48 @@ Reflection works similarly — the Reflector runs in the background when observa
444
466
 
445
467
  ### Settings
446
468
 
447
- | Setting | Default | What it controls |
448
- | ------------------------------------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
449
- | `observation.bufferTokens` | `0.2` | How often to buffer. `0.2` means every 20% of `messageTokens` — with the default 30k threshold, that's roughly every 6k tokens. Can also be an absolute token count (e.g. `5000`). |
450
- | `observation.bufferActivation` | `0.8` | How aggressively to clear the message window on activation. `0.8` means remove enough messages to keep only 20% of `messageTokens` remaining. Lower values keep more message history. |
451
- | `observation.blockAfter` | `1.2` | Safety threshold as a multiplier of `messageTokens`. At `1.2`, synchronous observation is forced at 36k tokens (1.2 × 30k). Only matters if buffering can't keep up. |
452
- | `activateAfterIdle` | none | Forces buffered observations to activate after a period of inactivity, even before `observation.messageTokens` is reached. Accepts a numeric millisecond value such as `300_000`, or duration strings like `"5m"` or `"1hr"`. Set this to your prompt cache TTL if you want activation to happen before the next cold prompt. |
453
- | `activateOnProviderChange` | `false` | Forces buffered observations to activate when the next step uses a different `provider/model` than the one that produced the latest assistant step. Use this when switching providers or models would invalidate prompt cache reuse. |
454
- | `reflection.bufferActivation` | `0.5` | When to start background reflection. `0.5` means reflection begins when observations reach 50% of the `observationTokens` threshold. |
455
- | `reflection.activateAfterIdle` | none | Opts buffered reflections into idle activation. Reflections don't inherit top-level `activateAfterIdle`. |
456
- | `reflection.activateOnProviderChange` | `false` | Opts buffered reflections into provider-change activation. Reflections don't inherit top-level `activateOnProviderChange`. |
457
- | `reflection.blockAfter` | `1.2` | Safety threshold for reflection, same logic as observation. |
458
-
459
- If you're relying on prompt caching, set `activateAfterIdle` to match your cache TTL. That way, once a thread has been idle long enough for the cache to expire, the next request can activate buffered observations first and send a smaller compressed context window.
469
+ | Setting | Default | What it controls |
470
+ | ------------------------------------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
471
+ | `observation.bufferTokens` | `0.2` | How often to buffer. `0.2` means every 20% of `messageTokens` — with the default 30k threshold, that's roughly every 6k tokens. Can also be an absolute token count (e.g. `5000`). |
472
+ | `observation.bufferActivation` | `0.8` | How aggressively to clear the message window on activation. `0.8` means remove enough messages to keep only 20% of `messageTokens` remaining. Lower values keep more message history. |
473
+ | `observation.blockAfter` | `1.2` | Safety threshold as a multiplier of `messageTokens`. At `1.2`, synchronous observation is forced at 36k tokens (1.2 × 30k). Only matters if buffering can't keep up. |
474
+ | `activateAfterIdle` | none | Forces buffered observations to activate after a period of inactivity, even before `observation.messageTokens` is reached. Accepts a numeric millisecond value such as `300_000`, duration strings like `"5m"` or `"1hr"`, or `"auto"` for a provider-aware prompt cache TTL. |
475
+ | `activateOnProviderChange` | `false` | Forces buffered observations to activate when the next step uses a different `provider/model` than the one that produced the latest assistant step. Use this when switching providers or models would invalidate prompt cache reuse. |
476
+ | `reflection.bufferActivation` | `0.5` | When to start background reflection. `0.5` means reflection begins when observations reach 50% of the `observationTokens` threshold. |
477
+ | `reflection.activateAfterIdle` | none | Opts buffered reflections into idle activation. Reflections don't inherit top-level `activateAfterIdle`. |
478
+ | `reflection.activateOnProviderChange` | `false` | Opts buffered reflections into provider-change activation. Reflections don't inherit top-level `activateOnProviderChange`. |
479
+ | `reflection.blockAfter` | `1.2` | Safety threshold for reflection, same logic as observation. |
480
+
481
+ If you're relying on prompt caching, set `activateAfterIdle` to `"auto"` or to a specific cache TTL. That way, once a thread has been idle long enough for the cache to expire, the next request can activate buffered observations first and send a smaller compressed context window.
482
+
483
+ With `"auto"`, Mastra chooses an idle activation TTL from the active model provider:
484
+
485
+ | Provider | Auto TTL |
486
+ | --------------------------------------------------------------------------------------- | --------- |
487
+ | Anthropic, OpenRouter, unknown providers, xAI | 5 minutes |
488
+ | DeepSeek | 1 hour |
489
+ | Google Gemini | 24 hours |
490
+ | Groq | 2 hours |
491
+ | OpenAI with `providerOptions.openai.promptCacheRetention: "24h"` | 1 hour |
492
+ | OpenAI with `providerOptions.openai.promptCacheRetention: "in_memory"` | 5 minutes |
493
+ | OpenAI `gpt-4*`, `gpt-5`, `gpt-5-*`, `gpt-5.1*`, `gpt-5.2*`, `gpt-5.3*`, and `gpt-5.4*` | 5 minutes |
494
+ | Other OpenAI models | 1 hour |
460
495
 
461
496
  ```typescript
462
497
  const memory = new Memory({
463
498
  options: {
464
499
  observationalMemory: {
465
500
  model: 'google/gemini-2.5-flash',
466
- activateAfterIdle: '5m',
501
+ activateAfterIdle: 'auto',
467
502
  activateOnProviderChange: true,
468
503
  },
469
504
  },
470
505
  })
471
506
  ```
472
507
 
473
- With a 5-minute prompt cache TTL, this activates buffered observations after 5 minutes of inactivity so the next uncached prompt uses compressed observations instead of a larger raw message window. If you prefer, `300_000` works the same way.
508
+ With `"auto"`, this activates buffered observations based on the active provider's prompt cache behavior so the next uncached prompt uses compressed observations instead of a larger raw message window. If you prefer a fixed 5-minute TTL, use `"5m"` or `300_000`.
474
509
 
475
- Changing model or providers mid-thread will invalidate the prompt cache. If your agent can switch between providers or models mid-thread, `activateOnProviderChange: true` forces buffered observations to activate before the new provider runs. That avoids sending a large raw window to a provider that can't reuse the previous prompt cache.
510
+ Changing models or providers mid-thread will invalidate the prompt cache. If your agent can switch between providers or models mid-thread, `activateOnProviderChange: true` forces buffered observations to activate before the new provider runs. That avoids sending a large raw window to a provider that can't reuse the previous prompt cache.
476
511
 
477
512
  ### Disabling
478
513
 
@@ -530,6 +565,7 @@ No manual migration needed. OM reads existing messages and observes them lazily
530
565
  - **[Message history](https://mastra.ai/docs/memory/message-history)**: High-fidelity record of the current conversation
531
566
  - **[Working memory](https://mastra.ai/docs/memory/working-memory)**: Small, structured state (JSON or markdown) for user preferences, names, goals
532
567
  - **[Semantic Recall](https://mastra.ai/docs/memory/semantic-recall)**: RAG-based retrieval of relevant past messages
568
+ - **[Multi-user threads](https://mastra.ai/docs/memory/multi-user-threads)**: How OM attributes facts to individual users when several people share a single thread
533
569
 
534
570
  If you're using working memory to store conversation summaries or ongoing state that grows over time, OM is a better fit. Working memory is for small, structured data; OM is for long-running event logs. OM also manages message history automatically—the `messageTokens` setting controls how much raw history remains before observation runs.
535
571
 
@@ -7,6 +7,7 @@ Mastra agents can be configured to store [message history](https://mastra.ai/doc
7
7
  - [Observational Memory](https://mastra.ai/docs/memory/observational-memory) (Recommended): Uses background agents to maintain a dense observation log that replaces raw message history as it grows. This keeps the context window small while preserving long-term memory.
8
8
  - [Working memory](https://mastra.ai/docs/memory/working-memory): Stores persistent, structured user data such as names, preferences, and goals.
9
9
  - [Semantic recall](https://mastra.ai/docs/memory/semantic-recall): Retrieves relevant past messages based on semantic meaning rather than exact keywords.
10
+ - [Multi-user threads](https://mastra.ai/docs/memory/multi-user-threads): Share one thread between multiple users.
10
11
 
11
12
  If the combined memory exceeds the model's context limit, [memory processors](https://mastra.ai/docs/memory/memory-processors) can filter, trim, or prioritize content so the most relevant information is preserved.
12
13
 
@@ -130,7 +130,7 @@ Resource-scoped working memory requires specific storage adapters that support t
130
130
 
131
131
  ## Custom templates
132
132
 
133
- Templates guide the agent on what information to track and update in working memory. While a default template is used if none is provided, you'll typically want to define a custom template tailored to your agent's specific use case to ensure it remembers the most relevant information.
133
+ Templates guide the agent on what information to track and update in working memory. While a default template is used if none is provided, you'll typically want to define a custom template tailored to your agent's specific use case to ensure it remembers the most relevant information. For threads shared by multiple users, see [Multi-user threads](https://mastra.ai/docs/memory/multi-user-threads).
134
134
 
135
135
  Here's an example of a custom template. In this example, the agent will store the user's name, location, timezone, etc. as soon as the user sends a message containing any of that information:
136
136
 
@@ -1,6 +1,6 @@
1
1
  # Netlify
2
2
 
3
- Netlify AI Gateway provides unified access to multiple providers with built-in caching and observability. Access 68 models through Mastra's model router.
3
+ Netlify AI Gateway provides unified access to multiple providers with built-in caching and observability. Access 69 models through Mastra's model router.
4
4
 
5
5
  Learn more in the [Netlify documentation](https://docs.netlify.com/build/ai-gateway/overview/).
6
6
 
@@ -61,6 +61,7 @@ ANTHROPIC_API_KEY=ant-...
61
61
  | `gemini/gemini-3.1-flash-lite-preview` |
62
62
  | `gemini/gemini-3.1-pro-preview` |
63
63
  | `gemini/gemini-3.1-pro-preview-customtools` |
64
+ | `gemini/gemini-3.5-flash` |
64
65
  | `gemini/gemini-flash-latest` |
65
66
  | `gemini/gemini-flash-lite-latest` |
66
67
  | `openai/chat-latest` |
@@ -1,6 +1,6 @@
1
1
  # ![OpenRouter logo](https://models.dev/logos/openrouter.svg)OpenRouter
2
2
 
3
- OpenRouter aggregates models from multiple providers with enhanced features like rate limiting and failover. Access 356 models through Mastra's model router.
3
+ OpenRouter aggregates models from multiple providers with enhanced features like rate limiting and failover. Access 357 models through Mastra's model router.
4
4
 
5
5
  Learn more in the [OpenRouter documentation](https://openrouter.ai/models).
6
6
 
@@ -126,6 +126,7 @@ ANTHROPIC_API_KEY=ant-...
126
126
  | `google/gemini-3.1-flash-lite-preview` |
127
127
  | `google/gemini-3.1-pro-preview` |
128
128
  | `google/gemini-3.1-pro-preview-customtools` |
129
+ | `google/gemini-3.5-flash` |
129
130
  | `google/gemma-2-27b-it` |
130
131
  | `google/gemma-3-12b-it` |
131
132
  | `google/gemma-3-27b-it` |
@@ -1,6 +1,6 @@
1
1
  # ![Vercel logo](https://models.dev/logos/vercel.svg)Vercel
2
2
 
3
- Vercel aggregates models from multiple providers with enhanced features like rate limiting and failover. Access 240 models through Mastra's model router.
3
+ Vercel aggregates models from multiple providers with enhanced features like rate limiting and failover. Access 239 models through Mastra's model router.
4
4
 
5
5
  Learn more in the [Vercel documentation](https://ai-sdk.dev/providers/ai-sdk-providers).
6
6
 
@@ -244,7 +244,6 @@ ANTHROPIC_API_KEY=ant-...
244
244
  | `voyage/voyage-code-3` |
245
245
  | `voyage/voyage-finance-2` |
246
246
  | `voyage/voyage-law-2` |
247
- | `xai/grok-2-vision` |
248
247
  | `xai/grok-4-fast-reasoning` |
249
248
  | `xai/grok-4.1-fast-non-reasoning` |
250
249
  | `xai/grok-4.1-fast-reasoning` |
@@ -1,6 +1,6 @@
1
1
  # Model Providers
2
2
 
3
- Mastra provides a unified interface for working with LLMs across multiple providers, giving you access to 4219 models from 121 providers through a single API.
3
+ Mastra provides a unified interface for working with LLMs across multiple providers, giving you access to 4207 models from 121 providers through a single API.
4
4
 
5
5
  ## Features
6
6
 
@@ -1,6 +1,6 @@
1
1
  # ![Cloudflare Workers AI logo](https://models.dev/logos/cloudflare-workers-ai.svg)Cloudflare Workers AI
2
2
 
3
- Access 8 Cloudflare Workers AI models through Mastra's model router. Authentication is handled automatically using the `CLOUDFLARE_API_KEY` environment variable. Configure `CLOUDFLARE_ACCOUNT_ID` as well.
3
+ Access 27 Cloudflare Workers AI models through Mastra's model router. Authentication is handled automatically using the `CLOUDFLARE_API_KEY` environment variable. Configure `CLOUDFLARE_ACCOUNT_ID` as well.
4
4
 
5
5
  Learn more in the [Cloudflare Workers AI documentation](https://developers.cloudflare.com/workers-ai/models/).
6
6
 
@@ -16,7 +16,7 @@ const agent = new Agent({
16
16
  id: "my-agent",
17
17
  name: "My Agent",
18
18
  instructions: "You are a helpful assistant",
19
- model: "cloudflare-workers-ai/@cf/google/gemma-4-26b-a4b-it"
19
+ model: "cloudflare-workers-ai/@cf/aisingapore/gemma-sea-lion-v4-27b-it"
20
20
  });
21
21
 
22
22
  // Generate a response
@@ -33,16 +33,35 @@ for await (const chunk of stream) {
33
33
 
34
34
  ## Models
35
35
 
36
- | Model | Context | Tools | Reasoning | Image | Audio | Video | Input $/1M | Output $/1M |
37
- | --------------------------------------------------------------- | ------- | ----- | --------- | ----- | ----- | ----- | ---------- | ----------- |
38
- | `cloudflare-workers-ai/@cf/google/gemma-4-26b-a4b-it` | 256K | | | | | | $0.10 | $0.30 |
39
- | `cloudflare-workers-ai/@cf/meta/llama-4-scout-17b-16e-instruct` | 128K | | | | | | $0.27 | $0.85 |
40
- | `cloudflare-workers-ai/@cf/moonshotai/kimi-k2.5` | 256K | | | | | | $0.60 | $3 |
41
- | `cloudflare-workers-ai/@cf/moonshotai/kimi-k2.6` | 256K | | | | | | $0.95 | $4 |
42
- | `cloudflare-workers-ai/@cf/nvidia/nemotron-3-120b-a12b` | 256K | | | | | | $0.50 | $2 |
43
- | `cloudflare-workers-ai/@cf/openai/gpt-oss-120b` | 128K | | | | | | $0.35 | $0.75 |
44
- | `cloudflare-workers-ai/@cf/openai/gpt-oss-20b` | 128K | | | | | | $0.20 | $0.30 |
45
- | `cloudflare-workers-ai/@cf/zai-org/glm-4.7-flash` | 131K | | | | | | $0.06 | $0.40 |
36
+ | Model | Context | Tools | Reasoning | Image | Audio | Video | Input $/1M | Output $/1M |
37
+ | -------------------------------------------------------------------- | ------- | ----- | --------- | ----- | ----- | ----- | ---------- | ----------- |
38
+ | `cloudflare-workers-ai/@cf/aisingapore/gemma-sea-lion-v4-27b-it` | 128K | | | | | | $0.35 | $0.56 |
39
+ | `cloudflare-workers-ai/@cf/deepseek-ai/deepseek-r1-distill-qwen-32b` | 80K | | | | | | $0.50 | $5 |
40
+ | `cloudflare-workers-ai/@cf/google/gemma-3-12b-it` | 80K | | | | | | $0.34 | $0.56 |
41
+ | `cloudflare-workers-ai/@cf/google/gemma-4-26b-a4b-it` | 256K | | | | | | $0.10 | $0.30 |
42
+ | `cloudflare-workers-ai/@cf/ibm-granite/granite-4.0-h-micro` | 131K | | | | | | $0.02 | $0.11 |
43
+ | `cloudflare-workers-ai/@cf/meta/llama-2-7b-chat-fp16` | 4K | | | | | | $0.56 | $7 |
44
+ | `cloudflare-workers-ai/@cf/meta/llama-3-8b-instruct` | 8K | | | | | | $0.28 | $0.83 |
45
+ | `cloudflare-workers-ai/@cf/meta/llama-3-8b-instruct-awq` | 8K | | | | | | $0.12 | $0.27 |
46
+ | `cloudflare-workers-ai/@cf/meta/llama-3.1-8b-instruct-awq` | 8K | | | | | | $0.12 | $0.27 |
47
+ | `cloudflare-workers-ai/@cf/meta/llama-3.1-8b-instruct-fp8` | 32K | | | | | | $0.15 | $0.29 |
48
+ | `cloudflare-workers-ai/@cf/meta/llama-3.2-11b-vision-instruct` | 128K | | | | | | $0.05 | $0.68 |
49
+ | `cloudflare-workers-ai/@cf/meta/llama-3.2-1b-instruct` | 60K | | | | | | $0.03 | $0.20 |
50
+ | `cloudflare-workers-ai/@cf/meta/llama-3.2-3b-instruct` | 80K | | | | | | $0.05 | $0.34 |
51
+ | `cloudflare-workers-ai/@cf/meta/llama-3.3-70b-instruct-fp8-fast` | 24K | | | | | | $0.29 | $2 |
52
+ | `cloudflare-workers-ai/@cf/meta/llama-4-scout-17b-16e-instruct` | 131K | | | | | | $0.27 | $0.85 |
53
+ | `cloudflare-workers-ai/@cf/meta/llama-guard-3-8b` | 131K | | | | | | $0.48 | $0.03 |
54
+ | `cloudflare-workers-ai/@cf/mistral/mistral-7b-instruct-v0.1` | 3K | | | | | | $0.11 | $0.19 |
55
+ | `cloudflare-workers-ai/@cf/mistralai/mistral-small-3.1-24b-instruct` | 128K | | | | | | $0.35 | $0.56 |
56
+ | `cloudflare-workers-ai/@cf/moonshotai/kimi-k2.5` | 256K | | | | | | $0.60 | $3 |
57
+ | `cloudflare-workers-ai/@cf/moonshotai/kimi-k2.6` | 262K | | | | | | $0.95 | $4 |
58
+ | `cloudflare-workers-ai/@cf/nvidia/nemotron-3-120b-a12b` | 256K | | | | | | $0.50 | $2 |
59
+ | `cloudflare-workers-ai/@cf/openai/gpt-oss-120b` | 128K | | | | | | $0.35 | $0.75 |
60
+ | `cloudflare-workers-ai/@cf/openai/gpt-oss-20b` | 128K | | | | | | $0.20 | $0.30 |
61
+ | `cloudflare-workers-ai/@cf/qwen/qwen2.5-coder-32b-instruct` | 33K | | | | | | $0.66 | $1 |
62
+ | `cloudflare-workers-ai/@cf/qwen/qwen3-30b-a3b-fp8` | 33K | | | | | | $0.05 | $0.34 |
63
+ | `cloudflare-workers-ai/@cf/qwen/qwq-32b` | 24K | | | | | | $0.66 | $1 |
64
+ | `cloudflare-workers-ai/@cf/zai-org/glm-4.7-flash` | 131K | | | | | | $0.06 | $0.40 |
46
65
 
47
66
  ## Advanced configuration
48
67
 
@@ -54,7 +73,7 @@ const agent = new Agent({
54
73
  name: "custom-agent",
55
74
  model: {
56
75
  url: "https://api.cloudflare.com/client/v4/accounts/${CLOUDFLARE_ACCOUNT_ID}/ai/v1",
57
- id: "cloudflare-workers-ai/@cf/google/gemma-4-26b-a4b-it",
76
+ id: "cloudflare-workers-ai/@cf/aisingapore/gemma-sea-lion-v4-27b-it",
58
77
  apiKey: process.env.CLOUDFLARE_API_KEY,
59
78
  headers: {
60
79
  "X-Custom-Header": "value"
@@ -73,7 +92,7 @@ const agent = new Agent({
73
92
  const useAdvanced = requestContext.task === "complex";
74
93
  return useAdvanced
75
94
  ? "cloudflare-workers-ai/@cf/zai-org/glm-4.7-flash"
76
- : "cloudflare-workers-ai/@cf/google/gemma-4-26b-a4b-it";
95
+ : "cloudflare-workers-ai/@cf/aisingapore/gemma-sea-lion-v4-27b-it";
77
96
  }
78
97
  });
79
98
  ```
@@ -38,8 +38,8 @@ for await (const chunk of stream) {
38
38
  | `deepinfra/deepseek-ai/DeepSeek-V3.2` | 164K | | | | | | $0.26 | $0.38 |
39
39
  | `deepinfra/deepseek-ai/DeepSeek-V4-Flash` | 1.0M | | | | | | $0.14 | $0.28 |
40
40
  | `deepinfra/deepseek-ai/DeepSeek-V4-Pro` | 66K | | | | | | $2 | $3 |
41
- | `deepinfra/google/gemma-4-26B-A4B-it` | 256K | | | | | | $0.07 | $0.34 |
42
- | `deepinfra/google/gemma-4-31B-it` | 256K | | | | | | $0.13 | $0.38 |
41
+ | `deepinfra/google/gemma-4-26B-A4B-it` | 262K | | | | | | $0.07 | $0.34 |
42
+ | `deepinfra/google/gemma-4-31B-it` | 262K | | | | | | $0.13 | $0.38 |
43
43
  | `deepinfra/meta-llama/Llama-3.1-70B-Instruct` | 131K | | | | | | $0.40 | $0.40 |
44
44
  | `deepinfra/meta-llama/Llama-3.1-70B-Instruct-Turbo` | 131K | | | | | | $0.40 | $0.40 |
45
45
  | `deepinfra/meta-llama/Llama-3.1-8B-Instruct` | 131K | | | | | | $0.02 | $0.05 |
@@ -1,6 +1,6 @@
1
1
  # ![Fireworks AI logo](https://models.dev/logos/fireworks-ai.svg)Fireworks AI
2
2
 
3
- Access 19 Fireworks AI models through Mastra's model router. Authentication is handled automatically using the `FIREWORKS_API_KEY` environment variable.
3
+ Access 20 Fireworks AI models through Mastra's model router. Authentication is handled automatically using the `FIREWORKS_API_KEY` environment variable.
4
4
 
5
5
  Learn more in the [Fireworks AI documentation](https://fireworks.ai/docs/).
6
6
 
@@ -32,27 +32,28 @@ for await (const chunk of stream) {
32
32
 
33
33
  ## Models
34
34
 
35
- | Model | Context | Tools | Reasoning | Image | Audio | Video | Input $/1M | Output $/1M |
36
- | --------------------------------------------------------- | ------- | ----- | --------- | ----- | ----- | ----- | ---------- | ----------- |
37
- | `fireworks-ai/accounts/fireworks/models/deepseek-v3p1` | 164K | | | | | | $0.56 | $2 |
38
- | `fireworks-ai/accounts/fireworks/models/deepseek-v3p2` | 160K | | | | | | $0.56 | $2 |
39
- | `fireworks-ai/accounts/fireworks/models/deepseek-v4-pro` | 1.0M | | | | | | $2 | $3 |
40
- | `fireworks-ai/accounts/fireworks/models/glm-4p5` | 131K | | | | | | $0.55 | $2 |
41
- | `fireworks-ai/accounts/fireworks/models/glm-4p5-air` | 131K | | | | | | $0.22 | $0.88 |
42
- | `fireworks-ai/accounts/fireworks/models/glm-4p7` | 198K | | | | | | $0.60 | $2 |
43
- | `fireworks-ai/accounts/fireworks/models/glm-5` | 203K | | | | | | $1 | $3 |
44
- | `fireworks-ai/accounts/fireworks/models/glm-5p1` | 203K | | | | | | $1 | $4 |
45
- | `fireworks-ai/accounts/fireworks/models/gpt-oss-120b` | 131K | | | | | | $0.15 | $0.60 |
46
- | `fireworks-ai/accounts/fireworks/models/gpt-oss-20b` | 131K | | | | | | $0.05 | $0.20 |
47
- | `fireworks-ai/accounts/fireworks/models/kimi-k2-instruct` | 128K | | | | | | $1 | $3 |
48
- | `fireworks-ai/accounts/fireworks/models/kimi-k2-thinking` | 256K | | | | | | $0.60 | $3 |
49
- | `fireworks-ai/accounts/fireworks/models/kimi-k2p5` | 256K | | | | | | $0.60 | $3 |
50
- | `fireworks-ai/accounts/fireworks/models/kimi-k2p6` | 262K | | | | | | $0.95 | $4 |
51
- | `fireworks-ai/accounts/fireworks/models/minimax-m2p1` | 200K | | | | | | $0.30 | $1 |
52
- | `fireworks-ai/accounts/fireworks/models/minimax-m2p5` | 197K | | | | | | $0.30 | $1 |
53
- | `fireworks-ai/accounts/fireworks/models/minimax-m2p7` | 197K | | | | | | $0.30 | $1 |
54
- | `fireworks-ai/accounts/fireworks/models/qwen3p6-plus` | 128K | | | | | | $0.50 | $3 |
55
- | `fireworks-ai/accounts/fireworks/routers/kimi-k2p5-turbo` | 256K | | | | | | | |
35
+ | Model | Context | Tools | Reasoning | Image | Audio | Video | Input $/1M | Output $/1M |
36
+ | ---------------------------------------------------------- | ------- | ----- | --------- | ----- | ----- | ----- | ---------- | ----------- |
37
+ | `fireworks-ai/accounts/fireworks/models/deepseek-v3p1` | 164K | | | | | | $0.56 | $2 |
38
+ | `fireworks-ai/accounts/fireworks/models/deepseek-v3p2` | 160K | | | | | | $0.56 | $2 |
39
+ | `fireworks-ai/accounts/fireworks/models/deepseek-v4-flash` | 1.0M | | | | | | $0.14 | $0.28 |
40
+ | `fireworks-ai/accounts/fireworks/models/deepseek-v4-pro` | 1.0M | | | | | | $2 | $3 |
41
+ | `fireworks-ai/accounts/fireworks/models/glm-4p5` | 131K | | | | | | $0.55 | $2 |
42
+ | `fireworks-ai/accounts/fireworks/models/glm-4p5-air` | 131K | | | | | | $0.22 | $0.88 |
43
+ | `fireworks-ai/accounts/fireworks/models/glm-4p7` | 198K | | | | | | $0.60 | $2 |
44
+ | `fireworks-ai/accounts/fireworks/models/glm-5` | 203K | | | | | | $1 | $3 |
45
+ | `fireworks-ai/accounts/fireworks/models/glm-5p1` | 203K | | | | | | $1 | $4 |
46
+ | `fireworks-ai/accounts/fireworks/models/gpt-oss-120b` | 131K | | | | | | $0.15 | $0.60 |
47
+ | `fireworks-ai/accounts/fireworks/models/gpt-oss-20b` | 131K | | | | | | $0.05 | $0.20 |
48
+ | `fireworks-ai/accounts/fireworks/models/kimi-k2-instruct` | 128K | | | | | | $1 | $3 |
49
+ | `fireworks-ai/accounts/fireworks/models/kimi-k2-thinking` | 256K | | | | | | $0.60 | $3 |
50
+ | `fireworks-ai/accounts/fireworks/models/kimi-k2p5` | 256K | | | | | | $0.60 | $3 |
51
+ | `fireworks-ai/accounts/fireworks/models/kimi-k2p6` | 262K | | | | | | $0.95 | $4 |
52
+ | `fireworks-ai/accounts/fireworks/models/minimax-m2p1` | 200K | | | | | | $0.30 | $1 |
53
+ | `fireworks-ai/accounts/fireworks/models/minimax-m2p5` | 197K | | | | | | $0.30 | $1 |
54
+ | `fireworks-ai/accounts/fireworks/models/minimax-m2p7` | 197K | | | | | | $0.30 | $1 |
55
+ | `fireworks-ai/accounts/fireworks/models/qwen3p6-plus` | 128K | | | | | | $0.50 | $3 |
56
+ | `fireworks-ai/accounts/fireworks/routers/kimi-k2p5-turbo` | 256K | | | | | | — | — |
56
57
 
57
58
  ## Advanced configuration
58
59