@mastra/mcp-docs-server 1.1.34 → 1.1.35-alpha.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/docs/agents/background-tasks.md +62 -2
- package/.docs/docs/agents/processors.md +26 -2
- package/.docs/docs/memory/observational-memory.md +2 -1
- package/.docs/docs/memory/overview.md +2 -1
- package/.docs/guides/deployment/inngest.md +45 -16
- package/.docs/guides/guide/web-search.md +7 -7
- package/.docs/models/gateways/azure-openai.md +94 -23
- package/.docs/models/gateways/netlify.md +2 -1
- package/.docs/models/gateways/openrouter.md +2 -1
- package/.docs/models/index.md +1 -1
- package/.docs/models/providers/deepinfra.md +2 -1
- package/.docs/models/providers/digitalocean.md +2 -1
- package/.docs/models/providers/kiro.md +110 -0
- package/.docs/models/providers/llmgateway.md +1 -1
- package/.docs/models/providers/opencode-go.md +2 -4
- package/.docs/models/providers/opencode.md +1 -1
- package/.docs/models/providers/qiniu-ai.md +2 -2
- package/.docs/models/providers/xiaomi.md +2 -2
- package/.docs/models/providers/zenmux.md +1 -1
- package/.docs/models/providers.md +1 -0
- package/.docs/reference/cli/mastra.md +464 -0
- package/.docs/reference/client-js/agents.md +26 -1
- package/.docs/reference/harness/harness-class.md +2 -0
- package/.docs/reference/index.md +1 -0
- package/.docs/reference/processors/processor-interface.md +74 -12
- package/.docs/reference/processors/provider-history-compat.md +132 -0
- package/.docs/reference/streaming/ChunkType.md +44 -0
- package/.docs/reference/streaming/agents/stream.md +18 -2
- package/.docs/reference/tools/mcp-client.md +47 -0
- package/CHANGELOG.md +35 -0
- package/package.json +4 -4
|
@@ -127,10 +127,12 @@ When a tool call dispatches as a background task, two streams may surface lifecy
|
|
|
127
127
|
| `background-task-completed` | The task finished successfully. The `payload.result` matches the eventual tool result. | Manager stream |
|
|
128
128
|
| `background-task-failed` | The task threw or timed out. | Manager stream |
|
|
129
129
|
| `background-task-cancelled` | The task was cancelled before completing. | Manager stream |
|
|
130
|
+
| `background-task-suspended` | The tool called `suspend()` from inside its execute. | Manager stream |
|
|
131
|
+
| `background-task-resumed` | A suspended task was resumed via `manager.resume(taskId, resumeData)`. | Manager stream |
|
|
130
132
|
|
|
131
|
-
`agent.stream().fullStream` only emits the agent-loop chunks (`background-task-started`, `background-task-progress`) on its own. `agent.streamUntilIdle()` emits the same two chunks and additionally subscribes to the manager pubsub for the run's memory scope and pipes the
|
|
133
|
+
`agent.stream().fullStream` only emits the agent-loop chunks (`background-task-started`, `background-task-progress`) on its own. `agent.streamUntilIdle()` emits the same two chunks and additionally subscribes to the manager pubsub for the run's memory scope and pipes the seven manager chunks (`background-task-running`, `background-task-output`, `background-task-completed`, `background-task-failed`, `background-task-cancelled`, `background-task-suspended`, `background-task-resumed`) into the same `fullStream`.
|
|
132
134
|
|
|
133
|
-
`backgroundTaskManager.stream()` only emits the
|
|
135
|
+
`backgroundTaskManager.stream()` only emits the seven manager chunks.
|
|
134
136
|
|
|
135
137
|
The full payload shapes are documented in the [background task chunks reference](https://mastra.ai/reference/streaming/ChunkType).
|
|
136
138
|
|
|
@@ -210,6 +212,64 @@ When this `researchAgent` is delegated to from a supervisor that has no backgrou
|
|
|
210
212
|
|
|
211
213
|
Use this pattern when you want a subagent to behave consistently in the background regardless of which supervisor invokes it. Use the supervisor-side opt-in (above) when you want to tune background behavior centrally per supervisor.
|
|
212
214
|
|
|
215
|
+
## Suspending and resuming
|
|
216
|
+
|
|
217
|
+
A background task can pause itself mid-execution and wait for an external signal before continuing. This is useful for human approvals, webhooks, or any flow where the next step depends on data that arrives later.
|
|
218
|
+
|
|
219
|
+
A tool calls `suspend(data)` from inside its `execute`, which:
|
|
220
|
+
|
|
221
|
+
- Persists `status: 'suspended'` and the `data` payload on the task record.
|
|
222
|
+
- Saves the workflow snapshot so the run survives process restarts.
|
|
223
|
+
- Emits a `background-task-suspended` chunk on the manager stream.
|
|
224
|
+
- Releases the concurrency slot so other tasks can run.
|
|
225
|
+
|
|
226
|
+
Resume the task with `mastra.backgroundTaskManager.resume(taskId, resumeData)`. The `resumeData` arrives in the tool's `execute` options on the resumed run, and the task transitions back to `running`.
|
|
227
|
+
|
|
228
|
+
```typescript
|
|
229
|
+
import { createTool } from '@mastra/core/tools'
|
|
230
|
+
import { z } from 'zod'
|
|
231
|
+
|
|
232
|
+
export const reviewTool = createTool({
|
|
233
|
+
id: 'review',
|
|
234
|
+
description: 'Submit a draft for human review.',
|
|
235
|
+
inputSchema: z.object({ draft: z.string() }),
|
|
236
|
+
outputSchema: z.object({ approvedBy: z.string(), edits: z.string().optional() }),
|
|
237
|
+
background: { enabled: true },
|
|
238
|
+
execute: async ({ draft }, context) => {
|
|
239
|
+
const { suspend, resumeData } = context.agent
|
|
240
|
+
if (!resumeData) {
|
|
241
|
+
await suspend?.({ awaiting: 'approval', draft })
|
|
242
|
+
return { approvedBy: '', edits: undefined }
|
|
243
|
+
}
|
|
244
|
+
const { reviewer, edits } = resumeData as { reviewer: string; edits?: string }
|
|
245
|
+
return { approvedBy: reviewer, edits }
|
|
246
|
+
},
|
|
247
|
+
})
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
The first invocation of `execute` sees `resumeData === undefined` and calls `suspend`. After the task is resumed, the runtime restarts the tool with `resumeData` populated, so the `if (!resumeData)` branch is skipped and the tool returns its real result.
|
|
251
|
+
|
|
252
|
+
To resume the task once an approval arrives:
|
|
253
|
+
|
|
254
|
+
```typescript
|
|
255
|
+
await mastra.backgroundTaskManager?.resume(taskId, {
|
|
256
|
+
reviewer: 'alice@example.com',
|
|
257
|
+
edits: 'Reworded paragraph 3.',
|
|
258
|
+
})
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
### What happens to the agent loop
|
|
262
|
+
|
|
263
|
+
When a task suspends mid-`streamUntilIdle()`, the wrapper treats it as terminal for the current iteration and closes. To continue the agent immediately when the resume payload is in hand, call `agent.resumeStreamUntilIdle(resumeData, { runId, toolCallId, memory })`: the resumed background task runs to completion, its result lands in the message list, and the agent runs a follow-up turn — all on the same SSE connection. If you'd rather drive the resume out-of-band, call `mastra.backgroundTaskManager.resume(taskId, resumeData)` directly and the result still writes into the thread for the next user turn to pick up.
|
|
264
|
+
|
|
265
|
+
### Re-registering the executor on resume
|
|
266
|
+
|
|
267
|
+
The manager keeps tool executors in process memory. If the process restarts while a task is suspended, the executor closure is gone — the caller of `resume()` must re-register it first via `manager.registerTaskContext(taskId, ...)`. Tasks dispatched and resumed inside the same process don't need this.
|
|
268
|
+
|
|
269
|
+
### Cancelling a suspended task
|
|
270
|
+
|
|
271
|
+
`manager.cancel(taskId)` works against suspended tasks the same way it works for running ones: the row flips to `cancelled`, the workflow snapshot is cleaned up, and a `task.cancelled` event fires.
|
|
272
|
+
|
|
213
273
|
## Lifecycle callbacks
|
|
214
274
|
|
|
215
275
|
Each layer can register terminal-state callbacks. They don't replace one another, and success/failure hooks fire for their respective outcomes:
|
|
@@ -211,6 +211,22 @@ The method receives the current `stepNumber`, `model`, `tools`, `toolChoice`, `m
|
|
|
211
211
|
|
|
212
212
|
See the [`Processor` reference](https://mastra.ai/reference/processors/processor-interface) for all available arguments and return types.
|
|
213
213
|
|
|
214
|
+
### Rewrite the LLM request before the provider call
|
|
215
|
+
|
|
216
|
+
Use `processLLMRequest()` when you need to rewrite the final prompt that Mastra sends to the model. This hook runs after Mastra converts the `MessageList` into the provider-facing prompt format (`LanguageModelV2Prompt`) and immediately before the provider call.
|
|
217
|
+
|
|
218
|
+
Use the message-based hooks (`processInput()` and `processInputStep()`) for conversation changes, and `processLLMRequest()` for prompt-only changes:
|
|
219
|
+
|
|
220
|
+
- `processInput()`: Change the conversation once before the agentic loop starts.
|
|
221
|
+
- `processInputStep()`: Change messages or step configuration before each LLM call.
|
|
222
|
+
- `processLLMRequest()`: Change only the outbound prompt for the current provider call.
|
|
223
|
+
|
|
224
|
+
Changes returned from `processLLMRequest()` are transient. They don't persist back to `MessageList`, memory, UI history, or future provider calls. This makes the hook a good fit for provider compatibility rewrites, role/content normalization, or other model-specific prompt changes that shouldn't alter stored conversation history.
|
|
225
|
+
|
|
226
|
+
The method receives `prompt`, `model`, `stepNumber`, `steps`, `state`, and the shared processor context. Calling `abort()` from `processLLMRequest()` emits the normal tripwire response and stops the call.
|
|
227
|
+
|
|
228
|
+
See the [`Processor` reference](https://mastra.ai/reference/processors/processor-interface) for all available arguments and return types.
|
|
229
|
+
|
|
214
230
|
### Use the `prepareStep()` callback
|
|
215
231
|
|
|
216
232
|
The `prepareStep()` callback on `generate()` or `stream()` is a shorthand for `processInputStep()`. Internally, Mastra wraps it in a processor that calls your function at each step. It accepts the same arguments and return type as `processInputStep()`, but doesn't require creating a class:
|
|
@@ -317,7 +333,7 @@ For more on retry behavior, see [Retry mechanism](#retry-mechanism) in Advanced
|
|
|
317
333
|
|
|
318
334
|
### Persist data across chunks and steps
|
|
319
335
|
|
|
320
|
-
Output methods receive a `state` object that persists for the lifetime of one request. State is keyed by the processor's `id`, so each processor sees only its own data, and it
|
|
336
|
+
Output methods receive a `state` object that persists for the lifetime of one request. State is keyed by the processor's `id`, so each processor sees only its own data, and it's shared between `processOutputStream`, `processOutputStep`, and `processOutputResult`. A new state object is created for every new `agent.generate()` or `agent.stream()` call.
|
|
321
337
|
|
|
322
338
|
```typescript
|
|
323
339
|
import type { Processor } from '@mastra/core/processors'
|
|
@@ -383,6 +399,14 @@ Enables dynamic tool discovery for agents with large tool libraries. Instead of
|
|
|
383
399
|
|
|
384
400
|
See the [`ToolSearchProcessor` reference](https://mastra.ai/reference/processors/tool-search-processor) for configuration options and usage examples.
|
|
385
401
|
|
|
402
|
+
### `ProviderHistoryCompat`
|
|
403
|
+
|
|
404
|
+
Handles provider-specific history incompatibilities when agents reuse messages across model providers. It can rewrite the outbound LLM request before the provider call, or recover from known provider API errors and retry.
|
|
405
|
+
|
|
406
|
+
Add `ProviderHistoryCompat` explicitly when you need provider history compatibility rules, reactive API error recovery, custom compatibility rules, or predictable processor ordering.
|
|
407
|
+
|
|
408
|
+
See the [`ProviderHistoryCompat` reference](https://mastra.ai/reference/processors/provider-history-compat) for setup, built-in rules, and custom rule options.
|
|
409
|
+
|
|
386
410
|
## Advanced patterns
|
|
387
411
|
|
|
388
412
|
### Ensure a final response with `maxSteps`
|
|
@@ -494,7 +518,7 @@ for await (const chunk of stream.fullStream) {
|
|
|
494
518
|
|
|
495
519
|
Custom chunk types must use the `data-` prefix (e.g., `data-moderation-update`, `data-status`).
|
|
496
520
|
|
|
497
|
-
By default, `processOutputStream()` skips `data-*` chunks so it
|
|
521
|
+
By default, `processOutputStream()` skips `data-*` chunks so it doesn't accidentally operate on tool telemetry or other processors' output. To inspect, modify, or block these chunks in a processor, set `processDataParts = true` on that processor:
|
|
498
522
|
|
|
499
523
|
```typescript
|
|
500
524
|
class ModerationCollector implements Processor {
|
|
@@ -458,4 +458,5 @@ In practical terms, OM replaces both working memory and message history, and has
|
|
|
458
458
|
- [Observational Memory Reference](https://mastra.ai/reference/memory/observational-memory)
|
|
459
459
|
- [Memory Overview](https://mastra.ai/docs/memory/overview)
|
|
460
460
|
- [Message History](https://mastra.ai/docs/memory/message-history)
|
|
461
|
-
- [Memory Processors](https://mastra.ai/docs/memory/memory-processors)
|
|
461
|
+
- [Memory Processors](https://mastra.ai/docs/memory/memory-processors)
|
|
462
|
+
- [Mastra Code](https://code.mastra.ai/): A coding agent using Observational Memory
|
|
@@ -237,4 +237,5 @@ export const memoryAgent = new Agent({
|
|
|
237
237
|
|
|
238
238
|
- [`Memory` reference](https://mastra.ai/reference/memory/memory-class)
|
|
239
239
|
- [Tracing](https://mastra.ai/docs/observability/tracing/overview)
|
|
240
|
-
- [Request Context](https://mastra.ai/docs/server/request-context)
|
|
240
|
+
- [Request Context](https://mastra.ai/docs/server/request-context)
|
|
241
|
+
- [Mastra Code](https://code.mastra.ai/): A coding agent using Mastra's memory system
|
|
@@ -21,27 +21,29 @@ Install the required packages:
|
|
|
21
21
|
**npm**:
|
|
22
22
|
|
|
23
23
|
```bash
|
|
24
|
-
npm install @mastra/inngest@latest inngest
|
|
24
|
+
npm install @mastra/inngest@latest inngest
|
|
25
25
|
```
|
|
26
26
|
|
|
27
27
|
**pnpm**:
|
|
28
28
|
|
|
29
29
|
```bash
|
|
30
|
-
pnpm add @mastra/inngest@latest inngest
|
|
30
|
+
pnpm add @mastra/inngest@latest inngest
|
|
31
31
|
```
|
|
32
32
|
|
|
33
33
|
**Yarn**:
|
|
34
34
|
|
|
35
35
|
```bash
|
|
36
|
-
yarn add @mastra/inngest@latest inngest
|
|
36
|
+
yarn add @mastra/inngest@latest inngest
|
|
37
37
|
```
|
|
38
38
|
|
|
39
39
|
**Bun**:
|
|
40
40
|
|
|
41
41
|
```bash
|
|
42
|
-
bun add @mastra/inngest@latest inngest
|
|
42
|
+
bun add @mastra/inngest@latest inngest
|
|
43
43
|
```
|
|
44
44
|
|
|
45
|
+
> **Note:** Requires `inngest@^4` and Inngest Dev Server `v1.18.0` or later. Realtime is built into the SDK in v4, so `@inngest/realtime` and `realtimeMiddleware` are no longer used.
|
|
46
|
+
|
|
45
47
|
## Building an Inngest workflow
|
|
46
48
|
|
|
47
49
|
This guide walks through creating a workflow with Inngest and Mastra, demonstrating a counter application that increments a value until it reaches 10.
|
|
@@ -54,13 +56,11 @@ In development:
|
|
|
54
56
|
|
|
55
57
|
```ts
|
|
56
58
|
import { Inngest } from 'inngest'
|
|
57
|
-
import { realtimeMiddleware } from '@inngest/realtime/middleware'
|
|
58
59
|
|
|
59
60
|
export const inngest = new Inngest({
|
|
60
61
|
id: 'mastra',
|
|
61
62
|
baseUrl: 'http://localhost:8288',
|
|
62
63
|
isDev: true,
|
|
63
|
-
middleware: [realtimeMiddleware()],
|
|
64
64
|
})
|
|
65
65
|
```
|
|
66
66
|
|
|
@@ -68,11 +68,9 @@ In production:
|
|
|
68
68
|
|
|
69
69
|
```ts
|
|
70
70
|
import { Inngest } from 'inngest'
|
|
71
|
-
import { realtimeMiddleware } from '@inngest/realtime/middleware'
|
|
72
71
|
|
|
73
72
|
export const inngest = new Inngest({
|
|
74
73
|
id: 'mastra',
|
|
75
|
-
middleware: [realtimeMiddleware()],
|
|
76
74
|
})
|
|
77
75
|
```
|
|
78
76
|
|
|
@@ -141,7 +139,7 @@ export const mastra = new Mastra({
|
|
|
141
139
|
host: '0.0.0.0',
|
|
142
140
|
apiRoutes: [
|
|
143
141
|
{
|
|
144
|
-
path: '/api
|
|
142
|
+
path: '/inngest/api',
|
|
145
143
|
method: 'ALL',
|
|
146
144
|
createHandler: async ({ mastra }) => {
|
|
147
145
|
return serve({ mastra, inngest })
|
|
@@ -153,6 +151,8 @@ export const mastra = new Mastra({
|
|
|
153
151
|
})
|
|
154
152
|
```
|
|
155
153
|
|
|
154
|
+
> **Note:** The path is `/inngest/api`, not `/api/inngest`. Mastra reserves the `/api` prefix for built-in routes (agents, workflows, memory). Custom `apiRoutes` paths that start with the server's `apiPrefix` (default `/api`) throw at startup. See [#15743](https://github.com/mastra-ai/mastra/pull/15743) for context, or skip to [Using a custom `apiPrefix`](#using-a-custom-apiprefix) if you need to keep `/api/inngest`.
|
|
155
|
+
|
|
156
156
|
## Running workflows
|
|
157
157
|
|
|
158
158
|
### Running locally
|
|
@@ -162,10 +162,10 @@ export const mastra = new Mastra({
|
|
|
162
162
|
2. Start the Inngest Dev Server. In a new terminal, run:
|
|
163
163
|
|
|
164
164
|
```bash
|
|
165
|
-
npx inngest-cli@latest dev -u http://localhost:4111/api
|
|
165
|
+
npx inngest-cli@latest dev -u http://localhost:4111/inngest/api
|
|
166
166
|
```
|
|
167
167
|
|
|
168
|
-
> **Note:** The URL after `-u` tells the Inngest dev server where to find your Mastra `/api
|
|
168
|
+
> **Note:** The URL after `-u` tells the Inngest dev server where to find your Mastra `/inngest/api` endpoint.
|
|
169
169
|
|
|
170
170
|
3. Open the Inngest Dashboard at <http://localhost:8288> and go to the **Apps** section in the sidebar to verify your Mastra workflow is registered
|
|
171
171
|
|
|
@@ -229,6 +229,8 @@ Before you begin, make sure you have:
|
|
|
229
229
|
|
|
230
230
|
5. Sync with the [Inngest dashboard](https://app.inngest.com/env/production/apps) by selecting **Sync new app with Vercel** and following the instructions
|
|
231
231
|
|
|
232
|
+
> **Warning:** Inngest's auto-discover convention assumes `/api/inngest`. Because this guide uses `/inngest/api`, set the **URL** field on the Inngest app to your deployed origin plus `/inngest/api` (for example `https://your-app.vercel.app/inngest/api`). If you leave it on the default, the Inngest dashboard will not find your app's functions.
|
|
233
|
+
|
|
232
234
|
6. Invoke the workflow by going to **Functions**, selecting `workflow.increment-workflow`, selecting **All actions** > **Invoke**, and providing the following input:
|
|
233
235
|
|
|
234
236
|
```json
|
|
@@ -294,7 +296,7 @@ export const mastra = new Mastra({
|
|
|
294
296
|
host: '0.0.0.0',
|
|
295
297
|
apiRoutes: [
|
|
296
298
|
{
|
|
297
|
-
path: '/api
|
|
299
|
+
path: '/inngest/api',
|
|
298
300
|
method: 'ALL',
|
|
299
301
|
createHandler: async ({ mastra }) => {
|
|
300
302
|
return serve({
|
|
@@ -316,7 +318,7 @@ When you include custom functions:
|
|
|
316
318
|
|
|
317
319
|
1. Mastra workflows are automatically converted to Inngest functions with IDs like `workflow.${workflowId}`
|
|
318
320
|
2. Custom functions retain their specified IDs (e.g., `send-welcome-email`, `process-webhook`)
|
|
319
|
-
3. All functions are served together on the same `/api
|
|
321
|
+
3. All functions are served together on the same `/inngest/api` endpoint
|
|
320
322
|
|
|
321
323
|
This allows you to combine Mastra's workflow orchestration with your existing Inngest functions.
|
|
322
324
|
|
|
@@ -338,7 +340,7 @@ const app = express()
|
|
|
338
340
|
app.use(express.json())
|
|
339
341
|
|
|
340
342
|
const handler = createServe(expressAdapter)({ mastra, inngest })
|
|
341
|
-
app.use('/api
|
|
343
|
+
app.use('/inngest/api', handler)
|
|
342
344
|
|
|
343
345
|
app.listen(3000)
|
|
344
346
|
```
|
|
@@ -358,7 +360,7 @@ const handler = createServe(fastifyAdapter)({ mastra, inngest })
|
|
|
358
360
|
|
|
359
361
|
fastify.route({
|
|
360
362
|
method: ['GET', 'POST', 'PUT'],
|
|
361
|
-
url: '/api
|
|
363
|
+
url: '/inngest/api',
|
|
362
364
|
handler,
|
|
363
365
|
})
|
|
364
366
|
|
|
@@ -382,7 +384,7 @@ const router = new Router()
|
|
|
382
384
|
app.use(bodyParser())
|
|
383
385
|
|
|
384
386
|
const handler = createServe(koaAdapter)({ mastra, inngest })
|
|
385
|
-
router.all('/api
|
|
387
|
+
router.all('/inngest/api', handler)
|
|
386
388
|
|
|
387
389
|
app.use(router.routes())
|
|
388
390
|
app.use(router.allowedMethods())
|
|
@@ -406,6 +408,33 @@ export { handler as GET, handler as POST, handler as PUT }
|
|
|
406
408
|
|
|
407
409
|
The `createServe` function works with any Inngest adapter. See the [Inngest serve documentation](https://www.inngest.com/docs/reference/serve) for a complete list of available adapters including AWS Lambda, Cloudflare Workers, and more.
|
|
408
410
|
|
|
411
|
+
## Using a custom `apiPrefix`
|
|
412
|
+
|
|
413
|
+
If you need to keep `/api/inngest` (for example to match Inngest's auto-discover convention without changing the dashboard URL), set `server.apiPrefix` to relocate Mastra's built-in routes:
|
|
414
|
+
|
|
415
|
+
```ts
|
|
416
|
+
import { Mastra } from '@mastra/core'
|
|
417
|
+
import { serve } from '@mastra/inngest'
|
|
418
|
+
import { inngest } from './inngest'
|
|
419
|
+
|
|
420
|
+
export const mastra = new Mastra({
|
|
421
|
+
server: {
|
|
422
|
+
apiPrefix: '/_mastra',
|
|
423
|
+
apiRoutes: [
|
|
424
|
+
{
|
|
425
|
+
path: '/api/inngest',
|
|
426
|
+
method: 'ALL',
|
|
427
|
+
createHandler: async ({ mastra }) => serve({ mastra, inngest }),
|
|
428
|
+
},
|
|
429
|
+
],
|
|
430
|
+
},
|
|
431
|
+
})
|
|
432
|
+
```
|
|
433
|
+
|
|
434
|
+
Mastra's built-in routes now resolve under `/_mastra/agents`, `/_mastra/workflows`, and so on, freeing the `/api/inngest` path for your custom route.
|
|
435
|
+
|
|
436
|
+
> **Warning:** The default auth configuration protects `/api/*` and treats `/api` and `/api/auth/*` as public. When you change `apiPrefix`, those defaults no longer match and built-in routes fall outside the protected pattern. Update `server.auth.protected` and `server.auth.public` to reference the new prefix, and update any client code (including [`MastraClient`](https://mastra.ai/docs/server/mastra-client) `apiPrefix`) that hits `/api/*`.
|
|
437
|
+
|
|
409
438
|
## Flow control
|
|
410
439
|
|
|
411
440
|
Inngest workflows support flow control features including concurrency limits, rate limiting, throttling, debouncing, and priority queuing. These options are configured in the `createWorkflow()` call and help manage workflow execution at scale.
|
|
@@ -17,7 +17,7 @@ Some LLM providers include built-in web search capabilities that can be used dir
|
|
|
17
17
|
|
|
18
18
|
1. Install dependencies
|
|
19
19
|
|
|
20
|
-
**
|
|
20
|
+
**OpenAI**:
|
|
21
21
|
|
|
22
22
|
**npm**:
|
|
23
23
|
|
|
@@ -119,7 +119,7 @@ Some LLM providers include built-in web search capabilities that can be used dir
|
|
|
119
119
|
|
|
120
120
|
2. Create a new file `src/mastra/agents/searchAgent.ts` and define your agent:
|
|
121
121
|
|
|
122
|
-
**
|
|
122
|
+
**OpenAI**:
|
|
123
123
|
|
|
124
124
|
```ts
|
|
125
125
|
import { Agent } from '@mastra/core/agent'
|
|
@@ -128,7 +128,7 @@ Some LLM providers include built-in web search capabilities that can be used dir
|
|
|
128
128
|
id: 'search-agent',
|
|
129
129
|
name: 'Search Agent',
|
|
130
130
|
instructions: 'You are a search agent that can search the web for information.',
|
|
131
|
-
model: 'openai/gpt-5.
|
|
131
|
+
model: 'openai/gpt-5.5',
|
|
132
132
|
})
|
|
133
133
|
```
|
|
134
134
|
|
|
@@ -147,7 +147,7 @@ Some LLM providers include built-in web search capabilities that can be used dir
|
|
|
147
147
|
|
|
148
148
|
3. Set up the tool:
|
|
149
149
|
|
|
150
|
-
**
|
|
150
|
+
**OpenAI**:
|
|
151
151
|
|
|
152
152
|
```ts
|
|
153
153
|
import { openai } from '@ai-sdk/openai'
|
|
@@ -157,7 +157,7 @@ Some LLM providers include built-in web search capabilities that can be used dir
|
|
|
157
157
|
id: 'search-agent',
|
|
158
158
|
name: 'Search Agent',
|
|
159
159
|
instructions: 'You are a search agent that can search the web for information.',
|
|
160
|
-
model: 'openai/gpt-5.
|
|
160
|
+
model: 'openai/gpt-5.5',
|
|
161
161
|
tools: {
|
|
162
162
|
webSearch: openai.tools.webSearch(),
|
|
163
163
|
},
|
|
@@ -241,7 +241,7 @@ For more control over search behavior, you can integrate external search APIs as
|
|
|
241
241
|
id: 'search-agent',
|
|
242
242
|
name: 'Search Agent',
|
|
243
243
|
instructions: 'You are a search agent that can search the web for information.',
|
|
244
|
-
model: 'openai/gpt-5.
|
|
244
|
+
model: 'openai/gpt-5.5',
|
|
245
245
|
})
|
|
246
246
|
```
|
|
247
247
|
|
|
@@ -293,7 +293,7 @@ For more control over search behavior, you can integrate external search APIs as
|
|
|
293
293
|
id: 'search-agent',
|
|
294
294
|
name: 'Search Agent',
|
|
295
295
|
instructions: 'You are a search agent that can search the web for information.',
|
|
296
|
-
model: 'openai/gpt-5.
|
|
296
|
+
model: 'openai/gpt-5.5',
|
|
297
297
|
tools: {
|
|
298
298
|
webSearch,
|
|
299
299
|
},
|
|
@@ -13,7 +13,7 @@ const agent = new Agent({
|
|
|
13
13
|
id: "my-agent",
|
|
14
14
|
name: "My Agent",
|
|
15
15
|
instructions: "You are a helpful assistant",
|
|
16
|
-
model: "azure-openai/my-
|
|
16
|
+
model: "azure-openai/my-gpt-5-4-deployment" // Use your Azure deployment name (autocompleted in dev mode)
|
|
17
17
|
});
|
|
18
18
|
|
|
19
19
|
// Generate a response
|
|
@@ -34,9 +34,9 @@ Azure model IDs follow this pattern: `azure-openai/your-deployment-name`
|
|
|
34
34
|
|
|
35
35
|
The deployment name is **specific to your Azure account** and chosen when you create a deployment in Azure Portal. Common examples:
|
|
36
36
|
|
|
37
|
-
- `azure-openai/my-
|
|
38
|
-
- `azure-openai/production-gpt-
|
|
39
|
-
- `azure-openai/staging-gpt-
|
|
37
|
+
- `azure-openai/my-gpt-5-4-deployment`
|
|
38
|
+
- `azure-openai/production-gpt-5-4`
|
|
39
|
+
- `azure-openai/staging-gpt-5-4-mini`
|
|
40
40
|
|
|
41
41
|
## Setup
|
|
42
42
|
|
|
@@ -44,7 +44,7 @@ Create deployments in [Azure OpenAI Studio](https://oai.azure.com/). The resourc
|
|
|
44
44
|
|
|
45
45
|
## Configuration
|
|
46
46
|
|
|
47
|
-
Instantiate the gateway and pass it to Mastra.
|
|
47
|
+
Instantiate the gateway and pass it to Mastra. The common configuration modes are shown below.
|
|
48
48
|
|
|
49
49
|
### Static Deployments
|
|
50
50
|
|
|
@@ -59,7 +59,7 @@ export const mastra = new Mastra({
|
|
|
59
59
|
new AzureOpenAIGateway({
|
|
60
60
|
resourceName: "my-openai-resource",
|
|
61
61
|
apiKey: process.env.AZURE_API_KEY!,
|
|
62
|
-
deployments: ["gpt-4-prod", "gpt-
|
|
62
|
+
deployments: ["gpt-5-4-prod", "gpt-5-4-mini-dev"],
|
|
63
63
|
}),
|
|
64
64
|
],
|
|
65
65
|
});
|
|
@@ -111,7 +111,7 @@ export const mastra = new Mastra({
|
|
|
111
111
|
type: "entraId",
|
|
112
112
|
credential: new DefaultAzureCredential(),
|
|
113
113
|
},
|
|
114
|
-
deployments: ["gpt-4-prod", "gpt-
|
|
114
|
+
deployments: ["gpt-5-4-prod", "gpt-5-4-mini-dev"],
|
|
115
115
|
}),
|
|
116
116
|
],
|
|
117
117
|
});
|
|
@@ -145,23 +145,94 @@ export const mastra = new Mastra({
|
|
|
145
145
|
});
|
|
146
146
|
```
|
|
147
147
|
|
|
148
|
+
### Azure Responses API
|
|
149
|
+
|
|
150
|
+
Azure OpenAI supports the Responses API through the `v1` API path used by the AI SDK Azure provider. Set `useResponsesAPI: true` when your Azure resource and deployment support that route. The gateway then uses `apiVersion: "v1"` and `useDeploymentBasedUrls: false` by default.
|
|
151
|
+
|
|
152
|
+
```typescript
|
|
153
|
+
import { Mastra } from "@mastra/core";
|
|
154
|
+
import { AzureOpenAIGateway } from "@mastra/core/llm";
|
|
155
|
+
|
|
156
|
+
export const mastra = new Mastra({
|
|
157
|
+
gateways: [
|
|
158
|
+
new AzureOpenAIGateway({
|
|
159
|
+
resourceName: "my-openai-resource",
|
|
160
|
+
apiKey: process.env.AZURE_API_KEY!,
|
|
161
|
+
useResponsesAPI: true,
|
|
162
|
+
deployments: ["my-gpt-5-4-deployment"],
|
|
163
|
+
}),
|
|
164
|
+
],
|
|
165
|
+
});
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
Omit `useResponsesAPI` or set it to `false` to use the existing Azure chat completions route. That path keeps `apiVersion: "2024-04-01-preview"` and deployment-based URLs by default for compatibility.
|
|
169
|
+
|
|
170
|
+
You can still configure `apiVersion` and `useDeploymentBasedUrls` directly. For example, set `useDeploymentBasedUrls: false` to use the Azure `v1` URL shape with the chat model constructor; the gateway defaults `apiVersion` to `"v1"` for that route. Passing `apiVersion: "v1"` by itself keeps the existing deployment-based URL default for compatibility.
|
|
171
|
+
|
|
172
|
+
Do not combine `useResponsesAPI: true` with `useDeploymentBasedUrls: true`; the gateway rejects that configuration because Responses API support uses the Azure `v1` route.
|
|
173
|
+
|
|
174
|
+
Use `apiVersion: "v1"` for the GA `v1` route. Microsoft currently exposes preview `v1` features through feature-specific headers, such as `"aoai-evals": "preview"`, or through preview/alpha API paths. The gateway still accepts `apiVersion: "preview"` with `useDeploymentBasedUrls: false` for Azure provider configurations that require the preview query value. Date-based API versions are only for the legacy deployment-based route, so the gateway rejects them when `useResponsesAPI` is `true` or `useDeploymentBasedUrls` is `false`.
|
|
175
|
+
|
|
176
|
+
The same API key and Microsoft Entra ID authentication modes work with the `v1` route.
|
|
177
|
+
|
|
178
|
+
### Azure Responses WebSocket transport
|
|
179
|
+
|
|
180
|
+
Azure OpenAI also supports WebSocket mode on the Responses API. Use it for agent or tool loops with many model-tool round trips. Keep the standard HTTP transport for single-shot requests and short conversations.
|
|
181
|
+
|
|
182
|
+
WebSocket transport requires `useResponsesAPI: true`, because Azure exposes it on the `v1` Responses path. Then opt in per stream request with `providerOptions.azure.transport: "websocket"`.
|
|
183
|
+
|
|
184
|
+
```typescript
|
|
185
|
+
import { Agent } from "@mastra/core/agent";
|
|
186
|
+
|
|
187
|
+
const agent = new Agent({
|
|
188
|
+
id: "azure-ws-agent",
|
|
189
|
+
name: "Azure WebSocket Agent",
|
|
190
|
+
instructions: "Use tools when they are useful.",
|
|
191
|
+
model: "azure-openai/my-gpt-5-4-deployment",
|
|
192
|
+
});
|
|
193
|
+
|
|
194
|
+
const stream = await agent.stream("Find and improve the slow function.", {
|
|
195
|
+
providerOptions: {
|
|
196
|
+
azure: {
|
|
197
|
+
transport: "websocket",
|
|
198
|
+
store: false,
|
|
199
|
+
websocket: {
|
|
200
|
+
closeOnFinish: false,
|
|
201
|
+
},
|
|
202
|
+
},
|
|
203
|
+
},
|
|
204
|
+
});
|
|
205
|
+
|
|
206
|
+
for await (const chunk of stream.textStream) {
|
|
207
|
+
process.stdout.write(chunk);
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
stream.transport?.close();
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
Set `closeOnFinish: false` when you want to keep the socket open across follow-up turns. Azure keeps one response chain in connection-local memory, so continuing from the most recent `previous_response_id` can reduce latency on follow-up turns. The connection runs one response at a time and does not multiplex parallel runs.
|
|
214
|
+
|
|
215
|
+
Do not send overlapping follow-up requests with `previous_response_id` on the same WebSocket transport. Mastra rejects overlapping continuation requests because Azure only keeps one in-flight response per connection. Wait for the active stream to finish before continuing the response chain.
|
|
216
|
+
|
|
148
217
|
## Configuration Reference
|
|
149
218
|
|
|
150
|
-
| Option | Type | Required | Description
|
|
151
|
-
| --------------------------- | ----------------- | -------- |
|
|
152
|
-
| `resourceName` | `string` | Yes | Azure OpenAI resource name
|
|
153
|
-
| `apiKey` | `string` | Yes\* | API key from "Keys and Endpoint"
|
|
154
|
-
| `authentication` | `object` | No | Microsoft Entra ID authentication
|
|
155
|
-
| `authentication.type` | `"entraId"` | Yes\* | Authentication mode
|
|
156
|
-
| `authentication.credential` | `TokenCredential` | Yes\* | Azure SDK-compatible credential for `entraId` authentication mode
|
|
157
|
-
| `authentication.scope` | `string` | No | Token scope (default: `https://cognitiveservices.azure.com/.default`)
|
|
158
|
-
| `apiVersion` | `string` | No | API version (default: `2024-04-01-preview`)
|
|
159
|
-
| `
|
|
160
|
-
| `
|
|
161
|
-
| `
|
|
162
|
-
| `management
|
|
163
|
-
| `management.
|
|
164
|
-
| `management.
|
|
165
|
-
| `management.
|
|
219
|
+
| Option | Type | Required | Description |
|
|
220
|
+
| --------------------------- | ----------------- | -------- | ---------------------------------------------------------------------------------------------------------------------------- |
|
|
221
|
+
| `resourceName` | `string` | Yes | Azure OpenAI resource name |
|
|
222
|
+
| `apiKey` | `string` | Yes\* | API key from "Keys and Endpoint" |
|
|
223
|
+
| `authentication` | `object` | No | Microsoft Entra ID authentication |
|
|
224
|
+
| `authentication.type` | `"entraId"` | Yes\* | Authentication mode |
|
|
225
|
+
| `authentication.credential` | `TokenCredential` | Yes\* | Azure SDK-compatible credential for `entraId` authentication mode |
|
|
226
|
+
| `authentication.scope` | `string` | No | Token scope (default: `https://cognitiveservices.azure.com/.default`) |
|
|
227
|
+
| `apiVersion` | `string` | No | API version (default: `2024-04-01-preview`, or `v1` when `useResponsesAPI` is `true` or `useDeploymentBasedUrls` is `false`) |
|
|
228
|
+
| `useResponsesAPI` | `boolean` | No | Resolve deployments through the Azure OpenAI Responses API (default: `false`) |
|
|
229
|
+
| `useDeploymentBasedUrls` | `boolean` | No | Use Azure deployment-based URLs (default: `true`, or `false` when `useResponsesAPI` is `true`) |
|
|
230
|
+
| `deployments` | `string[]` | No | Deployment names for static mode |
|
|
231
|
+
| `management` | `object` | No | Management API credentials |
|
|
232
|
+
| `management.tenantId` | `string` | Yes\* | Azure AD tenant ID |
|
|
233
|
+
| `management.clientId` | `string` | Yes\* | Service Principal client ID |
|
|
234
|
+
| `management.clientSecret` | `string` | Yes\* | Service Principal secret |
|
|
235
|
+
| `management.subscriptionId` | `string` | Yes\* | Azure subscription ID |
|
|
236
|
+
| `management.resourceGroup` | `string` | Yes\* | Resource group name |
|
|
166
237
|
|
|
167
238
|
\* Provide either `apiKey` or `authentication.type: "entraId"`. Management fields are required if `management` is provided.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Netlify
|
|
2
2
|
|
|
3
|
-
Netlify AI Gateway provides unified access to multiple providers with built-in caching and observability. Access
|
|
3
|
+
Netlify AI Gateway provides unified access to multiple providers with built-in caching and observability. Access 67 models through Mastra's model router.
|
|
4
4
|
|
|
5
5
|
Learn more in the [Netlify documentation](https://docs.netlify.com/build/ai-gateway/overview/).
|
|
6
6
|
|
|
@@ -62,6 +62,7 @@ ANTHROPIC_API_KEY=ant-...
|
|
|
62
62
|
| `gemini/gemini-3.1-pro-preview-customtools` |
|
|
63
63
|
| `gemini/gemini-flash-latest` |
|
|
64
64
|
| `gemini/gemini-flash-lite-latest` |
|
|
65
|
+
| `openai/chat-latest` |
|
|
65
66
|
| `openai/gpt-4.1` |
|
|
66
67
|
| `openai/gpt-4.1-mini` |
|
|
67
68
|
| `openai/gpt-4.1-nano` |
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# OpenRouter
|
|
2
2
|
|
|
3
|
-
OpenRouter aggregates models from multiple providers with enhanced features like rate limiting and failover. Access
|
|
3
|
+
OpenRouter aggregates models from multiple providers with enhanced features like rate limiting and failover. Access 186 models through Mastra's model router.
|
|
4
4
|
|
|
5
5
|
Learn more in the [OpenRouter documentation](https://openrouter.ai/models).
|
|
6
6
|
|
|
@@ -172,6 +172,7 @@ ANTHROPIC_API_KEY=ant-...
|
|
|
172
172
|
| `poolside/laguna-xs.2:free` |
|
|
173
173
|
| `prime-intellect/intellect-3` |
|
|
174
174
|
| `qwen/qwen-2.5-coder-32b-instruct` |
|
|
175
|
+
| `qwen/qwen-3.6-27b` |
|
|
175
176
|
| `qwen/qwen2.5-vl-72b-instruct` |
|
|
176
177
|
| `qwen/qwen3-235b-a22b-07-25` |
|
|
177
178
|
| `qwen/qwen3-235b-a22b-thinking-2507` |
|
package/.docs/models/index.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Model Providers
|
|
2
2
|
|
|
3
|
-
Mastra provides a unified interface for working with LLMs across multiple providers, giving you access to
|
|
3
|
+
Mastra provides a unified interface for working with LLMs across multiple providers, giving you access to 3889 models from 108 providers through a single API.
|
|
4
4
|
|
|
5
5
|
## Features
|
|
6
6
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Deep Infra
|
|
2
2
|
|
|
3
|
-
Access
|
|
3
|
+
Access 36 Deep Infra models through Mastra's model router. Authentication is handled automatically using the `DEEPINFRA_API_KEY` environment variable.
|
|
4
4
|
|
|
5
5
|
Learn more in the [Deep Infra documentation](https://deepinfra.com/models).
|
|
6
6
|
|
|
@@ -36,6 +36,7 @@ for await (const chunk of stream) {
|
|
|
36
36
|
| `deepinfra/anthropic/claude-4-opus` | 200K | | | | | | $17 | $83 |
|
|
37
37
|
| `deepinfra/deepseek-ai/DeepSeek-R1-0528` | 164K | | | | | | $0.50 | $2 |
|
|
38
38
|
| `deepinfra/deepseek-ai/DeepSeek-V3.2` | 164K | | | | | | $0.26 | $0.38 |
|
|
39
|
+
| `deepinfra/deepseek-ai/DeepSeek-V4-Flash` | 1.0M | | | | | | $0.14 | $0.28 |
|
|
39
40
|
| `deepinfra/deepseek-ai/DeepSeek-V4-Pro` | 66K | | | | | | $2 | $3 |
|
|
40
41
|
| `deepinfra/google/gemma-4-26B-A4B-it` | 256K | | | | | | $0.07 | $0.34 |
|
|
41
42
|
| `deepinfra/google/gemma-4-31B-it` | 256K | | | | | | $0.13 | $0.38 |
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# DigitalOcean
|
|
2
2
|
|
|
3
|
-
Access
|
|
3
|
+
Access 64 DigitalOcean models through Mastra's model router. Authentication is handled automatically using the `DIGITALOCEAN_ACCESS_TOKEN` environment variable.
|
|
4
4
|
|
|
5
5
|
Learn more in the [DigitalOcean documentation](https://docs.digitalocean.com/products/gradient-ai-platform/details/models/).
|
|
6
6
|
|
|
@@ -60,6 +60,7 @@ for await (const chunk of stream) {
|
|
|
60
60
|
| `digitalocean/glm-5` | 203K | | | | | | $1 | $3 |
|
|
61
61
|
| `digitalocean/gte-large-en-v1.5` | 8K | | | | | | $0.09 | — |
|
|
62
62
|
| `digitalocean/kimi-k2.5` | 262K | | | | | | $0.50 | $3 |
|
|
63
|
+
| `digitalocean/kimi-k2.6` | 262K | | | | | | $0.95 | $4 |
|
|
63
64
|
| `digitalocean/llama-4-maverick` | 1.0M | | | | | | $0.25 | $0.87 |
|
|
64
65
|
| `digitalocean/llama-guard-4-12b` | 128K | | | | | | — | — |
|
|
65
66
|
| `digitalocean/llama3.3-70b-instruct` | 128K | | | | | | $0.65 | $0.65 |
|