@mastra/mcp-docs-server 1.1.17-alpha.9 → 1.1.18-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/docs/memory/observational-memory.md +49 -4
- package/.docs/docs/server/mastra-client.md +17 -0
- package/.docs/docs/server/server-adapters.md +15 -1
- package/.docs/models/gateways/netlify.md +65 -66
- package/.docs/models/gateways/openrouter.md +3 -2
- package/.docs/models/gateways/vercel.md +3 -1
- package/.docs/models/index.md +1 -1
- package/.docs/models/providers/bailing.md +1 -1
- package/.docs/models/providers/cloudflare-workers-ai.md +4 -3
- package/.docs/models/providers/firmware.md +2 -2
- package/.docs/models/providers/friendli.md +1 -1
- package/.docs/models/providers/github-models.md +1 -1
- package/.docs/models/providers/google.md +7 -2
- package/.docs/models/providers/groq.md +24 -16
- package/.docs/models/providers/huggingface.md +1 -1
- package/.docs/models/providers/mistral.md +3 -2
- package/.docs/models/providers/nano-gpt.md +3 -1
- package/.docs/models/providers/openai.md +2 -1
- package/.docs/models/providers/opencode.md +3 -2
- package/.docs/models/providers/poe.md +3 -1
- package/.docs/models/providers/vultr.md +11 -16
- package/.docs/models/providers/zai-coding-plan.md +3 -2
- package/.docs/models/providers/zenmux.md +2 -31
- package/.docs/models/providers/zhipuai-coding-plan.md +3 -2
- package/.docs/reference/ai-sdk/handle-chat-stream.md +2 -0
- package/.docs/reference/client-js/agents.md +11 -6
- package/.docs/reference/client-js/mastra-client.md +1 -1
- package/.docs/reference/client-js/memory.md +1 -1
- package/.docs/reference/configuration.md +24 -0
- package/.docs/reference/core/mastra-model-gateway.md +2 -0
- package/.docs/reference/deployer/cloudflare.md +31 -1
- package/.docs/reference/evals/scorer-utils.md +9 -5
- package/.docs/reference/evals/trajectory-accuracy.md +29 -15
- package/.docs/reference/index.md +0 -2
- package/.docs/reference/logging/pino-logger.md +58 -0
- package/.docs/reference/memory/observational-memory.md +32 -6
- package/CHANGELOG.md +44 -0
- package/package.json +6 -6
- package/.docs/reference/core/getStoredAgentById.md +0 -87
- package/.docs/reference/core/listStoredAgents.md +0 -91
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Vultr
|
|
2
2
|
|
|
3
|
-
Access
|
|
3
|
+
Access 4 Vultr models through Mastra's model router. Authentication is handled automatically using the `VULTR_API_KEY` environment variable.
|
|
4
4
|
|
|
5
5
|
Learn more in the [Vultr documentation](https://api.vultrinference.com/).
|
|
6
6
|
|
|
@@ -15,7 +15,7 @@ const agent = new Agent({
|
|
|
15
15
|
id: "my-agent",
|
|
16
16
|
name: "My Agent",
|
|
17
17
|
instructions: "You are a helpful assistant",
|
|
18
|
-
model: "vultr/DeepSeek-
|
|
18
|
+
model: "vultr/DeepSeek-V3.2"
|
|
19
19
|
});
|
|
20
20
|
|
|
21
21
|
// Generate a response
|
|
@@ -32,17 +32,12 @@ for await (const chunk of stream) {
|
|
|
32
32
|
|
|
33
33
|
## Models
|
|
34
34
|
|
|
35
|
-
| Model
|
|
36
|
-
|
|
|
37
|
-
| `vultr/DeepSeek-
|
|
38
|
-
| `vultr/
|
|
39
|
-
| `vultr/
|
|
40
|
-
| `vultr/
|
|
41
|
-
| `vultr/gpt-oss-120b` | 130K | | | | | | $0.15 | $0.60 |
|
|
42
|
-
| `vultr/Kimi-K2.5` | 261K | | | | | | $0.55 | $3 |
|
|
43
|
-
| `vultr/Llama-3_1-Nemotron-Ultra-253B-v1` | 32K | | | | | | $0.55 | $2 |
|
|
44
|
-
| `vultr/MiniMax-M2.5` | 196K | | | | | | $0.30 | $1 |
|
|
45
|
-
| `vultr/NVIDIA-Nemotron-3-Super-120B-A12B-NVFP4` | 260K | | | | | | $0.20 | $0.80 |
|
|
35
|
+
| Model | Context | Tools | Reasoning | Image | Audio | Video | Input $/1M | Output $/1M |
|
|
36
|
+
| --------------------- | ------- | ----- | --------- | ----- | ----- | ----- | ---------- | ----------- |
|
|
37
|
+
| `vultr/DeepSeek-V3.2` | 163K | | | | | | $0.55 | $2 |
|
|
38
|
+
| `vultr/GLM-5-FP8` | 202K | | | | | | $0.85 | $3 |
|
|
39
|
+
| `vultr/Kimi-K2.5` | 261K | | | | | | $0.55 | $3 |
|
|
40
|
+
| `vultr/MiniMax-M2.5` | 196K | | | | | | $0.30 | $1 |
|
|
46
41
|
|
|
47
42
|
## Advanced configuration
|
|
48
43
|
|
|
@@ -54,7 +49,7 @@ const agent = new Agent({
|
|
|
54
49
|
name: "custom-agent",
|
|
55
50
|
model: {
|
|
56
51
|
url: "https://api.vultrinference.com/v1",
|
|
57
|
-
id: "vultr/DeepSeek-
|
|
52
|
+
id: "vultr/DeepSeek-V3.2",
|
|
58
53
|
apiKey: process.env.VULTR_API_KEY,
|
|
59
54
|
headers: {
|
|
60
55
|
"X-Custom-Header": "value"
|
|
@@ -72,8 +67,8 @@ const agent = new Agent({
|
|
|
72
67
|
model: ({ requestContext }) => {
|
|
73
68
|
const useAdvanced = requestContext.task === "complex";
|
|
74
69
|
return useAdvanced
|
|
75
|
-
? "vultr/
|
|
76
|
-
: "vultr/DeepSeek-
|
|
70
|
+
? "vultr/MiniMax-M2.5"
|
|
71
|
+
: "vultr/DeepSeek-V3.2";
|
|
77
72
|
}
|
|
78
73
|
});
|
|
79
74
|
```
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Z.AI Coding Plan
|
|
2
2
|
|
|
3
|
-
Access
|
|
3
|
+
Access 12 Z.AI Coding Plan models through Mastra's model router. Authentication is handled automatically using the `ZHIPU_API_KEY` environment variable.
|
|
4
4
|
|
|
5
5
|
Learn more in the [Z.AI Coding Plan documentation](https://docs.z.ai/devpack/overview).
|
|
6
6
|
|
|
@@ -45,6 +45,7 @@ for await (const chunk of stream) {
|
|
|
45
45
|
| `zai-coding-plan/glm-4.7-flashx` | 200K | | | | | | $0.07 | $0.40 |
|
|
46
46
|
| `zai-coding-plan/glm-5` | 205K | | | | | | — | — |
|
|
47
47
|
| `zai-coding-plan/glm-5-turbo` | 200K | | | | | | — | — |
|
|
48
|
+
| `zai-coding-plan/glm-5.1` | 200K | | | | | | — | — |
|
|
48
49
|
|
|
49
50
|
## Advanced configuration
|
|
50
51
|
|
|
@@ -74,7 +75,7 @@ const agent = new Agent({
|
|
|
74
75
|
model: ({ requestContext }) => {
|
|
75
76
|
const useAdvanced = requestContext.task === "complex";
|
|
76
77
|
return useAdvanced
|
|
77
|
-
? "zai-coding-plan/glm-5
|
|
78
|
+
? "zai-coding-plan/glm-5.1"
|
|
78
79
|
: "zai-coding-plan/glm-4.5";
|
|
79
80
|
}
|
|
80
81
|
});
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# ZenMux
|
|
2
2
|
|
|
3
|
-
Access
|
|
3
|
+
Access 85 ZenMux models through Mastra's model router. Authentication is handled automatically using the `ZENMUX_API_KEY` environment variable.
|
|
4
4
|
|
|
5
5
|
Learn more in the [ZenMux documentation](https://docs.zenmux.ai).
|
|
6
6
|
|
|
@@ -52,7 +52,6 @@ for await (const chunk of stream) {
|
|
|
52
52
|
| `zenmux/google/gemini-2.5-flash-lite` | 1.0M | | | | | | $0.10 | $0.40 |
|
|
53
53
|
| `zenmux/google/gemini-2.5-pro` | 1.0M | | | | | | $1 | $10 |
|
|
54
54
|
| `zenmux/google/gemini-3-flash-preview` | 1.0M | | | | | | $0.50 | $3 |
|
|
55
|
-
| `zenmux/google/gemini-3-pro-image-preview` | 1.0M | | | | | | $2 | $12 |
|
|
56
55
|
| `zenmux/google/gemini-3-pro-preview` | 1.0M | | | | | | $2 | $12 |
|
|
57
56
|
| `zenmux/google/gemini-3.1-flash-lite-preview` | 1.1M | | | | | | $0.25 | $2 |
|
|
58
57
|
| `zenmux/google/gemini-3.1-pro-preview` | 1.0M | | | | | | $2 | $12 |
|
|
@@ -130,7 +129,7 @@ const agent = new Agent({
|
|
|
130
129
|
id: "custom-agent",
|
|
131
130
|
name: "custom-agent",
|
|
132
131
|
model: {
|
|
133
|
-
url: "https://zenmux.ai/api/
|
|
132
|
+
url: "https://zenmux.ai/api/v1",
|
|
134
133
|
id: "zenmux/anthropic/claude-3.5-haiku",
|
|
135
134
|
apiKey: process.env.ZENMUX_API_KEY,
|
|
136
135
|
headers: {
|
|
@@ -153,32 +152,4 @@ const agent = new Agent({
|
|
|
153
152
|
: "zenmux/anthropic/claude-3.5-haiku";
|
|
154
153
|
}
|
|
155
154
|
});
|
|
156
|
-
```
|
|
157
|
-
|
|
158
|
-
## Direct provider installation
|
|
159
|
-
|
|
160
|
-
This provider can also be installed directly as a standalone package, which can be used instead of the Mastra model router string. View the [package documentation](https://www.npmjs.com/package/@ai-sdk/anthropic) for more details.
|
|
161
|
-
|
|
162
|
-
**npm**:
|
|
163
|
-
|
|
164
|
-
```bash
|
|
165
|
-
npm install @ai-sdk/anthropic
|
|
166
|
-
```
|
|
167
|
-
|
|
168
|
-
**pnpm**:
|
|
169
|
-
|
|
170
|
-
```bash
|
|
171
|
-
pnpm add @ai-sdk/anthropic
|
|
172
|
-
```
|
|
173
|
-
|
|
174
|
-
**Yarn**:
|
|
175
|
-
|
|
176
|
-
```bash
|
|
177
|
-
yarn add @ai-sdk/anthropic
|
|
178
|
-
```
|
|
179
|
-
|
|
180
|
-
**Bun**:
|
|
181
|
-
|
|
182
|
-
```bash
|
|
183
|
-
bun add @ai-sdk/anthropic
|
|
184
155
|
```
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Zhipu AI Coding Plan
|
|
2
2
|
|
|
3
|
-
Access
|
|
3
|
+
Access 13 Zhipu AI Coding Plan models through Mastra's model router. Authentication is handled automatically using the `ZHIPU_API_KEY` environment variable.
|
|
4
4
|
|
|
5
5
|
Learn more in the [Zhipu AI Coding Plan documentation](https://docs.bigmodel.cn/cn/coding-plan/overview).
|
|
6
6
|
|
|
@@ -46,6 +46,7 @@ for await (const chunk of stream) {
|
|
|
46
46
|
| `zhipuai-coding-plan/glm-4.7-flashx` | 200K | | | | | | $0.07 | $0.40 |
|
|
47
47
|
| `zhipuai-coding-plan/glm-5` | 205K | | | | | | — | — |
|
|
48
48
|
| `zhipuai-coding-plan/glm-5-turbo` | 200K | | | | | | — | — |
|
|
49
|
+
| `zhipuai-coding-plan/glm-5.1` | 200K | | | | | | — | — |
|
|
49
50
|
|
|
50
51
|
## Advanced configuration
|
|
51
52
|
|
|
@@ -75,7 +76,7 @@ const agent = new Agent({
|
|
|
75
76
|
model: ({ requestContext }) => {
|
|
76
77
|
const useAdvanced = requestContext.task === "complex";
|
|
77
78
|
return useAdvanced
|
|
78
|
-
? "zhipuai-coding-plan/glm-5
|
|
79
|
+
? "zhipuai-coding-plan/glm-5.1"
|
|
79
80
|
: "zhipuai-coding-plan/glm-4.5";
|
|
80
81
|
}
|
|
81
82
|
});
|
|
@@ -59,4 +59,6 @@ export async function POST(req: Request) {
|
|
|
59
59
|
|
|
60
60
|
**sendSources** (`boolean`): Whether to include source citations in the stream. (Default: `false`)
|
|
61
61
|
|
|
62
|
+
**onError** (`(error: unknown) => string`): Called when the stream encounters an error. Return the string that will be sent to the client as the error message. Use this to sanitize errors before they reach the client — for example, to prevent internal infrastructure details from leaking to end users.
|
|
63
|
+
|
|
62
64
|
**messageMetadata** (`(options: { part: UIMessageStreamPart }) => Record<string, unknown> | undefined`): A function that receives the current stream part and returns metadata to attach to start and finish chunks. See the [AI SDK message metadata docs](https://ai-sdk.dev/docs/ai-sdk-ui/message-metadata) for details.
|
|
@@ -308,7 +308,7 @@ response.processDataStream({
|
|
|
308
308
|
|
|
309
309
|
## Stored agents
|
|
310
310
|
|
|
311
|
-
Stored agents are agent configurations stored in a database that can be created, updated, and deleted at runtime. They reference primitives (tools, workflows, other agents,
|
|
311
|
+
Stored agents are agent configurations stored in a database that can be created, updated, and deleted at runtime. They reference primitives (tools, workflows, other agents, scorers) by key, which are resolved from the Mastra registry when the agent is instantiated. Memory is configured inline as a `SerializedMemoryConfig` object with options such as `lastMessages` and `semanticRecall`.
|
|
312
312
|
|
|
313
313
|
### `listStoredAgents()`
|
|
314
314
|
|
|
@@ -361,10 +361,15 @@ const agent = await mastraClient.createStoredAgent({
|
|
|
361
361
|
provider: 'openai',
|
|
362
362
|
name: 'gpt-5.4',
|
|
363
363
|
},
|
|
364
|
-
tools:
|
|
365
|
-
workflows:
|
|
366
|
-
agents:
|
|
367
|
-
memory:
|
|
364
|
+
tools: { calculator: {}, weather: {} },
|
|
365
|
+
workflows: { 'data-processing': {} },
|
|
366
|
+
agents: { 'subagent-1': {} },
|
|
367
|
+
memory: {
|
|
368
|
+
options: {
|
|
369
|
+
lastMessages: 20,
|
|
370
|
+
semanticRecall: false,
|
|
371
|
+
},
|
|
372
|
+
},
|
|
368
373
|
scorers: {
|
|
369
374
|
'quality-scorer': {
|
|
370
375
|
sampling: { type: 'ratio', rate: 0.1 },
|
|
@@ -415,7 +420,7 @@ const updated = await storedAgent.update({
|
|
|
415
420
|
```typescript
|
|
416
421
|
// Update just the tools
|
|
417
422
|
await storedAgent.update({
|
|
418
|
-
tools:
|
|
423
|
+
tools: { 'new-tool-1': {}, 'new-tool-2': {} },
|
|
419
424
|
})
|
|
420
425
|
|
|
421
426
|
// Update metadata
|
|
@@ -32,7 +32,7 @@ export const mastraClient = new MastraClient({
|
|
|
32
32
|
|
|
33
33
|
**getAgent(agentId)** (`Agent`): Retrieves a specific agent instance by ID.
|
|
34
34
|
|
|
35
|
-
**
|
|
35
|
+
**listMemoryThreads(params)** (`Promise<StorageThreadType[]>`): Retrieves memory threads for the specified resource and agent. Requires a `resourceId`; `agentId` can be omitted if storage is configured.
|
|
36
36
|
|
|
37
37
|
**createMemoryThread(params)** (`Promise<MemoryThread>`): Creates a new memory thread with the given parameters.
|
|
38
38
|
|
|
@@ -7,7 +7,7 @@ The Memory API provides methods to manage conversation threads and message histo
|
|
|
7
7
|
Retrieve all memory threads for a specific resource:
|
|
8
8
|
|
|
9
9
|
```typescript
|
|
10
|
-
const threads = await mastraClient.
|
|
10
|
+
const threads = await mastraClient.listMemoryThreads({
|
|
11
11
|
resourceId: 'resource-1',
|
|
12
12
|
agentId: 'agent-1', // Optional - can be omitted if storage is configured
|
|
13
13
|
})
|
|
@@ -566,6 +566,30 @@ export const mastra = new Mastra({
|
|
|
566
566
|
})
|
|
567
567
|
```
|
|
568
568
|
|
|
569
|
+
### server.mcpOptions
|
|
570
|
+
|
|
571
|
+
**Type:** `object`\
|
|
572
|
+
**Default:** `undefined`
|
|
573
|
+
|
|
574
|
+
MCP transport options applied to all MCP HTTP and SSE routes. Use this to enable stateless mode for serverless environments (Cloudflare Workers, Vercel Edge, AWS Lambda, etc.) where persistent connections and in-memory session state are not available.
|
|
575
|
+
|
|
576
|
+
| Property | Type | Default | Description |
|
|
577
|
+
| -------------------- | -------------- | ----------- | ---------------------------------------------------- |
|
|
578
|
+
| `serverless` | `boolean` | `false` | Run MCP in stateless mode without session management |
|
|
579
|
+
| `sessionIdGenerator` | `() => string` | `undefined` | Custom session ID generator function |
|
|
580
|
+
|
|
581
|
+
```typescript
|
|
582
|
+
import { Mastra } from '@mastra/core'
|
|
583
|
+
|
|
584
|
+
export const mastra = new Mastra({
|
|
585
|
+
server: {
|
|
586
|
+
mcpOptions: {
|
|
587
|
+
serverless: true,
|
|
588
|
+
},
|
|
589
|
+
},
|
|
590
|
+
})
|
|
591
|
+
```
|
|
592
|
+
|
|
569
593
|
### server.build
|
|
570
594
|
|
|
571
595
|
Build-time configuration for server features. These options control development tools like Swagger UI and request logging, which are enabled during local development but disabled in production by default.
|
|
@@ -87,6 +87,8 @@ Fetches provider configurations from the gateway.
|
|
|
87
87
|
|
|
88
88
|
Builds the API URL for a specific model/provider combination.
|
|
89
89
|
|
|
90
|
+
If your provider URL contains placeholders such as `${ACCOUNT_ID}`, resolve them inside `buildUrl()` from `envVars` or `process.env` before returning the final URL.
|
|
91
|
+
|
|
90
92
|
**Parameters:**
|
|
91
93
|
|
|
92
94
|
**modelId** (`string`): Full model ID (e.g., "custom/my-provider/model-1")
|
|
@@ -87,4 +87,34 @@ Use `vars` in the `CloudflareDeployer` constructor only for non-sensitive config
|
|
|
87
87
|
|
|
88
88
|
## Build output
|
|
89
89
|
|
|
90
|
-
After running `mastra build`, the deployer generates a `wrangler.jsonc` file conforming to Cloudflare's [wrangler configuration](https://developers.cloudflare.com/workers/wrangler/configuration/). It points to files inside `.mastra/output` so you need to run `mastra build` before deploying with Wrangler.
|
|
90
|
+
After running `mastra build`, the deployer generates a `wrangler.jsonc` file conforming to Cloudflare's [wrangler configuration](https://developers.cloudflare.com/workers/wrangler/configuration/). It points to files inside `.mastra/output` so you need to run `mastra build` before deploying with Wrangler.
|
|
91
|
+
|
|
92
|
+
## Cloudflare bindings
|
|
93
|
+
|
|
94
|
+
When you use the Cloudflare deployer, you can import runtime bindings from `cloudflare:workers` in your Mastra config file. Mastra automatically preserves protocol-based runtime imports like `cloudflare:workers` during `mastra build` without trying to install them as npm dependencies.
|
|
95
|
+
|
|
96
|
+
```typescript
|
|
97
|
+
import { env } from 'cloudflare:workers'
|
|
98
|
+
import { Mastra } from '@mastra/core'
|
|
99
|
+
import { registerApiRoute } from '@mastra/core/server'
|
|
100
|
+
import { CloudflareDeployer } from '@mastra/deployer-cloudflare'
|
|
101
|
+
|
|
102
|
+
export const mastra = new Mastra({
|
|
103
|
+
deployer: new CloudflareDeployer({
|
|
104
|
+
name: 'my-worker',
|
|
105
|
+
kv_namespaces: [{ binding: 'CACHE', id: 'your-kv-namespace-id' }],
|
|
106
|
+
}),
|
|
107
|
+
server: {
|
|
108
|
+
apiRoutes: [
|
|
109
|
+
registerApiRoute('/bindings', {
|
|
110
|
+
method: 'GET',
|
|
111
|
+
requiresAuth: false,
|
|
112
|
+
handler: async c => {
|
|
113
|
+
await env.CACHE.put('status', 'ok')
|
|
114
|
+
return c.json({ status: await env.CACHE.get('status') })
|
|
115
|
+
},
|
|
116
|
+
}),
|
|
117
|
+
],
|
|
118
|
+
},
|
|
119
|
+
})
|
|
120
|
+
```
|
|
@@ -367,10 +367,11 @@ The `expected` parameter accepts either a `Trajectory` (actual trajectory) or `{
|
|
|
367
367
|
import { compareTrajectories } from '@mastra/evals/scorers/utils'
|
|
368
368
|
|
|
369
369
|
// Using ExpectedStep[] (recommended for expectations)
|
|
370
|
+
// Data fields (e.g. toolArgs) are auto-compared when present on expected steps
|
|
370
371
|
const result = compareTrajectories(
|
|
371
372
|
actualTrajectory,
|
|
372
373
|
{ steps: [{ name: 'search' }, { name: 'summarize', stepType: 'tool_call' }] },
|
|
373
|
-
{
|
|
374
|
+
{ allowRepeatedSteps: true },
|
|
374
375
|
)
|
|
375
376
|
// result.score — 0.0 to 1.0
|
|
376
377
|
// result.missingSteps — step names not found
|
|
@@ -412,7 +413,9 @@ const result = checkTrajectoryEfficiency(trajectory, {
|
|
|
412
413
|
})
|
|
413
414
|
// result.score — 1.0 if within all budgets, lower with penalties
|
|
414
415
|
// result.redundantCalls — duplicate tool+args combos
|
|
415
|
-
// result.
|
|
416
|
+
// result.overStepBudget — true if maxSteps exceeded
|
|
417
|
+
// result.overTokenBudget — true if maxTotalTokens exceeded
|
|
418
|
+
// result.overDurationBudget — true if maxTotalDurationMs exceeded
|
|
416
419
|
```
|
|
417
420
|
|
|
418
421
|
**Returns:** `TrajectoryEfficiencyResult`
|
|
@@ -428,8 +431,9 @@ const result = checkTrajectoryBlacklist(trajectory, {
|
|
|
428
431
|
blacklistedTools: ['deleteAll', 'admin-override'],
|
|
429
432
|
blacklistedSequences: [['escalate', 'admin-override']],
|
|
430
433
|
})
|
|
431
|
-
// result.
|
|
432
|
-
// result.
|
|
434
|
+
// result.score — 1.0 if no violations, 0.0 if any found
|
|
435
|
+
// result.violatedTools — blacklisted tools that were called
|
|
436
|
+
// result.violatedSequences — blacklisted sequences that were detected
|
|
433
437
|
```
|
|
434
438
|
|
|
435
439
|
**Returns:** `TrajectoryBlacklistResult`
|
|
@@ -442,7 +446,7 @@ Detects tool failure patterns including retries, fallbacks, and argument correct
|
|
|
442
446
|
import { analyzeToolFailures } from '@mastra/evals/scorers/utils'
|
|
443
447
|
|
|
444
448
|
const result = analyzeToolFailures(trajectory, {
|
|
445
|
-
maxRetriesPerTool:
|
|
449
|
+
maxRetriesPerTool: 2,
|
|
446
450
|
})
|
|
447
451
|
// result.score — 1.0 if no failure patterns, lower if patterns detected
|
|
448
452
|
// result.patterns — detected patterns (retry, fallback, arg_correction)
|
|
@@ -103,13 +103,15 @@ All step types share the base properties `name`, `durationMs`, `metadata`, and `
|
|
|
103
103
|
|
|
104
104
|
## Expected steps
|
|
105
105
|
|
|
106
|
-
When defining expected trajectories, use `ExpectedStep` instead of the full `TrajectoryStep` discriminated union. `ExpectedStep` is a
|
|
106
|
+
When defining expected trajectories, use `ExpectedStep` instead of the full `TrajectoryStep` discriminated union. `ExpectedStep` is a discriminated union that mirrors `TrajectoryStep` — when you specify a `stepType`, you get autocomplete for that variant's fields (e.g., `toolArgs` for `tool_call`, `modelId` for `model_generation`). All variant-specific fields are optional, so you only assert against what you care about.
|
|
107
|
+
|
|
108
|
+
Omit `stepType` entirely to match any step by name only.
|
|
107
109
|
|
|
108
110
|
**name** (`string`): Step name to match (tool name, agent ID, workflow step name, etc.).
|
|
109
111
|
|
|
110
|
-
**stepType** (`TrajectoryStepType`): Step type
|
|
112
|
+
**stepType** (`TrajectoryStepType`): Step type discriminant. When set, enables autocomplete for that variant's fields. If omitted, matches any step type with the given name.
|
|
111
113
|
|
|
112
|
-
**
|
|
114
|
+
**(variant fields)** (`varies`): Type-specific fields from the corresponding TrajectoryStep variant. For example, \`toolArgs\` and \`toolResult\` for \`tool\_call\`, \`modelId\` for \`model\_generation\`, \`output\` for \`workflow\_step\`. All optional — only specified fields are compared.
|
|
113
115
|
|
|
114
116
|
**children** (`TrajectoryExpectation`): Nested expectation config for this step's children. Overrides the parent config for evaluating children of this step.
|
|
115
117
|
|
|
@@ -120,11 +122,14 @@ const steps: ExpectedStep[] = [
|
|
|
120
122
|
// Match by name only (any step type)
|
|
121
123
|
{ name: 'search' },
|
|
122
124
|
|
|
123
|
-
// Match by name and step type
|
|
125
|
+
// Match by name and step type (autocomplete for tool_call fields)
|
|
124
126
|
{ name: 'search', stepType: 'tool_call' },
|
|
125
127
|
|
|
126
|
-
// Match with
|
|
127
|
-
{ name: 'search', stepType: 'tool_call',
|
|
128
|
+
// Match with specific toolArgs (auto-compared when present)
|
|
129
|
+
{ name: 'search', stepType: 'tool_call', toolArgs: { query: 'weather' } },
|
|
130
|
+
|
|
131
|
+
// Match a model generation step by model ID
|
|
132
|
+
{ name: 'gpt-4o', stepType: 'model_generation', modelId: 'gpt-4o' },
|
|
128
133
|
]
|
|
129
134
|
```
|
|
130
135
|
|
|
@@ -182,7 +187,7 @@ The `createTrajectoryAccuracyScorerCode()` function from `@mastra/evals/scorers/
|
|
|
182
187
|
|
|
183
188
|
### Parameters
|
|
184
189
|
|
|
185
|
-
**expectedTrajectory** (`
|
|
190
|
+
**expectedTrajectory** (`Trajectory | ExpectedStep[]`): Static expected trajectory to compare against. Accepts a full Trajectory or an array of ExpectedStep matchers. When omitted, the scorer reads expectedTrajectory from each dataset item at runtime.
|
|
186
191
|
|
|
187
192
|
**comparisonOptions** (`TrajectoryComparisonOptions`): Controls how the comparison is performed.
|
|
188
193
|
|
|
@@ -368,8 +373,8 @@ const scorer = createTrajectoryAccuracyScorerCode({
|
|
|
368
373
|
},
|
|
369
374
|
],
|
|
370
375
|
},
|
|
371
|
-
comparisonOptions: { compareStepData: true },
|
|
372
376
|
})
|
|
377
|
+
// Data fields like toolArgs are auto-compared when present on expected steps
|
|
373
378
|
```
|
|
374
379
|
|
|
375
380
|
## LLM-based trajectory accuracy scorer
|
|
@@ -380,7 +385,7 @@ The `createTrajectoryAccuracyScorerLLM()` function from `@mastra/evals/scorers/p
|
|
|
380
385
|
|
|
381
386
|
**model** (`MastraModelConfig`): The LLM model to use for evaluating trajectory quality.
|
|
382
387
|
|
|
383
|
-
**expectedTrajectory** (`
|
|
388
|
+
**expectedTrajectory** (`Trajectory | ExpectedStep[]`): Optional static expected trajectory to compare against. Accepts a full Trajectory or an array of ExpectedStep matchers. When omitted, the LLM evaluates the trajectory based on the task requirements alone. Can also come from dataset items at runtime.
|
|
384
389
|
|
|
385
390
|
### Features
|
|
386
391
|
|
|
@@ -461,7 +466,7 @@ The `createTrajectoryScorerCode()` function from `@mastra/evals/scorers/prebuilt
|
|
|
461
466
|
|
|
462
467
|
**defaults** (`TrajectoryExpectation`): Default expectations applied to all dataset items. Per-item expectedTrajectory values override these defaults.
|
|
463
468
|
|
|
464
|
-
**weights** (`
|
|
469
|
+
**weights** (`TrajectoryScoreWeights`): Custom weights for combining dimension scores. Weights are normalized to sum to 1.0.
|
|
465
470
|
|
|
466
471
|
### Scoring behavior
|
|
467
472
|
|
|
@@ -472,7 +477,7 @@ The unified scorer evaluates four dimensions:
|
|
|
472
477
|
3. **Blacklist** — Checks for forbidden tools or sequences. Any violation immediately results in a score of **0.0** regardless of other dimensions.
|
|
473
478
|
4. **Tool failures** — Detects retry patterns, fallback patterns, and argument correction patterns.
|
|
474
479
|
|
|
475
|
-
The final score is a weighted
|
|
480
|
+
The final score is a weighted combination of the active dimensions, with the weights renormalized across only those dimensions that are active. The default weights are accuracy 0.4, efficiency 0.3, tool failures 0.2, and blacklist 0.1; you can customize them via the `weights` option. Any blacklist violation overrides the weighted result and sets the final score to 0. When nested evaluations are present, the final score is 70% top-level score and 30% the average of the nested scores.
|
|
476
481
|
|
|
477
482
|
### Unified scorer results
|
|
478
483
|
|
|
@@ -481,11 +486,13 @@ The final score is a weighted average of accuracy, efficiency, and tool failures
|
|
|
481
486
|
runId: string,
|
|
482
487
|
preprocessStepResult: {
|
|
483
488
|
accuracy?: TrajectoryComparisonResult,
|
|
484
|
-
efficiency
|
|
485
|
-
blacklist
|
|
486
|
-
toolFailures
|
|
489
|
+
efficiency?: TrajectoryEfficiencyResult,
|
|
490
|
+
blacklist?: TrajectoryBlacklistResult,
|
|
491
|
+
toolFailures?: ToolFailureAnalysisResult,
|
|
492
|
+
nested?: NestedEvaluationResult[],
|
|
487
493
|
},
|
|
488
|
-
score: number
|
|
494
|
+
score: number,
|
|
495
|
+
reason: string
|
|
489
496
|
}
|
|
490
497
|
```
|
|
491
498
|
|
|
@@ -542,6 +549,13 @@ const scorer = createTrajectoryScorerCode({
|
|
|
542
549
|
noRedundantCalls: true,
|
|
543
550
|
maxRetriesPerTool: 2,
|
|
544
551
|
},
|
|
552
|
+
// Customize how dimensions contribute to the final score
|
|
553
|
+
weights: {
|
|
554
|
+
accuracy: 0.5, // prioritize step accuracy
|
|
555
|
+
efficiency: 0.3,
|
|
556
|
+
toolFailures: 0.1,
|
|
557
|
+
blacklist: 0.1,
|
|
558
|
+
},
|
|
545
559
|
})
|
|
546
560
|
```
|
|
547
561
|
|
package/.docs/reference/index.md
CHANGED
|
@@ -66,7 +66,6 @@ The Reference section provides documentation of Mastra's API, including paramete
|
|
|
66
66
|
- [.getScorerById()](https://mastra.ai/reference/core/getScorerById)
|
|
67
67
|
- [.getServer()](https://mastra.ai/reference/core/getServer)
|
|
68
68
|
- [.getStorage()](https://mastra.ai/reference/core/getStorage)
|
|
69
|
-
- [.getStoredAgentById()](https://mastra.ai/reference/core/getStoredAgentById)
|
|
70
69
|
- [.getTelemetry()](https://mastra.ai/reference/core/getTelemetry)
|
|
71
70
|
- [.getVector()](https://mastra.ai/reference/core/getVector)
|
|
72
71
|
- [.getWorkflow()](https://mastra.ai/reference/core/getWorkflow)
|
|
@@ -77,7 +76,6 @@ The Reference section provides documentation of Mastra's API, including paramete
|
|
|
77
76
|
- [.listMCPServers()](https://mastra.ai/reference/core/listMCPServers)
|
|
78
77
|
- [.listMemory()](https://mastra.ai/reference/core/listMemory)
|
|
79
78
|
- [.listScorers()](https://mastra.ai/reference/core/listScorers)
|
|
80
|
-
- [.listStoredAgents()](https://mastra.ai/reference/core/listStoredAgents)
|
|
81
79
|
- [.listVectors()](https://mastra.ai/reference/core/listVectors)
|
|
82
80
|
- [.listWorkflows()](https://mastra.ai/reference/core/listWorkflows)
|
|
83
81
|
- [.setLogger()](https://mastra.ai/reference/core/setLogger)
|
|
@@ -30,6 +30,64 @@ export const mastra = new Mastra({
|
|
|
30
30
|
|
|
31
31
|
**formatters** (`pino.LoggerOptions['formatters']`): Custom Pino formatters for log serialization.
|
|
32
32
|
|
|
33
|
+
**redact** (`pino.LoggerOptions['redact']`): Paths or options for redacting sensitive fields from log output (Pino \`redact\`).
|
|
34
|
+
|
|
35
|
+
**prettyPrint** (`boolean`): When false, disables \`pino-pretty\` and writes raw JSON lines (useful for log aggregators). (Default: `true`)
|
|
36
|
+
|
|
37
|
+
**mixin** (`pino.MixinFn`): Pino mixin function merged into every log object (for example request-scoped \`traceId\` or other shared metadata).
|
|
38
|
+
|
|
39
|
+
**customLevels** (`Record<string, number>`): Custom log levels and numeric values, forwarded to Pino. Standard severity is still logged via \`debug\`, \`info\`, \`warn\`, and \`error\`; extra levels follow Pino’s custom-level behavior.
|
|
40
|
+
|
|
41
|
+
## Log enrichment with `mixin`
|
|
42
|
+
|
|
43
|
+
Use `mixin` when you want the same structured fields on every line (for correlation with the rest of your services):
|
|
44
|
+
|
|
45
|
+
```typescript
|
|
46
|
+
import { Mastra } from '@mastra/core'
|
|
47
|
+
import { PinoLogger } from '@mastra/loggers'
|
|
48
|
+
|
|
49
|
+
function getTraceContext() {
|
|
50
|
+
return { traceId: 'abc-123' }
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
export const mastra = new Mastra({
|
|
54
|
+
logger: new PinoLogger({
|
|
55
|
+
name: 'Mastra',
|
|
56
|
+
level: 'info',
|
|
57
|
+
mixin() {
|
|
58
|
+
return getTraceContext()
|
|
59
|
+
},
|
|
60
|
+
}),
|
|
61
|
+
})
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Custom levels
|
|
65
|
+
|
|
66
|
+
`customLevels` is passed through to Pino. `PinoLogger` only exposes `debug`, `info`, `warn`, and `error`; for any extra level name (for example `audit`), subclass and forward to the underlying Pino instance:
|
|
67
|
+
|
|
68
|
+
```typescript
|
|
69
|
+
import { Mastra } from '@mastra/core'
|
|
70
|
+
import { PinoLogger } from '@mastra/loggers'
|
|
71
|
+
|
|
72
|
+
type AuditLevel = 'audit'
|
|
73
|
+
|
|
74
|
+
class MastraPinoWithAudit extends PinoLogger<AuditLevel> {
|
|
75
|
+
audit(message: string, meta: Record<string, unknown> = {}) {
|
|
76
|
+
this.logger.audit(meta, message)
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
const logger = new MastraPinoWithAudit({
|
|
81
|
+
name: 'Mastra',
|
|
82
|
+
level: 'info',
|
|
83
|
+
customLevels: { audit: 35 },
|
|
84
|
+
})
|
|
85
|
+
|
|
86
|
+
export const mastra = new Mastra({ logger })
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Numeric values follow Pino’s ordering (built-in levels use 10–60). A level of `35` sits between `info` (30) and `warn` (40), so with `level: 'info'` both `info` and `audit` lines are emitted.
|
|
90
|
+
|
|
33
91
|
## File transport (structured logs)
|
|
34
92
|
|
|
35
93
|
Writes structured logs to a file using the `FileTransport`. The logger accepts a plain message as the first argument and structured metadata as the second argument. These are internally converted to a `BaseLogMessage` and persisted to the configured file path.
|
|
@@ -38,7 +38,7 @@ OM performs thresholding with fast local token estimation. Text uses `tokenx`, a
|
|
|
38
38
|
|
|
39
39
|
**shareTokenBudget** (`boolean`): Share the token budget between messages and observations. When enabled, the total budget is \`observation.messageTokens + reflection.observationTokens\`. Messages can use more space when observations are small, and vice versa. This maximizes context usage through flexible allocation. \`shareTokenBudget\` is not yet compatible with async buffering. You must set \`observation: { bufferTokens: false }\` when using this option (this is a temporary limitation). (Default: `false`)
|
|
40
40
|
|
|
41
|
-
**retrieval** (`boolean`): \*\*Experimental.\*\* Enable retrieval-mode observation groups as durable pointers to raw message history.
|
|
41
|
+
**retrieval** (`boolean | { vector?: boolean; scope?: 'thread' | 'resource' }`): \*\*Experimental.\*\* Enable retrieval-mode observation groups as durable pointers to raw message history. \`true\` enables cross-thread browsing by default. \`{ vector: true }\` also enables semantic search using Memory's vector store and embedder. \`{ scope: 'thread' }\` restricts the recall tool to the current thread only. Default scope is \`'resource'\`. (Default: `false`)
|
|
42
42
|
|
|
43
43
|
**observation** (`ObservationalMemoryObservationConfig`): Configuration for the observation step. Controls when the Observer agent runs and how it behaves.
|
|
44
44
|
|
|
@@ -578,21 +578,31 @@ The standalone `ObservationalMemory` class accepts all the same options as the `
|
|
|
578
578
|
|
|
579
579
|
## Recall tool
|
|
580
580
|
|
|
581
|
-
When `retrieval
|
|
581
|
+
When `retrieval` is set (any truthy value), a `recall` tool is registered so the agent can page through raw messages behind observation group ranges. By default (scope `'resource'`), the tool supports listing threads (`mode: "threads"`), browsing other threads (`threadId`), and cross-thread search. With `retrieval: { vector: true }`, semantic search is available (`mode: "search"`). Set `scope: 'thread'` to restrict the tool to the current thread only. The tool is automatically added to the agent's tool list — no manual registration is needed.
|
|
582
582
|
|
|
583
583
|
### Parameters
|
|
584
584
|
|
|
585
|
-
**
|
|
585
|
+
**mode** (`'messages' | 'threads' | 'search'`): What to retrieve. \`"messages"\` (default) pages through message history. \`"threads"\` lists all threads for the current user. \`"search"\` finds messages by semantic similarity across all threads (requires a vector store and an embedder). (Default: `'messages'`)
|
|
586
586
|
|
|
587
|
-
**
|
|
587
|
+
**query** (`string`): Search query for \`mode: "search"\`. Finds messages semantically similar to this text across all threads for the current user.
|
|
588
588
|
|
|
589
|
-
**
|
|
589
|
+
**cursor** (`string`): A message ID to anchor the recall query. Required for \`mode: "messages"\` when browsing the current thread. Extract the start or end ID from an observation group range (e.g. from \`\_range: \\\`startId:endId\\\`\_\`, use either \`startId\` or \`endId\`). If a range string is passed directly, the tool returns a hint explaining how to extract the correct ID. Can be omitted when \`threadId\` is provided to start reading from the beginning of that thread.
|
|
590
|
+
|
|
591
|
+
**threadId** (`string`): Browse a different thread by its ID. Use \`mode: "threads"\` first to discover thread IDs. When provided without a \`cursor\`, reading starts from the beginning of the thread.
|
|
592
|
+
|
|
593
|
+
**page** (`number`): Pagination offset. For messages: positive values page forward from the cursor, negative values page backward. For threads: page number (0-indexed). \`0\` is treated as \`1\` for messages. (Default: `1`)
|
|
594
|
+
|
|
595
|
+
**limit** (`number`): Maximum number of items to return per page. (Default: `20`)
|
|
590
596
|
|
|
591
597
|
**detail** (`'low' | 'high'`): Controls how much content is shown per message part. \`'low'\` shows truncated text and tool names with positional indices (\`\[p0]\`, \`\[p1]\`). \`'high'\` shows full content including tool arguments and results, clamped to one part per call with continuation hints. (Default: `'low'`)
|
|
592
598
|
|
|
593
599
|
**partIndex** (`number`): Fetch a single message part at full detail by its positional index. Use this when a low-detail recall shows an interesting part at \`\[p1]\` — call again with \`partIndex: 1\` to see the full content without loading every part.
|
|
594
600
|
|
|
595
|
-
|
|
601
|
+
**before** (`string`): For \`mode: "threads"\` only. Filter to threads created before this date. Accepts ISO 8601 format (e.g. \`"2026-03-15"\`, \`"2026-03-10T00:00:00Z"\`).
|
|
602
|
+
|
|
603
|
+
**after** (`string`): For \`mode: "threads"\` only. Filter to threads created after this date. Accepts ISO 8601 format (e.g. \`"2026-03-01"\`, \`"2026-03-10T00:00:00Z"\`).
|
|
604
|
+
|
|
605
|
+
### Returns (messages mode)
|
|
596
606
|
|
|
597
607
|
**messages** (`string`): Formatted message content. Format depends on the \`detail\` level.
|
|
598
608
|
|
|
@@ -612,6 +622,22 @@ When `retrieval: true` is set with `scope: 'thread'`, OM registers a `recall` to
|
|
|
612
622
|
|
|
613
623
|
**tokenOffset** (`number`): Approximate number of tokens that were trimmed when \`truncated\` is true.
|
|
614
624
|
|
|
625
|
+
### Returns (threads mode)
|
|
626
|
+
|
|
627
|
+
**threads** (`string`): Formatted thread listing. Each thread shows its title, ID, and dates. The current thread is marked with \`← current\`.
|
|
628
|
+
|
|
629
|
+
**count** (`number`): Number of threads returned.
|
|
630
|
+
|
|
631
|
+
**page** (`number`): The page number returned.
|
|
632
|
+
|
|
633
|
+
**hasMore** (`boolean`): Whether more threads exist on the next page.
|
|
634
|
+
|
|
635
|
+
### Returns (search mode)
|
|
636
|
+
|
|
637
|
+
**results** (`string`): Formatted search results grouped by thread. Each result shows the thread title, thread ID, relevance score, message preview, and a cursor ID for browsing into that thread.
|
|
638
|
+
|
|
639
|
+
**count** (`number`): Number of matching messages found.
|
|
640
|
+
|
|
615
641
|
### ModelByInputTokens
|
|
616
642
|
|
|
617
643
|
`ModelByInputTokens` selects a model based on the input token count. It chooses the model for the smallest threshold that covers the actual input size.
|