@mastra/mcp-docs-server 0.13.25-alpha.0 → 0.13.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/organized/changelogs/%40mastra%2Fagent-builder.md +12 -1
- package/.docs/organized/changelogs/%40mastra%2Fai-sdk.md +11 -0
- package/.docs/organized/changelogs/%40mastra%2Fastra.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fchroma.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fclickhouse.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +18 -18
- package/.docs/organized/changelogs/%40mastra%2Fcloud.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fcloudflare-d1.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Fcloudflare.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Fcore.md +35 -35
- package/.docs/organized/changelogs/%40mastra%2Fcouchbase.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloud.md +16 -16
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-vercel.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +30 -30
- package/.docs/organized/changelogs/%40mastra%2Fdynamodb.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Fevals.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Flance.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Flibsql.md +20 -20
- package/.docs/organized/changelogs/%40mastra%2Floggers.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fmcp-registry-registry.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fmcp.md +13 -13
- package/.docs/organized/changelogs/%40mastra%2Fmemory.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fmongodb.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Fmssql.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Fopensearch.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fpg.md +18 -18
- package/.docs/organized/changelogs/%40mastra%2Fpinecone.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +24 -24
- package/.docs/organized/changelogs/%40mastra%2Fqdrant.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Frag.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Freact.md +17 -0
- package/.docs/organized/changelogs/%40mastra%2Fs3vectors.md +9 -0
- package/.docs/organized/changelogs/%40mastra%2Fserver.md +16 -16
- package/.docs/organized/changelogs/%40mastra%2Fturbopuffer.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fupstash.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Fvectorize.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-azure.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-cloudflare.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-deepgram.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-elevenlabs.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-gladia.md +11 -3
- package/.docs/organized/changelogs/%40mastra%2Fvoice-google-gemini-live.md +9 -0
- package/.docs/organized/changelogs/%40mastra%2Fvoice-google.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Fvoice-murf.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fvoice-openai-realtime.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fvoice-openai.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Fvoice-playai.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-sarvam.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fvoice-speechify.md +10 -10
- package/.docs/organized/changelogs/create-mastra.md +11 -11
- package/.docs/organized/changelogs/mastra.md +19 -19
- package/.docs/organized/code-examples/agent.md +0 -4
- package/.docs/organized/code-examples/ai-elements.md +47 -0
- package/.docs/organized/code-examples/heads-up-game.md +5 -5
- package/.docs/raw/auth/clerk.mdx +3 -3
- package/.docs/raw/observability/ai-tracing/exporters/default.mdx +1 -0
- package/.docs/raw/observability/ai-tracing/exporters/langsmith.mdx +88 -0
- package/.docs/raw/observability/ai-tracing/exporters/otel.mdx +250 -0
- package/.docs/raw/observability/ai-tracing/overview.mdx +15 -15
- package/.docs/raw/observability/overview.mdx +1 -1
- package/.docs/raw/reference/agents/network.mdx +1 -1
- package/.docs/raw/reference/auth/clerk.mdx +1 -1
- package/.docs/raw/reference/client-js/agents.mdx +4 -13
- package/.docs/raw/reference/client-js/mastra-client.mdx +10 -0
- package/.docs/raw/reference/client-js/observability.mdx +76 -0
- package/.docs/raw/reference/core/getScorer.mdx +75 -0
- package/.docs/raw/reference/core/getScorerByName.mdx +75 -0
- package/.docs/raw/reference/core/getScorers.mdx +42 -0
- package/.docs/raw/reference/core/mastra-class.mdx +7 -0
- package/.docs/raw/reference/observability/ai-tracing/ai-tracing.mdx +1 -0
- package/.docs/raw/reference/observability/ai-tracing/configuration.mdx +5 -4
- package/.docs/raw/reference/observability/ai-tracing/exporters/langsmith.mdx +112 -0
- package/.docs/raw/reference/observability/ai-tracing/exporters/otel.mdx +355 -0
- package/.docs/raw/reference/observability/logging/pino-logger.mdx +1 -1
- package/.docs/raw/reference/observability/otel-tracing/providers/index.mdx +11 -11
- package/.docs/raw/reference/scorers/create-scorer.mdx +59 -9
- package/.docs/raw/reference/scorers/mastra-scorer.mdx +6 -0
- package/.docs/raw/reference/scorers/run-experiment.mdx +216 -0
- package/.docs/raw/reference/streaming/ChunkType.mdx +3 -2
- package/.docs/raw/reference/streaming/agents/MastraModelOutput.mdx +1 -1
- package/.docs/raw/reference/streaming/agents/stream.mdx +2 -2
- package/.docs/raw/reference/streaming/workflows/resumeStreamVNext.mdx +17 -1
- package/.docs/raw/reference/streaming/workflows/stream.mdx +1 -1
- package/.docs/raw/reference/streaming/workflows/streamVNext.mdx +1 -1
- package/.docs/raw/scorers/custom-scorers.mdx +16 -1
- package/.docs/raw/scorers/overview.mdx +28 -0
- package/CHANGELOG.md +16 -0
- package/package.json +6 -6
- package/.docs/organized/changelogs/%40mastra%2Freact-hooks.md +0 -8
|
@@ -15,6 +15,7 @@ Use the `createScorer` factory to define your scorer with a name, description, a
|
|
|
15
15
|
const scorer = createScorer({
|
|
16
16
|
name: "My Custom Scorer",
|
|
17
17
|
description: "Evaluates responses based on custom criteria",
|
|
18
|
+
type: "agent", // Optional: for agent evaluation with automatic typing
|
|
18
19
|
judge: {
|
|
19
20
|
model: myModel,
|
|
20
21
|
instructions: "You are an expert evaluator..."
|
|
@@ -50,6 +51,12 @@ const scorer = createScorer({
|
|
|
50
51
|
required: false,
|
|
51
52
|
description: "Optional judge configuration for LLM-based steps. See Judge Object section below.",
|
|
52
53
|
},
|
|
54
|
+
{
|
|
55
|
+
name: "type",
|
|
56
|
+
type: "string",
|
|
57
|
+
required: false,
|
|
58
|
+
description: "Type specification for input/output. Use 'agent' for automatic agent types. For custom types, use the generic approach instead.",
|
|
59
|
+
},
|
|
53
60
|
]}
|
|
54
61
|
/>
|
|
55
62
|
|
|
@@ -76,28 +83,40 @@ This function returns a scorer builder that you can chain step methods onto. See
|
|
|
76
83
|
|
|
77
84
|
## Type Safety
|
|
78
85
|
|
|
79
|
-
|
|
86
|
+
You can specify input/output types when creating scorers for better type inference and IntelliSense support:
|
|
87
|
+
|
|
88
|
+
### Agent Type Shortcut
|
|
89
|
+
|
|
90
|
+
For evaluating agents, use `type: 'agent'` to automatically get the correct types for agent input/output:
|
|
80
91
|
|
|
81
92
|
```typescript
|
|
82
|
-
import { createScorer
|
|
93
|
+
import { createScorer } from '@mastra/core/scores';
|
|
83
94
|
|
|
84
|
-
//
|
|
85
|
-
const agentScorer = createScorer
|
|
95
|
+
// Agent scorer with automatic typing
|
|
96
|
+
const agentScorer = createScorer({
|
|
86
97
|
name: 'Agent Response Quality',
|
|
87
|
-
description: 'Evaluates agent responses'
|
|
98
|
+
description: 'Evaluates agent responses',
|
|
99
|
+
type: 'agent' // Automatically provides ScorerRunInputForAgent/ScorerRunOutputForAgent
|
|
88
100
|
})
|
|
89
101
|
.preprocess(({ run }) => {
|
|
90
|
-
// run.input is typed as ScorerRunInputForAgent
|
|
102
|
+
// run.input is automatically typed as ScorerRunInputForAgent
|
|
91
103
|
const userMessage = run.input.inputMessages[0]?.content;
|
|
92
104
|
return { userMessage };
|
|
93
105
|
})
|
|
94
106
|
.generateScore(({ run, results }) => {
|
|
95
|
-
// run.output is typed as ScorerRunOutputForAgent
|
|
107
|
+
// run.output is automatically typed as ScorerRunOutputForAgent
|
|
96
108
|
const response = run.output[0]?.content;
|
|
97
109
|
return response.length > 10 ? 1.0 : 0.5;
|
|
98
110
|
});
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### Custom Types with Generics
|
|
114
|
+
|
|
115
|
+
For custom input/output types, use the generic approach:
|
|
116
|
+
|
|
117
|
+
```typescript
|
|
118
|
+
import { createScorer } from '@mastra/core/scores';
|
|
99
119
|
|
|
100
|
-
// For custom input/output types
|
|
101
120
|
type CustomInput = { query: string; context: string[] };
|
|
102
121
|
type CustomOutput = { answer: string; confidence: number };
|
|
103
122
|
|
|
@@ -105,7 +124,11 @@ const customScorer = createScorer<CustomInput, CustomOutput>({
|
|
|
105
124
|
name: 'Custom Scorer',
|
|
106
125
|
description: 'Evaluates custom data'
|
|
107
126
|
})
|
|
108
|
-
.generateScore(({ run }) =>
|
|
127
|
+
.generateScore(({ run }) => {
|
|
128
|
+
// run.input is typed as CustomInput
|
|
129
|
+
// run.output is typed as CustomOutput
|
|
130
|
+
return run.output.confidence;
|
|
131
|
+
});
|
|
109
132
|
```
|
|
110
133
|
|
|
111
134
|
### Built-in Agent Types
|
|
@@ -115,6 +138,33 @@ const customScorer = createScorer<CustomInput, CustomOutput>({
|
|
|
115
138
|
|
|
116
139
|
Using these types provides autocomplete, compile-time validation, and better documentation for your scoring logic.
|
|
117
140
|
|
|
141
|
+
## Trace Scoring with Agent Types
|
|
142
|
+
|
|
143
|
+
When you use `type: 'agent'`, the same scorer can be both added directly to agents and used to score traces from agent interactions. The scorer automatically transforms trace data into the proper agent input/output format:
|
|
144
|
+
|
|
145
|
+
```typescript
|
|
146
|
+
const agentTraceScorer = createScorer({
|
|
147
|
+
name: 'Agent Trace Length',
|
|
148
|
+
description: 'Evaluates agent response length',
|
|
149
|
+
type: 'agent'
|
|
150
|
+
})
|
|
151
|
+
.generateScore(({ run }) => {
|
|
152
|
+
// Trace data is automatically transformed to agent format
|
|
153
|
+
const userMessages = run.input.inputMessages;
|
|
154
|
+
const agentResponse = run.output[0]?.content;
|
|
155
|
+
|
|
156
|
+
// Score based on response length
|
|
157
|
+
return agentResponse?.length > 50 ? 0 : 1;
|
|
158
|
+
});
|
|
159
|
+
|
|
160
|
+
// Register with Mastra for trace scoring
|
|
161
|
+
const mastra = new Mastra({
|
|
162
|
+
scorers: {
|
|
163
|
+
agentTraceScorer
|
|
164
|
+
}
|
|
165
|
+
});
|
|
166
|
+
```
|
|
167
|
+
|
|
118
168
|
## Step Method Signatures
|
|
119
169
|
|
|
120
170
|
### preprocess
|
|
@@ -67,6 +67,12 @@ const result = await scorer.run({
|
|
|
67
67
|
required: false,
|
|
68
68
|
description: "Optional runtime context from the agent or workflow step being evaluated.",
|
|
69
69
|
},
|
|
70
|
+
{
|
|
71
|
+
name: "groundTruth",
|
|
72
|
+
type: "any",
|
|
73
|
+
required: false,
|
|
74
|
+
description: "Optional expected or reference output for comparison during scoring. Automatically passed when using runExperiment.",
|
|
75
|
+
},
|
|
70
76
|
]}
|
|
71
77
|
/>
|
|
72
78
|
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: runExperiment | Scorers | Mastra Docs"
|
|
3
|
+
description: "Documentation for the runExperiment function in Mastra, which enables batch evaluation of agents and workflows using multiple scorers."
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# runExperiment
|
|
7
|
+
|
|
8
|
+
The `runExperiment` function enables batch evaluation of agents and workflows by running multiple test cases against scorers concurrently. This is essential for systematic testing, performance analysis, and validation of AI systems.
|
|
9
|
+
|
|
10
|
+
## Usage Example
|
|
11
|
+
|
|
12
|
+
```typescript
|
|
13
|
+
import { runExperiment } from '@mastra/core/scores';
|
|
14
|
+
import { myAgent } from './agents/my-agent';
|
|
15
|
+
import { myScorer1, myScorer2 } from './scorers';
|
|
16
|
+
|
|
17
|
+
const result = await runExperiment({
|
|
18
|
+
target: myAgent,
|
|
19
|
+
data: [
|
|
20
|
+
{ input: "What is machine learning?" },
|
|
21
|
+
{ input: "Explain neural networks" },
|
|
22
|
+
{ input: "How does AI work?" }
|
|
23
|
+
],
|
|
24
|
+
scorers: [myScorer1, myScorer2],
|
|
25
|
+
concurrency: 2,
|
|
26
|
+
onItemComplete: ({ item, targetResult, scorerResults }) => {
|
|
27
|
+
console.log(`Completed: ${item.input}`);
|
|
28
|
+
console.log(`Scores:`, scorerResults);
|
|
29
|
+
}
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
console.log(`Average scores:`, result.scores);
|
|
33
|
+
console.log(`Processed ${result.summary.totalItems} items`);
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Parameters
|
|
37
|
+
|
|
38
|
+
<PropertiesTable
|
|
39
|
+
content={[
|
|
40
|
+
{
|
|
41
|
+
name: "target",
|
|
42
|
+
type: "Agent | Workflow",
|
|
43
|
+
description: "The agent or workflow to evaluate.",
|
|
44
|
+
isOptional: false,
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
name: "data",
|
|
48
|
+
type: "RunExperimentDataItem[]",
|
|
49
|
+
description: "Array of test cases with input data and optional ground truth.",
|
|
50
|
+
isOptional: false,
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
name: "scorers",
|
|
54
|
+
type: "MastraScorer[] | WorkflowScorerConfig",
|
|
55
|
+
description: "Array of scorers for agents, or configuration object for workflows specifying scorers for the workflow and individual steps.",
|
|
56
|
+
isOptional: false,
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
name: "concurrency",
|
|
60
|
+
type: "number",
|
|
61
|
+
description: "Number of test cases to run concurrently.",
|
|
62
|
+
isOptional: true,
|
|
63
|
+
defaultValue: "1",
|
|
64
|
+
},
|
|
65
|
+
{
|
|
66
|
+
name: "onItemComplete",
|
|
67
|
+
type: "function",
|
|
68
|
+
description: "Callback function called after each test case completes. Receives item, target result, and scorer results.",
|
|
69
|
+
isOptional: true,
|
|
70
|
+
},
|
|
71
|
+
]}
|
|
72
|
+
/>
|
|
73
|
+
|
|
74
|
+
## Data Item Structure
|
|
75
|
+
|
|
76
|
+
<PropertiesTable
|
|
77
|
+
content={[
|
|
78
|
+
{
|
|
79
|
+
name: "input",
|
|
80
|
+
type: "string | string[] | CoreMessage[] | any",
|
|
81
|
+
description: "Input data for the target. For agents: messages or strings. For workflows: workflow input data.",
|
|
82
|
+
isOptional: false,
|
|
83
|
+
},
|
|
84
|
+
{
|
|
85
|
+
name: "groundTruth",
|
|
86
|
+
type: "any",
|
|
87
|
+
description: "Expected or reference output for comparison during scoring.",
|
|
88
|
+
isOptional: true,
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
name: "runtimeContext",
|
|
92
|
+
type: "RuntimeContext",
|
|
93
|
+
description: "Runtime context to pass to the target during execution.",
|
|
94
|
+
isOptional: true,
|
|
95
|
+
},
|
|
96
|
+
{
|
|
97
|
+
name: "tracingContext",
|
|
98
|
+
type: "TracingContext",
|
|
99
|
+
description: "Tracing context for observability and debugging.",
|
|
100
|
+
isOptional: true,
|
|
101
|
+
},
|
|
102
|
+
]}
|
|
103
|
+
/>
|
|
104
|
+
|
|
105
|
+
## Workflow Scorer Configuration
|
|
106
|
+
|
|
107
|
+
For workflows, you can specify scorers at different levels using `WorkflowScorerConfig`:
|
|
108
|
+
|
|
109
|
+
<PropertiesTable
|
|
110
|
+
content={[
|
|
111
|
+
{
|
|
112
|
+
name: "workflow",
|
|
113
|
+
type: "MastraScorer[]",
|
|
114
|
+
description: "Array of scorers to evaluate the entire workflow output.",
|
|
115
|
+
isOptional: true,
|
|
116
|
+
},
|
|
117
|
+
{
|
|
118
|
+
name: "steps",
|
|
119
|
+
type: "Record<string, MastraScorer[]>",
|
|
120
|
+
description: "Object mapping step IDs to arrays of scorers for evaluating individual step outputs.",
|
|
121
|
+
isOptional: true,
|
|
122
|
+
},
|
|
123
|
+
]}
|
|
124
|
+
/>
|
|
125
|
+
|
|
126
|
+
## Returns
|
|
127
|
+
|
|
128
|
+
<PropertiesTable
|
|
129
|
+
content={[
|
|
130
|
+
{
|
|
131
|
+
name: "scores",
|
|
132
|
+
type: "Record<string, any>",
|
|
133
|
+
description: "Average scores across all test cases, organized by scorer name.",
|
|
134
|
+
},
|
|
135
|
+
{
|
|
136
|
+
name: "summary",
|
|
137
|
+
type: "object",
|
|
138
|
+
description: "Summary information about the experiment execution.",
|
|
139
|
+
},
|
|
140
|
+
{
|
|
141
|
+
name: "summary.totalItems",
|
|
142
|
+
type: "number",
|
|
143
|
+
description: "Total number of test cases processed.",
|
|
144
|
+
},
|
|
145
|
+
]}
|
|
146
|
+
/>
|
|
147
|
+
|
|
148
|
+
## Examples
|
|
149
|
+
|
|
150
|
+
### Agent Evaluation
|
|
151
|
+
|
|
152
|
+
```typescript
|
|
153
|
+
import { runExperiment } from '@mastra/core/scores';
|
|
154
|
+
import { createScorer } from '@mastra/core/scores';
|
|
155
|
+
|
|
156
|
+
const myScorer = createScorer({
|
|
157
|
+
name: 'My Scorer',
|
|
158
|
+
description: "Check if Agent's response contains ground truth",
|
|
159
|
+
type: 'agent'
|
|
160
|
+
}).generateScore(({ run }) => {
|
|
161
|
+
const response = run.output[0]?.content || '';
|
|
162
|
+
const expectedResponse = run.groundTruth
|
|
163
|
+
return response.includes(expectedResponse) ? 1 : 0
|
|
164
|
+
});
|
|
165
|
+
|
|
166
|
+
const result = await runExperiment({
|
|
167
|
+
target: chatAgent,
|
|
168
|
+
data: [
|
|
169
|
+
{
|
|
170
|
+
input: "What is AI?",
|
|
171
|
+
groundTruth: "AI is a field of computer science that creates intelligent machines."
|
|
172
|
+
},
|
|
173
|
+
{
|
|
174
|
+
input: "How does machine learning work?",
|
|
175
|
+
groundTruth: "Machine learning uses algorithms to learn patterns from data."
|
|
176
|
+
}
|
|
177
|
+
],
|
|
178
|
+
scorers: [myScorer],
|
|
179
|
+
concurrency: 3
|
|
180
|
+
});
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
### Workflow Evaluation
|
|
184
|
+
|
|
185
|
+
```typescript
|
|
186
|
+
const workflowResult = await runExperiment({
|
|
187
|
+
target: myWorkflow,
|
|
188
|
+
data: [
|
|
189
|
+
{ input: { query: "Process this data", priority: "high" } },
|
|
190
|
+
{ input: { query: "Another task", priority: "low" } }
|
|
191
|
+
],
|
|
192
|
+
scorers: {
|
|
193
|
+
workflow: [outputQualityScorer],
|
|
194
|
+
steps: {
|
|
195
|
+
'validation-step': [validationScorer],
|
|
196
|
+
'processing-step': [processingScorer]
|
|
197
|
+
}
|
|
198
|
+
},
|
|
199
|
+
onItemComplete: ({ item, targetResult, scorerResults }) => {
|
|
200
|
+
console.log(`Workflow completed for: ${item.input.query}`);
|
|
201
|
+
if (scorerResults.workflow) {
|
|
202
|
+
console.log('Workflow scores:', scorerResults.workflow);
|
|
203
|
+
}
|
|
204
|
+
if (scorerResults.steps) {
|
|
205
|
+
console.log('Step scores:', scorerResults.steps);
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
});
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
## Related
|
|
212
|
+
|
|
213
|
+
- [createScorer()](../../reference/scorers/create-scorer) - Create custom scorers for experiments
|
|
214
|
+
- [MastraScorer](../../reference/scorers/mastra-scorer) - Learn about scorer structure and methods
|
|
215
|
+
- [Custom Scorers](../../docs/scorers/custom-scorers) - Guide to building evaluation logic
|
|
216
|
+
- [Scorers Overview](../../docs/scorers/overview) - Understanding scorer concepts
|
|
@@ -853,5 +853,6 @@ for await (const chunk of stream.fullStream) {
|
|
|
853
853
|
|
|
854
854
|
## Related Types
|
|
855
855
|
|
|
856
|
-
- [MastraModelOutput](./MastraModelOutput.mdx) - The stream object that emits these chunks
|
|
857
|
-
- [.streamVNext()](./streamVNext.mdx) - Method that returns streams emitting these chunks
|
|
856
|
+
- [MastraModelOutput](./agents/MastraModelOutput.mdx) - The stream object that emits these chunks
|
|
857
|
+
- [agent.streamVNext()](./agents/streamVNext.mdx) - Method that returns streams emitting these chunks for agents
|
|
858
|
+
- [workflow.streamVNext()](./workflows/streamVNext.mdx) - Method that returns streams emitting these chunks for workflows
|
|
@@ -318,5 +318,5 @@ if (stream.error) {
|
|
|
318
318
|
|
|
319
319
|
## Related Types
|
|
320
320
|
|
|
321
|
-
- [ChunkType](
|
|
321
|
+
- [ChunkType](../ChunkType.mdx) - All possible chunk types in the full stream
|
|
322
322
|
- [.streamVNext()](./streamVNext.mdx) - Method that returns MastraModelOutput
|
|
@@ -513,5 +513,5 @@ await agent.stream("message for agent", {
|
|
|
513
513
|
|
|
514
514
|
## Related
|
|
515
515
|
|
|
516
|
-
- [Generating responses](
|
|
517
|
-
- [Streaming responses](
|
|
516
|
+
- [Generating responses](../../../../docs/agents/overview.mdx#generating-responses)
|
|
517
|
+
- [Streaming responses](../../../../docs/agents/overview.mdx#streaming-responses)
|
|
@@ -53,6 +53,22 @@ if (result.status === "suspended") {
|
|
|
53
53
|
description: "The step to resume execution from",
|
|
54
54
|
isOptional: true,
|
|
55
55
|
},
|
|
56
|
+
{
|
|
57
|
+
name: "tracingOptions",
|
|
58
|
+
type: "TracingOptions",
|
|
59
|
+
isOptional: true,
|
|
60
|
+
description: "Options for AI tracing configuration.",
|
|
61
|
+
properties: [
|
|
62
|
+
{
|
|
63
|
+
parameters: [{
|
|
64
|
+
name: "metadata",
|
|
65
|
+
type: "Record<string, any>",
|
|
66
|
+
isOptional: true,
|
|
67
|
+
description: "Metadata to add to the root trace span. Useful for adding custom attributes like user IDs, session IDs, or feature flags."
|
|
68
|
+
}]
|
|
69
|
+
}
|
|
70
|
+
]
|
|
71
|
+
},
|
|
56
72
|
]}
|
|
57
73
|
/>
|
|
58
74
|
|
|
@@ -96,5 +112,5 @@ The stream emits various event types during workflow execution. Each event has a
|
|
|
96
112
|
## Related
|
|
97
113
|
|
|
98
114
|
- [Workflows overview](../../../docs/workflows/overview.mdx#run-workflow)
|
|
99
|
-
- [Workflow.createRunAsync()](
|
|
115
|
+
- [Workflow.createRunAsync()](../../../reference/workflows/workflow-methods/create-run.mdx)
|
|
100
116
|
- [Run.streamVNext()](./streamVNext.mdx)
|
|
@@ -122,4 +122,4 @@ The stream emits various event types during workflow execution. Each event has a
|
|
|
122
122
|
## Related
|
|
123
123
|
|
|
124
124
|
- [Workflows overview](../../../docs/workflows/overview.mdx#run-workflow)
|
|
125
|
-
- [Workflow.createRunAsync()](
|
|
125
|
+
- [Workflow.createRunAsync()](../../../reference/workflows/workflow-methods/create-run.mdx)
|
|
@@ -140,5 +140,5 @@ The stream emits various event types during workflow execution. Each event has a
|
|
|
140
140
|
## Related
|
|
141
141
|
|
|
142
142
|
- [Workflows overview](../../../docs/workflows/overview.mdx#run-workflow)
|
|
143
|
-
- [Workflow.createRunAsync()](
|
|
143
|
+
- [Workflow.createRunAsync()](../../../reference/workflows/workflow-methods/create-run.mdx)
|
|
144
144
|
- [Run.resumeStreamVNext()](./resumeStreamVNext.mdx)
|
|
@@ -31,7 +31,7 @@ You can mix and match approaches within a single scorer - for example, use a fun
|
|
|
31
31
|
|
|
32
32
|
### Initializing a Scorer
|
|
33
33
|
|
|
34
|
-
Every scorer starts with the `createScorer` factory function, which requires a name and description, and optionally accepts a
|
|
34
|
+
Every scorer starts with the `createScorer` factory function, which requires a name and description, and optionally accepts a type specification and judge configuration.
|
|
35
35
|
|
|
36
36
|
```typescript
|
|
37
37
|
import { createScorer } from '@mastra/core/scores';
|
|
@@ -54,6 +54,21 @@ const glutenCheckerScorer = createScorer({
|
|
|
54
54
|
|
|
55
55
|
The judge configuration is only needed if you plan to use prompt objects in any step. Individual steps can override this default configuration with their own judge settings.
|
|
56
56
|
|
|
57
|
+
#### Agent Type for Agent Evaluation
|
|
58
|
+
|
|
59
|
+
For type safety and compatibility with both live agent scoring and trace scoring, use `type: 'agent'` when creating scorers for agent evaluation. This allows you to use the same scorer for an agent and also use it to score traces:
|
|
60
|
+
|
|
61
|
+
```typescript
|
|
62
|
+
const myScorer = createScorer({
|
|
63
|
+
// ...
|
|
64
|
+
type: 'agent', // Automatically handles agent input/output types
|
|
65
|
+
})
|
|
66
|
+
.generateScore(({ run, results }) => {
|
|
67
|
+
// run.output is automatically typed as ScorerRunOutputForAgent
|
|
68
|
+
// run.input is automatically typed as ScorerRunInputForAgent
|
|
69
|
+
});
|
|
70
|
+
```
|
|
71
|
+
|
|
57
72
|
### Step-by-Step Breakdown
|
|
58
73
|
|
|
59
74
|
#### preprocess Step (Optional)
|
|
@@ -3,6 +3,8 @@ title: "Overview"
|
|
|
3
3
|
description: Overview of scorers in Mastra, detailing their capabilities for evaluating AI outputs and measuring performance.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
+
import { Callout } from "nextra/components";
|
|
7
|
+
|
|
6
8
|
# Scorers overview
|
|
7
9
|
|
|
8
10
|
**Scorers** are evaluation tools that measure the quality, accuracy, or performance of AI-generated outputs. Scorers provide an automated way to assess whether your agents, workflows, or language models are producing the desired results by analyzing their responses against specific criteria.
|
|
@@ -126,6 +128,32 @@ export const contentWorkflow = createWorkflow({ ... })
|
|
|
126
128
|
|
|
127
129
|
**Automatic storage**: All scoring results are automatically stored in the `mastra_scorers` table in your configured database, allowing you to analyze performance trends over time.
|
|
128
130
|
|
|
131
|
+
## Trace evaluations
|
|
132
|
+
|
|
133
|
+
In addition to live evaluations, you can use scorers to evaluate historical traces from your agent interactions and workflows. This is particularly useful for analyzing past performance, debugging issues, or running batch evaluations.
|
|
134
|
+
|
|
135
|
+
<Callout type="info">
|
|
136
|
+
**Observability Required**
|
|
137
|
+
|
|
138
|
+
To score traces, you must first configure observability in your Mastra instance to collect trace data. See [AI Tracing documentation](../observability/ai-tracing) for setup instructions.
|
|
139
|
+
</Callout>
|
|
140
|
+
|
|
141
|
+
### Scoring traces with the playground
|
|
142
|
+
|
|
143
|
+
To score traces, you first need to register your scorers with your Mastra instance:
|
|
144
|
+
|
|
145
|
+
```typescript
|
|
146
|
+
const mastra = new Mastra({
|
|
147
|
+
// ...
|
|
148
|
+
scorers: {
|
|
149
|
+
answerRelevancy: myAnswerRelevancyScorer,
|
|
150
|
+
responseQuality: myResponseQualityScorer
|
|
151
|
+
}
|
|
152
|
+
});
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
Once registered, you can score traces interactively within the Mastra playground under the Observability section. This provides a user-friendly interface for running scorers against historical traces.
|
|
156
|
+
|
|
129
157
|
## Testing scorers locally
|
|
130
158
|
|
|
131
159
|
Mastra provides a CLI command `mastra dev` to test your scorers. The playground includes a scorers section where you can run individual scorers against test inputs and view detailed results.
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
# @mastra/mcp-docs-server
|
|
2
2
|
|
|
3
|
+
## 0.13.25
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- Updated dependencies [[`5f4e677`](https://github.com/mastra-ai/mastra/commit/5f4e67757bc23f2694d83af10f88cfccdc6013ff), [`dc099b4`](https://github.com/mastra-ai/mastra/commit/dc099b40fb31147ba3f362f98d991892033c4c67), [`504438b`](https://github.com/mastra-ai/mastra/commit/504438b961bde211071186bba63a842c4e3db879), [`b342a68`](https://github.com/mastra-ai/mastra/commit/b342a68e1399cf1ece9ba11bda112db89d21118c), [`a7243e2`](https://github.com/mastra-ai/mastra/commit/a7243e2e58762667a6e3921e755e89d6bb0a3282), [`504438b`](https://github.com/mastra-ai/mastra/commit/504438b961bde211071186bba63a842c4e3db879), [`7fceb0a`](https://github.com/mastra-ai/mastra/commit/7fceb0a327d678e812f90f5387c5bc4f38bd039e), [`303a9c0`](https://github.com/mastra-ai/mastra/commit/303a9c0d7dd58795915979f06a0512359e4532fb), [`df64f9e`](https://github.com/mastra-ai/mastra/commit/df64f9ef814916fff9baedd861c988084e7c41de), [`370f8a6`](https://github.com/mastra-ai/mastra/commit/370f8a6480faec70fef18d72e5f7538f27004301), [`809eea0`](https://github.com/mastra-ai/mastra/commit/809eea092fa80c3f69b9eaf078d843b57fd2a88e), [`683e5a1`](https://github.com/mastra-ai/mastra/commit/683e5a1466e48b686825b2c11f84680f296138e4), [`3679378`](https://github.com/mastra-ai/mastra/commit/3679378673350aa314741dc826f837b1984149bc), [`7775bc2`](https://github.com/mastra-ai/mastra/commit/7775bc20bb1ad1ab24797fb420e4f96c65b0d8ec), [`623ffaf`](https://github.com/mastra-ai/mastra/commit/623ffaf2d969e11e99a0224633cf7b5a0815c857), [`9fc1613`](https://github.com/mastra-ai/mastra/commit/9fc16136400186648880fd990119ac15f7c02ee4), [`61f62aa`](https://github.com/mastra-ai/mastra/commit/61f62aa31bc88fe4ddf8da6240dbcfbeb07358bd), [`db1891a`](https://github.com/mastra-ai/mastra/commit/db1891a4707443720b7cd8a260dc7e1d49b3609c), [`e8f379d`](https://github.com/mastra-ai/mastra/commit/e8f379d390efa264c4e0874f9ac0cf8839b07777), [`652066b`](https://github.com/mastra-ai/mastra/commit/652066bd1efc6bb6813ba950ed1d7573e8b7d9d4), 
[`3e292ba`](https://github.com/mastra-ai/mastra/commit/3e292ba00837886d5d68a34cbc0d9b703c991883), [`418c136`](https://github.com/mastra-ai/mastra/commit/418c1366843d88e491bca3f87763899ce855ca29), [`ea8d386`](https://github.com/mastra-ai/mastra/commit/ea8d386cd8c5593664515fd5770c06bf2aa980ef), [`67b0f00`](https://github.com/mastra-ai/mastra/commit/67b0f005b520335c71fb85cbaa25df4ce8484a81), [`c2a4919`](https://github.com/mastra-ai/mastra/commit/c2a4919ba6797d8bdb1509e02287496eef69303e), [`c84b7d0`](https://github.com/mastra-ai/mastra/commit/c84b7d093c4657772140cbfd2b15ef72f3315ed5), [`0130986`](https://github.com/mastra-ai/mastra/commit/0130986fc62d0edcc626dd593282661dbb9af141)]:
|
|
8
|
+
- @mastra/mcp@0.13.2
|
|
9
|
+
- @mastra/core@0.19.0
|
|
10
|
+
|
|
11
|
+
## 0.13.25-alpha.1
|
|
12
|
+
|
|
13
|
+
### Patch Changes
|
|
14
|
+
|
|
15
|
+
- Updated dependencies [[`5f4e677`](https://github.com/mastra-ai/mastra/commit/5f4e67757bc23f2694d83af10f88cfccdc6013ff), [`504438b`](https://github.com/mastra-ai/mastra/commit/504438b961bde211071186bba63a842c4e3db879), [`a7243e2`](https://github.com/mastra-ai/mastra/commit/a7243e2e58762667a6e3921e755e89d6bb0a3282), [`504438b`](https://github.com/mastra-ai/mastra/commit/504438b961bde211071186bba63a842c4e3db879), [`7fceb0a`](https://github.com/mastra-ai/mastra/commit/7fceb0a327d678e812f90f5387c5bc4f38bd039e), [`df64f9e`](https://github.com/mastra-ai/mastra/commit/df64f9ef814916fff9baedd861c988084e7c41de), [`809eea0`](https://github.com/mastra-ai/mastra/commit/809eea092fa80c3f69b9eaf078d843b57fd2a88e), [`683e5a1`](https://github.com/mastra-ai/mastra/commit/683e5a1466e48b686825b2c11f84680f296138e4), [`3679378`](https://github.com/mastra-ai/mastra/commit/3679378673350aa314741dc826f837b1984149bc), [`7775bc2`](https://github.com/mastra-ai/mastra/commit/7775bc20bb1ad1ab24797fb420e4f96c65b0d8ec), [`db1891a`](https://github.com/mastra-ai/mastra/commit/db1891a4707443720b7cd8a260dc7e1d49b3609c), [`e8f379d`](https://github.com/mastra-ai/mastra/commit/e8f379d390efa264c4e0874f9ac0cf8839b07777), [`652066b`](https://github.com/mastra-ai/mastra/commit/652066bd1efc6bb6813ba950ed1d7573e8b7d9d4), [`ea8d386`](https://github.com/mastra-ai/mastra/commit/ea8d386cd8c5593664515fd5770c06bf2aa980ef), [`c2a4919`](https://github.com/mastra-ai/mastra/commit/c2a4919ba6797d8bdb1509e02287496eef69303e), [`0130986`](https://github.com/mastra-ai/mastra/commit/0130986fc62d0edcc626dd593282661dbb9af141)]:
|
|
16
|
+
- @mastra/mcp@0.13.2-alpha.0
|
|
17
|
+
- @mastra/core@0.19.0-alpha.1
|
|
18
|
+
|
|
3
19
|
## 0.13.25-alpha.0
|
|
4
20
|
|
|
5
21
|
### Patch Changes
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mastra/mcp-docs-server",
|
|
3
|
-
"version": "0.13.25
|
|
3
|
+
"version": "0.13.25",
|
|
4
4
|
"description": "MCP server for accessing Mastra.ai documentation, changelogs, and news.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -33,11 +33,11 @@
|
|
|
33
33
|
"uuid": "^11.1.0",
|
|
34
34
|
"zod": "^3.25.76",
|
|
35
35
|
"zod-to-json-schema": "^3.24.6",
|
|
36
|
-
"@mastra/
|
|
37
|
-
"@mastra/
|
|
36
|
+
"@mastra/mcp": "^0.13.2",
|
|
37
|
+
"@mastra/core": "0.19.0"
|
|
38
38
|
},
|
|
39
39
|
"devDependencies": {
|
|
40
|
-
"@hono/node-server": "^1.19.
|
|
40
|
+
"@hono/node-server": "^1.19.5",
|
|
41
41
|
"@types/jsdom": "^21.1.7",
|
|
42
42
|
"@types/node": "^20.19.0",
|
|
43
43
|
"@types/turndown": "^5.0.5",
|
|
@@ -49,8 +49,8 @@
|
|
|
49
49
|
"tsx": "^4.19.4",
|
|
50
50
|
"typescript": "^5.8.3",
|
|
51
51
|
"vitest": "^3.2.4",
|
|
52
|
-
"@
|
|
53
|
-
"@
|
|
52
|
+
"@mastra/core": "0.19.0",
|
|
53
|
+
"@internal/lint": "0.0.43"
|
|
54
54
|
},
|
|
55
55
|
"homepage": "https://mastra.ai",
|
|
56
56
|
"repository": {
|