@mastra/mcp-docs-server 0.13.7-alpha.1 → 0.13.7-alpha.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +37 -37
- package/.docs/organized/changelogs/%40mastra%2Fcloudflare-d1.md +18 -18
- package/.docs/organized/changelogs/%40mastra%2Fcloudflare.md +18 -18
- package/.docs/organized/changelogs/%40mastra%2Fcore.md +39 -39
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +21 -21
- package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +42 -42
- package/.docs/organized/changelogs/%40mastra%2Flibsql.md +29 -29
- package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +22 -22
- package/.docs/organized/changelogs/%40mastra%2Fmemory.md +29 -29
- package/.docs/organized/changelogs/%40mastra%2Fmongodb.md +20 -20
- package/.docs/organized/changelogs/%40mastra%2Fmssql.md +17 -0
- package/.docs/organized/changelogs/%40mastra%2Fpg.md +29 -29
- package/.docs/organized/changelogs/%40mastra%2Fserver.md +36 -36
- package/.docs/organized/changelogs/%40mastra%2Fupstash.md +29 -29
- package/.docs/organized/changelogs/%40mastra%2Fvectorize.md +18 -18
- package/.docs/organized/changelogs/%40mastra%2Fvoice-cloudflare.md +18 -18
- package/.docs/organized/changelogs/mastra.md +21 -21
- package/.docs/organized/code-examples/agent.md +93 -3
- package/.docs/organized/code-examples/ai-sdk-v5.md +4 -4
- package/.docs/raw/agents/input-processors.mdx +268 -0
- package/.docs/raw/agents/using-tools-and-mcp.mdx +39 -0
- package/.docs/raw/community/contributing-templates.mdx +2 -2
- package/.docs/raw/observability/tracing.mdx +44 -0
- package/.docs/raw/reference/agents/agent.mdx +7 -0
- package/.docs/raw/reference/cli/dev.mdx +6 -0
- package/.docs/raw/reference/client-js/memory.mdx +18 -0
- package/.docs/raw/reference/memory/Memory.mdx +1 -0
- package/.docs/raw/reference/memory/deleteMessages.mdx +95 -0
- package/.docs/raw/reference/memory/getThreadsByResourceId.mdx +33 -1
- package/.docs/raw/reference/rag/upstash.mdx +112 -5
- package/.docs/raw/reference/scorers/answer-relevancy.mdx +0 -1
- package/.docs/raw/reference/scorers/faithfulness.mdx +0 -1
- package/.docs/raw/reference/scorers/hallucination.mdx +0 -2
- package/.docs/raw/reference/scorers/llm-scorer.mdx +45 -1
- package/.docs/raw/reference/storage/libsql.mdx +7 -4
- package/.docs/raw/reference/storage/mssql.mdx +7 -3
- package/.docs/raw/reference/storage/postgresql.mdx +7 -3
- package/.docs/raw/reference/templates.mdx +11 -5
- package/.docs/raw/scorers/custom-scorers.mdx +319 -0
- package/.docs/raw/scorers/off-the-shelf-scorers.mdx +30 -0
- package/.docs/raw/scorers/overview.mdx +124 -0
- package/package.json +6 -6
|
@@ -5,7 +5,7 @@ description: Documentation for the UpstashVector class in Mastra, which provides
|
|
|
5
5
|
|
|
6
6
|
# Upstash Vector Store
|
|
7
7
|
|
|
8
|
-
The UpstashVector class provides vector search using [Upstash Vector](https://upstash.com/vector), a serverless vector database service that provides vector similarity search with metadata filtering capabilities.
|
|
8
|
+
The UpstashVector class provides vector search using [Upstash Vector](https://upstash.com/vector), a serverless vector database service that provides vector similarity search with metadata filtering capabilities and hybrid search support.
|
|
9
9
|
|
|
10
10
|
## Constructor Options
|
|
11
11
|
|
|
@@ -66,6 +66,12 @@ Note: This method is a no-op for Upstash as indexes are created automatically.
|
|
|
66
66
|
type: "number[][]",
|
|
67
67
|
description: "Array of embedding vectors",
|
|
68
68
|
},
|
|
69
|
+
{
|
|
70
|
+
name: "sparseVectors",
|
|
71
|
+
type: "{ indices: number[], values: number[] }[]",
|
|
72
|
+
isOptional: true,
|
|
73
|
+
description: "Array of sparse vectors for hybrid search. Each sparse vector must have matching indices and values arrays.",
|
|
74
|
+
},
|
|
69
75
|
{
|
|
70
76
|
name: "metadata",
|
|
71
77
|
type: "Record<string, any>[]",
|
|
@@ -95,6 +101,12 @@ Note: This method is a no-op for Upstash as indexes are created automatically.
|
|
|
95
101
|
type: "number[]",
|
|
96
102
|
description: "Query vector to find similar vectors",
|
|
97
103
|
},
|
|
104
|
+
{
|
|
105
|
+
name: "sparseVector",
|
|
106
|
+
type: "{ indices: number[], values: number[] }",
|
|
107
|
+
isOptional: true,
|
|
108
|
+
description: "Optional sparse vector for hybrid search. Must have matching indices and values arrays.",
|
|
109
|
+
},
|
|
98
110
|
{
|
|
99
111
|
name: "topK",
|
|
100
112
|
type: "number",
|
|
@@ -115,6 +127,18 @@ Note: This method is a no-op for Upstash as indexes are created automatically.
|
|
|
115
127
|
defaultValue: "false",
|
|
116
128
|
description: "Whether to include vectors in the results",
|
|
117
129
|
},
|
|
130
|
+
{
|
|
131
|
+
name: "fusionAlgorithm",
|
|
132
|
+
type: "FusionAlgorithm",
|
|
133
|
+
isOptional: true,
|
|
134
|
+
description: "Algorithm used to combine dense and sparse search results in hybrid search (e.g., RRF - Reciprocal Rank Fusion)",
|
|
135
|
+
},
|
|
136
|
+
{
|
|
137
|
+
name: "queryMode",
|
|
138
|
+
type: "QueryMode",
|
|
139
|
+
isOptional: true,
|
|
140
|
+
description: "Search mode: 'DENSE' for dense-only, 'SPARSE' for sparse-only, or 'HYBRID' for combined search",
|
|
141
|
+
},
|
|
118
142
|
]}
|
|
119
143
|
/>
|
|
120
144
|
|
|
@@ -173,18 +197,17 @@ interface IndexStats {
|
|
|
173
197
|
{
|
|
174
198
|
name: "update",
|
|
175
199
|
type: "object",
|
|
176
|
-
description: "Update object containing vector and/or metadata",
|
|
200
|
+
description: "Update object containing vector, sparse vector, and/or metadata",
|
|
177
201
|
},
|
|
178
202
|
]}
|
|
179
203
|
/>
|
|
180
204
|
|
|
181
205
|
The `update` object can have the following properties:
|
|
182
206
|
|
|
183
|
-
- `vector` (optional): An array of numbers representing the new vector.
|
|
207
|
+
- `vector` (optional): An array of numbers representing the new dense vector.
|
|
208
|
+
- `sparseVector` (optional): A sparse vector object with `indices` and `values` arrays for hybrid indexes.
|
|
184
209
|
- `metadata` (optional): A record of key-value pairs for metadata.
|
|
185
210
|
|
|
186
|
-
Throws an error if neither `vector` nor `metadata` is provided, or if only `metadata` is provided.
|
|
187
|
-
|
|
188
211
|
### deleteVector()
|
|
189
212
|
|
|
190
213
|
<PropertiesTable
|
|
@@ -204,6 +227,90 @@ Throws an error if neither `vector` nor `metadata` is provided, or if only `meta
|
|
|
204
227
|
|
|
205
228
|
Attempts to delete an item by its ID from the specified index. Logs an error message if the deletion fails.
|
|
206
229
|
|
|
230
|
+
## Hybrid Vector Search
|
|
231
|
+
|
|
232
|
+
Upstash Vector supports hybrid search that combines semantic search (dense vectors) with keyword-based search (sparse vectors) for improved relevance and accuracy.
|
|
233
|
+
|
|
234
|
+
### Basic Hybrid Usage
|
|
235
|
+
|
|
236
|
+
```typescript copy
|
|
237
|
+
import { UpstashVector } from '@mastra/upstash';
|
|
238
|
+
|
|
239
|
+
const vectorStore = new UpstashVector({
|
|
240
|
+
url: process.env.UPSTASH_VECTOR_URL,
|
|
241
|
+
token: process.env.UPSTASH_VECTOR_TOKEN
|
|
242
|
+
});
|
|
243
|
+
|
|
244
|
+
// Upsert vectors with both dense and sparse components
|
|
245
|
+
const denseVectors = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]];
|
|
246
|
+
const sparseVectors = [
|
|
247
|
+
{ indices: [1, 5, 10], values: [0.8, 0.6, 0.4] },
|
|
248
|
+
{ indices: [2, 6, 11], values: [0.7, 0.5, 0.3] }
|
|
249
|
+
];
|
|
250
|
+
|
|
251
|
+
await vectorStore.upsert({
|
|
252
|
+
indexName: 'hybrid-index',
|
|
253
|
+
vectors: denseVectors,
|
|
254
|
+
sparseVectors: sparseVectors,
|
|
255
|
+
metadata: [{ title: 'Document 1' }, { title: 'Document 2' }]
|
|
256
|
+
});
|
|
257
|
+
|
|
258
|
+
// Query with hybrid search
|
|
259
|
+
const results = await vectorStore.query({
|
|
260
|
+
indexName: 'hybrid-index',
|
|
261
|
+
queryVector: [0.1, 0.2, 0.3],
|
|
262
|
+
sparseVector: { indices: [1, 5], values: [0.9, 0.7] },
|
|
263
|
+
topK: 10
|
|
264
|
+
});
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
### Advanced Hybrid Search Options
|
|
268
|
+
|
|
269
|
+
```typescript copy
|
|
270
|
+
import { FusionAlgorithm, QueryMode } from '@upstash/vector';
|
|
271
|
+
|
|
272
|
+
// Query with specific fusion algorithm
|
|
273
|
+
const fusionResults = await vectorStore.query({
|
|
274
|
+
indexName: 'hybrid-index',
|
|
275
|
+
queryVector: [0.1, 0.2, 0.3],
|
|
276
|
+
sparseVector: { indices: [1, 5], values: [0.9, 0.7] },
|
|
277
|
+
fusionAlgorithm: FusionAlgorithm.RRF,
|
|
278
|
+
topK: 10
|
|
279
|
+
});
|
|
280
|
+
|
|
281
|
+
// Dense-only search
|
|
282
|
+
const denseResults = await vectorStore.query({
|
|
283
|
+
indexName: 'hybrid-index',
|
|
284
|
+
queryVector: [0.1, 0.2, 0.3],
|
|
285
|
+
queryMode: QueryMode.DENSE,
|
|
286
|
+
topK: 10
|
|
287
|
+
});
|
|
288
|
+
|
|
289
|
+
// Sparse-only search
|
|
290
|
+
const sparseResults = await vectorStore.query({
|
|
291
|
+
indexName: 'hybrid-index',
|
|
292
|
+
queryVector: [0.1, 0.2, 0.3], // Still required for index structure
|
|
293
|
+
sparseVector: { indices: [1, 5], values: [0.9, 0.7] },
|
|
294
|
+
queryMode: QueryMode.SPARSE,
|
|
295
|
+
topK: 10
|
|
296
|
+
});
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
### Updating Hybrid Vectors
|
|
300
|
+
|
|
301
|
+
```typescript copy
|
|
302
|
+
// Update both dense and sparse components
|
|
303
|
+
await vectorStore.updateVector({
|
|
304
|
+
indexName: 'hybrid-index',
|
|
305
|
+
id: 'vector-id',
|
|
306
|
+
update: {
|
|
307
|
+
vector: [0.2, 0.3, 0.4],
|
|
308
|
+
sparseVector: { indices: [2, 7, 12], values: [0.9, 0.8, 0.6] },
|
|
309
|
+
metadata: { title: 'Updated Document' }
|
|
310
|
+
}
|
|
311
|
+
});
|
|
312
|
+
```
|
|
313
|
+
|
|
207
314
|
## Response Types
|
|
208
315
|
|
|
209
316
|
Query results are returned in this format:
|
|
@@ -131,5 +131,3 @@ Final score: `(hallucinated_statements / total_statements) * scale`
|
|
|
131
131
|
|
|
132
132
|
- [Faithfulness Scorer](./faithfulness)
|
|
133
133
|
- [Answer Relevancy Scorer](./answer-relevancy)
|
|
134
|
-
- [Context Precision Scorer](./context-precision)
|
|
135
|
-
- [Context Relevancy Scorer](./context-relevancy)
|
|
@@ -3,7 +3,7 @@ title: "Reference: createLLMScorer | Scorers | Mastra Docs"
|
|
|
3
3
|
description: Documentation for creating LLM-based scorers in Mastra, allowing users to define evaluation logic using language models.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
-
#
|
|
6
|
+
# createLLMScorer
|
|
7
7
|
|
|
8
8
|
The `createLLMScorer()` function lets you define custom scorers that use a language model (LLM) as a judge for evaluation. LLM scorers are ideal for tasks where you want to use prompt-based evaluation, such as answer relevancy, faithfulness, or custom prompt-based metrics. LLM scorers integrate seamlessly with the Mastra scoring framework and can be used anywhere built-in scorers are used.
|
|
9
9
|
|
|
@@ -138,6 +138,50 @@ This function returns an instance of the MastraScorer class. See the [MastraScor
|
|
|
138
138
|
]}
|
|
139
139
|
/>
|
|
140
140
|
|
|
141
|
+
## Calculate Score Function
|
|
142
|
+
|
|
143
|
+
The `calculateScore` function converts the LLM's structured analysis into a numerical score. This function receives the results from previous steps but not the score itself (since that's what it calculates).
|
|
144
|
+
|
|
145
|
+
<PropertiesTable
|
|
146
|
+
content={[
|
|
147
|
+
{
|
|
148
|
+
name: "input",
|
|
149
|
+
type: "Record<string, any>[]",
|
|
150
|
+
required: true,
|
|
151
|
+
description:
|
|
152
|
+
"Input records provided to the scorer. If the scorer is added to an agent, this will be an array of user messages, e.g. `[{ role: 'user', content: 'hello world' }]`. If the scorer is used in a workflow, this will be the input of the workflow.",
|
|
153
|
+
},
|
|
154
|
+
{
|
|
155
|
+
name: "output",
|
|
156
|
+
type: "Record<string, any>",
|
|
157
|
+
required: true,
|
|
158
|
+
description:
|
|
159
|
+
"Output record provided to the scorer. For agents, this is usually the agent's response. For workflows, this is the workflow's output.",
|
|
160
|
+
},
|
|
161
|
+
{
|
|
162
|
+
name: "runtimeContext",
|
|
163
|
+
type: "object",
|
|
164
|
+
required: false,
|
|
165
|
+
description: "Runtime context from the agent or workflow step being evaluated (optional).",
|
|
166
|
+
},
|
|
167
|
+
{
|
|
168
|
+
name: "extractStepResult",
|
|
169
|
+
type: "object",
|
|
170
|
+
required: false,
|
|
171
|
+
description: "Result of the extract step, if defined (optional).",
|
|
172
|
+
},
|
|
173
|
+
{
|
|
174
|
+
name: "analyzeStepResult",
|
|
175
|
+
type: "object",
|
|
176
|
+
required: true,
|
|
177
|
+
description: "Structured result from the analyze step, conforming to the outputSchema defined in the analyze step.",
|
|
178
|
+
},
|
|
179
|
+
]}
|
|
180
|
+
/>
|
|
181
|
+
|
|
182
|
+
Returns: `number`
|
|
183
|
+
The function must return a numerical score, typically in the 0-1 range where 1 represents the best possible score.
|
|
184
|
+
|
|
141
185
|
## Reason Object
|
|
142
186
|
<PropertiesTable
|
|
143
187
|
content={[
|
|
@@ -65,7 +65,10 @@ For production use cases, use a persistent database URL: `libsql://your-database
|
|
|
65
65
|
|
|
66
66
|
The storage implementation handles schema creation and updates automatically. It creates the following tables:
|
|
67
67
|
|
|
68
|
-
- `
|
|
69
|
-
- `
|
|
70
|
-
- `
|
|
71
|
-
- `
|
|
68
|
+
- `mastra_workflow_snapshot`: Stores workflow state and execution data
|
|
69
|
+
- `mastra_evals`: Stores evaluation results and metadata
|
|
70
|
+
- `mastra_threads`: Stores conversation threads
|
|
71
|
+
- `mastra_messages`: Stores individual messages
|
|
72
|
+
- `mastra_traces`: Stores telemetry and tracing data
|
|
73
|
+
- `mastra_scorers`: Stores scoring and evaluation data
|
|
74
|
+
- `mastra_resources`: Stores resource working memory data
|
|
@@ -88,9 +88,13 @@ const store5 = new MSSQLStore({
|
|
|
88
88
|
|
|
89
89
|
The storage implementation handles schema creation and updates automatically. It creates the following tables:
|
|
90
90
|
|
|
91
|
-
- `
|
|
92
|
-
- `
|
|
93
|
-
- `
|
|
91
|
+
- `mastra_workflow_snapshot`: Stores workflow state and execution data
|
|
92
|
+
- `mastra_evals`: Stores evaluation results and metadata
|
|
93
|
+
- `mastra_threads`: Stores conversation threads
|
|
94
|
+
- `mastra_messages`: Stores individual messages
|
|
95
|
+
- `mastra_traces`: Stores telemetry and tracing data
|
|
96
|
+
- `mastra_scorers`: Stores scoring and evaluation data
|
|
97
|
+
- `mastra_resources`: Stores resource working memory data
|
|
94
98
|
|
|
95
99
|
### Direct Database and Pool Access
|
|
96
100
|
|
|
@@ -88,9 +88,13 @@ const store5 = new PostgresStore({
|
|
|
88
88
|
|
|
89
89
|
The storage implementation handles schema creation and updates automatically. It creates the following tables:
|
|
90
90
|
|
|
91
|
-
- `
|
|
92
|
-
- `
|
|
93
|
-
- `
|
|
91
|
+
- `mastra_workflow_snapshot`: Stores workflow state and execution data
|
|
92
|
+
- `mastra_evals`: Stores evaluation results and metadata
|
|
93
|
+
- `mastra_threads`: Stores conversation threads
|
|
94
|
+
- `mastra_messages`: Stores individual messages
|
|
95
|
+
- `mastra_traces`: Stores telemetry and tracing data
|
|
96
|
+
- `mastra_scorers`: Stores scoring and evaluation data
|
|
97
|
+
- `mastra_resources`: Stores resource working memory data
|
|
94
98
|
|
|
95
99
|
### Direct Database and Pool Access
|
|
96
100
|
|
|
@@ -70,7 +70,7 @@ All templates follow this standardized structure:
|
|
|
70
70
|
<FileTree.File name="example-agent.ts" />
|
|
71
71
|
</FileTree.Folder>
|
|
72
72
|
<FileTree.Folder name="tools">
|
|
73
|
-
<FileTree.File name="
|
|
73
|
+
<FileTree.File name="example-tool.ts" />
|
|
74
74
|
</FileTree.Folder>
|
|
75
75
|
<FileTree.Folder name="workflows">
|
|
76
76
|
<FileTree.File name="example-workflow.ts" />
|
|
@@ -126,8 +126,10 @@ Use the standard Mastra TypeScript configuration:
|
|
|
126
126
|
Include a `.env.example` file with all required environment variables:
|
|
127
127
|
|
|
128
128
|
```bash filename=".env.example"
|
|
129
|
-
#
|
|
129
|
+
# LLM provider API keys (choose one or more)
|
|
130
130
|
OPENAI_API_KEY=your_openai_api_key_here
|
|
131
|
+
ANTHROPIC_API_KEY=your_anthropic_api_key_here
|
|
132
|
+
GOOGLE_GENERATIVE_AI_API_KEY=your_google_api_key_here
|
|
131
133
|
|
|
132
134
|
# Other service API keys as needed
|
|
133
135
|
OTHER_SERVICE_API_KEY=your_api_key_here
|
|
@@ -137,15 +139,17 @@ OTHER_SERVICE_API_KEY=your_api_key_here
|
|
|
137
139
|
|
|
138
140
|
#### LLM Provider
|
|
139
141
|
|
|
140
|
-
|
|
142
|
+
We recommend using OpenAI, Anthropic, or Google model providers for templates. Choose the provider that best fits your use case:
|
|
141
143
|
|
|
142
144
|
```typescript filename="src/mastra/agents/example-agent.ts"
|
|
143
|
-
import { Agent } from '@mastra/core';
|
|
145
|
+
import { Agent } from '@mastra/core/agent';
|
|
144
146
|
import { openai } from '@ai-sdk/openai';
|
|
147
|
+
// Or use: import { anthropic } from '@ai-sdk/anthropic';
|
|
148
|
+
// Or use: import { google } from '@ai-sdk/google';
|
|
145
149
|
|
|
146
150
|
const agent = new Agent({
|
|
147
151
|
name: 'example-agent',
|
|
148
|
-
model: openai('gpt-4'),
|
|
152
|
+
model: openai('gpt-4'), // or anthropic('') or google('')
|
|
149
153
|
instructions: 'Your agent instructions here',
|
|
150
154
|
// ... other configuration
|
|
151
155
|
});
|
|
@@ -186,6 +190,8 @@ Detailed explanation of the template's functionality and use case.
|
|
|
186
190
|
## Environment Variables
|
|
187
191
|
|
|
188
192
|
- `OPENAI_API_KEY`: Your OpenAI API key. Get one at [OpenAI Platform](https://platform.openai.com/api-keys)
|
|
193
|
+
- `ANTHROPIC_API_KEY`: Your Anthropic API key. Get one at [Anthropic Console](https://console.anthropic.com/settings/keys)
|
|
194
|
+
- `GOOGLE_GENERATIVE_AI_API_KEY`: Your Google AI API key. Get one at [Google AI Studio](https://makersuite.google.com/app/apikey)
|
|
189
195
|
- `OTHER_API_KEY`: Description of what this key is for
|
|
190
196
|
|
|
191
197
|
## Usage
|
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
## Creating scorers
|
|
2
|
+
|
|
3
|
+
Mastra provides two approaches for creating custom scorers:
|
|
4
|
+
|
|
5
|
+
**Code scorers** use programmatic logic and algorithms. They're ideal for deterministic evaluations, performance-critical scenarios, and cases where you have clear algorithmic criteria.
|
|
6
|
+
|
|
7
|
+
**LLM scorers** use language models as judges. They're perfect for subjective evaluations, complex criteria that are difficult to code algorithmically, and cases where human-like judgment is needed.
|
|
8
|
+
|
|
9
|
+
### Code-based scorers
|
|
10
|
+
|
|
11
|
+
Code scorers use `createScorer` to build evaluation logic with programmatic algorithms. They're ideal for deterministic evaluations, performance-critical scenarios, and cases where you have clear algorithmic criteria or need integration with existing libraries.
|
|
12
|
+
|
|
13
|
+
Code scorers follow Mastra's three-step evaluation pipeline:
|
|
14
|
+
- an optional **extract** step for preprocessing complex data
|
|
15
|
+
- a required **analyze** step for core evaluation and scoring
|
|
16
|
+
- and an optional **reason** step for generating explanations.
|
|
17
|
+
|
|
18
|
+
For the complete API reference, see [`createScorer`](/reference/scorers/custom-code-scorer), and for a detailed explanation of the pipeline, see [evaluation process](/docs/scorers/overview#evaluation-pipeline).
|
|
19
|
+
|
|
20
|
+
#### Extract Step
|
|
21
|
+
|
|
22
|
+
This optional step preprocesses input/output data when you need to evaluate multiple distinct elements, filter content, or focus analysis on specific parts of complex data.
|
|
23
|
+
|
|
24
|
+
- **Receives:**
|
|
25
|
+
- `input`: User messages (when used with agents) or workflow step input (when used with workflow steps)
|
|
26
|
+
- `output`: Agent's response (when used with agents) or workflow step output (when used with workflow steps)
|
|
27
|
+
- `runtimeContext`: Runtime context from the agent or workflow step being evaluated
|
|
28
|
+
- **Must return:** `{ results: any }`
|
|
29
|
+
- **Data flow:** The `results` value is passed to the analyze step as `extractStepResult`
|
|
30
|
+
|
|
31
|
+
```typescript filename="src/mastra/scorers/keyword-coverage-scorer.ts" showLineNumbers copy
|
|
32
|
+
import { createScorer } from "@mastra/core/scores";
|
|
33
|
+
import keywordExtractor from "keyword-extractor";
|
|
34
|
+
|
|
35
|
+
export const keywordCoverageScorer = createScorer({
|
|
36
|
+
name: "Keyword Coverage",
|
|
37
|
+
description: "Evaluates how well the output covers keywords from the input",
|
|
38
|
+
|
|
39
|
+
// Step 1: Extract keywords from input and output
|
|
40
|
+
extract: async ({ input, output }) => {
|
|
41
|
+
const inputText = input?.map(i => i.content).join(", ") || "";
|
|
42
|
+
const outputText = output.text;
|
|
43
|
+
|
|
44
|
+
const extractKeywords = (text: string) => {
|
|
45
|
+
return keywordExtractor.extract(text);
|
|
46
|
+
};
|
|
47
|
+
|
|
48
|
+
const inputKeywords = new Set(extractKeywords(inputText));
|
|
49
|
+
const outputKeywords = new Set(extractKeywords(outputText));
|
|
50
|
+
|
|
51
|
+
return {
|
|
52
|
+
results: {
|
|
53
|
+
inputKeywords,
|
|
54
|
+
outputKeywords,
|
|
55
|
+
},
|
|
56
|
+
};
|
|
57
|
+
},
|
|
58
|
+
|
|
59
|
+
// ... analyze and reason steps
|
|
60
|
+
});
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
#### Analyze Step
|
|
64
|
+
|
|
65
|
+
This required step performs the core evaluation and generates the numerical score for all scorers.
|
|
66
|
+
|
|
67
|
+
- **Receives:** Everything from extract step, plus:
|
|
68
|
+
- `extractStepResult`: Results from the extract step (if extract step was defined)
|
|
69
|
+
- **Must return:** `{ score: number, results?: any }`
|
|
70
|
+
- **Data flow:** The `score` and optional `results` are passed to the reason step
|
|
71
|
+
|
|
72
|
+
```typescript filename="src/mastra/scorers/keyword-coverage-scorer.ts" showLineNumbers copy
|
|
73
|
+
export const keywordCoverageScorer = createScorer({
|
|
74
|
+
// ... name, description, extract step
|
|
75
|
+
|
|
76
|
+
// Step 2: Analyze keyword coverage and calculate score
|
|
77
|
+
analyze: async ({ input, output, extractStepResult }) => {
|
|
78
|
+
const { inputKeywords, outputKeywords } = extractStepResult.results;
|
|
79
|
+
|
|
80
|
+
if (inputKeywords.size === 0) {
|
|
81
|
+
return { score: 1, results: { coverage: 1, matched: 0, total: 0 } };
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
const matchedKeywords = [...inputKeywords].filter(keyword =>
|
|
85
|
+
outputKeywords.has(keyword)
|
|
86
|
+
);
|
|
87
|
+
|
|
88
|
+
const coverage = matchedKeywords.length / inputKeywords.size;
|
|
89
|
+
|
|
90
|
+
return {
|
|
91
|
+
score: coverage,
|
|
92
|
+
results: {
|
|
93
|
+
coverage,
|
|
94
|
+
matched: matchedKeywords.length,
|
|
95
|
+
total: inputKeywords.size,
|
|
96
|
+
matchedKeywords,
|
|
97
|
+
},
|
|
98
|
+
};
|
|
99
|
+
},
|
|
100
|
+
|
|
101
|
+
// ... reason step
|
|
102
|
+
});
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
#### Reason Step
|
|
106
|
+
|
|
107
|
+
This optional step generates human-readable explanations for scores, useful for actionable feedback, debugging transparency, or compliance documentation.
|
|
108
|
+
|
|
109
|
+
- **Receives:** Everything from analyze step, plus:
|
|
110
|
+
- `score`: The numerical score (0-1) calculated by the analyze step
|
|
111
|
+
- `analyzeStepResult`: Results from the analyze step (contains the score and any additional results)
|
|
112
|
+
- **Must return:** `{ reason: string }`
|
|
113
|
+
|
|
114
|
+
```typescript filename="src/mastra/scorers/keyword-coverage-scorer.ts" showLineNumbers copy
|
|
115
|
+
export const keywordCoverageScorer = createScorer({
|
|
116
|
+
// ... name, description, extract and analyze steps
|
|
117
|
+
|
|
118
|
+
// Step 3: Generate explanation for the score
|
|
119
|
+
reason: async ({ score, analyzeStepResult, extractStepResult }) => {
|
|
120
|
+
const { matched, total, matchedKeywords } = analyzeStepResult.results;
|
|
121
|
+
const { inputKeywords } = extractStepResult.results;
|
|
122
|
+
|
|
123
|
+
const percentage = Math.round(score * 100);
|
|
124
|
+
const missedKeywords = [...inputKeywords].filter(
|
|
125
|
+
keyword => !matchedKeywords.includes(keyword)
|
|
126
|
+
);
|
|
127
|
+
|
|
128
|
+
let reason = `The output achieved ${percentage}% keyword coverage (${matched}/${total} keywords).`;
|
|
129
|
+
|
|
130
|
+
if (matchedKeywords.length > 0) {
|
|
131
|
+
reason += ` Covered keywords: ${matchedKeywords.join(", ")}.`;
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
if (missedKeywords.length > 0) {
|
|
135
|
+
reason += ` Missing keywords: ${missedKeywords.join(", ")}.`;
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
return { reason };
|
|
139
|
+
},
|
|
140
|
+
});
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
**Examples and Resources:**
|
|
144
|
+
- [Custom Native JavaScript Scorer Example](/examples/scorers/custom-native-javascript-eval) - Example walkthrough.
|
|
145
|
+
- [Built-in Code Scorers](https://github.com/mastra-ai/mastra/tree/main/packages/evals/src/scorers/code) - Real implementations for reference
|
|
146
|
+
|
|
147
|
+
### LLM-based scorers
|
|
148
|
+
|
|
149
|
+
LLM scorers use `createLLMScorer` to build evaluations that leverage language models as judges. They're perfect for subjective evaluations that require understanding context, complex criteria that are difficult to code algorithmically, natural language understanding tasks, and cases where human-like judgment is needed.
|
|
150
|
+
|
|
151
|
+
LLM scorers follow the same evaluation pipeline as code scorers with an additional `calculateScore` function:
|
|
152
|
+
- an optional **extract** step where the LLM processes input/output and returns structured data
|
|
153
|
+
- a required **analyze** step where the LLM performs evaluation and returns structured analysis
|
|
154
|
+
- a required **calculateScore** function that converts the LLM's analysis into a numerical score
|
|
155
|
+
- and an optional **reason** step where the LLM generates human-readable explanations
|
|
156
|
+
|
|
157
|
+
The `calculateScore` function leverages the best of both approaches: LLMs excel at qualitative analysis and understanding, while deterministic functions ensure precise and consistent numerical scoring.
|
|
158
|
+
|
|
159
|
+
For the complete API reference, see [`createLLMScorer`](/reference/scorers/llm-scorer), and for a detailed explanation of the pipeline, see [evaluation process](/docs/scorers/overview#evaluation-pipeline).
|
|
160
|
+
|
|
161
|
+
#### Judge Configuration
|
|
162
|
+
|
|
163
|
+
All LLM scorer steps share this required configuration that defines the model and system instructions.
|
|
164
|
+
|
|
165
|
+
- **Configuration:** `judge` object containing:
|
|
166
|
+
- **model:** The LLM model instance for evaluation
|
|
167
|
+
- **instructions:** System prompt that guides the LLM's behavior
|
|
168
|
+
|
|
169
|
+
```typescript filename="src/mastra/scorers/tone-scorer.ts" showLineNumbers copy
|
|
170
|
+
import { openai } from "@ai-sdk/openai";
|
|
171
|
+
import { createLLMScorer } from "@mastra/core/scores";
|
|
172
|
+
|
|
173
|
+
export const toneScorer = createLLMScorer({
|
|
174
|
+
name: 'Tone Scorer',
|
|
175
|
+
description: 'Evaluates the tone of the output',
|
|
176
|
+
|
|
177
|
+
// Shared judge configuration
|
|
178
|
+
judge: {
|
|
179
|
+
model: openai('gpt-4o'),
|
|
180
|
+
instructions: 'You are an expert in analyzing text tone and communication style.',
|
|
181
|
+
},
|
|
182
|
+
|
|
183
|
+
// ... other steps
|
|
184
|
+
});
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
#### Extract Step
|
|
188
|
+
|
|
189
|
+
This optional step uses an LLM to preprocess input/output data when you need to evaluate multiple distinct elements, filter content, or focus analysis on specific parts of complex data.
|
|
190
|
+
|
|
191
|
+
- **Configuration:** `{ description, outputSchema, createPrompt }`
|
|
192
|
+
- **Data flow:** The structured output (defined by outputSchema) is passed to the analyze step as `extractStepResult`
|
|
193
|
+
|
|
194
|
+
```typescript filename="src/mastra/scorers/content-scorer.ts" showLineNumbers copy
|
|
195
|
+
export const contentScorer = createLLMScorer({
|
|
196
|
+
// ... judge configuration
|
|
197
|
+
|
|
198
|
+
extract: {
|
|
199
|
+
description: 'Extract key themes and topics from the content',
|
|
200
|
+
outputSchema: z.object({
|
|
201
|
+
themes: z.array(z.string()),
|
|
202
|
+
topics: z.array(z.string()),
|
|
203
|
+
keyPhrases: z.array(z.string())
|
|
204
|
+
}),
|
|
205
|
+
createPrompt: ({ run }) => `
|
|
206
|
+
Analyze this content and extract:
|
|
207
|
+
1. Main themes (3-5 high-level concepts)
|
|
208
|
+
2. Specific topics mentioned
|
|
209
|
+
3. Key phrases that capture the essence
|
|
210
|
+
|
|
211
|
+
Content: ${run.output.text}
|
|
212
|
+
|
|
213
|
+
Return a JSON object with themes, topics, and keyPhrases arrays.
|
|
214
|
+
`,
|
|
215
|
+
},
|
|
216
|
+
|
|
217
|
+
// ... other steps
|
|
218
|
+
});
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
#### Analyze Step
|
|
222
|
+
|
|
223
|
+
This required step uses an LLM to perform the core evaluation and return structured analysis that will be converted to a numerical score.
|
|
224
|
+
|
|
225
|
+
- **Configuration:** `{ description, outputSchema, createPrompt }`
|
|
226
|
+
- **Data flow:** The structured output is passed to the calculateScore function and then to the reason step
|
|
227
|
+
|
|
228
|
+
```typescript filename="src/mastra/scorers/quality-scorer.ts" showLineNumbers copy
|
|
229
|
+
export const qualityScorer = createLLMScorer({
|
|
230
|
+
// ... judge configuration
|
|
231
|
+
|
|
232
|
+
analyze: {
|
|
233
|
+
description: 'Evaluate content quality across multiple dimensions',
|
|
234
|
+
outputSchema: z.object({
|
|
235
|
+
clarity: z.number().min(1).max(5),
|
|
236
|
+
accuracy: z.number().min(1).max(5),
|
|
237
|
+
completeness: z.number().min(1).max(5),
|
|
238
|
+
relevance: z.number().min(1).max(5)
|
|
239
|
+
}),
|
|
240
|
+
createPrompt: ({ run }) => `
|
|
241
|
+
Evaluate this content on a scale of 1-5 for:
|
|
242
|
+
- Clarity: How clear and understandable is it?
|
|
243
|
+
- Accuracy: How factually correct does it appear?
|
|
244
|
+
- Completeness: How thorough is the response?
|
|
245
|
+
- Relevance: How well does it address the input?
|
|
246
|
+
|
|
247
|
+
Input: ${run.input.map(i => i.content).join(', ')}
|
|
248
|
+
Output: ${run.output.text}
|
|
249
|
+
|
|
250
|
+
Return a JSON object with numeric scores for each dimension.
|
|
251
|
+
`,
|
|
252
|
+
},
|
|
253
|
+
|
|
254
|
+
// ... other steps
|
|
255
|
+
});
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
#### Calculate Score Step
|
|
259
|
+
|
|
260
|
+
This required function converts the LLM's structured analysis into a numerical score, providing deterministic scoring logic since LLMs aren't reliable for consistent numerical outputs.
|
|
261
|
+
|
|
262
|
+
- **Configuration:** `calculateScore` function that receives `{ run }` and returns a number
|
|
263
|
+
- **Data flow:** Converts the analyze step's structured output into a numerical score (0-1 range)
|
|
264
|
+
|
|
265
|
+
```typescript filename="src/mastra/scorers/quality-scorer.ts" showLineNumbers copy
|
|
266
|
+
export const qualityScorer = createLLMScorer({
|
|
267
|
+
// ... previous steps
|
|
268
|
+
|
|
269
|
+
calculateScore: ({ run }) => {
|
|
270
|
+
const { clarity, accuracy, completeness, relevance } = run.analyzeStepResult;
|
|
271
|
+
|
|
272
|
+
// Calculate weighted average (scale of 1-5 to 0-1)
|
|
273
|
+
const weights = { clarity: 0.3, accuracy: 0.3, completeness: 0.2, relevance: 0.2 };
|
|
274
|
+
const weightedSum = (clarity * weights.clarity) +
|
|
275
|
+
(accuracy * weights.accuracy) +
|
|
276
|
+
(completeness * weights.completeness) +
|
|
277
|
+
(relevance * weights.relevance);
|
|
278
|
+
|
|
279
|
+
// Convert from 1-5 scale to 0-1 scale
|
|
280
|
+
return (weightedSum - 1) / 4;
|
|
281
|
+
},
|
|
282
|
+
|
|
283
|
+
// ... other steps
|
|
284
|
+
});
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
#### Reason Step
|
|
288
|
+
|
|
289
|
+
This optional step uses an LLM to generate human-readable explanations for scores, useful for actionable feedback, debugging transparency, or compliance documentation.
|
|
290
|
+
|
|
291
|
+
- **Configuration:** `{ description, createPrompt }`
|
|
292
|
+
- **Data flow:** Receives all previous step results and score, returns a string explanation
|
|
293
|
+
|
|
294
|
+
```typescript filename="src/mastra/scorers/quality-scorer.ts" showLineNumbers copy
|
|
295
|
+
export const qualityScorer = createLLMScorer({
|
|
296
|
+
// ... previous steps
|
|
297
|
+
|
|
298
|
+
reason: {
|
|
299
|
+
createPrompt: ({ run }) => {
|
|
300
|
+
const { clarity, accuracy, completeness, relevance } = run.analyzeStepResult;
|
|
301
|
+
const percentage = Math.round(run.score * 100);
|
|
302
|
+
|
|
303
|
+
return `
|
|
304
|
+
The content received a ${percentage}% quality score based on:
|
|
305
|
+
- Clarity: ${clarity}/5
|
|
306
|
+
- Accuracy: ${accuracy}/5
|
|
307
|
+
- Completeness: ${completeness}/5
|
|
308
|
+
- Relevance: ${relevance}/5
|
|
309
|
+
|
|
310
|
+
Provide a brief explanation of what contributed to this score.
|
|
311
|
+
`;
|
|
312
|
+
},
|
|
313
|
+
},
|
|
314
|
+
});
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
**Examples and Resources:**
|
|
318
|
+
- [Custom LLM Judge Scorer Example](/examples/scorers/custom-llm-judge-eval) - Example walkthrough with a gluten checker
|
|
319
|
+
- [Built-in LLM Scorers](https://github.com/mastra-ai/mastra/tree/main/packages/evals/src/scorers/llm) - Real implementations for reference
|