@mastra/mcp-docs-server 0.13.7-alpha.1 → 0.13.7-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +37 -37
  2. package/.docs/organized/changelogs/%40mastra%2Fcloudflare-d1.md +18 -18
  3. package/.docs/organized/changelogs/%40mastra%2Fcloudflare.md +18 -18
  4. package/.docs/organized/changelogs/%40mastra%2Fcore.md +39 -39
  5. package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +21 -21
  6. package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +42 -42
  7. package/.docs/organized/changelogs/%40mastra%2Flibsql.md +29 -29
  8. package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +22 -22
  9. package/.docs/organized/changelogs/%40mastra%2Fmemory.md +29 -29
  10. package/.docs/organized/changelogs/%40mastra%2Fmongodb.md +20 -20
  11. package/.docs/organized/changelogs/%40mastra%2Fmssql.md +17 -0
  12. package/.docs/organized/changelogs/%40mastra%2Fpg.md +29 -29
  13. package/.docs/organized/changelogs/%40mastra%2Fserver.md +36 -36
  14. package/.docs/organized/changelogs/%40mastra%2Fupstash.md +29 -29
  15. package/.docs/organized/changelogs/%40mastra%2Fvectorize.md +18 -18
  16. package/.docs/organized/changelogs/%40mastra%2Fvoice-cloudflare.md +18 -18
  17. package/.docs/organized/changelogs/mastra.md +21 -21
  18. package/.docs/organized/code-examples/agent.md +93 -3
  19. package/.docs/organized/code-examples/ai-sdk-v5.md +4 -4
  20. package/.docs/raw/agents/input-processors.mdx +268 -0
  21. package/.docs/raw/agents/using-tools-and-mcp.mdx +39 -0
  22. package/.docs/raw/community/contributing-templates.mdx +2 -2
  23. package/.docs/raw/observability/tracing.mdx +44 -0
  24. package/.docs/raw/reference/agents/agent.mdx +7 -0
  25. package/.docs/raw/reference/cli/dev.mdx +6 -0
  26. package/.docs/raw/reference/client-js/memory.mdx +18 -0
  27. package/.docs/raw/reference/memory/Memory.mdx +1 -0
  28. package/.docs/raw/reference/memory/deleteMessages.mdx +95 -0
  29. package/.docs/raw/reference/memory/getThreadsByResourceId.mdx +33 -1
  30. package/.docs/raw/reference/rag/upstash.mdx +112 -5
  31. package/.docs/raw/reference/scorers/answer-relevancy.mdx +0 -1
  32. package/.docs/raw/reference/scorers/faithfulness.mdx +0 -1
  33. package/.docs/raw/reference/scorers/hallucination.mdx +0 -2
  34. package/.docs/raw/reference/scorers/llm-scorer.mdx +45 -1
  35. package/.docs/raw/reference/storage/libsql.mdx +7 -4
  36. package/.docs/raw/reference/storage/mssql.mdx +7 -3
  37. package/.docs/raw/reference/storage/postgresql.mdx +7 -3
  38. package/.docs/raw/reference/templates.mdx +11 -5
  39. package/.docs/raw/scorers/custom-scorers.mdx +319 -0
  40. package/.docs/raw/scorers/off-the-shelf-scorers.mdx +30 -0
  41. package/.docs/raw/scorers/overview.mdx +124 -0
  42. package/package.json +6 -6
@@ -5,7 +5,7 @@ description: Documentation for the UpstashVector class in Mastra, which provides
5
5
 
6
6
  # Upstash Vector Store
7
7
 
8
- The UpstashVector class provides vector search using [Upstash Vector](https://upstash.com/vector), a serverless vector database service that provides vector similarity search with metadata filtering capabilities.
8
+ The UpstashVector class provides vector search using [Upstash Vector](https://upstash.com/vector), a serverless vector database service offering vector similarity search with metadata filtering and hybrid search support.
9
9
 
10
10
  ## Constructor Options
11
11
 
@@ -66,6 +66,12 @@ Note: This method is a no-op for Upstash as indexes are created automatically.
66
66
  type: "number[][]",
67
67
  description: "Array of embedding vectors",
68
68
  },
69
+ {
70
+ name: "sparseVectors",
71
+ type: "{ indices: number[], values: number[] }[]",
72
+ isOptional: true,
73
+ description: "Array of sparse vectors for hybrid search. Each sparse vector must have matching indices and values arrays.",
74
+ },
69
75
  {
70
76
  name: "metadata",
71
77
  type: "Record<string, any>[]",
@@ -95,6 +101,12 @@ Note: This method is a no-op for Upstash as indexes are created automatically.
95
101
  type: "number[]",
96
102
  description: "Query vector to find similar vectors",
97
103
  },
104
+ {
105
+ name: "sparseVector",
106
+ type: "{ indices: number[], values: number[] }",
107
+ isOptional: true,
108
+ description: "Optional sparse vector for hybrid search. Must have matching indices and values arrays.",
109
+ },
98
110
  {
99
111
  name: "topK",
100
112
  type: "number",
@@ -115,6 +127,18 @@ Note: This method is a no-op for Upstash as indexes are created automatically.
115
127
  defaultValue: "false",
116
128
  description: "Whether to include vectors in the results",
117
129
  },
130
+ {
131
+ name: "fusionAlgorithm",
132
+ type: "FusionAlgorithm",
133
+ isOptional: true,
134
+ description: "Algorithm used to combine dense and sparse search results in hybrid search (e.g., RRF - Reciprocal Rank Fusion)",
135
+ },
136
+ {
137
+ name: "queryMode",
138
+ type: "QueryMode",
139
+ isOptional: true,
140
+ description: "Search mode: 'DENSE' for dense-only, 'SPARSE' for sparse-only, or 'HYBRID' for combined search",
141
+ },
118
142
  ]}
119
143
  />
120
144
 
@@ -173,18 +197,17 @@ interface IndexStats {
173
197
  {
174
198
  name: "update",
175
199
  type: "object",
176
- description: "Update object containing vector and/or metadata",
200
+ description: "Update object containing vector, sparse vector, and/or metadata",
177
201
  },
178
202
  ]}
179
203
  />
180
204
 
181
205
  The `update` object can have the following properties:
182
206
 
183
- - `vector` (optional): An array of numbers representing the new vector.
207
+ - `vector` (optional): An array of numbers representing the new dense vector.
208
+ - `sparseVector` (optional): A sparse vector object with `indices` and `values` arrays for hybrid indexes.
184
209
  - `metadata` (optional): A record of key-value pairs for metadata.
185
210
 
186
- Throws an error if neither `vector` nor `metadata` is provided, or if only `metadata` is provided.
187
-
188
211
  ### deleteVector()
189
212
 
190
213
  <PropertiesTable
@@ -204,6 +227,90 @@ Throws an error if neither `vector` nor `metadata` is provided, or if only `meta
204
227
 
205
228
  Attempts to delete an item by its ID from the specified index. Logs an error message if the deletion fails.
206
229
 
230
+ ## Hybrid Vector Search
231
+
232
+ Upstash Vector supports hybrid search that combines semantic search (dense vectors) with keyword-based search (sparse vectors) for improved relevance and accuracy.
233
+
234
+ ### Basic Hybrid Usage
235
+
236
+ ```typescript copy
237
+ import { UpstashVector } from '@mastra/upstash';
238
+
239
+ const vectorStore = new UpstashVector({
240
+ url: process.env.UPSTASH_VECTOR_URL,
241
+ token: process.env.UPSTASH_VECTOR_TOKEN
242
+ });
243
+
244
+ // Upsert vectors with both dense and sparse components
245
+ const denseVectors = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]];
246
+ const sparseVectors = [
247
+ { indices: [1, 5, 10], values: [0.8, 0.6, 0.4] },
248
+ { indices: [2, 6, 11], values: [0.7, 0.5, 0.3] }
249
+ ];
250
+
251
+ await vectorStore.upsert({
252
+ indexName: 'hybrid-index',
253
+ vectors: denseVectors,
254
+ sparseVectors: sparseVectors,
255
+ metadata: [{ title: 'Document 1' }, { title: 'Document 2' }]
256
+ });
257
+
258
+ // Query with hybrid search
259
+ const results = await vectorStore.query({
260
+ indexName: 'hybrid-index',
261
+ queryVector: [0.1, 0.2, 0.3],
262
+ sparseVector: { indices: [1, 5], values: [0.9, 0.7] },
263
+ topK: 10
264
+ });
265
+ ```
266
+
267
+ ### Advanced Hybrid Search Options
268
+
269
+ ```typescript copy
270
+ import { FusionAlgorithm, QueryMode } from '@upstash/vector';
271
+
272
+ // Query with specific fusion algorithm
273
+ const fusionResults = await vectorStore.query({
274
+ indexName: 'hybrid-index',
275
+ queryVector: [0.1, 0.2, 0.3],
276
+ sparseVector: { indices: [1, 5], values: [0.9, 0.7] },
277
+ fusionAlgorithm: FusionAlgorithm.RRF,
278
+ topK: 10
279
+ });
280
+
281
+ // Dense-only search
282
+ const denseResults = await vectorStore.query({
283
+ indexName: 'hybrid-index',
284
+ queryVector: [0.1, 0.2, 0.3],
285
+ queryMode: QueryMode.DENSE,
286
+ topK: 10
287
+ });
288
+
289
+ // Sparse-only search
290
+ const sparseResults = await vectorStore.query({
291
+ indexName: 'hybrid-index',
292
+ queryVector: [0.1, 0.2, 0.3], // Still required for index structure
293
+ sparseVector: { indices: [1, 5], values: [0.9, 0.7] },
294
+ queryMode: QueryMode.SPARSE,
295
+ topK: 10
296
+ });
297
+ ```
298
+
299
+ ### Updating Hybrid Vectors
300
+
301
+ ```typescript copy
302
+ // Update both dense and sparse components
303
+ await vectorStore.updateVector({
304
+ indexName: 'hybrid-index',
305
+ id: 'vector-id',
306
+ update: {
307
+ vector: [0.2, 0.3, 0.4],
308
+ sparseVector: { indices: [2, 7, 12], values: [0.9, 0.8, 0.6] },
309
+ metadata: { title: 'Updated Document' }
310
+ }
311
+ });
312
+ ```
313
+
207
314
  ## Response Types
208
315
 
209
316
  Query results are returned in this format:
@@ -111,5 +111,4 @@ The scorer evaluates relevancy through query-answer alignment, considering compl
111
111
 
112
112
  ## Related
113
113
 
114
- - [Prompt Alignment Scorer](./prompt-alignment)
115
114
  - [Faithfulness Scorer](./faithfulness)
@@ -120,4 +120,3 @@ Final score: `(supported_claims / total_claims) * scale`
120
120
 
121
121
  - [Answer Relevancy Scorer](./answer-relevancy)
122
122
  - [Hallucination Scorer](./hallucination)
123
- - [Context Relevancy Scorer](./context-relevancy)
@@ -131,5 +131,3 @@ Final score: `(hallucinated_statements / total_statements) * scale`
131
131
 
132
132
  - [Faithfulness Scorer](./faithfulness)
133
133
  - [Answer Relevancy Scorer](./answer-relevancy)
134
- - [Context Precision Scorer](./context-precision)
135
- - [Context Relevancy Scorer](./context-relevancy)
@@ -3,7 +3,7 @@ title: "Reference: createLLMScorer | Scorers | Mastra Docs"
3
3
  description: Documentation for creating LLM-based scorers in Mastra, allowing users to define evaluation logic using language models.
4
4
  ---
5
5
 
6
- # LLM Scorer
6
+ # createLLMScorer
7
7
 
8
8
  The `createLLMScorer()` function lets you define custom scorers that use a language model (LLM) as a judge for evaluation. LLM scorers are ideal for tasks where you want to use prompt-based evaluation, such as answer relevancy, faithfulness, or custom prompt-based metrics. LLM scorers integrate seamlessly with the Mastra scoring framework and can be used anywhere built-in scorers are used.
9
9
 
@@ -138,6 +138,50 @@ This function returns an instance of the MastraScorer class. See the [MastraScor
138
138
  ]}
139
139
  />
140
140
 
141
+ ## Calculate Score Function
142
+
143
+ The `calculateScore` function converts the LLM's structured analysis into a numerical score. This function receives the results from previous steps but not the score itself (since that's what it calculates).
144
+
145
+ <PropertiesTable
146
+ content={[
147
+ {
148
+ name: "input",
149
+ type: "Record<string, any>[]",
150
+ required: true,
151
+ description:
152
+ "Input records provided to the scorer. If the scorer is added to an agent, this will be an array of user messages, e.g. `[{ role: 'user', content: 'hello world' }]`. If the scorer is used in a workflow, this will be the input of the workflow.",
153
+ },
154
+ {
155
+ name: "output",
156
+ type: "Record<string, any>",
157
+ required: true,
158
+ description:
159
+ "Output record provided to the scorer. For agents, this is usually the agent's response. For workflows, this is the workflow's output.",
160
+ },
161
+ {
162
+ name: "runtimeContext",
163
+ type: "object",
164
+ required: false,
165
+ description: "Runtime context from the agent or workflow step being evaluated (optional).",
166
+ },
167
+ {
168
+ name: "extractStepResult",
169
+ type: "object",
170
+ required: false,
171
+ description: "Result of the extract step, if defined (optional).",
172
+ },
173
+ {
174
+ name: "analyzeStepResult",
175
+ type: "object",
176
+ required: true,
177
+ description: "Structured result from the analyze step, conforming to the outputSchema defined in the analyze step.",
178
+ },
179
+ ]}
180
+ />
181
+
182
+ Returns: `number`
183
+ The function must return a numerical score, typically in the 0-1 range where 1 represents the best possible score.
184
+
141
185
  ## Reason Object
142
186
  <PropertiesTable
143
187
  content={[
@@ -65,7 +65,10 @@ For production use cases, use a persistent database URL: `libsql://your-database
65
65
 
66
66
  The storage implementation handles schema creation and updates automatically. It creates the following tables:
67
67
 
68
- - `threads`: Stores conversation threads
69
- - `messages`: Stores individual messages
70
- - `resources`: Stores user-specific data for resource-scoped working memory
71
- - `metadata`: Stores additional metadata for threads and messages
68
+ - `mastra_workflow_snapshot`: Stores workflow state and execution data
69
+ - `mastra_evals`: Stores evaluation results and metadata
70
+ - `mastra_threads`: Stores conversation threads
71
+ - `mastra_messages`: Stores individual messages
72
+ - `mastra_traces`: Stores telemetry and tracing data
73
+ - `mastra_scorers`: Stores scoring and evaluation data
74
+ - `mastra_resources`: Stores resource working memory data
@@ -88,9 +88,13 @@ const store5 = new MSSQLStore({
88
88
 
89
89
  The storage implementation handles schema creation and updates automatically. It creates the following tables:
90
90
 
91
- - `threads`: Stores conversation threads
92
- - `messages`: Stores individual messages
93
- - `metadata`: Stores additional metadata for threads and messages
91
+ - `mastra_workflow_snapshot`: Stores workflow state and execution data
92
+ - `mastra_evals`: Stores evaluation results and metadata
93
+ - `mastra_threads`: Stores conversation threads
94
+ - `mastra_messages`: Stores individual messages
95
+ - `mastra_traces`: Stores telemetry and tracing data
96
+ - `mastra_scorers`: Stores scoring and evaluation data
97
+ - `mastra_resources`: Stores resource working memory data
94
98
 
95
99
  ### Direct Database and Pool Access
96
100
 
@@ -88,9 +88,13 @@ const store5 = new PostgresStore({
88
88
 
89
89
  The storage implementation handles schema creation and updates automatically. It creates the following tables:
90
90
 
91
- - `threads`: Stores conversation threads
92
- - `messages`: Stores individual messages
93
- - `metadata`: Stores additional metadata for threads and messages
91
+ - `mastra_workflow_snapshot`: Stores workflow state and execution data
92
+ - `mastra_evals`: Stores evaluation results and metadata
93
+ - `mastra_threads`: Stores conversation threads
94
+ - `mastra_messages`: Stores individual messages
95
+ - `mastra_traces`: Stores telemetry and tracing data
96
+ - `mastra_scorers`: Stores scoring and evaluation data
97
+ - `mastra_resources`: Stores resource working memory data
94
98
 
95
99
  ### Direct Database and Pool Access
96
100
 
@@ -70,7 +70,7 @@ All templates follow this standardized structure:
70
70
  <FileTree.File name="example-agent.ts" />
71
71
  </FileTree.Folder>
72
72
  <FileTree.Folder name="tools">
73
- <FileTree.File name="custom-tool.ts" />
73
+ <FileTree.File name="example-tool.ts" />
74
74
  </FileTree.Folder>
75
75
  <FileTree.Folder name="workflows">
76
76
  <FileTree.File name="example-workflow.ts" />
@@ -126,8 +126,10 @@ Use the standard Mastra TypeScript configuration:
126
126
  Include a `.env.example` file with all required environment variables:
127
127
 
128
128
  ```bash filename=".env.example"
129
- # OpenAI API key for LLM operations
129
+ # LLM provider API keys (choose one or more)
130
130
  OPENAI_API_KEY=your_openai_api_key_here
131
+ ANTHROPIC_API_KEY=your_anthropic_api_key_here
132
+ GOOGLE_GENERATIVE_AI_API_KEY=your_google_api_key_here
131
133
 
132
134
  # Other service API keys as needed
133
135
  OTHER_SERVICE_API_KEY=your_api_key_here
@@ -137,15 +139,17 @@ OTHER_SERVICE_API_KEY=your_api_key_here
137
139
 
138
140
  #### LLM Provider
139
141
 
140
- Use OpenAI as the default provider unless demonstrating specific integrations:
142
+ We recommend using OpenAI, Anthropic, or Google model providers for templates. Choose the provider that best fits your use case:
141
143
 
142
144
  ```typescript filename="src/mastra/agents/example-agent.ts"
143
- import { Agent } from '@mastra/core';
145
+ import { Agent } from '@mastra/core/agent';
144
146
  import { openai } from '@ai-sdk/openai';
147
+ // Or use: import { anthropic } from '@ai-sdk/anthropic';
148
+ // Or use: import { google } from '@ai-sdk/google';
145
149
 
146
150
  const agent = new Agent({
147
151
  name: 'example-agent',
148
- model: openai('gpt-4'),
152
+ model: openai('gpt-4'), // or anthropic('<model-id>') or google('<model-id>')
149
153
  instructions: 'Your agent instructions here',
150
154
  // ... other configuration
151
155
  });
@@ -186,6 +190,8 @@ Detailed explanation of the template's functionality and use case.
186
190
  ## Environment Variables
187
191
 
188
192
  - `OPENAI_API_KEY`: Your OpenAI API key. Get one at [OpenAI Platform](https://platform.openai.com/api-keys)
193
+ - `ANTHROPIC_API_KEY`: Your Anthropic API key. Get one at [Anthropic Console](https://console.anthropic.com/settings/keys)
194
+ - `GOOGLE_GENERATIVE_AI_API_KEY`: Your Google AI API key. Get one at [Google AI Studio](https://makersuite.google.com/app/apikey)
189
195
  - `OTHER_API_KEY`: Description of what this key is for
190
196
 
191
197
  ## Usage
@@ -0,0 +1,319 @@
1
+ ## Creating scorers
2
+
3
+ Mastra provides two approaches for creating custom scorers:
4
+
5
+ **Code scorers** use programmatic logic and algorithms. They're ideal for deterministic evaluations, performance-critical scenarios, and cases where you have clear algorithmic criteria.
6
+
7
+ **LLM scorers** use language models as judges. They're perfect for subjective evaluations, complex criteria that are difficult to code algorithmically, and cases where human-like judgment is needed.
8
+
9
+ ### Code-based scorers
10
+
11
+ Code scorers use `createScorer` to build evaluation logic with programmatic algorithms. They're ideal for deterministic evaluations, performance-critical scenarios, and cases where you have clear algorithmic criteria or need integration with existing libraries.
12
+
13
+ Code scorers follow Mastra's three-step evaluation pipeline:
14
+ - an optional **extract** step for preprocessing complex data
15
+ - a required **analyze** step for core evaluation and scoring
16
+ - and an optional **reason** step for generating explanations.
17
+
18
+ For the complete API reference, see [`createScorer`](/reference/scorers/custom-code-scorer), and for a detailed explanation of the pipeline, see [evaluation process](/docs/scorers/overview#evaluation-pipeline).
19
+
20
+ #### Extract Step
21
+
22
+ This optional step preprocesses input/output data when you need to evaluate multiple distinct elements, filter content, or focus analysis on specific parts of complex data.
23
+
24
+ - **Receives:**
25
+ - `input`: User messages (when used with agents) or workflow step input (when used with workflow steps)
26
+ - `output`: Agent's response (when used with agents) or workflow step output (when used with workflow steps)
27
+ - `runtimeContext`: Runtime context from the agent or workflow step being evaluated
28
+ - **Must return:** `{ results: any }`
29
+ - **Data flow:** The `results` value is passed to the analyze step as `extractStepResult`
30
+
31
+ ```typescript filename="src/mastra/scorers/keyword-coverage-scorer.ts" showLineNumbers copy
32
+ import { createScorer } from "@mastra/core/scores";
33
+ import keywordExtractor from "keyword-extractor";
34
+
35
+ export const keywordCoverageScorer = createScorer({
36
+ name: "Keyword Coverage",
37
+ description: "Evaluates how well the output covers keywords from the input",
38
+
39
+ // Step 1: Extract keywords from input and output
40
+ extract: async ({ input, output }) => {
41
+ const inputText = input?.map(i => i.content).join(", ") || "";
42
+ const outputText = output.text;
43
+
44
+ const extractKeywords = (text: string) => {
45
+ return keywordExtractor.extract(text);
46
+ };
47
+
48
+ const inputKeywords = new Set(extractKeywords(inputText));
49
+ const outputKeywords = new Set(extractKeywords(outputText));
50
+
51
+ return {
52
+ results: {
53
+ inputKeywords,
54
+ outputKeywords,
55
+ },
56
+ };
57
+ },
58
+
59
+ // ... analyze and reason steps
60
+ });
61
+ ```
62
+
63
+ #### Analyze Step
64
+
65
+ This required step performs the core evaluation and generates the numerical score for all scorers.
66
+
67
+ - **Receives:** Everything from extract step, plus:
68
+ - `extractStepResult`: Results from the extract step (if extract step was defined)
69
+ - **Must return:** `{ score: number, results?: any }`
70
+ - **Data flow:** The `score` and optional `results` are passed to the reason step
71
+
72
+ ```typescript filename="src/mastra/scorers/keyword-coverage-scorer.ts" showLineNumbers copy
73
+ export const keywordCoverageScorer = createScorer({
74
+ // ... name, description, extract step
75
+
76
+ // Step 2: Analyze keyword coverage and calculate score
77
+ analyze: async ({ input, output, extractStepResult }) => {
78
+ const { inputKeywords, outputKeywords } = extractStepResult.results;
79
+
80
+ if (inputKeywords.size === 0) {
81
+ return { score: 1, results: { coverage: 1, matched: 0, total: 0 } };
82
+ }
83
+
84
+ const matchedKeywords = [...inputKeywords].filter(keyword =>
85
+ outputKeywords.has(keyword)
86
+ );
87
+
88
+ const coverage = matchedKeywords.length / inputKeywords.size;
89
+
90
+ return {
91
+ score: coverage,
92
+ results: {
93
+ coverage,
94
+ matched: matchedKeywords.length,
95
+ total: inputKeywords.size,
96
+ matchedKeywords,
97
+ },
98
+ };
99
+ },
100
+
101
+ // ... reason step
102
+ });
103
+ ```
104
+
105
+ #### Reason Step
106
+
107
+ This optional step generates human-readable explanations for scores, useful for actionable feedback, debugging transparency, or compliance documentation.
108
+
109
+ - **Receives:** Everything from analyze step, plus:
110
+ - `score`: The numerical score (0-1) calculated by the analyze step
111
+ - `analyzeStepResult`: Results from the analyze step (contains the score and any additional results)
112
+ - **Must return:** `{ reason: string }`
113
+
114
+ ```typescript filename="src/mastra/scorers/keyword-coverage-scorer.ts" showLineNumbers copy
115
+ export const keywordCoverageScorer = createScorer({
116
+ // ... name, description, extract and analyze steps
117
+
118
+ // Step 3: Generate explanation for the score
119
+ reason: async ({ score, analyzeStepResult, extractStepResult }) => {
120
+ const { matched, total, matchedKeywords } = analyzeStepResult.results;
121
+ const { inputKeywords } = extractStepResult.results;
122
+
123
+ const percentage = Math.round(score * 100);
124
+ const missedKeywords = [...inputKeywords].filter(
125
+ keyword => !matchedKeywords.includes(keyword)
126
+ );
127
+
128
+ let reason = `The output achieved ${percentage}% keyword coverage (${matched}/${total} keywords).`;
129
+
130
+ if (matchedKeywords.length > 0) {
131
+ reason += ` Covered keywords: ${matchedKeywords.join(", ")}.`;
132
+ }
133
+
134
+ if (missedKeywords.length > 0) {
135
+ reason += ` Missing keywords: ${missedKeywords.join(", ")}.`;
136
+ }
137
+
138
+ return { reason };
139
+ },
140
+ });
141
+ ```
142
+
143
+ **Examples and Resources:**
144
+ - [Custom Native JavaScript Scorer Example](/examples/scorers/custom-native-javascript-eval) - Example walkthrough.
145
+ - [Built-in Code Scorers](https://github.com/mastra-ai/mastra/tree/main/packages/evals/src/scorers/code) - Real implementations for reference
146
+
147
+ ### LLM-based scorers
148
+
149
+ LLM scorers use `createLLMScorer` to build evaluations that leverage language models as judges. They're perfect for subjective evaluations that require understanding context, complex criteria that are difficult to code algorithmically, natural language understanding tasks, and cases where human-like judgment is needed.
150
+
151
+ LLM scorers follow the same evaluation pipeline as code scorers with an additional `calculateScore` function:
152
+ - an optional **extract** step where the LLM processes input/output and returns structured data
153
+ - a required **analyze** step where the LLM performs evaluation and returns structured analysis
154
+ - a required **calculateScore** function that converts LLM analysis into numerical score
155
+ - and an optional **reason** step where the LLM generates human-readable explanations
156
+
157
+ The `calculateScore` function leverages the best of both approaches: LLMs excel at qualitative analysis and understanding, while deterministic functions ensure precise and consistent numerical scoring.
158
+
159
+ For the complete API reference, see [`createLLMScorer`](/reference/scorers/llm-scorer), and for a detailed explanation of the pipeline, see [evaluation process](/docs/scorers/overview#evaluation-pipeline).
160
+
161
+ #### Judge Configuration
162
+
163
+ All LLM scorer steps share this required configuration that defines the model and system instructions.
164
+
165
+ - **Configuration:** `judge` object containing:
166
+ - **model:** The LLM model instance for evaluation
167
+ - **instructions:** System prompt that guides the LLM's behavior
168
+
169
+ ```typescript filename="src/mastra/scorers/tone-scorer.ts" showLineNumbers copy
170
+ import { openai } from "@ai-sdk/openai";
171
+ import { createLLMScorer } from "@mastra/core/scores";
172
+
173
+ export const toneScorer = createLLMScorer({
174
+ name: 'Tone Scorer',
175
+ description: 'Evaluates the tone of the output',
176
+
177
+ // Shared judge configuration
178
+ judge: {
179
+ model: openai('gpt-4o'),
180
+ instructions: 'You are an expert in analyzing text tone and communication style.',
181
+ },
182
+
183
+ // ... other steps
184
+ });
185
+ ```
186
+
187
+ #### Extract Step
188
+
189
+ This optional step uses an LLM to preprocess input/output data when you need to evaluate multiple distinct elements, filter content, or focus analysis on specific parts of complex data.
190
+
191
+ - **Configuration:** `{ description, outputSchema, createPrompt }`
192
+ - **Data flow:** The structured output (defined by outputSchema) is passed to the analyze step as `extractStepResult`
193
+
194
+ ```typescript filename="src/mastra/scorers/content-scorer.ts" showLineNumbers copy
195
+ export const contentScorer = createLLMScorer({
196
+ // ... judge configuration
197
+
198
+ extract: {
199
+ description: 'Extract key themes and topics from the content',
200
+ outputSchema: z.object({
201
+ themes: z.array(z.string()),
202
+ topics: z.array(z.string()),
203
+ keyPhrases: z.array(z.string())
204
+ }),
205
+ createPrompt: ({ run }) => `
206
+ Analyze this content and extract:
207
+ 1. Main themes (3-5 high-level concepts)
208
+ 2. Specific topics mentioned
209
+ 3. Key phrases that capture the essence
210
+
211
+ Content: ${run.output.text}
212
+
213
+ Return a JSON object with themes, topics, and keyPhrases arrays.
214
+ `,
215
+ },
216
+
217
+ // ... other steps
218
+ });
219
+ ```
220
+
221
+ #### Analyze Step
222
+
223
+ This required step uses an LLM to perform the core evaluation and return structured analysis that will be converted to a numerical score.
224
+
225
+ - **Configuration:** `{ description, outputSchema, createPrompt }`
226
+ - **Data flow:** The structured output is passed to the calculateScore function and then to the reason step
227
+
228
+ ```typescript filename="src/mastra/scorers/quality-scorer.ts" showLineNumbers copy
229
+ export const qualityScorer = createLLMScorer({
230
+ // ... judge configuration
231
+
232
+ analyze: {
233
+ description: 'Evaluate content quality across multiple dimensions',
234
+ outputSchema: z.object({
235
+ clarity: z.number().min(1).max(5),
236
+ accuracy: z.number().min(1).max(5),
237
+ completeness: z.number().min(1).max(5),
238
+ relevance: z.number().min(1).max(5)
239
+ }),
240
+ createPrompt: ({ run }) => `
241
+ Evaluate this content on a scale of 1-5 for:
242
+ - Clarity: How clear and understandable is it?
243
+ - Accuracy: How factually correct does it appear?
244
+ - Completeness: How thorough is the response?
245
+ - Relevance: How well does it address the input?
246
+
247
+ Input: ${run.input.map(i => i.content).join(', ')}
248
+ Output: ${run.output.text}
249
+
250
+ Return a JSON object with numeric scores for each dimension.
251
+ `,
252
+ },
253
+
254
+ // ... other steps
255
+ });
256
+ ```
257
+
258
+ #### Calculate Score Step
259
+
260
+ This required function converts the LLM's structured analysis into a numerical score, providing deterministic scoring logic since LLMs aren't reliable for consistent numerical outputs.
261
+
262
+ - **Configuration:** `calculateScore` function that receives `{ run }` and returns a number
263
+ - **Data flow:** Converts the analyze step's structured output into a numerical score (0-1 range)
264
+
265
+ ```typescript filename="src/mastra/scorers/quality-scorer.ts" showLineNumbers copy
266
+ export const qualityScorer = createLLMScorer({
267
+ // ... previous steps
268
+
269
+ calculateScore: ({ run }) => {
270
+ const { clarity, accuracy, completeness, relevance } = run.analyzeStepResult;
271
+
272
+ // Calculate weighted average (scale of 1-5 to 0-1)
273
+ const weights = { clarity: 0.3, accuracy: 0.3, completeness: 0.2, relevance: 0.2 };
274
+ const weightedSum = (clarity * weights.clarity) +
275
+ (accuracy * weights.accuracy) +
276
+ (completeness * weights.completeness) +
277
+ (relevance * weights.relevance);
278
+
279
+ // Convert from 1-5 scale to 0-1 scale
280
+ return (weightedSum - 1) / 4;
281
+ },
282
+
283
+ // ... other steps
284
+ });
285
+ ```
286
+
287
+ #### Reason Step
288
+
289
+ This optional step uses an LLM to generate human-readable explanations for scores, useful for actionable feedback, debugging transparency, or compliance documentation.
290
+
291
+ - **Configuration:** `{ description, createPrompt }`
292
+ - **Data flow:** Receives all previous step results and score, returns a string explanation
293
+
294
+ ```typescript filename="src/mastra/scorers/quality-scorer.ts" showLineNumbers copy
295
+ export const qualityScorer = createLLMScorer({
296
+ // ... previous steps
297
+
298
+ reason: {
299
+ createPrompt: ({ run }) => {
300
+ const { clarity, accuracy, completeness, relevance } = run.analyzeStepResult;
301
+ const percentage = Math.round(run.score * 100);
302
+
303
+ return `
304
+ The content received a ${percentage}% quality score based on:
305
+ - Clarity: ${clarity}/5
306
+ - Accuracy: ${accuracy}/5
307
+ - Completeness: ${completeness}/5
308
+ - Relevance: ${relevance}/5
309
+
310
+ Provide a brief explanation of what contributed to this score.
311
+ `;
312
+ },
313
+ },
314
+ });
315
+ ```
316
+
317
+ **Examples and Resources:**
318
+ - [Custom LLM Judge Scorer Example](/examples/scorers/custom-llm-judge-eval) - Example walkthrough with gluten checker
319
+ - [Built-in LLM Scorers](https://github.com/mastra-ai/mastra/tree/main/packages/evals/src/scorers/llm) - Real implementations for reference