@mastra/mcp-docs-server 0.13.10 → 0.13.11-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/.docs/organized/changelogs/%40internal%2Fstorage-test-utils.md +9 -9
  2. package/.docs/organized/changelogs/%40internal%2Ftypes-builder.md +2 -0
  3. package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +18 -18
  4. package/.docs/organized/changelogs/%40mastra%2Fcore.md +23 -23
  5. package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +20 -20
  6. package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +20 -20
  7. package/.docs/organized/changelogs/%40mastra%2Fdeployer-vercel.md +20 -20
  8. package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +20 -20
  9. package/.docs/organized/changelogs/%40mastra%2Ffirecrawl.md +13 -13
  10. package/.docs/organized/changelogs/%40mastra%2Flibsql.md +9 -9
  11. package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +18 -18
  12. package/.docs/organized/changelogs/%40mastra%2Fmemory.md +12 -12
  13. package/.docs/organized/changelogs/%40mastra%2Fpg.md +9 -9
  14. package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +21 -21
  15. package/.docs/organized/changelogs/%40mastra%2Frag.md +12 -12
  16. package/.docs/organized/changelogs/%40mastra%2Fschema-compat.md +7 -0
  17. package/.docs/organized/changelogs/%40mastra%2Fserver.md +18 -18
  18. package/.docs/organized/changelogs/create-mastra.md +9 -9
  19. package/.docs/organized/changelogs/mastra.md +22 -22
  20. package/.docs/organized/code-examples/agent-network.md +4 -3
  21. package/.docs/organized/code-examples/agent.md +33 -2
  22. package/.docs/raw/agents/overview.mdx +21 -1
  23. package/.docs/raw/getting-started/mcp-docs-server.mdx +2 -2
  24. package/.docs/raw/rag/chunking-and-embedding.mdx +11 -0
  25. package/.docs/raw/reference/agents/agent.mdx +64 -38
  26. package/.docs/raw/reference/agents/generate.mdx +206 -202
  27. package/.docs/raw/reference/agents/getAgent.mdx +23 -38
  28. package/.docs/raw/reference/agents/getDefaultGenerateOptions.mdx +62 -0
  29. package/.docs/raw/reference/agents/getDefaultStreamOptions.mdx +62 -0
  30. package/.docs/raw/reference/agents/getDefaultVNextStreamOptions.mdx +62 -0
  31. package/.docs/raw/reference/agents/getDescription.mdx +30 -0
  32. package/.docs/raw/reference/agents/getInstructions.mdx +36 -73
  33. package/.docs/raw/reference/agents/getLLM.mdx +69 -0
  34. package/.docs/raw/reference/agents/getMemory.mdx +42 -119
  35. package/.docs/raw/reference/agents/getModel.mdx +36 -75
  36. package/.docs/raw/reference/agents/getScorers.mdx +62 -0
  37. package/.docs/raw/reference/agents/getTools.mdx +36 -128
  38. package/.docs/raw/reference/agents/getVoice.mdx +36 -83
  39. package/.docs/raw/reference/agents/getWorkflows.mdx +37 -74
  40. package/.docs/raw/reference/agents/stream.mdx +263 -226
  41. package/.docs/raw/reference/agents/streamVNext.mdx +208 -402
  42. package/.docs/raw/reference/rag/chunk.mdx +51 -2
  43. package/.docs/raw/reference/scorers/answer-relevancy.mdx +6 -6
  44. package/.docs/raw/reference/scorers/bias.mdx +6 -6
  45. package/.docs/raw/reference/scorers/completeness.mdx +2 -2
  46. package/.docs/raw/reference/scorers/content-similarity.mdx +1 -1
  47. package/.docs/raw/reference/scorers/create-scorer.mdx +445 -0
  48. package/.docs/raw/reference/scorers/faithfulness.mdx +6 -6
  49. package/.docs/raw/reference/scorers/hallucination.mdx +6 -6
  50. package/.docs/raw/reference/scorers/keyword-coverage.mdx +2 -2
  51. package/.docs/raw/reference/scorers/mastra-scorer.mdx +116 -158
  52. package/.docs/raw/reference/scorers/toxicity.mdx +2 -2
  53. package/.docs/raw/scorers/custom-scorers.mdx +166 -268
  54. package/.docs/raw/scorers/overview.mdx +21 -13
  55. package/.docs/raw/server-db/local-dev-playground.mdx +3 -3
  56. package/package.json +3 -3
  57. package/.docs/raw/reference/agents/createTool.mdx +0 -241
  58. package/.docs/raw/reference/scorers/custom-code-scorer.mdx +0 -155
  59. package/.docs/raw/reference/scorers/llm-scorer.mdx +0 -210
package/.docs/raw/scorers/custom-scorers.mdx CHANGED
@@ -1,319 +1,217 @@
  ## Creating scorers

- Mastra provides two approaches for creating custom scorers:
+ Mastra provides a unified `createScorer` factory that allows you to build custom evaluation logic using either JavaScript functions or LLM-based prompt objects for each step. This flexibility lets you choose the best approach for each part of your evaluation pipeline.

- **Code scorers** use programmatic logic and algorithms. They're ideal for deterministic evaluations, performance-critical scenarios, and cases where you have clear algorithmic criteria.
+ ### The Four-Step Pipeline

- **LLM scorers** use language models as judges. They're perfect for subjective evaluations, complex criteria that are difficult to code algorithmically, and cases where human-like judgment is needed.
+ All scorers in Mastra follow a consistent four-step evaluation pipeline:

- ### Code-based scorers
+ 1. **preprocess** (optional): Prepare or transform input/output data
+ 2. **analyze** (optional): Perform evaluation analysis and gather insights
+ 3. **generateScore** (required): Convert analysis into a numerical score
+ 4. **generateReason** (optional): Generate human-readable explanations

- Code scorers use `createScorer` to build evaluation logic with programmatic algorithms. They're ideal for deterministic evaluations, performance-critical scenarios, and cases where you have clear algorithmic criteria or need integration with existing libraries.
+ Each step can use either **functions** or **prompt objects** (LLM-based evaluation), giving you the flexibility to combine deterministic algorithms with AI judgment as needed.

- Code scorers follow Mastra's three-step evaluation pipeline:
- - an optional **extract** step for preprocessing complex data
- - a required **analyze** step for core evaluation and scoring
- - and an optional **reason** step for generating explanations.
+ ### Functions vs Prompt Objects

- For the complete API reference, see [`createScorer`](/reference/scorers/custom-code-scorer), and for a detailed explanation of the pipeline, see [evaluation process](/docs/scorers/overview#evaluation-pipeline).
+ **Functions** use JavaScript for deterministic logic. They're ideal for:
+ - Algorithmic evaluations with clear criteria
+ - Performance-critical scenarios
+ - Integration with existing libraries
+ - Consistent, reproducible results

- #### Extract Step
+ **Prompt Objects** use LLMs as judges for evaluation. They're perfect for:
+ - Subjective evaluations requiring human-like judgment
+ - Complex criteria difficult to code algorithmically
+ - Natural language understanding tasks
+ - Nuanced context evaluation

- This optional step preprocesses input/output data when you need to evaluate multiple distinct elements, filter content, or focus analysis on specific parts of complex data.
+ You can mix and match approaches within a single scorer - for example, use a function for preprocessing data and an LLM for analyzing quality.
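+
+ A minimal sketch of this mixed approach (the names here are illustrative, and the `openai` and `z` imports are assumed as in the examples below; each step is covered in detail later):
+
+ ```typescript
+ const mixedScorer = createScorer({
+   name: 'Mixed Example',
+   description: 'Function preprocessing with LLM analysis',
+   judge: { model: openai('gpt-4o'), instructions: 'You rate text quality.' }
+ })
+   // Deterministic function step
+   .preprocess(({ run }) => ({ cleaned: run.output.text.trim().toLowerCase() }))
+   // Prompt object step judged by the LLM
+   .analyze({
+     description: 'Rate the cleaned text',
+     outputSchema: z.object({ quality: z.number().min(0).max(1) }),
+     createPrompt: ({ results }) => `Rate this text from 0 to 1: ${results.preprocessStepResult.cleaned}`
+   })
+   .generateScore(({ results }) => results.analyzeStepResult.quality)
+ ```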

- - **Receives:**
-   - `input`: User messages (when used with agents) or workflow step input (when used with workflow steps)
-   - `output`: Agent's response (when used with agents) or workflow step output (when used with workflow steps)
-   - `runtimeContext`: Runtime context from the agent or workflow step being evaluated
- - **Must return:** `{ results: any }`
- - **Data flow:** The `results` value is passed to the analyze step as `extractStepResult`
+ ### Initializing a Scorer

- ```typescript filename="src/mastra/scorers/keyword-coverage-scorer.ts" showLineNumbers copy
- import { createScorer } from "@mastra/core/scores";
- import keywordExtractor from "keyword-extractor";
+ Every scorer starts with the `createScorer` factory function, which requires a name and description, and optionally accepts a judge configuration for LLM-based steps.

- export const keywordCoverageScorer = createScorer({
-   name: "Keyword Coverage",
-   description: "Evaluates how well the output covers keywords from the input",
+ ```typescript
+ import { createScorer } from '@mastra/core/scores';
+ import { openai } from '@ai-sdk/openai';

-   // Step 1: Extract keywords from input and output
-   extract: async ({ input, output }) => {
-     const inputText = input?.map(i => i.content).join(", ") || "";
-     const outputText = output.text;
-
-     const extractKeywords = (text: string) => {
-       return keywordExtractor.extract(text);
-     };
-
-     const inputKeywords = new Set(extractKeywords(inputText));
-     const outputKeywords = new Set(extractKeywords(outputText));
-
-     return {
-       results: {
-         inputKeywords,
-         outputKeywords,
-       },
-     };
-   },
-
-   // ... analyze and reason steps
- });
+ const glutenCheckerScorer = createScorer({
+   name: 'Gluten Checker',
+   description: 'Check if recipes contain gluten ingredients',
+   judge: { // Optional: for prompt object steps
+     model: openai('gpt-4o'),
+     instructions: 'You are a Chef that identifies if recipes contain gluten.'
+   }
+ })
+   // Chain step methods here
+   .preprocess(...)
+   .analyze(...)
+   .generateScore(...)
+   .generateReason(...)
  ```

- #### Analyze Step
+ The judge configuration is only needed if you plan to use prompt objects in any step. Individual steps can override this default configuration with their own judge settings.
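+
+ A hypothetical invocation sketch: the `run` method and payload shape below are assumptions inferred from the step examples (which read `run.output.text`), so consult the [`createScorer`](/reference/scorers/create-scorer) API reference for the exact signature:
+
+ ```typescript
+ // Hypothetical usage; this payload shape is an assumption, not the documented API
+ const result = await glutenCheckerScorer.run({
+   input: [{ role: 'user', content: 'Give me a pancake recipe' }],
+   output: { text: 'Mix two cups of flour with milk and eggs...' },
+ });
+ console.log(result.score, result.reason);
+ ```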

- This required step performs the core evaluation and generates the numerical score for all scorers.
+ ### Step-by-Step Breakdown

- - **Receives:** Everything from extract step, plus:
-   - `extractStepResult`: Results from the extract step (if extract step was defined)
- - **Must return:** `{ score: number, results?: any }`
- - **Data flow:** The `score` and optional `results` are passed to the reason step
+ #### preprocess Step (Optional)

- ```typescript filename="src/mastra/scorers/keyword-coverage-scorer.ts" showLineNumbers copy
- export const keywordCoverageScorer = createScorer({
-   // ... name, description, extract step
+ Prepares input/output data when you need to extract specific elements, filter content, or transform complex data structures.

-   // Step 2: Analyze keyword coverage and calculate score
-   analyze: async ({ input, output, extractStepResult }) => {
-     const { inputKeywords, outputKeywords } = extractStepResult.results;
-
-     if (inputKeywords.size === 0) {
-       return { score: 1, results: { coverage: 1, matched: 0, total: 0 } };
-     }
+ **Functions:** `({ run, results }) => any`

-     const matchedKeywords = [...inputKeywords].filter(keyword =>
-       outputKeywords.has(keyword)
-     );
-
-     const coverage = matchedKeywords.length / inputKeywords.size;
-
-     return {
-       score: coverage,
-       results: {
-         coverage,
-         matched: matchedKeywords.length,
-         total: inputKeywords.size,
-         matchedKeywords,
-       },
-     };
-   },
-
-   // ... reason step
- });
+ ```typescript
+ const glutenCheckerScorer = createScorer(...)
+   .preprocess(({ run }) => {
+     // Extract and clean recipe text
+     const recipeText = run.output.text.toLowerCase();
+     const wordCount = recipeText.split(' ').length;
+
+     return {
+       recipeText,
+       wordCount,
+       hasCommonGlutenWords: /flour|wheat|bread|pasta/.test(recipeText)
+     };
+   })
  ```

- #### Reason Step
-
- This optional step generates human-readable explanations for scores, useful for actionable feedback, debugging transparency, or compliance documentation.
-
- - **Receives:** Everything from analyze step, plus:
-   - `score`: The numerical score (0-1) calculated by the analyze step
-   - `analyzeStepResult`: Results from the analyze step (contains the score and any additional results)
- - **Must return:** `{ reason: string }`
-
- ```typescript filename="src/mastra/scorers/keyword-coverage-scorer.ts" showLineNumbers copy
- export const keywordCoverageScorer = createScorer({
-   // ... name, description, extract and analyze steps
-
-   // Step 3: Generate explanation for the score
-   reason: async ({ score, analyzeStepResult, extractStepResult }) => {
-     const { matched, total, matchedKeywords } = analyzeStepResult.results;
-     const { inputKeywords } = extractStepResult.results;
+ **Prompt Objects:** Use `description`, `outputSchema`, and `createPrompt` to structure LLM-based preprocessing.
+
+ ```typescript
+ const glutenCheckerScorer = createScorer(...)
+   .preprocess({
+     description: 'Extract ingredients from the recipe',
+     outputSchema: z.object({
+       ingredients: z.array(z.string()),
+       cookingMethods: z.array(z.string())
+     }),
+     createPrompt: ({ run }) => `
+       Extract all ingredients and cooking methods from this recipe:
+       ${run.output.text}

-     const percentage = Math.round(score * 100);
-     const missedKeywords = [...inputKeywords].filter(
-       keyword => !matchedKeywords.includes(keyword)
-     );
-
-     let reason = `The output achieved ${percentage}% keyword coverage (${matched}/${total} keywords).`;
-
-     if (matchedKeywords.length > 0) {
-       reason += ` Covered keywords: ${matchedKeywords.join(", ")}.`;
-     }
-
-     if (missedKeywords.length > 0) {
-       reason += ` Missing keywords: ${missedKeywords.join(", ")}.`;
-     }
-
-     return { reason };
-   },
- });
+       Return JSON with ingredients and cookingMethods arrays.
+     `
+   })
  ```

- **Examples and Resources:**
- - [Custom Native JavaScript Scorer Example](/examples/scorers/custom-native-javascript-eval) - Example walkthrough.
- - [Built-in Code Scorers](https://github.com/mastra-ai/mastra/tree/main/packages/evals/src/scorers/code) - Real implementations for reference
-
- ### LLM-based scorers
-
- LLM scorers use `createLLMScorer` to build evaluations that leverage language models as judges. They're perfect for subjective evaluations that require understanding context, complex criteria that are difficult to code algorithmically, natural language understanding tasks, and cases where human-like judgment is needed.
-
- LLM scorers follow the same evaluation pipeline as code scorers with an additional `calculateScore` function:
- - an optional **extract** step where the LLM processes input/output and returns structured data
- - a required **analyze** step where the LLM performs evaluation and returns structured analysis
- - a required **calculateScore** function that converts LLM analysis into numerical score
- - and an optional **reason** step where the LLM generates human-readable explanations
-
- The `calculateScore` function leverages the best of both approaches: LLMs excel at qualitative analysis and understanding, while deterministic functions ensure precise and consistent numerical scoring.
-
- For the complete API reference, see [`createLLMScorer`](/reference/scorers/llm-scorer), and for a detailed explanation of the pipeline, see [evaluation process](/docs/scorers/overview#evaluation-pipeline).
+ **Data Flow:** Results are available to subsequent steps as `results.preprocessStepResult`

- #### Judge Configuration
+ #### analyze Step (Optional)

- All LLM scorer steps share this required configuration that defines the model and system instructions.
+ Performs core evaluation analysis, gathering insights that will inform the scoring decision.

- - **Configuration:** `judge` object containing:
-   - **model:** The LLM model instance for evaluation
-   - **instructions:** System prompt that guides the LLM's behavior
+ **Functions:** `({ run, results }) => any`

- ```typescript filename="src/mastra/scorers/tone-scorer.ts" showLineNumbers copy
- import { openai } from "@ai-sdk/openai";
- import { createLLMScorer } from "@mastra/core/scores";
-
- export const toneScorer = createLLMScorer({
-   name: 'Tone Scorer',
-   description: 'Evaluates the tone of the output',
+ ```typescript
+ const glutenCheckerScorer = createScorer({...})
+   .preprocess(...)
+   .analyze(({ run, results }) => {
+     const { recipeText, hasCommonGlutenWords } = results.preprocessStepResult;

-   // Shared judge configuration
-   judge: {
-     model: openai('gpt-4o'),
-     instructions: 'You are an expert in analyzing text tone and communication style.',
-   },
+     // Simple gluten detection algorithm
+     const glutenKeywords = ['wheat', 'flour', 'barley', 'rye', 'bread'];
+     const foundGlutenWords = glutenKeywords.filter(word =>
+       recipeText.includes(word)
+     );

-   // ... other steps
- });
+     return {
+       isGlutenFree: foundGlutenWords.length === 0,
+       detectedGlutenSources: foundGlutenWords,
+       confidence: hasCommonGlutenWords ? 0.9 : 0.7
+     };
+   })
  ```

- #### Extract Step
-
- This optional step uses an LLM to preprocess input/output data when you need to evaluate multiple distinct elements, filter content, or focus analysis on specific parts of complex data.
-
- - **Configuration:** `{ description, outputSchema, createPrompt }`
- - **Data flow:** The structured output (defined by outputSchema) is passed to the analyze step as `extractStepResult`
-
- ```typescript filename="src/mastra/scorers/content-scorer.ts" showLineNumbers copy
- export const contentScorer = createLLMScorer({
-   // ... judge configuration
-
-   extract: {
-     description: 'Extract key themes and topics from the content',
-     outputSchema: z.object({
-       themes: z.array(z.string()),
-       topics: z.array(z.string()),
-       keyPhrases: z.array(z.string())
-     }),
-     createPrompt: ({ run }) => `
-       Analyze this content and extract:
-       1. Main themes (3-5 high-level concepts)
-       2. Specific topics mentioned
-       3. Key phrases that capture the essence
-
-       Content: ${run.output.text}
-
-       Return a JSON object with themes, topics, and keyPhrases arrays.
-     `,
-   },
-
-   // ... other steps
- });
+ **Prompt Objects:** Use `description`, `outputSchema`, and `createPrompt` for LLM-based analysis.
+
+ ```typescript
+ const glutenCheckerScorer = createScorer({...})
+   .preprocess(...)
+   .analyze({
+     description: 'Analyze recipe for gluten content',
+     outputSchema: z.object({
+       isGlutenFree: z.boolean(),
+       glutenSources: z.array(z.string()),
+       confidence: z.number().min(0).max(1)
+     }),
+     createPrompt: ({ run, results }) => `
+       Analyze this recipe for gluten content:
+       "${results.preprocessStepResult.recipeText}"
+
+       Look for wheat, barley, rye, and hidden sources like soy sauce.
+       Return JSON with isGlutenFree, glutenSources array, and confidence (0-1).
+     `
+   })
  ```

- #### Analyze Step
+ **Data Flow:** Results are available to subsequent steps as `results.analyzeStepResult`

- This required step uses an LLM to perform the core evaluation and return structured analysis that will be converted to a numerical score.
+ #### generateScore Step (Required)

- - **Configuration:** `{ description, outputSchema, createPrompt }`
- - **Data flow:** The structured output is passed to the calculateScore function and then to the reason step
+ Converts analysis results into a numerical score. This is the only required step in the pipeline.

- ```typescript filename="src/mastra/scorers/quality-scorer.ts" showLineNumbers copy
- export const qualityScorer = createLLMScorer({
-   // ... judge configuration
-
-   analyze: {
-     description: 'Evaluate content quality across multiple dimensions',
-     outputSchema: z.object({
-       clarity: z.number().min(1).max(5),
-       accuracy: z.number().min(1).max(5),
-       completeness: z.number().min(1).max(5),
-       relevance: z.number().min(1).max(5)
-     }),
-     createPrompt: ({ run }) => `
-       Evaluate this content on a scale of 1-5 for:
-       - Clarity: How clear and understandable is it?
-       - Accuracy: How factually correct does it appear?
-       - Completeness: How thorough is the response?
-       - Relevance: How well does it address the input?
-
-       Input: ${run.input.map(i => i.content).join(', ')}
-       Output: ${run.output.text}
-
-       Return a JSON object with numeric scores for each dimension.
-     `,
-   },
+ **Functions:** `({ run, results }) => number`
+
+ ```typescript
+ const glutenCheckerScorer = createScorer({...})
+   .preprocess(...)
+   .analyze(...)
+   .generateScore(({ results }) => {
+     const { isGlutenFree, confidence } = results.analyzeStepResult;

-   // ... other steps
- });
+     // Return 1 for gluten-free, 0 for contains gluten
+     // Weight by confidence level
+     return isGlutenFree ? confidence : 0;
+   })
  ```

- #### Calculate Score Step
+ **Prompt Objects:** See the [`createScorer`](/reference/scorers/create-scorer) API reference for details on using prompt objects with generateScore, including the required `calculateScore` function.

- This required function converts the LLM's structured analysis into a numerical score, providing deterministic scoring logic since LLMs aren't reliable for consistent numerical outputs.
+ **Data Flow:** The score is available to generateReason as the `score` parameter

- - **Configuration:** `calculateScore` function that receives `{ run }` and returns a number
- - **Data flow:** Converts the analyze step's structured output into a numerical score (0-1 range)
+ #### generateReason Step (Optional)

- ```typescript filename="src/mastra/scorers/quality-scorer.ts" showLineNumbers copy
- export const qualityScorer = createLLMScorer({
-   // ... previous steps
-
-   calculateScore: ({ run }) => {
-     const { clarity, accuracy, completeness, relevance } = run.analyzeStepResult;
-
-     // Calculate weighted average (scale of 1-5 to 0-1)
-     const weights = { clarity: 0.3, accuracy: 0.3, completeness: 0.2, relevance: 0.2 };
-     const weightedSum = (clarity * weights.clarity) +
-       (accuracy * weights.accuracy) +
-       (completeness * weights.completeness) +
-       (relevance * weights.relevance);
-
-     // Convert from 1-5 scale to 0-1 scale
-     return (weightedSum - 1) / 4;
-   },
-
-   // ... other steps
- });
- ```
+ Generates human-readable explanations for the score, useful for debugging, transparency, or user feedback.

- #### Reason Step
+ **Functions:** `({ run, results, score }) => string`

- This optional step uses an LLM to generate human-readable explanations for scores, useful for actionable feedback, debugging transparency, or compliance documentation.
-
- - **Configuration:** `{ description, createPrompt }`
- - **Data flow:** Receives all previous step results and score, returns a string explanation
-
- ```typescript filename="src/mastra/scorers/quality-scorer.ts" showLineNumbers copy
- export const qualityScorer = createLLMScorer({
-   // ... previous steps
+ ```typescript
+ const glutenCheckerScorer = createScorer({...})
+   .preprocess(...)
+   .analyze(...)
+   .generateScore(...)
+   .generateReason(({ results, score }) => {
+     const { isGlutenFree, glutenSources } = results.analyzeStepResult;

-   reason: {
-     createPrompt: ({ run }) => {
-       const { clarity, accuracy, completeness, relevance } = run.analyzeStepResult;
-       const percentage = Math.round(run.score * 100);
-
-       return `
-         The content received a ${percentage}% quality score based on:
-         - Clarity: ${clarity}/5
-         - Accuracy: ${accuracy}/5
-         - Completeness: ${completeness}/5
-         - Relevance: ${relevance}/5
-
-         Provide a brief explanation of what contributed to this score.
-       `;
-     },
-   },
- });
+     if (isGlutenFree) {
+       return `Score: ${score}. This recipe is gluten-free with no harmful ingredients detected.`;
+     } else {
+       return `Score: ${score}. Contains gluten from: ${glutenSources.join(', ')}`;
+     }
+   })
+ ```
+
+ **Prompt Objects:** Use `description` and `createPrompt` for LLM-generated explanations.
+
+ ```typescript
+ const glutenCheckerScorer = createScorer({...})
+   .preprocess(...)
+   .analyze(...)
+   .generateScore(...)
+   .generateReason({
+     description: 'Explain the gluten assessment',
+     createPrompt: ({ results, score }) => `
+       Explain why this recipe received a score of ${score}.
+       Analysis: ${JSON.stringify(results.analyzeStepResult)}
+
+       Provide a clear explanation for someone with dietary restrictions.
+     `
+   })
  ```

  **Examples and Resources:**
- - [Custom LLM Judge Scorer Example](/examples/scorers/custom-llm-judge-eval) - Example Walkthrough with gluten checker
- - [Built-in LLM Scorers](https://github.com/mastra-ai/mastra/tree/main/packages/evals/src/scorers/llm) - Real implementations for reference
+ - [Custom Scorer Example](/examples/scorers/custom-scorer) - Complete walkthrough
+ - [createScorer API Reference](/reference/scorers/create-scorer) - Complete technical documentation
+ - [Built-in Scorers Source Code](https://github.com/mastra-ai/mastra/tree/main/packages/evals/src/scorers) - Real implementations for reference
package/.docs/raw/scorers/overview.mdx CHANGED
@@ -11,28 +11,36 @@ description: Overview of scorers in Mastra, detailing their capabilities for eva

  ## Evaluation pipeline

- Mastra scorers follow an optional three-step pipeline that allows for evaluation workflows:
+ Mastra scorers follow a flexible four-step pipeline that allows for simple to complex evaluation workflows:

- 1. **Extract** (Optional): Identify and isolate relevant content for focused evaluation
- 2. **Analyze** (Required): Perform the core evaluation and generate a score
- 3. **Reason** (Optional): Provide explanations or justifications for the score
+ 1. **preprocess** (Optional): Prepare or transform input/output data for evaluation
+ 2. **analyze** (Optional): Perform evaluation analysis and gather insights
+ 3. **generateScore** (Required): Convert analysis into a numerical score
+ 4. **generateReason** (Optional): Generate explanations or justifications for the score

  This modular structure enables both simple single-step evaluations and complex multi-stage analysis workflows, allowing you to build evaluations that match your specific needs.

  ### When to use each step

- **Extract step** - Use when your content is complex or needs preprocessing:
- - Separating facts from opinions in mixed responses
- - Focusing evaluation on specific sections of long outputs
+ **preprocess step** - Use when your content is complex or needs preprocessing:
+ - Extracting specific elements from complex data structures
+ - Cleaning or normalizing text before analysis
  - Parsing multiple claims that need individual evaluation
- - Example: Bias detection that first identifies opinion statements
+ - Filtering content to focus evaluation on relevant sections

- **Analyze step** - Always required for core evaluation:
- - Straightforward scenarios: Direct scoring of input/output pairs
- - Complex scenarios: Evaluate preprocessed content and generate detailed results
- - Applies your scoring criteria and calculates the numerical score
+ **analyze step** - Use when you need structured evaluation analysis:
+ - Gathering insights that inform the scoring decision
+ - Breaking down complex evaluation criteria into components
+ - Performing detailed analysis that generateScore will use
+ - Collecting evidence or reasoning data for transparency

- **Reason step** - Use when explanations are important:
+ **generateScore step** - Always required for converting analysis to scores:
+ - Simple scenarios: Direct scoring of input/output pairs
+ - Complex scenarios: Converting detailed analysis results into numerical scores
+ - Applying business logic and weighting to analysis results
+ - The only step that produces the final numerical score
+
+ **generateReason step** - Use when explanations are important:
  - Users need to understand why a score was assigned
  - Debugging and transparency are critical
  - Compliance or auditing requires explanations
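+
+ A compact end-to-end sketch of the four steps chained together (the scorer and its threshold are illustrative; the chained API follows the `createScorer` examples in the custom scorers guide):
+
+ ```typescript
+ import { createScorer } from '@mastra/core/scores';
+
+ const lengthScorer = createScorer({
+   name: 'Length Check',
+   description: 'Scores outputs by word count'
+ })
+   // preprocess: normalize the raw output
+   .preprocess(({ run }) => ({ words: run.output.text.split(/\s+/).length }))
+   // analyze: derive an insight from the preprocessed data
+   .analyze(({ results }) => ({ tooShort: results.preprocessStepResult.words < 50 }))
+   // generateScore: convert the analysis into a number (required)
+   .generateScore(({ results }) => (results.analyzeStepResult.tooShort ? 0 : 1))
+   // generateReason: explain the score
+   .generateReason(({ results, score }) => `Score ${score}: output had ${results.preprocessStepResult.words} words.`);
+ ```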
package/.docs/raw/server-db/local-dev-playground.mdx CHANGED
@@ -44,7 +44,7 @@ The Playground lets you interact with your agents, workflows, and tools. It prov
  Quickly test and debug your agents during development using the interactive chat interface in the Agent Playground.

  <VideoPlayer
-   src="https://res.cloudinary.com/dygi6femd/video/upload/v1751406022/local-dev-agents-playground_100_m3begx.mp4"
+   src="https://res.cloudinary.com/mastra-assets/video/upload/v1751406022/local-dev-agents-playground_100_m3begx.mp4"
  />

  Key features:
@@ -60,7 +60,7 @@ Key features:
  Validate workflows by supplying defined inputs and visualizing each step within the Workflow Playground.

  <VideoPlayer
-   src="https://res.cloudinary.com/dygi6femd/video/upload/v1751406027/local-dev-workflows-playground_100_rbc466.mp4"
+   src="https://res.cloudinary.com/mastra-assets/video/upload/v1751406027/local-dev-workflows-playground_100_rbc466.mp4"
  />

  Key features:
@@ -76,7 +76,7 @@ Key features:
  Quickly test and debug custom tools in isolation using the Tools Playground, without running a full agent or workflow.

  <VideoPlayer
-   src="https://res.cloudinary.com/dygi6femd/video/upload/v1751406316/local-dev-agents-tools_100_fe1jdt.mp4"
+   src="https://res.cloudinary.com/mastra-assets/video/upload/v1751406316/local-dev-agents-tools_100_fe1jdt.mp4"
  />

  Key features:
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@mastra/mcp-docs-server",
-   "version": "0.13.10",
+   "version": "0.13.11-alpha.0",
    "description": "MCP server for accessing Mastra.ai documentation, changelogs, and news.",
    "type": "module",
    "main": "dist/index.js",
@@ -32,7 +32,7 @@
      "uuid": "^11.1.0",
      "zod": "^3.25.67",
      "zod-to-json-schema": "^3.24.5",
-     "@mastra/core": "0.13.1",
+     "@mastra/core": "0.13.2-alpha.0",
      "@mastra/mcp": "^0.10.10"
    },
    "devDependencies": {
@@ -49,7 +49,7 @@
      "typescript": "^5.8.3",
      "vitest": "^3.2.4",
      "@internal/lint": "0.0.28",
-     "@mastra/core": "0.13.1"
+     "@mastra/core": "0.13.2-alpha.0"
    },
    "scripts": {
      "prepare-docs": "cross-env PREPARE=true node dist/prepare-docs/prepare.js",