@mastra/mcp-docs-server 0.13.7-alpha.0 → 0.13.7-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +39 -39
  2. package/.docs/organized/changelogs/%40mastra%2Fcloudflare-d1.md +18 -18
  3. package/.docs/organized/changelogs/%40mastra%2Fcloudflare.md +18 -18
  4. package/.docs/organized/changelogs/%40mastra%2Fcore.md +45 -45
  5. package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +21 -21
  6. package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +44 -44
  7. package/.docs/organized/changelogs/%40mastra%2Fevals.md +11 -11
  8. package/.docs/organized/changelogs/%40mastra%2Flibsql.md +29 -29
  9. package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +25 -25
  10. package/.docs/organized/changelogs/%40mastra%2Fmemory.md +39 -39
  11. package/.docs/organized/changelogs/%40mastra%2Fmongodb.md +20 -20
  12. package/.docs/organized/changelogs/%40mastra%2Fmssql.md +17 -0
  13. package/.docs/organized/changelogs/%40mastra%2Fpg.md +29 -29
  14. package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +12 -12
  15. package/.docs/organized/changelogs/%40mastra%2Fserver.md +38 -38
  16. package/.docs/organized/changelogs/%40mastra%2Fupstash.md +29 -29
  17. package/.docs/organized/changelogs/%40mastra%2Fvectorize.md +18 -18
  18. package/.docs/organized/changelogs/%40mastra%2Fvoice-cloudflare.md +18 -18
  19. package/.docs/organized/changelogs/create-mastra.md +7 -7
  20. package/.docs/organized/changelogs/mastra.md +32 -32
  21. package/.docs/organized/code-examples/agent.md +93 -3
  22. package/.docs/organized/code-examples/ai-sdk-v5.md +4 -4
  23. package/.docs/raw/agents/input-processors.mdx +268 -0
  24. package/.docs/raw/agents/using-tools-and-mcp.mdx +39 -0
  25. package/.docs/raw/community/contributing-templates.mdx +192 -0
  26. package/.docs/raw/getting-started/installation.mdx +16 -0
  27. package/.docs/raw/getting-started/templates.mdx +95 -0
  28. package/.docs/raw/observability/tracing.mdx +44 -0
  29. package/.docs/raw/reference/agents/agent.mdx +7 -0
  30. package/.docs/raw/reference/agents/generate.mdx +18 -1
  31. package/.docs/raw/reference/agents/stream.mdx +18 -1
  32. package/.docs/raw/reference/cli/dev.mdx +6 -0
  33. package/.docs/raw/reference/client-js/memory.mdx +18 -0
  34. package/.docs/raw/reference/core/mastra-class.mdx +1 -1
  35. package/.docs/raw/reference/memory/Memory.mdx +1 -0
  36. package/.docs/raw/reference/memory/deleteMessages.mdx +95 -0
  37. package/.docs/raw/reference/memory/getThreadsByResourceId.mdx +33 -1
  38. package/.docs/raw/reference/rag/upstash.mdx +112 -5
  39. package/.docs/raw/reference/scorers/answer-relevancy.mdx +114 -0
  40. package/.docs/raw/reference/scorers/bias.mdx +127 -0
  41. package/.docs/raw/reference/scorers/completeness.mdx +89 -0
  42. package/.docs/raw/reference/scorers/content-similarity.mdx +96 -0
  43. package/.docs/raw/reference/scorers/custom-code-scorer.mdx +155 -0
  44. package/.docs/raw/reference/scorers/faithfulness.mdx +122 -0
  45. package/.docs/raw/reference/scorers/hallucination.mdx +133 -0
  46. package/.docs/raw/reference/scorers/keyword-coverage.mdx +92 -0
  47. package/.docs/raw/reference/scorers/llm-scorer.mdx +210 -0
  48. package/.docs/raw/reference/scorers/mastra-scorer.mdx +218 -0
  49. package/.docs/raw/reference/scorers/textual-difference.mdx +76 -0
  50. package/.docs/raw/reference/scorers/tone-consistency.mdx +75 -0
  51. package/.docs/raw/reference/scorers/toxicity.mdx +109 -0
  52. package/.docs/raw/reference/storage/libsql.mdx +7 -4
  53. package/.docs/raw/reference/storage/mssql.mdx +7 -3
  54. package/.docs/raw/reference/storage/postgresql.mdx +7 -3
  55. package/.docs/raw/reference/templates.mdx +228 -0
  56. package/.docs/raw/scorers/custom-scorers.mdx +319 -0
  57. package/.docs/raw/scorers/off-the-shelf-scorers.mdx +30 -0
  58. package/.docs/raw/scorers/overview.mdx +124 -0
  59. package/package.json +4 -4
package/.docs/raw/scorers/custom-scorers.mdx ADDED
@@ -0,0 +1,319 @@
+ ## Creating scorers
+
+ Mastra provides two approaches for creating custom scorers:
+
+ **Code scorers** use programmatic logic and algorithms. They're ideal for deterministic evaluations, performance-critical scenarios, and cases where you have clear algorithmic criteria.
+
+ **LLM scorers** use language models as judges. They're perfect for subjective evaluations, complex criteria that are difficult to code algorithmically, and cases where human-like judgment is needed.
+
+ ### Code-based scorers
+
+ Code scorers use `createScorer` to build evaluation logic with programmatic algorithms, making them a good fit when you need integration with existing JavaScript libraries.
+
+ Code scorers follow Mastra's three-step evaluation pipeline:
+ - an optional **extract** step for preprocessing complex data
+ - a required **analyze** step for core evaluation and scoring
+ - an optional **reason** step for generating explanations
+
+ For the complete API reference, see [`createScorer`](/reference/scorers/custom-code-scorer), and for a detailed explanation of the pipeline, see [evaluation pipeline](/docs/scorers/overview#evaluation-pipeline).
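+
+ A scorer doesn't have to use all three steps. As a minimal sketch (the scorer name and matching logic here are illustrative, not a built-in scorer), a single required analyze step is enough:
+
+ ```typescript filename="src/mastra/scorers/exact-match-scorer.ts" showLineNumbers copy
+ import { createScorer } from "@mastra/core/scores";
+
+ export const exactMatchScorer = createScorer({
+   name: "Exact Match",
+   description: "Checks whether the output repeats the input verbatim",
+
+   // A single required analyze step: no extract, no reason
+   analyze: async ({ input, output }) => {
+     const inputText = input?.map(i => i.content).join(", ") || "";
+     return { score: output.text.includes(inputText) ? 1 : 0 };
+   },
+ });
+ ```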
+
+ #### Extract Step
+
+ This optional step preprocesses input/output data when you need to evaluate multiple distinct elements, filter content, or focus analysis on specific parts of complex data.
+
+ - **Receives:**
+   - `input`: User messages (when used with agents) or workflow step input (when used with workflow steps)
+   - `output`: Agent's response (when used with agents) or workflow step output (when used with workflow steps)
+   - `runtimeContext`: Runtime context from the agent or workflow step being evaluated
+ - **Must return:** `{ results: any }`
+ - **Data flow:** The `results` value is passed to the analyze step as `extractStepResult`
+
+ ```typescript filename="src/mastra/scorers/keyword-coverage-scorer.ts" showLineNumbers copy
+ import { createScorer } from "@mastra/core/scores";
+ import keywordExtractor from "keyword-extractor";
+
+ export const keywordCoverageScorer = createScorer({
+   name: "Keyword Coverage",
+   description: "Evaluates how well the output covers keywords from the input",
+
+   // Step 1: Extract keywords from input and output
+   extract: async ({ input, output }) => {
+     const inputText = input?.map(i => i.content).join(", ") || "";
+     const outputText = output.text;
+
+     const extractKeywords = (text: string) => {
+       return keywordExtractor.extract(text);
+     };
+
+     const inputKeywords = new Set(extractKeywords(inputText));
+     const outputKeywords = new Set(extractKeywords(outputText));
+
+     return {
+       results: {
+         inputKeywords,
+         outputKeywords,
+       },
+     };
+   },
+
+   // ... analyze and reason steps
+ });
+ ```
+
+ #### Analyze Step
+
+ This step is required for every scorer: it performs the core evaluation and generates the numerical score.
+
+ - **Receives:** Everything from the extract step, plus:
+   - `extractStepResult`: Results from the extract step (if an extract step was defined)
+ - **Must return:** `{ score: number, results?: any }`
+ - **Data flow:** The `score` and optional `results` are passed to the reason step
+
+ ```typescript filename="src/mastra/scorers/keyword-coverage-scorer.ts" showLineNumbers copy
+ export const keywordCoverageScorer = createScorer({
+   // ... name, description, extract step
+
+   // Step 2: Analyze keyword coverage and calculate score
+   analyze: async ({ input, output, extractStepResult }) => {
+     const { inputKeywords, outputKeywords } = extractStepResult.results;
+
+     if (inputKeywords.size === 0) {
+       return { score: 1, results: { coverage: 1, matched: 0, total: 0 } };
+     }
+
+     const matchedKeywords = [...inputKeywords].filter(keyword =>
+       outputKeywords.has(keyword)
+     );
+
+     const coverage = matchedKeywords.length / inputKeywords.size;
+
+     return {
+       score: coverage,
+       results: {
+         coverage,
+         matched: matchedKeywords.length,
+         total: inputKeywords.size,
+         matchedKeywords,
+       },
+     };
+   },
+
+   // ... reason step
+ });
+ ```
+
+ #### Reason Step
+
+ This optional step generates human-readable explanations for scores, useful for actionable feedback, debugging transparency, or compliance documentation.
+
+ - **Receives:** Everything from the analyze step, plus:
+   - `score`: The numerical score (0-1) calculated by the analyze step
+   - `analyzeStepResult`: Results from the analyze step (contains the score and any additional results)
+ - **Must return:** `{ reason: string }`
+
+ ```typescript filename="src/mastra/scorers/keyword-coverage-scorer.ts" showLineNumbers copy
+ export const keywordCoverageScorer = createScorer({
+   // ... name, description, extract and analyze steps
+
+   // Step 3: Generate explanation for the score
+   reason: async ({ score, analyzeStepResult, extractStepResult }) => {
+     const { matched, total, matchedKeywords } = analyzeStepResult.results;
+     const { inputKeywords } = extractStepResult.results;
+
+     const percentage = Math.round(score * 100);
+     const missedKeywords = [...inputKeywords].filter(
+       keyword => !matchedKeywords.includes(keyword)
+     );
+
+     let reason = `The output achieved ${percentage}% keyword coverage (${matched}/${total} keywords).`;
+
+     if (matchedKeywords.length > 0) {
+       reason += ` Covered keywords: ${matchedKeywords.join(", ")}.`;
+     }
+
+     if (missedKeywords.length > 0) {
+       reason += ` Missing keywords: ${missedKeywords.join(", ")}.`;
+     }
+
+     return { reason };
+   },
+ });
+ ```
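+
+ Once all steps are defined, the scorer can be registered wherever scorers are accepted. As a sketch (the agent name is illustrative; the registration shape follows the live-evaluation examples in the scorers overview):
+
+ ```typescript filename="src/mastra/agents/content-agent.ts" showLineNumbers copy
+ import { Agent } from "@mastra/core/agent";
+ import { keywordCoverageScorer } from "../scorers/keyword-coverage-scorer";
+
+ export const contentAgent = new Agent({
+   // ... instructions, model, and other agent configuration
+   scorers: {
+     keywordCoverage: {
+       scorer: keywordCoverageScorer,
+       sampling: { type: "ratio", rate: 1 }, // score every response
+     },
+   },
+ });
+ ```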
+
+ **Examples and Resources:**
+ - [Custom Native JavaScript Scorer Example](/examples/scorers/custom-native-javascript-eval) - A step-by-step walkthrough
+ - [Built-in Code Scorers](https://github.com/mastra-ai/mastra/tree/main/packages/evals/src/scorers/code) - Real implementations for reference
+
+ ### LLM-based scorers
+
+ LLM scorers use `createLLMScorer` to build evaluations that leverage language models as judges. They're perfect for subjective evaluations that require understanding context, complex criteria that are difficult to code algorithmically, natural language understanding tasks, and cases where human-like judgment is needed.
+
+ LLM scorers follow the same evaluation pipeline as code scorers, with an additional `calculateScore` function:
+ - an optional **extract** step where the LLM processes input/output and returns structured data
+ - a required **analyze** step where the LLM performs evaluation and returns structured analysis
+ - a required **calculateScore** function that converts the LLM's analysis into a numerical score
+ - an optional **reason** step where the LLM generates human-readable explanations
+
+ The `calculateScore` function leverages the best of both approaches: LLMs excel at qualitative analysis and understanding, while deterministic functions ensure precise and consistent numerical scoring.
+
+ For the complete API reference, see [`createLLMScorer`](/reference/scorers/llm-scorer), and for a detailed explanation of the pipeline, see [evaluation pipeline](/docs/scorers/overview#evaluation-pipeline).
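+
+ Before walking through each part, here is a minimal end-to-end sketch of how the pieces documented below fit into one definition (the scorer, its prompt, and its rating scale are illustrative):
+
+ ```typescript filename="src/mastra/scorers/conciseness-scorer.ts" showLineNumbers copy
+ import { openai } from "@ai-sdk/openai";
+ import { createLLMScorer } from "@mastra/core/scores";
+ import { z } from "zod";
+
+ export const concisenessScorer = createLLMScorer({
+   name: "Conciseness",
+   description: "Evaluates how concise the output is",
+
+   // Judge: the model and system instructions shared by all LLM steps
+   judge: {
+     model: openai("gpt-4o-mini"),
+     instructions: "You are an expert editor who judges conciseness.",
+   },
+
+   // Analyze: the LLM returns structured ratings, not the final score
+   analyze: {
+     description: "Rate conciseness on a 1-5 scale",
+     outputSchema: z.object({ conciseness: z.number().min(1).max(5) }),
+     createPrompt: ({ run }) => `
+       Rate this response for conciseness from 1 (rambling) to 5 (concise).
+       Response: ${run.output.text}
+       Return a JSON object with a numeric "conciseness" field.
+     `,
+   },
+
+   // calculateScore: deterministic mapping from the 1-5 rating to 0-1
+   calculateScore: ({ run }) => (run.analyzeStepResult.conciseness - 1) / 4,
+ });
+ ```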
+
+ #### Judge Configuration
+
+ All LLM scorer steps share a required `judge` configuration that defines the model and the system instructions guiding it.
+
+ - **Configuration:** `judge` object containing:
+   - **model:** The LLM model instance for evaluation
+   - **instructions:** System prompt that guides the LLM's behavior
+
+ ```typescript filename="src/mastra/scorers/tone-scorer.ts" showLineNumbers copy
+ import { openai } from "@ai-sdk/openai";
+ import { createLLMScorer } from "@mastra/core/scores";
+
+ export const toneScorer = createLLMScorer({
+   name: 'Tone Scorer',
+   description: 'Evaluates the tone of the output',
+
+   // Shared judge configuration
+   judge: {
+     model: openai('gpt-4o'),
+     instructions: 'You are an expert in analyzing text tone and communication style.',
+   },
+
+   // ... other steps
+ });
+ ```
+
+ #### Extract Step
+
+ This optional step uses an LLM to preprocess input/output data when you need to evaluate multiple distinct elements, filter content, or focus analysis on specific parts of complex data.
+
+ - **Configuration:** `{ description, outputSchema, createPrompt }`
+ - **Data flow:** The structured output (defined by `outputSchema`) is passed to the analyze step as `extractStepResult`
+
+ ```typescript filename="src/mastra/scorers/content-scorer.ts" showLineNumbers copy
+ import { z } from "zod";
+
+ export const contentScorer = createLLMScorer({
+   // ... judge configuration
+
+   extract: {
+     description: 'Extract key themes and topics from the content',
+     outputSchema: z.object({
+       themes: z.array(z.string()),
+       topics: z.array(z.string()),
+       keyPhrases: z.array(z.string())
+     }),
+     createPrompt: ({ run }) => `
+       Analyze this content and extract:
+       1. Main themes (3-5 high-level concepts)
+       2. Specific topics mentioned
+       3. Key phrases that capture the essence
+
+       Content: ${run.output.text}
+
+       Return a JSON object with themes, topics, and keyPhrases arrays.
+     `,
+   },
+
+   // ... other steps
+ });
+ ```
+
+ #### Analyze Step
+
+ This required step uses an LLM to perform the core evaluation and return structured analysis that will be converted to a numerical score.
+
+ - **Configuration:** `{ description, outputSchema, createPrompt }`
+ - **Data flow:** The structured output is passed to the `calculateScore` function and then to the reason step
+
+ ```typescript filename="src/mastra/scorers/quality-scorer.ts" showLineNumbers copy
+ export const qualityScorer = createLLMScorer({
+   // ... judge configuration
+
+   analyze: {
+     description: 'Evaluate content quality across multiple dimensions',
+     outputSchema: z.object({
+       clarity: z.number().min(1).max(5),
+       accuracy: z.number().min(1).max(5),
+       completeness: z.number().min(1).max(5),
+       relevance: z.number().min(1).max(5)
+     }),
+     createPrompt: ({ run }) => `
+       Evaluate this content on a scale of 1-5 for:
+       - Clarity: How clear and understandable is it?
+       - Accuracy: How factually correct does it appear?
+       - Completeness: How thorough is the response?
+       - Relevance: How well does it address the input?
+
+       Input: ${run.input.map(i => i.content).join(', ')}
+       Output: ${run.output.text}
+
+       Return a JSON object with numeric scores for each dimension.
+     `,
+   },
+
+   // ... other steps
+ });
+ ```
+
+ #### Calculate Score Step
+
+ This required function converts the LLM's structured analysis into a numerical score. It provides deterministic scoring logic, since LLMs aren't reliable at producing consistent numerical outputs.
+
+ - **Configuration:** `calculateScore` function that receives `{ run }` and returns a number
+ - **Data flow:** Converts the analyze step's structured output into a numerical score (0-1 range)
+
+ ```typescript filename="src/mastra/scorers/quality-scorer.ts" showLineNumbers copy
+ export const qualityScorer = createLLMScorer({
+   // ... previous steps
+
+   calculateScore: ({ run }) => {
+     const { clarity, accuracy, completeness, relevance } = run.analyzeStepResult;
+
+     // Weighted average on the original 1-5 scale
+     const weights = { clarity: 0.3, accuracy: 0.3, completeness: 0.2, relevance: 0.2 };
+     const weightedSum = (clarity * weights.clarity) +
+       (accuracy * weights.accuracy) +
+       (completeness * weights.completeness) +
+       (relevance * weights.relevance);
+
+     // Convert from the 1-5 scale to the 0-1 scale
+     return (weightedSum - 1) / 4;
+   },
+
+   // ... other steps
+ });
+ ```
+
+ #### Reason Step
+
+ This optional step uses an LLM to generate human-readable explanations for scores, useful for actionable feedback, debugging transparency, or compliance documentation.
+
+ - **Configuration:** `{ description, createPrompt }`
+ - **Data flow:** Receives all previous step results and the score, and returns a string explanation
+
+ ```typescript filename="src/mastra/scorers/quality-scorer.ts" showLineNumbers copy
+ export const qualityScorer = createLLMScorer({
+   // ... previous steps
+
+   reason: {
+     createPrompt: ({ run }) => {
+       const { clarity, accuracy, completeness, relevance } = run.analyzeStepResult;
+       const percentage = Math.round(run.score * 100);
+
+       return `
+         The content received a ${percentage}% quality score based on:
+         - Clarity: ${clarity}/5
+         - Accuracy: ${accuracy}/5
+         - Completeness: ${completeness}/5
+         - Relevance: ${relevance}/5
+
+         Provide a brief explanation of what contributed to this score.
+       `;
+     },
+   },
+ });
+ ```
+
+ **Examples and Resources:**
+ - [Custom LLM Judge Scorer Example](/examples/scorers/custom-llm-judge-eval) - An example walkthrough with a gluten checker
+ - [Built-in LLM Scorers](https://github.com/mastra-ai/mastra/tree/main/packages/evals/src/scorers/llm) - Real implementations for reference
package/.docs/raw/scorers/off-the-shelf-scorers.mdx ADDED
@@ -0,0 +1,30 @@
+ ---
+ title: "Built-in Scorers"
+ description: "Overview of Mastra's ready-to-use scorers for evaluating AI outputs across quality, safety, and performance dimensions."
+ ---
+
+ # Built-in Scorers
+
+ Mastra provides a comprehensive set of built-in scorers for evaluating AI outputs. These scorers are optimized for common evaluation scenarios and are ready to use in your agents and workflows.
+
+ ## Available Scorers
+
+ ### Accuracy and Reliability
+
+ These scorers evaluate how correct, truthful, and complete your agent's answers are:
+
+ - [`answer-relevancy`](/reference/scorers/answer-relevancy): Evaluates how well responses address the input query (`0-1`, higher is better)
+ - [`faithfulness`](/reference/scorers/faithfulness): Measures how accurately responses represent provided context (`0-1`, higher is better)
+ - [`hallucination`](/reference/scorers/hallucination): Detects factual contradictions and unsupported claims (`0-1`, lower is better)
+ - [`completeness`](/reference/scorers/completeness): Checks if responses include all necessary information (`0-1`, higher is better)
+ - [`content-similarity`](/reference/scorers/content-similarity): Measures textual similarity using character-level matching (`0-1`, higher is better)
+ - [`textual-difference`](/reference/scorers/textual-difference): Measures textual differences between strings (`0-1`, higher means more similar)
+
+ ### Output Quality
+
+ These scorers evaluate adherence to format, style, and safety requirements:
+
+ - [`tone-consistency`](/reference/scorers/tone-consistency): Measures consistency in formality, complexity, and style (`0-1`, higher is better)
+ - [`toxicity`](/reference/scorers/toxicity): Detects harmful or inappropriate content (`0-1`, lower is better)
+ - [`bias`](/reference/scorers/bias): Detects potential biases in the output (`0-1`, lower is better)
+ - [`keyword-coverage`](/reference/scorers/keyword-coverage): Assesses technical terminology usage (`0-1`, higher is better)
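+
+ Built-in scorers are constructed with factory functions. As a sketch using the two factories shown in the scorers overview (the other scorers follow the same pattern via their own factories, whose exact names aren't listed here):
+
+ ```typescript filename="src/mastra/scorers/built-in.ts" showLineNumbers copy
+ import { openai } from "@ai-sdk/openai";
+ import {
+   createAnswerRelevancyScorer,
+   createToxicityScorer
+ } from "@mastra/evals/scorers/llm";
+
+ // Each factory takes the judge model to use for evaluation
+ export const relevancyScorer = createAnswerRelevancyScorer({ model: openai("gpt-4o-mini") });
+ export const toxicityScorer = createToxicityScorer({ model: openai("gpt-4o-mini") });
+ ```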
package/.docs/raw/scorers/overview.mdx ADDED
@@ -0,0 +1,124 @@
+ ---
+ title: "Overview"
+ description: Overview of scorers in Mastra, detailing their capabilities for evaluating AI outputs and measuring performance.
+ ---
+
+ # Scorers overview
+
+ **Scorers** are evaluation tools that measure the quality, accuracy, or performance of AI-generated outputs. Scorers provide an automated way to assess whether your agents, workflows, or language models are producing the desired results by analyzing their responses against specific criteria.
+
+ **Scores** are numerical values (typically between 0 and 1) that quantify how well an output meets your evaluation criteria. These scores enable you to objectively track performance, compare different approaches, and identify areas for improvement in your AI systems.
+
+ ## Evaluation pipeline
+
+ Mastra scorers follow a three-step pipeline in which two of the steps are optional:
+
+ 1. **Extract** (Optional): Identify and isolate relevant content for focused evaluation
+ 2. **Analyze** (Required): Perform the core evaluation and generate a score
+ 3. **Reason** (Optional): Provide explanations or justifications for the score
+
+ This modular structure enables both simple single-step evaluations and complex multi-stage analysis workflows, allowing you to build evaluations that match your specific needs.
+
+ ### When to use each step
+
+ **Extract step** - Use when your content is complex or needs preprocessing:
+ - Separating facts from opinions in mixed responses
+ - Focusing evaluation on specific sections of long outputs
+ - Parsing multiple claims that need individual evaluation
+ - Example: Bias detection that first identifies opinion statements
+
+ **Analyze step** - Always required for core evaluation:
+ - Straightforward scenarios: Direct scoring of input/output pairs
+ - Complex scenarios: Evaluate preprocessed content and generate detailed results
+ - Applies your scoring criteria and calculates the numerical score
+
+ **Reason step** - Use when explanations are important:
+ - Users need to understand why a score was assigned
+ - Debugging and transparency are critical
+ - Compliance or auditing requires explanations
+ - Providing actionable feedback for improvement
+
+ To learn how to create your own scorers, see [Creating Custom Scorers](/docs/scorers/custom-scorers).
42
+
43
+ ## Live evaluations
44
+
45
+ **Live evaluations** allow you to automatically score AI outputs in real-time as your agents and workflows operate. Instead of running evaluations manually or in batches, scorers run asynchronously alongside your AI systems, providing continuous quality monitoring.
46
+
47
+ ### Adding scorers to agents
48
+
49
+ You can add built-in scorers to your agents to automatically evaluate their outputs. See the [full list of built-in scorers](/docs/scorers/off-the-shelf-scorers) for all available options.
50
+
51
+ ```typescript filename="src/mastra/agents/evaluated-agent.ts" showLineNumbers copy
52
+ import { Agent } from "@mastra/core/agent";
53
+ import { openai } from "@ai-sdk/openai";
54
+ import {
55
+ createAnswerRelevancyScorer,
56
+ createToxicityScorer
57
+ } from "@mastra/evals/scorers/llm";
58
+
59
+ export const evaluatedAgent = new Agent({
60
+ // ...
61
+ scorers: {
62
+ relevancy: {
63
+ scorer: createAnswerRelevancyScorer({ model: openai("gpt-4o-mini") }),
64
+ sampling: { type: "ratio", rate: 0.5 }
65
+ },
66
+ safety: {
67
+ scorer: createToxicityScorer({ model: openai("gpt-4o-mini") }),
68
+ sampling: { type: "ratio", rate: 1 }
69
+ }
70
+ }
71
+ });
72
+ ```
73
+
74
+ ### Adding scorers to workflow steps
75
+
76
+ You can also add scorers to individual workflow steps to evaluate outputs at specific points in your process:
77
+
78
+ ```typescript filename="src/mastra/workflows/content-generation.ts" showLineNumbers copy
79
+ import { createWorkflow, createStep } from "@mastra/core/workflows";
80
+ import { z } from "zod";
81
+ import { customStepScorer } from "../scorers/custom-step-scorer";
82
+
83
+ const contentStep = createStep({
84
+ // ...
85
+ scorers: {
86
+ customStepScorer: {
87
+ scorer: customStepScorer(),
88
+ sampling: {
89
+ type: "ratio",
90
+ rate: 1, // Score every step execution
91
+ }
92
+ }
93
+ },
94
+ });
95
+
96
+ export const contentWorkflow = createWorkflow({ ... })
97
+ .then(contentStep)
98
+ .commit();
99
+ ```
+
+ ### How live evaluations work
+
+ **Asynchronous execution**: Live evaluations run in the background without blocking your agent responses or workflow execution. This ensures your AI systems maintain their performance while still being monitored.
+
+ **Sampling control**: The `sampling.rate` parameter (0-1) controls what percentage of outputs get scored:
+ - `1.0`: Score every single response (100%)
+ - `0.5`: Score half of all responses (50%)
+ - `0.1`: Score 10% of responses
+ - `0.0`: Disable scoring
+
+ **Automatic storage**: All scoring results are automatically stored in the `mastra_scorers` table in your configured database, allowing you to analyze performance trends over time.
+
+ ## Testing scorers locally
+
+ Mastra's `mastra dev` CLI command launches a local playground for testing your scorers. The playground includes a scorers section where you can run individual scorers against test inputs and view detailed results.
+
+ For more details, see the [Local Dev Playground](/docs/server-db/local-dev-playground) docs.
+
+ ## Next steps
+
+ - Learn how to create your own scorers in the [Creating Custom Scorers](/docs/scorers/custom-scorers) guide
+ - Explore built-in scorers in the [Off-the-shelf Scorers](/docs/scorers/off-the-shelf-scorers) section
+ - Test scorers with the [Local Dev Playground](/docs/server-db/local-dev-playground)
+ - See example scorers in the [Examples Overview](/examples) section
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@mastra/mcp-docs-server",
- "version": "0.13.7-alpha.0",
+ "version": "0.13.7-alpha.2",
  "description": "MCP server for accessing Mastra.ai documentation, changelogs, and news.",
  "type": "module",
  "main": "dist/index.js",
@@ -32,7 +32,7 @@
  "uuid": "^11.1.0",
  "zod": "^3.25.67",
  "zod-to-json-schema": "^3.24.5",
- "@mastra/core": "0.12.0-alpha.0",
+ "@mastra/core": "0.12.0-alpha.2",
  "@mastra/mcp": "^0.10.7"
  },
  "devDependencies": {
@@ -43,13 +43,13 @@
  "@wong2/mcp-cli": "^1.10.0",
  "cross-env": "^7.0.3",
  "eslint": "^9.30.1",
- "hono": "^4.8.4",
+ "hono": "^4.8.9",
  "tsup": "^8.5.0",
  "tsx": "^4.19.4",
  "typescript": "^5.8.3",
  "vitest": "^3.2.4",
  "@internal/lint": "0.0.23",
- "@mastra/core": "0.12.0-alpha.0"
+ "@mastra/core": "0.12.0-alpha.2"
  },
  "scripts": {
  "prepare-docs": "cross-env PREPARE=true node dist/prepare-docs/prepare.js",