@mastra/mcp-docs-server 0.13.30-alpha.0 → 0.13.30-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/.docs/organized/changelogs/%40mastra%2Fagent-builder.md +9 -9
  2. package/.docs/organized/changelogs/%40mastra%2Fai-sdk.md +15 -0
  3. package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +8 -8
  4. package/.docs/organized/changelogs/%40mastra%2Fcore.md +29 -29
  5. package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloud.md +9 -9
  6. package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +16 -16
  7. package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +8 -8
  8. package/.docs/organized/changelogs/%40mastra%2Fmemory.md +16 -16
  9. package/.docs/organized/changelogs/%40mastra%2Fpg.md +16 -16
  10. package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +22 -22
  11. package/.docs/organized/changelogs/%40mastra%2Freact.md +13 -0
  12. package/.docs/organized/changelogs/%40mastra%2Fserver.md +8 -8
  13. package/.docs/organized/changelogs/create-mastra.md +19 -19
  14. package/.docs/organized/changelogs/mastra.md +27 -27
  15. package/.docs/organized/code-examples/agent.md +0 -1
  16. package/.docs/organized/code-examples/agui.md +2 -2
  17. package/.docs/organized/code-examples/client-side-tools.md +2 -2
  18. package/.docs/raw/agents/adding-voice.mdx +118 -25
  19. package/.docs/raw/agents/agent-memory.mdx +73 -89
  20. package/.docs/raw/agents/guardrails.mdx +1 -1
  21. package/.docs/raw/agents/overview.mdx +39 -7
  22. package/.docs/raw/agents/using-tools.mdx +95 -0
  23. package/.docs/raw/deployment/overview.mdx +9 -11
  24. package/.docs/raw/frameworks/agentic-uis/ai-sdk.mdx +1 -1
  25. package/.docs/raw/frameworks/servers/express.mdx +2 -2
  26. package/.docs/raw/getting-started/installation.mdx +34 -85
  27. package/.docs/raw/getting-started/mcp-docs-server.mdx +13 -1
  28. package/.docs/raw/index.mdx +49 -14
  29. package/.docs/raw/observability/ai-tracing/exporters/otel.mdx +3 -0
  30. package/.docs/raw/reference/observability/ai-tracing/exporters/otel.mdx +6 -0
  31. package/.docs/raw/reference/scorers/answer-relevancy.mdx +105 -7
  32. package/.docs/raw/reference/scorers/answer-similarity.mdx +266 -16
  33. package/.docs/raw/reference/scorers/bias.mdx +107 -6
  34. package/.docs/raw/reference/scorers/completeness.mdx +131 -8
  35. package/.docs/raw/reference/scorers/content-similarity.mdx +107 -8
  36. package/.docs/raw/reference/scorers/context-precision.mdx +234 -18
  37. package/.docs/raw/reference/scorers/context-relevance.mdx +418 -35
  38. package/.docs/raw/reference/scorers/faithfulness.mdx +122 -8
  39. package/.docs/raw/reference/scorers/hallucination.mdx +125 -8
  40. package/.docs/raw/reference/scorers/keyword-coverage.mdx +141 -9
  41. package/.docs/raw/reference/scorers/noise-sensitivity.mdx +478 -6
  42. package/.docs/raw/reference/scorers/prompt-alignment.mdx +351 -102
  43. package/.docs/raw/reference/scorers/textual-difference.mdx +134 -6
  44. package/.docs/raw/reference/scorers/tone-consistency.mdx +133 -0
  45. package/.docs/raw/reference/scorers/tool-call-accuracy.mdx +422 -65
  46. package/.docs/raw/reference/scorers/toxicity.mdx +125 -7
  47. package/.docs/raw/reference/workflows/workflow.mdx +33 -0
  48. package/.docs/raw/scorers/custom-scorers.mdx +244 -3
  49. package/.docs/raw/scorers/overview.mdx +8 -38
  50. package/.docs/raw/server-db/middleware.mdx +5 -2
  51. package/.docs/raw/server-db/runtime-context.mdx +178 -0
  52. package/.docs/raw/streaming/workflow-streaming.mdx +5 -1
  53. package/.docs/raw/tools-mcp/overview.mdx +25 -7
  54. package/.docs/raw/workflows/overview.mdx +28 -1
  55. package/CHANGELOG.md +7 -0
  56. package/package.json +4 -4
  57. package/.docs/raw/agents/runtime-context.mdx +0 -106
  58. package/.docs/raw/agents/using-tools-and-mcp.mdx +0 -241
  59. package/.docs/raw/getting-started/model-providers.mdx +0 -63
  60. package/.docs/raw/tools-mcp/runtime-context.mdx +0 -63
  61. /package/.docs/raw/{evals → scorers/evals-old-api}/custom-eval.mdx +0 -0
  62. /package/.docs/raw/{evals → scorers/evals-old-api}/overview.mdx +0 -0
  63. /package/.docs/raw/{evals → scorers/evals-old-api}/running-in-ci.mdx +0 -0
  64. /package/.docs/raw/{evals → scorers/evals-old-api}/textual-evals.mdx +0 -0
  65. /package/.docs/raw/{server-db → workflows}/snapshots.mdx +0 -0
@@ -7,8 +7,6 @@ description: Documentation for the Content Similarity Scorer in Mastra, which me
7
7
 
8
8
  The `createContentSimilarityScorer()` function measures the textual similarity between two strings, providing a score that indicates how closely they match. It supports configurable options for case sensitivity and whitespace handling.
9
9
 
10
- For a usage example, see the [Content Similarity Examples](/examples/scorers/content-similarity).
11
-
12
10
  ## Parameters
13
11
 
14
12
  The `createContentSimilarityScorer()` function accepts a single options object with the following properties:
@@ -78,15 +76,116 @@ The scorer evaluates textual similarity through character-level matching and con
78
76
 
79
77
  Final score: `similarity_value * scale`
80
78
 
79
+ ## Examples
80
+
81
+ ### High similarity example
82
+
83
+ In this example, the response closely resembles the query in both structure and meaning. Minor differences in tense and phrasing do not significantly affect the overall similarity.
84
+
85
+ ```typescript filename="src/example-high-similarity.ts" showLineNumbers copy
86
+ import { createContentSimilarityScorer } from "@mastra/evals/scorers/llm";
87
+
88
+ const scorer = createContentSimilarityScorer();
89
+
90
+ const query = "The quick brown fox jumps over the lazy dog.";
91
+ const response = "A quick brown fox jumped over a lazy dog.";
92
+
93
+ const result = await scorer.run({
94
+ input: [{ role: 'user', content: query }],
95
+ output: { text: response },
96
+ });
97
+
98
+ console.log(result);
99
+ ```
100
+
101
+ #### High similarity output
102
+
103
+ The output receives a high score because the response preserves the intent and content of the query with only subtle wording changes.
104
+
105
+ ```typescript
106
+ {
107
+ score: 0.7761194029850746,
108
+ analyzeStepResult: {
109
+ similarity: 0.7761194029850746
110
+ },
111
+ }
112
+ ```
113
+
114
+ ### Moderate similarity example
115
+
116
+ In this example, the response shares some conceptual overlap with the query but diverges in structure and wording. Key elements remain present, but the phrasing introduces moderate variation.
117
+
118
+ ```typescript filename="src/example-moderate-similarity.ts" showLineNumbers copy
119
+ import { createContentSimilarityScorer } from "@mastra/evals/scorers/llm";
120
+
121
+ const scorer = createContentSimilarityScorer();
122
+
123
+ const query = "A brown fox quickly leaps across a sleeping dog.";
124
+ const response = "The quick brown fox jumps over the lazy dog.";
125
+
126
+ const result = await scorer.run({
127
+ input: [{ role: 'user', content: query }],
128
+ output: { text: response },
129
+ });
130
+
131
+ console.log(result);
132
+ ```
133
+
134
+ #### Moderate similarity output
135
+
136
+ The output receives a mid-range score because the response captures the general idea of the query, though it differs enough in wording to reduce overall similarity.
137
+
138
+ ```typescript
139
+ {
140
+ score: 0.40540540540540543,
141
+ analyzeStepResult: {
142
+ similarity: 0.40540540540540543
143
+ }
144
+ }
145
+ ```
146
+
147
+ ### Low similarity example
148
+
149
+ In this example, the response and query are unrelated in meaning, despite having a similar grammatical structure. There is little to no shared content overlap.
150
+
151
+ ```typescript filename="src/example-low-similarity.ts" showLineNumbers copy
152
+ import { createContentSimilarityScorer } from "@mastra/evals/scorers/llm";
153
+
154
+ const scorer = createContentSimilarityScorer();
155
+
156
+ const query = "The cat sleeps on the windowsill.";
157
+ const response = "The quick brown fox jumps over the lazy dog.";
158
+
159
+ const result = await scorer.run({
160
+ input: [{ role: 'user', content: query }],
161
+ output: { text: response },
162
+ });
163
+
164
+ console.log(result);
165
+ ```
166
+
167
+ #### Low similarity output
168
+
169
+ The output receives a low score because the response does not align with the content or intent of the query.
170
+
171
+ ```typescript
172
+ {
173
+ score: 0.25806451612903225,
174
+ analyzeStepResult: {
175
+ similarity: 0.25806451612903225
176
+ },
177
+ }
178
+ ```
179
+
81
180
  ### Score interpretation
82
181
 
83
- (0 to scale, default 0-1)
182
+ A similarity score between 0 and 1:
84
183
 
85
- - 1.0: Perfect match - identical texts
86
- - 0.7-0.9: High similarity - mostly matching content
87
- - 0.4-0.6: Moderate similarity - partial matches
88
- - 0.1-0.3: Low similarity - few matching patterns
89
- - 0.0: No similarity - completely different texts
184
+ - **1.0**: Perfect match — content is nearly identical.
185
+ - **0.7–0.9**: High similarity — minor differences in word choice or structure.
186
+ - **0.4–0.6**: Moderate similarity — general overlap with noticeable variation.
187
+ - **0.1–0.3**: Low similarity — few common elements or shared meaning.
188
+ - **0.0**: No similarity — completely different content.
90
189
 
91
190
  ## Related
92
191
 
@@ -9,6 +9,22 @@ import { PropertiesTable } from "@/components/properties-table";
9
9
 
10
10
  The `createContextPrecisionScorer()` function creates a scorer that evaluates how relevant and well-positioned retrieved context pieces are for generating expected outputs. It uses **Mean Average Precision (MAP)** to reward systems that place relevant context earlier in the sequence.
11
11
 
12
+ It is especially useful for these use cases:
13
+
14
+ **RAG System Evaluation**
15
+
16
+ Ideal for evaluating retrieved context in RAG pipelines where:
17
+ - Context ordering matters for model performance
18
+ - You need to measure retrieval quality beyond simple relevance
19
+ - Early relevant context is more valuable than later relevant context
20
+
21
+ **Context Window Optimization**
22
+
23
+ Use when optimizing context selection for:
24
+ - Limited context windows
25
+ - Token budget constraints
26
+ - Multi-step reasoning tasks
27
+
12
28
  ## Parameters
13
29
 
14
30
  <PropertiesTable
@@ -48,9 +64,8 @@ The `createContextPrecisionScorer()` function creates a scorer that evaluates ho
48
64
  ]}
49
65
  />
50
66
 
51
- :::note
52
- Either `context` or `contextExtractor` must be provided. If both are provided, `contextExtractor` takes precedence.
53
- :::
67
+
68
+ **Note**: Either `context` or `contextExtractor` must be provided. If both are provided, `contextExtractor` takes precedence.
54
69
 
55
70
  ## .run() Returns
56
71
 
@@ -93,10 +108,26 @@ Where:
93
108
 
94
109
  ### Score Interpretation
95
110
 
96
- - **1.0** = Perfect precision (all relevant context appears first)
97
- - **0.5-0.9** = Good precision with some relevant context well-positioned
98
- - **0.1-0.4** = Poor precision with relevant context buried or scattered
99
- - **0.0** = No relevant context found
111
+ - **0.9-1.0**: Excellent precision - all relevant context early in sequence
112
+ - **0.7-0.8**: Good precision - most relevant context well-positioned
113
+ - **0.4-0.6**: Moderate precision - relevant context mixed with irrelevant
114
+ - **0.1-0.3**: Poor precision - little relevant context or poorly positioned
115
+ - **0.0**: No relevant context found
116
+
117
+ ### Reason analysis
118
+
119
+ The reason field explains:
120
+ - Which context pieces were deemed relevant/irrelevant
121
+ - How positioning affected the MAP calculation
122
+ - Specific relevance criteria used in evaluation
123
+
124
+ ### Optimization insights
125
+
126
+ Use results to:
127
+ - **Improve retrieval**: Filter out irrelevant context before ranking
128
+ - **Optimize ranking**: Ensure relevant context appears early
129
+ - **Tune chunk size**: Balance context detail vs. relevance precision
130
+ - **Evaluate embeddings**: Test different embedding models for better retrieval
100
131
 
101
132
  ### Example Calculation
102
133
 
@@ -109,19 +140,204 @@ Given context: `[relevant, irrelevant, relevant, irrelevant]`
109
140
 
110
141
  MAP = (1.0 + 0.67) / 2 = 0.835 ≈ **0.83**
111
142
 
112
- ## Usage Patterns
143
+ ## Scorer configuration
113
144
 
114
- ### RAG System Evaluation
115
- Ideal for evaluating retrieved context in RAG pipelines where:
116
- - Context ordering matters for model performance
117
- - You need to measure retrieval quality beyond simple relevance
118
- - Early relevant context is more valuable than later relevant context
145
+ ### Dynamic context extraction
119
146
 
120
- ### Context Window Optimization
121
- Use when optimizing context selection for:
122
- - Limited context windows
123
- - Token budget constraints
124
- - Multi-step reasoning tasks
147
+ ```typescript
148
+ const scorer = createContextPrecisionScorer({
149
+ model: openai('gpt-4o-mini'),
150
+ options: {
151
+ contextExtractor: (input, output) => {
152
+ // Extract context dynamically based on the query
153
+ const query = input?.inputMessages?.[0]?.content || '';
154
+
155
+ // Example: Retrieve from a vector database
156
+ const searchResults = vectorDB.search(query, { limit: 10 });
157
+ return searchResults.map(result => result.content);
158
+ },
159
+ scale: 1,
160
+ },
161
+ });
162
+ ```
163
+
164
+ ### Large context evaluation
165
+
166
+ ```typescript
167
+ const scorer = createContextPrecisionScorer({
168
+ model: openai('gpt-4o-mini'),
169
+ options: {
170
+ context: [
171
+ // Simulate retrieved documents from vector database
172
+ 'Document 1: Highly relevant content...',
173
+ 'Document 2: Somewhat related content...',
174
+ 'Document 3: Tangentially related...',
175
+ 'Document 4: Not relevant...',
176
+ 'Document 5: Highly relevant content...',
177
+ // ... up to dozens of context pieces
178
+ ],
179
+ },
180
+ });
181
+ ```
182
+
183
+ ## Examples
184
+
185
+ ### High precision example
186
+
187
+ This example shows perfect context precision where all relevant context appears early:
188
+
189
+ ```typescript
190
+ import { openai } from '@ai-sdk/openai';
191
+ import { createContextPrecisionScorer } from '@mastra/evals';
192
+
193
+ const scorer = createContextPrecisionScorer({
194
+ model: openai('gpt-4o-mini'),
195
+ options: {
196
+ context: [
197
+ 'Photosynthesis is the process by which plants convert sunlight, carbon dioxide, and water into glucose and oxygen.',
198
+ 'The process occurs in the chloroplasts of plant cells, specifically in the thylakoids.',
199
+ 'Light-dependent reactions happen in the thylakoid membranes, while the Calvin cycle occurs in the stroma.',
200
+ ],
201
+ scale: 1,
202
+ },
203
+ });
204
+
205
+ const result = await scorer.run({
206
+ input: {
207
+ inputMessages: [
208
+ {
209
+ id: '1',
210
+ role: 'user',
211
+ content: 'How does photosynthesis work in plants?',
212
+ },
213
+ ],
214
+ },
215
+ output: [
216
+ {
217
+ id: '2',
218
+ role: 'assistant',
219
+ content: 'Photosynthesis is the process where plants convert sunlight, CO2, and water into glucose and oxygen using chloroplasts.',
220
+ },
221
+ ],
222
+ });
223
+
224
+ console.log(result);
225
+ // Output:
226
+ // {
227
+ // score: 1.0,
228
+ // reason: "The score is 1.0 because all context pieces are highly relevant to explaining photosynthesis and are optimally ordered to support the expected output."
229
+ // }
230
+ ```
231
+
232
+ ### Mixed precision example
233
+
234
+ This example shows moderate precision with both relevant and irrelevant context:
235
+
236
+ ```typescript
237
+ import { openai } from '@ai-sdk/openai';
238
+ import { createContextPrecisionScorer } from '@mastra/evals';
239
+
240
+ const scorer = createContextPrecisionScorer({
241
+ model: openai('gpt-4o-mini'),
242
+ options: {
243
+ context: [
244
+ 'Regular exercise improves cardiovascular health by strengthening the heart muscle.',
245
+ 'A balanced diet should include fruits, vegetables, and whole grains.',
246
+ 'Physical activity releases endorphins which improve mood and reduce stress.',
247
+ 'The average person should drink 8 glasses of water per day.',
248
+ 'Exercise also helps maintain healthy body weight and muscle mass.',
249
+ ],
250
+ scale: 1,
251
+ },
252
+ });
253
+
254
+ const result = await scorer.run({
255
+ input: {
256
+ inputMessages: [
257
+ {
258
+ id: '1',
259
+ role: 'user',
260
+ content: 'What are the mental and physical benefits of exercise?',
261
+ },
262
+ ],
263
+ },
264
+ output: [
265
+ {
266
+ id: '2',
267
+ role: 'assistant',
268
+ content: 'Exercise provides cardiovascular benefits, improves mood through endorphin release, and helps maintain healthy body composition.',
269
+ },
270
+ ],
271
+ });
272
+
273
+ console.log(result);
274
+ // Output:
275
+ // {
276
+ // score: 0.72,
277
+ // reason: "The score is 0.72 because contexts 1, 3, and 5 are relevant to exercise benefits, but irrelevant contexts about diet and hydration reduce the precision score."
278
+ // }
279
+ ```
280
+
281
+ ### Low precision example
282
+
283
+ This example shows poor context precision with mostly irrelevant context:
284
+
285
+ ```typescript
286
+ import { openai } from '@ai-sdk/openai';
287
+ import { createContextPrecisionScorer } from '@mastra/evals';
288
+
289
+ const scorer = createContextPrecisionScorer({
290
+ model: openai('gpt-4o-mini'),
291
+ options: {
292
+ context: [
293
+ 'The weather forecast shows sunny skies this weekend.',
294
+ 'Coffee is one of the world\'s most popular beverages.',
295
+ 'Machine learning requires large amounts of training data.',
296
+ 'Cats typically sleep 12-16 hours per day.',
297
+ 'The capital of France is Paris.',
298
+ ],
299
+ scale: 1,
300
+ },
301
+ });
302
+
303
+ const result = await scorer.run({
304
+ input: {
305
+ inputMessages: [
306
+ {
307
+ id: '1',
308
+ role: 'user',
309
+ content: 'How does photosynthesis work?',
310
+ },
311
+ ],
312
+ },
313
+ output: [
314
+ {
315
+ id: '2',
316
+ role: 'assistant',
317
+ content: 'Photosynthesis is the process by which plants convert sunlight into energy using chlorophyll.',
318
+ },
319
+ ],
320
+ });
321
+
322
+ console.log(result);
323
+ // Output:
324
+ // {
325
+ // score: 0.0,
326
+ // reason: "The score is 0.0 because none of the retrieved context pieces are relevant to explaining photosynthesis."
327
+ // }
328
+ ```
329
+
330
+ ## Comparison with Context Relevance
331
+
332
+ Choose the right scorer for your needs:
333
+
334
+ | Use Case | Context Relevance | Context Precision |
335
+ |----------|-------------------|-------------------|
336
+ | **RAG evaluation** | When usage matters | When ranking matters |
337
+ | **Context quality** | Nuanced levels | Binary relevance |
338
+ | **Missing detection** | ✓ Identifies gaps | ✗ Not evaluated |
339
+ | **Usage tracking** | ✓ Tracks utilization | ✗ Not considered |
340
+ | **Position sensitivity** | ✗ Position agnostic | ✓ Rewards early placement |
125
341
 
126
342
  ## Related
127
343