@mastra/mcp-docs-server 0.13.14-alpha.0 → 0.13.14-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +18 -18
  2. package/.docs/organized/changelogs/%40mastra%2Fcloudflare-d1.md +10 -10
  3. package/.docs/organized/changelogs/%40mastra%2Fcloudflare.md +10 -10
  4. package/.docs/organized/changelogs/%40mastra%2Fcore.md +24 -24
  5. package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +15 -15
  6. package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +11 -11
  7. package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +18 -18
  8. package/.docs/organized/changelogs/%40mastra%2Fevals.md +14 -14
  9. package/.docs/organized/changelogs/%40mastra%2Ffirecrawl.md +9 -9
  10. package/.docs/organized/changelogs/%40mastra%2Flance.md +10 -10
  11. package/.docs/organized/changelogs/%40mastra%2Flibsql.md +12 -12
  12. package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +9 -9
  13. package/.docs/organized/changelogs/%40mastra%2Fmcp.md +13 -13
  14. package/.docs/organized/changelogs/%40mastra%2Fmemory.md +13 -13
  15. package/.docs/organized/changelogs/%40mastra%2Fmssql.md +9 -0
  16. package/.docs/organized/changelogs/%40mastra%2Fopensearch.md +11 -11
  17. package/.docs/organized/changelogs/%40mastra%2Fpg.md +11 -11
  18. package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +15 -15
  19. package/.docs/organized/changelogs/%40mastra%2Fqdrant.md +10 -10
  20. package/.docs/organized/changelogs/%40mastra%2Frag.md +14 -14
  21. package/.docs/organized/changelogs/%40mastra%2Fschema-compat.md +8 -0
  22. package/.docs/organized/changelogs/%40mastra%2Fserver.md +14 -14
  23. package/.docs/organized/changelogs/%40mastra%2Fupstash.md +10 -10
  24. package/.docs/organized/changelogs/%40mastra%2Fvectorize.md +10 -10
  25. package/.docs/organized/changelogs/%40mastra%2Fvoice-cloudflare.md +10 -10
  26. package/.docs/organized/changelogs/%40mastra%2Fvoice-deepgram.md +10 -10
  27. package/.docs/organized/changelogs/%40mastra%2Fvoice-elevenlabs.md +10 -10
  28. package/.docs/organized/changelogs/%40mastra%2Fvoice-gladia.md +9 -0
  29. package/.docs/organized/changelogs/%40mastra%2Fvoice-google.md +10 -10
  30. package/.docs/organized/changelogs/%40mastra%2Fvoice-murf.md +11 -11
  31. package/.docs/organized/changelogs/%40mastra%2Fvoice-openai-realtime.md +10 -10
  32. package/.docs/organized/changelogs/%40mastra%2Fvoice-openai.md +11 -11
  33. package/.docs/organized/changelogs/%40mastra%2Fvoice-playai.md +10 -10
  34. package/.docs/organized/changelogs/%40mastra%2Fvoice-sarvam.md +10 -10
  35. package/.docs/organized/changelogs/%40mastra%2Fvoice-speechify.md +10 -10
  36. package/.docs/organized/changelogs/create-mastra.md +3 -3
  37. package/.docs/organized/changelogs/mastra.md +13 -13
  38. package/.docs/organized/code-examples/agent.md +2 -2
  39. package/.docs/raw/frameworks/agentic-uis/ai-sdk.mdx +1 -1
  40. package/.docs/raw/reference/client-js/mastra-client.mdx +6 -0
  41. package/.docs/raw/reference/scorers/create-scorer.mdx +1 -1
  42. package/.docs/raw/reference/scorers/tool-call-accuracy.mdx +211 -0
  43. package/.docs/raw/scorers/off-the-shelf-scorers.mdx +1 -0
  44. package/.docs/raw/tools-mcp/mcp-overview.mdx +1 -1
  45. package/.docs/raw/workflows/control-flow.mdx +2 -0
  46. package/package.json +5 -5
package/.docs/raw/frameworks/agentic-uis/ai-sdk.mdx CHANGED
@@ -416,7 +416,7 @@ Mastra automatically handles AI SDK v4 data using its internal `MessageList` class
 For cases where you need to manually convert messages between AI SDK and Mastra formats, use the `convertMessages` utility:
 
 ```typescript
-import { convertMessages } from '@mastra/core';
+import { convertMessages } from '@mastra/core/agent';
 
 // Convert AI SDK v4 messages to v5
 const aiv5Messages = convertMessages(aiv4Messages).to('AIV5.UI');
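For readers who want the snippet above in runnable form, a minimal sketch follows; only the import path and the `.to('AIV5.UI')` call come from the diff, while the v4 message objects are assumed placeholders.

```typescript
import { convertMessages } from '@mastra/core/agent';

// Assumed minimal AI SDK v4 message shape, for illustration only.
const aiv4Messages = [
  { id: '1', role: 'user' as const, content: 'What is the weather in Paris?' },
  { id: '2', role: 'assistant' as const, content: 'Let me check that for you.' },
];

// Convert AI SDK v4 messages to v5 UI messages, as in the diff above.
const aiv5Messages = convertMessages(aiv4Messages).to('AIV5.UI');
console.log(aiv5Messages.length); // 2
```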
package/.docs/raw/reference/client-js/mastra-client.mdx CHANGED
@@ -54,6 +54,12 @@ export const mastraClient = new MastraClient({
     description: "An object containing custom HTTP headers to include with every request.",
     isOptional: true,
   },
+  {
+    name: "credentials",
+    type: '"omit" | "same-origin" | "include"',
+    description: "Credentials mode for requests. See https://developer.mozilla.org/en-US/docs/Web/API/Request/credentials for more info.",
+    isOptional: true,
+  },
 ]}
 />
 
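The new `credentials` option slots into the client constructor shown in the hunk context. A minimal sketch, assuming placeholder `baseUrl` and header values:

```typescript
import { MastraClient } from '@mastra/client-js';

export const mastraClient = new MastraClient({
  baseUrl: 'https://api.example.com', // placeholder endpoint
  headers: { 'x-api-key': 'my-key' }, // placeholder custom header
  credentials: 'include', // Fetch credentials mode: send cookies cross-origin
});
```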
package/.docs/raw/reference/scorers/create-scorer.mdx CHANGED
@@ -79,7 +79,7 @@ This function returns a scorer builder that you can chain step methods onto. See
 For better type inference and IntelliSense support, you can specify input/output types when creating scorers:
 
 ```typescript
-import { createScorer, ScorerRunInputForAgent, ScorerRunOutputForAgent } from '@mastra/core';
+import { createScorer, ScorerRunInputForAgent, ScorerRunOutputForAgent } from '@mastra/core/scorers';
 
 // For agent evaluation with full type safety
 const agentScorer = createScorer<ScorerRunInputForAgent, ScorerRunOutputForAgent>({
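To show where the typed call goes from here, a rough sketch follows; the `name`/`description` options and the `generateScore` step are assumptions based on the builder pattern this page describes, not lines from the diff.

```typescript
import {
  createScorer,
  ScorerRunInputForAgent,
  ScorerRunOutputForAgent,
} from '@mastra/core/scorers';

// The generics flow into each chained step, so `run.output` below is
// typed as ScorerRunOutputForAgent rather than `unknown`.
const agentScorer = createScorer<ScorerRunInputForAgent, ScorerRunOutputForAgent>({
  name: 'non-empty-response', // illustrative values
  description: 'Scores 1 when the agent produced any output',
}).generateScore(({ run }) => {
  // Trivial scoring logic, for illustration only.
  return run.output ? 1 : 0;
});
```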
package/.docs/raw/reference/scorers/tool-call-accuracy.mdx ADDED
@@ -0,0 +1,211 @@
+---
+title: "Reference: Tool Call Accuracy | Scorers | Mastra Docs"
+description: Documentation for the Tool Call Accuracy Scorers in Mastra, which evaluate whether LLM outputs call the correct tools from available options.
+---
+
+# Tool Call Accuracy Scorers
+
+Mastra provides two tool call accuracy scorers for evaluating whether an LLM selects the correct tools from available options:
+
+1. **Code-based scorer** - Deterministic evaluation using exact tool matching
+2. **LLM-based scorer** - Semantic evaluation using AI to assess appropriateness
+
+For usage examples, see the [Tool Call Accuracy Examples](/examples/scorers/tool-call-accuracy).
+
+## Code-Based Tool Call Accuracy Scorer
+
+The `createToolCallAccuracyScorerCode()` function from `@mastra/evals/scorers/code` provides deterministic binary scoring based on exact tool matching and supports both strict and lenient evaluation modes, as well as tool calling order validation.
+
+### Parameters
+
+<PropertiesTable
+  content={[
+    {
+      name: "expectedTool",
+      type: "string",
+      description: "The name of the tool that should be called for the given task. Ignored when expectedToolOrder is provided.",
+      required: false,
+    },
+    {
+      name: "strictMode",
+      type: "boolean",
+      description: "Controls evaluation strictness. For single tool mode: only exact single tool calls accepted. For order checking mode: tools must match exactly with no extra tools allowed.",
+      required: false,
+      default: "false",
+    },
+    {
+      name: "expectedToolOrder",
+      type: "string[]",
+      description: "Array of tool names in the expected calling order. When provided, enables order checking mode and ignores expectedTool parameter.",
+      required: false,
+    },
+  ]}
+/>
+
+This function returns an instance of the MastraScorer class. See the [MastraScorer reference](./mastra-scorer) for details on the `.run()` method and its input/output.
+
+### Evaluation Modes
+
+The code-based scorer operates in two distinct modes:
+
+#### Single Tool Mode
+
+When `expectedToolOrder` is not provided, the scorer evaluates single tool selection:
+
+- **Standard Mode (strictMode: false)**: Returns `1` if the expected tool is called, regardless of other tools
+- **Strict Mode (strictMode: true)**: Returns `1` only if exactly one tool is called and it matches the expected tool
+
+#### Order Checking Mode
+
+When `expectedToolOrder` is provided, the scorer validates tool calling sequence:
+
+- **Strict Order (strictMode: true)**: Tools must be called in exactly the specified order with no extra tools
+- **Flexible Order (strictMode: false)**: Expected tools must appear in correct relative order (extra tools allowed)
+
+### Examples
+
+```typescript
+import { createToolCallAccuracyScorerCode } from '@mastra/evals/scorers/code';
+
+// Single tool validation
+const scorer = createToolCallAccuracyScorerCode({
+  expectedTool: 'weather-tool'
+});
+
+// Strict single tool (no other tools allowed)
+const strictScorer = createToolCallAccuracyScorerCode({
+  expectedTool: 'calculator-tool',
+  strictMode: true
+});
+
+// Tool order validation
+const orderScorer = createToolCallAccuracyScorerCode({
+  expectedTool: 'search-tool', // ignored when order is specified
+  expectedToolOrder: ['search-tool', 'weather-tool'],
+  strictMode: true // exact match required
+});
+```
+
+## LLM-Based Tool Call Accuracy Scorer
+
+The `createToolCallAccuracyScorerLLM()` function from `@mastra/evals/scorers/llm` uses an LLM to evaluate whether the tools called by an agent are appropriate for the given user request, providing semantic evaluation rather than exact matching.
+
+### Parameters
+
+<PropertiesTable
+  content={[
+    {
+      name: "model",
+      type: "MastraLanguageModel",
+      description: "The LLM model to use for evaluating tool appropriateness",
+      required: true,
+    },
+    {
+      name: "availableTools",
+      type: "Array<{name: string, description: string}>",
+      description: "List of available tools with their descriptions for context",
+      required: true,
+    },
+  ]}
+/>
+
+### Features
+
+The LLM-based scorer provides:
+
+- **Semantic Evaluation**: Understands context and user intent
+- **Appropriateness Assessment**: Distinguishes between "helpful" and "appropriate" tools
+- **Clarification Handling**: Recognizes when agents appropriately ask for clarification
+- **Missing Tool Detection**: Identifies tools that should have been called
+- **Reasoning Generation**: Provides explanations for scoring decisions
+
+### Evaluation Process
+
+1. **Extract Tool Calls**: Identifies tools mentioned in agent output
+2. **Analyze Appropriateness**: Evaluates each tool against user request
+3. **Generate Score**: Calculates score based on appropriate vs total tool calls
+4. **Generate Reasoning**: Provides human-readable explanation
+
+### Examples
+
+```typescript
+import { createToolCallAccuracyScorerLLM } from '@mastra/evals/scorers/llm';
+import { openai } from '@ai-sdk/openai';
+
+const llmScorer = createToolCallAccuracyScorerLLM({
+  model: openai('gpt-4o-mini'),
+  availableTools: [
+    {
+      name: 'weather-tool',
+      description: 'Get current weather information for any location'
+    },
+    {
+      name: 'search-tool',
+      description: 'Search the web for information'
+    },
+    {
+      name: 'calendar-tool',
+      description: 'Check calendar events and scheduling'
+    }
+  ]
+});
+
+const result = await llmScorer.run(agentRun);
+console.log(result.score); // 0.0 to 1.0
+console.log(result.reason); // Explanation of the score
+```
+
+## Choosing Between Scorers
+
+### Use the Code-Based Scorer When:
+
+- You need **deterministic, reproducible** results
+- You want to test **exact tool matching**
+- You need to validate **specific tool sequences**
+- Speed and cost are priorities (no LLM calls)
+- You're running automated tests
+
+### Use the LLM-Based Scorer When:
+
+- You need **semantic understanding** of appropriateness
+- Tool selection depends on **context and intent**
+- You want to handle **edge cases** like clarification requests
+- You need **explanations** for scoring decisions
+- You're evaluating **production agent behavior**
+
+## Scoring Details
+
+### Code-Based Scoring
+
+- **Binary scores**: Always returns 0 or 1
+- **Deterministic**: Same input always produces same output
+- **Fast**: No external API calls
+
+### LLM-Based Scoring
+
+- **Fractional scores**: Returns values between 0.0 and 1.0
+- **Context-aware**: Considers user intent and appropriateness
+- **Explanatory**: Provides reasoning for scores
+
+## Use Cases
+
+### Code-Based Scorer Use Cases
+
+- **Unit Testing**: Verify specific tool selection behavior
+- **Regression Testing**: Ensure tool selection doesn't change
+- **Workflow Validation**: Check tool sequences in multi-step processes
+- **CI/CD Pipelines**: Fast, deterministic validation
+
+### LLM-Based Scorer Use Cases
+
+- **Quality Assurance**: Evaluate production agent behavior
+- **A/B Testing**: Compare different agent implementations
+- **User Intent Alignment**: Ensure tools match user needs
+- **Edge Case Handling**: Evaluate clarification and error scenarios
+
+## Related
+
+- [Answer Relevancy Scorer](./answer-relevancy)
+- [Completeness Scorer](./completeness)
+- [Faithfulness Scorer](./faithfulness)
+- [Custom Scorers](/docs/scorers/custom-scorers)
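The new page demonstrates `.run()` only for the LLM scorer; running the code-based scorer looks the same. A sketch, with `agentRun` left as a typed placeholder for a recorded agent run:

```typescript
import { createToolCallAccuracyScorerCode } from '@mastra/evals/scorers/code';

const scorer = createToolCallAccuracyScorerCode({ expectedTool: 'weather-tool' });

// Placeholder: a recorded run in whatever shape MastraScorer.run() accepts.
declare const agentRun: Parameters<typeof scorer.run>[0];

const result = await scorer.run(agentRun);
console.log(result.score); // 1 if 'weather-tool' was called, otherwise 0
```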
package/.docs/raw/scorers/off-the-shelf-scorers.mdx CHANGED
@@ -19,6 +19,7 @@ These scorers evaluate how correct, truthful, and complete your agent's answers
 - [`completeness`](/reference/scorers/completeness): Checks if responses include all necessary information (`0-1`, higher is better)
 - [`content-similarity`](/reference/scorers/content-similarity): Measures textual similarity using character-level matching (`0-1`, higher is better)
 - [`textual-difference`](/reference/scorers/textual-difference): Measures textual differences between strings (`0-1`, higher means more similar)
+- [`tool-call-accuracy`](/reference/scorers/tool-call-accuracy): Evaluates whether the LLM selects the correct tool from available options (`0-1`, higher is better)
 
 ### Output Quality
 
package/.docs/raw/tools-mcp/mcp-overview.mdx CHANGED
@@ -341,7 +341,7 @@ When a tool includes an `outputSchema`, its `execute` function **must** return a
 Here's an example of a tool with an `outputSchema`:
 
 ```typescript filename="src/tools/structured-tool.ts"
-import { createTool } from '@mastra/core';
+import { createTool } from '@mastra/core/tools';
 import { z } from 'zod';
 
 export const structuredTool = createTool({
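The hunk cuts off right after `createTool({` opens. A completed sketch under assumed schemas follows; the `id`, the Zod shapes, and the return value are illustrative, not the file's actual contents.

```typescript
import { createTool } from '@mastra/core/tools';
import { z } from 'zod';

export const structuredTool = createTool({
  id: 'structured-tool', // illustrative id
  description: 'Returns a structured weather summary',
  inputSchema: z.object({ city: z.string() }),
  outputSchema: z.object({
    city: z.string(),
    temperatureC: z.number(),
  }),
  execute: async ({ context }) => {
    // With an outputSchema declared, the return value must match it.
    return { city: context.city, temperatureC: 21 };
  },
});
```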
package/.docs/raw/workflows/control-flow.mdx CHANGED
@@ -55,6 +55,8 @@ This executes `step1` and `step2` concurrently, then continues to `step3` after
 
 > See [Parallel Execution with Steps](/examples/workflows/parallel-steps) for more information.
 
+> 📹 Watch: How to run steps in parallel and optimize your Mastra workflow → [YouTube (3 minutes)](https://youtu.be/pTSOSWbreE0)
+
 ## Conditional logic with `.branch()`
 
 Execute steps conditionally using `.branch()`:
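A minimal sketch of the `.parallel()` flow described in the hunk context: `step1` and `step2` run concurrently, then `step3` runs after both complete. The step definitions are illustrative, and the idea that the follow-up step receives parallel results keyed by step id is an assumption, not something this diff shows.

```typescript
import { createWorkflow, createStep } from '@mastra/core/workflows';
import { z } from 'zod';

const step1 = createStep({
  id: 'step-1',
  inputSchema: z.object({}),
  outputSchema: z.object({ a: z.number() }),
  execute: async () => ({ a: 1 }),
});

const step2 = createStep({
  id: 'step-2',
  inputSchema: z.object({}),
  outputSchema: z.object({ b: z.number() }),
  execute: async () => ({ b: 2 }),
});

const step3 = createStep({
  id: 'step-3',
  // Assumption: after .parallel(), input is keyed by the parallel step ids.
  inputSchema: z.object({
    'step-1': z.object({ a: z.number() }),
    'step-2': z.object({ b: z.number() }),
  }),
  outputSchema: z.object({ sum: z.number() }),
  execute: async ({ inputData }) => ({
    sum: inputData['step-1'].a + inputData['step-2'].b,
  }),
});

// step1 and step2 execute concurrently; step3 continues once both finish.
export const workflow = createWorkflow({
  id: 'parallel-demo',
  inputSchema: z.object({}),
  outputSchema: z.object({ sum: z.number() }),
})
  .parallel([step1, step2])
  .then(step3)
  .commit();
```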
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@mastra/mcp-docs-server",
-  "version": "0.13.14-alpha.0",
+  "version": "0.13.14-alpha.1",
   "description": "MCP server for accessing Mastra.ai documentation, changelogs, and news.",
   "type": "module",
   "main": "dist/index.js",
@@ -32,8 +32,8 @@
     "uuid": "^11.1.0",
     "zod": "^3.25.67",
     "zod-to-json-schema": "^3.24.5",
-    "@mastra/mcp": "^0.10.13-alpha.0",
-    "@mastra/core": "0.14.2-alpha.0"
+    "@mastra/core": "0.14.2-alpha.1",
+    "@mastra/mcp": "^0.10.13-alpha.1"
   },
   "devDependencies": {
     "@hono/node-server": "^1.17.1",
@@ -48,8 +48,8 @@
     "tsx": "^4.19.4",
     "typescript": "^5.8.3",
     "vitest": "^3.2.4",
-    "@internal/lint": "0.0.31",
-    "@mastra/core": "0.14.2-alpha.0"
+    "@mastra/core": "0.14.2-alpha.1",
+    "@internal/lint": "0.0.31"
   },
   "scripts": {
     "prepare-docs": "cross-env PREPARE=true node dist/prepare-docs/prepare.js",