npm - @mastra/mcp-docs-server - Versions diffs - 0.13.37 → 0.13.38 - Mend

@mastra/mcp-docs-server 0.13.37 → 0.13.38

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (397)

package/.docs/raw/reference/scorers/tool-call-accuracy.mdx CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: "Reference: Tool Call Accuracy | Scorers | Mastra Docs"
+title: "Reference: Tool Call Accuracy Scorers | Scorers | Mastra Docs"
 description: Documentation for the Tool Call Accuracy Scorers in Mastra, which evaluate whether LLM outputs call the correct tools from available options.
 ---
@@ -39,20 +39,23 @@ The `createToolCallAccuracyScorerCode()` function from `@mastra/evals/scorers/co
     {
       name: "expectedTool",
       type: "string",
-      description: "The name of the tool that should be called for the given task. Ignored when expectedToolOrder is provided.",
+      description:
+        "The name of the tool that should be called for the given task. Ignored when expectedToolOrder is provided.",
       required: false,
     },
     {
       name: "strictMode",
       type: "boolean",
-      description: "Controls evaluation strictness. For single tool mode: only exact single tool calls accepted. For order checking mode: tools must match exactly with no extra tools allowed.",
+      description:
+        "Controls evaluation strictness. For single tool mode: only exact single tool calls accepted. For order checking mode: tools must match exactly with no extra tools allowed.",
       required: false,
       default: "false",
     },
     {
       name: "expectedToolOrder",
       type: "string[]",
-      description: "Array of tool names in the expected calling order. When provided, enables order checking mode and ignores expectedTool parameter.",
+      description:
+        "Array of tool names in the expected calling order. When provided, enables order checking mode and ignores expectedTool parameter.",
       required: false,
     },
   ]}
@@ -88,22 +91,22 @@ When `expectedToolOrder` is provided, the scorer validates tool calling sequence
 ```typescript showLineNumbers copy
 // Standard mode - passes if expected tool is called
-const lenientScorer = createCodeScorer({
-  expectedTool: 'search-tool',
-  strictMode: false
+const lenientScorer = createCodeScorer({
+  expectedTool: "search-tool",
+  strictMode: false,
 });
 // Strict mode - only passes if exactly one tool is called
-const strictScorer = createCodeScorer({
-  expectedTool: 'search-tool',
-  strictMode: true
+const strictScorer = createCodeScorer({
+  expectedTool: "search-tool",
+  strictMode: true,
 });
 // Order checking with strict mode
 const strictOrderScorer = createCodeScorer({
-  expectedTool: 'step1-tool',
-  expectedToolOrder: ['step1-tool', 'step2-tool', 'step3-tool'],
-  strictMode: true // no extra tools allowed
+  expectedTool: "step1-tool",
+  expectedToolOrder: ["step1-tool", "step2-tool", "step3-tool"],
+  strictMode: true, // no extra tools allowed
 });
 ```
@@ -132,35 +135,35 @@ The code-based scorer provides deterministic, binary scoring (0 or 1) based on e
 ### Correct tool selection
-```typescript filename="src/example-correct-tool.ts" showLineNumbers copy
-const scorer = createToolCallAccuracyScorerCode({
-  expectedTool: 'weather-tool'
+```typescript title="src/example-correct-tool.ts" showLineNumbers copy
+const scorer = createToolCallAccuracyScorerCode({
+  expectedTool: "weather-tool",
 });
 // Simulate LLM input and output with tool call
 const inputMessages = [
-  createUIMessage({
-    content: 'What is the weather like in New York today?',
-    role: 'user',
-    id: 'input-1'
-  })
+  createUIMessage({
+    content: "What is the weather like in New York today?",
+    role: "user",
+    id: "input-1",
+  }),
 ];
 const output = [
   createUIMessage({
-    content: 'Let me check the weather for you.',
-    role: 'assistant',
-    id: 'output-1',
+    content: "Let me check the weather for you.",
+    role: "assistant",
+    id: "output-1",
     toolInvocations: [
       createToolInvocation({
-        toolCallId: 'call-123',
-        toolName: 'weather-tool',
-        args: { location: 'New York' },
-        result: { temperature: '72°F', condition: 'sunny' },
-        state: 'result'
-      })
-    ]
-  })
+        toolCallId: "call-123",
+        toolName: "weather-tool",
+        args: { location: "New York" },
+        result: { temperature: "72°F", condition: "sunny" },
+        state: "result",
+      }),
+    ],
+  }),
 ];
 const run = createAgentTestRun({ inputMessages, output });
@@ -174,35 +177,35 @@ console.log(result.preprocessStepResult?.correctToolCalled); // true
 Only passes if exactly one tool is called:
-```typescript filename="src/example-strict-mode.ts" showLineNumbers copy
-const strictScorer = createToolCallAccuracyScorerCode({
-  expectedTool: 'weather-tool',
-  strictMode: true
+```typescript title="src/example-strict-mode.ts" showLineNumbers copy
+const strictScorer = createToolCallAccuracyScorerCode({
+  expectedTool: "weather-tool",
+  strictMode: true,
 });
 // Multiple tools called - fails in strict mode
 const output = [
   createUIMessage({
-    content: 'Let me help you with that.',
-    role: 'assistant',
-    id: 'output-1',
+    content: "Let me help you with that.",
+    role: "assistant",
+    id: "output-1",
     toolInvocations: [
       createToolInvocation({
-        toolCallId: 'call-1',
-        toolName: 'search-tool',
+        toolCallId: "call-1",
+        toolName: "search-tool",
         args: {},
         result: {},
-        state: 'result',
+        state: "result",
       }),
       createToolInvocation({
-        toolCallId: 'call-2',
-        toolName: 'weather-tool',
-        args: { location: 'New York' },
-        result: { temperature: '20°C' },
-        state: 'result',
-      })
-    ]
-  })
+        toolCallId: "call-2",
+        toolName: "weather-tool",
+        args: { location: "New York" },
+        result: { temperature: "20°C" },
+        state: "result",
+      }),
+    ],
+  }),
 ];
 const result = await strictScorer.run(run);
@@ -213,35 +216,35 @@ console.log(result.score); // 0 - fails because multiple tools were called
 Validates that tools are called in a specific sequence:
-```typescript filename="src/example-order-validation.ts" showLineNumbers copy
+```typescript title="src/example-order-validation.ts" showLineNumbers copy
 const orderScorer = createToolCallAccuracyScorerCode({
-  expectedTool: 'auth-tool', // ignored when order is specified
-  expectedToolOrder: ['auth-tool', 'fetch-tool'],
-  strictMode: true // no extra tools allowed
+  expectedTool: "auth-tool", // ignored when order is specified
+  expectedToolOrder: ["auth-tool", "fetch-tool"],
+  strictMode: true, // no extra tools allowed
 });
 const output = [
   createUIMessage({
-    content: 'I will authenticate and fetch the data.',
-    role: 'assistant',
-    id: 'output-1',
+    content: "I will authenticate and fetch the data.",
+    role: "assistant",
+    id: "output-1",
     toolInvocations: [
       createToolInvocation({
-        toolCallId: 'call-1',
-        toolName: 'auth-tool',
-        args: { token: 'abc123' },
+        toolCallId: "call-1",
+        toolName: "auth-tool",
+        args: { token: "abc123" },
         result: { authenticated: true },
-        state: 'result'
+        state: "result",
       }),
       createToolInvocation({
-        toolCallId: 'call-2',
-        toolName: 'fetch-tool',
-        args: { endpoint: '/data' },
-        result: { data: ['item1'] },
-        state: 'result'
-      })
-    ]
-  })
+        toolCallId: "call-2",
+        toolName: "fetch-tool",
+        args: { endpoint: "/data" },
+        result: { data: ["item1"] },
+        state: "result",
+      }),
+    ],
+  }),
 ];
 const result = await orderScorer.run(run);
@@ -252,42 +255,42 @@ console.log(result.score); // 1 - correct order
 Allows extra tools as long as expected tools maintain relative order:
-```typescript filename="src/example-flexible-order.ts" showLineNumbers copy
+```typescript title="src/example-flexible-order.ts" showLineNumbers copy
 const flexibleOrderScorer = createToolCallAccuracyScorerCode({
-  expectedTool: 'auth-tool',
-  expectedToolOrder: ['auth-tool', 'fetch-tool'],
-  strictMode: false // allows extra tools
+  expectedTool: "auth-tool",
+  expectedToolOrder: ["auth-tool", "fetch-tool"],
+  strictMode: false, // allows extra tools
 });
 const output = [
   createUIMessage({
-    content: 'Performing comprehensive operation.',
-    role: 'assistant',
-    id: 'output-1',
+    content: "Performing comprehensive operation.",
+    role: "assistant",
+    id: "output-1",
     toolInvocations: [
       createToolInvocation({
-        toolCallId: 'call-1',
-        toolName: 'auth-tool',
-        args: { token: 'abc123' },
+        toolCallId: "call-1",
+        toolName: "auth-tool",
+        args: { token: "abc123" },
         result: { authenticated: true },
-        state: 'result'
+        state: "result",
       }),
       createToolInvocation({
-        toolCallId: 'call-2',
-        toolName: 'log-tool', // Extra tool - OK in flexible mode
-        args: { message: 'Starting fetch' },
+        toolCallId: "call-2",
+        toolName: "log-tool", // Extra tool - OK in flexible mode
+        args: { message: "Starting fetch" },
         result: { logged: true },
-        state: 'result'
+        state: "result",
       }),
       createToolInvocation({
-        toolCallId: 'call-3',
-        toolName: 'fetch-tool',
-        args: { endpoint: '/data' },
-        result: { data: ['item1'] },
-        state: 'result'
-      })
-    ]
-  })
+        toolCallId: "call-3",
+        toolName: "fetch-tool",
+        args: { endpoint: "/data" },
+        result: { data: ["item1"] },
+        state: "result",
+      }),
+    ],
+  }),
 ];
 const result = await flexibleOrderScorer.run(run);
@@ -311,7 +314,8 @@ The `createToolCallAccuracyScorerLLM()` function from `@mastra/evals/scorers/llm
     {
       name: "availableTools",
       type: "Array<{name: string, description: string}>",
-      description: "List of available tools with their descriptions for context",
+      description:
+        "List of available tools with their descriptions for context",
       required: true,
     },
   ]}
@@ -383,48 +387,48 @@ The LLM-based scorer uses AI to evaluate whether tool selections are appropriate
 ### Basic LLM evaluation
-```typescript filename="src/example-llm-basic.ts" showLineNumbers copy
+```typescript title="src/example-llm-basic.ts" showLineNumbers copy
 const llmScorer = createToolCallAccuracyScorerLLM({
-  model: 'openai/gpt-4o-mini',
+  model: "openai/gpt-4o-mini",
   availableTools: [
-    {
-      name: 'weather-tool',
-      description: 'Get current weather information for any location'
+    {
+      name: "weather-tool",
+      description: "Get current weather information for any location",
     },
-    {
-      name: 'calendar-tool',
-      description: 'Check calendar events and scheduling'
+    {
+      name: "calendar-tool",
+      description: "Check calendar events and scheduling",
     },
-    {
-      name: 'search-tool',
-      description: 'Search the web for general information'
-    }
-  ]
+    {
+      name: "search-tool",
+      description: "Search the web for general information",
+    },
+  ],
 });
 const inputMessages = [
-  createUIMessage({
-    content: 'What is the weather like in San Francisco today?',
-    role: 'user',
-    id: 'input-1'
-  })
+  createUIMessage({
+    content: "What is the weather like in San Francisco today?",
+    role: "user",
+    id: "input-1",
+  }),
 ];
 const output = [
   createUIMessage({
-    content: 'Let me check the current weather for you.',
-    role: 'assistant',
-    id: 'output-1',
+    content: "Let me check the current weather for you.",
+    role: "assistant",
+    id: "output-1",
     toolInvocations: [
       createToolInvocation({
-        toolCallId: 'call-123',
-        toolName: 'weather-tool',
-        args: { location: 'San Francisco', date: 'today' },
-        result: { temperature: '68°F', condition: 'foggy' },
-        state: 'result'
-      })
-    ]
-  })
+        toolCallId: "call-123",
+        toolName: "weather-tool",
+        args: { location: "San Francisco", date: "today" },
+        result: { temperature: "68°F", condition: "foggy" },
+        state: "result",
+      }),
+    ],
+  }),
 ];
 const run = createAgentTestRun({ inputMessages, output });
@@ -436,30 +440,30 @@ console.log(result.reason); // "The agent correctly used the weather-tool to add
 ### Handling inappropriate tool usage
-```typescript filename="src/example-llm-inappropriate.ts" showLineNumbers copy
+```typescript title="src/example-llm-inappropriate.ts" showLineNumbers copy
 const inputMessages = [
-  createUIMessage({
-    content: 'What is the weather in Tokyo?',
-    role: 'user',
-    id: 'input-1'
-  })
+  createUIMessage({
+    content: "What is the weather in Tokyo?",
+    role: "user",
+    id: "input-1",
+  }),
 ];
 const inappropriateOutput = [
   createUIMessage({
-    content: 'Let me search for that information.',
-    role: 'assistant',
-    id: 'output-1',
+    content: "Let me search for that information.",
+    role: "assistant",
+    id: "output-1",
     toolInvocations: [
       createToolInvocation({
-        toolCallId: 'call-456',
-        toolName: 'search-tool', // Less appropriate than weather-tool
-        args: { query: 'Tokyo weather' },
-        result: { results: ['Tokyo weather data...'] },
-        state: 'result'
-      })
-    ]
-  })
+        toolCallId: "call-456",
+        toolName: "search-tool", // Less appropriate than weather-tool
+        args: { query: "Tokyo weather" },
+        result: { results: ["Tokyo weather data..."] },
+        state: "result",
+      }),
+    ],
+  }),
 ];
 const run = createAgentTestRun({ inputMessages, output: inappropriateOutput });
@@ -473,12 +477,12 @@ console.log(result.reason); // "The agent used search-tool when weather-tool wou
 The LLM scorer recognizes when agents appropriately ask for clarification:
-```typescript filename="src/example-llm-clarification.ts" showLineNumbers copy
+```typescript title="src/example-llm-clarification.ts" showLineNumbers copy
 const vagueInput = [
-  createUIMessage({
-    content: 'I need help with something',
-    role: 'user',
-    id: 'input-1'
+  createUIMessage({
+    content: 'I need help with something',
+    role: 'user',
+    id: 'input-1'
   })
 ];
@@ -491,9 +495,9 @@ const clarificationOutput = [
   })
 ];
-const run = createAgentTestRun({
-  inputMessages: vagueInput,
-  output: clarificationOutput
+const run = createAgentTestRun({
+  inputMessages: vagueInput,
+  output: clarificationOutput
 });
 const result = await llmScorer.run(run);
@@ -505,58 +509,58 @@ console.log(result.reason); // "The agent appropriately asked for clarification
 Here's an example using both scorers on the same data:
-```typescript filename="src/example-comparison.ts" showLineNumbers copy
-import { createToolCallAccuracyScorerCode as createCodeScorer } from '@mastra/evals/scorers/code';
-import { createToolCallAccuracyScorerLLM as createLLMScorer } from '@mastra/evals/scorers/llm';
+```typescript title="src/example-comparison.ts" showLineNumbers copy
+import { createToolCallAccuracyScorerCode as createCodeScorer } from "@mastra/evals/scorers/code";
+import { createToolCallAccuracyScorerLLM as createLLMScorer } from "@mastra/evals/scorers/llm";
 // Setup both scorers
 const codeScorer = createCodeScorer({
-  expectedTool: 'weather-tool',
-  strictMode: false
+  expectedTool: "weather-tool",
+  strictMode: false,
 });
 const llmScorer = createLLMScorer({
-  model: 'openai/gpt-4o-mini',
+  model: "openai/gpt-4o-mini",
   availableTools: [
-    { name: 'weather-tool', description: 'Get weather information' },
-    { name: 'search-tool', description: 'Search the web' }
-  ]
+    { name: "weather-tool", description: "Get weather information" },
+    { name: "search-tool", description: "Search the web" },
+  ],
 });
 // Test data
 const run = createAgentTestRun({
   inputMessages: [
-    createUIMessage({
-      content: 'What is the weather?',
-      role: 'user',
-      id: 'input-1'
-    })
+    createUIMessage({
+      content: "What is the weather?",
+      role: "user",
+      id: "input-1",
+    }),
   ],
   output: [
     createUIMessage({
-      content: 'Let me find that information.',
-      role: 'assistant',
-      id: 'output-1',
+      content: "Let me find that information.",
+      role: "assistant",
+      id: "output-1",
       toolInvocations: [
         createToolInvocation({
-          toolCallId: 'call-1',
-          toolName: 'search-tool',
-          args: { query: 'weather' },
-          result: { results: ['weather data'] },
-          state: 'result'
-        })
-      ]
-    })
-  ]
+          toolCallId: "call-1",
+          toolName: "search-tool",
+          args: { query: "weather" },
+          result: { results: ["weather data"] },
+          state: "result",
+        }),
+      ],
+    }),
+  ],
 });
 // Run both scorers
 const codeResult = await codeScorer.run(run);
 const llmResult = await llmScorer.run(run);
-console.log('Code Scorer:', codeResult.score); // 0 - wrong tool
-console.log('LLM Scorer:', llmResult.score);   // 0.3 - partially appropriate
-console.log('LLM Reason:', llmResult.reason);   // Explains why search-tool is less appropriate
+console.log("Code Scorer:", codeResult.score); // 0 - wrong tool
+console.log("LLM Scorer:", llmResult.score); // 0.3 - partially appropriate
+console.log("LLM Reason:", llmResult.reason); // Explains why search-tool is less appropriate
 ```
 ## Related