@mastra/mcp-docs-server 0.13.10 → 0.13.11-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/organized/changelogs/%40internal%2Fstorage-test-utils.md +9 -9
- package/.docs/organized/changelogs/%40internal%2Ftypes-builder.md +2 -0
- package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +26 -26
- package/.docs/organized/changelogs/%40mastra%2Fcore.md +30 -30
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +20 -20
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +20 -20
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-vercel.md +20 -20
- package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +30 -30
- package/.docs/organized/changelogs/%40mastra%2Ffirecrawl.md +13 -13
- package/.docs/organized/changelogs/%40mastra%2Flibsql.md +9 -9
- package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +26 -26
- package/.docs/organized/changelogs/%40mastra%2Fmemory.md +21 -21
- package/.docs/organized/changelogs/%40mastra%2Fpg.md +9 -9
- package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +21 -21
- package/.docs/organized/changelogs/%40mastra%2Frag.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Fschema-compat.md +7 -0
- package/.docs/organized/changelogs/%40mastra%2Fserver.md +26 -26
- package/.docs/organized/changelogs/create-mastra.md +11 -11
- package/.docs/organized/changelogs/mastra.md +31 -31
- package/.docs/organized/code-examples/agent-network.md +4 -3
- package/.docs/organized/code-examples/agent.md +33 -2
- package/.docs/raw/agents/overview.mdx +21 -1
- package/.docs/raw/getting-started/mcp-docs-server.mdx +2 -2
- package/.docs/raw/rag/chunking-and-embedding.mdx +11 -0
- package/.docs/raw/reference/agents/agent.mdx +64 -38
- package/.docs/raw/reference/agents/generate.mdx +206 -202
- package/.docs/raw/reference/agents/getAgent.mdx +23 -38
- package/.docs/raw/reference/agents/getDefaultGenerateOptions.mdx +62 -0
- package/.docs/raw/reference/agents/getDefaultStreamOptions.mdx +62 -0
- package/.docs/raw/reference/agents/getDefaultVNextStreamOptions.mdx +62 -0
- package/.docs/raw/reference/agents/getDescription.mdx +30 -0
- package/.docs/raw/reference/agents/getInstructions.mdx +36 -73
- package/.docs/raw/reference/agents/getLLM.mdx +69 -0
- package/.docs/raw/reference/agents/getMemory.mdx +42 -119
- package/.docs/raw/reference/agents/getModel.mdx +36 -75
- package/.docs/raw/reference/agents/getScorers.mdx +62 -0
- package/.docs/raw/reference/agents/getTools.mdx +36 -128
- package/.docs/raw/reference/agents/getVoice.mdx +36 -83
- package/.docs/raw/reference/agents/getWorkflows.mdx +37 -74
- package/.docs/raw/reference/agents/stream.mdx +263 -226
- package/.docs/raw/reference/agents/streamVNext.mdx +208 -402
- package/.docs/raw/reference/cli/build.mdx +1 -0
- package/.docs/raw/reference/rag/chunk.mdx +51 -2
- package/.docs/raw/reference/scorers/answer-relevancy.mdx +6 -6
- package/.docs/raw/reference/scorers/bias.mdx +6 -6
- package/.docs/raw/reference/scorers/completeness.mdx +2 -2
- package/.docs/raw/reference/scorers/content-similarity.mdx +1 -1
- package/.docs/raw/reference/scorers/create-scorer.mdx +445 -0
- package/.docs/raw/reference/scorers/faithfulness.mdx +6 -6
- package/.docs/raw/reference/scorers/hallucination.mdx +6 -6
- package/.docs/raw/reference/scorers/keyword-coverage.mdx +2 -2
- package/.docs/raw/reference/scorers/mastra-scorer.mdx +116 -158
- package/.docs/raw/reference/scorers/toxicity.mdx +2 -2
- package/.docs/raw/scorers/custom-scorers.mdx +166 -268
- package/.docs/raw/scorers/overview.mdx +21 -13
- package/.docs/raw/server-db/local-dev-playground.mdx +3 -3
- package/package.json +5 -5
- package/.docs/raw/reference/agents/createTool.mdx +0 -241
- package/.docs/raw/reference/scorers/custom-code-scorer.mdx +0 -155
- package/.docs/raw/reference/scorers/llm-scorer.mdx +0 -210
|
@@ -1,241 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
title: "Reference: createTool() | Tools | Agents | Mastra Docs"
|
|
3
|
-
description: Documentation for the createTool function in Mastra, which creates custom tools for agents and workflows.
|
|
4
|
-
---
|
|
5
|
-
|
|
6
|
-
# `createTool()`
|
|
7
|
-
|
|
8
|
-
The `createTool()` function creates typed tools that can be executed by agents or workflows. Tools have built-in schema validation, execution context, and integration with the Mastra ecosystem.
|
|
9
|
-
|
|
10
|
-
## Overview
|
|
11
|
-
|
|
12
|
-
Tools are a fundamental building block in Mastra that allow agents to interact with external systems, perform computations, and access data. Each tool has:
|
|
13
|
-
|
|
14
|
-
- A unique identifier
|
|
15
|
-
- A description that helps the AI understand when and how to use the tool
|
|
16
|
-
- Optional input and output schemas for validation
|
|
17
|
-
- An execution function that implements the tool's logic
|
|
18
|
-
|
|
19
|
-
## Example Usage
|
|
20
|
-
|
|
21
|
-
```ts filename="src/tools/stock-tools.ts" showLineNumbers copy
|
|
22
|
-
import { createTool } from "@mastra/core/tools";
|
|
23
|
-
import { z } from "zod";
|
|
24
|
-
|
|
25
|
-
// Helper function to fetch stock data
|
|
26
|
-
const getStockPrice = async (symbol: string) => {
|
|
27
|
-
const response = await fetch(
|
|
28
|
-
`https://mastra-stock-data.vercel.app/api/stock-data?symbol=${symbol}`,
|
|
29
|
-
);
|
|
30
|
-
const data = await response.json();
|
|
31
|
-
return data.prices["4. close"];
|
|
32
|
-
};
|
|
33
|
-
|
|
34
|
-
// Create a tool to get stock prices
|
|
35
|
-
export const stockPriceTool = createTool({
|
|
36
|
-
id: "getStockPrice",
|
|
37
|
-
description: "Fetches the current stock price for a given ticker symbol",
|
|
38
|
-
inputSchema: z.object({
|
|
39
|
-
symbol: z.string().describe("The stock ticker symbol (e.g., AAPL, MSFT)"),
|
|
40
|
-
}),
|
|
41
|
-
outputSchema: z.object({
|
|
42
|
-
symbol: z.string(),
|
|
43
|
-
price: z.number(),
|
|
44
|
-
currency: z.string(),
|
|
45
|
-
timestamp: z.string(),
|
|
46
|
-
}),
|
|
47
|
-
execute: async ({ context }) => {
|
|
48
|
-
const price = await getStockPrice(context.symbol);
|
|
49
|
-
|
|
50
|
-
return {
|
|
51
|
-
symbol: context.symbol,
|
|
52
|
-
price: parseFloat(price),
|
|
53
|
-
currency: "USD",
|
|
54
|
-
timestamp: new Date().toISOString(),
|
|
55
|
-
};
|
|
56
|
-
},
|
|
57
|
-
});
|
|
58
|
-
|
|
59
|
-
// Create a tool that uses the thread context
|
|
60
|
-
export const threadInfoTool = createTool({
|
|
61
|
-
id: "getThreadInfo",
|
|
62
|
-
description: "Returns information about the current conversation thread",
|
|
63
|
-
inputSchema: z.object({
|
|
64
|
-
includeResource: z.boolean().optional().default(false),
|
|
65
|
-
}),
|
|
66
|
-
execute: async ({ context, threadId, resourceId }) => {
|
|
67
|
-
return {
|
|
68
|
-
threadId,
|
|
69
|
-
resourceId: context.includeResource ? resourceId : undefined,
|
|
70
|
-
timestamp: new Date().toISOString(),
|
|
71
|
-
};
|
|
72
|
-
},
|
|
73
|
-
});
|
|
74
|
-
```
|
|
75
|
-
|
|
76
|
-
## API Reference
|
|
77
|
-
|
|
78
|
-
### Parameters
|
|
79
|
-
|
|
80
|
-
`createTool()` accepts a single object with the following properties:
|
|
81
|
-
|
|
82
|
-
<PropertiesTable
|
|
83
|
-
content={[
|
|
84
|
-
{
|
|
85
|
-
name: "id",
|
|
86
|
-
type: "string",
|
|
87
|
-
required: true,
|
|
88
|
-
description:
|
|
89
|
-
"Unique identifier for the tool. This should be descriptive of the tool's function.",
|
|
90
|
-
},
|
|
91
|
-
{
|
|
92
|
-
name: "description",
|
|
93
|
-
type: "string",
|
|
94
|
-
required: true,
|
|
95
|
-
description:
|
|
96
|
-
"Detailed description of what the tool does, when it should be used, and what inputs it requires. This helps the AI understand how to use the tool effectively.",
|
|
97
|
-
},
|
|
98
|
-
{
|
|
99
|
-
name: "execute",
|
|
100
|
-
type: "(context: ToolExecutionContext, options?: any) => Promise<any>",
|
|
101
|
-
required: false,
|
|
102
|
-
description:
|
|
103
|
-
"Async function that implements the tool's logic. Receives the execution context and optional configuration.",
|
|
104
|
-
properties: [
|
|
105
|
-
{
|
|
106
|
-
type: "ToolExecutionContext",
|
|
107
|
-
parameters: [
|
|
108
|
-
{
|
|
109
|
-
name: "context",
|
|
110
|
-
type: "object",
|
|
111
|
-
description:
|
|
112
|
-
"The validated input data that matches the inputSchema",
|
|
113
|
-
},
|
|
114
|
-
{
|
|
115
|
-
name: "threadId",
|
|
116
|
-
type: "string",
|
|
117
|
-
isOptional: true,
|
|
118
|
-
description:
|
|
119
|
-
"Identifier for the conversation thread, if available",
|
|
120
|
-
},
|
|
121
|
-
{
|
|
122
|
-
name: "resourceId",
|
|
123
|
-
type: "string",
|
|
124
|
-
isOptional: true,
|
|
125
|
-
description:
|
|
126
|
-
"Identifier for the user or resource interacting with the tool",
|
|
127
|
-
},
|
|
128
|
-
{
|
|
129
|
-
name: "mastra",
|
|
130
|
-
type: "Mastra",
|
|
131
|
-
isOptional: true,
|
|
132
|
-
description: "Reference to the Mastra instance, if available",
|
|
133
|
-
},
|
|
134
|
-
],
|
|
135
|
-
},
|
|
136
|
-
{
|
|
137
|
-
type: "ToolOptions",
|
|
138
|
-
parameters: [
|
|
139
|
-
{
|
|
140
|
-
name: "toolCallId",
|
|
141
|
-
type: "string",
|
|
142
|
-
description:
|
|
143
|
-
"The ID of the tool call. You can use it, for example, when sending tool-call-related information with stream data.",
|
|
144
|
-
},
|
|
145
|
-
{
|
|
146
|
-
name: "messages",
|
|
147
|
-
type: "CoreMessage[]",
|
|
148
|
-
description:
|
|
149
|
-
"Messages that were sent to the language model to initiate the response that contained the tool call. The messages include neither the system prompt nor the assistant response that contained the tool call.",
|
|
150
|
-
},
|
|
151
|
-
{
|
|
152
|
-
name: "abortSignal",
|
|
153
|
-
type: "AbortSignal",
|
|
154
|
-
isOptional: true,
|
|
155
|
-
description:
|
|
156
|
-
"An optional abort signal that indicates that the overall operation should be aborted.",
|
|
157
|
-
},
|
|
158
|
-
],
|
|
159
|
-
},
|
|
160
|
-
],
|
|
161
|
-
},
|
|
162
|
-
{
|
|
163
|
-
name: "inputSchema",
|
|
164
|
-
type: "ZodSchema",
|
|
165
|
-
required: false,
|
|
166
|
-
description:
|
|
167
|
-
"Zod schema that defines and validates the tool's input parameters. If not provided, the tool will accept any input.",
|
|
168
|
-
},
|
|
169
|
-
{
|
|
170
|
-
name: "outputSchema",
|
|
171
|
-
type: "ZodSchema",
|
|
172
|
-
required: false,
|
|
173
|
-
description:
|
|
174
|
-
"Zod schema that defines and validates the tool's output. Helps ensure the tool returns data in the expected format.",
|
|
175
|
-
},
|
|
176
|
-
]}
|
|
177
|
-
/>
|
|
178
|
-
|
|
179
|
-
### Returns
|
|
180
|
-
|
|
181
|
-
<PropertiesTable
|
|
182
|
-
content={[
|
|
183
|
-
{
|
|
184
|
-
name: "Tool",
|
|
185
|
-
type: "Tool<TSchemaIn, TSchemaOut>",
|
|
186
|
-
description:
|
|
187
|
-
"A Tool instance that can be used with agents, workflows, or directly executed.",
|
|
188
|
-
properties: [
|
|
189
|
-
{
|
|
190
|
-
type: "Tool",
|
|
191
|
-
parameters: [
|
|
192
|
-
{
|
|
193
|
-
name: "id",
|
|
194
|
-
type: "string",
|
|
195
|
-
description: "The tool's unique identifier",
|
|
196
|
-
},
|
|
197
|
-
{
|
|
198
|
-
name: "description",
|
|
199
|
-
type: "string",
|
|
200
|
-
description: "Description of the tool's functionality",
|
|
201
|
-
},
|
|
202
|
-
{
|
|
203
|
-
name: "inputSchema",
|
|
204
|
-
type: "ZodSchema | undefined",
|
|
205
|
-
description: "Schema for validating inputs",
|
|
206
|
-
},
|
|
207
|
-
{
|
|
208
|
-
name: "outputSchema",
|
|
209
|
-
type: "ZodSchema | undefined",
|
|
210
|
-
description: "Schema for validating outputs",
|
|
211
|
-
},
|
|
212
|
-
{
|
|
213
|
-
name: "execute",
|
|
214
|
-
type: "Function",
|
|
215
|
-
description: "The tool's execution function",
|
|
216
|
-
},
|
|
217
|
-
],
|
|
218
|
-
},
|
|
219
|
-
],
|
|
220
|
-
},
|
|
221
|
-
]}
|
|
222
|
-
/>
|
|
223
|
-
|
|
224
|
-
## Type Safety
|
|
225
|
-
|
|
226
|
-
The `createTool()` function provides full type safety through TypeScript generics:
|
|
227
|
-
|
|
228
|
-
- Input types are inferred from the `inputSchema`
|
|
229
|
-
- Output types are inferred from the `outputSchema`
|
|
230
|
-
- The execution context is properly typed based on the input schema
|
|
231
|
-
|
|
232
|
-
This ensures that your tools are type-safe throughout your application.
|
|
233
|
-
|
|
234
|
-
## Best Practices
|
|
235
|
-
|
|
236
|
-
1. **Descriptive IDs**: Use clear, action-oriented IDs like `getWeatherForecast` or `searchDatabase`
|
|
237
|
-
2. **Detailed Descriptions**: Provide comprehensive descriptions that explain when and how to use the tool
|
|
238
|
-
3. **Input Validation**: Use Zod schemas to validate inputs and provide helpful error messages
|
|
239
|
-
4. **Error Handling**: Implement proper error handling in your execute function
|
|
240
|
-
5. **Idempotency**: When possible, make your tools idempotent (repeating a call with the same input has the same effect and result as making it once)
|
|
241
|
-
6. **Performance**: Keep tools lightweight and fast to execute
|
|
@@ -1,155 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
title: "Reference: Create Custom Scorer | Scorers | Mastra Docs"
|
|
3
|
-
description: Documentation for creating custom code scorers in Mastra, allowing users to define their own evaluation logic.
|
|
4
|
-
---
|
|
5
|
-
|
|
6
|
-
# createScorer
|
|
7
|
-
|
|
8
|
-
Mastra allows you to define your own custom code scorers for evaluating input/output pairs using any logic you choose. Custom scorers integrate seamlessly with the Mastra scoring framework and can be used anywhere built-in scorers are used.
|
|
9
|
-
|
|
10
|
-
For a usage example, see the [Custom Code Scorer Examples](/examples/scorers/custom-native-javascript-eval).
|
|
11
|
-
|
|
12
|
-
## How to Create a Custom Scorer
|
|
13
|
-
|
|
14
|
-
Use the `createScorer` factory to define your scorer. You must provide at least a `name`, `description`, and an `analyze` function. Optionally, you can provide `extract` and `reason` functions for multi-step or more advanced logic.
|
|
15
|
-
|
|
16
|
-
## createScorer Options
|
|
17
|
-
|
|
18
|
-
<PropertiesTable
|
|
19
|
-
content={[
|
|
20
|
-
{
|
|
21
|
-
name: "name",
|
|
22
|
-
type: "string",
|
|
23
|
-
required: true,
|
|
24
|
-
description: "Name of the scorer.",
|
|
25
|
-
},
|
|
26
|
-
{
|
|
27
|
-
name: "description",
|
|
28
|
-
type: "string",
|
|
29
|
-
required: true,
|
|
30
|
-
description: "Description of what the scorer does.",
|
|
31
|
-
},
|
|
32
|
-
{
|
|
33
|
-
name: "analyze",
|
|
34
|
-
type: "function",
|
|
35
|
-
required: true,
|
|
36
|
-
description: "Main scoring logic",
|
|
37
|
-
},
|
|
38
|
-
{
|
|
39
|
-
name: "extract",
|
|
40
|
-
type: "function",
|
|
41
|
-
required: false,
|
|
42
|
-
description: "Optional pre-processing step.",
|
|
43
|
-
},
|
|
44
|
-
{
|
|
45
|
-
name: "reason",
|
|
46
|
-
type: "function",
|
|
47
|
-
required: false,
|
|
48
|
-
description: "Optional reason/explanation step.",
|
|
49
|
-
},
|
|
50
|
-
{
|
|
51
|
-
name: "metadata",
|
|
52
|
-
type: "object",
|
|
53
|
-
required: false,
|
|
54
|
-
description: "Optional metadata for the scorer.",
|
|
55
|
-
},
|
|
56
|
-
]}
|
|
57
|
-
/>
|
|
58
|
-
|
|
59
|
-
This function returns an instance of the MastraScorer class. See the [MastraScorer reference](./mastra-scorer) for details on the `.run()` method and its input/output.
|
|
60
|
-
|
|
61
|
-
## Step Function Signatures
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
### extract
|
|
65
|
-
<PropertiesTable
|
|
66
|
-
content={[
|
|
67
|
-
{
|
|
68
|
-
name: "input",
|
|
69
|
-
type: "Record<string, any>[]",
|
|
70
|
-
required: false,
|
|
71
|
-
description:
|
|
72
|
-
"Input records provided to the scorer. If the scorer is added to an agent, this will be an array of user messages, e.g. `[{ role: 'user', content: 'hello world' }]`. If the scorer is used in a workflow, this will be the input of the workflow.",
|
|
73
|
-
},
|
|
74
|
-
{
|
|
75
|
-
name: "output",
|
|
76
|
-
type: "Record<string, any>",
|
|
77
|
-
required: true,
|
|
78
|
-
description:
|
|
79
|
-
"Output record provided to the scorer. For agents, this is usually the agent's response. For workflows, this is the workflow's output.",
|
|
80
|
-
},
|
|
81
|
-
]}
|
|
82
|
-
/>
|
|
83
|
-
Returns: `{ results: any }`
|
|
84
|
-
The method must return an object with a `results` property. The value of `results` will be passed to the analyze function as `extractStepResult`.
|
|
85
|
-
|
|
86
|
-
### analyze
|
|
87
|
-
<PropertiesTable
|
|
88
|
-
content={[
|
|
89
|
-
{
|
|
90
|
-
name: "input",
|
|
91
|
-
type: "Record<string, any>[]",
|
|
92
|
-
required: true,
|
|
93
|
-
description:
|
|
94
|
-
"Input records provided to the scorer. If the scorer is added to an agent, this will be an array of user messages, e.g. `[{ role: 'user', content: 'hello world' }]`. If the scorer is used in a workflow, this will be the input of the workflow.",
|
|
95
|
-
},
|
|
96
|
-
{
|
|
97
|
-
name: "output",
|
|
98
|
-
type: "Record<string, any>",
|
|
99
|
-
required: true,
|
|
100
|
-
description:
|
|
101
|
-
"Output record provided to the scorer. For agents, this is usually the agent's response. For workflows, this is the workflow's output.",
|
|
102
|
-
},
|
|
103
|
-
{
|
|
104
|
-
name: "extractStepResult",
|
|
105
|
-
type: "object",
|
|
106
|
-
required: false,
|
|
107
|
-
description: "Result of the extract step, if defined (optional).",
|
|
108
|
-
},
|
|
109
|
-
]}
|
|
110
|
-
/>
|
|
111
|
-
Returns: `{ score: number, results?: any }`
|
|
112
|
-
The method must return an object with a `score` property (required). Optionally, it may return a `results` property. The value of `results` will be passed to the reason function as `analyzeStepResult`.
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
### reason
|
|
116
|
-
<PropertiesTable
|
|
117
|
-
content={[
|
|
118
|
-
{
|
|
119
|
-
name: "input",
|
|
120
|
-
type: "Record<string, any>[]",
|
|
121
|
-
required: true,
|
|
122
|
-
description:
|
|
123
|
-
"Input records provided to the scorer. If the scorer is added to an agent, this will be an array of user messages, e.g. `[{ role: 'user', content: 'hello world' }]`. If the scorer is used in a workflow, this will be the input of the workflow.",
|
|
124
|
-
},
|
|
125
|
-
{
|
|
126
|
-
name: "output",
|
|
127
|
-
type: "Record<string, any>",
|
|
128
|
-
required: true,
|
|
129
|
-
description:
|
|
130
|
-
"Output record provided to the scorer. For agents, this is usually the agent's response. For workflows, this is the workflow's output.",
|
|
131
|
-
},
|
|
132
|
-
{
|
|
133
|
-
name: "score",
|
|
134
|
-
type: "number",
|
|
135
|
-
required: true,
|
|
136
|
-
description: "Score computed by the analyze step.",
|
|
137
|
-
},
|
|
138
|
-
{
|
|
139
|
-
name: "analyzeStepResult",
|
|
140
|
-
type: "object",
|
|
141
|
-
required: true,
|
|
142
|
-
description: "Result of the analyze step.",
|
|
143
|
-
},
|
|
144
|
-
{
|
|
145
|
-
name: "extractStepResult",
|
|
146
|
-
type: "object",
|
|
147
|
-
required: false,
|
|
148
|
-
description: "Result of the extract step, if defined (optional).",
|
|
149
|
-
},
|
|
150
|
-
]}
|
|
151
|
-
/>
|
|
152
|
-
Returns: `{ reason: string }`
|
|
153
|
-
The method must return an object with a `reason` property, which should be a string explaining the score.
|
|
154
|
-
|
|
155
|
-
All step functions can be async.
|
|
@@ -1,210 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
title: "Reference: createLLMScorer | Scorers | Mastra Docs"
|
|
3
|
-
description: Documentation for creating LLM-based scorers in Mastra, allowing users to define evaluation logic using language models.
|
|
4
|
-
---
|
|
5
|
-
|
|
6
|
-
# createLLMScorer
|
|
7
|
-
|
|
8
|
-
The `createLLMScorer()` function lets you define custom scorers that use a language model (LLM) as a judge for evaluation. LLM scorers are ideal for tasks where you want to use prompt-based evaluation, such as answer relevancy, faithfulness, or custom prompt-based metrics. LLM scorers integrate seamlessly with the Mastra scoring framework and can be used anywhere built-in scorers are used.
|
|
9
|
-
|
|
10
|
-
For a usage example, see the [Custom LLM Judge Examples](/examples/scorers/custom-llm-judge-eval).
|
|
11
|
-
|
|
12
|
-
## createLLMScorer Options
|
|
13
|
-
|
|
14
|
-
<PropertiesTable
|
|
15
|
-
content={[
|
|
16
|
-
{
|
|
17
|
-
name: "name",
|
|
18
|
-
type: "string",
|
|
19
|
-
required: true,
|
|
20
|
-
description: "Name of the scorer.",
|
|
21
|
-
},
|
|
22
|
-
{
|
|
23
|
-
name: "description",
|
|
24
|
-
type: "string",
|
|
25
|
-
required: true,
|
|
26
|
-
description: "Description of what the scorer does.",
|
|
27
|
-
},
|
|
28
|
-
{
|
|
29
|
-
name: "judge",
|
|
30
|
-
type: "object",
|
|
31
|
-
required: true,
|
|
32
|
-
description: "Judge configuration object. Must include a model and instructions (system prompt). See Judge Object section below.",
|
|
33
|
-
},
|
|
34
|
-
{
|
|
35
|
-
name: "extract",
|
|
36
|
-
type: "object",
|
|
37
|
-
required: false,
|
|
38
|
-
description: "(Optional) Extraction step configuration object. See Extract Object section below.",
|
|
39
|
-
},
|
|
40
|
-
{
|
|
41
|
-
name: "analyze",
|
|
42
|
-
type: "object",
|
|
43
|
-
required: true,
|
|
44
|
-
description: "Analysis step configuration object. See Analyze Object section below.",
|
|
45
|
-
},
|
|
46
|
-
{
|
|
47
|
-
name: "reason",
|
|
48
|
-
type: "object",
|
|
49
|
-
required: false,
|
|
50
|
-
description: "(Optional) Reason step configuration object. See Reason Object section below.",
|
|
51
|
-
},
|
|
52
|
-
{
|
|
53
|
-
name: "calculateScore",
|
|
54
|
-
type: "function",
|
|
55
|
-
required: true,
|
|
56
|
-
description: "Function: ({ run }) => number. Computes the final score from the analyze step result.",
|
|
57
|
-
},
|
|
58
|
-
]}
|
|
59
|
-
/>
|
|
60
|
-
|
|
61
|
-
This function returns an instance of the MastraScorer class. See the [MastraScorer reference](./mastra-scorer) for details on the `.run()` method and its input/output.
|
|
62
|
-
|
|
63
|
-
## Judge Object
|
|
64
|
-
<PropertiesTable
|
|
65
|
-
content={[
|
|
66
|
-
{
|
|
67
|
-
name: "model",
|
|
68
|
-
type: "LanguageModel",
|
|
69
|
-
required: true,
|
|
70
|
-
description: "The LLM model instance to use for evaluation.",
|
|
71
|
-
},
|
|
72
|
-
{
|
|
73
|
-
name: "instructions",
|
|
74
|
-
type: "string",
|
|
75
|
-
required: true,
|
|
76
|
-
description: "System prompt/instructions for the LLM.",
|
|
77
|
-
},
|
|
78
|
-
]}
|
|
79
|
-
/>
|
|
80
|
-
|
|
81
|
-
## Extract Object
|
|
82
|
-
<PropertiesTable
|
|
83
|
-
content={[
|
|
84
|
-
{
|
|
85
|
-
name: "description",
|
|
86
|
-
type: "string",
|
|
87
|
-
required: true,
|
|
88
|
-
description: "Description of the extract step.",
|
|
89
|
-
},
|
|
90
|
-
{
|
|
91
|
-
name: "judge",
|
|
92
|
-
type: "object",
|
|
93
|
-
required: false,
|
|
94
|
-
description: "(Optional) LLM judge for this step (can override main judge/model). See Judge Object section.",
|
|
95
|
-
},
|
|
96
|
-
{
|
|
97
|
-
name: "outputSchema",
|
|
98
|
-
type: "ZodSchema",
|
|
99
|
-
required: true,
|
|
100
|
-
description: "Zod schema for the expected output of the extract step.",
|
|
101
|
-
},
|
|
102
|
-
{
|
|
103
|
-
name: "createPrompt",
|
|
104
|
-
type: "function",
|
|
105
|
-
required: true,
|
|
106
|
-
description: "Function: ({ run: ScoringInput }) => string. Returns the prompt for the LLM.",
|
|
107
|
-
},
|
|
108
|
-
]}
|
|
109
|
-
/>
|
|
110
|
-
|
|
111
|
-
## Analyze Object
|
|
112
|
-
<PropertiesTable
|
|
113
|
-
content={[
|
|
114
|
-
{
|
|
115
|
-
name: "description",
|
|
116
|
-
type: "string",
|
|
117
|
-
required: true,
|
|
118
|
-
description: "Description of the analyze step.",
|
|
119
|
-
},
|
|
120
|
-
{
|
|
121
|
-
name: "judge",
|
|
122
|
-
type: "object",
|
|
123
|
-
required: false,
|
|
124
|
-
description: "(Optional) LLM judge for this step (can override main judge/model). See Judge Object section.",
|
|
125
|
-
},
|
|
126
|
-
{
|
|
127
|
-
name: "outputSchema",
|
|
128
|
-
type: "ZodSchema",
|
|
129
|
-
required: true,
|
|
130
|
-
description: "Zod schema for the expected output of the analyze step.",
|
|
131
|
-
},
|
|
132
|
-
{
|
|
133
|
-
name: "createPrompt",
|
|
134
|
-
type: "function",
|
|
135
|
-
required: true,
|
|
136
|
-
description: "Function: ({ run: ScoringInput & { extractStepResult } }) => string. Returns the prompt for the LLM.",
|
|
137
|
-
},
|
|
138
|
-
]}
|
|
139
|
-
/>
|
|
140
|
-
|
|
141
|
-
## Calculate Score Function
|
|
142
|
-
|
|
143
|
-
The `calculateScore` function converts the LLM's structured analysis into a numerical score. This function receives the results from previous steps but not the score itself (since that's what it calculates).
|
|
144
|
-
|
|
145
|
-
<PropertiesTable
|
|
146
|
-
content={[
|
|
147
|
-
{
|
|
148
|
-
name: "input",
|
|
149
|
-
type: "Record<string, any>[]",
|
|
150
|
-
required: true,
|
|
151
|
-
description:
|
|
152
|
-
"Input records provided to the scorer. If the scorer is added to an agent, this will be an array of user messages, e.g. `[{ role: 'user', content: 'hello world' }]`. If the scorer is used in a workflow, this will be the input of the workflow.",
|
|
153
|
-
},
|
|
154
|
-
{
|
|
155
|
-
name: "output",
|
|
156
|
-
type: "Record<string, any>",
|
|
157
|
-
required: true,
|
|
158
|
-
description:
|
|
159
|
-
"Output record provided to the scorer. For agents, this is usually the agent's response. For workflows, this is the workflow's output.",
|
|
160
|
-
},
|
|
161
|
-
{
|
|
162
|
-
name: "runtimeContext",
|
|
163
|
-
type: "object",
|
|
164
|
-
required: false,
|
|
165
|
-
description: "Runtime context from the agent or workflow step being evaluated (optional).",
|
|
166
|
-
},
|
|
167
|
-
{
|
|
168
|
-
name: "extractStepResult",
|
|
169
|
-
type: "object",
|
|
170
|
-
required: false,
|
|
171
|
-
description: "Result of the extract step, if defined (optional).",
|
|
172
|
-
},
|
|
173
|
-
{
|
|
174
|
-
name: "analyzeStepResult",
|
|
175
|
-
type: "object",
|
|
176
|
-
required: true,
|
|
177
|
-
description: "Structured result from the analyze step, conforming to the outputSchema defined in the analyze step.",
|
|
178
|
-
},
|
|
179
|
-
]}
|
|
180
|
-
/>
|
|
181
|
-
|
|
182
|
-
Returns: `number`
|
|
183
|
-
The function must return a numerical score, typically in the 0-1 range where 1 represents the best possible score.
|
|
184
|
-
|
|
185
|
-
## Reason Object
|
|
186
|
-
<PropertiesTable
|
|
187
|
-
content={[
|
|
188
|
-
{
|
|
189
|
-
name: "description",
|
|
190
|
-
type: "string",
|
|
191
|
-
required: true,
|
|
192
|
-
description: "Description of the reason step.",
|
|
193
|
-
},
|
|
194
|
-
{
|
|
195
|
-
name: "judge",
|
|
196
|
-
type: "object",
|
|
197
|
-
required: false,
|
|
198
|
-
description: "(Optional) LLM judge for this step (can override main judge/model). See Judge Object section.",
|
|
199
|
-
},
|
|
200
|
-
{
|
|
201
|
-
name: "createPrompt",
|
|
202
|
-
type: "function",
|
|
203
|
-
required: true,
|
|
204
|
-
description: "Function: ({ run }) => string. `run` includes input, output, extractStepResult, analyzeStepResult, and score. Returns the prompt for the LLM.",
|
|
205
|
-
},
|
|
206
|
-
]}
|
|
207
|
-
/>
|
|
208
|
-
|
|
209
|
-
LLM scorers may also include step-specific prompt fields in the return value, such as `extractPrompt`, `analyzePrompt`, and `reasonPrompt`.
|
|
210
|
-
|