@mastra/mcp-docs-server 0.13.10 → 0.13.11-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/organized/changelogs/%40internal%2Fstorage-test-utils.md +9 -9
- package/.docs/organized/changelogs/%40internal%2Ftypes-builder.md +2 -0
- package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +18 -18
- package/.docs/organized/changelogs/%40mastra%2Fcore.md +23 -23
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +20 -20
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +20 -20
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-vercel.md +20 -20
- package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +20 -20
- package/.docs/organized/changelogs/%40mastra%2Ffirecrawl.md +13 -13
- package/.docs/organized/changelogs/%40mastra%2Flibsql.md +9 -9
- package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +18 -18
- package/.docs/organized/changelogs/%40mastra%2Fmemory.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Fpg.md +9 -9
- package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +21 -21
- package/.docs/organized/changelogs/%40mastra%2Frag.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Fschema-compat.md +7 -0
- package/.docs/organized/changelogs/%40mastra%2Fserver.md +18 -18
- package/.docs/organized/changelogs/create-mastra.md +9 -9
- package/.docs/organized/changelogs/mastra.md +22 -22
- package/.docs/organized/code-examples/agent-network.md +4 -3
- package/.docs/organized/code-examples/agent.md +33 -2
- package/.docs/raw/agents/overview.mdx +21 -1
- package/.docs/raw/getting-started/mcp-docs-server.mdx +2 -2
- package/.docs/raw/rag/chunking-and-embedding.mdx +11 -0
- package/.docs/raw/reference/agents/agent.mdx +64 -38
- package/.docs/raw/reference/agents/generate.mdx +206 -202
- package/.docs/raw/reference/agents/getAgent.mdx +23 -38
- package/.docs/raw/reference/agents/getDefaultGenerateOptions.mdx +62 -0
- package/.docs/raw/reference/agents/getDefaultStreamOptions.mdx +62 -0
- package/.docs/raw/reference/agents/getDefaultVNextStreamOptions.mdx +62 -0
- package/.docs/raw/reference/agents/getDescription.mdx +30 -0
- package/.docs/raw/reference/agents/getInstructions.mdx +36 -73
- package/.docs/raw/reference/agents/getLLM.mdx +69 -0
- package/.docs/raw/reference/agents/getMemory.mdx +42 -119
- package/.docs/raw/reference/agents/getModel.mdx +36 -75
- package/.docs/raw/reference/agents/getScorers.mdx +62 -0
- package/.docs/raw/reference/agents/getTools.mdx +36 -128
- package/.docs/raw/reference/agents/getVoice.mdx +36 -83
- package/.docs/raw/reference/agents/getWorkflows.mdx +37 -74
- package/.docs/raw/reference/agents/stream.mdx +263 -226
- package/.docs/raw/reference/agents/streamVNext.mdx +208 -402
- package/.docs/raw/reference/rag/chunk.mdx +51 -2
- package/.docs/raw/reference/scorers/answer-relevancy.mdx +6 -6
- package/.docs/raw/reference/scorers/bias.mdx +6 -6
- package/.docs/raw/reference/scorers/completeness.mdx +2 -2
- package/.docs/raw/reference/scorers/content-similarity.mdx +1 -1
- package/.docs/raw/reference/scorers/create-scorer.mdx +445 -0
- package/.docs/raw/reference/scorers/faithfulness.mdx +6 -6
- package/.docs/raw/reference/scorers/hallucination.mdx +6 -6
- package/.docs/raw/reference/scorers/keyword-coverage.mdx +2 -2
- package/.docs/raw/reference/scorers/mastra-scorer.mdx +116 -158
- package/.docs/raw/reference/scorers/toxicity.mdx +2 -2
- package/.docs/raw/scorers/custom-scorers.mdx +166 -268
- package/.docs/raw/scorers/overview.mdx +21 -13
- package/.docs/raw/server-db/local-dev-playground.mdx +3 -3
- package/package.json +3 -3
- package/.docs/raw/reference/agents/createTool.mdx +0 -241
- package/.docs/raw/reference/scorers/custom-code-scorer.mdx +0 -155
- package/.docs/raw/reference/scorers/llm-scorer.mdx +0 -210
|
@@ -49,7 +49,7 @@ The following parameters are available for all chunking strategies.
|
|
|
49
49
|
content={[
|
|
50
50
|
{
|
|
51
51
|
name: "strategy",
|
|
52
|
-
type: "'recursive' | 'character' | 'token' | 'markdown' | 'html' | 'json' | 'latex' | 'sentence'",
|
|
52
|
+
type: "'recursive' | 'character' | 'token' | 'markdown' | 'semantic-markdown' | 'html' | 'json' | 'latex' | 'sentence'",
|
|
53
53
|
isOptional: true,
|
|
54
54
|
description:
|
|
55
55
|
"The chunking strategy to use. If not specified, the default is chosen based on the document type. Depending on the chunking strategy, additional options are available. Defaults: .md files → 'markdown', .html/.htm → 'html', .json → 'json', .tex → 'latex', others → 'recursive'",
|
|
@@ -106,11 +106,13 @@ The following parameters are available for all chunking strategies.
|
|
|
106
106
|
type: "ExtractParams",
|
|
107
107
|
isOptional: true,
|
|
108
108
|
description:
|
|
109
|
-
"Metadata extraction configuration.
|
|
109
|
+
"Metadata extraction configuration.",
|
|
110
110
|
},
|
|
111
111
|
]}
|
|
112
112
|
/>
|
|
113
113
|
|
|
114
|
+
See [ExtractParams reference](/reference/rag/extract-params.mdx) for details on the `extract` parameter.
|
|
115
|
+
|
|
114
116
|
## Strategy-Specific Options
|
|
115
117
|
|
|
116
118
|
Strategy-specific options are passed as top-level parameters alongside the strategy parameter. For example:
|
|
@@ -161,6 +163,13 @@ const chunks = await doc.chunk({
|
|
|
161
163
|
stripHeaders: true, // Markdown-specific option
|
|
162
164
|
});
|
|
163
165
|
|
|
166
|
+
// Semantic Markdown strategy example
|
|
167
|
+
const chunks = await doc.chunk({
|
|
168
|
+
strategy: "semantic-markdown",
|
|
169
|
+
joinThreshold: 500, // Semantic Markdown-specific option
|
|
170
|
+
modelName: "gpt-3.5-turbo", // Semantic Markdown-specific option
|
|
171
|
+
});
|
|
172
|
+
|
|
164
173
|
// Token strategy example
|
|
165
174
|
const chunks = await doc.chunk({
|
|
166
175
|
strategy: "token",
|
|
@@ -319,6 +328,46 @@ The options documented below are passed directly at the top level of the configu
|
|
|
319
328
|
|
|
320
329
|
**Important:** When using the `headers` option, the markdown strategy ignores all general options and content is split based on the markdown header structure. To use size-based chunking with markdown, omit the `headers` parameter.
|
|
321
330
|
|
|
331
|
+
### Semantic Markdown
|
|
332
|
+
|
|
333
|
+
<PropertiesTable
|
|
334
|
+
content={[
|
|
335
|
+
{
|
|
336
|
+
name: "joinThreshold",
|
|
337
|
+
type: "number",
|
|
338
|
+
isOptional: true,
|
|
339
|
+
defaultValue: "500",
|
|
340
|
+
description: "Maximum token count for merging related sections. Sections exceeding this limit individually are left intact, but smaller sections are merged with siblings or parents if the combined size stays under this threshold.",
|
|
341
|
+
},
|
|
342
|
+
{
|
|
343
|
+
name: "modelName",
|
|
344
|
+
type: "string",
|
|
345
|
+
isOptional: true,
|
|
346
|
+
description: "Name of the model for tokenization. If provided, the model's underlying tokenization `encodingName` will be used.",
|
|
347
|
+
},
|
|
348
|
+
{
|
|
349
|
+
name: "encodingName",
|
|
350
|
+
type: "string",
|
|
351
|
+
isOptional: true,
|
|
352
|
+
defaultValue: "cl100k_base",
|
|
353
|
+
description: "Name of the token encoding to use. Derived from `modelName` if available.",
|
|
354
|
+
},
|
|
355
|
+
{
|
|
356
|
+
name: "allowedSpecial",
|
|
357
|
+
type: "Set<string> | 'all'",
|
|
358
|
+
isOptional: true,
|
|
359
|
+
description: "Set of special tokens allowed during tokenization, or 'all' to allow all special tokens.",
|
|
360
|
+
},
|
|
361
|
+
{
|
|
362
|
+
name: "disallowedSpecial",
|
|
363
|
+
type: "Set<string> | 'all'",
|
|
364
|
+
isOptional: true,
|
|
365
|
+
defaultValue: "all",
|
|
366
|
+
description: "Set of special tokens to disallow during tokenization, or 'all' to disallow all special tokens.",
|
|
367
|
+
},
|
|
368
|
+
]}
|
|
369
|
+
/>
|
|
370
|
+
|
|
322
371
|
### Token
|
|
323
372
|
|
|
324
373
|
<PropertiesTable
|
|
@@ -53,12 +53,12 @@ This function returns an instance of the MastraScorer class. The `.run()` method
|
|
|
53
53
|
description: "Relevancy score (0 to scale, default 0-1)",
|
|
54
54
|
},
|
|
55
55
|
{
|
|
56
|
-
name: "
|
|
56
|
+
name: "preprocessPrompt",
|
|
57
57
|
type: "string",
|
|
58
|
-
description: "The prompt sent to the LLM for the
|
|
58
|
+
description: "The prompt sent to the LLM for the preprocess step (optional).",
|
|
59
59
|
},
|
|
60
60
|
{
|
|
61
|
-
name: "
|
|
61
|
+
name: "preprocessStepResult",
|
|
62
62
|
type: "object",
|
|
63
63
|
description: "Object with extracted statements: { statements: string[] }",
|
|
64
64
|
},
|
|
@@ -73,7 +73,7 @@ This function returns an instance of the MastraScorer class. The `.run()` method
|
|
|
73
73
|
description: "Object with results: { results: Array<{ result: 'yes' | 'unsure' | 'no', reason: string }> }",
|
|
74
74
|
},
|
|
75
75
|
{
|
|
76
|
-
name: "
|
|
76
|
+
name: "generateReasonPrompt",
|
|
77
77
|
type: "string",
|
|
78
78
|
description: "The prompt sent to the LLM for the reason step (optional).",
|
|
79
79
|
},
|
|
@@ -91,7 +91,7 @@ The scorer evaluates relevancy through query-answer alignment, considering compl
|
|
|
91
91
|
|
|
92
92
|
### Scoring Process
|
|
93
93
|
|
|
94
|
-
1. **Statement
|
|
94
|
+
1. **Statement Preprocessing:**
|
|
95
95
|
- Breaks output into meaningful statements while preserving context.
|
|
96
96
|
2. **Relevance Analysis:**
|
|
97
97
|
- Each statement is evaluated as:
|
|
@@ -111,4 +111,4 @@ The scorer evaluates relevancy through query-answer alignment, considering compl
|
|
|
111
111
|
|
|
112
112
|
## Related
|
|
113
113
|
|
|
114
|
-
- [Faithfulness Scorer](./faithfulness)
|
|
114
|
+
- [Faithfulness Scorer](./faithfulness)
|
|
@@ -41,14 +41,14 @@ This function returns an instance of the MastraScorer class. The `.run()` method
|
|
|
41
41
|
description: "The id of the run (optional).",
|
|
42
42
|
},
|
|
43
43
|
{
|
|
44
|
-
name: "
|
|
44
|
+
name: "preprocessStepResult",
|
|
45
45
|
type: "object",
|
|
46
46
|
description: "Object with extracted opinions: { opinions: string[] }",
|
|
47
47
|
},
|
|
48
48
|
{
|
|
49
|
-
name: "
|
|
49
|
+
name: "preprocessPrompt",
|
|
50
50
|
type: "string",
|
|
51
|
-
description: "The prompt sent to the LLM for the
|
|
51
|
+
description: "The prompt sent to the LLM for the preprocess step (optional).",
|
|
52
52
|
},
|
|
53
53
|
{
|
|
54
54
|
name: "analyzeStepResult",
|
|
@@ -71,9 +71,9 @@ This function returns an instance of the MastraScorer class. The `.run()` method
|
|
|
71
71
|
description: "Explanation of the score.",
|
|
72
72
|
},
|
|
73
73
|
{
|
|
74
|
-
name: "
|
|
74
|
+
name: "generateReasonPrompt",
|
|
75
75
|
type: "string",
|
|
76
|
-
description: "The prompt sent to the LLM for the
|
|
76
|
+
description: "The prompt sent to the LLM for the generateReason step (optional).",
|
|
77
77
|
},
|
|
78
78
|
]}
|
|
79
79
|
/>
|
|
@@ -124,4 +124,4 @@ Final score: `(biased_opinions / total_opinions) * scale`
|
|
|
124
124
|
|
|
125
125
|
- [Toxicity Scorer](./toxicity)
|
|
126
126
|
- [Faithfulness Scorer](./faithfulness)
|
|
127
|
-
- [Hallucination Scorer](./hallucination)
|
|
127
|
+
- [Hallucination Scorer](./hallucination)
|
|
@@ -25,7 +25,7 @@ This function returns an instance of the MastraScorer class. See the [MastraScor
|
|
|
25
25
|
description: "The id of the run (optional).",
|
|
26
26
|
},
|
|
27
27
|
{
|
|
28
|
-
name: "
|
|
28
|
+
name: "preprocessStepResult",
|
|
29
29
|
type: "object",
|
|
30
30
|
description: "Object with extracted elements and coverage details: { inputElements: string[], outputElements: string[], missingElements: string[], elementCounts: { input: number, output: number } }",
|
|
31
31
|
},
|
|
@@ -86,4 +86,4 @@ Final score: `(covered_elements / total_input_elements) * scale`
|
|
|
86
86
|
- [Answer Relevancy Scorer](./answer-relevancy)
|
|
87
87
|
- [Content Similarity Scorer](./content-similarity)
|
|
88
88
|
- [Textual Difference Scorer](./textual-difference)
|
|
89
|
-
- [Keyword Coverage Scorer](./keyword-coverage)
|
|
89
|
+
- [Keyword Coverage Scorer](./keyword-coverage)
|
|
@@ -44,7 +44,7 @@ This function returns an instance of the MastraScorer class. See the [MastraScor
|
|
|
44
44
|
description: "The id of the run (optional).",
|
|
45
45
|
},
|
|
46
46
|
{
|
|
47
|
-
name: "
|
|
47
|
+
name: "preprocessStepResult",
|
|
48
48
|
type: "object",
|
|
49
49
|
description: "Object with processed input and output: { processedInput: string, processedOutput: string }",
|
|
50
50
|
},
|
|
@@ -0,0 +1,445 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: Create Custom Scorer | Scorers | Mastra Docs"
|
|
3
|
+
description: Documentation for creating custom scorers in Mastra, allowing users to define their own evaluation logic using either JavaScript functions or LLM-based prompts.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# createScorer
|
|
7
|
+
|
|
8
|
+
Mastra provides a unified `createScorer` factory that allows you to define custom scorers for evaluating input/output pairs. You can use either native JavaScript functions or LLM-based prompt objects for each evaluation step. Custom scorers can be added to Agents and Workflow steps.
|
|
9
|
+
|
|
10
|
+
## How to Create a Custom Scorer
|
|
11
|
+
|
|
12
|
+
Use the `createScorer` factory to define your scorer with a name, description, and optional judge configuration. Then chain step methods to build your evaluation pipeline. You must provide at least a `generateScore` step.
|
|
13
|
+
|
|
14
|
+
```typescript
|
|
15
|
+
const scorer = createScorer({
|
|
16
|
+
name: "My Custom Scorer",
|
|
17
|
+
description: "Evaluates responses based on custom criteria",
|
|
18
|
+
judge: {
|
|
19
|
+
model: myModel,
|
|
20
|
+
instructions: "You are an expert evaluator..."
|
|
21
|
+
}
|
|
22
|
+
})
|
|
23
|
+
.preprocess({ /* step config */ })
|
|
24
|
+
.analyze({ /* step config */ })
|
|
25
|
+
.generateScore(({ run, results }) => {
|
|
26
|
+
// Return a number
|
|
27
|
+
})
|
|
28
|
+
.generateReason({ /* step config */ });
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## createScorer Options
|
|
32
|
+
|
|
33
|
+
<PropertiesTable
|
|
34
|
+
content={[
|
|
35
|
+
{
|
|
36
|
+
name: "name",
|
|
37
|
+
type: "string",
|
|
38
|
+
required: true,
|
|
39
|
+
description: "Name of the scorer.",
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
name: "description",
|
|
43
|
+
type: "string",
|
|
44
|
+
required: true,
|
|
45
|
+
description: "Description of what the scorer does.",
|
|
46
|
+
},
|
|
47
|
+
{
|
|
48
|
+
name: "judge",
|
|
49
|
+
type: "object",
|
|
50
|
+
required: false,
|
|
51
|
+
description: "Optional judge configuration for LLM-based steps. See Judge Object section below.",
|
|
52
|
+
},
|
|
53
|
+
]}
|
|
54
|
+
/>
|
|
55
|
+
|
|
56
|
+
This function returns a scorer builder that you can chain step methods onto. See the [MastraScorer reference](./mastra-scorer) for details on the `.run()` method and its input/output.
|
|
57
|
+
|
|
58
|
+
## Judge Object
|
|
59
|
+
|
|
60
|
+
<PropertiesTable
|
|
61
|
+
content={[
|
|
62
|
+
{
|
|
63
|
+
name: "model",
|
|
64
|
+
type: "LanguageModel",
|
|
65
|
+
required: true,
|
|
66
|
+
description: "The LLM model instance to use for evaluation.",
|
|
67
|
+
},
|
|
68
|
+
{
|
|
69
|
+
name: "instructions",
|
|
70
|
+
type: "string",
|
|
71
|
+
required: true,
|
|
72
|
+
description: "System prompt/instructions for the LLM.",
|
|
73
|
+
},
|
|
74
|
+
]}
|
|
75
|
+
/>
|
|
76
|
+
|
|
77
|
+
## Type Safety
|
|
78
|
+
|
|
79
|
+
For better type inference and IntelliSense support, you can specify input/output types when creating scorers:
|
|
80
|
+
|
|
81
|
+
```typescript
|
|
82
|
+
import { createScorer, ScorerRunInputForAgent, ScorerRunOutputForAgent } from '@mastra/core';
|
|
83
|
+
|
|
84
|
+
// For agent evaluation with full type safety
|
|
85
|
+
const agentScorer = createScorer<ScorerRunInputForAgent, ScorerRunOutputForAgent>({
|
|
86
|
+
name: 'Agent Response Quality',
|
|
87
|
+
description: 'Evaluates agent responses'
|
|
88
|
+
})
|
|
89
|
+
.preprocess(({ run }) => {
|
|
90
|
+
// run.input is typed as ScorerRunInputForAgent
|
|
91
|
+
const userMessage = run.input.inputMessages[0]?.content;
|
|
92
|
+
return { userMessage };
|
|
93
|
+
})
|
|
94
|
+
.generateScore(({ run, results }) => {
|
|
95
|
+
// run.output is typed as ScorerRunOutputForAgent
|
|
96
|
+
const response = run.output[0]?.content;
|
|
97
|
+
return response.length > 10 ? 1.0 : 0.5;
|
|
98
|
+
});
|
|
99
|
+
|
|
100
|
+
// For custom input/output types
|
|
101
|
+
type CustomInput = { query: string; context: string[] };
|
|
102
|
+
type CustomOutput = { answer: string; confidence: number };
|
|
103
|
+
|
|
104
|
+
const customScorer = createScorer<CustomInput, CustomOutput>({
|
|
105
|
+
name: 'Custom Scorer',
|
|
106
|
+
description: 'Evaluates custom data'
|
|
107
|
+
})
|
|
108
|
+
.generateScore(({ run }) => run.output.confidence);
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Built-in Agent Types
|
|
112
|
+
|
|
113
|
+
- **`ScorerRunInputForAgent`** - Contains `inputMessages`, `rememberedMessages`, `systemMessages`, and `taggedSystemMessages` for agent evaluation
|
|
114
|
+
- **`ScorerRunOutputForAgent`** - Array of agent response messages
|
|
115
|
+
|
|
116
|
+
Using these types provides autocomplete, compile-time validation, and better documentation for your scoring logic.
|
|
117
|
+
|
|
118
|
+
## Step Method Signatures
|
|
119
|
+
|
|
120
|
+
### preprocess
|
|
121
|
+
|
|
122
|
+
Optional preprocessing step that can extract or transform data before analysis.
|
|
123
|
+
|
|
124
|
+
**Function Mode:**
|
|
125
|
+
Function: `({ run, results }) => any`
|
|
126
|
+
|
|
127
|
+
<PropertiesTable
|
|
128
|
+
content={[
|
|
129
|
+
{
|
|
130
|
+
name: "run.input",
|
|
131
|
+
type: "any",
|
|
132
|
+
required: true,
|
|
133
|
+
description: "Input records provided to the scorer. If the scorer is added to an agent, this will be an array of user messages, e.g. `[{ role: 'user', content: 'hello world' }]`. If the scorer is used in a workflow, this will be the input of the workflow.",
|
|
134
|
+
},
|
|
135
|
+
{
|
|
136
|
+
name: "run.output",
|
|
137
|
+
type: "any",
|
|
138
|
+
required: true,
|
|
139
|
+
description: "Output record provided to the scorer. For agents, this is usually the agent's response. For workflows, this is the workflow's output.",
|
|
140
|
+
},
|
|
141
|
+
{
|
|
142
|
+
name: "run.runId",
|
|
143
|
+
type: "string",
|
|
144
|
+
required: true,
|
|
145
|
+
description: "Unique identifier for this scoring run.",
|
|
146
|
+
},
|
|
147
|
+
{
|
|
148
|
+
name: "run.runtimeContext",
|
|
149
|
+
type: "object",
|
|
150
|
+
required: false,
|
|
151
|
+
description: "Runtime context from the agent or workflow step being evaluated (optional).",
|
|
152
|
+
},
|
|
153
|
+
{
|
|
154
|
+
name: "results",
|
|
155
|
+
type: "object",
|
|
156
|
+
required: true,
|
|
157
|
+
description: "Empty object (no previous steps).",
|
|
158
|
+
},
|
|
159
|
+
]}
|
|
160
|
+
/>
|
|
161
|
+
|
|
162
|
+
Returns: `any`
|
|
163
|
+
The method can return any value. The returned value will be available to subsequent steps as `preprocessStepResult`.
|
|
164
|
+
|
|
165
|
+
**Prompt Object Mode:**
|
|
166
|
+
<PropertiesTable
|
|
167
|
+
content={[
|
|
168
|
+
{
|
|
169
|
+
name: "description",
|
|
170
|
+
type: "string",
|
|
171
|
+
required: true,
|
|
172
|
+
description: "Description of what this preprocessing step does.",
|
|
173
|
+
},
|
|
174
|
+
{
|
|
175
|
+
name: "outputSchema",
|
|
176
|
+
type: "ZodSchema",
|
|
177
|
+
required: true,
|
|
178
|
+
description: "Zod schema for the expected output of the preprocess step.",
|
|
179
|
+
},
|
|
180
|
+
{
|
|
181
|
+
name: "createPrompt",
|
|
182
|
+
type: "function",
|
|
183
|
+
required: true,
|
|
184
|
+
description: "Function: ({ run, results }) => string. Returns the prompt for the LLM.",
|
|
185
|
+
},
|
|
186
|
+
{
|
|
187
|
+
name: "judge",
|
|
188
|
+
type: "object",
|
|
189
|
+
required: false,
|
|
190
|
+
description: "(Optional) LLM judge for this step (can override main judge). See Judge Object section.",
|
|
191
|
+
},
|
|
192
|
+
]}
|
|
193
|
+
/>
|
|
194
|
+
|
|
195
|
+
### analyze
|
|
196
|
+
|
|
197
|
+
Optional analysis step that processes the input/output and any preprocessed data.
|
|
198
|
+
|
|
199
|
+
**Function Mode:**
|
|
200
|
+
Function: `({ run, results }) => any`
|
|
201
|
+
|
|
202
|
+
<PropertiesTable
|
|
203
|
+
content={[
|
|
204
|
+
{
|
|
205
|
+
name: "run.input",
|
|
206
|
+
type: "any",
|
|
207
|
+
required: true,
|
|
208
|
+
description: "Input records provided to the scorer. If the scorer is added to an agent, this will be an array of user messages, e.g. `[{ role: 'user', content: 'hello world' }]`. If the scorer is used in a workflow, this will be the input of the workflow.",
|
|
209
|
+
},
|
|
210
|
+
{
|
|
211
|
+
name: "run.output",
|
|
212
|
+
type: "any",
|
|
213
|
+
required: true,
|
|
214
|
+
description: "Output record provided to the scorer. For agents, this is usually the agent's response. For workflows, this is the workflow's output.",
|
|
215
|
+
},
|
|
216
|
+
{
|
|
217
|
+
name: "run.runId",
|
|
218
|
+
type: "string",
|
|
219
|
+
required: true,
|
|
220
|
+
description: "Unique identifier for this scoring run.",
|
|
221
|
+
},
|
|
222
|
+
{
|
|
223
|
+
name: "run.runtimeContext",
|
|
224
|
+
type: "object",
|
|
225
|
+
required: false,
|
|
226
|
+
description: "Runtime context from the agent or workflow step being evaluated (optional).",
|
|
227
|
+
},
|
|
228
|
+
{
|
|
229
|
+
name: "results.preprocessStepResult",
|
|
230
|
+
type: "any",
|
|
231
|
+
required: false,
|
|
232
|
+
description: "Result from preprocess step, if defined (optional).",
|
|
233
|
+
},
|
|
234
|
+
]}
|
|
235
|
+
/>
|
|
236
|
+
|
|
237
|
+
Returns: `any`
|
|
238
|
+
The method can return any value. The returned value will be available to subsequent steps as `analyzeStepResult`.
|
|
239
|
+
|
|
240
|
+
**Prompt Object Mode:**
|
|
241
|
+
<PropertiesTable
|
|
242
|
+
content={[
|
|
243
|
+
{
|
|
244
|
+
name: "description",
|
|
245
|
+
type: "string",
|
|
246
|
+
required: true,
|
|
247
|
+
description: "Description of what this analysis step does.",
|
|
248
|
+
},
|
|
249
|
+
{
|
|
250
|
+
name: "outputSchema",
|
|
251
|
+
type: "ZodSchema",
|
|
252
|
+
required: true,
|
|
253
|
+
description: "Zod schema for the expected output of the analyze step.",
|
|
254
|
+
},
|
|
255
|
+
{
|
|
256
|
+
name: "createPrompt",
|
|
257
|
+
type: "function",
|
|
258
|
+
required: true,
|
|
259
|
+
description: "Function: ({ run, results }) => string. Returns the prompt for the LLM.",
|
|
260
|
+
},
|
|
261
|
+
{
|
|
262
|
+
name: "judge",
|
|
263
|
+
type: "object",
|
|
264
|
+
required: false,
|
|
265
|
+
description: "(Optional) LLM judge for this step (can override main judge). See Judge Object section.",
|
|
266
|
+
},
|
|
267
|
+
]}
|
|
268
|
+
/>
|
|
269
|
+
|
|
270
|
+
### generateScore
|
|
271
|
+
|
|
272
|
+
**Required** step that computes the final numerical score.
|
|
273
|
+
|
|
274
|
+
**Function Mode:**
|
|
275
|
+
Function: `({ run, results }) => number`
|
|
276
|
+
|
|
277
|
+
<PropertiesTable
|
|
278
|
+
content={[
|
|
279
|
+
{
|
|
280
|
+
name: "run.input",
|
|
281
|
+
type: "any",
|
|
282
|
+
required: true,
|
|
283
|
+
description: "Input records provided to the scorer. If the scorer is added to an agent, this will be an array of user messages, e.g. `[{ role: 'user', content: 'hello world' }]`. If the scorer is used in a workflow, this will be the input of the workflow.",
|
|
284
|
+
},
|
|
285
|
+
{
|
|
286
|
+
name: "run.output",
|
|
287
|
+
type: "any",
|
|
288
|
+
required: true,
|
|
289
|
+
description: "Output record provided to the scorer. For agents, this is usually the agent's response. For workflows, this is the workflow's output.",
|
|
290
|
+
},
|
|
291
|
+
{
|
|
292
|
+
name: "run.runId",
|
|
293
|
+
type: "string",
|
|
294
|
+
required: true,
|
|
295
|
+
description: "Unique identifier for this scoring run.",
|
|
296
|
+
},
|
|
297
|
+
{
|
|
298
|
+
name: "run.runtimeContext",
|
|
299
|
+
type: "object",
|
|
300
|
+
required: false,
|
|
301
|
+
description: "Runtime context from the agent or workflow step being evaluated (optional).",
|
|
302
|
+
},
|
|
303
|
+
{
|
|
304
|
+
name: "results.preprocessStepResult",
|
|
305
|
+
type: "any",
|
|
306
|
+
required: false,
|
|
307
|
+
description: "Result from preprocess step, if defined (optional).",
|
|
308
|
+
},
|
|
309
|
+
{
|
|
310
|
+
name: "results.analyzeStepResult",
|
|
311
|
+
type: "any",
|
|
312
|
+
required: false,
|
|
313
|
+
description: "Result from analyze step, if defined (optional).",
|
|
314
|
+
},
|
|
315
|
+
]}
|
|
316
|
+
/>
|
|
317
|
+
|
|
318
|
+
Returns: `number`
|
|
319
|
+
The method must return a numerical score.
|
|
320
|
+
|
|
321
|
+
**Prompt Object Mode:**
|
|
322
|
+
<PropertiesTable
|
|
323
|
+
content={[
|
|
324
|
+
{
|
|
325
|
+
name: "description",
|
|
326
|
+
type: "string",
|
|
327
|
+
required: true,
|
|
328
|
+
description: "Description of what this scoring step does.",
|
|
329
|
+
},
|
|
330
|
+
{
|
|
331
|
+
name: "outputSchema",
|
|
332
|
+
type: "ZodSchema",
|
|
333
|
+
required: true,
|
|
334
|
+
description: "Zod schema for the expected output of the generateScore step.",
|
|
335
|
+
},
|
|
336
|
+
{
|
|
337
|
+
name: "createPrompt",
|
|
338
|
+
type: "function",
|
|
339
|
+
required: true,
|
|
340
|
+
description: "Function: ({ run, results }) => string. Returns the prompt for the LLM.",
|
|
341
|
+
},
|
|
342
|
+
{
|
|
343
|
+
name: "judge",
|
|
344
|
+
type: "object",
|
|
345
|
+
required: false,
|
|
346
|
+
description: "(Optional) LLM judge for this step (can override main judge). See Judge Object section.",
|
|
347
|
+
},
|
|
348
|
+
]}
|
|
349
|
+
/>
|
|
350
|
+
|
|
351
|
+
When using prompt object mode, you must also provide a `calculateScore` function to convert the LLM output to a numerical score:
|
|
352
|
+
|
|
353
|
+
<PropertiesTable
|
|
354
|
+
content={[
|
|
355
|
+
{
|
|
356
|
+
name: "calculateScore",
|
|
357
|
+
type: "function",
|
|
358
|
+
required: true,
|
|
359
|
+
description: "Function: ({ run, results, analyzeStepResult }) => number. Converts the LLM's structured output into a numerical score.",
|
|
360
|
+
},
|
|
361
|
+
]}
|
|
362
|
+
/>
|
|
363
|
+
|
|
364
|
+
### generateReason
|
|
365
|
+
|
|
366
|
+
Optional step that provides an explanation for the score.
|
|
367
|
+
|
|
368
|
+
**Function Mode:**
|
|
369
|
+
Function: `({ run, results, score }) => string`
|
|
370
|
+
|
|
371
|
+
<PropertiesTable
|
|
372
|
+
content={[
|
|
373
|
+
{
|
|
374
|
+
name: "run.input",
|
|
375
|
+
type: "any",
|
|
376
|
+
required: true,
|
|
377
|
+
description: "Input records provided to the scorer. If the scorer is added to an agent, this will be an array of user messages, e.g. `[{ role: 'user', content: 'hello world' }]`. If the scorer is used in a workflow, this will be the input of the workflow.",
|
|
378
|
+
},
|
|
379
|
+
{
|
|
380
|
+
name: "run.output",
|
|
381
|
+
type: "any",
|
|
382
|
+
required: true,
|
|
383
|
+
description: "Output record provided to the scorer. For agents, this is usually the agent's response. For workflows, this is the workflow's output.",
|
|
384
|
+
},
|
|
385
|
+
{
|
|
386
|
+
name: "run.runId",
|
|
387
|
+
type: "string",
|
|
388
|
+
required: true,
|
|
389
|
+
description: "Unique identifier for this scoring run.",
|
|
390
|
+
},
|
|
391
|
+
{
|
|
392
|
+
name: "run.runtimeContext",
|
|
393
|
+
type: "object",
|
|
394
|
+
required: false,
|
|
395
|
+
description: "Runtime context from the agent or workflow step being evaluated (optional).",
|
|
396
|
+
},
|
|
397
|
+
{
|
|
398
|
+
name: "results.preprocessStepResult",
|
|
399
|
+
type: "any",
|
|
400
|
+
required: false,
|
|
401
|
+
description: "Result from preprocess step, if defined (optional).",
|
|
402
|
+
},
|
|
403
|
+
{
|
|
404
|
+
name: "results.analyzeStepResult",
|
|
405
|
+
type: "any",
|
|
406
|
+
required: false,
|
|
407
|
+
description: "Result from analyze step, if defined (optional).",
|
|
408
|
+
},
|
|
409
|
+
{
|
|
410
|
+
name: "score",
|
|
411
|
+
type: "number",
|
|
412
|
+
required: true,
|
|
413
|
+
description: "Score computed by the generateScore step.",
|
|
414
|
+
},
|
|
415
|
+
]}
|
|
416
|
+
/>
|
|
417
|
+
|
|
418
|
+
Returns: `string`
|
|
419
|
+
The method must return a string explaining the score.
|
|
420
|
+
|
|
421
|
+
**Prompt Object Mode:**
|
|
422
|
+
<PropertiesTable
|
|
423
|
+
content={[
|
|
424
|
+
{
|
|
425
|
+
name: "description",
|
|
426
|
+
type: "string",
|
|
427
|
+
required: true,
|
|
428
|
+
description: "Description of what this reasoning step does.",
|
|
429
|
+
},
|
|
430
|
+
{
|
|
431
|
+
name: "createPrompt",
|
|
432
|
+
type: "function",
|
|
433
|
+
required: true,
|
|
434
|
+
description: "Function: ({ run, results, score }) => string. Returns the prompt for the LLM.",
|
|
435
|
+
},
|
|
436
|
+
{
|
|
437
|
+
name: "judge",
|
|
438
|
+
type: "object",
|
|
439
|
+
required: false,
|
|
440
|
+
description: "(Optional) LLM judge for this step (can override main judge). See Judge Object section.",
|
|
441
|
+
},
|
|
442
|
+
]}
|
|
443
|
+
/>
|
|
444
|
+
|
|
445
|
+
All step functions can be async.
|
|
@@ -49,14 +49,14 @@ This function returns an instance of the MastraScorer class. The `.run()` method
|
|
|
49
49
|
description: "The id of the run (optional).",
|
|
50
50
|
},
|
|
51
51
|
{
|
|
52
|
-
name: "
|
|
52
|
+
name: "preprocessStepResult",
|
|
53
53
|
type: "string[]",
|
|
54
54
|
description: "Array of extracted claims from the output.",
|
|
55
55
|
},
|
|
56
56
|
{
|
|
57
|
-
name: "
|
|
57
|
+
name: "preprocessPrompt",
|
|
58
58
|
type: "string",
|
|
59
|
-
description: "The prompt sent to the LLM for the
|
|
59
|
+
description: "The prompt sent to the LLM for the preprocess step (optional).",
|
|
60
60
|
},
|
|
61
61
|
{
|
|
62
62
|
name: "analyzeStepResult",
|
|
@@ -79,9 +79,9 @@ This function returns an instance of the MastraScorer class. The `.run()` method
|
|
|
79
79
|
description: "A detailed explanation of the score, including which claims were supported, contradicted, or marked as unsure.",
|
|
80
80
|
},
|
|
81
81
|
{
|
|
82
|
-
name: "
|
|
82
|
+
name: "generateReasonPrompt",
|
|
83
83
|
type: "string",
|
|
84
|
-
description: "The prompt sent to the LLM for the
|
|
84
|
+
description: "The prompt sent to the LLM for the generateReason step (optional).",
|
|
85
85
|
},
|
|
86
86
|
]}
|
|
87
87
|
/>
|
|
@@ -119,4 +119,4 @@ Final score: `(supported_claims / total_claims) * scale`
|
|
|
119
119
|
## Related
|
|
120
120
|
|
|
121
121
|
- [Answer Relevancy Scorer](./answer-relevancy)
|
|
122
|
-
- [Hallucination Scorer](./hallucination)
|
|
122
|
+
- [Hallucination Scorer](./hallucination)
|