@mastra/mcp-docs-server 0.13.7-alpha.0 → 0.13.7-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +39 -39
- package/.docs/organized/changelogs/%40mastra%2Fcloudflare-d1.md +18 -18
- package/.docs/organized/changelogs/%40mastra%2Fcloudflare.md +18 -18
- package/.docs/organized/changelogs/%40mastra%2Fcore.md +45 -45
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +21 -21
- package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +44 -44
- package/.docs/organized/changelogs/%40mastra%2Fevals.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Flibsql.md +29 -29
- package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +25 -25
- package/.docs/organized/changelogs/%40mastra%2Fmemory.md +39 -39
- package/.docs/organized/changelogs/%40mastra%2Fmongodb.md +20 -20
- package/.docs/organized/changelogs/%40mastra%2Fmssql.md +17 -0
- package/.docs/organized/changelogs/%40mastra%2Fpg.md +29 -29
- package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Fserver.md +38 -38
- package/.docs/organized/changelogs/%40mastra%2Fupstash.md +29 -29
- package/.docs/organized/changelogs/%40mastra%2Fvectorize.md +18 -18
- package/.docs/organized/changelogs/%40mastra%2Fvoice-cloudflare.md +18 -18
- package/.docs/organized/changelogs/create-mastra.md +7 -7
- package/.docs/organized/changelogs/mastra.md +32 -32
- package/.docs/organized/code-examples/agent.md +93 -3
- package/.docs/organized/code-examples/ai-sdk-v5.md +4 -4
- package/.docs/raw/agents/input-processors.mdx +268 -0
- package/.docs/raw/agents/using-tools-and-mcp.mdx +39 -0
- package/.docs/raw/community/contributing-templates.mdx +192 -0
- package/.docs/raw/getting-started/installation.mdx +16 -0
- package/.docs/raw/getting-started/templates.mdx +95 -0
- package/.docs/raw/observability/tracing.mdx +44 -0
- package/.docs/raw/reference/agents/agent.mdx +7 -0
- package/.docs/raw/reference/agents/generate.mdx +18 -1
- package/.docs/raw/reference/agents/stream.mdx +18 -1
- package/.docs/raw/reference/cli/dev.mdx +6 -0
- package/.docs/raw/reference/client-js/memory.mdx +18 -0
- package/.docs/raw/reference/core/mastra-class.mdx +1 -1
- package/.docs/raw/reference/memory/Memory.mdx +1 -0
- package/.docs/raw/reference/memory/deleteMessages.mdx +95 -0
- package/.docs/raw/reference/memory/getThreadsByResourceId.mdx +33 -1
- package/.docs/raw/reference/rag/upstash.mdx +112 -5
- package/.docs/raw/reference/scorers/answer-relevancy.mdx +114 -0
- package/.docs/raw/reference/scorers/bias.mdx +127 -0
- package/.docs/raw/reference/scorers/completeness.mdx +89 -0
- package/.docs/raw/reference/scorers/content-similarity.mdx +96 -0
- package/.docs/raw/reference/scorers/custom-code-scorer.mdx +155 -0
- package/.docs/raw/reference/scorers/faithfulness.mdx +122 -0
- package/.docs/raw/reference/scorers/hallucination.mdx +133 -0
- package/.docs/raw/reference/scorers/keyword-coverage.mdx +92 -0
- package/.docs/raw/reference/scorers/llm-scorer.mdx +210 -0
- package/.docs/raw/reference/scorers/mastra-scorer.mdx +218 -0
- package/.docs/raw/reference/scorers/textual-difference.mdx +76 -0
- package/.docs/raw/reference/scorers/tone-consistency.mdx +75 -0
- package/.docs/raw/reference/scorers/toxicity.mdx +109 -0
- package/.docs/raw/reference/storage/libsql.mdx +7 -4
- package/.docs/raw/reference/storage/mssql.mdx +7 -3
- package/.docs/raw/reference/storage/postgresql.mdx +7 -3
- package/.docs/raw/reference/templates.mdx +228 -0
- package/.docs/raw/scorers/custom-scorers.mdx +319 -0
- package/.docs/raw/scorers/off-the-shelf-scorers.mdx +30 -0
- package/.docs/raw/scorers/overview.mdx +124 -0
- package/package.json +4 -4
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: Hallucination | Scorers | Mastra Docs"
|
|
3
|
+
description: Documentation for the Hallucination Scorer in Mastra, which evaluates the factual correctness of LLM outputs by identifying contradictions with provided context.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Hallucination Scorer
|
|
7
|
+
|
|
8
|
+
The `createHallucinationScorer()` function evaluates whether an LLM generates factually correct information by comparing its output against the provided context. This scorer measures hallucination by identifying direct contradictions between the context and the output.
|
|
9
|
+
|
|
10
|
+
For a usage example, see the [Hallucination Examples](/examples/scorers/hallucination).
|
|
11
|
+
|
|
12
|
+
## Parameters
|
|
13
|
+
|
|
14
|
+
The `createHallucinationScorer()` function accepts a single options object with the following properties:
|
|
15
|
+
|
|
16
|
+
<PropertiesTable
|
|
17
|
+
content={[
|
|
18
|
+
{
|
|
19
|
+
name: "model",
|
|
20
|
+
type: "LanguageModel",
|
|
21
|
+
required: true,
|
|
22
|
+
description: "Configuration for the model used to evaluate hallucination.",
|
|
23
|
+
},
|
|
24
|
+
{
|
|
25
|
+
name: "scale",
|
|
26
|
+
type: "number",
|
|
27
|
+
required: false,
|
|
28
|
+
defaultValue: "1",
|
|
29
|
+
description: "Maximum score value.",
|
|
30
|
+
},
|
|
31
|
+
]}
|
|
32
|
+
/>
|
|
33
|
+
|
|
34
|
+
This function returns an instance of the MastraScorer class. The `.run()` method accepts the same input as other scorers (see the [MastraScorer reference](./mastra-scorer)), but the return value includes LLM-specific fields as documented below.
|
|
35
|
+
|
|
36
|
+
## .run() Returns
|
|
37
|
+
|
|
38
|
+
<PropertiesTable
|
|
39
|
+
content={[
|
|
40
|
+
{
|
|
41
|
+
name: "runId",
|
|
42
|
+
type: "string",
|
|
43
|
+
description: "The id of the run (optional).",
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
name: "extractStepResult",
|
|
47
|
+
type: "object",
|
|
48
|
+
description: "Object with extracted claims: { claims: string[] }",
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
name: "extractPrompt",
|
|
52
|
+
type: "string",
|
|
53
|
+
description: "The prompt sent to the LLM for the extract step (optional).",
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
name: "analyzeStepResult",
|
|
57
|
+
type: "object",
|
|
58
|
+
description: "Object with verdicts: { verdicts: Array<{ statement: string, verdict: 'yes' | 'no', reason: string }> }",
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
name: "analyzePrompt",
|
|
62
|
+
type: "string",
|
|
63
|
+
description: "The prompt sent to the LLM for the analyze step (optional).",
|
|
64
|
+
},
|
|
65
|
+
{
|
|
66
|
+
name: "score",
|
|
67
|
+
type: "number",
|
|
68
|
+
description: "Hallucination score (0 to scale, default 0-1).",
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
name: "reason",
|
|
72
|
+
type: "string",
|
|
73
|
+
description: "Detailed explanation of the score and identified contradictions.",
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
name: "reasonPrompt",
|
|
77
|
+
type: "string",
|
|
78
|
+
description: "The prompt sent to the LLM for the reason step (optional).",
|
|
79
|
+
},
|
|
80
|
+
]}
|
|
81
|
+
/>
|
|
82
|
+
|
|
83
|
+
## Scoring Details
|
|
84
|
+
|
|
85
|
+
The scorer evaluates hallucination through contradiction detection and unsupported claim analysis.
|
|
86
|
+
|
|
87
|
+
### Scoring Process
|
|
88
|
+
|
|
89
|
+
1. Analyzes factual content:
|
|
90
|
+
- Extracts statements from context
|
|
91
|
+
- Identifies numerical values and dates
|
|
92
|
+
- Maps statement relationships
|
|
93
|
+
2. Analyzes output for hallucinations:
|
|
94
|
+
- Compares against context statements
|
|
95
|
+
- Marks direct conflicts as hallucinations
|
|
96
|
+
- Identifies unsupported claims as hallucinations
|
|
97
|
+
- Evaluates numerical accuracy
|
|
98
|
+
- Considers approximation context
|
|
99
|
+
3. Calculates hallucination score:
|
|
100
|
+
- Counts hallucinated statements (contradictions and unsupported claims)
|
|
101
|
+
- Divides by total statements
|
|
102
|
+
- Scales to configured range
|
|
103
|
+
|
|
104
|
+
Final score: `(hallucinated_statements / total_statements) * scale`
|
|
105
|
+
|
|
106
|
+
### Important Considerations
|
|
107
|
+
|
|
108
|
+
- Claims not present in context are treated as hallucinations
|
|
109
|
+
- Subjective claims are hallucinations unless explicitly supported
|
|
110
|
+
- Speculative language ("might", "possibly") about facts IN context is allowed
|
|
111
|
+
- Speculative language about facts NOT in context is treated as hallucination
|
|
112
|
+
- Empty outputs result in zero hallucinations
|
|
113
|
+
- Numerical evaluation considers:
|
|
114
|
+
- Scale-appropriate precision
|
|
115
|
+
- Contextual approximations
|
|
116
|
+
- Explicit precision indicators
|
|
117
|
+
|
|
118
|
+
### Score interpretation
|
|
119
|
+
|
|
120
|
+
(0 to scale, default 0-1)
|
|
121
|
+
|
|
122
|
+
- 1.0: Complete hallucination - contradicts all context statements
|
|
123
|
+
- 0.75: High hallucination - contradicts 75% of context statements
|
|
124
|
+
- 0.5: Moderate hallucination - contradicts half of context statements
|
|
125
|
+
- 0.25: Low hallucination - contradicts 25% of context statements
|
|
126
|
+
- 0.0: No hallucination - output aligns with all context statements
|
|
127
|
+
|
|
128
|
+
**Note:** The score represents the degree of hallucination - lower scores indicate better factual alignment with the provided context.
|
|
129
|
+
|
|
130
|
+
## Related
|
|
131
|
+
|
|
132
|
+
- [Faithfulness Scorer](./faithfulness)
|
|
133
|
+
- [Answer Relevancy Scorer](./answer-relevancy)
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: Keyword Coverage | Scorers | Mastra Docs"
|
|
3
|
+
description: Documentation for the Keyword Coverage Scorer in Mastra, which evaluates how well LLM outputs cover important keywords from the input.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Keyword Coverage Scorer
|
|
7
|
+
|
|
8
|
+
The `createKeywordCoverageScorer()` function evaluates how well an LLM's output covers the important keywords from the input. It analyzes keyword presence and matches while ignoring common words and stop words.
|
|
9
|
+
|
|
10
|
+
For a usage example, see the [Keyword Coverage Examples](/examples/scorers/keyword-coverage).
|
|
11
|
+
|
|
12
|
+
## Parameters
|
|
13
|
+
|
|
14
|
+
The `createKeywordCoverageScorer()` function does not take any options.
|
|
15
|
+
|
|
16
|
+
This function returns an instance of the MastraScorer class. See the [MastraScorer reference](./mastra-scorer) for details on the `.run()` method and its input/output.
|
|
17
|
+
|
|
18
|
+
## .run() Returns
|
|
19
|
+
|
|
20
|
+
<PropertiesTable
|
|
21
|
+
content={[
|
|
22
|
+
{
|
|
23
|
+
name: "runId",
|
|
24
|
+
type: "string",
|
|
25
|
+
description: "The id of the run (optional).",
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
name: "extractStepResult",
|
|
29
|
+
type: "object",
|
|
30
|
+
description: "Object with extracted keywords: { referenceKeywords: Set<string>, responseKeywords: Set<string> }",
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
name: "analyzeStepResult",
|
|
34
|
+
type: "object",
|
|
35
|
+
description: "Object with keyword coverage: { totalKeywords: number, matchedKeywords: number }",
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
name: "score",
|
|
39
|
+
type: "number",
|
|
40
|
+
description: "Coverage score (0-1) representing the proportion of matched keywords.",
|
|
41
|
+
},
|
|
42
|
+
]}
|
|
43
|
+
/>
|
|
44
|
+
|
|
45
|
+
## Scoring Details
|
|
46
|
+
|
|
47
|
+
The scorer evaluates keyword coverage by matching keywords with the following features:
|
|
48
|
+
|
|
49
|
+
- Common word and stop word filtering (e.g., "the", "a", "and")
|
|
50
|
+
- Case-insensitive matching
|
|
51
|
+
- Word form variation handling
|
|
52
|
+
- Special handling of technical terms and compound words
|
|
53
|
+
|
|
54
|
+
### Scoring Process
|
|
55
|
+
|
|
56
|
+
1. Processes keywords from input and output:
|
|
57
|
+
- Filters out common words and stop words
|
|
58
|
+
- Normalizes case and word forms
|
|
59
|
+
- Handles special terms and compounds
|
|
60
|
+
2. Calculates keyword coverage:
|
|
61
|
+
- Matches keywords between texts
|
|
62
|
+
- Counts successful matches
|
|
63
|
+
- Computes coverage ratio
|
|
64
|
+
|
|
65
|
+
Final score: `matched_keywords / total_keywords`
|
|
66
|
+
|
|
67
|
+
### Score interpretation
|
|
68
|
+
|
|
69
|
+
(0 to 1)
|
|
70
|
+
|
|
71
|
+
- 1.0: Perfect keyword coverage
|
|
72
|
+
- 0.7-0.9: Good coverage with most keywords present
|
|
73
|
+
- 0.4-0.6: Moderate coverage with some keywords missing
|
|
74
|
+
- 0.1-0.3: Poor coverage with many keywords missing
|
|
75
|
+
- 0.0: No keyword matches
|
|
76
|
+
|
|
77
|
+
## Special Cases
|
|
78
|
+
|
|
79
|
+
The scorer handles several special cases:
|
|
80
|
+
|
|
81
|
+
- Empty input/output: Returns a score of 1.0 if both are empty, and 0.0 if only one of them is empty
|
|
82
|
+
- Single word: Treated as a single keyword
|
|
83
|
+
- Technical terms: Preserves compound technical terms (e.g., "React.js", "machine learning")
|
|
84
|
+
- Case differences: "JavaScript" matches "javascript"
|
|
85
|
+
- Common words: Ignored in scoring to focus on meaningful keywords
|
|
86
|
+
|
|
87
|
+
## Related
|
|
88
|
+
|
|
89
|
+
- [Completeness Scorer](./completeness)
|
|
90
|
+
- [Content Similarity Scorer](./content-similarity)
|
|
91
|
+
- [Answer Relevancy Scorer](./answer-relevancy)
|
|
92
|
+
- [Textual Difference Scorer](./textual-difference)
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: createLLMScorer | Scorers | Mastra Docs"
|
|
3
|
+
description: Documentation for creating LLM-based scorers in Mastra, allowing users to define evaluation logic using language models.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# createLLMScorer
|
|
7
|
+
|
|
8
|
+
The `createLLMScorer()` function lets you define custom scorers that use a language model (LLM) as a judge for evaluation. LLM scorers are ideal for tasks where you want to use prompt-based evaluation, such as answer relevancy, faithfulness, or custom prompt-based metrics. LLM scorers integrate seamlessly with the Mastra scoring framework and can be used anywhere built-in scorers are used.
|
|
9
|
+
|
|
10
|
+
For a usage example, see the [Custom LLM Judge Examples](/examples/scorers/custom-llm-judge-eval).
|
|
11
|
+
|
|
12
|
+
## createLLMScorer Options
|
|
13
|
+
|
|
14
|
+
<PropertiesTable
|
|
15
|
+
content={[
|
|
16
|
+
{
|
|
17
|
+
name: "name",
|
|
18
|
+
type: "string",
|
|
19
|
+
required: true,
|
|
20
|
+
description: "Name of the scorer.",
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
name: "description",
|
|
24
|
+
type: "string",
|
|
25
|
+
required: true,
|
|
26
|
+
description: "Description of what the scorer does.",
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
name: "judge",
|
|
30
|
+
type: "object",
|
|
31
|
+
required: true,
|
|
32
|
+
description: "Judge configuration object. Must include a model and instructions (system prompt). See Judge Object section below.",
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
name: "extract",
|
|
36
|
+
type: "object",
|
|
37
|
+
required: false,
|
|
38
|
+
description: "(Optional) Extraction step configuration object. See Extract Object section below.",
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
name: "analyze",
|
|
42
|
+
type: "object",
|
|
43
|
+
required: true,
|
|
44
|
+
description: "Analysis step configuration object. See Analyze Object section below.",
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
name: "reason",
|
|
48
|
+
type: "object",
|
|
49
|
+
required: false,
|
|
50
|
+
description: "(Optional) Reason step configuration object. See Reason Object section below.",
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
name: "calculateScore",
|
|
54
|
+
type: "function",
|
|
55
|
+
required: true,
|
|
56
|
+
description: "Function: ({ run }) => number. Computes the final score from the analyze step result.",
|
|
57
|
+
},
|
|
58
|
+
]}
|
|
59
|
+
/>
|
|
60
|
+
|
|
61
|
+
This function returns an instance of the MastraScorer class. See the [MastraScorer reference](./mastra-scorer) for details on the `.run()` method and its input/output.
|
|
62
|
+
|
|
63
|
+
## Judge Object
|
|
64
|
+
<PropertiesTable
|
|
65
|
+
content={[
|
|
66
|
+
{
|
|
67
|
+
name: "model",
|
|
68
|
+
type: "LanguageModel",
|
|
69
|
+
required: true,
|
|
70
|
+
description: "The LLM model instance to use for evaluation.",
|
|
71
|
+
},
|
|
72
|
+
{
|
|
73
|
+
name: "instructions",
|
|
74
|
+
type: "string",
|
|
75
|
+
required: true,
|
|
76
|
+
description: "System prompt/instructions for the LLM.",
|
|
77
|
+
},
|
|
78
|
+
]}
|
|
79
|
+
/>
|
|
80
|
+
|
|
81
|
+
## Extract Object
|
|
82
|
+
<PropertiesTable
|
|
83
|
+
content={[
|
|
84
|
+
{
|
|
85
|
+
name: "description",
|
|
86
|
+
type: "string",
|
|
87
|
+
required: true,
|
|
88
|
+
description: "Description of the extract step.",
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
name: "judge",
|
|
92
|
+
type: "object",
|
|
93
|
+
required: false,
|
|
94
|
+
description: "(Optional) LLM judge for this step (can override main judge/model). See Judge Object section.",
|
|
95
|
+
},
|
|
96
|
+
{
|
|
97
|
+
name: "outputSchema",
|
|
98
|
+
type: "ZodSchema",
|
|
99
|
+
required: true,
|
|
100
|
+
description: "Zod schema for the expected output of the extract step.",
|
|
101
|
+
},
|
|
102
|
+
{
|
|
103
|
+
name: "createPrompt",
|
|
104
|
+
type: "function",
|
|
105
|
+
required: true,
|
|
106
|
+
description: "Function: ({ run: ScoringInput }) => string. Returns the prompt for the LLM.",
|
|
107
|
+
},
|
|
108
|
+
]}
|
|
109
|
+
/>
|
|
110
|
+
|
|
111
|
+
## Analyze Object
|
|
112
|
+
<PropertiesTable
|
|
113
|
+
content={[
|
|
114
|
+
{
|
|
115
|
+
name: "description",
|
|
116
|
+
type: "string",
|
|
117
|
+
required: true,
|
|
118
|
+
description: "Description of the analyze step.",
|
|
119
|
+
},
|
|
120
|
+
{
|
|
121
|
+
name: "judge",
|
|
122
|
+
type: "object",
|
|
123
|
+
required: false,
|
|
124
|
+
description: "(Optional) LLM judge for this step (can override main judge/model). See Judge Object section.",
|
|
125
|
+
},
|
|
126
|
+
{
|
|
127
|
+
name: "outputSchema",
|
|
128
|
+
type: "ZodSchema",
|
|
129
|
+
required: true,
|
|
130
|
+
description: "Zod schema for the expected output of the analyze step.",
|
|
131
|
+
},
|
|
132
|
+
{
|
|
133
|
+
name: "createPrompt",
|
|
134
|
+
type: "function",
|
|
135
|
+
required: true,
|
|
136
|
+
description: "Function: ({ run: ScoringInput & { extractStepResult } }) => string. Returns the LLM prompt.",
|
|
137
|
+
},
|
|
138
|
+
]}
|
|
139
|
+
/>
|
|
140
|
+
|
|
141
|
+
## Calculate Score Function
|
|
142
|
+
|
|
143
|
+
The `calculateScore` function converts the LLM's structured analysis into a numerical score. This function receives the results from previous steps but not the score itself (since that's what it calculates).
|
|
144
|
+
|
|
145
|
+
<PropertiesTable
|
|
146
|
+
content={[
|
|
147
|
+
{
|
|
148
|
+
name: "input",
|
|
149
|
+
type: "Record<string, any>[]",
|
|
150
|
+
required: true,
|
|
151
|
+
description:
|
|
152
|
+
"Input records provided to the scorer. If the scorer is added to an agent, this will be an array of user messages, e.g. `[{ role: 'user', content: 'hello world' }]`. If the scorer is used in a workflow, this will be the input of the workflow.",
|
|
153
|
+
},
|
|
154
|
+
{
|
|
155
|
+
name: "output",
|
|
156
|
+
type: "Record<string, any>",
|
|
157
|
+
required: true,
|
|
158
|
+
description:
|
|
159
|
+
"Output record provided to the scorer. For agents, this is usually the agent's response. For workflows, this is the workflow's output.",
|
|
160
|
+
},
|
|
161
|
+
{
|
|
162
|
+
name: "runtimeContext",
|
|
163
|
+
type: "object",
|
|
164
|
+
required: false,
|
|
165
|
+
description: "Runtime context from the agent or workflow step being evaluated (optional).",
|
|
166
|
+
},
|
|
167
|
+
{
|
|
168
|
+
name: "extractStepResult",
|
|
169
|
+
type: "object",
|
|
170
|
+
required: false,
|
|
171
|
+
description: "Result of the extract step, if defined (optional).",
|
|
172
|
+
},
|
|
173
|
+
{
|
|
174
|
+
name: "analyzeStepResult",
|
|
175
|
+
type: "object",
|
|
176
|
+
required: true,
|
|
177
|
+
description: "Structured result from the analyze step, conforming to the outputSchema defined in the analyze step.",
|
|
178
|
+
},
|
|
179
|
+
]}
|
|
180
|
+
/>
|
|
181
|
+
|
|
182
|
+
Returns: `number`
|
|
183
|
+
The function must return a numerical score, typically in the 0-1 range where 1 represents the best possible score.
|
|
184
|
+
|
|
185
|
+
## Reason Object
|
|
186
|
+
<PropertiesTable
|
|
187
|
+
content={[
|
|
188
|
+
{
|
|
189
|
+
name: "description",
|
|
190
|
+
type: "string",
|
|
191
|
+
required: true,
|
|
192
|
+
description: "Description of the reason step.",
|
|
193
|
+
},
|
|
194
|
+
{
|
|
195
|
+
name: "judge",
|
|
196
|
+
type: "object",
|
|
197
|
+
required: false,
|
|
198
|
+
description: "(Optional) LLM judge for this step (can override main judge/model). See Judge Object section.",
|
|
199
|
+
},
|
|
200
|
+
{
|
|
201
|
+
name: "createPrompt",
|
|
202
|
+
type: "function",
|
|
203
|
+
required: true,
|
|
204
|
+
description: "Function: ({ run }) => string. `run` includes input, output, extractStepResult, analyzeStepResult, and score. Returns the prompt for the LLM.",
|
|
205
|
+
},
|
|
206
|
+
]}
|
|
207
|
+
/>
|
|
208
|
+
|
|
209
|
+
LLM scorers may also include step-specific prompt fields in the return value, such as `extractPrompt`, `analyzePrompt`, and `reasonPrompt`.
|
|
210
|
+
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: MastraScorer | Scorers | Mastra Docs"
|
|
3
|
+
description: Documentation for the MastraScorer base class in Mastra, which provides the foundation for all custom and built-in scorers.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# MastraScorer
|
|
7
|
+
|
|
8
|
+
The `MastraScorer` class is the base class for all scorers in Mastra. It provides a standard `.run()` method for evaluating input/output pairs and supports multi-step scoring workflows. Most users will use `createScorer` or `createLLMScorer`, but advanced users can subclass or instantiate `MastraScorer` directly for full control.
|
|
9
|
+
|
|
10
|
+
## Constructor Options
|
|
11
|
+
|
|
12
|
+
<PropertiesTable
|
|
13
|
+
content={[
|
|
14
|
+
{
|
|
15
|
+
name: "name",
|
|
16
|
+
type: "string",
|
|
17
|
+
required: true,
|
|
18
|
+
description: "Name of the scorer.",
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
name: "description",
|
|
22
|
+
type: "string",
|
|
23
|
+
required: true,
|
|
24
|
+
description: "Description of what the scorer does.",
|
|
25
|
+
},
|
|
26
|
+
{
|
|
27
|
+
name: "extract",
|
|
28
|
+
type: "function",
|
|
29
|
+
required: false,
|
|
30
|
+
description: "Optional extraction step. See extract step signature below.",
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
name: "analyze",
|
|
34
|
+
type: "function",
|
|
35
|
+
required: true,
|
|
36
|
+
description: "Main scoring logic. See analyze step signature below.",
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
name: "reason",
|
|
40
|
+
type: "function",
|
|
41
|
+
required: false,
|
|
42
|
+
description: "Optional reason/explanation step. See reason step signature below.",
|
|
43
|
+
},
|
|
44
|
+
{
|
|
45
|
+
name: "metadata",
|
|
46
|
+
type: "object",
|
|
47
|
+
required: false,
|
|
48
|
+
description: "Optional metadata for the scorer.",
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
name: "isLLMScorer",
|
|
52
|
+
type: "boolean",
|
|
53
|
+
required: false,
|
|
54
|
+
description: "(Internal) Used to distinguish LLM scorers.",
|
|
55
|
+
},
|
|
56
|
+
]}
|
|
57
|
+
/>
|
|
58
|
+
|
|
59
|
+
## Step Function Signatures
|
|
60
|
+
|
|
61
|
+
### extract
|
|
62
|
+
<PropertiesTable
|
|
63
|
+
content={[
|
|
64
|
+
{
|
|
65
|
+
name: "input",
|
|
66
|
+
type: "Record<string, any>[]",
|
|
67
|
+
required: true,
|
|
68
|
+
description: "Input records provided to the scorer. If the scorer is added to an agent, this will be an array of user messages, e.g. `[{ role: 'user', content: 'hello world' }]`. If the scorer is used in a workflow, this will be the input of the workflow.",
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
name: "output",
|
|
72
|
+
type: "Record<string, any>",
|
|
73
|
+
required: true,
|
|
74
|
+
description: "Output record provided to the scorer. For agents, this is usually the agent's response. For workflows, this is the workflow's output.",
|
|
75
|
+
},
|
|
76
|
+
]}
|
|
77
|
+
/>
|
|
78
|
+
Returns: `{ results: any }`
|
|
79
|
+
The function must return an object with a `results` property. The value of `results` will be passed to the analyze function as `extractStepResult`.
|
|
80
|
+
|
|
81
|
+
### analyze
|
|
82
|
+
<PropertiesTable
|
|
83
|
+
content={[
|
|
84
|
+
{
|
|
85
|
+
name: "input",
|
|
86
|
+
type: "Record<string, any>[]",
|
|
87
|
+
required: true,
|
|
88
|
+
description: "Input records provided to the scorer. If the scorer is added to an agent, this will be an array of user messages, e.g. `[{ role: 'user', content: 'hello world' }]`. If the scorer is used in a workflow, this will be the input of the workflow.",
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
name: "output",
|
|
92
|
+
type: "Record<string, any>",
|
|
93
|
+
required: true,
|
|
94
|
+
description: "Output record provided to the scorer. For agents, this is usually the agent's response. For workflows, this is the workflow's output.",
|
|
95
|
+
},
|
|
96
|
+
{
|
|
97
|
+
name: "extractStepResult",
|
|
98
|
+
type: "object",
|
|
99
|
+
required: false,
|
|
100
|
+
description: "Result of the extract step, if defined (optional).",
|
|
101
|
+
},
|
|
102
|
+
]}
|
|
103
|
+
/>
|
|
104
|
+
Returns: `{ score: number, results?: any }`
|
|
105
|
+
The function must return an object with a `score` property (required). Optionally, it may also return a `results` property. The value of `results` will be passed to the reason function as `analyzeStepResult`.
|
|
106
|
+
|
|
107
|
+
### reason
|
|
108
|
+
<PropertiesTable
|
|
109
|
+
content={[
|
|
110
|
+
{
|
|
111
|
+
name: "input",
|
|
112
|
+
type: "Record<string, any>[]",
|
|
113
|
+
required: true,
|
|
114
|
+
description: "Input records provided to the scorer. If the scorer is added to an agent, this will be an array of user messages, e.g. `[{ role: 'user', content: 'hello world' }]`. If the scorer is used in a workflow, this will be the input of the workflow.",
|
|
115
|
+
},
|
|
116
|
+
{
|
|
117
|
+
name: "output",
|
|
118
|
+
type: "Record<string, any>",
|
|
119
|
+
required: true,
|
|
120
|
+
description: "Output record provided to the scorer. For agents, this is usually the agent's response. For workflows, this is the workflow's output.",
|
|
121
|
+
},
|
|
122
|
+
{
|
|
123
|
+
name: "score",
|
|
124
|
+
type: "number",
|
|
125
|
+
required: true,
|
|
126
|
+
description: "Score computed by the analyze step.",
|
|
127
|
+
},
|
|
128
|
+
{
|
|
129
|
+
name: "analyzeStepResult",
|
|
130
|
+
type: "object",
|
|
131
|
+
required: true,
|
|
132
|
+
description: "Result of the analyze step.",
|
|
133
|
+
},
|
|
134
|
+
{
|
|
135
|
+
name: "extractStepResult",
|
|
136
|
+
type: "object",
|
|
137
|
+
required: false,
|
|
138
|
+
description: "Result of the extract step, if defined (optional).",
|
|
139
|
+
},
|
|
140
|
+
]}
|
|
141
|
+
/>
|
|
142
|
+
Returns: `{ reason: string }`
|
|
143
|
+
The function must return an object with a `reason` property, which should be a string explaining the score.
|
|
144
|
+
|
|
145
|
+
All step functions can be async.
|
|
146
|
+
|
|
147
|
+
## .run() Input
|
|
148
|
+
|
|
149
|
+
<PropertiesTable
|
|
150
|
+
content={[
|
|
151
|
+
{
|
|
152
|
+
name: "runId",
|
|
153
|
+
type: "string",
|
|
154
|
+
required: false,
|
|
155
|
+
description: "The id of the run (optional).",
|
|
156
|
+
},
|
|
157
|
+
{
|
|
158
|
+
name: "input",
|
|
159
|
+
type: "Record<string, any>[]",
|
|
160
|
+
required: true,
|
|
161
|
+
description: "An array of records. This should contain user messages or the data to be evaluated.",
|
|
162
|
+
},
|
|
163
|
+
{
|
|
164
|
+
name: "output",
|
|
165
|
+
type: "Record<string, any>",
|
|
166
|
+
required: true,
|
|
167
|
+
description: "A record. This should contain the output to be evaluated.",
|
|
168
|
+
},
|
|
169
|
+
{
|
|
170
|
+
name: "additionalContext",
|
|
171
|
+
type: "Record<string, any>",
|
|
172
|
+
required: false,
|
|
173
|
+
description: "Additional context for the run (optional).",
|
|
174
|
+
},
|
|
175
|
+
{
|
|
176
|
+
name: "runtimeContext",
|
|
177
|
+
type: "Record<string, any>",
|
|
178
|
+
required: false,
|
|
179
|
+
description: "Runtime context for the run (optional).",
|
|
180
|
+
},
|
|
181
|
+
]}
|
|
182
|
+
/>
|
|
183
|
+
|
|
184
|
+
## .run() Returns
|
|
185
|
+
|
|
186
|
+
<PropertiesTable
|
|
187
|
+
content={[
|
|
188
|
+
{
|
|
189
|
+
name: "runId",
|
|
190
|
+
type: "string",
|
|
191
|
+
description: "The id of the run (optional).",
|
|
192
|
+
},
|
|
193
|
+
{
|
|
194
|
+
name: "extractStepResult",
|
|
195
|
+
type: "object",
|
|
196
|
+
description: "Result of the extract step, if defined (optional).",
|
|
197
|
+
},
|
|
198
|
+
{
|
|
199
|
+
name: "analyzeStepResult",
|
|
200
|
+
type: "object",
|
|
201
|
+
description: "Result of the analyze step (custom structure defined by your scorer).",
|
|
202
|
+
},
|
|
203
|
+
{
|
|
204
|
+
name: "score",
|
|
205
|
+
type: "number",
|
|
206
|
+
description: "Score computed by your analyze function.",
|
|
207
|
+
},
|
|
208
|
+
{
|
|
209
|
+
name: "reason",
|
|
210
|
+
type: "string",
|
|
211
|
+
description: "Reason/explanation for the score, if defined (optional).",
|
|
212
|
+
},
|
|
213
|
+
]}
|
|
214
|
+
/>
|
|
215
|
+
|
|
216
|
+
## Integration
|
|
217
|
+
|
|
218
|
+
MastraScorer instances can be used with both agents and workflow steps.
|