@mastra/mcp-docs-server 0.13.7-alpha.0 → 0.13.7-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fcore.md +9 -9
- package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Fevals.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +10 -10
- package/.docs/organized/changelogs/%40mastra%2Fmemory.md +11 -11
- package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +12 -12
- package/.docs/organized/changelogs/%40mastra%2Fserver.md +10 -10
- package/.docs/organized/changelogs/create-mastra.md +7 -7
- package/.docs/organized/changelogs/mastra.md +12 -12
- package/.docs/raw/community/contributing-templates.mdx +192 -0
- package/.docs/raw/getting-started/installation.mdx +16 -0
- package/.docs/raw/getting-started/templates.mdx +95 -0
- package/.docs/raw/reference/agents/generate.mdx +18 -1
- package/.docs/raw/reference/agents/stream.mdx +18 -1
- package/.docs/raw/reference/core/mastra-class.mdx +1 -1
- package/.docs/raw/reference/scorers/answer-relevancy.mdx +115 -0
- package/.docs/raw/reference/scorers/bias.mdx +127 -0
- package/.docs/raw/reference/scorers/completeness.mdx +89 -0
- package/.docs/raw/reference/scorers/content-similarity.mdx +96 -0
- package/.docs/raw/reference/scorers/custom-code-scorer.mdx +155 -0
- package/.docs/raw/reference/scorers/faithfulness.mdx +123 -0
- package/.docs/raw/reference/scorers/hallucination.mdx +135 -0
- package/.docs/raw/reference/scorers/keyword-coverage.mdx +92 -0
- package/.docs/raw/reference/scorers/llm-scorer.mdx +166 -0
- package/.docs/raw/reference/scorers/mastra-scorer.mdx +218 -0
- package/.docs/raw/reference/scorers/textual-difference.mdx +76 -0
- package/.docs/raw/reference/scorers/tone-consistency.mdx +75 -0
- package/.docs/raw/reference/scorers/toxicity.mdx +109 -0
- package/.docs/raw/reference/templates.mdx +222 -0
- package/package.json +5 -5
|
@@ -16,16 +16,27 @@ The `messages` parameter can be:
|
|
|
16
16
|
- A single string
|
|
17
17
|
- An array of strings
|
|
18
18
|
- An array of message objects with `role` and `content` properties
|
|
19
|
+
- An array of `UIMessageWithMetadata` objects (for messages with metadata)
|
|
19
20
|
|
|
20
|
-
The message object
|
|
21
|
+
The message object structures:
|
|
21
22
|
|
|
22
23
|
```typescript
|
|
23
24
|
interface Message {
|
|
24
25
|
role: "system" | "user" | "assistant";
|
|
25
26
|
content: string;
|
|
26
27
|
}
|
|
28
|
+
|
|
29
|
+
// For messages with metadata
|
|
30
|
+
interface UIMessageWithMetadata {
|
|
31
|
+
role: "user" | "assistant";
|
|
32
|
+
content: string;
|
|
33
|
+
parts: Array<{ type: string; text?: string; [key: string]: any }>;
|
|
34
|
+
metadata?: Record<string, unknown>; // Optional metadata field
|
|
35
|
+
}
|
|
27
36
|
```
|
|
28
37
|
|
|
38
|
+
When using `UIMessageWithMetadata`, the metadata will be preserved throughout the conversation and stored with the messages in memory.
|
|
39
|
+
|
|
29
40
|
### `options` (Optional)
|
|
30
41
|
|
|
31
42
|
An optional object that can include configuration for output structure, memory management, tool usage, telemetry, and more.
|
|
@@ -181,6 +192,12 @@ An optional object that can include configuration for output structure, memory m
|
|
|
181
192
|
description:
|
|
182
193
|
"Tools that are executed on the 'client' side of the request. These tools do not have execute functions in the definition.",
|
|
183
194
|
},
|
|
195
|
+
{
|
|
196
|
+
name: "savePerStep",
|
|
197
|
+
type: "boolean",
|
|
198
|
+
isOptional: true,
|
|
199
|
+
description: "Save messages incrementally after each generation step completes (default: false)",
|
|
200
|
+
}
|
|
184
201
|
]}
|
|
185
202
|
/>
|
|
186
203
|
|
|
@@ -126,7 +126,7 @@ The constructor accepts an optional `Config` object to customize its behavior an
|
|
|
126
126
|
"Server configuration including port, host, timeout, API routes, middleware, CORS settings, and build options for Swagger UI, API request logging, and OpenAPI docs.",
|
|
127
127
|
isOptional: true,
|
|
128
128
|
defaultValue:
|
|
129
|
-
"{ port: 4111, host: localhost, cors: { origin: '*', allowMethods: ['GET', 'POST', 'PUT', 'DELETE', 'OPTIONS'], allowHeaders: ['Content-Type', 'Authorization', 'x-mastra-client-type'], exposeHeaders: ['Content-Length', 'X-Requested-With'], credentials: false } }",
|
|
129
|
+
"{ port: 4111, host: localhost, cors: { origin: '*', allowMethods: ['GET', 'POST', 'PUT', 'PATCH', 'DELETE', 'OPTIONS'], allowHeaders: ['Content-Type', 'Authorization', 'x-mastra-client-type'], exposeHeaders: ['Content-Length', 'X-Requested-With'], credentials: false } }",
|
|
130
130
|
},
|
|
131
131
|
{
|
|
132
132
|
name: "mcpServers",
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: Answer Relevancy | Scorers | Mastra Docs"
|
|
3
|
+
description: Documentation for the Answer Relevancy Scorer in Mastra, which evaluates how well LLM outputs address the input query.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Answer Relevancy Scorer
|
|
7
|
+
|
|
8
|
+
The `createAnswerRelevancyScorer()` function accepts a single options object with the following properties:
|
|
9
|
+
|
|
10
|
+
For a usage example, see the [Answer Relevancy Examples](/examples/scorers/answer-relevancy).
|
|
11
|
+
|
|
12
|
+
## Parameters
|
|
13
|
+
|
|
14
|
+
<PropertiesTable
|
|
15
|
+
content={[
|
|
16
|
+
{
|
|
17
|
+
name: "model",
|
|
18
|
+
type: "LanguageModel",
|
|
19
|
+
required: true,
|
|
20
|
+
description: "Configuration for the model used to evaluate relevancy.",
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
name: "uncertaintyWeight",
|
|
24
|
+
type: "number",
|
|
25
|
+
required: false,
|
|
26
|
+
defaultValue: "0.3",
|
|
27
|
+
description: "Weight given to 'unsure' verdicts in scoring (0-1).",
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
name: "scale",
|
|
31
|
+
type: "number",
|
|
32
|
+
required: false,
|
|
33
|
+
defaultValue: "1",
|
|
34
|
+
description: "Maximum score value.",
|
|
35
|
+
},
|
|
36
|
+
]}
|
|
37
|
+
/>
|
|
38
|
+
|
|
39
|
+
This function returns an instance of the MastraScorer class. The `.run()` method accepts the same input as other scorers (see the [MastraScorer reference](./mastra-scorer)), but the return value includes LLM-specific fields as documented below.
|
|
40
|
+
|
|
41
|
+
## .run() Returns
|
|
42
|
+
|
|
43
|
+
<PropertiesTable
|
|
44
|
+
content={[
|
|
45
|
+
{
|
|
46
|
+
name: "runId",
|
|
47
|
+
type: "string",
|
|
48
|
+
description: "The id of the run (optional).",
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
name: "score",
|
|
52
|
+
type: "number",
|
|
53
|
+
description: "Relevancy score (0 to scale, default 0-1)",
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
name: "extractPrompt",
|
|
57
|
+
type: "string",
|
|
58
|
+
description: "The prompt sent to the LLM for the extract step (optional).",
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
name: "extractStepResult",
|
|
62
|
+
type: "object",
|
|
63
|
+
description: "Object with extracted statements: { statements: string[] }",
|
|
64
|
+
},
|
|
65
|
+
{
|
|
66
|
+
name: "analyzePrompt",
|
|
67
|
+
type: "string",
|
|
68
|
+
description: "The prompt sent to the LLM for the analyze step (optional).",
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
name: "analyzeStepResult",
|
|
72
|
+
type: "object",
|
|
73
|
+
description: "Object with results: { results: Array<{ result: 'yes' | 'unsure' | 'no', reason: string }> }",
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
name: "reasonPrompt",
|
|
77
|
+
type: "string",
|
|
78
|
+
description: "The prompt sent to the LLM for the reason step (optional).",
|
|
79
|
+
},
|
|
80
|
+
{
|
|
81
|
+
name: "reason",
|
|
82
|
+
type: "string",
|
|
83
|
+
description: "Explanation of the score.",
|
|
84
|
+
},
|
|
85
|
+
]}
|
|
86
|
+
/>
|
|
87
|
+
|
|
88
|
+
## Scoring Details
|
|
89
|
+
|
|
90
|
+
The scorer evaluates relevancy through query-answer alignment, considering completeness and detail level, but not factual correctness.
|
|
91
|
+
|
|
92
|
+
### Scoring Process
|
|
93
|
+
|
|
94
|
+
1. **Statement Extraction:**
|
|
95
|
+
- Breaks output into meaningful statements while preserving context.
|
|
96
|
+
2. **Relevance Analysis:**
|
|
97
|
+
- Each statement is evaluated as:
|
|
98
|
+
- "yes": Full weight for direct matches
|
|
99
|
+
- "unsure": Partial weight (default: 0.3) for approximate matches
|
|
100
|
+
- "no": Zero weight for irrelevant content
|
|
101
|
+
3. **Score Calculation:**
|
|
102
|
+
- `((direct + uncertainty * partial) / total_statements) * scale`
|
|
103
|
+
|
|
104
|
+
### Score Interpretation
|
|
105
|
+
|
|
106
|
+
- 1.0: Perfect relevance - complete and accurate
|
|
107
|
+
- 0.7-0.9: High relevance - minor gaps or imprecisions
|
|
108
|
+
- 0.4-0.6: Moderate relevance - significant gaps
|
|
109
|
+
- 0.1-0.3: Low relevance - major issues
|
|
110
|
+
- 0.0: No relevance - incorrect or off-topic
|
|
111
|
+
|
|
112
|
+
## Related
|
|
113
|
+
|
|
114
|
+
- [Prompt Alignment Scorer](./prompt-alignment)
|
|
115
|
+
- [Faithfulness Scorer](./faithfulness)
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: Bias | Scorers | Mastra Docs"
|
|
3
|
+
description: Documentation for the Bias Scorer in Mastra, which evaluates LLM outputs for various forms of bias, including gender, political, racial/ethnic, or geographical bias.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Bias Scorer
|
|
7
|
+
The `createBiasScorer()` function accepts a single options object with the following properties:
|
|
8
|
+
|
|
9
|
+
For a usage example, see the [Bias Examples](/examples/scorers/bias).
|
|
10
|
+
|
|
11
|
+
## Parameters
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
<PropertiesTable
|
|
15
|
+
content={[
|
|
16
|
+
{
|
|
17
|
+
name: "model",
|
|
18
|
+
type: "LanguageModel",
|
|
19
|
+
required: true,
|
|
20
|
+
description: "Configuration for the model used to evaluate bias.",
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
name: "scale",
|
|
24
|
+
type: "number",
|
|
25
|
+
required: false,
|
|
26
|
+
defaultValue: "1",
|
|
27
|
+
description: "Maximum score value.",
|
|
28
|
+
},
|
|
29
|
+
]}
|
|
30
|
+
/>
|
|
31
|
+
|
|
32
|
+
This function returns an instance of the MastraScorer class. The `.run()` method accepts the same input as other scorers (see the [MastraScorer reference](./mastra-scorer)), but the return value includes LLM-specific fields as documented below.
|
|
33
|
+
|
|
34
|
+
## .run() Returns
|
|
35
|
+
|
|
36
|
+
<PropertiesTable
|
|
37
|
+
content={[
|
|
38
|
+
{
|
|
39
|
+
name: "runId",
|
|
40
|
+
type: "string",
|
|
41
|
+
description: "The id of the run (optional).",
|
|
42
|
+
},
|
|
43
|
+
{
|
|
44
|
+
name: "extractStepResult",
|
|
45
|
+
type: "object",
|
|
46
|
+
description: "Object with extracted opinions: { opinions: string[] }",
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
name: "extractPrompt",
|
|
50
|
+
type: "string",
|
|
51
|
+
description: "The prompt sent to the LLM for the extract step (optional).",
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
name: "analyzeStepResult",
|
|
55
|
+
type: "object",
|
|
56
|
+
description: "Object with results: { results: Array<{ result: 'yes' | 'no', reason: string }> }",
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
name: "analyzePrompt",
|
|
60
|
+
type: "string",
|
|
61
|
+
description: "The prompt sent to the LLM for the analyze step (optional).",
|
|
62
|
+
},
|
|
63
|
+
{
|
|
64
|
+
name: "score",
|
|
65
|
+
type: "number",
|
|
66
|
+
description: "Bias score (0 to scale, default 0-1). Higher scores indicate more bias.",
|
|
67
|
+
},
|
|
68
|
+
{
|
|
69
|
+
name: "reason",
|
|
70
|
+
type: "string",
|
|
71
|
+
description: "Explanation of the score.",
|
|
72
|
+
},
|
|
73
|
+
{
|
|
74
|
+
name: "reasonPrompt",
|
|
75
|
+
type: "string",
|
|
76
|
+
description: "The prompt sent to the LLM for the reason step (optional).",
|
|
77
|
+
},
|
|
78
|
+
]}
|
|
79
|
+
/>
|
|
80
|
+
|
|
81
|
+
## Bias Categories
|
|
82
|
+
|
|
83
|
+
The scorer evaluates several types of bias:
|
|
84
|
+
|
|
85
|
+
1. **Gender Bias**: Discrimination or stereotypes based on gender
|
|
86
|
+
2. **Political Bias**: Prejudice against political ideologies or beliefs
|
|
87
|
+
3. **Racial/Ethnic Bias**: Discrimination based on race, ethnicity, or national origin
|
|
88
|
+
4. **Geographical Bias**: Prejudice based on location or regional stereotypes
|
|
89
|
+
|
|
90
|
+
## Scoring Details
|
|
91
|
+
|
|
92
|
+
The scorer evaluates bias through opinion analysis based on:
|
|
93
|
+
|
|
94
|
+
- Opinion identification and extraction
|
|
95
|
+
- Presence of discriminatory language
|
|
96
|
+
- Use of stereotypes or generalizations
|
|
97
|
+
- Balance in perspective presentation
|
|
98
|
+
- Loaded or prejudicial terminology
|
|
99
|
+
|
|
100
|
+
### Scoring Process
|
|
101
|
+
|
|
102
|
+
1. Extracts opinions from text:
|
|
103
|
+
- Identifies subjective statements
|
|
104
|
+
- Excludes factual claims
|
|
105
|
+
- Includes cited opinions
|
|
106
|
+
2. Evaluates each opinion:
|
|
107
|
+
- Checks for discriminatory language
|
|
108
|
+
- Assesses stereotypes and generalizations
|
|
109
|
+
- Analyzes perspective balance
|
|
110
|
+
|
|
111
|
+
Final score: `(biased_opinions / total_opinions) * scale`
|
|
112
|
+
|
|
113
|
+
### Score Interpretation
|
|
114
|
+
|
|
115
|
+
(0 to scale, default 0-1)
|
|
116
|
+
|
|
117
|
+
- 1.0: Complete bias - all opinions contain bias
|
|
118
|
+
- 0.7-0.9: Significant bias - majority of opinions show bias
|
|
119
|
+
- 0.4-0.6: Moderate bias - mix of biased and neutral opinions
|
|
120
|
+
- 0.1-0.3: Minimal bias - most opinions show balanced perspective
|
|
121
|
+
- 0.0: No detectable bias - opinions are balanced and neutral
|
|
122
|
+
|
|
123
|
+
## Related
|
|
124
|
+
|
|
125
|
+
- [Toxicity Scorer](./toxicity)
|
|
126
|
+
- [Faithfulness Scorer](./faithfulness)
|
|
127
|
+
- [Hallucination Scorer](./hallucination)
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: Completeness | Scorers | Mastra Docs"
|
|
3
|
+
description: Documentation for the Completeness Scorer in Mastra, which evaluates how thoroughly LLM outputs cover key elements present in the input.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Completeness Scorer
|
|
7
|
+
|
|
8
|
+
The `createCompletenessScorer()` function evaluates how thoroughly an LLM's output covers the key elements present in the input. It analyzes nouns, verbs, topics, and terms to determine coverage and provides a detailed completeness score.
|
|
9
|
+
|
|
10
|
+
For a usage example, see the [Completeness Examples](/examples/scorers/completeness).
|
|
11
|
+
|
|
12
|
+
## Parameters
|
|
13
|
+
|
|
14
|
+
The `createCompletenessScorer()` function does not take any options.
|
|
15
|
+
|
|
16
|
+
This function returns an instance of the MastraScorer class. See the [MastraScorer reference](./mastra-scorer) for details on the `.run()` method and its input/output.
|
|
17
|
+
|
|
18
|
+
## .run() Returns
|
|
19
|
+
|
|
20
|
+
<PropertiesTable
|
|
21
|
+
content={[
|
|
22
|
+
{
|
|
23
|
+
name: "runId",
|
|
24
|
+
type: "string",
|
|
25
|
+
description: "The id of the run (optional).",
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
name: "extractStepResult",
|
|
29
|
+
type: "object",
|
|
30
|
+
description: "Object with extracted elements and coverage details: { inputElements: string[], outputElements: string[], missingElements: string[], elementCounts: { input: number, output: number } }",
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
name: "score",
|
|
34
|
+
type: "number",
|
|
35
|
+
description: "Completeness score (0-1) representing the proportion of input elements covered in the output.",
|
|
36
|
+
},
|
|
37
|
+
]}
|
|
38
|
+
/>
|
|
39
|
+
|
|
40
|
+
## Element Extraction Details
|
|
41
|
+
|
|
42
|
+
The scorer extracts and analyzes several types of elements:
|
|
43
|
+
|
|
44
|
+
- Nouns: Key objects, concepts, and entities
|
|
45
|
+
- Verbs: Actions and states (converted to infinitive form)
|
|
46
|
+
- Topics: Main subjects and themes
|
|
47
|
+
- Terms: Individual significant words
|
|
48
|
+
|
|
49
|
+
The extraction process includes:
|
|
50
|
+
|
|
51
|
+
- Normalization of text (removing diacritics, converting to lowercase)
|
|
52
|
+
- Splitting camelCase words
|
|
53
|
+
- Handling of word boundaries
|
|
54
|
+
- Special handling of short words (3 characters or fewer)
|
|
55
|
+
- Deduplication of elements
|
|
56
|
+
|
|
57
|
+
## Scoring Details
|
|
58
|
+
|
|
59
|
+
The scorer evaluates completeness through linguistic element coverage analysis.
|
|
60
|
+
|
|
61
|
+
### Scoring Process
|
|
62
|
+
|
|
63
|
+
1. Extracts key elements:
|
|
64
|
+
- Nouns and named entities
|
|
65
|
+
- Action verbs
|
|
66
|
+
- Topic-specific terms
|
|
67
|
+
- Normalized word forms
|
|
68
|
+
2. Calculates coverage of input elements:
|
|
69
|
+
- Exact matches for short terms (≤3 chars)
|
|
70
|
+
- Substantial overlap (>60%) for longer terms
|
|
71
|
+
|
|
72
|
+
Final score: `covered_elements / total_input_elements`
|
|
73
|
+
|
|
74
|
+
### Score Interpretation
|
|
75
|
+
|
|
76
|
+
(0 to 1)
|
|
77
|
+
|
|
78
|
+
- 1.0: Complete coverage - contains all input elements
|
|
79
|
+
- 0.7-0.9: High coverage - includes most key elements
|
|
80
|
+
- 0.4-0.6: Partial coverage - contains some key elements
|
|
81
|
+
- 0.1-0.3: Low coverage - missing most key elements
|
|
82
|
+
- 0.0: No coverage - output lacks all input elements
|
|
83
|
+
|
|
84
|
+
## Related
|
|
85
|
+
|
|
86
|
+
- [Answer Relevancy Scorer](./answer-relevancy)
|
|
87
|
+
- [Content Similarity Scorer](./content-similarity)
|
|
88
|
+
- [Textual Difference Scorer](./textual-difference)
|
|
89
|
+
- [Keyword Coverage Scorer](./keyword-coverage)
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: Content Similarity | Scorers | Mastra Docs"
|
|
3
|
+
description: Documentation for the Content Similarity Scorer in Mastra, which measures textual similarity between strings and provides a matching score.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Content Similarity Scorer
|
|
7
|
+
|
|
8
|
+
The `createContentSimilarityScorer()` function measures the textual similarity between two strings, providing a score that indicates how closely they match. It supports configurable options for case sensitivity and whitespace handling.
|
|
9
|
+
|
|
10
|
+
For a usage example, see the [Content Similarity Examples](/examples/scorers/content-similarity).
|
|
11
|
+
|
|
12
|
+
## Parameters
|
|
13
|
+
|
|
14
|
+
The `createContentSimilarityScorer()` function accepts a single options object with the following properties:
|
|
15
|
+
|
|
16
|
+
<PropertiesTable
|
|
17
|
+
content={[
|
|
18
|
+
{
|
|
19
|
+
name: "ignoreCase",
|
|
20
|
+
type: "boolean",
|
|
21
|
+
required: false,
|
|
22
|
+
defaultValue: "true",
|
|
23
|
+
description: "Whether to ignore case differences when comparing strings.",
|
|
24
|
+
},
|
|
25
|
+
{
|
|
26
|
+
name: "ignoreWhitespace",
|
|
27
|
+
type: "boolean",
|
|
28
|
+
required: false,
|
|
29
|
+
defaultValue: "true",
|
|
30
|
+
description: "Whether to normalize whitespace when comparing strings.",
|
|
31
|
+
},
|
|
32
|
+
]}
|
|
33
|
+
/>
|
|
34
|
+
|
|
35
|
+
This function returns an instance of the MastraScorer class. See the [MastraScorer reference](./mastra-scorer) for details on the `.run()` method and its input/output.
|
|
36
|
+
|
|
37
|
+
## .run() Returns
|
|
38
|
+
|
|
39
|
+
<PropertiesTable
|
|
40
|
+
content={[
|
|
41
|
+
{
|
|
42
|
+
name: "runId",
|
|
43
|
+
type: "string",
|
|
44
|
+
description: "The id of the run (optional).",
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
name: "extractStepResult",
|
|
48
|
+
type: "object",
|
|
49
|
+
description: "Object with processed input and output: { processedInput: string, processedOutput: string }",
|
|
50
|
+
},
|
|
51
|
+
{
|
|
52
|
+
name: "analyzeStepResult",
|
|
53
|
+
type: "object",
|
|
54
|
+
description: "Object with similarity: { similarity: number }",
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
name: "score",
|
|
58
|
+
type: "number",
|
|
59
|
+
description: "Similarity score (0-1) where 1 indicates perfect similarity.",
|
|
60
|
+
},
|
|
61
|
+
]}
|
|
62
|
+
/>
|
|
63
|
+
|
|
64
|
+
## Scoring Details
|
|
65
|
+
|
|
66
|
+
The scorer evaluates textual similarity through character-level matching and configurable text normalization.
|
|
67
|
+
|
|
68
|
+
### Scoring Process
|
|
69
|
+
|
|
70
|
+
1. Normalizes text:
|
|
71
|
+
- Case normalization (if ignoreCase: true)
|
|
72
|
+
- Whitespace normalization (if ignoreWhitespace: true)
|
|
73
|
+
2. Compares processed strings using string-similarity algorithm:
|
|
74
|
+
- Analyzes character sequences
|
|
75
|
+
- Aligns word boundaries
|
|
76
|
+
- Considers relative positions
|
|
77
|
+
- Accounts for length differences
|
|
78
|
+
|
|
79
|
+
Final score: `similarity_value`
|
|
80
|
+
|
|
81
|
+
### Score Interpretation
|
|
82
|
+
|
|
83
|
+
(0 to 1)
|
|
84
|
+
|
|
85
|
+
- 1.0: Perfect match - identical texts
|
|
86
|
+
- 0.7-0.9: High similarity - mostly matching content
|
|
87
|
+
- 0.4-0.6: Moderate similarity - partial matches
|
|
88
|
+
- 0.1-0.3: Low similarity - few matching patterns
|
|
89
|
+
- 0.0: No similarity - completely different texts
|
|
90
|
+
|
|
91
|
+
## Related
|
|
92
|
+
|
|
93
|
+
- [Completeness Scorer](./completeness)
|
|
94
|
+
- [Textual Difference Scorer](./textual-difference)
|
|
95
|
+
- [Answer Relevancy Scorer](./answer-relevancy)
|
|
96
|
+
- [Keyword Coverage Scorer](./keyword-coverage)
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: Create Custom Scorer | Scorers | Mastra Docs"
|
|
3
|
+
description: Documentation for creating custom code scorers in Mastra, allowing users to define their own evaluation logic.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# createScorer
|
|
7
|
+
|
|
8
|
+
Mastra allows you to define your own custom code scorers for evaluating input/output pairs using any logic you choose. Custom scorers integrate seamlessly with the Mastra scoring framework and can be used anywhere built-in scorers are used.
|
|
9
|
+
|
|
10
|
+
For a usage example, see the [Custom Code Scorer Examples](/examples/scorers/custom-native-javascript-eval).
|
|
11
|
+
|
|
12
|
+
## How to Create a Custom Scorer
|
|
13
|
+
|
|
14
|
+
Use the `createScorer` factory to define your scorer. You must provide at least a `name`, `description`, and an `analyze` function. Optionally, you can provide `extract` and `reason` functions for multi-step or more advanced logic.
|
|
15
|
+
|
|
16
|
+
## createScorer Options
|
|
17
|
+
|
|
18
|
+
<PropertiesTable
|
|
19
|
+
content={[
|
|
20
|
+
{
|
|
21
|
+
name: "name",
|
|
22
|
+
type: "string",
|
|
23
|
+
required: true,
|
|
24
|
+
description: "Name of the scorer.",
|
|
25
|
+
},
|
|
26
|
+
{
|
|
27
|
+
name: "description",
|
|
28
|
+
type: "string",
|
|
29
|
+
required: true,
|
|
30
|
+
description: "Description of what the scorer does.",
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
name: "analyze",
|
|
34
|
+
type: "function",
|
|
35
|
+
required: true,
|
|
36
|
+
description: "Main scoring logic.",
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
name: "extract",
|
|
40
|
+
type: "function",
|
|
41
|
+
required: false,
|
|
42
|
+
description: "Optional pre-processing step.",
|
|
43
|
+
},
|
|
44
|
+
{
|
|
45
|
+
name: "reason",
|
|
46
|
+
type: "function",
|
|
47
|
+
required: false,
|
|
48
|
+
description: "Optional reason/explanation step.",
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
name: "metadata",
|
|
52
|
+
type: "object",
|
|
53
|
+
required: false,
|
|
54
|
+
description: "Optional metadata for the scorer.",
|
|
55
|
+
},
|
|
56
|
+
]}
|
|
57
|
+
/>
|
|
58
|
+
|
|
59
|
+
This function returns an instance of the MastraScorer class. See the [MastraScorer reference](./mastra-scorer) for details on the `.run()` method and its input/output.
|
|
60
|
+
|
|
61
|
+
## Step Function Signatures
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
### extract
|
|
65
|
+
<PropertiesTable
|
|
66
|
+
content={[
|
|
67
|
+
{
|
|
68
|
+
name: "input",
|
|
69
|
+
type: "Record<string, any>[]",
|
|
70
|
+
required: false,
|
|
71
|
+
description:
|
|
72
|
+
"Input records provided to the scorer. If the scorer is added to an agent, this will be an array of user messages, e.g. `[{ role: 'user', content: 'hello world' }]`. If the scorer is used in a workflow, this will be the input of the workflow.",
|
|
73
|
+
},
|
|
74
|
+
{
|
|
75
|
+
name: "output",
|
|
76
|
+
type: "Record<string, any>",
|
|
77
|
+
required: true,
|
|
78
|
+
description:
|
|
79
|
+
"Output record provided to the scorer. For agents, this is usually the agent's response. For workflows, this is the workflow's output.",
|
|
80
|
+
},
|
|
81
|
+
]}
|
|
82
|
+
/>
|
|
83
|
+
Returns: `{ results: any }`
|
|
84
|
+
The method must return an object with a `results` property. The value of `results` will be passed to the analyze function as `extractStepResult`.
|
|
85
|
+
|
|
86
|
+
### analyze
|
|
87
|
+
<PropertiesTable
|
|
88
|
+
content={[
|
|
89
|
+
{
|
|
90
|
+
name: "input",
|
|
91
|
+
type: "Record<string, any>[]",
|
|
92
|
+
required: true,
|
|
93
|
+
description:
|
|
94
|
+
"Input records provided to the scorer. If the scorer is added to an agent, this will be an array of user messages, e.g. `[{ role: 'user', content: 'hello world' }]`. If the scorer is used in a workflow, this will be the input of the workflow.",
|
|
95
|
+
},
|
|
96
|
+
{
|
|
97
|
+
name: "output",
|
|
98
|
+
type: "Record<string, any>",
|
|
99
|
+
required: true,
|
|
100
|
+
description:
|
|
101
|
+
"Output record provided to the scorer. For agents, this is usually the agent's response. For workflows, this is the workflow's output.",
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
name: "extractStepResult",
|
|
105
|
+
type: "object",
|
|
106
|
+
required: false,
|
|
107
|
+
description: "Result of the extract step, if defined (optional).",
|
|
108
|
+
},
|
|
109
|
+
]}
|
|
110
|
+
/>
|
|
111
|
+
Returns: `{ score: number, results?: any }`
|
|
112
|
+
The method must return an object with a `score` property (required). Optionally, it may return a `results` property. The value of `results` will be passed to the reason function as `analyzeStepResult`.
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
### reason
|
|
116
|
+
<PropertiesTable
|
|
117
|
+
content={[
|
|
118
|
+
{
|
|
119
|
+
name: "input",
|
|
120
|
+
type: "Record<string, any>[]",
|
|
121
|
+
required: true,
|
|
122
|
+
description:
|
|
123
|
+
"Input records provided to the scorer. If the scorer is added to an agent, this will be an array of user messages, e.g. `[{ role: 'user', content: 'hello world' }]`. If the scorer is used in a workflow, this will be the input of the workflow.",
|
|
124
|
+
},
|
|
125
|
+
{
|
|
126
|
+
name: "output",
|
|
127
|
+
type: "Record<string, any>",
|
|
128
|
+
required: true,
|
|
129
|
+
description:
|
|
130
|
+
"Output record provided to the scorer. For agents, this is usually the agent's response. For workflows, this is the workflow's output.",
|
|
131
|
+
},
|
|
132
|
+
{
|
|
133
|
+
name: "score",
|
|
134
|
+
type: "number",
|
|
135
|
+
required: true,
|
|
136
|
+
description: "Score computed by the analyze step.",
|
|
137
|
+
},
|
|
138
|
+
{
|
|
139
|
+
name: "analyzeStepResult",
|
|
140
|
+
type: "object",
|
|
141
|
+
required: true,
|
|
142
|
+
description: "Result of the analyze step.",
|
|
143
|
+
},
|
|
144
|
+
{
|
|
145
|
+
name: "extractStepResult",
|
|
146
|
+
type: "object",
|
|
147
|
+
required: false,
|
|
148
|
+
description: "Result of the extract step, if defined (optional).",
|
|
149
|
+
},
|
|
150
|
+
]}
|
|
151
|
+
/>
|
|
152
|
+
Returns: `{ reason: string }`
|
|
153
|
+
The method must return an object with a `reason` property, which should be a string explaining the score.
|
|
154
|
+
|
|
155
|
+
All step functions can be async.
|