@mastra/mcp-docs-server 0.13.6 → 0.13.7-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.docs/organized/changelogs/%40internal%2Fstorage-test-utils.md +9 -9
- package/.docs/organized/changelogs/%40mastra%2Fastra.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fchroma.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fclickhouse.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fclient-js.md +35 -35
- package/.docs/organized/changelogs/%40mastra%2Fcloud.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fcloudflare-d1.md +25 -25
- package/.docs/organized/changelogs/%40mastra%2Fcloudflare.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fcore.md +30 -30
- package/.docs/organized/changelogs/%40mastra%2Fcouchbase.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-cloudflare.md +30 -30
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-netlify.md +28 -28
- package/.docs/organized/changelogs/%40mastra%2Fdeployer-vercel.md +28 -28
- package/.docs/organized/changelogs/%40mastra%2Fdeployer.md +45 -45
- package/.docs/organized/changelogs/%40mastra%2Fdynamodb.md +26 -26
- package/.docs/organized/changelogs/%40mastra%2Fevals.md +35 -35
- package/.docs/organized/changelogs/%40mastra%2Ffirecrawl.md +29 -29
- package/.docs/organized/changelogs/%40mastra%2Fgithub.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Flance.md +7 -0
- package/.docs/organized/changelogs/%40mastra%2Flibsql.md +28 -28
- package/.docs/organized/changelogs/%40mastra%2Floggers.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fmcp-docs-server.md +35 -35
- package/.docs/organized/changelogs/%40mastra%2Fmcp-registry-registry.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fmcp.md +9 -9
- package/.docs/organized/changelogs/%40mastra%2Fmem0.md +26 -26
- package/.docs/organized/changelogs/%40mastra%2Fmemory.md +37 -37
- package/.docs/organized/changelogs/%40mastra%2Fmongodb.md +26 -26
- package/.docs/organized/changelogs/%40mastra%2Fmssql.md +7 -0
- package/.docs/organized/changelogs/%40mastra%2Fopensearch.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fpg.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fpinecone.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fplayground-ui.md +43 -43
- package/.docs/organized/changelogs/%40mastra%2Fqdrant.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Frag.md +27 -27
- package/.docs/organized/changelogs/%40mastra%2Fragie.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fserver.md +34 -34
- package/.docs/organized/changelogs/%40mastra%2Fturbopuffer.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fupstash.md +26 -26
- package/.docs/organized/changelogs/%40mastra%2Fvectorize.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fvoice-azure.md +26 -26
- package/.docs/organized/changelogs/%40mastra%2Fvoice-cloudflare.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fvoice-deepgram.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fvoice-elevenlabs.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fvoice-google.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fvoice-murf.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fvoice-openai-realtime.md +28 -28
- package/.docs/organized/changelogs/%40mastra%2Fvoice-openai.md +26 -26
- package/.docs/organized/changelogs/%40mastra%2Fvoice-playai.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fvoice-sarvam.md +8 -8
- package/.docs/organized/changelogs/%40mastra%2Fvoice-speechify.md +8 -8
- package/.docs/organized/changelogs/create-mastra.md +15 -15
- package/.docs/organized/changelogs/mastra.md +43 -43
- package/.docs/organized/code-examples/a2a.md +1 -1
- package/.docs/organized/code-examples/agui.md +2 -2
- package/.docs/organized/code-examples/ai-sdk-useChat.md +1 -1
- package/.docs/organized/code-examples/ai-sdk-v5.md +1 -1
- package/.docs/organized/code-examples/assistant-ui.md +1 -1
- package/.docs/organized/code-examples/bird-checker-with-express.md +1 -1
- package/.docs/organized/code-examples/bird-checker-with-nextjs-and-eval.md +1 -1
- package/.docs/organized/code-examples/bird-checker-with-nextjs.md +1 -1
- package/.docs/organized/code-examples/client-side-tools.md +2 -2
- package/.docs/organized/code-examples/crypto-chatbot.md +1 -1
- package/.docs/organized/code-examples/fireworks-r1.md +1 -1
- package/.docs/organized/code-examples/memory-with-mem0.md +1 -1
- package/.docs/organized/code-examples/memory-with-pg.md +1 -0
- package/.docs/organized/code-examples/openapi-spec-writer.md +1 -1
- package/.docs/organized/code-examples/quick-start.md +1 -1
- package/.docs/organized/code-examples/workflow-ai-recruiter.md +1 -1
- package/.docs/organized/code-examples/workflow-with-inline-steps.md +1 -1
- package/.docs/organized/code-examples/workflow-with-memory.md +1 -1
- package/.docs/organized/code-examples/workflow-with-separate-steps.md +1 -1
- package/.docs/organized/code-examples/workflow-with-suspend-resume.md +1 -1
- package/.docs/raw/agents/runtime-variables.mdx +1 -1
- package/.docs/raw/auth/index.mdx +24 -0
- package/.docs/raw/auth/jwt.mdx +99 -0
- package/.docs/raw/community/contributing-templates.mdx +192 -0
- package/.docs/raw/deployment/cloud-providers/azure-app-services.mdx +6 -5
- package/.docs/raw/deployment/cloud-providers/digital-ocean.mdx +84 -14
- package/.docs/raw/evals/custom-eval.mdx +12 -12
- package/.docs/raw/getting-started/installation.mdx +16 -0
- package/.docs/raw/getting-started/templates.mdx +95 -0
- package/.docs/raw/index.mdx +2 -2
- package/.docs/raw/reference/agents/generate.mdx +18 -1
- package/.docs/raw/reference/agents/stream.mdx +18 -1
- package/.docs/raw/reference/auth/jwt.mdx +42 -0
- package/.docs/raw/reference/client-js/agents.mdx +3 -11
- package/.docs/raw/reference/client-js/error-handling.mdx +1 -21
- package/.docs/raw/reference/client-js/logs.mdx +2 -10
- package/.docs/raw/reference/client-js/mastra-client.mdx +141 -0
- package/.docs/raw/reference/client-js/memory.mdx +6 -18
- package/.docs/raw/reference/client-js/telemetry.mdx +1 -9
- package/.docs/raw/reference/client-js/tools.mdx +2 -10
- package/.docs/raw/reference/client-js/vectors.mdx +1 -9
- package/.docs/raw/reference/client-js/workflows-legacy.mdx +3 -11
- package/.docs/raw/reference/client-js/workflows.mdx +5 -13
- package/.docs/raw/reference/core/mastra-class.mdx +7 -1
- package/.docs/raw/reference/observability/providers/dash0.mdx +2 -2
- package/.docs/raw/reference/scorers/answer-relevancy.mdx +115 -0
- package/.docs/raw/reference/scorers/bias.mdx +127 -0
- package/.docs/raw/reference/scorers/completeness.mdx +89 -0
- package/.docs/raw/reference/scorers/content-similarity.mdx +96 -0
- package/.docs/raw/reference/scorers/custom-code-scorer.mdx +155 -0
- package/.docs/raw/reference/scorers/faithfulness.mdx +123 -0
- package/.docs/raw/reference/scorers/hallucination.mdx +135 -0
- package/.docs/raw/reference/scorers/keyword-coverage.mdx +92 -0
- package/.docs/raw/reference/scorers/llm-scorer.mdx +166 -0
- package/.docs/raw/reference/scorers/mastra-scorer.mdx +218 -0
- package/.docs/raw/reference/scorers/textual-difference.mdx +76 -0
- package/.docs/raw/reference/scorers/tone-consistency.mdx +75 -0
- package/.docs/raw/reference/scorers/toxicity.mdx +109 -0
- package/.docs/raw/reference/templates.mdx +222 -0
- package/.docs/raw/reference/tools/vector-query-tool.mdx +29 -0
- package/.docs/raw/tools-mcp/overview.mdx +1 -1
- package/package.json +6 -6
- package/.docs/organized/changelogs/%40mastra%2Fagui.md +0 -302
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: Faithfulness | Scorers | Mastra Docs"
|
|
3
|
+
description: Documentation for the Faithfulness Scorer in Mastra, which evaluates the factual accuracy of LLM outputs compared to the provided context.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Faithfulness Scorer
|
|
7
|
+
|
|
8
|
+
The `createFaithfulnessScorer()` function evaluates how factually accurate an LLM's output is compared to the provided context. It extracts claims from the output and verifies them against the context, making it essential for measuring the reliability of RAG pipeline responses.
|
|
9
|
+
|
|
10
|
+
For a usage example, see the [Faithfulness Examples](/examples/scorers/faithfulness).
|
|
11
|
+
|
|
12
|
+
## Parameters
|
|
13
|
+
|
|
14
|
+
The `createFaithfulnessScorer()` function accepts a single options object with the following properties:
|
|
15
|
+
|
|
16
|
+
<PropertiesTable
|
|
17
|
+
content={[
|
|
18
|
+
{
|
|
19
|
+
name: "model",
|
|
20
|
+
type: "LanguageModel",
|
|
21
|
+
required: true,
|
|
22
|
+
description: "Configuration for the model used to evaluate faithfulness.",
|
|
23
|
+
},
|
|
24
|
+
{
|
|
25
|
+
name: "context",
|
|
26
|
+
type: "string[]",
|
|
27
|
+
required: true,
|
|
28
|
+
description: "Array of context chunks against which the output's claims will be verified.",
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
name: "scale",
|
|
32
|
+
type: "number",
|
|
33
|
+
required: false,
|
|
34
|
+
defaultValue: "1",
|
|
35
|
+
description: "The maximum score value. The final score will be normalized to this scale.",
|
|
36
|
+
},
|
|
37
|
+
]}
|
|
38
|
+
/>
|
|
39
|
+
|
|
40
|
+
This function returns an instance of the MastraScorer class. The `.run()` method accepts the same input as other scorers (see the [MastraScorer reference](./mastra-scorer)), but the return value includes LLM-specific fields as documented below.
|
|
41
|
+
|
|
42
|
+
## .run() Returns
|
|
43
|
+
|
|
44
|
+
<PropertiesTable
|
|
45
|
+
content={[
|
|
46
|
+
{
|
|
47
|
+
name: "runId",
|
|
48
|
+
type: "string",
|
|
49
|
+
description: "The id of the run (optional).",
|
|
50
|
+
},
|
|
51
|
+
{
|
|
52
|
+
name: "extractStepResult",
|
|
53
|
+
type: "string[]",
|
|
54
|
+
description: "Array of extracted claims from the output.",
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
name: "extractPrompt",
|
|
58
|
+
type: "string",
|
|
59
|
+
description: "The prompt sent to the LLM for the extract step (optional).",
|
|
60
|
+
},
|
|
61
|
+
{
|
|
62
|
+
name: "analyzeStepResult",
|
|
63
|
+
type: "object",
|
|
64
|
+
description: "Object with verdicts: { verdicts: Array<{ verdict: 'yes' | 'no' | 'unsure', reason: string }> }",
|
|
65
|
+
},
|
|
66
|
+
{
|
|
67
|
+
name: "analyzePrompt",
|
|
68
|
+
type: "string",
|
|
69
|
+
description: "The prompt sent to the LLM for the analyze step (optional).",
|
|
70
|
+
},
|
|
71
|
+
{
|
|
72
|
+
name: "score",
|
|
73
|
+
type: "number",
|
|
74
|
+
description: "A score between 0 and the configured scale, representing the proportion of claims that are supported by the context.",
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
name: "reason",
|
|
78
|
+
type: "string",
|
|
79
|
+
description: "A detailed explanation of the score, including which claims were supported, contradicted, or marked as unsure.",
|
|
80
|
+
},
|
|
81
|
+
{
|
|
82
|
+
name: "reasonPrompt",
|
|
83
|
+
type: "string",
|
|
84
|
+
description: "The prompt sent to the LLM for the reason step (optional).",
|
|
85
|
+
},
|
|
86
|
+
]}
|
|
87
|
+
/>
|
|
88
|
+
|
|
89
|
+
## Scoring Details
|
|
90
|
+
|
|
91
|
+
The scorer evaluates faithfulness through claim verification against provided context.
|
|
92
|
+
|
|
93
|
+
### Scoring Process
|
|
94
|
+
|
|
95
|
+
1. Analyzes claims and context:
|
|
96
|
+
- Extracts all claims (factual and speculative)
|
|
97
|
+
- Verifies each claim against context
|
|
98
|
+
- Assigns one of three verdicts:
|
|
99
|
+
- "yes" - claim supported by context
|
|
100
|
+
- "no" - claim contradicts context
|
|
101
|
+
- "unsure" - claim unverifiable
|
|
102
|
+
2. Calculates faithfulness score:
|
|
103
|
+
- Counts supported claims
|
|
104
|
+
- Divides by total claims
|
|
105
|
+
- Scales to configured range
|
|
106
|
+
|
|
107
|
+
Final score: `(supported_claims / total_claims) * scale`
|
|
108
|
+
|
|
109
|
+
### Score interpretation
|
|
110
|
+
|
|
111
|
+
(0 to scale, default 0-1)
|
|
112
|
+
|
|
113
|
+
- 1.0: All claims supported by context
|
|
114
|
+
- 0.7-0.9: Most claims supported, few unverifiable
|
|
115
|
+
- 0.4-0.6: Mixed support with some contradictions
|
|
116
|
+
- 0.1-0.3: Limited support, many contradictions
|
|
117
|
+
- 0.0: No supported claims
|
|
118
|
+
|
|
119
|
+
## Related
|
|
120
|
+
|
|
121
|
+
- [Answer Relevancy Scorer](./answer-relevancy)
|
|
122
|
+
- [Hallucination Scorer](./hallucination)
|
|
123
|
+
- [Context Relevancy Scorer](./context-relevancy)
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: Hallucination | Scorers | Mastra Docs"
|
|
3
|
+
description: Documentation for the Hallucination Scorer in Mastra, which evaluates the factual correctness of LLM outputs by identifying contradictions with provided context.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Hallucination Scorer
|
|
7
|
+
|
|
8
|
+
The `createHallucinationScorer()` function evaluates whether an LLM generates factually correct information by comparing its output against the provided context. This scorer measures hallucination by identifying direct contradictions between the context and the output.
|
|
9
|
+
|
|
10
|
+
For a usage example, see the [Hallucination Examples](/examples/scorers/hallucination).
|
|
11
|
+
|
|
12
|
+
## Parameters
|
|
13
|
+
|
|
14
|
+
The `createHallucinationScorer()` function accepts a single options object with the following properties:
|
|
15
|
+
|
|
16
|
+
<PropertiesTable
|
|
17
|
+
content={[
|
|
18
|
+
{
|
|
19
|
+
name: "model",
|
|
20
|
+
type: "LanguageModel",
|
|
21
|
+
required: true,
|
|
22
|
+
description: "Configuration for the model used to evaluate hallucination.",
|
|
23
|
+
},
|
|
24
|
+
{
|
|
25
|
+
name: "scale",
|
|
26
|
+
type: "number",
|
|
27
|
+
required: false,
|
|
28
|
+
defaultValue: "1",
|
|
29
|
+
description: "Maximum score value.",
|
|
30
|
+
},
|
|
31
|
+
]}
|
|
32
|
+
/>
|
|
33
|
+
|
|
34
|
+
This function returns an instance of the MastraScorer class. The `.run()` method accepts the same input as other scorers (see the [MastraScorer reference](./mastra-scorer)), but the return value includes LLM-specific fields as documented below.
|
|
35
|
+
|
|
36
|
+
## .run() Returns
|
|
37
|
+
|
|
38
|
+
<PropertiesTable
|
|
39
|
+
content={[
|
|
40
|
+
{
|
|
41
|
+
name: "runId",
|
|
42
|
+
type: "string",
|
|
43
|
+
description: "The id of the run (optional).",
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
name: "extractStepResult",
|
|
47
|
+
type: "object",
|
|
48
|
+
description: "Object with extracted claims: { claims: string[] }",
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
name: "extractPrompt",
|
|
52
|
+
type: "string",
|
|
53
|
+
description: "The prompt sent to the LLM for the extract step (optional).",
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
name: "analyzeStepResult",
|
|
57
|
+
type: "object",
|
|
58
|
+
description: "Object with verdicts: { verdicts: Array<{ statement: string, verdict: 'yes' | 'no', reason: string }> }",
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
name: "analyzePrompt",
|
|
62
|
+
type: "string",
|
|
63
|
+
description: "The prompt sent to the LLM for the analyze step (optional).",
|
|
64
|
+
},
|
|
65
|
+
{
|
|
66
|
+
name: "score",
|
|
67
|
+
type: "number",
|
|
68
|
+
description: "Hallucination score (0 to scale, default 0-1).",
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
name: "reason",
|
|
72
|
+
type: "string",
|
|
73
|
+
description: "Detailed explanation of the score and identified contradictions.",
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
name: "reasonPrompt",
|
|
77
|
+
type: "string",
|
|
78
|
+
description: "The prompt sent to the LLM for the reason step (optional).",
|
|
79
|
+
},
|
|
80
|
+
]}
|
|
81
|
+
/>
|
|
82
|
+
|
|
83
|
+
## Scoring Details
|
|
84
|
+
|
|
85
|
+
The scorer evaluates hallucination through contradiction detection and unsupported claim analysis.
|
|
86
|
+
|
|
87
|
+
### Scoring Process
|
|
88
|
+
|
|
89
|
+
1. Analyzes factual content:
|
|
90
|
+
- Extracts statements from context
|
|
91
|
+
- Identifies numerical values and dates
|
|
92
|
+
- Maps statement relationships
|
|
93
|
+
2. Analyzes output for hallucinations:
|
|
94
|
+
- Compares against context statements
|
|
95
|
+
- Marks direct conflicts as hallucinations
|
|
96
|
+
- Identifies unsupported claims as hallucinations
|
|
97
|
+
- Evaluates numerical accuracy
|
|
98
|
+
- Considers approximation context
|
|
99
|
+
3. Calculates hallucination score:
|
|
100
|
+
- Counts hallucinated statements (contradictions and unsupported claims)
|
|
101
|
+
- Divides by total statements
|
|
102
|
+
- Scales to configured range
|
|
103
|
+
|
|
104
|
+
Final score: `(hallucinated_statements / total_statements) * scale`
|
|
105
|
+
|
|
106
|
+
### Important Considerations
|
|
107
|
+
|
|
108
|
+
- Claims not present in context are treated as hallucinations
|
|
109
|
+
- Subjective claims are hallucinations unless explicitly supported
|
|
110
|
+
- Speculative language ("might", "possibly") about facts IN context is allowed
|
|
111
|
+
- Speculative language about facts NOT in context is treated as hallucination
|
|
112
|
+
- Empty outputs result in zero hallucinations
|
|
113
|
+
- Numerical evaluation considers:
|
|
114
|
+
- Scale-appropriate precision
|
|
115
|
+
- Contextual approximations
|
|
116
|
+
- Explicit precision indicators
|
|
117
|
+
|
|
118
|
+
### Score interpretation
|
|
119
|
+
|
|
120
|
+
(0 to scale, default 0-1)
|
|
121
|
+
|
|
122
|
+
- 1.0: Complete hallucination - every output claim contradicts or is unsupported by the context
|
|
123
|
+
- 0.75: High hallucination - 75% of output claims contradict or are unsupported by the context
|
|
124
|
+
- 0.5: Moderate hallucination - half of output claims contradict or are unsupported by the context
|
|
125
|
+
- 0.25: Low hallucination - 25% of output claims contradict or are unsupported by the context
|
|
126
|
+
- 0.0: No hallucination - output aligns with all context statements
|
|
127
|
+
|
|
128
|
+
**Note:** The score represents the degree of hallucination - lower scores indicate better factual alignment with the provided context.
|
|
129
|
+
|
|
130
|
+
## Related
|
|
131
|
+
|
|
132
|
+
- [Faithfulness Scorer](./faithfulness)
|
|
133
|
+
- [Answer Relevancy Scorer](./answer-relevancy)
|
|
134
|
+
- [Context Precision Scorer](./context-precision)
|
|
135
|
+
- [Context Relevancy Scorer](./context-relevancy)
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: Keyword Coverage | Scorers | Mastra Docs"
|
|
3
|
+
description: Documentation for the Keyword Coverage Scorer in Mastra, which evaluates how well LLM outputs cover important keywords from the input.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Keyword Coverage Scorer
|
|
7
|
+
|
|
8
|
+
The `createKeywordCoverageScorer()` function evaluates how well an LLM's output covers the important keywords from the input. It analyzes keyword presence and matches while ignoring common words and stop words.
|
|
9
|
+
|
|
10
|
+
For a usage example, see the [Keyword Coverage Examples](/examples/scorers/keyword-coverage).
|
|
11
|
+
|
|
12
|
+
## Parameters
|
|
13
|
+
|
|
14
|
+
The `createKeywordCoverageScorer()` function does not take any options.
|
|
15
|
+
|
|
16
|
+
This function returns an instance of the MastraScorer class. See the [MastraScorer reference](./mastra-scorer) for details on the `.run()` method and its input/output.
|
|
17
|
+
|
|
18
|
+
## .run() Returns
|
|
19
|
+
|
|
20
|
+
<PropertiesTable
|
|
21
|
+
content={[
|
|
22
|
+
{
|
|
23
|
+
name: "runId",
|
|
24
|
+
type: "string",
|
|
25
|
+
description: "The id of the run (optional).",
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
name: "extractStepResult",
|
|
29
|
+
type: "object",
|
|
30
|
+
description: "Object with extracted keywords: { referenceKeywords: Set<string>, responseKeywords: Set<string> }",
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
name: "analyzeStepResult",
|
|
34
|
+
type: "object",
|
|
35
|
+
description: "Object with keyword coverage: { totalKeywords: number, matchedKeywords: number }",
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
name: "score",
|
|
39
|
+
type: "number",
|
|
40
|
+
description: "Coverage score (0-1) representing the proportion of matched keywords.",
|
|
41
|
+
},
|
|
42
|
+
]}
|
|
43
|
+
/>
|
|
44
|
+
|
|
45
|
+
## Scoring Details
|
|
46
|
+
|
|
47
|
+
The scorer evaluates keyword coverage by matching keywords with the following features:
|
|
48
|
+
|
|
49
|
+
- Common word and stop word filtering (e.g., "the", "a", "and")
|
|
50
|
+
- Case-insensitive matching
|
|
51
|
+
- Word form variation handling
|
|
52
|
+
- Special handling of technical terms and compound words
|
|
53
|
+
|
|
54
|
+
### Scoring Process
|
|
55
|
+
|
|
56
|
+
1. Processes keywords from input and output:
|
|
57
|
+
- Filters out common words and stop words
|
|
58
|
+
- Normalizes case and word forms
|
|
59
|
+
- Handles special terms and compounds
|
|
60
|
+
2. Calculates keyword coverage:
|
|
61
|
+
- Matches keywords between texts
|
|
62
|
+
- Counts successful matches
|
|
63
|
+
- Computes coverage ratio
|
|
64
|
+
|
|
65
|
+
Final score: `matched_keywords / total_keywords`
|
|
66
|
+
|
|
67
|
+
### Score interpretation
|
|
68
|
+
|
|
69
|
+
(0-1)
|
|
70
|
+
|
|
71
|
+
- 1.0: Perfect keyword coverage
|
|
72
|
+
- 0.7-0.9: Good coverage with most keywords present
|
|
73
|
+
- 0.4-0.6: Moderate coverage with some keywords missing
|
|
74
|
+
- 0.1-0.3: Poor coverage with many keywords missing
|
|
75
|
+
- 0.0: No keyword matches
|
|
76
|
+
|
|
77
|
+
## Special Cases
|
|
78
|
+
|
|
79
|
+
The scorer handles several special cases:
|
|
80
|
+
|
|
81
|
+
- Empty input/output: Returns score of 1.0 if both empty, 0.0 if only one is empty
|
|
82
|
+
- Single word: Treated as a single keyword
|
|
83
|
+
- Technical terms: Preserves compound technical terms (e.g., "React.js", "machine learning")
|
|
84
|
+
- Case differences: "JavaScript" matches "javascript"
|
|
85
|
+
- Common words: Ignored in scoring to focus on meaningful keywords
|
|
86
|
+
|
|
87
|
+
## Related
|
|
88
|
+
|
|
89
|
+
- [Completeness Scorer](./completeness)
|
|
90
|
+
- [Content Similarity Scorer](./content-similarity)
|
|
91
|
+
- [Answer Relevancy Scorer](./answer-relevancy)
|
|
92
|
+
- [Textual Difference Scorer](./textual-difference)
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Reference: createLLMScorer | Scorers | Mastra Docs"
|
|
3
|
+
description: Documentation for creating LLM-based scorers in Mastra, allowing users to define evaluation logic using language models.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# LLM Scorer
|
|
7
|
+
|
|
8
|
+
The `createLLMScorer()` function lets you define custom scorers that use a language model (LLM) as a judge for evaluation. LLM scorers are ideal for tasks where you want to use prompt-based evaluation, such as answer relevancy, faithfulness, or custom prompt-based metrics. LLM scorers integrate seamlessly with the Mastra scoring framework and can be used anywhere built-in scorers are used.
|
|
9
|
+
|
|
10
|
+
For a usage example, see the [Custom LLM Judge Examples](/examples/scorers/custom-llm-judge-eval).
|
|
11
|
+
|
|
12
|
+
## createLLMScorer Options
|
|
13
|
+
|
|
14
|
+
<PropertiesTable
|
|
15
|
+
content={[
|
|
16
|
+
{
|
|
17
|
+
name: "name",
|
|
18
|
+
type: "string",
|
|
19
|
+
required: true,
|
|
20
|
+
description: "Name of the scorer.",
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
name: "description",
|
|
24
|
+
type: "string",
|
|
25
|
+
required: true,
|
|
26
|
+
description: "Description of what the scorer does.",
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
name: "judge",
|
|
30
|
+
type: "object",
|
|
31
|
+
required: true,
|
|
32
|
+
description: "Judge configuration object. Must include a model and instructions (system prompt). See Judge Object section below.",
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
name: "extract",
|
|
36
|
+
type: "object",
|
|
37
|
+
required: false,
|
|
38
|
+
description: "(Optional) Extraction step configuration object. See Extract Object section below.",
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
name: "analyze",
|
|
42
|
+
type: "object",
|
|
43
|
+
required: true,
|
|
44
|
+
description: "Analysis step configuration object. See Analyze Object section below.",
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
name: "reason",
|
|
48
|
+
type: "object",
|
|
49
|
+
required: false,
|
|
50
|
+
description: "(Optional) Reason step configuration object. See Reason Object section below.",
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
name: "calculateScore",
|
|
54
|
+
type: "function",
|
|
55
|
+
required: true,
|
|
56
|
+
description: "Function: ({ run }) => number. Computes the final score from the analyze step result.",
|
|
57
|
+
},
|
|
58
|
+
]}
|
|
59
|
+
/>
|
|
60
|
+
|
|
61
|
+
This function returns an instance of the MastraScorer class. See the [MastraScorer reference](./mastra-scorer) for details on the `.run()` method and its input/output.
|
|
62
|
+
|
|
63
|
+
## Judge Object
|
|
64
|
+
<PropertiesTable
|
|
65
|
+
content={[
|
|
66
|
+
{
|
|
67
|
+
name: "model",
|
|
68
|
+
type: "LanguageModel",
|
|
69
|
+
required: true,
|
|
70
|
+
description: "The LLM model instance to use for evaluation.",
|
|
71
|
+
},
|
|
72
|
+
{
|
|
73
|
+
name: "instructions",
|
|
74
|
+
type: "string",
|
|
75
|
+
required: true,
|
|
76
|
+
description: "System prompt/instructions for the LLM.",
|
|
77
|
+
},
|
|
78
|
+
]}
|
|
79
|
+
/>
|
|
80
|
+
|
|
81
|
+
## Extract Object
|
|
82
|
+
<PropertiesTable
|
|
83
|
+
content={[
|
|
84
|
+
{
|
|
85
|
+
name: "description",
|
|
86
|
+
type: "string",
|
|
87
|
+
required: true,
|
|
88
|
+
description: "Description of the extract step.",
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
name: "judge",
|
|
92
|
+
type: "object",
|
|
93
|
+
required: false,
|
|
94
|
+
description: "(Optional) LLM judge for this step (can override main judge/model). See Judge Object section.",
|
|
95
|
+
},
|
|
96
|
+
{
|
|
97
|
+
name: "outputSchema",
|
|
98
|
+
type: "ZodSchema",
|
|
99
|
+
required: true,
|
|
100
|
+
description: "Zod schema for the expected output of the extract step.",
|
|
101
|
+
},
|
|
102
|
+
{
|
|
103
|
+
name: "createPrompt",
|
|
104
|
+
type: "function",
|
|
105
|
+
required: true,
|
|
106
|
+
description: "Function: ({ run: ScoringInput }) => string. Returns the prompt for the LLM.",
|
|
107
|
+
},
|
|
108
|
+
]}
|
|
109
|
+
/>
|
|
110
|
+
|
|
111
|
+
## Analyze Object
|
|
112
|
+
<PropertiesTable
|
|
113
|
+
content={[
|
|
114
|
+
{
|
|
115
|
+
name: "description",
|
|
116
|
+
type: "string",
|
|
117
|
+
required: true,
|
|
118
|
+
description: "Description of the analyze step.",
|
|
119
|
+
},
|
|
120
|
+
{
|
|
121
|
+
name: "judge",
|
|
122
|
+
type: "object",
|
|
123
|
+
required: false,
|
|
124
|
+
description: "(Optional) LLM judge for this step (can override main judge/model). See Judge Object section.",
|
|
125
|
+
},
|
|
126
|
+
{
|
|
127
|
+
name: "outputSchema",
|
|
128
|
+
type: "ZodSchema",
|
|
129
|
+
required: true,
|
|
130
|
+
description: "Zod schema for the expected output of the analyze step.",
|
|
131
|
+
},
|
|
132
|
+
{
|
|
133
|
+
name: "createPrompt",
|
|
134
|
+
type: "function",
|
|
135
|
+
required: true,
|
|
136
|
+
description: "Function: ({ run: ScoringInput & { extractStepResult } }) => string. Returns the LLM prompt.",
|
|
137
|
+
},
|
|
138
|
+
]}
|
|
139
|
+
/>
|
|
140
|
+
|
|
141
|
+
## Reason Object
|
|
142
|
+
<PropertiesTable
|
|
143
|
+
content={[
|
|
144
|
+
{
|
|
145
|
+
name: "description",
|
|
146
|
+
type: "string",
|
|
147
|
+
required: true,
|
|
148
|
+
description: "Description of the reason step.",
|
|
149
|
+
},
|
|
150
|
+
{
|
|
151
|
+
name: "judge",
|
|
152
|
+
type: "object",
|
|
153
|
+
required: false,
|
|
154
|
+
description: "(Optional) LLM judge for this step (can override main judge/model). See Judge Object section.",
|
|
155
|
+
},
|
|
156
|
+
{
|
|
157
|
+
name: "createPrompt",
|
|
158
|
+
type: "function",
|
|
159
|
+
required: true,
|
|
160
|
+
description: "Function: ({ run }) => string. `run` includes input, output, extractStepResult, analyzeStepResult, and score. Returns the prompt for the LLM.",
|
|
161
|
+
},
|
|
162
|
+
]}
|
|
163
|
+
/>
|
|
164
|
+
|
|
165
|
+
LLM scorers may also include step-specific prompt fields in the return value, such as `extractPrompt`, `analyzePrompt`, and `reasonPrompt`.
|
|
166
|
+
|