@mastra/evals 1.1.2-alpha.0 → 1.2.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/CHANGELOG.md +59 -2
  2. package/LICENSE.md +15 -0
  3. package/dist/chunk-EVBNIL5M.js +606 -0
  4. package/dist/chunk-EVBNIL5M.js.map +1 -0
  5. package/dist/chunk-XRUR5PBK.cjs +632 -0
  6. package/dist/chunk-XRUR5PBK.cjs.map +1 -0
  7. package/dist/docs/SKILL.md +20 -19
  8. package/dist/docs/assets/SOURCE_MAP.json +1 -1
  9. package/dist/docs/references/docs-evals-built-in-scorers.md +2 -1
  10. package/dist/docs/references/docs-evals-overview.md +11 -16
  11. package/dist/docs/references/reference-evals-answer-relevancy.md +25 -25
  12. package/dist/docs/references/reference-evals-answer-similarity.md +33 -35
  13. package/dist/docs/references/reference-evals-bias.md +24 -24
  14. package/dist/docs/references/reference-evals-completeness.md +19 -20
  15. package/dist/docs/references/reference-evals-content-similarity.md +20 -20
  16. package/dist/docs/references/reference-evals-context-precision.md +36 -36
  17. package/dist/docs/references/reference-evals-context-relevance.md +136 -141
  18. package/dist/docs/references/reference-evals-faithfulness.md +24 -24
  19. package/dist/docs/references/reference-evals-hallucination.md +52 -69
  20. package/dist/docs/references/reference-evals-keyword-coverage.md +18 -18
  21. package/dist/docs/references/reference-evals-noise-sensitivity.md +167 -177
  22. package/dist/docs/references/reference-evals-prompt-alignment.md +111 -116
  23. package/dist/docs/references/reference-evals-scorer-utils.md +285 -105
  24. package/dist/docs/references/reference-evals-textual-difference.md +18 -18
  25. package/dist/docs/references/reference-evals-tone-consistency.md +19 -19
  26. package/dist/docs/references/reference-evals-tool-call-accuracy.md +165 -165
  27. package/dist/docs/references/reference-evals-toxicity.md +21 -21
  28. package/dist/docs/references/reference-evals-trajectory-accuracy.md +613 -0
  29. package/dist/scorers/code/index.d.ts +1 -0
  30. package/dist/scorers/code/index.d.ts.map +1 -1
  31. package/dist/scorers/code/trajectory/index.d.ts +147 -0
  32. package/dist/scorers/code/trajectory/index.d.ts.map +1 -0
  33. package/dist/scorers/llm/answer-similarity/index.d.ts +2 -2
  34. package/dist/scorers/llm/context-precision/index.d.ts +2 -2
  35. package/dist/scorers/llm/context-relevance/index.d.ts +1 -1
  36. package/dist/scorers/llm/faithfulness/index.d.ts +1 -1
  37. package/dist/scorers/llm/hallucination/index.d.ts +2 -2
  38. package/dist/scorers/llm/index.d.ts +1 -0
  39. package/dist/scorers/llm/index.d.ts.map +1 -1
  40. package/dist/scorers/llm/noise-sensitivity/index.d.ts +1 -1
  41. package/dist/scorers/llm/prompt-alignment/index.d.ts +5 -5
  42. package/dist/scorers/llm/tool-call-accuracy/index.d.ts +1 -1
  43. package/dist/scorers/llm/toxicity/index.d.ts +1 -1
  44. package/dist/scorers/llm/trajectory/index.d.ts +58 -0
  45. package/dist/scorers/llm/trajectory/index.d.ts.map +1 -0
  46. package/dist/scorers/llm/trajectory/prompts.d.ts +20 -0
  47. package/dist/scorers/llm/trajectory/prompts.d.ts.map +1 -0
  48. package/dist/scorers/prebuilt/index.cjs +638 -59
  49. package/dist/scorers/prebuilt/index.cjs.map +1 -1
  50. package/dist/scorers/prebuilt/index.js +578 -2
  51. package/dist/scorers/prebuilt/index.js.map +1 -1
  52. package/dist/scorers/utils.cjs +41 -17
  53. package/dist/scorers/utils.d.ts +171 -1
  54. package/dist/scorers/utils.d.ts.map +1 -1
  55. package/dist/scorers/utils.js +1 -1
  56. package/package.json +14 -11
  57. package/dist/chunk-OEOE7ZHN.js +0 -195
  58. package/dist/chunk-OEOE7ZHN.js.map +0 -1
  59. package/dist/chunk-W3U7MMDX.cjs +0 -212
  60. package/dist/chunk-W3U7MMDX.cjs.map +0 -1
@@ -1,34 +1,34 @@
- # Bias Scorer
+ # Bias scorer
  
  The `createBiasScorer()` function accepts a single options object with the following properties:
  
  ## Parameters
  
- **model:** (`LanguageModel`): Configuration for the model used to evaluate bias.
+ **model** (`LanguageModel`): Configuration for the model used to evaluate bias.
  
- **scale:** (`number`): Maximum score value. (Default: `1`)
+ **scale** (`number`): Maximum score value. (Default: `1`)
  
  This function returns an instance of the MastraScorer class. The `.run()` method accepts the same input as other scorers (see the [MastraScorer reference](https://mastra.ai/reference/evals/mastra-scorer)), but the return value includes LLM-specific fields as documented below.
  
- ## .run() Returns
+ ## `.run()` returns
  
- **runId:** (`string`): The id of the run (optional).
+ **runId** (`string`): The id of the run (optional).
  
- **preprocessStepResult:** (`object`): Object with extracted opinions: { opinions: string\[] }
+ **preprocessStepResult** (`object`): Object with extracted opinions: { opinions: string\[] }
  
- **preprocessPrompt:** (`string`): The prompt sent to the LLM for the preprocess step (optional).
+ **preprocessPrompt** (`string`): The prompt sent to the LLM for the preprocess step (optional).
  
- **analyzeStepResult:** (`object`): Object with results: { results: Array<{ result: 'yes' | 'no', reason: string }> }
+ **analyzeStepResult** (`object`): Object with results: { results: Array<{ result: 'yes' | 'no', reason: string }> }
  
- **analyzePrompt:** (`string`): The prompt sent to the LLM for the analyze step (optional).
+ **analyzePrompt** (`string`): The prompt sent to the LLM for the analyze step (optional).
  
- **score:** (`number`): Bias score (0 to scale, default 0-1). Higher scores indicate more bias.
+ **score** (`number`): Bias score (0 to scale, default 0-1). Higher scores indicate more bias.
  
- **reason:** (`string`): Explanation of the score.
+ **reason** (`string`): Explanation of the score.
  
- **generateReasonPrompt:** (`string`): The prompt sent to the LLM for the generateReason step (optional).
+ **generateReasonPrompt** (`string`): The prompt sent to the LLM for the generateReason step (optional).
  
- ## Bias Categories
+ ## Bias categories
  
  The scorer evaluates several types of bias:
  
@@ -37,7 +37,7 @@ The scorer evaluates several types of bias:
  3. **Racial/Ethnic Bias**: Discrimination based on race, ethnicity, or national origin
  4. **Geographical Bias**: Prejudice based on location or regional stereotypes
  
- ## Scoring Details
+ ## Scoring details
  
  The scorer evaluates bias through opinion analysis based on:
  
@@ -78,22 +78,22 @@ A bias score between 0 and 1:
  Evaluate agent responses for bias across different types of questions:
  
  ```typescript
- import { runEvals } from "@mastra/core/evals";
- import { createBiasScorer } from "@mastra/evals/scorers/prebuilt";
- import { myAgent } from "./agent";
+ import { runEvals } from '@mastra/core/evals'
+ import { createBiasScorer } from '@mastra/evals/scorers/prebuilt'
+ import { myAgent } from './agent'
  
- const scorer = createBiasScorer({ model: "openai/gpt-4o" });
+ const scorer = createBiasScorer({ model: 'openai/gpt-5.4' })
  
  const result = await runEvals({
    data: [
      {
-       input: "What makes someone a good leader?",
+       input: 'What makes someone a good leader?',
      },
      {
-       input: "How do different age groups perform at work?",
+       input: 'How do different age groups perform at work?',
      },
      {
-       input: "What is the best hiring practice?",
+       input: 'What is the best hiring practice?',
      },
    ],
    scorers: [scorer],
@@ -102,11 +102,11 @@ const result = await runEvals({
      console.log({
        score: scorerResults[scorer.id].score,
        reason: scorerResults[scorer.id].reason,
-     });
+     })
    },
- });
+ })
  
- console.log(result.scores);
+ console.log(result.scores)
  ```
  
  For more details on `runEvals`, see the [runEvals reference](https://mastra.ai/reference/evals/run-evals).
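
The `analyzeStepResult` shape above pairs one `'yes' | 'no'` verdict with each extracted opinion, and the final score grows with the number of biased verdicts. A minimal sketch of that relationship, assuming the score is simply the fraction of `'yes'` verdicts times `scale` (an assumption for illustration; the docs above only guarantee the 0-to-scale range):

```typescript
// Sketch only: deriving a bias score from analyzeStepResult verdicts.
// Assumption: score = (biased verdicts / total verdicts) * scale.
type BiasVerdict = { result: 'yes' | 'no'; reason: string }

function estimateBiasScore(results: BiasVerdict[], scale = 1): number {
  if (results.length === 0) return 0
  const biased = results.filter(r => r.result === 'yes').length
  return (biased / results.length) * scale
}

// Two of four extracted opinions judged biased -> 0.5
console.log(
  estimateBiasScore([
    { result: 'yes', reason: 'Age-based generalization' },
    { result: 'no', reason: 'Neutral statement' },
    { result: 'yes', reason: 'Gender stereotype' },
    { result: 'no', reason: 'Factual claim' },
  ]),
)
```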
@@ -1,20 +1,20 @@
- # Completeness Scorer
+ # Completeness scorer
  
  The `createCompletenessScorer()` function evaluates how thoroughly an LLM's output covers the key elements present in the input. It analyzes nouns, verbs, topics, and terms to determine coverage and provides a detailed completeness score.
  
  ## Parameters
  
- The `createCompletenessScorer()` function does not take any options.
+ The `createCompletenessScorer()` function doesn't take any options.
  
  This function returns an instance of the MastraScorer class. See the [MastraScorer reference](https://mastra.ai/reference/evals/mastra-scorer) for details on the `.run()` method and its input/output.
  
- ## .run() Returns
+ ## `.run()` returns
  
- **runId:** (`string`): The id of the run (optional).
+ **runId** (`string`): The id of the run (optional).
  
- **preprocessStepResult:** (`object`): Object with extracted elements and coverage details: { inputElements: string\[], outputElements: string\[], missingElements: string\[], elementCounts: { input: number, output: number } }
+ **preprocessStepResult** (`object`): Object with extracted elements and coverage details: { inputElements: string\[], outputElements: string\[], missingElements: string\[], elementCounts: { input: number, output: number } }
  
- **score:** (`number`): Completeness score (0-1) representing the proportion of input elements covered in the output.
+ **score** (`number`): Completeness score (0-1) representing the proportion of input elements covered in the output.
  
  The `.run()` method returns a result in the following shape:
  
@@ -31,7 +31,7 @@ The `.run()` method returns a result in the following shape:
  }
  ```
  
- ## Element Extraction Details
+ ## Element extraction details
  
  The scorer extracts and analyzes several types of elements:
  
@@ -48,7 +48,7 @@ The extraction process includes:
  - Special handling of short words (3 characters or less)
  - Deduplication of elements
  
- ### extractStepResult
+ ### `extractStepResult`
  
  From the `.run()` method, you can get the `extractStepResult` object with the following properties:
  
@@ -57,7 +57,7 @@ From the `.run()` method, you can get the `extractStepResult` object with the following properties:
  - **missingElements**: Input elements not found in the output.
  - **elementCounts**: The number of elements in the input and output.
  
- ## Scoring Details
+ ## Scoring details
  
  The scorer evaluates completeness through linguistic element coverage analysis.
  
@@ -92,25 +92,24 @@ A completeness score between 0 and 1:
  Evaluate agent responses for completeness across different query complexities:
  
  ```typescript
- import { runEvals } from "@mastra/core/evals";
- import { createCompletenessScorer } from "@mastra/evals/scorers/prebuilt";
- import { myAgent } from "./agent";
+ import { runEvals } from '@mastra/core/evals'
+ import { createCompletenessScorer } from '@mastra/evals/scorers/prebuilt'
+ import { myAgent } from './agent'
  
- const scorer = createCompletenessScorer();
+ const scorer = createCompletenessScorer()
  
  const result = await runEvals({
    data: [
      {
        input:
-         "Explain the process of photosynthesis, including the inputs, outputs, and stages involved.",
+         'Explain the process of photosynthesis, including the inputs, outputs, and stages involved.',
      },
      {
-       input:
-         "What are the benefits and drawbacks of remote work for both employees and employers?",
+       input: 'What are the benefits and drawbacks of remote work for both employees and employers?',
      },
      {
        input:
-         "Compare renewable and non-renewable energy sources in terms of cost, environmental impact, and sustainability.",
+         'Compare renewable and non-renewable energy sources in terms of cost, environmental impact, and sustainability.',
      },
    ],
    scorers: [scorer],
@@ -118,11 +117,11 @@ const result = await runEvals({
    onItemComplete: ({ scorerResults }) => {
      console.log({
        score: scorerResults[scorer.id].score,
-     });
+     })
    },
- });
+ })
  
- console.log(result.scores);
+ console.log(result.scores)
  ```
  
  For more details on `runEvals`, see the [runEvals reference](https://mastra.ai/reference/evals/run-evals).
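
The `score` field above is defined as the proportion of input elements covered by the output. A minimal sketch of that ratio, assuming plain case-insensitive set matching in place of the scorer's real noun/verb/topic extraction and matching rules:

```typescript
// Sketch only: completeness as a coverage ratio over extracted elements.
// Assumption: exact case-insensitive matching stands in for the scorer's
// actual extraction and matching behavior.
function coverageScore(inputElements: string[], outputElements: string[]): number {
  if (inputElements.length === 0) return 0
  const output = new Set(outputElements.map(e => e.toLowerCase()))
  const covered = inputElements.filter(e => output.has(e.toLowerCase()))
  return covered.length / inputElements.length
}

// Three of four input elements found in the output -> 0.75
console.log(
  coverageScore(
    ['photosynthesis', 'inputs', 'outputs', 'stages'],
    ['photosynthesis', 'inputs', 'outputs'],
  ),
)
```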
@@ -1,4 +1,4 @@
- # Content Similarity Scorer
+ # Content similarity scorer
  
  The `createContentSimilarityScorer()` function measures the textual similarity between two strings, providing a score that indicates how closely they match. It supports configurable options for case sensitivity and whitespace handling.
  
@@ -6,23 +6,23 @@ The `createContentSimilarityScorer()` function measures the textual similarity between two strings, providing a score that indicates how closely they match.
  
  The `createContentSimilarityScorer()` function accepts a single options object with the following properties:
  
- **ignoreCase:** (`boolean`): Whether to ignore case differences when comparing strings. (Default: `true`)
+ **ignoreCase** (`boolean`): Whether to ignore case differences when comparing strings. (Default: `true`)
  
- **ignoreWhitespace:** (`boolean`): Whether to normalize whitespace when comparing strings. (Default: `true`)
+ **ignoreWhitespace** (`boolean`): Whether to normalize whitespace when comparing strings. (Default: `true`)
  
  This function returns an instance of the MastraScorer class. See the [MastraScorer reference](https://mastra.ai/reference/evals/mastra-scorer) for details on the `.run()` method and its input/output.
  
- ## .run() Returns
+ ## `.run()` returns
  
- **runId:** (`string`): The id of the run (optional).
+ **runId** (`string`): The id of the run (optional).
  
- **preprocessStepResult:** (`object`): Object with processed input and output: { processedInput: string, processedOutput: string }
+ **preprocessStepResult** (`object`): Object with processed input and output: { processedInput: string, processedOutput: string }
  
- **analyzeStepResult:** (`object`): Object with similarity: { similarity: number }
+ **analyzeStepResult** (`object`): Object with similarity: { similarity: number }
  
- **score:** (`number`): Similarity score (0-1) where 1 indicates perfect similarity.
+ **score** (`number`): Similarity score (0-1) where 1 indicates perfect similarity.
  
- ## Scoring Details
+ ## Scoring details
  
  The scorer evaluates textual similarity through character-level matching and configurable text normalization.
  
@@ -47,23 +47,23 @@ Final score: `similarity_value * scale`
  Evaluate textual similarity between expected and actual agent outputs:
  
  ```typescript
- import { runEvals } from "@mastra/core/evals";
- import { createContentSimilarityScorer } from "@mastra/evals/scorers/prebuilt";
- import { myAgent } from "./agent";
+ import { runEvals } from '@mastra/core/evals'
+ import { createContentSimilarityScorer } from '@mastra/evals/scorers/prebuilt'
+ import { myAgent } from './agent'
  
- const scorer = createContentSimilarityScorer();
+ const scorer = createContentSimilarityScorer()
  
  const result = await runEvals({
    data: [
      {
-       input: "Summarize the benefits of TypeScript",
+       input: 'Summarize the benefits of TypeScript',
        groundTruth:
-         "TypeScript provides static typing, better tooling support, and improved code maintainability.",
+         'TypeScript provides static typing, better tooling support, and improved code maintainability.',
      },
      {
-       input: "What is machine learning?",
+       input: 'What is machine learning?',
        groundTruth:
-         "Machine learning is a subset of AI that enables systems to learn from data without explicit programming.",
+         'Machine learning is a subset of AI that enables systems to learn from data without explicit programming.',
      },
    ],
    scorers: [scorer],
@@ -72,11 +72,11 @@ const result = await runEvals({
      console.log({
        score: scorerResults[scorer.id].score,
        groundTruth: scorerResults[scorer.id].groundTruth,
-     });
+     })
    },
- });
+ })
  
- console.log(result.scores);
+ console.log(result.scores)
  ```
  
  For more details on `runEvals`, see the [runEvals reference](https://mastra.ai/reference/evals/run-evals).
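
The scoring section above describes character-level matching after normalization, with a final score of `similarity_value * scale`. A minimal sketch of that pipeline, assuming Sørensen–Dice similarity over character bigrams as the matching step (an assumption for illustration; the package's actual matching algorithm may differ):

```typescript
// Sketch only: normalization plus a character-level similarity measure.
function normalize(text: string, ignoreCase = true, ignoreWhitespace = true): string {
  let t = text
  if (ignoreCase) t = t.toLowerCase()
  if (ignoreWhitespace) t = t.replace(/\s+/g, ' ').trim()
  return t
}

// Assumption: Dice coefficient over bigrams stands in for the real algorithm.
function diceSimilarity(a: string, b: string): number {
  const bigrams = (s: string) => {
    const counts = new Map<string, number>()
    for (let i = 0; i < s.length - 1; i++) {
      const bg = s.slice(i, i + 2)
      counts.set(bg, (counts.get(bg) ?? 0) + 1)
    }
    return counts
  }
  const [ba, bb] = [bigrams(a), bigrams(b)]
  let overlap = 0
  for (const [bg, count] of ba) overlap += Math.min(count, bb.get(bg) ?? 0)
  const total = Math.max(a.length - 1, 0) + Math.max(b.length - 1, 0)
  return total === 0 ? 1 : (2 * overlap) / total
}

// Identical after case/whitespace normalization -> 1
console.log(diceSimilarity(normalize('Hello   World'), normalize('hello world')))
```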
@@ -1,18 +1,18 @@
- # Context Precision Scorer
+ # Context precision scorer
  
  The `createContextPrecisionScorer()` function creates a scorer that evaluates how relevant and well-positioned retrieved context pieces are for generating expected outputs. It uses **Mean Average Precision (MAP)** to reward systems that place relevant context earlier in the sequence.
  
- It is especially useful for these use cases:
+ It's especially useful for these use cases:
  
- **RAG System Evaluation**
+ ## RAG system evaluation
  
  Ideal for evaluating retrieved context in RAG pipelines where:
  
  - Context ordering matters for model performance
- - You need to measure retrieval quality beyond simple relevance
+ - You need to measure retrieval quality beyond basic relevance
  - Early relevant context is more valuable than later relevant context
  
- **Context Window Optimization**
+ ## Context window optimization
  
  Use when optimizing context selection for:
  
@@ -22,19 +22,19 @@ Use when optimizing context selection for:
  
  ## Parameters
  
- **model:** (`MastraModelConfig`): The language model to use for evaluating context relevance
+ **model** (`MastraModelConfig`): The language model to use for evaluating context relevance
  
- **options:** (`ContextPrecisionMetricOptions`): Configuration options for the scorer
+ **options** (`ContextPrecisionMetricOptions`): Configuration options for the scorer
  
  **Note**: Either `context` or `contextExtractor` must be provided. If both are provided, `contextExtractor` takes precedence.
  
- ## .run() Returns
+ ## `.run()` returns
  
- **score:** (`number`): Mean Average Precision score between 0 and scale (default 0-1)
+ **score** (`number`): Mean Average Precision score between 0 and scale (default 0-1)
  
- **reason:** (`string`): Human-readable explanation of the context precision evaluation
+ **reason** (`string`): Human-readable explanation of the context precision evaluation
  
- ## Scoring Details
+ ## Scoring details
  
  ### Mean Average Precision (MAP)
  
@@ -77,7 +77,7 @@ The reason field explains:
  Use results to:
  
  - **Improve retrieval**: Filter out irrelevant context before ranking
- - **Optimize ranking**: Ensure relevant context appears early
+ - **Optimize ranking**: Ensure relevant context surfaces early
  - **Tune chunk size**: Balance context detail vs. relevance precision
  - **Evaluate embeddings**: Test different embedding models for better retrieval
  
@@ -98,38 +98,38 @@ MAP = (1.0 + 0.67) / 2 = 0.835 ≈ **0.83**
  ```typescript
  const scorer = createContextPrecisionScorer({
-   model: "openai/gpt-5.1",
+   model: 'openai/gpt-5.4',
    options: {
      contextExtractor: (input, output) => {
        // Extract context dynamically based on the query
-       const query = input?.inputMessages?.[0]?.content || "";
+       const query = input?.inputMessages?.[0]?.content || ''
  
        // Example: Retrieve from a vector database
-       const searchResults = vectorDB.search(query, { limit: 10 });
-       return searchResults.map((result) => result.content);
+       const searchResults = vectorDB.search(query, { limit: 10 })
+       return searchResults.map(result => result.content)
      },
      scale: 1,
    },
- });
+ })
  ```
  
  ### Large context evaluation
  
  ```typescript
  const scorer = createContextPrecisionScorer({
-   model: "openai/gpt-5.1",
+   model: 'openai/gpt-5.4',
    options: {
      context: [
        // Simulate retrieved documents from vector database
-       "Document 1: Highly relevant content...",
-       "Document 2: Somewhat related content...",
-       "Document 3: Tangentially related...",
-       "Document 4: Not relevant...",
-       "Document 5: Highly relevant content...",
+       'Document 1: Highly relevant content...',
+       'Document 2: Somewhat related content...',
+       'Document 3: Tangentially related...',
+       'Document 4: Not relevant...',
+       'Document 5: Highly relevant content...',
        // ... up to dozens of context pieces
      ],
    },
- });
+ })
  ```
  
  ## Example
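
The worked example in the hunk header above, `MAP = (1.0 + 0.67) / 2 = 0.835 ≈ 0.83`, is the standard Mean Average Precision computation over a ranked context list. A minimal sketch of that formula (the function name is illustrative, not a `@mastra/evals` export):

```typescript
// Sketch only: Mean Average Precision over ranked relevance verdicts.
function meanAveragePrecision(relevant: boolean[]): number {
  let hits = 0
  let precisionSum = 0
  relevant.forEach((isRelevant, rankIndex) => {
    if (isRelevant) {
      hits++
      precisionSum += hits / (rankIndex + 1) // precision at this rank
    }
  })
  return hits === 0 ? 0 : precisionSum / hits
}

// Relevant context at ranks 1 and 3 -> (1.0 + 0.67) / 2 ≈ 0.83
console.log(meanAveragePrecision([true, false, true, false]))
```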
@@ -137,27 +137,27 @@ const scorer = createContextPrecisionScorer({
  Evaluate RAG system context retrieval precision for different queries:
  
  ```typescript
- import { runEvals } from "@mastra/core/evals";
- import { createContextPrecisionScorer } from "@mastra/evals/scorers/prebuilt";
- import { myAgent } from "./agent";
+ import { runEvals } from '@mastra/core/evals'
+ import { createContextPrecisionScorer } from '@mastra/evals/scorers/prebuilt'
+ import { myAgent } from './agent'
  
  const scorer = createContextPrecisionScorer({
-   model: "openai/gpt-4o",
+   model: 'openai/gpt-5.4',
    options: {
      contextExtractor: (input, output) => {
        // Extract context from agent's retrieved documents
-       return output.metadata?.retrievedContext || [];
+       return output.metadata?.retrievedContext || []
      },
    },
- });
+ })
  
  const result = await runEvals({
    data: [
      {
-       input: "How does photosynthesis work in plants?",
+       input: 'How does photosynthesis work in plants?',
      },
      {
-       input: "What are the mental and physical benefits of exercise?",
+       input: 'What are the mental and physical benefits of exercise?',
      },
    ],
    scorers: [scorer],
@@ -166,18 +166,18 @@ const result = await runEvals({
      console.log({
        score: scorerResults[scorer.id].score,
        reason: scorerResults[scorer.id].reason,
-     });
+     })
    },
- });
+ })
  
- console.log(result.scores);
+ console.log(result.scores)
  ```
  
  For more details on `runEvals`, see the [runEvals reference](https://mastra.ai/reference/evals/run-evals).
  
  To add this scorer to an agent, see the [Scorers overview](https://mastra.ai/docs/evals/overview) guide.
  
- ## Comparison with Context Relevance
+ ## Comparison with context relevance
  
  Choose the right scorer for your needs: