@mastra/evals 1.1.2-alpha.0 → 1.2.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +59 -2
- package/LICENSE.md +15 -0
- package/dist/chunk-EVBNIL5M.js +606 -0
- package/dist/chunk-EVBNIL5M.js.map +1 -0
- package/dist/chunk-XRUR5PBK.cjs +632 -0
- package/dist/chunk-XRUR5PBK.cjs.map +1 -0
- package/dist/docs/SKILL.md +20 -19
- package/dist/docs/assets/SOURCE_MAP.json +1 -1
- package/dist/docs/references/docs-evals-built-in-scorers.md +2 -1
- package/dist/docs/references/docs-evals-overview.md +11 -16
- package/dist/docs/references/reference-evals-answer-relevancy.md +25 -25
- package/dist/docs/references/reference-evals-answer-similarity.md +33 -35
- package/dist/docs/references/reference-evals-bias.md +24 -24
- package/dist/docs/references/reference-evals-completeness.md +19 -20
- package/dist/docs/references/reference-evals-content-similarity.md +20 -20
- package/dist/docs/references/reference-evals-context-precision.md +36 -36
- package/dist/docs/references/reference-evals-context-relevance.md +136 -141
- package/dist/docs/references/reference-evals-faithfulness.md +24 -24
- package/dist/docs/references/reference-evals-hallucination.md +52 -69
- package/dist/docs/references/reference-evals-keyword-coverage.md +18 -18
- package/dist/docs/references/reference-evals-noise-sensitivity.md +167 -177
- package/dist/docs/references/reference-evals-prompt-alignment.md +111 -116
- package/dist/docs/references/reference-evals-scorer-utils.md +285 -105
- package/dist/docs/references/reference-evals-textual-difference.md +18 -18
- package/dist/docs/references/reference-evals-tone-consistency.md +19 -19
- package/dist/docs/references/reference-evals-tool-call-accuracy.md +165 -165
- package/dist/docs/references/reference-evals-toxicity.md +21 -21
- package/dist/docs/references/reference-evals-trajectory-accuracy.md +613 -0
- package/dist/scorers/code/index.d.ts +1 -0
- package/dist/scorers/code/index.d.ts.map +1 -1
- package/dist/scorers/code/trajectory/index.d.ts +147 -0
- package/dist/scorers/code/trajectory/index.d.ts.map +1 -0
- package/dist/scorers/llm/answer-similarity/index.d.ts +2 -2
- package/dist/scorers/llm/context-precision/index.d.ts +2 -2
- package/dist/scorers/llm/context-relevance/index.d.ts +1 -1
- package/dist/scorers/llm/faithfulness/index.d.ts +1 -1
- package/dist/scorers/llm/hallucination/index.d.ts +2 -2
- package/dist/scorers/llm/index.d.ts +1 -0
- package/dist/scorers/llm/index.d.ts.map +1 -1
- package/dist/scorers/llm/noise-sensitivity/index.d.ts +1 -1
- package/dist/scorers/llm/prompt-alignment/index.d.ts +5 -5
- package/dist/scorers/llm/tool-call-accuracy/index.d.ts +1 -1
- package/dist/scorers/llm/toxicity/index.d.ts +1 -1
- package/dist/scorers/llm/trajectory/index.d.ts +58 -0
- package/dist/scorers/llm/trajectory/index.d.ts.map +1 -0
- package/dist/scorers/llm/trajectory/prompts.d.ts +20 -0
- package/dist/scorers/llm/trajectory/prompts.d.ts.map +1 -0
- package/dist/scorers/prebuilt/index.cjs +638 -59
- package/dist/scorers/prebuilt/index.cjs.map +1 -1
- package/dist/scorers/prebuilt/index.js +578 -2
- package/dist/scorers/prebuilt/index.js.map +1 -1
- package/dist/scorers/utils.cjs +41 -17
- package/dist/scorers/utils.d.ts +171 -1
- package/dist/scorers/utils.d.ts.map +1 -1
- package/dist/scorers/utils.js +1 -1
- package/package.json +14 -11
- package/dist/chunk-OEOE7ZHN.js +0 -195
- package/dist/chunk-OEOE7ZHN.js.map +0 -1
- package/dist/chunk-W3U7MMDX.cjs +0 -212
- package/dist/chunk-W3U7MMDX.cjs.map +0 -1
|
@@ -1,20 +1,20 @@
|
|
|
1
|
-
# Tone
|
|
1
|
+
# Tone consistency scorer
|
|
2
2
|
|
|
3
3
|
The `createToneScorer()` function evaluates the text's emotional tone and sentiment consistency. It can operate in two modes: comparing tone between input/output pairs or analyzing tone stability within a single text.
|
|
4
4
|
|
|
5
5
|
## Parameters
|
|
6
6
|
|
|
7
|
-
The `createToneScorer()` function
|
|
7
|
+
The `createToneScorer()` function doesn't take any options.
|
|
8
8
|
|
|
9
9
|
This function returns an instance of the MastraScorer class. See the [MastraScorer reference](https://mastra.ai/reference/evals/mastra-scorer) for details on the `.run()` method and its input/output.
|
|
10
10
|
|
|
11
|
-
##
|
|
11
|
+
## `.run()` returns
|
|
12
12
|
|
|
13
|
-
**runId
|
|
13
|
+
**runId** (`string`): The ID of the run (optional).
|
|
14
14
|
|
|
15
|
-
**analyzeStepResult
|
|
15
|
+
**analyzeStepResult** (`object`): Object with tone metrics: `{ responseSentiment: number, referenceSentiment: number, difference: number }` (for comparison mode) or `{ avgSentiment: number, sentimentVariance: number }` (for stability mode).
|
|
16
16
|
|
|
17
|
-
**score
|
|
17
|
+
**score** (`number`): Tone consistency/stability score (0-1).
|
|
18
18
|
|
|
19
19
|
`.run()` returns a result in the following shape:
|
|
20
20
|
|
|
@@ -32,7 +32,7 @@ This function returns an instance of the MastraScorer class. See the [MastraScor
|
|
|
32
32
|
}
|
|
33
33
|
```
|
|
34
34
|
|
|
35
|
-
## Scoring
|
|
35
|
+
## Scoring details
|
|
36
36
|
|
|
37
37
|
The scorer evaluates sentiment consistency through tone pattern analysis and mode-specific scoring.
|
|
38
38
|
|
|
@@ -65,7 +65,7 @@ Final score: `mode_specific_score * scale`
|
|
|
65
65
|
- 0.1-0.3: Poor consistency with major tone changes
|
|
66
66
|
- 0.0: No consistency - completely different tones
|
|
67
67
|
|
|
68
|
-
### analyzeStepResult
|
|
68
|
+
### `analyzeStepResult`
|
|
69
69
|
|
|
70
70
|
Object with tone metrics:
|
|
71
71
|
|
|
@@ -80,21 +80,21 @@ Object with tone metrics:
|
|
|
80
80
|
Evaluate tone consistency between related agent responses:
|
|
81
81
|
|
|
82
82
|
```typescript
|
|
83
|
-
import { runEvals } from
|
|
84
|
-
import { createToneScorer } from
|
|
85
|
-
import { myAgent } from
|
|
83
|
+
import { runEvals } from '@mastra/core/evals'
|
|
84
|
+
import { createToneScorer } from '@mastra/evals/scorers/prebuilt'
|
|
85
|
+
import { myAgent } from './agent'
|
|
86
86
|
|
|
87
|
-
const scorer = createToneScorer()
|
|
87
|
+
const scorer = createToneScorer()
|
|
88
88
|
|
|
89
89
|
const result = await runEvals({
|
|
90
90
|
data: [
|
|
91
91
|
{
|
|
92
|
-
input:
|
|
93
|
-
groundTruth:
|
|
92
|
+
input: 'How was your experience with our service?',
|
|
93
|
+
groundTruth: 'The service was excellent and exceeded expectations!',
|
|
94
94
|
},
|
|
95
95
|
{
|
|
96
|
-
input:
|
|
97
|
-
groundTruth:
|
|
96
|
+
input: 'Tell me about the customer support',
|
|
97
|
+
groundTruth: 'The support team was friendly and very helpful.',
|
|
98
98
|
},
|
|
99
99
|
],
|
|
100
100
|
scorers: [scorer],
|
|
@@ -102,11 +102,11 @@ const result = await runEvals({
|
|
|
102
102
|
onItemComplete: ({ scorerResults }) => {
|
|
103
103
|
console.log({
|
|
104
104
|
score: scorerResults[scorer.id].score,
|
|
105
|
-
})
|
|
105
|
+
})
|
|
106
106
|
},
|
|
107
|
-
})
|
|
107
|
+
})
|
|
108
108
|
|
|
109
|
-
console.log(result.scores)
|
|
109
|
+
console.log(result.scores)
|
|
110
110
|
```
|
|
111
111
|
|
|
112
112
|
For more details on `runEvals`, see the [runEvals reference](https://mastra.ai/reference/evals/run-evals).
|