@mastra/evals 1.1.2-alpha.0 → 1.2.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/CHANGELOG.md +59 -2
  2. package/LICENSE.md +15 -0
  3. package/dist/chunk-EVBNIL5M.js +606 -0
  4. package/dist/chunk-EVBNIL5M.js.map +1 -0
  5. package/dist/chunk-XRUR5PBK.cjs +632 -0
  6. package/dist/chunk-XRUR5PBK.cjs.map +1 -0
  7. package/dist/docs/SKILL.md +20 -19
  8. package/dist/docs/assets/SOURCE_MAP.json +1 -1
  9. package/dist/docs/references/docs-evals-built-in-scorers.md +2 -1
  10. package/dist/docs/references/docs-evals-overview.md +11 -16
  11. package/dist/docs/references/reference-evals-answer-relevancy.md +25 -25
  12. package/dist/docs/references/reference-evals-answer-similarity.md +33 -35
  13. package/dist/docs/references/reference-evals-bias.md +24 -24
  14. package/dist/docs/references/reference-evals-completeness.md +19 -20
  15. package/dist/docs/references/reference-evals-content-similarity.md +20 -20
  16. package/dist/docs/references/reference-evals-context-precision.md +36 -36
  17. package/dist/docs/references/reference-evals-context-relevance.md +136 -141
  18. package/dist/docs/references/reference-evals-faithfulness.md +24 -24
  19. package/dist/docs/references/reference-evals-hallucination.md +52 -69
  20. package/dist/docs/references/reference-evals-keyword-coverage.md +18 -18
  21. package/dist/docs/references/reference-evals-noise-sensitivity.md +167 -177
  22. package/dist/docs/references/reference-evals-prompt-alignment.md +111 -116
  23. package/dist/docs/references/reference-evals-scorer-utils.md +285 -105
  24. package/dist/docs/references/reference-evals-textual-difference.md +18 -18
  25. package/dist/docs/references/reference-evals-tone-consistency.md +19 -19
  26. package/dist/docs/references/reference-evals-tool-call-accuracy.md +165 -165
  27. package/dist/docs/references/reference-evals-toxicity.md +21 -21
  28. package/dist/docs/references/reference-evals-trajectory-accuracy.md +613 -0
  29. package/dist/scorers/code/index.d.ts +1 -0
  30. package/dist/scorers/code/index.d.ts.map +1 -1
  31. package/dist/scorers/code/trajectory/index.d.ts +147 -0
  32. package/dist/scorers/code/trajectory/index.d.ts.map +1 -0
  33. package/dist/scorers/llm/answer-similarity/index.d.ts +2 -2
  34. package/dist/scorers/llm/context-precision/index.d.ts +2 -2
  35. package/dist/scorers/llm/context-relevance/index.d.ts +1 -1
  36. package/dist/scorers/llm/faithfulness/index.d.ts +1 -1
  37. package/dist/scorers/llm/hallucination/index.d.ts +2 -2
  38. package/dist/scorers/llm/index.d.ts +1 -0
  39. package/dist/scorers/llm/index.d.ts.map +1 -1
  40. package/dist/scorers/llm/noise-sensitivity/index.d.ts +1 -1
  41. package/dist/scorers/llm/prompt-alignment/index.d.ts +5 -5
  42. package/dist/scorers/llm/tool-call-accuracy/index.d.ts +1 -1
  43. package/dist/scorers/llm/toxicity/index.d.ts +1 -1
  44. package/dist/scorers/llm/trajectory/index.d.ts +58 -0
  45. package/dist/scorers/llm/trajectory/index.d.ts.map +1 -0
  46. package/dist/scorers/llm/trajectory/prompts.d.ts +20 -0
  47. package/dist/scorers/llm/trajectory/prompts.d.ts.map +1 -0
  48. package/dist/scorers/prebuilt/index.cjs +638 -59
  49. package/dist/scorers/prebuilt/index.cjs.map +1 -1
  50. package/dist/scorers/prebuilt/index.js +578 -2
  51. package/dist/scorers/prebuilt/index.js.map +1 -1
  52. package/dist/scorers/utils.cjs +41 -17
  53. package/dist/scorers/utils.d.ts +171 -1
  54. package/dist/scorers/utils.d.ts.map +1 -1
  55. package/dist/scorers/utils.js +1 -1
  56. package/package.json +14 -11
  57. package/dist/chunk-OEOE7ZHN.js +0 -195
  58. package/dist/chunk-OEOE7ZHN.js.map +0 -1
  59. package/dist/chunk-W3U7MMDX.cjs +0 -212
  60. package/dist/chunk-W3U7MMDX.cjs.map +0 -1
@@ -1,20 +1,20 @@
1
- # Tone Consistency Scorer
1
+ # Tone consistency scorer
2
2
 
3
3
  The `createToneScorer()` function evaluates the text's emotional tone and sentiment consistency. It can operate in two modes: comparing tone between input/output pairs or analyzing tone stability within a single text.
4
4
 
5
5
  ## Parameters
6
6
 
7
- The `createToneScorer()` function does not take any options.
7
+ The `createToneScorer()` function doesn't take any options.
8
8
 
9
9
  This function returns an instance of the MastraScorer class. See the [MastraScorer reference](https://mastra.ai/reference/evals/mastra-scorer) for details on the `.run()` method and its input/output.
10
10
 
11
- ## .run() Returns
11
+ ## `.run()` returns
12
12
 
13
- **runId:** (`string`): The id of the run (optional).
13
+ **runId** (`string`): The id of the run (optional).
14
14
 
15
- **analyzeStepResult:** (`object`): Object with tone metrics: { responseSentiment: number, referenceSentiment: number, difference: number } (for comparison mode) OR { avgSentiment: number, sentimentVariance: number } (for stability mode)
15
+ **analyzeStepResult** (`object`): Object with tone metrics: { responseSentiment: number, referenceSentiment: number, difference: number } (for comparison mode) OR { avgSentiment: number, sentimentVariance: number } (for stability mode)
16
16
 
17
- **score:** (`number`): Tone consistency/stability score (0-1).
17
+ **score** (`number`): Tone consistency/stability score (0-1).
18
18
 
19
19
  `.run()` returns a result in the following shape:
20
20
 
@@ -32,7 +32,7 @@ This function returns an instance of the MastraScorer class. See the [MastraScor
32
32
  }
33
33
  ```
34
34
 
35
- ## Scoring Details
35
+ ## Scoring details
36
36
 
37
37
  The scorer evaluates sentiment consistency through tone pattern analysis and mode-specific scoring.
38
38
 
@@ -65,7 +65,7 @@ Final score: `mode_specific_score * scale`
65
65
  - 0.1-0.3: Poor consistency with major tone changes
66
66
  - 0.0: No consistency - completely different tones
67
67
 
68
- ### analyzeStepResult
68
+ ### `analyzeStepResult`
69
69
 
70
70
  Object with tone metrics:
71
71
 
@@ -80,21 +80,21 @@ Object with tone metrics:
80
80
  Evaluate tone consistency between related agent responses:
81
81
 
82
82
  ```typescript
83
- import { runEvals } from "@mastra/core/evals";
84
- import { createToneScorer } from "@mastra/evals/scorers/prebuilt";
85
- import { myAgent } from "./agent";
83
+ import { runEvals } from '@mastra/core/evals'
84
+ import { createToneScorer } from '@mastra/evals/scorers/prebuilt'
85
+ import { myAgent } from './agent'
86
86
 
87
- const scorer = createToneScorer();
87
+ const scorer = createToneScorer()
88
88
 
89
89
  const result = await runEvals({
90
90
  data: [
91
91
  {
92
- input: "How was your experience with our service?",
93
- groundTruth: "The service was excellent and exceeded expectations!",
92
+ input: 'How was your experience with our service?',
93
+ groundTruth: 'The service was excellent and exceeded expectations!',
94
94
  },
95
95
  {
96
- input: "Tell me about the customer support",
97
- groundTruth: "The support team was friendly and very helpful.",
96
+ input: 'Tell me about the customer support',
97
+ groundTruth: 'The support team was friendly and very helpful.',
98
98
  },
99
99
  ],
100
100
  scorers: [scorer],
@@ -102,11 +102,11 @@ const result = await runEvals({
102
102
  onItemComplete: ({ scorerResults }) => {
103
103
  console.log({
104
104
  score: scorerResults[scorer.id].score,
105
- });
105
+ })
106
106
  },
107
- });
107
+ })
108
108
 
109
- console.log(result.scores);
109
+ console.log(result.scores)
110
110
  ```
111
111
 
112
112
  For more details on `runEvals`, see the [runEvals reference](https://mastra.ai/reference/evals/run-evals).