@mastra/evals 1.1.0 → 1.1.1-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33):
  1. package/CHANGELOG.md +11 -0
  2. package/dist/docs/SKILL.md +31 -20
  3. package/dist/docs/{SOURCE_MAP.json → assets/SOURCE_MAP.json} +1 -1
  4. package/dist/docs/{evals/02-built-in-scorers.md → references/docs-evals-built-in-scorers.md} +5 -7
  5. package/dist/docs/{evals/01-overview.md → references/docs-evals-overview.md} +26 -10
  6. package/dist/docs/references/reference-evals-answer-relevancy.md +105 -0
  7. package/dist/docs/references/reference-evals-answer-similarity.md +99 -0
  8. package/dist/docs/references/reference-evals-bias.md +120 -0
  9. package/dist/docs/references/reference-evals-completeness.md +137 -0
  10. package/dist/docs/references/reference-evals-content-similarity.md +101 -0
  11. package/dist/docs/references/reference-evals-context-precision.md +196 -0
  12. package/dist/docs/references/reference-evals-context-relevance.md +536 -0
  13. package/dist/docs/references/reference-evals-faithfulness.md +114 -0
  14. package/dist/docs/references/reference-evals-hallucination.md +220 -0
  15. package/dist/docs/references/reference-evals-keyword-coverage.md +128 -0
  16. package/dist/docs/references/reference-evals-noise-sensitivity.md +685 -0
  17. package/dist/docs/references/reference-evals-prompt-alignment.md +619 -0
  18. package/dist/docs/references/reference-evals-scorer-utils.md +330 -0
  19. package/dist/docs/references/reference-evals-textual-difference.md +113 -0
  20. package/dist/docs/references/reference-evals-tone-consistency.md +119 -0
  21. package/dist/docs/references/reference-evals-tool-call-accuracy.md +533 -0
  22. package/dist/docs/references/reference-evals-toxicity.md +123 -0
  23. package/dist/scorers/llm/faithfulness/index.d.ts +3 -1
  24. package/dist/scorers/llm/faithfulness/index.d.ts.map +1 -1
  25. package/dist/scorers/llm/noise-sensitivity/index.d.ts.map +1 -1
  26. package/dist/scorers/llm/prompt-alignment/index.d.ts.map +1 -1
  27. package/dist/scorers/prebuilt/index.cjs +11 -7
  28. package/dist/scorers/prebuilt/index.cjs.map +1 -1
  29. package/dist/scorers/prebuilt/index.js +11 -7
  30. package/dist/scorers/prebuilt/index.js.map +1 -1
  31. package/package.json +3 -4
  32. package/dist/docs/README.md +0 -31
  33. package/dist/docs/evals/03-reference.md +0 -4092
@@ -704,7 +704,9 @@ function createFaithfulnessScorer({
704
704
  type: "agent"
705
705
  }).preprocess({
706
706
  description: "Extract relevant statements from the LLM output",
707
- outputSchema: z.array(z.string()),
707
+ outputSchema: z.object({
708
+ claims: z.array(z.string())
709
+ }),
708
710
  createPrompt: ({ run }) => {
709
711
  const prompt = createFaithfulnessExtractPrompt({ output: getAssistantMessageFromRunOutput(run.output) ?? "" });
710
712
  return prompt;
@@ -718,7 +720,7 @@ function createFaithfulnessScorer({
718
720
  (toolCall) => toolCall.state === "result" ? JSON.stringify(toolCall.result) : ""
719
721
  ) ?? [];
720
722
  const prompt = createFaithfulnessAnalyzePrompt({
721
- claims: results.preprocessStepResult || [],
723
+ claims: results.preprocessStepResult?.claims || [],
722
724
  context
723
725
  });
724
726
  return prompt;
@@ -2117,6 +2119,7 @@ Example responses:
2117
2119
  }
2118
2120
 
2119
2121
  // src/scorers/llm/noise-sensitivity/index.ts
2122
+ var scoreSchema = z.number().refine((n) => n >= 0 && n <= 1, { message: "Score must be between 0 and 1" });
2120
2123
  var analyzeOutputSchema4 = z.object({
2121
2124
  dimensions: z.array(
2122
2125
  z.object({
@@ -2128,7 +2131,7 @@ var analyzeOutputSchema4 = z.object({
2128
2131
  ),
2129
2132
  overallAssessment: z.string(),
2130
2133
  majorIssues: z.array(z.string()).optional().default([]),
2131
- robustnessScore: z.number().min(0).max(1)
2134
+ robustnessScore: scoreSchema
2132
2135
  });
2133
2136
  var DEFAULT_IMPACT_WEIGHTS = {
2134
2137
  none: 1,
@@ -2462,9 +2465,10 @@ Example responses:
2462
2465
  }
2463
2466
 
2464
2467
  // src/scorers/llm/prompt-alignment/index.ts
2468
+ var scoreSchema2 = z.number().refine((n) => n >= 0 && n <= 1, { message: "Score must be between 0 and 1" });
2465
2469
  var analyzeOutputSchema5 = z.object({
2466
2470
  intentAlignment: z.object({
2467
- score: z.number().min(0).max(1),
2471
+ score: scoreSchema2,
2468
2472
  primaryIntent: z.string(),
2469
2473
  isAddressed: z.boolean(),
2470
2474
  reasoning: z.string()
@@ -2477,15 +2481,15 @@ var analyzeOutputSchema5 = z.object({
2477
2481
  reasoning: z.string()
2478
2482
  })
2479
2483
  ),
2480
- overallScore: z.number().min(0).max(1)
2484
+ overallScore: scoreSchema2
2481
2485
  }),
2482
2486
  completeness: z.object({
2483
- score: z.number().min(0).max(1),
2487
+ score: scoreSchema2,
2484
2488
  missingElements: z.array(z.string()),
2485
2489
  reasoning: z.string()
2486
2490
  }),
2487
2491
  responseAppropriateness: z.object({
2488
- score: z.number().min(0).max(1),
2492
+ score: scoreSchema2,
2489
2493
  formatAlignment: z.boolean(),
2490
2494
  toneAlignment: z.boolean(),
2491
2495
  reasoning: z.string()