@mastra/evals 1.1.0 → 1.1.1-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33):
  1. package/CHANGELOG.md +11 -0
  2. package/dist/docs/SKILL.md +31 -20
  3. package/dist/docs/{SOURCE_MAP.json → assets/SOURCE_MAP.json} +1 -1
  4. package/dist/docs/{evals/02-built-in-scorers.md → references/docs-evals-built-in-scorers.md} +5 -7
  5. package/dist/docs/{evals/01-overview.md → references/docs-evals-overview.md} +26 -10
  6. package/dist/docs/references/reference-evals-answer-relevancy.md +105 -0
  7. package/dist/docs/references/reference-evals-answer-similarity.md +99 -0
  8. package/dist/docs/references/reference-evals-bias.md +120 -0
  9. package/dist/docs/references/reference-evals-completeness.md +137 -0
  10. package/dist/docs/references/reference-evals-content-similarity.md +101 -0
  11. package/dist/docs/references/reference-evals-context-precision.md +196 -0
  12. package/dist/docs/references/reference-evals-context-relevance.md +536 -0
  13. package/dist/docs/references/reference-evals-faithfulness.md +114 -0
  14. package/dist/docs/references/reference-evals-hallucination.md +220 -0
  15. package/dist/docs/references/reference-evals-keyword-coverage.md +128 -0
  16. package/dist/docs/references/reference-evals-noise-sensitivity.md +685 -0
  17. package/dist/docs/references/reference-evals-prompt-alignment.md +619 -0
  18. package/dist/docs/references/reference-evals-scorer-utils.md +330 -0
  19. package/dist/docs/references/reference-evals-textual-difference.md +113 -0
  20. package/dist/docs/references/reference-evals-tone-consistency.md +119 -0
  21. package/dist/docs/references/reference-evals-tool-call-accuracy.md +533 -0
  22. package/dist/docs/references/reference-evals-toxicity.md +123 -0
  23. package/dist/scorers/llm/faithfulness/index.d.ts +3 -1
  24. package/dist/scorers/llm/faithfulness/index.d.ts.map +1 -1
  25. package/dist/scorers/llm/noise-sensitivity/index.d.ts.map +1 -1
  26. package/dist/scorers/llm/prompt-alignment/index.d.ts.map +1 -1
  27. package/dist/scorers/prebuilt/index.cjs +11 -7
  28. package/dist/scorers/prebuilt/index.cjs.map +1 -1
  29. package/dist/scorers/prebuilt/index.js +11 -7
  30. package/dist/scorers/prebuilt/index.js.map +1 -1
  31. package/package.json +3 -4
  32. package/dist/docs/README.md +0 -31
  33. package/dist/docs/evals/03-reference.md +0 -4092
@@ -713,7 +713,9 @@ function createFaithfulnessScorer({
713
713
  type: "agent"
714
714
  }).preprocess({
715
715
  description: "Extract relevant statements from the LLM output",
716
- outputSchema: zod.z.array(zod.z.string()),
716
+ outputSchema: zod.z.object({
717
+ claims: zod.z.array(zod.z.string())
718
+ }),
717
719
  createPrompt: ({ run }) => {
718
720
  const prompt = createFaithfulnessExtractPrompt({ output: chunkW3U7MMDX_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" });
719
721
  return prompt;
@@ -727,7 +729,7 @@ function createFaithfulnessScorer({
727
729
  (toolCall) => toolCall.state === "result" ? JSON.stringify(toolCall.result) : ""
728
730
  ) ?? [];
729
731
  const prompt = createFaithfulnessAnalyzePrompt({
730
- claims: results.preprocessStepResult || [],
732
+ claims: results.preprocessStepResult?.claims || [],
731
733
  context
732
734
  });
733
735
  return prompt;
@@ -2126,6 +2128,7 @@ Example responses:
2126
2128
  }
2127
2129
 
2128
2130
  // src/scorers/llm/noise-sensitivity/index.ts
2131
+ var scoreSchema = zod.z.number().refine((n) => n >= 0 && n <= 1, { message: "Score must be between 0 and 1" });
2129
2132
  var analyzeOutputSchema4 = zod.z.object({
2130
2133
  dimensions: zod.z.array(
2131
2134
  zod.z.object({
@@ -2137,7 +2140,7 @@ var analyzeOutputSchema4 = zod.z.object({
2137
2140
  ),
2138
2141
  overallAssessment: zod.z.string(),
2139
2142
  majorIssues: zod.z.array(zod.z.string()).optional().default([]),
2140
- robustnessScore: zod.z.number().min(0).max(1)
2143
+ robustnessScore: scoreSchema
2141
2144
  });
2142
2145
  var DEFAULT_IMPACT_WEIGHTS = {
2143
2146
  none: 1,
@@ -2471,9 +2474,10 @@ Example responses:
2471
2474
  }
2472
2475
 
2473
2476
  // src/scorers/llm/prompt-alignment/index.ts
2477
+ var scoreSchema2 = zod.z.number().refine((n) => n >= 0 && n <= 1, { message: "Score must be between 0 and 1" });
2474
2478
  var analyzeOutputSchema5 = zod.z.object({
2475
2479
  intentAlignment: zod.z.object({
2476
- score: zod.z.number().min(0).max(1),
2480
+ score: scoreSchema2,
2477
2481
  primaryIntent: zod.z.string(),
2478
2482
  isAddressed: zod.z.boolean(),
2479
2483
  reasoning: zod.z.string()
@@ -2486,15 +2490,15 @@ var analyzeOutputSchema5 = zod.z.object({
2486
2490
  reasoning: zod.z.string()
2487
2491
  })
2488
2492
  ),
2489
- overallScore: zod.z.number().min(0).max(1)
2493
+ overallScore: scoreSchema2
2490
2494
  }),
2491
2495
  completeness: zod.z.object({
2492
- score: zod.z.number().min(0).max(1),
2496
+ score: scoreSchema2,
2493
2497
  missingElements: zod.z.array(zod.z.string()),
2494
2498
  reasoning: zod.z.string()
2495
2499
  }),
2496
2500
  responseAppropriateness: zod.z.object({
2497
- score: zod.z.number().min(0).max(1),
2501
+ score: scoreSchema2,
2498
2502
  formatAlignment: zod.z.boolean(),
2499
2503
  toneAlignment: zod.z.boolean(),
2500
2504
  reasoning: zod.z.string()