@mastra/evals 0.13.8-alpha.1 → 0.13.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41):
  1. package/CHANGELOG.md +20 -0
  2. package/dist/scorers/code/completeness/index.d.ts +1 -2
  3. package/dist/scorers/code/completeness/index.d.ts.map +1 -1
  4. package/dist/scorers/code/content-similarity/index.d.ts +1 -2
  5. package/dist/scorers/code/content-similarity/index.d.ts.map +1 -1
  6. package/dist/scorers/code/index.cjs +18 -12
  7. package/dist/scorers/code/index.cjs.map +1 -1
  8. package/dist/scorers/code/index.js +18 -12
  9. package/dist/scorers/code/index.js.map +1 -1
  10. package/dist/scorers/code/keyword-coverage/index.d.ts +1 -2
  11. package/dist/scorers/code/keyword-coverage/index.d.ts.map +1 -1
  12. package/dist/scorers/code/textual-difference/index.d.ts +1 -2
  13. package/dist/scorers/code/textual-difference/index.d.ts.map +1 -1
  14. package/dist/scorers/code/tone/index.d.ts +1 -2
  15. package/dist/scorers/code/tone/index.d.ts.map +1 -1
  16. package/dist/scorers/code/tool-call-accuracy/index.d.ts +1 -2
  17. package/dist/scorers/code/tool-call-accuracy/index.d.ts.map +1 -1
  18. package/dist/scorers/llm/answer-relevancy/index.d.ts +1 -2
  19. package/dist/scorers/llm/answer-relevancy/index.d.ts.map +1 -1
  20. package/dist/scorers/llm/answer-similarity/index.d.ts +1 -2
  21. package/dist/scorers/llm/answer-similarity/index.d.ts.map +1 -1
  22. package/dist/scorers/llm/bias/index.d.ts.map +1 -1
  23. package/dist/scorers/llm/context-precision/index.d.ts +1 -1
  24. package/dist/scorers/llm/context-precision/index.d.ts.map +1 -1
  25. package/dist/scorers/llm/context-relevance/index.d.ts +1 -1
  26. package/dist/scorers/llm/context-relevance/index.d.ts.map +1 -1
  27. package/dist/scorers/llm/faithfulness/index.d.ts +1 -2
  28. package/dist/scorers/llm/faithfulness/index.d.ts.map +1 -1
  29. package/dist/scorers/llm/hallucination/index.d.ts +1 -2
  30. package/dist/scorers/llm/hallucination/index.d.ts.map +1 -1
  31. package/dist/scorers/llm/index.cjs +20 -10
  32. package/dist/scorers/llm/index.cjs.map +1 -1
  33. package/dist/scorers/llm/index.js +20 -10
  34. package/dist/scorers/llm/index.js.map +1 -1
  35. package/dist/scorers/llm/noise-sensitivity/index.d.ts +1 -2
  36. package/dist/scorers/llm/noise-sensitivity/index.d.ts.map +1 -1
  37. package/dist/scorers/llm/tool-call-accuracy/index.d.ts +1 -2
  38. package/dist/scorers/llm/tool-call-accuracy/index.d.ts.map +1 -1
  39. package/dist/scorers/llm/toxicity/index.d.ts +1 -2
  40. package/dist/scorers/llm/toxicity/index.d.ts.map +1 -1
  41. package/package.json +4 -4
@@ -220,7 +220,8 @@ function createAnswerRelevancyScorer({
220
220
  judge: {
221
221
  model,
222
222
  instructions: ANSWER_RELEVANCY_AGENT_INSTRUCTIONS
223
- }
223
+ },
224
+ type: "agent"
224
225
  }).preprocess({
225
226
  description: "Extract relevant statements from the LLM output",
226
227
  outputSchema: extractOutputSchema,
@@ -436,7 +437,8 @@ function createAnswerSimilarityScorer({
436
437
  judge: {
437
438
  model,
438
439
  instructions: ANSWER_SIMILARITY_INSTRUCTIONS
439
- }
440
+ },
441
+ type: "agent"
440
442
  }).preprocess({
441
443
  description: "Extract semantic units from output and ground truth",
442
444
  outputSchema: extractOutputSchema2,
@@ -692,7 +694,8 @@ function createFaithfulnessScorer({
692
694
  judge: {
693
695
  model,
694
696
  instructions: FAITHFULNESS_AGENT_INSTRUCTIONS
695
- }
697
+ },
698
+ type: "agent"
696
699
  }).preprocess({
697
700
  description: "Extract relevant statements from the LLM output",
698
701
  outputSchema: z.array(z.string()),
@@ -849,7 +852,8 @@ function createBiasScorer({ model, options }) {
849
852
  judge: {
850
853
  model,
851
854
  instructions: BIAS_AGENT_INSTRUCTIONS
852
- }
855
+ },
856
+ type: "agent"
853
857
  }).preprocess({
854
858
  description: "Extract relevant statements from the LLM output",
855
859
  outputSchema: z.object({
@@ -1082,7 +1086,8 @@ function createHallucinationScorer({
1082
1086
  judge: {
1083
1087
  model,
1084
1088
  instructions: HALLUCINATION_AGENT_INSTRUCTIONS
1085
- }
1089
+ },
1090
+ type: "agent"
1086
1091
  }).preprocess({
1087
1092
  description: "Extract all claims from the given output",
1088
1093
  outputSchema: z.object({
@@ -1220,7 +1225,8 @@ function createToxicityScorer({ model, options }) {
1220
1225
  judge: {
1221
1226
  model,
1222
1227
  instructions: TOXICITY_AGENT_INSTRUCTIONS
1223
- }
1228
+ },
1229
+ type: "agent"
1224
1230
  }).analyze({
1225
1231
  description: "Score the relevance of the statements to the input",
1226
1232
  outputSchema: z.object({ verdicts: z.array(z.object({ verdict: z.string(), reason: z.string() })) }),
@@ -1368,7 +1374,8 @@ function createToolCallAccuracyScorerLLM({ model, availableTools }) {
1368
1374
  judge: {
1369
1375
  model,
1370
1376
  instructions: TOOL_SELECTION_ACCURACY_INSTRUCTIONS
1371
- }
1377
+ },
1378
+ type: "agent"
1372
1379
  }).preprocess(async ({ run }) => {
1373
1380
  const isInputInvalid = !run.input || !run.input.inputMessages || run.input.inputMessages.length === 0;
1374
1381
  const isOutputInvalid = !run.output || run.output.length === 0;
@@ -1605,7 +1612,8 @@ function createContextRelevanceScorerLLM({
1605
1612
  judge: {
1606
1613
  model,
1607
1614
  instructions: CONTEXT_RELEVANCE_INSTRUCTIONS
1608
- }
1615
+ },
1616
+ type: "agent"
1609
1617
  }).analyze({
1610
1618
  description: "Analyze the relevance and utility of provided context",
1611
1619
  outputSchema: analyzeOutputSchema3,
@@ -1827,7 +1835,8 @@ function createContextPrecisionScorer({
1827
1835
  judge: {
1828
1836
  model,
1829
1837
  instructions: CONTEXT_PRECISION_AGENT_INSTRUCTIONS
1830
- }
1838
+ },
1839
+ type: "agent"
1831
1840
  }).analyze({
1832
1841
  description: "Evaluate the relevance of each context piece for generating the expected output",
1833
1842
  outputSchema: contextRelevanceOutputSchema,
@@ -2118,7 +2127,8 @@ function createNoiseSensitivityScorerLLM({
2118
2127
  judge: {
2119
2128
  model,
2120
2129
  instructions: NOISE_SENSITIVITY_INSTRUCTIONS
2121
- }
2130
+ },
2131
+ type: "agent"
2122
2132
  }).analyze({
2123
2133
  description: "Analyze the impact of noise on agent response quality",
2124
2134
  outputSchema: analyzeOutputSchema4,