@mastra/evals 0.13.8-alpha.1 → 0.13.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/CHANGELOG.md +20 -0
  2. package/dist/scorers/code/completeness/index.d.ts +1 -2
  3. package/dist/scorers/code/completeness/index.d.ts.map +1 -1
  4. package/dist/scorers/code/content-similarity/index.d.ts +1 -2
  5. package/dist/scorers/code/content-similarity/index.d.ts.map +1 -1
  6. package/dist/scorers/code/index.cjs +18 -12
  7. package/dist/scorers/code/index.cjs.map +1 -1
  8. package/dist/scorers/code/index.js +18 -12
  9. package/dist/scorers/code/index.js.map +1 -1
  10. package/dist/scorers/code/keyword-coverage/index.d.ts +1 -2
  11. package/dist/scorers/code/keyword-coverage/index.d.ts.map +1 -1
  12. package/dist/scorers/code/textual-difference/index.d.ts +1 -2
  13. package/dist/scorers/code/textual-difference/index.d.ts.map +1 -1
  14. package/dist/scorers/code/tone/index.d.ts +1 -2
  15. package/dist/scorers/code/tone/index.d.ts.map +1 -1
  16. package/dist/scorers/code/tool-call-accuracy/index.d.ts +1 -2
  17. package/dist/scorers/code/tool-call-accuracy/index.d.ts.map +1 -1
  18. package/dist/scorers/llm/answer-relevancy/index.d.ts +1 -2
  19. package/dist/scorers/llm/answer-relevancy/index.d.ts.map +1 -1
  20. package/dist/scorers/llm/answer-similarity/index.d.ts +1 -2
  21. package/dist/scorers/llm/answer-similarity/index.d.ts.map +1 -1
  22. package/dist/scorers/llm/bias/index.d.ts.map +1 -1
  23. package/dist/scorers/llm/context-precision/index.d.ts +1 -1
  24. package/dist/scorers/llm/context-precision/index.d.ts.map +1 -1
  25. package/dist/scorers/llm/context-relevance/index.d.ts +1 -1
  26. package/dist/scorers/llm/context-relevance/index.d.ts.map +1 -1
  27. package/dist/scorers/llm/faithfulness/index.d.ts +1 -2
  28. package/dist/scorers/llm/faithfulness/index.d.ts.map +1 -1
  29. package/dist/scorers/llm/hallucination/index.d.ts +1 -2
  30. package/dist/scorers/llm/hallucination/index.d.ts.map +1 -1
  31. package/dist/scorers/llm/index.cjs +20 -10
  32. package/dist/scorers/llm/index.cjs.map +1 -1
  33. package/dist/scorers/llm/index.js +20 -10
  34. package/dist/scorers/llm/index.js.map +1 -1
  35. package/dist/scorers/llm/noise-sensitivity/index.d.ts +1 -2
  36. package/dist/scorers/llm/noise-sensitivity/index.d.ts.map +1 -1
  37. package/dist/scorers/llm/tool-call-accuracy/index.d.ts +1 -2
  38. package/dist/scorers/llm/tool-call-accuracy/index.d.ts.map +1 -1
  39. package/dist/scorers/llm/toxicity/index.d.ts +1 -2
  40. package/dist/scorers/llm/toxicity/index.d.ts.map +1 -1
  41. package/package.json +4 -4
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/bias/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAW3F,MAAM,WAAW,iBAAiB;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,wBAAgB,gBAAgB,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE;IAAE,KAAK,EAAE,aAAa,CAAC;IAAC,OAAO,CAAC,EAAE,iBAAiB,CAAA;CAAE;;;;;;;6FA+CzG"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/bias/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAW3F,MAAM,WAAW,iBAAiB;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,wBAAgB,gBAAgB,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE;IAAE,KAAK,EAAE,aAAa,CAAC;IAAC,OAAO,CAAC,EAAE,iBAAiB,CAAA;CAAE;;;;;;;6FAgDzG"}
@@ -8,7 +8,7 @@ export interface ContextPrecisionMetricOptions {
8
8
  export declare function createContextPrecisionScorer({ model, options, }: {
9
9
  model: MastraLanguageModel;
10
10
  options: ContextPrecisionMetricOptions;
11
- }): import("@mastra/core/scores").MastraScorer<string, ScorerRunInputForAgent, ScorerRunOutputForAgent, Record<"analyzeStepResult", {
11
+ }): import("@mastra/core/scores").MastraScorer<"Context Precision Scorer", ScorerRunInputForAgent, ScorerRunOutputForAgent, Record<"analyzeStepResult", {
12
12
  verdicts: {
13
13
  verdict: string;
14
14
  reason: string;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/context-precision/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAE9D,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAS3F,MAAM,WAAW,6BAA6B;IAC5C,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,gBAAgB,CAAC,EAAE,CAAC,KAAK,EAAE,sBAAsB,EAAE,MAAM,EAAE,uBAAuB,KAAK,MAAM,EAAE,CAAC;CACjG;AAYD,wBAAgB,4BAA4B,CAAC,EAC3C,KAAK,EACL,OAAO,GACR,EAAE;IACD,KAAK,EAAE,mBAAmB,CAAC;IAC3B,OAAO,EAAE,6BAA6B,CAAC;CACxC;;;;;;6FAkGA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/context-precision/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAE9D,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAS3F,MAAM,WAAW,6BAA6B;IAC5C,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,gBAAgB,CAAC,EAAE,CAAC,KAAK,EAAE,sBAAsB,EAAE,MAAM,EAAE,uBAAuB,KAAK,MAAM,EAAE,CAAC;CACjG;AAYD,wBAAgB,4BAA4B,CAAC,EAC3C,KAAK,EACL,OAAO,GACR,EAAE;IACD,KAAK,EAAE,mBAAmB,CAAC;IAC3B,OAAO,EAAE,6BAA6B,CAAC;CACxC;;;;;;6FAmGA"}
@@ -13,7 +13,7 @@ export interface ContextRelevanceOptions {
13
13
  export declare function createContextRelevanceScorerLLM({ model, options, }: {
14
14
  model: MastraLanguageModel;
15
15
  options: ContextRelevanceOptions;
16
- }): import("@mastra/core/scores").MastraScorer<string, ScorerRunInputForAgent, ScorerRunOutputForAgent, Record<"analyzeStepResult", {
16
+ }): import("@mastra/core/scores").MastraScorer<"Context Relevance (LLM)", ScorerRunInputForAgent, ScorerRunOutputForAgent, Record<"analyzeStepResult", {
17
17
  evaluations: {
18
18
  reasoning: string;
19
19
  context_index: number;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/context-relevance/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAC9D,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAM3F,MAAM,WAAW,uBAAuB;IACtC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,gBAAgB,CAAC,EAAE,CAAC,KAAK,EAAE,sBAAsB,EAAE,MAAM,EAAE,uBAAuB,KAAK,MAAM,EAAE,CAAC;IAChG,SAAS,CAAC,EAAE;QACV,0BAA0B,CAAC,EAAE,MAAM,CAAC;QACpC,qBAAqB,CAAC,EAAE,MAAM,CAAC;QAC/B,wBAAwB,CAAC,EAAE,MAAM,CAAC;KACnC,CAAC;CACH;AAuBD,wBAAgB,+BAA+B,CAAC,EAC9C,KAAK,EACL,OAAO,GACR,EAAE;IACD,KAAK,EAAE,mBAAmB,CAAC;IAC3B,OAAO,EAAE,uBAAuB,CAAC;CAClC;;;;;;;;;;6FA4IA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/context-relevance/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAC9D,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAM3F,MAAM,WAAW,uBAAuB;IACtC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,gBAAgB,CAAC,EAAE,CAAC,KAAK,EAAE,sBAAsB,EAAE,MAAM,EAAE,uBAAuB,KAAK,MAAM,EAAE,CAAC;IAChG,SAAS,CAAC,EAAE;QACV,0BAA0B,CAAC,EAAE,MAAM,CAAC;QACpC,qBAAqB,CAAC,EAAE,MAAM,CAAC;QAC/B,wBAAwB,CAAC,EAAE,MAAM,CAAC;KACnC,CAAC;CACH;AAuBD,wBAAgB,+BAA+B,CAAC,EAC9C,KAAK,EACL,OAAO,GACR,EAAE;IACD,KAAK,EAAE,mBAAmB,CAAC;IAC3B,OAAO,EAAE,uBAAuB,CAAC;CAClC;;;;;;;;;;6FA6IA"}
@@ -1,5 +1,4 @@
1
1
  import type { LanguageModel } from '@mastra/core/llm';
2
- import type { ScorerRunInputForAgent, ScorerRunOutputForAgent } from '@mastra/core/scores';
3
2
  export interface FaithfulnessMetricOptions {
4
3
  scale?: number;
5
4
  context?: string[];
@@ -7,7 +6,7 @@ export interface FaithfulnessMetricOptions {
7
6
  export declare function createFaithfulnessScorer({ model, options, }: {
8
7
  model: LanguageModel;
9
8
  options?: FaithfulnessMetricOptions;
10
- }): import("@mastra/core/scores").MastraScorer<string, ScorerRunInputForAgent, ScorerRunOutputForAgent, Record<"preprocessStepResult", string[]> & Record<"analyzeStepResult", {
9
+ }): import("@mastra/core/scores").MastraScorer<"Faithfulness Scorer", import("@mastra/core/scores").ScorerRunInputForAgent, import("@mastra/core/scores").ScorerRunOutputForAgent, Record<"preprocessStepResult", string[]> & Record<"analyzeStepResult", {
11
10
  verdicts: {
12
11
  verdict: string;
13
12
  reason: string;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/faithfulness/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAU3F,MAAM,WAAW,yBAAyB;IACxC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,wBAAgB,wBAAwB,CAAC,EACvC,KAAK,EACL,OAAO,GACR,EAAE;IACD,KAAK,EAAE,aAAa,CAAC;IACrB,OAAO,CAAC,EAAE,yBAAyB,CAAC;CACrC;;;;;6FAgEA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/faithfulness/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAWtD,MAAM,WAAW,yBAAyB;IACxC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,wBAAgB,wBAAwB,CAAC,EACvC,KAAK,EACL,OAAO,GACR,EAAE;IACD,KAAK,EAAE,aAAa,CAAC;IACrB,OAAO,CAAC,EAAE,yBAAyB,CAAC;CACrC;;;;;6FAiEA"}
@@ -1,5 +1,4 @@
1
1
  import type { LanguageModel } from '@mastra/core/llm';
2
- import type { ScorerRunInputForAgent, ScorerRunOutputForAgent } from '@mastra/core/scores';
3
2
  export interface HallucinationMetricOptions {
4
3
  scale?: number;
5
4
  context: string[];
@@ -7,7 +6,7 @@ export interface HallucinationMetricOptions {
7
6
  export declare function createHallucinationScorer({ model, options, }: {
8
7
  model: LanguageModel;
9
8
  options?: HallucinationMetricOptions;
10
- }): import("@mastra/core/scores").MastraScorer<string, ScorerRunInputForAgent, ScorerRunOutputForAgent, Record<"preprocessStepResult", {
9
+ }): import("@mastra/core/scores").MastraScorer<"Hallucination Scorer", import("@mastra/core/scores").ScorerRunInputForAgent, import("@mastra/core/scores").ScorerRunOutputForAgent, Record<"preprocessStepResult", {
11
10
  claims: string[];
12
11
  }> & Record<"analyzeStepResult", {
13
12
  verdicts: {
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/hallucination/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAW3F,MAAM,WAAW,0BAA0B;IACzC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,EAAE,CAAC;CACnB;AAED,wBAAgB,yBAAyB,CAAC,EACxC,KAAK,EACL,OAAO,GACR,EAAE;IACD,KAAK,EAAE,aAAa,CAAC;IACrB,OAAO,CAAC,EAAE,0BAA0B,CAAC;CACtC;;;;;;;;6FA0DA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/hallucination/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAYtD,MAAM,WAAW,0BAA0B;IACzC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,EAAE,CAAC;CACnB;AAED,wBAAgB,yBAAyB,CAAC,EACxC,KAAK,EACL,OAAO,GACR,EAAE;IACD,KAAK,EAAE,aAAa,CAAC;IACrB,OAAO,CAAC,EAAE,0BAA0B,CAAC;CACtC;;;;;;;;6FA2DA"}
@@ -222,7 +222,8 @@ function createAnswerRelevancyScorer({
222
222
  judge: {
223
223
  model,
224
224
  instructions: ANSWER_RELEVANCY_AGENT_INSTRUCTIONS
225
- }
225
+ },
226
+ type: "agent"
226
227
  }).preprocess({
227
228
  description: "Extract relevant statements from the LLM output",
228
229
  outputSchema: extractOutputSchema,
@@ -438,7 +439,8 @@ function createAnswerSimilarityScorer({
438
439
  judge: {
439
440
  model,
440
441
  instructions: ANSWER_SIMILARITY_INSTRUCTIONS
441
- }
442
+ },
443
+ type: "agent"
442
444
  }).preprocess({
443
445
  description: "Extract semantic units from output and ground truth",
444
446
  outputSchema: extractOutputSchema2,
@@ -694,7 +696,8 @@ function createFaithfulnessScorer({
694
696
  judge: {
695
697
  model,
696
698
  instructions: FAITHFULNESS_AGENT_INSTRUCTIONS
697
- }
699
+ },
700
+ type: "agent"
698
701
  }).preprocess({
699
702
  description: "Extract relevant statements from the LLM output",
700
703
  outputSchema: zod.z.array(zod.z.string()),
@@ -851,7 +854,8 @@ function createBiasScorer({ model, options }) {
851
854
  judge: {
852
855
  model,
853
856
  instructions: BIAS_AGENT_INSTRUCTIONS
854
- }
857
+ },
858
+ type: "agent"
855
859
  }).preprocess({
856
860
  description: "Extract relevant statements from the LLM output",
857
861
  outputSchema: zod.z.object({
@@ -1084,7 +1088,8 @@ function createHallucinationScorer({
1084
1088
  judge: {
1085
1089
  model,
1086
1090
  instructions: HALLUCINATION_AGENT_INSTRUCTIONS
1087
- }
1091
+ },
1092
+ type: "agent"
1088
1093
  }).preprocess({
1089
1094
  description: "Extract all claims from the given output",
1090
1095
  outputSchema: zod.z.object({
@@ -1222,7 +1227,8 @@ function createToxicityScorer({ model, options }) {
1222
1227
  judge: {
1223
1228
  model,
1224
1229
  instructions: TOXICITY_AGENT_INSTRUCTIONS
1225
- }
1230
+ },
1231
+ type: "agent"
1226
1232
  }).analyze({
1227
1233
  description: "Score the relevance of the statements to the input",
1228
1234
  outputSchema: zod.z.object({ verdicts: zod.z.array(zod.z.object({ verdict: zod.z.string(), reason: zod.z.string() })) }),
@@ -1370,7 +1376,8 @@ function createToolCallAccuracyScorerLLM({ model, availableTools }) {
1370
1376
  judge: {
1371
1377
  model,
1372
1378
  instructions: TOOL_SELECTION_ACCURACY_INSTRUCTIONS
1373
- }
1379
+ },
1380
+ type: "agent"
1374
1381
  }).preprocess(async ({ run }) => {
1375
1382
  const isInputInvalid = !run.input || !run.input.inputMessages || run.input.inputMessages.length === 0;
1376
1383
  const isOutputInvalid = !run.output || run.output.length === 0;
@@ -1607,7 +1614,8 @@ function createContextRelevanceScorerLLM({
1607
1614
  judge: {
1608
1615
  model,
1609
1616
  instructions: CONTEXT_RELEVANCE_INSTRUCTIONS
1610
- }
1617
+ },
1618
+ type: "agent"
1611
1619
  }).analyze({
1612
1620
  description: "Analyze the relevance and utility of provided context",
1613
1621
  outputSchema: analyzeOutputSchema3,
@@ -1829,7 +1837,8 @@ function createContextPrecisionScorer({
1829
1837
  judge: {
1830
1838
  model,
1831
1839
  instructions: CONTEXT_PRECISION_AGENT_INSTRUCTIONS
1832
- }
1840
+ },
1841
+ type: "agent"
1833
1842
  }).analyze({
1834
1843
  description: "Evaluate the relevance of each context piece for generating the expected output",
1835
1844
  outputSchema: contextRelevanceOutputSchema,
@@ -2120,7 +2129,8 @@ function createNoiseSensitivityScorerLLM({
2120
2129
  judge: {
2121
2130
  model,
2122
2131
  instructions: NOISE_SENSITIVITY_INSTRUCTIONS
2123
- }
2132
+ },
2133
+ type: "agent"
2124
2134
  }).analyze({
2125
2135
  description: "Analyze the impact of noise on agent response quality",
2126
2136
  outputSchema: analyzeOutputSchema4,