@mastra/evals 0.13.5 → 0.13.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/CHANGELOG.md +22 -0
  2. package/dist/{chunk-4LRZVFXR.js → chunk-KHEXN75Q.js} +72 -3
  3. package/dist/chunk-KHEXN75Q.js.map +1 -0
  4. package/dist/{chunk-EKSPLMYP.cjs → chunk-QKR2PMLZ.cjs} +79 -2
  5. package/dist/chunk-QKR2PMLZ.cjs.map +1 -0
  6. package/dist/{dist-QNM75ISG.cjs → dist-ALHZKHK6.cjs} +9 -9
  7. package/dist/{dist-QNM75ISG.cjs.map → dist-ALHZKHK6.cjs.map} +1 -1
  8. package/dist/{dist-KXHZV6E4.js → dist-HPW4UI62.js} +9 -9
  9. package/dist/{dist-KXHZV6E4.js.map → dist-HPW4UI62.js.map} +1 -1
  10. package/dist/index.cjs +1 -1
  11. package/dist/index.js +1 -1
  12. package/dist/scorers/code/index.cjs +2 -2
  13. package/dist/scorers/code/index.js +1 -1
  14. package/dist/scorers/llm/answer-similarity/index.d.ts +34 -0
  15. package/dist/scorers/llm/answer-similarity/index.d.ts.map +1 -0
  16. package/dist/scorers/llm/answer-similarity/prompts.d.ts +29 -0
  17. package/dist/scorers/llm/answer-similarity/prompts.d.ts.map +1 -0
  18. package/dist/scorers/llm/index.cjs +335 -68
  19. package/dist/scorers/llm/index.cjs.map +1 -1
  20. package/dist/scorers/llm/index.d.ts +1 -0
  21. package/dist/scorers/llm/index.d.ts.map +1 -1
  22. package/dist/scorers/llm/index.js +291 -27
  23. package/dist/scorers/llm/index.js.map +1 -1
  24. package/dist/scorers/utils.cjs +60 -0
  25. package/dist/scorers/utils.cjs.map +1 -0
  26. package/dist/scorers/utils.d.ts +1 -1
  27. package/dist/scorers/utils.d.ts.map +1 -1
  28. package/dist/scorers/utils.js +3 -0
  29. package/dist/scorers/utils.js.map +1 -0
  30. package/package.json +14 -4
  31. package/dist/chunk-4LRZVFXR.js.map +0 -1
  32. package/dist/chunk-EKSPLMYP.cjs.map +0 -1
package/dist/index.cjs CHANGED
@@ -41,7 +41,7 @@ var getCurrentTestInfo = async () => {
41
41
  };
42
42
  }
43
43
  try {
44
- const vitest = await import('./dist-QNM75ISG.cjs');
44
+ const vitest = await import('./dist-ALHZKHK6.cjs');
45
45
  if (typeof vitest !== "undefined" && vitest.expect?.getState) {
46
46
  const state = vitest.expect.getState();
47
47
  return {
package/dist/index.js CHANGED
@@ -39,7 +39,7 @@ var getCurrentTestInfo = async () => {
39
39
  };
40
40
  }
41
41
  try {
42
- const vitest = await import('./dist-KXHZV6E4.js');
42
+ const vitest = await import('./dist-HPW4UI62.js');
43
43
  if (typeof vitest !== "undefined" && vitest.expect?.getState) {
44
44
  const state = vitest.expect.getState();
45
45
  return {
package/dist/scorers/code/index.cjs CHANGED
@@ -1,6 +1,6 @@
1
1
  'use strict';
2
2
 
3
- var chunkEKSPLMYP_cjs = require('../../chunk-EKSPLMYP.cjs');
3
+ var chunkQKR2PMLZ_cjs = require('../../chunk-QKR2PMLZ.cjs');
4
4
  var scores = require('@mastra/core/scores');
5
5
  var nlp = require('compromise');
6
6
  var difflib = require('difflib');
@@ -287,7 +287,7 @@ function createToolCallAccuracyScorerCode(options) {
287
287
  if (isInputInvalid || isOutputInvalid) {
288
288
  throw new Error("Input and output messages cannot be null or empty");
289
289
  }
290
- const { tools: actualTools, toolCallInfos } = chunkEKSPLMYP_cjs.extractToolCalls(run.output);
290
+ const { tools: actualTools, toolCallInfos } = chunkQKR2PMLZ_cjs.extractToolCalls(run.output);
291
291
  const correctToolCalled = expectedTool ? strictMode ? actualTools.length === 1 && actualTools[0] === expectedTool : actualTools.includes(expectedTool) : false;
292
292
  return {
293
293
  expectedTool,
package/dist/scorers/code/index.js CHANGED
@@ -1,4 +1,4 @@
1
- import { extractToolCalls } from '../../chunk-4LRZVFXR.js';
1
+ import { extractToolCalls } from '../../chunk-KHEXN75Q.js';
2
2
  import { createScorer } from '@mastra/core/scores';
3
3
  import nlp from 'compromise';
4
4
  import { SequenceMatcher } from 'difflib';
package/dist/scorers/llm/answer-similarity/index.d.ts ADDED
@@ -0,0 +1,34 @@
1
+ import type { MastraLanguageModel } from '@mastra/core/agent';
2
+ import type { ScorerRunInputForAgent, ScorerRunOutputForAgent } from '@mastra/core/scores';
3
+ export interface AnswerSimilarityOptions {
4
+ requireGroundTruth?: boolean;
5
+ semanticThreshold?: number;
6
+ exactMatchBonus?: number;
7
+ missingPenalty?: number;
8
+ contradictionPenalty?: number;
9
+ extraInfoPenalty?: number;
10
+ scale?: number;
11
+ }
12
+ export declare const ANSWER_SIMILARITY_DEFAULT_OPTIONS: Required<AnswerSimilarityOptions>;
13
+ export declare const ANSWER_SIMILARITY_INSTRUCTIONS = "\nYou are a precise answer similarity evaluator for CI/CD testing. Your role is to compare agent outputs against ground truth answers to ensure consistency and accuracy in automated testing.\n\nKey Principles:\n1. Focus on semantic equivalence, not just string matching\n2. Recognize that different phrasings can convey the same information\n3. Identify missing critical information from the ground truth\n4. Detect contradictions between output and ground truth\n5. Provide actionable feedback for improving answer accuracy\n6. Be strict but fair - partial credit for partial matches\n";
14
+ export declare function createAnswerSimilarityScorer({ model, options, }: {
15
+ model: MastraLanguageModel;
16
+ options?: AnswerSimilarityOptions;
17
+ }): import("@mastra/core/scores").MastraScorer<string, ScorerRunInputForAgent, ScorerRunOutputForAgent, Record<"preprocessStepResult", {
18
+ outputUnits: string[];
19
+ groundTruthUnits: string[];
20
+ }> & Record<"analyzeStepResult", {
21
+ matches: {
22
+ groundTruthUnit: string;
23
+ outputUnit: string | null;
24
+ matchType: "exact" | "semantic" | "partial" | "missing";
25
+ explanation: string;
26
+ }[];
27
+ extraInOutput: string[];
28
+ contradictions: {
29
+ groundTruthUnit: string;
30
+ outputUnit: string;
31
+ explanation: string;
32
+ }[];
33
+ }> & Record<"generateScoreStepResult", number> & Record<"generateReasonStepResult", string>>;
34
+ //# sourceMappingURL=index.d.ts.map
package/dist/scorers/llm/answer-similarity/index.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/answer-similarity/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAE9D,OAAO,KAAK,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAM3F,MAAM,WAAW,uBAAuB;IACtC,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,eAAO,MAAM,iCAAiC,EAAE,QAAQ,CAAC,uBAAuB,CAQ/E,CAAC;AAEF,eAAO,MAAM,8BAA8B,+kBAU1C,CAAC;AA0BF,wBAAgB,4BAA4B,CAAC,EAC3C,KAAK,EACL,OAA2C,GAC5C,EAAE;IACD,KAAK,EAAE,mBAAmB,CAAC;IAC3B,OAAO,CAAC,EAAE,uBAAuB,CAAC;CACnC;;;;;;;;;;;;;;;;6FA4HA"}
package/dist/scorers/llm/answer-similarity/prompts.d.ts ADDED
@@ -0,0 +1,29 @@
1
+ export declare const createExtractPrompt: ({ output, groundTruth }: {
2
+ output: string;
3
+ groundTruth: string;
4
+ }) => string;
5
+ export declare const createAnalyzePrompt: ({ outputUnits, groundTruthUnits, }: {
6
+ outputUnits: string[];
7
+ groundTruthUnits: string[];
8
+ }) => string;
9
+ export declare const createReasonPrompt: ({ output, groundTruth, score, analysis, scale, }: {
10
+ output: string;
11
+ groundTruth: string;
12
+ score: number;
13
+ analysis: {
14
+ matches: Array<{
15
+ groundTruthUnit: string;
16
+ outputUnit: string | null;
17
+ matchType: string;
18
+ explanation: string;
19
+ }>;
20
+ extraInOutput: string[];
21
+ contradictions: Array<{
22
+ outputUnit: string;
23
+ groundTruthUnit: string;
24
+ explanation: string;
25
+ }>;
26
+ };
27
+ scale: number;
28
+ }) => string;
29
+ //# sourceMappingURL=prompts.d.ts.map
package/dist/scorers/llm/answer-similarity/prompts.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"prompts.d.ts","sourceRoot":"","sources":["../../../../src/scorers/llm/answer-similarity/prompts.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,mBAAmB,GAAI,yBAAyB;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,MAAM,CAAA;CAAE,WAiCnG,CAAC;AAEF,eAAO,MAAM,mBAAmB,GAAI,oCAGjC;IACD,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,gBAAgB,EAAE,MAAM,EAAE,CAAC;CAC5B,WAgDA,CAAC;AAEF,eAAO,MAAM,kBAAkB,GAAI,kDAMhC;IACD,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE;QACR,OAAO,EAAE,KAAK,CAAC;YACb,eAAe,EAAE,MAAM,CAAC;YACxB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;YAC1B,SAAS,EAAE,MAAM,CAAC;YAClB,WAAW,EAAE,MAAM,CAAC;SACrB,CAAC,CAAC;QACH,aAAa,EAAE,MAAM,EAAE,CAAC;QACxB,cAAc,EAAE,KAAK,CAAC;YACpB,UAAU,EAAE,MAAM,CAAC;YACnB,eAAe,EAAE,MAAM,CAAC;YACxB,WAAW,EAAE,MAAM,CAAC;SACrB,CAAC,CAAC;KACJ,CAAC;IACF,KAAK,EAAE,MAAM,CAAC;CACf,WAuBA,CAAC"}