@mastra/evals 0.11.0 → 0.12.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. package/dist/attachListeners.d.ts +4 -0
  2. package/dist/attachListeners.d.ts.map +1 -0
  3. package/dist/{chunk-2JVD5IX6.cjs → chunk-7QAUEU4L.cjs} +2 -0
  4. package/dist/chunk-7QAUEU4L.cjs.map +1 -0
  5. package/dist/{chunk-IS3BZTWE.cjs → chunk-EMMSS5I5.cjs} +2 -0
  6. package/dist/chunk-EMMSS5I5.cjs.map +1 -0
  7. package/dist/{chunk-U67V476Y.js → chunk-G3PMV62Z.js} +2 -0
  8. package/dist/chunk-G3PMV62Z.js.map +1 -0
  9. package/dist/{chunk-COBCYVZ7.cjs → chunk-IUSAD2BW.cjs} +2 -0
  10. package/dist/chunk-IUSAD2BW.cjs.map +1 -0
  11. package/dist/{chunk-UYXFD4VX.js → chunk-QTWX6TKR.js} +2 -0
  12. package/dist/chunk-QTWX6TKR.js.map +1 -0
  13. package/dist/{chunk-TXXJUIES.js → chunk-YGTIO3J5.js} +2 -0
  14. package/dist/chunk-YGTIO3J5.js.map +1 -0
  15. package/dist/constants.d.ts +2 -0
  16. package/dist/constants.d.ts.map +1 -0
  17. package/dist/{dist-ZXFGMR47.js → dist-66YSVXZH.js} +4 -2
  18. package/dist/dist-66YSVXZH.js.map +1 -0
  19. package/dist/{dist-JD6MNRVB.cjs → dist-6ZEQKKXY.cjs} +14 -12
  20. package/dist/dist-6ZEQKKXY.cjs.map +1 -0
  21. package/dist/evaluation.d.ts +8 -0
  22. package/dist/evaluation.d.ts.map +1 -0
  23. package/dist/index.cjs +3 -1
  24. package/dist/index.cjs.map +1 -0
  25. package/dist/index.d.ts +3 -3
  26. package/dist/index.d.ts.map +1 -0
  27. package/dist/index.js +3 -1
  28. package/dist/index.js.map +1 -0
  29. package/dist/{magic-string.es-MNZ6ZGOL.js → magic-string.es-6JSI7KY4.js} +2 -0
  30. package/dist/magic-string.es-6JSI7KY4.js.map +1 -0
  31. package/dist/{magic-string.es-T2QO2IBJ.cjs → magic-string.es-NBXOXRCK.cjs} +2 -0
  32. package/dist/magic-string.es-NBXOXRCK.cjs.map +1 -0
  33. package/dist/metrics/index.d.ts +4 -0
  34. package/dist/metrics/index.d.ts.map +1 -0
  35. package/dist/metrics/judge/index.cjs +4 -2
  36. package/dist/metrics/judge/index.cjs.map +1 -0
  37. package/dist/metrics/judge/index.d.ts +7 -1
  38. package/dist/metrics/judge/index.d.ts.map +1 -0
  39. package/dist/metrics/judge/index.js +3 -1
  40. package/dist/metrics/judge/index.js.map +1 -0
  41. package/dist/metrics/llm/answer-relevancy/index.d.ts +16 -0
  42. package/dist/metrics/llm/answer-relevancy/index.d.ts.map +1 -0
  43. package/dist/metrics/llm/answer-relevancy/metricJudge.d.ts +20 -0
  44. package/dist/metrics/llm/answer-relevancy/metricJudge.d.ts.map +1 -0
  45. package/dist/metrics/llm/answer-relevancy/prompts.d.ts +19 -0
  46. package/dist/metrics/llm/answer-relevancy/prompts.d.ts.map +1 -0
  47. package/dist/metrics/llm/bias/index.d.ts +14 -0
  48. package/dist/metrics/llm/bias/index.d.ts.map +1 -0
  49. package/dist/metrics/llm/bias/metricJudge.d.ts +14 -0
  50. package/dist/metrics/llm/bias/metricJudge.d.ts.map +1 -0
  51. package/dist/metrics/llm/bias/prompts.d.ts +14 -0
  52. package/dist/metrics/llm/bias/prompts.d.ts.map +1 -0
  53. package/dist/metrics/llm/context-position/index.d.ts +16 -0
  54. package/dist/metrics/llm/context-position/index.d.ts.map +1 -0
  55. package/dist/metrics/llm/context-position/metricJudge.d.ts +20 -0
  56. package/dist/metrics/llm/context-position/metricJudge.d.ts.map +1 -0
  57. package/dist/metrics/llm/context-position/prompts.d.ts +17 -0
  58. package/dist/metrics/llm/context-position/prompts.d.ts.map +1 -0
  59. package/dist/metrics/llm/context-precision/index.d.ts +16 -0
  60. package/dist/metrics/llm/context-precision/index.d.ts.map +1 -0
  61. package/dist/metrics/llm/context-precision/metricJudge.d.ts +20 -0
  62. package/dist/metrics/llm/context-precision/metricJudge.d.ts.map +1 -0
  63. package/dist/metrics/llm/context-precision/prompts.d.ts +17 -0
  64. package/dist/metrics/llm/context-precision/prompts.d.ts.map +1 -0
  65. package/dist/metrics/llm/context-relevancy/index.d.ts +16 -0
  66. package/dist/metrics/llm/context-relevancy/index.d.ts.map +1 -0
  67. package/dist/metrics/llm/context-relevancy/metricJudge.d.ts +16 -0
  68. package/dist/metrics/llm/context-relevancy/metricJudge.d.ts.map +1 -0
  69. package/dist/metrics/llm/context-relevancy/prompts.d.ts +13 -0
  70. package/dist/metrics/llm/context-relevancy/prompts.d.ts.map +1 -0
  71. package/dist/metrics/llm/contextual-recall/index.d.ts +16 -0
  72. package/dist/metrics/llm/contextual-recall/index.d.ts.map +1 -0
  73. package/dist/metrics/llm/contextual-recall/metricJudge.d.ts +16 -0
  74. package/dist/metrics/llm/contextual-recall/metricJudge.d.ts.map +1 -0
  75. package/dist/metrics/llm/contextual-recall/prompts.d.ts +13 -0
  76. package/dist/metrics/llm/contextual-recall/prompts.d.ts.map +1 -0
  77. package/dist/metrics/llm/faithfulness/index.d.ts +16 -0
  78. package/dist/metrics/llm/faithfulness/index.d.ts.map +1 -0
  79. package/dist/metrics/llm/faithfulness/metricJudge.d.ts +22 -0
  80. package/dist/metrics/llm/faithfulness/metricJudge.d.ts.map +1 -0
  81. package/dist/metrics/llm/faithfulness/prompts.d.ts +20 -0
  82. package/dist/metrics/llm/faithfulness/prompts.d.ts.map +1 -0
  83. package/dist/metrics/llm/hallucination/index.d.ts +16 -0
  84. package/dist/metrics/llm/hallucination/index.d.ts.map +1 -0
  85. package/dist/metrics/llm/hallucination/metricJudge.d.ts +22 -0
  86. package/dist/metrics/llm/hallucination/metricJudge.d.ts.map +1 -0
  87. package/dist/metrics/llm/hallucination/prompts.d.ts +17 -0
  88. package/dist/metrics/llm/hallucination/prompts.d.ts.map +1 -0
  89. package/dist/metrics/llm/index.cjs +26 -24
  90. package/dist/metrics/llm/index.cjs.map +1 -0
  91. package/dist/metrics/llm/index.d.ts +12 -11
  92. package/dist/metrics/llm/index.d.ts.map +1 -0
  93. package/dist/metrics/llm/index.js +4 -2
  94. package/dist/metrics/llm/index.js.map +1 -0
  95. package/dist/metrics/llm/prompt-alignment/index.d.ts +33 -0
  96. package/dist/metrics/llm/prompt-alignment/index.d.ts.map +1 -0
  97. package/dist/metrics/llm/prompt-alignment/metricJudge.d.ts +20 -0
  98. package/dist/metrics/llm/prompt-alignment/metricJudge.d.ts.map +1 -0
  99. package/dist/metrics/llm/prompt-alignment/prompts.d.ts +17 -0
  100. package/dist/metrics/llm/prompt-alignment/prompts.d.ts.map +1 -0
  101. package/dist/metrics/llm/summarization/index.d.ts +19 -0
  102. package/dist/metrics/llm/summarization/index.d.ts.map +1 -0
  103. package/dist/metrics/llm/summarization/metricJudge.d.ts +34 -0
  104. package/dist/metrics/llm/summarization/metricJudge.d.ts.map +1 -0
  105. package/dist/metrics/llm/summarization/prompts.d.ts +30 -0
  106. package/dist/metrics/llm/summarization/prompts.d.ts.map +1 -0
  107. package/dist/metrics/llm/toxicity/index.d.ts +14 -0
  108. package/dist/metrics/llm/toxicity/index.d.ts.map +1 -0
  109. package/dist/metrics/llm/toxicity/metricJudge.d.ts +14 -0
  110. package/dist/metrics/llm/toxicity/metricJudge.d.ts.map +1 -0
  111. package/dist/metrics/llm/toxicity/prompts.d.ts +10 -0
  112. package/dist/metrics/llm/toxicity/prompts.d.ts.map +1 -0
  113. package/dist/metrics/llm/types.d.ts +7 -0
  114. package/dist/metrics/llm/types.d.ts.map +1 -0
  115. package/dist/metrics/llm/utils.d.ts +14 -0
  116. package/dist/metrics/llm/utils.d.ts.map +1 -0
  117. package/dist/metrics/nlp/completeness/index.d.ts +21 -0
  118. package/dist/metrics/nlp/completeness/index.d.ts.map +1 -0
  119. package/dist/metrics/nlp/content-similarity/index.d.ts +18 -0
  120. package/dist/metrics/nlp/content-similarity/index.d.ts.map +1 -0
  121. package/dist/metrics/nlp/index.cjs +2 -0
  122. package/dist/metrics/nlp/index.cjs.map +1 -0
  123. package/dist/metrics/nlp/index.d.ts +6 -5
  124. package/dist/metrics/nlp/index.d.ts.map +1 -0
  125. package/dist/metrics/nlp/index.js +2 -0
  126. package/dist/metrics/nlp/index.js.map +1 -0
  127. package/dist/metrics/nlp/keyword-coverage/index.d.ts +13 -0
  128. package/dist/metrics/nlp/keyword-coverage/index.d.ts.map +1 -0
  129. package/dist/metrics/nlp/textual-difference/index.d.ts +15 -0
  130. package/dist/metrics/nlp/textual-difference/index.d.ts.map +1 -0
  131. package/dist/metrics/nlp/tone/index.d.ts +18 -0
  132. package/dist/metrics/nlp/tone/index.d.ts.map +1 -0
  133. package/dist/scorers/code/completeness/index.d.ts +11 -0
  134. package/dist/scorers/code/completeness/index.d.ts.map +1 -0
  135. package/dist/scorers/code/content-similarity/index.d.ts +11 -0
  136. package/dist/scorers/code/content-similarity/index.d.ts.map +1 -0
  137. package/dist/scorers/code/index.cjs +139 -161
  138. package/dist/scorers/code/index.cjs.map +1 -0
  139. package/dist/scorers/code/index.d.ts +6 -5
  140. package/dist/scorers/code/index.d.ts.map +1 -0
  141. package/dist/scorers/code/index.js +139 -161
  142. package/dist/scorers/code/index.js.map +1 -0
  143. package/dist/scorers/code/keyword-coverage/index.d.ts +17 -0
  144. package/dist/scorers/code/keyword-coverage/index.d.ts.map +1 -0
  145. package/dist/scorers/code/textual-difference/index.d.ts +8 -0
  146. package/dist/scorers/code/textual-difference/index.d.ts.map +1 -0
  147. package/dist/scorers/code/tone/index.d.ts +21 -0
  148. package/dist/scorers/code/tone/index.d.ts.map +1 -0
  149. package/dist/scorers/index.d.ts +3 -0
  150. package/dist/scorers/index.d.ts.map +1 -0
  151. package/dist/scorers/llm/answer-relevancy/index.d.ts +16 -0
  152. package/dist/scorers/llm/answer-relevancy/index.d.ts.map +1 -0
  153. package/dist/scorers/llm/answer-relevancy/prompts.d.ts +13 -0
  154. package/dist/scorers/llm/answer-relevancy/prompts.d.ts.map +1 -0
  155. package/dist/scorers/llm/bias/index.d.ts +17 -0
  156. package/dist/scorers/llm/bias/index.d.ts.map +1 -0
  157. package/dist/scorers/llm/bias/prompts.d.ts +13 -0
  158. package/dist/scorers/llm/bias/prompts.d.ts.map +1 -0
  159. package/dist/scorers/llm/faithfulness/index.d.ts +16 -0
  160. package/dist/scorers/llm/faithfulness/index.d.ts.map +1 -0
  161. package/dist/scorers/llm/faithfulness/prompts.d.ts +20 -0
  162. package/dist/scorers/llm/faithfulness/prompts.d.ts.map +1 -0
  163. package/dist/scorers/llm/hallucination/index.d.ts +19 -0
  164. package/dist/scorers/llm/hallucination/index.d.ts.map +1 -0
  165. package/dist/scorers/llm/hallucination/prompts.d.ts +20 -0
  166. package/dist/scorers/llm/hallucination/prompts.d.ts.map +1 -0
  167. package/dist/scorers/llm/index.cjs +200 -207
  168. package/dist/scorers/llm/index.cjs.map +1 -0
  169. package/dist/scorers/llm/index.d.ts +6 -11
  170. package/dist/scorers/llm/index.d.ts.map +1 -0
  171. package/dist/scorers/llm/index.js +201 -208
  172. package/dist/scorers/llm/index.js.map +1 -0
  173. package/dist/scorers/llm/toxicity/index.d.ts +15 -0
  174. package/dist/scorers/llm/toxicity/index.d.ts.map +1 -0
  175. package/dist/scorers/llm/toxicity/prompts.d.ts +10 -0
  176. package/dist/scorers/llm/toxicity/prompts.d.ts.map +1 -0
  177. package/dist/scorers/utils.d.ts +59 -0
  178. package/dist/scorers/utils.d.ts.map +1 -0
  179. package/package.json +13 -12
  180. package/dist/_tsup-dts-rollup.d.cts +0 -984
  181. package/dist/_tsup-dts-rollup.d.ts +0 -984
  182. package/dist/index.d.cts +0 -3
  183. package/dist/metrics/judge/index.d.cts +0 -1
  184. package/dist/metrics/llm/index.d.cts +0 -11
  185. package/dist/metrics/nlp/index.d.cts +0 -5
  186. package/dist/scorers/code/index.d.cts +0 -5
  187. package/dist/scorers/llm/index.d.cts +0 -11
@@ -0,0 +1,4 @@
1
+ import type { Mastra } from '@mastra/core';
2
+ export declare function attachListeners(mastra?: Mastra): Promise<void>;
3
+ export declare function globalSetup(): Promise<void>;
4
+ //# sourceMappingURL=attachListeners.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"attachListeners.d.ts","sourceRoot":"","sources":["../src/attachListeners.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAO3C,wBAAsB,eAAe,CAAC,MAAM,CAAC,EAAE,MAAM,iBA0BpD;AAED,wBAAsB,WAAW,kBAOhC"}
@@ -6,3 +6,5 @@ var roundToTwoDecimals = (num) => {
6
6
  };
7
7
 
8
8
  exports.roundToTwoDecimals = roundToTwoDecimals;
9
+ //# sourceMappingURL=chunk-7QAUEU4L.cjs.map
10
+ //# sourceMappingURL=chunk-7QAUEU4L.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/metrics/llm/utils.ts"],"names":[],"mappings":";;;AAAO,IAAM,kBAAA,GAAqB,CAAC,GAAA,KAAgB;AACjD,EAAA,OAAO,KAAK,KAAA,CAAA,CAAO,GAAA,GAAM,MAAA,CAAO,OAAA,IAAW,GAAG,CAAA,GAAI,GAAA;AACpD","file":"chunk-7QAUEU4L.cjs","sourcesContent":["export const roundToTwoDecimals = (num: number) => {\n return Math.round((num + Number.EPSILON) * 100) / 100;\n};\n\nexport function isCloserTo(value: number, target1: number, target2: number): boolean {\n return Math.abs(value - target1) < Math.abs(value - target2);\n}\n\nexport type TestCase = {\n input: string;\n output: string;\n expectedResult: {\n score: number;\n reason?: string;\n };\n};\n\nexport type TestCaseWithContext = TestCase & {\n context: string[];\n};\n"]}
@@ -33,3 +33,5 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
33
33
  exports.__commonJS = __commonJS;
34
34
  exports.__export = __export;
35
35
  exports.__toESM = __toESM;
36
+ //# sourceMappingURL=chunk-EMMSS5I5.cjs.map
37
+ //# sourceMappingURL=chunk-EMMSS5I5.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":[],"names":[],"mappings":"","file":"chunk-EMMSS5I5.cjs"}
@@ -29,3 +29,5 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
29
29
  ));
30
30
 
31
31
  export { __commonJS, __export, __toESM };
32
+ //# sourceMappingURL=chunk-G3PMV62Z.js.map
33
+ //# sourceMappingURL=chunk-G3PMV62Z.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":[],"names":[],"mappings":"","file":"chunk-G3PMV62Z.js"}
@@ -15,3 +15,5 @@ var MastraAgentJudge = class {
15
15
  };
16
16
 
17
17
  exports.MastraAgentJudge = MastraAgentJudge;
18
+ //# sourceMappingURL=chunk-IUSAD2BW.cjs.map
19
+ //# sourceMappingURL=chunk-IUSAD2BW.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/metrics/judge/index.ts"],"names":["Agent"],"mappings":";;;;;AAGO,IAAe,mBAAf,MAAgC;AAAA,EAClB,KAAA;AAAA,EAEnB,WAAA,CAAY,IAAA,EAAc,YAAA,EAAsB,KAAA,EAAsB;AACpE,IAAA,IAAA,CAAK,KAAA,GAAQ,IAAIA,WAAA,CAAM;AAAA,MACrB,IAAA,EAAM,qBAAqB,IAAI,CAAA,CAAA;AAAA,MAC/B,YAAA;AAAA,MACA;AAAA,KACD,CAAA;AAAA,EACH;AACF","file":"chunk-IUSAD2BW.cjs","sourcesContent":["import { Agent } from '@mastra/core/agent';\nimport type { LanguageModel } from '@mastra/core/llm';\n\nexport abstract class MastraAgentJudge {\n protected readonly agent: Agent;\n\n constructor(name: string, instructions: string, model: LanguageModel) {\n this.agent = new Agent({\n name: `Mastra Eval Judge ${name}`,\n instructions: instructions,\n model,\n });\n }\n}\n"]}
@@ -4,3 +4,5 @@ var roundToTwoDecimals = (num) => {
4
4
  };
5
5
 
6
6
  export { roundToTwoDecimals };
7
+ //# sourceMappingURL=chunk-QTWX6TKR.js.map
8
+ //# sourceMappingURL=chunk-QTWX6TKR.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/metrics/llm/utils.ts"],"names":[],"mappings":";AAAO,IAAM,kBAAA,GAAqB,CAAC,GAAA,KAAgB;AACjD,EAAA,OAAO,KAAK,KAAA,CAAA,CAAO,GAAA,GAAM,MAAA,CAAO,OAAA,IAAW,GAAG,CAAA,GAAI,GAAA;AACpD","file":"chunk-QTWX6TKR.js","sourcesContent":["export const roundToTwoDecimals = (num: number) => {\n return Math.round((num + Number.EPSILON) * 100) / 100;\n};\n\nexport function isCloserTo(value: number, target1: number, target2: number): boolean {\n return Math.abs(value - target1) < Math.abs(value - target2);\n}\n\nexport type TestCase = {\n input: string;\n output: string;\n expectedResult: {\n score: number;\n reason?: string;\n };\n};\n\nexport type TestCaseWithContext = TestCase & {\n context: string[];\n};\n"]}
@@ -13,3 +13,5 @@ var MastraAgentJudge = class {
13
13
  };
14
14
 
15
15
  export { MastraAgentJudge };
16
+ //# sourceMappingURL=chunk-YGTIO3J5.js.map
17
+ //# sourceMappingURL=chunk-YGTIO3J5.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/metrics/judge/index.ts"],"names":[],"mappings":";;;AAGO,IAAe,mBAAf,MAAgC;AAAA,EAClB,KAAA;AAAA,EAEnB,WAAA,CAAY,IAAA,EAAc,YAAA,EAAsB,KAAA,EAAsB;AACpE,IAAA,IAAA,CAAK,KAAA,GAAQ,IAAI,KAAA,CAAM;AAAA,MACrB,IAAA,EAAM,qBAAqB,IAAI,CAAA,CAAA;AAAA,MAC/B,YAAA;AAAA,MACA;AAAA,KACD,CAAA;AAAA,EACH;AACF","file":"chunk-YGTIO3J5.js","sourcesContent":["import { Agent } from '@mastra/core/agent';\nimport type { LanguageModel } from '@mastra/core/llm';\n\nexport abstract class MastraAgentJudge {\n protected readonly agent: Agent;\n\n constructor(name: string, instructions: string, model: LanguageModel) {\n this.agent = new Agent({\n name: `Mastra Eval Judge ${name}`,\n instructions: instructions,\n model,\n });\n }\n}\n"]}
@@ -0,0 +1,2 @@
1
+ export declare const GLOBAL_RUN_ID_ENV_KEY = "_MASTRA_GLOBAL_RUN_ID_";
2
+ //# sourceMappingURL=constants.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"constants.d.ts","sourceRoot":"","sources":["../src/constants.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,qBAAqB,2BAA2B,CAAC"}
@@ -1,4 +1,4 @@
1
- import { __commonJS, __export, __toESM } from './chunk-U67V476Y.js';
1
+ import { __commonJS, __export, __toESM } from './chunk-G3PMV62Z.js';
2
2
  import { isatty } from 'tty';
3
3
 
4
4
  // ../../node_modules/.pnpm/js-tokens@9.0.1/node_modules/js-tokens/index.js
@@ -12887,7 +12887,7 @@ function offsetToLineNumber(source, offset) {
12887
12887
  return line + 1;
12888
12888
  }
12889
12889
  async function saveInlineSnapshots(environment, snapshots) {
12890
- const MagicString = (await import('./magic-string.es-MNZ6ZGOL.js')).default;
12890
+ const MagicString = (await import('./magic-string.es-6JSI7KY4.js')).default;
12891
12891
  const files = new Set(snapshots.map((i) => i.file));
12892
12892
  await Promise.all(Array.from(files).map(async (file) => {
12893
12893
  const snaps = snapshots.filter((i) => i.file === file);
@@ -16725,3 +16725,5 @@ chai/chai.js:
16725
16725
  */
16726
16726
 
16727
16727
  export { afterAll, afterEach, assert2 as assert, assertType, beforeAll, beforeEach, bench, chai_exports as chai, createExpect, describe, globalExpect as expect, export_expectTypeOf as expectTypeOf, inject, it, onTestFailed, onTestFinished, should, suite, test3 as test, vi, vitest };
16728
+ //# sourceMappingURL=dist-66YSVXZH.js.map
16729
+ //# sourceMappingURL=dist-66YSVXZH.js.map