@mastra/evals 0.14.4 → 1.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195) hide show
  1. package/CHANGELOG.md +34 -25
  2. package/README.md +19 -159
  3. package/dist/{chunk-KHEXN75Q.js → chunk-CCLM7KPF.js} +45 -21
  4. package/dist/chunk-CCLM7KPF.js.map +1 -0
  5. package/dist/{chunk-QKR2PMLZ.cjs → chunk-TPQLLHZW.cjs} +46 -21
  6. package/dist/chunk-TPQLLHZW.cjs.map +1 -0
  7. package/dist/scorers/code/completeness/index.d.ts +1 -1
  8. package/dist/scorers/code/completeness/index.d.ts.map +1 -1
  9. package/dist/scorers/code/content-similarity/index.d.ts +1 -1
  10. package/dist/scorers/code/content-similarity/index.d.ts.map +1 -1
  11. package/dist/scorers/code/keyword-coverage/index.d.ts +1 -1
  12. package/dist/scorers/code/keyword-coverage/index.d.ts.map +1 -1
  13. package/dist/scorers/code/textual-difference/index.d.ts +1 -1
  14. package/dist/scorers/code/textual-difference/index.d.ts.map +1 -1
  15. package/dist/scorers/code/tone/index.d.ts +1 -1
  16. package/dist/scorers/code/tone/index.d.ts.map +1 -1
  17. package/dist/scorers/code/tool-call-accuracy/index.d.ts +1 -1
  18. package/dist/scorers/code/tool-call-accuracy/index.d.ts.map +1 -1
  19. package/dist/scorers/llm/answer-relevancy/index.d.ts +1 -1
  20. package/dist/scorers/llm/answer-relevancy/index.d.ts.map +1 -1
  21. package/dist/scorers/llm/answer-similarity/index.d.ts +2 -2
  22. package/dist/scorers/llm/answer-similarity/index.d.ts.map +1 -1
  23. package/dist/scorers/llm/bias/index.d.ts +2 -2
  24. package/dist/scorers/llm/bias/index.d.ts.map +1 -1
  25. package/dist/scorers/llm/context-precision/index.d.ts +3 -3
  26. package/dist/scorers/llm/context-precision/index.d.ts.map +1 -1
  27. package/dist/scorers/llm/context-relevance/index.d.ts +3 -3
  28. package/dist/scorers/llm/context-relevance/index.d.ts.map +1 -1
  29. package/dist/scorers/llm/faithfulness/index.d.ts +2 -2
  30. package/dist/scorers/llm/faithfulness/index.d.ts.map +1 -1
  31. package/dist/scorers/llm/hallucination/index.d.ts +2 -2
  32. package/dist/scorers/llm/hallucination/index.d.ts.map +1 -1
  33. package/dist/scorers/llm/noise-sensitivity/index.d.ts +1 -1
  34. package/dist/scorers/llm/noise-sensitivity/index.d.ts.map +1 -1
  35. package/dist/scorers/llm/prompt-alignment/index.d.ts +2 -2
  36. package/dist/scorers/llm/prompt-alignment/index.d.ts.map +1 -1
  37. package/dist/scorers/llm/tool-call-accuracy/index.d.ts +2 -2
  38. package/dist/scorers/llm/tool-call-accuracy/index.d.ts.map +1 -1
  39. package/dist/scorers/llm/toxicity/index.d.ts +2 -2
  40. package/dist/scorers/llm/toxicity/index.d.ts.map +1 -1
  41. package/dist/scorers/{llm → prebuilt}/index.cjs +479 -62
  42. package/dist/scorers/prebuilt/index.cjs.map +1 -0
  43. package/dist/scorers/prebuilt/index.d.ts +3 -0
  44. package/dist/scorers/prebuilt/index.d.ts.map +1 -0
  45. package/dist/scorers/{llm → prebuilt}/index.js +419 -15
  46. package/dist/scorers/prebuilt/index.js.map +1 -0
  47. package/dist/scorers/utils.cjs +21 -17
  48. package/dist/scorers/utils.d.ts +21 -11
  49. package/dist/scorers/utils.d.ts.map +1 -1
  50. package/dist/scorers/utils.js +1 -1
  51. package/package.json +15 -59
  52. package/dist/attachListeners.d.ts +0 -4
  53. package/dist/attachListeners.d.ts.map +0 -1
  54. package/dist/chunk-44PMY5ES.js +0 -78
  55. package/dist/chunk-44PMY5ES.js.map +0 -1
  56. package/dist/chunk-7QAUEU4L.cjs +0 -10
  57. package/dist/chunk-7QAUEU4L.cjs.map +0 -1
  58. package/dist/chunk-EMMSS5I5.cjs +0 -37
  59. package/dist/chunk-EMMSS5I5.cjs.map +0 -1
  60. package/dist/chunk-G3PMV62Z.js +0 -33
  61. package/dist/chunk-G3PMV62Z.js.map +0 -1
  62. package/dist/chunk-IUSAD2BW.cjs +0 -19
  63. package/dist/chunk-IUSAD2BW.cjs.map +0 -1
  64. package/dist/chunk-KHEXN75Q.js.map +0 -1
  65. package/dist/chunk-PWGOG6ML.cjs +0 -81
  66. package/dist/chunk-PWGOG6ML.cjs.map +0 -1
  67. package/dist/chunk-QKR2PMLZ.cjs.map +0 -1
  68. package/dist/chunk-QTWX6TKR.js +0 -8
  69. package/dist/chunk-QTWX6TKR.js.map +0 -1
  70. package/dist/chunk-YGTIO3J5.js +0 -17
  71. package/dist/chunk-YGTIO3J5.js.map +0 -1
  72. package/dist/dist-LDTK3TIP.cjs +0 -16759
  73. package/dist/dist-LDTK3TIP.cjs.map +0 -1
  74. package/dist/dist-OWYZEOJK.js +0 -16737
  75. package/dist/dist-OWYZEOJK.js.map +0 -1
  76. package/dist/evaluation.d.ts +0 -8
  77. package/dist/evaluation.d.ts.map +0 -1
  78. package/dist/index.cjs +0 -93
  79. package/dist/index.cjs.map +0 -1
  80. package/dist/index.d.ts +0 -3
  81. package/dist/index.d.ts.map +0 -1
  82. package/dist/index.js +0 -89
  83. package/dist/index.js.map +0 -1
  84. package/dist/magic-string.es-7ORA5OGR.js +0 -1305
  85. package/dist/magic-string.es-7ORA5OGR.js.map +0 -1
  86. package/dist/magic-string.es-NZ2XWFKN.cjs +0 -1311
  87. package/dist/magic-string.es-NZ2XWFKN.cjs.map +0 -1
  88. package/dist/metrics/index.d.ts +0 -4
  89. package/dist/metrics/index.d.ts.map +0 -1
  90. package/dist/metrics/judge/index.cjs +0 -12
  91. package/dist/metrics/judge/index.cjs.map +0 -1
  92. package/dist/metrics/judge/index.d.ts +0 -7
  93. package/dist/metrics/judge/index.d.ts.map +0 -1
  94. package/dist/metrics/judge/index.js +0 -3
  95. package/dist/metrics/judge/index.js.map +0 -1
  96. package/dist/metrics/llm/answer-relevancy/index.d.ts +0 -16
  97. package/dist/metrics/llm/answer-relevancy/index.d.ts.map +0 -1
  98. package/dist/metrics/llm/answer-relevancy/metricJudge.d.ts +0 -20
  99. package/dist/metrics/llm/answer-relevancy/metricJudge.d.ts.map +0 -1
  100. package/dist/metrics/llm/answer-relevancy/prompts.d.ts +0 -19
  101. package/dist/metrics/llm/answer-relevancy/prompts.d.ts.map +0 -1
  102. package/dist/metrics/llm/bias/index.d.ts +0 -14
  103. package/dist/metrics/llm/bias/index.d.ts.map +0 -1
  104. package/dist/metrics/llm/bias/metricJudge.d.ts +0 -14
  105. package/dist/metrics/llm/bias/metricJudge.d.ts.map +0 -1
  106. package/dist/metrics/llm/bias/prompts.d.ts +0 -14
  107. package/dist/metrics/llm/bias/prompts.d.ts.map +0 -1
  108. package/dist/metrics/llm/context-position/index.d.ts +0 -16
  109. package/dist/metrics/llm/context-position/index.d.ts.map +0 -1
  110. package/dist/metrics/llm/context-position/metricJudge.d.ts +0 -20
  111. package/dist/metrics/llm/context-position/metricJudge.d.ts.map +0 -1
  112. package/dist/metrics/llm/context-position/prompts.d.ts +0 -17
  113. package/dist/metrics/llm/context-position/prompts.d.ts.map +0 -1
  114. package/dist/metrics/llm/context-precision/index.d.ts +0 -16
  115. package/dist/metrics/llm/context-precision/index.d.ts.map +0 -1
  116. package/dist/metrics/llm/context-precision/metricJudge.d.ts +0 -20
  117. package/dist/metrics/llm/context-precision/metricJudge.d.ts.map +0 -1
  118. package/dist/metrics/llm/context-precision/prompts.d.ts +0 -17
  119. package/dist/metrics/llm/context-precision/prompts.d.ts.map +0 -1
  120. package/dist/metrics/llm/context-relevancy/index.d.ts +0 -16
  121. package/dist/metrics/llm/context-relevancy/index.d.ts.map +0 -1
  122. package/dist/metrics/llm/context-relevancy/metricJudge.d.ts +0 -16
  123. package/dist/metrics/llm/context-relevancy/metricJudge.d.ts.map +0 -1
  124. package/dist/metrics/llm/context-relevancy/prompts.d.ts +0 -13
  125. package/dist/metrics/llm/context-relevancy/prompts.d.ts.map +0 -1
  126. package/dist/metrics/llm/contextual-recall/index.d.ts +0 -16
  127. package/dist/metrics/llm/contextual-recall/index.d.ts.map +0 -1
  128. package/dist/metrics/llm/contextual-recall/metricJudge.d.ts +0 -16
  129. package/dist/metrics/llm/contextual-recall/metricJudge.d.ts.map +0 -1
  130. package/dist/metrics/llm/contextual-recall/prompts.d.ts +0 -13
  131. package/dist/metrics/llm/contextual-recall/prompts.d.ts.map +0 -1
  132. package/dist/metrics/llm/faithfulness/index.d.ts +0 -16
  133. package/dist/metrics/llm/faithfulness/index.d.ts.map +0 -1
  134. package/dist/metrics/llm/faithfulness/metricJudge.d.ts +0 -22
  135. package/dist/metrics/llm/faithfulness/metricJudge.d.ts.map +0 -1
  136. package/dist/metrics/llm/faithfulness/prompts.d.ts +0 -20
  137. package/dist/metrics/llm/faithfulness/prompts.d.ts.map +0 -1
  138. package/dist/metrics/llm/hallucination/index.d.ts +0 -16
  139. package/dist/metrics/llm/hallucination/index.d.ts.map +0 -1
  140. package/dist/metrics/llm/hallucination/metricJudge.d.ts +0 -22
  141. package/dist/metrics/llm/hallucination/metricJudge.d.ts.map +0 -1
  142. package/dist/metrics/llm/hallucination/prompts.d.ts +0 -17
  143. package/dist/metrics/llm/hallucination/prompts.d.ts.map +0 -1
  144. package/dist/metrics/llm/index.cjs +0 -2481
  145. package/dist/metrics/llm/index.cjs.map +0 -1
  146. package/dist/metrics/llm/index.d.ts +0 -12
  147. package/dist/metrics/llm/index.d.ts.map +0 -1
  148. package/dist/metrics/llm/index.js +0 -2469
  149. package/dist/metrics/llm/index.js.map +0 -1
  150. package/dist/metrics/llm/prompt-alignment/index.d.ts +0 -33
  151. package/dist/metrics/llm/prompt-alignment/index.d.ts.map +0 -1
  152. package/dist/metrics/llm/prompt-alignment/metricJudge.d.ts +0 -20
  153. package/dist/metrics/llm/prompt-alignment/metricJudge.d.ts.map +0 -1
  154. package/dist/metrics/llm/prompt-alignment/prompts.d.ts +0 -17
  155. package/dist/metrics/llm/prompt-alignment/prompts.d.ts.map +0 -1
  156. package/dist/metrics/llm/summarization/index.d.ts +0 -19
  157. package/dist/metrics/llm/summarization/index.d.ts.map +0 -1
  158. package/dist/metrics/llm/summarization/metricJudge.d.ts +0 -34
  159. package/dist/metrics/llm/summarization/metricJudge.d.ts.map +0 -1
  160. package/dist/metrics/llm/summarization/prompts.d.ts +0 -30
  161. package/dist/metrics/llm/summarization/prompts.d.ts.map +0 -1
  162. package/dist/metrics/llm/toxicity/index.d.ts +0 -14
  163. package/dist/metrics/llm/toxicity/index.d.ts.map +0 -1
  164. package/dist/metrics/llm/toxicity/metricJudge.d.ts +0 -14
  165. package/dist/metrics/llm/toxicity/metricJudge.d.ts.map +0 -1
  166. package/dist/metrics/llm/toxicity/prompts.d.ts +0 -10
  167. package/dist/metrics/llm/toxicity/prompts.d.ts.map +0 -1
  168. package/dist/metrics/llm/types.d.ts +0 -7
  169. package/dist/metrics/llm/types.d.ts.map +0 -1
  170. package/dist/metrics/llm/utils.d.ts +0 -14
  171. package/dist/metrics/llm/utils.d.ts.map +0 -1
  172. package/dist/metrics/nlp/completeness/index.d.ts +0 -21
  173. package/dist/metrics/nlp/completeness/index.d.ts.map +0 -1
  174. package/dist/metrics/nlp/content-similarity/index.d.ts +0 -18
  175. package/dist/metrics/nlp/content-similarity/index.d.ts.map +0 -1
  176. package/dist/metrics/nlp/index.cjs +0 -201
  177. package/dist/metrics/nlp/index.cjs.map +0 -1
  178. package/dist/metrics/nlp/index.d.ts +0 -6
  179. package/dist/metrics/nlp/index.d.ts.map +0 -1
  180. package/dist/metrics/nlp/index.js +0 -188
  181. package/dist/metrics/nlp/index.js.map +0 -1
  182. package/dist/metrics/nlp/keyword-coverage/index.d.ts +0 -13
  183. package/dist/metrics/nlp/keyword-coverage/index.d.ts.map +0 -1
  184. package/dist/metrics/nlp/textual-difference/index.d.ts +0 -15
  185. package/dist/metrics/nlp/textual-difference/index.d.ts.map +0 -1
  186. package/dist/metrics/nlp/tone/index.d.ts +0 -18
  187. package/dist/metrics/nlp/tone/index.d.ts.map +0 -1
  188. package/dist/ratio.d.ts +0 -13
  189. package/dist/ratio.d.ts.map +0 -1
  190. package/dist/scorers/code/index.cjs +0 -327
  191. package/dist/scorers/code/index.cjs.map +0 -1
  192. package/dist/scorers/code/index.js +0 -313
  193. package/dist/scorers/code/index.js.map +0 -1
  194. package/dist/scorers/llm/index.cjs.map +0 -1
  195. package/dist/scorers/llm/index.js.map +0 -1
@@ -1,8 +0,0 @@
1
- import type { Agent } from '@mastra/core/agent';
2
- import type { Metric } from '@mastra/core/eval';
3
- export declare function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric): Promise<import("@mastra/core").EvaluationResult>;
4
- export declare const getCurrentTestInfo: () => Promise<{
5
- testName: any;
6
- testPath: any;
7
- } | undefined>;
8
- //# sourceMappingURL=evaluation.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"evaluation.d.ts","sourceRoot":"","sources":["../src/evaluation.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAEhD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAIhD,wBAAsB,QAAQ,CAAC,CAAC,SAAS,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,oDAyB5G;AAED,eAAO,MAAM,kBAAkB;;;cAwB9B,CAAC"}
package/dist/index.cjs DELETED
@@ -1,93 +0,0 @@
1
- 'use strict';
2
-
3
- var _eval = require('@mastra/core/eval');
4
- var hooks = require('@mastra/core/hooks');
5
- var storage = require('@mastra/core/storage');
6
- var utils = require('@mastra/core/utils');
7
-
8
- // src/constants.ts
9
- var GLOBAL_RUN_ID_ENV_KEY = "_MASTRA_GLOBAL_RUN_ID_";
10
-
11
- // src/evaluation.ts
12
- async function evaluate(agent, input, metric) {
13
- const testInfo = await getCurrentTestInfo();
14
- let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];
15
- const runId = crypto.randomUUID();
16
- const agentOutput = await agent.generate(input, {
17
- runId
18
- });
19
- if (!globalRunId) {
20
- globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();
21
- console.warn('Global run id not set, you should run "globalSetup" from "@mastra/evals" before evaluating.');
22
- }
23
- const metricResult = await _eval.evaluate({
24
- agentName: agent.name,
25
- input,
26
- metric,
27
- output: agentOutput.text,
28
- globalRunId,
29
- runId,
30
- testInfo,
31
- instructions: agent.instructions
32
- });
33
- return metricResult;
34
- }
35
- var getCurrentTestInfo = async () => {
36
- if (typeof expect !== "undefined" && expect.getState) {
37
- const state = expect.getState();
38
- return {
39
- testName: state.currentTestName,
40
- testPath: state.testPath
41
- };
42
- }
43
- try {
44
- const vitest = await import('./dist-LDTK3TIP.cjs');
45
- if (typeof vitest !== "undefined" && vitest.expect?.getState) {
46
- const state = vitest.expect.getState();
47
- return {
48
- testName: state.currentTestName,
49
- testPath: state.testPath
50
- };
51
- }
52
- } catch {
53
- }
54
- return void 0;
55
- };
56
- async function attachListeners(mastra) {
57
- hooks.registerHook(hooks.AvailableHooks.ON_EVALUATION, async (traceObject) => {
58
- const storage$1 = mastra?.getStorage();
59
- if (storage$1) {
60
- const logger = mastra?.getLogger();
61
- const areFieldsValid = utils.checkEvalStorageFields(traceObject, logger);
62
- if (!areFieldsValid) return;
63
- await storage$1.insert({
64
- tableName: storage.TABLE_EVALS,
65
- record: {
66
- input: traceObject.input,
67
- output: traceObject.output,
68
- result: JSON.stringify(traceObject.result || {}),
69
- agent_name: traceObject.agentName,
70
- metric_name: traceObject.metricName,
71
- instructions: traceObject.instructions,
72
- test_info: traceObject.testInfo ? JSON.stringify(traceObject.testInfo) : null,
73
- global_run_id: traceObject.globalRunId,
74
- run_id: traceObject.runId,
75
- created_at: (/* @__PURE__ */ new Date()).toISOString()
76
- }
77
- });
78
- }
79
- });
80
- }
81
- async function globalSetup() {
82
- if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {
83
- throw new Error('Global run id already set, you should only run "GlobalSetup" once');
84
- }
85
- const globalRunId = crypto.randomUUID();
86
- process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;
87
- }
88
-
89
- exports.attachListeners = attachListeners;
90
- exports.evaluate = evaluate;
91
- exports.globalSetup = globalSetup;
92
- //# sourceMappingURL=index.cjs.map
93
- //# sourceMappingURL=index.cjs.map
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/constants.ts","../src/evaluation.ts","../src/attachListeners.ts"],"names":["coreEvaluate","registerHook","AvailableHooks","storage","checkEvalStorageFields","TABLE_EVALS"],"mappings":";;;;;;;;AAAO,IAAM,qBAAA,GAAwB,wBAAA;;;ACMrC,eAAsB,QAAA,CAA0B,KAAA,EAAU,KAAA,EAAqC,MAAA,EAAgB;AAC7G,EAAA,MAAM,QAAA,GAAW,MAAM,kBAAA,EAAmB;AAC1C,EAAA,IAAI,WAAA,GAAc,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA;AACnD,EAAA,MAAM,KAAA,GAAQ,OAAO,UAAA,EAAW;AAChC,EAAA,MAAM,WAAA,GAAc,MAAM,KAAA,CAAM,QAAA,CAAS,KAAA,EAAO;AAAA,IAC9C;AAAA,GACD,CAAA;AAED,EAAA,IAAI,CAAC,WAAA,EAAa;AAChB,IAAA,WAAA,GAAc,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA,GAAI,OAAO,UAAA,EAAW;AACrE,IAAA,OAAA,CAAQ,KAAK,6FAA6F,CAAA;AAAA,EAC5G;AAEA,EAAA,MAAM,YAAA,GAAe,MAAMA,cAAA,CAAa;AAAA,IACtC,WAAW,KAAA,CAAM,IAAA;AAAA,IACjB,KAAA;AAAA,IACA,MAAA;AAAA,IACA,QAAQ,WAAA,CAAY,IAAA;AAAA,IACpB,WAAA;AAAA,IACA,KAAA;AAAA,IACA,QAAA;AAAA,IACA,cAAc,KAAA,CAAM;AAAA,GACrB,CAAA;AAED,EAAA,OAAO,YAAA;AACT;AAEO,IAAM,qBAAqB,YAAY;AAG5C,EAAA,IAAI,OAAO,MAAA,KAAW,WAAA,IAAe,MAAA,CAAO,QAAA,EAAU;AAEpD,IAAA,MAAM,KAAA,GAAQ,OAAO,QAAA,EAAS;AAC9B,IAAA,OAAO;AAAA,MACL,UAAU,KAAA,CAAM,eAAA;AAAA,MAChB,UAAU,KAAA,CAAM;AAAA,KAClB;AAAA,EACF;AAEA,EAAA,IAAI;AACF,IAAA,MAAM,MAAA,GAAS,MAAM,OAAO,qBAAQ,CAAA;AACpC,IAAA,IAAI,OAAO,MAAA,KAAW,WAAA,IAAe,MAAA,CAAO,QAAQ,QAAA,EAAU;AAC5D,MAAA,MAAM,KAAA,GAAQ,MAAA,CAAO,MAAA,CAAO,QAAA,EAAS;AACrC,MAAA,OAAO;AAAA,QACL,UAAU,KAAA,CAAM,eAAA;AAAA,QAChB,UAAU,KAAA,CAAM;AAAA,OAClB;AAAA,IACF;AAAA,EACF,CAAA,CAAA,MAAQ;AAAA,EAAC;AAET,EAAA,OAAO,MAAA;AACT,CAAA;AClDA,eAAsB,gBAAgB,MAAA,EAAiB;AACrD,EAAAC,kBAAA,CAAaC,oBAAA,CAAe,aAAA,EAAe,OAAM,WAAA,KAAe;AAC9D,IAAA,MAAMC,SAAA,GAAU,QAAQ,UAAA,EAAW;AACnC,IAAA,IAAIA,SAAA,EAAS;AAEX,MAAA,MAAM,MAAA,GAAS,QAAQ,SAAA,EAAU;AACjC,MAAA,MAAM,cAAA,GAAiBC,4BAAA,CAAuB,WAAA,EAAa,MAAM,CAAA;AACjE,MAAA,IAAI,CAAC,cAAA,EAAgB;AAErB,MAAA,MAAMD,UAAQ,MAAA,CAAO;AAAA,QACnB,SAAA,EAAWE,mBAAA;AAAA,QACX,MAAA,EAAQ;AAAA,UACN,OAAO,WAAA,CAAY,KAAA;AAAA,UACnB,QAAQ,WAAA,CAAY,MAAA;AAAA,UACpB,QAAQ,IAAA,CAAK,SAAA,CAAU,WAAA,CAAY,MAAA,IAAU,EAAE,CAAA;AAAA,UAC/C,YAAY,WAAA,CAAY,SAAA;AAAA,UACxB,aAAa,WAAA,CAAY,UAAA;AAAA,UACzB,cAAc,WAAA,CAAY,YAAA;AAAA,UAC1B,WAAW,WAAA,CAAY,QAAA,GAAW,KAAK,SAAA,CAAU,WAAA,CAAY,QAAQ,CAAA,GAAI,IAAA;AAAA,UACzE,eAAe,WAAA,CAAY,WAAA;AAAA,UAC3B,QAAQ,WAAA,CAAY,KAAA;AAAA,UACpB,UAAA,EAAA,iBAAY,IAAI,IAAA,EAAK,EAAE,WAAA;AAAY;AACrC,OACD,CAAA;AAAA,IACH;AAAA,EACF,CAAC,CAAA;AACH;AAEA,eAAsB,WAAA,GAAc;AAClC,EAAA,IAAI,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA,EAAG;AACtC,IAAA,MAAM,IAAI,MAAM,mEAAmE,CAAA;AAAA,EACrF;AAEA,EAAA,MAAM,WAAA,GAAc,OAAO,UAAA,EAAW;AACtC,EAAA,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA,GAAI,WAAA;AACvC","file":"index.cjs","sourcesContent":["export const GLOBAL_RUN_ID_ENV_KEY = '_MASTRA_GLOBAL_RUN_ID_';\n","import type { Agent } from '@mastra/core/agent';\nimport { evaluate as coreEvaluate } from '@mastra/core/eval';\nimport type { Metric } from '@mastra/core/eval';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric) {\n const testInfo = await getCurrentTestInfo();\n let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];\n const runId = crypto.randomUUID();\n const agentOutput = await agent.generate(input, {\n runId,\n });\n\n if (!globalRunId) {\n globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();\n console.warn('Global run id not set, you should run \"globalSetup\" from \"@mastra/evals\" before evaluating.');\n }\n\n const metricResult = await coreEvaluate({\n agentName: agent.name,\n input,\n metric,\n output: agentOutput.text,\n globalRunId,\n runId,\n testInfo,\n instructions: agent.instructions,\n });\n\n return metricResult;\n}\n\nexport const getCurrentTestInfo = async () => {\n // Jest\n // @ts-ignore\n if (typeof expect !== 'undefined' && expect.getState) {\n // @ts-ignore\n const state = expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n\n try {\n const vitest = await import('vitest');\n if (typeof vitest !== 'undefined' && vitest.expect?.getState) {\n const state = vitest.expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n } catch {}\n\n return undefined;\n};\n","import type { Mastra } from '@mastra/core';\nimport { AvailableHooks, registerHook } from '@mastra/core/hooks';\nimport { TABLE_EVALS } from '@mastra/core/storage';\nimport { checkEvalStorageFields } from '@mastra/core/utils';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function attachListeners(mastra?: Mastra) {\n registerHook(AvailableHooks.ON_EVALUATION, async traceObject => {\n const storage = mastra?.getStorage();\n if (storage) {\n // Check for required fields\n const logger = mastra?.getLogger();\n const areFieldsValid = checkEvalStorageFields(traceObject, logger);\n if (!areFieldsValid) return;\n\n await storage.insert({\n tableName: TABLE_EVALS,\n record: {\n input: traceObject.input,\n output: traceObject.output,\n result: JSON.stringify(traceObject.result || {}),\n agent_name: traceObject.agentName,\n metric_name: traceObject.metricName,\n instructions: traceObject.instructions,\n test_info: traceObject.testInfo ? JSON.stringify(traceObject.testInfo) : null,\n global_run_id: traceObject.globalRunId,\n run_id: traceObject.runId,\n created_at: new Date().toISOString(),\n },\n });\n }\n });\n}\n\nexport async function globalSetup() {\n if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {\n throw new Error('Global run id already set, you should only run \"GlobalSetup\" once');\n }\n\n const globalRunId = crypto.randomUUID();\n process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;\n}\n"]}
package/dist/index.d.ts DELETED
@@ -1,3 +0,0 @@
1
- export { evaluate } from './evaluation.js';
2
- export { attachListeners, globalSetup } from './attachListeners.js';
3
- //# sourceMappingURL=index.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,cAAc,CAAC;AACxC,OAAO,EAAE,eAAe,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC"}
package/dist/index.js DELETED
@@ -1,89 +0,0 @@
1
- import { evaluate as evaluate$1 } from '@mastra/core/eval';
2
- import { registerHook, AvailableHooks } from '@mastra/core/hooks';
3
- import { TABLE_EVALS } from '@mastra/core/storage';
4
- import { checkEvalStorageFields } from '@mastra/core/utils';
5
-
6
- // src/constants.ts
7
- var GLOBAL_RUN_ID_ENV_KEY = "_MASTRA_GLOBAL_RUN_ID_";
8
-
9
- // src/evaluation.ts
10
- async function evaluate(agent, input, metric) {
11
- const testInfo = await getCurrentTestInfo();
12
- let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];
13
- const runId = crypto.randomUUID();
14
- const agentOutput = await agent.generate(input, {
15
- runId
16
- });
17
- if (!globalRunId) {
18
- globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();
19
- console.warn('Global run id not set, you should run "globalSetup" from "@mastra/evals" before evaluating.');
20
- }
21
- const metricResult = await evaluate$1({
22
- agentName: agent.name,
23
- input,
24
- metric,
25
- output: agentOutput.text,
26
- globalRunId,
27
- runId,
28
- testInfo,
29
- instructions: agent.instructions
30
- });
31
- return metricResult;
32
- }
33
- var getCurrentTestInfo = async () => {
34
- if (typeof expect !== "undefined" && expect.getState) {
35
- const state = expect.getState();
36
- return {
37
- testName: state.currentTestName,
38
- testPath: state.testPath
39
- };
40
- }
41
- try {
42
- const vitest = await import('./dist-OWYZEOJK.js');
43
- if (typeof vitest !== "undefined" && vitest.expect?.getState) {
44
- const state = vitest.expect.getState();
45
- return {
46
- testName: state.currentTestName,
47
- testPath: state.testPath
48
- };
49
- }
50
- } catch {
51
- }
52
- return void 0;
53
- };
54
- async function attachListeners(mastra) {
55
- registerHook(AvailableHooks.ON_EVALUATION, async (traceObject) => {
56
- const storage = mastra?.getStorage();
57
- if (storage) {
58
- const logger = mastra?.getLogger();
59
- const areFieldsValid = checkEvalStorageFields(traceObject, logger);
60
- if (!areFieldsValid) return;
61
- await storage.insert({
62
- tableName: TABLE_EVALS,
63
- record: {
64
- input: traceObject.input,
65
- output: traceObject.output,
66
- result: JSON.stringify(traceObject.result || {}),
67
- agent_name: traceObject.agentName,
68
- metric_name: traceObject.metricName,
69
- instructions: traceObject.instructions,
70
- test_info: traceObject.testInfo ? JSON.stringify(traceObject.testInfo) : null,
71
- global_run_id: traceObject.globalRunId,
72
- run_id: traceObject.runId,
73
- created_at: (/* @__PURE__ */ new Date()).toISOString()
74
- }
75
- });
76
- }
77
- });
78
- }
79
- async function globalSetup() {
80
- if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {
81
- throw new Error('Global run id already set, you should only run "GlobalSetup" once');
82
- }
83
- const globalRunId = crypto.randomUUID();
84
- process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;
85
- }
86
-
87
- export { attachListeners, evaluate, globalSetup };
88
- //# sourceMappingURL=index.js.map
89
- //# sourceMappingURL=index.js.map
package/dist/index.js.map DELETED
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/constants.ts","../src/evaluation.ts","../src/attachListeners.ts"],"names":["coreEvaluate"],"mappings":";;;;;;AAAO,IAAM,qBAAA,GAAwB,wBAAA;;;ACMrC,eAAsB,QAAA,CAA0B,KAAA,EAAU,KAAA,EAAqC,MAAA,EAAgB;AAC7G,EAAA,MAAM,QAAA,GAAW,MAAM,kBAAA,EAAmB;AAC1C,EAAA,IAAI,WAAA,GAAc,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA;AACnD,EAAA,MAAM,KAAA,GAAQ,OAAO,UAAA,EAAW;AAChC,EAAA,MAAM,WAAA,GAAc,MAAM,KAAA,CAAM,QAAA,CAAS,KAAA,EAAO;AAAA,IAC9C;AAAA,GACD,CAAA;AAED,EAAA,IAAI,CAAC,WAAA,EAAa;AAChB,IAAA,WAAA,GAAc,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA,GAAI,OAAO,UAAA,EAAW;AACrE,IAAA,OAAA,CAAQ,KAAK,6FAA6F,CAAA;AAAA,EAC5G;AAEA,EAAA,MAAM,YAAA,GAAe,MAAMA,UAAA,CAAa;AAAA,IACtC,WAAW,KAAA,CAAM,IAAA;AAAA,IACjB,KAAA;AAAA,IACA,MAAA;AAAA,IACA,QAAQ,WAAA,CAAY,IAAA;AAAA,IACpB,WAAA;AAAA,IACA,KAAA;AAAA,IACA,QAAA;AAAA,IACA,cAAc,KAAA,CAAM;AAAA,GACrB,CAAA;AAED,EAAA,OAAO,YAAA;AACT;AAEO,IAAM,qBAAqB,YAAY;AAG5C,EAAA,IAAI,OAAO,MAAA,KAAW,WAAA,IAAe,MAAA,CAAO,QAAA,EAAU;AAEpD,IAAA,MAAM,KAAA,GAAQ,OAAO,QAAA,EAAS;AAC9B,IAAA,OAAO;AAAA,MACL,UAAU,KAAA,CAAM,eAAA;AAAA,MAChB,UAAU,KAAA,CAAM;AAAA,KAClB;AAAA,EACF;AAEA,EAAA,IAAI;AACF,IAAA,MAAM,MAAA,GAAS,MAAM,OAAO,oBAAQ,CAAA;AACpC,IAAA,IAAI,OAAO,MAAA,KAAW,WAAA,IAAe,MAAA,CAAO,QAAQ,QAAA,EAAU;AAC5D,MAAA,MAAM,KAAA,GAAQ,MAAA,CAAO,MAAA,CAAO,QAAA,EAAS;AACrC,MAAA,OAAO;AAAA,QACL,UAAU,KAAA,CAAM,eAAA;AAAA,QAChB,UAAU,KAAA,CAAM;AAAA,OAClB;AAAA,IACF;AAAA,EACF,CAAA,CAAA,MAAQ;AAAA,EAAC;AAET,EAAA,OAAO,MAAA;AACT,CAAA;AClDA,eAAsB,gBAAgB,MAAA,EAAiB;AACrD,EAAA,YAAA,CAAa,cAAA,CAAe,aAAA,EAAe,OAAM,WAAA,KAAe;AAC9D,IAAA,MAAM,OAAA,GAAU,QAAQ,UAAA,EAAW;AACnC,IAAA,IAAI,OAAA,EAAS;AAEX,MAAA,MAAM,MAAA,GAAS,QAAQ,SAAA,EAAU;AACjC,MAAA,MAAM,cAAA,GAAiB,sBAAA,CAAuB,WAAA,EAAa,MAAM,CAAA;AACjE,MAAA,IAAI,CAAC,cAAA,EAAgB;AAErB,MAAA,MAAM,QAAQ,MAAA,CAAO;AAAA,QACnB,SAAA,EAAW,WAAA;AAAA,QACX,MAAA,EAAQ;AAAA,UACN,OAAO,WAAA,CAAY,KAAA;AAAA,UACnB,QAAQ,WAAA,CAAY,MAAA;AAAA,UACpB,QAAQ,IAAA,CAAK,SAAA,CAAU,WAAA,CAAY,MAAA,IAAU,EAAE,CAAA;AAAA,UAC/C,YAAY,WAAA,CAAY,SAAA;AAAA,UACxB,aAAa,WAAA,CAAY,UAAA;AAAA,UACzB,cAAc,WAAA,CAAY,YAAA;AAAA,UAC1B,WAAW,WAAA,CAAY,QAAA,GAAW,KAAK,SAAA,CAAU,WAAA,CAAY,QAAQ,CAAA,GAAI,IAAA;AAAA,UACzE,eAAe,WAAA,CAAY,WAAA;AAAA,UAC3B,QAAQ,WAAA,CAAY,KAAA;AAAA,UACpB,UAAA,EAAA,iBAAY,IAAI,IAAA,EAAK,EAAE,WAAA;AAAY;AACrC,OACD,CAAA;AAAA,IACH;AAAA,EACF,CAAC,CAAA;AACH;AAEA,eAAsB,WAAA,GAAc;AAClC,EAAA,IAAI,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA,EAAG;AACtC,IAAA,MAAM,IAAI,MAAM,mEAAmE,CAAA;AAAA,EACrF;AAEA,EAAA,MAAM,WAAA,GAAc,OAAO,UAAA,EAAW;AACtC,EAAA,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA,GAAI,WAAA;AACvC","file":"index.js","sourcesContent":["export const GLOBAL_RUN_ID_ENV_KEY = '_MASTRA_GLOBAL_RUN_ID_';\n","import type { Agent } from '@mastra/core/agent';\nimport { evaluate as coreEvaluate } from '@mastra/core/eval';\nimport type { Metric } from '@mastra/core/eval';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric) {\n const testInfo = await getCurrentTestInfo();\n let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];\n const runId = crypto.randomUUID();\n const agentOutput = await agent.generate(input, {\n runId,\n });\n\n if (!globalRunId) {\n globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();\n console.warn('Global run id not set, you should run \"globalSetup\" from \"@mastra/evals\" before evaluating.');\n }\n\n const metricResult = await coreEvaluate({\n agentName: agent.name,\n input,\n metric,\n output: agentOutput.text,\n globalRunId,\n runId,\n testInfo,\n instructions: agent.instructions,\n });\n\n return metricResult;\n}\n\nexport const getCurrentTestInfo = async () => {\n // Jest\n // @ts-ignore\n if (typeof expect !== 'undefined' && expect.getState) {\n // @ts-ignore\n const state = expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n\n try {\n const vitest = await import('vitest');\n if (typeof vitest !== 'undefined' && vitest.expect?.getState) {\n const state = vitest.expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n } catch {}\n\n return undefined;\n};\n","import type { Mastra } from '@mastra/core';\nimport { AvailableHooks, registerHook } from '@mastra/core/hooks';\nimport { TABLE_EVALS } from '@mastra/core/storage';\nimport { checkEvalStorageFields } from '@mastra/core/utils';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function attachListeners(mastra?: Mastra) {\n registerHook(AvailableHooks.ON_EVALUATION, async traceObject => {\n const storage = mastra?.getStorage();\n if (storage) {\n // Check for required fields\n const logger = mastra?.getLogger();\n const areFieldsValid = checkEvalStorageFields(traceObject, logger);\n if (!areFieldsValid) return;\n\n await storage.insert({\n tableName: TABLE_EVALS,\n record: {\n input: traceObject.input,\n output: traceObject.output,\n result: JSON.stringify(traceObject.result || {}),\n agent_name: traceObject.agentName,\n metric_name: traceObject.metricName,\n instructions: traceObject.instructions,\n test_info: traceObject.testInfo ? JSON.stringify(traceObject.testInfo) : null,\n global_run_id: traceObject.globalRunId,\n run_id: traceObject.runId,\n created_at: new Date().toISOString(),\n },\n });\n }\n });\n}\n\nexport async function globalSetup() {\n if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {\n throw new Error('Global run id already set, you should only run \"GlobalSetup\" once');\n }\n\n const globalRunId = crypto.randomUUID();\n process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;\n}\n"]}