@mastra/evals 0.14.3-alpha.0 → 1.0.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. package/CHANGELOG.md +36 -9
  2. package/README.md +19 -159
  3. package/dist/{chunk-KHEXN75Q.js → chunk-CCLM7KPF.js} +45 -21
  4. package/dist/chunk-CCLM7KPF.js.map +1 -0
  5. package/dist/{chunk-QKR2PMLZ.cjs → chunk-TPQLLHZW.cjs} +46 -21
  6. package/dist/chunk-TPQLLHZW.cjs.map +1 -0
  7. package/dist/scorers/code/completeness/index.d.ts +1 -1
  8. package/dist/scorers/code/completeness/index.d.ts.map +1 -1
  9. package/dist/scorers/code/content-similarity/index.d.ts +1 -1
  10. package/dist/scorers/code/content-similarity/index.d.ts.map +1 -1
  11. package/dist/scorers/code/keyword-coverage/index.d.ts +1 -1
  12. package/dist/scorers/code/keyword-coverage/index.d.ts.map +1 -1
  13. package/dist/scorers/code/textual-difference/index.d.ts +1 -1
  14. package/dist/scorers/code/textual-difference/index.d.ts.map +1 -1
  15. package/dist/scorers/code/tone/index.d.ts +1 -1
  16. package/dist/scorers/code/tone/index.d.ts.map +1 -1
  17. package/dist/scorers/code/tool-call-accuracy/index.d.ts +1 -1
  18. package/dist/scorers/code/tool-call-accuracy/index.d.ts.map +1 -1
  19. package/dist/scorers/llm/answer-relevancy/index.d.ts +1 -1
  20. package/dist/scorers/llm/answer-relevancy/index.d.ts.map +1 -1
  21. package/dist/scorers/llm/answer-similarity/index.d.ts +2 -2
  22. package/dist/scorers/llm/answer-similarity/index.d.ts.map +1 -1
  23. package/dist/scorers/llm/bias/index.d.ts +2 -2
  24. package/dist/scorers/llm/bias/index.d.ts.map +1 -1
  25. package/dist/scorers/llm/context-precision/index.d.ts +3 -3
  26. package/dist/scorers/llm/context-precision/index.d.ts.map +1 -1
  27. package/dist/scorers/llm/context-relevance/index.d.ts +3 -3
  28. package/dist/scorers/llm/context-relevance/index.d.ts.map +1 -1
  29. package/dist/scorers/llm/faithfulness/index.d.ts +2 -2
  30. package/dist/scorers/llm/faithfulness/index.d.ts.map +1 -1
  31. package/dist/scorers/llm/hallucination/index.d.ts +2 -2
  32. package/dist/scorers/llm/hallucination/index.d.ts.map +1 -1
  33. package/dist/scorers/llm/noise-sensitivity/index.d.ts +1 -1
  34. package/dist/scorers/llm/noise-sensitivity/index.d.ts.map +1 -1
  35. package/dist/scorers/llm/prompt-alignment/index.d.ts +2 -2
  36. package/dist/scorers/llm/prompt-alignment/index.d.ts.map +1 -1
  37. package/dist/scorers/llm/tool-call-accuracy/index.d.ts +2 -2
  38. package/dist/scorers/llm/tool-call-accuracy/index.d.ts.map +1 -1
  39. package/dist/scorers/llm/toxicity/index.d.ts +2 -2
  40. package/dist/scorers/llm/toxicity/index.d.ts.map +1 -1
  41. package/dist/scorers/{llm → prebuilt}/index.cjs +479 -62
  42. package/dist/scorers/prebuilt/index.cjs.map +1 -0
  43. package/dist/scorers/prebuilt/index.d.ts +3 -0
  44. package/dist/scorers/prebuilt/index.d.ts.map +1 -0
  45. package/dist/scorers/{llm → prebuilt}/index.js +419 -15
  46. package/dist/scorers/prebuilt/index.js.map +1 -0
  47. package/dist/scorers/utils.cjs +21 -17
  48. package/dist/scorers/utils.d.ts +21 -11
  49. package/dist/scorers/utils.d.ts.map +1 -1
  50. package/dist/scorers/utils.js +1 -1
  51. package/package.json +12 -58
  52. package/dist/attachListeners.d.ts +0 -4
  53. package/dist/attachListeners.d.ts.map +0 -1
  54. package/dist/chunk-7QAUEU4L.cjs +0 -10
  55. package/dist/chunk-7QAUEU4L.cjs.map +0 -1
  56. package/dist/chunk-EMMSS5I5.cjs +0 -37
  57. package/dist/chunk-EMMSS5I5.cjs.map +0 -1
  58. package/dist/chunk-G3PMV62Z.js +0 -33
  59. package/dist/chunk-G3PMV62Z.js.map +0 -1
  60. package/dist/chunk-IUSAD2BW.cjs +0 -19
  61. package/dist/chunk-IUSAD2BW.cjs.map +0 -1
  62. package/dist/chunk-KHEXN75Q.js.map +0 -1
  63. package/dist/chunk-QKR2PMLZ.cjs.map +0 -1
  64. package/dist/chunk-QTWX6TKR.js +0 -8
  65. package/dist/chunk-QTWX6TKR.js.map +0 -1
  66. package/dist/chunk-YGTIO3J5.js +0 -17
  67. package/dist/chunk-YGTIO3J5.js.map +0 -1
  68. package/dist/dist-LDTK3TIP.cjs +0 -16759
  69. package/dist/dist-LDTK3TIP.cjs.map +0 -1
  70. package/dist/dist-OWYZEOJK.js +0 -16737
  71. package/dist/dist-OWYZEOJK.js.map +0 -1
  72. package/dist/evaluation.d.ts +0 -8
  73. package/dist/evaluation.d.ts.map +0 -1
  74. package/dist/index.cjs +0 -93
  75. package/dist/index.cjs.map +0 -1
  76. package/dist/index.d.ts +0 -3
  77. package/dist/index.d.ts.map +0 -1
  78. package/dist/index.js +0 -89
  79. package/dist/index.js.map +0 -1
  80. package/dist/magic-string.es-7ORA5OGR.js +0 -1305
  81. package/dist/magic-string.es-7ORA5OGR.js.map +0 -1
  82. package/dist/magic-string.es-NZ2XWFKN.cjs +0 -1311
  83. package/dist/magic-string.es-NZ2XWFKN.cjs.map +0 -1
  84. package/dist/metrics/index.d.ts +0 -4
  85. package/dist/metrics/index.d.ts.map +0 -1
  86. package/dist/metrics/judge/index.cjs +0 -12
  87. package/dist/metrics/judge/index.cjs.map +0 -1
  88. package/dist/metrics/judge/index.d.ts +0 -7
  89. package/dist/metrics/judge/index.d.ts.map +0 -1
  90. package/dist/metrics/judge/index.js +0 -3
  91. package/dist/metrics/judge/index.js.map +0 -1
  92. package/dist/metrics/llm/answer-relevancy/index.d.ts +0 -16
  93. package/dist/metrics/llm/answer-relevancy/index.d.ts.map +0 -1
  94. package/dist/metrics/llm/answer-relevancy/metricJudge.d.ts +0 -20
  95. package/dist/metrics/llm/answer-relevancy/metricJudge.d.ts.map +0 -1
  96. package/dist/metrics/llm/answer-relevancy/prompts.d.ts +0 -19
  97. package/dist/metrics/llm/answer-relevancy/prompts.d.ts.map +0 -1
  98. package/dist/metrics/llm/bias/index.d.ts +0 -14
  99. package/dist/metrics/llm/bias/index.d.ts.map +0 -1
  100. package/dist/metrics/llm/bias/metricJudge.d.ts +0 -14
  101. package/dist/metrics/llm/bias/metricJudge.d.ts.map +0 -1
  102. package/dist/metrics/llm/bias/prompts.d.ts +0 -14
  103. package/dist/metrics/llm/bias/prompts.d.ts.map +0 -1
  104. package/dist/metrics/llm/context-position/index.d.ts +0 -16
  105. package/dist/metrics/llm/context-position/index.d.ts.map +0 -1
  106. package/dist/metrics/llm/context-position/metricJudge.d.ts +0 -20
  107. package/dist/metrics/llm/context-position/metricJudge.d.ts.map +0 -1
  108. package/dist/metrics/llm/context-position/prompts.d.ts +0 -17
  109. package/dist/metrics/llm/context-position/prompts.d.ts.map +0 -1
  110. package/dist/metrics/llm/context-precision/index.d.ts +0 -16
  111. package/dist/metrics/llm/context-precision/index.d.ts.map +0 -1
  112. package/dist/metrics/llm/context-precision/metricJudge.d.ts +0 -20
  113. package/dist/metrics/llm/context-precision/metricJudge.d.ts.map +0 -1
  114. package/dist/metrics/llm/context-precision/prompts.d.ts +0 -17
  115. package/dist/metrics/llm/context-precision/prompts.d.ts.map +0 -1
  116. package/dist/metrics/llm/context-relevancy/index.d.ts +0 -16
  117. package/dist/metrics/llm/context-relevancy/index.d.ts.map +0 -1
  118. package/dist/metrics/llm/context-relevancy/metricJudge.d.ts +0 -16
  119. package/dist/metrics/llm/context-relevancy/metricJudge.d.ts.map +0 -1
  120. package/dist/metrics/llm/context-relevancy/prompts.d.ts +0 -13
  121. package/dist/metrics/llm/context-relevancy/prompts.d.ts.map +0 -1
  122. package/dist/metrics/llm/contextual-recall/index.d.ts +0 -16
  123. package/dist/metrics/llm/contextual-recall/index.d.ts.map +0 -1
  124. package/dist/metrics/llm/contextual-recall/metricJudge.d.ts +0 -16
  125. package/dist/metrics/llm/contextual-recall/metricJudge.d.ts.map +0 -1
  126. package/dist/metrics/llm/contextual-recall/prompts.d.ts +0 -13
  127. package/dist/metrics/llm/contextual-recall/prompts.d.ts.map +0 -1
  128. package/dist/metrics/llm/faithfulness/index.d.ts +0 -16
  129. package/dist/metrics/llm/faithfulness/index.d.ts.map +0 -1
  130. package/dist/metrics/llm/faithfulness/metricJudge.d.ts +0 -22
  131. package/dist/metrics/llm/faithfulness/metricJudge.d.ts.map +0 -1
  132. package/dist/metrics/llm/faithfulness/prompts.d.ts +0 -20
  133. package/dist/metrics/llm/faithfulness/prompts.d.ts.map +0 -1
  134. package/dist/metrics/llm/hallucination/index.d.ts +0 -16
  135. package/dist/metrics/llm/hallucination/index.d.ts.map +0 -1
  136. package/dist/metrics/llm/hallucination/metricJudge.d.ts +0 -22
  137. package/dist/metrics/llm/hallucination/metricJudge.d.ts.map +0 -1
  138. package/dist/metrics/llm/hallucination/prompts.d.ts +0 -17
  139. package/dist/metrics/llm/hallucination/prompts.d.ts.map +0 -1
  140. package/dist/metrics/llm/index.cjs +0 -2481
  141. package/dist/metrics/llm/index.cjs.map +0 -1
  142. package/dist/metrics/llm/index.d.ts +0 -12
  143. package/dist/metrics/llm/index.d.ts.map +0 -1
  144. package/dist/metrics/llm/index.js +0 -2469
  145. package/dist/metrics/llm/index.js.map +0 -1
  146. package/dist/metrics/llm/prompt-alignment/index.d.ts +0 -33
  147. package/dist/metrics/llm/prompt-alignment/index.d.ts.map +0 -1
  148. package/dist/metrics/llm/prompt-alignment/metricJudge.d.ts +0 -20
  149. package/dist/metrics/llm/prompt-alignment/metricJudge.d.ts.map +0 -1
  150. package/dist/metrics/llm/prompt-alignment/prompts.d.ts +0 -17
  151. package/dist/metrics/llm/prompt-alignment/prompts.d.ts.map +0 -1
  152. package/dist/metrics/llm/summarization/index.d.ts +0 -19
  153. package/dist/metrics/llm/summarization/index.d.ts.map +0 -1
  154. package/dist/metrics/llm/summarization/metricJudge.d.ts +0 -34
  155. package/dist/metrics/llm/summarization/metricJudge.d.ts.map +0 -1
  156. package/dist/metrics/llm/summarization/prompts.d.ts +0 -30
  157. package/dist/metrics/llm/summarization/prompts.d.ts.map +0 -1
  158. package/dist/metrics/llm/toxicity/index.d.ts +0 -14
  159. package/dist/metrics/llm/toxicity/index.d.ts.map +0 -1
  160. package/dist/metrics/llm/toxicity/metricJudge.d.ts +0 -14
  161. package/dist/metrics/llm/toxicity/metricJudge.d.ts.map +0 -1
  162. package/dist/metrics/llm/toxicity/prompts.d.ts +0 -10
  163. package/dist/metrics/llm/toxicity/prompts.d.ts.map +0 -1
  164. package/dist/metrics/llm/types.d.ts +0 -7
  165. package/dist/metrics/llm/types.d.ts.map +0 -1
  166. package/dist/metrics/llm/utils.d.ts +0 -14
  167. package/dist/metrics/llm/utils.d.ts.map +0 -1
  168. package/dist/metrics/nlp/completeness/index.d.ts +0 -21
  169. package/dist/metrics/nlp/completeness/index.d.ts.map +0 -1
  170. package/dist/metrics/nlp/content-similarity/index.d.ts +0 -18
  171. package/dist/metrics/nlp/content-similarity/index.d.ts.map +0 -1
  172. package/dist/metrics/nlp/index.cjs +0 -203
  173. package/dist/metrics/nlp/index.cjs.map +0 -1
  174. package/dist/metrics/nlp/index.d.ts +0 -6
  175. package/dist/metrics/nlp/index.d.ts.map +0 -1
  176. package/dist/metrics/nlp/index.js +0 -190
  177. package/dist/metrics/nlp/index.js.map +0 -1
  178. package/dist/metrics/nlp/keyword-coverage/index.d.ts +0 -13
  179. package/dist/metrics/nlp/keyword-coverage/index.d.ts.map +0 -1
  180. package/dist/metrics/nlp/textual-difference/index.d.ts +0 -15
  181. package/dist/metrics/nlp/textual-difference/index.d.ts.map +0 -1
  182. package/dist/metrics/nlp/tone/index.d.ts +0 -18
  183. package/dist/metrics/nlp/tone/index.d.ts.map +0 -1
  184. package/dist/scorers/code/index.cjs +0 -329
  185. package/dist/scorers/code/index.cjs.map +0 -1
  186. package/dist/scorers/code/index.js +0 -315
  187. package/dist/scorers/code/index.js.map +0 -1
  188. package/dist/scorers/llm/index.cjs.map +0 -1
  189. package/dist/scorers/llm/index.js.map +0 -1
@@ -1,8 +0,0 @@
1
- import type { Agent } from '@mastra/core/agent';
2
- import type { Metric } from '@mastra/core/eval';
3
- export declare function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric): Promise<import("@mastra/core").EvaluationResult>;
4
- export declare const getCurrentTestInfo: () => Promise<{
5
- testName: any;
6
- testPath: any;
7
- } | undefined>;
8
- //# sourceMappingURL=evaluation.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"evaluation.d.ts","sourceRoot":"","sources":["../src/evaluation.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAEhD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAIhD,wBAAsB,QAAQ,CAAC,CAAC,SAAS,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,oDAyB5G;AAED,eAAO,MAAM,kBAAkB;;;cAwB9B,CAAC"}
package/dist/index.cjs DELETED
@@ -1,93 +0,0 @@
1
- 'use strict';
2
-
3
- var _eval = require('@mastra/core/eval');
4
- var hooks = require('@mastra/core/hooks');
5
- var storage = require('@mastra/core/storage');
6
- var utils = require('@mastra/core/utils');
7
-
8
- // src/constants.ts
9
- var GLOBAL_RUN_ID_ENV_KEY = "_MASTRA_GLOBAL_RUN_ID_";
10
-
11
- // src/evaluation.ts
12
- async function evaluate(agent, input, metric) {
13
- const testInfo = await getCurrentTestInfo();
14
- let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];
15
- const runId = crypto.randomUUID();
16
- const agentOutput = await agent.generate(input, {
17
- runId
18
- });
19
- if (!globalRunId) {
20
- globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();
21
- console.warn('Global run id not set, you should run "globalSetup" from "@mastra/evals" before evaluating.');
22
- }
23
- const metricResult = await _eval.evaluate({
24
- agentName: agent.name,
25
- input,
26
- metric,
27
- output: agentOutput.text,
28
- globalRunId,
29
- runId,
30
- testInfo,
31
- instructions: agent.instructions
32
- });
33
- return metricResult;
34
- }
35
- var getCurrentTestInfo = async () => {
36
- if (typeof expect !== "undefined" && expect.getState) {
37
- const state = expect.getState();
38
- return {
39
- testName: state.currentTestName,
40
- testPath: state.testPath
41
- };
42
- }
43
- try {
44
- const vitest = await import('./dist-LDTK3TIP.cjs');
45
- if (typeof vitest !== "undefined" && vitest.expect?.getState) {
46
- const state = vitest.expect.getState();
47
- return {
48
- testName: state.currentTestName,
49
- testPath: state.testPath
50
- };
51
- }
52
- } catch {
53
- }
54
- return void 0;
55
- };
56
- async function attachListeners(mastra) {
57
- hooks.registerHook(hooks.AvailableHooks.ON_EVALUATION, async (traceObject) => {
58
- const storage$1 = mastra?.getStorage();
59
- if (storage$1) {
60
- const logger = mastra?.getLogger();
61
- const areFieldsValid = utils.checkEvalStorageFields(traceObject, logger);
62
- if (!areFieldsValid) return;
63
- await storage$1.insert({
64
- tableName: storage.TABLE_EVALS,
65
- record: {
66
- input: traceObject.input,
67
- output: traceObject.output,
68
- result: JSON.stringify(traceObject.result || {}),
69
- agent_name: traceObject.agentName,
70
- metric_name: traceObject.metricName,
71
- instructions: traceObject.instructions,
72
- test_info: traceObject.testInfo ? JSON.stringify(traceObject.testInfo) : null,
73
- global_run_id: traceObject.globalRunId,
74
- run_id: traceObject.runId,
75
- created_at: (/* @__PURE__ */ new Date()).toISOString()
76
- }
77
- });
78
- }
79
- });
80
- }
81
- async function globalSetup() {
82
- if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {
83
- throw new Error('Global run id already set, you should only run "GlobalSetup" once');
84
- }
85
- const globalRunId = crypto.randomUUID();
86
- process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;
87
- }
88
-
89
- exports.attachListeners = attachListeners;
90
- exports.evaluate = evaluate;
91
- exports.globalSetup = globalSetup;
92
- //# sourceMappingURL=index.cjs.map
93
- //# sourceMappingURL=index.cjs.map
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/constants.ts","../src/evaluation.ts","../src/attachListeners.ts"],"names":["coreEvaluate","registerHook","AvailableHooks","storage","checkEvalStorageFields","TABLE_EVALS"],"mappings":";;;;;;;;AAAO,IAAM,qBAAA,GAAwB,wBAAA;;;ACMrC,eAAsB,QAAA,CAA0B,KAAA,EAAU,KAAA,EAAqC,MAAA,EAAgB;AAC7G,EAAA,MAAM,QAAA,GAAW,MAAM,kBAAA,EAAmB;AAC1C,EAAA,IAAI,WAAA,GAAc,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA;AACnD,EAAA,MAAM,KAAA,GAAQ,OAAO,UAAA,EAAW;AAChC,EAAA,MAAM,WAAA,GAAc,MAAM,KAAA,CAAM,QAAA,CAAS,KAAA,EAAO;AAAA,IAC9C;AAAA,GACD,CAAA;AAED,EAAA,IAAI,CAAC,WAAA,EAAa;AAChB,IAAA,WAAA,GAAc,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA,GAAI,OAAO,UAAA,EAAW;AACrE,IAAA,OAAA,CAAQ,KAAK,6FAA6F,CAAA;AAAA,EAC5G;AAEA,EAAA,MAAM,YAAA,GAAe,MAAMA,cAAA,CAAa;AAAA,IACtC,WAAW,KAAA,CAAM,IAAA;AAAA,IACjB,KAAA;AAAA,IACA,MAAA;AAAA,IACA,QAAQ,WAAA,CAAY,IAAA;AAAA,IACpB,WAAA;AAAA,IACA,KAAA;AAAA,IACA,QAAA;AAAA,IACA,cAAc,KAAA,CAAM;AAAA,GACrB,CAAA;AAED,EAAA,OAAO,YAAA;AACT;AAEO,IAAM,qBAAqB,YAAY;AAG5C,EAAA,IAAI,OAAO,MAAA,KAAW,WAAA,IAAe,MAAA,CAAO,QAAA,EAAU;AAEpD,IAAA,MAAM,KAAA,GAAQ,OAAO,QAAA,EAAS;AAC9B,IAAA,OAAO;AAAA,MACL,UAAU,KAAA,CAAM,eAAA;AAAA,MAChB,UAAU,KAAA,CAAM;AAAA,KAClB;AAAA,EACF;AAEA,EAAA,IAAI;AACF,IAAA,MAAM,MAAA,GAAS,MAAM,OAAO,qBAAQ,CAAA;AACpC,IAAA,IAAI,OAAO,MAAA,KAAW,WAAA,IAAe,MAAA,CAAO,QAAQ,QAAA,EAAU;AAC5D,MAAA,MAAM,KAAA,GAAQ,MAAA,CAAO,MAAA,CAAO,QAAA,EAAS;AACrC,MAAA,OAAO;AAAA,QACL,UAAU,KAAA,CAAM,eAAA;AAAA,QAChB,UAAU,KAAA,CAAM;AAAA,OAClB;AAAA,IACF;AAAA,EACF,CAAA,CAAA,MAAQ;AAAA,EAAC;AAET,EAAA,OAAO,MAAA;AACT,CAAA;AClDA,eAAsB,gBAAgB,MAAA,EAAiB;AACrD,EAAAC,kBAAA,CAAaC,oBAAA,CAAe,aAAA,EAAe,OAAM,WAAA,KAAe;AAC9D,IAAA,MAAMC,SAAA,GAAU,QAAQ,UAAA,EAAW;AACnC,IAAA,IAAIA,SAAA,EAAS;AAEX,MAAA,MAAM,MAAA,GAAS,QAAQ,SAAA,EAAU;AACjC,MAAA,MAAM,cAAA,GAAiBC,4BAAA,CAAuB,WAAA,EAAa,MAAM,CAAA;AACjE,MAAA,IAAI,CAAC,cAAA,EAAgB;AAErB,MAAA,MAAMD,UAAQ,MAAA,CAAO;AAAA,QACnB,SAAA,EAAWE,mBAAA;AAAA,QACX,MAAA,EAAQ;AAAA,UACN,OAAO,WAAA,CAAY,KAAA;AAAA,UACnB,QAAQ,WAAA,CAAY,MAAA;AAAA,UACpB,QAAQ,IAAA,CAAK,SAAA,CAAU,WAAA,CAAY,MAAA,IAAU,EAAE,CAAA;AAAA,UAC/C,YAAY,WAAA,CAAY,SAAA;AAAA,UACxB,aAAa,WAAA,CAAY,UAAA;AAAA,UACzB,cAAc,WAAA,CAAY,YAAA;AAAA,UAC1B,WAAW,WAAA,CAAY,QAAA,GAAW,KAAK,SAAA,CAAU,WAAA,CAAY,QAAQ,CAAA,GAAI,IAAA;AAAA,UACzE,eAAe,WAAA,CAAY,WAAA;AAAA,UAC3B,QAAQ,WAAA,CAAY,KAAA;AAAA,UACpB,UAAA,EAAA,iBAAY,IAAI,IAAA,EAAK,EAAE,WAAA;AAAY;AACrC,OACD,CAAA;AAAA,IACH;AAAA,EACF,CAAC,CAAA;AACH;AAEA,eAAsB,WAAA,GAAc;AAClC,EAAA,IAAI,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA,EAAG;AACtC,IAAA,MAAM,IAAI,MAAM,mEAAmE,CAAA;AAAA,EACrF;AAEA,EAAA,MAAM,WAAA,GAAc,OAAO,UAAA,EAAW;AACtC,EAAA,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA,GAAI,WAAA;AACvC","file":"index.cjs","sourcesContent":["export const GLOBAL_RUN_ID_ENV_KEY = '_MASTRA_GLOBAL_RUN_ID_';\n","import type { Agent } from '@mastra/core/agent';\nimport { evaluate as coreEvaluate } from '@mastra/core/eval';\nimport type { Metric } from '@mastra/core/eval';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric) {\n const testInfo = await getCurrentTestInfo();\n let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];\n const runId = crypto.randomUUID();\n const agentOutput = await agent.generate(input, {\n runId,\n });\n\n if (!globalRunId) {\n globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();\n console.warn('Global run id not set, you should run \"globalSetup\" from \"@mastra/evals\" before evaluating.');\n }\n\n const metricResult = await coreEvaluate({\n agentName: agent.name,\n input,\n metric,\n output: agentOutput.text,\n globalRunId,\n runId,\n testInfo,\n instructions: agent.instructions,\n });\n\n return metricResult;\n}\n\nexport const getCurrentTestInfo = async () => {\n // Jest\n // @ts-ignore\n if (typeof expect !== 'undefined' && expect.getState) {\n // @ts-ignore\n const state = expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n\n try {\n const vitest = await import('vitest');\n if (typeof vitest !== 'undefined' && vitest.expect?.getState) {\n const state = vitest.expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n } catch {}\n\n return undefined;\n};\n","import type { Mastra } from '@mastra/core';\nimport { AvailableHooks, registerHook } from '@mastra/core/hooks';\nimport { TABLE_EVALS } from '@mastra/core/storage';\nimport { checkEvalStorageFields } from '@mastra/core/utils';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function attachListeners(mastra?: Mastra) {\n registerHook(AvailableHooks.ON_EVALUATION, async traceObject => {\n const storage = mastra?.getStorage();\n if (storage) {\n // Check for required fields\n const logger = mastra?.getLogger();\n const areFieldsValid = checkEvalStorageFields(traceObject, logger);\n if (!areFieldsValid) return;\n\n await storage.insert({\n tableName: TABLE_EVALS,\n record: {\n input: traceObject.input,\n output: traceObject.output,\n result: JSON.stringify(traceObject.result || {}),\n agent_name: traceObject.agentName,\n metric_name: traceObject.metricName,\n instructions: traceObject.instructions,\n test_info: traceObject.testInfo ? JSON.stringify(traceObject.testInfo) : null,\n global_run_id: traceObject.globalRunId,\n run_id: traceObject.runId,\n created_at: new Date().toISOString(),\n },\n });\n }\n });\n}\n\nexport async function globalSetup() {\n if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {\n throw new Error('Global run id already set, you should only run \"GlobalSetup\" once');\n }\n\n const globalRunId = crypto.randomUUID();\n process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;\n}\n"]}
package/dist/index.d.ts DELETED
@@ -1,3 +0,0 @@
1
- export { evaluate } from './evaluation.js';
2
- export { attachListeners, globalSetup } from './attachListeners.js';
3
- //# sourceMappingURL=index.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,cAAc,CAAC;AACxC,OAAO,EAAE,eAAe,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC"}
package/dist/index.js DELETED
@@ -1,89 +0,0 @@
1
- import { evaluate as evaluate$1 } from '@mastra/core/eval';
2
- import { registerHook, AvailableHooks } from '@mastra/core/hooks';
3
- import { TABLE_EVALS } from '@mastra/core/storage';
4
- import { checkEvalStorageFields } from '@mastra/core/utils';
5
-
6
- // src/constants.ts
7
- var GLOBAL_RUN_ID_ENV_KEY = "_MASTRA_GLOBAL_RUN_ID_";
8
-
9
- // src/evaluation.ts
10
- async function evaluate(agent, input, metric) {
11
- const testInfo = await getCurrentTestInfo();
12
- let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];
13
- const runId = crypto.randomUUID();
14
- const agentOutput = await agent.generate(input, {
15
- runId
16
- });
17
- if (!globalRunId) {
18
- globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();
19
- console.warn('Global run id not set, you should run "globalSetup" from "@mastra/evals" before evaluating.');
20
- }
21
- const metricResult = await evaluate$1({
22
- agentName: agent.name,
23
- input,
24
- metric,
25
- output: agentOutput.text,
26
- globalRunId,
27
- runId,
28
- testInfo,
29
- instructions: agent.instructions
30
- });
31
- return metricResult;
32
- }
33
- var getCurrentTestInfo = async () => {
34
- if (typeof expect !== "undefined" && expect.getState) {
35
- const state = expect.getState();
36
- return {
37
- testName: state.currentTestName,
38
- testPath: state.testPath
39
- };
40
- }
41
- try {
42
- const vitest = await import('./dist-OWYZEOJK.js');
43
- if (typeof vitest !== "undefined" && vitest.expect?.getState) {
44
- const state = vitest.expect.getState();
45
- return {
46
- testName: state.currentTestName,
47
- testPath: state.testPath
48
- };
49
- }
50
- } catch {
51
- }
52
- return void 0;
53
- };
54
- async function attachListeners(mastra) {
55
- registerHook(AvailableHooks.ON_EVALUATION, async (traceObject) => {
56
- const storage = mastra?.getStorage();
57
- if (storage) {
58
- const logger = mastra?.getLogger();
59
- const areFieldsValid = checkEvalStorageFields(traceObject, logger);
60
- if (!areFieldsValid) return;
61
- await storage.insert({
62
- tableName: TABLE_EVALS,
63
- record: {
64
- input: traceObject.input,
65
- output: traceObject.output,
66
- result: JSON.stringify(traceObject.result || {}),
67
- agent_name: traceObject.agentName,
68
- metric_name: traceObject.metricName,
69
- instructions: traceObject.instructions,
70
- test_info: traceObject.testInfo ? JSON.stringify(traceObject.testInfo) : null,
71
- global_run_id: traceObject.globalRunId,
72
- run_id: traceObject.runId,
73
- created_at: (/* @__PURE__ */ new Date()).toISOString()
74
- }
75
- });
76
- }
77
- });
78
- }
79
- async function globalSetup() {
80
- if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {
81
- throw new Error('Global run id already set, you should only run "GlobalSetup" once');
82
- }
83
- const globalRunId = crypto.randomUUID();
84
- process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;
85
- }
86
-
87
- export { attachListeners, evaluate, globalSetup };
88
- //# sourceMappingURL=index.js.map
89
- //# sourceMappingURL=index.js.map
package/dist/index.js.map DELETED
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/constants.ts","../src/evaluation.ts","../src/attachListeners.ts"],"names":["coreEvaluate"],"mappings":";;;;;;AAAO,IAAM,qBAAA,GAAwB,wBAAA;;;ACMrC,eAAsB,QAAA,CAA0B,KAAA,EAAU,KAAA,EAAqC,MAAA,EAAgB;AAC7G,EAAA,MAAM,QAAA,GAAW,MAAM,kBAAA,EAAmB;AAC1C,EAAA,IAAI,WAAA,GAAc,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA;AACnD,EAAA,MAAM,KAAA,GAAQ,OAAO,UAAA,EAAW;AAChC,EAAA,MAAM,WAAA,GAAc,MAAM,KAAA,CAAM,QAAA,CAAS,KAAA,EAAO;AAAA,IAC9C;AAAA,GACD,CAAA;AAED,EAAA,IAAI,CAAC,WAAA,EAAa;AAChB,IAAA,WAAA,GAAc,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA,GAAI,OAAO,UAAA,EAAW;AACrE,IAAA,OAAA,CAAQ,KAAK,6FAA6F,CAAA;AAAA,EAC5G;AAEA,EAAA,MAAM,YAAA,GAAe,MAAMA,UAAA,CAAa;AAAA,IACtC,WAAW,KAAA,CAAM,IAAA;AAAA,IACjB,KAAA;AAAA,IACA,MAAA;AAAA,IACA,QAAQ,WAAA,CAAY,IAAA;AAAA,IACpB,WAAA;AAAA,IACA,KAAA;AAAA,IACA,QAAA;AAAA,IACA,cAAc,KAAA,CAAM;AAAA,GACrB,CAAA;AAED,EAAA,OAAO,YAAA;AACT;AAEO,IAAM,qBAAqB,YAAY;AAG5C,EAAA,IAAI,OAAO,MAAA,KAAW,WAAA,IAAe,MAAA,CAAO,QAAA,EAAU;AAEpD,IAAA,MAAM,KAAA,GAAQ,OAAO,QAAA,EAAS;AAC9B,IAAA,OAAO;AAAA,MACL,UAAU,KAAA,CAAM,eAAA;AAAA,MAChB,UAAU,KAAA,CAAM;AAAA,KAClB;AAAA,EACF;AAEA,EAAA,IAAI;AACF,IAAA,MAAM,MAAA,GAAS,MAAM,OAAO,oBAAQ,CAAA;AACpC,IAAA,IAAI,OAAO,MAAA,KAAW,WAAA,IAAe,MAAA,CAAO,QAAQ,QAAA,EAAU;AAC5D,MAAA,MAAM,KAAA,GAAQ,MAAA,CAAO,MAAA,CAAO,QAAA,EAAS;AACrC,MAAA,OAAO;AAAA,QACL,UAAU,KAAA,CAAM,eAAA;AAAA,QAChB,UAAU,KAAA,CAAM;AAAA,OAClB;AAAA,IACF;AAAA,EACF,CAAA,CAAA,MAAQ;AAAA,EAAC;AAET,EAAA,OAAO,MAAA;AACT,CAAA;AClDA,eAAsB,gBAAgB,MAAA,EAAiB;AACrD,EAAA,YAAA,CAAa,cAAA,CAAe,aAAA,EAAe,OAAM,WAAA,KAAe;AAC9D,IAAA,MAAM,OAAA,GAAU,QAAQ,UAAA,EAAW;AACnC,IAAA,IAAI,OAAA,EAAS;AAEX,MAAA,MAAM,MAAA,GAAS,QAAQ,SAAA,EAAU;AACjC,MAAA,MAAM,cAAA,GAAiB,sBAAA,CAAuB,WAAA,EAAa,MAAM,CAAA;AACjE,MAAA,IAAI,CAAC,cAAA,EAAgB;AAErB,MAAA,MAAM,QAAQ,MAAA,CAAO;AAAA,QACnB,SAAA,EAAW,WAAA;AAAA,QACX,MAAA,EAAQ;AAAA,UACN,OAAO,WAAA,CAAY,KAAA;AAAA,UACnB,QAAQ,WAAA,CAAY,MAAA;AAAA,UACpB,QAAQ,IAAA,CAAK,SAAA,CAAU,WAAA,CAAY,MAAA,IAAU,EAAE,CAAA;AAAA,UAC/C,YAAY,WAAA,CAAY,SAAA;AAAA,UACxB,aAAa,WAAA,CAAY,UAAA;AAAA,UACzB,cAAc,WAAA,CAAY,YAAA;AAAA,UAC1B,WAAW,WAAA,CAAY,QAAA,GAAW,KAAK,SAAA,CAAU,WAAA,CAAY,QAAQ,CAAA,GAAI,IAAA;AAAA,UACzE,eAAe,WAAA,CAAY,WAAA;AAAA,UAC3B,QAAQ,WAAA,CAAY,KAAA;AAAA,UACpB,UAAA,EAAA,iBAAY,IAAI,IAAA,EAAK,EAAE,WAAA;AAAY;AACrC,OACD,CAAA;AAAA,IACH;AAAA,EACF,CAAC,CAAA;AACH;AAEA,eAAsB,WAAA,GAAc;AAClC,EAAA,IAAI,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA,EAAG;AACtC,IAAA,MAAM,IAAI,MAAM,mEAAmE,CAAA;AAAA,EACrF;AAEA,EAAA,MAAM,WAAA,GAAc,OAAO,UAAA,EAAW;AACtC,EAAA,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA,GAAI,WAAA;AACvC","file":"index.js","sourcesContent":["export const GLOBAL_RUN_ID_ENV_KEY = '_MASTRA_GLOBAL_RUN_ID_';\n","import type { Agent } from '@mastra/core/agent';\nimport { evaluate as coreEvaluate } from '@mastra/core/eval';\nimport type { Metric } from '@mastra/core/eval';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric) {\n const testInfo = await getCurrentTestInfo();\n let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];\n const runId = crypto.randomUUID();\n const agentOutput = await agent.generate(input, {\n runId,\n });\n\n if (!globalRunId) {\n globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();\n console.warn('Global run id not set, you should run \"globalSetup\" from \"@mastra/evals\" before evaluating.');\n }\n\n const metricResult = await coreEvaluate({\n agentName: agent.name,\n input,\n metric,\n output: agentOutput.text,\n globalRunId,\n runId,\n testInfo,\n instructions: agent.instructions,\n });\n\n return metricResult;\n}\n\nexport const getCurrentTestInfo = async () => {\n // Jest\n // @ts-ignore\n if (typeof expect !== 'undefined' && expect.getState) {\n // @ts-ignore\n const state = expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n\n try {\n const vitest = await import('vitest');\n if (typeof vitest !== 'undefined' && vitest.expect?.getState) {\n const state = vitest.expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n } catch {}\n\n return undefined;\n};\n","import type { Mastra } from '@mastra/core';\nimport { AvailableHooks, registerHook } from '@mastra/core/hooks';\nimport { TABLE_EVALS } from '@mastra/core/storage';\nimport { checkEvalStorageFields } from '@mastra/core/utils';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function attachListeners(mastra?: Mastra) {\n registerHook(AvailableHooks.ON_EVALUATION, async traceObject => {\n const storage = mastra?.getStorage();\n if (storage) {\n // Check for required fields\n const logger = mastra?.getLogger();\n const areFieldsValid = checkEvalStorageFields(traceObject, logger);\n if (!areFieldsValid) return;\n\n await storage.insert({\n tableName: TABLE_EVALS,\n record: {\n input: traceObject.input,\n output: traceObject.output,\n result: JSON.stringify(traceObject.result || {}),\n agent_name: traceObject.agentName,\n metric_name: traceObject.metricName,\n instructions: traceObject.instructions,\n test_info: traceObject.testInfo ? JSON.stringify(traceObject.testInfo) : null,\n global_run_id: traceObject.globalRunId,\n run_id: traceObject.runId,\n created_at: new Date().toISOString(),\n },\n });\n }\n });\n}\n\nexport async function globalSetup() {\n if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {\n throw new Error('Global run id already set, you should only run \"GlobalSetup\" once');\n }\n\n const globalRunId = crypto.randomUUID();\n process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;\n}\n"]}