@mastra/evals 0.14.4 → 1.0.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +34 -25
- package/README.md +19 -159
- package/dist/{chunk-KHEXN75Q.js → chunk-CCLM7KPF.js} +45 -21
- package/dist/chunk-CCLM7KPF.js.map +1 -0
- package/dist/{chunk-QKR2PMLZ.cjs → chunk-TPQLLHZW.cjs} +46 -21
- package/dist/chunk-TPQLLHZW.cjs.map +1 -0
- package/dist/scorers/code/completeness/index.d.ts +1 -1
- package/dist/scorers/code/completeness/index.d.ts.map +1 -1
- package/dist/scorers/code/content-similarity/index.d.ts +1 -1
- package/dist/scorers/code/content-similarity/index.d.ts.map +1 -1
- package/dist/scorers/code/keyword-coverage/index.d.ts +1 -1
- package/dist/scorers/code/keyword-coverage/index.d.ts.map +1 -1
- package/dist/scorers/code/textual-difference/index.d.ts +1 -1
- package/dist/scorers/code/textual-difference/index.d.ts.map +1 -1
- package/dist/scorers/code/tone/index.d.ts +1 -1
- package/dist/scorers/code/tone/index.d.ts.map +1 -1
- package/dist/scorers/code/tool-call-accuracy/index.d.ts +1 -1
- package/dist/scorers/code/tool-call-accuracy/index.d.ts.map +1 -1
- package/dist/scorers/llm/answer-relevancy/index.d.ts +1 -1
- package/dist/scorers/llm/answer-relevancy/index.d.ts.map +1 -1
- package/dist/scorers/llm/answer-similarity/index.d.ts +2 -2
- package/dist/scorers/llm/answer-similarity/index.d.ts.map +1 -1
- package/dist/scorers/llm/bias/index.d.ts +2 -2
- package/dist/scorers/llm/bias/index.d.ts.map +1 -1
- package/dist/scorers/llm/context-precision/index.d.ts +3 -3
- package/dist/scorers/llm/context-precision/index.d.ts.map +1 -1
- package/dist/scorers/llm/context-relevance/index.d.ts +3 -3
- package/dist/scorers/llm/context-relevance/index.d.ts.map +1 -1
- package/dist/scorers/llm/faithfulness/index.d.ts +2 -2
- package/dist/scorers/llm/faithfulness/index.d.ts.map +1 -1
- package/dist/scorers/llm/hallucination/index.d.ts +2 -2
- package/dist/scorers/llm/hallucination/index.d.ts.map +1 -1
- package/dist/scorers/llm/noise-sensitivity/index.d.ts +1 -1
- package/dist/scorers/llm/noise-sensitivity/index.d.ts.map +1 -1
- package/dist/scorers/llm/prompt-alignment/index.d.ts +2 -2
- package/dist/scorers/llm/prompt-alignment/index.d.ts.map +1 -1
- package/dist/scorers/llm/tool-call-accuracy/index.d.ts +2 -2
- package/dist/scorers/llm/tool-call-accuracy/index.d.ts.map +1 -1
- package/dist/scorers/llm/toxicity/index.d.ts +2 -2
- package/dist/scorers/llm/toxicity/index.d.ts.map +1 -1
- package/dist/scorers/{llm → prebuilt}/index.cjs +479 -62
- package/dist/scorers/prebuilt/index.cjs.map +1 -0
- package/dist/scorers/prebuilt/index.d.ts +3 -0
- package/dist/scorers/prebuilt/index.d.ts.map +1 -0
- package/dist/scorers/{llm → prebuilt}/index.js +419 -15
- package/dist/scorers/prebuilt/index.js.map +1 -0
- package/dist/scorers/utils.cjs +21 -17
- package/dist/scorers/utils.d.ts +21 -11
- package/dist/scorers/utils.d.ts.map +1 -1
- package/dist/scorers/utils.js +1 -1
- package/package.json +15 -59
- package/dist/attachListeners.d.ts +0 -4
- package/dist/attachListeners.d.ts.map +0 -1
- package/dist/chunk-44PMY5ES.js +0 -78
- package/dist/chunk-44PMY5ES.js.map +0 -1
- package/dist/chunk-7QAUEU4L.cjs +0 -10
- package/dist/chunk-7QAUEU4L.cjs.map +0 -1
- package/dist/chunk-EMMSS5I5.cjs +0 -37
- package/dist/chunk-EMMSS5I5.cjs.map +0 -1
- package/dist/chunk-G3PMV62Z.js +0 -33
- package/dist/chunk-G3PMV62Z.js.map +0 -1
- package/dist/chunk-IUSAD2BW.cjs +0 -19
- package/dist/chunk-IUSAD2BW.cjs.map +0 -1
- package/dist/chunk-KHEXN75Q.js.map +0 -1
- package/dist/chunk-PWGOG6ML.cjs +0 -81
- package/dist/chunk-PWGOG6ML.cjs.map +0 -1
- package/dist/chunk-QKR2PMLZ.cjs.map +0 -1
- package/dist/chunk-QTWX6TKR.js +0 -8
- package/dist/chunk-QTWX6TKR.js.map +0 -1
- package/dist/chunk-YGTIO3J5.js +0 -17
- package/dist/chunk-YGTIO3J5.js.map +0 -1
- package/dist/dist-LDTK3TIP.cjs +0 -16759
- package/dist/dist-LDTK3TIP.cjs.map +0 -1
- package/dist/dist-OWYZEOJK.js +0 -16737
- package/dist/dist-OWYZEOJK.js.map +0 -1
- package/dist/evaluation.d.ts +0 -8
- package/dist/evaluation.d.ts.map +0 -1
- package/dist/index.cjs +0 -93
- package/dist/index.cjs.map +0 -1
- package/dist/index.d.ts +0 -3
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js +0 -89
- package/dist/index.js.map +0 -1
- package/dist/magic-string.es-7ORA5OGR.js +0 -1305
- package/dist/magic-string.es-7ORA5OGR.js.map +0 -1
- package/dist/magic-string.es-NZ2XWFKN.cjs +0 -1311
- package/dist/magic-string.es-NZ2XWFKN.cjs.map +0 -1
- package/dist/metrics/index.d.ts +0 -4
- package/dist/metrics/index.d.ts.map +0 -1
- package/dist/metrics/judge/index.cjs +0 -12
- package/dist/metrics/judge/index.cjs.map +0 -1
- package/dist/metrics/judge/index.d.ts +0 -7
- package/dist/metrics/judge/index.d.ts.map +0 -1
- package/dist/metrics/judge/index.js +0 -3
- package/dist/metrics/judge/index.js.map +0 -1
- package/dist/metrics/llm/answer-relevancy/index.d.ts +0 -16
- package/dist/metrics/llm/answer-relevancy/index.d.ts.map +0 -1
- package/dist/metrics/llm/answer-relevancy/metricJudge.d.ts +0 -20
- package/dist/metrics/llm/answer-relevancy/metricJudge.d.ts.map +0 -1
- package/dist/metrics/llm/answer-relevancy/prompts.d.ts +0 -19
- package/dist/metrics/llm/answer-relevancy/prompts.d.ts.map +0 -1
- package/dist/metrics/llm/bias/index.d.ts +0 -14
- package/dist/metrics/llm/bias/index.d.ts.map +0 -1
- package/dist/metrics/llm/bias/metricJudge.d.ts +0 -14
- package/dist/metrics/llm/bias/metricJudge.d.ts.map +0 -1
- package/dist/metrics/llm/bias/prompts.d.ts +0 -14
- package/dist/metrics/llm/bias/prompts.d.ts.map +0 -1
- package/dist/metrics/llm/context-position/index.d.ts +0 -16
- package/dist/metrics/llm/context-position/index.d.ts.map +0 -1
- package/dist/metrics/llm/context-position/metricJudge.d.ts +0 -20
- package/dist/metrics/llm/context-position/metricJudge.d.ts.map +0 -1
- package/dist/metrics/llm/context-position/prompts.d.ts +0 -17
- package/dist/metrics/llm/context-position/prompts.d.ts.map +0 -1
- package/dist/metrics/llm/context-precision/index.d.ts +0 -16
- package/dist/metrics/llm/context-precision/index.d.ts.map +0 -1
- package/dist/metrics/llm/context-precision/metricJudge.d.ts +0 -20
- package/dist/metrics/llm/context-precision/metricJudge.d.ts.map +0 -1
- package/dist/metrics/llm/context-precision/prompts.d.ts +0 -17
- package/dist/metrics/llm/context-precision/prompts.d.ts.map +0 -1
- package/dist/metrics/llm/context-relevancy/index.d.ts +0 -16
- package/dist/metrics/llm/context-relevancy/index.d.ts.map +0 -1
- package/dist/metrics/llm/context-relevancy/metricJudge.d.ts +0 -16
- package/dist/metrics/llm/context-relevancy/metricJudge.d.ts.map +0 -1
- package/dist/metrics/llm/context-relevancy/prompts.d.ts +0 -13
- package/dist/metrics/llm/context-relevancy/prompts.d.ts.map +0 -1
- package/dist/metrics/llm/contextual-recall/index.d.ts +0 -16
- package/dist/metrics/llm/contextual-recall/index.d.ts.map +0 -1
- package/dist/metrics/llm/contextual-recall/metricJudge.d.ts +0 -16
- package/dist/metrics/llm/contextual-recall/metricJudge.d.ts.map +0 -1
- package/dist/metrics/llm/contextual-recall/prompts.d.ts +0 -13
- package/dist/metrics/llm/contextual-recall/prompts.d.ts.map +0 -1
- package/dist/metrics/llm/faithfulness/index.d.ts +0 -16
- package/dist/metrics/llm/faithfulness/index.d.ts.map +0 -1
- package/dist/metrics/llm/faithfulness/metricJudge.d.ts +0 -22
- package/dist/metrics/llm/faithfulness/metricJudge.d.ts.map +0 -1
- package/dist/metrics/llm/faithfulness/prompts.d.ts +0 -20
- package/dist/metrics/llm/faithfulness/prompts.d.ts.map +0 -1
- package/dist/metrics/llm/hallucination/index.d.ts +0 -16
- package/dist/metrics/llm/hallucination/index.d.ts.map +0 -1
- package/dist/metrics/llm/hallucination/metricJudge.d.ts +0 -22
- package/dist/metrics/llm/hallucination/metricJudge.d.ts.map +0 -1
- package/dist/metrics/llm/hallucination/prompts.d.ts +0 -17
- package/dist/metrics/llm/hallucination/prompts.d.ts.map +0 -1
- package/dist/metrics/llm/index.cjs +0 -2481
- package/dist/metrics/llm/index.cjs.map +0 -1
- package/dist/metrics/llm/index.d.ts +0 -12
- package/dist/metrics/llm/index.d.ts.map +0 -1
- package/dist/metrics/llm/index.js +0 -2469
- package/dist/metrics/llm/index.js.map +0 -1
- package/dist/metrics/llm/prompt-alignment/index.d.ts +0 -33
- package/dist/metrics/llm/prompt-alignment/index.d.ts.map +0 -1
- package/dist/metrics/llm/prompt-alignment/metricJudge.d.ts +0 -20
- package/dist/metrics/llm/prompt-alignment/metricJudge.d.ts.map +0 -1
- package/dist/metrics/llm/prompt-alignment/prompts.d.ts +0 -17
- package/dist/metrics/llm/prompt-alignment/prompts.d.ts.map +0 -1
- package/dist/metrics/llm/summarization/index.d.ts +0 -19
- package/dist/metrics/llm/summarization/index.d.ts.map +0 -1
- package/dist/metrics/llm/summarization/metricJudge.d.ts +0 -34
- package/dist/metrics/llm/summarization/metricJudge.d.ts.map +0 -1
- package/dist/metrics/llm/summarization/prompts.d.ts +0 -30
- package/dist/metrics/llm/summarization/prompts.d.ts.map +0 -1
- package/dist/metrics/llm/toxicity/index.d.ts +0 -14
- package/dist/metrics/llm/toxicity/index.d.ts.map +0 -1
- package/dist/metrics/llm/toxicity/metricJudge.d.ts +0 -14
- package/dist/metrics/llm/toxicity/metricJudge.d.ts.map +0 -1
- package/dist/metrics/llm/toxicity/prompts.d.ts +0 -10
- package/dist/metrics/llm/toxicity/prompts.d.ts.map +0 -1
- package/dist/metrics/llm/types.d.ts +0 -7
- package/dist/metrics/llm/types.d.ts.map +0 -1
- package/dist/metrics/llm/utils.d.ts +0 -14
- package/dist/metrics/llm/utils.d.ts.map +0 -1
- package/dist/metrics/nlp/completeness/index.d.ts +0 -21
- package/dist/metrics/nlp/completeness/index.d.ts.map +0 -1
- package/dist/metrics/nlp/content-similarity/index.d.ts +0 -18
- package/dist/metrics/nlp/content-similarity/index.d.ts.map +0 -1
- package/dist/metrics/nlp/index.cjs +0 -201
- package/dist/metrics/nlp/index.cjs.map +0 -1
- package/dist/metrics/nlp/index.d.ts +0 -6
- package/dist/metrics/nlp/index.d.ts.map +0 -1
- package/dist/metrics/nlp/index.js +0 -188
- package/dist/metrics/nlp/index.js.map +0 -1
- package/dist/metrics/nlp/keyword-coverage/index.d.ts +0 -13
- package/dist/metrics/nlp/keyword-coverage/index.d.ts.map +0 -1
- package/dist/metrics/nlp/textual-difference/index.d.ts +0 -15
- package/dist/metrics/nlp/textual-difference/index.d.ts.map +0 -1
- package/dist/metrics/nlp/tone/index.d.ts +0 -18
- package/dist/metrics/nlp/tone/index.d.ts.map +0 -1
- package/dist/ratio.d.ts +0 -13
- package/dist/ratio.d.ts.map +0 -1
- package/dist/scorers/code/index.cjs +0 -327
- package/dist/scorers/code/index.cjs.map +0 -1
- package/dist/scorers/code/index.js +0 -313
- package/dist/scorers/code/index.js.map +0 -1
- package/dist/scorers/llm/index.cjs.map +0 -1
- package/dist/scorers/llm/index.js.map +0 -1
package/dist/evaluation.d.ts
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
import type { Agent } from '@mastra/core/agent';
|
|
2
|
-
import type { Metric } from '@mastra/core/eval';
|
|
3
|
-
export declare function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric): Promise<import("@mastra/core").EvaluationResult>;
|
|
4
|
-
export declare const getCurrentTestInfo: () => Promise<{
|
|
5
|
-
testName: any;
|
|
6
|
-
testPath: any;
|
|
7
|
-
} | undefined>;
|
|
8
|
-
//# sourceMappingURL=evaluation.d.ts.map
|
package/dist/evaluation.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"evaluation.d.ts","sourceRoot":"","sources":["../src/evaluation.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAEhD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAIhD,wBAAsB,QAAQ,CAAC,CAAC,SAAS,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,oDAyB5G;AAED,eAAO,MAAM,kBAAkB;;;cAwB9B,CAAC"}
|
package/dist/index.cjs
DELETED
|
@@ -1,93 +0,0 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
var _eval = require('@mastra/core/eval');
|
|
4
|
-
var hooks = require('@mastra/core/hooks');
|
|
5
|
-
var storage = require('@mastra/core/storage');
|
|
6
|
-
var utils = require('@mastra/core/utils');
|
|
7
|
-
|
|
8
|
-
// src/constants.ts
|
|
9
|
-
var GLOBAL_RUN_ID_ENV_KEY = "_MASTRA_GLOBAL_RUN_ID_";
|
|
10
|
-
|
|
11
|
-
// src/evaluation.ts
|
|
12
|
-
async function evaluate(agent, input, metric) {
|
|
13
|
-
const testInfo = await getCurrentTestInfo();
|
|
14
|
-
let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];
|
|
15
|
-
const runId = crypto.randomUUID();
|
|
16
|
-
const agentOutput = await agent.generate(input, {
|
|
17
|
-
runId
|
|
18
|
-
});
|
|
19
|
-
if (!globalRunId) {
|
|
20
|
-
globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();
|
|
21
|
-
console.warn('Global run id not set, you should run "globalSetup" from "@mastra/evals" before evaluating.');
|
|
22
|
-
}
|
|
23
|
-
const metricResult = await _eval.evaluate({
|
|
24
|
-
agentName: agent.name,
|
|
25
|
-
input,
|
|
26
|
-
metric,
|
|
27
|
-
output: agentOutput.text,
|
|
28
|
-
globalRunId,
|
|
29
|
-
runId,
|
|
30
|
-
testInfo,
|
|
31
|
-
instructions: agent.instructions
|
|
32
|
-
});
|
|
33
|
-
return metricResult;
|
|
34
|
-
}
|
|
35
|
-
var getCurrentTestInfo = async () => {
|
|
36
|
-
if (typeof expect !== "undefined" && expect.getState) {
|
|
37
|
-
const state = expect.getState();
|
|
38
|
-
return {
|
|
39
|
-
testName: state.currentTestName,
|
|
40
|
-
testPath: state.testPath
|
|
41
|
-
};
|
|
42
|
-
}
|
|
43
|
-
try {
|
|
44
|
-
const vitest = await import('./dist-LDTK3TIP.cjs');
|
|
45
|
-
if (typeof vitest !== "undefined" && vitest.expect?.getState) {
|
|
46
|
-
const state = vitest.expect.getState();
|
|
47
|
-
return {
|
|
48
|
-
testName: state.currentTestName,
|
|
49
|
-
testPath: state.testPath
|
|
50
|
-
};
|
|
51
|
-
}
|
|
52
|
-
} catch {
|
|
53
|
-
}
|
|
54
|
-
return void 0;
|
|
55
|
-
};
|
|
56
|
-
async function attachListeners(mastra) {
|
|
57
|
-
hooks.registerHook(hooks.AvailableHooks.ON_EVALUATION, async (traceObject) => {
|
|
58
|
-
const storage$1 = mastra?.getStorage();
|
|
59
|
-
if (storage$1) {
|
|
60
|
-
const logger = mastra?.getLogger();
|
|
61
|
-
const areFieldsValid = utils.checkEvalStorageFields(traceObject, logger);
|
|
62
|
-
if (!areFieldsValid) return;
|
|
63
|
-
await storage$1.insert({
|
|
64
|
-
tableName: storage.TABLE_EVALS,
|
|
65
|
-
record: {
|
|
66
|
-
input: traceObject.input,
|
|
67
|
-
output: traceObject.output,
|
|
68
|
-
result: JSON.stringify(traceObject.result || {}),
|
|
69
|
-
agent_name: traceObject.agentName,
|
|
70
|
-
metric_name: traceObject.metricName,
|
|
71
|
-
instructions: traceObject.instructions,
|
|
72
|
-
test_info: traceObject.testInfo ? JSON.stringify(traceObject.testInfo) : null,
|
|
73
|
-
global_run_id: traceObject.globalRunId,
|
|
74
|
-
run_id: traceObject.runId,
|
|
75
|
-
created_at: (/* @__PURE__ */ new Date()).toISOString()
|
|
76
|
-
}
|
|
77
|
-
});
|
|
78
|
-
}
|
|
79
|
-
});
|
|
80
|
-
}
|
|
81
|
-
async function globalSetup() {
|
|
82
|
-
if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {
|
|
83
|
-
throw new Error('Global run id already set, you should only run "GlobalSetup" once');
|
|
84
|
-
}
|
|
85
|
-
const globalRunId = crypto.randomUUID();
|
|
86
|
-
process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
exports.attachListeners = attachListeners;
|
|
90
|
-
exports.evaluate = evaluate;
|
|
91
|
-
exports.globalSetup = globalSetup;
|
|
92
|
-
//# sourceMappingURL=index.cjs.map
|
|
93
|
-
//# sourceMappingURL=index.cjs.map
|
package/dist/index.cjs.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/constants.ts","../src/evaluation.ts","../src/attachListeners.ts"],"names":["coreEvaluate","registerHook","AvailableHooks","storage","checkEvalStorageFields","TABLE_EVALS"],"mappings":";;;;;;;;AAAO,IAAM,qBAAA,GAAwB,wBAAA;;;ACMrC,eAAsB,QAAA,CAA0B,KAAA,EAAU,KAAA,EAAqC,MAAA,EAAgB;AAC7G,EAAA,MAAM,QAAA,GAAW,MAAM,kBAAA,EAAmB;AAC1C,EAAA,IAAI,WAAA,GAAc,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA;AACnD,EAAA,MAAM,KAAA,GAAQ,OAAO,UAAA,EAAW;AAChC,EAAA,MAAM,WAAA,GAAc,MAAM,KAAA,CAAM,QAAA,CAAS,KAAA,EAAO;AAAA,IAC9C;AAAA,GACD,CAAA;AAED,EAAA,IAAI,CAAC,WAAA,EAAa;AAChB,IAAA,WAAA,GAAc,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA,GAAI,OAAO,UAAA,EAAW;AACrE,IAAA,OAAA,CAAQ,KAAK,6FAA6F,CAAA;AAAA,EAC5G;AAEA,EAAA,MAAM,YAAA,GAAe,MAAMA,cAAA,CAAa;AAAA,IACtC,WAAW,KAAA,CAAM,IAAA;AAAA,IACjB,KAAA;AAAA,IACA,MAAA;AAAA,IACA,QAAQ,WAAA,CAAY,IAAA;AAAA,IACpB,WAAA;AAAA,IACA,KAAA;AAAA,IACA,QAAA;AAAA,IACA,cAAc,KAAA,CAAM;AAAA,GACrB,CAAA;AAED,EAAA,OAAO,YAAA;AACT;AAEO,IAAM,qBAAqB,YAAY;AAG5C,EAAA,IAAI,OAAO,MAAA,KAAW,WAAA,IAAe,MAAA,CAAO,QAAA,EAAU;AAEpD,IAAA,MAAM,KAAA,GAAQ,OAAO,QAAA,EAAS;AAC9B,IAAA,OAAO;AAAA,MACL,UAAU,KAAA,CAAM,eAAA;AAAA,MAChB,UAAU,KAAA,CAAM;AAAA,KAClB;AAAA,EACF;AAEA,EAAA,IAAI;AACF,IAAA,MAAM,MAAA,GAAS,MAAM,OAAO,qBAAQ,CAAA;AACpC,IAAA,IAAI,OAAO,MAAA,KAAW,WAAA,IAAe,MAAA,CAAO,QAAQ,QAAA,EAAU;AAC5D,MAAA,MAAM,KAAA,GAAQ,MAAA,CAAO,MAAA,CAAO,QAAA,EAAS;AACrC,MAAA,OAAO;AAAA,QACL,UAAU,KAAA,CAAM,eAAA;AAAA,QAChB,UAAU,KAAA,CAAM;AAAA,OAClB;AAAA,IACF;AAAA,EACF,CAAA,CAAA,MAAQ;AAAA,EAAC;AAET,EAAA,OAAO,MAAA;AACT,CAAA;AClDA,eAAsB,gBAAgB,MAAA,EAAiB;AACrD,EAAAC,kBAAA,CAAaC,oBAAA,CAAe,aAAA,EAAe,OAAM,WAAA,KAAe;AAC9D,IAAA,MAAMC,SAAA,GAAU,QAAQ,UAAA,EAAW;AACnC,IAAA,IAAIA,SAAA,EAAS;AAEX,MAAA,MAAM,MAAA,GAAS,QAAQ,SAAA,EAAU;AACjC,MAAA,MAAM,cAAA,GAAiBC,4BAAA,CAAuB,WAAA,EAAa,MAAM,CAAA;AACjE,MAAA,IAAI,CAAC,cAAA,EAAgB;AAErB,MAAA,MAAMD,UAAQ,MAAA,CAAO;AAAA,QACnB,SAAA,EAAWE,mBAAA;AAAA,QACX,MAAA,EAAQ;AAAA,UACN,OAAO,WAAA,CAAY,KAAA;AAAA,UACnB,QAAQ,WAAA,CAAY,MAAA;AAAA,UACpB,QAAQ,IAAA,CAAK,SAAA,CAAU,WAAA,CAAY,MAAA,IAAU,EAAE,CAAA;AAAA,UAC/C,YAAY,WAAA,CAAY,SAAA;AAAA,UACxB,aAAa,WAAA,CAAY,UAAA;AAAA,UACzB,cAAc,WAAA,CAAY,YAAA;AAAA,UAC1B,WAAW,WAAA,CAAY,QAAA,GAAW,KAAK,SAAA,CAAU,WAAA,CAAY,QAAQ,CAAA,GAAI,IAAA;AAAA,UACzE,eAAe,WAAA,CAAY,WAAA;AAAA,UAC3B,QAAQ,WAAA,CAAY,KAAA;AAAA,UACpB,UAAA,EAAA,iBAAY,IAAI,IAAA,EAAK,EAAE,WAAA;AAAY;AACrC,OACD,CAAA;AAAA,IACH;AAAA,EACF,CAAC,CAAA;AACH;AAEA,eAAsB,WAAA,GAAc;AAClC,EAAA,IAAI,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA,EAAG;AACtC,IAAA,MAAM,IAAI,MAAM,mEAAmE,CAAA;AAAA,EACrF;AAEA,EAAA,MAAM,WAAA,GAAc,OAAO,UAAA,EAAW;AACtC,EAAA,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA,GAAI,WAAA;AACvC","file":"index.cjs","sourcesContent":["export const GLOBAL_RUN_ID_ENV_KEY = '_MASTRA_GLOBAL_RUN_ID_';\n","import type { Agent } from '@mastra/core/agent';\nimport { evaluate as coreEvaluate } from '@mastra/core/eval';\nimport type { Metric } from '@mastra/core/eval';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric) {\n const testInfo = await getCurrentTestInfo();\n let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];\n const runId = crypto.randomUUID();\n const agentOutput = await agent.generate(input, {\n runId,\n });\n\n if (!globalRunId) {\n globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();\n console.warn('Global run id not set, you should run \"globalSetup\" from \"@mastra/evals\" before evaluating.');\n }\n\n const metricResult = await coreEvaluate({\n agentName: agent.name,\n input,\n metric,\n output: agentOutput.text,\n globalRunId,\n runId,\n testInfo,\n instructions: agent.instructions,\n });\n\n return metricResult;\n}\n\nexport const getCurrentTestInfo = async () => {\n // Jest\n // @ts-ignore\n if (typeof expect !== 'undefined' && expect.getState) {\n // @ts-ignore\n const state = expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n\n try {\n const vitest = await import('vitest');\n if (typeof vitest !== 'undefined' && vitest.expect?.getState) {\n const state = vitest.expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n } catch {}\n\n return undefined;\n};\n","import type { Mastra } from '@mastra/core';\nimport { AvailableHooks, registerHook } from '@mastra/core/hooks';\nimport { TABLE_EVALS } from '@mastra/core/storage';\nimport { checkEvalStorageFields } from '@mastra/core/utils';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function attachListeners(mastra?: Mastra) {\n registerHook(AvailableHooks.ON_EVALUATION, async traceObject => {\n const storage = mastra?.getStorage();\n if (storage) {\n // Check for required fields\n const logger = mastra?.getLogger();\n const areFieldsValid = checkEvalStorageFields(traceObject, logger);\n if (!areFieldsValid) return;\n\n await storage.insert({\n tableName: TABLE_EVALS,\n record: {\n input: traceObject.input,\n output: traceObject.output,\n result: JSON.stringify(traceObject.result || {}),\n agent_name: traceObject.agentName,\n metric_name: traceObject.metricName,\n instructions: traceObject.instructions,\n test_info: traceObject.testInfo ? JSON.stringify(traceObject.testInfo) : null,\n global_run_id: traceObject.globalRunId,\n run_id: traceObject.runId,\n created_at: new Date().toISOString(),\n },\n });\n }\n });\n}\n\nexport async function globalSetup() {\n if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {\n throw new Error('Global run id already set, you should only run \"GlobalSetup\" once');\n }\n\n const globalRunId = crypto.randomUUID();\n process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;\n}\n"]}
|
package/dist/index.d.ts
DELETED
package/dist/index.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,cAAc,CAAC;AACxC,OAAO,EAAE,eAAe,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC"}
|
package/dist/index.js
DELETED
|
@@ -1,89 +0,0 @@
|
|
|
1
|
-
import { evaluate as evaluate$1 } from '@mastra/core/eval';
|
|
2
|
-
import { registerHook, AvailableHooks } from '@mastra/core/hooks';
|
|
3
|
-
import { TABLE_EVALS } from '@mastra/core/storage';
|
|
4
|
-
import { checkEvalStorageFields } from '@mastra/core/utils';
|
|
5
|
-
|
|
6
|
-
// src/constants.ts
|
|
7
|
-
var GLOBAL_RUN_ID_ENV_KEY = "_MASTRA_GLOBAL_RUN_ID_";
|
|
8
|
-
|
|
9
|
-
// src/evaluation.ts
|
|
10
|
-
async function evaluate(agent, input, metric) {
|
|
11
|
-
const testInfo = await getCurrentTestInfo();
|
|
12
|
-
let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];
|
|
13
|
-
const runId = crypto.randomUUID();
|
|
14
|
-
const agentOutput = await agent.generate(input, {
|
|
15
|
-
runId
|
|
16
|
-
});
|
|
17
|
-
if (!globalRunId) {
|
|
18
|
-
globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();
|
|
19
|
-
console.warn('Global run id not set, you should run "globalSetup" from "@mastra/evals" before evaluating.');
|
|
20
|
-
}
|
|
21
|
-
const metricResult = await evaluate$1({
|
|
22
|
-
agentName: agent.name,
|
|
23
|
-
input,
|
|
24
|
-
metric,
|
|
25
|
-
output: agentOutput.text,
|
|
26
|
-
globalRunId,
|
|
27
|
-
runId,
|
|
28
|
-
testInfo,
|
|
29
|
-
instructions: agent.instructions
|
|
30
|
-
});
|
|
31
|
-
return metricResult;
|
|
32
|
-
}
|
|
33
|
-
var getCurrentTestInfo = async () => {
|
|
34
|
-
if (typeof expect !== "undefined" && expect.getState) {
|
|
35
|
-
const state = expect.getState();
|
|
36
|
-
return {
|
|
37
|
-
testName: state.currentTestName,
|
|
38
|
-
testPath: state.testPath
|
|
39
|
-
};
|
|
40
|
-
}
|
|
41
|
-
try {
|
|
42
|
-
const vitest = await import('./dist-OWYZEOJK.js');
|
|
43
|
-
if (typeof vitest !== "undefined" && vitest.expect?.getState) {
|
|
44
|
-
const state = vitest.expect.getState();
|
|
45
|
-
return {
|
|
46
|
-
testName: state.currentTestName,
|
|
47
|
-
testPath: state.testPath
|
|
48
|
-
};
|
|
49
|
-
}
|
|
50
|
-
} catch {
|
|
51
|
-
}
|
|
52
|
-
return void 0;
|
|
53
|
-
};
|
|
54
|
-
async function attachListeners(mastra) {
|
|
55
|
-
registerHook(AvailableHooks.ON_EVALUATION, async (traceObject) => {
|
|
56
|
-
const storage = mastra?.getStorage();
|
|
57
|
-
if (storage) {
|
|
58
|
-
const logger = mastra?.getLogger();
|
|
59
|
-
const areFieldsValid = checkEvalStorageFields(traceObject, logger);
|
|
60
|
-
if (!areFieldsValid) return;
|
|
61
|
-
await storage.insert({
|
|
62
|
-
tableName: TABLE_EVALS,
|
|
63
|
-
record: {
|
|
64
|
-
input: traceObject.input,
|
|
65
|
-
output: traceObject.output,
|
|
66
|
-
result: JSON.stringify(traceObject.result || {}),
|
|
67
|
-
agent_name: traceObject.agentName,
|
|
68
|
-
metric_name: traceObject.metricName,
|
|
69
|
-
instructions: traceObject.instructions,
|
|
70
|
-
test_info: traceObject.testInfo ? JSON.stringify(traceObject.testInfo) : null,
|
|
71
|
-
global_run_id: traceObject.globalRunId,
|
|
72
|
-
run_id: traceObject.runId,
|
|
73
|
-
created_at: (/* @__PURE__ */ new Date()).toISOString()
|
|
74
|
-
}
|
|
75
|
-
});
|
|
76
|
-
}
|
|
77
|
-
});
|
|
78
|
-
}
|
|
79
|
-
async function globalSetup() {
|
|
80
|
-
if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {
|
|
81
|
-
throw new Error('Global run id already set, you should only run "GlobalSetup" once');
|
|
82
|
-
}
|
|
83
|
-
const globalRunId = crypto.randomUUID();
|
|
84
|
-
process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
export { attachListeners, evaluate, globalSetup };
|
|
88
|
-
//# sourceMappingURL=index.js.map
|
|
89
|
-
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/constants.ts","../src/evaluation.ts","../src/attachListeners.ts"],"names":["coreEvaluate"],"mappings":";;;;;;AAAO,IAAM,qBAAA,GAAwB,wBAAA;;;ACMrC,eAAsB,QAAA,CAA0B,KAAA,EAAU,KAAA,EAAqC,MAAA,EAAgB;AAC7G,EAAA,MAAM,QAAA,GAAW,MAAM,kBAAA,EAAmB;AAC1C,EAAA,IAAI,WAAA,GAAc,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA;AACnD,EAAA,MAAM,KAAA,GAAQ,OAAO,UAAA,EAAW;AAChC,EAAA,MAAM,WAAA,GAAc,MAAM,KAAA,CAAM,QAAA,CAAS,KAAA,EAAO;AAAA,IAC9C;AAAA,GACD,CAAA;AAED,EAAA,IAAI,CAAC,WAAA,EAAa;AAChB,IAAA,WAAA,GAAc,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA,GAAI,OAAO,UAAA,EAAW;AACrE,IAAA,OAAA,CAAQ,KAAK,6FAA6F,CAAA;AAAA,EAC5G;AAEA,EAAA,MAAM,YAAA,GAAe,MAAMA,UAAA,CAAa;AAAA,IACtC,WAAW,KAAA,CAAM,IAAA;AAAA,IACjB,KAAA;AAAA,IACA,MAAA;AAAA,IACA,QAAQ,WAAA,CAAY,IAAA;AAAA,IACpB,WAAA;AAAA,IACA,KAAA;AAAA,IACA,QAAA;AAAA,IACA,cAAc,KAAA,CAAM;AAAA,GACrB,CAAA;AAED,EAAA,OAAO,YAAA;AACT;AAEO,IAAM,qBAAqB,YAAY;AAG5C,EAAA,IAAI,OAAO,MAAA,KAAW,WAAA,IAAe,MAAA,CAAO,QAAA,EAAU;AAEpD,IAAA,MAAM,KAAA,GAAQ,OAAO,QAAA,EAAS;AAC9B,IAAA,OAAO;AAAA,MACL,UAAU,KAAA,CAAM,eAAA;AAAA,MAChB,UAAU,KAAA,CAAM;AAAA,KAClB;AAAA,EACF;AAEA,EAAA,IAAI;AACF,IAAA,MAAM,MAAA,GAAS,MAAM,OAAO,oBAAQ,CAAA;AACpC,IAAA,IAAI,OAAO,MAAA,KAAW,WAAA,IAAe,MAAA,CAAO,QAAQ,QAAA,EAAU;AAC5D,MAAA,MAAM,KAAA,GAAQ,MAAA,CAAO,MAAA,CAAO,QAAA,EAAS;AACrC,MAAA,OAAO;AAAA,QACL,UAAU,KAAA,CAAM,eAAA;AAAA,QAChB,UAAU,KAAA,CAAM;AAAA,OAClB;AAAA,IACF;AAAA,EACF,CAAA,CAAA,MAAQ;AAAA,EAAC;AAET,EAAA,OAAO,MAAA;AACT,CAAA;AClDA,eAAsB,gBAAgB,MAAA,EAAiB;AACrD,EAAA,YAAA,CAAa,cAAA,CAAe,aAAA,EAAe,OAAM,WAAA,KAAe;AAC9D,IAAA,MAAM,OAAA,GAAU,QAAQ,UAAA,EAAW;AACnC,IAAA,IAAI,OAAA,EAAS;AAEX,MAAA,MAAM,MAAA,GAAS,QAAQ,SAAA,EAAU;AACjC,MAAA,MAAM,cAAA,GAAiB,sBAAA,CAAuB,WAAA,EAAa,MAAM,CAAA;AACjE,MAAA,IAAI,CAAC,cAAA,EAAgB;AAErB,MAAA,MAAM,QAAQ,MAAA,CAAO;AAAA,QACnB,SAAA,EAAW,WAAA;AAAA,QACX,MAAA,EAAQ;AAAA,UACN,OAAO,WAAA,CAAY,KAAA;AAAA,UACnB,QAAQ,WAAA,CAAY,MAAA;AAAA,UACpB,QAAQ,IAAA,CAAK,SAAA,CAAU,WAAA,CAAY,MAAA,IAAU,EAAE,CAAA;AAAA,UAC/C,YAAY,WAAA,CAAY,SAAA;AAAA,UACxB,aAAa,WAAA,CAAY,UAAA;AAAA,UACzB,cAAc,WAAA,CAAY,YAAA;AAAA,UAC1B,WAAW,WAAA,CAAY,QAAA,GAAW,KAAK,SAAA,CAAU,WAAA,CAAY,QAAQ,CAAA,GAAI,IAAA;AAAA,UACzE,eAAe,WAAA,CAAY,WAAA;AAAA,UAC3B,QAAQ,WAAA,CAAY,KAAA;AAAA,UACpB,UAAA,EAAA,iBAAY,IAAI,IAAA,EAAK,EAAE,WAAA;AAAY;AACrC,OACD,CAAA;AAAA,IACH;AAAA,EACF,CAAC,CAAA;AACH;AAEA,eAAsB,WAAA,GAAc;AAClC,EAAA,IAAI,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA,EAAG;AACtC,IAAA,MAAM,IAAI,MAAM,mEAAmE,CAAA;AAAA,EACrF;AAEA,EAAA,MAAM,WAAA,GAAc,OAAO,UAAA,EAAW;AACtC,EAAA,OAAA,CAAQ,GAAA,CAAI,qBAAqB,CAAA,GAAI,WAAA;AACvC","file":"index.js","sourcesContent":["export const GLOBAL_RUN_ID_ENV_KEY = '_MASTRA_GLOBAL_RUN_ID_';\n","import type { Agent } from '@mastra/core/agent';\nimport { evaluate as coreEvaluate } from '@mastra/core/eval';\nimport type { Metric } from '@mastra/core/eval';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric) {\n const testInfo = await getCurrentTestInfo();\n let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];\n const runId = crypto.randomUUID();\n const agentOutput = await agent.generate(input, {\n runId,\n });\n\n if (!globalRunId) {\n globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();\n console.warn('Global run id not set, you should run \"globalSetup\" from \"@mastra/evals\" before evaluating.');\n }\n\n const metricResult = await coreEvaluate({\n agentName: agent.name,\n input,\n metric,\n output: agentOutput.text,\n globalRunId,\n runId,\n testInfo,\n instructions: agent.instructions,\n });\n\n return metricResult;\n}\n\nexport const getCurrentTestInfo = async () => {\n // Jest\n // @ts-ignore\n if (typeof expect !== 'undefined' && expect.getState) {\n // @ts-ignore\n const state = expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n\n try {\n const vitest = await import('vitest');\n if (typeof vitest !== 'undefined' && vitest.expect?.getState) {\n const state = vitest.expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n } catch {}\n\n return undefined;\n};\n","import type { Mastra } from '@mastra/core';\nimport { AvailableHooks, registerHook } from '@mastra/core/hooks';\nimport { TABLE_EVALS } from '@mastra/core/storage';\nimport { checkEvalStorageFields } from '@mastra/core/utils';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function attachListeners(mastra?: Mastra) {\n registerHook(AvailableHooks.ON_EVALUATION, async traceObject => {\n const storage = mastra?.getStorage();\n if (storage) {\n // Check for required fields\n const logger = mastra?.getLogger();\n const areFieldsValid = checkEvalStorageFields(traceObject, logger);\n if (!areFieldsValid) return;\n\n await storage.insert({\n tableName: TABLE_EVALS,\n record: {\n input: traceObject.input,\n output: traceObject.output,\n result: JSON.stringify(traceObject.result || {}),\n agent_name: traceObject.agentName,\n metric_name: traceObject.metricName,\n instructions: traceObject.instructions,\n test_info: traceObject.testInfo ? JSON.stringify(traceObject.testInfo) : null,\n global_run_id: traceObject.globalRunId,\n run_id: traceObject.runId,\n created_at: new Date().toISOString(),\n },\n });\n }\n });\n}\n\nexport async function globalSetup() {\n if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {\n throw new Error('Global run id already set, you should only run \"GlobalSetup\" once');\n }\n\n const globalRunId = crypto.randomUUID();\n process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;\n}\n"]}
|