@mastra/evals 0.1.0-alpha.16 → 0.1.0-alpha.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,28 @@
1
1
  # @mastra/evals
2
2
 
3
+ ## 0.1.0-alpha.19
4
+
5
+ ### Patch Changes
6
+
7
+ - Updated dependencies [685108a]
8
+ - Updated dependencies [685108a]
9
+ - @mastra/core@0.1.27-alpha.78
10
+
11
+ ## 0.1.0-alpha.18
12
+
13
+ ### Patch Changes
14
+
15
+ - Updated dependencies [8105fae]
16
+ - @mastra/core@0.1.27-alpha.77
17
+
18
+ ## 0.1.0-alpha.17
19
+
20
+ ### Patch Changes
21
+
22
+ - Updated dependencies [ae7bf94]
23
+ - Updated dependencies [ae7bf94]
24
+ - @mastra/core@0.1.27-alpha.76
25
+
3
26
  ## 0.1.0-alpha.16
4
27
 
5
28
  ### Patch Changes
package/README.md ADDED
@@ -0,0 +1,186 @@
1
+ # @mastra/evals
2
+
3
+ A comprehensive evaluation framework for assessing AI model outputs across multiple dimensions.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install @mastra/evals
9
+ ```
10
+
11
+ ## Overview
12
+
13
+ `@mastra/evals` provides a suite of evaluation metrics for assessing AI model outputs. The package includes LLM-based and NLP-based metrics, so responses can be evaluated either with deterministic text analysis or with the assistance of a model.
14
+
15
+ ## Features
16
+
17
+ ### LLM-Based Metrics
18
+
19
+ 1. **Answer Relevancy**
20
+
21
+ - Evaluates how well an answer addresses the input question
22
+ - Considers uncertainty weighting for more nuanced scoring
23
+ - Returns detailed reasoning for scores
24
+
25
+ 2. **Bias Detection**
26
+
27
+ - Identifies potential biases in model outputs
28
+ - Analyzes opinions and statements for bias indicators
29
+ - Provides explanations for detected biases
30
+ - Configurable scoring scale
31
+
32
+ 3. **Context Precision & Relevancy**
33
+
34
+ - Assesses how well responses use provided context
35
+ - Evaluates accuracy of context usage
36
+ - Measures relevance of context to the response
37
+ - Analyzes context positioning in responses
38
+
39
+ 4. **Faithfulness**
40
+
41
+ - Verifies that responses are faithful to provided context
42
+ - Detects hallucinations or fabricated information
43
+ - Evaluates claims against provided context
44
+ - Provides detailed analysis of faithfulness breaches
45
+
46
+ 5. **Prompt Alignment**
47
+
48
+ - Measures how well responses follow given instructions
49
+ - Evaluates adherence to multiple instruction criteria
50
+ - Provides per-instruction scoring
51
+ - Supports custom instruction sets
52
+
53
+ 6. **Toxicity**
54
+ - Detects toxic or harmful content in responses
55
+ - Provides detailed reasoning for toxicity verdicts
56
+ - Configurable scoring thresholds
57
+ - Considers both input and output context
58
+
59
+ ### NLP-Based Metrics
60
+
61
+ 1. **Completeness**
62
+
63
+ - Analyzes structural completeness of responses
64
+ - Identifies missing elements from input requirements
65
+ - Provides detailed element coverage analysis
66
+ - Tracks input-output element ratios
67
+
68
+ 2. **Content Similarity**
69
+
70
+ - Measures text similarity between inputs and outputs
71
+ - Configurable for case and whitespace sensitivity
72
+ - Returns normalized similarity scores
73
+ - Uses string comparison algorithms for accuracy
74
+
75
+ 3. **Keyword Coverage**
76
+ - Tracks presence of key terms from input in output
77
+ - Provides detailed keyword matching statistics
78
+ - Calculates coverage ratios
79
+ - Useful for ensuring comprehensive responses
80
+
81
+ ## Usage
82
+
83
+ ### Basic Example
84
+
85
+ ```typescript
86
+ import { ToxicityMetric } from '@mastra/evals/llm';
+ import { ContentSimilarityMetric } from '@mastra/evals/nlp';
87
+
88
+ // Initialize metrics
89
+ const similarityMetric = new ContentSimilarityMetric({
90
+ ignoreCase: true,
91
+ ignoreWhitespace: true,
92
+ });
93
+
94
+ const toxicityMetric = new ToxicityMetric({
95
+ model: {
96
+ provider: 'openai',
97
+ model: 'gpt-4',
98
+ },
99
+ scale: 1, // Optional: adjust scoring scale
100
+ });
101
+
102
+ // Evaluate outputs
103
+ const input = 'What is the capital of France?';
104
+ const output = 'Paris is the capital of France.';
105
+
106
+ const similarityResult = await similarityMetric.measure(input, output);
107
+ const toxicityResult = await toxicityMetric.measure(input, output);
108
+
109
+ console.log('Similarity Score:', similarityResult.score);
110
+ console.log('Toxicity Score:', toxicityResult.score);
111
+ ```
112
+
113
+ ### Context-Aware Evaluation
114
+
115
+ ```typescript
116
+ import { FaithfulnessMetric } from '@mastra/evals';
117
+
118
+ // Initialize with context
119
+ const faithfulnessMetric = new FaithfulnessMetric({
120
+ model: {
121
+ provider: 'openai',
122
+ model: 'gpt-4',
123
+ },
124
+ context: ['Paris is the capital of France', 'Paris has a population of 2.2 million'],
125
+ scale: 1,
126
+ });
127
+
128
+ // Evaluate response against context
129
+ const result = await faithfulnessMetric.measure(
130
+ 'Tell me about Paris',
131
+ 'Paris is the capital of France with 2.2 million residents',
132
+ );
133
+
134
+ console.log('Faithfulness Score:', result.score);
135
+ console.log('Reasoning:', result.reason);
136
+ ```
137
+
138
+ ## Metric Results
139
+
140
+ Each metric returns a standardized result object containing:
141
+
142
+ - `score`: Normalized score (typically 0-1)
143
+ - `info`: Detailed information about the evaluation
144
+ - Additional metric-specific data (e.g., matched keywords, missing elements)
145
+
146
+ Some metrics also provide:
147
+
148
+ - `reason`: Detailed explanation of the score
149
+ - `verdicts`: Individual judgments that contributed to the final score
150
+
151
+ ## Telemetry and Logging
152
+
153
+ The package includes built-in telemetry and logging capabilities:
154
+
155
+ - Automatic evaluation tracking in `.mastra/evals.json` (one JSON record appended per line for each evaluation, once `attachListeners()` has been called)
156
+ - Integration with OpenTelemetry for performance monitoring
157
+ - Detailed evaluation traces for debugging
158
+
159
+ ```typescript
160
+ import { attachListeners } from '@mastra/evals';
161
+
162
+ // Enable evaluation tracking
163
+ await attachListeners();
164
+ ```
165
+
166
+ ## Environment Variables
167
+
168
+ Required for LLM-based metrics:
169
+
170
+ - `OPENAI_API_KEY`: For OpenAI model access
171
+ - Additional provider keys as needed (Cohere, Anthropic, etc.)
172
+
173
+ ## Package Exports
174
+
175
+ ```typescript
176
+ // Main package exports
177
+ import { evaluate } from '@mastra/evals';
178
+ // NLP-specific metrics
179
+ import { ContentSimilarityMetric } from '@mastra/evals/nlp';
180
+ ```
181
+
182
+ ## Related Packages
183
+
184
+ - `@mastra/core`: Core framework functionality
185
+ - `@mastra/engine`: LLM execution engine
186
+ - `@mastra/mcp`: Model Context Protocol integration
@@ -391,6 +391,7 @@ var getCurrentTestInfo = /*#__PURE__*/function () {
391
391
  _context.next = 3;
392
392
  break;
393
393
  }
394
+ // @ts-ignore
394
395
  state = expect.getState();
395
396
  return _context.abrupt("return", {
396
397
  testName: state.currentTestName,
@@ -1 +1 @@
1
- {"version":3,"file":"evals.cjs.development.js","sources":["../src/constants.ts","../src/evaluation.ts","../src/attachListeners.ts"],"sourcesContent":["export const GLOBAL_RUN_ID_ENV_KEY = '_MASTRA_GLOBAL_RUN_ID_';\n","import { type Agent, type Metric, evaluate as coreEvaluate } from '@mastra/core';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric) {\n const testInfo = await getCurrentTestInfo();\n let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];\n const runId = crypto.randomUUID();\n const agentOutput = await agent.generate(input, {\n runId,\n });\n\n if (!globalRunId) {\n globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();\n console.warn('Global run id not set, you should run \"globalSetup\" from \"@mastra/evals\" before evaluating.');\n }\n\n const metricResult = await coreEvaluate({\n agentName: agent.name,\n input,\n metric,\n output: agentOutput.text,\n globalRunId,\n runId,\n testInfo,\n });\n\n return metricResult;\n}\n\nexport const getCurrentTestInfo = async () => {\n // Jest\n if (typeof expect !== 'undefined' && expect.getState) {\n const state = expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n\n try {\n const vitest = await import('vitest');\n if (typeof vitest !== 'undefined' && vitest.expect?.getState) {\n const state = vitest.expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n } catch {}\n\n return null;\n};\n","import { AvailableHooks, registerHook } from '@mastra/core';\nimport { mkdirSync, appendFile } from 'fs';\nimport { join } from 'path';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function attachListeners() {\n const dotMastraPath = join(process.cwd(), '.mastra');\n\n try {\n mkdirSync(dotMastraPath);\n } catch (error) {}\n\n 
registerHook(AvailableHooks.ON_EVALUATION, traceObject => {\n appendFile(join(dotMastraPath, 'evals.json'), JSON.stringify(traceObject) + '\\n', () => {});\n });\n}\n\nexport async function globalSetup() {\n if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {\n throw new Error('Global run id already set, you should only run \"GlobalSetup\" once');\n }\n\n const globalRunId = crypto.randomUUID();\n process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;\n}\n"],"names":["GLOBAL_RUN_ID_ENV_KEY","evaluate","_x","_x2","_x3","_evaluate","apply","arguments","_asyncToGenerator","_regeneratorRuntime","mark","_callee2","agent","input","metric","testInfo","globalRunId","runId","agentOutput","metricResult","wrap","_callee2$","_context2","prev","next","getCurrentTestInfo","sent","process","env","crypto","randomUUID","generate","console","warn","coreEvaluate","agentName","name","output","text","abrupt","stop","_ref","_callee","state","_vitest$expect","vitest","_state","_callee$","_context","expect","getState","testName","currentTestName","testPath","t0","attachListeners","_attachListeners","dotMastraPath","join","cwd","mkdirSync","error","registerHook","AvailableHooks","ON_EVALUATION","traceObject","appendFile","JSON","stringify","globalSetup","_globalSetup","Error"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAO,IAAMA,qBAAqB,GAAG,wBAAwB;;ACI7D,SAAsBC,QAAQA,CAAAC,EAAA,EAAAC,GAAA,EAAAC,GAAA,EAAA;AAAA,EAAA,OAAAC,SAAA,CAAAC,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAwB7B,SAAAF,SAAA,GAAA;AAAAA,EAAAA,SAAA,GAAAG,iBAAA,cAAAC,mBAAA,EAAAC,CAAAA,IAAA,CAxBM,SAAAC,QAAyCC,CAAAA,KAAQ,EAAEC,KAAmC,EAAEC,MAAc,EAAA;IAAA,IAAAC,QAAA,EAAAC,WAAA,EAAAC,KAAA,EAAAC,WAAA,EAAAC,YAAA,CAAA;AAAA,IAAA,OAAAV,mBAAA,EAAA,CAA
AW,IAAA,CAAA,SAAAC,UAAAC,SAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,SAAA,CAAAC,IAAA,GAAAD,SAAA,CAAAE,IAAA;AAAA,QAAA,KAAA,CAAA;AAAAF,UAAAA,SAAA,CAAAE,IAAA,GAAA,CAAA,CAAA;UAAA,OACpFC,kBAAkB,EAAE,CAAA;AAAA,QAAA,KAAA,CAAA;UAArCV,QAAQ,GAAAO,SAAA,CAAAI,IAAA,CAAA;AACVV,UAAAA,WAAW,GAAGW,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,CAAA;AAC9CiB,UAAAA,KAAK,GAAGY,MAAM,CAACC,UAAU,EAAE,CAAA;AAAAR,UAAAA,SAAA,CAAAE,IAAA,GAAA,CAAA,CAAA;AAAA,UAAA,OACPZ,KAAK,CAACmB,QAAQ,CAAClB,KAAK,EAAE;AAC9CI,YAAAA,KAAK,EAALA,KAAAA;AACD,WAAA,CAAC,CAAA;AAAA,QAAA,KAAA,CAAA;UAFIC,WAAW,GAAAI,SAAA,CAAAI,IAAA,CAAA;UAIjB,IAAI,CAACV,WAAW,EAAE;AAChBA,YAAAA,WAAW,GAAGW,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,GAAG6B,MAAM,CAACC,UAAU,EAAE,CAAA;AACtEE,YAAAA,OAAO,CAACC,IAAI,CAAC,6FAA6F,CAAC,CAAA;AAC7G,WAAA;AAACX,UAAAA,SAAA,CAAAE,IAAA,GAAA,EAAA,CAAA;AAAA,UAAA,OAE0BU,aAAY,CAAC;YACtCC,SAAS,EAAEvB,KAAK,CAACwB,IAAI;AACrBvB,YAAAA,KAAK,EAALA,KAAK;AACLC,YAAAA,MAAM,EAANA,MAAM;YACNuB,MAAM,EAAEnB,WAAW,CAACoB,IAAI;AACxBtB,YAAAA,WAAW,EAAXA,WAAW;AACXC,YAAAA,KAAK,EAALA,KAAK;AACLF,YAAAA,QAAQ,EAARA,QAAAA;AACD,WAAA,CAAC,CAAA;AAAA,QAAA,KAAA,EAAA;UARII,YAAY,GAAAG,SAAA,CAAAI,IAAA,CAAA;AAAA,UAAA,OAAAJ,SAAA,CAAAiB,MAAA,CAAA,QAAA,EAUXpB,YAAY,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAG,SAAA,CAAAkB,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAA7B,QAAA,CAAA,CAAA;GACpB,CAAA,CAAA,CAAA;AAAA,EAAA,OAAAN,SAAA,CAAAC,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAEM,IAAMkB,kBAAkB,gBAAA,YAAA;EAAA,IAAAgB,IAAA,gBAAAjC,iBAAA,cAAAC,mBAAA,EAAAC,CAAAA,IAAA,CAAG,SAAAgC,OAAA,GAAA;AAAA,IAAA,IAAAC,KAAA,EAAAC,cAAA,EAAAC,MAAA,EAAAC,MAAA,CAAA;AAAA,IAAA,OAAArC,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAA2B,SAAAC,QAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,QAAA,CAAAzB,IAAA,GAAAyB,QAAA,CAAAxB,IAAA;AAAA,QAAA,KAAA,CAAA;AAAA,UAAA,IAAA,EAE5B,OAAOyB,MAAM,KAAK,WAAW,IAAIA,MAAM,CAACC,QAAQ,CAAA,EAAA;AAAAF,YAAAA,QAAA,CAAAxB,IAAA,GAAA,CAAA,CAAA;AAAA,YAAA,MAAA;AAAA,WAAA;AAC5CmB,UAAAA,KAAK,GAAGM,MAAM,CAACC,QAAQ,EAAE,CAAA;UAAA,OAAAF,QAAA,CAAAT,MAAA,CACxB,QAAA,EAAA;YACLY,QAAQ,EAAER,KAAK,CAACS,eAAe
;YAC/BC,QAAQ,EAAEV,KAAK,CAACU,QAAAA;WACjB,CAAA,CAAA;AAAA,QAAA,KAAA,CAAA;AAAAL,UAAAA,QAAA,CAAAzB,IAAA,GAAA,CAAA,CAAA;AAAAyB,UAAAA,QAAA,CAAAxB,IAAA,GAAA,CAAA,CAAA;UAAA,OAIoB,OAAO,QAAQ,CAAC,CAAA;AAAA,QAAA,KAAA,CAAA;UAA/BqB,MAAM,GAAAG,QAAA,CAAAtB,IAAA,CAAA;AAAA,UAAA,IAAA,EACR,OAAOmB,MAAM,KAAK,WAAW,IAAAD,CAAAA,cAAA,GAAIC,MAAM,CAACI,MAAM,KAAbL,IAAAA,IAAAA,cAAA,CAAeM,QAAQ,CAAA,EAAA;AAAAF,YAAAA,QAAA,CAAAxB,IAAA,GAAA,EAAA,CAAA;AAAA,YAAA,MAAA;AAAA,WAAA;AACpDmB,UAAAA,MAAK,GAAGE,MAAM,CAACI,MAAM,CAACC,QAAQ,EAAE,CAAA;UAAA,OAAAF,QAAA,CAAAT,MAAA,CAC/B,QAAA,EAAA;YACLY,QAAQ,EAAER,MAAK,CAACS,eAAe;YAC/BC,QAAQ,EAAEV,MAAK,CAACU,QAAAA;WACjB,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA;AAAAL,UAAAA,QAAA,CAAAxB,IAAA,GAAA,EAAA,CAAA;AAAA,UAAA,MAAA;AAAA,QAAA,KAAA,EAAA;AAAAwB,UAAAA,QAAA,CAAAzB,IAAA,GAAA,EAAA,CAAA;UAAAyB,QAAA,CAAAM,EAAA,GAAAN,QAAA,CAAA,OAAA,CAAA,CAAA,CAAA,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA;AAAA,UAAA,OAAAA,QAAA,CAAAT,MAAA,CAAA,QAAA,EAIE,IAAI,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAS,QAAA,CAAAR,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAAE,OAAA,EAAA,IAAA,EAAA,CAAA,CAAA,CAAA,EAAA,EAAA,CAAA,CAAA,CAAA,CAAA;GACZ,CAAA,CAAA,CAAA;AAAA,EAAA,OAAA,SAtBYjB,kBAAkBA,GAAA;AAAA,IAAA,OAAAgB,IAAA,CAAAnC,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,GAAA,CAAA;AAAA,CAsB9B,EAAA;;AC9CD,SAAsBgD,eAAeA,GAAA;AAAA,EAAA,OAAAC,gBAAA,CAAAlD,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAUpC,SAAAiD,gBAAA,GAAA;EAAAA,gBAAA,GAAAhD,iBAAA,cAAAC,mBAAA,GAAAC,IAAA,CAVM,SAAAgC,OAAA,GAAA;AAAA,IAAA,IAAAe,aAAA,CAAA;AAAA,IAAA,OAAAhD,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAA2B,SAAAC,QAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,QAAA,CAAAzB,IAAA,GAAAyB,QAAA,CAAAxB,IAAA;AAAA,QAAA,KAAA,CAAA;UACCiC,aAAa,GAAGC,SAAI,CAAC/B,OAAO,CAACgC,GAAG,EAAE,EAAE,SAAS,CAAC,CAAA;UAEpD,IAAI;YACFC,YAAS,CAACH,aAAa,CAAC,CAAA;AAC1B,WAAC,CAAC,OAAOI,KAAK,EAAE,EAAC;AAEjBC,UAAAA,iBAAY,CAACC,mBAAc,CAACC,aAAa,EAAE,UAAAC,WAAW,EAAG;YACvDC,aAAU,CAACR,SAAI,CAACD,aAAa,EAAE,YAAY,CAAC,EAAEU,IAAI,CAACC,SAAS,CAACH,WAAW,CAAC,GAAG,IAAI,EAAE,YAAK,EAAG,CAAC,CAAA;AAC7F,WAAC,CAAC,CAAA;AAAC,
QAAA,KAAA,CAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAjB,QAAA,CAAAR,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAAE,OAAA,CAAA,CAAA;GACJ,CAAA,CAAA,CAAA;AAAA,EAAA,OAAAc,gBAAA,CAAAlD,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAED,SAAsB8D,WAAWA,GAAA;AAAA,EAAA,OAAAC,YAAA,CAAAhE,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAOhC,SAAA+D,YAAA,GAAA;EAAAA,YAAA,GAAA9D,iBAAA,cAAAC,mBAAA,GAAAC,IAAA,CAPM,SAAAC,QAAA,GAAA;AAAA,IAAA,IAAAK,WAAA,CAAA;AAAA,IAAA,OAAAP,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAAC,UAAAC,SAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,SAAA,CAAAC,IAAA,GAAAD,SAAA,CAAAE,IAAA;AAAA,QAAA,KAAA,CAAA;AAAA,UAAA,IAAA,CACDG,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,EAAA;AAAAsB,YAAAA,SAAA,CAAAE,IAAA,GAAA,CAAA,CAAA;AAAA,YAAA,MAAA;AAAA,WAAA;AAAA,UAAA,MAC9B,IAAI+C,KAAK,CAAC,mEAAmE,CAAC,CAAA;AAAA,QAAA,KAAA,CAAA;AAGhFvD,UAAAA,WAAW,GAAGa,MAAM,CAACC,UAAU,EAAE,CAAA;AACvCH,UAAAA,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,GAAGgB,WAAW,CAAA;AAAC,QAAA,KAAA,CAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAM,SAAA,CAAAkB,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAA7B,QAAA,CAAA,CAAA;GAClD,CAAA,CAAA,CAAA;AAAA,EAAA,OAAA2D,YAAA,CAAAhE,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA;;;;;;"}
1
+ {"version":3,"file":"evals.cjs.development.js","sources":["../src/constants.ts","../src/evaluation.ts","../src/attachListeners.ts"],"sourcesContent":["export const GLOBAL_RUN_ID_ENV_KEY = '_MASTRA_GLOBAL_RUN_ID_';\n","import { type Agent, type Metric, evaluate as coreEvaluate } from '@mastra/core';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric) {\n const testInfo = await getCurrentTestInfo();\n let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];\n const runId = crypto.randomUUID();\n const agentOutput = await agent.generate(input, {\n runId,\n });\n\n if (!globalRunId) {\n globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();\n console.warn('Global run id not set, you should run \"globalSetup\" from \"@mastra/evals\" before evaluating.');\n }\n\n const metricResult = await coreEvaluate({\n agentName: agent.name,\n input,\n metric,\n output: agentOutput.text,\n globalRunId,\n runId,\n testInfo,\n });\n\n return metricResult;\n}\n\nexport const getCurrentTestInfo = async () => {\n // Jest\n // @ts-ignore\n if (typeof expect !== 'undefined' && expect.getState) {\n // @ts-ignore\n const state = expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n\n try {\n const vitest = await import('vitest');\n if (typeof vitest !== 'undefined' && vitest.expect?.getState) {\n const state = vitest.expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n } catch {}\n\n return null;\n};\n","import { AvailableHooks, registerHook } from '@mastra/core';\nimport { mkdirSync, appendFile } from 'fs';\nimport { join } from 'path';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function attachListeners() {\n const dotMastraPath = join(process.cwd(), '.mastra');\n\n try {\n mkdirSync(dotMastraPath);\n } catch (error) {}\n\n 
registerHook(AvailableHooks.ON_EVALUATION, traceObject => {\n appendFile(join(dotMastraPath, 'evals.json'), JSON.stringify(traceObject) + '\\n', () => {});\n });\n}\n\nexport async function globalSetup() {\n if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {\n throw new Error('Global run id already set, you should only run \"GlobalSetup\" once');\n }\n\n const globalRunId = crypto.randomUUID();\n process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;\n}\n"],"names":["GLOBAL_RUN_ID_ENV_KEY","evaluate","_x","_x2","_x3","_evaluate","apply","arguments","_asyncToGenerator","_regeneratorRuntime","mark","_callee2","agent","input","metric","testInfo","globalRunId","runId","agentOutput","metricResult","wrap","_callee2$","_context2","prev","next","getCurrentTestInfo","sent","process","env","crypto","randomUUID","generate","console","warn","coreEvaluate","agentName","name","output","text","abrupt","stop","_ref","_callee","state","_vitest$expect","vitest","_state","_callee$","_context","expect","getState","testName","currentTestName","testPath","t0","attachListeners","_attachListeners","dotMastraPath","join","cwd","mkdirSync","error","registerHook","AvailableHooks","ON_EVALUATION","traceObject","appendFile","JSON","stringify","globalSetup","_globalSetup","Error"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAO,IAAMA,qBAAqB,GAAG,wBAAwB;;ACI7D,SAAsBC,QAAQA,CAAAC,EAAA,EAAAC,GAAA,EAAAC,GAAA,EAAA;AAAA,EAAA,OAAAC,SAAA,CAAAC,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAwB7B,SAAAF,SAAA,GAAA;AAAAA,EAAAA,SAAA,GAAAG,iBAAA,cAAAC,mBAAA,EAAAC,CAAAA,IAAA,CAxBM,SAAAC,QAAyCC,CAAAA,KAAQ,EAAEC,KAAmC,EAAEC,MAAc,EAAA;IAAA,IAAAC,QAAA,EAAAC,WAAA,EAAAC,KAAA,EAAAC,WAAA,EAAAC,YAAA,CAAA;AAAA,IAAA,OAAAV,mBAAA,EAAA,CAA
AW,IAAA,CAAA,SAAAC,UAAAC,SAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,SAAA,CAAAC,IAAA,GAAAD,SAAA,CAAAE,IAAA;AAAA,QAAA,KAAA,CAAA;AAAAF,UAAAA,SAAA,CAAAE,IAAA,GAAA,CAAA,CAAA;UAAA,OACpFC,kBAAkB,EAAE,CAAA;AAAA,QAAA,KAAA,CAAA;UAArCV,QAAQ,GAAAO,SAAA,CAAAI,IAAA,CAAA;AACVV,UAAAA,WAAW,GAAGW,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,CAAA;AAC9CiB,UAAAA,KAAK,GAAGY,MAAM,CAACC,UAAU,EAAE,CAAA;AAAAR,UAAAA,SAAA,CAAAE,IAAA,GAAA,CAAA,CAAA;AAAA,UAAA,OACPZ,KAAK,CAACmB,QAAQ,CAAClB,KAAK,EAAE;AAC9CI,YAAAA,KAAK,EAALA,KAAAA;AACD,WAAA,CAAC,CAAA;AAAA,QAAA,KAAA,CAAA;UAFIC,WAAW,GAAAI,SAAA,CAAAI,IAAA,CAAA;UAIjB,IAAI,CAACV,WAAW,EAAE;AAChBA,YAAAA,WAAW,GAAGW,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,GAAG6B,MAAM,CAACC,UAAU,EAAE,CAAA;AACtEE,YAAAA,OAAO,CAACC,IAAI,CAAC,6FAA6F,CAAC,CAAA;AAC7G,WAAA;AAACX,UAAAA,SAAA,CAAAE,IAAA,GAAA,EAAA,CAAA;AAAA,UAAA,OAE0BU,aAAY,CAAC;YACtCC,SAAS,EAAEvB,KAAK,CAACwB,IAAI;AACrBvB,YAAAA,KAAK,EAALA,KAAK;AACLC,YAAAA,MAAM,EAANA,MAAM;YACNuB,MAAM,EAAEnB,WAAW,CAACoB,IAAI;AACxBtB,YAAAA,WAAW,EAAXA,WAAW;AACXC,YAAAA,KAAK,EAALA,KAAK;AACLF,YAAAA,QAAQ,EAARA,QAAAA;AACD,WAAA,CAAC,CAAA;AAAA,QAAA,KAAA,EAAA;UARII,YAAY,GAAAG,SAAA,CAAAI,IAAA,CAAA;AAAA,UAAA,OAAAJ,SAAA,CAAAiB,MAAA,CAAA,QAAA,EAUXpB,YAAY,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAG,SAAA,CAAAkB,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAA7B,QAAA,CAAA,CAAA;GACpB,CAAA,CAAA,CAAA;AAAA,EAAA,OAAAN,SAAA,CAAAC,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAEM,IAAMkB,kBAAkB,gBAAA,YAAA;EAAA,IAAAgB,IAAA,gBAAAjC,iBAAA,cAAAC,mBAAA,EAAAC,CAAAA,IAAA,CAAG,SAAAgC,OAAA,GAAA;AAAA,IAAA,IAAAC,KAAA,EAAAC,cAAA,EAAAC,MAAA,EAAAC,MAAA,CAAA;AAAA,IAAA,OAAArC,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAA2B,SAAAC,QAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,QAAA,CAAAzB,IAAA,GAAAyB,QAAA,CAAAxB,IAAA;AAAA,QAAA,KAAA,CAAA;AAAA,UAAA,IAAA,EAG5B,OAAOyB,MAAM,KAAK,WAAW,IAAIA,MAAM,CAACC,QAAQ,CAAA,EAAA;AAAAF,YAAAA,QAAA,CAAAxB,IAAA,GAAA,CAAA,CAAA;AAAA,YAAA,MAAA;AAAA,WAAA;AAClD;AACMmB,UAAAA,KAAK,GAAGM,MAAM,CAACC,QAAQ,EAAE,CAAA;UAAA,OAAAF,QAAA,CAAAT,MAAA,CACxB,QAAA,EAAA;YACLY,QAAQ,EAAER,KAAK,CAACS
,eAAe;YAC/BC,QAAQ,EAAEV,KAAK,CAACU,QAAAA;WACjB,CAAA,CAAA;AAAA,QAAA,KAAA,CAAA;AAAAL,UAAAA,QAAA,CAAAzB,IAAA,GAAA,CAAA,CAAA;AAAAyB,UAAAA,QAAA,CAAAxB,IAAA,GAAA,CAAA,CAAA;UAAA,OAIoB,OAAO,QAAQ,CAAC,CAAA;AAAA,QAAA,KAAA,CAAA;UAA/BqB,MAAM,GAAAG,QAAA,CAAAtB,IAAA,CAAA;AAAA,UAAA,IAAA,EACR,OAAOmB,MAAM,KAAK,WAAW,IAAAD,CAAAA,cAAA,GAAIC,MAAM,CAACI,MAAM,KAAbL,IAAAA,IAAAA,cAAA,CAAeM,QAAQ,CAAA,EAAA;AAAAF,YAAAA,QAAA,CAAAxB,IAAA,GAAA,EAAA,CAAA;AAAA,YAAA,MAAA;AAAA,WAAA;AACpDmB,UAAAA,MAAK,GAAGE,MAAM,CAACI,MAAM,CAACC,QAAQ,EAAE,CAAA;UAAA,OAAAF,QAAA,CAAAT,MAAA,CAC/B,QAAA,EAAA;YACLY,QAAQ,EAAER,MAAK,CAACS,eAAe;YAC/BC,QAAQ,EAAEV,MAAK,CAACU,QAAAA;WACjB,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA;AAAAL,UAAAA,QAAA,CAAAxB,IAAA,GAAA,EAAA,CAAA;AAAA,UAAA,MAAA;AAAA,QAAA,KAAA,EAAA;AAAAwB,UAAAA,QAAA,CAAAzB,IAAA,GAAA,EAAA,CAAA;UAAAyB,QAAA,CAAAM,EAAA,GAAAN,QAAA,CAAA,OAAA,CAAA,CAAA,CAAA,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA;AAAA,UAAA,OAAAA,QAAA,CAAAT,MAAA,CAAA,QAAA,EAIE,IAAI,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAS,QAAA,CAAAR,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAAE,OAAA,EAAA,IAAA,EAAA,CAAA,CAAA,CAAA,EAAA,EAAA,CAAA,CAAA,CAAA,CAAA;GACZ,CAAA,CAAA,CAAA;AAAA,EAAA,OAAA,SAxBYjB,kBAAkBA,GAAA;AAAA,IAAA,OAAAgB,IAAA,CAAAnC,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,GAAA,CAAA;AAAA,CAwB9B,EAAA;;AChDD,SAAsBgD,eAAeA,GAAA;AAAA,EAAA,OAAAC,gBAAA,CAAAlD,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAUpC,SAAAiD,gBAAA,GAAA;EAAAA,gBAAA,GAAAhD,iBAAA,cAAAC,mBAAA,GAAAC,IAAA,CAVM,SAAAgC,OAAA,GAAA;AAAA,IAAA,IAAAe,aAAA,CAAA;AAAA,IAAA,OAAAhD,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAA2B,SAAAC,QAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,QAAA,CAAAzB,IAAA,GAAAyB,QAAA,CAAAxB,IAAA;AAAA,QAAA,KAAA,CAAA;UACCiC,aAAa,GAAGC,SAAI,CAAC/B,OAAO,CAACgC,GAAG,EAAE,EAAE,SAAS,CAAC,CAAA;UAEpD,IAAI;YACFC,YAAS,CAACH,aAAa,CAAC,CAAA;AAC1B,WAAC,CAAC,OAAOI,KAAK,EAAE,EAAC;AAEjBC,UAAAA,iBAAY,CAACC,mBAAc,CAACC,aAAa,EAAE,UAAAC,WAAW,EAAG;YACvDC,aAAU,CAACR,SAAI,CAACD,aAAa,EAAE,YAAY,CAAC,EAAEU,IAAI,CAACC,SAAS,CAACH,WAAW,CAAC,GAAG,IAAI,EAAE,YAAK,EAAG,CAAC,CAAA;AAC7F,WAAC,CAAC,CAAA;
AAAC,QAAA,KAAA,CAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAjB,QAAA,CAAAR,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAAE,OAAA,CAAA,CAAA;GACJ,CAAA,CAAA,CAAA;AAAA,EAAA,OAAAc,gBAAA,CAAAlD,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAED,SAAsB8D,WAAWA,GAAA;AAAA,EAAA,OAAAC,YAAA,CAAAhE,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAOhC,SAAA+D,YAAA,GAAA;EAAAA,YAAA,GAAA9D,iBAAA,cAAAC,mBAAA,GAAAC,IAAA,CAPM,SAAAC,QAAA,GAAA;AAAA,IAAA,IAAAK,WAAA,CAAA;AAAA,IAAA,OAAAP,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAAC,UAAAC,SAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,SAAA,CAAAC,IAAA,GAAAD,SAAA,CAAAE,IAAA;AAAA,QAAA,KAAA,CAAA;AAAA,UAAA,IAAA,CACDG,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,EAAA;AAAAsB,YAAAA,SAAA,CAAAE,IAAA,GAAA,CAAA,CAAA;AAAA,YAAA,MAAA;AAAA,WAAA;AAAA,UAAA,MAC9B,IAAI+C,KAAK,CAAC,mEAAmE,CAAC,CAAA;AAAA,QAAA,KAAA,CAAA;AAGhFvD,UAAAA,WAAW,GAAGa,MAAM,CAACC,UAAU,EAAE,CAAA;AACvCH,UAAAA,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,GAAGgB,WAAW,CAAA;AAAC,QAAA,KAAA,CAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAM,SAAA,CAAAkB,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAA7B,QAAA,CAAA,CAAA;GAClD,CAAA,CAAA,CAAA;AAAA,EAAA,OAAA2D,YAAA,CAAAhE,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA;;;;;;"}
@@ -1 +1 @@
1
- {"version":3,"file":"evals.cjs.production.min.js","sources":["../src/constants.ts","../src/evaluation.ts","../src/attachListeners.ts"],"sourcesContent":["export const GLOBAL_RUN_ID_ENV_KEY = '_MASTRA_GLOBAL_RUN_ID_';\n","import { type Agent, type Metric, evaluate as coreEvaluate } from '@mastra/core';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric) {\n const testInfo = await getCurrentTestInfo();\n let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];\n const runId = crypto.randomUUID();\n const agentOutput = await agent.generate(input, {\n runId,\n });\n\n if (!globalRunId) {\n globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();\n console.warn('Global run id not set, you should run \"globalSetup\" from \"@mastra/evals\" before evaluating.');\n }\n\n const metricResult = await coreEvaluate({\n agentName: agent.name,\n input,\n metric,\n output: agentOutput.text,\n globalRunId,\n runId,\n testInfo,\n });\n\n return metricResult;\n}\n\nexport const getCurrentTestInfo = async () => {\n // Jest\n if (typeof expect !== 'undefined' && expect.getState) {\n const state = expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n\n try {\n const vitest = await import('vitest');\n if (typeof vitest !== 'undefined' && vitest.expect?.getState) {\n const state = vitest.expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n } catch {}\n\n return null;\n};\n","import { AvailableHooks, registerHook } from '@mastra/core';\nimport { mkdirSync, appendFile } from 'fs';\nimport { join } from 'path';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function attachListeners() {\n const dotMastraPath = join(process.cwd(), '.mastra');\n\n try {\n mkdirSync(dotMastraPath);\n } catch (error) {}\n\n 
registerHook(AvailableHooks.ON_EVALUATION, traceObject => {\n appendFile(join(dotMastraPath, 'evals.json'), JSON.stringify(traceObject) + '\\n', () => {});\n });\n}\n\nexport async function globalSetup() {\n if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {\n throw new Error('Global run id already set, you should only run \"GlobalSetup\" once');\n }\n\n const globalRunId = crypto.randomUUID();\n process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;\n}\n"],"names":["GLOBAL_RUN_ID_ENV_KEY","_evaluate","_asyncToGenerator","_regeneratorRuntime","mark","_callee2","agent","input","metric","testInfo","globalRunId","runId","agentOutput","wrap","_context2","prev","next","getCurrentTestInfo","sent","process","env","crypto","randomUUID","generate","console","warn","coreEvaluate","agentName","name","output","text","abrupt","stop","apply","this","arguments","_ref","_callee","state","_vitest$expect","vitest","_state","_context","expect","getState","testName","currentTestName","testPath","import","t0","_attachListeners","dotMastraPath","join","cwd","mkdirSync","error","registerHook","AvailableHooks","ON_EVALUATION","traceObject","appendFile","JSON","stringify","_globalSetup","Error","_x","_x2","_x3"],"mappings":"oyNAAO,IAAMA,EAAwB,yBC4BpC,SAAAC,IAAA,OAAAA,EAAAC,EAAAC,IAAAC,MAxBM,SAAAC,EAAyCC,EAAUC,EAAqCC,GAAc,IAAAC,EAAAC,EAAAC,EAAAC,EAAA,OAAAT,IAAAU,MAAA,SAAAC,GAAA,cAAAA,EAAAC,KAAAD,EAAAE,MAAA,KAAA,EAAA,OAAAF,EAAAE,KAAA,EACpFC,IAAoB,KAAA,EAEV,OAF3BR,EAAQK,EAAAI,KACVR,EAAcS,QAAQC,IAAIpB,GACxBW,EAAQU,OAAOC,aAAYR,EAAAE,KAAA,EACPV,EAAMiB,SAAShB,EAAO,CAC9CI,MAAAA,IACA,KAAA,EAKD,OAPKC,EAAWE,EAAAI,KAIZR,IACHA,EAAcS,QAAQC,IAAIpB,GAAyBqB,OAAOC,aAC1DE,QAAQC,KAAK,gGACdX,EAAAE,KAAA,GAE0BU,WAAa,CACtCC,UAAWrB,EAAMsB,KACjBrB,MAAAA,EACAC,OAAAA,EACAqB,OAAQjB,EAAYkB,KACpBpB,YAAAA,EACAC,MAAAA,EACAF,SAAAA,IACA,KAAA,GARgB,OAAAK,EAAAiB,OAAA,SAAAjB,EAAAI,MAUC,KAAA,GAAA,IAAA,MAAA,OAAAJ,EAAAkB,OAAA,GAAA3B,EACpB,MAAA4B,MAAAC,KAAAC,UAAA,CAEM,IAAMlB,EAAkB,WAAA,IAAAmB,EAAAlC,EAAAC,IAAAC,MAAG,SAAAiC,IAAA,IAAAC,EAAAC,EAAAC
,EAAAC,EAAA,OAAAtC,IAAAU,MAAA,SAAA6B,GAAA,cAAAA,EAAA3B,KAAA2B,EAAA1B,MAAA,KAAA,EAAA,GAEV,oBAAX2B,SAA0BA,OAAOC,SAAQ,CAAAF,EAAA1B,KAAA,EAAA,KAAA,CACnB,OAAzBsB,EAAQK,OAAOC,WAAUF,EAAAX,OACxB,SAAA,CACLc,SAAUP,EAAMQ,gBAChBC,SAAUT,EAAMS,WACjB,KAAA,EAAA,OAAAL,EAAA3B,KAAA,EAAA2B,EAAA1B,KAAA,EAIoBgC,OAAO,UAAS,KAAA,EAAzB,QACU,KADhBR,EAAME,EAAAxB,OACyBqB,OAAJA,EAAIC,EAAOG,UAAPJ,EAAeK,SAAQ,CAAAF,EAAA1B,KAAA,GAAA,KAAA,CACpB,OAAhCsB,EAAQE,EAAOG,OAAOC,WAAUF,EAAAX,OAC/B,SAAA,CACLc,SAAUP,EAAMQ,gBAChBC,SAAUT,EAAMS,WACjB,KAAA,GAAAL,EAAA1B,KAAA,GAAA,MAAA,KAAA,GAAA0B,EAAA3B,KAAA,GAAA2B,EAAAO,GAAAP,EAAA,MAAA,GAAA,KAAA,GAAA,OAAAA,EAAAX,OAAA,SAIE,MAAI,KAAA,GAAA,IAAA,MAAA,OAAAW,EAAAV,OAAA,GAAAK,EAAA,KAAA,CAAA,CAAA,EAAA,KACZ,KAAA,OAAA,WAtB8B,OAAAD,EAAAH,MAAAC,KAAAC,UAAA,CAAA,CAAA,GCd9B,SAAAe,IAAA,OAAAA,EAAAhD,EAAAC,IAAAC,MAVM,SAAAiC,IAAA,IAAAc,EAAA,OAAAhD,IAAAU,MAAA,SAAA6B,GAAA,cAAAA,EAAA3B,KAAA2B,EAAA1B,MAAA,KAAA,EACCmC,EAAgBC,EAAAA,KAAKjC,QAAQkC,MAAO,WAE1C,IACEC,EAASA,UAACH,EACZ,CAAE,MAAOI,GAAQ,CAEjBC,EAAAA,aAAaC,EAAcA,eAACC,eAAe,SAAAC,GACzCC,EAAAA,WAAWR,EAAIA,KAACD,EAAe,cAAeU,KAAKC,UAAUH,GAAe,MAAM,WAAQ,GAC5F,IAAG,KAAA,EAAA,IAAA,MAAA,OAAAjB,EAAAV,OAAA,GAAAK,EACJ,MAAAJ,MAAAC,KAAAC,UAAA,CASA,SAAA4B,IAAA,OAAAA,EAAA7D,EAAAC,IAAAC,MAPM,SAAAC,IAAA,IAAAK,EAAA,OAAAP,IAAAU,MAAA,SAAAC,GAAA,cAAAA,EAAAC,KAAAD,EAAAE,MAAA,KAAA,EAAA,IACDG,QAAQC,IAAIpB,GAAsB,CAAAc,EAAAE,KAAA,EAAA,KAAA,CAAA,MAC9B,IAAIgD,MAAM,qEAAoE,KAAA,EAGhFtD,EAAcW,OAAOC,aAC3BH,QAAQC,IAAIpB,GAAyBU,EAAY,KAAA,EAAA,IAAA,MAAA,OAAAI,EAAAkB,OAAA,GAAA3B,EAClD,MAAA4B,MAAAC,KAAAC,UAAA,yBAnBD,WAAqC,OAAAe,EAAAjB,MAAAC,KAAAC,UAAA,mBDFrC,SAA8B8B,EAAAC,EAAAC,GAAA,OAAAlE,EAAAgC,MAAAC,KAAAC,UAAA,sBCc9B,WAAiC,OAAA4B,EAAA9B,MAAAC,KAAAC,UAAA"}
1
+ {"version":3,"file":"evals.cjs.production.min.js","sources":["../src/constants.ts","../src/evaluation.ts","../src/attachListeners.ts"],"sourcesContent":["export const GLOBAL_RUN_ID_ENV_KEY = '_MASTRA_GLOBAL_RUN_ID_';\n","import { type Agent, type Metric, evaluate as coreEvaluate } from '@mastra/core';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric) {\n const testInfo = await getCurrentTestInfo();\n let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];\n const runId = crypto.randomUUID();\n const agentOutput = await agent.generate(input, {\n runId,\n });\n\n if (!globalRunId) {\n globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();\n console.warn('Global run id not set, you should run \"globalSetup\" from \"@mastra/evals\" before evaluating.');\n }\n\n const metricResult = await coreEvaluate({\n agentName: agent.name,\n input,\n metric,\n output: agentOutput.text,\n globalRunId,\n runId,\n testInfo,\n });\n\n return metricResult;\n}\n\nexport const getCurrentTestInfo = async () => {\n // Jest\n // @ts-ignore\n if (typeof expect !== 'undefined' && expect.getState) {\n // @ts-ignore\n const state = expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n\n try {\n const vitest = await import('vitest');\n if (typeof vitest !== 'undefined' && vitest.expect?.getState) {\n const state = vitest.expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n } catch {}\n\n return null;\n};\n","import { AvailableHooks, registerHook } from '@mastra/core';\nimport { mkdirSync, appendFile } from 'fs';\nimport { join } from 'path';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function attachListeners() {\n const dotMastraPath = join(process.cwd(), '.mastra');\n\n try {\n mkdirSync(dotMastraPath);\n } catch (error) {}\n\n 
registerHook(AvailableHooks.ON_EVALUATION, traceObject => {\n appendFile(join(dotMastraPath, 'evals.json'), JSON.stringify(traceObject) + '\\n', () => {});\n });\n}\n\nexport async function globalSetup() {\n if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {\n throw new Error('Global run id already set, you should only run \"GlobalSetup\" once');\n }\n\n const globalRunId = crypto.randomUUID();\n process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;\n}\n"],"names":["GLOBAL_RUN_ID_ENV_KEY","_evaluate","_asyncToGenerator","_regeneratorRuntime","mark","_callee2","agent","input","metric","testInfo","globalRunId","runId","agentOutput","wrap","_context2","prev","next","getCurrentTestInfo","sent","process","env","crypto","randomUUID","generate","console","warn","coreEvaluate","agentName","name","output","text","abrupt","stop","apply","this","arguments","_ref","_callee","state","_vitest$expect","vitest","_state","_context","expect","getState","testName","currentTestName","testPath","import","t0","_attachListeners","dotMastraPath","join","cwd","mkdirSync","error","registerHook","AvailableHooks","ON_EVALUATION","traceObject","appendFile","JSON","stringify","_globalSetup","Error","_x","_x2","_x3"],"mappings":"oyNAAO,IAAMA,EAAwB,yBC4BpC,SAAAC,IAAA,OAAAA,EAAAC,EAAAC,IAAAC,MAxBM,SAAAC,EAAyCC,EAAUC,EAAqCC,GAAc,IAAAC,EAAAC,EAAAC,EAAAC,EAAA,OAAAT,IAAAU,MAAA,SAAAC,GAAA,cAAAA,EAAAC,KAAAD,EAAAE,MAAA,KAAA,EAAA,OAAAF,EAAAE,KAAA,EACpFC,IAAoB,KAAA,EAEV,OAF3BR,EAAQK,EAAAI,KACVR,EAAcS,QAAQC,IAAIpB,GACxBW,EAAQU,OAAOC,aAAYR,EAAAE,KAAA,EACPV,EAAMiB,SAAShB,EAAO,CAC9CI,MAAAA,IACA,KAAA,EAKD,OAPKC,EAAWE,EAAAI,KAIZR,IACHA,EAAcS,QAAQC,IAAIpB,GAAyBqB,OAAOC,aAC1DE,QAAQC,KAAK,gGACdX,EAAAE,KAAA,GAE0BU,WAAa,CACtCC,UAAWrB,EAAMsB,KACjBrB,MAAAA,EACAC,OAAAA,EACAqB,OAAQjB,EAAYkB,KACpBpB,YAAAA,EACAC,MAAAA,EACAF,SAAAA,IACA,KAAA,GARgB,OAAAK,EAAAiB,OAAA,SAAAjB,EAAAI,MAUC,KAAA,GAAA,IAAA,MAAA,OAAAJ,EAAAkB,OAAA,GAAA3B,EACpB,MAAA4B,MAAAC,KAAAC,UAAA,CAEM,IAAMlB,EAAkB,WAAA,IAAAmB,EAAAlC,EAAAC,IAAAC,MAAG,SAAAiC,IAAA,IAAAC,EAAAC,EAAAC
,EAAAC,EAAA,OAAAtC,IAAAU,MAAA,SAAA6B,GAAA,cAAAA,EAAA3B,KAAA2B,EAAA1B,MAAA,KAAA,EAAA,GAGV,oBAAX2B,SAA0BA,OAAOC,SAAQ,CAAAF,EAAA1B,KAAA,EAAA,KAAA,CAEnB,OAAzBsB,EAAQK,OAAOC,WAAUF,EAAAX,OACxB,SAAA,CACLc,SAAUP,EAAMQ,gBAChBC,SAAUT,EAAMS,WACjB,KAAA,EAAA,OAAAL,EAAA3B,KAAA,EAAA2B,EAAA1B,KAAA,EAIoBgC,OAAO,UAAS,KAAA,EAAzB,QACU,KADhBR,EAAME,EAAAxB,OACyBqB,OAAJA,EAAIC,EAAOG,UAAPJ,EAAeK,SAAQ,CAAAF,EAAA1B,KAAA,GAAA,KAAA,CACpB,OAAhCsB,EAAQE,EAAOG,OAAOC,WAAUF,EAAAX,OAC/B,SAAA,CACLc,SAAUP,EAAMQ,gBAChBC,SAAUT,EAAMS,WACjB,KAAA,GAAAL,EAAA1B,KAAA,GAAA,MAAA,KAAA,GAAA0B,EAAA3B,KAAA,GAAA2B,EAAAO,GAAAP,EAAA,MAAA,GAAA,KAAA,GAAA,OAAAA,EAAAX,OAAA,SAIE,MAAI,KAAA,GAAA,IAAA,MAAA,OAAAW,EAAAV,OAAA,GAAAK,EAAA,KAAA,CAAA,CAAA,EAAA,KACZ,KAAA,OAAA,WAxB8B,OAAAD,EAAAH,MAAAC,KAAAC,UAAA,CAAA,CAAA,GCd9B,SAAAe,IAAA,OAAAA,EAAAhD,EAAAC,IAAAC,MAVM,SAAAiC,IAAA,IAAAc,EAAA,OAAAhD,IAAAU,MAAA,SAAA6B,GAAA,cAAAA,EAAA3B,KAAA2B,EAAA1B,MAAA,KAAA,EACCmC,EAAgBC,EAAAA,KAAKjC,QAAQkC,MAAO,WAE1C,IACEC,EAASA,UAACH,EACZ,CAAE,MAAOI,GAAQ,CAEjBC,EAAAA,aAAaC,EAAcA,eAACC,eAAe,SAAAC,GACzCC,EAAAA,WAAWR,EAAIA,KAACD,EAAe,cAAeU,KAAKC,UAAUH,GAAe,MAAM,WAAQ,GAC5F,IAAG,KAAA,EAAA,IAAA,MAAA,OAAAjB,EAAAV,OAAA,GAAAK,EACJ,MAAAJ,MAAAC,KAAAC,UAAA,CASA,SAAA4B,IAAA,OAAAA,EAAA7D,EAAAC,IAAAC,MAPM,SAAAC,IAAA,IAAAK,EAAA,OAAAP,IAAAU,MAAA,SAAAC,GAAA,cAAAA,EAAAC,KAAAD,EAAAE,MAAA,KAAA,EAAA,IACDG,QAAQC,IAAIpB,GAAsB,CAAAc,EAAAE,KAAA,EAAA,KAAA,CAAA,MAC9B,IAAIgD,MAAM,qEAAoE,KAAA,EAGhFtD,EAAcW,OAAOC,aAC3BH,QAAQC,IAAIpB,GAAyBU,EAAY,KAAA,EAAA,IAAA,MAAA,OAAAI,EAAAkB,OAAA,GAAA3B,EAClD,MAAA4B,MAAAC,KAAAC,UAAA,yBAnBD,WAAqC,OAAAe,EAAAjB,MAAAC,KAAAC,UAAA,mBDFrC,SAA8B8B,EAAAC,EAAAC,GAAA,OAAAlE,EAAAgC,MAAAC,KAAAC,UAAA,sBCc9B,WAAiC,OAAA4B,EAAA9B,MAAAC,KAAAC,UAAA"}
package/dist/evals.esm.js CHANGED
@@ -387,6 +387,7 @@ var getCurrentTestInfo = /*#__PURE__*/function () {
387
387
  _context.next = 3;
388
388
  break;
389
389
  }
390
+ // @ts-ignore
390
391
  state = expect.getState();
391
392
  return _context.abrupt("return", {
392
393
  testName: state.currentTestName,
@@ -1 +1 @@
1
- {"version":3,"file":"evals.esm.js","sources":["../src/constants.ts","../src/evaluation.ts","../src/attachListeners.ts"],"sourcesContent":["export const GLOBAL_RUN_ID_ENV_KEY = '_MASTRA_GLOBAL_RUN_ID_';\n","import { type Agent, type Metric, evaluate as coreEvaluate } from '@mastra/core';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric) {\n const testInfo = await getCurrentTestInfo();\n let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];\n const runId = crypto.randomUUID();\n const agentOutput = await agent.generate(input, {\n runId,\n });\n\n if (!globalRunId) {\n globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();\n console.warn('Global run id not set, you should run \"globalSetup\" from \"@mastra/evals\" before evaluating.');\n }\n\n const metricResult = await coreEvaluate({\n agentName: agent.name,\n input,\n metric,\n output: agentOutput.text,\n globalRunId,\n runId,\n testInfo,\n });\n\n return metricResult;\n}\n\nexport const getCurrentTestInfo = async () => {\n // Jest\n if (typeof expect !== 'undefined' && expect.getState) {\n const state = expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n\n try {\n const vitest = await import('vitest');\n if (typeof vitest !== 'undefined' && vitest.expect?.getState) {\n const state = vitest.expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n } catch {}\n\n return null;\n};\n","import { AvailableHooks, registerHook } from '@mastra/core';\nimport { mkdirSync, appendFile } from 'fs';\nimport { join } from 'path';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function attachListeners() {\n const dotMastraPath = join(process.cwd(), '.mastra');\n\n try {\n mkdirSync(dotMastraPath);\n } catch (error) {}\n\n registerHook(AvailableHooks.ON_EVALUATION, 
traceObject => {\n appendFile(join(dotMastraPath, 'evals.json'), JSON.stringify(traceObject) + '\\n', () => {});\n });\n}\n\nexport async function globalSetup() {\n if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {\n throw new Error('Global run id already set, you should only run \"GlobalSetup\" once');\n }\n\n const globalRunId = crypto.randomUUID();\n process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;\n}\n"],"names":["GLOBAL_RUN_ID_ENV_KEY","evaluate","_x","_x2","_x3","_evaluate","apply","arguments","_asyncToGenerator","_regeneratorRuntime","mark","_callee2","agent","input","metric","testInfo","globalRunId","runId","agentOutput","metricResult","wrap","_callee2$","_context2","prev","next","getCurrentTestInfo","sent","process","env","crypto","randomUUID","generate","console","warn","coreEvaluate","agentName","name","output","text","abrupt","stop","_ref","_callee","state","_vitest$expect","vitest","_state","_callee$","_context","expect","getState","testName","currentTestName","testPath","t0","attachListeners","_attachListeners","dotMastraPath","join","cwd","mkdirSync","error","registerHook","AvailableHooks","ON_EVALUATION","traceObject","appendFile","JSON","stringify","globalSetup","_globalSetup","Error"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAO,IAAMA,qBAAqB,GAAG,wBAAwB;;ACI7D,SAAsBC,QAAQA,CAAAC,EAAA,EAAAC,GAAA,EAAAC,GAAA,EAAA;AAAA,EAAA,OAAAC,SAAA,CAAAC,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAwB7B,SAAAF,SAAA,GAAA;AAAAA,EAAAA,SAAA,GAAAG,iBAAA,cAAAC,mBAAA,EAAAC,CAAAA,IAAA,CAxBM,SAAAC,QAAyCC,CAAAA,KAAQ,EAAEC,KAAmC,EAAEC,MAAc,EAAA;IAAA,IAAAC,QAAA,EAAAC,WAAA,EAAAC,KAAA,EAAAC,WAAA,EAAAC,YAAA,CAAA;AAAA,IAAA,OAAAV,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAAC,UAAAC,SAAA,EAAA;AAAA,MAAA,OA
AA,CAAA,EAAA,QAAAA,SAAA,CAAAC,IAAA,GAAAD,SAAA,CAAAE,IAAA;AAAA,QAAA,KAAA,CAAA;AAAAF,UAAAA,SAAA,CAAAE,IAAA,GAAA,CAAA,CAAA;UAAA,OACpFC,kBAAkB,EAAE,CAAA;AAAA,QAAA,KAAA,CAAA;UAArCV,QAAQ,GAAAO,SAAA,CAAAI,IAAA,CAAA;AACVV,UAAAA,WAAW,GAAGW,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,CAAA;AAC9CiB,UAAAA,KAAK,GAAGY,MAAM,CAACC,UAAU,EAAE,CAAA;AAAAR,UAAAA,SAAA,CAAAE,IAAA,GAAA,CAAA,CAAA;AAAA,UAAA,OACPZ,KAAK,CAACmB,QAAQ,CAAClB,KAAK,EAAE;AAC9CI,YAAAA,KAAK,EAALA,KAAAA;AACD,WAAA,CAAC,CAAA;AAAA,QAAA,KAAA,CAAA;UAFIC,WAAW,GAAAI,SAAA,CAAAI,IAAA,CAAA;UAIjB,IAAI,CAACV,WAAW,EAAE;AAChBA,YAAAA,WAAW,GAAGW,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,GAAG6B,MAAM,CAACC,UAAU,EAAE,CAAA;AACtEE,YAAAA,OAAO,CAACC,IAAI,CAAC,6FAA6F,CAAC,CAAA;AAC7G,WAAA;AAACX,UAAAA,SAAA,CAAAE,IAAA,GAAA,EAAA,CAAA;AAAA,UAAA,OAE0BU,UAAY,CAAC;YACtCC,SAAS,EAAEvB,KAAK,CAACwB,IAAI;AACrBvB,YAAAA,KAAK,EAALA,KAAK;AACLC,YAAAA,MAAM,EAANA,MAAM;YACNuB,MAAM,EAAEnB,WAAW,CAACoB,IAAI;AACxBtB,YAAAA,WAAW,EAAXA,WAAW;AACXC,YAAAA,KAAK,EAALA,KAAK;AACLF,YAAAA,QAAQ,EAARA,QAAAA;AACD,WAAA,CAAC,CAAA;AAAA,QAAA,KAAA,EAAA;UARII,YAAY,GAAAG,SAAA,CAAAI,IAAA,CAAA;AAAA,UAAA,OAAAJ,SAAA,CAAAiB,MAAA,CAAA,QAAA,EAUXpB,YAAY,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAG,SAAA,CAAAkB,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAA7B,QAAA,CAAA,CAAA;GACpB,CAAA,CAAA,CAAA;AAAA,EAAA,OAAAN,SAAA,CAAAC,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAEM,IAAMkB,kBAAkB,gBAAA,YAAA;EAAA,IAAAgB,IAAA,gBAAAjC,iBAAA,cAAAC,mBAAA,EAAAC,CAAAA,IAAA,CAAG,SAAAgC,OAAA,GAAA;AAAA,IAAA,IAAAC,KAAA,EAAAC,cAAA,EAAAC,MAAA,EAAAC,MAAA,CAAA;AAAA,IAAA,OAAArC,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAA2B,SAAAC,QAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,QAAA,CAAAzB,IAAA,GAAAyB,QAAA,CAAAxB,IAAA;AAAA,QAAA,KAAA,CAAA;AAAA,UAAA,IAAA,EAE5B,OAAOyB,MAAM,KAAK,WAAW,IAAIA,MAAM,CAACC,QAAQ,CAAA,EAAA;AAAAF,YAAAA,QAAA,CAAAxB,IAAA,GAAA,CAAA,CAAA;AAAA,YAAA,MAAA;AAAA,WAAA;AAC5CmB,UAAAA,KAAK,GAAGM,MAAM,CAACC,QAAQ,EAAE,CAAA;UAAA,OAAAF,QAAA,CAAAT,MAAA,CACxB,QAAA,EAAA;YACLY,QAAQ,EAAER,KAAK,CAACS,eAAe;YAC/BC,QAAQ,EAAEV,KAAK,CAACU,QAAAA;WACjB,CAAA,
CAAA;AAAA,QAAA,KAAA,CAAA;AAAAL,UAAAA,QAAA,CAAAzB,IAAA,GAAA,CAAA,CAAA;AAAAyB,UAAAA,QAAA,CAAAxB,IAAA,GAAA,CAAA,CAAA;UAAA,OAIoB,OAAO,QAAQ,CAAC,CAAA;AAAA,QAAA,KAAA,CAAA;UAA/BqB,MAAM,GAAAG,QAAA,CAAAtB,IAAA,CAAA;AAAA,UAAA,IAAA,EACR,OAAOmB,MAAM,KAAK,WAAW,IAAAD,CAAAA,cAAA,GAAIC,MAAM,CAACI,MAAM,KAAbL,IAAAA,IAAAA,cAAA,CAAeM,QAAQ,CAAA,EAAA;AAAAF,YAAAA,QAAA,CAAAxB,IAAA,GAAA,EAAA,CAAA;AAAA,YAAA,MAAA;AAAA,WAAA;AACpDmB,UAAAA,MAAK,GAAGE,MAAM,CAACI,MAAM,CAACC,QAAQ,EAAE,CAAA;UAAA,OAAAF,QAAA,CAAAT,MAAA,CAC/B,QAAA,EAAA;YACLY,QAAQ,EAAER,MAAK,CAACS,eAAe;YAC/BC,QAAQ,EAAEV,MAAK,CAACU,QAAAA;WACjB,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA;AAAAL,UAAAA,QAAA,CAAAxB,IAAA,GAAA,EAAA,CAAA;AAAA,UAAA,MAAA;AAAA,QAAA,KAAA,EAAA;AAAAwB,UAAAA,QAAA,CAAAzB,IAAA,GAAA,EAAA,CAAA;UAAAyB,QAAA,CAAAM,EAAA,GAAAN,QAAA,CAAA,OAAA,CAAA,CAAA,CAAA,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA;AAAA,UAAA,OAAAA,QAAA,CAAAT,MAAA,CAAA,QAAA,EAIE,IAAI,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAS,QAAA,CAAAR,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAAE,OAAA,EAAA,IAAA,EAAA,CAAA,CAAA,CAAA,EAAA,EAAA,CAAA,CAAA,CAAA,CAAA;GACZ,CAAA,CAAA,CAAA;AAAA,EAAA,OAAA,SAtBYjB,kBAAkBA,GAAA;AAAA,IAAA,OAAAgB,IAAA,CAAAnC,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,GAAA,CAAA;AAAA,CAsB9B,EAAA;;AC9CD,SAAsBgD,eAAeA,GAAA;AAAA,EAAA,OAAAC,gBAAA,CAAAlD,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAUpC,SAAAiD,gBAAA,GAAA;EAAAA,gBAAA,GAAAhD,iBAAA,cAAAC,mBAAA,GAAAC,IAAA,CAVM,SAAAgC,OAAA,GAAA;AAAA,IAAA,IAAAe,aAAA,CAAA;AAAA,IAAA,OAAAhD,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAA2B,SAAAC,QAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,QAAA,CAAAzB,IAAA,GAAAyB,QAAA,CAAAxB,IAAA;AAAA,QAAA,KAAA,CAAA;UACCiC,aAAa,GAAGC,IAAI,CAAC/B,OAAO,CAACgC,GAAG,EAAE,EAAE,SAAS,CAAC,CAAA;UAEpD,IAAI;YACFC,SAAS,CAACH,aAAa,CAAC,CAAA;AAC1B,WAAC,CAAC,OAAOI,KAAK,EAAE,EAAC;AAEjBC,UAAAA,YAAY,CAACC,cAAc,CAACC,aAAa,EAAE,UAAAC,WAAW,EAAG;YACvDC,UAAU,CAACR,IAAI,CAACD,aAAa,EAAE,YAAY,CAAC,EAAEU,IAAI,CAACC,SAAS,CAACH,WAAW,CAAC,GAAG,IAAI,EAAE,YAAK,EAAG,CAAC,CAAA;AAC7F,WAAC,CAAC,CAAA;AAAC,QAAA,KAAA,CAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAA
jB,QAAA,CAAAR,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAAE,OAAA,CAAA,CAAA;GACJ,CAAA,CAAA,CAAA;AAAA,EAAA,OAAAc,gBAAA,CAAAlD,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAED,SAAsB8D,WAAWA,GAAA;AAAA,EAAA,OAAAC,YAAA,CAAAhE,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAOhC,SAAA+D,YAAA,GAAA;EAAAA,YAAA,GAAA9D,iBAAA,cAAAC,mBAAA,GAAAC,IAAA,CAPM,SAAAC,QAAA,GAAA;AAAA,IAAA,IAAAK,WAAA,CAAA;AAAA,IAAA,OAAAP,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAAC,UAAAC,SAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,SAAA,CAAAC,IAAA,GAAAD,SAAA,CAAAE,IAAA;AAAA,QAAA,KAAA,CAAA;AAAA,UAAA,IAAA,CACDG,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,EAAA;AAAAsB,YAAAA,SAAA,CAAAE,IAAA,GAAA,CAAA,CAAA;AAAA,YAAA,MAAA;AAAA,WAAA;AAAA,UAAA,MAC9B,IAAI+C,KAAK,CAAC,mEAAmE,CAAC,CAAA;AAAA,QAAA,KAAA,CAAA;AAGhFvD,UAAAA,WAAW,GAAGa,MAAM,CAACC,UAAU,EAAE,CAAA;AACvCH,UAAAA,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,GAAGgB,WAAW,CAAA;AAAC,QAAA,KAAA,CAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAM,SAAA,CAAAkB,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAA7B,QAAA,CAAA,CAAA;GAClD,CAAA,CAAA,CAAA;AAAA,EAAA,OAAA2D,YAAA,CAAAhE,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA;;;;"}
1
+ {"version":3,"file":"evals.esm.js","sources":["../src/constants.ts","../src/evaluation.ts","../src/attachListeners.ts"],"sourcesContent":["export const GLOBAL_RUN_ID_ENV_KEY = '_MASTRA_GLOBAL_RUN_ID_';\n","import { type Agent, type Metric, evaluate as coreEvaluate } from '@mastra/core';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric) {\n const testInfo = await getCurrentTestInfo();\n let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];\n const runId = crypto.randomUUID();\n const agentOutput = await agent.generate(input, {\n runId,\n });\n\n if (!globalRunId) {\n globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();\n console.warn('Global run id not set, you should run \"globalSetup\" from \"@mastra/evals\" before evaluating.');\n }\n\n const metricResult = await coreEvaluate({\n agentName: agent.name,\n input,\n metric,\n output: agentOutput.text,\n globalRunId,\n runId,\n testInfo,\n });\n\n return metricResult;\n}\n\nexport const getCurrentTestInfo = async () => {\n // Jest\n // @ts-ignore\n if (typeof expect !== 'undefined' && expect.getState) {\n // @ts-ignore\n const state = expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n\n try {\n const vitest = await import('vitest');\n if (typeof vitest !== 'undefined' && vitest.expect?.getState) {\n const state = vitest.expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n } catch {}\n\n return null;\n};\n","import { AvailableHooks, registerHook } from '@mastra/core';\nimport { mkdirSync, appendFile } from 'fs';\nimport { join } from 'path';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function attachListeners() {\n const dotMastraPath = join(process.cwd(), '.mastra');\n\n try {\n mkdirSync(dotMastraPath);\n } catch (error) {}\n\n 
registerHook(AvailableHooks.ON_EVALUATION, traceObject => {\n appendFile(join(dotMastraPath, 'evals.json'), JSON.stringify(traceObject) + '\\n', () => {});\n });\n}\n\nexport async function globalSetup() {\n if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {\n throw new Error('Global run id already set, you should only run \"GlobalSetup\" once');\n }\n\n const globalRunId = crypto.randomUUID();\n process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;\n}\n"],"names":["GLOBAL_RUN_ID_ENV_KEY","evaluate","_x","_x2","_x3","_evaluate","apply","arguments","_asyncToGenerator","_regeneratorRuntime","mark","_callee2","agent","input","metric","testInfo","globalRunId","runId","agentOutput","metricResult","wrap","_callee2$","_context2","prev","next","getCurrentTestInfo","sent","process","env","crypto","randomUUID","generate","console","warn","coreEvaluate","agentName","name","output","text","abrupt","stop","_ref","_callee","state","_vitest$expect","vitest","_state","_callee$","_context","expect","getState","testName","currentTestName","testPath","t0","attachListeners","_attachListeners","dotMastraPath","join","cwd","mkdirSync","error","registerHook","AvailableHooks","ON_EVALUATION","traceObject","appendFile","JSON","stringify","globalSetup","_globalSetup","Error"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAO,IAAMA,qBAAqB,GAAG,wBAAwB;;ACI7D,SAAsBC,QAAQA,CAAAC,EAAA,EAAAC,GAAA,EAAAC,GAAA,EAAA;AAAA,EAAA,OAAAC,SAAA,CAAAC,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAwB7B,SAAAF,SAAA,GAAA;AAAAA,EAAAA,SAAA,GAAAG,iBAAA,cAAAC,mBAAA,EAAAC,CAAAA,IAAA,CAxBM,SAAAC,QAAyCC,CAAAA,KAAQ,EAAEC,KAAmC,EAAEC,MAAc,EAAA;IAAA,IAAAC,QAAA,EAAAC,WAAA,EAAAC,KAAA,EAAAC,WAAA,EAAAC,YAAA,CAAA;AAAA,IAAA,OAAAV,mBAAA,EAAA,CAAAW,I
AAA,CAAA,SAAAC,UAAAC,SAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,SAAA,CAAAC,IAAA,GAAAD,SAAA,CAAAE,IAAA;AAAA,QAAA,KAAA,CAAA;AAAAF,UAAAA,SAAA,CAAAE,IAAA,GAAA,CAAA,CAAA;UAAA,OACpFC,kBAAkB,EAAE,CAAA;AAAA,QAAA,KAAA,CAAA;UAArCV,QAAQ,GAAAO,SAAA,CAAAI,IAAA,CAAA;AACVV,UAAAA,WAAW,GAAGW,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,CAAA;AAC9CiB,UAAAA,KAAK,GAAGY,MAAM,CAACC,UAAU,EAAE,CAAA;AAAAR,UAAAA,SAAA,CAAAE,IAAA,GAAA,CAAA,CAAA;AAAA,UAAA,OACPZ,KAAK,CAACmB,QAAQ,CAAClB,KAAK,EAAE;AAC9CI,YAAAA,KAAK,EAALA,KAAAA;AACD,WAAA,CAAC,CAAA;AAAA,QAAA,KAAA,CAAA;UAFIC,WAAW,GAAAI,SAAA,CAAAI,IAAA,CAAA;UAIjB,IAAI,CAACV,WAAW,EAAE;AAChBA,YAAAA,WAAW,GAAGW,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,GAAG6B,MAAM,CAACC,UAAU,EAAE,CAAA;AACtEE,YAAAA,OAAO,CAACC,IAAI,CAAC,6FAA6F,CAAC,CAAA;AAC7G,WAAA;AAACX,UAAAA,SAAA,CAAAE,IAAA,GAAA,EAAA,CAAA;AAAA,UAAA,OAE0BU,UAAY,CAAC;YACtCC,SAAS,EAAEvB,KAAK,CAACwB,IAAI;AACrBvB,YAAAA,KAAK,EAALA,KAAK;AACLC,YAAAA,MAAM,EAANA,MAAM;YACNuB,MAAM,EAAEnB,WAAW,CAACoB,IAAI;AACxBtB,YAAAA,WAAW,EAAXA,WAAW;AACXC,YAAAA,KAAK,EAALA,KAAK;AACLF,YAAAA,QAAQ,EAARA,QAAAA;AACD,WAAA,CAAC,CAAA;AAAA,QAAA,KAAA,EAAA;UARII,YAAY,GAAAG,SAAA,CAAAI,IAAA,CAAA;AAAA,UAAA,OAAAJ,SAAA,CAAAiB,MAAA,CAAA,QAAA,EAUXpB,YAAY,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAG,SAAA,CAAAkB,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAA7B,QAAA,CAAA,CAAA;GACpB,CAAA,CAAA,CAAA;AAAA,EAAA,OAAAN,SAAA,CAAAC,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAEM,IAAMkB,kBAAkB,gBAAA,YAAA;EAAA,IAAAgB,IAAA,gBAAAjC,iBAAA,cAAAC,mBAAA,EAAAC,CAAAA,IAAA,CAAG,SAAAgC,OAAA,GAAA;AAAA,IAAA,IAAAC,KAAA,EAAAC,cAAA,EAAAC,MAAA,EAAAC,MAAA,CAAA;AAAA,IAAA,OAAArC,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAA2B,SAAAC,QAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,QAAA,CAAAzB,IAAA,GAAAyB,QAAA,CAAAxB,IAAA;AAAA,QAAA,KAAA,CAAA;AAAA,UAAA,IAAA,EAG5B,OAAOyB,MAAM,KAAK,WAAW,IAAIA,MAAM,CAACC,QAAQ,CAAA,EAAA;AAAAF,YAAAA,QAAA,CAAAxB,IAAA,GAAA,CAAA,CAAA;AAAA,YAAA,MAAA;AAAA,WAAA;AAClD;AACMmB,UAAAA,KAAK,GAAGM,MAAM,CAACC,QAAQ,EAAE,CAAA;UAAA,OAAAF,QAAA,CAAAT,MAAA,CACxB,QAAA,EAAA;YACLY,QAAQ,EAAER,KAAK,CAACS,eAA
e;YAC/BC,QAAQ,EAAEV,KAAK,CAACU,QAAAA;WACjB,CAAA,CAAA;AAAA,QAAA,KAAA,CAAA;AAAAL,UAAAA,QAAA,CAAAzB,IAAA,GAAA,CAAA,CAAA;AAAAyB,UAAAA,QAAA,CAAAxB,IAAA,GAAA,CAAA,CAAA;UAAA,OAIoB,OAAO,QAAQ,CAAC,CAAA;AAAA,QAAA,KAAA,CAAA;UAA/BqB,MAAM,GAAAG,QAAA,CAAAtB,IAAA,CAAA;AAAA,UAAA,IAAA,EACR,OAAOmB,MAAM,KAAK,WAAW,IAAAD,CAAAA,cAAA,GAAIC,MAAM,CAACI,MAAM,KAAbL,IAAAA,IAAAA,cAAA,CAAeM,QAAQ,CAAA,EAAA;AAAAF,YAAAA,QAAA,CAAAxB,IAAA,GAAA,EAAA,CAAA;AAAA,YAAA,MAAA;AAAA,WAAA;AACpDmB,UAAAA,MAAK,GAAGE,MAAM,CAACI,MAAM,CAACC,QAAQ,EAAE,CAAA;UAAA,OAAAF,QAAA,CAAAT,MAAA,CAC/B,QAAA,EAAA;YACLY,QAAQ,EAAER,MAAK,CAACS,eAAe;YAC/BC,QAAQ,EAAEV,MAAK,CAACU,QAAAA;WACjB,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA;AAAAL,UAAAA,QAAA,CAAAxB,IAAA,GAAA,EAAA,CAAA;AAAA,UAAA,MAAA;AAAA,QAAA,KAAA,EAAA;AAAAwB,UAAAA,QAAA,CAAAzB,IAAA,GAAA,EAAA,CAAA;UAAAyB,QAAA,CAAAM,EAAA,GAAAN,QAAA,CAAA,OAAA,CAAA,CAAA,CAAA,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA;AAAA,UAAA,OAAAA,QAAA,CAAAT,MAAA,CAAA,QAAA,EAIE,IAAI,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAS,QAAA,CAAAR,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAAE,OAAA,EAAA,IAAA,EAAA,CAAA,CAAA,CAAA,EAAA,EAAA,CAAA,CAAA,CAAA,CAAA;GACZ,CAAA,CAAA,CAAA;AAAA,EAAA,OAAA,SAxBYjB,kBAAkBA,GAAA;AAAA,IAAA,OAAAgB,IAAA,CAAAnC,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,GAAA,CAAA;AAAA,CAwB9B,EAAA;;AChDD,SAAsBgD,eAAeA,GAAA;AAAA,EAAA,OAAAC,gBAAA,CAAAlD,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAUpC,SAAAiD,gBAAA,GAAA;EAAAA,gBAAA,GAAAhD,iBAAA,cAAAC,mBAAA,GAAAC,IAAA,CAVM,SAAAgC,OAAA,GAAA;AAAA,IAAA,IAAAe,aAAA,CAAA;AAAA,IAAA,OAAAhD,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAA2B,SAAAC,QAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,QAAA,CAAAzB,IAAA,GAAAyB,QAAA,CAAAxB,IAAA;AAAA,QAAA,KAAA,CAAA;UACCiC,aAAa,GAAGC,IAAI,CAAC/B,OAAO,CAACgC,GAAG,EAAE,EAAE,SAAS,CAAC,CAAA;UAEpD,IAAI;YACFC,SAAS,CAACH,aAAa,CAAC,CAAA;AAC1B,WAAC,CAAC,OAAOI,KAAK,EAAE,EAAC;AAEjBC,UAAAA,YAAY,CAACC,cAAc,CAACC,aAAa,EAAE,UAAAC,WAAW,EAAG;YACvDC,UAAU,CAACR,IAAI,CAACD,aAAa,EAAE,YAAY,CAAC,EAAEU,IAAI,CAACC,SAAS,CAACH,WAAW,CAAC,GAAG,IAAI,EAAE,YAAK,EAAG,CAAC,CAAA;AAC7F,WAAC,CAAC,CAAA;AAAC,Q
AAA,KAAA,CAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAjB,QAAA,CAAAR,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAAE,OAAA,CAAA,CAAA;GACJ,CAAA,CAAA,CAAA;AAAA,EAAA,OAAAc,gBAAA,CAAAlD,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAED,SAAsB8D,WAAWA,GAAA;AAAA,EAAA,OAAAC,YAAA,CAAAhE,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAOhC,SAAA+D,YAAA,GAAA;EAAAA,YAAA,GAAA9D,iBAAA,cAAAC,mBAAA,GAAAC,IAAA,CAPM,SAAAC,QAAA,GAAA;AAAA,IAAA,IAAAK,WAAA,CAAA;AAAA,IAAA,OAAAP,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAAC,UAAAC,SAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,SAAA,CAAAC,IAAA,GAAAD,SAAA,CAAAE,IAAA;AAAA,QAAA,KAAA,CAAA;AAAA,UAAA,IAAA,CACDG,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,EAAA;AAAAsB,YAAAA,SAAA,CAAAE,IAAA,GAAA,CAAA,CAAA;AAAA,YAAA,MAAA;AAAA,WAAA;AAAA,UAAA,MAC9B,IAAI+C,KAAK,CAAC,mEAAmE,CAAC,CAAA;AAAA,QAAA,KAAA,CAAA;AAGhFvD,UAAAA,WAAW,GAAGa,MAAM,CAACC,UAAU,EAAE,CAAA;AACvCH,UAAAA,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,GAAGgB,WAAW,CAAA;AAAC,QAAA,KAAA,CAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAM,SAAA,CAAAkB,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAA7B,QAAA,CAAA,CAAA;GAClD,CAAA,CAAA,CAAA;AAAA,EAAA,OAAA2D,YAAA,CAAAhE,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA;;;;"}
@@ -1,7 +1,7 @@
1
1
  import { type Agent, type Metric } from '@mastra/core';
2
2
  export declare function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric): Promise<import("@mastra/core").MetricResult>;
3
3
  export declare const getCurrentTestInfo: () => Promise<{
4
- testName: string | undefined;
5
- testPath: string | undefined;
4
+ testName: any;
5
+ testPath: any;
6
6
  } | null>;
7
7
  //# sourceMappingURL=evaluation.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"evaluation.d.ts","sourceRoot":"","sources":["../src/evaluation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,KAAK,EAAE,KAAK,MAAM,EAA4B,MAAM,cAAc,CAAC;AAIjF,wBAAsB,QAAQ,CAAC,CAAC,SAAS,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,gDAwB5G;AAED,eAAO,MAAM,kBAAkB;;;SAsB9B,CAAC"}
1
+ {"version":3,"file":"evaluation.d.ts","sourceRoot":"","sources":["../src/evaluation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,KAAK,EAAE,KAAK,MAAM,EAA4B,MAAM,cAAc,CAAC;AAIjF,wBAAsB,QAAQ,CAAC,CAAC,SAAS,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,gDAwB5G;AAED,eAAO,MAAM,kBAAkB;;;SAwB9B,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mastra/evals",
3
- "version": "0.1.0-alpha.16",
3
+ "version": "0.1.0-alpha.19",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -42,27 +42,24 @@
42
42
  "sentiment": "^5.0.2",
43
43
  "string-similarity": "^4.0.4",
44
44
  "zod": "^3.24.1",
45
- "@mastra/core": "0.1.27-alpha.75"
45
+ "@mastra/core": "0.1.27-alpha.78"
46
46
  },
47
47
  "devDependencies": {
48
48
  "@babel/preset-env": "^7.26.0",
49
49
  "@babel/preset-typescript": "^7.26.0",
50
- "@jest/globals": "^29.7.0",
51
50
  "@tsconfig/recommended": "^1.0.7",
52
51
  "@types/difflib": "^0.2.7",
53
52
  "@types/fs-extra": "^11.0.4",
54
- "@types/jest": "^29.5.12",
55
53
  "@types/sentiment": "^5.0.4",
56
54
  "@types/string-similarity": "^4.0.2",
57
55
  "dts-cli": "^2.0.5",
58
- "jest": "^29.7.0",
59
- "ts-jest": "^29.2.5"
56
+ "vitest": "^3.0.4"
60
57
  },
61
58
  "scripts": {
62
59
  "build": "dts build && pnpm build:llm && pnpm build:nlp",
63
60
  "build:llm": "dts build --name llm --entry ./src/metrics/llm/index.ts --target node --format esm --noClean",
64
61
  "build:nlp": "dts build --name nlp --entry ./src/metrics/nlp/index.ts --target node --format esm --noClean",
65
62
  "build:dev": "dts watch",
66
- "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js"
63
+ "test": "vitest"
67
64
  }
68
65
  }
@@ -1,7 +1,7 @@
1
- import { describe, expect, it } from '@jest/globals';
2
1
  import { Agent } from '@mastra/core';
3
2
  import { ModelConfig } from '@mastra/core';
4
3
  import { Metric } from '@mastra/core';
4
+ import { describe, expect, it } from 'vitest';
5
5
 
6
6
  import { evaluate } from './evaluation';
7
7
 
package/src/evaluation.ts CHANGED
@@ -30,7 +30,9 @@ export async function evaluate<T extends Agent>(agent: T, input: Parameters<T['g
30
30
 
31
31
  export const getCurrentTestInfo = async () => {
32
32
  // Jest
33
+ // @ts-ignore
33
34
  if (typeof expect !== 'undefined' && expect.getState) {
35
+ // @ts-ignore
34
36
  const state = expect.getState();
35
37
  return {
36
38
  testName: state.currentTestName,
@@ -1,5 +1,5 @@
1
- import { describe, it, expect, jest } from '@jest/globals';
2
1
  import { type ModelConfig } from '@mastra/core';
2
+ import { describe, it, expect } from 'vitest';
3
3
 
4
4
  import { TestCase } from '../utils';
5
5
 
@@ -91,7 +91,6 @@ const testCases: TestCase[] = [
91
91
  ];
92
92
 
93
93
  const SECONDS = 10000;
94
- jest.setTimeout(15 * SECONDS);
95
94
 
96
95
  const modelConfig: ModelConfig = {
97
96
  provider: 'OPEN_AI',
@@ -100,54 +99,60 @@ const modelConfig: ModelConfig = {
100
99
  apiKey: process.env.OPENAI_API_KEY,
101
100
  };
102
101
 
103
- describe('AnswerRelevancyMetric', () => {
104
- const metric = new AnswerRelevancyMetric(modelConfig);
102
+ describe(
103
+ 'AnswerRelevancyMetric',
104
+ () => {
105
+ const metric = new AnswerRelevancyMetric(modelConfig);
105
106
 
106
- it('should be able to measure a prompt with perfect relevancy', async () => {
107
- const result = await metric.measure(testCases[0].input, testCases[0].output);
108
- expect(result.score).toBeCloseTo(testCases[0].expectedResult.score, 1);
109
- });
107
+ it('should be able to measure a prompt with perfect relevancy', async () => {
108
+ const result = await metric.measure(testCases[0].input, testCases[0].output);
109
+ expect(result.score).toBeCloseTo(testCases[0].expectedResult.score, 1);
110
+ });
110
111
 
111
112
  it('should be able to measure a prompt with mostly relevant information', async () => {
112
113
  const result = await metric.measure(testCases[1].input, testCases[1].output);
113
114
  const expectedScore = testCases[1].expectedResult.score;
114
115
  const difference = Math.abs(result.score - expectedScore);
115
116
 
116
- expect(difference).toBeLessThanOrEqual(0.1);
117
- });
118
-
119
- it('should be able to measure a prompt with partial relevance', async () => {
120
- const result = await metric.measure(testCases[2].input, testCases[2].output);
121
- expect(result.score).toBeCloseTo(testCases[2].expectedResult.score, 1);
122
- });
123
-
124
- it('should be able to measure a prompt with low relevance', async () => {
125
- const result = await metric.measure(testCases[3].input, testCases[3].output);
126
- expect(result.score).toBeCloseTo(testCases[3].expectedResult.score, 1);
127
- });
128
-
129
- it('should be able to measure a prompt with empty output', async () => {
130
- const result = await metric.measure(testCases[5].input, testCases[5].output);
131
- expect(result.score).toBeCloseTo(testCases[5].expectedResult.score, 1);
132
- });
133
-
134
- it('should be able to measure a prompt with incorrect but relevant answer', async () => {
135
- const result = await metric.measure(testCases[6].input, testCases[6].output);
136
- expect(result.score).toBeCloseTo(testCases[6].expectedResult.score, 1);
117
+ expect(Math.round(difference * 10) / 10).toBeLessThanOrEqual(0.1);
137
118
  });
138
119
 
139
- it('should be able to measure a prompt with a single word correct answer', async () => {
140
- const result = await metric.measure(testCases[7].input, testCases[7].output);
141
- expect(result.score).toBeCloseTo(testCases[7].expectedResult.score, 1);
142
- });
143
-
144
- it('should be able to measure a prompt with multiple questions', async () => {
145
- const result = await metric.measure(testCases[8].input, testCases[8].output);
146
- expect(result.score).toBeCloseTo(testCases[8].expectedResult.score, 1);
147
- });
148
-
149
- it('should be able to measure a prompt with technical gibberish', async () => {
150
- const result = await metric.measure(testCases[9].input, testCases[9].output);
151
- expect(result.score).toBeCloseTo(testCases[9].expectedResult.score, 1);
152
- });
153
- });
120
+ it('should be able to measure a prompt with partial relevance', async () => {
121
+ const result = await metric.measure(testCases[2].input, testCases[2].output);
122
+ expect(result.score).toBeCloseTo(testCases[2].expectedResult.score, 1);
123
+ });
124
+
125
+ it('should be able to measure a prompt with low relevance', async () => {
126
+ const result = await metric.measure(testCases[3].input, testCases[3].output);
127
+ expect(result.score).toBeCloseTo(testCases[3].expectedResult.score, 1);
128
+ });
129
+
130
+ it('should be able to measure a prompt with empty output', async () => {
131
+ const result = await metric.measure(testCases[5].input, testCases[5].output);
132
+ expect(result.score).toBeCloseTo(testCases[5].expectedResult.score, 1);
133
+ });
134
+
135
+ it('should be able to measure a prompt with incorrect but relevant answer', async () => {
136
+ const result = await metric.measure(testCases[6].input, testCases[6].output);
137
+ expect(result.score).toBeCloseTo(testCases[6].expectedResult.score, 1);
138
+ });
139
+
140
+ it('should be able to measure a prompt with a single word correct answer', async () => {
141
+ const result = await metric.measure(testCases[7].input, testCases[7].output);
142
+ expect(result.score).toBeCloseTo(testCases[7].expectedResult.score, 1);
143
+ });
144
+
145
+ it('should be able to measure a prompt with multiple questions', async () => {
146
+ const result = await metric.measure(testCases[8].input, testCases[8].output);
147
+ expect(result.score).toBeCloseTo(testCases[8].expectedResult.score, 1);
148
+ });
149
+
150
+ it('should be able to measure a prompt with technical gibberish', async () => {
151
+ const result = await metric.measure(testCases[9].input, testCases[9].output);
152
+ expect(result.score).toBeCloseTo(testCases[9].expectedResult.score, 1);
153
+ });
154
+ },
155
+ {
156
+ timeout: 15 * SECONDS,
157
+ },
158
+ );
@@ -1,5 +1,5 @@
1
- import { describe, it, expect, jest } from '@jest/globals';
2
1
  import { type ModelConfig } from '@mastra/core';
2
+ import { describe, it, expect } from 'vitest';
3
3
 
4
4
  import { isCloserTo } from '../utils';
5
5
  import { TestCase } from '../utils';
@@ -42,7 +42,6 @@ const testCases: TestCase[] = [
42
42
  ];
43
43
 
44
44
  const SECONDS = 10000;
45
- jest.setTimeout(15 * SECONDS);
46
45
 
47
46
  const modelConfig: ModelConfig = {
48
47
  provider: 'OPEN_AI',
@@ -51,18 +50,20 @@ const modelConfig: ModelConfig = {
51
50
  apiKey: process.env.OPENAI_API_KEY,
52
51
  };
53
52
 
54
- describe('BiasMetric', () => {
55
- const metric = new BiasMetric(modelConfig);
53
+ describe(
54
+ 'BiasMetric',
55
+ () => {
56
+ const metric = new BiasMetric(modelConfig);
56
57
 
57
- it('should be able to measure a prompt that is biased', async () => {
58
- const result = await metric.measure(testCases[0].input, testCases[0].output);
59
- expect(result.score).toBeCloseTo(testCases[0].expectedResult.score, 1);
60
- });
58
+ it('should be able to measure a prompt that is biased', async () => {
59
+ const result = await metric.measure(testCases[0].input, testCases[0].output);
60
+ expect(result.score).toBeCloseTo(testCases[0].expectedResult.score, 1);
61
+ });
61
62
 
62
- it('should be able to measure a prompt that is almost not biased', async () => {
63
- const result = await metric.measure(testCases[1].input, testCases[1].output);
64
- expect(result.score).toBeLessThan(0.5);
65
- });
63
+ it('should be able to measure a prompt that is almost not biased', async () => {
64
+ const result = await metric.measure(testCases[1].input, testCases[1].output);
65
+ expect(result.score).toBeLessThan(0.5);
66
+ });
66
67
 
67
68
  it('should be able to measure a prompt that is mildly biased but actually not', async () => {
68
69
  const result = await metric.measure(testCases[2].input, testCases[2].output);