@mastra/evals 0.1.0-alpha.16 → 0.1.0-alpha.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +23 -0
- package/README.md +186 -0
- package/dist/evals.cjs.development.js +1 -0
- package/dist/evals.cjs.development.js.map +1 -1
- package/dist/evals.cjs.production.min.js.map +1 -1
- package/dist/evals.esm.js +1 -0
- package/dist/evals.esm.js.map +1 -1
- package/dist/evaluation.d.ts +2 -2
- package/dist/evaluation.d.ts.map +1 -1
- package/package.json +4 -7
- package/src/evaluation.test.ts +1 -1
- package/src/evaluation.ts +2 -0
- package/src/metrics/llm/answer-relevancy/index.test.ts +49 -44
- package/src/metrics/llm/bias/index.test.ts +13 -12
- package/src/metrics/llm/context-position/index.test.ts +92 -87
- package/src/metrics/llm/context-precision/index.test.ts +69 -64
- package/src/metrics/llm/context-relevancy/index.test.ts +27 -22
- package/src/metrics/llm/contextual-recall/index.test.ts +28 -23
- package/src/metrics/llm/faithfulness/index.test.ts +81 -76
- package/src/metrics/llm/hallucination/index.test.ts +85 -80
- package/src/metrics/llm/prompt-alignment/index.test.ts +53 -48
- package/src/metrics/llm/summarization/index.test.ts +85 -80
- package/src/metrics/llm/toxicity/index.test.ts +22 -17
- package/src/metrics/nlp/completeness/index.test.ts +1 -1
- package/src/metrics/nlp/content-similarity/index.test.ts +1 -1
- package/src/metrics/nlp/keyword-coverage/index.test.ts +1 -1
- package/src/metrics/nlp/textual-difference/index.test.ts +1 -1
- package/src/metrics/nlp/tone/index.test.ts +1 -1
- package/vitest.config.ts +9 -0
- package/jest.config.ts +0 -21
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,28 @@
|
|
|
1
1
|
# @mastra/evals
|
|
2
2
|
|
|
3
|
+
## 0.1.0-alpha.19
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- Updated dependencies [685108a]
|
|
8
|
+
- Updated dependencies [685108a]
|
|
9
|
+
- @mastra/core@0.1.27-alpha.78
|
|
10
|
+
|
|
11
|
+
## 0.1.0-alpha.18
|
|
12
|
+
|
|
13
|
+
### Patch Changes
|
|
14
|
+
|
|
15
|
+
- Updated dependencies [8105fae]
|
|
16
|
+
- @mastra/core@0.1.27-alpha.77
|
|
17
|
+
|
|
18
|
+
## 0.1.0-alpha.17
|
|
19
|
+
|
|
20
|
+
### Patch Changes
|
|
21
|
+
|
|
22
|
+
- Updated dependencies [ae7bf94]
|
|
23
|
+
- Updated dependencies [ae7bf94]
|
|
24
|
+
- @mastra/core@0.1.27-alpha.76
|
|
25
|
+
|
|
3
26
|
## 0.1.0-alpha.16
|
|
4
27
|
|
|
5
28
|
### Patch Changes
|
package/README.md
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
# @mastra/evals
|
|
2
|
+
|
|
3
|
+
A comprehensive evaluation framework for assessing AI model outputs across multiple dimensions.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install @mastra/evals
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Overview
|
|
12
|
+
|
|
13
|
+
`@mastra/evals` provides a suite of evaluation metrics for assessing AI model outputs. The package includes both LLM-based and NLP-based metrics, enabling both automated and model-assisted evaluation of AI responses.
|
|
14
|
+
|
|
15
|
+
## Features
|
|
16
|
+
|
|
17
|
+
### LLM-Based Metrics
|
|
18
|
+
|
|
19
|
+
1. **Answer Relevancy**
|
|
20
|
+
|
|
21
|
+
- Evaluates how well an answer addresses the input question
|
|
22
|
+
- Considers uncertainty weighting for more nuanced scoring
|
|
23
|
+
- Returns detailed reasoning for scores
|
|
24
|
+
|
|
25
|
+
2. **Bias Detection**
|
|
26
|
+
|
|
27
|
+
- Identifies potential biases in model outputs
|
|
28
|
+
- Analyzes opinions and statements for bias indicators
|
|
29
|
+
- Provides explanations for detected biases
|
|
30
|
+
- Configurable scoring scale
|
|
31
|
+
|
|
32
|
+
3. **Context Precision & Relevancy**
|
|
33
|
+
|
|
34
|
+
- Assesses how well responses use provided context
|
|
35
|
+
- Evaluates accuracy of context usage
|
|
36
|
+
- Measures relevance of context to the response
|
|
37
|
+
- Analyzes context positioning in responses
|
|
38
|
+
|
|
39
|
+
4. **Faithfulness**
|
|
40
|
+
|
|
41
|
+
- Verifies that responses are faithful to provided context
|
|
42
|
+
- Detects hallucinations or fabricated information
|
|
43
|
+
- Evaluates claims against provided context
|
|
44
|
+
- Provides detailed analysis of faithfulness breaches
|
|
45
|
+
|
|
46
|
+
5. **Prompt Alignment**
|
|
47
|
+
|
|
48
|
+
- Measures how well responses follow given instructions
|
|
49
|
+
- Evaluates adherence to multiple instruction criteria
|
|
50
|
+
- Provides per-instruction scoring
|
|
51
|
+
- Supports custom instruction sets
|
|
52
|
+
|
|
53
|
+
6. **Toxicity**
|
|
54
|
+
- Detects toxic or harmful content in responses
|
|
55
|
+
- Provides detailed reasoning for toxicity verdicts
|
|
56
|
+
- Configurable scoring thresholds
|
|
57
|
+
- Considers both input and output context
|
|
58
|
+
|
|
59
|
+
### NLP-Based Metrics
|
|
60
|
+
|
|
61
|
+
1. **Completeness**
|
|
62
|
+
|
|
63
|
+
- Analyzes structural completeness of responses
|
|
64
|
+
- Identifies missing elements from input requirements
|
|
65
|
+
- Provides detailed element coverage analysis
|
|
66
|
+
- Tracks input-output element ratios
|
|
67
|
+
|
|
68
|
+
2. **Content Similarity**
|
|
69
|
+
|
|
70
|
+
- Measures text similarity between inputs and outputs
|
|
71
|
+
- Configurable for case and whitespace sensitivity
|
|
72
|
+
- Returns normalized similarity scores
|
|
73
|
+
- Uses string comparison algorithms for accuracy
|
|
74
|
+
|
|
75
|
+
3. **Keyword Coverage**
|
|
76
|
+
- Tracks presence of key terms from input in output
|
|
77
|
+
- Provides detailed keyword matching statistics
|
|
78
|
+
- Calculates coverage ratios
|
|
79
|
+
- Useful for ensuring comprehensive responses
|
|
80
|
+
|
|
81
|
+
## Usage
|
|
82
|
+
|
|
83
|
+
### Basic Example
|
|
84
|
+
|
|
85
|
+
```typescript
|
|
86
|
+
import { ContentSimilarityMetric, ToxicityMetric } from '@mastra/evals';
|
|
87
|
+
|
|
88
|
+
// Initialize metrics
|
|
89
|
+
const similarityMetric = new ContentSimilarityMetric({
|
|
90
|
+
ignoreCase: true,
|
|
91
|
+
ignoreWhitespace: true,
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
const toxicityMetric = new ToxicityMetric({
|
|
95
|
+
model: {
|
|
96
|
+
provider: 'openai',
|
|
97
|
+
model: 'gpt-4',
|
|
98
|
+
},
|
|
99
|
+
scale: 1, // Optional: adjust scoring scale
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
// Evaluate outputs
|
|
103
|
+
const input = 'What is the capital of France?';
|
|
104
|
+
const output = 'Paris is the capital of France.';
|
|
105
|
+
|
|
106
|
+
const similarityResult = await similarityMetric.measure(input, output);
|
|
107
|
+
const toxicityResult = await toxicityMetric.measure(input, output);
|
|
108
|
+
|
|
109
|
+
console.log('Similarity Score:', similarityResult.score);
|
|
110
|
+
console.log('Toxicity Score:', toxicityResult.score);
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### Context-Aware Evaluation
|
|
114
|
+
|
|
115
|
+
```typescript
|
|
116
|
+
import { FaithfulnessMetric } from '@mastra/evals';
|
|
117
|
+
|
|
118
|
+
// Initialize with context
|
|
119
|
+
const faithfulnessMetric = new FaithfulnessMetric({
|
|
120
|
+
model: {
|
|
121
|
+
provider: 'openai',
|
|
122
|
+
model: 'gpt-4',
|
|
123
|
+
},
|
|
124
|
+
context: ['Paris is the capital of France', 'Paris has a population of 2.2 million'],
|
|
125
|
+
scale: 1,
|
|
126
|
+
});
|
|
127
|
+
|
|
128
|
+
// Evaluate response against context
|
|
129
|
+
const result = await faithfulnessMetric.measure(
|
|
130
|
+
'Tell me about Paris',
|
|
131
|
+
'Paris is the capital of France with 2.2 million residents',
|
|
132
|
+
);
|
|
133
|
+
|
|
134
|
+
console.log('Faithfulness Score:', result.score);
|
|
135
|
+
console.log('Reasoning:', result.reason);
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## Metric Results
|
|
139
|
+
|
|
140
|
+
Each metric returns a standardized result object containing:
|
|
141
|
+
|
|
142
|
+
- `score`: Normalized score (typically 0-1)
|
|
143
|
+
- `info`: Detailed information about the evaluation
|
|
144
|
+
- Additional metric-specific data (e.g., matched keywords, missing elements)
|
|
145
|
+
|
|
146
|
+
Some metrics also provide:
|
|
147
|
+
|
|
148
|
+
- `reason`: Detailed explanation of the score
|
|
149
|
+
- `verdicts`: Individual judgments that contributed to the final score
|
|
150
|
+
|
|
151
|
+
## Telemetry and Logging
|
|
152
|
+
|
|
153
|
+
The package includes built-in telemetry and logging capabilities:
|
|
154
|
+
|
|
155
|
+
- Automatic evaluation tracking in `.mastra/evals.json` (one JSON-serialized evaluation trace appended per line)
|
|
156
|
+
- Integration with OpenTelemetry for performance monitoring
|
|
157
|
+
- Detailed evaluation traces for debugging
|
|
158
|
+
|
|
159
|
+
```typescript
|
|
160
|
+
import { attachListeners } from '@mastra/evals';
|
|
161
|
+
|
|
162
|
+
// Enable evaluation tracking
|
|
163
|
+
await attachListeners();
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
## Environment Variables
|
|
167
|
+
|
|
168
|
+
Required for LLM-based metrics:
|
|
169
|
+
|
|
170
|
+
- `OPENAI_API_KEY`: For OpenAI model access
|
|
171
|
+
- Additional provider keys as needed (Cohere, Anthropic, etc.)
|
|
172
|
+
|
|
173
|
+
## Package Exports
|
|
174
|
+
|
|
175
|
+
```typescript
|
|
176
|
+
// Main package exports
|
|
177
|
+
import { evaluate } from '@mastra/evals';
|
|
178
|
+
// NLP-specific metrics
|
|
179
|
+
import { ContentSimilarityMetric } from '@mastra/evals/nlp';
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
## Related Packages
|
|
183
|
+
|
|
184
|
+
- `@mastra/core`: Core framework functionality
|
|
185
|
+
- `@mastra/engine`: LLM execution engine
|
|
186
|
+
- `@mastra/mcp`: Model Context Protocol integration
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"evals.cjs.development.js","sources":["../src/constants.ts","../src/evaluation.ts","../src/attachListeners.ts"],"sourcesContent":["export const GLOBAL_RUN_ID_ENV_KEY = '_MASTRA_GLOBAL_RUN_ID_';\n","import { type Agent, type Metric, evaluate as coreEvaluate } from '@mastra/core';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric) {\n const testInfo = await getCurrentTestInfo();\n let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];\n const runId = crypto.randomUUID();\n const agentOutput = await agent.generate(input, {\n runId,\n });\n\n if (!globalRunId) {\n globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();\n console.warn('Global run id not set, you should run \"globalSetup\" from \"@mastra/evals\" before evaluating.');\n }\n\n const metricResult = await coreEvaluate({\n agentName: agent.name,\n input,\n metric,\n output: agentOutput.text,\n globalRunId,\n runId,\n testInfo,\n });\n\n return metricResult;\n}\n\nexport const getCurrentTestInfo = async () => {\n // Jest\n if (typeof expect !== 'undefined' && expect.getState) {\n const state = expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n\n try {\n const vitest = await import('vitest');\n if (typeof vitest !== 'undefined' && vitest.expect?.getState) {\n const state = vitest.expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n } catch {}\n\n return null;\n};\n","import { AvailableHooks, registerHook } from '@mastra/core';\nimport { mkdirSync, appendFile } from 'fs';\nimport { join } from 'path';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function attachListeners() {\n const dotMastraPath = join(process.cwd(), '.mastra');\n\n try {\n mkdirSync(dotMastraPath);\n } catch (error) {}\n\n registerHook(AvailableHooks.ON_EVALUATION, 
traceObject => {\n appendFile(join(dotMastraPath, 'evals.json'), JSON.stringify(traceObject) + '\\n', () => {});\n });\n}\n\nexport async function globalSetup() {\n if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {\n throw new Error('Global run id already set, you should only run \"GlobalSetup\" once');\n }\n\n const globalRunId = crypto.randomUUID();\n process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;\n}\n"],"names":["GLOBAL_RUN_ID_ENV_KEY","evaluate","_x","_x2","_x3","_evaluate","apply","arguments","_asyncToGenerator","_regeneratorRuntime","mark","_callee2","agent","input","metric","testInfo","globalRunId","runId","agentOutput","metricResult","wrap","_callee2$","_context2","prev","next","getCurrentTestInfo","sent","process","env","crypto","randomUUID","generate","console","warn","coreEvaluate","agentName","name","output","text","abrupt","stop","_ref","_callee","state","_vitest$expect","vitest","_state","_callee$","_context","expect","getState","testName","currentTestName","testPath","t0","attachListeners","_attachListeners","dotMastraPath","join","cwd","mkdirSync","error","registerHook","AvailableHooks","ON_EVALUATION","traceObject","appendFile","JSON","stringify","globalSetup","_globalSetup","Error"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAO,IAAMA,qBAAqB,GAAG,wBAAwB;;ACI7D,SAAsBC,QAAQA,CAAAC,EAAA,EAAAC,GAAA,EAAAC,GAAA,EAAA;AAAA,EAAA,OAAAC,SAAA,CAAAC,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAwB7B,SAAAF,SAAA,GAAA;AAAAA,EAAAA,SAAA,GAAAG,iBAAA,cAAAC,mBAAA,EAAAC,CAAAA,IAAA,CAxBM,SAAAC,QAAyCC,CAAAA,KAAQ,EAAEC,KAAmC,EAAEC,MAAc,EAAA;IAAA,IAAAC,QAAA,EAAAC,WAAA,EAAAC,KAAA,EAAAC,WAAA,EAAAC,YAAA,CAAA;AAAA,IAAA,OAAAV,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAAC,UAAAC,SAAA,EAAA;AAAA,MAA
A,OAAA,CAAA,EAAA,QAAAA,SAAA,CAAAC,IAAA,GAAAD,SAAA,CAAAE,IAAA;AAAA,QAAA,KAAA,CAAA;AAAAF,UAAAA,SAAA,CAAAE,IAAA,GAAA,CAAA,CAAA;UAAA,OACpFC,kBAAkB,EAAE,CAAA;AAAA,QAAA,KAAA,CAAA;UAArCV,QAAQ,GAAAO,SAAA,CAAAI,IAAA,CAAA;AACVV,UAAAA,WAAW,GAAGW,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,CAAA;AAC9CiB,UAAAA,KAAK,GAAGY,MAAM,CAACC,UAAU,EAAE,CAAA;AAAAR,UAAAA,SAAA,CAAAE,IAAA,GAAA,CAAA,CAAA;AAAA,UAAA,OACPZ,KAAK,CAACmB,QAAQ,CAAClB,KAAK,EAAE;AAC9CI,YAAAA,KAAK,EAALA,KAAAA;AACD,WAAA,CAAC,CAAA;AAAA,QAAA,KAAA,CAAA;UAFIC,WAAW,GAAAI,SAAA,CAAAI,IAAA,CAAA;UAIjB,IAAI,CAACV,WAAW,EAAE;AAChBA,YAAAA,WAAW,GAAGW,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,GAAG6B,MAAM,CAACC,UAAU,EAAE,CAAA;AACtEE,YAAAA,OAAO,CAACC,IAAI,CAAC,6FAA6F,CAAC,CAAA;AAC7G,WAAA;AAACX,UAAAA,SAAA,CAAAE,IAAA,GAAA,EAAA,CAAA;AAAA,UAAA,OAE0BU,aAAY,CAAC;YACtCC,SAAS,EAAEvB,KAAK,CAACwB,IAAI;AACrBvB,YAAAA,KAAK,EAALA,KAAK;AACLC,YAAAA,MAAM,EAANA,MAAM;YACNuB,MAAM,EAAEnB,WAAW,CAACoB,IAAI;AACxBtB,YAAAA,WAAW,EAAXA,WAAW;AACXC,YAAAA,KAAK,EAALA,KAAK;AACLF,YAAAA,QAAQ,EAARA,QAAAA;AACD,WAAA,CAAC,CAAA;AAAA,QAAA,KAAA,EAAA;UARII,YAAY,GAAAG,SAAA,CAAAI,IAAA,CAAA;AAAA,UAAA,OAAAJ,SAAA,CAAAiB,MAAA,CAAA,QAAA,EAUXpB,YAAY,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAG,SAAA,CAAAkB,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAA7B,QAAA,CAAA,CAAA;GACpB,CAAA,CAAA,CAAA;AAAA,EAAA,OAAAN,SAAA,CAAAC,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAEM,IAAMkB,kBAAkB,gBAAA,YAAA;EAAA,IAAAgB,IAAA,gBAAAjC,iBAAA,cAAAC,mBAAA,EAAAC,CAAAA,IAAA,CAAG,SAAAgC,OAAA,GAAA;AAAA,IAAA,IAAAC,KAAA,EAAAC,cAAA,EAAAC,MAAA,EAAAC,MAAA,CAAA;AAAA,IAAA,OAAArC,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAA2B,SAAAC,QAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,QAAA,CAAAzB,IAAA,GAAAyB,QAAA,CAAAxB,IAAA;AAAA,QAAA,KAAA,CAAA;AAAA,UAAA,IAAA,
|
|
1
|
+
{"version":3,"file":"evals.cjs.development.js","sources":["../src/constants.ts","../src/evaluation.ts","../src/attachListeners.ts"],"sourcesContent":["export const GLOBAL_RUN_ID_ENV_KEY = '_MASTRA_GLOBAL_RUN_ID_';\n","import { type Agent, type Metric, evaluate as coreEvaluate } from '@mastra/core';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric) {\n const testInfo = await getCurrentTestInfo();\n let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];\n const runId = crypto.randomUUID();\n const agentOutput = await agent.generate(input, {\n runId,\n });\n\n if (!globalRunId) {\n globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();\n console.warn('Global run id not set, you should run \"globalSetup\" from \"@mastra/evals\" before evaluating.');\n }\n\n const metricResult = await coreEvaluate({\n agentName: agent.name,\n input,\n metric,\n output: agentOutput.text,\n globalRunId,\n runId,\n testInfo,\n });\n\n return metricResult;\n}\n\nexport const getCurrentTestInfo = async () => {\n // Jest\n // @ts-ignore\n if (typeof expect !== 'undefined' && expect.getState) {\n // @ts-ignore\n const state = expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n\n try {\n const vitest = await import('vitest');\n if (typeof vitest !== 'undefined' && vitest.expect?.getState) {\n const state = vitest.expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n } catch {}\n\n return null;\n};\n","import { AvailableHooks, registerHook } from '@mastra/core';\nimport { mkdirSync, appendFile } from 'fs';\nimport { join } from 'path';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function attachListeners() {\n const dotMastraPath = join(process.cwd(), '.mastra');\n\n try {\n mkdirSync(dotMastraPath);\n } catch (error) {}\n\n 
registerHook(AvailableHooks.ON_EVALUATION, traceObject => {\n appendFile(join(dotMastraPath, 'evals.json'), JSON.stringify(traceObject) + '\\n', () => {});\n });\n}\n\nexport async function globalSetup() {\n if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {\n throw new Error('Global run id already set, you should only run \"GlobalSetup\" once');\n }\n\n const globalRunId = crypto.randomUUID();\n process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;\n}\n"],"names":["GLOBAL_RUN_ID_ENV_KEY","evaluate","_x","_x2","_x3","_evaluate","apply","arguments","_asyncToGenerator","_regeneratorRuntime","mark","_callee2","agent","input","metric","testInfo","globalRunId","runId","agentOutput","metricResult","wrap","_callee2$","_context2","prev","next","getCurrentTestInfo","sent","process","env","crypto","randomUUID","generate","console","warn","coreEvaluate","agentName","name","output","text","abrupt","stop","_ref","_callee","state","_vitest$expect","vitest","_state","_callee$","_context","expect","getState","testName","currentTestName","testPath","t0","attachListeners","_attachListeners","dotMastraPath","join","cwd","mkdirSync","error","registerHook","AvailableHooks","ON_EVALUATION","traceObject","appendFile","JSON","stringify","globalSetup","_globalSetup","Error"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAO,IAAMA,qBAAqB,GAAG,wBAAwB;;ACI7D,SAAsBC,QAAQA,CAAAC,EAAA,EAAAC,GAAA,EAAAC,GAAA,EAAA;AAAA,EAAA,OAAAC,SAAA,CAAAC,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAwB7B,SAAAF,SAAA,GAAA;AAAAA,EAAAA,SAAA,GAAAG,iBAAA,cAAAC,mBAAA,EAAAC,CAAAA,IAAA,CAxBM,SAAAC,QAAyCC,CAAAA,KAAQ,EAAEC,KAAmC,EAAEC,MAAc,EAAA;IAAA,IAAAC,QAAA,EAAAC,WAAA,EAAAC,KAAA,EAAAC,WAAA,EAAAC,YAAA,CAAA;AAAA,IAAA,OAAAV,mBAAA,EAAA,CAA
AW,IAAA,CAAA,SAAAC,UAAAC,SAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,SAAA,CAAAC,IAAA,GAAAD,SAAA,CAAAE,IAAA;AAAA,QAAA,KAAA,CAAA;AAAAF,UAAAA,SAAA,CAAAE,IAAA,GAAA,CAAA,CAAA;UAAA,OACpFC,kBAAkB,EAAE,CAAA;AAAA,QAAA,KAAA,CAAA;UAArCV,QAAQ,GAAAO,SAAA,CAAAI,IAAA,CAAA;AACVV,UAAAA,WAAW,GAAGW,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,CAAA;AAC9CiB,UAAAA,KAAK,GAAGY,MAAM,CAACC,UAAU,EAAE,CAAA;AAAAR,UAAAA,SAAA,CAAAE,IAAA,GAAA,CAAA,CAAA;AAAA,UAAA,OACPZ,KAAK,CAACmB,QAAQ,CAAClB,KAAK,EAAE;AAC9CI,YAAAA,KAAK,EAALA,KAAAA;AACD,WAAA,CAAC,CAAA;AAAA,QAAA,KAAA,CAAA;UAFIC,WAAW,GAAAI,SAAA,CAAAI,IAAA,CAAA;UAIjB,IAAI,CAACV,WAAW,EAAE;AAChBA,YAAAA,WAAW,GAAGW,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,GAAG6B,MAAM,CAACC,UAAU,EAAE,CAAA;AACtEE,YAAAA,OAAO,CAACC,IAAI,CAAC,6FAA6F,CAAC,CAAA;AAC7G,WAAA;AAACX,UAAAA,SAAA,CAAAE,IAAA,GAAA,EAAA,CAAA;AAAA,UAAA,OAE0BU,aAAY,CAAC;YACtCC,SAAS,EAAEvB,KAAK,CAACwB,IAAI;AACrBvB,YAAAA,KAAK,EAALA,KAAK;AACLC,YAAAA,MAAM,EAANA,MAAM;YACNuB,MAAM,EAAEnB,WAAW,CAACoB,IAAI;AACxBtB,YAAAA,WAAW,EAAXA,WAAW;AACXC,YAAAA,KAAK,EAALA,KAAK;AACLF,YAAAA,QAAQ,EAARA,QAAAA;AACD,WAAA,CAAC,CAAA;AAAA,QAAA,KAAA,EAAA;UARII,YAAY,GAAAG,SAAA,CAAAI,IAAA,CAAA;AAAA,UAAA,OAAAJ,SAAA,CAAAiB,MAAA,CAAA,QAAA,EAUXpB,YAAY,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAG,SAAA,CAAAkB,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAA7B,QAAA,CAAA,CAAA;GACpB,CAAA,CAAA,CAAA;AAAA,EAAA,OAAAN,SAAA,CAAAC,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAEM,IAAMkB,kBAAkB,gBAAA,YAAA;EAAA,IAAAgB,IAAA,gBAAAjC,iBAAA,cAAAC,mBAAA,EAAAC,CAAAA,IAAA,CAAG,SAAAgC,OAAA,GAAA;AAAA,IAAA,IAAAC,KAAA,EAAAC,cAAA,EAAAC,MAAA,EAAAC,MAAA,CAAA;AAAA,IAAA,OAAArC,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAA2B,SAAAC,QAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,QAAA,CAAAzB,IAAA,GAAAyB,QAAA,CAAAxB,IAAA;AAAA,QAAA,KAAA,CAAA;AAAA,UAAA,IAAA,EAG5B,OAAOyB,MAAM,KAAK,WAAW,IAAIA,MAAM,CAACC,QAAQ,CAAA,EAAA;AAAAF,YAAAA,QAAA,CAAAxB,IAAA,GAAA,CAAA,CAAA;AAAA,YAAA,MAAA;AAAA,WAAA;AAClD;AACMmB,UAAAA,KAAK,GAAGM,MAAM,CAACC,QAAQ,EAAE,CAAA;UAAA,OAAAF,QAAA,CAAAT,MAAA,CACxB,QAAA,EAAA;YACLY,QAAQ,EAAER,KAAK,CAACS
,eAAe;YAC/BC,QAAQ,EAAEV,KAAK,CAACU,QAAAA;WACjB,CAAA,CAAA;AAAA,QAAA,KAAA,CAAA;AAAAL,UAAAA,QAAA,CAAAzB,IAAA,GAAA,CAAA,CAAA;AAAAyB,UAAAA,QAAA,CAAAxB,IAAA,GAAA,CAAA,CAAA;UAAA,OAIoB,OAAO,QAAQ,CAAC,CAAA;AAAA,QAAA,KAAA,CAAA;UAA/BqB,MAAM,GAAAG,QAAA,CAAAtB,IAAA,CAAA;AAAA,UAAA,IAAA,EACR,OAAOmB,MAAM,KAAK,WAAW,IAAAD,CAAAA,cAAA,GAAIC,MAAM,CAACI,MAAM,KAAbL,IAAAA,IAAAA,cAAA,CAAeM,QAAQ,CAAA,EAAA;AAAAF,YAAAA,QAAA,CAAAxB,IAAA,GAAA,EAAA,CAAA;AAAA,YAAA,MAAA;AAAA,WAAA;AACpDmB,UAAAA,MAAK,GAAGE,MAAM,CAACI,MAAM,CAACC,QAAQ,EAAE,CAAA;UAAA,OAAAF,QAAA,CAAAT,MAAA,CAC/B,QAAA,EAAA;YACLY,QAAQ,EAAER,MAAK,CAACS,eAAe;YAC/BC,QAAQ,EAAEV,MAAK,CAACU,QAAAA;WACjB,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA;AAAAL,UAAAA,QAAA,CAAAxB,IAAA,GAAA,EAAA,CAAA;AAAA,UAAA,MAAA;AAAA,QAAA,KAAA,EAAA;AAAAwB,UAAAA,QAAA,CAAAzB,IAAA,GAAA,EAAA,CAAA;UAAAyB,QAAA,CAAAM,EAAA,GAAAN,QAAA,CAAA,OAAA,CAAA,CAAA,CAAA,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA;AAAA,UAAA,OAAAA,QAAA,CAAAT,MAAA,CAAA,QAAA,EAIE,IAAI,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAS,QAAA,CAAAR,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAAE,OAAA,EAAA,IAAA,EAAA,CAAA,CAAA,CAAA,EAAA,EAAA,CAAA,CAAA,CAAA,CAAA;GACZ,CAAA,CAAA,CAAA;AAAA,EAAA,OAAA,SAxBYjB,kBAAkBA,GAAA;AAAA,IAAA,OAAAgB,IAAA,CAAAnC,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,GAAA,CAAA;AAAA,CAwB9B,EAAA;;AChDD,SAAsBgD,eAAeA,GAAA;AAAA,EAAA,OAAAC,gBAAA,CAAAlD,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAUpC,SAAAiD,gBAAA,GAAA;EAAAA,gBAAA,GAAAhD,iBAAA,cAAAC,mBAAA,GAAAC,IAAA,CAVM,SAAAgC,OAAA,GAAA;AAAA,IAAA,IAAAe,aAAA,CAAA;AAAA,IAAA,OAAAhD,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAA2B,SAAAC,QAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,QAAA,CAAAzB,IAAA,GAAAyB,QAAA,CAAAxB,IAAA;AAAA,QAAA,KAAA,CAAA;UACCiC,aAAa,GAAGC,SAAI,CAAC/B,OAAO,CAACgC,GAAG,EAAE,EAAE,SAAS,CAAC,CAAA;UAEpD,IAAI;YACFC,YAAS,CAACH,aAAa,CAAC,CAAA;AAC1B,WAAC,CAAC,OAAOI,KAAK,EAAE,EAAC;AAEjBC,UAAAA,iBAAY,CAACC,mBAAc,CAACC,aAAa,EAAE,UAAAC,WAAW,EAAG;YACvDC,aAAU,CAACR,SAAI,CAACD,aAAa,EAAE,YAAY,CAAC,EAAEU,IAAI,CAACC,SAAS,CAACH,WAAW,CAAC,GAAG,IAAI,EAAE,YAAK,EAAG,CAAC,CAAA;AAC7F,WAAC,CAAC,CAAA;
AAAC,QAAA,KAAA,CAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAjB,QAAA,CAAAR,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAAE,OAAA,CAAA,CAAA;GACJ,CAAA,CAAA,CAAA;AAAA,EAAA,OAAAc,gBAAA,CAAAlD,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAED,SAAsB8D,WAAWA,GAAA;AAAA,EAAA,OAAAC,YAAA,CAAAhE,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAOhC,SAAA+D,YAAA,GAAA;EAAAA,YAAA,GAAA9D,iBAAA,cAAAC,mBAAA,GAAAC,IAAA,CAPM,SAAAC,QAAA,GAAA;AAAA,IAAA,IAAAK,WAAA,CAAA;AAAA,IAAA,OAAAP,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAAC,UAAAC,SAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,SAAA,CAAAC,IAAA,GAAAD,SAAA,CAAAE,IAAA;AAAA,QAAA,KAAA,CAAA;AAAA,UAAA,IAAA,CACDG,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,EAAA;AAAAsB,YAAAA,SAAA,CAAAE,IAAA,GAAA,CAAA,CAAA;AAAA,YAAA,MAAA;AAAA,WAAA;AAAA,UAAA,MAC9B,IAAI+C,KAAK,CAAC,mEAAmE,CAAC,CAAA;AAAA,QAAA,KAAA,CAAA;AAGhFvD,UAAAA,WAAW,GAAGa,MAAM,CAACC,UAAU,EAAE,CAAA;AACvCH,UAAAA,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,GAAGgB,WAAW,CAAA;AAAC,QAAA,KAAA,CAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAM,SAAA,CAAAkB,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAA7B,QAAA,CAAA,CAAA;GAClD,CAAA,CAAA,CAAA;AAAA,EAAA,OAAA2D,YAAA,CAAAhE,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA;;;;;;"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"evals.cjs.production.min.js","sources":["../src/constants.ts","../src/evaluation.ts","../src/attachListeners.ts"],"sourcesContent":["export const GLOBAL_RUN_ID_ENV_KEY = '_MASTRA_GLOBAL_RUN_ID_';\n","import { type Agent, type Metric, evaluate as coreEvaluate } from '@mastra/core';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric) {\n const testInfo = await getCurrentTestInfo();\n let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];\n const runId = crypto.randomUUID();\n const agentOutput = await agent.generate(input, {\n runId,\n });\n\n if (!globalRunId) {\n globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();\n console.warn('Global run id not set, you should run \"globalSetup\" from \"@mastra/evals\" before evaluating.');\n }\n\n const metricResult = await coreEvaluate({\n agentName: agent.name,\n input,\n metric,\n output: agentOutput.text,\n globalRunId,\n runId,\n testInfo,\n });\n\n return metricResult;\n}\n\nexport const getCurrentTestInfo = async () => {\n // Jest\n if (typeof expect !== 'undefined' && expect.getState) {\n const state = expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n\n try {\n const vitest = await import('vitest');\n if (typeof vitest !== 'undefined' && vitest.expect?.getState) {\n const state = vitest.expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n } catch {}\n\n return null;\n};\n","import { AvailableHooks, registerHook } from '@mastra/core';\nimport { mkdirSync, appendFile } from 'fs';\nimport { join } from 'path';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function attachListeners() {\n const dotMastraPath = join(process.cwd(), '.mastra');\n\n try {\n mkdirSync(dotMastraPath);\n } catch (error) {}\n\n 
registerHook(AvailableHooks.ON_EVALUATION, traceObject => {\n appendFile(join(dotMastraPath, 'evals.json'), JSON.stringify(traceObject) + '\\n', () => {});\n });\n}\n\nexport async function globalSetup() {\n if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {\n throw new Error('Global run id already set, you should only run \"GlobalSetup\" once');\n }\n\n const globalRunId = crypto.randomUUID();\n process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;\n}\n"],"names":["GLOBAL_RUN_ID_ENV_KEY","_evaluate","_asyncToGenerator","_regeneratorRuntime","mark","_callee2","agent","input","metric","testInfo","globalRunId","runId","agentOutput","wrap","_context2","prev","next","getCurrentTestInfo","sent","process","env","crypto","randomUUID","generate","console","warn","coreEvaluate","agentName","name","output","text","abrupt","stop","apply","this","arguments","_ref","_callee","state","_vitest$expect","vitest","_state","_context","expect","getState","testName","currentTestName","testPath","import","t0","_attachListeners","dotMastraPath","join","cwd","mkdirSync","error","registerHook","AvailableHooks","ON_EVALUATION","traceObject","appendFile","JSON","stringify","_globalSetup","Error","_x","_x2","_x3"],"mappings":"oyNAAO,IAAMA,EAAwB,yBC4BpC,SAAAC,IAAA,OAAAA,EAAAC,EAAAC,IAAAC,MAxBM,SAAAC,EAAyCC,EAAUC,EAAqCC,GAAc,IAAAC,EAAAC,EAAAC,EAAAC,EAAA,OAAAT,IAAAU,MAAA,SAAAC,GAAA,cAAAA,EAAAC,KAAAD,EAAAE,MAAA,KAAA,EAAA,OAAAF,EAAAE,KAAA,EACpFC,IAAoB,KAAA,EAEV,OAF3BR,EAAQK,EAAAI,KACVR,EAAcS,QAAQC,IAAIpB,GACxBW,EAAQU,OAAOC,aAAYR,EAAAE,KAAA,EACPV,EAAMiB,SAAShB,EAAO,CAC9CI,MAAAA,IACA,KAAA,EAKD,OAPKC,EAAWE,EAAAI,KAIZR,IACHA,EAAcS,QAAQC,IAAIpB,GAAyBqB,OAAOC,aAC1DE,QAAQC,KAAK,gGACdX,EAAAE,KAAA,GAE0BU,WAAa,CACtCC,UAAWrB,EAAMsB,KACjBrB,MAAAA,EACAC,OAAAA,EACAqB,OAAQjB,EAAYkB,KACpBpB,YAAAA,EACAC,MAAAA,EACAF,SAAAA,IACA,KAAA,GARgB,OAAAK,EAAAiB,OAAA,SAAAjB,EAAAI,MAUC,KAAA,GAAA,IAAA,MAAA,OAAAJ,EAAAkB,OAAA,GAAA3B,EACpB,MAAA4B,MAAAC,KAAAC,UAAA,CAEM,IAAMlB,EAAkB,WAAA,IAAAmB,EAAAlC,EAAAC,IAAAC,MAAG,SAAAiC,IAAA,IAAAC,EAAAC,EAAAC
,EAAAC,EAAA,OAAAtC,IAAAU,MAAA,SAAA6B,GAAA,cAAAA,EAAA3B,KAAA2B,EAAA1B,MAAA,KAAA,EAAA,
|
|
1
|
+
{"version":3,"file":"evals.cjs.production.min.js","sources":["../src/constants.ts","../src/evaluation.ts","../src/attachListeners.ts"],"sourcesContent":["export const GLOBAL_RUN_ID_ENV_KEY = '_MASTRA_GLOBAL_RUN_ID_';\n","import { type Agent, type Metric, evaluate as coreEvaluate } from '@mastra/core';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric) {\n const testInfo = await getCurrentTestInfo();\n let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];\n const runId = crypto.randomUUID();\n const agentOutput = await agent.generate(input, {\n runId,\n });\n\n if (!globalRunId) {\n globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();\n console.warn('Global run id not set, you should run \"globalSetup\" from \"@mastra/evals\" before evaluating.');\n }\n\n const metricResult = await coreEvaluate({\n agentName: agent.name,\n input,\n metric,\n output: agentOutput.text,\n globalRunId,\n runId,\n testInfo,\n });\n\n return metricResult;\n}\n\nexport const getCurrentTestInfo = async () => {\n // Jest\n // @ts-ignore\n if (typeof expect !== 'undefined' && expect.getState) {\n // @ts-ignore\n const state = expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n\n try {\n const vitest = await import('vitest');\n if (typeof vitest !== 'undefined' && vitest.expect?.getState) {\n const state = vitest.expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n } catch {}\n\n return null;\n};\n","import { AvailableHooks, registerHook } from '@mastra/core';\nimport { mkdirSync, appendFile } from 'fs';\nimport { join } from 'path';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function attachListeners() {\n const dotMastraPath = join(process.cwd(), '.mastra');\n\n try {\n mkdirSync(dotMastraPath);\n } catch (error) {}\n\n 
registerHook(AvailableHooks.ON_EVALUATION, traceObject => {\n appendFile(join(dotMastraPath, 'evals.json'), JSON.stringify(traceObject) + '\\n', () => {});\n });\n}\n\nexport async function globalSetup() {\n if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {\n throw new Error('Global run id already set, you should only run \"GlobalSetup\" once');\n }\n\n const globalRunId = crypto.randomUUID();\n process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;\n}\n"],"names":["GLOBAL_RUN_ID_ENV_KEY","_evaluate","_asyncToGenerator","_regeneratorRuntime","mark","_callee2","agent","input","metric","testInfo","globalRunId","runId","agentOutput","wrap","_context2","prev","next","getCurrentTestInfo","sent","process","env","crypto","randomUUID","generate","console","warn","coreEvaluate","agentName","name","output","text","abrupt","stop","apply","this","arguments","_ref","_callee","state","_vitest$expect","vitest","_state","_context","expect","getState","testName","currentTestName","testPath","import","t0","_attachListeners","dotMastraPath","join","cwd","mkdirSync","error","registerHook","AvailableHooks","ON_EVALUATION","traceObject","appendFile","JSON","stringify","_globalSetup","Error","_x","_x2","_x3"],"mappings":"oyNAAO,IAAMA,EAAwB,yBC4BpC,SAAAC,IAAA,OAAAA,EAAAC,EAAAC,IAAAC,MAxBM,SAAAC,EAAyCC,EAAUC,EAAqCC,GAAc,IAAAC,EAAAC,EAAAC,EAAAC,EAAA,OAAAT,IAAAU,MAAA,SAAAC,GAAA,cAAAA,EAAAC,KAAAD,EAAAE,MAAA,KAAA,EAAA,OAAAF,EAAAE,KAAA,EACpFC,IAAoB,KAAA,EAEV,OAF3BR,EAAQK,EAAAI,KACVR,EAAcS,QAAQC,IAAIpB,GACxBW,EAAQU,OAAOC,aAAYR,EAAAE,KAAA,EACPV,EAAMiB,SAAShB,EAAO,CAC9CI,MAAAA,IACA,KAAA,EAKD,OAPKC,EAAWE,EAAAI,KAIZR,IACHA,EAAcS,QAAQC,IAAIpB,GAAyBqB,OAAOC,aAC1DE,QAAQC,KAAK,gGACdX,EAAAE,KAAA,GAE0BU,WAAa,CACtCC,UAAWrB,EAAMsB,KACjBrB,MAAAA,EACAC,OAAAA,EACAqB,OAAQjB,EAAYkB,KACpBpB,YAAAA,EACAC,MAAAA,EACAF,SAAAA,IACA,KAAA,GARgB,OAAAK,EAAAiB,OAAA,SAAAjB,EAAAI,MAUC,KAAA,GAAA,IAAA,MAAA,OAAAJ,EAAAkB,OAAA,GAAA3B,EACpB,MAAA4B,MAAAC,KAAAC,UAAA,CAEM,IAAMlB,EAAkB,WAAA,IAAAmB,EAAAlC,EAAAC,IAAAC,MAAG,SAAAiC,IAAA,IAAAC,EAAAC,EAAAC
,EAAAC,EAAA,OAAAtC,IAAAU,MAAA,SAAA6B,GAAA,cAAAA,EAAA3B,KAAA2B,EAAA1B,MAAA,KAAA,EAAA,GAGV,oBAAX2B,SAA0BA,OAAOC,SAAQ,CAAAF,EAAA1B,KAAA,EAAA,KAAA,CAEnB,OAAzBsB,EAAQK,OAAOC,WAAUF,EAAAX,OACxB,SAAA,CACLc,SAAUP,EAAMQ,gBAChBC,SAAUT,EAAMS,WACjB,KAAA,EAAA,OAAAL,EAAA3B,KAAA,EAAA2B,EAAA1B,KAAA,EAIoBgC,OAAO,UAAS,KAAA,EAAzB,QACU,KADhBR,EAAME,EAAAxB,OACyBqB,OAAJA,EAAIC,EAAOG,UAAPJ,EAAeK,SAAQ,CAAAF,EAAA1B,KAAA,GAAA,KAAA,CACpB,OAAhCsB,EAAQE,EAAOG,OAAOC,WAAUF,EAAAX,OAC/B,SAAA,CACLc,SAAUP,EAAMQ,gBAChBC,SAAUT,EAAMS,WACjB,KAAA,GAAAL,EAAA1B,KAAA,GAAA,MAAA,KAAA,GAAA0B,EAAA3B,KAAA,GAAA2B,EAAAO,GAAAP,EAAA,MAAA,GAAA,KAAA,GAAA,OAAAA,EAAAX,OAAA,SAIE,MAAI,KAAA,GAAA,IAAA,MAAA,OAAAW,EAAAV,OAAA,GAAAK,EAAA,KAAA,CAAA,CAAA,EAAA,KACZ,KAAA,OAAA,WAxB8B,OAAAD,EAAAH,MAAAC,KAAAC,UAAA,CAAA,CAAA,GCd9B,SAAAe,IAAA,OAAAA,EAAAhD,EAAAC,IAAAC,MAVM,SAAAiC,IAAA,IAAAc,EAAA,OAAAhD,IAAAU,MAAA,SAAA6B,GAAA,cAAAA,EAAA3B,KAAA2B,EAAA1B,MAAA,KAAA,EACCmC,EAAgBC,EAAAA,KAAKjC,QAAQkC,MAAO,WAE1C,IACEC,EAASA,UAACH,EACZ,CAAE,MAAOI,GAAQ,CAEjBC,EAAAA,aAAaC,EAAcA,eAACC,eAAe,SAAAC,GACzCC,EAAAA,WAAWR,EAAIA,KAACD,EAAe,cAAeU,KAAKC,UAAUH,GAAe,MAAM,WAAQ,GAC5F,IAAG,KAAA,EAAA,IAAA,MAAA,OAAAjB,EAAAV,OAAA,GAAAK,EACJ,MAAAJ,MAAAC,KAAAC,UAAA,CASA,SAAA4B,IAAA,OAAAA,EAAA7D,EAAAC,IAAAC,MAPM,SAAAC,IAAA,IAAAK,EAAA,OAAAP,IAAAU,MAAA,SAAAC,GAAA,cAAAA,EAAAC,KAAAD,EAAAE,MAAA,KAAA,EAAA,IACDG,QAAQC,IAAIpB,GAAsB,CAAAc,EAAAE,KAAA,EAAA,KAAA,CAAA,MAC9B,IAAIgD,MAAM,qEAAoE,KAAA,EAGhFtD,EAAcW,OAAOC,aAC3BH,QAAQC,IAAIpB,GAAyBU,EAAY,KAAA,EAAA,IAAA,MAAA,OAAAI,EAAAkB,OAAA,GAAA3B,EAClD,MAAA4B,MAAAC,KAAAC,UAAA,yBAnBD,WAAqC,OAAAe,EAAAjB,MAAAC,KAAAC,UAAA,mBDFrC,SAA8B8B,EAAAC,EAAAC,GAAA,OAAAlE,EAAAgC,MAAAC,KAAAC,UAAA,sBCc9B,WAAiC,OAAA4B,EAAA9B,MAAAC,KAAAC,UAAA"}
|
package/dist/evals.esm.js
CHANGED
package/dist/evals.esm.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"evals.esm.js","sources":["../src/constants.ts","../src/evaluation.ts","../src/attachListeners.ts"],"sourcesContent":["export const GLOBAL_RUN_ID_ENV_KEY = '_MASTRA_GLOBAL_RUN_ID_';\n","import { type Agent, type Metric, evaluate as coreEvaluate } from '@mastra/core';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric) {\n const testInfo = await getCurrentTestInfo();\n let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];\n const runId = crypto.randomUUID();\n const agentOutput = await agent.generate(input, {\n runId,\n });\n\n if (!globalRunId) {\n globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();\n console.warn('Global run id not set, you should run \"globalSetup\" from \"@mastra/evals\" before evaluating.');\n }\n\n const metricResult = await coreEvaluate({\n agentName: agent.name,\n input,\n metric,\n output: agentOutput.text,\n globalRunId,\n runId,\n testInfo,\n });\n\n return metricResult;\n}\n\nexport const getCurrentTestInfo = async () => {\n // Jest\n if (typeof expect !== 'undefined' && expect.getState) {\n const state = expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n\n try {\n const vitest = await import('vitest');\n if (typeof vitest !== 'undefined' && vitest.expect?.getState) {\n const state = vitest.expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n } catch {}\n\n return null;\n};\n","import { AvailableHooks, registerHook } from '@mastra/core';\nimport { mkdirSync, appendFile } from 'fs';\nimport { join } from 'path';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function attachListeners() {\n const dotMastraPath = join(process.cwd(), '.mastra');\n\n try {\n mkdirSync(dotMastraPath);\n } catch (error) {}\n\n registerHook(AvailableHooks.ON_EVALUATION, traceObject 
=> {\n appendFile(join(dotMastraPath, 'evals.json'), JSON.stringify(traceObject) + '\\n', () => {});\n });\n}\n\nexport async function globalSetup() {\n if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {\n throw new Error('Global run id already set, you should only run \"GlobalSetup\" once');\n }\n\n const globalRunId = crypto.randomUUID();\n process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;\n}\n"],"names":["GLOBAL_RUN_ID_ENV_KEY","evaluate","_x","_x2","_x3","_evaluate","apply","arguments","_asyncToGenerator","_regeneratorRuntime","mark","_callee2","agent","input","metric","testInfo","globalRunId","runId","agentOutput","metricResult","wrap","_callee2$","_context2","prev","next","getCurrentTestInfo","sent","process","env","crypto","randomUUID","generate","console","warn","coreEvaluate","agentName","name","output","text","abrupt","stop","_ref","_callee","state","_vitest$expect","vitest","_state","_callee$","_context","expect","getState","testName","currentTestName","testPath","t0","attachListeners","_attachListeners","dotMastraPath","join","cwd","mkdirSync","error","registerHook","AvailableHooks","ON_EVALUATION","traceObject","appendFile","JSON","stringify","globalSetup","_globalSetup","Error"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAO,IAAMA,qBAAqB,GAAG,wBAAwB;;ACI7D,SAAsBC,QAAQA,CAAAC,EAAA,EAAAC,GAAA,EAAAC,GAAA,EAAA;AAAA,EAAA,OAAAC,SAAA,CAAAC,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAwB7B,SAAAF,SAAA,GAAA;AAAAA,EAAAA,SAAA,GAAAG,iBAAA,cAAAC,mBAAA,EAAAC,CAAAA,IAAA,CAxBM,SAAAC,QAAyCC,CAAAA,KAAQ,EAAEC,KAAmC,EAAEC,MAAc,EAAA;IAAA,IAAAC,QAAA,EAAAC,WAAA,EAAAC,KAAA,EAAAC,WAAA,EAAAC,YAAA,CAAA;AAAA,IAAA,OAAAV,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAAC,UAAAC,SAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA
,QAAAA,SAAA,CAAAC,IAAA,GAAAD,SAAA,CAAAE,IAAA;AAAA,QAAA,KAAA,CAAA;AAAAF,UAAAA,SAAA,CAAAE,IAAA,GAAA,CAAA,CAAA;UAAA,OACpFC,kBAAkB,EAAE,CAAA;AAAA,QAAA,KAAA,CAAA;UAArCV,QAAQ,GAAAO,SAAA,CAAAI,IAAA,CAAA;AACVV,UAAAA,WAAW,GAAGW,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,CAAA;AAC9CiB,UAAAA,KAAK,GAAGY,MAAM,CAACC,UAAU,EAAE,CAAA;AAAAR,UAAAA,SAAA,CAAAE,IAAA,GAAA,CAAA,CAAA;AAAA,UAAA,OACPZ,KAAK,CAACmB,QAAQ,CAAClB,KAAK,EAAE;AAC9CI,YAAAA,KAAK,EAALA,KAAAA;AACD,WAAA,CAAC,CAAA;AAAA,QAAA,KAAA,CAAA;UAFIC,WAAW,GAAAI,SAAA,CAAAI,IAAA,CAAA;UAIjB,IAAI,CAACV,WAAW,EAAE;AAChBA,YAAAA,WAAW,GAAGW,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,GAAG6B,MAAM,CAACC,UAAU,EAAE,CAAA;AACtEE,YAAAA,OAAO,CAACC,IAAI,CAAC,6FAA6F,CAAC,CAAA;AAC7G,WAAA;AAACX,UAAAA,SAAA,CAAAE,IAAA,GAAA,EAAA,CAAA;AAAA,UAAA,OAE0BU,UAAY,CAAC;YACtCC,SAAS,EAAEvB,KAAK,CAACwB,IAAI;AACrBvB,YAAAA,KAAK,EAALA,KAAK;AACLC,YAAAA,MAAM,EAANA,MAAM;YACNuB,MAAM,EAAEnB,WAAW,CAACoB,IAAI;AACxBtB,YAAAA,WAAW,EAAXA,WAAW;AACXC,YAAAA,KAAK,EAALA,KAAK;AACLF,YAAAA,QAAQ,EAARA,QAAAA;AACD,WAAA,CAAC,CAAA;AAAA,QAAA,KAAA,EAAA;UARII,YAAY,GAAAG,SAAA,CAAAI,IAAA,CAAA;AAAA,UAAA,OAAAJ,SAAA,CAAAiB,MAAA,CAAA,QAAA,EAUXpB,YAAY,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAG,SAAA,CAAAkB,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAA7B,QAAA,CAAA,CAAA;GACpB,CAAA,CAAA,CAAA;AAAA,EAAA,OAAAN,SAAA,CAAAC,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAEM,IAAMkB,kBAAkB,gBAAA,YAAA;EAAA,IAAAgB,IAAA,gBAAAjC,iBAAA,cAAAC,mBAAA,EAAAC,CAAAA,IAAA,CAAG,SAAAgC,OAAA,GAAA;AAAA,IAAA,IAAAC,KAAA,EAAAC,cAAA,EAAAC,MAAA,EAAAC,MAAA,CAAA;AAAA,IAAA,OAAArC,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAA2B,SAAAC,QAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,QAAA,CAAAzB,IAAA,GAAAyB,QAAA,CAAAxB,IAAA;AAAA,QAAA,KAAA,CAAA;AAAA,UAAA,IAAA,
|
|
1
|
+
{"version":3,"file":"evals.esm.js","sources":["../src/constants.ts","../src/evaluation.ts","../src/attachListeners.ts"],"sourcesContent":["export const GLOBAL_RUN_ID_ENV_KEY = '_MASTRA_GLOBAL_RUN_ID_';\n","import { type Agent, type Metric, evaluate as coreEvaluate } from '@mastra/core';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric) {\n const testInfo = await getCurrentTestInfo();\n let globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY];\n const runId = crypto.randomUUID();\n const agentOutput = await agent.generate(input, {\n runId,\n });\n\n if (!globalRunId) {\n globalRunId = process.env[GLOBAL_RUN_ID_ENV_KEY] = crypto.randomUUID();\n console.warn('Global run id not set, you should run \"globalSetup\" from \"@mastra/evals\" before evaluating.');\n }\n\n const metricResult = await coreEvaluate({\n agentName: agent.name,\n input,\n metric,\n output: agentOutput.text,\n globalRunId,\n runId,\n testInfo,\n });\n\n return metricResult;\n}\n\nexport const getCurrentTestInfo = async () => {\n // Jest\n // @ts-ignore\n if (typeof expect !== 'undefined' && expect.getState) {\n // @ts-ignore\n const state = expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n\n try {\n const vitest = await import('vitest');\n if (typeof vitest !== 'undefined' && vitest.expect?.getState) {\n const state = vitest.expect.getState();\n return {\n testName: state.currentTestName,\n testPath: state.testPath,\n };\n }\n } catch {}\n\n return null;\n};\n","import { AvailableHooks, registerHook } from '@mastra/core';\nimport { mkdirSync, appendFile } from 'fs';\nimport { join } from 'path';\n\nimport { GLOBAL_RUN_ID_ENV_KEY } from './constants';\n\nexport async function attachListeners() {\n const dotMastraPath = join(process.cwd(), '.mastra');\n\n try {\n mkdirSync(dotMastraPath);\n } catch (error) {}\n\n 
registerHook(AvailableHooks.ON_EVALUATION, traceObject => {\n appendFile(join(dotMastraPath, 'evals.json'), JSON.stringify(traceObject) + '\\n', () => {});\n });\n}\n\nexport async function globalSetup() {\n if (process.env[GLOBAL_RUN_ID_ENV_KEY]) {\n throw new Error('Global run id already set, you should only run \"GlobalSetup\" once');\n }\n\n const globalRunId = crypto.randomUUID();\n process.env[GLOBAL_RUN_ID_ENV_KEY] = globalRunId;\n}\n"],"names":["GLOBAL_RUN_ID_ENV_KEY","evaluate","_x","_x2","_x3","_evaluate","apply","arguments","_asyncToGenerator","_regeneratorRuntime","mark","_callee2","agent","input","metric","testInfo","globalRunId","runId","agentOutput","metricResult","wrap","_callee2$","_context2","prev","next","getCurrentTestInfo","sent","process","env","crypto","randomUUID","generate","console","warn","coreEvaluate","agentName","name","output","text","abrupt","stop","_ref","_callee","state","_vitest$expect","vitest","_state","_callee$","_context","expect","getState","testName","currentTestName","testPath","t0","attachListeners","_attachListeners","dotMastraPath","join","cwd","mkdirSync","error","registerHook","AvailableHooks","ON_EVALUATION","traceObject","appendFile","JSON","stringify","globalSetup","_globalSetup","Error"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAO,IAAMA,qBAAqB,GAAG,wBAAwB;;ACI7D,SAAsBC,QAAQA,CAAAC,EAAA,EAAAC,GAAA,EAAAC,GAAA,EAAA;AAAA,EAAA,OAAAC,SAAA,CAAAC,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAwB7B,SAAAF,SAAA,GAAA;AAAAA,EAAAA,SAAA,GAAAG,iBAAA,cAAAC,mBAAA,EAAAC,CAAAA,IAAA,CAxBM,SAAAC,QAAyCC,CAAAA,KAAQ,EAAEC,KAAmC,EAAEC,MAAc,EAAA;IAAA,IAAAC,QAAA,EAAAC,WAAA,EAAAC,KAAA,EAAAC,WAAA,EAAAC,YAAA,CAAA;AAAA,IAAA,OAAAV,mBAAA,EAAA,CAAAW,I
AAA,CAAA,SAAAC,UAAAC,SAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,SAAA,CAAAC,IAAA,GAAAD,SAAA,CAAAE,IAAA;AAAA,QAAA,KAAA,CAAA;AAAAF,UAAAA,SAAA,CAAAE,IAAA,GAAA,CAAA,CAAA;UAAA,OACpFC,kBAAkB,EAAE,CAAA;AAAA,QAAA,KAAA,CAAA;UAArCV,QAAQ,GAAAO,SAAA,CAAAI,IAAA,CAAA;AACVV,UAAAA,WAAW,GAAGW,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,CAAA;AAC9CiB,UAAAA,KAAK,GAAGY,MAAM,CAACC,UAAU,EAAE,CAAA;AAAAR,UAAAA,SAAA,CAAAE,IAAA,GAAA,CAAA,CAAA;AAAA,UAAA,OACPZ,KAAK,CAACmB,QAAQ,CAAClB,KAAK,EAAE;AAC9CI,YAAAA,KAAK,EAALA,KAAAA;AACD,WAAA,CAAC,CAAA;AAAA,QAAA,KAAA,CAAA;UAFIC,WAAW,GAAAI,SAAA,CAAAI,IAAA,CAAA;UAIjB,IAAI,CAACV,WAAW,EAAE;AAChBA,YAAAA,WAAW,GAAGW,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,GAAG6B,MAAM,CAACC,UAAU,EAAE,CAAA;AACtEE,YAAAA,OAAO,CAACC,IAAI,CAAC,6FAA6F,CAAC,CAAA;AAC7G,WAAA;AAACX,UAAAA,SAAA,CAAAE,IAAA,GAAA,EAAA,CAAA;AAAA,UAAA,OAE0BU,UAAY,CAAC;YACtCC,SAAS,EAAEvB,KAAK,CAACwB,IAAI;AACrBvB,YAAAA,KAAK,EAALA,KAAK;AACLC,YAAAA,MAAM,EAANA,MAAM;YACNuB,MAAM,EAAEnB,WAAW,CAACoB,IAAI;AACxBtB,YAAAA,WAAW,EAAXA,WAAW;AACXC,YAAAA,KAAK,EAALA,KAAK;AACLF,YAAAA,QAAQ,EAARA,QAAAA;AACD,WAAA,CAAC,CAAA;AAAA,QAAA,KAAA,EAAA;UARII,YAAY,GAAAG,SAAA,CAAAI,IAAA,CAAA;AAAA,UAAA,OAAAJ,SAAA,CAAAiB,MAAA,CAAA,QAAA,EAUXpB,YAAY,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAG,SAAA,CAAAkB,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAA7B,QAAA,CAAA,CAAA;GACpB,CAAA,CAAA,CAAA;AAAA,EAAA,OAAAN,SAAA,CAAAC,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAEM,IAAMkB,kBAAkB,gBAAA,YAAA;EAAA,IAAAgB,IAAA,gBAAAjC,iBAAA,cAAAC,mBAAA,EAAAC,CAAAA,IAAA,CAAG,SAAAgC,OAAA,GAAA;AAAA,IAAA,IAAAC,KAAA,EAAAC,cAAA,EAAAC,MAAA,EAAAC,MAAA,CAAA;AAAA,IAAA,OAAArC,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAA2B,SAAAC,QAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,QAAA,CAAAzB,IAAA,GAAAyB,QAAA,CAAAxB,IAAA;AAAA,QAAA,KAAA,CAAA;AAAA,UAAA,IAAA,EAG5B,OAAOyB,MAAM,KAAK,WAAW,IAAIA,MAAM,CAACC,QAAQ,CAAA,EAAA;AAAAF,YAAAA,QAAA,CAAAxB,IAAA,GAAA,CAAA,CAAA;AAAA,YAAA,MAAA;AAAA,WAAA;AAClD;AACMmB,UAAAA,KAAK,GAAGM,MAAM,CAACC,QAAQ,EAAE,CAAA;UAAA,OAAAF,QAAA,CAAAT,MAAA,CACxB,QAAA,EAAA;YACLY,QAAQ,EAAER,KAAK,CAACS,eAA
e;YAC/BC,QAAQ,EAAEV,KAAK,CAACU,QAAAA;WACjB,CAAA,CAAA;AAAA,QAAA,KAAA,CAAA;AAAAL,UAAAA,QAAA,CAAAzB,IAAA,GAAA,CAAA,CAAA;AAAAyB,UAAAA,QAAA,CAAAxB,IAAA,GAAA,CAAA,CAAA;UAAA,OAIoB,OAAO,QAAQ,CAAC,CAAA;AAAA,QAAA,KAAA,CAAA;UAA/BqB,MAAM,GAAAG,QAAA,CAAAtB,IAAA,CAAA;AAAA,UAAA,IAAA,EACR,OAAOmB,MAAM,KAAK,WAAW,IAAAD,CAAAA,cAAA,GAAIC,MAAM,CAACI,MAAM,KAAbL,IAAAA,IAAAA,cAAA,CAAeM,QAAQ,CAAA,EAAA;AAAAF,YAAAA,QAAA,CAAAxB,IAAA,GAAA,EAAA,CAAA;AAAA,YAAA,MAAA;AAAA,WAAA;AACpDmB,UAAAA,MAAK,GAAGE,MAAM,CAACI,MAAM,CAACC,QAAQ,EAAE,CAAA;UAAA,OAAAF,QAAA,CAAAT,MAAA,CAC/B,QAAA,EAAA;YACLY,QAAQ,EAAER,MAAK,CAACS,eAAe;YAC/BC,QAAQ,EAAEV,MAAK,CAACU,QAAAA;WACjB,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA;AAAAL,UAAAA,QAAA,CAAAxB,IAAA,GAAA,EAAA,CAAA;AAAA,UAAA,MAAA;AAAA,QAAA,KAAA,EAAA;AAAAwB,UAAAA,QAAA,CAAAzB,IAAA,GAAA,EAAA,CAAA;UAAAyB,QAAA,CAAAM,EAAA,GAAAN,QAAA,CAAA,OAAA,CAAA,CAAA,CAAA,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA;AAAA,UAAA,OAAAA,QAAA,CAAAT,MAAA,CAAA,QAAA,EAIE,IAAI,CAAA,CAAA;AAAA,QAAA,KAAA,EAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAS,QAAA,CAAAR,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAAE,OAAA,EAAA,IAAA,EAAA,CAAA,CAAA,CAAA,EAAA,EAAA,CAAA,CAAA,CAAA,CAAA;GACZ,CAAA,CAAA,CAAA;AAAA,EAAA,OAAA,SAxBYjB,kBAAkBA,GAAA;AAAA,IAAA,OAAAgB,IAAA,CAAAnC,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,GAAA,CAAA;AAAA,CAwB9B,EAAA;;AChDD,SAAsBgD,eAAeA,GAAA;AAAA,EAAA,OAAAC,gBAAA,CAAAlD,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAUpC,SAAAiD,gBAAA,GAAA;EAAAA,gBAAA,GAAAhD,iBAAA,cAAAC,mBAAA,GAAAC,IAAA,CAVM,SAAAgC,OAAA,GAAA;AAAA,IAAA,IAAAe,aAAA,CAAA;AAAA,IAAA,OAAAhD,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAA2B,SAAAC,QAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,QAAA,CAAAzB,IAAA,GAAAyB,QAAA,CAAAxB,IAAA;AAAA,QAAA,KAAA,CAAA;UACCiC,aAAa,GAAGC,IAAI,CAAC/B,OAAO,CAACgC,GAAG,EAAE,EAAE,SAAS,CAAC,CAAA;UAEpD,IAAI;YACFC,SAAS,CAACH,aAAa,CAAC,CAAA;AAC1B,WAAC,CAAC,OAAOI,KAAK,EAAE,EAAC;AAEjBC,UAAAA,YAAY,CAACC,cAAc,CAACC,aAAa,EAAE,UAAAC,WAAW,EAAG;YACvDC,UAAU,CAACR,IAAI,CAACD,aAAa,EAAE,YAAY,CAAC,EAAEU,IAAI,CAACC,SAAS,CAACH,WAAW,CAAC,GAAG,IAAI,EAAE,YAAK,EAAG,CAAC,CAAA;AAC7F,WAAC,CAAC,CAAA;AAAC,Q
AAA,KAAA,CAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAjB,QAAA,CAAAR,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAAE,OAAA,CAAA,CAAA;GACJ,CAAA,CAAA,CAAA;AAAA,EAAA,OAAAc,gBAAA,CAAAlD,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAED,SAAsB8D,WAAWA,GAAA;AAAA,EAAA,OAAAC,YAAA,CAAAhE,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA,CAAA;AAOhC,SAAA+D,YAAA,GAAA;EAAAA,YAAA,GAAA9D,iBAAA,cAAAC,mBAAA,GAAAC,IAAA,CAPM,SAAAC,QAAA,GAAA;AAAA,IAAA,IAAAK,WAAA,CAAA;AAAA,IAAA,OAAAP,mBAAA,EAAA,CAAAW,IAAA,CAAA,SAAAC,UAAAC,SAAA,EAAA;AAAA,MAAA,OAAA,CAAA,EAAA,QAAAA,SAAA,CAAAC,IAAA,GAAAD,SAAA,CAAAE,IAAA;AAAA,QAAA,KAAA,CAAA;AAAA,UAAA,IAAA,CACDG,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,EAAA;AAAAsB,YAAAA,SAAA,CAAAE,IAAA,GAAA,CAAA,CAAA;AAAA,YAAA,MAAA;AAAA,WAAA;AAAA,UAAA,MAC9B,IAAI+C,KAAK,CAAC,mEAAmE,CAAC,CAAA;AAAA,QAAA,KAAA,CAAA;AAGhFvD,UAAAA,WAAW,GAAGa,MAAM,CAACC,UAAU,EAAE,CAAA;AACvCH,UAAAA,OAAO,CAACC,GAAG,CAAC5B,qBAAqB,CAAC,GAAGgB,WAAW,CAAA;AAAC,QAAA,KAAA,CAAA,CAAA;AAAA,QAAA,KAAA,KAAA;UAAA,OAAAM,SAAA,CAAAkB,IAAA,EAAA,CAAA;AAAA,OAAA;AAAA,KAAA,EAAA7B,QAAA,CAAA,CAAA;GAClD,CAAA,CAAA,CAAA;AAAA,EAAA,OAAA2D,YAAA,CAAAhE,KAAA,CAAA,IAAA,EAAAC,SAAA,CAAA,CAAA;AAAA;;;;"}
|
package/dist/evaluation.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { type Agent, type Metric } from '@mastra/core';
|
|
2
2
|
export declare function evaluate<T extends Agent>(agent: T, input: Parameters<T['generate']>[0], metric: Metric): Promise<import("@mastra/core").MetricResult>;
|
|
3
3
|
export declare const getCurrentTestInfo: () => Promise<{
|
|
4
|
-
testName:
|
|
5
|
-
testPath:
|
|
4
|
+
testName: any;
|
|
5
|
+
testPath: any;
|
|
6
6
|
} | null>;
|
|
7
7
|
//# sourceMappingURL=evaluation.d.ts.map
|
package/dist/evaluation.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"evaluation.d.ts","sourceRoot":"","sources":["../src/evaluation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,KAAK,EAAE,KAAK,MAAM,EAA4B,MAAM,cAAc,CAAC;AAIjF,wBAAsB,QAAQ,CAAC,CAAC,SAAS,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,gDAwB5G;AAED,eAAO,MAAM,kBAAkB;;;
|
|
1
|
+
{"version":3,"file":"evaluation.d.ts","sourceRoot":"","sources":["../src/evaluation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,KAAK,EAAE,KAAK,MAAM,EAA4B,MAAM,cAAc,CAAC;AAIjF,wBAAsB,QAAQ,CAAC,CAAC,SAAS,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,gDAwB5G;AAED,eAAO,MAAM,kBAAkB;;;SAwB9B,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mastra/evals",
|
|
3
|
-
"version": "0.1.0-alpha.
|
|
3
|
+
"version": "0.1.0-alpha.19",
|
|
4
4
|
"description": "",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -42,27 +42,24 @@
|
|
|
42
42
|
"sentiment": "^5.0.2",
|
|
43
43
|
"string-similarity": "^4.0.4",
|
|
44
44
|
"zod": "^3.24.1",
|
|
45
|
-
"@mastra/core": "0.1.27-alpha.
|
|
45
|
+
"@mastra/core": "0.1.27-alpha.78"
|
|
46
46
|
},
|
|
47
47
|
"devDependencies": {
|
|
48
48
|
"@babel/preset-env": "^7.26.0",
|
|
49
49
|
"@babel/preset-typescript": "^7.26.0",
|
|
50
|
-
"@jest/globals": "^29.7.0",
|
|
51
50
|
"@tsconfig/recommended": "^1.0.7",
|
|
52
51
|
"@types/difflib": "^0.2.7",
|
|
53
52
|
"@types/fs-extra": "^11.0.4",
|
|
54
|
-
"@types/jest": "^29.5.12",
|
|
55
53
|
"@types/sentiment": "^5.0.4",
|
|
56
54
|
"@types/string-similarity": "^4.0.2",
|
|
57
55
|
"dts-cli": "^2.0.5",
|
|
58
|
-
"
|
|
59
|
-
"ts-jest": "^29.2.5"
|
|
56
|
+
"vitest": "^3.0.4"
|
|
60
57
|
},
|
|
61
58
|
"scripts": {
|
|
62
59
|
"build": "dts build && pnpm build:llm && pnpm build:nlp",
|
|
63
60
|
"build:llm": "dts build --name llm --entry ./src/metrics/llm/index.ts --target node --format esm --noClean",
|
|
64
61
|
"build:nlp": "dts build --name nlp --entry ./src/metrics/nlp/index.ts --target node --format esm --noClean",
|
|
65
62
|
"build:dev": "dts watch",
|
|
66
|
-
"test": "
|
|
63
|
+
"test": "vitest"
|
|
67
64
|
}
|
|
68
65
|
}
|
package/src/evaluation.test.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { describe, expect, it } from '@jest/globals';
|
|
2
1
|
import { Agent } from '@mastra/core';
|
|
3
2
|
import { ModelConfig } from '@mastra/core';
|
|
4
3
|
import { Metric } from '@mastra/core';
|
|
4
|
+
import { describe, expect, it } from 'vitest';
|
|
5
5
|
|
|
6
6
|
import { evaluate } from './evaluation';
|
|
7
7
|
|
package/src/evaluation.ts
CHANGED
|
@@ -30,7 +30,9 @@ export async function evaluate<T extends Agent>(agent: T, input: Parameters<T['g
|
|
|
30
30
|
|
|
31
31
|
export const getCurrentTestInfo = async () => {
|
|
32
32
|
// Jest
|
|
33
|
+
// @ts-ignore
|
|
33
34
|
if (typeof expect !== 'undefined' && expect.getState) {
|
|
35
|
+
// @ts-ignore
|
|
34
36
|
const state = expect.getState();
|
|
35
37
|
return {
|
|
36
38
|
testName: state.currentTestName,
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { describe, it, expect, jest } from '@jest/globals';
|
|
2
1
|
import { type ModelConfig } from '@mastra/core';
|
|
2
|
+
import { describe, it, expect } from 'vitest';
|
|
3
3
|
|
|
4
4
|
import { TestCase } from '../utils';
|
|
5
5
|
|
|
@@ -91,7 +91,6 @@ const testCases: TestCase[] = [
|
|
|
91
91
|
];
|
|
92
92
|
|
|
93
93
|
const SECONDS = 10000;
|
|
94
|
-
jest.setTimeout(15 * SECONDS);
|
|
95
94
|
|
|
96
95
|
const modelConfig: ModelConfig = {
|
|
97
96
|
provider: 'OPEN_AI',
|
|
@@ -100,54 +99,60 @@ const modelConfig: ModelConfig = {
|
|
|
100
99
|
apiKey: process.env.OPENAI_API_KEY,
|
|
101
100
|
};
|
|
102
101
|
|
|
103
|
-
describe(
|
|
104
|
-
|
|
102
|
+
describe(
|
|
103
|
+
'AnswerRelevancyMetric',
|
|
104
|
+
() => {
|
|
105
|
+
const metric = new AnswerRelevancyMetric(modelConfig);
|
|
105
106
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
107
|
+
it('should be able to measure a prompt with perfect relevancy', async () => {
|
|
108
|
+
const result = await metric.measure(testCases[0].input, testCases[0].output);
|
|
109
|
+
expect(result.score).toBeCloseTo(testCases[0].expectedResult.score, 1);
|
|
110
|
+
});
|
|
110
111
|
|
|
111
112
|
it('should be able to measure a prompt with mostly relevant information', async () => {
|
|
112
113
|
const result = await metric.measure(testCases[1].input, testCases[1].output);
|
|
113
114
|
const expectedScore = testCases[1].expectedResult.score;
|
|
114
115
|
const difference = Math.abs(result.score - expectedScore);
|
|
115
116
|
|
|
116
|
-
expect(difference).toBeLessThanOrEqual(0.1);
|
|
117
|
-
});
|
|
118
|
-
|
|
119
|
-
it('should be able to measure a prompt with partial relevance', async () => {
|
|
120
|
-
const result = await metric.measure(testCases[2].input, testCases[2].output);
|
|
121
|
-
expect(result.score).toBeCloseTo(testCases[2].expectedResult.score, 1);
|
|
122
|
-
});
|
|
123
|
-
|
|
124
|
-
it('should be able to measure a prompt with low relevance', async () => {
|
|
125
|
-
const result = await metric.measure(testCases[3].input, testCases[3].output);
|
|
126
|
-
expect(result.score).toBeCloseTo(testCases[3].expectedResult.score, 1);
|
|
127
|
-
});
|
|
128
|
-
|
|
129
|
-
it('should be able to measure a prompt with empty output', async () => {
|
|
130
|
-
const result = await metric.measure(testCases[5].input, testCases[5].output);
|
|
131
|
-
expect(result.score).toBeCloseTo(testCases[5].expectedResult.score, 1);
|
|
132
|
-
});
|
|
133
|
-
|
|
134
|
-
it('should be able to measure a prompt with incorrect but relevant answer', async () => {
|
|
135
|
-
const result = await metric.measure(testCases[6].input, testCases[6].output);
|
|
136
|
-
expect(result.score).toBeCloseTo(testCases[6].expectedResult.score, 1);
|
|
117
|
+
expect(Math.round(difference * 10) / 10).toBeLessThanOrEqual(0.1);
|
|
137
118
|
});
|
|
138
119
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
120
|
+
it('should be able to measure a prompt with partial relevance', async () => {
|
|
121
|
+
const result = await metric.measure(testCases[2].input, testCases[2].output);
|
|
122
|
+
expect(result.score).toBeCloseTo(testCases[2].expectedResult.score, 1);
|
|
123
|
+
});
|
|
124
|
+
|
|
125
|
+
it('should be able to measure a prompt with low relevance', async () => {
|
|
126
|
+
const result = await metric.measure(testCases[3].input, testCases[3].output);
|
|
127
|
+
expect(result.score).toBeCloseTo(testCases[3].expectedResult.score, 1);
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
it('should be able to measure a prompt with empty output', async () => {
|
|
131
|
+
const result = await metric.measure(testCases[5].input, testCases[5].output);
|
|
132
|
+
expect(result.score).toBeCloseTo(testCases[5].expectedResult.score, 1);
|
|
133
|
+
});
|
|
134
|
+
|
|
135
|
+
it('should be able to measure a prompt with incorrect but relevant answer', async () => {
|
|
136
|
+
const result = await metric.measure(testCases[6].input, testCases[6].output);
|
|
137
|
+
expect(result.score).toBeCloseTo(testCases[6].expectedResult.score, 1);
|
|
138
|
+
});
|
|
139
|
+
|
|
140
|
+
it('should be able to measure a prompt with a single word correct answer', async () => {
|
|
141
|
+
const result = await metric.measure(testCases[7].input, testCases[7].output);
|
|
142
|
+
expect(result.score).toBeCloseTo(testCases[7].expectedResult.score, 1);
|
|
143
|
+
});
|
|
144
|
+
|
|
145
|
+
it('should be able to measure a prompt with multiple questions', async () => {
|
|
146
|
+
const result = await metric.measure(testCases[8].input, testCases[8].output);
|
|
147
|
+
expect(result.score).toBeCloseTo(testCases[8].expectedResult.score, 1);
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
it('should be able to measure a prompt with technical gibberish', async () => {
|
|
151
|
+
const result = await metric.measure(testCases[9].input, testCases[9].output);
|
|
152
|
+
expect(result.score).toBeCloseTo(testCases[9].expectedResult.score, 1);
|
|
153
|
+
});
|
|
154
|
+
},
|
|
155
|
+
{
|
|
156
|
+
timeout: 15 * SECONDS,
|
|
157
|
+
},
|
|
158
|
+
);
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { describe, it, expect, jest } from '@jest/globals';
|
|
2
1
|
import { type ModelConfig } from '@mastra/core';
|
|
2
|
+
import { describe, it, expect } from 'vitest';
|
|
3
3
|
|
|
4
4
|
import { isCloserTo } from '../utils';
|
|
5
5
|
import { TestCase } from '../utils';
|
|
@@ -42,7 +42,6 @@ const testCases: TestCase[] = [
|
|
|
42
42
|
];
|
|
43
43
|
|
|
44
44
|
const SECONDS = 10000;
|
|
45
|
-
jest.setTimeout(15 * SECONDS);
|
|
46
45
|
|
|
47
46
|
const modelConfig: ModelConfig = {
|
|
48
47
|
provider: 'OPEN_AI',
|
|
@@ -51,18 +50,20 @@ const modelConfig: ModelConfig = {
|
|
|
51
50
|
apiKey: process.env.OPENAI_API_KEY,
|
|
52
51
|
};
|
|
53
52
|
|
|
54
|
-
describe(
|
|
55
|
-
|
|
53
|
+
describe(
|
|
54
|
+
'BiasMetric',
|
|
55
|
+
() => {
|
|
56
|
+
const metric = new BiasMetric(modelConfig);
|
|
56
57
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
58
|
+
it('should be able to measure a prompt that is biased', async () => {
|
|
59
|
+
const result = await metric.measure(testCases[0].input, testCases[0].output);
|
|
60
|
+
expect(result.score).toBeCloseTo(testCases[0].expectedResult.score, 1);
|
|
61
|
+
});
|
|
61
62
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
63
|
+
it('should be able to measure a prompt that is almost not biased', async () => {
|
|
64
|
+
const result = await metric.measure(testCases[1].input, testCases[1].output);
|
|
65
|
+
expect(result.score).toBeLessThan(0.5);
|
|
66
|
+
});
|
|
66
67
|
|
|
67
68
|
it('should be able to measure a prompt that is mildly biased but actually not', async () => {
|
|
68
69
|
const result = await metric.measure(testCases[2].input, testCases[2].output);
|