@mastra/evals 0.1.0-alpha.17 → 0.1.0-alpha.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
- import { describe, it, expect, jest } from '@jest/globals';
2
1
  import { type ModelConfig } from '@mastra/core';
2
+ import { describe, it, expect } from 'vitest';
3
3
 
4
4
  import { TestCase } from '../utils';
5
5
 
@@ -166,7 +166,6 @@ const testCases: TestCase[] = [
166
166
  ];
167
167
 
168
168
  const SECONDS = 10000;
169
- jest.setTimeout(15 * SECONDS);
170
169
 
171
170
  const modelConfig: ModelConfig = {
172
171
  provider: 'OPEN_AI',
@@ -175,96 +174,102 @@ const modelConfig: ModelConfig = {
175
174
  apiKey: process.env.OPENAI_API_KEY,
176
175
  };
177
176
 
178
- describe('SummarizationMetric', () => {
179
- const metric = new SummarizationMetric(modelConfig);
177
+ describe(
178
+ 'SummarizationMetric',
179
+ () => {
180
+ const metric = new SummarizationMetric(modelConfig);
180
181
 
181
- it('should handle perfect summarization', async () => {
182
- const testCase = testCases[0]!;
183
- const result = await metric.measure(testCase.input, testCase.output);
184
- expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
185
- });
182
+ it('should handle perfect summarization', async () => {
183
+ const testCase = testCases[0]!;
184
+ const result = await metric.measure(testCase.input, testCase.output);
185
+ expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
186
+ });
186
187
 
187
- it('should handle mixed accuracy with contradictions', async () => {
188
- const testCase = testCases[1]!;
189
- const result = await metric.measure(testCase.input, testCase.output);
190
- expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
191
- });
188
+ it('should handle mixed accuracy with contradictions', async () => {
189
+ const testCase = testCases[1]!;
190
+ const result = await metric.measure(testCase.input, testCase.output);
191
+ expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
192
+ });
192
193
 
193
- it('should handle missing key information', async () => {
194
- const testCase = testCases[2]!;
195
- const result = await metric.measure(testCase.input, testCase.output);
196
- expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
197
- });
194
+ it('should handle missing key information', async () => {
195
+ const testCase = testCases[2]!;
196
+ const result = await metric.measure(testCase.input, testCase.output);
197
+ expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
198
+ });
198
199
 
199
- it('should handle empty output', async () => {
200
- const testCase = testCases[3]!;
201
- const result = await metric.measure(testCase.input, testCase.output);
202
- expect(result.score).toBe(testCase.expectedResult.score);
203
- });
200
+ it('should handle empty output', async () => {
201
+ const testCase = testCases[3]!;
202
+ const result = await metric.measure(testCase.input, testCase.output);
203
+ expect(result.score).toBe(testCase.expectedResult.score);
204
+ });
204
205
 
205
- it('should handle speculative additions', async () => {
206
- const testCase = testCases[4]!;
207
- const result = await metric.measure(testCase.input, testCase.output);
208
- expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
209
- });
206
+ it('should handle speculative additions', async () => {
207
+ const testCase = testCases[4]!;
208
+ const result = await metric.measure(testCase.input, testCase.output);
209
+ expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
210
+ });
210
211
 
211
- it('should handle incorrect emphasis', async () => {
212
- const testCase = testCases[5]!;
213
- const result = await metric.measure(testCase.input, testCase.output);
214
- expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
215
- });
212
+ it('should handle incorrect emphasis', async () => {
213
+ const testCase = testCases[5]!;
214
+ const result = await metric.measure(testCase.input, testCase.output);
215
+ expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
216
+ });
216
217
 
217
- it('should handle technical accuracy with missing context', async () => {
218
- const testCase = testCases[6]!;
219
- const result = await metric.measure(testCase.input, testCase.output);
220
- expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
221
- });
218
+ it('should handle technical accuracy with missing context', async () => {
219
+ const testCase = testCases[6]!;
220
+ const result = await metric.measure(testCase.input, testCase.output);
221
+ expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
222
+ });
222
223
 
223
- it('should handle numerical approximation', async () => {
224
- const testCase = testCases[7]!;
225
- const result = await metric.measure(testCase.input, testCase.output);
226
- expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
227
- });
224
+ it('should handle numerical approximation', async () => {
225
+ const testCase = testCases[7]!;
226
+ const result = await metric.measure(testCase.input, testCase.output);
227
+ expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
228
+ });
228
229
 
229
- it('should handle mixed tenses', async () => {
230
- const testCase = testCases[8]!;
231
- const result = await metric.measure(testCase.input, testCase.output);
232
- expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
233
- });
230
+ it('should handle mixed tenses', async () => {
231
+ const testCase = testCases[8]!;
232
+ const result = await metric.measure(testCase.input, testCase.output);
233
+ expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
234
+ });
234
235
 
235
- it('should handle subjective interpretation', async () => {
236
- const testCase = testCases[9]!;
237
- const result = await metric.measure(testCase.input, testCase.output);
238
- expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
239
- });
236
+ it('should handle subjective interpretation', async () => {
237
+ const testCase = testCases[9]!;
238
+ const result = await metric.measure(testCase.input, testCase.output);
239
+ expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
240
+ });
240
241
 
241
- it('should handle high alignment with low coverage', async () => {
242
- const testCase = testCases[10]!;
243
- const result = await metric.measure(testCase.input, testCase.output);
244
- expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
245
- });
242
+ it('should handle high alignment with low coverage', async () => {
243
+ const testCase = testCases[10]!;
244
+ const result = await metric.measure(testCase.input, testCase.output);
245
+ expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
246
+ });
246
247
 
247
- it('should handle low alignment with high coverage', async () => {
248
- const testCase = testCases[11]!;
249
- const result = await metric.measure(testCase.input, testCase.output);
250
- expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
251
- });
248
+ it('should handle low alignment with high coverage', async () => {
249
+ const testCase = testCases[11]!;
250
+ const result = await metric.measure(testCase.input, testCase.output);
251
+ expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
252
+ });
252
253
 
253
- it('should handle single word summary', async () => {
254
- const testCase = testCases[12]!;
255
- const result = await metric.measure(testCase.input, testCase.output);
256
- expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
257
- });
254
+ it('should handle single word summary', async () => {
255
+ const testCase = testCases[12]!;
256
+ const result = await metric.measure(testCase.input, testCase.output);
257
+ expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
258
+ });
258
259
 
259
- it('should handle repetitive summary', async () => {
260
- const testCase = testCases[13]!;
261
- const result = await metric.measure(testCase.input, testCase.output);
262
- expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
263
- });
260
+ it('should handle repetitive summary', async () => {
261
+ const testCase = testCases[13]!;
262
+ const result = await metric.measure(testCase.input, testCase.output);
263
+ expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
264
+ });
264
265
 
265
- it('should handle overly verbose summary', async () => {
266
- const testCase = testCases[14]!;
267
- const result = await metric.measure(testCase.input, testCase.output);
268
- expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
269
- });
270
- });
266
+ it('should handle overly verbose summary', async () => {
267
+ const testCase = testCases[14]!;
268
+ const result = await metric.measure(testCase.input, testCase.output);
269
+ expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
270
+ });
271
+ },
272
+ {
273
+ timeout: 15 * SECONDS,
274
+ },
275
+ );
@@ -1,5 +1,5 @@
1
- import { describe, it, expect, jest } from '@jest/globals';
2
1
  import { type ModelConfig } from '@mastra/core';
2
+ import { describe, it, expect } from 'vitest';
3
3
 
4
4
  import { TestCase } from '../utils';
5
5
 
@@ -35,7 +35,6 @@ const testCases: TestCase[] = [
35
35
  ];
36
36
 
37
37
  const SECONDS = 10000;
38
- jest.setTimeout(15 * SECONDS);
39
38
 
40
39
  const modelConfig: ModelConfig = {
41
40
  provider: 'OPEN_AI',
@@ -44,24 +43,30 @@ const modelConfig: ModelConfig = {
44
43
  apiKey: process.env.OPENAI_API_KEY,
45
44
  };
46
45
 
47
- describe('ToxicityMetric', () => {
48
- const metric = new ToxicityMetric(modelConfig);
46
+ describe(
47
+ 'ToxicityMetric',
48
+ () => {
49
+ const metric = new ToxicityMetric(modelConfig);
49
50
 
50
- it('should be able to measure a prompt that is toxic', async () => {
51
- const result = await metric.measure(testCases[0].input, testCases[0].output);
51
+ it('should be able to measure a prompt that is toxic', async () => {
52
+ const result = await metric.measure(testCases[0].input, testCases[0].output);
52
53
 
53
- expect(result.score).toBeCloseTo(testCases[0].expectedResult.score, 1);
54
- });
54
+ expect(result.score).toBeCloseTo(testCases[0].expectedResult.score, 1);
55
+ });
55
56
 
56
- it('should be able to measure a prompt that is not toxic', async () => {
57
- const result = await metric.measure(testCases[1].input, testCases[1].output);
57
+ it('should be able to measure a prompt that is not toxic', async () => {
58
+ const result = await metric.measure(testCases[1].input, testCases[1].output);
58
59
 
59
- expect(result.score).toBeCloseTo(testCases[1].expectedResult.score, 1);
60
- });
60
+ expect(result.score).toBeCloseTo(testCases[1].expectedResult.score, 1);
61
+ });
61
62
 
62
- it('should be able to measure a prompt that is midly toxic', async () => {
63
- const result = await metric.measure(testCases[2].input, testCases[2].output);
63
+ it('should be able to measure a prompt that is midly toxic', async () => {
64
+ const result = await metric.measure(testCases[2].input, testCases[2].output);
64
65
 
65
- expect(result.score).toBeCloseTo(testCases[2].expectedResult.score, 1);
66
- });
67
- });
66
+ expect(result.score).toBeCloseTo(testCases[2].expectedResult.score, 1);
67
+ });
68
+ },
69
+ {
70
+ timeout: 15 * SECONDS,
71
+ },
72
+ );
@@ -1,4 +1,4 @@
1
- import { describe, it, expect, beforeEach } from '@jest/globals';
1
+ import { describe, it, expect, beforeEach } from 'vitest';
2
2
 
3
3
  import { CompletenessMetric } from './index';
4
4
 
@@ -1,4 +1,4 @@
1
- import { describe, it, expect } from '@jest/globals';
1
+ import { describe, it, expect } from 'vitest';
2
2
 
3
3
  import { ContentSimilarityMetric } from './index';
4
4
 
@@ -1,4 +1,4 @@
1
- import { describe, it, expect } from '@jest/globals';
1
+ import { describe, it, expect } from 'vitest';
2
2
 
3
3
  import { KeywordCoverageMetric } from './index';
4
4
 
@@ -1,4 +1,4 @@
1
- import { describe, it, expect } from '@jest/globals';
1
+ import { describe, it, expect } from 'vitest';
2
2
 
3
3
  import { TextualDifferenceMetric } from './index';
4
4
 
@@ -1,4 +1,4 @@
1
- import { describe, it, expect } from '@jest/globals';
1
+ import { describe, it, expect } from 'vitest';
2
2
 
3
3
  import { ToneConsistencyMetric } from './index';
4
4
 
@@ -0,0 +1,9 @@
1
+ import { defineConfig } from 'vitest/config';
2
+
3
+ export default defineConfig({
4
+ test: {
5
+ environment: 'node',
6
+ include: ['src/**/*.test.ts'],
7
+ exclude: ['**/node_modules/**', '**/dist/**'],
8
+ },
9
+ });
package/jest.config.ts DELETED
@@ -1,21 +0,0 @@
1
- import { config } from 'dotenv';
2
-
3
- config();
4
-
5
- export default {
6
- maxWorkers: 1,
7
- preset: 'ts-jest',
8
- extensionsToTreatAsEsm: ['.ts'],
9
- moduleNameMapper: {
10
- '^(\\.{1,2}/.*)\\.js$': '$1',
11
- },
12
- transform: {
13
- '^.+\\.tsx?$': [
14
- 'ts-jest',
15
- {
16
- useESM: true,
17
- isolatedModules: true,
18
- },
19
- ],
20
- },
21
- };