@mastra/evals 0.1.0-alpha.22 → 0.1.0-alpha.24

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # @mastra/evals
2
2
 
3
+ ## 0.1.0-alpha.24
4
+
5
+ ### Patch Changes
6
+
7
+ - Updated dependencies [73d112c]
8
+ - @mastra/core@0.1.27-alpha.82
9
+
10
+ ## 0.1.0-alpha.23
11
+
12
+ ### Patch Changes
13
+
14
+ - Updated dependencies [9fb3039]
15
+ - @mastra/core@0.1.27-alpha.81
16
+
3
17
  ## 0.1.0-alpha.22
4
18
 
5
19
  ### Patch Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mastra/evals",
3
- "version": "0.1.0-alpha.22",
3
+ "version": "0.1.0-alpha.24",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -38,7 +38,7 @@
38
38
  "sentiment": "^5.0.2",
39
39
  "string-similarity": "^4.0.4",
40
40
  "zod": "^3.24.1",
41
- "@mastra/core": "0.1.27-alpha.80"
41
+ "@mastra/core": "0.1.27-alpha.82"
42
42
  },
43
43
  "devDependencies": {
44
44
  "@babel/preset-env": "^7.26.0",
@@ -109,13 +109,13 @@ describe(
109
109
  expect(result.score).toBeCloseTo(testCases[0].expectedResult.score, 1);
110
110
  });
111
111
 
112
- it('should be able to measure a prompt with mostly relevant information', async () => {
113
- const result = await metric.measure(testCases[1].input, testCases[1].output);
114
- const expectedScore = testCases[1].expectedResult.score;
115
- const difference = Math.abs(result.score - expectedScore);
112
+ it('should be able to measure a prompt with mostly relevant information', async () => {
113
+ const result = await metric.measure(testCases[1].input, testCases[1].output);
114
+ const expectedScore = testCases[1].expectedResult.score;
115
+ const difference = Math.abs(result.score - expectedScore);
116
116
 
117
- expect(Math.round(difference * 10) / 10).toBeLessThanOrEqual(0.1);
118
- });
117
+ expect(Math.round(difference * 10) / 10).toBeLessThanOrEqual(0.1);
118
+ });
119
119
 
120
120
  it('should be able to measure a prompt with partial relevance', async () => {
121
121
  const result = await metric.measure(testCases[2].input, testCases[2].output);
@@ -50,28 +50,26 @@ const modelConfig: ModelConfig = {
50
50
  apiKey: process.env.OPENAI_API_KEY,
51
51
  };
52
52
 
53
- describe(
54
- 'BiasMetric',
55
- () => {
56
- const metric = new BiasMetric(modelConfig);
53
+ describe('BiasMetric', () => {
54
+ const metric = new BiasMetric(modelConfig);
57
55
 
58
- it('should be able to measure a prompt that is biased', async () => {
59
- const result = await metric.measure(testCases[0].input, testCases[0].output);
60
- expect(result.score).toBeCloseTo(testCases[0].expectedResult.score, 1);
61
- });
56
+ it('should be able to measure a prompt that is biased', async () => {
57
+ const result = await metric.measure(testCases[0].input, testCases[0].output);
58
+ expect(result.score).toBeCloseTo(testCases[0].expectedResult.score, 1);
59
+ }, 10000);
62
60
 
63
- it('should be able to measure a prompt that is almost not biased', async () => {
64
- const result = await metric.measure(testCases[1].input, testCases[1].output);
65
- expect(result.score).toBeLessThan(0.5);
66
- });
61
+ it('should be able to measure a prompt that is almost not biased', async () => {
62
+ const result = await metric.measure(testCases[1].input, testCases[1].output);
63
+ expect(result.score).toBeLessThan(0.5);
64
+ }, 10000);
67
65
 
68
66
  it('should be able to measure a prompt that is mildly biased but actually not', async () => {
69
67
  const result = await metric.measure(testCases[2].input, testCases[2].output);
70
68
  expect(result.score).toBe(0);
71
- });
69
+ }, 10000);
72
70
 
73
71
  it('should be able to measure a prompt that is mildly biased', async () => {
74
72
  const result = await metric.measure(testCases[3].input, testCases[3].output);
75
73
  expect(result.score).toBeLessThan(0.8);
76
- });
74
+ }, 10000);
77
75
  });