@mastra/evals 0.1.0-alpha.17 → 0.1.0-alpha.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/README.md +186 -0
- package/dist/evals.cjs.development.js +1 -0
- package/dist/evals.cjs.development.js.map +1 -1
- package/dist/evals.cjs.production.min.js.map +1 -1
- package/dist/evals.esm.js +1 -0
- package/dist/evals.esm.js.map +1 -1
- package/dist/evaluation.d.ts +2 -2
- package/dist/evaluation.d.ts.map +1 -1
- package/package.json +4 -7
- package/src/evaluation.test.ts +1 -1
- package/src/evaluation.ts +2 -0
- package/src/metrics/llm/answer-relevancy/index.test.ts +49 -44
- package/src/metrics/llm/bias/index.test.ts +13 -12
- package/src/metrics/llm/context-position/index.test.ts +92 -87
- package/src/metrics/llm/context-precision/index.test.ts +69 -64
- package/src/metrics/llm/context-relevancy/index.test.ts +27 -22
- package/src/metrics/llm/contextual-recall/index.test.ts +28 -23
- package/src/metrics/llm/faithfulness/index.test.ts +81 -76
- package/src/metrics/llm/hallucination/index.test.ts +85 -80
- package/src/metrics/llm/prompt-alignment/index.test.ts +53 -48
- package/src/metrics/llm/summarization/index.test.ts +85 -80
- package/src/metrics/llm/toxicity/index.test.ts +22 -17
- package/src/metrics/nlp/completeness/index.test.ts +1 -1
- package/src/metrics/nlp/content-similarity/index.test.ts +1 -1
- package/src/metrics/nlp/keyword-coverage/index.test.ts +1 -1
- package/src/metrics/nlp/textual-difference/index.test.ts +1 -1
- package/src/metrics/nlp/tone/index.test.ts +1 -1
- package/vitest.config.ts +9 -0
- package/jest.config.ts +0 -21
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { describe, it, expect, jest } from '@jest/globals';
|
|
2
1
|
import { type ModelConfig } from '@mastra/core';
|
|
2
|
+
import { describe, it, expect } from 'vitest';
|
|
3
3
|
|
|
4
4
|
import { TestCaseWithContext } from '../utils';
|
|
5
5
|
|
|
@@ -150,7 +150,6 @@ const testCases: TestCaseWithContext[] = [
|
|
|
150
150
|
];
|
|
151
151
|
|
|
152
152
|
const SECONDS = 10000;
|
|
153
|
-
jest.setTimeout(15 * SECONDS);
|
|
154
153
|
|
|
155
154
|
const modelConfig: ModelConfig = {
|
|
156
155
|
provider: 'OPEN_AI',
|
|
@@ -159,88 +158,94 @@ const modelConfig: ModelConfig = {
|
|
|
159
158
|
apiKey: process.env.OPENAI_API_KEY,
|
|
160
159
|
};
|
|
161
160
|
|
|
162
|
-
describe(
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
161
|
+
describe(
|
|
162
|
+
'ContextPositionMetric',
|
|
163
|
+
() => {
|
|
164
|
+
it('should handle perfect ordering with all relevant pieces', async () => {
|
|
165
|
+
const testCase = testCases[0]!;
|
|
166
|
+
const metric = new ContextPositionMetric(modelConfig, { context: testCase.context });
|
|
167
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
168
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
169
|
+
});
|
|
170
|
+
|
|
171
|
+
it('should handle mixed relevance case', async () => {
|
|
172
|
+
const testCase = testCases[1]!;
|
|
173
|
+
const metric = new ContextPositionMetric(modelConfig, { context: testCase.context });
|
|
174
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
175
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
176
|
+
});
|
|
177
|
+
|
|
178
|
+
it('should handle domain knowledge relevance', async () => {
|
|
179
|
+
const testCase = testCases[2]!;
|
|
180
|
+
const metric = new ContextPositionMetric(modelConfig, { context: testCase.context });
|
|
181
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
182
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
183
|
+
});
|
|
184
|
+
|
|
185
|
+
it('should handle mixed relevance with good ordering', async () => {
|
|
186
|
+
const testCase = testCases[3]!;
|
|
187
|
+
const metric = new ContextPositionMetric(modelConfig, { context: testCase.context });
|
|
188
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
189
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
190
|
+
});
|
|
191
|
+
|
|
192
|
+
it('should handle single relevant piece at start', async () => {
|
|
193
|
+
const testCase = testCases[4]!;
|
|
194
|
+
const metric = new ContextPositionMetric(modelConfig, { context: testCase.context });
|
|
195
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
196
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
197
|
+
});
|
|
198
|
+
|
|
199
|
+
it('should handle single relevant piece in middle', async () => {
|
|
200
|
+
const testCase = testCases[5]!;
|
|
201
|
+
const metric = new ContextPositionMetric(modelConfig, { context: testCase.context });
|
|
202
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
203
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
204
|
+
});
|
|
205
|
+
|
|
206
|
+
it('should handle single relevant piece at end', async () => {
|
|
207
|
+
const testCase = testCases[6]!;
|
|
208
|
+
const metric = new ContextPositionMetric(modelConfig, { context: testCase.context });
|
|
209
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
210
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
211
|
+
});
|
|
212
|
+
|
|
213
|
+
it('should handle empty context', async () => {
|
|
214
|
+
const testCase = testCases[7]!;
|
|
215
|
+
const metric = new ContextPositionMetric(modelConfig, { context: testCase.context });
|
|
216
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
217
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
218
|
+
});
|
|
219
|
+
|
|
220
|
+
it('should handle all irrelevant context', async () => {
|
|
221
|
+
const testCase = testCases[8]!;
|
|
222
|
+
const metric = new ContextPositionMetric(modelConfig, { context: testCase.context });
|
|
223
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
224
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
225
|
+
});
|
|
226
|
+
|
|
227
|
+
it('should handle complex interdependent context', async () => {
|
|
228
|
+
const testCase = testCases[9]!;
|
|
229
|
+
const metric = new ContextPositionMetric(modelConfig, { context: testCase.context });
|
|
230
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
231
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
232
|
+
});
|
|
233
|
+
|
|
234
|
+
it('should handle single piece context', async () => {
|
|
235
|
+
const testCase = testCases[10]!;
|
|
236
|
+
const metric = new ContextPositionMetric(modelConfig, { context: testCase.context });
|
|
237
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
238
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
239
|
+
});
|
|
240
|
+
|
|
241
|
+
it('should handle two relevant pieces at end', async () => {
|
|
242
|
+
const testCase = testCases[11]!;
|
|
243
|
+
const metric = new ContextPositionMetric(modelConfig, { context: testCase.context });
|
|
244
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
245
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
246
|
+
});
|
|
247
|
+
},
|
|
248
|
+
{
|
|
249
|
+
timeout: 15 * SECONDS,
|
|
250
|
+
},
|
|
251
|
+
);
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { describe, it, expect, jest } from '@jest/globals';
|
|
2
1
|
import { type ModelConfig } from '@mastra/core';
|
|
2
|
+
import { describe, it, expect } from 'vitest';
|
|
3
3
|
|
|
4
4
|
import { TestCaseWithContext } from '../utils';
|
|
5
5
|
|
|
@@ -127,7 +127,6 @@ const testCases: TestCaseWithContext[] = [
|
|
|
127
127
|
];
|
|
128
128
|
|
|
129
129
|
const SECONDS = 10000;
|
|
130
|
-
jest.setTimeout(15 * SECONDS);
|
|
131
130
|
|
|
132
131
|
const modelConfig: ModelConfig = {
|
|
133
132
|
provider: 'OPEN_AI',
|
|
@@ -136,74 +135,80 @@ const modelConfig: ModelConfig = {
|
|
|
136
135
|
apiKey: process.env.OPENAI_API_KEY,
|
|
137
136
|
};
|
|
138
137
|
|
|
139
|
-
describe(
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
138
|
+
describe(
|
|
139
|
+
'ContextPrecisionMetric',
|
|
140
|
+
() => {
|
|
141
|
+
it('should measure perfect context precision with all relevant items', async () => {
|
|
142
|
+
const testCase = testCases[0]!;
|
|
143
|
+
const metric = new ContextPrecisionMetric(modelConfig, { context: testCase.context });
|
|
144
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
145
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
146
|
+
});
|
|
146
147
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
148
|
+
it('should measure high precision with irrelevant item at end', async () => {
|
|
149
|
+
const testCase = testCases[1]!;
|
|
150
|
+
const metric = new ContextPrecisionMetric(modelConfig, { context: testCase.context });
|
|
151
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
152
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
153
|
+
});
|
|
153
154
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
155
|
+
it('should measure precision with two relevant items after irrelevant start', async () => {
|
|
156
|
+
const testCase = testCases[2]!;
|
|
157
|
+
const metric = new ContextPrecisionMetric(modelConfig, { context: testCase.context });
|
|
158
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
159
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
160
|
+
});
|
|
160
161
|
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
162
|
+
it('should measure precision with alternating relevant items', async () => {
|
|
163
|
+
const testCase = testCases[3]!;
|
|
164
|
+
const metric = new ContextPrecisionMetric(modelConfig, { context: testCase.context });
|
|
165
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
166
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
167
|
+
});
|
|
167
168
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
169
|
+
it('should measure precision with single relevant item at start', async () => {
|
|
170
|
+
const testCase = testCases[4]!;
|
|
171
|
+
const metric = new ContextPrecisionMetric(modelConfig, { context: testCase.context });
|
|
172
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
173
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
174
|
+
});
|
|
174
175
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
176
|
+
it('should handle completely irrelevant context', async () => {
|
|
177
|
+
const testCase = testCases[5]!;
|
|
178
|
+
const metric = new ContextPrecisionMetric(modelConfig, { context: testCase.context });
|
|
179
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
180
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
181
|
+
});
|
|
181
182
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
183
|
+
it('should handle single relevant context perfectly', async () => {
|
|
184
|
+
const testCase = testCases[6]!;
|
|
185
|
+
const metric = new ContextPrecisionMetric(modelConfig, { context: testCase.context });
|
|
186
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
187
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
188
|
+
});
|
|
188
189
|
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
190
|
+
it('should measure precision with single relevant item at end', async () => {
|
|
191
|
+
const testCase = testCases[7]!;
|
|
192
|
+
const metric = new ContextPrecisionMetric(modelConfig, { context: testCase.context });
|
|
193
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
194
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
195
|
+
});
|
|
195
196
|
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
197
|
+
it('should handle empty context', async () => {
|
|
198
|
+
const testCase = testCases[8]!;
|
|
199
|
+
const metric = new ContextPrecisionMetric(modelConfig, { context: testCase.context });
|
|
200
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
201
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
202
|
+
});
|
|
202
203
|
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
}
|
|
204
|
+
it('should handle single irrelevant context', async () => {
|
|
205
|
+
const testCase = testCases[9]!;
|
|
206
|
+
const metric = new ContextPrecisionMetric(modelConfig, { context: testCase.context });
|
|
207
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
208
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
209
|
+
});
|
|
210
|
+
},
|
|
211
|
+
{
|
|
212
|
+
timeout: 15 * SECONDS,
|
|
213
|
+
},
|
|
214
|
+
);
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { describe, it, expect, jest } from '@jest/globals';
|
|
2
1
|
import { type ModelConfig } from '@mastra/core';
|
|
2
|
+
import { describe, it, expect } from 'vitest';
|
|
3
3
|
|
|
4
4
|
import { isCloserTo } from '../utils';
|
|
5
5
|
import { TestCaseWithContext } from '../utils';
|
|
@@ -55,7 +55,6 @@ const testCases: TestCaseWithContext[] = [
|
|
|
55
55
|
];
|
|
56
56
|
|
|
57
57
|
const SECONDS = 10000;
|
|
58
|
-
jest.setTimeout(15 * SECONDS);
|
|
59
58
|
|
|
60
59
|
const modelConfig: ModelConfig = {
|
|
61
60
|
provider: 'OPEN_AI',
|
|
@@ -64,25 +63,31 @@ const modelConfig: ModelConfig = {
|
|
|
64
63
|
apiKey: process.env.OPENAI_API_KEY,
|
|
65
64
|
};
|
|
66
65
|
|
|
67
|
-
describe(
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
66
|
+
describe(
|
|
67
|
+
'ContextPrecisionMetric',
|
|
68
|
+
() => {
|
|
69
|
+
it('should measure perfect context relevancy with all relevant items', async () => {
|
|
70
|
+
const testCase = testCases[0]!;
|
|
71
|
+
const metric = new ContextRelevancyMetric(modelConfig, { context: testCase.context });
|
|
72
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
73
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
74
|
+
});
|
|
74
75
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
76
|
+
it('should measure mixed relevancy where only some contexts are relevant', async () => {
|
|
77
|
+
const testCase = testCases[1]!;
|
|
78
|
+
const metric = new ContextRelevancyMetric(modelConfig, { context: testCase.context });
|
|
79
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
80
|
+
expect(isCloserTo(result.score, testCase.expectedResult.score, 0)).toBe(true);
|
|
81
|
+
});
|
|
81
82
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
}
|
|
83
|
+
it('should measure no relevancy where contexts are completely unrelated', async () => {
|
|
84
|
+
const testCase = testCases[2]!;
|
|
85
|
+
const metric = new ContextRelevancyMetric(modelConfig, { context: testCase.context });
|
|
86
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
87
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
88
|
+
});
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
timeout: 15 * SECONDS,
|
|
92
|
+
},
|
|
93
|
+
);
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { describe, it, expect, jest } from '@jest/globals';
|
|
2
1
|
import { type ModelConfig } from '@mastra/core';
|
|
2
|
+
import { describe, it, expect } from 'vitest';
|
|
3
3
|
|
|
4
4
|
import { isCloserTo } from '../utils';
|
|
5
5
|
import { TestCaseWithContext } from '../utils';
|
|
@@ -51,7 +51,6 @@ const testCases: TestCaseWithContext[] = [
|
|
|
51
51
|
];
|
|
52
52
|
|
|
53
53
|
const SECONDS = 10000;
|
|
54
|
-
jest.setTimeout(15 * SECONDS);
|
|
55
54
|
|
|
56
55
|
const modelConfig: ModelConfig = {
|
|
57
56
|
provider: 'OPEN_AI',
|
|
@@ -60,27 +59,33 @@ const modelConfig: ModelConfig = {
|
|
|
60
59
|
apiKey: process.env.OPENAI_API_KEY,
|
|
61
60
|
};
|
|
62
61
|
|
|
63
|
-
describe(
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
62
|
+
describe(
|
|
63
|
+
'ContextualRecallMetric',
|
|
64
|
+
() => {
|
|
65
|
+
it('should succeed when context is relevant', async () => {
|
|
66
|
+
const testCase = testCases[0]!;
|
|
67
|
+
const metric = new ContextualRecallMetric(modelConfig, { context: testCase.context });
|
|
68
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
69
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 2);
|
|
70
|
+
});
|
|
70
71
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
72
|
+
it('should be mixed', async () => {
|
|
73
|
+
const testCase = testCases[1]!;
|
|
74
|
+
const metric = new ContextualRecallMetric(modelConfig, { context: testCase.context });
|
|
75
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
75
76
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
77
|
+
expect(isCloserTo(result.score, testCase.expectedResult.score, 1)).toBe(true);
|
|
78
|
+
expect(result.score - testCase.expectedResult.score).toBeGreaterThan(0);
|
|
79
|
+
});
|
|
79
80
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
}
|
|
81
|
+
it('should be none', async () => {
|
|
82
|
+
const testCase = testCases[2]!;
|
|
83
|
+
const metric = new ContextualRecallMetric(modelConfig, { context: testCase.context });
|
|
84
|
+
const result = await metric.measure(testCase.input, testCase.output);
|
|
85
|
+
expect(result.score).toBeCloseTo(testCase.expectedResult.score, 1);
|
|
86
|
+
});
|
|
87
|
+
},
|
|
88
|
+
{
|
|
89
|
+
timeout: 15 * SECONDS,
|
|
90
|
+
},
|
|
91
|
+
);
|