vitest-evals 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -11
- package/dist/compatibility.test.d.mts +2 -0
- package/dist/compatibility.test.d.ts +2 -0
- package/dist/compatibility.test.js +45009 -0
- package/dist/compatibility.test.js.map +1 -0
- package/dist/compatibility.test.mjs +45864 -0
- package/dist/compatibility.test.mjs.map +1 -0
- package/dist/formatScores.test.d.mts +2 -0
- package/dist/formatScores.test.d.ts +2 -0
- package/dist/formatScores.test.js +195 -0
- package/dist/formatScores.test.js.map +1 -0
- package/dist/formatScores.test.mjs +194 -0
- package/dist/formatScores.test.mjs.map +1 -0
- package/dist/index.d.mts +9 -1
- package/dist/index.d.ts +9 -1
- package/dist/index.js +18 -13
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +18 -13
- package/dist/index.mjs.map +1 -1
- package/dist/reporter.d.mts +7 -0
- package/dist/reporter.d.ts +7 -0
- package/dist/reporter.js +53 -0
- package/dist/reporter.js.map +1 -0
- package/dist/reporter.mjs +22 -0
- package/dist/reporter.mjs.map +1 -0
- package/dist/wrapText.test.d.mts +2 -0
- package/dist/wrapText.test.d.ts +2 -0
- package/dist/wrapText.test.js +162 -0
- package/dist/wrapText.test.js.map +1 -0
- package/dist/wrapText.test.mjs +161 -0
- package/dist/wrapText.test.mjs.map +1 -0
- package/package.json +27 -14
package/README.md
CHANGED
|
@@ -18,7 +18,7 @@ async function answerQuestion(prompt: string) {
|
|
|
18
18
|
const { text } = await generateText({
|
|
19
19
|
model,
|
|
20
20
|
prompt,
|
|
21
|
-
})
|
|
21
|
+
});
|
|
22
22
|
return text;
|
|
23
23
|
}
|
|
24
24
|
```
|
|
@@ -52,7 +52,7 @@ describeEval("my evals", {
|
|
|
52
52
|
|
|
53
53
|
// The timeout for each test. Defaults to 10s. You may need to increase this if your model
|
|
54
54
|
// provider has high latency or you're using a large number of scorers.
|
|
55
|
-
// timeout:
|
|
55
|
+
// timeout: 60000,
|
|
56
56
|
|
|
57
57
|
// A check to determine if these tests should run. This is helpful to control tests so they only
|
|
58
58
|
// run in certain situations, for example if a model provider's API key is defined.
|
|
@@ -70,7 +70,12 @@ import { Factuality } from "autoevals";
|
|
|
70
70
|
|
|
71
71
|
describe("my test suite", () => {
|
|
72
72
|
it("kind of works", () => {
|
|
73
|
-
expect("What is the capital of France?").toEval(
|
|
73
|
+
expect("What is the capital of France?").toEval(
|
|
74
|
+
"Paris",
|
|
75
|
+
answerQuestion,
|
|
76
|
+
Factuality,
|
|
77
|
+
0.8
|
|
78
|
+
);
|
|
74
79
|
});
|
|
75
80
|
});
|
|
76
81
|
```
|
|
@@ -81,28 +86,28 @@ Scorers are compatible with the `autoevals` interface, but are also simple to im
|
|
|
81
86
|
|
|
82
87
|
```javascript
|
|
83
88
|
export const Contains = async (opts: {
|
|
84
|
-
input: string
|
|
85
|
-
expected: string
|
|
86
|
-
output: string
|
|
89
|
+
input: string,
|
|
90
|
+
expected: string,
|
|
91
|
+
output: string,
|
|
87
92
|
}) => {
|
|
88
93
|
return {
|
|
89
94
|
score: output.indexOf(expected) !== -1 ? 1.0 : 0.0,
|
|
90
95
|
};
|
|
91
|
-
}
|
|
96
|
+
};
|
|
92
97
|
```
|
|
93
98
|
|
|
94
99
|
For something more realistic, here's a reimplementation of the Factuality scorer from `autoevals`, with some flexibility
|
|
95
100
|
on the model, enabling you to evaluate against multiple models:
|
|
96
101
|
|
|
97
|
-
|
|
102
|
+
````javascript
|
|
98
103
|
import { generateObject, type LanguageModel } from "ai";
|
|
99
104
|
import { z } from "zod";
|
|
100
105
|
|
|
101
106
|
/**
|
|
102
107
|
* A Factuality checker utilizing the `ai` SDK based on the implementation in `autoevals`.
|
|
103
|
-
*
|
|
108
|
+
*
|
|
104
109
|
* @param model - The language model to utilize (via `ai`).
|
|
105
|
-
*
|
|
110
|
+
*
|
|
106
111
|
* @example
|
|
107
112
|
* ```javascript
|
|
108
113
|
* import { openai } from "@ai-sdk/openai";
|
|
@@ -167,7 +172,7 @@ export function Factuality(model: LanguageModel) {
|
|
|
167
172
|
};
|
|
168
173
|
};
|
|
169
174
|
}
|
|
170
|
-
|
|
175
|
+
````
|
|
171
176
|
|
|
172
177
|
### Separating Evals
|
|
173
178
|
|