vitest-evals 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +211 -172
  2. package/dist/index.d.mts +2 -98
  3. package/dist/index.d.ts +2 -98
  4. package/dist/index.js +270 -11
  5. package/dist/index.js.map +1 -1
  6. package/dist/index.mjs +269 -11
  7. package/dist/index.mjs.map +1 -1
  8. package/dist/scorers/index.d.mts +2 -0
  9. package/dist/scorers/index.d.ts +2 -0
  10. package/dist/scorers/index.js +282 -0
  11. package/dist/scorers/index.js.map +1 -0
  12. package/dist/scorers/index.mjs +256 -0
  13. package/dist/scorers/index.mjs.map +1 -0
  14. package/dist/scorers/toolCallScorer.d.mts +240 -0
  15. package/dist/scorers/toolCallScorer.d.ts +240 -0
  16. package/dist/scorers/toolCallScorer.js +280 -0
  17. package/dist/scorers/toolCallScorer.js.map +1 -0
  18. package/dist/scorers/toolCallScorer.mjs +256 -0
  19. package/dist/scorers/toolCallScorer.mjs.map +1 -0
  20. package/package.json +16 -4
  21. package/dist/compatibility.test.d.mts +0 -2
  22. package/dist/compatibility.test.d.ts +0 -2
  23. package/dist/compatibility.test.js +0 -45009
  24. package/dist/compatibility.test.js.map +0 -1
  25. package/dist/compatibility.test.mjs +0 -45864
  26. package/dist/compatibility.test.mjs.map +0 -1
  27. package/dist/formatScores.test.d.mts +0 -2
  28. package/dist/formatScores.test.d.ts +0 -2
  29. package/dist/formatScores.test.js +0 -195
  30. package/dist/formatScores.test.js.map +0 -1
  31. package/dist/formatScores.test.mjs +0 -194
  32. package/dist/formatScores.test.mjs.map +0 -1
  33. package/dist/wrapText.test.d.mts +0 -2
  34. package/dist/wrapText.test.d.ts +0 -2
  35. package/dist/wrapText.test.js +0 -162
  36. package/dist/wrapText.test.js.map +0 -1
  37. package/dist/wrapText.test.mjs +0 -161
  38. package/dist/wrapText.test.mjs.map +0 -1
package/README.md CHANGED
@@ -1,186 +1,246 @@
1
1
  # vitest-evals
2
2
 
3
- This project is a prototype of extending vitest to support basic _Evals_ functionality. Evals are a type of testing that is most commonly deployed to _evaluate_ the results of calls to language models. This allows you to utilize them with a pattern of testing you're familiar with, working well with your existing continuous integration toolchain.
3
+ Evaluate LLM outputs using the familiar Vitest testing framework.
4
4
 
5
- This is heavily inspired by [Evalite](https://www.evalite.dev/), but opts for a vitest-native approach to maximize the compatibility of the existing ecosystem. This means you can use it with your existing toolchain, including reporting such as code coverage and xunit.
6
-
7
- ## Use
5
+ ## Installation
8
6
 
9
7
  ```shell
10
8
  npm install -D vitest-evals
11
9
  ```
12
10
 
13
- You've likely already got a mechanism for passing the user input into your model, for example:
11
+ ## Quick Start
14
12
 
15
13
  ```javascript
16
- async function answerQuestion(prompt: string) {
17
- const model = openai("gpt-4o");
18
- const { text } = await generateText({
19
- model,
20
- prompt,
21
- });
22
- return text;
23
- }
14
+ import { describeEval } from "vitest-evals";
15
+
16
+ describeEval("capital cities", {
17
+ data: async () => [
18
+ { input: "What is the capital of France?", expected: "Paris" },
19
+ { input: "What is the capital of Japan?", expected: "Tokyo" }
20
+ ],
21
+ task: async (input) => {
22
+ const response = await queryLLM(input);
23
+ return response; // Simple string return
24
+ },
25
+ scorers: [async ({ output, expected }) => ({
26
+ score: output.toLowerCase().includes(expected.toLowerCase()) ? 1.0 : 0.0
27
+ })],
28
+ threshold: 0.8
29
+ });
24
30
  ```
25
31
 
26
- You'll use this as the `task` within your evals, and then you simply need to define a set of scenarios
27
- and a way to validate if the LLM is responding as you desire:
32
+ ## Tasks
33
+
34
+ Tasks process inputs and return outputs. Two formats are supported:
28
35
 
29
36
  ```javascript
30
- import { describeEval } from "vitest-evals";
31
- import { Factuality } from "autoevals";
32
-
33
- describeEval("my evals", {
34
- data: async () => {
35
- // The scenarios you wish to evaluate
36
- return [
37
- input: "What is the capital of France?",
38
- expected: "Paris",
39
- ];
40
- },
37
+ // Simple: just return a string
38
+ const task = async (input) => "response";
39
+
40
+ // With tool tracking: return a TaskResult
41
+ const task = async (input) => ({
42
+ result: "response",
43
+ toolCalls: [
44
+ { name: "search", arguments: { query: "..." }, result: {...} }
45
+ ]
46
+ });
47
+ ```
48
+
49
+ ## Scorers
50
+
51
+ Scorers evaluate outputs and return a score (0-1). Use built-in scorers or create your own:
41
52
 
42
- task: answerQuestion,
53
+ ```javascript
54
+ // Built-in scorer
55
+ import { ToolCallScorer } from "vitest-evals";
56
+ // Or import individually
57
+ import { ToolCallScorer } from "vitest-evals/scorers/toolCallScorer";
58
+
59
+ describeEval("tool usage", {
60
+ data: async () => [
61
+ { input: "Search weather", expectedTools: [{ name: "weather_api" }] }
62
+ ],
63
+ task: weatherTask,
64
+ scorers: [ToolCallScorer()]
65
+ });
43
66
 
44
- // Scorers determine if the response was acceptable - in this case we're using
45
- // a secondary LLM prompt to judge the response of the first.
46
- scorers: [Factuality],
67
+ // Custom scorer
68
+ const LengthScorer = async ({ output }) => ({
69
+ score: output.length > 50 ? 1.0 : 0.0
70
+ });
47
71
 
48
- // The threshold required for the average score for this eval to pass. This will be
49
- // based on the scorers you've provided, and in the case of Factuality, we might be
50
- // ok with a 60% score (see the implementation for why).
51
- threshold: 0.6,
72
+ // TypeScript scorer with custom options
73
+ import { type ScoreFn, type BaseScorerOptions } from "vitest-evals";
52
74
 
53
- // The timeout for each test. Defaults to 10s. You may need to increase this if your model
54
- // provider has high latency or you're using a large number of scorers.
55
- // timeout: 60000,
75
+ interface CustomOptions extends BaseScorerOptions {
76
+ minLength: number;
77
+ }
56
78
 
57
- // A check to determine if these tests should run. This is helpful to control tests so they only
58
- // in certain situations, for example if a model providers API key is defined.
59
- // skipIf: () => !process.env.OPENAI_API_KEY
60
- })
79
+ const TypedScorer: ScoreFn<CustomOptions> = async (opts) => ({
80
+ score: opts.output.length >= opts.minLength ? 1.0 : 0.0
81
+ });
61
82
  ```
62
83
 
63
- ### Existing Test Suites
84
+ ### Built-in Scorers
85
+
86
+ #### ToolCallScorer
87
+ Evaluates if the expected tools were called with correct arguments.
64
88
 
65
89
  ```javascript
66
- // import `vitest-evals` to expose `expect().toEval()`
67
- // This can also be done via `setupFiles` pattern in `vitest`.
68
- import "vitest-evals";
69
- import { Factuality } from "autoevals";
70
-
71
- describe("my test suite", () => {
72
- it("kind of works", () => {
73
- expect("What is the capital of France?").toEval(
74
- "Paris",
75
- answerQuestion,
76
- Factuality,
77
- 0.8
78
- );
79
- });
90
+ // Basic usage - strict matching, any order
91
+ describeEval("search test", {
92
+ data: async () => [{
93
+ input: "Find Italian restaurants",
94
+ expectedTools: [
95
+ { name: "search", arguments: { type: "restaurant" } },
96
+ { name: "filter", arguments: { cuisine: "italian" } }
97
+ ]
98
+ }],
99
+ task: myTask,
100
+ scorers: [ToolCallScorer()]
80
101
  });
102
+
103
+ // Strict evaluation - exact order and parameters
104
+ scorers: [ToolCallScorer({
105
+ ordered: true, // Tools must be in exact order
106
+ params: "strict" // Parameters must match exactly
107
+ })]
108
+
109
+ // Flexible evaluation
110
+ scorers: [ToolCallScorer({
111
+ requireAll: false, // Partial matches give partial credit
112
+ allowExtras: false // No additional tools allowed
113
+ })]
114
+ ```
115
+
116
+ **Default behavior:**
117
+ - Strict parameter matching (exact equality required)
118
+ - Any order allowed
119
+ - Extra tools allowed
120
+ - All expected tools required
121
+
122
+ ## AI SDK Integration
123
+
124
+ See [`src/ai-sdk-integration.test.ts`](src/ai-sdk-integration.test.ts) for a complete example with the Vercel AI SDK.
125
+
126
+ Transform provider responses to our format:
127
+
128
+ ```javascript
129
+ // Vercel AI SDK
130
+ const { text, toolCalls, toolResults } = await generateText(...);
131
+ return {
132
+ result: text,
133
+ toolCalls: toolCalls?.map((call, i) => ({
134
+ id: call.toolCallId,
135
+ name: call.toolName,
136
+ arguments: call.args,
137
+ result: toolResults?.[i]?.result,
138
+ status: toolResults?.[i]?.error ? 'failed' : 'completed'
139
+ }))
140
+ };
141
+ ```
142
+
143
+ ## Advanced Usage
144
+
145
+ ### Advanced Scorers
146
+
147
+ #### Using autoevals
148
+
149
+ For sophisticated evaluation, use autoevals scorers:
150
+
151
+ ```javascript
152
+ import { Factuality, ClosedQA } from "autoevals";
153
+
154
+ scorers: [
155
+ Factuality, // LLM-based factuality checking
156
+ ClosedQA.partial({
157
+ criteria: "Does the answer mention Paris?"
158
+ })
159
+ ]
81
160
  ```
82
161
 
83
- ### Scoring
162
+ #### Custom LLM-based Factuality Scorer
84
163
 
85
- Scorers are compatible with the `autoevals` interface, but are also simple to implement on your own:
164
+ Here's an example of implementing your own LLM-based factuality scorer using the Vercel AI SDK:
86
165
 
87
166
  ```javascript
88
- export const Contains = async (opts: {
89
- input: string,
90
- expected: string,
91
- output: string,
92
- }) => {
167
+ import { generateObject } from 'ai';
168
+ import { openai } from '@ai-sdk/openai';
169
+ import { z } from 'zod';
170
+
171
+ const Factuality = (model = openai('gpt-4o')) => async ({ input, output, expected }) => {
172
+ if (!expected) {
173
+ return { score: 1.0, metadata: { rationale: "No expected answer" } };
174
+ }
175
+
176
+ const { object } = await generateObject({
177
+ model,
178
+ prompt: `
179
+ Compare the factual content of the submitted answer with the expert answer.
180
+
181
+ Question: ${input}
182
+ Expert: ${expected}
183
+ Submission: ${output}
184
+
185
+ Options:
186
+ (A) Subset of expert answer
187
+ (B) Superset of expert answer
188
+ (C) Same content as expert
189
+ (D) Contradicts expert answer
190
+ (E) Different but factually equivalent
191
+ `,
192
+ schema: z.object({
193
+ answer: z.enum(['A', 'B', 'C', 'D', 'E']),
194
+ rationale: z.string()
195
+ })
196
+ });
197
+
198
+ const scores = { A: 0.4, B: 0.6, C: 1, D: 0, E: 1 };
93
199
  return {
94
- score: output.indexOf(expected) !== -1 ? 1.0 : 0.0,
200
+ score: scores[object.answer],
201
+ metadata: { rationale: object.rationale, answer: object.answer }
95
202
  };
96
203
  };
204
+
205
+ // Usage
206
+ scorers: [Factuality()]
97
207
  ```
98
208
 
99
- For something more realistic, here's a reimplementation of the Factuality scorer from `autoevals`, with some flexibility
100
- on the model, enabling you to evaluate against multiple models:
101
-
102
- ````javascript
103
- import { generateObject, type LanguageModel } from "ai";
104
- import { z } from "zod";
105
-
106
- /**
107
- * A Factuality checker utilizing the `ai` SDK based on the implementation in `autoevals`.
108
- *
109
- * @param model - The language model to utilize (via `ai`).
110
- *
111
- * @example
112
- * ```javascript
113
- * import { openai } from "@ai-sdk/openai";
114
- *
115
- * scorers: [Factuality(openai("gpt-4o"))]
116
- * ```
117
- */
118
- export function Factuality(model: LanguageModel) {
119
- return async Factuality(opts: {
120
- input: string;
121
- output: string;
122
- expected?: string;
123
- }) => {
124
- const { object } = await generateObject({
125
- model,
126
- /**
127
- * Prompt implementation from `autoevals`:
128
- *
129
- * {@link https://github.com/braintrustdata/autoevals/blob/5aa20a0a9eb8fc9e07e9e5722ebf71c68d082f32/templates/factuality.yaml}
130
- */
131
- prompt: `
132
- You are comparing a submitted answer to an expert answer on a given question. Here is the data:
133
- [BEGIN DATA]
134
- ************
135
- [Question]: ${opts.input}
136
- ************
137
- [Expert]: ${opts.expected}
138
- ************
139
- [Submission]: ${opts.output}
140
- ************
141
- [END DATA]
142
-
143
- Compare the factual content of the submitted answer with the expert answer. Ignore any differences in style, grammar, or punctuation.
144
- The submitted answer may either be a subset or superset of the expert answer, or it may conflict with it. Determine which case applies. Answer the question by selecting one of the following options:
145
- (A) The submitted answer is a subset of the expert answer and is fully consistent with it.
146
- (B) The submitted answer is a superset of the expert answer and is fully consistent with it.
147
- (C) The submitted answer contains all the same details as the expert answer.
148
- (D) There is a disagreement between the submitted answer and the expert answer.
149
- (E) The answers differ, but these differences don't matter from the perspective of factuality.
150
- `,
151
- schema: z.object({
152
- answer: z.enum(["A", "B", "C", "D", "E"]).describe("Your selection."),
153
- rationale: z
154
- .string()
155
- .describe("Why you chose this answer. Be very detailed."),
156
- }),
157
- });
158
-
159
- const scores = {
160
- A: 0.4,
161
- B: 0.6,
162
- C: 1,
163
- D: 0,
164
- E: 1,
165
- };
166
-
167
- return {
168
- score: scores[object.answer],
169
- metadata: {
170
- rationale: object.rationale,
171
- },
172
- };
173
- };
174
- }
175
- ````
209
+ ### Skip Tests Conditionally
176
210
 
177
- ### Separating Evals
211
+ ```javascript
212
+ describeEval("gpt-4 tests", {
213
+ skipIf: () => !process.env.OPENAI_API_KEY,
214
+ // ...
215
+ });
216
+ ```
178
217
 
179
- An alternative to `skipIf` for controlling if evals run is creating an separate `vitest` configuration for them. This gives a lot of advantages, particularly allowing you to maintain two completely separate test suites. A good pattern you can enable with this is a filename-based-test selector:
218
+ ### Existing Test Suites
219
+
220
+ ```javascript
221
+ import "vitest-evals";
222
+
223
+ test("capital check", () => {
224
+ const simpleFactuality = async ({ output, expected }) => ({
225
+ score: output.toLowerCase().includes(expected.toLowerCase()) ? 1.0 : 0.0
226
+ });
227
+
228
+ expect("What is the capital of France?").toEval(
229
+ "Paris",
230
+ answerQuestion,
231
+ simpleFactuality,
232
+ 0.8
233
+ );
234
+ });
235
+ ```
236
+
237
+ ## Configuration
238
+
239
+ ### Separate Eval Configuration
240
+
241
+ Create `vitest.evals.config.ts`:
180
242
 
181
243
  ```javascript
182
- // vitest.evals.config.ts
183
- /// <reference types="vitest" />
184
244
  import { defineConfig } from "vitest/config";
185
245
  import defaultConfig from "./vitest.config";
186
246
 
@@ -188,41 +248,20 @@ export default defineConfig({
188
248
  ...defaultConfig,
189
249
  test: {
190
250
  ...defaultConfig.test,
191
- // run `eval` files rather than typical `test` files
192
- include: ["src/**/*.eval.{js,mjs,cjs,ts,mts,cts,jsx,tsx}"],
251
+ include: ["src/**/*.eval.{js,ts}"],
193
252
  },
194
253
  });
195
254
  ```
196
255
 
197
- In the above, we're telling it to only match only `*.eval.*` files (vs the typical `*.test.*` or `*.spec.*`). We're also inheriting from our default `vitest.config.ts`. This gives us a clean way to run only tests, or run only evals:
256
+ Run evals separately:
198
257
 
199
258
  ```shell
200
259
  vitest --config=vitest.evals.config.ts
201
260
  ```
202
261
 
203
- Its recommended to add this to your `package.json`, such as under an `eval` helper:
204
-
205
- ```javascript
206
- // package.json
207
- {
208
- // ...
209
- "scripts": {
210
- // ...
211
- "eval": "vitest --config=vitest.evals.config.ts",
212
- }
213
- }
214
- ```
215
-
216
- You can then run your evals using `npm run eval`.
217
-
218
262
  ## Development
219
263
 
220
- Nothing fancy here.
221
-
222
- ```javascript
223
- pnpm install
224
- ```
225
-
226
- ```javascript
227
- pnpm test
228
- ```
264
+ ```shell
265
+ npm install
266
+ npm test
267
+ ```
package/dist/index.d.mts CHANGED
@@ -1,98 +1,2 @@
1
- import * as vitest from 'vitest';
2
-
3
- type TaskFn = (input: string) => Promise<string>;
4
- type Score = {
5
- score: number | null;
6
- metadata?: {
7
- rationale?: string;
8
- output?: string;
9
- };
10
- };
11
- type ScoreFn = (opts: {
12
- input: string;
13
- output: string;
14
- expected?: string;
15
- }) => Promise<Score> | Score;
16
- type ToEval<R = unknown> = (expected: string, taskFn: TaskFn, scoreFn: ScoreFn, threshold?: number) => Promise<R>;
17
- interface EvalMatchers<R = unknown> {
18
- toEval: ToEval<R>;
19
- }
20
- declare module "vitest" {
21
- interface Assertion<T = any> extends EvalMatchers<T> {
22
- }
23
- interface AsymmetricMatchersContaining extends EvalMatchers {
24
- }
25
- interface TaskMeta {
26
- eval?: {
27
- scores: (Score & {
28
- name: string;
29
- })[];
30
- avgScore: number;
31
- };
32
- }
33
- }
34
- /**
35
- * Creates a test suite for evaluating language model outputs.
36
- *
37
- * @param name - The name of the test suite
38
- * @param options - Configuration options
39
- * @param options.data - Async function that returns an array of test cases with input and expected values
40
- * @param options.task - Function that processes the input and returns the model output
41
- * @param options.skipIf - Optional function that determines if tests should be skipped
42
- * @param options.scorers - Array of scoring functions that evaluate model outputs
43
- * @param options.threshold - Minimum acceptable average score (0-1), defaults to 1.0
44
- * @param options.timeout - Test timeout in milliseconds, defaults to 60000 (60s)
45
- *
46
- * @example
47
- * ```javascript
48
- * describeEval("capital cities test", {
49
- * data: async () => [{
50
- * input: "What is the capital of France?",
51
- * expected: "Paris"
52
- * }],
53
- * task: async (input) => {
54
- * // Query LLM here
55
- * return "Paris";
56
- * },
57
- * scorers: [checkFactuality],
58
- * threshold: 0.8
59
- * });
60
- * ```
61
- */
62
- declare function describeEval(name: string, { data, task, skipIf, scorers, threshold, timeout, }: {
63
- data: () => Promise<{
64
- input: string;
65
- expected: string;
66
- }[]>;
67
- task: TaskFn;
68
- skipIf?: () => boolean;
69
- scorers: ScoreFn[];
70
- threshold?: number | null;
71
- timeout?: number;
72
- }): vitest.SuiteCollector<object>;
73
- declare function formatScores(scores: (Score & {
74
- name: string;
75
- })[]): string;
76
- /**
77
- * Wraps text to fit within a specified width, breaking at word boundaries.
78
- *
79
- * @param text - The text to wrap
80
- * @param width - The maximum width in characters (default: 80)
81
- * @returns The wrapped text with line breaks
82
- *
83
- * @example
84
- * ```javascript
85
- * const wrapped = wrapText("This is a very long text that needs to be wrapped to fit within an 80 character width.", 20);
86
- * console.log(wrapped);
87
- * // Output:
88
- * // This is a very
89
- * // long text that
90
- * // needs to be
91
- * // wrapped to fit
92
- * // within an 80
93
- * // character width.
94
- * ```
95
- */
96
- declare function wrapText(text: string, width?: number): string;
97
-
98
- export { type EvalMatchers, type Score, type ScoreFn, type TaskFn, type ToEval, describeEval, formatScores, wrapText };
1
+ import 'vitest';
2
+ export { B as BaseScorerOptions, E as EvalMatchers, S as Score, c as ScoreFn, b as TaskFn, a as TaskResult, d as ToEval, T as ToolCall, ToolCallScorer, ToolCallScorerOptions, e as describeEval, f as formatScores, w as wrapText } from './scorers/toolCallScorer.mjs';
package/dist/index.d.ts CHANGED
@@ -1,98 +1,2 @@
1
- import * as vitest from 'vitest';
2
-
3
- type TaskFn = (input: string) => Promise<string>;
4
- type Score = {
5
- score: number | null;
6
- metadata?: {
7
- rationale?: string;
8
- output?: string;
9
- };
10
- };
11
- type ScoreFn = (opts: {
12
- input: string;
13
- output: string;
14
- expected?: string;
15
- }) => Promise<Score> | Score;
16
- type ToEval<R = unknown> = (expected: string, taskFn: TaskFn, scoreFn: ScoreFn, threshold?: number) => Promise<R>;
17
- interface EvalMatchers<R = unknown> {
18
- toEval: ToEval<R>;
19
- }
20
- declare module "vitest" {
21
- interface Assertion<T = any> extends EvalMatchers<T> {
22
- }
23
- interface AsymmetricMatchersContaining extends EvalMatchers {
24
- }
25
- interface TaskMeta {
26
- eval?: {
27
- scores: (Score & {
28
- name: string;
29
- })[];
30
- avgScore: number;
31
- };
32
- }
33
- }
34
- /**
35
- * Creates a test suite for evaluating language model outputs.
36
- *
37
- * @param name - The name of the test suite
38
- * @param options - Configuration options
39
- * @param options.data - Async function that returns an array of test cases with input and expected values
40
- * @param options.task - Function that processes the input and returns the model output
41
- * @param options.skipIf - Optional function that determines if tests should be skipped
42
- * @param options.scorers - Array of scoring functions that evaluate model outputs
43
- * @param options.threshold - Minimum acceptable average score (0-1), defaults to 1.0
44
- * @param options.timeout - Test timeout in milliseconds, defaults to 60000 (60s)
45
- *
46
- * @example
47
- * ```javascript
48
- * describeEval("capital cities test", {
49
- * data: async () => [{
50
- * input: "What is the capital of France?",
51
- * expected: "Paris"
52
- * }],
53
- * task: async (input) => {
54
- * // Query LLM here
55
- * return "Paris";
56
- * },
57
- * scorers: [checkFactuality],
58
- * threshold: 0.8
59
- * });
60
- * ```
61
- */
62
- declare function describeEval(name: string, { data, task, skipIf, scorers, threshold, timeout, }: {
63
- data: () => Promise<{
64
- input: string;
65
- expected: string;
66
- }[]>;
67
- task: TaskFn;
68
- skipIf?: () => boolean;
69
- scorers: ScoreFn[];
70
- threshold?: number | null;
71
- timeout?: number;
72
- }): vitest.SuiteCollector<object>;
73
- declare function formatScores(scores: (Score & {
74
- name: string;
75
- })[]): string;
76
- /**
77
- * Wraps text to fit within a specified width, breaking at word boundaries.
78
- *
79
- * @param text - The text to wrap
80
- * @param width - The maximum width in characters (default: 80)
81
- * @returns The wrapped text with line breaks
82
- *
83
- * @example
84
- * ```javascript
85
- * const wrapped = wrapText("This is a very long text that needs to be wrapped to fit within an 80 character width.", 20);
86
- * console.log(wrapped);
87
- * // Output:
88
- * // This is a very
89
- * // long text that
90
- * // needs to be
91
- * // wrapped to fit
92
- * // within an 80
93
- * // character width.
94
- * ```
95
- */
96
- declare function wrapText(text: string, width?: number): string;
97
-
98
- export { type EvalMatchers, type Score, type ScoreFn, type TaskFn, type ToEval, describeEval, formatScores, wrapText };
1
+ import 'vitest';
2
+ export { B as BaseScorerOptions, E as EvalMatchers, S as Score, c as ScoreFn, b as TaskFn, a as TaskResult, d as ToEval, T as ToolCall, ToolCallScorer, ToolCallScorerOptions, e as describeEval, f as formatScores, w as wrapText } from './scorers/toolCallScorer.js';