vieval 0.0.1 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -5
- package/dist/cli/index.d.mts +1 -1
- package/dist/cli/index.mjs +1232 -83
- package/dist/cli/index.mjs.map +1 -1
- package/dist/{config-D2fe1SnT.mjs → config-CHN24egi.mjs} +1 -1
- package/dist/{config-D2fe1SnT.mjs.map → config-CHN24egi.mjs.map} +1 -1
- package/dist/config.d.mts +2 -3
- package/dist/config.mjs +2 -2
- package/dist/core/assertions/index.d.mts +1 -1
- package/dist/core/inference-executors/index.d.mts +1 -45
- package/dist/core/inference-executors/index.mjs +1 -38
- package/dist/core/inference-executors/index.mjs.map +1 -1
- package/dist/core/processors/results/index.d.mts +1 -1
- package/dist/core/runner/index.d.mts +2 -2
- package/dist/core/runner/index.mjs +2 -2
- package/dist/env-C7X81PWa.mjs +41 -0
- package/dist/env-C7X81PWa.mjs.map +1 -0
- package/dist/env-DtpjACOW.d.mts +47 -0
- package/dist/expect-B2vaoRVZ.d.mts +10 -0
- package/dist/{expect-i9WZWGrA.mjs → expect-CaXiUkwY.mjs} +3 -3
- package/dist/expect-CaXiUkwY.mjs.map +1 -0
- package/dist/expect-extensions-BOzwV5EJ.mjs +197 -0
- package/dist/expect-extensions-BOzwV5EJ.mjs.map +1 -0
- package/dist/expect.d.mts +1 -1
- package/dist/expect.mjs +1 -1
- package/dist/{index-DP7jsORl.d.mts → index-BDMEAmf2.d.mts} +246 -3
- package/dist/{index-oSXhM1zx.d.mts → index-C3gPFmcR.d.mts} +2 -2
- package/dist/index.d.mts +326 -6
- package/dist/index.mjs +65 -23
- package/dist/index.mjs.map +1 -1
- package/dist/{models-D_MsBtYw.mjs → models-DIGdOUpJ.mjs} +1 -1
- package/dist/{models-D_MsBtYw.mjs.map → models-DIGdOUpJ.mjs.map} +1 -1
- package/dist/plugins/chat-models/index.d.mts +465 -6
- package/dist/plugins/chat-models/index.mjs +469 -6
- package/dist/plugins/chat-models/index.mjs.map +1 -1
- package/dist/{registry-ChOjjdEC.mjs → registry-CHJcTN2W.mjs} +75 -16
- package/dist/registry-CHJcTN2W.mjs.map +1 -0
- package/dist/{runner-4ZsOveoY.mjs → runner-Dpy-eivM.mjs} +177 -21
- package/dist/runner-Dpy-eivM.mjs.map +1 -0
- package/dist/testing/expect-extensions.d.mts +44 -38
- package/dist/testing/expect-extensions.mjs +1 -1
- package/package.json +11 -4
- package/dist/expect-0jPJ7Zio.d.mts +0 -2318
- package/dist/expect-extensions-CwPtgTz8.mjs +0 -13471
- package/dist/expect-extensions-CwPtgTz8.mjs.map +0 -1
- package/dist/expect-i9WZWGrA.mjs.map +0 -1
- package/dist/magic-string.es-CH1jwzMg.mjs +0 -1013
- package/dist/magic-string.es-CH1jwzMg.mjs.map +0 -1
- package/dist/plugin-DVaRZY2x.d.mts +0 -84
- package/dist/registry-ChOjjdEC.mjs.map +0 -1
- package/dist/runner-4ZsOveoY.mjs.map +0 -1
package/README.md
CHANGED
|
@@ -46,7 +46,7 @@ import { caseOf, describeEval, expect } from 'vieval'
|
|
|
46
46
|
export default describeEval('smoke', () => {
|
|
47
47
|
caseOf('2 + 2 = 4', () => {
|
|
48
48
|
expect(2 + 2).toBe(4)
|
|
49
|
-
}
|
|
49
|
+
})
|
|
50
50
|
})
|
|
51
51
|
```
|
|
52
52
|
|
|
@@ -225,6 +225,7 @@ export default defineConfig({
|
|
|
225
225
|
|
|
226
226
|
```bash
|
|
227
227
|
vieval run [--config <path>] [--project <name>] [--json]
|
|
228
|
+
vieval compare [--config <path>] [--comparison <id>] [--output <path>] [--format table|json]
|
|
228
229
|
```
|
|
229
230
|
|
|
230
231
|
Common usage:
|
|
@@ -234,15 +235,17 @@ pnpm -F vieval eval:run
|
|
|
234
235
|
pnpm -F vieval eval:run -- --config ./vieval.config.ts
|
|
235
236
|
pnpm -F vieval eval:run -- --config ./vieval.config.ts --project chess --project moderation
|
|
236
237
|
pnpm -F vieval eval:run -- --json
|
|
238
|
+
pnpm -F vieval exec tsx src/cli/index.ts compare --config ../../vieval.config.ts --comparison <comparison-id>
|
|
237
239
|
pnpm -F vieval eval:run -- --help
|
|
238
240
|
```
|
|
239
241
|
|
|
240
242
|
## Examples In This Repository
|
|
241
243
|
|
|
242
|
-
-
|
|
243
|
-
-
|
|
244
|
-
-
|
|
245
|
-
-
|
|
244
|
+
- [Define a custom eval task API](tests/projects/example-api-defining-new-task)
|
|
245
|
+
- [Configure run/eval matrix combinations](tests/projects/example-api-config-matrix)
|
|
246
|
+
- [Load datasource records as task cases](tests/projects/example-api-load-datasource-as-cases)
|
|
247
|
+
- [Compare reporters and experiment/attempt layering](tests/projects/example-api-reporters-and-experiments)
|
|
248
|
+
- [Bring your own agent execution pattern](tests/projects/example-pattern-byoa-bring-your-own-agent)
|
|
246
249
|
|
|
247
250
|
## Development
|
|
248
251
|
|
package/dist/cli/index.d.mts
CHANGED