vieval 0.0.1 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/README.md +8 -5
  2. package/dist/cli/index.d.mts +1 -1
  3. package/dist/cli/index.mjs +1232 -83
  4. package/dist/cli/index.mjs.map +1 -1
  5. package/dist/{config-D2fe1SnT.mjs → config-CHN24egi.mjs} +1 -1
  6. package/dist/{config-D2fe1SnT.mjs.map → config-CHN24egi.mjs.map} +1 -1
  7. package/dist/config.d.mts +2 -3
  8. package/dist/config.mjs +2 -2
  9. package/dist/core/assertions/index.d.mts +1 -1
  10. package/dist/core/inference-executors/index.d.mts +1 -45
  11. package/dist/core/inference-executors/index.mjs +1 -38
  12. package/dist/core/inference-executors/index.mjs.map +1 -1
  13. package/dist/core/processors/results/index.d.mts +1 -1
  14. package/dist/core/runner/index.d.mts +2 -2
  15. package/dist/core/runner/index.mjs +2 -2
  16. package/dist/env-C7X81PWa.mjs +41 -0
  17. package/dist/env-C7X81PWa.mjs.map +1 -0
  18. package/dist/env-DtpjACOW.d.mts +47 -0
  19. package/dist/expect-B2vaoRVZ.d.mts +10 -0
  20. package/dist/{expect-i9WZWGrA.mjs → expect-CaXiUkwY.mjs} +3 -3
  21. package/dist/expect-CaXiUkwY.mjs.map +1 -0
  22. package/dist/expect-extensions-BOzwV5EJ.mjs +197 -0
  23. package/dist/expect-extensions-BOzwV5EJ.mjs.map +1 -0
  24. package/dist/expect.d.mts +1 -1
  25. package/dist/expect.mjs +1 -1
  26. package/dist/{index-DP7jsORl.d.mts → index-BDMEAmf2.d.mts} +246 -3
  27. package/dist/{index-oSXhM1zx.d.mts → index-C3gPFmcR.d.mts} +2 -2
  28. package/dist/index.d.mts +326 -6
  29. package/dist/index.mjs +65 -23
  30. package/dist/index.mjs.map +1 -1
  31. package/dist/{models-D_MsBtYw.mjs → models-DIGdOUpJ.mjs} +1 -1
  32. package/dist/{models-D_MsBtYw.mjs.map → models-DIGdOUpJ.mjs.map} +1 -1
  33. package/dist/plugins/chat-models/index.d.mts +465 -6
  34. package/dist/plugins/chat-models/index.mjs +469 -6
  35. package/dist/plugins/chat-models/index.mjs.map +1 -1
  36. package/dist/{registry-ChOjjdEC.mjs → registry-CHJcTN2W.mjs} +75 -16
  37. package/dist/registry-CHJcTN2W.mjs.map +1 -0
  38. package/dist/{runner-4ZsOveoY.mjs → runner-Dpy-eivM.mjs} +177 -21
  39. package/dist/runner-Dpy-eivM.mjs.map +1 -0
  40. package/dist/testing/expect-extensions.d.mts +44 -38
  41. package/dist/testing/expect-extensions.mjs +1 -1
  42. package/package.json +11 -4
  43. package/dist/expect-0jPJ7Zio.d.mts +0 -2318
  44. package/dist/expect-extensions-CwPtgTz8.mjs +0 -13471
  45. package/dist/expect-extensions-CwPtgTz8.mjs.map +0 -1
  46. package/dist/expect-i9WZWGrA.mjs.map +0 -1
  47. package/dist/magic-string.es-CH1jwzMg.mjs +0 -1013
  48. package/dist/magic-string.es-CH1jwzMg.mjs.map +0 -1
  49. package/dist/plugin-DVaRZY2x.d.mts +0 -84
  50. package/dist/registry-ChOjjdEC.mjs.map +0 -1
  51. package/dist/runner-4ZsOveoY.mjs.map +0 -1
package/README.md CHANGED
@@ -46,7 +46,7 @@ import { caseOf, describeEval, expect } from 'vieval'
46
46
  export default describeEval('smoke', () => {
47
47
  caseOf('2 + 2 = 4', () => {
48
48
  expect(2 + 2).toBe(4)
49
- }, {})
49
+ })
50
50
  })
51
51
  ```
52
52
 
@@ -225,6 +225,7 @@ export default defineConfig({
225
225
 
226
226
  ```bash
227
227
  vieval run [--config <path>] [--project <name>] [--json]
228
+ vieval compare [--config <path>] [--comparison <id>] [--output <path>] [--format table|json]
228
229
  ```
229
230
 
230
231
  Common usage:
@@ -234,15 +235,17 @@ pnpm -F vieval eval:run
234
235
  pnpm -F vieval eval:run -- --config ./vieval.config.ts
235
236
  pnpm -F vieval eval:run -- --config ./vieval.config.ts --project chess --project moderation
236
237
  pnpm -F vieval eval:run -- --json
238
+ pnpm -F vieval exec tsx src/cli/index.ts compare --config ../../vieval.config.ts --comparison <comparison-id>
237
239
  pnpm -F vieval eval:run -- --help
238
240
  ```
239
241
 
240
242
  ## Examples In This Repository
241
243
 
242
- - `packages/vieval/tests/projects/example-api-defining-new-task`
243
- - `packages/vieval/tests/projects/example-api-config-matrix`
244
- - `packages/vieval/tests/projects/example-api-load-datasource-as-cases`
245
- - `packages/vieval/tests/projects/example-pattern-byoa-bring-your-own-agent`
244
+ - [Define a custom eval task API](tests/projects/example-api-defining-new-task)
245
+ - [Configure run/eval matrix combinations](tests/projects/example-api-config-matrix)
246
+ - [Load datasource records as task cases](tests/projects/example-api-load-datasource-as-cases)
247
+ - [Compare reporters and experiment/attempt layering](tests/projects/example-api-reporters-and-experiments)
248
+ - [Bring your own agent execution pattern](tests/projects/example-pattern-byoa-bring-your-own-agent)
246
249
 
247
250
  ## Development
248
251
 
package/dist/cli/index.d.mts CHANGED
@@ -1,5 +1,5 @@
1
1
  //#region src/cli/index.d.ts
2
- type Command = 'run';
2
+ type Command = 'compare' | 'report' | 'run';
3
3
  interface ParsedTopLevelCliArguments {
4
4
  command: Command | 'help';
5
5
  commandArgv: string[];