vieval 0.0.11 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/README.md +31 -31
  2. package/dist/bin/vieval.mjs +1 -1
  3. package/dist/cli/index.d.mts +1 -1
  4. package/dist/cli/index.mjs +1 -1
  5. package/dist/{cli-CHFCF8UR.mjs → cli-uzS81IPd.mjs} +1529 -1529
  6. package/dist/cli-uzS81IPd.mjs.map +1 -0
  7. package/dist/config.d.mts +1 -1
  8. package/dist/core/assertions/index.d.mts +156 -156
  9. package/dist/core/assertions/index.mjs +82 -82
  10. package/dist/core/assertions/index.mjs.map +1 -1
  11. package/dist/core/inference-executors/index.d.mts +37 -37
  12. package/dist/core/inference-executors/index.mjs +53 -52
  13. package/dist/core/inference-executors/index.mjs.map +1 -1
  14. package/dist/core/processors/results/index.d.mts +18 -18
  15. package/dist/core/processors/results/index.mjs.map +1 -1
  16. package/dist/core/runner/index.d.mts +2 -2
  17. package/dist/core/runner/index.mjs +258 -258
  18. package/dist/core/runner/index.mjs.map +1 -1
  19. package/dist/core/scheduler/index.d.mts +1 -1
  20. package/dist/core/scheduler/index.mjs +64 -64
  21. package/dist/core/scheduler/index.mjs.map +1 -1
  22. package/dist/{env-bRH0K6fU.d.mts → env-Br6jaWGL.d.mts} +9 -9
  23. package/dist/{env-BVYeJhGA.mjs → env-egxaJtNn.mjs} +8 -8
  24. package/dist/env-egxaJtNn.mjs.map +1 -0
  25. package/dist/{expect-extensions-Mf1sMNBv.mjs → expect-extensions-BKdEPt3h.mjs} +46 -46
  26. package/dist/expect-extensions-BKdEPt3h.mjs.map +1 -0
  27. package/dist/expect.mjs +1 -1
  28. package/dist/{index-CwKBlCG9.d.mts → index-BLIlhiWT.d.mts} +565 -565
  29. package/dist/{index-Be5I1ZJL.d.mts → index-CIaJClcC.d.mts} +48 -48
  30. package/dist/index.d.mts +207 -195
  31. package/dist/index.mjs +147 -147
  32. package/dist/index.mjs.map +1 -1
  33. package/dist/models-CaCOUPZw.mjs.map +1 -1
  34. package/dist/plugins/chat-models/index.d.mts +279 -279
  35. package/dist/plugins/chat-models/index.mjs +359 -359
  36. package/dist/plugins/chat-models/index.mjs.map +1 -1
  37. package/dist/{registry-BSyjwZFx.mjs → registry-BK7k6X81.mjs} +293 -293
  38. package/dist/registry-BK7k6X81.mjs.map +1 -0
  39. package/dist/testing/expect-extensions.d.mts +27 -27
  40. package/dist/testing/expect-extensions.mjs +1 -1
  41. package/package.json +3 -3
  42. package/dist/cli-CHFCF8UR.mjs.map +0 -1
  43. package/dist/env-BVYeJhGA.mjs.map +0 -1
  44. package/dist/expect-extensions-Mf1sMNBv.mjs.map +0 -1
  45. package/dist/registry-BSyjwZFx.mjs.map +0 -1
package/README.md CHANGED
@@ -31,6 +31,7 @@ import { defineConfig, loadEnv, requiredEnvFrom } from 'vieval'
31
31
  import { chatModelFrom, ChatModels } from 'vieval/plugins/chat-models'
32
32
 
33
33
  export default defineConfig({
34
+ env: loadEnv('test', cwd(), ''),
34
35
  plugins: [
35
36
  ChatModels({
36
37
  models: [
@@ -46,23 +47,22 @@ export default defineConfig({
46
47
  ],
47
48
  }),
48
49
  ],
49
- env: loadEnv('test', cwd(), ''),
50
50
  projects: [
51
51
  {
52
+ evalMatrix: {
53
+ extend: {
54
+ rubric: ['default'],
55
+ },
56
+ },
57
+ include: ['evals/*.eval.ts'],
52
58
  name: 'default',
53
59
  root: '.',
54
- include: ['evals/*.eval.ts'],
55
60
  runMatrix: {
56
61
  extend: {
57
62
  model: ['agent-mini'],
58
63
  scenario: ['baseline'],
59
64
  },
60
65
  },
61
- evalMatrix: {
62
- extend: {
63
- rubric: ['default'],
64
- },
65
- },
66
66
  },
67
67
  ],
68
68
  })
@@ -129,8 +129,8 @@ Use builder style when loading a batch of inputs:
129
129
  import { describeTask, expect } from 'vieval'
130
130
 
131
131
  const arithmeticCases = [
132
- { name: 'addition-small', input: { a: 1, b: 2, expected: 3 } },
133
- { name: 'addition-large', input: { a: 20, b: 22, expected: 42 } },
132
+ { input: { a: 1, b: 2, expected: 3 }, name: 'addition-small' },
133
+ { input: { a: 20, b: 22, expected: 42 }, name: 'addition-large' },
134
134
  ]
135
135
 
136
136
  describeTask('arithmetic-quality', ({ casesFromInputs }) => {
@@ -191,6 +191,7 @@ import { defineConfig, loadEnv, requiredEnvFrom } from 'vieval'
191
191
  import { chatModelFrom, ChatModels } from 'vieval/plugins/chat-models'
192
192
 
193
193
  export default defineConfig({
194
+ env: loadEnv('test', cwd(), ''),
194
195
  plugins: [
195
196
  ChatModels({
196
197
  models: [
@@ -224,12 +225,17 @@ export default defineConfig({
224
225
  ],
225
226
  }),
226
227
  ],
227
- env: loadEnv('test', cwd(), ''),
228
228
  projects: [
229
229
  {
230
+ evalMatrix: {
231
+ extend: {
232
+ rubric: ['strict', 'lenient'],
233
+ rubricModel: ['judge-mini', 'judge-large'],
234
+ },
235
+ },
236
+ include: ['evals/*.eval.ts'],
230
237
  name: 'chat-evals',
231
238
  root: '.',
232
- include: ['evals/*.eval.ts'],
233
239
  runMatrix: {
234
240
  extend: {
235
241
  model: ['agent-mini', 'agent-large'],
@@ -237,12 +243,6 @@ export default defineConfig({
237
243
  scenario: ['baseline', 'stress'],
238
244
  },
239
245
  },
240
- evalMatrix: {
241
- extend: {
242
- rubric: ['strict', 'lenient'],
243
- rubricModel: ['judge-mini', 'judge-large'],
244
- },
245
- },
246
246
  },
247
247
  ],
248
248
  })
@@ -258,19 +258,6 @@ import { defineConfig } from 'vieval'
258
258
  export default defineConfig({
259
259
  projects: [
260
260
  {
261
- name: 'motion-evals',
262
- root: '.',
263
- include: ['evals/*.eval.ts'],
264
- inferenceExecutors: [{ id: 'motion-engine' }],
265
- models: [
266
- {
267
- id: 'motion-engine:v2',
268
- aliases: ['motion-default'],
269
- inferenceExecutor: 'motion-engine',
270
- inferenceExecutorId: 'motion-engine',
271
- model: 'v2',
272
- },
273
- ],
274
261
  async executor(task, context) {
275
262
  const model = context.models.find(model =>
276
263
  model.id === 'motion-default'
@@ -285,13 +272,26 @@ export default defineConfig({
285
272
  const success = model.model === 'v2' && task.matrix.run.scenario === 'baseline'
286
273
 
287
274
  return {
288
- id: task.id,
289
275
  entryId: task.entry.id,
276
+ id: task.id,
290
277
  inferenceExecutorId: task.inferenceExecutor.id,
291
278
  matrix: task.matrix,
292
279
  scores: [{ kind: 'exact', score: success ? 1 : 0 }],
293
280
  }
294
281
  },
282
+ include: ['evals/*.eval.ts'],
283
+ inferenceExecutors: [{ id: 'motion-engine' }],
284
+ models: [
285
+ {
286
+ aliases: ['motion-default'],
287
+ id: 'motion-engine:v2',
288
+ inferenceExecutor: 'motion-engine',
289
+ inferenceExecutorId: 'motion-engine',
290
+ model: 'v2',
291
+ },
292
+ ],
293
+ name: 'motion-evals',
294
+ root: '.',
295
295
  },
296
296
  ],
297
297
  })
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { n as runTopLevelCli } from "../cli-CHFCF8UR.mjs";
2
+ import { n as runTopLevelCli } from "../cli-uzS81IPd.mjs";
3
3
  import process from "node:process";
4
4
  import { errorMessageFrom } from "@moeru/std";
5
5
  //#region src/bin/vieval.ts
@@ -1,7 +1,7 @@
1
1
  //#region src/cli/index.d.ts
2
2
  type Command = 'compare' | 'report' | 'run';
3
3
  interface ParsedTopLevelCliArguments {
4
- command: Command | 'help';
4
+ command: 'help' | Command;
5
5
  commandArgv: string[];
6
6
  }
7
7
  /**
@@ -1,2 +1,2 @@
1
- import { n as runTopLevelCli, t as parseTopLevelCliArguments } from "../cli-CHFCF8UR.mjs";
1
+ import { n as runTopLevelCli, t as parseTopLevelCliArguments } from "../cli-uzS81IPd.mjs";
2
2
  export { parseTopLevelCliArguments, runTopLevelCli };