vieval 0.0.10 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/README.md +31 -31
  2. package/dist/bin/vieval.mjs +1 -1
  3. package/dist/bin/vieval.mjs.map +1 -1
  4. package/dist/cli/index.d.mts +1 -1
  5. package/dist/cli/index.mjs +1 -1
  6. package/dist/{cli-DTDgaqeI.mjs → cli-uzS81IPd.mjs} +1483 -1483
  7. package/dist/cli-uzS81IPd.mjs.map +1 -0
  8. package/dist/config.d.mts +1 -1
  9. package/dist/config.mjs +1 -1
  10. package/dist/config.mjs.map +1 -1
  11. package/dist/core/assertions/index.d.mts +156 -156
  12. package/dist/core/assertions/index.mjs +82 -82
  13. package/dist/core/assertions/index.mjs.map +1 -1
  14. package/dist/core/inference-executors/index.d.mts +37 -37
  15. package/dist/core/inference-executors/index.mjs +54 -53
  16. package/dist/core/inference-executors/index.mjs.map +1 -1
  17. package/dist/core/processors/results/index.d.mts +18 -18
  18. package/dist/core/processors/results/index.mjs.map +1 -1
  19. package/dist/core/runner/index.d.mts +2 -2
  20. package/dist/core/runner/index.mjs +259 -259
  21. package/dist/core/runner/index.mjs.map +1 -1
  22. package/dist/core/scheduler/index.d.mts +1 -1
  23. package/dist/core/scheduler/index.mjs +65 -65
  24. package/dist/core/scheduler/index.mjs.map +1 -1
  25. package/dist/{env-DfWZy_n4.d.mts → env-Br6jaWGL.d.mts} +9 -9
  26. package/dist/{env-nV5rVErX.mjs → env-egxaJtNn.mjs} +8 -8
  27. package/dist/env-egxaJtNn.mjs.map +1 -0
  28. package/dist/{expect-extensions-DCSqlneN.mjs → expect-extensions-BKdEPt3h.mjs} +46 -46
  29. package/dist/expect-extensions-BKdEPt3h.mjs.map +1 -0
  30. package/dist/expect.d.mts +1 -3
  31. package/dist/expect.mjs +1 -1
  32. package/dist/expect.mjs.map +1 -1
  33. package/dist/{index-D_aMeWqO.d.mts → index-BLIlhiWT.d.mts} +565 -565
  34. package/dist/{index-Bg0atWBF.d.mts → index-CIaJClcC.d.mts} +48 -48
  35. package/dist/index.d.mts +208 -197
  36. package/dist/index.mjs +148 -148
  37. package/dist/index.mjs.map +1 -1
  38. package/dist/{models-pBSRUZhY.mjs → models-CaCOUPZw.mjs} +1 -1
  39. package/dist/{models-pBSRUZhY.mjs.map → models-CaCOUPZw.mjs.map} +1 -1
  40. package/dist/plugins/chat-models/index.d.mts +279 -279
  41. package/dist/plugins/chat-models/index.mjs +360 -360
  42. package/dist/plugins/chat-models/index.mjs.map +1 -1
  43. package/dist/{queue-DsZQkZO_.mjs → queue-BL86z2W_.mjs} +1 -1
  44. package/dist/{queue-DsZQkZO_.mjs.map → queue-BL86z2W_.mjs.map} +1 -1
  45. package/dist/{registry-DMnwE_mY.mjs → registry-BK7k6X81.mjs} +294 -294
  46. package/dist/registry-BK7k6X81.mjs.map +1 -0
  47. package/dist/testing/expect-extensions.d.mts +27 -27
  48. package/dist/testing/expect-extensions.mjs +1 -1
  49. package/package.json +12 -12
  50. package/dist/cli-DTDgaqeI.mjs.map +0 -1
  51. package/dist/env-nV5rVErX.mjs.map +0 -1
  52. package/dist/expect-extensions-DCSqlneN.mjs.map +0 -1
  53. package/dist/registry-DMnwE_mY.mjs.map +0 -1
package/README.md CHANGED
@@ -31,6 +31,7 @@ import { defineConfig, loadEnv, requiredEnvFrom } from 'vieval'
31
31
  import { chatModelFrom, ChatModels } from 'vieval/plugins/chat-models'
32
32
 
33
33
  export default defineConfig({
34
+ env: loadEnv('test', cwd(), ''),
34
35
  plugins: [
35
36
  ChatModels({
36
37
  models: [
@@ -46,23 +47,22 @@ export default defineConfig({
46
47
  ],
47
48
  }),
48
49
  ],
49
- env: loadEnv('test', cwd(), ''),
50
50
  projects: [
51
51
  {
52
+ evalMatrix: {
53
+ extend: {
54
+ rubric: ['default'],
55
+ },
56
+ },
57
+ include: ['evals/*.eval.ts'],
52
58
  name: 'default',
53
59
  root: '.',
54
- include: ['evals/*.eval.ts'],
55
60
  runMatrix: {
56
61
  extend: {
57
62
  model: ['agent-mini'],
58
63
  scenario: ['baseline'],
59
64
  },
60
65
  },
61
- evalMatrix: {
62
- extend: {
63
- rubric: ['default'],
64
- },
65
- },
66
66
  },
67
67
  ],
68
68
  })
@@ -129,8 +129,8 @@ Use builder style when loading a batch of inputs:
129
129
  import { describeTask, expect } from 'vieval'
130
130
 
131
131
  const arithmeticCases = [
132
- { name: 'addition-small', input: { a: 1, b: 2, expected: 3 } },
133
- { name: 'addition-large', input: { a: 20, b: 22, expected: 42 } },
132
+ { input: { a: 1, b: 2, expected: 3 }, name: 'addition-small' },
133
+ { input: { a: 20, b: 22, expected: 42 }, name: 'addition-large' },
134
134
  ]
135
135
 
136
136
  describeTask('arithmetic-quality', ({ casesFromInputs }) => {
@@ -191,6 +191,7 @@ import { defineConfig, loadEnv, requiredEnvFrom } from 'vieval'
191
191
  import { chatModelFrom, ChatModels } from 'vieval/plugins/chat-models'
192
192
 
193
193
  export default defineConfig({
194
+ env: loadEnv('test', cwd(), ''),
194
195
  plugins: [
195
196
  ChatModels({
196
197
  models: [
@@ -224,12 +225,17 @@ export default defineConfig({
224
225
  ],
225
226
  }),
226
227
  ],
227
- env: loadEnv('test', cwd(), ''),
228
228
  projects: [
229
229
  {
230
+ evalMatrix: {
231
+ extend: {
232
+ rubric: ['strict', 'lenient'],
233
+ rubricModel: ['judge-mini', 'judge-large'],
234
+ },
235
+ },
236
+ include: ['evals/*.eval.ts'],
230
237
  name: 'chat-evals',
231
238
  root: '.',
232
- include: ['evals/*.eval.ts'],
233
239
  runMatrix: {
234
240
  extend: {
235
241
  model: ['agent-mini', 'agent-large'],
@@ -237,12 +243,6 @@ export default defineConfig({
237
243
  scenario: ['baseline', 'stress'],
238
244
  },
239
245
  },
240
- evalMatrix: {
241
- extend: {
242
- rubric: ['strict', 'lenient'],
243
- rubricModel: ['judge-mini', 'judge-large'],
244
- },
245
- },
246
246
  },
247
247
  ],
248
248
  })
@@ -258,19 +258,6 @@ import { defineConfig } from 'vieval'
258
258
  export default defineConfig({
259
259
  projects: [
260
260
  {
261
- name: 'motion-evals',
262
- root: '.',
263
- include: ['evals/*.eval.ts'],
264
- inferenceExecutors: [{ id: 'motion-engine' }],
265
- models: [
266
- {
267
- id: 'motion-engine:v2',
268
- aliases: ['motion-default'],
269
- inferenceExecutor: 'motion-engine',
270
- inferenceExecutorId: 'motion-engine',
271
- model: 'v2',
272
- },
273
- ],
274
261
  async executor(task, context) {
275
262
  const model = context.models.find(model =>
276
263
  model.id === 'motion-default'
@@ -285,13 +272,26 @@ export default defineConfig({
285
272
  const success = model.model === 'v2' && task.matrix.run.scenario === 'baseline'
286
273
 
287
274
  return {
288
- id: task.id,
289
275
  entryId: task.entry.id,
276
+ id: task.id,
290
277
  inferenceExecutorId: task.inferenceExecutor.id,
291
278
  matrix: task.matrix,
292
279
  scores: [{ kind: 'exact', score: success ? 1 : 0 }],
293
280
  }
294
281
  },
282
+ include: ['evals/*.eval.ts'],
283
+ inferenceExecutors: [{ id: 'motion-engine' }],
284
+ models: [
285
+ {
286
+ aliases: ['motion-default'],
287
+ id: 'motion-engine:v2',
288
+ inferenceExecutor: 'motion-engine',
289
+ inferenceExecutorId: 'motion-engine',
290
+ model: 'v2',
291
+ },
292
+ ],
293
+ name: 'motion-evals',
294
+ root: '.',
295
295
  },
296
296
  ],
297
297
  })
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { n as runTopLevelCli } from "../cli-DTDgaqeI.mjs";
2
+ import { n as runTopLevelCli } from "../cli-uzS81IPd.mjs";
3
3
  import process from "node:process";
4
4
  import { errorMessageFrom } from "@moeru/std";
5
5
  //#region src/bin/vieval.ts
@@ -1 +1 @@
1
- {"version":3,"file":"vieval.mjs","names":[],"sources":["../../src/bin/vieval.ts"],"sourcesContent":["#!/usr/bin/env node\n\nimport process from 'node:process'\n\nimport { errorMessageFrom } from '@moeru/std'\n\nimport { runTopLevelCli } from '../cli/index'\n\n/**\n * Bootstraps the published `vieval` executable.\n *\n * Call stack:\n *\n * package manager shim / direct node execution\n * -> {@link runTopLevelCli} (`../cli`)\n * -> subcommand orchestration modules\n *\n * Use when:\n * - the installed `vieval` binary starts from the command line\n * - process-bound startup must stay outside import-safe CLI modules\n *\n * Expects:\n * - `process.argv` contains the raw CLI arguments after the node executable path\n *\n * Returns:\n * - resolves after the selected subcommand finishes and updates `process.exitCode`\n */\nrunTopLevelCli(process.argv.slice(2)).catch((error) => {\n const errorMessage = errorMessageFrom(error) ?? 'Unknown CLI failure.'\n process.stderr.write(`[vieval] ${errorMessage}\\n`)\n process.exitCode = 1\n})\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;AA2BA,eAAe,QAAQ,KAAK,MAAM,EAAE,CAAC,CAAC,OAAO,UAAU;CACrD,MAAM,eAAe,iBAAiB,MAAM,IAAI;AAChD,SAAQ,OAAO,MAAM,YAAY,aAAa,IAAI;AAClD,SAAQ,WAAW;EACnB"}
1
+ {"version":3,"file":"vieval.mjs","names":[],"sources":["../../src/bin/vieval.ts"],"sourcesContent":["#!/usr/bin/env node\n\nimport process from 'node:process'\n\nimport { errorMessageFrom } from '@moeru/std'\n\nimport { runTopLevelCli } from '../cli/index'\n\n/**\n * Bootstraps the published `vieval` executable.\n *\n * Call stack:\n *\n * package manager shim / direct node execution\n * -> {@link runTopLevelCli} (`../cli`)\n * -> subcommand orchestration modules\n *\n * Use when:\n * - the installed `vieval` binary starts from the command line\n * - process-bound startup must stay outside import-safe CLI modules\n *\n * Expects:\n * - `process.argv` contains the raw CLI arguments after the node executable path\n *\n * Returns:\n * - resolves after the selected subcommand finishes and updates `process.exitCode`\n */\nrunTopLevelCli(process.argv.slice(2)).catch((error) => {\n const errorMessage = errorMessageFrom(error) ?? 'Unknown CLI failure.'\n process.stderr.write(`[vieval] ${errorMessage}\\n`)\n process.exitCode = 1\n})\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;AA2BA,eAAe,QAAQ,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,UAAU;CACrD,MAAM,eAAe,iBAAiB,KAAK,KAAK;CAChD,QAAQ,OAAO,MAAM,YAAY,aAAa,GAAG;CACjD,QAAQ,WAAW;AACrB,CAAC"}
@@ -1,7 +1,7 @@
1
1
  //#region src/cli/index.d.ts
2
2
  type Command = 'compare' | 'report' | 'run';
3
3
  interface ParsedTopLevelCliArguments {
4
- command: Command | 'help';
4
+ command: 'help' | Command;
5
5
  commandArgv: string[];
6
6
  }
7
7
  /**
@@ -1,2 +1,2 @@
1
- import { n as runTopLevelCli, t as parseTopLevelCliArguments } from "../cli-DTDgaqeI.mjs";
1
+ import { n as runTopLevelCli, t as parseTopLevelCliArguments } from "../cli-uzS81IPd.mjs";
2
2
  export { parseTopLevelCliArguments, runTopLevelCli };