vieval 0.0.10 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -31
- package/dist/bin/vieval.mjs +1 -1
- package/dist/bin/vieval.mjs.map +1 -1
- package/dist/cli/index.d.mts +1 -1
- package/dist/cli/index.mjs +1 -1
- package/dist/{cli-DTDgaqeI.mjs → cli-uzS81IPd.mjs} +1483 -1483
- package/dist/cli-uzS81IPd.mjs.map +1 -0
- package/dist/config.d.mts +1 -1
- package/dist/config.mjs +1 -1
- package/dist/config.mjs.map +1 -1
- package/dist/core/assertions/index.d.mts +156 -156
- package/dist/core/assertions/index.mjs +82 -82
- package/dist/core/assertions/index.mjs.map +1 -1
- package/dist/core/inference-executors/index.d.mts +37 -37
- package/dist/core/inference-executors/index.mjs +54 -53
- package/dist/core/inference-executors/index.mjs.map +1 -1
- package/dist/core/processors/results/index.d.mts +18 -18
- package/dist/core/processors/results/index.mjs.map +1 -1
- package/dist/core/runner/index.d.mts +2 -2
- package/dist/core/runner/index.mjs +259 -259
- package/dist/core/runner/index.mjs.map +1 -1
- package/dist/core/scheduler/index.d.mts +1 -1
- package/dist/core/scheduler/index.mjs +65 -65
- package/dist/core/scheduler/index.mjs.map +1 -1
- package/dist/{env-DfWZy_n4.d.mts → env-Br6jaWGL.d.mts} +9 -9
- package/dist/{env-nV5rVErX.mjs → env-egxaJtNn.mjs} +8 -8
- package/dist/env-egxaJtNn.mjs.map +1 -0
- package/dist/{expect-extensions-DCSqlneN.mjs → expect-extensions-BKdEPt3h.mjs} +46 -46
- package/dist/expect-extensions-BKdEPt3h.mjs.map +1 -0
- package/dist/expect.d.mts +1 -3
- package/dist/expect.mjs +1 -1
- package/dist/expect.mjs.map +1 -1
- package/dist/{index-D_aMeWqO.d.mts → index-BLIlhiWT.d.mts} +565 -565
- package/dist/{index-Bg0atWBF.d.mts → index-CIaJClcC.d.mts} +48 -48
- package/dist/index.d.mts +208 -197
- package/dist/index.mjs +148 -148
- package/dist/index.mjs.map +1 -1
- package/dist/{models-pBSRUZhY.mjs → models-CaCOUPZw.mjs} +1 -1
- package/dist/{models-pBSRUZhY.mjs.map → models-CaCOUPZw.mjs.map} +1 -1
- package/dist/plugins/chat-models/index.d.mts +279 -279
- package/dist/plugins/chat-models/index.mjs +360 -360
- package/dist/plugins/chat-models/index.mjs.map +1 -1
- package/dist/{queue-DsZQkZO_.mjs → queue-BL86z2W_.mjs} +1 -1
- package/dist/{queue-DsZQkZO_.mjs.map → queue-BL86z2W_.mjs.map} +1 -1
- package/dist/{registry-DMnwE_mY.mjs → registry-BK7k6X81.mjs} +294 -294
- package/dist/registry-BK7k6X81.mjs.map +1 -0
- package/dist/testing/expect-extensions.d.mts +27 -27
- package/dist/testing/expect-extensions.mjs +1 -1
- package/package.json +12 -12
- package/dist/cli-DTDgaqeI.mjs.map +0 -1
- package/dist/env-nV5rVErX.mjs.map +0 -1
- package/dist/expect-extensions-DCSqlneN.mjs.map +0 -1
- package/dist/registry-DMnwE_mY.mjs.map +0 -1
package/README.md
CHANGED
|
@@ -31,6 +31,7 @@ import { defineConfig, loadEnv, requiredEnvFrom } from 'vieval'
|
|
|
31
31
|
import { chatModelFrom, ChatModels } from 'vieval/plugins/chat-models'
|
|
32
32
|
|
|
33
33
|
export default defineConfig({
|
|
34
|
+
env: loadEnv('test', cwd(), ''),
|
|
34
35
|
plugins: [
|
|
35
36
|
ChatModels({
|
|
36
37
|
models: [
|
|
@@ -46,23 +47,22 @@ export default defineConfig({
|
|
|
46
47
|
],
|
|
47
48
|
}),
|
|
48
49
|
],
|
|
49
|
-
env: loadEnv('test', cwd(), ''),
|
|
50
50
|
projects: [
|
|
51
51
|
{
|
|
52
|
+
evalMatrix: {
|
|
53
|
+
extend: {
|
|
54
|
+
rubric: ['default'],
|
|
55
|
+
},
|
|
56
|
+
},
|
|
57
|
+
include: ['evals/*.eval.ts'],
|
|
52
58
|
name: 'default',
|
|
53
59
|
root: '.',
|
|
54
|
-
include: ['evals/*.eval.ts'],
|
|
55
60
|
runMatrix: {
|
|
56
61
|
extend: {
|
|
57
62
|
model: ['agent-mini'],
|
|
58
63
|
scenario: ['baseline'],
|
|
59
64
|
},
|
|
60
65
|
},
|
|
61
|
-
evalMatrix: {
|
|
62
|
-
extend: {
|
|
63
|
-
rubric: ['default'],
|
|
64
|
-
},
|
|
65
|
-
},
|
|
66
66
|
},
|
|
67
67
|
],
|
|
68
68
|
})
|
|
@@ -129,8 +129,8 @@ Use builder style when loading a batch of inputs:
|
|
|
129
129
|
import { describeTask, expect } from 'vieval'
|
|
130
130
|
|
|
131
131
|
const arithmeticCases = [
|
|
132
|
-
{
|
|
133
|
-
{
|
|
132
|
+
{ input: { a: 1, b: 2, expected: 3 }, name: 'addition-small' },
|
|
133
|
+
{ input: { a: 20, b: 22, expected: 42 }, name: 'addition-large' },
|
|
134
134
|
]
|
|
135
135
|
|
|
136
136
|
describeTask('arithmetic-quality', ({ casesFromInputs }) => {
|
|
@@ -191,6 +191,7 @@ import { defineConfig, loadEnv, requiredEnvFrom } from 'vieval'
|
|
|
191
191
|
import { chatModelFrom, ChatModels } from 'vieval/plugins/chat-models'
|
|
192
192
|
|
|
193
193
|
export default defineConfig({
|
|
194
|
+
env: loadEnv('test', cwd(), ''),
|
|
194
195
|
plugins: [
|
|
195
196
|
ChatModels({
|
|
196
197
|
models: [
|
|
@@ -224,12 +225,17 @@ export default defineConfig({
|
|
|
224
225
|
],
|
|
225
226
|
}),
|
|
226
227
|
],
|
|
227
|
-
env: loadEnv('test', cwd(), ''),
|
|
228
228
|
projects: [
|
|
229
229
|
{
|
|
230
|
+
evalMatrix: {
|
|
231
|
+
extend: {
|
|
232
|
+
rubric: ['strict', 'lenient'],
|
|
233
|
+
rubricModel: ['judge-mini', 'judge-large'],
|
|
234
|
+
},
|
|
235
|
+
},
|
|
236
|
+
include: ['evals/*.eval.ts'],
|
|
230
237
|
name: 'chat-evals',
|
|
231
238
|
root: '.',
|
|
232
|
-
include: ['evals/*.eval.ts'],
|
|
233
239
|
runMatrix: {
|
|
234
240
|
extend: {
|
|
235
241
|
model: ['agent-mini', 'agent-large'],
|
|
@@ -237,12 +243,6 @@ export default defineConfig({
|
|
|
237
243
|
scenario: ['baseline', 'stress'],
|
|
238
244
|
},
|
|
239
245
|
},
|
|
240
|
-
evalMatrix: {
|
|
241
|
-
extend: {
|
|
242
|
-
rubric: ['strict', 'lenient'],
|
|
243
|
-
rubricModel: ['judge-mini', 'judge-large'],
|
|
244
|
-
},
|
|
245
|
-
},
|
|
246
246
|
},
|
|
247
247
|
],
|
|
248
248
|
})
|
|
@@ -258,19 +258,6 @@ import { defineConfig } from 'vieval'
|
|
|
258
258
|
export default defineConfig({
|
|
259
259
|
projects: [
|
|
260
260
|
{
|
|
261
|
-
name: 'motion-evals',
|
|
262
|
-
root: '.',
|
|
263
|
-
include: ['evals/*.eval.ts'],
|
|
264
|
-
inferenceExecutors: [{ id: 'motion-engine' }],
|
|
265
|
-
models: [
|
|
266
|
-
{
|
|
267
|
-
id: 'motion-engine:v2',
|
|
268
|
-
aliases: ['motion-default'],
|
|
269
|
-
inferenceExecutor: 'motion-engine',
|
|
270
|
-
inferenceExecutorId: 'motion-engine',
|
|
271
|
-
model: 'v2',
|
|
272
|
-
},
|
|
273
|
-
],
|
|
274
261
|
async executor(task, context) {
|
|
275
262
|
const model = context.models.find(model =>
|
|
276
263
|
model.id === 'motion-default'
|
|
@@ -285,13 +272,26 @@ export default defineConfig({
|
|
|
285
272
|
const success = model.model === 'v2' && task.matrix.run.scenario === 'baseline'
|
|
286
273
|
|
|
287
274
|
return {
|
|
288
|
-
id: task.id,
|
|
289
275
|
entryId: task.entry.id,
|
|
276
|
+
id: task.id,
|
|
290
277
|
inferenceExecutorId: task.inferenceExecutor.id,
|
|
291
278
|
matrix: task.matrix,
|
|
292
279
|
scores: [{ kind: 'exact', score: success ? 1 : 0 }],
|
|
293
280
|
}
|
|
294
281
|
},
|
|
282
|
+
include: ['evals/*.eval.ts'],
|
|
283
|
+
inferenceExecutors: [{ id: 'motion-engine' }],
|
|
284
|
+
models: [
|
|
285
|
+
{
|
|
286
|
+
aliases: ['motion-default'],
|
|
287
|
+
id: 'motion-engine:v2',
|
|
288
|
+
inferenceExecutor: 'motion-engine',
|
|
289
|
+
inferenceExecutorId: 'motion-engine',
|
|
290
|
+
model: 'v2',
|
|
291
|
+
},
|
|
292
|
+
],
|
|
293
|
+
name: 'motion-evals',
|
|
294
|
+
root: '.',
|
|
295
295
|
},
|
|
296
296
|
],
|
|
297
297
|
})
|
package/dist/bin/vieval.mjs
CHANGED
package/dist/bin/vieval.mjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vieval.mjs","names":[],"sources":["../../src/bin/vieval.ts"],"sourcesContent":["#!/usr/bin/env node\n\nimport process from 'node:process'\n\nimport { errorMessageFrom } from '@moeru/std'\n\nimport { runTopLevelCli } from '../cli/index'\n\n/**\n * Bootstraps the published `vieval` executable.\n *\n * Call stack:\n *\n * package manager shim / direct node execution\n * -> {@link runTopLevelCli} (`../cli`)\n * -> subcommand orchestration modules\n *\n * Use when:\n * - the installed `vieval` binary starts from the command line\n * - process-bound startup must stay outside import-safe CLI modules\n *\n * Expects:\n * - `process.argv` contains the raw CLI arguments after the node executable path\n *\n * Returns:\n * - resolves after the selected subcommand finishes and updates `process.exitCode`\n */\nrunTopLevelCli(process.argv.slice(2)).catch((error) => {\n const errorMessage = errorMessageFrom(error) ?? 'Unknown CLI failure.'\n process.stderr.write(`[vieval] ${errorMessage}\\n`)\n process.exitCode = 1\n})\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;AA2BA,eAAe,QAAQ,KAAK,MAAM,
|
|
1
|
+
{"version":3,"file":"vieval.mjs","names":[],"sources":["../../src/bin/vieval.ts"],"sourcesContent":["#!/usr/bin/env node\n\nimport process from 'node:process'\n\nimport { errorMessageFrom } from '@moeru/std'\n\nimport { runTopLevelCli } from '../cli/index'\n\n/**\n * Bootstraps the published `vieval` executable.\n *\n * Call stack:\n *\n * package manager shim / direct node execution\n * -> {@link runTopLevelCli} (`../cli`)\n * -> subcommand orchestration modules\n *\n * Use when:\n * - the installed `vieval` binary starts from the command line\n * - process-bound startup must stay outside import-safe CLI modules\n *\n * Expects:\n * - `process.argv` contains the raw CLI arguments after the node executable path\n *\n * Returns:\n * - resolves after the selected subcommand finishes and updates `process.exitCode`\n */\nrunTopLevelCli(process.argv.slice(2)).catch((error) => {\n const errorMessage = errorMessageFrom(error) ?? 'Unknown CLI failure.'\n process.stderr.write(`[vieval] ${errorMessage}\\n`)\n process.exitCode = 1\n})\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;AA2BA,eAAe,QAAQ,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,UAAU;CACrD,MAAM,eAAe,iBAAiB,KAAK,KAAK;CAChD,QAAQ,OAAO,MAAM,YAAY,aAAa,GAAG;CACjD,QAAQ,WAAW;AACrB,CAAC"}
|
package/dist/cli/index.d.mts
CHANGED
package/dist/cli/index.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as runTopLevelCli, t as parseTopLevelCliArguments } from "../cli-DTDgaqeI.mjs";
|
|
1
|
+
import { n as runTopLevelCli, t as parseTopLevelCliArguments } from "../cli-uzS81IPd.mjs";
|
|
2
2
|
export { parseTopLevelCliArguments, runTopLevelCli };
|