vieval 0.0.11 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -31
- package/dist/bin/vieval.mjs +1 -1
- package/dist/cli/index.d.mts +1 -1
- package/dist/cli/index.mjs +1 -1
- package/dist/{cli-CHFCF8UR.mjs → cli-uzS81IPd.mjs} +1529 -1529
- package/dist/cli-uzS81IPd.mjs.map +1 -0
- package/dist/config.d.mts +1 -1
- package/dist/core/assertions/index.d.mts +156 -156
- package/dist/core/assertions/index.mjs +82 -82
- package/dist/core/assertions/index.mjs.map +1 -1
- package/dist/core/inference-executors/index.d.mts +37 -37
- package/dist/core/inference-executors/index.mjs +53 -52
- package/dist/core/inference-executors/index.mjs.map +1 -1
- package/dist/core/processors/results/index.d.mts +18 -18
- package/dist/core/processors/results/index.mjs.map +1 -1
- package/dist/core/runner/index.d.mts +2 -2
- package/dist/core/runner/index.mjs +258 -258
- package/dist/core/runner/index.mjs.map +1 -1
- package/dist/core/scheduler/index.d.mts +1 -1
- package/dist/core/scheduler/index.mjs +64 -64
- package/dist/core/scheduler/index.mjs.map +1 -1
- package/dist/{env-bRH0K6fU.d.mts → env-Br6jaWGL.d.mts} +9 -9
- package/dist/{env-BVYeJhGA.mjs → env-egxaJtNn.mjs} +8 -8
- package/dist/env-egxaJtNn.mjs.map +1 -0
- package/dist/{expect-extensions-Mf1sMNBv.mjs → expect-extensions-BKdEPt3h.mjs} +46 -46
- package/dist/expect-extensions-BKdEPt3h.mjs.map +1 -0
- package/dist/expect.mjs +1 -1
- package/dist/{index-CwKBlCG9.d.mts → index-BLIlhiWT.d.mts} +565 -565
- package/dist/{index-Be5I1ZJL.d.mts → index-CIaJClcC.d.mts} +48 -48
- package/dist/index.d.mts +207 -195
- package/dist/index.mjs +147 -147
- package/dist/index.mjs.map +1 -1
- package/dist/models-CaCOUPZw.mjs.map +1 -1
- package/dist/plugins/chat-models/index.d.mts +279 -279
- package/dist/plugins/chat-models/index.mjs +359 -359
- package/dist/plugins/chat-models/index.mjs.map +1 -1
- package/dist/{registry-BSyjwZFx.mjs → registry-BK7k6X81.mjs} +293 -293
- package/dist/registry-BK7k6X81.mjs.map +1 -0
- package/dist/testing/expect-extensions.d.mts +27 -27
- package/dist/testing/expect-extensions.mjs +1 -1
- package/package.json +3 -3
- package/dist/cli-CHFCF8UR.mjs.map +0 -1
- package/dist/env-BVYeJhGA.mjs.map +0 -1
- package/dist/expect-extensions-Mf1sMNBv.mjs.map +0 -1
- package/dist/registry-BSyjwZFx.mjs.map +0 -1
package/README.md
CHANGED
|
@@ -31,6 +31,7 @@ import { defineConfig, loadEnv, requiredEnvFrom } from 'vieval'
|
|
|
31
31
|
import { chatModelFrom, ChatModels } from 'vieval/plugins/chat-models'
|
|
32
32
|
|
|
33
33
|
export default defineConfig({
|
|
34
|
+
env: loadEnv('test', cwd(), ''),
|
|
34
35
|
plugins: [
|
|
35
36
|
ChatModels({
|
|
36
37
|
models: [
|
|
@@ -46,23 +47,22 @@ export default defineConfig({
|
|
|
46
47
|
],
|
|
47
48
|
}),
|
|
48
49
|
],
|
|
49
|
-
env: loadEnv('test', cwd(), ''),
|
|
50
50
|
projects: [
|
|
51
51
|
{
|
|
52
|
+
evalMatrix: {
|
|
53
|
+
extend: {
|
|
54
|
+
rubric: ['default'],
|
|
55
|
+
},
|
|
56
|
+
},
|
|
57
|
+
include: ['evals/*.eval.ts'],
|
|
52
58
|
name: 'default',
|
|
53
59
|
root: '.',
|
|
54
|
-
include: ['evals/*.eval.ts'],
|
|
55
60
|
runMatrix: {
|
|
56
61
|
extend: {
|
|
57
62
|
model: ['agent-mini'],
|
|
58
63
|
scenario: ['baseline'],
|
|
59
64
|
},
|
|
60
65
|
},
|
|
61
|
-
evalMatrix: {
|
|
62
|
-
extend: {
|
|
63
|
-
rubric: ['default'],
|
|
64
|
-
},
|
|
65
|
-
},
|
|
66
66
|
},
|
|
67
67
|
],
|
|
68
68
|
})
|
|
@@ -129,8 +129,8 @@ Use builder style when loading a batch of inputs:
|
|
|
129
129
|
import { describeTask, expect } from 'vieval'
|
|
130
130
|
|
|
131
131
|
const arithmeticCases = [
|
|
132
|
-
{
|
|
133
|
-
{
|
|
132
|
+
{ input: { a: 1, b: 2, expected: 3 }, name: 'addition-small' },
|
|
133
|
+
{ input: { a: 20, b: 22, expected: 42 }, name: 'addition-large' },
|
|
134
134
|
]
|
|
135
135
|
|
|
136
136
|
describeTask('arithmetic-quality', ({ casesFromInputs }) => {
|
|
@@ -191,6 +191,7 @@ import { defineConfig, loadEnv, requiredEnvFrom } from 'vieval'
|
|
|
191
191
|
import { chatModelFrom, ChatModels } from 'vieval/plugins/chat-models'
|
|
192
192
|
|
|
193
193
|
export default defineConfig({
|
|
194
|
+
env: loadEnv('test', cwd(), ''),
|
|
194
195
|
plugins: [
|
|
195
196
|
ChatModels({
|
|
196
197
|
models: [
|
|
@@ -224,12 +225,17 @@ export default defineConfig({
|
|
|
224
225
|
],
|
|
225
226
|
}),
|
|
226
227
|
],
|
|
227
|
-
env: loadEnv('test', cwd(), ''),
|
|
228
228
|
projects: [
|
|
229
229
|
{
|
|
230
|
+
evalMatrix: {
|
|
231
|
+
extend: {
|
|
232
|
+
rubric: ['strict', 'lenient'],
|
|
233
|
+
rubricModel: ['judge-mini', 'judge-large'],
|
|
234
|
+
},
|
|
235
|
+
},
|
|
236
|
+
include: ['evals/*.eval.ts'],
|
|
230
237
|
name: 'chat-evals',
|
|
231
238
|
root: '.',
|
|
232
|
-
include: ['evals/*.eval.ts'],
|
|
233
239
|
runMatrix: {
|
|
234
240
|
extend: {
|
|
235
241
|
model: ['agent-mini', 'agent-large'],
|
|
@@ -237,12 +243,6 @@ export default defineConfig({
|
|
|
237
243
|
scenario: ['baseline', 'stress'],
|
|
238
244
|
},
|
|
239
245
|
},
|
|
240
|
-
evalMatrix: {
|
|
241
|
-
extend: {
|
|
242
|
-
rubric: ['strict', 'lenient'],
|
|
243
|
-
rubricModel: ['judge-mini', 'judge-large'],
|
|
244
|
-
},
|
|
245
|
-
},
|
|
246
246
|
},
|
|
247
247
|
],
|
|
248
248
|
})
|
|
@@ -258,19 +258,6 @@ import { defineConfig } from 'vieval'
|
|
|
258
258
|
export default defineConfig({
|
|
259
259
|
projects: [
|
|
260
260
|
{
|
|
261
|
-
name: 'motion-evals',
|
|
262
|
-
root: '.',
|
|
263
|
-
include: ['evals/*.eval.ts'],
|
|
264
|
-
inferenceExecutors: [{ id: 'motion-engine' }],
|
|
265
|
-
models: [
|
|
266
|
-
{
|
|
267
|
-
id: 'motion-engine:v2',
|
|
268
|
-
aliases: ['motion-default'],
|
|
269
|
-
inferenceExecutor: 'motion-engine',
|
|
270
|
-
inferenceExecutorId: 'motion-engine',
|
|
271
|
-
model: 'v2',
|
|
272
|
-
},
|
|
273
|
-
],
|
|
274
261
|
async executor(task, context) {
|
|
275
262
|
const model = context.models.find(model =>
|
|
276
263
|
model.id === 'motion-default'
|
|
@@ -285,13 +272,26 @@ export default defineConfig({
|
|
|
285
272
|
const success = model.model === 'v2' && task.matrix.run.scenario === 'baseline'
|
|
286
273
|
|
|
287
274
|
return {
|
|
288
|
-
id: task.id,
|
|
289
275
|
entryId: task.entry.id,
|
|
276
|
+
id: task.id,
|
|
290
277
|
inferenceExecutorId: task.inferenceExecutor.id,
|
|
291
278
|
matrix: task.matrix,
|
|
292
279
|
scores: [{ kind: 'exact', score: success ? 1 : 0 }],
|
|
293
280
|
}
|
|
294
281
|
},
|
|
282
|
+
include: ['evals/*.eval.ts'],
|
|
283
|
+
inferenceExecutors: [{ id: 'motion-engine' }],
|
|
284
|
+
models: [
|
|
285
|
+
{
|
|
286
|
+
aliases: ['motion-default'],
|
|
287
|
+
id: 'motion-engine:v2',
|
|
288
|
+
inferenceExecutor: 'motion-engine',
|
|
289
|
+
inferenceExecutorId: 'motion-engine',
|
|
290
|
+
model: 'v2',
|
|
291
|
+
},
|
|
292
|
+
],
|
|
293
|
+
name: 'motion-evals',
|
|
294
|
+
root: '.',
|
|
295
295
|
},
|
|
296
296
|
],
|
|
297
297
|
})
|
package/dist/bin/vieval.mjs
CHANGED
package/dist/cli/index.d.mts
CHANGED
package/dist/cli/index.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as runTopLevelCli, t as parseTopLevelCliArguments } from "../cli-CHFCF8UR.mjs";
|
|
1
|
+
import { n as runTopLevelCli, t as parseTopLevelCliArguments } from "../cli-uzS81IPd.mjs";
|
|
2
2
|
export { parseTopLevelCliArguments, runTopLevelCli };
|