vieval 0.0.7 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -2
- package/dist/bin/vieval.mjs +1 -1
- package/dist/cli/index.mjs +1 -1
- package/dist/{cli-ImxGpoYQ.mjs → cli-Dao25VxV.mjs} +2 -2
- package/dist/cli-Dao25VxV.mjs.map +1 -0
- package/dist/config.d.mts +1 -1
- package/dist/config.mjs +1 -1
- package/dist/core/assertions/index.d.mts +1 -1
- package/dist/core/inference-executors/index.d.mts +1 -1
- package/dist/core/inference-executors/index.mjs +10 -4
- package/dist/core/inference-executors/index.mjs.map +1 -1
- package/dist/core/processors/results/index.d.mts +1 -1
- package/dist/core/runner/index.d.mts +2 -2
- package/dist/core/runner/index.mjs +6 -40
- package/dist/core/runner/index.mjs.map +1 -1
- package/dist/{env-BeHv_5mo.d.mts → env-DfWZy_n4.d.mts} +14 -9
- package/dist/env-nV5rVErX.mjs +35 -0
- package/dist/env-nV5rVErX.mjs.map +1 -0
- package/dist/{index-5R1_k2nv.d.mts → index-BkjyCInx.d.mts} +12 -37
- package/dist/index.d.mts +5 -5
- package/dist/index.mjs +1 -1
- package/dist/{models-DIGdOUpJ.mjs → models-pBSRUZhY.mjs} +1 -1
- package/dist/{models-DIGdOUpJ.mjs.map → models-pBSRUZhY.mjs.map} +1 -1
- package/dist/plugins/chat-models/index.d.mts +63 -6
- package/dist/plugins/chat-models/index.mjs +74 -18
- package/dist/plugins/chat-models/index.mjs.map +1 -1
- package/dist/registry-BHGMxjpA.mjs.map +1 -1
- package/package.json +1 -1
- package/dist/cli-ImxGpoYQ.mjs.map +0 -1
- package/dist/env--94B0UtW.mjs +0 -41
- package/dist/env--94B0UtW.mjs.map +0 -1
package/README.md
CHANGED
|
@@ -25,7 +25,9 @@ Vitest-style evaluation framework for agents, models, and task pipelines.
|
|
|
25
25
|
|
|
26
26
|
```ts
|
|
27
27
|
// vieval.config.ts
|
|
28
|
-
import {
|
|
28
|
+
import { cwd } from 'node:process'
|
|
29
|
+
|
|
30
|
+
import { defineConfig, loadEnv, requiredEnvFrom } from 'vieval'
|
|
29
31
|
import { chatModelFrom, ChatModels } from 'vieval/plugins/chat-models'
|
|
30
32
|
|
|
31
33
|
export default defineConfig({
|
|
@@ -34,12 +36,17 @@ export default defineConfig({
|
|
|
34
36
|
models: [
|
|
35
37
|
chatModelFrom({
|
|
36
38
|
aliases: ['agent-mini', 'judge-mini'],
|
|
39
|
+
apiKey: config => requiredEnvFrom(config.env, {
|
|
40
|
+
name: 'OPENAI_API_KEY',
|
|
41
|
+
type: 'string',
|
|
42
|
+
}),
|
|
37
43
|
inferenceExecutor: 'openai',
|
|
38
44
|
model: 'gpt-4.1-mini',
|
|
39
45
|
}),
|
|
40
46
|
],
|
|
41
47
|
}),
|
|
42
48
|
],
|
|
49
|
+
env: loadEnv('test', cwd(), ''),
|
|
43
50
|
projects: [
|
|
44
51
|
{
|
|
45
52
|
name: 'default',
|
|
@@ -162,7 +169,9 @@ Each scheduled task receives stable matrix metadata:
|
|
|
162
169
|
## Config Example
|
|
163
170
|
|
|
164
171
|
```ts
|
|
165
|
-
import {
|
|
172
|
+
import { cwd } from 'node:process'
|
|
173
|
+
|
|
174
|
+
import { defineConfig, loadEnv, requiredEnvFrom } from 'vieval'
|
|
166
175
|
import { chatModelFrom, ChatModels } from 'vieval/plugins/chat-models'
|
|
167
176
|
|
|
168
177
|
export default defineConfig({
|
|
@@ -171,22 +180,35 @@ export default defineConfig({
|
|
|
171
180
|
models: [
|
|
172
181
|
chatModelFrom({
|
|
173
182
|
aliases: ['agent-mini', 'judge-mini'],
|
|
183
|
+
apiKey: config => requiredEnvFrom(config.env, {
|
|
184
|
+
name: 'OPENAI_API_KEY',
|
|
185
|
+
type: 'string',
|
|
186
|
+
}),
|
|
174
187
|
inferenceExecutor: 'openai',
|
|
175
188
|
model: 'gpt-4.1-mini',
|
|
176
189
|
}),
|
|
177
190
|
chatModelFrom({
|
|
178
191
|
aliases: ['agent-large', 'judge-large'],
|
|
192
|
+
apiKey: config => requiredEnvFrom(config.env, {
|
|
193
|
+
name: 'OPENAI_API_KEY',
|
|
194
|
+
type: 'string',
|
|
195
|
+
}),
|
|
179
196
|
inferenceExecutor: 'openai',
|
|
180
197
|
model: 'gpt-4.1',
|
|
181
198
|
}),
|
|
182
199
|
chatModelFrom({
|
|
183
200
|
aliases: ['agent-openrouter-mini'],
|
|
201
|
+
apiKey: config => requiredEnvFrom(config.env, {
|
|
202
|
+
name: 'OPENROUTER_API_KEY',
|
|
203
|
+
type: 'string',
|
|
204
|
+
}),
|
|
184
205
|
inferenceExecutor: 'openrouter',
|
|
185
206
|
model: 'openai/gpt-4.1-mini',
|
|
186
207
|
}),
|
|
187
208
|
],
|
|
188
209
|
}),
|
|
189
210
|
],
|
|
211
|
+
env: loadEnv('test', cwd(), ''),
|
|
190
212
|
projects: [
|
|
191
213
|
{
|
|
192
214
|
name: 'chat-evals',
|
package/dist/bin/vieval.mjs
CHANGED
package/dist/cli/index.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as runTopLevelCli, t as parseTopLevelCliArguments } from "../cli-ImxGpoYQ.mjs";
|
|
1
|
+
import { n as runTopLevelCli, t as parseTopLevelCliArguments } from "../cli-Dao25VxV.mjs";
|
|
2
2
|
export { parseTopLevelCliArguments, runTopLevelCli };
|
|
@@ -2190,7 +2190,7 @@ function createAutoTaskExecutor(reporter, projectName, recordEvent, projectCaseC
|
|
|
2190
2190
|
if (taskDefinition == null) throw new Error(`Missing eval task definition for entry "${task.entry.id}".`);
|
|
2191
2191
|
const output = await taskDefinition.run({
|
|
2192
2192
|
cache: context.cache,
|
|
2193
|
-
|
|
2193
|
+
models: context.models,
|
|
2194
2194
|
reporterHooks: resolveTaskReporterHooks(task, context, reporter, projectName, recordEvent, projectCaseCounters, projectCaseFailures, vitestCompatReporter),
|
|
2195
2195
|
task,
|
|
2196
2196
|
telemetry: context.telemetry
|
|
@@ -3842,4 +3842,4 @@ async function runTopLevelCli(argv) {
|
|
|
3842
3842
|
//#endregion
|
|
3843
3843
|
export { runTopLevelCli as n, parseTopLevelCliArguments as t };
|
|
3844
3844
|
|
|
3845
|
-
//# sourceMappingURL=cli-ImxGpoYQ.mjs.map
|
|
3845
|
+
//# sourceMappingURL=cli-Dao25VxV.mjs.map
|