vieval 0.0.10 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -31
- package/dist/bin/vieval.mjs +1 -1
- package/dist/bin/vieval.mjs.map +1 -1
- package/dist/cli/index.d.mts +1 -1
- package/dist/cli/index.mjs +1 -1
- package/dist/{cli-DTDgaqeI.mjs → cli-uzS81IPd.mjs} +1483 -1483
- package/dist/cli-uzS81IPd.mjs.map +1 -0
- package/dist/config.d.mts +1 -1
- package/dist/config.mjs +1 -1
- package/dist/config.mjs.map +1 -1
- package/dist/core/assertions/index.d.mts +156 -156
- package/dist/core/assertions/index.mjs +82 -82
- package/dist/core/assertions/index.mjs.map +1 -1
- package/dist/core/inference-executors/index.d.mts +37 -37
- package/dist/core/inference-executors/index.mjs +54 -53
- package/dist/core/inference-executors/index.mjs.map +1 -1
- package/dist/core/processors/results/index.d.mts +18 -18
- package/dist/core/processors/results/index.mjs.map +1 -1
- package/dist/core/runner/index.d.mts +2 -2
- package/dist/core/runner/index.mjs +259 -259
- package/dist/core/runner/index.mjs.map +1 -1
- package/dist/core/scheduler/index.d.mts +1 -1
- package/dist/core/scheduler/index.mjs +65 -65
- package/dist/core/scheduler/index.mjs.map +1 -1
- package/dist/{env-DfWZy_n4.d.mts → env-Br6jaWGL.d.mts} +9 -9
- package/dist/{env-nV5rVErX.mjs → env-egxaJtNn.mjs} +8 -8
- package/dist/env-egxaJtNn.mjs.map +1 -0
- package/dist/{expect-extensions-DCSqlneN.mjs → expect-extensions-BKdEPt3h.mjs} +46 -46
- package/dist/expect-extensions-BKdEPt3h.mjs.map +1 -0
- package/dist/expect.d.mts +1 -3
- package/dist/expect.mjs +1 -1
- package/dist/expect.mjs.map +1 -1
- package/dist/{index-D_aMeWqO.d.mts → index-BLIlhiWT.d.mts} +565 -565
- package/dist/{index-Bg0atWBF.d.mts → index-CIaJClcC.d.mts} +48 -48
- package/dist/index.d.mts +208 -197
- package/dist/index.mjs +148 -148
- package/dist/index.mjs.map +1 -1
- package/dist/{models-pBSRUZhY.mjs → models-CaCOUPZw.mjs} +1 -1
- package/dist/{models-pBSRUZhY.mjs.map → models-CaCOUPZw.mjs.map} +1 -1
- package/dist/plugins/chat-models/index.d.mts +279 -279
- package/dist/plugins/chat-models/index.mjs +360 -360
- package/dist/plugins/chat-models/index.mjs.map +1 -1
- package/dist/{queue-DsZQkZO_.mjs → queue-BL86z2W_.mjs} +1 -1
- package/dist/{queue-DsZQkZO_.mjs.map → queue-BL86z2W_.mjs.map} +1 -1
- package/dist/{registry-DMnwE_mY.mjs → registry-BK7k6X81.mjs} +294 -294
- package/dist/registry-BK7k6X81.mjs.map +1 -0
- package/dist/testing/expect-extensions.d.mts +27 -27
- package/dist/testing/expect-extensions.mjs +1 -1
- package/package.json +12 -12
- package/dist/cli-DTDgaqeI.mjs.map +0 -1
- package/dist/env-nV5rVErX.mjs.map +0 -1
- package/dist/expect-extensions-DCSqlneN.mjs.map +0 -1
- package/dist/registry-DMnwE_mY.mjs.map +0 -1
package/dist/index.d.mts
CHANGED
|
@@ -1,26 +1,24 @@
|
|
|
1
|
-
import { $ as InferenceExecutor, D as TaskRunContext, I as TaskExecutionContext, J as RunResult, O as TaskRunOutput, R as ModelDefinition, S as TaskConcurrencyConfig, X as RunScoreKind, f as MatrixDefinition, k as TelemetryAttributeValue, o as CliReportingConfig, p as MatrixLayer, rt as ScheduledTask, t as ConfigHookPlugin, w as TaskExecutionPolicy } from "./index-
|
|
2
|
-
import { a as requiredEnvFrom } from "./env-
|
|
1
|
+
import { $ as InferenceExecutor, D as TaskRunContext, I as TaskExecutionContext, J as RunResult, O as TaskRunOutput, R as ModelDefinition, S as TaskConcurrencyConfig, X as RunScoreKind, f as MatrixDefinition, k as TelemetryAttributeValue, o as CliReportingConfig, p as MatrixLayer, rt as ScheduledTask, t as ConfigHookPlugin, w as TaskExecutionPolicy } from "./index-BLIlhiWT.mjs";
|
|
2
|
+
import { a as requiredEnvFrom } from "./env-Br6jaWGL.mjs";
|
|
3
3
|
import { expect } from "./expect.mjs";
|
|
4
|
-
import * as _$c12 from "c12";
|
|
5
4
|
|
|
6
5
|
//#region src/cli/reporters/vitest-compat-reporter.d.ts
|
|
7
|
-
type Awaitable<T> = T | Promise<T>;
|
|
8
6
|
/**
|
|
9
|
-
* Normalized
|
|
7
|
+
* Normalized test-case-like entity delivered to vitest-compatible reporter hooks.
|
|
10
8
|
*/
|
|
11
|
-
interface
|
|
9
|
+
interface VievalVitestCompatCase {
|
|
12
10
|
id: string;
|
|
11
|
+
module: VievalVitestCompatModule;
|
|
13
12
|
name: string;
|
|
14
|
-
|
|
13
|
+
state: 'failed' | 'passed' | 'pending' | 'skipped';
|
|
15
14
|
}
|
|
16
15
|
/**
|
|
17
|
-
* Normalized
|
|
16
|
+
* Normalized module-like entity delivered to vitest-compatible reporter hooks.
|
|
18
17
|
*/
|
|
19
|
-
interface
|
|
18
|
+
interface VievalVitestCompatModule {
|
|
20
19
|
id: string;
|
|
21
20
|
name: string;
|
|
22
|
-
|
|
23
|
-
state: 'failed' | 'passed' | 'pending' | 'skipped';
|
|
21
|
+
projectName: string;
|
|
24
22
|
}
|
|
25
23
|
/**
|
|
26
24
|
* Supported vitest-style reporter lifecycle hooks.
|
|
@@ -48,6 +46,7 @@ interface VievalVitestCompatReporter {
|
|
|
48
46
|
projectName: string;
|
|
49
47
|
}[]) => Awaitable<void>;
|
|
50
48
|
}
|
|
49
|
+
type VievalVitestCompatReporterReference = readonly [VievalVitestCompatReporterValue, unknown?] | VievalVitestCompatReporterValue;
|
|
51
50
|
/**
|
|
52
51
|
* Supported project reporter references.
|
|
53
52
|
*
|
|
@@ -59,13 +58,88 @@ interface VievalVitestCompatReporter {
|
|
|
59
58
|
* `https://github.com/vitest-dev/vitest/blob/b865b4d83d1e7874607ba1b2d84b9e2d135ecd33/packages/vitest/src/node/config/resolveConfig.ts#L674-L713`
|
|
60
59
|
*/
|
|
61
60
|
type VievalVitestCompatReporterValue = string | VievalVitestCompatReporter;
|
|
62
|
-
type
|
|
61
|
+
type Awaitable<T> = Promise<T> | T;
|
|
62
|
+
/**
|
|
63
|
+
* Creates a project-level vitest-compatible reporter bridge.
|
|
64
|
+
*
|
|
65
|
+
* Use when:
|
|
66
|
+
* - `vieval` should reuse vitest-like reporter callbacks without changing CLI output contracts
|
|
67
|
+
*
|
|
68
|
+
* Expects:
|
|
69
|
+
* - references point to modules whose default export is a reporter instance or constructor
|
|
70
|
+
*
|
|
71
|
+
* Returns:
|
|
72
|
+
* - `null` when no reporter references are configured
|
|
73
|
+
*/
|
|
63
74
|
//#endregion
|
|
64
75
|
//#region src/cli/config.d.ts
|
|
65
76
|
/**
|
|
66
|
-
*
|
|
77
|
+
* Benchmark identity and shared cache namespace.
|
|
67
78
|
*/
|
|
68
|
-
|
|
79
|
+
interface CliComparisonBenchmarkConfig {
|
|
80
|
+
/**
|
|
81
|
+
* Benchmark identifier used in report artifacts.
|
|
82
|
+
*/
|
|
83
|
+
id: string;
|
|
84
|
+
/**
|
|
85
|
+
* Shared cache namespace reused across method runs.
|
|
86
|
+
*/
|
|
87
|
+
sharedCaseNamespace: string;
|
|
88
|
+
}
|
|
89
|
+
/**
|
|
90
|
+
* One comparison entry loaded by `vieval compare`.
|
|
91
|
+
*/
|
|
92
|
+
interface CliComparisonConfig {
|
|
93
|
+
/**
|
|
94
|
+
* Benchmark metadata for reporting and shared cache coordination.
|
|
95
|
+
*/
|
|
96
|
+
benchmark: CliComparisonBenchmarkConfig;
|
|
97
|
+
/**
|
|
98
|
+
* Optional workspace exclude glob(s), also relative to config directory.
|
|
99
|
+
*/
|
|
100
|
+
excludesWorkspaces?: string | string[];
|
|
101
|
+
/**
|
|
102
|
+
* Comparison id selected by `--comparison`.
|
|
103
|
+
*/
|
|
104
|
+
id: string;
|
|
105
|
+
/**
|
|
106
|
+
* Optional workspace glob(s) discovered relative to config directory.
|
|
107
|
+
*/
|
|
108
|
+
includesWorkspaces?: string | string[];
|
|
109
|
+
/**
|
|
110
|
+
* Optional explicit method list.
|
|
111
|
+
*/
|
|
112
|
+
methods?: CliComparisonMethodConfig[];
|
|
113
|
+
}
|
|
114
|
+
/**
|
|
115
|
+
* One explicit comparison method descriptor.
|
|
116
|
+
*/
|
|
117
|
+
interface CliComparisonMethodConfig {
|
|
118
|
+
/**
|
|
119
|
+
* Optional explicit config file path for this workspace.
|
|
120
|
+
*/
|
|
121
|
+
configFilePath?: string;
|
|
122
|
+
/**
|
|
123
|
+
* Method identifier shown in compare reports.
|
|
124
|
+
*/
|
|
125
|
+
id: string;
|
|
126
|
+
/**
|
|
127
|
+
* Project name to execute inside workspace config.
|
|
128
|
+
*/
|
|
129
|
+
project: string;
|
|
130
|
+
/**
|
|
131
|
+
* Workspace path containing this method's `vieval.config.*`.
|
|
132
|
+
*/
|
|
133
|
+
workspace: string;
|
|
134
|
+
}
|
|
135
|
+
/**
|
|
136
|
+
* Comparison mode config for `vieval compare`.
|
|
137
|
+
*/
|
|
138
|
+
interface CliComparisonModeConfig extends CliConfigBase {
|
|
139
|
+
comparisons: CliComparisonConfig[];
|
|
140
|
+
projects?: never;
|
|
141
|
+
workspaces?: never;
|
|
142
|
+
}
|
|
69
143
|
/**
|
|
70
144
|
* Concurrency limits that can be declared in CLI-facing config.
|
|
71
145
|
*
|
|
@@ -81,9 +155,13 @@ type CliConfigPlugin = ConfigHookPlugin<CliConfig>;
|
|
|
81
155
|
*/
|
|
82
156
|
interface CliConcurrencyConfig {
|
|
83
157
|
/**
|
|
84
|
-
*
|
|
158
|
+
* Attempt-level concurrency cap.
|
|
85
159
|
*/
|
|
86
|
-
|
|
160
|
+
attempt?: number;
|
|
161
|
+
/**
|
|
162
|
+
* Case-level concurrency cap.
|
|
163
|
+
*/
|
|
164
|
+
case?: number;
|
|
87
165
|
/**
|
|
88
166
|
* Project-level concurrency cap.
|
|
89
167
|
*/
|
|
@@ -93,40 +171,56 @@ interface CliConcurrencyConfig {
|
|
|
93
171
|
*/
|
|
94
172
|
task?: number;
|
|
95
173
|
/**
|
|
96
|
-
*
|
|
97
|
-
*/
|
|
98
|
-
attempt?: number;
|
|
99
|
-
/**
|
|
100
|
-
* Case-level concurrency cap.
|
|
174
|
+
* Workspace-level concurrency cap.
|
|
101
175
|
*/
|
|
102
|
-
|
|
176
|
+
workspace?: number;
|
|
103
177
|
}
|
|
178
|
+
/**
|
|
179
|
+
* Top-level CLI config loaded from `vieval.config.*`.
|
|
180
|
+
*
|
|
181
|
+
* Exactly one top-level mode is allowed:
|
|
182
|
+
* - `projects`
|
|
183
|
+
* - `workspaces`
|
|
184
|
+
* - `comparisons`
|
|
185
|
+
*/
|
|
186
|
+
type CliConfig = CliComparisonModeConfig | CliProjectModeConfig | CliWorkspaceModeConfig;
|
|
187
|
+
/**
|
|
188
|
+
* CLI plugin shape bound to the full CLI config object.
|
|
189
|
+
*/
|
|
190
|
+
type CliConfigPlugin = ConfigHookPlugin<CliConfig>;
|
|
104
191
|
/**
|
|
105
192
|
* Defines one project block for `vieval run`.
|
|
106
193
|
*/
|
|
107
194
|
interface CliProjectConfig {
|
|
108
195
|
/**
|
|
109
|
-
*
|
|
110
|
-
*/
|
|
111
|
-
name: string;
|
|
112
|
-
/**
|
|
113
|
-
* Project root used for include/exclude glob matching.
|
|
196
|
+
* Optional project-scoped concurrency overrides.
|
|
114
197
|
*
|
|
115
|
-
* @default
|
|
198
|
+
* @default inherited from top-level or CLI execution settings
|
|
116
199
|
*/
|
|
117
|
-
|
|
200
|
+
concurrency?: Omit<CliConcurrencyConfig, 'workspace'>;
|
|
118
201
|
/**
|
|
119
|
-
*
|
|
120
|
-
*
|
|
121
|
-
* @default Common eval file globs for TypeScript and JavaScript module formats.
|
|
202
|
+
* Optional eval-time matrix dimensions.
|
|
122
203
|
*/
|
|
123
|
-
|
|
204
|
+
evalMatrix?: MatrixDefinition | MatrixLayer;
|
|
124
205
|
/**
|
|
125
206
|
* Glob patterns excluded from discovery.
|
|
126
207
|
*
|
|
127
208
|
* @default Common exclusion globs for dependencies, build output, and VCS directories.
|
|
128
209
|
*/
|
|
129
210
|
exclude?: string[];
|
|
211
|
+
/**
|
|
212
|
+
* Optional task executor.
|
|
213
|
+
*
|
|
214
|
+
* Use when this project should execute live inferenceExecutor requests.
|
|
215
|
+
* If omitted, `vieval run` performs collection + scheduling only.
|
|
216
|
+
*/
|
|
217
|
+
executor?: (task: ScheduledTask, context: CliProjectExecutorContext) => Promise<RunResult>;
|
|
218
|
+
/**
|
|
219
|
+
* Glob patterns for eval file discovery.
|
|
220
|
+
*
|
|
221
|
+
* @default Common eval file globs for TypeScript and JavaScript module formats.
|
|
222
|
+
*/
|
|
223
|
+
include?: string[];
|
|
130
224
|
/**
|
|
131
225
|
* Providers expanded by scheduler.
|
|
132
226
|
*
|
|
@@ -143,26 +237,9 @@ interface CliProjectConfig {
|
|
|
143
237
|
*/
|
|
144
238
|
models?: ModelDefinition[];
|
|
145
239
|
/**
|
|
146
|
-
*
|
|
147
|
-
*/
|
|
148
|
-
runMatrix?: MatrixDefinition | MatrixLayer;
|
|
149
|
-
/**
|
|
150
|
-
* Optional eval-time matrix dimensions.
|
|
151
|
-
*/
|
|
152
|
-
evalMatrix?: MatrixDefinition | MatrixLayer;
|
|
153
|
-
/**
|
|
154
|
-
* Optional project-scoped concurrency overrides.
|
|
155
|
-
*
|
|
156
|
-
* @default inherited from top-level or CLI execution settings
|
|
157
|
-
*/
|
|
158
|
-
concurrency?: Omit<CliConcurrencyConfig, 'workspace'>;
|
|
159
|
-
/**
|
|
160
|
-
* Optional task executor.
|
|
161
|
-
*
|
|
162
|
-
* Use when this project should execute live inferenceExecutor requests.
|
|
163
|
-
* If omitted, `vieval run` performs collection + scheduling only.
|
|
240
|
+
* Project label used in summary output.
|
|
164
241
|
*/
|
|
165
|
-
|
|
242
|
+
name: string;
|
|
166
243
|
/**
|
|
167
244
|
* Optional project-local plugins.
|
|
168
245
|
*/
|
|
@@ -176,107 +253,73 @@ interface CliProjectConfig {
|
|
|
176
253
|
* @default []
|
|
177
254
|
*/
|
|
178
255
|
reporters?: VievalVitestCompatReporterReference[];
|
|
179
|
-
}
|
|
180
|
-
/**
|
|
181
|
-
* One workspace descriptor for workspace-mode configs.
|
|
182
|
-
*/
|
|
183
|
-
interface CliWorkspaceConfig {
|
|
184
256
|
/**
|
|
185
|
-
*
|
|
257
|
+
* Project root used for include/exclude glob matching.
|
|
258
|
+
*
|
|
259
|
+
* @default process cwd
|
|
186
260
|
*/
|
|
187
|
-
|
|
261
|
+
root?: string;
|
|
188
262
|
/**
|
|
189
|
-
*
|
|
263
|
+
* Optional run-time matrix dimensions.
|
|
190
264
|
*/
|
|
191
|
-
|
|
265
|
+
runMatrix?: MatrixDefinition | MatrixLayer;
|
|
192
266
|
}
|
|
193
267
|
/**
|
|
194
|
-
*
|
|
268
|
+
* Execution context exposed to project-level `executor` implementations.
|
|
269
|
+
*
|
|
270
|
+
* Use when:
|
|
271
|
+
* - a project executor needs task-scoped models plus case reporter hooks
|
|
272
|
+
* - custom scheduling logic wants the same hook shape as `TaskRunContext`
|
|
273
|
+
*
|
|
274
|
+
* Expects:
|
|
275
|
+
* - `models` exposes configured model registrations for plugin helpers
|
|
276
|
+
* - `reporterHooks` follows `TaskRunContext['reporterHooks']`
|
|
277
|
+
* - `telemetry` follows `TaskRunContext['telemetry']`
|
|
278
|
+
* - `runtimeConcurrency` follows `TaskRunContext['runtimeConcurrency']`
|
|
195
279
|
*/
|
|
196
|
-
interface
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
id: string;
|
|
201
|
-
/**
|
|
202
|
-
* Workspace path containing this method's `vieval.config.*`.
|
|
203
|
-
*/
|
|
204
|
-
workspace: string;
|
|
205
|
-
/**
|
|
206
|
-
* Project name to execute inside workspace config.
|
|
207
|
-
*/
|
|
208
|
-
project: string;
|
|
209
|
-
/**
|
|
210
|
-
* Optional explicit config file path for this workspace.
|
|
211
|
-
*/
|
|
212
|
-
configFilePath?: string;
|
|
280
|
+
interface CliProjectExecutorContext extends TaskExecutionContext {
|
|
281
|
+
reporterHooks?: TaskRunContext['reporterHooks'];
|
|
282
|
+
runtimeConcurrency?: TaskRunContext['runtimeConcurrency'];
|
|
283
|
+
telemetry?: TaskRunContext['telemetry'];
|
|
213
284
|
}
|
|
214
285
|
/**
|
|
215
|
-
*
|
|
286
|
+
* Project mode config for `vieval run`.
|
|
216
287
|
*/
|
|
217
|
-
interface
|
|
218
|
-
|
|
219
|
-
* Benchmark identifier used in report artifacts.
|
|
220
|
-
*/
|
|
221
|
-
id: string;
|
|
288
|
+
interface CliProjectModeConfig extends CliConfigBase {
|
|
289
|
+
comparisons?: never;
|
|
222
290
|
/**
|
|
223
|
-
*
|
|
291
|
+
* Project list expanded by `vieval run`.
|
|
292
|
+
*
|
|
293
|
+
* @default [{ name: 'default' }]
|
|
224
294
|
*/
|
|
225
|
-
|
|
295
|
+
projects?: CliProjectConfig[];
|
|
296
|
+
workspaces?: never;
|
|
226
297
|
}
|
|
227
298
|
/**
|
|
228
|
-
* One
|
|
299
|
+
* One workspace descriptor for workspace-mode configs.
|
|
229
300
|
*/
|
|
230
|
-
interface
|
|
301
|
+
interface CliWorkspaceConfig {
|
|
231
302
|
/**
|
|
232
|
-
*
|
|
303
|
+
* Workspace identifier.
|
|
233
304
|
*/
|
|
234
305
|
id: string;
|
|
235
306
|
/**
|
|
236
|
-
*
|
|
237
|
-
*/
|
|
238
|
-
benchmark: CliComparisonBenchmarkConfig;
|
|
239
|
-
/**
|
|
240
|
-
* Optional explicit method list.
|
|
241
|
-
*/
|
|
242
|
-
methods?: CliComparisonMethodConfig[];
|
|
243
|
-
/**
|
|
244
|
-
* Optional workspace glob(s) discovered relative to config directory.
|
|
245
|
-
*/
|
|
246
|
-
includesWorkspaces?: string | string[];
|
|
247
|
-
/**
|
|
248
|
-
* Optional workspace exclude glob(s), also relative to config directory.
|
|
307
|
+
* Workspace root path.
|
|
249
308
|
*/
|
|
250
|
-
|
|
309
|
+
root: string;
|
|
251
310
|
}
|
|
252
311
|
/**
|
|
253
|
-
*
|
|
254
|
-
*
|
|
255
|
-
* Use when:
|
|
256
|
-
* - a project executor needs task-scoped models plus case reporter hooks
|
|
257
|
-
* - custom scheduling logic wants the same hook shape as `TaskRunContext`
|
|
258
|
-
*
|
|
259
|
-
* Expects:
|
|
260
|
-
* - `models` exposes configured model registrations for plugin helpers
|
|
261
|
-
* - `reporterHooks` follows `TaskRunContext['reporterHooks']`
|
|
262
|
-
* - `telemetry` follows `TaskRunContext['telemetry']`
|
|
263
|
-
* - `runtimeConcurrency` follows `TaskRunContext['runtimeConcurrency']`
|
|
312
|
+
* Workspace mode config placeholder for future workspace orchestration.
|
|
264
313
|
*/
|
|
265
|
-
interface
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
314
|
+
interface CliWorkspaceModeConfig extends CliConfigBase {
|
|
315
|
+
comparisons?: never;
|
|
316
|
+
projects?: never;
|
|
317
|
+
workspaces: CliWorkspaceConfig[];
|
|
269
318
|
}
|
|
270
319
|
/**
|
|
271
320
|
* Top-level CLI config loaded from `vieval.config.*`.
|
|
272
321
|
*/
|
|
273
322
|
interface CliConfigBase {
|
|
274
|
-
/**
|
|
275
|
-
* Global model definitions inherited by projects.
|
|
276
|
-
*
|
|
277
|
-
* @default []
|
|
278
|
-
*/
|
|
279
|
-
models?: ModelDefinition[];
|
|
280
323
|
/**
|
|
281
324
|
* Global concurrency defaults inherited by projects and tasks.
|
|
282
325
|
*
|
|
@@ -290,6 +333,22 @@ interface CliConfigBase {
|
|
|
290
333
|
* @default undefined
|
|
291
334
|
*/
|
|
292
335
|
concurrency?: CliConcurrencyConfig;
|
|
336
|
+
/**
|
|
337
|
+
* Environment variables injected into `process.env` during `vieval run`.
|
|
338
|
+
*
|
|
339
|
+
* Use when:
|
|
340
|
+
* - eval tasks depend on runtime env values (for example inferenceExecutor API keys)
|
|
341
|
+
* - config wants deterministic env values without shell-level exports
|
|
342
|
+
*
|
|
343
|
+
* @default {}
|
|
344
|
+
*/
|
|
345
|
+
env?: NodeJS.ProcessEnv;
|
|
346
|
+
/**
|
|
347
|
+
* Global model definitions inherited by projects.
|
|
348
|
+
*
|
|
349
|
+
* @default []
|
|
350
|
+
*/
|
|
351
|
+
models?: ModelDefinition[];
|
|
293
352
|
/**
|
|
294
353
|
* Global config plugins.
|
|
295
354
|
*
|
|
@@ -302,16 +361,6 @@ interface CliConfigBase {
|
|
|
302
361
|
* @default []
|
|
303
362
|
*/
|
|
304
363
|
reporters?: VievalVitestCompatReporterReference[];
|
|
305
|
-
/**
|
|
306
|
-
* Environment variables injected into `process.env` during `vieval run`.
|
|
307
|
-
*
|
|
308
|
-
* Use when:
|
|
309
|
-
* - eval tasks depend on runtime env values (for example inferenceExecutor API keys)
|
|
310
|
-
* - config wants deterministic env values without shell-level exports
|
|
311
|
-
*
|
|
312
|
-
* @default {}
|
|
313
|
-
*/
|
|
314
|
-
env?: NodeJS.ProcessEnv;
|
|
315
364
|
/**
|
|
316
365
|
* Optional reporting integrations shared by CLI run orchestration.
|
|
317
366
|
*
|
|
@@ -319,48 +368,10 @@ interface CliConfigBase {
|
|
|
319
368
|
*/
|
|
320
369
|
reporting?: CliReportingConfig;
|
|
321
370
|
}
|
|
322
|
-
/**
|
|
323
|
-
* Project mode config for `vieval run`.
|
|
324
|
-
*/
|
|
325
|
-
interface CliProjectModeConfig extends CliConfigBase {
|
|
326
|
-
/**
|
|
327
|
-
* Project list expanded by `vieval run`.
|
|
328
|
-
*
|
|
329
|
-
* @default [{ name: 'default' }]
|
|
330
|
-
*/
|
|
331
|
-
projects?: CliProjectConfig[];
|
|
332
|
-
comparisons?: never;
|
|
333
|
-
workspaces?: never;
|
|
334
|
-
}
|
|
335
|
-
/**
|
|
336
|
-
* Workspace mode config placeholder for future workspace orchestration.
|
|
337
|
-
*/
|
|
338
|
-
interface CliWorkspaceModeConfig extends CliConfigBase {
|
|
339
|
-
workspaces: CliWorkspaceConfig[];
|
|
340
|
-
projects?: never;
|
|
341
|
-
comparisons?: never;
|
|
342
|
-
}
|
|
343
|
-
/**
|
|
344
|
-
* Comparison mode config for `vieval compare`.
|
|
345
|
-
*/
|
|
346
|
-
interface CliComparisonModeConfig extends CliConfigBase {
|
|
347
|
-
comparisons: CliComparisonConfig[];
|
|
348
|
-
projects?: never;
|
|
349
|
-
workspaces?: never;
|
|
350
|
-
}
|
|
351
|
-
/**
|
|
352
|
-
* Top-level CLI config loaded from `vieval.config.*`.
|
|
353
|
-
*
|
|
354
|
-
* Exactly one top-level mode is allowed:
|
|
355
|
-
* - `projects`
|
|
356
|
-
* - `workspaces`
|
|
357
|
-
* - `comparisons`
|
|
358
|
-
*/
|
|
359
|
-
type CliConfig = CliProjectModeConfig | CliWorkspaceModeConfig | CliComparisonModeConfig;
|
|
360
371
|
/**
|
|
361
372
|
* Helper used by `vieval.config.*` for better type inference.
|
|
362
373
|
*/
|
|
363
|
-
declare const defineConfig:
|
|
374
|
+
declare const defineConfig: import("c12").DefineConfig<CliConfig, import("c12").ConfigLayerMeta>;
|
|
364
375
|
/**
|
|
365
376
|
* Loads `.env*` files using Vite's env resolution behavior.
|
|
366
377
|
*
|
|
@@ -378,6 +389,15 @@ declare const defineConfig: _$c12.DefineConfig<CliConfig, _$c12.ConfigLayerMeta>
|
|
|
378
389
|
declare function loadEnv(mode: string, envDir: string, prefixes?: string | string[]): NodeJS.ProcessEnv;
|
|
379
390
|
//#endregion
|
|
380
391
|
//#region src/dsl/task.d.ts
|
|
392
|
+
/**
|
|
393
|
+
* Per-case registration options for `caseOf`.
|
|
394
|
+
*/
|
|
395
|
+
interface CaseRegistrationOptions<TInput> extends TaskExecutionPolicy {
|
|
396
|
+
/**
|
|
397
|
+
* Optional case input payload.
|
|
398
|
+
*/
|
|
399
|
+
input: TInput;
|
|
400
|
+
}
|
|
381
401
|
/**
|
|
382
402
|
* Runtime context provided to a task case callback.
|
|
383
403
|
*/
|
|
@@ -389,26 +409,26 @@ interface CaseRunContext<TInput> extends TaskRunContext {
|
|
|
389
409
|
inputs: TInput;
|
|
390
410
|
};
|
|
391
411
|
/**
|
|
392
|
-
*
|
|
412
|
+
* Emits one custom case metric into report events.
|
|
393
413
|
*
|
|
394
414
|
* Use when:
|
|
395
|
-
* -
|
|
415
|
+
* - tasks need structured benchmark metadata beyond exact/judge score families
|
|
396
416
|
*
|
|
397
417
|
* Expects:
|
|
398
|
-
* - `
|
|
418
|
+
* - `name` to be a stable metric identifier
|
|
419
|
+
* - `value` to be JSON-serializable
|
|
399
420
|
*/
|
|
400
|
-
|
|
421
|
+
metric: (name: string, value: TelemetryAttributeValue) => void;
|
|
401
422
|
/**
|
|
402
|
-
*
|
|
423
|
+
* Overrides one case score family with a custom normalized value.
|
|
403
424
|
*
|
|
404
425
|
* Use when:
|
|
405
|
-
* -
|
|
426
|
+
* - one case computes a benchmark-native score that should flow into run aggregation
|
|
406
427
|
*
|
|
407
428
|
* Expects:
|
|
408
|
-
* - `
|
|
409
|
-
* - `value` to be JSON-serializable
|
|
429
|
+
* - `score` to stay in the `0..1` range
|
|
410
430
|
*/
|
|
411
|
-
|
|
431
|
+
score: (score: number, kind?: RunScoreKind) => void;
|
|
412
432
|
/**
|
|
413
433
|
* Cooperative abort signal for the current case execution.
|
|
414
434
|
*/
|
|
@@ -437,15 +457,6 @@ interface CasesFromInputsOptions extends TaskExecutionPolicy {
|
|
|
437
457
|
*/
|
|
438
458
|
concurrency?: number;
|
|
439
459
|
}
|
|
440
|
-
/**
|
|
441
|
-
* Per-case registration options for `caseOf`.
|
|
442
|
-
*/
|
|
443
|
-
interface CaseRegistrationOptions<TInput> extends TaskExecutionPolicy {
|
|
444
|
-
/**
|
|
445
|
-
* Optional case input payload.
|
|
446
|
-
*/
|
|
447
|
-
input: TInput;
|
|
448
|
-
}
|
|
449
460
|
/**
|
|
450
461
|
* Builder callbacks passed into `describeTask`.
|
|
451
462
|
*/
|
|
@@ -466,10 +477,6 @@ interface DescribeTaskBuilder {
|
|
|
466
477
|
* Options for `describeTask`.
|
|
467
478
|
*/
|
|
468
479
|
interface DescribeTaskOptions extends TaskExecutionPolicy {
|
|
469
|
-
/**
|
|
470
|
-
* Optional description override.
|
|
471
|
-
*/
|
|
472
|
-
description?: string;
|
|
473
480
|
/**
|
|
474
481
|
* Optional task-local concurrency overrides.
|
|
475
482
|
*
|
|
@@ -483,6 +490,10 @@ interface DescribeTaskOptions extends TaskExecutionPolicy {
|
|
|
483
490
|
* @default inherited from project or CLI concurrency settings
|
|
484
491
|
*/
|
|
485
492
|
concurrency?: TaskConcurrencyConfig;
|
|
493
|
+
/**
|
|
494
|
+
* Optional description override.
|
|
495
|
+
*/
|
|
496
|
+
description?: string;
|
|
486
497
|
}
|
|
487
498
|
/**
|
|
488
499
|
* Registers one case in the currently active task scope.
|
|
@@ -500,7 +511,7 @@ declare function casesFromInputs<TInput>(namePrefix: string, inputs: readonly TI
|
|
|
500
511
|
* - task behavior should be declared with `caseOf` and `casesFromInputs`
|
|
501
512
|
* - business agent code should be imported and run from eval task files
|
|
502
513
|
*/
|
|
503
|
-
declare function describeTask(name: string, build: ((
|
|
514
|
+
declare function describeTask(name: string, build: (() => void) | ((builder: DescribeTaskBuilder) => void), options?: DescribeTaskOptions): {
|
|
504
515
|
readonly description: string;
|
|
505
516
|
readonly name: string;
|
|
506
517
|
readonly task: {
|