vieval 0.0.11 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -31
- package/dist/bin/vieval.mjs +1 -1
- package/dist/cli/index.d.mts +1 -1
- package/dist/cli/index.mjs +1 -1
- package/dist/{cli-CHFCF8UR.mjs → cli-uzS81IPd.mjs} +1529 -1529
- package/dist/cli-uzS81IPd.mjs.map +1 -0
- package/dist/config.d.mts +1 -1
- package/dist/core/assertions/index.d.mts +156 -156
- package/dist/core/assertions/index.mjs +82 -82
- package/dist/core/assertions/index.mjs.map +1 -1
- package/dist/core/inference-executors/index.d.mts +37 -37
- package/dist/core/inference-executors/index.mjs +53 -52
- package/dist/core/inference-executors/index.mjs.map +1 -1
- package/dist/core/processors/results/index.d.mts +18 -18
- package/dist/core/processors/results/index.mjs.map +1 -1
- package/dist/core/runner/index.d.mts +2 -2
- package/dist/core/runner/index.mjs +258 -258
- package/dist/core/runner/index.mjs.map +1 -1
- package/dist/core/scheduler/index.d.mts +1 -1
- package/dist/core/scheduler/index.mjs +64 -64
- package/dist/core/scheduler/index.mjs.map +1 -1
- package/dist/{env-bRH0K6fU.d.mts → env-Br6jaWGL.d.mts} +9 -9
- package/dist/{env-BVYeJhGA.mjs → env-egxaJtNn.mjs} +8 -8
- package/dist/env-egxaJtNn.mjs.map +1 -0
- package/dist/{expect-extensions-Mf1sMNBv.mjs → expect-extensions-BKdEPt3h.mjs} +46 -46
- package/dist/expect-extensions-BKdEPt3h.mjs.map +1 -0
- package/dist/expect.mjs +1 -1
- package/dist/{index-CwKBlCG9.d.mts → index-BLIlhiWT.d.mts} +565 -565
- package/dist/{index-Be5I1ZJL.d.mts → index-CIaJClcC.d.mts} +48 -48
- package/dist/index.d.mts +207 -195
- package/dist/index.mjs +147 -147
- package/dist/index.mjs.map +1 -1
- package/dist/models-CaCOUPZw.mjs.map +1 -1
- package/dist/plugins/chat-models/index.d.mts +279 -279
- package/dist/plugins/chat-models/index.mjs +359 -359
- package/dist/plugins/chat-models/index.mjs.map +1 -1
- package/dist/{registry-BSyjwZFx.mjs → registry-BK7k6X81.mjs} +293 -293
- package/dist/registry-BK7k6X81.mjs.map +1 -0
- package/dist/testing/expect-extensions.d.mts +27 -27
- package/dist/testing/expect-extensions.mjs +1 -1
- package/package.json +3 -3
- package/dist/cli-CHFCF8UR.mjs.map +0 -1
- package/dist/env-BVYeJhGA.mjs.map +0 -1
- package/dist/expect-extensions-Mf1sMNBv.mjs.map +0 -1
- package/dist/registry-BSyjwZFx.mjs.map +0 -1
package/dist/index.d.mts
CHANGED
|
@@ -1,25 +1,24 @@
|
|
|
1
|
-
import { $ as InferenceExecutor, D as TaskRunContext, I as TaskExecutionContext, J as RunResult, O as TaskRunOutput, R as ModelDefinition, S as TaskConcurrencyConfig, X as RunScoreKind, f as MatrixDefinition, k as TelemetryAttributeValue, o as CliReportingConfig, p as MatrixLayer, rt as ScheduledTask, t as ConfigHookPlugin, w as TaskExecutionPolicy } from "./index-
|
|
2
|
-
import { a as requiredEnvFrom } from "./env-
|
|
1
|
+
import { $ as InferenceExecutor, D as TaskRunContext, I as TaskExecutionContext, J as RunResult, O as TaskRunOutput, R as ModelDefinition, S as TaskConcurrencyConfig, X as RunScoreKind, f as MatrixDefinition, k as TelemetryAttributeValue, o as CliReportingConfig, p as MatrixLayer, rt as ScheduledTask, t as ConfigHookPlugin, w as TaskExecutionPolicy } from "./index-BLIlhiWT.mjs";
|
|
2
|
+
import { a as requiredEnvFrom } from "./env-Br6jaWGL.mjs";
|
|
3
3
|
import { expect } from "./expect.mjs";
|
|
4
4
|
|
|
5
5
|
//#region src/cli/reporters/vitest-compat-reporter.d.ts
|
|
6
|
-
type Awaitable<T> = T | Promise<T>;
|
|
7
6
|
/**
|
|
8
|
-
* Normalized
|
|
7
|
+
* Normalized test-case-like entity delivered to vitest-compatible reporter hooks.
|
|
9
8
|
*/
|
|
10
|
-
interface
|
|
9
|
+
interface VievalVitestCompatCase {
|
|
11
10
|
id: string;
|
|
11
|
+
module: VievalVitestCompatModule;
|
|
12
12
|
name: string;
|
|
13
|
-
|
|
13
|
+
state: 'failed' | 'passed' | 'pending' | 'skipped';
|
|
14
14
|
}
|
|
15
15
|
/**
|
|
16
|
-
* Normalized
|
|
16
|
+
* Normalized module-like entity delivered to vitest-compatible reporter hooks.
|
|
17
17
|
*/
|
|
18
|
-
interface
|
|
18
|
+
interface VievalVitestCompatModule {
|
|
19
19
|
id: string;
|
|
20
20
|
name: string;
|
|
21
|
-
|
|
22
|
-
state: 'failed' | 'passed' | 'pending' | 'skipped';
|
|
21
|
+
projectName: string;
|
|
23
22
|
}
|
|
24
23
|
/**
|
|
25
24
|
* Supported vitest-style reporter lifecycle hooks.
|
|
@@ -47,6 +46,7 @@ interface VievalVitestCompatReporter {
|
|
|
47
46
|
projectName: string;
|
|
48
47
|
}[]) => Awaitable<void>;
|
|
49
48
|
}
|
|
49
|
+
type VievalVitestCompatReporterReference = readonly [VievalVitestCompatReporterValue, unknown?] | VievalVitestCompatReporterValue;
|
|
50
50
|
/**
|
|
51
51
|
* Supported project reporter references.
|
|
52
52
|
*
|
|
@@ -58,13 +58,88 @@ interface VievalVitestCompatReporter {
|
|
|
58
58
|
* `https://github.com/vitest-dev/vitest/blob/b865b4d83d1e7874607ba1b2d84b9e2d135ecd33/packages/vitest/src/node/config/resolveConfig.ts#L674-L713`
|
|
59
59
|
*/
|
|
60
60
|
type VievalVitestCompatReporterValue = string | VievalVitestCompatReporter;
|
|
61
|
-
type
|
|
61
|
+
type Awaitable<T> = Promise<T> | T;
|
|
62
|
+
/**
|
|
63
|
+
* Creates a project-level vitest-compatible reporter bridge.
|
|
64
|
+
*
|
|
65
|
+
* Use when:
|
|
66
|
+
* - `vieval` should reuse vitest-like reporter callbacks without changing CLI output contracts
|
|
67
|
+
*
|
|
68
|
+
* Expects:
|
|
69
|
+
* - references point to modules whose default export is a reporter instance or constructor
|
|
70
|
+
*
|
|
71
|
+
* Returns:
|
|
72
|
+
* - `null` when no reporter references are configured
|
|
73
|
+
*/
|
|
62
74
|
//#endregion
|
|
63
75
|
//#region src/cli/config.d.ts
|
|
64
76
|
/**
|
|
65
|
-
*
|
|
77
|
+
* Benchmark identity and shared cache namespace.
|
|
66
78
|
*/
|
|
67
|
-
|
|
79
|
+
interface CliComparisonBenchmarkConfig {
|
|
80
|
+
/**
|
|
81
|
+
* Benchmark identifier used in report artifacts.
|
|
82
|
+
*/
|
|
83
|
+
id: string;
|
|
84
|
+
/**
|
|
85
|
+
* Shared cache namespace reused across method runs.
|
|
86
|
+
*/
|
|
87
|
+
sharedCaseNamespace: string;
|
|
88
|
+
}
|
|
89
|
+
/**
|
|
90
|
+
* One comparison entry loaded by `vieval compare`.
|
|
91
|
+
*/
|
|
92
|
+
interface CliComparisonConfig {
|
|
93
|
+
/**
|
|
94
|
+
* Benchmark metadata for reporting and shared cache coordination.
|
|
95
|
+
*/
|
|
96
|
+
benchmark: CliComparisonBenchmarkConfig;
|
|
97
|
+
/**
|
|
98
|
+
* Optional workspace exclude glob(s), also relative to config directory.
|
|
99
|
+
*/
|
|
100
|
+
excludesWorkspaces?: string | string[];
|
|
101
|
+
/**
|
|
102
|
+
* Comparison id selected by `--comparison`.
|
|
103
|
+
*/
|
|
104
|
+
id: string;
|
|
105
|
+
/**
|
|
106
|
+
* Optional workspace glob(s) discovered relative to config directory.
|
|
107
|
+
*/
|
|
108
|
+
includesWorkspaces?: string | string[];
|
|
109
|
+
/**
|
|
110
|
+
* Optional explicit method list.
|
|
111
|
+
*/
|
|
112
|
+
methods?: CliComparisonMethodConfig[];
|
|
113
|
+
}
|
|
114
|
+
/**
|
|
115
|
+
* One explicit comparison method descriptor.
|
|
116
|
+
*/
|
|
117
|
+
interface CliComparisonMethodConfig {
|
|
118
|
+
/**
|
|
119
|
+
* Optional explicit config file path for this workspace.
|
|
120
|
+
*/
|
|
121
|
+
configFilePath?: string;
|
|
122
|
+
/**
|
|
123
|
+
* Method identifier shown in compare reports.
|
|
124
|
+
*/
|
|
125
|
+
id: string;
|
|
126
|
+
/**
|
|
127
|
+
* Project name to execute inside workspace config.
|
|
128
|
+
*/
|
|
129
|
+
project: string;
|
|
130
|
+
/**
|
|
131
|
+
* Workspace path containing this method's `vieval.config.*`.
|
|
132
|
+
*/
|
|
133
|
+
workspace: string;
|
|
134
|
+
}
|
|
135
|
+
/**
|
|
136
|
+
* Comparison mode config for `vieval compare`.
|
|
137
|
+
*/
|
|
138
|
+
interface CliComparisonModeConfig extends CliConfigBase {
|
|
139
|
+
comparisons: CliComparisonConfig[];
|
|
140
|
+
projects?: never;
|
|
141
|
+
workspaces?: never;
|
|
142
|
+
}
|
|
68
143
|
/**
|
|
69
144
|
* Concurrency limits that can be declared in CLI-facing config.
|
|
70
145
|
*
|
|
@@ -80,9 +155,13 @@ type CliConfigPlugin = ConfigHookPlugin<CliConfig>;
|
|
|
80
155
|
*/
|
|
81
156
|
interface CliConcurrencyConfig {
|
|
82
157
|
/**
|
|
83
|
-
*
|
|
158
|
+
* Attempt-level concurrency cap.
|
|
84
159
|
*/
|
|
85
|
-
|
|
160
|
+
attempt?: number;
|
|
161
|
+
/**
|
|
162
|
+
* Case-level concurrency cap.
|
|
163
|
+
*/
|
|
164
|
+
case?: number;
|
|
86
165
|
/**
|
|
87
166
|
* Project-level concurrency cap.
|
|
88
167
|
*/
|
|
@@ -92,40 +171,56 @@ interface CliConcurrencyConfig {
|
|
|
92
171
|
*/
|
|
93
172
|
task?: number;
|
|
94
173
|
/**
|
|
95
|
-
*
|
|
96
|
-
*/
|
|
97
|
-
attempt?: number;
|
|
98
|
-
/**
|
|
99
|
-
* Case-level concurrency cap.
|
|
174
|
+
* Workspace-level concurrency cap.
|
|
100
175
|
*/
|
|
101
|
-
|
|
176
|
+
workspace?: number;
|
|
102
177
|
}
|
|
178
|
+
/**
|
|
179
|
+
* Top-level CLI config loaded from `vieval.config.*`.
|
|
180
|
+
*
|
|
181
|
+
* Exactly one top-level mode is allowed:
|
|
182
|
+
* - `projects`
|
|
183
|
+
* - `workspaces`
|
|
184
|
+
* - `comparisons`
|
|
185
|
+
*/
|
|
186
|
+
type CliConfig = CliComparisonModeConfig | CliProjectModeConfig | CliWorkspaceModeConfig;
|
|
187
|
+
/**
|
|
188
|
+
* CLI plugin shape bound to the full CLI config object.
|
|
189
|
+
*/
|
|
190
|
+
type CliConfigPlugin = ConfigHookPlugin<CliConfig>;
|
|
103
191
|
/**
|
|
104
192
|
* Defines one project block for `vieval run`.
|
|
105
193
|
*/
|
|
106
194
|
interface CliProjectConfig {
|
|
107
195
|
/**
|
|
108
|
-
*
|
|
109
|
-
*/
|
|
110
|
-
name: string;
|
|
111
|
-
/**
|
|
112
|
-
* Project root used for include/exclude glob matching.
|
|
196
|
+
* Optional project-scoped concurrency overrides.
|
|
113
197
|
*
|
|
114
|
-
* @default
|
|
198
|
+
* @default inherited from top-level or CLI execution settings
|
|
115
199
|
*/
|
|
116
|
-
|
|
200
|
+
concurrency?: Omit<CliConcurrencyConfig, 'workspace'>;
|
|
117
201
|
/**
|
|
118
|
-
*
|
|
119
|
-
*
|
|
120
|
-
* @default Common eval file globs for TypeScript and JavaScript module formats.
|
|
202
|
+
* Optional eval-time matrix dimensions.
|
|
121
203
|
*/
|
|
122
|
-
|
|
204
|
+
evalMatrix?: MatrixDefinition | MatrixLayer;
|
|
123
205
|
/**
|
|
124
206
|
* Glob patterns excluded from discovery.
|
|
125
207
|
*
|
|
126
208
|
* @default Common exclusion globs for dependencies, build output, and VCS directories.
|
|
127
209
|
*/
|
|
128
210
|
exclude?: string[];
|
|
211
|
+
/**
|
|
212
|
+
* Optional task executor.
|
|
213
|
+
*
|
|
214
|
+
* Use when this project should execute live inferenceExecutor requests.
|
|
215
|
+
* If omitted, `vieval run` performs collection + scheduling only.
|
|
216
|
+
*/
|
|
217
|
+
executor?: (task: ScheduledTask, context: CliProjectExecutorContext) => Promise<RunResult>;
|
|
218
|
+
/**
|
|
219
|
+
* Glob patterns for eval file discovery.
|
|
220
|
+
*
|
|
221
|
+
* @default Common eval file globs for TypeScript and JavaScript module formats.
|
|
222
|
+
*/
|
|
223
|
+
include?: string[];
|
|
129
224
|
/**
|
|
130
225
|
* Providers expanded by scheduler.
|
|
131
226
|
*
|
|
@@ -142,26 +237,9 @@ interface CliProjectConfig {
|
|
|
142
237
|
*/
|
|
143
238
|
models?: ModelDefinition[];
|
|
144
239
|
/**
|
|
145
|
-
*
|
|
146
|
-
*/
|
|
147
|
-
runMatrix?: MatrixDefinition | MatrixLayer;
|
|
148
|
-
/**
|
|
149
|
-
* Optional eval-time matrix dimensions.
|
|
150
|
-
*/
|
|
151
|
-
evalMatrix?: MatrixDefinition | MatrixLayer;
|
|
152
|
-
/**
|
|
153
|
-
* Optional project-scoped concurrency overrides.
|
|
154
|
-
*
|
|
155
|
-
* @default inherited from top-level or CLI execution settings
|
|
156
|
-
*/
|
|
157
|
-
concurrency?: Omit<CliConcurrencyConfig, 'workspace'>;
|
|
158
|
-
/**
|
|
159
|
-
* Optional task executor.
|
|
160
|
-
*
|
|
161
|
-
* Use when this project should execute live inferenceExecutor requests.
|
|
162
|
-
* If omitted, `vieval run` performs collection + scheduling only.
|
|
240
|
+
* Project label used in summary output.
|
|
163
241
|
*/
|
|
164
|
-
|
|
242
|
+
name: string;
|
|
165
243
|
/**
|
|
166
244
|
* Optional project-local plugins.
|
|
167
245
|
*/
|
|
@@ -175,107 +253,73 @@ interface CliProjectConfig {
|
|
|
175
253
|
* @default []
|
|
176
254
|
*/
|
|
177
255
|
reporters?: VievalVitestCompatReporterReference[];
|
|
178
|
-
}
|
|
179
|
-
/**
|
|
180
|
-
* One workspace descriptor for workspace-mode configs.
|
|
181
|
-
*/
|
|
182
|
-
interface CliWorkspaceConfig {
|
|
183
256
|
/**
|
|
184
|
-
*
|
|
257
|
+
* Project root used for include/exclude glob matching.
|
|
258
|
+
*
|
|
259
|
+
* @default process cwd
|
|
185
260
|
*/
|
|
186
|
-
|
|
261
|
+
root?: string;
|
|
187
262
|
/**
|
|
188
|
-
*
|
|
263
|
+
* Optional run-time matrix dimensions.
|
|
189
264
|
*/
|
|
190
|
-
|
|
265
|
+
runMatrix?: MatrixDefinition | MatrixLayer;
|
|
191
266
|
}
|
|
192
267
|
/**
|
|
193
|
-
*
|
|
268
|
+
* Execution context exposed to project-level `executor` implementations.
|
|
269
|
+
*
|
|
270
|
+
* Use when:
|
|
271
|
+
* - a project executor needs task-scoped models plus case reporter hooks
|
|
272
|
+
* - custom scheduling logic wants the same hook shape as `TaskRunContext`
|
|
273
|
+
*
|
|
274
|
+
* Expects:
|
|
275
|
+
* - `models` exposes configured model registrations for plugin helpers
|
|
276
|
+
* - `reporterHooks` follows `TaskRunContext['reporterHooks']`
|
|
277
|
+
* - `telemetry` follows `TaskRunContext['telemetry']`
|
|
278
|
+
* - `runtimeConcurrency` follows `TaskRunContext['runtimeConcurrency']`
|
|
194
279
|
*/
|
|
195
|
-
interface
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
id: string;
|
|
200
|
-
/**
|
|
201
|
-
* Workspace path containing this method's `vieval.config.*`.
|
|
202
|
-
*/
|
|
203
|
-
workspace: string;
|
|
204
|
-
/**
|
|
205
|
-
* Project name to execute inside workspace config.
|
|
206
|
-
*/
|
|
207
|
-
project: string;
|
|
208
|
-
/**
|
|
209
|
-
* Optional explicit config file path for this workspace.
|
|
210
|
-
*/
|
|
211
|
-
configFilePath?: string;
|
|
280
|
+
interface CliProjectExecutorContext extends TaskExecutionContext {
|
|
281
|
+
reporterHooks?: TaskRunContext['reporterHooks'];
|
|
282
|
+
runtimeConcurrency?: TaskRunContext['runtimeConcurrency'];
|
|
283
|
+
telemetry?: TaskRunContext['telemetry'];
|
|
212
284
|
}
|
|
213
285
|
/**
|
|
214
|
-
*
|
|
286
|
+
* Project mode config for `vieval run`.
|
|
215
287
|
*/
|
|
216
|
-
interface
|
|
217
|
-
|
|
218
|
-
* Benchmark identifier used in report artifacts.
|
|
219
|
-
*/
|
|
220
|
-
id: string;
|
|
288
|
+
interface CliProjectModeConfig extends CliConfigBase {
|
|
289
|
+
comparisons?: never;
|
|
221
290
|
/**
|
|
222
|
-
*
|
|
291
|
+
* Project list expanded by `vieval run`.
|
|
292
|
+
*
|
|
293
|
+
* @default [{ name: 'default' }]
|
|
223
294
|
*/
|
|
224
|
-
|
|
295
|
+
projects?: CliProjectConfig[];
|
|
296
|
+
workspaces?: never;
|
|
225
297
|
}
|
|
226
298
|
/**
|
|
227
|
-
* One
|
|
299
|
+
* One workspace descriptor for workspace-mode configs.
|
|
228
300
|
*/
|
|
229
|
-
interface
|
|
301
|
+
interface CliWorkspaceConfig {
|
|
230
302
|
/**
|
|
231
|
-
*
|
|
303
|
+
* Workspace identifier.
|
|
232
304
|
*/
|
|
233
305
|
id: string;
|
|
234
306
|
/**
|
|
235
|
-
*
|
|
236
|
-
*/
|
|
237
|
-
benchmark: CliComparisonBenchmarkConfig;
|
|
238
|
-
/**
|
|
239
|
-
* Optional explicit method list.
|
|
240
|
-
*/
|
|
241
|
-
methods?: CliComparisonMethodConfig[];
|
|
242
|
-
/**
|
|
243
|
-
* Optional workspace glob(s) discovered relative to config directory.
|
|
244
|
-
*/
|
|
245
|
-
includesWorkspaces?: string | string[];
|
|
246
|
-
/**
|
|
247
|
-
* Optional workspace exclude glob(s), also relative to config directory.
|
|
307
|
+
* Workspace root path.
|
|
248
308
|
*/
|
|
249
|
-
|
|
309
|
+
root: string;
|
|
250
310
|
}
|
|
251
311
|
/**
|
|
252
|
-
*
|
|
253
|
-
*
|
|
254
|
-
* Use when:
|
|
255
|
-
* - a project executor needs task-scoped models plus case reporter hooks
|
|
256
|
-
* - custom scheduling logic wants the same hook shape as `TaskRunContext`
|
|
257
|
-
*
|
|
258
|
-
* Expects:
|
|
259
|
-
* - `models` exposes configured model registrations for plugin helpers
|
|
260
|
-
* - `reporterHooks` follows `TaskRunContext['reporterHooks']`
|
|
261
|
-
* - `telemetry` follows `TaskRunContext['telemetry']`
|
|
262
|
-
* - `runtimeConcurrency` follows `TaskRunContext['runtimeConcurrency']`
|
|
312
|
+
* Workspace mode config placeholder for future workspace orchestration.
|
|
263
313
|
*/
|
|
264
|
-
interface
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
314
|
+
interface CliWorkspaceModeConfig extends CliConfigBase {
|
|
315
|
+
comparisons?: never;
|
|
316
|
+
projects?: never;
|
|
317
|
+
workspaces: CliWorkspaceConfig[];
|
|
268
318
|
}
|
|
269
319
|
/**
|
|
270
320
|
* Top-level CLI config loaded from `vieval.config.*`.
|
|
271
321
|
*/
|
|
272
322
|
interface CliConfigBase {
|
|
273
|
-
/**
|
|
274
|
-
* Global model definitions inherited by projects.
|
|
275
|
-
*
|
|
276
|
-
* @default []
|
|
277
|
-
*/
|
|
278
|
-
models?: ModelDefinition[];
|
|
279
323
|
/**
|
|
280
324
|
* Global concurrency defaults inherited by projects and tasks.
|
|
281
325
|
*
|
|
@@ -289,6 +333,22 @@ interface CliConfigBase {
|
|
|
289
333
|
* @default undefined
|
|
290
334
|
*/
|
|
291
335
|
concurrency?: CliConcurrencyConfig;
|
|
336
|
+
/**
|
|
337
|
+
* Environment variables injected into `process.env` during `vieval run`.
|
|
338
|
+
*
|
|
339
|
+
* Use when:
|
|
340
|
+
* - eval tasks depend on runtime env values (for example inferenceExecutor API keys)
|
|
341
|
+
* - config wants deterministic env values without shell-level exports
|
|
342
|
+
*
|
|
343
|
+
* @default {}
|
|
344
|
+
*/
|
|
345
|
+
env?: NodeJS.ProcessEnv;
|
|
346
|
+
/**
|
|
347
|
+
* Global model definitions inherited by projects.
|
|
348
|
+
*
|
|
349
|
+
* @default []
|
|
350
|
+
*/
|
|
351
|
+
models?: ModelDefinition[];
|
|
292
352
|
/**
|
|
293
353
|
* Global config plugins.
|
|
294
354
|
*
|
|
@@ -301,16 +361,6 @@ interface CliConfigBase {
|
|
|
301
361
|
* @default []
|
|
302
362
|
*/
|
|
303
363
|
reporters?: VievalVitestCompatReporterReference[];
|
|
304
|
-
/**
|
|
305
|
-
* Environment variables injected into `process.env` during `vieval run`.
|
|
306
|
-
*
|
|
307
|
-
* Use when:
|
|
308
|
-
* - eval tasks depend on runtime env values (for example inferenceExecutor API keys)
|
|
309
|
-
* - config wants deterministic env values without shell-level exports
|
|
310
|
-
*
|
|
311
|
-
* @default {}
|
|
312
|
-
*/
|
|
313
|
-
env?: NodeJS.ProcessEnv;
|
|
314
364
|
/**
|
|
315
365
|
* Optional reporting integrations shared by CLI run orchestration.
|
|
316
366
|
*
|
|
@@ -318,44 +368,6 @@ interface CliConfigBase {
|
|
|
318
368
|
*/
|
|
319
369
|
reporting?: CliReportingConfig;
|
|
320
370
|
}
|
|
321
|
-
/**
|
|
322
|
-
* Project mode config for `vieval run`.
|
|
323
|
-
*/
|
|
324
|
-
interface CliProjectModeConfig extends CliConfigBase {
|
|
325
|
-
/**
|
|
326
|
-
* Project list expanded by `vieval run`.
|
|
327
|
-
*
|
|
328
|
-
* @default [{ name: 'default' }]
|
|
329
|
-
*/
|
|
330
|
-
projects?: CliProjectConfig[];
|
|
331
|
-
comparisons?: never;
|
|
332
|
-
workspaces?: never;
|
|
333
|
-
}
|
|
334
|
-
/**
|
|
335
|
-
* Workspace mode config placeholder for future workspace orchestration.
|
|
336
|
-
*/
|
|
337
|
-
interface CliWorkspaceModeConfig extends CliConfigBase {
|
|
338
|
-
workspaces: CliWorkspaceConfig[];
|
|
339
|
-
projects?: never;
|
|
340
|
-
comparisons?: never;
|
|
341
|
-
}
|
|
342
|
-
/**
|
|
343
|
-
* Comparison mode config for `vieval compare`.
|
|
344
|
-
*/
|
|
345
|
-
interface CliComparisonModeConfig extends CliConfigBase {
|
|
346
|
-
comparisons: CliComparisonConfig[];
|
|
347
|
-
projects?: never;
|
|
348
|
-
workspaces?: never;
|
|
349
|
-
}
|
|
350
|
-
/**
|
|
351
|
-
* Top-level CLI config loaded from `vieval.config.*`.
|
|
352
|
-
*
|
|
353
|
-
* Exactly one top-level mode is allowed:
|
|
354
|
-
* - `projects`
|
|
355
|
-
* - `workspaces`
|
|
356
|
-
* - `comparisons`
|
|
357
|
-
*/
|
|
358
|
-
type CliConfig = CliProjectModeConfig | CliWorkspaceModeConfig | CliComparisonModeConfig;
|
|
359
371
|
/**
|
|
360
372
|
* Helper used by `vieval.config.*` for better type inference.
|
|
361
373
|
*/
|
|
@@ -377,6 +389,15 @@ declare const defineConfig: import("c12").DefineConfig<CliConfig, import("c12").
|
|
|
377
389
|
declare function loadEnv(mode: string, envDir: string, prefixes?: string | string[]): NodeJS.ProcessEnv;
|
|
378
390
|
//#endregion
|
|
379
391
|
//#region src/dsl/task.d.ts
|
|
392
|
+
/**
|
|
393
|
+
* Per-case registration options for `caseOf`.
|
|
394
|
+
*/
|
|
395
|
+
interface CaseRegistrationOptions<TInput> extends TaskExecutionPolicy {
|
|
396
|
+
/**
|
|
397
|
+
* Optional case input payload.
|
|
398
|
+
*/
|
|
399
|
+
input: TInput;
|
|
400
|
+
}
|
|
380
401
|
/**
|
|
381
402
|
* Runtime context provided to a task case callback.
|
|
382
403
|
*/
|
|
@@ -388,26 +409,26 @@ interface CaseRunContext<TInput> extends TaskRunContext {
|
|
|
388
409
|
inputs: TInput;
|
|
389
410
|
};
|
|
390
411
|
/**
|
|
391
|
-
*
|
|
412
|
+
* Emits one custom case metric into report events.
|
|
392
413
|
*
|
|
393
414
|
* Use when:
|
|
394
|
-
* -
|
|
415
|
+
* - tasks need structured benchmark metadata beyond exact/judge score families
|
|
395
416
|
*
|
|
396
417
|
* Expects:
|
|
397
|
-
* - `
|
|
418
|
+
* - `name` to be a stable metric identifier
|
|
419
|
+
* - `value` to be JSON-serializable
|
|
398
420
|
*/
|
|
399
|
-
|
|
421
|
+
metric: (name: string, value: TelemetryAttributeValue) => void;
|
|
400
422
|
/**
|
|
401
|
-
*
|
|
423
|
+
* Overrides one case score family with a custom normalized value.
|
|
402
424
|
*
|
|
403
425
|
* Use when:
|
|
404
|
-
* -
|
|
426
|
+
* - one case computes a benchmark-native score that should flow into run aggregation
|
|
405
427
|
*
|
|
406
428
|
* Expects:
|
|
407
|
-
* - `
|
|
408
|
-
* - `value` to be JSON-serializable
|
|
429
|
+
* - `score` to stay in the `0..1` range
|
|
409
430
|
*/
|
|
410
|
-
|
|
431
|
+
score: (score: number, kind?: RunScoreKind) => void;
|
|
411
432
|
/**
|
|
412
433
|
* Cooperative abort signal for the current case execution.
|
|
413
434
|
*/
|
|
@@ -436,15 +457,6 @@ interface CasesFromInputsOptions extends TaskExecutionPolicy {
|
|
|
436
457
|
*/
|
|
437
458
|
concurrency?: number;
|
|
438
459
|
}
|
|
439
|
-
/**
|
|
440
|
-
* Per-case registration options for `caseOf`.
|
|
441
|
-
*/
|
|
442
|
-
interface CaseRegistrationOptions<TInput> extends TaskExecutionPolicy {
|
|
443
|
-
/**
|
|
444
|
-
* Optional case input payload.
|
|
445
|
-
*/
|
|
446
|
-
input: TInput;
|
|
447
|
-
}
|
|
448
460
|
/**
|
|
449
461
|
* Builder callbacks passed into `describeTask`.
|
|
450
462
|
*/
|
|
@@ -465,10 +477,6 @@ interface DescribeTaskBuilder {
|
|
|
465
477
|
* Options for `describeTask`.
|
|
466
478
|
*/
|
|
467
479
|
interface DescribeTaskOptions extends TaskExecutionPolicy {
|
|
468
|
-
/**
|
|
469
|
-
* Optional description override.
|
|
470
|
-
*/
|
|
471
|
-
description?: string;
|
|
472
480
|
/**
|
|
473
481
|
* Optional task-local concurrency overrides.
|
|
474
482
|
*
|
|
@@ -482,6 +490,10 @@ interface DescribeTaskOptions extends TaskExecutionPolicy {
|
|
|
482
490
|
* @default inherited from project or CLI concurrency settings
|
|
483
491
|
*/
|
|
484
492
|
concurrency?: TaskConcurrencyConfig;
|
|
493
|
+
/**
|
|
494
|
+
* Optional description override.
|
|
495
|
+
*/
|
|
496
|
+
description?: string;
|
|
485
497
|
}
|
|
486
498
|
/**
|
|
487
499
|
* Registers one case in the currently active task scope.
|
|
@@ -499,7 +511,7 @@ declare function casesFromInputs<TInput>(namePrefix: string, inputs: readonly TI
|
|
|
499
511
|
* - task behavior should be declared with `caseOf` and `casesFromInputs`
|
|
500
512
|
* - business agent code should be imported and run from eval task files
|
|
501
513
|
*/
|
|
502
|
-
declare function describeTask(name: string, build: ((
|
|
514
|
+
declare function describeTask(name: string, build: (() => void) | ((builder: DescribeTaskBuilder) => void), options?: DescribeTaskOptions): {
|
|
503
515
|
readonly description: string;
|
|
504
516
|
readonly name: string;
|
|
505
517
|
readonly task: {
|