vieval 0.0.10 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/README.md +31 -31
  2. package/dist/bin/vieval.mjs +1 -1
  3. package/dist/bin/vieval.mjs.map +1 -1
  4. package/dist/cli/index.d.mts +1 -1
  5. package/dist/cli/index.mjs +1 -1
  6. package/dist/{cli-DTDgaqeI.mjs → cli-uzS81IPd.mjs} +1483 -1483
  7. package/dist/cli-uzS81IPd.mjs.map +1 -0
  8. package/dist/config.d.mts +1 -1
  9. package/dist/config.mjs +1 -1
  10. package/dist/config.mjs.map +1 -1
  11. package/dist/core/assertions/index.d.mts +156 -156
  12. package/dist/core/assertions/index.mjs +82 -82
  13. package/dist/core/assertions/index.mjs.map +1 -1
  14. package/dist/core/inference-executors/index.d.mts +37 -37
  15. package/dist/core/inference-executors/index.mjs +54 -53
  16. package/dist/core/inference-executors/index.mjs.map +1 -1
  17. package/dist/core/processors/results/index.d.mts +18 -18
  18. package/dist/core/processors/results/index.mjs.map +1 -1
  19. package/dist/core/runner/index.d.mts +2 -2
  20. package/dist/core/runner/index.mjs +259 -259
  21. package/dist/core/runner/index.mjs.map +1 -1
  22. package/dist/core/scheduler/index.d.mts +1 -1
  23. package/dist/core/scheduler/index.mjs +65 -65
  24. package/dist/core/scheduler/index.mjs.map +1 -1
  25. package/dist/{env-DfWZy_n4.d.mts → env-Br6jaWGL.d.mts} +9 -9
  26. package/dist/{env-nV5rVErX.mjs → env-egxaJtNn.mjs} +8 -8
  27. package/dist/env-egxaJtNn.mjs.map +1 -0
  28. package/dist/{expect-extensions-DCSqlneN.mjs → expect-extensions-BKdEPt3h.mjs} +46 -46
  29. package/dist/expect-extensions-BKdEPt3h.mjs.map +1 -0
  30. package/dist/expect.d.mts +1 -3
  31. package/dist/expect.mjs +1 -1
  32. package/dist/expect.mjs.map +1 -1
  33. package/dist/{index-D_aMeWqO.d.mts → index-BLIlhiWT.d.mts} +565 -565
  34. package/dist/{index-Bg0atWBF.d.mts → index-CIaJClcC.d.mts} +48 -48
  35. package/dist/index.d.mts +208 -197
  36. package/dist/index.mjs +148 -148
  37. package/dist/index.mjs.map +1 -1
  38. package/dist/{models-pBSRUZhY.mjs → models-CaCOUPZw.mjs} +1 -1
  39. package/dist/{models-pBSRUZhY.mjs.map → models-CaCOUPZw.mjs.map} +1 -1
  40. package/dist/plugins/chat-models/index.d.mts +279 -279
  41. package/dist/plugins/chat-models/index.mjs +360 -360
  42. package/dist/plugins/chat-models/index.mjs.map +1 -1
  43. package/dist/{queue-DsZQkZO_.mjs → queue-BL86z2W_.mjs} +1 -1
  44. package/dist/{queue-DsZQkZO_.mjs.map → queue-BL86z2W_.mjs.map} +1 -1
  45. package/dist/{registry-DMnwE_mY.mjs → registry-BK7k6X81.mjs} +294 -294
  46. package/dist/registry-BK7k6X81.mjs.map +1 -0
  47. package/dist/testing/expect-extensions.d.mts +27 -27
  48. package/dist/testing/expect-extensions.mjs +1 -1
  49. package/package.json +12 -12
  50. package/dist/cli-DTDgaqeI.mjs.map +0 -1
  51. package/dist/env-nV5rVErX.mjs.map +0 -1
  52. package/dist/expect-extensions-DCSqlneN.mjs.map +0 -1
  53. package/dist/registry-DMnwE_mY.mjs.map +0 -1
package/dist/index.d.mts CHANGED
@@ -1,26 +1,24 @@
1
- import { $ as InferenceExecutor, D as TaskRunContext, I as TaskExecutionContext, J as RunResult, O as TaskRunOutput, R as ModelDefinition, S as TaskConcurrencyConfig, X as RunScoreKind, f as MatrixDefinition, k as TelemetryAttributeValue, o as CliReportingConfig, p as MatrixLayer, rt as ScheduledTask, t as ConfigHookPlugin, w as TaskExecutionPolicy } from "./index-D_aMeWqO.mjs";
2
- import { a as requiredEnvFrom } from "./env-DfWZy_n4.mjs";
1
+ import { $ as InferenceExecutor, D as TaskRunContext, I as TaskExecutionContext, J as RunResult, O as TaskRunOutput, R as ModelDefinition, S as TaskConcurrencyConfig, X as RunScoreKind, f as MatrixDefinition, k as TelemetryAttributeValue, o as CliReportingConfig, p as MatrixLayer, rt as ScheduledTask, t as ConfigHookPlugin, w as TaskExecutionPolicy } from "./index-BLIlhiWT.mjs";
2
+ import { a as requiredEnvFrom } from "./env-Br6jaWGL.mjs";
3
3
  import { expect } from "./expect.mjs";
4
- import * as _$c12 from "c12";
5
4
 
6
5
  //#region src/cli/reporters/vitest-compat-reporter.d.ts
7
- type Awaitable<T> = T | Promise<T>;
8
6
  /**
9
- * Normalized module-like entity delivered to vitest-compatible reporter hooks.
7
+ * Normalized test-case-like entity delivered to vitest-compatible reporter hooks.
10
8
  */
11
- interface VievalVitestCompatModule {
9
+ interface VievalVitestCompatCase {
12
10
  id: string;
11
+ module: VievalVitestCompatModule;
13
12
  name: string;
14
- projectName: string;
13
+ state: 'failed' | 'passed' | 'pending' | 'skipped';
15
14
  }
16
15
  /**
17
- * Normalized test-case-like entity delivered to vitest-compatible reporter hooks.
16
+ * Normalized module-like entity delivered to vitest-compatible reporter hooks.
18
17
  */
19
- interface VievalVitestCompatCase {
18
+ interface VievalVitestCompatModule {
20
19
  id: string;
21
20
  name: string;
22
- module: VievalVitestCompatModule;
23
- state: 'failed' | 'passed' | 'pending' | 'skipped';
21
+ projectName: string;
24
22
  }
25
23
  /**
26
24
  * Supported vitest-style reporter lifecycle hooks.
@@ -48,6 +46,7 @@ interface VievalVitestCompatReporter {
48
46
  projectName: string;
49
47
  }[]) => Awaitable<void>;
50
48
  }
49
+ type VievalVitestCompatReporterReference = readonly [VievalVitestCompatReporterValue, unknown?] | VievalVitestCompatReporterValue;
51
50
  /**
52
51
  * Supported project reporter references.
53
52
  *
@@ -59,13 +58,88 @@ interface VievalVitestCompatReporter {
59
58
  * `https://github.com/vitest-dev/vitest/blob/b865b4d83d1e7874607ba1b2d84b9e2d135ecd33/packages/vitest/src/node/config/resolveConfig.ts#L674-L713`
60
59
  */
61
60
  type VievalVitestCompatReporterValue = string | VievalVitestCompatReporter;
62
- type VievalVitestCompatReporterReference = VievalVitestCompatReporterValue | readonly [VievalVitestCompatReporterValue, unknown?];
61
+ type Awaitable<T> = Promise<T> | T;
62
+ /**
63
+ * Creates a project-level vitest-compatible reporter bridge.
64
+ *
65
+ * Use when:
66
+ * - `vieval` should reuse vitest-like reporter callbacks without changing CLI output contracts
67
+ *
68
+ * Expects:
69
+ * - references point to modules whose default export is a reporter instance or constructor
70
+ *
71
+ * Returns:
72
+ * - `null` when no reporter references are configured
73
+ */
63
74
  //#endregion
64
75
  //#region src/cli/config.d.ts
65
76
  /**
66
- * CLI plugin shape bound to the full CLI config object.
77
+ * Benchmark identity and shared cache namespace.
67
78
  */
68
- type CliConfigPlugin = ConfigHookPlugin<CliConfig>;
79
+ interface CliComparisonBenchmarkConfig {
80
+ /**
81
+ * Benchmark identifier used in report artifacts.
82
+ */
83
+ id: string;
84
+ /**
85
+ * Shared cache namespace reused across method runs.
86
+ */
87
+ sharedCaseNamespace: string;
88
+ }
89
+ /**
90
+ * One comparison entry loaded by `vieval compare`.
91
+ */
92
+ interface CliComparisonConfig {
93
+ /**
94
+ * Benchmark metadata for reporting and shared cache coordination.
95
+ */
96
+ benchmark: CliComparisonBenchmarkConfig;
97
+ /**
98
+ * Optional workspace exclude glob(s), also relative to config directory.
99
+ */
100
+ excludesWorkspaces?: string | string[];
101
+ /**
102
+ * Comparison id selected by `--comparison`.
103
+ */
104
+ id: string;
105
+ /**
106
+ * Optional workspace glob(s) discovered relative to config directory.
107
+ */
108
+ includesWorkspaces?: string | string[];
109
+ /**
110
+ * Optional explicit method list.
111
+ */
112
+ methods?: CliComparisonMethodConfig[];
113
+ }
114
+ /**
115
+ * One explicit comparison method descriptor.
116
+ */
117
+ interface CliComparisonMethodConfig {
118
+ /**
119
+ * Optional explicit config file path for this workspace.
120
+ */
121
+ configFilePath?: string;
122
+ /**
123
+ * Method identifier shown in compare reports.
124
+ */
125
+ id: string;
126
+ /**
127
+ * Project name to execute inside workspace config.
128
+ */
129
+ project: string;
130
+ /**
131
+ * Workspace path containing this method's `vieval.config.*`.
132
+ */
133
+ workspace: string;
134
+ }
135
+ /**
136
+ * Comparison mode config for `vieval compare`.
137
+ */
138
+ interface CliComparisonModeConfig extends CliConfigBase {
139
+ comparisons: CliComparisonConfig[];
140
+ projects?: never;
141
+ workspaces?: never;
142
+ }
69
143
  /**
70
144
  * Concurrency limits that can be declared in CLI-facing config.
71
145
  *
@@ -81,9 +155,13 @@ type CliConfigPlugin = ConfigHookPlugin<CliConfig>;
81
155
  */
82
156
  interface CliConcurrencyConfig {
83
157
  /**
84
- * Workspace-level concurrency cap.
158
+ * Attempt-level concurrency cap.
85
159
  */
86
- workspace?: number;
160
+ attempt?: number;
161
+ /**
162
+ * Case-level concurrency cap.
163
+ */
164
+ case?: number;
87
165
  /**
88
166
  * Project-level concurrency cap.
89
167
  */
@@ -93,40 +171,56 @@ interface CliConcurrencyConfig {
93
171
  */
94
172
  task?: number;
95
173
  /**
96
- * Attempt-level concurrency cap.
97
- */
98
- attempt?: number;
99
- /**
100
- * Case-level concurrency cap.
174
+ * Workspace-level concurrency cap.
101
175
  */
102
- case?: number;
176
+ workspace?: number;
103
177
  }
178
+ /**
179
+ * Top-level CLI config loaded from `vieval.config.*`.
180
+ *
181
+ * Exactly one top-level mode is allowed:
182
+ * - `projects`
183
+ * - `workspaces`
184
+ * - `comparisons`
185
+ */
186
+ type CliConfig = CliComparisonModeConfig | CliProjectModeConfig | CliWorkspaceModeConfig;
187
+ /**
188
+ * CLI plugin shape bound to the full CLI config object.
189
+ */
190
+ type CliConfigPlugin = ConfigHookPlugin<CliConfig>;
104
191
  /**
105
192
  * Defines one project block for `vieval run`.
106
193
  */
107
194
  interface CliProjectConfig {
108
195
  /**
109
- * Project label used in summary output.
110
- */
111
- name: string;
112
- /**
113
- * Project root used for include/exclude glob matching.
196
+ * Optional project-scoped concurrency overrides.
114
197
  *
115
- * @default process cwd
198
+ * @default inherited from top-level or CLI execution settings
116
199
  */
117
- root?: string;
200
+ concurrency?: Omit<CliConcurrencyConfig, 'workspace'>;
118
201
  /**
119
- * Glob patterns for eval file discovery.
120
- *
121
- * @default Common eval file globs for TypeScript and JavaScript module formats.
202
+ * Optional eval-time matrix dimensions.
122
203
  */
123
- include?: string[];
204
+ evalMatrix?: MatrixDefinition | MatrixLayer;
124
205
  /**
125
206
  * Glob patterns excluded from discovery.
126
207
  *
127
208
  * @default Common exclusion globs for dependencies, build output, and VCS directories.
128
209
  */
129
210
  exclude?: string[];
211
+ /**
212
+ * Optional task executor.
213
+ *
214
+ * Use when this project should execute live inferenceExecutor requests.
215
+ * If omitted, `vieval run` performs collection + scheduling only.
216
+ */
217
+ executor?: (task: ScheduledTask, context: CliProjectExecutorContext) => Promise<RunResult>;
218
+ /**
219
+ * Glob patterns for eval file discovery.
220
+ *
221
+ * @default Common eval file globs for TypeScript and JavaScript module formats.
222
+ */
223
+ include?: string[];
130
224
  /**
131
225
  * Providers expanded by scheduler.
132
226
  *
@@ -143,26 +237,9 @@ interface CliProjectConfig {
143
237
  */
144
238
  models?: ModelDefinition[];
145
239
  /**
146
- * Optional run-time matrix dimensions.
147
- */
148
- runMatrix?: MatrixDefinition | MatrixLayer;
149
- /**
150
- * Optional eval-time matrix dimensions.
151
- */
152
- evalMatrix?: MatrixDefinition | MatrixLayer;
153
- /**
154
- * Optional project-scoped concurrency overrides.
155
- *
156
- * @default inherited from top-level or CLI execution settings
157
- */
158
- concurrency?: Omit<CliConcurrencyConfig, 'workspace'>;
159
- /**
160
- * Optional task executor.
161
- *
162
- * Use when this project should execute live inferenceExecutor requests.
163
- * If omitted, `vieval run` performs collection + scheduling only.
240
+ * Project label used in summary output.
164
241
  */
165
- executor?: (task: ScheduledTask, context: CliProjectExecutorContext) => Promise<RunResult>;
242
+ name: string;
166
243
  /**
167
244
  * Optional project-local plugins.
168
245
  */
@@ -176,107 +253,73 @@ interface CliProjectConfig {
176
253
  * @default []
177
254
  */
178
255
  reporters?: VievalVitestCompatReporterReference[];
179
- }
180
- /**
181
- * One workspace descriptor for workspace-mode configs.
182
- */
183
- interface CliWorkspaceConfig {
184
256
  /**
185
- * Workspace identifier.
257
+ * Project root used for include/exclude glob matching.
258
+ *
259
+ * @default process cwd
186
260
  */
187
- id: string;
261
+ root?: string;
188
262
  /**
189
- * Workspace root path.
263
+ * Optional run-time matrix dimensions.
190
264
  */
191
- root: string;
265
+ runMatrix?: MatrixDefinition | MatrixLayer;
192
266
  }
193
267
  /**
194
- * One explicit comparison method descriptor.
268
+ * Execution context exposed to project-level `executor` implementations.
269
+ *
270
+ * Use when:
271
+ * - a project executor needs task-scoped models plus case reporter hooks
272
+ * - custom scheduling logic wants the same hook shape as `TaskRunContext`
273
+ *
274
+ * Expects:
275
+ * - `models` exposes configured model registrations for plugin helpers
276
+ * - `reporterHooks` follows `TaskRunContext['reporterHooks']`
277
+ * - `telemetry` follows `TaskRunContext['telemetry']`
278
+ * - `runtimeConcurrency` follows `TaskRunContext['runtimeConcurrency']`
195
279
  */
196
- interface CliComparisonMethodConfig {
197
- /**
198
- * Method identifier shown in compare reports.
199
- */
200
- id: string;
201
- /**
202
- * Workspace path containing this method's `vieval.config.*`.
203
- */
204
- workspace: string;
205
- /**
206
- * Project name to execute inside workspace config.
207
- */
208
- project: string;
209
- /**
210
- * Optional explicit config file path for this workspace.
211
- */
212
- configFilePath?: string;
280
+ interface CliProjectExecutorContext extends TaskExecutionContext {
281
+ reporterHooks?: TaskRunContext['reporterHooks'];
282
+ runtimeConcurrency?: TaskRunContext['runtimeConcurrency'];
283
+ telemetry?: TaskRunContext['telemetry'];
213
284
  }
214
285
  /**
215
- * Benchmark identity and shared cache namespace.
286
+ * Project mode config for `vieval run`.
216
287
  */
217
- interface CliComparisonBenchmarkConfig {
218
- /**
219
- * Benchmark identifier used in report artifacts.
220
- */
221
- id: string;
288
+ interface CliProjectModeConfig extends CliConfigBase {
289
+ comparisons?: never;
222
290
  /**
223
- * Shared cache namespace reused across method runs.
291
+ * Project list expanded by `vieval run`.
292
+ *
293
+ * @default [{ name: 'default' }]
224
294
  */
225
- sharedCaseNamespace: string;
295
+ projects?: CliProjectConfig[];
296
+ workspaces?: never;
226
297
  }
227
298
  /**
228
- * One comparison entry loaded by `vieval compare`.
299
+ * One workspace descriptor for workspace-mode configs.
229
300
  */
230
- interface CliComparisonConfig {
301
+ interface CliWorkspaceConfig {
231
302
  /**
232
- * Comparison id selected by `--comparison`.
303
+ * Workspace identifier.
233
304
  */
234
305
  id: string;
235
306
  /**
236
- * Benchmark metadata for reporting and shared cache coordination.
237
- */
238
- benchmark: CliComparisonBenchmarkConfig;
239
- /**
240
- * Optional explicit method list.
241
- */
242
- methods?: CliComparisonMethodConfig[];
243
- /**
244
- * Optional workspace glob(s) discovered relative to config directory.
245
- */
246
- includesWorkspaces?: string | string[];
247
- /**
248
- * Optional workspace exclude glob(s), also relative to config directory.
307
+ * Workspace root path.
249
308
  */
250
- excludesWorkspaces?: string | string[];
309
+ root: string;
251
310
  }
252
311
  /**
253
- * Execution context exposed to project-level `executor` implementations.
254
- *
255
- * Use when:
256
- * - a project executor needs task-scoped models plus case reporter hooks
257
- * - custom scheduling logic wants the same hook shape as `TaskRunContext`
258
- *
259
- * Expects:
260
- * - `models` exposes configured model registrations for plugin helpers
261
- * - `reporterHooks` follows `TaskRunContext['reporterHooks']`
262
- * - `telemetry` follows `TaskRunContext['telemetry']`
263
- * - `runtimeConcurrency` follows `TaskRunContext['runtimeConcurrency']`
312
+ * Workspace mode config placeholder for future workspace orchestration.
264
313
  */
265
- interface CliProjectExecutorContext extends TaskExecutionContext {
266
- reporterHooks?: TaskRunContext['reporterHooks'];
267
- telemetry?: TaskRunContext['telemetry'];
268
- runtimeConcurrency?: TaskRunContext['runtimeConcurrency'];
314
+ interface CliWorkspaceModeConfig extends CliConfigBase {
315
+ comparisons?: never;
316
+ projects?: never;
317
+ workspaces: CliWorkspaceConfig[];
269
318
  }
270
319
  /**
271
320
  * Top-level CLI config loaded from `vieval.config.*`.
272
321
  */
273
322
  interface CliConfigBase {
274
- /**
275
- * Global model definitions inherited by projects.
276
- *
277
- * @default []
278
- */
279
- models?: ModelDefinition[];
280
323
  /**
281
324
  * Global concurrency defaults inherited by projects and tasks.
282
325
  *
@@ -290,6 +333,22 @@ interface CliConfigBase {
290
333
  * @default undefined
291
334
  */
292
335
  concurrency?: CliConcurrencyConfig;
336
+ /**
337
+ * Environment variables injected into `process.env` during `vieval run`.
338
+ *
339
+ * Use when:
340
+ * - eval tasks depend on runtime env values (for example inferenceExecutor API keys)
341
+ * - config wants deterministic env values without shell-level exports
342
+ *
343
+ * @default {}
344
+ */
345
+ env?: NodeJS.ProcessEnv;
346
+ /**
347
+ * Global model definitions inherited by projects.
348
+ *
349
+ * @default []
350
+ */
351
+ models?: ModelDefinition[];
293
352
  /**
294
353
  * Global config plugins.
295
354
  *
@@ -302,16 +361,6 @@ interface CliConfigBase {
302
361
  * @default []
303
362
  */
304
363
  reporters?: VievalVitestCompatReporterReference[];
305
- /**
306
- * Environment variables injected into `process.env` during `vieval run`.
307
- *
308
- * Use when:
309
- * - eval tasks depend on runtime env values (for example inferenceExecutor API keys)
310
- * - config wants deterministic env values without shell-level exports
311
- *
312
- * @default {}
313
- */
314
- env?: NodeJS.ProcessEnv;
315
364
  /**
316
365
  * Optional reporting integrations shared by CLI run orchestration.
317
366
  *
@@ -319,48 +368,10 @@ interface CliConfigBase {
319
368
  */
320
369
  reporting?: CliReportingConfig;
321
370
  }
322
- /**
323
- * Project mode config for `vieval run`.
324
- */
325
- interface CliProjectModeConfig extends CliConfigBase {
326
- /**
327
- * Project list expanded by `vieval run`.
328
- *
329
- * @default [{ name: 'default' }]
330
- */
331
- projects?: CliProjectConfig[];
332
- comparisons?: never;
333
- workspaces?: never;
334
- }
335
- /**
336
- * Workspace mode config placeholder for future workspace orchestration.
337
- */
338
- interface CliWorkspaceModeConfig extends CliConfigBase {
339
- workspaces: CliWorkspaceConfig[];
340
- projects?: never;
341
- comparisons?: never;
342
- }
343
- /**
344
- * Comparison mode config for `vieval compare`.
345
- */
346
- interface CliComparisonModeConfig extends CliConfigBase {
347
- comparisons: CliComparisonConfig[];
348
- projects?: never;
349
- workspaces?: never;
350
- }
351
- /**
352
- * Top-level CLI config loaded from `vieval.config.*`.
353
- *
354
- * Exactly one top-level mode is allowed:
355
- * - `projects`
356
- * - `workspaces`
357
- * - `comparisons`
358
- */
359
- type CliConfig = CliProjectModeConfig | CliWorkspaceModeConfig | CliComparisonModeConfig;
360
371
  /**
361
372
  * Helper used by `vieval.config.*` for better type inference.
362
373
  */
363
- declare const defineConfig: _$c12.DefineConfig<CliConfig, _$c12.ConfigLayerMeta>;
374
+ declare const defineConfig: import("c12").DefineConfig<CliConfig, import("c12").ConfigLayerMeta>;
364
375
  /**
365
376
  * Loads `.env*` files using Vite's env resolution behavior.
366
377
  *
@@ -378,6 +389,15 @@ declare const defineConfig: _$c12.DefineConfig<CliConfig, _$c12.ConfigLayerMeta>
378
389
  declare function loadEnv(mode: string, envDir: string, prefixes?: string | string[]): NodeJS.ProcessEnv;
379
390
  //#endregion
380
391
  //#region src/dsl/task.d.ts
392
+ /**
393
+ * Per-case registration options for `caseOf`.
394
+ */
395
+ interface CaseRegistrationOptions<TInput> extends TaskExecutionPolicy {
396
+ /**
397
+ * Optional case input payload.
398
+ */
399
+ input: TInput;
400
+ }
381
401
  /**
382
402
  * Runtime context provided to a task case callback.
383
403
  */
@@ -389,26 +409,26 @@ interface CaseRunContext<TInput> extends TaskRunContext {
389
409
  inputs: TInput;
390
410
  };
391
411
  /**
392
- * Overrides one case score family with a custom normalized value.
412
+ * Emits one custom case metric into report events.
393
413
  *
394
414
  * Use when:
395
- * - one case computes a benchmark-native score that should flow into run aggregation
415
+ * - tasks need structured benchmark metadata beyond exact/judge score families
396
416
  *
397
417
  * Expects:
398
- * - `score` to stay in the `0..1` range
418
+ * - `name` to be a stable metric identifier
419
+ * - `value` to be JSON-serializable
399
420
  */
400
- score: (score: number, kind?: RunScoreKind) => void;
421
+ metric: (name: string, value: TelemetryAttributeValue) => void;
401
422
  /**
402
- * Emits one custom case metric into report events.
423
+ * Overrides one case score family with a custom normalized value.
403
424
  *
404
425
  * Use when:
405
- * - tasks need structured benchmark metadata beyond exact/judge score families
426
+ * - one case computes a benchmark-native score that should flow into run aggregation
406
427
  *
407
428
  * Expects:
408
- * - `name` to be a stable metric identifier
409
- * - `value` to be JSON-serializable
429
+ * - `score` to stay in the `0..1` range
410
430
  */
411
- metric: (name: string, value: TelemetryAttributeValue) => void;
431
+ score: (score: number, kind?: RunScoreKind) => void;
412
432
  /**
413
433
  * Cooperative abort signal for the current case execution.
414
434
  */
@@ -437,15 +457,6 @@ interface CasesFromInputsOptions extends TaskExecutionPolicy {
437
457
  */
438
458
  concurrency?: number;
439
459
  }
440
- /**
441
- * Per-case registration options for `caseOf`.
442
- */
443
- interface CaseRegistrationOptions<TInput> extends TaskExecutionPolicy {
444
- /**
445
- * Optional case input payload.
446
- */
447
- input: TInput;
448
- }
449
460
  /**
450
461
  * Builder callbacks passed into `describeTask`.
451
462
  */
@@ -466,10 +477,6 @@ interface DescribeTaskBuilder {
466
477
  * Options for `describeTask`.
467
478
  */
468
479
  interface DescribeTaskOptions extends TaskExecutionPolicy {
469
- /**
470
- * Optional description override.
471
- */
472
- description?: string;
473
480
  /**
474
481
  * Optional task-local concurrency overrides.
475
482
  *
@@ -483,6 +490,10 @@ interface DescribeTaskOptions extends TaskExecutionPolicy {
483
490
  * @default inherited from project or CLI concurrency settings
484
491
  */
485
492
  concurrency?: TaskConcurrencyConfig;
493
+ /**
494
+ * Optional description override.
495
+ */
496
+ description?: string;
486
497
  }
487
498
  /**
488
499
  * Registers one case in the currently active task scope.
@@ -500,7 +511,7 @@ declare function casesFromInputs<TInput>(namePrefix: string, inputs: readonly TI
500
511
  * - task behavior should be declared with `caseOf` and `casesFromInputs`
501
512
  * - business agent code should be imported and run from eval task files
502
513
  */
503
- declare function describeTask(name: string, build: ((builder: DescribeTaskBuilder) => void) | (() => void), options?: DescribeTaskOptions): {
514
+ declare function describeTask(name: string, build: (() => void) | ((builder: DescribeTaskBuilder) => void), options?: DescribeTaskOptions): {
504
515
  readonly description: string;
505
516
  readonly name: string;
506
517
  readonly task: {