vieval 0.0.4 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -3
- package/dist/bin/vieval.d.mts +1 -0
- package/dist/bin/vieval.mjs +33 -0
- package/dist/bin/vieval.mjs.map +1 -0
- package/dist/cli/index.d.mts +32 -0
- package/dist/cli/index.mjs +1 -2582
- package/dist/cli-sanbKtQq.mjs +2821 -0
- package/dist/cli-sanbKtQq.mjs.map +1 -0
- package/dist/config.d.mts +2 -2
- package/dist/config.mjs +16 -1
- package/dist/config.mjs.map +1 -0
- package/dist/core/assertions/index.d.mts +314 -2
- package/dist/core/assertions/index.mjs +182 -1
- package/dist/core/assertions/index.mjs.map +1 -0
- package/dist/core/inference-executors/index.d.mts +1 -1
- package/dist/core/inference-executors/index.mjs +1 -1
- package/dist/core/processors/results/index.d.mts +1 -1
- package/dist/core/runner/index.d.mts +3 -2
- package/dist/core/runner/index.mjs +637 -2
- package/dist/core/runner/index.mjs.map +1 -0
- package/dist/core/scheduler/index.d.mts +2 -0
- package/dist/core/scheduler/index.mjs +188 -0
- package/dist/core/scheduler/index.mjs.map +1 -0
- package/dist/{env-C7X81PWa.mjs → env--94B0UtW.mjs} +1 -1
- package/dist/{env-C7X81PWa.mjs.map → env--94B0UtW.mjs.map} +1 -1
- package/dist/{env-DtpjACOW.d.mts → env-BeHv_5mo.d.mts} +1 -1
- package/dist/{expect-extensions-BOzwV5EJ.mjs → expect-extensions-DCSqlneN.mjs} +2 -2
- package/dist/{expect-extensions-BOzwV5EJ.mjs.map → expect-extensions-DCSqlneN.mjs.map} +1 -1
- package/dist/expect.d.mts +10 -2
- package/dist/expect.mjs +16 -1
- package/dist/expect.mjs.map +1 -0
- package/dist/{index-BDMEAmf2.d.mts → index-DBZKkpBe.d.mts} +106 -4
- package/dist/index-fakXoZEe.d.mts +147 -0
- package/dist/index.d.mts +111 -12
- package/dist/index.mjs +216 -55
- package/dist/index.mjs.map +1 -1
- package/dist/models-DIGdOUpJ.mjs.map +1 -1
- package/dist/plugins/chat-models/index.d.mts +21 -1
- package/dist/plugins/chat-models/index.mjs +27 -1
- package/dist/plugins/chat-models/index.mjs.map +1 -1
- package/dist/queue-DsZQkZO_.mjs +21 -0
- package/dist/queue-DsZQkZO_.mjs.map +1 -0
- package/dist/{registry-CHJcTN2W.mjs → registry-CcKZqDJY.mjs} +27 -5
- package/dist/registry-CcKZqDJY.mjs.map +1 -0
- package/dist/testing/expect-extensions.d.mts +1 -1
- package/dist/testing/expect-extensions.mjs +1 -1
- package/package.json +9 -3
- package/dist/assertions-DcAjfVDA.mjs +0 -183
- package/dist/assertions-DcAjfVDA.mjs.map +0 -1
- package/dist/cli/index.mjs.map +0 -1
- package/dist/config-CHN24egi.mjs +0 -17
- package/dist/config-CHN24egi.mjs.map +0 -1
- package/dist/expect-B2vaoRVZ.d.mts +0 -10
- package/dist/expect-CaXiUkwY.mjs +0 -17
- package/dist/expect-CaXiUkwY.mjs.map +0 -1
- package/dist/index-C3gPFmcR.d.mts +0 -314
- package/dist/registry-CHJcTN2W.mjs.map +0 -1
- package/dist/runner-Dpy-eivM.mjs +0 -636
- package/dist/runner-Dpy-eivM.mjs.map +0 -1
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { ReadStream, WriteStream } from "node:fs";
|
|
2
1
|
import { Buffer } from "node:buffer";
|
|
2
|
+
import { ReadStream, WriteStream } from "node:fs";
|
|
3
3
|
|
|
4
4
|
//#region src/core/cache/types.d.ts
|
|
5
5
|
/**
|
|
@@ -477,6 +477,10 @@ interface ModelDefinition {
|
|
|
477
477
|
* Alias names that can resolve this model.
|
|
478
478
|
*/
|
|
479
479
|
aliases: string[];
|
|
480
|
+
/**
|
|
481
|
+
* Optional execution policy hints attached to this model.
|
|
482
|
+
*/
|
|
483
|
+
executionPolicy?: TaskExecutionPolicy;
|
|
480
484
|
/**
|
|
481
485
|
* Optional model-level call parameters.
|
|
482
486
|
*/
|
|
@@ -858,6 +862,61 @@ interface TaskRunOutput {
|
|
|
858
862
|
*/
|
|
859
863
|
scores: readonly RunScore[];
|
|
860
864
|
}
|
|
865
|
+
/**
|
|
866
|
+
* Execution policy applied to task and case callbacks.
|
|
867
|
+
*
|
|
868
|
+
* Use when:
|
|
869
|
+
* - one task or case should time out after a bounded duration
|
|
870
|
+
* - failures should retry within the current attempt or trigger a later full task attempt
|
|
871
|
+
*
|
|
872
|
+
* Expects:
|
|
873
|
+
* - `timeout` to be a positive integer when provided
|
|
874
|
+
* - `autoRetry` and `autoAttempt` to be non-negative integers when provided
|
|
875
|
+
*
|
|
876
|
+
* Returns:
|
|
877
|
+
* - one partial execution policy descriptor
|
|
878
|
+
*/
|
|
879
|
+
interface TaskExecutionPolicy {
|
|
880
|
+
/**
|
|
881
|
+
* Additional retries allowed within the current attempt.
|
|
882
|
+
*
|
|
883
|
+
* @default 0
|
|
884
|
+
*/
|
|
885
|
+
autoRetry?: number;
|
|
886
|
+
/**
|
|
887
|
+
* Additional full task attempts allowed after the current attempt settles.
|
|
888
|
+
*
|
|
889
|
+
* @default 0
|
|
890
|
+
*/
|
|
891
|
+
autoAttempt?: number;
|
|
892
|
+
/**
|
|
893
|
+
* Timeout in milliseconds for one case execution.
|
|
894
|
+
*/
|
|
895
|
+
timeout?: number;
|
|
896
|
+
}
|
|
897
|
+
/**
|
|
898
|
+
* Task-local concurrency metadata.
|
|
899
|
+
*
|
|
900
|
+
* Use when:
|
|
901
|
+
* - task declarations need to preserve attempt and case caps for later runtime coordination
|
|
902
|
+
* - DSL execution needs to resolve the default task-level case concurrency for registered cases
|
|
903
|
+
*
|
|
904
|
+
* Expects:
|
|
905
|
+
* - each provided value to be a positive integer chosen by the caller
|
|
906
|
+
*
|
|
907
|
+
* Returns:
|
|
908
|
+
* - one partial task-local concurrency descriptor
|
|
909
|
+
*/
|
|
910
|
+
interface TaskConcurrencyConfig {
|
|
911
|
+
/**
|
|
912
|
+
* Attempt-level concurrency cap for this task.
|
|
913
|
+
*/
|
|
914
|
+
attempt?: number;
|
|
915
|
+
/**
|
|
916
|
+
* Case-level concurrency cap for this task.
|
|
917
|
+
*/
|
|
918
|
+
case?: number;
|
|
919
|
+
}
|
|
861
920
|
/**
|
|
862
921
|
* Runtime context passed into eval task `run`.
|
|
863
922
|
*/
|
|
@@ -933,6 +992,23 @@ interface TaskRunContext {
|
|
|
933
992
|
* - hooks are best-effort observers and should not affect task scoring
|
|
934
993
|
*/
|
|
935
994
|
reporterHooks?: TaskReporterHooks;
|
|
995
|
+
/**
|
|
996
|
+
* Optional runtime scheduling overrides supplied by CLI or host execution.
|
|
997
|
+
*
|
|
998
|
+
* Use when:
|
|
999
|
+
* - run operators need to override task/case concurrency without editing eval code
|
|
1000
|
+
* - DSL task runners need to distinguish runtime flags from code defaults
|
|
1001
|
+
*
|
|
1002
|
+
* Expects:
|
|
1003
|
+
* - values are positive integers when provided
|
|
1004
|
+
*
|
|
1005
|
+
* @default undefined
|
|
1006
|
+
*/
|
|
1007
|
+
runtimeConcurrency?: TaskConcurrencyConfig;
|
|
1008
|
+
/**
|
|
1009
|
+
* Cooperative abort signal for the current execution.
|
|
1010
|
+
*/
|
|
1011
|
+
signal?: AbortSignal;
|
|
936
1012
|
}
|
|
937
1013
|
/**
|
|
938
1014
|
* Allowed terminal outcomes for one task case.
|
|
@@ -943,7 +1019,7 @@ interface TaskRunContext {
|
|
|
943
1019
|
* Expects:
|
|
944
1020
|
* - consumers treat the value as the final state for the case
|
|
945
1021
|
*/
|
|
946
|
-
type TaskCaseState = 'passed' | 'failed';
|
|
1022
|
+
type TaskCaseState = 'passed' | 'failed' | 'timeout';
|
|
947
1023
|
/**
|
|
948
1024
|
* Payload emitted when a task case starts.
|
|
949
1025
|
*
|
|
@@ -956,10 +1032,18 @@ type TaskCaseState = 'passed' | 'failed';
|
|
|
956
1032
|
* - `total` is the total number of registered cases
|
|
957
1033
|
*/
|
|
958
1034
|
interface TaskCaseReporterPayload {
|
|
1035
|
+
/**
|
|
1036
|
+
* Maximum retry count configured for this case.
|
|
1037
|
+
*/
|
|
1038
|
+
autoRetry?: number;
|
|
959
1039
|
/**
|
|
960
1040
|
* Declared case label.
|
|
961
1041
|
*/
|
|
962
1042
|
name: string;
|
|
1043
|
+
/**
|
|
1044
|
+
* Current retry attempt index, where `0` is the first try.
|
|
1045
|
+
*/
|
|
1046
|
+
retryIndex?: number;
|
|
963
1047
|
/**
|
|
964
1048
|
* Zero-based case position within the task.
|
|
965
1049
|
*/
|
|
@@ -1052,6 +1136,24 @@ interface TaskDefinition {
|
|
|
1052
1136
|
* Stable task id for diagnostics.
|
|
1053
1137
|
*/
|
|
1054
1138
|
id: string;
|
|
1139
|
+
/**
|
|
1140
|
+
* Optional task-local concurrency metadata.
|
|
1141
|
+
*
|
|
1142
|
+
* Use when:
|
|
1143
|
+
* - task declarations need to preserve task-scoped attempt/case caps for later scheduler wiring
|
|
1144
|
+
* - higher-level orchestration wants to inspect task-local concurrency without executing the task
|
|
1145
|
+
*
|
|
1146
|
+
* Expects:
|
|
1147
|
+
* - each provided value to be a positive integer chosen by the caller
|
|
1148
|
+
*
|
|
1149
|
+
* Returns:
|
|
1150
|
+
* - one partial task-local concurrency descriptor
|
|
1151
|
+
*/
|
|
1152
|
+
concurrency?: TaskConcurrencyConfig;
|
|
1153
|
+
/**
|
|
1154
|
+
* Optional task-local execution policy.
|
|
1155
|
+
*/
|
|
1156
|
+
executionPolicy?: TaskExecutionPolicy;
|
|
1055
1157
|
/**
|
|
1056
1158
|
* Optional matrix layering for this task definition.
|
|
1057
1159
|
*
|
|
@@ -1186,5 +1288,5 @@ interface ConfigHookPlugin<TConfig> {
|
|
|
1186
1288
|
configVievalResolved?: (config: TConfig) => void | Promise<void>;
|
|
1187
1289
|
}
|
|
1188
1290
|
//#endregion
|
|
1189
|
-
export {
|
|
1190
|
-
//# sourceMappingURL=index-BDMEAmf2.d.mts.map
|
|
1291
|
+
export { ScheduledTask as $, CreateTaskExecutionContextOptions as A, createRunnerRuntimeContext as B, TaskRunContext as C, RunnerTaskState as D, RunnerExecutionError as E, resolveModelByName as F, RunScore as G, AggregatedRunResults as H, asProjectRelativePath as I, CreateRunnerScheduleOptions as J, RunScoreKind as K, collectEvalEntries as L, TaskModelSelectionOptions as M, createTaskExecutionContext as N, ScheduledTaskExecutor as O, ModelDefinition as P, RunnerMatrixSelection as Q, CreateVievalRunnerRuntimeContextOptions as R, TaskReporterHooks as S, RunScheduledTasksOptions as T, AggregatedRunSummary as U, AggregatedProviderSummary as V, RunResult as W, RunnerMatrixDefinition as X, InferenceExecutor as Y, RunnerMatrixInput as Z, TaskCaseState as _, EvalDefinition as a, normalizeCacheFilePathSegments as at, TaskExecutionPolicy as b, MatrixAxisValues as c, CacheNamespace as ct, MatrixPrimitive as d, ScheduledTaskMatrix as et, MatrixRow as f, TaskCaseReporterPayload as g, TaskCaseReporterEndPayload as h, CollectedEvalEntry as i, createFilesystemTaskCacheRuntime as it, TaskExecutionContext as j, runScheduledTasks as k, MatrixDefinition as l, TaskCacheRuntime as lt, ScopedMatrices as m, defineEval as n, createRunnerSchedule as nt, EvalModule as o, CacheFileHandle as ot, MatrixValue as p, aggregateRunResults as q, defineTask as r, CreateFilesystemTaskCacheRuntimeOptions as rt, EvalModuleMap as s, CacheFileOptions as st, ConfigHookPlugin as t, ScheduledTaskMatrixMeta as tt, MatrixLayer as u, TaskConcurrencyConfig as v, TaskRunOutput as w, TaskReporterEventPayload as x, TaskDefinition as y, RunnerRuntimeContext as z };
|
|
1292
|
+
//# sourceMappingURL=index-DBZKkpBe.d.mts.map
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
//#region src/core/scheduler/types.d.ts
|
|
2
|
+
/**
|
|
3
|
+
* Hierarchical scheduler scopes used by the queue runtime.
|
|
4
|
+
*
|
|
5
|
+
* Use when:
|
|
6
|
+
* - selecting which concurrency cap applies to a unit of work
|
|
7
|
+
* - ordering middleware acquisition and release hooks
|
|
8
|
+
*
|
|
9
|
+
* Expects:
|
|
10
|
+
* - values move from broad to narrow scope in this order:
|
|
11
|
+
* `workspace -> project -> task -> attempt -> case`
|
|
12
|
+
*
|
|
13
|
+
* Returns:
|
|
14
|
+
* - a string literal scope identifier
|
|
15
|
+
*/
|
|
16
|
+
type SchedulerScope = 'workspace' | 'project' | 'task' | 'attempt' | 'case';
|
|
17
|
+
/**
|
|
18
|
+
* Context carried through queue acquisition, execution, and release.
|
|
19
|
+
*
|
|
20
|
+
* Use when:
|
|
21
|
+
* - middleware needs stable identifiers for logging or instrumentation
|
|
22
|
+
* - runtime helpers need to know which hierarchical scope is being executed
|
|
23
|
+
*
|
|
24
|
+
* Expects:
|
|
25
|
+
* - `workspaceId` and `experimentId` are always present
|
|
26
|
+
* - narrower ids are only provided when the selected scope requires them
|
|
27
|
+
*
|
|
28
|
+
* Returns:
|
|
29
|
+
* - a serializable scope context object
|
|
30
|
+
*/
|
|
31
|
+
interface SchedulerScopeContext {
|
|
32
|
+
scope: SchedulerScope;
|
|
33
|
+
workspaceId: string;
|
|
34
|
+
experimentId: string;
|
|
35
|
+
projectName?: string;
|
|
36
|
+
taskId?: string;
|
|
37
|
+
attemptIndex?: number;
|
|
38
|
+
caseId?: string;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Middleware hooks wrapped around scheduler execution.
|
|
42
|
+
*
|
|
43
|
+
* Use when:
|
|
44
|
+
* - recording queue lifecycle telemetry
|
|
45
|
+
* - attaching tracing or temporary resources around queued work
|
|
46
|
+
*
|
|
47
|
+
* Expects:
|
|
48
|
+
* - implementations call `next()` exactly once to continue the pipeline
|
|
49
|
+
*
|
|
50
|
+
* Returns:
|
|
51
|
+
* - optional async acquire and release hooks
|
|
52
|
+
*/
|
|
53
|
+
interface SchedulerMiddleware {
|
|
54
|
+
onAcquire?: (context: SchedulerScopeContext, next: () => Promise<void>) => Promise<void> | void;
|
|
55
|
+
onRelease?: (context: SchedulerScopeContext, next: () => Promise<void>) => Promise<void> | void;
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Per-scope concurrency limits used by the scheduler runtime.
|
|
59
|
+
*
|
|
60
|
+
* Use when:
|
|
61
|
+
* - bounding parallel work for a specific scope
|
|
62
|
+
* - disabling a scope cap by omitting its entry
|
|
63
|
+
*
|
|
64
|
+
* Expects:
|
|
65
|
+
* - values are positive integers when provided
|
|
66
|
+
*
|
|
67
|
+
* Returns:
|
|
68
|
+
* - a partial map of scheduler scope to concurrency cap
|
|
69
|
+
*/
|
|
70
|
+
interface SchedulerConcurrencyConfig {
|
|
71
|
+
workspace?: number;
|
|
72
|
+
project?: number;
|
|
73
|
+
task?: number;
|
|
74
|
+
attempt?: number;
|
|
75
|
+
case?: number;
|
|
76
|
+
}
|
|
77
|
+
/**
|
|
78
|
+
* Options accepted by {@link createSchedulerRuntime}.
|
|
79
|
+
*
|
|
80
|
+
* Use when:
|
|
81
|
+
* - constructing a scheduler runtime with queue limits or middleware
|
|
82
|
+
*
|
|
83
|
+
* Expects:
|
|
84
|
+
* - omitted configuration falls back to unbounded execution for that concern
|
|
85
|
+
*
|
|
86
|
+
* Returns:
|
|
87
|
+
* - queue and middleware configuration for the runtime
|
|
88
|
+
*/
|
|
89
|
+
interface CreateSchedulerRuntimeOptions {
|
|
90
|
+
concurrency?: SchedulerConcurrencyConfig;
|
|
91
|
+
middleware?: SchedulerMiddleware[];
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* Runtime API used to execute case-level work through scheduler policies.
|
|
95
|
+
*
|
|
96
|
+
* Use when:
|
|
97
|
+
* - the runner needs to enqueue case execution under middleware and queue caps
|
|
98
|
+
*
|
|
99
|
+
* Expects:
|
|
100
|
+
* - `runCase` receives a case context and a callback that performs the work
|
|
101
|
+
*
|
|
102
|
+
* Returns:
|
|
103
|
+
* - a promise that resolves with the callback result once all guards release
|
|
104
|
+
*/
|
|
105
|
+
interface SchedulerRuntime {
|
|
106
|
+
runCase: <T>(context: SchedulerScopeContext, execute: () => Promise<T>) => Promise<T>;
|
|
107
|
+
}
|
|
108
|
+
//#endregion
|
|
109
|
+
//#region src/core/scheduler/runtime.d.ts
|
|
110
|
+
/**
|
|
111
|
+
* Creates the core scheduler runtime used to serialize work by scope.
|
|
112
|
+
*
|
|
113
|
+
* Call stack:
|
|
114
|
+
*
|
|
115
|
+
* {@link createSchedulerRuntime}
|
|
116
|
+
* -> `createRuntimeQueues`
|
|
117
|
+
* -> `runtime.runCase(context, execute)`
|
|
118
|
+
* -> `runWithQueues`
|
|
119
|
+
* -> `runAcquireMiddleware`
|
|
120
|
+
* -> `execute`
|
|
121
|
+
* -> `runReleaseMiddleware`
|
|
122
|
+
*
|
|
123
|
+
* Use when:
|
|
124
|
+
* - runner code needs concurrency caps for queued case execution
|
|
125
|
+
* - middleware should wrap work with acquire/release lifecycle hooks
|
|
126
|
+
*
|
|
127
|
+
* Expects:
|
|
128
|
+
* - middleware is ordered from outermost to innermost concern
|
|
129
|
+
* - concurrency caps are positive integers when provided
|
|
130
|
+
*
|
|
131
|
+
* Returns:
|
|
132
|
+
* - a scheduler runtime with case execution support
|
|
133
|
+
*/
|
|
134
|
+
declare function createSchedulerRuntime(options?: CreateSchedulerRuntimeOptions): SchedulerRuntime;
|
|
135
|
+
/**
|
|
136
|
+
* Resolves the scheduler scopes that apply to a context.
|
|
137
|
+
*
|
|
138
|
+
* Before:
|
|
139
|
+
* - `{ scope: 'case', workspaceId: 'ws', experimentId: 'exp', caseId: 'case-1' }`
|
|
140
|
+
*
|
|
141
|
+
* After:
|
|
142
|
+
* - `['workspace', 'project', 'task', 'attempt', 'case']` up to the requested scope
|
|
143
|
+
*/
|
|
144
|
+
declare function getActiveScopes(context: SchedulerScopeContext): SchedulerScope[];
|
|
145
|
+
//#endregion
|
|
146
|
+
export { SchedulerMiddleware as a, SchedulerScopeContext as c, SchedulerConcurrencyConfig as i, getActiveScopes as n, SchedulerRuntime as o, CreateSchedulerRuntimeOptions as r, SchedulerScope as s, createSchedulerRuntime as t };
|
|
147
|
+
//# sourceMappingURL=index-fakXoZEe.d.mts.map
|
package/dist/index.d.mts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { a as requiredEnvFrom } from "./env-DtpjACOW.mjs";
|
|
3
|
-
import {
|
|
1
|
+
import { $ as ScheduledTask, C as TaskRunContext, K as RunScoreKind, P as ModelDefinition, W as RunResult, Y as InferenceExecutor, b as TaskExecutionPolicy, j as TaskExecutionContext, l as MatrixDefinition, t as ConfigHookPlugin, u as MatrixLayer, v as TaskConcurrencyConfig, w as TaskRunOutput } from "./index-DBZKkpBe.mjs";
|
|
2
|
+
import { a as requiredEnvFrom } from "./env-BeHv_5mo.mjs";
|
|
3
|
+
import { expect } from "./expect.mjs";
|
|
4
4
|
import * as _$c12 from "c12";
|
|
5
5
|
|
|
6
6
|
//#region src/cli/reporters/vitest-compat-reporter.d.ts
|
|
@@ -66,6 +66,41 @@ type VievalVitestCompatReporterReference = VievalVitestCompatReporterValue | rea
|
|
|
66
66
|
* CLI plugin shape bound to the full CLI config object.
|
|
67
67
|
*/
|
|
68
68
|
type CliConfigPlugin = ConfigHookPlugin<CliConfig>;
|
|
69
|
+
/**
|
|
70
|
+
* Concurrency limits that can be declared in CLI-facing config.
|
|
71
|
+
*
|
|
72
|
+
* Use when:
|
|
73
|
+
* - the CLI needs independent caps for workspace, project, task, attempt, or case scheduling scopes
|
|
74
|
+
* - config authors want to define concurrency without wiring runtime execution yet
|
|
75
|
+
*
|
|
76
|
+
* Expects:
|
|
77
|
+
* - each provided value to be a positive integer chosen by the caller
|
|
78
|
+
*
|
|
79
|
+
* Returns:
|
|
80
|
+
* - one partial concurrency descriptor keyed by scheduling scope
|
|
81
|
+
*/
|
|
82
|
+
interface CliConcurrencyConfig {
|
|
83
|
+
/**
|
|
84
|
+
* Workspace-level concurrency cap.
|
|
85
|
+
*/
|
|
86
|
+
workspace?: number;
|
|
87
|
+
/**
|
|
88
|
+
* Project-level concurrency cap.
|
|
89
|
+
*/
|
|
90
|
+
project?: number;
|
|
91
|
+
/**
|
|
92
|
+
* Task-level concurrency cap.
|
|
93
|
+
*/
|
|
94
|
+
task?: number;
|
|
95
|
+
/**
|
|
96
|
+
* Attempt-level concurrency cap.
|
|
97
|
+
*/
|
|
98
|
+
attempt?: number;
|
|
99
|
+
/**
|
|
100
|
+
* Case-level concurrency cap.
|
|
101
|
+
*/
|
|
102
|
+
case?: number;
|
|
103
|
+
}
|
|
69
104
|
/**
|
|
70
105
|
* Defines one project block for `vieval run`.
|
|
71
106
|
*/
|
|
@@ -115,6 +150,12 @@ interface CliProjectConfig {
|
|
|
115
150
|
* Optional eval-time matrix dimensions.
|
|
116
151
|
*/
|
|
117
152
|
evalMatrix?: MatrixDefinition | MatrixLayer;
|
|
153
|
+
/**
|
|
154
|
+
* Optional project-scoped concurrency overrides.
|
|
155
|
+
*
|
|
156
|
+
* @default inherited from top-level or CLI execution settings
|
|
157
|
+
*/
|
|
158
|
+
concurrency?: Omit<CliConcurrencyConfig, 'workspace'>;
|
|
118
159
|
/**
|
|
119
160
|
* Optional task executor.
|
|
120
161
|
*
|
|
@@ -218,9 +259,11 @@ interface CliComparisonConfig {
|
|
|
218
259
|
* Expects:
|
|
219
260
|
* - `model` resolves configured models for the current task
|
|
220
261
|
* - `reporterHooks` follows `TaskRunContext['reporterHooks']`
|
|
262
|
+
* - `runtimeConcurrency` follows `TaskRunContext['runtimeConcurrency']`
|
|
221
263
|
*/
|
|
222
264
|
interface CliProjectExecutorContext extends TaskExecutionContext {
|
|
223
265
|
reporterHooks?: TaskRunContext['reporterHooks'];
|
|
266
|
+
runtimeConcurrency?: TaskRunContext['runtimeConcurrency'];
|
|
224
267
|
}
|
|
225
268
|
/**
|
|
226
269
|
* Top-level CLI config loaded from `vieval.config.*`.
|
|
@@ -232,6 +275,19 @@ interface CliConfigBase {
|
|
|
232
275
|
* @default []
|
|
233
276
|
*/
|
|
234
277
|
models?: ModelDefinition[];
|
|
278
|
+
/**
|
|
279
|
+
* Global concurrency defaults inherited by projects and tasks.
|
|
280
|
+
*
|
|
281
|
+
* Use when:
|
|
282
|
+
* - config authors want one shared concurrency policy across workspace, project, task, attempt, and case scopes
|
|
283
|
+
* - project-local overrides should start from a top-level baseline
|
|
284
|
+
*
|
|
285
|
+
* Expects:
|
|
286
|
+
* - each provided value to be a positive integer chosen by the caller
|
|
287
|
+
*
|
|
288
|
+
* @default undefined
|
|
289
|
+
*/
|
|
290
|
+
concurrency?: CliConcurrencyConfig;
|
|
235
291
|
/**
|
|
236
292
|
* Global config plugins.
|
|
237
293
|
*
|
|
@@ -345,11 +401,43 @@ interface CaseRunContext<TInput> extends TaskRunContext {
|
|
|
345
401
|
* - `value` to be JSON-serializable
|
|
346
402
|
*/
|
|
347
403
|
metric: (name: string, value: boolean | number | string | null) => void;
|
|
404
|
+
/**
|
|
405
|
+
* Cooperative abort signal for the current case execution.
|
|
406
|
+
*/
|
|
407
|
+
signal: AbortSignal;
|
|
348
408
|
}
|
|
349
409
|
/**
|
|
350
410
|
* Callback for one task case.
|
|
351
411
|
*/
|
|
352
412
|
type CaseRunner<TInput> = (context: CaseRunContext<TInput>) => Promise<void> | void;
|
|
413
|
+
/**
|
|
414
|
+
* Per-group options for `casesFromInputs`.
|
|
415
|
+
*
|
|
416
|
+
* Use when:
|
|
417
|
+
* - one generated case group should run with a lower case concurrency than the task default
|
|
418
|
+
* - a task should keep a broader task-level cap while one expensive case family stays bounded
|
|
419
|
+
*
|
|
420
|
+
* Expects:
|
|
421
|
+
* - `concurrency` to be a positive integer when provided
|
|
422
|
+
*
|
|
423
|
+
* Returns:
|
|
424
|
+
* - one partial case-group execution descriptor
|
|
425
|
+
*/
|
|
426
|
+
interface CasesFromInputsOptions extends TaskExecutionPolicy {
|
|
427
|
+
/**
|
|
428
|
+
* Case-level concurrency cap for cases registered by one `casesFromInputs(...)` call.
|
|
429
|
+
*/
|
|
430
|
+
concurrency?: number;
|
|
431
|
+
}
|
|
432
|
+
/**
|
|
433
|
+
* Per-case registration options for `caseOf`.
|
|
434
|
+
*/
|
|
435
|
+
interface CaseRegistrationOptions<TInput> extends TaskExecutionPolicy {
|
|
436
|
+
/**
|
|
437
|
+
* Optional case input payload.
|
|
438
|
+
*/
|
|
439
|
+
input: TInput;
|
|
440
|
+
}
|
|
353
441
|
/**
|
|
354
442
|
* Builder callbacks passed into `describeTask`.
|
|
355
443
|
*/
|
|
@@ -359,35 +447,44 @@ interface DescribeTaskBuilder {
|
|
|
359
447
|
*/
|
|
360
448
|
caseOf: {
|
|
361
449
|
(name: string, run: CaseRunner<undefined>): void;
|
|
362
|
-
<TInput>(name: string, run: CaseRunner<TInput>, options: {
|
|
363
|
-
input: TInput;
|
|
364
|
-
}): void;
|
|
450
|
+
<TInput>(name: string, run: CaseRunner<TInput>, options: CaseRegistrationOptions<TInput>): void;
|
|
365
451
|
};
|
|
366
452
|
/**
|
|
367
453
|
* Registers multiple cases from input list.
|
|
368
454
|
*/
|
|
369
|
-
casesFromInputs: <TInput>(namePrefix: string, inputs: readonly TInput[], run: CaseRunner<TInput>) => void;
|
|
455
|
+
casesFromInputs: <TInput>(namePrefix: string, inputs: readonly TInput[], run: CaseRunner<TInput>, options?: CasesFromInputsOptions) => void;
|
|
370
456
|
}
|
|
371
457
|
/**
|
|
372
458
|
* Options for `describeTask`.
|
|
373
459
|
*/
|
|
374
|
-
interface DescribeTaskOptions {
|
|
460
|
+
interface DescribeTaskOptions extends TaskExecutionPolicy {
|
|
375
461
|
/**
|
|
376
462
|
* Optional description override.
|
|
377
463
|
*/
|
|
378
464
|
description?: string;
|
|
465
|
+
/**
|
|
466
|
+
* Optional task-local concurrency overrides.
|
|
467
|
+
*
|
|
468
|
+
* Use when:
|
|
469
|
+
* - one task should cap attempt fan-out independently from the surrounding project
|
|
470
|
+
* - one task should cap case fan-out without changing global scheduling defaults
|
|
471
|
+
*
|
|
472
|
+
* Expects:
|
|
473
|
+
* - each provided value to be a positive integer
|
|
474
|
+
*
|
|
475
|
+
* @default inherited from project or CLI concurrency settings
|
|
476
|
+
*/
|
|
477
|
+
concurrency?: TaskConcurrencyConfig;
|
|
379
478
|
}
|
|
380
479
|
/**
|
|
381
480
|
* Registers one case in the currently active task scope.
|
|
382
481
|
*/
|
|
383
482
|
declare function caseOf(name: string, run: CaseRunner<undefined>): void;
|
|
384
|
-
declare function caseOf<TInput>(name: string, run: CaseRunner<TInput>, options: {
|
|
385
|
-
input: TInput;
|
|
386
|
-
}): void;
|
|
483
|
+
declare function caseOf<TInput>(name: string, run: CaseRunner<TInput>, options: CaseRegistrationOptions<TInput>): void;
|
|
387
484
|
/**
|
|
388
485
|
* Registers multiple cases in the currently active task scope.
|
|
389
486
|
*/
|
|
390
|
-
declare function casesFromInputs<TInput>(namePrefix: string, inputs: readonly TInput[], run: CaseRunner<TInput>): void;
|
|
487
|
+
declare function casesFromInputs<TInput>(namePrefix: string, inputs: readonly TInput[], run: CaseRunner<TInput>, options?: CasesFromInputsOptions): void;
|
|
391
488
|
/**
|
|
392
489
|
* Defines one eval task with task/case semantics similar to Vitest.
|
|
393
490
|
*
|
|
@@ -399,6 +496,8 @@ declare function describeTask(name: string, build: ((builder: DescribeTaskBuilde
|
|
|
399
496
|
readonly description: string;
|
|
400
497
|
readonly name: string;
|
|
401
498
|
readonly task: {
|
|
499
|
+
readonly concurrency: TaskConcurrencyConfig | undefined;
|
|
500
|
+
readonly executionPolicy: TaskExecutionPolicy | undefined;
|
|
402
501
|
readonly id: string;
|
|
403
502
|
readonly run: (context: TaskRunContext) => Promise<TaskRunOutput>;
|
|
404
503
|
};
|