vieval 0.0.10 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -31
- package/dist/bin/vieval.mjs +1 -1
- package/dist/bin/vieval.mjs.map +1 -1
- package/dist/cli/index.d.mts +1 -1
- package/dist/cli/index.mjs +1 -1
- package/dist/{cli-DTDgaqeI.mjs → cli-uzS81IPd.mjs} +1483 -1483
- package/dist/cli-uzS81IPd.mjs.map +1 -0
- package/dist/config.d.mts +1 -1
- package/dist/config.mjs +1 -1
- package/dist/config.mjs.map +1 -1
- package/dist/core/assertions/index.d.mts +156 -156
- package/dist/core/assertions/index.mjs +82 -82
- package/dist/core/assertions/index.mjs.map +1 -1
- package/dist/core/inference-executors/index.d.mts +37 -37
- package/dist/core/inference-executors/index.mjs +54 -53
- package/dist/core/inference-executors/index.mjs.map +1 -1
- package/dist/core/processors/results/index.d.mts +18 -18
- package/dist/core/processors/results/index.mjs.map +1 -1
- package/dist/core/runner/index.d.mts +2 -2
- package/dist/core/runner/index.mjs +259 -259
- package/dist/core/runner/index.mjs.map +1 -1
- package/dist/core/scheduler/index.d.mts +1 -1
- package/dist/core/scheduler/index.mjs +65 -65
- package/dist/core/scheduler/index.mjs.map +1 -1
- package/dist/{env-DfWZy_n4.d.mts → env-Br6jaWGL.d.mts} +9 -9
- package/dist/{env-nV5rVErX.mjs → env-egxaJtNn.mjs} +8 -8
- package/dist/env-egxaJtNn.mjs.map +1 -0
- package/dist/{expect-extensions-DCSqlneN.mjs → expect-extensions-BKdEPt3h.mjs} +46 -46
- package/dist/expect-extensions-BKdEPt3h.mjs.map +1 -0
- package/dist/expect.d.mts +1 -3
- package/dist/expect.mjs +1 -1
- package/dist/expect.mjs.map +1 -1
- package/dist/{index-D_aMeWqO.d.mts → index-BLIlhiWT.d.mts} +565 -565
- package/dist/{index-Bg0atWBF.d.mts → index-CIaJClcC.d.mts} +48 -48
- package/dist/index.d.mts +208 -197
- package/dist/index.mjs +148 -148
- package/dist/index.mjs.map +1 -1
- package/dist/{models-pBSRUZhY.mjs → models-CaCOUPZw.mjs} +1 -1
- package/dist/{models-pBSRUZhY.mjs.map → models-CaCOUPZw.mjs.map} +1 -1
- package/dist/plugins/chat-models/index.d.mts +279 -279
- package/dist/plugins/chat-models/index.mjs +360 -360
- package/dist/plugins/chat-models/index.mjs.map +1 -1
- package/dist/{queue-DsZQkZO_.mjs → queue-BL86z2W_.mjs} +1 -1
- package/dist/{queue-DsZQkZO_.mjs.map → queue-BL86z2W_.mjs.map} +1 -1
- package/dist/{registry-DMnwE_mY.mjs → registry-BK7k6X81.mjs} +294 -294
- package/dist/registry-BK7k6X81.mjs.map +1 -0
- package/dist/testing/expect-extensions.d.mts +27 -27
- package/dist/testing/expect-extensions.mjs +1 -1
- package/package.json +12 -12
- package/dist/cli-DTDgaqeI.mjs.map +0 -1
- package/dist/env-nV5rVErX.mjs.map +0 -1
- package/dist/expect-extensions-DCSqlneN.mjs.map +0 -1
- package/dist/registry-DMnwE_mY.mjs.map +0 -1
package/dist/index.mjs
CHANGED
|
@@ -1,33 +1,10 @@
|
|
|
1
|
-
import { i as registerEvalDefinition, l as loadEnv, o as createNoopTelemetryRuntime, s as defineConfig } from "./registry-
|
|
2
|
-
import { t as createSchedulerQueue } from "./queue-
|
|
3
|
-
import { n as requiredEnvFrom } from "./env-nV5rVErX.mjs";
|
|
1
|
+
import { i as registerEvalDefinition, l as loadEnv, o as createNoopTelemetryRuntime, s as defineConfig } from "./registry-BK7k6X81.mjs";
|
|
2
|
+
import { t as createSchedulerQueue } from "./queue-BL86z2W_.mjs";
|
|
4
3
|
import { defineEval, defineTask } from "./config.mjs";
|
|
4
|
+
import { n as requiredEnvFrom } from "./env-egxaJtNn.mjs";
|
|
5
5
|
import { expect } from "./expect.mjs";
|
|
6
6
|
import { errorMessageFrom, sleep } from "@moeru/std";
|
|
7
7
|
//#region src/dsl/task.ts
|
|
8
|
-
function cloneCaseMatrix(matrix) {
|
|
9
|
-
return {
|
|
10
|
-
eval: { ...matrix.eval },
|
|
11
|
-
meta: { ...matrix.meta },
|
|
12
|
-
run: { ...matrix.run }
|
|
13
|
-
};
|
|
14
|
-
}
|
|
15
|
-
function createTaskCaseReporterId(index, name) {
|
|
16
|
-
return `${index}:${encodeURIComponent(name)}`;
|
|
17
|
-
}
|
|
18
|
-
function isTelemetryAttributeScalar(value) {
|
|
19
|
-
return typeof value === "boolean" || typeof value === "number" || typeof value === "string";
|
|
20
|
-
}
|
|
21
|
-
function isTelemetryAttributeArray(value) {
|
|
22
|
-
return value.every(isTelemetryAttributeScalar);
|
|
23
|
-
}
|
|
24
|
-
function canAttachMetricAsAttribute(value) {
|
|
25
|
-
if (isTelemetryAttributeScalar(value)) return true;
|
|
26
|
-
return Array.isArray(value) && isTelemetryAttributeArray(value);
|
|
27
|
-
}
|
|
28
|
-
function assertValidScore(score) {
|
|
29
|
-
if (!Number.isFinite(score) || score < 0 || score > 1) throw new Error(`Case score must be a finite number in range 0..1, got "${score}".`);
|
|
30
|
-
}
|
|
31
8
|
function assertNonNegativeInteger(value, label) {
|
|
32
9
|
if (!Number.isFinite(value) || !Number.isInteger(value) || value < 0) throw new Error(`Invalid ${label}: ${String(value)}`);
|
|
33
10
|
}
|
|
@@ -37,33 +14,128 @@ function assertNonNegativeNumber(value, label) {
|
|
|
37
14
|
function assertPositiveInteger(value, label) {
|
|
38
15
|
if (!Number.isFinite(value) || !Number.isInteger(value) || value <= 0) throw new Error(`Invalid ${label}: ${String(value)}`);
|
|
39
16
|
}
|
|
17
|
+
function assertValidScore(score) {
|
|
18
|
+
if (!Number.isFinite(score) || score < 0 || score > 1) throw new Error(`Case score must be a finite number in range 0..1, got "${score}".`);
|
|
19
|
+
}
|
|
40
20
|
function autoRetryDelayMs(retryIndex) {
|
|
41
21
|
return 500 * 2 ** (retryIndex - 1);
|
|
42
22
|
}
|
|
43
|
-
function
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
return typeof delay === "number" ? delay : delay(retryIndex);
|
|
23
|
+
function canAttachMetricAsAttribute(value) {
|
|
24
|
+
if (isTelemetryAttributeScalar(value)) return true;
|
|
25
|
+
return Array.isArray(value) && isTelemetryAttributeArray(value);
|
|
47
26
|
}
|
|
48
|
-
function
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
27
|
+
function cloneCaseMatrix(matrix) {
|
|
28
|
+
return {
|
|
29
|
+
eval: { ...matrix.eval },
|
|
30
|
+
meta: { ...matrix.meta },
|
|
31
|
+
run: { ...matrix.run }
|
|
32
|
+
};
|
|
33
|
+
}
|
|
34
|
+
function collectCaseOutcomeScores(outcome, scoreBucketsByKind) {
|
|
35
|
+
if (outcome.state !== "passed") {
|
|
36
|
+
scoreBucketsByKind.exact.push(0);
|
|
37
|
+
return;
|
|
38
|
+
}
|
|
39
|
+
if (outcome.scoresByKind.size === 0) {
|
|
40
|
+
scoreBucketsByKind.exact.push(1);
|
|
41
|
+
return;
|
|
42
|
+
}
|
|
43
|
+
scoreBucketsByKind.exact.push(outcome.scoresByKind.get("exact") ?? 1);
|
|
44
|
+
const judgeScore = outcome.scoresByKind.get("judge");
|
|
45
|
+
if (judgeScore != null) scoreBucketsByKind.judge.push(judgeScore);
|
|
46
|
+
}
|
|
47
|
+
function createCaseBuilder(registeredCases) {
|
|
48
|
+
function registerCase(name, run, options) {
|
|
49
|
+
registeredCases.push({
|
|
50
|
+
executionPolicy: normalizeExecutionPolicy(options, "task case"),
|
|
51
|
+
input: options?.input,
|
|
52
|
+
name,
|
|
53
|
+
run
|
|
54
|
+
});
|
|
55
|
+
}
|
|
56
|
+
return {
|
|
57
|
+
caseOf: registerCase,
|
|
58
|
+
casesFromInputs(namePrefix, inputs, run, options) {
|
|
59
|
+
const queueKey = options?.concurrency == null ? void 0 : {};
|
|
60
|
+
inputs.forEach((input, index) => {
|
|
61
|
+
registeredCases.push({
|
|
62
|
+
concurrency: options?.concurrency,
|
|
63
|
+
executionPolicy: normalizeExecutionPolicy(options, "casesFromInputs"),
|
|
64
|
+
input,
|
|
65
|
+
name: `${namePrefix} #${index + 1}`,
|
|
66
|
+
queueKey,
|
|
67
|
+
run
|
|
68
|
+
});
|
|
69
|
+
});
|
|
70
|
+
}
|
|
71
|
+
};
|
|
72
|
+
}
|
|
73
|
+
function createCaseTimeoutError(timeout) {
|
|
74
|
+
const error = /* @__PURE__ */ new Error(`Case timed out after ${timeout}ms.`);
|
|
75
|
+
error.name = "TimeoutError";
|
|
76
|
+
return error;
|
|
77
|
+
}
|
|
78
|
+
function createTaskCaseReporterId(index, name) {
|
|
79
|
+
return `${index}:${encodeURIComponent(name)}`;
|
|
52
80
|
}
|
|
53
81
|
function emitCaseEnd(hooks, payload) {
|
|
54
82
|
try {
|
|
55
83
|
hooks?.onCaseEnd?.(payload);
|
|
56
84
|
} catch {}
|
|
57
85
|
}
|
|
86
|
+
function emitCaseOutcome(context, taskCase, outcome, index, totalCases) {
|
|
87
|
+
emitCaseEnd(context.reporterHooks, {
|
|
88
|
+
...outcome.errorMessage == null ? {} : { errorMessage: outcome.errorMessage },
|
|
89
|
+
index,
|
|
90
|
+
...outcome.output === void 0 ? {} : { output: outcome.output },
|
|
91
|
+
name: taskCase.name,
|
|
92
|
+
state: outcome.state,
|
|
93
|
+
total: totalCases
|
|
94
|
+
});
|
|
95
|
+
}
|
|
96
|
+
function emitCaseStart(hooks, payload) {
|
|
97
|
+
try {
|
|
98
|
+
hooks?.onCaseStart?.(payload);
|
|
99
|
+
} catch {}
|
|
100
|
+
}
|
|
58
101
|
function emitReporterEvent(hooks, payload) {
|
|
59
102
|
try {
|
|
60
103
|
hooks?.onEvent?.(payload);
|
|
61
104
|
} catch {}
|
|
62
105
|
}
|
|
63
|
-
function
|
|
64
|
-
const
|
|
65
|
-
|
|
66
|
-
|
|
106
|
+
async function executeRegisteredCase(context, taskCase, index, totalCases, taskExecutionPolicy) {
|
|
107
|
+
const resolvedPolicy = resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy);
|
|
108
|
+
let lastOutcome;
|
|
109
|
+
for (let retryIndex = 0; retryIndex <= resolvedPolicy.autoRetry; retryIndex += 1) {
|
|
110
|
+
if (retryIndex > 0) {
|
|
111
|
+
const retryDelayMs = resolveAutoRetryDelay(resolvedPolicy, retryIndex);
|
|
112
|
+
assertNonNegativeNumber(retryDelayMs, "autoRetryDelay result");
|
|
113
|
+
if (retryDelayMs > 0) await sleep(retryDelayMs);
|
|
114
|
+
}
|
|
115
|
+
emitCaseStart(context.reporterHooks, {
|
|
116
|
+
...resolvedPolicy.autoRetry > 0 ? {
|
|
117
|
+
autoRetry: resolvedPolicy.autoRetry,
|
|
118
|
+
retryIndex
|
|
119
|
+
} : {},
|
|
120
|
+
index,
|
|
121
|
+
...taskCase.input === void 0 ? {} : { input: taskCase.input },
|
|
122
|
+
name: taskCase.name,
|
|
123
|
+
total: totalCases
|
|
124
|
+
});
|
|
125
|
+
lastOutcome = await runCaseOnce(context, taskCase, index, resolvedPolicy.timeout);
|
|
126
|
+
if (lastOutcome.state === "passed") return lastOutcome;
|
|
127
|
+
}
|
|
128
|
+
return lastOutcome ?? {
|
|
129
|
+
errorMessage: "Unknown case failure.",
|
|
130
|
+
scoresByKind: /* @__PURE__ */ new Map(),
|
|
131
|
+
state: "failed"
|
|
132
|
+
};
|
|
133
|
+
}
|
|
134
|
+
function isTelemetryAttributeArray(value) {
|
|
135
|
+
return value.every(isTelemetryAttributeScalar);
|
|
136
|
+
}
|
|
137
|
+
function isTelemetryAttributeScalar(value) {
|
|
138
|
+
return typeof value === "boolean" || typeof value === "number" || typeof value === "string";
|
|
67
139
|
}
|
|
68
140
|
function normalizeExecutionPolicy(policy, label) {
|
|
69
141
|
if (policy == null) return;
|
|
@@ -79,6 +151,11 @@ function normalizeExecutionPolicy(policy, label) {
|
|
|
79
151
|
};
|
|
80
152
|
return Object.values(normalized).some((value) => value != null) ? normalized : void 0;
|
|
81
153
|
}
|
|
154
|
+
function resolveAutoRetryDelay(policy, retryIndex) {
|
|
155
|
+
const delay = policy.autoRetryDelay;
|
|
156
|
+
if (delay == null) return autoRetryDelayMs(retryIndex);
|
|
157
|
+
return typeof delay === "number" ? delay : delay(retryIndex);
|
|
158
|
+
}
|
|
82
159
|
function resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy) {
|
|
83
160
|
return {
|
|
84
161
|
autoAttempt: taskCase.executionPolicy?.autoAttempt ?? taskExecutionPolicy?.autoAttempt ?? 0,
|
|
@@ -178,97 +255,7 @@ async function runCaseOnce(context, taskCase, index, timeout) {
|
|
|
178
255
|
if (timeoutHandle != null) clearTimeout(timeoutHandle);
|
|
179
256
|
}
|
|
180
257
|
}
|
|
181
|
-
async function executeRegisteredCase(context, taskCase, index, totalCases, taskExecutionPolicy) {
|
|
182
|
-
const resolvedPolicy = resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy);
|
|
183
|
-
let lastOutcome;
|
|
184
|
-
for (let retryIndex = 0; retryIndex <= resolvedPolicy.autoRetry; retryIndex += 1) {
|
|
185
|
-
if (retryIndex > 0) {
|
|
186
|
-
const retryDelayMs = resolveAutoRetryDelay(resolvedPolicy, retryIndex);
|
|
187
|
-
assertNonNegativeNumber(retryDelayMs, "autoRetryDelay result");
|
|
188
|
-
if (retryDelayMs > 0) await sleep(retryDelayMs);
|
|
189
|
-
}
|
|
190
|
-
emitCaseStart(context.reporterHooks, {
|
|
191
|
-
...resolvedPolicy.autoRetry > 0 ? {
|
|
192
|
-
autoRetry: resolvedPolicy.autoRetry,
|
|
193
|
-
retryIndex
|
|
194
|
-
} : {},
|
|
195
|
-
index,
|
|
196
|
-
...taskCase.input === void 0 ? {} : { input: taskCase.input },
|
|
197
|
-
name: taskCase.name,
|
|
198
|
-
total: totalCases
|
|
199
|
-
});
|
|
200
|
-
lastOutcome = await runCaseOnce(context, taskCase, index, resolvedPolicy.timeout);
|
|
201
|
-
if (lastOutcome.state === "passed") return lastOutcome;
|
|
202
|
-
}
|
|
203
|
-
return lastOutcome ?? {
|
|
204
|
-
errorMessage: "Unknown case failure.",
|
|
205
|
-
scoresByKind: /* @__PURE__ */ new Map(),
|
|
206
|
-
state: "failed"
|
|
207
|
-
};
|
|
208
|
-
}
|
|
209
|
-
function collectCaseOutcomeScores(outcome, scoreBucketsByKind) {
|
|
210
|
-
if (outcome.state !== "passed") {
|
|
211
|
-
scoreBucketsByKind.exact.push(0);
|
|
212
|
-
return;
|
|
213
|
-
}
|
|
214
|
-
if (outcome.scoresByKind.size === 0) {
|
|
215
|
-
scoreBucketsByKind.exact.push(1);
|
|
216
|
-
return;
|
|
217
|
-
}
|
|
218
|
-
scoreBucketsByKind.exact.push(outcome.scoresByKind.get("exact") ?? 1);
|
|
219
|
-
const judgeScore = outcome.scoresByKind.get("judge");
|
|
220
|
-
if (judgeScore != null) scoreBucketsByKind.judge.push(judgeScore);
|
|
221
|
-
}
|
|
222
|
-
function emitCaseOutcome(context, taskCase, outcome, index, totalCases) {
|
|
223
|
-
emitCaseEnd(context.reporterHooks, {
|
|
224
|
-
...outcome.errorMessage == null ? {} : { errorMessage: outcome.errorMessage },
|
|
225
|
-
index,
|
|
226
|
-
...outcome.output === void 0 ? {} : { output: outcome.output },
|
|
227
|
-
state: outcome.state,
|
|
228
|
-
name: taskCase.name,
|
|
229
|
-
total: totalCases
|
|
230
|
-
});
|
|
231
|
-
}
|
|
232
|
-
function createCaseBuilder(registeredCases) {
|
|
233
|
-
function registerCase(name, run, options) {
|
|
234
|
-
registeredCases.push({
|
|
235
|
-
executionPolicy: normalizeExecutionPolicy(options, "task case"),
|
|
236
|
-
input: options?.input,
|
|
237
|
-
name,
|
|
238
|
-
run
|
|
239
|
-
});
|
|
240
|
-
}
|
|
241
|
-
return {
|
|
242
|
-
caseOf: registerCase,
|
|
243
|
-
casesFromInputs(namePrefix, inputs, run, options) {
|
|
244
|
-
const queueKey = options?.concurrency == null ? void 0 : {};
|
|
245
|
-
inputs.forEach((input, index) => {
|
|
246
|
-
registeredCases.push({
|
|
247
|
-
concurrency: options?.concurrency,
|
|
248
|
-
executionPolicy: normalizeExecutionPolicy(options, "casesFromInputs"),
|
|
249
|
-
input,
|
|
250
|
-
name: `${namePrefix} #${index + 1}`,
|
|
251
|
-
queueKey,
|
|
252
|
-
run
|
|
253
|
-
});
|
|
254
|
-
});
|
|
255
|
-
}
|
|
256
|
-
};
|
|
257
|
-
}
|
|
258
258
|
let activeCasesStack = [];
|
|
259
|
-
function withActiveCases(cases, callback) {
|
|
260
|
-
activeCasesStack = [...activeCasesStack, cases];
|
|
261
|
-
try {
|
|
262
|
-
return callback();
|
|
263
|
-
} finally {
|
|
264
|
-
activeCasesStack = activeCasesStack.slice(0, -1);
|
|
265
|
-
}
|
|
266
|
-
}
|
|
267
|
-
function getActiveCases() {
|
|
268
|
-
const active = activeCasesStack.at(-1);
|
|
269
|
-
if (active == null) throw new Error("caseOf/casesFromInputs must be called inside describeTask/describeEval.");
|
|
270
|
-
return active;
|
|
271
|
-
}
|
|
272
259
|
function caseOf(name, run, options) {
|
|
273
260
|
getActiveCases().push({
|
|
274
261
|
executionPolicy: normalizeExecutionPolicy(options, "task case"),
|
|
@@ -294,26 +281,6 @@ function casesFromInputs(namePrefix, inputs, run, options) {
|
|
|
294
281
|
});
|
|
295
282
|
}
|
|
296
283
|
/**
|
|
297
|
-
* Resolves the effective case concurrency for one registered task case.
|
|
298
|
-
*
|
|
299
|
-
* Before:
|
|
300
|
-
* - registered case override `2`, task default `4`
|
|
301
|
-
* - registered case override `undefined`, task default `3`
|
|
302
|
-
*
|
|
303
|
-
* After:
|
|
304
|
-
* - `2`
|
|
305
|
-
* - `3`
|
|
306
|
-
*/
|
|
307
|
-
function resolveCaseConcurrency(taskCase, taskConcurrency, runtimeConcurrency) {
|
|
308
|
-
const concurrency = runtimeConcurrency?.case ?? taskCase.concurrency ?? taskConcurrency?.case;
|
|
309
|
-
if (concurrency == null) return;
|
|
310
|
-
if (!Number.isFinite(concurrency) || !Number.isInteger(concurrency) || concurrency <= 0) throw new Error(`Invalid task case concurrency: ${String(concurrency)}`);
|
|
311
|
-
return concurrency;
|
|
312
|
-
}
|
|
313
|
-
function resolveCaseQueueKey(taskCase, defaultQueueKey) {
|
|
314
|
-
return taskCase.queueKey ?? defaultQueueKey;
|
|
315
|
-
}
|
|
316
|
-
/**
|
|
317
284
|
* Defines one eval task with task/case semantics similar to Vitest.
|
|
318
285
|
*
|
|
319
286
|
* Use when:
|
|
@@ -409,6 +376,39 @@ function describeTask(name, build, options = {}) {
|
|
|
409
376
|
registerEvalDefinition(definition);
|
|
410
377
|
return definition;
|
|
411
378
|
}
|
|
379
|
+
function getActiveCases() {
|
|
380
|
+
const active = activeCasesStack.at(-1);
|
|
381
|
+
if (active == null) throw new Error("caseOf/casesFromInputs must be called inside describeTask/describeEval.");
|
|
382
|
+
return active;
|
|
383
|
+
}
|
|
384
|
+
/**
|
|
385
|
+
* Resolves the effective case concurrency for one registered task case.
|
|
386
|
+
*
|
|
387
|
+
* Before:
|
|
388
|
+
* - registered case override `2`, task default `4`
|
|
389
|
+
* - registered case override `undefined`, task default `3`
|
|
390
|
+
*
|
|
391
|
+
* After:
|
|
392
|
+
* - `2`
|
|
393
|
+
* - `3`
|
|
394
|
+
*/
|
|
395
|
+
function resolveCaseConcurrency(taskCase, taskConcurrency, runtimeConcurrency) {
|
|
396
|
+
const concurrency = runtimeConcurrency?.case ?? taskCase.concurrency ?? taskConcurrency?.case;
|
|
397
|
+
if (concurrency == null) return;
|
|
398
|
+
if (!Number.isFinite(concurrency) || !Number.isInteger(concurrency) || concurrency <= 0) throw new Error(`Invalid task case concurrency: ${String(concurrency)}`);
|
|
399
|
+
return concurrency;
|
|
400
|
+
}
|
|
401
|
+
function resolveCaseQueueKey(taskCase, defaultQueueKey) {
|
|
402
|
+
return taskCase.queueKey ?? defaultQueueKey;
|
|
403
|
+
}
|
|
404
|
+
function withActiveCases(cases, callback) {
|
|
405
|
+
activeCasesStack = [...activeCasesStack, cases];
|
|
406
|
+
try {
|
|
407
|
+
return callback();
|
|
408
|
+
} finally {
|
|
409
|
+
activeCasesStack = activeCasesStack.slice(0, -1);
|
|
410
|
+
}
|
|
411
|
+
}
|
|
412
412
|
/**
|
|
413
413
|
* Alias of `describeTask` for eval-centric naming.
|
|
414
414
|
*/
|
package/dist/index.mjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.mjs","names":[],"sources":["../src/dsl/task.ts"],"sourcesContent":["import type { TaskConcurrencyConfig, TaskExecutionPolicy, TaskReporterEventPayload, TaskRunContext, TaskRunOutput } from '../config'\nimport type { RunScoreKind } from '../core/runner'\nimport type { TelemetryAttributeValue } from '../core/telemetry'\n\nimport { errorMessageFrom, sleep } from '@moeru/std'\n\nimport { defineEval, defineTask } from '../config'\nimport { createSchedulerQueue } from '../core/scheduler/queue'\nimport { createNoopTelemetryRuntime } from '../core/telemetry'\nimport { registerEvalDefinition } from './registry'\n\n/**\n * Runtime context provided to a task case callback.\n */\nexport interface CaseRunContext<TInput> extends TaskRunContext {\n /**\n * Case-scoped matrix payload.\n */\n matrix: TaskRunContext['task']['matrix'] & { inputs: TInput }\n /**\n * Overrides one case score family with a custom normalized value.\n *\n * Use when:\n * - one case computes a benchmark-native score that should flow into run aggregation\n *\n * Expects:\n * - `score` to stay in the `0..1` range\n */\n score: (score: number, kind?: RunScoreKind) => void\n /**\n * Emits one custom case metric into report events.\n *\n * Use when:\n * - tasks need structured benchmark metadata beyond exact/judge score families\n *\n * Expects:\n * - `name` to be a stable metric identifier\n * - `value` to be JSON-serializable\n */\n metric: (name: string, value: TelemetryAttributeValue) => void\n /**\n * Cooperative abort signal for the current case execution.\n */\n signal: AbortSignal\n}\n\n/**\n * Callback for one task case.\n */\nexport type CaseRunner<TInput> = (context: CaseRunContext<TInput>) => Promise<unknown> | unknown\n\ninterface RegisteredCase<TInput> {\n concurrency?: number\n executionPolicy?: TaskExecutionPolicy\n input: TInput\n name: string\n queueKey?: object\n run: CaseRunner<TInput>\n}\n\n/**\n * Per-group options for `casesFromInputs`.\n *\n * Use when:\n * - one generated case group should run with a lower case concurrency than the task default\n * - a task should keep a broader task-level cap while one expensive case family stays bounded\n *\n * Expects:\n * - `concurrency` to be a positive integer when provided\n *\n * Returns:\n * - one partial case-group execution descriptor\n */\nexport interface CasesFromInputsOptions extends TaskExecutionPolicy {\n /**\n * Case-level concurrency cap for cases registered by one `casesFromInputs(...)` call.\n */\n concurrency?: number\n}\n\n/**\n * Per-case registration options for `caseOf`.\n */\nexport interface CaseRegistrationOptions<TInput> extends TaskExecutionPolicy {\n /**\n * Optional case input payload.\n */\n input: TInput\n}\n\ninterface CaseExecutionOutcome {\n errorMessage?: string\n output?: unknown\n scoresByKind: Map<RunScoreKind, number>\n state: 'failed' | 'passed' | 'timeout'\n}\n\nfunction cloneCaseMatrix(matrix: TaskRunContext['task']['matrix']): TaskRunContext['task']['matrix'] {\n return {\n eval: {\n ...matrix.eval,\n },\n meta: {\n ...matrix.meta,\n },\n run: {\n ...matrix.run,\n },\n }\n}\n\nfunction createTaskCaseReporterId(index: number, name: string): string {\n return `${index}:${encodeURIComponent(name)}`\n}\n\nfunction isTelemetryAttributeScalar(value: unknown): value is boolean | number | string {\n return typeof value === 'boolean' || typeof value === 'number' || typeof value === 'string'\n}\n\nfunction isTelemetryAttributeArray(value: readonly TelemetryAttributeValue[]): value is readonly boolean[] | readonly number[] | readonly string[] {\n return value.every(isTelemetryAttributeScalar)\n}\n\nfunction canAttachMetricAsAttribute(value: TelemetryAttributeValue): value is boolean | number | string | readonly boolean[] | readonly number[] | readonly string[] {\n if (isTelemetryAttributeScalar(value)) {\n return true\n }\n\n return Array.isArray(value) && isTelemetryAttributeArray(value)\n}\n\nfunction assertValidScore(score: number): void {\n if (!Number.isFinite(score) || score < 0 || score > 1) {\n throw new Error(`Case score must be a finite number in range 0..1, got \"${score}\".`)\n }\n}\n\nfunction assertNonNegativeInteger(value: number, label: string): void {\n if (!Number.isFinite(value) || !Number.isInteger(value) || value < 0) {\n throw new Error(`Invalid ${label}: ${String(value)}`)\n }\n}\n\nfunction assertNonNegativeNumber(value: number, label: string): void {\n if (!Number.isFinite(value) || value < 0) {\n throw new Error(`Invalid ${label}: ${String(value)}`)\n }\n}\n\nfunction assertPositiveInteger(value: number, label: string): void {\n if (!Number.isFinite(value) || !Number.isInteger(value) || value <= 0) {\n throw new Error(`Invalid ${label}: ${String(value)}`)\n }\n}\n\nfunction autoRetryDelayMs(retryIndex: number): number {\n // Retry index 1 is the first retry after the initial case failure.\n return 500 * 2 ** (retryIndex - 1)\n}\n\nfunction resolveAutoRetryDelay(policy: TaskExecutionPolicy, retryIndex: number): number {\n const delay = policy.autoRetryDelay\n\n if (delay == null) {\n return autoRetryDelayMs(retryIndex)\n }\n\n return typeof delay === 'number' ? delay : delay(retryIndex)\n}\n\nfunction emitCaseStart(\n hooks: TaskRunContext['reporterHooks'] | undefined,\n payload: {\n autoRetry?: number\n index: number\n input?: unknown\n name: string\n retryIndex?: number\n total: number\n },\n): void {\n try {\n hooks?.onCaseStart?.(payload)\n }\n catch {\n // Reporter hooks must never affect task scoring.\n }\n}\n\nfunction emitCaseEnd(\n hooks: TaskRunContext['reporterHooks'] | undefined,\n payload: {\n index: number\n output?: unknown\n state: 'passed' | 'failed' | 'timeout'\n name: string\n total: number\n errorMessage?: string\n },\n): void {\n try {\n hooks?.onCaseEnd?.(payload)\n }\n catch {\n // Reporter hooks must never affect task scoring.\n }\n}\n\nfunction emitReporterEvent(\n hooks: TaskRunContext['reporterHooks'] | undefined,\n payload: TaskReporterEventPayload,\n): void {\n try {\n hooks?.onEvent?.(payload)\n }\n catch {\n // Reporter hooks must never affect task scoring.\n }\n}\n\nfunction createCaseTimeoutError(timeout: number): Error {\n const error = new Error(`Case timed out after ${timeout}ms.`)\n error.name = 'TimeoutError'\n return error\n}\n\nfunction normalizeExecutionPolicy(policy: TaskExecutionPolicy | undefined, label: string): TaskExecutionPolicy | undefined {\n if (policy == null) {\n return undefined\n }\n\n if (policy.autoAttempt != null) {\n assertNonNegativeInteger(policy.autoAttempt, `${label} autoAttempt`)\n }\n\n if (policy.autoRetry != null) {\n assertNonNegativeInteger(policy.autoRetry, `${label} autoRetry`)\n }\n\n if (typeof policy.autoRetryDelay === 'number') {\n assertNonNegativeNumber(policy.autoRetryDelay, `${label} autoRetryDelay`)\n }\n\n if (policy.timeout != null) {\n assertPositiveInteger(policy.timeout, `${label} timeout`)\n }\n\n const normalized = {\n autoAttempt: policy.autoAttempt,\n autoRetry: policy.autoRetry,\n autoRetryDelay: policy.autoRetryDelay,\n timeout: policy.timeout,\n }\n\n return Object.values(normalized).some(value => value != null)\n ? normalized\n : undefined\n}\n\nfunction resolveCaseExecutionPolicy(\n taskCase: RegisteredCase<unknown>,\n taskExecutionPolicy: TaskExecutionPolicy | undefined,\n): Required<Pick<TaskExecutionPolicy, 'autoAttempt' | 'autoRetry'>> & Pick<TaskExecutionPolicy, 'autoRetryDelay' | 'timeout'> {\n return {\n autoAttempt: taskCase.executionPolicy?.autoAttempt ?? taskExecutionPolicy?.autoAttempt ?? 0,\n autoRetry: taskCase.executionPolicy?.autoRetry ?? taskExecutionPolicy?.autoRetry ?? 0,\n autoRetryDelay: taskCase.executionPolicy?.autoRetryDelay ?? taskExecutionPolicy?.autoRetryDelay,\n timeout: taskCase.executionPolicy?.timeout ?? taskExecutionPolicy?.timeout,\n }\n}\n\nasync function runCaseOnce(\n context: TaskRunContext,\n taskCase: RegisteredCase<unknown>,\n index: number,\n timeout: number | undefined,\n): Promise<CaseExecutionOutcome> {\n const customScoresByKind = new Map<RunScoreKind, number>()\n const abortController = new AbortController()\n const telemetry = context.telemetry ?? createNoopTelemetryRuntime()\n const caseId = createTaskCaseReporterId(index, taskCase.name)\n let timeoutHandle: ReturnType<typeof setTimeout> | undefined\n let timedOut = false\n let settled = false\n\n try {\n return await telemetry.withSpan('vieval.case', {\n 'vieval.case.id': caseId,\n 'vieval.case.name': taskCase.name,\n 'vieval.task.id': context.task.id,\n 'vieval.task.name': context.task.entry.name,\n }, async () => {\n const runPromise = Promise.resolve(taskCase.run({\n ...context,\n matrix: {\n ...cloneCaseMatrix(context.task.matrix),\n inputs: taskCase.input,\n },\n metric(name, value) {\n if (abortController.signal.aborted || settled) {\n return\n }\n\n emitReporterEvent(context.reporterHooks, {\n caseId,\n data: {\n name,\n value,\n },\n event: 'task.case.metric',\n })\n telemetry.addEvent('vieval.case.metric', { name, value })\n if (canAttachMetricAsAttribute(value)) {\n telemetry.setAttributes({ [name]: value })\n }\n },\n score(score, kind = 'exact') {\n if (abortController.signal.aborted || settled) {\n return\n }\n\n assertValidScore(score)\n customScoresByKind.set(kind, score)\n telemetry.addEvent('vieval.case.score', {\n 'vieval.score.kind': kind,\n 'vieval.score.value': score,\n })\n emitReporterEvent(context.reporterHooks, {\n caseId,\n data: { kind, score },\n event: 'task.case.score',\n })\n },\n signal: abortController.signal,\n }))\n\n if (timeout != null) {\n const timeoutPromise = new Promise<never>((_, reject) => {\n timeoutHandle = setTimeout(() => {\n timedOut = true\n abortController.abort(createCaseTimeoutError(timeout))\n reject(createCaseTimeoutError(timeout))\n }, timeout)\n })\n\n const output = await Promise.race([runPromise, timeoutPromise])\n settled = true\n return {\n output,\n scoresByKind: customScoresByKind,\n state: 'passed',\n }\n }\n\n const output = await runPromise\n settled = true\n return {\n output,\n scoresByKind: customScoresByKind,\n state: 'passed',\n }\n })\n }\n catch (error) {\n settled = true\n return {\n errorMessage: errorMessageFrom(error) ?? (timedOut && timeout != null ? `Case timed out after ${timeout}ms.` : 'Unknown case failure.'),\n scoresByKind: customScoresByKind,\n state: timedOut ? 'timeout' : 'failed',\n }\n }\n finally {\n if (timeoutHandle != null) {\n clearTimeout(timeoutHandle)\n }\n }\n}\n\nasync function executeRegisteredCase(\n context: TaskRunContext,\n taskCase: RegisteredCase<unknown>,\n index: number,\n totalCases: number,\n taskExecutionPolicy: TaskExecutionPolicy | undefined,\n): Promise<CaseExecutionOutcome> {\n const resolvedPolicy = resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy)\n let lastOutcome: CaseExecutionOutcome | undefined\n\n for (let retryIndex = 0; retryIndex <= resolvedPolicy.autoRetry; retryIndex += 1) {\n if (retryIndex > 0) {\n const retryDelayMs = resolveAutoRetryDelay(resolvedPolicy, retryIndex)\n assertNonNegativeNumber(retryDelayMs, 'autoRetryDelay result')\n\n if (retryDelayMs > 0) {\n await sleep(retryDelayMs)\n }\n }\n\n emitCaseStart(context.reporterHooks, {\n ...(resolvedPolicy.autoRetry > 0\n ? {\n autoRetry: resolvedPolicy.autoRetry,\n retryIndex,\n }\n : {}),\n index,\n ...(taskCase.input === undefined ? {} : { input: taskCase.input }),\n name: taskCase.name,\n total: totalCases,\n })\n lastOutcome = await runCaseOnce(context, taskCase, index, resolvedPolicy.timeout)\n if (lastOutcome.state === 'passed') {\n return lastOutcome\n }\n }\n\n return lastOutcome ?? {\n errorMessage: 'Unknown case failure.',\n scoresByKind: new Map(),\n state: 'failed',\n }\n}\n\nfunction collectCaseOutcomeScores(\n outcome: CaseExecutionOutcome,\n scoreBucketsByKind: Record<RunScoreKind, number[]>,\n): void {\n if (outcome.state !== 'passed') {\n scoreBucketsByKind.exact.push(0)\n return\n }\n\n if (outcome.scoresByKind.size === 0) {\n scoreBucketsByKind.exact.push(1)\n return\n }\n\n scoreBucketsByKind.exact.push(outcome.scoresByKind.get('exact') ?? 1)\n const judgeScore = outcome.scoresByKind.get('judge')\n if (judgeScore != null) {\n scoreBucketsByKind.judge.push(judgeScore)\n }\n}\n\nfunction emitCaseOutcome(\n context: TaskRunContext,\n taskCase: RegisteredCase<unknown>,\n outcome: CaseExecutionOutcome,\n index: number,\n totalCases: number,\n): void {\n emitCaseEnd(context.reporterHooks, {\n ...(outcome.errorMessage == null ? {} : { errorMessage: outcome.errorMessage }),\n index,\n ...(outcome.output === undefined ? {} : { output: outcome.output }),\n state: outcome.state,\n name: taskCase.name,\n total: totalCases,\n })\n}\n\n/**\n * Builder callbacks passed into `describeTask`.\n */\nexport interface DescribeTaskBuilder {\n /**\n * Registers one explicit case.\n */\n caseOf: {\n (name: string, run: CaseRunner<undefined>): void\n <TInput>(name: string, run: CaseRunner<TInput>, options: CaseRegistrationOptions<TInput>): void\n }\n /**\n * Registers multiple cases from input list.\n */\n casesFromInputs: <TInput>(\n namePrefix: string,\n inputs: readonly TInput[],\n run: CaseRunner<TInput>,\n options?: CasesFromInputsOptions,\n ) => void\n}\n\n/**\n * Options for `describeTask`.\n */\nexport interface DescribeTaskOptions extends TaskExecutionPolicy {\n /**\n * Optional description override.\n */\n description?: string\n /**\n * Optional task-local concurrency overrides.\n *\n * Use when:\n * - one task should cap attempt fan-out independently from the surrounding project\n * - one task should cap case fan-out without changing global scheduling defaults\n *\n * Expects:\n * - each provided value to be a positive integer\n *\n * @default inherited from project or CLI concurrency settings\n */\n concurrency?: TaskConcurrencyConfig\n}\n\nfunction createCaseBuilder(registeredCases: RegisteredCase<unknown>[]): DescribeTaskBuilder {\n function registerCase(name: string, run: CaseRunner<undefined>): void\n function registerCase<TInput>(name: string, run: CaseRunner<TInput>, options: CaseRegistrationOptions<TInput>): void\n function registerCase<TInput>(\n name: string,\n run: CaseRunner<TInput> | CaseRunner<undefined>,\n options?: CaseRegistrationOptions<TInput>,\n ): void {\n registeredCases.push({\n executionPolicy: normalizeExecutionPolicy(options, 'task case'),\n input: options?.input,\n name,\n run: run as CaseRunner<unknown>,\n })\n }\n\n return {\n caseOf: registerCase,\n casesFromInputs(namePrefix, inputs, run, options) {\n const queueKey = options?.concurrency == null ? undefined : {}\n\n inputs.forEach((input, index) => {\n registeredCases.push({\n concurrency: options?.concurrency,\n executionPolicy: normalizeExecutionPolicy(options, 'casesFromInputs'),\n input,\n name: `${namePrefix} #${index + 1}`,\n queueKey,\n run: run as CaseRunner<unknown>,\n })\n })\n },\n }\n}\n\nlet activeCasesStack: RegisteredCase<unknown>[][] = []\n\nfunction withActiveCases<T>(cases: RegisteredCase<unknown>[], callback: () => T): T {\n activeCasesStack = [...activeCasesStack, cases]\n\n try {\n return callback()\n }\n finally {\n activeCasesStack = activeCasesStack.slice(0, -1)\n }\n}\n\nfunction getActiveCases(): RegisteredCase<unknown>[] {\n const active = activeCasesStack.at(-1)\n if (active == null) {\n throw new Error('caseOf/casesFromInputs must be called inside describeTask/describeEval.')\n }\n\n return active\n}\n\n/**\n * Registers one case in the currently active task scope.\n */\nexport function caseOf(\n name: string,\n run: CaseRunner<undefined>,\n): void\n\nexport function caseOf<TInput>(\n name: string,\n run: CaseRunner<TInput>,\n options: CaseRegistrationOptions<TInput>,\n): void\n\nexport function caseOf<TInput>(\n name: string,\n run: CaseRunner<TInput> | CaseRunner<undefined>,\n options?: CaseRegistrationOptions<TInput>,\n): void {\n getActiveCases().push({\n executionPolicy: normalizeExecutionPolicy(options, 'task case'),\n input: options?.input,\n name,\n run: run as CaseRunner<unknown>,\n })\n}\n\n/**\n * Registers multiple cases in the currently active task scope.\n */\nexport function casesFromInputs<TInput>(\n namePrefix: string,\n inputs: readonly TInput[],\n run: CaseRunner<TInput>,\n options?: CasesFromInputsOptions,\n): void {\n const queueKey = options?.concurrency == null ? undefined : {}\n\n inputs.forEach((input, index) => {\n getActiveCases().push({\n concurrency: options?.concurrency,\n executionPolicy: normalizeExecutionPolicy(options, 'casesFromInputs'),\n input,\n name: `${namePrefix} #${index + 1}`,\n queueKey,\n run: run as CaseRunner<unknown>,\n })\n })\n}\n\n/**\n * Resolves the effective case concurrency for one registered task case.\n *\n * Before:\n * - registered case override `2`, task default `4`\n * - registered case override `undefined`, task default `3`\n *\n * After:\n * - `2`\n * - `3`\n */\nfunction resolveCaseConcurrency(\n taskCase: RegisteredCase<unknown>,\n taskConcurrency: TaskConcurrencyConfig | undefined,\n runtimeConcurrency: TaskConcurrencyConfig | undefined,\n): number | undefined {\n const concurrency = runtimeConcurrency?.case ?? taskCase.concurrency ?? taskConcurrency?.case\n if (concurrency == null) {\n return undefined\n }\n\n if (!Number.isFinite(concurrency) || !Number.isInteger(concurrency) || concurrency <= 0) {\n throw new Error(`Invalid task case concurrency: ${String(concurrency)}`)\n }\n\n return concurrency\n}\n\nfunction resolveCaseQueueKey(taskCase: RegisteredCase<unknown>, defaultQueueKey: object): object {\n return taskCase.queueKey ?? defaultQueueKey\n}\n\n/**\n * Defines one eval task with task/case semantics similar to Vitest.\n *\n * Use when:\n * - task behavior should be declared with `caseOf` and `casesFromInputs`\n * - business agent code should be imported and run from eval task files\n */\nexport function describeTask(\n name: string,\n build: ((builder: DescribeTaskBuilder) => void) | (() => void),\n options: DescribeTaskOptions = {},\n) {\n const registeredCases: RegisteredCase<unknown>[] = []\n const builder = createCaseBuilder(registeredCases)\n withActiveCases(registeredCases, () => {\n if (build.length > 0) {\n (build as (builder: DescribeTaskBuilder) => void)(builder)\n return\n }\n\n ;(build as () => void)()\n })\n\n const description = options.description ?? name\n const taskExecutionPolicy = normalizeExecutionPolicy(options, 'describeTask')\n\n const definition = defineEval({\n description,\n name,\n task: defineTask({\n concurrency: options.concurrency,\n executionPolicy: taskExecutionPolicy,\n id: name,\n async run(context): Promise<TaskRunOutput> {\n if (registeredCases.length === 0) {\n return {\n scores: [{ kind: 'exact', score: 1 }],\n }\n }\n\n const totalCases = registeredCases.length\n const scoreBucketsByKind: Record<RunScoreKind, number[]> = {\n exact: [],\n judge: [],\n }\n const defaultCaseQueueKey = {}\n const caseQueues = new Map<object, ReturnType<typeof createSchedulerQueue>>()\n const hasAutoAttempt = registeredCases.some(taskCase => resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy).autoAttempt > 0)\n const runtimeTaskConcurrency = context.task.entry.task?.concurrency ?? options.concurrency\n\n if (!hasAutoAttempt) {\n await Promise.all(\n registeredCases.map(async (taskCase, index) => {\n const executeCase = async () => {\n const outcome = await executeRegisteredCase(context, taskCase, index, totalCases, taskExecutionPolicy)\n emitCaseOutcome(context, taskCase, outcome, index, totalCases)\n collectCaseOutcomeScores(outcome, scoreBucketsByKind)\n }\n\n const concurrency = resolveCaseConcurrency(taskCase, runtimeTaskConcurrency, context.runtimeConcurrency)\n if (concurrency == null) {\n await executeCase()\n return\n }\n\n const queueKey = resolveCaseQueueKey(taskCase, defaultCaseQueueKey)\n const queue = caseQueues.get(queueKey) ?? createSchedulerQueue(concurrency)\n caseQueues.set(queueKey, queue)\n await queue.run(executeCase)\n }),\n )\n }\n else {\n let attemptIndex = 0\n\n for (;;) {\n const attemptOutcomes = await Promise.all(\n registeredCases.map(async (taskCase, index) => {\n const executeCase = async () => await executeRegisteredCase(context, taskCase, index, totalCases, taskExecutionPolicy)\n const concurrency = resolveCaseConcurrency(taskCase, runtimeTaskConcurrency, context.runtimeConcurrency)\n if (concurrency == null) {\n return await executeCase()\n }\n\n const queueKey = resolveCaseQueueKey(taskCase, defaultCaseQueueKey)\n const queue = caseQueues.get(queueKey) ?? createSchedulerQueue(concurrency)\n caseQueues.set(queueKey, queue)\n return await queue.run(executeCase)\n }),\n )\n\n attemptOutcomes.forEach((outcome, index) => {\n const taskCase = registeredCases[index]\n if (taskCase == null) {\n return\n }\n\n emitCaseOutcome(context, taskCase, outcome, index, totalCases)\n collectCaseOutcomeScores(outcome, scoreBucketsByKind)\n })\n\n const shouldContinue = attemptOutcomes.some((outcome, index) => {\n if (outcome.state === 'passed') {\n return false\n }\n\n const taskCase = registeredCases[index]\n if (taskCase == null) {\n return false\n }\n\n return attemptIndex < resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy).autoAttempt\n })\n\n if (!shouldContinue) {\n break\n }\n\n attemptIndex += 1\n }\n }\n\n const scores = (Object.keys(scoreBucketsByKind) as RunScoreKind[])\n .filter(kind => scoreBucketsByKind[kind].length > 0)\n .map((kind) => {\n const values = scoreBucketsByKind[kind]\n const total = values.reduce((sum, value) => sum + value, 0)\n return {\n kind,\n score: total / values.length,\n }\n })\n\n return {\n scores,\n }\n },\n }),\n })\n\n registerEvalDefinition(definition)\n\n return definition\n}\n\n/**\n * Alias of `describeTask` for eval-centric naming.\n */\nexport const describeEval = describeTask\n"],"mappings":";;;;;;;AAiGA,SAAS,gBAAgB,QAA4E;AACnG,QAAO;EACL,MAAM,EACJ,GAAG,OAAO,MACX;EACD,MAAM,EACJ,GAAG,OAAO,MACX;EACD,KAAK,EACH,GAAG,OAAO,KACX;EACF;;AAGH,SAAS,yBAAyB,OAAe,MAAsB;AACrE,QAAO,GAAG,MAAM,GAAG,mBAAmB,KAAK;;AAG7C,SAAS,2BAA2B,OAAoD;AACtF,QAAO,OAAO,UAAU,aAAa,OAAO,UAAU,YAAY,OAAO,UAAU;;AAGrF,SAAS,0BAA0B,OAAgH;AACjJ,QAAO,MAAM,MAAM,2BAA2B;;AAGhD,SAAS,2BAA2B,OAAiI;AACnK,KAAI,2BAA2B,MAAM,CACnC,QAAO;AAGT,QAAO,MAAM,QAAQ,MAAM,IAAI,0BAA0B,MAAM;;AAGjE,SAAS,iBAAiB,OAAqB;AAC7C,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,QAAQ,KAAK,QAAQ,EAClD,OAAM,IAAI,MAAM,0DAA0D,MAAM,IAAI;;AAIxF,SAAS,yBAAyB,OAAe,OAAqB;AACpE,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,CAAC,OAAO,UAAU,MAAM,IAAI,QAAQ,EACjE,OAAM,IAAI,MAAM,WAAW,MAAM,IAAI,OAAO,MAAM,GAAG;;AAIzD,SAAS,wBAAwB,OAAe,OAAqB;AACnE,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,QAAQ,EACrC,OAAM,IAAI,MAAM,WAAW,MAAM,IAAI,OAAO,MAAM,GAAG;;AAIzD,SAAS,sBAAsB,OAAe,OAAqB;AACjE,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,CAAC,OAAO,UAAU,MAAM,IAAI,SAAS,EAClE,OAAM,IAAI,MAAM,WAAW,MAAM,IAAI,OAAO,MAAM,GAAG;;AAIzD,SAAS,iBAAiB,YAA4B;AAEpD,QAAO,MAAM,MAAM,aAAa;;AAGlC,SAAS,sBAAsB,QAA6B,YAA4B;CACtF,MAAM,QAAQ,OAAO;AAErB,KAAI,SAAS,KACX,QAAO,iBAAiB,WAAW;AAGrC,QAAO,OAAO,UAAU,WAAW,QAAQ,MAAM,WAAW;;AAG9D,SAAS,cACP,OACA,SAQM;AACN,KAAI;AACF,SAAO,cAAc,QAAQ;SAEzB;;AAKR,SAAS,YACP,OACA,SAQM;AACN,KAAI;AACF,SAAO,YAAY,QAAQ;SAEvB;;AAKR,SAAS,kBACP,OACA,SACM;AACN,KAAI;AACF,SAAO,UAAU,QAAQ;SAErB;;AAKR,SAAS,uBAAuB,SAAwB;CACtD,MAAM,wBAAQ,IAAI,MAAM,wBAAwB,QAAQ,KAAK;AAC7D,OAAM,OAAO;AACb,QAAO;;AAGT,SAAS,yBAAyB,QAAyC,OAAgD;AACzH,KAAI,UAAU,KACZ;AAGF,KAAI,OAAO,eAAe,KACxB,0BAAyB,OAAO,aAAa,GAAG,MAAM,cAAc;AAGtE,KAAI,OAAO,aAAa,KACtB,0BAAyB,OAAO,WAAW,GAAG,MAAM,YAAY;AAGlE,KAAI,OAAO,OAAO,mBAAmB,SACnC,yBAAwB,OAAO,gBAAgB,GAAG,MAAM,iBAAiB;AAG3E,KAAI,OAAO,WAAW,KACpB,uBAAsB,OAAO,SAAS,GAAG,MAAM,UAAU;CAG3D,MAAM,aAAa;EACjB,aAAa,OAAO;EACpB,WAAW,OAAO;EAClB,gBAAgB,OAAO;EACvB,SAAS,OAAO;EACjB;AAED,QAAO,OAAO,OAAO,WAAW,CAAC,MAAK,UAAS,SAAS,KAAK,GACzD,aACA,KAAA;;AAGN,SAAS,2BACP,UACA,qBAC4H;AAC5H,QAAO;EACL,aAAa,SAAS,iBAAiB,eAAe,qBAAqB,eAAe;EAC1F,WAAW,SAAS,iBAAiB,aAAa,qBAAqB,aAAa;EACpF,gBAAgB,SAAS,iBAAiB,kBAAkB,qBAAqB;EACjF,SAAS,SAAS,iBAAiB,WAAW,qBAAqB;EACpE;;AAGH,eAAe,YACb,SACA,UACA,OACA,SAC+B;CAC/B,MAAM,qCAAqB,IAAI,KAA2B;CAC1D,MAAM,kBAAkB,IAAI,iBAAiB;CAC7C,MAAM,YAAY,QAAQ,aAAa,4BAA4B;CACnE,MAAM,SAAS,yBAAyB,OAAO,SAAS,KAAK;CAC7D,IAAI;CACJ,IAAI,WAAW;CACf,IAAI,UAAU;AAEd,KAAI;AACF,SAAO,MAAM,UAAU,SAAS,eAAe;GAC7C,kBAAkB;GAClB,oBAAoB,SAAS;GAC7B,kBAAkB,QAAQ,KAAK;GAC/B,oBAAoB,QAAQ,KAAK,MAAM;GACxC,EAAE,YAAY;GACb,MAAM,aAAa,QAAQ,QAAQ,SAAS,IAAI;IAC9C,GAAG;IACH,QAAQ;KACN,GAAG,gBAAgB,QAAQ,KAAK,OAAO;KACvC,QAAQ,SAAS;KAClB;IACD,OAAO,MAAM,OAAO;AAClB,SAAI,gBAAgB,OAAO,WAAW,QACpC;AAGF,uBAAkB,QAAQ,eAAe;MACvC;MACA,MAAM;OACJ;OACA;OACD;MACD,OAAO;MACR,CAAC;AACF,eAAU,SAAS,sBAAsB;MAAE;MAAM;MAAO,CAAC;AACzD,SAAI,2BAA2B,MAAM,CACnC,WAAU,cAAc,GAAG,OAAO,OAAO,CAAC;;IAG9C,MAAM,OAAO,OAAO,SAAS;AAC3B,SAAI,gBAAgB,OAAO,WAAW,QACpC;AAGF,sBAAiB,MAAM;AACvB,wBAAmB,IAAI,MAAM,MAAM;AACnC,eAAU,SAAS,qBAAqB;MACtC,qBAAqB;MACrB,sBAAsB;MACvB,CAAC;AACF,uBAAkB,QAAQ,eAAe;MACvC;MACA,MAAM;OAAE;OAAM;OAAO;MACrB,OAAO;MACR,CAAC;;IAEJ,QAAQ,gBAAgB;IACzB,CAAC,CAAC;AAEH,OAAI,WAAW,MAAM;IACnB,MAAM,iBAAiB,IAAI,SAAgB,GAAG,WAAW;AACvD,qBAAgB,iBAAiB;AAC/B,iBAAW;AACX,sBAAgB,MAAM,uBAAuB,QAAQ,CAAC;AACtD,aAAO,uBAAuB,QAAQ,CAAC;QACtC,QAAQ;MACX;IAEF,MAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,YAAY,eAAe,CAAC;AAC/D,cAAU;AACV,WAAO;KACL;KACA,cAAc;KACd,OAAO;KACR;;GAGH,MAAM,SAAS,MAAM;AACrB,aAAU;AACV,UAAO;IACL;IACA,cAAc;IACd,OAAO;IACR;IACD;UAEG,OAAO;AACZ,YAAU;AACV,SAAO;GACL,cAAc,iBAAiB,MAAM,KAAK,YAAY,WAAW,OAAO,wBAAwB,QAAQ,OAAO;GAC/G,cAAc;GACd,OAAO,WAAW,YAAY;GAC/B;WAEK;AACN,MAAI,iBAAiB,KACnB,cAAa,cAAc;;;AAKjC,eAAe,sBACb,SACA,UACA,OACA,YACA,qBAC+B;CAC/B,MAAM,iBAAiB,2BAA2B,UAAU,oBAAoB;CAChF,IAAI;AAEJ,MAAK,IAAI,aAAa,GAAG,cAAc,eAAe,WAAW,cAAc,GAAG;AAChF,MAAI,aAAa,GAAG;GAClB,MAAM,eAAe,sBAAsB,gBAAgB,WAAW;AACtE,2BAAwB,cAAc,wBAAwB;AAE9D,OAAI,eAAe,EACjB,OAAM,MAAM,aAAa;;AAI7B,gBAAc,QAAQ,eAAe;GACnC,GAAI,eAAe,YAAY,IAC3B;IACE,WAAW,eAAe;IAC1B;IACD,GACD,EAAE;GACN;GACA,GAAI,SAAS,UAAU,KAAA,IAAY,EAAE,GAAG,EAAE,OAAO,SAAS,OAAO;GACjE,MAAM,SAAS;GACf,OAAO;GACR,CAAC;AACF,gBAAc,MAAM,YAAY,SAAS,UAAU,OAAO,eAAe,QAAQ;AACjF,MAAI,YAAY,UAAU,SACxB,QAAO;;AAIX,QAAO,eAAe;EACpB,cAAc;EACd,8BAAc,IAAI,KAAK;EACvB,OAAO;EACR;;AAGH,SAAS,yBACP,SACA,oBACM;AACN,KAAI,QAAQ,UAAU,UAAU;AAC9B,qBAAmB,MAAM,KAAK,EAAE;AAChC;;AAGF,KAAI,QAAQ,aAAa,SAAS,GAAG;AACnC,qBAAmB,MAAM,KAAK,EAAE;AAChC;;AAGF,oBAAmB,MAAM,KAAK,QAAQ,aAAa,IAAI,QAAQ,IAAI,EAAE;CACrE,MAAM,aAAa,QAAQ,aAAa,IAAI,QAAQ;AACpD,KAAI,cAAc,KAChB,oBAAmB,MAAM,KAAK,WAAW;;AAI7C,SAAS,gBACP,SACA,UACA,SACA,OACA,YACM;AACN,aAAY,QAAQ,eAAe;EACjC,GAAI,QAAQ,gBAAgB,OAAO,EAAE,GAAG,EAAE,cAAc,QAAQ,cAAc;EAC9E;EACA,GAAI,QAAQ,WAAW,KAAA,IAAY,EAAE,GAAG,EAAE,QAAQ,QAAQ,QAAQ;EAClE,OAAO,QAAQ;EACf,MAAM,SAAS;EACf,OAAO;EACR,CAAC;;AAgDJ,SAAS,kBAAkB,iBAAiE;CAG1F,SAAS,aACP,MACA,KACA,SACM;AACN,kBAAgB,KAAK;GACnB,iBAAiB,yBAAyB,SAAS,YAAY;GAC/D,OAAO,SAAS;GAChB;GACK;GACN,CAAC;;AAGJ,QAAO;EACL,QAAQ;EACR,gBAAgB,YAAY,QAAQ,KAAK,SAAS;GAChD,MAAM,WAAW,SAAS,eAAe,OAAO,KAAA,IAAY,EAAE;AAE9D,UAAO,SAAS,OAAO,UAAU;AAC/B,oBAAgB,KAAK;KACnB,aAAa,SAAS;KACtB,iBAAiB,yBAAyB,SAAS,kBAAkB;KACrE;KACA,MAAM,GAAG,WAAW,IAAI,QAAQ;KAChC;KACK;KACN,CAAC;KACF;;EAEL;;AAGH,IAAI,mBAAgD,EAAE;AAEtD,SAAS,gBAAmB,OAAkC,UAAsB;AAClF,oBAAmB,CAAC,GAAG,kBAAkB,MAAM;AAE/C,KAAI;AACF,SAAO,UAAU;WAEX;AACN,qBAAmB,iBAAiB,MAAM,GAAG,GAAG;;;AAIpD,SAAS,iBAA4C;CACnD,MAAM,SAAS,iBAAiB,GAAG,GAAG;AACtC,KAAI,UAAU,KACZ,OAAM,IAAI,MAAM,0EAA0E;AAG5F,QAAO;;AAiBT,SAAgB,OACd,MACA,KACA,SACM;AACN,iBAAgB,CAAC,KAAK;EACpB,iBAAiB,yBAAyB,SAAS,YAAY;EAC/D,OAAO,SAAS;EAChB;EACK;EACN,CAAC;;;;;AAMJ,SAAgB,gBACd,YACA,QACA,KACA,SACM;CACN,MAAM,WAAW,SAAS,eAAe,OAAO,KAAA,IAAY,EAAE;AAE9D,QAAO,SAAS,OAAO,UAAU;AAC/B,kBAAgB,CAAC,KAAK;GACpB,aAAa,SAAS;GACtB,iBAAiB,yBAAyB,SAAS,kBAAkB;GACrE;GACA,MAAM,GAAG,WAAW,IAAI,QAAQ;GAChC;GACK;GACN,CAAC;GACF;;;;;;;;;;;;;AAcJ,SAAS,uBACP,UACA,iBACA,oBACoB;CACpB,MAAM,cAAc,oBAAoB,QAAQ,SAAS,eAAe,iBAAiB;AACzF,KAAI,eAAe,KACjB;AAGF,KAAI,CAAC,OAAO,SAAS,YAAY,IAAI,CAAC,OAAO,UAAU,YAAY,IAAI,eAAe,EACpF,OAAM,IAAI,MAAM,kCAAkC,OAAO,YAAY,GAAG;AAG1E,QAAO;;AAGT,SAAS,oBAAoB,UAAmC,iBAAiC;AAC/F,QAAO,SAAS,YAAY;;;;;;;;;AAU9B,SAAgB,aACd,MACA,OACA,UAA+B,EAAE,EACjC;CACA,MAAM,kBAA6C,EAAE;CACrD,MAAM,UAAU,kBAAkB,gBAAgB;AAClD,iBAAgB,uBAAuB;AACrC,MAAI,MAAM,SAAS,GAAG;AACnB,SAAiD,QAAQ;AAC1D;;AAGA,SAAsB;GACxB;CAEF,MAAM,cAAc,QAAQ,eAAe;CAC3C,MAAM,sBAAsB,yBAAyB,SAAS,eAAe;CAE7E,MAAM,aAAa,WAAW;EAC5B;EACA;EACA,MAAM,WAAW;GACf,aAAa,QAAQ;GACrB,iBAAiB;GACjB,IAAI;GACJ,MAAM,IAAI,SAAiC;AACzC,QAAI,gBAAgB,WAAW,EAC7B,QAAO,EACL,QAAQ,CAAC;KAAE,MAAM;KAAS,OAAO;KAAG,CAAC,EACtC;IAGH,MAAM,aAAa,gBAAgB;IACnC,MAAM,qBAAqD;KACzD,OAAO,EAAE;KACT,OAAO,EAAE;KACV;IACD,MAAM,sBAAsB,EAAE;IAC9B,MAAM,6BAAa,IAAI,KAAsD;IAC7E,MAAM,iBAAiB,gBAAgB,MAAK,aAAY,2BAA2B,UAAU,oBAAoB,CAAC,cAAc,EAAE;IAClI,MAAM,yBAAyB,QAAQ,KAAK,MAAM,MAAM,eAAe,QAAQ;AAE/E,QAAI,CAAC,eACH,OAAM,QAAQ,IACZ,gBAAgB,IAAI,OAAO,UAAU,UAAU;KAC7C,MAAM,cAAc,YAAY;MAC9B,MAAM,UAAU,MAAM,sBAAsB,SAAS,UAAU,OAAO,YAAY,oBAAoB;AACtG,sBAAgB,SAAS,UAAU,SAAS,OAAO,WAAW;AAC9D,+BAAyB,SAAS,mBAAmB;;KAGvD,MAAM,cAAc,uBAAuB,UAAU,wBAAwB,QAAQ,mBAAmB;AACxG,SAAI,eAAe,MAAM;AACvB,YAAM,aAAa;AACnB;;KAGF,MAAM,WAAW,oBAAoB,UAAU,oBAAoB;KACnE,MAAM,QAAQ,WAAW,IAAI,SAAS,IAAI,qBAAqB,YAAY;AAC3E,gBAAW,IAAI,UAAU,MAAM;AAC/B,WAAM,MAAM,IAAI,YAAY;MAC5B,CACH;SAEE;KACH,IAAI,eAAe;AAEnB,cAAS;MACP,MAAM,kBAAkB,MAAM,QAAQ,IACpC,gBAAgB,IAAI,OAAO,UAAU,UAAU;OAC7C,MAAM,cAAc,YAAY,MAAM,sBAAsB,SAAS,UAAU,OAAO,YAAY,oBAAoB;OACtH,MAAM,cAAc,uBAAuB,UAAU,wBAAwB,QAAQ,mBAAmB;AACxG,WAAI,eAAe,KACjB,QAAO,MAAM,aAAa;OAG5B,MAAM,WAAW,oBAAoB,UAAU,oBAAoB;OACnE,MAAM,QAAQ,WAAW,IAAI,SAAS,IAAI,qBAAqB,YAAY;AAC3E,kBAAW,IAAI,UAAU,MAAM;AAC/B,cAAO,MAAM,MAAM,IAAI,YAAY;QACnC,CACH;AAED,sBAAgB,SAAS,SAAS,UAAU;OAC1C,MAAM,WAAW,gBAAgB;AACjC,WAAI,YAAY,KACd;AAGF,uBAAgB,SAAS,UAAU,SAAS,OAAO,WAAW;AAC9D,gCAAyB,SAAS,mBAAmB;QACrD;AAeF,UAAI,CAbmB,gBAAgB,MAAM,SAAS,UAAU;AAC9D,WAAI,QAAQ,UAAU,SACpB,QAAO;OAGT,MAAM,WAAW,gBAAgB;AACjC,WAAI,YAAY,KACd,QAAO;AAGT,cAAO,eAAe,2BAA2B,UAAU,oBAAoB,CAAC;QAChF,CAGA;AAGF,sBAAgB;;;AAepB,WAAO,EACL,QAZc,OAAO,KAAK,mBAAmB,CAC5C,QAAO,SAAQ,mBAAmB,MAAM,SAAS,EAAE,CACnD,KAAK,SAAS;KACb,MAAM,SAAS,mBAAmB;AAElC,YAAO;MACL;MACA,OAHY,OAAO,QAAQ,KAAK,UAAU,MAAM,OAAO,EAAE,GAG1C,OAAO;MACvB;MACD,EAIH;;GAEJ,CAAC;EACH,CAAC;AAEF,wBAAuB,WAAW;AAElC,QAAO;;;;;AAMT,MAAa,eAAe"}
|
|
1
|
+
{"version":3,"file":"index.mjs","names":[],"sources":["../src/dsl/task.ts"],"sourcesContent":["import type { TaskConcurrencyConfig, TaskExecutionPolicy, TaskReporterEventPayload, TaskRunContext, TaskRunOutput } from '../config'\nimport type { RunScoreKind } from '../core/runner'\nimport type { TelemetryAttributeValue } from '../core/telemetry'\n\nimport { errorMessageFrom, sleep } from '@moeru/std'\n\nimport { defineEval, defineTask } from '../config'\nimport { createSchedulerQueue } from '../core/scheduler/queue'\nimport { createNoopTelemetryRuntime } from '../core/telemetry'\nimport { registerEvalDefinition } from './registry'\n\n/**\n * Per-case registration options for `caseOf`.\n */\nexport interface CaseRegistrationOptions<TInput> extends TaskExecutionPolicy {\n /**\n * Optional case input payload.\n */\n input: TInput\n}\n\n/**\n * Runtime context provided to a task case callback.\n */\nexport interface CaseRunContext<TInput> extends TaskRunContext {\n /**\n * Case-scoped matrix payload.\n */\n matrix: TaskRunContext['task']['matrix'] & { inputs: TInput }\n /**\n * Emits one custom case metric into report events.\n *\n * Use when:\n * - tasks need structured benchmark metadata beyond exact/judge score families\n *\n * Expects:\n * - `name` to be a stable metric identifier\n * - `value` to be JSON-serializable\n */\n metric: (name: string, value: TelemetryAttributeValue) => void\n /**\n * Overrides one case score family with a custom normalized value.\n *\n * Use when:\n * - one case computes a benchmark-native score that should flow into run aggregation\n *\n * Expects:\n * - `score` to stay in the `0..1` range\n */\n score: (score: number, kind?: RunScoreKind) => void\n /**\n * Cooperative abort signal for the current case execution.\n */\n signal: AbortSignal\n}\n\n/**\n * Callback for one task case.\n */\nexport type CaseRunner<TInput> = (context: CaseRunContext<TInput>) => Promise<unknown> | unknown\n\n/**\n * Per-group options for `casesFromInputs`.\n *\n * Use when:\n * - one generated case group should run with a lower case concurrency than the task default\n * - a task should keep a broader task-level cap while one expensive case family stays bounded\n *\n * Expects:\n * - `concurrency` to be a positive integer when provided\n *\n * Returns:\n * - one partial case-group execution descriptor\n */\nexport interface CasesFromInputsOptions extends TaskExecutionPolicy {\n /**\n * Case-level concurrency cap for cases registered by one `casesFromInputs(...)` call.\n */\n concurrency?: number\n}\n\n/**\n * Builder callbacks passed into `describeTask`.\n */\nexport interface DescribeTaskBuilder {\n /**\n * Registers one explicit case.\n */\n caseOf: {\n (name: string, run: CaseRunner<undefined>): void\n <TInput>(name: string, run: CaseRunner<TInput>, options: CaseRegistrationOptions<TInput>): void\n }\n /**\n * Registers multiple cases from input list.\n */\n casesFromInputs: <TInput>(\n namePrefix: string,\n inputs: readonly TInput[],\n run: CaseRunner<TInput>,\n options?: CasesFromInputsOptions,\n ) => void\n}\n\n/**\n * Options for `describeTask`.\n */\nexport interface DescribeTaskOptions extends TaskExecutionPolicy {\n /**\n * Optional task-local concurrency overrides.\n *\n * Use when:\n * - one task should cap attempt fan-out independently from the surrounding project\n * - one task should cap case fan-out without changing global scheduling defaults\n *\n * Expects:\n * - each provided value to be a positive integer\n *\n * @default inherited from project or CLI concurrency settings\n */\n concurrency?: TaskConcurrencyConfig\n /**\n * Optional description override.\n */\n description?: string\n}\n\ninterface CaseExecutionOutcome {\n errorMessage?: string\n output?: unknown\n scoresByKind: Map<RunScoreKind, number>\n state: 'failed' | 'passed' | 'timeout'\n}\n\ninterface RegisteredCase<TInput> {\n concurrency?: number\n executionPolicy?: TaskExecutionPolicy\n input: TInput\n name: string\n queueKey?: object\n run: CaseRunner<TInput>\n}\n\nfunction assertNonNegativeInteger(value: number, label: string): void {\n if (!Number.isFinite(value) || !Number.isInteger(value) || value < 0) {\n throw new Error(`Invalid ${label}: ${String(value)}`)\n }\n}\n\nfunction assertNonNegativeNumber(value: number, label: string): void {\n if (!Number.isFinite(value) || value < 0) {\n throw new Error(`Invalid ${label}: ${String(value)}`)\n }\n}\n\nfunction assertPositiveInteger(value: number, label: string): void {\n if (!Number.isFinite(value) || !Number.isInteger(value) || value <= 0) {\n throw new Error(`Invalid ${label}: ${String(value)}`)\n }\n}\n\nfunction assertValidScore(score: number): void {\n if (!Number.isFinite(score) || score < 0 || score > 1) {\n throw new Error(`Case score must be a finite number in range 0..1, got \"${score}\".`)\n }\n}\n\nfunction autoRetryDelayMs(retryIndex: number): number {\n // Retry index 1 is the first retry after the initial case failure.\n return 500 * 2 ** (retryIndex - 1)\n}\n\nfunction canAttachMetricAsAttribute(value: TelemetryAttributeValue): value is boolean | number | readonly boolean[] | readonly number[] | readonly string[] | string {\n if (isTelemetryAttributeScalar(value)) {\n return true\n }\n\n return Array.isArray(value) && isTelemetryAttributeArray(value)\n}\n\nfunction cloneCaseMatrix(matrix: TaskRunContext['task']['matrix']): TaskRunContext['task']['matrix'] {\n return {\n eval: {\n ...matrix.eval,\n },\n meta: {\n ...matrix.meta,\n },\n run: {\n ...matrix.run,\n },\n }\n}\n\nfunction collectCaseOutcomeScores(\n outcome: CaseExecutionOutcome,\n scoreBucketsByKind: Record<RunScoreKind, number[]>,\n): void {\n if (outcome.state !== 'passed') {\n scoreBucketsByKind.exact.push(0)\n return\n }\n\n if (outcome.scoresByKind.size === 0) {\n scoreBucketsByKind.exact.push(1)\n return\n }\n\n scoreBucketsByKind.exact.push(outcome.scoresByKind.get('exact') ?? 1)\n const judgeScore = outcome.scoresByKind.get('judge')\n if (judgeScore != null) {\n scoreBucketsByKind.judge.push(judgeScore)\n }\n}\n\nfunction createCaseBuilder(registeredCases: RegisteredCase<unknown>[]): DescribeTaskBuilder {\n function registerCase(name: string, run: CaseRunner<undefined>): void\n function registerCase<TInput>(name: string, run: CaseRunner<TInput>, options: CaseRegistrationOptions<TInput>): void\n function registerCase<TInput>(\n name: string,\n run: CaseRunner<TInput> | CaseRunner<undefined>,\n options?: CaseRegistrationOptions<TInput>,\n ): void {\n registeredCases.push({\n executionPolicy: normalizeExecutionPolicy(options, 'task case'),\n input: options?.input,\n name,\n run: run as CaseRunner<unknown>,\n })\n }\n\n return {\n caseOf: registerCase,\n casesFromInputs(namePrefix, inputs, run, options) {\n const queueKey = options?.concurrency == null ? undefined : {}\n\n inputs.forEach((input, index) => {\n registeredCases.push({\n concurrency: options?.concurrency,\n executionPolicy: normalizeExecutionPolicy(options, 'casesFromInputs'),\n input,\n name: `${namePrefix} #${index + 1}`,\n queueKey,\n run: run as CaseRunner<unknown>,\n })\n })\n },\n }\n}\n\nfunction createCaseTimeoutError(timeout: number): Error {\n const error = new Error(`Case timed out after ${timeout}ms.`)\n error.name = 'TimeoutError'\n return error\n}\n\nfunction createTaskCaseReporterId(index: number, name: string): string {\n return `${index}:${encodeURIComponent(name)}`\n}\n\nfunction emitCaseEnd(\n hooks: TaskRunContext['reporterHooks'] | undefined,\n payload: {\n errorMessage?: string\n index: number\n name: string\n output?: unknown\n state: 'failed' | 'passed' | 'timeout'\n total: number\n },\n): void {\n try {\n hooks?.onCaseEnd?.(payload)\n }\n catch {\n // Reporter hooks must never affect task scoring.\n }\n}\n\nfunction emitCaseOutcome(\n context: TaskRunContext,\n taskCase: RegisteredCase<unknown>,\n outcome: CaseExecutionOutcome,\n index: number,\n totalCases: number,\n): void {\n emitCaseEnd(context.reporterHooks, {\n ...(outcome.errorMessage == null ? {} : { errorMessage: outcome.errorMessage }),\n index,\n ...(outcome.output === undefined ? {} : { output: outcome.output }),\n name: taskCase.name,\n state: outcome.state,\n total: totalCases,\n })\n}\n\nfunction emitCaseStart(\n hooks: TaskRunContext['reporterHooks'] | undefined,\n payload: {\n autoRetry?: number\n index: number\n input?: unknown\n name: string\n retryIndex?: number\n total: number\n },\n): void {\n try {\n hooks?.onCaseStart?.(payload)\n }\n catch {\n // Reporter hooks must never affect task scoring.\n }\n}\n\nfunction emitReporterEvent(\n hooks: TaskRunContext['reporterHooks'] | undefined,\n payload: TaskReporterEventPayload,\n): void {\n try {\n hooks?.onEvent?.(payload)\n }\n catch {\n // Reporter hooks must never affect task scoring.\n }\n}\n\nasync function executeRegisteredCase(\n context: TaskRunContext,\n taskCase: RegisteredCase<unknown>,\n index: number,\n totalCases: number,\n taskExecutionPolicy: TaskExecutionPolicy | undefined,\n): Promise<CaseExecutionOutcome> {\n const resolvedPolicy = resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy)\n let lastOutcome: CaseExecutionOutcome | undefined\n\n for (let retryIndex = 0; retryIndex <= resolvedPolicy.autoRetry; retryIndex += 1) {\n if (retryIndex > 0) {\n const retryDelayMs = resolveAutoRetryDelay(resolvedPolicy, retryIndex)\n assertNonNegativeNumber(retryDelayMs, 'autoRetryDelay result')\n\n if (retryDelayMs > 0) {\n await sleep(retryDelayMs)\n }\n }\n\n emitCaseStart(context.reporterHooks, {\n ...(resolvedPolicy.autoRetry > 0\n ? {\n autoRetry: resolvedPolicy.autoRetry,\n retryIndex,\n }\n : {}),\n index,\n ...(taskCase.input === undefined ? {} : { input: taskCase.input }),\n name: taskCase.name,\n total: totalCases,\n })\n lastOutcome = await runCaseOnce(context, taskCase, index, resolvedPolicy.timeout)\n if (lastOutcome.state === 'passed') {\n return lastOutcome\n }\n }\n\n return lastOutcome ?? {\n errorMessage: 'Unknown case failure.',\n scoresByKind: new Map(),\n state: 'failed',\n }\n}\n\nfunction isTelemetryAttributeArray(value: readonly TelemetryAttributeValue[]): value is readonly boolean[] | readonly number[] | readonly string[] {\n return value.every(isTelemetryAttributeScalar)\n}\n\nfunction isTelemetryAttributeScalar(value: unknown): value is boolean | number | string {\n return typeof value === 'boolean' || typeof value === 'number' || typeof value === 'string'\n}\n\nfunction normalizeExecutionPolicy(policy: TaskExecutionPolicy | undefined, label: string): TaskExecutionPolicy | undefined {\n if (policy == null) {\n return undefined\n }\n\n if (policy.autoAttempt != null) {\n assertNonNegativeInteger(policy.autoAttempt, `${label} autoAttempt`)\n }\n\n if (policy.autoRetry != null) {\n assertNonNegativeInteger(policy.autoRetry, `${label} autoRetry`)\n }\n\n if (typeof policy.autoRetryDelay === 'number') {\n assertNonNegativeNumber(policy.autoRetryDelay, `${label} autoRetryDelay`)\n }\n\n if (policy.timeout != null) {\n assertPositiveInteger(policy.timeout, `${label} timeout`)\n }\n\n const normalized = {\n autoAttempt: policy.autoAttempt,\n autoRetry: policy.autoRetry,\n autoRetryDelay: policy.autoRetryDelay,\n timeout: policy.timeout,\n }\n\n return Object.values(normalized).some(value => value != null)\n ? normalized\n : undefined\n}\n\nfunction resolveAutoRetryDelay(policy: TaskExecutionPolicy, retryIndex: number): number {\n const delay = policy.autoRetryDelay\n\n if (delay == null) {\n return autoRetryDelayMs(retryIndex)\n }\n\n return typeof delay === 'number' ? delay : delay(retryIndex)\n}\n\nfunction resolveCaseExecutionPolicy(\n taskCase: RegisteredCase<unknown>,\n taskExecutionPolicy: TaskExecutionPolicy | undefined,\n): Pick<TaskExecutionPolicy, 'autoRetryDelay' | 'timeout'> & Required<Pick<TaskExecutionPolicy, 'autoAttempt' | 'autoRetry'>> {\n return {\n autoAttempt: taskCase.executionPolicy?.autoAttempt ?? taskExecutionPolicy?.autoAttempt ?? 0,\n autoRetry: taskCase.executionPolicy?.autoRetry ?? taskExecutionPolicy?.autoRetry ?? 0,\n autoRetryDelay: taskCase.executionPolicy?.autoRetryDelay ?? taskExecutionPolicy?.autoRetryDelay,\n timeout: taskCase.executionPolicy?.timeout ?? taskExecutionPolicy?.timeout,\n }\n}\n\nasync function runCaseOnce(\n context: TaskRunContext,\n taskCase: RegisteredCase<unknown>,\n index: number,\n timeout: number | undefined,\n): Promise<CaseExecutionOutcome> {\n const customScoresByKind = new Map<RunScoreKind, number>()\n const abortController = new AbortController()\n const telemetry = context.telemetry ?? createNoopTelemetryRuntime()\n const caseId = createTaskCaseReporterId(index, taskCase.name)\n let timeoutHandle: ReturnType<typeof setTimeout> | undefined\n let timedOut = false\n let settled = false\n\n try {\n return await telemetry.withSpan('vieval.case', {\n 'vieval.case.id': caseId,\n 'vieval.case.name': taskCase.name,\n 'vieval.task.id': context.task.id,\n 'vieval.task.name': context.task.entry.name,\n }, async () => {\n const runPromise = Promise.resolve(taskCase.run({\n ...context,\n matrix: {\n ...cloneCaseMatrix(context.task.matrix),\n inputs: taskCase.input,\n },\n metric(name, value) {\n if (abortController.signal.aborted || settled) {\n return\n }\n\n emitReporterEvent(context.reporterHooks, {\n caseId,\n data: {\n name,\n value,\n },\n event: 'task.case.metric',\n })\n telemetry.addEvent('vieval.case.metric', { name, value })\n if (canAttachMetricAsAttribute(value)) {\n telemetry.setAttributes({ [name]: value })\n }\n },\n score(score, kind = 'exact') {\n if (abortController.signal.aborted || settled) {\n return\n }\n\n assertValidScore(score)\n customScoresByKind.set(kind, score)\n telemetry.addEvent('vieval.case.score', {\n 'vieval.score.kind': kind,\n 'vieval.score.value': score,\n })\n emitReporterEvent(context.reporterHooks, {\n caseId,\n data: { kind, score },\n event: 'task.case.score',\n })\n },\n signal: abortController.signal,\n }))\n\n if (timeout != null) {\n const timeoutPromise = new Promise<never>((_, reject) => {\n timeoutHandle = setTimeout(() => {\n timedOut = true\n abortController.abort(createCaseTimeoutError(timeout))\n reject(createCaseTimeoutError(timeout))\n }, timeout)\n })\n\n const output = await Promise.race([runPromise, timeoutPromise])\n settled = true\n return {\n output,\n scoresByKind: customScoresByKind,\n state: 'passed',\n }\n }\n\n const output = await runPromise\n settled = true\n return {\n output,\n scoresByKind: customScoresByKind,\n state: 'passed',\n }\n })\n }\n catch (error) {\n settled = true\n return {\n errorMessage: errorMessageFrom(error) ?? (timedOut && timeout != null ? `Case timed out after ${timeout}ms.` : 'Unknown case failure.'),\n scoresByKind: customScoresByKind,\n state: timedOut ? 'timeout' : 'failed',\n }\n }\n finally {\n if (timeoutHandle != null) {\n clearTimeout(timeoutHandle)\n }\n }\n}\n\nlet activeCasesStack: RegisteredCase<unknown>[][] = []\n\n/**\n * Registers one case in the currently active task scope.\n */\nexport function caseOf(\n name: string,\n run: CaseRunner<undefined>,\n): void\nexport function caseOf<TInput>(\n name: string,\n run: CaseRunner<TInput>,\n options: CaseRegistrationOptions<TInput>,\n): void\nexport function caseOf<TInput>(\n name: string,\n run: CaseRunner<TInput> | CaseRunner<undefined>,\n options?: CaseRegistrationOptions<TInput>,\n): void {\n getActiveCases().push({\n executionPolicy: normalizeExecutionPolicy(options, 'task case'),\n input: options?.input,\n name,\n run: run as CaseRunner<unknown>,\n })\n}\n\n/**\n * Registers multiple cases in the currently active task scope.\n */\nexport function casesFromInputs<TInput>(\n namePrefix: string,\n inputs: readonly TInput[],\n run: CaseRunner<TInput>,\n options?: CasesFromInputsOptions,\n): void {\n const queueKey = options?.concurrency == null ? undefined : {}\n\n inputs.forEach((input, index) => {\n getActiveCases().push({\n concurrency: options?.concurrency,\n executionPolicy: normalizeExecutionPolicy(options, 'casesFromInputs'),\n input,\n name: `${namePrefix} #${index + 1}`,\n queueKey,\n run: run as CaseRunner<unknown>,\n })\n })\n}\n\n/**\n * Defines one eval task with task/case semantics similar to Vitest.\n *\n * Use when:\n * - task behavior should be declared with `caseOf` and `casesFromInputs`\n * - business agent code should be imported and run from eval task files\n */\nexport function describeTask(\n name: string,\n build: (() => void) | ((builder: DescribeTaskBuilder) => void),\n options: DescribeTaskOptions = {},\n) {\n const registeredCases: RegisteredCase<unknown>[] = []\n const builder = createCaseBuilder(registeredCases)\n withActiveCases(registeredCases, () => {\n if (build.length > 0) {\n (build as (builder: DescribeTaskBuilder) => void)(builder)\n return\n }\n\n ;(build as () => void)()\n })\n\n const description = options.description ?? name\n const taskExecutionPolicy = normalizeExecutionPolicy(options, 'describeTask')\n\n const definition = defineEval({\n description,\n name,\n task: defineTask({\n concurrency: options.concurrency,\n executionPolicy: taskExecutionPolicy,\n id: name,\n async run(context): Promise<TaskRunOutput> {\n if (registeredCases.length === 0) {\n return {\n scores: [{ kind: 'exact', score: 1 }],\n }\n }\n\n const totalCases = registeredCases.length\n const scoreBucketsByKind: Record<RunScoreKind, number[]> = {\n exact: [],\n judge: [],\n }\n const defaultCaseQueueKey = {}\n const caseQueues = new Map<object, ReturnType<typeof createSchedulerQueue>>()\n const hasAutoAttempt = registeredCases.some(taskCase => resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy).autoAttempt > 0)\n const runtimeTaskConcurrency = context.task.entry.task?.concurrency ?? options.concurrency\n\n if (!hasAutoAttempt) {\n await Promise.all(\n registeredCases.map(async (taskCase, index) => {\n const executeCase = async () => {\n const outcome = await executeRegisteredCase(context, taskCase, index, totalCases, taskExecutionPolicy)\n emitCaseOutcome(context, taskCase, outcome, index, totalCases)\n collectCaseOutcomeScores(outcome, scoreBucketsByKind)\n }\n\n const concurrency = resolveCaseConcurrency(taskCase, runtimeTaskConcurrency, context.runtimeConcurrency)\n if (concurrency == null) {\n await executeCase()\n return\n }\n\n const queueKey = resolveCaseQueueKey(taskCase, defaultCaseQueueKey)\n const queue = caseQueues.get(queueKey) ?? createSchedulerQueue(concurrency)\n caseQueues.set(queueKey, queue)\n await queue.run(executeCase)\n }),\n )\n }\n else {\n let attemptIndex = 0\n\n for (;;) {\n const attemptOutcomes = await Promise.all(\n registeredCases.map(async (taskCase, index) => {\n const executeCase = async () => await executeRegisteredCase(context, taskCase, index, totalCases, taskExecutionPolicy)\n const concurrency = resolveCaseConcurrency(taskCase, runtimeTaskConcurrency, context.runtimeConcurrency)\n if (concurrency == null) {\n return await executeCase()\n }\n\n const queueKey = resolveCaseQueueKey(taskCase, defaultCaseQueueKey)\n const queue = caseQueues.get(queueKey) ?? createSchedulerQueue(concurrency)\n caseQueues.set(queueKey, queue)\n return await queue.run(executeCase)\n }),\n )\n\n attemptOutcomes.forEach((outcome, index) => {\n const taskCase = registeredCases[index]\n if (taskCase == null) {\n return\n }\n\n emitCaseOutcome(context, taskCase, outcome, index, totalCases)\n collectCaseOutcomeScores(outcome, scoreBucketsByKind)\n })\n\n const shouldContinue = attemptOutcomes.some((outcome, index) => {\n if (outcome.state === 'passed') {\n return false\n }\n\n const taskCase = registeredCases[index]\n if (taskCase == null) {\n return false\n }\n\n return attemptIndex < resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy).autoAttempt\n })\n\n if (!shouldContinue) {\n break\n }\n\n attemptIndex += 1\n }\n }\n\n const scores = (Object.keys(scoreBucketsByKind) as RunScoreKind[])\n .filter(kind => scoreBucketsByKind[kind].length > 0)\n .map((kind) => {\n const values = scoreBucketsByKind[kind]\n const total = values.reduce((sum, value) => sum + value, 0)\n return {\n kind,\n score: total / values.length,\n }\n })\n\n return {\n scores,\n }\n },\n }),\n })\n\n registerEvalDefinition(definition)\n\n return definition\n}\n\nfunction getActiveCases(): RegisteredCase<unknown>[] {\n const active = activeCasesStack.at(-1)\n if (active == null) {\n throw new Error('caseOf/casesFromInputs must be called inside describeTask/describeEval.')\n }\n\n return active\n}\n\n/**\n * Resolves the effective case concurrency for one registered task case.\n *\n * Before:\n * - registered case override `2`, task default `4`\n * - registered case override `undefined`, task default `3`\n *\n * After:\n * - `2`\n * - `3`\n */\nfunction resolveCaseConcurrency(\n taskCase: RegisteredCase<unknown>,\n taskConcurrency: TaskConcurrencyConfig | undefined,\n runtimeConcurrency: TaskConcurrencyConfig | undefined,\n): number | undefined {\n const concurrency = runtimeConcurrency?.case ?? taskCase.concurrency ?? taskConcurrency?.case\n if (concurrency == null) {\n return undefined\n }\n\n if (!Number.isFinite(concurrency) || !Number.isInteger(concurrency) || concurrency <= 0) {\n throw new Error(`Invalid task case concurrency: ${String(concurrency)}`)\n }\n\n return concurrency\n}\n\nfunction resolveCaseQueueKey(taskCase: RegisteredCase<unknown>, defaultQueueKey: object): object {\n return taskCase.queueKey ?? defaultQueueKey\n}\n\nfunction withActiveCases<T>(cases: RegisteredCase<unknown>[], callback: () => T): T {\n activeCasesStack = [...activeCasesStack, cases]\n\n try {\n return callback()\n }\n finally {\n activeCasesStack = activeCasesStack.slice(0, -1)\n }\n}\n\n/**\n * Alias of `describeTask` for eval-centric naming.\n */\nexport const describeEval = describeTask\n"],"mappings":";;;;;;;AA8IA,SAAS,yBAAyB,OAAe,OAAqB;CACpE,IAAI,CAAC,OAAO,SAAS,KAAK,KAAK,CAAC,OAAO,UAAU,KAAK,KAAK,QAAQ,GACjE,MAAM,IAAI,MAAM,WAAW,MAAM,IAAI,OAAO,KAAK,GAAG;AAExD;AAEA,SAAS,wBAAwB,OAAe,OAAqB;CACnE,IAAI,CAAC,OAAO,SAAS,KAAK,KAAK,QAAQ,GACrC,MAAM,IAAI,MAAM,WAAW,MAAM,IAAI,OAAO,KAAK,GAAG;AAExD;AAEA,SAAS,sBAAsB,OAAe,OAAqB;CACjE,IAAI,CAAC,OAAO,SAAS,KAAK,KAAK,CAAC,OAAO,UAAU,KAAK,KAAK,SAAS,GAClE,MAAM,IAAI,MAAM,WAAW,MAAM,IAAI,OAAO,KAAK,GAAG;AAExD;AAEA,SAAS,iBAAiB,OAAqB;CAC7C,IAAI,CAAC,OAAO,SAAS,KAAK,KAAK,QAAQ,KAAK,QAAQ,GAClD,MAAM,IAAI,MAAM,0DAA0D,MAAM,GAAG;AAEvF;AAEA,SAAS,iBAAiB,YAA4B;CAEpD,OAAO,MAAM,MAAM,aAAa;AAClC;AAEA,SAAS,2BAA2B,OAAiI;CACnK,IAAI,2BAA2B,KAAK,GAClC,OAAO;CAGT,OAAO,MAAM,QAAQ,KAAK,KAAK,0BAA0B,KAAK;AAChE;AAEA,SAAS,gBAAgB,QAA4E;CACnG,OAAO;EACL,MAAM,EACJ,GAAG,OAAO,KACZ;EACA,MAAM,EACJ,GAAG,OAAO,KACZ;EACA,KAAK,EACH,GAAG,OAAO,IACZ;CACF;AACF;AAEA,SAAS,yBACP,SACA,oBACM;CACN,IAAI,QAAQ,UAAU,UAAU;EAC9B,mBAAmB,MAAM,KAAK,CAAC;EAC/B;CACF;CAEA,IAAI,QAAQ,aAAa,SAAS,GAAG;EACnC,mBAAmB,MAAM,KAAK,CAAC;EAC/B;CACF;CAEA,mBAAmB,MAAM,KAAK,QAAQ,aAAa,IAAI,OAAO,KAAK,CAAC;CACpE,MAAM,aAAa,QAAQ,aAAa,IAAI,OAAO;CACnD,IAAI,cAAc,MAChB,mBAAmB,MAAM,KAAK,UAAU;AAE5C;AAEA,SAAS,kBAAkB,iBAAiE;CAG1F,SAAS,aACP,MACA,KACA,SACM;EACN,gBAAgB,KAAK;GACnB,iBAAiB,yBAAyB,SAAS,WAAW;GAC9D,OAAO,SAAS;GAChB;GACK;EACP,CAAC;CACH;CAEA,OAAO;EACL,QAAQ;EACR,gBAAgB,YAAY,QAAQ,KAAK,SAAS;GAChD,MAAM,WAAW,SAAS,eAAe,OAAO,KAAA,IAAY,CAAC;GAE7D,OAAO,SAAS,OAAO,UAAU;IAC/B,gBAAgB,KAAK;KACnB,aAAa,SAAS;KACtB,iBAAiB,yBAAyB,SAAS,iBAAiB;KACpE;KACA,MAAM,GAAG,WAAW,IAAI,QAAQ;KAChC;KACK;IACP,CAAC;GACH,CAAC;EACH;CACF;AACF;AAEA,SAAS,uBAAuB,SAAwB;CACtD,MAAM,wBAAQ,IAAI,MAAM,wBAAwB,QAAQ,IAAI;CAC5D,MAAM,OAAO;CACb,OAAO;AACT;AAEA,SAAS,yBAAyB,OAAe,MAAsB;CACrE,OAAO,GAAG,MAAM,GAAG,mBAAmB,IAAI;AAC5C;AAEA,SAAS,YACP,OACA,SAQM;CACN,IAAI;EACF,OAAO,YAAY,OAAO;CAC5B,QACM,CAEN;AACF;AAEA,SAAS,gBACP,SACA,UACA,SACA,OACA,YACM;CACN,YAAY,QAAQ,eAAe;EACjC,GAAI,QAAQ,gBAAgB,OAAO,CAAC,IAAI,EAAE,cAAc,QAAQ,aAAa;EAC7E;EACA,GAAI,QAAQ,WAAW,KAAA,IAAY,CAAC,IAAI,EAAE,QAAQ,QAAQ,OAAO;EACjE,MAAM,SAAS;EACf,OAAO,QAAQ;EACf,OAAO;CACT,CAAC;AACH;AAEA,SAAS,cACP,OACA,SAQM;CACN,IAAI;EACF,OAAO,cAAc,OAAO;CAC9B,QACM,CAEN;AACF;AAEA,SAAS,kBACP,OACA,SACM;CACN,IAAI;EACF,OAAO,UAAU,OAAO;CAC1B,QACM,CAEN;AACF;AAEA,eAAe,sBACb,SACA,UACA,OACA,YACA,qBAC+B;CAC/B,MAAM,iBAAiB,2BAA2B,UAAU,mBAAmB;CAC/E,IAAI;CAEJ,KAAK,IAAI,aAAa,GAAG,cAAc,eAAe,WAAW,cAAc,GAAG;EAChF,IAAI,aAAa,GAAG;GAClB,MAAM,eAAe,sBAAsB,gBAAgB,UAAU;GACrE,wBAAwB,cAAc,uBAAuB;GAE7D,IAAI,eAAe,GACjB,MAAM,MAAM,YAAY;EAE5B;EAEA,cAAc,QAAQ,eAAe;GACnC,GAAI,eAAe,YAAY,IAC3B;IACE,WAAW,eAAe;IAC1B;GACF,IACA,CAAC;GACL;GACA,GAAI,SAAS,UAAU,KAAA,IAAY,CAAC,IAAI,EAAE,OAAO,SAAS,MAAM;GAChE,MAAM,SAAS;GACf,OAAO;EACT,CAAC;EACD,cAAc,MAAM,YAAY,SAAS,UAAU,OAAO,eAAe,OAAO;EAChF,IAAI,YAAY,UAAU,UACxB,OAAO;CAEX;CAEA,OAAO,eAAe;EACpB,cAAc;EACd,8BAAc,IAAI,IAAI;EACtB,OAAO;CACT;AACF;AAEA,SAAS,0BAA0B,OAAgH;CACjJ,OAAO,MAAM,MAAM,0BAA0B;AAC/C;AAEA,SAAS,2BAA2B,OAAoD;CACtF,OAAO,OAAO,UAAU,aAAa,OAAO,UAAU,YAAY,OAAO,UAAU;AACrF;AAEA,SAAS,yBAAyB,QAAyC,OAAgD;CACzH,IAAI,UAAU,MACZ;CAGF,IAAI,OAAO,eAAe,MACxB,yBAAyB,OAAO,aAAa,GAAG,MAAM,aAAa;CAGrE,IAAI,OAAO,aAAa,MACtB,yBAAyB,OAAO,WAAW,GAAG,MAAM,WAAW;CAGjE,IAAI,OAAO,OAAO,mBAAmB,UACnC,wBAAwB,OAAO,gBAAgB,GAAG,MAAM,gBAAgB;CAG1E,IAAI,OAAO,WAAW,MACpB,sBAAsB,OAAO,SAAS,GAAG,MAAM,SAAS;CAG1D,MAAM,aAAa;EACjB,aAAa,OAAO;EACpB,WAAW,OAAO;EAClB,gBAAgB,OAAO;EACvB,SAAS,OAAO;CAClB;CAEA,OAAO,OAAO,OAAO,UAAU,CAAC,CAAC,MAAK,UAAS,SAAS,IAAI,IACxD,aACA,KAAA;AACN;AAEA,SAAS,sBAAsB,QAA6B,YAA4B;CACtF,MAAM,QAAQ,OAAO;CAErB,IAAI,SAAS,MACX,OAAO,iBAAiB,UAAU;CAGpC,OAAO,OAAO,UAAU,WAAW,QAAQ,MAAM,UAAU;AAC7D;AAEA,SAAS,2BACP,UACA,qBAC4H;CAC5H,OAAO;EACL,aAAa,SAAS,iBAAiB,eAAe,qBAAqB,eAAe;EAC1F,WAAW,SAAS,iBAAiB,aAAa,qBAAqB,aAAa;EACpF,gBAAgB,SAAS,iBAAiB,kBAAkB,qBAAqB;EACjF,SAAS,SAAS,iBAAiB,WAAW,qBAAqB;CACrE;AACF;AAEA,eAAe,YACb,SACA,UACA,OACA,SAC+B;CAC/B,MAAM,qCAAqB,IAAI,IAA0B;CACzD,MAAM,kBAAkB,IAAI,gBAAgB;CAC5C,MAAM,YAAY,QAAQ,aAAa,2BAA2B;CAClE,MAAM,SAAS,yBAAyB,OAAO,SAAS,IAAI;CAC5D,IAAI;CACJ,IAAI,WAAW;CACf,IAAI,UAAU;CAEd,IAAI;EACF,OAAO,MAAM,UAAU,SAAS,eAAe;GAC7C,kBAAkB;GAClB,oBAAoB,SAAS;GAC7B,kBAAkB,QAAQ,KAAK;GAC/B,oBAAoB,QAAQ,KAAK,MAAM;EACzC,GAAG,YAAY;GACb,MAAM,aAAa,QAAQ,QAAQ,SAAS,IAAI;IAC9C,GAAG;IACH,QAAQ;KACN,GAAG,gBAAgB,QAAQ,KAAK,MAAM;KACtC,QAAQ,SAAS;IACnB;IACA,OAAO,MAAM,OAAO;KAClB,IAAI,gBAAgB,OAAO,WAAW,SACpC;KAGF,kBAAkB,QAAQ,eAAe;MACvC;MACA,MAAM;OACJ;OACA;MACF;MACA,OAAO;KACT,CAAC;KACD,UAAU,SAAS,sBAAsB;MAAE;MAAM;KAAM,CAAC;KACxD,IAAI,2BAA2B,KAAK,GAClC,UAAU,cAAc,GAAG,OAAO,MAAM,CAAC;IAE7C;IACA,MAAM,OAAO,OAAO,SAAS;KAC3B,IAAI,gBAAgB,OAAO,WAAW,SACpC;KAGF,iBAAiB,KAAK;KACtB,mBAAmB,IAAI,MAAM,KAAK;KAClC,UAAU,SAAS,qBAAqB;MACtC,qBAAqB;MACrB,sBAAsB;KACxB,CAAC;KACD,kBAAkB,QAAQ,eAAe;MACvC;MACA,MAAM;OAAE;OAAM;MAAM;MACpB,OAAO;KACT,CAAC;IACH;IACA,QAAQ,gBAAgB;GAC1B,CAAC,CAAC;GAEF,IAAI,WAAW,MAAM;IACnB,MAAM,iBAAiB,IAAI,SAAgB,GAAG,WAAW;KACvD,gBAAgB,iBAAiB;MAC/B,WAAW;MACX,gBAAgB,MAAM,uBAAuB,OAAO,CAAC;MACrD,OAAO,uBAAuB,OAAO,CAAC;KACxC,GAAG,OAAO;IACZ,CAAC;IAED,MAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,YAAY,cAAc,CAAC;IAC9D,UAAU;IACV,OAAO;KACL;KACA,cAAc;KACd,OAAO;IACT;GACF;GAEA,MAAM,SAAS,MAAM;GACrB,UAAU;GACV,OAAO;IACL;IACA,cAAc;IACd,OAAO;GACT;EACF,CAAC;CACH,SACO,OAAO;EACZ,UAAU;EACV,OAAO;GACL,cAAc,iBAAiB,KAAK,MAAM,YAAY,WAAW,OAAO,wBAAwB,QAAQ,OAAO;GAC/G,cAAc;GACd,OAAO,WAAW,YAAY;EAChC;CACF,UACQ;EACN,IAAI,iBAAiB,MACnB,aAAa,aAAa;CAE9B;AACF;AAEA,IAAI,mBAAgD,CAAC;AAcrD,SAAgB,OACd,MACA,KACA,SACM;CACN,eAAe,CAAC,CAAC,KAAK;EACpB,iBAAiB,yBAAyB,SAAS,WAAW;EAC9D,OAAO,SAAS;EAChB;EACK;CACP,CAAC;AACH;;;;AAKA,SAAgB,gBACd,YACA,QACA,KACA,SACM;CACN,MAAM,WAAW,SAAS,eAAe,OAAO,KAAA,IAAY,CAAC;CAE7D,OAAO,SAAS,OAAO,UAAU;EAC/B,eAAe,CAAC,CAAC,KAAK;GACpB,aAAa,SAAS;GACtB,iBAAiB,yBAAyB,SAAS,iBAAiB;GACpE;GACA,MAAM,GAAG,WAAW,IAAI,QAAQ;GAChC;GACK;EACP,CAAC;CACH,CAAC;AACH;;;;;;;;AASA,SAAgB,aACd,MACA,OACA,UAA+B,CAAC,GAChC;CACA,MAAM,kBAA6C,CAAC;CACpD,MAAM,UAAU,kBAAkB,eAAe;CACjD,gBAAgB,uBAAuB;EACrC,IAAI,MAAM,SAAS,GAAG;GACpB,MAAkD,OAAO;GACzD;EACF;EAEC,MAAsB;CACzB,CAAC;CAED,MAAM,cAAc,QAAQ,eAAe;CAC3C,MAAM,sBAAsB,yBAAyB,SAAS,cAAc;CAE5E,MAAM,aAAa,WAAW;EAC5B;EACA;EACA,MAAM,WAAW;GACf,aAAa,QAAQ;GACrB,iBAAiB;GACjB,IAAI;GACJ,MAAM,IAAI,SAAiC;IACzC,IAAI,gBAAgB,WAAW,GAC7B,OAAO,EACL,QAAQ,CAAC;KAAE,MAAM;KAAS,OAAO;IAAE,CAAC,EACtC;IAGF,MAAM,aAAa,gBAAgB;IACnC,MAAM,qBAAqD;KACzD,OAAO,CAAC;KACR,OAAO,CAAC;IACV;IACA,MAAM,sBAAsB,CAAC;IAC7B,MAAM,6BAAa,IAAI,IAAqD;IAC5E,MAAM,iBAAiB,gBAAgB,MAAK,aAAY,2BAA2B,UAAU,mBAAmB,CAAC,CAAC,cAAc,CAAC;IACjI,MAAM,yBAAyB,QAAQ,KAAK,MAAM,MAAM,eAAe,QAAQ;IAE/E,IAAI,CAAC,gBACH,MAAM,QAAQ,IACZ,gBAAgB,IAAI,OAAO,UAAU,UAAU;KAC7C,MAAM,cAAc,YAAY;MAC9B,MAAM,UAAU,MAAM,sBAAsB,SAAS,UAAU,OAAO,YAAY,mBAAmB;MACrG,gBAAgB,SAAS,UAAU,SAAS,OAAO,UAAU;MAC7D,yBAAyB,SAAS,kBAAkB;KACtD;KAEA,MAAM,cAAc,uBAAuB,UAAU,wBAAwB,QAAQ,kBAAkB;KACvG,IAAI,eAAe,MAAM;MACvB,MAAM,YAAY;MAClB;KACF;KAEA,MAAM,WAAW,oBAAoB,UAAU,mBAAmB;KAClE,MAAM,QAAQ,WAAW,IAAI,QAAQ,KAAK,qBAAqB,WAAW;KAC1E,WAAW,IAAI,UAAU,KAAK;KAC9B,MAAM,MAAM,IAAI,WAAW;IAC7B,CAAC,CACH;SAEG;KACH,IAAI,eAAe;KAEnB,SAAS;MACP,MAAM,kBAAkB,MAAM,QAAQ,IACpC,gBAAgB,IAAI,OAAO,UAAU,UAAU;OAC7C,MAAM,cAAc,YAAY,MAAM,sBAAsB,SAAS,UAAU,OAAO,YAAY,mBAAmB;OACrH,MAAM,cAAc,uBAAuB,UAAU,wBAAwB,QAAQ,kBAAkB;OACvG,IAAI,eAAe,MACjB,OAAO,MAAM,YAAY;OAG3B,MAAM,WAAW,oBAAoB,UAAU,mBAAmB;OAClE,MAAM,QAAQ,WAAW,IAAI,QAAQ,KAAK,qBAAqB,WAAW;OAC1E,WAAW,IAAI,UAAU,KAAK;OAC9B,OAAO,MAAM,MAAM,IAAI,WAAW;MACpC,CAAC,CACH;MAEA,gBAAgB,SAAS,SAAS,UAAU;OAC1C,MAAM,WAAW,gBAAgB;OACjC,IAAI,YAAY,MACd;OAGF,gBAAgB,SAAS,UAAU,SAAS,OAAO,UAAU;OAC7D,yBAAyB,SAAS,kBAAkB;MACtD,CAAC;MAeD,IAAI,CAbmB,gBAAgB,MAAM,SAAS,UAAU;OAC9D,IAAI,QAAQ,UAAU,UACpB,OAAO;OAGT,MAAM,WAAW,gBAAgB;OACjC,IAAI,YAAY,MACd,OAAO;OAGT,OAAO,eAAe,2BAA2B,UAAU,mBAAmB,CAAC,CAAC;MAClF,CAEkB,GAChB;MAGF,gBAAgB;KAClB;IACF;IAaA,OAAO,EACL,QAZc,OAAO,KAAK,kBAAkB,CAAC,CAC5C,QAAO,SAAQ,mBAAmB,KAAK,CAAC,SAAS,CAAC,CAAC,CACnD,KAAK,SAAS;KACb,MAAM,SAAS,mBAAmB;KAElC,OAAO;MACL;MACA,OAHY,OAAO,QAAQ,KAAK,UAAU,MAAM,OAAO,CAG5C,IAAI,OAAO;KACxB;IACF,CAGK,EACP;GACF;EACF,CAAC;CACH,CAAC;CAED,uBAAuB,UAAU;CAEjC,OAAO;AACT;AAEA,SAAS,iBAA4C;CACnD,MAAM,SAAS,iBAAiB,GAAG,EAAE;CACrC,IAAI,UAAU,MACZ,MAAM,IAAI,MAAM,yEAAyE;CAG3F,OAAO;AACT;;;;;;;;;;;;AAaA,SAAS,uBACP,UACA,iBACA,oBACoB;CACpB,MAAM,cAAc,oBAAoB,QAAQ,SAAS,eAAe,iBAAiB;CACzF,IAAI,eAAe,MACjB;CAGF,IAAI,CAAC,OAAO,SAAS,WAAW,KAAK,CAAC,OAAO,UAAU,WAAW,KAAK,eAAe,GACpF,MAAM,IAAI,MAAM,kCAAkC,OAAO,WAAW,GAAG;CAGzE,OAAO;AACT;AAEA,SAAS,oBAAoB,UAAmC,iBAAiC;CAC/F,OAAO,SAAS,YAAY;AAC9B;AAEA,SAAS,gBAAmB,OAAkC,UAAsB;CAClF,mBAAmB,CAAC,GAAG,kBAAkB,KAAK;CAE9C,IAAI;EACF,OAAO,SAAS;CAClB,UACQ;EACN,mBAAmB,iBAAiB,MAAM,GAAG,EAAE;CACjD;AACF;;;;AAKA,MAAa,eAAe"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"models-
|
|
1
|
+
{"version":3,"file":"models-CaCOUPZw.mjs","names":[],"sources":["../src/config/models.ts"],"sourcesContent":["import type { TaskExecutionPolicy } from './types'\n\n/**\n * Canonical model definition consumed by vieval runtime and config.\n *\n * Use when:\n * - declaring models in `vieval.config.*`\n * - resolving task runtime models by id, alias, or concrete model name\n *\n * Expects:\n * - `id` to be stable and unique within one config\n * - `inferenceExecutorId` to match scheduler/executor identifiers\n *\n * Returns:\n * - one normalized model registration record\n */\nexport interface ModelDefinition {\n /**\n * Alias names that can resolve this model.\n */\n aliases: string[]\n /**\n * Optional execution policy hints attached to this model.\n */\n executionPolicy?: TaskExecutionPolicy\n /**\n * Stable model id.\n */\n id: string\n /**\n * Executor reference passed through config.\n *\n * `vieval` core treats this as opaque runtime metadata. Builder plugins can\n * narrow this field with plugin-specific executor input types.\n */\n inferenceExecutor: unknown\n /**\n * Inference-executor id used for matching and reporting.\n */\n inferenceExecutorId: string\n /**\n * Concrete model name passed to the inference executor.\n */\n model: string\n /**\n * Optional model-level call parameters.\n */\n parameters?: Record<string, unknown>\n}\n\n/**\n * Resolves one model by id, model name, or alias in registration order.\n *\n * Returns:\n * - the first matching model, or `undefined` when no match exists\n */\nexport function resolveModelByName(\n models: readonly ModelDefinition[],\n name: string,\n): ModelDefinition | undefined {\n return models.find(model => model.id === name || model.model === name || model.aliases.includes(name))\n}\n"],"mappings":";;;;;;;AAwDA,SAAgB,mBACd,QACA,MAC6B;CAC7B,OAAO,OAAO,MAAK,UAAS,MAAM,OAAO,QAAQ,MAAM,UAAU,QAAQ,MAAM,QAAQ,SAAS,IAAI,CAAC;AACvG"}
|