vieval 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/dist/bin/vieval.mjs +1 -1
  2. package/dist/cli/index.mjs +1 -1
  3. package/dist/{cli-DayPXzHX.mjs → cli-sanbKtQq.mjs} +277 -49
  4. package/dist/cli-sanbKtQq.mjs.map +1 -0
  5. package/dist/config.d.mts +2 -2
  6. package/dist/config.mjs +1 -1
  7. package/dist/core/assertions/index.d.mts +1 -1
  8. package/dist/core/inference-executors/index.d.mts +1 -1
  9. package/dist/core/inference-executors/index.mjs +1 -1
  10. package/dist/core/processors/results/index.d.mts +1 -1
  11. package/dist/core/runner/index.d.mts +3 -2
  12. package/dist/core/runner/index.mjs +3 -2
  13. package/dist/core/runner/index.mjs.map +1 -1
  14. package/dist/core/scheduler/index.d.mts +2 -0
  15. package/dist/core/scheduler/index.mjs +188 -0
  16. package/dist/core/scheduler/index.mjs.map +1 -0
  17. package/dist/{env-BFSjny07.mjs → env--94B0UtW.mjs} +1 -1
  18. package/dist/{env-BFSjny07.mjs.map → env--94B0UtW.mjs.map} +1 -1
  19. package/dist/{env-BTq3dV7C.d.mts → env-BeHv_5mo.d.mts} +1 -1
  20. package/dist/{expect-extensions-QLXESWjn.mjs → expect-extensions-DCSqlneN.mjs} +1 -1
  21. package/dist/{expect-extensions-QLXESWjn.mjs.map → expect-extensions-DCSqlneN.mjs.map} +1 -1
  22. package/dist/expect.mjs +1 -1
  23. package/dist/{index-OEdqjQSe.d.mts → index-DBZKkpBe.d.mts} +105 -3
  24. package/dist/index-fakXoZEe.d.mts +147 -0
  25. package/dist/index.d.mts +110 -11
  26. package/dist/index.mjs +214 -53
  27. package/dist/index.mjs.map +1 -1
  28. package/dist/{models-D_MsBtYw.mjs → models-DIGdOUpJ.mjs} +1 -1
  29. package/dist/models-DIGdOUpJ.mjs.map +1 -0
  30. package/dist/plugins/chat-models/index.d.mts +21 -1
  31. package/dist/plugins/chat-models/index.mjs +27 -1
  32. package/dist/plugins/chat-models/index.mjs.map +1 -1
  33. package/dist/queue-DsZQkZO_.mjs +21 -0
  34. package/dist/queue-DsZQkZO_.mjs.map +1 -0
  35. package/dist/{registry-CwcMMjnZ.mjs → registry-CcKZqDJY.mjs} +25 -3
  36. package/dist/registry-CcKZqDJY.mjs.map +1 -0
  37. package/dist/testing/expect-extensions.mjs +1 -1
  38. package/package.json +7 -1
  39. package/dist/cli-DayPXzHX.mjs.map +0 -1
  40. package/dist/models-D_MsBtYw.mjs.map +0 -1
  41. package/dist/registry-CwcMMjnZ.mjs.map +0 -1
package/dist/index.mjs CHANGED
@@ -1,5 +1,6 @@
1
- import { a as defineConfig, i as registerEvalDefinition, s as loadEnv } from "./registry-CwcMMjnZ.mjs";
2
- import { n as requiredEnvFrom } from "./env-BFSjny07.mjs";
1
+ import { a as defineConfig, i as registerEvalDefinition, s as loadEnv } from "./registry-CcKZqDJY.mjs";
2
+ import { t as createSchedulerQueue } from "./queue-DsZQkZO_.mjs";
3
+ import { n as requiredEnvFrom } from "./env--94B0UtW.mjs";
3
4
  import { defineEval, defineTask } from "./config.mjs";
4
5
  import { expect } from "./expect.mjs";
5
6
  import { errorMessageFrom } from "@moeru/std";
@@ -17,6 +18,12 @@ function createTaskCaseReporterId(index, name) {
17
18
  function assertValidScore(score) {
18
19
  if (!Number.isFinite(score) || score < 0 || score > 1) throw new Error(`Case score must be a finite number in range 0..1, got "${score}".`);
19
20
  }
21
+ function assertNonNegativeInteger(value, label) {
22
+ if (!Number.isFinite(value) || !Number.isInteger(value) || value < 0) throw new Error(`Invalid ${label}: ${String(value)}`);
23
+ }
24
+ function assertPositiveInteger(value, label) {
25
+ if (!Number.isFinite(value) || !Number.isInteger(value) || value <= 0) throw new Error(`Invalid ${label}: ${String(value)}`);
26
+ }
20
27
  function emitCaseStart(hooks, payload) {
21
28
  try {
22
29
  hooks?.onCaseStart?.(payload);
@@ -27,9 +34,126 @@ function emitCaseEnd(hooks, payload) {
27
34
  hooks?.onCaseEnd?.(payload);
28
35
  } catch {}
29
36
  }
37
+ function createCaseTimeoutError(timeout) {
38
+ const error = /* @__PURE__ */ new Error(`Case timed out after ${timeout}ms.`);
39
+ error.name = "TimeoutError";
40
+ return error;
41
+ }
42
+ function normalizeExecutionPolicy(policy, label) {
43
+ if (policy == null) return;
44
+ if (policy.autoAttempt != null) assertNonNegativeInteger(policy.autoAttempt, `${label} autoAttempt`);
45
+ if (policy.autoRetry != null) assertNonNegativeInteger(policy.autoRetry, `${label} autoRetry`);
46
+ if (policy.timeout != null) assertPositiveInteger(policy.timeout, `${label} timeout`);
47
+ const normalized = {
48
+ autoAttempt: policy.autoAttempt,
49
+ autoRetry: policy.autoRetry,
50
+ timeout: policy.timeout
51
+ };
52
+ return Object.values(normalized).some((value) => value != null) ? normalized : void 0;
53
+ }
54
+ function resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy) {
55
+ return {
56
+ autoAttempt: taskCase.executionPolicy?.autoAttempt ?? taskExecutionPolicy?.autoAttempt ?? 0,
57
+ autoRetry: taskCase.executionPolicy?.autoRetry ?? taskExecutionPolicy?.autoRetry ?? 0,
58
+ timeout: taskCase.executionPolicy?.timeout ?? taskExecutionPolicy?.timeout
59
+ };
60
+ }
61
+ async function runCaseOnce(context, taskCase, index, timeout) {
62
+ const customScoresByKind = /* @__PURE__ */ new Map();
63
+ const abortController = new AbortController();
64
+ let timeoutHandle;
65
+ let timedOut = false;
66
+ let settled = false;
67
+ try {
68
+ const runPromise = Promise.resolve(taskCase.run({
69
+ ...context,
70
+ matrix: {
71
+ ...cloneCaseMatrix(context.task.matrix),
72
+ inputs: taskCase.input
73
+ },
74
+ metric(name, value) {
75
+ if (abortController.signal.aborted || settled) return;
76
+ context.reporterHooks?.onEvent?.({
77
+ caseId: createTaskCaseReporterId(index, taskCase.name),
78
+ data: {
79
+ name,
80
+ value
81
+ },
82
+ event: "task.case.metric"
83
+ });
84
+ },
85
+ score(score, kind = "exact") {
86
+ if (abortController.signal.aborted || settled) return;
87
+ assertValidScore(score);
88
+ customScoresByKind.set(kind, score);
89
+ },
90
+ signal: abortController.signal
91
+ }));
92
+ if (timeout != null) {
93
+ const timeoutPromise = new Promise((_, reject) => {
94
+ timeoutHandle = setTimeout(() => {
95
+ timedOut = true;
96
+ abortController.abort(createCaseTimeoutError(timeout));
97
+ reject(createCaseTimeoutError(timeout));
98
+ }, timeout);
99
+ });
100
+ await Promise.race([runPromise, timeoutPromise]);
101
+ } else await runPromise;
102
+ settled = true;
103
+ return {
104
+ scoresByKind: customScoresByKind,
105
+ state: "passed"
106
+ };
107
+ } catch (error) {
108
+ settled = true;
109
+ return {
110
+ errorMessage: errorMessageFrom(error) ?? (timedOut && timeout != null ? `Case timed out after ${timeout}ms.` : "Unknown case failure."),
111
+ scoresByKind: customScoresByKind,
112
+ state: timedOut ? "timeout" : "failed"
113
+ };
114
+ } finally {
115
+ if (timeoutHandle != null) clearTimeout(timeoutHandle);
116
+ }
117
+ }
118
+ async function executeRegisteredCase(context, taskCase, index, totalCases, taskExecutionPolicy) {
119
+ const resolvedPolicy = resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy);
120
+ let lastOutcome;
121
+ for (let retryIndex = 0; retryIndex <= resolvedPolicy.autoRetry; retryIndex += 1) {
122
+ emitCaseStart(context.reporterHooks, {
123
+ ...resolvedPolicy.autoRetry > 0 ? {
124
+ autoRetry: resolvedPolicy.autoRetry,
125
+ retryIndex
126
+ } : {},
127
+ index,
128
+ name: taskCase.name,
129
+ total: totalCases
130
+ });
131
+ lastOutcome = await runCaseOnce(context, taskCase, index, resolvedPolicy.timeout);
132
+ if (lastOutcome.state === "passed") return lastOutcome;
133
+ }
134
+ return lastOutcome ?? {
135
+ errorMessage: "Unknown case failure.",
136
+ scoresByKind: /* @__PURE__ */ new Map(),
137
+ state: "failed"
138
+ };
139
+ }
140
+ function collectCaseOutcomeScores(outcome, scoreBucketsByKind) {
141
+ if (outcome.state !== "passed") {
142
+ scoreBucketsByKind.exact.push(0);
143
+ return;
144
+ }
145
+ if (outcome.scoresByKind.size === 0) {
146
+ scoreBucketsByKind.exact.push(1);
147
+ return;
148
+ }
149
+ scoreBucketsByKind.exact.push(outcome.scoresByKind.get("exact") ?? 1);
150
+ const judgeScore = outcome.scoresByKind.get("judge");
151
+ if (judgeScore != null) scoreBucketsByKind.judge.push(judgeScore);
152
+ }
30
153
  function createCaseBuilder(registeredCases) {
31
154
  function registerCase(name, run, options) {
32
155
  registeredCases.push({
156
+ executionPolicy: normalizeExecutionPolicy(options, "task case"),
33
157
  input: options?.input,
34
158
  name,
35
159
  run
@@ -37,11 +161,15 @@ function createCaseBuilder(registeredCases) {
37
161
  }
38
162
  return {
39
163
  caseOf: registerCase,
40
- casesFromInputs(namePrefix, inputs, run) {
164
+ casesFromInputs(namePrefix, inputs, run, options) {
165
+ const queueKey = options?.concurrency == null ? void 0 : {};
41
166
  inputs.forEach((input, index) => {
42
167
  registeredCases.push({
168
+ concurrency: options?.concurrency,
169
+ executionPolicy: normalizeExecutionPolicy(options, "casesFromInputs"),
43
170
  input,
44
171
  name: `${namePrefix} #${index + 1}`,
172
+ queueKey,
45
173
  run
46
174
  });
47
175
  });
@@ -64,6 +192,7 @@ function getActiveCases() {
64
192
  }
65
193
  function caseOf(name, run, options) {
66
194
  getActiveCases().push({
195
+ executionPolicy: normalizeExecutionPolicy(options, "task case"),
67
196
  input: options?.input,
68
197
  name,
69
198
  run
@@ -72,16 +201,40 @@ function caseOf(name, run, options) {
72
201
  /**
73
202
  * Registers multiple cases in the currently active task scope.
74
203
  */
75
- function casesFromInputs(namePrefix, inputs, run) {
204
+ function casesFromInputs(namePrefix, inputs, run, options) {
205
+ const queueKey = options?.concurrency == null ? void 0 : {};
76
206
  inputs.forEach((input, index) => {
77
207
  getActiveCases().push({
208
+ concurrency: options?.concurrency,
209
+ executionPolicy: normalizeExecutionPolicy(options, "casesFromInputs"),
78
210
  input,
79
211
  name: `${namePrefix} #${index + 1}`,
212
+ queueKey,
80
213
  run
81
214
  });
82
215
  });
83
216
  }
84
217
  /**
218
+ * Resolves the effective case concurrency for one registered task case.
219
+ *
220
+ * Before:
221
+ * - registered case override `2`, task default `4`
222
+ * - registered case override `undefined`, task default `3`
223
+ *
224
+ * After:
225
+ * - `2`
226
+ * - `3`
227
+ */
228
+ function resolveCaseConcurrency(taskCase, taskConcurrency, runtimeConcurrency) {
229
+ const concurrency = runtimeConcurrency?.case ?? taskCase.concurrency ?? taskConcurrency?.case;
230
+ if (concurrency == null) return;
231
+ if (!Number.isFinite(concurrency) || !Number.isInteger(concurrency) || concurrency <= 0) throw new Error(`Invalid task case concurrency: ${String(concurrency)}`);
232
+ return concurrency;
233
+ }
234
+ function resolveCaseQueueKey(taskCase, defaultQueueKey) {
235
+ return taskCase.queueKey ?? defaultQueueKey;
236
+ }
237
+ /**
85
238
  * Defines one eval task with task/case semantics similar to Vitest.
86
239
  *
87
240
  * Use when:
@@ -98,10 +251,14 @@ function describeTask(name, build, options = {}) {
98
251
  }
99
252
  build();
100
253
  });
254
+ const description = options.description ?? name;
255
+ const taskExecutionPolicy = normalizeExecutionPolicy(options, "describeTask");
101
256
  const definition = defineEval({
102
- description: options.description ?? name,
257
+ description,
103
258
  name,
104
259
  task: defineTask({
260
+ concurrency: options.concurrency,
261
+ executionPolicy: taskExecutionPolicy,
105
262
  id: name,
106
263
  async run(context) {
107
264
  if (registeredCases.length === 0) return { scores: [{
@@ -113,62 +270,66 @@ function describeTask(name, build, options = {}) {
113
270
  exact: [],
114
271
  judge: []
115
272
  };
116
- await Promise.all(registeredCases.map(async (taskCase, index) => {
117
- emitCaseStart(context.reporterHooks, {
118
- index,
119
- name: taskCase.name,
120
- total: totalCases
121
- });
122
- let state = "passed";
123
- let errorMessage;
124
- const caseId = createTaskCaseReporterId(index, taskCase.name);
125
- const customScoresByKind = /* @__PURE__ */ new Map();
126
- try {
127
- await taskCase.run({
128
- ...context,
129
- matrix: {
130
- ...cloneCaseMatrix(context.task.matrix),
131
- inputs: taskCase.input
132
- },
133
- metric(name, value) {
134
- context.reporterHooks?.onEvent?.({
135
- caseId,
136
- data: {
137
- name,
138
- value
139
- },
140
- event: "task.case.metric"
141
- });
142
- },
143
- score(score, kind = "exact") {
144
- assertValidScore(score);
145
- customScoresByKind.set(kind, score);
146
- }
147
- });
148
- } catch (error) {
149
- state = "failed";
150
- errorMessage = errorMessageFrom(error) ?? "Unknown case failure.";
151
- } finally {
273
+ const defaultCaseQueueKey = {};
274
+ const caseQueues = /* @__PURE__ */ new Map();
275
+ const hasAutoAttempt = registeredCases.some((taskCase) => resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy).autoAttempt > 0);
276
+ const runtimeTaskConcurrency = context.task.entry.task?.concurrency ?? options.concurrency;
277
+ if (!hasAutoAttempt) await Promise.all(registeredCases.map(async (taskCase, index) => {
278
+ const executeCase = async () => {
279
+ const outcome = await executeRegisteredCase(context, taskCase, index, totalCases, taskExecutionPolicy);
152
280
  emitCaseEnd(context.reporterHooks, {
153
- ...errorMessage == null ? {} : { errorMessage },
281
+ ...outcome.errorMessage == null ? {} : { errorMessage: outcome.errorMessage },
154
282
  index,
155
- state,
283
+ state: outcome.state,
156
284
  name: taskCase.name,
157
285
  total: totalCases
158
286
  });
159
- }
160
- if (state === "failed") {
161
- scoreBucketsByKind.exact.push(0);
162
- return;
163
- }
164
- if (customScoresByKind.size === 0) {
165
- scoreBucketsByKind.exact.push(1);
287
+ collectCaseOutcomeScores(outcome, scoreBucketsByKind);
288
+ };
289
+ const concurrency = resolveCaseConcurrency(taskCase, runtimeTaskConcurrency, context.runtimeConcurrency);
290
+ if (concurrency == null) {
291
+ await executeCase();
166
292
  return;
167
293
  }
168
- scoreBucketsByKind.exact.push(customScoresByKind.get("exact") ?? 1);
169
- const judgeScore = customScoresByKind.get("judge");
170
- if (judgeScore != null) scoreBucketsByKind.judge.push(judgeScore);
294
+ const queueKey = resolveCaseQueueKey(taskCase, defaultCaseQueueKey);
295
+ const queue = caseQueues.get(queueKey) ?? createSchedulerQueue(concurrency);
296
+ caseQueues.set(queueKey, queue);
297
+ await queue.run(executeCase);
171
298
  }));
299
+ else {
300
+ let finalOutcomes = [];
301
+ let attemptIndex = 0;
302
+ for (;;) {
303
+ finalOutcomes = await Promise.all(registeredCases.map(async (taskCase, index) => {
304
+ const executeCase = async () => await executeRegisteredCase(context, taskCase, index, totalCases, taskExecutionPolicy);
305
+ const concurrency = resolveCaseConcurrency(taskCase, runtimeTaskConcurrency, context.runtimeConcurrency);
306
+ if (concurrency == null) return await executeCase();
307
+ const queueKey = resolveCaseQueueKey(taskCase, defaultCaseQueueKey);
308
+ const queue = caseQueues.get(queueKey) ?? createSchedulerQueue(concurrency);
309
+ caseQueues.set(queueKey, queue);
310
+ return await queue.run(executeCase);
311
+ }));
312
+ if (!finalOutcomes.some((outcome, index) => {
313
+ if (outcome.state === "passed") return false;
314
+ const taskCase = registeredCases[index];
315
+ if (taskCase == null) return false;
316
+ return attemptIndex < resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy).autoAttempt;
317
+ })) break;
318
+ attemptIndex += 1;
319
+ }
320
+ finalOutcomes.forEach((outcome, index) => {
321
+ const taskCase = registeredCases[index];
322
+ if (taskCase == null) return;
323
+ emitCaseEnd(context.reporterHooks, {
324
+ ...outcome.errorMessage == null ? {} : { errorMessage: outcome.errorMessage },
325
+ index,
326
+ state: outcome.state,
327
+ name: taskCase.name,
328
+ total: totalCases
329
+ });
330
+ collectCaseOutcomeScores(outcome, scoreBucketsByKind);
331
+ });
332
+ }
172
333
  return { scores: Object.keys(scoreBucketsByKind).filter((kind) => scoreBucketsByKind[kind].length > 0).map((kind) => {
173
334
  const values = scoreBucketsByKind[kind];
174
335
  return {
@@ -1 +1 @@
1
- {"version":3,"file":"index.mjs","names":[],"sources":["../src/dsl/task.ts"],"sourcesContent":["import type { TaskRunContext, TaskRunOutput } from '../config'\nimport type { RunScoreKind } from '../core/runner'\n\nimport { errorMessageFrom } from '@moeru/std'\n\nimport { defineEval, defineTask } from '../config'\nimport { registerEvalDefinition } from './registry'\n\n/**\n * Runtime context provided to a task case callback.\n */\nexport interface CaseRunContext<TInput> extends TaskRunContext {\n /**\n * Case-scoped matrix payload.\n */\n matrix: TaskRunContext['task']['matrix'] & { inputs: TInput }\n /**\n * Overrides one case score family with a custom normalized value.\n *\n * Use when:\n * - one case computes a benchmark-native score that should flow into run aggregation\n *\n * Expects:\n * - `score` to stay in the `0..1` range\n */\n score: (score: number, kind?: RunScoreKind) => void\n /**\n * Emits one custom case metric into report events.\n *\n * Use when:\n * - tasks need structured benchmark metadata beyond exact/judge score families\n *\n * Expects:\n * - `name` to be a stable metric identifier\n * - `value` to be JSON-serializable\n */\n metric: (name: string, value: boolean | number | string | null) => void\n}\n\n/**\n * Callback for one task case.\n */\nexport type CaseRunner<TInput> = (context: CaseRunContext<TInput>) => Promise<void> | void\n\ninterface RegisteredCase<TInput> {\n input: TInput\n name: string\n run: CaseRunner<TInput>\n}\n\nfunction cloneCaseMatrix(matrix: TaskRunContext['task']['matrix']): TaskRunContext['task']['matrix'] {\n return {\n eval: {\n ...matrix.eval,\n },\n meta: {\n ...matrix.meta,\n },\n run: {\n ...matrix.run,\n },\n }\n}\n\nfunction createTaskCaseReporterId(index: number, name: string): string {\n return `${index}:${encodeURIComponent(name)}`\n}\n\nfunction assertValidScore(score: number): void {\n if (!Number.isFinite(score) || score < 0 || score > 1) {\n throw new Error(`Case score must be a finite number in 
range 0..1, got \"${score}\".`)\n }\n}\n\nfunction emitCaseStart(\n hooks: TaskRunContext['reporterHooks'] | undefined,\n payload: {\n index: number\n name: string\n total: number\n },\n): void {\n try {\n hooks?.onCaseStart?.(payload)\n }\n catch {\n // Reporter hooks must never affect task scoring.\n }\n}\n\nfunction emitCaseEnd(\n hooks: TaskRunContext['reporterHooks'] | undefined,\n payload: {\n index: number\n state: 'passed' | 'failed'\n name: string\n total: number\n errorMessage?: string\n },\n): void {\n try {\n hooks?.onCaseEnd?.(payload)\n }\n catch {\n // Reporter hooks must never affect task scoring.\n }\n}\n\n/**\n * Builder callbacks passed into `describeTask`.\n */\nexport interface DescribeTaskBuilder {\n /**\n * Registers one explicit case.\n */\n caseOf: {\n (name: string, run: CaseRunner<undefined>): void\n <TInput>(name: string, run: CaseRunner<TInput>, options: { input: TInput }): void\n }\n /**\n * Registers multiple cases from input list.\n */\n casesFromInputs: <TInput>(\n namePrefix: string,\n inputs: readonly TInput[],\n run: CaseRunner<TInput>,\n ) => void\n}\n\n/**\n * Options for `describeTask`.\n */\nexport interface DescribeTaskOptions {\n /**\n * Optional description override.\n */\n description?: string\n}\n\nfunction createCaseBuilder(registeredCases: RegisteredCase<unknown>[]): DescribeTaskBuilder {\n function registerCase(name: string, run: CaseRunner<undefined>): void\n function registerCase<TInput>(name: string, run: CaseRunner<TInput>, options: { input: TInput }): void\n function registerCase<TInput>(\n name: string,\n run: CaseRunner<TInput> | CaseRunner<undefined>,\n options?: { input: TInput },\n ): void {\n registeredCases.push({\n input: options?.input,\n name,\n run: run as CaseRunner<unknown>,\n })\n }\n\n return {\n caseOf: registerCase,\n casesFromInputs(namePrefix, inputs, run) {\n inputs.forEach((input, index) => {\n registeredCases.push({\n input,\n name: `${namePrefix} #${index + 1}`,\n run: run as 
CaseRunner<unknown>,\n })\n })\n },\n }\n}\n\nlet activeCasesStack: RegisteredCase<unknown>[][] = []\n\nfunction withActiveCases<T>(cases: RegisteredCase<unknown>[], callback: () => T): T {\n activeCasesStack = [...activeCasesStack, cases]\n\n try {\n return callback()\n }\n finally {\n activeCasesStack = activeCasesStack.slice(0, -1)\n }\n}\n\nfunction getActiveCases(): RegisteredCase<unknown>[] {\n const active = activeCasesStack.at(-1)\n if (active == null) {\n throw new Error('caseOf/casesFromInputs must be called inside describeTask/describeEval.')\n }\n\n return active\n}\n\n/**\n * Registers one case in the currently active task scope.\n */\nexport function caseOf(\n name: string,\n run: CaseRunner<undefined>,\n): void\n\nexport function caseOf<TInput>(\n name: string,\n run: CaseRunner<TInput>,\n options: { input: TInput },\n): void\n\nexport function caseOf<TInput>(\n name: string,\n run: CaseRunner<TInput> | CaseRunner<undefined>,\n options?: { input: TInput },\n): void {\n getActiveCases().push({\n input: options?.input,\n name,\n run: run as CaseRunner<unknown>,\n })\n}\n\n/**\n * Registers multiple cases in the currently active task scope.\n */\nexport function casesFromInputs<TInput>(\n namePrefix: string,\n inputs: readonly TInput[],\n run: CaseRunner<TInput>,\n): void {\n inputs.forEach((input, index) => {\n getActiveCases().push({\n input,\n name: `${namePrefix} #${index + 1}`,\n run: run as CaseRunner<unknown>,\n })\n })\n}\n\n/**\n * Defines one eval task with task/case semantics similar to Vitest.\n *\n * Use when:\n * - task behavior should be declared with `caseOf` and `casesFromInputs`\n * - business agent code should be imported and run from eval task files\n */\nexport function describeTask(\n name: string,\n build: ((builder: DescribeTaskBuilder) => void) | (() => void),\n options: DescribeTaskOptions = {},\n) {\n const registeredCases: RegisteredCase<unknown>[] = []\n const builder = createCaseBuilder(registeredCases)\n 
withActiveCases(registeredCases, () => {\n if (build.length > 0) {\n (build as (builder: DescribeTaskBuilder) => void)(builder)\n return\n }\n\n ;(build as () => void)()\n })\n\n const description = options.description ?? name\n\n const definition = defineEval({\n description,\n name,\n task: defineTask({\n id: name,\n async run(context): Promise<TaskRunOutput> {\n if (registeredCases.length === 0) {\n return {\n scores: [{ kind: 'exact', score: 1 }],\n }\n }\n\n const totalCases = registeredCases.length\n\n const scoreBucketsByKind: Record<RunScoreKind, number[]> = {\n exact: [],\n judge: [],\n }\n\n await Promise.all(\n registeredCases.map(async (taskCase, index) => {\n emitCaseStart(context.reporterHooks, {\n index,\n name: taskCase.name,\n total: totalCases,\n })\n\n let state: 'passed' | 'failed' = 'passed'\n let errorMessage: string | undefined\n const caseId = createTaskCaseReporterId(index, taskCase.name)\n const customScoresByKind = new Map<RunScoreKind, number>()\n\n try {\n await taskCase.run({\n ...context,\n matrix: {\n ...cloneCaseMatrix(context.task.matrix),\n inputs: taskCase.input,\n },\n metric(name, value) {\n context.reporterHooks?.onEvent?.({\n caseId,\n data: {\n name,\n value,\n },\n event: 'task.case.metric',\n })\n },\n score(score, kind = 'exact') {\n assertValidScore(score)\n customScoresByKind.set(kind, score)\n },\n })\n }\n catch (error) {\n state = 'failed'\n errorMessage = errorMessageFrom(error) ?? 'Unknown case failure.'\n }\n finally {\n emitCaseEnd(context.reporterHooks, {\n ...(errorMessage == null ? {} : { errorMessage }),\n index,\n state,\n name: taskCase.name,\n total: totalCases,\n })\n }\n\n if (state === 'failed') {\n scoreBucketsByKind.exact.push(0)\n return\n }\n\n if (customScoresByKind.size === 0) {\n scoreBucketsByKind.exact.push(1)\n return\n }\n\n scoreBucketsByKind.exact.push(customScoresByKind.get('exact') ?? 
1)\n const judgeScore = customScoresByKind.get('judge')\n if (judgeScore != null) {\n scoreBucketsByKind.judge.push(judgeScore)\n }\n }),\n )\n\n const scores = (Object.keys(scoreBucketsByKind) as RunScoreKind[])\n .filter(kind => scoreBucketsByKind[kind].length > 0)\n .map((kind) => {\n const values = scoreBucketsByKind[kind]\n const total = values.reduce((sum, value) => sum + value, 0)\n return {\n kind,\n score: total / values.length,\n }\n })\n\n return {\n scores,\n }\n },\n }),\n })\n\n registerEvalDefinition(definition)\n\n return definition\n}\n\n/**\n * Alias of `describeTask` for eval-centric naming.\n */\nexport const describeEval = describeTask\n"],"mappings":";;;;;;AAkDA,SAAS,gBAAgB,QAA4E;AACnG,QAAO;EACL,MAAM,EACJ,GAAG,OAAO,MACX;EACD,MAAM,EACJ,GAAG,OAAO,MACX;EACD,KAAK,EACH,GAAG,OAAO,KACX;EACF;;AAGH,SAAS,yBAAyB,OAAe,MAAsB;AACrE,QAAO,GAAG,MAAM,GAAG,mBAAmB,KAAK;;AAG7C,SAAS,iBAAiB,OAAqB;AAC7C,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,QAAQ,KAAK,QAAQ,EAClD,OAAM,IAAI,MAAM,0DAA0D,MAAM,IAAI;;AAIxF,SAAS,cACP,OACA,SAKM;AACN,KAAI;AACF,SAAO,cAAc,QAAQ;SAEzB;;AAKR,SAAS,YACP,OACA,SAOM;AACN,KAAI;AACF,SAAO,YAAY,QAAQ;SAEvB;;AAoCR,SAAS,kBAAkB,iBAAiE;CAG1F,SAAS,aACP,MACA,KACA,SACM;AACN,kBAAgB,KAAK;GACnB,OAAO,SAAS;GAChB;GACK;GACN,CAAC;;AAGJ,QAAO;EACL,QAAQ;EACR,gBAAgB,YAAY,QAAQ,KAAK;AACvC,UAAO,SAAS,OAAO,UAAU;AAC/B,oBAAgB,KAAK;KACnB;KACA,MAAM,GAAG,WAAW,IAAI,QAAQ;KAC3B;KACN,CAAC;KACF;;EAEL;;AAGH,IAAI,mBAAgD,EAAE;AAEtD,SAAS,gBAAmB,OAAkC,UAAsB;AAClF,oBAAmB,CAAC,GAAG,kBAAkB,MAAM;AAE/C,KAAI;AACF,SAAO,UAAU;WAEX;AACN,qBAAmB,iBAAiB,MAAM,GAAG,GAAG;;;AAIpD,SAAS,iBAA4C;CACnD,MAAM,SAAS,iBAAiB,GAAG,GAAG;AACtC,KAAI,UAAU,KACZ,OAAM,IAAI,MAAM,0EAA0E;AAG5F,QAAO;;AAiBT,SAAgB,OACd,MACA,KACA,SACM;AACN,iBAAgB,CAAC,KAAK;EACpB,OAAO,SAAS;EAChB;EACK;EACN,CAAC;;;;;AAMJ,SAAgB,gBACd,YACA,QACA,KACM;AACN,QAAO,SAAS,OAAO,UAAU;AAC/B,kBAAgB,CAAC,KAAK;GACpB;GACA,MAAM,GAAG,WAAW,IAAI,QAAQ;GAC3B;GACN,CAAC;GACF;;;;;;;;;AAUJ,SAAgB,aACd,MACA,OACA,UAA+B,EAAE,EACjC;CACA,MAAM,kBAA6C,EAAE;CACrD,MAAM,UAAU,kBAAkB,gBAAgB;AAClD,iBAAgB,uB
AAuB;AACrC,MAAI,MAAM,SAAS,GAAG;AACnB,SAAiD,QAAQ;AAC1D;;AAGA,SAAsB;GACxB;CAIF,MAAM,aAAa,WAAW;EAC5B,aAHkB,QAAQ,eAAe;EAIzC;EACA,MAAM,WAAW;GACf,IAAI;GACJ,MAAM,IAAI,SAAiC;AACzC,QAAI,gBAAgB,WAAW,EAC7B,QAAO,EACL,QAAQ,CAAC;KAAE,MAAM;KAAS,OAAO;KAAG,CAAC,EACtC;IAGH,MAAM,aAAa,gBAAgB;IAEnC,MAAM,qBAAqD;KACzD,OAAO,EAAE;KACT,OAAO,EAAE;KACV;AAED,UAAM,QAAQ,IACZ,gBAAgB,IAAI,OAAO,UAAU,UAAU;AAC7C,mBAAc,QAAQ,eAAe;MACnC;MACA,MAAM,SAAS;MACf,OAAO;MACR,CAAC;KAEF,IAAI,QAA6B;KACjC,IAAI;KACJ,MAAM,SAAS,yBAAyB,OAAO,SAAS,KAAK;KAC7D,MAAM,qCAAqB,IAAI,KAA2B;AAE1D,SAAI;AACF,YAAM,SAAS,IAAI;OACjB,GAAG;OACH,QAAQ;QACN,GAAG,gBAAgB,QAAQ,KAAK,OAAO;QACvC,QAAQ,SAAS;QAClB;OACD,OAAO,MAAM,OAAO;AAClB,gBAAQ,eAAe,UAAU;SAC/B;SACA,MAAM;UACJ;UACA;UACD;SACD,OAAO;SACR,CAAC;;OAEJ,MAAM,OAAO,OAAO,SAAS;AAC3B,yBAAiB,MAAM;AACvB,2BAAmB,IAAI,MAAM,MAAM;;OAEtC,CAAC;cAEG,OAAO;AACZ,cAAQ;AACR,qBAAe,iBAAiB,MAAM,IAAI;eAEpC;AACN,kBAAY,QAAQ,eAAe;OACjC,GAAI,gBAAgB,OAAO,EAAE,GAAG,EAAE,cAAc;OAChD;OACA;OACA,MAAM,SAAS;OACf,OAAO;OACR,CAAC;;AAGJ,SAAI,UAAU,UAAU;AACtB,yBAAmB,MAAM,KAAK,EAAE;AAChC;;AAGF,SAAI,mBAAmB,SAAS,GAAG;AACjC,yBAAmB,MAAM,KAAK,EAAE;AAChC;;AAGF,wBAAmB,MAAM,KAAK,mBAAmB,IAAI,QAAQ,IAAI,EAAE;KACnE,MAAM,aAAa,mBAAmB,IAAI,QAAQ;AAClD,SAAI,cAAc,KAChB,oBAAmB,MAAM,KAAK,WAAW;MAE3C,CACH;AAaD,WAAO,EACL,QAZc,OAAO,KAAK,mBAAmB,CAC5C,QAAO,SAAQ,mBAAmB,MAAM,SAAS,EAAE,CACnD,KAAK,SAAS;KACb,MAAM,SAAS,mBAAmB;AAElC,YAAO;MACL;MACA,OAHY,OAAO,QAAQ,KAAK,UAAU,MAAM,OAAO,EAAE,GAG1C,OAAO;MACvB;MACD,EAIH;;GAEJ,CAAC;EACH,CAAC;AAEF,wBAAuB,WAAW;AAElC,QAAO;;;;;AAMT,MAAa,eAAe"}
1
+ {"version":3,"file":"index.mjs","names":[],"sources":["../src/dsl/task.ts"],"sourcesContent":["import type { TaskConcurrencyConfig, TaskExecutionPolicy, TaskRunContext, TaskRunOutput } from '../config'\nimport type { RunScoreKind } from '../core/runner'\n\nimport { errorMessageFrom } from '@moeru/std'\n\nimport { defineEval, defineTask } from '../config'\nimport { createSchedulerQueue } from '../core/scheduler/queue'\nimport { registerEvalDefinition } from './registry'\n\n/**\n * Runtime context provided to a task case callback.\n */\nexport interface CaseRunContext<TInput> extends TaskRunContext {\n /**\n * Case-scoped matrix payload.\n */\n matrix: TaskRunContext['task']['matrix'] & { inputs: TInput }\n /**\n * Overrides one case score family with a custom normalized value.\n *\n * Use when:\n * - one case computes a benchmark-native score that should flow into run aggregation\n *\n * Expects:\n * - `score` to stay in the `0..1` range\n */\n score: (score: number, kind?: RunScoreKind) => void\n /**\n * Emits one custom case metric into report events.\n *\n * Use when:\n * - tasks need structured benchmark metadata beyond exact/judge score families\n *\n * Expects:\n * - `name` to be a stable metric identifier\n * - `value` to be JSON-serializable\n */\n metric: (name: string, value: boolean | number | string | null) => void\n /**\n * Cooperative abort signal for the current case execution.\n */\n signal: AbortSignal\n}\n\n/**\n * Callback for one task case.\n */\nexport type CaseRunner<TInput> = (context: CaseRunContext<TInput>) => Promise<void> | void\n\ninterface RegisteredCase<TInput> {\n concurrency?: number\n executionPolicy?: TaskExecutionPolicy\n input: TInput\n name: string\n queueKey?: object\n run: CaseRunner<TInput>\n}\n\n/**\n * Per-group options for `casesFromInputs`.\n *\n * Use when:\n * - one generated case group should run with a lower case concurrency than the task default\n * - a task should keep a broader task-level cap while one expensive 
case family stays bounded\n *\n * Expects:\n * - `concurrency` to be a positive integer when provided\n *\n * Returns:\n * - one partial case-group execution descriptor\n */\nexport interface CasesFromInputsOptions extends TaskExecutionPolicy {\n /**\n * Case-level concurrency cap for cases registered by one `casesFromInputs(...)` call.\n */\n concurrency?: number\n}\n\n/**\n * Per-case registration options for `caseOf`.\n */\nexport interface CaseRegistrationOptions<TInput> extends TaskExecutionPolicy {\n /**\n * Optional case input payload.\n */\n input: TInput\n}\n\ninterface CaseExecutionOutcome {\n errorMessage?: string\n scoresByKind: Map<RunScoreKind, number>\n state: 'failed' | 'passed' | 'timeout'\n}\n\nfunction cloneCaseMatrix(matrix: TaskRunContext['task']['matrix']): TaskRunContext['task']['matrix'] {\n return {\n eval: {\n ...matrix.eval,\n },\n meta: {\n ...matrix.meta,\n },\n run: {\n ...matrix.run,\n },\n }\n}\n\nfunction createTaskCaseReporterId(index: number, name: string): string {\n return `${index}:${encodeURIComponent(name)}`\n}\n\nfunction assertValidScore(score: number): void {\n if (!Number.isFinite(score) || score < 0 || score > 1) {\n throw new Error(`Case score must be a finite number in range 0..1, got \"${score}\".`)\n }\n}\n\nfunction assertNonNegativeInteger(value: number, label: string): void {\n if (!Number.isFinite(value) || !Number.isInteger(value) || value < 0) {\n throw new Error(`Invalid ${label}: ${String(value)}`)\n }\n}\n\nfunction assertPositiveInteger(value: number, label: string): void {\n if (!Number.isFinite(value) || !Number.isInteger(value) || value <= 0) {\n throw new Error(`Invalid ${label}: ${String(value)}`)\n }\n}\n\nfunction emitCaseStart(\n hooks: TaskRunContext['reporterHooks'] | undefined,\n payload: {\n autoRetry?: number\n index: number\n name: string\n retryIndex?: number\n total: number\n },\n): void {\n try {\n hooks?.onCaseStart?.(payload)\n }\n catch {\n // Reporter hooks must never affect task 
scoring.\n }\n}\n\nfunction emitCaseEnd(\n hooks: TaskRunContext['reporterHooks'] | undefined,\n payload: {\n index: number\n state: 'passed' | 'failed' | 'timeout'\n name: string\n total: number\n errorMessage?: string\n },\n): void {\n try {\n hooks?.onCaseEnd?.(payload)\n }\n catch {\n // Reporter hooks must never affect task scoring.\n }\n}\n\nfunction createCaseTimeoutError(timeout: number): Error {\n const error = new Error(`Case timed out after ${timeout}ms.`)\n error.name = 'TimeoutError'\n return error\n}\n\nfunction normalizeExecutionPolicy(policy: TaskExecutionPolicy | undefined, label: string): TaskExecutionPolicy | undefined {\n if (policy == null) {\n return undefined\n }\n\n if (policy.autoAttempt != null) {\n assertNonNegativeInteger(policy.autoAttempt, `${label} autoAttempt`)\n }\n\n if (policy.autoRetry != null) {\n assertNonNegativeInteger(policy.autoRetry, `${label} autoRetry`)\n }\n\n if (policy.timeout != null) {\n assertPositiveInteger(policy.timeout, `${label} timeout`)\n }\n\n const normalized = {\n autoAttempt: policy.autoAttempt,\n autoRetry: policy.autoRetry,\n timeout: policy.timeout,\n }\n\n return Object.values(normalized).some(value => value != null)\n ? normalized\n : undefined\n}\n\nfunction resolveCaseExecutionPolicy(\n taskCase: RegisteredCase<unknown>,\n taskExecutionPolicy: TaskExecutionPolicy | undefined,\n): Required<Pick<TaskExecutionPolicy, 'autoAttempt' | 'autoRetry'>> & Pick<TaskExecutionPolicy, 'timeout'> {\n return {\n autoAttempt: taskCase.executionPolicy?.autoAttempt ?? taskExecutionPolicy?.autoAttempt ?? 0,\n autoRetry: taskCase.executionPolicy?.autoRetry ?? taskExecutionPolicy?.autoRetry ?? 0,\n timeout: taskCase.executionPolicy?.timeout ?? 
taskExecutionPolicy?.timeout,\n }\n}\n\nasync function runCaseOnce(\n context: TaskRunContext,\n taskCase: RegisteredCase<unknown>,\n index: number,\n timeout: number | undefined,\n): Promise<CaseExecutionOutcome> {\n const customScoresByKind = new Map<RunScoreKind, number>()\n const abortController = new AbortController()\n let timeoutHandle: ReturnType<typeof setTimeout> | undefined\n let timedOut = false\n let settled = false\n\n try {\n const runPromise = Promise.resolve(taskCase.run({\n ...context,\n matrix: {\n ...cloneCaseMatrix(context.task.matrix),\n inputs: taskCase.input,\n },\n metric(name, value) {\n if (abortController.signal.aborted || settled) {\n return\n }\n\n context.reporterHooks?.onEvent?.({\n caseId: createTaskCaseReporterId(index, taskCase.name),\n data: {\n name,\n value,\n },\n event: 'task.case.metric',\n })\n },\n score(score, kind = 'exact') {\n if (abortController.signal.aborted || settled) {\n return\n }\n\n assertValidScore(score)\n customScoresByKind.set(kind, score)\n },\n signal: abortController.signal,\n }))\n\n if (timeout != null) {\n const timeoutPromise = new Promise<never>((_, reject) => {\n timeoutHandle = setTimeout(() => {\n timedOut = true\n abortController.abort(createCaseTimeoutError(timeout))\n reject(createCaseTimeoutError(timeout))\n }, timeout)\n })\n\n await Promise.race([runPromise, timeoutPromise])\n }\n else {\n await runPromise\n }\n\n settled = true\n return {\n scoresByKind: customScoresByKind,\n state: 'passed',\n }\n }\n catch (error) {\n settled = true\n return {\n errorMessage: errorMessageFrom(error) ?? (timedOut && timeout != null ? `Case timed out after ${timeout}ms.` : 'Unknown case failure.'),\n scoresByKind: customScoresByKind,\n state: timedOut ? 
'timeout' : 'failed',\n }\n }\n finally {\n if (timeoutHandle != null) {\n clearTimeout(timeoutHandle)\n }\n }\n}\n\nasync function executeRegisteredCase(\n context: TaskRunContext,\n taskCase: RegisteredCase<unknown>,\n index: number,\n totalCases: number,\n taskExecutionPolicy: TaskExecutionPolicy | undefined,\n): Promise<CaseExecutionOutcome> {\n const resolvedPolicy = resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy)\n let lastOutcome: CaseExecutionOutcome | undefined\n\n for (let retryIndex = 0; retryIndex <= resolvedPolicy.autoRetry; retryIndex += 1) {\n emitCaseStart(context.reporterHooks, {\n ...(resolvedPolicy.autoRetry > 0\n ? {\n autoRetry: resolvedPolicy.autoRetry,\n retryIndex,\n }\n : {}),\n index,\n name: taskCase.name,\n total: totalCases,\n })\n lastOutcome = await runCaseOnce(context, taskCase, index, resolvedPolicy.timeout)\n if (lastOutcome.state === 'passed') {\n return lastOutcome\n }\n }\n\n return lastOutcome ?? {\n errorMessage: 'Unknown case failure.',\n scoresByKind: new Map(),\n state: 'failed',\n }\n}\n\nfunction collectCaseOutcomeScores(\n outcome: CaseExecutionOutcome,\n scoreBucketsByKind: Record<RunScoreKind, number[]>,\n): void {\n if (outcome.state !== 'passed') {\n scoreBucketsByKind.exact.push(0)\n return\n }\n\n if (outcome.scoresByKind.size === 0) {\n scoreBucketsByKind.exact.push(1)\n return\n }\n\n scoreBucketsByKind.exact.push(outcome.scoresByKind.get('exact') ?? 
1)\n const judgeScore = outcome.scoresByKind.get('judge')\n if (judgeScore != null) {\n scoreBucketsByKind.judge.push(judgeScore)\n }\n}\n\n/**\n * Builder callbacks passed into `describeTask`.\n */\nexport interface DescribeTaskBuilder {\n /**\n * Registers one explicit case.\n */\n caseOf: {\n (name: string, run: CaseRunner<undefined>): void\n <TInput>(name: string, run: CaseRunner<TInput>, options: CaseRegistrationOptions<TInput>): void\n }\n /**\n * Registers multiple cases from input list.\n */\n casesFromInputs: <TInput>(\n namePrefix: string,\n inputs: readonly TInput[],\n run: CaseRunner<TInput>,\n options?: CasesFromInputsOptions,\n ) => void\n}\n\n/**\n * Options for `describeTask`.\n */\nexport interface DescribeTaskOptions extends TaskExecutionPolicy {\n /**\n * Optional description override.\n */\n description?: string\n /**\n * Optional task-local concurrency overrides.\n *\n * Use when:\n * - one task should cap attempt fan-out independently from the surrounding project\n * - one task should cap case fan-out without changing global scheduling defaults\n *\n * Expects:\n * - each provided value to be a positive integer\n *\n * @default inherited from project or CLI concurrency settings\n */\n concurrency?: TaskConcurrencyConfig\n}\n\nfunction createCaseBuilder(registeredCases: RegisteredCase<unknown>[]): DescribeTaskBuilder {\n function registerCase(name: string, run: CaseRunner<undefined>): void\n function registerCase<TInput>(name: string, run: CaseRunner<TInput>, options: CaseRegistrationOptions<TInput>): void\n function registerCase<TInput>(\n name: string,\n run: CaseRunner<TInput> | CaseRunner<undefined>,\n options?: CaseRegistrationOptions<TInput>,\n ): void {\n registeredCases.push({\n executionPolicy: normalizeExecutionPolicy(options, 'task case'),\n input: options?.input,\n name,\n run: run as CaseRunner<unknown>,\n })\n }\n\n return {\n caseOf: registerCase,\n casesFromInputs(namePrefix, inputs, run, options) {\n const queueKey = 
options?.concurrency == null ? undefined : {}\n\n inputs.forEach((input, index) => {\n registeredCases.push({\n concurrency: options?.concurrency,\n executionPolicy: normalizeExecutionPolicy(options, 'casesFromInputs'),\n input,\n name: `${namePrefix} #${index + 1}`,\n queueKey,\n run: run as CaseRunner<unknown>,\n })\n })\n },\n }\n}\n\nlet activeCasesStack: RegisteredCase<unknown>[][] = []\n\nfunction withActiveCases<T>(cases: RegisteredCase<unknown>[], callback: () => T): T {\n activeCasesStack = [...activeCasesStack, cases]\n\n try {\n return callback()\n }\n finally {\n activeCasesStack = activeCasesStack.slice(0, -1)\n }\n}\n\nfunction getActiveCases(): RegisteredCase<unknown>[] {\n const active = activeCasesStack.at(-1)\n if (active == null) {\n throw new Error('caseOf/casesFromInputs must be called inside describeTask/describeEval.')\n }\n\n return active\n}\n\n/**\n * Registers one case in the currently active task scope.\n */\nexport function caseOf(\n name: string,\n run: CaseRunner<undefined>,\n): void\n\nexport function caseOf<TInput>(\n name: string,\n run: CaseRunner<TInput>,\n options: CaseRegistrationOptions<TInput>,\n): void\n\nexport function caseOf<TInput>(\n name: string,\n run: CaseRunner<TInput> | CaseRunner<undefined>,\n options?: CaseRegistrationOptions<TInput>,\n): void {\n getActiveCases().push({\n executionPolicy: normalizeExecutionPolicy(options, 'task case'),\n input: options?.input,\n name,\n run: run as CaseRunner<unknown>,\n })\n}\n\n/**\n * Registers multiple cases in the currently active task scope.\n */\nexport function casesFromInputs<TInput>(\n namePrefix: string,\n inputs: readonly TInput[],\n run: CaseRunner<TInput>,\n options?: CasesFromInputsOptions,\n): void {\n const queueKey = options?.concurrency == null ? 
undefined : {}\n\n inputs.forEach((input, index) => {\n getActiveCases().push({\n concurrency: options?.concurrency,\n executionPolicy: normalizeExecutionPolicy(options, 'casesFromInputs'),\n input,\n name: `${namePrefix} #${index + 1}`,\n queueKey,\n run: run as CaseRunner<unknown>,\n })\n })\n}\n\n/**\n * Resolves the effective case concurrency for one registered task case.\n *\n * Before:\n * - registered case override `2`, task default `4`\n * - registered case override `undefined`, task default `3`\n *\n * After:\n * - `2`\n * - `3`\n */\nfunction resolveCaseConcurrency(\n taskCase: RegisteredCase<unknown>,\n taskConcurrency: TaskConcurrencyConfig | undefined,\n runtimeConcurrency: TaskConcurrencyConfig | undefined,\n): number | undefined {\n const concurrency = runtimeConcurrency?.case ?? taskCase.concurrency ?? taskConcurrency?.case\n if (concurrency == null) {\n return undefined\n }\n\n if (!Number.isFinite(concurrency) || !Number.isInteger(concurrency) || concurrency <= 0) {\n throw new Error(`Invalid task case concurrency: ${String(concurrency)}`)\n }\n\n return concurrency\n}\n\nfunction resolveCaseQueueKey(taskCase: RegisteredCase<unknown>, defaultQueueKey: object): object {\n return taskCase.queueKey ?? defaultQueueKey\n}\n\n/**\n * Defines one eval task with task/case semantics similar to Vitest.\n *\n * Use when:\n * - task behavior should be declared with `caseOf` and `casesFromInputs`\n * - business agent code should be imported and run from eval task files\n */\nexport function describeTask(\n name: string,\n build: ((builder: DescribeTaskBuilder) => void) | (() => void),\n options: DescribeTaskOptions = {},\n) {\n const registeredCases: RegisteredCase<unknown>[] = []\n const builder = createCaseBuilder(registeredCases)\n withActiveCases(registeredCases, () => {\n if (build.length > 0) {\n (build as (builder: DescribeTaskBuilder) => void)(builder)\n return\n }\n\n ;(build as () => void)()\n })\n\n const description = options.description ?? 
name\n const taskExecutionPolicy = normalizeExecutionPolicy(options, 'describeTask')\n\n const definition = defineEval({\n description,\n name,\n task: defineTask({\n concurrency: options.concurrency,\n executionPolicy: taskExecutionPolicy,\n id: name,\n async run(context): Promise<TaskRunOutput> {\n if (registeredCases.length === 0) {\n return {\n scores: [{ kind: 'exact', score: 1 }],\n }\n }\n\n const totalCases = registeredCases.length\n const scoreBucketsByKind: Record<RunScoreKind, number[]> = {\n exact: [],\n judge: [],\n }\n const defaultCaseQueueKey = {}\n const caseQueues = new Map<object, ReturnType<typeof createSchedulerQueue>>()\n const hasAutoAttempt = registeredCases.some(taskCase => resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy).autoAttempt > 0)\n const runtimeTaskConcurrency = context.task.entry.task?.concurrency ?? options.concurrency\n\n if (!hasAutoAttempt) {\n await Promise.all(\n registeredCases.map(async (taskCase, index) => {\n const executeCase = async () => {\n const outcome = await executeRegisteredCase(context, taskCase, index, totalCases, taskExecutionPolicy)\n emitCaseEnd(context.reporterHooks, {\n ...(outcome.errorMessage == null ? {} : { errorMessage: outcome.errorMessage }),\n index,\n state: outcome.state,\n name: taskCase.name,\n total: totalCases,\n })\n collectCaseOutcomeScores(outcome, scoreBucketsByKind)\n }\n\n const concurrency = resolveCaseConcurrency(taskCase, runtimeTaskConcurrency, context.runtimeConcurrency)\n if (concurrency == null) {\n await executeCase()\n return\n }\n\n const queueKey = resolveCaseQueueKey(taskCase, defaultCaseQueueKey)\n const queue = caseQueues.get(queueKey) ?? 
createSchedulerQueue(concurrency)\n caseQueues.set(queueKey, queue)\n await queue.run(executeCase)\n }),\n )\n }\n else {\n let finalOutcomes: CaseExecutionOutcome[] = []\n let attemptIndex = 0\n\n for (;;) {\n finalOutcomes = await Promise.all(\n registeredCases.map(async (taskCase, index) => {\n const executeCase = async () => await executeRegisteredCase(context, taskCase, index, totalCases, taskExecutionPolicy)\n const concurrency = resolveCaseConcurrency(taskCase, runtimeTaskConcurrency, context.runtimeConcurrency)\n if (concurrency == null) {\n return await executeCase()\n }\n\n const queueKey = resolveCaseQueueKey(taskCase, defaultCaseQueueKey)\n const queue = caseQueues.get(queueKey) ?? createSchedulerQueue(concurrency)\n caseQueues.set(queueKey, queue)\n return await queue.run(executeCase)\n }),\n )\n\n const shouldContinue = finalOutcomes.some((outcome, index) => {\n if (outcome.state === 'passed') {\n return false\n }\n\n const taskCase = registeredCases[index]\n if (taskCase == null) {\n return false\n }\n\n return attemptIndex < resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy).autoAttempt\n })\n\n if (!shouldContinue) {\n break\n }\n\n attemptIndex += 1\n }\n\n finalOutcomes.forEach((outcome, index) => {\n const taskCase = registeredCases[index]\n if (taskCase == null) {\n return\n }\n\n emitCaseEnd(context.reporterHooks, {\n ...(outcome.errorMessage == null ? 
{} : { errorMessage: outcome.errorMessage }),\n index,\n state: outcome.state,\n name: taskCase.name,\n total: totalCases,\n })\n collectCaseOutcomeScores(outcome, scoreBucketsByKind)\n })\n }\n\n const scores = (Object.keys(scoreBucketsByKind) as RunScoreKind[])\n .filter(kind => scoreBucketsByKind[kind].length > 0)\n .map((kind) => {\n const values = scoreBucketsByKind[kind]\n const total = values.reduce((sum, value) => sum + value, 0)\n return {\n kind,\n score: total / values.length,\n }\n })\n\n return {\n scores,\n }\n },\n }),\n })\n\n registerEvalDefinition(definition)\n\n return definition\n}\n\n/**\n * Alias of `describeTask` for eval-centric naming.\n */\nexport const describeEval = describeTask\n"],"mappings":";;;;;;;AA8FA,SAAS,gBAAgB,QAA4E;AACnG,QAAO;EACL,MAAM,EACJ,GAAG,OAAO,MACX;EACD,MAAM,EACJ,GAAG,OAAO,MACX;EACD,KAAK,EACH,GAAG,OAAO,KACX;EACF;;AAGH,SAAS,yBAAyB,OAAe,MAAsB;AACrE,QAAO,GAAG,MAAM,GAAG,mBAAmB,KAAK;;AAG7C,SAAS,iBAAiB,OAAqB;AAC7C,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,QAAQ,KAAK,QAAQ,EAClD,OAAM,IAAI,MAAM,0DAA0D,MAAM,IAAI;;AAIxF,SAAS,yBAAyB,OAAe,OAAqB;AACpE,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,CAAC,OAAO,UAAU,MAAM,IAAI,QAAQ,EACjE,OAAM,IAAI,MAAM,WAAW,MAAM,IAAI,OAAO,MAAM,GAAG;;AAIzD,SAAS,sBAAsB,OAAe,OAAqB;AACjE,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,CAAC,OAAO,UAAU,MAAM,IAAI,SAAS,EAClE,OAAM,IAAI,MAAM,WAAW,MAAM,IAAI,OAAO,MAAM,GAAG;;AAIzD,SAAS,cACP,OACA,SAOM;AACN,KAAI;AACF,SAAO,cAAc,QAAQ;SAEzB;;AAKR,SAAS,YACP,OACA,SAOM;AACN,KAAI;AACF,SAAO,YAAY,QAAQ;SAEvB;;AAKR,SAAS,uBAAuB,SAAwB;CACtD,MAAM,wBAAQ,IAAI,MAAM,wBAAwB,QAAQ,KAAK;AAC7D,OAAM,OAAO;AACb,QAAO;;AAGT,SAAS,yBAAyB,QAAyC,OAAgD;AACzH,KAAI,UAAU,KACZ;AAGF,KAAI,OAAO,eAAe,KACxB,0BAAyB,OAAO,aAAa,GAAG,MAAM,cAAc;AAGtE,KAAI,OAAO,aAAa,KACtB,0BAAyB,OAAO,WAAW,GAAG,MAAM,YAAY;AAGlE,KAAI,OAAO,WAAW,KACpB,uBAAsB,OAAO,SAAS,GAAG,MAAM,UAAU;CAG3D,MAAM,aAAa;EACjB,aAAa,OAAO;EACpB,WAAW,OAAO;EAClB,SAAS,OAAO;EACjB;AAED,QAAO,OAAO,OAAO,WAAW,CAAC,MAAK,UAAS,SAAS,KAAK,GACzD,aACA,KAAA;;AAGN,SAAS,2BACP,UACA,qBACyG;AACzG,QAAO;EACL,aAAa,SAAS,iBAAiB,eAAe,qBAAqB,
eAAe;EAC1F,WAAW,SAAS,iBAAiB,aAAa,qBAAqB,aAAa;EACpF,SAAS,SAAS,iBAAiB,WAAW,qBAAqB;EACpE;;AAGH,eAAe,YACb,SACA,UACA,OACA,SAC+B;CAC/B,MAAM,qCAAqB,IAAI,KAA2B;CAC1D,MAAM,kBAAkB,IAAI,iBAAiB;CAC7C,IAAI;CACJ,IAAI,WAAW;CACf,IAAI,UAAU;AAEd,KAAI;EACF,MAAM,aAAa,QAAQ,QAAQ,SAAS,IAAI;GAC9C,GAAG;GACH,QAAQ;IACN,GAAG,gBAAgB,QAAQ,KAAK,OAAO;IACvC,QAAQ,SAAS;IAClB;GACD,OAAO,MAAM,OAAO;AAClB,QAAI,gBAAgB,OAAO,WAAW,QACpC;AAGF,YAAQ,eAAe,UAAU;KAC/B,QAAQ,yBAAyB,OAAO,SAAS,KAAK;KACtD,MAAM;MACJ;MACA;MACD;KACD,OAAO;KACR,CAAC;;GAEJ,MAAM,OAAO,OAAO,SAAS;AAC3B,QAAI,gBAAgB,OAAO,WAAW,QACpC;AAGF,qBAAiB,MAAM;AACvB,uBAAmB,IAAI,MAAM,MAAM;;GAErC,QAAQ,gBAAgB;GACzB,CAAC,CAAC;AAEH,MAAI,WAAW,MAAM;GACnB,MAAM,iBAAiB,IAAI,SAAgB,GAAG,WAAW;AACvD,oBAAgB,iBAAiB;AAC/B,gBAAW;AACX,qBAAgB,MAAM,uBAAuB,QAAQ,CAAC;AACtD,YAAO,uBAAuB,QAAQ,CAAC;OACtC,QAAQ;KACX;AAEF,SAAM,QAAQ,KAAK,CAAC,YAAY,eAAe,CAAC;QAGhD,OAAM;AAGR,YAAU;AACV,SAAO;GACL,cAAc;GACd,OAAO;GACR;UAEI,OAAO;AACZ,YAAU;AACV,SAAO;GACL,cAAc,iBAAiB,MAAM,KAAK,YAAY,WAAW,OAAO,wBAAwB,QAAQ,OAAO;GAC/G,cAAc;GACd,OAAO,WAAW,YAAY;GAC/B;WAEK;AACN,MAAI,iBAAiB,KACnB,cAAa,cAAc;;;AAKjC,eAAe,sBACb,SACA,UACA,OACA,YACA,qBAC+B;CAC/B,MAAM,iBAAiB,2BAA2B,UAAU,oBAAoB;CAChF,IAAI;AAEJ,MAAK,IAAI,aAAa,GAAG,cAAc,eAAe,WAAW,cAAc,GAAG;AAChF,gBAAc,QAAQ,eAAe;GACnC,GAAI,eAAe,YAAY,IAC3B;IACE,WAAW,eAAe;IAC1B;IACD,GACD,EAAE;GACN;GACA,MAAM,SAAS;GACf,OAAO;GACR,CAAC;AACF,gBAAc,MAAM,YAAY,SAAS,UAAU,OAAO,eAAe,QAAQ;AACjF,MAAI,YAAY,UAAU,SACxB,QAAO;;AAIX,QAAO,eAAe;EACpB,cAAc;EACd,8BAAc,IAAI,KAAK;EACvB,OAAO;EACR;;AAGH,SAAS,yBACP,SACA,oBACM;AACN,KAAI,QAAQ,UAAU,UAAU;AAC9B,qBAAmB,MAAM,KAAK,EAAE;AAChC;;AAGF,KAAI,QAAQ,aAAa,SAAS,GAAG;AACnC,qBAAmB,MAAM,KAAK,EAAE;AAChC;;AAGF,oBAAmB,MAAM,KAAK,QAAQ,aAAa,IAAI,QAAQ,IAAI,EAAE;CACrE,MAAM,aAAa,QAAQ,aAAa,IAAI,QAAQ;AACpD,KAAI,cAAc,KAChB,oBAAmB,MAAM,KAAK,WAAW;;AAiD7C,SAAS,kBAAkB,iBAAiE;CAG1F,SAAS,aACP,MACA,KACA,SACM;AACN,kBAAgB,KAAK;GACnB,iBAAiB,yBAAyB,SAAS,YAAY;GAC/D,OAAO,SAAS;GAChB;GACK;GACN,CAAC;;AAGJ,QAAO;EACL,QAAQ;EACR,gBAAgB,YAAY,QAAQ,KAAK,SAAS;GAChD,MAAM,WAAW,SAAS,eAAe,OAAO,KAAA,IAAY,EAA
E;AAE9D,UAAO,SAAS,OAAO,UAAU;AAC/B,oBAAgB,KAAK;KACnB,aAAa,SAAS;KACtB,iBAAiB,yBAAyB,SAAS,kBAAkB;KACrE;KACA,MAAM,GAAG,WAAW,IAAI,QAAQ;KAChC;KACK;KACN,CAAC;KACF;;EAEL;;AAGH,IAAI,mBAAgD,EAAE;AAEtD,SAAS,gBAAmB,OAAkC,UAAsB;AAClF,oBAAmB,CAAC,GAAG,kBAAkB,MAAM;AAE/C,KAAI;AACF,SAAO,UAAU;WAEX;AACN,qBAAmB,iBAAiB,MAAM,GAAG,GAAG;;;AAIpD,SAAS,iBAA4C;CACnD,MAAM,SAAS,iBAAiB,GAAG,GAAG;AACtC,KAAI,UAAU,KACZ,OAAM,IAAI,MAAM,0EAA0E;AAG5F,QAAO;;AAiBT,SAAgB,OACd,MACA,KACA,SACM;AACN,iBAAgB,CAAC,KAAK;EACpB,iBAAiB,yBAAyB,SAAS,YAAY;EAC/D,OAAO,SAAS;EAChB;EACK;EACN,CAAC;;;;;AAMJ,SAAgB,gBACd,YACA,QACA,KACA,SACM;CACN,MAAM,WAAW,SAAS,eAAe,OAAO,KAAA,IAAY,EAAE;AAE9D,QAAO,SAAS,OAAO,UAAU;AAC/B,kBAAgB,CAAC,KAAK;GACpB,aAAa,SAAS;GACtB,iBAAiB,yBAAyB,SAAS,kBAAkB;GACrE;GACA,MAAM,GAAG,WAAW,IAAI,QAAQ;GAChC;GACK;GACN,CAAC;GACF;;;;;;;;;;;;;AAcJ,SAAS,uBACP,UACA,iBACA,oBACoB;CACpB,MAAM,cAAc,oBAAoB,QAAQ,SAAS,eAAe,iBAAiB;AACzF,KAAI,eAAe,KACjB;AAGF,KAAI,CAAC,OAAO,SAAS,YAAY,IAAI,CAAC,OAAO,UAAU,YAAY,IAAI,eAAe,EACpF,OAAM,IAAI,MAAM,kCAAkC,OAAO,YAAY,GAAG;AAG1E,QAAO;;AAGT,SAAS,oBAAoB,UAAmC,iBAAiC;AAC/F,QAAO,SAAS,YAAY;;;;;;;;;AAU9B,SAAgB,aACd,MACA,OACA,UAA+B,EAAE,EACjC;CACA,MAAM,kBAA6C,EAAE;CACrD,MAAM,UAAU,kBAAkB,gBAAgB;AAClD,iBAAgB,uBAAuB;AACrC,MAAI,MAAM,SAAS,GAAG;AACnB,SAAiD,QAAQ;AAC1D;;AAGA,SAAsB;GACxB;CAEF,MAAM,cAAc,QAAQ,eAAe;CAC3C,MAAM,sBAAsB,yBAAyB,SAAS,eAAe;CAE7E,MAAM,aAAa,WAAW;EAC5B;EACA;EACA,MAAM,WAAW;GACf,aAAa,QAAQ;GACrB,iBAAiB;GACjB,IAAI;GACJ,MAAM,IAAI,SAAiC;AACzC,QAAI,gBAAgB,WAAW,EAC7B,QAAO,EACL,QAAQ,CAAC;KAAE,MAAM;KAAS,OAAO;KAAG,CAAC,EACtC;IAGH,MAAM,aAAa,gBAAgB;IACnC,MAAM,qBAAqD;KACzD,OAAO,EAAE;KACT,OAAO,EAAE;KACV;IACD,MAAM,sBAAsB,EAAE;IAC9B,MAAM,6BAAa,IAAI,KAAsD;IAC7E,MAAM,iBAAiB,gBAAgB,MAAK,aAAY,2BAA2B,UAAU,oBAAoB,CAAC,cAAc,EAAE;IAClI,MAAM,yBAAyB,QAAQ,KAAK,MAAM,MAAM,eAAe,QAAQ;AAE/E,QAAI,CAAC,eACH,OAAM,QAAQ,IACZ,gBAAgB,IAAI,OAAO,UAAU,UAAU;KAC7C,MAAM,cAAc,YAAY;MAC9B,MAAM,UAAU,MAAM,sBAAsB,SAAS,UAAU,OAAO,YAAY,oBAAoB;AACtG,kBAAY,QAAQ,eAAe;OACjC,GAAI,QAAQ,gBAAgB,OAAO,EAAE,GAAG,EAAE,cAAc,QAAQ,cAAc;OAC9E;OACA,OAAO,QAAQ;O
ACf,MAAM,SAAS;OACf,OAAO;OACR,CAAC;AACF,+BAAyB,SAAS,mBAAmB;;KAGvD,MAAM,cAAc,uBAAuB,UAAU,wBAAwB,QAAQ,mBAAmB;AACxG,SAAI,eAAe,MAAM;AACvB,YAAM,aAAa;AACnB;;KAGF,MAAM,WAAW,oBAAoB,UAAU,oBAAoB;KACnE,MAAM,QAAQ,WAAW,IAAI,SAAS,IAAI,qBAAqB,YAAY;AAC3E,gBAAW,IAAI,UAAU,MAAM;AAC/B,WAAM,MAAM,IAAI,YAAY;MAC5B,CACH;SAEE;KACH,IAAI,gBAAwC,EAAE;KAC9C,IAAI,eAAe;AAEnB,cAAS;AACP,sBAAgB,MAAM,QAAQ,IAC5B,gBAAgB,IAAI,OAAO,UAAU,UAAU;OAC7C,MAAM,cAAc,YAAY,MAAM,sBAAsB,SAAS,UAAU,OAAO,YAAY,oBAAoB;OACtH,MAAM,cAAc,uBAAuB,UAAU,wBAAwB,QAAQ,mBAAmB;AACxG,WAAI,eAAe,KACjB,QAAO,MAAM,aAAa;OAG5B,MAAM,WAAW,oBAAoB,UAAU,oBAAoB;OACnE,MAAM,QAAQ,WAAW,IAAI,SAAS,IAAI,qBAAqB,YAAY;AAC3E,kBAAW,IAAI,UAAU,MAAM;AAC/B,cAAO,MAAM,MAAM,IAAI,YAAY;QACnC,CACH;AAeD,UAAI,CAbmB,cAAc,MAAM,SAAS,UAAU;AAC5D,WAAI,QAAQ,UAAU,SACpB,QAAO;OAGT,MAAM,WAAW,gBAAgB;AACjC,WAAI,YAAY,KACd,QAAO;AAGT,cAAO,eAAe,2BAA2B,UAAU,oBAAoB,CAAC;QAChF,CAGA;AAGF,sBAAgB;;AAGlB,mBAAc,SAAS,SAAS,UAAU;MACxC,MAAM,WAAW,gBAAgB;AACjC,UAAI,YAAY,KACd;AAGF,kBAAY,QAAQ,eAAe;OACjC,GAAI,QAAQ,gBAAgB,OAAO,EAAE,GAAG,EAAE,cAAc,QAAQ,cAAc;OAC9E;OACA,OAAO,QAAQ;OACf,MAAM,SAAS;OACf,OAAO;OACR,CAAC;AACF,+BAAyB,SAAS,mBAAmB;OACrD;;AAcJ,WAAO,EACL,QAZc,OAAO,KAAK,mBAAmB,CAC5C,QAAO,SAAQ,mBAAmB,MAAM,SAAS,EAAE,CACnD,KAAK,SAAS;KACb,MAAM,SAAS,mBAAmB;AAElC,YAAO;MACL;MACA,OAHY,OAAO,QAAQ,KAAK,UAAU,MAAM,OAAO,EAAE,GAG1C,OAAO;MACvB;MACD,EAIH;;GAEJ,CAAC;EACH,CAAC;AAEF,wBAAuB,WAAW;AAElC,QAAO;;;;;AAMT,MAAa,eAAe"}
@@ -11,4 +11,4 @@ function resolveModelByName(models, name) {
11
11
  //#endregion
12
12
  export { resolveModelByName as t };
13
13
 
14
- //# sourceMappingURL=models-D_MsBtYw.mjs.map
14
+ //# sourceMappingURL=models-DIGdOUpJ.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"models-DIGdOUpJ.mjs","names":[],"sources":["../src/config/models.ts"],"sourcesContent":["import type { TaskExecutionPolicy } from './types'\n\n/**\n * Canonical model definition consumed by vieval runtime and config.\n *\n * Use when:\n * - declaring models in `vieval.config.*`\n * - resolving task runtime models by id, alias, or concrete model name\n *\n * Expects:\n * - `id` to be stable and unique within one config\n * - `inferenceExecutorId` to match scheduler/executor identifiers\n *\n * Returns:\n * - one normalized model registration record\n */\nexport interface ModelDefinition {\n /**\n * Stable model id.\n */\n id: string\n /**\n * Inference-executor id used for matching and reporting.\n */\n inferenceExecutorId: string\n /**\n * Executor reference passed through config.\n *\n * `vieval` core treats this as opaque runtime metadata. Builder plugins can\n * narrow this field with plugin-specific executor input types.\n */\n inferenceExecutor: unknown\n /**\n * Concrete model name passed to the inference executor.\n */\n model: string\n /**\n * Alias names that can resolve this model.\n */\n aliases: string[]\n /**\n * Optional execution policy hints attached to this model.\n */\n executionPolicy?: TaskExecutionPolicy\n /**\n * Optional model-level call parameters.\n */\n parameters?: Record<string, unknown>\n}\n\n/**\n * Resolves one model by id, model name, or alias in registration order.\n *\n * Returns:\n * - the first matching model, or `undefined` when no match exists\n */\nexport function resolveModelByName(\n models: readonly ModelDefinition[],\n name: string,\n): ModelDefinition | undefined {\n return models.find(model => model.id === name || model.model === name || model.aliases.includes(name))\n}\n"],"mappings":";;;;;;;AAwDA,SAAgB,mBACd,QACA,MAC6B;AAC7B,QAAO,OAAO,MAAK,UAAS,MAAM,OAAO,QAAQ,MAAM,UAAU,QAAQ,MAAM,QAAQ,SAAS,KAAK,CAAC"}
@@ -1,4 +1,4 @@
1
- import { M as ModelDefinition, l as MatrixDefinition, t as ConfigHookPlugin, x as TaskRunContext } from "../../index-OEdqjQSe.mjs";
1
+ import { C as TaskRunContext, P as ModelDefinition, b as TaskExecutionPolicy, l as MatrixDefinition, t as ConfigHookPlugin } from "../../index-DBZKkpBe.mjs";
2
2
 
3
3
  //#region src/plugins/chat-models/runtime-config.d.ts
4
4
  /**
@@ -382,6 +382,26 @@ interface ChatModelFromBaseOptions {
382
382
  * Alias names used by `resolveModelByName`.
383
383
  */
384
384
  aliases?: string[];
385
+ /**
386
+ * Optional execution policy hints attached to this model.
387
+ */
388
+ executionPolicy?: TaskExecutionPolicy;
389
+ /**
390
+ * Additional retries allowed within the current attempt.
391
+ *
392
+ * @default 0
393
+ */
394
+ autoRetry?: number;
395
+ /**
396
+ * Additional full task attempts allowed after the current attempt settles.
397
+ *
398
+ * @default 0
399
+ */
400
+ autoAttempt?: number;
401
+ /**
402
+ * Timeout in milliseconds for model-backed work.
403
+ */
404
+ timeout?: number;
385
405
  /**
386
406
  * Optional model-level call parameters.
387
407
  */
@@ -1,4 +1,4 @@
1
- import { n as requiredEnvFrom, t as envFrom } from "../../env-BFSjny07.mjs";
1
+ import { n as requiredEnvFrom, t as envFrom } from "../../env--94B0UtW.mjs";
2
2
  import process from "node:process";
3
3
  import { errorMessageFrom } from "@moeru/std";
4
4
  //#region src/plugins/chat-models/runtime-config.ts
@@ -288,6 +288,31 @@ function emitChatModelErrorTelemetry(context, options) {
288
288
  }
289
289
  //#endregion
290
290
  //#region src/plugins/chat-models/index.ts
/**
 * Reduces an execution-policy-like object to the recognised policy fields.
 *
 * Only `autoAttempt`, `autoRetry` and `timeout` are copied; every other
 * property on `policy` is ignored. Fields whose value is `null` or `undefined`
 * are left out of the result entirely, so the returned object can be
 * spread-merged over defaults without overwriting them with `undefined`, and
 * `Object.keys(result).length` reflects how many fields were actually set.
 *
 * @param {{ autoAttempt?: number, autoRetry?: number, timeout?: number } | null | undefined} policy
 *   Candidate policy values.
 * @returns {{ autoAttempt?: number, autoRetry?: number, timeout?: number } | undefined}
 *   A new object holding the fields that are set, or `undefined` when `policy`
 *   is nullish or none of the three fields is set.
 */
function normalizeExecutionPolicy(policy) {
	if (policy == null) return undefined;
	const normalized = {};
	for (const key of ["autoAttempt", "autoRetry", "timeout"]) {
		const value = policy[key];
		// `!= null` keeps legitimate zero values such as `autoRetry: 0`.
		if (value != null) normalized[key] = value;
	}
	return Object.keys(normalized).length > 0 ? normalized : undefined;
}
/**
 * Reports whether a model looks like a "judge" model.
 *
 * The check is a case-insensitive substring match for `"judge"` against each
 * entry of `model.aliases`, `model.id`, and `model.model`.
 *
 * @param {{ aliases?: string[] | null, id?: string | null, model?: string }} model
 *   Model options to inspect.
 * @returns {boolean} `true` when any inspected name contains `"judge"`.
 */
function hasJudgeAlias(model) {
	const candidates = [...(model.aliases ?? []), model.id, model.model];
	// Skip anything that is not a string (missing id/model, malformed alias) so
	// this naming heuristic never throws on incomplete options.
	return candidates.some((value) => typeof value === "string" && value.toLowerCase().includes("judge"));
}
/**
 * Resolves the execution policy attached to a chat model definition.
 *
 * Resolution order:
 * 1. Explicit values. For each of `autoAttempt`, `autoRetry` and `timeout`,
 *    the top-level option wins over the same field on `options.executionPolicy`.
 *    If at least one field ends up set, that explicit policy is returned as-is;
 *    the judge default below is NOT merged into it.
 * 2. Judge default. When nothing explicit is set and `hasJudgeAlias(options)`
 *    is true, the model gets `{ autoRetry: 3 }`.
 * 3. Otherwise no policy is attached.
 *
 * @param {object} options Chat model options (`autoAttempt`, `autoRetry`,
 *   `timeout`, `executionPolicy`, plus the naming fields read by `hasJudgeAlias`).
 * @returns {{ autoAttempt?: number, autoRetry?: number, timeout?: number } | undefined}
 *   The resolved policy, or `undefined` when none applies.
 */
function resolveModelExecutionPolicy(options) {
	// Retries granted to judge-like models that configure no policy themselves.
	const judgeAutoRetry = 3;
	const explicitPolicy = normalizeExecutionPolicy({
		autoAttempt: options.autoAttempt ?? options.executionPolicy?.autoAttempt,
		autoRetry: options.autoRetry ?? options.executionPolicy?.autoRetry,
		timeout: options.timeout ?? options.executionPolicy?.timeout
	});
	// normalizeExecutionPolicy only returns an object when some field is set,
	// so a non-nullish result is already known to be non-empty.
	if (explicitPolicy != null) return explicitPolicy;
	if (hasJudgeAlias(options)) return { autoRetry: judgeAutoRetry };
	return undefined;
}
291
316
  function normalizeInferenceExecutorId(inferenceExecutor, inferenceExecutorId) {
292
317
  if (typeof inferenceExecutor === "string") return inferenceExecutor;
293
318
  return inferenceExecutorId ?? "custom";
@@ -424,6 +449,7 @@ function chatModelFrom(options) {
424
449
  } : void 0;
425
450
  return {
426
451
  aliases: options.aliases ?? [],
452
+ executionPolicy: resolveModelExecutionPolicy(options),
427
453
  id: options.id ?? createDefaultModelId(inferenceExecutorId, options.model),
428
454
  inferenceExecutor: fallbackInferenceExecutor,
429
455
  inferenceExecutorId,