vieval 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/config.d.mts CHANGED
@@ -1,2 +1,2 @@
1
- import { C as TaskRunContext, F as resolveModelByName, P as ModelDefinition, S as TaskReporterHooks, _ as TaskCaseState, a as EvalDefinition, b as TaskExecutionPolicy, c as MatrixAxisValues, d as MatrixPrimitive, f as MatrixRow, g as TaskCaseReporterPayload, h as TaskCaseReporterEndPayload, i as CollectedEvalEntry, l as MatrixDefinition, m as ScopedMatrices, n as defineEval, o as EvalModule, p as MatrixValue, r as defineTask, s as EvalModuleMap, t as ConfigHookPlugin, u as MatrixLayer, v as TaskConcurrencyConfig, w as TaskRunOutput, x as TaskReporterEventPayload, y as TaskDefinition } from "./index-DBZKkpBe.mjs";
2
- export { CollectedEvalEntry, ConfigHookPlugin, EvalDefinition, EvalModule, EvalModuleMap, MatrixAxisValues, MatrixDefinition, MatrixLayer, MatrixPrimitive, MatrixRow, MatrixValue, ModelDefinition, ScopedMatrices, TaskCaseReporterEndPayload, TaskCaseReporterPayload, TaskCaseState, TaskConcurrencyConfig, TaskDefinition, TaskExecutionPolicy, TaskReporterEventPayload, TaskReporterHooks, TaskRunContext, TaskRunOutput, defineEval, defineTask, resolveModelByName };
1
+ import { B as resolveModelByName, C as TaskDefinition, D as TaskRunContext, E as TaskReporterHooks, O as TaskRunOutput, S as TaskConcurrencyConfig, T as TaskReporterEventPayload, _ as ScopedMatrices, a as CliOpenTelemetryReportingConfig, b as TaskCaseReporterPayload, c as EvalDefinition, d as MatrixAxisValues, f as MatrixDefinition, g as MatrixValue, h as MatrixRow, i as Awaitable, l as EvalModule, m as MatrixPrimitive, n as defineEval, o as CliReportingConfig, p as MatrixLayer, r as defineTask, s as CollectedEvalEntry, t as ConfigHookPlugin, u as EvalModuleMap, v as TaskAutoRetryDelay, w as TaskExecutionPolicy, x as TaskCaseState, y as TaskCaseReporterEndPayload, z as ModelDefinition } from "./index-5R1_k2nv.mjs";
2
+ export { Awaitable, CliOpenTelemetryReportingConfig, CliReportingConfig, CollectedEvalEntry, ConfigHookPlugin, EvalDefinition, EvalModule, EvalModuleMap, MatrixAxisValues, MatrixDefinition, MatrixLayer, MatrixPrimitive, MatrixRow, MatrixValue, ModelDefinition, ScopedMatrices, TaskAutoRetryDelay, TaskCaseReporterEndPayload, TaskCaseReporterPayload, TaskCaseState, TaskConcurrencyConfig, TaskDefinition, TaskExecutionPolicy, TaskReporterEventPayload, TaskReporterHooks, TaskRunContext, TaskRunOutput, defineEval, defineTask, resolveModelByName };
@@ -1,4 +1,4 @@
1
- import { G as RunScore, K as RunScoreKind } from "../../index-DBZKkpBe.mjs";
1
+ import { X as RunScore, Z as RunScoreKind } from "../../index-5R1_k2nv.mjs";
2
2
 
3
3
  //#region src/core/assertions/index.d.ts
4
4
  /**
@@ -1,4 +1,4 @@
1
- import { H as AggregatedRunResults } from "../../../index-DBZKkpBe.mjs";
1
+ import { q as AggregatedRunResults } from "../../../index-5R1_k2nv.mjs";
2
2
 
3
3
  //#region src/core/processors/results/policies/hybrid-threshold.d.ts
4
4
  /**
@@ -1,3 +1,3 @@
1
- import { $ as ScheduledTask, A as CreateTaskExecutionContextOptions, B as createRunnerRuntimeContext, D as RunnerTaskState, E as RunnerExecutionError, G as RunScore, H as AggregatedRunResults, I as asProjectRelativePath, J as CreateRunnerScheduleOptions, K as RunScoreKind, L as collectEvalEntries, M as TaskModelSelectionOptions, N as createTaskExecutionContext, O as ScheduledTaskExecutor, Q as RunnerMatrixSelection, R as CreateVievalRunnerRuntimeContextOptions, T as RunScheduledTasksOptions, U as AggregatedRunSummary, V as AggregatedProviderSummary, W as RunResult, X as RunnerMatrixDefinition, Y as InferenceExecutor, Z as RunnerMatrixInput, at as normalizeCacheFilePathSegments, ct as CacheNamespace, et as ScheduledTaskMatrix, it as createFilesystemTaskCacheRuntime, j as TaskExecutionContext, k as runScheduledTasks, lt as TaskCacheRuntime, nt as createRunnerSchedule, ot as CacheFileHandle, q as aggregateRunResults, rt as CreateFilesystemTaskCacheRuntimeOptions, st as CacheFileOptions, tt as ScheduledTaskMatrixMeta, z as RunnerRuntimeContext } from "../../index-DBZKkpBe.mjs";
1
+ import { $ as CreateRunnerScheduleOptions, A as RunScheduledTasksOptions, F as CreateTaskExecutionContextOptions, G as createRunnerRuntimeContext, H as collectEvalEntries, I as TaskExecutionContext, J as AggregatedRunSummary, K as AggregatedProviderSummary, L as TaskModelSelectionOptions, M as RunnerTaskState, N as ScheduledTaskExecutor, P as runScheduledTasks, Q as aggregateRunResults, R as createTaskExecutionContext, U as CreateVievalRunnerRuntimeContextOptions, V as asProjectRelativePath, W as RunnerRuntimeContext, X as RunScore, Y as RunResult, Z as RunScoreKind, at as ScheduledTaskMatrix, ct as CreateFilesystemTaskCacheRuntimeOptions, dt as CacheFileHandle, et as InferenceExecutor, ft as CacheFileOptions, it as ScheduledTask, j as RunnerExecutionError, lt as createFilesystemTaskCacheRuntime, mt as TaskCacheRuntime, nt as RunnerMatrixInput, ot as ScheduledTaskMatrixMeta, pt as CacheNamespace, q as AggregatedRunResults, rt as RunnerMatrixSelection, st as createRunnerSchedule, tt as RunnerMatrixDefinition, ut as normalizeCacheFilePathSegments } from "../../index-5R1_k2nv.mjs";
2
2
  import { a as SchedulerMiddleware, c as SchedulerScopeContext, i as SchedulerConcurrencyConfig, n as getActiveScopes, o as SchedulerRuntime, r as CreateSchedulerRuntimeOptions, s as SchedulerScope, t as createSchedulerRuntime } from "../../index-fakXoZEe.mjs";
3
3
  export { AggregatedProviderSummary, AggregatedRunResults, AggregatedRunSummary, CacheFileHandle, CacheFileOptions, CacheNamespace, CreateFilesystemTaskCacheRuntimeOptions, CreateRunnerScheduleOptions, CreateSchedulerRuntimeOptions, CreateTaskExecutionContextOptions, CreateVievalRunnerRuntimeContextOptions, InferenceExecutor, RunResult, RunScheduledTasksOptions, RunScore, RunScoreKind, RunnerExecutionError, RunnerMatrixDefinition, RunnerMatrixInput, RunnerMatrixSelection, RunnerRuntimeContext, RunnerTaskState, ScheduledTask, ScheduledTaskExecutor, ScheduledTaskMatrix, ScheduledTaskMatrixMeta, SchedulerConcurrencyConfig, SchedulerMiddleware, SchedulerRuntime, SchedulerScope, SchedulerScopeContext, TaskCacheRuntime, TaskExecutionContext, TaskModelSelectionOptions, aggregateRunResults, asProjectRelativePath, collectEvalEntries, createFilesystemTaskCacheRuntime, createRunnerRuntimeContext, createRunnerSchedule, createSchedulerRuntime, createTaskExecutionContext, getActiveScopes, normalizeCacheFilePathSegments, runScheduledTasks };
@@ -646,7 +646,39 @@ declare class RunnerExecutionError extends Error {
646
646
  */
647
647
  declare function runScheduledTasks(tasks: readonly ScheduledTask[], executor: ScheduledTaskExecutor, options?: RunScheduledTasksOptions): Promise<AggregatedRunResults>;
648
648
  //#endregion
649
+ //#region src/core/telemetry/types.d.ts
650
+ /** JSON-compatible scalar values accepted as telemetry attributes. */
651
+ type TelemetryAttributeValue = boolean | number | string | null | readonly TelemetryAttributeValue[];
652
+ /** Attribute map shared by local report projection and OpenTelemetry span calls. */
653
+ type TelemetryAttributes = Record<string, TelemetryAttributeValue | undefined>;
654
+ /**
655
+ * Internal Vieval telemetry runtime.
656
+ *
657
+ * Use when:
658
+ * - runner code needs one execution path for disabled and enabled telemetry
659
+ * - case code should run inside an active OpenTelemetry span when configured
660
+ *
661
+ * Expects:
662
+ * - attributes are JSON-compatible and stable enough for report filtering
663
+ * - callbacks are awaited by the caller
664
+ *
665
+ * Returns:
666
+ * - callback result, preserving thrown errors after telemetry records them
667
+ */
668
+ interface TelemetryRuntime {
669
+ withSpan: <T>(name: string, attributes: TelemetryAttributes, callback: () => Promise<T>) => Promise<T>;
670
+ addEvent: (name: string, attributes?: TelemetryAttributes) => void;
671
+ setAttributes: (attributes: TelemetryAttributes) => void;
672
+ recordException: (error: unknown) => void;
673
+ }
674
+ //#endregion
649
675
  //#region src/config/types.d.ts
676
+ /**
677
+ * Value that can be returned directly or through a promise.
678
+ *
679
+ * @param T - Resolved value type.
680
+ */
681
+ type Awaitable<T> = Promise<T> | T;
650
682
  /**
651
683
  * Primitive value allowed in one matrix cell.
652
684
  *
@@ -862,6 +894,12 @@ interface TaskRunOutput {
862
894
  */
863
895
  scores: readonly RunScore[];
864
896
  }
897
+ /**
898
+ * Delay policy for retries within one task case attempt.
899
+ *
900
+ * @param retryIndex Retry number where `1` is the first retry after the initial failure.
901
+ */
902
+ type TaskAutoRetryDelay = number | ((retryIndex: number) => number);
865
903
  /**
866
904
  * Execution policy applied to task and case callbacks.
867
905
  *
@@ -883,6 +921,15 @@ interface TaskExecutionPolicy {
883
921
  * @default 0
884
922
  */
885
923
  autoRetry?: number;
924
+ /**
925
+ * Delay in milliseconds before a case auto retry starts.
926
+ *
927
+ * A number applies the same delay to every retry. A function receives the
928
+ * retry index where `1` is the first retry after the initial failure.
929
+ *
930
+ * @default retryIndex => 500 * 2 ** (retryIndex - 1)
931
+ */
932
+ autoRetryDelay?: TaskAutoRetryDelay;
886
933
  /**
887
934
  * Additional full task attempts allowed after the current attempt settles.
888
935
  *
@@ -917,6 +964,30 @@ interface TaskConcurrencyConfig {
917
964
  */
918
965
  case?: number;
919
966
  }
967
+ /**
968
+ * Reporting configuration for local artifacts and optional OpenTelemetry integration.
969
+ */
970
+ interface CliReportingConfig {
971
+ /**
972
+ * Optional OpenTelemetry API integration.
973
+ */
974
+ openTelemetry?: CliOpenTelemetryReportingConfig;
975
+ }
976
+ /**
977
+ * OpenTelemetry reporting configuration managed by user config setup.
978
+ */
979
+ interface CliOpenTelemetryReportingConfig {
980
+ /**
981
+ * Enables Vieval active span wrapping through `@opentelemetry/api`.
982
+ *
983
+ * @default false
984
+ */
985
+ enabled?: boolean;
986
+ /**
987
+ * Called after all telemetry events and local report artifacts have been emitted.
988
+ */
989
+ onRunEnd?: () => Awaitable<void>;
990
+ }
920
991
  /**
921
992
  * Runtime context passed into eval task `run`.
922
993
  */
@@ -992,6 +1063,17 @@ interface TaskRunContext {
992
1063
  * - hooks are best-effort observers and should not affect task scoring
993
1064
  */
994
1065
  reporterHooks?: TaskReporterHooks;
1066
+ /**
1067
+ * Optional telemetry runtime shared by runner, DSL, and reporter integrations.
1068
+ *
1069
+ * Use when:
1070
+ * - task execution should emit events to the currently active telemetry runtime
1071
+ * - enabled and disabled telemetry should keep the same execution path
1072
+ *
1073
+ * Expects:
1074
+ * - callers inject a no-op runtime when telemetry is disabled
1075
+ */
1076
+ telemetry?: TelemetryRuntime;
995
1077
  /**
996
1078
  * Optional runtime scheduling overrides supplied by CLI or host execution.
997
1079
  *
@@ -1036,6 +1118,10 @@ interface TaskCaseReporterPayload {
1036
1118
  * Maximum retry count configured for this case.
1037
1119
  */
1038
1120
  autoRetry?: number;
1121
+ /**
1122
+ * Optional case input payload registered by the task DSL.
1123
+ */
1124
+ input?: unknown;
1039
1125
  /**
1040
1126
  * Declared case label.
1041
1127
  */
@@ -1066,6 +1152,10 @@ interface TaskCaseReporterPayload {
1066
1152
  * - `state` describes the final case result
1067
1153
  */
1068
1154
  interface TaskCaseReporterEndPayload extends TaskCaseReporterPayload {
1155
+ /**
1156
+ * Optional case output returned by the task case callback.
1157
+ */
1158
+ output?: unknown;
1069
1159
  /**
1070
1160
  * Final case state.
1071
1161
  */
@@ -1288,5 +1378,5 @@ interface ConfigHookPlugin<TConfig> {
1288
1378
  configVievalResolved?: (config: TConfig) => void | Promise<void>;
1289
1379
  }
1290
1380
  //#endregion
1291
- export { ScheduledTask as $, CreateTaskExecutionContextOptions as A, createRunnerRuntimeContext as B, TaskRunContext as C, RunnerTaskState as D, RunnerExecutionError as E, resolveModelByName as F, RunScore as G, AggregatedRunResults as H, asProjectRelativePath as I, CreateRunnerScheduleOptions as J, RunScoreKind as K, collectEvalEntries as L, TaskModelSelectionOptions as M, createTaskExecutionContext as N, ScheduledTaskExecutor as O, ModelDefinition as P, RunnerMatrixSelection as Q, CreateVievalRunnerRuntimeContextOptions as R, TaskReporterHooks as S, RunScheduledTasksOptions as T, AggregatedRunSummary as U, AggregatedProviderSummary as V, RunResult as W, RunnerMatrixDefinition as X, InferenceExecutor as Y, RunnerMatrixInput as Z, TaskCaseState as _, EvalDefinition as a, normalizeCacheFilePathSegments as at, TaskExecutionPolicy as b, MatrixAxisValues as c, CacheNamespace as ct, MatrixPrimitive as d, ScheduledTaskMatrix as et, MatrixRow as f, TaskCaseReporterPayload as g, TaskCaseReporterEndPayload as h, CollectedEvalEntry as i, createFilesystemTaskCacheRuntime as it, TaskExecutionContext as j, runScheduledTasks as k, MatrixDefinition as l, TaskCacheRuntime as lt, ScopedMatrices as m, defineEval as n, createRunnerSchedule as nt, EvalModule as o, CacheFileHandle as ot, MatrixValue as p, aggregateRunResults as q, defineTask as r, CreateFilesystemTaskCacheRuntimeOptions as rt, EvalModuleMap as s, CacheFileOptions as st, ConfigHookPlugin as t, ScheduledTaskMatrixMeta as tt, MatrixLayer as u, TaskConcurrencyConfig as v, TaskRunOutput as w, TaskReporterEventPayload as x, TaskDefinition as y, RunnerRuntimeContext as z };
1292
- //# sourceMappingURL=index-DBZKkpBe.d.mts.map
1381
+ export { CreateRunnerScheduleOptions as $, RunScheduledTasksOptions as A, resolveModelByName as B, TaskDefinition as C, TaskRunContext as D, TaskReporterHooks as E, CreateTaskExecutionContextOptions as F, createRunnerRuntimeContext as G, collectEvalEntries as H, TaskExecutionContext as I, AggregatedRunSummary as J, AggregatedProviderSummary as K, TaskModelSelectionOptions as L, RunnerTaskState as M, ScheduledTaskExecutor as N, TaskRunOutput as O, runScheduledTasks as P, aggregateRunResults as Q, createTaskExecutionContext as R, TaskConcurrencyConfig as S, TaskReporterEventPayload as T, CreateVievalRunnerRuntimeContextOptions as U, asProjectRelativePath as V, RunnerRuntimeContext as W, RunScore as X, RunResult as Y, RunScoreKind as Z, ScopedMatrices as _, CliOpenTelemetryReportingConfig as a, ScheduledTaskMatrix as at, TaskCaseReporterPayload as b, EvalDefinition as c, CreateFilesystemTaskCacheRuntimeOptions as ct, MatrixAxisValues as d, CacheFileHandle as dt, InferenceExecutor as et, MatrixDefinition as f, CacheFileOptions as ft, MatrixValue as g, MatrixRow as h, Awaitable as i, ScheduledTask as it, RunnerExecutionError as j, TelemetryAttributeValue as k, EvalModule as l, createFilesystemTaskCacheRuntime as lt, MatrixPrimitive as m, TaskCacheRuntime as mt, defineEval as n, RunnerMatrixInput as nt, CliReportingConfig as o, ScheduledTaskMatrixMeta as ot, MatrixLayer as p, CacheNamespace as pt, AggregatedRunResults as q, defineTask as r, RunnerMatrixSelection as rt, CollectedEvalEntry as s, createRunnerSchedule as st, ConfigHookPlugin as t, RunnerMatrixDefinition as tt, EvalModuleMap as u, normalizeCacheFilePathSegments as ut, TaskAutoRetryDelay as v, TaskExecutionPolicy as w, TaskCaseState as x, TaskCaseReporterEndPayload as y, ModelDefinition as z };
1382
+ //# sourceMappingURL=index-5R1_k2nv.d.mts.map
package/dist/index.d.mts CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as ScheduledTask, C as TaskRunContext, K as RunScoreKind, P as ModelDefinition, W as RunResult, Y as InferenceExecutor, b as TaskExecutionPolicy, j as TaskExecutionContext, l as MatrixDefinition, t as ConfigHookPlugin, u as MatrixLayer, v as TaskConcurrencyConfig, w as TaskRunOutput } from "./index-DBZKkpBe.mjs";
1
+ import { D as TaskRunContext, I as TaskExecutionContext, O as TaskRunOutput, S as TaskConcurrencyConfig, Y as RunResult, Z as RunScoreKind, et as InferenceExecutor, f as MatrixDefinition, it as ScheduledTask, k as TelemetryAttributeValue, o as CliReportingConfig, p as MatrixLayer, t as ConfigHookPlugin, w as TaskExecutionPolicy, z as ModelDefinition } from "./index-5R1_k2nv.mjs";
2
2
  import { a as requiredEnvFrom } from "./env-BeHv_5mo.mjs";
3
3
  import { expect } from "./expect.mjs";
4
4
  import * as _$c12 from "c12";
@@ -259,10 +259,12 @@ interface CliComparisonConfig {
259
259
  * Expects:
260
260
  * - `model` resolves configured models for the current task
261
261
  * - `reporterHooks` follows `TaskRunContext['reporterHooks']`
262
+ * - `telemetry` follows `TaskRunContext['telemetry']`
262
263
  * - `runtimeConcurrency` follows `TaskRunContext['runtimeConcurrency']`
263
264
  */
264
265
  interface CliProjectExecutorContext extends TaskExecutionContext {
265
266
  reporterHooks?: TaskRunContext['reporterHooks'];
267
+ telemetry?: TaskRunContext['telemetry'];
266
268
  runtimeConcurrency?: TaskRunContext['runtimeConcurrency'];
267
269
  }
268
270
  /**
@@ -310,6 +312,12 @@ interface CliConfigBase {
310
312
  * @default {}
311
313
  */
312
314
  env?: NodeJS.ProcessEnv;
315
+ /**
316
+ * Optional reporting integrations shared by CLI run orchestration.
317
+ *
318
+ * @default undefined
319
+ */
320
+ reporting?: CliReportingConfig;
313
321
  }
314
322
  /**
315
323
  * Project mode config for `vieval run`.
@@ -400,7 +408,7 @@ interface CaseRunContext<TInput> extends TaskRunContext {
400
408
  * - `name` to be a stable metric identifier
401
409
  * - `value` to be JSON-serializable
402
410
  */
403
- metric: (name: string, value: boolean | number | string | null) => void;
411
+ metric: (name: string, value: TelemetryAttributeValue) => void;
404
412
  /**
405
413
  * Cooperative abort signal for the current case execution.
406
414
  */
@@ -409,7 +417,7 @@ interface CaseRunContext<TInput> extends TaskRunContext {
409
417
  /**
410
418
  * Callback for one task case.
411
419
  */
412
- type CaseRunner<TInput> = (context: CaseRunContext<TInput>) => Promise<void> | void;
420
+ type CaseRunner<TInput> = (context: CaseRunContext<TInput>) => Promise<unknown> | unknown;
413
421
  /**
414
422
  * Per-group options for `casesFromInputs`.
415
423
  *
package/dist/index.mjs CHANGED
@@ -1,9 +1,9 @@
1
- import { a as defineConfig, i as registerEvalDefinition, s as loadEnv } from "./registry-CcKZqDJY.mjs";
1
+ import { i as registerEvalDefinition, l as loadEnv, o as createNoopTelemetryRuntime, s as defineConfig } from "./registry-BHGMxjpA.mjs";
2
2
  import { t as createSchedulerQueue } from "./queue-DsZQkZO_.mjs";
3
3
  import { n as requiredEnvFrom } from "./env--94B0UtW.mjs";
4
4
  import { defineEval, defineTask } from "./config.mjs";
5
5
  import { expect } from "./expect.mjs";
6
- import { errorMessageFrom } from "@moeru/std";
6
+ import { errorMessageFrom, sleep } from "@moeru/std";
7
7
  //#region src/dsl/task.ts
8
8
  function cloneCaseMatrix(matrix) {
9
9
  return {
@@ -15,15 +15,36 @@ function cloneCaseMatrix(matrix) {
15
15
  function createTaskCaseReporterId(index, name) {
16
16
  return `${index}:${encodeURIComponent(name)}`;
17
17
  }
18
+ function isTelemetryAttributeScalar(value) {
19
+ return typeof value === "boolean" || typeof value === "number" || typeof value === "string";
20
+ }
21
+ function isTelemetryAttributeArray(value) {
22
+ return value.every(isTelemetryAttributeScalar);
23
+ }
24
+ function canAttachMetricAsAttribute(value) {
25
+ if (isTelemetryAttributeScalar(value)) return true;
26
+ return Array.isArray(value) && isTelemetryAttributeArray(value);
27
+ }
18
28
  function assertValidScore(score) {
19
29
  if (!Number.isFinite(score) || score < 0 || score > 1) throw new Error(`Case score must be a finite number in range 0..1, got "${score}".`);
20
30
  }
21
31
  function assertNonNegativeInteger(value, label) {
22
32
  if (!Number.isFinite(value) || !Number.isInteger(value) || value < 0) throw new Error(`Invalid ${label}: ${String(value)}`);
23
33
  }
34
+ function assertNonNegativeNumber(value, label) {
35
+ if (!Number.isFinite(value) || value < 0) throw new Error(`Invalid ${label}: ${String(value)}`);
36
+ }
24
37
  function assertPositiveInteger(value, label) {
25
38
  if (!Number.isFinite(value) || !Number.isInteger(value) || value <= 0) throw new Error(`Invalid ${label}: ${String(value)}`);
26
39
  }
40
+ function autoRetryDelayMs(retryIndex) {
41
+ return 500 * 2 ** (retryIndex - 1);
42
+ }
43
+ function resolveAutoRetryDelay(policy, retryIndex) {
44
+ const delay = policy.autoRetryDelay;
45
+ if (delay == null) return autoRetryDelayMs(retryIndex);
46
+ return typeof delay === "number" ? delay : delay(retryIndex);
47
+ }
27
48
  function emitCaseStart(hooks, payload) {
28
49
  try {
29
50
  hooks?.onCaseStart?.(payload);
@@ -34,6 +55,11 @@ function emitCaseEnd(hooks, payload) {
34
55
  hooks?.onCaseEnd?.(payload);
35
56
  } catch {}
36
57
  }
58
+ function emitReporterEvent(hooks, payload) {
59
+ try {
60
+ hooks?.onEvent?.(payload);
61
+ } catch {}
62
+ }
37
63
  function createCaseTimeoutError(timeout) {
38
64
  const error = /* @__PURE__ */ new Error(`Case timed out after ${timeout}ms.`);
39
65
  error.name = "TimeoutError";
@@ -43,10 +69,12 @@ function normalizeExecutionPolicy(policy, label) {
43
69
  if (policy == null) return;
44
70
  if (policy.autoAttempt != null) assertNonNegativeInteger(policy.autoAttempt, `${label} autoAttempt`);
45
71
  if (policy.autoRetry != null) assertNonNegativeInteger(policy.autoRetry, `${label} autoRetry`);
72
+ if (typeof policy.autoRetryDelay === "number") assertNonNegativeNumber(policy.autoRetryDelay, `${label} autoRetryDelay`);
46
73
  if (policy.timeout != null) assertPositiveInteger(policy.timeout, `${label} timeout`);
47
74
  const normalized = {
48
75
  autoAttempt: policy.autoAttempt,
49
76
  autoRetry: policy.autoRetry,
77
+ autoRetryDelay: policy.autoRetryDelay,
50
78
  timeout: policy.timeout
51
79
  };
52
80
  return Object.values(normalized).some((value) => value != null) ? normalized : void 0;
@@ -55,55 +83,90 @@ function resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy) {
55
83
  return {
56
84
  autoAttempt: taskCase.executionPolicy?.autoAttempt ?? taskExecutionPolicy?.autoAttempt ?? 0,
57
85
  autoRetry: taskCase.executionPolicy?.autoRetry ?? taskExecutionPolicy?.autoRetry ?? 0,
86
+ autoRetryDelay: taskCase.executionPolicy?.autoRetryDelay ?? taskExecutionPolicy?.autoRetryDelay,
58
87
  timeout: taskCase.executionPolicy?.timeout ?? taskExecutionPolicy?.timeout
59
88
  };
60
89
  }
61
90
  async function runCaseOnce(context, taskCase, index, timeout) {
62
91
  const customScoresByKind = /* @__PURE__ */ new Map();
63
92
  const abortController = new AbortController();
93
+ const telemetry = context.telemetry ?? createNoopTelemetryRuntime();
94
+ const caseId = createTaskCaseReporterId(index, taskCase.name);
64
95
  let timeoutHandle;
65
96
  let timedOut = false;
66
97
  let settled = false;
67
98
  try {
68
- const runPromise = Promise.resolve(taskCase.run({
69
- ...context,
70
- matrix: {
71
- ...cloneCaseMatrix(context.task.matrix),
72
- inputs: taskCase.input
73
- },
74
- metric(name, value) {
75
- if (abortController.signal.aborted || settled) return;
76
- context.reporterHooks?.onEvent?.({
77
- caseId: createTaskCaseReporterId(index, taskCase.name),
78
- data: {
99
+ return await telemetry.withSpan("vieval.case", {
100
+ "vieval.case.id": caseId,
101
+ "vieval.case.name": taskCase.name,
102
+ "vieval.task.id": context.task.id,
103
+ "vieval.task.name": context.task.entry.name
104
+ }, async () => {
105
+ const runPromise = Promise.resolve(taskCase.run({
106
+ ...context,
107
+ matrix: {
108
+ ...cloneCaseMatrix(context.task.matrix),
109
+ inputs: taskCase.input
110
+ },
111
+ metric(name, value) {
112
+ if (abortController.signal.aborted || settled) return;
113
+ emitReporterEvent(context.reporterHooks, {
114
+ caseId,
115
+ data: {
116
+ name,
117
+ value
118
+ },
119
+ event: "task.case.metric"
120
+ });
121
+ telemetry.addEvent("vieval.case.metric", {
79
122
  name,
80
123
  value
81
- },
82
- event: "task.case.metric"
124
+ });
125
+ if (canAttachMetricAsAttribute(value)) telemetry.setAttributes({ [name]: value });
126
+ },
127
+ score(score, kind = "exact") {
128
+ if (abortController.signal.aborted || settled) return;
129
+ assertValidScore(score);
130
+ customScoresByKind.set(kind, score);
131
+ telemetry.addEvent("vieval.case.score", {
132
+ "vieval.score.kind": kind,
133
+ "vieval.score.value": score
134
+ });
135
+ emitReporterEvent(context.reporterHooks, {
136
+ caseId,
137
+ data: {
138
+ kind,
139
+ score
140
+ },
141
+ event: "task.case.score"
142
+ });
143
+ },
144
+ signal: abortController.signal
145
+ }));
146
+ if (timeout != null) {
147
+ const timeoutPromise = new Promise((_, reject) => {
148
+ timeoutHandle = setTimeout(() => {
149
+ timedOut = true;
150
+ abortController.abort(createCaseTimeoutError(timeout));
151
+ reject(createCaseTimeoutError(timeout));
152
+ }, timeout);
83
153
  });
84
- },
85
- score(score, kind = "exact") {
86
- if (abortController.signal.aborted || settled) return;
87
- assertValidScore(score);
88
- customScoresByKind.set(kind, score);
89
- },
90
- signal: abortController.signal
91
- }));
92
- if (timeout != null) {
93
- const timeoutPromise = new Promise((_, reject) => {
94
- timeoutHandle = setTimeout(() => {
95
- timedOut = true;
96
- abortController.abort(createCaseTimeoutError(timeout));
97
- reject(createCaseTimeoutError(timeout));
98
- }, timeout);
99
- });
100
- await Promise.race([runPromise, timeoutPromise]);
101
- } else await runPromise;
102
- settled = true;
103
- return {
104
- scoresByKind: customScoresByKind,
105
- state: "passed"
106
- };
154
+ const output = await Promise.race([runPromise, timeoutPromise]);
155
+ settled = true;
156
+ return {
157
+ output,
158
+ scoresByKind: customScoresByKind,
159
+ state: "passed"
160
+ };
161
+ }
162
+ const output = await runPromise;
163
+ settled = true;
164
+ return {
165
+ output,
166
+ scoresByKind: customScoresByKind,
167
+ state: "passed"
168
+ };
169
+ });
107
170
  } catch (error) {
108
171
  settled = true;
109
172
  return {
@@ -119,12 +182,18 @@ async function executeRegisteredCase(context, taskCase, index, totalCases, taskE
119
182
  const resolvedPolicy = resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy);
120
183
  let lastOutcome;
121
184
  for (let retryIndex = 0; retryIndex <= resolvedPolicy.autoRetry; retryIndex += 1) {
185
+ if (retryIndex > 0) {
186
+ const retryDelayMs = resolveAutoRetryDelay(resolvedPolicy, retryIndex);
187
+ assertNonNegativeNumber(retryDelayMs, "autoRetryDelay result");
188
+ if (retryDelayMs > 0) await sleep(retryDelayMs);
189
+ }
122
190
  emitCaseStart(context.reporterHooks, {
123
191
  ...resolvedPolicy.autoRetry > 0 ? {
124
192
  autoRetry: resolvedPolicy.autoRetry,
125
193
  retryIndex
126
194
  } : {},
127
195
  index,
196
+ ...taskCase.input === void 0 ? {} : { input: taskCase.input },
128
197
  name: taskCase.name,
129
198
  total: totalCases
130
199
  });
@@ -280,6 +349,7 @@ function describeTask(name, build, options = {}) {
280
349
  emitCaseEnd(context.reporterHooks, {
281
350
  ...outcome.errorMessage == null ? {} : { errorMessage: outcome.errorMessage },
282
351
  index,
352
+ ...outcome.output === void 0 ? {} : { output: outcome.output },
283
353
  state: outcome.state,
284
354
  name: taskCase.name,
285
355
  total: totalCases
@@ -323,6 +393,7 @@ function describeTask(name, build, options = {}) {
323
393
  emitCaseEnd(context.reporterHooks, {
324
394
  ...outcome.errorMessage == null ? {} : { errorMessage: outcome.errorMessage },
325
395
  index,
396
+ ...outcome.output === void 0 ? {} : { output: outcome.output },
326
397
  state: outcome.state,
327
398
  name: taskCase.name,
328
399
  total: totalCases
@@ -1 +1 @@
1
- {"version":3,"file":"index.mjs","names":[],"sources":["../src/dsl/task.ts"],"sourcesContent":["import type { TaskConcurrencyConfig, TaskExecutionPolicy, TaskRunContext, TaskRunOutput } from '../config'\nimport type { RunScoreKind } from '../core/runner'\n\nimport { errorMessageFrom } from '@moeru/std'\n\nimport { defineEval, defineTask } from '../config'\nimport { createSchedulerQueue } from '../core/scheduler/queue'\nimport { registerEvalDefinition } from './registry'\n\n/**\n * Runtime context provided to a task case callback.\n */\nexport interface CaseRunContext<TInput> extends TaskRunContext {\n /**\n * Case-scoped matrix payload.\n */\n matrix: TaskRunContext['task']['matrix'] & { inputs: TInput }\n /**\n * Overrides one case score family with a custom normalized value.\n *\n * Use when:\n * - one case computes a benchmark-native score that should flow into run aggregation\n *\n * Expects:\n * - `score` to stay in the `0..1` range\n */\n score: (score: number, kind?: RunScoreKind) => void\n /**\n * Emits one custom case metric into report events.\n *\n * Use when:\n * - tasks need structured benchmark metadata beyond exact/judge score families\n *\n * Expects:\n * - `name` to be a stable metric identifier\n * - `value` to be JSON-serializable\n */\n metric: (name: string, value: boolean | number | string | null) => void\n /**\n * Cooperative abort signal for the current case execution.\n */\n signal: AbortSignal\n}\n\n/**\n * Callback for one task case.\n */\nexport type CaseRunner<TInput> = (context: CaseRunContext<TInput>) => Promise<void> | void\n\ninterface RegisteredCase<TInput> {\n concurrency?: number\n executionPolicy?: TaskExecutionPolicy\n input: TInput\n name: string\n queueKey?: object\n run: CaseRunner<TInput>\n}\n\n/**\n * Per-group options for `casesFromInputs`.\n *\n * Use when:\n * - one generated case group should run with a lower case concurrency than the task default\n * - a task should keep a broader task-level cap while one expensive case family stays bounded\n *\n * Expects:\n * - `concurrency` to be a positive integer when provided\n *\n * Returns:\n * - one partial case-group execution descriptor\n */\nexport interface CasesFromInputsOptions extends TaskExecutionPolicy {\n /**\n * Case-level concurrency cap for cases registered by one `casesFromInputs(...)` call.\n */\n concurrency?: number\n}\n\n/**\n * Per-case registration options for `caseOf`.\n */\nexport interface CaseRegistrationOptions<TInput> extends TaskExecutionPolicy {\n /**\n * Optional case input payload.\n */\n input: TInput\n}\n\ninterface CaseExecutionOutcome {\n errorMessage?: string\n scoresByKind: Map<RunScoreKind, number>\n state: 'failed' | 'passed' | 'timeout'\n}\n\nfunction cloneCaseMatrix(matrix: TaskRunContext['task']['matrix']): TaskRunContext['task']['matrix'] {\n return {\n eval: {\n ...matrix.eval,\n },\n meta: {\n ...matrix.meta,\n },\n run: {\n ...matrix.run,\n },\n }\n}\n\nfunction createTaskCaseReporterId(index: number, name: string): string {\n return `${index}:${encodeURIComponent(name)}`\n}\n\nfunction assertValidScore(score: number): void {\n if (!Number.isFinite(score) || score < 0 || score > 1) {\n throw new Error(`Case score must be a finite number in range 0..1, got \"${score}\".`)\n }\n}\n\nfunction assertNonNegativeInteger(value: number, label: string): void {\n if (!Number.isFinite(value) || !Number.isInteger(value) || value < 0) {\n throw new Error(`Invalid ${label}: ${String(value)}`)\n }\n}\n\nfunction assertPositiveInteger(value: number, label: string): void {\n if (!Number.isFinite(value) || !Number.isInteger(value) || value <= 0) {\n throw new Error(`Invalid ${label}: ${String(value)}`)\n }\n}\n\nfunction emitCaseStart(\n hooks: TaskRunContext['reporterHooks'] | undefined,\n payload: {\n autoRetry?: number\n index: number\n name: string\n retryIndex?: number\n total: number\n },\n): void {\n try {\n hooks?.onCaseStart?.(payload)\n }\n catch {\n // Reporter hooks must never affect task scoring.\n }\n}\n\nfunction emitCaseEnd(\n hooks: TaskRunContext['reporterHooks'] | undefined,\n payload: {\n index: number\n state: 'passed' | 'failed' | 'timeout'\n name: string\n total: number\n errorMessage?: string\n },\n): void {\n try {\n hooks?.onCaseEnd?.(payload)\n }\n catch {\n // Reporter hooks must never affect task scoring.\n }\n}\n\nfunction createCaseTimeoutError(timeout: number): Error {\n const error = new Error(`Case timed out after ${timeout}ms.`)\n error.name = 'TimeoutError'\n return error\n}\n\nfunction normalizeExecutionPolicy(policy: TaskExecutionPolicy | undefined, label: string): TaskExecutionPolicy | undefined {\n if (policy == null) {\n return undefined\n }\n\n if (policy.autoAttempt != null) {\n assertNonNegativeInteger(policy.autoAttempt, `${label} autoAttempt`)\n }\n\n if (policy.autoRetry != null) {\n assertNonNegativeInteger(policy.autoRetry, `${label} autoRetry`)\n }\n\n if (policy.timeout != null) {\n assertPositiveInteger(policy.timeout, `${label} timeout`)\n }\n\n const normalized = {\n autoAttempt: policy.autoAttempt,\n autoRetry: policy.autoRetry,\n timeout: policy.timeout,\n }\n\n return Object.values(normalized).some(value => value != null)\n ? normalized\n : undefined\n}\n\nfunction resolveCaseExecutionPolicy(\n taskCase: RegisteredCase<unknown>,\n taskExecutionPolicy: TaskExecutionPolicy | undefined,\n): Required<Pick<TaskExecutionPolicy, 'autoAttempt' | 'autoRetry'>> & Pick<TaskExecutionPolicy, 'timeout'> {\n return {\n autoAttempt: taskCase.executionPolicy?.autoAttempt ?? taskExecutionPolicy?.autoAttempt ?? 0,\n autoRetry: taskCase.executionPolicy?.autoRetry ?? taskExecutionPolicy?.autoRetry ?? 0,\n timeout: taskCase.executionPolicy?.timeout ?? taskExecutionPolicy?.timeout,\n }\n}\n\nasync function runCaseOnce(\n context: TaskRunContext,\n taskCase: RegisteredCase<unknown>,\n index: number,\n timeout: number | undefined,\n): Promise<CaseExecutionOutcome> {\n const customScoresByKind = new Map<RunScoreKind, number>()\n const abortController = new AbortController()\n let timeoutHandle: ReturnType<typeof setTimeout> | undefined\n let timedOut = false\n let settled = false\n\n try {\n const runPromise = Promise.resolve(taskCase.run({\n ...context,\n matrix: {\n ...cloneCaseMatrix(context.task.matrix),\n inputs: taskCase.input,\n },\n metric(name, value) {\n if (abortController.signal.aborted || settled) {\n return\n }\n\n context.reporterHooks?.onEvent?.({\n caseId: createTaskCaseReporterId(index, taskCase.name),\n data: {\n name,\n value,\n },\n event: 'task.case.metric',\n })\n },\n score(score, kind = 'exact') {\n if (abortController.signal.aborted || settled) {\n return\n }\n\n assertValidScore(score)\n customScoresByKind.set(kind, score)\n },\n signal: abortController.signal,\n }))\n\n if (timeout != null) {\n const timeoutPromise = new Promise<never>((_, reject) => {\n timeoutHandle = setTimeout(() => {\n timedOut = true\n abortController.abort(createCaseTimeoutError(timeout))\n reject(createCaseTimeoutError(timeout))\n }, timeout)\n })\n\n await Promise.race([runPromise, timeoutPromise])\n }\n else {\n await runPromise\n }\n\n settled = true\n return {\n scoresByKind: customScoresByKind,\n state: 'passed',\n }\n }\n catch (error) {\n settled = true\n return {\n errorMessage: errorMessageFrom(error) ?? (timedOut && timeout != null ? `Case timed out after ${timeout}ms.` : 'Unknown case failure.'),\n scoresByKind: customScoresByKind,\n state: timedOut ? 'timeout' : 'failed',\n }\n }\n finally {\n if (timeoutHandle != null) {\n clearTimeout(timeoutHandle)\n }\n }\n}\n\nasync function executeRegisteredCase(\n context: TaskRunContext,\n taskCase: RegisteredCase<unknown>,\n index: number,\n totalCases: number,\n taskExecutionPolicy: TaskExecutionPolicy | undefined,\n): Promise<CaseExecutionOutcome> {\n const resolvedPolicy = resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy)\n let lastOutcome: CaseExecutionOutcome | undefined\n\n for (let retryIndex = 0; retryIndex <= resolvedPolicy.autoRetry; retryIndex += 1) {\n emitCaseStart(context.reporterHooks, {\n ...(resolvedPolicy.autoRetry > 0\n ? {\n autoRetry: resolvedPolicy.autoRetry,\n retryIndex,\n }\n : {}),\n index,\n name: taskCase.name,\n total: totalCases,\n })\n lastOutcome = await runCaseOnce(context, taskCase, index, resolvedPolicy.timeout)\n if (lastOutcome.state === 'passed') {\n return lastOutcome\n }\n }\n\n return lastOutcome ?? {\n errorMessage: 'Unknown case failure.',\n scoresByKind: new Map(),\n state: 'failed',\n }\n}\n\nfunction collectCaseOutcomeScores(\n outcome: CaseExecutionOutcome,\n scoreBucketsByKind: Record<RunScoreKind, number[]>,\n): void {\n if (outcome.state !== 'passed') {\n scoreBucketsByKind.exact.push(0)\n return\n }\n\n if (outcome.scoresByKind.size === 0) {\n scoreBucketsByKind.exact.push(1)\n return\n }\n\n scoreBucketsByKind.exact.push(outcome.scoresByKind.get('exact') ?? 1)\n const judgeScore = outcome.scoresByKind.get('judge')\n if (judgeScore != null) {\n scoreBucketsByKind.judge.push(judgeScore)\n }\n}\n\n/**\n * Builder callbacks passed into `describeTask`.\n */\nexport interface DescribeTaskBuilder {\n /**\n * Registers one explicit case.\n */\n caseOf: {\n (name: string, run: CaseRunner<undefined>): void\n <TInput>(name: string, run: CaseRunner<TInput>, options: CaseRegistrationOptions<TInput>): void\n }\n /**\n * Registers multiple cases from input list.\n */\n casesFromInputs: <TInput>(\n namePrefix: string,\n inputs: readonly TInput[],\n run: CaseRunner<TInput>,\n options?: CasesFromInputsOptions,\n ) => void\n}\n\n/**\n * Options for `describeTask`.\n */\nexport interface DescribeTaskOptions extends TaskExecutionPolicy {\n /**\n * Optional description override.\n */\n description?: string\n /**\n * Optional task-local concurrency overrides.\n *\n * Use when:\n * - one task should cap attempt fan-out independently from the surrounding project\n * - one task should cap case fan-out without changing global scheduling defaults\n *\n * Expects:\n * - each provided value to be a positive integer\n *\n * @default inherited from project or CLI concurrency settings\n */\n concurrency?: TaskConcurrencyConfig\n}\n\nfunction createCaseBuilder(registeredCases: RegisteredCase<unknown>[]): DescribeTaskBuilder {\n function registerCase(name: string, run: CaseRunner<undefined>): void\n function registerCase<TInput>(name: string, run: CaseRunner<TInput>, options: CaseRegistrationOptions<TInput>): void\n function registerCase<TInput>(\n name: string,\n run: CaseRunner<TInput> | CaseRunner<undefined>,\n options?: CaseRegistrationOptions<TInput>,\n ): void {\n registeredCases.push({\n executionPolicy: normalizeExecutionPolicy(options, 'task case'),\n input: options?.input,\n name,\n run: run as CaseRunner<unknown>,\n })\n }\n\n return {\n caseOf: registerCase,\n casesFromInputs(namePrefix, inputs, run, options) {\n const queueKey = options?.concurrency == null ? undefined : {}\n\n inputs.forEach((input, index) => {\n registeredCases.push({\n concurrency: options?.concurrency,\n executionPolicy: normalizeExecutionPolicy(options, 'casesFromInputs'),\n input,\n name: `${namePrefix} #${index + 1}`,\n queueKey,\n run: run as CaseRunner<unknown>,\n })\n })\n },\n }\n}\n\nlet activeCasesStack: RegisteredCase<unknown>[][] = []\n\nfunction withActiveCases<T>(cases: RegisteredCase<unknown>[], callback: () => T): T {\n activeCasesStack = [...activeCasesStack, cases]\n\n try {\n return callback()\n }\n finally {\n activeCasesStack = activeCasesStack.slice(0, -1)\n }\n}\n\nfunction getActiveCases(): RegisteredCase<unknown>[] {\n const active = activeCasesStack.at(-1)\n if (active == null) {\n throw new Error('caseOf/casesFromInputs must be called inside describeTask/describeEval.')\n }\n\n return active\n}\n\n/**\n * Registers one case in the currently active task scope.\n */\nexport function caseOf(\n name: string,\n run: CaseRunner<undefined>,\n): void\n\nexport function caseOf<TInput>(\n name: string,\n run: CaseRunner<TInput>,\n options: CaseRegistrationOptions<TInput>,\n): void\n\nexport function caseOf<TInput>(\n name: string,\n run: CaseRunner<TInput> | CaseRunner<undefined>,\n options?: CaseRegistrationOptions<TInput>,\n): void {\n getActiveCases().push({\n executionPolicy: normalizeExecutionPolicy(options, 'task case'),\n input: options?.input,\n name,\n run: run as CaseRunner<unknown>,\n })\n}\n\n/**\n * Registers multiple cases in the currently active task scope.\n */\nexport function casesFromInputs<TInput>(\n namePrefix: string,\n inputs: readonly TInput[],\n run: CaseRunner<TInput>,\n options?: CasesFromInputsOptions,\n): void {\n const queueKey = options?.concurrency == null ? undefined : {}\n\n inputs.forEach((input, index) => {\n getActiveCases().push({\n concurrency: options?.concurrency,\n executionPolicy: normalizeExecutionPolicy(options, 'casesFromInputs'),\n input,\n name: `${namePrefix} #${index + 1}`,\n queueKey,\n run: run as CaseRunner<unknown>,\n })\n })\n}\n\n/**\n * Resolves the effective case concurrency for one registered task case.\n *\n * Before:\n * - registered case override `2`, task default `4`\n * - registered case override `undefined`, task default `3`\n *\n * After:\n * - `2`\n * - `3`\n */\nfunction resolveCaseConcurrency(\n taskCase: RegisteredCase<unknown>,\n taskConcurrency: TaskConcurrencyConfig | undefined,\n runtimeConcurrency: TaskConcurrencyConfig | undefined,\n): number | undefined {\n const concurrency = runtimeConcurrency?.case ?? taskCase.concurrency ?? taskConcurrency?.case\n if (concurrency == null) {\n return undefined\n }\n\n if (!Number.isFinite(concurrency) || !Number.isInteger(concurrency) || concurrency <= 0) {\n throw new Error(`Invalid task case concurrency: ${String(concurrency)}`)\n }\n\n return concurrency\n}\n\nfunction resolveCaseQueueKey(taskCase: RegisteredCase<unknown>, defaultQueueKey: object): object {\n return taskCase.queueKey ?? defaultQueueKey\n}\n\n/**\n * Defines one eval task with task/case semantics similar to Vitest.\n *\n * Use when:\n * - task behavior should be declared with `caseOf` and `casesFromInputs`\n * - business agent code should be imported and run from eval task files\n */\nexport function describeTask(\n name: string,\n build: ((builder: DescribeTaskBuilder) => void) | (() => void),\n options: DescribeTaskOptions = {},\n) {\n const registeredCases: RegisteredCase<unknown>[] = []\n const builder = createCaseBuilder(registeredCases)\n withActiveCases(registeredCases, () => {\n if (build.length > 0) {\n (build as (builder: DescribeTaskBuilder) => void)(builder)\n return\n }\n\n ;(build as () => void)()\n })\n\n const description = options.description ?? name\n const taskExecutionPolicy = normalizeExecutionPolicy(options, 'describeTask')\n\n const definition = defineEval({\n description,\n name,\n task: defineTask({\n concurrency: options.concurrency,\n executionPolicy: taskExecutionPolicy,\n id: name,\n async run(context): Promise<TaskRunOutput> {\n if (registeredCases.length === 0) {\n return {\n scores: [{ kind: 'exact', score: 1 }],\n }\n }\n\n const totalCases = registeredCases.length\n const scoreBucketsByKind: Record<RunScoreKind, number[]> = {\n exact: [],\n judge: [],\n }\n const defaultCaseQueueKey = {}\n const caseQueues = new Map<object, ReturnType<typeof createSchedulerQueue>>()\n const hasAutoAttempt = registeredCases.some(taskCase => resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy).autoAttempt > 0)\n const runtimeTaskConcurrency = context.task.entry.task?.concurrency ?? options.concurrency\n\n if (!hasAutoAttempt) {\n await Promise.all(\n registeredCases.map(async (taskCase, index) => {\n const executeCase = async () => {\n const outcome = await executeRegisteredCase(context, taskCase, index, totalCases, taskExecutionPolicy)\n emitCaseEnd(context.reporterHooks, {\n ...(outcome.errorMessage == null ? {} : { errorMessage: outcome.errorMessage }),\n index,\n state: outcome.state,\n name: taskCase.name,\n total: totalCases,\n })\n collectCaseOutcomeScores(outcome, scoreBucketsByKind)\n }\n\n const concurrency = resolveCaseConcurrency(taskCase, runtimeTaskConcurrency, context.runtimeConcurrency)\n if (concurrency == null) {\n await executeCase()\n return\n }\n\n const queueKey = resolveCaseQueueKey(taskCase, defaultCaseQueueKey)\n const queue = caseQueues.get(queueKey) ?? createSchedulerQueue(concurrency)\n caseQueues.set(queueKey, queue)\n await queue.run(executeCase)\n }),\n )\n }\n else {\n let finalOutcomes: CaseExecutionOutcome[] = []\n let attemptIndex = 0\n\n for (;;) {\n finalOutcomes = await Promise.all(\n registeredCases.map(async (taskCase, index) => {\n const executeCase = async () => await executeRegisteredCase(context, taskCase, index, totalCases, taskExecutionPolicy)\n const concurrency = resolveCaseConcurrency(taskCase, runtimeTaskConcurrency, context.runtimeConcurrency)\n if (concurrency == null) {\n return await executeCase()\n }\n\n const queueKey = resolveCaseQueueKey(taskCase, defaultCaseQueueKey)\n const queue = caseQueues.get(queueKey) ?? createSchedulerQueue(concurrency)\n caseQueues.set(queueKey, queue)\n return await queue.run(executeCase)\n }),\n )\n\n const shouldContinue = finalOutcomes.some((outcome, index) => {\n if (outcome.state === 'passed') {\n return false\n }\n\n const taskCase = registeredCases[index]\n if (taskCase == null) {\n return false\n }\n\n return attemptIndex < resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy).autoAttempt\n })\n\n if (!shouldContinue) {\n break\n }\n\n attemptIndex += 1\n }\n\n finalOutcomes.forEach((outcome, index) => {\n const taskCase = registeredCases[index]\n if (taskCase == null) {\n return\n }\n\n emitCaseEnd(context.reporterHooks, {\n ...(outcome.errorMessage == null ? {} : { errorMessage: outcome.errorMessage }),\n index,\n state: outcome.state,\n name: taskCase.name,\n total: totalCases,\n })\n collectCaseOutcomeScores(outcome, scoreBucketsByKind)\n })\n }\n\n const scores = (Object.keys(scoreBucketsByKind) as RunScoreKind[])\n .filter(kind => scoreBucketsByKind[kind].length > 0)\n .map((kind) => {\n const values = scoreBucketsByKind[kind]\n const total = values.reduce((sum, value) => sum + value, 0)\n return {\n kind,\n score: total / values.length,\n }\n })\n\n return {\n scores,\n }\n },\n }),\n })\n\n registerEvalDefinition(definition)\n\n return definition\n}\n\n/**\n * Alias of `describeTask` for eval-centric naming.\n */\nexport const describeEval = describeTask\n"],"mappings":";;;;;;;AA8FA,SAAS,gBAAgB,QAA4E;AACnG,QAAO;EACL,MAAM,EACJ,GAAG,OAAO,MACX;EACD,MAAM,EACJ,GAAG,OAAO,MACX;EACD,KAAK,EACH,GAAG,OAAO,KACX;EACF;;AAGH,SAAS,yBAAyB,OAAe,MAAsB;AACrE,QAAO,GAAG,MAAM,GAAG,mBAAmB,KAAK;;AAG7C,SAAS,iBAAiB,OAAqB;AAC7C,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,QAAQ,KAAK,QAAQ,EAClD,OAAM,IAAI,MAAM,0DAA0D,MAAM,IAAI;;AAIxF,SAAS,yBAAyB,OAAe,OAAqB;AACpE,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,CAAC,OAAO,UAAU,MAAM,IAAI,QAAQ,EACjE,OAAM,IAAI,MAAM,WAAW,MAAM,IAAI,OAAO,MAAM,GAAG;;AAIzD,SAAS,sBAAsB,OAAe,OAAqB;AACjE,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,CAAC,OAAO,UAAU,MAAM,IAAI,SAAS,EAClE,OAAM,IAAI,MAAM,WAAW,MAAM,IAAI,OAAO,MAAM,GAAG;;AAIzD,SAAS,cACP,OACA,SAOM;AACN,KAAI;AACF,SAAO,cAAc,QAAQ;SAEzB;;AAKR,SAAS,YACP,OACA,SAOM;AACN,KAAI;AACF,SAAO,YAAY,QAAQ;SAEvB;;AAKR,SAAS,uBAAuB,SAAwB;CACtD,MAAM,wBAAQ,IAAI,MAAM,wBAAwB,QAAQ,KAAK;AAC7D,OAAM,OAAO;AACb,QAAO;;AAGT,SAAS,yBAAyB,QAAyC,OAAgD;AACzH,KAAI,UAAU,KACZ;AAGF,KAAI,OAAO,eAAe,KACxB,0BAAyB,OAAO,aAAa,GAAG,MAAM,cAAc;AAGtE,KAAI,OAAO,aAAa,KACtB,0BAAyB,OAAO,WAAW,GAAG,MAAM,YAAY;AAGlE,KAAI,OAAO,WAAW,KACpB,uBAAsB,OAAO,SAAS,GAAG,MAAM,UAAU;CAG3D,MAAM,aAAa;EACjB,aAAa,OAAO;EACpB,WAAW,OAAO;EAClB,SAAS,OAAO;EACjB;AAED,QAAO,OAAO,OAAO,WAAW,CAAC,MAAK,UAAS,SAAS,KAAK,GACzD,aACA,KAAA;;AAGN,SAAS,2BACP,UACA,qBACyG;AACzG,QAAO;EACL,aAAa,SAAS,iBAAiB,eAAe,qBAAqB,eAAe;EAC1F,WAAW,SAAS,iBAAiB,aAAa,qBAAqB,aAAa;EACpF,SAAS,SAAS,iBAAiB,WAAW,qBAAqB;EACpE;;AAGH,eAAe,YACb,SACA,UACA,OACA,SAC+B;CAC/B,MAAM,qCAAqB,IAAI,KAA2B;CAC1D,MAAM,kBAAkB,IAAI,iBAAiB;CAC7C,IAAI;CACJ,IAAI,WAAW;CACf,IAAI,UAAU;AAEd,KAAI;EACF,MAAM,aAAa,QAAQ,QAAQ,SAAS,IAAI;GAC9C,GAAG;GACH,QAAQ;IACN,GAAG,gBAAgB,QAAQ,KAAK,OAAO;IACvC,QAAQ,SAAS;IAClB;GACD,OAAO,MAAM,OAAO;AAClB,QAAI,gBAAgB,OAAO,WAAW,QACpC;AAGF,YAAQ,eAAe,UAAU;KAC/B,QAAQ,yBAAyB,OAAO,SAAS,KAAK;KACtD,MAAM;MACJ;MACA;MACD;KACD,OAAO;KACR,CAAC;;GAEJ,MAAM,OAAO,OAAO,SAAS;AAC3B,QAAI,gBAAgB,OAAO,WAAW,QACpC;AAGF,qBAAiB,MAAM;AACvB,uBAAmB,IAAI,MAAM,MAAM;;GAErC,QAAQ,gBAAgB;GACzB,CAAC,CAAC;AAEH,MAAI,WAAW,MAAM;GACnB,MAAM,iBAAiB,IAAI,SAAgB,GAAG,WAAW;AACvD,oBAAgB,iBAAiB;AAC/B,gBAAW;AACX,qBAAgB,MAAM,uBAAuB,QAAQ,CAAC;AACtD,YAAO,uBAAuB,QAAQ,CAAC;OACtC,QAAQ;KACX;AAEF,SAAM,QAAQ,KAAK,CAAC,YAAY,eAAe,CAAC;QAGhD,OAAM;AAGR,YAAU;AACV,SAAO;GACL,cAAc;GACd,OAAO;GACR;UAEI,OAAO;AACZ,YAAU;AACV,SAAO;GACL,cAAc,iBAAiB,MAAM,KAAK,YAAY,WAAW,OAAO,wBAAwB,QAAQ,OAAO;GAC/G,cAAc;GACd,OAAO,WAAW,YAAY;GAC/B;WAEK;AACN,MAAI,iBAAiB,KACnB,cAAa,cAAc;;;AAKjC,eAAe,sBACb,SACA,UACA,OACA,YACA,qBAC+B;CAC/B,MAAM,iBAAiB,2BAA2B,UAAU,oBAAoB;CAChF,IAAI;AAEJ,MAAK,IAAI,aAAa,GAAG,cAAc,eAAe,WAAW,cAAc,GAAG;AAChF,gBAAc,QAAQ,eAAe;GACnC,GAAI,eAAe,YAAY,IAC3B;IACE,WAAW,eAAe;IAC1B;IACD,GACD,EAAE;GACN;GACA,MAAM,SAAS;GACf,OAAO;GACR,CAAC;AACF,gBAAc,MAAM,YAAY,SAAS,UAAU,OAAO,eAAe,QAAQ;AACjF,MAAI,YAAY,UAAU,SACxB,QAAO;;AAIX,QAAO,eAAe;EACpB,cAAc;EACd,8BAAc,IAAI,KAAK;EACvB,OAAO;EACR;;AAGH,SAAS,yBACP,SACA,oBACM;AACN,KAAI,QAAQ,UAAU,UAAU;AAC9B,qBAAmB,MAAM,KAAK,EAAE;AAChC;;AAGF,KAAI,QAAQ,aAAa,SAAS,GAAG;AACnC,qBAAmB,MAAM,KAAK,EAAE;AAChC;;AAGF,oBAAmB,MAAM,KAAK,QAAQ,aAAa,IAAI,QAAQ,IAAI,EAAE;CACrE,MAAM,aAAa,QAAQ,aAAa,IAAI,QAAQ;AACpD,KAAI,cAAc,KAChB,oBAAmB,MAAM,KAAK,WAAW;;AAiD7C,SAAS,kBAAkB,iBAAiE;CAG1F,SAAS,aACP,MACA,KACA,SACM;AACN,kBAAgB,KAAK;GACnB,iBAAiB,yBAAyB,SAAS,YAAY;GAC/D,OAAO,SAAS;GAChB;GACK;GACN,CAAC;;AAGJ,QAAO;EACL,QAAQ;EACR,gBAAgB,YAAY,QAAQ,KAAK,SAAS;GAChD,MAAM,WAAW,SAAS,eAAe,OAAO,KAAA,IAAY,EAAE;AAE9D,UAAO,SAAS,OAAO,UAAU;AAC/B,oBAAgB,KAAK;KACnB,aAAa,SAAS;KACtB,iBAAiB,yBAAyB,SAAS,kBAAkB;KACrE;KACA,MAAM,GAAG,WAAW,IAAI,QAAQ;KAChC;KACK;KACN,CAAC;KACF;;EAEL;;AAGH,IAAI,mBAAgD,EAAE;AAEtD,SAAS,gBAAmB,OAAkC,UAAsB;AAClF,oBAAmB,CAAC,GAAG,kBAAkB,MAAM;AAE/C,KAAI;AACF,SAAO,UAAU;WAEX;AACN,qBAAmB,iBAAiB,MAAM,GAAG,GAAG;;;AAIpD,SAAS,iBAA4C;CACnD,MAAM,SAAS,iBAAiB,GAAG,GAAG;AACtC,KAAI,UAAU,KACZ,OAAM,IAAI,MAAM,0EAA0E;AAG5F,QAAO;;AAiBT,SAAgB,OACd,MACA,KACA,SACM;AACN,iBAAgB,CAAC,KAAK;EACpB,iBAAiB,yBAAyB,SAAS,YAAY;EAC/D,OAAO,SAAS;EAChB;EACK;EACN,CAAC;;;;;AAMJ,SAAgB,gBACd,YACA,QACA,KACA,SACM;CACN,MAAM,WAAW,SAAS,eAAe,OAAO,KAAA,IAAY,EAAE;AAE9D,QAAO,SAAS,OAAO,UAAU;AAC/B,kBAAgB,CAAC,KAAK;GACpB,aAAa,SAAS;GACtB,iBAAiB,yBAAyB,SAAS,kBAAkB;GACrE;GACA,MAAM,GAAG,WAAW,IAAI,QAAQ;GAChC;GACK;GACN,CAAC;GACF;;;;;;;;;;;;;AAcJ,SAAS,uBACP,UACA,iBACA,oBACoB;CACpB,MAAM,cAAc,oBAAoB,QAAQ,SAAS,eAAe,iBAAiB;AACzF,KAAI,eAAe,KACjB;AAGF,KAAI,CAAC,OAAO,SAAS,YAAY,IAAI,CAAC,OAAO,UAAU,YAAY,IAAI,eAAe,EACpF,OAAM,IAAI,MAAM,kCAAkC,OAAO,YAAY,GAAG;AAG1E,QAAO;;AAGT,SAAS,oBAAoB,UAAmC,iBAAiC;AAC/F,QAAO,SAAS,YAAY;;;;;;;;;AAU9B,SAAgB,aACd,MACA,OACA,UAA+B,EAAE,EACjC;CACA,MAAM,kBAA6C,EAAE;CACrD,MAAM,UAAU,kBAAkB,gBAAgB;AAClD,iBAAgB,uBAAuB;AACrC,MAAI,MAAM,SAAS,GAAG;AACnB,SAAiD,QAAQ;AAC1D;;AAGA,SAAsB;GACxB;CAEF,MAAM,cAAc,QAAQ,eAAe;CAC3C,MAAM,sBAAsB,yBAAyB,SAAS,eAAe;CAE7E,MAAM,aAAa,WAAW;EAC5B;EACA;EACA,MAAM,WAAW;GACf,aAAa,QAAQ;GACrB,iBAAiB;GACjB,IAAI;GACJ,MAAM,IAAI,SAAiC;AACzC,QAAI,gBAAgB,WAAW,EAC7B,QAAO,EACL,QAAQ,CAAC;KAAE,MAAM;KAAS,OAAO;KAAG,CAAC,EACtC;IAGH,MAAM,aAAa,gBAAgB;IACnC,MAAM,qBAAqD;KACzD,OAAO,EAAE;KACT,OAAO,EAAE;KACV;IACD,MAAM,sBAAsB,EAAE;IAC9B,MAAM,6BAAa,IAAI,KAAsD;IAC7E,MAAM,iBAAiB,gBAAgB,MAAK,aAAY,2BAA2B,UAAU,oBAAoB,CAAC,cAAc,EAAE;IAClI,MAAM,yBAAyB,QAAQ,KAAK,MAAM,MAAM,eAAe,QAAQ;AAE/E,QAAI,CAAC,eACH,OAAM,QAAQ,IACZ,gBAAgB,IAAI,OAAO,UAAU,UAAU;KAC7C,MAAM,cAAc,YAAY;MAC9B,MAAM,UAAU,MAAM,sBAAsB,SAAS,UAAU,OAAO,YAAY,oBAAoB;AACtG,kBAAY,QAAQ,eAAe;OACjC,GAAI,QAAQ,gBAAgB,OAAO,EAAE,GAAG,EAAE,cAAc,QAAQ,cAAc;OAC9E;OACA,OAAO,QAAQ;OACf,MAAM,SAAS;OACf,OAAO;OACR,CAAC;AACF,+BAAyB,SAAS,mBAAmB;;KAGvD,MAAM,cAAc,uBAAuB,UAAU,wBAAwB,QAAQ,mBAAmB;AACxG,SAAI,eAAe,MAAM;AACvB,YAAM,aAAa;AACnB;;KAGF,MAAM,WAAW,oBAAoB,UAAU,oBAAoB;KACnE,MAAM,QAAQ,WAAW,IAAI,SAAS,IAAI,qBAAqB,YAAY;AAC3E,gBAAW,IAAI,UAAU,MAAM;AAC/B,WAAM,MAAM,IAAI,YAAY;MAC5B,CACH;SAEE;KACH,IAAI,gBAAwC,EAAE;KAC9C,IAAI,eAAe;AAEnB,cAAS;AACP,sBAAgB,MAAM,QAAQ,IAC5B,gBAAgB,IAAI,OAAO,UAAU,UAAU;OAC7C,MAAM,cAAc,YAAY,MAAM,sBAAsB,SAAS,UAAU,OAAO,YAAY,oBAAoB;OACtH,MAAM,cAAc,uBAAuB,UAAU,wBAAwB,QAAQ,mBAAmB;AACxG,WAAI,eAAe,KACjB,QAAO,MAAM,aAAa;OAG5B,MAAM,WAAW,oBAAoB,UAAU,oBAAoB;OACnE,MAAM,QAAQ,WAAW,IAAI,SAAS,IAAI,qBAAqB,YAAY;AAC3E,kBAAW,IAAI,UAAU,MAAM;AAC/B,cAAO,MAAM,MAAM,IAAI,YAAY;QACnC,CACH;AAeD,UAAI,CAbmB,cAAc,MAAM,SAAS,UAAU;AAC5D,WAAI,QAAQ,UAAU,SACpB,QAAO;OAGT,MAAM,WAAW,gBAAgB;AACjC,WAAI,YAAY,KACd,QAAO;AAGT,cAAO,eAAe,2BAA2B,UAAU,oBAAoB,CAAC;QAChF,CAGA;AAGF,sBAAgB;;AAGlB,mBAAc,SAAS,SAAS,UAAU;MACxC,MAAM,WAAW,gBAAgB;AACjC,UAAI,YAAY,KACd;AAGF,kBAAY,QAAQ,eAAe;OACjC,GAAI,QAAQ,gBAAgB,OAAO,EAAE,GAAG,EAAE,cAAc,QAAQ,cAAc;OAC9E;OACA,OAAO,QAAQ;OACf,MAAM,SAAS;OACf,OAAO;OACR,CAAC;AACF,+BAAyB,SAAS,mBAAmB;OACrD;;AAcJ,WAAO,EACL,QAZc,OAAO,KAAK,mBAAmB,CAC5C,QAAO,SAAQ,mBAAmB,MAAM,SAAS,EAAE,CACnD,KAAK,SAAS;KACb,MAAM,SAAS,mBAAmB;AAElC,YAAO;MACL;MACA,OAHY,OAAO,QAAQ,KAAK,UAAU,MAAM,OAAO,EAAE,GAG1C,OAAO;MACvB;MACD,EAIH;;GAEJ,CAAC;EACH,CAAC;AAEF,wBAAuB,WAAW;AAElC,QAAO;;;;;AAMT,MAAa,eAAe"}
1
+ {"version":3,"file":"index.mjs","names":[],"sources":["../src/dsl/task.ts"],"sourcesContent":["import type { TaskConcurrencyConfig, TaskExecutionPolicy, TaskReporterEventPayload, TaskRunContext, TaskRunOutput } from '../config'\nimport type { RunScoreKind } from '../core/runner'\nimport type { TelemetryAttributeValue } from '../core/telemetry'\n\nimport { errorMessageFrom, sleep } from '@moeru/std'\n\nimport { defineEval, defineTask } from '../config'\nimport { createSchedulerQueue } from '../core/scheduler/queue'\nimport { createNoopTelemetryRuntime } from '../core/telemetry'\nimport { registerEvalDefinition } from './registry'\n\n/**\n * Runtime context provided to a task case callback.\n */\nexport interface CaseRunContext<TInput> extends TaskRunContext {\n /**\n * Case-scoped matrix payload.\n */\n matrix: TaskRunContext['task']['matrix'] & { inputs: TInput }\n /**\n * Overrides one case score family with a custom normalized value.\n *\n * Use when:\n * - one case computes a benchmark-native score that should flow into run aggregation\n *\n * Expects:\n * - `score` to stay in the `0..1` range\n */\n score: (score: number, kind?: RunScoreKind) => void\n /**\n * Emits one custom case metric into report events.\n *\n * Use when:\n * - tasks need structured benchmark metadata beyond exact/judge score families\n *\n * Expects:\n * - `name` to be a stable metric identifier\n * - `value` to be JSON-serializable\n */\n metric: (name: string, value: TelemetryAttributeValue) => void\n /**\n * Cooperative abort signal for the current case execution.\n */\n signal: AbortSignal\n}\n\n/**\n * Callback for one task case.\n */\nexport type CaseRunner<TInput> = (context: CaseRunContext<TInput>) => Promise<unknown> | unknown\n\ninterface RegisteredCase<TInput> {\n concurrency?: number\n executionPolicy?: TaskExecutionPolicy\n input: TInput\n name: string\n queueKey?: object\n run: CaseRunner<TInput>\n}\n\n/**\n * Per-group options for `casesFromInputs`.\n *\n * Use when:\n * - one generated case group should run with a lower case concurrency than the task default\n * - a task should keep a broader task-level cap while one expensive case family stays bounded\n *\n * Expects:\n * - `concurrency` to be a positive integer when provided\n *\n * Returns:\n * - one partial case-group execution descriptor\n */\nexport interface CasesFromInputsOptions extends TaskExecutionPolicy {\n /**\n * Case-level concurrency cap for cases registered by one `casesFromInputs(...)` call.\n */\n concurrency?: number\n}\n\n/**\n * Per-case registration options for `caseOf`.\n */\nexport interface CaseRegistrationOptions<TInput> extends TaskExecutionPolicy {\n /**\n * Optional case input payload.\n */\n input: TInput\n}\n\ninterface CaseExecutionOutcome {\n errorMessage?: string\n output?: unknown\n scoresByKind: Map<RunScoreKind, number>\n state: 'failed' | 'passed' | 'timeout'\n}\n\nfunction cloneCaseMatrix(matrix: TaskRunContext['task']['matrix']): TaskRunContext['task']['matrix'] {\n return {\n eval: {\n ...matrix.eval,\n },\n meta: {\n ...matrix.meta,\n },\n run: {\n ...matrix.run,\n },\n }\n}\n\nfunction createTaskCaseReporterId(index: number, name: string): string {\n return `${index}:${encodeURIComponent(name)}`\n}\n\nfunction isTelemetryAttributeScalar(value: unknown): value is boolean | number | string {\n return typeof value === 'boolean' || typeof value === 'number' || typeof value === 'string'\n}\n\nfunction isTelemetryAttributeArray(value: readonly TelemetryAttributeValue[]): value is readonly boolean[] | readonly number[] | readonly string[] {\n return value.every(isTelemetryAttributeScalar)\n}\n\nfunction canAttachMetricAsAttribute(value: TelemetryAttributeValue): value is boolean | number | string | readonly boolean[] | readonly number[] | readonly string[] {\n if (isTelemetryAttributeScalar(value)) {\n return true\n }\n\n return Array.isArray(value) && isTelemetryAttributeArray(value)\n}\n\nfunction assertValidScore(score: number): void {\n if (!Number.isFinite(score) || score < 0 || score > 1) {\n throw new Error(`Case score must be a finite number in range 0..1, got \"${score}\".`)\n }\n}\n\nfunction assertNonNegativeInteger(value: number, label: string): void {\n if (!Number.isFinite(value) || !Number.isInteger(value) || value < 0) {\n throw new Error(`Invalid ${label}: ${String(value)}`)\n }\n}\n\nfunction assertNonNegativeNumber(value: number, label: string): void {\n if (!Number.isFinite(value) || value < 0) {\n throw new Error(`Invalid ${label}: ${String(value)}`)\n }\n}\n\nfunction assertPositiveInteger(value: number, label: string): void {\n if (!Number.isFinite(value) || !Number.isInteger(value) || value <= 0) {\n throw new Error(`Invalid ${label}: ${String(value)}`)\n }\n}\n\nfunction autoRetryDelayMs(retryIndex: number): number {\n // Retry index 1 is the first retry after the initial case failure.\n return 500 * 2 ** (retryIndex - 1)\n}\n\nfunction resolveAutoRetryDelay(policy: TaskExecutionPolicy, retryIndex: number): number {\n const delay = policy.autoRetryDelay\n\n if (delay == null) {\n return autoRetryDelayMs(retryIndex)\n }\n\n return typeof delay === 'number' ? delay : delay(retryIndex)\n}\n\nfunction emitCaseStart(\n hooks: TaskRunContext['reporterHooks'] | undefined,\n payload: {\n autoRetry?: number\n index: number\n input?: unknown\n name: string\n retryIndex?: number\n total: number\n },\n): void {\n try {\n hooks?.onCaseStart?.(payload)\n }\n catch {\n // Reporter hooks must never affect task scoring.\n }\n}\n\nfunction emitCaseEnd(\n hooks: TaskRunContext['reporterHooks'] | undefined,\n payload: {\n index: number\n output?: unknown\n state: 'passed' | 'failed' | 'timeout'\n name: string\n total: number\n errorMessage?: string\n },\n): void {\n try {\n hooks?.onCaseEnd?.(payload)\n }\n catch {\n // Reporter hooks must never affect task scoring.\n }\n}\n\nfunction emitReporterEvent(\n hooks: TaskRunContext['reporterHooks'] | undefined,\n payload: TaskReporterEventPayload,\n): void {\n try {\n hooks?.onEvent?.(payload)\n }\n catch {\n // Reporter hooks must never affect task scoring.\n }\n}\n\nfunction createCaseTimeoutError(timeout: number): Error {\n const error = new Error(`Case timed out after ${timeout}ms.`)\n error.name = 'TimeoutError'\n return error\n}\n\nfunction normalizeExecutionPolicy(policy: TaskExecutionPolicy | undefined, label: string): TaskExecutionPolicy | undefined {\n if (policy == null) {\n return undefined\n }\n\n if (policy.autoAttempt != null) {\n assertNonNegativeInteger(policy.autoAttempt, `${label} autoAttempt`)\n }\n\n if (policy.autoRetry != null) {\n assertNonNegativeInteger(policy.autoRetry, `${label} autoRetry`)\n }\n\n if (typeof policy.autoRetryDelay === 'number') {\n assertNonNegativeNumber(policy.autoRetryDelay, `${label} autoRetryDelay`)\n }\n\n if (policy.timeout != null) {\n assertPositiveInteger(policy.timeout, `${label} timeout`)\n }\n\n const normalized = {\n autoAttempt: policy.autoAttempt,\n autoRetry: policy.autoRetry,\n autoRetryDelay: policy.autoRetryDelay,\n timeout: policy.timeout,\n }\n\n return Object.values(normalized).some(value => value != null)\n ? normalized\n : undefined\n}\n\nfunction resolveCaseExecutionPolicy(\n taskCase: RegisteredCase<unknown>,\n taskExecutionPolicy: TaskExecutionPolicy | undefined,\n): Required<Pick<TaskExecutionPolicy, 'autoAttempt' | 'autoRetry'>> & Pick<TaskExecutionPolicy, 'autoRetryDelay' | 'timeout'> {\n return {\n autoAttempt: taskCase.executionPolicy?.autoAttempt ?? taskExecutionPolicy?.autoAttempt ?? 0,\n autoRetry: taskCase.executionPolicy?.autoRetry ?? taskExecutionPolicy?.autoRetry ?? 0,\n autoRetryDelay: taskCase.executionPolicy?.autoRetryDelay ?? taskExecutionPolicy?.autoRetryDelay,\n timeout: taskCase.executionPolicy?.timeout ?? taskExecutionPolicy?.timeout,\n }\n}\n\nasync function runCaseOnce(\n context: TaskRunContext,\n taskCase: RegisteredCase<unknown>,\n index: number,\n timeout: number | undefined,\n): Promise<CaseExecutionOutcome> {\n const customScoresByKind = new Map<RunScoreKind, number>()\n const abortController = new AbortController()\n const telemetry = context.telemetry ?? createNoopTelemetryRuntime()\n const caseId = createTaskCaseReporterId(index, taskCase.name)\n let timeoutHandle: ReturnType<typeof setTimeout> | undefined\n let timedOut = false\n let settled = false\n\n try {\n return await telemetry.withSpan('vieval.case', {\n 'vieval.case.id': caseId,\n 'vieval.case.name': taskCase.name,\n 'vieval.task.id': context.task.id,\n 'vieval.task.name': context.task.entry.name,\n }, async () => {\n const runPromise = Promise.resolve(taskCase.run({\n ...context,\n matrix: {\n ...cloneCaseMatrix(context.task.matrix),\n inputs: taskCase.input,\n },\n metric(name, value) {\n if (abortController.signal.aborted || settled) {\n return\n }\n\n emitReporterEvent(context.reporterHooks, {\n caseId,\n data: {\n name,\n value,\n },\n event: 'task.case.metric',\n })\n telemetry.addEvent('vieval.case.metric', { name, value })\n if (canAttachMetricAsAttribute(value)) {\n telemetry.setAttributes({ [name]: value })\n }\n },\n score(score, kind = 'exact') {\n if (abortController.signal.aborted || settled) {\n return\n }\n\n assertValidScore(score)\n customScoresByKind.set(kind, score)\n telemetry.addEvent('vieval.case.score', {\n 'vieval.score.kind': kind,\n 'vieval.score.value': score,\n })\n emitReporterEvent(context.reporterHooks, {\n caseId,\n data: { kind, score },\n event: 'task.case.score',\n })\n },\n signal: abortController.signal,\n }))\n\n if (timeout != null) {\n const timeoutPromise = new Promise<never>((_, reject) => {\n timeoutHandle = setTimeout(() => {\n timedOut = true\n abortController.abort(createCaseTimeoutError(timeout))\n reject(createCaseTimeoutError(timeout))\n }, timeout)\n })\n\n const output = await Promise.race([runPromise, timeoutPromise])\n settled = true\n return {\n output,\n scoresByKind: customScoresByKind,\n state: 'passed',\n }\n }\n\n const output = await runPromise\n settled = true\n return {\n output,\n scoresByKind: customScoresByKind,\n state: 'passed',\n }\n })\n }\n catch (error) {\n settled = true\n return {\n errorMessage: errorMessageFrom(error) ?? (timedOut && timeout != null ? `Case timed out after ${timeout}ms.` : 'Unknown case failure.'),\n scoresByKind: customScoresByKind,\n state: timedOut ? 'timeout' : 'failed',\n }\n }\n finally {\n if (timeoutHandle != null) {\n clearTimeout(timeoutHandle)\n }\n }\n}\n\nasync function executeRegisteredCase(\n context: TaskRunContext,\n taskCase: RegisteredCase<unknown>,\n index: number,\n totalCases: number,\n taskExecutionPolicy: TaskExecutionPolicy | undefined,\n): Promise<CaseExecutionOutcome> {\n const resolvedPolicy = resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy)\n let lastOutcome: CaseExecutionOutcome | undefined\n\n for (let retryIndex = 0; retryIndex <= resolvedPolicy.autoRetry; retryIndex += 1) {\n if (retryIndex > 0) {\n const retryDelayMs = resolveAutoRetryDelay(resolvedPolicy, retryIndex)\n assertNonNegativeNumber(retryDelayMs, 'autoRetryDelay result')\n\n if (retryDelayMs > 0) {\n await sleep(retryDelayMs)\n }\n }\n\n emitCaseStart(context.reporterHooks, {\n ...(resolvedPolicy.autoRetry > 0\n ? {\n autoRetry: resolvedPolicy.autoRetry,\n retryIndex,\n }\n : {}),\n index,\n ...(taskCase.input === undefined ? {} : { input: taskCase.input }),\n name: taskCase.name,\n total: totalCases,\n })\n lastOutcome = await runCaseOnce(context, taskCase, index, resolvedPolicy.timeout)\n if (lastOutcome.state === 'passed') {\n return lastOutcome\n }\n }\n\n return lastOutcome ?? {\n errorMessage: 'Unknown case failure.',\n scoresByKind: new Map(),\n state: 'failed',\n }\n}\n\nfunction collectCaseOutcomeScores(\n outcome: CaseExecutionOutcome,\n scoreBucketsByKind: Record<RunScoreKind, number[]>,\n): void {\n if (outcome.state !== 'passed') {\n scoreBucketsByKind.exact.push(0)\n return\n }\n\n if (outcome.scoresByKind.size === 0) {\n scoreBucketsByKind.exact.push(1)\n return\n }\n\n scoreBucketsByKind.exact.push(outcome.scoresByKind.get('exact') ?? 1)\n const judgeScore = outcome.scoresByKind.get('judge')\n if (judgeScore != null) {\n scoreBucketsByKind.judge.push(judgeScore)\n }\n}\n\n/**\n * Builder callbacks passed into `describeTask`.\n */\nexport interface DescribeTaskBuilder {\n /**\n * Registers one explicit case.\n */\n caseOf: {\n (name: string, run: CaseRunner<undefined>): void\n <TInput>(name: string, run: CaseRunner<TInput>, options: CaseRegistrationOptions<TInput>): void\n }\n /**\n * Registers multiple cases from input list.\n */\n casesFromInputs: <TInput>(\n namePrefix: string,\n inputs: readonly TInput[],\n run: CaseRunner<TInput>,\n options?: CasesFromInputsOptions,\n ) => void\n}\n\n/**\n * Options for `describeTask`.\n */\nexport interface DescribeTaskOptions extends TaskExecutionPolicy {\n /**\n * Optional description override.\n */\n description?: string\n /**\n * Optional task-local concurrency overrides.\n *\n * Use when:\n * - one task should cap attempt fan-out independently from the surrounding project\n * - one task should cap case fan-out without changing global scheduling defaults\n *\n * Expects:\n * - each provided value to be a positive integer\n *\n * @default inherited from project or CLI concurrency settings\n */\n concurrency?: TaskConcurrencyConfig\n}\n\nfunction createCaseBuilder(registeredCases: RegisteredCase<unknown>[]): DescribeTaskBuilder {\n function registerCase(name: string, run: CaseRunner<undefined>): void\n function registerCase<TInput>(name: string, run: CaseRunner<TInput>, options: CaseRegistrationOptions<TInput>): void\n function registerCase<TInput>(\n name: string,\n run: CaseRunner<TInput> | CaseRunner<undefined>,\n options?: CaseRegistrationOptions<TInput>,\n ): void {\n registeredCases.push({\n executionPolicy: normalizeExecutionPolicy(options, 'task case'),\n input: options?.input,\n name,\n run: run as CaseRunner<unknown>,\n })\n }\n\n return {\n caseOf: registerCase,\n casesFromInputs(namePrefix, inputs, run, options) {\n const queueKey = options?.concurrency == null ? undefined : {}\n\n inputs.forEach((input, index) => {\n registeredCases.push({\n concurrency: options?.concurrency,\n executionPolicy: normalizeExecutionPolicy(options, 'casesFromInputs'),\n input,\n name: `${namePrefix} #${index + 1}`,\n queueKey,\n run: run as CaseRunner<unknown>,\n })\n })\n },\n }\n}\n\nlet activeCasesStack: RegisteredCase<unknown>[][] = []\n\nfunction withActiveCases<T>(cases: RegisteredCase<unknown>[], callback: () => T): T {\n activeCasesStack = [...activeCasesStack, cases]\n\n try {\n return callback()\n }\n finally {\n activeCasesStack = activeCasesStack.slice(0, -1)\n }\n}\n\nfunction getActiveCases(): RegisteredCase<unknown>[] {\n const active = activeCasesStack.at(-1)\n if (active == null) {\n throw new Error('caseOf/casesFromInputs must be called inside describeTask/describeEval.')\n }\n\n return active\n}\n\n/**\n * Registers one case in the currently active task scope.\n */\nexport function caseOf(\n name: string,\n run: CaseRunner<undefined>,\n): void\n\nexport function caseOf<TInput>(\n name: string,\n run: CaseRunner<TInput>,\n options: CaseRegistrationOptions<TInput>,\n): void\n\nexport function caseOf<TInput>(\n name: string,\n run: CaseRunner<TInput> | CaseRunner<undefined>,\n options?: CaseRegistrationOptions<TInput>,\n): void {\n getActiveCases().push({\n executionPolicy: normalizeExecutionPolicy(options, 'task case'),\n input: options?.input,\n name,\n run: run as CaseRunner<unknown>,\n })\n}\n\n/**\n * Registers multiple cases in the currently active task scope.\n */\nexport function casesFromInputs<TInput>(\n namePrefix: string,\n inputs: readonly TInput[],\n run: CaseRunner<TInput>,\n options?: CasesFromInputsOptions,\n): void {\n const queueKey = options?.concurrency == null ? undefined : {}\n\n inputs.forEach((input, index) => {\n getActiveCases().push({\n concurrency: options?.concurrency,\n executionPolicy: normalizeExecutionPolicy(options, 'casesFromInputs'),\n input,\n name: `${namePrefix} #${index + 1}`,\n queueKey,\n run: run as CaseRunner<unknown>,\n })\n })\n}\n\n/**\n * Resolves the effective case concurrency for one registered task case.\n *\n * Before:\n * - registered case override `2`, task default `4`\n * - registered case override `undefined`, task default `3`\n *\n * After:\n * - `2`\n * - `3`\n */\nfunction resolveCaseConcurrency(\n taskCase: RegisteredCase<unknown>,\n taskConcurrency: TaskConcurrencyConfig | undefined,\n runtimeConcurrency: TaskConcurrencyConfig | undefined,\n): number | undefined {\n const concurrency = runtimeConcurrency?.case ?? taskCase.concurrency ?? taskConcurrency?.case\n if (concurrency == null) {\n return undefined\n }\n\n if (!Number.isFinite(concurrency) || !Number.isInteger(concurrency) || concurrency <= 0) {\n throw new Error(`Invalid task case concurrency: ${String(concurrency)}`)\n }\n\n return concurrency\n}\n\nfunction resolveCaseQueueKey(taskCase: RegisteredCase<unknown>, defaultQueueKey: object): object {\n return taskCase.queueKey ?? defaultQueueKey\n}\n\n/**\n * Defines one eval task with task/case semantics similar to Vitest.\n *\n * Use when:\n * - task behavior should be declared with `caseOf` and `casesFromInputs`\n * - business agent code should be imported and run from eval task files\n */\nexport function describeTask(\n name: string,\n build: ((builder: DescribeTaskBuilder) => void) | (() => void),\n options: DescribeTaskOptions = {},\n) {\n const registeredCases: RegisteredCase<unknown>[] = []\n const builder = createCaseBuilder(registeredCases)\n withActiveCases(registeredCases, () => {\n if (build.length > 0) {\n (build as (builder: DescribeTaskBuilder) => void)(builder)\n return\n }\n\n ;(build as () => void)()\n })\n\n const description = options.description ?? name\n const taskExecutionPolicy = normalizeExecutionPolicy(options, 'describeTask')\n\n const definition = defineEval({\n description,\n name,\n task: defineTask({\n concurrency: options.concurrency,\n executionPolicy: taskExecutionPolicy,\n id: name,\n async run(context): Promise<TaskRunOutput> {\n if (registeredCases.length === 0) {\n return {\n scores: [{ kind: 'exact', score: 1 }],\n }\n }\n\n const totalCases = registeredCases.length\n const scoreBucketsByKind: Record<RunScoreKind, number[]> = {\n exact: [],\n judge: [],\n }\n const defaultCaseQueueKey = {}\n const caseQueues = new Map<object, ReturnType<typeof createSchedulerQueue>>()\n const hasAutoAttempt = registeredCases.some(taskCase => resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy).autoAttempt > 0)\n const runtimeTaskConcurrency = context.task.entry.task?.concurrency ?? options.concurrency\n\n if (!hasAutoAttempt) {\n await Promise.all(\n registeredCases.map(async (taskCase, index) => {\n const executeCase = async () => {\n const outcome = await executeRegisteredCase(context, taskCase, index, totalCases, taskExecutionPolicy)\n emitCaseEnd(context.reporterHooks, {\n ...(outcome.errorMessage == null ? {} : { errorMessage: outcome.errorMessage }),\n index,\n ...(outcome.output === undefined ? {} : { output: outcome.output }),\n state: outcome.state,\n name: taskCase.name,\n total: totalCases,\n })\n collectCaseOutcomeScores(outcome, scoreBucketsByKind)\n }\n\n const concurrency = resolveCaseConcurrency(taskCase, runtimeTaskConcurrency, context.runtimeConcurrency)\n if (concurrency == null) {\n await executeCase()\n return\n }\n\n const queueKey = resolveCaseQueueKey(taskCase, defaultCaseQueueKey)\n const queue = caseQueues.get(queueKey) ?? createSchedulerQueue(concurrency)\n caseQueues.set(queueKey, queue)\n await queue.run(executeCase)\n }),\n )\n }\n else {\n let finalOutcomes: CaseExecutionOutcome[] = []\n let attemptIndex = 0\n\n for (;;) {\n finalOutcomes = await Promise.all(\n registeredCases.map(async (taskCase, index) => {\n const executeCase = async () => await executeRegisteredCase(context, taskCase, index, totalCases, taskExecutionPolicy)\n const concurrency = resolveCaseConcurrency(taskCase, runtimeTaskConcurrency, context.runtimeConcurrency)\n if (concurrency == null) {\n return await executeCase()\n }\n\n const queueKey = resolveCaseQueueKey(taskCase, defaultCaseQueueKey)\n const queue = caseQueues.get(queueKey) ?? createSchedulerQueue(concurrency)\n caseQueues.set(queueKey, queue)\n return await queue.run(executeCase)\n }),\n )\n\n const shouldContinue = finalOutcomes.some((outcome, index) => {\n if (outcome.state === 'passed') {\n return false\n }\n\n const taskCase = registeredCases[index]\n if (taskCase == null) {\n return false\n }\n\n return attemptIndex < resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy).autoAttempt\n })\n\n if (!shouldContinue) {\n break\n }\n\n attemptIndex += 1\n }\n\n finalOutcomes.forEach((outcome, index) => {\n const taskCase = registeredCases[index]\n if (taskCase == null) {\n return\n }\n\n emitCaseEnd(context.reporterHooks, {\n ...(outcome.errorMessage == null ? {} : { errorMessage: outcome.errorMessage }),\n index,\n ...(outcome.output === undefined ? {} : { output: outcome.output }),\n state: outcome.state,\n name: taskCase.name,\n total: totalCases,\n })\n collectCaseOutcomeScores(outcome, scoreBucketsByKind)\n })\n }\n\n const scores = (Object.keys(scoreBucketsByKind) as RunScoreKind[])\n .filter(kind => scoreBucketsByKind[kind].length > 0)\n .map((kind) => {\n const values = scoreBucketsByKind[kind]\n const total = values.reduce((sum, value) => sum + value, 0)\n return {\n kind,\n score: total / values.length,\n }\n })\n\n return {\n scores,\n }\n },\n }),\n })\n\n registerEvalDefinition(definition)\n\n return definition\n}\n\n/**\n * Alias of `describeTask` for eval-centric naming.\n */\nexport const describeEval = describeTask\n"],"mappings":";;;;;;;AAiGA,SAAS,gBAAgB,QAA4E;AACnG,QAAO;EACL,MAAM,EACJ,GAAG,OAAO,MACX;EACD,MAAM,EACJ,GAAG,OAAO,MACX;EACD,KAAK,EACH,GAAG,OAAO,KACX;EACF;;AAGH,SAAS,yBAAyB,OAAe,MAAsB;AACrE,QAAO,GAAG,MAAM,GAAG,mBAAmB,KAAK;;AAG7C,SAAS,2BAA2B,OAAoD;AACtF,QAAO,OAAO,UAAU,aAAa,OAAO,UAAU,YAAY,OAAO,UAAU;;AAGrF,SAAS,0BAA0B,OAAgH;AACjJ,QAAO,MAAM,MAAM,2BAA2B;;AAGhD,SAAS,2BAA2B,OAAiI;AACnK,KAAI,2BAA2B,MAAM,CACnC,QAAO;AAGT,QAAO,MAAM,QAAQ,MAAM,IAAI,0BAA0B,MAAM;;AAGjE,SAAS,iBAAiB,OAAqB;AAC7C,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,QAAQ,KAAK,QAAQ,EAClD,OAAM,IAAI,MAAM,0DAA0D,MAAM,IAAI;;AAIxF,SAAS,yBAAyB,OAAe,OAAqB;AACpE,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,CAAC,OAAO,UAAU,MAAM,IAAI,QAAQ,EACjE,OAAM,IAAI,MAAM,WAAW,MAAM,IAAI,OAAO,MAAM,GAAG;;AAIzD,SAAS,wBAAwB,OAAe,OAAqB;AACnE,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,QAAQ,EACrC,OAAM,IAAI,MAAM,WAAW,MAAM,IAAI,OAAO,MAAM,GAAG;;AAIzD,SAAS,sBAAsB,OAAe,OAAqB;AACjE,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,CAAC,OAAO,UAAU,MAAM,IAAI,SAAS,EAClE,OAAM,IAAI,MAAM,WAAW,MAAM,IAAI,OAAO,MAAM,GAAG;;AAIzD,SAAS,iBAAiB,YAA4B;AAEpD,QAAO,MAAM,MAAM,aAAa;;AAGlC,SAAS,sBAAsB,QAA6B,YAA4B;CACtF,MAAM,QAAQ,OAAO;AAErB,KAAI,SAAS,KACX,QAAO,iBAAiB,WAAW;AAGrC,QAAO,OAAO,UAAU,WAAW,QAAQ,MAAM,WAAW;;AAG9D,SAAS,cACP,OACA,SAQM;AACN,KAAI;AACF,SAAO,cAAc,QAAQ;SAEzB;;AAKR,SAAS,YACP,OACA,SAQM;AACN,KAAI;AACF,SAAO,YAAY,QAAQ;SAEvB;;AAKR,SAAS,kBACP,OACA,SACM;AACN,KAAI;AACF,SAAO,UAAU,QAAQ;SAErB;;AAKR,SAAS,uBAAuB,SAAwB;CACtD,MAAM,wBAAQ,IAAI,MAAM,wBAAwB,QAAQ,KAAK;AAC7D,OAAM,OAAO;AACb,QAAO;;AAGT,SAAS,yBAAyB,QAAyC,OAAgD;AACzH,KAAI,UAAU,KACZ;AAGF,KAAI,OAAO,eAAe,KACxB,0BAAyB,OAAO,aAAa,GAAG,MAAM,cAAc;AAGtE,KAAI,OAAO,aAAa,KACtB,0BAAyB,OAAO,WAAW,GAAG,MAAM,YAAY;AAGlE,KAAI,OAAO,OAAO,mBAAmB,SACnC,yBAAwB,OAAO,gBAAgB,GAAG,MAAM,iBAAiB;AAG3E,KAAI,OAAO,WAAW,KACpB,uBAAsB,OAAO,SAAS,GAAG,MAAM,UAAU;CAG3D,MAAM,aAAa;EACjB,aAAa,OAAO;EACpB,WAAW,OAAO;EAClB,gBAAgB,OAAO;EACvB,SAAS,OAAO;EACjB;AAED,QAAO,OAAO,OAAO,WAAW,CAAC,MAAK,UAAS,SAAS,KAAK,GACzD,aACA,KAAA;;AAGN,SAAS,2BACP,UACA,qBAC4H;AAC5H,QAAO;EACL,aAAa,SAAS,iBAAiB,eAAe,qBAAqB,eAAe;EAC1F,WAAW,SAAS,iBAAiB,aAAa,qBAAqB,aAAa;EACpF,gBAAgB,SAAS,iBAAiB,kBAAkB,qBAAqB;EACjF,SAAS,SAAS,iBAAiB,WAAW,qBAAqB;EACpE;;AAGH,eAAe,YACb,SACA,UACA,OACA,SAC+B;CAC/B,MAAM,qCAAqB,IAAI,KAA2B;CAC1D,MAAM,kBAAkB,IAAI,iBAAiB;CAC7C,MAAM,YAAY,QAAQ,aAAa,4BAA4B;CACnE,MAAM,SAAS,yBAAyB,OAAO,SAAS,KAAK;CAC7D,IAAI;CACJ,IAAI,WAAW;CACf,IAAI,UAAU;AAEd,KAAI;AACF,SAAO,MAAM,UAAU,SAAS,eAAe;GAC7C,kBAAkB;GAClB,oBAAoB,SAAS;GAC7B,kBAAkB,QAAQ,KAAK;GAC/B,oBAAoB,QAAQ,KAAK,MAAM;GACxC,EAAE,YAAY;GACb,MAAM,aAAa,QAAQ,QAAQ,SAAS,IAAI;IAC9C,GAAG;IACH,QAAQ;KACN,GAAG,gBAAgB,QAAQ,KAAK,OAAO;KACvC,QAAQ,SAAS;KAClB;IACD,OAAO,MAAM,OAAO;AAClB,SAAI,gBAAgB,OAAO,WAAW,QACpC;AAGF,uBAAkB,QAAQ,eAAe;MACvC;MACA,MAAM;OACJ;OACA;OACD;MACD,OAAO;MACR,CAAC;AACF,eAAU,SAAS,sBAAsB;MAAE;MAAM;MAAO,CAAC;AACzD,SAAI,2BAA2B,MAAM,CACnC,WAAU,cAAc,GAAG,OAAO,OAAO,CAAC;;IAG9C,MAAM,OAAO,OAAO,SAAS;AAC3B,SAAI,gBAAgB,OAAO,WAAW,QACpC;AAGF,sBAAiB,MAAM;AACvB,wBAAmB,IAAI,MAAM,MAAM;AACnC,eAAU,SAAS,qBAAqB;MACtC,qBAAqB;MACrB,sBAAsB;MACvB,CAAC;AACF,uBAAkB,QAAQ,eAAe;MACvC;MACA,MAAM;OAAE;OAAM;OAAO;MACrB,OAAO;MACR,CAAC;;IAEJ,QAAQ,gBAAgB;IACzB,CAAC,CAAC;AAEH,OAAI,WAAW,MAAM;IACnB,MAAM,iBAAiB,IAAI,SAAgB,GAAG,WAAW;AACvD,qBAAgB,iBAAiB;AAC/B,iBAAW;AACX,sBAAgB,MAAM,uBAAuB,QAAQ,CAAC;AACtD,aAAO,uBAAuB,QAAQ,CAAC;QACtC,QAAQ;MACX;IAEF,MAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,YAAY,eAAe,CAAC;AAC/D,cAAU;AACV,WAAO;KACL;KACA,cAAc;KACd,OAAO;KACR;;GAGH,MAAM,SAAS,MAAM;AACrB,aAAU;AACV,UAAO;IACL;IACA,cAAc;IACd,OAAO;IACR;IACD;UAEG,OAAO;AACZ,YAAU;AACV,SAAO;GACL,cAAc,iBAAiB,MAAM,KAAK,YAAY,WAAW,OAAO,wBAAwB,QAAQ,OAAO;GAC/G,cAAc;GACd,OAAO,WAAW,YAAY;GAC/B;WAEK;AACN,MAAI,iBAAiB,KACnB,cAAa,cAAc;;;AAKjC,eAAe,sBACb,SACA,UACA,OACA,YACA,qBAC+B;CAC/B,MAAM,iBAAiB,2BAA2B,UAAU,oBAAoB;CAChF,IAAI;AAEJ,MAAK,IAAI,aAAa,GAAG,cAAc,eAAe,WAAW,cAAc,GAAG;AAChF,MAAI,aAAa,GAAG;GAClB,MAAM,eAAe,sBAAsB,gBAAgB,WAAW;AACtE,2BAAwB,cAAc,wBAAwB;AAE9D,OAAI,eAAe,EACjB,OAAM,MAAM,aAAa;;AAI7B,gBAAc,QAAQ,eAAe;GACnC,GAAI,eAAe,YAAY,IAC3B;IACE,WAAW,eAAe;IAC1B;IACD,GACD,EAAE;GACN;GACA,GAAI,SAAS,UAAU,KAAA,IAAY,EAAE,GAAG,EAAE,OAAO,SAAS,OAAO;GACjE,MAAM,SAAS;GACf,OAAO;GACR,CAAC;AACF,gBAAc,MAAM,YAAY,SAAS,UAAU,OAAO,eAAe,QAAQ;AACjF,MAAI,YAAY,UAAU,SACxB,QAAO;;AAIX,QAAO,eAAe;EACpB,cAAc;EACd,8BAAc,IAAI,KAAK;EACvB,OAAO;EACR;;AAGH,SAAS,yBACP,SACA,oBACM;AACN,KAAI,QAAQ,UAAU,UAAU;AAC9B,qBAAmB,MAAM,KAAK,EAAE;AAChC;;AAGF,KAAI,QAAQ,aAAa,SAAS,GAAG;AACnC,qBAAmB,MAAM,KAAK,EAAE;AAChC;;AAGF,oBAAmB,MAAM,KAAK,QAAQ,aAAa,IAAI,QAAQ,IAAI,EAAE;CACrE,MAAM,aAAa,QAAQ,aAAa,IAAI,QAAQ;AACpD,KAAI,cAAc,KAChB,oBAAmB,MAAM,KAAK,WAAW;;AAiD7C,SAAS,kBAAkB,iBAAiE;CAG1F,SAAS,aACP,MACA,KACA,SACM;AACN,kBAAgB,KAAK;GACnB,iBAAiB,yBAAyB,SAAS,YAAY;GAC/D,OAAO,SAAS;GAChB;GACK;GACN,CAAC;;AAGJ,QAAO;EACL,QAAQ;EACR,gBAAgB,YAAY,QAAQ,KAAK,SAAS;GAChD,MAAM,WAAW,SAAS,eAAe,OAAO,KAAA,IAAY,EAAE;AAE9D,UAAO,SAAS,OAAO,UAAU;AAC/B,oBAAgB,KAAK;KACnB,aAAa,SAAS;KACtB,iBAAiB,yBAAyB,SAAS,kBAAkB;KACrE;KACA,MAAM,GAAG,WAAW,IAAI,QAAQ;KAChC;KACK;KACN,CAAC;KACF;;EAEL;;AAGH,IAAI,mBAAgD,EAAE;AAEtD,SAAS,gBAAmB,OAAkC,UAAsB;AAClF,oBAAmB,CAAC,GAAG,kBAAkB,MAAM;AAE/C,KAAI;AACF,SAAO,UAAU;WAEX;AACN,qBAAmB,iBAAiB,MAAM,GAAG,GAAG;;;AAIpD,SAAS,iBAA4C;CACnD,MAAM,SAAS,iBAAiB,GAAG,GAAG;AACtC,KAAI,UAAU,KACZ,OAAM,IAAI,MAAM,0EAA0E;AAG5F,QAAO;;AAiBT,SAAgB,OACd,MACA,KACA,SACM;AACN,iBAAgB,CAAC,KAAK;EACpB,iBAAiB,yBAAyB,SAAS,YAAY;EAC/D,OAAO,SAAS;EAChB;EACK;EACN,CAAC;;;;;AAMJ,SAAgB,gBACd,YACA,QACA,KACA,SACM;CACN,MAAM,WAAW,SAAS,eAAe,OAAO,KAAA,IAAY,EAAE;AAE9D,QAAO,SAAS,OAAO,UAAU;AAC/B,kBAAgB,CAAC,KAAK;GACpB,aAAa,SAAS;GACtB,iBAAiB,yBAAyB,SAAS,kBAAkB;GACrE;GACA,MAAM,GAAG,WAAW,IAAI,QAAQ;GAChC;GACK;GACN,CAAC;GACF;;;;;;;;;;;;;AAcJ,SAAS,uBACP,UACA,iBACA,oBACoB;CACpB,MAAM,cAAc,oBAAoB,QAAQ,SAAS,eAAe,iBAAiB;AACzF,KAAI,eAAe,KACjB;AAGF,KAAI,CAAC,OAAO,SAAS,YAAY,IAAI,CAAC,OAAO,UAAU,YAAY,IAAI,eAAe,EACpF,OAAM,IAAI,MAAM,kCAAkC,OAAO,YAAY,GAAG;AAG1E,QAAO;;AAGT,SAAS,oBAAoB,UAAmC,iBAAiC;AAC/F,QAAO,SAAS,YAAY;;;;;;;;;AAU9B,SAAgB,aACd,MACA,OACA,UAA+B,EAAE,EACjC;CACA,MAAM,kBAA6C,EAAE;CACrD,MAAM,UAAU,kBAAkB,gBAAgB;AAClD,iBAAgB,uBAAuB;AACrC,MAAI,MAAM,SAAS,GAAG;AACnB,SAAiD,QAAQ;AAC1D;;AAGA,SAAsB;GACxB;CAEF,MAAM,cAAc,QAAQ,eAAe;CAC3C,MAAM,sBAAsB,yBAAyB,SAAS,eAAe;CAE7E,MAAM,aAAa,WAAW;EAC5B;EACA;EACA,MAAM,WAAW;GACf,aAAa,QAAQ;GACrB,iBAAiB;GACjB,IAAI;GACJ,MAAM,IAAI,SAAiC;AACzC,QAAI,gBAAgB,WAAW,EAC7B,QAAO,EACL,QAAQ,CAAC;KAAE,MAAM;KAAS,OAAO;KAAG,CAAC,EACtC;IAGH,MAAM,aAAa,gBAAgB;IACnC,MAAM,qBAAqD;KACzD,OAAO,EAAE;KACT,OAAO,EAAE;KACV;IACD,MAAM,sBAAsB,EAAE;IAC9B,MAAM,6BAAa,IAAI,KAAsD;IAC7E,MAAM,iBAAiB,gBAAgB,MAAK,aAAY,2BAA2B,UAAU,oBAAoB,CAAC,cAAc,EAAE;IAClI,MAAM,yBAAyB,QAAQ,KAAK,MAAM,MAAM,eAAe,QAAQ;AAE/E,QAAI,CAAC,eACH,OAAM,QAAQ,IACZ,gBAAgB,IAAI,OAAO,UAAU,UAAU;KAC7C,MAAM,cAAc,YAAY;MAC9B,MAAM,UAAU,MAAM,sBAAsB,SAAS,UAAU,OAAO,YAAY,oBAAoB;AACtG,kBAAY,QAAQ,eAAe;OACjC,GAAI,QAAQ,gBAAgB,OAAO,EAAE,GAAG,EAAE,cAAc,QAAQ,cAAc;OAC9E;OACA,GAAI,QAAQ,WAAW,KAAA,IAAY,EAAE,GAAG,EAAE,QAAQ,QAAQ,QAAQ;OAClE,OAAO,QAAQ;OACf,MAAM,SAAS;OACf,OAAO;OACR,CAAC;AACF,+BAAyB,SAAS,mBAAmB;;KAGvD,MAAM,cAAc,uBAAuB,UAAU,wBAAwB,QAAQ,mBAAmB;AACxG,SAAI,eAAe,MAAM;AACvB,YAAM,aAAa;AACnB;;KAGF,MAAM,WAAW,oBAAoB,UAAU,oBAAoB;KACnE,MAAM,QAAQ,WAAW,IAAI,SAAS,IAAI,qBAAqB,YAAY;AAC3E,gBAAW,IAAI,UAAU,MAAM;AAC/B,WAAM,MAAM,IAAI,YAAY;MAC5B,CACH;SAEE;KACH,IAAI,gBAAwC,EAAE;KAC9C,IAAI,eAAe;AAEnB,cAAS;AACP,sBAAgB,MAAM,QAAQ,IAC5B,gBAAgB,IAAI,OAAO,UAAU,UAAU;OAC7C,MAAM,cAAc,YAAY,MAAM,sBAAsB,SAAS,UAAU,OAAO,YAAY,oBAAoB;OACtH,MAAM,cAAc,uBAAuB,UAAU,wBAAwB,QAAQ,mBAAmB;AACxG,WAAI,eAAe,KACjB,QAAO,MAAM,aAAa;OAG5B,MAAM,WAAW,oBAAoB,UAAU,oBAAoB;OACnE,MAAM,QAAQ,WAAW,IAAI,SAAS,IAAI,qBAAqB,YAAY;AAC3E,kBAAW,IAAI,UAAU,MAAM;AAC/B,cAAO,MAAM,MAAM,IAAI,YAAY;QACnC,CACH;AAeD,UAAI,CAbmB,cAAc,MAAM,SAAS,UAAU;AAC5D,WAAI,QAAQ,UAAU,SACpB,QAAO;OAGT,MAAM,WAAW,gBAAgB;AACjC,WAAI,YAAY,KACd,QAAO;AAGT,cAAO,eAAe,2BAA2B,UAAU,oBAAoB,CAAC;QAChF,CAGA;AAGF,sBAAgB;;AAGlB,mBAAc,SAAS,SAAS,UAAU;MACxC,MAAM,WAAW,gBAAgB;AACjC,UAAI,YAAY,KACd;AAGF,kBAAY,QAAQ,eAAe;OACjC,GAAI,QAAQ,gBAAgB,OAAO,EAAE,GAAG,EAAE,cAAc,QAAQ,cAAc;OAC9E;OACA,GAAI,QAAQ,WAAW,KAAA,IAAY,EAAE,GAAG,EAAE,QAAQ,QAAQ,QAAQ;OAClE,OAAO,QAAQ;OACf,MAAM,SAAS;OACf,OAAO;OACR,CAAC;AACF,+BAAyB,SAAS,mBAAmB;OACrD;;AAcJ,WAAO,EACL,QAZc,OAAO,KAAK,mBAAmB,CAC5C,QAAO,SAAQ,mBAAmB,MAAM,SAAS,EAAE,CACnD,KAAK,SAAS;KACb,MAAM,SAAS,mBAAmB;AAElC,YAAO;MACL;MACA,OAHY,OAAO,QAAQ,KAAK,UAAU,MAAM,OAAO,EAAE,GAG1C,OAAO;MACvB;MACD,EAIH;;GAEJ,CAAC;EACH,CAAC;AAEF,wBAAuB,WAAW;AAElC,QAAO;;;;;AAMT,MAAa,eAAe"}
@@ -1,4 +1,4 @@
1
- import { C as TaskRunContext, P as ModelDefinition, b as TaskExecutionPolicy, l as MatrixDefinition, t as ConfigHookPlugin } from "../../index-DBZKkpBe.mjs";
1
+ import { D as TaskRunContext, f as MatrixDefinition, t as ConfigHookPlugin, w as TaskExecutionPolicy, z as ModelDefinition } from "../../index-5R1_k2nv.mjs";
2
2
 
3
3
  //#region src/plugins/chat-models/runtime-config.d.ts
4
4
  /**
@@ -392,6 +392,12 @@ interface ChatModelFromBaseOptions {
392
392
  * @default 0
393
393
  */
394
394
  autoRetry?: number;
395
+ /**
396
+ * Delay in milliseconds before a retry starts.
397
+ *
398
+ * @default retryIndex => 500 * 2 ** (retryIndex - 1)
399
+ */
400
+ autoRetryDelay?: TaskExecutionPolicy['autoRetryDelay'];
395
401
  /**
396
402
  * Additional full task attempts allowed after the current attempt settles.
397
403
  *