npm - vieval - Versions diffs - 0.0.1 → 0.0.4 - Mend

vieval 0.0.1 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51)

package/README.md +8 -5
package/dist/cli/index.d.mts +1 -1
package/dist/cli/index.mjs +1232 -83
package/dist/cli/index.mjs.map +1 -1
package/dist/{config-D2fe1SnT.mjs → config-CHN24egi.mjs} +1 -1
package/dist/{config-D2fe1SnT.mjs.map → config-CHN24egi.mjs.map} +1 -1
package/dist/config.d.mts +2 -3
package/dist/config.mjs +2 -2
package/dist/core/assertions/index.d.mts +1 -1
package/dist/core/inference-executors/index.d.mts +1 -45
package/dist/core/inference-executors/index.mjs +1 -38
package/dist/core/inference-executors/index.mjs.map +1 -1
package/dist/core/processors/results/index.d.mts +1 -1
package/dist/core/runner/index.d.mts +2 -2
package/dist/core/runner/index.mjs +2 -2
package/dist/env-C7X81PWa.mjs +41 -0
package/dist/env-C7X81PWa.mjs.map +1 -0
package/dist/env-DtpjACOW.d.mts +47 -0
package/dist/expect-B2vaoRVZ.d.mts +10 -0
package/dist/{expect-i9WZWGrA.mjs → expect-CaXiUkwY.mjs} +3 -3
package/dist/expect-CaXiUkwY.mjs.map +1 -0
package/dist/expect-extensions-BOzwV5EJ.mjs +197 -0
package/dist/expect-extensions-BOzwV5EJ.mjs.map +1 -0
package/dist/expect.d.mts +1 -1
package/dist/expect.mjs +1 -1
package/dist/{index-DP7jsORl.d.mts → index-BDMEAmf2.d.mts} +246 -3
package/dist/{index-oSXhM1zx.d.mts → index-C3gPFmcR.d.mts} +2 -2
package/dist/index.d.mts +326 -6
package/dist/index.mjs +65 -23
package/dist/index.mjs.map +1 -1
package/dist/{models-D_MsBtYw.mjs → models-DIGdOUpJ.mjs} +1 -1
package/dist/{models-D_MsBtYw.mjs.map → models-DIGdOUpJ.mjs.map} +1 -1
package/dist/plugins/chat-models/index.d.mts +465 -6
package/dist/plugins/chat-models/index.mjs +469 -6
package/dist/plugins/chat-models/index.mjs.map +1 -1
package/dist/{registry-ChOjjdEC.mjs → registry-CHJcTN2W.mjs} +75 -16
package/dist/registry-CHJcTN2W.mjs.map +1 -0
package/dist/{runner-4ZsOveoY.mjs → runner-Dpy-eivM.mjs} +177 -21
package/dist/runner-Dpy-eivM.mjs.map +1 -0
package/dist/testing/expect-extensions.d.mts +44 -38
package/dist/testing/expect-extensions.mjs +1 -1
package/package.json +11 -4
package/dist/expect-0jPJ7Zio.d.mts +0 -2318
package/dist/expect-extensions-CwPtgTz8.mjs +0 -13471
package/dist/expect-extensions-CwPtgTz8.mjs.map +0 -1
package/dist/expect-i9WZWGrA.mjs.map +0 -1
package/dist/magic-string.es-CH1jwzMg.mjs +0 -1013
package/dist/magic-string.es-CH1jwzMg.mjs.map +0 -1
package/dist/plugin-DVaRZY2x.d.mts +0 -84
package/dist/registry-ChOjjdEC.mjs.map +0 -1
package/dist/runner-4ZsOveoY.mjs.map +0 -1

package/dist/{index-DP7jsORl.d.mts → index-BDMEAmf2.d.mts} RENAMED

@@ -1,5 +1,110 @@
-import { n as ModelDefinition } from "./plugin-DVaRZY2x.mjs";
+import { ReadStream, WriteStream } from "node:fs";
+import { Buffer } from "node:buffer";
+//#region src/core/cache/types.d.ts
+/**
+ * Cache entry options used to derive one deterministic cache file path.
+ */
+interface CacheFileOptions {
+  /**
+   * Optional file extension for the cache artifact (for example: `json`, `txt`, `wav`).
+   */
+  ext?: string;
+  /**
+   * Deterministic key segments used to build the relative cache path.
+   */
+  key: readonly string[];
+  /**
+   * Optional media type hint used by adapters when extension is omitted.
+   */
+  mediaType?: string;
+}
+/**
+ * One cache file handle exposed to task code.
+ *
+ * Use when:
+ * - benchmark setup needs deterministic artifact storage
+ * - task runtime needs typed file helpers for text/json/binary payloads
+ *
+ * Expects:
+ * - `path` to be stable for the same namespace + key
+ * - read helpers to throw when the file does not exist or payload is invalid
+ *
+ * Returns:
+ * - read/write helpers over one deterministic cache artifact path
+ */
+interface CacheFileHandle {
+  path: string;
+  exists: () => Promise<boolean>;
+  openReadStream: () => ReadStream;
+  openWriteStream: () => Promise<WriteStream>;
+  readBuffer: () => Promise<Buffer>;
+  writeBuffer: (value: Buffer) => Promise<void>;
+  readText: (encoding?: BufferEncoding) => Promise<string>;
+  writeText: (value: string, encoding?: BufferEncoding) => Promise<void>;
+  readJson: <T>() => Promise<T>;
+  writeJson: (value: unknown) => Promise<void>;
+  loadAsCasesInput: <T>() => Promise<T[]>;
+  loadAsExpectFixture: <T>() => Promise<T>;
+}
+/**
+ * Namespaced cache accessor for deterministic cache artifacts.
+ */
+interface CacheNamespace {
+  file: (options: CacheFileOptions) => CacheFileHandle;
+}
+/**
+ * Task-scoped cache runtime injected into `TaskRunContext`.
+ */
+interface TaskCacheRuntime {
+  namespace: (name: string) => CacheNamespace;
+}
+//#endregion
+//#region src/core/cache/filesystem.d.ts
+/**
+ * Options for creating the filesystem-backed task cache runtime.
+ */
+interface CreateFilesystemTaskCacheRuntimeOptions {
+  /**
+   * Absolute cache root directory.
+   */
+  cacheRootDirectory: string;
+  /**
+   * Project identifier under one workspace cache scope.
+   */
+  projectName: string;
+  /**
+   * Workspace identifier used to share cache roots across projects.
+   */
+  workspaceId: string;
+}
+/**
+ * Normalizes cache file options into deterministic relative path segments.
+ *
+ * Before:
+ * - `{ key: ['cases', 'dataset hash', 'v1'], ext: 'json' }`
+ *
+ * After:
+ * - `['cases', 'dataset-hash', 'v1.json']`
+ */
+declare function normalizeCacheFilePathSegments(options: CacheFileOptions): string[];
+/**
+ * Creates a deterministic filesystem-backed task cache runtime.
+ *
+ * Use when:
+ * - eval tasks need reproducible cache paths for expensive pre-processing outputs
+ * - benchmark adapters need one artifact-oriented API for text/json/binary reads and writes
+ *
+ * Expects:
+ * - `cacheRootDirectory` to be writable by the running process
+ * - `workspaceId` + `projectName` to stay stable for reproducible paths
+ *
+ * Returns:
+ * - task cache runtime that resolves namespaced file handles under:
+ *   `<cacheRootDirectory>/<workspaceId>/<projectName>/<namespace>/...`
+ */
+declare function createFilesystemTaskCacheRuntime(options: CreateFilesystemTaskCacheRuntimeOptions): TaskCacheRuntime;
+//#endregion
 //#region src/core/runner/schedule.d.ts
 /**
  * Describes the inferenceExecutor target for a scheduled eval run.
@@ -333,6 +438,58 @@ declare function asProjectRelativePath(filePath: string, context: RunnerRuntimeC
  */
 declare function collectEvalEntries(modules: EvalModuleMap, context: RunnerRuntimeContext): CollectedEvalEntry[];
 //#endregion
+//#region src/config/models.d.ts
+/**
+ * Canonical model definition consumed by vieval runtime and config.
+ *
+ * Use when:
+ * - declaring models in `vieval.config.*`
+ * - resolving task runtime models by id, alias, or concrete model name
+ *
+ * Expects:
+ * - `id` to be stable and unique within one config
+ * - `inferenceExecutorId` to match scheduler/executor identifiers
+ *
+ * Returns:
+ * - one normalized model registration record
+ */
+interface ModelDefinition {
+  /**
+   * Stable model id.
+   */
+  id: string;
+  /**
+   * Inference-executor id used for matching and reporting.
+   */
+  inferenceExecutorId: string;
+  /**
+   * Executor reference passed through config.
+   *
+   * `vieval` core treats this as opaque runtime metadata. Builder plugins can
+   * narrow this field with plugin-specific executor input types.
+   */
+  inferenceExecutor: unknown;
+  /**
+   * Concrete model name passed to the inference executor.
+   */
+  model: string;
+  /**
+   * Alias names that can resolve this model.
+   */
+  aliases: string[];
+  /**
+   * Optional model-level call parameters.
+   */
+  parameters?: Record<string, unknown>;
+}
+/**
+ * Resolves one model by id, model name, or alias in registration order.
+ *
+ * Returns:
+ * - the first matching model, or `undefined` when no match exists
+ */
+declare function resolveModelByName(models: readonly ModelDefinition[], name: string): ModelDefinition | undefined;
+//#endregion
 //#region src/core/runner/task-context.d.ts
 /**
  * Options for selecting a model from the execution context.
@@ -347,6 +504,10 @@ interface TaskModelSelectionOptions {
  * Task-scoped execution context exposed to runner executors.
  */
 interface TaskExecutionContext {
+  /**
+   * Deterministic cache runtime scoped to the current task project.
+   */
+  cache: TaskCacheRuntime;
   /**
    * Resolves model configuration for the current task.
    *
@@ -360,6 +521,7 @@ interface TaskExecutionContext {
  * Inputs used to build task execution context.
  */
 interface CreateTaskExecutionContextOptions {
+  cache?: TaskCacheRuntime;
   models: readonly ModelDefinition[];
   task: ScheduledTask;
 }
@@ -439,6 +601,12 @@ interface RunScheduledTasksOptions {
    * - failed-task observers do not override the executor error for the task
    */
   onTaskEnd?: (task: ScheduledTask, state: RunnerTaskState) => void;
+  /**
+   * Maximum number of tasks to execute concurrently.
+   *
+   * @default 1
+   */
+  maxConcurrency?: number;
 }
 /**
  * Error thrown when a scheduled run fails before producing a normalized result.
@@ -694,6 +862,14 @@ interface TaskRunOutput {
  * Runtime context passed into eval task `run`.
  */
 interface TaskRunContext {
+  /**
+   * Task-scoped cache runtime.
+   *
+   * Use when:
+   * - benchmark setup needs deterministic artifact reuse across attempts
+   * - case-level logic needs typed text/json/binary cache loaders
+   */
+  cache: TaskExecutionContext['cache'];
   /**
    * Scheduled runner task metadata.
    *
@@ -810,6 +986,10 @@ interface TaskCaseReporterEndPayload extends TaskCaseReporterPayload {
    * Final case state.
    */
   state: TaskCaseState;
+  /**
+   * Optional failure message when `state` is `failed`.
+   */
+  errorMessage?: string;
 }
 /**
  * Reporter hooks invoked around each task case execution.
@@ -830,6 +1010,39 @@ interface TaskReporterHooks {
    * Runs after a case settles.
    */
   onCaseEnd?: (payload: TaskCaseReporterEndPayload) => void;
+  /**
+   * Runs when task code emits a custom telemetry/reporting event.
+   *
+   * Use when:
+   * - eval implementations need report artifacts beyond case lifecycle counters
+   * - model/runtime integrations emit inference, metering, or tool-call events
+   */
+  onEvent?: (payload: TaskReporterEventPayload) => void;
+}
+/**
+ * Payload emitted by task code for custom report events.
+ *
+ * Use when:
+ * - reporting runtime telemetry such as inference requests, responses, or tool calls
+ * - attaching modality-specific metrics without coupling task logic to CLI internals
+ *
+ * Expects:
+ * - `event` to be a stable event name
+ * - `data` to be JSON-serializable for report artifact persistence
+ */
+interface TaskReporterEventPayload {
+  /**
+   * Event name written into report event envelopes.
+   */
+  event: string;
+  /**
+   * Optional custom payload persisted under event `data`.
+   */
+  data?: unknown;
+  /**
+   * Optional stable case id when the event maps to one case lifecycle.
+   */
+  caseId?: string;
 }
 /**
  * Eval task definition used by `defineTask`.
@@ -943,5 +1156,35 @@ declare function defineEval<const TDefinition extends EvalDefinition>(definition
  */
 declare function defineTask<const TDefinition extends TaskDefinition>(definition: TDefinition): TDefinition;
 //#endregion
-export { asProjectRelativePath as A, RunScoreKind as B, RunnerTaskState as C, TaskExecutionContext as D, CreateTaskExecutionContextOptions as E, AggregatedProviderSummary as F, RunnerMatrixInput as G, CreateRunnerScheduleOptions as H, AggregatedRunResults as I, ScheduledTaskMatrix as J, RunnerMatrixSelection as K, AggregatedRunSummary as L, CreateVievalRunnerRuntimeContextOptions as M, RunnerRuntimeContext as N, TaskModelSelectionOptions as O, createRunnerRuntimeContext as P, RunResult as R, RunnerExecutionError as S, runScheduledTasks as T, InferenceExecutor as U, aggregateRunResults as V, RunnerMatrixDefinition as W, createRunnerSchedule as X, ScheduledTaskMatrixMeta as Y, TaskDefinition as _, EvalModule as a, TaskRunOutput as b, MatrixDefinition as c, MatrixRow as d, MatrixValue as f, TaskCaseState as g, TaskCaseReporterPayload as h, EvalDefinition as i, collectEvalEntries as j, createTaskExecutionContext as k, MatrixLayer as l, TaskCaseReporterEndPayload as m, defineTask as n, EvalModuleMap as o, ScopedMatrices as p, ScheduledTask as q, CollectedEvalEntry as r, MatrixAxisValues as s, defineEval as t, MatrixPrimitive as u, TaskReporterHooks as v, ScheduledTaskExecutor as w, RunScheduledTasksOptions as x, TaskRunContext as y, RunScore as z };
-//# sourceMappingURL=index-DP7jsORl.d.mts.map
+//#region src/config/plugin.d.ts
+/**
+ * Generic plugin contract for vieval config lifecycle hooks.
+ *
+ * Use when:
+ * - a plugin needs to transform config before CLI normalization
+ * - a plugin needs a final resolved-config callback
+ *
+ * Expects:
+ * - `name` to be stable for diagnostics
+ * - hooks to return either a full config object or `void`
+ *
+ * Returns:
+ * - a typed plugin shape bound to one config object
+ */
+interface ConfigHookPlugin<TConfig> {
+  /**
+   * Stable plugin name for diagnostics.
+   */
+  name: string;
+  /**
+   * Optional config transform hook.
+   */
+  configVieval?: (config: TConfig) => TConfig | void | Promise<TConfig | void>;
+  /**
+   * Optional hook after config is finalized.
+   */
+  configVievalResolved?: (config: TConfig) => void | Promise<void>;
+}
+//#endregion
+export { ScheduledTaskMatrixMeta as $, TaskModelSelectionOptions as A, AggregatedRunResults as B, RunScheduledTasksOptions as C, runScheduledTasks as D, ScheduledTaskExecutor as E, collectEvalEntries as F, aggregateRunResults as G, RunResult as H, CreateVievalRunnerRuntimeContextOptions as I, RunnerMatrixDefinition as J, CreateRunnerScheduleOptions as K, RunnerRuntimeContext as L, ModelDefinition as M, resolveModelByName as N, CreateTaskExecutionContextOptions as O, asProjectRelativePath as P, ScheduledTaskMatrix as Q, createRunnerRuntimeContext as R, TaskRunOutput as S, RunnerTaskState as T, RunScore as U, AggregatedRunSummary as V, RunScoreKind as W, RunnerMatrixSelection as X, RunnerMatrixInput as Y, ScheduledTask as Z, TaskCaseState as _, EvalDefinition as a, CacheFileOptions as at, TaskReporterHooks as b, MatrixAxisValues as c, MatrixPrimitive as d, createRunnerSchedule as et, MatrixRow as f, TaskCaseReporterPayload as g, TaskCaseReporterEndPayload as h, CollectedEvalEntry as i, CacheFileHandle as it, createTaskExecutionContext as j, TaskExecutionContext as k, MatrixDefinition as l, ScopedMatrices as m, defineEval as n, createFilesystemTaskCacheRuntime as nt, EvalModule as o, CacheNamespace as ot, MatrixValue as p, InferenceExecutor as q, defineTask as r, normalizeCacheFilePathSegments as rt, EvalModuleMap as s, TaskCacheRuntime as st, ConfigHookPlugin as t, CreateFilesystemTaskCacheRuntimeOptions as tt, MatrixLayer as u, TaskDefinition as v, RunnerExecutionError as w, TaskRunContext as x, TaskReporterEventPayload as y, AggregatedProviderSummary as z };
+//# sourceMappingURL=index-BDMEAmf2.d.mts.map

package/dist/{index-oSXhM1zx.d.mts → index-C3gPFmcR.d.mts} RENAMED

@@ -1,4 +1,4 @@
-import { B as RunScoreKind, z as RunScore } from "./index-DP7jsORl.mjs";
+import { U as RunScore, W as RunScoreKind } from "./index-BDMEAmf2.mjs";
 //#region src/core/assertions/index.d.ts
 /**
@@ -311,4 +311,4 @@ declare function toRunScores(outcomes: readonly AssertionOutcome[]): RunScore[];
 declare function collectFailedAssertions(outcomes: readonly AssertionOutcome[]): AssertionOutcome[];
 //#endregion
 export { expectToolCallArgs as C, expectStructuredOutput as S, toRunScores as T, expectMustExclude as _, CustomAssertionOptions as a, expectRegex as b, RegexAssertionOptions as c, StructuredOutputAssertionOptions as d, ToolCall as f, expectCustom as g, evaluateAssertions as h, AssertionState as i, RubricAssertionOptions as l, collectFailedAssertions as m, AssertionContext as n, MustExcludeAssertionOptions as o, ToolCallArgsAssertionOptions as p, AssertionOutcome as r, MustIncludeAssertionOptions as s, Assertion as t, RubricJudgeResult as u, expectMustInclude as v, normalizeMatchText as w, expectRubric as x, expectNot as y };
-//# sourceMappingURL=index-oSXhM1zx.d.mts.map
+//# sourceMappingURL=index-C3gPFmcR.d.mts.map

package/dist/index.d.mts CHANGED

@@ -1,11 +1,302 @@
-import { b as TaskRunOutput, y as TaskRunContext } from "./index-DP7jsORl.mjs";
-import { t as expect } from "./expect-0jPJ7Zio.mjs";
+import { H as RunResult, M as ModelDefinition, S as TaskRunOutput, W as RunScoreKind, Z as ScheduledTask, k as TaskExecutionContext, l as MatrixDefinition, q as InferenceExecutor, t as ConfigHookPlugin, u as MatrixLayer, x as TaskRunContext } from "./index-BDMEAmf2.mjs";
+import { a as requiredEnvFrom } from "./env-DtpjACOW.mjs";
+import { t as expect } from "./expect-B2vaoRVZ.mjs";
+import * as _$c12 from "c12";
+//#region src/cli/reporters/vitest-compat-reporter.d.ts
+type Awaitable<T> = T | Promise<T>;
+/**
+ * Normalized module-like entity delivered to vitest-compatible reporter hooks.
+ */
+interface VievalVitestCompatModule {
+  id: string;
+  name: string;
+  projectName: string;
+}
+/**
+ * Normalized test-case-like entity delivered to vitest-compatible reporter hooks.
+ */
+interface VievalVitestCompatCase {
+  id: string;
+  name: string;
+  module: VievalVitestCompatModule;
+  state: 'failed' | 'passed' | 'pending' | 'skipped';
+}
+/**
+ * Supported vitest-style reporter lifecycle hooks.
+ *
+ * Use when:
+ * - external reporter modules should observe vieval task/case lifecycle events
+ * - the project wants a familiar Vitest reporter callback model
+ *
+ * Expects:
+ * - hook handlers to be best-effort observers only
+ * - thrown errors are ignored to avoid interrupting eval execution
+ */
+interface VievalVitestCompatReporter {
+  onTestCaseReady?: (testCase: VievalVitestCompatCase) => Awaitable<void>;
+  onTestCaseResult?: (testCase: VievalVitestCompatCase) => Awaitable<void>;
+  onTestModuleCollected?: (module: VievalVitestCompatModule) => Awaitable<void>;
+  onTestModuleEnd?: (module: VievalVitestCompatModule) => Awaitable<void>;
+  onTestModuleQueued?: (module: VievalVitestCompatModule) => Awaitable<void>;
+  onTestModuleStart?: (module: VievalVitestCompatModule) => Awaitable<void>;
+  onTestRunEnd?: (modules: readonly VievalVitestCompatModule[], errors: readonly {
+    message: string;
+  }[], state: 'failed' | 'passed') => Awaitable<void>;
+  onTestRunStart?: (specifications: readonly {
+    moduleId: string;
+    projectName: string;
+  }[]) => Awaitable<void>;
+}
+/**
+ * Supported project reporter references.
+ *
+ * - String: module path or package name, default export used.
+ * - Reporter object: inline hook object (Vitest-style inline reporter).
+ * - Tuple: [string or reporter object, constructor options].
+ *
+ * Source permalink:
+ * `https://github.com/vitest-dev/vitest/blob/b865b4d83d1e7874607ba1b2d84b9e2d135ecd33/packages/vitest/src/node/config/resolveConfig.ts#L674-L713`
+ */
+type VievalVitestCompatReporterValue = string | VievalVitestCompatReporter;
+type VievalVitestCompatReporterReference = VievalVitestCompatReporterValue | readonly [VievalVitestCompatReporterValue, unknown?];
+//#endregion
 //#region src/cli/config.d.ts
+/**
+ * CLI plugin shape bound to the full CLI config object.
+ */
+type CliConfigPlugin = ConfigHookPlugin<CliConfig>;
+/**
+ * Defines one project block for `vieval run`.
+ */
+interface CliProjectConfig {
+  /**
+   * Project label used in summary output.
+   */
+  name: string;
+  /**
+   * Project root used for include/exclude glob matching.
+   *
+   * @default process cwd
+   */
+  root?: string;
+  /**
+   * Glob patterns for eval file discovery.
+   *
+   * @default Common eval file globs for TypeScript and JavaScript module formats.
+   */
+  include?: string[];
+  /**
+   * Glob patterns excluded from discovery.
+   *
+   * @default Common exclusion globs for dependencies, build output, and VCS directories.
+   */
+  exclude?: string[];
+  /**
+   * Providers expanded by scheduler.
+   *
+   * @default [{ id: 'default' }]
+   */
+  inferenceExecutors?: InferenceExecutor[];
+  /**
+   * Model definitions available to project runtime execution.
+   *
+   * Inference executors control schedule fan-out, while models provide
+   * runtime lookup metadata for `context.model(...)` during task execution.
+   *
+   * @default inherited from top-level config models
+   */
+  models?: ModelDefinition[];
+  /**
+   * Optional run-time matrix dimensions.
+   */
+  runMatrix?: MatrixDefinition | MatrixLayer;
+  /**
+   * Optional eval-time matrix dimensions.
+   */
+  evalMatrix?: MatrixDefinition | MatrixLayer;
+  /**
+   * Optional task executor.
+   *
+   * Use when this project should execute live inferenceExecutor requests.
+   * If omitted, `vieval run` performs collection + scheduling only.
+   */
+  executor?: (task: ScheduledTask, context: CliProjectExecutorContext) => Promise<RunResult>;
+  /**
+   * Optional project-local plugins.
+   */
+  plugins?: CliConfigPlugin[];
+  /**
+   * Optional vitest-compatible reporter modules.
+   *
+   * Use when:
+   * - project runs should emit additional reporter callbacks using Vitest-style lifecycle names
+   *
+   * @default []
+   */
+  reporters?: VievalVitestCompatReporterReference[];
+}
+/**
+ * One workspace descriptor for workspace-mode configs.
+ */
+interface CliWorkspaceConfig {
+  /**
+   * Workspace identifier.
+   */
+  id: string;
+  /**
+   * Workspace root path.
+   */
+  root: string;
+}
+/**
+ * One explicit comparison method descriptor.
+ */
+interface CliComparisonMethodConfig {
+  /**
+   * Method identifier shown in compare reports.
+   */
+  id: string;
+  /**
+   * Workspace path containing this method's `vieval.config.*`.
+   */
+  workspace: string;
+  /**
+   * Project name to execute inside workspace config.
+   */
+  project: string;
+  /**
+   * Optional explicit config file path for this workspace.
+   */
+  configFilePath?: string;
+}
+/**
+ * Benchmark identity and shared cache namespace.
+ */
+interface CliComparisonBenchmarkConfig {
+  /**
+   * Benchmark identifier used in report artifacts.
+   */
+  id: string;
+  /**
+   * Shared cache namespace reused across method runs.
+   */
+  sharedCaseNamespace: string;
+}
+/**
+ * One comparison entry loaded by `vieval compare`.
+ */
+interface CliComparisonConfig {
+  /**
+   * Comparison id selected by `--comparison`.
+   */
+  id: string;
+  /**
+   * Benchmark metadata for reporting and shared cache coordination.
+   */
+  benchmark: CliComparisonBenchmarkConfig;
+  /**
+   * Optional explicit method list.
+   */
+  methods?: CliComparisonMethodConfig[];
+  /**
+   * Optional workspace glob(s) discovered relative to config directory.
+   */
+  includesWorkspaces?: string | string[];
+  /**
+   * Optional workspace exclude glob(s), also relative to config directory.
+   */
+  excludesWorkspaces?: string | string[];
+}
+/**
+ * Execution context exposed to project-level `executor` implementations.
+ *
+ * Use when:
+ * - a project executor needs the task-scoped model resolver plus case reporter hooks
+ * - custom scheduling logic wants the same hook shape as `TaskRunContext`
+ *
+ * Expects:
+ * - `model` resolves configured models for the current task
+ * - `reporterHooks` follows `TaskRunContext['reporterHooks']`
+ */
+interface CliProjectExecutorContext extends TaskExecutionContext {
+  reporterHooks?: TaskRunContext['reporterHooks'];
+}
+/**
+ * Top-level CLI config loaded from `vieval.config.*`.
+ */
+interface CliConfigBase {
+  /**
+   * Global model definitions inherited by projects.
+   *
+   * @default []
+   */
+  models?: ModelDefinition[];
+  /**
+   * Global config plugins.
+   *
+   * @default []
+   */
+  plugins?: CliConfigPlugin[];
+  /**
+   * Global vitest-compatible reporter modules inherited by projects.
+   *
+   * @default []
+   */
+  reporters?: VievalVitestCompatReporterReference[];
+  /**
+   * Environment variables injected into `process.env` during `vieval run`.
+   *
+   * Use when:
+   * - eval tasks depend on runtime env values (for example inferenceExecutor API keys)
+   * - config wants deterministic env values without shell-level exports
+   *
+   * @default {}
+   */
+  env?: NodeJS.ProcessEnv;
+}
+/**
+ * Project mode config for `vieval run`.
+ */
+interface CliProjectModeConfig extends CliConfigBase {
+  /**
+   * Project list expanded by `vieval run`.
+   *
+   * @default [{ name: 'default' }]
+   */
+  projects?: CliProjectConfig[];
+  comparisons?: never;
+  workspaces?: never;
+}
+/**
+ * Workspace mode config placeholder for future workspace orchestration.
+ */
+interface CliWorkspaceModeConfig extends CliConfigBase {
+  workspaces: CliWorkspaceConfig[];
+  projects?: never;
+  comparisons?: never;
+}
+/**
+ * Comparison mode config for `vieval compare`.
+ */
+interface CliComparisonModeConfig extends CliConfigBase {
+  comparisons: CliComparisonConfig[];
+  projects?: never;
+  workspaces?: never;
+}
+/**
+ * Top-level CLI config loaded from `vieval.config.*`.
+ *
+ * Exactly one top-level mode is allowed:
+ * - `projects`
+ * - `workspaces`
+ * - `comparisons`
+ */
+type CliConfig = CliProjectModeConfig | CliWorkspaceModeConfig | CliComparisonModeConfig;
 /**
  * Helper used by `vieval.config.*` for better type inference.
  */
-declare const defineConfig: any;
+declare const defineConfig: _$c12.DefineConfig<CliConfig, _$c12.ConfigLayerMeta>;
 /**
  * Loads `.env*` files using Vite's env resolution behavior.
  *
@@ -33,6 +324,27 @@ interface CaseRunContext<TInput> extends TaskRunContext {
   matrix: TaskRunContext['task']['matrix'] & {
     inputs: TInput;
   };
+  /**
+   * Overrides one case score family with a custom normalized value.
+   *
+   * Use when:
+   * - one case computes a benchmark-native score that should flow into run aggregation
+   *
+   * Expects:
+   * - `score` to stay in the `0..1` range
+   */
+  score: (score: number, kind?: RunScoreKind) => void;
+  /**
+   * Emits one custom case metric into report events.
+   *
+   * Use when:
+   * - tasks need structured benchmark metadata beyond exact/judge score families
+   *
+   * Expects:
+   * - `name` to be a stable metric identifier
+   * - `value` to be JSON-serializable
+   */
+  metric: (name: string, value: boolean | number | string | null) => void;
 }
 /**
  * Callback for one task case.
@@ -45,7 +357,12 @@ interface DescribeTaskBuilder {
   /**
    * Registers one explicit case.
    */
-  caseOf: <TInput>(name: string, run: CaseRunner<TInput>, input: TInput) => void;
+  caseOf: {
+    (name: string, run: CaseRunner<undefined>): void;
+    <TInput>(name: string, run: CaseRunner<TInput>, options: {
+      input: TInput;
+    }): void;
+  };
   /**
    * Registers multiple cases from input list.
    */
@@ -63,7 +380,10 @@ interface DescribeTaskOptions {
 /**
  * Registers one case in the currently active task scope.
  */
-declare function caseOf<TInput>(name: string, run: CaseRunner<TInput>, input: TInput): void;
+declare function caseOf(name: string, run: CaseRunner<undefined>): void;
+declare function caseOf<TInput>(name: string, run: CaseRunner<TInput>, options: {
+  input: TInput;
+}): void;
 /**
  * Registers multiple cases in the currently active task scope.
  */
@@ -88,5 +408,5 @@ declare function describeTask(name: string, build: ((builder: DescribeTaskBuilde
  */
 declare const describeEval: typeof describeTask;
 //#endregion
-export { caseOf, casesFromInputs, defineConfig, describeEval, describeTask, expect, loadEnv };
+export { caseOf, casesFromInputs, defineConfig, describeEval, describeTask, expect, loadEnv, requiredEnvFrom };
 //# sourceMappingURL=index.d.mts.map