npm - vieval - Versions diffs - 0.0.11 → 0.0.12 - Mend

vieval 0.0.11 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

package/README.md +31 -31
package/dist/bin/vieval.mjs +1 -1
package/dist/cli/index.d.mts +1 -1
package/dist/cli/index.mjs +1 -1
package/dist/{cli-CHFCF8UR.mjs → cli-uzS81IPd.mjs} +1529 -1529
package/dist/cli-uzS81IPd.mjs.map +1 -0
package/dist/config.d.mts +1 -1
package/dist/core/assertions/index.d.mts +156 -156
package/dist/core/assertions/index.mjs +82 -82
package/dist/core/assertions/index.mjs.map +1 -1
package/dist/core/inference-executors/index.d.mts +37 -37
package/dist/core/inference-executors/index.mjs +53 -52
package/dist/core/inference-executors/index.mjs.map +1 -1
package/dist/core/processors/results/index.d.mts +18 -18
package/dist/core/processors/results/index.mjs.map +1 -1
package/dist/core/runner/index.d.mts +2 -2
package/dist/core/runner/index.mjs +258 -258
package/dist/core/runner/index.mjs.map +1 -1
package/dist/core/scheduler/index.d.mts +1 -1
package/dist/core/scheduler/index.mjs +64 -64
package/dist/core/scheduler/index.mjs.map +1 -1
package/dist/{env-bRH0K6fU.d.mts → env-Br6jaWGL.d.mts} +9 -9
package/dist/{env-BVYeJhGA.mjs → env-egxaJtNn.mjs} +8 -8
package/dist/env-egxaJtNn.mjs.map +1 -0
package/dist/{expect-extensions-Mf1sMNBv.mjs → expect-extensions-BKdEPt3h.mjs} +46 -46
package/dist/expect-extensions-BKdEPt3h.mjs.map +1 -0
package/dist/expect.mjs +1 -1
package/dist/{index-CwKBlCG9.d.mts → index-BLIlhiWT.d.mts} +565 -565
package/dist/{index-Be5I1ZJL.d.mts → index-CIaJClcC.d.mts} +48 -48
package/dist/index.d.mts +207 -195
package/dist/index.mjs +147 -147
package/dist/index.mjs.map +1 -1
package/dist/models-CaCOUPZw.mjs.map +1 -1
package/dist/plugins/chat-models/index.d.mts +279 -279
package/dist/plugins/chat-models/index.mjs +359 -359
package/dist/plugins/chat-models/index.mjs.map +1 -1
package/dist/{registry-BSyjwZFx.mjs → registry-BK7k6X81.mjs} +293 -293
package/dist/registry-BK7k6X81.mjs.map +1 -0
package/dist/testing/expect-extensions.d.mts +27 -27
package/dist/testing/expect-extensions.mjs +1 -1
package/package.json +3 -3
package/dist/cli-CHFCF8UR.mjs.map +0 -1
package/dist/env-BVYeJhGA.mjs.map +0 -1
package/dist/expect-extensions-Mf1sMNBv.mjs.map +0 -1
package/dist/registry-BSyjwZFx.mjs.map +0 -1

package/dist/index.d.mts CHANGED Viewed

@@ -1,25 +1,24 @@
-import { $ as InferenceExecutor, D as TaskRunContext, I as TaskExecutionContext, J as RunResult, O as TaskRunOutput, R as ModelDefinition, S as TaskConcurrencyConfig, X as RunScoreKind, f as MatrixDefinition, k as TelemetryAttributeValue, o as CliReportingConfig, p as MatrixLayer, rt as ScheduledTask, t as ConfigHookPlugin, w as TaskExecutionPolicy } from "./index-CwKBlCG9.mjs";
-import { a as requiredEnvFrom } from "./env-bRH0K6fU.mjs";
+import { $ as InferenceExecutor, D as TaskRunContext, I as TaskExecutionContext, J as RunResult, O as TaskRunOutput, R as ModelDefinition, S as TaskConcurrencyConfig, X as RunScoreKind, f as MatrixDefinition, k as TelemetryAttributeValue, o as CliReportingConfig, p as MatrixLayer, rt as ScheduledTask, t as ConfigHookPlugin, w as TaskExecutionPolicy } from "./index-BLIlhiWT.mjs";
+import { a as requiredEnvFrom } from "./env-Br6jaWGL.mjs";
 import { expect } from "./expect.mjs";
 //#region src/cli/reporters/vitest-compat-reporter.d.ts
-type Awaitable<T> = T | Promise<T>;
 /**
- * Normalized module-like entity delivered to vitest-compatible reporter hooks.
+ * Normalized test-case-like entity delivered to vitest-compatible reporter hooks.
  */
-interface VievalVitestCompatModule {
+interface VievalVitestCompatCase {
   id: string;
+  module: VievalVitestCompatModule;
   name: string;
-  projectName: string;
+  state: 'failed' | 'passed' | 'pending' | 'skipped';
 }
 /**
- * Normalized test-case-like entity delivered to vitest-compatible reporter hooks.
+ * Normalized module-like entity delivered to vitest-compatible reporter hooks.
  */
-interface VievalVitestCompatCase {
+interface VievalVitestCompatModule {
   id: string;
   name: string;
-  module: VievalVitestCompatModule;
-  state: 'failed' | 'passed' | 'pending' | 'skipped';
+  projectName: string;
 }
 /**
  * Supported vitest-style reporter lifecycle hooks.
@@ -47,6 +46,7 @@ interface VievalVitestCompatReporter {
     projectName: string;
   }[]) => Awaitable<void>;
 }
+type VievalVitestCompatReporterReference = readonly [VievalVitestCompatReporterValue, unknown?] | VievalVitestCompatReporterValue;
 /**
  * Supported project reporter references.
  *
@@ -58,13 +58,88 @@ interface VievalVitestCompatReporter {
  * `https://github.com/vitest-dev/vitest/blob/b865b4d83d1e7874607ba1b2d84b9e2d135ecd33/packages/vitest/src/node/config/resolveConfig.ts#L674-L713`
  */
 type VievalVitestCompatReporterValue = string | VievalVitestCompatReporter;
-type VievalVitestCompatReporterReference = VievalVitestCompatReporterValue | readonly [VievalVitestCompatReporterValue, unknown?];
+type Awaitable<T> = Promise<T> | T;
+/**
+ * Creates a project-level vitest-compatible reporter bridge.
+ *
+ * Use when:
+ * - `vieval` should reuse vitest-like reporter callbacks without changing CLI output contracts
+ *
+ * Expects:
+ * - references point to modules whose default export is a reporter instance or constructor
+ *
+ * Returns:
+ * - `null` when no reporter references are configured
+ */
 //#endregion
 //#region src/cli/config.d.ts
 /**
- * CLI plugin shape bound to the full CLI config object.
+ * Benchmark identity and shared cache namespace.
  */
-type CliConfigPlugin = ConfigHookPlugin<CliConfig>;
+interface CliComparisonBenchmarkConfig {
+  /**
+   * Benchmark identifier used in report artifacts.
+   */
+  id: string;
+  /**
+   * Shared cache namespace reused across method runs.
+   */
+  sharedCaseNamespace: string;
+}
+/**
+ * One comparison entry loaded by `vieval compare`.
+ */
+interface CliComparisonConfig {
+  /**
+   * Benchmark metadata for reporting and shared cache coordination.
+   */
+  benchmark: CliComparisonBenchmarkConfig;
+  /**
+   * Optional workspace exclude glob(s), also relative to config directory.
+   */
+  excludesWorkspaces?: string | string[];
+  /**
+   * Comparison id selected by `--comparison`.
+   */
+  id: string;
+  /**
+   * Optional workspace glob(s) discovered relative to config directory.
+   */
+  includesWorkspaces?: string | string[];
+  /**
+   * Optional explicit method list.
+   */
+  methods?: CliComparisonMethodConfig[];
+}
+/**
+ * One explicit comparison method descriptor.
+ */
+interface CliComparisonMethodConfig {
+  /**
+   * Optional explicit config file path for this workspace.
+   */
+  configFilePath?: string;
+  /**
+   * Method identifier shown in compare reports.
+   */
+  id: string;
+  /**
+   * Project name to execute inside workspace config.
+   */
+  project: string;
+  /**
+   * Workspace path containing this method's `vieval.config.*`.
+   */
+  workspace: string;
+}
+/**
+ * Comparison mode config for `vieval compare`.
+ */
+interface CliComparisonModeConfig extends CliConfigBase {
+  comparisons: CliComparisonConfig[];
+  projects?: never;
+  workspaces?: never;
+}
 /**
  * Concurrency limits that can be declared in CLI-facing config.
  *
@@ -80,9 +155,13 @@ type CliConfigPlugin = ConfigHookPlugin<CliConfig>;
  */
 interface CliConcurrencyConfig {
   /**
-   * Workspace-level concurrency cap.
+   * Attempt-level concurrency cap.
    */
-  workspace?: number;
+  attempt?: number;
+  /**
+   * Case-level concurrency cap.
+   */
+  case?: number;
   /**
    * Project-level concurrency cap.
    */
@@ -92,40 +171,56 @@ interface CliConcurrencyConfig {
    */
   task?: number;
   /**
-   * Attempt-level concurrency cap.
-   */
-  attempt?: number;
-  /**
-   * Case-level concurrency cap.
+   * Workspace-level concurrency cap.
    */
-  case?: number;
+  workspace?: number;
 }
+/**
+ * Top-level CLI config loaded from `vieval.config.*`.
+ *
+ * Exactly one top-level mode is allowed:
+ * - `projects`
+ * - `workspaces`
+ * - `comparisons`
+ */
+type CliConfig = CliComparisonModeConfig | CliProjectModeConfig | CliWorkspaceModeConfig;
+/**
+ * CLI plugin shape bound to the full CLI config object.
+ */
+type CliConfigPlugin = ConfigHookPlugin<CliConfig>;
 /**
  * Defines one project block for `vieval run`.
  */
 interface CliProjectConfig {
   /**
-   * Project label used in summary output.
-   */
-  name: string;
-  /**
-   * Project root used for include/exclude glob matching.
+   * Optional project-scoped concurrency overrides.
    *
-   * @default process cwd
+   * @default inherited from top-level or CLI execution settings
    */
-  root?: string;
+  concurrency?: Omit<CliConcurrencyConfig, 'workspace'>;
   /**
-   * Glob patterns for eval file discovery.
-   *
-   * @default Common eval file globs for TypeScript and JavaScript module formats.
+   * Optional eval-time matrix dimensions.
    */
-  include?: string[];
+  evalMatrix?: MatrixDefinition | MatrixLayer;
   /**
    * Glob patterns excluded from discovery.
    *
    * @default Common exclusion globs for dependencies, build output, and VCS directories.
    */
   exclude?: string[];
+  /**
+   * Optional task executor.
+   *
+   * Use when this project should execute live inferenceExecutor requests.
+   * If omitted, `vieval run` performs collection + scheduling only.
+   */
+  executor?: (task: ScheduledTask, context: CliProjectExecutorContext) => Promise<RunResult>;
+  /**
+   * Glob patterns for eval file discovery.
+   *
+   * @default Common eval file globs for TypeScript and JavaScript module formats.
+   */
+  include?: string[];
   /**
    * Providers expanded by scheduler.
    *
@@ -142,26 +237,9 @@ interface CliProjectConfig {
    */
   models?: ModelDefinition[];
   /**
-   * Optional run-time matrix dimensions.
-   */
-  runMatrix?: MatrixDefinition | MatrixLayer;
-  /**
-   * Optional eval-time matrix dimensions.
-   */
-  evalMatrix?: MatrixDefinition | MatrixLayer;
-  /**
-   * Optional project-scoped concurrency overrides.
-   *
-   * @default inherited from top-level or CLI execution settings
-   */
-  concurrency?: Omit<CliConcurrencyConfig, 'workspace'>;
-  /**
-   * Optional task executor.
-   *
-   * Use when this project should execute live inferenceExecutor requests.
-   * If omitted, `vieval run` performs collection + scheduling only.
+   * Project label used in summary output.
    */
-  executor?: (task: ScheduledTask, context: CliProjectExecutorContext) => Promise<RunResult>;
+  name: string;
   /**
    * Optional project-local plugins.
    */
@@ -175,107 +253,73 @@ interface CliProjectConfig {
    * @default []
    */
   reporters?: VievalVitestCompatReporterReference[];
-}
-/**
- * One workspace descriptor for workspace-mode configs.
- */
-interface CliWorkspaceConfig {
   /**
-   * Workspace identifier.
+   * Project root used for include/exclude glob matching.
+   *
+   * @default process cwd
    */
-  id: string;
+  root?: string;
   /**
-   * Workspace root path.
+   * Optional run-time matrix dimensions.
    */
-  root: string;
+  runMatrix?: MatrixDefinition | MatrixLayer;
 }
 /**
- * One explicit comparison method descriptor.
+ * Execution context exposed to project-level `executor` implementations.
+ *
+ * Use when:
+ * - a project executor needs task-scoped models plus case reporter hooks
+ * - custom scheduling logic wants the same hook shape as `TaskRunContext`
+ *
+ * Expects:
+ * - `models` exposes configured model registrations for plugin helpers
+ * - `reporterHooks` follows `TaskRunContext['reporterHooks']`
+ * - `telemetry` follows `TaskRunContext['telemetry']`
+ * - `runtimeConcurrency` follows `TaskRunContext['runtimeConcurrency']`
  */
-interface CliComparisonMethodConfig {
-  /**
-   * Method identifier shown in compare reports.
-   */
-  id: string;
-  /**
-   * Workspace path containing this method's `vieval.config.*`.
-   */
-  workspace: string;
-  /**
-   * Project name to execute inside workspace config.
-   */
-  project: string;
-  /**
-   * Optional explicit config file path for this workspace.
-   */
-  configFilePath?: string;
+interface CliProjectExecutorContext extends TaskExecutionContext {
+  reporterHooks?: TaskRunContext['reporterHooks'];
+  runtimeConcurrency?: TaskRunContext['runtimeConcurrency'];
+  telemetry?: TaskRunContext['telemetry'];
 }
 /**
- * Benchmark identity and shared cache namespace.
+ * Project mode config for `vieval run`.
  */
-interface CliComparisonBenchmarkConfig {
-  /**
-   * Benchmark identifier used in report artifacts.
-   */
-  id: string;
+interface CliProjectModeConfig extends CliConfigBase {
+  comparisons?: never;
   /**
-   * Shared cache namespace reused across method runs.
+   * Project list expanded by `vieval run`.
+   *
+   * @default [{ name: 'default' }]
    */
-  sharedCaseNamespace: string;
+  projects?: CliProjectConfig[];
+  workspaces?: never;
 }
 /**
- * One comparison entry loaded by `vieval compare`.
+ * One workspace descriptor for workspace-mode configs.
  */
-interface CliComparisonConfig {
+interface CliWorkspaceConfig {
   /**
-   * Comparison id selected by `--comparison`.
+   * Workspace identifier.
    */
   id: string;
   /**
-   * Benchmark metadata for reporting and shared cache coordination.
-   */
-  benchmark: CliComparisonBenchmarkConfig;
-  /**
-   * Optional explicit method list.
-   */
-  methods?: CliComparisonMethodConfig[];
-  /**
-   * Optional workspace glob(s) discovered relative to config directory.
-   */
-  includesWorkspaces?: string | string[];
-  /**
-   * Optional workspace exclude glob(s), also relative to config directory.
+   * Workspace root path.
    */
-  excludesWorkspaces?: string | string[];
+  root: string;
 }
 /**
- * Execution context exposed to project-level `executor` implementations.
- *
- * Use when:
- * - a project executor needs task-scoped models plus case reporter hooks
- * - custom scheduling logic wants the same hook shape as `TaskRunContext`
- *
- * Expects:
- * - `models` exposes configured model registrations for plugin helpers
- * - `reporterHooks` follows `TaskRunContext['reporterHooks']`
- * - `telemetry` follows `TaskRunContext['telemetry']`
- * - `runtimeConcurrency` follows `TaskRunContext['runtimeConcurrency']`
+ * Workspace mode config placeholder for future workspace orchestration.
  */
-interface CliProjectExecutorContext extends TaskExecutionContext {
-  reporterHooks?: TaskRunContext['reporterHooks'];
-  telemetry?: TaskRunContext['telemetry'];
-  runtimeConcurrency?: TaskRunContext['runtimeConcurrency'];
+interface CliWorkspaceModeConfig extends CliConfigBase {
+  comparisons?: never;
+  projects?: never;
+  workspaces: CliWorkspaceConfig[];
 }
 /**
  * Top-level CLI config loaded from `vieval.config.*`.
  */
 interface CliConfigBase {
-  /**
-   * Global model definitions inherited by projects.
-   *
-   * @default []
-   */
-  models?: ModelDefinition[];
   /**
    * Global concurrency defaults inherited by projects and tasks.
    *
@@ -289,6 +333,22 @@ interface CliConfigBase {
    * @default undefined
    */
   concurrency?: CliConcurrencyConfig;
+  /**
+   * Environment variables injected into `process.env` during `vieval run`.
+   *
+   * Use when:
+   * - eval tasks depend on runtime env values (for example inferenceExecutor API keys)
+   * - config wants deterministic env values without shell-level exports
+   *
+   * @default {}
+   */
+  env?: NodeJS.ProcessEnv;
+  /**
+   * Global model definitions inherited by projects.
+   *
+   * @default []
+   */
+  models?: ModelDefinition[];
   /**
    * Global config plugins.
    *
@@ -301,16 +361,6 @@ interface CliConfigBase {
    * @default []
    */
   reporters?: VievalVitestCompatReporterReference[];
-  /**
-   * Environment variables injected into `process.env` during `vieval run`.
-   *
-   * Use when:
-   * - eval tasks depend on runtime env values (for example inferenceExecutor API keys)
-   * - config wants deterministic env values without shell-level exports
-   *
-   * @default {}
-   */
-  env?: NodeJS.ProcessEnv;
   /**
    * Optional reporting integrations shared by CLI run orchestration.
    *
@@ -318,44 +368,6 @@ interface CliConfigBase {
    */
   reporting?: CliReportingConfig;
 }
-/**
- * Project mode config for `vieval run`.
- */
-interface CliProjectModeConfig extends CliConfigBase {
-  /**
-   * Project list expanded by `vieval run`.
-   *
-   * @default [{ name: 'default' }]
-   */
-  projects?: CliProjectConfig[];
-  comparisons?: never;
-  workspaces?: never;
-}
-/**
- * Workspace mode config placeholder for future workspace orchestration.
- */
-interface CliWorkspaceModeConfig extends CliConfigBase {
-  workspaces: CliWorkspaceConfig[];
-  projects?: never;
-  comparisons?: never;
-}
-/**
- * Comparison mode config for `vieval compare`.
- */
-interface CliComparisonModeConfig extends CliConfigBase {
-  comparisons: CliComparisonConfig[];
-  projects?: never;
-  workspaces?: never;
-}
-/**
- * Top-level CLI config loaded from `vieval.config.*`.
- *
- * Exactly one top-level mode is allowed:
- * - `projects`
- * - `workspaces`
- * - `comparisons`
- */
-type CliConfig = CliProjectModeConfig | CliWorkspaceModeConfig | CliComparisonModeConfig;
 /**
  * Helper used by `vieval.config.*` for better type inference.
  */
@@ -377,6 +389,15 @@ declare const defineConfig: import("c12").DefineConfig<CliConfig, import("c12").
 declare function loadEnv(mode: string, envDir: string, prefixes?: string | string[]): NodeJS.ProcessEnv;
 //#endregion
 //#region src/dsl/task.d.ts
+/**
+ * Per-case registration options for `caseOf`.
+ */
+interface CaseRegistrationOptions<TInput> extends TaskExecutionPolicy {
+  /**
+   * Optional case input payload.
+   */
+  input: TInput;
+}
 /**
  * Runtime context provided to a task case callback.
  */
@@ -388,26 +409,26 @@ interface CaseRunContext<TInput> extends TaskRunContext {
     inputs: TInput;
   };
   /**
-   * Overrides one case score family with a custom normalized value.
+   * Emits one custom case metric into report events.
    *
    * Use when:
-   * - one case computes a benchmark-native score that should flow into run aggregation
+   * - tasks need structured benchmark metadata beyond exact/judge score families
    *
    * Expects:
-   * - `score` to stay in the `0..1` range
+   * - `name` to be a stable metric identifier
+   * - `value` to be JSON-serializable
    */
-  score: (score: number, kind?: RunScoreKind) => void;
+  metric: (name: string, value: TelemetryAttributeValue) => void;
   /**
-   * Emits one custom case metric into report events.
+   * Overrides one case score family with a custom normalized value.
    *
    * Use when:
-   * - tasks need structured benchmark metadata beyond exact/judge score families
+   * - one case computes a benchmark-native score that should flow into run aggregation
    *
    * Expects:
-   * - `name` to be a stable metric identifier
-   * - `value` to be JSON-serializable
+   * - `score` to stay in the `0..1` range
    */
-  metric: (name: string, value: TelemetryAttributeValue) => void;
+  score: (score: number, kind?: RunScoreKind) => void;
   /**
    * Cooperative abort signal for the current case execution.
    */
@@ -436,15 +457,6 @@ interface CasesFromInputsOptions extends TaskExecutionPolicy {
    */
   concurrency?: number;
 }
-/**
- * Per-case registration options for `caseOf`.
- */
-interface CaseRegistrationOptions<TInput> extends TaskExecutionPolicy {
-  /**
-   * Optional case input payload.
-   */
-  input: TInput;
-}
 /**
  * Builder callbacks passed into `describeTask`.
  */
@@ -465,10 +477,6 @@ interface DescribeTaskBuilder {
  * Options for `describeTask`.
  */
 interface DescribeTaskOptions extends TaskExecutionPolicy {
-  /**
-   * Optional description override.
-   */
-  description?: string;
   /**
    * Optional task-local concurrency overrides.
    *
@@ -482,6 +490,10 @@ interface DescribeTaskOptions extends TaskExecutionPolicy {
    * @default inherited from project or CLI concurrency settings
    */
   concurrency?: TaskConcurrencyConfig;
+  /**
+   * Optional description override.
+   */
+  description?: string;
 }
 /**
  * Registers one case in the currently active task scope.
@@ -499,7 +511,7 @@ declare function casesFromInputs<TInput>(namePrefix: string, inputs: readonly TI
  * - task behavior should be declared with `caseOf` and `casesFromInputs`
  * - business agent code should be imported and run from eval task files
  */
-declare function describeTask(name: string, build: ((builder: DescribeTaskBuilder) => void) | (() => void), options?: DescribeTaskOptions): {
+declare function describeTask(name: string, build: (() => void) | ((builder: DescribeTaskBuilder) => void), options?: DescribeTaskOptions): {
   readonly description: string;
   readonly name: string;
   readonly task: {