npm - vieval - Versions diffs - 0.0.10 → 0.0.12 - Mend

vieval 0.0.10 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53)

package/README.md +31 -31
package/dist/bin/vieval.mjs +1 -1
package/dist/bin/vieval.mjs.map +1 -1
package/dist/cli/index.d.mts +1 -1
package/dist/cli/index.mjs +1 -1
package/dist/{cli-DTDgaqeI.mjs → cli-uzS81IPd.mjs} +1483 -1483
package/dist/cli-uzS81IPd.mjs.map +1 -0
package/dist/config.d.mts +1 -1
package/dist/config.mjs +1 -1
package/dist/config.mjs.map +1 -1
package/dist/core/assertions/index.d.mts +156 -156
package/dist/core/assertions/index.mjs +82 -82
package/dist/core/assertions/index.mjs.map +1 -1
package/dist/core/inference-executors/index.d.mts +37 -37
package/dist/core/inference-executors/index.mjs +54 -53
package/dist/core/inference-executors/index.mjs.map +1 -1
package/dist/core/processors/results/index.d.mts +18 -18
package/dist/core/processors/results/index.mjs.map +1 -1
package/dist/core/runner/index.d.mts +2 -2
package/dist/core/runner/index.mjs +259 -259
package/dist/core/runner/index.mjs.map +1 -1
package/dist/core/scheduler/index.d.mts +1 -1
package/dist/core/scheduler/index.mjs +65 -65
package/dist/core/scheduler/index.mjs.map +1 -1
package/dist/{env-DfWZy_n4.d.mts → env-Br6jaWGL.d.mts} +9 -9
package/dist/{env-nV5rVErX.mjs → env-egxaJtNn.mjs} +8 -8
package/dist/env-egxaJtNn.mjs.map +1 -0
package/dist/{expect-extensions-DCSqlneN.mjs → expect-extensions-BKdEPt3h.mjs} +46 -46
package/dist/expect-extensions-BKdEPt3h.mjs.map +1 -0
package/dist/expect.d.mts +1 -3
package/dist/expect.mjs +1 -1
package/dist/expect.mjs.map +1 -1
package/dist/{index-D_aMeWqO.d.mts → index-BLIlhiWT.d.mts} +565 -565
package/dist/{index-Bg0atWBF.d.mts → index-CIaJClcC.d.mts} +48 -48
package/dist/index.d.mts +208 -197
package/dist/index.mjs +148 -148
package/dist/index.mjs.map +1 -1
package/dist/{models-pBSRUZhY.mjs → models-CaCOUPZw.mjs} +1 -1
package/dist/{models-pBSRUZhY.mjs.map → models-CaCOUPZw.mjs.map} +1 -1
package/dist/plugins/chat-models/index.d.mts +279 -279
package/dist/plugins/chat-models/index.mjs +360 -360
package/dist/plugins/chat-models/index.mjs.map +1 -1
package/dist/{queue-DsZQkZO_.mjs → queue-BL86z2W_.mjs} +1 -1
package/dist/{queue-DsZQkZO_.mjs.map → queue-BL86z2W_.mjs.map} +1 -1
package/dist/{registry-DMnwE_mY.mjs → registry-BK7k6X81.mjs} +294 -294
package/dist/registry-BK7k6X81.mjs.map +1 -0
package/dist/testing/expect-extensions.d.mts +27 -27
package/dist/testing/expect-extensions.mjs +1 -1
package/package.json +12 -12
package/dist/cli-DTDgaqeI.mjs.map +0 -1
package/dist/env-nV5rVErX.mjs.map +0 -1
package/dist/expect-extensions-DCSqlneN.mjs.map +0 -1
package/dist/registry-DMnwE_mY.mjs.map +0 -1

package/dist/config.d.mts CHANGED Viewed

@@ -1,2 +1,2 @@
-import { C as TaskDefinition, D as TaskRunContext, E as TaskReporterHooks, O as TaskRunOutput, R as ModelDefinition, S as TaskConcurrencyConfig, T as TaskReporterEventPayload, _ as ScopedMatrices, a as CliOpenTelemetryReportingConfig, b as TaskCaseReporterPayload, c as EvalDefinition, d as MatrixAxisValues, f as MatrixDefinition, g as MatrixValue, h as MatrixRow, i as Awaitable, l as EvalModule, m as MatrixPrimitive, n as defineEval, o as CliReportingConfig, p as MatrixLayer, r as defineTask, s as CollectedEvalEntry, t as ConfigHookPlugin, u as EvalModuleMap, v as TaskAutoRetryDelay, w as TaskExecutionPolicy, x as TaskCaseState, y as TaskCaseReporterEndPayload, z as resolveModelByName } from "./index-D_aMeWqO.mjs";
+import { C as TaskDefinition, D as TaskRunContext, E as TaskReporterHooks, O as TaskRunOutput, R as ModelDefinition, S as TaskConcurrencyConfig, T as TaskReporterEventPayload, _ as ScopedMatrices, a as CliOpenTelemetryReportingConfig, b as TaskCaseReporterPayload, c as EvalDefinition, d as MatrixAxisValues, f as MatrixDefinition, g as MatrixValue, h as MatrixRow, i as Awaitable, l as EvalModule, m as MatrixPrimitive, n as defineEval, o as CliReportingConfig, p as MatrixLayer, r as defineTask, s as CollectedEvalEntry, t as ConfigHookPlugin, u as EvalModuleMap, v as TaskAutoRetryDelay, w as TaskExecutionPolicy, x as TaskCaseState, y as TaskCaseReporterEndPayload, z as resolveModelByName } from "./index-BLIlhiWT.mjs";
 export { Awaitable, CliOpenTelemetryReportingConfig, CliReportingConfig, CollectedEvalEntry, ConfigHookPlugin, EvalDefinition, EvalModule, EvalModuleMap, MatrixAxisValues, MatrixDefinition, MatrixLayer, MatrixPrimitive, MatrixRow, MatrixValue, ModelDefinition, ScopedMatrices, TaskAutoRetryDelay, TaskCaseReporterEndPayload, TaskCaseReporterPayload, TaskCaseState, TaskConcurrencyConfig, TaskDefinition, TaskExecutionPolicy, TaskReporterEventPayload, TaskReporterHooks, TaskRunContext, TaskRunOutput, defineEval, defineTask, resolveModelByName };

package/dist/config.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-import { t as resolveModelByName } from "./models-pBSRUZhY.mjs";
+import { t as resolveModelByName } from "./models-CaCOUPZw.mjs";
 //#region src/config/define.ts
 /**
 * Returns the provided vieval definition while preserving literal field types.

package/dist/config.mjs.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"config.mjs","names":[],"sources":["../src/config/define.ts"],"sourcesContent":["import type { EvalDefinition, TaskDefinition } from './types'\n\n/**\n * Returns the provided vieval definition while preserving literal field types.\n */\nexport function defineEval<const TDefinition extends EvalDefinition>(definition: TDefinition): TDefinition {\n  return definition\n}\n\n/**\n * Returns the provided task definition while preserving literal field types.\n */\nexport function defineTask<const TDefinition extends TaskDefinition>(definition: TDefinition): TDefinition {\n  return definition\n}\n"],"mappings":";;;;;AAKA,SAAgB,WAAqD,YAAsC;AACzG,QAAO;;;;;AAMT,SAAgB,WAAqD,YAAsC;AACzG,QAAO"}
1	+ {"version":3,"file":"config.mjs","names":[],"sources":["../src/config/define.ts"],"sourcesContent":["import type { EvalDefinition, TaskDefinition } from './types'\n\n/**\n * Returns the provided vieval definition while preserving literal field types.\n */\nexport function defineEval<const TDefinition extends EvalDefinition>(definition: TDefinition): TDefinition {\n  return definition\n}\n\n/**\n * Returns the provided task definition while preserving literal field types.\n */\nexport function defineTask<const TDefinition extends TaskDefinition>(definition: TDefinition): TDefinition {\n  return definition\n}\n"],"mappings":";;;;;AAKA,SAAgB,WAAqD,YAAsC;CACzG,OAAO;AACT;;;;AAKA,SAAgB,WAAqD,YAAsC;CACzG,OAAO;AACT"}

package/dist/core/assertions/index.d.mts CHANGED Viewed

@@ -1,47 +1,30 @@
-import { X as RunScoreKind, Y as RunScore } from "../../index-D_aMeWqO.mjs";
+import { X as RunScoreKind, Y as RunScore } from "../../index-BLIlhiWT.mjs";
 //#region src/core/assertions/index.d.ts
 /**
- * Stores mutable evaluation state for stateful assertion flows.
- *
- * Use when:
- * - assertions need to share counters, rolling metrics, or memoized values
- * - a scenario evaluates multiple steps and expects state-aware checks
- */
-type AssertionState = Map<string, unknown>;
-/**
- * Represents one tool call emitted by a model response.
+ * Async assertion function used by eval scenarios.
  */
-interface ToolCall {
-  /**
-   * Tool name used by the call.
-   */
-  name: string;
-  /**
-   * Tool arguments payload.
-   */
-  args: unknown;
-}
+type Assertion = (context: AssertionContext) => Promise<AssertionOutcome>;
 /**
  * Normalized assertion context for one model output.
  */
 interface AssertionContext {
   /**
-   * Plain text model output used by text assertions.
+   * Shared mutable state for stateful assertion measurement.
    */
-  text: string;
+  state: AssertionState;
   /**
    * Optional structured output parsed from the model response.
    */
   structuredOutput?: unknown;
   /**
-   * Optional tool calls extracted from the model response.
+   * Plain text model output used by text assertions.
    */
-  toolCalls?: readonly ToolCall[];
+  text: string;
   /**
-   * Shared mutable state for stateful assertion measurement.
+   * Optional tool calls extracted from the model response.
    */
-  state: AssertionState;
+  toolCalls?: readonly ToolCall[];
 }
 /**
  * Result for one assertion evaluation.
@@ -51,70 +34,66 @@ interface AssertionOutcome {
    * Stable assertion id.
    */
   id: string;
-  /**
-   * Assertion family emitted as run score kind.
-   */
-  scoreKind: RunScoreKind;
   /**
    * Whether the assertion passed.
    */
   pass: boolean;
+  /**
+   * Human-readable reason for logs and reports.
+   */
+  reason: string;
   /**
    * Normalized score in the `0..1` range.
    */
   score: number;
   /**
-   * Human-readable reason for logs and reports.
+   * Assertion family emitted as run score kind.
    */
-  reason: string;
+  scoreKind: RunScoreKind;
 }
 /**
- * Async assertion function used by eval scenarios.
- */
-type Assertion = (context: AssertionContext) => Promise<AssertionOutcome>;
-/**
- * Normalizes text for matching.
+ * Stores mutable evaluation state for stateful assertion flows.
  *
- * Before: `"  Hello\nWorld  "`
- * After: `"hello world"`
+ * Use when:
+ * - assertions need to share counters, rolling metrics, or memoized values
+ * - a scenario evaluates multiple steps and expects state-aware checks
  */
-declare function normalizeMatchText(value: string, caseSensitive: boolean): string;
+type AssertionState = Map<string, unknown>;
 /**
- * Options for include-keyword assertions.
+ * Options for custom assertions.
  */
-interface MustIncludeAssertionOptions {
+interface CustomAssertionOptions {
   /**
-   * Stable assertion id.
+   * Custom evaluator callback.
    */
-  id: string;
+  evaluate: (context: AssertionContext) => Promise<{
+    pass: boolean;
+    reason: string;
+    score: number;
+  }> | {
+    pass: boolean;
+    reason: string;
+    score: number;
+  };
   /**
-   * Keywords that must be present.
+   * Stable assertion id.
    */
-  keywords: readonly string[];
+  id: string;
   /**
-   * Match mode for keywords.
-   *
-   * @default 'all'
+   * Score family emitted by this custom assertion.
    */
-  mode?: 'all' | 'any';
+  scoreKind: RunScoreKind;
+}
+/**
+ * Options for exclude-keyword assertions.
+ */
+interface MustExcludeAssertionOptions {
   /**
    * Case-sensitive matching toggle.
    *
    * @default false
    */
   caseSensitive?: boolean;
-}
-/**
- * Creates an assertion that requires specific keywords in model text.
- *
- * Example:
- * `expectMustInclude({ id: 'tone', keywords: ['calm', 'move'] })`
- */
-declare function expectMustInclude(options: MustIncludeAssertionOptions): Assertion;
-/**
- * Options for exclude-keyword assertions.
- */
-interface MustExcludeAssertionOptions {
   /**
    * Stable assertion id.
    */
@@ -123,156 +102,144 @@ interface MustExcludeAssertionOptions {
    * Keywords that must not appear.
    */
   keywords: readonly string[];
+}
+/**
+ * Options for include-keyword assertions.
+ */
+interface MustIncludeAssertionOptions {
   /**
    * Case-sensitive matching toggle.
    *
    * @default false
    */
   caseSensitive?: boolean;
-}
-/**
- * Creates an assertion that forbids specific keywords.
- *
- * Example:
- * `expectMustExclude({ id: 'no-engine-dump', keywords: ['bestmove', 'ponder'] })`
- */
-declare function expectMustExclude(options: MustExcludeAssertionOptions): Assertion;
-/**
- * Options for regular-expression assertions.
- */
-interface RegexAssertionOptions {
   /**
    * Stable assertion id.
    */
   id: string;
   /**
-   * Pattern to apply to model text.
+   * Keywords that must be present.
    */
-  pattern: RegExp;
+  keywords: readonly string[];
+  /**
+   * Match mode for keywords.
+   *
+   * @default 'all'
+   */
+  mode?: 'all' | 'any';
 }
 /**
- * Creates an assertion based on a regular expression.
- *
- * Example:
- * `expectRegex({ id: 'starts-with-act', pattern: /^<\|ACT:/ })`
- */
-declare function expectRegex(options: RegexAssertionOptions): Assertion;
-/**
- * Options for structured-output assertions.
+ * Options for regular-expression assertions.
  */
-interface StructuredOutputAssertionOptions<TValue> {
+interface RegexAssertionOptions {
   /**
    * Stable assertion id.
    */
   id: string;
   /**
-   * Runtime validator for structured output.
-   */
-  validate: (value: unknown) => value is TValue;
-  /**
-   * Optional failure reason.
+   * Pattern to apply to model text.
    */
-  failureReason?: string;
+  pattern: RegExp;
 }
 /**
- * Creates an assertion for structured model output.
- *
- * Example:
- * `expectStructuredOutput({ id: 'json-shape', validate: isMySchema })`
- */
-declare function expectStructuredOutput<TValue>(options: StructuredOutputAssertionOptions<TValue>): Assertion;
-/**
- * Options for tool-call argument assertions.
+ * Options for rubric assertions.
  */
-interface ToolCallArgsAssertionOptions {
+interface RubricAssertionOptions {
   /**
    * Stable assertion id.
    */
   id: string;
   /**
-   * Tool name to inspect.
+   * Async rubric judge callback.
    */
-  toolName: string;
+  judge: (context: AssertionContext) => Promise<RubricJudgeResult>;
   /**
-   * Runtime validator for tool arguments.
+   * Minimum passing score.
+   *
+   * @default 0.7
    */
-  validate: (args: unknown) => boolean;
+  minScore?: number;
 }
-/**
- * Creates an assertion for validating tool-call arguments.
- *
- * Example:
- * `expectToolCallArgs({ id: 'spark-command-shape', toolName: 'builtIn_sparkCommand', validate: isSparkArgs })`
- */
-declare function expectToolCallArgs(options: ToolCallArgsAssertionOptions): Assertion;
 /**
  * Rubric judge result returned by teacher-model or rubric logic.
  */
 interface RubricJudgeResult {
   /**
-   * Normalized score in the `0..1` range.
+   * Optional judge model id.
    */
-  score: number;
+  judgeModel?: string;
   /**
    * Judge explanation text.
    */
   reason: string;
   /**
-   * Optional judge model id.
+   * Normalized score in the `0..1` range.
    */
-  judgeModel?: string;
+  score: number;
 }
 /**
- * Options for rubric assertions.
+ * Options for structured-output assertions.
  */
-interface RubricAssertionOptions {
+interface StructuredOutputAssertionOptions<TValue> {
   /**
-   * Stable assertion id.
+   * Optional failure reason.
    */
-  id: string;
+  failureReason?: string;
   /**
-   * Async rubric judge callback.
+   * Stable assertion id.
    */
-  judge: (context: AssertionContext) => Promise<RubricJudgeResult>;
+  id: string;
   /**
-   * Minimum passing score.
-   *
-   * @default 0.7
+   * Runtime validator for structured output.
    */
-  minScore?: number;
+  validate: (value: unknown) => value is TValue;
 }
 /**
- * Creates a rubric assertion driven by teacher-model style scoring.
- *
- * Example:
- * `expectRubric({ id: 'human-like-tone', judge: judgeFn, minScore: 0.8 })`
+ * Represents one tool call emitted by a model response.
  */
-declare function expectRubric(options: RubricAssertionOptions): Assertion;
+interface ToolCall {
+  /**
+   * Tool arguments payload.
+   */
+  args: unknown;
+  /**
+   * Tool name used by the call.
+   */
+  name: string;
+}
 /**
- * Options for custom assertions.
+ * Options for tool-call argument assertions.
  */
-interface CustomAssertionOptions {
+interface ToolCallArgsAssertionOptions {
   /**
    * Stable assertion id.
    */
   id: string;
   /**
-   * Score family emitted by this custom assertion.
+   * Tool name to inspect.
    */
-  scoreKind: RunScoreKind;
+  toolName: string;
   /**
-   * Custom evaluator callback.
+   * Runtime validator for tool arguments.
    */
-  evaluate: (context: AssertionContext) => Promise<{
-    pass: boolean;
-    reason: string;
-    score: number;
-  }> | {
-    pass: boolean;
-    reason: string;
-    score: number;
-  };
+  validate: (args: unknown) => boolean;
 }
+/**
+ * Returns failing assertion outcomes in original order.
+ */
+declare function collectFailedAssertions(outcomes: readonly AssertionOutcome[]): AssertionOutcome[];
+/**
+ * Executes assertion list and returns all outcomes.
+ *
+ * Call stack:
+ *
+ * {@link evaluateAssertions}
+ *   -> `assertion(context)`
+ *     -> {@link AssertionOutcome}[]
+ */
+declare function evaluateAssertions(assertions: readonly Assertion[], context: Omit<AssertionContext, 'state'> & {
+  state?: AssertionState;
+}): Promise<AssertionOutcome[]>;
 /**
  * Creates a custom assertion with fully user-defined logic.
  *
@@ -280,6 +247,20 @@ interface CustomAssertionOptions {
  * `expectCustom({ id: 'stateful-window', scoreKind: 'exact', evaluate: (ctx) => ... })`
  */
 declare function expectCustom(options: CustomAssertionOptions): Assertion;
+/**
+ * Creates an assertion that forbids specific keywords.
+ *
+ * Example:
+ * `expectMustExclude({ id: 'no-engine-dump', keywords: ['bestmove', 'ponder'] })`
+ */
+declare function expectMustExclude(options: MustExcludeAssertionOptions): Assertion;
+/**
+ * Creates an assertion that requires specific keywords in model text.
+ *
+ * Example:
+ * `expectMustInclude({ id: 'tone', keywords: ['calm', 'move'] })`
+ */
+declare function expectMustInclude(options: MustIncludeAssertionOptions): Assertion;
 /**
  * Creates an inverse assertion.
  *
@@ -290,25 +271,44 @@ declare function expectNot(assertion: Assertion, options: {
   id: string;
 }): Assertion;
 /**
- * Executes assertion list and returns all outcomes.
+ * Creates an assertion based on a regular expression.
  *
- * Call stack:
+ * Example:
+ * `expectRegex({ id: 'starts-with-act', pattern: /^<\|ACT:/ })`
+ */
+declare function expectRegex(options: RegexAssertionOptions): Assertion;
+/**
+ * Creates a rubric assertion driven by teacher-model style scoring.
  *
- * {@link evaluateAssertions}
- *   -> `assertion(context)`
- *     -> {@link AssertionOutcome}[]
+ * Example:
+ * `expectRubric({ id: 'human-like-tone', judge: judgeFn, minScore: 0.8 })`
  */
-declare function evaluateAssertions(assertions: readonly Assertion[], context: Omit<AssertionContext, 'state'> & {
-  state?: AssertionState;
-}): Promise<AssertionOutcome[]>;
+declare function expectRubric(options: RubricAssertionOptions): Assertion;
 /**
- * Converts assertion outcomes to run-score tuples consumed by aggregation.
+ * Creates an assertion for structured model output.
+ *
+ * Example:
+ * `expectStructuredOutput({ id: 'json-shape', validate: isMySchema })`
  */
-declare function toRunScores(outcomes: readonly AssertionOutcome[]): RunScore[];
+declare function expectStructuredOutput<TValue>(options: StructuredOutputAssertionOptions<TValue>): Assertion;
 /**
- * Returns failing assertion outcomes in original order.
+ * Creates an assertion for validating tool-call arguments.
+ *
+ * Example:
+ * `expectToolCallArgs({ id: 'spark-command-shape', toolName: 'builtIn_sparkCommand', validate: isSparkArgs })`
  */
-declare function collectFailedAssertions(outcomes: readonly AssertionOutcome[]): AssertionOutcome[];
+declare function expectToolCallArgs(options: ToolCallArgsAssertionOptions): Assertion;
+/**
+ * Normalizes text for matching.
+ *
+ * Before: `"  Hello\nWorld  "`
+ * After: `"hello world"`
+ */
+declare function normalizeMatchText(value: string, caseSensitive: boolean): string;
+/**
+ * Converts assertion outcomes to run-score tuples consumed by aggregation.
+ */
+declare function toRunScores(outcomes: readonly AssertionOutcome[]): RunScore[];
 //#endregion
 export { Assertion, AssertionContext, AssertionOutcome, AssertionState, CustomAssertionOptions, MustExcludeAssertionOptions, MustIncludeAssertionOptions, RegexAssertionOptions, RubricAssertionOptions, RubricJudgeResult, StructuredOutputAssertionOptions, ToolCall, ToolCallArgsAssertionOptions, collectFailedAssertions, evaluateAssertions, expectCustom, expectMustExclude, expectMustInclude, expectNot, expectRegex, expectRubric, expectStructuredOutput, expectToolCallArgs, normalizeMatchText, toRunScores };
 //# sourceMappingURL=index.d.mts.map