npm - @m4trix/evals - Versions diffs - 0.4.0 → 0.7.0 - Mend

@m4trix/evals 0.4.0 → 0.7.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (20)

package/dist/index.d.ts CHANGED Viewed

@@ -121,23 +121,24 @@ type TagMatcher = string | RegExp;
 type PathMatcher = string | RegExp;
 type InputOrBuilder<T> = T | (() => T);
-interface TestCaseDescribeConfig<TI extends Schema.Schema.Any, TOutputDefinition = unknown> {
+interface TestCaseDescribeConfig<TI extends Schema.Schema.Any, TO extends Schema.Schema.Any = Schema.Schema<unknown>> {
     name: string;
     tags: string[];
     inputSchema: TI;
     input: InputOrBuilder<Schema.Schema.Type<TI>>;
-    outputDefinition?: InputOrBuilder<TOutputDefinition>;
-    outputDefintion?: InputOrBuilder<TOutputDefinition>;
+    outputSchema?: TO;
+    output?: InputOrBuilder<Schema.Schema.Type<TO>>;
 }
-declare class TestCase<TInput = unknown, TOutputDefinition = unknown> {
+declare class TestCase<TInput = unknown, TOutput = unknown> {
     private readonly _config;
     private constructor();
-    static describe<TI extends Schema.Schema.Any, TOutputDefinition = unknown>(config: TestCaseDescribeConfig<TI, TOutputDefinition>): TestCase<Schema.Schema.Type<TI>, TOutputDefinition>;
+    static describe<TI extends Schema.Schema.Any, TO extends Schema.Schema.Any = Schema.Schema<unknown>>(config: TestCaseDescribeConfig<TI, TO>): TestCase<Schema.Schema.Type<TI>, Schema.Schema.Type<TO>>;
     getName(): string;
     getTags(): string[];
     getInputSchema(): Schema.Schema.Any;
     getInput(): TInput;
-    getOutputDefinition(): TOutputDefinition | undefined;
+    getOutputSchema(): Schema.Schema.Any | undefined;
+    getOutput(): TOutput | undefined;
 }
 interface DatasetDefineConfig {
@@ -209,7 +210,7 @@ interface MetricDef<TData = unknown> {
 declare const Metric: {
     of<TData>(config: {
         id: string;
-        name?: string;
+        name?: string | undefined;
         format: (data: TData) => string;
     }): MetricDef<TData>;
 };
@@ -233,7 +234,7 @@ interface ScoreDef<TData = unknown> {
 declare const Score: {
     of<TData>(config: {
         id: string;
-        name?: string;
+        name?: string | undefined;
         displayStrategy: ScoreDisplayStrategy;
         format: (data: TData) => string;
     }): ScoreDef<TData>;
@@ -309,7 +310,7 @@ type RunnerEvent = {
         passed: boolean;
         metrics?: ReadonlyArray<MetricItem>;
     }>;
-    outputDefinition?: unknown;
+    output?: unknown;
     errorMessage?: string;
 } | {
     type: 'RunCompleted';
@@ -374,4 +375,4 @@ interface BinaryScoreData {
 }
 declare const binaryScore: ScoreDef<BinaryScoreData>;
-export { type BinaryScoreData, type CliState, type CollectedDataset, type CollectedEvaluator, type CollectedTestCase, type ConfigType, Dataset, type EvalDataset, type EvalMiddleware, type EvalRun, type EvalsData, type EvaluateArgs, Evaluator, type EvaluatorOption, type LatencyData, type M4trixEvalConfig, type M4trixEvalConfigDiscovery, Metric, type MetricDef, type MetricItem, type PathMatcher, type PercentScoreData, type RunDatasetRequest, type RunSnapshot, type RunnerApi, type RunnerConfig, type RunnerConfigOverrides, type RunnerDiscoveryConfig, type RunnerEvent, Score, type ScoreDef, type ScoreDisplayStrategy, type ScoreItem, type SearchTestCasesQuery, type StartupArgs, type TagMatcher, TestCase, type TokenCountData, type ViewLevel, binaryScore, createRunner, defaultRunnerConfig, defineConfig, getMetricById, getScoreById, latencyMetric, loadMockData, loadRunnerData, parseStartupArgs, percentScore, tokenCountMetric, withRunnerConfig };
+export { BinaryScoreData, CliState, CollectedDataset, CollectedEvaluator, CollectedTestCase, ConfigType, Dataset, EvalDataset, EvalMiddleware, EvalRun, EvalsData, EvaluateArgs, Evaluator, EvaluatorOption, LatencyData, M4trixEvalConfig, M4trixEvalConfigDiscovery, Metric, MetricDef, MetricItem, PathMatcher, PercentScoreData, RunDatasetRequest, RunSnapshot, RunnerApi, RunnerConfig, RunnerConfigOverrides, RunnerDiscoveryConfig, RunnerEvent, Score, ScoreDef, ScoreDisplayStrategy, ScoreItem, SearchTestCasesQuery, StartupArgs, TagMatcher, TestCase, TokenCountData, ViewLevel, binaryScore, createRunner, defaultRunnerConfig, defineConfig, getMetricById, getScoreById, latencyMetric, loadMockData, loadRunnerData, parseStartupArgs, percentScore, tokenCountMetric, withRunnerConfig };

package/dist/index.js CHANGED Viewed

@@ -307,8 +307,8 @@ var TestCase = class _TestCase {
       tags: config.tags,
       inputSchema: config.inputSchema,
       input: config.input,
-      outputDefinition: config.outputDefinition,
-      outputDefintion: config.outputDefintion
+      outputSchema: config.outputSchema,
+      output: config.output
     });
   }
   getName() {
@@ -323,12 +323,14 @@ var TestCase = class _TestCase {
   getInput() {
     return resolve(this._config.input);
   }
-  getOutputDefinition() {
-    const value = this._config.outputDefinition ?? this._config.outputDefintion;
-    if (value === void 0) {
+  getOutputSchema() {
+    return this._config.outputSchema;
+  }
+  getOutput() {
+    if (this._config.output === void 0) {
       return void 0;
     }
-    return resolve(value);
+    return resolve(this._config.output);
   }
 };
@@ -864,12 +866,12 @@ function normalizeResult(result) {
   const metrics = Array.isArray(obj.metrics) ? obj.metrics : void 0;
   return { scores, metrics };
 }
-function readOutputDefinition(testCase) {
+function readOutput(testCase) {
   const candidate = testCase;
-  if (typeof candidate.getOutputDefinition !== "function") {
+  if (typeof candidate.getOutput !== "function") {
     return void 0;
   }
-  return candidate.getOutputDefinition();
+  return candidate.getOutput();
 }
 function nowIsoForFile() {
   return (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
@@ -899,7 +901,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
     const started = Date.now();
     const evaluatorScores = [];
     let testCaseError;
-    const outputDefinition = readOutputDefinition(testCaseItem.testCase);
+    const output = readOutput(testCaseItem.testCase);
     for (const { id: evaluatorId, evaluator } of task.evaluators) {
       const evaluateFn = evaluator.getEvaluateFn();
       if (!evaluateFn) {
@@ -914,7 +916,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
             evaluateFn({
               input: testCaseItem.testCase.getInput(),
               ctx,
-              output: outputDefinition
+              output
             })
           )
         );
@@ -947,7 +949,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
       passed: testCasePassed,
       durationMs: Date.now() - started,
       evaluatorScores,
-      outputDefinition,
+      output,
       errorMessage: testCaseError
     };
     updateSnapshot(task.runId, (snapshot) => ({