npm - evalsense - Versions diffs - 0.3.2 → 0.4.0 - Mend

evalsense 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

package/README.md +235 -98
package/dist/{chunk-BFGA2NUB.cjs → chunk-4BKZPVY4.cjs} +13 -6
package/dist/chunk-4BKZPVY4.cjs.map +1 -0
package/dist/{chunk-IYLSY7NX.js → chunk-IUVDDMJ3.js} +13 -6
package/dist/chunk-IUVDDMJ3.js.map +1 -0
package/dist/chunk-NCCQRZ2Y.cjs +1141 -0
package/dist/chunk-NCCQRZ2Y.cjs.map +1 -0
package/dist/chunk-TDGWDK2L.js +1108 -0
package/dist/chunk-TDGWDK2L.js.map +1 -0
package/dist/cli.cjs +11 -11
package/dist/cli.js +1 -1
package/dist/index-CATqAHNK.d.cts +416 -0
package/dist/index-CoMpaW-K.d.ts +416 -0
package/dist/index.cjs +507 -580
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +210 -161
package/dist/index.d.ts +210 -161
package/dist/index.js +455 -524
package/dist/index.js.map +1 -1
package/dist/metrics/index.cjs +103 -342
package/dist/metrics/index.cjs.map +1 -1
package/dist/metrics/index.d.cts +260 -31
package/dist/metrics/index.d.ts +260 -31
package/dist/metrics/index.js +24 -312
package/dist/metrics/index.js.map +1 -1
package/dist/metrics/opinionated/index.cjs +5 -5
package/dist/metrics/opinionated/index.d.cts +2 -163
package/dist/metrics/opinionated/index.d.ts +2 -163
package/dist/metrics/opinionated/index.js +1 -1
package/dist/{types-C71p0wzM.d.cts → types-D0hzfyKm.d.cts} +1 -13
package/dist/{types-C71p0wzM.d.ts → types-D0hzfyKm.d.ts} +1 -13
package/package.json +1 -1
package/dist/chunk-BFGA2NUB.cjs.map +0 -1
package/dist/chunk-IYLSY7NX.js.map +0 -1
package/dist/chunk-RZFLCWTW.cjs +0 -942
package/dist/chunk-RZFLCWTW.cjs.map +0 -1
package/dist/chunk-Z3U6AUWX.js +0 -925
package/dist/chunk-Z3U6AUWX.js.map +0 -1

package/dist/index.js CHANGED Viewed

@@ -1,8 +1,6 @@
-import { getCurrentSuite, setCurrentSuite, addSuite, addTestToCurrentSuite, DatasetError, IntegrityError, buildConfusionMatrix, getTruePositives, getFalsePositives, getFalseNegatives, getSupport, AssertionError, recordAssertion, recordFieldMetrics } from './chunk-IYLSY7NX.js';
-export { AssertionError, ConfigurationError, ConsoleReporter, DatasetError, EvalSenseError, ExitCodes, IntegrityError, JsonReporter, TestExecutionError, buildConfusionMatrix, discoverEvalFiles, executeEvalFiles, formatConfusionMatrix, getExitCode, parseReport } from './chunk-IYLSY7NX.js';
+import { getCurrentSuite, setCurrentSuite, addSuite, addTestToCurrentSuite, IntegrityError, buildConfusionMatrix, getTruePositives, getFalsePositives, getFalseNegatives, getSupport, AssertionError, recordFieldMetrics, recordAssertion } from './chunk-IUVDDMJ3.js';
+export { AssertionError, ConfigurationError, ConsoleReporter, DatasetError, EvalSenseError, ExitCodes, IntegrityError, JsonReporter, TestExecutionError, buildConfusionMatrix, discoverEvalFiles, executeEvalFiles, formatConfusionMatrix, getExitCode, parseReport } from './chunk-IUVDDMJ3.js';
 import './chunk-DGUM43GV.js';
-import { readFileSync } from 'fs';
-import { resolve, extname } from 'path';
 // src/core/describe.ts
 function describe(name, fn) {
@@ -91,136 +89,6 @@ function evalTestOnly(name, fn) {
 }
 evalTest.skip = evalTestSkip;
 evalTest.only = evalTestOnly;
-function loadDataset(path) {
-  const absolutePath = resolve(process.cwd(), path);
-  const ext = extname(absolutePath).toLowerCase();
-  let records;
-  try {
-    const content = readFileSync(absolutePath, "utf-8");
-    if (ext === ".ndjson" || ext === ".jsonl") {
-      records = parseNDJSON(content);
-    } else if (ext === ".json") {
-      records = parseJSON(content);
-    } else {
-      throw new DatasetError(
-        `Unsupported file format: ${ext}. Use .json, .ndjson, or .jsonl`,
-        path
-      );
-    }
-  } catch (error) {
-    if (error instanceof DatasetError) {
-      throw error;
-    }
-    const message = error instanceof Error ? error.message : String(error);
-    throw new DatasetError(`Failed to load dataset from ${path}: ${message}`, path);
-  }
-  return {
-    records,
-    metadata: {
-      source: path,
-      count: records.length,
-      loadedAt: /* @__PURE__ */ new Date()
-    }
-  };
-}
-function parseJSON(content) {
-  const parsed = JSON.parse(content);
-  if (!Array.isArray(parsed)) {
-    throw new DatasetError("JSON dataset must be an array of records");
-  }
-  return parsed;
-}
-function parseNDJSON(content) {
-  const lines = content.split("\n").filter((line) => line.trim() !== "");
-  const records = [];
-  for (let i = 0; i < lines.length; i++) {
-    const line = lines[i];
-    if (line === void 0) continue;
-    try {
-      records.push(JSON.parse(line));
-    } catch {
-      throw new DatasetError(`Invalid JSON at line ${i + 1} in NDJSON file`);
-    }
-  }
-  return records;
-}
-function createDataset(records, source = "inline") {
-  return {
-    records,
-    metadata: {
-      source,
-      count: records.length,
-      loadedAt: /* @__PURE__ */ new Date()
-    }
-  };
-}
-// src/dataset/run-model.ts
-async function runModel(dataset, modelFn) {
-  const startTime = Date.now();
-  const predictions = [];
-  const aligned = [];
-  for (const record of dataset.records) {
-    const id = getRecordId(record);
-    const prediction = await modelFn(record);
-    if (prediction.id !== id) {
-      throw new DatasetError(
-        `Prediction ID mismatch: expected "${id}", got "${prediction.id}". Model function must return the same ID as the input record.`
-      );
-    }
-    predictions.push(prediction);
-    aligned.push({
-      id,
-      actual: { ...prediction },
-      expected: { ...record }
-    });
-  }
-  return {
-    predictions,
-    aligned,
-    duration: Date.now() - startTime
-  };
-}
-function getRecordId(record) {
-  const id = record.id ?? record._id;
-  if (id === void 0 || id === null) {
-    throw new DatasetError('Dataset records must have an "id" or "_id" field for alignment');
-  }
-  return String(id);
-}
-async function runModelParallel(dataset, modelFn, concurrency = 10) {
-  const startTime = Date.now();
-  const results = [];
-  for (let i = 0; i < dataset.records.length; i += concurrency) {
-    const batch = dataset.records.slice(i, i + concurrency);
-    const batchResults = await Promise.all(
-      batch.map(async (record) => {
-        const prediction = await modelFn(record);
-        return { prediction, record };
-      })
-    );
-    results.push(...batchResults);
-  }
-  const predictions = [];
-  const aligned = [];
-  for (const { prediction, record } of results) {
-    const id = getRecordId(record);
-    if (prediction.id !== id) {
-      throw new DatasetError(`Prediction ID mismatch: expected "${id}", got "${prediction.id}".`);
-    }
-    predictions.push(prediction);
-    aligned.push({
-      id,
-      actual: { ...prediction },
-      expected: { ...record }
-    });
-  }
-  return {
-    predictions,
-    aligned,
-    duration: Date.now() - startTime
-  };
-}
 // src/dataset/alignment.ts
 function alignByKey(predictions, expected, options = {}) {
@@ -293,14 +161,14 @@ function filterComplete(aligned, field) {
 }
 // src/dataset/integrity.ts
-function checkIntegrity(dataset, options = {}) {
+function checkIntegrity(records, options = {}) {
   const { requiredFields = [], throwOnFailure = false } = options;
   const seenIds = /* @__PURE__ */ new Map();
   const missingIds = [];
   const duplicateIds = [];
   const missingFields = [];
-  for (let i = 0; i < dataset.records.length; i++) {
-    const record = dataset.records[i];
+  for (let i = 0; i < records.length; i++) {
+    const record = records[i];
     if (!record) continue;
     const id = record.id ?? record._id;
     if (id === void 0 || id === null) {
@@ -327,7 +195,7 @@ function checkIntegrity(dataset, options = {}) {
   const valid = missingIds.length === 0 && duplicateIds.length === 0 && missingFields.length === 0;
   const result = {
     valid,
-    totalRecords: dataset.records.length,
+    totalRecords: records.length,
     missingIds,
     duplicateIds,
     missingFields
@@ -520,6 +388,91 @@ function calculatePercentageAbove(values, threshold) {
   return countAbove / values.length;
 }
+// src/assertions/metric-matcher.ts
+var MetricMatcher = class {
+  context;
+  constructor(context) {
+    this.context = context;
+  }
+  formatMetricValue(value) {
+    if (this.context.formatValue) {
+      return this.context.formatValue(value);
+    }
+    if (value >= 0 && value <= 1) {
+      return `${(value * 100).toFixed(1)}%`;
+    }
+    return value.toFixed(4);
+  }
+  createAssertion(operator, threshold, passed) {
+    const { metricName, metricValue, fieldName, targetClass } = this.context;
+    const formattedActual = this.formatMetricValue(metricValue);
+    const formattedThreshold = this.formatMetricValue(threshold);
+    const classInfo = targetClass ? ` for "${targetClass}"` : "";
+    const operatorText = {
+      ">=": "at least",
+      ">": "above",
+      "<=": "at most",
+      "<": "below",
+      "===": "equal to"
+    }[operator];
+    const message = passed ? `${metricName}${classInfo} ${formattedActual} is ${operatorText} ${formattedThreshold}` : `${metricName}${classInfo} ${formattedActual} is not ${operatorText} ${formattedThreshold}`;
+    return {
+      type: metricName.toLowerCase().replace(/\s+/g, "").replace(/²/g, "2"),
+      passed,
+      message,
+      expected: threshold,
+      actual: metricValue,
+      field: fieldName,
+      class: targetClass
+    };
+  }
+  recordAndReturn(result) {
+    this.context.assertions.push(result);
+    recordAssertion(result);
+    return this.context.parent;
+  }
+  /**
+   * Assert that the metric is greater than or equal to the threshold (>=)
+   */
+  toBeAtLeast(threshold) {
+    const passed = this.context.metricValue >= threshold;
+    const result = this.createAssertion(">=", threshold, passed);
+    return this.recordAndReturn(result);
+  }
+  /**
+   * Assert that the metric is strictly greater than the threshold (>)
+   */
+  toBeAbove(threshold) {
+    const passed = this.context.metricValue > threshold;
+    const result = this.createAssertion(">", threshold, passed);
+    return this.recordAndReturn(result);
+  }
+  /**
+   * Assert that the metric is less than or equal to the threshold (<=)
+   */
+  toBeAtMost(threshold) {
+    const passed = this.context.metricValue <= threshold;
+    const result = this.createAssertion("<=", threshold, passed);
+    return this.recordAndReturn(result);
+  }
+  /**
+   * Assert that the metric is strictly less than the threshold (<)
+   */
+  toBeBelow(threshold) {
+    const passed = this.context.metricValue < threshold;
+    const result = this.createAssertion("<", threshold, passed);
+    return this.recordAndReturn(result);
+  }
+  /**
+   * Assert that the metric equals the expected value (with optional tolerance for floats)
+   */
+  toEqual(expected, tolerance = 1e-9) {
+    const passed = Math.abs(this.context.metricValue - expected) <= tolerance;
+    const result = this.createAssertion("===", expected, passed);
+    return this.recordAndReturn(result);
+  }
+};
 // src/assertions/binarize.ts
 var BinarizeSelector = class {
   fieldName;
@@ -551,149 +504,127 @@ var BinarizeSelector = class {
       }
     }
   }
+  // ============================================================================
+  // Classification Metric Getters
+  // ============================================================================
   /**
-   * Asserts that accuracy is above a threshold
+   * Access accuracy metric for assertions
+   * @example
+   * expectStats(predictions, groundTruth)
+   *   .field("score")
+   *   .binarize(0.5)
+   *   .accuracy.toBeAtLeast(0.8)
    */
-  toHaveAccuracyAbove(threshold) {
+  get accuracy() {
     const metrics = computeClassificationMetrics(this.binaryActual, this.binaryExpected);
-    const passed = metrics.accuracy >= threshold;
-    const result = {
-      type: "accuracy",
-      passed,
-      message: passed ? `Accuracy ${(metrics.accuracy * 100).toFixed(1)}% is above ${(threshold * 100).toFixed(1)}% (binarized at ${this.threshold})` : `Accuracy ${(metrics.accuracy * 100).toFixed(1)}% is below threshold ${(threshold * 100).toFixed(1)}% (binarized at ${this.threshold})`,
-      expected: threshold,
-      actual: metrics.accuracy,
-      field: this.fieldName
-    };
-    this.assertions.push(result);
-    recordAssertion(result);
-    return this;
+    return new MetricMatcher({
+      parent: this,
+      metricName: "Accuracy",
+      metricValue: metrics.accuracy,
+      fieldName: this.fieldName,
+      assertions: this.assertions
+    });
   }
   /**
-   * Asserts that precision is above a threshold
-   * @param classOrThreshold - Either the class (true/false) or threshold
-   * @param threshold - Threshold when class is specified
+   * Access F1 score metric for assertions (macro average)
+   * @example
+   * expectStats(predictions, groundTruth)
+   *   .field("score")
+   *   .binarize(0.5)
+   *   .f1.toBeAtLeast(0.75)
    */
-  toHavePrecisionAbove(classOrThreshold, threshold) {
+  get f1() {
     const metrics = computeClassificationMetrics(this.binaryActual, this.binaryExpected);
-    let actualPrecision;
-    let targetClass;
-    let actualThreshold;
-    if (typeof classOrThreshold === "number") {
-      actualPrecision = metrics.macroAvg.precision;
-      actualThreshold = classOrThreshold;
-    } else {
-      targetClass = String(classOrThreshold);
-      actualThreshold = threshold;
-      const classMetrics = metrics.perClass[targetClass];
-      if (!classMetrics) {
-        throw new AssertionError(
-          `Class "${targetClass}" not found in binarized predictions`,
-          targetClass,
-          Object.keys(metrics.perClass),
-          this.fieldName
-        );
-      }
-      actualPrecision = classMetrics.precision;
-    }
-    const passed = actualPrecision >= actualThreshold;
-    const result = {
-      type: "precision",
-      passed,
-      message: passed ? `Precision${targetClass ? ` for ${targetClass}` : ""} ${(actualPrecision * 100).toFixed(1)}% is above ${(actualThreshold * 100).toFixed(1)}%` : `Precision${targetClass ? ` for ${targetClass}` : ""} ${(actualPrecision * 100).toFixed(1)}% is below threshold ${(actualThreshold * 100).toFixed(1)}%`,
-      expected: actualThreshold,
-      actual: actualPrecision,
-      field: this.fieldName,
-      class: targetClass
-    };
-    this.assertions.push(result);
-    recordAssertion(result);
-    return this;
+    return new MetricMatcher({
+      parent: this,
+      metricName: "F1",
+      metricValue: metrics.macroAvg.f1,
+      fieldName: this.fieldName,
+      assertions: this.assertions
+    });
   }
   /**
-   * Asserts that recall is above a threshold
-   * @param classOrThreshold - Either the class (true/false) or threshold
-   * @param threshold - Threshold when class is specified
+   * Access precision metric for assertions
+   * @param targetClass - Optional boolean class (true/false). If omitted, uses macro average
+   * @example
+   * expectStats(predictions, groundTruth)
+   *   .field("score")
+   *   .binarize(0.5)
+   *   .precision(true).toBeAtLeast(0.7)
    */
-  toHaveRecallAbove(classOrThreshold, threshold) {
+  precision(targetClass) {
     const metrics = computeClassificationMetrics(this.binaryActual, this.binaryExpected);
-    let actualRecall;
-    let targetClass;
-    let actualThreshold;
-    if (typeof classOrThreshold === "number") {
-      actualRecall = metrics.macroAvg.recall;
-      actualThreshold = classOrThreshold;
+    let metricValue;
+    let classKey;
+    if (targetClass === void 0) {
+      metricValue = metrics.macroAvg.precision;
     } else {
-      targetClass = String(classOrThreshold);
-      actualThreshold = threshold;
-      const classMetrics = metrics.perClass[targetClass];
+      classKey = String(targetClass);
+      const classMetrics = metrics.perClass[classKey];
       if (!classMetrics) {
         throw new AssertionError(
-          `Class "${targetClass}" not found in binarized predictions`,
-          targetClass,
+          `Class "${classKey}" not found in binarized predictions`,
+          classKey,
           Object.keys(metrics.perClass),
           this.fieldName
         );
       }
-      actualRecall = classMetrics.recall;
+      metricValue = classMetrics.precision;
     }
-    const passed = actualRecall >= actualThreshold;
-    const result = {
-      type: "recall",
-      passed,
-      message: passed ? `Recall${targetClass ? ` for ${targetClass}` : ""} ${(actualRecall * 100).toFixed(1)}% is above ${(actualThreshold * 100).toFixed(1)}%` : `Recall${targetClass ? ` for ${targetClass}` : ""} ${(actualRecall * 100).toFixed(1)}% is below threshold ${(actualThreshold * 100).toFixed(1)}%`,
-      expected: actualThreshold,
-      actual: actualRecall,
-      field: this.fieldName,
-      class: targetClass
-    };
-    this.assertions.push(result);
-    recordAssertion(result);
-    return this;
+    return new MetricMatcher({
+      parent: this,
+      metricName: "Precision",
+      metricValue,
+      fieldName: this.fieldName,
+      targetClass: classKey,
+      assertions: this.assertions
+    });
   }
   /**
-   * Asserts that F1 score is above a threshold
+   * Access recall metric for assertions
+   * @param targetClass - Optional boolean class (true/false). If omitted, uses macro average
+   * @example
+   * expectStats(predictions, groundTruth)
+   *   .field("score")
+   *   .binarize(0.5)
+   *   .recall(true).toBeAtLeast(0.7)
    */
-  toHaveF1Above(classOrThreshold, threshold) {
+  recall(targetClass) {
     const metrics = computeClassificationMetrics(this.binaryActual, this.binaryExpected);
-    let actualF1;
-    let targetClass;
-    let actualThreshold;
-    if (typeof classOrThreshold === "number") {
-      actualF1 = metrics.macroAvg.f1;
-      actualThreshold = classOrThreshold;
+    let metricValue;
+    let classKey;
+    if (targetClass === void 0) {
+      metricValue = metrics.macroAvg.recall;
     } else {
-      targetClass = String(classOrThreshold);
-      actualThreshold = threshold;
-      const classMetrics = metrics.perClass[targetClass];
+      classKey = String(targetClass);
+      const classMetrics = metrics.perClass[classKey];
       if (!classMetrics) {
         throw new AssertionError(
-          `Class "${targetClass}" not found in binarized predictions`,
-          targetClass,
+          `Class "${classKey}" not found in binarized predictions`,
+          classKey,
           Object.keys(metrics.perClass),
           this.fieldName
         );
       }
-      actualF1 = classMetrics.f1;
+      metricValue = classMetrics.recall;
     }
-    const passed = actualF1 >= actualThreshold;
-    const result = {
-      type: "f1",
-      passed,
-      message: passed ? `F1${targetClass ? ` for ${targetClass}` : ""} ${(actualF1 * 100).toFixed(1)}% is above ${(actualThreshold * 100).toFixed(1)}%` : `F1${targetClass ? ` for ${targetClass}` : ""} ${(actualF1 * 100).toFixed(1)}% is below threshold ${(actualThreshold * 100).toFixed(1)}%`,
-      expected: actualThreshold,
-      actual: actualF1,
-      field: this.fieldName,
-      class: targetClass
-    };
-    this.assertions.push(result);
-    recordAssertion(result);
-    return this;
+    return new MetricMatcher({
+      parent: this,
+      metricName: "Recall",
+      metricValue,
+      fieldName: this.fieldName,
+      targetClass: classKey,
+      assertions: this.assertions
+    });
   }
+  // ============================================================================
+  // Display Methods
+  // ============================================================================
   /**
-   * Includes the confusion matrix in the report
+   * Displays the confusion matrix in the report
+   * This is not an assertion - it always passes and just records the matrix for display
    */
-  toHaveConfusionMatrix() {
+  displayConfusionMatrix() {
     const metrics = computeClassificationMetrics(this.binaryActual, this.binaryExpected);
     const fieldResult = {
       field: this.fieldName,
@@ -712,6 +643,9 @@ var BinarizeSelector = class {
     recordAssertion(result);
     return this;
   }
+  // ============================================================================
+  // Utility Methods
+  // ============================================================================
   /**
    * Gets computed metrics
    */
@@ -726,6 +660,73 @@ var BinarizeSelector = class {
   }
 };
+// src/assertions/percentage-matcher.ts
+var PercentageMatcher = class {
+  context;
+  constructor(context) {
+    this.context = context;
+  }
+  formatPercentage(value) {
+    return `${(value * 100).toFixed(1)}%`;
+  }
+  createAssertion(operator, percentageThreshold, passed) {
+    const { fieldName, valueThreshold, direction, actualPercentage } = this.context;
+    const operatorText = {
+      ">=": "at least",
+      ">": "above",
+      "<=": "at most",
+      "<": "below"
+    }[operator];
+    const directionText = direction === "above" ? "above" : "below or equal to";
+    const message = passed ? `${this.formatPercentage(actualPercentage)} of '${fieldName}' values are ${directionText} ${valueThreshold} (expected ${operatorText} ${this.formatPercentage(percentageThreshold)})` : `Only ${this.formatPercentage(actualPercentage)} of '${fieldName}' values are ${directionText} ${valueThreshold} (expected ${operatorText} ${this.formatPercentage(percentageThreshold)})`;
+    return {
+      type: direction === "above" ? "percentageAbove" : "percentageBelow",
+      passed,
+      message,
+      expected: percentageThreshold,
+      actual: actualPercentage,
+      field: fieldName
+    };
+  }
+  recordAndReturn(result) {
+    this.context.assertions.push(result);
+    recordAssertion(result);
+    return this.context.parent;
+  }
+  /**
+   * Assert that the percentage is greater than or equal to the threshold (>=)
+   */
+  toBeAtLeast(percentageThreshold) {
+    const passed = this.context.actualPercentage >= percentageThreshold;
+    const result = this.createAssertion(">=", percentageThreshold, passed);
+    return this.recordAndReturn(result);
+  }
+  /**
+   * Assert that the percentage is strictly greater than the threshold (>)
+   */
+  toBeAbove(percentageThreshold) {
+    const passed = this.context.actualPercentage > percentageThreshold;
+    const result = this.createAssertion(">", percentageThreshold, passed);
+    return this.recordAndReturn(result);
+  }
+  /**
+   * Assert that the percentage is less than or equal to the threshold (<=)
+   */
+  toBeAtMost(percentageThreshold) {
+    const passed = this.context.actualPercentage <= percentageThreshold;
+    const result = this.createAssertion("<=", percentageThreshold, passed);
+    return this.recordAndReturn(result);
+  }
+  /**
+   * Assert that the percentage is strictly less than the threshold (<)
+   */
+  toBeBelow(percentageThreshold) {
+    const passed = this.context.actualPercentage < percentageThreshold;
+    const result = this.createAssertion("<", percentageThreshold, passed);
+    return this.recordAndReturn(result);
+  }
+};
 // src/assertions/field-selector.ts
 var FieldSelector = class {
   aligned;
@@ -762,83 +763,93 @@ var FieldSelector = class {
     }
   }
   /**
-   * Asserts that accuracy is above a threshold
+   * Validates that ground truth exists and both arrays contain numeric values.
+   * Returns the filtered numeric arrays for regression metrics.
    */
-  toHaveAccuracyAbove(threshold) {
+  validateRegressionInputs() {
+    this.validateGroundTruth();
+    const numericActual = filterNumericValues(this.actualValues);
+    const numericExpected = filterNumericValues(this.expectedValues);
+    if (numericActual.length === 0) {
+      throw new AssertionError(
+        `Regression metric requires numeric values, but field "${this.fieldName}" has no numeric actual values.`,
+        void 0,
+        void 0,
+        this.fieldName
+      );
+    }
+    if (numericExpected.length === 0) {
+      throw new AssertionError(
+        `Regression metric requires numeric values, but field "${this.fieldName}" has no numeric expected values.`,
+        void 0,
+        void 0,
+        this.fieldName
+      );
+    }
+    if (numericActual.length !== numericExpected.length) {
+      throw new AssertionError(
+        `Regression metric requires equal-length arrays, but got ${numericActual.length} actual and ${numericExpected.length} expected values.`,
+        numericExpected.length,
+        numericActual.length,
+        this.fieldName
+      );
+    }
+    return { actual: numericActual, expected: numericExpected };
+  }
+  // ============================================================================
+  // Classification Metric Getters
+  // ============================================================================
+  /**
+   * Access accuracy metric for assertions
+   * @example
+   * expectStats(predictions, groundTruth)
+   *   .field("sentiment")
+   *   .accuracy.toBeAtLeast(0.8)
+   */
+  get accuracy() {
     this.validateGroundTruth();
     const metrics = computeClassificationMetrics(this.actualValues, this.expectedValues);
-    const passed = metrics.accuracy >= threshold;
-    const result = {
-      type: "accuracy",
-      passed,
-      message: passed ? `Accuracy ${(metrics.accuracy * 100).toFixed(1)}% is above ${(threshold * 100).toFixed(1)}%` : `Accuracy ${(metrics.accuracy * 100).toFixed(1)}% is below threshold ${(threshold * 100).toFixed(1)}%`,
-      expected: threshold,
-      actual: metrics.accuracy,
-      field: this.fieldName
-    };
-    this.assertions.push(result);
-    recordAssertion(result);
-    return this;
+    return new MetricMatcher({
+      parent: this,
+      metricName: "Accuracy",
+      metricValue: metrics.accuracy,
+      fieldName: this.fieldName,
+      assertions: this.assertions
+    });
   }
   /**
-   * Asserts that precision is above a threshold
-   * @param classOrThreshold - Either the class name or threshold (if class is omitted, uses macro average)
-   * @param threshold - Threshold when class is specified
+   * Access F1 score metric for assertions (macro average)
+   * @example
+   * expectStats(predictions, groundTruth)
+   *   .field("sentiment")
+   *   .f1.toBeAtLeast(0.75)
    */
-  toHavePrecisionAbove(classOrThreshold, threshold) {
+  get f1() {
     this.validateGroundTruth();
     const metrics = computeClassificationMetrics(this.actualValues, this.expectedValues);
-    let actualPrecision;
-    let targetClass;
-    let actualThreshold;
-    if (typeof classOrThreshold === "number") {
-      actualPrecision = metrics.macroAvg.precision;
-      actualThreshold = classOrThreshold;
-    } else {
-      targetClass = classOrThreshold;
-      actualThreshold = threshold;
-      const classMetrics = metrics.perClass[targetClass];
-      if (!classMetrics) {
-        throw new AssertionError(
-          `Class "${targetClass}" not found in predictions`,
-          targetClass,
-          Object.keys(metrics.perClass),
-          this.fieldName
-        );
-      }
-      actualPrecision = classMetrics.precision;
-    }
-    const passed = actualPrecision >= actualThreshold;
-    const result = {
-      type: "precision",
-      passed,
-      message: passed ? `Precision${targetClass ? ` for "${targetClass}"` : ""} ${(actualPrecision * 100).toFixed(1)}% is above ${(actualThreshold * 100).toFixed(1)}%` : `Precision${targetClass ? ` for "${targetClass}"` : ""} ${(actualPrecision * 100).toFixed(1)}% is below threshold ${(actualThreshold * 100).toFixed(1)}%`,
-      expected: actualThreshold,
-      actual: actualPrecision,
-      field: this.fieldName,
-      class: targetClass
-    };
-    this.assertions.push(result);
-    recordAssertion(result);
-    return this;
+    return new MetricMatcher({
+      parent: this,
+      metricName: "F1",
+      metricValue: metrics.macroAvg.f1,
+      fieldName: this.fieldName,
+      assertions: this.assertions
+    });
   }
   /**
-   * Asserts that recall is above a threshold
-   * @param classOrThreshold - Either the class name or threshold (if class is omitted, uses macro average)
-   * @param threshold - Threshold when class is specified
+   * Access precision metric for assertions
+   * @param targetClass - Optional class name. If omitted, uses macro average
+   * @example
+   * expectStats(predictions, groundTruth)
+   *   .field("sentiment")
+   *   .precision("positive").toBeAtLeast(0.7)
    */
-  toHaveRecallAbove(classOrThreshold, threshold) {
+  precision(targetClass) {
     this.validateGroundTruth();
     const metrics = computeClassificationMetrics(this.actualValues, this.expectedValues);
-    let actualRecall;
-    let targetClass;
-    let actualThreshold;
-    if (typeof classOrThreshold === "number") {
-      actualRecall = metrics.macroAvg.recall;
-      actualThreshold = classOrThreshold;
+    let metricValue;
+    if (targetClass === void 0) {
+      metricValue = metrics.macroAvg.precision;
     } else {
-      targetClass = classOrThreshold;
-      actualThreshold = threshold;
       const classMetrics = metrics.perClass[targetClass];
       if (!classMetrics) {
         throw new AssertionError(
@@ -848,39 +859,32 @@ var FieldSelector = class {
           this.fieldName
         );
       }
-      actualRecall = classMetrics.recall;
+      metricValue = classMetrics.precision;
     }
-    const passed = actualRecall >= actualThreshold;
-    const result = {
-      type: "recall",
-      passed,
-      message: passed ? `Recall${targetClass ? ` for "${targetClass}"` : ""} ${(actualRecall * 100).toFixed(1)}% is above ${(actualThreshold * 100).toFixed(1)}%` : `Recall${targetClass ? ` for "${targetClass}"` : ""} ${(actualRecall * 100).toFixed(1)}% is below threshold ${(actualThreshold * 100).toFixed(1)}%`,
-      expected: actualThreshold,
-      actual: actualRecall,
-      field: this.fieldName,
-      class: targetClass
-    };
-    this.assertions.push(result);
-    recordAssertion(result);
-    return this;
+    return new MetricMatcher({
+      parent: this,
+      metricName: "Precision",
+      metricValue,
+      fieldName: this.fieldName,
+      targetClass,
+      assertions: this.assertions
+    });
   }
   /**
-   * Asserts that F1 score is above a threshold
-   * @param classOrThreshold - Either the class name or threshold (if class is omitted, uses macro average)
-   * @param threshold - Threshold when class is specified
+   * Access recall metric for assertions
+   * @param targetClass - Optional class name. If omitted, uses macro average
+   * @example
+   * expectStats(predictions, groundTruth)
+   *   .field("sentiment")
+   *   .recall("positive").toBeAtLeast(0.7)
    */
-  toHaveF1Above(classOrThreshold, threshold) {
+  recall(targetClass) {
     this.validateGroundTruth();
     const metrics = computeClassificationMetrics(this.actualValues, this.expectedValues);
-    let actualF1;
-    let targetClass;
-    let actualThreshold;
-    if (typeof classOrThreshold === "number") {
-      actualF1 = metrics.macroAvg.f1;
-      actualThreshold = classOrThreshold;
+    let metricValue;
+    if (targetClass === void 0) {
+      metricValue = metrics.macroAvg.recall;
     } else {
-      targetClass = classOrThreshold;
-      actualThreshold = threshold;
       const classMetrics = metrics.perClass[targetClass];
       if (!classMetrics) {
         throw new AssertionError(
@@ -890,244 +894,171 @@ var FieldSelector = class {
           this.fieldName
         );
       }
-      actualF1 = classMetrics.f1;
+      metricValue = classMetrics.recall;
     }
-    const passed = actualF1 >= actualThreshold;
-    const result = {
-      type: "f1",
-      passed,
-      message: passed ? `F1${targetClass ? ` for "${targetClass}"` : ""} ${(actualF1 * 100).toFixed(1)}% is above ${(actualThreshold * 100).toFixed(1)}%` : `F1${targetClass ? ` for "${targetClass}"` : ""} ${(actualF1 * 100).toFixed(1)}% is below threshold ${(actualThreshold * 100).toFixed(1)}%`,
-      expected: actualThreshold,
-      actual: actualF1,
-      field: this.fieldName,
-      class: targetClass
-    };
-    this.assertions.push(result);
-    recordAssertion(result);
-    return this;
+    return new MetricMatcher({
+      parent: this,
+      metricName: "Recall",
+      metricValue,
+      fieldName: this.fieldName,
+      targetClass,
+      assertions: this.assertions
+    });
   }
+  // ============================================================================
+  // Regression Metric Getters
+  // ============================================================================
   /**
-   * Includes the confusion matrix in the report
+   * Access Mean Absolute Error metric for assertions
+   * @example
+   * expectStats(predictions, groundTruth)
+   *   .field("score")
+   *   .mae.toBeAtMost(0.1)
    */
-  toHaveConfusionMatrix() {
-    const metrics = computeClassificationMetrics(this.actualValues, this.expectedValues);
-    const fieldResult = {
-      field: this.fieldName,
-      metrics,
-      binarized: false
-    };
-    recordFieldMetrics(fieldResult);
-    const result = {
-      type: "confusionMatrix",
-      passed: true,
-      message: `Confusion matrix recorded for field "${this.fieldName}"`,
-      field: this.fieldName
-    };
-    this.assertions.push(result);
-    recordAssertion(result);
-    return this;
+  get mae() {
+    const { actual, expected } = this.validateRegressionInputs();
+    const metrics = computeRegressionMetrics(actual, expected);
+    return new MetricMatcher({
+      parent: this,
+      metricName: "MAE",
+      metricValue: metrics.mae,
+      fieldName: this.fieldName,
+      assertions: this.assertions,
+      formatValue: (v) => v.toFixed(4)
+    });
   }
   /**
-   * Asserts that a percentage of values are below or equal to a threshold.
-   * This is a distributional assertion that only looks at actual values (no ground truth required).
-   *
+   * Access Root Mean Squared Error metric for assertions
+   * @example
+   * expectStats(predictions, groundTruth)
+   *   .field("score")
+   *   .rmse.toBeAtMost(0.15)
+   */
+  get rmse() {
+    const { actual, expected } = this.validateRegressionInputs();
+    const metrics = computeRegressionMetrics(actual, expected);
+    return new MetricMatcher({
+      parent: this,
+      metricName: "RMSE",
+      metricValue: metrics.rmse,
+      fieldName: this.fieldName,
+      assertions: this.assertions,
+      formatValue: (v) => v.toFixed(4)
+    });
+  }
+  /**
+   * Access R-squared (coefficient of determination) metric for assertions
+   * @example
+   * expectStats(predictions, groundTruth)
+   *   .field("score")
+   *   .r2.toBeAtLeast(0.8)
+   */
+  get r2() {
+    const { actual, expected } = this.validateRegressionInputs();
+    const metrics = computeRegressionMetrics(actual, expected);
+    return new MetricMatcher({
+      parent: this,
+      metricName: "R\xB2",
+      metricValue: metrics.r2,
+      fieldName: this.fieldName,
+      assertions: this.assertions,
+      formatValue: (v) => v.toFixed(4)
+    });
+  }
+  // ============================================================================
+  // Distribution Assertions
+  // ============================================================================
+  /**
+   * Assert on the percentage of values below or equal to a threshold
    * @param valueThreshold - The value threshold to compare against
-   * @param percentageThreshold - The minimum percentage (0-1) of values that should be <= valueThreshold
-   * @returns this for method chaining
-   *
    * @example
-   * // Assert that 90% of confidence scores are below 0.5
    * expectStats(predictions)
    *   .field("confidence")
-   *   .toHavePercentageBelow(0.5, 0.9)
+   *   .percentageBelow(0.5).toBeAtLeast(0.9)
    */
-  toHavePercentageBelow(valueThreshold, percentageThreshold) {
+  percentageBelow(valueThreshold) {
     const numericActual = filterNumericValues(this.actualValues);
     if (numericActual.length === 0) {
       throw new AssertionError(
         `Field '${this.fieldName}' contains no numeric values (found 0 numeric out of ${this.actualValues.length} total values)`,
-        percentageThreshold,
+        void 0,
         void 0,
         this.fieldName
       );
     }
     const actualPercentage = calculatePercentageBelow(numericActual, valueThreshold);
-    const passed = actualPercentage >= percentageThreshold;
-    const result = {
-      type: "percentageBelow",
-      passed,
-      message: passed ? `${(actualPercentage * 100).toFixed(1)}% of '${this.fieldName}' values are below or equal to ${valueThreshold} (expected >= ${(percentageThreshold * 100).toFixed(1)}%)` : `Only ${(actualPercentage * 100).toFixed(1)}% of '${this.fieldName}' values are below or equal to ${valueThreshold} (expected >= ${(percentageThreshold * 100).toFixed(1)}%)`,
-      expected: percentageThreshold,
-      actual: actualPercentage,
-      field: this.fieldName
-    };
-    this.assertions.push(result);
-    recordAssertion(result);
-    return this;
+    return new PercentageMatcher({
+      parent: this,
+      fieldName: this.fieldName,
+      valueThreshold,
+      direction: "below",
+      actualPercentage,
+      assertions: this.assertions
+    });
   }
   /**
-   * Asserts that a percentage of values are above a threshold.
-   * This is a distributional assertion that only looks at actual values (no ground truth required).
-   *
+   * Assert on the percentage of values above a threshold
    * @param valueThreshold - The value threshold to compare against
-   * @param percentageThreshold - The minimum percentage (0-1) of values that should be > valueThreshold
-   * @returns this for method chaining
-   *
    * @example
-   * // Assert that 80% of quality scores are above 0.7
    * expectStats(predictions)
    *   .field("quality")
-   *   .toHavePercentageAbove(0.7, 0.8)
+   *   .percentageAbove(0.7).toBeAtLeast(0.8)
    */
-  toHavePercentageAbove(valueThreshold, percentageThreshold) {
+  percentageAbove(valueThreshold) {
     const numericActual = filterNumericValues(this.actualValues);
     if (numericActual.length === 0) {
       throw new AssertionError(
         `Field '${this.fieldName}' contains no numeric values (found 0 numeric out of ${this.actualValues.length} total values)`,
-        percentageThreshold,
+        void 0,
         void 0,
         this.fieldName
       );
     }
     const actualPercentage = calculatePercentageAbove(numericActual, valueThreshold);
-    const passed = actualPercentage >= percentageThreshold;
-    const result = {
-      type: "percentageAbove",
-      passed,
-      message: passed ? `${(actualPercentage * 100).toFixed(1)}% of '${this.fieldName}' values are above ${valueThreshold} (expected >= ${(percentageThreshold * 100).toFixed(1)}%)` : `Only ${(actualPercentage * 100).toFixed(1)}% of '${this.fieldName}' values are above ${valueThreshold} (expected >= ${(percentageThreshold * 100).toFixed(1)}%)`,
-      expected: percentageThreshold,
-      actual: actualPercentage,
-      field: this.fieldName
-    };
-    this.assertions.push(result);
-    recordAssertion(result);
-    return this;
+    return new PercentageMatcher({
+      parent: this,
+      fieldName: this.fieldName,
+      valueThreshold,
+      direction: "above",
+      actualPercentage,
+      assertions: this.assertions
+    });
   }
   // ============================================================================
-  // Regression Assertions
+  // Display Methods
   // ============================================================================
   /**
-   * Validates that ground truth exists and both arrays contain numeric values.
-   * Returns the filtered numeric arrays for regression metrics.
-   */
-  validateRegressionInputs() {
-    this.validateGroundTruth();
-    const numericActual = filterNumericValues(this.actualValues);
-    const numericExpected = filterNumericValues(this.expectedValues);
-    if (numericActual.length === 0) {
-      throw new AssertionError(
-        `Regression metric requires numeric values, but field "${this.fieldName}" has no numeric actual values.`,
-        void 0,
-        void 0,
-        this.fieldName
-      );
-    }
-    if (numericExpected.length === 0) {
-      throw new AssertionError(
-        `Regression metric requires numeric values, but field "${this.fieldName}" has no numeric expected values.`,
-        void 0,
-        void 0,
-        this.fieldName
-      );
-    }
-    if (numericActual.length !== numericExpected.length) {
-      throw new AssertionError(
-        `Regression metric requires equal-length arrays, but got ${numericActual.length} actual and ${numericExpected.length} expected values.`,
-        numericExpected.length,
-        numericActual.length,
-        this.fieldName
-      );
-    }
-    return { actual: numericActual, expected: numericExpected };
-  }
-  /**
-   * Asserts that Mean Absolute Error is below a threshold.
-   * Requires numeric values in both actual and expected.
-   *
-   * @param threshold - Maximum allowed MAE
-   * @returns this for method chaining
-   *
+   * Displays the confusion matrix in the report
+   * This is not an assertion - it always passes and just records the matrix for display
    * @example
    * expectStats(predictions, groundTruth)
-   *   .field("score")
-   *   .toHaveMAEBelow(0.1)
+   *   .field("sentiment")
+   *   .accuracy.toBeAtLeast(0.8)
+   *   .displayConfusionMatrix()
    */
-  toHaveMAEBelow(threshold) {
-    const { actual, expected } = this.validateRegressionInputs();
-    const metrics = computeRegressionMetrics(actual, expected);
-    const passed = metrics.mae <= threshold;
-    const result = {
-      type: "mae",
-      passed,
-      message: passed ? `MAE ${metrics.mae.toFixed(4)} is below ${threshold}` : `MAE ${metrics.mae.toFixed(4)} exceeds threshold ${threshold}`,
-      expected: threshold,
-      actual: metrics.mae,
-      field: this.fieldName
-    };
-    this.assertions.push(result);
-    recordAssertion(result);
-    return this;
-  }
-  /**
-   * Asserts that Root Mean Squared Error is below a threshold.
-   * Requires numeric values in both actual and expected.
-   *
-   * @param threshold - Maximum allowed RMSE
-   * @returns this for method chaining
-   *
-   * @example
-   * expectStats(predictions, groundTruth)
-   *   .field("score")
-   *   .toHaveRMSEBelow(0.15)
-   */
-  toHaveRMSEBelow(threshold) {
-    const { actual, expected } = this.validateRegressionInputs();
-    const metrics = computeRegressionMetrics(actual, expected);
-    const passed = metrics.rmse <= threshold;
-    const result = {
-      type: "rmse",
-      passed,
-      message: passed ? `RMSE ${metrics.rmse.toFixed(4)} is below ${threshold}` : `RMSE ${metrics.rmse.toFixed(4)} exceeds threshold ${threshold}`,
-      expected: threshold,
-      actual: metrics.rmse,
-      field: this.fieldName
+  displayConfusionMatrix() {
+    const metrics = computeClassificationMetrics(this.actualValues, this.expectedValues);
+    const fieldResult = {
+      field: this.fieldName,
+      metrics,
+      binarized: false
     };
-    this.assertions.push(result);
-    recordAssertion(result);
-    return this;
-  }
-  /**
-   * Asserts that R-squared (coefficient of determination) is above a threshold.
-   * R² measures how well the predictions explain the variance in expected values.
-   * R² = 1.0 means perfect prediction, R² = 0 means prediction is no better than mean.
-   * Requires numeric values in both actual and expected.
-   *
-   * @param threshold - Minimum required R² value (0-1)
-   * @returns this for method chaining
-   *
-   * @example
-   * expectStats(predictions, groundTruth)
-   *   .field("score")
-   *   .toHaveR2Above(0.8)
-   */
-  toHaveR2Above(threshold) {
-    const { actual, expected } = this.validateRegressionInputs();
-    const metrics = computeRegressionMetrics(actual, expected);
-    const passed = metrics.r2 >= threshold;
+    recordFieldMetrics(fieldResult);
     const result = {
-      type: "r2",
-      passed,
-      message: passed ? `R\xB2 ${metrics.r2.toFixed(4)} is above ${threshold}` : `R\xB2 ${metrics.r2.toFixed(4)} is below threshold ${threshold}`,
-      expected: threshold,
-      actual: metrics.r2,
+      type: "confusionMatrix",
+      passed: true,
+      message: `Confusion matrix recorded for field "${this.fieldName}"`,
       field: this.fieldName
     };
     this.assertions.push(result);
     recordAssertion(result);
     return this;
   }
+  // ============================================================================
+  // Utility Methods
+  // ============================================================================
   /**
-   * Gets the computed metrics for this field
+   * Gets the computed classification metrics for this field
    */
   getMetrics() {
     return computeClassificationMetrics(this.actualValues, this.expectedValues);
@@ -1160,7 +1091,7 @@ function normalizeInput(input) {
     }));
   }
   throw new Error(
-    "Invalid input to expectStats(): expected ModelRunResult, Prediction[], or AlignedRecord[]"
+    "Invalid input to expectStats(): expected { aligned: AlignedRecord[] }, Prediction[], or AlignedRecord[]"
   );
 }
 function expectStats(inputOrActual, expected, options) {
@@ -1205,6 +1136,6 @@ var ExpectStats = class {
   }
 };
-export { afterAll, afterEach, alignByKey, beforeAll, beforeEach, checkIntegrity, computeAccuracy, computeClassificationMetrics, computeF1, computePrecision, computeRecall, createDataset, describe, evalTest, expectStats, extractFieldValues, filterComplete, it, loadDataset, runModel, runModelParallel, test, validatePredictions };
+export { afterAll, afterEach, alignByKey, beforeAll, beforeEach, checkIntegrity, computeAccuracy, computeClassificationMetrics, computeF1, computePrecision, computeRecall, describe, evalTest, expectStats, extractFieldValues, filterComplete, it, test, validatePredictions };
 //# sourceMappingURL=index.js.map
 //# sourceMappingURL=index.js.map