evalsense 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +235 -98
  2. package/dist/{chunk-BFGA2NUB.cjs → chunk-4BKZPVY4.cjs} +13 -6
  3. package/dist/chunk-4BKZPVY4.cjs.map +1 -0
  4. package/dist/{chunk-IYLSY7NX.js → chunk-IUVDDMJ3.js} +13 -6
  5. package/dist/chunk-IUVDDMJ3.js.map +1 -0
  6. package/dist/chunk-NCCQRZ2Y.cjs +1141 -0
  7. package/dist/chunk-NCCQRZ2Y.cjs.map +1 -0
  8. package/dist/chunk-TDGWDK2L.js +1108 -0
  9. package/dist/chunk-TDGWDK2L.js.map +1 -0
  10. package/dist/cli.cjs +11 -11
  11. package/dist/cli.js +1 -1
  12. package/dist/index-CATqAHNK.d.cts +416 -0
  13. package/dist/index-CoMpaW-K.d.ts +416 -0
  14. package/dist/index.cjs +507 -580
  15. package/dist/index.cjs.map +1 -1
  16. package/dist/index.d.cts +210 -161
  17. package/dist/index.d.ts +210 -161
  18. package/dist/index.js +455 -524
  19. package/dist/index.js.map +1 -1
  20. package/dist/metrics/index.cjs +103 -342
  21. package/dist/metrics/index.cjs.map +1 -1
  22. package/dist/metrics/index.d.cts +260 -31
  23. package/dist/metrics/index.d.ts +260 -31
  24. package/dist/metrics/index.js +24 -312
  25. package/dist/metrics/index.js.map +1 -1
  26. package/dist/metrics/opinionated/index.cjs +5 -5
  27. package/dist/metrics/opinionated/index.d.cts +2 -163
  28. package/dist/metrics/opinionated/index.d.ts +2 -163
  29. package/dist/metrics/opinionated/index.js +1 -1
  30. package/dist/{types-C71p0wzM.d.cts → types-D0hzfyKm.d.cts} +1 -13
  31. package/dist/{types-C71p0wzM.d.ts → types-D0hzfyKm.d.ts} +1 -13
  32. package/package.json +1 -1
  33. package/dist/chunk-BFGA2NUB.cjs.map +0 -1
  34. package/dist/chunk-IYLSY7NX.js.map +0 -1
  35. package/dist/chunk-RZFLCWTW.cjs +0 -942
  36. package/dist/chunk-RZFLCWTW.cjs.map +0 -1
  37. package/dist/chunk-Z3U6AUWX.js +0 -925
  38. package/dist/chunk-Z3U6AUWX.js.map +0 -1
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
- import { T as TestFn, D as Dataset, P as Prediction, A as AlignedRecord, I as IntegrityResult, C as ClassificationMetrics, c as AssertionResult, d as ConfusionMatrix, E as EvalReport, F as FieldMetricResult } from './types-C71p0wzM.js';
2
- export { e as CLIOptions, f as ClassMetrics, g as DatasetMetadata, h as EvalTest, i as ExitCode, j as ExitCodes, a as MetricConfig, M as MetricFn, b as MetricOutput, R as RegressionMetrics, S as Suite, k as SuiteResult, l as TestContext, m as TestResult } from './types-C71p0wzM.js';
1
+ import { T as TestFn, P as Prediction, A as AlignedRecord, I as IntegrityResult, b as AssertionResult, C as ClassificationMetrics, c as ConfusionMatrix, E as EvalReport, F as FieldMetricResult } from './types-D0hzfyKm.js';
2
+ export { d as CLIOptions, e as ClassMetrics, f as EvalTest, g as ExitCode, h as ExitCodes, i as MetricConfig, M as MetricFn, a as MetricOutput, R as RegressionMetrics, S as Suite, j as SuiteResult, k as TestContext, l as TestResult } from './types-D0hzfyKm.js';
3
3
 
4
4
  /**
5
5
  * describe() implementation - Jest-like test suite grouping
@@ -73,65 +73,6 @@ declare function evalTestSkip(name: string, _fn: TestFn): void;
73
73
  */
74
74
  declare function evalTestOnly(name: string, fn: TestFn): void;
75
75
 
76
- /**
77
- * Dataset loading functionality
78
- */
79
-
80
- /**
81
- * Loads a dataset from a JSON or NDJSON file
82
- *
83
- * @param path - Path to the dataset file (relative to cwd or absolute)
84
- * @returns Dataset with records and metadata
85
- *
86
- * @example
87
- * ```ts
88
- * const dataset = loadDataset("./fixtures/sentiment.json");
89
- * // dataset.records = [{ id: "1", text: "...", sentiment: "positive" }, ...]
90
- * ```
91
- */
92
- declare function loadDataset<T extends Record<string, unknown> = Record<string, unknown>>(path: string): Dataset<T>;
93
- /**
94
- * Creates a dataset from an array of records (for testing/programmatic use)
95
- */
96
- declare function createDataset<T extends Record<string, unknown>>(records: T[], source?: string): Dataset<T>;
97
-
98
- /**
99
- * runModel() - executes a model function against a dataset
100
- */
101
-
102
- /**
103
- * Model function signature - takes a record and returns a prediction
104
- */
105
- type ModelFn<T> = (record: T) => Prediction | Promise<Prediction>;
106
- /**
107
- * Result of running a model on a dataset
108
- */
109
- interface ModelRunResult {
110
- predictions: Prediction[];
111
- aligned: AlignedRecord[];
112
- duration: number;
113
- }
114
- /**
115
- * Runs a model function against each record in a dataset
116
- *
117
- * @param dataset - The dataset to process
118
- * @param modelFn - Function that processes each record and returns a prediction
119
- * @returns Aligned predictions with actual vs expected values
120
- *
121
- * @example
122
- * ```ts
123
- * const result = await runModel(dataset, (record) => ({
124
- * id: record.id,
125
- * sentiment: classify(record.text)
126
- * }));
127
- * ```
128
- */
129
- declare function runModel<T extends Record<string, unknown>>(dataset: Dataset<T>, modelFn: ModelFn<T>): Promise<ModelRunResult>;
130
- /**
131
- * Runs model in parallel with concurrency limit
132
- */
133
- declare function runModelParallel<T extends Record<string, unknown>>(dataset: Dataset<T>, modelFn: ModelFn<T>, concurrency?: number): Promise<ModelRunResult>;
134
-
135
76
  /**
136
77
  * Dataset alignment utilities
137
78
  * Aligns predictions with ground truth by ID
@@ -192,11 +133,11 @@ interface IntegrityOptions {
192
133
  /**
193
134
  * Checks dataset integrity - validates IDs and required fields
194
135
  *
195
- * @param dataset - Dataset to check
136
+ * @param records - Array of records to check
196
137
  * @param options - Integrity check options
197
138
  * @returns Integrity result with details
198
139
  */
199
- declare function checkIntegrity<T extends Record<string, unknown>>(dataset: Dataset<T>, options?: IntegrityOptions): IntegrityResult;
140
+ declare function checkIntegrity<T extends Record<string, unknown>>(records: T[], options?: IntegrityOptions): IntegrityResult;
200
141
  /**
201
142
  * Validates predictions against a dataset
202
143
  */
@@ -206,6 +147,51 @@ declare function validatePredictions(predictions: Prediction[], expectedIds: str
206
147
  extra: string[];
207
148
  };
208
149
 
150
+ /**
151
+ * MetricMatcher - provides Jest-like assertion methods for metrics
152
+ */
153
+
154
+ interface MetricMatcherContext<TParent> {
155
+ parent: TParent;
156
+ metricName: string;
157
+ metricValue: number;
158
+ fieldName: string;
159
+ targetClass?: string;
160
+ assertions: AssertionResult[];
161
+ formatValue?: (value: number) => string;
162
+ }
163
+ /**
164
+ * Matcher class for individual metric assertions
165
+ * Returns the parent selector to enable fluent chaining
166
+ */
167
+ declare class MetricMatcher<TParent> {
168
+ private context;
169
+ constructor(context: MetricMatcherContext<TParent>);
170
+ private formatMetricValue;
171
+ private createAssertion;
172
+ private recordAndReturn;
173
+ /**
174
+ * Assert that the metric is greater than or equal to the threshold (>=)
175
+ */
176
+ toBeAtLeast(threshold: number): TParent;
177
+ /**
178
+ * Assert that the metric is strictly greater than the threshold (>)
179
+ */
180
+ toBeAbove(threshold: number): TParent;
181
+ /**
182
+ * Assert that the metric is less than or equal to the threshold (<=)
183
+ */
184
+ toBeAtMost(threshold: number): TParent;
185
+ /**
186
+ * Assert that the metric is strictly less than the threshold (<)
187
+ */
188
+ toBeBelow(threshold: number): TParent;
189
+ /**
190
+ * Assert that the metric equals the expected value (with optional tolerance for floats)
191
+ */
192
+ toEqual(expected: number, tolerance?: number): TParent;
193
+ }
194
+
209
195
  /**
210
196
  * Selector for binarized fields (continuous → binary threshold)
211
197
  */
@@ -217,29 +203,48 @@ declare class BinarizeSelector {
217
203
  private assertions;
218
204
  constructor(aligned: AlignedRecord[], fieldName: string, threshold: number);
219
205
  /**
220
- * Asserts that accuracy is above a threshold
206
+ * Access accuracy metric for assertions
207
+ * @example
208
+ * expectStats(predictions, groundTruth)
209
+ * .field("score")
210
+ * .binarize(0.5)
211
+ * .accuracy.toBeAtLeast(0.8)
221
212
  */
222
- toHaveAccuracyAbove(threshold: number): this;
213
+ get accuracy(): MetricMatcher<this>;
223
214
  /**
224
- * Asserts that precision is above a threshold
225
- * @param classOrThreshold - Either the class (true/false) or threshold
226
- * @param threshold - Threshold when class is specified
215
+ * Access F1 score metric for assertions (macro average)
216
+ * @example
217
+ * expectStats(predictions, groundTruth)
218
+ * .field("score")
219
+ * .binarize(0.5)
220
+ * .f1.toBeAtLeast(0.75)
227
221
  */
228
- toHavePrecisionAbove(classOrThreshold: boolean | number, threshold?: number): this;
222
+ get f1(): MetricMatcher<this>;
229
223
  /**
230
- * Asserts that recall is above a threshold
231
- * @param classOrThreshold - Either the class (true/false) or threshold
232
- * @param threshold - Threshold when class is specified
224
+ * Access precision metric for assertions
225
+ * @param targetClass - Optional boolean class (true/false). If omitted, uses macro average
226
+ * @example
227
+ * expectStats(predictions, groundTruth)
228
+ * .field("score")
229
+ * .binarize(0.5)
230
+ * .precision(true).toBeAtLeast(0.7)
233
231
  */
234
- toHaveRecallAbove(classOrThreshold: boolean | number, threshold?: number): this;
232
+ precision(targetClass?: boolean): MetricMatcher<this>;
235
233
  /**
236
- * Asserts that F1 score is above a threshold
234
+ * Access recall metric for assertions
235
+ * @param targetClass - Optional boolean class (true/false). If omitted, uses macro average
236
+ * @example
237
+ * expectStats(predictions, groundTruth)
238
+ * .field("score")
239
+ * .binarize(0.5)
240
+ * .recall(true).toBeAtLeast(0.7)
237
241
  */
238
- toHaveF1Above(classOrThreshold: boolean | number, threshold?: number): this;
242
+ recall(targetClass?: boolean): MetricMatcher<this>;
239
243
  /**
240
- * Includes the confusion matrix in the report
244
+ * Displays the confusion matrix in the report
245
+ * This is not an assertion - it always passes and just records the matrix for display
241
246
  */
242
- toHaveConfusionMatrix(): this;
247
+ displayConfusionMatrix(): this;
243
248
  /**
244
249
  * Gets computed metrics
245
250
  */
@@ -250,6 +255,47 @@ declare class BinarizeSelector {
250
255
  getAssertions(): AssertionResult[];
251
256
  }
252
257
 
258
+ /**
259
+ * PercentageMatcher - provides assertion methods for percentage-based distribution checks
260
+ */
261
+
262
+ type PercentageDirection = "above" | "below";
263
+ interface PercentageMatcherContext<TParent> {
264
+ parent: TParent;
265
+ fieldName: string;
266
+ valueThreshold: number;
267
+ direction: PercentageDirection;
268
+ actualPercentage: number;
269
+ assertions: AssertionResult[];
270
+ }
271
+ /**
272
+ * Matcher class for percentage-based distribution assertions
273
+ * Returns the parent selector to enable fluent chaining
274
+ */
275
+ declare class PercentageMatcher<TParent> {
276
+ private context;
277
+ constructor(context: PercentageMatcherContext<TParent>);
278
+ private formatPercentage;
279
+ private createAssertion;
280
+ private recordAndReturn;
281
+ /**
282
+ * Assert that the percentage is greater than or equal to the threshold (>=)
283
+ */
284
+ toBeAtLeast(percentageThreshold: number): TParent;
285
+ /**
286
+ * Assert that the percentage is strictly greater than the threshold (>)
287
+ */
288
+ toBeAbove(percentageThreshold: number): TParent;
289
+ /**
290
+ * Assert that the percentage is less than or equal to the threshold (<=)
291
+ */
292
+ toBeAtMost(percentageThreshold: number): TParent;
293
+ /**
294
+ * Assert that the percentage is strictly less than the threshold (<)
295
+ */
296
+ toBeBelow(percentageThreshold: number): TParent;
297
+ }
298
+
253
299
  /**
254
300
  * Field selector for building assertions on a specific field
255
301
  */
@@ -270,109 +316,98 @@ declare class FieldSelector {
270
316
  */
271
317
  private validateGroundTruth;
272
318
  /**
273
- * Asserts that accuracy is above a threshold
274
- */
275
- toHaveAccuracyAbove(threshold: number): this;
276
- /**
277
- * Asserts that precision is above a threshold
278
- * @param classOrThreshold - Either the class name or threshold (if class is omitted, uses macro average)
279
- * @param threshold - Threshold when class is specified
280
- */
281
- toHavePrecisionAbove(classOrThreshold: string | number, threshold?: number): this;
282
- /**
283
- * Asserts that recall is above a threshold
284
- * @param classOrThreshold - Either the class name or threshold (if class is omitted, uses macro average)
285
- * @param threshold - Threshold when class is specified
319
+ * Validates that ground truth exists and both arrays contain numeric values.
320
+ * Returns the filtered numeric arrays for regression metrics.
286
321
  */
287
- toHaveRecallAbove(classOrThreshold: string | number, threshold?: number): this;
322
+ private validateRegressionInputs;
288
323
  /**
289
- * Asserts that F1 score is above a threshold
290
- * @param classOrThreshold - Either the class name or threshold (if class is omitted, uses macro average)
291
- * @param threshold - Threshold when class is specified
324
+ * Access accuracy metric for assertions
325
+ * @example
326
+ * expectStats(predictions, groundTruth)
327
+ * .field("sentiment")
328
+ * .accuracy.toBeAtLeast(0.8)
292
329
  */
293
- toHaveF1Above(classOrThreshold: string | number, threshold?: number): this;
330
+ get accuracy(): MetricMatcher<this>;
294
331
  /**
295
- * Includes the confusion matrix in the report
332
+ * Access F1 score metric for assertions (macro average)
333
+ * @example
334
+ * expectStats(predictions, groundTruth)
335
+ * .field("sentiment")
336
+ * .f1.toBeAtLeast(0.75)
296
337
  */
297
- toHaveConfusionMatrix(): this;
338
+ get f1(): MetricMatcher<this>;
298
339
  /**
299
- * Asserts that a percentage of values are below or equal to a threshold.
300
- * This is a distributional assertion that only looks at actual values (no ground truth required).
301
- *
302
- * @param valueThreshold - The value threshold to compare against
303
- * @param percentageThreshold - The minimum percentage (0-1) of values that should be <= valueThreshold
304
- * @returns this for method chaining
305
- *
340
+ * Access precision metric for assertions
341
+ * @param targetClass - Optional class name. If omitted, uses macro average
306
342
  * @example
307
- * // Assert that 90% of confidence scores are below 0.5
308
- * expectStats(predictions)
309
- * .field("confidence")
310
- * .toHavePercentageBelow(0.5, 0.9)
343
+ * expectStats(predictions, groundTruth)
344
+ * .field("sentiment")
345
+ * .precision("positive").toBeAtLeast(0.7)
311
346
  */
312
- toHavePercentageBelow(valueThreshold: number, percentageThreshold: number): this;
347
+ precision(targetClass?: string): MetricMatcher<this>;
313
348
  /**
314
- * Asserts that a percentage of values are above a threshold.
315
- * This is a distributional assertion that only looks at actual values (no ground truth required).
316
- *
317
- * @param valueThreshold - The value threshold to compare against
318
- * @param percentageThreshold - The minimum percentage (0-1) of values that should be > valueThreshold
319
- * @returns this for method chaining
320
- *
349
+ * Access recall metric for assertions
350
+ * @param targetClass - Optional class name. If omitted, uses macro average
321
351
  * @example
322
- * // Assert that 80% of quality scores are above 0.7
323
- * expectStats(predictions)
324
- * .field("quality")
325
- * .toHavePercentageAbove(0.7, 0.8)
352
+ * expectStats(predictions, groundTruth)
353
+ * .field("sentiment")
354
+ * .recall("positive").toBeAtLeast(0.7)
326
355
  */
327
- toHavePercentageAbove(valueThreshold: number, percentageThreshold: number): this;
356
+ recall(targetClass?: string): MetricMatcher<this>;
328
357
  /**
329
- * Validates that ground truth exists and both arrays contain numeric values.
330
- * Returns the filtered numeric arrays for regression metrics.
358
+ * Access Mean Absolute Error metric for assertions
359
+ * @example
360
+ * expectStats(predictions, groundTruth)
361
+ * .field("score")
362
+ * .mae.toBeAtMost(0.1)
331
363
  */
332
- private validateRegressionInputs;
364
+ get mae(): MetricMatcher<this>;
333
365
  /**
334
- * Asserts that Mean Absolute Error is below a threshold.
335
- * Requires numeric values in both actual and expected.
336
- *
337
- * @param threshold - Maximum allowed MAE
338
- * @returns this for method chaining
339
- *
366
+ * Access Root Mean Squared Error metric for assertions
340
367
  * @example
341
368
  * expectStats(predictions, groundTruth)
342
369
  * .field("score")
343
- * .toHaveMAEBelow(0.1)
370
+ * .rmse.toBeAtMost(0.15)
344
371
  */
345
- toHaveMAEBelow(threshold: number): this;
372
+ get rmse(): MetricMatcher<this>;
346
373
  /**
347
- * Asserts that Root Mean Squared Error is below a threshold.
348
- * Requires numeric values in both actual and expected.
349
- *
350
- * @param threshold - Maximum allowed RMSE
351
- * @returns this for method chaining
352
- *
374
+ * Access R-squared (coefficient of determination) metric for assertions
353
375
  * @example
354
376
  * expectStats(predictions, groundTruth)
355
377
  * .field("score")
356
- * .toHaveRMSEBelow(0.15)
357
- */
358
- toHaveRMSEBelow(threshold: number): this;
359
- /**
360
- * Asserts that R-squared (coefficient of determination) is above a threshold.
361
- * measures how well the predictions explain the variance in expected values.
362
- * R² = 1.0 means perfect prediction, R² = 0 means prediction is no better than mean.
363
- * Requires numeric values in both actual and expected.
364
- *
365
- * @param threshold - Minimum required R² value (0-1)
366
- * @returns this for method chaining
367
- *
378
+ * .r2.toBeAtLeast(0.8)
379
+ */
380
+ get r2(): MetricMatcher<this>;
381
+ /**
382
+ * Assert on the percentage of values below or equal to a threshold
383
+ * @param valueThreshold - The value threshold to compare against
384
+ * @example
385
+ * expectStats(predictions)
386
+ * .field("confidence")
387
+ * .percentageBelow(0.5).toBeAtLeast(0.9)
388
+ */
389
+ percentageBelow(valueThreshold: number): PercentageMatcher<this>;
390
+ /**
391
+ * Assert on the percentage of values above a threshold
392
+ * @param valueThreshold - The value threshold to compare against
393
+ * @example
394
+ * expectStats(predictions)
395
+ * .field("quality")
396
+ * .percentageAbove(0.7).toBeAtLeast(0.8)
397
+ */
398
+ percentageAbove(valueThreshold: number): PercentageMatcher<this>;
399
+ /**
400
+ * Displays the confusion matrix in the report
401
+ * This is not an assertion - it always passes and just records the matrix for display
368
402
  * @example
369
403
  * expectStats(predictions, groundTruth)
370
- * .field("score")
371
- * .toHaveR2Above(0.8)
404
+ * .field("sentiment")
405
+ * .accuracy.toBeAtLeast(0.8)
406
+ * .displayConfusionMatrix()
372
407
  */
373
- toHaveR2Above(threshold: number): this;
408
+ displayConfusionMatrix(): this;
374
409
  /**
375
- * Gets the computed metrics for this field
410
+ * Gets the computed classification metrics for this field
376
411
  */
377
412
  getMetrics(): ClassificationMetrics;
378
413
  /**
@@ -385,10 +420,16 @@ declare class FieldSelector {
385
420
  * expectStats() - fluent assertion API for statistical evaluation
386
421
  */
387
422
 
423
+ /**
424
+ * Object with aligned records (e.g., from custom model execution)
425
+ */
426
+ interface AlignedRecordsInput {
427
+ aligned: AlignedRecord[];
428
+ }
388
429
  /**
389
430
  * Input types that expectStats() accepts
390
431
  */
391
- type StatsInput = ModelRunResult | Prediction[] | AlignedRecord[];
432
+ type StatsInput = AlignedRecordsInput | Prediction[] | AlignedRecord[];
392
433
  /**
393
434
  * Options for expectStats when using two-argument form
394
435
  */
@@ -429,20 +470,20 @@ interface ExpectStatsOptions {
429
470
  * // Pattern 1: Distribution assertions (no ground truth)
430
471
  * expectStats(predictions)
431
472
  * .field("confidence")
432
- * .toHavePercentageBelow(0.5, 0.9);
473
+ * .percentageBelow(0.5).toBeAtLeast(0.9);
433
474
  *
434
475
  * @example
435
476
  * // Pattern 2: Classification with ground truth
436
477
  * expectStats(judgeOutputs, humanLabels)
437
478
  * .field("hallucinated")
438
- * .toHaveRecallAbove(true, 0.85)
439
- * .toHavePrecisionAbove(true, 0.8);
479
+ * .recall(true).toBeAtLeast(0.85)
480
+ * .precision(true).toBeAtLeast(0.8);
440
481
  *
441
482
  * @example
442
483
  * // Pattern 3: Custom ID field
443
484
  * expectStats(predictions, groundTruth, { idField: 'uuid' })
444
485
  * .field("score")
445
- * .toHaveAccuracyAbove(0.8);
486
+ * .accuracy.toBeAtLeast(0.8);
446
487
  */
447
488
  declare function expectStats(input: StatsInput): ExpectStats;
448
489
  declare function expectStats(actual: Prediction[], expected: Array<Record<string, unknown>>): ExpectStats;
@@ -489,7 +530,15 @@ declare class ExpectStats {
489
530
  */
490
531
  declare function buildConfusionMatrix(actual: unknown[], expected: unknown[]): ConfusionMatrix;
491
532
  /**
492
- * Formats a confusion matrix as a string table
533
+ * Formats a confusion matrix as a string table with axis labels
534
+ *
535
+ * Output format:
536
+ * ```
537
+ * Predicted → negative positive
538
+ * Actual ↓
539
+ * negative 5 1
540
+ * positive 2 7
541
+ * ```
493
542
  */
494
543
  declare function formatConfusionMatrix(cm: ConfusionMatrix): string;
495
544
 
@@ -687,4 +736,4 @@ declare class TestExecutionError extends EvalSenseError {
687
736
  constructor(message: string, testName: string, originalError?: Error);
688
737
  }
689
738
 
690
- export { AlignedRecord, AssertionError, AssertionResult, ClassificationMetrics, ConfigurationError, ConfusionMatrix, ConsoleReporter, Dataset, DatasetError, EvalReport, EvalSenseError, type ExpectStatsOptions, FieldMetricResult, IntegrityError, IntegrityResult, JsonReporter, Prediction, TestExecutionError, TestFn, afterAll, afterEach, alignByKey, beforeAll, beforeEach, buildConfusionMatrix, checkIntegrity, computeAccuracy, computeClassificationMetrics, computeF1, computePrecision, computeRecall, createDataset, describe, discoverEvalFiles, evalTest, executeEvalFiles, expectStats, extractFieldValues, filterComplete, formatConfusionMatrix, getExitCode, it, loadDataset, parseReport, runModel, runModelParallel, test, validatePredictions };
739
+ export { AlignedRecord, type AlignedRecordsInput, AssertionError, AssertionResult, ClassificationMetrics, ConfigurationError, ConfusionMatrix, ConsoleReporter, DatasetError, EvalReport, EvalSenseError, type ExpectStatsOptions, FieldMetricResult, IntegrityError, IntegrityResult, JsonReporter, Prediction, type StatsInput, TestExecutionError, TestFn, afterAll, afterEach, alignByKey, beforeAll, beforeEach, buildConfusionMatrix, checkIntegrity, computeAccuracy, computeClassificationMetrics, computeF1, computePrecision, computeRecall, describe, discoverEvalFiles, evalTest, executeEvalFiles, expectStats, extractFieldValues, filterComplete, formatConfusionMatrix, getExitCode, it, parseReport, test, validatePredictions };