evalsense 0.2.0 → 0.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1,12 +1,13 @@
1
1
  'use strict';
2
2
 
3
- var chunkHDJID3GC_cjs = require('./chunk-HDJID3GC.cjs');
3
+ var chunkDFC6FRTG_cjs = require('./chunk-DFC6FRTG.cjs');
4
+ require('./chunk-JEQ2X3Z6.cjs');
4
5
  var fs = require('fs');
5
6
  var path = require('path');
6
7
 
7
8
  // src/core/describe.ts
8
9
  function describe(name, fn) {
9
- const parentSuite = chunkHDJID3GC_cjs.getCurrentSuite();
10
+ const parentSuite = chunkDFC6FRTG_cjs.getCurrentSuite();
10
11
  const suite = {
11
12
  name,
12
13
  tests: [],
@@ -15,37 +16,37 @@ function describe(name, fn) {
15
16
  beforeEach: [],
16
17
  afterEach: []
17
18
  };
18
- chunkHDJID3GC_cjs.setCurrentSuite(suite);
19
+ chunkDFC6FRTG_cjs.setCurrentSuite(suite);
19
20
  try {
20
21
  fn();
21
22
  } finally {
22
- chunkHDJID3GC_cjs.setCurrentSuite(parentSuite);
23
+ chunkDFC6FRTG_cjs.setCurrentSuite(parentSuite);
23
24
  }
24
- chunkHDJID3GC_cjs.addSuite(suite);
25
+ chunkDFC6FRTG_cjs.addSuite(suite);
25
26
  }
26
27
  function beforeAll(fn) {
27
- const suite = chunkHDJID3GC_cjs.getCurrentSuite();
28
+ const suite = chunkDFC6FRTG_cjs.getCurrentSuite();
28
29
  if (!suite) {
29
30
  throw new Error("beforeAll() must be called inside a describe() block");
30
31
  }
31
32
  suite.beforeAll?.push(fn);
32
33
  }
33
34
  function afterAll(fn) {
34
- const suite = chunkHDJID3GC_cjs.getCurrentSuite();
35
+ const suite = chunkDFC6FRTG_cjs.getCurrentSuite();
35
36
  if (!suite) {
36
37
  throw new Error("afterAll() must be called inside a describe() block");
37
38
  }
38
39
  suite.afterAll?.push(fn);
39
40
  }
40
41
  function beforeEach(fn) {
41
- const suite = chunkHDJID3GC_cjs.getCurrentSuite();
42
+ const suite = chunkDFC6FRTG_cjs.getCurrentSuite();
42
43
  if (!suite) {
43
44
  throw new Error("beforeEach() must be called inside a describe() block");
44
45
  }
45
46
  suite.beforeEach?.push(fn);
46
47
  }
47
48
  function afterEach(fn) {
48
- const suite = chunkHDJID3GC_cjs.getCurrentSuite();
49
+ const suite = chunkDFC6FRTG_cjs.getCurrentSuite();
49
50
  if (!suite) {
50
51
  throw new Error("afterEach() must be called inside a describe() block");
51
52
  }
@@ -54,7 +55,7 @@ function afterEach(fn) {
54
55
 
55
56
  // src/core/eval-test.ts
56
57
  function evalTest(name, fn) {
57
- const currentSuite = chunkHDJID3GC_cjs.getCurrentSuite();
58
+ const currentSuite = chunkDFC6FRTG_cjs.getCurrentSuite();
58
59
  if (!currentSuite) {
59
60
  throw new Error("evalTest() must be called inside a describe() block");
60
61
  }
@@ -62,12 +63,12 @@ function evalTest(name, fn) {
62
63
  name,
63
64
  fn
64
65
  };
65
- chunkHDJID3GC_cjs.addTestToCurrentSuite(test2);
66
+ chunkDFC6FRTG_cjs.addTestToCurrentSuite(test2);
66
67
  }
67
68
  var test = evalTest;
68
69
  var it = evalTest;
69
70
  function evalTestSkip(name, _fn) {
70
- const currentSuite = chunkHDJID3GC_cjs.getCurrentSuite();
71
+ const currentSuite = chunkDFC6FRTG_cjs.getCurrentSuite();
71
72
  if (!currentSuite) {
72
73
  throw new Error("evalTest.skip() must be called inside a describe() block");
73
74
  }
@@ -76,10 +77,10 @@ function evalTestSkip(name, _fn) {
76
77
  fn: async () => {
77
78
  }
78
79
  };
79
- chunkHDJID3GC_cjs.addTestToCurrentSuite(test2);
80
+ chunkDFC6FRTG_cjs.addTestToCurrentSuite(test2);
80
81
  }
81
82
  function evalTestOnly(name, fn) {
82
- const currentSuite = chunkHDJID3GC_cjs.getCurrentSuite();
83
+ const currentSuite = chunkDFC6FRTG_cjs.getCurrentSuite();
83
84
  if (!currentSuite) {
84
85
  throw new Error("evalTest.only() must be called inside a describe() block");
85
86
  }
@@ -87,7 +88,7 @@ function evalTestOnly(name, fn) {
87
88
  name: `[ONLY] ${name}`,
88
89
  fn
89
90
  };
90
- chunkHDJID3GC_cjs.addTestToCurrentSuite(test2);
91
+ chunkDFC6FRTG_cjs.addTestToCurrentSuite(test2);
91
92
  }
92
93
  evalTest.skip = evalTestSkip;
93
94
  evalTest.only = evalTestOnly;
@@ -102,17 +103,17 @@ function loadDataset(path$1) {
102
103
  } else if (ext === ".json") {
103
104
  records = parseJSON(content);
104
105
  } else {
105
- throw new chunkHDJID3GC_cjs.DatasetError(
106
+ throw new chunkDFC6FRTG_cjs.DatasetError(
106
107
  `Unsupported file format: ${ext}. Use .json, .ndjson, or .jsonl`,
107
108
  path$1
108
109
  );
109
110
  }
110
111
  } catch (error) {
111
- if (error instanceof chunkHDJID3GC_cjs.DatasetError) {
112
+ if (error instanceof chunkDFC6FRTG_cjs.DatasetError) {
112
113
  throw error;
113
114
  }
114
115
  const message = error instanceof Error ? error.message : String(error);
115
- throw new chunkHDJID3GC_cjs.DatasetError(`Failed to load dataset from ${path$1}: ${message}`, path$1);
116
+ throw new chunkDFC6FRTG_cjs.DatasetError(`Failed to load dataset from ${path$1}: ${message}`, path$1);
116
117
  }
117
118
  return {
118
119
  records,
@@ -126,7 +127,7 @@ function loadDataset(path$1) {
126
127
  function parseJSON(content) {
127
128
  const parsed = JSON.parse(content);
128
129
  if (!Array.isArray(parsed)) {
129
- throw new chunkHDJID3GC_cjs.DatasetError("JSON dataset must be an array of records");
130
+ throw new chunkDFC6FRTG_cjs.DatasetError("JSON dataset must be an array of records");
130
131
  }
131
132
  return parsed;
132
133
  }
@@ -139,7 +140,7 @@ function parseNDJSON(content) {
139
140
  try {
140
141
  records.push(JSON.parse(line));
141
142
  } catch {
142
- throw new chunkHDJID3GC_cjs.DatasetError(`Invalid JSON at line ${i + 1} in NDJSON file`);
143
+ throw new chunkDFC6FRTG_cjs.DatasetError(`Invalid JSON at line ${i + 1} in NDJSON file`);
143
144
  }
144
145
  }
145
146
  return records;
@@ -164,7 +165,7 @@ async function runModel(dataset, modelFn) {
164
165
  const id = getRecordId(record);
165
166
  const prediction = await modelFn(record);
166
167
  if (prediction.id !== id) {
167
- throw new chunkHDJID3GC_cjs.DatasetError(
168
+ throw new chunkDFC6FRTG_cjs.DatasetError(
168
169
  `Prediction ID mismatch: expected "${id}", got "${prediction.id}". Model function must return the same ID as the input record.`
169
170
  );
170
171
  }
@@ -184,9 +185,7 @@ async function runModel(dataset, modelFn) {
184
185
  function getRecordId(record) {
185
186
  const id = record.id ?? record._id;
186
187
  if (id === void 0 || id === null) {
187
- throw new chunkHDJID3GC_cjs.DatasetError(
188
- 'Dataset records must have an "id" or "_id" field for alignment'
189
- );
188
+ throw new chunkDFC6FRTG_cjs.DatasetError('Dataset records must have an "id" or "_id" field for alignment');
190
189
  }
191
190
  return String(id);
192
191
  }
@@ -208,9 +207,7 @@ async function runModelParallel(dataset, modelFn, concurrency = 10) {
208
207
  for (const { prediction, record } of results) {
209
208
  const id = getRecordId(record);
210
209
  if (prediction.id !== id) {
211
- throw new chunkHDJID3GC_cjs.DatasetError(
212
- `Prediction ID mismatch: expected "${id}", got "${prediction.id}".`
213
- );
210
+ throw new chunkDFC6FRTG_cjs.DatasetError(`Prediction ID mismatch: expected "${id}", got "${prediction.id}".`);
214
211
  }
215
212
  predictions.push(prediction);
216
213
  aligned.push({
@@ -258,7 +255,7 @@ function alignByKey(predictions, expected, options = {}) {
258
255
  }
259
256
  }
260
257
  if (strict && missingIds.length > 0) {
261
- throw new chunkHDJID3GC_cjs.IntegrityError(
258
+ throw new chunkDFC6FRTG_cjs.IntegrityError(
262
259
  `${missingIds.length} prediction(s) have no matching expected record`,
263
260
  missingIds
264
261
  );
@@ -307,9 +304,7 @@ function checkIntegrity(dataset, options = {}) {
307
304
  }
308
305
  }
309
306
  if (requiredFields.length > 0) {
310
- const missing = requiredFields.filter(
311
- (field) => record[field] === void 0
312
- );
307
+ const missing = requiredFields.filter((field) => record[field] === void 0);
313
308
  if (missing.length > 0) {
314
309
  missingFields.push({
315
310
  id: String(id ?? `record[${i}]`),
@@ -332,12 +327,14 @@ function checkIntegrity(dataset, options = {}) {
332
327
  issues.push(`${missingIds.length} record(s) missing ID`);
333
328
  }
334
329
  if (duplicateIds.length > 0) {
335
- issues.push(`${duplicateIds.length} duplicate ID(s): ${duplicateIds.slice(0, 3).join(", ")}${duplicateIds.length > 3 ? "..." : ""}`);
330
+ issues.push(
331
+ `${duplicateIds.length} duplicate ID(s): ${duplicateIds.slice(0, 3).join(", ")}${duplicateIds.length > 3 ? "..." : ""}`
332
+ );
336
333
  }
337
334
  if (missingFields.length > 0) {
338
335
  issues.push(`${missingFields.length} record(s) missing required fields`);
339
336
  }
340
- throw new chunkHDJID3GC_cjs.IntegrityError(`Dataset integrity check failed: ${issues.join("; ")}`);
337
+ throw new chunkDFC6FRTG_cjs.IntegrityError(`Dataset integrity check failed: ${issues.join("; ")}`);
341
338
  }
342
339
  return result;
343
340
  }
@@ -355,7 +352,7 @@ function validatePredictions(predictions, expectedIds) {
355
352
 
356
353
  // src/statistics/classification.ts
357
354
  function computeClassificationMetrics(actual, expected) {
358
- const confusionMatrix = chunkHDJID3GC_cjs.buildConfusionMatrix(actual, expected);
355
+ const confusionMatrix = chunkDFC6FRTG_cjs.buildConfusionMatrix(actual, expected);
359
356
  return computeMetricsFromMatrix(confusionMatrix);
360
357
  }
361
358
  function computeMetricsFromMatrix(cm) {
@@ -363,10 +360,10 @@ function computeMetricsFromMatrix(cm) {
363
360
  let totalSupport = 0;
364
361
  let correctPredictions = 0;
365
362
  for (const label of cm.labels) {
366
- const tp = chunkHDJID3GC_cjs.getTruePositives(cm, label);
367
- const fp = chunkHDJID3GC_cjs.getFalsePositives(cm, label);
368
- const fn = chunkHDJID3GC_cjs.getFalseNegatives(cm, label);
369
- const support = chunkHDJID3GC_cjs.getSupport(cm, label);
363
+ const tp = chunkDFC6FRTG_cjs.getTruePositives(cm, label);
364
+ const fp = chunkDFC6FRTG_cjs.getFalsePositives(cm, label);
365
+ const fn = chunkDFC6FRTG_cjs.getFalseNegatives(cm, label);
366
+ const support = chunkDFC6FRTG_cjs.getSupport(cm, label);
370
367
  const precision = tp + fp > 0 ? tp / (tp + fp) : 0;
371
368
  const recall = tp + fn > 0 ? tp / (tp + fn) : 0;
372
369
  const f1 = precision + recall > 0 ? 2 * precision * recall / (precision + recall) : 0;
@@ -395,15 +392,15 @@ function computeMetricsFromMatrix(cm) {
395
392
  };
396
393
  }
397
394
  function computePrecision(actual, expected, targetClass) {
398
- const cm = chunkHDJID3GC_cjs.buildConfusionMatrix(actual, expected);
399
- const tp = chunkHDJID3GC_cjs.getTruePositives(cm, targetClass);
400
- const fp = chunkHDJID3GC_cjs.getFalsePositives(cm, targetClass);
395
+ const cm = chunkDFC6FRTG_cjs.buildConfusionMatrix(actual, expected);
396
+ const tp = chunkDFC6FRTG_cjs.getTruePositives(cm, targetClass);
397
+ const fp = chunkDFC6FRTG_cjs.getFalsePositives(cm, targetClass);
401
398
  return tp + fp > 0 ? tp / (tp + fp) : 0;
402
399
  }
403
400
  function computeRecall(actual, expected, targetClass) {
404
- const cm = chunkHDJID3GC_cjs.buildConfusionMatrix(actual, expected);
405
- const tp = chunkHDJID3GC_cjs.getTruePositives(cm, targetClass);
406
- const fn = chunkHDJID3GC_cjs.getFalseNegatives(cm, targetClass);
401
+ const cm = chunkDFC6FRTG_cjs.buildConfusionMatrix(actual, expected);
402
+ const tp = chunkDFC6FRTG_cjs.getTruePositives(cm, targetClass);
403
+ const fn = chunkDFC6FRTG_cjs.getFalseNegatives(cm, targetClass);
407
404
  return tp + fn > 0 ? tp / (tp + fn) : 0;
408
405
  }
409
406
  function computeF1(actual, expected, targetClass) {
@@ -430,6 +427,67 @@ function computeAccuracy(actual, expected) {
430
427
  return total > 0 ? correct / total : 0;
431
428
  }
432
429
 
430
+ // src/statistics/regression.ts
431
+ function computeRegressionMetrics(actual, expected) {
432
+ if (actual.length !== expected.length) {
433
+ throw new Error(
434
+ `Array length mismatch: actual has ${actual.length} elements, expected has ${expected.length}`
435
+ );
436
+ }
437
+ const n = actual.length;
438
+ if (n === 0) {
439
+ return { mae: 0, mse: 0, rmse: 0, r2: 0 };
440
+ }
441
+ const mae = computeMAE(actual, expected);
442
+ const mse = computeMSE(actual, expected);
443
+ const rmse = Math.sqrt(mse);
444
+ const r2 = computeR2(actual, expected);
445
+ return { mae, mse, rmse, r2 };
446
+ }
447
+ function computeMAE(actual, expected) {
448
+ if (actual.length !== expected.length || actual.length === 0) {
449
+ return 0;
450
+ }
451
+ let sum = 0;
452
+ for (let i = 0; i < actual.length; i++) {
453
+ sum += Math.abs((actual[i] ?? 0) - (expected[i] ?? 0));
454
+ }
455
+ return sum / actual.length;
456
+ }
457
+ function computeMSE(actual, expected) {
458
+ if (actual.length !== expected.length || actual.length === 0) {
459
+ return 0;
460
+ }
461
+ let sum = 0;
462
+ for (let i = 0; i < actual.length; i++) {
463
+ const diff = (actual[i] ?? 0) - (expected[i] ?? 0);
464
+ sum += diff * diff;
465
+ }
466
+ return sum / actual.length;
467
+ }
468
+ function computeR2(actual, expected) {
469
+ if (actual.length !== expected.length || actual.length === 0) {
470
+ return 0;
471
+ }
472
+ let meanExpected = 0;
473
+ for (const val of expected) {
474
+ meanExpected += val ?? 0;
475
+ }
476
+ meanExpected /= expected.length;
477
+ let ssTotal = 0;
478
+ let ssResidual = 0;
479
+ for (let i = 0; i < actual.length; i++) {
480
+ const exp = expected[i] ?? 0;
481
+ const act = actual[i] ?? 0;
482
+ ssTotal += (exp - meanExpected) ** 2;
483
+ ssResidual += (exp - act) ** 2;
484
+ }
485
+ if (ssTotal === 0) {
486
+ return ssResidual === 0 ? 1 : 0;
487
+ }
488
+ return 1 - ssResidual / ssTotal;
489
+ }
490
+
433
491
  // src/statistics/distribution.ts
434
492
  function filterNumericValues(values) {
435
493
  return values.filter(
@@ -497,9 +555,9 @@ var BinarizeSelector = class {
497
555
  field: this.fieldName
498
556
  };
499
557
  this.assertions.push(result);
500
- chunkHDJID3GC_cjs.recordAssertion(result);
558
+ chunkDFC6FRTG_cjs.recordAssertion(result);
501
559
  if (!passed) {
502
- throw new chunkHDJID3GC_cjs.AssertionError(result.message, threshold, metrics.accuracy, this.fieldName);
560
+ throw new chunkDFC6FRTG_cjs.AssertionError(result.message, threshold, metrics.accuracy, this.fieldName);
503
561
  }
504
562
  return this;
505
563
  }
@@ -521,7 +579,7 @@ var BinarizeSelector = class {
521
579
  actualThreshold = threshold;
522
580
  const classMetrics = metrics.perClass[targetClass];
523
581
  if (!classMetrics) {
524
- throw new chunkHDJID3GC_cjs.AssertionError(
582
+ throw new chunkDFC6FRTG_cjs.AssertionError(
525
583
  `Class "${targetClass}" not found in binarized predictions`,
526
584
  targetClass,
527
585
  Object.keys(metrics.perClass),
@@ -541,9 +599,9 @@ var BinarizeSelector = class {
541
599
  class: targetClass
542
600
  };
543
601
  this.assertions.push(result);
544
- chunkHDJID3GC_cjs.recordAssertion(result);
602
+ chunkDFC6FRTG_cjs.recordAssertion(result);
545
603
  if (!passed) {
546
- throw new chunkHDJID3GC_cjs.AssertionError(result.message, actualThreshold, actualPrecision, this.fieldName);
604
+ throw new chunkDFC6FRTG_cjs.AssertionError(result.message, actualThreshold, actualPrecision, this.fieldName);
547
605
  }
548
606
  return this;
549
607
  }
@@ -565,7 +623,7 @@ var BinarizeSelector = class {
565
623
  actualThreshold = threshold;
566
624
  const classMetrics = metrics.perClass[targetClass];
567
625
  if (!classMetrics) {
568
- throw new chunkHDJID3GC_cjs.AssertionError(
626
+ throw new chunkDFC6FRTG_cjs.AssertionError(
569
627
  `Class "${targetClass}" not found in binarized predictions`,
570
628
  targetClass,
571
629
  Object.keys(metrics.perClass),
@@ -585,9 +643,9 @@ var BinarizeSelector = class {
585
643
  class: targetClass
586
644
  };
587
645
  this.assertions.push(result);
588
- chunkHDJID3GC_cjs.recordAssertion(result);
646
+ chunkDFC6FRTG_cjs.recordAssertion(result);
589
647
  if (!passed) {
590
- throw new chunkHDJID3GC_cjs.AssertionError(result.message, actualThreshold, actualRecall, this.fieldName);
648
+ throw new chunkDFC6FRTG_cjs.AssertionError(result.message, actualThreshold, actualRecall, this.fieldName);
591
649
  }
592
650
  return this;
593
651
  }
@@ -607,7 +665,7 @@ var BinarizeSelector = class {
607
665
  actualThreshold = threshold;
608
666
  const classMetrics = metrics.perClass[targetClass];
609
667
  if (!classMetrics) {
610
- throw new chunkHDJID3GC_cjs.AssertionError(
668
+ throw new chunkDFC6FRTG_cjs.AssertionError(
611
669
  `Class "${targetClass}" not found in binarized predictions`,
612
670
  targetClass,
613
671
  Object.keys(metrics.perClass),
@@ -627,9 +685,9 @@ var BinarizeSelector = class {
627
685
  class: targetClass
628
686
  };
629
687
  this.assertions.push(result);
630
- chunkHDJID3GC_cjs.recordAssertion(result);
688
+ chunkDFC6FRTG_cjs.recordAssertion(result);
631
689
  if (!passed) {
632
- throw new chunkHDJID3GC_cjs.AssertionError(result.message, actualThreshold, actualF1, this.fieldName);
690
+ throw new chunkDFC6FRTG_cjs.AssertionError(result.message, actualThreshold, actualF1, this.fieldName);
633
691
  }
634
692
  return this;
635
693
  }
@@ -644,7 +702,7 @@ var BinarizeSelector = class {
644
702
  binarized: true,
645
703
  binarizeThreshold: this.threshold
646
704
  };
647
- chunkHDJID3GC_cjs.recordFieldMetrics(fieldResult);
705
+ chunkDFC6FRTG_cjs.recordFieldMetrics(fieldResult);
648
706
  const result = {
649
707
  type: "confusionMatrix",
650
708
  passed: true,
@@ -652,7 +710,7 @@ var BinarizeSelector = class {
652
710
  field: this.fieldName
653
711
  };
654
712
  this.assertions.push(result);
655
- chunkHDJID3GC_cjs.recordAssertion(result);
713
+ chunkDFC6FRTG_cjs.recordAssertion(result);
656
714
  return this;
657
715
  }
658
716
  /**
@@ -694,11 +752,9 @@ var FieldSelector = class {
694
752
  * Throws a clear error if expected values are missing.
695
753
  */
696
754
  validateGroundTruth() {
697
- const hasExpected = this.expectedValues.some(
698
- (v) => v !== void 0 && v !== null
699
- );
755
+ const hasExpected = this.expectedValues.some((v) => v !== void 0 && v !== null);
700
756
  if (!hasExpected) {
701
- throw new chunkHDJID3GC_cjs.AssertionError(
757
+ throw new chunkDFC6FRTG_cjs.AssertionError(
702
758
  `Classification metric requires ground truth, but field "${this.fieldName}" has no expected values. Use expectStats(predictions, groundTruth) to provide expected values.`,
703
759
  void 0,
704
760
  void 0,
@@ -722,9 +778,9 @@ var FieldSelector = class {
722
778
  field: this.fieldName
723
779
  };
724
780
  this.assertions.push(result);
725
- chunkHDJID3GC_cjs.recordAssertion(result);
781
+ chunkDFC6FRTG_cjs.recordAssertion(result);
726
782
  if (!passed) {
727
- throw new chunkHDJID3GC_cjs.AssertionError(result.message, threshold, metrics.accuracy, this.fieldName);
783
+ throw new chunkDFC6FRTG_cjs.AssertionError(result.message, threshold, metrics.accuracy, this.fieldName);
728
784
  }
729
785
  return this;
730
786
  }
@@ -747,7 +803,7 @@ var FieldSelector = class {
747
803
  actualThreshold = threshold;
748
804
  const classMetrics = metrics.perClass[targetClass];
749
805
  if (!classMetrics) {
750
- throw new chunkHDJID3GC_cjs.AssertionError(
806
+ throw new chunkDFC6FRTG_cjs.AssertionError(
751
807
  `Class "${targetClass}" not found in predictions`,
752
808
  targetClass,
753
809
  Object.keys(metrics.perClass),
@@ -767,9 +823,9 @@ var FieldSelector = class {
767
823
  class: targetClass
768
824
  };
769
825
  this.assertions.push(result);
770
- chunkHDJID3GC_cjs.recordAssertion(result);
826
+ chunkDFC6FRTG_cjs.recordAssertion(result);
771
827
  if (!passed) {
772
- throw new chunkHDJID3GC_cjs.AssertionError(result.message, actualThreshold, actualPrecision, this.fieldName);
828
+ throw new chunkDFC6FRTG_cjs.AssertionError(result.message, actualThreshold, actualPrecision, this.fieldName);
773
829
  }
774
830
  return this;
775
831
  }
@@ -792,7 +848,7 @@ var FieldSelector = class {
792
848
  actualThreshold = threshold;
793
849
  const classMetrics = metrics.perClass[targetClass];
794
850
  if (!classMetrics) {
795
- throw new chunkHDJID3GC_cjs.AssertionError(
851
+ throw new chunkDFC6FRTG_cjs.AssertionError(
796
852
  `Class "${targetClass}" not found in predictions`,
797
853
  targetClass,
798
854
  Object.keys(metrics.perClass),
@@ -812,9 +868,9 @@ var FieldSelector = class {
812
868
  class: targetClass
813
869
  };
814
870
  this.assertions.push(result);
815
- chunkHDJID3GC_cjs.recordAssertion(result);
871
+ chunkDFC6FRTG_cjs.recordAssertion(result);
816
872
  if (!passed) {
817
- throw new chunkHDJID3GC_cjs.AssertionError(result.message, actualThreshold, actualRecall, this.fieldName);
873
+ throw new chunkDFC6FRTG_cjs.AssertionError(result.message, actualThreshold, actualRecall, this.fieldName);
818
874
  }
819
875
  return this;
820
876
  }
@@ -837,7 +893,7 @@ var FieldSelector = class {
837
893
  actualThreshold = threshold;
838
894
  const classMetrics = metrics.perClass[targetClass];
839
895
  if (!classMetrics) {
840
- throw new chunkHDJID3GC_cjs.AssertionError(
896
+ throw new chunkDFC6FRTG_cjs.AssertionError(
841
897
  `Class "${targetClass}" not found in predictions`,
842
898
  targetClass,
843
899
  Object.keys(metrics.perClass),
@@ -857,9 +913,9 @@ var FieldSelector = class {
857
913
  class: targetClass
858
914
  };
859
915
  this.assertions.push(result);
860
- chunkHDJID3GC_cjs.recordAssertion(result);
916
+ chunkDFC6FRTG_cjs.recordAssertion(result);
861
917
  if (!passed) {
862
- throw new chunkHDJID3GC_cjs.AssertionError(result.message, actualThreshold, actualF1, this.fieldName);
918
+ throw new chunkDFC6FRTG_cjs.AssertionError(result.message, actualThreshold, actualF1, this.fieldName);
863
919
  }
864
920
  return this;
865
921
  }
@@ -873,7 +929,7 @@ var FieldSelector = class {
873
929
  metrics,
874
930
  binarized: false
875
931
  };
876
- chunkHDJID3GC_cjs.recordFieldMetrics(fieldResult);
932
+ chunkDFC6FRTG_cjs.recordFieldMetrics(fieldResult);
877
933
  const result = {
878
934
  type: "confusionMatrix",
879
935
  passed: true,
@@ -881,7 +937,7 @@ var FieldSelector = class {
881
937
  field: this.fieldName
882
938
  };
883
939
  this.assertions.push(result);
884
- chunkHDJID3GC_cjs.recordAssertion(result);
940
+ chunkDFC6FRTG_cjs.recordAssertion(result);
885
941
  return this;
886
942
  }
887
943
  /**
@@ -901,7 +957,7 @@ var FieldSelector = class {
901
957
  toHavePercentageBelow(valueThreshold, percentageThreshold) {
902
958
  const numericActual = filterNumericValues(this.actualValues);
903
959
  if (numericActual.length === 0) {
904
- throw new chunkHDJID3GC_cjs.AssertionError(
960
+ throw new chunkDFC6FRTG_cjs.AssertionError(
905
961
  `Field '${this.fieldName}' contains no numeric values (found 0 numeric out of ${this.actualValues.length} total values)`,
906
962
  percentageThreshold,
907
963
  void 0,
@@ -919,9 +975,14 @@ var FieldSelector = class {
919
975
  field: this.fieldName
920
976
  };
921
977
  this.assertions.push(result);
922
- chunkHDJID3GC_cjs.recordAssertion(result);
978
+ chunkDFC6FRTG_cjs.recordAssertion(result);
923
979
  if (!passed) {
924
- throw new chunkHDJID3GC_cjs.AssertionError(result.message, percentageThreshold, actualPercentage, this.fieldName);
980
+ throw new chunkDFC6FRTG_cjs.AssertionError(
981
+ result.message,
982
+ percentageThreshold,
983
+ actualPercentage,
984
+ this.fieldName
985
+ );
925
986
  }
926
987
  return this;
927
988
  }
@@ -942,7 +1003,7 @@ var FieldSelector = class {
942
1003
  toHavePercentageAbove(valueThreshold, percentageThreshold) {
943
1004
  const numericActual = filterNumericValues(this.actualValues);
944
1005
  if (numericActual.length === 0) {
945
- throw new chunkHDJID3GC_cjs.AssertionError(
1006
+ throw new chunkDFC6FRTG_cjs.AssertionError(
946
1007
  `Field '${this.fieldName}' contains no numeric values (found 0 numeric out of ${this.actualValues.length} total values)`,
947
1008
  percentageThreshold,
948
1009
  void 0,
@@ -960,9 +1021,146 @@ var FieldSelector = class {
960
1021
  field: this.fieldName
961
1022
  };
962
1023
  this.assertions.push(result);
963
- chunkHDJID3GC_cjs.recordAssertion(result);
1024
+ chunkDFC6FRTG_cjs.recordAssertion(result);
964
1025
  if (!passed) {
965
- throw new chunkHDJID3GC_cjs.AssertionError(result.message, percentageThreshold, actualPercentage, this.fieldName);
1026
+ throw new chunkDFC6FRTG_cjs.AssertionError(
1027
+ result.message,
1028
+ percentageThreshold,
1029
+ actualPercentage,
1030
+ this.fieldName
1031
+ );
1032
+ }
1033
+ return this;
1034
+ }
1035
+ // ============================================================================
1036
+ // Regression Assertions
1037
+ // ============================================================================
1038
+ /**
1039
+ * Validates that ground truth exists and both arrays contain numeric values.
1040
+ * Returns the filtered numeric arrays for regression metrics.
1041
+ */
1042
+ validateRegressionInputs() {
1043
+ this.validateGroundTruth();
1044
+ const numericActual = filterNumericValues(this.actualValues);
1045
+ const numericExpected = filterNumericValues(this.expectedValues);
1046
+ if (numericActual.length === 0) {
1047
+ throw new chunkDFC6FRTG_cjs.AssertionError(
1048
+ `Regression metric requires numeric values, but field "${this.fieldName}" has no numeric actual values.`,
1049
+ void 0,
1050
+ void 0,
1051
+ this.fieldName
1052
+ );
1053
+ }
1054
+ if (numericExpected.length === 0) {
1055
+ throw new chunkDFC6FRTG_cjs.AssertionError(
1056
+ `Regression metric requires numeric values, but field "${this.fieldName}" has no numeric expected values.`,
1057
+ void 0,
1058
+ void 0,
1059
+ this.fieldName
1060
+ );
1061
+ }
1062
+ if (numericActual.length !== numericExpected.length) {
1063
+ throw new chunkDFC6FRTG_cjs.AssertionError(
1064
+ `Regression metric requires equal-length arrays, but got ${numericActual.length} actual and ${numericExpected.length} expected values.`,
1065
+ numericExpected.length,
1066
+ numericActual.length,
1067
+ this.fieldName
1068
+ );
1069
+ }
1070
+ return { actual: numericActual, expected: numericExpected };
1071
+ }
1072
+ /**
1073
+ * Asserts that Mean Absolute Error is below a threshold.
1074
+ * Requires numeric values in both actual and expected.
1075
+ *
1076
+ * @param threshold - Maximum allowed MAE
1077
+ * @returns this for method chaining
1078
+ *
1079
+ * @example
1080
+ * expectStats(predictions, groundTruth)
1081
+ * .field("score")
1082
+ * .toHaveMAEBelow(0.1)
1083
+ */
1084
+ toHaveMAEBelow(threshold) {
1085
+ const { actual, expected } = this.validateRegressionInputs();
1086
+ const metrics = computeRegressionMetrics(actual, expected);
1087
+ const passed = metrics.mae <= threshold;
1088
+ const result = {
1089
+ type: "mae",
1090
+ passed,
1091
+ message: passed ? `MAE ${metrics.mae.toFixed(4)} is below ${threshold}` : `MAE ${metrics.mae.toFixed(4)} exceeds threshold ${threshold}`,
1092
+ expected: threshold,
1093
+ actual: metrics.mae,
1094
+ field: this.fieldName
1095
+ };
1096
+ this.assertions.push(result);
1097
+ chunkDFC6FRTG_cjs.recordAssertion(result);
1098
+ if (!passed) {
1099
+ throw new chunkDFC6FRTG_cjs.AssertionError(result.message, threshold, metrics.mae, this.fieldName);
1100
+ }
1101
+ return this;
1102
+ }
1103
+ /**
1104
+ * Asserts that Root Mean Squared Error is below a threshold.
1105
+ * Requires numeric values in both actual and expected.
1106
+ *
1107
+ * @param threshold - Maximum allowed RMSE
1108
+ * @returns this for method chaining
1109
+ *
1110
+ * @example
1111
+ * expectStats(predictions, groundTruth)
1112
+ * .field("score")
1113
+ * .toHaveRMSEBelow(0.15)
1114
+ */
1115
+ toHaveRMSEBelow(threshold) {
1116
+ const { actual, expected } = this.validateRegressionInputs();
1117
+ const metrics = computeRegressionMetrics(actual, expected);
1118
+ const passed = metrics.rmse <= threshold;
1119
+ const result = {
1120
+ type: "rmse",
1121
+ passed,
1122
+ message: passed ? `RMSE ${metrics.rmse.toFixed(4)} is below ${threshold}` : `RMSE ${metrics.rmse.toFixed(4)} exceeds threshold ${threshold}`,
1123
+ expected: threshold,
1124
+ actual: metrics.rmse,
1125
+ field: this.fieldName
1126
+ };
1127
+ this.assertions.push(result);
1128
+ chunkDFC6FRTG_cjs.recordAssertion(result);
1129
+ if (!passed) {
1130
+ throw new chunkDFC6FRTG_cjs.AssertionError(result.message, threshold, metrics.rmse, this.fieldName);
1131
+ }
1132
+ return this;
1133
+ }
1134
+ /**
1135
+ * Asserts that R-squared (coefficient of determination) is above a threshold.
1136
+ * R² measures how well the predictions explain the variance in expected values.
1137
+ * R² = 1.0 means perfect prediction, R² = 0 means prediction is no better than mean.
1138
+ * Requires numeric values in both actual and expected.
1139
+ *
1140
+ * @param threshold - Minimum required R² value (0-1)
1141
+ * @returns this for method chaining
1142
+ *
1143
+ * @example
1144
+ * expectStats(predictions, groundTruth)
1145
+ * .field("score")
1146
+ * .toHaveR2Above(0.8)
1147
+ */
1148
+ toHaveR2Above(threshold) {
1149
+ const { actual, expected } = this.validateRegressionInputs();
1150
+ const metrics = computeRegressionMetrics(actual, expected);
1151
+ const passed = metrics.r2 >= threshold;
1152
+ const result = {
1153
+ type: "r2",
1154
+ passed,
1155
+ message: passed ? `R\xB2 ${metrics.r2.toFixed(4)} is above ${threshold}` : `R\xB2 ${metrics.r2.toFixed(4)} is below threshold ${threshold}`,
1156
+ expected: threshold,
1157
+ actual: metrics.r2,
1158
+ field: this.fieldName
1159
+ };
1160
+ this.assertions.push(result);
1161
+ chunkDFC6FRTG_cjs.recordAssertion(result);
1162
+ if (!passed) {
1163
+ throw new chunkDFC6FRTG_cjs.AssertionError(result.message, threshold, metrics.r2, this.fieldName);
966
1164
  }
967
1165
  return this;
968
1166
  }
@@ -999,16 +1197,17 @@ function normalizeInput(input) {
999
1197
  expected: {}
1000
1198
  }));
1001
1199
  }
1002
- throw new Error("Invalid input to expectStats(): expected ModelRunResult, Prediction[], or AlignedRecord[]");
1200
+ throw new Error(
1201
+ "Invalid input to expectStats(): expected ModelRunResult, Prediction[], or AlignedRecord[]"
1202
+ );
1003
1203
  }
1004
- function expectStats(inputOrActual, expected) {
1204
+ function expectStats(inputOrActual, expected, options) {
1005
1205
  if (expected !== void 0) {
1006
1206
  if (!Array.isArray(inputOrActual)) {
1007
- throw new Error(
1008
- "When using two-argument expectStats(), first argument must be Prediction[]"
1009
- );
1207
+ throw new Error("When using two-argument expectStats(), first argument must be Prediction[]");
1010
1208
  }
1011
- const aligned2 = alignByKey(inputOrActual, expected);
1209
+ const alignOptions = options ? { idField: options.idField, strict: options.strict } : void 0;
1210
+ const aligned2 = alignByKey(inputOrActual, expected, alignOptions);
1012
1211
  return new ExpectStats(aligned2);
1013
1212
  }
1014
1213
  const aligned = normalizeInput(inputOrActual);
@@ -1041,63 +1240,63 @@ var ExpectStats = class {
1041
1240
 
1042
1241
  Object.defineProperty(exports, "AssertionError", {
1043
1242
  enumerable: true,
1044
- get: function () { return chunkHDJID3GC_cjs.AssertionError; }
1243
+ get: function () { return chunkDFC6FRTG_cjs.AssertionError; }
1045
1244
  });
1046
1245
  Object.defineProperty(exports, "ConfigurationError", {
1047
1246
  enumerable: true,
1048
- get: function () { return chunkHDJID3GC_cjs.ConfigurationError; }
1247
+ get: function () { return chunkDFC6FRTG_cjs.ConfigurationError; }
1049
1248
  });
1050
1249
  Object.defineProperty(exports, "ConsoleReporter", {
1051
1250
  enumerable: true,
1052
- get: function () { return chunkHDJID3GC_cjs.ConsoleReporter; }
1251
+ get: function () { return chunkDFC6FRTG_cjs.ConsoleReporter; }
1053
1252
  });
1054
1253
  Object.defineProperty(exports, "DatasetError", {
1055
1254
  enumerable: true,
1056
- get: function () { return chunkHDJID3GC_cjs.DatasetError; }
1255
+ get: function () { return chunkDFC6FRTG_cjs.DatasetError; }
1057
1256
  });
1058
1257
  Object.defineProperty(exports, "EvalSenseError", {
1059
1258
  enumerable: true,
1060
- get: function () { return chunkHDJID3GC_cjs.EvalSenseError; }
1259
+ get: function () { return chunkDFC6FRTG_cjs.EvalSenseError; }
1061
1260
  });
1062
1261
  Object.defineProperty(exports, "ExitCodes", {
1063
1262
  enumerable: true,
1064
- get: function () { return chunkHDJID3GC_cjs.ExitCodes; }
1263
+ get: function () { return chunkDFC6FRTG_cjs.ExitCodes; }
1065
1264
  });
1066
1265
  Object.defineProperty(exports, "IntegrityError", {
1067
1266
  enumerable: true,
1068
- get: function () { return chunkHDJID3GC_cjs.IntegrityError; }
1267
+ get: function () { return chunkDFC6FRTG_cjs.IntegrityError; }
1069
1268
  });
1070
1269
  Object.defineProperty(exports, "JsonReporter", {
1071
1270
  enumerable: true,
1072
- get: function () { return chunkHDJID3GC_cjs.JsonReporter; }
1271
+ get: function () { return chunkDFC6FRTG_cjs.JsonReporter; }
1073
1272
  });
1074
1273
  Object.defineProperty(exports, "TestExecutionError", {
1075
1274
  enumerable: true,
1076
- get: function () { return chunkHDJID3GC_cjs.TestExecutionError; }
1275
+ get: function () { return chunkDFC6FRTG_cjs.TestExecutionError; }
1077
1276
  });
1078
1277
  Object.defineProperty(exports, "buildConfusionMatrix", {
1079
1278
  enumerable: true,
1080
- get: function () { return chunkHDJID3GC_cjs.buildConfusionMatrix; }
1279
+ get: function () { return chunkDFC6FRTG_cjs.buildConfusionMatrix; }
1081
1280
  });
1082
1281
  Object.defineProperty(exports, "discoverEvalFiles", {
1083
1282
  enumerable: true,
1084
- get: function () { return chunkHDJID3GC_cjs.discoverEvalFiles; }
1283
+ get: function () { return chunkDFC6FRTG_cjs.discoverEvalFiles; }
1085
1284
  });
1086
1285
  Object.defineProperty(exports, "executeEvalFiles", {
1087
1286
  enumerable: true,
1088
- get: function () { return chunkHDJID3GC_cjs.executeEvalFiles; }
1287
+ get: function () { return chunkDFC6FRTG_cjs.executeEvalFiles; }
1089
1288
  });
1090
1289
  Object.defineProperty(exports, "formatConfusionMatrix", {
1091
1290
  enumerable: true,
1092
- get: function () { return chunkHDJID3GC_cjs.formatConfusionMatrix; }
1291
+ get: function () { return chunkDFC6FRTG_cjs.formatConfusionMatrix; }
1093
1292
  });
1094
1293
  Object.defineProperty(exports, "getExitCode", {
1095
1294
  enumerable: true,
1096
- get: function () { return chunkHDJID3GC_cjs.getExitCode; }
1295
+ get: function () { return chunkDFC6FRTG_cjs.getExitCode; }
1097
1296
  });
1098
1297
  Object.defineProperty(exports, "parseReport", {
1099
1298
  enumerable: true,
1100
- get: function () { return chunkHDJID3GC_cjs.parseReport; }
1299
+ get: function () { return chunkDFC6FRTG_cjs.parseReport; }
1101
1300
  });
1102
1301
  exports.afterAll = afterAll;
1103
1302
  exports.afterEach = afterEach;