evalsense 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/README.md +235 -98
  2. package/dist/{chunk-BFGA2NUB.cjs → chunk-4BKZPVY4.cjs} +13 -6
  3. package/dist/chunk-4BKZPVY4.cjs.map +1 -0
  4. package/dist/{chunk-IYLSY7NX.js → chunk-IUVDDMJ3.js} +13 -6
  5. package/dist/chunk-IUVDDMJ3.js.map +1 -0
  6. package/dist/chunk-NCCQRZ2Y.cjs +1141 -0
  7. package/dist/chunk-NCCQRZ2Y.cjs.map +1 -0
  8. package/dist/chunk-TDGWDK2L.js +1108 -0
  9. package/dist/chunk-TDGWDK2L.js.map +1 -0
  10. package/dist/cli.cjs +11 -11
  11. package/dist/cli.js +1 -1
  12. package/dist/index-CATqAHNK.d.cts +416 -0
  13. package/dist/index-CoMpaW-K.d.ts +416 -0
  14. package/dist/index.cjs +507 -580
  15. package/dist/index.cjs.map +1 -1
  16. package/dist/index.d.cts +210 -161
  17. package/dist/index.d.ts +210 -161
  18. package/dist/index.js +455 -524
  19. package/dist/index.js.map +1 -1
  20. package/dist/metrics/index.cjs +103 -342
  21. package/dist/metrics/index.cjs.map +1 -1
  22. package/dist/metrics/index.d.cts +260 -31
  23. package/dist/metrics/index.d.ts +260 -31
  24. package/dist/metrics/index.js +24 -312
  25. package/dist/metrics/index.js.map +1 -1
  26. package/dist/metrics/opinionated/index.cjs +5 -5
  27. package/dist/metrics/opinionated/index.d.cts +2 -163
  28. package/dist/metrics/opinionated/index.d.ts +2 -163
  29. package/dist/metrics/opinionated/index.js +1 -1
  30. package/dist/{types-C71p0wzM.d.cts → types-D0hzfyKm.d.cts} +1 -13
  31. package/dist/{types-C71p0wzM.d.ts → types-D0hzfyKm.d.ts} +1 -13
  32. package/package.json +1 -1
  33. package/dist/chunk-BFGA2NUB.cjs.map +0 -1
  34. package/dist/chunk-IYLSY7NX.js.map +0 -1
  35. package/dist/chunk-RZFLCWTW.cjs +0 -942
  36. package/dist/chunk-RZFLCWTW.cjs.map +0 -1
  37. package/dist/chunk-Z3U6AUWX.js +0 -925
  38. package/dist/chunk-Z3U6AUWX.js.map +0 -1
package/dist/index.cjs CHANGED
@@ -1,13 +1,11 @@
1
1
  'use strict';
2
2
 
3
- var chunkBFGA2NUB_cjs = require('./chunk-BFGA2NUB.cjs');
3
+ var chunk4BKZPVY4_cjs = require('./chunk-4BKZPVY4.cjs');
4
4
  require('./chunk-JEQ2X3Z6.cjs');
5
- var fs = require('fs');
6
- var path = require('path');
7
5
 
8
6
  // src/core/describe.ts
9
7
  function describe(name, fn) {
10
- const parentSuite = chunkBFGA2NUB_cjs.getCurrentSuite();
8
+ const parentSuite = chunk4BKZPVY4_cjs.getCurrentSuite();
11
9
  const suite = {
12
10
  name,
13
11
  tests: [],
@@ -16,37 +14,37 @@ function describe(name, fn) {
16
14
  beforeEach: [],
17
15
  afterEach: []
18
16
  };
19
- chunkBFGA2NUB_cjs.setCurrentSuite(suite);
17
+ chunk4BKZPVY4_cjs.setCurrentSuite(suite);
20
18
  try {
21
19
  fn();
22
20
  } finally {
23
- chunkBFGA2NUB_cjs.setCurrentSuite(parentSuite);
21
+ chunk4BKZPVY4_cjs.setCurrentSuite(parentSuite);
24
22
  }
25
- chunkBFGA2NUB_cjs.addSuite(suite);
23
+ chunk4BKZPVY4_cjs.addSuite(suite);
26
24
  }
27
25
  function beforeAll(fn) {
28
- const suite = chunkBFGA2NUB_cjs.getCurrentSuite();
26
+ const suite = chunk4BKZPVY4_cjs.getCurrentSuite();
29
27
  if (!suite) {
30
28
  throw new Error("beforeAll() must be called inside a describe() block");
31
29
  }
32
30
  suite.beforeAll?.push(fn);
33
31
  }
34
32
  function afterAll(fn) {
35
- const suite = chunkBFGA2NUB_cjs.getCurrentSuite();
33
+ const suite = chunk4BKZPVY4_cjs.getCurrentSuite();
36
34
  if (!suite) {
37
35
  throw new Error("afterAll() must be called inside a describe() block");
38
36
  }
39
37
  suite.afterAll?.push(fn);
40
38
  }
41
39
  function beforeEach(fn) {
42
- const suite = chunkBFGA2NUB_cjs.getCurrentSuite();
40
+ const suite = chunk4BKZPVY4_cjs.getCurrentSuite();
43
41
  if (!suite) {
44
42
  throw new Error("beforeEach() must be called inside a describe() block");
45
43
  }
46
44
  suite.beforeEach?.push(fn);
47
45
  }
48
46
  function afterEach(fn) {
49
- const suite = chunkBFGA2NUB_cjs.getCurrentSuite();
47
+ const suite = chunk4BKZPVY4_cjs.getCurrentSuite();
50
48
  if (!suite) {
51
49
  throw new Error("afterEach() must be called inside a describe() block");
52
50
  }
@@ -55,7 +53,7 @@ function afterEach(fn) {
55
53
 
56
54
  // src/core/eval-test.ts
57
55
  function evalTest(name, fn) {
58
- const currentSuite = chunkBFGA2NUB_cjs.getCurrentSuite();
56
+ const currentSuite = chunk4BKZPVY4_cjs.getCurrentSuite();
59
57
  if (!currentSuite) {
60
58
  throw new Error("evalTest() must be called inside a describe() block");
61
59
  }
@@ -63,12 +61,12 @@ function evalTest(name, fn) {
63
61
  name,
64
62
  fn
65
63
  };
66
- chunkBFGA2NUB_cjs.addTestToCurrentSuite(test2);
64
+ chunk4BKZPVY4_cjs.addTestToCurrentSuite(test2);
67
65
  }
68
66
  var test = evalTest;
69
67
  var it = evalTest;
70
68
  function evalTestSkip(name, _fn) {
71
- const currentSuite = chunkBFGA2NUB_cjs.getCurrentSuite();
69
+ const currentSuite = chunk4BKZPVY4_cjs.getCurrentSuite();
72
70
  if (!currentSuite) {
73
71
  throw new Error("evalTest.skip() must be called inside a describe() block");
74
72
  }
@@ -77,10 +75,10 @@ function evalTestSkip(name, _fn) {
77
75
  fn: async () => {
78
76
  }
79
77
  };
80
- chunkBFGA2NUB_cjs.addTestToCurrentSuite(test2);
78
+ chunk4BKZPVY4_cjs.addTestToCurrentSuite(test2);
81
79
  }
82
80
  function evalTestOnly(name, fn) {
83
- const currentSuite = chunkBFGA2NUB_cjs.getCurrentSuite();
81
+ const currentSuite = chunk4BKZPVY4_cjs.getCurrentSuite();
84
82
  if (!currentSuite) {
85
83
  throw new Error("evalTest.only() must be called inside a describe() block");
86
84
  }
@@ -88,140 +86,10 @@ function evalTestOnly(name, fn) {
88
86
  name: `[ONLY] ${name}`,
89
87
  fn
90
88
  };
91
- chunkBFGA2NUB_cjs.addTestToCurrentSuite(test2);
89
+ chunk4BKZPVY4_cjs.addTestToCurrentSuite(test2);
92
90
  }
93
91
  evalTest.skip = evalTestSkip;
94
92
  evalTest.only = evalTestOnly;
95
- function loadDataset(path$1) {
96
- const absolutePath = path.resolve(process.cwd(), path$1);
97
- const ext = path.extname(absolutePath).toLowerCase();
98
- let records;
99
- try {
100
- const content = fs.readFileSync(absolutePath, "utf-8");
101
- if (ext === ".ndjson" || ext === ".jsonl") {
102
- records = parseNDJSON(content);
103
- } else if (ext === ".json") {
104
- records = parseJSON(content);
105
- } else {
106
- throw new chunkBFGA2NUB_cjs.DatasetError(
107
- `Unsupported file format: ${ext}. Use .json, .ndjson, or .jsonl`,
108
- path$1
109
- );
110
- }
111
- } catch (error) {
112
- if (error instanceof chunkBFGA2NUB_cjs.DatasetError) {
113
- throw error;
114
- }
115
- const message = error instanceof Error ? error.message : String(error);
116
- throw new chunkBFGA2NUB_cjs.DatasetError(`Failed to load dataset from ${path$1}: ${message}`, path$1);
117
- }
118
- return {
119
- records,
120
- metadata: {
121
- source: path$1,
122
- count: records.length,
123
- loadedAt: /* @__PURE__ */ new Date()
124
- }
125
- };
126
- }
127
- function parseJSON(content) {
128
- const parsed = JSON.parse(content);
129
- if (!Array.isArray(parsed)) {
130
- throw new chunkBFGA2NUB_cjs.DatasetError("JSON dataset must be an array of records");
131
- }
132
- return parsed;
133
- }
134
- function parseNDJSON(content) {
135
- const lines = content.split("\n").filter((line) => line.trim() !== "");
136
- const records = [];
137
- for (let i = 0; i < lines.length; i++) {
138
- const line = lines[i];
139
- if (line === void 0) continue;
140
- try {
141
- records.push(JSON.parse(line));
142
- } catch {
143
- throw new chunkBFGA2NUB_cjs.DatasetError(`Invalid JSON at line ${i + 1} in NDJSON file`);
144
- }
145
- }
146
- return records;
147
- }
148
- function createDataset(records, source = "inline") {
149
- return {
150
- records,
151
- metadata: {
152
- source,
153
- count: records.length,
154
- loadedAt: /* @__PURE__ */ new Date()
155
- }
156
- };
157
- }
158
-
159
- // src/dataset/run-model.ts
160
- async function runModel(dataset, modelFn) {
161
- const startTime = Date.now();
162
- const predictions = [];
163
- const aligned = [];
164
- for (const record of dataset.records) {
165
- const id = getRecordId(record);
166
- const prediction = await modelFn(record);
167
- if (prediction.id !== id) {
168
- throw new chunkBFGA2NUB_cjs.DatasetError(
169
- `Prediction ID mismatch: expected "${id}", got "${prediction.id}". Model function must return the same ID as the input record.`
170
- );
171
- }
172
- predictions.push(prediction);
173
- aligned.push({
174
- id,
175
- actual: { ...prediction },
176
- expected: { ...record }
177
- });
178
- }
179
- return {
180
- predictions,
181
- aligned,
182
- duration: Date.now() - startTime
183
- };
184
- }
185
- function getRecordId(record) {
186
- const id = record.id ?? record._id;
187
- if (id === void 0 || id === null) {
188
- throw new chunkBFGA2NUB_cjs.DatasetError('Dataset records must have an "id" or "_id" field for alignment');
189
- }
190
- return String(id);
191
- }
192
- async function runModelParallel(dataset, modelFn, concurrency = 10) {
193
- const startTime = Date.now();
194
- const results = [];
195
- for (let i = 0; i < dataset.records.length; i += concurrency) {
196
- const batch = dataset.records.slice(i, i + concurrency);
197
- const batchResults = await Promise.all(
198
- batch.map(async (record) => {
199
- const prediction = await modelFn(record);
200
- return { prediction, record };
201
- })
202
- );
203
- results.push(...batchResults);
204
- }
205
- const predictions = [];
206
- const aligned = [];
207
- for (const { prediction, record } of results) {
208
- const id = getRecordId(record);
209
- if (prediction.id !== id) {
210
- throw new chunkBFGA2NUB_cjs.DatasetError(`Prediction ID mismatch: expected "${id}", got "${prediction.id}".`);
211
- }
212
- predictions.push(prediction);
213
- aligned.push({
214
- id,
215
- actual: { ...prediction },
216
- expected: { ...record }
217
- });
218
- }
219
- return {
220
- predictions,
221
- aligned,
222
- duration: Date.now() - startTime
223
- };
224
- }
225
93
 
226
94
  // src/dataset/alignment.ts
227
95
  function alignByKey(predictions, expected, options = {}) {
@@ -232,7 +100,7 @@ function alignByKey(predictions, expected, options = {}) {
232
100
  for (const record of expected) {
233
101
  const id = String(record[expIdField] ?? record._id);
234
102
  if (!id || id === "undefined") {
235
- throw new chunkBFGA2NUB_cjs.IntegrityError(
103
+ throw new chunk4BKZPVY4_cjs.IntegrityError(
236
104
  `Expected record missing ${expIdField} field: ${JSON.stringify(record)}`
237
105
  );
238
106
  }
@@ -243,7 +111,7 @@ function alignByKey(predictions, expected, options = {}) {
243
111
  for (const prediction of predictions) {
244
112
  const id = String(prediction[predIdField]);
245
113
  if (!id || id === "undefined") {
246
- throw new chunkBFGA2NUB_cjs.IntegrityError(
114
+ throw new chunk4BKZPVY4_cjs.IntegrityError(
247
115
  `Prediction missing ${predIdField} field: ${JSON.stringify(prediction)}`
248
116
  );
249
117
  }
@@ -267,7 +135,7 @@ function alignByKey(predictions, expected, options = {}) {
267
135
  }
268
136
  }
269
137
  if (strict && missingIds.length > 0) {
270
- throw new chunkBFGA2NUB_cjs.IntegrityError(
138
+ throw new chunk4BKZPVY4_cjs.IntegrityError(
271
139
  `${missingIds.length} prediction(s) have no matching expected record`,
272
140
  missingIds
273
141
  );
@@ -294,14 +162,14 @@ function filterComplete(aligned, field) {
294
162
  }
295
163
 
296
164
  // src/dataset/integrity.ts
297
- function checkIntegrity(dataset, options = {}) {
165
+ function checkIntegrity(records, options = {}) {
298
166
  const { requiredFields = [], throwOnFailure = false } = options;
299
167
  const seenIds = /* @__PURE__ */ new Map();
300
168
  const missingIds = [];
301
169
  const duplicateIds = [];
302
170
  const missingFields = [];
303
- for (let i = 0; i < dataset.records.length; i++) {
304
- const record = dataset.records[i];
171
+ for (let i = 0; i < records.length; i++) {
172
+ const record = records[i];
305
173
  if (!record) continue;
306
174
  const id = record.id ?? record._id;
307
175
  if (id === void 0 || id === null) {
@@ -328,7 +196,7 @@ function checkIntegrity(dataset, options = {}) {
328
196
  const valid = missingIds.length === 0 && duplicateIds.length === 0 && missingFields.length === 0;
329
197
  const result = {
330
198
  valid,
331
- totalRecords: dataset.records.length,
199
+ totalRecords: records.length,
332
200
  missingIds,
333
201
  duplicateIds,
334
202
  missingFields
@@ -346,7 +214,7 @@ function checkIntegrity(dataset, options = {}) {
346
214
  if (missingFields.length > 0) {
347
215
  issues.push(`${missingFields.length} record(s) missing required fields`);
348
216
  }
349
- throw new chunkBFGA2NUB_cjs.IntegrityError(`Dataset integrity check failed: ${issues.join("; ")}`);
217
+ throw new chunk4BKZPVY4_cjs.IntegrityError(`Dataset integrity check failed: ${issues.join("; ")}`);
350
218
  }
351
219
  return result;
352
220
  }
@@ -364,7 +232,7 @@ function validatePredictions(predictions, expectedIds) {
364
232
 
365
233
  // src/statistics/classification.ts
366
234
  function computeClassificationMetrics(actual, expected) {
367
- const confusionMatrix = chunkBFGA2NUB_cjs.buildConfusionMatrix(actual, expected);
235
+ const confusionMatrix = chunk4BKZPVY4_cjs.buildConfusionMatrix(actual, expected);
368
236
  return computeMetricsFromMatrix(confusionMatrix);
369
237
  }
370
238
  function computeMetricsFromMatrix(cm) {
@@ -372,10 +240,10 @@ function computeMetricsFromMatrix(cm) {
372
240
  let totalSupport = 0;
373
241
  let correctPredictions = 0;
374
242
  for (const label of cm.labels) {
375
- const tp = chunkBFGA2NUB_cjs.getTruePositives(cm, label);
376
- const fp = chunkBFGA2NUB_cjs.getFalsePositives(cm, label);
377
- const fn = chunkBFGA2NUB_cjs.getFalseNegatives(cm, label);
378
- const support = chunkBFGA2NUB_cjs.getSupport(cm, label);
243
+ const tp = chunk4BKZPVY4_cjs.getTruePositives(cm, label);
244
+ const fp = chunk4BKZPVY4_cjs.getFalsePositives(cm, label);
245
+ const fn = chunk4BKZPVY4_cjs.getFalseNegatives(cm, label);
246
+ const support = chunk4BKZPVY4_cjs.getSupport(cm, label);
379
247
  const precision = tp + fp > 0 ? tp / (tp + fp) : 0;
380
248
  const recall = tp + fn > 0 ? tp / (tp + fn) : 0;
381
249
  const f1 = precision + recall > 0 ? 2 * precision * recall / (precision + recall) : 0;
@@ -404,15 +272,15 @@ function computeMetricsFromMatrix(cm) {
404
272
  };
405
273
  }
406
274
  function computePrecision(actual, expected, targetClass) {
407
- const cm = chunkBFGA2NUB_cjs.buildConfusionMatrix(actual, expected);
408
- const tp = chunkBFGA2NUB_cjs.getTruePositives(cm, targetClass);
409
- const fp = chunkBFGA2NUB_cjs.getFalsePositives(cm, targetClass);
275
+ const cm = chunk4BKZPVY4_cjs.buildConfusionMatrix(actual, expected);
276
+ const tp = chunk4BKZPVY4_cjs.getTruePositives(cm, targetClass);
277
+ const fp = chunk4BKZPVY4_cjs.getFalsePositives(cm, targetClass);
410
278
  return tp + fp > 0 ? tp / (tp + fp) : 0;
411
279
  }
412
280
  function computeRecall(actual, expected, targetClass) {
413
- const cm = chunkBFGA2NUB_cjs.buildConfusionMatrix(actual, expected);
414
- const tp = chunkBFGA2NUB_cjs.getTruePositives(cm, targetClass);
415
- const fn = chunkBFGA2NUB_cjs.getFalseNegatives(cm, targetClass);
281
+ const cm = chunk4BKZPVY4_cjs.buildConfusionMatrix(actual, expected);
282
+ const tp = chunk4BKZPVY4_cjs.getTruePositives(cm, targetClass);
283
+ const fn = chunk4BKZPVY4_cjs.getFalseNegatives(cm, targetClass);
416
284
  return tp + fn > 0 ? tp / (tp + fn) : 0;
417
285
  }
418
286
  function computeF1(actual, expected, targetClass) {
@@ -521,6 +389,91 @@ function calculatePercentageAbove(values, threshold) {
521
389
  return countAbove / values.length;
522
390
  }
523
391
 
392
+ // src/assertions/metric-matcher.ts
393
+ var MetricMatcher = class {
394
+ context;
395
+ constructor(context) {
396
+ this.context = context;
397
+ }
398
+ formatMetricValue(value) {
399
+ if (this.context.formatValue) {
400
+ return this.context.formatValue(value);
401
+ }
402
+ if (value >= 0 && value <= 1) {
403
+ return `${(value * 100).toFixed(1)}%`;
404
+ }
405
+ return value.toFixed(4);
406
+ }
407
+ createAssertion(operator, threshold, passed) {
408
+ const { metricName, metricValue, fieldName, targetClass } = this.context;
409
+ const formattedActual = this.formatMetricValue(metricValue);
410
+ const formattedThreshold = this.formatMetricValue(threshold);
411
+ const classInfo = targetClass ? ` for "${targetClass}"` : "";
412
+ const operatorText = {
413
+ ">=": "at least",
414
+ ">": "above",
415
+ "<=": "at most",
416
+ "<": "below",
417
+ "===": "equal to"
418
+ }[operator];
419
+ const message = passed ? `${metricName}${classInfo} ${formattedActual} is ${operatorText} ${formattedThreshold}` : `${metricName}${classInfo} ${formattedActual} is not ${operatorText} ${formattedThreshold}`;
420
+ return {
421
+ type: metricName.toLowerCase().replace(/\s+/g, "").replace(/²/g, "2"),
422
+ passed,
423
+ message,
424
+ expected: threshold,
425
+ actual: metricValue,
426
+ field: fieldName,
427
+ class: targetClass
428
+ };
429
+ }
430
+ recordAndReturn(result) {
431
+ this.context.assertions.push(result);
432
+ chunk4BKZPVY4_cjs.recordAssertion(result);
433
+ return this.context.parent;
434
+ }
435
+ /**
436
+ * Assert that the metric is greater than or equal to the threshold (>=)
437
+ */
438
+ toBeAtLeast(threshold) {
439
+ const passed = this.context.metricValue >= threshold;
440
+ const result = this.createAssertion(">=", threshold, passed);
441
+ return this.recordAndReturn(result);
442
+ }
443
+ /**
444
+ * Assert that the metric is strictly greater than the threshold (>)
445
+ */
446
+ toBeAbove(threshold) {
447
+ const passed = this.context.metricValue > threshold;
448
+ const result = this.createAssertion(">", threshold, passed);
449
+ return this.recordAndReturn(result);
450
+ }
451
+ /**
452
+ * Assert that the metric is less than or equal to the threshold (<=)
453
+ */
454
+ toBeAtMost(threshold) {
455
+ const passed = this.context.metricValue <= threshold;
456
+ const result = this.createAssertion("<=", threshold, passed);
457
+ return this.recordAndReturn(result);
458
+ }
459
+ /**
460
+ * Assert that the metric is strictly less than the threshold (<)
461
+ */
462
+ toBeBelow(threshold) {
463
+ const passed = this.context.metricValue < threshold;
464
+ const result = this.createAssertion("<", threshold, passed);
465
+ return this.recordAndReturn(result);
466
+ }
467
+ /**
468
+ * Assert that the metric equals the expected value (with optional tolerance for floats)
469
+ */
470
+ toEqual(expected, tolerance = 1e-9) {
471
+ const passed = Math.abs(this.context.metricValue - expected) <= tolerance;
472
+ const result = this.createAssertion("===", expected, passed);
473
+ return this.recordAndReturn(result);
474
+ }
475
+ };
476
+
524
477
  // src/assertions/binarize.ts
525
478
  var BinarizeSelector = class {
526
479
  fieldName;
@@ -552,149 +505,127 @@ var BinarizeSelector = class {
552
505
  }
553
506
  }
554
507
  }
508
+ // ============================================================================
509
+ // Classification Metric Getters
510
+ // ============================================================================
555
511
  /**
556
- * Asserts that accuracy is above a threshold
512
+ * Access accuracy metric for assertions
513
+ * @example
514
+ * expectStats(predictions, groundTruth)
515
+ * .field("score")
516
+ * .binarize(0.5)
517
+ * .accuracy.toBeAtLeast(0.8)
557
518
  */
558
- toHaveAccuracyAbove(threshold) {
519
+ get accuracy() {
559
520
  const metrics = computeClassificationMetrics(this.binaryActual, this.binaryExpected);
560
- const passed = metrics.accuracy >= threshold;
561
- const result = {
562
- type: "accuracy",
563
- passed,
564
- message: passed ? `Accuracy ${(metrics.accuracy * 100).toFixed(1)}% is above ${(threshold * 100).toFixed(1)}% (binarized at ${this.threshold})` : `Accuracy ${(metrics.accuracy * 100).toFixed(1)}% is below threshold ${(threshold * 100).toFixed(1)}% (binarized at ${this.threshold})`,
565
- expected: threshold,
566
- actual: metrics.accuracy,
567
- field: this.fieldName
568
- };
569
- this.assertions.push(result);
570
- chunkBFGA2NUB_cjs.recordAssertion(result);
571
- return this;
521
+ return new MetricMatcher({
522
+ parent: this,
523
+ metricName: "Accuracy",
524
+ metricValue: metrics.accuracy,
525
+ fieldName: this.fieldName,
526
+ assertions: this.assertions
527
+ });
572
528
  }
573
529
  /**
574
- * Asserts that precision is above a threshold
575
- * @param classOrThreshold - Either the class (true/false) or threshold
576
- * @param threshold - Threshold when class is specified
530
+ * Access F1 score metric for assertions (macro average)
531
+ * @example
532
+ * expectStats(predictions, groundTruth)
533
+ * .field("score")
534
+ * .binarize(0.5)
535
+ * .f1.toBeAtLeast(0.75)
577
536
  */
578
- toHavePrecisionAbove(classOrThreshold, threshold) {
537
+ get f1() {
579
538
  const metrics = computeClassificationMetrics(this.binaryActual, this.binaryExpected);
580
- let actualPrecision;
581
- let targetClass;
582
- let actualThreshold;
583
- if (typeof classOrThreshold === "number") {
584
- actualPrecision = metrics.macroAvg.precision;
585
- actualThreshold = classOrThreshold;
586
- } else {
587
- targetClass = String(classOrThreshold);
588
- actualThreshold = threshold;
589
- const classMetrics = metrics.perClass[targetClass];
590
- if (!classMetrics) {
591
- throw new chunkBFGA2NUB_cjs.AssertionError(
592
- `Class "${targetClass}" not found in binarized predictions`,
593
- targetClass,
594
- Object.keys(metrics.perClass),
595
- this.fieldName
596
- );
597
- }
598
- actualPrecision = classMetrics.precision;
599
- }
600
- const passed = actualPrecision >= actualThreshold;
601
- const result = {
602
- type: "precision",
603
- passed,
604
- message: passed ? `Precision${targetClass ? ` for ${targetClass}` : ""} ${(actualPrecision * 100).toFixed(1)}% is above ${(actualThreshold * 100).toFixed(1)}%` : `Precision${targetClass ? ` for ${targetClass}` : ""} ${(actualPrecision * 100).toFixed(1)}% is below threshold ${(actualThreshold * 100).toFixed(1)}%`,
605
- expected: actualThreshold,
606
- actual: actualPrecision,
607
- field: this.fieldName,
608
- class: targetClass
609
- };
610
- this.assertions.push(result);
611
- chunkBFGA2NUB_cjs.recordAssertion(result);
612
- return this;
539
+ return new MetricMatcher({
540
+ parent: this,
541
+ metricName: "F1",
542
+ metricValue: metrics.macroAvg.f1,
543
+ fieldName: this.fieldName,
544
+ assertions: this.assertions
545
+ });
613
546
  }
614
547
  /**
615
- * Asserts that recall is above a threshold
616
- * @param classOrThreshold - Either the class (true/false) or threshold
617
- * @param threshold - Threshold when class is specified
548
+ * Access precision metric for assertions
549
+ * @param targetClass - Optional boolean class (true/false). If omitted, uses macro average
550
+ * @example
551
+ * expectStats(predictions, groundTruth)
552
+ * .field("score")
553
+ * .binarize(0.5)
554
+ * .precision(true).toBeAtLeast(0.7)
618
555
  */
619
- toHaveRecallAbove(classOrThreshold, threshold) {
556
+ precision(targetClass) {
620
557
  const metrics = computeClassificationMetrics(this.binaryActual, this.binaryExpected);
621
- let actualRecall;
622
- let targetClass;
623
- let actualThreshold;
624
- if (typeof classOrThreshold === "number") {
625
- actualRecall = metrics.macroAvg.recall;
626
- actualThreshold = classOrThreshold;
558
+ let metricValue;
559
+ let classKey;
560
+ if (targetClass === void 0) {
561
+ metricValue = metrics.macroAvg.precision;
627
562
  } else {
628
- targetClass = String(classOrThreshold);
629
- actualThreshold = threshold;
630
- const classMetrics = metrics.perClass[targetClass];
563
+ classKey = String(targetClass);
564
+ const classMetrics = metrics.perClass[classKey];
631
565
  if (!classMetrics) {
632
- throw new chunkBFGA2NUB_cjs.AssertionError(
633
- `Class "${targetClass}" not found in binarized predictions`,
634
- targetClass,
566
+ throw new chunk4BKZPVY4_cjs.AssertionError(
567
+ `Class "${classKey}" not found in binarized predictions`,
568
+ classKey,
635
569
  Object.keys(metrics.perClass),
636
570
  this.fieldName
637
571
  );
638
572
  }
639
- actualRecall = classMetrics.recall;
573
+ metricValue = classMetrics.precision;
640
574
  }
641
- const passed = actualRecall >= actualThreshold;
642
- const result = {
643
- type: "recall",
644
- passed,
645
- message: passed ? `Recall${targetClass ? ` for ${targetClass}` : ""} ${(actualRecall * 100).toFixed(1)}% is above ${(actualThreshold * 100).toFixed(1)}%` : `Recall${targetClass ? ` for ${targetClass}` : ""} ${(actualRecall * 100).toFixed(1)}% is below threshold ${(actualThreshold * 100).toFixed(1)}%`,
646
- expected: actualThreshold,
647
- actual: actualRecall,
648
- field: this.fieldName,
649
- class: targetClass
650
- };
651
- this.assertions.push(result);
652
- chunkBFGA2NUB_cjs.recordAssertion(result);
653
- return this;
575
+ return new MetricMatcher({
576
+ parent: this,
577
+ metricName: "Precision",
578
+ metricValue,
579
+ fieldName: this.fieldName,
580
+ targetClass: classKey,
581
+ assertions: this.assertions
582
+ });
654
583
  }
655
584
  /**
656
- * Asserts that F1 score is above a threshold
585
+ * Access recall metric for assertions
586
+ * @param targetClass - Optional boolean class (true/false). If omitted, uses macro average
587
+ * @example
588
+ * expectStats(predictions, groundTruth)
589
+ * .field("score")
590
+ * .binarize(0.5)
591
+ * .recall(true).toBeAtLeast(0.7)
657
592
  */
658
- toHaveF1Above(classOrThreshold, threshold) {
593
+ recall(targetClass) {
659
594
  const metrics = computeClassificationMetrics(this.binaryActual, this.binaryExpected);
660
- let actualF1;
661
- let targetClass;
662
- let actualThreshold;
663
- if (typeof classOrThreshold === "number") {
664
- actualF1 = metrics.macroAvg.f1;
665
- actualThreshold = classOrThreshold;
595
+ let metricValue;
596
+ let classKey;
597
+ if (targetClass === void 0) {
598
+ metricValue = metrics.macroAvg.recall;
666
599
  } else {
667
- targetClass = String(classOrThreshold);
668
- actualThreshold = threshold;
669
- const classMetrics = metrics.perClass[targetClass];
600
+ classKey = String(targetClass);
601
+ const classMetrics = metrics.perClass[classKey];
670
602
  if (!classMetrics) {
671
- throw new chunkBFGA2NUB_cjs.AssertionError(
672
- `Class "${targetClass}" not found in binarized predictions`,
673
- targetClass,
603
+ throw new chunk4BKZPVY4_cjs.AssertionError(
604
+ `Class "${classKey}" not found in binarized predictions`,
605
+ classKey,
674
606
  Object.keys(metrics.perClass),
675
607
  this.fieldName
676
608
  );
677
609
  }
678
- actualF1 = classMetrics.f1;
610
+ metricValue = classMetrics.recall;
679
611
  }
680
- const passed = actualF1 >= actualThreshold;
681
- const result = {
682
- type: "f1",
683
- passed,
684
- message: passed ? `F1${targetClass ? ` for ${targetClass}` : ""} ${(actualF1 * 100).toFixed(1)}% is above ${(actualThreshold * 100).toFixed(1)}%` : `F1${targetClass ? ` for ${targetClass}` : ""} ${(actualF1 * 100).toFixed(1)}% is below threshold ${(actualThreshold * 100).toFixed(1)}%`,
685
- expected: actualThreshold,
686
- actual: actualF1,
687
- field: this.fieldName,
688
- class: targetClass
689
- };
690
- this.assertions.push(result);
691
- chunkBFGA2NUB_cjs.recordAssertion(result);
692
- return this;
612
+ return new MetricMatcher({
613
+ parent: this,
614
+ metricName: "Recall",
615
+ metricValue,
616
+ fieldName: this.fieldName,
617
+ targetClass: classKey,
618
+ assertions: this.assertions
619
+ });
693
620
  }
621
+ // ============================================================================
622
+ // Display Methods
623
+ // ============================================================================
694
624
  /**
695
- * Includes the confusion matrix in the report
625
+ * Displays the confusion matrix in the report
626
+ * This is not an assertion - it always passes and just records the matrix for display
696
627
  */
697
- toHaveConfusionMatrix() {
628
+ displayConfusionMatrix() {
698
629
  const metrics = computeClassificationMetrics(this.binaryActual, this.binaryExpected);
699
630
  const fieldResult = {
700
631
  field: this.fieldName,
@@ -702,7 +633,7 @@ var BinarizeSelector = class {
702
633
  binarized: true,
703
634
  binarizeThreshold: this.threshold
704
635
  };
705
- chunkBFGA2NUB_cjs.recordFieldMetrics(fieldResult);
636
+ chunk4BKZPVY4_cjs.recordFieldMetrics(fieldResult);
706
637
  const result = {
707
638
  type: "confusionMatrix",
708
639
  passed: true,
@@ -710,9 +641,12 @@ var BinarizeSelector = class {
710
641
  field: this.fieldName
711
642
  };
712
643
  this.assertions.push(result);
713
- chunkBFGA2NUB_cjs.recordAssertion(result);
644
+ chunk4BKZPVY4_cjs.recordAssertion(result);
714
645
  return this;
715
646
  }
647
+ // ============================================================================
648
+ // Utility Methods
649
+ // ============================================================================
716
650
  /**
717
651
  * Gets computed metrics
718
652
  */
@@ -727,6 +661,73 @@ var BinarizeSelector = class {
727
661
  }
728
662
  };
729
663
 
664
+ // src/assertions/percentage-matcher.ts
665
+ var PercentageMatcher = class {
666
+ context;
667
+ constructor(context) {
668
+ this.context = context;
669
+ }
670
+ formatPercentage(value) {
671
+ return `${(value * 100).toFixed(1)}%`;
672
+ }
673
+ createAssertion(operator, percentageThreshold, passed) {
674
+ const { fieldName, valueThreshold, direction, actualPercentage } = this.context;
675
+ const operatorText = {
676
+ ">=": "at least",
677
+ ">": "above",
678
+ "<=": "at most",
679
+ "<": "below"
680
+ }[operator];
681
+ const directionText = direction === "above" ? "above" : "below or equal to";
682
+ const message = passed ? `${this.formatPercentage(actualPercentage)} of '${fieldName}' values are ${directionText} ${valueThreshold} (expected ${operatorText} ${this.formatPercentage(percentageThreshold)})` : `Only ${this.formatPercentage(actualPercentage)} of '${fieldName}' values are ${directionText} ${valueThreshold} (expected ${operatorText} ${this.formatPercentage(percentageThreshold)})`;
683
+ return {
684
+ type: direction === "above" ? "percentageAbove" : "percentageBelow",
685
+ passed,
686
+ message,
687
+ expected: percentageThreshold,
688
+ actual: actualPercentage,
689
+ field: fieldName
690
+ };
691
+ }
692
+ recordAndReturn(result) {
693
+ this.context.assertions.push(result);
694
+ chunk4BKZPVY4_cjs.recordAssertion(result);
695
+ return this.context.parent;
696
+ }
697
+ /**
698
+ * Assert that the percentage is greater than or equal to the threshold (>=)
699
+ */
700
+ toBeAtLeast(percentageThreshold) {
701
+ const passed = this.context.actualPercentage >= percentageThreshold;
702
+ const result = this.createAssertion(">=", percentageThreshold, passed);
703
+ return this.recordAndReturn(result);
704
+ }
705
+ /**
706
+ * Assert that the percentage is strictly greater than the threshold (>)
707
+ */
708
+ toBeAbove(percentageThreshold) {
709
+ const passed = this.context.actualPercentage > percentageThreshold;
710
+ const result = this.createAssertion(">", percentageThreshold, passed);
711
+ return this.recordAndReturn(result);
712
+ }
713
+ /**
714
+ * Assert that the percentage is less than or equal to the threshold (<=)
715
+ */
716
+ toBeAtMost(percentageThreshold) {
717
+ const passed = this.context.actualPercentage <= percentageThreshold;
718
+ const result = this.createAssertion("<=", percentageThreshold, passed);
719
+ return this.recordAndReturn(result);
720
+ }
721
+ /**
722
+ * Assert that the percentage is strictly less than the threshold (<)
723
+ */
724
+ toBeBelow(percentageThreshold) {
725
+ const passed = this.context.actualPercentage < percentageThreshold;
726
+ const result = this.createAssertion("<", percentageThreshold, passed);
727
+ return this.recordAndReturn(result);
728
+ }
729
+ };
730
+
730
731
  // src/assertions/field-selector.ts
731
732
  var FieldSelector = class {
732
733
  aligned;
@@ -754,7 +755,7 @@ var FieldSelector = class {
754
755
  validateGroundTruth() {
755
756
  const hasExpected = this.expectedValues.some((v) => v !== void 0 && v !== null);
756
757
  if (!hasExpected) {
757
- throw new chunkBFGA2NUB_cjs.AssertionError(
758
+ throw new chunk4BKZPVY4_cjs.AssertionError(
758
759
  `Classification metric requires ground truth, but field "${this.fieldName}" has no expected values. Use expectStats(predictions, groundTruth) to provide expected values.`,
759
760
  void 0,
760
761
  void 0,
@@ -763,372 +764,302 @@ var FieldSelector = class {
763
764
  }
764
765
  }
765
766
  /**
766
- * Asserts that accuracy is above a threshold
767
+ * Validates that ground truth exists and both arrays contain numeric values.
768
+ * Returns the filtered numeric arrays for regression metrics.
769
+ */
770
+ validateRegressionInputs() {
771
+ this.validateGroundTruth();
772
+ const numericActual = filterNumericValues(this.actualValues);
773
+ const numericExpected = filterNumericValues(this.expectedValues);
774
+ if (numericActual.length === 0) {
775
+ throw new chunk4BKZPVY4_cjs.AssertionError(
776
+ `Regression metric requires numeric values, but field "${this.fieldName}" has no numeric actual values.`,
777
+ void 0,
778
+ void 0,
779
+ this.fieldName
780
+ );
781
+ }
782
+ if (numericExpected.length === 0) {
783
+ throw new chunk4BKZPVY4_cjs.AssertionError(
784
+ `Regression metric requires numeric values, but field "${this.fieldName}" has no numeric expected values.`,
785
+ void 0,
786
+ void 0,
787
+ this.fieldName
788
+ );
789
+ }
790
+ if (numericActual.length !== numericExpected.length) {
791
+ throw new chunk4BKZPVY4_cjs.AssertionError(
792
+ `Regression metric requires equal-length arrays, but got ${numericActual.length} actual and ${numericExpected.length} expected values.`,
793
+ numericExpected.length,
794
+ numericActual.length,
795
+ this.fieldName
796
+ );
797
+ }
798
+ return { actual: numericActual, expected: numericExpected };
799
+ }
800
+ // ============================================================================
801
+ // Classification Metric Getters
802
+ // ============================================================================
803
+ /**
804
+ * Access accuracy metric for assertions
805
+ * @example
806
+ * expectStats(predictions, groundTruth)
807
+ * .field("sentiment")
808
+ * .accuracy.toBeAtLeast(0.8)
767
809
  */
768
- toHaveAccuracyAbove(threshold) {
810
+ get accuracy() {
769
811
  this.validateGroundTruth();
770
812
  const metrics = computeClassificationMetrics(this.actualValues, this.expectedValues);
771
- const passed = metrics.accuracy >= threshold;
772
- const result = {
773
- type: "accuracy",
774
- passed,
775
- message: passed ? `Accuracy ${(metrics.accuracy * 100).toFixed(1)}% is above ${(threshold * 100).toFixed(1)}%` : `Accuracy ${(metrics.accuracy * 100).toFixed(1)}% is below threshold ${(threshold * 100).toFixed(1)}%`,
776
- expected: threshold,
777
- actual: metrics.accuracy,
778
- field: this.fieldName
779
- };
780
- this.assertions.push(result);
781
- chunkBFGA2NUB_cjs.recordAssertion(result);
782
- return this;
813
+ return new MetricMatcher({
814
+ parent: this,
815
+ metricName: "Accuracy",
816
+ metricValue: metrics.accuracy,
817
+ fieldName: this.fieldName,
818
+ assertions: this.assertions
819
+ });
783
820
  }
784
821
  /**
785
- * Asserts that precision is above a threshold
786
- * @param classOrThreshold - Either the class name or threshold (if class is omitted, uses macro average)
787
- * @param threshold - Threshold when class is specified
822
+ * Access F1 score metric for assertions (macro average)
823
+ * @example
824
+ * expectStats(predictions, groundTruth)
825
+ * .field("sentiment")
826
+ * .f1.toBeAtLeast(0.75)
788
827
  */
789
- toHavePrecisionAbove(classOrThreshold, threshold) {
828
+ get f1() {
790
829
  this.validateGroundTruth();
791
830
  const metrics = computeClassificationMetrics(this.actualValues, this.expectedValues);
792
- let actualPrecision;
793
- let targetClass;
794
- let actualThreshold;
795
- if (typeof classOrThreshold === "number") {
796
- actualPrecision = metrics.macroAvg.precision;
797
- actualThreshold = classOrThreshold;
798
- } else {
799
- targetClass = classOrThreshold;
800
- actualThreshold = threshold;
801
- const classMetrics = metrics.perClass[targetClass];
802
- if (!classMetrics) {
803
- throw new chunkBFGA2NUB_cjs.AssertionError(
804
- `Class "${targetClass}" not found in predictions`,
805
- targetClass,
806
- Object.keys(metrics.perClass),
807
- this.fieldName
808
- );
809
- }
810
- actualPrecision = classMetrics.precision;
811
- }
812
- const passed = actualPrecision >= actualThreshold;
813
- const result = {
814
- type: "precision",
815
- passed,
816
- message: passed ? `Precision${targetClass ? ` for "${targetClass}"` : ""} ${(actualPrecision * 100).toFixed(1)}% is above ${(actualThreshold * 100).toFixed(1)}%` : `Precision${targetClass ? ` for "${targetClass}"` : ""} ${(actualPrecision * 100).toFixed(1)}% is below threshold ${(actualThreshold * 100).toFixed(1)}%`,
817
- expected: actualThreshold,
818
- actual: actualPrecision,
819
- field: this.fieldName,
820
- class: targetClass
821
- };
822
- this.assertions.push(result);
823
- chunkBFGA2NUB_cjs.recordAssertion(result);
824
- return this;
831
+ return new MetricMatcher({
832
+ parent: this,
833
+ metricName: "F1",
834
+ metricValue: metrics.macroAvg.f1,
835
+ fieldName: this.fieldName,
836
+ assertions: this.assertions
837
+ });
825
838
  }
826
839
  /**
827
- * Asserts that recall is above a threshold
828
- * @param classOrThreshold - Either the class name or threshold (if class is omitted, uses macro average)
829
- * @param threshold - Threshold when class is specified
840
+ * Access precision metric for assertions
841
+ * @param targetClass - Optional class name. If omitted, uses macro average
842
+ * @example
843
+ * expectStats(predictions, groundTruth)
844
+ * .field("sentiment")
845
+ * .precision("positive").toBeAtLeast(0.7)
830
846
  */
831
- toHaveRecallAbove(classOrThreshold, threshold) {
847
+ precision(targetClass) {
832
848
  this.validateGroundTruth();
833
849
  const metrics = computeClassificationMetrics(this.actualValues, this.expectedValues);
834
- let actualRecall;
835
- let targetClass;
836
- let actualThreshold;
837
- if (typeof classOrThreshold === "number") {
838
- actualRecall = metrics.macroAvg.recall;
839
- actualThreshold = classOrThreshold;
850
+ let metricValue;
851
+ if (targetClass === void 0) {
852
+ metricValue = metrics.macroAvg.precision;
840
853
  } else {
841
- targetClass = classOrThreshold;
842
- actualThreshold = threshold;
843
854
  const classMetrics = metrics.perClass[targetClass];
844
855
  if (!classMetrics) {
845
- throw new chunkBFGA2NUB_cjs.AssertionError(
856
+ throw new chunk4BKZPVY4_cjs.AssertionError(
846
857
  `Class "${targetClass}" not found in predictions`,
847
858
  targetClass,
848
859
  Object.keys(metrics.perClass),
849
860
  this.fieldName
850
861
  );
851
862
  }
852
- actualRecall = classMetrics.recall;
863
+ metricValue = classMetrics.precision;
853
864
  }
854
- const passed = actualRecall >= actualThreshold;
855
- const result = {
856
- type: "recall",
857
- passed,
858
- message: passed ? `Recall${targetClass ? ` for "${targetClass}"` : ""} ${(actualRecall * 100).toFixed(1)}% is above ${(actualThreshold * 100).toFixed(1)}%` : `Recall${targetClass ? ` for "${targetClass}"` : ""} ${(actualRecall * 100).toFixed(1)}% is below threshold ${(actualThreshold * 100).toFixed(1)}%`,
859
- expected: actualThreshold,
860
- actual: actualRecall,
861
- field: this.fieldName,
862
- class: targetClass
863
- };
864
- this.assertions.push(result);
865
- chunkBFGA2NUB_cjs.recordAssertion(result);
866
- return this;
865
+ return new MetricMatcher({
866
+ parent: this,
867
+ metricName: "Precision",
868
+ metricValue,
869
+ fieldName: this.fieldName,
870
+ targetClass,
871
+ assertions: this.assertions
872
+ });
867
873
  }
868
874
  /**
869
- * Asserts that F1 score is above a threshold
870
- * @param classOrThreshold - Either the class name or threshold (if class is omitted, uses macro average)
871
- * @param threshold - Threshold when class is specified
875
+ * Access recall metric for assertions
876
+ * @param targetClass - Optional class name. If omitted, uses macro average
877
+ * @example
878
+ * expectStats(predictions, groundTruth)
879
+ * .field("sentiment")
880
+ * .recall("positive").toBeAtLeast(0.7)
872
881
  */
873
- toHaveF1Above(classOrThreshold, threshold) {
882
+ recall(targetClass) {
874
883
  this.validateGroundTruth();
875
884
  const metrics = computeClassificationMetrics(this.actualValues, this.expectedValues);
876
- let actualF1;
877
- let targetClass;
878
- let actualThreshold;
879
- if (typeof classOrThreshold === "number") {
880
- actualF1 = metrics.macroAvg.f1;
881
- actualThreshold = classOrThreshold;
885
+ let metricValue;
886
+ if (targetClass === void 0) {
887
+ metricValue = metrics.macroAvg.recall;
882
888
  } else {
883
- targetClass = classOrThreshold;
884
- actualThreshold = threshold;
885
889
  const classMetrics = metrics.perClass[targetClass];
886
890
  if (!classMetrics) {
887
- throw new chunkBFGA2NUB_cjs.AssertionError(
891
+ throw new chunk4BKZPVY4_cjs.AssertionError(
888
892
  `Class "${targetClass}" not found in predictions`,
889
893
  targetClass,
890
894
  Object.keys(metrics.perClass),
891
895
  this.fieldName
892
896
  );
893
897
  }
894
- actualF1 = classMetrics.f1;
898
+ metricValue = classMetrics.recall;
895
899
  }
896
- const passed = actualF1 >= actualThreshold;
897
- const result = {
898
- type: "f1",
899
- passed,
900
- message: passed ? `F1${targetClass ? ` for "${targetClass}"` : ""} ${(actualF1 * 100).toFixed(1)}% is above ${(actualThreshold * 100).toFixed(1)}%` : `F1${targetClass ? ` for "${targetClass}"` : ""} ${(actualF1 * 100).toFixed(1)}% is below threshold ${(actualThreshold * 100).toFixed(1)}%`,
901
- expected: actualThreshold,
902
- actual: actualF1,
903
- field: this.fieldName,
904
- class: targetClass
905
- };
906
- this.assertions.push(result);
907
- chunkBFGA2NUB_cjs.recordAssertion(result);
908
- return this;
900
+ return new MetricMatcher({
901
+ parent: this,
902
+ metricName: "Recall",
903
+ metricValue,
904
+ fieldName: this.fieldName,
905
+ targetClass,
906
+ assertions: this.assertions
907
+ });
909
908
  }
909
+ // ============================================================================
910
+ // Regression Metric Getters
911
+ // ============================================================================
910
912
  /**
911
- * Includes the confusion matrix in the report
913
+ * Access Mean Absolute Error metric for assertions
914
+ * @example
915
+ * expectStats(predictions, groundTruth)
916
+ * .field("score")
917
+ * .mae.toBeAtMost(0.1)
912
918
  */
913
- toHaveConfusionMatrix() {
914
- const metrics = computeClassificationMetrics(this.actualValues, this.expectedValues);
915
- const fieldResult = {
916
- field: this.fieldName,
917
- metrics,
918
- binarized: false
919
- };
920
- chunkBFGA2NUB_cjs.recordFieldMetrics(fieldResult);
921
- const result = {
922
- type: "confusionMatrix",
923
- passed: true,
924
- message: `Confusion matrix recorded for field "${this.fieldName}"`,
925
- field: this.fieldName
926
- };
927
- this.assertions.push(result);
928
- chunkBFGA2NUB_cjs.recordAssertion(result);
929
- return this;
919
+ get mae() {
920
+ const { actual, expected } = this.validateRegressionInputs();
921
+ const metrics = computeRegressionMetrics(actual, expected);
922
+ return new MetricMatcher({
923
+ parent: this,
924
+ metricName: "MAE",
925
+ metricValue: metrics.mae,
926
+ fieldName: this.fieldName,
927
+ assertions: this.assertions,
928
+ formatValue: (v) => v.toFixed(4)
929
+ });
930
+ }
931
+ /**
932
+ * Access Root Mean Squared Error metric for assertions
933
+ * @example
934
+ * expectStats(predictions, groundTruth)
935
+ * .field("score")
936
+ * .rmse.toBeAtMost(0.15)
937
+ */
938
+ get rmse() {
939
+ const { actual, expected } = this.validateRegressionInputs();
940
+ const metrics = computeRegressionMetrics(actual, expected);
941
+ return new MetricMatcher({
942
+ parent: this,
943
+ metricName: "RMSE",
944
+ metricValue: metrics.rmse,
945
+ fieldName: this.fieldName,
946
+ assertions: this.assertions,
947
+ formatValue: (v) => v.toFixed(4)
948
+ });
930
949
  }
931
950
  /**
932
- * Asserts that a percentage of values are below or equal to a threshold.
933
- * This is a distributional assertion that only looks at actual values (no ground truth required).
934
- *
951
+ * Access R-squared (coefficient of determination) metric for assertions
952
+ * @example
953
+ * expectStats(predictions, groundTruth)
954
+ * .field("score")
955
+ * .r2.toBeAtLeast(0.8)
956
+ */
957
+ get r2() {
958
+ const { actual, expected } = this.validateRegressionInputs();
959
+ const metrics = computeRegressionMetrics(actual, expected);
960
+ return new MetricMatcher({
961
+ parent: this,
962
+ metricName: "R\xB2",
963
+ metricValue: metrics.r2,
964
+ fieldName: this.fieldName,
965
+ assertions: this.assertions,
966
+ formatValue: (v) => v.toFixed(4)
967
+ });
968
+ }
969
+ // ============================================================================
970
+ // Distribution Assertions
971
+ // ============================================================================
972
+ /**
973
+ * Assert on the percentage of values below or equal to a threshold
935
974
  * @param valueThreshold - The value threshold to compare against
936
- * @param percentageThreshold - The minimum percentage (0-1) of values that should be <= valueThreshold
937
- * @returns this for method chaining
938
- *
939
975
  * @example
940
- * // Assert that 90% of confidence scores are below 0.5
941
976
  * expectStats(predictions)
942
977
  * .field("confidence")
943
- * .toHavePercentageBelow(0.5, 0.9)
978
+ * .percentageBelow(0.5).toBeAtLeast(0.9)
944
979
  */
945
- toHavePercentageBelow(valueThreshold, percentageThreshold) {
980
+ percentageBelow(valueThreshold) {
946
981
  const numericActual = filterNumericValues(this.actualValues);
947
982
  if (numericActual.length === 0) {
948
- throw new chunkBFGA2NUB_cjs.AssertionError(
983
+ throw new chunk4BKZPVY4_cjs.AssertionError(
949
984
  `Field '${this.fieldName}' contains no numeric values (found 0 numeric out of ${this.actualValues.length} total values)`,
950
- percentageThreshold,
985
+ void 0,
951
986
  void 0,
952
987
  this.fieldName
953
988
  );
954
989
  }
955
990
  const actualPercentage = calculatePercentageBelow(numericActual, valueThreshold);
956
- const passed = actualPercentage >= percentageThreshold;
957
- const result = {
958
- type: "percentageBelow",
959
- passed,
960
- message: passed ? `${(actualPercentage * 100).toFixed(1)}% of '${this.fieldName}' values are below or equal to ${valueThreshold} (expected >= ${(percentageThreshold * 100).toFixed(1)}%)` : `Only ${(actualPercentage * 100).toFixed(1)}% of '${this.fieldName}' values are below or equal to ${valueThreshold} (expected >= ${(percentageThreshold * 100).toFixed(1)}%)`,
961
- expected: percentageThreshold,
962
- actual: actualPercentage,
963
- field: this.fieldName
964
- };
965
- this.assertions.push(result);
966
- chunkBFGA2NUB_cjs.recordAssertion(result);
967
- return this;
991
+ return new PercentageMatcher({
992
+ parent: this,
993
+ fieldName: this.fieldName,
994
+ valueThreshold,
995
+ direction: "below",
996
+ actualPercentage,
997
+ assertions: this.assertions
998
+ });
968
999
  }
969
1000
  /**
970
- * Asserts that a percentage of values are above a threshold.
971
- * This is a distributional assertion that only looks at actual values (no ground truth required).
972
- *
1001
+ * Assert on the percentage of values above a threshold
973
1002
  * @param valueThreshold - The value threshold to compare against
974
- * @param percentageThreshold - The minimum percentage (0-1) of values that should be > valueThreshold
975
- * @returns this for method chaining
976
- *
977
1003
  * @example
978
- * // Assert that 80% of quality scores are above 0.7
979
1004
  * expectStats(predictions)
980
1005
  * .field("quality")
981
- * .toHavePercentageAbove(0.7, 0.8)
1006
+ * .percentageAbove(0.7).toBeAtLeast(0.8)
982
1007
  */
983
- toHavePercentageAbove(valueThreshold, percentageThreshold) {
1008
+ percentageAbove(valueThreshold) {
984
1009
  const numericActual = filterNumericValues(this.actualValues);
985
1010
  if (numericActual.length === 0) {
986
- throw new chunkBFGA2NUB_cjs.AssertionError(
1011
+ throw new chunk4BKZPVY4_cjs.AssertionError(
987
1012
  `Field '${this.fieldName}' contains no numeric values (found 0 numeric out of ${this.actualValues.length} total values)`,
988
- percentageThreshold,
1013
+ void 0,
989
1014
  void 0,
990
1015
  this.fieldName
991
1016
  );
992
1017
  }
993
1018
  const actualPercentage = calculatePercentageAbove(numericActual, valueThreshold);
994
- const passed = actualPercentage >= percentageThreshold;
995
- const result = {
996
- type: "percentageAbove",
997
- passed,
998
- message: passed ? `${(actualPercentage * 100).toFixed(1)}% of '${this.fieldName}' values are above ${valueThreshold} (expected >= ${(percentageThreshold * 100).toFixed(1)}%)` : `Only ${(actualPercentage * 100).toFixed(1)}% of '${this.fieldName}' values are above ${valueThreshold} (expected >= ${(percentageThreshold * 100).toFixed(1)}%)`,
999
- expected: percentageThreshold,
1000
- actual: actualPercentage,
1001
- field: this.fieldName
1002
- };
1003
- this.assertions.push(result);
1004
- chunkBFGA2NUB_cjs.recordAssertion(result);
1005
- return this;
1019
+ return new PercentageMatcher({
1020
+ parent: this,
1021
+ fieldName: this.fieldName,
1022
+ valueThreshold,
1023
+ direction: "above",
1024
+ actualPercentage,
1025
+ assertions: this.assertions
1026
+ });
1006
1027
  }
1007
1028
  // ============================================================================
1008
- // Regression Assertions
1029
+ // Display Methods
1009
1030
  // ============================================================================
1010
1031
  /**
1011
- * Validates that ground truth exists and both arrays contain numeric values.
1012
- * Returns the filtered numeric arrays for regression metrics.
1013
- */
1014
- validateRegressionInputs() {
1015
- this.validateGroundTruth();
1016
- const numericActual = filterNumericValues(this.actualValues);
1017
- const numericExpected = filterNumericValues(this.expectedValues);
1018
- if (numericActual.length === 0) {
1019
- throw new chunkBFGA2NUB_cjs.AssertionError(
1020
- `Regression metric requires numeric values, but field "${this.fieldName}" has no numeric actual values.`,
1021
- void 0,
1022
- void 0,
1023
- this.fieldName
1024
- );
1025
- }
1026
- if (numericExpected.length === 0) {
1027
- throw new chunkBFGA2NUB_cjs.AssertionError(
1028
- `Regression metric requires numeric values, but field "${this.fieldName}" has no numeric expected values.`,
1029
- void 0,
1030
- void 0,
1031
- this.fieldName
1032
- );
1033
- }
1034
- if (numericActual.length !== numericExpected.length) {
1035
- throw new chunkBFGA2NUB_cjs.AssertionError(
1036
- `Regression metric requires equal-length arrays, but got ${numericActual.length} actual and ${numericExpected.length} expected values.`,
1037
- numericExpected.length,
1038
- numericActual.length,
1039
- this.fieldName
1040
- );
1041
- }
1042
- return { actual: numericActual, expected: numericExpected };
1043
- }
1044
- /**
1045
- * Asserts that Mean Absolute Error is below a threshold.
1046
- * Requires numeric values in both actual and expected.
1047
- *
1048
- * @param threshold - Maximum allowed MAE
1049
- * @returns this for method chaining
1050
- *
1032
+ * Displays the confusion matrix in the report
1033
+ * This is not an assertion - it always passes and just records the matrix for display
1051
1034
  * @example
1052
1035
  * expectStats(predictions, groundTruth)
1053
- * .field("score")
1054
- * .toHaveMAEBelow(0.1)
1036
+ * .field("sentiment")
1037
+ * .accuracy.toBeAtLeast(0.8)
1038
+ * .displayConfusionMatrix()
1055
1039
  */
1056
- toHaveMAEBelow(threshold) {
1057
- const { actual, expected } = this.validateRegressionInputs();
1058
- const metrics = computeRegressionMetrics(actual, expected);
1059
- const passed = metrics.mae <= threshold;
1060
- const result = {
1061
- type: "mae",
1062
- passed,
1063
- message: passed ? `MAE ${metrics.mae.toFixed(4)} is below ${threshold}` : `MAE ${metrics.mae.toFixed(4)} exceeds threshold ${threshold}`,
1064
- expected: threshold,
1065
- actual: metrics.mae,
1066
- field: this.fieldName
1067
- };
1068
- this.assertions.push(result);
1069
- chunkBFGA2NUB_cjs.recordAssertion(result);
1070
- return this;
1071
- }
1072
- /**
1073
- * Asserts that Root Mean Squared Error is below a threshold.
1074
- * Requires numeric values in both actual and expected.
1075
- *
1076
- * @param threshold - Maximum allowed RMSE
1077
- * @returns this for method chaining
1078
- *
1079
- * @example
1080
- * expectStats(predictions, groundTruth)
1081
- * .field("score")
1082
- * .toHaveRMSEBelow(0.15)
1083
- */
1084
- toHaveRMSEBelow(threshold) {
1085
- const { actual, expected } = this.validateRegressionInputs();
1086
- const metrics = computeRegressionMetrics(actual, expected);
1087
- const passed = metrics.rmse <= threshold;
1088
- const result = {
1089
- type: "rmse",
1090
- passed,
1091
- message: passed ? `RMSE ${metrics.rmse.toFixed(4)} is below ${threshold}` : `RMSE ${metrics.rmse.toFixed(4)} exceeds threshold ${threshold}`,
1092
- expected: threshold,
1093
- actual: metrics.rmse,
1094
- field: this.fieldName
1040
+ displayConfusionMatrix() {
1041
+ const metrics = computeClassificationMetrics(this.actualValues, this.expectedValues);
1042
+ const fieldResult = {
1043
+ field: this.fieldName,
1044
+ metrics,
1045
+ binarized: false
1095
1046
  };
1096
- this.assertions.push(result);
1097
- chunkBFGA2NUB_cjs.recordAssertion(result);
1098
- return this;
1099
- }
1100
- /**
1101
- * Asserts that R-squared (coefficient of determination) is above a threshold.
1102
- * R² measures how well the predictions explain the variance in expected values.
1103
- * R² = 1.0 means perfect prediction, R² = 0 means prediction is no better than mean.
1104
- * Requires numeric values in both actual and expected.
1105
- *
1106
- * @param threshold - Minimum required R² value (0-1)
1107
- * @returns this for method chaining
1108
- *
1109
- * @example
1110
- * expectStats(predictions, groundTruth)
1111
- * .field("score")
1112
- * .toHaveR2Above(0.8)
1113
- */
1114
- toHaveR2Above(threshold) {
1115
- const { actual, expected } = this.validateRegressionInputs();
1116
- const metrics = computeRegressionMetrics(actual, expected);
1117
- const passed = metrics.r2 >= threshold;
1047
+ chunk4BKZPVY4_cjs.recordFieldMetrics(fieldResult);
1118
1048
  const result = {
1119
- type: "r2",
1120
- passed,
1121
- message: passed ? `R\xB2 ${metrics.r2.toFixed(4)} is above ${threshold}` : `R\xB2 ${metrics.r2.toFixed(4)} is below threshold ${threshold}`,
1122
- expected: threshold,
1123
- actual: metrics.r2,
1049
+ type: "confusionMatrix",
1050
+ passed: true,
1051
+ message: `Confusion matrix recorded for field "${this.fieldName}"`,
1124
1052
  field: this.fieldName
1125
1053
  };
1126
1054
  this.assertions.push(result);
1127
- chunkBFGA2NUB_cjs.recordAssertion(result);
1055
+ chunk4BKZPVY4_cjs.recordAssertion(result);
1128
1056
  return this;
1129
1057
  }
1058
+ // ============================================================================
1059
+ // Utility Methods
1060
+ // ============================================================================
1130
1061
  /**
1131
- * Gets the computed metrics for this field
1062
+ * Gets the computed classification metrics for this field
1132
1063
  */
1133
1064
  getMetrics() {
1134
1065
  return computeClassificationMetrics(this.actualValues, this.expectedValues);
@@ -1161,7 +1092,7 @@ function normalizeInput(input) {
1161
1092
  }));
1162
1093
  }
1163
1094
  throw new Error(
1164
- "Invalid input to expectStats(): expected ModelRunResult, Prediction[], or AlignedRecord[]"
1095
+ "Invalid input to expectStats(): expected { aligned: AlignedRecord[] }, Prediction[], or AlignedRecord[]"
1165
1096
  );
1166
1097
  }
1167
1098
  function expectStats(inputOrActual, expected, options) {
@@ -1208,63 +1139,63 @@ var ExpectStats = class {
1208
1139
 
1209
1140
  Object.defineProperty(exports, "AssertionError", {
1210
1141
  enumerable: true,
1211
- get: function () { return chunkBFGA2NUB_cjs.AssertionError; }
1142
+ get: function () { return chunk4BKZPVY4_cjs.AssertionError; }
1212
1143
  });
1213
1144
  Object.defineProperty(exports, "ConfigurationError", {
1214
1145
  enumerable: true,
1215
- get: function () { return chunkBFGA2NUB_cjs.ConfigurationError; }
1146
+ get: function () { return chunk4BKZPVY4_cjs.ConfigurationError; }
1216
1147
  });
1217
1148
  Object.defineProperty(exports, "ConsoleReporter", {
1218
1149
  enumerable: true,
1219
- get: function () { return chunkBFGA2NUB_cjs.ConsoleReporter; }
1150
+ get: function () { return chunk4BKZPVY4_cjs.ConsoleReporter; }
1220
1151
  });
1221
1152
  Object.defineProperty(exports, "DatasetError", {
1222
1153
  enumerable: true,
1223
- get: function () { return chunkBFGA2NUB_cjs.DatasetError; }
1154
+ get: function () { return chunk4BKZPVY4_cjs.DatasetError; }
1224
1155
  });
1225
1156
  Object.defineProperty(exports, "EvalSenseError", {
1226
1157
  enumerable: true,
1227
- get: function () { return chunkBFGA2NUB_cjs.EvalSenseError; }
1158
+ get: function () { return chunk4BKZPVY4_cjs.EvalSenseError; }
1228
1159
  });
1229
1160
  Object.defineProperty(exports, "ExitCodes", {
1230
1161
  enumerable: true,
1231
- get: function () { return chunkBFGA2NUB_cjs.ExitCodes; }
1162
+ get: function () { return chunk4BKZPVY4_cjs.ExitCodes; }
1232
1163
  });
1233
1164
  Object.defineProperty(exports, "IntegrityError", {
1234
1165
  enumerable: true,
1235
- get: function () { return chunkBFGA2NUB_cjs.IntegrityError; }
1166
+ get: function () { return chunk4BKZPVY4_cjs.IntegrityError; }
1236
1167
  });
1237
1168
  Object.defineProperty(exports, "JsonReporter", {
1238
1169
  enumerable: true,
1239
- get: function () { return chunkBFGA2NUB_cjs.JsonReporter; }
1170
+ get: function () { return chunk4BKZPVY4_cjs.JsonReporter; }
1240
1171
  });
1241
1172
  Object.defineProperty(exports, "TestExecutionError", {
1242
1173
  enumerable: true,
1243
- get: function () { return chunkBFGA2NUB_cjs.TestExecutionError; }
1174
+ get: function () { return chunk4BKZPVY4_cjs.TestExecutionError; }
1244
1175
  });
1245
1176
  Object.defineProperty(exports, "buildConfusionMatrix", {
1246
1177
  enumerable: true,
1247
- get: function () { return chunkBFGA2NUB_cjs.buildConfusionMatrix; }
1178
+ get: function () { return chunk4BKZPVY4_cjs.buildConfusionMatrix; }
1248
1179
  });
1249
1180
  Object.defineProperty(exports, "discoverEvalFiles", {
1250
1181
  enumerable: true,
1251
- get: function () { return chunkBFGA2NUB_cjs.discoverEvalFiles; }
1182
+ get: function () { return chunk4BKZPVY4_cjs.discoverEvalFiles; }
1252
1183
  });
1253
1184
  Object.defineProperty(exports, "executeEvalFiles", {
1254
1185
  enumerable: true,
1255
- get: function () { return chunkBFGA2NUB_cjs.executeEvalFiles; }
1186
+ get: function () { return chunk4BKZPVY4_cjs.executeEvalFiles; }
1256
1187
  });
1257
1188
  Object.defineProperty(exports, "formatConfusionMatrix", {
1258
1189
  enumerable: true,
1259
- get: function () { return chunkBFGA2NUB_cjs.formatConfusionMatrix; }
1190
+ get: function () { return chunk4BKZPVY4_cjs.formatConfusionMatrix; }
1260
1191
  });
1261
1192
  Object.defineProperty(exports, "getExitCode", {
1262
1193
  enumerable: true,
1263
- get: function () { return chunkBFGA2NUB_cjs.getExitCode; }
1194
+ get: function () { return chunk4BKZPVY4_cjs.getExitCode; }
1264
1195
  });
1265
1196
  Object.defineProperty(exports, "parseReport", {
1266
1197
  enumerable: true,
1267
- get: function () { return chunkBFGA2NUB_cjs.parseReport; }
1198
+ get: function () { return chunk4BKZPVY4_cjs.parseReport; }
1268
1199
  });
1269
1200
  exports.afterAll = afterAll;
1270
1201
  exports.afterEach = afterEach;
@@ -1277,16 +1208,12 @@ exports.computeClassificationMetrics = computeClassificationMetrics;
1277
1208
  exports.computeF1 = computeF1;
1278
1209
  exports.computePrecision = computePrecision;
1279
1210
  exports.computeRecall = computeRecall;
1280
- exports.createDataset = createDataset;
1281
1211
  exports.describe = describe;
1282
1212
  exports.evalTest = evalTest;
1283
1213
  exports.expectStats = expectStats;
1284
1214
  exports.extractFieldValues = extractFieldValues;
1285
1215
  exports.filterComplete = filterComplete;
1286
1216
  exports.it = it;
1287
- exports.loadDataset = loadDataset;
1288
- exports.runModel = runModel;
1289
- exports.runModelParallel = runModelParallel;
1290
1217
  exports.test = test;
1291
1218
  exports.validatePredictions = validatePredictions;
1292
1219
  //# sourceMappingURL=index.cjs.map