evalsense 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +235 -98
- package/dist/{chunk-BFGA2NUB.cjs → chunk-4BKZPVY4.cjs} +13 -6
- package/dist/chunk-4BKZPVY4.cjs.map +1 -0
- package/dist/{chunk-IYLSY7NX.js → chunk-IUVDDMJ3.js} +13 -6
- package/dist/chunk-IUVDDMJ3.js.map +1 -0
- package/dist/chunk-NCCQRZ2Y.cjs +1141 -0
- package/dist/chunk-NCCQRZ2Y.cjs.map +1 -0
- package/dist/chunk-TDGWDK2L.js +1108 -0
- package/dist/chunk-TDGWDK2L.js.map +1 -0
- package/dist/cli.cjs +11 -11
- package/dist/cli.js +1 -1
- package/dist/index-CATqAHNK.d.cts +416 -0
- package/dist/index-CoMpaW-K.d.ts +416 -0
- package/dist/index.cjs +507 -580
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +210 -161
- package/dist/index.d.ts +210 -161
- package/dist/index.js +455 -524
- package/dist/index.js.map +1 -1
- package/dist/metrics/index.cjs +103 -342
- package/dist/metrics/index.cjs.map +1 -1
- package/dist/metrics/index.d.cts +260 -31
- package/dist/metrics/index.d.ts +260 -31
- package/dist/metrics/index.js +24 -312
- package/dist/metrics/index.js.map +1 -1
- package/dist/metrics/opinionated/index.cjs +5 -5
- package/dist/metrics/opinionated/index.d.cts +2 -163
- package/dist/metrics/opinionated/index.d.ts +2 -163
- package/dist/metrics/opinionated/index.js +1 -1
- package/dist/{types-C71p0wzM.d.cts → types-D0hzfyKm.d.cts} +1 -13
- package/dist/{types-C71p0wzM.d.ts → types-D0hzfyKm.d.ts} +1 -13
- package/package.json +1 -1
- package/dist/chunk-BFGA2NUB.cjs.map +0 -1
- package/dist/chunk-IYLSY7NX.js.map +0 -1
- package/dist/chunk-RZFLCWTW.cjs +0 -942
- package/dist/chunk-RZFLCWTW.cjs.map +0 -1
- package/dist/chunk-Z3U6AUWX.js +0 -925
- package/dist/chunk-Z3U6AUWX.js.map +0 -1
package/README.md
CHANGED
|
@@ -5,33 +5,183 @@
|
|
|
5
5
|
[](https://www.npmjs.com/package/evalsense)
|
|
6
6
|
[](https://opensource.org/licenses/Apache-2.0)
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
# evalsense
|
|
9
|
+
|
|
10
|
+
**evalsense is like Jest for testing code that uses LLMs.**
|
|
11
|
+
|
|
12
|
+
It helps engineers answer one simple question:
|
|
13
|
+
|
|
14
|
+
> **“Is my LLM-powered code good enough to ship?”**
|
|
15
|
+
|
|
16
|
+
Instead of checking a few example responses, evalsense runs your code across many inputs, measures overall quality, and gives you a clear **pass / fail** result — locally or in CI.
|
|
17
|
+
|
|
18
|
+
evalsense is built for **engineers deploying LLM-enabled features**, not for training or benchmarking models.
|
|
19
|
+
|
|
20
|
+
## What problem does evalsense solve?
|
|
21
|
+
|
|
22
|
+
Most LLM evaluation tools focus on individual outputs:
|
|
23
|
+
|
|
24
|
+
> _“How good is this one response?”_
|
|
25
|
+
|
|
26
|
+
That’s useful, but it doesn’t tell you whether your system is reliable.
|
|
27
|
+
|
|
28
|
+
evalsense answers a different question:
|
|
29
|
+
|
|
30
|
+
> **“Does my code consistently meet our quality bar?”**
|
|
31
|
+
|
|
32
|
+
It treats evaluation like testing:
|
|
33
|
+
|
|
34
|
+
- run your code many times
|
|
35
|
+
- measure results across all runs
|
|
36
|
+
- fail fast if quality drops
|
|
37
|
+
|
|
38
|
+
## How evalsense works (in plain terms)
|
|
39
|
+
|
|
40
|
+
At a high level, evalsense:
|
|
41
|
+
|
|
42
|
+
1. Runs your code
|
|
43
|
+
(this can be a function, module, API call, or a fixed dataset)
|
|
44
|
+
2. Collects the results
|
|
45
|
+
3. Scores them using:
|
|
46
|
+
- standard metrics (accuracy, precision, recall, F1)
|
|
47
|
+
- LLM-as-judge checks (e.g. relevance, hallucination, correctness)
|
|
48
|
+
|
|
49
|
+
4. Aggregates scores across all results
|
|
50
|
+
5. Applies rules you define
|
|
51
|
+
6. Passes or fails the test
|
|
52
|
+
|
|
53
|
+
Think of it as **unit tests for output quality**.
|
|
54
|
+
|
|
55
|
+
## A quick example
|
|
56
|
+
|
|
57
|
+
```ts
|
|
58
|
+
describe("test answer quality", async () => {
|
|
59
|
+
evalTest("toxicity detection", async () => {
|
|
60
|
+
const answers = await generateAnswersDataset(testQuestions);
|
|
61
|
+
const toxicityScore = await toxicity(answers);
|
|
62
|
+
|
|
63
|
+
expectStats(toxicityScore)
|
|
64
|
+
.field("score")
|
|
65
|
+
.percentageBelow(0.5).toBeAtLeast(0.5)
|
|
66
|
+
};
|
|
67
|
+
|
|
68
|
+
evalTest("correctness score", async () => {
|
|
69
|
+
const answers = await generateAnswersDataset(testQuestions);
|
|
70
|
+
const groundTruth = await JSON.parse(readFileSync("truth-dataset.json"));
|
|
71
|
+
|
|
72
|
+
expectStats(answers, groundTruth)
|
|
73
|
+
.field("label")
|
|
74
|
+
.accuracy.toBeAtLeast(0.9)
|
|
75
|
+
.precision("positive").toBeAtLeast(0.7)
|
|
76
|
+
.recall("positive").toBeAtLeast(0.7)
|
|
77
|
+
.displayConfusionMatrix();
|
|
78
|
+
}
|
|
79
|
+
});
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Running the test:
|
|
83
|
+
|
|
84
|
+
```markdown
|
|
85
|
+
**test answer quality**
|
|
86
|
+
|
|
87
|
+
✓ toxicity detection (1ms)
|
|
88
|
+
✓ 50.0% of 'score' values are below or equal to 0.5 (expected >= 50.0%)
|
|
89
|
+
Expected: 50.0%
|
|
90
|
+
Actual: 50.0%
|
|
91
|
+
|
|
92
|
+
✓ correctness score (1ms)
|
|
93
|
+
Field: label | Accuracy: 100.0% | F1: 100.0%
|
|
94
|
+
negative: P=100.0% R=100.0% F1=100.0% (n=5)
|
|
95
|
+
positive: P=100.0% R=100.0% F1=100.0% (n=5)
|
|
96
|
+
|
|
97
|
+
Confusion Matrix: label
|
|
98
|
+
|
|
99
|
+
Predicted → correct incorrect
|
|
100
|
+
Actual ↓
|
|
101
|
+
correct 5 0
|
|
102
|
+
incorrect 0 5
|
|
103
|
+
|
|
104
|
+
✓ Accuracy 100.0% >= 90.0%
|
|
105
|
+
Expected: 90.0%
|
|
106
|
+
Actual: 100.0%
|
|
107
|
+
✓ Precision for 'positive' 100.0% >= 70.0%
|
|
108
|
+
Expected: 70.0%
|
|
109
|
+
Actual: 100.0%
|
|
110
|
+
✓ Recall for 'positive' 100.0% >= 70.0%
|
|
111
|
+
Expected: 70.0%
|
|
112
|
+
Actual: 100.0%
|
|
113
|
+
✓ Confusion matrix recorded for field "label"
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
If the quality drops, the test fails — just like a normal test.
|
|
117
|
+
|
|
118
|
+
## Two common ways to use evalsense
|
|
9
119
|
|
|
10
|
-
|
|
11
|
-
> **New in v0.3.0:** Regression assertions (MAE, RMSE, R²) and flexible ID matching for custom identifier fields! [See migration guide](./docs/migration-v0.3.0.md).
|
|
12
|
-
> **New in v0.2.x:** Built-in adapters for OpenAI, Anthropic, and OpenRouter - no boilerplate needed!
|
|
13
|
-
> **New in v0.2.0:** LLM-powered metrics for hallucination, relevance, faithfulness, and toxicity detection. [See migration guide](./docs/migration-v0.2.md).
|
|
120
|
+
### 1. When you **don’t have ground truth**
|
|
14
121
|
|
|
15
|
-
|
|
122
|
+
Use this when there are no labels.
|
|
16
123
|
|
|
17
|
-
|
|
124
|
+
Example:
|
|
18
125
|
|
|
19
|
-
-
|
|
20
|
-
-
|
|
21
|
-
-
|
|
22
|
-
-
|
|
23
|
-
- ✅ Supporting **deterministic CI/CD** integration with specific exit codes
|
|
126
|
+
- Run your LLM-powered function
|
|
127
|
+
- Score outputs using an LLM-as-judge (relevance, hallucination, etc.)
|
|
128
|
+
- Define what “acceptable” means
|
|
129
|
+
- Fail if quality degrades
|
|
24
130
|
|
|
25
|
-
|
|
131
|
+
**Example rule:**
|
|
26
132
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
133
|
+
> “Average relevance score must be at least 0.75”
|
|
134
|
+
|
|
135
|
+
### 2. When you **do have ground truth**
|
|
136
|
+
|
|
137
|
+
Use this when correct answers are known.
|
|
138
|
+
|
|
139
|
+
Example:
|
|
140
|
+
|
|
141
|
+
- Run your prediction code
|
|
142
|
+
- Compare outputs with ground truth
|
|
143
|
+
- Compute accuracy, precision, recall, F1
|
|
144
|
+
- Optionally add LLM-as-judge checks
|
|
145
|
+
- Fail if metrics fall below thresholds
|
|
146
|
+
|
|
147
|
+
**Example rule:**
|
|
148
|
+
|
|
149
|
+
> “F1 score must be ≥ 0.85 and false positives ≤ 5%”
|
|
150
|
+
|
|
151
|
+
## What evalsense is _not_
|
|
152
|
+
|
|
153
|
+
evalsense is **not**:
|
|
154
|
+
|
|
155
|
+
- A tool for scoring single responses in isolation
|
|
156
|
+
- A dashboard or experiment-tracking platform
|
|
157
|
+
- A system for analyzing agent step-by-step traces
|
|
158
|
+
- A model benchmarking or training framework
|
|
159
|
+
|
|
160
|
+
If you mainly want scores, charts, or leaderboards, other tools may be a better fit.
|
|
161
|
+
|
|
162
|
+
## Who should use evalsense
|
|
163
|
+
|
|
164
|
+
evalsense is a good fit if you:
|
|
165
|
+
|
|
166
|
+
- are **shipping LLM-powered features**
|
|
167
|
+
- want **clear pass/fail quality gates**
|
|
168
|
+
- run checks in **CI/CD**
|
|
169
|
+
- care about **regressions** (“did this get worse?”)
|
|
170
|
+
- already think in terms of tests
|
|
171
|
+
- work in **JavaScript / TypeScript**
|
|
172
|
+
|
|
173
|
+
## Who should _not_ use evalsense
|
|
174
|
+
|
|
175
|
+
evalsense may not be right for you if you:
|
|
176
|
+
|
|
177
|
+
- only care about individual output scores
|
|
178
|
+
- want visual dashboards or experiment UIs
|
|
179
|
+
- need deep agent trace inspection
|
|
180
|
+
- are training or benchmarking foundation models
|
|
181
|
+
|
|
182
|
+
## In one sentence
|
|
183
|
+
|
|
184
|
+
**evalsense lets you test the quality of LLM-powered code the same way you test everything else — with clear pass/fail results.**
|
|
35
185
|
|
|
36
186
|
## Installation
|
|
37
187
|
|
|
@@ -75,10 +225,10 @@ describe("Sentiment classifier", () => {
|
|
|
75
225
|
// 3. Assert on statistical properties
|
|
76
226
|
expectStats(predictions, groundTruth)
|
|
77
227
|
.field("sentiment")
|
|
78
|
-
.
|
|
79
|
-
.
|
|
80
|
-
.
|
|
81
|
-
.
|
|
228
|
+
.accuracy.toBeAtLeast(0.8)
|
|
229
|
+
.recall("positive").toBeAtLeast(0.7)
|
|
230
|
+
.precision("positive").toBeAtLeast(0.7)
|
|
231
|
+
.displayConfusionMatrix();
|
|
82
232
|
});
|
|
83
233
|
});
|
|
84
234
|
```
|
|
@@ -118,10 +268,10 @@ describe("Spam classifier", () => {
|
|
|
118
268
|
|
|
119
269
|
expectStats(predictions, groundTruth)
|
|
120
270
|
.field("isSpam")
|
|
121
|
-
.
|
|
122
|
-
.
|
|
123
|
-
.
|
|
124
|
-
.
|
|
271
|
+
.accuracy.toBeAtLeast(0.9)
|
|
272
|
+
.precision(true).toBeAtLeast(0.85) // Precision for spam=true
|
|
273
|
+
.recall(true).toBeAtLeast(0.85) // Recall for spam=true
|
|
274
|
+
.displayConfusionMatrix();
|
|
125
275
|
});
|
|
126
276
|
});
|
|
127
277
|
```
|
|
@@ -146,9 +296,9 @@ describe("Hallucination detector", () => {
|
|
|
146
296
|
expectStats(predictions, groundTruth)
|
|
147
297
|
.field("hallucinated")
|
|
148
298
|
.binarize(0.3) // >= 0.3 means hallucinated
|
|
149
|
-
.
|
|
150
|
-
.
|
|
151
|
-
.
|
|
299
|
+
.recall(true).toBeAtLeast(0.7)
|
|
300
|
+
.precision(true).toBeAtLeast(0.6)
|
|
301
|
+
.displayConfusionMatrix();
|
|
152
302
|
});
|
|
153
303
|
});
|
|
154
304
|
```
|
|
@@ -170,11 +320,11 @@ describe("Intent classifier", () => {
|
|
|
170
320
|
|
|
171
321
|
expectStats(predictions, groundTruth)
|
|
172
322
|
.field("intent")
|
|
173
|
-
.
|
|
174
|
-
.
|
|
175
|
-
.
|
|
176
|
-
.
|
|
177
|
-
.
|
|
323
|
+
.accuracy.toBeAtLeast(0.85)
|
|
324
|
+
.recall("purchase").toBeAtLeast(0.8)
|
|
325
|
+
.recall("support").toBeAtLeast(0.8)
|
|
326
|
+
.recall("general").toBeAtLeast(0.7)
|
|
327
|
+
.displayConfusionMatrix();
|
|
178
328
|
});
|
|
179
329
|
});
|
|
180
330
|
```
|
|
@@ -211,7 +361,7 @@ describe("LLM classifier", () => {
|
|
|
211
361
|
5
|
|
212
362
|
);
|
|
213
363
|
|
|
214
|
-
expectStats(predictions, groundTruth).field("category").
|
|
364
|
+
expectStats(predictions, groundTruth).field("category").accuracy.toBeAtLeast(0.9);
|
|
215
365
|
});
|
|
216
366
|
});
|
|
217
367
|
```
|
|
@@ -339,12 +489,6 @@ const predictions = await Promise.all(
|
|
|
339
489
|
);
|
|
340
490
|
```
|
|
341
491
|
|
|
342
|
-
**Helper functions available (optional):**
|
|
343
|
-
|
|
344
|
-
- `loadDataset(path)` - Simple JSON file loader
|
|
345
|
-
- `runModel(dataset, fn)` - Sequential model execution
|
|
346
|
-
- `runModelParallel(dataset, fn, concurrency)` - Parallel execution with concurrency limit
|
|
347
|
-
|
|
348
492
|
### Assertions
|
|
349
493
|
|
|
350
494
|
#### `expectStats(predictions, groundTruth)`
|
|
@@ -354,22 +498,16 @@ Creates a statistical assertion chain from predictions and ground truth. Aligns
|
|
|
354
498
|
```javascript
|
|
355
499
|
expectStats(predictions, groundTruth)
|
|
356
500
|
.field("prediction")
|
|
357
|
-
.
|
|
358
|
-
.
|
|
359
|
-
.
|
|
501
|
+
.accuracy.toBeAtLeast(0.8)
|
|
502
|
+
.f1.toBeAtLeast(0.75)
|
|
503
|
+
.displayConfusionMatrix();
|
|
360
504
|
```
|
|
361
505
|
|
|
362
|
-
**New in v0.3.2: Enhanced Assertion Reporting**
|
|
363
|
-
|
|
364
|
-
- All assertions (passed and failed) now display expected vs actual values
|
|
365
|
-
- Chained assertions evaluate completely instead of short-circuiting on first failure
|
|
366
|
-
- See all metric results in a single run for better debugging
|
|
367
|
-
|
|
368
506
|
**One-argument form (distribution assertions only):**
|
|
369
507
|
|
|
370
508
|
```javascript
|
|
371
509
|
// For distribution monitoring without ground truth
|
|
372
|
-
expectStats(predictions).field("confidence").
|
|
510
|
+
expectStats(predictions).field("confidence").percentageAbove(0.7).toBeAtLeast(0.8);
|
|
373
511
|
```
|
|
374
512
|
|
|
375
513
|
**Common use cases:**
|
|
@@ -397,7 +535,7 @@ Converts continuous scores to binary (>=threshold is true).
|
|
|
397
535
|
expectStats(result)
|
|
398
536
|
.field("score")
|
|
399
537
|
.binarize(0.5) // score >= 0.5 is true
|
|
400
|
-
.
|
|
538
|
+
.accuracy.toBeAtLeast(0.8);
|
|
401
539
|
```
|
|
402
540
|
|
|
403
541
|
### Available Assertions
|
|
@@ -405,43 +543,61 @@ expectStats(result)
|
|
|
405
543
|
#### Classification Metrics
|
|
406
544
|
|
|
407
545
|
```javascript
|
|
408
|
-
// Accuracy
|
|
409
|
-
.
|
|
410
|
-
.
|
|
411
|
-
.
|
|
546
|
+
// Accuracy (macro average for multi-class)
|
|
547
|
+
.accuracy.toBeAtLeast(threshold)
|
|
548
|
+
.accuracy.toBeAbove(threshold)
|
|
549
|
+
.accuracy.toBeAtMost(threshold)
|
|
550
|
+
.accuracy.toBeBelow(threshold)
|
|
551
|
+
|
|
552
|
+
// Precision (per class or macro average)
|
|
553
|
+
.precision("className").toBeAtLeast(threshold)
|
|
554
|
+
.precision().toBeAtLeast(threshold) // macro average
|
|
412
555
|
|
|
413
|
-
//
|
|
414
|
-
.
|
|
415
|
-
.
|
|
556
|
+
// Recall (per class or macro average)
|
|
557
|
+
.recall("className").toBeAtLeast(threshold)
|
|
558
|
+
.recall().toBeAtLeast(threshold) // macro average
|
|
416
559
|
|
|
417
|
-
//
|
|
418
|
-
.
|
|
419
|
-
.
|
|
560
|
+
// F1 Score (macro average)
|
|
561
|
+
.f1.toBeAtLeast(threshold)
|
|
562
|
+
.f1.toBeAbove(threshold)
|
|
420
563
|
|
|
421
|
-
//
|
|
422
|
-
.
|
|
423
|
-
.
|
|
564
|
+
// Regression Metrics
|
|
565
|
+
.mae.toBeAtMost(threshold) // Mean Absolute Error
|
|
566
|
+
.rmse.toBeAtMost(threshold) // Root Mean Squared Error
|
|
567
|
+
.r2.toBeAtLeast(threshold) // R² coefficient
|
|
424
568
|
|
|
425
569
|
// Confusion Matrix
|
|
426
|
-
.
|
|
570
|
+
.displayConfusionMatrix() // Displays confusion matrix (not an assertion)
|
|
571
|
+
```
|
|
572
|
+
|
|
573
|
+
#### Available Matchers
|
|
574
|
+
|
|
575
|
+
All metrics return a matcher object with these comparison methods:
|
|
576
|
+
|
|
577
|
+
```javascript
|
|
578
|
+
.toBeAtLeast(x) // >= x
|
|
579
|
+
.toBeAbove(x) // > x
|
|
580
|
+
.toBeAtMost(x) // <= x
|
|
581
|
+
.toBeBelow(x) // < x
|
|
582
|
+
.toEqual(x, tolerance?) // === x (with optional tolerance for floats)
|
|
427
583
|
```
|
|
428
584
|
|
|
429
|
-
#### Distribution Assertions
|
|
585
|
+
#### Distribution Assertions
|
|
430
586
|
|
|
431
587
|
Distribution assertions validate output distributions **without requiring ground truth**. Use these to monitor that model outputs stay within expected ranges.
|
|
432
588
|
|
|
433
589
|
```javascript
|
|
434
590
|
// Assert that at least 80% of confidence scores are above 0.7
|
|
435
|
-
expectStats(predictions).field("confidence").
|
|
591
|
+
expectStats(predictions).field("confidence").percentageAbove(0.7).toBeAtLeast(0.8);
|
|
436
592
|
|
|
437
593
|
// Assert that at least 90% of toxicity scores are below 0.3
|
|
438
|
-
expectStats(predictions).field("toxicity").
|
|
594
|
+
expectStats(predictions).field("toxicity").percentageBelow(0.3).toBeAtLeast(0.9);
|
|
439
595
|
|
|
440
596
|
// Chain multiple distribution assertions
|
|
441
597
|
expectStats(predictions)
|
|
442
598
|
.field("score")
|
|
443
|
-
.
|
|
444
|
-
.
|
|
599
|
+
.percentageAbove(0.5).toBeAtLeast(0.6) // At least 60% above 0.5
|
|
600
|
+
.percentageBelow(0.9).toBeAtLeast(0.8); // At least 80% below 0.9
|
|
445
601
|
```
|
|
446
602
|
|
|
447
603
|
**Use cases:**
|
|
@@ -453,7 +609,7 @@ expectStats(predictions)
|
|
|
453
609
|
|
|
454
610
|
See [Distribution Assertions Example](./examples/distribution-assertions.eval.js) for complete examples.
|
|
455
611
|
|
|
456
|
-
### Judge Validation
|
|
612
|
+
### Judge Validation
|
|
457
613
|
|
|
458
614
|
Validate judge outputs against human-labeled ground truth using the **two-argument expectStats API**:
|
|
459
615
|
|
|
@@ -475,9 +631,9 @@ const humanLabels = [
|
|
|
475
631
|
// Validate judge performance
|
|
476
632
|
expectStats(judgeOutputs, humanLabels)
|
|
477
633
|
.field("hallucinated")
|
|
478
|
-
.
|
|
479
|
-
.
|
|
480
|
-
.
|
|
634
|
+
.recall(true).toBeAtLeast(0.9) // Don't miss hallucinations
|
|
635
|
+
.precision(true).toBeAtLeast(0.7) // Some false positives OK
|
|
636
|
+
.displayConfusionMatrix();
|
|
481
637
|
```
|
|
482
638
|
|
|
483
639
|
**Use cases:**
|
|
@@ -490,7 +646,7 @@ expectStats(judgeOutputs, humanLabels)
|
|
|
490
646
|
**Two-argument expectStats:**
|
|
491
647
|
|
|
492
648
|
```javascript
|
|
493
|
-
expectStats(actual, expected).field("fieldName").
|
|
649
|
+
expectStats(actual, expected).field("fieldName").accuracy.toBeAtLeast(0.8);
|
|
494
650
|
```
|
|
495
651
|
|
|
496
652
|
The first argument is your predictions (judge outputs), the second is ground truth (human labels). Both must have matching `id` fields for alignment.
|
|
@@ -694,25 +850,6 @@ setLLMClient({
|
|
|
694
850
|
- [Migration Guide](./docs/migration-v0.2.md) - Upgrade from v0.1.x
|
|
695
851
|
- [Examples](./examples/) - Working code examples
|
|
696
852
|
|
|
697
|
-
## Philosophy
|
|
698
|
-
|
|
699
|
-
evalsense is built on the principle that **metrics are predictions, not facts**.
|
|
700
|
-
|
|
701
|
-
Instead of treating LLM-as-judge metrics (relevance, hallucination, etc.) as ground truth, evalsense:
|
|
702
|
-
|
|
703
|
-
- Treats them as **weak labels** from a model
|
|
704
|
-
- Validates them statistically against human references when available
|
|
705
|
-
- Computes confusion matrices to reveal bias and systematic errors
|
|
706
|
-
- Focuses on dataset-level distributions, not individual examples
|
|
707
|
-
|
|
708
853
|
## Contributing
|
|
709
854
|
|
|
710
855
|
Contributions are welcome! Please see [CLAUDE.md](./CLAUDE.md) for development guidelines.
|
|
711
|
-
|
|
712
|
-
## License
|
|
713
|
-
|
|
714
|
-
MIT © Mohit Joshi
|
|
715
|
-
|
|
716
|
-
---
|
|
717
|
-
|
|
718
|
-
**Made with ❤️ for the JS/Node.js AI community**
|
|
@@ -156,14 +156,21 @@ function getSupport(cm, label) {
|
|
|
156
156
|
return support;
|
|
157
157
|
}
|
|
158
158
|
function formatConfusionMatrix(cm) {
|
|
159
|
+
if (cm.labels.length === 0) {
|
|
160
|
+
return "Empty confusion matrix";
|
|
161
|
+
}
|
|
162
|
+
const rowLabelPrefix = " ";
|
|
159
163
|
const maxLabelLen = Math.max(...cm.labels.map((l) => l.length), 8);
|
|
160
164
|
const colWidth = Math.max(...cm.matrix.flat().map((n) => String(n).length), maxLabelLen);
|
|
161
|
-
const
|
|
165
|
+
const predictedLabel = "Predicted \u2192";
|
|
166
|
+
const headerPadding = rowLabelPrefix.length + maxLabelLen + 2;
|
|
167
|
+
const header = predictedLabel.padEnd(headerPadding) + cm.labels.map((l) => l.padStart(colWidth)).join(" ");
|
|
168
|
+
const axisRow = "Actual \u2193";
|
|
162
169
|
const rows = cm.labels.map((label, i) => {
|
|
163
170
|
const rowData = cm.matrix[i].map((n) => String(n).padStart(colWidth)).join(" ");
|
|
164
|
-
return label.padEnd(maxLabelLen) + " " + rowData;
|
|
171
|
+
return rowLabelPrefix + label.padEnd(maxLabelLen) + " " + rowData;
|
|
165
172
|
});
|
|
166
|
-
return [header, ...rows].join("\n");
|
|
173
|
+
return [header, axisRow, ...rows].join("\n");
|
|
167
174
|
}
|
|
168
175
|
var JsonReporter = class {
|
|
169
176
|
/**
|
|
@@ -269,7 +276,7 @@ var ConsoleReporter = class {
|
|
|
269
276
|
*/
|
|
270
277
|
printHeader(fileCount) {
|
|
271
278
|
this.log("");
|
|
272
|
-
this.log(this.color("bold", `EvalSense v0.
|
|
279
|
+
this.log(this.color("bold", `EvalSense v0.4.0`));
|
|
273
280
|
this.log(this.color("dim", `Running ${fileCount} eval file(s)...`));
|
|
274
281
|
this.log("");
|
|
275
282
|
}
|
|
@@ -816,5 +823,5 @@ exports.parseReport = parseReport;
|
|
|
816
823
|
exports.recordAssertion = recordAssertion;
|
|
817
824
|
exports.recordFieldMetrics = recordFieldMetrics;
|
|
818
825
|
exports.setCurrentSuite = setCurrentSuite;
|
|
819
|
-
//# sourceMappingURL=chunk-
|
|
820
|
-
//# sourceMappingURL=chunk-
|
|
826
|
+
//# sourceMappingURL=chunk-4BKZPVY4.cjs.map
|
|
827
|
+
//# sourceMappingURL=chunk-4BKZPVY4.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/core/errors.ts","../src/statistics/confusion-matrix.ts","../src/report/json-reporter.ts","../src/report/console-reporter.ts","../src/runner/discovery.ts","../src/core/types.ts","../src/core/context.ts","../src/runner/executor.ts"],"names":["stringify","writeFileSync","symbol","glob","path","resolve","existsSync","pathToFileURL"],"mappings":";;;;;;;;;;;;;AAIO,IAAM,cAAA,GAAN,cAA6B,KAAA,CAAM;AAAA,EACxC,YAAY,OAAA,EAAiB;AAC3B,IAAA,KAAA,CAAM,OAAO,CAAA;AACb,IAAA,IAAA,CAAK,IAAA,GAAO,gBAAA;AAAA,EACd;AACF;AAEO,IAAM,cAAA,GAAN,cAA6B,cAAA,CAAe;AAAA,EACjC,QAAA;AAAA,EACA,MAAA;AAAA,EACA,KAAA;AAAA,EAEhB,WAAA,CAAY,OAAA,EAAiB,QAAA,EAAoB,MAAA,EAAkB,KAAA,EAAgB;AACjF,IAAA,KAAA,CAAM,OAAO,CAAA;AACb,IAAA,IAAA,CAAK,IAAA,GAAO,gBAAA;AACZ,IAAA,IAAA,CAAK,QAAA,GAAW,QAAA;AAChB,IAAA,IAAA,CAAK,MAAA,GAAS,MAAA;AACd,IAAA,IAAA,CAAK,KAAA,GAAQ,KAAA;AAAA,EACf;AACF;AAEO,IAAM,YAAA,GAAN,cAA2B,cAAA,CAAe;AAAA,EAC/B,MAAA;AAAA,EAEhB,WAAA,CAAY,SAAiB,MAAA,EAAiB;AAC5C,IAAA,KAAA,CAAM,OAAO,CAAA;AACb,IAAA,IAAA,CAAK,IAAA,GAAO,cAAA;AACZ,IAAA,IAAA,CAAK,MAAA,GAAS,MAAA;AAAA,EAChB;AACF;AAEO,IAAM,cAAA,GAAN,cAA6B,cAAA,CAAe;AAAA,EACjC,UAAA;AAAA,EACA,YAAA;AAAA,EAEhB,WAAA,CAAY,OAAA,EAAiB,UAAA,EAAuB,YAAA,EAAyB;AAC3E,IAAA,KAAA,CAAM,OAAO,CAAA;AACb,IAAA,IAAA,CAAK,IAAA,GAAO,gBAAA;AACZ,IAAA,IAAA,CAAK,UAAA,GAAa,UAAA;AAClB,IAAA,IAAA,CAAK,YAAA,GAAe,YAAA;AAAA,EACtB;AACF;AAEO,IAAM,kBAAA,GAAN,cAAiC,cAAA,CAAe;AAAA,EACrD,YAAY,OAAA,EAAiB;AAC3B,IAAA,KAAA,CAAM,OAAO,CAAA;AACb,IAAA,IAAA,CAAK,IAAA,GAAO,oBAAA;AAAA,EACd;AACF;AAEO,IAAM,kBAAA,GAAN,cAAiC,cAAA,CAAe;AAAA,EACrC,QAAA;AAAA,EACA,aAAA;AAAA,EAEhB,WAAA,CAAY,OAAA,EAAiB,QAAA,EAAkB,aAAA,EAAuB;AACpE,IAAA,KAAA,CAAM,OAAO,CAAA;AACb,IAAA,IAAA,CAAK,IAAA,GAAO,oBAAA;AACZ,IAAA,IAAA,CAAK,QAAA,GAAW,QAAA;AAChB,IAAA,IAAA,CAAK,aAAA,GAAgB,aAAA;AAAA,EACvB;AACF;AAEO,IAAM,sBAAA,GAAN,cAAqC,cAAA,CAAe;AAAA,EACzC,QAAA;AAAA,EAOhB,YACE,QAAA,EACA;AACA,IAAA,MAAM,QAAQ,QAAA,CAAS,MAAA;AACvB,IAAA,MAAM,OAAA,GAAU,QAAA,CAAS,GAAA,CAAI,CAAC,CAAA,KAAM,CAAA,IAAA,EAAO,CAAA,CAAE,OAAO,CAAA,CAAE,CAAA,CAAE,IAAA,CAAK,IAAI,CAAA;AACjE,IAAA,KAAA,CAAM,GAAG,KAAK,CAAA,UAAA,EAAa,KAAA,GAAQ,CAAA,GAAI,MAAM,EAAE,CAAA;AAAA,EAAa,OAAO,CAAA,CAAE,CAAA;AACrE,IAAA,IAAA,CAAK,IAAA,GAAO,wBAAA;AACZ,IAAA,IAAA,CAAK,QAAA,GAAW,QAAA;AAAA,EAClB;AACF,CAAA;;;AC7DO,SAAS,oBAAA,CAAqB,QAAmB,QAAA,EAAsC;AAC5F,EAAA,IAAI,MAAA,CAAO,MAAA,KAAW,QAAA,CAAS,MAAA,EAAQ;AACrC,IAAA,MAAM,IAAI,KAAA;AAAA,MACR,CAAA,kCAAA,EAAqC,MAAA,CAAO,MAAM,CAAA,wBAAA,EAA2B,SAAS,MAAM,CAAA;AAAA,KAC9F;AAAA,EACF;AAGA,EAAA,MAAM,QAAA,uBAAe,GAAA,EAAY;AACjC,EAAA,KAAA,MAAW,OAAO,MAAA,EAAQ;AACxB,IAAA,IAAI,GAAA,KAAQ,MAAA,IAAa,GAAA,KAAQ,IAAA,EAAM;AACrC,MAAA,QAAA,CAAS,GAAA,CAAI,MAAA,CAAO,GAAG,CAAC,CAAA;AAAA,IAC1B;AAAA,EACF;AACA,EAAA,KAAA,MAAW,OAAO,QAAA,EAAU;AAC1B,IAAA,IAAI,GAAA,KAAQ,MAAA,IAAa,GAAA,KAAQ,IAAA,EAAM;AACrC,MAAA,QAAA,CAAS,GAAA,CAAI,MAAA,CAAO,GAAG,CAAC,CAAA;AAAA,IAC1B;AAAA,EACF;AAGA,EAAA,MAAM,MAAA,GAAS,KAAA,CAAM,IAAA,CAAK,QAAQ,EAAE,IAAA,EAAK;AACzC,EAAA,MAAM,UAAA,uBAAiB,GAAA,EAAoB;AAC3C,EAAA,MAAA,CAAO,OAAA,CAAQ,CAAC,KAAA,EAAO,GAAA,KAAQ,WAAW,GAAA,CAAI,KAAA,EAAO,GAAG,CAAC,CAAA;AAGzD,EAAA,MAAM,MAAA,GAAqB,OAAO,GAAA,CAAI,MAAM,OAAO,GAAA,CAAI,MAAM,CAAC,CAAC,CAAA;AAI/D,EAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,GAAI,MAAA,CAAO,QAAQ,CAAA,EAAA,EAAK;AACtC,IAAA,MAAM,SAAA,GAAY,OAAO,CAAC,CAAA;AAC1B,IAAA,MAAM,WAAA,GAAc,SAAS,CAAC,CAAA;AAE9B,IAAA,IAAI,SAAA,KAAc,MAAA,IAAa,SAAA,KAAc,IAAA,EAAM;AACnD,IAAA,IAAI,WAAA,KAAgB,MAAA,IAAa,WAAA,KAAgB,IAAA,EAAM;AAEvD,IAAA,MAAM,SAAA,GAAY,UAAA,CAAW,GAAA,CAAI,MAAA,CAAO,SAAS,CAAC,CAAA;AAClD,IAAA,MAAM,WAAA,GAAc,UAAA,CAAW,GAAA,CAAI,MAAA,CAAO,WAAW,CAAC,CAAA;AAEtD,IAAA,IAAI,SAAA,KAAc,MAAA,IAAa,WAAA,KAAgB,MAAA,EAAW;AACxD,MAAA,MAAA,CAAO,WAAW,EAAG,SAAS,CAAA,EAAA;AAAA,IAChC;AAAA,EACF;AAEA,EAAA,MAAM,QAAQ,MAAA,CAAO,MAAA;AAAA,IACnB,CAAC,CAAA,EAAG,CAAA,KAAM,CAAA,KAAM,MAAA,IAAa,CAAA,KAAM,IAAA,IAAQ,QAAA,CAAS,CAAC,CAAA,KAAM,MAAA,IAAa,QAAA,CAAS,CAAC,CAAA,KAAM;AAAA,GAC1F,CAAE,MAAA;AAEF,EAAA,OAAO,EAAE,MAAA,EAAQ,MAAA,EAAQ,KAAA,EAAM;AACjC;AAKO,SAAS,QAAA,CAAS,EAAA,EAAqB,aAAA,EAAuB,WAAA,EAA6B;AAChG,EAAA,MAAM,WAAA,GAAc,EAAA,CAAG,MAAA,CAAO,OAAA,CAAQ,aAAa,CAAA;AACnD,EAAA,MAAM,SAAA,GAAY,EAAA,CAAG,MAAA,CAAO,OAAA,CAAQ,WAAW,CAAA;AAE/C,EAAA,IAAI,WAAA,KAAgB,EAAA,IAAM,SAAA,KAAc,EAAA,EAAI;AAC1C,IAAA,OAAO,CAAA;AAAA,EACT;AAEA,EAAA,OAAO,EAAA,CAAG,MAAA,CAAO,WAAW,CAAA,GAAI,SAAS,CAAA,IAAK,CAAA;AAChD;AAKO,SAAS,gBAAA,CAAiB,IAAqB,KAAA,EAAuB;AAC3E,EAAA,OAAO,QAAA,CAAS,EAAA,EAAI,KAAA,EAAO,KAAK,CAAA;AAClC;AAKO,SAAS,iBAAA,CAAkB,IAAqB,KAAA,EAAuB;AAC5E,EAAA,MAAM,QAAA,GAAW,EAAA,CAAG,MAAA,CAAO,OAAA,CAAQ,KAAK,CAAA;AACxC,EAAA,IAAI,QAAA,KAAa,IAAI,OAAO,CAAA;AAE5B,EAAA,IAAI,EAAA,GAAK,CAAA;AACT,EAAA,KAAA,IAAS,IAAI,CAAA,EAAG,CAAA,GAAI,EAAA,CAAG,MAAA,CAAO,QAAQ,CAAA,EAAA,EAAK;AACzC,IAAA,IAAI,MAAM,QAAA,EAAU;AAClB,MAAA,EAAA,IAAM,EAAA,CAAG,MAAA,CAAO,CAAC,CAAA,GAAI,QAAQ,CAAA,IAAK,CAAA;AAAA,IACpC;AAAA,EACF;AACA,EAAA,OAAO,EAAA;AACT;AAKO,SAAS,iBAAA,CAAkB,IAAqB,KAAA,EAAuB;AAC5E,EAAA,MAAM,QAAA,GAAW,EAAA,CAAG,MAAA,CAAO,OAAA,CAAQ,KAAK,CAAA;AACxC,EAAA,IAAI,QAAA,KAAa,IAAI,OAAO,CAAA;AAE5B,EAAA,IAAI,EAAA,GAAK,CAAA;AACT,EAAA,KAAA,IAAS,IAAI,CAAA,EAAG,CAAA,GAAI,EAAA,CAAG,MAAA,CAAO,QAAQ,CAAA,EAAA,EAAK;AACzC,IAAA,IAAI,MAAM,QAAA,EAAU;AAClB,MAAA,EAAA,IAAM,EAAA,CAAG,MAAA,CAAO,QAAQ,CAAA,GAAI,CAAC,CAAA,IAAK,CAAA;AAAA,IACpC;AAAA,EACF;AACA,EAAA,OAAO,EAAA;AACT;AAuBO,SAAS,UAAA,CAAW,IAAqB,KAAA,EAAuB;AACrE,EAAA,MAAM,QAAA,GAAW,EAAA,CAAG,MAAA,CAAO,OAAA,CAAQ,KAAK,CAAA;AACxC,EAAA,IAAI,QAAA,KAAa,IAAI,OAAO,CAAA;AAE5B,EAAA,IAAI,OAAA,GAAU,CAAA;AACd,EAAA,KAAA,IAAS,IAAI,CAAA,EAAG,CAAA,GAAI,EAAA,CAAG,MAAA,CAAO,QAAQ,CAAA,EAAA,EAAK;AACzC,IAAA,OAAA,IAAW,EAAA,CAAG,MAAA,CAAO,QAAQ,CAAA,GAAI,CAAC,CAAA,IAAK,CAAA;AAAA,EACzC;AACA,EAAA,OAAO,OAAA;AACT;AAaO,SAAS,sBAAsB,EAAA,EAA6B;AACjE,EAAA,IAAI,EAAA,CAAG,MAAA,CAAO,MAAA,KAAW,CAAA,EAAG;AAC1B,IAAA,OAAO,wBAAA;AAAA,EACT;AAEA,EAAA,MAAM,cAAA,GAAiB,IAAA;AACvB,EAAA,MAAM,WAAA,GAAc,IAAA,CAAK,GAAA,CAAI,GAAG,EAAA,CAAG,MAAA,CAAO,GAAA,CAAI,CAAC,CAAA,KAAM,CAAA,CAAE,MAAM,CAAA,EAAG,CAAC,CAAA;AACjE,EAAA,MAAM,WAAW,IAAA,CAAK,GAAA,CAAI,GAAG,EAAA,CAAG,OAAO,IAAA,EAAK,CAAE,GAAA,CAAI,CAAC,MAAM,MAAA,CAAO,CAAC,CAAA,CAAE,MAAM,GAAG,WAAW,CAAA;AAGvF,EAAA,MAAM,cAAA,GAAiB,kBAAA;AACvB,EAAA,MAAM,aAAA,GAAgB,cAAA,CAAe,MAAA,GAAS,WAAA,GAAc,CAAA;AAC5D,EAAA,MAAM,SACJ,cAAA,CAAe,MAAA,CAAO,aAAa,CAAA,GAAI,GAAG,MAAA,CAAO,GAAA,CAAI,CAAC,CAAA,KAAM,EAAE,QAAA,CAAS,QAAQ,CAAC,CAAA,CAAE,KAAK,GAAG,CAAA;AAG5F,EAAA,MAAM,OAAA,GAAU,eAAA;AAGhB,EAAA,MAAM,OAAO,EAAA,CAAG,MAAA,CAAO,GAAA,CAAI,CAAC,OAAO,CAAA,KAAM;AACvC,IAAA,MAAM,UAAU,EAAA,CAAG,MAAA,CAAO,CAAC,CAAA,CAAG,IAAI,CAAC,CAAA,KAAM,MAAA,CAAO,CAAC,EAAE,QAAA,CAAS,QAAQ,CAAC,CAAA,CAAE,KAAK,GAAG,CAAA;AAC/E,IAAA,OAAO,cAAA,GAAiB,KAAA,CAAM,MAAA,CAAO,WAAW,IAAI,IAAA,GAAO,OAAA;AAAA,EAC7D,CAAC,CAAA;AAED,EAAA,OAAO,CAAC,MAAA,EAAQ,OAAA,EAAS,GAAG,IAAI,CAAA,CAAE,KAAK,IAAI,CAAA;AAC7C;ACxLO,IAAM,eAAN,MAAmB;AAAA;AAAA;AAAA;AAAA,EAIxB,OAAO,MAAA,EAA4B;AAEjC,IAAA,MAAM,YAAA,GAAe,IAAA,CAAK,cAAA,CAAe,MAAM,CAAA;AAC/C,IAAA,OAAOA,0BAAA,CAAU,YAAY,CAAA,IAAK,IAAA;AAAA,EACpC;AAAA;AAAA;AAAA;AAAA,EAKA,WAAA,CAAY,QAAoB,IAAA,EAAoB;AAClD,IAAA,MAAM,IAAA,GAAO,IAAA,CAAK,MAAA,CAAO,MAAM,CAAA;AAC/B,IAAAC,gBAAA,CAAc,IAAA,EAAM,MAAM,OAAO,CAAA;AAAA,EACnC;AAAA;AAAA;AAAA;AAAA,EAKQ,eAAe,MAAA,EAA6C;AAClE,IAAA,OAAO;AAAA,MACL,SAAS,MAAA,CAAO,OAAA;AAAA,MAChB,WAAW,MAAA,CAAO,SAAA;AAAA,MAClB,SAAS,MAAA,CAAO,OAAA;AAAA,MAChB,MAAA,EAAQ,MAAA,CAAO,MAAA,CAAO,GAAA,CAAI,CAAC,KAAA,MAAW;AAAA,QACpC,MAAM,KAAA,CAAM,IAAA;AAAA,QACZ,QAAQ,KAAA,CAAM,MAAA;AAAA,QACd,QAAQ,KAAA,CAAM,MAAA;AAAA,QACd,QAAQ,KAAA,CAAM,MAAA;AAAA,QACd,SAAS,KAAA,CAAM,OAAA;AAAA,QACf,UAAU,KAAA,CAAM,QAAA;AAAA,QAChB,KAAA,EAAO,KAAA,CAAM,KAAA,CAAM,GAAA,CAAI,CAAC,IAAA,MAAU;AAAA,UAChC,MAAM,IAAA,CAAK,IAAA;AAAA,UACX,QAAQ,IAAA,CAAK,MAAA;AAAA,UACb,UAAU,IAAA,CAAK,QAAA;AAAA,UACf,KAAA,EAAO,KAAK,KAAA,GACR;AAAA,YACE,IAAA,EAAM,KAAK,KAAA,CAAM,IAAA;AAAA,YACjB,OAAA,EAAS,KAAK,KAAA,CAAM;AAAA,WACtB,GACA,MAAA;AAAA,UACJ,UAAA,EAAY,IAAA,CAAK,UAAA,CAAW,GAAA,CAAI,CAAC,CAAA,MAAO;AAAA,YACtC,MAAM,CAAA,CAAE,IAAA;AAAA,YACR,QAAQ,CAAA,CAAE,MAAA;AAAA,YACV,SAAS,CAAA,CAAE,OAAA;AAAA,YACX,UAAU,CAAA,CAAE,QAAA;AAAA,YACZ,QAAQ,CAAA,CAAE,MAAA;AAAA,YACV,OAAO,CAAA,CAAE,KAAA;AAAA,YACT,OAAO,CAAA,CAAE;AAAA,WACX,CAAE,CAAA;AAAA,UACF,YAAA,EAAc,IAAA,CAAK,YAAA,CAAa,GAAA,CAAI,CAAC,EAAA,MAAQ;AAAA,YAC3C,OAAO,EAAA,CAAG,KAAA;AAAA,YACV,WAAW,EAAA,CAAG,SAAA;AAAA,YACd,mBAAmB,EAAA,CAAG,iBAAA;AAAA,YACtB,OAAA,EAAS;AAAA,cACP,QAAA,EAAU,GAAG,OAAA,CAAQ,QAAA;AAAA,cACrB,QAAA,EAAU,GAAG,OAAA,CAAQ,QAAA;AAAA,cACrB,QAAA,EAAU,GAAG,OAAA,CAAQ,QAAA;AAAA,cACrB,WAAA,EAAa,GAAG,OAAA,CAAQ,WAAA;AAAA,cACxB,eAAA,EAAiB;AAAA,gBACf,MAAA,EAAQ,EAAA,CAAG,OAAA,CAAQ,eAAA,CAAgB,MAAA;AAAA,gBACnC,MAAA,EAAQ,EAAA,CAAG,OAAA,CAAQ,eAAA,CAAgB,MAAA;AAAA,gBACnC,KAAA,EAAO,EAAA,CAAG,OAAA,CAAQ,eAAA,CAAgB;AAAA;AACpC;AACF,WACF,CAAE;AAAA,SACJ,CAAE;AAAA,OACJ,CAAE,CAAA;AAAA,MACF,WAAW,MAAA,CAAO;AAAA,KACpB;AAAA,EACF;AACF;AAKO,SAAS,YAAY,IAAA,EAA0B;AACpD,EAAA,MAAM,IAAA,GAAO,IAAA,CAAK,KAAA,CAAM,IAAI,CAAA;AAC5B,EAAA,OAAO,IAAA;AACT;;;AClFA,IAAM,MAAA,GAAS;AAAA,EACb,KAAA,EAAO,SAAA;AAAA,EACP,IAAA,EAAM,SAAA;AAAA,EACN,GAAA,EAAK,SAAA;AAAA,EACL,GAAA,EAAK,UAAA;AAAA,EACL,KAAA,EAAO,UAAA;AAAA,EACP,MAAA,EAAQ,UAAA;AAAA,EACR,IAAA,EAAM,UAAA;AAAA,EACN,IAAA,EAAM,UAAA;AAAA,EACN,IAAA,EAAM;AACR,CAAA;AAKA,IAAM,OAAA,GAAU;AAAA,EACd,IAAA,EAAM,QAAA;AAAA;AAAA,EACN,IAAA,EAAM,QAAA;AAAA;AAAA,EACN,KAAA,EAAO,GAAA;AAAA,EACP,IAAA,EAAM;AACR,CAAA;AAKO,IAAM,kBAAN,MAAsB;AAAA,EACnB,SAAA;AAAA,EAER,WAAA,CAAY,YAAY,IAAA,EAAM;AAC5B,IAAA,IAAA,CAAK,SAAA,GAAY,SAAA,IAAa,OAAA,CAAQ,MAAA,CAAO,KAAA,KAAU,KAAA;AAAA,EACzD;AAAA;AAAA;AAAA;AAAA,EAKA,YAAY,SAAA,EAAyB;AACnC,IAAA,IAAA,CAAK,IAAI,EAAE,CAAA;AACX,IAAA,IAAA,CAAK,GAAA,CAAI,IAAA,CAAK,KAAA,CAAM,MAAA,EAAQ,kBAAkB,CAAC,CAAA;AAC/C,IAAA,IAAA,CAAK,IAAI,IAAA,CAAK,KAAA,CAAM,OAAO,CAAA,QAAA,EAAW,SAAS,kBAAkB,CAAC,CAAA;AAClE,IAAA,IAAA,CAAK,IAAI,EAAE,CAAA;AAAA,EACb;AAAA;AAAA;AAAA;AAAA,EAKA,YAAY,MAAA,EAA0B;AAEpC,IAAA,KAAA,MAAW,KAAA,IAAS,OAAO,MAAA,EAAQ;AACjC,MAAA,IAAA,CAAK,UAAA,CAAW,KAAA,CAAM,IAAA,EAAM,KAAA,CAAM,KAAK,CAAA;AAAA,IACzC;AAGA,IAAA,IAAA,CAAK,aAAa,MAAM,CAAA;AAAA,EAC1B;AAAA;AAAA;AAAA;AAAA,EAKQ,UAAA,CAAW,MAAc,KAAA,EAA2B;AAC1D,IAAA,IAAA,CAAK,IAAI,IAAA,CAAK,KAAA,CAAM,QAAQ,CAAA,EAAA,EAAK,IAAI,EAAE,CAAC,CAAA;AACxC,IAAA,IAAA,CAAK,IAAI,EAAE,CAAA;AAEX,IAAA,KAAA,MAAW,QAAQ,KAAA,EAAO;AACxB,MAAA,IAAA,CAAK,UAAU,IAAI,CAAA;AAAA,IACrB;AAEA,IAAA,IAAA,CAAK,IAAI,EAAE,CAAA;AAAA,EACb;AAAA;AAAA;AAAA;AAAA,EAKQ,UAAU,IAAA,EAAwB;AACxC,IAAA,MAAM,MAAA,GAAS,IAAA,CAAK,eAAA,CAAgB,IAAA,CAAK,MAAM,CAAA;AAC/C,IAAA,MAAM,WAAA,GAAc,IAAA,CAAK,cAAA,CAAe,IAAA,CAAK,MAAM,CAAA;AACnD,IAAA,MAAM,WAAW,IAAA,CAAK,KAAA,CAAM,OAAO,CAAA,CAAA,EAAI,IAAA,CAAK,QAAQ,CAAA,GAAA,CAAK,CAAA;AAEzD,IAAA,IAAA,CAAK,GAAA,CAAI,CAAA,IAAA,EAAO,IAAA,CAAK,KAAA,CAAM,WAAA,EAAa,MAAM,CAAC,CAAA,CAAA,EAAI,IAAA,CAAK,IAAI,CAAA,CAAA,EAAI,QAAQ,CAAA,CAAE,CAAA;AAG1E,IAAA,KAAA,MAAW,EAAA,IAAM,KAAK,YAAA,EAAc;AAClC,MAAA,IAAA,CAAK,kBAAkB,EAAE,CAAA;AAAA,IAC3B;AAGA,IAAA,KAAA,MAAW,EAAA,IAAM,KAAK,YAAA,EAAc;AAClC,MAAA,IAAI,EAAA,CAAG,OAAA,CAAQ,eAAA,IAAmB,MAAA,CAAO,IAAA,CAAK,GAAG,OAAA,CAAQ,eAAe,CAAA,CAAE,MAAA,GAAS,CAAA,EAAG;AACpF,QAAA,IAAA,CAAK,qBAAqB,EAAE,CAAA;AAAA,MAC9B;AAAA,IACF;AAGA,IAAA,IAAI,KAAK,KAAA,EAAO;AACd,MAAA,MAAM,MAAA,GAAS,IAAA,CAAK,MAAA,KAAW,OAAA,GAAU,OAAA,GAAU,kBAAA;AACnD,MAAA,IAAA,CAAK,GAAA,CAAI,IAAA,CAAK,KAAA,CAAM,KAAA,EAAO,CAAA,MAAA,EAAS,MAAM,CAAA,EAAA,EAAK,IAAA,CAAK,KAAA,CAAM,OAAO,CAAA,CAAE,CAAC,CAAA;AACpE,MAAA,IAAA,CAAK,IAAI,EAAE,CAAA;AAAA,IACb;AAGA,IAAA,KAAA,MAAW,SAAA,IAAa,KAAK,UAAA,EAAY;AACvC,MAAA,MAAMC,OAAAA,GAAS,SAAA,CAAU,MAAA,GACrB,IAAA,CAAK,KAAA,CAAM,OAAA,EAAS,OAAA,CAAQ,IAAI,CAAA,GAChC,IAAA,CAAK,KAAA,CAAM,KAAA,EAAO,QAAQ,IAAI,CAAA;AAClC,MAAA,MAAM,KAAA,GAAQ,SAAA,CAAU,MAAA,GAAS,KAAA,GAAQ,KAAA;AAEzC,MAAA,IAAA,CAAK,GAAA,CAAI,IAAA,CAAK,KAAA,CAAM,KAAA,EAAO,CAAA,MAAA,EAASA,OAAM,CAAA,CAAA,EAAI,SAAA,CAAU,OAAO,CAAA,CAAE,CAAC,CAAA;AAGlE,MAAA,IAAI,SAAA,CAAU,QAAA,KAAa,MAAA,IAAa,SAAA,CAAU,WAAW,MAAA,EAAW;AACtE,QAAA,IAAA,CAAK,GAAA,CAAI,IAAA,CAAK,KAAA,CAAM,KAAA,EAAO,CAAA,kBAAA,EAAqB,IAAA,CAAK,WAAA,CAAY,SAAA,CAAU,QAAQ,CAAC,CAAA,CAAE,CAAC,CAAA;AACvF,QAAA,IAAA,CAAK,GAAA,CAAI,IAAA,CAAK,KAAA,CAAM,KAAA,EAAO,CAAA,kBAAA,EAAqB,IAAA,CAAK,WAAA,CAAY,SAAA,CAAU,MAAM,CAAC,CAAA,CAAE,CAAC,CAAA;AAAA,MACvF;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKQ,kBAAkB,EAAA,EAA6B;AACrD,IAAA,MAAM,EAAE,OAAA,EAAS,KAAA,EAAO,SAAA,EAAW,mBAAkB,GAAI,EAAA;AAEzD,IAAA,MAAM,aAAa,SAAA,GAAY,CAAA,EAAG,KAAK,CAAA,cAAA,EAAiB,iBAAiB,CAAA,CAAA,CAAA,GAAM,KAAA;AAE/E,IAAA,IAAA,CAAK,GAAA;AAAA,MACH,IAAA,CAAK,KAAA;AAAA,QACH,MAAA;AAAA,QACA,CAAA,aAAA,EAAgB,UAAU,CAAA,aAAA,EAAgB,IAAA,CAAK,IAAI,OAAA,CAAQ,QAAQ,CAAC,CAAA,OAAA,EAAU,IAAA,CAAK,GAAA,CAAI,OAAA,CAAQ,QAAA,CAAS,EAAE,CAAC,CAAA;AAAA;AAC7G,KACF;AAGA,IAAA,IAAI,OAAO,IAAA,CAAK,OAAA,CAAQ,QAAQ,CAAA,CAAE,SAAS,CAAA,EAAG;AAC5C,MAAA,KAAA,MAAW,CAAC,KAAK,YAAY,CAAA,IAAK,OAAO,OAAA,CAAQ,OAAA,CAAQ,QAAQ,CAAA,EAAG;AAClE,QAAA,IAAA,CAAK,GAAA;AAAA,UACH,IAAA,CAAK,KAAA;AAAA,YACH,KAAA;AAAA,YACA,CAAA,QAAA,EAAW,GAAG,CAAA,IAAA,EAAO,IAAA,CAAK,IAAI,YAAA,CAAa,SAAS,CAAC,CAAA,GAAA,EAAM,IAAA,CAAK,GAAA,CAAI,aAAa,MAAM,CAAC,OAAO,IAAA,CAAK,GAAA,CAAI,aAAa,EAAE,CAAC,CAAA,IAAA,EAAO,YAAA,CAAa,OAAO,CAAA,CAAA;AAAA;AACrJ,SACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKQ,aAAa,MAAA,EAA0B;AAC7C,IAAA,MAAM,EAAE,SAAQ,GAAI,MAAA;AAEpB,IAAA,IAAA,CAAK,GAAA,CAAI,IAAA,CAAK,KAAA,CAAM,MAAA,EAAQ,WAAW,CAAC,CAAA;AACxC,IAAA,IAAA,CAAK,IAAI,EAAE,CAAA;AAEX,IAAA,MAAM,YAAY,IAAA,CAAK,KAAA,CAAM,SAAS,CAAA,EAAG,OAAA,CAAQ,MAAM,CAAA,OAAA,CAAS,CAAA;AAChE,IAAA,MAAM,SAAA,GACJ,OAAA,CAAQ,MAAA,GAAS,CAAA,GACb,KAAK,KAAA,CAAM,KAAA,EAAO,CAAA,EAAG,OAAA,CAAQ,MAAM,CAAA,OAAA,CAAS,CAAA,GAC5C,CAAA,EAAG,QAAQ,MAAM,CAAA,OAAA,CAAA;AACvB,IAAA,MAAM,SAAA,GACJ,OAAA,CAAQ,MAAA,GAAS,CAAA,GACb,KAAK,KAAA,CAAM,KAAA,EAAO,CAAA,EAAG,OAAA,CAAQ,MAAM,CAAA,OAAA,CAAS,CAAA,GAC5C,CAAA,EAAG,QAAQ,MAAM,CAAA,OAAA,CAAA;AACvB,IAAA,MAAM,UAAA,GACJ,OAAA,CAAQ,OAAA,GAAU,CAAA,GACd,KAAK,KAAA,CAAM,QAAA,EAAU,CAAA,EAAG,OAAA,CAAQ,OAAO,CAAA,QAAA,CAAU,CAAA,GACjD,CAAA,EAAG,QAAQ,OAAO,CAAA,QAAA,CAAA;AAExB,IAAA,IAAA,CAAK,GAAA,CAAI,iBAAiB,SAAS,CAAA,EAAA,EAAK,SAAS,CAAA,EAAA,EAAK,SAAS,CAAA,EAAA,EAAK,UAAU,CAAA,CAAE,CAAA;AAChF,IAAA,IAAA,CAAK,GAAA,CAAI,CAAA,cAAA,EAAiB,OAAA,CAAQ,WAAW,CAAA,CAAE,CAAA;AAC/C,IAAA,IAAA,CAAK,IAAI,CAAA,cAAA,EAAiB,IAAA,CAAK,eAAe,OAAA,CAAQ,QAAQ,CAAC,CAAA,CAAE,CAAA;AACjE,IAAA,IAAA,CAAK,IAAI,EAAE,CAAA;AAGX,IAAA,IAAI,OAAA,CAAQ,MAAA,KAAW,CAAA,IAAK,OAAA,CAAQ,WAAW,CAAA,EAAG;AAChD,MAAA,IAAA,CAAK,GAAA,CAAI,IAAA,CAAK,KAAA,CAAM,OAAA,EAAS,qBAAqB,CAAC,CAAA;AAAA,IACrD,CAAA,MAAO;AACL,MAAA,IAAA,CAAK,GAAA,CAAI,IAAA,CAAK,KAAA,CAAM,KAAA,EAAO,sBAAsB,CAAC,CAAA;AAAA,IACpD;AAEA,IAAA,IAAA,CAAK,IAAI,EAAE,CAAA;AAAA,EACb;AAAA;AAAA;AAAA;AAAA,EAKA,qBAAqB,EAAA,EAA6B;AAChD,IAAA,IAAA,CAAK,IAAI,EAAE,CAAA;AACX,IAAA,IAAA,CAAK,GAAA,CAAI,KAAK,KAAA,CAAM,MAAA,EAAQ,uBAAuB,EAAA,CAAG,KAAK,EAAE,CAAC,CAAA;AAC9D,IAAA,IAAA,CAAK,IAAI,EAAE,CAAA;AAEX,IAAA,MAAM,SAAA,GAAY,qBAAA,CAAsB,EAAA,CAAG,OAAA,CAAQ,eAAe,CAAA;AAClE,IAAA,KAAA,MAAW,IAAA,IAAQ,SAAA,CAAU,KAAA,CAAM,IAAI,CAAA,EAAG;AACxC,MAAA,IAAA,CAAK,GAAA,CAAI,CAAA,IAAA,EAAO,IAAI,CAAA,CAAE,CAAA;AAAA,IACxB;AAEA,IAAA,IAAA,CAAK,IAAI,EAAE,CAAA;AAAA,EACb;AAAA;AAAA;AAAA;AAAA,EAKQ,IAAI,KAAA,EAAuB;AACjC,IAAA,OAAO,CAAA,EAAA,CAAI,KAAA,GAAQ,GAAA,EAAK,OAAA,CAAQ,CAAC,CAAC,CAAA,CAAA,CAAA;AAAA,EACpC;AAAA;AAAA;AAAA;AAAA,EAKQ,eAAe,EAAA,EAAoB;AACzC,IAAA,IAAI,KAAK,GAAA,EAAM;AACb,MAAA,OAAO,GAAG,EAAE,CAAA,EAAA,CAAA;AAAA,IACd;AACA,IAAA,OAAO,CAAA,EAAA,CAAI,EAAA,GAAK,GAAA,EAAM,OAAA,CAAQ,CAAC,CAAC,CAAA,CAAA,CAAA;AAAA,EAClC;AAAA;AAAA;AAAA;AAAA,EAKQ,gBAAgB,MAAA,EAAsC;AAC5D,IAAA,QAAQ,MAAA;AAAQ,MACd,KAAK,QAAA;AACH,QAAA,OAAO,OAAA,CAAQ,IAAA;AAAA,MACjB,KAAK,QAAA;AACH,QAAA,OAAO,OAAA,CAAQ,IAAA;AAAA,MACjB,KAAK,OAAA;AACH,QAAA,OAAO,OAAA,CAAQ,KAAA;AAAA,MACjB,KAAK,SAAA;AACH,QAAA,OAAO,OAAA,CAAQ,IAAA;AAAA;AACnB,EACF;AAAA;AAAA;AAAA;AAAA,EAKQ,eAAe,MAAA,EAAmD;AACxE,IAAA,QAAQ,MAAA;AAAQ,MACd,KAAK,QAAA;AACH,QAAA,OAAO,OAAA;AAAA,MACT,KAAK,QAAA;AACH,QAAA,OAAO,KAAA;AAAA,MACT,KAAK,OAAA;AACH,QAAA,OAAO,KAAA;AAAA,MACT,KAAK,SAAA;AACH,QAAA,OAAO,QAAA;AAAA;AACX,EACF;AAAA;AAAA;AAAA;AAAA,EAKQ,KAAA,CAAM,WAAgC,IAAA,EAAsB;AAClE,IAAA,IAAI,CAAC,KAAK,SAAA,EAAW;AACnB,MAAA,OAAO,IAAA;AAAA,IACT;AACA,IAAA,OAAO,CAAA,EAAG,OAAO,SAAS,CAAC,GAAG,IAAI,CAAA,EAAG,OAAO,KAAK,CAAA,CAAA;AAAA,EACnD;AAAA;AAAA;AAAA;AAAA,EAKQ,YAAY,KAAA,EAAwB;AAC1C,IAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAE7B,MAAA,IAAI,KAAA,IAAS,CAAA,IAAK,KAAA,IAAS,CAAA,EAAG;AAC5B,QAAA,OAAO,CAAA,EAAA,CAAI,KAAA,GAAQ,GAAA,EAAK,OAAA,CAAQ,CAAC,CAAC,CAAA,CAAA,CAAA;AAAA,MACpC;AACA,MAAA,OAAO,KAAA,CAAM,QAAQ,CAAC,CAAA;AAAA,IACxB;AACA,IAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,MAAA,OAAO,IAAI,KAAK,CAAA,CAAA,CAAA;AAAA,IAClB;AACA,IAAA,IAAI,KAAA,CAAM,OAAA,CAAQ,KAAK,CAAA,EAAG;AACxB,MAAA,OAAO,CAAA,CAAA,EAAI,KAAA,CAAM,IAAA,CAAK,IAAI,CAAC,CAAA,CAAA,CAAA;AAAA,IAC7B;AACA,IAAA,OAAO,OAAO,KAAK,CAAA;AAAA,EACrB;AAAA;AAAA;AAAA;AAAA,EAKQ,IAAI,OAAA,EAAuB;AACjC,IAAA,OAAA,CAAQ,IAAI,OAAO,CAAA;AAAA,EACrB;AACF;ACzRO,IAAM,gBAAA,GAAmB,CAAC,cAAA,EAAgB,cAAA,EAAgB,eAAe,CAAA;AAKzE,IAAM,cAAA,GAAiB,CAAC,oBAAA,EAAsB,YAAA,EAAc,eAAe,YAAY,CAAA;AAsB9F,eAAsB,iBAAA,CAAkB,OAAA,GAA4B,EAAC,EAAsB;AACzF,EAAA,MAAM,EAAE,WAAW,gBAAA,EAAkB,MAAA,GAAS,gBAAgB,GAAA,GAAM,OAAA,CAAQ,GAAA,EAAI,EAAE,GAAI,OAAA;AAEtF,EAAA,MAAM,QAAkB,EAAC;AAEzB,EAAA,KAAA,MAAW,WAAW,QAAA,EAAU;AAC9B,IAAA,MAAM,OAAA,GAAU,MAAMC,SAAA,CAAK,OAAA,EAAS;AAAA,MAClC,GAAA;AAAA,MACA,MAAA;AAAA,MACA,QAAA,EAAU,IAAA;AAAA,MACV,KAAA,EAAO;AAAA,KACR,CAAA;AACD,IAAA,KAAA,CAAM,IAAA,CAAK,GAAG,OAAO,CAAA;AAAA,EACvB;AAGA,EAAA,MAAM,MAAA,GAAS,CAAC,GAAG,IAAI,IAAI,KAAK,CAAC,EAAE,IAAA,EAAK;AAExC,EAAA,OAAO,MAAA;AACT;AAKA,eAAsB,gBAAA,CACpBC,MAAA,EACA,OAAA,GAA4B,EAAC,EACV;AACnB,EAAA,MAAM,YAAA,GAAeC,YAAA,CAAQ,OAAA,CAAQ,GAAA,IAAOD,MAAI,CAAA;AAEhD,EAAA,IAAI,CAACE,aAAA,CAAW,YAAY,CAAA,EAAG;AAC7B,IAAA,MAAM,IAAI,KAAA,CAAM,CAAA,qBAAA,EAAwBF,MAAI,CAAA,CAAE,CAAA;AAAA,EAChD;AAGA,EAAA,MAAM,EAAE,QAAA,EAAS,GAAI,MAAM,OAAO,IAAS,CAAA;AAC3C,EAAA,MAAM,IAAA,GAAO,SAAS,YAAY,CAAA;AAElC,EAAA,IAAI,IAAA,CAAK,QAAO,EAAG;AAEjB,IAAA,OAAO,CAAC,YAAY,CAAA;AAAA,EACtB;AAGA,EAAA,OAAO,iBAAA,CAAkB;AAAA,IACvB,GAAG,OAAA;AAAA,IACH,GAAA,EAAK;AAAA,GACN,CAAA;AACH;AAKO,SAAS,WAAA,CAAY,OAAiB,MAAA,EAA2B;AACtE,EAAA,IAAI,CAAC,MAAA,EAAQ;AACX,IAAA,OAAO,KAAA;AAAA,EACT;AAEA,EAAA,MAAM,WAAA,GAAc,OAAO,WAAA,EAAY;AACvC,EAAA,OAAO,KAAA,CAAM,OAAO,CAAC,IAAA,KAAS,KAAK,WAAA,EAAY,CAAE,QAAA,CAAS,WAAW,CAAC,CAAA;AACxE;;;ACuMO,IAAM,SAAA,GAAY;AAAA,EACvB,OAAA,EAAS,CAAA;AAAA,EACT,iBAAA,EAAmB,CAAA;AAAA,EACnB,iBAAA,EAAmB,CAAA;AAAA,EACnB,eAAA,EAAiB,CAAA;AAAA,EACjB,mBAAA,EAAqB;AACvB;;;ACrSA,IAAI,gBAA6B,kBAAA,EAAmB;AAUpD,IAAI,gBAAA,GAA4C,IAAA;AAKzC,SAAS,kBAAA,GAAkC;AAChD,EAAA,OAAO;AAAA,IACL,YAAA,EAAc,IAAA;AAAA,IACd,QAAQ,EAAC;AAAA,IACT,SAAS;AAAC,GACZ;AACF;AAoBO,SAAS,eAAA,GAAgC;AAC9C,EAAA,OAAO,aAAA,CAAc,YAAA;AACvB;AAKO,SAAS,gBAAgB,KAAA,EAA2B;AACzD,EAAA,aAAA,CAAc,YAAA,GAAe,KAAA;AAC/B;AAKO,SAAS,SAAS,KAAA,EAAoB;AAC3C,EAAA,aAAA,CAAc,MAAA,CAAO,KAAK,KAAK,CAAA;AACjC;AAKO,SAAS,sBAAsB,IAAA,EAAsB;AAC1D,EAAA,IAAI,CAAC,cAAc,YAAA,EAAc;AAC/B,IAAA,MAAM,IAAI,MAAM,+CAA+C,CAAA;AAAA,EACjE;AACA,EAAA,aAAA,CAAc,YAAA,CAAa,KAAA,CAAM,IAAA,CAAK,IAAI,CAAA;AAC5C;AAKO,SAAS,SAAA,GAAqB;AACnC,EAAA,OAAO,aAAA,CAAc,MAAA;AACvB;AAKO,SAAS,kBAAA,GAA2B;AACzC,EAAA,gBAAA,GAAmB;AAAA,IACjB,YAAY,EAAC;AAAA,IACb,cAAc;AAAC,GACjB;AACF;AAKO,SAAS,gBAAA,GAAqC;AACnD,EAAA,MAAM,KAAA,GAAQ,gBAAA;AACd,EAAA,gBAAA,GAAmB,IAAA;AACnB,EAAA,OAAO,SAAS,EAAE,UAAA,EAAY,EAAC,EAAG,YAAA,EAAc,EAAC,EAAE;AACrD;AAKO,SAAS,gBAAgB,MAAA,EAA+B;AAC7D,EAAA,IAAI,gBAAA,EAAkB;AACpB,IAAA,gBAAA,CAAiB,UAAA,CAAW,KAAK,MAAM,CAAA;AAAA,EACzC;AACF;AAKO,SAAS,mBAAmB,OAAA,EAAkC;AACnE,EAAA,IAAI,gBAAA,EAAkB;AACpB,IAAA,gBAAA,CAAiB,YAAA,CAAa,KAAK,OAAO,CAAA;AAAA,EAC5C;AACF;;;AChGA,eAAsB,gBAAA,CACpB,KAAA,EACA,OAAA,GAA2B,EAAC,EACP;AACrB,EAAA,MAAM,SAAA,GAAY,KAAK,GAAA,EAAI;AAC3B,EAAA,MAAM,eAA8B,EAAC;AAMrC,EAAA,KAAA,MAAW,QAAQ,KAAA,EAAO;AACxB,IAAA,IAAI;AACF,MAAA,MAAM,OAAA,GAAUG,iBAAA,CAAc,IAAI,CAAA,CAAE,IAAA;AACpC,MAAA,MAAM,OAAO,OAAA,CAAA;AAAA,IACf,SAAS,KAAA,EAAO;AACd,MAAA,MAAM,IAAI,kBAAA,CAAmB,CAAA,0BAAA,EAA6B,IAAI,CAAA,CAAA,EAAI,MAAM,KAAc,CAAA;AAAA,IACxF;AAAA,EACF;AAGA,EAAA,MAAM,SAAS,SAAA,EAAU;AAGzB,EAAA,KAAA,MAAW,SAAS,MAAA,EAAQ;AAC1B,IAAA,MAAM,MAAA,GAAS,MAAM,YAAA,CAAa,KAAA,EAAO,OAAO,CAAA;AAChD,IAAA,YAAA,CAAa,KAAK,MAAM,CAAA;AAExB,IAAA,IAAI,OAAA,CAAQ,IAAA,IAAQ,MAAA,CAAO,MAAA,GAAS,CAAA,EAAG;AACrC,MAAA;AAAA,IACF;AAAA,EACF;AAGA,EAAA,MAAM,SAAS,WAAA,CAAY,YAAA,EAAc,IAAA,CAAK,GAAA,KAAQ,SAAS,CAAA;AAE/D,EAAA,OAAO,MAAA;AACT;AAKA,eAAe,YAAA,CAAa,OAAc,OAAA,EAAgD;AACxF,EAAA,MAAM,SAAA,GAAY,KAAK,GAAA,EAAI;AAC3B,EAAA,MAAM,cAA4B,EAAC;AACnC,EAAA,IAAI,MAAA,GAAS,CAAA;AACb,EAAA,IAAI,MAAA,GAAS,CAAA;AACb,EAAA,IAAI,MAAA,GAAS,CAAA;AACb,EAAA,IAAI,OAAA,GAAU,CAAA;AAGd,EAAA,KAAA,MAAW,IAAA,IAAQ,KAAA,CAAM,SAAA,IAAa,EAAC,EAAG;AACxC,IAAA,IAAI;AACF,MAAA,MAAM,IAAA,EAAK;AAAA,IACb,SAAS,KAAA,EAAO;AAEd,MAAA,MAAM,UAAU,KAAA,YAAiB,KAAA,GAAQ,KAAA,CAAM,OAAA,GAAU,OAAO,KAAK,CAAA;AACrE,MAAA,KAAA,MAAW,IAAA,IAAQ,MAAM,KAAA,EAAO;AAC9B,QAAA,WAAA,CAAY,IAAA,CAAK;AAAA,UACf,MAAM,IAAA,CAAK,IAAA;AAAA,UACX,MAAA,EAAQ,OAAA;AAAA,UACR,YAAY,EAAC;AAAA,UACb,cAAc,EAAC;AAAA,UACf,QAAA,EAAU,CAAA;AAAA,UACV,KAAA,EAAO,IAAI,KAAA,CAAM,CAAA,uBAAA,EAA0B,OAAO,CAAA,CAAE;AAAA,SACrD,CAAA;AACD,QAAA,MAAA,EAAA;AAAA,MACF;AACA,MAAA,OAAO;AAAA,QACL,MAAM,KAAA,CAAM,IAAA;AAAA,QACZ,KAAA,EAAO,WAAA;AAAA,QACP,MAAA;AAAA,QACA,MAAA;AAAA,QACA,MAAA;AAAA,QACA,OAAA;AAAA,QACA,QAAA,EAAU,IAAA,CAAK,GAAA,EAAI,GAAI;AAAA,OACzB;AAAA,IACF;AAAA,EACF;AAGA,EAAA,KAAA,MAAW,IAAA,IAAQ,MAAM,KAAA,EAAO;AAE9B,IAAA,IAAI,OAAA,CAAQ,MAAA,IAAU,CAAC,IAAA,CAAK,IAAA,CAAK,WAAA,EAAY,CAAE,QAAA,CAAS,OAAA,CAAQ,MAAA,CAAO,WAAA,EAAa,CAAA,EAAG;AACrF,MAAA,WAAA,CAAY,IAAA,CAAK;AAAA,QACf,MAAM,IAAA,CAAK,IAAA;AAAA,QACX,MAAA,EAAQ,SAAA;AAAA,QACR,YAAY,EAAC;AAAA,QACb,cAAc,EAAC;AAAA,QACf,QAAA,EAAU;AAAA,OACX,CAAA;AACD,MAAA,OAAA,EAAA;AACA,MAAA;AAAA,IACF;AAGA,IAAA,IAAI,IAAA,CAAK,IAAA,CAAK,UAAA,CAAW,WAAW,CAAA,EAAG;AACrC,MAAA,WAAA,CAAY,IAAA,CAAK;AAAA,QACf,MAAM,IAAA,CAAK,IAAA;AAAA,QACX,MAAA,EAAQ,SAAA;AAAA,QACR,YAAY,EAAC;AAAA,QACb,cAAc,EAAC;AAAA,QACf,QAAA,EAAU;AAAA,OACX,CAAA;AACD,MAAA,OAAA,EAAA;AACA,MAAA;AAAA,IACF;AAGA,IAAA,KAAA,MAAW,IAAA,IAAQ,KAAA,CAAM,UAAA,IAAc,EAAC,EAAG;AACzC,MAAA,IAAI;AACF,QAAA,MAAM,IAAA,EAAK;AAAA,MACb,SAAS,KAAA,EAAO;AACd,QAAA,MAAM,UAAU,KAAA,YAAiB,KAAA,GAAQ,KAAA,CAAM,OAAA,GAAU,OAAO,KAAK,CAAA;AACrE,QAAA,WAAA,CAAY,IAAA,CAAK;AAAA,UACf,MAAM,IAAA,CAAK,IAAA;AAAA,UACX,MAAA,EAAQ,OAAA;AAAA,UACR,YAAY,EAAC;AAAA,UACb,cAAc,EAAC;AAAA,UACf,QAAA,EAAU,CAAA;AAAA,UACV,KAAA,EAAO,IAAI,KAAA,CAAM,CAAA,wBAAA,EAA2B,OAAO,CAAA,CAAE;AAAA,SACtD,CAAA;AACD,QAAA,MAAA,EAAA;AACA,QAAA;AAAA,MACF;AAAA,IACF;AAGA,IAAA,MAAM,MAAA,GAAS,MAAM,WAAA,CAAY,IAAA,CAAK,MAAM,IAAA,CAAK,EAAA,EAAI,QAAQ,OAAO,CAAA;AACpE,IAAA,WAAA,CAAY,KAAK,MAAM,CAAA;AAEvB,IAAA,IAAI,MAAA,CAAO,WAAW,QAAA,EAAU;AAC9B,MAAA,MAAA,EAAA;AAAA,IACF,CAAA,MAAA,IAAW,MAAA,CAAO,MAAA,KAAW,QAAA,EAAU;AACrC,MAAA,MAAA,EAAA;AAAA,IACF,CAAA,MAAA,IAAW,MAAA,CAAO,MAAA,KAAW,OAAA,EAAS;AACpC,MAAA,MAAA,EAAA;AAAA,IACF;AAGA,IAAA,KAAA,MAAW,IAAA,IAAQ,KAAA,CAAM,SAAA,IAAa,EAAC,EAAG;AACxC,MAAA,IAAI;AACF,QAAA,MAAM,IAAA,EAAK;AAAA,MACb,CAAA,CAAA,MAAQ;AAAA,MAER;AAAA,IACF;AAGA,IAAA,IAAI,OAAA,CAAQ,IAAA,KAAS,MAAA,GAAS,CAAA,IAAK,SAAS,CAAA,CAAA,EAAI;AAC9C,MAAA;AAAA,IACF;AAAA,EACF;AAGA,EAAA,KAAA,MAAW,IAAA,IAAQ,KAAA,CAAM,QAAA,IAAY,EAAC,EAAG;AACvC,IAAA,IAAI;AACF,MAAA,MAAM,IAAA,EAAK;AAAA,IACb,CAAA,CAAA,MAAQ;AAAA,IAER;AAAA,EACF;AAEA,EAAA,OAAO;AAAA,IACL,MAAM,KAAA,CAAM,IAAA;AAAA,IACZ,KAAA,EAAO,WAAA;AAAA,IACP,MAAA;AAAA,IACA,MAAA;AAAA,IACA,MAAA;AAAA,IACA,OAAA;AAAA,IACA,QAAA,EAAU,IAAA,CAAK,GAAA,EAAI,GAAI;AAAA,GACzB;AACF;AAKA,eAAe,WAAA,CACb,IAAA,EACA,EAAA,EACA,OAAA,GAAU,GAAA,EACW;AACrB,EAAA,MAAM,SAAA,GAAY,KAAK,GAAA,EAAI;AAG3B,EAAA,kBAAA,EAAmB;AAEnB,EAAA,IAAI;AAEF,IAAA,MAAM,QAAQ,IAAA,CAAK;AAAA,MACjB,EAAA,EAAG;AAAA,MACH,IAAI,OAAA;AAAA,QAAe,CAAC,CAAA,EAAG,MAAA,KACrB,UAAA,CAAW,MAAM,MAAA,CAAO,IAAI,KAAA,CAAM,CAAA,qBAAA,EAAwB,OAAO,CAAA,EAAA,CAAI,CAAC,GAAG,OAAO;AAAA;AAClF,KACD,CAAA;AAGD,IAAA,MAAM,EAAE,UAAA,EAAY,YAAA,EAAa,GAAI,gBAAA,EAAiB;AAGtD,IAAA,MAAM,mBAAmB,UAAA,CAAW,MAAA,CAAO,CAAC,CAAA,KAAM,CAAC,EAAE,MAAM,CAAA;AAE3D,IAAA,IAAI,gBAAA,CAAiB,SAAS,CAAA,EAAG;AAE/B,MAAA,MAAM,QAAQ,IAAI,sBAAA;AAAA,QAChB,gBAAA,CAAiB,GAAA,CAAI,CAAC,CAAA,MAAO;AAAA,UAC3B,SAAS,CAAA,CAAE,OAAA;AAAA,UACX,UAAU,CAAA,CAAE,QAAA;AAAA,UACZ,QAAQ,CAAA,CAAE,MAAA;AAAA,UACV,OAAO,CAAA,CAAE;AAAA,SACX,CAAE;AAAA,OACJ;AAEA,MAAA,OAAO;AAAA,QACL,IAAA;AAAA,QACA,MAAA,EAAQ,QAAA;AAAA,QACR,UAAA;AAAA,QACA,YAAA;AAAA,QACA,QAAA,EAAU,IAAA,CAAK,GAAA,EAAI,GAAI,SAAA;AAAA,QACvB;AAAA,OACF;AAAA,IACF;AAGA,IAAA,OAAO;AAAA,MACL,IAAA;AAAA,MACA,MAAA,EAAQ,QAAA;AAAA,MACR,UAAA;AAAA,MACA,YAAA;AAAA,MACA,QAAA,EAAU,IAAA,CAAK,GAAA,EAAI,GAAI;AAAA,KACzB;AAAA,EACF,SAAS,KAAA,EAAO;AACd,IAAA,MAAM,EAAE,UAAA,EAAY,YAAA,EAAa,GAAI,gBAAA,EAAiB;AAEtD,IAAA,IAAI,KAAA,YAAiB,cAAA,IAAkB,KAAA,YAAiB,sBAAA,EAAwB;AAE9E,MAAA,OAAO;AAAA,QACL,IAAA;AAAA,QACA,MAAA,EAAQ,QAAA;AAAA,QACR,UAAA;AAAA,QACA,YAAA;AAAA,QACA,QAAA,EAAU,IAAA,CAAK,GAAA,EAAI,GAAI,SAAA;AAAA,QACvB;AAAA,OACF;AAAA,IACF;AAGA,IAAA,OAAO;AAAA,MACL,IAAA;AAAA,MACA,MAAA,EAAQ,OAAA;AAAA,MACR,UAAA;AAAA,MACA,YAAA;AAAA,MACA,QAAA,EAAU,IAAA,CAAK,GAAA,EAAI,GAAI,SAAA;AAAA,MACvB,KAAA,EAAO,iBAAiB,KAAA,GAAQ,KAAA,GAAQ,IAAI,KAAA,CAAM,MAAA,CAAO,KAAK,CAAC;AAAA,KACjE;AAAA,EACF;AACF;AAKA,SAAS,WAAA,CAAY,cAA6B,aAAA,EAAmC;AACnF,EAAA,IAAI,UAAA,GAAa,CAAA;AACjB,EAAA,IAAI,WAAA,GAAc,CAAA;AAClB,EAAA,IAAI,WAAA,GAAc,CAAA;AAClB,EAAA,IAAI,WAAA,GAAc,CAAA;AAClB,EAAA,IAAI,YAAA,GAAe,CAAA;AAEnB,EAAA,KAAA,MAAW,SAAS,YAAA,EAAc;AAChC,IAAA,UAAA,IAAc,MAAM,KAAA,CAAM,MAAA;AAC1B,IAAA,WAAA,IAAe,KAAA,CAAM,MAAA;AACrB,IAAA,WAAA,IAAe,KAAA,CAAM,MAAA;AACrB,IAAA,WAAA,IAAe,KAAA,CAAM,MAAA;AACrB,IAAA,YAAA,IAAgB,KAAA,CAAM,OAAA;AAAA,EACxB;AAEA,EAAA,OAAO;AAAA,IACL,OAAA,EAAS,OAAA;AAAA,IACT,SAAA,EAAA,iBAAW,IAAI,IAAA,EAAK,EAAE,WAAA,EAAY;AAAA,IAClC,MAAA,EAAQ,YAAA;AAAA,IACR,OAAA,EAAS;AAAA,MACP,aAAa,YAAA,CAAa,MAAA;AAAA,MAC1B,UAAA;AAAA,MACA,MAAA,EAAQ,WAAA;AAAA,MACR,MAAA,EAAQ,WAAA;AAAA,MACR,MAAA,EAAQ,WAAA;AAAA,MACR,OAAA,EAAS,YAAA;AAAA,MACT,QAAA,EAAU;AAAA;AACZ,GACF;AACF;AAKO,SAAS,YAAY,MAAA,EAA4B;AACtD,EAAA,IAAI,MAAA,CAAO,OAAA,CAAQ,MAAA,GAAS,CAAA,EAAG;AAC7B,IAAA,OAAO,SAAA,CAAU,eAAA;AAAA,EACnB;AACA,EAAA,IAAI,MAAA,CAAO,OAAA,CAAQ,MAAA,GAAS,CAAA,EAAG;AAC7B,IAAA,OAAO,SAAA,CAAU,iBAAA;AAAA,EACnB;AACA,EAAA,OAAO,SAAA,CAAU,OAAA;AACnB","file":"chunk-4BKZPVY4.cjs","sourcesContent":["/**\n * Custom error classes for EvalSense\n */\n\nexport class EvalSenseError extends Error {\n constructor(message: string) {\n super(message);\n this.name = \"EvalSenseError\";\n }\n}\n\nexport class AssertionError extends EvalSenseError {\n public readonly expected: unknown;\n public readonly actual: unknown;\n public readonly field?: string;\n\n constructor(message: string, expected?: unknown, actual?: unknown, field?: string) {\n super(message);\n this.name = \"AssertionError\";\n this.expected = expected;\n this.actual = actual;\n this.field = field;\n }\n}\n\nexport class DatasetError extends EvalSenseError {\n public readonly source?: string;\n\n constructor(message: string, source?: string) {\n super(message);\n this.name = \"DatasetError\";\n this.source = source;\n }\n}\n\nexport class IntegrityError extends EvalSenseError {\n public readonly missingIds?: string[];\n public readonly duplicateIds?: string[];\n\n constructor(message: string, missingIds?: string[], duplicateIds?: string[]) {\n super(message);\n this.name = \"IntegrityError\";\n this.missingIds = missingIds;\n this.duplicateIds = duplicateIds;\n }\n}\n\nexport class ConfigurationError extends EvalSenseError {\n constructor(message: string) {\n super(message);\n this.name = \"ConfigurationError\";\n }\n}\n\nexport class TestExecutionError extends EvalSenseError {\n public readonly testName: string;\n public readonly originalError?: Error;\n\n constructor(message: string, testName: string, originalError?: Error) {\n super(message);\n this.name = \"TestExecutionError\";\n this.testName = testName;\n this.originalError = originalError;\n }\n}\n\nexport class MultipleAssertionError extends EvalSenseError {\n public readonly failures: Array<{\n message: string;\n expected?: unknown;\n actual?: unknown;\n field?: string;\n }>;\n\n constructor(\n failures: Array<{ message: string; expected?: unknown; actual?: unknown; field?: string }>\n ) {\n const count = failures.length;\n const summary = failures.map((f) => ` - ${f.message}`).join(\"\\n\");\n super(`${count} assertion${count > 1 ? \"s\" : \"\"} failed:\\n${summary}`);\n this.name = \"MultipleAssertionError\";\n this.failures = failures;\n }\n}\n","/**\n * Confusion matrix computation\n */\n\nimport type { ConfusionMatrix } from \"../core/types.js\";\n\n/**\n * Builds a confusion matrix from actual and predicted values\n *\n * @param actual - Actual/predicted values from the model\n * @param expected - Expected/ground truth values\n * @returns ConfusionMatrix with matrix, labels, and total\n *\n * @example\n * ```ts\n * const matrix = buildConfusionMatrix(\n * [\"positive\", \"negative\", \"positive\"],\n * [\"positive\", \"positive\", \"positive\"]\n * );\n * // matrix.matrix[i][j] = count of expected[i] predicted as actual[j]\n * ```\n */\nexport function buildConfusionMatrix(actual: unknown[], expected: unknown[]): ConfusionMatrix {\n if (actual.length !== expected.length) {\n throw new Error(\n `Array length mismatch: actual has ${actual.length} elements, expected has ${expected.length}`\n );\n }\n\n // Collect all unique labels\n const labelSet = new Set<string>();\n for (const val of actual) {\n if (val !== undefined && val !== null) {\n labelSet.add(String(val));\n }\n }\n for (const val of expected) {\n if (val !== undefined && val !== null) {\n labelSet.add(String(val));\n }\n }\n\n // Sort labels for deterministic ordering\n const labels = Array.from(labelSet).sort();\n const labelIndex = new Map<string, number>();\n labels.forEach((label, idx) => labelIndex.set(label, idx));\n\n // Initialize confusion matrix with zeros\n const matrix: number[][] = labels.map(() => labels.map(() => 0));\n\n // Fill the matrix\n // Row = expected (ground truth), Column = actual (predicted)\n for (let i = 0; i < actual.length; i++) {\n const actualVal = actual[i];\n const expectedVal = expected[i];\n\n if (actualVal === undefined || actualVal === null) continue;\n if (expectedVal === undefined || expectedVal === null) continue;\n\n const actualIdx = labelIndex.get(String(actualVal));\n const expectedIdx = labelIndex.get(String(expectedVal));\n\n if (actualIdx !== undefined && expectedIdx !== undefined) {\n matrix[expectedIdx]![actualIdx]!++;\n }\n }\n\n const total = actual.filter(\n (v, i) => v !== undefined && v !== null && expected[i] !== undefined && expected[i] !== null\n ).length;\n\n return { matrix, labels, total };\n}\n\n/**\n * Gets the count from a confusion matrix for a specific cell\n */\nexport function getCount(cm: ConfusionMatrix, expectedLabel: string, actualLabel: string): number {\n const expectedIdx = cm.labels.indexOf(expectedLabel);\n const actualIdx = cm.labels.indexOf(actualLabel);\n\n if (expectedIdx === -1 || actualIdx === -1) {\n return 0;\n }\n\n return cm.matrix[expectedIdx]?.[actualIdx] ?? 0;\n}\n\n/**\n * Gets true positives for a class\n */\nexport function getTruePositives(cm: ConfusionMatrix, label: string): number {\n return getCount(cm, label, label);\n}\n\n/**\n * Gets false positives for a class (predicted as class but wasn't)\n */\nexport function getFalsePositives(cm: ConfusionMatrix, label: string): number {\n const labelIdx = cm.labels.indexOf(label);\n if (labelIdx === -1) return 0;\n\n let fp = 0;\n for (let i = 0; i < cm.labels.length; i++) {\n if (i !== labelIdx) {\n fp += cm.matrix[i]?.[labelIdx] ?? 0;\n }\n }\n return fp;\n}\n\n/**\n * Gets false negatives for a class (was class but predicted as something else)\n */\nexport function getFalseNegatives(cm: ConfusionMatrix, label: string): number {\n const labelIdx = cm.labels.indexOf(label);\n if (labelIdx === -1) return 0;\n\n let fn = 0;\n for (let j = 0; j < cm.labels.length; j++) {\n if (j !== labelIdx) {\n fn += cm.matrix[labelIdx]?.[j] ?? 0;\n }\n }\n return fn;\n}\n\n/**\n * Gets true negatives for a class\n */\nexport function getTrueNegatives(cm: ConfusionMatrix, label: string): number {\n const labelIdx = cm.labels.indexOf(label);\n if (labelIdx === -1) return 0;\n\n let tn = 0;\n for (let i = 0; i < cm.labels.length; i++) {\n for (let j = 0; j < cm.labels.length; j++) {\n if (i !== labelIdx && j !== labelIdx) {\n tn += cm.matrix[i]?.[j] ?? 0;\n }\n }\n }\n return tn;\n}\n\n/**\n * Gets support (total instances) for a class in ground truth\n */\nexport function getSupport(cm: ConfusionMatrix, label: string): number {\n const labelIdx = cm.labels.indexOf(label);\n if (labelIdx === -1) return 0;\n\n let support = 0;\n for (let j = 0; j < cm.labels.length; j++) {\n support += cm.matrix[labelIdx]?.[j] ?? 0;\n }\n return support;\n}\n\n/**\n * Formats a confusion matrix as a string table with axis labels\n *\n * Output format:\n * ```\n * Predicted → negative positive\n * Actual ↓\n * negative 5 1\n * positive 2 7\n * ```\n */\nexport function formatConfusionMatrix(cm: ConfusionMatrix): string {\n if (cm.labels.length === 0) {\n return \"Empty confusion matrix\";\n }\n\n const rowLabelPrefix = \" \"; // Indent for actual labels\n const maxLabelLen = Math.max(...cm.labels.map((l) => l.length), 8);\n const colWidth = Math.max(...cm.matrix.flat().map((n) => String(n).length), maxLabelLen);\n\n // Header row: \"Predicted →\" followed by column labels\n const predictedLabel = \"Predicted →\";\n const headerPadding = rowLabelPrefix.length + maxLabelLen + 2;\n const header =\n predictedLabel.padEnd(headerPadding) + cm.labels.map((l) => l.padStart(colWidth)).join(\" \");\n\n // Axis label row: \"Actual ↓\"\n const axisRow = \"Actual ↓\";\n\n // Data rows: indented actual labels + values\n const rows = cm.labels.map((label, i) => {\n const rowData = cm.matrix[i]!.map((n) => String(n).padStart(colWidth)).join(\" \");\n return rowLabelPrefix + label.padEnd(maxLabelLen) + \" \" + rowData;\n });\n\n return [header, axisRow, ...rows].join(\"\\n\");\n}\n","/**\n * JSON Reporter - deterministic JSON output\n */\n\nimport { writeFileSync } from \"node:fs\";\nimport stringify from \"fast-json-stable-stringify\";\nimport type { EvalReport } from \"../core/types.js\";\n\n/**\n * JSON Reporter for machine-readable output\n */\nexport class JsonReporter {\n /**\n * Formats a report as deterministic JSON\n */\n format(report: EvalReport): string {\n // Create a serializable version of the report\n const serializable = this.toSerializable(report);\n return stringify(serializable) ?? \"{}\";\n }\n\n /**\n * Writes report to a file\n */\n writeToFile(report: EvalReport, path: string): void {\n const json = this.format(report);\n writeFileSync(path, json, \"utf-8\");\n }\n\n /**\n * Converts report to a JSON-serializable format\n */\n private toSerializable(report: EvalReport): Record<string, unknown> {\n return {\n version: report.version,\n timestamp: report.timestamp,\n summary: report.summary,\n suites: report.suites.map((suite) => ({\n name: suite.name,\n passed: suite.passed,\n failed: suite.failed,\n errors: suite.errors,\n skipped: suite.skipped,\n duration: suite.duration,\n tests: suite.tests.map((test) => ({\n name: test.name,\n status: test.status,\n duration: test.duration,\n error: test.error\n ? {\n name: test.error.name,\n message: test.error.message,\n }\n : undefined,\n assertions: test.assertions.map((a) => ({\n type: a.type,\n passed: a.passed,\n message: a.message,\n expected: a.expected,\n actual: a.actual,\n field: a.field,\n class: a.class,\n })),\n fieldMetrics: test.fieldMetrics.map((fm) => ({\n field: fm.field,\n binarized: fm.binarized,\n binarizeThreshold: fm.binarizeThreshold,\n metrics: {\n accuracy: fm.metrics.accuracy,\n perClass: fm.metrics.perClass,\n macroAvg: fm.metrics.macroAvg,\n weightedAvg: fm.metrics.weightedAvg,\n confusionMatrix: {\n labels: fm.metrics.confusionMatrix.labels,\n matrix: fm.metrics.confusionMatrix.matrix,\n total: fm.metrics.confusionMatrix.total,\n },\n },\n })),\n })),\n })),\n integrity: report.integrity,\n };\n }\n}\n\n/**\n * Parses a JSON report back into an EvalReport\n */\nexport function parseReport(json: string): EvalReport {\n const data = JSON.parse(json) as EvalReport;\n return data;\n}\n","/**\n * Console Reporter - human-readable output\n */\n\nimport type { EvalReport, TestResult, FieldMetricResult } from \"../core/types.js\";\nimport { formatConfusionMatrix } from \"../statistics/confusion-matrix.js\";\n\n/**\n * ANSI color codes\n */\nconst colors = {\n reset: \"\\x1b[0m\",\n bold: \"\\x1b[1m\",\n dim: \"\\x1b[2m\",\n red: \"\\x1b[31m\",\n green: \"\\x1b[32m\",\n yellow: \"\\x1b[33m\",\n blue: \"\\x1b[34m\",\n cyan: \"\\x1b[36m\",\n gray: \"\\x1b[90m\",\n};\n\n/**\n * Status symbols\n */\nconst symbols = {\n pass: \"\\u2713\", // ✓\n fail: \"\\u2717\", // ✗\n error: \"!\",\n skip: \"-\",\n};\n\n/**\n * Console reporter for human-readable output\n */\nexport class ConsoleReporter {\n private useColors: boolean;\n\n constructor(useColors = true) {\n this.useColors = useColors && process.stdout.isTTY !== false;\n }\n\n /**\n * Prints the run header\n */\n printHeader(fileCount: number): void {\n this.log(\"\");\n this.log(this.color(\"bold\", `EvalSense v0.4.0`));\n this.log(this.color(\"dim\", `Running ${fileCount} eval file(s)...`));\n this.log(\"\");\n }\n\n /**\n * Prints the full report\n */\n printReport(report: EvalReport): void {\n // Print each suite\n for (const suite of report.suites) {\n this.printSuite(suite.name, suite.tests);\n }\n\n // Print summary\n this.printSummary(report);\n }\n\n /**\n * Prints a suite's results\n */\n private printSuite(name: string, tests: TestResult[]): void {\n this.log(this.color(\"bold\", ` ${name}`));\n this.log(\"\");\n\n for (const test of tests) {\n this.printTest(test);\n }\n\n this.log(\"\");\n }\n\n /**\n * Prints a single test result\n */\n private printTest(test: TestResult): void {\n const symbol = this.getStatusSymbol(test.status);\n const statusColor = this.getStatusColor(test.status);\n const duration = this.color(\"dim\", `(${test.duration}ms)`);\n\n this.log(` ${this.color(statusColor, symbol)} ${test.name} ${duration}`);\n\n // Print field metrics\n for (const fm of test.fieldMetrics) {\n this.printFieldMetrics(fm);\n }\n\n // Print confusion matrices if requested\n for (const fm of test.fieldMetrics) {\n if (fm.metrics.confusionMatrix && Object.keys(fm.metrics.confusionMatrix).length > 0) {\n this.printConfusionMatrix(fm);\n }\n }\n\n // Print error message (for both errors and assertion failures)\n if (test.error) {\n const prefix = test.status === \"error\" ? \"Error\" : \"Assertion Failed\";\n this.log(this.color(\"red\", ` ${prefix}: ${test.error.message}`));\n this.log(\"\");\n }\n\n // Print all assertions with actual vs expected\n for (const assertion of test.assertions) {\n const symbol = assertion.passed\n ? this.color(\"green\", symbols.pass)\n : this.color(\"red\", symbols.fail);\n const color = assertion.passed ? \"dim\" : \"red\";\n\n this.log(this.color(color, ` ${symbol} ${assertion.message}`));\n\n // Show expected/actual values for all assertions (pass and fail)\n if (assertion.expected !== undefined && assertion.actual !== undefined) {\n this.log(this.color(\"dim\", ` Expected: ${this.formatValue(assertion.expected)}`));\n this.log(this.color(\"dim\", ` Actual: ${this.formatValue(assertion.actual)}`));\n }\n }\n }\n\n /**\n * Prints field metrics summary\n */\n private printFieldMetrics(fm: FieldMetricResult): void {\n const { metrics, field, binarized, binarizeThreshold } = fm;\n\n const fieldLabel = binarized ? `${field} (binarized @ ${binarizeThreshold})` : field;\n\n this.log(\n this.color(\n \"cyan\",\n ` Field: ${fieldLabel} | Accuracy: ${this.pct(metrics.accuracy)} | F1: ${this.pct(metrics.macroAvg.f1)}`\n )\n );\n\n // Show per-class metrics if multiple classes\n if (Object.keys(metrics.perClass).length > 1) {\n for (const [cls, classMetrics] of Object.entries(metrics.perClass)) {\n this.log(\n this.color(\n \"dim\",\n ` ${cls}: P=${this.pct(classMetrics.precision)} R=${this.pct(classMetrics.recall)} F1=${this.pct(classMetrics.f1)} (n=${classMetrics.support})`\n )\n );\n }\n }\n }\n\n /**\n * Prints the summary\n */\n private printSummary(report: EvalReport): void {\n const { summary } = report;\n\n this.log(this.color(\"bold\", \" Summary\"));\n this.log(\"\");\n\n const passedStr = this.color(\"green\", `${summary.passed} passed`);\n const failedStr =\n summary.failed > 0\n ? this.color(\"red\", `${summary.failed} failed`)\n : `${summary.failed} failed`;\n const errorsStr =\n summary.errors > 0\n ? this.color(\"red\", `${summary.errors} errors`)\n : `${summary.errors} errors`;\n const skippedStr =\n summary.skipped > 0\n ? this.color(\"yellow\", `${summary.skipped} skipped`)\n : `${summary.skipped} skipped`;\n\n this.log(` Tests: ${passedStr}, ${failedStr}, ${errorsStr}, ${skippedStr}`);\n this.log(` Suites: ${summary.totalSuites}`);\n this.log(` Duration: ${this.formatDuration(summary.duration)}`);\n this.log(\"\");\n\n // Final status\n if (summary.failed === 0 && summary.errors === 0) {\n this.log(this.color(\"green\", \" All tests passed!\"));\n } else {\n this.log(this.color(\"red\", \" Some tests failed.\"));\n }\n\n this.log(\"\");\n }\n\n /**\n * Prints a confusion matrix\n */\n printConfusionMatrix(fm: FieldMetricResult): void {\n this.log(\"\");\n this.log(this.color(\"bold\", ` Confusion Matrix: ${fm.field}`));\n this.log(\"\");\n\n const matrixStr = formatConfusionMatrix(fm.metrics.confusionMatrix);\n for (const line of matrixStr.split(\"\\n\")) {\n this.log(` ${line}`);\n }\n\n this.log(\"\");\n }\n\n /**\n * Formats a percentage\n */\n private pct(value: number): string {\n return `${(value * 100).toFixed(1)}%`;\n }\n\n /**\n * Formats duration\n */\n private formatDuration(ms: number): string {\n if (ms < 1000) {\n return `${ms}ms`;\n }\n return `${(ms / 1000).toFixed(2)}s`;\n }\n\n /**\n * Gets status symbol\n */\n private getStatusSymbol(status: TestResult[\"status\"]): string {\n switch (status) {\n case \"passed\":\n return symbols.pass;\n case \"failed\":\n return symbols.fail;\n case \"error\":\n return symbols.error;\n case \"skipped\":\n return symbols.skip;\n }\n }\n\n /**\n * Gets status color\n */\n private getStatusColor(status: TestResult[\"status\"]): keyof typeof colors {\n switch (status) {\n case \"passed\":\n return \"green\";\n case \"failed\":\n return \"red\";\n case \"error\":\n return \"red\";\n case \"skipped\":\n return \"yellow\";\n }\n }\n\n /**\n * Applies color if enabled\n */\n private color(colorName: keyof typeof colors, text: string): string {\n if (!this.useColors) {\n return text;\n }\n return `${colors[colorName]}${text}${colors.reset}`;\n }\n\n /**\n * Formats a value for display\n */\n private formatValue(value: unknown): string {\n if (typeof value === \"number\") {\n // Format numbers as percentages if between 0 and 1\n if (value >= 0 && value <= 1) {\n return `${(value * 100).toFixed(1)}%`;\n }\n return value.toFixed(4);\n }\n if (typeof value === \"string\") {\n return `\"${value}\"`;\n }\n if (Array.isArray(value)) {\n return `[${value.join(\", \")}]`;\n }\n return String(value);\n }\n\n /**\n * Logs a line\n */\n private log(message: string): void {\n console.log(message);\n }\n}\n","/**\n * Test file discovery - finds *.eval.js files\n */\n\nimport { glob } from \"glob\";\nimport { resolve, dirname } from \"node:path\";\nimport { existsSync } from \"node:fs\";\n\n/**\n * Default patterns for eval files\n */\nexport const DEFAULT_PATTERNS = [\"**/*.eval.js\", \"**/*.eval.ts\", \"**/*.eval.mjs\"];\n\n/**\n * Patterns to ignore\n */\nexport const DEFAULT_IGNORE = [\"**/node_modules/**\", \"**/dist/**\", \"**/build/**\", \"**/.git/**\"];\n\n/**\n * Options for file discovery\n */\nexport interface DiscoveryOptions {\n /** Patterns to match (default: *.eval.{js,ts,mjs}) */\n patterns?: string[];\n /** Patterns to ignore */\n ignore?: string[];\n /** Base directory to search from */\n cwd?: string;\n /** Filter pattern for test names */\n filter?: string;\n}\n\n/**\n * Discovers eval files matching the patterns\n *\n * @param options - Discovery options\n * @returns Array of absolute file paths\n */\nexport async function discoverEvalFiles(options: DiscoveryOptions = {}): Promise<string[]> {\n const { patterns = DEFAULT_PATTERNS, ignore = DEFAULT_IGNORE, cwd = process.cwd() } = options;\n\n const files: string[] = [];\n\n for (const pattern of patterns) {\n const matches = await glob(pattern, {\n cwd,\n ignore,\n absolute: true,\n nodir: true,\n });\n files.push(...matches);\n }\n\n // Remove duplicates and sort\n const unique = [...new Set(files)].sort();\n\n return unique;\n}\n\n/**\n * Discovers eval files from a specific path (file or directory)\n */\nexport async function discoverFromPath(\n path: string,\n options: DiscoveryOptions = {}\n): Promise<string[]> {\n const absolutePath = resolve(process.cwd(), path);\n\n if (!existsSync(absolutePath)) {\n throw new Error(`Path does not exist: ${path}`);\n }\n\n // Check if it's a file\n const { statSync } = await import(\"node:fs\");\n const stat = statSync(absolutePath);\n\n if (stat.isFile()) {\n // Single file\n return [absolutePath];\n }\n\n // Directory - discover within it\n return discoverEvalFiles({\n ...options,\n cwd: absolutePath,\n });\n}\n\n/**\n * Filters file paths by a pattern\n */\nexport function filterFiles(files: string[], filter?: string): string[] {\n if (!filter) {\n return files;\n }\n\n const filterLower = filter.toLowerCase();\n return files.filter((file) => file.toLowerCase().includes(filterLower));\n}\n\n/**\n * Groups files by their directory\n */\nexport function groupByDirectory(files: string[]): Map<string, string[]> {\n const groups = new Map<string, string[]>();\n\n for (const file of files) {\n const dir = dirname(file);\n const existing = groups.get(dir) ?? [];\n existing.push(file);\n groups.set(dir, existing);\n }\n\n return groups;\n}\n","/**\n * Core type definitions for EvalSense\n */\n\n// ============================================================================\n// Alignment Types\n// ============================================================================\n\n/**\n * A record aligned between actual (model output) and expected (ground truth)\n */\nexport interface AlignedRecord {\n id: string;\n actual: Record<string, unknown>;\n expected: Record<string, unknown>;\n}\n\n/**\n * Output from runModel() - predictions with IDs for alignment\n */\nexport interface Prediction {\n id: string;\n [field: string]: unknown;\n}\n\n// ============================================================================\n// LLM Types\n// ============================================================================\n\n/**\n * JSON Schema for structured LLM outputs\n */\nexport interface JSONSchema {\n type: string;\n properties?: Record<string, unknown>;\n required?: string[];\n [key: string]: unknown;\n}\n\n/**\n * LLM client interface for metric evaluation\n */\nexport interface LLMClient {\n /**\n * Generate a text completion from a prompt\n */\n complete(prompt: string): Promise<string>;\n\n /**\n * Generate a structured JSON completion (optional)\n */\n completeStructured?<T>(prompt: string, schema: JSONSchema): Promise<T>;\n}\n\n// ============================================================================\n// Metric Types\n// ============================================================================\n\n/**\n * Output from an LLM metric evaluation\n */\nexport interface MetricOutput {\n id: string;\n metric: string;\n score: number;\n label?: string;\n\n /** LLM's reasoning/explanation (for LLM-based metrics) */\n reasoning?: string;\n\n /** Evaluation mode used (for LLM-based metrics) */\n evaluationMode?: \"per-row\" | \"batch\";\n}\n\n/**\n * Configuration for a metric function\n */\nexport interface MetricConfig {\n outputs: Array<{ id: string; output: string }>;\n context?: string[];\n query?: string[];\n source?: string[];\n\n /** LLM client override (defaults to global client) */\n llmClient?: LLMClient;\n\n /** Evaluation mode: per-row (accurate, expensive) or batch (cheaper, potentially less accurate) */\n evaluationMode?: \"per-row\" | \"batch\";\n\n /** Custom prompt template override */\n customPrompt?: string;\n\n /** LLM temperature (default: 0) */\n temperature?: number;\n\n /** Max tokens per completion */\n maxTokens?: number;\n\n /** Timeout in milliseconds */\n timeout?: number;\n}\n\n/**\n * A metric function that evaluates outputs\n */\nexport type MetricFn = (config: MetricConfig) => Promise<MetricOutput[]>;\n\n// ============================================================================\n// Statistics Types\n// ============================================================================\n\n/**\n * Confusion matrix with labels\n */\nexport interface ConfusionMatrix {\n matrix: number[][];\n labels: string[];\n total: number;\n}\n\n/**\n * Per-class classification metrics\n */\nexport interface ClassMetrics {\n precision: number;\n recall: number;\n f1: number;\n support: number;\n}\n\n/**\n * Full classification metrics result\n */\nexport interface ClassificationMetrics {\n accuracy: number;\n perClass: Record<string, ClassMetrics>;\n macroAvg: { precision: number; recall: number; f1: number };\n weightedAvg: { precision: number; recall: number; f1: number };\n confusionMatrix: ConfusionMatrix;\n}\n\n/**\n * Regression metrics result\n */\nexport interface RegressionMetrics {\n mae: number;\n mse: number;\n rmse: number;\n r2: number;\n}\n\n// ============================================================================\n// Field Evaluation Types\n// ============================================================================\n\n/**\n * Result of evaluating a single field across all predictions\n */\nexport interface FieldMetricResult {\n field: string;\n metrics: ClassificationMetrics;\n binarized: boolean;\n binarizeThreshold?: number;\n}\n\n// ============================================================================\n// Test & Suite Types\n// ============================================================================\n\n/**\n * Test function signature\n */\nexport type TestFn = () => Promise<void> | void;\n\n/**\n * An individual eval test\n */\nexport interface EvalTest {\n name: string;\n fn: TestFn;\n}\n\n/**\n * A test suite (describe block)\n */\nexport interface Suite {\n name: string;\n tests: EvalTest[];\n beforeAll?: TestFn[];\n afterAll?: TestFn[];\n beforeEach?: TestFn[];\n afterEach?: TestFn[];\n}\n\n/**\n * Current test execution context\n */\nexport interface TestContext {\n currentSuite: Suite | null;\n suites: Suite[];\n results: SuiteResult[];\n}\n\n// ============================================================================\n// Assertion Types\n// ============================================================================\n\n/**\n * Result of a single assertion\n */\nexport interface AssertionResult {\n type: string;\n passed: boolean;\n message: string;\n expected?: unknown;\n actual?: unknown;\n field?: string;\n class?: string;\n}\n\n// ============================================================================\n// Result & Report Types\n// ============================================================================\n\n/**\n * Result of a single test\n */\nexport interface TestResult {\n name: string;\n status: \"passed\" | \"failed\" | \"error\" | \"skipped\";\n assertions: AssertionResult[];\n fieldMetrics: FieldMetricResult[];\n duration: number;\n error?: Error;\n}\n\n/**\n * Result of a test suite\n */\nexport interface SuiteResult {\n name: string;\n tests: TestResult[];\n passed: number;\n failed: number;\n errors: number;\n skipped: number;\n duration: number;\n}\n\n/**\n * Integrity check results for a dataset\n */\nexport interface IntegrityResult {\n valid: boolean;\n totalRecords: number;\n missingIds: string[];\n duplicateIds: string[];\n missingFields: Array<{ id: string; fields: string[] }>;\n}\n\n/**\n * Final evaluation report\n */\nexport interface EvalReport {\n version: string;\n timestamp: string;\n suites: SuiteResult[];\n summary: {\n totalSuites: number;\n totalTests: number;\n passed: number;\n failed: number;\n errors: number;\n skipped: number;\n duration: number;\n };\n integrity?: IntegrityResult;\n}\n\n// ============================================================================\n// CLI Types\n// ============================================================================\n\n/**\n * CLI configuration options\n */\nexport interface CLIOptions {\n filter?: string;\n output?: string;\n reporter?: \"json\" | \"console\" | \"both\";\n bail?: boolean;\n timeout?: number;\n}\n\n/**\n * Exit codes for CI integration\n */\nexport const ExitCodes = {\n SUCCESS: 0,\n ASSERTION_FAILURE: 1,\n INTEGRITY_FAILURE: 2,\n EXECUTION_ERROR: 3,\n CONFIGURATION_ERROR: 4,\n} as const;\n\nexport type ExitCode = (typeof ExitCodes)[keyof typeof ExitCodes];\n","/**\n * Test context management for EvalSense\n * Manages the global state during test execution\n */\n\nimport type { Suite, EvalTest, TestContext, AssertionResult, FieldMetricResult } from \"./types.js\";\n\n/**\n * Global test context - singleton for the current test run\n */\nlet globalContext: TestContext = createEmptyContext();\n\n/**\n * Current test execution state\n */\ninterface CurrentTestState {\n assertions: AssertionResult[];\n fieldMetrics: FieldMetricResult[];\n}\n\nlet currentTestState: CurrentTestState | null = null;\n\n/**\n * Creates an empty test context\n */\nexport function createEmptyContext(): TestContext {\n return {\n currentSuite: null,\n suites: [],\n results: [],\n };\n}\n\n/**\n * Gets the global test context\n */\nexport function getContext(): TestContext {\n return globalContext;\n}\n\n/**\n * Resets the global test context (used between test runs)\n */\nexport function resetContext(): void {\n globalContext = createEmptyContext();\n currentTestState = null;\n}\n\n/**\n * Gets the current suite being defined\n */\nexport function getCurrentSuite(): Suite | null {\n return globalContext.currentSuite;\n}\n\n/**\n * Sets the current suite being defined\n */\nexport function setCurrentSuite(suite: Suite | null): void {\n globalContext.currentSuite = suite;\n}\n\n/**\n * Adds a suite to the context\n */\nexport function addSuite(suite: Suite): void {\n globalContext.suites.push(suite);\n}\n\n/**\n * Adds a test to the current suite\n */\nexport function addTestToCurrentSuite(test: EvalTest): void {\n if (!globalContext.currentSuite) {\n throw new Error(\"Cannot add test outside of a describe() block\");\n }\n globalContext.currentSuite.tests.push(test);\n}\n\n/**\n * Gets all registered suites\n */\nexport function getSuites(): Suite[] {\n return globalContext.suites;\n}\n\n/**\n * Starts a new test execution (for collecting assertions)\n */\nexport function startTestExecution(): void {\n currentTestState = {\n assertions: [],\n fieldMetrics: [],\n };\n}\n\n/**\n * Ends the current test execution and returns collected data\n */\nexport function endTestExecution(): CurrentTestState {\n const state = currentTestState;\n currentTestState = null;\n return state ?? { assertions: [], fieldMetrics: [] };\n}\n\n/**\n * Records an assertion result in the current test\n */\nexport function recordAssertion(result: AssertionResult): void {\n if (currentTestState) {\n currentTestState.assertions.push(result);\n }\n}\n\n/**\n * Records field metrics in the current test\n */\nexport function recordFieldMetrics(metrics: FieldMetricResult): void {\n if (currentTestState) {\n currentTestState.fieldMetrics.push(metrics);\n }\n}\n\n/**\n * Checks if we're currently executing a test\n */\nexport function isInTestExecution(): boolean {\n return currentTestState !== null;\n}\n\n/**\n * Gets the current test state (for assertions to check)\n */\nexport function getCurrentTestState(): CurrentTestState | null {\n return currentTestState;\n}\n","/**\n * Test executor - runs discovered eval files\n */\n\nimport { pathToFileURL } from \"node:url\";\nimport type { Suite, TestResult, SuiteResult, EvalReport } from \"../core/types.js\";\nimport { ExitCodes } from \"../core/types.js\";\nimport { getSuites, startTestExecution, endTestExecution } from \"../core/context.js\";\nimport { AssertionError, TestExecutionError, MultipleAssertionError } from \"../core/errors.js\";\n\n/**\n * Options for test execution\n */\nexport interface ExecutorOptions {\n /** Stop on first failure */\n bail?: boolean;\n /** Test timeout in ms */\n timeout?: number;\n /** Filter pattern for test names */\n filter?: string;\n}\n\n/**\n * Executes all eval files and returns results\n */\nexport async function executeEvalFiles(\n files: string[],\n options: ExecutorOptions = {}\n): Promise<EvalReport> {\n const startTime = Date.now();\n const suiteResults: SuiteResult[] = [];\n\n // Load all eval files (this registers suites via describe())\n // Note: We don't reset context here because in CLI usage, each run\n // is a fresh Node process. For programmatic usage, call resetContext()\n // before calling this function if you need a clean slate.\n for (const file of files) {\n try {\n const fileUrl = pathToFileURL(file).href;\n await import(fileUrl);\n } catch (error) {\n throw new TestExecutionError(`Failed to load eval file: ${file}`, file, error as Error);\n }\n }\n\n // Get all registered suites\n const suites = getSuites();\n\n // Execute each suite\n for (const suite of suites) {\n const result = await executeSuite(suite, options);\n suiteResults.push(result);\n\n if (options.bail && result.failed > 0) {\n break;\n }\n }\n\n // Build report\n const report = buildReport(suiteResults, Date.now() - startTime);\n\n return report;\n}\n\n/**\n * Executes a single suite\n */\nasync function executeSuite(suite: Suite, options: ExecutorOptions): Promise<SuiteResult> {\n const startTime = Date.now();\n const testResults: TestResult[] = [];\n let passed = 0;\n let failed = 0;\n let errors = 0;\n let skipped = 0;\n\n // Run beforeAll hooks\n for (const hook of suite.beforeAll ?? []) {\n try {\n await hook();\n } catch (error) {\n // beforeAll failure fails all tests\n const message = error instanceof Error ? error.message : String(error);\n for (const test of suite.tests) {\n testResults.push({\n name: test.name,\n status: \"error\",\n assertions: [],\n fieldMetrics: [],\n duration: 0,\n error: new Error(`beforeAll hook failed: ${message}`),\n });\n errors++;\n }\n return {\n name: suite.name,\n tests: testResults,\n passed,\n failed,\n errors,\n skipped,\n duration: Date.now() - startTime,\n };\n }\n }\n\n // Run each test\n for (const test of suite.tests) {\n // Check filter\n if (options.filter && !test.name.toLowerCase().includes(options.filter.toLowerCase())) {\n testResults.push({\n name: test.name,\n status: \"skipped\",\n assertions: [],\n fieldMetrics: [],\n duration: 0,\n });\n skipped++;\n continue;\n }\n\n // Check if skipped\n if (test.name.startsWith(\"[SKIPPED]\")) {\n testResults.push({\n name: test.name,\n status: \"skipped\",\n assertions: [],\n fieldMetrics: [],\n duration: 0,\n });\n skipped++;\n continue;\n }\n\n // Run beforeEach hooks\n for (const hook of suite.beforeEach ?? []) {\n try {\n await hook();\n } catch (error) {\n const message = error instanceof Error ? error.message : String(error);\n testResults.push({\n name: test.name,\n status: \"error\",\n assertions: [],\n fieldMetrics: [],\n duration: 0,\n error: new Error(`beforeEach hook failed: ${message}`),\n });\n errors++;\n continue;\n }\n }\n\n // Execute the test\n const result = await executeTest(test.name, test.fn, options.timeout);\n testResults.push(result);\n\n if (result.status === \"passed\") {\n passed++;\n } else if (result.status === \"failed\") {\n failed++;\n } else if (result.status === \"error\") {\n errors++;\n }\n\n // Run afterEach hooks\n for (const hook of suite.afterEach ?? []) {\n try {\n await hook();\n } catch {\n // Log but don't fail the test\n }\n }\n\n // Bail on failure\n if (options.bail && (failed > 0 || errors > 0)) {\n break;\n }\n }\n\n // Run afterAll hooks\n for (const hook of suite.afterAll ?? []) {\n try {\n await hook();\n } catch {\n // Log but don't fail\n }\n }\n\n return {\n name: suite.name,\n tests: testResults,\n passed,\n failed,\n errors,\n skipped,\n duration: Date.now() - startTime,\n };\n}\n\n/**\n * Executes a single test with timeout\n */\nasync function executeTest(\n name: string,\n fn: () => Promise<void> | void,\n timeout = 30000\n): Promise<TestResult> {\n const startTime = Date.now();\n\n // Start collecting assertions\n startTestExecution();\n\n try {\n // Run with timeout\n await Promise.race([\n fn(),\n new Promise<never>((_, reject) =>\n setTimeout(() => reject(new Error(`Test timed out after ${timeout}ms`)), timeout)\n ),\n ]);\n\n // Test completed - collect results and check for assertion failures\n const { assertions, fieldMetrics } = endTestExecution();\n\n // Check if any assertions failed\n const failedAssertions = assertions.filter((a) => !a.passed);\n\n if (failedAssertions.length > 0) {\n // Multiple assertion failures - throw combined error\n const error = new MultipleAssertionError(\n failedAssertions.map((a) => ({\n message: a.message,\n expected: a.expected,\n actual: a.actual,\n field: a.field,\n }))\n );\n\n return {\n name,\n status: \"failed\",\n assertions,\n fieldMetrics,\n duration: Date.now() - startTime,\n error,\n };\n }\n\n // All assertions passed\n return {\n name,\n status: \"passed\",\n assertions,\n fieldMetrics,\n duration: Date.now() - startTime,\n };\n } catch (error) {\n const { assertions, fieldMetrics } = endTestExecution();\n\n if (error instanceof AssertionError || error instanceof MultipleAssertionError) {\n // Assertion failure\n return {\n name,\n status: \"failed\",\n assertions,\n fieldMetrics,\n duration: Date.now() - startTime,\n error,\n };\n }\n\n // Execution error\n return {\n name,\n status: \"error\",\n assertions,\n fieldMetrics,\n duration: Date.now() - startTime,\n error: error instanceof Error ? error : new Error(String(error)),\n };\n }\n}\n\n/**\n * Builds the final report from suite results\n */\nfunction buildReport(suiteResults: SuiteResult[], totalDuration: number): EvalReport {\n let totalTests = 0;\n let totalPassed = 0;\n let totalFailed = 0;\n let totalErrors = 0;\n let totalSkipped = 0;\n\n for (const suite of suiteResults) {\n totalTests += suite.tests.length;\n totalPassed += suite.passed;\n totalFailed += suite.failed;\n totalErrors += suite.errors;\n totalSkipped += suite.skipped;\n }\n\n return {\n version: \"1.0.0\",\n timestamp: new Date().toISOString(),\n suites: suiteResults,\n summary: {\n totalSuites: suiteResults.length,\n totalTests,\n passed: totalPassed,\n failed: totalFailed,\n errors: totalErrors,\n skipped: totalSkipped,\n duration: totalDuration,\n },\n };\n}\n\n/**\n * Determines exit code from report\n */\nexport function getExitCode(report: EvalReport): number {\n if (report.summary.errors > 0) {\n return ExitCodes.EXECUTION_ERROR;\n }\n if (report.summary.failed > 0) {\n return ExitCodes.ASSERTION_FAILURE;\n }\n return ExitCodes.SUCCESS;\n}\n"]}
|