@m4trix/evals 0.18.0 → 0.19.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/cli-simple.cjs +91 -29
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +91 -29
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +61 -19
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +61 -19
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +58 -17
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +13 -8
- package/dist/index.js +58 -17
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -523,7 +523,11 @@ var Metric = {
|
|
|
523
523
|
name: config.name,
|
|
524
524
|
aggregate: config.aggregate,
|
|
525
525
|
format: config.format,
|
|
526
|
-
make: (data) => ({
|
|
526
|
+
make: (data, options) => ({
|
|
527
|
+
id: config.id,
|
|
528
|
+
data,
|
|
529
|
+
...options?.name !== void 0 && { name: options.name }
|
|
530
|
+
})
|
|
527
531
|
};
|
|
528
532
|
registry.set(config.id, def);
|
|
529
533
|
return def;
|
|
@@ -545,25 +549,61 @@ var ScoreAggregate = {
|
|
|
545
549
|
const count = values.length || 1;
|
|
546
550
|
const result = {};
|
|
547
551
|
for (const field of fields) {
|
|
548
|
-
result[field] = values.reduce(
|
|
552
|
+
result[field] = values.reduce(
|
|
553
|
+
(s, v) => s + (v[field] ?? 0),
|
|
554
|
+
0
|
|
555
|
+
) / count;
|
|
549
556
|
}
|
|
550
557
|
return result;
|
|
551
558
|
};
|
|
552
559
|
},
|
|
553
|
-
/** Average
|
|
554
|
-
averageWithVariance(
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
560
|
+
/** Average selected numeric fields, with sample std dev tracked for `value`. */
|
|
561
|
+
averageWithVariance(fields) {
|
|
562
|
+
return (values) => {
|
|
563
|
+
const count = values.length;
|
|
564
|
+
const result = {};
|
|
565
|
+
for (const field of fields) {
|
|
566
|
+
result[field] = count === 0 ? 0 : values.reduce(
|
|
567
|
+
(sum, item) => sum + (item[field] ?? 0),
|
|
568
|
+
0
|
|
569
|
+
) / count;
|
|
570
|
+
}
|
|
571
|
+
const valueField = "value";
|
|
572
|
+
const hasValueField = fields.includes(valueField);
|
|
573
|
+
if (count === 0) {
|
|
574
|
+
if (hasValueField) {
|
|
575
|
+
result[valueField] = 0;
|
|
576
|
+
}
|
|
577
|
+
return {
|
|
578
|
+
...result,
|
|
579
|
+
stdDev: void 0,
|
|
580
|
+
count: 0
|
|
581
|
+
};
|
|
582
|
+
}
|
|
583
|
+
let stdDev;
|
|
584
|
+
if (hasValueField && count >= 2) {
|
|
585
|
+
const sum = values.reduce(
|
|
586
|
+
(s, v) => s + (v[valueField] ?? 0),
|
|
587
|
+
0
|
|
588
|
+
);
|
|
589
|
+
const sumSq = values.reduce(
|
|
590
|
+
(s, v) => {
|
|
591
|
+
const value = v[valueField] ?? 0;
|
|
592
|
+
return s + value * value;
|
|
593
|
+
},
|
|
594
|
+
0
|
|
595
|
+
);
|
|
596
|
+
const mean = sum / count;
|
|
597
|
+
const variance = (sumSq - count * mean * mean) / (count - 1);
|
|
598
|
+
stdDev = variance > 0 ? Math.sqrt(variance) : 0;
|
|
599
|
+
}
|
|
600
|
+
return {
|
|
601
|
+
...values[0],
|
|
602
|
+
...result,
|
|
603
|
+
stdDev,
|
|
604
|
+
count
|
|
605
|
+
};
|
|
606
|
+
};
|
|
567
607
|
},
|
|
568
608
|
/** All runs must pass. Use for binary scores. */
|
|
569
609
|
all(values) {
|
|
@@ -597,6 +637,7 @@ var Score = {
|
|
|
597
637
|
id: config.id,
|
|
598
638
|
data,
|
|
599
639
|
...passed !== void 0 && { passed },
|
|
640
|
+
...options?.name !== void 0 && { name: options.name },
|
|
600
641
|
def
|
|
601
642
|
// Attach def so rendering/aggregation works without registry lookup
|
|
602
643
|
};
|
|
@@ -665,7 +706,7 @@ var percentScore = Score.of({
|
|
|
665
706
|
displayStrategy: "bar",
|
|
666
707
|
formatValue: (data) => data.value.toFixed(2),
|
|
667
708
|
formatAggregate: (data) => data.stdDev != null ? `Avg: ${data.value.toFixed(2)} \xB1 ${data.stdDev.toFixed(2)}` : `Avg: ${data.value.toFixed(2)}`,
|
|
668
|
-
aggregateValues: Score.aggregate.averageWithVariance
|
|
709
|
+
aggregateValues: Score.aggregate.averageWithVariance(["value"])
|
|
669
710
|
});
|
|
670
711
|
var deltaScore = Score.of({
|
|
671
712
|
id: "delta",
|