@m4trix/evals 0.15.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli-simple.cjs +36 -27
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +36 -27
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +48 -26
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +48 -26
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +35 -27
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +49 -4
- package/dist/index.js +34 -28
- package/dist/index.js.map +1 -1
- package/package.json +3 -2
package/dist/index.cjs
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
var effect = require('effect');
|
|
4
|
-
var
|
|
4
|
+
var jsonDiff = require('json-diff');
|
|
5
5
|
var crypto = require('crypto');
|
|
6
6
|
var fs = require('fs');
|
|
7
7
|
var path = require('path');
|
|
@@ -660,46 +660,48 @@ var binaryScore = Score.of({
|
|
|
660
660
|
},
|
|
661
661
|
aggregate: aggregateAll
|
|
662
662
|
});
|
|
663
|
-
function
|
|
663
|
+
function createDiffString(expected, actual, diffOptions) {
|
|
664
|
+
const opts = { ...diffOptions, color: false };
|
|
665
|
+
const result = jsonDiff.diffString(expected, actual, opts);
|
|
666
|
+
return typeof result === "string" ? result : "";
|
|
667
|
+
}
|
|
668
|
+
function formatLogMessage(msg) {
|
|
669
|
+
if (typeof msg === "string")
|
|
670
|
+
return msg;
|
|
664
671
|
try {
|
|
665
|
-
|
|
672
|
+
if (msg !== null && typeof msg === "object") {
|
|
673
|
+
return JSON.stringify(msg, null, 2);
|
|
674
|
+
}
|
|
675
|
+
return String(msg);
|
|
666
676
|
} catch {
|
|
667
|
-
return String(
|
|
677
|
+
return String(msg);
|
|
668
678
|
}
|
|
669
679
|
}
|
|
670
|
-
function
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
partLines.pop();
|
|
677
|
-
}
|
|
678
|
-
for (const line of partLines) {
|
|
679
|
-
lines.push(`${prefix} ${line}`);
|
|
680
|
-
}
|
|
681
|
-
}
|
|
682
|
-
return lines.join("\n");
|
|
680
|
+
function createLogEntry(message, options) {
|
|
681
|
+
return {
|
|
682
|
+
type: "log",
|
|
683
|
+
label: options?.label,
|
|
684
|
+
message: formatLogMessage(message)
|
|
685
|
+
};
|
|
683
686
|
}
|
|
684
|
-
function
|
|
685
|
-
|
|
686
|
-
const actualStr = toJsonLines(actual);
|
|
687
|
-
const changes = diff.diffLines(expectedStr, actualStr);
|
|
688
|
-
return formatDiffString(changes);
|
|
687
|
+
function getLogLines(entry) {
|
|
688
|
+
return entry.message.split("\n");
|
|
689
689
|
}
|
|
690
690
|
function createDiffLogEntry(expected, actual, options) {
|
|
691
|
-
const
|
|
691
|
+
const { label, ...diffOpts } = options ?? {};
|
|
692
|
+
const diff = createDiffString(expected, actual, diffOpts);
|
|
692
693
|
return {
|
|
693
694
|
type: "diff",
|
|
694
|
-
label
|
|
695
|
+
label,
|
|
695
696
|
expected,
|
|
696
697
|
actual,
|
|
697
698
|
diff: diff || "(no differences)"
|
|
698
699
|
};
|
|
699
700
|
}
|
|
700
701
|
function printJsonDiff(expected, actual, options = {}) {
|
|
701
|
-
const
|
|
702
|
-
|
|
702
|
+
const { color = true, ...diffOpts } = options;
|
|
703
|
+
const diff = createDiffString(expected, actual, diffOpts);
|
|
704
|
+
if (color) {
|
|
703
705
|
const lines = diff.split("\n").map((line) => {
|
|
704
706
|
const trimmed = line.trimStart();
|
|
705
707
|
if (trimmed.startsWith("-") && !trimmed.startsWith("---")) {
|
|
@@ -1069,6 +1071,9 @@ function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent,
|
|
|
1069
1071
|
const logDiff = (expected, actual, options) => {
|
|
1070
1072
|
logs.push(createDiffLogEntry(expected, actual, options));
|
|
1071
1073
|
};
|
|
1074
|
+
const log = (message, options) => {
|
|
1075
|
+
logs.push(createLogEntry(message, options));
|
|
1076
|
+
};
|
|
1072
1077
|
const ctx = yield* effect.Effect.promise(
|
|
1073
1078
|
() => Promise.resolve(evaluator.resolveContext())
|
|
1074
1079
|
);
|
|
@@ -1078,7 +1083,8 @@ function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent,
|
|
|
1078
1083
|
input: testCaseItem.testCase.getInput(),
|
|
1079
1084
|
ctx,
|
|
1080
1085
|
output,
|
|
1081
|
-
logDiff
|
|
1086
|
+
logDiff,
|
|
1087
|
+
log
|
|
1082
1088
|
})
|
|
1083
1089
|
)
|
|
1084
1090
|
);
|
|
@@ -1660,9 +1666,11 @@ exports.Metric = Metric;
|
|
|
1660
1666
|
exports.Score = Score;
|
|
1661
1667
|
exports.TestCase = TestCase;
|
|
1662
1668
|
exports.binaryScore = binaryScore;
|
|
1669
|
+
exports.createLogEntry = createLogEntry;
|
|
1663
1670
|
exports.createRunner = createRunner;
|
|
1664
1671
|
exports.defaultRunnerConfig = defaultRunnerConfig;
|
|
1665
1672
|
exports.defineConfig = defineConfig;
|
|
1673
|
+
exports.getLogLines = getLogLines;
|
|
1666
1674
|
exports.getMetricById = getMetricById;
|
|
1667
1675
|
exports.getScoreById = getScoreById;
|
|
1668
1676
|
exports.latencyMetric = latencyMetric;
|