@m4trix/evals 0.15.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli-simple.cjs +36 -27
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +36 -27
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +48 -26
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +48 -26
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +35 -27
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +49 -4
- package/dist/index.js +34 -28
- package/dist/index.js.map +1 -1
- package/package.json +3 -2
package/dist/cli.cjs
CHANGED
|
@@ -13,7 +13,7 @@ var fs = require('fs');
|
|
|
13
13
|
var jitiModule = require('jiti');
|
|
14
14
|
var promises = require('fs/promises');
|
|
15
15
|
var url = require('url');
|
|
16
|
-
var
|
|
16
|
+
var jsonDiff = require('json-diff');
|
|
17
17
|
|
|
18
18
|
var _documentCurrentScript = typeof document !== 'undefined' ? document.currentScript : null;
|
|
19
19
|
function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
|
|
@@ -1004,45 +1004,46 @@ async function collectTestCasesFromFiles(config) {
|
|
|
1004
1004
|
);
|
|
1005
1005
|
return found.flat();
|
|
1006
1006
|
}
|
|
1007
|
-
function
|
|
1007
|
+
function createDiffString(expected, actual, diffOptions) {
|
|
1008
|
+
const opts = { ...diffOptions, color: false };
|
|
1009
|
+
const result = jsonDiff.diffString(expected, actual, opts);
|
|
1010
|
+
return typeof result === "string" ? result : "";
|
|
1011
|
+
}
|
|
1012
|
+
function formatLogMessage(msg) {
|
|
1013
|
+
if (typeof msg === "string")
|
|
1014
|
+
return msg;
|
|
1008
1015
|
try {
|
|
1009
|
-
|
|
1016
|
+
if (msg !== null && typeof msg === "object") {
|
|
1017
|
+
return JSON.stringify(msg, null, 2);
|
|
1018
|
+
}
|
|
1019
|
+
return String(msg);
|
|
1010
1020
|
} catch {
|
|
1011
|
-
return String(
|
|
1021
|
+
return String(msg);
|
|
1012
1022
|
}
|
|
1013
1023
|
}
|
|
1014
|
-
function
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
partLines.pop();
|
|
1021
|
-
}
|
|
1022
|
-
for (const line of partLines) {
|
|
1023
|
-
lines.push(`${prefix} ${line}`);
|
|
1024
|
-
}
|
|
1025
|
-
}
|
|
1026
|
-
return lines.join("\n");
|
|
1024
|
+
function createLogEntry(message, options) {
|
|
1025
|
+
return {
|
|
1026
|
+
type: "log",
|
|
1027
|
+
label: options?.label,
|
|
1028
|
+
message: formatLogMessage(message)
|
|
1029
|
+
};
|
|
1027
1030
|
}
|
|
1028
|
-
function
|
|
1029
|
-
|
|
1030
|
-
const actualStr = toJsonLines(actual);
|
|
1031
|
-
const changes = diff.diffLines(expectedStr, actualStr);
|
|
1032
|
-
return formatDiffString(changes);
|
|
1031
|
+
function getLogLines(entry) {
|
|
1032
|
+
return entry.message.split("\n");
|
|
1033
1033
|
}
|
|
1034
1034
|
function createDiffLogEntry(expected, actual, options) {
|
|
1035
|
-
const
|
|
1035
|
+
const { label, ...diffOpts } = options ?? {};
|
|
1036
|
+
const diff = createDiffString(expected, actual, diffOpts);
|
|
1036
1037
|
return {
|
|
1037
1038
|
type: "diff",
|
|
1038
|
-
label
|
|
1039
|
+
label,
|
|
1039
1040
|
expected,
|
|
1040
1041
|
actual,
|
|
1041
1042
|
diff: diff || "(no differences)"
|
|
1042
1043
|
};
|
|
1043
1044
|
}
|
|
1044
1045
|
function getDiffLines(entry) {
|
|
1045
|
-
const raw =
|
|
1046
|
+
const raw = entry.diff || "(no differences)";
|
|
1046
1047
|
return raw.split("\n").map((line) => {
|
|
1047
1048
|
const trimmed = line.trimStart();
|
|
1048
1049
|
if (trimmed.startsWith("-") && !trimmed.startsWith("---")) {
|
|
@@ -1300,6 +1301,9 @@ function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent,
|
|
|
1300
1301
|
const logDiff = (expected, actual, options) => {
|
|
1301
1302
|
logs.push(createDiffLogEntry(expected, actual, options));
|
|
1302
1303
|
};
|
|
1304
|
+
const log = (message, options) => {
|
|
1305
|
+
logs.push(createLogEntry(message, options));
|
|
1306
|
+
};
|
|
1303
1307
|
const ctx = yield* effect.Effect.promise(
|
|
1304
1308
|
() => Promise.resolve(evaluator.resolveContext())
|
|
1305
1309
|
);
|
|
@@ -1309,7 +1313,8 @@ function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent,
|
|
|
1309
1313
|
input: testCaseItem.testCase.getInput(),
|
|
1310
1314
|
ctx,
|
|
1311
1315
|
output,
|
|
1312
|
-
logDiff
|
|
1316
|
+
logDiff,
|
|
1317
|
+
log
|
|
1313
1318
|
})
|
|
1314
1319
|
)
|
|
1315
1320
|
);
|
|
@@ -2386,6 +2391,23 @@ function buildDetailRows(run, testCases, evaluatorNameById) {
|
|
|
2386
2391
|
)
|
|
2387
2392
|
);
|
|
2388
2393
|
}
|
|
2394
|
+
} else if (log.type === "log") {
|
|
2395
|
+
const logLines = getLogLines(log);
|
|
2396
|
+
for (let lineIdx = 0; lineIdx < logLines.length; lineIdx++) {
|
|
2397
|
+
rows.push(
|
|
2398
|
+
/* @__PURE__ */ jsxRuntime.jsxs(
|
|
2399
|
+
ink.Text,
|
|
2400
|
+
{
|
|
2401
|
+
color: "gray",
|
|
2402
|
+
children: [
|
|
2403
|
+
" ",
|
|
2404
|
+
logLines[lineIdx]
|
|
2405
|
+
]
|
|
2406
|
+
},
|
|
2407
|
+
`tc-${tc.testCaseId}-${item.evaluatorId}-${logIdx}-${lineIdx}`
|
|
2408
|
+
)
|
|
2409
|
+
);
|
|
2410
|
+
}
|
|
2389
2411
|
}
|
|
2390
2412
|
}
|
|
2391
2413
|
}
|