@m4trix/evals 0.15.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli-simple.cjs +36 -27
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +36 -27
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +48 -26
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +48 -26
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +35 -27
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +49 -4
- package/dist/index.js +34 -28
- package/dist/index.js.map +1 -1
- package/package.json +3 -2
package/dist/cli.js
CHANGED
|
@@ -11,7 +11,7 @@ import { existsSync } from 'fs';
|
|
|
11
11
|
import * as jitiModule from 'jiti';
|
|
12
12
|
import { readdir, readFile, mkdir, appendFile } from 'fs/promises';
|
|
13
13
|
import { pathToFileURL } from 'url';
|
|
14
|
-
import {
|
|
14
|
+
import { diffString } from 'json-diff';
|
|
15
15
|
|
|
16
16
|
var SEP = " ";
|
|
17
17
|
var ARROW = "\u203A";
|
|
@@ -978,45 +978,46 @@ async function collectTestCasesFromFiles(config) {
|
|
|
978
978
|
);
|
|
979
979
|
return found.flat();
|
|
980
980
|
}
|
|
981
|
-
function
|
|
981
|
+
function createDiffString(expected, actual, diffOptions) {
|
|
982
|
+
const opts = { ...diffOptions, color: false };
|
|
983
|
+
const result = diffString(expected, actual, opts);
|
|
984
|
+
return typeof result === "string" ? result : "";
|
|
985
|
+
}
|
|
986
|
+
function formatLogMessage(msg) {
|
|
987
|
+
if (typeof msg === "string")
|
|
988
|
+
return msg;
|
|
982
989
|
try {
|
|
983
|
-
|
|
990
|
+
if (msg !== null && typeof msg === "object") {
|
|
991
|
+
return JSON.stringify(msg, null, 2);
|
|
992
|
+
}
|
|
993
|
+
return String(msg);
|
|
984
994
|
} catch {
|
|
985
|
-
return String(
|
|
995
|
+
return String(msg);
|
|
986
996
|
}
|
|
987
997
|
}
|
|
988
|
-
function
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
partLines.pop();
|
|
995
|
-
}
|
|
996
|
-
for (const line of partLines) {
|
|
997
|
-
lines.push(`${prefix} ${line}`);
|
|
998
|
-
}
|
|
999
|
-
}
|
|
1000
|
-
return lines.join("\n");
|
|
998
|
+
function createLogEntry(message, options) {
|
|
999
|
+
return {
|
|
1000
|
+
type: "log",
|
|
1001
|
+
label: options?.label,
|
|
1002
|
+
message: formatLogMessage(message)
|
|
1003
|
+
};
|
|
1001
1004
|
}
|
|
1002
|
-
function
|
|
1003
|
-
|
|
1004
|
-
const actualStr = toJsonLines(actual);
|
|
1005
|
-
const changes = diffLines(expectedStr, actualStr);
|
|
1006
|
-
return formatDiffString(changes);
|
|
1005
|
+
function getLogLines(entry) {
|
|
1006
|
+
return entry.message.split("\n");
|
|
1007
1007
|
}
|
|
1008
1008
|
function createDiffLogEntry(expected, actual, options) {
|
|
1009
|
-
const
|
|
1009
|
+
const { label, ...diffOpts } = options ?? {};
|
|
1010
|
+
const diff = createDiffString(expected, actual, diffOpts);
|
|
1010
1011
|
return {
|
|
1011
1012
|
type: "diff",
|
|
1012
|
-
label
|
|
1013
|
+
label,
|
|
1013
1014
|
expected,
|
|
1014
1015
|
actual,
|
|
1015
1016
|
diff: diff || "(no differences)"
|
|
1016
1017
|
};
|
|
1017
1018
|
}
|
|
1018
1019
|
function getDiffLines(entry) {
|
|
1019
|
-
const raw =
|
|
1020
|
+
const raw = entry.diff || "(no differences)";
|
|
1020
1021
|
return raw.split("\n").map((line) => {
|
|
1021
1022
|
const trimmed = line.trimStart();
|
|
1022
1023
|
if (trimmed.startsWith("-") && !trimmed.startsWith("---")) {
|
|
@@ -1274,6 +1275,9 @@ function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent,
|
|
|
1274
1275
|
const logDiff = (expected, actual, options) => {
|
|
1275
1276
|
logs.push(createDiffLogEntry(expected, actual, options));
|
|
1276
1277
|
};
|
|
1278
|
+
const log = (message, options) => {
|
|
1279
|
+
logs.push(createLogEntry(message, options));
|
|
1280
|
+
};
|
|
1277
1281
|
const ctx = yield* Effect.promise(
|
|
1278
1282
|
() => Promise.resolve(evaluator.resolveContext())
|
|
1279
1283
|
);
|
|
@@ -1283,7 +1287,8 @@ function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent,
|
|
|
1283
1287
|
input: testCaseItem.testCase.getInput(),
|
|
1284
1288
|
ctx,
|
|
1285
1289
|
output,
|
|
1286
|
-
logDiff
|
|
1290
|
+
logDiff,
|
|
1291
|
+
log
|
|
1287
1292
|
})
|
|
1288
1293
|
)
|
|
1289
1294
|
);
|
|
@@ -2360,6 +2365,23 @@ function buildDetailRows(run, testCases, evaluatorNameById) {
|
|
|
2360
2365
|
)
|
|
2361
2366
|
);
|
|
2362
2367
|
}
|
|
2368
|
+
} else if (log.type === "log") {
|
|
2369
|
+
const logLines = getLogLines(log);
|
|
2370
|
+
for (let lineIdx = 0; lineIdx < logLines.length; lineIdx++) {
|
|
2371
|
+
rows.push(
|
|
2372
|
+
/* @__PURE__ */ jsxs(
|
|
2373
|
+
Text,
|
|
2374
|
+
{
|
|
2375
|
+
color: "gray",
|
|
2376
|
+
children: [
|
|
2377
|
+
" ",
|
|
2378
|
+
logLines[lineIdx]
|
|
2379
|
+
]
|
|
2380
|
+
},
|
|
2381
|
+
`tc-${tc.testCaseId}-${item.evaluatorId}-${logIdx}-${lineIdx}`
|
|
2382
|
+
)
|
|
2383
|
+
);
|
|
2384
|
+
}
|
|
2363
2385
|
}
|
|
2364
2386
|
}
|
|
2365
2387
|
}
|