@m4trix/evals 0.15.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli-simple.cjs +36 -27
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +36 -27
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +48 -26
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +48 -26
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +35 -27
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +49 -4
- package/dist/index.js +34 -28
- package/dist/index.js.map +1 -1
- package/package.json +3 -2
package/dist/cli-simple.cjs
CHANGED
|
@@ -8,7 +8,7 @@ var path = require('path');
|
|
|
8
8
|
var jitiModule = require('jiti');
|
|
9
9
|
var promises = require('fs/promises');
|
|
10
10
|
var url = require('url');
|
|
11
|
-
var
|
|
11
|
+
var jsonDiff = require('json-diff');
|
|
12
12
|
var React2 = require('react');
|
|
13
13
|
var ink = require('ink');
|
|
14
14
|
var jsxRuntime = require('react/jsx-runtime');
|
|
@@ -286,45 +286,46 @@ async function collectTestCasesFromFiles(config) {
|
|
|
286
286
|
);
|
|
287
287
|
return found.flat();
|
|
288
288
|
}
|
|
289
|
-
function
|
|
289
|
+
function createDiffString(expected, actual, diffOptions) {
|
|
290
|
+
const opts = { ...diffOptions, color: false };
|
|
291
|
+
const result = jsonDiff.diffString(expected, actual, opts);
|
|
292
|
+
return typeof result === "string" ? result : "";
|
|
293
|
+
}
|
|
294
|
+
function formatLogMessage(msg) {
|
|
295
|
+
if (typeof msg === "string")
|
|
296
|
+
return msg;
|
|
290
297
|
try {
|
|
291
|
-
|
|
298
|
+
if (msg !== null && typeof msg === "object") {
|
|
299
|
+
return JSON.stringify(msg, null, 2);
|
|
300
|
+
}
|
|
301
|
+
return String(msg);
|
|
292
302
|
} catch {
|
|
293
|
-
return String(
|
|
303
|
+
return String(msg);
|
|
294
304
|
}
|
|
295
305
|
}
|
|
296
|
-
function
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
partLines.pop();
|
|
303
|
-
}
|
|
304
|
-
for (const line of partLines) {
|
|
305
|
-
lines.push(`${prefix} ${line}`);
|
|
306
|
-
}
|
|
307
|
-
}
|
|
308
|
-
return lines.join("\n");
|
|
306
|
+
function createLogEntry(message, options) {
|
|
307
|
+
return {
|
|
308
|
+
type: "log",
|
|
309
|
+
label: options?.label,
|
|
310
|
+
message: formatLogMessage(message)
|
|
311
|
+
};
|
|
309
312
|
}
|
|
310
|
-
function
|
|
311
|
-
|
|
312
|
-
const actualStr = toJsonLines(actual);
|
|
313
|
-
const changes = diff.diffLines(expectedStr, actualStr);
|
|
314
|
-
return formatDiffString(changes);
|
|
313
|
+
function getLogLines(entry) {
|
|
314
|
+
return entry.message.split("\n");
|
|
315
315
|
}
|
|
316
316
|
function createDiffLogEntry(expected, actual, options) {
|
|
317
|
-
const
|
|
317
|
+
const { label, ...diffOpts } = options ?? {};
|
|
318
|
+
const diff = createDiffString(expected, actual, diffOpts);
|
|
318
319
|
return {
|
|
319
320
|
type: "diff",
|
|
320
|
-
label
|
|
321
|
+
label,
|
|
321
322
|
expected,
|
|
322
323
|
actual,
|
|
323
324
|
diff: diff || "(no differences)"
|
|
324
325
|
};
|
|
325
326
|
}
|
|
326
327
|
function getDiffLines(entry) {
|
|
327
|
-
const raw =
|
|
328
|
+
const raw = entry.diff || "(no differences)";
|
|
328
329
|
return raw.split("\n").map((line) => {
|
|
329
330
|
const trimmed = line.trimStart();
|
|
330
331
|
if (trimmed.startsWith("-") && !trimmed.startsWith("---")) {
|
|
@@ -600,6 +601,9 @@ function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent,
|
|
|
600
601
|
const logDiff = (expected, actual, options) => {
|
|
601
602
|
logs.push(createDiffLogEntry(expected, actual, options));
|
|
602
603
|
};
|
|
604
|
+
const log = (message, options) => {
|
|
605
|
+
logs.push(createLogEntry(message, options));
|
|
606
|
+
};
|
|
603
607
|
const ctx = yield* effect.Effect.promise(
|
|
604
608
|
() => Promise.resolve(evaluator.resolveContext())
|
|
605
609
|
);
|
|
@@ -609,7 +613,8 @@ function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent,
|
|
|
609
613
|
input: testCaseItem.testCase.getInput(),
|
|
610
614
|
ctx,
|
|
611
615
|
output,
|
|
612
|
-
logDiff
|
|
616
|
+
logDiff,
|
|
617
|
+
log
|
|
613
618
|
})
|
|
614
619
|
)
|
|
615
620
|
);
|
|
@@ -1782,7 +1787,7 @@ function RunView({
|
|
|
1782
1787
|
},
|
|
1783
1788
|
lineIdx
|
|
1784
1789
|
)
|
|
1785
|
-
) }, logIdx) : null
|
|
1790
|
+
) }, logIdx) : log.type === "log" ? /* @__PURE__ */ jsxRuntime.jsx(ink.Box, { flexDirection: "column", children: getLogLines(log).map((line, lineIdx) => /* @__PURE__ */ jsxRuntime.jsx(ink.Text, { color: "gray", children: line }, lineIdx)) }, logIdx) : null
|
|
1786
1791
|
) })
|
|
1787
1792
|
]
|
|
1788
1793
|
},
|
|
@@ -2286,6 +2291,10 @@ async function runSimpleEvalCommandPlain(runner, datasetName, evaluatorPattern)
|
|
|
2286
2291
|
const colored = useColor && type === "remove" ? colorize(` ${line}`, ansi2.red) : useColor && type === "add" ? colorize(` ${line}`, ansi2.green) : ` ${line}`;
|
|
2287
2292
|
lines.push(colored);
|
|
2288
2293
|
}
|
|
2294
|
+
} else if (log.type === "log") {
|
|
2295
|
+
for (const line of getLogLines(log)) {
|
|
2296
|
+
lines.push(` ${line}`);
|
|
2297
|
+
}
|
|
2289
2298
|
}
|
|
2290
2299
|
}
|
|
2291
2300
|
}
|