@m4trix/evals 0.15.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli-simple.cjs +36 -27
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +36 -27
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +48 -26
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +48 -26
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +35 -27
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +49 -4
- package/dist/index.js +34 -28
- package/dist/index.js.map +1 -1
- package/package.json +3 -2
package/dist/cli-simple.js
CHANGED
|
@@ -6,7 +6,7 @@ import { resolve, relative, join, parse, dirname } from 'path';
|
|
|
6
6
|
import * as jitiModule from 'jiti';
|
|
7
7
|
import { writeFile, readdir, readFile, mkdir, appendFile } from 'fs/promises';
|
|
8
8
|
import { pathToFileURL } from 'url';
|
|
9
|
-
import {
|
|
9
|
+
import { diffString } from 'json-diff';
|
|
10
10
|
import React2, { useState, useEffect, useCallback } from 'react';
|
|
11
11
|
import { render, Box, Text } from 'ink';
|
|
12
12
|
import { jsxs, jsx, Fragment } from 'react/jsx-runtime';
|
|
@@ -260,45 +260,46 @@ async function collectTestCasesFromFiles(config) {
|
|
|
260
260
|
);
|
|
261
261
|
return found.flat();
|
|
262
262
|
}
|
|
263
|
-
function
|
|
263
|
+
function createDiffString(expected, actual, diffOptions) {
|
|
264
|
+
const opts = { ...diffOptions, color: false };
|
|
265
|
+
const result = diffString(expected, actual, opts);
|
|
266
|
+
return typeof result === "string" ? result : "";
|
|
267
|
+
}
|
|
268
|
+
function formatLogMessage(msg) {
|
|
269
|
+
if (typeof msg === "string")
|
|
270
|
+
return msg;
|
|
264
271
|
try {
|
|
265
|
-
|
|
272
|
+
if (msg !== null && typeof msg === "object") {
|
|
273
|
+
return JSON.stringify(msg, null, 2);
|
|
274
|
+
}
|
|
275
|
+
return String(msg);
|
|
266
276
|
} catch {
|
|
267
|
-
return String(
|
|
277
|
+
return String(msg);
|
|
268
278
|
}
|
|
269
279
|
}
|
|
270
|
-
function
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
partLines.pop();
|
|
277
|
-
}
|
|
278
|
-
for (const line of partLines) {
|
|
279
|
-
lines.push(`${prefix} ${line}`);
|
|
280
|
-
}
|
|
281
|
-
}
|
|
282
|
-
return lines.join("\n");
|
|
280
|
+
function createLogEntry(message, options) {
|
|
281
|
+
return {
|
|
282
|
+
type: "log",
|
|
283
|
+
label: options?.label,
|
|
284
|
+
message: formatLogMessage(message)
|
|
285
|
+
};
|
|
283
286
|
}
|
|
284
|
-
function
|
|
285
|
-
|
|
286
|
-
const actualStr = toJsonLines(actual);
|
|
287
|
-
const changes = diffLines(expectedStr, actualStr);
|
|
288
|
-
return formatDiffString(changes);
|
|
287
|
+
function getLogLines(entry) {
|
|
288
|
+
return entry.message.split("\n");
|
|
289
289
|
}
|
|
290
290
|
function createDiffLogEntry(expected, actual, options) {
|
|
291
|
-
const
|
|
291
|
+
const { label, ...diffOpts } = options ?? {};
|
|
292
|
+
const diff = createDiffString(expected, actual, diffOpts);
|
|
292
293
|
return {
|
|
293
294
|
type: "diff",
|
|
294
|
-
label
|
|
295
|
+
label,
|
|
295
296
|
expected,
|
|
296
297
|
actual,
|
|
297
298
|
diff: diff || "(no differences)"
|
|
298
299
|
};
|
|
299
300
|
}
|
|
300
301
|
function getDiffLines(entry) {
|
|
301
|
-
const raw =
|
|
302
|
+
const raw = entry.diff || "(no differences)";
|
|
302
303
|
return raw.split("\n").map((line) => {
|
|
303
304
|
const trimmed = line.trimStart();
|
|
304
305
|
if (trimmed.startsWith("-") && !trimmed.startsWith("---")) {
|
|
@@ -574,6 +575,9 @@ function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent,
|
|
|
574
575
|
const logDiff = (expected, actual, options) => {
|
|
575
576
|
logs.push(createDiffLogEntry(expected, actual, options));
|
|
576
577
|
};
|
|
578
|
+
const log = (message, options) => {
|
|
579
|
+
logs.push(createLogEntry(message, options));
|
|
580
|
+
};
|
|
577
581
|
const ctx = yield* Effect.promise(
|
|
578
582
|
() => Promise.resolve(evaluator.resolveContext())
|
|
579
583
|
);
|
|
@@ -583,7 +587,8 @@ function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent,
|
|
|
583
587
|
input: testCaseItem.testCase.getInput(),
|
|
584
588
|
ctx,
|
|
585
589
|
output,
|
|
586
|
-
logDiff
|
|
590
|
+
logDiff,
|
|
591
|
+
log
|
|
587
592
|
})
|
|
588
593
|
)
|
|
589
594
|
);
|
|
@@ -1756,7 +1761,7 @@ function RunView({
|
|
|
1756
1761
|
},
|
|
1757
1762
|
lineIdx
|
|
1758
1763
|
)
|
|
1759
|
-
) }, logIdx) : null
|
|
1764
|
+
) }, logIdx) : log.type === "log" ? /* @__PURE__ */ jsx(Box, { flexDirection: "column", children: getLogLines(log).map((line, lineIdx) => /* @__PURE__ */ jsx(Text, { color: "gray", children: line }, lineIdx)) }, logIdx) : null
|
|
1760
1765
|
) })
|
|
1761
1766
|
]
|
|
1762
1767
|
},
|
|
@@ -2260,6 +2265,10 @@ async function runSimpleEvalCommandPlain(runner, datasetName, evaluatorPattern)
|
|
|
2260
2265
|
const colored = useColor && type === "remove" ? colorize(` ${line}`, ansi2.red) : useColor && type === "add" ? colorize(` ${line}`, ansi2.green) : ` ${line}`;
|
|
2261
2266
|
lines.push(colored);
|
|
2262
2267
|
}
|
|
2268
|
+
} else if (log.type === "log") {
|
|
2269
|
+
for (const line of getLogLines(log)) {
|
|
2270
|
+
lines.push(` ${line}`);
|
|
2271
|
+
}
|
|
2263
2272
|
}
|
|
2264
2273
|
}
|
|
2265
2274
|
}
|