@m4trix/evals 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli-simple.cjs +85 -27
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +85 -27
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +24 -3
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +24 -3
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +23 -2
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +25 -13
- package/dist/index.js +23 -2
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -10,7 +10,7 @@ import { resolve, relative, join, dirname } from 'path';
|
|
|
10
10
|
import * as jitiModule from 'jiti';
|
|
11
11
|
import { mkdir, appendFile, readdir } from 'fs/promises';
|
|
12
12
|
import { pathToFileURL } from 'url';
|
|
13
|
-
import 'json-diff';
|
|
13
|
+
import { diffString } from 'json-diff';
|
|
14
14
|
|
|
15
15
|
var SEP = " ";
|
|
16
16
|
var ARROW = "\u203A";
|
|
@@ -1519,6 +1519,16 @@ async function collectTestCasesFromFiles(config) {
|
|
|
1519
1519
|
);
|
|
1520
1520
|
return found.flat();
|
|
1521
1521
|
}
|
|
1522
|
+
function createDiffLogEntry(expected, actual, options) {
|
|
1523
|
+
const diff = diffString(expected, actual, { color: false });
|
|
1524
|
+
return {
|
|
1525
|
+
type: "diff",
|
|
1526
|
+
label: options?.label,
|
|
1527
|
+
expected,
|
|
1528
|
+
actual,
|
|
1529
|
+
diff: diff || "(no differences)"
|
|
1530
|
+
};
|
|
1531
|
+
}
|
|
1522
1532
|
|
|
1523
1533
|
// src/evals/metric.ts
|
|
1524
1534
|
var registry = /* @__PURE__ */ new Map();
|
|
@@ -1699,6 +1709,10 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
|
|
|
1699
1709
|
continue;
|
|
1700
1710
|
}
|
|
1701
1711
|
try {
|
|
1712
|
+
const logs = [];
|
|
1713
|
+
const logDiff = (expected, actual, options) => {
|
|
1714
|
+
logs.push(createDiffLogEntry(expected, actual, options));
|
|
1715
|
+
};
|
|
1702
1716
|
const ctx = yield* Effect.promise(
|
|
1703
1717
|
() => Promise.resolve(evaluator.resolveContext())
|
|
1704
1718
|
);
|
|
@@ -1707,13 +1721,20 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
|
|
|
1707
1721
|
evaluateFn({
|
|
1708
1722
|
input: testCaseItem.testCase.getInput(),
|
|
1709
1723
|
ctx,
|
|
1710
|
-
output
|
|
1724
|
+
output,
|
|
1725
|
+
logDiff
|
|
1711
1726
|
})
|
|
1712
1727
|
)
|
|
1713
1728
|
);
|
|
1714
1729
|
const { scores, metrics } = normalizeResult(result);
|
|
1715
1730
|
const passed = computeEvaluatorPassed(evaluator, result, scores);
|
|
1716
|
-
evaluatorScores.push({
|
|
1731
|
+
evaluatorScores.push({
|
|
1732
|
+
evaluatorId,
|
|
1733
|
+
scores,
|
|
1734
|
+
passed,
|
|
1735
|
+
metrics,
|
|
1736
|
+
logs: logs.length > 0 ? logs : void 0
|
|
1737
|
+
});
|
|
1717
1738
|
} catch (error) {
|
|
1718
1739
|
testCaseError = error instanceof Error ? error.message : "Evaluator execution failed";
|
|
1719
1740
|
evaluatorScores.push({
|