@m4trix/evals 0.3.0 → 0.5.0
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/README.md +13 -2
- package/dist/cli-simple.cjs +24 -2
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +24 -2
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +16 -1
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +16 -1
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +28 -2
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +16 -6
- package/dist/index.d.ts +16 -6
- package/dist/index.js +28 -2
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -1656,6 +1656,13 @@ function normalizeResult(result) {
|
|
|
1656
1656
|
const metrics = Array.isArray(obj.metrics) ? obj.metrics : void 0;
|
|
1657
1657
|
return { scores, metrics };
|
|
1658
1658
|
}
|
|
1659
|
+
function readOutput(testCase) {
|
|
1660
|
+
const candidate = testCase;
|
|
1661
|
+
if (typeof candidate.getOutput !== "function") {
|
|
1662
|
+
return void 0;
|
|
1663
|
+
}
|
|
1664
|
+
return candidate.getOutput();
|
|
1665
|
+
}
|
|
1659
1666
|
function nowIsoForFile() {
|
|
1660
1667
|
return (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
1661
1668
|
}
|
|
@@ -1684,6 +1691,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
|
|
|
1684
1691
|
const started = Date.now();
|
|
1685
1692
|
const evaluatorScores = [];
|
|
1686
1693
|
let testCaseError;
|
|
1694
|
+
const output = readOutput(testCaseItem.testCase);
|
|
1687
1695
|
for (const { id: evaluatorId, evaluator } of task.evaluators) {
|
|
1688
1696
|
const evaluateFn = evaluator.getEvaluateFn();
|
|
1689
1697
|
if (!evaluateFn) {
|
|
@@ -1694,7 +1702,13 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
|
|
|
1694
1702
|
() => Promise.resolve(evaluator.resolveContext())
|
|
1695
1703
|
);
|
|
1696
1704
|
const result = yield* Effect.promise(
|
|
1697
|
-
() => Promise.resolve(
|
|
1705
|
+
() => Promise.resolve(
|
|
1706
|
+
evaluateFn({
|
|
1707
|
+
input: testCaseItem.testCase.getInput(),
|
|
1708
|
+
ctx,
|
|
1709
|
+
output
|
|
1710
|
+
})
|
|
1711
|
+
)
|
|
1698
1712
|
);
|
|
1699
1713
|
const { scores, metrics } = normalizeResult(result);
|
|
1700
1714
|
const passed = computeEvaluatorPassed(evaluator, result, scores);
|
|
@@ -1725,6 +1739,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
|
|
|
1725
1739
|
passed: testCasePassed,
|
|
1726
1740
|
durationMs: Date.now() - started,
|
|
1727
1741
|
evaluatorScores,
|
|
1742
|
+
output,
|
|
1728
1743
|
errorMessage: testCaseError
|
|
1729
1744
|
};
|
|
1730
1745
|
updateSnapshot(task.runId, (snapshot) => ({
|