@m4trix/evals 0.4.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/dist/cli-simple.cjs +20 -11
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.d.ts +0 -0
- package/dist/cli-simple.js +21 -12
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +16 -7
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.d.ts +0 -0
- package/dist/cli.js +16 -7
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +15 -13
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +11 -10
- package/dist/index.js +14 -12
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/cli-simple.d.cts +0 -1
- package/dist/cli.d.cts +0 -1
- package/dist/index.d.cts +0 -377
package/dist/cli.d.ts
CHANGED
|
File without changes
|
package/dist/cli.js
CHANGED
|
@@ -1656,12 +1656,12 @@ function normalizeResult(result) {
|
|
|
1656
1656
|
const metrics = Array.isArray(obj.metrics) ? obj.metrics : void 0;
|
|
1657
1657
|
return { scores, metrics };
|
|
1658
1658
|
}
|
|
1659
|
-
function
|
|
1659
|
+
function readOutput(testCase) {
|
|
1660
1660
|
const candidate = testCase;
|
|
1661
|
-
if (typeof candidate.
|
|
1661
|
+
if (typeof candidate.getOutput !== "function") {
|
|
1662
1662
|
return void 0;
|
|
1663
1663
|
}
|
|
1664
|
-
return candidate.
|
|
1664
|
+
return candidate.getOutput();
|
|
1665
1665
|
}
|
|
1666
1666
|
function nowIsoForFile() {
|
|
1667
1667
|
return (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
@@ -1691,7 +1691,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
|
|
|
1691
1691
|
const started = Date.now();
|
|
1692
1692
|
const evaluatorScores = [];
|
|
1693
1693
|
let testCaseError;
|
|
1694
|
-
const
|
|
1694
|
+
const output = readOutput(testCaseItem.testCase);
|
|
1695
1695
|
for (const { id: evaluatorId, evaluator } of task.evaluators) {
|
|
1696
1696
|
const evaluateFn = evaluator.getEvaluateFn();
|
|
1697
1697
|
if (!evaluateFn) {
|
|
@@ -1706,7 +1706,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
|
|
|
1706
1706
|
evaluateFn({
|
|
1707
1707
|
input: testCaseItem.testCase.getInput(),
|
|
1708
1708
|
ctx,
|
|
1709
|
-
output
|
|
1709
|
+
output
|
|
1710
1710
|
})
|
|
1711
1711
|
)
|
|
1712
1712
|
);
|
|
@@ -1739,7 +1739,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
|
|
|
1739
1739
|
passed: testCasePassed,
|
|
1740
1740
|
durationMs: Date.now() - started,
|
|
1741
1741
|
evaluatorScores,
|
|
1742
|
-
|
|
1742
|
+
output,
|
|
1743
1743
|
errorMessage: testCaseError
|
|
1744
1744
|
};
|
|
1745
1745
|
updateSnapshot(task.runId, (snapshot) => ({
|
|
@@ -1873,9 +1873,18 @@ function mergeRunnerOverrides(base, next) {
|
|
|
1873
1873
|
if (!base) {
|
|
1874
1874
|
return next;
|
|
1875
1875
|
}
|
|
1876
|
-
{
|
|
1876
|
+
if (!next) {
|
|
1877
1877
|
return base;
|
|
1878
1878
|
}
|
|
1879
|
+
const discovery = base.discovery || next.discovery ? {
|
|
1880
|
+
...base.discovery ?? {},
|
|
1881
|
+
...next.discovery ?? {}
|
|
1882
|
+
} : void 0;
|
|
1883
|
+
return {
|
|
1884
|
+
...base,
|
|
1885
|
+
...next,
|
|
1886
|
+
discovery
|
|
1887
|
+
};
|
|
1879
1888
|
}
|
|
1880
1889
|
function createRunner(overrides) {
|
|
1881
1890
|
const fileOverrides = loadRunnerConfigFile();
|