@m4trix/evals 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -9
- package/dist/cli-simple.cjs +28 -4
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +28 -4
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +20 -3
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +20 -3
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +37 -7
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +17 -7
- package/dist/index.d.ts +17 -7
- package/dist/index.js +34 -7
- package/dist/index.js.map +1 -1
- package/package.json +2 -4
package/dist/cli.js
CHANGED
|
@@ -1352,7 +1352,9 @@ function getJitiLoader() {
|
|
|
1352
1352
|
}
|
|
1353
1353
|
const createJiti2 = jitiModule.createJiti ?? jitiModule.default;
|
|
1354
1354
|
if (typeof createJiti2 !== "function") {
|
|
1355
|
-
throw new Error(
|
|
1355
|
+
throw new Error(
|
|
1356
|
+
"Failed to initialize jiti for m4trix eval config loading."
|
|
1357
|
+
);
|
|
1356
1358
|
}
|
|
1357
1359
|
cachedLoader = createJiti2(import.meta.url, {
|
|
1358
1360
|
interopDefault: true,
|
|
@@ -1375,7 +1377,7 @@ function resolveConfigValue(value) {
|
|
|
1375
1377
|
}
|
|
1376
1378
|
if (typeof value !== "object") {
|
|
1377
1379
|
throw new Error(
|
|
1378
|
-
"Invalid m4trix eval config export. Expected an object or
|
|
1380
|
+
"Invalid m4trix eval config export. Expected an object or defineConfig(() => config)."
|
|
1379
1381
|
);
|
|
1380
1382
|
}
|
|
1381
1383
|
return value;
|
|
@@ -1654,6 +1656,13 @@ function normalizeResult(result) {
|
|
|
1654
1656
|
const metrics = Array.isArray(obj.metrics) ? obj.metrics : void 0;
|
|
1655
1657
|
return { scores, metrics };
|
|
1656
1658
|
}
|
|
1659
|
+
function readOutputDefinition(testCase) {
|
|
1660
|
+
const candidate = testCase;
|
|
1661
|
+
if (typeof candidate.getOutputDefinition !== "function") {
|
|
1662
|
+
return void 0;
|
|
1663
|
+
}
|
|
1664
|
+
return candidate.getOutputDefinition();
|
|
1665
|
+
}
|
|
1657
1666
|
function nowIsoForFile() {
|
|
1658
1667
|
return (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
1659
1668
|
}
|
|
@@ -1682,6 +1691,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
|
|
|
1682
1691
|
const started = Date.now();
|
|
1683
1692
|
const evaluatorScores = [];
|
|
1684
1693
|
let testCaseError;
|
|
1694
|
+
const outputDefinition = readOutputDefinition(testCaseItem.testCase);
|
|
1685
1695
|
for (const { id: evaluatorId, evaluator } of task.evaluators) {
|
|
1686
1696
|
const evaluateFn = evaluator.getEvaluateFn();
|
|
1687
1697
|
if (!evaluateFn) {
|
|
@@ -1692,7 +1702,13 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
|
|
|
1692
1702
|
() => Promise.resolve(evaluator.resolveContext())
|
|
1693
1703
|
);
|
|
1694
1704
|
const result = yield* Effect.promise(
|
|
1695
|
-
() => Promise.resolve(
|
|
1705
|
+
() => Promise.resolve(
|
|
1706
|
+
evaluateFn({
|
|
1707
|
+
input: testCaseItem.testCase.getInput(),
|
|
1708
|
+
ctx,
|
|
1709
|
+
output: outputDefinition
|
|
1710
|
+
})
|
|
1711
|
+
)
|
|
1696
1712
|
);
|
|
1697
1713
|
const { scores, metrics } = normalizeResult(result);
|
|
1698
1714
|
const passed = computeEvaluatorPassed(evaluator, result, scores);
|
|
@@ -1723,6 +1739,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
|
|
|
1723
1739
|
passed: testCasePassed,
|
|
1724
1740
|
durationMs: Date.now() - started,
|
|
1725
1741
|
evaluatorScores,
|
|
1742
|
+
outputDefinition,
|
|
1726
1743
|
errorMessage: testCaseError
|
|
1727
1744
|
};
|
|
1728
1745
|
updateSnapshot(task.runId, (snapshot) => ({
|