@m4trix/evals 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1352,7 +1352,9 @@ function getJitiLoader() {
1352
1352
  }
1353
1353
  const createJiti2 = jitiModule.createJiti ?? jitiModule.default;
1354
1354
  if (typeof createJiti2 !== "function") {
1355
- throw new Error("Failed to initialize jiti for m4trix eval config loading.");
1355
+ throw new Error(
1356
+ "Failed to initialize jiti for m4trix eval config loading."
1357
+ );
1356
1358
  }
1357
1359
  cachedLoader = createJiti2(import.meta.url, {
1358
1360
  interopDefault: true,
@@ -1375,7 +1377,7 @@ function resolveConfigValue(value) {
1375
1377
  }
1376
1378
  if (typeof value !== "object") {
1377
1379
  throw new Error(
1378
- "Invalid m4trix eval config export. Expected an object or defineConfigFunction(() => config)."
1380
+ "Invalid m4trix eval config export. Expected an object or defineConfig(() => config)."
1379
1381
  );
1380
1382
  }
1381
1383
  return value;
@@ -1654,6 +1656,13 @@ function normalizeResult(result) {
1654
1656
  const metrics = Array.isArray(obj.metrics) ? obj.metrics : void 0;
1655
1657
  return { scores, metrics };
1656
1658
  }
1659
+ function readOutputDefinition(testCase) {
1660
+ const candidate = testCase;
1661
+ if (typeof candidate.getOutputDefinition !== "function") {
1662
+ return void 0;
1663
+ }
1664
+ return candidate.getOutputDefinition();
1665
+ }
1657
1666
  function nowIsoForFile() {
1658
1667
  return (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
1659
1668
  }
@@ -1682,6 +1691,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
1682
1691
  const started = Date.now();
1683
1692
  const evaluatorScores = [];
1684
1693
  let testCaseError;
1694
+ const outputDefinition = readOutputDefinition(testCaseItem.testCase);
1685
1695
  for (const { id: evaluatorId, evaluator } of task.evaluators) {
1686
1696
  const evaluateFn = evaluator.getEvaluateFn();
1687
1697
  if (!evaluateFn) {
@@ -1692,7 +1702,13 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
1692
1702
  () => Promise.resolve(evaluator.resolveContext())
1693
1703
  );
1694
1704
  const result = yield* Effect.promise(
1695
- () => Promise.resolve(evaluateFn(testCaseItem.testCase.getInput(), ctx))
1705
+ () => Promise.resolve(
1706
+ evaluateFn({
1707
+ input: testCaseItem.testCase.getInput(),
1708
+ ctx,
1709
+ output: outputDefinition
1710
+ })
1711
+ )
1696
1712
  );
1697
1713
  const { scores, metrics } = normalizeResult(result);
1698
1714
  const passed = computeEvaluatorPassed(evaluator, result, scores);
@@ -1723,6 +1739,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
1723
1739
  passed: testCasePassed,
1724
1740
  durationMs: Date.now() - started,
1725
1741
  evaluatorScores,
1742
+ outputDefinition,
1726
1743
  errorMessage: testCaseError
1727
1744
  };
1728
1745
  updateSnapshot(task.runId, (snapshot) => ({