@m4trix/evals 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1656,6 +1656,13 @@ function normalizeResult(result) {
1656
1656
  const metrics = Array.isArray(obj.metrics) ? obj.metrics : void 0;
1657
1657
  return { scores, metrics };
1658
1658
  }
1659
/**
 * Reads the optional output attached to a test case.
 *
 * A test case is treated as carrying an output only when it exposes a
 * callable `getOutput` member; every other value yields `undefined`.
 *
 * @param {*} testCase - Value to probe. May be `null` or `undefined`.
 * @returns {*} The value returned by `testCase.getOutput()`, or `undefined`
 *   when `testCase` is nullish or has no `getOutput` function.
 */
function readOutput(testCase) {
  // Check for null/undefined first: reading a property off a nullish value
  // throws a TypeError, which would defeat the purpose of an optional probe.
  if (testCase == null || typeof testCase.getOutput !== "function") {
    return undefined;
  }
  // Call as a method so `this` inside getOutput is the test case itself.
  return testCase.getOutput();
}
1659
1666
  function nowIsoForFile() {
1660
1667
  return (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
1661
1668
  }
@@ -1684,6 +1691,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
1684
1691
  const started = Date.now();
1685
1692
  const evaluatorScores = [];
1686
1693
  let testCaseError;
1694
+ const output = readOutput(testCaseItem.testCase);
1687
1695
  for (const { id: evaluatorId, evaluator } of task.evaluators) {
1688
1696
  const evaluateFn = evaluator.getEvaluateFn();
1689
1697
  if (!evaluateFn) {
@@ -1694,7 +1702,13 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
1694
1702
  () => Promise.resolve(evaluator.resolveContext())
1695
1703
  );
1696
1704
  const result = yield* Effect.promise(
1697
- () => Promise.resolve(evaluateFn(testCaseItem.testCase.getInput(), ctx))
1705
+ () => Promise.resolve(
1706
+ evaluateFn({
1707
+ input: testCaseItem.testCase.getInput(),
1708
+ ctx,
1709
+ output
1710
+ })
1711
+ )
1698
1712
  );
1699
1713
  const { scores, metrics } = normalizeResult(result);
1700
1714
  const passed = computeEvaluatorPassed(evaluator, result, scores);
@@ -1725,6 +1739,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
1725
1739
  passed: testCasePassed,
1726
1740
  durationMs: Date.now() - started,
1727
1741
  evaluatorScores,
1742
+ output,
1728
1743
  errorMessage: testCaseError
1729
1744
  };
1730
1745
  updateSnapshot(task.runId, (snapshot) => ({