@m4trix/evals 0.14.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1,7 +1,7 @@
1
1
  'use strict';
2
2
 
3
3
  var effect = require('effect');
4
- var diff = require('diff');
4
+ var jsonDiff = require('json-diff');
5
5
  var crypto = require('crypto');
6
6
  var fs = require('fs');
7
7
  var path = require('path');
@@ -660,46 +660,48 @@ var binaryScore = Score.of({
660
660
  },
661
661
  aggregate: aggregateAll
662
662
  });
663
- function toJsonLines(value) {
663
+ function createDiffString(expected, actual, diffOptions) {
664
+ const opts = { ...diffOptions, color: false };
665
+ const result = jsonDiff.diffString(expected, actual, opts);
666
+ return typeof result === "string" ? result : "";
667
+ }
668
+ function formatLogMessage(msg) {
669
+ if (typeof msg === "string")
670
+ return msg;
664
671
  try {
665
- return JSON.stringify(value, null, 2);
672
+ if (msg !== null && typeof msg === "object") {
673
+ return JSON.stringify(msg, null, 2);
674
+ }
675
+ return String(msg);
666
676
  } catch {
667
- return String(value);
677
+ return String(msg);
668
678
  }
669
679
  }
670
- function formatDiffString(changes) {
671
- const lines = [];
672
- for (const part of changes) {
673
- const prefix = part.added ? "+" : part.removed ? "-" : " ";
674
- const partLines = part.value.split("\n");
675
- if (partLines[partLines.length - 1] === "") {
676
- partLines.pop();
677
- }
678
- for (const line of partLines) {
679
- lines.push(`${prefix} ${line}`);
680
- }
681
- }
682
- return lines.join("\n");
680
+ function createLogEntry(message, options) {
681
+ return {
682
+ type: "log",
683
+ label: options?.label,
684
+ message: formatLogMessage(message)
685
+ };
683
686
  }
684
- function createDiffString(expected, actual) {
685
- const expectedStr = toJsonLines(expected);
686
- const actualStr = toJsonLines(actual);
687
- const changes = diff.diffLines(expectedStr, actualStr);
688
- return formatDiffString(changes);
687
+ function getLogLines(entry) {
688
+ return entry.message.split("\n");
689
689
  }
690
690
  function createDiffLogEntry(expected, actual, options) {
691
- const diff = createDiffString(expected, actual);
691
+ const { label, ...diffOpts } = options ?? {};
692
+ const diff = createDiffString(expected, actual, diffOpts);
692
693
  return {
693
694
  type: "diff",
694
- label: options?.label,
695
+ label,
695
696
  expected,
696
697
  actual,
697
698
  diff: diff || "(no differences)"
698
699
  };
699
700
  }
700
701
  function printJsonDiff(expected, actual, options = {}) {
701
- const diff = createDiffString(expected, actual);
702
- if (options.color) {
702
+ const { color = true, ...diffOpts } = options;
703
+ const diff = createDiffString(expected, actual, diffOpts);
704
+ if (color) {
703
705
  const lines = diff.split("\n").map((line) => {
704
706
  const trimmed = line.trimStart();
705
707
  if (trimmed.startsWith("-") && !trimmed.startsWith("---")) {
@@ -1069,6 +1071,9 @@ function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent,
1069
1071
  const logDiff = (expected, actual, options) => {
1070
1072
  logs.push(createDiffLogEntry(expected, actual, options));
1071
1073
  };
1074
+ const log = (message, options) => {
1075
+ logs.push(createLogEntry(message, options));
1076
+ };
1072
1077
  const ctx = yield* effect.Effect.promise(
1073
1078
  () => Promise.resolve(evaluator.resolveContext())
1074
1079
  );
@@ -1078,7 +1083,8 @@ function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent,
1078
1083
  input: testCaseItem.testCase.getInput(),
1079
1084
  ctx,
1080
1085
  output,
1081
- logDiff
1086
+ logDiff,
1087
+ log
1082
1088
  })
1083
1089
  )
1084
1090
  );
@@ -1660,9 +1666,11 @@ exports.Metric = Metric;
1660
1666
  exports.Score = Score;
1661
1667
  exports.TestCase = TestCase;
1662
1668
  exports.binaryScore = binaryScore;
1669
+ exports.createLogEntry = createLogEntry;
1663
1670
  exports.createRunner = createRunner;
1664
1671
  exports.defaultRunnerConfig = defaultRunnerConfig;
1665
1672
  exports.defineConfig = defineConfig;
1673
+ exports.getLogLines = getLogLines;
1666
1674
  exports.getMetricById = getMetricById;
1667
1675
  exports.getScoreById = getScoreById;
1668
1676
  exports.latencyMetric = latencyMetric;