@m4trix/evals 0.15.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,7 @@ var path = require('path');
8
8
  var jitiModule = require('jiti');
9
9
  var promises = require('fs/promises');
10
10
  var url = require('url');
11
- var diff = require('diff');
11
+ var jsonDiff = require('json-diff');
12
12
  var React2 = require('react');
13
13
  var ink = require('ink');
14
14
  var jsxRuntime = require('react/jsx-runtime');
@@ -286,45 +286,46 @@ async function collectTestCasesFromFiles(config) {
286
286
  );
287
287
  return found.flat();
288
288
  }
289
- function toJsonLines(value) {
289
+ function createDiffString(expected, actual, diffOptions) {
290
+ const opts = { ...diffOptions, color: false };
291
+ const result = jsonDiff.diffString(expected, actual, opts);
292
+ return typeof result === "string" ? result : "";
293
+ }
294
+ function formatLogMessage(msg) {
295
+ if (typeof msg === "string")
296
+ return msg;
290
297
  try {
291
- return JSON.stringify(value, null, 2);
298
+ if (msg !== null && typeof msg === "object") {
299
+ return JSON.stringify(msg, null, 2);
300
+ }
301
+ return String(msg);
292
302
  } catch {
293
- return String(value);
303
+ return String(msg);
294
304
  }
295
305
  }
296
- function formatDiffString(changes) {
297
- const lines = [];
298
- for (const part of changes) {
299
- const prefix = part.added ? "+" : part.removed ? "-" : " ";
300
- const partLines = part.value.split("\n");
301
- if (partLines[partLines.length - 1] === "") {
302
- partLines.pop();
303
- }
304
- for (const line of partLines) {
305
- lines.push(`${prefix} ${line}`);
306
- }
307
- }
308
- return lines.join("\n");
306
+ function createLogEntry(message, options) {
307
+ return {
308
+ type: "log",
309
+ label: options?.label,
310
+ message: formatLogMessage(message)
311
+ };
309
312
  }
310
- function createDiffString(expected, actual) {
311
- const expectedStr = toJsonLines(expected);
312
- const actualStr = toJsonLines(actual);
313
- const changes = diff.diffLines(expectedStr, actualStr);
314
- return formatDiffString(changes);
313
+ function getLogLines(entry) {
314
+ return entry.message.split("\n");
315
315
  }
316
316
  function createDiffLogEntry(expected, actual, options) {
317
- const diff = createDiffString(expected, actual);
317
+ const { label, ...diffOpts } = options ?? {};
318
+ const diff = createDiffString(expected, actual, diffOpts);
318
319
  return {
319
320
  type: "diff",
320
- label: options?.label,
321
+ label,
321
322
  expected,
322
323
  actual,
323
324
  diff: diff || "(no differences)"
324
325
  };
325
326
  }
326
327
  function getDiffLines(entry) {
327
- const raw = createDiffString(entry.expected, entry.actual) || "(no differences)";
328
+ const raw = entry.diff || "(no differences)";
328
329
  return raw.split("\n").map((line) => {
329
330
  const trimmed = line.trimStart();
330
331
  if (trimmed.startsWith("-") && !trimmed.startsWith("---")) {
@@ -600,6 +601,9 @@ function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent,
600
601
  const logDiff = (expected, actual, options) => {
601
602
  logs.push(createDiffLogEntry(expected, actual, options));
602
603
  };
604
+ const log = (message, options) => {
605
+ logs.push(createLogEntry(message, options));
606
+ };
603
607
  const ctx = yield* effect.Effect.promise(
604
608
  () => Promise.resolve(evaluator.resolveContext())
605
609
  );
@@ -609,7 +613,8 @@ function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent,
609
613
  input: testCaseItem.testCase.getInput(),
610
614
  ctx,
611
615
  output,
612
- logDiff
616
+ logDiff,
617
+ log
613
618
  })
614
619
  )
615
620
  );
@@ -1782,7 +1787,7 @@ function RunView({
1782
1787
  },
1783
1788
  lineIdx
1784
1789
  )
1785
- ) }, logIdx) : null
1790
+ ) }, logIdx) : log.type === "log" ? /* @__PURE__ */ jsxRuntime.jsx(ink.Box, { flexDirection: "column", children: getLogLines(log).map((line, lineIdx) => /* @__PURE__ */ jsxRuntime.jsx(ink.Text, { color: "gray", children: line }, lineIdx)) }, logIdx) : null
1786
1791
  ) })
1787
1792
  ]
1788
1793
  },
@@ -2286,6 +2291,10 @@ async function runSimpleEvalCommandPlain(runner, datasetName, evaluatorPattern)
2286
2291
  const colored = useColor && type === "remove" ? colorize(` ${line}`, ansi2.red) : useColor && type === "add" ? colorize(` ${line}`, ansi2.green) : ` ${line}`;
2287
2292
  lines.push(colored);
2288
2293
  }
2294
+ } else if (log.type === "log") {
2295
+ for (const line of getLogLines(log)) {
2296
+ lines.push(` ${line}`);
2297
+ }
2289
2298
  }
2290
2299
  }
2291
2300
  }