@m4trix/evals 0.15.0 → 0.16.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@ import { resolve, relative, join, parse, dirname } from 'path';
6
6
  import * as jitiModule from 'jiti';
7
7
  import { writeFile, readdir, readFile, mkdir, appendFile } from 'fs/promises';
8
8
  import { pathToFileURL } from 'url';
9
- import { diffLines } from 'diff';
9
+ import { diffString } from 'json-diff';
10
10
  import React2, { useState, useEffect, useCallback } from 'react';
11
11
  import { render, Box, Text } from 'ink';
12
12
  import { jsxs, jsx, Fragment } from 'react/jsx-runtime';
@@ -260,45 +260,46 @@ async function collectTestCasesFromFiles(config) {
260
260
  );
261
261
  return found.flat();
262
262
  }
263
- function toJsonLines(value) {
263
+ function createDiffString(expected, actual, diffOptions) {
264
+ const opts = { ...diffOptions, color: false };
265
+ const result = diffString(expected, actual, opts);
266
+ return typeof result === "string" ? result : "";
267
+ }
268
+ function formatLogMessage(msg) {
269
+ if (typeof msg === "string")
270
+ return msg;
264
271
  try {
265
- return JSON.stringify(value, null, 2);
272
+ if (msg !== null && typeof msg === "object") {
273
+ return JSON.stringify(msg, null, 2);
274
+ }
275
+ return String(msg);
266
276
  } catch {
267
- return String(value);
277
+ return String(msg);
268
278
  }
269
279
  }
270
- function formatDiffString(changes) {
271
- const lines = [];
272
- for (const part of changes) {
273
- const prefix = part.added ? "+" : part.removed ? "-" : " ";
274
- const partLines = part.value.split("\n");
275
- if (partLines[partLines.length - 1] === "") {
276
- partLines.pop();
277
- }
278
- for (const line of partLines) {
279
- lines.push(`${prefix} ${line}`);
280
- }
281
- }
282
- return lines.join("\n");
280
+ function createLogEntry(message, options) {
281
+ return {
282
+ type: "log",
283
+ label: options?.label,
284
+ message: formatLogMessage(message)
285
+ };
283
286
  }
284
- function createDiffString(expected, actual) {
285
- const expectedStr = toJsonLines(expected);
286
- const actualStr = toJsonLines(actual);
287
- const changes = diffLines(expectedStr, actualStr);
288
- return formatDiffString(changes);
287
+ function getLogLines(entry) {
288
+ return entry.message.split("\n");
289
289
  }
290
290
  function createDiffLogEntry(expected, actual, options) {
291
- const diff = createDiffString(expected, actual);
291
+ const { label, ...diffOpts } = options ?? {};
292
+ const diff = createDiffString(expected, actual, diffOpts);
292
293
  return {
293
294
  type: "diff",
294
- label: options?.label,
295
+ label,
295
296
  expected,
296
297
  actual,
297
298
  diff: diff || "(no differences)"
298
299
  };
299
300
  }
300
301
  function getDiffLines(entry) {
301
- const raw = createDiffString(entry.expected, entry.actual) || "(no differences)";
302
+ const raw = entry.diff || "(no differences)";
302
303
  return raw.split("\n").map((line) => {
303
304
  const trimmed = line.trimStart();
304
305
  if (trimmed.startsWith("-") && !trimmed.startsWith("---")) {
@@ -574,6 +575,9 @@ function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent,
574
575
  const logDiff = (expected, actual, options) => {
575
576
  logs.push(createDiffLogEntry(expected, actual, options));
576
577
  };
578
+ const log = (message, options) => {
579
+ logs.push(createLogEntry(message, options));
580
+ };
577
581
  const ctx = yield* Effect.promise(
578
582
  () => Promise.resolve(evaluator.resolveContext())
579
583
  );
@@ -583,7 +587,8 @@ function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent,
583
587
  input: testCaseItem.testCase.getInput(),
584
588
  ctx,
585
589
  output,
586
- logDiff
590
+ logDiff,
591
+ log
587
592
  })
588
593
  )
589
594
  );
@@ -1756,7 +1761,7 @@ function RunView({
1756
1761
  },
1757
1762
  lineIdx
1758
1763
  )
1759
- ) }, logIdx) : null
1764
+ ) }, logIdx) : log.type === "log" ? /* @__PURE__ */ jsx(Box, { flexDirection: "column", children: getLogLines(log).map((line, lineIdx) => /* @__PURE__ */ jsx(Text, { color: "gray", children: line }, lineIdx)) }, logIdx) : null
1760
1765
  ) })
1761
1766
  ]
1762
1767
  },
@@ -2260,6 +2265,10 @@ async function runSimpleEvalCommandPlain(runner, datasetName, evaluatorPattern)
2260
2265
  const colored = useColor && type === "remove" ? colorize(` ${line}`, ansi2.red) : useColor && type === "add" ? colorize(` ${line}`, ansi2.green) : ` ${line}`;
2261
2266
  lines.push(colored);
2262
2267
  }
2268
+ } else if (log.type === "log") {
2269
+ for (const line of getLogLines(log)) {
2270
+ lines.push(` ${line}`);
2271
+ }
2263
2272
  }
2264
2273
  }
2265
2274
  }