npm - @m4trix/evals - Versions diffs - 0.24.0 → 0.25.1 - Mend

@m4trix/evals 0.24.0 → 0.25.1

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/dist/cli-simple.js CHANGED Viewed

@@ -8,8 +8,8 @@ import { writeFile, readdir, readFile, mkdir, appendFile } from 'fs/promises';
 import { pathToFileURL } from 'url';
 import { diffLines } from 'diff';
 import stringify from 'fast-json-stable-stringify';
-import * as React2 from 'react';
-import React2__default, { useState, useEffect, useCallback } from 'react';
+import * as React from 'react';
+import React__default, { useState, useEffect, useCallback } from 'react';
 import { render, Box, Text } from 'ink';
 import { jsxs, jsx, Fragment } from 'react/jsx-runtime';
@@ -18,18 +18,8 @@ var defaultRunnerConfig = {
   discovery: {
     rootDir: process.cwd(),
     datasetSuffixes: [".dataset.ts", ".dataset.tsx", ".dataset.js", ".dataset.mjs"],
-    evaluatorSuffixes: [
-      ".evaluator.ts",
-      ".evaluator.tsx",
-      ".evaluator.js",
-      ".evaluator.mjs"
-    ],
-    testCaseSuffixes: [
-      ".test-case.ts",
-      ".test-case.tsx",
-      ".test-case.js",
-      ".test-case.mjs"
-    ],
+    evaluatorSuffixes: [".evaluator.ts", ".evaluator.tsx", ".evaluator.js", ".evaluator.mjs"],
+    testCaseSuffixes: [".test-case.ts", ".test-case.tsx", ".test-case.js", ".test-case.mjs"],
     excludeDirectories: ["node_modules", "dist", ".next", ".git", ".pnpm-store"]
   },
   artifactDirectory: ".eval-results",
@@ -96,14 +86,15 @@ function getJitiLoader() {
   }
   const createJiti2 = jitiModule.createJiti ?? jitiModule.default;
   if (typeof createJiti2 !== "function") {
-    throw new Error(
-      "Failed to initialize jiti for m4trix eval config loading."
-    );
+    throw new Error("Failed to initialize jiti for m4trix eval config loading.");
   }
-  cachedLoader = createJiti2(import.meta.url, {
-    interopDefault: true,
-    moduleCache: true
-  });
+  cachedLoader = createJiti2(
+    import.meta.url,
+    {
+      interopDefault: true,
+      moduleCache: true
+    }
+  );
   return cachedLoader;
 }
 function resolveConfigModuleExport(loadedModule) {
@@ -207,9 +198,7 @@ async function loadModuleExports(filePath) {
 }
 async function collectDatasetsFromFiles(config) {
   const files = await walkDirectory(config.rootDir, config.excludeDirectories);
-  const matched = files.filter(
-    (filePath) => hasOneSuffix(filePath, config.datasetSuffixes)
-  );
+  const matched = files.filter((filePath) => hasOneSuffix(filePath, config.datasetSuffixes));
   const found = await Promise.all(
     matched.map(async (absolutePath) => {
       const exports = await loadModuleExports(absolutePath);
@@ -226,9 +215,7 @@ async function collectDatasetsFromFiles(config) {
 }
 async function collectEvaluatorsFromFiles(config) {
   const files = await walkDirectory(config.rootDir, config.excludeDirectories);
-  const matched = files.filter(
-    (filePath) => hasOneSuffix(filePath, config.evaluatorSuffixes)
-  );
+  const matched = files.filter((filePath) => hasOneSuffix(filePath, config.evaluatorSuffixes));
   const found = await Promise.all(
     matched.map(async (absolutePath) => {
       const exports = await loadModuleExports(absolutePath);
@@ -245,9 +232,7 @@ async function collectEvaluatorsFromFiles(config) {
 }
 async function collectTestCasesFromFiles(config) {
   const files = await walkDirectory(config.rootDir, config.excludeDirectories);
-  const matched = files.filter(
-    (filePath) => hasOneSuffix(filePath, config.testCaseSuffixes)
-  );
+  const matched = files.filter((filePath) => hasOneSuffix(filePath, config.testCaseSuffixes));
   const found = await Promise.all(
     matched.map(async (absolutePath) => {
       const exports = await loadModuleExports(absolutePath);
@@ -319,16 +304,8 @@ function createDiffString(expected, actual, diffOptions) {
   const expectedProcessed = preprocessForDiff(expected, diffOptions);
   const actualProcessed = preprocessForDiff(actual, diffOptions);
   if (diffOptions?.keysOnly) {
-    const expectedKeys = JSON.stringify(
-      extractKeys(expectedProcessed),
-      null,
-      2
-    );
-    const actualKeys = JSON.stringify(
-      extractKeys(actualProcessed),
-      null,
-      2
-    );
+    const expectedKeys = JSON.stringify(extractKeys(expectedProcessed), null, 2);
+    const actualKeys = JSON.stringify(extractKeys(actualProcessed), null, 2);
     const parts2 = diffLines(expectedKeys, actualKeys);
     return formatDiffParts(parts2);
   }
@@ -339,9 +316,7 @@ function createDiffString(expected, actual, diffOptions) {
   }
   const parts = diffLines(expectedStr, actualStr);
   if (diffOptions?.outputNewOnly) {
-    const filtered = parts.filter(
-      (p) => p.added === true
-    );
+    const filtered = parts.filter((p) => p.added === true);
     return formatDiffParts(filtered);
   }
   return formatDiffParts(parts);
@@ -443,10 +418,7 @@ var ScoreAggregate = {
       const count = values.length || 1;
       const result = {};
       for (const field of fields) {
-        result[field] = values.reduce(
-          (s, v) => s + (v[field] ?? 0),
-          0
-        ) / count;
+        result[field] = values.reduce((s, v) => s + (v[field] ?? 0), 0) / count;
       }
       return result;
     };
@@ -480,13 +452,10 @@ var ScoreAggregate = {
           (s, v) => s + (v[valueField] ?? 0),
           0
         );
-        const sumSq = values.reduce(
-          (s, v) => {
-            const value = v[valueField] ?? 0;
-            return s + value * value;
-          },
-          0
-        );
+        const sumSq = values.reduce((s, v) => {
+          const value = v[valueField] ?? 0;
+          return s + value * value;
+        }, 0);
         const mean = sum / count;
         const variance = (sumSq - count * mean * mean) / (count - 1);
         stdDev = variance > 0 ? Math.sqrt(variance) : 0;
@@ -754,20 +723,14 @@ function nowIsoForFile() {
   return (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
 }
 function createArtifactPath(artifactDirectory, datasetId, runId) {
-  return join(
-    artifactDirectory,
-    `${datasetId}_${runId}_${nowIsoForFile()}.jsonl`
-  );
+  return join(artifactDirectory, `${datasetId}_${runId}_${nowIsoForFile()}.jsonl`);
 }
 function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persistenceQueue, updateSnapshot, startedRef, completedRef, passedRef, failedRef, testCaseResultsRef) {
   const { testCaseItem, rerunIndex, rerunTotal } = unit;
   return Effect.gen(function* () {
     const evaluatorRunId = `run-${randomUUID()}`;
     const started = Date.now();
-    const startedEvaluations = yield* Ref.modify(startedRef, (n) => [
-      n + 1,
-      n + 1
-    ]);
+    const startedEvaluations = yield* Ref.modify(startedRef, (n) => [n + 1, n + 1]);
     yield* publishEvent({
       type: "TestCaseStarted",
       runId: task.runId,
@@ -800,9 +763,7 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
         return error;
       };
       try {
-        const ctx = yield* Effect.promise(
-          () => Promise.resolve(evaluator.resolveContext())
-        );
+        const ctx = yield* Effect.promise(() => Promise.resolve(evaluator.resolveContext()));
         const result = yield* Effect.promise(
           () => Promise.resolve().then(
             () => evaluateFn({
@@ -857,10 +818,7 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
       }
     }
     const rerunPassedThis = evaluatorScores.every((s) => s.passed);
-    const completedEvaluations = yield* Ref.modify(completedRef, (n) => [
-      n + 1,
-      n + 1
-    ]);
+    const completedEvaluations = yield* Ref.modify(completedRef, (n) => [n + 1, n + 1]);
     const progressEvent = {
       type: "TestCaseProgress",
       runId: task.runId,
@@ -909,10 +867,7 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
       } else {
         yield* Ref.update(failedRef, (n) => n + 1);
       }
-      const [passed, failed] = yield* Effect.all([
-        Ref.get(passedRef),
-        Ref.get(failedRef)
-      ]);
+      const [passed, failed] = yield* Effect.all([Ref.get(passedRef), Ref.get(failedRef)]);
       yield* updateSnapshot(task.runId, (snapshot) => ({
         ...snapshot,
         passedTestCases: passed,
@@ -1232,15 +1187,11 @@ var EffectRunner = class {
     this.persistenceQueue = Effect.runSync(
       Queue.unbounded()
     );
-    this.snapshotsRef = Effect.runSync(
-      Ref.make(/* @__PURE__ */ new Map())
-    );
+    this.snapshotsRef = Effect.runSync(Ref.make(/* @__PURE__ */ new Map()));
     this.listeners = /* @__PURE__ */ new Set();
     this.datasetsById = /* @__PURE__ */ new Map();
     this.evaluatorsById = /* @__PURE__ */ new Map();
-    this.schedulerFiber = Effect.runFork(
-      this.createSchedulerEffect()
-    );
+    this.schedulerFiber = Effect.runFork(this.createSchedulerEffect());
     this.persistenceFiber = Effect.runFork(
       createPersistenceWorker(this.persistenceQueue)
     );
@@ -1387,9 +1338,9 @@ var EffectRunner = class {
     return Effect.runSync(Ref.get(this.snapshotsRef)).get(runId);
   }
   getAllRunSnapshots() {
-    return Array.from(
-      Effect.runSync(Ref.get(this.snapshotsRef)).values()
-    ).sort((a, b) => b.queuedAt - a.queuedAt);
+    return Array.from(Effect.runSync(Ref.get(this.snapshotsRef)).values()).sort(
+      (a, b) => b.queuedAt - a.queuedAt
+    );
   }
   async loadRunSnapshotsFromArtifacts() {
     return loadRunSnapshotsFromArtifacts(this.config);
@@ -1556,12 +1507,8 @@ function GenerateView({
       const absoluteDatasetPath = resolve5(process.cwd(), dataset.filePath);
       const parsed = parse2(absoluteDatasetPath);
       const outputPath = join4(parsed.dir, `${parsed.name}.cases.json`);
-      await writeFile2(
-        outputPath,
-        `${JSON.stringify(payload, null, 2)}
-`,
-        "utf8"
-      );
+      await writeFile2(outputPath, `${JSON.stringify(payload, null, 2)}
+`, "utf8");
       if (!cancelled) {
         setResult({
           count: payload.length,
@@ -1632,7 +1579,7 @@ async function generateDatasetJsonCommandPlain(runner, datasetName) {
 async function generateDatasetJsonCommandInk(runner, datasetName) {
   return new Promise((resolve5, reject) => {
     const app = render(
-      React2__default.createElement(GenerateView, {
+      React__default.createElement(GenerateView, {
         runner,
         datasetName,
         onComplete: (err) => {
@@ -1717,9 +1664,7 @@ function createBar(value, max = 100, width = 20) {
 function aggregateEvaluatorScores(events, nameById) {
   if (events.length === 0)
     return [];
-  const evaluatorIds = new Set(
-    events.flatMap((e) => e.evaluatorScores.map((x) => x.evaluatorId))
-  );
+  const evaluatorIds = new Set(events.flatMap((e) => e.evaluatorScores.map((x) => x.evaluatorId)));
   const result = [];
   for (const evaluatorId of evaluatorIds) {
     const scoreIdToItems = /* @__PURE__ */ new Map();
@@ -1749,9 +1694,7 @@ function aggregateEvaluatorScores(events, nameById) {
       return es?.passed ?? false;
     });
     const lastEvent = events[events.length - 1];
-    const lastEs = lastEvent?.evaluatorScores.find(
-      (x) => x.evaluatorId === evaluatorId
-    );
+    const lastEs = lastEvent?.evaluatorScores.find((x) => x.evaluatorId === evaluatorId);
     result.push({
       evaluatorId,
       evaluatorName: nameById.get(evaluatorId) ?? evaluatorId,
@@ -1785,9 +1728,7 @@ function RunView({
   concurrency,
   onComplete
 }) {
-  const [phase, setPhase] = useState(
-    "loading"
-  );
+  const [phase, setPhase] = useState("loading");
   const [runInfo, setRunInfo] = useState(null);
   const [testCases, setTestCases] = useState([]);
   const [startedEvaluations, setStartedEvaluations] = useState(0);
@@ -1894,10 +1835,7 @@ function RunView({
             };
             const events = existing ? [...existing.events, newEvent] : [newEvent];
             const isAggregated = events.length > 1;
-            const aggregatedEvaluatorScores = aggregateEvaluatorScores(
-              events,
-              nameById
-            );
+            const aggregatedEvaluatorScores = aggregateEvaluatorScores(events, nameById);
             const merged = {
               name: event.testCaseName,
               testCaseId: event.testCaseId,
@@ -2002,30 +1940,22 @@ function RunView({
           label: `Evaluations ${completedEvaluations}/${runInfo?.totalTestCases ?? 0} completed \u2022 ${startedEvaluations}/${runInfo?.totalTestCases ?? 0} started`
         }
       ),
-      runningEvaluations.length > 0 && /* @__PURE__ */ jsx(Box, { flexDirection: "column", marginTop: 1, children: runningEvaluations.map((item) => /* @__PURE__ */ jsxs(
-        Text,
-        {
-          color: "yellow",
-          children: [
-            "[running ",
-            item.startedTestCases,
-            "/",
-            item.totalTestCases,
-            "]",
-            " ",
-            item.name,
-            " ",
-            /* @__PURE__ */ jsxs(Text, { color: "gray", children: [
-              "(",
-              item.rerunIndex,
-              "/",
-              item.rerunTotal,
-              ")"
-            ] })
-          ]
-        },
-        `${item.testCaseId}:${item.rerunIndex}`
-      )) })
+      runningEvaluations.length > 0 && /* @__PURE__ */ jsx(Box, { flexDirection: "column", marginTop: 1, children: runningEvaluations.map((item) => /* @__PURE__ */ jsxs(Text, { color: "yellow", children: [
+        "[running ",
+        item.startedTestCases,
+        "/",
+        item.totalTestCases,
+        "] ",
+        item.name,
+        " ",
+        /* @__PURE__ */ jsxs(Text, { color: "gray", children: [
+          "(",
+          item.rerunIndex,
+          "/",
+          item.rerunTotal,
+          ")"
+        ] })
+      ] }, `${item.testCaseId}:${item.rerunIndex}`)) })
     ] }),
     testCases.length > 0 && /* @__PURE__ */ jsx(Box, { flexDirection: "column", marginBottom: 1, children: testCases.map((tc) => /* @__PURE__ */ jsxs(Box, { flexDirection: "column", marginBottom: 0, children: [
       /* @__PURE__ */ jsxs(Text, { children: [
@@ -2057,73 +1987,63 @@ function RunView({
         ] }) : null
       ] }),
       tc.errorMessage ? /* @__PURE__ */ jsx(Text, { color: "red", children: tc.errorMessage }) : null,
-      tc.aggregatedEvaluatorScores.map((item) => /* @__PURE__ */ jsxs(
-        Box,
-        {
-          flexDirection: "column",
-          marginLeft: 2,
-          children: [
-            /* @__PURE__ */ jsxs(Text, { children: [
-              item.evaluatorName,
-              ":",
-              " ",
-              /* @__PURE__ */ jsx(Text, { color: item.passed ? "green" : "red", bold: true, children: item.passed ? "PASS" : "FAIL" }),
-              item.metrics && item.metrics.length > 0 ? /* @__PURE__ */ jsxs(Fragment, { children: [
+      tc.aggregatedEvaluatorScores.map((item) => /* @__PURE__ */ jsxs(Box, { flexDirection: "column", marginLeft: 2, children: [
+        /* @__PURE__ */ jsxs(Text, { children: [
+          item.evaluatorName,
+          ":",
+          " ",
+          /* @__PURE__ */ jsx(Text, { color: item.passed ? "green" : "red", bold: true, children: item.passed ? "PASS" : "FAIL" }),
+          item.metrics && item.metrics.length > 0 ? /* @__PURE__ */ jsxs(Fragment, { children: [
+            " ",
+            item.metrics.map((m) => {
+              const def = getMetricById(m.id);
+              if (!def)
+                return null;
+              const formatted = def.format(m.data, {
+                isAggregated: tc.isAggregated
+              });
+              const label = m.name ?? def.name;
+              return /* @__PURE__ */ jsxs(Text, { color: "gray", children: [
+                "[",
+                label ? `${label}: ` : "",
+                formatted,
+                "]",
+                " "
+              ] }, m.id);
+            })
+          ] }) : null
+        ] }),
+        item.scores.length > 0 ? item.scores.map((s, idx) => {
+          const def = s.def ?? getScoreById(s.id);
+          const scoreLabel = s.name ?? def?.name ?? def?.id ?? s.id;
+          return /* @__PURE__ */ jsxs(
+            Text,
+            {
+              color: scoreColor(toNumericScore(s.data) ?? 0),
+              children: [
+                "      ",
+                scoreLabel,
+                ":",
                 " ",
-                item.metrics.map((m) => {
-                  const def = getMetricById(m.id);
-                  if (!def)
-                    return null;
-                  const formatted = def.format(m.data, {
-                    isAggregated: tc.isAggregated
-                  });
-                  const label = m.name ?? def.name;
-                  return /* @__PURE__ */ jsxs(Text, { color: "gray", children: [
-                    "[",
-                    label ? `${label}: ` : "",
-                    formatted,
-                    "]",
-                    " "
-                  ] }, m.id);
+                formatScorePart(s, scoreColor, {
+                  isAggregated: tc.isAggregated
                 })
-              ] }) : null
-            ] }),
-            item.scores.length > 0 ? item.scores.map((s, idx) => {
-              const def = s.def ?? getScoreById(s.id);
-              const scoreLabel = s.name ?? def?.name ?? def?.id ?? s.id;
-              return /* @__PURE__ */ jsxs(
-                Text,
-                {
-                  color: scoreColor(toNumericScore(s.data) ?? 0),
-                  children: [
-                    "      ",
-                    scoreLabel,
-                    ":",
-                    " ",
-                    formatScorePart(s, scoreColor, {
-                      isAggregated: tc.isAggregated
-                    })
-                  ]
-                },
-                `${item.evaluatorId}-${s.id}-${idx}`
-              );
-            }) : /* @__PURE__ */ jsx(Text, { color: "gray", children: " n/a" }),
-            !item.passed && item.logs && item.logs.length > 0 && /* @__PURE__ */ jsx(Box, { marginLeft: 2, flexDirection: "column", children: item.logs.map(
-              (log, logIdx) => log.type === "diff" ? /* @__PURE__ */ jsx(Box, { flexDirection: "column", children: getDiffLines(log).map(
-                ({ type, line }, lineIdx) => /* @__PURE__ */ jsx(
-                  Text,
-                  {
-                    color: type === "remove" ? "red" : type === "add" ? "green" : "gray",
-                    children: line
-                  },
-                  lineIdx
-                )
-              ) }, logIdx) : log.type === "log" ? /* @__PURE__ */ jsx(Box, { flexDirection: "column", children: getLogLines(log).map((line, lineIdx) => /* @__PURE__ */ jsx(Text, { color: "gray", children: line }, lineIdx)) }, logIdx) : null
-            ) })
-          ]
-        },
-        item.evaluatorId
-      ))
+              ]
+            },
+            `${item.evaluatorId}-${s.id}-${idx}`
+          );
+        }) : /* @__PURE__ */ jsx(Text, { color: "gray", children: " n/a" }),
+        !item.passed && item.logs && item.logs.length > 0 && /* @__PURE__ */ jsx(Box, { marginLeft: 2, flexDirection: "column", children: item.logs.map(
+          (log, logIdx) => log.type === "diff" ? /* @__PURE__ */ jsx(Box, { flexDirection: "column", children: getDiffLines(log).map(({ type, line }, lineIdx) => /* @__PURE__ */ jsx(
+            Text,
+            {
+              color: type === "remove" ? "red" : type === "add" ? "green" : "gray",
+              children: line
+            },
+            lineIdx
+          )) }, logIdx) : log.type === "log" ? /* @__PURE__ */ jsx(Box, { flexDirection: "column", children: getLogLines(log).map((line, lineIdx) => /* @__PURE__ */ jsx(Text, { color: "gray", children: line }, lineIdx)) }, logIdx) : null
+        ) })
+      ] }, item.evaluatorId))
     ] }, tc.testCaseId)) }),
     phase === "completed" && summary && /* @__PURE__ */ jsxs(Box, { flexDirection: "column", children: [
       /* @__PURE__ */ jsx(Text, { color: "cyan", bold: true, children: "Run Summary" }),
@@ -2165,9 +2085,9 @@ function RunView({
         /* @__PURE__ */ jsx(Text, { color: "magenta", children: "evaluator averages" }),
         Array.from(evaluatorNameById.entries()).map(([id, name]) => {
           const agg = summary.aggregates.get(id);
-          const scoreKeys = [
-            ...summary.scoreItemsByEvaluatorScore?.keys() ?? []
-          ].filter((k) => k.startsWith(`${id}:`));
+          const scoreKeys = [...summary.scoreItemsByEvaluatorScore?.keys() ?? []].filter(
+            (k) => k.startsWith(`${id}:`)
+          );
           if (scoreKeys.length === 0) {
             return /* @__PURE__ */ jsxs(Text, { color: "gray", children: [
               "- ",
@@ -2197,19 +2117,12 @@ function RunView({
               const label = aggregated.name ?? def?.name ?? def?.id ?? aggregated.id;
               const formatted = def ? def.formatAggregate(aggregated.data) : "n/a";
               const numeric = toNumericScore(aggregated.data);
-              return /* @__PURE__ */ jsxs(
-                Text,
-                {
-                  color: numeric !== void 0 ? scoreColor(numeric) : "gray",
-                  children: [
-                    "    ",
-                    label,
-                    ": ",
-                    formatted
-                  ]
-                },
-                key
-              );
+              return /* @__PURE__ */ jsxs(Text, { color: numeric !== void 0 ? scoreColor(numeric) : "gray", children: [
+                "    ",
+                label,
+                ": ",
+                formatted
+              ] }, key);
             })
           ] }, id);
         })
@@ -2285,9 +2198,7 @@ function buildTestCaseSummaries(byId) {
     for (const evaluatorScores of events[0]?.evaluatorScores ?? []) {
       const scoreIdToItems = /* @__PURE__ */ new Map();
       for (const ev of events) {
-        const es = ev.evaluatorScores.find(
-          (x) => x.evaluatorId === evaluatorScores.evaluatorId
-        );
+        const es = ev.evaluatorScores.find((x) => x.evaluatorId === evaluatorScores.evaluatorId);
         for (const s of es?.scores ?? []) {
           const list = scoreIdToItems.get(s.id) ?? [];
           list.push(s);
@@ -2340,9 +2251,7 @@ function scoreToColor(score) {
 }
 function getEvaluatorSummaryLines(evaluatorId, evaluatorName, aggregate, scoreItemsByKey) {
   const lines = [];
-  const scoreKeys = [...scoreItemsByKey.keys()].filter(
-    (k) => k.startsWith(`${evaluatorId}:`)
-  );
+  const scoreKeys = [...scoreItemsByKey.keys()].filter((k) => k.startsWith(`${evaluatorId}:`));
   if (scoreKeys.length === 0) {
     lines.push(`- ${evaluatorName.padEnd(28)} no scores`);
     return lines;
@@ -2377,9 +2286,7 @@ function createBar2(value, max = 100, width = 20) {
 function aggregateEvaluatorScoresFromEvents(events, _evaluatorNameById) {
   if (events.length === 0)
     return [];
-  const evaluatorIds = new Set(
-    events.flatMap((e) => e.evaluatorScores.map((x) => x.evaluatorId))
-  );
+  const evaluatorIds = new Set(events.flatMap((e) => e.evaluatorScores.map((x) => x.evaluatorId)));
   const result = [];
   for (const evaluatorId of evaluatorIds) {
     const scoreIdToItems = /* @__PURE__ */ new Map();
@@ -2426,9 +2333,7 @@ function formatEvaluatorScoreLine(name, scores, passed, metrics, options) {
       if (def) {
         const formatted = def.format(m.data, options);
         const label = m.name ?? def.name;
-        metricParts.push(
-          label ? `[${label}: ${formatted}]` : `[${formatted}]`
-        );
+        metricParts.push(label ? `[${label}: ${formatted}]` : `[${formatted}]`);
       }
     }
   }
@@ -2602,10 +2507,7 @@ async function runSimpleEvalCommandPlain(runner, datasetName, evaluatorPattern,
         const aggregatedScores = aggregateEvaluatorScoresFromEvents(
           existing.events);
         const isAggregated = existing.events.length > 1;
-        const durationMs = existing.events.reduce(
-          (s, e) => s + e.durationMs,
-          0
-        );
+        const durationMs = existing.events.reduce((s, e) => s + e.durationMs, 0);
         const lines = [];
         const statusSuffix = event.errorMessage ? ` ${colorize("ERROR", `${ansi2.bold}${ansi2.red}`)}` : "";
         lines.push(
@@ -2617,18 +2519,12 @@ async function runSimpleEvalCommandPlain(runner, datasetName, evaluatorPattern,
         for (const item of aggregatedScores) {
           const name = evaluatorNameById.get(item.evaluatorId) ?? item.evaluatorId;
           lines.push(
-            ...formatEvaluatorScoreLine(
-              name,
-              item.scores,
-              item.passed,
-              item.metrics,
-              { isAggregated }
-            )
+            ...formatEvaluatorScoreLine(name, item.scores, item.passed, item.metrics, {
+              isAggregated
+            })
           );
           const lastEvent = existing.events[existing.events.length - 1];
-          const lastEs = lastEvent?.evaluatorScores.find(
-            (x) => x.evaluatorId === item.evaluatorId
-          );
+          const lastEs = lastEvent?.evaluatorScores.find((x) => x.evaluatorId === item.evaluatorId);
           if (!item.passed && lastEs?.logs && lastEs.logs.length > 0) {
             for (const log of lastEs.logs) {
               if (log.type === "diff") {
@@ -2675,9 +2571,7 @@ async function runSimpleEvalCommandPlain(runner, datasetName, evaluatorPattern,
   console.log(
     `Evaluators: ${evaluators.map((item) => item.evaluator.getName() ?? item.id).join(", ")}`
   );
-  console.log(
-    `Total test cases: ${colorize(String(snapshot.totalTestCases), ansi2.bold)}`
-  );
+  console.log(`Total test cases: ${colorize(String(snapshot.totalTestCases), ansi2.bold)}`);
   console.log("");
   drawSpinner();
   spinnerTimer = setInterval(drawSpinner, 100);
@@ -2692,10 +2586,7 @@ async function runSimpleEvalCommandPlain(runner, datasetName, evaluatorPattern,
   console.log("");
   console.log(colorize("=== Run Summary ===", `${ansi2.bold}${ansi2.cyan}`));
   console.log(
-    `- passed: ${colorize(
-      `${completed.passedTestCases}/${completed.totalTestCases}`,
-      ansi2.green
-    )}`
+    `- passed: ${colorize(`${completed.passedTestCases}/${completed.totalTestCases}`, ansi2.green)}`
   );
   console.log(
     `- failed: ${colorize(
@@ -2705,11 +2596,7 @@ async function runSimpleEvalCommandPlain(runner, datasetName, evaluatorPattern,
   );
   if (overallScoreCount > 0) {
     const overallAverage = overallScoreTotal / overallScoreCount;
-    const overallSd = sampleStdDev2(
-      overallScoreTotal,
-      overallScoreSumSq,
-      overallScoreCount
-    );
+    const overallSd = sampleStdDev2(overallScoreTotal, overallScoreSumSq, overallScoreCount);
     const avgStr = overallSd !== void 0 ? `${overallAverage.toFixed(2)} \xB1 ${overallSd.toFixed(2)}` : overallAverage.toFixed(2);
     console.log(
       `- overall avg score: ${colorize(
@@ -2758,7 +2645,7 @@ async function runSimpleEvalCommandPlain(runner, datasetName, evaluatorPattern,
 async function runSimpleEvalCommandInk(runner, datasetName, evaluatorPattern, concurrency) {
   return new Promise((resolve5, reject) => {
     const app = render(
-      React2.createElement(RunView, {
+      React.createElement(RunView, {
         runner,
         datasetName,
         evaluatorPattern,