npm - @m4trix/evals - Versions diffs - 0.24.0 → 0.25.1 - Mend

@m4trix/evals 0.24.0 → 0.25.1

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/dist/cli.js CHANGED Viewed

@@ -1,6 +1,6 @@
 #!/usr/bin/env node
 import { withFullScreen, useScreenSize } from 'fullscreen-ink';
-import React2, { useState, useRef, useReducer, useEffect, useMemo } from 'react';
+import React, { useState, useRef, useReducer, useEffect, useMemo } from 'react';
 import { useApp, useInput, Box, Text } from 'ink';
 import { jsx, jsxs, Fragment } from 'react/jsx-runtime';
 import { resolve, relative, join, dirname } from 'path';
@@ -90,11 +90,7 @@ function getFooterText(state) {
   }
   return "\u2191\u2193 move  Enter add/remove  S start run  / search  Esc cancel  q quit";
 }
-function ListItem({
-  selected,
-  label,
-  itemKey
-}) {
+function ListItem({ selected, label, itemKey }) {
   return /* @__PURE__ */ jsxs(Text, { color: selected ? "cyan" : "gray", bold: selected, children: [
     selected ? "\u25B8 " : "  ",
     label
@@ -121,9 +117,7 @@ function Pane({
     }
   );
 }
-function SectionHeader({
-  children
-}) {
+function SectionHeader({ children }) {
   return /* @__PURE__ */ jsx(Text, { color: "cyan", bold: true, children });
 }
 function StatusText({ status }) {
@@ -135,10 +129,7 @@ function StatusText({ status }) {
   ] });
 }
 var LEFT_PANE_WIDTH = 44;
-function RunsSidebar({
-  state,
-  runs
-}) {
+function RunsSidebar({ state, runs }) {
   const focused = state.focus === "left";
   return /* @__PURE__ */ jsxs(Pane, { width: LEFT_PANE_WIDTH, focused, children: [
     /* @__PURE__ */ jsx(SectionHeader, { children: "Runs" }),
@@ -167,11 +158,7 @@ function RunsSidebar({
   ] });
 }
 var BLOCKS = ["\u2581", "\u2582", "\u2583", "\u2584", "\u2585", "\u2586", "\u2587", "\u2588"];
-function Sparkline({
-  data,
-  width,
-  label
-}) {
+function Sparkline({ data, width, label }) {
   if (data.length === 0)
     return null;
   const max = Math.max(...data);
@@ -401,9 +388,7 @@ var data_mock_default = {
             { name: "contract_match", score: 100 },
             { name: "arg_validity", score: 100 }
           ],
-          checks: [
-            { name: "tool_calls", passed: true, detail: "0 unexpected" }
-          ],
+          checks: [{ name: "tool_calls", passed: true, detail: "0 unexpected" }],
           failures: [],
           meta: {
             model: "gpt-4o-mini",
@@ -426,9 +411,21 @@ var data_mock_default = {
     }
   ],
   evaluators: [
-    { id: "json-schema-validator", name: "JSON Schema Validator", configPreview: "strict=true" },
-    { id: "tool-call-contract-checker", name: "Tool-call Contract Checker", configPreview: "unexpectedCalls=error" },
-    { id: "rubric-judge", name: "Rubric Judge (LLM)", configPreview: "model=gpt-4o-mini; scale=0-100" },
+    {
+      id: "json-schema-validator",
+      name: "JSON Schema Validator",
+      configPreview: "strict=true"
+    },
+    {
+      id: "tool-call-contract-checker",
+      name: "Tool-call Contract Checker",
+      configPreview: "unexpectedCalls=error"
+    },
+    {
+      id: "rubric-judge",
+      name: "Rubric Judge (LLM)",
+      configPreview: "model=gpt-4o-mini; scale=0-100"
+    },
     { id: "pii-leak-detector", name: "PII Leak Detector", configPreview: "redact=false" }
   ]
 };
@@ -508,9 +505,7 @@ async function loadRunnerData(runner) {
   const memSnapshots = runner.getAllRunSnapshots();
   const seen = new Set(memSnapshots.map((s) => s.runId));
   const fromDisk = diskSnapshots.filter((s) => !seen.has(s.runId));
-  const snapshots = [...memSnapshots, ...fromDisk].sort(
-    (a, b) => b.queuedAt - a.queuedAt
-  );
+  const snapshots = [...memSnapshots, ...fromDisk].sort((a, b) => b.queuedAt - a.queuedAt);
   if (datasets.length === 0 && evaluators.length === 0) {
     return loadMockData();
   }
@@ -632,7 +627,11 @@ function reduceCliState(state, action) {
       return { ...state, overviewScrollOffset: Math.max(0, state.overviewScrollOffset - 1) };
     }
     if (state.level === "datasets") {
-      return { ...state, datasetMenuIndex: Math.max(0, state.datasetMenuIndex - 1), overviewScrollOffset: 0 };
+      return {
+        ...state,
+        datasetMenuIndex: Math.max(0, state.datasetMenuIndex - 1),
+        overviewScrollOffset: 0
+      };
     }
     if (state.level === "runs") {
       return { ...state, runMenuIndex: Math.max(0, state.runMenuIndex - 1) };
@@ -650,10 +649,17 @@ function reduceCliState(state, action) {
       return { ...state, detailsScrollOffset: Math.min(action.max, state.detailsScrollOffset + 1) };
     }
     if (state.level === "datasets" && state.focus === "right") {
-      return { ...state, overviewScrollOffset: Math.min(action.max, state.overviewScrollOffset + 1) };
+      return {
+        ...state,
+        overviewScrollOffset: Math.min(action.max, state.overviewScrollOffset + 1)
+      };
     }
     if (state.level === "datasets") {
-      return { ...state, datasetMenuIndex: Math.min(action.max, state.datasetMenuIndex + 1), overviewScrollOffset: 0 };
+      return {
+        ...state,
+        datasetMenuIndex: Math.min(action.max, state.datasetMenuIndex + 1),
+        overviewScrollOffset: 0
+      };
     }
     if (state.level === "runs") {
       return { ...state, runMenuIndex: Math.min(action.max, state.runMenuIndex + 1) };
@@ -735,18 +741,8 @@ var defaultRunnerConfig = {
   discovery: {
     rootDir: process.cwd(),
     datasetSuffixes: [".dataset.ts", ".dataset.tsx", ".dataset.js", ".dataset.mjs"],
-    evaluatorSuffixes: [
-      ".evaluator.ts",
-      ".evaluator.tsx",
-      ".evaluator.js",
-      ".evaluator.mjs"
-    ],
-    testCaseSuffixes: [
-      ".test-case.ts",
-      ".test-case.tsx",
-      ".test-case.js",
-      ".test-case.mjs"
-    ],
+    evaluatorSuffixes: [".evaluator.ts", ".evaluator.tsx", ".evaluator.js", ".evaluator.mjs"],
+    testCaseSuffixes: [".test-case.ts", ".test-case.tsx", ".test-case.js", ".test-case.mjs"],
     excludeDirectories: ["node_modules", "dist", ".next", ".git", ".pnpm-store"]
   },
   artifactDirectory: ".eval-results",
@@ -813,14 +809,15 @@ function getJitiLoader() {
   }
   const createJiti2 = jitiModule.createJiti ?? jitiModule.default;
   if (typeof createJiti2 !== "function") {
-    throw new Error(
-      "Failed to initialize jiti for m4trix eval config loading."
-    );
+    throw new Error("Failed to initialize jiti for m4trix eval config loading.");
   }
-  cachedLoader = createJiti2(import.meta.url, {
-    interopDefault: true,
-    moduleCache: true
-  });
+  cachedLoader = createJiti2(
+    import.meta.url,
+    {
+      interopDefault: true,
+      moduleCache: true
+    }
+  );
   return cachedLoader;
 }
 function resolveConfigModuleExport(loadedModule) {
@@ -924,9 +921,7 @@ async function loadModuleExports(filePath) {
 }
 async function collectDatasetsFromFiles(config) {
   const files = await walkDirectory(config.rootDir, config.excludeDirectories);
-  const matched = files.filter(
-    (filePath) => hasOneSuffix(filePath, config.datasetSuffixes)
-  );
+  const matched = files.filter((filePath) => hasOneSuffix(filePath, config.datasetSuffixes));
   const found = await Promise.all(
     matched.map(async (absolutePath) => {
       const exports = await loadModuleExports(absolutePath);
@@ -943,9 +938,7 @@ async function collectDatasetsFromFiles(config) {
 }
 async function collectEvaluatorsFromFiles(config) {
   const files = await walkDirectory(config.rootDir, config.excludeDirectories);
-  const matched = files.filter(
-    (filePath) => hasOneSuffix(filePath, config.evaluatorSuffixes)
-  );
+  const matched = files.filter((filePath) => hasOneSuffix(filePath, config.evaluatorSuffixes));
   const found = await Promise.all(
     matched.map(async (absolutePath) => {
       const exports = await loadModuleExports(absolutePath);
@@ -962,9 +955,7 @@ async function collectEvaluatorsFromFiles(config) {
 }
 async function collectTestCasesFromFiles(config) {
   const files = await walkDirectory(config.rootDir, config.excludeDirectories);
-  const matched = files.filter(
-    (filePath) => hasOneSuffix(filePath, config.testCaseSuffixes)
-  );
+  const matched = files.filter((filePath) => hasOneSuffix(filePath, config.testCaseSuffixes));
   const found = await Promise.all(
     matched.map(async (absolutePath) => {
       const exports = await loadModuleExports(absolutePath);
@@ -1036,16 +1027,8 @@ function createDiffString(expected, actual, diffOptions) {
   const expectedProcessed = preprocessForDiff(expected, diffOptions);
   const actualProcessed = preprocessForDiff(actual, diffOptions);
   if (diffOptions?.keysOnly) {
-    const expectedKeys = JSON.stringify(
-      extractKeys(expectedProcessed),
-      null,
-      2
-    );
-    const actualKeys = JSON.stringify(
-      extractKeys(actualProcessed),
-      null,
-      2
-    );
+    const expectedKeys = JSON.stringify(extractKeys(expectedProcessed), null, 2);
+    const actualKeys = JSON.stringify(extractKeys(actualProcessed), null, 2);
     const parts2 = diffLines(expectedKeys, actualKeys);
     return formatDiffParts(parts2);
   }
@@ -1056,9 +1039,7 @@ function createDiffString(expected, actual, diffOptions) {
   }
   const parts = diffLines(expectedStr, actualStr);
   if (diffOptions?.outputNewOnly) {
-    const filtered = parts.filter(
-      (p) => p.added === true
-    );
+    const filtered = parts.filter((p) => p.added === true);
     return formatDiffParts(filtered);
   }
   return formatDiffParts(parts);
@@ -1160,10 +1141,7 @@ var ScoreAggregate = {
       const count = values.length || 1;
       const result = {};
       for (const field of fields) {
-        result[field] = values.reduce(
-          (s, v) => s + (v[field] ?? 0),
-          0
-        ) / count;
+        result[field] = values.reduce((s, v) => s + (v[field] ?? 0), 0) / count;
       }
       return result;
     };
@@ -1197,13 +1175,10 @@ var ScoreAggregate = {
           (s, v) => s + (v[valueField] ?? 0),
           0
         );
-        const sumSq = values.reduce(
-          (s, v) => {
-            const value = v[valueField] ?? 0;
-            return s + value * value;
-          },
-          0
-        );
+        const sumSq = values.reduce((s, v) => {
+          const value = v[valueField] ?? 0;
+          return s + value * value;
+        }, 0);
         const mean = sum / count;
         const variance = (sumSq - count * mean * mean) / (count - 1);
         stdDev = variance > 0 ? Math.sqrt(variance) : 0;
@@ -1434,20 +1409,14 @@ function nowIsoForFile() {
   return (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
 }
 function createArtifactPath(artifactDirectory, datasetId, runId) {
-  return join(
-    artifactDirectory,
-    `${datasetId}_${runId}_${nowIsoForFile()}.jsonl`
-  );
+  return join(artifactDirectory, `${datasetId}_${runId}_${nowIsoForFile()}.jsonl`);
 }
 function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persistenceQueue, updateSnapshot, startedRef, completedRef, passedRef, failedRef, testCaseResultsRef) {
   const { testCaseItem, rerunIndex, rerunTotal } = unit;
   return Effect.gen(function* () {
     const evaluatorRunId = `run-${randomUUID()}`;
     const started = Date.now();
-    const startedEvaluations = yield* Ref.modify(startedRef, (n) => [
-      n + 1,
-      n + 1
-    ]);
+    const startedEvaluations = yield* Ref.modify(startedRef, (n) => [n + 1, n + 1]);
     yield* publishEvent({
       type: "TestCaseStarted",
       runId: task.runId,
@@ -1480,9 +1449,7 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
         return error;
       };
       try {
-        const ctx = yield* Effect.promise(
-          () => Promise.resolve(evaluator.resolveContext())
-        );
+        const ctx = yield* Effect.promise(() => Promise.resolve(evaluator.resolveContext()));
         const result = yield* Effect.promise(
           () => Promise.resolve().then(
             () => evaluateFn({
@@ -1537,10 +1504,7 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
       }
     }
     const rerunPassedThis = evaluatorScores.every((s) => s.passed);
-    const completedEvaluations = yield* Ref.modify(completedRef, (n) => [
-      n + 1,
-      n + 1
-    ]);
+    const completedEvaluations = yield* Ref.modify(completedRef, (n) => [n + 1, n + 1]);
     const progressEvent = {
       type: "TestCaseProgress",
       runId: task.runId,
@@ -1589,10 +1553,7 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
       } else {
         yield* Ref.update(failedRef, (n) => n + 1);
       }
-      const [passed, failed] = yield* Effect.all([
-        Ref.get(passedRef),
-        Ref.get(failedRef)
-      ]);
+      const [passed, failed] = yield* Effect.all([Ref.get(passedRef), Ref.get(failedRef)]);
       yield* updateSnapshot(task.runId, (snapshot) => ({
         ...snapshot,
         passedTestCases: passed,
@@ -1942,15 +1903,11 @@ var EffectRunner = class {
     this.persistenceQueue = Effect.runSync(
       Queue.unbounded()
     );
-    this.snapshotsRef = Effect.runSync(
-      Ref.make(/* @__PURE__ */ new Map())
-    );
+    this.snapshotsRef = Effect.runSync(Ref.make(/* @__PURE__ */ new Map()));
     this.listeners = /* @__PURE__ */ new Set();
     this.datasetsById = /* @__PURE__ */ new Map();
     this.evaluatorsById = /* @__PURE__ */ new Map();
-    this.schedulerFiber = Effect.runFork(
-      this.createSchedulerEffect()
-    );
+    this.schedulerFiber = Effect.runFork(this.createSchedulerEffect());
     this.persistenceFiber = Effect.runFork(
       createPersistenceWorker(this.persistenceQueue)
     );
@@ -2097,9 +2054,9 @@ var EffectRunner = class {
     return Effect.runSync(Ref.get(this.snapshotsRef)).get(runId);
   }
   getAllRunSnapshots() {
-    return Array.from(
-      Effect.runSync(Ref.get(this.snapshotsRef)).values()
-    ).sort((a, b) => b.queuedAt - a.queuedAt);
+    return Array.from(Effect.runSync(Ref.get(this.snapshotsRef)).values()).sort(
+      (a, b) => b.queuedAt - a.queuedAt
+    );
   }
   async loadRunSnapshotsFromArtifacts() {
     return loadRunSnapshotsFromArtifacts(this.config);
@@ -2315,11 +2272,7 @@ function DatasetsView({
     ] })
   ] });
 }
-function RunsView({
-  state,
-  dataset,
-  selectedRun
-}) {
+function RunsView({ state, dataset, selectedRun }) {
   const runs = dataset?.runs ?? [];
   const rightFocused = state.focus === "right";
   return /* @__PURE__ */ jsxs(Fragment, { children: [
@@ -2335,10 +2288,10 @@ function RunsView({
       /* @__PURE__ */ jsxs(Text, { color: "gray", children: [
         "Commit: ",
         selectedRun.meta.commit,
-        "  Branch: ",
+        " Branch: ",
         selectedRun.meta.branch,
+        " Seed:",
         " ",
-        "Seed: ",
         selectedRun.meta.seed
       ] }),
       /* @__PURE__ */ jsx(Text, { children: " " }),
@@ -2351,23 +2304,10 @@ function RunsView({
           format: (v) => `${v}%`
         }
       ),
-      /* @__PURE__ */ jsx(
-        TextBar,
-        {
-          label: "avg score",
-          value: Math.round(selectedRun.performance.avgScore * 100)
-        }
-      ),
+      /* @__PURE__ */ jsx(TextBar, { label: "avg score", value: Math.round(selectedRun.performance.avgScore * 100) }),
       /* @__PURE__ */ jsx(Text, { children: " " }),
       /* @__PURE__ */ jsx(SectionHeader, { children: "Dimensions" }),
-      selectedRun.dimensions.map((dimension) => /* @__PURE__ */ jsx(
-        TextBar,
-        {
-          label: dimension.name,
-          value: dimension.score
-        },
-        dimension.name
-      )),
+      selectedRun.dimensions.map((dimension) => /* @__PURE__ */ jsx(TextBar, { label: dimension.name, value: dimension.score }, dimension.name)),
       /* @__PURE__ */ jsx(Text, { children: " " }),
       /* @__PURE__ */ jsx(SectionHeader, { children: "Latency trend" }),
       /* @__PURE__ */ jsx(
@@ -2470,15 +2410,7 @@ function buildDetailRows(run, testCases, evaluatorNameById) {
     ...dimensions.map((d) => /* @__PURE__ */ jsx(TextBar, { label: d.name, value: d.score }, `dim-${d.name}`)),
     /* @__PURE__ */ jsx(Text, { children: " " }, "sp2"),
     /* @__PURE__ */ jsx(SectionHeader, { children: "Checks (boolean)" }, "checks-h"),
-    ...checks.map((c) => /* @__PURE__ */ jsx(
-      CheckRow,
-      {
-        name: c.name,
-        passed: c.passed,
-        detail: c.detail
-      },
-      `chk-${c.name}`
-    )),
+    ...checks.map((c) => /* @__PURE__ */ jsx(CheckRow, { name: c.name, passed: c.passed, detail: c.detail }, `chk-${c.name}`)),
     /* @__PURE__ */ jsx(Text, { children: " " }, "sp3"),
     /* @__PURE__ */ jsx(SectionHeader, { children: "Performance" }, "perf-h"),
     /* @__PURE__ */ jsx(
@@ -2595,17 +2527,10 @@ function buildDetailRows(run, testCases, evaluatorNameById) {
           }
         } else {
           rows.push(
-            /* @__PURE__ */ jsxs(
-              Text,
-              {
-                color: "gray",
-                children: [
-                  "      ",
-                  "n/a"
-                ]
-              },
-              `tc-${tc.testCaseId}-${item.evaluatorId}-n/a`
-            )
+            /* @__PURE__ */ jsxs(Text, { color: "gray", children: [
+              "      ",
+              "n/a"
+            ] }, `tc-${tc.testCaseId}-${item.evaluatorId}-n/a`)
           );
         }
         if (!item.passed && item.logs && item.logs.length > 0) {
@@ -2663,7 +2588,7 @@ function RunDetailsView({
   const runs = dataset?.runs ?? [];
   const rightFocused = state.focus === "right";
   const [testCases, setTestCases] = useState([]);
-  const evaluatorNameById = React2.useMemo(
+  const evaluatorNameById = React.useMemo(
     () => new Map(evaluators.map((e) => [e.id, e.name])),
     [evaluators]
   );
@@ -2686,7 +2611,7 @@ function RunDetailsView({
   const visible = rows.slice(offset, offset + DETAILS_PAGE_SIZE);
   return /* @__PURE__ */ jsxs(Fragment, { children: [
     /* @__PURE__ */ jsx(RunsSidebar, { state, dataset, runs }),
-    /* @__PURE__ */ jsx(Pane, { flexGrow: 1, marginLeft: 1, focused: rightFocused, children: /* @__PURE__ */ jsx(Box, { flexDirection: "column", children: visible.map((row, i) => /* @__PURE__ */ jsx(React2.Fragment, { children: row }, i)) }) })
+    /* @__PURE__ */ jsx(Pane, { flexGrow: 1, marginLeft: 1, focused: rightFocused, children: /* @__PURE__ */ jsx(Box, { flexDirection: "column", children: visible.map((row, i) => /* @__PURE__ */ jsx(React.Fragment, { children: row }, i)) }) })
   ] });
 }
 var LEFT_PANE_WIDTH3 = 44;
@@ -2709,19 +2634,11 @@ function NewEvaluationView({
       visibleEvaluators.map((evaluator, index) => {
         const selected = index === state.evaluatorMenuIndex;
         const inSelection = state.selectedEvaluatorIds.includes(evaluator.id);
-        return /* @__PURE__ */ jsxs(
-          Text,
-          {
-            color: selected ? "cyan" : "gray",
-            bold: selected,
-            children: [
-              selected ? "\u25B8 " : "  ",
-              inSelection ? "[x] " : "[ ] ",
-              evaluator.name
-            ]
-          },
-          evaluator.id
-        );
+        return /* @__PURE__ */ jsxs(Text, { color: selected ? "cyan" : "gray", bold: selected, children: [
+          selected ? "\u25B8 " : "  ",
+          inSelection ? "[x] " : "[ ] ",
+          evaluator.name
+        ] }, evaluator.id);
       })
     ] }),
     /* @__PURE__ */ jsxs(Pane, { flexGrow: 1, marginLeft: 1, focused: rightFocused, children: [
@@ -2753,26 +2670,16 @@ function clampCursor(state, filteredDatasetsLength, selectedRunCount) {
     ...state,
     datasetMenuIndex: Math.max(0, Math.min(state.datasetMenuIndex, datasetMax)),
     runMenuIndex: Math.max(0, Math.min(state.runMenuIndex, runMax)),
-    evaluatorMenuIndex: Math.max(
-      0,
-      Math.min(state.evaluatorMenuIndex, evaluatorMax)
-    )
+    evaluatorMenuIndex: Math.max(0, Math.min(state.evaluatorMenuIndex, evaluatorMax))
   };
 }
-function EvalsCliApp({
-  data,
-  args,
-  runner
-}) {
+function EvalsCliApp({ data, args, runner }) {
   const { exit } = useApp();
   const { width: stdoutWidth, height: stdoutHeight } = useScreenSize();
   const [liveData, setLiveData] = useState(data);
   const [runtimeMessage, setRuntimeMessage] = useState();
   const overviewRowCountRef = useRef(0);
-  const [state, dispatch] = useReducer(
-    reduceCliState,
-    createInitialState(data, args)
-  );
+  const [state, dispatch] = useReducer(reduceCliState, createInitialState(data, args));
   useEffect(() => {
     setLiveData(data);
   }, [data]);
@@ -2804,14 +2711,8 @@ function EvalsCliApp({
     filteredDatasets.length,
     getDatasetByMenuIndex(filteredDatasets, state.datasetMenuIndex)?.runs.length ?? 0
   );
-  const selectedDataset = getDatasetByMenuIndex(
-    filteredDatasets,
-    clampedState.datasetMenuIndex
-  );
-  const selectedRun = getRunByMenuIndex(
-    selectedDataset,
-    clampedState.runMenuIndex
-  );
+  const selectedDataset = getDatasetByMenuIndex(filteredDatasets, clampedState.datasetMenuIndex);
+  const selectedRun = getRunByMenuIndex(selectedDataset, clampedState.runMenuIndex);
   const visibleEvaluators = liveData.evaluators.filter(
     (evaluator) => evaluator.name.toLowerCase().includes(clampedState.searchQuery.toLowerCase())
   );
@@ -2905,9 +2806,7 @@ function EvalsCliApp({
           `Started ${snapshot.runId} on ${selectedDataset.name} (${snapshot.totalTestCases} cases).`
         );
       }).catch((error) => {
-        setRuntimeMessage(
-          error instanceof Error ? error.message : "Failed to start evaluation."
-        );
+        setRuntimeMessage(error instanceof Error ? error.message : "Failed to start evaluation.");
       });
     }
   });
@@ -2934,14 +2833,7 @@ function EvalsCliApp({
       );
     }
     if (clampedState.level === "runs") {
-      return /* @__PURE__ */ jsx(
-        RunsView,
-        {
-          state: clampedState,
-          dataset: selectedDataset,
-          selectedRun
-        }
-      );
+      return /* @__PURE__ */ jsx(RunsView, { state: clampedState, dataset: selectedDataset, selectedRun });
     }
     return /* @__PURE__ */ jsx(
       RunDetailsView,
@@ -2953,82 +2845,44 @@ function EvalsCliApp({
       }
     );
   };
-  return /* @__PURE__ */ jsxs(
-    Box,
-    {
-      flexDirection: "column",
-      flexGrow: 1,
-      width: stdoutWidth,
-      height: stdoutHeight,
-      children: [
-        /* @__PURE__ */ jsx(
-          Box,
-          {
-            borderStyle: "round",
-            borderColor: "cyan",
-            paddingX: 1,
-            width: stdoutWidth,
-            children: /* @__PURE__ */ jsx(Text, { children: getBreadcrumbText(
-              clampedState,
-              selectedDataset?.name,
-              selectedRun?.label
-            ) })
-          }
-        ),
-        clampedState.startupWarnings.length > 0 && /* @__PURE__ */ jsxs(
-          Box,
-          {
-            marginTop: 1,
-            borderStyle: "round",
-            borderColor: "yellow",
-            paddingX: 1,
-            flexDirection: "column",
-            width: stdoutWidth,
-            children: [
-              /* @__PURE__ */ jsx(Text, { color: "yellow", children: "Startup warnings:" }),
-              clampedState.startupWarnings.map((warning, index) => /* @__PURE__ */ jsx(Text, { children: warning }, `${warning}-${index}`))
-            ]
-          }
-        ),
-        clampedState.searchMode && /* @__PURE__ */ jsxs(
-          Box,
-          {
-            marginTop: 1,
-            borderStyle: "round",
-            borderColor: "magenta",
-            paddingX: 1,
-            width: stdoutWidth,
-            children: [
-              /* @__PURE__ */ jsx(Text, { color: "magenta", bold: true, children: "Search: " }),
-              /* @__PURE__ */ jsx(Text, { color: "white", children: clampedState.searchQuery })
-            ]
-          }
-        ),
-        runtimeMessage && /* @__PURE__ */ jsx(
-          Box,
-          {
-            marginTop: 1,
-            borderStyle: "round",
-            borderColor: "blue",
-            paddingX: 1,
-            width: stdoutWidth,
-            children: /* @__PURE__ */ jsx(Text, { color: "blue", children: runtimeMessage })
-          }
-        ),
-        /* @__PURE__ */ jsx(
-          Box,
-          {
-            marginTop: 1,
-            flexGrow: 1,
-            width: stdoutWidth,
-            flexDirection: "row",
-            children: renderContent()
-          }
-        ),
-        /* @__PURE__ */ jsx(Box, { marginTop: 1, paddingX: 1, children: /* @__PURE__ */ jsx(Text, { color: "gray", children: getFooterText(clampedState) }) })
-      ]
-    }
-  );
+  return /* @__PURE__ */ jsxs(Box, { flexDirection: "column", flexGrow: 1, width: stdoutWidth, height: stdoutHeight, children: [
+    /* @__PURE__ */ jsx(Box, { borderStyle: "round", borderColor: "cyan", paddingX: 1, width: stdoutWidth, children: /* @__PURE__ */ jsx(Text, { children: getBreadcrumbText(clampedState, selectedDataset?.name, selectedRun?.label) }) }),
+    clampedState.startupWarnings.length > 0 && /* @__PURE__ */ jsxs(
+      Box,
+      {
+        marginTop: 1,
+        borderStyle: "round",
+        borderColor: "yellow",
+        paddingX: 1,
+        flexDirection: "column",
+        width: stdoutWidth,
+        children: [
+          /* @__PURE__ */ jsx(Text, { color: "yellow", children: "Startup warnings:" }),
+          clampedState.startupWarnings.map((warning, index) => /* @__PURE__ */ jsx(Text, { children: warning }, `${warning}-${index}`))
+        ]
+      }
+    ),
+    clampedState.searchMode && /* @__PURE__ */ jsxs(
+      Box,
+      {
+        marginTop: 1,
+        borderStyle: "round",
+        borderColor: "magenta",
+        paddingX: 1,
+        width: stdoutWidth,
+        children: [
+          /* @__PURE__ */ jsxs(Text, { color: "magenta", bold: true, children: [
+            "Search:",
+            " "
+          ] }),
+          /* @__PURE__ */ jsx(Text, { color: "white", children: clampedState.searchQuery })
+        ]
+      }
+    ),
+    runtimeMessage && /* @__PURE__ */ jsx(Box, { marginTop: 1, borderStyle: "round", borderColor: "blue", paddingX: 1, width: stdoutWidth, children: /* @__PURE__ */ jsx(Text, { color: "blue", children: runtimeMessage }) }),
+    /* @__PURE__ */ jsx(Box, { marginTop: 1, flexGrow: 1, width: stdoutWidth, flexDirection: "row", children: renderContent() }),
+    /* @__PURE__ */ jsx(Box, { marginTop: 1, paddingX: 1, children: /* @__PURE__ */ jsx(Text, { color: "gray", children: getFooterText(clampedState) }) })
+  ] });
 }
 async function main() {
   const args = parseStartupArgs(process.argv.slice(2));
@@ -3040,9 +2894,7 @@ async function main() {
   process.on("SIGTERM", () => {
     void runner.shutdown().finally(() => process.exit(0));
   });
-  withFullScreen(
-    /* @__PURE__ */ jsx(EvalsCliApp, { data, args, runner })
-  ).start();
+  withFullScreen(/* @__PURE__ */ jsx(EvalsCliApp, { data, args, runner })).start();
 }
 void main();
 //# sourceMappingURL=out.js.map