@m4trix/evals 0.24.0 → 0.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env node
2
2
  import { withFullScreen, useScreenSize } from 'fullscreen-ink';
3
- import React2, { useState, useRef, useReducer, useEffect, useMemo } from 'react';
3
+ import React, { useState, useRef, useReducer, useEffect, useMemo } from 'react';
4
4
  import { useApp, useInput, Box, Text } from 'ink';
5
5
  import { jsx, jsxs, Fragment } from 'react/jsx-runtime';
6
6
  import { resolve, relative, join, dirname } from 'path';
@@ -90,11 +90,7 @@ function getFooterText(state) {
90
90
  }
91
91
  return "\u2191\u2193 move Enter add/remove S start run / search Esc cancel q quit";
92
92
  }
93
- function ListItem({
94
- selected,
95
- label,
96
- itemKey
97
- }) {
93
+ function ListItem({ selected, label, itemKey }) {
98
94
  return /* @__PURE__ */ jsxs(Text, { color: selected ? "cyan" : "gray", bold: selected, children: [
99
95
  selected ? "\u25B8 " : " ",
100
96
  label
@@ -121,9 +117,7 @@ function Pane({
121
117
  }
122
118
  );
123
119
  }
124
- function SectionHeader({
125
- children
126
- }) {
120
+ function SectionHeader({ children }) {
127
121
  return /* @__PURE__ */ jsx(Text, { color: "cyan", bold: true, children });
128
122
  }
129
123
  function StatusText({ status }) {
@@ -135,10 +129,7 @@ function StatusText({ status }) {
135
129
  ] });
136
130
  }
137
131
  var LEFT_PANE_WIDTH = 44;
138
- function RunsSidebar({
139
- state,
140
- runs
141
- }) {
132
+ function RunsSidebar({ state, runs }) {
142
133
  const focused = state.focus === "left";
143
134
  return /* @__PURE__ */ jsxs(Pane, { width: LEFT_PANE_WIDTH, focused, children: [
144
135
  /* @__PURE__ */ jsx(SectionHeader, { children: "Runs" }),
@@ -167,11 +158,7 @@ function RunsSidebar({
167
158
  ] });
168
159
  }
169
160
  var BLOCKS = ["\u2581", "\u2582", "\u2583", "\u2584", "\u2585", "\u2586", "\u2587", "\u2588"];
170
- function Sparkline({
171
- data,
172
- width,
173
- label
174
- }) {
161
+ function Sparkline({ data, width, label }) {
175
162
  if (data.length === 0)
176
163
  return null;
177
164
  const max = Math.max(...data);
@@ -401,9 +388,7 @@ var data_mock_default = {
401
388
  { name: "contract_match", score: 100 },
402
389
  { name: "arg_validity", score: 100 }
403
390
  ],
404
- checks: [
405
- { name: "tool_calls", passed: true, detail: "0 unexpected" }
406
- ],
391
+ checks: [{ name: "tool_calls", passed: true, detail: "0 unexpected" }],
407
392
  failures: [],
408
393
  meta: {
409
394
  model: "gpt-4o-mini",
@@ -426,9 +411,21 @@ var data_mock_default = {
426
411
  }
427
412
  ],
428
413
  evaluators: [
429
- { id: "json-schema-validator", name: "JSON Schema Validator", configPreview: "strict=true" },
430
- { id: "tool-call-contract-checker", name: "Tool-call Contract Checker", configPreview: "unexpectedCalls=error" },
431
- { id: "rubric-judge", name: "Rubric Judge (LLM)", configPreview: "model=gpt-4o-mini; scale=0-100" },
414
+ {
415
+ id: "json-schema-validator",
416
+ name: "JSON Schema Validator",
417
+ configPreview: "strict=true"
418
+ },
419
+ {
420
+ id: "tool-call-contract-checker",
421
+ name: "Tool-call Contract Checker",
422
+ configPreview: "unexpectedCalls=error"
423
+ },
424
+ {
425
+ id: "rubric-judge",
426
+ name: "Rubric Judge (LLM)",
427
+ configPreview: "model=gpt-4o-mini; scale=0-100"
428
+ },
432
429
  { id: "pii-leak-detector", name: "PII Leak Detector", configPreview: "redact=false" }
433
430
  ]
434
431
  };
@@ -508,9 +505,7 @@ async function loadRunnerData(runner) {
508
505
  const memSnapshots = runner.getAllRunSnapshots();
509
506
  const seen = new Set(memSnapshots.map((s) => s.runId));
510
507
  const fromDisk = diskSnapshots.filter((s) => !seen.has(s.runId));
511
- const snapshots = [...memSnapshots, ...fromDisk].sort(
512
- (a, b) => b.queuedAt - a.queuedAt
513
- );
508
+ const snapshots = [...memSnapshots, ...fromDisk].sort((a, b) => b.queuedAt - a.queuedAt);
514
509
  if (datasets.length === 0 && evaluators.length === 0) {
515
510
  return loadMockData();
516
511
  }
@@ -632,7 +627,11 @@ function reduceCliState(state, action) {
632
627
  return { ...state, overviewScrollOffset: Math.max(0, state.overviewScrollOffset - 1) };
633
628
  }
634
629
  if (state.level === "datasets") {
635
- return { ...state, datasetMenuIndex: Math.max(0, state.datasetMenuIndex - 1), overviewScrollOffset: 0 };
630
+ return {
631
+ ...state,
632
+ datasetMenuIndex: Math.max(0, state.datasetMenuIndex - 1),
633
+ overviewScrollOffset: 0
634
+ };
636
635
  }
637
636
  if (state.level === "runs") {
638
637
  return { ...state, runMenuIndex: Math.max(0, state.runMenuIndex - 1) };
@@ -650,10 +649,17 @@ function reduceCliState(state, action) {
650
649
  return { ...state, detailsScrollOffset: Math.min(action.max, state.detailsScrollOffset + 1) };
651
650
  }
652
651
  if (state.level === "datasets" && state.focus === "right") {
653
- return { ...state, overviewScrollOffset: Math.min(action.max, state.overviewScrollOffset + 1) };
652
+ return {
653
+ ...state,
654
+ overviewScrollOffset: Math.min(action.max, state.overviewScrollOffset + 1)
655
+ };
654
656
  }
655
657
  if (state.level === "datasets") {
656
- return { ...state, datasetMenuIndex: Math.min(action.max, state.datasetMenuIndex + 1), overviewScrollOffset: 0 };
658
+ return {
659
+ ...state,
660
+ datasetMenuIndex: Math.min(action.max, state.datasetMenuIndex + 1),
661
+ overviewScrollOffset: 0
662
+ };
657
663
  }
658
664
  if (state.level === "runs") {
659
665
  return { ...state, runMenuIndex: Math.min(action.max, state.runMenuIndex + 1) };
@@ -735,18 +741,8 @@ var defaultRunnerConfig = {
735
741
  discovery: {
736
742
  rootDir: process.cwd(),
737
743
  datasetSuffixes: [".dataset.ts", ".dataset.tsx", ".dataset.js", ".dataset.mjs"],
738
- evaluatorSuffixes: [
739
- ".evaluator.ts",
740
- ".evaluator.tsx",
741
- ".evaluator.js",
742
- ".evaluator.mjs"
743
- ],
744
- testCaseSuffixes: [
745
- ".test-case.ts",
746
- ".test-case.tsx",
747
- ".test-case.js",
748
- ".test-case.mjs"
749
- ],
744
+ evaluatorSuffixes: [".evaluator.ts", ".evaluator.tsx", ".evaluator.js", ".evaluator.mjs"],
745
+ testCaseSuffixes: [".test-case.ts", ".test-case.tsx", ".test-case.js", ".test-case.mjs"],
750
746
  excludeDirectories: ["node_modules", "dist", ".next", ".git", ".pnpm-store"]
751
747
  },
752
748
  artifactDirectory: ".eval-results",
@@ -813,14 +809,15 @@ function getJitiLoader() {
813
809
  }
814
810
  const createJiti2 = jitiModule.createJiti ?? jitiModule.default;
815
811
  if (typeof createJiti2 !== "function") {
816
- throw new Error(
817
- "Failed to initialize jiti for m4trix eval config loading."
818
- );
812
+ throw new Error("Failed to initialize jiti for m4trix eval config loading.");
819
813
  }
820
- cachedLoader = createJiti2(import.meta.url, {
821
- interopDefault: true,
822
- moduleCache: true
823
- });
814
+ cachedLoader = createJiti2(
815
+ import.meta.url,
816
+ {
817
+ interopDefault: true,
818
+ moduleCache: true
819
+ }
820
+ );
824
821
  return cachedLoader;
825
822
  }
826
823
  function resolveConfigModuleExport(loadedModule) {
@@ -924,9 +921,7 @@ async function loadModuleExports(filePath) {
924
921
  }
925
922
  async function collectDatasetsFromFiles(config) {
926
923
  const files = await walkDirectory(config.rootDir, config.excludeDirectories);
927
- const matched = files.filter(
928
- (filePath) => hasOneSuffix(filePath, config.datasetSuffixes)
929
- );
924
+ const matched = files.filter((filePath) => hasOneSuffix(filePath, config.datasetSuffixes));
930
925
  const found = await Promise.all(
931
926
  matched.map(async (absolutePath) => {
932
927
  const exports = await loadModuleExports(absolutePath);
@@ -943,9 +938,7 @@ async function collectDatasetsFromFiles(config) {
943
938
  }
944
939
  async function collectEvaluatorsFromFiles(config) {
945
940
  const files = await walkDirectory(config.rootDir, config.excludeDirectories);
946
- const matched = files.filter(
947
- (filePath) => hasOneSuffix(filePath, config.evaluatorSuffixes)
948
- );
941
+ const matched = files.filter((filePath) => hasOneSuffix(filePath, config.evaluatorSuffixes));
949
942
  const found = await Promise.all(
950
943
  matched.map(async (absolutePath) => {
951
944
  const exports = await loadModuleExports(absolutePath);
@@ -962,9 +955,7 @@ async function collectEvaluatorsFromFiles(config) {
962
955
  }
963
956
  async function collectTestCasesFromFiles(config) {
964
957
  const files = await walkDirectory(config.rootDir, config.excludeDirectories);
965
- const matched = files.filter(
966
- (filePath) => hasOneSuffix(filePath, config.testCaseSuffixes)
967
- );
958
+ const matched = files.filter((filePath) => hasOneSuffix(filePath, config.testCaseSuffixes));
968
959
  const found = await Promise.all(
969
960
  matched.map(async (absolutePath) => {
970
961
  const exports = await loadModuleExports(absolutePath);
@@ -1036,16 +1027,8 @@ function createDiffString(expected, actual, diffOptions) {
1036
1027
  const expectedProcessed = preprocessForDiff(expected, diffOptions);
1037
1028
  const actualProcessed = preprocessForDiff(actual, diffOptions);
1038
1029
  if (diffOptions?.keysOnly) {
1039
- const expectedKeys = JSON.stringify(
1040
- extractKeys(expectedProcessed),
1041
- null,
1042
- 2
1043
- );
1044
- const actualKeys = JSON.stringify(
1045
- extractKeys(actualProcessed),
1046
- null,
1047
- 2
1048
- );
1030
+ const expectedKeys = JSON.stringify(extractKeys(expectedProcessed), null, 2);
1031
+ const actualKeys = JSON.stringify(extractKeys(actualProcessed), null, 2);
1049
1032
  const parts2 = diffLines(expectedKeys, actualKeys);
1050
1033
  return formatDiffParts(parts2);
1051
1034
  }
@@ -1056,9 +1039,7 @@ function createDiffString(expected, actual, diffOptions) {
1056
1039
  }
1057
1040
  const parts = diffLines(expectedStr, actualStr);
1058
1041
  if (diffOptions?.outputNewOnly) {
1059
- const filtered = parts.filter(
1060
- (p) => p.added === true
1061
- );
1042
+ const filtered = parts.filter((p) => p.added === true);
1062
1043
  return formatDiffParts(filtered);
1063
1044
  }
1064
1045
  return formatDiffParts(parts);
@@ -1160,10 +1141,7 @@ var ScoreAggregate = {
1160
1141
  const count = values.length || 1;
1161
1142
  const result = {};
1162
1143
  for (const field of fields) {
1163
- result[field] = values.reduce(
1164
- (s, v) => s + (v[field] ?? 0),
1165
- 0
1166
- ) / count;
1144
+ result[field] = values.reduce((s, v) => s + (v[field] ?? 0), 0) / count;
1167
1145
  }
1168
1146
  return result;
1169
1147
  };
@@ -1197,13 +1175,10 @@ var ScoreAggregate = {
1197
1175
  (s, v) => s + (v[valueField] ?? 0),
1198
1176
  0
1199
1177
  );
1200
- const sumSq = values.reduce(
1201
- (s, v) => {
1202
- const value = v[valueField] ?? 0;
1203
- return s + value * value;
1204
- },
1205
- 0
1206
- );
1178
+ const sumSq = values.reduce((s, v) => {
1179
+ const value = v[valueField] ?? 0;
1180
+ return s + value * value;
1181
+ }, 0);
1207
1182
  const mean = sum / count;
1208
1183
  const variance = (sumSq - count * mean * mean) / (count - 1);
1209
1184
  stdDev = variance > 0 ? Math.sqrt(variance) : 0;
@@ -1434,20 +1409,14 @@ function nowIsoForFile() {
1434
1409
  return (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
1435
1410
  }
1436
1411
  function createArtifactPath(artifactDirectory, datasetId, runId) {
1437
- return join(
1438
- artifactDirectory,
1439
- `${datasetId}_${runId}_${nowIsoForFile()}.jsonl`
1440
- );
1412
+ return join(artifactDirectory, `${datasetId}_${runId}_${nowIsoForFile()}.jsonl`);
1441
1413
  }
1442
1414
  function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persistenceQueue, updateSnapshot, startedRef, completedRef, passedRef, failedRef, testCaseResultsRef) {
1443
1415
  const { testCaseItem, rerunIndex, rerunTotal } = unit;
1444
1416
  return Effect.gen(function* () {
1445
1417
  const evaluatorRunId = `run-${randomUUID()}`;
1446
1418
  const started = Date.now();
1447
- const startedEvaluations = yield* Ref.modify(startedRef, (n) => [
1448
- n + 1,
1449
- n + 1
1450
- ]);
1419
+ const startedEvaluations = yield* Ref.modify(startedRef, (n) => [n + 1, n + 1]);
1451
1420
  yield* publishEvent({
1452
1421
  type: "TestCaseStarted",
1453
1422
  runId: task.runId,
@@ -1480,9 +1449,7 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
1480
1449
  return error;
1481
1450
  };
1482
1451
  try {
1483
- const ctx = yield* Effect.promise(
1484
- () => Promise.resolve(evaluator.resolveContext())
1485
- );
1452
+ const ctx = yield* Effect.promise(() => Promise.resolve(evaluator.resolveContext()));
1486
1453
  const result = yield* Effect.promise(
1487
1454
  () => Promise.resolve().then(
1488
1455
  () => evaluateFn({
@@ -1537,10 +1504,7 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
1537
1504
  }
1538
1505
  }
1539
1506
  const rerunPassedThis = evaluatorScores.every((s) => s.passed);
1540
- const completedEvaluations = yield* Ref.modify(completedRef, (n) => [
1541
- n + 1,
1542
- n + 1
1543
- ]);
1507
+ const completedEvaluations = yield* Ref.modify(completedRef, (n) => [n + 1, n + 1]);
1544
1508
  const progressEvent = {
1545
1509
  type: "TestCaseProgress",
1546
1510
  runId: task.runId,
@@ -1589,10 +1553,7 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
1589
1553
  } else {
1590
1554
  yield* Ref.update(failedRef, (n) => n + 1);
1591
1555
  }
1592
- const [passed, failed] = yield* Effect.all([
1593
- Ref.get(passedRef),
1594
- Ref.get(failedRef)
1595
- ]);
1556
+ const [passed, failed] = yield* Effect.all([Ref.get(passedRef), Ref.get(failedRef)]);
1596
1557
  yield* updateSnapshot(task.runId, (snapshot) => ({
1597
1558
  ...snapshot,
1598
1559
  passedTestCases: passed,
@@ -1942,15 +1903,11 @@ var EffectRunner = class {
1942
1903
  this.persistenceQueue = Effect.runSync(
1943
1904
  Queue.unbounded()
1944
1905
  );
1945
- this.snapshotsRef = Effect.runSync(
1946
- Ref.make(/* @__PURE__ */ new Map())
1947
- );
1906
+ this.snapshotsRef = Effect.runSync(Ref.make(/* @__PURE__ */ new Map()));
1948
1907
  this.listeners = /* @__PURE__ */ new Set();
1949
1908
  this.datasetsById = /* @__PURE__ */ new Map();
1950
1909
  this.evaluatorsById = /* @__PURE__ */ new Map();
1951
- this.schedulerFiber = Effect.runFork(
1952
- this.createSchedulerEffect()
1953
- );
1910
+ this.schedulerFiber = Effect.runFork(this.createSchedulerEffect());
1954
1911
  this.persistenceFiber = Effect.runFork(
1955
1912
  createPersistenceWorker(this.persistenceQueue)
1956
1913
  );
@@ -2097,9 +2054,9 @@ var EffectRunner = class {
2097
2054
  return Effect.runSync(Ref.get(this.snapshotsRef)).get(runId);
2098
2055
  }
2099
2056
  getAllRunSnapshots() {
2100
- return Array.from(
2101
- Effect.runSync(Ref.get(this.snapshotsRef)).values()
2102
- ).sort((a, b) => b.queuedAt - a.queuedAt);
2057
+ return Array.from(Effect.runSync(Ref.get(this.snapshotsRef)).values()).sort(
2058
+ (a, b) => b.queuedAt - a.queuedAt
2059
+ );
2103
2060
  }
2104
2061
  async loadRunSnapshotsFromArtifacts() {
2105
2062
  return loadRunSnapshotsFromArtifacts(this.config);
@@ -2315,11 +2272,7 @@ function DatasetsView({
2315
2272
  ] })
2316
2273
  ] });
2317
2274
  }
2318
- function RunsView({
2319
- state,
2320
- dataset,
2321
- selectedRun
2322
- }) {
2275
+ function RunsView({ state, dataset, selectedRun }) {
2323
2276
  const runs = dataset?.runs ?? [];
2324
2277
  const rightFocused = state.focus === "right";
2325
2278
  return /* @__PURE__ */ jsxs(Fragment, { children: [
@@ -2335,10 +2288,10 @@ function RunsView({
2335
2288
  /* @__PURE__ */ jsxs(Text, { color: "gray", children: [
2336
2289
  "Commit: ",
2337
2290
  selectedRun.meta.commit,
2338
- " Branch: ",
2291
+ " Branch: ",
2339
2292
  selectedRun.meta.branch,
2293
+ " Seed:",
2340
2294
  " ",
2341
- "Seed: ",
2342
2295
  selectedRun.meta.seed
2343
2296
  ] }),
2344
2297
  /* @__PURE__ */ jsx(Text, { children: " " }),
@@ -2351,23 +2304,10 @@ function RunsView({
2351
2304
  format: (v) => `${v}%`
2352
2305
  }
2353
2306
  ),
2354
- /* @__PURE__ */ jsx(
2355
- TextBar,
2356
- {
2357
- label: "avg score",
2358
- value: Math.round(selectedRun.performance.avgScore * 100)
2359
- }
2360
- ),
2307
+ /* @__PURE__ */ jsx(TextBar, { label: "avg score", value: Math.round(selectedRun.performance.avgScore * 100) }),
2361
2308
  /* @__PURE__ */ jsx(Text, { children: " " }),
2362
2309
  /* @__PURE__ */ jsx(SectionHeader, { children: "Dimensions" }),
2363
- selectedRun.dimensions.map((dimension) => /* @__PURE__ */ jsx(
2364
- TextBar,
2365
- {
2366
- label: dimension.name,
2367
- value: dimension.score
2368
- },
2369
- dimension.name
2370
- )),
2310
+ selectedRun.dimensions.map((dimension) => /* @__PURE__ */ jsx(TextBar, { label: dimension.name, value: dimension.score }, dimension.name)),
2371
2311
  /* @__PURE__ */ jsx(Text, { children: " " }),
2372
2312
  /* @__PURE__ */ jsx(SectionHeader, { children: "Latency trend" }),
2373
2313
  /* @__PURE__ */ jsx(
@@ -2470,15 +2410,7 @@ function buildDetailRows(run, testCases, evaluatorNameById) {
2470
2410
  ...dimensions.map((d) => /* @__PURE__ */ jsx(TextBar, { label: d.name, value: d.score }, `dim-${d.name}`)),
2471
2411
  /* @__PURE__ */ jsx(Text, { children: " " }, "sp2"),
2472
2412
  /* @__PURE__ */ jsx(SectionHeader, { children: "Checks (boolean)" }, "checks-h"),
2473
- ...checks.map((c) => /* @__PURE__ */ jsx(
2474
- CheckRow,
2475
- {
2476
- name: c.name,
2477
- passed: c.passed,
2478
- detail: c.detail
2479
- },
2480
- `chk-${c.name}`
2481
- )),
2413
+ ...checks.map((c) => /* @__PURE__ */ jsx(CheckRow, { name: c.name, passed: c.passed, detail: c.detail }, `chk-${c.name}`)),
2482
2414
  /* @__PURE__ */ jsx(Text, { children: " " }, "sp3"),
2483
2415
  /* @__PURE__ */ jsx(SectionHeader, { children: "Performance" }, "perf-h"),
2484
2416
  /* @__PURE__ */ jsx(
@@ -2595,17 +2527,10 @@ function buildDetailRows(run, testCases, evaluatorNameById) {
2595
2527
  }
2596
2528
  } else {
2597
2529
  rows.push(
2598
- /* @__PURE__ */ jsxs(
2599
- Text,
2600
- {
2601
- color: "gray",
2602
- children: [
2603
- " ",
2604
- "n/a"
2605
- ]
2606
- },
2607
- `tc-${tc.testCaseId}-${item.evaluatorId}-n/a`
2608
- )
2530
+ /* @__PURE__ */ jsxs(Text, { color: "gray", children: [
2531
+ " ",
2532
+ "n/a"
2533
+ ] }, `tc-${tc.testCaseId}-${item.evaluatorId}-n/a`)
2609
2534
  );
2610
2535
  }
2611
2536
  if (!item.passed && item.logs && item.logs.length > 0) {
@@ -2663,7 +2588,7 @@ function RunDetailsView({
2663
2588
  const runs = dataset?.runs ?? [];
2664
2589
  const rightFocused = state.focus === "right";
2665
2590
  const [testCases, setTestCases] = useState([]);
2666
- const evaluatorNameById = React2.useMemo(
2591
+ const evaluatorNameById = React.useMemo(
2667
2592
  () => new Map(evaluators.map((e) => [e.id, e.name])),
2668
2593
  [evaluators]
2669
2594
  );
@@ -2686,7 +2611,7 @@ function RunDetailsView({
2686
2611
  const visible = rows.slice(offset, offset + DETAILS_PAGE_SIZE);
2687
2612
  return /* @__PURE__ */ jsxs(Fragment, { children: [
2688
2613
  /* @__PURE__ */ jsx(RunsSidebar, { state, dataset, runs }),
2689
- /* @__PURE__ */ jsx(Pane, { flexGrow: 1, marginLeft: 1, focused: rightFocused, children: /* @__PURE__ */ jsx(Box, { flexDirection: "column", children: visible.map((row, i) => /* @__PURE__ */ jsx(React2.Fragment, { children: row }, i)) }) })
2614
+ /* @__PURE__ */ jsx(Pane, { flexGrow: 1, marginLeft: 1, focused: rightFocused, children: /* @__PURE__ */ jsx(Box, { flexDirection: "column", children: visible.map((row, i) => /* @__PURE__ */ jsx(React.Fragment, { children: row }, i)) }) })
2690
2615
  ] });
2691
2616
  }
2692
2617
  var LEFT_PANE_WIDTH3 = 44;
@@ -2709,19 +2634,11 @@ function NewEvaluationView({
2709
2634
  visibleEvaluators.map((evaluator, index) => {
2710
2635
  const selected = index === state.evaluatorMenuIndex;
2711
2636
  const inSelection = state.selectedEvaluatorIds.includes(evaluator.id);
2712
- return /* @__PURE__ */ jsxs(
2713
- Text,
2714
- {
2715
- color: selected ? "cyan" : "gray",
2716
- bold: selected,
2717
- children: [
2718
- selected ? "\u25B8 " : " ",
2719
- inSelection ? "[x] " : "[ ] ",
2720
- evaluator.name
2721
- ]
2722
- },
2723
- evaluator.id
2724
- );
2637
+ return /* @__PURE__ */ jsxs(Text, { color: selected ? "cyan" : "gray", bold: selected, children: [
2638
+ selected ? "\u25B8 " : " ",
2639
+ inSelection ? "[x] " : "[ ] ",
2640
+ evaluator.name
2641
+ ] }, evaluator.id);
2725
2642
  })
2726
2643
  ] }),
2727
2644
  /* @__PURE__ */ jsxs(Pane, { flexGrow: 1, marginLeft: 1, focused: rightFocused, children: [
@@ -2753,26 +2670,16 @@ function clampCursor(state, filteredDatasetsLength, selectedRunCount) {
2753
2670
  ...state,
2754
2671
  datasetMenuIndex: Math.max(0, Math.min(state.datasetMenuIndex, datasetMax)),
2755
2672
  runMenuIndex: Math.max(0, Math.min(state.runMenuIndex, runMax)),
2756
- evaluatorMenuIndex: Math.max(
2757
- 0,
2758
- Math.min(state.evaluatorMenuIndex, evaluatorMax)
2759
- )
2673
+ evaluatorMenuIndex: Math.max(0, Math.min(state.evaluatorMenuIndex, evaluatorMax))
2760
2674
  };
2761
2675
  }
2762
- function EvalsCliApp({
2763
- data,
2764
- args,
2765
- runner
2766
- }) {
2676
+ function EvalsCliApp({ data, args, runner }) {
2767
2677
  const { exit } = useApp();
2768
2678
  const { width: stdoutWidth, height: stdoutHeight } = useScreenSize();
2769
2679
  const [liveData, setLiveData] = useState(data);
2770
2680
  const [runtimeMessage, setRuntimeMessage] = useState();
2771
2681
  const overviewRowCountRef = useRef(0);
2772
- const [state, dispatch] = useReducer(
2773
- reduceCliState,
2774
- createInitialState(data, args)
2775
- );
2682
+ const [state, dispatch] = useReducer(reduceCliState, createInitialState(data, args));
2776
2683
  useEffect(() => {
2777
2684
  setLiveData(data);
2778
2685
  }, [data]);
@@ -2804,14 +2711,8 @@ function EvalsCliApp({
2804
2711
  filteredDatasets.length,
2805
2712
  getDatasetByMenuIndex(filteredDatasets, state.datasetMenuIndex)?.runs.length ?? 0
2806
2713
  );
2807
- const selectedDataset = getDatasetByMenuIndex(
2808
- filteredDatasets,
2809
- clampedState.datasetMenuIndex
2810
- );
2811
- const selectedRun = getRunByMenuIndex(
2812
- selectedDataset,
2813
- clampedState.runMenuIndex
2814
- );
2714
+ const selectedDataset = getDatasetByMenuIndex(filteredDatasets, clampedState.datasetMenuIndex);
2715
+ const selectedRun = getRunByMenuIndex(selectedDataset, clampedState.runMenuIndex);
2815
2716
  const visibleEvaluators = liveData.evaluators.filter(
2816
2717
  (evaluator) => evaluator.name.toLowerCase().includes(clampedState.searchQuery.toLowerCase())
2817
2718
  );
@@ -2905,9 +2806,7 @@ function EvalsCliApp({
2905
2806
  `Started ${snapshot.runId} on ${selectedDataset.name} (${snapshot.totalTestCases} cases).`
2906
2807
  );
2907
2808
  }).catch((error) => {
2908
- setRuntimeMessage(
2909
- error instanceof Error ? error.message : "Failed to start evaluation."
2910
- );
2809
+ setRuntimeMessage(error instanceof Error ? error.message : "Failed to start evaluation.");
2911
2810
  });
2912
2811
  }
2913
2812
  });
@@ -2934,14 +2833,7 @@ function EvalsCliApp({
2934
2833
  );
2935
2834
  }
2936
2835
  if (clampedState.level === "runs") {
2937
- return /* @__PURE__ */ jsx(
2938
- RunsView,
2939
- {
2940
- state: clampedState,
2941
- dataset: selectedDataset,
2942
- selectedRun
2943
- }
2944
- );
2836
+ return /* @__PURE__ */ jsx(RunsView, { state: clampedState, dataset: selectedDataset, selectedRun });
2945
2837
  }
2946
2838
  return /* @__PURE__ */ jsx(
2947
2839
  RunDetailsView,
@@ -2953,82 +2845,44 @@ function EvalsCliApp({
2953
2845
  }
2954
2846
  );
2955
2847
  };
2956
- return /* @__PURE__ */ jsxs(
2957
- Box,
2958
- {
2959
- flexDirection: "column",
2960
- flexGrow: 1,
2961
- width: stdoutWidth,
2962
- height: stdoutHeight,
2963
- children: [
2964
- /* @__PURE__ */ jsx(
2965
- Box,
2966
- {
2967
- borderStyle: "round",
2968
- borderColor: "cyan",
2969
- paddingX: 1,
2970
- width: stdoutWidth,
2971
- children: /* @__PURE__ */ jsx(Text, { children: getBreadcrumbText(
2972
- clampedState,
2973
- selectedDataset?.name,
2974
- selectedRun?.label
2975
- ) })
2976
- }
2977
- ),
2978
- clampedState.startupWarnings.length > 0 && /* @__PURE__ */ jsxs(
2979
- Box,
2980
- {
2981
- marginTop: 1,
2982
- borderStyle: "round",
2983
- borderColor: "yellow",
2984
- paddingX: 1,
2985
- flexDirection: "column",
2986
- width: stdoutWidth,
2987
- children: [
2988
- /* @__PURE__ */ jsx(Text, { color: "yellow", children: "Startup warnings:" }),
2989
- clampedState.startupWarnings.map((warning, index) => /* @__PURE__ */ jsx(Text, { children: warning }, `${warning}-${index}`))
2990
- ]
2991
- }
2992
- ),
2993
- clampedState.searchMode && /* @__PURE__ */ jsxs(
2994
- Box,
2995
- {
2996
- marginTop: 1,
2997
- borderStyle: "round",
2998
- borderColor: "magenta",
2999
- paddingX: 1,
3000
- width: stdoutWidth,
3001
- children: [
3002
- /* @__PURE__ */ jsx(Text, { color: "magenta", bold: true, children: "Search: " }),
3003
- /* @__PURE__ */ jsx(Text, { color: "white", children: clampedState.searchQuery })
3004
- ]
3005
- }
3006
- ),
3007
- runtimeMessage && /* @__PURE__ */ jsx(
3008
- Box,
3009
- {
3010
- marginTop: 1,
3011
- borderStyle: "round",
3012
- borderColor: "blue",
3013
- paddingX: 1,
3014
- width: stdoutWidth,
3015
- children: /* @__PURE__ */ jsx(Text, { color: "blue", children: runtimeMessage })
3016
- }
3017
- ),
3018
- /* @__PURE__ */ jsx(
3019
- Box,
3020
- {
3021
- marginTop: 1,
3022
- flexGrow: 1,
3023
- width: stdoutWidth,
3024
- flexDirection: "row",
3025
- children: renderContent()
3026
- }
3027
- ),
3028
- /* @__PURE__ */ jsx(Box, { marginTop: 1, paddingX: 1, children: /* @__PURE__ */ jsx(Text, { color: "gray", children: getFooterText(clampedState) }) })
3029
- ]
3030
- }
3031
- );
2848
+ return /* @__PURE__ */ jsxs(Box, { flexDirection: "column", flexGrow: 1, width: stdoutWidth, height: stdoutHeight, children: [
2849
+ /* @__PURE__ */ jsx(Box, { borderStyle: "round", borderColor: "cyan", paddingX: 1, width: stdoutWidth, children: /* @__PURE__ */ jsx(Text, { children: getBreadcrumbText(clampedState, selectedDataset?.name, selectedRun?.label) }) }),
2850
+ clampedState.startupWarnings.length > 0 && /* @__PURE__ */ jsxs(
2851
+ Box,
2852
+ {
2853
+ marginTop: 1,
2854
+ borderStyle: "round",
2855
+ borderColor: "yellow",
2856
+ paddingX: 1,
2857
+ flexDirection: "column",
2858
+ width: stdoutWidth,
2859
+ children: [
2860
+ /* @__PURE__ */ jsx(Text, { color: "yellow", children: "Startup warnings:" }),
2861
+ clampedState.startupWarnings.map((warning, index) => /* @__PURE__ */ jsx(Text, { children: warning }, `${warning}-${index}`))
2862
+ ]
2863
+ }
2864
+ ),
2865
+ clampedState.searchMode && /* @__PURE__ */ jsxs(
2866
+ Box,
2867
+ {
2868
+ marginTop: 1,
2869
+ borderStyle: "round",
2870
+ borderColor: "magenta",
2871
+ paddingX: 1,
2872
+ width: stdoutWidth,
2873
+ children: [
2874
+ /* @__PURE__ */ jsxs(Text, { color: "magenta", bold: true, children: [
2875
+ "Search:",
2876
+ " "
2877
+ ] }),
2878
+ /* @__PURE__ */ jsx(Text, { color: "white", children: clampedState.searchQuery })
2879
+ ]
2880
+ }
2881
+ ),
2882
+ runtimeMessage && /* @__PURE__ */ jsx(Box, { marginTop: 1, borderStyle: "round", borderColor: "blue", paddingX: 1, width: stdoutWidth, children: /* @__PURE__ */ jsx(Text, { color: "blue", children: runtimeMessage }) }),
2883
+ /* @__PURE__ */ jsx(Box, { marginTop: 1, flexGrow: 1, width: stdoutWidth, flexDirection: "row", children: renderContent() }),
2884
+ /* @__PURE__ */ jsx(Box, { marginTop: 1, paddingX: 1, children: /* @__PURE__ */ jsx(Text, { color: "gray", children: getFooterText(clampedState) }) })
2885
+ ] });
3032
2886
  }
3033
2887
  async function main() {
3034
2888
  const args = parseStartupArgs(process.argv.slice(2));
@@ -3040,9 +2894,7 @@ async function main() {
3040
2894
  process.on("SIGTERM", () => {
3041
2895
  void runner.shutdown().finally(() => process.exit(0));
3042
2896
  });
3043
- withFullScreen(
3044
- /* @__PURE__ */ jsx(EvalsCliApp, { data, args, runner })
3045
- ).start();
2897
+ withFullScreen(/* @__PURE__ */ jsx(EvalsCliApp, { data, args, runner })).start();
3046
2898
  }
3047
2899
  void main();
3048
2900
  //# sourceMappingURL=out.js.map