@m4trix/evals 0.11.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli-simple.cjs +719 -227
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +721 -229
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +1320 -928
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +1322 -930
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +335 -99
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +24 -5
- package/dist/index.js +337 -101
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -3,14 +3,14 @@ import { withFullScreen, useScreenSize } from 'fullscreen-ink';
|
|
|
3
3
|
import React, { useState, useReducer, useEffect, useMemo } from 'react';
|
|
4
4
|
import { useApp, useInput, Box, Text } from 'ink';
|
|
5
5
|
import { jsx, jsxs, Fragment } from 'react/jsx-runtime';
|
|
6
|
+
import { resolve, relative, join, dirname } from 'path';
|
|
7
|
+
import { diffString } from 'json-diff';
|
|
6
8
|
import { randomUUID } from 'crypto';
|
|
7
|
-
import { Effect, PubSub, Queue, Fiber } from 'effect';
|
|
9
|
+
import { Effect, PubSub, Queue, Fiber, Ref } from 'effect';
|
|
8
10
|
import { existsSync } from 'fs';
|
|
9
|
-
import { resolve, relative, join, dirname } from 'path';
|
|
10
11
|
import * as jitiModule from 'jiti';
|
|
11
|
-
import { mkdir, appendFile } from 'fs/promises';
|
|
12
|
+
import { readdir, readFile, mkdir, appendFile } from 'fs/promises';
|
|
12
13
|
import { pathToFileURL } from 'url';
|
|
13
|
-
import { diffString } from 'json-diff';
|
|
14
14
|
|
|
15
15
|
var SEP = " ";
|
|
16
16
|
var ARROW = "\u203A";
|
|
@@ -498,11 +498,17 @@ function toEvaluatorOption(item) {
|
|
|
498
498
|
};
|
|
499
499
|
}
|
|
500
500
|
async function loadRunnerData(runner) {
|
|
501
|
-
const [datasets, evaluators] = await Promise.all([
|
|
501
|
+
const [datasets, evaluators, diskSnapshots] = await Promise.all([
|
|
502
502
|
runner.collectDatasets(),
|
|
503
|
-
runner.collectEvaluators()
|
|
503
|
+
runner.collectEvaluators(),
|
|
504
|
+
runner.loadRunSnapshotsFromArtifacts()
|
|
504
505
|
]);
|
|
505
|
-
const snapshots = runner.getAllRunSnapshots();
|
|
506
|
+
const memSnapshots = runner.getAllRunSnapshots();
|
|
507
|
+
const seen = new Set(memSnapshots.map((s) => s.runId));
|
|
508
|
+
const fromDisk = diskSnapshots.filter((s) => !seen.has(s.runId));
|
|
509
|
+
const snapshots = [...memSnapshots, ...fromDisk].sort(
|
|
510
|
+
(a, b) => b.queuedAt - a.queuedAt
|
|
511
|
+
);
|
|
506
512
|
if (datasets.length === 0 && evaluators.length === 0) {
|
|
507
513
|
return loadMockData();
|
|
508
514
|
}
|
|
@@ -820,458 +826,185 @@ function RunsView({
|
|
|
820
826
|
] }) })
|
|
821
827
|
] });
|
|
822
828
|
}
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
detail,
|
|
838
|
-
")"
|
|
839
|
-
] }) : null
|
|
840
|
-
] });
|
|
841
|
-
}
|
|
842
|
-
function buildDetailRows(run) {
|
|
843
|
-
const { performance, dimensions, checks, failures, meta } = run;
|
|
844
|
-
const latencyHistory = performance.latencyHistoryMs ?? [
|
|
845
|
-
performance.latencyAvgMs - 40,
|
|
846
|
-
performance.latencyAvgMs - 10,
|
|
847
|
-
performance.latencyAvgMs + 20,
|
|
848
|
-
performance.latencyP95Ms - 80,
|
|
849
|
-
performance.latencyP95Ms
|
|
850
|
-
];
|
|
851
|
-
const rows = [
|
|
852
|
-
/* @__PURE__ */ jsx(SectionHeader, { children: "Meta" }, "meta-h"),
|
|
853
|
-
/* @__PURE__ */ jsxs(Text, { color: "gray", children: [
|
|
854
|
-
"Model: ",
|
|
855
|
-
meta.model,
|
|
856
|
-
" Provider: ",
|
|
857
|
-
meta.provider
|
|
858
|
-
] }, "meta-1"),
|
|
859
|
-
/* @__PURE__ */ jsxs(Text, { color: "gray", children: [
|
|
860
|
-
"Commit: ",
|
|
861
|
-
meta.commit,
|
|
862
|
-
" Branch: ",
|
|
863
|
-
meta.branch,
|
|
864
|
-
" Seed: ",
|
|
865
|
-
meta.seed
|
|
866
|
-
] }, "meta-2"),
|
|
867
|
-
/* @__PURE__ */ jsxs(Text, { color: "gray", children: [
|
|
868
|
-
"Duration: ",
|
|
869
|
-
meta.duration,
|
|
870
|
-
" Concurrency: ",
|
|
871
|
-
meta.concurrency
|
|
872
|
-
] }, "meta-3"),
|
|
873
|
-
/* @__PURE__ */ jsxs(Text, { color: "gray", children: [
|
|
874
|
-
"Artifact: ",
|
|
875
|
-
meta.artifact
|
|
876
|
-
] }, "meta-4"),
|
|
877
|
-
/* @__PURE__ */ jsx(Text, { children: " " }, "sp1"),
|
|
878
|
-
/* @__PURE__ */ jsx(SectionHeader, { children: "Scores (0\u2013100)" }, "scores-h"),
|
|
879
|
-
...dimensions.map((d) => /* @__PURE__ */ jsx(TextBar, { label: d.name, value: d.score }, `dim-${d.name}`)),
|
|
880
|
-
/* @__PURE__ */ jsx(Text, { children: " " }, "sp2"),
|
|
881
|
-
/* @__PURE__ */ jsx(SectionHeader, { children: "Checks (boolean)" }, "checks-h"),
|
|
882
|
-
...checks.map((c) => /* @__PURE__ */ jsx(CheckRow, { name: c.name, passed: c.passed, detail: c.detail }, `chk-${c.name}`)),
|
|
883
|
-
/* @__PURE__ */ jsx(Text, { children: " " }, "sp3"),
|
|
884
|
-
/* @__PURE__ */ jsx(SectionHeader, { children: "Performance" }, "perf-h"),
|
|
885
|
-
/* @__PURE__ */ jsx(
|
|
886
|
-
TextBar,
|
|
887
|
-
{
|
|
888
|
-
label: "pass rate",
|
|
889
|
-
value: performance.passRate,
|
|
890
|
-
format: (v) => `${v}%`
|
|
891
|
-
},
|
|
892
|
-
"perf-rate"
|
|
893
|
-
),
|
|
894
|
-
/* @__PURE__ */ jsxs(Text, { color: "gray", children: [
|
|
895
|
-
"latency avg ",
|
|
896
|
-
performance.latencyAvgMs,
|
|
897
|
-
"ms p95 ",
|
|
898
|
-
performance.latencyP95Ms,
|
|
899
|
-
"ms"
|
|
900
|
-
] }, "perf-lat"),
|
|
901
|
-
/* @__PURE__ */ jsxs(Text, { color: "gray", children: [
|
|
902
|
-
"tokens avg ",
|
|
903
|
-
performance.tokensAvg,
|
|
904
|
-
" p95 ",
|
|
905
|
-
performance.tokensP95
|
|
906
|
-
] }, "perf-tok"),
|
|
907
|
-
/* @__PURE__ */ jsx(Text, { children: " " }, "sp4"),
|
|
908
|
-
/* @__PURE__ */ jsx(SectionHeader, { children: "Latency trend" }, "spark-h"),
|
|
909
|
-
/* @__PURE__ */ jsx(Sparkline, { data: latencyHistory, width: 20 }, "spark")
|
|
910
|
-
];
|
|
911
|
-
if (failures.length > 0) {
|
|
912
|
-
rows.push(/* @__PURE__ */ jsx(Text, { children: " " }, "sp5"));
|
|
913
|
-
rows.push(/* @__PURE__ */ jsx(SectionHeader, { children: "Failures (top)" }, "fail-h"));
|
|
914
|
-
failures.forEach((f, i) => {
|
|
915
|
-
rows.push(
|
|
916
|
-
/* @__PURE__ */ jsxs(Text, { color: "red", children: [
|
|
917
|
-
i + 1,
|
|
918
|
-
") ",
|
|
919
|
-
f.title
|
|
920
|
-
] }, `fail-${i}`)
|
|
921
|
-
);
|
|
922
|
-
});
|
|
829
|
+
|
|
830
|
+
// src/evals/metric.ts
|
|
831
|
+
var registry = /* @__PURE__ */ new Map();
|
|
832
|
+
var Metric = {
|
|
833
|
+
of(config) {
|
|
834
|
+
const def = {
|
|
835
|
+
id: config.id,
|
|
836
|
+
name: config.name,
|
|
837
|
+
aggregate: config.aggregate,
|
|
838
|
+
format: config.format,
|
|
839
|
+
make: (data) => ({ id: config.id, data })
|
|
840
|
+
};
|
|
841
|
+
registry.set(config.id, def);
|
|
842
|
+
return def;
|
|
923
843
|
}
|
|
924
|
-
|
|
844
|
+
};
|
|
845
|
+
function getMetricById(id) {
|
|
846
|
+
return registry.get(id);
|
|
925
847
|
}
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
848
|
+
|
|
849
|
+
// src/evals/score.ts
|
|
850
|
+
var registry2 = /* @__PURE__ */ new Map();
|
|
851
|
+
var Score = {
|
|
852
|
+
of(config) {
|
|
853
|
+
const def = {
|
|
854
|
+
id: config.id,
|
|
855
|
+
name: config.name,
|
|
856
|
+
displayStrategy: config.displayStrategy,
|
|
857
|
+
aggregate: config.aggregate,
|
|
858
|
+
format: config.format,
|
|
859
|
+
make: (data, options) => {
|
|
860
|
+
const passed = options?.definePassed !== void 0 ? options.definePassed(data) : void 0;
|
|
861
|
+
return {
|
|
862
|
+
id: config.id,
|
|
863
|
+
data,
|
|
864
|
+
...passed !== void 0 && { passed }
|
|
865
|
+
};
|
|
866
|
+
}
|
|
867
|
+
};
|
|
868
|
+
registry2.set(config.id, def);
|
|
869
|
+
return def;
|
|
938
870
|
}
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
return /* @__PURE__ */ jsxs(Fragment, { children: [
|
|
943
|
-
/* @__PURE__ */ jsx(RunsSidebar, { state, dataset, runs }),
|
|
944
|
-
/* @__PURE__ */ jsx(Pane, { flexGrow: 1, marginLeft: 1, focused: rightFocused, children: /* @__PURE__ */ jsx(Box, { flexDirection: "column", children: visible.map((row, i) => /* @__PURE__ */ jsx(React.Fragment, { children: row }, i)) }) })
|
|
945
|
-
] });
|
|
946
|
-
}
|
|
947
|
-
var LEFT_PANE_WIDTH3 = 44;
|
|
948
|
-
function NewEvaluationView({
|
|
949
|
-
state,
|
|
950
|
-
data,
|
|
951
|
-
visibleEvaluators
|
|
952
|
-
}) {
|
|
953
|
-
const selectedCount = state.selectedEvaluatorIds.length;
|
|
954
|
-
const focusedEvaluator = visibleEvaluators[state.evaluatorMenuIndex];
|
|
955
|
-
const leftFocused = state.focus === "left";
|
|
956
|
-
const rightFocused = state.focus === "right";
|
|
957
|
-
return /* @__PURE__ */ jsxs(Fragment, { children: [
|
|
958
|
-
/* @__PURE__ */ jsxs(Pane, { width: LEFT_PANE_WIDTH3, focused: leftFocused, children: [
|
|
959
|
-
/* @__PURE__ */ jsx(SectionHeader, { children: "Available Evaluators" }),
|
|
960
|
-
/* @__PURE__ */ jsxs(Text, { color: "gray", children: [
|
|
961
|
-
"Search: ",
|
|
962
|
-
state.searchQuery || "(none)"
|
|
963
|
-
] }),
|
|
964
|
-
visibleEvaluators.map((evaluator, index) => {
|
|
965
|
-
const selected = index === state.evaluatorMenuIndex;
|
|
966
|
-
const inSelection = state.selectedEvaluatorIds.includes(evaluator.id);
|
|
967
|
-
return /* @__PURE__ */ jsxs(
|
|
968
|
-
Text,
|
|
969
|
-
{
|
|
970
|
-
color: selected ? "cyan" : "gray",
|
|
971
|
-
bold: selected,
|
|
972
|
-
children: [
|
|
973
|
-
selected ? "\u25B8 " : " ",
|
|
974
|
-
inSelection ? "[x] " : "[ ] ",
|
|
975
|
-
evaluator.name
|
|
976
|
-
]
|
|
977
|
-
},
|
|
978
|
-
evaluator.id
|
|
979
|
-
);
|
|
980
|
-
})
|
|
981
|
-
] }),
|
|
982
|
-
/* @__PURE__ */ jsxs(Pane, { flexGrow: 1, marginLeft: 1, focused: rightFocused, children: [
|
|
983
|
-
/* @__PURE__ */ jsxs(SectionHeader, { children: [
|
|
984
|
-
"Selected (",
|
|
985
|
-
selectedCount,
|
|
986
|
-
")"
|
|
987
|
-
] }),
|
|
988
|
-
state.selectedEvaluatorIds.map((id, index) => {
|
|
989
|
-
const evaluator = data.evaluators.find((item) => item.id === id);
|
|
990
|
-
if (!evaluator)
|
|
991
|
-
return null;
|
|
992
|
-
return /* @__PURE__ */ jsxs(Text, { children: [
|
|
993
|
-
index + 1,
|
|
994
|
-
") ",
|
|
995
|
-
evaluator.name
|
|
996
|
-
] }, id);
|
|
997
|
-
}),
|
|
998
|
-
/* @__PURE__ */ jsx(SectionHeader, { children: "Config preview" }),
|
|
999
|
-
/* @__PURE__ */ jsx(Text, { color: "gray", children: focusedEvaluator?.configPreview ?? "Select an evaluator to inspect config." })
|
|
1000
|
-
] })
|
|
1001
|
-
] });
|
|
871
|
+
};
|
|
872
|
+
function getScoreById(id) {
|
|
873
|
+
return registry2.get(id);
|
|
1002
874
|
}
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
875
|
+
|
|
876
|
+
// src/evals/aggregators.ts
|
|
877
|
+
function aggregateAverage(values) {
|
|
878
|
+
if (values.length === 0) {
|
|
879
|
+
return { value: 0 };
|
|
880
|
+
}
|
|
881
|
+
const sum = values.reduce((s, v) => s + v.value, 0);
|
|
882
|
+
return { value: sum / values.length };
|
|
883
|
+
}
|
|
884
|
+
function aggregateAll(values) {
|
|
885
|
+
return { passed: values.length > 0 && values.every((v) => v.passed) };
|
|
886
|
+
}
|
|
887
|
+
function aggregateTokenCountSum(values) {
|
|
888
|
+
const initial = {
|
|
889
|
+
input: 0,
|
|
890
|
+
output: 0,
|
|
891
|
+
inputCached: 0,
|
|
892
|
+
outputCached: 0
|
|
1015
893
|
};
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
const [liveData, setLiveData] = useState(data);
|
|
1025
|
-
const [runtimeMessage, setRuntimeMessage] = useState();
|
|
1026
|
-
const [state, dispatch] = useReducer(
|
|
1027
|
-
reduceCliState,
|
|
1028
|
-
createInitialState(data, args)
|
|
894
|
+
return values.reduce(
|
|
895
|
+
(acc, v) => ({
|
|
896
|
+
input: acc.input + (v.input ?? 0),
|
|
897
|
+
output: acc.output + (v.output ?? 0),
|
|
898
|
+
inputCached: acc.inputCached + (v.inputCached ?? 0),
|
|
899
|
+
outputCached: acc.outputCached + (v.outputCached ?? 0)
|
|
900
|
+
}),
|
|
901
|
+
initial
|
|
1029
902
|
);
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
}
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
return;
|
|
1093
|
-
}
|
|
1094
|
-
if (isPrintableCharacter(input)) {
|
|
1095
|
-
dispatch({ type: "APPEND_SEARCH", value: input });
|
|
1096
|
-
}
|
|
1097
|
-
return;
|
|
1098
|
-
}
|
|
1099
|
-
if (key.upArrow) {
|
|
1100
|
-
const max = clampedState.level === "details" ? 100 : clampedState.level === "new-evaluation" ? visibleEvaluators.length - 1 : 100;
|
|
1101
|
-
dispatch({ type: "MOVE_UP", max });
|
|
1102
|
-
return;
|
|
1103
|
-
}
|
|
1104
|
-
if (key.downArrow) {
|
|
1105
|
-
const max = clampedState.level === "datasets" ? filteredDatasets.length : clampedState.level === "runs" ? selectedDataset?.runs.length ?? 0 : clampedState.level === "new-evaluation" ? Math.max(0, visibleEvaluators.length - 1) : 100;
|
|
1106
|
-
dispatch({ type: "MOVE_DOWN", max });
|
|
1107
|
-
return;
|
|
1108
|
-
}
|
|
1109
|
-
if (key.return) {
|
|
1110
|
-
dispatch({
|
|
1111
|
-
type: "ENTER",
|
|
1112
|
-
hasDataset: Boolean(selectedDataset),
|
|
1113
|
-
hasRun: Boolean(selectedRun)
|
|
1114
|
-
});
|
|
1115
|
-
if (clampedState.level === "new-evaluation") {
|
|
1116
|
-
const evaluator = visibleEvaluators[clampedState.evaluatorMenuIndex];
|
|
1117
|
-
if (evaluator) {
|
|
1118
|
-
dispatch({ type: "TOGGLE_EVALUATOR", evaluatorId: evaluator.id });
|
|
1119
|
-
}
|
|
1120
|
-
}
|
|
1121
|
-
return;
|
|
1122
|
-
}
|
|
1123
|
-
if (isBackKey(key)) {
|
|
1124
|
-
dispatch({ type: "BACK" });
|
|
1125
|
-
return;
|
|
1126
|
-
}
|
|
1127
|
-
if (input.toLowerCase() === "c") {
|
|
1128
|
-
dispatch({ type: "CLEAR_WARNINGS" });
|
|
1129
|
-
setRuntimeMessage(void 0);
|
|
1130
|
-
return;
|
|
903
|
+
}
|
|
904
|
+
function aggregateLatencyAverage(values) {
|
|
905
|
+
if (values.length === 0) {
|
|
906
|
+
return { ms: 0 };
|
|
907
|
+
}
|
|
908
|
+
const sum = values.reduce((s, v) => s + v.ms, 0);
|
|
909
|
+
return { ms: sum / values.length };
|
|
910
|
+
}
|
|
911
|
+
|
|
912
|
+
// src/evals/metrics/standard.ts
|
|
913
|
+
Metric.of({
|
|
914
|
+
id: "token-count",
|
|
915
|
+
name: "Tokens",
|
|
916
|
+
aggregate: aggregateTokenCountSum,
|
|
917
|
+
format: (data, options) => {
|
|
918
|
+
const input = data.input ?? 0;
|
|
919
|
+
const output = data.output ?? 0;
|
|
920
|
+
const inputCached = data.inputCached ?? 0;
|
|
921
|
+
const outputCached = data.outputCached ?? 0;
|
|
922
|
+
const cached = inputCached + outputCached;
|
|
923
|
+
const base = `in:${input} out:${output} cached:${cached}`;
|
|
924
|
+
return options?.isAggregated ? `Total: ${base}` : base;
|
|
925
|
+
}
|
|
926
|
+
});
|
|
927
|
+
Metric.of({
|
|
928
|
+
id: "latency",
|
|
929
|
+
name: "Latency",
|
|
930
|
+
aggregate: aggregateLatencyAverage,
|
|
931
|
+
format: (data, options) => options?.isAggregated ? `Avg: ${data.ms}ms` : `${data.ms}ms`
|
|
932
|
+
});
|
|
933
|
+
|
|
934
|
+
// src/evals/scores/standard.ts
|
|
935
|
+
Score.of({
|
|
936
|
+
id: "percent",
|
|
937
|
+
name: "Score",
|
|
938
|
+
displayStrategy: "bar",
|
|
939
|
+
format: (data, options) => options?.isAggregated ? `Avg: ${data.value.toFixed(2)}` : data.value.toFixed(2),
|
|
940
|
+
aggregate: aggregateAverage
|
|
941
|
+
});
|
|
942
|
+
Score.of({
|
|
943
|
+
id: "binary",
|
|
944
|
+
name: "Result",
|
|
945
|
+
displayStrategy: "passFail",
|
|
946
|
+
format: (data, options) => options?.isAggregated ? data.passed ? "All: PASSED" : "Some: FAILED" : data.passed ? "PASSED" : "NOT PASSED",
|
|
947
|
+
aggregate: aggregateAll
|
|
948
|
+
});
|
|
949
|
+
function createDiffLogEntry(expected, actual, options) {
|
|
950
|
+
const diff = diffString(expected, actual, { color: false });
|
|
951
|
+
return {
|
|
952
|
+
type: "diff",
|
|
953
|
+
label: options?.label,
|
|
954
|
+
expected,
|
|
955
|
+
actual,
|
|
956
|
+
diff: diff || "(no differences)"
|
|
957
|
+
};
|
|
958
|
+
}
|
|
959
|
+
function getDiffLines(entry) {
|
|
960
|
+
const raw = diffString(entry.expected, entry.actual, { color: false }) || "(no differences)";
|
|
961
|
+
return raw.split("\n").map((line) => {
|
|
962
|
+
const trimmed = line.trimStart();
|
|
963
|
+
if (trimmed.startsWith("-") && !trimmed.startsWith("---")) {
|
|
964
|
+
return { type: "remove", line };
|
|
1131
965
|
}
|
|
1132
|
-
if (
|
|
1133
|
-
|
|
1134
|
-
setRuntimeMessage("Runner unavailable: cannot start evaluation.");
|
|
1135
|
-
return;
|
|
1136
|
-
}
|
|
1137
|
-
if (!selectedDataset) {
|
|
1138
|
-
setRuntimeMessage("Select a dataset before starting a new evaluation.");
|
|
1139
|
-
return;
|
|
1140
|
-
}
|
|
1141
|
-
if (clampedState.selectedEvaluatorIds.length === 0) {
|
|
1142
|
-
setRuntimeMessage("Select at least one evaluator before starting.");
|
|
1143
|
-
return;
|
|
1144
|
-
}
|
|
1145
|
-
void runner.runDatasetWith({
|
|
1146
|
-
datasetId: selectedDataset.id,
|
|
1147
|
-
evaluatorIds: clampedState.selectedEvaluatorIds
|
|
1148
|
-
}).then((snapshot) => {
|
|
1149
|
-
setRuntimeMessage(
|
|
1150
|
-
`Started ${snapshot.runId} on ${selectedDataset.name} (${snapshot.totalTestCases} cases).`
|
|
1151
|
-
);
|
|
1152
|
-
}).catch((error) => {
|
|
1153
|
-
setRuntimeMessage(
|
|
1154
|
-
error instanceof Error ? error.message : "Failed to start evaluation."
|
|
1155
|
-
);
|
|
1156
|
-
});
|
|
966
|
+
if (trimmed.startsWith("+") && !trimmed.startsWith("+++")) {
|
|
967
|
+
return { type: "add", line };
|
|
1157
968
|
}
|
|
969
|
+
return { type: "context", line };
|
|
1158
970
|
});
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
}
|
|
1170
|
-
if (clampedState.level === "datasets") {
|
|
1171
|
-
return /* @__PURE__ */ jsx(
|
|
1172
|
-
DatasetsView,
|
|
1173
|
-
{
|
|
1174
|
-
state: clampedState,
|
|
1175
|
-
filteredDatasets,
|
|
1176
|
-
selectedDataset
|
|
1177
|
-
}
|
|
1178
|
-
);
|
|
1179
|
-
}
|
|
1180
|
-
if (clampedState.level === "runs") {
|
|
1181
|
-
return /* @__PURE__ */ jsx(
|
|
1182
|
-
RunsView,
|
|
1183
|
-
{
|
|
1184
|
-
state: clampedState,
|
|
1185
|
-
dataset: selectedDataset,
|
|
1186
|
-
selectedRun
|
|
1187
|
-
}
|
|
1188
|
-
);
|
|
1189
|
-
}
|
|
1190
|
-
return /* @__PURE__ */ jsx(
|
|
1191
|
-
RunDetailsView,
|
|
1192
|
-
{
|
|
1193
|
-
state: clampedState,
|
|
1194
|
-
dataset: selectedDataset,
|
|
1195
|
-
selectedRun
|
|
971
|
+
}
|
|
972
|
+
|
|
973
|
+
// src/runner/score-utils.ts
|
|
974
|
+
function toNumericScoreFromScores(scores) {
|
|
975
|
+
for (const item of scores) {
|
|
976
|
+
const def = getScoreById(item.id);
|
|
977
|
+
if (def && def.displayStrategy === "bar" && typeof item.data === "object" && item.data !== null && "value" in item.data) {
|
|
978
|
+
const value = item.data.value;
|
|
979
|
+
if (typeof value === "number" && Number.isFinite(value)) {
|
|
980
|
+
return value;
|
|
1196
981
|
}
|
|
1197
|
-
);
|
|
1198
|
-
};
|
|
1199
|
-
return /* @__PURE__ */ jsxs(
|
|
1200
|
-
Box,
|
|
1201
|
-
{
|
|
1202
|
-
flexDirection: "column",
|
|
1203
|
-
flexGrow: 1,
|
|
1204
|
-
width: stdoutWidth,
|
|
1205
|
-
height: stdoutHeight,
|
|
1206
|
-
children: [
|
|
1207
|
-
/* @__PURE__ */ jsx(
|
|
1208
|
-
Box,
|
|
1209
|
-
{
|
|
1210
|
-
borderStyle: "round",
|
|
1211
|
-
borderColor: "cyan",
|
|
1212
|
-
paddingX: 1,
|
|
1213
|
-
width: stdoutWidth,
|
|
1214
|
-
children: /* @__PURE__ */ jsx(Text, { children: getBreadcrumbText(
|
|
1215
|
-
clampedState,
|
|
1216
|
-
selectedDataset?.name,
|
|
1217
|
-
selectedRun?.label
|
|
1218
|
-
) })
|
|
1219
|
-
}
|
|
1220
|
-
),
|
|
1221
|
-
clampedState.startupWarnings.length > 0 && /* @__PURE__ */ jsxs(
|
|
1222
|
-
Box,
|
|
1223
|
-
{
|
|
1224
|
-
marginTop: 1,
|
|
1225
|
-
borderStyle: "round",
|
|
1226
|
-
borderColor: "yellow",
|
|
1227
|
-
paddingX: 1,
|
|
1228
|
-
flexDirection: "column",
|
|
1229
|
-
width: stdoutWidth,
|
|
1230
|
-
children: [
|
|
1231
|
-
/* @__PURE__ */ jsx(Text, { color: "yellow", children: "Startup warnings:" }),
|
|
1232
|
-
clampedState.startupWarnings.map((warning, index) => /* @__PURE__ */ jsx(Text, { children: warning }, `${warning}-${index}`))
|
|
1233
|
-
]
|
|
1234
|
-
}
|
|
1235
|
-
),
|
|
1236
|
-
clampedState.searchMode && /* @__PURE__ */ jsxs(
|
|
1237
|
-
Box,
|
|
1238
|
-
{
|
|
1239
|
-
marginTop: 1,
|
|
1240
|
-
borderStyle: "round",
|
|
1241
|
-
borderColor: "magenta",
|
|
1242
|
-
paddingX: 1,
|
|
1243
|
-
width: stdoutWidth,
|
|
1244
|
-
children: [
|
|
1245
|
-
/* @__PURE__ */ jsx(Text, { color: "magenta", bold: true, children: "Search: " }),
|
|
1246
|
-
/* @__PURE__ */ jsx(Text, { color: "white", children: clampedState.searchQuery })
|
|
1247
|
-
]
|
|
1248
|
-
}
|
|
1249
|
-
),
|
|
1250
|
-
runtimeMessage && /* @__PURE__ */ jsx(
|
|
1251
|
-
Box,
|
|
1252
|
-
{
|
|
1253
|
-
marginTop: 1,
|
|
1254
|
-
borderStyle: "round",
|
|
1255
|
-
borderColor: "blue",
|
|
1256
|
-
paddingX: 1,
|
|
1257
|
-
width: stdoutWidth,
|
|
1258
|
-
children: /* @__PURE__ */ jsx(Text, { color: "blue", children: runtimeMessage })
|
|
1259
|
-
}
|
|
1260
|
-
),
|
|
1261
|
-
/* @__PURE__ */ jsx(
|
|
1262
|
-
Box,
|
|
1263
|
-
{
|
|
1264
|
-
marginTop: 1,
|
|
1265
|
-
flexGrow: 1,
|
|
1266
|
-
width: stdoutWidth,
|
|
1267
|
-
flexDirection: "row",
|
|
1268
|
-
children: renderContent()
|
|
1269
|
-
}
|
|
1270
|
-
),
|
|
1271
|
-
/* @__PURE__ */ jsx(Box, { marginTop: 1, paddingX: 1, children: /* @__PURE__ */ jsx(Text, { color: "gray", children: getFooterText(clampedState) }) })
|
|
1272
|
-
]
|
|
1273
982
|
}
|
|
983
|
+
const numeric = toNumericScore(item.data);
|
|
984
|
+
if (numeric !== void 0) {
|
|
985
|
+
return numeric;
|
|
986
|
+
}
|
|
987
|
+
}
|
|
988
|
+
return void 0;
|
|
989
|
+
}
|
|
990
|
+
function toNumericScore(value) {
|
|
991
|
+
if (typeof value === "number" && Number.isFinite(value)) {
|
|
992
|
+
return value;
|
|
993
|
+
}
|
|
994
|
+
if (typeof value !== "object" || value === null) {
|
|
995
|
+
return void 0;
|
|
996
|
+
}
|
|
997
|
+
const obj = value;
|
|
998
|
+
if ("score" in obj && typeof obj.score === "number" && Number.isFinite(obj.score)) {
|
|
999
|
+
return obj.score;
|
|
1000
|
+
}
|
|
1001
|
+
const numberValues = Object.values(value).filter(
|
|
1002
|
+
(entry) => typeof entry === "number" && Number.isFinite(entry)
|
|
1274
1003
|
);
|
|
1004
|
+
if (numberValues.length === 0) {
|
|
1005
|
+
return void 0;
|
|
1006
|
+
}
|
|
1007
|
+
return numberValues.reduce((sum, entry) => sum + entry, 0) / numberValues.length;
|
|
1275
1008
|
}
|
|
1276
1009
|
|
|
1277
1010
|
// src/runner/config.ts
|
|
@@ -1293,7 +1026,8 @@ var defaultRunnerConfig = {
|
|
|
1293
1026
|
],
|
|
1294
1027
|
excludeDirectories: ["node_modules", "dist", ".next", ".git", ".pnpm-store"]
|
|
1295
1028
|
},
|
|
1296
|
-
artifactDirectory: ".eval-results"
|
|
1029
|
+
artifactDirectory: ".eval-results",
|
|
1030
|
+
maxConcurrency: 1
|
|
1297
1031
|
};
|
|
1298
1032
|
function toRunnerConfigOverrides(config) {
|
|
1299
1033
|
if (!config) {
|
|
@@ -1326,6 +1060,9 @@ function toRunnerConfigOverrides(config) {
|
|
|
1326
1060
|
if (config.artifactDirectory !== void 0) {
|
|
1327
1061
|
overrides.artifactDirectory = config.artifactDirectory;
|
|
1328
1062
|
}
|
|
1063
|
+
if (config.maxConcurrency !== void 0) {
|
|
1064
|
+
overrides.maxConcurrency = config.maxConcurrency;
|
|
1065
|
+
}
|
|
1329
1066
|
if (Object.keys(discovery).length > 0) {
|
|
1330
1067
|
overrides.discovery = discovery;
|
|
1331
1068
|
}
|
|
@@ -1519,129 +1256,6 @@ async function collectTestCasesFromFiles(config) {
|
|
|
1519
1256
|
);
|
|
1520
1257
|
return found.flat();
|
|
1521
1258
|
}
|
|
1522
|
-
function createDiffLogEntry(expected, actual, options) {
|
|
1523
|
-
const diff = diffString(expected, actual, { color: false });
|
|
1524
|
-
return {
|
|
1525
|
-
type: "diff",
|
|
1526
|
-
label: options?.label,
|
|
1527
|
-
expected,
|
|
1528
|
-
actual,
|
|
1529
|
-
diff: diff || "(no differences)"
|
|
1530
|
-
};
|
|
1531
|
-
}
|
|
1532
|
-
|
|
1533
|
-
// src/evals/metric.ts
|
|
1534
|
-
var registry = /* @__PURE__ */ new Map();
|
|
1535
|
-
var Metric = {
|
|
1536
|
-
of(config) {
|
|
1537
|
-
const def = {
|
|
1538
|
-
id: config.id,
|
|
1539
|
-
name: config.name,
|
|
1540
|
-
format: config.format,
|
|
1541
|
-
make: (data) => ({ id: config.id, data })
|
|
1542
|
-
};
|
|
1543
|
-
registry.set(config.id, def);
|
|
1544
|
-
return def;
|
|
1545
|
-
}
|
|
1546
|
-
};
|
|
1547
|
-
|
|
1548
|
-
// src/evals/score.ts
|
|
1549
|
-
var registry2 = /* @__PURE__ */ new Map();
|
|
1550
|
-
var Score = {
|
|
1551
|
-
of(config) {
|
|
1552
|
-
const def = {
|
|
1553
|
-
id: config.id,
|
|
1554
|
-
name: config.name,
|
|
1555
|
-
displayStrategy: config.displayStrategy,
|
|
1556
|
-
format: config.format,
|
|
1557
|
-
make: (data, options) => {
|
|
1558
|
-
const passed = options?.definePassed !== void 0 ? options.definePassed(data) : void 0;
|
|
1559
|
-
return {
|
|
1560
|
-
id: config.id,
|
|
1561
|
-
data,
|
|
1562
|
-
...passed !== void 0 && { passed }
|
|
1563
|
-
};
|
|
1564
|
-
}
|
|
1565
|
-
};
|
|
1566
|
-
registry2.set(config.id, def);
|
|
1567
|
-
return def;
|
|
1568
|
-
}
|
|
1569
|
-
};
|
|
1570
|
-
function getScoreById(id) {
|
|
1571
|
-
return registry2.get(id);
|
|
1572
|
-
}
|
|
1573
|
-
|
|
1574
|
-
// src/evals/metrics/standard.ts
|
|
1575
|
-
Metric.of({
|
|
1576
|
-
id: "token-count",
|
|
1577
|
-
name: "Tokens",
|
|
1578
|
-
format: (data) => {
|
|
1579
|
-
const input = data.input ?? 0;
|
|
1580
|
-
const output = data.output ?? 0;
|
|
1581
|
-
const inputCached = data.inputCached ?? 0;
|
|
1582
|
-
const outputCached = data.outputCached ?? 0;
|
|
1583
|
-
const cached = inputCached + outputCached;
|
|
1584
|
-
return `in:${input} out:${output} cached:${cached}`;
|
|
1585
|
-
}
|
|
1586
|
-
});
|
|
1587
|
-
Metric.of({
|
|
1588
|
-
id: "latency",
|
|
1589
|
-
name: "Latency",
|
|
1590
|
-
format: (data) => `${data.ms}ms`
|
|
1591
|
-
});
|
|
1592
|
-
|
|
1593
|
-
// src/evals/scores/standard.ts
|
|
1594
|
-
Score.of({
|
|
1595
|
-
id: "percent",
|
|
1596
|
-
name: "Score",
|
|
1597
|
-
displayStrategy: "bar",
|
|
1598
|
-
format: (data) => data.value.toFixed(2)
|
|
1599
|
-
});
|
|
1600
|
-
Score.of({
|
|
1601
|
-
id: "binary",
|
|
1602
|
-
name: "Result",
|
|
1603
|
-
displayStrategy: "passFail",
|
|
1604
|
-
format: (data) => data.passed ? "PASSED" : "NOT PASSED"
|
|
1605
|
-
});
|
|
1606
|
-
|
|
1607
|
-
// src/runner/score-utils.ts
|
|
1608
|
-
function toNumericScoreFromScores(scores) {
|
|
1609
|
-
for (const item of scores) {
|
|
1610
|
-
const def = getScoreById(item.id);
|
|
1611
|
-
if (def && def.displayStrategy === "bar" && typeof item.data === "object" && item.data !== null && "value" in item.data) {
|
|
1612
|
-
const value = item.data.value;
|
|
1613
|
-
if (typeof value === "number" && Number.isFinite(value)) {
|
|
1614
|
-
return value;
|
|
1615
|
-
}
|
|
1616
|
-
}
|
|
1617
|
-
const numeric = toNumericScore(item.data);
|
|
1618
|
-
if (numeric !== void 0) {
|
|
1619
|
-
return numeric;
|
|
1620
|
-
}
|
|
1621
|
-
}
|
|
1622
|
-
return void 0;
|
|
1623
|
-
}
|
|
1624
|
-
function toNumericScore(value) {
|
|
1625
|
-
if (typeof value === "number" && Number.isFinite(value)) {
|
|
1626
|
-
return value;
|
|
1627
|
-
}
|
|
1628
|
-
if (typeof value !== "object" || value === null) {
|
|
1629
|
-
return void 0;
|
|
1630
|
-
}
|
|
1631
|
-
const obj = value;
|
|
1632
|
-
if ("score" in obj && typeof obj.score === "number" && Number.isFinite(obj.score)) {
|
|
1633
|
-
return obj.score;
|
|
1634
|
-
}
|
|
1635
|
-
const numberValues = Object.values(value).filter(
|
|
1636
|
-
(entry) => typeof entry === "number" && Number.isFinite(entry)
|
|
1637
|
-
);
|
|
1638
|
-
if (numberValues.length === 0) {
|
|
1639
|
-
return void 0;
|
|
1640
|
-
}
|
|
1641
|
-
return numberValues.reduce((sum, entry) => sum + entry, 0) / numberValues.length;
|
|
1642
|
-
}
|
|
1643
|
-
|
|
1644
|
-
// src/runner/execution.ts
|
|
1645
1259
|
function computeEvaluatorPassed(evaluator, result, scores) {
|
|
1646
1260
|
const scoresWithPassed = scores.filter((s) => "passed" in s && s.passed !== void 0);
|
|
1647
1261
|
if (scoresWithPassed.length > 0) {
|
|
@@ -1683,6 +1297,105 @@ function createArtifactPath(artifactDirectory, datasetId, runId) {
|
|
|
1683
1297
|
`${datasetId}_${runId}_${nowIsoForFile()}.jsonl`
|
|
1684
1298
|
);
|
|
1685
1299
|
}
|
|
1300
|
+
function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent, persistenceQueue, updateSnapshot, completedRef, passedRef, failedRef) {
|
|
1301
|
+
return Effect.gen(function* () {
|
|
1302
|
+
const reruns = typeof testCaseItem.testCase.getReruns === "function" ? testCaseItem.testCase.getReruns() : 1;
|
|
1303
|
+
const rerunPassed = [];
|
|
1304
|
+
for (let r = 0; r < reruns; r++) {
|
|
1305
|
+
const started = Date.now();
|
|
1306
|
+
const evaluatorScores = [];
|
|
1307
|
+
let testCaseError;
|
|
1308
|
+
const output = readOutput(testCaseItem.testCase);
|
|
1309
|
+
for (const { id: evaluatorId, evaluator } of task.evaluators) {
|
|
1310
|
+
const evaluateFn = evaluator.getEvaluateFn();
|
|
1311
|
+
if (!evaluateFn) {
|
|
1312
|
+
continue;
|
|
1313
|
+
}
|
|
1314
|
+
try {
|
|
1315
|
+
const logs = [];
|
|
1316
|
+
const logDiff = (expected, actual, options) => {
|
|
1317
|
+
logs.push(createDiffLogEntry(expected, actual, options));
|
|
1318
|
+
};
|
|
1319
|
+
const ctx = yield* Effect.promise(
|
|
1320
|
+
() => Promise.resolve(evaluator.resolveContext())
|
|
1321
|
+
);
|
|
1322
|
+
const result = yield* Effect.promise(
|
|
1323
|
+
() => Promise.resolve(
|
|
1324
|
+
evaluateFn({
|
|
1325
|
+
input: testCaseItem.testCase.getInput(),
|
|
1326
|
+
ctx,
|
|
1327
|
+
output,
|
|
1328
|
+
logDiff
|
|
1329
|
+
})
|
|
1330
|
+
)
|
|
1331
|
+
);
|
|
1332
|
+
const { scores, metrics } = normalizeResult(result);
|
|
1333
|
+
const passed2 = computeEvaluatorPassed(evaluator, result, scores);
|
|
1334
|
+
evaluatorScores.push({
|
|
1335
|
+
evaluatorId,
|
|
1336
|
+
scores,
|
|
1337
|
+
passed: passed2,
|
|
1338
|
+
metrics,
|
|
1339
|
+
logs: logs.length > 0 ? logs : void 0
|
|
1340
|
+
});
|
|
1341
|
+
} catch (error) {
|
|
1342
|
+
testCaseError = error instanceof Error ? error.message : "Evaluator execution failed";
|
|
1343
|
+
evaluatorScores.push({
|
|
1344
|
+
evaluatorId,
|
|
1345
|
+
scores: [],
|
|
1346
|
+
passed: false
|
|
1347
|
+
});
|
|
1348
|
+
}
|
|
1349
|
+
}
|
|
1350
|
+
const rerunPassedThis = evaluatorScores.every((s) => s.passed);
|
|
1351
|
+
rerunPassed.push(rerunPassedThis);
|
|
1352
|
+
const completedEvaluations = yield* Ref.modify(completedRef, (n) => [
|
|
1353
|
+
n + 1,
|
|
1354
|
+
n + 1
|
|
1355
|
+
]);
|
|
1356
|
+
const progressEvent = {
|
|
1357
|
+
type: "TestCaseProgress",
|
|
1358
|
+
runId: task.runId,
|
|
1359
|
+
testCaseId: testCaseItem.id,
|
|
1360
|
+
testCaseName: testCaseItem.testCase.getName(),
|
|
1361
|
+
completedTestCases: completedEvaluations,
|
|
1362
|
+
totalTestCases: totalEvaluations,
|
|
1363
|
+
rerunIndex: r + 1,
|
|
1364
|
+
rerunTotal: reruns,
|
|
1365
|
+
passed: rerunPassedThis,
|
|
1366
|
+
durationMs: Date.now() - started,
|
|
1367
|
+
evaluatorScores,
|
|
1368
|
+
output,
|
|
1369
|
+
errorMessage: testCaseError
|
|
1370
|
+
};
|
|
1371
|
+
updateSnapshot(task.runId, (snapshot) => ({
|
|
1372
|
+
...snapshot,
|
|
1373
|
+
completedTestCases: completedEvaluations
|
|
1374
|
+
}));
|
|
1375
|
+
yield* publishEvent(progressEvent);
|
|
1376
|
+
yield* Queue.offer(persistenceQueue, {
|
|
1377
|
+
runId: task.runId,
|
|
1378
|
+
artifactPath: task.snapshot.artifactPath,
|
|
1379
|
+
payload: progressEvent
|
|
1380
|
+
});
|
|
1381
|
+
}
|
|
1382
|
+
const testCasePassed = rerunPassed.every(Boolean);
|
|
1383
|
+
if (testCasePassed) {
|
|
1384
|
+
yield* Ref.update(passedRef, (n) => n + 1);
|
|
1385
|
+
} else {
|
|
1386
|
+
yield* Ref.update(failedRef, (n) => n + 1);
|
|
1387
|
+
}
|
|
1388
|
+
const [passed, failed] = yield* Effect.all([
|
|
1389
|
+
Ref.get(passedRef),
|
|
1390
|
+
Ref.get(failedRef)
|
|
1391
|
+
]);
|
|
1392
|
+
updateSnapshot(task.runId, (snapshot) => ({
|
|
1393
|
+
...snapshot,
|
|
1394
|
+
passedTestCases: passed,
|
|
1395
|
+
failedTestCases: failed
|
|
1396
|
+
}));
|
|
1397
|
+
});
|
|
1398
|
+
}
|
|
1686
1399
|
var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => Effect.gen(function* () {
|
|
1687
1400
|
const startedAt = Date.now();
|
|
1688
1401
|
updateSnapshot(task.runId, (snapshot) => ({
|
|
@@ -1695,118 +1408,215 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => E
|
|
|
1695
1408
|
runId: task.runId,
|
|
1696
1409
|
startedAt
|
|
1697
1410
|
});
|
|
1698
|
-
|
|
1699
|
-
|
|
1700
|
-
|
|
1701
|
-
|
|
1702
|
-
|
|
1703
|
-
|
|
1704
|
-
|
|
1705
|
-
|
|
1706
|
-
|
|
1707
|
-
|
|
1708
|
-
|
|
1709
|
-
|
|
1411
|
+
const totalEvaluations = task.testCases.reduce(
|
|
1412
|
+
(sum, tc) => sum + (typeof tc.testCase.getReruns === "function" ? tc.testCase.getReruns() : 1),
|
|
1413
|
+
0
|
|
1414
|
+
);
|
|
1415
|
+
const maxConcurrency = Math.max(1, task.maxConcurrency ?? 1);
|
|
1416
|
+
const completedRef = yield* Ref.make(0);
|
|
1417
|
+
const passedRef = yield* Ref.make(0);
|
|
1418
|
+
const failedRef = yield* Ref.make(0);
|
|
1419
|
+
const processTestCase = (testCaseItem) => processOneTestCase(
|
|
1420
|
+
task,
|
|
1421
|
+
testCaseItem,
|
|
1422
|
+
totalEvaluations,
|
|
1423
|
+
publishEvent,
|
|
1424
|
+
persistenceQueue,
|
|
1425
|
+
updateSnapshot,
|
|
1426
|
+
completedRef,
|
|
1427
|
+
passedRef,
|
|
1428
|
+
failedRef
|
|
1429
|
+
);
|
|
1430
|
+
yield* Effect.forEach(
|
|
1431
|
+
task.testCases,
|
|
1432
|
+
processTestCase,
|
|
1433
|
+
maxConcurrency > 1 ? { concurrency: maxConcurrency } : void 0
|
|
1434
|
+
);
|
|
1435
|
+
const [completedEvaluations, passedUniqueTestCases, failedUniqueTestCases] = yield* Effect.all([
|
|
1436
|
+
Ref.get(completedRef),
|
|
1437
|
+
Ref.get(passedRef),
|
|
1438
|
+
Ref.get(failedRef)
|
|
1439
|
+
]);
|
|
1440
|
+
const finishedAt = Date.now();
|
|
1441
|
+
const completedEvent = {
|
|
1442
|
+
type: "RunCompleted",
|
|
1443
|
+
runId: task.runId,
|
|
1444
|
+
finishedAt,
|
|
1445
|
+
passedTestCases: passedUniqueTestCases,
|
|
1446
|
+
failedTestCases: failedUniqueTestCases,
|
|
1447
|
+
totalTestCases: task.testCases.length,
|
|
1448
|
+
artifactPath: task.snapshot.artifactPath
|
|
1449
|
+
};
|
|
1450
|
+
updateSnapshot(task.runId, (snapshot) => ({
|
|
1451
|
+
...snapshot,
|
|
1452
|
+
status: "completed",
|
|
1453
|
+
completedTestCases: completedEvaluations,
|
|
1454
|
+
passedTestCases: passedUniqueTestCases,
|
|
1455
|
+
failedTestCases: failedUniqueTestCases,
|
|
1456
|
+
finishedAt
|
|
1457
|
+
}));
|
|
1458
|
+
yield* publishEvent(completedEvent);
|
|
1459
|
+
yield* Queue.offer(persistenceQueue, {
|
|
1460
|
+
runId: task.runId,
|
|
1461
|
+
artifactPath: task.snapshot.artifactPath,
|
|
1462
|
+
payload: completedEvent
|
|
1463
|
+
});
|
|
1464
|
+
yield* publishEvent({
|
|
1465
|
+
type: "ArtifactFlushed",
|
|
1466
|
+
runId: task.runId,
|
|
1467
|
+
artifactPath: task.snapshot.artifactPath
|
|
1468
|
+
});
|
|
1469
|
+
});
|
|
1470
|
+
/**
 * Rebuilds run snapshots from the `.jsonl` artifact files found directly in
 * `config.artifactDirectory`.
 *
 * Loading is best effort: an unreadable directory yields an empty list, and
 * any artifact that fails to parse (or yields no snapshot) is left out.
 *
 * @param {{ artifactDirectory: string }} config - Runner configuration.
 * @returns {Promise<object[]>} Snapshots ordered by `queuedAt`, newest first.
 */
async function loadRunSnapshotsFromArtifacts(config) {
  const artifactRoot = resolve(config.artifactDirectory);
  let directoryListing;
  try {
    directoryListing = await readdir(artifactRoot);
  } catch {
    // Directory missing or unreadable: there is nothing to restore.
    return [];
  }
  const collected = [];
  for (const entryName of directoryListing) {
    if (!entryName.endsWith(".jsonl")) {
      continue;
    }
    const artifactFile = join(artifactRoot, entryName);
    try {
      const restored = await parseArtifactToSnapshot(artifactFile, config);
      if (restored) {
        collected.push(restored);
      }
    } catch {
      // A broken artifact must not prevent the remaining ones from loading.
    }
  }
  collected.sort((left, right) => right.queuedAt - left.queuedAt);
  return collected;
}
|
|
1492
|
+
/**
 * Reads one JSON-lines artifact and folds its run lifecycle events into a
 * snapshot object.
 *
 * Lines that are not valid JSON are ignored. When an event type occurs more
 * than once, the last occurrence wins. A file without a `RunQueued` event
 * (or without any non-blank line) produces `null`.
 *
 * @param {string} filePath - Artifact file to read; also reported as the
 *   snapshot's `artifactPath`.
 * @param {object} _config - Unused.
 * @returns {Promise<object|null>} The reconstructed snapshot, or `null`.
 */
async function parseArtifactToSnapshot(filePath, _config) {
  const text = await readFile(filePath, "utf8");
  const lines = text.split("\n").filter((candidate) => candidate.trim().length > 0);
  if (lines.length === 0) {
    return null;
  }
  const seen = { queued: null, started: null, completed: null, failed: null };
  for (const line of lines) {
    try {
      const event = JSON.parse(line);
      switch (event.type) {
        case "RunQueued":
          seen.queued = {
            runId: event.runId,
            datasetId: event.datasetId,
            datasetName: event.datasetName,
            evaluatorIds: event.evaluatorIds,
            totalTestCases: event.totalTestCases ?? 0,
            ts: event.ts
          };
          break;
        case "RunStarted":
          seen.started = { startedAt: event.startedAt };
          break;
        case "RunCompleted":
          seen.completed = {
            passedTestCases: event.passedTestCases,
            failedTestCases: event.failedTestCases,
            totalTestCases: event.totalTestCases,
            finishedAt: event.finishedAt
          };
          break;
        case "RunFailed":
          seen.failed = {
            finishedAt: event.finishedAt,
            errorMessage: event.errorMessage
          };
          break;
        default:
          break;
      }
    } catch {
      // Unparseable line (or a JSON `null`): skip it.
    }
  }
  const { queued, started, completed, failed } = seen;
  if (!queued) {
    return null;
  }
  // Precedence: failed > completed > running > queued.
  let status = "queued";
  if (failed) {
    status = "failed";
  } else if (completed) {
    status = "completed";
  } else if (started) {
    status = "running";
  }
  const progress = aggregateTestCaseProgress(lines);
  return {
    runId: queued.runId,
    datasetId: queued.datasetId,
    datasetName: queued.datasetName,
    evaluatorIds: queued.evaluatorIds,
    queuedAt: queued.ts ?? 0,
    startedAt: started?.startedAt,
    finishedAt: completed?.finishedAt ?? failed?.finishedAt,
    totalTestCases: queued.totalTestCases,
    // A completed run is reported as having finished everything it queued.
    completedTestCases: completed ? queued.totalTestCases : progress.completedTestCases,
    passedTestCases: completed?.passedTestCases ?? progress.passedTestCases,
    failedTestCases: completed?.failedTestCases ?? progress.failedTestCases,
    status,
    artifactPath: filePath,
    errorMessage: failed?.errorMessage
  };
}
|
|
1563
|
+
/**
 * Summarises the `TestCaseProgress` events contained in artifact lines.
 *
 * - `completedTestCases` is the highest counter seen on any progress event.
 *   Progress lines are not guaranteed to be appended in counter order when
 *   test cases run concurrently, so taking the last line's value could make
 *   the count go backwards.
 * - A test case counts as passed only if every progress event recorded for
 *   its `testCaseId` (one per rerun) passed; otherwise it counts as failed.
 *
 * Lines that are not valid JSON, and events of other types, are ignored.
 *
 * @param {string[]} lines - Raw JSON-lines content, one event per entry.
 * @returns {{ completedTestCases: number, passedTestCases: number, failedTestCases: number }}
 */
function aggregateTestCaseProgress(lines) {
  let completedTestCases = 0;
  const testCasePassedBy = /* @__PURE__ */ new Map();
  for (const line of lines) {
    let event;
    try {
      event = JSON.parse(line);
    } catch {
      continue;
    }
    if (event?.type !== "TestCaseProgress") {
      continue;
    }
    if (typeof event.completedTestCases === "number" && event.completedTestCases > completedTestCases) {
      completedTestCases = event.completedTestCases;
    }
    const id = event.testCaseId;
    const current = testCasePassedBy.get(id);
    testCasePassedBy.set(id, current === void 0 ? event.passed : current && event.passed);
  }
  let passedTestCases = 0;
  let failedTestCases = 0;
  for (const passed of testCasePassedBy.values()) {
    if (passed) {
      passedTestCases += 1;
    } else {
      failedTestCases += 1;
    }
  }
  return { completedTestCases, passedTestCases, failedTestCases };
}
|
|
1590
|
+
/**
 * Extracts the per-test-case results recorded in a JSON-lines artifact.
 *
 * Only `TestCaseProgress` events contribute; every other line (including
 * lines that are not valid JSON) is skipped. If the file cannot be read the
 * result is an empty list rather than an error.
 *
 * @param {string} artifactPath - Path of the artifact file.
 * @returns {Promise<object[]>} One entry per progress event, in file order.
 */
async function parseArtifactFile(artifactPath) {
  let raw;
  try {
    raw = await readFile(artifactPath, "utf8");
  } catch {
    return [];
  }
  const progressRows = [];
  for (const rawLine of raw.split("\n")) {
    if (rawLine.trim().length === 0) {
      continue;
    }
    let parsed;
    try {
      parsed = JSON.parse(rawLine);
      if (parsed.type !== "TestCaseProgress") {
        continue;
      }
    } catch {
      continue;
    }
    const {
      testCaseId,
      testCaseName,
      completedTestCases,
      totalTestCases,
      rerunIndex,
      rerunTotal,
      passed,
      durationMs,
      evaluatorScores
    } = parsed;
    progressRows.push({
      testCaseId,
      testCaseName,
      completedTestCases,
      totalTestCases,
      rerunIndex,
      rerunTotal,
      passed,
      durationMs,
      evaluatorScores: evaluatorScores ?? []
    });
  }
  return progressRows;
}
|
|
1810
1620
|
async function appendJsonLine(artifactPath, payload) {
|
|
1811
1621
|
await mkdir(dirname(artifactPath), { recursive: true });
|
|
1812
1622
|
await appendFile(artifactPath, `${JSON.stringify(payload)}
|
|
@@ -1822,291 +1632,873 @@ var createPersistenceWorker = (queue) => Effect.forever(
|
|
|
1822
1632
|
...message.payload
|
|
1823
1633
|
})
|
|
1824
1634
|
);
|
|
1825
|
-
})
|
|
1826
|
-
);
|
|
1827
|
-
|
|
1828
|
-
// src/runner/search.ts
|
|
1829
|
-
function matchesAny(value, matchers) {
|
|
1830
|
-
if (!matchers || matchers.length === 0) {
|
|
1831
|
-
return true;
|
|
1635
|
+
})
|
|
1636
|
+
);
|
|
1637
|
+
|
|
1638
|
+
// src/runner/search.ts
|
|
1639
|
+
/**
 * Tests a value against a list of matchers.
 *
 * String matchers require exact equality; RegExp matchers use `test`.
 * An absent or empty matcher list matches everything.
 *
 * @param {string} value - Value to test.
 * @param {Array<string|RegExp>} [matchers] - Matchers to try.
 * @returns {boolean} `true` if any matcher accepts the value.
 */
function matchesAny(value, matchers) {
  if (!matchers || matchers.length === 0) {
    return true;
  }
  return matchers.some((matcher) => {
    if (typeof matcher === "string") {
      return matcher === value;
    }
    // A global/sticky RegExp resumes from `lastIndex`; reset it so repeated
    // calls with the same matcher give the same answer.
    matcher.lastIndex = 0;
    return matcher.test(value);
  });
}
|
|
1647
|
+
/**
 * Tests a file path against a list of matchers.
 *
 * String matchers match when they occur anywhere in the path (substring);
 * RegExp matchers use `test`. An absent or empty list matches everything.
 *
 * @param {string} value - Path to test.
 * @param {Array<string|RegExp>} [matchers] - Matchers to try.
 * @returns {boolean} `true` if any matcher accepts the path.
 */
function matchesPath(value, matchers) {
  if (!matchers || matchers.length === 0) {
    return true;
  }
  return matchers.some((matcher) => {
    if (typeof matcher === "string") {
      return value.includes(matcher);
    }
    // A global/sticky RegExp resumes from `lastIndex`; reset it so repeated
    // calls with the same matcher give the same answer.
    matcher.lastIndex = 0;
    return matcher.test(value);
  });
}
|
|
1658
|
+
/**
 * Filters collected test cases by tag and path criteria.
 *
 * Exclusions are applied first: a test case is dropped if any of its tags
 * matches `excludedTags` or its file path matches `excludedPaths`. The
 * remaining test cases must satisfy both `includedTags` and `includedPaths`.
 *
 * An absent OR empty list means "no constraint" for all four criteria.
 * (`matchesAny`/`matchesPath` return `true` for an empty list, so an empty
 * exclusion list has to be short-circuited here; otherwise `excludedPaths: []`
 * would exclude every test case.)
 *
 * @param {object[]} all - Collected test cases (`{ testCase, filePath }`).
 * @param {object} [query] - Optional `includedTags`, `excludedTags`,
 *   `includedPaths`, `excludedPaths` matcher lists.
 * @returns {object[]} `all` itself when no query is given, otherwise the
 *   filtered list.
 */
function searchCollectedTestCases(all, query) {
  if (!query) {
    return all;
  }
  const hasMatchers = (matchers) => matchers != null && matchers.length > 0;
  const { includedTags, excludedTags, includedPaths, excludedPaths } = query;
  return all.filter((item) => {
    const tags = item.testCase.getTags();
    if (hasMatchers(excludedTags) && tags.some((tag) => matchesAny(tag, excludedTags))) {
      return false;
    }
    if (hasMatchers(excludedPaths) && matchesPath(item.filePath, excludedPaths)) {
      return false;
    }
    const includedTagsMatch = !hasMatchers(includedTags) || tags.some((tag) => matchesAny(tag, includedTags));
    const includedPathsMatch = !hasMatchers(includedPaths) || matchesPath(item.filePath, includedPaths);
    return includedTagsMatch && includedPathsMatch;
  });
}
|
|
1675
|
+
|
|
1676
|
+
// src/runner/api.ts
|
|
1677
|
+
/**
 * Splits a `/source/flags` style string into its regex source and flags.
 *
 * Returns `undefined` when the string is not a regex literal: it does not
 * start with `/`, has no closing `/`, or the text after the closing slash is
 * not a valid flag set. The last case keeps ordinary slash-separated names
 * such as `/a/b/c` from being mistaken for a regex with flags `c`, which
 * would make `new RegExp(source, flags)` throw.
 *
 * @param {string} pattern - Candidate pattern.
 * @returns {{ source: string, flags: string } | undefined}
 */
function parseRegexLiteral(pattern) {
  if (!pattern.startsWith("/")) {
    return void 0;
  }
  const lastSlash = pattern.lastIndexOf("/");
  if (lastSlash <= 0) {
    return void 0;
  }
  const flags = pattern.slice(lastSlash + 1);
  try {
    // Let the engine decide which flags are valid on this runtime.
    new RegExp("", flags);
  } catch {
    return void 0;
  }
  return {
    source: pattern.slice(1, lastSlash),
    flags
  };
}
|
|
1690
|
+
/**
 * Builds a predicate that tests names against a user-supplied pattern.
 *
 * The (trimmed) pattern is interpreted as, in order:
 * 1. a regex literal (`/source/flags`) when `parseRegexLiteral` recognises it;
 * 2. a case-insensitive glob when it contains `*` (`*` matches any run of
 *    characters; every other character, including `?`, is literal);
 * 3. otherwise a case-insensitive exact match.
 *
 * @param {string} pattern - Pattern text.
 * @returns {(value: string) => boolean} Matcher for candidate names.
 * @throws {SyntaxError} If a regex literal has an invalid source or flags.
 */
function createNameMatcher(pattern) {
  const normalizedPattern = pattern.trim();
  const regexLiteral = parseRegexLiteral(normalizedPattern);
  if (regexLiteral) {
    const regex = new RegExp(regexLiteral.source, regexLiteral.flags);
    return (value) => {
      // With the `g`/`y` flag, `test` resumes from `lastIndex`; reset it so
      // the matcher is stateless across calls.
      regex.lastIndex = 0;
      return regex.test(value);
    };
  }
  if (normalizedPattern.includes("*")) {
    // Escape every regex metacharacter except `*`, which becomes `.*`.
    const escaped = normalizedPattern.replace(/[.+?^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*");
    const regex = new RegExp(`^${escaped}$`, "i");
    return (value) => regex.test(value);
  }
  const expected = normalizedPattern.toLowerCase();
  return (value) => value.toLowerCase() === expected;
}
|
|
1704
|
+
/**
 * Combines two runner override objects, with `next` taking precedence.
 *
 * When either side is missing, the other is returned as is. Otherwise the
 * top-level keys are shallow-merged and the `discovery` sections are merged
 * key by key; if neither side defines `discovery`, the result carries an
 * explicit `discovery: undefined`.
 *
 * @param {object} [base] - Lower-priority overrides.
 * @param {object} [next] - Higher-priority overrides.
 * @returns {object|undefined} The merged overrides.
 */
function mergeRunnerOverrides(base, next) {
  if (!base || !next) {
    return base ? base : next;
  }
  const combined = { ...base, ...next, discovery: void 0 };
  if (base.discovery || next.discovery) {
    combined.discovery = {
      ...base.discovery ?? {},
      ...next.discovery ?? {}
    };
  }
  return combined;
}
|
|
1721
|
+
/**
 * Creates a runner whose configuration is the runner config file's overrides
 * merged with the explicit `overrides` (explicit values take precedence).
 *
 * @param {object} [overrides] - Programmatic configuration overrides.
 * @returns {EffectRunner} A new runner instance.
 */
function createRunner(overrides) {
  return new EffectRunner(
    withRunnerConfig(
      mergeRunnerOverrides(loadRunnerConfigFile(), overrides)
    )
  );
}
|
|
1726
|
+
/**
 * Runner that discovers datasets/evaluators/test cases, queues dataset runs,
 * executes them on background fibers and records run events.
 *
 * State kept per instance:
 * - `snapshots`: runId -> latest snapshot object for that run.
 * - `listeners`: subscriber entries (`{ runId?, listener }`).
 * - `datasetsById` / `evaluatorsById`: caches filled by the collect methods.
 */
var EffectRunner = class {
  /**
   * Creates the queues and event bus, then forks the scheduler and the
   * persistence worker. Both fibers run until `shutdown()` interrupts them.
   *
   * @param {object} config - Runner configuration (reads `discovery`,
   *   `artifactDirectory` and `maxConcurrency` in the methods below).
   */
  constructor(config) {
    this.eventBus = Effect.runSync(PubSub.unbounded());
    this.runQueue = Effect.runSync(Queue.unbounded());
    this.persistenceQueue = Effect.runSync(
      Queue.unbounded()
    );
    this.snapshots = /* @__PURE__ */ new Map();
    this.listeners = /* @__PURE__ */ new Set();
    this.datasetsById = /* @__PURE__ */ new Map();
    this.evaluatorsById = /* @__PURE__ */ new Map();
    this.schedulerFiber = Effect.runFork(
      this.createSchedulerEffect()
    );
    this.persistenceFiber = Effect.runFork(
      createPersistenceWorker(this.persistenceQueue)
    );
    // Assigned after the fibers are forked; the scheduler effect built above
    // does not read `this.config`.
    this.config = config;
  }
  /**
   * Discovers datasets and replaces the dataset cache with the result.
   * @returns {Promise<object[]>} The discovered datasets.
   */
  async collectDatasets() {
    const datasets = await collectDatasetsFromFiles(this.config.discovery);
    this.datasetsById.clear();
    for (const dataset of datasets) {
      this.datasetsById.set(dataset.id, dataset);
    }
    return datasets;
  }
  /**
   * Discovers evaluators and replaces the evaluator cache with the result.
   * @returns {Promise<object[]>} The discovered evaluators.
   */
  async collectEvaluators() {
    const evaluators = await collectEvaluatorsFromFiles(this.config.discovery);
    this.evaluatorsById.clear();
    for (const evaluator of evaluators) {
      this.evaluatorsById.set(evaluator.id, evaluator);
    }
    return evaluators;
  }
  /**
   * Finds a dataset by name, ignoring case and surrounding whitespace.
   * Collects datasets first if the cache is empty.
   * @param {string} name - Dataset name to look up.
   * @returns {Promise<object|undefined>} The first match, if any.
   */
  async resolveDatasetByName(name) {
    if (this.datasetsById.size === 0) {
      await this.collectDatasets();
    }
    const normalized = name.trim().toLowerCase();
    return Array.from(this.datasetsById.values()).find(
      (item) => item.dataset.getName().toLowerCase() === normalized
    );
  }
  /**
   * Returns the evaluators whose name is accepted by `createNameMatcher(pattern)`.
   * Collects evaluators first if the cache is empty; evaluators without a
   * name are matched as the empty string.
   * @param {string} pattern - Name pattern.
   * @returns {Promise<object[]>} Matching evaluators.
   */
  async resolveEvaluatorsByNamePattern(pattern) {
    if (this.evaluatorsById.size === 0) {
      await this.collectEvaluators();
    }
    const matcher = createNameMatcher(pattern);
    return Array.from(this.evaluatorsById.values()).filter(
      (item) => matcher(item.evaluator.getName() ?? "")
    );
  }
  /**
   * Collects all test cases and filters them with `searchCollectedTestCases`.
   * @param {object} [query] - Tag/path filter.
   * @returns {Promise<object[]>} Matching test cases.
   */
  async searchTestCases(query) {
    const testCases = await collectTestCasesFromFiles(this.config.discovery);
    return searchCollectedTestCases(testCases, query);
  }
  /**
   * Collects the test cases the given dataset selects via `matchesTestCase`.
   * @param {string} datasetId - Id of a discovered dataset.
   * @returns {Promise<object[]>} Test cases belonging to the dataset.
   * @throws {Error} If the dataset id is unknown.
   */
  async collectDatasetTestCases(datasetId) {
    if (this.datasetsById.size === 0) {
      await this.collectDatasets();
    }
    const dataset = this.datasetsById.get(datasetId);
    if (!dataset) {
      throw new Error(`Unknown dataset: ${datasetId}`);
    }
    const allTestCases = await collectTestCasesFromFiles(this.config.discovery);
    return allTestCases.filter(
      (testCase) => dataset.dataset.matchesTestCase(testCase.testCase, testCase.filePath)
    );
  }
  /**
   * Queues a run of one dataset with the selected evaluators.
   *
   * Registers a "queued" snapshot, publishes and persists a `RunQueued`
   * event, and hands the task to the scheduler queue.
   *
   * @param {object} request - Reads `datasetId`, `evaluatorIds` and the
   *   optional `concurrency`.
   * @returns {Promise<object>} The initial snapshot. Later updates replace
   *   the stored snapshot object, so use `getRunSnapshot(runId)` for
   *   current state.
   * @throws {Error} If the dataset is unknown or no requested evaluator id
   *   resolves to a known evaluator.
   */
  async runDatasetWith(request) {
    if (this.datasetsById.size === 0) {
      await this.collectDatasets();
    }
    if (this.evaluatorsById.size === 0) {
      await this.collectEvaluators();
    }
    const dataset = this.datasetsById.get(request.datasetId);
    if (!dataset) {
      throw new Error(`Unknown dataset: ${request.datasetId}`);
    }
    // Unknown evaluator ids are dropped silently; only an empty selection fails.
    const selectedEvaluators = request.evaluatorIds.map((id) => this.evaluatorsById.get(id)).filter((value) => Boolean(value)).map((value) => ({ id: value.id, evaluator: value.evaluator }));
    if (selectedEvaluators.length === 0) {
      throw new Error("No evaluators selected for run");
    }
    const selectedTestCases = await this.collectDatasetTestCases(request.datasetId);
    // Counts one evaluation per rerun, so `totalTestCases` below is the
    // number of evaluations, not the number of distinct test cases.
    const totalEvaluations = selectedTestCases.reduce(
      (sum, tc) => sum + (typeof tc.testCase.getReruns === "function" ? tc.testCase.getReruns() : 1),
      0
    );
    const runId = `run-${randomUUID()}`;
    const artifactPath = createArtifactPath(
      this.config.artifactDirectory,
      request.datasetId,
      runId
    );
    const snapshot = {
      runId,
      datasetId: request.datasetId,
      datasetName: dataset.dataset.getName(),
      evaluatorIds: selectedEvaluators.map((item) => item.id),
      queuedAt: Date.now(),
      totalTestCases: totalEvaluations,
      completedTestCases: 0,
      passedTestCases: 0,
      failedTestCases: 0,
      status: "queued",
      artifactPath
    };
    this.snapshots.set(runId, snapshot);
    const queuedEvent = {
      type: "RunQueued",
      runId,
      datasetId: request.datasetId,
      datasetName: dataset.dataset.getName(),
      evaluatorIds: selectedEvaluators.map((item) => item.id),
      totalTestCases: totalEvaluations,
      artifactPath
    };
    await Effect.runPromise(this.publishEvent(queuedEvent));
    await Effect.runPromise(
      Queue.offer(this.persistenceQueue, {
        runId,
        artifactPath,
        payload: queuedEvent
      })
    );
    // Per-request concurrency wins over the configured default; falls back to 1.
    const maxConcurrency = request.concurrency ?? this.config.maxConcurrency ?? 1;
    await Effect.runPromise(
      Queue.offer(this.runQueue, {
        runId,
        datasetId: request.datasetId,
        dataset: dataset.dataset,
        evaluators: selectedEvaluators,
        testCases: selectedTestCases,
        snapshot,
        maxConcurrency
      })
    );
    return snapshot;
  }
  /**
   * Registers a listener for run events.
   * @param {(event: object) => void} listener - Called synchronously for
   *   each published event.
   * @param {{ runId?: string }} [options] - When `runId` is set, only events
   *   of that run are delivered.
   * @returns {() => void} Function that removes the listener.
   */
  subscribeRunEvents(listener, options) {
    const entry = { runId: options?.runId, listener };
    this.listeners.add(entry);
    return () => {
      this.listeners.delete(entry);
    };
  }
  /**
   * @param {string} runId - Run identifier.
   * @returns {object|undefined} The latest in-memory snapshot of the run.
   */
  getRunSnapshot(runId) {
    return this.snapshots.get(runId);
  }
  /**
   * @returns {object[]} All in-memory snapshots, most recently queued first.
   */
  getAllRunSnapshots() {
    return Array.from(this.snapshots.values()).sort(
      (a, b) => b.queuedAt - a.queuedAt
    );
  }
  /**
   * Loads snapshots from the artifact directory by delegating to the
   * module-level `loadRunSnapshotsFromArtifacts` with this runner's config.
   * @returns {Promise<object[]>} Snapshots reconstructed from artifacts.
   */
  async loadRunSnapshotsFromArtifacts() {
    return loadRunSnapshotsFromArtifacts(this.config);
  }
  /**
   * Interrupts the scheduler and persistence fibers, then shuts down both
   * queues and the event bus.
   * NOTE(review): the persistence fiber is interrupted without waiting for
   * the persistence queue to drain — confirm whether pending artifact
   * writes can be lost here.
   * @returns {Promise<void>}
   */
  async shutdown() {
    await Effect.runPromise(Fiber.interrupt(this.schedulerFiber));
    await Effect.runPromise(Fiber.interrupt(this.persistenceFiber));
    await Effect.runPromise(Queue.shutdown(this.runQueue));
    await Effect.runPromise(Queue.shutdown(this.persistenceQueue));
    await Effect.runPromise(PubSub.shutdown(this.eventBus));
  }
  /**
   * Builds the scheduler loop: repeatedly takes a task from the run queue
   * and forks `executeRunTask` for it. The loop does not await the forked
   * task before taking the next one.
   * @returns {object} An Effect that loops until interrupted.
   */
  createSchedulerEffect() {
    const self = this;
    return Effect.forever(
      Effect.gen(function* () {
        const task = yield* Queue.take(self.runQueue);
        yield* Effect.fork(
          executeRunTask(
            task,
            self.publishEvent.bind(self),
            self.persistenceQueue,
            self.updateSnapshot.bind(self)
          )
        );
      })
    );
  }
  /**
   * Replaces the stored snapshot of a run with `updater(existing)`.
   * Does nothing when the run id is unknown.
   * @param {string} runId - Run identifier.
   * @param {(snapshot: object) => object} updater - Produces the new snapshot.
   */
  updateSnapshot(runId, updater) {
    const existing = this.snapshots.get(runId);
    if (!existing) {
      return;
    }
    this.snapshots.set(runId, updater(existing));
  }
  /**
   * Builds an Effect that delivers `event` to the matching listeners
   * synchronously and then publishes it on the event bus.
   * A listener registered without a `runId` receives every event.
   * @param {object} event - Event to publish; `event.runId` is used for filtering.
   * @returns {object} An Effect with a void result.
   */
  publishEvent(event) {
    return Effect.sync(() => {
      for (const entry of this.listeners) {
        if (entry.runId && entry.runId !== event.runId) {
          continue;
        }
        entry.listener(event);
      }
    }).pipe(
      Effect.flatMap(() => PubSub.publish(this.eventBus, event)),
      Effect.asVoid
    );
  }
};
|
|
1929
|
+
// Page size for the details view — presumably the number of rows shown or
// scrolled per page; its use is not visible in this part of the file.
var DETAILS_PAGE_SIZE = 20;
|
|
1930
|
+
/**
 * Maps a numeric score to a display colour.
 *
 * @param {number} score - Score to classify.
 * @returns {"green"|"yellow"|"red"} "green" from 80 upwards, "yellow" from
 *   50 upwards, "red" for anything else (including NaN).
 */
function scoreColor(score) {
  return score >= 80 ? "green" : score >= 50 ? "yellow" : "red";
}
|
|
1937
|
+
/**
 * Formats one score entry for display.
 *
 * - Unknown score id: the numeric value with two decimals, or "n/a" when no
 *   numeric value can be derived.
 * - Known score id: the definition's own `format` output; definitions whose
 *   `displayStrategy` is "bar" additionally get a 14-cell bar whose fill is
 *   the value taken as a percentage (0–100).
 *
 * The bar fill is clamped to the bar width. Without the clamp, a value
 * outside 0–100 would pass a negative count to `String.prototype.repeat`,
 * which throws a RangeError.
 *
 * @param {{ id: string, data: unknown }} item - Score entry.
 * @param {Function} [scoreToColor] - Unused; kept for call compatibility.
 * @returns {string} Display text for the score.
 */
function formatScorePart(item, scoreToColor) {
  const def = getScoreById(item.id);
  if (!def) {
    const numeric = toNumericScore(item.data);
    return numeric !== void 0 ? `${numeric.toFixed(2)}` : "n/a";
  }
  const formatted = def.format(item.data);
  if (def.displayStrategy === "bar") {
    const numeric = typeof item.data === "object" && item.data !== null && "value" in item.data ? item.data.value : toNumericScore(item.data);
    if (typeof numeric === "number" && Number.isFinite(numeric)) {
      const barWidth = 14;
      const filled = Math.min(barWidth, Math.max(0, Math.round(numeric / 100 * barWidth)));
      const bar = "\u2588".repeat(filled) + "\u2591".repeat(barWidth - filled);
      return `${formatted} ${bar}`;
    }
  }
  return formatted;
}
|
|
1865
|
-
|
|
1866
|
-
|
|
1867
|
-
|
|
1868
|
-
|
|
1869
|
-
|
|
1955
|
+
/**
 * Renders one boolean check as a single text row: the check name padded to
 * 14 columns, a bold PASSED/FAILED label (green/red), and the optional
 * detail in parentheses.
 *
 * @param {{ name: string, passed: boolean, detail?: string }} props
 * @returns {object} The row element.
 */
function CheckRow({ name, passed, detail }) {
  const [status, color] = passed ? ["PASSED", "green"] : ["FAILED", "red"];
  const nameCell = /* @__PURE__ */ jsx(Text, { color: "gray", children: name.padEnd(14) });
  const statusCell = /* @__PURE__ */ jsx(Text, { color, bold: true, children: status });
  let detailCell = null;
  if (detail) {
    detailCell = /* @__PURE__ */ jsxs(Text, { color: "gray", children: [" (", detail, ")"] });
  }
  return /* @__PURE__ */ jsxs(Text, { children: [nameCell, " ", statusCell, detailCell] });
}
|
|
1973
|
+
function buildDetailRows(run, testCases, evaluatorNameById) {
|
|
1974
|
+
const { performance, dimensions, checks, failures, meta } = run;
|
|
1975
|
+
const latencyHistory = performance.latencyHistoryMs ?? [
|
|
1976
|
+
performance.latencyAvgMs - 40,
|
|
1977
|
+
performance.latencyAvgMs - 10,
|
|
1978
|
+
performance.latencyAvgMs + 20,
|
|
1979
|
+
performance.latencyP95Ms - 80,
|
|
1980
|
+
performance.latencyP95Ms
|
|
1981
|
+
];
|
|
1982
|
+
const rows = [
|
|
1983
|
+
/* @__PURE__ */ jsx(SectionHeader, { children: "Meta" }, "meta-h"),
|
|
1984
|
+
/* @__PURE__ */ jsxs(Text, { color: "gray", children: [
|
|
1985
|
+
"Model: ",
|
|
1986
|
+
meta.model,
|
|
1987
|
+
" Provider: ",
|
|
1988
|
+
meta.provider
|
|
1989
|
+
] }, "meta-1"),
|
|
1990
|
+
/* @__PURE__ */ jsxs(Text, { color: "gray", children: [
|
|
1991
|
+
"Commit: ",
|
|
1992
|
+
meta.commit,
|
|
1993
|
+
" Branch: ",
|
|
1994
|
+
meta.branch,
|
|
1995
|
+
" Seed: ",
|
|
1996
|
+
meta.seed
|
|
1997
|
+
] }, "meta-2"),
|
|
1998
|
+
/* @__PURE__ */ jsxs(Text, { color: "gray", children: [
|
|
1999
|
+
"Duration: ",
|
|
2000
|
+
meta.duration,
|
|
2001
|
+
" Concurrency: ",
|
|
2002
|
+
meta.concurrency
|
|
2003
|
+
] }, "meta-3"),
|
|
2004
|
+
/* @__PURE__ */ jsxs(Text, { color: "gray", children: [
|
|
2005
|
+
"Artifact: ",
|
|
2006
|
+
meta.artifact
|
|
2007
|
+
] }, "meta-4"),
|
|
2008
|
+
/* @__PURE__ */ jsx(Text, { children: " " }, "sp1"),
|
|
2009
|
+
/* @__PURE__ */ jsx(SectionHeader, { children: "Scores (0\u2013100)" }, "scores-h"),
|
|
2010
|
+
...dimensions.map((d) => /* @__PURE__ */ jsx(TextBar, { label: d.name, value: d.score }, `dim-${d.name}`)),
|
|
2011
|
+
/* @__PURE__ */ jsx(Text, { children: " " }, "sp2"),
|
|
2012
|
+
/* @__PURE__ */ jsx(SectionHeader, { children: "Checks (boolean)" }, "checks-h"),
|
|
2013
|
+
...checks.map((c) => /* @__PURE__ */ jsx(CheckRow, { name: c.name, passed: c.passed, detail: c.detail }, `chk-${c.name}`)),
|
|
2014
|
+
/* @__PURE__ */ jsx(Text, { children: " " }, "sp3"),
|
|
2015
|
+
/* @__PURE__ */ jsx(SectionHeader, { children: "Performance" }, "perf-h"),
|
|
2016
|
+
/* @__PURE__ */ jsx(
|
|
2017
|
+
TextBar,
|
|
2018
|
+
{
|
|
2019
|
+
label: "pass rate",
|
|
2020
|
+
value: performance.passRate,
|
|
2021
|
+
format: (v) => `${v}%`
|
|
2022
|
+
},
|
|
2023
|
+
"perf-rate"
|
|
2024
|
+
),
|
|
2025
|
+
/* @__PURE__ */ jsxs(Text, { color: "gray", children: [
|
|
2026
|
+
"latency avg ",
|
|
2027
|
+
performance.latencyAvgMs,
|
|
2028
|
+
"ms p95 ",
|
|
2029
|
+
performance.latencyP95Ms,
|
|
2030
|
+
"ms"
|
|
2031
|
+
] }, "perf-lat"),
|
|
2032
|
+
/* @__PURE__ */ jsxs(Text, { color: "gray", children: [
|
|
2033
|
+
"tokens avg ",
|
|
2034
|
+
performance.tokensAvg,
|
|
2035
|
+
" p95 ",
|
|
2036
|
+
performance.tokensP95
|
|
2037
|
+
] }, "perf-tok"),
|
|
2038
|
+
/* @__PURE__ */ jsx(Text, { children: " " }, "sp4"),
|
|
2039
|
+
/* @__PURE__ */ jsx(SectionHeader, { children: "Latency trend" }, "spark-h"),
|
|
2040
|
+
/* @__PURE__ */ jsx(Sparkline, { data: latencyHistory, width: 20 }, "spark")
|
|
2041
|
+
];
|
|
2042
|
+
if (failures.length > 0) {
|
|
2043
|
+
rows.push(/* @__PURE__ */ jsx(Text, { children: " " }, "sp5"));
|
|
2044
|
+
rows.push(/* @__PURE__ */ jsx(SectionHeader, { children: "Failures (top)" }, "fail-h"));
|
|
2045
|
+
failures.forEach((f, i) => {
|
|
2046
|
+
rows.push(
|
|
2047
|
+
/* @__PURE__ */ jsxs(Text, { color: "red", children: [
|
|
2048
|
+
i + 1,
|
|
2049
|
+
") ",
|
|
2050
|
+
f.title
|
|
2051
|
+
] }, `fail-${i}`)
|
|
2052
|
+
);
|
|
2053
|
+
});
|
|
1870
2054
|
}
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
2055
|
+
if (testCases.length > 0) {
|
|
2056
|
+
rows.push(/* @__PURE__ */ jsx(Text, { children: " " }, "sp6"));
|
|
2057
|
+
rows.push(/* @__PURE__ */ jsx(SectionHeader, { children: "Test cases" }, "tc-h"));
|
|
2058
|
+
for (const tc of testCases) {
|
|
2059
|
+
const rerunPart = tc.rerunTotal != null && tc.rerunIndex != null ? ` (${tc.rerunIndex}/${tc.rerunTotal})` : "";
|
|
2060
|
+
rows.push(
|
|
2061
|
+
/* @__PURE__ */ jsxs(Text, { children: [
|
|
2062
|
+
/* @__PURE__ */ jsxs(Text, { color: "cyan", children: [
|
|
2063
|
+
"[",
|
|
2064
|
+
tc.completedTestCases,
|
|
2065
|
+
"/",
|
|
2066
|
+
tc.totalTestCases,
|
|
2067
|
+
"]"
|
|
2068
|
+
] }),
|
|
2069
|
+
" ",
|
|
2070
|
+
tc.testCaseName,
|
|
2071
|
+
rerunPart ? /* @__PURE__ */ jsx(Text, { color: "cyan", children: rerunPart }) : null,
|
|
2072
|
+
/* @__PURE__ */ jsxs(Text, { color: "gray", children: [
|
|
2073
|
+
" (",
|
|
2074
|
+
tc.durationMs,
|
|
2075
|
+
"ms)"
|
|
2076
|
+
] })
|
|
2077
|
+
] }, `tc-${tc.testCaseId}-${tc.rerunIndex ?? 0}`)
|
|
2078
|
+
);
|
|
2079
|
+
for (const item of tc.evaluatorScores) {
|
|
2080
|
+
const name = evaluatorNameById.get(item.evaluatorId) ?? item.evaluatorId;
|
|
2081
|
+
rows.push(
|
|
2082
|
+
/* @__PURE__ */ jsxs(Text, { children: [
|
|
2083
|
+
" ",
|
|
2084
|
+
name,
|
|
2085
|
+
":",
|
|
2086
|
+
" ",
|
|
2087
|
+
/* @__PURE__ */ jsx(Text, { color: item.passed ? "green" : "red", bold: true, children: item.passed ? "PASS" : "FAIL" }),
|
|
2088
|
+
" ",
|
|
2089
|
+
item.scores.map((s) => /* @__PURE__ */ jsxs(Text, { color: scoreColor(toNumericScore(s.data) ?? 0), children: [
|
|
2090
|
+
formatScorePart(s),
|
|
2091
|
+
" "
|
|
2092
|
+
] }, s.id)),
|
|
2093
|
+
item.metrics?.map((m) => {
|
|
2094
|
+
const def = getMetricById(m.id);
|
|
2095
|
+
if (!def)
|
|
2096
|
+
return null;
|
|
2097
|
+
const formatted = def.format(m.data);
|
|
2098
|
+
return /* @__PURE__ */ jsxs(Text, { color: "gray", children: [
|
|
2099
|
+
"[",
|
|
2100
|
+
def.name ? `${def.name}: ` : "",
|
|
2101
|
+
formatted,
|
|
2102
|
+
"]",
|
|
2103
|
+
" "
|
|
2104
|
+
] }, m.id);
|
|
2105
|
+
})
|
|
2106
|
+
] }, `tc-${tc.testCaseId}-${item.evaluatorId}`)
|
|
2107
|
+
);
|
|
2108
|
+
if (!item.passed && item.logs && item.logs.length > 0) {
|
|
2109
|
+
for (let logIdx = 0; logIdx < item.logs.length; logIdx++) {
|
|
2110
|
+
const log = item.logs[logIdx];
|
|
2111
|
+
if (log.type === "diff") {
|
|
2112
|
+
const lines = getDiffLines(log);
|
|
2113
|
+
for (let lineIdx = 0; lineIdx < lines.length; lineIdx++) {
|
|
2114
|
+
const { type, line } = lines[lineIdx];
|
|
2115
|
+
rows.push(
|
|
2116
|
+
/* @__PURE__ */ jsxs(
|
|
2117
|
+
Text,
|
|
2118
|
+
{
|
|
2119
|
+
color: type === "remove" ? "red" : type === "add" ? "green" : "gray",
|
|
2120
|
+
children: [
|
|
2121
|
+
" ",
|
|
2122
|
+
line
|
|
2123
|
+
]
|
|
2124
|
+
},
|
|
2125
|
+
`tc-${tc.testCaseId}-${item.evaluatorId}-${logIdx}-${lineIdx}`
|
|
2126
|
+
)
|
|
2127
|
+
);
|
|
2128
|
+
}
|
|
2129
|
+
}
|
|
2130
|
+
}
|
|
2131
|
+
}
|
|
2132
|
+
}
|
|
2133
|
+
}
|
|
1874
2134
|
}
|
|
1875
|
-
return
|
|
1876
|
-
source: pattern.slice(1, lastSlash),
|
|
1877
|
-
flags: pattern.slice(lastSlash + 1)
|
|
1878
|
-
};
|
|
2135
|
+
return rows;
|
|
1879
2136
|
}
|
|
1880
|
-
function
|
|
1881
|
-
|
|
1882
|
-
|
|
1883
|
-
|
|
1884
|
-
|
|
1885
|
-
|
|
1886
|
-
|
|
1887
|
-
|
|
1888
|
-
|
|
1889
|
-
|
|
1890
|
-
|
|
2137
|
+
/**
 * Details screen for one run: the runs sidebar plus a scrollable pane of
 * detail rows built by `buildDetailRows`.
 *
 * Test cases are loaded asynchronously from the run's artifact file
 * (`selectedRun.meta.artifact`) whenever that path changes.
 *
 * @param {object} props
 * @param {object} props.state - CLI state; reads `focus` and `detailsScrollOffset`.
 * @param {object} [props.dataset] - Selected dataset; its `runs` feed the sidebar.
 * @param {object} [props.selectedRun] - Run to inspect; a placeholder is shown when absent.
 * @param {Array<{id: string, name: string}>} props.evaluators - Used to map evaluator ids to names.
 * @returns The sidebar and details pane as a fragment.
 */
function RunDetailsView({
  state,
  dataset,
  selectedRun,
  evaluators
}) {
  const runs = dataset?.runs ?? [];
  const rightFocused = state.focus === "right";
  const [testCases, setTestCases] = useState([]);
  const evaluatorNameById = React.useMemo(
    () => new Map(evaluators.map((e) => [e.id, e.name])),
    [evaluators]
  );
  const artifact = selectedRun?.meta?.artifact;
  useEffect(() => {
    if (!artifact) {
      setTestCases([]);
      return void 0;
    }
    // Guards against a stale load: if the selection changes (or the view
    // unmounts) before the file is parsed, the late result is ignored instead
    // of overwriting the test cases of the currently selected run.
    let cancelled = false;
    parseArtifactFile(resolve(artifact)).then(
      (parsed) => {
        if (!cancelled) {
          setTestCases(parsed);
        }
      },
      () => {
        // Best effort: if parsing rejects, show no test cases rather than
        // leaving an unhandled promise rejection.
        if (!cancelled) {
          setTestCases([]);
        }
      }
    );
    return () => {
      cancelled = true;
    };
  }, [artifact]);
  const sidebar = /* @__PURE__ */ jsx(RunsSidebar, { state, dataset, runs });
  if (!selectedRun) {
    return /* @__PURE__ */ jsxs(Fragment, { children: [
      sidebar,
      /* @__PURE__ */ jsx(Pane, { flexGrow: 1, marginLeft: 1, focused: rightFocused, children: /* @__PURE__ */ jsx(Text, { color: "gray", children: "Select a run to inspect details." }) })
    ] });
  }
  const rows = buildDetailRows(selectedRun, testCases, evaluatorNameById);
  // Only one page of rows is rendered, starting at the (non-negative) scroll offset.
  const offset = Math.max(0, state.detailsScrollOffset);
  const visible = rows.slice(offset, offset + DETAILS_PAGE_SIZE);
  return /* @__PURE__ */ jsxs(Fragment, { children: [
    sidebar,
    /* @__PURE__ */ jsx(Pane, { flexGrow: 1, marginLeft: 1, focused: rightFocused, children: /* @__PURE__ */ jsx(Box, { flexDirection: "column", children: visible.map((row, i) => /* @__PURE__ */ jsx(React.Fragment, { children: row }, i)) }) })
  ] });
}
|
|
1894
|
-
|
|
1895
|
-
|
|
1896
|
-
|
|
1897
|
-
|
|
1898
|
-
|
|
1899
|
-
|
|
1900
|
-
|
|
1901
|
-
const
|
|
1902
|
-
|
|
1903
|
-
|
|
1904
|
-
|
|
2172
|
+
// Fixed width of the left-hand evaluator list pane.
var LEFT_PANE_WIDTH3 = 44;
/**
 * "New evaluation" screen: a selectable list of evaluators on the left and,
 * on the right, the evaluators picked so far plus a config preview for the
 * evaluator under the cursor.
 *
 * @param {object} props
 * @param {object} props.state - CLI state; reads `selectedEvaluatorIds`,
 *   `evaluatorMenuIndex`, `searchQuery` and `focus`.
 * @param {object} props.data - Provides `evaluators`, used to resolve selected ids.
 * @param {Array<object>} props.visibleEvaluators - Evaluators shown in the left list.
 * @returns Both panes as a fragment.
 */
function NewEvaluationView({
  state,
  data,
  visibleEvaluators
}) {
  const { selectedEvaluatorIds, evaluatorMenuIndex, searchQuery, focus } = state;
  const pickedTotal = selectedEvaluatorIds.length;
  const underCursor = visibleEvaluators[evaluatorMenuIndex];

  // One line of the left list: cursor marker, checkbox, evaluator name.
  const renderChoice = (candidate, position) => {
    const isCursor = position === evaluatorMenuIndex;
    const isPicked = selectedEvaluatorIds.includes(candidate.id);
    let marker = " ";
    if (isCursor) {
      marker = "\u25B8 ";
    }
    let checkbox = "[ ] ";
    if (isPicked) {
      checkbox = "[x] ";
    }
    return /* @__PURE__ */ jsxs(
      Text,
      {
        color: isCursor ? "cyan" : "gray",
        bold: isCursor,
        children: [marker, checkbox, candidate.name]
      },
      candidate.id
    );
  };

  // One numbered line of the right list; ids without a matching evaluator render nothing.
  const renderPicked = (pickedId, position) => {
    const match = data.evaluators.find((item) => item.id === pickedId);
    if (match) {
      return /* @__PURE__ */ jsxs(Text, { children: [position + 1, ") ", match.name] }, pickedId);
    }
    return null;
  };

  const listPane = /* @__PURE__ */ jsxs(Pane, { width: LEFT_PANE_WIDTH3, focused: focus === "left", children: [
    /* @__PURE__ */ jsx(SectionHeader, { children: "Available Evaluators" }),
    /* @__PURE__ */ jsxs(Text, { color: "gray", children: ["Search: ", searchQuery || "(none)"] }),
    visibleEvaluators.map(renderChoice)
  ] });

  const summaryPane = /* @__PURE__ */ jsxs(Pane, { flexGrow: 1, marginLeft: 1, focused: focus === "right", children: [
    /* @__PURE__ */ jsxs(SectionHeader, { children: ["Selected (", pickedTotal, ")"] }),
    selectedEvaluatorIds.map(renderPicked),
    /* @__PURE__ */ jsx(SectionHeader, { children: "Config preview" }),
    /* @__PURE__ */ jsx(Text, { color: "gray", children: underCursor?.configPreview ?? "Select an evaluator to inspect config." })
  ] });

  return /* @__PURE__ */ jsxs(Fragment, { children: [listPane, summaryPane] });
}
|
|
2228
|
+
/**
 * Return a copy of `state` with the three menu cursors clamped into range.
 *
 * The dataset and run cursors are clamped to `[0, length]` (inclusive of the
 * length itself) — presumably because those menus carry one extra leading
 * entry; confirm against the views that render them.
 *
 * @param {object} state - CLI state with `datasetMenuIndex`, `runMenuIndex`
 *   and `evaluatorMenuIndex`; all other fields are copied through unchanged.
 * @param {number} filteredDatasetsLength - Upper bound for `datasetMenuIndex`.
 * @param {number} selectedRunCount - Upper bound for `runMenuIndex`.
 * @param {number} [evaluatorCount] - Number of evaluators in the list. When
 *   given, `evaluatorMenuIndex` is clamped to `[0, evaluatorCount - 1]`; when
 *   omitted, the legacy fixed upper bound of 3 is used.
 * @returns {object} New state object; the input is not mutated.
 */
function clampCursor(state, filteredDatasetsLength, selectedRunCount, evaluatorCount) {
  const clamp = (value, max) => Math.max(0, Math.min(value, max));
  // Legacy callers pass no evaluator count and keep the historical bound of 3.
  const evaluatorMax = evaluatorCount == null ? 3 : Math.max(0, evaluatorCount - 1);
  return {
    ...state,
    datasetMenuIndex: clamp(state.datasetMenuIndex, filteredDatasetsLength),
    runMenuIndex: clamp(state.runMenuIndex, selectedRunCount),
    evaluatorMenuIndex: clamp(state.evaluatorMenuIndex, evaluatorMax)
  };
}
|
|
1911
|
-
function
|
|
1912
|
-
|
|
1913
|
-
|
|
1914
|
-
|
|
1915
|
-
}
|
|
1916
|
-
|
|
1917
|
-
|
|
1918
|
-
|
|
1919
|
-
|
|
1920
|
-
|
|
1921
|
-
|
|
1922
|
-
)
|
|
1923
|
-
|
|
1924
|
-
|
|
1925
|
-
|
|
1926
|
-
|
|
1927
|
-
|
|
1928
|
-
|
|
1929
|
-
|
|
1930
|
-
this.persistenceFiber = Effect.runFork(
|
|
1931
|
-
createPersistenceWorker(this.persistenceQueue)
|
|
1932
|
-
);
|
|
1933
|
-
this.config = config;
|
|
1934
|
-
}
|
|
1935
|
-
async collectDatasets() {
|
|
1936
|
-
const datasets = await collectDatasetsFromFiles(this.config.discovery);
|
|
1937
|
-
this.datasetsById.clear();
|
|
1938
|
-
for (const dataset of datasets) {
|
|
1939
|
-
this.datasetsById.set(dataset.id, dataset);
|
|
2242
|
+
/**
 * Root component of the full-screen evals CLI.
 *
 * Holds the reducer-driven navigation state, keeps `liveData` in sync with
 * runner events, handles keyboard input, and renders the breadcrumb, optional
 * warning/search/message banners, the view for the current level, and the
 * footer.
 *
 * @param {object} props
 * @param {object} props.data - Initial view data (datasets, evaluators).
 * @param {object} props.args - Parsed startup arguments, passed to `createInitialState`.
 * @param {object} [props.runner] - Runner used to subscribe to run events and
 *   start evaluations; when absent, starting a run shows a message instead.
 * @returns The full-screen layout.
 */
function EvalsCliApp({
  data,
  args,
  runner
}) {
  const { exit } = useApp();
  const { width: stdoutWidth, height: stdoutHeight } = useScreenSize();
  const [liveData, setLiveData] = useState(data);
  const [runtimeMessage, setRuntimeMessage] = useState();
  const [state, dispatch] = useReducer(
    reduceCliState,
    createInitialState(data, args)
  );
  useEffect(() => {
    setLiveData(data);
  }, [data]);
  useEffect(() => {
    if (!runner) {
      return void 0;
    }
    // The value returned by subscribeRunEvents is handed to React as the
    // effect cleanup.
    return runner.subscribeRunEvents((event) => {
      setLiveData((current) => applyRunnerEvent(current, event, runner));
      if (event.type === "RunQueued") {
        setRuntimeMessage(`Queued ${event.runId} with ${event.totalTestCases} test cases.`);
      }
      if (event.type === "RunCompleted") {
        setRuntimeMessage(
          `Completed ${event.runId}: ${event.passedTestCases}/${event.totalTestCases} passed.`
        );
      }
      if (event.type === "RunFailed") {
        setRuntimeMessage(`Run failed: ${event.errorMessage}`);
      }
    });
  }, [runner]);
  const filteredDatasets = useMemo(
    () => getFilteredDatasets(liveData, state.searchQuery),
    [liveData, state.searchQuery]
  );
  // Lower-case the query once instead of once per evaluator.
  const normalizedQuery = state.searchQuery.toLowerCase();
  const visibleEvaluators = liveData.evaluators.filter(
    (evaluator) => evaluator.name.toLowerCase().includes(normalizedQuery)
  );
  const lastEvaluatorIndex = Math.max(0, visibleEvaluators.length - 1);
  const cursorState = clampCursor(
    state,
    filteredDatasets.length,
    getDatasetByMenuIndex(filteredDatasets, state.datasetMenuIndex)?.runs.length ?? 0
  );
  // Bound the evaluator cursor by the list that is actually shown, matching
  // the limit the arrow-key handlers below use. A fixed bound makes the
  // cursor stick on long lists and point past the end of short ones.
  const clampedState = {
    ...cursorState,
    evaluatorMenuIndex: Math.max(0, Math.min(state.evaluatorMenuIndex, lastEvaluatorIndex))
  };
  const selectedDataset = getDatasetByMenuIndex(
    filteredDatasets,
    clampedState.datasetMenuIndex
  );
  const selectedRun = getRunByMenuIndex(
    selectedDataset,
    clampedState.runMenuIndex
  );
  useInput((input, key) => {
    // NOTE(review): the quit and search-start checks run before the
    // search-mode branch, so any input they match is never appended to the
    // query — confirm this is intended.
    if (isQuitInput(input) || key.escape) {
      exit();
      return;
    }
    if (key.tab) {
      dispatch({ type: "TOGGLE_FOCUS" });
      return;
    }
    if (isSearchInput(input)) {
      dispatch({ type: "START_SEARCH" });
      return;
    }
    if (clampedState.searchMode) {
      if (key.return) {
        dispatch({ type: "END_SEARCH" });
        return;
      }
      if (isBackKey(key)) {
        dispatch({ type: "REMOVE_SEARCH_CHAR" });
        return;
      }
      if (isPrintableCharacter(input)) {
        dispatch({ type: "APPEND_SEARCH", value: input });
      }
      // While searching, every other key is swallowed.
      return;
    }
    if (key.upArrow) {
      // Never send a negative max when the evaluator list is empty.
      const max = clampedState.level === "new-evaluation" ? lastEvaluatorIndex : 100;
      dispatch({ type: "MOVE_UP", max });
      return;
    }
    if (key.downArrow) {
      const max = clampedState.level === "datasets" ? filteredDatasets.length : clampedState.level === "runs" ? selectedDataset?.runs.length ?? 0 : clampedState.level === "new-evaluation" ? lastEvaluatorIndex : 100;
      dispatch({ type: "MOVE_DOWN", max });
      return;
    }
    if (key.return) {
      dispatch({
        type: "ENTER",
        hasDataset: Boolean(selectedDataset),
        hasRun: Boolean(selectedRun)
      });
      if (clampedState.level === "new-evaluation") {
        const evaluator = visibleEvaluators[clampedState.evaluatorMenuIndex];
        if (evaluator) {
          dispatch({ type: "TOGGLE_EVALUATOR", evaluatorId: evaluator.id });
        }
      }
      return;
    }
    if (isBackKey(key)) {
      dispatch({ type: "BACK" });
      return;
    }
    if (input.toLowerCase() === "c") {
      dispatch({ type: "CLEAR_WARNINGS" });
      setRuntimeMessage(void 0);
      return;
    }
    if (input.toLowerCase() === "s" && clampedState.level === "new-evaluation") {
      if (!runner) {
        setRuntimeMessage("Runner unavailable: cannot start evaluation.");
        return;
      }
      if (!selectedDataset) {
        setRuntimeMessage("Select a dataset before starting a new evaluation.");
        return;
      }
      if (clampedState.selectedEvaluatorIds.length === 0) {
        setRuntimeMessage("Select at least one evaluator before starting.");
        return;
      }
      // Fire-and-forget: the outcome is reported through the runtime message.
      void runner.runDatasetWith({
        datasetId: selectedDataset.id,
        evaluatorIds: clampedState.selectedEvaluatorIds
      }).then((snapshot) => {
        setRuntimeMessage(
          `Started ${snapshot.runId} on ${selectedDataset.name} (${snapshot.totalTestCases} cases).`
        );
      }).catch((error) => {
        setRuntimeMessage(
          error instanceof Error ? error.message : "Failed to start evaluation."
        );
      });
    }
  });
  // Pick the view for the current navigation level; anything that is not
  // "new-evaluation", "datasets" or "runs" falls through to the run details.
  const renderContent = () => {
    if (clampedState.level === "new-evaluation") {
      return /* @__PURE__ */ jsx(
        NewEvaluationView,
        {
          state: clampedState,
          data: liveData,
          visibleEvaluators
        }
      );
    }
    if (clampedState.level === "datasets") {
      return /* @__PURE__ */ jsx(
        DatasetsView,
        {
          state: clampedState,
          filteredDatasets,
          selectedDataset
        }
      );
    }
    if (clampedState.level === "runs") {
      return /* @__PURE__ */ jsx(
        RunsView,
        {
          state: clampedState,
          dataset: selectedDataset,
          selectedRun
        }
      );
    }
    return /* @__PURE__ */ jsx(
      RunDetailsView,
      {
        state: clampedState,
        dataset: selectedDataset,
        selectedRun,
        evaluators: liveData.evaluators
      }
    );
  };
  return /* @__PURE__ */ jsxs(
    Box,
    {
      flexDirection: "column",
      flexGrow: 1,
      width: stdoutWidth,
      height: stdoutHeight,
      children: [
        /* @__PURE__ */ jsx(
          Box,
          {
            borderStyle: "round",
            borderColor: "cyan",
            paddingX: 1,
            width: stdoutWidth,
            children: /* @__PURE__ */ jsx(Text, { children: getBreadcrumbText(
              clampedState,
              selectedDataset?.name,
              selectedRun?.label
            ) })
          }
        ),
        clampedState.startupWarnings.length > 0 && /* @__PURE__ */ jsxs(
          Box,
          {
            marginTop: 1,
            borderStyle: "round",
            borderColor: "yellow",
            paddingX: 1,
            flexDirection: "column",
            width: stdoutWidth,
            children: [
              /* @__PURE__ */ jsx(Text, { color: "yellow", children: "Startup warnings:" }),
              clampedState.startupWarnings.map((warning, index) => /* @__PURE__ */ jsx(Text, { children: warning }, `${warning}-${index}`))
            ]
          }
        ),
        clampedState.searchMode && /* @__PURE__ */ jsxs(
          Box,
          {
            marginTop: 1,
            borderStyle: "round",
            borderColor: "magenta",
            paddingX: 1,
            width: stdoutWidth,
            children: [
              /* @__PURE__ */ jsx(Text, { color: "magenta", bold: true, children: "Search: " }),
              /* @__PURE__ */ jsx(Text, { color: "white", children: clampedState.searchQuery })
            ]
          }
        ),
        runtimeMessage && /* @__PURE__ */ jsx(
          Box,
          {
            marginTop: 1,
            borderStyle: "round",
            borderColor: "blue",
            paddingX: 1,
            width: stdoutWidth,
            children: /* @__PURE__ */ jsx(Text, { color: "blue", children: runtimeMessage })
          }
        ),
        /* @__PURE__ */ jsx(
          Box,
          {
            marginTop: 1,
            flexGrow: 1,
            width: stdoutWidth,
            flexDirection: "row",
            children: renderContent()
          }
        ),
        /* @__PURE__ */ jsx(Box, { marginTop: 1, paddingX: 1, children: /* @__PURE__ */ jsx(Text, { color: "gray", children: getFooterText(clampedState) }) })
      ]
    }
  );
}
|
|
2110
2502
|
async function main() {
|
|
2111
2503
|
const args = parseStartupArgs(process.argv.slice(2));
|
|
2112
2504
|
const runner = createRunner();
|