agent-regression-lab 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17481,13 +17481,13 @@ var require_react_dom_client_development = __commonJS({
17481
17481
  function getHoistableRoot(container2) {
17482
17482
  return "function" === typeof container2.getRootNode ? container2.getRootNode() : 9 === container2.nodeType ? container2 : container2.ownerDocument;
17483
17483
  }
17484
- function preconnectAs(rel, href, crossOrigin) {
17484
+ function preconnectAs(rel2, href, crossOrigin) {
17485
17485
  var ownerDocument = globalDocument;
17486
17486
  if (ownerDocument && "string" === typeof href && href) {
17487
17487
  var limitedEscapedHref = escapeSelectorAttributeValueInsideDoubleQuotes(href);
17488
- limitedEscapedHref = 'link[rel="' + rel + '"][href="' + limitedEscapedHref + '"]';
17488
+ limitedEscapedHref = 'link[rel="' + rel2 + '"][href="' + limitedEscapedHref + '"]';
17489
17489
  "string" === typeof crossOrigin && (limitedEscapedHref += '[crossorigin="' + crossOrigin + '"]');
17490
- preconnectsSet.has(limitedEscapedHref) || (preconnectsSet.add(limitedEscapedHref), rel = { rel, crossOrigin, href }, null === ownerDocument.querySelector(limitedEscapedHref) && (href = ownerDocument.createElement("link"), setInitialProperties(href, "link", rel), markNodeAsHoistable(href), ownerDocument.head.appendChild(href)));
17490
+ preconnectsSet.has(limitedEscapedHref) || (preconnectsSet.add(limitedEscapedHref), rel2 = { rel: rel2, crossOrigin, href }, null === ownerDocument.querySelector(limitedEscapedHref) && (href = ownerDocument.createElement("link"), setInitialProperties(href, "link", rel2), markNodeAsHoistable(href), ownerDocument.head.appendChild(href)));
17491
17491
  }
17492
17492
  }
17493
17493
  function getResource(type, currentProps, pendingProps, currentResource) {
@@ -21724,479 +21724,916 @@ var import_client = __toESM(require_client(), 1);
21724
21724
  // src/ui/App.tsx
21725
21725
  var import_react = __toESM(require_react(), 1);
21726
21726
  var import_jsx_runtime = __toESM(require_jsx_runtime(), 1);
21727
+ function getRoute() {
21728
+ const url = new URL(window.location.href);
21729
+ if (url.pathname.startsWith("/runs/")) {
21730
+ return { type: "detail", runId: decodeURIComponent(url.pathname.slice("/runs/".length)) };
21731
+ }
21732
+ if (url.pathname === "/compare-suite") {
21733
+ return {
21734
+ type: "compare-suite",
21735
+ baselineBatch: url.searchParams.get("baselineBatch") ?? void 0,
21736
+ candidateBatch: url.searchParams.get("candidateBatch") ?? void 0
21737
+ };
21738
+ }
21739
+ if (url.pathname === "/compare") {
21740
+ return {
21741
+ type: "compare",
21742
+ baseline: url.searchParams.get("baseline") ?? void 0,
21743
+ candidate: url.searchParams.get("candidate") ?? void 0
21744
+ };
21745
+ }
21746
+ return { type: "list" };
21747
+ }
21748
+ function sc(status) {
21749
+ if (status === "pass") return "p";
21750
+ if (status === "fail") return "f";
21751
+ return "w";
21752
+ }
21753
+ function scColor(status) {
21754
+ if (status === "pass") return "var(--pass)";
21755
+ if (status === "fail") return "var(--fail)";
21756
+ return "var(--warn)";
21757
+ }
21758
+ function dur(ms) {
21759
+ if (ms >= 1e4) return `${(ms / 1e3).toFixed(0)}s`;
21760
+ if (ms >= 1e3) return `${(ms / 1e3).toFixed(1)}s`;
21761
+ return `${ms}ms`;
21762
+ }
21763
+ function rel(iso) {
21764
+ const diff = Date.now() - new Date(iso).getTime();
21765
+ const m = Math.floor(diff / 6e4);
21766
+ if (m < 1) return "just now";
21767
+ if (m < 60) return `${m}m ago`;
21768
+ const h = Math.floor(m / 60);
21769
+ if (h < 24) return `${h}h ago`;
21770
+ return `${Math.floor(h / 24)}d ago`;
21771
+ }
21772
+ function avgScore(runs) {
21773
+ if (runs.length === 0) return "\u2014";
21774
+ return (runs.reduce((s, r) => s + r.score, 0) / runs.length).toFixed(2);
21775
+ }
21776
+ function riskClass(risk) {
21777
+ if (risk === "high") return "miss";
21778
+ if (risk === "medium") return "extra";
21779
+ return "match";
21780
+ }
21781
+ function getEventKind(type) {
21782
+ if (type === "tool_call" || type === "tool_result") return "tool";
21783
+ if (type.startsWith("assistant")) return "asst";
21784
+ if (type.startsWith("eval")) return "eval";
21785
+ return "user";
21786
+ }
21787
+ function getEventLabel(type) {
21788
+ const map = {
21789
+ tool_call: "Tool",
21790
+ tool_result: "Result",
21791
+ assistant_turn: "Assistant",
21792
+ user_turn: "User",
21793
+ evaluator_result: "Eval",
21794
+ evaluator_results: "Eval"
21795
+ };
21796
+ return map[type] ?? type.replace(/_/g, " ");
21797
+ }
21798
+ function getEventTitle(event) {
21799
+ const p = event.payload;
21800
+ if (event.type === "tool_call") {
21801
+ return String(p.tool_name ?? p.toolName ?? p.name ?? "tool call");
21802
+ }
21803
+ if (event.type === "tool_result") {
21804
+ return `${String(p.tool_name ?? p.toolName ?? "tool")} \u2192 ${String(p.status ?? "ok")}`;
21805
+ }
21806
+ if (event.type === "assistant_turn") {
21807
+ const content = String(p.content ?? p.text ?? "");
21808
+ return content.slice(0, 80) || "assistant response";
21809
+ }
21810
+ if (event.type === "user_turn") {
21811
+ const content = String(p.content ?? p.text ?? "");
21812
+ return content.slice(0, 80) || "user message";
21813
+ }
21814
+ return event.type.replace(/_/g, " ");
21815
+ }
21816
+ function getEventBody(event) {
21817
+ const p = event.payload;
21818
+ if (Object.keys(p).length === 0) return null;
21819
+ if (event.type === "assistant_turn" || event.type === "user_turn") {
21820
+ const content = String(p.content ?? p.text ?? "");
21821
+ if (content.length > 80) return content;
21822
+ const other = Object.fromEntries(
21823
+ Object.entries(p).filter(([k]) => k !== "content" && k !== "text")
21824
+ );
21825
+ return Object.keys(other).length > 0 ? JSON.stringify(other, null, 2) : null;
21826
+ }
21827
+ return JSON.stringify(p, null, 2);
21828
+ }
21727
21829
  function App() {
21728
21830
  const route = getRoute();
21729
21831
  return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "shell", children: [
21730
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("header", { className: "topbar", children: /* @__PURE__ */ (0, import_jsx_runtime.jsx)("a", { className: "brand", href: "/", children: "Agent Regression Lab Alpha" }) }),
21731
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("main", { className: "page", children: [
21732
- route.type === "list" ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)(RunListPage, {}) : null,
21733
- route.type === "detail" ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)(RunDetailPage, { runId: route.runId }) : null,
21734
- route.type === "compare" ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)(ComparePage, { baseline: route.baseline, candidate: route.candidate }) : null,
21735
- route.type === "compare-suite" ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)(SuiteComparePage, { baselineBatch: route.baselineBatch, candidateBatch: route.candidateBatch }) : null
21832
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)(Chrome, { route }),
21833
+ route.type === "list" ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)(RunsView, {}) : null,
21834
+ route.type === "detail" ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)(DetailView, { runId: route.runId }) : null,
21835
+ route.type === "compare" ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)(CompareView, { baseline: route.baseline, candidate: route.candidate }) : null,
21836
+ route.type === "compare-suite" ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)(
21837
+ SuiteCompareView,
21838
+ {
21839
+ baselineBatch: route.baselineBatch,
21840
+ candidateBatch: route.candidateBatch
21841
+ }
21842
+ ) : null
21843
+ ] });
21844
+ }
21845
+ function Chrome({ route }) {
21846
+ return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("header", { className: "chrome", children: [
21847
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("a", { className: "brand", href: "/", children: "ARL" }),
21848
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "brand-meta", children: "agentlab v0.4.0" }),
21849
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("nav", { className: "nav", children: [
21850
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("a", { className: `nv-btn${route.type === "list" ? " on" : ""}`, href: "/", children: "Runs" }),
21851
+ route.type === "detail" ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "nv-btn on", children: "Detail" }) : null,
21852
+ route.type === "compare" || route.type === "compare-suite" ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "nv-btn on", children: "Compare" }) : null
21853
+ ] }),
21854
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "live", children: [
21855
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "live-dot" }),
21856
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { children: "LIVE" })
21736
21857
  ] })
21737
21858
  ] });
21738
21859
  }
21739
- function RunListPage() {
21860
+ function RunsView() {
21740
21861
  const [runs, setRuns] = (0, import_react.useState)([]);
21741
- const [suite, setSuite] = (0, import_react.useState)("");
21742
- const [status, setStatus] = (0, import_react.useState)("");
21743
- const [provider, setProvider] = (0, import_react.useState)("");
21862
+ const [filter, setFilter] = (0, import_react.useState)("");
21863
+ const inputRef = (0, import_react.useRef)(null);
21744
21864
  (0, import_react.useEffect)(() => {
21745
- const url = new URL("/api/runs", window.location.origin);
21746
- if (suite) url.searchParams.set("suite", suite);
21747
- if (status) url.searchParams.set("status", status);
21748
- if (provider) url.searchParams.set("provider", provider);
21749
- void fetch(url).then((response) => response.json()).then((data) => setRuns(Array.isArray(data.runs) ? data.runs : []));
21750
- }, [suite, status, provider]);
21751
- return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("section", { children: [
21752
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "hero", children: [
21753
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("h1", { children: "Runs" }),
21754
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("p", { children: "Inspect local alpha runs, filter failures, and compare behavior changes." })
21755
- ] }),
21756
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "filters", children: [
21757
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("input", { value: suite, onChange: (event) => setSuite(event.target.value), placeholder: "Suite" }),
21758
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("select", { value: status, onChange: (event) => setStatus(event.target.value), children: [
21759
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("option", { value: "", children: "All statuses" }),
21760
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("option", { value: "pass", children: "Pass" }),
21761
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("option", { value: "fail", children: "Fail" }),
21762
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("option", { value: "error", children: "Error" })
21865
+ void fetch("/api/runs").then((r) => r.json()).then((data) => setRuns(Array.isArray(data.runs) ? data.runs : []));
21866
+ }, []);
21867
+ const filtered = filter ? runs.filter(
21868
+ (r) => r.scenarioId.toLowerCase().includes(filter.toLowerCase()) || r.suite.toLowerCase().includes(filter.toLowerCase())
21869
+ ) : runs;
21870
+ const stats = summarizeRuns(runs);
21871
+ return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "view-body", children: [
21872
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("aside", { className: "sidebar", children: [
21873
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "sb-head", children: [
21874
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "sb-kicker", children: "Session" }),
21875
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "sb-title", children: [
21876
+ "Agent ",
21877
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("em", { children: "Regression" }),
21878
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("br", {}),
21879
+ "Lab"
21880
+ ] }),
21881
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "tally", children: [
21882
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "t-cell", children: [
21883
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `t-v${stats.pass > 0 ? " p" : ""}`, children: stats.pass }),
21884
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-l", children: "Pass" })
21885
+ ] }),
21886
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "t-cell", children: [
21887
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `t-v${stats.fail > 0 ? " f" : ""}`, children: stats.fail }),
21888
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-l", children: "Fail" })
21889
+ ] }),
21890
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "t-cell", children: [
21891
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `t-v${stats.error > 0 ? " w" : ""}`, children: stats.error }),
21892
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-l", children: "Error" })
21893
+ ] })
21894
+ ] })
21763
21895
  ] }),
21764
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("select", { value: provider, onChange: (event) => setProvider(event.target.value), children: [
21765
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("option", { value: "", children: "All providers" }),
21766
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("option", { value: "mock", children: "Mock" }),
21767
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("option", { value: "openai", children: "OpenAI" }),
21768
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("option", { value: "external_process", children: "External process" })
21896
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "sb-filter", children: /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "filt", children: [
21897
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "filt-i", children: "/" }),
21898
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)(
21899
+ "input",
21900
+ {
21901
+ ref: inputRef,
21902
+ className: "filt-in",
21903
+ placeholder: "filter scenarios\u2026",
21904
+ value: filter,
21905
+ onChange: (e) => setFilter(e.target.value)
21906
+ }
21907
+ )
21908
+ ] }) }),
21909
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "run-list", children: [
21910
+ filtered.length === 0 && runs.length === 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "run-list-empty", children: "No runs yet. Run a scenario from the CLI." }) : null,
21911
+ filtered.map((run, i) => /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("a", { className: `run ${sc(run.status)}`, href: `/runs/${run.id}`, children: [
21912
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "run-name", children: [
21913
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: `run-dot ${sc(run.status)}` }),
21914
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "run-name-text", children: run.scenarioId })
21915
+ ] }),
21916
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `run-score ${sc(run.status)}`, children: run.score.toFixed(1) }),
21917
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "run-meta", children: [
21918
+ run.suite,
21919
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { style: { color: "var(--ink-lo)", margin: "0 3px" }, children: "\xB7" }),
21920
+ dur(run.durationMs),
21921
+ i > 0 && filtered[i - 1].scenarioId === run.scenarioId ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { style: { marginLeft: "6px" }, children: /* @__PURE__ */ (0, import_jsx_runtime.jsx)(
21922
+ "a",
21923
+ {
21924
+ href: `/compare?baseline=${filtered[i - 1].id}&candidate=${run.id}`,
21925
+ style: { color: "var(--accent)", fontSize: ".65rem" },
21926
+ onClick: (e) => e.stopPropagation(),
21927
+ children: "compare prev"
21928
+ }
21929
+ ) }) : null
21930
+ ] })
21931
+ ] }, run.id))
21769
21932
  ] })
21770
21933
  ] }),
21771
- runs.length === 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)(EmptyState, { title: "No runs yet", description: "Run a scenario from the CLI to populate the lab." }) : null,
21772
- runs.length > 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("table", { className: "table", children: [
21773
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("thead", { children: /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("tr", { children: [
21774
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("th", { children: "Run" }),
21775
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("th", { children: "Scenario" }),
21776
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("th", { children: "Provider" }),
21777
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("th", { children: "Status" }),
21778
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("th", { children: "Score" }),
21779
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("th", { children: "Runtime" }),
21780
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("th", { children: "Steps" }),
21781
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("th", { children: "Started" })
21782
- ] }) }),
21783
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("tbody", { children: runs.map((run, index) => /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("tr", { children: [
21784
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("td", { children: /* @__PURE__ */ (0, import_jsx_runtime.jsx)("a", { href: `/runs/${run.id}`, children: run.id }) }),
21785
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("td", { children: run.scenarioId }),
21786
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("td", { children: [
21787
- run.provider ?? "-",
21788
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "muted", children: run.modelId ?? run.agentLabel ?? "" })
21934
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("main", { className: "main", children: [
21935
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmd", children: [
21936
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-p", children: "$" }),
21937
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "cmd-t", children: [
21938
+ "agentlab ",
21939
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "w", children: "list" }),
21940
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-caret" })
21789
21941
  ] }),
21790
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("td", { children: /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: `pill ${run.status}`, children: run.status }) }),
21791
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("td", { children: run.score }),
21792
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("td", { children: [
21793
- run.durationMs,
21794
- "ms"
21942
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmd-r", children: [
21943
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { children: [
21944
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("kbd", { children: "\u21B5" }),
21945
+ "open"
21946
+ ] }),
21947
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { children: [
21948
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("kbd", { children: "c" }),
21949
+ "compare"
21950
+ ] }),
21951
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { children: [
21952
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("kbd", { children: "/" }),
21953
+ "filter"
21954
+ ] })
21955
+ ] })
21956
+ ] }),
21957
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "scroll", children: runs.length === 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)(EmptyIdle, {}) : /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(import_jsx_runtime.Fragment, { children: [
21958
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "ov-hero", children: [
21959
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "oh-eyebrow", children: "Session overview" }),
21960
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "oh-num", children: [
21961
+ stats.total,
21962
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "unit", children: "runs" })
21963
+ ] }),
21964
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { className: "oh-sub", children: [
21965
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "hi", children: [
21966
+ stats.pass,
21967
+ " passed"
21968
+ ] }),
21969
+ ", ",
21970
+ stats.fail,
21971
+ " failed,",
21972
+ " ",
21973
+ stats.error,
21974
+ " ",
21975
+ stats.error === 1 ? "error" : "errors",
21976
+ ". Average score",
21977
+ " ",
21978
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "hi", children: avgScore(runs) }),
21979
+ "."
21980
+ ] })
21795
21981
  ] }),
21796
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("td", { children: run.totalSteps }),
21797
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("td", { children: [
21798
- new Date(run.startedAt).toLocaleString(),
21799
- index > 0 && runs[index - 1].scenarioId === run.scenarioId ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "muted", children: /* @__PURE__ */ (0, import_jsx_runtime.jsx)("a", { href: `/compare?baseline=${runs[index - 1].id}&candidate=${run.id}`, children: "compare previous" }) }) : null,
21800
- index > 0 && runs[index - 1].suite === run.suite && runs[index - 1].suiteBatchId && run.suiteBatchId && runs[index - 1].suiteBatchId !== run.suiteBatchId ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "muted", children: /* @__PURE__ */ (0, import_jsx_runtime.jsx)("a", { href: `/compare-suite?baselineBatch=${runs[index - 1].suiteBatchId}&candidateBatch=${run.suiteBatchId}`, children: "compare suite batch" }) }) : null
21982
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "chart-wrap", children: [
21983
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "chart-h", children: [
21984
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { children: "Score over time" }),
21985
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "chart-h-r", children: [
21986
+ runs.length,
21987
+ " samples"
21988
+ ] })
21989
+ ] }),
21990
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)(ScoreChart, { runs })
21801
21991
  ] })
21802
- ] }, run.id)) })
21803
- ] }) : null
21992
+ ] }) })
21993
+ ] })
21804
21994
  ] });
21805
21995
  }
21806
- function RunDetailPage(props) {
21996
+ function DetailView({ runId }) {
21807
21997
  const [detail, setDetail] = (0, import_react.useState)(null);
21998
+ const [siblings, setSiblings] = (0, import_react.useState)([]);
21808
21999
  (0, import_react.useEffect)(() => {
21809
- void fetch(`/api/runs/${props.runId}`).then((response) => response.json()).then((data) => setDetail(data));
21810
- }, [props.runId]);
21811
- if (!detail) {
21812
- return /* @__PURE__ */ (0, import_jsx_runtime.jsx)(EmptyState, { title: "Loading run", description: "Fetching run detail from the local lab." });
21813
- }
21814
- return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("section", { children: [
21815
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "hero", children: [
21816
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("h1", { children: detail.run.id }),
21817
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("p", { children: detail.run.scenarioId })
21818
- ] }),
21819
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)(FailureSummaryPanel, { detail }),
21820
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "stats", children: [
21821
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)(Stat, { label: "Status", value: /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: `pill ${detail.run.status}`, children: detail.run.status }) }),
21822
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)(Stat, { label: "Score", value: detail.run.score }),
21823
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)(Stat, { label: "Runtime", value: `${detail.run.durationMs}ms` }),
21824
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)(Stat, { label: "Steps", value: detail.run.totalSteps })
21825
- ] }),
21826
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "panel-grid", children: [
21827
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("section", { className: "panel", children: [
21828
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("h2", { children: "Summary" }),
21829
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
21830
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Provider:" }),
21831
- " ",
21832
- detail.agentVersion?.provider ?? "-"
21833
- ] }),
21834
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
21835
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Model:" }),
21836
- " ",
21837
- detail.agentVersion?.modelId ?? "-"
21838
- ] }),
21839
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)(RunIdentitySummary, { detail }),
21840
- detail.agentVersion?.command ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
21841
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Command:" }),
21842
- " ",
21843
- detail.agentVersion.command,
21844
- " ",
21845
- (detail.agentVersion.args ?? []).join(" ")
21846
- ] }) : null,
21847
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
21848
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Termination:" }),
21849
- " ",
21850
- detail.run.terminationReason
22000
+ void fetch(`/api/runs/${runId}`).then((r) => r.json()).then((data) => {
22001
+ setDetail(data);
22002
+ return fetch("/api/runs").then((r) => r.json()).then((all) => {
22003
+ const items = Array.isArray(all.runs) ? all.runs : [];
22004
+ setSiblings(items.filter((r) => r.scenarioId === data.run.scenarioId));
22005
+ });
22006
+ });
22007
+ }, [runId]);
22008
+ return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "view-body", children: [
22009
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("aside", { className: "sidebar", children: [
22010
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "sb-head", children: [
22011
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "sb-kicker", children: [
22012
+ "Run \xB7 ",
22013
+ runId.slice(0, 8)
21851
22014
  ] }),
21852
- detail.errorDetail ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
21853
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Error:" }),
21854
- " ",
21855
- detail.errorDetail
21856
- ] }) : null,
21857
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("p", { children: /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Final output:" }) }),
21858
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("pre", { children: detail.run.finalOutput || "(none)" })
22015
+ detail ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(import_jsx_runtime.Fragment, { children: [
22016
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "sb-title", children: [
22017
+ detail.run.scenarioId.split("-")[0],
22018
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("br", {}),
22019
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("em", { children: [
22020
+ "\u2014",
22021
+ detail.run.scenarioId.split("-").slice(1).join("-")
22022
+ ] })
22023
+ ] }),
22024
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "tally", children: [
22025
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "t-cell", children: [
22026
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `t-v ${sc(detail.run.status)}`, children: detail.run.score.toFixed(1) }),
22027
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-l", children: "Score" })
22028
+ ] }),
22029
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "t-cell", children: [
22030
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-v", children: detail.run.totalSteps }),
22031
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-l", children: "Steps" })
22032
+ ] }),
22033
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "t-cell", children: [
22034
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-v", children: dur(detail.run.durationMs) }),
22035
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-l", children: "Time" })
22036
+ ] })
22037
+ ] })
22038
+ ] }) : /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "sb-title", children: /* @__PURE__ */ (0, import_jsx_runtime.jsx)("em", { children: "Loading\u2026" }) })
21859
22039
  ] }),
21860
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("section", { className: "panel", children: [
21861
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("h2", { children: "Evaluators" }),
21862
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("ul", { className: "stack", children: detail.evaluatorResults.map((result) => /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("li", { children: [
21863
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: `pill ${result.status}`, children: result.status }),
21864
- " ",
21865
- result.evaluatorId,
21866
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "muted", children: result.message })
21867
- ] }, result.evaluatorId)) })
21868
- ] })
21869
- ] }),
21870
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("section", { className: "panel", children: [
21871
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("h2", { children: "Tool Calls" }),
21872
- detail.toolCalls.length === 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("p", { className: "muted", children: "No tool calls recorded." }) : null,
21873
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("ul", { className: "stack", children: detail.toolCalls.map((call) => /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("li", { children: [
21874
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: call.toolName }),
21875
- " ",
21876
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: `pill ${call.status}`, children: call.status }),
21877
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("pre", { children: JSON.stringify({ input: call.input, output: call.output }, null, 2) })
21878
- ] }, call.id)) })
22040
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "run-list", children: siblings.map((run) => /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(
22041
+ "a",
22042
+ {
22043
+ className: `run ${sc(run.status)}${run.id === runId ? " on" : ""}`,
22044
+ href: `/runs/${run.id}`,
22045
+ children: [
22046
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "run-name", children: [
22047
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: `run-dot ${sc(run.status)}` }),
22048
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "run-name-text", children: run.id.slice(0, 12) })
22049
+ ] }),
22050
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `run-score ${sc(run.status)}`, children: run.score.toFixed(1) }),
22051
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "run-meta", children: run.id === runId ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { style: { color: "var(--accent)" }, children: "current" }) : rel(run.startedAt) })
22052
+ ]
22053
+ },
22054
+ run.id
22055
+ )) })
21879
22056
  ] }),
21880
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("section", { className: "panel", children: [
21881
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("h2", { children: "Trace" }),
21882
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("ol", { className: "timeline", children: detail.traceEvents.map((event) => /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("li", { children: [
21883
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { children: [
21884
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("strong", { children: [
21885
- event.stepIndex,
21886
- ". ",
21887
- event.type
21888
- ] }),
22057
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("main", { className: "main", children: [
22058
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmd", children: [
22059
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-p", children: "$" }),
22060
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "cmd-t", children: [
22061
+ "agentlab ",
22062
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "w", children: "show" }),
21889
22063
  " ",
21890
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "muted", children: event.source })
22064
+ runId,
22065
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-caret" })
21891
22066
  ] }),
21892
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("pre", { children: JSON.stringify(event.payload, null, 2) })
21893
- ] }, event.eventId)) })
21894
- ] })
21895
- ] });
21896
- }
21897
- function FailureSummaryPanel(props) {
21898
- const failureItems = getFailureSummaryItems(props.detail);
21899
- if (failureItems.length === 0) {
21900
- return null;
21901
- }
21902
- return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("section", { className: "panel", children: [
21903
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("h2", { children: "Failures First" }),
21904
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
21905
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Status:" }),
21906
- " ",
21907
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: `pill ${props.detail.run.status}`, children: props.detail.run.status })
21908
- ] }),
21909
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
21910
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Termination:" }),
21911
- " ",
21912
- props.detail.run.terminationReason
21913
- ] }),
21914
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("ul", { className: "stack", children: failureItems.map((item) => /* @__PURE__ */ (0, import_jsx_runtime.jsx)("li", { children: item }, item)) })
21915
- ] });
21916
- }
21917
- function RunIdentitySummary(props) {
21918
- const run = props.detail.run;
21919
- return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(import_jsx_runtime.Fragment, { children: [
21920
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
21921
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Variant set:" }),
21922
- " ",
21923
- run.variantSetName ?? "-"
21924
- ] }),
21925
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
21926
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Variant:" }),
21927
- " ",
21928
- run.variantLabel ?? "-"
21929
- ] }),
21930
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
21931
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Prompt version:" }),
21932
- " ",
21933
- run.promptVersion ?? "-"
21934
- ] }),
21935
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
21936
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Model version:" }),
21937
- " ",
21938
- run.modelVersion ?? "-"
21939
- ] }),
21940
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
21941
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Tool schema version:" }),
21942
- " ",
21943
- run.toolSchemaVersion ?? "-"
21944
- ] }),
21945
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
21946
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Config label:" }),
21947
- " ",
21948
- run.configLabel ?? "-"
21949
- ] }),
21950
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
21951
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Runtime profile:" }),
21952
- " ",
21953
- run.runtimeProfileName ?? "-"
21954
- ] }),
21955
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
21956
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Suite definition:" }),
21957
- " ",
21958
- run.suiteDefinitionName ?? "-"
22067
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmd-r", children: [
22068
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { children: [
22069
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("kbd", { children: "\u2190" }),
22070
+ "back"
22071
+ ] }),
22072
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { children: [
22073
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("kbd", { children: "c" }),
22074
+ "compare"
22075
+ ] })
22076
+ ] })
22077
+ ] }),
22078
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "scroll", children: !detail ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "empty", children: /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "em-title", children: "Loading\u2026" }) }) : /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(import_jsx_runtime.Fragment, { children: [
22079
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "dh", children: [
22080
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `dh-score ${sc(detail.run.status)}`, children: detail.run.score.toFixed(1) }),
22081
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "dh-meta", children: [
22082
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "dh-kicker", children: detail.agentVersion?.provider ?? "agent" }),
22083
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "dh-title", children: detail.run.scenarioId }),
22084
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "dh-row", children: [
22085
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { children: [
22086
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "d", children: "model" }),
22087
+ " ",
22088
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "v", children: detail.agentVersion?.modelId ?? "\u2014" })
22089
+ ] }),
22090
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { children: [
22091
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "d", children: "runtime" }),
22092
+ " ",
22093
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "v", children: dur(detail.run.durationMs) })
22094
+ ] }),
22095
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { children: [
22096
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "d", children: "evals" }),
22097
+ " ",
22098
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "v", children: [
22099
+ detail.evaluatorResults.filter((e) => e.status === "pass").length,
22100
+ " /",
22101
+ " ",
22102
+ detail.evaluatorResults.length
22103
+ ] })
22104
+ ] })
22105
+ ] })
22106
+ ] })
22107
+ ] }),
22108
+ detail.run.status !== "pass" ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)(FailureBlock, { detail }) : null,
22109
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "trace-section", children: [
22110
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "ts-head", children: [
22111
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "ts-title", children: [
22112
+ "Trace",
22113
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)(
22114
+ "span",
22115
+ {
22116
+ className: `ts-badge ts-badge-${detail.run.status === "pass" ? "complete" : "fail"}`,
22117
+ children: detail.run.status === "pass" ? "complete" : "failed"
22118
+ }
22119
+ )
22120
+ ] }),
22121
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "ts-stats", children: [
22122
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "v", children: detail.traceEvents.length }),
22123
+ " steps \xB7",
22124
+ " ",
22125
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "v", children: dur(detail.run.durationMs) })
22126
+ ] })
22127
+ ] }),
22128
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)(TraceTimeline, { events: detail.traceEvents })
22129
+ ] })
22130
+ ] }) })
21959
22131
  ] })
21960
22132
  ] });
21961
22133
  }
21962
- function ComparePage(props) {
22134
+ function CompareView({
22135
+ baseline,
22136
+ candidate
22137
+ }) {
21963
22138
  const [data, setData] = (0, import_react.useState)(null);
21964
22139
  (0, import_react.useEffect)(() => {
21965
- if (!props.baseline || !props.candidate) {
21966
- setData(null);
21967
- return;
21968
- }
22140
+ if (!baseline || !candidate) return;
21969
22141
  const url = new URL("/api/compare", window.location.origin);
21970
- url.searchParams.set("baseline", props.baseline);
21971
- url.searchParams.set("candidate", props.candidate);
21972
- void fetch(url).then((response) => response.json()).then((payload) => setData(payload));
21973
- }, [props.baseline, props.candidate]);
21974
- if (!props.baseline || !props.candidate) {
21975
- return /* @__PURE__ */ (0, import_jsx_runtime.jsx)(EmptyState, { title: "No comparison selected", description: "Open the compare page with baseline and candidate run ids." });
21976
- }
21977
- if (!data) {
21978
- return /* @__PURE__ */ (0, import_jsx_runtime.jsx)(EmptyState, { title: "Loading comparison", description: "Fetching both runs and computing deltas." });
22142
+ url.searchParams.set("baseline", baseline);
22143
+ url.searchParams.set("candidate", candidate);
22144
+ void fetch(url).then((r) => r.json()).then((p) => setData(p));
22145
+ }, [baseline, candidate]);
22146
+ if (!baseline || !candidate) {
22147
+ return /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "view-body", children: /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("main", { className: "main", style: { width: "100%" }, children: [
22148
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmd", children: [
22149
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-p", children: "$" }),
22150
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "cmd-t", style: { color: "var(--ink-lo)" }, children: [
22151
+ "agentlab compare \u2026",
22152
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-caret" })
22153
+ ] })
22154
+ ] }),
22155
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)(EmptyIdle, {})
22156
+ ] }) });
21979
22157
  }
21980
- return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("section", { children: [
21981
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "hero", children: [
21982
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("h1", { children: "Compare" }),
21983
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("p", { children: data.baseline.run.scenarioId })
21984
- ] }),
21985
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "stats", children: [
21986
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)(Stat, { label: "Classification", value: data.classification }),
21987
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)(Stat, { label: "Score delta", value: signed(data.deltas.score) }),
21988
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)(Stat, { label: "Runtime delta", value: `${signed(data.deltas.runtimeMs)}ms` }),
21989
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)(Stat, { label: "Step delta", value: signed(data.deltas.steps) })
21990
- ] }),
21991
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("section", { className: "panel", children: [
21992
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("h2", { children: "Notes" }),
21993
- data.notes.length === 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("p", { className: "muted", children: "No material differences recorded." }) : null,
21994
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("ul", { className: "stack", children: data.notes.map((note) => /* @__PURE__ */ (0, import_jsx_runtime.jsx)("li", { children: note }, note)) })
21995
- ] }),
21996
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "panel-grid", children: [
21997
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("section", { className: "panel", children: [
21998
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("h2", { children: "Evaluator diffs" }),
21999
- data.evaluatorDiffs.length === 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("p", { className: "muted", children: "No evaluator changes." }) : null,
22000
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("ul", { className: "stack", children: data.evaluatorDiffs.map((diff) => /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("li", { children: [
22001
- diff.note,
22002
- diff.hardGate ? " (hard gate)" : ""
22003
- ] }, diff.evaluatorId)) })
22158
+ const tone = data ? data.classification.includes("regress") ? "fail" : data.classification.includes("improv") ? "pass" : "neutral" : "neutral";
22159
+ const toneColor = tone === "fail" ? "var(--fail)" : tone === "pass" ? "var(--pass)" : "var(--ink-mid)";
22160
+ return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "view-body", children: [
22161
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("aside", { className: "sidebar", children: [
22162
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "sb-head", children: [
22163
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "sb-kicker", style: { color: toneColor }, children: data?.classification ?? "Comparing\u2026" }),
22164
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "sb-title", style: { color: tone === "neutral" ? "var(--ink)" : toneColor }, children: [
22165
+ data ? (data.deltas.score >= 0 ? "+" : "") + data.deltas.score.toFixed(2) : "\u2014",
22166
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("br", {}),
22167
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("em", { style: { color: toneColor }, children: "delta" })
22168
+ ] }),
22169
+ data ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "tally", children: [
22170
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "t-cell", children: [
22171
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-v p", children: data.baseline.run.score.toFixed(1) }),
22172
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-l", children: "Base" })
22173
+ ] }),
22174
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "t-cell", children: [
22175
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `t-v ${sc(data.candidate.run.status)}`, children: data.candidate.run.score.toFixed(1) }),
22176
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-l", children: "Cand" })
22177
+ ] })
22178
+ ] }) : null
22004
22179
  ] }),
22005
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("section", { className: "panel", children: [
22006
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("h2", { children: "Tool diffs" }),
22007
- data.toolDiffs.length === 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("p", { className: "muted", children: "No tool usage changes." }) : null,
22008
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("ul", { className: "stack", children: data.toolDiffs.map((diff) => /* @__PURE__ */ (0, import_jsx_runtime.jsx)("li", { children: diff.note }, diff.toolName)) })
22009
- ] })
22180
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "run-list", children: data ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(import_jsx_runtime.Fragment, { children: [
22181
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("a", { className: "run p", href: `/runs/${data.baseline.run.id}`, children: [
22182
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "run-name", children: [
22183
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "run-dot p" }),
22184
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "run-name-text", children: [
22185
+ data.baseline.run.id.slice(0, 12),
22186
+ " \xB7 base"
22187
+ ] })
22188
+ ] }),
22189
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "run-score p", children: data.baseline.run.score.toFixed(1) }),
22190
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "run-meta", children: [
22191
+ data.baseline.run.totalSteps,
22192
+ " steps"
22193
+ ] })
22194
+ ] }),
22195
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(
22196
+ "a",
22197
+ {
22198
+ className: `run on ${sc(data.candidate.run.status)}`,
22199
+ href: `/runs/${data.candidate.run.id}`,
22200
+ children: [
22201
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "run-name", children: [
22202
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: `run-dot ${sc(data.candidate.run.status)}` }),
22203
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "run-name-text", children: [
22204
+ data.candidate.run.id.slice(0, 12),
22205
+ " \xB7 cand"
22206
+ ] })
22207
+ ] }),
22208
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `run-score ${sc(data.candidate.run.status)}`, children: data.candidate.run.score.toFixed(1) }),
22209
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "run-meta", children: [
22210
+ data.candidate.run.totalSteps,
22211
+ " steps"
22212
+ ] })
22213
+ ]
22214
+ }
22215
+ )
22216
+ ] }) : null })
22010
22217
  ] }),
22011
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "compare-grid", children: [
22012
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)(RunSide, { title: "Baseline", detail: data.baseline }),
22013
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)(RunSide, { title: "Candidate", detail: data.candidate })
22218
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("main", { className: "main", children: [
22219
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmd", children: [
22220
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-p", children: "$" }),
22221
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "cmd-t", children: [
22222
+ "agentlab ",
22223
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "w", children: "compare" }),
22224
+ " ",
22225
+ baseline.slice(0, 8),
22226
+ " ",
22227
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "w", children: "\u2192" }),
22228
+ " ",
22229
+ candidate.slice(0, 8),
22230
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-caret" })
22231
+ ] }),
22232
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "cmd-r", children: /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { children: [
22233
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("kbd", { children: "\u2190" }),
22234
+ "back"
22235
+ ] }) })
22236
+ ] }),
22237
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "scroll", children: !data ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "empty", children: /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "em-title", children: "Loading comparison\u2026" }) }) : /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(import_jsx_runtime.Fragment, { children: [
22238
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmp-hero", children: [
22239
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmp-eyebrow", style: { color: toneColor }, children: [
22240
+ tone === "fail" ? "Regression detected" : tone === "pass" ? "Improvement" : data.classification,
22241
+ " ",
22242
+ "\xB7 ",
22243
+ data.baseline.run.scenarioId
22244
+ ] }),
22245
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmp-grid", children: [
22246
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmp-side", children: [
22247
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmp-side-label", children: [
22248
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "v", children: "baseline" }),
22249
+ " \xB7 ",
22250
+ baseline.slice(0, 6)
22251
+ ] }),
22252
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "cmp-side-score", style: { color: "var(--pass)" }, children: data.baseline.run.score.toFixed(1) })
22253
+ ] }),
22254
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { children: [
22255
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "cmp-arrow", style: { color: toneColor }, children: "\u2192" }),
22256
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmp-delta", style: { color: toneColor }, children: [
22257
+ data.deltas.score >= 0 ? "+" : "",
22258
+ data.deltas.score.toFixed(2)
22259
+ ] })
22260
+ ] }),
22261
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmp-side r", children: [
22262
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmp-side-label", children: [
22263
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "v", children: "candidate" }),
22264
+ " \xB7 ",
22265
+ candidate.slice(0, 6)
22266
+ ] }),
22267
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)(
22268
+ "div",
22269
+ {
22270
+ className: "cmp-side-score",
22271
+ style: { color: scColor(data.candidate.run.status) },
22272
+ children: data.candidate.run.score.toFixed(1)
22273
+ }
22274
+ )
22275
+ ] })
22276
+ ] }),
22277
+ data.notes.length > 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(import_jsx_runtime.Fragment, { children: [
22278
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "cmp-verdict", children: data.notes[0] }),
22279
+ data.notes.length > 1 ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "cmp-verdict-sub", children: data.notes.slice(1).join(" \xB7 ") }) : null
22280
+ ] }) : null
22281
+ ] }),
22282
+ data.toolDiffs.length > 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diverge", children: [
22283
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "dv-h", children: "Tool-call timeline" }),
22284
+ data.toolDiffs.map((diff) => /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "dv-track", children: [
22285
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "dv-label", children: diff.toolName }),
22286
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "dv-steps", children: [
22287
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: `dv-step ${riskClass(diff.risk)}`, children: [
22288
+ "base: ",
22289
+ diff.baselineCount
22290
+ ] }),
22291
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: `dv-step ${riskClass(diff.risk)}`, children: [
22292
+ "cand: ",
22293
+ diff.candidateCount
22294
+ ] }),
22295
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `dv-step ${riskClass(diff.risk)}`, style: { flex: 4 }, children: diff.note })
22296
+ ] })
22297
+ ] }, diff.toolName))
22298
+ ] }) : null,
22299
+ data.evaluatorDiffs.length > 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diffs", children: [
22300
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "diffs-h", children: "Evaluator changes" }),
22301
+ data.evaluatorDiffs.map((diff) => /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diff-row", children: [
22302
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-label", children: diff.evaluatorId }),
22303
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-cell base", children: diff.baselineStatus ?? "\u2014" }),
22304
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-arrow", children: "\u2192" }),
22305
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-cell cand", children: diff.candidateStatus ?? "\u2014" })
22306
+ ] }, diff.evaluatorId))
22307
+ ] }) : null,
22308
+ data.outputChanged ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diffs", children: [
22309
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "diffs-h", children: "Output diff" }),
22310
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diff-row", children: [
22311
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-label", children: "Final output" }),
22312
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-cell base", children: data.baseline.run.finalOutput || "(none)" }),
22313
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-arrow", children: "\u2192" }),
22314
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-cell cand", children: data.candidate.run.finalOutput || "(none)" })
22315
+ ] })
22316
+ ] }) : null
22317
+ ] }) })
22014
22318
  ] })
22015
22319
  ] });
22016
22320
  }
22017
- function RunSide(props) {
22018
- return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("section", { className: "panel", children: [
22019
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("h2", { children: props.title }),
22020
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
22021
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Run:" }),
22022
- " ",
22023
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("a", { href: `/runs/${props.detail.run.id}`, children: props.detail.run.id })
22024
- ] }),
22025
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
22026
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Status:" }),
22027
- " ",
22028
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: `pill ${props.detail.run.status}`, children: props.detail.run.status })
22029
- ] }),
22030
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
22031
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Score:" }),
22032
- " ",
22033
- props.detail.run.score
22034
- ] }),
22035
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
22036
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Runtime:" }),
22037
- " ",
22038
- props.detail.run.durationMs,
22039
- "ms"
22040
- ] }),
22041
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
22042
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Termination:" }),
22043
- " ",
22044
- props.detail.run.terminationReason
22045
- ] }),
22046
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
22047
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Agent:" }),
22048
- " ",
22049
- props.detail.agentVersion?.label ?? "-"
22050
- ] }),
22051
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
22052
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Provider:" }),
22053
- " ",
22054
- props.detail.agentVersion?.provider ?? "-"
22055
- ] }),
22056
- props.detail.agentVersion?.modelId ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
22057
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Model:" }),
22058
- " ",
22059
- props.detail.agentVersion.modelId
22060
- ] }) : null,
22061
- props.detail.agentVersion?.command ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
22062
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Command:" }),
22063
- " ",
22064
- props.detail.agentVersion.command,
22065
- " ",
22066
- (props.detail.agentVersion.args ?? []).join(" ")
22067
- ] }) : null,
22068
- props.detail.errorDetail ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
22069
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Error:" }),
22070
- " ",
22071
- props.detail.errorDetail
22072
- ] }) : null,
22073
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("p", { children: /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Final output:" }) }),
22074
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("pre", { children: props.detail.run.finalOutput || "(none)" }),
22075
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("h3", { children: "Trace" }),
22076
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("ol", { className: "timeline compact", children: props.detail.traceEvents.map((event) => /* @__PURE__ */ (0, import_jsx_runtime.jsx)("li", { children: /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("strong", { children: [
22077
- event.stepIndex,
22078
- ". ",
22079
- event.type
22080
- ] }) }, event.eventId)) })
22081
- ] });
22082
- }
22083
- function SuiteComparePage(props) {
22321
+ function SuiteCompareView({
22322
+ baselineBatch,
22323
+ candidateBatch
22324
+ }) {
22084
22325
  const [data, setData] = (0, import_react.useState)(null);
22085
22326
  (0, import_react.useEffect)(() => {
22086
- if (!props.baselineBatch || !props.candidateBatch) {
22087
- setData(null);
22088
- return;
22089
- }
22327
+ if (!baselineBatch || !candidateBatch) return;
22090
22328
  const url = new URL("/api/compare-suite", window.location.origin);
22091
- url.searchParams.set("baselineBatch", props.baselineBatch);
22092
- url.searchParams.set("candidateBatch", props.candidateBatch);
22093
- void fetch(url).then((response) => response.json()).then((payload) => setData(payload));
22094
- }, [props.baselineBatch, props.candidateBatch]);
22095
- if (!props.baselineBatch || !props.candidateBatch) {
22096
- return /* @__PURE__ */ (0, import_jsx_runtime.jsx)(EmptyState, { title: "No suite comparison selected", description: "Open the suite compare page with baseline and candidate batch ids." });
22097
- }
22098
- if (!data) {
22099
- return /* @__PURE__ */ (0, import_jsx_runtime.jsx)(EmptyState, { title: "Loading suite comparison", description: "Fetching suite batches and computing regressions." });
22329
+ url.searchParams.set("baselineBatch", baselineBatch);
22330
+ url.searchParams.set("candidateBatch", candidateBatch);
22331
+ void fetch(url).then((r) => r.json()).then((p) => setData(p));
22332
+ }, [baselineBatch, candidateBatch]);
22333
+ if (!baselineBatch || !candidateBatch) {
22334
+ return /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "view-body", children: /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("main", { className: "main", style: { width: "100%" }, children: [
22335
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmd", children: [
22336
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-p", children: "$" }),
22337
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "cmd-t", style: { color: "var(--ink-lo)" }, children: [
22338
+ "agentlab compare --suite \u2026",
22339
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-caret" })
22340
+ ] })
22341
+ ] }),
22342
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)(EmptyIdle, {})
22343
+ ] }) });
22100
22344
  }
22101
- return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("section", { children: [
22102
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "hero", children: [
22103
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("h1", { children: "Suite Compare" }),
22104
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("p", { children: data.suite })
22105
- ] }),
22106
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "stats", children: [
22107
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)(Stat, { label: "Classification", value: data.classification }),
22108
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)(Stat, { label: "Pass delta", value: signed(data.deltas.pass) }),
22109
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)(Stat, { label: "Fail delta", value: signed(data.deltas.fail) }),
22110
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)(Stat, { label: "Score delta", value: signed(data.deltas.averageScore) }),
22111
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)(Stat, { label: "Runtime delta", value: `${signed(data.deltas.averageRuntimeMs)}ms` }),
22112
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)(Stat, { label: "Step delta", value: signed(data.deltas.averageSteps) })
22113
- ] }),
22114
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("section", { className: "panel", children: [
22115
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("h2", { children: "Notes" }),
22116
- data.notes.length === 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("p", { className: "muted", children: "No suite-level notes recorded." }) : null,
22117
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("ul", { className: "stack", children: data.notes.map((note) => /* @__PURE__ */ (0, import_jsx_runtime.jsx)("li", { children: note }, note)) })
22118
- ] }),
22119
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "panel-grid", children: [
22120
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)(ScenarioList, { title: "Regressions", items: data.regressions }),
22121
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)(ScenarioList, { title: "Improvements", items: data.improvements })
22345
+ return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "view-body", children: [
22346
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("aside", { className: "sidebar", children: [
22347
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "sb-head", children: [
22348
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "sb-kicker", children: "Suite Compare" }),
22349
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "sb-title", children: [
22350
+ data?.suite ?? "Loading\u2026",
22351
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("br", {}),
22352
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("em", { children: data?.classification ?? "" })
22353
+ ] }),
22354
+ data ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "tally", children: [
22355
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "t-cell", children: [
22356
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `t-v${data.regressions.length > 0 ? " f" : ""}`, children: data.regressions.length }),
22357
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-l", children: "Regress" })
22358
+ ] }),
22359
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "t-cell", children: [
22360
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `t-v${data.improvements.length > 0 ? " p" : ""}`, children: data.improvements.length }),
22361
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-l", children: "Improve" })
22362
+ ] }),
22363
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "t-cell", children: [
22364
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-v", children: data.unchanged.length }),
22365
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-l", children: "Same" })
22366
+ ] })
22367
+ ] }) : null
22368
+ ] }),
22369
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "run-list", children: data ? [
22370
+ ...data.regressions.map((item) => ({
22371
+ ...item,
22372
+ kind: "fail"
22373
+ })),
22374
+ ...data.improvements.map((item) => ({
22375
+ ...item,
22376
+ kind: "pass"
22377
+ }))
22378
+ ].map((item) => /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(
22379
+ "a",
22380
+ {
22381
+ className: `run ${item.kind}`,
22382
+ href: `/compare?baseline=${item.comparison.baseline.run.id}&candidate=${item.comparison.candidate.run.id}`,
22383
+ children: [
22384
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "run-name", children: [
22385
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: `run-dot ${item.kind}` }),
22386
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "run-name-text", children: item.scenarioId })
22387
+ ] }),
22388
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `run-score ${item.kind}`, children: item.comparison.candidate.run.score.toFixed(1) }),
22389
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "run-meta", children: item.comparison.classification })
22390
+ ]
22391
+ },
22392
+ item.scenarioId
22393
+ )) : null })
22122
22394
  ] }),
22123
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("section", { className: "panel", children: [
22124
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("h2", { children: "Missing scenarios" }),
22125
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
22126
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Missing from candidate:" }),
22127
- " ",
22128
- data.missingFromCandidate.join(", ") || "None"
22395
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("main", { className: "main", children: [
22396
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmd", children: [
22397
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-p", children: "$" }),
22398
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "cmd-t", children: [
22399
+ "agentlab ",
22400
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "w", children: "compare" }),
22401
+ " --suite ",
22402
+ baselineBatch.slice(0, 8),
22403
+ " ",
22404
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "w", children: "\u2192" }),
22405
+ " ",
22406
+ candidateBatch.slice(0, 8),
22407
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-caret" })
22408
+ ] }),
22409
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "cmd-r", children: /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { children: [
22410
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("kbd", { children: "\u2190" }),
22411
+ "back"
22412
+ ] }) })
22129
22413
  ] }),
22130
- /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
22131
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Missing from baseline:" }),
22132
- " ",
22133
- data.missingFromBaseline.join(", ") || "None"
22134
- ] })
22414
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "scroll", children: !data ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "empty", children: /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "em-title", children: "Loading suite comparison\u2026" }) }) : /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(import_jsx_runtime.Fragment, { children: [
22415
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "ov-hero", children: [
22416
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "oh-eyebrow", children: [
22417
+ "Suite \xB7 ",
22418
+ data.suite
22419
+ ] }),
22420
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "oh-num", children: [
22421
+ data.regressions.length + data.improvements.length + data.unchanged.length,
22422
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "unit", children: "scenarios" })
22423
+ ] }),
22424
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { className: "oh-sub", children: [
22425
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "hi", children: [
22426
+ data.improvements.length,
22427
+ " improved"
22428
+ ] }),
22429
+ ",",
22430
+ " ",
22431
+ data.regressions.length,
22432
+ " regressed, ",
22433
+ data.unchanged.length,
22434
+ " unchanged. Score delta",
22435
+ " ",
22436
+ /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "hi", children: [
22437
+ data.deltas.averageScore >= 0 ? "+" : "",
22438
+ data.deltas.averageScore.toFixed(2)
22439
+ ] }),
22440
+ "."
22441
+ ] })
22442
+ ] }),
22443
+ data.regressions.length > 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diffs", children: [
22444
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "diffs-h", children: "Regressions" }),
22445
+ data.regressions.map((item) => /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diff-row", children: [
22446
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-label", children: item.scenarioId }),
22447
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-cell base", children: item.comparison.baseline.run.score.toFixed(1) }),
22448
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-arrow", children: "\u2192" }),
22449
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-cell cand", children: /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(
22450
+ "a",
22451
+ {
22452
+ href: `/compare?baseline=${item.comparison.baseline.run.id}&candidate=${item.comparison.candidate.run.id}`,
22453
+ style: { color: "var(--fail)" },
22454
+ children: [
22455
+ item.comparison.candidate.run.score.toFixed(1),
22456
+ " \u2014 view"
22457
+ ]
22458
+ }
22459
+ ) })
22460
+ ] }, item.scenarioId))
22461
+ ] }) : null,
22462
+ data.improvements.length > 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diffs", children: [
22463
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "diffs-h", children: "Improvements" }),
22464
+ data.improvements.map((item) => /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diff-row", children: [
22465
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-label", children: item.scenarioId }),
22466
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-cell base", children: item.comparison.baseline.run.score.toFixed(1) }),
22467
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-arrow", children: "\u2192" }),
22468
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)(
22469
+ "div",
22470
+ {
22471
+ className: "df-cell",
22472
+ style: {
22473
+ background: "color-mix(in srgb, var(--pass) 8%, transparent)",
22474
+ border: "1px solid color-mix(in srgb, var(--pass) 25%, transparent)"
22475
+ },
22476
+ children: /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(
22477
+ "a",
22478
+ {
22479
+ href: `/compare?baseline=${item.comparison.baseline.run.id}&candidate=${item.comparison.candidate.run.id}`,
22480
+ style: { color: "var(--pass)" },
22481
+ children: [
22482
+ item.comparison.candidate.run.score.toFixed(1),
22483
+ " \u2014 view"
22484
+ ]
22485
+ }
22486
+ )
22487
+ }
22488
+ )
22489
+ ] }, item.scenarioId))
22490
+ ] }) : null,
22491
+ data.missingFromCandidate.length > 0 || data.missingFromBaseline.length > 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diffs", children: [
22492
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "diffs-h", children: "Missing scenarios" }),
22493
+ data.missingFromCandidate.length > 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diff-row", children: [
22494
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-label", children: "From candidate" }),
22495
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)(
22496
+ "div",
22497
+ {
22498
+ className: "df-cell cand",
22499
+ style: { gridColumn: "2 / 5" },
22500
+ children: data.missingFromCandidate.join(", ")
22501
+ }
22502
+ )
22503
+ ] }) : null,
22504
+ data.missingFromBaseline.length > 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diff-row", children: [
22505
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-label", children: "From baseline" }),
22506
+ /* @__PURE__ */ (0, import_jsx_runtime.jsx)(
22507
+ "div",
22508
+ {
22509
+ className: "df-cell base",
22510
+ style: { gridColumn: "2 / 5" },
22511
+ children: data.missingFromBaseline.join(", ")
22512
+ }
22513
+ )
22514
+ ] }) : null
22515
+ ] }) : null
22516
+ ] }) })
22135
22517
  ] })
22136
22518
  ] });
22137
22519
  }
22138
- function ScenarioList(props) {
22139
- return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("section", { className: "panel", children: [
22140
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("h2", { children: props.title }),
22141
- props.items.length === 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("p", { className: "muted", children: "None." }) : null,
22142
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("ul", { className: "stack", children: props.items.map((item) => /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("li", { children: [
22143
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: item.scenarioId }),
22144
- " ",
22145
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "muted", children: item.comparison.classification }),
22146
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { children: /* @__PURE__ */ (0, import_jsx_runtime.jsx)("a", { href: `/compare?baseline=${item.comparison.baseline.run.id}&candidate=${item.comparison.candidate.run.id}`, children: "open run compare" }) })
22147
- ] }, item.scenarioId)) })
22520
/**
 * Draws the score history of a set of runs as an SVG area/line chart.
 *
 * Runs are ordered by `startedAt` (oldest first) and spread evenly along the
 * x axis between 30 and 930 inside a 960x140 viewBox. The y coordinate is
 * `20 + (1 - score) * 100`, so a score of 1 lands on the top guide (y=20) and
 * a score of 0 on the bottom guide (y=120).
 * NOTE(review): this mapping presumes `score` is normalised to 0..1 — confirm.
 *
 * Every run whose status is not "pass" is marked with a hollow circle.
 *
 * @param {{ runs: Iterable<{ startedAt: *, score: number, status: string, id: * }> }} props
 * @returns A React element: the chart, or an empty fragment when fewer than
 *   two runs are supplied (a single sample cannot form a line).
 */
function ScoreChart({ runs }) {
  const { jsx, jsxs, Fragment } = import_jsx_runtime;

  const toMillis = (value) => new Date(value).getTime();
  // Sort a copy so the caller's array is left untouched.
  const chronological = [...runs].sort(
    (left, right) => toMillis(left.startedAt) - toMillis(right.startedAt)
  );

  const lastIndex = chronological.length - 1;
  if (lastIndex < 1) {
    return jsx(Fragment, {});
  }

  const points = chronological.map(({ score, status, id }, index) => ({
    x: 30 + index / lastIndex * 900,
    y: 20 + (1 - score) * 100,
    status,
    id
  }));
  const first = points[0];
  const last = points[lastIndex];

  // Each segment is a cubic Bezier whose two control points sit at the
  // horizontal midpoint between neighbours, which yields a smooth S-curve.
  const curveSegments = points.slice(1).map((current, offset) => {
    const previous = points[offset];
    const controlX = (previous.x + current.x) / 2;
    return ` C${controlX},${previous.y} ${controlX},${current.y} ${current.x},${current.y}`;
  });
  const linePath = `M${first.x},${first.y}${curveSegments.join("")}`;
  // Close the curve down to the bottom edge of the viewBox for the gradient fill.
  const fillPath = `${linePath} L${last.x},140 L${first.x},140 Z`;

  const gradient = jsx("defs", {
    children: jsxs("linearGradient", {
      id: "hz-fill",
      x1: "0",
      y1: "0",
      x2: "0",
      y2: "1",
      children: [
        jsx("stop", { offset: "0%", stopColor: "var(--accent)", stopOpacity: ".18" }),
        jsx("stop", { offset: "100%", stopColor: "var(--accent)", stopOpacity: "0" })
      ]
    })
  });

  const guides = [
    jsx("line", { x1: "0", y1: "20", x2: "960", y2: "20", stroke: "var(--line)", strokeDasharray: "2 4" }),
    jsx("line", { x1: "0", y1: "70", x2: "960", y2: "70", stroke: "var(--line)", strokeDasharray: "2 4" }),
    jsx("line", { x1: "0", y1: "120", x2: "960", y2: "120", stroke: "var(--line)" })
  ];

  const area = jsx("path", { d: fillPath, fill: "url(#hz-fill)" });
  const curve = jsx("path", {
    d: linePath,
    fill: "none",
    stroke: "var(--accent)",
    strokeWidth: "1.4",
    strokeLinecap: "round",
    style: {
      filter: "drop-shadow(0 0 5px color-mix(in srgb, var(--accent) 50%, transparent))"
    }
  });

  const failureMarkers = points
    .filter((point) => point.status !== "pass")
    .map(({ x, y, id }) =>
      jsx(
        "circle",
        {
          cx: x,
          cy: y,
          r: "4",
          fill: "var(--bg)",
          stroke: "var(--fail)",
          strokeWidth: "1.5"
        },
        id
      )
    );

  return jsxs("svg", {
    viewBox: "0 0 960 140",
    style: { width: "100%", display: "block" },
    // The guides are spread so they remain direct (static) children of the svg.
    children: [gradient, ...guides, area, curve, failureMarkers]
  });
}
22150
- function Stat(props) {
22151
- return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "stat", children: [
22152
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "muted", children: props.label }),
22153
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "stat-value", children: props.value })
22154
- ] });
22578
/**
 * Renders a list of trace events as a vertical, numbered timeline.
 *
 * For each event the kind, label, title and body are obtained from
 * `getEventKind`, `getEventLabel`, `getEventTitle` and `getEventBody`
 * (defined elsewhere in this file). Steps are keyed by `event.eventId`, and
 * their CSS animation delay is staggered by 55ms per position. The final step
 * gets the "p" node modifier; every other step gets "active".
 *
 * @param {{ events: Array<{ type: *, eventId: * }> }} props
 * @returns A React element wrapping one "t-step" per event.
 */
function TraceTimeline({
  events
}) {
  const { jsx, jsxs } = import_jsx_runtime;
  const finalIndex = events.length - 1;

  const steps = events.map((event, index) => {
    const kind = getEventKind(event.type);
    const label = getEventLabel(event.type);
    const title = getEventTitle(event);
    const body = getEventBody(event);
    const nodeClass = index === finalIndex ? "t-node p" : "t-node active";

    const marker = jsx("div", { className: nodeClass, children: index + 1 });
    const heading = jsxs("div", {
      className: "t-head",
      children: [
        jsx("span", { className: `t-kind ${kind}`, children: label }),
        jsx("span", { className: "t-title", children: title })
      ]
    });
    // The body row is omitted entirely when the helper yields a falsy body.
    const detail = body ? jsx("div", { className: "t-body", children: body }) : null;

    return jsxs(
      "div",
      {
        className: "t-step",
        style: { animationDelay: `${index * 55}ms` },
        children: [marker, heading, detail]
      },
      event.eventId
    );
  });

  return jsx("div", { className: "trace", children: steps });
}
22156
- function EmptyState(props) {
22157
- return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("section", { className: "empty", children: [
22158
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("h1", { children: props.title }),
22159
- /* @__PURE__ */ (0, import_jsx_runtime.jsx)("p", { children: props.description })
22605
/**
 * Summarises why a run failed.
 *
 * Shows, in order: the run's `errorDetail` (when truthy), one line per
 * evaluator result whose status is "fail" (keyed by `evaluatorId`), and, only
 * when neither of those produced anything, a generic hint to inspect the trace.
 *
 * @param {{ detail: { errorDetail?: *, evaluatorResults: Array<{ status: string, evaluatorId: *, message: * }> } }} props
 * @returns A React element with the "failure-panel-block" class.
 */
function FailureBlock({ detail }) {
  const { jsx, jsxs } = import_jsx_runtime;
  const failedEvaluators = detail.evaluatorResults.filter(({ status }) => status === "fail");
  const { errorDetail } = detail;

  const heading = jsx("div", { className: "fp-label", children: "Failure" });

  const errorItem = errorDetail
    ? jsx("div", { className: "fp-item", children: errorDetail })
    : null;

  const evaluatorItems = failedEvaluators.map(({ evaluatorId, message }) =>
    jsxs(
      "div",
      {
        className: "fp-item",
        children: [
          jsx("span", { className: "fp-x", children: "\u2717" }),
          " ",
          evaluatorId,
          ": ",
          message
        ]
      },
      evaluatorId
    )
  );

  // Fallback shown only when there is nothing more specific to report.
  const nothingSpecific = !errorDetail && failedEvaluators.length === 0;
  const fallbackItem = nothingSpecific
    ? jsx("div", {
        className: "fp-item",
        children: "Run did not pass. Inspect the trace for the first divergence."
      })
    : null;

  return jsxs("div", {
    className: "failure-panel-block",
    children: [heading, errorItem, evaluatorItems, fallbackItem]
  });
}
22162
- function getFailureSummaryItems(detail) {
22163
- const items = [];
22164
- if (detail.errorDetail) {
22165
- items.push(`Error: ${detail.errorDetail}`);
22166
- }
22167
- for (const result of detail.evaluatorResults) {
22168
- if (result.status === "fail") {
22169
- items.push(`Evaluator ${result.evaluatorId}: ${result.message}`);
22170
- }
22171
- }
22172
- if (detail.run.status !== "pass" && items.length === 0) {
22173
- items.push("Run did not pass. Inspect evaluator results and trace for the first divergence.");
22174
- }
22175
- return items;
22176
- }
22177
- function signed(value) {
22178
- return value > 0 ? `+${value}` : `${value}`;
22620
/**
 * Static placeholder rendered when there is nothing to display yet: a mark,
 * a title, a one-line explanation and an example CLI command.
 *
 * @returns A React element with the "empty" class containing four rows.
 */
function EmptyIdle() {
  const { jsx, jsxs } = import_jsx_runtime;

  // [className, text] for each row, in display order.
  const rows = [
    ["em-mark", "ARL"],
    ["em-title", "Awaiting signal"],
    ["em-sub", "Run a scenario from the CLI and traces will appear here in real time."],
    ["em-cmd", "agentlab run --scenario refund-flow"]
  ];

  return jsxs("div", {
    className: "empty",
    children: rows.map(([className, children]) => jsx("div", { className, children }))
  });
}
22180
- function getRoute() {
22181
- const url = new URL(window.location.href);
22182
- if (url.pathname.startsWith("/runs/")) {
22183
- return { type: "detail", runId: decodeURIComponent(url.pathname.slice("/runs/".length)) };
22184
- }
22185
- if (url.pathname === "/compare-suite") {
22186
- return {
22187
- type: "compare-suite",
22188
- baselineBatch: url.searchParams.get("baselineBatch") ?? void 0,
22189
- candidateBatch: url.searchParams.get("candidateBatch") ?? void 0
22190
- };
22191
- }
22192
- if (url.pathname === "/compare") {
22193
- return {
22194
- type: "compare",
22195
- baseline: url.searchParams.get("baseline") ?? void 0,
22196
- candidate: url.searchParams.get("candidate") ?? void 0
22197
- };
22198
- }
22199
- return { type: "list" };
22628
/**
 * Builds headline counters for a list of runs.
 *
 * `total` is the list length; `pass`, `fail` and `error` count the runs whose
 * `status` strictly equals that word (any other status only contributes to
 * `total`). `latestSuite` and `latestProvider` are read from the first run,
 * falling back to "-" when the list is empty or the field is null/undefined.
 * NOTE(review): "latest" presumes the caller passes runs newest-first — confirm.
 *
 * @param {Array<{ status?: string, suite?: string, provider?: string }>} runs
 * @returns {{ total: number, pass: number, fail: number, error: number, latestSuite: string, latestProvider: string }}
 */
function summarizeRuns(runs) {
  const total = runs.length;
  const countWithStatus = (wanted) => runs.filter(({ status }) => status === wanted).length;
  const [pass, fail, error] = ["pass", "fail", "error"].map((status) => countWithStatus(status));
  const first = runs[0];

  return {
    total,
    pass,
    fail,
    error,
    latestSuite: first?.suite ?? "-",
    latestProvider: first?.provider ?? "-"
  };
}
22201
22638
 
22202
22639
  // src/ui/client.tsx