agent-regression-lab 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/ui-assets/client.js
CHANGED
|
@@ -17481,13 +17481,13 @@ var require_react_dom_client_development = __commonJS({
|
|
|
17481
17481
|
function getHoistableRoot(container2) {
|
|
17482
17482
|
return "function" === typeof container2.getRootNode ? container2.getRootNode() : 9 === container2.nodeType ? container2 : container2.ownerDocument;
|
|
17483
17483
|
}
|
|
17484
|
-
function preconnectAs(
|
|
17484
|
+
function preconnectAs(rel2, href, crossOrigin) {
|
|
17485
17485
|
var ownerDocument = globalDocument;
|
|
17486
17486
|
if (ownerDocument && "string" === typeof href && href) {
|
|
17487
17487
|
var limitedEscapedHref = escapeSelectorAttributeValueInsideDoubleQuotes(href);
|
|
17488
|
-
limitedEscapedHref = 'link[rel="' +
|
|
17488
|
+
limitedEscapedHref = 'link[rel="' + rel2 + '"][href="' + limitedEscapedHref + '"]';
|
|
17489
17489
|
"string" === typeof crossOrigin && (limitedEscapedHref += '[crossorigin="' + crossOrigin + '"]');
|
|
17490
|
-
preconnectsSet.has(limitedEscapedHref) || (preconnectsSet.add(limitedEscapedHref),
|
|
17490
|
+
preconnectsSet.has(limitedEscapedHref) || (preconnectsSet.add(limitedEscapedHref), rel2 = { rel: rel2, crossOrigin, href }, null === ownerDocument.querySelector(limitedEscapedHref) && (href = ownerDocument.createElement("link"), setInitialProperties(href, "link", rel2), markNodeAsHoistable(href), ownerDocument.head.appendChild(href)));
|
|
17491
17491
|
}
|
|
17492
17492
|
}
|
|
17493
17493
|
function getResource(type, currentProps, pendingProps, currentResource) {
|
|
@@ -21724,562 +21724,917 @@ var import_client = __toESM(require_client(), 1);
|
|
|
21724
21724
|
// src/ui/App.tsx
|
|
21725
21725
|
var import_react = __toESM(require_react(), 1);
|
|
21726
21726
|
var import_jsx_runtime = __toESM(require_jsx_runtime(), 1);
|
|
21727
|
+
function getRoute() {
|
|
21728
|
+
const url = new URL(window.location.href);
|
|
21729
|
+
if (url.pathname.startsWith("/runs/")) {
|
|
21730
|
+
return { type: "detail", runId: decodeURIComponent(url.pathname.slice("/runs/".length)) };
|
|
21731
|
+
}
|
|
21732
|
+
if (url.pathname === "/compare-suite") {
|
|
21733
|
+
return {
|
|
21734
|
+
type: "compare-suite",
|
|
21735
|
+
baselineBatch: url.searchParams.get("baselineBatch") ?? void 0,
|
|
21736
|
+
candidateBatch: url.searchParams.get("candidateBatch") ?? void 0
|
|
21737
|
+
};
|
|
21738
|
+
}
|
|
21739
|
+
if (url.pathname === "/compare") {
|
|
21740
|
+
return {
|
|
21741
|
+
type: "compare",
|
|
21742
|
+
baseline: url.searchParams.get("baseline") ?? void 0,
|
|
21743
|
+
candidate: url.searchParams.get("candidate") ?? void 0
|
|
21744
|
+
};
|
|
21745
|
+
}
|
|
21746
|
+
return { type: "list" };
|
|
21747
|
+
}
|
|
21748
|
+
function sc(status) {
|
|
21749
|
+
if (status === "pass") return "p";
|
|
21750
|
+
if (status === "fail") return "f";
|
|
21751
|
+
return "w";
|
|
21752
|
+
}
|
|
21753
|
+
function scColor(status) {
|
|
21754
|
+
if (status === "pass") return "var(--pass)";
|
|
21755
|
+
if (status === "fail") return "var(--fail)";
|
|
21756
|
+
return "var(--warn)";
|
|
21757
|
+
}
|
|
21758
|
+
function dur(ms) {
|
|
21759
|
+
if (ms >= 1e4) return `${(ms / 1e3).toFixed(0)}s`;
|
|
21760
|
+
if (ms >= 1e3) return `${(ms / 1e3).toFixed(1)}s`;
|
|
21761
|
+
return `${ms}ms`;
|
|
21762
|
+
}
|
|
21763
|
+
function rel(iso) {
|
|
21764
|
+
const diff = Date.now() - new Date(iso).getTime();
|
|
21765
|
+
const m = Math.floor(diff / 6e4);
|
|
21766
|
+
if (m < 1) return "just now";
|
|
21767
|
+
if (m < 60) return `${m}m ago`;
|
|
21768
|
+
const h = Math.floor(m / 60);
|
|
21769
|
+
if (h < 24) return `${h}h ago`;
|
|
21770
|
+
return `${Math.floor(h / 24)}d ago`;
|
|
21771
|
+
}
|
|
21772
|
+
function avgScore(runs) {
|
|
21773
|
+
if (runs.length === 0) return "\u2014";
|
|
21774
|
+
return (runs.reduce((s, r) => s + r.score, 0) / runs.length).toFixed(2);
|
|
21775
|
+
}
|
|
21776
|
+
function riskClass(risk) {
|
|
21777
|
+
if (risk === "high") return "miss";
|
|
21778
|
+
if (risk === "medium") return "extra";
|
|
21779
|
+
return "match";
|
|
21780
|
+
}
|
|
21781
|
+
function getEventKind(type) {
|
|
21782
|
+
if (type === "tool_call" || type === "tool_result") return "tool";
|
|
21783
|
+
if (type.startsWith("assistant")) return "asst";
|
|
21784
|
+
if (type.startsWith("eval")) return "eval";
|
|
21785
|
+
return "user";
|
|
21786
|
+
}
|
|
21787
|
+
function getEventLabel(type) {
|
|
21788
|
+
const map = {
|
|
21789
|
+
tool_call: "Tool",
|
|
21790
|
+
tool_result: "Result",
|
|
21791
|
+
assistant_turn: "Assistant",
|
|
21792
|
+
user_turn: "User",
|
|
21793
|
+
evaluator_result: "Eval",
|
|
21794
|
+
evaluator_results: "Eval"
|
|
21795
|
+
};
|
|
21796
|
+
return map[type] ?? type.replace(/_/g, " ");
|
|
21797
|
+
}
|
|
21798
|
+
function getEventTitle(event) {
|
|
21799
|
+
const p = event.payload;
|
|
21800
|
+
if (event.type === "tool_call") {
|
|
21801
|
+
return String(p.tool_name ?? p.toolName ?? p.name ?? "tool call");
|
|
21802
|
+
}
|
|
21803
|
+
if (event.type === "tool_result") {
|
|
21804
|
+
return `${String(p.tool_name ?? p.toolName ?? "tool")} \u2192 ${String(p.status ?? "ok")}`;
|
|
21805
|
+
}
|
|
21806
|
+
if (event.type === "assistant_turn") {
|
|
21807
|
+
const content = String(p.content ?? p.text ?? "");
|
|
21808
|
+
return content.slice(0, 80) || "assistant response";
|
|
21809
|
+
}
|
|
21810
|
+
if (event.type === "user_turn") {
|
|
21811
|
+
const content = String(p.content ?? p.text ?? "");
|
|
21812
|
+
return content.slice(0, 80) || "user message";
|
|
21813
|
+
}
|
|
21814
|
+
return event.type.replace(/_/g, " ");
|
|
21815
|
+
}
|
|
21816
|
+
function getEventBody(event) {
|
|
21817
|
+
const p = event.payload;
|
|
21818
|
+
if (Object.keys(p).length === 0) return null;
|
|
21819
|
+
if (event.type === "assistant_turn" || event.type === "user_turn") {
|
|
21820
|
+
const content = String(p.content ?? p.text ?? "");
|
|
21821
|
+
if (content.length > 80) return content;
|
|
21822
|
+
const other = Object.fromEntries(
|
|
21823
|
+
Object.entries(p).filter(([k]) => k !== "content" && k !== "text")
|
|
21824
|
+
);
|
|
21825
|
+
return Object.keys(other).length > 0 ? JSON.stringify(other, null, 2) : null;
|
|
21826
|
+
}
|
|
21827
|
+
return JSON.stringify(p, null, 2);
|
|
21828
|
+
}
|
|
21727
21829
|
function App() {
|
|
21728
21830
|
const route = getRoute();
|
|
21729
21831
|
return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "shell", children: [
|
|
21730
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)(
|
|
21731
|
-
/* @__PURE__ */ (0, import_jsx_runtime.
|
|
21732
|
-
|
|
21733
|
-
|
|
21734
|
-
|
|
21735
|
-
|
|
21832
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)(Chrome, { route }),
|
|
21833
|
+
route.type === "list" ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)(RunsView, {}) : null,
|
|
21834
|
+
route.type === "detail" ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)(DetailView, { runId: route.runId }) : null,
|
|
21835
|
+
route.type === "compare" ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)(CompareView, { baseline: route.baseline, candidate: route.candidate }) : null,
|
|
21836
|
+
route.type === "compare-suite" ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)(
|
|
21837
|
+
SuiteCompareView,
|
|
21838
|
+
{
|
|
21839
|
+
baselineBatch: route.baselineBatch,
|
|
21840
|
+
candidateBatch: route.candidateBatch
|
|
21841
|
+
}
|
|
21842
|
+
) : null
|
|
21843
|
+
] });
|
|
21844
|
+
}
|
|
21845
|
+
function Chrome({ route }) {
|
|
21846
|
+
return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("header", { className: "chrome", children: [
|
|
21847
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("a", { className: "brand", href: "/", children: "ARL" }),
|
|
21848
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "brand-meta", children: "agentlab v0.4.0" }),
|
|
21849
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("nav", { className: "nav", children: [
|
|
21850
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("a", { className: `nv-btn${route.type === "list" ? " on" : ""}`, href: "/", children: "Runs" }),
|
|
21851
|
+
route.type === "detail" ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "nv-btn on", children: "Detail" }) : null,
|
|
21852
|
+
route.type === "compare" || route.type === "compare-suite" ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "nv-btn on", children: "Compare" }) : null
|
|
21853
|
+
] }),
|
|
21854
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "live", children: [
|
|
21855
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "live-dot" }),
|
|
21856
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { children: "LIVE" })
|
|
21736
21857
|
] })
|
|
21737
21858
|
] });
|
|
21738
21859
|
}
|
|
21739
|
-
function
|
|
21860
|
+
function RunsView() {
|
|
21740
21861
|
const [runs, setRuns] = (0, import_react.useState)([]);
|
|
21741
|
-
const [
|
|
21742
|
-
const
|
|
21743
|
-
const [provider, setProvider] = (0, import_react.useState)("");
|
|
21862
|
+
const [filter, setFilter] = (0, import_react.useState)("");
|
|
21863
|
+
const inputRef = (0, import_react.useRef)(null);
|
|
21744
21864
|
(0, import_react.useEffect)(() => {
|
|
21745
|
-
|
|
21746
|
-
|
|
21747
|
-
|
|
21748
|
-
|
|
21749
|
-
|
|
21750
|
-
}, [suite, status, provider]);
|
|
21865
|
+
void fetch("/api/runs").then((r) => r.json()).then((data) => setRuns(Array.isArray(data.runs) ? data.runs : []));
|
|
21866
|
+
}, []);
|
|
21867
|
+
const filtered = filter ? runs.filter(
|
|
21868
|
+
(r) => r.scenarioId.toLowerCase().includes(filter.toLowerCase()) || r.suite.toLowerCase().includes(filter.toLowerCase())
|
|
21869
|
+
) : runs;
|
|
21751
21870
|
const stats = summarizeRuns(runs);
|
|
21752
|
-
return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("
|
|
21753
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("
|
|
21754
|
-
/* @__PURE__ */ (0, import_jsx_runtime.
|
|
21755
|
-
|
|
21756
|
-
|
|
21757
|
-
|
|
21758
|
-
|
|
21759
|
-
|
|
21760
|
-
|
|
21761
|
-
|
|
21762
|
-
|
|
21763
|
-
|
|
21764
|
-
|
|
21765
|
-
|
|
21766
|
-
|
|
21767
|
-
|
|
21768
|
-
|
|
21769
|
-
|
|
21770
|
-
|
|
21771
|
-
|
|
21871
|
+
return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "view-body", children: [
|
|
21872
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("aside", { className: "sidebar", children: [
|
|
21873
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "sb-head", children: [
|
|
21874
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "sb-kicker", children: "Session" }),
|
|
21875
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "sb-title", children: [
|
|
21876
|
+
"Agent ",
|
|
21877
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("em", { children: "Regression" }),
|
|
21878
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("br", {}),
|
|
21879
|
+
"Lab"
|
|
21880
|
+
] }),
|
|
21881
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "tally", children: [
|
|
21882
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "t-cell", children: [
|
|
21883
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `t-v${stats.pass > 0 ? " p" : ""}`, children: stats.pass }),
|
|
21884
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-l", children: "Pass" })
|
|
21885
|
+
] }),
|
|
21886
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "t-cell", children: [
|
|
21887
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `t-v${stats.fail > 0 ? " f" : ""}`, children: stats.fail }),
|
|
21888
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-l", children: "Fail" })
|
|
21889
|
+
] }),
|
|
21890
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "t-cell", children: [
|
|
21891
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `t-v${stats.error > 0 ? " w" : ""}`, children: stats.error }),
|
|
21892
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-l", children: "Error" })
|
|
21893
|
+
] })
|
|
21894
|
+
] })
|
|
21772
21895
|
] }),
|
|
21773
|
-
/* @__PURE__ */ (0, import_jsx_runtime.
|
|
21774
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("
|
|
21775
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)(
|
|
21776
|
-
|
|
21777
|
-
|
|
21896
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "sb-filter", children: /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "filt", children: [
|
|
21897
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "filt-i", children: "/" }),
|
|
21898
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)(
|
|
21899
|
+
"input",
|
|
21900
|
+
{
|
|
21901
|
+
ref: inputRef,
|
|
21902
|
+
className: "filt-in",
|
|
21903
|
+
placeholder: "filter scenarios\u2026",
|
|
21904
|
+
value: filter,
|
|
21905
|
+
onChange: (e) => setFilter(e.target.value)
|
|
21906
|
+
}
|
|
21907
|
+
)
|
|
21908
|
+
] }) }),
|
|
21909
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "run-list", children: [
|
|
21910
|
+
filtered.length === 0 && runs.length === 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "run-list-empty", children: "No runs yet. Run a scenario from the CLI." }) : null,
|
|
21911
|
+
filtered.map((run, i) => /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("a", { className: `run ${sc(run.status)}`, href: `/runs/${run.id}`, children: [
|
|
21912
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "run-name", children: [
|
|
21913
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: `run-dot ${sc(run.status)}` }),
|
|
21914
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "run-name-text", children: run.scenarioId })
|
|
21915
|
+
] }),
|
|
21916
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `run-score ${sc(run.status)}`, children: run.score.toFixed(1) }),
|
|
21917
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "run-meta", children: [
|
|
21918
|
+
run.suite,
|
|
21919
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { style: { color: "var(--ink-lo)", margin: "0 3px" }, children: "\xB7" }),
|
|
21920
|
+
dur(run.durationMs),
|
|
21921
|
+
i > 0 && filtered[i - 1].scenarioId === run.scenarioId ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { style: { marginLeft: "6px" }, children: /* @__PURE__ */ (0, import_jsx_runtime.jsx)(
|
|
21922
|
+
"a",
|
|
21923
|
+
{
|
|
21924
|
+
href: `/compare?baseline=${filtered[i - 1].id}&candidate=${run.id}`,
|
|
21925
|
+
style: { color: "var(--accent)", fontSize: ".65rem" },
|
|
21926
|
+
onClick: (e) => e.stopPropagation(),
|
|
21927
|
+
children: "compare prev"
|
|
21928
|
+
}
|
|
21929
|
+
) }) : null
|
|
21930
|
+
] })
|
|
21931
|
+
] }, run.id))
|
|
21778
21932
|
] })
|
|
21779
21933
|
] }),
|
|
21780
|
-
|
|
21781
|
-
|
|
21782
|
-
|
|
21783
|
-
/* @__PURE__ */ (0, import_jsx_runtime.
|
|
21784
|
-
|
|
21785
|
-
|
|
21786
|
-
|
|
21787
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("th", { children: "Score" }),
|
|
21788
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("th", { children: "Runtime" }),
|
|
21789
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("th", { children: "Steps" }),
|
|
21790
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("th", { children: "Started" })
|
|
21791
|
-
] }) }),
|
|
21792
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("tbody", { children: runs.map((run, index) => /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("tr", { children: [
|
|
21793
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("td", { children: /* @__PURE__ */ (0, import_jsx_runtime.jsx)("a", { href: `/runs/${run.id}`, children: run.id }) }),
|
|
21794
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("td", { children: run.scenarioId }),
|
|
21795
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("td", { children: [
|
|
21796
|
-
run.provider ?? "-",
|
|
21797
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "muted", children: run.modelId ?? run.agentLabel ?? "" })
|
|
21934
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("main", { className: "main", children: [
|
|
21935
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmd", children: [
|
|
21936
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-p", children: "$" }),
|
|
21937
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "cmd-t", children: [
|
|
21938
|
+
"agentlab ",
|
|
21939
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "w", children: "list" }),
|
|
21940
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-caret" })
|
|
21798
21941
|
] }),
|
|
21799
|
-
/* @__PURE__ */ (0, import_jsx_runtime.
|
|
21800
|
-
|
|
21801
|
-
|
|
21802
|
-
|
|
21803
|
-
|
|
21942
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmd-r", children: [
|
|
21943
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { children: [
|
|
21944
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("kbd", { children: "\u21B5" }),
|
|
21945
|
+
"open"
|
|
21946
|
+
] }),
|
|
21947
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { children: [
|
|
21948
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("kbd", { children: "c" }),
|
|
21949
|
+
"compare"
|
|
21950
|
+
] }),
|
|
21951
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { children: [
|
|
21952
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("kbd", { children: "/" }),
|
|
21953
|
+
"filter"
|
|
21954
|
+
] })
|
|
21955
|
+
] })
|
|
21956
|
+
] }),
|
|
21957
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "scroll", children: runs.length === 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)(EmptyIdle, {}) : /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(import_jsx_runtime.Fragment, { children: [
|
|
21958
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "ov-hero", children: [
|
|
21959
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "oh-eyebrow", children: "Session overview" }),
|
|
21960
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "oh-num", children: [
|
|
21961
|
+
stats.total,
|
|
21962
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "unit", children: "runs" })
|
|
21963
|
+
] }),
|
|
21964
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { className: "oh-sub", children: [
|
|
21965
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "hi", children: [
|
|
21966
|
+
stats.pass,
|
|
21967
|
+
" passed"
|
|
21968
|
+
] }),
|
|
21969
|
+
", ",
|
|
21970
|
+
stats.fail,
|
|
21971
|
+
" failed,",
|
|
21972
|
+
" ",
|
|
21973
|
+
stats.error,
|
|
21974
|
+
" ",
|
|
21975
|
+
stats.error === 1 ? "error" : "errors",
|
|
21976
|
+
". Average score",
|
|
21977
|
+
" ",
|
|
21978
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "hi", children: avgScore(runs) }),
|
|
21979
|
+
"."
|
|
21980
|
+
] })
|
|
21804
21981
|
] }),
|
|
21805
|
-
/* @__PURE__ */ (0, import_jsx_runtime.
|
|
21806
|
-
|
|
21807
|
-
|
|
21808
|
-
|
|
21809
|
-
|
|
21982
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "chart-wrap", children: [
|
|
21983
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "chart-h", children: [
|
|
21984
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { children: "Score over time" }),
|
|
21985
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "chart-h-r", children: [
|
|
21986
|
+
runs.length,
|
|
21987
|
+
" samples"
|
|
21988
|
+
] })
|
|
21989
|
+
] }),
|
|
21990
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)(ScoreChart, { runs })
|
|
21810
21991
|
] })
|
|
21811
|
-
] }
|
|
21812
|
-
] })
|
|
21992
|
+
] }) })
|
|
21993
|
+
] })
|
|
21813
21994
|
] });
|
|
21814
21995
|
}
|
|
21815
|
-
function
|
|
21996
|
+
function DetailView({ runId }) {
|
|
21816
21997
|
const [detail, setDetail] = (0, import_react.useState)(null);
|
|
21998
|
+
const [siblings, setSiblings] = (0, import_react.useState)([]);
|
|
21817
21999
|
(0, import_react.useEffect)(() => {
|
|
21818
|
-
void fetch(`/api/runs/${
|
|
21819
|
-
|
|
21820
|
-
|
|
21821
|
-
|
|
21822
|
-
|
|
21823
|
-
|
|
21824
|
-
|
|
21825
|
-
|
|
21826
|
-
|
|
21827
|
-
|
|
21828
|
-
|
|
21829
|
-
|
|
21830
|
-
|
|
21831
|
-
|
|
21832
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)(Stat, { label: "Runtime", value: `${detail.run.durationMs}ms` }),
|
|
21833
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)(Stat, { label: "Steps", value: detail.run.totalSteps })
|
|
21834
|
-
] }),
|
|
21835
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "panel-grid", children: [
|
|
21836
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("section", { className: "panel", children: [
|
|
21837
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("h2", { children: "Summary" }),
|
|
21838
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
|
|
21839
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Provider:" }),
|
|
21840
|
-
" ",
|
|
21841
|
-
detail.agentVersion?.provider ?? "-"
|
|
21842
|
-
] }),
|
|
21843
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
|
|
21844
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Model:" }),
|
|
21845
|
-
" ",
|
|
21846
|
-
detail.agentVersion?.modelId ?? "-"
|
|
21847
|
-
] }),
|
|
21848
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)(RunIdentitySummary, { detail }),
|
|
21849
|
-
detail.agentVersion?.command ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
|
|
21850
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Command:" }),
|
|
21851
|
-
" ",
|
|
21852
|
-
detail.agentVersion.command,
|
|
21853
|
-
" ",
|
|
21854
|
-
(detail.agentVersion.args ?? []).join(" ")
|
|
21855
|
-
] }) : null,
|
|
21856
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
|
|
21857
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Termination:" }),
|
|
21858
|
-
" ",
|
|
21859
|
-
detail.run.terminationReason
|
|
22000
|
+
void fetch(`/api/runs/${runId}`).then((r) => r.json()).then((data) => {
|
|
22001
|
+
setDetail(data);
|
|
22002
|
+
return fetch("/api/runs").then((r) => r.json()).then((all) => {
|
|
22003
|
+
const items = Array.isArray(all.runs) ? all.runs : [];
|
|
22004
|
+
setSiblings(items.filter((r) => r.scenarioId === data.run.scenarioId));
|
|
22005
|
+
});
|
|
22006
|
+
});
|
|
22007
|
+
}, [runId]);
|
|
22008
|
+
return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "view-body", children: [
|
|
22009
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("aside", { className: "sidebar", children: [
|
|
22010
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "sb-head", children: [
|
|
22011
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "sb-kicker", children: [
|
|
22012
|
+
"Run \xB7 ",
|
|
22013
|
+
runId.slice(0, 8)
|
|
21860
22014
|
] }),
|
|
21861
|
-
detail
|
|
21862
|
-
/* @__PURE__ */ (0, import_jsx_runtime.
|
|
21863
|
-
|
|
21864
|
-
|
|
21865
|
-
|
|
21866
|
-
|
|
21867
|
-
|
|
22015
|
+
detail ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(import_jsx_runtime.Fragment, { children: [
|
|
22016
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "sb-title", children: [
|
|
22017
|
+
detail.run.scenarioId.split("-")[0],
|
|
22018
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("br", {}),
|
|
22019
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("em", { children: [
|
|
22020
|
+
"\u2014",
|
|
22021
|
+
detail.run.scenarioId.split("-").slice(1).join("-")
|
|
22022
|
+
] })
|
|
22023
|
+
] }),
|
|
22024
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "tally", children: [
|
|
22025
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "t-cell", children: [
|
|
22026
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `t-v ${sc(detail.run.status)}`, children: detail.run.score.toFixed(1) }),
|
|
22027
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-l", children: "Score" })
|
|
22028
|
+
] }),
|
|
22029
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "t-cell", children: [
|
|
22030
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-v", children: detail.run.totalSteps }),
|
|
22031
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-l", children: "Steps" })
|
|
22032
|
+
] }),
|
|
22033
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "t-cell", children: [
|
|
22034
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-v", children: dur(detail.run.durationMs) }),
|
|
22035
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-l", children: "Time" })
|
|
22036
|
+
] })
|
|
22037
|
+
] })
|
|
22038
|
+
] }) : /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "sb-title", children: /* @__PURE__ */ (0, import_jsx_runtime.jsx)("em", { children: "Loading\u2026" }) })
|
|
21868
22039
|
] }),
|
|
21869
|
-
/* @__PURE__ */ (0, import_jsx_runtime.
|
|
21870
|
-
|
|
21871
|
-
|
|
21872
|
-
|
|
21873
|
-
|
|
21874
|
-
|
|
21875
|
-
|
|
21876
|
-
|
|
21877
|
-
|
|
21878
|
-
|
|
21879
|
-
|
|
21880
|
-
|
|
21881
|
-
|
|
21882
|
-
|
|
21883
|
-
|
|
21884
|
-
|
|
21885
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: `pill ${call.status}`, children: call.status }),
|
|
21886
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("pre", { children: JSON.stringify({ input: call.input, output: call.output }, null, 2) })
|
|
21887
|
-
] }, call.id)) })
|
|
22040
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "run-list", children: siblings.map((run) => /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(
|
|
22041
|
+
"a",
|
|
22042
|
+
{
|
|
22043
|
+
className: `run ${sc(run.status)}${run.id === runId ? " on" : ""}`,
|
|
22044
|
+
href: `/runs/${run.id}`,
|
|
22045
|
+
children: [
|
|
22046
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "run-name", children: [
|
|
22047
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: `run-dot ${sc(run.status)}` }),
|
|
22048
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "run-name-text", children: run.id.slice(0, 12) })
|
|
22049
|
+
] }),
|
|
22050
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `run-score ${sc(run.status)}`, children: run.score.toFixed(1) }),
|
|
22051
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "run-meta", children: run.id === runId ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { style: { color: "var(--accent)" }, children: "current" }) : rel(run.startedAt) })
|
|
22052
|
+
]
|
|
22053
|
+
},
|
|
22054
|
+
run.id
|
|
22055
|
+
)) })
|
|
21888
22056
|
] }),
|
|
21889
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("
|
|
21890
|
-
/* @__PURE__ */ (0, import_jsx_runtime.
|
|
21891
|
-
|
|
21892
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("
|
|
21893
|
-
|
|
21894
|
-
|
|
21895
|
-
|
|
22057
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("main", { className: "main", children: [
|
|
22058
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmd", children: [
|
|
22059
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-p", children: "$" }),
|
|
22060
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "cmd-t", children: [
|
|
22061
|
+
"agentlab ",
|
|
22062
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "w", children: "show" }),
|
|
22063
|
+
" ",
|
|
22064
|
+
runId,
|
|
22065
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-caret" })
|
|
22066
|
+
] }),
|
|
22067
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmd-r", children: [
|
|
22068
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { children: [
|
|
22069
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("kbd", { children: "\u2190" }),
|
|
22070
|
+
"back"
|
|
21896
22071
|
] }),
|
|
21897
|
-
/* @__PURE__ */ (0, import_jsx_runtime.
|
|
21898
|
-
|
|
22072
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { children: [
|
|
22073
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("kbd", { children: "c" }),
|
|
22074
|
+
"compare"
|
|
22075
|
+
] })
|
|
22076
|
+
] })
|
|
22077
|
+
] }),
|
|
22078
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "scroll", children: !detail ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "empty", children: /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "em-title", children: "Loading\u2026" }) }) : /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(import_jsx_runtime.Fragment, { children: [
|
|
22079
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "dh", children: [
|
|
22080
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `dh-score ${sc(detail.run.status)}`, children: detail.run.score.toFixed(1) }),
|
|
22081
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "dh-meta", children: [
|
|
22082
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "dh-kicker", children: detail.agentVersion?.provider ?? "agent" }),
|
|
22083
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "dh-title", children: detail.run.scenarioId }),
|
|
22084
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "dh-row", children: [
|
|
22085
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { children: [
|
|
22086
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "d", children: "model" }),
|
|
22087
|
+
" ",
|
|
22088
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "v", children: detail.agentVersion?.modelId ?? "\u2014" })
|
|
22089
|
+
] }),
|
|
22090
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { children: [
|
|
22091
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "d", children: "runtime" }),
|
|
22092
|
+
" ",
|
|
22093
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "v", children: dur(detail.run.durationMs) })
|
|
22094
|
+
] }),
|
|
22095
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { children: [
|
|
22096
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "d", children: "evals" }),
|
|
22097
|
+
" ",
|
|
22098
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "v", children: [
|
|
22099
|
+
detail.evaluatorResults.filter((e) => e.status === "pass").length,
|
|
22100
|
+
" /",
|
|
22101
|
+
" ",
|
|
22102
|
+
detail.evaluatorResults.length
|
|
22103
|
+
] })
|
|
22104
|
+
] })
|
|
22105
|
+
] })
|
|
22106
|
+
] })
|
|
21899
22107
|
] }),
|
|
21900
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)(
|
|
21901
|
-
|
|
21902
|
-
|
|
21903
|
-
|
|
21904
|
-
|
|
21905
|
-
|
|
21906
|
-
|
|
21907
|
-
|
|
21908
|
-
|
|
21909
|
-
|
|
21910
|
-
|
|
21911
|
-
|
|
21912
|
-
|
|
21913
|
-
|
|
21914
|
-
|
|
21915
|
-
|
|
21916
|
-
|
|
21917
|
-
|
|
21918
|
-
|
|
21919
|
-
|
|
21920
|
-
|
|
21921
|
-
|
|
21922
|
-
|
|
21923
|
-
] });
|
|
21924
|
-
}
|
|
21925
|
-
function RunIdentitySummary(props) {
|
|
21926
|
-
const run = props.detail.run;
|
|
21927
|
-
return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(import_jsx_runtime.Fragment, { children: [
|
|
21928
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
|
|
21929
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Variant set:" }),
|
|
21930
|
-
" ",
|
|
21931
|
-
run.variantSetName ?? "-"
|
|
21932
|
-
] }),
|
|
21933
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
|
|
21934
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Variant:" }),
|
|
21935
|
-
" ",
|
|
21936
|
-
run.variantLabel ?? "-"
|
|
21937
|
-
] }),
|
|
21938
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
|
|
21939
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Prompt version:" }),
|
|
21940
|
-
" ",
|
|
21941
|
-
run.promptVersion ?? "-"
|
|
21942
|
-
] }),
|
|
21943
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
|
|
21944
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Model version:" }),
|
|
21945
|
-
" ",
|
|
21946
|
-
run.modelVersion ?? "-"
|
|
21947
|
-
] }),
|
|
21948
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
|
|
21949
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Tool schema version:" }),
|
|
21950
|
-
" ",
|
|
21951
|
-
run.toolSchemaVersion ?? "-"
|
|
21952
|
-
] }),
|
|
21953
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
|
|
21954
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Config label:" }),
|
|
21955
|
-
" ",
|
|
21956
|
-
run.configLabel ?? "-"
|
|
21957
|
-
] }),
|
|
21958
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
|
|
21959
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Runtime profile:" }),
|
|
21960
|
-
" ",
|
|
21961
|
-
run.runtimeProfileName ?? "-"
|
|
21962
|
-
] }),
|
|
21963
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
|
|
21964
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Suite definition:" }),
|
|
21965
|
-
" ",
|
|
21966
|
-
run.suiteDefinitionName ?? "-"
|
|
22108
|
+
detail.run.status !== "pass" ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)(FailureBlock, { detail }) : null,
|
|
22109
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "trace-section", children: [
|
|
22110
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "ts-head", children: [
|
|
22111
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "ts-title", children: [
|
|
22112
|
+
"Trace",
|
|
22113
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)(
|
|
22114
|
+
"span",
|
|
22115
|
+
{
|
|
22116
|
+
className: `ts-badge ts-badge-${detail.run.status === "pass" ? "complete" : "fail"}`,
|
|
22117
|
+
children: detail.run.status === "pass" ? "complete" : "failed"
|
|
22118
|
+
}
|
|
22119
|
+
)
|
|
22120
|
+
] }),
|
|
22121
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "ts-stats", children: [
|
|
22122
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "v", children: detail.traceEvents.length }),
|
|
22123
|
+
" steps \xB7",
|
|
22124
|
+
" ",
|
|
22125
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "v", children: dur(detail.run.durationMs) })
|
|
22126
|
+
] })
|
|
22127
|
+
] }),
|
|
22128
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)(TraceTimeline, { events: detail.traceEvents })
|
|
22129
|
+
] })
|
|
22130
|
+
] }) })
|
|
21967
22131
|
] })
|
|
21968
22132
|
] });
|
|
21969
22133
|
}
|
|
21970
|
-
function
|
|
22134
|
+
function CompareView({
|
|
22135
|
+
baseline,
|
|
22136
|
+
candidate
|
|
22137
|
+
}) {
|
|
21971
22138
|
const [data, setData] = (0, import_react.useState)(null);
|
|
21972
22139
|
(0, import_react.useEffect)(() => {
|
|
21973
|
-
if (!
|
|
21974
|
-
setData(null);
|
|
21975
|
-
return;
|
|
21976
|
-
}
|
|
22140
|
+
if (!baseline || !candidate) return;
|
|
21977
22141
|
const url = new URL("/api/compare", window.location.origin);
|
|
21978
|
-
url.searchParams.set("baseline",
|
|
21979
|
-
url.searchParams.set("candidate",
|
|
21980
|
-
void fetch(url).then((
|
|
21981
|
-
}, [
|
|
21982
|
-
if (!
|
|
21983
|
-
return /* @__PURE__ */ (0, import_jsx_runtime.jsx)(
|
|
21984
|
-
|
|
21985
|
-
|
|
21986
|
-
|
|
22142
|
+
url.searchParams.set("baseline", baseline);
|
|
22143
|
+
url.searchParams.set("candidate", candidate);
|
|
22144
|
+
void fetch(url).then((r) => r.json()).then((p) => setData(p));
|
|
22145
|
+
}, [baseline, candidate]);
|
|
22146
|
+
if (!baseline || !candidate) {
|
|
22147
|
+
return /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "view-body", children: /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("main", { className: "main", style: { width: "100%" }, children: [
|
|
22148
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmd", children: [
|
|
22149
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-p", children: "$" }),
|
|
22150
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "cmd-t", style: { color: "var(--ink-lo)" }, children: [
|
|
22151
|
+
"agentlab compare \u2026",
|
|
22152
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-caret" })
|
|
22153
|
+
] })
|
|
22154
|
+
] }),
|
|
22155
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)(EmptyIdle, {})
|
|
22156
|
+
] }) });
|
|
21987
22157
|
}
|
|
21988
|
-
|
|
21989
|
-
|
|
21990
|
-
|
|
21991
|
-
|
|
21992
|
-
|
|
21993
|
-
|
|
21994
|
-
|
|
21995
|
-
|
|
21996
|
-
|
|
21997
|
-
|
|
21998
|
-
|
|
21999
|
-
|
|
22000
|
-
|
|
22001
|
-
|
|
22002
|
-
|
|
22003
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("ul", { className: "stack", children: data.notes.map((note) => /* @__PURE__ */ (0, import_jsx_runtime.jsx)("li", { children: note }, note)) })
|
|
22004
|
-
] }),
|
|
22005
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "panel-grid", children: [
|
|
22006
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("section", { className: "panel", children: [
|
|
22007
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("h2", { children: "Evaluator diffs" }),
|
|
22008
|
-
data.evaluatorDiffs.length === 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("p", { className: "muted", children: "No evaluator changes." }) : null,
|
|
22009
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("ul", { className: "stack diff-list", children: data.evaluatorDiffs.map((diff) => /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("li", { className: "diff-card", children: [
|
|
22010
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diff-card-head", children: [
|
|
22011
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: diff.evaluatorId }),
|
|
22012
|
-
diff.hardGate ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "event-chip", children: "hard gate" }) : null
|
|
22158
|
+
const tone = data ? data.classification.includes("regress") ? "fail" : data.classification.includes("improv") ? "pass" : "neutral" : "neutral";
|
|
22159
|
+
const toneColor = tone === "fail" ? "var(--fail)" : tone === "pass" ? "var(--pass)" : "var(--ink-mid)";
|
|
22160
|
+
return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "view-body", children: [
|
|
22161
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("aside", { className: "sidebar", children: [
|
|
22162
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "sb-head", children: [
|
|
22163
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "sb-kicker", style: { color: toneColor }, children: data?.classification ?? "Comparing\u2026" }),
|
|
22164
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "sb-title", style: { color: tone === "neutral" ? "var(--ink)" : toneColor }, children: [
|
|
22165
|
+
data ? (data.deltas.score >= 0 ? "+" : "") + data.deltas.score.toFixed(2) : "\u2014",
|
|
22166
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("br", {}),
|
|
22167
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("em", { style: { color: toneColor }, children: "delta" })
|
|
22168
|
+
] }),
|
|
22169
|
+
data ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "tally", children: [
|
|
22170
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "t-cell", children: [
|
|
22171
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-v p", children: data.baseline.run.score.toFixed(1) }),
|
|
22172
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-l", children: "Base" })
|
|
22013
22173
|
] }),
|
|
22014
|
-
/* @__PURE__ */ (0, import_jsx_runtime.
|
|
22015
|
-
|
|
22174
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "t-cell", children: [
|
|
22175
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `t-v ${sc(data.candidate.run.status)}`, children: data.candidate.run.score.toFixed(1) }),
|
|
22176
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-l", children: "Cand" })
|
|
22177
|
+
] })
|
|
22178
|
+
] }) : null
|
|
22016
22179
|
] }),
|
|
22017
|
-
/* @__PURE__ */ (0, import_jsx_runtime.
|
|
22018
|
-
/* @__PURE__ */ (0, import_jsx_runtime.
|
|
22019
|
-
|
|
22020
|
-
|
|
22021
|
-
|
|
22022
|
-
|
|
22023
|
-
|
|
22180
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "run-list", children: data ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(import_jsx_runtime.Fragment, { children: [
|
|
22181
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("a", { className: "run p", href: `/runs/${data.baseline.run.id}`, children: [
|
|
22182
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "run-name", children: [
|
|
22183
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "run-dot p" }),
|
|
22184
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "run-name-text", children: [
|
|
22185
|
+
data.baseline.run.id.slice(0, 12),
|
|
22186
|
+
" \xB7 base"
|
|
22187
|
+
] })
|
|
22024
22188
|
] }),
|
|
22025
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "
|
|
22026
|
-
|
|
22027
|
-
|
|
22189
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "run-score p", children: data.baseline.run.score.toFixed(1) }),
|
|
22190
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "run-meta", children: [
|
|
22191
|
+
data.baseline.run.totalSteps,
|
|
22192
|
+
" steps"
|
|
22193
|
+
] })
|
|
22194
|
+
] }),
|
|
22195
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)(
|
|
22196
|
+
"a",
|
|
22197
|
+
{
|
|
22198
|
+
className: `run on ${sc(data.candidate.run.status)}`,
|
|
22199
|
+
href: `/runs/${data.candidate.run.id}`,
|
|
22200
|
+
children: [
|
|
22201
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "run-name", children: [
|
|
22202
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: `run-dot ${sc(data.candidate.run.status)}` }),
|
|
22203
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "run-name-text", children: [
|
|
22204
|
+
data.candidate.run.id.slice(0, 12),
|
|
22205
|
+
" \xB7 cand"
|
|
22206
|
+
] })
|
|
22207
|
+
] }),
|
|
22208
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `run-score ${sc(data.candidate.run.status)}`, children: data.candidate.run.score.toFixed(1) }),
|
|
22209
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "run-meta", children: [
|
|
22210
|
+
data.candidate.run.totalSteps,
|
|
22211
|
+
" steps"
|
|
22212
|
+
] })
|
|
22213
|
+
]
|
|
22214
|
+
}
|
|
22215
|
+
)
|
|
22216
|
+
] }) : null })
|
|
22028
22217
|
] }),
|
|
22029
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("
|
|
22030
|
-
/* @__PURE__ */ (0, import_jsx_runtime.
|
|
22031
|
-
|
|
22218
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("main", { className: "main", children: [
|
|
22219
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmd", children: [
|
|
22220
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-p", children: "$" }),
|
|
22221
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "cmd-t", children: [
|
|
22222
|
+
"agentlab ",
|
|
22223
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "w", children: "compare" }),
|
|
22224
|
+
" ",
|
|
22225
|
+
baseline.slice(0, 8),
|
|
22226
|
+
" ",
|
|
22227
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "w", children: "\u2192" }),
|
|
22228
|
+
" ",
|
|
22229
|
+
candidate.slice(0, 8),
|
|
22230
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-caret" })
|
|
22231
|
+
] }),
|
|
22232
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "cmd-r", children: /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { children: [
|
|
22233
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("kbd", { children: "\u2190" }),
|
|
22234
|
+
"back"
|
|
22235
|
+
] }) })
|
|
22236
|
+
] }),
|
|
22237
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "scroll", children: !data ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "empty", children: /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "em-title", children: "Loading comparison\u2026" }) }) : /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(import_jsx_runtime.Fragment, { children: [
|
|
22238
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmp-hero", children: [
|
|
22239
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmp-eyebrow", style: { color: toneColor }, children: [
|
|
22240
|
+
tone === "fail" ? "Regression detected" : tone === "pass" ? "Improvement" : data.classification,
|
|
22241
|
+
" ",
|
|
22242
|
+
"\xB7 ",
|
|
22243
|
+
data.baseline.run.scenarioId
|
|
22244
|
+
] }),
|
|
22245
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmp-grid", children: [
|
|
22246
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmp-side", children: [
|
|
22247
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmp-side-label", children: [
|
|
22248
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "v", children: "baseline" }),
|
|
22249
|
+
" \xB7 ",
|
|
22250
|
+
baseline.slice(0, 6)
|
|
22251
|
+
] }),
|
|
22252
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "cmp-side-score", style: { color: "var(--pass)" }, children: data.baseline.run.score.toFixed(1) })
|
|
22253
|
+
] }),
|
|
22254
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { children: [
|
|
22255
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "cmp-arrow", style: { color: toneColor }, children: "\u2192" }),
|
|
22256
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmp-delta", style: { color: toneColor }, children: [
|
|
22257
|
+
data.deltas.score >= 0 ? "+" : "",
|
|
22258
|
+
data.deltas.score.toFixed(2)
|
|
22259
|
+
] })
|
|
22260
|
+
] }),
|
|
22261
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmp-side r", children: [
|
|
22262
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmp-side-label", children: [
|
|
22263
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "v", children: "candidate" }),
|
|
22264
|
+
" \xB7 ",
|
|
22265
|
+
candidate.slice(0, 6)
|
|
22266
|
+
] }),
|
|
22267
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)(
|
|
22268
|
+
"div",
|
|
22269
|
+
{
|
|
22270
|
+
className: "cmp-side-score",
|
|
22271
|
+
style: { color: scColor(data.candidate.run.status) },
|
|
22272
|
+
children: data.candidate.run.score.toFixed(1)
|
|
22273
|
+
}
|
|
22274
|
+
)
|
|
22275
|
+
] })
|
|
22276
|
+
] }),
|
|
22277
|
+
data.notes.length > 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(import_jsx_runtime.Fragment, { children: [
|
|
22278
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "cmp-verdict", children: data.notes[0] }),
|
|
22279
|
+
data.notes.length > 1 ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "cmp-verdict-sub", children: data.notes.slice(1).join(" \xB7 ") }) : null
|
|
22280
|
+
] }) : null
|
|
22281
|
+
] }),
|
|
22282
|
+
data.toolDiffs.length > 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diverge", children: [
|
|
22283
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "dv-h", children: "Tool-call timeline" }),
|
|
22284
|
+
data.toolDiffs.map((diff) => /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "dv-track", children: [
|
|
22285
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "dv-label", children: diff.toolName }),
|
|
22286
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "dv-steps", children: [
|
|
22287
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: `dv-step ${riskClass(diff.risk)}`, children: [
|
|
22288
|
+
"base: ",
|
|
22289
|
+
diff.baselineCount
|
|
22290
|
+
] }),
|
|
22291
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: `dv-step ${riskClass(diff.risk)}`, children: [
|
|
22292
|
+
"cand: ",
|
|
22293
|
+
diff.candidateCount
|
|
22294
|
+
] }),
|
|
22295
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `dv-step ${riskClass(diff.risk)}`, style: { flex: 4 }, children: diff.note })
|
|
22296
|
+
] })
|
|
22297
|
+
] }, diff.toolName))
|
|
22298
|
+
] }) : null,
|
|
22299
|
+
data.evaluatorDiffs.length > 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diffs", children: [
|
|
22300
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "diffs-h", children: "Evaluator changes" }),
|
|
22301
|
+
data.evaluatorDiffs.map((diff) => /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diff-row", children: [
|
|
22302
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-label", children: diff.evaluatorId }),
|
|
22303
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-cell base", children: diff.baselineStatus ?? "\u2014" }),
|
|
22304
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-arrow", children: "\u2192" }),
|
|
22305
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-cell cand", children: diff.candidateStatus ?? "\u2014" })
|
|
22306
|
+
] }, diff.evaluatorId))
|
|
22307
|
+
] }) : null,
|
|
22308
|
+
data.outputChanged ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diffs", children: [
|
|
22309
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "diffs-h", children: "Output diff" }),
|
|
22310
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diff-row", children: [
|
|
22311
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-label", children: "Final output" }),
|
|
22312
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-cell base", children: data.baseline.run.finalOutput || "(none)" }),
|
|
22313
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-arrow", children: "\u2192" }),
|
|
22314
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-cell cand", children: data.candidate.run.finalOutput || "(none)" })
|
|
22315
|
+
] })
|
|
22316
|
+
] }) : null
|
|
22317
|
+
] }) })
|
|
22032
22318
|
] })
|
|
22033
22319
|
] });
|
|
22034
22320
|
}
|
|
22035
|
-
function
|
|
22036
|
-
|
|
22037
|
-
|
|
22038
|
-
|
|
22039
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Run:" }),
|
|
22040
|
-
" ",
|
|
22041
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("a", { href: `/runs/${props.detail.run.id}`, children: props.detail.run.id })
|
|
22042
|
-
] }),
|
|
22043
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
|
|
22044
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Status:" }),
|
|
22045
|
-
" ",
|
|
22046
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: `pill ${props.detail.run.status}`, children: props.detail.run.status })
|
|
22047
|
-
] }),
|
|
22048
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
|
|
22049
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Score:" }),
|
|
22050
|
-
" ",
|
|
22051
|
-
props.detail.run.score
|
|
22052
|
-
] }),
|
|
22053
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
|
|
22054
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Runtime:" }),
|
|
22055
|
-
" ",
|
|
22056
|
-
props.detail.run.durationMs,
|
|
22057
|
-
"ms"
|
|
22058
|
-
] }),
|
|
22059
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
|
|
22060
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Termination:" }),
|
|
22061
|
-
" ",
|
|
22062
|
-
props.detail.run.terminationReason
|
|
22063
|
-
] }),
|
|
22064
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
|
|
22065
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Agent:" }),
|
|
22066
|
-
" ",
|
|
22067
|
-
props.detail.agentVersion?.label ?? "-"
|
|
22068
|
-
] }),
|
|
22069
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
|
|
22070
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Provider:" }),
|
|
22071
|
-
" ",
|
|
22072
|
-
props.detail.agentVersion?.provider ?? "-"
|
|
22073
|
-
] }),
|
|
22074
|
-
props.detail.agentVersion?.modelId ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
|
|
22075
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Model:" }),
|
|
22076
|
-
" ",
|
|
22077
|
-
props.detail.agentVersion.modelId
|
|
22078
|
-
] }) : null,
|
|
22079
|
-
props.detail.agentVersion?.command ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
|
|
22080
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Command:" }),
|
|
22081
|
-
" ",
|
|
22082
|
-
props.detail.agentVersion.command,
|
|
22083
|
-
" ",
|
|
22084
|
-
(props.detail.agentVersion.args ?? []).join(" ")
|
|
22085
|
-
] }) : null,
|
|
22086
|
-
props.detail.errorDetail ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { children: [
|
|
22087
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Error:" }),
|
|
22088
|
-
" ",
|
|
22089
|
-
props.detail.errorDetail
|
|
22090
|
-
] }) : null,
|
|
22091
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("p", { children: /* @__PURE__ */ (0, import_jsx_runtime.jsx)("strong", { children: "Final output:" }) }),
|
|
22092
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("pre", { children: props.detail.run.finalOutput || "(none)" }),
|
|
22093
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("h3", { children: "Trace" }),
|
|
22094
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("ol", { className: "timeline compact", children: props.detail.traceEvents.map((event) => /* @__PURE__ */ (0, import_jsx_runtime.jsx)("li", { className: "timeline-item compact-item", children: /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("strong", { children: [
|
|
22095
|
-
event.stepIndex,
|
|
22096
|
-
". ",
|
|
22097
|
-
formatEventLabel(event.type)
|
|
22098
|
-
] }) }, event.eventId)) })
|
|
22099
|
-
] });
|
|
22100
|
-
}
|
|
22101
|
-
function SuiteComparePage(props) {
|
|
22321
|
+
function SuiteCompareView({
|
|
22322
|
+
baselineBatch,
|
|
22323
|
+
candidateBatch
|
|
22324
|
+
}) {
|
|
22102
22325
|
const [data, setData] = (0, import_react.useState)(null);
|
|
22103
22326
|
(0, import_react.useEffect)(() => {
|
|
22104
|
-
if (!
|
|
22105
|
-
setData(null);
|
|
22106
|
-
return;
|
|
22107
|
-
}
|
|
22327
|
+
if (!baselineBatch || !candidateBatch) return;
|
|
22108
22328
|
const url = new URL("/api/compare-suite", window.location.origin);
|
|
22109
|
-
url.searchParams.set("baselineBatch",
|
|
22110
|
-
url.searchParams.set("candidateBatch",
|
|
22111
|
-
void fetch(url).then((
|
|
22112
|
-
}, [
|
|
22113
|
-
if (!
|
|
22114
|
-
return /* @__PURE__ */ (0, import_jsx_runtime.jsx)(
|
|
22115
|
-
|
|
22116
|
-
|
|
22117
|
-
|
|
22329
|
+
url.searchParams.set("baselineBatch", baselineBatch);
|
|
22330
|
+
url.searchParams.set("candidateBatch", candidateBatch);
|
|
22331
|
+
void fetch(url).then((r) => r.json()).then((p) => setData(p));
|
|
22332
|
+
}, [baselineBatch, candidateBatch]);
|
|
22333
|
+
if (!baselineBatch || !candidateBatch) {
|
|
22334
|
+
return /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "view-body", children: /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("main", { className: "main", style: { width: "100%" }, children: [
|
|
22335
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmd", children: [
|
|
22336
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-p", children: "$" }),
|
|
22337
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "cmd-t", style: { color: "var(--ink-lo)" }, children: [
|
|
22338
|
+
"agentlab compare --suite \u2026",
|
|
22339
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-caret" })
|
|
22340
|
+
] })
|
|
22341
|
+
] }),
|
|
22342
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)(EmptyIdle, {})
|
|
22343
|
+
] }) });
|
|
22118
22344
|
}
|
|
22119
|
-
return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("
|
|
22120
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("
|
|
22121
|
-
/* @__PURE__ */ (0, import_jsx_runtime.
|
|
22122
|
-
|
|
22123
|
-
|
|
22124
|
-
|
|
22125
|
-
|
|
22126
|
-
|
|
22127
|
-
|
|
22128
|
-
|
|
22129
|
-
|
|
22130
|
-
|
|
22131
|
-
|
|
22132
|
-
|
|
22133
|
-
|
|
22134
|
-
|
|
22135
|
-
|
|
22136
|
-
|
|
22137
|
-
|
|
22138
|
-
|
|
22139
|
-
|
|
22140
|
-
|
|
22345
|
+
return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "view-body", children: [
|
|
22346
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("aside", { className: "sidebar", children: [
|
|
22347
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "sb-head", children: [
|
|
22348
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "sb-kicker", children: "Suite Compare" }),
|
|
22349
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "sb-title", children: [
|
|
22350
|
+
data?.suite ?? "Loading\u2026",
|
|
22351
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("br", {}),
|
|
22352
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("em", { children: data?.classification ?? "" })
|
|
22353
|
+
] }),
|
|
22354
|
+
data ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "tally", children: [
|
|
22355
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "t-cell", children: [
|
|
22356
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `t-v${data.regressions.length > 0 ? " f" : ""}`, children: data.regressions.length }),
|
|
22357
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-l", children: "Regress" })
|
|
22358
|
+
] }),
|
|
22359
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "t-cell", children: [
|
|
22360
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `t-v${data.improvements.length > 0 ? " p" : ""}`, children: data.improvements.length }),
|
|
22361
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-l", children: "Improve" })
|
|
22362
|
+
] }),
|
|
22363
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "t-cell", children: [
|
|
22364
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-v", children: data.unchanged.length }),
|
|
22365
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "t-l", children: "Same" })
|
|
22366
|
+
] })
|
|
22367
|
+
] }) : null
|
|
22368
|
+
] }),
|
|
22369
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "run-list", children: data ? [
|
|
22370
|
+
...data.regressions.map((item) => ({
|
|
22371
|
+
...item,
|
|
22372
|
+
kind: "fail"
|
|
22373
|
+
})),
|
|
22374
|
+
...data.improvements.map((item) => ({
|
|
22375
|
+
...item,
|
|
22376
|
+
kind: "pass"
|
|
22377
|
+
}))
|
|
22378
|
+
].map((item) => /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(
|
|
22379
|
+
"a",
|
|
22380
|
+
{
|
|
22381
|
+
className: `run ${item.kind}`,
|
|
22382
|
+
href: `/compare?baseline=${item.comparison.baseline.run.id}&candidate=${item.comparison.candidate.run.id}`,
|
|
22383
|
+
children: [
|
|
22384
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "run-name", children: [
|
|
22385
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: `run-dot ${item.kind}` }),
|
|
22386
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "run-name-text", children: item.scenarioId })
|
|
22387
|
+
] }),
|
|
22388
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: `run-score ${item.kind}`, children: item.comparison.candidate.run.score.toFixed(1) }),
|
|
22389
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "run-meta", children: item.comparison.classification })
|
|
22390
|
+
]
|
|
22391
|
+
},
|
|
22392
|
+
item.scenarioId
|
|
22393
|
+
)) : null })
|
|
22141
22394
|
] }),
|
|
22142
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("
|
|
22143
|
-
/* @__PURE__ */ (0, import_jsx_runtime.
|
|
22144
|
-
|
|
22145
|
-
/* @__PURE__ */ (0, import_jsx_runtime.
|
|
22146
|
-
|
|
22147
|
-
|
|
22395
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("main", { className: "main", children: [
|
|
22396
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "cmd", children: [
|
|
22397
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-p", children: "$" }),
|
|
22398
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "cmd-t", children: [
|
|
22399
|
+
"agentlab ",
|
|
22400
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "w", children: "compare" }),
|
|
22401
|
+
" --suite ",
|
|
22402
|
+
baselineBatch.slice(0, 8),
|
|
22403
|
+
" ",
|
|
22404
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "w", children: "\u2192" }),
|
|
22405
|
+
" ",
|
|
22406
|
+
candidateBatch.slice(0, 8),
|
|
22407
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "cmd-caret" })
|
|
22408
|
+
] }),
|
|
22409
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "cmd-r", children: /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { children: [
|
|
22410
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("kbd", { children: "\u2190" }),
|
|
22411
|
+
"back"
|
|
22412
|
+
] }) })
|
|
22148
22413
|
] }),
|
|
22149
|
-
/* @__PURE__ */ (0, import_jsx_runtime.
|
|
22150
|
-
/* @__PURE__ */ (0, import_jsx_runtime.
|
|
22151
|
-
|
|
22152
|
-
|
|
22153
|
-
|
|
22414
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "scroll", children: !data ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "empty", children: /* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "em-title", children: "Loading suite comparison\u2026" }) }) : /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(import_jsx_runtime.Fragment, { children: [
|
|
22415
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "ov-hero", children: [
|
|
22416
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "oh-eyebrow", children: [
|
|
22417
|
+
"Suite \xB7 ",
|
|
22418
|
+
data.suite
|
|
22419
|
+
] }),
|
|
22420
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "oh-num", children: [
|
|
22421
|
+
data.regressions.length + data.improvements.length + data.unchanged.length,
|
|
22422
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("span", { className: "unit", children: "scenarios" })
|
|
22423
|
+
] }),
|
|
22424
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("p", { className: "oh-sub", children: [
|
|
22425
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "hi", children: [
|
|
22426
|
+
data.improvements.length,
|
|
22427
|
+
" improved"
|
|
22428
|
+
] }),
|
|
22429
|
+
",",
|
|
22430
|
+
" ",
|
|
22431
|
+
data.regressions.length,
|
|
22432
|
+
" regressed, ",
|
|
22433
|
+
data.unchanged.length,
|
|
22434
|
+
" unchanged. Score delta",
|
|
22435
|
+
" ",
|
|
22436
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("span", { className: "hi", children: [
|
|
22437
|
+
data.deltas.averageScore >= 0 ? "+" : "",
|
|
22438
|
+
data.deltas.averageScore.toFixed(2)
|
|
22439
|
+
] }),
|
|
22440
|
+
"."
|
|
22441
|
+
] })
|
|
22442
|
+
] }),
|
|
22443
|
+
data.regressions.length > 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diffs", children: [
|
|
22444
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "diffs-h", children: "Regressions" }),
|
|
22445
|
+
data.regressions.map((item) => /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diff-row", children: [
|
|
22446
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-label", children: item.scenarioId }),
|
|
22447
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-cell base", children: item.comparison.baseline.run.score.toFixed(1) }),
|
|
22448
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-arrow", children: "\u2192" }),
|
|
22449
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-cell cand", children: /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(
|
|
22450
|
+
"a",
|
|
22451
|
+
{
|
|
22452
|
+
href: `/compare?baseline=${item.comparison.baseline.run.id}&candidate=${item.comparison.candidate.run.id}`,
|
|
22453
|
+
style: { color: "var(--fail)" },
|
|
22454
|
+
children: [
|
|
22455
|
+
item.comparison.candidate.run.score.toFixed(1),
|
|
22456
|
+
" \u2014 view"
|
|
22457
|
+
]
|
|
22458
|
+
}
|
|
22459
|
+
) })
|
|
22460
|
+
] }, item.scenarioId))
|
|
22461
|
+
] }) : null,
|
|
22462
|
+
data.improvements.length > 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diffs", children: [
|
|
22463
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "diffs-h", children: "Improvements" }),
|
|
22464
|
+
data.improvements.map((item) => /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diff-row", children: [
|
|
22465
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-label", children: item.scenarioId }),
|
|
22466
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-cell base", children: item.comparison.baseline.run.score.toFixed(1) }),
|
|
22467
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-arrow", children: "\u2192" }),
|
|
22468
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)(
|
|
22469
|
+
"div",
|
|
22470
|
+
{
|
|
22471
|
+
className: "df-cell",
|
|
22472
|
+
style: {
|
|
22473
|
+
background: "color-mix(in srgb, var(--pass) 8%, transparent)",
|
|
22474
|
+
border: "1px solid color-mix(in srgb, var(--pass) 25%, transparent)"
|
|
22475
|
+
},
|
|
22476
|
+
children: /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(
|
|
22477
|
+
"a",
|
|
22478
|
+
{
|
|
22479
|
+
href: `/compare?baseline=${item.comparison.baseline.run.id}&candidate=${item.comparison.candidate.run.id}`,
|
|
22480
|
+
style: { color: "var(--pass)" },
|
|
22481
|
+
children: [
|
|
22482
|
+
item.comparison.candidate.run.score.toFixed(1),
|
|
22483
|
+
" \u2014 view"
|
|
22484
|
+
]
|
|
22485
|
+
}
|
|
22486
|
+
)
|
|
22487
|
+
}
|
|
22488
|
+
)
|
|
22489
|
+
] }, item.scenarioId))
|
|
22490
|
+
] }) : null,
|
|
22491
|
+
data.missingFromCandidate.length > 0 || data.missingFromBaseline.length > 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diffs", children: [
|
|
22492
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "diffs-h", children: "Missing scenarios" }),
|
|
22493
|
+
data.missingFromCandidate.length > 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diff-row", children: [
|
|
22494
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-label", children: "From candidate" }),
|
|
22495
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)(
|
|
22496
|
+
"div",
|
|
22497
|
+
{
|
|
22498
|
+
className: "df-cell cand",
|
|
22499
|
+
style: { gridColumn: "2 / 5" },
|
|
22500
|
+
children: data.missingFromCandidate.join(", ")
|
|
22501
|
+
}
|
|
22502
|
+
)
|
|
22503
|
+
] }) : null,
|
|
22504
|
+
data.missingFromBaseline.length > 0 ? /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "diff-row", children: [
|
|
22505
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)("div", { className: "df-label", children: "From baseline" }),
|
|
22506
|
+
/* @__PURE__ */ (0, import_jsx_runtime.jsx)(
|
|
22507
|
+
"div",
|
|
22508
|
+
{
|
|
22509
|
+
className: "df-cell base",
|
|
22510
|
+
style: { gridColumn: "2 / 5" },
|
|
22511
|
+
children: data.missingFromBaseline.join(", ")
|
|
22512
|
+
}
|
|
22513
|
+
)
|
|
22514
|
+
] }) : null
|
|
22515
|
+
] }) : null
|
|
22516
|
+
] }) })
|
|
22154
22517
|
] })
|
|
22155
22518
|
] });
|
|
22156
22519
|
}
|
|
22157
|
-
function
|
|
22158
|
-
|
|
22159
|
-
|
|
22160
|
-
|
|
22161
|
-
|
|
22162
|
-
|
|
22163
|
-
|
|
22164
|
-
|
|
22165
|
-
|
|
22166
|
-
|
|
22167
|
-
|
|
22168
|
-
|
|
22169
|
-
|
|
22170
|
-
|
|
22171
|
-
|
|
22172
|
-
|
|
22173
|
-
|
|
22174
|
-
|
|
22520
|
+
/**
 * Renders a score-over-time line chart as an inline SVG.
 *
 * The runs are copied and ordered by `startedAt` (oldest first), so the
 * caller's array is never reordered. With fewer than two runs there is no
 * line to draw and an empty fragment is returned instead.
 *
 * Geometry (viewBox 960x140): x is spread evenly from 30 to 930; a score of
 * 1 maps to y=20 and a score of 0 to y=120, matching the grid lines drawn at
 * y=20, y=70 and y=120. Runs whose status is not "pass" get a ring marker.
 *
 * @param {{ runs: Array<{ id: *, startedAt: *, score: number, status: string }> }} props
 * @returns The SVG element, or an empty fragment when fewer than two runs exist.
 */
function ScoreChart({ runs }) {
  const { jsx, jsxs, Fragment } = import_jsx_runtime;

  const chronological = [...runs];
  if (chronological.length < 2) {
    return jsx(Fragment, {});
  }
  chronological.sort(
    (left, right) => new Date(left.startedAt).getTime() - new Date(right.startedAt).getTime()
  );

  const lastIndex = chronological.length - 1;
  const points = chronological.map(({ id, score, status }, index) => ({
    x: 30 + index / lastIndex * 900,
    y: 20 + (1 - score) * 100,
    status,
    id
  }));

  // Each segment is a cubic Bezier whose control points share the midpoint x
  // of its two endpoints, giving a smooth horizontal ease between runs.
  const head = points[0];
  const tail = points[lastIndex];
  const curves = points.slice(1).map((to, offset) => {
    const from = points[offset];
    const midX = (from.x + to.x) / 2;
    return ` C${midX},${from.y} ${midX},${to.y} ${to.x},${to.y}`;
  });
  const linePath = `M${head.x},${head.y}${curves.join("")}`;
  // Close the curve down to the bottom edge so the gradient can fill under it.
  const fillPath = `${linePath} L${tail.x},140 L${head.x},140 Z`;

  const gradient = jsx("defs", {
    children: jsxs("linearGradient", {
      id: "hz-fill",
      x1: "0",
      y1: "0",
      x2: "0",
      y2: "1",
      children: [
        jsx("stop", { offset: "0%", stopColor: "var(--accent)", stopOpacity: ".18" }),
        jsx("stop", { offset: "100%", stopColor: "var(--accent)", stopOpacity: "0" })
      ]
    })
  });

  const failureMarkers = points
    .filter((point) => point.status !== "pass")
    .map((point) =>
      jsx(
        "circle",
        {
          cx: point.x,
          cy: point.y,
          r: "4",
          fill: "var(--bg)",
          stroke: "var(--fail)",
          strokeWidth: "1.5"
        },
        point.id
      )
    );

  return jsxs("svg", {
    viewBox: "0 0 960 140",
    style: { width: "100%", display: "block" },
    children: [
      gradient,
      jsx("line", { x1: "0", y1: "20", x2: "960", y2: "20", stroke: "var(--line)", strokeDasharray: "2 4" }),
      jsx("line", { x1: "0", y1: "70", x2: "960", y2: "70", stroke: "var(--line)", strokeDasharray: "2 4" }),
      jsx("line", { x1: "0", y1: "120", x2: "960", y2: "120", stroke: "var(--line)" }),
      jsx("path", { d: fillPath, fill: "url(#hz-fill)" }),
      jsx("path", {
        d: linePath,
        fill: "none",
        stroke: "var(--accent)",
        strokeWidth: "1.4",
        strokeLinecap: "round",
        style: {
          filter: "drop-shadow(0 0 5px color-mix(in srgb, var(--accent) 50%, transparent))"
        }
      }),
      failureMarkers
    ]
  });
}
|
|
22177
|
-
function
|
|
22178
|
-
|
|
22179
|
-
|
|
22180
|
-
|
|
22181
|
-
|
|
22578
|
+
/**
 * Renders a run's trace as a vertical list of numbered steps.
 *
 * For every event the kind, label, title and body are resolved through the
 * `getEventKind` / `getEventLabel` / `getEventTitle` / `getEventBody`
 * helpers (defined elsewhere in this file). Each step is keyed by
 * `event.eventId` and its animation is delayed by 55ms per position, so the
 * steps appear one after another.
 *
 * @param {{ events: Array<{ eventId: *, type: string }> }} props
 * @returns A `div.trace` element containing one `div.t-step` per event.
 */
function TraceTimeline({ events }) {
  const { jsx, jsxs } = import_jsx_runtime;
  const finalIndex = events.length - 1;

  const steps = events.map((event, position) => {
    const kind = getEventKind(event.type);
    const label = getEventLabel(event.type);
    const title = getEventTitle(event);
    const body = getEventBody(event);

    // The final step carries the "p" modifier; all earlier ones are "active".
    const nodeClass = position === finalIndex ? "t-node p" : "t-node active";

    const node = jsx("div", { className: nodeClass, children: position + 1 });
    const heading = jsxs("div", {
      className: "t-head",
      children: [
        jsx("span", { className: `t-kind ${kind}`, children: label }),
        jsx("span", { className: "t-title", children: title })
      ]
    });
    const detail = body ? jsx("div", { className: "t-body", children: body }) : null;

    return jsxs(
      "div",
      {
        className: "t-step",
        style: { animationDelay: `${position * 55}ms` },
        children: [node, heading, detail]
      },
      event.eventId
    );
  });

  return jsx("div", { className: "trace", children: steps });
}
|
|
22183
|
-
function
|
|
22184
|
-
const
|
|
22185
|
-
return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("
|
|
22186
|
-
/* @__PURE__ */ (0, import_jsx_runtime.
|
|
22187
|
-
|
|
22188
|
-
|
|
22189
|
-
|
|
22190
|
-
|
|
22191
|
-
|
|
22192
|
-
|
|
22193
|
-
|
|
22194
|
-
] })
|
|
22605
|
+
/**
 * Renders the "Failure" panel for a run.
 *
 * Shows, in order: the run's `errorDetail` (when truthy), then one line per
 * evaluator result whose status is "fail" (keyed by `evaluatorId`). When
 * neither is available, a generic hint pointing at the trace is shown.
 *
 * @param {{ detail: { errorDetail?: *, evaluatorResults: Array<{ evaluatorId: *, status: string, message: * }> } }} props
 * @returns A `div.failure-panel-block` element.
 */
function FailureBlock({ detail }) {
  const { jsx, jsxs } = import_jsx_runtime;

  const failingEvaluators = detail.evaluatorResults.filter(
    (result) => result.status === "fail"
  );
  const errorDetail = detail.errorDetail;

  const heading = jsx("div", { className: "fp-label", children: "Failure" });

  let errorLine = null;
  if (errorDetail) {
    errorLine = jsx("div", { className: "fp-item", children: errorDetail });
  }

  const evaluatorLines = failingEvaluators.map((result) =>
    jsxs(
      "div",
      {
        className: "fp-item",
        children: [
          jsx("span", { className: "fp-x", children: "\u2717" }),
          " ",
          result.evaluatorId,
          ": ",
          result.message
        ]
      },
      result.evaluatorId
    )
  );

  // Only fall back to the generic hint when there is nothing specific to show.
  let fallbackLine = null;
  if (!errorDetail && failingEvaluators.length === 0) {
    fallbackLine = jsx("div", {
      className: "fp-item",
      children: "Run did not pass. Inspect the trace for the first divergence."
    });
  }

  return jsxs("div", {
    className: "failure-panel-block",
    children: [heading, errorLine, evaluatorLines, fallbackLine]
  });
}
|
|
22197
|
-
function
|
|
22198
|
-
return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("
|
|
22199
|
-
/* @__PURE__ */ (0, import_jsx_runtime.
|
|
22200
|
-
|
|
22201
|
-
|
|
22202
|
-
|
|
22203
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", { className: "stats compact-stats", children: [
|
|
22204
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)(Stat, { label: "Regressions", value: props.data.regressions.length }),
|
|
22205
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)(Stat, { label: "Improvements", value: props.data.improvements.length }),
|
|
22206
|
-
/* @__PURE__ */ (0, import_jsx_runtime.jsx)(Stat, { label: "Unchanged", value: props.data.unchanged.length })
|
|
22207
|
-
] })
|
|
22620
|
+
/**
 * Renders the static placeholder shown when there is nothing to display
 * yet: a mark, a title, a short explanation and an example CLI command.
 *
 * @returns A `div.empty` element with four child rows.
 */
function EmptyIdle() {
  const { jsx, jsxs } = import_jsx_runtime;

  // [className, text] for each row, in display order.
  const rows = [
    ["em-mark", "ARL"],
    ["em-title", "Awaiting signal"],
    ["em-sub", "Run a scenario from the CLI and traces will appear here in real time."],
    ["em-cmd", "agentlab run --scenario refund-flow"]
  ];

  return jsxs("div", {
    className: "empty",
    children: rows.map(([className, children]) => jsx("div", { className, children }))
  });
}
|
|
22210
|
-
/**
 * Builds the human-readable failure lines for a run.
 *
 * The list contains the run's error (when `errorDetail` is truthy) followed
 * by one line per evaluator result with status "fail". If the run's status
 * is not "pass" and nothing else was collected, a single generic hint is
 * returned instead.
 *
 * @param {{ errorDetail?: *, evaluatorResults: Iterable<{ evaluatorId: *, status: string, message: * }>, run: { status: string } }} detail
 * @returns {string[]} Summary lines; empty for a passing run with no failures.
 */
function getFailureSummaryItems(detail) {
  const errorLines = detail.errorDetail ? [`Error: ${detail.errorDetail}`] : [];

  const evaluatorLines = [...detail.evaluatorResults]
    .filter((result) => result.status === "fail")
    .map((result) => `Evaluator ${result.evaluatorId}: ${result.message}`);

  const items = [...errorLines, ...evaluatorLines];

  const runFailed = detail.run.status !== "pass";
  if (runFailed && items.length === 0) {
    return ["Run did not pass. Inspect evaluator results and trace for the first divergence."];
  }
  return items;
}
|
|
22225
22628
|
/**
 * Aggregates headline numbers for a list of runs.
 *
 * Counts how many runs have status "pass", "fail" and "error" (any other
 * status only contributes to `total`) and reports the suite and provider of
 * the first run in the list, or "-" when that value is missing.
 *
 * @param {Array<{ status: string, suite?: string, provider?: string }>} runs
 * @returns {{ total: number, pass: number, fail: number, error: number, latestSuite: string, latestProvider: string }}
 */
function summarizeRuns(runs) {
  const tally = { pass: 0, fail: 0, error: 0 };
  runs.forEach((run) => {
    switch (run.status) {
      case "pass":
        tally.pass += 1;
        break;
      case "fail":
        tally.fail += 1;
        break;
      case "error":
        tally.error += 1;
        break;
      default:
        break;
    }
  });

  const newest = runs[0];
  return {
    total: runs.length,
    ...tally,
    latestSuite: newest?.suite ?? "-",
    latestProvider: newest?.provider ?? "-"
  };
}
|
|
22235
|
-
/**
 * Turns an event type identifier into a display label by replacing every
 * underscore with a space (e.g. "tool_call" becomes "tool call").
 *
 * @param {string} type - Event type identifier.
 * @returns {string} The label with underscores replaced by spaces.
 */
function formatEventLabel(type) {
  const words = type.split("_");
  return words.join(" ");
}
|
|
22238
|
-
/**
 * Maps a risk level to the status name used for its pill:
 * "high" gives "fail", "medium" gives "error", and anything else gives "pass".
 *
 * @param {*} risk - Risk level; only "high" and "medium" are special-cased.
 * @returns {"fail" | "error" | "pass"}
 */
function mapRiskToPill(risk) {
  switch (risk) {
    case "high":
      return "fail";
    case "medium":
      return "error";
    default:
      return "pass";
  }
}
|
|
22247
|
-
/**
 * Maps a comparison classification to a tone name by substring match.
 *
 * Rules are checked in order and the first hit wins: "regress" gives "fail",
 * "improv" gives "pass", "changed" gives "error". Anything else is "neutral".
 * Note that "unchanged" contains "changed" and therefore maps to "error".
 *
 * @param {string} classification
 * @returns {"fail" | "pass" | "error" | "neutral"}
 */
function mapClassificationToTone(classification) {
  // [substring to look for, resulting tone], in priority order.
  const toneRules = [
    ["regress", "fail"],
    ["improv", "pass"],
    ["changed", "error"]
  ];
  const matched = toneRules.find(([fragment]) => classification.includes(fragment));
  return matched ? matched[1] : "neutral";
}
|
|
22259
|
-
/**
 * Formats a number with an explicit sign: values greater than zero get a
 * leading "+", everything else is stringified as-is (negative numbers keep
 * their own "-", zero has no sign).
 *
 * @param {number} value
 * @returns {string}
 */
function signed(value) {
  const prefix = value > 0 ? "+" : "";
  return `${prefix}${value}`;
}
|
|
22262
|
-
/**
 * Derives the current UI route from `window.location.href`.
 *
 * - `/runs/<id>` gives `{ type: "detail", runId }`, with the id percent-decoded
 * - `/compare-suite` gives `{ type: "compare-suite", baselineBatch, candidateBatch }`
 * - `/compare` gives `{ type: "compare", baseline, candidate }`
 * - anything else gives `{ type: "list" }`
 *
 * Query parameters that are absent are reported as `undefined`.
 *
 * A run id containing a malformed percent-escape (for example a truncated
 * `%E0%A4%A`) makes `decodeURIComponent` throw a `URIError`. Rather than
 * letting that crash routing, the raw path segment is used as the id.
 *
 * @returns {{ type: "detail", runId: string }
 *   | { type: "compare-suite", baselineBatch: (string|undefined), candidateBatch: (string|undefined) }
 *   | { type: "compare", baseline: (string|undefined), candidate: (string|undefined) }
 *   | { type: "list" }}
 */
function getRoute() {
  const { pathname, searchParams } = new URL(window.location.href);
  const queryParam = (name) => searchParams.get(name) ?? undefined;

  const runsPrefix = "/runs/";
  if (pathname.startsWith(runsPrefix)) {
    const rawRunId = pathname.slice(runsPrefix.length);
    let runId;
    try {
      runId = decodeURIComponent(rawRunId);
    } catch (error) {
      if (!(error instanceof URIError)) {
        throw error;
      }
      // Malformed escape sequence: fall back to the undecoded segment.
      runId = rawRunId;
    }
    return { type: "detail", runId };
  }

  if (pathname === "/compare-suite") {
    return {
      type: "compare-suite",
      baselineBatch: queryParam("baselineBatch"),
      candidateBatch: queryParam("candidateBatch")
    };
  }

  if (pathname === "/compare") {
    return {
      type: "compare",
      baseline: queryParam("baseline"),
      candidate: queryParam("candidate")
    };
  }

  return { type: "list" };
}
|
|
22283
22638
|
|
|
22284
22639
|
// src/ui/client.tsx
|
|
22285
22640
|
var import_jsx_runtime2 = __toESM(require_jsx_runtime(), 1);
|