@railtownai/railtracks-visualizer 0.0.57 → 0.0.59
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/index.js +208 -55
- package/dist/esm/index.js +208 -55
- package/dist/types/agenthub/components/AggregateResultsTable.d.ts +6 -1
- package/dist/types/agenthub/pages/evaluations.types.d.ts +4 -1
- package/dist/types/agenthub/pages/evaluator-result-page.d.ts +6 -1
- package/dist/types/agenthub/pages/evaluator-result.d.ts +5 -0
- package/dist/types/agenthub/pages/session-details.d.ts +6 -0
- package/dist/types/agenthub/utils/evaluatorResultFromDto.d.ts +1 -1
- package/dist/types/agenthub/utils/llmInferenceAggregateTree.d.ts +9 -2
- package/dist/types/agenthub/utils/toolUseAggregateTree.d.ts +18 -1
- package/dist/types/dto/Evaluation.d.ts +7 -1
- package/package.json +1 -1
package/dist/cjs/index.js
CHANGED
|
@@ -113599,7 +113599,7 @@ const RunContent = ({ run, isDarkMode, showTimeline })=>{
|
|
|
113599
113599
|
height: "100%"
|
|
113600
113600
|
})));
|
|
113601
113601
|
};
|
|
113602
|
-
const SessionDetails = ({ session, open, onClose })=>{
|
|
113602
|
+
const SessionDetails = ({ session, open, onClose, initialNodeId, initialRunId, loading = false })=>{
|
|
113603
113603
|
const { isDarkMode, theme } = useTheme$1();
|
|
113604
113604
|
const [isFullScreen, setIsFullScreen] = React.useState(false);
|
|
113605
113605
|
const [selectedNodeKey, setSelectedNodeKey] = React.useState(null);
|
|
@@ -113652,8 +113652,7 @@ const SessionDetails = ({ session, open, onClose })=>{
|
|
|
113652
113652
|
}, [
|
|
113653
113653
|
open
|
|
113654
113654
|
]);
|
|
113655
|
-
// Default to first root node when session loads
|
|
113656
|
-
// Combined with session_id reset to avoid effect ordering overwriting the selection.
|
|
113655
|
+
// Default to first root node when session loads, or use initialNodeId/initialRunId when provided.
|
|
113657
113656
|
const hasAutoSelectedRef = React.useRef(false);
|
|
113658
113657
|
React.useEffect(()=>{
|
|
113659
113658
|
if (!open) {
|
|
@@ -113662,6 +113661,19 @@ const SessionDetails = ({ session, open, onClose })=>{
|
|
|
113662
113661
|
}
|
|
113663
113662
|
if (!session?.runs?.length) return;
|
|
113664
113663
|
if (hasAutoSelectedRef.current && selectedNodeInfo !== null) return;
|
|
113664
|
+
// Prefer initial node when provided (e.g. from evaluation link)
|
|
113665
|
+
if (initialNodeId) {
|
|
113666
|
+
const run = initialRunId ? session.runs.find((r)=>r.run_id === initialRunId) : session.runs.find((r)=>r.nodes?.some((n)=>n.identifier === initialNodeId));
|
|
113667
|
+
if (run?.run_id && getNodeFromRun(initialNodeId, run)) {
|
|
113668
|
+
handleSelectNode({
|
|
113669
|
+
nodeId: initialNodeId,
|
|
113670
|
+
runId: run.run_id
|
|
113671
|
+
});
|
|
113672
|
+
hasAutoSelectedRef.current = true;
|
|
113673
|
+
return;
|
|
113674
|
+
}
|
|
113675
|
+
}
|
|
113676
|
+
// Fallback: first root node
|
|
113665
113677
|
for (const run of session.runs){
|
|
113666
113678
|
const firstRoot = getFirstRootNodeFromRun(run);
|
|
113667
113679
|
if (firstRoot && run.run_id) {
|
|
@@ -113678,7 +113690,9 @@ const SessionDetails = ({ session, open, onClose })=>{
|
|
|
113678
113690
|
}, [
|
|
113679
113691
|
open,
|
|
113680
113692
|
session,
|
|
113681
|
-
selectedNodeInfo
|
|
113693
|
+
selectedNodeInfo,
|
|
113694
|
+
initialNodeId,
|
|
113695
|
+
initialRunId
|
|
113682
113696
|
]);
|
|
113683
113697
|
// Reset hasAutoSelectedRef when session changes so auto-select runs for the new session
|
|
113684
113698
|
React.useEffect(()=>{
|
|
@@ -113691,7 +113705,13 @@ const SessionDetails = ({ session, open, onClose })=>{
|
|
|
113691
113705
|
// Get the active run for full screen display (use selected run or first run)
|
|
113692
113706
|
const activeRun = session?.runs.find((r)=>r.run_id === selectedNodeInfo?.runId) || session?.runs?.[0];
|
|
113693
113707
|
const extraButtonsIconSize = 24;
|
|
113694
|
-
return /*#__PURE__*/ React.createElement(
|
|
113708
|
+
return /*#__PURE__*/ React.createElement(ConfigProvider, {
|
|
113709
|
+
theme: {
|
|
113710
|
+
token: {
|
|
113711
|
+
fontFamily: "inherit"
|
|
113712
|
+
}
|
|
113713
|
+
}
|
|
113714
|
+
}, /*#__PURE__*/ React.createElement(Drawer$2, {
|
|
113695
113715
|
title: /*#__PURE__*/ React.createElement(React.Fragment, null, session?.name || "Unnamed Session", " -", " ", session?.start_time ? moment(session.start_time * 1000).fromNow() : "N/A"),
|
|
113696
113716
|
placement: "right",
|
|
113697
113717
|
size: isFullScreen ? "100%" : "70%",
|
|
@@ -113765,7 +113785,18 @@ const SessionDetails = ({ session, open, onClose })=>{
|
|
|
113765
113785
|
flexDirection: "column"
|
|
113766
113786
|
}
|
|
113767
113787
|
}
|
|
113768
|
-
},
|
|
113788
|
+
}, loading ? /*#__PURE__*/ React.createElement("div", {
|
|
113789
|
+
style: {
|
|
113790
|
+
display: "flex",
|
|
113791
|
+
alignItems: "center",
|
|
113792
|
+
justifyContent: "center",
|
|
113793
|
+
height: "100%",
|
|
113794
|
+
color: isDarkMode ? "#8c8c8c" : "#595959"
|
|
113795
|
+
}
|
|
113796
|
+
}, /*#__PURE__*/ React.createElement(Spin, {
|
|
113797
|
+
size: "large",
|
|
113798
|
+
tip: "Loading session..."
|
|
113799
|
+
})) : !session ? /*#__PURE__*/ React.createElement("div", {
|
|
113769
113800
|
style: {
|
|
113770
113801
|
display: "flex",
|
|
113771
113802
|
alignItems: "center",
|
|
@@ -113823,7 +113854,7 @@ const SessionDetails = ({ session, open, onClose })=>{
|
|
|
113823
113854
|
selectedNode: selectedNodeInfo,
|
|
113824
113855
|
session: session,
|
|
113825
113856
|
isDarkMode: isDarkMode
|
|
113826
|
-
}))));
|
|
113857
|
+
})))));
|
|
113827
113858
|
};
|
|
113828
113859
|
|
|
113829
113860
|
/**
|
|
@@ -114024,14 +114055,20 @@ function dtoMetricToMetric(def) {
|
|
|
114024
114055
|
};
|
|
114025
114056
|
}
|
|
114026
114057
|
}
|
|
114027
|
-
// Build runs from agents.agent_node_ids
|
|
114028
|
-
const
|
|
114029
|
-
|
|
114030
|
-
|
|
114031
|
-
|
|
114058
|
+
// Build runs from agents.agent_node_ids (support legacy string[] and new { session_id, agent_node_id }[])
|
|
114059
|
+
const agentNodeEntries = evalDto.agents.flatMap((a)=>{
|
|
114060
|
+
const ids = a.agent_node_ids ?? [];
|
|
114061
|
+
return ids.map((entry)=>typeof entry === "string" ? {
|
|
114062
|
+
session_id: "",
|
|
114063
|
+
agent_node_id: entry
|
|
114064
|
+
} : entry);
|
|
114065
|
+
});
|
|
114066
|
+
const runs = agentNodeEntries.map(({ session_id, agent_node_id })=>({
|
|
114067
|
+
session_id,
|
|
114068
|
+
run_id: agent_node_id,
|
|
114032
114069
|
results: {}
|
|
114033
114070
|
}));
|
|
114034
|
-
// Agent name(s) for display - comma-separated when multiple;
|
|
114071
|
+
// Agent name(s) for display - comma-separated when multiple; pass through agent_node_ids for LLM tree (both formats)
|
|
114035
114072
|
const agents = evalDto.agents.map((a)=>({
|
|
114036
114073
|
agent_name: a.agent_name,
|
|
114037
114074
|
agent_node_ids: a.agent_node_ids
|
|
@@ -114231,7 +114268,13 @@ const EvaluationsCompareView = ({ evaluations, evaluationId1, evaluationId2, onE
|
|
|
114231
114268
|
];
|
|
114232
114269
|
const evaluation1Label = evaluation1 ? `${evaluation1.name || "Unnamed Evaluation"} (${evaluation1.agent_name})` : "Select evaluation 1";
|
|
114233
114270
|
const evaluation2Label = evaluation2 ? `${evaluation2.name || "Unnamed Evaluation"} (${evaluation2.agent_name})` : "Select evaluation 2";
|
|
114234
|
-
return /*#__PURE__*/ React.createElement(
|
|
114271
|
+
return /*#__PURE__*/ React.createElement(ConfigProvider, {
|
|
114272
|
+
theme: {
|
|
114273
|
+
token: {
|
|
114274
|
+
fontFamily: "inherit"
|
|
114275
|
+
}
|
|
114276
|
+
}
|
|
114277
|
+
}, backHref && /*#__PURE__*/ React.createElement("a", {
|
|
114235
114278
|
href: backHref,
|
|
114236
114279
|
style: {
|
|
114237
114280
|
display: "inline-flex",
|
|
@@ -114426,7 +114469,13 @@ const EvaluationsCompareDrawer = ({ open, onClose, evaluationId1, evaluationId2,
|
|
|
114426
114469
|
staticMethods.error("Failed to copy share link");
|
|
114427
114470
|
}
|
|
114428
114471
|
};
|
|
114429
|
-
return /*#__PURE__*/ React.createElement(
|
|
114472
|
+
return /*#__PURE__*/ React.createElement(ConfigProvider, {
|
|
114473
|
+
theme: {
|
|
114474
|
+
token: {
|
|
114475
|
+
fontFamily: "inherit"
|
|
114476
|
+
}
|
|
114477
|
+
}
|
|
114478
|
+
}, /*#__PURE__*/ React.createElement(Drawer$2, {
|
|
114430
114479
|
title: "Compare Evaluations",
|
|
114431
114480
|
placement: "right",
|
|
114432
114481
|
size: "60%",
|
|
@@ -114468,7 +114517,7 @@ const EvaluationsCompareDrawer = ({ open, onClose, evaluationId1, evaluationId2,
|
|
|
114468
114517
|
onEvaluationId1Change: onEvaluationId1Change,
|
|
114469
114518
|
onEvaluationId2Change: onEvaluationId2Change,
|
|
114470
114519
|
showEvaluationComparisonDropdowns: false
|
|
114471
|
-
}));
|
|
114520
|
+
})));
|
|
114472
114521
|
};
|
|
114473
114522
|
|
|
114474
114523
|
const defaultGetEvaluatorResultsHref = (evaluationId, evaluatorId)=>`#/evaluations/${evaluationId}/results/${evaluatorId}`;
|
|
@@ -114494,7 +114543,13 @@ const EvaluationDetailsDrawer = ({ evaluation, open, onClose, getEvaluatorResult
|
|
|
114494
114543
|
const normalizedEvaluation = React.useMemo(()=>evaluation && isEvaluationDto(evaluation) ? transformEvaluation(evaluation) : evaluation, [
|
|
114495
114544
|
evaluation
|
|
114496
114545
|
]);
|
|
114497
|
-
return /*#__PURE__*/ React.createElement(
|
|
114546
|
+
return /*#__PURE__*/ React.createElement(ConfigProvider, {
|
|
114547
|
+
theme: {
|
|
114548
|
+
token: {
|
|
114549
|
+
fontFamily: "inherit"
|
|
114550
|
+
}
|
|
114551
|
+
}
|
|
114552
|
+
}, /*#__PURE__*/ React.createElement(Drawer$2, {
|
|
114498
114553
|
title: normalizedEvaluation ? /*#__PURE__*/ React.createElement("div", {
|
|
114499
114554
|
style: {
|
|
114500
114555
|
display: "flex",
|
|
@@ -114699,11 +114754,17 @@ const EvaluationDetailsDrawer = ({ evaluation, open, onClose, getEvaluatorResult
|
|
|
114699
114754
|
getPopupContainer: ()=>document.body
|
|
114700
114755
|
}, tag) : tag;
|
|
114701
114756
|
})));
|
|
114702
|
-
})))));
|
|
114757
|
+
}))))));
|
|
114703
114758
|
};
|
|
114704
114759
|
|
|
114705
114760
|
const GITHUB_URL = "https://github.com/RailtownAI/railtracks/";
|
|
114706
|
-
const EvaluationsErrorCard = ({ error, onRetry })=>/*#__PURE__*/ React.createElement(
|
|
114761
|
+
const EvaluationsErrorCard = ({ error, onRetry })=>/*#__PURE__*/ React.createElement(ConfigProvider, {
|
|
114762
|
+
theme: {
|
|
114763
|
+
token: {
|
|
114764
|
+
fontFamily: "inherit"
|
|
114765
|
+
}
|
|
114766
|
+
}
|
|
114767
|
+
}, /*#__PURE__*/ React.createElement(Alert, {
|
|
114707
114768
|
type: "error",
|
|
114708
114769
|
title: "Error loading evaluations",
|
|
114709
114770
|
description: /*#__PURE__*/ React.createElement("div", {
|
|
@@ -114739,7 +114800,7 @@ const EvaluationsErrorCard = ({ error, onRetry })=>/*#__PURE__*/ React.createEle
|
|
|
114739
114800
|
})
|
|
114740
114801
|
}, "Retry") : undefined,
|
|
114741
114802
|
showIcon: true
|
|
114742
|
-
});
|
|
114803
|
+
}));
|
|
114743
114804
|
|
|
114744
114805
|
const EvaluationsTable = ({ evaluations, loading = false, error = null, onRefresh, onRowClick, onCompare, compareIdsFromUrl, onCompareUrlChange, showFilters = true, showCompare = true, emptyMessage, title, pagination: serverPagination, onFiltersChange, onFetchEvaluationsByIds, errorRender })=>{
|
|
114745
114806
|
const { theme } = useTheme$1();
|
|
@@ -114991,7 +115052,13 @@ const EvaluationsTable = ({ evaluations, loading = false, error = null, onRefres
|
|
|
114991
115052
|
target: "_blank",
|
|
114992
115053
|
rel: "noopener noreferrer"
|
|
114993
115054
|
}, "view our documentation on how to create evaluations"), "."));
|
|
114994
|
-
return /*#__PURE__*/ React.createElement(
|
|
115055
|
+
return /*#__PURE__*/ React.createElement(ConfigProvider, {
|
|
115056
|
+
theme: {
|
|
115057
|
+
token: {
|
|
115058
|
+
fontFamily: "inherit"
|
|
115059
|
+
}
|
|
115060
|
+
}
|
|
115061
|
+
}, /*#__PURE__*/ React.createElement("div", {
|
|
114995
115062
|
style: {
|
|
114996
115063
|
display: "flex",
|
|
114997
115064
|
justifyContent: "space-between",
|
|
@@ -117059,12 +117126,17 @@ function isLLMNode(n) {
|
|
|
117059
117126
|
function isLLMAggregateNode(n) {
|
|
117060
117127
|
return isLLMInferenceAggregate(n);
|
|
117061
117128
|
}
|
|
117062
|
-
/**
|
|
117129
|
+
/** Normalize agent_node_ids to { nodeId, sessionId? }[] */ function normalizeAgentNodeEntries(agents) {
|
|
117063
117130
|
const map = new Map();
|
|
117064
117131
|
if (!agents) return map;
|
|
117065
117132
|
for (const a of agents){
|
|
117066
|
-
for (const
|
|
117067
|
-
|
|
117133
|
+
for (const entry of a.agent_node_ids ?? []){
|
|
117134
|
+
const nodeId = typeof entry === "string" ? entry : entry.agent_node_id;
|
|
117135
|
+
const sessionId = typeof entry === "string" ? undefined : entry.session_id;
|
|
117136
|
+
map.set(nodeId, {
|
|
117137
|
+
agentName: a.agent_name,
|
|
117138
|
+
sessionId
|
|
117139
|
+
});
|
|
117068
117140
|
}
|
|
117069
117141
|
}
|
|
117070
117142
|
return map;
|
|
@@ -117076,7 +117148,7 @@ function isLLMAggregateNode(n) {
|
|
|
117076
117148
|
*/ function buildLLMInferenceTreeFromAggregateResults(agg) {
|
|
117077
117149
|
const { roots, nodes, agents } = agg;
|
|
117078
117150
|
const nodeMap = nodes;
|
|
117079
|
-
const
|
|
117151
|
+
const agentNodeInfo = normalizeAgentNodeEntries(agents);
|
|
117080
117152
|
// Resolve root IDs to aggregate nodes
|
|
117081
117153
|
const rootAggregates = [];
|
|
117082
117154
|
for (const id of roots){
|
|
@@ -117109,14 +117181,17 @@ function isLLMAggregateNode(n) {
|
|
|
117109
117181
|
}
|
|
117110
117182
|
const sortedCalls = Array.from(llmCallIndices).sort((a, b)=>a - b);
|
|
117111
117183
|
const sortedMetrics = Array.from(metricNames).sort();
|
|
117112
|
-
// Group agent_node_ids by agent
|
|
117113
|
-
const
|
|
117114
|
-
for (const [nodeId,
|
|
117115
|
-
if (!
|
|
117116
|
-
|
|
117184
|
+
// Group agent_node_ids by agent: { nodeId, sessionId? }[]
|
|
117185
|
+
const agentToNodeEntries = new Map();
|
|
117186
|
+
for (const [nodeId, info] of agentNodeInfo){
|
|
117187
|
+
if (!agentToNodeEntries.has(info.agentName)) agentToNodeEntries.set(info.agentName, []);
|
|
117188
|
+
agentToNodeEntries.get(info.agentName).push({
|
|
117189
|
+
nodeId,
|
|
117190
|
+
sessionId: info.sessionId
|
|
117191
|
+
});
|
|
117117
117192
|
}
|
|
117118
117193
|
// If no agents provided, infer from data: collect unique agent_data_ids into "Agent (root)"
|
|
117119
|
-
if (
|
|
117194
|
+
if (agentToNodeEntries.size === 0) {
|
|
117120
117195
|
const seen = new Set();
|
|
117121
117196
|
for (const aggNode of rootAggregates){
|
|
117122
117197
|
for (const childId of aggNode.children ?? []){
|
|
@@ -117128,17 +117203,21 @@ function isLLMAggregateNode(n) {
|
|
|
117128
117203
|
}
|
|
117129
117204
|
}
|
|
117130
117205
|
if (seen.size > 0) {
|
|
117131
|
-
|
|
117206
|
+
agentToNodeEntries.set("Agent (root)", Array.from(seen).map((nodeId)=>({
|
|
117207
|
+
nodeId,
|
|
117208
|
+
sessionId: undefined
|
|
117209
|
+
})));
|
|
117132
117210
|
}
|
|
117133
117211
|
}
|
|
117134
117212
|
const rows = [];
|
|
117135
|
-
for (const [agentName,
|
|
117213
|
+
for (const [agentName, nodeEntries] of agentToNodeEntries){
|
|
117214
|
+
const nodeIds = nodeEntries.map((e)=>e.nodeId);
|
|
117136
117215
|
const agentKey = `agent-${agentName.replace(/\s+/g, "-")}`;
|
|
117137
117216
|
const llmCallChildren = [];
|
|
117138
117217
|
for (const callIdx of sortedCalls){
|
|
117139
117218
|
const callKey = `${agentKey}-call-${callIdx}`;
|
|
117140
117219
|
const nodeChildren = [];
|
|
117141
|
-
|
|
117220
|
+
nodeEntries.forEach(({ nodeId, sessionId })=>{
|
|
117142
117221
|
const metrics = {};
|
|
117143
117222
|
for (const m of sortedMetrics){
|
|
117144
117223
|
const k = `${nodeId}|${callIdx}|${m}`;
|
|
@@ -117154,6 +117233,7 @@ function isLLMAggregateNode(n) {
|
|
|
117154
117233
|
metrics,
|
|
117155
117234
|
value: metrics[primaryMetric],
|
|
117156
117235
|
agentNodeId: nodeId,
|
|
117236
|
+
sessionId,
|
|
117157
117237
|
children: undefined
|
|
117158
117238
|
});
|
|
117159
117239
|
}
|
|
@@ -117232,7 +117312,19 @@ const LATENCY_RESULT_PREFIXES = [
|
|
|
117232
117312
|
"Latency/",
|
|
117233
117313
|
"Runtime/"
|
|
117234
117314
|
];
|
|
117235
|
-
|
|
117315
|
+
/** Build map from agent_node_id -> session_id from agents (supports both formats) */ function buildAgentIdToSessionMap(agents) {
|
|
117316
|
+
const map = new Map();
|
|
117317
|
+
if (!agents) return map;
|
|
117318
|
+
for (const a of agents){
|
|
117319
|
+
for (const entry of a.agent_node_ids ?? []){
|
|
117320
|
+
const nodeId = typeof entry === "string" ? entry : entry.agent_node_id;
|
|
117321
|
+
const sessionId = typeof entry === "string" ? undefined : entry.session_id;
|
|
117322
|
+
if (sessionId) map.set(nodeId, sessionId);
|
|
117323
|
+
}
|
|
117324
|
+
}
|
|
117325
|
+
return map;
|
|
117326
|
+
}
|
|
117327
|
+
function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCountByTool, latencyResults, failureResults, agentIdToSessionId) {
|
|
117236
117328
|
const toolNamesFromAgg = Array.from(new Set([
|
|
117237
117329
|
...latencyByTool.keys(),
|
|
117238
117330
|
...failureRateByTool.keys(),
|
|
@@ -117295,6 +117387,7 @@ function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCount
|
|
|
117295
117387
|
const cnt = uniqueLatencies.length;
|
|
117296
117388
|
let failed = 0;
|
|
117297
117389
|
const failList = failureByToolAgentIndex.get(`${toolName}|${aid}`) ?? [];
|
|
117390
|
+
const sessionId = agentIdToSessionId.get(aid);
|
|
117298
117391
|
const leaves = uniqueLatencies.map((l, idx)=>{
|
|
117299
117392
|
const failVal = failureByKey.get(l.key);
|
|
117300
117393
|
const failValByIndex = failList[idx];
|
|
@@ -117302,12 +117395,15 @@ function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCount
|
|
|
117302
117395
|
const isFailed = resolvedFail !== undefined && resolvedFail >= 0.5;
|
|
117303
117396
|
if (isFailed) failed++;
|
|
117304
117397
|
const leafName = l.toolNodeId ?? `invocation-${idx + 1}`;
|
|
117398
|
+
const nodeId = l.toolNodeId ?? undefined;
|
|
117305
117399
|
return {
|
|
117306
117400
|
key: `${toolName}-${aid}-${idx}`,
|
|
117307
117401
|
name: leafName,
|
|
117308
117402
|
runtimeMs: Math.round(l.value * 1000),
|
|
117309
117403
|
failureRate: isFailed ? "Failed" : "Success",
|
|
117310
117404
|
level: 3,
|
|
117405
|
+
sessionId: sessionId ?? undefined,
|
|
117406
|
+
nodeId,
|
|
117311
117407
|
children: undefined
|
|
117312
117408
|
};
|
|
117313
117409
|
});
|
|
@@ -117319,6 +117415,8 @@ function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCount
|
|
|
117319
117415
|
runtimeMs: Math.round(totalRuntimeMs),
|
|
117320
117416
|
failureRate: agentFailurePct,
|
|
117321
117417
|
level: 2,
|
|
117418
|
+
sessionId: sessionId ?? undefined,
|
|
117419
|
+
nodeId: aid,
|
|
117322
117420
|
children: leaves.length > 0 ? leaves : undefined
|
|
117323
117421
|
});
|
|
117324
117422
|
}
|
|
@@ -117363,12 +117461,13 @@ function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCount
|
|
|
117363
117461
|
}
|
|
117364
117462
|
const latencyResults = toolResults.filter((r)=>LATENCY_RESULT_PREFIXES.some((prefix)=>r.result_name?.startsWith(prefix)));
|
|
117365
117463
|
const failureResults = toolResults.filter((r)=>r.result_name?.startsWith("FailureRate/"));
|
|
117366
|
-
|
|
117464
|
+
const agentIdToSessionId = buildAgentIdToSessionMap(agg.agents);
|
|
117465
|
+
return buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCountByTool, latencyResults, failureResults, agentIdToSessionId);
|
|
117367
117466
|
}
|
|
117368
117467
|
/**
|
|
117369
117468
|
* Builds a tree from raw ToolUseEvaluator results (legacy flat format).
|
|
117370
117469
|
* Structure: tool (level 1) → agent (level 2) → invocation (level 3).
|
|
117371
|
-
*/ function buildToolUseTreeFromRawResults(rawResults) {
|
|
117470
|
+
*/ function buildToolUseTreeFromRawResults(rawResults, agents) {
|
|
117372
117471
|
const toolResults = rawResults.filter((r)=>isToolResult(r));
|
|
117373
117472
|
const toolAggregates = rawResults.filter((r)=>isToolAggregate(r));
|
|
117374
117473
|
const latencyByTool = new Map();
|
|
@@ -117390,7 +117489,8 @@ function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCount
|
|
|
117390
117489
|
}
|
|
117391
117490
|
const latencyResults = toolResults.filter((r)=>r.result_name.startsWith("Latency/") || r.result_name.startsWith("Runtime/"));
|
|
117392
117491
|
const failureResults = toolResults.filter((r)=>r.result_name.startsWith("FailureRate/"));
|
|
117393
|
-
|
|
117492
|
+
const agentIdToSessionId = buildAgentIdToSessionMap(agents);
|
|
117493
|
+
return buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCountByTool, latencyResults, failureResults, agentIdToSessionId);
|
|
117394
117494
|
}
|
|
117395
117495
|
|
|
117396
117496
|
function detectAggregateType(nodes) {
|
|
@@ -117457,7 +117557,7 @@ const DEFAULT_TITLES = {
|
|
|
117457
117557
|
LLMInference: "LLM Inference Evaluator - Aggregate View",
|
|
117458
117558
|
Judge: "Judge Evaluator - Aggregate View"
|
|
117459
117559
|
};
|
|
117460
|
-
const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, title, evaluatorName })=>{
|
|
117560
|
+
const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, title, evaluatorName, onAgentNodeClick })=>{
|
|
117461
117561
|
const { theme } = useTheme$1();
|
|
117462
117562
|
const [expandedRowKeys, setExpandedRowKeys] = React.useState([]);
|
|
117463
117563
|
const { aggregateType, dataSource, llmIsTree, llmExpandableKeys } = React.useMemo(()=>{
|
|
@@ -117466,7 +117566,10 @@ const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, titl
|
|
|
117466
117566
|
if (type === "ToolUse") {
|
|
117467
117567
|
return {
|
|
117468
117568
|
aggregateType: type,
|
|
117469
|
-
dataSource: buildToolUseTreeFromAggregate(
|
|
117569
|
+
dataSource: buildToolUseTreeFromAggregate({
|
|
117570
|
+
...aggregateResults,
|
|
117571
|
+
agents
|
|
117572
|
+
}),
|
|
117470
117573
|
llmIsTree: false,
|
|
117471
117574
|
llmExpandableKeys: []
|
|
117472
117575
|
};
|
|
@@ -117508,7 +117611,7 @@ const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, titl
|
|
|
117508
117611
|
if (toolResults.length > 0) {
|
|
117509
117612
|
return {
|
|
117510
117613
|
aggregateType: "ToolUse",
|
|
117511
|
-
dataSource: buildToolUseTreeFromRawResults(toolResults),
|
|
117614
|
+
dataSource: buildToolUseTreeFromRawResults(toolResults, agents),
|
|
117512
117615
|
llmIsTree: false,
|
|
117513
117616
|
llmExpandableKeys: []
|
|
117514
117617
|
};
|
|
@@ -117616,17 +117719,37 @@ const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, titl
|
|
|
117616
117719
|
}), "Name"),
|
|
117617
117720
|
dataIndex: "name",
|
|
117618
117721
|
key: "name",
|
|
117619
|
-
render: (name, row)
|
|
117722
|
+
render: (name, row)=>{
|
|
117723
|
+
const isClickable = row.level === 3 && row.agentNodeId && onAgentNodeClick;
|
|
117724
|
+
return /*#__PURE__*/ React.createElement("span", {
|
|
117620
117725
|
style: {
|
|
117621
117726
|
display: "inline-flex",
|
|
117622
117727
|
alignItems: "center",
|
|
117623
117728
|
gap: 8,
|
|
117624
117729
|
fontFamily: "monospace",
|
|
117625
|
-
fontSize: 13
|
|
117626
|
-
|
|
117730
|
+
fontSize: 13,
|
|
117731
|
+
...isClickable && {
|
|
117732
|
+
cursor: "pointer",
|
|
117733
|
+
color: theme.colors.primary
|
|
117734
|
+
}
|
|
117735
|
+
},
|
|
117736
|
+
onClick: isClickable ? (e)=>{
|
|
117737
|
+
e.stopPropagation();
|
|
117738
|
+
onAgentNodeClick(row.sessionId, row.agentNodeId);
|
|
117739
|
+
} : undefined,
|
|
117740
|
+
onKeyDown: isClickable ? (e)=>{
|
|
117741
|
+
if (e.key === "Enter" || e.key === " ") {
|
|
117742
|
+
e.preventDefault();
|
|
117743
|
+
e.stopPropagation();
|
|
117744
|
+
onAgentNodeClick(row.sessionId, row.agentNodeId);
|
|
117745
|
+
}
|
|
117746
|
+
} : undefined,
|
|
117747
|
+
role: isClickable ? "button" : undefined,
|
|
117748
|
+
tabIndex: isClickable ? 0 : undefined
|
|
117627
117749
|
}, /*#__PURE__*/ React.createElement(LLMTagForLevel, {
|
|
117628
117750
|
level: row.level
|
|
117629
|
-
}), name)
|
|
117751
|
+
}), name);
|
|
117752
|
+
}
|
|
117630
117753
|
},
|
|
117631
117754
|
...METRIC_COLUMNS.map(({ key, title, Icon })=>({
|
|
117632
117755
|
title: /*#__PURE__*/ React.createElement("span", {
|
|
@@ -117956,17 +118079,37 @@ const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, titl
|
|
|
117956
118079
|
}), "Name"),
|
|
117957
118080
|
dataIndex: "name",
|
|
117958
118081
|
key: "name",
|
|
117959
|
-
render: (name, row)
|
|
118082
|
+
render: (name, row)=>{
|
|
118083
|
+
const isClickable = (row.level === 2 || row.level === 3) && row.nodeId && onAgentNodeClick;
|
|
118084
|
+
return /*#__PURE__*/ React.createElement("span", {
|
|
117960
118085
|
style: {
|
|
117961
118086
|
display: "inline-flex",
|
|
117962
118087
|
alignItems: "center",
|
|
117963
118088
|
gap: 8,
|
|
117964
118089
|
fontFamily: "monospace",
|
|
117965
|
-
fontSize: 13
|
|
117966
|
-
|
|
118090
|
+
fontSize: 13,
|
|
118091
|
+
...isClickable && {
|
|
118092
|
+
cursor: "pointer",
|
|
118093
|
+
color: theme.colors.primary
|
|
118094
|
+
}
|
|
118095
|
+
},
|
|
118096
|
+
onClick: isClickable ? (e)=>{
|
|
118097
|
+
e.stopPropagation();
|
|
118098
|
+
onAgentNodeClick(row.sessionId, row.nodeId);
|
|
118099
|
+
} : undefined,
|
|
118100
|
+
onKeyDown: isClickable ? (e)=>{
|
|
118101
|
+
if (e.key === "Enter" || e.key === " ") {
|
|
118102
|
+
e.preventDefault();
|
|
118103
|
+
e.stopPropagation();
|
|
118104
|
+
onAgentNodeClick(row.sessionId, row.nodeId);
|
|
118105
|
+
}
|
|
118106
|
+
} : undefined,
|
|
118107
|
+
role: isClickable ? "button" : undefined,
|
|
118108
|
+
tabIndex: isClickable ? 0 : undefined
|
|
117967
118109
|
}, /*#__PURE__*/ React.createElement(TagForLevel, {
|
|
117968
118110
|
level: row.level
|
|
117969
|
-
}), name)
|
|
118111
|
+
}), name);
|
|
118112
|
+
}
|
|
117970
118113
|
},
|
|
117971
118114
|
{
|
|
117972
118115
|
title: /*#__PURE__*/ React.createElement("span", {
|
|
@@ -118276,11 +118419,14 @@ function isCategoricalMetric(def) {
|
|
|
118276
118419
|
/** Derive agent display name from DTO. */ function deriveAgentName(evaluation) {
|
|
118277
118420
|
return evaluation.agents?.map((a)=>a.agent_name).join(", ") ?? "-";
|
|
118278
118421
|
}
|
|
118279
|
-
/** Derive runs count from DTO (agent_node_ids). */ function deriveRunsCount(evaluation) {
|
|
118280
|
-
return evaluation.agents?.
|
|
118422
|
+
/** Derive runs count from DTO (agent_node_ids). Supports both string[] and { session_id, agent_node_id }[]. */ function deriveRunsCount(evaluation) {
|
|
118423
|
+
return evaluation.agents?.reduce((sum, a)=>{
|
|
118424
|
+
const ids = a.agent_node_ids ?? [];
|
|
118425
|
+
return sum + ids.length;
|
|
118426
|
+
}, 0) ?? 0;
|
|
118281
118427
|
}
|
|
118282
118428
|
|
|
118283
|
-
const EvaluatorResult = ({ evaluation, evaluatorId, backHref })=>{
|
|
118429
|
+
const EvaluatorResult = ({ evaluation, evaluatorId, backHref, onAgentNodeClick })=>{
|
|
118284
118430
|
const { theme } = useTheme$1();
|
|
118285
118431
|
const labelColor = theme.colors.mutedForeground;
|
|
118286
118432
|
const derived = evaluation ? deriveEvaluatorResultData(evaluation, evaluatorId) : null;
|
|
@@ -118299,7 +118445,13 @@ const EvaluatorResult = ({ evaluation, evaluatorId, backHref })=>{
|
|
|
118299
118445
|
const { completed_at } = evaluation;
|
|
118300
118446
|
const agentName = deriveAgentName(evaluation);
|
|
118301
118447
|
const runsCount = deriveRunsCount(evaluation);
|
|
118302
|
-
return /*#__PURE__*/ React.createElement(
|
|
118448
|
+
return /*#__PURE__*/ React.createElement(ConfigProvider, {
|
|
118449
|
+
theme: {
|
|
118450
|
+
token: {
|
|
118451
|
+
fontFamily: "inherit"
|
|
118452
|
+
}
|
|
118453
|
+
}
|
|
118454
|
+
}, /*#__PURE__*/ React.createElement("div", {
|
|
118303
118455
|
style: {
|
|
118304
118456
|
width: "100%",
|
|
118305
118457
|
minWidth: 0
|
|
@@ -118416,8 +118568,9 @@ const EvaluatorResult = ({ evaluation, evaluatorId, backHref })=>{
|
|
|
118416
118568
|
rawResults: rawResults.filter((r)=>isToolResult(r) || isToolAggregate(r)),
|
|
118417
118569
|
aggregateResults: aggregateResults,
|
|
118418
118570
|
agents: evaluation.agents,
|
|
118419
|
-
evaluatorName: evaluatorName
|
|
118420
|
-
|
|
118571
|
+
evaluatorName: evaluatorName,
|
|
118572
|
+
onAgentNodeClick: onAgentNodeClick
|
|
118573
|
+
}))));
|
|
118421
118574
|
};
|
|
118422
118575
|
|
|
118423
118576
|
function __insertCSS(code) {
|
package/dist/esm/index.js
CHANGED
|
@@ -113579,7 +113579,7 @@ const RunContent = ({ run, isDarkMode, showTimeline })=>{
|
|
|
113579
113579
|
height: "100%"
|
|
113580
113580
|
})));
|
|
113581
113581
|
};
|
|
113582
|
-
const SessionDetails = ({ session, open, onClose })=>{
|
|
113582
|
+
const SessionDetails = ({ session, open, onClose, initialNodeId, initialRunId, loading = false })=>{
|
|
113583
113583
|
const { isDarkMode, theme } = useTheme$1();
|
|
113584
113584
|
const [isFullScreen, setIsFullScreen] = useState(false);
|
|
113585
113585
|
const [selectedNodeKey, setSelectedNodeKey] = useState(null);
|
|
@@ -113632,8 +113632,7 @@ const SessionDetails = ({ session, open, onClose })=>{
|
|
|
113632
113632
|
}, [
|
|
113633
113633
|
open
|
|
113634
113634
|
]);
|
|
113635
|
-
// Default to first root node when session loads
|
|
113636
|
-
// Combined with session_id reset to avoid effect ordering overwriting the selection.
|
|
113635
|
+
// Default to first root node when session loads, or use initialNodeId/initialRunId when provided.
|
|
113637
113636
|
const hasAutoSelectedRef = React__default.useRef(false);
|
|
113638
113637
|
React__default.useEffect(()=>{
|
|
113639
113638
|
if (!open) {
|
|
@@ -113642,6 +113641,19 @@ const SessionDetails = ({ session, open, onClose })=>{
|
|
|
113642
113641
|
}
|
|
113643
113642
|
if (!session?.runs?.length) return;
|
|
113644
113643
|
if (hasAutoSelectedRef.current && selectedNodeInfo !== null) return;
|
|
113644
|
+
// Prefer initial node when provided (e.g. from evaluation link)
|
|
113645
|
+
if (initialNodeId) {
|
|
113646
|
+
const run = initialRunId ? session.runs.find((r)=>r.run_id === initialRunId) : session.runs.find((r)=>r.nodes?.some((n)=>n.identifier === initialNodeId));
|
|
113647
|
+
if (run?.run_id && getNodeFromRun(initialNodeId, run)) {
|
|
113648
|
+
handleSelectNode({
|
|
113649
|
+
nodeId: initialNodeId,
|
|
113650
|
+
runId: run.run_id
|
|
113651
|
+
});
|
|
113652
|
+
hasAutoSelectedRef.current = true;
|
|
113653
|
+
return;
|
|
113654
|
+
}
|
|
113655
|
+
}
|
|
113656
|
+
// Fallback: first root node
|
|
113645
113657
|
for (const run of session.runs){
|
|
113646
113658
|
const firstRoot = getFirstRootNodeFromRun(run);
|
|
113647
113659
|
if (firstRoot && run.run_id) {
|
|
@@ -113658,7 +113670,9 @@ const SessionDetails = ({ session, open, onClose })=>{
|
|
|
113658
113670
|
}, [
|
|
113659
113671
|
open,
|
|
113660
113672
|
session,
|
|
113661
|
-
selectedNodeInfo
|
|
113673
|
+
selectedNodeInfo,
|
|
113674
|
+
initialNodeId,
|
|
113675
|
+
initialRunId
|
|
113662
113676
|
]);
|
|
113663
113677
|
// Reset hasAutoSelectedRef when session changes so auto-select runs for the new session
|
|
113664
113678
|
React__default.useEffect(()=>{
|
|
@@ -113671,7 +113685,13 @@ const SessionDetails = ({ session, open, onClose })=>{
|
|
|
113671
113685
|
// Get the active run for full screen display (use selected run or first run)
|
|
113672
113686
|
const activeRun = session?.runs.find((r)=>r.run_id === selectedNodeInfo?.runId) || session?.runs?.[0];
|
|
113673
113687
|
const extraButtonsIconSize = 24;
|
|
113674
|
-
return /*#__PURE__*/ React__default.createElement(
|
|
113688
|
+
return /*#__PURE__*/ React__default.createElement(ConfigProvider, {
|
|
113689
|
+
theme: {
|
|
113690
|
+
token: {
|
|
113691
|
+
fontFamily: "inherit"
|
|
113692
|
+
}
|
|
113693
|
+
}
|
|
113694
|
+
}, /*#__PURE__*/ React__default.createElement(Drawer$2, {
|
|
113675
113695
|
title: /*#__PURE__*/ React__default.createElement(React__default.Fragment, null, session?.name || "Unnamed Session", " -", " ", session?.start_time ? moment(session.start_time * 1000).fromNow() : "N/A"),
|
|
113676
113696
|
placement: "right",
|
|
113677
113697
|
size: isFullScreen ? "100%" : "70%",
|
|
@@ -113745,7 +113765,18 @@ const SessionDetails = ({ session, open, onClose })=>{
|
|
|
113745
113765
|
flexDirection: "column"
|
|
113746
113766
|
}
|
|
113747
113767
|
}
|
|
113748
|
-
},
|
|
113768
|
+
}, loading ? /*#__PURE__*/ React__default.createElement("div", {
|
|
113769
|
+
style: {
|
|
113770
|
+
display: "flex",
|
|
113771
|
+
alignItems: "center",
|
|
113772
|
+
justifyContent: "center",
|
|
113773
|
+
height: "100%",
|
|
113774
|
+
color: isDarkMode ? "#8c8c8c" : "#595959"
|
|
113775
|
+
}
|
|
113776
|
+
}, /*#__PURE__*/ React__default.createElement(Spin, {
|
|
113777
|
+
size: "large",
|
|
113778
|
+
tip: "Loading session..."
|
|
113779
|
+
})) : !session ? /*#__PURE__*/ React__default.createElement("div", {
|
|
113749
113780
|
style: {
|
|
113750
113781
|
display: "flex",
|
|
113751
113782
|
alignItems: "center",
|
|
@@ -113803,7 +113834,7 @@ const SessionDetails = ({ session, open, onClose })=>{
|
|
|
113803
113834
|
selectedNode: selectedNodeInfo,
|
|
113804
113835
|
session: session,
|
|
113805
113836
|
isDarkMode: isDarkMode
|
|
113806
|
-
}))));
|
|
113837
|
+
})))));
|
|
113807
113838
|
};
|
|
113808
113839
|
|
|
113809
113840
|
/**
|
|
@@ -114004,14 +114035,20 @@ function dtoMetricToMetric(def) {
|
|
|
114004
114035
|
};
|
|
114005
114036
|
}
|
|
114006
114037
|
}
|
|
114007
|
-
// Build runs from agents.agent_node_ids
|
|
114008
|
-
const
|
|
114009
|
-
|
|
114010
|
-
|
|
114011
|
-
|
|
114038
|
+
// Build runs from agents.agent_node_ids (support legacy string[] and new { session_id, agent_node_id }[])
|
|
114039
|
+
const agentNodeEntries = evalDto.agents.flatMap((a)=>{
|
|
114040
|
+
const ids = a.agent_node_ids ?? [];
|
|
114041
|
+
return ids.map((entry)=>typeof entry === "string" ? {
|
|
114042
|
+
session_id: "",
|
|
114043
|
+
agent_node_id: entry
|
|
114044
|
+
} : entry);
|
|
114045
|
+
});
|
|
114046
|
+
const runs = agentNodeEntries.map(({ session_id, agent_node_id })=>({
|
|
114047
|
+
session_id,
|
|
114048
|
+
run_id: agent_node_id,
|
|
114012
114049
|
results: {}
|
|
114013
114050
|
}));
|
|
114014
|
-
// Agent name(s) for display - comma-separated when multiple;
|
|
114051
|
+
// Agent name(s) for display - comma-separated when multiple; pass through agent_node_ids for LLM tree (both formats)
|
|
114015
114052
|
const agents = evalDto.agents.map((a)=>({
|
|
114016
114053
|
agent_name: a.agent_name,
|
|
114017
114054
|
agent_node_ids: a.agent_node_ids
|
|
@@ -114211,7 +114248,13 @@ const EvaluationsCompareView = ({ evaluations, evaluationId1, evaluationId2, onE
|
|
|
114211
114248
|
];
|
|
114212
114249
|
const evaluation1Label = evaluation1 ? `${evaluation1.name || "Unnamed Evaluation"} (${evaluation1.agent_name})` : "Select evaluation 1";
|
|
114213
114250
|
const evaluation2Label = evaluation2 ? `${evaluation2.name || "Unnamed Evaluation"} (${evaluation2.agent_name})` : "Select evaluation 2";
|
|
114214
|
-
return /*#__PURE__*/ React__default.createElement(
|
|
114251
|
+
return /*#__PURE__*/ React__default.createElement(ConfigProvider, {
|
|
114252
|
+
theme: {
|
|
114253
|
+
token: {
|
|
114254
|
+
fontFamily: "inherit"
|
|
114255
|
+
}
|
|
114256
|
+
}
|
|
114257
|
+
}, backHref && /*#__PURE__*/ React__default.createElement("a", {
|
|
114215
114258
|
href: backHref,
|
|
114216
114259
|
style: {
|
|
114217
114260
|
display: "inline-flex",
|
|
@@ -114406,7 +114449,13 @@ const EvaluationsCompareDrawer = ({ open, onClose, evaluationId1, evaluationId2,
|
|
|
114406
114449
|
staticMethods.error("Failed to copy share link");
|
|
114407
114450
|
}
|
|
114408
114451
|
};
|
|
114409
|
-
return /*#__PURE__*/ React__default.createElement(
|
|
114452
|
+
return /*#__PURE__*/ React__default.createElement(ConfigProvider, {
|
|
114453
|
+
theme: {
|
|
114454
|
+
token: {
|
|
114455
|
+
fontFamily: "inherit"
|
|
114456
|
+
}
|
|
114457
|
+
}
|
|
114458
|
+
}, /*#__PURE__*/ React__default.createElement(Drawer$2, {
|
|
114410
114459
|
title: "Compare Evaluations",
|
|
114411
114460
|
placement: "right",
|
|
114412
114461
|
size: "60%",
|
|
@@ -114448,7 +114497,7 @@ const EvaluationsCompareDrawer = ({ open, onClose, evaluationId1, evaluationId2,
|
|
|
114448
114497
|
onEvaluationId1Change: onEvaluationId1Change,
|
|
114449
114498
|
onEvaluationId2Change: onEvaluationId2Change,
|
|
114450
114499
|
showEvaluationComparisonDropdowns: false
|
|
114451
|
-
}));
|
|
114500
|
+
})));
|
|
114452
114501
|
};
|
|
114453
114502
|
|
|
114454
114503
|
const defaultGetEvaluatorResultsHref = (evaluationId, evaluatorId)=>`#/evaluations/${evaluationId}/results/${evaluatorId}`;
|
|
@@ -114474,7 +114523,13 @@ const EvaluationDetailsDrawer = ({ evaluation, open, onClose, getEvaluatorResult
|
|
|
114474
114523
|
const normalizedEvaluation = React__default.useMemo(()=>evaluation && isEvaluationDto(evaluation) ? transformEvaluation(evaluation) : evaluation, [
|
|
114475
114524
|
evaluation
|
|
114476
114525
|
]);
|
|
114477
|
-
return /*#__PURE__*/ React__default.createElement(
|
|
114526
|
+
return /*#__PURE__*/ React__default.createElement(ConfigProvider, {
|
|
114527
|
+
theme: {
|
|
114528
|
+
token: {
|
|
114529
|
+
fontFamily: "inherit"
|
|
114530
|
+
}
|
|
114531
|
+
}
|
|
114532
|
+
}, /*#__PURE__*/ React__default.createElement(Drawer$2, {
|
|
114478
114533
|
title: normalizedEvaluation ? /*#__PURE__*/ React__default.createElement("div", {
|
|
114479
114534
|
style: {
|
|
114480
114535
|
display: "flex",
|
|
@@ -114679,11 +114734,17 @@ const EvaluationDetailsDrawer = ({ evaluation, open, onClose, getEvaluatorResult
|
|
|
114679
114734
|
getPopupContainer: ()=>document.body
|
|
114680
114735
|
}, tag) : tag;
|
|
114681
114736
|
})));
|
|
114682
|
-
})))));
|
|
114737
|
+
}))))));
|
|
114683
114738
|
};
|
|
114684
114739
|
|
|
114685
114740
|
const GITHUB_URL = "https://github.com/RailtownAI/railtracks/";
|
|
114686
|
-
const EvaluationsErrorCard = ({ error, onRetry })=>/*#__PURE__*/ React__default.createElement(
|
|
114741
|
+
const EvaluationsErrorCard = ({ error, onRetry })=>/*#__PURE__*/ React__default.createElement(ConfigProvider, {
|
|
114742
|
+
theme: {
|
|
114743
|
+
token: {
|
|
114744
|
+
fontFamily: "inherit"
|
|
114745
|
+
}
|
|
114746
|
+
}
|
|
114747
|
+
}, /*#__PURE__*/ React__default.createElement(Alert, {
|
|
114687
114748
|
type: "error",
|
|
114688
114749
|
title: "Error loading evaluations",
|
|
114689
114750
|
description: /*#__PURE__*/ React__default.createElement("div", {
|
|
@@ -114719,7 +114780,7 @@ const EvaluationsErrorCard = ({ error, onRetry })=>/*#__PURE__*/ React__default.
|
|
|
114719
114780
|
})
|
|
114720
114781
|
}, "Retry") : undefined,
|
|
114721
114782
|
showIcon: true
|
|
114722
|
-
});
|
|
114783
|
+
}));
|
|
114723
114784
|
|
|
114724
114785
|
const EvaluationsTable = ({ evaluations, loading = false, error = null, onRefresh, onRowClick, onCompare, compareIdsFromUrl, onCompareUrlChange, showFilters = true, showCompare = true, emptyMessage, title, pagination: serverPagination, onFiltersChange, onFetchEvaluationsByIds, errorRender })=>{
|
|
114725
114786
|
const { theme } = useTheme$1();
|
|
@@ -114971,7 +115032,13 @@ const EvaluationsTable = ({ evaluations, loading = false, error = null, onRefres
|
|
|
114971
115032
|
target: "_blank",
|
|
114972
115033
|
rel: "noopener noreferrer"
|
|
114973
115034
|
}, "view our documentation on how to create evaluations"), "."));
|
|
114974
|
-
return /*#__PURE__*/ React__default.createElement(
|
|
115035
|
+
return /*#__PURE__*/ React__default.createElement(ConfigProvider, {
|
|
115036
|
+
theme: {
|
|
115037
|
+
token: {
|
|
115038
|
+
fontFamily: "inherit"
|
|
115039
|
+
}
|
|
115040
|
+
}
|
|
115041
|
+
}, /*#__PURE__*/ React__default.createElement("div", {
|
|
114975
115042
|
style: {
|
|
114976
115043
|
display: "flex",
|
|
114977
115044
|
justifyContent: "space-between",
|
|
@@ -117039,12 +117106,17 @@ function isLLMNode(n) {
|
|
|
117039
117106
|
function isLLMAggregateNode(n) {
|
|
117040
117107
|
return isLLMInferenceAggregate(n);
|
|
117041
117108
|
}
|
|
117042
|
-
/**
|
|
117109
|
+
/** Normalize agent_node_ids to { nodeId, sessionId? }[] */ function normalizeAgentNodeEntries(agents) {
|
|
117043
117110
|
const map = new Map();
|
|
117044
117111
|
if (!agents) return map;
|
|
117045
117112
|
for (const a of agents){
|
|
117046
|
-
for (const
|
|
117047
|
-
|
|
117113
|
+
for (const entry of a.agent_node_ids ?? []){
|
|
117114
|
+
const nodeId = typeof entry === "string" ? entry : entry.agent_node_id;
|
|
117115
|
+
const sessionId = typeof entry === "string" ? undefined : entry.session_id;
|
|
117116
|
+
map.set(nodeId, {
|
|
117117
|
+
agentName: a.agent_name,
|
|
117118
|
+
sessionId
|
|
117119
|
+
});
|
|
117048
117120
|
}
|
|
117049
117121
|
}
|
|
117050
117122
|
return map;
|
|
@@ -117056,7 +117128,7 @@ function isLLMAggregateNode(n) {
|
|
|
117056
117128
|
*/ function buildLLMInferenceTreeFromAggregateResults(agg) {
|
|
117057
117129
|
const { roots, nodes, agents } = agg;
|
|
117058
117130
|
const nodeMap = nodes;
|
|
117059
|
-
const
|
|
117131
|
+
const agentNodeInfo = normalizeAgentNodeEntries(agents);
|
|
117060
117132
|
// Resolve root IDs to aggregate nodes
|
|
117061
117133
|
const rootAggregates = [];
|
|
117062
117134
|
for (const id of roots){
|
|
@@ -117089,14 +117161,17 @@ function isLLMAggregateNode(n) {
|
|
|
117089
117161
|
}
|
|
117090
117162
|
const sortedCalls = Array.from(llmCallIndices).sort((a, b)=>a - b);
|
|
117091
117163
|
const sortedMetrics = Array.from(metricNames).sort();
|
|
117092
|
-
// Group agent_node_ids by agent
|
|
117093
|
-
const
|
|
117094
|
-
for (const [nodeId,
|
|
117095
|
-
if (!
|
|
117096
|
-
|
|
117164
|
+
// Group agent_node_ids by agent: { nodeId, sessionId? }[]
|
|
117165
|
+
const agentToNodeEntries = new Map();
|
|
117166
|
+
for (const [nodeId, info] of agentNodeInfo){
|
|
117167
|
+
if (!agentToNodeEntries.has(info.agentName)) agentToNodeEntries.set(info.agentName, []);
|
|
117168
|
+
agentToNodeEntries.get(info.agentName).push({
|
|
117169
|
+
nodeId,
|
|
117170
|
+
sessionId: info.sessionId
|
|
117171
|
+
});
|
|
117097
117172
|
}
|
|
117098
117173
|
// If no agents provided, infer from data: collect unique agent_data_ids into "Agent (root)"
|
|
117099
|
-
if (
|
|
117174
|
+
if (agentToNodeEntries.size === 0) {
|
|
117100
117175
|
const seen = new Set();
|
|
117101
117176
|
for (const aggNode of rootAggregates){
|
|
117102
117177
|
for (const childId of aggNode.children ?? []){
|
|
@@ -117108,17 +117183,21 @@ function isLLMAggregateNode(n) {
|
|
|
117108
117183
|
}
|
|
117109
117184
|
}
|
|
117110
117185
|
if (seen.size > 0) {
|
|
117111
|
-
|
|
117186
|
+
agentToNodeEntries.set("Agent (root)", Array.from(seen).map((nodeId)=>({
|
|
117187
|
+
nodeId,
|
|
117188
|
+
sessionId: undefined
|
|
117189
|
+
})));
|
|
117112
117190
|
}
|
|
117113
117191
|
}
|
|
117114
117192
|
const rows = [];
|
|
117115
|
-
for (const [agentName,
|
|
117193
|
+
for (const [agentName, nodeEntries] of agentToNodeEntries){
|
|
117194
|
+
const nodeIds = nodeEntries.map((e)=>e.nodeId);
|
|
117116
117195
|
const agentKey = `agent-${agentName.replace(/\s+/g, "-")}`;
|
|
117117
117196
|
const llmCallChildren = [];
|
|
117118
117197
|
for (const callIdx of sortedCalls){
|
|
117119
117198
|
const callKey = `${agentKey}-call-${callIdx}`;
|
|
117120
117199
|
const nodeChildren = [];
|
|
117121
|
-
|
|
117200
|
+
nodeEntries.forEach(({ nodeId, sessionId })=>{
|
|
117122
117201
|
const metrics = {};
|
|
117123
117202
|
for (const m of sortedMetrics){
|
|
117124
117203
|
const k = `${nodeId}|${callIdx}|${m}`;
|
|
@@ -117134,6 +117213,7 @@ function isLLMAggregateNode(n) {
|
|
|
117134
117213
|
metrics,
|
|
117135
117214
|
value: metrics[primaryMetric],
|
|
117136
117215
|
agentNodeId: nodeId,
|
|
117216
|
+
sessionId,
|
|
117137
117217
|
children: undefined
|
|
117138
117218
|
});
|
|
117139
117219
|
}
|
|
@@ -117212,7 +117292,19 @@ const LATENCY_RESULT_PREFIXES = [
|
|
|
117212
117292
|
"Latency/",
|
|
117213
117293
|
"Runtime/"
|
|
117214
117294
|
];
|
|
117215
|
-
|
|
117295
|
+
/** Build map from agent_node_id -> session_id from agents (supports both formats) */ function buildAgentIdToSessionMap(agents) {
|
|
117296
|
+
const map = new Map();
|
|
117297
|
+
if (!agents) return map;
|
|
117298
|
+
for (const a of agents){
|
|
117299
|
+
for (const entry of a.agent_node_ids ?? []){
|
|
117300
|
+
const nodeId = typeof entry === "string" ? entry : entry.agent_node_id;
|
|
117301
|
+
const sessionId = typeof entry === "string" ? undefined : entry.session_id;
|
|
117302
|
+
if (sessionId) map.set(nodeId, sessionId);
|
|
117303
|
+
}
|
|
117304
|
+
}
|
|
117305
|
+
return map;
|
|
117306
|
+
}
|
|
117307
|
+
function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCountByTool, latencyResults, failureResults, agentIdToSessionId) {
|
|
117216
117308
|
const toolNamesFromAgg = Array.from(new Set([
|
|
117217
117309
|
...latencyByTool.keys(),
|
|
117218
117310
|
...failureRateByTool.keys(),
|
|
@@ -117275,6 +117367,7 @@ function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCount
|
|
|
117275
117367
|
const cnt = uniqueLatencies.length;
|
|
117276
117368
|
let failed = 0;
|
|
117277
117369
|
const failList = failureByToolAgentIndex.get(`${toolName}|${aid}`) ?? [];
|
|
117370
|
+
const sessionId = agentIdToSessionId.get(aid);
|
|
117278
117371
|
const leaves = uniqueLatencies.map((l, idx)=>{
|
|
117279
117372
|
const failVal = failureByKey.get(l.key);
|
|
117280
117373
|
const failValByIndex = failList[idx];
|
|
@@ -117282,12 +117375,15 @@ function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCount
|
|
|
117282
117375
|
const isFailed = resolvedFail !== undefined && resolvedFail >= 0.5;
|
|
117283
117376
|
if (isFailed) failed++;
|
|
117284
117377
|
const leafName = l.toolNodeId ?? `invocation-${idx + 1}`;
|
|
117378
|
+
const nodeId = l.toolNodeId ?? undefined;
|
|
117285
117379
|
return {
|
|
117286
117380
|
key: `${toolName}-${aid}-${idx}`,
|
|
117287
117381
|
name: leafName,
|
|
117288
117382
|
runtimeMs: Math.round(l.value * 1000),
|
|
117289
117383
|
failureRate: isFailed ? "Failed" : "Success",
|
|
117290
117384
|
level: 3,
|
|
117385
|
+
sessionId: sessionId ?? undefined,
|
|
117386
|
+
nodeId,
|
|
117291
117387
|
children: undefined
|
|
117292
117388
|
};
|
|
117293
117389
|
});
|
|
@@ -117299,6 +117395,8 @@ function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCount
|
|
|
117299
117395
|
runtimeMs: Math.round(totalRuntimeMs),
|
|
117300
117396
|
failureRate: agentFailurePct,
|
|
117301
117397
|
level: 2,
|
|
117398
|
+
sessionId: sessionId ?? undefined,
|
|
117399
|
+
nodeId: aid,
|
|
117302
117400
|
children: leaves.length > 0 ? leaves : undefined
|
|
117303
117401
|
});
|
|
117304
117402
|
}
|
|
@@ -117343,12 +117441,13 @@ function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCount
|
|
|
117343
117441
|
}
|
|
117344
117442
|
const latencyResults = toolResults.filter((r)=>LATENCY_RESULT_PREFIXES.some((prefix)=>r.result_name?.startsWith(prefix)));
|
|
117345
117443
|
const failureResults = toolResults.filter((r)=>r.result_name?.startsWith("FailureRate/"));
|
|
117346
|
-
|
|
117444
|
+
const agentIdToSessionId = buildAgentIdToSessionMap(agg.agents);
|
|
117445
|
+
return buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCountByTool, latencyResults, failureResults, agentIdToSessionId);
|
|
117347
117446
|
}
|
|
117348
117447
|
/**
|
|
117349
117448
|
* Builds a tree from raw ToolUseEvaluator results (legacy flat format).
|
|
117350
117449
|
* Structure: tool (level 1) → agent (level 2) → invocation (level 3).
|
|
117351
|
-
*/ function buildToolUseTreeFromRawResults(rawResults) {
|
|
117450
|
+
*/ function buildToolUseTreeFromRawResults(rawResults, agents) {
|
|
117352
117451
|
const toolResults = rawResults.filter((r)=>isToolResult(r));
|
|
117353
117452
|
const toolAggregates = rawResults.filter((r)=>isToolAggregate(r));
|
|
117354
117453
|
const latencyByTool = new Map();
|
|
@@ -117370,7 +117469,8 @@ function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCount
|
|
|
117370
117469
|
}
|
|
117371
117470
|
const latencyResults = toolResults.filter((r)=>r.result_name.startsWith("Latency/") || r.result_name.startsWith("Runtime/"));
|
|
117372
117471
|
const failureResults = toolResults.filter((r)=>r.result_name.startsWith("FailureRate/"));
|
|
117373
|
-
|
|
117472
|
+
const agentIdToSessionId = buildAgentIdToSessionMap(agents);
|
|
117473
|
+
return buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCountByTool, latencyResults, failureResults, agentIdToSessionId);
|
|
117374
117474
|
}
|
|
117375
117475
|
|
|
117376
117476
|
function detectAggregateType(nodes) {
|
|
@@ -117437,7 +117537,7 @@ const DEFAULT_TITLES = {
|
|
|
117437
117537
|
LLMInference: "LLM Inference Evaluator - Aggregate View",
|
|
117438
117538
|
Judge: "Judge Evaluator - Aggregate View"
|
|
117439
117539
|
};
|
|
117440
|
-
const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, title, evaluatorName })=>{
|
|
117540
|
+
const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, title, evaluatorName, onAgentNodeClick })=>{
|
|
117441
117541
|
const { theme } = useTheme$1();
|
|
117442
117542
|
const [expandedRowKeys, setExpandedRowKeys] = useState([]);
|
|
117443
117543
|
const { aggregateType, dataSource, llmIsTree, llmExpandableKeys } = useMemo(()=>{
|
|
@@ -117446,7 +117546,10 @@ const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, titl
|
|
|
117446
117546
|
if (type === "ToolUse") {
|
|
117447
117547
|
return {
|
|
117448
117548
|
aggregateType: type,
|
|
117449
|
-
dataSource: buildToolUseTreeFromAggregate(
|
|
117549
|
+
dataSource: buildToolUseTreeFromAggregate({
|
|
117550
|
+
...aggregateResults,
|
|
117551
|
+
agents
|
|
117552
|
+
}),
|
|
117450
117553
|
llmIsTree: false,
|
|
117451
117554
|
llmExpandableKeys: []
|
|
117452
117555
|
};
|
|
@@ -117488,7 +117591,7 @@ const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, titl
|
|
|
117488
117591
|
if (toolResults.length > 0) {
|
|
117489
117592
|
return {
|
|
117490
117593
|
aggregateType: "ToolUse",
|
|
117491
|
-
dataSource: buildToolUseTreeFromRawResults(toolResults),
|
|
117594
|
+
dataSource: buildToolUseTreeFromRawResults(toolResults, agents),
|
|
117492
117595
|
llmIsTree: false,
|
|
117493
117596
|
llmExpandableKeys: []
|
|
117494
117597
|
};
|
|
@@ -117596,17 +117699,37 @@ const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, titl
|
|
|
117596
117699
|
}), "Name"),
|
|
117597
117700
|
dataIndex: "name",
|
|
117598
117701
|
key: "name",
|
|
117599
|
-
render: (name, row)
|
|
117702
|
+
render: (name, row)=>{
|
|
117703
|
+
const isClickable = row.level === 3 && row.agentNodeId && onAgentNodeClick;
|
|
117704
|
+
return /*#__PURE__*/ React__default.createElement("span", {
|
|
117600
117705
|
style: {
|
|
117601
117706
|
display: "inline-flex",
|
|
117602
117707
|
alignItems: "center",
|
|
117603
117708
|
gap: 8,
|
|
117604
117709
|
fontFamily: "monospace",
|
|
117605
|
-
fontSize: 13
|
|
117606
|
-
|
|
117710
|
+
fontSize: 13,
|
|
117711
|
+
...isClickable && {
|
|
117712
|
+
cursor: "pointer",
|
|
117713
|
+
color: theme.colors.primary
|
|
117714
|
+
}
|
|
117715
|
+
},
|
|
117716
|
+
onClick: isClickable ? (e)=>{
|
|
117717
|
+
e.stopPropagation();
|
|
117718
|
+
onAgentNodeClick(row.sessionId, row.agentNodeId);
|
|
117719
|
+
} : undefined,
|
|
117720
|
+
onKeyDown: isClickable ? (e)=>{
|
|
117721
|
+
if (e.key === "Enter" || e.key === " ") {
|
|
117722
|
+
e.preventDefault();
|
|
117723
|
+
e.stopPropagation();
|
|
117724
|
+
onAgentNodeClick(row.sessionId, row.agentNodeId);
|
|
117725
|
+
}
|
|
117726
|
+
} : undefined,
|
|
117727
|
+
role: isClickable ? "button" : undefined,
|
|
117728
|
+
tabIndex: isClickable ? 0 : undefined
|
|
117607
117729
|
}, /*#__PURE__*/ React__default.createElement(LLMTagForLevel, {
|
|
117608
117730
|
level: row.level
|
|
117609
|
-
}), name)
|
|
117731
|
+
}), name);
|
|
117732
|
+
}
|
|
117610
117733
|
},
|
|
117611
117734
|
...METRIC_COLUMNS.map(({ key, title, Icon })=>({
|
|
117612
117735
|
title: /*#__PURE__*/ React__default.createElement("span", {
|
|
@@ -117936,17 +118059,37 @@ const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, titl
|
|
|
117936
118059
|
}), "Name"),
|
|
117937
118060
|
dataIndex: "name",
|
|
117938
118061
|
key: "name",
|
|
117939
|
-
render: (name, row)
|
|
118062
|
+
render: (name, row)=>{
|
|
118063
|
+
const isClickable = (row.level === 2 || row.level === 3) && row.nodeId && onAgentNodeClick;
|
|
118064
|
+
return /*#__PURE__*/ React__default.createElement("span", {
|
|
117940
118065
|
style: {
|
|
117941
118066
|
display: "inline-flex",
|
|
117942
118067
|
alignItems: "center",
|
|
117943
118068
|
gap: 8,
|
|
117944
118069
|
fontFamily: "monospace",
|
|
117945
|
-
fontSize: 13
|
|
117946
|
-
|
|
118070
|
+
fontSize: 13,
|
|
118071
|
+
...isClickable && {
|
|
118072
|
+
cursor: "pointer",
|
|
118073
|
+
color: theme.colors.primary
|
|
118074
|
+
}
|
|
118075
|
+
},
|
|
118076
|
+
onClick: isClickable ? (e)=>{
|
|
118077
|
+
e.stopPropagation();
|
|
118078
|
+
onAgentNodeClick(row.sessionId, row.nodeId);
|
|
118079
|
+
} : undefined,
|
|
118080
|
+
onKeyDown: isClickable ? (e)=>{
|
|
118081
|
+
if (e.key === "Enter" || e.key === " ") {
|
|
118082
|
+
e.preventDefault();
|
|
118083
|
+
e.stopPropagation();
|
|
118084
|
+
onAgentNodeClick(row.sessionId, row.nodeId);
|
|
118085
|
+
}
|
|
118086
|
+
} : undefined,
|
|
118087
|
+
role: isClickable ? "button" : undefined,
|
|
118088
|
+
tabIndex: isClickable ? 0 : undefined
|
|
117947
118089
|
}, /*#__PURE__*/ React__default.createElement(TagForLevel, {
|
|
117948
118090
|
level: row.level
|
|
117949
|
-
}), name)
|
|
118091
|
+
}), name);
|
|
118092
|
+
}
|
|
117950
118093
|
},
|
|
117951
118094
|
{
|
|
117952
118095
|
title: /*#__PURE__*/ React__default.createElement("span", {
|
|
@@ -118256,11 +118399,14 @@ function isCategoricalMetric(def) {
|
|
|
118256
118399
|
/** Derive agent display name from DTO. */ function deriveAgentName(evaluation) {
|
|
118257
118400
|
return evaluation.agents?.map((a)=>a.agent_name).join(", ") ?? "-";
|
|
118258
118401
|
}
|
|
118259
|
-
/** Derive runs count from DTO (agent_node_ids). */ function deriveRunsCount(evaluation) {
|
|
118260
|
-
return evaluation.agents?.
|
|
118402
|
+
/** Derive runs count from DTO (agent_node_ids). Supports both string[] and { session_id, agent_node_id }[]. */ function deriveRunsCount(evaluation) {
|
|
118403
|
+
return evaluation.agents?.reduce((sum, a)=>{
|
|
118404
|
+
const ids = a.agent_node_ids ?? [];
|
|
118405
|
+
return sum + ids.length;
|
|
118406
|
+
}, 0) ?? 0;
|
|
118261
118407
|
}
|
|
118262
118408
|
|
|
118263
|
-
const EvaluatorResult = ({ evaluation, evaluatorId, backHref })=>{
|
|
118409
|
+
const EvaluatorResult = ({ evaluation, evaluatorId, backHref, onAgentNodeClick })=>{
|
|
118264
118410
|
const { theme } = useTheme$1();
|
|
118265
118411
|
const labelColor = theme.colors.mutedForeground;
|
|
118266
118412
|
const derived = evaluation ? deriveEvaluatorResultData(evaluation, evaluatorId) : null;
|
|
@@ -118279,7 +118425,13 @@ const EvaluatorResult = ({ evaluation, evaluatorId, backHref })=>{
|
|
|
118279
118425
|
const { completed_at } = evaluation;
|
|
118280
118426
|
const agentName = deriveAgentName(evaluation);
|
|
118281
118427
|
const runsCount = deriveRunsCount(evaluation);
|
|
118282
|
-
return /*#__PURE__*/ React__default.createElement(
|
|
118428
|
+
return /*#__PURE__*/ React__default.createElement(ConfigProvider, {
|
|
118429
|
+
theme: {
|
|
118430
|
+
token: {
|
|
118431
|
+
fontFamily: "inherit"
|
|
118432
|
+
}
|
|
118433
|
+
}
|
|
118434
|
+
}, /*#__PURE__*/ React__default.createElement("div", {
|
|
118283
118435
|
style: {
|
|
118284
118436
|
width: "100%",
|
|
118285
118437
|
minWidth: 0
|
|
@@ -118396,8 +118548,9 @@ const EvaluatorResult = ({ evaluation, evaluatorId, backHref })=>{
|
|
|
118396
118548
|
rawResults: rawResults.filter((r)=>isToolResult(r) || isToolAggregate(r)),
|
|
118397
118549
|
aggregateResults: aggregateResults,
|
|
118398
118550
|
agents: evaluation.agents,
|
|
118399
|
-
evaluatorName: evaluatorName
|
|
118400
|
-
|
|
118551
|
+
evaluatorName: evaluatorName,
|
|
118552
|
+
onAgentNodeClick: onAgentNodeClick
|
|
118553
|
+
}))));
|
|
118401
118554
|
};
|
|
118402
118555
|
|
|
118403
118556
|
function __insertCSS(code) {
|
|
@@ -13,11 +13,16 @@ export interface AggregateResultsTableProps {
|
|
|
13
13
|
/** Agents (with agent_node_ids) for LLM inference tree Agent (root) labels. */
|
|
14
14
|
agents?: {
|
|
15
15
|
agent_name: string;
|
|
16
|
-
agent_node_ids?: string[]
|
|
16
|
+
agent_node_ids?: string[] | {
|
|
17
|
+
session_id: string;
|
|
18
|
+
agent_node_id: string;
|
|
19
|
+
}[];
|
|
17
20
|
}[];
|
|
18
21
|
/** Title shown above the table. */
|
|
19
22
|
title?: string;
|
|
20
23
|
/** Optional evaluator name hint (e.g. "ToolUseEvaluator") for default title. */
|
|
21
24
|
evaluatorName?: string;
|
|
25
|
+
/** Called when an Agent/Tool Node row is clicked. sessionId may be undefined when evaluation uses legacy agent_node_ids format. */
|
|
26
|
+
onAgentNodeClick?: (sessionId: string | undefined, nodeId: string) => void;
|
|
22
27
|
}
|
|
23
28
|
export declare const AggregateResultsTable: React.FC<AggregateResultsTableProps>;
|
|
@@ -56,7 +56,10 @@ export interface Evaluation {
|
|
|
56
56
|
/** Agents in the evaluation (name and node IDs for LLM inference tree) */
|
|
57
57
|
agents: {
|
|
58
58
|
agent_name: string;
|
|
59
|
-
agent_node_ids?: string[]
|
|
59
|
+
agent_node_ids?: string[] | {
|
|
60
|
+
session_id: string;
|
|
61
|
+
agent_node_id: string;
|
|
62
|
+
}[];
|
|
60
63
|
}[];
|
|
61
64
|
/** Number of agents in the evaluation */
|
|
62
65
|
agents_count: number;
|
|
@@ -2,10 +2,15 @@ import React from "react";
|
|
|
2
2
|
export interface EvaluatorResultPageProps {
|
|
3
3
|
/** Optional href for back link. Pass from the host app (e.g. "#/evaluations" for HashRouter). */
|
|
4
4
|
backHref?: string;
|
|
5
|
+
/**
|
|
6
|
+
* Optional handler for Agent/Tool node clicks. sessionId may be undefined when evaluation uses
|
|
7
|
+
* legacy agent_node_ids format. Default opens a SessionDetails drawer on this page.
|
|
8
|
+
*/
|
|
9
|
+
onAgentNodeClick?: (sessionId: string | undefined, nodeId: string) => void;
|
|
5
10
|
}
|
|
6
11
|
/**
|
|
7
12
|
* Page that resolves evaluationId and evaluatorId from the route,
|
|
8
13
|
* fetches evaluation data, and renders EvaluatorResult.
|
|
9
|
-
* backHref should be passed from the outer/host app for portability.
|
|
14
|
+
* backHref and onAgentNodeClick should be passed from the outer/host app for portability.
|
|
10
15
|
*/
|
|
11
16
|
export declare const EvaluatorResultPage: React.FC<EvaluatorResultPageProps>;
|
|
@@ -11,5 +11,10 @@ export interface EvaluatorResultProps {
|
|
|
11
11
|
evaluatorId: string;
|
|
12
12
|
/** Optional href for back link. When provided, a "Back to Evaluations" control is shown. */
|
|
13
13
|
backHref?: string;
|
|
14
|
+
/**
|
|
15
|
+
* Optional handler for Agent/Tool node clicks. sessionId may be undefined when evaluation
|
|
16
|
+
* uses legacy agent_node_ids format. When not provided, nodes use default (drawer).
|
|
17
|
+
*/
|
|
18
|
+
onAgentNodeClick?: (sessionId: string | undefined, nodeId: string) => void;
|
|
14
19
|
}
|
|
15
20
|
export declare const EvaluatorResult: React.FC<EvaluatorResultProps>;
|
|
@@ -5,6 +5,12 @@ interface SessionDetailsProps {
|
|
|
5
5
|
session: SessionListItem | null;
|
|
6
6
|
open: boolean;
|
|
7
7
|
onClose: () => void;
|
|
8
|
+
/** Optional initial node to select when opening (e.g. from evaluation link). */
|
|
9
|
+
initialNodeId?: string;
|
|
10
|
+
/** Optional run containing the node. When omitted, the run is found by searching session.runs. */
|
|
11
|
+
initialRunId?: string;
|
|
12
|
+
/** When true, show loading state instead of "No session data available". */
|
|
13
|
+
loading?: boolean;
|
|
8
14
|
}
|
|
9
15
|
export declare const InputsOutputsComponent: React.FC<{
|
|
10
16
|
run: AgentRun;
|
|
@@ -33,5 +33,5 @@ export interface EvaluatorResultDerived {
|
|
|
33
33
|
export declare function deriveEvaluatorResultData(evaluation: Evaluation, evaluatorId: string): EvaluatorResultDerived | null;
|
|
34
34
|
/** Derive agent display name from DTO. */
|
|
35
35
|
export declare function deriveAgentName(evaluation: Evaluation): string;
|
|
36
|
-
/** Derive runs count from DTO (agent_node_ids). */
|
|
36
|
+
/** Derive runs count from DTO (agent_node_ids). Supports both string[] and { session_id, agent_node_id }[]. */
|
|
37
37
|
export declare function deriveRunsCount(evaluation: Evaluation): number;
|
|
@@ -2,13 +2,17 @@
|
|
|
2
2
|
* Transforms LLMInferenceEvaluator aggregate_results (roots + nodes) into a tree
|
|
3
3
|
* structure for table rendering. Builds hierarchy: Agent (root) -> llm call -> agent node id.
|
|
4
4
|
*/
|
|
5
|
+
type AgentNodeIdEntry = string | {
|
|
6
|
+
session_id: string;
|
|
7
|
+
agent_node_id: string;
|
|
8
|
+
};
|
|
5
9
|
export type LLMAggregateResultsInput = {
|
|
6
10
|
roots: string[];
|
|
7
11
|
nodes: Record<string, unknown>;
|
|
8
|
-
/** Optional agents for Agent (root) labels; agent_node_ids
|
|
12
|
+
/** Optional agents for Agent (root) labels; agent_node_ids can be string[] or { session_id, agent_node_id }[] */
|
|
9
13
|
agents?: {
|
|
10
14
|
agent_name: string;
|
|
11
|
-
agent_node_ids?:
|
|
15
|
+
agent_node_ids?: AgentNodeIdEntry[];
|
|
12
16
|
}[];
|
|
13
17
|
};
|
|
14
18
|
export interface LLMInferenceTreeRow {
|
|
@@ -26,6 +30,8 @@ export interface LLMInferenceTreeRow {
|
|
|
26
30
|
value?: number;
|
|
27
31
|
/** Leaf agent node id */
|
|
28
32
|
agentNodeId?: string;
|
|
33
|
+
/** Session id for linking to session-details (when agent_node_ids use new format) */
|
|
34
|
+
sessionId?: string;
|
|
29
35
|
children?: LLMInferenceTreeRow[];
|
|
30
36
|
}
|
|
31
37
|
/**
|
|
@@ -34,3 +40,4 @@ export interface LLMInferenceTreeRow {
|
|
|
34
40
|
* Each row has metric values (InputTokens, OutputTokens, TotalCost, Latency) as columns.
|
|
35
41
|
*/
|
|
36
42
|
export declare function buildLLMInferenceTreeFromAggregateResults(agg: LLMAggregateResultsInput): LLMInferenceTreeRow[];
|
|
43
|
+
export {};
|
|
@@ -3,9 +3,18 @@
|
|
|
3
3
|
* into a tree structure for table rendering. Builds hierarchy: tool → agent → invocation.
|
|
4
4
|
*/
|
|
5
5
|
import type { EvaluationResultItem } from "../../dto/Evaluation";
|
|
6
|
+
type AgentNodeIdEntry = string | {
|
|
7
|
+
session_id: string;
|
|
8
|
+
agent_node_id: string;
|
|
9
|
+
};
|
|
6
10
|
export type ToolUseAggregateResultsInput = {
|
|
7
11
|
roots: string[];
|
|
8
12
|
nodes: Record<string, unknown>;
|
|
13
|
+
/** Optional agents for session deep-linking; agent_node_ids can be string[] or { session_id, agent_node_id }[] */
|
|
14
|
+
agents?: {
|
|
15
|
+
agent_name: string;
|
|
16
|
+
agent_node_ids?: AgentNodeIdEntry[];
|
|
17
|
+
}[];
|
|
9
18
|
};
|
|
10
19
|
export interface ToolUseTreeRow {
|
|
11
20
|
key: string;
|
|
@@ -14,6 +23,10 @@ export interface ToolUseTreeRow {
|
|
|
14
23
|
runtimeMs?: number;
|
|
15
24
|
failureRate?: string | "Success" | "Failed";
|
|
16
25
|
level: 1 | 2 | 3;
|
|
26
|
+
/** Session id for deep-linking to session drawer (level 2–3) */
|
|
27
|
+
sessionId?: string;
|
|
28
|
+
/** Node id to select in SessionTree (agent_node_id for level 2, tool_node_id for level 3) */
|
|
29
|
+
nodeId?: string;
|
|
17
30
|
children?: ToolUseTreeRow[];
|
|
18
31
|
}
|
|
19
32
|
/**
|
|
@@ -25,4 +38,8 @@ export declare function buildToolUseTreeFromAggregate(agg: ToolUseAggregateResul
|
|
|
25
38
|
* Builds a tree from raw ToolUseEvaluator results (legacy flat format).
|
|
26
39
|
* Structure: tool (level 1) → agent (level 2) → invocation (level 3).
|
|
27
40
|
*/
|
|
28
|
-
export declare function buildToolUseTreeFromRawResults(rawResults: EvaluationResultItem[]
|
|
41
|
+
export declare function buildToolUseTreeFromRawResults(rawResults: EvaluationResultItem[], agents?: {
|
|
42
|
+
agent_name: string;
|
|
43
|
+
agent_node_ids?: AgentNodeIdEntry[];
|
|
44
|
+
}[]): ToolUseTreeRow[];
|
|
45
|
+
export {};
|
|
@@ -141,10 +141,16 @@ export type EvaluationEvaluatorResult = {
|
|
|
141
141
|
/** Aggregate tree (roots + nodes with ToolAggregate, LLMInferenceAggregate, CategoricalAggregate) */
|
|
142
142
|
aggregate_results?: EvaluationAggregateResults;
|
|
143
143
|
};
|
|
144
|
+
/** New format: agent node with session context */
|
|
145
|
+
export type EvaluationAgentNodeEntry = {
|
|
146
|
+
session_id: string;
|
|
147
|
+
agent_node_id: string;
|
|
148
|
+
};
|
|
144
149
|
/** Agent entry in the evaluation (each has name and associated node IDs) */
|
|
145
150
|
export type EvaluationAgent = {
|
|
146
151
|
agent_name: string;
|
|
147
|
-
|
|
152
|
+
/** Legacy: flat list of node IDs. New: list of { session_id, agent_node_id } */
|
|
153
|
+
agent_node_ids: string[] | EvaluationAgentNodeEntry[];
|
|
148
154
|
};
|
|
149
155
|
/** Root evaluation document */
|
|
150
156
|
export type Evaluation = {
|