@railtownai/railtracks-visualizer 0.0.58 → 0.0.59
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/index.js +154 -43
- package/dist/esm/index.js +154 -43
- package/dist/types/agenthub/components/AggregateResultsTable.d.ts +6 -1
- package/dist/types/agenthub/pages/evaluations.types.d.ts +4 -1
- package/dist/types/agenthub/pages/evaluator-result-page.d.ts +6 -1
- package/dist/types/agenthub/pages/evaluator-result.d.ts +5 -0
- package/dist/types/agenthub/pages/session-details.d.ts +6 -0
- package/dist/types/agenthub/utils/evaluatorResultFromDto.d.ts +1 -1
- package/dist/types/agenthub/utils/llmInferenceAggregateTree.d.ts +9 -2
- package/dist/types/agenthub/utils/toolUseAggregateTree.d.ts +18 -1
- package/dist/types/dto/Evaluation.d.ts +7 -1
- package/package.json +1 -1
package/dist/cjs/index.js
CHANGED
|
@@ -113599,7 +113599,7 @@ const RunContent = ({ run, isDarkMode, showTimeline })=>{
|
|
|
113599
113599
|
height: "100%"
|
|
113600
113600
|
})));
|
|
113601
113601
|
};
|
|
113602
|
-
const SessionDetails = ({ session, open, onClose })=>{
|
|
113602
|
+
const SessionDetails = ({ session, open, onClose, initialNodeId, initialRunId, loading = false })=>{
|
|
113603
113603
|
const { isDarkMode, theme } = useTheme$1();
|
|
113604
113604
|
const [isFullScreen, setIsFullScreen] = React.useState(false);
|
|
113605
113605
|
const [selectedNodeKey, setSelectedNodeKey] = React.useState(null);
|
|
@@ -113652,8 +113652,7 @@ const SessionDetails = ({ session, open, onClose })=>{
|
|
|
113652
113652
|
}, [
|
|
113653
113653
|
open
|
|
113654
113654
|
]);
|
|
113655
|
-
// Default to first root node when session loads
|
|
113656
|
-
// Combined with session_id reset to avoid effect ordering overwriting the selection.
|
|
113655
|
+
// Default to first root node when session loads, or use initialNodeId/initialRunId when provided.
|
|
113657
113656
|
const hasAutoSelectedRef = React.useRef(false);
|
|
113658
113657
|
React.useEffect(()=>{
|
|
113659
113658
|
if (!open) {
|
|
@@ -113662,6 +113661,19 @@ const SessionDetails = ({ session, open, onClose })=>{
|
|
|
113662
113661
|
}
|
|
113663
113662
|
if (!session?.runs?.length) return;
|
|
113664
113663
|
if (hasAutoSelectedRef.current && selectedNodeInfo !== null) return;
|
|
113664
|
+
// Prefer initial node when provided (e.g. from evaluation link)
|
|
113665
|
+
if (initialNodeId) {
|
|
113666
|
+
const run = initialRunId ? session.runs.find((r)=>r.run_id === initialRunId) : session.runs.find((r)=>r.nodes?.some((n)=>n.identifier === initialNodeId));
|
|
113667
|
+
if (run?.run_id && getNodeFromRun(initialNodeId, run)) {
|
|
113668
|
+
handleSelectNode({
|
|
113669
|
+
nodeId: initialNodeId,
|
|
113670
|
+
runId: run.run_id
|
|
113671
|
+
});
|
|
113672
|
+
hasAutoSelectedRef.current = true;
|
|
113673
|
+
return;
|
|
113674
|
+
}
|
|
113675
|
+
}
|
|
113676
|
+
// Fallback: first root node
|
|
113665
113677
|
for (const run of session.runs){
|
|
113666
113678
|
const firstRoot = getFirstRootNodeFromRun(run);
|
|
113667
113679
|
if (firstRoot && run.run_id) {
|
|
@@ -113678,7 +113690,9 @@ const SessionDetails = ({ session, open, onClose })=>{
|
|
|
113678
113690
|
}, [
|
|
113679
113691
|
open,
|
|
113680
113692
|
session,
|
|
113681
|
-
selectedNodeInfo
|
|
113693
|
+
selectedNodeInfo,
|
|
113694
|
+
initialNodeId,
|
|
113695
|
+
initialRunId
|
|
113682
113696
|
]);
|
|
113683
113697
|
// Reset hasAutoSelectedRef when session changes so auto-select runs for the new session
|
|
113684
113698
|
React.useEffect(()=>{
|
|
@@ -113771,7 +113785,18 @@ const SessionDetails = ({ session, open, onClose })=>{
|
|
|
113771
113785
|
flexDirection: "column"
|
|
113772
113786
|
}
|
|
113773
113787
|
}
|
|
113774
|
-
},
|
|
113788
|
+
}, loading ? /*#__PURE__*/ React.createElement("div", {
|
|
113789
|
+
style: {
|
|
113790
|
+
display: "flex",
|
|
113791
|
+
alignItems: "center",
|
|
113792
|
+
justifyContent: "center",
|
|
113793
|
+
height: "100%",
|
|
113794
|
+
color: isDarkMode ? "#8c8c8c" : "#595959"
|
|
113795
|
+
}
|
|
113796
|
+
}, /*#__PURE__*/ React.createElement(Spin, {
|
|
113797
|
+
size: "large",
|
|
113798
|
+
tip: "Loading session..."
|
|
113799
|
+
})) : !session ? /*#__PURE__*/ React.createElement("div", {
|
|
113775
113800
|
style: {
|
|
113776
113801
|
display: "flex",
|
|
113777
113802
|
alignItems: "center",
|
|
@@ -114030,14 +114055,20 @@ function dtoMetricToMetric(def) {
|
|
|
114030
114055
|
};
|
|
114031
114056
|
}
|
|
114032
114057
|
}
|
|
114033
|
-
// Build runs from agents.agent_node_ids
|
|
114034
|
-
const
|
|
114035
|
-
|
|
114036
|
-
|
|
114037
|
-
|
|
114058
|
+
// Build runs from agents.agent_node_ids (support legacy string[] and new { session_id, agent_node_id }[])
|
|
114059
|
+
const agentNodeEntries = evalDto.agents.flatMap((a)=>{
|
|
114060
|
+
const ids = a.agent_node_ids ?? [];
|
|
114061
|
+
return ids.map((entry)=>typeof entry === "string" ? {
|
|
114062
|
+
session_id: "",
|
|
114063
|
+
agent_node_id: entry
|
|
114064
|
+
} : entry);
|
|
114065
|
+
});
|
|
114066
|
+
const runs = agentNodeEntries.map(({ session_id, agent_node_id })=>({
|
|
114067
|
+
session_id,
|
|
114068
|
+
run_id: agent_node_id,
|
|
114038
114069
|
results: {}
|
|
114039
114070
|
}));
|
|
114040
|
-
// Agent name(s) for display - comma-separated when multiple;
|
|
114071
|
+
// Agent name(s) for display - comma-separated when multiple; pass through agent_node_ids for LLM tree (both formats)
|
|
114041
114072
|
const agents = evalDto.agents.map((a)=>({
|
|
114042
114073
|
agent_name: a.agent_name,
|
|
114043
114074
|
agent_node_ids: a.agent_node_ids
|
|
@@ -117095,12 +117126,17 @@ function isLLMNode(n) {
|
|
|
117095
117126
|
function isLLMAggregateNode(n) {
|
|
117096
117127
|
return isLLMInferenceAggregate(n);
|
|
117097
117128
|
}
|
|
117098
|
-
/**
|
|
117129
|
+
/** Normalize agent_node_ids to { nodeId, sessionId? }[] */ function normalizeAgentNodeEntries(agents) {
|
|
117099
117130
|
const map = new Map();
|
|
117100
117131
|
if (!agents) return map;
|
|
117101
117132
|
for (const a of agents){
|
|
117102
|
-
for (const
|
|
117103
|
-
|
|
117133
|
+
for (const entry of a.agent_node_ids ?? []){
|
|
117134
|
+
const nodeId = typeof entry === "string" ? entry : entry.agent_node_id;
|
|
117135
|
+
const sessionId = typeof entry === "string" ? undefined : entry.session_id;
|
|
117136
|
+
map.set(nodeId, {
|
|
117137
|
+
agentName: a.agent_name,
|
|
117138
|
+
sessionId
|
|
117139
|
+
});
|
|
117104
117140
|
}
|
|
117105
117141
|
}
|
|
117106
117142
|
return map;
|
|
@@ -117112,7 +117148,7 @@ function isLLMAggregateNode(n) {
|
|
|
117112
117148
|
*/ function buildLLMInferenceTreeFromAggregateResults(agg) {
|
|
117113
117149
|
const { roots, nodes, agents } = agg;
|
|
117114
117150
|
const nodeMap = nodes;
|
|
117115
|
-
const
|
|
117151
|
+
const agentNodeInfo = normalizeAgentNodeEntries(agents);
|
|
117116
117152
|
// Resolve root IDs to aggregate nodes
|
|
117117
117153
|
const rootAggregates = [];
|
|
117118
117154
|
for (const id of roots){
|
|
@@ -117145,14 +117181,17 @@ function isLLMAggregateNode(n) {
|
|
|
117145
117181
|
}
|
|
117146
117182
|
const sortedCalls = Array.from(llmCallIndices).sort((a, b)=>a - b);
|
|
117147
117183
|
const sortedMetrics = Array.from(metricNames).sort();
|
|
117148
|
-
// Group agent_node_ids by agent
|
|
117149
|
-
const
|
|
117150
|
-
for (const [nodeId,
|
|
117151
|
-
if (!
|
|
117152
|
-
|
|
117184
|
+
// Group agent_node_ids by agent: { nodeId, sessionId? }[]
|
|
117185
|
+
const agentToNodeEntries = new Map();
|
|
117186
|
+
for (const [nodeId, info] of agentNodeInfo){
|
|
117187
|
+
if (!agentToNodeEntries.has(info.agentName)) agentToNodeEntries.set(info.agentName, []);
|
|
117188
|
+
agentToNodeEntries.get(info.agentName).push({
|
|
117189
|
+
nodeId,
|
|
117190
|
+
sessionId: info.sessionId
|
|
117191
|
+
});
|
|
117153
117192
|
}
|
|
117154
117193
|
// If no agents provided, infer from data: collect unique agent_data_ids into "Agent (root)"
|
|
117155
|
-
if (
|
|
117194
|
+
if (agentToNodeEntries.size === 0) {
|
|
117156
117195
|
const seen = new Set();
|
|
117157
117196
|
for (const aggNode of rootAggregates){
|
|
117158
117197
|
for (const childId of aggNode.children ?? []){
|
|
@@ -117164,17 +117203,21 @@ function isLLMAggregateNode(n) {
|
|
|
117164
117203
|
}
|
|
117165
117204
|
}
|
|
117166
117205
|
if (seen.size > 0) {
|
|
117167
|
-
|
|
117206
|
+
agentToNodeEntries.set("Agent (root)", Array.from(seen).map((nodeId)=>({
|
|
117207
|
+
nodeId,
|
|
117208
|
+
sessionId: undefined
|
|
117209
|
+
})));
|
|
117168
117210
|
}
|
|
117169
117211
|
}
|
|
117170
117212
|
const rows = [];
|
|
117171
|
-
for (const [agentName,
|
|
117213
|
+
for (const [agentName, nodeEntries] of agentToNodeEntries){
|
|
117214
|
+
const nodeIds = nodeEntries.map((e)=>e.nodeId);
|
|
117172
117215
|
const agentKey = `agent-${agentName.replace(/\s+/g, "-")}`;
|
|
117173
117216
|
const llmCallChildren = [];
|
|
117174
117217
|
for (const callIdx of sortedCalls){
|
|
117175
117218
|
const callKey = `${agentKey}-call-${callIdx}`;
|
|
117176
117219
|
const nodeChildren = [];
|
|
117177
|
-
|
|
117220
|
+
nodeEntries.forEach(({ nodeId, sessionId })=>{
|
|
117178
117221
|
const metrics = {};
|
|
117179
117222
|
for (const m of sortedMetrics){
|
|
117180
117223
|
const k = `${nodeId}|${callIdx}|${m}`;
|
|
@@ -117190,6 +117233,7 @@ function isLLMAggregateNode(n) {
|
|
|
117190
117233
|
metrics,
|
|
117191
117234
|
value: metrics[primaryMetric],
|
|
117192
117235
|
agentNodeId: nodeId,
|
|
117236
|
+
sessionId,
|
|
117193
117237
|
children: undefined
|
|
117194
117238
|
});
|
|
117195
117239
|
}
|
|
@@ -117268,7 +117312,19 @@ const LATENCY_RESULT_PREFIXES = [
|
|
|
117268
117312
|
"Latency/",
|
|
117269
117313
|
"Runtime/"
|
|
117270
117314
|
];
|
|
117271
|
-
|
|
117315
|
+
/** Build map from agent_node_id -> session_id from agents (supports both formats) */ function buildAgentIdToSessionMap(agents) {
|
|
117316
|
+
const map = new Map();
|
|
117317
|
+
if (!agents) return map;
|
|
117318
|
+
for (const a of agents){
|
|
117319
|
+
for (const entry of a.agent_node_ids ?? []){
|
|
117320
|
+
const nodeId = typeof entry === "string" ? entry : entry.agent_node_id;
|
|
117321
|
+
const sessionId = typeof entry === "string" ? undefined : entry.session_id;
|
|
117322
|
+
if (sessionId) map.set(nodeId, sessionId);
|
|
117323
|
+
}
|
|
117324
|
+
}
|
|
117325
|
+
return map;
|
|
117326
|
+
}
|
|
117327
|
+
function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCountByTool, latencyResults, failureResults, agentIdToSessionId) {
|
|
117272
117328
|
const toolNamesFromAgg = Array.from(new Set([
|
|
117273
117329
|
...latencyByTool.keys(),
|
|
117274
117330
|
...failureRateByTool.keys(),
|
|
@@ -117331,6 +117387,7 @@ function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCount
|
|
|
117331
117387
|
const cnt = uniqueLatencies.length;
|
|
117332
117388
|
let failed = 0;
|
|
117333
117389
|
const failList = failureByToolAgentIndex.get(`${toolName}|${aid}`) ?? [];
|
|
117390
|
+
const sessionId = agentIdToSessionId.get(aid);
|
|
117334
117391
|
const leaves = uniqueLatencies.map((l, idx)=>{
|
|
117335
117392
|
const failVal = failureByKey.get(l.key);
|
|
117336
117393
|
const failValByIndex = failList[idx];
|
|
@@ -117338,12 +117395,15 @@ function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCount
|
|
|
117338
117395
|
const isFailed = resolvedFail !== undefined && resolvedFail >= 0.5;
|
|
117339
117396
|
if (isFailed) failed++;
|
|
117340
117397
|
const leafName = l.toolNodeId ?? `invocation-${idx + 1}`;
|
|
117398
|
+
const nodeId = l.toolNodeId ?? undefined;
|
|
117341
117399
|
return {
|
|
117342
117400
|
key: `${toolName}-${aid}-${idx}`,
|
|
117343
117401
|
name: leafName,
|
|
117344
117402
|
runtimeMs: Math.round(l.value * 1000),
|
|
117345
117403
|
failureRate: isFailed ? "Failed" : "Success",
|
|
117346
117404
|
level: 3,
|
|
117405
|
+
sessionId: sessionId ?? undefined,
|
|
117406
|
+
nodeId,
|
|
117347
117407
|
children: undefined
|
|
117348
117408
|
};
|
|
117349
117409
|
});
|
|
@@ -117355,6 +117415,8 @@ function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCount
|
|
|
117355
117415
|
runtimeMs: Math.round(totalRuntimeMs),
|
|
117356
117416
|
failureRate: agentFailurePct,
|
|
117357
117417
|
level: 2,
|
|
117418
|
+
sessionId: sessionId ?? undefined,
|
|
117419
|
+
nodeId: aid,
|
|
117358
117420
|
children: leaves.length > 0 ? leaves : undefined
|
|
117359
117421
|
});
|
|
117360
117422
|
}
|
|
@@ -117399,12 +117461,13 @@ function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCount
|
|
|
117399
117461
|
}
|
|
117400
117462
|
const latencyResults = toolResults.filter((r)=>LATENCY_RESULT_PREFIXES.some((prefix)=>r.result_name?.startsWith(prefix)));
|
|
117401
117463
|
const failureResults = toolResults.filter((r)=>r.result_name?.startsWith("FailureRate/"));
|
|
117402
|
-
|
|
117464
|
+
const agentIdToSessionId = buildAgentIdToSessionMap(agg.agents);
|
|
117465
|
+
return buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCountByTool, latencyResults, failureResults, agentIdToSessionId);
|
|
117403
117466
|
}
|
|
117404
117467
|
/**
|
|
117405
117468
|
* Builds a tree from raw ToolUseEvaluator results (legacy flat format).
|
|
117406
117469
|
* Structure: tool (level 1) → agent (level 2) → invocation (level 3).
|
|
117407
|
-
*/ function buildToolUseTreeFromRawResults(rawResults) {
|
|
117470
|
+
*/ function buildToolUseTreeFromRawResults(rawResults, agents) {
|
|
117408
117471
|
const toolResults = rawResults.filter((r)=>isToolResult(r));
|
|
117409
117472
|
const toolAggregates = rawResults.filter((r)=>isToolAggregate(r));
|
|
117410
117473
|
const latencyByTool = new Map();
|
|
@@ -117426,7 +117489,8 @@ function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCount
|
|
|
117426
117489
|
}
|
|
117427
117490
|
const latencyResults = toolResults.filter((r)=>r.result_name.startsWith("Latency/") || r.result_name.startsWith("Runtime/"));
|
|
117428
117491
|
const failureResults = toolResults.filter((r)=>r.result_name.startsWith("FailureRate/"));
|
|
117429
|
-
|
|
117492
|
+
const agentIdToSessionId = buildAgentIdToSessionMap(agents);
|
|
117493
|
+
return buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCountByTool, latencyResults, failureResults, agentIdToSessionId);
|
|
117430
117494
|
}
|
|
117431
117495
|
|
|
117432
117496
|
function detectAggregateType(nodes) {
|
|
@@ -117493,7 +117557,7 @@ const DEFAULT_TITLES = {
|
|
|
117493
117557
|
LLMInference: "LLM Inference Evaluator - Aggregate View",
|
|
117494
117558
|
Judge: "Judge Evaluator - Aggregate View"
|
|
117495
117559
|
};
|
|
117496
|
-
const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, title, evaluatorName })=>{
|
|
117560
|
+
const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, title, evaluatorName, onAgentNodeClick })=>{
|
|
117497
117561
|
const { theme } = useTheme$1();
|
|
117498
117562
|
const [expandedRowKeys, setExpandedRowKeys] = React.useState([]);
|
|
117499
117563
|
const { aggregateType, dataSource, llmIsTree, llmExpandableKeys } = React.useMemo(()=>{
|
|
@@ -117502,7 +117566,10 @@ const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, titl
|
|
|
117502
117566
|
if (type === "ToolUse") {
|
|
117503
117567
|
return {
|
|
117504
117568
|
aggregateType: type,
|
|
117505
|
-
dataSource: buildToolUseTreeFromAggregate(
|
|
117569
|
+
dataSource: buildToolUseTreeFromAggregate({
|
|
117570
|
+
...aggregateResults,
|
|
117571
|
+
agents
|
|
117572
|
+
}),
|
|
117506
117573
|
llmIsTree: false,
|
|
117507
117574
|
llmExpandableKeys: []
|
|
117508
117575
|
};
|
|
@@ -117544,7 +117611,7 @@ const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, titl
|
|
|
117544
117611
|
if (toolResults.length > 0) {
|
|
117545
117612
|
return {
|
|
117546
117613
|
aggregateType: "ToolUse",
|
|
117547
|
-
dataSource: buildToolUseTreeFromRawResults(toolResults),
|
|
117614
|
+
dataSource: buildToolUseTreeFromRawResults(toolResults, agents),
|
|
117548
117615
|
llmIsTree: false,
|
|
117549
117616
|
llmExpandableKeys: []
|
|
117550
117617
|
};
|
|
@@ -117652,17 +117719,37 @@ const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, titl
|
|
|
117652
117719
|
}), "Name"),
|
|
117653
117720
|
dataIndex: "name",
|
|
117654
117721
|
key: "name",
|
|
117655
|
-
render: (name, row)
|
|
117722
|
+
render: (name, row)=>{
|
|
117723
|
+
const isClickable = row.level === 3 && row.agentNodeId && onAgentNodeClick;
|
|
117724
|
+
return /*#__PURE__*/ React.createElement("span", {
|
|
117656
117725
|
style: {
|
|
117657
117726
|
display: "inline-flex",
|
|
117658
117727
|
alignItems: "center",
|
|
117659
117728
|
gap: 8,
|
|
117660
117729
|
fontFamily: "monospace",
|
|
117661
|
-
fontSize: 13
|
|
117662
|
-
|
|
117730
|
+
fontSize: 13,
|
|
117731
|
+
...isClickable && {
|
|
117732
|
+
cursor: "pointer",
|
|
117733
|
+
color: theme.colors.primary
|
|
117734
|
+
}
|
|
117735
|
+
},
|
|
117736
|
+
onClick: isClickable ? (e)=>{
|
|
117737
|
+
e.stopPropagation();
|
|
117738
|
+
onAgentNodeClick(row.sessionId, row.agentNodeId);
|
|
117739
|
+
} : undefined,
|
|
117740
|
+
onKeyDown: isClickable ? (e)=>{
|
|
117741
|
+
if (e.key === "Enter" || e.key === " ") {
|
|
117742
|
+
e.preventDefault();
|
|
117743
|
+
e.stopPropagation();
|
|
117744
|
+
onAgentNodeClick(row.sessionId, row.agentNodeId);
|
|
117745
|
+
}
|
|
117746
|
+
} : undefined,
|
|
117747
|
+
role: isClickable ? "button" : undefined,
|
|
117748
|
+
tabIndex: isClickable ? 0 : undefined
|
|
117663
117749
|
}, /*#__PURE__*/ React.createElement(LLMTagForLevel, {
|
|
117664
117750
|
level: row.level
|
|
117665
|
-
}), name)
|
|
117751
|
+
}), name);
|
|
117752
|
+
}
|
|
117666
117753
|
},
|
|
117667
117754
|
...METRIC_COLUMNS.map(({ key, title, Icon })=>({
|
|
117668
117755
|
title: /*#__PURE__*/ React.createElement("span", {
|
|
@@ -117992,17 +118079,37 @@ const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, titl
|
|
|
117992
118079
|
}), "Name"),
|
|
117993
118080
|
dataIndex: "name",
|
|
117994
118081
|
key: "name",
|
|
117995
|
-
render: (name, row)
|
|
118082
|
+
render: (name, row)=>{
|
|
118083
|
+
const isClickable = (row.level === 2 || row.level === 3) && row.nodeId && onAgentNodeClick;
|
|
118084
|
+
return /*#__PURE__*/ React.createElement("span", {
|
|
117996
118085
|
style: {
|
|
117997
118086
|
display: "inline-flex",
|
|
117998
118087
|
alignItems: "center",
|
|
117999
118088
|
gap: 8,
|
|
118000
118089
|
fontFamily: "monospace",
|
|
118001
|
-
fontSize: 13
|
|
118002
|
-
|
|
118090
|
+
fontSize: 13,
|
|
118091
|
+
...isClickable && {
|
|
118092
|
+
cursor: "pointer",
|
|
118093
|
+
color: theme.colors.primary
|
|
118094
|
+
}
|
|
118095
|
+
},
|
|
118096
|
+
onClick: isClickable ? (e)=>{
|
|
118097
|
+
e.stopPropagation();
|
|
118098
|
+
onAgentNodeClick(row.sessionId, row.nodeId);
|
|
118099
|
+
} : undefined,
|
|
118100
|
+
onKeyDown: isClickable ? (e)=>{
|
|
118101
|
+
if (e.key === "Enter" || e.key === " ") {
|
|
118102
|
+
e.preventDefault();
|
|
118103
|
+
e.stopPropagation();
|
|
118104
|
+
onAgentNodeClick(row.sessionId, row.nodeId);
|
|
118105
|
+
}
|
|
118106
|
+
} : undefined,
|
|
118107
|
+
role: isClickable ? "button" : undefined,
|
|
118108
|
+
tabIndex: isClickable ? 0 : undefined
|
|
118003
118109
|
}, /*#__PURE__*/ React.createElement(TagForLevel, {
|
|
118004
118110
|
level: row.level
|
|
118005
|
-
}), name)
|
|
118111
|
+
}), name);
|
|
118112
|
+
}
|
|
118006
118113
|
},
|
|
118007
118114
|
{
|
|
118008
118115
|
title: /*#__PURE__*/ React.createElement("span", {
|
|
@@ -118312,11 +118419,14 @@ function isCategoricalMetric(def) {
|
|
|
118312
118419
|
/** Derive agent display name from DTO. */ function deriveAgentName(evaluation) {
|
|
118313
118420
|
return evaluation.agents?.map((a)=>a.agent_name).join(", ") ?? "-";
|
|
118314
118421
|
}
|
|
118315
|
-
/** Derive runs count from DTO (agent_node_ids). */ function deriveRunsCount(evaluation) {
|
|
118316
|
-
return evaluation.agents?.
|
|
118422
|
+
/** Derive runs count from DTO (agent_node_ids). Supports both string[] and { session_id, agent_node_id }[]. */ function deriveRunsCount(evaluation) {
|
|
118423
|
+
return evaluation.agents?.reduce((sum, a)=>{
|
|
118424
|
+
const ids = a.agent_node_ids ?? [];
|
|
118425
|
+
return sum + ids.length;
|
|
118426
|
+
}, 0) ?? 0;
|
|
118317
118427
|
}
|
|
118318
118428
|
|
|
118319
|
-
const EvaluatorResult = ({ evaluation, evaluatorId, backHref })=>{
|
|
118429
|
+
const EvaluatorResult = ({ evaluation, evaluatorId, backHref, onAgentNodeClick })=>{
|
|
118320
118430
|
const { theme } = useTheme$1();
|
|
118321
118431
|
const labelColor = theme.colors.mutedForeground;
|
|
118322
118432
|
const derived = evaluation ? deriveEvaluatorResultData(evaluation, evaluatorId) : null;
|
|
@@ -118458,7 +118568,8 @@ const EvaluatorResult = ({ evaluation, evaluatorId, backHref })=>{
|
|
|
118458
118568
|
rawResults: rawResults.filter((r)=>isToolResult(r) || isToolAggregate(r)),
|
|
118459
118569
|
aggregateResults: aggregateResults,
|
|
118460
118570
|
agents: evaluation.agents,
|
|
118461
|
-
evaluatorName: evaluatorName
|
|
118571
|
+
evaluatorName: evaluatorName,
|
|
118572
|
+
onAgentNodeClick: onAgentNodeClick
|
|
118462
118573
|
}))));
|
|
118463
118574
|
};
|
|
118464
118575
|
|
package/dist/esm/index.js
CHANGED
|
@@ -113579,7 +113579,7 @@ const RunContent = ({ run, isDarkMode, showTimeline })=>{
|
|
|
113579
113579
|
height: "100%"
|
|
113580
113580
|
})));
|
|
113581
113581
|
};
|
|
113582
|
-
const SessionDetails = ({ session, open, onClose })=>{
|
|
113582
|
+
const SessionDetails = ({ session, open, onClose, initialNodeId, initialRunId, loading = false })=>{
|
|
113583
113583
|
const { isDarkMode, theme } = useTheme$1();
|
|
113584
113584
|
const [isFullScreen, setIsFullScreen] = useState(false);
|
|
113585
113585
|
const [selectedNodeKey, setSelectedNodeKey] = useState(null);
|
|
@@ -113632,8 +113632,7 @@ const SessionDetails = ({ session, open, onClose })=>{
|
|
|
113632
113632
|
}, [
|
|
113633
113633
|
open
|
|
113634
113634
|
]);
|
|
113635
|
-
// Default to first root node when session loads
|
|
113636
|
-
// Combined with session_id reset to avoid effect ordering overwriting the selection.
|
|
113635
|
+
// Default to first root node when session loads, or use initialNodeId/initialRunId when provided.
|
|
113637
113636
|
const hasAutoSelectedRef = React__default.useRef(false);
|
|
113638
113637
|
React__default.useEffect(()=>{
|
|
113639
113638
|
if (!open) {
|
|
@@ -113642,6 +113641,19 @@ const SessionDetails = ({ session, open, onClose })=>{
|
|
|
113642
113641
|
}
|
|
113643
113642
|
if (!session?.runs?.length) return;
|
|
113644
113643
|
if (hasAutoSelectedRef.current && selectedNodeInfo !== null) return;
|
|
113644
|
+
// Prefer initial node when provided (e.g. from evaluation link)
|
|
113645
|
+
if (initialNodeId) {
|
|
113646
|
+
const run = initialRunId ? session.runs.find((r)=>r.run_id === initialRunId) : session.runs.find((r)=>r.nodes?.some((n)=>n.identifier === initialNodeId));
|
|
113647
|
+
if (run?.run_id && getNodeFromRun(initialNodeId, run)) {
|
|
113648
|
+
handleSelectNode({
|
|
113649
|
+
nodeId: initialNodeId,
|
|
113650
|
+
runId: run.run_id
|
|
113651
|
+
});
|
|
113652
|
+
hasAutoSelectedRef.current = true;
|
|
113653
|
+
return;
|
|
113654
|
+
}
|
|
113655
|
+
}
|
|
113656
|
+
// Fallback: first root node
|
|
113645
113657
|
for (const run of session.runs){
|
|
113646
113658
|
const firstRoot = getFirstRootNodeFromRun(run);
|
|
113647
113659
|
if (firstRoot && run.run_id) {
|
|
@@ -113658,7 +113670,9 @@ const SessionDetails = ({ session, open, onClose })=>{
|
|
|
113658
113670
|
}, [
|
|
113659
113671
|
open,
|
|
113660
113672
|
session,
|
|
113661
|
-
selectedNodeInfo
|
|
113673
|
+
selectedNodeInfo,
|
|
113674
|
+
initialNodeId,
|
|
113675
|
+
initialRunId
|
|
113662
113676
|
]);
|
|
113663
113677
|
// Reset hasAutoSelectedRef when session changes so auto-select runs for the new session
|
|
113664
113678
|
React__default.useEffect(()=>{
|
|
@@ -113751,7 +113765,18 @@ const SessionDetails = ({ session, open, onClose })=>{
|
|
|
113751
113765
|
flexDirection: "column"
|
|
113752
113766
|
}
|
|
113753
113767
|
}
|
|
113754
|
-
},
|
|
113768
|
+
}, loading ? /*#__PURE__*/ React__default.createElement("div", {
|
|
113769
|
+
style: {
|
|
113770
|
+
display: "flex",
|
|
113771
|
+
alignItems: "center",
|
|
113772
|
+
justifyContent: "center",
|
|
113773
|
+
height: "100%",
|
|
113774
|
+
color: isDarkMode ? "#8c8c8c" : "#595959"
|
|
113775
|
+
}
|
|
113776
|
+
}, /*#__PURE__*/ React__default.createElement(Spin, {
|
|
113777
|
+
size: "large",
|
|
113778
|
+
tip: "Loading session..."
|
|
113779
|
+
})) : !session ? /*#__PURE__*/ React__default.createElement("div", {
|
|
113755
113780
|
style: {
|
|
113756
113781
|
display: "flex",
|
|
113757
113782
|
alignItems: "center",
|
|
@@ -114010,14 +114035,20 @@ function dtoMetricToMetric(def) {
|
|
|
114010
114035
|
};
|
|
114011
114036
|
}
|
|
114012
114037
|
}
|
|
114013
|
-
// Build runs from agents.agent_node_ids
|
|
114014
|
-
const
|
|
114015
|
-
|
|
114016
|
-
|
|
114017
|
-
|
|
114038
|
+
// Build runs from agents.agent_node_ids (support legacy string[] and new { session_id, agent_node_id }[])
|
|
114039
|
+
const agentNodeEntries = evalDto.agents.flatMap((a)=>{
|
|
114040
|
+
const ids = a.agent_node_ids ?? [];
|
|
114041
|
+
return ids.map((entry)=>typeof entry === "string" ? {
|
|
114042
|
+
session_id: "",
|
|
114043
|
+
agent_node_id: entry
|
|
114044
|
+
} : entry);
|
|
114045
|
+
});
|
|
114046
|
+
const runs = agentNodeEntries.map(({ session_id, agent_node_id })=>({
|
|
114047
|
+
session_id,
|
|
114048
|
+
run_id: agent_node_id,
|
|
114018
114049
|
results: {}
|
|
114019
114050
|
}));
|
|
114020
|
-
// Agent name(s) for display - comma-separated when multiple;
|
|
114051
|
+
// Agent name(s) for display - comma-separated when multiple; pass through agent_node_ids for LLM tree (both formats)
|
|
114021
114052
|
const agents = evalDto.agents.map((a)=>({
|
|
114022
114053
|
agent_name: a.agent_name,
|
|
114023
114054
|
agent_node_ids: a.agent_node_ids
|
|
@@ -117075,12 +117106,17 @@ function isLLMNode(n) {
|
|
|
117075
117106
|
function isLLMAggregateNode(n) {
|
|
117076
117107
|
return isLLMInferenceAggregate(n);
|
|
117077
117108
|
}
|
|
117078
|
-
/**
|
|
117109
|
+
/** Normalize agent_node_ids to { nodeId, sessionId? }[] */ function normalizeAgentNodeEntries(agents) {
|
|
117079
117110
|
const map = new Map();
|
|
117080
117111
|
if (!agents) return map;
|
|
117081
117112
|
for (const a of agents){
|
|
117082
|
-
for (const
|
|
117083
|
-
|
|
117113
|
+
for (const entry of a.agent_node_ids ?? []){
|
|
117114
|
+
const nodeId = typeof entry === "string" ? entry : entry.agent_node_id;
|
|
117115
|
+
const sessionId = typeof entry === "string" ? undefined : entry.session_id;
|
|
117116
|
+
map.set(nodeId, {
|
|
117117
|
+
agentName: a.agent_name,
|
|
117118
|
+
sessionId
|
|
117119
|
+
});
|
|
117084
117120
|
}
|
|
117085
117121
|
}
|
|
117086
117122
|
return map;
|
|
@@ -117092,7 +117128,7 @@ function isLLMAggregateNode(n) {
|
|
|
117092
117128
|
*/ function buildLLMInferenceTreeFromAggregateResults(agg) {
|
|
117093
117129
|
const { roots, nodes, agents } = agg;
|
|
117094
117130
|
const nodeMap = nodes;
|
|
117095
|
-
const
|
|
117131
|
+
const agentNodeInfo = normalizeAgentNodeEntries(agents);
|
|
117096
117132
|
// Resolve root IDs to aggregate nodes
|
|
117097
117133
|
const rootAggregates = [];
|
|
117098
117134
|
for (const id of roots){
|
|
@@ -117125,14 +117161,17 @@ function isLLMAggregateNode(n) {
|
|
|
117125
117161
|
}
|
|
117126
117162
|
const sortedCalls = Array.from(llmCallIndices).sort((a, b)=>a - b);
|
|
117127
117163
|
const sortedMetrics = Array.from(metricNames).sort();
|
|
117128
|
-
// Group agent_node_ids by agent
|
|
117129
|
-
const
|
|
117130
|
-
for (const [nodeId,
|
|
117131
|
-
if (!
|
|
117132
|
-
|
|
117164
|
+
// Group agent_node_ids by agent: { nodeId, sessionId? }[]
|
|
117165
|
+
const agentToNodeEntries = new Map();
|
|
117166
|
+
for (const [nodeId, info] of agentNodeInfo){
|
|
117167
|
+
if (!agentToNodeEntries.has(info.agentName)) agentToNodeEntries.set(info.agentName, []);
|
|
117168
|
+
agentToNodeEntries.get(info.agentName).push({
|
|
117169
|
+
nodeId,
|
|
117170
|
+
sessionId: info.sessionId
|
|
117171
|
+
});
|
|
117133
117172
|
}
|
|
117134
117173
|
// If no agents provided, infer from data: collect unique agent_data_ids into "Agent (root)"
|
|
117135
|
-
if (
|
|
117174
|
+
if (agentToNodeEntries.size === 0) {
|
|
117136
117175
|
const seen = new Set();
|
|
117137
117176
|
for (const aggNode of rootAggregates){
|
|
117138
117177
|
for (const childId of aggNode.children ?? []){
|
|
@@ -117144,17 +117183,21 @@ function isLLMAggregateNode(n) {
|
|
|
117144
117183
|
}
|
|
117145
117184
|
}
|
|
117146
117185
|
if (seen.size > 0) {
|
|
117147
|
-
|
|
117186
|
+
agentToNodeEntries.set("Agent (root)", Array.from(seen).map((nodeId)=>({
|
|
117187
|
+
nodeId,
|
|
117188
|
+
sessionId: undefined
|
|
117189
|
+
})));
|
|
117148
117190
|
}
|
|
117149
117191
|
}
|
|
117150
117192
|
const rows = [];
|
|
117151
|
-
for (const [agentName,
|
|
117193
|
+
for (const [agentName, nodeEntries] of agentToNodeEntries){
|
|
117194
|
+
const nodeIds = nodeEntries.map((e)=>e.nodeId);
|
|
117152
117195
|
const agentKey = `agent-${agentName.replace(/\s+/g, "-")}`;
|
|
117153
117196
|
const llmCallChildren = [];
|
|
117154
117197
|
for (const callIdx of sortedCalls){
|
|
117155
117198
|
const callKey = `${agentKey}-call-${callIdx}`;
|
|
117156
117199
|
const nodeChildren = [];
|
|
117157
|
-
|
|
117200
|
+
nodeEntries.forEach(({ nodeId, sessionId })=>{
|
|
117158
117201
|
const metrics = {};
|
|
117159
117202
|
for (const m of sortedMetrics){
|
|
117160
117203
|
const k = `${nodeId}|${callIdx}|${m}`;
|
|
@@ -117170,6 +117213,7 @@ function isLLMAggregateNode(n) {
|
|
|
117170
117213
|
metrics,
|
|
117171
117214
|
value: metrics[primaryMetric],
|
|
117172
117215
|
agentNodeId: nodeId,
|
|
117216
|
+
sessionId,
|
|
117173
117217
|
children: undefined
|
|
117174
117218
|
});
|
|
117175
117219
|
}
|
|
@@ -117248,7 +117292,19 @@ const LATENCY_RESULT_PREFIXES = [
|
|
|
117248
117292
|
"Latency/",
|
|
117249
117293
|
"Runtime/"
|
|
117250
117294
|
];
|
|
117251
|
-
|
|
117295
|
+
/** Build map from agent_node_id -> session_id from agents (supports both formats) */ function buildAgentIdToSessionMap(agents) {
|
|
117296
|
+
const map = new Map();
|
|
117297
|
+
if (!agents) return map;
|
|
117298
|
+
for (const a of agents){
|
|
117299
|
+
for (const entry of a.agent_node_ids ?? []){
|
|
117300
|
+
const nodeId = typeof entry === "string" ? entry : entry.agent_node_id;
|
|
117301
|
+
const sessionId = typeof entry === "string" ? undefined : entry.session_id;
|
|
117302
|
+
if (sessionId) map.set(nodeId, sessionId);
|
|
117303
|
+
}
|
|
117304
|
+
}
|
|
117305
|
+
return map;
|
|
117306
|
+
}
|
|
117307
|
+
function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCountByTool, latencyResults, failureResults, agentIdToSessionId) {
|
|
117252
117308
|
const toolNamesFromAgg = Array.from(new Set([
|
|
117253
117309
|
...latencyByTool.keys(),
|
|
117254
117310
|
...failureRateByTool.keys(),
|
|
@@ -117311,6 +117367,7 @@ function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCount
|
|
|
117311
117367
|
const cnt = uniqueLatencies.length;
|
|
117312
117368
|
let failed = 0;
|
|
117313
117369
|
const failList = failureByToolAgentIndex.get(`${toolName}|${aid}`) ?? [];
|
|
117370
|
+
const sessionId = agentIdToSessionId.get(aid);
|
|
117314
117371
|
const leaves = uniqueLatencies.map((l, idx)=>{
|
|
117315
117372
|
const failVal = failureByKey.get(l.key);
|
|
117316
117373
|
const failValByIndex = failList[idx];
|
|
@@ -117318,12 +117375,15 @@ function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCount
|
|
|
117318
117375
|
const isFailed = resolvedFail !== undefined && resolvedFail >= 0.5;
|
|
117319
117376
|
if (isFailed) failed++;
|
|
117320
117377
|
const leafName = l.toolNodeId ?? `invocation-${idx + 1}`;
|
|
117378
|
+
const nodeId = l.toolNodeId ?? undefined;
|
|
117321
117379
|
return {
|
|
117322
117380
|
key: `${toolName}-${aid}-${idx}`,
|
|
117323
117381
|
name: leafName,
|
|
117324
117382
|
runtimeMs: Math.round(l.value * 1000),
|
|
117325
117383
|
failureRate: isFailed ? "Failed" : "Success",
|
|
117326
117384
|
level: 3,
|
|
117385
|
+
sessionId: sessionId ?? undefined,
|
|
117386
|
+
nodeId,
|
|
117327
117387
|
children: undefined
|
|
117328
117388
|
};
|
|
117329
117389
|
});
|
|
@@ -117335,6 +117395,8 @@ function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCount
|
|
|
117335
117395
|
runtimeMs: Math.round(totalRuntimeMs),
|
|
117336
117396
|
failureRate: agentFailurePct,
|
|
117337
117397
|
level: 2,
|
|
117398
|
+
sessionId: sessionId ?? undefined,
|
|
117399
|
+
nodeId: aid,
|
|
117338
117400
|
children: leaves.length > 0 ? leaves : undefined
|
|
117339
117401
|
});
|
|
117340
117402
|
}
|
|
@@ -117379,12 +117441,13 @@ function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCount
|
|
|
117379
117441
|
}
|
|
117380
117442
|
const latencyResults = toolResults.filter((r)=>LATENCY_RESULT_PREFIXES.some((prefix)=>r.result_name?.startsWith(prefix)));
|
|
117381
117443
|
const failureResults = toolResults.filter((r)=>r.result_name?.startsWith("FailureRate/"));
|
|
117382
|
-
|
|
117444
|
+
const agentIdToSessionId = buildAgentIdToSessionMap(agg.agents);
|
|
117445
|
+
return buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCountByTool, latencyResults, failureResults, agentIdToSessionId);
|
|
117383
117446
|
}
|
|
117384
117447
|
/**
|
|
117385
117448
|
* Builds a tree from raw ToolUseEvaluator results (legacy flat format).
|
|
117386
117449
|
* Structure: tool (level 1) → agent (level 2) → invocation (level 3).
|
|
117387
|
-
*/ function buildToolUseTreeFromRawResults(rawResults) {
|
|
117450
|
+
*/ function buildToolUseTreeFromRawResults(rawResults, agents) {
|
|
117388
117451
|
const toolResults = rawResults.filter((r)=>isToolResult(r));
|
|
117389
117452
|
const toolAggregates = rawResults.filter((r)=>isToolAggregate(r));
|
|
117390
117453
|
const latencyByTool = new Map();
|
|
@@ -117406,7 +117469,8 @@ function buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCount
|
|
|
117406
117469
|
}
|
|
117407
117470
|
const latencyResults = toolResults.filter((r)=>r.result_name.startsWith("Latency/") || r.result_name.startsWith("Runtime/"));
|
|
117408
117471
|
const failureResults = toolResults.filter((r)=>r.result_name.startsWith("FailureRate/"));
|
|
117409
|
-
|
|
117472
|
+
const agentIdToSessionId = buildAgentIdToSessionMap(agents);
|
|
117473
|
+
return buildTreeFromProcessedData(latencyByTool, failureRateByTool, usageCountByTool, latencyResults, failureResults, agentIdToSessionId);
|
|
117410
117474
|
}
|
|
117411
117475
|
|
|
117412
117476
|
function detectAggregateType(nodes) {
|
|
@@ -117473,7 +117537,7 @@ const DEFAULT_TITLES = {
|
|
|
117473
117537
|
LLMInference: "LLM Inference Evaluator - Aggregate View",
|
|
117474
117538
|
Judge: "Judge Evaluator - Aggregate View"
|
|
117475
117539
|
};
|
|
117476
|
-
const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, title, evaluatorName })=>{
|
|
117540
|
+
const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, title, evaluatorName, onAgentNodeClick })=>{
|
|
117477
117541
|
const { theme } = useTheme$1();
|
|
117478
117542
|
const [expandedRowKeys, setExpandedRowKeys] = useState([]);
|
|
117479
117543
|
const { aggregateType, dataSource, llmIsTree, llmExpandableKeys } = useMemo(()=>{
|
|
@@ -117482,7 +117546,10 @@ const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, titl
|
|
|
117482
117546
|
if (type === "ToolUse") {
|
|
117483
117547
|
return {
|
|
117484
117548
|
aggregateType: type,
|
|
117485
|
-
dataSource: buildToolUseTreeFromAggregate(
|
|
117549
|
+
dataSource: buildToolUseTreeFromAggregate({
|
|
117550
|
+
...aggregateResults,
|
|
117551
|
+
agents
|
|
117552
|
+
}),
|
|
117486
117553
|
llmIsTree: false,
|
|
117487
117554
|
llmExpandableKeys: []
|
|
117488
117555
|
};
|
|
@@ -117524,7 +117591,7 @@ const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, titl
|
|
|
117524
117591
|
if (toolResults.length > 0) {
|
|
117525
117592
|
return {
|
|
117526
117593
|
aggregateType: "ToolUse",
|
|
117527
|
-
dataSource: buildToolUseTreeFromRawResults(toolResults),
|
|
117594
|
+
dataSource: buildToolUseTreeFromRawResults(toolResults, agents),
|
|
117528
117595
|
llmIsTree: false,
|
|
117529
117596
|
llmExpandableKeys: []
|
|
117530
117597
|
};
|
|
@@ -117632,17 +117699,37 @@ const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, titl
|
|
|
117632
117699
|
}), "Name"),
|
|
117633
117700
|
dataIndex: "name",
|
|
117634
117701
|
key: "name",
|
|
117635
|
-
render: (name, row)
|
|
117702
|
+
render: (name, row)=>{
|
|
117703
|
+
const isClickable = row.level === 3 && row.agentNodeId && onAgentNodeClick;
|
|
117704
|
+
return /*#__PURE__*/ React__default.createElement("span", {
|
|
117636
117705
|
style: {
|
|
117637
117706
|
display: "inline-flex",
|
|
117638
117707
|
alignItems: "center",
|
|
117639
117708
|
gap: 8,
|
|
117640
117709
|
fontFamily: "monospace",
|
|
117641
|
-
fontSize: 13
|
|
117642
|
-
|
|
117710
|
+
fontSize: 13,
|
|
117711
|
+
...isClickable && {
|
|
117712
|
+
cursor: "pointer",
|
|
117713
|
+
color: theme.colors.primary
|
|
117714
|
+
}
|
|
117715
|
+
},
|
|
117716
|
+
onClick: isClickable ? (e)=>{
|
|
117717
|
+
e.stopPropagation();
|
|
117718
|
+
onAgentNodeClick(row.sessionId, row.agentNodeId);
|
|
117719
|
+
} : undefined,
|
|
117720
|
+
onKeyDown: isClickable ? (e)=>{
|
|
117721
|
+
if (e.key === "Enter" || e.key === " ") {
|
|
117722
|
+
e.preventDefault();
|
|
117723
|
+
e.stopPropagation();
|
|
117724
|
+
onAgentNodeClick(row.sessionId, row.agentNodeId);
|
|
117725
|
+
}
|
|
117726
|
+
} : undefined,
|
|
117727
|
+
role: isClickable ? "button" : undefined,
|
|
117728
|
+
tabIndex: isClickable ? 0 : undefined
|
|
117643
117729
|
}, /*#__PURE__*/ React__default.createElement(LLMTagForLevel, {
|
|
117644
117730
|
level: row.level
|
|
117645
|
-
}), name)
|
|
117731
|
+
}), name);
|
|
117732
|
+
}
|
|
117646
117733
|
},
|
|
117647
117734
|
...METRIC_COLUMNS.map(({ key, title, Icon })=>({
|
|
117648
117735
|
title: /*#__PURE__*/ React__default.createElement("span", {
|
|
@@ -117972,17 +118059,37 @@ const AggregateResultsTable = ({ rawResults = [], aggregateResults, agents, titl
|
|
|
117972
118059
|
}), "Name"),
|
|
117973
118060
|
dataIndex: "name",
|
|
117974
118061
|
key: "name",
|
|
117975
|
-
render: (name, row)
|
|
118062
|
+
render: (name, row)=>{
|
|
118063
|
+
const isClickable = (row.level === 2 || row.level === 3) && row.nodeId && onAgentNodeClick;
|
|
118064
|
+
return /*#__PURE__*/ React__default.createElement("span", {
|
|
117976
118065
|
style: {
|
|
117977
118066
|
display: "inline-flex",
|
|
117978
118067
|
alignItems: "center",
|
|
117979
118068
|
gap: 8,
|
|
117980
118069
|
fontFamily: "monospace",
|
|
117981
|
-
fontSize: 13
|
|
117982
|
-
|
|
118070
|
+
fontSize: 13,
|
|
118071
|
+
...isClickable && {
|
|
118072
|
+
cursor: "pointer",
|
|
118073
|
+
color: theme.colors.primary
|
|
118074
|
+
}
|
|
118075
|
+
},
|
|
118076
|
+
onClick: isClickable ? (e)=>{
|
|
118077
|
+
e.stopPropagation();
|
|
118078
|
+
onAgentNodeClick(row.sessionId, row.nodeId);
|
|
118079
|
+
} : undefined,
|
|
118080
|
+
onKeyDown: isClickable ? (e)=>{
|
|
118081
|
+
if (e.key === "Enter" || e.key === " ") {
|
|
118082
|
+
e.preventDefault();
|
|
118083
|
+
e.stopPropagation();
|
|
118084
|
+
onAgentNodeClick(row.sessionId, row.nodeId);
|
|
118085
|
+
}
|
|
118086
|
+
} : undefined,
|
|
118087
|
+
role: isClickable ? "button" : undefined,
|
|
118088
|
+
tabIndex: isClickable ? 0 : undefined
|
|
117983
118089
|
}, /*#__PURE__*/ React__default.createElement(TagForLevel, {
|
|
117984
118090
|
level: row.level
|
|
117985
|
-
}), name)
|
|
118091
|
+
}), name);
|
|
118092
|
+
}
|
|
117986
118093
|
},
|
|
117987
118094
|
{
|
|
117988
118095
|
title: /*#__PURE__*/ React__default.createElement("span", {
|
|
@@ -118292,11 +118399,14 @@ function isCategoricalMetric(def) {
|
|
|
118292
118399
|
/** Derive agent display name from DTO. */ function deriveAgentName(evaluation) {
|
|
118293
118400
|
return evaluation.agents?.map((a)=>a.agent_name).join(", ") ?? "-";
|
|
118294
118401
|
}
|
|
118295
|
-
/** Derive runs count from DTO (agent_node_ids). */ function deriveRunsCount(evaluation) {
|
|
118296
|
-
return evaluation.agents?.
|
|
118402
|
+
/** Derive runs count from DTO (agent_node_ids). Supports both string[] and { session_id, agent_node_id }[]. */ function deriveRunsCount(evaluation) {
|
|
118403
|
+
return evaluation.agents?.reduce((sum, a)=>{
|
|
118404
|
+
const ids = a.agent_node_ids ?? [];
|
|
118405
|
+
return sum + ids.length;
|
|
118406
|
+
}, 0) ?? 0;
|
|
118297
118407
|
}
|
|
118298
118408
|
|
|
118299
|
-
const EvaluatorResult = ({ evaluation, evaluatorId, backHref })=>{
|
|
118409
|
+
const EvaluatorResult = ({ evaluation, evaluatorId, backHref, onAgentNodeClick })=>{
|
|
118300
118410
|
const { theme } = useTheme$1();
|
|
118301
118411
|
const labelColor = theme.colors.mutedForeground;
|
|
118302
118412
|
const derived = evaluation ? deriveEvaluatorResultData(evaluation, evaluatorId) : null;
|
|
@@ -118438,7 +118548,8 @@ const EvaluatorResult = ({ evaluation, evaluatorId, backHref })=>{
|
|
|
118438
118548
|
rawResults: rawResults.filter((r)=>isToolResult(r) || isToolAggregate(r)),
|
|
118439
118549
|
aggregateResults: aggregateResults,
|
|
118440
118550
|
agents: evaluation.agents,
|
|
118441
|
-
evaluatorName: evaluatorName
|
|
118551
|
+
evaluatorName: evaluatorName,
|
|
118552
|
+
onAgentNodeClick: onAgentNodeClick
|
|
118442
118553
|
}))));
|
|
118443
118554
|
};
|
|
118444
118555
|
|
|
@@ -13,11 +13,16 @@ export interface AggregateResultsTableProps {
|
|
|
13
13
|
/** Agents (with agent_node_ids) for LLM inference tree Agent (root) labels. */
|
|
14
14
|
agents?: {
|
|
15
15
|
agent_name: string;
|
|
16
|
-
agent_node_ids?: string[]
|
|
16
|
+
agent_node_ids?: string[] | {
|
|
17
|
+
session_id: string;
|
|
18
|
+
agent_node_id: string;
|
|
19
|
+
}[];
|
|
17
20
|
}[];
|
|
18
21
|
/** Title shown above the table. */
|
|
19
22
|
title?: string;
|
|
20
23
|
/** Optional evaluator name hint (e.g. "ToolUseEvaluator") for default title. */
|
|
21
24
|
evaluatorName?: string;
|
|
25
|
+
/** Called when an Agent/Tool Node row is clicked. sessionId may be undefined when evaluation uses legacy agent_node_ids format. */
|
|
26
|
+
onAgentNodeClick?: (sessionId: string | undefined, nodeId: string) => void;
|
|
22
27
|
}
|
|
23
28
|
export declare const AggregateResultsTable: React.FC<AggregateResultsTableProps>;
|
|
@@ -56,7 +56,10 @@ export interface Evaluation {
|
|
|
56
56
|
/** Agents in the evaluation (name and node IDs for LLM inference tree) */
|
|
57
57
|
agents: {
|
|
58
58
|
agent_name: string;
|
|
59
|
-
agent_node_ids?: string[]
|
|
59
|
+
agent_node_ids?: string[] | {
|
|
60
|
+
session_id: string;
|
|
61
|
+
agent_node_id: string;
|
|
62
|
+
}[];
|
|
60
63
|
}[];
|
|
61
64
|
/** Number of agents in the evaluation */
|
|
62
65
|
agents_count: number;
|
|
@@ -2,10 +2,15 @@ import React from "react";
|
|
|
2
2
|
export interface EvaluatorResultPageProps {
|
|
3
3
|
/** Optional href for back link. Pass from the host app (e.g. "#/evaluations" for HashRouter). */
|
|
4
4
|
backHref?: string;
|
|
5
|
+
/**
|
|
6
|
+
* Optional handler for Agent/Tool node clicks. sessionId may be undefined when evaluation uses
|
|
7
|
+
* legacy agent_node_ids format. Default opens a SessionDetails drawer on this page.
|
|
8
|
+
*/
|
|
9
|
+
onAgentNodeClick?: (sessionId: string | undefined, nodeId: string) => void;
|
|
5
10
|
}
|
|
6
11
|
/**
|
|
7
12
|
* Page that resolves evaluationId and evaluatorId from the route,
|
|
8
13
|
* fetches evaluation data, and renders EvaluatorResult.
|
|
9
|
-
* backHref should be passed from the outer/host app for portability.
|
|
14
|
+
* backHref and onAgentNodeClick should be passed from the outer/host app for portability.
|
|
10
15
|
*/
|
|
11
16
|
export declare const EvaluatorResultPage: React.FC<EvaluatorResultPageProps>;
|
|
@@ -11,5 +11,10 @@ export interface EvaluatorResultProps {
|
|
|
11
11
|
evaluatorId: string;
|
|
12
12
|
/** Optional href for back link. When provided, a "Back to Evaluations" control is shown. */
|
|
13
13
|
backHref?: string;
|
|
14
|
+
/**
|
|
15
|
+
* Optional handler for Agent/Tool node clicks. sessionId may be undefined when evaluation
|
|
16
|
+
* uses legacy agent_node_ids format. When not provided, nodes use default (drawer).
|
|
17
|
+
*/
|
|
18
|
+
onAgentNodeClick?: (sessionId: string | undefined, nodeId: string) => void;
|
|
14
19
|
}
|
|
15
20
|
export declare const EvaluatorResult: React.FC<EvaluatorResultProps>;
|
|
@@ -5,6 +5,12 @@ interface SessionDetailsProps {
|
|
|
5
5
|
session: SessionListItem | null;
|
|
6
6
|
open: boolean;
|
|
7
7
|
onClose: () => void;
|
|
8
|
+
/** Optional initial node to select when opening (e.g. from evaluation link). */
|
|
9
|
+
initialNodeId?: string;
|
|
10
|
+
/** Optional run containing the node. When omitted, the run is found by searching session.runs. */
|
|
11
|
+
initialRunId?: string;
|
|
12
|
+
/** When true, show loading state instead of "No session data available". */
|
|
13
|
+
loading?: boolean;
|
|
8
14
|
}
|
|
9
15
|
export declare const InputsOutputsComponent: React.FC<{
|
|
10
16
|
run: AgentRun;
|
|
@@ -33,5 +33,5 @@ export interface EvaluatorResultDerived {
|
|
|
33
33
|
export declare function deriveEvaluatorResultData(evaluation: Evaluation, evaluatorId: string): EvaluatorResultDerived | null;
|
|
34
34
|
/** Derive agent display name from DTO. */
|
|
35
35
|
export declare function deriveAgentName(evaluation: Evaluation): string;
|
|
36
|
-
/** Derive runs count from DTO (agent_node_ids). */
|
|
36
|
+
/** Derive runs count from DTO (agent_node_ids). Supports both string[] and { session_id, agent_node_id }[]. */
|
|
37
37
|
export declare function deriveRunsCount(evaluation: Evaluation): number;
|
|
@@ -2,13 +2,17 @@
|
|
|
2
2
|
* Transforms LLMInferenceEvaluator aggregate_results (roots + nodes) into a tree
|
|
3
3
|
* structure for table rendering. Builds hierarchy: Agent (root) -> llm call -> agent node id.
|
|
4
4
|
*/
|
|
5
|
+
type AgentNodeIdEntry = string | {
|
|
6
|
+
session_id: string;
|
|
7
|
+
agent_node_id: string;
|
|
8
|
+
};
|
|
5
9
|
export type LLMAggregateResultsInput = {
|
|
6
10
|
roots: string[];
|
|
7
11
|
nodes: Record<string, unknown>;
|
|
8
|
-
/** Optional agents for Agent (root) labels; agent_node_ids
|
|
12
|
+
/** Optional agents for Agent (root) labels; agent_node_ids can be string[] or { session_id, agent_node_id }[] */
|
|
9
13
|
agents?: {
|
|
10
14
|
agent_name: string;
|
|
11
|
-
agent_node_ids?:
|
|
15
|
+
agent_node_ids?: AgentNodeIdEntry[];
|
|
12
16
|
}[];
|
|
13
17
|
};
|
|
14
18
|
export interface LLMInferenceTreeRow {
|
|
@@ -26,6 +30,8 @@ export interface LLMInferenceTreeRow {
|
|
|
26
30
|
value?: number;
|
|
27
31
|
/** Leaf agent node id */
|
|
28
32
|
agentNodeId?: string;
|
|
33
|
+
/** Session id for linking to session-details (when agent_node_ids use new format) */
|
|
34
|
+
sessionId?: string;
|
|
29
35
|
children?: LLMInferenceTreeRow[];
|
|
30
36
|
}
|
|
31
37
|
/**
|
|
@@ -34,3 +40,4 @@ export interface LLMInferenceTreeRow {
|
|
|
34
40
|
* Each row has metric values (InputTokens, OutputTokens, TotalCost, Latency) as columns.
|
|
35
41
|
*/
|
|
36
42
|
export declare function buildLLMInferenceTreeFromAggregateResults(agg: LLMAggregateResultsInput): LLMInferenceTreeRow[];
|
|
43
|
+
export {};
|
|
@@ -3,9 +3,18 @@
|
|
|
3
3
|
* into a tree structure for table rendering. Builds hierarchy: tool → agent → invocation.
|
|
4
4
|
*/
|
|
5
5
|
import type { EvaluationResultItem } from "../../dto/Evaluation";
|
|
6
|
+
type AgentNodeIdEntry = string | {
|
|
7
|
+
session_id: string;
|
|
8
|
+
agent_node_id: string;
|
|
9
|
+
};
|
|
6
10
|
export type ToolUseAggregateResultsInput = {
|
|
7
11
|
roots: string[];
|
|
8
12
|
nodes: Record<string, unknown>;
|
|
13
|
+
/** Optional agents for session deep-linking; agent_node_ids can be string[] or { session_id, agent_node_id }[] */
|
|
14
|
+
agents?: {
|
|
15
|
+
agent_name: string;
|
|
16
|
+
agent_node_ids?: AgentNodeIdEntry[];
|
|
17
|
+
}[];
|
|
9
18
|
};
|
|
10
19
|
export interface ToolUseTreeRow {
|
|
11
20
|
key: string;
|
|
@@ -14,6 +23,10 @@ export interface ToolUseTreeRow {
|
|
|
14
23
|
runtimeMs?: number;
|
|
15
24
|
failureRate?: string | "Success" | "Failed";
|
|
16
25
|
level: 1 | 2 | 3;
|
|
26
|
+
/** Session id for deep-linking to session drawer (level 2–3) */
|
|
27
|
+
sessionId?: string;
|
|
28
|
+
/** Node id to select in SessionTree (agent_node_id for level 2, tool_node_id for level 3) */
|
|
29
|
+
nodeId?: string;
|
|
17
30
|
children?: ToolUseTreeRow[];
|
|
18
31
|
}
|
|
19
32
|
/**
|
|
@@ -25,4 +38,8 @@ export declare function buildToolUseTreeFromAggregate(agg: ToolUseAggregateResul
|
|
|
25
38
|
* Builds a tree from raw ToolUseEvaluator results (legacy flat format).
|
|
26
39
|
* Structure: tool (level 1) → agent (level 2) → invocation (level 3).
|
|
27
40
|
*/
|
|
28
|
-
export declare function buildToolUseTreeFromRawResults(rawResults: EvaluationResultItem[]
|
|
41
|
+
export declare function buildToolUseTreeFromRawResults(rawResults: EvaluationResultItem[], agents?: {
|
|
42
|
+
agent_name: string;
|
|
43
|
+
agent_node_ids?: AgentNodeIdEntry[];
|
|
44
|
+
}[]): ToolUseTreeRow[];
|
|
45
|
+
export {};
|
|
@@ -141,10 +141,16 @@ export type EvaluationEvaluatorResult = {
|
|
|
141
141
|
/** Aggregate tree (roots + nodes with ToolAggregate, LLMInferenceAggregate, CategoricalAggregate) */
|
|
142
142
|
aggregate_results?: EvaluationAggregateResults;
|
|
143
143
|
};
|
|
144
|
+
/** New format: agent node with session context */
|
|
145
|
+
export type EvaluationAgentNodeEntry = {
|
|
146
|
+
session_id: string;
|
|
147
|
+
agent_node_id: string;
|
|
148
|
+
};
|
|
144
149
|
/** Agent entry in the evaluation (each has name and associated node IDs) */
|
|
145
150
|
export type EvaluationAgent = {
|
|
146
151
|
agent_name: string;
|
|
147
|
-
|
|
152
|
+
/** Legacy: flat list of node IDs. New: list of { session_id, agent_node_id } */
|
|
153
|
+
agent_node_ids: string[] | EvaluationAgentNodeEntry[];
|
|
148
154
|
};
|
|
149
155
|
/** Root evaluation document */
|
|
150
156
|
export type Evaluation = {
|