agenteval-sdk 0.3.2__tar.gz → 0.3.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agenteval_sdk-0.3.2 → agenteval_sdk-0.3.4}/PKG-INFO +1 -1
- {agenteval_sdk-0.3.2 → agenteval_sdk-0.3.4}/agenteval_sdk/__init__.py +1 -1
- {agenteval_sdk-0.3.2 → agenteval_sdk-0.3.4}/agenteval_sdk/dashboard.py +10 -0
- {agenteval_sdk-0.3.2 → agenteval_sdk-0.3.4}/agenteval_sdk/local_store.py +102 -3
- {agenteval_sdk-0.3.2 → agenteval_sdk-0.3.4}/agenteval_sdk/templates/dashboard.html +7 -2
- agenteval_sdk-0.3.4/agenteval_sdk/templates/trace_tree.html +508 -0
- {agenteval_sdk-0.3.2 → agenteval_sdk-0.3.4}/agenteval_sdk.egg-info/PKG-INFO +1 -1
- {agenteval_sdk-0.3.2 → agenteval_sdk-0.3.4}/agenteval_sdk.egg-info/SOURCES.txt +2 -1
- {agenteval_sdk-0.3.2 → agenteval_sdk-0.3.4}/pyproject.toml +1 -1
- {agenteval_sdk-0.3.2 → agenteval_sdk-0.3.4}/README.md +0 -0
- {agenteval_sdk-0.3.2 → agenteval_sdk-0.3.4}/agenteval_sdk/cli.py +0 -0
- {agenteval_sdk-0.3.2 → agenteval_sdk-0.3.4}/agenteval_sdk/sdk.py +0 -0
- {agenteval_sdk-0.3.2 → agenteval_sdk-0.3.4}/agenteval_sdk.egg-info/dependency_links.txt +0 -0
- {agenteval_sdk-0.3.2 → agenteval_sdk-0.3.4}/agenteval_sdk.egg-info/entry_points.txt +0 -0
- {agenteval_sdk-0.3.2 → agenteval_sdk-0.3.4}/agenteval_sdk.egg-info/requires.txt +0 -0
- {agenteval_sdk-0.3.2 → agenteval_sdk-0.3.4}/agenteval_sdk.egg-info/top_level.txt +0 -0
- {agenteval_sdk-0.3.2 → agenteval_sdk-0.3.4}/setup.cfg +0 -0
|
@@ -26,6 +26,16 @@ def create_app(db_path: Optional[str] = None) -> Flask:
|
|
|
26
26
|
def index():
|
|
27
27
|
return render_template("dashboard.html")
|
|
28
28
|
|
|
29
|
+
@app.route("/trace-tree")
def trace_tree():
    """Render ``trace_tree.html`` for the case named by the ``case_id`` query arg.

    A missing ``case_id`` is passed to the template as an empty string.
    """
    return render_template(
        "trace_tree.html",
        case_id=request.args.get("case_id", ""),
    )
|
|
33
|
+
|
|
34
|
+
@app.route("/api/trace-tree/<trace_id>")
def api_trace_tree(trace_id):
    """Return, as JSON, whatever ``store.get_case_traces`` yields for ``trace_id``."""
    return jsonify(store.get_case_traces(trace_id))
|
|
38
|
+
|
|
29
39
|
@app.route("/api/traces")
|
|
30
40
|
def api_traces():
|
|
31
41
|
limit = request.args.get("limit", 100, type=int)
|
|
@@ -39,12 +39,28 @@ class LocalStore:
|
|
|
39
39
|
error_message TEXT DEFAULT '',
|
|
40
40
|
project_id TEXT DEFAULT 'default',
|
|
41
41
|
synced INTEGER DEFAULT 0,
|
|
42
|
-
created_at TEXT DEFAULT (datetime('now'))
|
|
42
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
43
|
+
span_id TEXT DEFAULT '',
|
|
44
|
+
parent_span_id TEXT DEFAULT '',
|
|
45
|
+
latency_threshold INTEGER,
|
|
46
|
+
session_id TEXT DEFAULT '',
|
|
47
|
+
has_pii INTEGER DEFAULT 0,
|
|
48
|
+
pii_types TEXT DEFAULT ''
|
|
43
49
|
);
|
|
44
50
|
CREATE INDEX IF NOT EXISTS idx_traces_synced ON traces(synced);
|
|
45
51
|
CREATE INDEX IF NOT EXISTS idx_traces_trace_id ON traces(trace_id);
|
|
52
|
+
CREATE INDEX IF NOT EXISTS idx_traces_session_id ON traces(session_id);
|
|
46
53
|
"""
|
|
47
54
|
|
|
55
|
+
_MIGRATIONS = [
|
|
56
|
+
"ALTER TABLE traces ADD COLUMN span_id TEXT DEFAULT ''",
|
|
57
|
+
"ALTER TABLE traces ADD COLUMN parent_span_id TEXT DEFAULT ''",
|
|
58
|
+
"ALTER TABLE traces ADD COLUMN latency_threshold INTEGER",
|
|
59
|
+
"ALTER TABLE traces ADD COLUMN session_id TEXT DEFAULT ''",
|
|
60
|
+
"ALTER TABLE traces ADD COLUMN has_pii INTEGER DEFAULT 0",
|
|
61
|
+
"ALTER TABLE traces ADD COLUMN pii_types TEXT DEFAULT ''",
|
|
62
|
+
]
|
|
63
|
+
|
|
48
64
|
def __init__(self, db_path: Optional[str] = None) -> None:
|
|
49
65
|
self._db_path = db_path or _default_db_path()
|
|
50
66
|
self._lock = threading.Lock()
|
|
@@ -61,6 +77,12 @@ class LocalStore:
|
|
|
61
77
|
conn = self._connect()
|
|
62
78
|
try:
|
|
63
79
|
conn.executescript(self._DDL)
|
|
80
|
+
for stmt in self._MIGRATIONS:
|
|
81
|
+
try:
|
|
82
|
+
conn.execute(stmt)
|
|
83
|
+
conn.commit()
|
|
84
|
+
except sqlite3.OperationalError:
|
|
85
|
+
pass # column already exists
|
|
64
86
|
finally:
|
|
65
87
|
conn.close()
|
|
66
88
|
|
|
@@ -69,12 +91,18 @@ class LocalStore:
|
|
|
69
91
|
with self._lock:
|
|
70
92
|
conn = self._connect()
|
|
71
93
|
try:
|
|
94
|
+
# Normalize PII types to a comma-separated string
|
|
95
|
+
pii_raw = trace.get("client_pii_types") or trace.get("pii_types") or []
|
|
96
|
+
pii_str = ",".join(pii_raw) if isinstance(pii_raw, list) else str(pii_raw)
|
|
97
|
+
has_pii = 1 if (trace.get("client_has_pii") or trace.get("has_pii") or pii_str) else 0
|
|
98
|
+
|
|
72
99
|
cur = conn.execute(
|
|
73
100
|
"""INSERT INTO traces
|
|
74
101
|
(trace_id, agent_name, agent_input, agent_output,
|
|
75
102
|
latency_ms, tokens_used, test_status, error_message,
|
|
76
|
-
project_id, synced
|
|
77
|
-
|
|
103
|
+
project_id, synced, span_id, parent_span_id,
|
|
104
|
+
latency_threshold, session_id, has_pii, pii_types)
|
|
105
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 0, ?, ?, ?, ?, ?, ?)""",
|
|
78
106
|
(
|
|
79
107
|
trace.get("trace_id", ""),
|
|
80
108
|
trace.get("agent_name", ""),
|
|
@@ -85,6 +113,12 @@ class LocalStore:
|
|
|
85
113
|
trace.get("test_status", "NEW"),
|
|
86
114
|
trace.get("error_message", ""),
|
|
87
115
|
trace.get("project_id", "default"),
|
|
116
|
+
trace.get("span_id", ""),
|
|
117
|
+
trace.get("parent_span_id", ""),
|
|
118
|
+
trace.get("latency_threshold"),
|
|
119
|
+
trace.get("session_id", ""),
|
|
120
|
+
has_pii,
|
|
121
|
+
pii_str,
|
|
88
122
|
),
|
|
89
123
|
)
|
|
90
124
|
conn.commit()
|
|
@@ -150,6 +184,71 @@ class LocalStore:
|
|
|
150
184
|
finally:
|
|
151
185
|
conn.close()
|
|
152
186
|
|
|
187
|
+
def get_by_trace_id(self, trace_id: str) -> List[Dict[str, Any]]:
    """Fetch every row stored under ``trace_id``, ordered by ascending ``id``.

    Each row is converted to a plain ``dict``; an unknown ``trace_id``
    yields an empty list. The store lock is held for the whole query and
    the connection is closed before returning.
    """
    query = "SELECT * FROM traces WHERE trace_id = ? ORDER BY id ASC"
    with self._lock:
        connection = self._connect()
        try:
            cursor = connection.execute(query, (trace_id,))
            return list(map(dict, cursor.fetchall()))
        finally:
            connection.close()
|
|
199
|
+
|
|
200
|
+
def get_case_traces(self, case_id: str) -> List[Dict[str, Any]]:
    """Return all traces belonging to a case/investigation.

    Rows whose ``trace_id`` equals ``case_id`` are always included. Rows
    with a *different* ``trace_id`` are added as "related" when they share
    the first direct row's ``project_id``, have an ``id`` between
    ``first_id - 5`` (never below 1) and ``first_id + 50``, and were
    created within 10 seconds of that first row. This handles multi-agent
    systems where different agents may produce different trace_ids for
    the same logical case.

    Returns:
        Row dicts, de-duplicated by ``id`` and sorted by ascending ``id``;
        an empty list when no row matches ``case_id`` directly.
    """
    # Heuristic bounds for what counts as "related" to the first span.
    ids_before = 5
    ids_after = 50
    window_days = 10.0 / 86400  # 10 seconds expressed in julianday units

    with self._lock:
        conn = self._connect()
        try:
            # 1. Direct match on trace_id.
            direct = conn.execute(
                "SELECT * FROM traces WHERE trace_id = ? ORDER BY id ASC",
                (case_id,),
            ).fetchall()
            if not direct:
                return []

            # 2. The first direct row anchors the project, time and id window.
            first = dict(direct[0])
            first_id = first["id"]

            # 3. Related rows. The upper id bound is passed fully computed;
            #    the SQL previously added another 50 on top of the already
            #    offset parameter, silently widening the window to +100.
            related = conn.execute(
                """SELECT * FROM traces
                   WHERE project_id = ?
                     AND trace_id != ?
                     AND id BETWEEN ? AND ?
                     AND abs(julianday(created_at) - julianday(?)) < ?
                   ORDER BY id ASC""",
                (
                    first.get("project_id", "default"),
                    case_id,
                    max(1, first_id - ids_before),
                    first_id + ids_after,
                    first.get("created_at", ""),
                    window_days,
                ),
            ).fetchall()

            # Combine, keeping the first occurrence of each id.
            unique: Dict[Any, Dict[str, Any]] = {}
            for row in (*direct, *related):
                record = dict(row)
                unique.setdefault(record["id"], record)
            return sorted(unique.values(), key=lambda r: r["id"])
        finally:
            conn.close()
|
|
251
|
+
|
|
153
252
|
@property
|
|
154
253
|
def path(self) -> str:
|
|
155
254
|
return self._db_path
|
|
@@ -245,10 +245,11 @@
|
|
|
245
245
|
<th>Status</th>
|
|
246
246
|
<th>Synced</th>
|
|
247
247
|
<th>Time</th>
|
|
248
|
+
<th>MRI</th>
|
|
248
249
|
</tr>
|
|
249
250
|
</thead>
|
|
250
251
|
<tbody id="traces-body">
|
|
251
|
-
<tr><td colspan="
|
|
252
|
+
<tr><td colspan="10" class="empty-state"><p>Loading…</p></td></tr>
|
|
252
253
|
</tbody>
|
|
253
254
|
</table>
|
|
254
255
|
</div>
|
|
@@ -286,7 +287,7 @@
|
|
|
286
287
|
function renderTable(traces) {
|
|
287
288
|
const tbody = document.getElementById('traces-body');
|
|
288
289
|
if (!traces.length) {
|
|
289
|
-
tbody.innerHTML = '<tr><td colspan="
|
|
290
|
+
tbody.innerHTML = '<tr><td colspan="10" class="empty-state"><p>No traces found. Use <code>AgentEvalClient.log_trace()</code> to get started.</p></td></tr>';
|
|
290
291
|
return;
|
|
291
292
|
}
|
|
292
293
|
tbody.innerHTML = traces.map((t, i) => `
|
|
@@ -300,6 +301,7 @@
|
|
|
300
301
|
<td>${statusBadge(t.test_status)}</td>
|
|
301
302
|
<td>${t.synced ? '<span class="badge badge-synced">synced</span>' : '<span class="badge badge-pending">pending</span>'}</td>
|
|
302
303
|
<td>${esc(formatTime(t.created_at))}</td>
|
|
304
|
+
<td><a href="/trace-tree?case_id=${encodeURIComponent(t.trace_id)}" onclick="event.stopPropagation()" style="color:var(--accent);text-decoration:none;font-size:0.8rem;" title="Open Trace Tree MRI">🔬 Tree</a></td>
|
|
303
305
|
</tr>
|
|
304
306
|
`).join('');
|
|
305
307
|
}
|
|
@@ -324,6 +326,9 @@
|
|
|
324
326
|
document.getElementById('modal-content').innerHTML = `
|
|
325
327
|
<button class="close-btn" onclick="closeModal()">×</button>
|
|
326
328
|
<h2>Trace Detail</h2>
|
|
329
|
+
<div style="margin-bottom:16px;">
|
|
330
|
+
<a href="/trace-tree?case_id=${encodeURIComponent(t.trace_id)}" class="btn" style="display:inline-block;text-decoration:none;font-size:0.85rem;">🔬 View Trace Tree MRI</a>
|
|
331
|
+
</div>
|
|
327
332
|
<div class="field"><div class="field-label">Trace ID</div><div>${esc(t.trace_id)}</div></div>
|
|
328
333
|
<div class="field"><div class="field-label">Agent Name</div><div>${esc(t.agent_name || '—')}</div></div>
|
|
329
334
|
<div class="field"><div class="field-label">Project ID</div><div>${esc(t.project_id || 'default')}</div></div>
|
|
@@ -0,0 +1,508 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="UTF-8">
|
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
6
|
+
<title>Trace Tree MRI — AgentEval</title>
|
|
7
|
+
<script src="https://d3js.org/d3.v7.min.js"></script>
|
|
8
|
+
<style>
|
|
9
|
+
:root {
|
|
10
|
+
--bg: #0f1117;
|
|
11
|
+
--surface: #1a1d27;
|
|
12
|
+
--surface2: #22252f;
|
|
13
|
+
--border: #2a2d3a;
|
|
14
|
+
--text: #e4e4e7;
|
|
15
|
+
--muted: #71717a;
|
|
16
|
+
--accent: #6366f1;
|
|
17
|
+
--accent-hover: #818cf8;
|
|
18
|
+
--green: #22c55e;
|
|
19
|
+
--red: #ef4444;
|
|
20
|
+
--amber: #f59e0b;
|
|
21
|
+
--blue: #3b82f6;
|
|
22
|
+
--purple: #a855f7;
|
|
23
|
+
--cyan: #06b6d4;
|
|
24
|
+
}
|
|
25
|
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
|
26
|
+
body {
|
|
27
|
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, monospace;
|
|
28
|
+
background: var(--bg);
|
|
29
|
+
color: var(--text);
|
|
30
|
+
line-height: 1.5;
|
|
31
|
+
}
|
|
32
|
+
.container { max-width: 1600px; margin: 0 auto; padding: 24px; }
|
|
33
|
+
|
|
34
|
+
header {
|
|
35
|
+
display: flex; justify-content: space-between; align-items: center;
|
|
36
|
+
padding-bottom: 20px; border-bottom: 1px solid var(--border); margin-bottom: 24px;
|
|
37
|
+
}
|
|
38
|
+
header h1 { font-size: 1.4rem; font-weight: 600; }
|
|
39
|
+
header h1 span { color: var(--accent); }
|
|
40
|
+
.header-actions { display: flex; gap: 10px; align-items: center; }
|
|
41
|
+
.btn {
|
|
42
|
+
background: var(--accent); color: #fff; border: none; border-radius: 6px;
|
|
43
|
+
padding: 8px 16px; font-size: 0.85rem; cursor: pointer;
|
|
44
|
+
text-decoration: none; transition: background 0.15s;
|
|
45
|
+
}
|
|
46
|
+
.btn:hover { background: var(--accent-hover); }
|
|
47
|
+
.btn-secondary { background: var(--surface); border: 1px solid var(--border); color: var(--text); }
|
|
48
|
+
.btn-secondary:hover { background: var(--border); }
|
|
49
|
+
|
|
50
|
+
.stats {
|
|
51
|
+
display: grid; grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
|
|
52
|
+
gap: 14px; margin-bottom: 24px;
|
|
53
|
+
}
|
|
54
|
+
.stat-card {
|
|
55
|
+
background: var(--surface); border: 1px solid var(--border);
|
|
56
|
+
border-radius: 10px; padding: 16px;
|
|
57
|
+
}
|
|
58
|
+
.stat-card .label {
|
|
59
|
+
font-size: 0.7rem; color: var(--muted); text-transform: uppercase;
|
|
60
|
+
letter-spacing: 0.05em;
|
|
61
|
+
}
|
|
62
|
+
.stat-card .value { font-size: 1.5rem; font-weight: 700; margin-top: 2px; }
|
|
63
|
+
.stat-card .value.green { color: var(--green); }
|
|
64
|
+
.stat-card .value.red { color: var(--red); }
|
|
65
|
+
.stat-card .value.amber { color: var(--amber); }
|
|
66
|
+
.stat-card .value.blue { color: var(--blue); }
|
|
67
|
+
|
|
68
|
+
.legend {
|
|
69
|
+
display: flex; gap: 24px; margin-bottom: 20px;
|
|
70
|
+
font-size: 0.8rem; color: var(--muted); flex-wrap: wrap;
|
|
71
|
+
}
|
|
72
|
+
.legend-item { display: flex; align-items: center; gap: 6px; }
|
|
73
|
+
.legend-dot { width: 12px; height: 12px; border-radius: 3px; display: inline-block; }
|
|
74
|
+
|
|
75
|
+
#tree-container {
|
|
76
|
+
background: var(--surface); border: 1px solid var(--border);
|
|
77
|
+
border-radius: 10px; min-height: 500px; overflow: auto; position: relative;
|
|
78
|
+
padding: 20px;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
.tree-link {
|
|
82
|
+
fill: none; stroke: #3a3d4a; stroke-width: 2.5;
|
|
83
|
+
}
|
|
84
|
+
.tree-link.fanout { stroke: var(--cyan); stroke-dasharray: 6,3; }
|
|
85
|
+
|
|
86
|
+
.detail-panel {
|
|
87
|
+
display: none; position: fixed; top: 0; right: 0; width: 420px; height: 100vh;
|
|
88
|
+
background: var(--surface); border-left: 1px solid var(--border);
|
|
89
|
+
z-index: 200; overflow-y: auto; padding: 24px;
|
|
90
|
+
box-shadow: -4px 0 20px rgba(0,0,0,0.4);
|
|
91
|
+
}
|
|
92
|
+
.detail-panel.active { display: block; }
|
|
93
|
+
.detail-panel .close-btn {
|
|
94
|
+
float: right; background: none; border: none; color: var(--muted);
|
|
95
|
+
font-size: 1.4rem; cursor: pointer;
|
|
96
|
+
}
|
|
97
|
+
.detail-panel h2 { font-size: 1.1rem; margin-bottom: 16px; }
|
|
98
|
+
.detail-panel .field { margin-bottom: 14px; }
|
|
99
|
+
.detail-panel .field-label {
|
|
100
|
+
font-size: 0.7rem; color: var(--muted); text-transform: uppercase; margin-bottom: 4px;
|
|
101
|
+
}
|
|
102
|
+
.detail-panel pre {
|
|
103
|
+
background: var(--bg); border: 1px solid var(--border); border-radius: 6px;
|
|
104
|
+
padding: 10px; font-size: 0.78rem; white-space: pre-wrap;
|
|
105
|
+
word-break: break-all; max-height: 180px; overflow-y: auto; margin-top: 2px;
|
|
106
|
+
}
|
|
107
|
+
.detail-panel .badge {
|
|
108
|
+
display: inline-block; padding: 2px 8px; border-radius: 4px;
|
|
109
|
+
font-size: 0.7rem; font-weight: 600; text-transform: uppercase;
|
|
110
|
+
}
|
|
111
|
+
.badge-pass { background: rgba(34,197,94,0.15); color: var(--green); }
|
|
112
|
+
.badge-fail, .badge-block { background: rgba(239,68,68,0.15); color: var(--red); }
|
|
113
|
+
.badge-flag { background: rgba(245,158,11,0.15); color: var(--amber); }
|
|
114
|
+
.badge-new { background: rgba(99,102,241,0.15); color: var(--accent); }
|
|
115
|
+
.badge-pii { background: rgba(239,68,68,0.15); color: var(--red); }
|
|
116
|
+
.badge-clean { background: rgba(34,197,94,0.15); color: var(--green); }
|
|
117
|
+
|
|
118
|
+
.empty-state {
|
|
119
|
+
text-align: center; padding: 80px 20px; color: var(--muted);
|
|
120
|
+
}
|
|
121
|
+
.empty-state h2 { font-size: 1.2rem; margin-bottom: 12px; color: var(--text); }
|
|
122
|
+
</style>
|
|
123
|
+
</head>
|
|
124
|
+
<body>
|
|
125
|
+
<div class="container">
|
|
126
|
+
<header>
|
|
127
|
+
<h1>🔬 <span>Trace Tree MRI</span></h1>
|
|
128
|
+
<div class="header-actions">
|
|
129
|
+
<span id="trace-id-display" style="font-size:0.8rem; color:var(--muted);"></span>
|
|
130
|
+
<a href="/" class="btn btn-secondary">← Dashboard</a>
|
|
131
|
+
</div>
|
|
132
|
+
</header>
|
|
133
|
+
|
|
134
|
+
<div class="stats" id="stats-row" style="display:none;">
|
|
135
|
+
<div class="stat-card">
|
|
136
|
+
<div class="label">Case / Trace ID</div>
|
|
137
|
+
<div class="value" id="stat-trace" style="font-size:0.75rem; word-break:break-all;">—</div>
|
|
138
|
+
</div>
|
|
139
|
+
<div class="stat-card">
|
|
140
|
+
<div class="label">Total Agents</div>
|
|
141
|
+
<div class="value blue" id="stat-agents">—</div>
|
|
142
|
+
</div>
|
|
143
|
+
<div class="stat-card">
|
|
144
|
+
<div class="label">Total Latency</div>
|
|
145
|
+
<div class="value amber" id="stat-latency">—</div>
|
|
146
|
+
</div>
|
|
147
|
+
<div class="stat-card">
|
|
148
|
+
<div class="label">Total Tokens</div>
|
|
149
|
+
<div class="value" id="stat-tokens">—</div>
|
|
150
|
+
</div>
|
|
151
|
+
<div class="stat-card">
|
|
152
|
+
<div class="label">Threshold Breaches</div>
|
|
153
|
+
<div class="value red" id="stat-breaches">—</div>
|
|
154
|
+
</div>
|
|
155
|
+
<div class="stat-card">
|
|
156
|
+
<div class="label">PII Detected</div>
|
|
157
|
+
<div class="value" id="stat-pii">—</div>
|
|
158
|
+
</div>
|
|
159
|
+
</div>
|
|
160
|
+
|
|
161
|
+
<div class="legend" id="legend" style="display:none;">
|
|
162
|
+
<div class="legend-item"><span class="legend-dot" style="background:var(--green);"></span> Pass / Within threshold</div>
|
|
163
|
+
<div class="legend-item"><span class="legend-dot" style="background:var(--red);"></span> Block / Exceeded threshold</div>
|
|
164
|
+
<div class="legend-item"><span class="legend-dot" style="background:var(--amber);"></span> Flag / Monitor</div>
|
|
165
|
+
<div class="legend-item"><span class="legend-dot" style="background:var(--accent);"></span> No threshold set</div>
|
|
166
|
+
<div class="legend-item"><span class="legend-dot" style="background:var(--cyan);"></span> Fan-out (parallel)</div>
|
|
167
|
+
</div>
|
|
168
|
+
|
|
169
|
+
<div id="tree-container"></div>
|
|
170
|
+
</div>
|
|
171
|
+
|
|
172
|
+
<div class="detail-panel" id="detail-panel">
|
|
173
|
+
<button class="close-btn" onclick="closePanel()">×</button>
|
|
174
|
+
<div id="detail-content"></div>
|
|
175
|
+
</div>
|
|
176
|
+
|
|
177
|
+
<script>
|
|
178
|
+
// Serialize through the tojson filter so the value is emitted as a valid JS
// string literal. HTML autoescaping alone leaves entity text such as &#34; in
// the id and lets a backslash or newline break the script.
const CASE_ID = {{ case_id|tojson }};
|
|
179
|
+
|
|
180
|
+
/* Known AML pipeline roles, used to infer the hierarchy when
   span_id / parent_span_id are missing.
   Each row is [agent name, sort order, tier]. */
const AGENT_ROLES = Object.fromEntries([
  ['Intake_Triage_Agent',        0, 'intake'],
  ['Parallel_Analysis',          1, 'parallel'],
  ['KYC_Profile_Agent',          2, 'analysis'],
  ['Sanctions_Screening_Agent',  3, 'analysis'],
  ['Transaction_Analysis_Agent', 4, 'analysis'],
  ['Lead_Investigator_Agent',    5, 'investigator'],
  ['Verdict_AutoClear',          6, 'verdict'],
  ['Verdict_AutoMonitor',        6, 'verdict'],
  ['Verdict_AutoBlock_OFAC',     6, 'verdict'],
  ['HITL_Decision',              6, 'verdict'],
].map(([name, order, tier]) => [name, { order, tier }]));

/* Accent colour per tier; `unknown` is the fallback for unmapped agents. */
const TIER_COLORS = {
  intake:       '#6366f1',
  analysis:     '#06b6d4',
  parallel:     '#06b6d4',
  investigator: '#a855f7',
  verdict:      '#f59e0b',
  unknown:      '#71717a',
};
|
|
203
|
+
|
|
204
|
+
function esc(s) {
  // HTML-escape via the DOM: assign as plain text, read back as markup.
  // Falsy input (null, undefined, '', 0) is rendered as an empty string.
  const scratch = document.createElement('div');
  scratch.textContent = s ? s : '';
  return scratch.innerHTML;
}
|
|
209
|
+
|
|
210
|
+
/* Fetch the spans for CASE_ID, fill the stat cards and draw the tree.
   Shows an empty-state message instead when no case id was given, the
   request fails, or no spans come back. */
async function loadTree() {
  const container = document.getElementById('tree-container');
  if (!CASE_ID) {
    // The placeholder is written with entities so it is displayed as text
    // rather than parsed as an (invisible) unknown element.
    container.innerHTML = '<div class="empty-state"><h2>No Case ID provided</h2>' +
      '<p>Add <code>?case_id=&lt;trace_id&gt;</code> to the URL.</p></div>';
    return;
  }
  document.getElementById('stat-trace').textContent = CASE_ID;

  let spans;
  try {
    const res = await fetch('/api/trace-tree/' + encodeURIComponent(CASE_ID));
    // Report HTTP failures as such instead of as a JSON parse error.
    if (!res.ok) throw new Error('HTTP ' + res.status);
    spans = await res.json();
    // Everything below relies on array methods; reject any other payload.
    if (!Array.isArray(spans)) throw new Error('Unexpected response format');
  } catch (e) {
    container.innerHTML = '<div class="empty-state"><h2>Load failed</h2><p>' + esc(e.message) + '</p></div>';
    return;
  }
  if (!spans.length) {
    container.innerHTML = '<div class="empty-state"><h2>No spans found</h2>' +
      '<p>No agents recorded for case <code>' + esc(CASE_ID) + '</code>.</p>' +
      '<p style="margin-top:8px;">Run a scenario first, then reopen this page.</p></div>';
    return;
  }

  // ── Stats ──
  const setText = (id, value) => { document.getElementById(id).textContent = value; };
  const total = field => spans.reduce((sum, span) => sum + (span[field] || 0), 0);

  document.getElementById('stats-row').style.display = '';
  document.getElementById('legend').style.display = '';
  setText('stat-agents', spans.length);
  setText('stat-latency', total('latency_ms') + 'ms');
  setText('stat-tokens', total('tokens_used'));

  // A breach needs a threshold to be set and the latency to exceed it.
  const breaches = spans.filter(s => s.latency_threshold && s.latency_ms > s.latency_threshold).length;
  setText('stat-breaches', breaches);

  const piiCount = spans.filter(s => s.has_pii).length;
  const piiEl = document.getElementById('stat-pii');
  piiEl.textContent = piiCount ? piiCount + ' agent(s)' : 'Clean';
  piiEl.className = 'value ' + (piiCount ? 'red' : 'green');

  renderTree(spans, container);
}
|
|
251
|
+
|
|
252
|
+
/* ── Build tree hierarchy ──
|
|
253
|
+
Try span_id / parent_span_id first.
|
|
254
|
+
Fall back to AGENT_ROLES inference when those are empty. */
|
|
255
|
+
function buildTree(spans) {
  // One span carrying both ids is enough to use the explicit links;
  // otherwise the hierarchy is inferred from the known agent roles.
  const linked = spans.some(span => span.span_id && span.parent_span_id);
  return linked ? buildTreeFromSpans(spans) : buildTreeFromRoles(spans);
}
|
|
260
|
+
|
|
261
|
+
/* Build the tree from explicit span_id / parent_span_id links.
   Spans whose parent is missing or unknown become roots; when there is more
   than one such span they are gathered under a virtual "Trace" node.
   Returns a virtual "Empty" node when nothing ends up at the root. */
function buildTreeFromSpans(spans) {
  // Every span gets its own node, built the same way whether or not it has
  // a span_id, so name and tier are consistent across the tree.
  const nodes = spans.map(s => ({
    ...s,
    children: [],
    _name: s.agent_name || 'Unknown',
    _tier: (AGENT_ROLES[s.agent_name] || {}).tier || 'unknown',
  }));

  // A Map avoids accidental hits on Object.prototype keys such as "constructor".
  const bySpan = new Map();
  for (const n of nodes) {
    if (n.span_id) bySpan.set(n.span_id, n);
  }

  let root = null;
  for (const n of nodes) {
    const parent = n.parent_span_id ? bySpan.get(n.parent_span_id) : undefined;
    // A span naming itself as parent is treated as parentless; attaching it
    // to itself would detach it from the tree entirely.
    if (parent && parent !== n) {
      parent.children.push(n);
      continue;
    }
    if (!root) {
      root = n;
      continue;
    }
    if (!root._wrap) {
      // Second parentless span: wrap the first root in a virtual container.
      root = { _name: 'Trace', children: [root], _virtual: true, _wrap: true,
               latency_ms: 0, tokens_used: 0, _tier: 'intake' };
    }
    root.children.push(n);
  }
  return root || { _name: 'Empty', children: [], latency_ms: 0, _tier: 'unknown', _virtual: true };
}
|
|
278
|
+
|
|
279
|
+
/* Infer the tree from AGENT_ROLES when spans carry no parent links.
   Shape: intake root → analysis agents (fan-out) and the investigator →
   verdicts under the investigator (or under the root if there is none).
   Any span that fits none of those slots is attached to the root so that
   every span is rendered. */
function buildTreeFromRoles(spans) {
  const nodes = spans.map(s => {
    const role = AGENT_ROLES[s.agent_name] || {};
    return {
      ...s,
      _name: s.agent_name || 'Unknown',
      _tier: role.tier || 'unknown',
      _order: role.order ?? 99,   // unmapped agents sort last
      children: [],
    };
  });
  nodes.sort((a, b) => a._order - b._order || a.id - b.id);

  let root = null;
  let investigator = null;
  const analysis = [], verdict = [], leftovers = [];

  for (const n of nodes) {
    if (n._tier === 'intake' && !root) root = n;
    else if (n._tier === 'analysis') analysis.push(n);
    else if (n._tier === 'investigator' && !investigator) investigator = n;
    else if (n._tier === 'verdict') verdict.push(n);
    // Unknown or 'parallel' tiers and any additional intake/investigator
    // spans: previously only the unknown tier was kept, the rest vanished.
    else leftovers.push(n);
  }

  if (!root) {
    root = { _name: 'Case Start', _tier: 'intake', children: [],
             latency_ms: 0, tokens_used: 0, _virtual: true, agent_name: 'Case Start' };
  }

  // Fan-out: analysis agents are children of intake.
  if (analysis.length) {
    root.children = analysis;
    root._fanout = true;
  }

  // The investigator sits beside the analysis agents; it is flagged as a
  // merge point only when there was something to merge.
  if (investigator) {
    root.children.push(investigator);
    if (analysis.length) investigator._merge = true;
  }

  // Verdicts hang off the investigator, or off the root if there is none.
  (investigator || root).children.push(...verdict);

  // Everything else goes directly under the root, in sorted order.
  root.children.push(...leftovers);

  return root;
}
|
|
333
|
+
|
|
334
|
+
/* ── D3 rendering with card-style agent boxes ── */
|
|
335
|
+
/* Draw the span tree into `container` as an SVG of card-style agent boxes.
   The tree is laid out left-to-right: the layout's x is used as the vertical
   position and its y as the horizontal one. Virtual nodes are drawn as dashed
   placeholders and are not clickable. */
function renderTree(spans, container) {
  container.innerHTML = '';

  const NODE_W = 250, NODE_H = 108;
  const margin = { top: 60, right: 80, bottom: 60, left: 80 };

  // Accessors shared by the attribute callbacks below.
  const isReal = d => !d.data._virtual;
  const hasThreshold = d => isReal(d) && d.data.latency_threshold;
  const latencyOf = d => d.data.latency_ms || 0;
  const overThreshold = d => latencyOf(d) > d.data.latency_threshold;
  const tierColor = d => TIER_COLORS[d.data._tier] || TIER_COLORS.unknown;

  const hierRoot = d3.hierarchy(buildTree(spans), d => d.children);
  d3.tree().nodeSize([NODE_H + 30, NODE_W + 80])(hierRoot);

  // Extent of the layout, used to size the SVG and shift it into view.
  let x0 = Infinity, x1 = -Infinity, y1 = -Infinity;
  hierRoot.each(d => {
    if (d.x < x0) x0 = d.x;
    if (d.x > x1) x1 = d.x;
    if (d.y > y1) y1 = d.y;
  });

  const width = y1 + NODE_W + margin.left + margin.right + 100;
  const height = (x1 - x0) + NODE_H + margin.top + margin.bottom + 40;

  const svg = d3.select(container).append('svg')
    .attr('width', Math.max(width, container.clientWidth))
    .attr('height', Math.max(height, 500));

  const g = svg.append('g')
    .attr('transform', 'translate(' + (margin.left + 20) + ',' + (margin.top - x0 + 20) + ')');

  // ── Links ──
  g.selectAll('.tree-link')
    .data(hierRoot.links())
    .join('path')
    .attr('class', d => {
      const isFanout = d.source.data._tier === 'intake' && d.target.data._tier === 'analysis';
      return 'tree-link' + (isFanout ? ' fanout' : '');
    })
    .attr('d', d3.linkHorizontal().x(d => d.y).y(d => d.x));

  // ── Node groups ──
  const ng = g.selectAll('.agent-node')
    .data(hierRoot.descendants())
    .join('g')
    .attr('class', 'agent-node')
    .attr('transform', d => 'translate(' + (d.y - NODE_W / 2) + ',' + (d.x - NODE_H / 2) + ')')
    .style('cursor', d => isReal(d) ? 'pointer' : 'default')
    .on('click', (ev, d) => { if (isReal(d)) showDetail(d.data); });

  const realNodes = ng.filter(isReal);

  // Card background
  ng.append('rect')
    .attr('width', NODE_W).attr('height', NODE_H).attr('rx', 8)
    .attr('fill', d => isReal(d) ? 'var(--surface2)' : 'transparent')
    .attr('stroke', d => isReal(d) ? tierColor(d) : 'var(--border)')
    .attr('stroke-width', 2)
    .attr('stroke-dasharray', d => isReal(d) ? 'none' : '4,4');

  // Left tier strip
  realNodes.append('rect')
    .attr('width', 4).attr('height', NODE_H).attr('rx', 2)
    .attr('fill', tierColor);

  // Agent name
  ng.append('text')
    .attr('x', 14).attr('y', 22)
    .attr('font-size', '12px').attr('font-weight', '700').attr('fill', 'var(--text)')
    .text(d => d.data._name);

  // Status badge (top-right)
  realNodes.append('text')
    .attr('x', NODE_W - 12).attr('y', 22)
    .attr('text-anchor', 'end').attr('font-size', '9px').attr('font-weight', '700')
    .attr('fill', d => statusColor(d.data.test_status))
    .text(d => (d.data.test_status || 'NEW').toUpperCase());

  // Latency line. Without a threshold there is nothing to be "within", so
  // the accent colour is used, matching the legend's "No threshold set".
  realNodes.append('text')
    .attr('x', 14).attr('y', 42).attr('font-size', '11px').attr('font-weight', '600')
    .attr('fill', d => {
      if (!d.data.latency_threshold) return 'var(--accent)';
      return overThreshold(d) ? 'var(--red)' : 'var(--green)';
    })
    .text(d => {
      let label = '\u23F1 ' + latencyOf(d) + 'ms';
      if (d.data.latency_threshold) label += ' / ' + d.data.latency_threshold + 'ms';
      return label;
    });

  // Tokens line
  realNodes.append('text')
    .attr('x', 14).attr('y', 58).attr('font-size', '10px').attr('fill', 'var(--muted)')
    .text(d => '\uD83D\uDD24 ' + (d.data.tokens_used || 0) + ' tokens');

  // PII indicator
  realNodes.append('text')
    .attr('x', 14).attr('y', 74).attr('font-size', '10px')
    .attr('fill', d => d.data.has_pii ? 'var(--red)' : 'var(--green)')
    .text(d => d.data.has_pii
      ? '\uD83D\uDD34 PII: ' + (d.data.pii_types || 'detected')
      : '\uD83D\uDFE2 No PII');

  // Latency bar background (only for nodes that have a threshold)
  ng.filter(hasThreshold).append('rect')
    .attr('x', 10).attr('y', NODE_H - 14)
    .attr('width', NODE_W - 20).attr('height', 5).attr('rx', 2)
    .attr('fill', 'var(--border)');

  // Latency bar fill: latency as a fraction of the threshold, capped at 100%.
  ng.filter(hasThreshold).append('rect')
    .attr('x', 10).attr('y', NODE_H - 14)
    .attr('width', d => (NODE_W - 20) * Math.min(latencyOf(d) / d.data.latency_threshold, 1))
    .attr('height', 5).attr('rx', 2)
    .attr('fill', d => overThreshold(d) ? 'var(--red)' : 'var(--green)');

  // Fan-out arrow label
  ng.filter(d => d.data._fanout).append('text')
    .attr('x', NODE_W + 10).attr('y', NODE_H / 2 + 4)
    .attr('font-size', '10px').attr('fill', 'var(--cyan)').attr('font-weight', '600')
    .text('fan-out \u2192');

  // Merge arrow label
  ng.filter(d => d.data._merge).append('text')
    .attr('x', -40).attr('y', NODE_H / 2 + 4)
    .attr('font-size', '10px').attr('fill', 'var(--purple)').attr('font-weight', '600')
    .text('\u21E3 merge');
}
|
|
458
|
+
|
|
459
|
+
/**
 * Pick the CSS colour variable used to draw a node for a given test status.
 *
 * Matching is case-insensitive. A missing/empty status, or any status not
 * listed below, falls back to the accent colour.
 *
 * @param {string} s - Test status label (e.g. "PASS", "fail", "Monitor").
 * @returns {string} A CSS `var(--…)` expression.
 */
function statusColor(s) {
  switch ((s || '').toLowerCase()) {
    case 'pass':
    case 'evaluated':
      return 'var(--green)';
    case 'fail':
    case 'block':
      return 'var(--red)';
    case 'flag':
    case 'monitor':
      return 'var(--amber)';
    default:
      return 'var(--accent)';
  }
}
|
|
466
|
+
|
|
467
|
+
/**
 * Fill the detail side panel with one node's record and open it.
 *
 * Fields read from `data`: agent_name / _name, test_status, trace_id,
 * span_id, latency_ms, latency_threshold, tokens_used, has_pii, pii_types,
 * agent_input, agent_output, error_message, created_at. Missing values fall
 * back to 0, 'NEW', 'Not set' or an em dash.
 *
 * @param {Object} data - The record attached to the clicked tree node.
 */
function showDetail(data) {
  var panel = document.getElementById('detail-panel');
  var content = document.getElementById('detail-content');

  // The markup below is assigned to innerHTML, so every value taken from
  // `data` is passed through esc() (defined elsewhere in this script;
  // presumably an HTML-escaper - confirm). That includes the fields that are
  // normally numbers: if the API ever returns a string for them, they must
  // not be able to inject markup.
  var latencyText = esc(String(data.latency_ms || 0));
  var tokensText = esc(String(data.tokens_used || 0));

  var threshold = data.latency_threshold
    ? esc(String(data.latency_threshold)) + 'ms ' +
      ((data.latency_ms || 0) > data.latency_threshold ? '\u26A0\uFE0F EXCEEDED' : '\u2705 OK')
    : 'Not set';

  var piiHtml = data.has_pii
    ? '<span class="badge badge-pii">\u26A0 PII DETECTED</span> ' + esc(data.pii_types || '')
    : '<span class="badge badge-clean">\u2713 Clean</span>';

  // statusBadgeClass only ever returns one of a fixed set of class names,
  // so it is safe to place inside the class attribute unescaped.
  var statusCls = statusBadgeClass(data.test_status);

  content.innerHTML =
    '<h2>' + esc(data.agent_name || data._name) + '</h2>' +
    '<div class="field"><div class="field-label">Status</div><span class="badge ' + statusCls + '">' + esc(data.test_status||'NEW') + '</span></div>' +
    '<div class="field"><div class="field-label">Trace ID</div><div style="font-size:0.8rem;word-break:break-all;">' + esc(data.trace_id||'\u2014') + '</div></div>' +
    '<div class="field"><div class="field-label">Span ID</div><div style="font-size:0.8rem;word-break:break-all;">' + esc(data.span_id||'\u2014') + '</div></div>' +
    '<div class="field"><div class="field-label">Latency</div><div>' + latencyText + 'ms</div></div>' +
    '<div class="field"><div class="field-label">Threshold</div><div>' + threshold + '</div></div>' +
    '<div class="field"><div class="field-label">Tokens Used</div><div>' + tokensText + '</div></div>' +
    '<div class="field"><div class="field-label">PII Handling</div><div>' + piiHtml + '</div></div>' +
    '<div class="field"><div class="field-label">Input</div><pre>' + esc(data.agent_input||'\u2014') + '</pre></div>' +
    '<div class="field"><div class="field-label">Output</div><pre>' + esc(data.agent_output||'\u2014') + '</pre></div>' +
    '<div class="field"><div class="field-label">Error</div><div>' + esc(data.error_message||'\u2014') + '</div></div>' +
    '<div class="field"><div class="field-label">Created At</div><div>' + esc(data.created_at||'\u2014') + '</div></div>';

  // The 'active' class is what makes the panel visible.
  panel.classList.add('active');
}
|
|
493
|
+
|
|
494
|
+
/** Hide the detail side panel by removing its 'active' class. */
function closePanel() {
  var detailPanel = document.getElementById('detail-panel');
  detailPanel.classList.remove('active');
}
|
|
495
|
+
|
|
496
|
+
/**
 * Translate a test status into the badge CSS class shown in the detail panel.
 *
 * Matching is case-insensitive; a missing/empty or unrecognised status gets
 * the 'badge-new' class.
 *
 * @param {string} s - Test status label.
 * @returns {string} One of 'badge-pass', 'badge-block', 'badge-flag', 'badge-new'.
 */
function statusBadgeClass(s) {
  var status = (s || '').toLowerCase();

  if (['pass', 'evaluated'].indexOf(status) !== -1) {
    return 'badge-pass';
  }
  if (['fail', 'block'].indexOf(status) !== -1) {
    return 'badge-block';
  }
  if (['flag', 'monitor'].indexOf(status) !== -1) {
    return 'badge-flag';
  }
  return 'badge-new';
}
|
|
503
|
+
|
|
504
|
+
// Pressing Escape anywhere on the page dismisses the detail panel.
document.addEventListener('keydown', function (evt) {
  if (evt.key === 'Escape') {
    closePanel();
  }
});

// Start the initial load of the tree (loadTree is defined elsewhere in this script).
loadTree();
|
|
506
|
+
</script>
|
|
507
|
+
</body>
|
|
508
|
+
</html>
|
|
@@ -11,4 +11,5 @@ agenteval_sdk.egg-info/dependency_links.txt
|
|
|
11
11
|
agenteval_sdk.egg-info/entry_points.txt
|
|
12
12
|
agenteval_sdk.egg-info/requires.txt
|
|
13
13
|
agenteval_sdk.egg-info/top_level.txt
|
|
14
|
-
agenteval_sdk/templates/dashboard.html
|
|
14
|
+
agenteval_sdk/templates/dashboard.html
|
|
15
|
+
agenteval_sdk/templates/trace_tree.html
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|