agenteval-sdk 0.3.2__tar.gz → 0.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agenteval_sdk
3
- Version: 0.3.2
3
+ Version: 0.3.3
4
4
  Summary: Telemetry and Observability SDK for AI Agents — with Trace Tree MRI
5
5
  License-Expression: MIT
6
6
  Keywords: ai,agents,observability,tracing,llm
@@ -12,7 +12,7 @@ from .sdk import (
12
12
  current_latency_threshold,
13
13
  )
14
14
 
15
- __version__ = "0.3.2"
15
+ __version__ = "0.3.3"
16
16
 
17
17
  __all__ = [
18
18
  "AgentEvalClient",
@@ -26,6 +26,16 @@ def create_app(db_path: Optional[str] = None) -> Flask:
26
26
  def index():
27
27
  return render_template("dashboard.html")
28
28
 
29
@app.route("/trace-tree")
def trace_tree():
    """Render the trace-tree page for the requested case.

    The ``case_id`` query parameter (empty string when absent) is handed
    to the ``trace_tree.html`` template unchanged.
    """
    return render_template(
        "trace_tree.html",
        case_id=request.args.get("case_id", ""),
    )
33
+
34
@app.route("/api/trace-tree/<trace_id>")
def api_trace_tree(trace_id):
    """Return, as JSON, whatever ``store.get_by_trace_id`` yields for ``trace_id``."""
    return jsonify(store.get_by_trace_id(trace_id))
38
+
29
39
  @app.route("/api/traces")
30
40
  def api_traces():
31
41
  limit = request.args.get("limit", 100, type=int)
@@ -39,12 +39,21 @@ class LocalStore:
39
39
  error_message TEXT DEFAULT '',
40
40
  project_id TEXT DEFAULT 'default',
41
41
  synced INTEGER DEFAULT 0,
42
- created_at TEXT DEFAULT (datetime('now'))
42
+ created_at TEXT DEFAULT (datetime('now')),
43
+ span_id TEXT DEFAULT '',
44
+ parent_span_id TEXT DEFAULT '',
45
+ latency_threshold INTEGER
43
46
  );
44
47
  CREATE INDEX IF NOT EXISTS idx_traces_synced ON traces(synced);
45
48
  CREATE INDEX IF NOT EXISTS idx_traces_trace_id ON traces(trace_id);
46
49
  """
47
50
 
51
+ _MIGRATIONS = [
52
+ "ALTER TABLE traces ADD COLUMN span_id TEXT DEFAULT ''",
53
+ "ALTER TABLE traces ADD COLUMN parent_span_id TEXT DEFAULT ''",
54
+ "ALTER TABLE traces ADD COLUMN latency_threshold INTEGER",
55
+ ]
56
+
48
57
  def __init__(self, db_path: Optional[str] = None) -> None:
49
58
  self._db_path = db_path or _default_db_path()
50
59
  self._lock = threading.Lock()
@@ -61,6 +70,12 @@ class LocalStore:
61
70
  conn = self._connect()
62
71
  try:
63
72
  conn.executescript(self._DDL)
73
+ for stmt in self._MIGRATIONS:
74
+ try:
75
+ conn.execute(stmt)
76
+ conn.commit()
77
+ except sqlite3.OperationalError:
78
+ pass # column already exists
64
79
  finally:
65
80
  conn.close()
66
81
 
@@ -73,8 +88,9 @@ class LocalStore:
73
88
  """INSERT INTO traces
74
89
  (trace_id, agent_name, agent_input, agent_output,
75
90
  latency_ms, tokens_used, test_status, error_message,
76
- project_id, synced)
77
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 0)""",
91
+ project_id, synced, span_id, parent_span_id,
92
+ latency_threshold)
93
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 0, ?, ?, ?)""",
78
94
  (
79
95
  trace.get("trace_id", ""),
80
96
  trace.get("agent_name", ""),
@@ -85,6 +101,9 @@ class LocalStore:
85
101
  trace.get("test_status", "NEW"),
86
102
  trace.get("error_message", ""),
87
103
  trace.get("project_id", "default"),
104
+ trace.get("span_id", ""),
105
+ trace.get("parent_span_id", ""),
106
+ trace.get("latency_threshold"),
88
107
  ),
89
108
  )
90
109
  conn.commit()
@@ -150,6 +169,19 @@ class LocalStore:
150
169
  finally:
151
170
  conn.close()
152
171
 
172
def get_by_trace_id(self, trace_id: str) -> List[Dict[str, Any]]:
    """Fetch every row of ``traces`` whose ``trace_id`` matches.

    Rows come back in ascending ``id`` order, each converted to a plain
    ``dict``. The store lock is held while the query runs, and the
    connection is closed before returning whether or not the query
    succeeds.
    """
    query = "SELECT * FROM traces WHERE trace_id = ? ORDER BY id ASC"
    with self._lock:
        connection = self._connect()
        try:
            cursor = connection.execute(query, (trace_id,))
            return list(map(dict, cursor.fetchall()))
        finally:
            connection.close()
184
+
153
185
  @property
154
186
  def path(self) -> str:
155
187
  return self._db_path
@@ -0,0 +1,418 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Trace Tree MRI — AgentEval</title>
7
+ <script src="https://d3js.org/d3.v7.min.js"></script>
8
+ <style>
9
+ :root {
10
+ --bg: #0f1117;
11
+ --surface: #1a1d27;
12
+ --border: #2a2d3a;
13
+ --text: #e4e4e7;
14
+ --muted: #71717a;
15
+ --accent: #6366f1;
16
+ --accent-hover: #818cf8;
17
+ --green: #22c55e;
18
+ --red: #ef4444;
19
+ --amber: #f59e0b;
20
+ }
21
+ * { box-sizing: border-box; margin: 0; padding: 0; }
22
+ body {
23
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, monospace;
24
+ background: var(--bg);
25
+ color: var(--text);
26
+ line-height: 1.5;
27
+ }
28
+ .container { max-width: 1400px; margin: 0 auto; padding: 24px; }
29
+
30
+ header {
31
+ display: flex;
32
+ justify-content: space-between;
33
+ align-items: center;
34
+ padding-bottom: 20px;
35
+ border-bottom: 1px solid var(--border);
36
+ margin-bottom: 24px;
37
+ }
38
+ header h1 { font-size: 1.4rem; font-weight: 600; }
39
+ header h1 span { color: var(--accent); }
40
+ .header-actions { display: flex; gap: 10px; align-items: center; }
41
+ .btn {
42
+ background: var(--accent);
43
+ color: #fff;
44
+ border: none;
45
+ border-radius: 6px;
46
+ padding: 8px 16px;
47
+ font-size: 0.85rem;
48
+ cursor: pointer;
49
+ text-decoration: none;
50
+ transition: background 0.15s;
51
+ }
52
+ .btn:hover { background: var(--accent-hover); }
53
+ .btn-secondary { background: var(--surface); border: 1px solid var(--border); color: var(--text); }
54
+ .btn-secondary:hover { background: var(--border); }
55
+
56
+ .stats {
57
+ display: grid;
58
+ grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
59
+ gap: 16px;
60
+ margin-bottom: 24px;
61
+ }
62
+ .stat-card {
63
+ background: var(--surface);
64
+ border: 1px solid var(--border);
65
+ border-radius: 10px;
66
+ padding: 18px;
67
+ }
68
+ .stat-card .label { font-size: 0.75rem; color: var(--muted); text-transform: uppercase; letter-spacing: 0.05em; }
69
+ .stat-card .value { font-size: 1.75rem; font-weight: 700; margin-top: 4px; }
70
+ .stat-card .value.green { color: var(--green); }
71
+ .stat-card .value.red { color: var(--red); }
72
+ .stat-card .value.amber { color: var(--amber); }
73
+
74
+ #tree-container {
75
+ background: var(--surface);
76
+ border: 1px solid var(--border);
77
+ border-radius: 10px;
78
+ min-height: 500px;
79
+ overflow: auto;
80
+ position: relative;
81
+ }
82
+
83
+ .node circle {
84
+ stroke-width: 2.5px;
85
+ cursor: pointer;
86
+ }
87
+ .node text {
88
+ font-size: 12px;
89
+ fill: var(--text);
90
+ }
91
+ .link {
92
+ fill: none;
93
+ stroke: var(--border);
94
+ stroke-width: 2px;
95
+ }
96
+
97
+ .tooltip {
98
+ position: absolute;
99
+ background: var(--surface);
100
+ border: 1px solid var(--border);
101
+ border-radius: 8px;
102
+ padding: 14px;
103
+ font-size: 0.8rem;
104
+ max-width: 400px;
105
+ pointer-events: none;
106
+ z-index: 200;
107
+ display: none;
108
+ }
109
+ .tooltip .tt-label { font-size: 0.7rem; color: var(--muted); text-transform: uppercase; margin-top: 6px; }
110
+ .tooltip .tt-value { word-break: break-all; }
111
+ .tooltip .tt-value pre {
112
+ background: var(--bg);
113
+ border: 1px solid var(--border);
114
+ border-radius: 4px;
115
+ padding: 6px;
116
+ font-size: 0.75rem;
117
+ white-space: pre-wrap;
118
+ max-height: 100px;
119
+ overflow-y: auto;
120
+ margin-top: 2px;
121
+ }
122
+
123
+ .empty-state {
124
+ text-align: center;
125
+ padding: 80px 20px;
126
+ color: var(--muted);
127
+ }
128
+ .empty-state h2 { font-size: 1.2rem; margin-bottom: 12px; color: var(--text); }
129
+
130
+ .legend {
131
+ display: flex;
132
+ gap: 20px;
133
+ margin-bottom: 16px;
134
+ font-size: 0.8rem;
135
+ color: var(--muted);
136
+ }
137
+ .legend-item { display: flex; align-items: center; gap: 6px; }
138
+ .legend-dot {
139
+ width: 12px;
140
+ height: 12px;
141
+ border-radius: 50%;
142
+ display: inline-block;
143
+ }
144
+ </style>
145
+ </head>
146
+ <body>
147
+ <div class="container">
148
+ <header>
149
+ <h1>🔬 <span>Trace Tree MRI</span></h1>
150
+ <div class="header-actions">
151
+ <span id="trace-id-display" style="font-size:0.8rem; color:var(--muted);"></span>
152
+ <a href="/" class="btn btn-secondary">← Dashboard</a>
153
+ </div>
154
+ </header>
155
+
156
+ <div class="stats" id="stats-row" style="display:none;">
157
+ <div class="stat-card">
158
+ <div class="label">Case / Trace ID</div>
159
+ <div class="value" id="stat-trace" style="font-size:0.85rem; word-break:break-all;">—</div>
160
+ </div>
161
+ <div class="stat-card">
162
+ <div class="label">Total Spans</div>
163
+ <div class="value" id="stat-spans">—</div>
164
+ </div>
165
+ <div class="stat-card">
166
+ <div class="label">Total Latency</div>
167
+ <div class="value amber" id="stat-latency">—</div>
168
+ </div>
169
+ <div class="stat-card">
170
+ <div class="label">Threshold Breaches</div>
171
+ <div class="value red" id="stat-breaches">—</div>
172
+ </div>
173
+ </div>
174
+
175
+ <div class="legend" id="legend" style="display:none;">
176
+ <div class="legend-item"><span class="legend-dot" style="background:var(--green);"></span> Within threshold</div>
177
+ <div class="legend-item"><span class="legend-dot" style="background:var(--red);"></span> Exceeded threshold</div>
178
+ <div class="legend-item"><span class="legend-dot" style="background:var(--accent);"></span> No threshold set</div>
179
+ </div>
180
+
181
+ <div id="tree-container"></div>
182
+ <div class="tooltip" id="tooltip"></div>
183
+ </div>
184
+
185
+ <script>
186
+ const CASE_ID = "{{ case_id }}";
187
+
188
// Return `s` as markup-safe text: the value (or '' when falsy) is assigned to
// a detached <div> through textContent and read back through innerHTML.
function esc(s) {
  const scratch = document.createElement('div');
  scratch.textContent = s ? s : '';
  return scratch.innerHTML;
}
193
+
194
// Fetch the spans for CASE_ID and populate the page: summary stats, legend
// and the D3 tree. An empty-state message is shown instead when no case ID
// was supplied, the request fails, or the server returns no spans.
async function loadTree() {
  const container = document.getElementById('tree-container');
  if (!CASE_ID) {
    container.innerHTML = `
      <div class="empty-state">
        <h2>No Case ID provided</h2>
        <p>Add <code>?case_id=&lt;trace_id&gt;</code> to the URL to view a trace tree.</p>
      </div>`;
    return;
  }

  document.getElementById('stat-trace').textContent = CASE_ID;

  let spans;
  try {
    const res = await fetch(`/api/trace-tree/${encodeURIComponent(CASE_ID)}`);
    // fetch() only rejects on network failure; HTTP error statuses must be
    // surfaced explicitly or an error page would be parsed as span data.
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    spans = await res.json();
    // Anything other than a list would otherwise be reported as "no spans".
    if (!Array.isArray(spans)) throw new Error('Unexpected response format');
  } catch (e) {
    container.innerHTML = `<div class="empty-state"><h2>Failed to load trace data</h2><p>${esc(e.message)}</p></div>`;
    return;
  }

  if (!spans.length) {
    container.innerHTML = `
      <div class="empty-state">
        <h2>No spans found</h2>
        <p>No trace spans found for case ID: <code>${esc(CASE_ID)}</code></p>
        <p style="margin-top:8px;">Make sure you are running the dashboard from the same environment where traces were logged.</p>
      </div>`;
    return;
  }

  // Show stats
  document.getElementById('stats-row').style.display = '';
  document.getElementById('legend').style.display = '';
  document.getElementById('stat-spans').textContent = spans.length;

  // Plain sum over every span's latency_ms (missing values count as 0).
  const totalLatency = spans.reduce((s, n) => s + (n.latency_ms || 0), 0);
  document.getElementById('stat-latency').textContent = totalLatency + 'ms';

  // A span counts as a breach only when it has a truthy threshold and its
  // latency is strictly greater than it.
  const breaches = spans.filter(s =>
    s.latency_threshold && s.latency_ms > s.latency_threshold
  ).length;
  document.getElementById('stat-breaches').textContent = breaches;

  // Build tree from flat span list
  const root = buildTree(spans);
  renderD3Tree(root, container);
}
243
+
244
// Turn the flat span list into a nested tree for d3.hierarchy.
//
// Each span becomes a node: a copy of the span plus `children` and a display
// `_name` (agent_name, or 'Unknown'). A span is attached to the node whose
// span_id equals its parent_span_id; spans with no parent, an unknown parent,
// or themselves as parent are roots. One root is returned as-is, several are
// wrapped in a virtual 'Trace' node, and no roots yields an 'Empty' node.
function buildTree(spans) {
  const makeNode = (s) => ({ ...s, children: [], _name: s.agent_name || 'Unknown' });

  // A Map (not a plain object) so IDs such as "constructor" or "__proto__"
  // can never resolve to inherited Object members.
  const bySpan = new Map();
  for (const s of spans) {
    // Spans sharing a span_id resolve to the last one seen.
    if (s.span_id) bySpan.set(s.span_id, makeNode(s));
  }

  // Link parent -> children, collecting everything without a usable parent.
  const roots = [];
  for (const s of spans) {
    const node = s.span_id ? bySpan.get(s.span_id) : makeNode(s);
    const parent = s.parent_span_id ? bySpan.get(s.parent_span_id) : undefined;
    // A span naming itself as parent is kept as a root instead of being
    // pushed into its own children, which would drop it from the tree.
    if (parent && parent !== node) {
      parent.children.push(node);
    } else {
      roots.push(node);
    }
  }

  if (roots.length === 0) {
    return { _name: 'Empty', children: [], latency_ms: 0 };
  }
  if (roots.length === 1) {
    return roots[0];
  }
  // Multiple roots — wrap in a virtual root
  return { _name: 'Trace', children: roots, _virtual: true, latency_ms: 0, tokens_used: 0 };
}
278
+
279
// Draw `rootData` (a node tree with `children` arrays) as a left-to-right D3
// tree inside `container`, replacing whatever the container held before.
// Each node is a circle coloured by nodeColor/nodeStroke with the node name
// above it and its latency (plus threshold, when set) below it; hovering a
// circle drives the shared #tooltip element.
function renderD3Tree(rootData, container) {
  container.innerHTML = '';

  const margin = { top: 40, right: 200, bottom: 40, left: 120 };
  const nodeW = 220;
  const nodeH = 70;

  // d3.tree() with nodeSize puts depth on y and sibling spread on x.
  const hierarchy = d3.hierarchy(rootData, (n) => n.children);
  d3.tree().nodeSize([nodeH + 10, nodeW])(hierarchy);
  const laidOut = hierarchy.descendants();

  // Extent of the layout along its x axis (drawn vertically on screen)
  let minX = Infinity;
  let maxX = -Infinity;
  for (const n of laidOut) {
    minX = Math.min(minX, n.x);
    maxX = Math.max(maxX, n.x);
  }

  const svgWidth = (hierarchy.height + 1) * nodeW + margin.left + margin.right;
  const svgHeight = (maxX - minX) + margin.top + margin.bottom;

  const svg = d3.select(container).append('svg');
  svg.attr('width', svgWidth)
    .attr('height', svgHeight)
    .style('min-width', '100%');

  // Shift by -minX so the topmost node sits at the top margin.
  const canvas = svg.append('g');
  canvas.attr('transform', `translate(${margin.left},${margin.top - minX})`);

  // Links are appended first so the node groups are drawn on top of them.
  const linkPath = d3.linkHorizontal()
    .x((p) => p.y)
    .y((p) => p.x);
  canvas.selectAll('.link')
    .data(hierarchy.links())
    .join('path')
    .attr('class', 'link')
    .attr('d', linkPath);

  // Nodes
  const tooltipEl = document.getElementById('tooltip');
  const nodeGroups = canvas.selectAll('.node')
    .data(laidOut)
    .join('g')
    .attr('class', 'node')
    .attr('transform', (n) => `translate(${n.y},${n.x})`);

  nodeGroups.append('circle')
    .attr('r', 8)
    .attr('fill', (n) => nodeColor(n.data))
    .attr('stroke', (n) => nodeStroke(n.data))
    .on('mouseover', (event, n) => showTooltip(event, n.data, tooltipEl))
    .on('mousemove', (event) => moveTooltip(event, tooltipEl))
    .on('mouseout', () => hideTooltip(tooltipEl));

  // Label: agent name
  nodeGroups.append('text')
    .attr('dy', -14)
    .attr('x', 0)
    .attr('text-anchor', 'middle')
    .style('font-weight', '600')
    .text((n) => n.data._name);

  // Sub-label: latency, red when it exceeds a set threshold
  const overThreshold = (span) =>
    span.latency_threshold && span.latency_ms > span.latency_threshold;

  nodeGroups.append('text')
    .attr('dy', 22)
    .attr('x', 0)
    .attr('text-anchor', 'middle')
    .style('font-size', '10px')
    .style('fill', (n) => (overThreshold(n.data) ? 'var(--red)' : 'var(--muted)'))
    .text((n) => {
      const latency = `${n.data.latency_ms || 0}ms`;
      return n.data.latency_threshold
        ? `${latency} / ${n.data.latency_threshold}ms`
        : latency;
    });
}
364
+
365
// Fill colour for a node: muted for the virtual root, accent when no
// (truthy) threshold is set, otherwise red when latency_ms is strictly above
// the threshold and green when it is not.
function nodeColor(d) {
  if (d._virtual) {
    return 'var(--muted)';
  }
  if (d.latency_threshold) {
    return d.latency_ms > d.latency_threshold ? 'var(--red)' : 'var(--green)';
  }
  return 'var(--accent)';
}
370
+
371
// Outline colour for a node, following the same rules as the fill colour:
// virtual root, no threshold set, threshold exceeded, or within threshold.
function nodeStroke(d) {
  let stroke;
  if (d._virtual) {
    stroke = 'var(--border)';
  } else if (!d.latency_threshold) {
    stroke = '#4f46e5';
  } else if (d.latency_ms > d.latency_threshold) {
    stroke = '#dc2626';
  } else {
    stroke = '#16a34a';
  }
  return stroke;
}
376
+
377
// Fill the tooltip element `el` with the details of one span (`data`), make
// it visible and position it at the pointer.
//
// The markup is assigned through innerHTML, so every span-derived value is
// passed through esc() first. The numeric-looking fields are included
// because they arrive from the API as untyped JSON and are not guaranteed to
// be numbers.
function showTooltip(event, data, el) {
  const safe = (value) => esc(String(value));
  const threshold = data.latency_threshold
    ? `${safe(data.latency_threshold)}ms ${data.latency_ms > data.latency_threshold ? '⚠️ EXCEEDED' : '✅ OK'}`
    : 'Not set';
  el.innerHTML = `
    <div style="font-weight:700; font-size:0.95rem; margin-bottom:8px;">${esc(data._name)}</div>
    <div class="tt-label">Span ID</div>
    <div class="tt-value">${esc(data.span_id || '—')}</div>
    <div class="tt-label">Latency</div>
    <div class="tt-value">${safe(data.latency_ms || 0)}ms</div>
    <div class="tt-label">Threshold</div>
    <div class="tt-value">${threshold}</div>
    <div class="tt-label">Tokens</div>
    <div class="tt-value">${safe(data.tokens_used || 0)}</div>
    <div class="tt-label">Status</div>
    <div class="tt-value">${esc(data.test_status || '—')}</div>
    <div class="tt-label">Input</div>
    <div class="tt-value"><pre>${esc(truncate(data.agent_input, 200))}</pre></div>
    <div class="tt-label">Output</div>
    <div class="tt-value"><pre>${esc(truncate(data.agent_output, 200))}</pre></div>
  `;
  el.style.display = 'block';
  moveTooltip(event, el);
}
401
+
402
// Position `el` relative to the pointer's page coordinates: 16px to the
// right of pageX and 10px above pageY.
function moveTooltip(event, el) {
  const { pageX, pageY } = event;
  el.style.left = `${pageX + 16}px`;
  el.style.top = `${pageY - 10}px`;
}
406
+
407
// Hide the tooltip element by setting its inline display to 'none'.
function hideTooltip(el) {
  const { style } = el;
  style.display = 'none';
}
410
+
411
// Shorten `s` to at most `n` characters, appending '…' when anything was cut.
// Falsy input is treated as the empty string.
function truncate(s, n) {
  const text = s || '';
  if (text.length > n) {
    return text.substring(0, n) + '…';
  }
  return text;
}
414
+
415
+ loadTree();
416
+ </script>
417
+ </body>
418
+ </html>
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agenteval_sdk
3
- Version: 0.3.2
3
+ Version: 0.3.3
4
4
  Summary: Telemetry and Observability SDK for AI Agents — with Trace Tree MRI
5
5
  License-Expression: MIT
6
6
  Keywords: ai,agents,observability,tracing,llm
@@ -11,4 +11,5 @@ agenteval_sdk.egg-info/dependency_links.txt
11
11
  agenteval_sdk.egg-info/entry_points.txt
12
12
  agenteval_sdk.egg-info/requires.txt
13
13
  agenteval_sdk.egg-info/top_level.txt
14
- agenteval_sdk/templates/dashboard.html
14
+ agenteval_sdk/templates/dashboard.html
15
+ agenteval_sdk/templates/trace_tree.html
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "agenteval_sdk"
7
- version = "0.3.2"
7
+ version = "0.3.3"
8
8
  description = "Telemetry and Observability SDK for AI Agents — with Trace Tree MRI"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
File without changes
File without changes