htmlgraph 0.24.2__py3-none-any.whl → 0.26.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. htmlgraph/__init__.py +20 -1
  2. htmlgraph/agent_detection.py +26 -10
  3. htmlgraph/analytics/cross_session.py +4 -3
  4. htmlgraph/analytics/work_type.py +52 -16
  5. htmlgraph/analytics_index.py +51 -19
  6. htmlgraph/api/__init__.py +3 -0
  7. htmlgraph/api/main.py +2263 -0
  8. htmlgraph/api/static/htmx.min.js +1 -0
  9. htmlgraph/api/static/style-redesign.css +1344 -0
  10. htmlgraph/api/static/style.css +1079 -0
  11. htmlgraph/api/templates/dashboard-redesign.html +812 -0
  12. htmlgraph/api/templates/dashboard.html +794 -0
  13. htmlgraph/api/templates/partials/activity-feed-hierarchical.html +326 -0
  14. htmlgraph/api/templates/partials/activity-feed.html +1020 -0
  15. htmlgraph/api/templates/partials/agents-redesign.html +317 -0
  16. htmlgraph/api/templates/partials/agents.html +317 -0
  17. htmlgraph/api/templates/partials/event-traces.html +373 -0
  18. htmlgraph/api/templates/partials/features-kanban-redesign.html +509 -0
  19. htmlgraph/api/templates/partials/features.html +509 -0
  20. htmlgraph/api/templates/partials/metrics-redesign.html +346 -0
  21. htmlgraph/api/templates/partials/metrics.html +346 -0
  22. htmlgraph/api/templates/partials/orchestration-redesign.html +443 -0
  23. htmlgraph/api/templates/partials/orchestration.html +163 -0
  24. htmlgraph/api/templates/partials/spawners.html +375 -0
  25. htmlgraph/atomic_ops.py +560 -0
  26. htmlgraph/builders/base.py +55 -1
  27. htmlgraph/builders/bug.py +17 -2
  28. htmlgraph/builders/chore.py +17 -2
  29. htmlgraph/builders/epic.py +17 -2
  30. htmlgraph/builders/feature.py +25 -2
  31. htmlgraph/builders/phase.py +17 -2
  32. htmlgraph/builders/spike.py +27 -2
  33. htmlgraph/builders/track.py +14 -0
  34. htmlgraph/cigs/__init__.py +4 -0
  35. htmlgraph/cigs/reporter.py +818 -0
  36. htmlgraph/cli.py +1427 -401
  37. htmlgraph/cli_commands/__init__.py +1 -0
  38. htmlgraph/cli_commands/feature.py +195 -0
  39. htmlgraph/cli_framework.py +115 -0
  40. htmlgraph/collections/__init__.py +2 -0
  41. htmlgraph/collections/base.py +21 -0
  42. htmlgraph/collections/session.py +189 -0
  43. htmlgraph/collections/spike.py +7 -1
  44. htmlgraph/collections/task_delegation.py +236 -0
  45. htmlgraph/collections/traces.py +482 -0
  46. htmlgraph/config.py +113 -0
  47. htmlgraph/converter.py +41 -0
  48. htmlgraph/cost_analysis/__init__.py +5 -0
  49. htmlgraph/cost_analysis/analyzer.py +438 -0
  50. htmlgraph/dashboard.html +3356 -492
  51. htmlgraph-0.24.2.data/data/htmlgraph/dashboard.html → htmlgraph/dashboard.html.backup +2246 -248
  52. htmlgraph/dashboard.html.bak +7181 -0
  53. htmlgraph/dashboard.html.bak2 +7231 -0
  54. htmlgraph/dashboard.html.bak3 +7232 -0
  55. htmlgraph/db/__init__.py +38 -0
  56. htmlgraph/db/queries.py +790 -0
  57. htmlgraph/db/schema.py +1584 -0
  58. htmlgraph/deploy.py +26 -27
  59. htmlgraph/docs/API_REFERENCE.md +841 -0
  60. htmlgraph/docs/HTTP_API.md +750 -0
  61. htmlgraph/docs/INTEGRATION_GUIDE.md +752 -0
  62. htmlgraph/docs/ORCHESTRATION_PATTERNS.md +710 -0
  63. htmlgraph/docs/README.md +533 -0
  64. htmlgraph/docs/version_check.py +3 -1
  65. htmlgraph/error_handler.py +544 -0
  66. htmlgraph/event_log.py +2 -0
  67. htmlgraph/hooks/.htmlgraph/.session-warning-state.json +6 -0
  68. htmlgraph/hooks/.htmlgraph/agents.json +72 -0
  69. htmlgraph/hooks/.htmlgraph/index.sqlite +0 -0
  70. htmlgraph/hooks/__init__.py +8 -0
  71. htmlgraph/hooks/bootstrap.py +169 -0
  72. htmlgraph/hooks/cigs_pretool_enforcer.py +2 -2
  73. htmlgraph/hooks/concurrent_sessions.py +208 -0
  74. htmlgraph/hooks/context.py +318 -0
  75. htmlgraph/hooks/drift_handler.py +525 -0
  76. htmlgraph/hooks/event_tracker.py +496 -79
  77. htmlgraph/hooks/orchestrator.py +6 -4
  78. htmlgraph/hooks/orchestrator_reflector.py +4 -4
  79. htmlgraph/hooks/post_tool_use_handler.py +257 -0
  80. htmlgraph/hooks/pretooluse.py +473 -6
  81. htmlgraph/hooks/prompt_analyzer.py +637 -0
  82. htmlgraph/hooks/session_handler.py +637 -0
  83. htmlgraph/hooks/state_manager.py +504 -0
  84. htmlgraph/hooks/subagent_stop.py +309 -0
  85. htmlgraph/hooks/task_enforcer.py +39 -0
  86. htmlgraph/hooks/validator.py +15 -11
  87. htmlgraph/models.py +111 -15
  88. htmlgraph/operations/fastapi_server.py +230 -0
  89. htmlgraph/orchestration/headless_spawner.py +344 -29
  90. htmlgraph/orchestration/live_events.py +377 -0
  91. htmlgraph/pydantic_models.py +476 -0
  92. htmlgraph/quality_gates.py +350 -0
  93. htmlgraph/repo_hash.py +511 -0
  94. htmlgraph/sdk.py +348 -10
  95. htmlgraph/server.py +194 -0
  96. htmlgraph/session_hooks.py +300 -0
  97. htmlgraph/session_manager.py +131 -1
  98. htmlgraph/session_registry.py +587 -0
  99. htmlgraph/session_state.py +436 -0
  100. htmlgraph/system_prompts.py +449 -0
  101. htmlgraph/templates/orchestration-view.html +350 -0
  102. htmlgraph/track_builder.py +19 -0
  103. htmlgraph/validation.py +115 -0
  104. htmlgraph-0.26.1.data/data/htmlgraph/dashboard.html +7458 -0
  105. {htmlgraph-0.24.2.dist-info → htmlgraph-0.26.1.dist-info}/METADATA +91 -64
  106. {htmlgraph-0.24.2.dist-info → htmlgraph-0.26.1.dist-info}/RECORD +112 -46
  107. {htmlgraph-0.24.2.data → htmlgraph-0.26.1.data}/data/htmlgraph/styles.css +0 -0
  108. {htmlgraph-0.24.2.data → htmlgraph-0.26.1.data}/data/htmlgraph/templates/AGENTS.md.template +0 -0
  109. {htmlgraph-0.24.2.data → htmlgraph-0.26.1.data}/data/htmlgraph/templates/CLAUDE.md.template +0 -0
  110. {htmlgraph-0.24.2.data → htmlgraph-0.26.1.data}/data/htmlgraph/templates/GEMINI.md.template +0 -0
  111. {htmlgraph-0.24.2.dist-info → htmlgraph-0.26.1.dist-info}/WHEEL +0 -0
  112. {htmlgraph-0.24.2.dist-info → htmlgraph-0.26.1.dist-info}/entry_points.txt +0 -0
htmlgraph/api/main.py ADDED
@@ -0,0 +1,2263 @@
1
+ """
2
+ HtmlGraph FastAPI Backend - Real-time Agent Observability Dashboard
3
+
4
+ Provides REST API and WebSocket support for viewing:
5
+ - Agent activity feed with real-time event streaming
6
+ - Orchestration chains and delegation handoffs
7
+ - Feature tracker with Kanban views
8
+ - Session metrics and performance analytics
9
+
10
+ Architecture:
11
+ - FastAPI backend querying SQLite database
12
+ - Jinja2 templates for server-side rendering
13
+ - HTMX for interactive UI without page reloads
14
+ - WebSocket for real-time event streaming
15
+ """
16
+
17
+ import asyncio
18
+ import json
19
+ import logging
20
+ import random
21
+ import sqlite3
22
+ import time
23
+ from datetime import datetime
24
+ from pathlib import Path
25
+ from typing import Any
26
+
27
+ import aiosqlite
28
+ from fastapi import FastAPI, Request, WebSocket, WebSocketDisconnect
29
+ from fastapi.responses import HTMLResponse
30
+ from fastapi.staticfiles import StaticFiles
31
+ from fastapi.templating import Jinja2Templates
32
+ from pydantic import BaseModel
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
+ class QueryCache:
38
+ """Simple in-memory cache with TTL support for query results."""
39
+
40
+ def __init__(self, ttl_seconds: float = 30.0):
41
+ """Initialize query cache with TTL."""
42
+ self.cache: dict[str, tuple[Any, float]] = {}
43
+ self.ttl_seconds = ttl_seconds
44
+ self.metrics: dict[str, dict[str, float]] = {}
45
+
46
+ def get(self, key: str) -> Any | None:
47
+ """Get cached value if exists and not expired."""
48
+ if key not in self.cache:
49
+ return None
50
+
51
+ value, timestamp = self.cache[key]
52
+ if time.time() - timestamp > self.ttl_seconds:
53
+ del self.cache[key]
54
+ return None
55
+
56
+ return value
57
+
58
+ def set(self, key: str, value: Any) -> None:
59
+ """Store value with current timestamp."""
60
+ self.cache[key] = (value, time.time())
61
+
62
+ def record_metric(self, key: str, query_time_ms: float, cache_hit: bool) -> None:
63
+ """Record performance metrics for a query."""
64
+ if key not in self.metrics:
65
+ self.metrics[key] = {"count": 0, "total_ms": 0, "avg_ms": 0, "hits": 0}
66
+
67
+ metrics = self.metrics[key]
68
+ metrics["count"] += 1
69
+ metrics["total_ms"] += query_time_ms
70
+ metrics["avg_ms"] = metrics["total_ms"] / metrics["count"]
71
+ if cache_hit:
72
+ metrics["hits"] += 1
73
+
74
+ def get_metrics(self) -> dict[str, dict[str, float]]:
75
+ """Get all collected metrics."""
76
+ return self.metrics
77
+
78
+
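
As a usage sketch (not part of the package), the read-through pattern the endpoints below apply with this cache looks roughly like the following; run_query() stands in for the real database call.

# Sketch of the cache-aside pattern used by the endpoints (illustrative only)
cache = QueryCache(ttl_seconds=30.0)

result = cache.get("agents_view:all")
if result is None:
    result = run_query()  # hypothetical placeholder for the aiosqlite query
    cache.set("agents_view:all", result)
    cache.record_metric("agents_view:all", query_time_ms=12.5, cache_hit=False)
else:
    cache.record_metric("agents_view:all", query_time_ms=0.1, cache_hit=True)
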
79
+ class EventModel(BaseModel):
80
+ """Event data model for API responses."""
81
+
82
+ event_id: str
83
+ agent_id: str
84
+ event_type: str
85
+ timestamp: str
86
+ tool_name: str | None = None
87
+ input_summary: str | None = None
88
+ output_summary: str | None = None
89
+ session_id: str
90
+ parent_event_id: str | None = None
91
+ status: str
92
+ model: str | None = None
93
+
94
+
95
+ class FeatureModel(BaseModel):
96
+ """Feature data model for API responses."""
97
+
98
+ id: str
99
+ type: str
100
+ title: str
101
+ description: str | None = None
102
+ status: str
103
+ priority: str
104
+ assigned_to: str | None = None
105
+ created_at: str
106
+ updated_at: str
107
+ completed_at: str | None = None
108
+
109
+
110
+ class SessionModel(BaseModel):
111
+ """Session data model for API responses."""
112
+
113
+ session_id: str
114
+ agent: str | None = None
115
+ status: str
116
+ started_at: str
117
+ ended_at: str | None = None
118
+ event_count: int = 0
119
+ duration_seconds: float | None = None
120
+
121
+
122
+ def _ensure_database_initialized(db_path: str) -> None:
123
+ """
124
+ Ensure SQLite database exists and has correct schema.
125
+
126
+ Args:
127
+ db_path: Path to SQLite database file
128
+ """
129
+ db_file = Path(db_path)
130
+ db_file.parent.mkdir(parents=True, exist_ok=True)
131
+
132
+ # Check if database exists and has tables
133
+ try:
134
+ conn = sqlite3.connect(db_path)
135
+ cursor = conn.cursor()
136
+
137
+ # Query existing tables
138
+ cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
139
+ tables = cursor.fetchall()
140
+ table_names = [t[0] for t in tables]
141
+
142
+ if not table_names:
143
+ # Database is empty, create schema
144
+ logger.info(f"Creating database schema at {db_path}")
145
+ from htmlgraph.db.schema import HtmlGraphDB
146
+
147
+ db = HtmlGraphDB(db_path)
148
+ db.connect()
149
+ db.create_tables()
150
+ db.disconnect()
151
+ logger.info("Database schema created successfully")
152
+ else:
153
+ logger.debug(f"Database already initialized with tables: {table_names}")
154
+
155
+ conn.close()
156
+
157
+ except sqlite3.Error as e:
158
+ logger.warning(f"Database check warning: {e}")
159
+ # Try to create anyway
160
+ try:
161
+ from htmlgraph.db.schema import HtmlGraphDB
162
+
163
+ db = HtmlGraphDB(db_path)
164
+ db.connect()
165
+ db.create_tables()
166
+ db.disconnect()
167
+ except Exception as create_error:
168
+ logger.error(f"Failed to create database: {create_error}")
169
+ raise
170
+
171
+
172
+ def get_app(db_path: str) -> FastAPI:
173
+ """
174
+ Create and configure FastAPI application.
175
+
176
+ Args:
177
+ db_path: Path to SQLite database file
178
+
179
+ Returns:
180
+ Configured FastAPI application instance
181
+ """
182
+ # Ensure database is initialized
183
+ _ensure_database_initialized(db_path)
184
+
185
+ app = FastAPI(
186
+ title="HtmlGraph Dashboard API",
187
+ description="Real-time agent observability dashboard",
188
+ version="0.1.0",
189
+ )
190
+
191
+ # Store database path and query cache in app state
192
+ app.state.db_path = db_path
193
+ app.state.query_cache = QueryCache(ttl_seconds=30.0)
194
+
195
+ # Setup Jinja2 templates
196
+ template_dir = Path(__file__).parent / "templates"
197
+ template_dir.mkdir(parents=True, exist_ok=True)
198
+ templates = Jinja2Templates(directory=str(template_dir))
199
+
200
+ # Add custom filters
201
+ def format_number(value: int | None) -> str:
202
+ if value is None:
203
+ return "0"
204
+ return f"{value:,}"
205
+
206
+ templates.env.filters["format_number"] = format_number
207
+
208
+ # Setup static files
209
+ static_dir = Path(__file__).parent / "static"
210
+ static_dir.mkdir(parents=True, exist_ok=True)
211
+ if static_dir.exists():
212
+ app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")
213
+
214
+ # ========== DATABASE HELPERS ==========
215
+
216
+ async def get_db() -> aiosqlite.Connection:
217
+ """Get database connection with busy_timeout to prevent lock errors."""
218
+ db = await aiosqlite.connect(app.state.db_path)
219
+ db.row_factory = aiosqlite.Row
220
+ # Set busy_timeout to 5 seconds - prevents "database is locked" errors
221
+ # during concurrent access from spawner scripts and WebSocket polling
222
+ await db.execute("PRAGMA busy_timeout = 5000")
223
+ return db
224
+
225
+ # ========== ROUTES ==========
226
+
227
+ @app.get("/", response_class=HTMLResponse)
228
+ async def dashboard(request: Request) -> HTMLResponse:
229
+ """Main dashboard view with navigation tabs."""
230
+ return templates.TemplateResponse(
231
+ "dashboard-redesign.html",
232
+ {
233
+ "request": request,
234
+ "title": "HtmlGraph Agent Observability",
235
+ },
236
+ )
237
+
238
+ # ========== AGENTS ENDPOINTS ==========
239
+
240
+ @app.get("/views/agents", response_class=HTMLResponse)
241
+ async def agents_view(request: Request) -> HTMLResponse:
242
+ """Get agent workload and performance stats as HTMX partial."""
243
+ db = await get_db()
244
+ cache = app.state.query_cache
245
+ query_start_time = time.time()
246
+
247
+ try:
248
+ # Create cache key for agents view
249
+ cache_key = "agents_view:all"
250
+
251
+ # Check cache first
252
+ cached_response = cache.get(cache_key)
253
+ if cached_response is not None:
254
+ query_time_ms = (time.time() - query_start_time) * 1000
255
+ cache.record_metric(cache_key, query_time_ms, cache_hit=True)
256
+ logger.debug(
257
+ f"Cache HIT for agents_view (key={cache_key}, time={query_time_ms:.2f}ms)"
258
+ )
259
+ agents, total_actions, total_tokens = cached_response
260
+ else:
261
+                # Query agent statistics aggregated from the 'agent_events' table
262
+ # Optimized with GROUP BY on indexed column
263
+ query = """
264
+ SELECT
265
+ e.agent_id,
266
+ COUNT(*) as event_count,
267
+ SUM(e.cost_tokens) as total_tokens,
268
+ COUNT(DISTINCT e.session_id) as session_count,
269
+ MAX(e.timestamp) as last_active
270
+ FROM agent_events e
271
+ GROUP BY e.agent_id
272
+ ORDER BY event_count DESC
273
+ """
274
+
275
+ # Execute query with timing
276
+ exec_start = time.time()
277
+ cursor = await db.execute(query)
278
+ rows = await cursor.fetchall()
279
+ exec_time_ms = (time.time() - exec_start) * 1000
280
+
281
+ agents = []
282
+ total_actions = 0
283
+ total_tokens = 0
284
+
285
+ # First pass to calculate totals
286
+ for row in rows:
287
+ total_actions += row[1]
288
+ total_tokens += row[2] or 0
289
+
290
+ # Second pass to build agent objects with percentages
291
+ for row in rows:
292
+ event_count = row[1]
293
+ workload_pct = (
294
+ (event_count / total_actions * 100) if total_actions > 0 else 0
295
+ )
296
+
297
+ agents.append(
298
+ {
299
+ "agent_id": row[0],
300
+ "event_count": event_count,
301
+ "total_tokens": row[2] or 0,
302
+ "session_count": row[3],
303
+ "last_active": row[4],
304
+ "workload_pct": round(workload_pct, 1),
305
+ }
306
+ )
307
+
308
+ # Cache the results
309
+ cache_data = (agents, total_actions, total_tokens)
310
+ cache.set(cache_key, cache_data)
311
+ query_time_ms = (time.time() - query_start_time) * 1000
312
+ cache.record_metric(cache_key, exec_time_ms, cache_hit=False)
313
+ logger.debug(
314
+ f"Cache MISS for agents_view (key={cache_key}, "
315
+ f"db_time={exec_time_ms:.2f}ms, total_time={query_time_ms:.2f}ms, "
316
+ f"agents={len(agents)})"
317
+ )
318
+
319
+ return templates.TemplateResponse(
320
+ "partials/agents.html",
321
+ {
322
+ "request": request,
323
+ "agents": agents,
324
+ "total_agents": len(agents),
325
+ "total_actions": total_actions,
326
+ "total_tokens": total_tokens,
327
+ },
328
+ )
329
+ finally:
330
+ await db.close()
331
+
332
+ # ========== ACTIVITY FEED ENDPOINTS ==========
333
+
334
+ @app.get("/views/activity-feed", response_class=HTMLResponse)
335
+ async def activity_feed(
336
+ request: Request,
337
+ limit: int = 50,
338
+ session_id: str | None = None,
339
+ agent_id: str | None = None,
340
+ ) -> HTMLResponse:
341
+ """Get latest agent events grouped by conversation turn (user prompt).
342
+
343
+ Returns grouped activity feed showing conversation turns with their child events.
344
+ """
345
+ db = await get_db()
346
+ cache = app.state.query_cache
347
+
348
+ try:
349
+ # Call the helper function to get grouped events
350
+ grouped_result = await _get_events_grouped_by_prompt_impl(db, cache, limit)
351
+
352
+ return templates.TemplateResponse(
353
+ "partials/activity-feed.html",
354
+ {
355
+ "request": request,
356
+ "conversation_turns": grouped_result.get("conversation_turns", []),
357
+ "total_turns": grouped_result.get("total_turns", 0),
358
+ "limit": limit,
359
+ },
360
+ )
361
+ finally:
362
+ await db.close()
363
+
364
+ @app.get("/api/events", response_model=list[EventModel])
365
+ async def get_events(
366
+ limit: int = 50,
367
+ session_id: str | None = None,
368
+ agent_id: str | None = None,
369
+ offset: int = 0,
370
+ ) -> list[EventModel]:
371
+ """Get events as JSON API with parent-child hierarchical linking."""
372
+ db = await get_db()
373
+ cache = app.state.query_cache
374
+ query_start_time = time.time()
375
+
376
+ try:
377
+ # Create cache key from query parameters
378
+ cache_key = (
379
+ f"api_events:{limit}:{offset}:{session_id or 'all'}:{agent_id or 'all'}"
380
+ )
381
+
382
+ # Check cache first
383
+ cached_results = cache.get(cache_key)
384
+ if cached_results is not None:
385
+ query_time_ms = (time.time() - query_start_time) * 1000
386
+ cache.record_metric(cache_key, query_time_ms, cache_hit=True)
387
+ logger.debug(
388
+ f"Cache HIT for api_events (key={cache_key}, time={query_time_ms:.2f}ms)"
389
+ )
390
+ return list(cached_results) if isinstance(cached_results, list) else []
391
+ else:
392
+                # Query the 'agent_events' table populated by the Phase 1 PreToolUse hook implementation
393
+ # Optimized with column selection and proper indexing
394
+ query = """
395
+ SELECT e.event_id, e.agent_id, e.event_type, e.timestamp, e.tool_name,
396
+ e.input_summary, e.output_summary, e.session_id,
397
+ e.status, e.model
398
+ FROM agent_events e
399
+ WHERE 1=1
400
+ """
401
+ params: list = []
402
+
403
+ if session_id:
404
+ query += " AND e.session_id = ?"
405
+ params.append(session_id)
406
+
407
+ if agent_id:
408
+ query += " AND e.agent_id = ?"
409
+ params.append(agent_id)
410
+
411
+ query += " ORDER BY e.timestamp DESC LIMIT ? OFFSET ?"
412
+ params.extend([limit, offset])
413
+
414
+ # Execute query with timing
415
+ exec_start = time.time()
416
+ cursor = await db.execute(query, params)
417
+ rows = await cursor.fetchall()
418
+ exec_time_ms = (time.time() - exec_start) * 1000
419
+
420
+ # Build result models
421
+ results = [
422
+ EventModel(
423
+ event_id=row[0],
424
+ agent_id=row[1] or "unknown",
425
+ event_type=row[2],
426
+ timestamp=row[3],
427
+ tool_name=row[4],
428
+ input_summary=row[5],
429
+ output_summary=row[6],
430
+ session_id=row[7],
431
+ parent_event_id=None, # Not available in all schema versions
432
+ status=row[8],
433
+ model=row[9],
434
+ )
435
+ for row in rows
436
+ ]
437
+
438
+ # Cache the results
439
+ cache.set(cache_key, results)
440
+ query_time_ms = (time.time() - query_start_time) * 1000
441
+ cache.record_metric(cache_key, exec_time_ms, cache_hit=False)
442
+ logger.debug(
443
+ f"Cache MISS for api_events (key={cache_key}, "
444
+ f"db_time={exec_time_ms:.2f}ms, total_time={query_time_ms:.2f}ms, "
445
+ f"rows={len(results)})"
446
+ )
447
+
448
+ return results
449
+ finally:
450
+ await db.close()
451
+
452
+ # ========== INITIAL STATS ENDPOINT ==========
453
+
454
+ @app.get("/api/initial-stats")
455
+ async def initial_stats() -> dict[str, Any]:
456
+ """Get initial statistics for dashboard header (events, agents, sessions)."""
457
+ db = await get_db()
458
+ try:
459
+ # Query all stats in a single query for efficiency
460
+ stats_query = """
461
+ SELECT
462
+ (SELECT COUNT(*) FROM agent_events) as total_events,
463
+ (SELECT COUNT(DISTINCT agent_id) FROM agent_events) as total_agents,
464
+ (SELECT COUNT(*) FROM sessions) as total_sessions
465
+ """
466
+ cursor = await db.execute(stats_query)
467
+ row = await cursor.fetchone()
468
+
469
+ # Query distinct agent IDs for the agent set
470
+ agents_query = (
471
+ "SELECT DISTINCT agent_id FROM agent_events WHERE agent_id IS NOT NULL"
472
+ )
473
+ agents_cursor = await db.execute(agents_query)
474
+ agents_rows = await agents_cursor.fetchall()
475
+ agents = [row[0] for row in agents_rows]
476
+
477
+ if row is None:
478
+ return {
479
+ "total_events": 0,
480
+ "total_agents": 0,
481
+ "total_sessions": 0,
482
+ "agents": agents,
483
+ }
484
+
485
+ return {
486
+ "total_events": int(row[0]) if row[0] else 0,
487
+ "total_agents": int(row[1]) if row[1] else 0,
488
+ "total_sessions": int(row[2]) if row[2] else 0,
489
+ "agents": agents,
490
+ }
491
+ finally:
492
+ await db.close()
493
+
494
+ # ========== PERFORMANCE METRICS ENDPOINT ==========
495
+
496
+ @app.get("/api/query-metrics")
497
+ async def get_query_metrics() -> dict[str, Any]:
498
+ """Get query performance metrics and cache statistics."""
499
+ cache = app.state.query_cache
500
+ metrics = cache.get_metrics()
501
+
502
+ # Calculate aggregate statistics
503
+ total_queries = sum(m.get("count", 0) for m in metrics.values())
504
+ total_cache_hits = sum(m.get("hits", 0) for m in metrics.values())
505
+ hit_rate = (total_cache_hits / total_queries * 100) if total_queries > 0 else 0
506
+
507
+ return {
508
+ "timestamp": datetime.now().isoformat(),
509
+ "cache_status": {
510
+ "ttl_seconds": cache.ttl_seconds,
511
+ "cached_queries": len(cache.cache),
512
+ "total_queries_tracked": total_queries,
513
+ "cache_hits": total_cache_hits,
514
+ "cache_hit_rate_percent": round(hit_rate, 2),
515
+ },
516
+ "query_metrics": metrics,
517
+ }
518
+
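
Assuming the server is listening on 127.0.0.1:8000 (the host and port are assumptions, not taken from this diff), the resulting cache statistics can be inspected with the standard library alone:

# Sketch: fetch cache statistics from a running dashboard instance
import json
import urllib.request

with urllib.request.urlopen("http://127.0.0.1:8000/api/query-metrics") as resp:
    stats = json.load(resp)

print(stats["cache_status"]["cache_hit_rate_percent"])
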
519
+ # ========== EVENT TRACES ENDPOINT (Parent-Child Nesting) ==========
520
+
521
+ @app.get("/api/event-traces")
522
+ async def get_event_traces(
523
+ limit: int = 50,
524
+ session_id: str | None = None,
525
+ ) -> dict[str, Any]:
526
+ """
527
+ Get event traces showing parent-child relationships for Task delegations.
528
+
529
+ This endpoint returns task delegation events with their child events,
530
+ showing the complete hierarchy of delegated work:
531
+
532
+ Example:
533
+ {
534
+ "traces": [
535
+ {
536
+ "parent_event_id": "evt-abc123",
537
+ "agent_id": "claude-code",
538
+ "subagent_type": "gemini-spawner",
539
+ "started_at": "2025-01-08T16:40:54",
540
+ "status": "completed",
541
+ "duration_seconds": 287,
542
+ "child_events": [
543
+ {
544
+ "event_id": "subevt-xyz789",
545
+ "agent_id": "subagent-gemini-spawner",
546
+ "event_type": "delegation",
547
+ "timestamp": "2025-01-08T16:42:01",
548
+ "status": "completed"
549
+ }
550
+ ],
551
+ "child_spike_count": 2,
552
+ "child_spikes": ["spk-001", "spk-002"]
553
+ }
554
+ ]
555
+ }
556
+
557
+ Args:
558
+ limit: Maximum number of parent events to return (default 50)
559
+ session_id: Filter by session (optional)
560
+
561
+ Returns:
562
+ Dict with traces array showing parent-child relationships
563
+ """
564
+ db = await get_db()
565
+ cache = app.state.query_cache
566
+ query_start_time = time.time()
567
+
568
+ try:
569
+ # Create cache key
570
+ cache_key = f"event_traces:{limit}:{session_id or 'all'}"
571
+
572
+ # Check cache first
573
+ cached_result = cache.get(cache_key)
574
+ if cached_result is not None:
575
+ query_time_ms = (time.time() - query_start_time) * 1000
576
+ cache.record_metric(cache_key, query_time_ms, cache_hit=True)
577
+ return cached_result # type: ignore[no-any-return]
578
+
579
+ exec_start = time.time()
580
+
581
+ # Query parent events (task delegations)
582
+ parent_query = """
583
+ SELECT event_id, agent_id, subagent_type, timestamp, status,
584
+ child_spike_count, output_summary, model
585
+ FROM agent_events
586
+ WHERE event_type = 'task_delegation'
587
+ """
588
+ parent_params: list[Any] = []
589
+
590
+ if session_id:
591
+ parent_query += " AND session_id = ?"
592
+ parent_params.append(session_id)
593
+
594
+ parent_query += " ORDER BY timestamp DESC LIMIT ?"
595
+ parent_params.append(limit)
596
+
597
+ cursor = await db.execute(parent_query, parent_params)
598
+ parent_rows = await cursor.fetchall()
599
+
600
+ traces: list[dict[str, Any]] = []
601
+
602
+ for parent_row in parent_rows:
603
+ parent_event_id = parent_row[0]
604
+ agent_id = parent_row[1]
605
+ subagent_type = parent_row[2]
606
+ started_at = parent_row[3]
607
+ status = parent_row[4]
608
+ child_spike_count = parent_row[5] or 0
609
+ output_summary = parent_row[6]
610
+ model = parent_row[7]
611
+
612
+ # Parse output summary to get child spike IDs if available
613
+ child_spikes = []
614
+ try:
615
+ if output_summary:
616
+ output_data = (
617
+ json.loads(output_summary)
618
+ if isinstance(output_summary, str)
619
+ else output_summary
620
+ )
621
+ # Try to extract spike IDs if present
622
+ if isinstance(output_data, dict):
623
+ spikes_info = output_data.get("spikes_created", [])
624
+ if isinstance(spikes_info, list):
625
+ child_spikes = spikes_info
626
+ except Exception:
627
+ pass
628
+
629
+ # Query child events (subagent completion events)
630
+ child_query = """
631
+ SELECT event_id, agent_id, event_type, timestamp, status
632
+ FROM agent_events
633
+ WHERE parent_event_id = ?
634
+ ORDER BY timestamp ASC
635
+ """
636
+ child_cursor = await db.execute(child_query, (parent_event_id,))
637
+ child_rows = await child_cursor.fetchall()
638
+
639
+ child_events = []
640
+ for child_row in child_rows:
641
+ child_events.append(
642
+ {
643
+ "event_id": child_row[0],
644
+ "agent_id": child_row[1],
645
+ "event_type": child_row[2],
646
+ "timestamp": child_row[3],
647
+ "status": child_row[4],
648
+ }
649
+ )
650
+
651
+ # Calculate duration if completed
652
+ duration_seconds = None
653
+ if status == "completed" and started_at:
654
+ try:
655
+ from datetime import datetime as dt
656
+
657
+ start_dt = dt.fromisoformat(started_at)
658
+ now_dt = dt.now()
659
+ duration_seconds = (now_dt - start_dt).total_seconds()
660
+ except Exception:
661
+ pass
662
+
663
+ trace = {
664
+ "parent_event_id": parent_event_id,
665
+ "agent_id": agent_id,
666
+ "subagent_type": subagent_type or "general-purpose",
667
+ "started_at": started_at,
668
+ "status": status,
669
+ "duration_seconds": duration_seconds,
670
+ "child_events": child_events,
671
+ "child_spike_count": child_spike_count,
672
+ "child_spikes": child_spikes,
673
+ "model": model,
674
+ }
675
+
676
+ traces.append(trace)
677
+
678
+ exec_time_ms = (time.time() - exec_start) * 1000
679
+
680
+ # Build response
681
+ result = {
682
+ "timestamp": datetime.now().isoformat(),
683
+ "total_traces": len(traces),
684
+ "traces": traces,
685
+ "limitations": {
686
+ "note": "Child spike count is approximate and based on timestamp proximity",
687
+ "note_2": "Spike IDs in child_spikes only available if recorded in output_summary",
688
+ },
689
+ }
690
+
691
+ # Cache the result
692
+ cache.set(cache_key, result)
693
+ query_time_ms = (time.time() - query_start_time) * 1000
694
+ cache.record_metric(cache_key, exec_time_ms, cache_hit=False)
695
+ logger.debug(
696
+ f"Cache MISS for event_traces (key={cache_key}, "
697
+ f"db_time={exec_time_ms:.2f}ms, total_time={query_time_ms:.2f}ms, "
698
+ f"traces={len(traces)})"
699
+ )
700
+
701
+ return result
702
+
703
+ finally:
704
+ await db.close()
705
+
706
+ # ========== COMPLETE ACTIVITY FEED ENDPOINT ==========
707
+
708
+ @app.get("/api/complete-activity-feed")
709
+ async def complete_activity_feed(
710
+ limit: int = 100,
711
+ session_id: str | None = None,
712
+ include_delegations: bool = True,
713
+ include_spikes: bool = True,
714
+ ) -> dict[str, Any]:
715
+ """
716
+ Get unified activity feed combining events from all sources.
717
+
718
+ This endpoint aggregates:
719
+ - Hook events (tool_call from PreToolUse)
720
+ - Subagent events (delegation completions from SubagentStop)
721
+ - SDK spike logs (knowledge created by delegated tasks)
722
+
723
+ This provides complete visibility into ALL activity, including
724
+ delegated work that would otherwise be invisible due to Claude Code's
725
+ hook isolation design (see GitHub issue #14859).
726
+
727
+ Args:
728
+ limit: Maximum number of events to return
729
+ session_id: Filter by session (optional)
730
+ include_delegations: Include delegation events (default True)
731
+ include_spikes: Include spike creation events (default True)
732
+
733
+ Returns:
734
+ Dict with events array and metadata
735
+ """
736
+ db = await get_db()
737
+ cache = app.state.query_cache
738
+ query_start_time = time.time()
739
+
740
+ try:
741
+ # Create cache key
742
+ cache_key = f"complete_activity:{limit}:{session_id or 'all'}:{include_delegations}:{include_spikes}"
743
+
744
+ # Check cache first
745
+ cached_result = cache.get(cache_key)
746
+ if cached_result is not None:
747
+ query_time_ms = (time.time() - query_start_time) * 1000
748
+ cache.record_metric(cache_key, query_time_ms, cache_hit=True)
749
+ return cached_result # type: ignore[no-any-return]
750
+
751
+ events: list[dict[str, Any]] = []
752
+
753
+ # 1. Query hook events (tool_call, delegation from agent_events)
754
+ event_types = ["tool_call"]
755
+ if include_delegations:
756
+ event_types.extend(["delegation", "completion"])
757
+
758
+ event_type_placeholders = ",".join("?" for _ in event_types)
759
+ query = f"""
760
+ SELECT
761
+ 'hook_event' as source,
762
+ event_id,
763
+ agent_id,
764
+ event_type,
765
+ timestamp,
766
+ tool_name,
767
+ input_summary,
768
+ output_summary,
769
+ session_id,
770
+ status,
771
+ model
772
+ FROM agent_events
773
+ WHERE event_type IN ({event_type_placeholders})
774
+ """
775
+ params: list[Any] = list(event_types)
776
+
777
+ if session_id:
778
+ query += " AND session_id = ?"
779
+ params.append(session_id)
780
+
781
+ query += " ORDER BY timestamp DESC LIMIT ?"
782
+ params.append(limit)
783
+
784
+ exec_start = time.time()
785
+ cursor = await db.execute(query, params)
786
+ rows = await cursor.fetchall()
787
+
788
+ for row in rows:
789
+ events.append(
790
+ {
791
+ "source": row[0],
792
+ "event_id": row[1],
793
+ "agent_id": row[2] or "unknown",
794
+ "event_type": row[3],
795
+ "timestamp": row[4],
796
+ "tool_name": row[5],
797
+ "input_summary": row[6],
798
+ "output_summary": row[7],
799
+ "session_id": row[8],
800
+ "status": row[9],
801
+ "model": row[10],
802
+ }
803
+ )
804
+
805
+ # 2. Query spike logs if requested (knowledge created by delegated tasks)
806
+ if include_spikes:
807
+ try:
808
+ spike_query = """
809
+ SELECT
810
+ 'spike_log' as source,
811
+ id as event_id,
812
+ assigned_to as agent_id,
813
+ 'knowledge_created' as event_type,
814
+ created_at as timestamp,
815
+ title as tool_name,
816
+ hypothesis as input_summary,
817
+ findings as output_summary,
818
+ NULL as session_id,
819
+ status
820
+ FROM features
821
+ WHERE type = 'spike'
822
+ """
823
+ spike_params: list[Any] = []
824
+
825
+ spike_query += " ORDER BY created_at DESC LIMIT ?"
826
+ spike_params.append(limit)
827
+
828
+ spike_cursor = await db.execute(spike_query, spike_params)
829
+ spike_rows = await spike_cursor.fetchall()
830
+
831
+ for row in spike_rows:
832
+ events.append(
833
+ {
834
+ "source": row[0],
835
+ "event_id": row[1],
836
+ "agent_id": row[2] or "sdk",
837
+ "event_type": row[3],
838
+ "timestamp": row[4],
839
+ "tool_name": row[5],
840
+ "input_summary": row[6],
841
+ "output_summary": row[7],
842
+ "session_id": row[8],
843
+ "status": row[9] or "completed",
844
+ }
845
+ )
846
+ except Exception as e:
847
+ # Spike query might fail if columns don't exist
848
+ logger.debug(
849
+ f"Spike query failed (expected if schema differs): {e}"
850
+ )
851
+
852
+ # 3. Query delegation handoffs from agent_collaboration
853
+ if include_delegations:
854
+ try:
855
+ collab_query = """
856
+ SELECT
857
+ 'delegation' as source,
858
+ handoff_id as event_id,
859
+ from_agent || ' -> ' || to_agent as agent_id,
860
+ 'handoff' as event_type,
861
+ timestamp,
862
+ handoff_type as tool_name,
863
+ reason as input_summary,
864
+ context as output_summary,
865
+ session_id,
866
+ status
867
+ FROM agent_collaboration
868
+ WHERE handoff_type = 'delegation'
869
+ """
870
+ collab_params: list[Any] = []
871
+
872
+ if session_id:
873
+ collab_query += " AND session_id = ?"
874
+ collab_params.append(session_id)
875
+
876
+ collab_query += " ORDER BY timestamp DESC LIMIT ?"
877
+ collab_params.append(limit)
878
+
879
+ collab_cursor = await db.execute(collab_query, collab_params)
880
+ collab_rows = await collab_cursor.fetchall()
881
+
882
+ for row in collab_rows:
883
+ events.append(
884
+ {
885
+ "source": row[0],
886
+ "event_id": row[1],
887
+ "agent_id": row[2] or "orchestrator",
888
+ "event_type": row[3],
889
+ "timestamp": row[4],
890
+ "tool_name": row[5],
891
+ "input_summary": row[6],
892
+ "output_summary": row[7],
893
+ "session_id": row[8],
894
+ "status": row[9] or "pending",
895
+ }
896
+ )
897
+ except Exception as e:
898
+ logger.debug(f"Collaboration query failed: {e}")
899
+
900
+ # Sort all events by timestamp DESC
901
+ events.sort(key=lambda e: e.get("timestamp", ""), reverse=True)
902
+
903
+ # Limit to requested count
904
+ events = events[:limit]
905
+
906
+ exec_time_ms = (time.time() - exec_start) * 1000
907
+
908
+ # Build response
909
+ result = {
910
+ "timestamp": datetime.now().isoformat(),
911
+ "total_events": len(events),
912
+ "sources": {
913
+ "hook_events": sum(
914
+ 1 for e in events if e["source"] == "hook_event"
915
+ ),
916
+ "spike_logs": sum(1 for e in events if e["source"] == "spike_log"),
917
+ "delegations": sum(
918
+ 1 for e in events if e["source"] == "delegation"
919
+ ),
920
+ },
921
+ "events": events,
922
+ "limitations": {
923
+ "note": "Subagent tool activity not tracked (Claude Code limitation)",
924
+ "github_issue": "https://github.com/anthropics/claude-code/issues/14859",
925
+ "workaround": "SubagentStop hook captures completion, SDK logging captures results",
926
+ },
927
+ }
928
+
929
+ # Cache the result
930
+ cache.set(cache_key, result)
931
+ query_time_ms = (time.time() - query_start_time) * 1000
932
+ cache.record_metric(cache_key, exec_time_ms, cache_hit=False)
933
+
934
+ return result
935
+
936
+ finally:
937
+ await db.close()
938
+
939
+ # ========== HELPER: Grouped Events Logic ==========
940
+
941
+ async def _get_events_grouped_by_prompt_impl(
942
+ db: aiosqlite.Connection, cache: QueryCache, limit: int = 50
943
+ ) -> dict[str, Any]:
944
+ """
945
+ Implementation helper: Return activity events grouped by user prompt (conversation turns).
946
+
947
+ Each conversation turn includes:
948
+ - userQuery: The original UserQuery event with prompt text
949
+ - children: All child events triggered by this prompt
950
+ - stats: Aggregated statistics for the conversation turn
951
+
952
+ Args:
953
+ db: Database connection
954
+ cache: Query cache instance
955
+ limit: Maximum number of conversation turns to return (default 50)
956
+
957
+ Returns:
958
+ Dictionary with conversation turns and metadata
959
+ """
960
+ query_start_time = time.time()
961
+
962
+ try:
963
+ # Create cache key
964
+ cache_key = f"events_grouped_by_prompt:{limit}"
965
+
966
+ # Check cache first
967
+ cached_result = cache.get(cache_key)
968
+ if cached_result is not None:
969
+ query_time_ms = (time.time() - query_start_time) * 1000
970
+ cache.record_metric(cache_key, query_time_ms, cache_hit=True)
971
+ logger.debug(
972
+ f"Cache HIT for events_grouped_by_prompt (key={cache_key}, time={query_time_ms:.2f}ms)"
973
+ )
974
+ return cached_result # type: ignore[no-any-return]
975
+
976
+ exec_start = time.time()
977
+
978
+ # Step 1: Query UserQuery events (most recent first)
979
+ user_query_query = """
980
+ SELECT
981
+ event_id,
982
+ timestamp,
983
+ input_summary,
984
+ execution_duration_seconds,
985
+ status,
986
+ agent_id
987
+ FROM agent_events
988
+ WHERE tool_name = 'UserQuery'
989
+ ORDER BY timestamp DESC
990
+ LIMIT ?
991
+ """
992
+
993
+ cursor = await db.execute(user_query_query, [limit])
994
+ user_query_rows = await cursor.fetchall()
995
+
996
+ conversation_turns: list[dict[str, Any]] = []
997
+
998
+ # Step 2: For each UserQuery, fetch child events
999
+ for uq_row in user_query_rows:
1000
+ uq_event_id = uq_row[0]
1001
+ uq_timestamp = uq_row[1]
1002
+ uq_input = uq_row[2] or ""
1003
+ uq_duration = uq_row[3] or 0.0
1004
+ uq_status = uq_row[4]
1005
+
1006
+ # Extract prompt text from input_summary
1007
+ # Since format_tool_summary now properly formats UserQuery events,
1008
+ # input_summary contains just the prompt text (preview up to 100 chars)
1009
+ prompt_text = uq_input
1010
+
1011
+ # Step 2a: Query child events linked via parent_event_id
1012
+ children_query = """
1013
+ SELECT
1014
+ event_id,
1015
+ tool_name,
1016
+ timestamp,
1017
+ input_summary,
1018
+ execution_duration_seconds,
1019
+ status,
1020
+ COALESCE(subagent_type, agent_id) as agent_id,
1021
+ model
1022
+ FROM agent_events
1023
+ WHERE parent_event_id = ?
1024
+ ORDER BY timestamp ASC
1025
+ """
1026
+
1027
+ # Recursive helper to fetch children at any depth
1028
+ async def fetch_children_recursive(
1029
+ parent_id: str, depth: int = 0, max_depth: int = 4
1030
+ ) -> tuple[list[dict[str, Any]], float, int, int]:
1031
+ """Recursively fetch children up to max_depth levels."""
1032
+ if depth >= max_depth:
1033
+ return [], 0.0, 0, 0
1034
+
1035
+ cursor = await db.execute(children_query, [parent_id])
1036
+ rows = await cursor.fetchall()
1037
+
1038
+ children_list: list[dict[str, Any]] = []
1039
+ total_dur = 0.0
1040
+ success_cnt = 0
1041
+ error_cnt = 0
1042
+
1043
+ for row in rows:
1044
+ evt_id = row[0]
1045
+ tool = row[1]
1046
+ timestamp = row[2]
1047
+ input_text = row[3] or ""
1048
+ duration = row[4] or 0.0
1049
+ status = row[5]
1050
+ agent = row[6] or "unknown"
1051
+ model = row[7] # Add model field
1052
+
1053
+ # Build summary (input_text already contains formatted summary)
1054
+ summary = input_text[:80] + (
1055
+ "..." if len(input_text) > 80 else ""
1056
+ )
1057
+
1058
+ # Recursively fetch this child's children
1059
+ (
1060
+ nested_children,
1061
+ nested_dur,
1062
+ nested_success,
1063
+ nested_error,
1064
+ ) = await fetch_children_recursive(evt_id, depth + 1, max_depth)
1065
+
1066
+ child_dict: dict[str, Any] = {
1067
+ "event_id": evt_id,
1068
+ "tool_name": tool,
1069
+ "timestamp": timestamp,
1070
+ "summary": summary,
1071
+ "duration_seconds": round(duration, 2),
1072
+ "agent": agent,
1073
+ "depth": depth,
1074
+ "model": model, # Include model in child dict
1075
+ }
1076
+
1077
+ # Only add children key if there are nested children
1078
+ if nested_children:
1079
+ child_dict["children"] = nested_children
1080
+
1081
+ children_list.append(child_dict)
1082
+
1083
+ # Update stats (include nested)
1084
+ total_dur += duration + nested_dur
1085
+ if status == "recorded" or status == "success":
1086
+ success_cnt += 1
1087
+ else:
1088
+ error_cnt += 1
1089
+ success_cnt += nested_success
1090
+ error_cnt += nested_error
1091
+
1092
+ return children_list, total_dur, success_cnt, error_cnt
1093
+
1094
+ # Step 3: Build child events with recursive nesting
1095
+ (
1096
+ children,
1097
+ children_duration,
1098
+ children_success,
1099
+ children_error,
1100
+ ) = await fetch_children_recursive(uq_event_id, depth=0, max_depth=4)
1101
+
1102
+ total_duration = uq_duration + children_duration
1103
+ success_count = (
1104
+ 1 if uq_status == "recorded" or uq_status == "success" else 0
1105
+ ) + children_success
1106
+ error_count = (
1107
+ 0 if uq_status == "recorded" or uq_status == "success" else 1
1108
+ ) + children_error
1109
+
1110
+ # Step 4: Build conversation turn object
1111
+ conversation_turn = {
1112
+ "userQuery": {
1113
+ "event_id": uq_event_id,
1114
+ "timestamp": uq_timestamp,
1115
+ "prompt": prompt_text[:200], # Truncate for display
1116
+ "duration_seconds": round(uq_duration, 2),
1117
+ },
1118
+ "children": children,
1119
+ "stats": {
1120
+ "tool_count": len(children),
1121
+ "total_duration": round(total_duration, 2),
1122
+ "success_count": success_count,
1123
+ "error_count": error_count,
1124
+ },
1125
+ }
1126
+
1127
+ conversation_turns.append(conversation_turn)
1128
+
1129
+ exec_time_ms = (time.time() - exec_start) * 1000
1130
+
1131
+ # Build response
1132
+ result = {
1133
+ "timestamp": datetime.now().isoformat(),
1134
+ "total_turns": len(conversation_turns),
1135
+ "conversation_turns": conversation_turns,
1136
+ "note": "Groups events by UserQuery prompt (conversation turn). Child events are linked via parent_event_id.",
1137
+ }
1138
+
1139
+ # Cache the result
1140
+ cache.set(cache_key, result)
1141
+ query_time_ms = (time.time() - query_start_time) * 1000
1142
+ cache.record_metric(cache_key, exec_time_ms, cache_hit=False)
1143
+ logger.debug(
1144
+ f"Cache MISS for events_grouped_by_prompt (key={cache_key}, "
1145
+ f"db_time={exec_time_ms:.2f}ms, total_time={query_time_ms:.2f}ms, "
1146
+ f"turns={len(conversation_turns)})"
1147
+ )
1148
+
1149
+ return result
1150
+
1151
+ except Exception as e:
1152
+ logger.error(f"Error in _get_events_grouped_by_prompt_impl: {e}")
1153
+ raise
1154
+
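
For reference, a single conversation turn produced by this helper has roughly the shape sketched below; the field names come from the code above, while the values are illustrative.

# Illustrative shape of one conversation turn (values are made up)
turn = {
    "userQuery": {
        "event_id": "evt-...",
        "timestamp": "2025-01-08T16:40:54",
        "prompt": "first 200 chars of the user prompt",
        "duration_seconds": 1.2,
    },
    "children": [
        {
            "event_id": "evt-...",
            "tool_name": "Task",
            "timestamp": "2025-01-08T16:40:55",
            "summary": "first 80 chars of input_summary",
            "duration_seconds": 0.4,
            "agent": "claude-code",
            "depth": 0,
            "model": None,
            # "children": [...] appears only when nested events exist
        }
    ],
    "stats": {"tool_count": 1, "total_duration": 1.6, "success_count": 2, "error_count": 0},
}
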
1155
+ # ========== EVENTS GROUPED BY PROMPT ENDPOINT ==========
1156
+
1157
+ @app.get("/api/events-grouped-by-prompt")
1158
+ async def events_grouped_by_prompt(limit: int = 50) -> dict[str, Any]:
1159
+ """
1160
+ Return activity events grouped by user prompt (conversation turns).
1161
+
1162
+ Each conversation turn includes:
1163
+ - userQuery: The original UserQuery event with prompt text
1164
+ - children: All child events triggered by this prompt
1165
+ - stats: Aggregated statistics for the conversation turn
1166
+
1167
+ Args:
1168
+ limit: Maximum number of conversation turns to return (default 50)
1169
+
1170
+ Returns:
1171
+ Dictionary with conversation_turns list and metadata
1172
+ """
1173
+ db = await get_db()
1174
+ cache = app.state.query_cache
1175
+
1176
+ try:
1177
+ return await _get_events_grouped_by_prompt_impl(db, cache, limit)
1178
+ finally:
1179
+ await db.close()
1180
+
1181
+ # ========== SESSIONS API ENDPOINT ==========
1182
+
1183
+ @app.get("/api/sessions")
1184
+ async def get_sessions(
1185
+ status: str | None = None,
1186
+ limit: int = 50,
1187
+ offset: int = 0,
1188
+ ) -> dict[str, Any]:
1189
+ """Get sessions from the database.
1190
+
1191
+ Args:
1192
+ status: Filter by session status (e.g., 'active', 'completed')
1193
+ limit: Maximum number of sessions to return (default 50)
1194
+ offset: Number of sessions to skip (default 0)
1195
+
1196
+ Returns:
1197
+ {
1198
+ "total": int,
1199
+ "limit": int,
1200
+ "offset": int,
1201
+ "sessions": [
1202
+ {
1203
+ "session_id": str,
1204
+ "agent": str | None,
1205
+ "continued_from": str | None,
1206
+ "started_at": str,
1207
+ "status": str,
1208
+ "start_commit": str | None,
1209
+ "ended_at": str | None
1210
+ }
1211
+ ]
1212
+ }
1213
+ """
1214
+ db = await get_db()
1215
+ cache = app.state.query_cache
1216
+ query_start_time = time.time()
1217
+
1218
+ try:
1219
+ # Create cache key from query parameters
1220
+ cache_key = f"api_sessions:{status or 'all'}:{limit}:{offset}"
1221
+
1222
+ # Check cache first
1223
+ cached_result = cache.get(cache_key)
1224
+ if cached_result is not None:
1225
+ query_time_ms = (time.time() - query_start_time) * 1000
1226
+ cache.record_metric(cache_key, query_time_ms, cache_hit=True)
1227
+ logger.debug(
1228
+ f"Cache HIT for api_sessions (key={cache_key}, time={query_time_ms:.2f}ms)"
1229
+ )
1230
+ return cached_result # type: ignore[no-any-return]
1231
+
1232
+ exec_start = time.time()
1233
+
1234
+ # Build query with optional status filter
1235
+            # Note: the sessions schema still uses agent_assigned alongside started_at/ended_at (partial migration)
1236
+ query = """
1237
+ SELECT
1238
+ session_id,
1239
+ agent_assigned,
1240
+ continued_from,
1241
+ started_at,
1242
+ status,
1243
+ start_commit,
1244
+ ended_at
1245
+ FROM sessions
1246
+ WHERE 1=1
1247
+ """
1248
+ params: list[Any] = []
1249
+
1250
+ if status:
1251
+ query += " AND status = ?"
1252
+ params.append(status)
1253
+
1254
+ query += " ORDER BY started_at DESC LIMIT ? OFFSET ?"
1255
+ params.extend([limit, offset])
1256
+
1257
+ cursor = await db.execute(query, params)
1258
+ rows = await cursor.fetchall()
1259
+
1260
+ # Get total count for pagination
1261
+ count_query = "SELECT COUNT(*) FROM sessions WHERE 1=1"
1262
+ count_params: list[Any] = []
1263
+ if status:
1264
+ count_query += " AND status = ?"
1265
+ count_params.append(status)
1266
+
1267
+ count_cursor = await db.execute(count_query, count_params)
1268
+ count_row = await count_cursor.fetchone()
1269
+ total = int(count_row[0]) if count_row else 0
1270
+
1271
+ # Build session objects
1272
+ # Map schema columns to API response fields for backward compatibility
1273
+ sessions = []
1274
+ for row in rows:
1275
+ sessions.append(
1276
+ {
1277
+ "session_id": row[0],
1278
+ "agent": row[1], # agent_assigned -> agent for API compat
1279
+ "continued_from": row[2], # parent_session_id
1280
+ "started_at": row[3], # created_at -> started_at for API compat
1281
+ "status": row[4] or "unknown",
1282
+ "start_commit": row[5],
1283
+ "ended_at": row[6], # completed_at -> ended_at for API compat
1284
+ }
1285
+ )
1286
+
1287
+ exec_time_ms = (time.time() - exec_start) * 1000
1288
+
1289
+ result = {
1290
+ "total": total,
1291
+ "limit": limit,
1292
+ "offset": offset,
1293
+ "sessions": sessions,
1294
+ }
1295
+
1296
+ # Cache the result
1297
+ cache.set(cache_key, result)
1298
+ query_time_ms = (time.time() - query_start_time) * 1000
1299
+ cache.record_metric(cache_key, exec_time_ms, cache_hit=False)
1300
+ logger.debug(
1301
+ f"Cache MISS for api_sessions (key={cache_key}, "
1302
+ f"db_time={exec_time_ms:.2f}ms, total_time={query_time_ms:.2f}ms, "
1303
+ f"sessions={len(sessions)})"
1304
+ )
1305
+
1306
+ return result
1307
+
1308
+ finally:
1309
+ await db.close()
1310
+
1311
+ # ========== ORCHESTRATION ENDPOINTS ==========
1312
+
1313
+ @app.get("/views/orchestration", response_class=HTMLResponse)
1314
+ async def orchestration_view(request: Request) -> HTMLResponse:
1315
+ """Get delegation chains and agent handoffs as HTMX partial."""
1316
+ db = await get_db()
1317
+ try:
1318
+ # Query delegation events from agent_events table
1319
+ # Use same query as API endpoint - filter by tool_name = 'Task'
1320
+ query = """
1321
+ SELECT
1322
+ event_id,
1323
+ agent_id as from_agent,
1324
+ subagent_type as to_agent,
1325
+ timestamp,
1326
+ input_summary,
1327
+ session_id,
1328
+ status
1329
+ FROM agent_events
1330
+ WHERE tool_name = 'Task'
1331
+ ORDER BY timestamp DESC
1332
+ LIMIT 50
1333
+ """
1334
+
1335
+ cursor = await db.execute(query)
1336
+ rows = list(await cursor.fetchall())
1337
+ logger.debug(f"orchestration_view: Query executed, got {len(rows)} rows")
1338
+
1339
+ delegations = []
1340
+ for row in rows:
1341
+ from_agent = row[1] or "unknown"
1342
+ to_agent = row[2] # May be NULL
1343
+ task_summary = row[4] or ""
1344
+
1345
+ # Extract to_agent from input_summary JSON if NULL
1346
+ if not to_agent:
1347
+ try:
1348
+ input_data = json.loads(task_summary) if task_summary else {}
1349
+ to_agent = input_data.get("subagent_type", "unknown")
1350
+ except Exception:
1351
+ to_agent = "unknown"
1352
+
1353
+ delegation = {
1354
+ "event_id": row[0],
1355
+ "from_agent": from_agent,
1356
+ "to_agent": to_agent,
1357
+ "timestamp": row[3],
1358
+ "task": task_summary or "Unnamed task",
1359
+ "session_id": row[5],
1360
+ "status": row[6] or "pending",
1361
+ "result": "", # Not available in agent_events
1362
+ }
1363
+ delegations.append(delegation)
1364
+
1365
+ logger.debug(
1366
+ f"orchestration_view: Created {len(delegations)} delegation dicts"
1367
+ )
1368
+
1369
+ return templates.TemplateResponse(
1370
+ "partials/orchestration.html",
1371
+ {
1372
+ "request": request,
1373
+ "delegations": delegations,
1374
+ },
1375
+ )
1376
+ except Exception as e:
1377
+ logger.error(f"orchestration_view ERROR: {e}")
1378
+ raise
1379
+ finally:
1380
+ await db.close()
1381
+
1382
+ @app.get("/api/orchestration")
1383
+ async def orchestration_api() -> dict[str, Any]:
1384
+ """Get delegation chains and agent coordination information as JSON.
1385
+
1386
+ Returns:
1387
+ {
1388
+ "delegation_count": int,
1389
+ "unique_agents": int,
1390
+ "agents": [str],
1391
+ "delegation_chains": {
1392
+ "from_agent": [
1393
+ {
1394
+ "to_agent": str,
1395
+ "event_type": str,
1396
+ "timestamp": str,
1397
+ "task": str,
1398
+ "status": str
1399
+ }
1400
+ ]
1401
+ }
1402
+ }
1403
+ """
1404
+ db = await get_db()
1405
+ try:
1406
+ # Query delegation events from agent_events table
1407
+ # Filter by tool_name = 'Task' (not event_type)
1408
+ query = """
1409
+ SELECT
1410
+ event_id,
1411
+ agent_id as from_agent,
1412
+ subagent_type as to_agent,
1413
+ timestamp,
1414
+ input_summary,
1415
+ status
1416
+ FROM agent_events
1417
+ WHERE tool_name = 'Task'
1418
+ ORDER BY timestamp DESC
1419
+ LIMIT 1000
1420
+ """
1421
+
1422
+ cursor = await db.execute(query)
1423
+ rows = await cursor.fetchall()
1424
+
1425
+ # Build delegation chains grouped by from_agent
1426
+ delegation_chains: dict[str, list[dict[str, Any]]] = {}
1427
+ agents = set()
1428
+ delegation_count = 0
1429
+
1430
+ for row in rows:
1431
+ from_agent = row[1] or "unknown"
1432
+ to_agent = row[2] # May be NULL
1433
+ timestamp = row[3] or ""
1434
+ task_summary = row[4] or ""
1435
+ status = row[5] or "pending"
1436
+
1437
+ # Extract to_agent from input_summary JSON if NULL
1438
+ if not to_agent:
1439
+ try:
1440
+ import json
1441
+
1442
+ input_data = json.loads(task_summary) if task_summary else {}
1443
+ to_agent = input_data.get("subagent_type", "unknown")
1444
+ except Exception:
1445
+ to_agent = "unknown"
1446
+
1447
+ agents.add(from_agent)
1448
+ agents.add(to_agent)
1449
+ delegation_count += 1
1450
+
1451
+ if from_agent not in delegation_chains:
1452
+ delegation_chains[from_agent] = []
1453
+
1454
+ delegation_chains[from_agent].append(
1455
+ {
1456
+ "to_agent": to_agent,
1457
+ "event_type": "delegation",
1458
+ "timestamp": timestamp,
1459
+ "task": task_summary or "Unnamed task",
1460
+ "status": status,
1461
+ }
1462
+ )
1463
+
1464
+ return {
1465
+ "delegation_count": delegation_count,
1466
+ "unique_agents": len(agents),
1467
+ "agents": sorted(list(agents)),
1468
+ "delegation_chains": delegation_chains,
1469
+ }
1470
+
1471
+ except Exception as e:
1472
+ logger.error(f"Failed to get orchestration data: {e}")
1473
+ raise
1474
+ finally:
1475
+ await db.close()
1476
+
1477
+ @app.get("/api/orchestration/delegations")
1478
+ async def orchestration_delegations_api() -> dict[str, Any]:
1479
+ """Get delegation statistics and chains as JSON.
1480
+
1481
+ This endpoint is used by the dashboard JavaScript to display
1482
+ delegation metrics in the orchestration panel.
1483
+
1484
+ Returns:
1485
+ {
1486
+ "delegation_count": int,
1487
+ "unique_agents": int,
1488
+ "delegation_chains": {
1489
+ "from_agent": [
1490
+ {
1491
+ "to_agent": str,
1492
+ "timestamp": str,
1493
+ "task": str,
1494
+ "status": str
1495
+ }
1496
+ ]
1497
+ }
1498
+ }
1499
+ """
1500
+ db = await get_db()
1501
+ cache = app.state.query_cache
1502
+ query_start_time = time.time()
1503
+
1504
+ try:
1505
+ # Create cache key
1506
+ cache_key = "orchestration_delegations:all"
1507
+
1508
+ # Check cache first
1509
+ cached_result = cache.get(cache_key)
1510
+ if cached_result is not None:
1511
+ query_time_ms = (time.time() - query_start_time) * 1000
1512
+ cache.record_metric(cache_key, query_time_ms, cache_hit=True)
1513
+ logger.debug(
1514
+ f"Cache HIT for orchestration_delegations (key={cache_key}, "
1515
+ f"time={query_time_ms:.2f}ms)"
1516
+ )
1517
+ return cached_result # type: ignore[no-any-return]
1518
+
1519
+ exec_start = time.time()
1520
+
1521
+ # Query delegation events from agent_events table
1522
+ # Filter by tool_name = 'Task' to get Task() delegations
1523
+ query = """
1524
+ SELECT
1525
+ event_id,
1526
+ agent_id as from_agent,
1527
+ subagent_type as to_agent,
1528
+ timestamp,
1529
+ input_summary,
1530
+ status
1531
+ FROM agent_events
1532
+ WHERE tool_name = 'Task'
1533
+ ORDER BY timestamp DESC
1534
+ LIMIT 1000
1535
+ """
1536
+
1537
+ cursor = await db.execute(query)
1538
+ rows = await cursor.fetchall()
1539
+
1540
+ # Build delegation chains grouped by from_agent
1541
+ delegation_chains: dict[str, list[dict[str, Any]]] = {}
1542
+ agents = set()
1543
+ delegation_count = 0
1544
+
1545
+ for row in rows:
1546
+ from_agent = row[1] or "unknown"
1547
+ to_agent = row[2] # May be NULL
1548
+ timestamp = row[3] or ""
1549
+ task_summary = row[4] or ""
1550
+ status = row[5] or "pending"
1551
+
1552
+ # Extract to_agent from input_summary JSON if NULL
1553
+ if not to_agent:
1554
+ try:
1555
+ input_data = json.loads(task_summary) if task_summary else {}
1556
+ to_agent = input_data.get("subagent_type", "unknown")
1557
+ except Exception:
1558
+ to_agent = "unknown"
1559
+
1560
+ agents.add(from_agent)
1561
+ agents.add(to_agent)
1562
+ delegation_count += 1
1563
+
1564
+ if from_agent not in delegation_chains:
1565
+ delegation_chains[from_agent] = []
1566
+
1567
+ delegation_chains[from_agent].append(
1568
+ {
1569
+ "to_agent": to_agent,
1570
+ "timestamp": timestamp,
1571
+ "task": task_summary or "Unnamed task",
1572
+ "status": status,
1573
+ }
1574
+ )
1575
+
1576
+ exec_time_ms = (time.time() - exec_start) * 1000
1577
+
1578
+ result = {
1579
+ "delegation_count": delegation_count,
1580
+ "unique_agents": len(agents),
1581
+ "delegation_chains": delegation_chains,
1582
+ }
1583
+
1584
+ # Cache the result
1585
+ cache.set(cache_key, result)
1586
+ query_time_ms = (time.time() - query_start_time) * 1000
1587
+ cache.record_metric(cache_key, exec_time_ms, cache_hit=False)
1588
+ logger.debug(
1589
+ f"Cache MISS for orchestration_delegations (key={cache_key}, "
1590
+ f"db_time={exec_time_ms:.2f}ms, total_time={query_time_ms:.2f}ms, "
1591
+ f"delegations={delegation_count})"
1592
+ )
1593
+
1594
+ return result
1595
+
1596
+ except Exception as e:
1597
+ logger.error(f"Failed to get orchestration delegations: {e}")
1598
+ raise
1599
+ finally:
1600
+ await db.close()
1601
+
1602
+     # ========== FEATURES ENDPOINTS ==========
+
+     @app.get("/views/features", response_class=HTMLResponse)
+     async def features_view(request: Request, status: str = "all") -> HTMLResponse:
+         """Get features by status as HTMX partial."""
+         db = await get_db()
+         cache = app.state.query_cache
+         query_start_time = time.time()
+
+         try:
+             # Create cache key from query parameters
+             cache_key = f"features_view:{status}"
+
+             # Check cache first
+             cached_response = cache.get(cache_key)
+             features_by_status: dict = {
+                 "todo": [],
+                 "in_progress": [],
+                 "blocked": [],
+                 "done": [],
+             }
+
+             if cached_response is not None:
+                 query_time_ms = (time.time() - query_start_time) * 1000
+                 cache.record_metric(cache_key, query_time_ms, cache_hit=True)
+                 logger.debug(
+                     f"Cache HIT for features_view (key={cache_key}, time={query_time_ms:.2f}ms)"
+                 )
+                 features_by_status = cached_response
+             else:
+                 # OPTIMIZATION: Use composite index idx_features_status_priority
+                 # for efficient filtering and ordering
+                 query = """
+                     SELECT id, type, title, status, priority, assigned_to, created_at, updated_at
+                     FROM features
+                     WHERE 1=1
+                 """
+                 params: list = []
+
+                 if status != "all":
+                     query += " AND status = ?"
+                     params.append(status)
+
+                 query += " ORDER BY priority DESC, created_at DESC LIMIT 100"
+
+                 exec_start = time.time()
+                 cursor = await db.execute(query, params)
+                 rows = await cursor.fetchall()
+                 exec_time_ms = (time.time() - exec_start) * 1000
+
+                 for row in rows:
+                     feature_status = row[3]
+                     features_by_status.setdefault(feature_status, []).append(
+                         {
+                             "id": row[0],
+                             "type": row[1],
+                             "title": row[2],
+                             "status": feature_status,
+                             "priority": row[4],
+                             "assigned_to": row[5],
+                             "created_at": row[6],
+                             "updated_at": row[7],
+                         }
+                     )
+
+                 # Cache the results
+                 cache.set(cache_key, features_by_status)
+                 query_time_ms = (time.time() - query_start_time) * 1000
+                 cache.record_metric(cache_key, exec_time_ms, cache_hit=False)
+                 logger.debug(
+                     f"Cache MISS for features_view (key={cache_key}, "
+                     f"db_time={exec_time_ms:.2f}ms, total_time={query_time_ms:.2f}ms)"
+                 )
+
+             return templates.TemplateResponse(
+                 "partials/features.html",
+                 {
+                     "request": request,
+                     "features_by_status": features_by_status,
+                 },
+             )
+         finally:
+             await db.close()
+
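Because /views/features returns an HTMX partial rather than JSON, the simplest smoke test is to request it in-process. A minimal sketch using FastAPI's TestClient; the status value and the assumption of a populated htmlgraph.db at the default path are illustrative, and repeated calls with the same status will hit the features_view:<status> cache key shown above:

from fastapi.testclient import TestClient

from htmlgraph.api.main import create_app

# Assumes ~/.htmlgraph/htmlgraph.db exists and contains a features table.
client = TestClient(create_app())

# "all" skips the status filter; any concrete status goes through "AND status = ?".
resp = client.get("/views/features", params={"status": "in_progress"})
assert resp.status_code == 200
assert "text/html" in resp.headers["content-type"]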
+     # ========== SPAWNERS ENDPOINTS ==========
+
+     @app.get("/views/spawners", response_class=HTMLResponse)
+     async def spawners_view(request: Request) -> HTMLResponse:
+         """Get spawner activity dashboard as HTMX partial."""
+         db = await get_db()
+         try:
+             # Get spawner statistics
+             stats_response = await get_spawner_statistics()
+             spawner_stats = stats_response.get("spawner_statistics", [])
+
+             # Get recent spawner activities
+             activities_response = await get_spawner_activities(limit=50)
+             recent_activities = activities_response.get("spawner_activities", [])
+
+             return templates.TemplateResponse(
+                 "partials/spawners.html",
+                 {
+                     "request": request,
+                     "spawner_stats": spawner_stats,
+                     "recent_activities": recent_activities,
+                 },
+             )
+         except Exception as e:
+             logger.error(f"spawners_view ERROR: {e}")
+             return templates.TemplateResponse(
+                 "partials/spawners.html",
+                 {
+                     "request": request,
+                     "spawner_stats": [],
+                     "recent_activities": [],
+                 },
+             )
+         finally:
+             await db.close()
+
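Two things worth noting in the view above: it composes the JSON endpoints by awaiting get_spawner_statistics() and get_spawner_activities() directly as coroutines (no HTTP round trip), and failures inside the try block are logged and still rendered as an empty dashboard rather than an error page. A quick way to eyeball the rendered partial against a running server (host and port are assumptions; any ASGI server hosting this app will do):

import httpx

# Assumes the dashboard API is already running locally, e.g. via uvicorn.
resp = httpx.get("http://127.0.0.1:8000/views/spawners", timeout=10.0)
resp.raise_for_status()
print(resp.text[:500])  # start of the rendered partials/spawners.html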
+     # ========== METRICS ENDPOINTS ==========
+
+     @app.get("/views/metrics", response_class=HTMLResponse)
+     async def metrics_view(request: Request) -> HTMLResponse:
+         """Get session metrics and performance data as HTMX partial."""
+         db = await get_db()
+         cache = app.state.query_cache
+         query_start_time = time.time()
+
+         try:
+             # Create cache key for metrics view
+             cache_key = "metrics_view:all"
+
+             # Check cache first
+             cached_response = cache.get(cache_key)
+             if cached_response is not None:
+                 query_time_ms = (time.time() - query_start_time) * 1000
+                 cache.record_metric(cache_key, query_time_ms, cache_hit=True)
+                 logger.debug(
+                     f"Cache HIT for metrics_view (key={cache_key}, time={query_time_ms:.2f}ms)"
+                 )
+                 sessions, stats = cached_response
+             else:
+                 # OPTIMIZATION: Combine session data with event counts in single query
+                 # This eliminates N+1 query problem (was 20+ queries, now 2)
+                 # Note: Database uses agent_assigned but started_at/ended_at (partial migration)
+                 query = """
+                     SELECT
+                         s.session_id,
+                         s.agent_assigned,
+                         s.status,
+                         s.started_at,
+                         s.ended_at,
+                         COUNT(DISTINCT e.event_id) as event_count
+                     FROM sessions s
+                     LEFT JOIN agent_events e ON s.session_id = e.session_id
+                     GROUP BY s.session_id
+                     ORDER BY s.started_at DESC
+                     LIMIT 20
+                 """
+
+                 exec_start = time.time()
+                 cursor = await db.execute(query)
+                 rows = await cursor.fetchall()
+                 exec_time_ms = (time.time() - exec_start) * 1000
+
+                 sessions = []
+                 for row in rows:
+                     started_at = datetime.fromisoformat(row[3])
+
+                     # Calculate duration
+                     if row[4]:
+                         ended_at = datetime.fromisoformat(row[4])
+                         duration_seconds = (ended_at - started_at).total_seconds()
+                     else:
+                         duration_seconds = (datetime.now() - started_at).total_seconds()
+
+                     sessions.append(
+                         {
+                             "session_id": row[0],
+                             "agent": row[1],
+                             "status": row[2],
+                             "started_at": row[3],
+                             "ended_at": row[4],
+                             "event_count": int(row[5]) if row[5] else 0,
+                             "duration_seconds": duration_seconds,
+                         }
+                     )
+
+                 # OPTIMIZATION: Combine all stats in single query instead of subqueries
+                 # This reduces query count from 4 subqueries + 1 main to just 1
+                 stats_query = """
+                     SELECT
+                         (SELECT COUNT(*) FROM agent_events) as total_events,
+                         (SELECT COUNT(DISTINCT agent_id) FROM agent_events) as total_agents,
+                         (SELECT COUNT(*) FROM sessions) as total_sessions,
+                         (SELECT COUNT(*) FROM features) as total_features
+                 """
+
+                 stats_cursor = await db.execute(stats_query)
+                 stats_row = await stats_cursor.fetchone()
+
+                 if stats_row:
+                     stats = {
+                         "total_events": int(stats_row[0]) if stats_row[0] else 0,
+                         "total_agents": int(stats_row[1]) if stats_row[1] else 0,
+                         "total_sessions": int(stats_row[2]) if stats_row[2] else 0,
+                         "total_features": int(stats_row[3]) if stats_row[3] else 0,
+                     }
+                 else:
+                     stats = {
+                         "total_events": 0,
+                         "total_agents": 0,
+                         "total_sessions": 0,
+                         "total_features": 0,
+                     }
+
+                 # Cache the results
+                 cache_data = (sessions, stats)
+                 cache.set(cache_key, cache_data)
+                 query_time_ms = (time.time() - query_start_time) * 1000
+                 cache.record_metric(cache_key, exec_time_ms, cache_hit=False)
+                 logger.debug(
+                     f"Cache MISS for metrics_view (key={cache_key}, "
+                     f"db_time={exec_time_ms:.2f}ms, total_time={query_time_ms:.2f}ms)"
+                 )
+
+             # Provide default values for metrics template variables
+             # These prevent Jinja2 UndefinedError for variables the template expects
+             exec_time_dist = {
+                 "very_fast": 0,
+                 "fast": 0,
+                 "medium": 0,
+                 "slow": 0,
+                 "very_slow": 0,
+             }
+
+             # Count active sessions from the fetched sessions
+             active_sessions = sum(1 for s in sessions if s.get("status") == "active")
+
+             # Default token stats (empty until we compute real values)
+             token_stats = {
+                 "total_tokens": 0,
+                 "avg_per_event": 0,
+                 "peak_usage": 0,
+                 "estimated_cost": 0.0,
+             }
+
+             # Default activity timeline (last 24 hours with 0 counts)
+             activity_timeline = {str(h): 0 for h in range(24)}
+             max_hourly_count = 1  # Avoid division by zero in template
+
+             # Default agent performance (empty list)
+             agent_performance: list[dict[str, str | float]] = []
+
+             # Default system health metrics
+             error_rate = 0.0
+             avg_response_time = 0.5  # seconds
+
+             return templates.TemplateResponse(
+                 "partials/metrics.html",
+                 {
+                     "request": request,
+                     "sessions": sessions,
+                     "stats": stats,
+                     "exec_time_dist": exec_time_dist,
+                     "active_sessions": active_sessions,
+                     "token_stats": token_stats,
+                     "activity_timeline": activity_timeline,
+                     "max_hourly_count": max_hourly_count,
+                     "agent_performance": agent_performance,
+                     "error_rate": error_rate,
+                     "avg_response_time": avg_response_time,
+                 },
+             )
+         finally:
+             await db.close()
+
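The per-session duration logic above parses ISO-8601 strings with datetime.fromisoformat and falls back to datetime.now() for sessions with no ended_at yet; both are naive datetimes, so the arithmetic assumes the stored timestamps are in local time. A standalone sketch of the same calculation with made-up timestamps:

from datetime import datetime

started_at = datetime.fromisoformat("2025-01-01T12:00:00")

# Finished session: duration is ended_at - started_at.
ended_at = datetime.fromisoformat("2025-01-01T12:30:30")
print((ended_at - started_at).total_seconds())  # 1830.0

# Still-running session: duration is measured against "now" (naive local time).
print((datetime.now() - started_at).total_seconds())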
+     # ========== SPAWNER OBSERVABILITY ENDPOINTS ==========
+
+     @app.get("/api/spawner-activities")
+     async def get_spawner_activities(
+         spawner_type: str | None = None,
+         session_id: str | None = None,
+         limit: int = 100,
+         offset: int = 0,
+     ) -> dict[str, Any]:
+         """
+         Get spawner delegation activities with clear attribution.
+
+         Returns events where spawner_type IS NOT NULL, ordered by recency.
+         Shows which orchestrator delegated to which spawned AI.
+
+         Args:
+             spawner_type: Filter by spawner type (gemini, codex, copilot)
+             session_id: Filter by session
+             limit: Maximum results (default 100)
+             offset: Result offset for pagination
+
+         Returns:
+             Dict with spawner_activities array and metadata
+         """
+         db = await get_db()
+         cache = app.state.query_cache
+         query_start_time = time.time()
+
+         try:
+             # Create cache key
+             cache_key = f"spawner_activities:{spawner_type or 'all'}:{session_id or 'all'}:{limit}:{offset}"
+
+             # Check cache first
+             cached_result = cache.get(cache_key)
+             if cached_result is not None:
+                 query_time_ms = (time.time() - query_start_time) * 1000
+                 cache.record_metric(cache_key, query_time_ms, cache_hit=True)
+                 return cached_result  # type: ignore[no-any-return]
+
+             exec_start = time.time()
+
+             query = """
+                 SELECT
+                     event_id,
+                     agent_id AS orchestrator_agent,
+                     spawner_type,
+                     subagent_type AS spawned_agent,
+                     tool_name,
+                     input_summary AS task,
+                     output_summary AS result,
+                     status,
+                     execution_duration_seconds AS duration,
+                     cost_tokens AS tokens,
+                     cost_usd,
+                     child_spike_count AS artifacts,
+                     timestamp,
+                     created_at
+                 FROM agent_events
+                 WHERE spawner_type IS NOT NULL
+             """
+
+             params: list[Any] = []
+             if spawner_type:
+                 query += " AND spawner_type = ?"
+                 params.append(spawner_type)
+             if session_id:
+                 query += " AND session_id = ?"
+                 params.append(session_id)
+
+             query += " ORDER BY timestamp DESC LIMIT ? OFFSET ?"
+             params.extend([limit, offset])
+
+             cursor = await db.execute(query, params)
+             events = [
+                 dict(zip([c[0] for c in cursor.description], row))
+                 for row in await cursor.fetchall()
+             ]
+
+             # Get total count
+             count_query = (
+                 "SELECT COUNT(*) FROM agent_events WHERE spawner_type IS NOT NULL"
+             )
+             count_params: list[Any] = []
+             if spawner_type:
+                 count_query += " AND spawner_type = ?"
+                 count_params.append(spawner_type)
+             if session_id:
+                 count_query += " AND session_id = ?"
+                 count_params.append(session_id)
+
+             count_cursor = await db.execute(count_query, count_params)
+             count_row = await count_cursor.fetchone()
+             total_count = int(count_row[0]) if count_row else 0
+
+             exec_time_ms = (time.time() - exec_start) * 1000
+
+             result = {
+                 "spawner_activities": events,
+                 "count": len(events),
+                 "total": total_count,
+                 "offset": offset,
+                 "limit": limit,
+             }
+
+             # Cache the result
+             cache.set(cache_key, result)
+             query_time_ms = (time.time() - query_start_time) * 1000
+             cache.record_metric(cache_key, exec_time_ms, cache_hit=False)
+             logger.debug(
+                 f"Cache MISS for spawner_activities (key={cache_key}, "
+                 f"db_time={exec_time_ms:.2f}ms, total_time={query_time_ms:.2f}ms, "
+                 f"activities={len(events)})"
+             )
+
+             return result
+         finally:
+             await db.close()
+
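The endpoint above returns plain JSON, so it is easy to poke at with httpx. A minimal sketch; the filter values and the locally running server are assumptions, while the parameter names and response keys come straight from the handler:

import httpx

resp = httpx.get(
    "http://127.0.0.1:8000/api/spawner-activities",
    params={"spawner_type": "gemini", "limit": 20, "offset": 0},
    timeout=10.0,
)
payload = resp.json()

# Envelope fields returned by the handler above.
print(payload["count"], "of", payload["total"], "delegations")
for activity in payload["spawner_activities"]:
    print(activity["orchestrator_agent"], "->", activity["spawned_agent"], activity["status"])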
+ @app.get("/api/spawner-statistics")
1999
+ async def get_spawner_statistics(session_id: str | None = None) -> dict[str, Any]:
2000
+ """
2001
+ Get aggregated statistics for each spawner type.
2002
+
2003
+ Shows delegations, success rate, average duration, token usage, and costs
2004
+ broken down by spawner type (Gemini, Codex, Copilot).
2005
+
2006
+ Args:
2007
+ session_id: Filter by session (optional)
2008
+
2009
+ Returns:
2010
+ Dict with spawner_statistics array
2011
+ """
2012
+ db = await get_db()
2013
+ cache = app.state.query_cache
2014
+ query_start_time = time.time()
2015
+
2016
+ try:
2017
+ # Create cache key
2018
+ cache_key = f"spawner_statistics:{session_id or 'all'}"
2019
+
2020
+ # Check cache first
2021
+ cached_result = cache.get(cache_key)
2022
+ if cached_result is not None:
2023
+ query_time_ms = (time.time() - query_start_time) * 1000
2024
+ cache.record_metric(cache_key, query_time_ms, cache_hit=True)
2025
+ return cached_result # type: ignore[no-any-return]
2026
+
2027
+ exec_start = time.time()
2028
+
2029
+ query = """
2030
+ SELECT
2031
+ spawner_type,
2032
+ COUNT(*) as total_delegations,
2033
+ SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as successful,
2034
+ ROUND(100.0 * SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) / COUNT(*), 1) as success_rate,
2035
+ ROUND(AVG(execution_duration_seconds), 2) as avg_duration,
2036
+ SUM(cost_tokens) as total_tokens,
2037
+ ROUND(SUM(cost_usd), 2) as total_cost,
2038
+ MIN(timestamp) as first_used,
2039
+ MAX(timestamp) as last_used
2040
+ FROM agent_events
2041
+ WHERE spawner_type IS NOT NULL
2042
+ """
2043
+
2044
+ params: list[Any] = []
2045
+ if session_id:
2046
+ query += " AND session_id = ?"
2047
+ params.append(session_id)
2048
+
2049
+ query += " GROUP BY spawner_type ORDER BY total_delegations DESC"
2050
+
2051
+ cursor = await db.execute(query, params)
2052
+ stats = [
2053
+ dict(zip([c[0] for c in cursor.description], row))
2054
+ for row in await cursor.fetchall()
2055
+ ]
2056
+
2057
+ exec_time_ms = (time.time() - exec_start) * 1000
2058
+
2059
+ result = {"spawner_statistics": stats}
2060
+
2061
+ # Cache the result
2062
+ cache.set(cache_key, result)
2063
+ query_time_ms = (time.time() - query_start_time) * 1000
2064
+ cache.record_metric(cache_key, exec_time_ms, cache_hit=False)
2065
+ logger.debug(
2066
+ f"Cache MISS for spawner_statistics (key={cache_key}, "
2067
+ f"db_time={exec_time_ms:.2f}ms, total_time={query_time_ms:.2f}ms)"
2068
+ )
2069
+
2070
+ return result
2071
+ finally:
2072
+ await db.close()
2073
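The success_rate column is computed entirely in SQL as 100.0 * completed / total, rounded to one decimal place. A one-line Python check of the same arithmetic for, say, 7 completed delegations out of 9:

# Mirrors ROUND(100.0 * SUM(completed) / COUNT(*), 1) for 7 of 9 completed.
print(round(100.0 * 7 / 9, 1))  # 77.8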
+
+     # ========== WEBSOCKET FOR REAL-TIME UPDATES ==========
+
+     @app.websocket("/ws/events")
+     async def websocket_events(websocket: WebSocket) -> None:
+         """WebSocket endpoint for real-time event streaming.
+
+         OPTIMIZATION: Uses timestamp-based filtering to minimize data transfers.
+         The timestamp > ? filter with DESC index makes queries O(log n) instead of O(n).
+
+         IMPORTANT: Initializes last_timestamp to current time to only stream NEW events.
+         Historical events are already counted in /api/initial-stats, so streaming them
+         again would cause double-counting in the header stats.
+
+         LIVE EVENTS: Also polls live_events table for real-time spawner activity
+         streaming. These events are marked as broadcast after sending and cleaned up.
+         """
+         await websocket.accept()
+         # Initialize to current time - only stream events created AFTER connection
+         # This prevents double-counting: initial-stats already includes historical events
+         last_timestamp: str = datetime.now().isoformat()
+         poll_interval = 0.5  # OPTIMIZATION: Adaptive polling (reduced from 1s)
+         last_live_event_id = 0  # Track last broadcast live event ID
+
+         try:
+             while True:
+                 db = await get_db()
+                 has_activity = False
+                 try:
+                     # ===== 1. Poll agent_events (existing logic) =====
+                     # OPTIMIZATION: Only select needed columns, use DESC index
+                     # Pattern uses index: idx_agent_events_timestamp DESC
+                     # Only fetch events AFTER last_timestamp to stream new events only
+                     query = """
+                         SELECT event_id, agent_id, event_type, timestamp, tool_name,
+                                input_summary, output_summary, session_id, status, model
+                         FROM agent_events
+                         WHERE timestamp > ?
+                         ORDER BY timestamp ASC
+                         LIMIT 100
+                     """
+
+                     cursor = await db.execute(query, [last_timestamp])
+                     rows = await cursor.fetchall()
+
+                     if rows:
+                         has_activity = True
+                         rows_list = [list(row) for row in rows]
+                         # Update last timestamp (last row since ORDER BY ts ASC)
+                         last_timestamp = rows_list[-1][3]
+
+                         # Send events in order (no need to reverse with ASC)
+                         for row in rows_list:
+                             event_data = {
+                                 "type": "event",
+                                 "event_id": row[0],
+                                 "agent_id": row[1] or "unknown",
+                                 "event_type": row[2],
+                                 "timestamp": row[3],
+                                 "tool_name": row[4],
+                                 "input_summary": row[5],
+                                 "output_summary": row[6],
+                                 "session_id": row[7],
+                                 "status": row[8],
+                                 "model": row[9],
+                                 "parent_event_id": None,
+                                 "cost_tokens": 0,
+                                 "execution_duration_seconds": 0.0,
+                             }
+                             await websocket.send_json(event_data)
+
+                     # ===== 2. Poll live_events for spawner streaming =====
+                     # Fetch pending live events that haven't been broadcast yet
+                     live_query = """
+                         SELECT id, event_type, event_data, parent_event_id,
+                                session_id, spawner_type, created_at
+                         FROM live_events
+                         WHERE broadcast_at IS NULL AND id > ?
+                         ORDER BY created_at ASC
+                         LIMIT 50
+                     """
+                     live_cursor = await db.execute(live_query, [last_live_event_id])
+                     live_rows = list(await live_cursor.fetchall())
+
+                     if live_rows:
+                         logger.info(
+                             f"[WebSocket] Found {len(live_rows)} pending live_events to broadcast"
+                         )
+                         has_activity = True
+                         broadcast_ids: list[int] = []
+
+                         for live_row in live_rows:
+                             live_id: int = live_row[0]
+                             event_type: str = live_row[1]
+                             event_data_json: str | None = live_row[2]
+                             parent_event_id: str | None = live_row[3]
+                             session_id: str | None = live_row[4]
+                             spawner_type: str | None = live_row[5]
+                             created_at: str = live_row[6]
+
+                             # Parse event_data JSON
+                             try:
+                                 event_data_parsed = (
+                                     json.loads(event_data_json)
+                                     if event_data_json
+                                     else {}
+                                 )
+                             except (json.JSONDecodeError, TypeError):
+                                 event_data_parsed = {}
+
+                             # Send spawner event to client
+                             spawner_event = {
+                                 "type": "spawner_event",
+                                 "live_event_id": live_id,
+                                 "event_type": event_type,
+                                 "spawner_type": spawner_type,
+                                 "parent_event_id": parent_event_id,
+                                 "session_id": session_id,
+                                 "timestamp": created_at,
+                                 "data": event_data_parsed,
+                             }
+                             logger.info(
+                                 f"[WebSocket] Sending spawner_event: id={live_id}, type={event_type}, spawner={spawner_type}"
+                             )
+                             await websocket.send_json(spawner_event)
+
+                             broadcast_ids.append(live_id)
+                             last_live_event_id = max(last_live_event_id, live_id)
+
+                         # Mark events as broadcast
+                         if broadcast_ids:
+                             logger.info(
+                                 f"[WebSocket] Marking {len(broadcast_ids)} events as broadcast: {broadcast_ids}"
+                             )
+                             placeholders = ",".join("?" for _ in broadcast_ids)
+                             await db.execute(
+                                 f"""
+                                 UPDATE live_events
+                                 SET broadcast_at = CURRENT_TIMESTAMP
+                                 WHERE id IN ({placeholders})
+                                 """,
+                                 broadcast_ids,
+                             )
+                             await db.commit()
+
+                     # ===== 3. Periodic cleanup of old broadcast events =====
+                     # Clean up events older than 5 minutes (every ~10 poll cycles)
+                     if random.random() < 0.1:  # 10% chance each cycle
+                         await db.execute(
+                             """
+                             DELETE FROM live_events
+                             WHERE broadcast_at IS NOT NULL
+                               AND created_at < datetime('now', '-5 minutes')
+                             """
+                         )
+                         await db.commit()
+
+                     # Adjust poll interval based on activity
+                     if has_activity:
+                         poll_interval = 0.3  # Speed up when active
+                     else:
+                         # No new events, increase poll interval (exponential backoff)
+                         poll_interval = min(poll_interval * 1.2, 2.0)
+
+                 finally:
+                     await db.close()
+
+                 # OPTIMIZATION: Reduced sleep interval for faster real-time updates
+                 await asyncio.sleep(poll_interval)
+
+         except WebSocketDisconnect:
+             logger.info("WebSocket client disconnected")
+         except Exception as e:
+             logger.error(f"WebSocket error: {e}")
+             await websocket.close(code=1011)
+
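A minimal client for the stream above, assuming the third-party websockets package and a server on localhost:8000 (both assumptions, not part of this package). Messages arrive as JSON objects whose type field is either "event" (rows from agent_events) or "spawner_event" (rows relayed from live_events); when nothing is happening, the server just backs its poll interval off toward 2 s and sends nothing.

import asyncio
import json

import websockets  # third-party client library, not bundled with htmlgraph


async def tail_events() -> None:
    async with websockets.connect("ws://127.0.0.1:8000/ws/events") as ws:
        while True:
            message = json.loads(await ws.recv())
            if message["type"] == "spawner_event":
                print("spawner:", message["spawner_type"], message["event_type"])
            else:
                print("event:", message["agent_id"], message["event_type"])


asyncio.run(tail_events())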
+     return app
+
+
+ # Create default app instance
+ def create_app(db_path: str | None = None) -> FastAPI:
+     """Create FastAPI app with default database path."""
+     if db_path is None:
+         # Use default database location - htmlgraph.db is the unified database
+         db_path = str(Path.home() / ".htmlgraph" / "htmlgraph.db")
+
+     return get_app(db_path)
+
+
+ # Export for uvicorn
+ app = create_app()
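Because the module ends by building a default app bound to ~/.htmlgraph/htmlgraph.db, it can be served directly by any ASGI server. A sketch using uvicorn; the host and port are arbitrary choices, and importing the module is what triggers create_app():

import uvicorn

# Equivalent to: uvicorn htmlgraph.api.main:app --host 127.0.0.1 --port 8000
uvicorn.run("htmlgraph.api.main:app", host="127.0.0.1", port=8000, reload=False)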