htmlgraph 0.24.2__py3-none-any.whl → 0.25.0__py3-none-any.whl
This diff shows the content of publicly released package versions as published to a supported public registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in that registry.
- htmlgraph/__init__.py +20 -1
- htmlgraph/agent_detection.py +26 -10
- htmlgraph/analytics/cross_session.py +4 -3
- htmlgraph/analytics/work_type.py +52 -16
- htmlgraph/analytics_index.py +51 -19
- htmlgraph/api/__init__.py +3 -0
- htmlgraph/api/main.py +2115 -0
- htmlgraph/api/static/htmx.min.js +1 -0
- htmlgraph/api/static/style-redesign.css +1344 -0
- htmlgraph/api/static/style.css +1079 -0
- htmlgraph/api/templates/dashboard-redesign.html +812 -0
- htmlgraph/api/templates/dashboard.html +783 -0
- htmlgraph/api/templates/partials/activity-feed-hierarchical.html +326 -0
- htmlgraph/api/templates/partials/activity-feed.html +570 -0
- htmlgraph/api/templates/partials/agents-redesign.html +317 -0
- htmlgraph/api/templates/partials/agents.html +317 -0
- htmlgraph/api/templates/partials/event-traces.html +373 -0
- htmlgraph/api/templates/partials/features-kanban-redesign.html +509 -0
- htmlgraph/api/templates/partials/features.html +509 -0
- htmlgraph/api/templates/partials/metrics-redesign.html +346 -0
- htmlgraph/api/templates/partials/metrics.html +346 -0
- htmlgraph/api/templates/partials/orchestration-redesign.html +443 -0
- htmlgraph/api/templates/partials/orchestration.html +163 -0
- htmlgraph/api/templates/partials/spawners.html +375 -0
- htmlgraph/atomic_ops.py +560 -0
- htmlgraph/builders/base.py +55 -1
- htmlgraph/builders/bug.py +17 -2
- htmlgraph/builders/chore.py +17 -2
- htmlgraph/builders/epic.py +17 -2
- htmlgraph/builders/feature.py +25 -2
- htmlgraph/builders/phase.py +17 -2
- htmlgraph/builders/spike.py +27 -2
- htmlgraph/builders/track.py +14 -0
- htmlgraph/cigs/__init__.py +4 -0
- htmlgraph/cigs/reporter.py +818 -0
- htmlgraph/cli.py +1427 -401
- htmlgraph/cli_commands/__init__.py +1 -0
- htmlgraph/cli_commands/feature.py +195 -0
- htmlgraph/cli_framework.py +115 -0
- htmlgraph/collections/__init__.py +2 -0
- htmlgraph/collections/base.py +21 -0
- htmlgraph/collections/session.py +189 -0
- htmlgraph/collections/spike.py +7 -1
- htmlgraph/collections/task_delegation.py +236 -0
- htmlgraph/collections/traces.py +482 -0
- htmlgraph/config.py +113 -0
- htmlgraph/converter.py +41 -0
- htmlgraph/cost_analysis/__init__.py +5 -0
- htmlgraph/cost_analysis/analyzer.py +438 -0
- htmlgraph/dashboard.html +3315 -492
- htmlgraph-0.24.2.data/data/htmlgraph/dashboard.html → htmlgraph/dashboard.html.backup +2246 -248
- htmlgraph/dashboard.html.bak +7181 -0
- htmlgraph/dashboard.html.bak2 +7231 -0
- htmlgraph/dashboard.html.bak3 +7232 -0
- htmlgraph/db/__init__.py +38 -0
- htmlgraph/db/queries.py +790 -0
- htmlgraph/db/schema.py +1334 -0
- htmlgraph/deploy.py +26 -27
- htmlgraph/docs/API_REFERENCE.md +841 -0
- htmlgraph/docs/HTTP_API.md +750 -0
- htmlgraph/docs/INTEGRATION_GUIDE.md +752 -0
- htmlgraph/docs/ORCHESTRATION_PATTERNS.md +710 -0
- htmlgraph/docs/README.md +533 -0
- htmlgraph/docs/version_check.py +3 -1
- htmlgraph/error_handler.py +544 -0
- htmlgraph/event_log.py +2 -0
- htmlgraph/hooks/__init__.py +8 -0
- htmlgraph/hooks/bootstrap.py +169 -0
- htmlgraph/hooks/context.py +271 -0
- htmlgraph/hooks/drift_handler.py +521 -0
- htmlgraph/hooks/event_tracker.py +405 -15
- htmlgraph/hooks/post_tool_use_handler.py +257 -0
- htmlgraph/hooks/pretooluse.py +476 -6
- htmlgraph/hooks/prompt_analyzer.py +648 -0
- htmlgraph/hooks/session_handler.py +583 -0
- htmlgraph/hooks/state_manager.py +501 -0
- htmlgraph/hooks/subagent_stop.py +309 -0
- htmlgraph/hooks/task_enforcer.py +39 -0
- htmlgraph/models.py +111 -15
- htmlgraph/operations/fastapi_server.py +230 -0
- htmlgraph/orchestration/headless_spawner.py +22 -14
- htmlgraph/pydantic_models.py +476 -0
- htmlgraph/quality_gates.py +350 -0
- htmlgraph/repo_hash.py +511 -0
- htmlgraph/sdk.py +348 -10
- htmlgraph/server.py +194 -0
- htmlgraph/session_hooks.py +300 -0
- htmlgraph/session_manager.py +131 -1
- htmlgraph/session_registry.py +587 -0
- htmlgraph/session_state.py +436 -0
- htmlgraph/system_prompts.py +449 -0
- htmlgraph/templates/orchestration-view.html +350 -0
- htmlgraph/track_builder.py +19 -0
- htmlgraph/validation.py +115 -0
- htmlgraph-0.25.0.data/data/htmlgraph/dashboard.html +7417 -0
- {htmlgraph-0.24.2.dist-info → htmlgraph-0.25.0.dist-info}/METADATA +91 -64
- {htmlgraph-0.24.2.dist-info → htmlgraph-0.25.0.dist-info}/RECORD +103 -42
- {htmlgraph-0.24.2.data → htmlgraph-0.25.0.data}/data/htmlgraph/styles.css +0 -0
- {htmlgraph-0.24.2.data → htmlgraph-0.25.0.data}/data/htmlgraph/templates/AGENTS.md.template +0 -0
- {htmlgraph-0.24.2.data → htmlgraph-0.25.0.data}/data/htmlgraph/templates/CLAUDE.md.template +0 -0
- {htmlgraph-0.24.2.data → htmlgraph-0.25.0.data}/data/htmlgraph/templates/GEMINI.md.template +0 -0
- {htmlgraph-0.24.2.dist-info → htmlgraph-0.25.0.dist-info}/WHEEL +0 -0
- {htmlgraph-0.24.2.dist-info → htmlgraph-0.25.0.dist-info}/entry_points.txt +0 -0
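The headline change in 0.25.0 is the new FastAPI observability backend added under htmlgraph/api/ (see main.py below). As a minimal, non-authoritative sketch of how such an app factory is typically served: the get_app(db_path) signature comes from the diff, while the uvicorn invocation and the database path are illustrative assumptions, not documented usage.

# Sketch only. Assumptions: uvicorn is installed, and ".htmlgraph/htmlgraph.db"
# is a hypothetical SQLite path; get_app(db_path) is the factory added in
# htmlgraph/api/main.py in this release.
import uvicorn
from htmlgraph.api.main import get_app

# Build the FastAPI app against a local htmlgraph database.
app = get_app(db_path=".htmlgraph/htmlgraph.db")

if __name__ == "__main__":
    # Serve the dashboard locally for inspection.
    uvicorn.run(app, host="127.0.0.1", port=8000)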
htmlgraph/api/main.py
ADDED
@@ -0,0 +1,2115 @@
"""
HtmlGraph FastAPI Backend - Real-time Agent Observability Dashboard

Provides REST API and WebSocket support for viewing:
- Agent activity feed with real-time event streaming
- Orchestration chains and delegation handoffs
- Feature tracker with Kanban views
- Session metrics and performance analytics

Architecture:
- FastAPI backend querying SQLite database
- Jinja2 templates for server-side rendering
- HTMX for interactive UI without page reloads
- WebSocket for real-time event streaming
"""

import asyncio
import json
import logging
import sqlite3
import time
from datetime import datetime
from pathlib import Path
from typing import Any

import aiosqlite
from fastapi import FastAPI, Request, WebSocket, WebSocketDisconnect
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from pydantic import BaseModel

logger = logging.getLogger(__name__)


class QueryCache:
    """Simple in-memory cache with TTL support for query results."""

    def __init__(self, ttl_seconds: float = 30.0):
        """Initialize query cache with TTL."""
        self.cache: dict[str, tuple[Any, float]] = {}
        self.ttl_seconds = ttl_seconds
        self.metrics: dict[str, dict[str, float]] = {}

    def get(self, key: str) -> Any | None:
        """Get cached value if exists and not expired."""
        if key not in self.cache:
            return None

        value, timestamp = self.cache[key]
        if time.time() - timestamp > self.ttl_seconds:
            del self.cache[key]
            return None

        return value

    def set(self, key: str, value: Any) -> None:
        """Store value with current timestamp."""
        self.cache[key] = (value, time.time())

    def record_metric(self, key: str, query_time_ms: float, cache_hit: bool) -> None:
        """Record performance metrics for a query."""
        if key not in self.metrics:
            self.metrics[key] = {"count": 0, "total_ms": 0, "avg_ms": 0, "hits": 0}

        metrics = self.metrics[key]
        metrics["count"] += 1
        metrics["total_ms"] += query_time_ms
        metrics["avg_ms"] = metrics["total_ms"] / metrics["count"]
        if cache_hit:
            metrics["hits"] += 1

    def get_metrics(self) -> dict[str, dict[str, float]]:
        """Get all collected metrics."""
        return self.metrics


class EventModel(BaseModel):
    """Event data model for API responses."""

    event_id: str
    agent_id: str
    event_type: str
    timestamp: str
    tool_name: str | None = None
    input_summary: str | None = None
    output_summary: str | None = None
    session_id: str
    parent_event_id: str | None = None
    status: str


class FeatureModel(BaseModel):
    """Feature data model for API responses."""

    id: str
    type: str
    title: str
    description: str | None = None
    status: str
    priority: str
    assigned_to: str | None = None
    created_at: str
    updated_at: str
    completed_at: str | None = None


class SessionModel(BaseModel):
    """Session data model for API responses."""

    session_id: str
    agent: str | None = None
    status: str
    started_at: str
    ended_at: str | None = None
    event_count: int = 0
    duration_seconds: float | None = None


def _ensure_database_initialized(db_path: str) -> None:
    """
    Ensure SQLite database exists and has correct schema.

    Args:
        db_path: Path to SQLite database file
    """
    db_file = Path(db_path)
    db_file.parent.mkdir(parents=True, exist_ok=True)

    # Check if database exists and has tables
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        # Query existing tables
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
        tables = cursor.fetchall()
        table_names = [t[0] for t in tables]

        if not table_names:
            # Database is empty, create schema
            logger.info(f"Creating database schema at {db_path}")
            from htmlgraph.db.schema import HtmlGraphDB

            db = HtmlGraphDB(db_path)
            db.connect()
            db.create_tables()
            db.disconnect()
            logger.info("Database schema created successfully")
        else:
            logger.debug(f"Database already initialized with tables: {table_names}")

        conn.close()

    except sqlite3.Error as e:
        logger.warning(f"Database check warning: {e}")
        # Try to create anyway
        try:
            from htmlgraph.db.schema import HtmlGraphDB

            db = HtmlGraphDB(db_path)
            db.connect()
            db.create_tables()
            db.disconnect()
        except Exception as create_error:
            logger.error(f"Failed to create database: {create_error}")
            raise

def get_app(db_path: str) -> FastAPI:
    """
    Create and configure FastAPI application.

    Args:
        db_path: Path to SQLite database file

    Returns:
        Configured FastAPI application instance
    """
    # Ensure database is initialized
    _ensure_database_initialized(db_path)

    app = FastAPI(
        title="HtmlGraph Dashboard API",
        description="Real-time agent observability dashboard",
        version="0.1.0",
    )

    # Store database path and query cache in app state
    app.state.db_path = db_path
    app.state.query_cache = QueryCache(ttl_seconds=30.0)

    # Setup Jinja2 templates
    template_dir = Path(__file__).parent / "templates"
    template_dir.mkdir(parents=True, exist_ok=True)
    templates = Jinja2Templates(directory=str(template_dir))

    # Add custom filters
    def format_number(value: int | None) -> str:
        if value is None:
            return "0"
        return f"{value:,}"

    templates.env.filters["format_number"] = format_number

    # Setup static files
    static_dir = Path(__file__).parent / "static"
    static_dir.mkdir(parents=True, exist_ok=True)
    if static_dir.exists():
        app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")

    # ========== DATABASE HELPERS ==========

    async def get_db() -> aiosqlite.Connection:
        """Get database connection."""
        db = await aiosqlite.connect(app.state.db_path)
        db.row_factory = aiosqlite.Row
        return db

    # ========== ROUTES ==========

    @app.get("/", response_class=HTMLResponse)
    async def dashboard(request: Request) -> HTMLResponse:
        """Main dashboard view with navigation tabs."""
        return templates.TemplateResponse(
            "dashboard-redesign.html",
            {
                "request": request,
                "title": "HtmlGraph Agent Observability",
            },
        )

    # ========== AGENTS ENDPOINTS ==========

    @app.get("/views/agents", response_class=HTMLResponse)
    async def agents_view(request: Request) -> HTMLResponse:
        """Get agent workload and performance stats as HTMX partial."""
        db = await get_db()
        cache = app.state.query_cache
        query_start_time = time.time()

        try:
            # Create cache key for agents view
            cache_key = "agents_view:all"

            # Check cache first
            cached_response = cache.get(cache_key)
            if cached_response is not None:
                query_time_ms = (time.time() - query_start_time) * 1000
                cache.record_metric(cache_key, query_time_ms, cache_hit=True)
                logger.debug(
                    f"Cache HIT for agents_view (key={cache_key}, time={query_time_ms:.2f}ms)"
                )
                agents, total_actions, total_tokens = cached_response
            else:
                # Query agent statistics from 'agent_events' table joined with sessions
                # Optimized with GROUP BY on indexed column
                query = """
                    SELECT
                        e.agent_id,
                        COUNT(*) as event_count,
                        SUM(e.cost_tokens) as total_tokens,
                        COUNT(DISTINCT e.session_id) as session_count,
                        MAX(e.timestamp) as last_active
                    FROM agent_events e
                    GROUP BY e.agent_id
                    ORDER BY event_count DESC
                """

                # Execute query with timing
                exec_start = time.time()
                cursor = await db.execute(query)
                rows = await cursor.fetchall()
                exec_time_ms = (time.time() - exec_start) * 1000

                agents = []
                total_actions = 0
                total_tokens = 0

                # First pass to calculate totals
                for row in rows:
                    total_actions += row[1]
                    total_tokens += row[2] or 0

                # Second pass to build agent objects with percentages
                for row in rows:
                    event_count = row[1]
                    workload_pct = (
                        (event_count / total_actions * 100) if total_actions > 0 else 0
                    )

                    agents.append(
                        {
                            "agent_id": row[0],
                            "event_count": event_count,
                            "total_tokens": row[2] or 0,
                            "session_count": row[3],
                            "last_active": row[4],
                            "workload_pct": round(workload_pct, 1),
                        }
                    )

                # Cache the results
                cache_data = (agents, total_actions, total_tokens)
                cache.set(cache_key, cache_data)
                query_time_ms = (time.time() - query_start_time) * 1000
                cache.record_metric(cache_key, exec_time_ms, cache_hit=False)
                logger.debug(
                    f"Cache MISS for agents_view (key={cache_key}, "
                    f"db_time={exec_time_ms:.2f}ms, total_time={query_time_ms:.2f}ms, "
                    f"agents={len(agents)})"
                )

            return templates.TemplateResponse(
                "partials/agents.html",
                {
                    "request": request,
                    "agents": agents,
                    "total_agents": len(agents),
                    "total_actions": total_actions,
                    "total_tokens": total_tokens,
                },
            )
        finally:
            await db.close()

    # ========== ACTIVITY FEED ENDPOINTS ==========

@app.get("/views/activity-feed", response_class=HTMLResponse)
|
|
330
|
+
async def activity_feed(
|
|
331
|
+
request: Request,
|
|
332
|
+
limit: int = 50,
|
|
333
|
+
session_id: str | None = None,
|
|
334
|
+
agent_id: str | None = None,
|
|
335
|
+
) -> HTMLResponse:
|
|
336
|
+
"""Get latest agent events grouped by conversation turn (user prompt).
|
|
337
|
+
|
|
338
|
+
Returns grouped activity feed showing conversation turns with their child events.
|
|
339
|
+
"""
|
|
340
|
+
db = await get_db()
|
|
341
|
+
cache = app.state.query_cache
|
|
342
|
+
|
|
343
|
+
try:
|
|
344
|
+
# Call the helper function to get grouped events
|
|
345
|
+
grouped_result = await _get_events_grouped_by_prompt_impl(db, cache, limit)
|
|
346
|
+
|
|
347
|
+
return templates.TemplateResponse(
|
|
348
|
+
"partials/activity-feed.html",
|
|
349
|
+
{
|
|
350
|
+
"request": request,
|
|
351
|
+
"conversation_turns": grouped_result.get("conversation_turns", []),
|
|
352
|
+
"total_turns": grouped_result.get("total_turns", 0),
|
|
353
|
+
"limit": limit,
|
|
354
|
+
},
|
|
355
|
+
)
|
|
356
|
+
finally:
|
|
357
|
+
await db.close()
|
|
358
|
+
|
|
359
|
+
@app.get("/api/events", response_model=list[EventModel])
|
|
360
|
+
async def get_events(
|
|
361
|
+
limit: int = 50,
|
|
362
|
+
session_id: str | None = None,
|
|
363
|
+
agent_id: str | None = None,
|
|
364
|
+
offset: int = 0,
|
|
365
|
+
) -> list[EventModel]:
|
|
366
|
+
"""Get events as JSON API with parent-child hierarchical linking."""
|
|
367
|
+
db = await get_db()
|
|
368
|
+
cache = app.state.query_cache
|
|
369
|
+
query_start_time = time.time()
|
|
370
|
+
|
|
371
|
+
try:
|
|
372
|
+
# Create cache key from query parameters
|
|
373
|
+
cache_key = (
|
|
374
|
+
f"api_events:{limit}:{offset}:{session_id or 'all'}:{agent_id or 'all'}"
|
|
375
|
+
)
|
|
376
|
+
|
|
377
|
+
# Check cache first
|
|
378
|
+
cached_results = cache.get(cache_key)
|
|
379
|
+
if cached_results is not None:
|
|
380
|
+
query_time_ms = (time.time() - query_start_time) * 1000
|
|
381
|
+
cache.record_metric(cache_key, query_time_ms, cache_hit=True)
|
|
382
|
+
logger.debug(
|
|
383
|
+
f"Cache HIT for api_events (key={cache_key}, time={query_time_ms:.2f}ms)"
|
|
384
|
+
)
|
|
385
|
+
return list(cached_results) if isinstance(cached_results, list) else []
|
|
386
|
+
else:
|
|
387
|
+
# Query from 'agent_events' table from Phase 1 PreToolUse hook implementation
|
|
388
|
+
# Optimized with column selection and proper indexing
|
|
389
|
+
query = """
|
|
390
|
+
SELECT e.event_id, e.agent_id, e.event_type, e.timestamp, e.tool_name,
|
|
391
|
+
e.input_summary, e.output_summary, e.session_id,
|
|
392
|
+
e.status
|
|
393
|
+
FROM agent_events e
|
|
394
|
+
WHERE 1=1
|
|
395
|
+
"""
|
|
396
|
+
params: list = []
|
|
397
|
+
|
|
398
|
+
if session_id:
|
|
399
|
+
query += " AND e.session_id = ?"
|
|
400
|
+
params.append(session_id)
|
|
401
|
+
|
|
402
|
+
if agent_id:
|
|
403
|
+
query += " AND e.agent_id = ?"
|
|
404
|
+
params.append(agent_id)
|
|
405
|
+
|
|
406
|
+
query += " ORDER BY e.timestamp DESC LIMIT ? OFFSET ?"
|
|
407
|
+
params.extend([limit, offset])
|
|
408
|
+
|
|
409
|
+
# Execute query with timing
|
|
410
|
+
exec_start = time.time()
|
|
411
|
+
cursor = await db.execute(query, params)
|
|
412
|
+
rows = await cursor.fetchall()
|
|
413
|
+
exec_time_ms = (time.time() - exec_start) * 1000
|
|
414
|
+
|
|
415
|
+
# Build result models
|
|
416
|
+
results = [
|
|
417
|
+
EventModel(
|
|
418
|
+
event_id=row[0],
|
|
419
|
+
agent_id=row[1] or "unknown",
|
|
420
|
+
event_type=row[2],
|
|
421
|
+
timestamp=row[3],
|
|
422
|
+
tool_name=row[4],
|
|
423
|
+
input_summary=row[5],
|
|
424
|
+
output_summary=row[6],
|
|
425
|
+
session_id=row[7],
|
|
426
|
+
parent_event_id=None, # Not available in all schema versions
|
|
427
|
+
status=row[8],
|
|
428
|
+
)
|
|
429
|
+
for row in rows
|
|
430
|
+
]
|
|
431
|
+
|
|
432
|
+
# Cache the results
|
|
433
|
+
cache.set(cache_key, results)
|
|
434
|
+
query_time_ms = (time.time() - query_start_time) * 1000
|
|
435
|
+
cache.record_metric(cache_key, exec_time_ms, cache_hit=False)
|
|
436
|
+
logger.debug(
|
|
437
|
+
f"Cache MISS for api_events (key={cache_key}, "
|
|
438
|
+
f"db_time={exec_time_ms:.2f}ms, total_time={query_time_ms:.2f}ms, "
|
|
439
|
+
f"rows={len(results)})"
|
|
440
|
+
)
|
|
441
|
+
|
|
442
|
+
return results
|
|
443
|
+
finally:
|
|
444
|
+
await db.close()
|
|
445
|
+
|
|
446
|
+
# ========== INITIAL STATS ENDPOINT ==========
|
|
447
|
+
|
|
448
|
+
@app.get("/api/initial-stats")
|
|
449
|
+
async def initial_stats() -> dict[str, Any]:
|
|
450
|
+
"""Get initial statistics for dashboard header (events, agents, sessions)."""
|
|
451
|
+
db = await get_db()
|
|
452
|
+
try:
|
|
453
|
+
# Query all stats in a single query for efficiency
|
|
454
|
+
stats_query = """
|
|
455
|
+
SELECT
|
|
456
|
+
(SELECT COUNT(*) FROM agent_events) as total_events,
|
|
457
|
+
(SELECT COUNT(DISTINCT agent_id) FROM agent_events) as total_agents,
|
|
458
|
+
(SELECT COUNT(*) FROM sessions) as total_sessions
|
|
459
|
+
"""
|
|
460
|
+
cursor = await db.execute(stats_query)
|
|
461
|
+
row = await cursor.fetchone()
|
|
462
|
+
|
|
463
|
+
# Query distinct agent IDs for the agent set
|
|
464
|
+
agents_query = (
|
|
465
|
+
"SELECT DISTINCT agent_id FROM agent_events WHERE agent_id IS NOT NULL"
|
|
466
|
+
)
|
|
467
|
+
agents_cursor = await db.execute(agents_query)
|
|
468
|
+
agents_rows = await agents_cursor.fetchall()
|
|
469
|
+
agents = [row[0] for row in agents_rows]
|
|
470
|
+
|
|
471
|
+
if row is None:
|
|
472
|
+
return {
|
|
473
|
+
"total_events": 0,
|
|
474
|
+
"total_agents": 0,
|
|
475
|
+
"total_sessions": 0,
|
|
476
|
+
"agents": agents,
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
return {
|
|
480
|
+
"total_events": int(row[0]) if row[0] else 0,
|
|
481
|
+
"total_agents": int(row[1]) if row[1] else 0,
|
|
482
|
+
"total_sessions": int(row[2]) if row[2] else 0,
|
|
483
|
+
"agents": agents,
|
|
484
|
+
}
|
|
485
|
+
finally:
|
|
486
|
+
await db.close()
|
|
487
|
+
|
|
488
|
+
# ========== PERFORMANCE METRICS ENDPOINT ==========
|
|
489
|
+
|
|
490
|
+
@app.get("/api/query-metrics")
|
|
491
|
+
async def get_query_metrics() -> dict[str, Any]:
|
|
492
|
+
"""Get query performance metrics and cache statistics."""
|
|
493
|
+
cache = app.state.query_cache
|
|
494
|
+
metrics = cache.get_metrics()
|
|
495
|
+
|
|
496
|
+
# Calculate aggregate statistics
|
|
497
|
+
total_queries = sum(m.get("count", 0) for m in metrics.values())
|
|
498
|
+
total_cache_hits = sum(m.get("hits", 0) for m in metrics.values())
|
|
499
|
+
hit_rate = (total_cache_hits / total_queries * 100) if total_queries > 0 else 0
|
|
500
|
+
|
|
501
|
+
return {
|
|
502
|
+
"timestamp": datetime.now().isoformat(),
|
|
503
|
+
"cache_status": {
|
|
504
|
+
"ttl_seconds": cache.ttl_seconds,
|
|
505
|
+
"cached_queries": len(cache.cache),
|
|
506
|
+
"total_queries_tracked": total_queries,
|
|
507
|
+
"cache_hits": total_cache_hits,
|
|
508
|
+
"cache_hit_rate_percent": round(hit_rate, 2),
|
|
509
|
+
},
|
|
510
|
+
"query_metrics": metrics,
|
|
511
|
+
}
|
|
512
|
+
|
|
513
|
+
# ========== EVENT TRACES ENDPOINT (Parent-Child Nesting) ==========
|
|
514
|
+
|
|
515
|
+
@app.get("/api/event-traces")
|
|
516
|
+
async def get_event_traces(
|
|
517
|
+
limit: int = 50,
|
|
518
|
+
session_id: str | None = None,
|
|
519
|
+
) -> dict[str, Any]:
|
|
520
|
+
"""
|
|
521
|
+
Get event traces showing parent-child relationships for Task delegations.
|
|
522
|
+
|
|
523
|
+
This endpoint returns task delegation events with their child events,
|
|
524
|
+
showing the complete hierarchy of delegated work:
|
|
525
|
+
|
|
526
|
+
Example:
|
|
527
|
+
{
|
|
528
|
+
"traces": [
|
|
529
|
+
{
|
|
530
|
+
"parent_event_id": "evt-abc123",
|
|
531
|
+
"agent_id": "claude-code",
|
|
532
|
+
"subagent_type": "gemini-spawner",
|
|
533
|
+
"started_at": "2025-01-08T16:40:54",
|
|
534
|
+
"status": "completed",
|
|
535
|
+
"duration_seconds": 287,
|
|
536
|
+
"child_events": [
|
|
537
|
+
{
|
|
538
|
+
"event_id": "subevt-xyz789",
|
|
539
|
+
"agent_id": "subagent-gemini-spawner",
|
|
540
|
+
"event_type": "delegation",
|
|
541
|
+
"timestamp": "2025-01-08T16:42:01",
|
|
542
|
+
"status": "completed"
|
|
543
|
+
}
|
|
544
|
+
],
|
|
545
|
+
"child_spike_count": 2,
|
|
546
|
+
"child_spikes": ["spk-001", "spk-002"]
|
|
547
|
+
}
|
|
548
|
+
]
|
|
549
|
+
}
|
|
550
|
+
|
|
551
|
+
Args:
|
|
552
|
+
limit: Maximum number of parent events to return (default 50)
|
|
553
|
+
session_id: Filter by session (optional)
|
|
554
|
+
|
|
555
|
+
Returns:
|
|
556
|
+
Dict with traces array showing parent-child relationships
|
|
557
|
+
"""
|
|
558
|
+
db = await get_db()
|
|
559
|
+
cache = app.state.query_cache
|
|
560
|
+
query_start_time = time.time()
|
|
561
|
+
|
|
562
|
+
try:
|
|
563
|
+
# Create cache key
|
|
564
|
+
cache_key = f"event_traces:{limit}:{session_id or 'all'}"
|
|
565
|
+
|
|
566
|
+
# Check cache first
|
|
567
|
+
cached_result = cache.get(cache_key)
|
|
568
|
+
if cached_result is not None:
|
|
569
|
+
query_time_ms = (time.time() - query_start_time) * 1000
|
|
570
|
+
cache.record_metric(cache_key, query_time_ms, cache_hit=True)
|
|
571
|
+
return cached_result # type: ignore[no-any-return]
|
|
572
|
+
|
|
573
|
+
exec_start = time.time()
|
|
574
|
+
|
|
575
|
+
# Query parent events (task delegations)
|
|
576
|
+
parent_query = """
|
|
577
|
+
SELECT event_id, agent_id, subagent_type, timestamp, status,
|
|
578
|
+
child_spike_count, output_summary
|
|
579
|
+
FROM agent_events
|
|
580
|
+
WHERE event_type = 'task_delegation'
|
|
581
|
+
"""
|
|
582
|
+
parent_params: list[Any] = []
|
|
583
|
+
|
|
584
|
+
if session_id:
|
|
585
|
+
parent_query += " AND session_id = ?"
|
|
586
|
+
parent_params.append(session_id)
|
|
587
|
+
|
|
588
|
+
parent_query += " ORDER BY timestamp DESC LIMIT ?"
|
|
589
|
+
parent_params.append(limit)
|
|
590
|
+
|
|
591
|
+
cursor = await db.execute(parent_query, parent_params)
|
|
592
|
+
parent_rows = await cursor.fetchall()
|
|
593
|
+
|
|
594
|
+
traces: list[dict[str, Any]] = []
|
|
595
|
+
|
|
596
|
+
for parent_row in parent_rows:
|
|
597
|
+
parent_event_id = parent_row[0]
|
|
598
|
+
agent_id = parent_row[1]
|
|
599
|
+
subagent_type = parent_row[2]
|
|
600
|
+
started_at = parent_row[3]
|
|
601
|
+
status = parent_row[4]
|
|
602
|
+
child_spike_count = parent_row[5] or 0
|
|
603
|
+
output_summary = parent_row[6]
|
|
604
|
+
|
|
605
|
+
# Parse output summary to get child spike IDs if available
|
|
606
|
+
child_spikes = []
|
|
607
|
+
try:
|
|
608
|
+
if output_summary:
|
|
609
|
+
output_data = (
|
|
610
|
+
json.loads(output_summary)
|
|
611
|
+
if isinstance(output_summary, str)
|
|
612
|
+
else output_summary
|
|
613
|
+
)
|
|
614
|
+
# Try to extract spike IDs if present
|
|
615
|
+
if isinstance(output_data, dict):
|
|
616
|
+
spikes_info = output_data.get("spikes_created", [])
|
|
617
|
+
if isinstance(spikes_info, list):
|
|
618
|
+
child_spikes = spikes_info
|
|
619
|
+
except Exception:
|
|
620
|
+
pass
|
|
621
|
+
|
|
622
|
+
# Query child events (subagent completion events)
|
|
623
|
+
child_query = """
|
|
624
|
+
SELECT event_id, agent_id, event_type, timestamp, status
|
|
625
|
+
FROM agent_events
|
|
626
|
+
WHERE parent_event_id = ?
|
|
627
|
+
ORDER BY timestamp ASC
|
|
628
|
+
"""
|
|
629
|
+
child_cursor = await db.execute(child_query, (parent_event_id,))
|
|
630
|
+
child_rows = await child_cursor.fetchall()
|
|
631
|
+
|
|
632
|
+
child_events = []
|
|
633
|
+
for child_row in child_rows:
|
|
634
|
+
child_events.append(
|
|
635
|
+
{
|
|
636
|
+
"event_id": child_row[0],
|
|
637
|
+
"agent_id": child_row[1],
|
|
638
|
+
"event_type": child_row[2],
|
|
639
|
+
"timestamp": child_row[3],
|
|
640
|
+
"status": child_row[4],
|
|
641
|
+
}
|
|
642
|
+
)
|
|
643
|
+
|
|
644
|
+
# Calculate duration if completed
|
|
645
|
+
duration_seconds = None
|
|
646
|
+
if status == "completed" and started_at:
|
|
647
|
+
try:
|
|
648
|
+
from datetime import datetime as dt
|
|
649
|
+
|
|
650
|
+
start_dt = dt.fromisoformat(started_at)
|
|
651
|
+
now_dt = dt.now()
|
|
652
|
+
duration_seconds = (now_dt - start_dt).total_seconds()
|
|
653
|
+
except Exception:
|
|
654
|
+
pass
|
|
655
|
+
|
|
656
|
+
trace = {
|
|
657
|
+
"parent_event_id": parent_event_id,
|
|
658
|
+
"agent_id": agent_id,
|
|
659
|
+
"subagent_type": subagent_type or "general-purpose",
|
|
660
|
+
"started_at": started_at,
|
|
661
|
+
"status": status,
|
|
662
|
+
"duration_seconds": duration_seconds,
|
|
663
|
+
"child_events": child_events,
|
|
664
|
+
"child_spike_count": child_spike_count,
|
|
665
|
+
"child_spikes": child_spikes,
|
|
666
|
+
}
|
|
667
|
+
|
|
668
|
+
traces.append(trace)
|
|
669
|
+
|
|
670
|
+
exec_time_ms = (time.time() - exec_start) * 1000
|
|
671
|
+
|
|
672
|
+
# Build response
|
|
673
|
+
result = {
|
|
674
|
+
"timestamp": datetime.now().isoformat(),
|
|
675
|
+
"total_traces": len(traces),
|
|
676
|
+
"traces": traces,
|
|
677
|
+
"limitations": {
|
|
678
|
+
"note": "Child spike count is approximate and based on timestamp proximity",
|
|
679
|
+
"note_2": "Spike IDs in child_spikes only available if recorded in output_summary",
|
|
680
|
+
},
|
|
681
|
+
}
|
|
682
|
+
|
|
683
|
+
# Cache the result
|
|
684
|
+
cache.set(cache_key, result)
|
|
685
|
+
query_time_ms = (time.time() - query_start_time) * 1000
|
|
686
|
+
cache.record_metric(cache_key, exec_time_ms, cache_hit=False)
|
|
687
|
+
logger.debug(
|
|
688
|
+
f"Cache MISS for event_traces (key={cache_key}, "
|
|
689
|
+
f"db_time={exec_time_ms:.2f}ms, total_time={query_time_ms:.2f}ms, "
|
|
690
|
+
f"traces={len(traces)})"
|
|
691
|
+
)
|
|
692
|
+
|
|
693
|
+
return result
|
|
694
|
+
|
|
695
|
+
finally:
|
|
696
|
+
await db.close()
|
|
697
|
+
|
|
698
|
+
# ========== COMPLETE ACTIVITY FEED ENDPOINT ==========
|
|
699
|
+
|
|
700
|
+
@app.get("/api/complete-activity-feed")
|
|
701
|
+
async def complete_activity_feed(
|
|
702
|
+
limit: int = 100,
|
|
703
|
+
session_id: str | None = None,
|
|
704
|
+
include_delegations: bool = True,
|
|
705
|
+
include_spikes: bool = True,
|
|
706
|
+
) -> dict[str, Any]:
|
|
707
|
+
"""
|
|
708
|
+
Get unified activity feed combining events from all sources.
|
|
709
|
+
|
|
710
|
+
This endpoint aggregates:
|
|
711
|
+
- Hook events (tool_call from PreToolUse)
|
|
712
|
+
- Subagent events (delegation completions from SubagentStop)
|
|
713
|
+
- SDK spike logs (knowledge created by delegated tasks)
|
|
714
|
+
|
|
715
|
+
This provides complete visibility into ALL activity, including
|
|
716
|
+
delegated work that would otherwise be invisible due to Claude Code's
|
|
717
|
+
hook isolation design (see GitHub issue #14859).
|
|
718
|
+
|
|
719
|
+
Args:
|
|
720
|
+
limit: Maximum number of events to return
|
|
721
|
+
session_id: Filter by session (optional)
|
|
722
|
+
include_delegations: Include delegation events (default True)
|
|
723
|
+
include_spikes: Include spike creation events (default True)
|
|
724
|
+
|
|
725
|
+
Returns:
|
|
726
|
+
Dict with events array and metadata
|
|
727
|
+
"""
|
|
728
|
+
db = await get_db()
|
|
729
|
+
cache = app.state.query_cache
|
|
730
|
+
query_start_time = time.time()
|
|
731
|
+
|
|
732
|
+
try:
|
|
733
|
+
# Create cache key
|
|
734
|
+
cache_key = f"complete_activity:{limit}:{session_id or 'all'}:{include_delegations}:{include_spikes}"
|
|
735
|
+
|
|
736
|
+
# Check cache first
|
|
737
|
+
cached_result = cache.get(cache_key)
|
|
738
|
+
if cached_result is not None:
|
|
739
|
+
query_time_ms = (time.time() - query_start_time) * 1000
|
|
740
|
+
cache.record_metric(cache_key, query_time_ms, cache_hit=True)
|
|
741
|
+
return cached_result # type: ignore[no-any-return]
|
|
742
|
+
|
|
743
|
+
events: list[dict[str, Any]] = []
|
|
744
|
+
|
|
745
|
+
# 1. Query hook events (tool_call, delegation from agent_events)
|
|
746
|
+
event_types = ["tool_call"]
|
|
747
|
+
if include_delegations:
|
|
748
|
+
event_types.extend(["delegation", "completion"])
|
|
749
|
+
|
|
750
|
+
event_type_placeholders = ",".join("?" for _ in event_types)
|
|
751
|
+
query = f"""
|
|
752
|
+
SELECT
|
|
753
|
+
'hook_event' as source,
|
|
754
|
+
event_id,
|
|
755
|
+
agent_id,
|
|
756
|
+
event_type,
|
|
757
|
+
timestamp,
|
|
758
|
+
tool_name,
|
|
759
|
+
input_summary,
|
|
760
|
+
output_summary,
|
|
761
|
+
session_id,
|
|
762
|
+
status
|
|
763
|
+
FROM agent_events
|
|
764
|
+
WHERE event_type IN ({event_type_placeholders})
|
|
765
|
+
"""
|
|
766
|
+
params: list[Any] = list(event_types)
|
|
767
|
+
|
|
768
|
+
if session_id:
|
|
769
|
+
query += " AND session_id = ?"
|
|
770
|
+
params.append(session_id)
|
|
771
|
+
|
|
772
|
+
query += " ORDER BY timestamp DESC LIMIT ?"
|
|
773
|
+
params.append(limit)
|
|
774
|
+
|
|
775
|
+
exec_start = time.time()
|
|
776
|
+
cursor = await db.execute(query, params)
|
|
777
|
+
rows = await cursor.fetchall()
|
|
778
|
+
|
|
779
|
+
for row in rows:
|
|
780
|
+
events.append(
|
|
781
|
+
{
|
|
782
|
+
"source": row[0],
|
|
783
|
+
"event_id": row[1],
|
|
784
|
+
"agent_id": row[2] or "unknown",
|
|
785
|
+
"event_type": row[3],
|
|
786
|
+
"timestamp": row[4],
|
|
787
|
+
"tool_name": row[5],
|
|
788
|
+
"input_summary": row[6],
|
|
789
|
+
"output_summary": row[7],
|
|
790
|
+
"session_id": row[8],
|
|
791
|
+
"status": row[9],
|
|
792
|
+
}
|
|
793
|
+
)
|
|
794
|
+
|
|
795
|
+
# 2. Query spike logs if requested (knowledge created by delegated tasks)
|
|
796
|
+
if include_spikes:
|
|
797
|
+
try:
|
|
798
|
+
spike_query = """
|
|
799
|
+
SELECT
|
|
800
|
+
'spike_log' as source,
|
|
801
|
+
id as event_id,
|
|
802
|
+
assigned_to as agent_id,
|
|
803
|
+
'knowledge_created' as event_type,
|
|
804
|
+
created_at as timestamp,
|
|
805
|
+
title as tool_name,
|
|
806
|
+
hypothesis as input_summary,
|
|
807
|
+
findings as output_summary,
|
|
808
|
+
NULL as session_id,
|
|
809
|
+
status
|
|
810
|
+
FROM features
|
|
811
|
+
WHERE type = 'spike'
|
|
812
|
+
"""
|
|
813
|
+
spike_params: list[Any] = []
|
|
814
|
+
|
|
815
|
+
spike_query += " ORDER BY created_at DESC LIMIT ?"
|
|
816
|
+
spike_params.append(limit)
|
|
817
|
+
|
|
818
|
+
spike_cursor = await db.execute(spike_query, spike_params)
|
|
819
|
+
spike_rows = await spike_cursor.fetchall()
|
|
820
|
+
|
|
821
|
+
for row in spike_rows:
|
|
822
|
+
events.append(
|
|
823
|
+
{
|
|
824
|
+
"source": row[0],
|
|
825
|
+
"event_id": row[1],
|
|
826
|
+
"agent_id": row[2] or "sdk",
|
|
827
|
+
"event_type": row[3],
|
|
828
|
+
"timestamp": row[4],
|
|
829
|
+
"tool_name": row[5],
|
|
830
|
+
"input_summary": row[6],
|
|
831
|
+
"output_summary": row[7],
|
|
832
|
+
"session_id": row[8],
|
|
833
|
+
"status": row[9] or "completed",
|
|
834
|
+
}
|
|
835
|
+
)
|
|
836
|
+
except Exception as e:
|
|
837
|
+
# Spike query might fail if columns don't exist
|
|
838
|
+
logger.debug(
|
|
839
|
+
f"Spike query failed (expected if schema differs): {e}"
|
|
840
|
+
)
|
|
841
|
+
|
|
842
|
+
# 3. Query delegation handoffs from agent_collaboration
|
|
843
|
+
if include_delegations:
|
|
844
|
+
try:
|
|
845
|
+
collab_query = """
|
|
846
|
+
SELECT
|
|
847
|
+
'delegation' as source,
|
|
848
|
+
handoff_id as event_id,
|
|
849
|
+
from_agent || ' -> ' || to_agent as agent_id,
|
|
850
|
+
'handoff' as event_type,
|
|
851
|
+
timestamp,
|
|
852
|
+
handoff_type as tool_name,
|
|
853
|
+
reason as input_summary,
|
|
854
|
+
context as output_summary,
|
|
855
|
+
session_id,
|
|
856
|
+
status
|
|
857
|
+
FROM agent_collaboration
|
|
858
|
+
WHERE handoff_type = 'delegation'
|
|
859
|
+
"""
|
|
860
|
+
collab_params: list[Any] = []
|
|
861
|
+
|
|
862
|
+
if session_id:
|
|
863
|
+
collab_query += " AND session_id = ?"
|
|
864
|
+
collab_params.append(session_id)
|
|
865
|
+
|
|
866
|
+
collab_query += " ORDER BY timestamp DESC LIMIT ?"
|
|
867
|
+
collab_params.append(limit)
|
|
868
|
+
|
|
869
|
+
collab_cursor = await db.execute(collab_query, collab_params)
|
|
870
|
+
collab_rows = await collab_cursor.fetchall()
|
|
871
|
+
|
|
872
|
+
for row in collab_rows:
|
|
873
|
+
events.append(
|
|
874
|
+
{
|
|
875
|
+
"source": row[0],
|
|
876
|
+
"event_id": row[1],
|
|
877
|
+
"agent_id": row[2] or "orchestrator",
|
|
878
|
+
"event_type": row[3],
|
|
879
|
+
"timestamp": row[4],
|
|
880
|
+
"tool_name": row[5],
|
|
881
|
+
"input_summary": row[6],
|
|
882
|
+
"output_summary": row[7],
|
|
883
|
+
"session_id": row[8],
|
|
884
|
+
"status": row[9] or "pending",
|
|
885
|
+
}
|
|
886
|
+
)
|
|
887
|
+
except Exception as e:
|
|
888
|
+
logger.debug(f"Collaboration query failed: {e}")
|
|
889
|
+
|
|
890
|
+
# Sort all events by timestamp DESC
|
|
891
|
+
events.sort(key=lambda e: e.get("timestamp", ""), reverse=True)
|
|
892
|
+
|
|
893
|
+
# Limit to requested count
|
|
894
|
+
events = events[:limit]
|
|
895
|
+
|
|
896
|
+
exec_time_ms = (time.time() - exec_start) * 1000
|
|
897
|
+
|
|
898
|
+
# Build response
|
|
899
|
+
result = {
|
|
900
|
+
"timestamp": datetime.now().isoformat(),
|
|
901
|
+
"total_events": len(events),
|
|
902
|
+
"sources": {
|
|
903
|
+
"hook_events": sum(
|
|
904
|
+
1 for e in events if e["source"] == "hook_event"
|
|
905
|
+
),
|
|
906
|
+
"spike_logs": sum(1 for e in events if e["source"] == "spike_log"),
|
|
907
|
+
"delegations": sum(
|
|
908
|
+
1 for e in events if e["source"] == "delegation"
|
|
909
|
+
),
|
|
910
|
+
},
|
|
911
|
+
"events": events,
|
|
912
|
+
"limitations": {
|
|
913
|
+
"note": "Subagent tool activity not tracked (Claude Code limitation)",
|
|
914
|
+
"github_issue": "https://github.com/anthropics/claude-code/issues/14859",
|
|
915
|
+
"workaround": "SubagentStop hook captures completion, SDK logging captures results",
|
|
916
|
+
},
|
|
917
|
+
}
|
|
918
|
+
|
|
919
|
+
# Cache the result
|
|
920
|
+
cache.set(cache_key, result)
|
|
921
|
+
query_time_ms = (time.time() - query_start_time) * 1000
|
|
922
|
+
cache.record_metric(cache_key, exec_time_ms, cache_hit=False)
|
|
923
|
+
|
|
924
|
+
return result
|
|
925
|
+
|
|
926
|
+
finally:
|
|
927
|
+
await db.close()
|
|
928
|
+
|
|
929
|
+
# ========== HELPER: Grouped Events Logic ==========
|
|
930
|
+
|
|
931
|
+
    async def _get_events_grouped_by_prompt_impl(
        db: aiosqlite.Connection, cache: QueryCache, limit: int = 50
    ) -> dict[str, Any]:
        """
        Implementation helper: Return activity events grouped by user prompt (conversation turns).

        Each conversation turn includes:
        - userQuery: The original UserQuery event with prompt text
        - children: All child events triggered by this prompt
        - stats: Aggregated statistics for the conversation turn

        Args:
            db: Database connection
            cache: Query cache instance
            limit: Maximum number of conversation turns to return (default 50)

        Returns:
            Dictionary with conversation turns and metadata
        """
        query_start_time = time.time()

        try:
            # Create cache key
            cache_key = f"events_grouped_by_prompt:{limit}"

            # Check cache first
            cached_result = cache.get(cache_key)
            if cached_result is not None:
                query_time_ms = (time.time() - query_start_time) * 1000
                cache.record_metric(cache_key, query_time_ms, cache_hit=True)
                logger.debug(
                    f"Cache HIT for events_grouped_by_prompt (key={cache_key}, time={query_time_ms:.2f}ms)"
                )
                return cached_result  # type: ignore[no-any-return]

            exec_start = time.time()

            # Step 1: Query UserQuery events (most recent first)
            user_query_query = """
                SELECT
                    event_id,
                    timestamp,
                    input_summary,
                    execution_duration_seconds,
                    status,
                    agent_id
                FROM agent_events
                WHERE tool_name = 'UserQuery'
                ORDER BY timestamp DESC
                LIMIT ?
            """

            cursor = await db.execute(user_query_query, [limit])
            user_query_rows = await cursor.fetchall()

            conversation_turns: list[dict[str, Any]] = []

            # Step 2: For each UserQuery, fetch child events
            for uq_row in user_query_rows:
                uq_event_id = uq_row[0]
                uq_timestamp = uq_row[1]
                uq_input = uq_row[2] or ""
                uq_duration = uq_row[3] or 0.0
                uq_status = uq_row[4]

                # Extract prompt text from input_summary
                # Since format_tool_summary now properly formats UserQuery events,
                # input_summary contains just the prompt text (preview up to 100 chars)
                prompt_text = uq_input

                # Step 2a: Query child events linked via parent_event_id
                children_query = """
                    SELECT
                        event_id,
                        tool_name,
                        timestamp,
                        input_summary,
                        execution_duration_seconds,
                        status,
                        COALESCE(subagent_type, agent_id) as agent_id
                    FROM agent_events
                    WHERE parent_event_id = ?
                    ORDER BY timestamp ASC
                """

                children_cursor = await db.execute(children_query, [uq_event_id])
                children_rows = await children_cursor.fetchall()

                # Step 3: Build child events with proper formatting
                children: list[dict[str, Any]] = []
                total_duration = uq_duration
                success_count = (
                    1 if uq_status == "recorded" or uq_status == "success" else 0
                )
                error_count = (
                    0 if uq_status == "recorded" or uq_status == "success" else 1
                )

                for child_row in children_rows:
                    child_event_id = child_row[0]
                    child_tool = child_row[1]
                    child_timestamp = child_row[2]
                    child_input = child_row[3] or ""
                    child_duration = child_row[4] or 0.0
                    child_status = child_row[5]
                    child_agent = child_row[6] or "unknown"

                    # Build summary: "ToolName: description"
                    summary = f"{child_tool}: {child_input[:60]}..."
                    if len(child_input) <= 60:
                        summary = f"{child_tool}: {child_input}"

                    children.append(
                        {
                            "event_id": child_event_id,
                            "tool_name": child_tool,
                            "timestamp": child_timestamp,
                            "summary": summary,
                            "duration_seconds": round(child_duration, 2),
                            "agent": child_agent,
                        }
                    )

                    # Update stats
                    total_duration += child_duration
                    if child_status == "recorded" or child_status == "success":
                        success_count += 1
                    else:
                        error_count += 1

                # Step 4: Build conversation turn object
                conversation_turn = {
                    "userQuery": {
                        "event_id": uq_event_id,
                        "timestamp": uq_timestamp,
                        "prompt": prompt_text[:200],  # Truncate for display
                        "duration_seconds": round(uq_duration, 2),
                    },
                    "children": children,
                    "stats": {
                        "tool_count": len(children),
                        "total_duration": round(total_duration, 2),
                        "success_count": success_count,
                        "error_count": error_count,
                    },
                }

                conversation_turns.append(conversation_turn)

            exec_time_ms = (time.time() - exec_start) * 1000

            # Build response
            result = {
                "timestamp": datetime.now().isoformat(),
                "total_turns": len(conversation_turns),
                "conversation_turns": conversation_turns,
                "note": "Groups events by UserQuery prompt (conversation turn). Child events are linked via parent_event_id.",
            }

            # Cache the result
            cache.set(cache_key, result)
            query_time_ms = (time.time() - query_start_time) * 1000
            cache.record_metric(cache_key, exec_time_ms, cache_hit=False)
            logger.debug(
                f"Cache MISS for events_grouped_by_prompt (key={cache_key}, "
                f"db_time={exec_time_ms:.2f}ms, total_time={query_time_ms:.2f}ms, "
                f"turns={len(conversation_turns)})"
            )

            return result

        except Exception as e:
            logger.error(f"Error in _get_events_grouped_by_prompt_impl: {e}")
            raise

    # ========== EVENTS GROUPED BY PROMPT ENDPOINT ==========

    @app.get("/api/events-grouped-by-prompt")
    async def events_grouped_by_prompt(limit: int = 50) -> dict[str, Any]:
        """
        Return activity events grouped by user prompt (conversation turns).

        Each conversation turn includes:
        - userQuery: The original UserQuery event with prompt text
        - children: All child events triggered by this prompt
        - stats: Aggregated statistics for the conversation turn

        Args:
            limit: Maximum number of conversation turns to return (default 50)

        Returns:
            Dictionary with conversation_turns list and metadata
        """
        db = await get_db()
        cache = app.state.query_cache

        try:
            return await _get_events_grouped_by_prompt_impl(db, cache, limit)
        finally:
            await db.close()

    # ========== SESSIONS API ENDPOINT ==========

@app.get("/api/sessions")
|
|
1135
|
+
async def get_sessions(
|
|
1136
|
+
status: str | None = None,
|
|
1137
|
+
limit: int = 50,
|
|
1138
|
+
offset: int = 0,
|
|
1139
|
+
) -> dict[str, Any]:
|
|
1140
|
+
"""Get sessions from the database.
|
|
1141
|
+
|
|
1142
|
+
Args:
|
|
1143
|
+
status: Filter by session status (e.g., 'active', 'completed')
|
|
1144
|
+
limit: Maximum number of sessions to return (default 50)
|
|
1145
|
+
offset: Number of sessions to skip (default 0)
|
|
1146
|
+
|
|
1147
|
+
Returns:
|
|
1148
|
+
{
|
|
1149
|
+
"total": int,
|
|
1150
|
+
"limit": int,
|
|
1151
|
+
"offset": int,
|
|
1152
|
+
"sessions": [
|
|
1153
|
+
{
|
|
1154
|
+
"session_id": str,
|
|
1155
|
+
"agent": str | None,
|
|
1156
|
+
"continued_from": str | None,
|
|
1157
|
+
"started_at": str,
|
|
1158
|
+
"status": str,
|
|
1159
|
+
"start_commit": str | None,
|
|
1160
|
+
"ended_at": str | None
|
|
1161
|
+
}
|
|
1162
|
+
]
|
|
1163
|
+
}
|
|
1164
|
+
"""
|
|
1165
|
+
db = await get_db()
|
|
1166
|
+
cache = app.state.query_cache
|
|
1167
|
+
query_start_time = time.time()
|
|
1168
|
+
|
|
1169
|
+
try:
|
|
1170
|
+
# Create cache key from query parameters
|
|
1171
|
+
cache_key = f"api_sessions:{status or 'all'}:{limit}:{offset}"
|
|
1172
|
+
|
|
1173
|
+
# Check cache first
|
|
1174
|
+
cached_result = cache.get(cache_key)
|
|
1175
|
+
if cached_result is not None:
|
|
1176
|
+
query_time_ms = (time.time() - query_start_time) * 1000
|
|
1177
|
+
cache.record_metric(cache_key, query_time_ms, cache_hit=True)
|
|
1178
|
+
logger.debug(
|
|
1179
|
+
f"Cache HIT for api_sessions (key={cache_key}, time={query_time_ms:.2f}ms)"
|
|
1180
|
+
)
|
|
1181
|
+
return cached_result # type: ignore[no-any-return]
|
|
1182
|
+
|
|
1183
|
+
exec_start = time.time()
|
|
1184
|
+
|
|
1185
|
+
# Build query with optional status filter
|
|
1186
|
+
# Note: Database uses agent_assigned but started_at/ended_at (partial migration)
|
|
1187
|
+
query = """
|
|
1188
|
+
SELECT
|
|
1189
|
+
session_id,
|
|
1190
|
+
agent_assigned,
|
|
1191
|
+
continued_from,
|
|
1192
|
+
started_at,
|
|
1193
|
+
status,
|
|
1194
|
+
start_commit,
|
|
1195
|
+
ended_at
|
|
1196
|
+
FROM sessions
|
|
1197
|
+
WHERE 1=1
|
|
1198
|
+
"""
|
|
1199
|
+
params: list[Any] = []
|
|
1200
|
+
|
|
1201
|
+
if status:
|
|
1202
|
+
query += " AND status = ?"
|
|
1203
|
+
params.append(status)
|
|
1204
|
+
|
|
1205
|
+
query += " ORDER BY started_at DESC LIMIT ? OFFSET ?"
|
|
1206
|
+
params.extend([limit, offset])
|
|
1207
|
+
|
|
1208
|
+
cursor = await db.execute(query, params)
|
|
1209
|
+
rows = await cursor.fetchall()
|
|
1210
|
+
|
|
1211
|
+
# Get total count for pagination
|
|
1212
|
+
count_query = "SELECT COUNT(*) FROM sessions WHERE 1=1"
|
|
1213
|
+
count_params: list[Any] = []
|
|
1214
|
+
if status:
|
|
1215
|
+
count_query += " AND status = ?"
|
|
1216
|
+
count_params.append(status)
|
|
1217
|
+
|
|
1218
|
+
count_cursor = await db.execute(count_query, count_params)
|
|
1219
|
+
count_row = await count_cursor.fetchone()
|
|
1220
|
+
total = int(count_row[0]) if count_row else 0
|
|
1221
|
+
|
|
1222
|
+
# Build session objects
|
|
1223
|
+
# Map schema columns to API response fields for backward compatibility
|
|
1224
|
+
sessions = []
|
|
1225
|
+
for row in rows:
|
|
1226
|
+
sessions.append(
|
|
1227
|
+
{
|
|
1228
|
+
"session_id": row[0],
|
|
1229
|
+
"agent": row[1], # agent_assigned -> agent for API compat
|
|
1230
|
+
"continued_from": row[2], # parent_session_id
|
|
1231
|
+
"started_at": row[3], # created_at -> started_at for API compat
|
|
1232
|
+
"status": row[4] or "unknown",
|
|
1233
|
+
"start_commit": row[5],
|
|
1234
|
+
"ended_at": row[6], # completed_at -> ended_at for API compat
|
|
1235
|
+
}
|
|
1236
|
+
)
|
|
1237
|
+
|
|
1238
|
+
exec_time_ms = (time.time() - exec_start) * 1000
|
|
1239
|
+
|
|
1240
|
+
result = {
|
|
1241
|
+
"total": total,
|
|
1242
|
+
"limit": limit,
|
|
1243
|
+
"offset": offset,
|
|
1244
|
+
"sessions": sessions,
|
|
1245
|
+
}
|
|
1246
|
+
|
|
1247
|
+
# Cache the result
|
|
1248
|
+
cache.set(cache_key, result)
|
|
1249
|
+
query_time_ms = (time.time() - query_start_time) * 1000
|
|
1250
|
+
cache.record_metric(cache_key, exec_time_ms, cache_hit=False)
|
|
1251
|
+
logger.debug(
|
|
1252
|
+
f"Cache MISS for api_sessions (key={cache_key}, "
|
|
1253
|
+
f"db_time={exec_time_ms:.2f}ms, total_time={query_time_ms:.2f}ms, "
|
|
1254
|
+
f"sessions={len(sessions)})"
|
|
1255
|
+
)
|
|
1256
|
+
|
|
1257
|
+
return result
|
|
1258
|
+
|
|
1259
|
+
finally:
|
|
1260
|
+
await db.close()
|
|
1261
|
+
|
|
1262
|
+
# ========== ORCHESTRATION ENDPOINTS ==========
|
|
1263
|
+
|
|
1264
|
+
@app.get("/views/orchestration", response_class=HTMLResponse)
|
|
1265
|
+
async def orchestration_view(request: Request) -> HTMLResponse:
|
|
1266
|
+
"""Get delegation chains and agent handoffs as HTMX partial."""
|
|
1267
|
+
db = await get_db()
|
|
1268
|
+
try:
|
|
1269
|
+
# Query delegation events from agent_events table
|
|
1270
|
+
# Use same query as API endpoint - filter by tool_name = 'Task'
|
|
1271
|
+
query = """
|
|
1272
|
+
SELECT
|
|
1273
|
+
event_id,
|
|
1274
|
+
agent_id as from_agent,
|
|
1275
|
+
subagent_type as to_agent,
|
|
1276
|
+
timestamp,
|
|
1277
|
+
input_summary,
|
|
1278
|
+
session_id,
|
|
1279
|
+
status
|
|
1280
|
+
FROM agent_events
|
|
1281
|
+
WHERE tool_name = 'Task'
|
|
1282
|
+
ORDER BY timestamp DESC
|
|
1283
|
+
LIMIT 50
|
|
1284
|
+
"""
|
|
1285
|
+
|
|
1286
|
+
cursor = await db.execute(query)
|
|
1287
|
+
rows = list(await cursor.fetchall())
|
|
1288
|
+
logger.debug(f"orchestration_view: Query executed, got {len(rows)} rows")
|
|
1289
|
+
|
|
1290
|
+
delegations = []
|
|
1291
|
+
for row in rows:
|
|
1292
|
+
from_agent = row[1] or "unknown"
|
|
1293
|
+
to_agent = row[2] # May be NULL
|
|
1294
|
+
task_summary = row[4] or ""
|
|
1295
|
+
|
|
1296
|
+
# Extract to_agent from input_summary JSON if NULL
|
|
1297
|
+
if not to_agent:
|
|
1298
|
+
try:
|
|
1299
|
+
input_data = json.loads(task_summary) if task_summary else {}
|
|
1300
|
+
to_agent = input_data.get("subagent_type", "unknown")
|
|
1301
|
+
except Exception:
|
|
1302
|
+
to_agent = "unknown"
|
|
1303
|
+
|
|
1304
|
+
delegation = {
|
|
1305
|
+
"event_id": row[0],
|
|
1306
|
+
"from_agent": from_agent,
|
|
1307
|
+
"to_agent": to_agent,
|
|
1308
|
+
"timestamp": row[3],
|
|
1309
|
+
"task": task_summary or "Unnamed task",
|
|
1310
|
+
"session_id": row[5],
|
|
1311
|
+
"status": row[6] or "pending",
|
|
1312
|
+
"result": "", # Not available in agent_events
|
|
1313
|
+
}
|
|
1314
|
+
delegations.append(delegation)
|
|
1315
|
+
|
|
1316
|
+
logger.debug(
|
|
1317
|
+
f"orchestration_view: Created {len(delegations)} delegation dicts"
|
|
1318
|
+
)
|
|
1319
|
+
|
|
1320
|
+
return templates.TemplateResponse(
|
|
1321
|
+
"partials/orchestration.html",
|
|
1322
|
+
{
|
|
1323
|
+
"request": request,
|
|
1324
|
+
"delegations": delegations,
|
|
1325
|
+
},
|
|
1326
|
+
)
|
|
1327
|
+
except Exception as e:
|
|
1328
|
+
logger.error(f"orchestration_view ERROR: {e}")
|
|
1329
|
+
raise
|
|
1330
|
+
finally:
|
|
1331
|
+
await db.close()
|
|
1332
|
+
|
|
1333
|
+
@app.get("/api/orchestration")
|
|
1334
|
+
async def orchestration_api() -> dict[str, Any]:
|
|
1335
|
+
"""Get delegation chains and agent coordination information as JSON.
|
|
1336
|
+
|
|
1337
|
+
Returns:
|
|
1338
|
+
{
|
|
1339
|
+
"delegation_count": int,
|
|
1340
|
+
"unique_agents": int,
|
|
1341
|
+
"agents": [str],
|
|
1342
|
+
"delegation_chains": {
|
|
1343
|
+
"from_agent": [
|
|
1344
|
+
{
|
|
1345
|
+
"to_agent": str,
|
|
1346
|
+
"event_type": str,
|
|
1347
|
+
"timestamp": str,
|
|
1348
|
+
"task": str,
|
|
1349
|
+
"status": str
|
|
1350
|
+
}
|
|
1351
|
+
]
|
|
1352
|
+
}
|
|
1353
|
+
}
|
|
1354
|
+
"""
|
|
1355
|
+
db = await get_db()
|
|
1356
|
+
try:
|
|
1357
|
+
# Query delegation events from agent_events table
|
|
1358
|
+
# Filter by tool_name = 'Task' (not event_type)
|
|
1359
|
+
query = """
|
|
1360
|
+
SELECT
|
|
1361
|
+
event_id,
|
|
1362
|
+
agent_id as from_agent,
|
|
1363
|
+
subagent_type as to_agent,
|
|
1364
|
+
timestamp,
|
|
1365
|
+
input_summary,
|
|
1366
|
+
status
|
|
1367
|
+
FROM agent_events
|
|
1368
|
+
WHERE tool_name = 'Task'
|
|
1369
|
+
ORDER BY timestamp DESC
|
|
1370
|
+
LIMIT 1000
|
|
1371
|
+
"""
|
|
1372
|
+
|
|
1373
|
+
cursor = await db.execute(query)
|
|
1374
|
+
rows = await cursor.fetchall()
|
|
1375
|
+
|
|
1376
|
+
# Build delegation chains grouped by from_agent
|
|
1377
|
+
delegation_chains: dict[str, list[dict[str, Any]]] = {}
|
|
1378
|
+
agents = set()
|
|
1379
|
+
delegation_count = 0
|
|
1380
|
+
|
|
1381
|
+
for row in rows:
|
|
1382
|
+
from_agent = row[1] or "unknown"
|
|
1383
|
+
to_agent = row[2] # May be NULL
|
|
1384
|
+
timestamp = row[3] or ""
|
|
1385
|
+
task_summary = row[4] or ""
|
|
1386
|
+
status = row[5] or "pending"
|
|
1387
|
+
|
|
1388
|
+
# Extract to_agent from input_summary JSON if NULL
|
|
1389
|
+
if not to_agent:
|
|
1390
|
+
try:
|
|
1391
|
+
import json
|
|
1392
|
+
|
|
1393
|
+
input_data = json.loads(task_summary) if task_summary else {}
|
|
1394
|
+
to_agent = input_data.get("subagent_type", "unknown")
|
|
1395
|
+
except Exception:
|
|
1396
|
+
to_agent = "unknown"
|
|
1397
|
+
|
|
1398
|
+
agents.add(from_agent)
|
|
1399
|
+
agents.add(to_agent)
|
|
1400
|
+
delegation_count += 1
|
|
1401
|
+
|
|
1402
|
+
if from_agent not in delegation_chains:
|
|
1403
|
+
delegation_chains[from_agent] = []
|
|
1404
|
+
|
|
1405
|
+
delegation_chains[from_agent].append(
|
|
1406
|
+
{
|
|
1407
|
+
"to_agent": to_agent,
|
|
1408
|
+
"event_type": "delegation",
|
|
1409
|
+
"timestamp": timestamp,
|
|
1410
|
+
"task": task_summary or "Unnamed task",
|
|
1411
|
+
"status": status,
|
|
1412
|
+
}
|
|
1413
|
+
)
|
|
1414
|
+
|
|
1415
|
+
return {
|
|
1416
|
+
"delegation_count": delegation_count,
|
|
1417
|
+
"unique_agents": len(agents),
|
|
1418
|
+
"agents": sorted(list(agents)),
|
|
1419
|
+
"delegation_chains": delegation_chains,
|
|
1420
|
+
}
|
|
1421
|
+
|
|
1422
|
+
except Exception as e:
|
|
1423
|
+
logger.error(f"Failed to get orchestration data: {e}")
|
|
1424
|
+
raise
|
|
1425
|
+
finally:
|
|
1426
|
+
await db.close()
|
|
1427
|
+
|
|
1428
|
+
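A usage sketch for the endpoint above: the keys it returns (`delegation_count`, `unique_agents`, `agents`, `delegation_chains`) follow the docstring. The host/port and the third-party `httpx` client are assumptions for illustration, not part of this package.

```python
# Hypothetical client for GET /api/orchestration (assumes the API is served
# at http://127.0.0.1:8000; the bind address is not defined in this module).
import httpx

resp = httpx.get("http://127.0.0.1:8000/api/orchestration", timeout=10.0)
resp.raise_for_status()
data = resp.json()

print(f"{data['delegation_count']} delegations across {data['unique_agents']} agents")
for parent, chain in data["delegation_chains"].items():
    # Each chain entry carries to_agent, event_type, timestamp, task, status.
    print(parent, "->", [link["to_agent"] for link in chain])
```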
@app.get("/api/orchestration/delegations")
|
|
1429
|
+
async def orchestration_delegations_api() -> dict[str, Any]:
|
|
1430
|
+
"""Get delegation statistics and chains as JSON.
|
|
1431
|
+
|
|
1432
|
+
This endpoint is used by the dashboard JavaScript to display
|
|
1433
|
+
delegation metrics in the orchestration panel.
|
|
1434
|
+
|
|
1435
|
+
Returns:
|
|
1436
|
+
{
|
|
1437
|
+
"delegation_count": int,
|
|
1438
|
+
"unique_agents": int,
|
|
1439
|
+
"delegation_chains": {
|
|
1440
|
+
"from_agent": [
|
|
1441
|
+
{
|
|
1442
|
+
"to_agent": str,
|
|
1443
|
+
"timestamp": str,
|
|
1444
|
+
"task": str,
|
|
1445
|
+
"status": str
|
|
1446
|
+
}
|
|
1447
|
+
]
|
|
1448
|
+
}
|
|
1449
|
+
}
|
|
1450
|
+
"""
|
|
1451
|
+
db = await get_db()
|
|
1452
|
+
cache = app.state.query_cache
|
|
1453
|
+
query_start_time = time.time()
|
|
1454
|
+
|
|
1455
|
+
try:
|
|
1456
|
+
# Create cache key
|
|
1457
|
+
cache_key = "orchestration_delegations:all"
|
|
1458
|
+
|
|
1459
|
+
# Check cache first
|
|
1460
|
+
cached_result = cache.get(cache_key)
|
|
1461
|
+
if cached_result is not None:
|
|
1462
|
+
query_time_ms = (time.time() - query_start_time) * 1000
|
|
1463
|
+
cache.record_metric(cache_key, query_time_ms, cache_hit=True)
|
|
1464
|
+
logger.debug(
|
|
1465
|
+
f"Cache HIT for orchestration_delegations (key={cache_key}, "
|
|
1466
|
+
f"time={query_time_ms:.2f}ms)"
|
|
1467
|
+
)
|
|
1468
|
+
return cached_result # type: ignore[no-any-return]
|
|
1469
|
+
|
|
1470
|
+
exec_start = time.time()
|
|
1471
|
+
|
|
1472
|
+
# Query delegation events from agent_events table
|
|
1473
|
+
# Filter by tool_name = 'Task' to get Task() delegations
|
|
1474
|
+
query = """
|
|
1475
|
+
SELECT
|
|
1476
|
+
event_id,
|
|
1477
|
+
agent_id as from_agent,
|
|
1478
|
+
subagent_type as to_agent,
|
|
1479
|
+
timestamp,
|
|
1480
|
+
input_summary,
|
|
1481
|
+
status
|
|
1482
|
+
FROM agent_events
|
|
1483
|
+
WHERE tool_name = 'Task'
|
|
1484
|
+
ORDER BY timestamp DESC
|
|
1485
|
+
LIMIT 1000
|
|
1486
|
+
"""
|
|
1487
|
+
|
|
1488
|
+
cursor = await db.execute(query)
|
|
1489
|
+
rows = await cursor.fetchall()
|
|
1490
|
+
|
|
1491
|
+
# Build delegation chains grouped by from_agent
|
|
1492
|
+
delegation_chains: dict[str, list[dict[str, Any]]] = {}
|
|
1493
|
+
agents = set()
|
|
1494
|
+
delegation_count = 0
|
|
1495
|
+
|
|
1496
|
+
for row in rows:
|
|
1497
|
+
from_agent = row[1] or "unknown"
|
|
1498
|
+
to_agent = row[2] # May be NULL
|
|
1499
|
+
timestamp = row[3] or ""
|
|
1500
|
+
task_summary = row[4] or ""
|
|
1501
|
+
status = row[5] or "pending"
|
|
1502
|
+
|
|
1503
|
+
# Extract to_agent from input_summary JSON if NULL
|
|
1504
|
+
if not to_agent:
|
|
1505
|
+
try:
|
|
1506
|
+
input_data = json.loads(task_summary) if task_summary else {}
|
|
1507
|
+
to_agent = input_data.get("subagent_type", "unknown")
|
|
1508
|
+
except Exception:
|
|
1509
|
+
to_agent = "unknown"
|
|
1510
|
+
|
|
1511
|
+
agents.add(from_agent)
|
|
1512
|
+
agents.add(to_agent)
|
|
1513
|
+
delegation_count += 1
|
|
1514
|
+
|
|
1515
|
+
if from_agent not in delegation_chains:
|
|
1516
|
+
delegation_chains[from_agent] = []
|
|
1517
|
+
|
|
1518
|
+
delegation_chains[from_agent].append(
|
|
1519
|
+
{
|
|
1520
|
+
"to_agent": to_agent,
|
|
1521
|
+
"timestamp": timestamp,
|
|
1522
|
+
"task": task_summary or "Unnamed task",
|
|
1523
|
+
"status": status,
|
|
1524
|
+
}
|
|
1525
|
+
)
|
|
1526
|
+
|
|
1527
|
+
exec_time_ms = (time.time() - exec_start) * 1000
|
|
1528
|
+
|
|
1529
|
+
result = {
|
|
1530
|
+
"delegation_count": delegation_count,
|
|
1531
|
+
"unique_agents": len(agents),
|
|
1532
|
+
"delegation_chains": delegation_chains,
|
|
1533
|
+
}
|
|
1534
|
+
|
|
1535
|
+
# Cache the result
|
|
1536
|
+
cache.set(cache_key, result)
|
|
1537
|
+
query_time_ms = (time.time() - query_start_time) * 1000
|
|
1538
|
+
cache.record_metric(cache_key, exec_time_ms, cache_hit=False)
|
|
1539
|
+
logger.debug(
|
|
1540
|
+
f"Cache MISS for orchestration_delegations (key={cache_key}, "
|
|
1541
|
+
f"db_time={exec_time_ms:.2f}ms, total_time={query_time_ms:.2f}ms, "
|
|
1542
|
+
f"delegations={delegation_count})"
|
|
1543
|
+
)
|
|
1544
|
+
|
|
1545
|
+
return result
|
|
1546
|
+
|
|
1547
|
+
except Exception as e:
|
|
1548
|
+
logger.error(f"Failed to get orchestration delegations: {e}")
|
|
1549
|
+
raise
|
|
1550
|
+
finally:
|
|
1551
|
+
await db.close()
|
|
1552
|
+
|
|
1553
|
+
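Because this endpoint caches its result under the key `orchestration_delegations:all`, a second request shortly after the first is typically served from the in-process cache (compare the Cache HIT/MISS debug logs). A minimal timing sketch, again assuming a local server on port 8000 and the third-party `httpx` package:

```python
# Rough cold-vs-warm comparison; exact numbers depend on database size and the
# cache TTL, neither of which this snippet controls.
import time

import httpx

url = "http://127.0.0.1:8000/api/orchestration/delegations"
for attempt in ("cold", "warm"):
    start = time.perf_counter()
    payload = httpx.get(url, timeout=10.0).json()
    elapsed_ms = (time.perf_counter() - start) * 1000
    print(f"{attempt}: {payload['delegation_count']} delegations in {elapsed_ms:.1f}ms")
```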
    # ========== FEATURES ENDPOINTS ==========

    @app.get("/views/features", response_class=HTMLResponse)
    async def features_view(request: Request, status: str = "all") -> HTMLResponse:
        """Get features by status as HTMX partial."""
        db = await get_db()
        cache = app.state.query_cache
        query_start_time = time.time()

        try:
            # Create cache key from query parameters
            cache_key = f"features_view:{status}"

            # Check cache first
            cached_response = cache.get(cache_key)
            features_by_status: dict = {
                "todo": [],
                "in_progress": [],
                "blocked": [],
                "done": [],
            }

            if cached_response is not None:
                query_time_ms = (time.time() - query_start_time) * 1000
                cache.record_metric(cache_key, query_time_ms, cache_hit=True)
                logger.debug(
                    f"Cache HIT for features_view (key={cache_key}, time={query_time_ms:.2f}ms)"
                )
                features_by_status = cached_response
            else:
                # OPTIMIZATION: Use composite index idx_features_status_priority
                # for efficient filtering and ordering
                query = """
                    SELECT id, type, title, status, priority, assigned_to, created_at, updated_at
                    FROM features
                    WHERE 1=1
                """
                params: list = []

                if status != "all":
                    query += " AND status = ?"
                    params.append(status)

                query += " ORDER BY priority DESC, created_at DESC LIMIT 100"

                exec_start = time.time()
                cursor = await db.execute(query, params)
                rows = await cursor.fetchall()
                exec_time_ms = (time.time() - exec_start) * 1000

                for row in rows:
                    feature_status = row[3]
                    features_by_status.setdefault(feature_status, []).append(
                        {
                            "id": row[0],
                            "type": row[1],
                            "title": row[2],
                            "status": feature_status,
                            "priority": row[4],
                            "assigned_to": row[5],
                            "created_at": row[6],
                            "updated_at": row[7],
                        }
                    )

                # Cache the results
                cache.set(cache_key, features_by_status)
                query_time_ms = (time.time() - query_start_time) * 1000
                cache.record_metric(cache_key, exec_time_ms, cache_hit=False)
                logger.debug(
                    f"Cache MISS for features_view (key={cache_key}, "
                    f"db_time={exec_time_ms:.2f}ms, total_time={query_time_ms:.2f}ms)"
                )

            return templates.TemplateResponse(
                "partials/features.html",
                {
                    "request": request,
                    "features_by_status": features_by_status,
                },
            )
        finally:
            await db.close()
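The `/views/*` endpoints return HTMX partials rather than JSON, so they are easiest to exercise in-process with FastAPI's TestClient. A sketch, assuming a database already exists at the default path used by `create_app()`:

```python
# In-process request against the features kanban partial. The "status" query
# parameter matches the endpoint signature above; "in_progress" is one of the
# columns pre-seeded in features_by_status.
from fastapi.testclient import TestClient

from htmlgraph.api.main import create_app

client = TestClient(create_app())
response = client.get("/views/features", params={"status": "in_progress"})
response.raise_for_status()
print(response.text[:200])  # rendered partials/features.html fragment
```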
    # ========== SPAWNERS ENDPOINTS ==========

    @app.get("/views/spawners", response_class=HTMLResponse)
    async def spawners_view(request: Request) -> HTMLResponse:
        """Get spawner activity dashboard as HTMX partial."""
        db = await get_db()
        try:
            # Get spawner statistics
            stats_response = await get_spawner_statistics()
            spawner_stats = stats_response.get("spawner_statistics", [])

            # Get recent spawner activities
            activities_response = await get_spawner_activities(limit=50)
            recent_activities = activities_response.get("spawner_activities", [])

            return templates.TemplateResponse(
                "partials/spawners.html",
                {
                    "request": request,
                    "spawner_stats": spawner_stats,
                    "recent_activities": recent_activities,
                },
            )
        except Exception as e:
            logger.error(f"spawners_view ERROR: {e}")
            return templates.TemplateResponse(
                "partials/spawners.html",
                {
                    "request": request,
                    "spawner_stats": [],
                    "recent_activities": [],
                },
            )
        finally:
            await db.close()

    # ========== METRICS ENDPOINTS ==========

    @app.get("/views/metrics", response_class=HTMLResponse)
    async def metrics_view(request: Request) -> HTMLResponse:
        """Get session metrics and performance data as HTMX partial."""
        db = await get_db()
        cache = app.state.query_cache
        query_start_time = time.time()

        try:
            # Create cache key for metrics view
            cache_key = "metrics_view:all"

            # Check cache first
            cached_response = cache.get(cache_key)
            if cached_response is not None:
                query_time_ms = (time.time() - query_start_time) * 1000
                cache.record_metric(cache_key, query_time_ms, cache_hit=True)
                logger.debug(
                    f"Cache HIT for metrics_view (key={cache_key}, time={query_time_ms:.2f}ms)"
                )
                sessions, stats = cached_response
            else:
                # OPTIMIZATION: Combine session data with event counts in single query
                # This eliminates N+1 query problem (was 20+ queries, now 2)
                # Note: Database uses agent_assigned but started_at/ended_at (partial migration)
                query = """
                    SELECT
                        s.session_id,
                        s.agent_assigned,
                        s.status,
                        s.started_at,
                        s.ended_at,
                        COUNT(DISTINCT e.event_id) as event_count
                    FROM sessions s
                    LEFT JOIN agent_events e ON s.session_id = e.session_id
                    GROUP BY s.session_id
                    ORDER BY s.started_at DESC
                    LIMIT 20
                """

                exec_start = time.time()
                cursor = await db.execute(query)
                rows = await cursor.fetchall()
                exec_time_ms = (time.time() - exec_start) * 1000

                sessions = []
                for row in rows:
                    started_at = datetime.fromisoformat(row[3])

                    # Calculate duration
                    if row[4]:
                        ended_at = datetime.fromisoformat(row[4])
                        duration_seconds = (ended_at - started_at).total_seconds()
                    else:
                        duration_seconds = (datetime.now() - started_at).total_seconds()

                    sessions.append(
                        {
                            "session_id": row[0],
                            "agent": row[1],
                            "status": row[2],
                            "started_at": row[3],
                            "ended_at": row[4],
                            "event_count": int(row[5]) if row[5] else 0,
                            "duration_seconds": duration_seconds,
                        }
                    )

                # OPTIMIZATION: Combine all stats in single query instead of subqueries
                # This reduces query count from 4 subqueries + 1 main to just 1
                stats_query = """
                    SELECT
                        (SELECT COUNT(*) FROM agent_events) as total_events,
                        (SELECT COUNT(DISTINCT agent_id) FROM agent_events) as total_agents,
                        (SELECT COUNT(*) FROM sessions) as total_sessions,
                        (SELECT COUNT(*) FROM features) as total_features
                """

                stats_cursor = await db.execute(stats_query)
                stats_row = await stats_cursor.fetchone()

                if stats_row:
                    stats = {
                        "total_events": int(stats_row[0]) if stats_row[0] else 0,
                        "total_agents": int(stats_row[1]) if stats_row[1] else 0,
                        "total_sessions": int(stats_row[2]) if stats_row[2] else 0,
                        "total_features": int(stats_row[3]) if stats_row[3] else 0,
                    }
                else:
                    stats = {
                        "total_events": 0,
                        "total_agents": 0,
                        "total_sessions": 0,
                        "total_features": 0,
                    }

                # Cache the results
                cache_data = (sessions, stats)
                cache.set(cache_key, cache_data)
                query_time_ms = (time.time() - query_start_time) * 1000
                cache.record_metric(cache_key, exec_time_ms, cache_hit=False)
                logger.debug(
                    f"Cache MISS for metrics_view (key={cache_key}, "
                    f"db_time={exec_time_ms:.2f}ms, total_time={query_time_ms:.2f}ms)"
                )

            # Provide default values for metrics template variables
            # These prevent Jinja2 UndefinedError for variables the template expects
            exec_time_dist = {
                "very_fast": 0,
                "fast": 0,
                "medium": 0,
                "slow": 0,
                "very_slow": 0,
            }

            # Count active sessions from the fetched sessions
            active_sessions = sum(1 for s in sessions if s.get("status") == "active")

            # Default token stats (empty until we compute real values)
            token_stats = {
                "total_tokens": 0,
                "avg_per_event": 0,
                "peak_usage": 0,
                "estimated_cost": 0.0,
            }

            # Default activity timeline (last 24 hours with 0 counts)
            activity_timeline = {str(h): 0 for h in range(24)}
            max_hourly_count = 1  # Avoid division by zero in template

            # Default agent performance (empty list)
            agent_performance: list[dict[str, str | float]] = []

            # Default system health metrics
            error_rate = 0.0
            avg_response_time = 0.5  # seconds

            return templates.TemplateResponse(
                "partials/metrics.html",
                {
                    "request": request,
                    "sessions": sessions,
                    "stats": stats,
                    "exec_time_dist": exec_time_dist,
                    "active_sessions": active_sessions,
                    "token_stats": token_stats,
                    "activity_timeline": activity_timeline,
                    "max_hourly_count": max_hourly_count,
                    "agent_performance": agent_performance,
                    "error_rate": error_rate,
                    "avg_response_time": avg_response_time,
                },
            )
        finally:
            await db.close()
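The duration logic in `metrics_view` falls back to "now" for sessions that have not ended. A standalone sketch of the same calculation (the timestamps below are made up for illustration):

```python
# Mirrors the duration_seconds computation above: closed sessions use
# ended_at - started_at, open sessions fall back to the current time.
from datetime import datetime


def session_duration_seconds(started_at: str, ended_at: str | None) -> float:
    start = datetime.fromisoformat(started_at)
    end = datetime.fromisoformat(ended_at) if ended_at else datetime.now()
    return (end - start).total_seconds()


print(session_duration_seconds("2025-01-01T10:00:00", "2025-01-01T10:05:30"))  # 330.0
print(session_duration_seconds("2025-01-01T10:00:00", None))  # still-running session
```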
    # ========== SPAWNER OBSERVABILITY ENDPOINTS ==========

    @app.get("/api/spawner-activities")
    async def get_spawner_activities(
        spawner_type: str | None = None,
        session_id: str | None = None,
        limit: int = 100,
        offset: int = 0,
    ) -> dict[str, Any]:
        """
        Get spawner delegation activities with clear attribution.

        Returns events where spawner_type IS NOT NULL, ordered by recency.
        Shows which orchestrator delegated to which spawned AI.

        Args:
            spawner_type: Filter by spawner type (gemini, codex, copilot)
            session_id: Filter by session
            limit: Maximum results (default 100)
            offset: Result offset for pagination

        Returns:
            Dict with spawner_activities array and metadata
        """
        db = await get_db()
        cache = app.state.query_cache
        query_start_time = time.time()

        try:
            # Create cache key
            cache_key = f"spawner_activities:{spawner_type or 'all'}:{session_id or 'all'}:{limit}:{offset}"

            # Check cache first
            cached_result = cache.get(cache_key)
            if cached_result is not None:
                query_time_ms = (time.time() - query_start_time) * 1000
                cache.record_metric(cache_key, query_time_ms, cache_hit=True)
                return cached_result  # type: ignore[no-any-return]

            exec_start = time.time()

            query = """
                SELECT
                    event_id,
                    agent_id AS orchestrator_agent,
                    spawner_type,
                    subagent_type AS spawned_agent,
                    tool_name,
                    input_summary AS task,
                    output_summary AS result,
                    status,
                    execution_duration_seconds AS duration,
                    cost_tokens AS tokens,
                    cost_usd,
                    child_spike_count AS artifacts,
                    timestamp,
                    created_at
                FROM agent_events
                WHERE spawner_type IS NOT NULL
            """

            params: list[Any] = []
            if spawner_type:
                query += " AND spawner_type = ?"
                params.append(spawner_type)
            if session_id:
                query += " AND session_id = ?"
                params.append(session_id)

            query += " ORDER BY timestamp DESC LIMIT ? OFFSET ?"
            params.extend([limit, offset])

            cursor = await db.execute(query, params)
            events = [
                dict(zip([c[0] for c in cursor.description], row))
                for row in await cursor.fetchall()
            ]

            # Get total count
            count_query = (
                "SELECT COUNT(*) FROM agent_events WHERE spawner_type IS NOT NULL"
            )
            count_params: list[Any] = []
            if spawner_type:
                count_query += " AND spawner_type = ?"
                count_params.append(spawner_type)
            if session_id:
                count_query += " AND session_id = ?"
                count_params.append(session_id)

            count_cursor = await db.execute(count_query, count_params)
            count_row = await count_cursor.fetchone()
            total_count = int(count_row[0]) if count_row else 0

            exec_time_ms = (time.time() - exec_start) * 1000

            result = {
                "spawner_activities": events,
                "count": len(events),
                "total": total_count,
                "offset": offset,
                "limit": limit,
            }

            # Cache the result
            cache.set(cache_key, result)
            query_time_ms = (time.time() - query_start_time) * 1000
            cache.record_metric(cache_key, exec_time_ms, cache_hit=False)
            logger.debug(
                f"Cache MISS for spawner_activities (key={cache_key}, "
                f"db_time={exec_time_ms:.2f}ms, total_time={query_time_ms:.2f}ms, "
                f"activities={len(events)})"
            )

            return result
        finally:
            await db.close()

    @app.get("/api/spawner-statistics")
    async def get_spawner_statistics(session_id: str | None = None) -> dict[str, Any]:
        """
        Get aggregated statistics for each spawner type.

        Shows delegations, success rate, average duration, token usage, and costs
        broken down by spawner type (Gemini, Codex, Copilot).

        Args:
            session_id: Filter by session (optional)

        Returns:
            Dict with spawner_statistics array
        """
        db = await get_db()
        cache = app.state.query_cache
        query_start_time = time.time()

        try:
            # Create cache key
            cache_key = f"spawner_statistics:{session_id or 'all'}"

            # Check cache first
            cached_result = cache.get(cache_key)
            if cached_result is not None:
                query_time_ms = (time.time() - query_start_time) * 1000
                cache.record_metric(cache_key, query_time_ms, cache_hit=True)
                return cached_result  # type: ignore[no-any-return]

            exec_start = time.time()

            query = """
                SELECT
                    spawner_type,
                    COUNT(*) as total_delegations,
                    SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as successful,
                    ROUND(100.0 * SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) / COUNT(*), 1) as success_rate,
                    ROUND(AVG(execution_duration_seconds), 2) as avg_duration,
                    SUM(cost_tokens) as total_tokens,
                    ROUND(SUM(cost_usd), 2) as total_cost,
                    MIN(timestamp) as first_used,
                    MAX(timestamp) as last_used
                FROM agent_events
                WHERE spawner_type IS NOT NULL
            """

            params: list[Any] = []
            if session_id:
                query += " AND session_id = ?"
                params.append(session_id)

            query += " GROUP BY spawner_type ORDER BY total_delegations DESC"

            cursor = await db.execute(query, params)
            stats = [
                dict(zip([c[0] for c in cursor.description], row))
                for row in await cursor.fetchall()
            ]

            exec_time_ms = (time.time() - exec_start) * 1000

            result = {"spawner_statistics": stats}

            # Cache the result
            cache.set(cache_key, result)
            query_time_ms = (time.time() - query_start_time) * 1000
            cache.record_metric(cache_key, exec_time_ms, cache_hit=False)
            logger.debug(
                f"Cache MISS for spawner_statistics (key={cache_key}, "
                f"db_time={exec_time_ms:.2f}ms, total_time={query_time_ms:.2f}ms)"
            )

            return result
        finally:
            await db.close()
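The two spawner endpoints above pair naturally: statistics for the summary cards, activities for the drill-down list. A hedged client sketch (local server and the `httpx` package assumed; the `gemini` filter value is only an example taken from the docstring):

```python
# Query aggregated spawner statistics, then page through recent activities for
# one spawner type using the limit/offset parameters defined above.
import httpx

base = "http://127.0.0.1:8000"

stats = httpx.get(f"{base}/api/spawner-statistics", timeout=10.0).json()
for row in stats["spawner_statistics"]:
    print(row["spawner_type"], row["total_delegations"], f"{row['success_rate']}%")

activities = httpx.get(
    f"{base}/api/spawner-activities",
    params={"spawner_type": "gemini", "limit": 20, "offset": 0},
    timeout=10.0,
).json()
print(f"showing {activities['count']} of {activities['total']} activities")
```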
    # ========== WEBSOCKET FOR REAL-TIME UPDATES ==========

    @app.websocket("/ws/events")
    async def websocket_events(websocket: WebSocket) -> None:
        """WebSocket endpoint for real-time event streaming.

        OPTIMIZATION: Uses timestamp-based filtering to minimize data transfers.
        The timestamp > ? filter with DESC index makes queries O(log n) instead of O(n).

        IMPORTANT: Initializes last_timestamp to current time to only stream NEW events.
        Historical events are already counted in /api/initial-stats, so streaming them
        again would cause double-counting in the header stats.
        """
        await websocket.accept()
        # Initialize to current time - only stream events created AFTER connection
        # This prevents double-counting: initial-stats already includes historical events
        last_timestamp: str = datetime.now().isoformat()
        poll_interval = 0.5  # OPTIMIZATION: Adaptive polling (reduced from 1s)

        try:
            while True:
                db = await get_db()
                try:
                    # OPTIMIZATION: Only select needed columns, use DESC index
                    # Pattern uses index: idx_agent_events_timestamp DESC
                    # Only fetch events AFTER last_timestamp to stream new events only
                    query = """
                        SELECT event_id, agent_id, event_type, timestamp, tool_name,
                               input_summary, output_summary, session_id, status
                        FROM agent_events
                        WHERE timestamp > ?
                        ORDER BY timestamp ASC
                        LIMIT 100
                    """

                    cursor = await db.execute(query, [last_timestamp])
                    rows = await cursor.fetchall()

                    if rows:
                        rows_list = [list(row) for row in rows]
                        # Update last timestamp (last row since ORDER BY ts ASC)
                        last_timestamp = rows_list[-1][3]

                        # Send events in order (no need to reverse with ASC)
                        for row in rows_list:
                            event_data = {
                                "type": "event",
                                "event_id": row[0],
                                "agent_id": row[1] or "unknown",
                                "event_type": row[2],
                                "timestamp": row[3],
                                "tool_name": row[4],
                                "input_summary": row[5],
                                "output_summary": row[6],
                                "session_id": row[7],
                                "status": row[8],
                                "parent_event_id": None,
                                "cost_tokens": 0,
                                "execution_duration_seconds": 0.0,
                            }
                            await websocket.send_json(event_data)
                    else:
                        # No new events, increase poll interval (exponential backoff)
                        poll_interval = min(poll_interval * 1.2, 2.0)
                finally:
                    await db.close()

                # OPTIMIZATION: Reduced sleep interval for faster real-time updates
                await asyncio.sleep(poll_interval)

        except WebSocketDisconnect:
            logger.info("WebSocket client disconnected")
        except Exception as e:
            logger.error(f"WebSocket error: {e}")
            await websocket.close(code=1011)

    return app
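The `/ws/events` stream only carries events created after the connection is opened, so a client simply connects and consumes JSON messages. A minimal sketch using the third-party `websockets` package (the server address is assumed, as in the earlier examples):

```python
# Tail new agent events as they are written to the database. Each message is
# the event_data dict sent by websocket_events above.
import asyncio
import json

import websockets


async def tail_events() -> None:
    async with websockets.connect("ws://127.0.0.1:8000/ws/events") as ws:
        while True:
            event = json.loads(await ws.recv())
            print(event["timestamp"], event["agent_id"], event["tool_name"], event["status"])


asyncio.run(tail_events())
```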
# Create default app instance
def create_app(db_path: str | None = None) -> FastAPI:
    """Create FastAPI app with default database path."""
    if db_path is None:
        # Use default database location - htmlgraph.db is the unified database
        db_path = str(Path.home() / ".htmlgraph" / "htmlgraph.db")

    return get_app(db_path)


# Export for uvicorn
app = create_app()
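Since the module exports a ready-made `app`, it can be served directly with uvicorn. A launch sketch (the port and reload flag are arbitrary choices for illustration, not defaults of this package):

```python
# Serve the dashboard API; equivalent to `uvicorn htmlgraph.api.main:app`.
import uvicorn

if __name__ == "__main__":
    uvicorn.run("htmlgraph.api.main:app", host="127.0.0.1", port=8000, reload=False)
```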