kailash 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,822 @@
1
+ """Real-time monitoring dashboard for connection pools.
2
+
3
+ This module provides a web-based dashboard for monitoring connection pool
4
+ health, performance metrics, and alerts. It integrates with the metrics
5
+ collection system to provide real-time visualization.
6
+
7
+ Features:
8
+ - Real-time pool statistics with WebSocket updates
9
+ - Health score visualization with history
10
+ - Alert configuration and notifications
11
+ - Historical trend analysis with charts
12
+ - Export capabilities for reports
13
+
14
+ Example:
15
+ >>> dashboard = ConnectionDashboardNode(
16
+ ... name="pool_monitor",
17
+ ... port=8080,
18
+ ... update_interval=1.0,
19
+ ... retention_hours=24
20
+ ... )
21
+ >>>
22
+ >>> # Start dashboard server
23
+ >>> await dashboard.start()
24
+ >>>
25
+ >>> # Access at http://localhost:8080
26
+ """
27
+
28
+ import asyncio
29
+ import json
30
+ import logging
31
+ import time
32
+ from dataclasses import dataclass, field
33
+ from datetime import datetime, timedelta
34
+ from typing import Any, Dict, List, Optional, Set
35
+
36
+ import aiohttp_cors
37
+ from aiohttp import web
38
+
39
+ from kailash.nodes.base import Node, NodeParameter, register_node
40
+
41
+ logger = logging.getLogger(__name__)
42
+
43
+
44
@dataclass
class AlertRule:
    """Configuration for a single alert rule.

    A rule names a metric condition (e.g. ``"pool_utilization > 0.9"``),
    how long the condition must hold, and how often the rule may re-fire.
    """

    id: str
    name: str
    condition: str  # e.g., "pool_utilization > 0.9"
    threshold: float
    duration_seconds: int = 60  # How long condition must be true
    cooldown_seconds: int = 300  # Prevent alert spam
    severity: str = "warning"  # info, warning, error, critical
    enabled: bool = True
    last_triggered: Optional[float] = None

    def is_in_cooldown(self) -> bool:
        """Return True while the rule is still inside its cooldown window."""
        if not self.last_triggered:
            return False
        elapsed = time.time() - self.last_triggered
        return elapsed < self.cooldown_seconds
63
+
64
+
65
@dataclass
class Alert:
    """A single firing (or resolved) instance of an alert rule."""

    rule_id: str
    triggered_at: float
    severity: str
    message: str
    metric_value: float
    resolved: bool = False
    resolved_at: Optional[float] = None

    def duration(self) -> float:
        """Seconds the alert has been (or was) active.

        Uses the resolution time when set, otherwise "now".
        """
        return (self.resolved_at or time.time()) - self.triggered_at
81
+
82
+
83
class MetricsCache:
    """In-memory store for metric samples with time-based expiration.

    Samples are appended per metric name, stamped with the current time,
    and pruned at most once per hour to honour the retention window.
    """

    def __init__(self, retention_hours: int = 24):
        """Create a cache that keeps samples for *retention_hours*."""
        self.retention_hours = retention_hours
        self._data: Dict[str, List[Dict[str, Any]]] = {}
        self._last_cleanup = time.time()

    def add(self, metric_name: str, value: Dict[str, Any]):
        """Record one sample for *metric_name*.

        Note: *value* is mutated in place — a ``"timestamp"`` key is
        stamped onto it before it is stored.
        """
        value["timestamp"] = time.time()
        self._data.setdefault(metric_name, []).append(value)

        # Opportunistic pruning piggybacks on writes, at most hourly.
        if time.time() - self._last_cleanup > 3600:
            self._cleanup()

    def get_recent(self, metric_name: str, minutes: int = 60) -> List[Dict[str, Any]]:
        """Return samples for *metric_name* newer than *minutes* ago."""
        samples = self._data.get(metric_name)
        if samples is None:
            return []
        cutoff = time.time() - minutes * 60
        return [s for s in samples if s["timestamp"] >= cutoff]

    def _cleanup(self):
        """Drop samples older than the retention window and empty series."""
        cutoff = time.time() - self.retention_hours * 3600
        for name in list(self._data):
            kept = [s for s in self._data[name] if s["timestamp"] >= cutoff]
            if kept:
                self._data[name] = kept
            else:
                # Remove metrics that no longer have any samples.
                del self._data[name]
        self._last_cleanup = time.time()
126
+
127
+
128
@register_node()
class ConnectionDashboardNode(Node):
    """Web-based monitoring dashboard for connection pools.

    Serves an HTML dashboard plus a small JSON/WebSocket API that
    visualises pool metrics, health scores, and alerts in real time.
    """

    def __init__(self, **config):
        """Initialize dashboard node.

        Args:
            port: Web server port (default: 8080)
            host: Web server host (default: localhost)
            update_interval: Metric update interval in seconds (default: 1.0)
            retention_hours: How long to keep historical data (default: 24)
            enable_alerts: Enable alert system (default: True)
        """
        # Pull our own settings out of config before handing it to Node.
        settings = {
            "port": 8080,
            "host": "localhost",
            "update_interval": 1.0,
            "retention_hours": 24,
            "enable_alerts": True,
        }
        for key, fallback in settings.items():
            setattr(self, key, config.get(key, fallback))

        super().__init__(**config)

        # aiohttp server plumbing — populated by start(), cleared by stop().
        self.app = None
        self.runner = None
        self.site = None

        # Currently-connected WebSocket clients.
        self._websockets: Set[web.WebSocketResponse] = set()

        # Rolling metric history for /api/history.
        self._cache = MetricsCache(self.retention_hours)

        # Alert rules, currently-firing alerts, and past alerts.
        self._alert_rules: Dict[str, AlertRule] = {}
        self._active_alerts: Dict[str, Alert] = {}
        self._alert_history: List[Alert] = []

        # Background refresh task — created by start().
        self._update_task: Optional[asyncio.Task] = None

        self._init_default_alerts()
175
+
176
+ def get_parameters(self) -> Dict[str, NodeParameter]:
177
+ """Get node parameters."""
178
+ return {
179
+ "port": NodeParameter(
180
+ name="port", type=int, default=8080, description="Web server port"
181
+ ),
182
+ "host": NodeParameter(
183
+ name="host",
184
+ type=str,
185
+ default="localhost",
186
+ description="Web server host",
187
+ ),
188
+ "update_interval": NodeParameter(
189
+ name="update_interval",
190
+ type=float,
191
+ default=1.0,
192
+ description="Metric update interval in seconds",
193
+ ),
194
+ "retention_hours": NodeParameter(
195
+ name="retention_hours",
196
+ type=int,
197
+ default=24,
198
+ description="Historical data retention in hours",
199
+ ),
200
+ "enable_alerts": NodeParameter(
201
+ name="enable_alerts",
202
+ type=bool,
203
+ default=True,
204
+ description="Enable alert system",
205
+ ),
206
+ "action": NodeParameter(
207
+ name="action",
208
+ type=str,
209
+ required=False,
210
+ description="Dashboard action",
211
+ choices=["start", "stop", "status"],
212
+ ),
213
+ }
214
+
215
+ async def execute(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
216
+ """Execute dashboard action.
217
+
218
+ Actions:
219
+ - start: Start the dashboard server
220
+ - stop: Stop the dashboard server
221
+ - status: Get dashboard status
222
+ """
223
+ action = input_data.get("action", "status")
224
+
225
+ if action == "start":
226
+ await self.start()
227
+ return {
228
+ "status": "started",
229
+ "url": f"http://{self.host}:{self.port}",
230
+ "websocket": f"ws://{self.host}:{self.port}/ws",
231
+ }
232
+
233
+ elif action == "stop":
234
+ await self.stop()
235
+ return {"status": "stopped"}
236
+
237
+ else:
238
+ return self.get_status()
239
+
240
+ async def start(self):
241
+ """Start the dashboard web server."""
242
+ if self.app:
243
+ logger.warning("Dashboard already running")
244
+ return
245
+
246
+ # Create web app
247
+ self.app = web.Application()
248
+
249
+ # Setup CORS
250
+ cors = aiohttp_cors.setup(
251
+ self.app,
252
+ defaults={
253
+ "*": aiohttp_cors.ResourceOptions(
254
+ allow_credentials=True, expose_headers="*", allow_headers="*"
255
+ )
256
+ },
257
+ )
258
+
259
+ # Add routes
260
+ self.app.router.add_get("/", self._handle_index)
261
+ self.app.router.add_get("/api/metrics", self._handle_metrics)
262
+ self.app.router.add_get("/api/pools", self._handle_pools)
263
+ self.app.router.add_get("/api/alerts", self._handle_alerts)
264
+ self.app.router.add_post("/api/alerts", self._handle_create_alert)
265
+ self.app.router.add_delete("/api/alerts/{alert_id}", self._handle_delete_alert)
266
+ self.app.router.add_get("/api/history/{metric_name}", self._handle_history)
267
+ self.app.router.add_get("/ws", self._handle_websocket)
268
+
269
+ # Configure CORS for all routes
270
+ for route in list(self.app.router.routes()):
271
+ cors.add(route)
272
+
273
+ # Start server
274
+ self.runner = web.AppRunner(self.app)
275
+ await self.runner.setup()
276
+ self.site = web.TCPSite(self.runner, self.host, self.port)
277
+ await self.site.start()
278
+
279
+ # Start update task
280
+ self._update_task = asyncio.create_task(self._update_loop())
281
+
282
+ logger.info(f"Dashboard started at http://{self.host}:{self.port}")
283
+
284
+ async def stop(self):
285
+ """Stop the dashboard web server."""
286
+ # Stop update task
287
+ if self._update_task:
288
+ self._update_task.cancel()
289
+ try:
290
+ await self._update_task
291
+ except asyncio.CancelledError:
292
+ pass
293
+
294
+ # Close WebSocket connections
295
+ for ws in list(self._websockets):
296
+ await ws.close()
297
+
298
+ # Stop web server
299
+ if self.site:
300
+ await self.site.stop()
301
+ if self.runner:
302
+ await self.runner.cleanup()
303
+
304
+ self.app = None
305
+ self.runner = None
306
+ self.site = None
307
+
308
+ logger.info("Dashboard stopped")
309
+
310
+ async def _handle_index(self, request: web.Request) -> web.Response:
311
+ """Serve dashboard HTML."""
312
+ html = """
313
+ <!DOCTYPE html>
314
+ <html>
315
+ <head>
316
+ <title>Connection Pool Dashboard</title>
317
+ <style>
318
+ body { font-family: Arial, sans-serif; margin: 20px; background: #f5f5f5; }
319
+ .container { max-width: 1200px; margin: 0 auto; }
320
+ .pool-card { background: white; padding: 20px; margin: 10px 0; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }
321
+ .metric { display: inline-block; margin: 10px 20px 10px 0; }
322
+ .metric-label { color: #666; font-size: 12px; }
323
+ .metric-value { font-size: 24px; font-weight: bold; }
324
+ .health-bar { width: 100%; height: 20px; background: #ddd; border-radius: 10px; overflow: hidden; }
325
+ .health-fill { height: 100%; transition: width 0.3s, background-color 0.3s; }
326
+ .health-good { background: #4caf50; }
327
+ .health-warning { background: #ff9800; }
328
+ .health-critical { background: #f44336; }
329
+ .alert { padding: 10px; margin: 5px 0; border-radius: 4px; }
330
+ .alert-warning { background: #fff3cd; border: 1px solid #ffeeba; }
331
+ .alert-error { background: #f8d7da; border: 1px solid #f5c6cb; }
332
+ .alert-critical { background: #d1ecf1; border: 1px solid #bee5eb; }
333
+ .chart { width: 100%; height: 200px; margin: 20px 0; }
334
+ </style>
335
+ <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
336
+ </head>
337
+ <body>
338
+ <div class="container">
339
+ <h1>Connection Pool Dashboard</h1>
340
+
341
+ <div id="alerts"></div>
342
+
343
+ <h2>Connection Pools</h2>
344
+ <div id="pools"></div>
345
+
346
+ <h2>Metrics History</h2>
347
+ <canvas id="metricsChart" class="chart"></canvas>
348
+ </div>
349
+
350
+ <script>
351
+ // WebSocket connection
352
+ const ws = new WebSocket(`ws://${window.location.host}/ws`);
353
+
354
+ // Chart setup
355
+ const ctx = document.getElementById('metricsChart').getContext('2d');
356
+ const chart = new Chart(ctx, {
357
+ type: 'line',
358
+ data: {
359
+ labels: [],
360
+ datasets: [{
361
+ label: 'Pool Utilization',
362
+ data: [],
363
+ borderColor: 'rgb(75, 192, 192)',
364
+ tension: 0.1
365
+ }]
366
+ },
367
+ options: {
368
+ responsive: true,
369
+ scales: {
370
+ y: {
371
+ beginAtZero: true,
372
+ max: 1
373
+ }
374
+ }
375
+ }
376
+ });
377
+
378
+ // Update functions
379
+ function updatePools(pools) {
380
+ const container = document.getElementById('pools');
381
+ container.innerHTML = '';
382
+
383
+ for (const [name, pool] of Object.entries(pools)) {
384
+ const healthClass = pool.health_score > 80 ? 'health-good' :
385
+ pool.health_score > 60 ? 'health-warning' : 'health-critical';
386
+
387
+ container.innerHTML += `
388
+ <div class="pool-card">
389
+ <h3>${name}</h3>
390
+
391
+ <div class="health-bar">
392
+ <div class="health-fill ${healthClass}" style="width: ${pool.health_score}%"></div>
393
+ </div>
394
+
395
+ <div class="metric">
396
+ <div class="metric-label">Active Connections</div>
397
+ <div class="metric-value">${pool.active_connections}</div>
398
+ </div>
399
+
400
+ <div class="metric">
401
+ <div class="metric-label">Total Connections</div>
402
+ <div class="metric-value">${pool.total_connections}</div>
403
+ </div>
404
+
405
+ <div class="metric">
406
+ <div class="metric-label">Utilization</div>
407
+ <div class="metric-value">${(pool.utilization * 100).toFixed(1)}%</div>
408
+ </div>
409
+
410
+ <div class="metric">
411
+ <div class="metric-label">Queries/sec</div>
412
+ <div class="metric-value">${pool.queries_per_second.toFixed(1)}</div>
413
+ </div>
414
+
415
+ <div class="metric">
416
+ <div class="metric-label">Avg Query Time</div>
417
+ <div class="metric-value">${pool.avg_query_time_ms.toFixed(1)}ms</div>
418
+ </div>
419
+
420
+ <div class="metric">
421
+ <div class="metric-label">Error Rate</div>
422
+ <div class="metric-value">${(pool.error_rate * 100).toFixed(2)}%</div>
423
+ </div>
424
+ </div>
425
+ `;
426
+ }
427
+ }
428
+
429
+ function updateAlerts(alerts) {
430
+ const container = document.getElementById('alerts');
431
+ container.innerHTML = '';
432
+
433
+ if (alerts.length === 0) return;
434
+
435
+ container.innerHTML = '<h2>Active Alerts</h2>';
436
+
437
+ for (const alert of alerts) {
438
+ const alertClass = `alert-${alert.severity}`;
439
+ container.innerHTML += `
440
+ <div class="alert ${alertClass}">
441
+ <strong>${alert.message}</strong> -
442
+ ${new Date(alert.triggered_at * 1000).toLocaleTimeString()}
443
+ </div>
444
+ `;
445
+ }
446
+ }
447
+
448
+ function updateChart(data) {
449
+ // Update chart with latest data
450
+ if (data.timestamp && data.utilization !== undefined) {
451
+ const time = new Date(data.timestamp * 1000).toLocaleTimeString();
452
+
453
+ chart.data.labels.push(time);
454
+ chart.data.datasets[0].data.push(data.utilization);
455
+
456
+ // Keep only last 60 points
457
+ if (chart.data.labels.length > 60) {
458
+ chart.data.labels.shift();
459
+ chart.data.datasets[0].data.shift();
460
+ }
461
+
462
+ chart.update('none'); // No animation for real-time
463
+ }
464
+ }
465
+
466
+ // WebSocket handlers
467
+ ws.onmessage = (event) => {
468
+ const data = JSON.parse(event.data);
469
+
470
+ if (data.type === 'pools') {
471
+ updatePools(data.pools);
472
+ } else if (data.type === 'alerts') {
473
+ updateAlerts(data.alerts);
474
+ } else if (data.type === 'metrics') {
475
+ updateChart(data.data);
476
+ }
477
+ };
478
+
479
+ ws.onerror = (error) => {
480
+ console.error('WebSocket error:', error);
481
+ };
482
+
483
+ // Initial load
484
+ fetch('/api/pools').then(r => r.json()).then(data => updatePools(data));
485
+ fetch('/api/alerts').then(r => r.json()).then(data => updateAlerts(data.active));
486
+ </script>
487
+ </body>
488
+ </html>
489
+ """
490
+ return web.Response(text=html, content_type="text/html")
491
+
492
+ async def _handle_metrics(self, request: web.Request) -> web.Response:
493
+ """Get current metrics."""
494
+ metrics = await self._collect_metrics()
495
+ return web.json_response(metrics)
496
+
497
+ async def _handle_pools(self, request: web.Request) -> web.Response:
498
+ """Get pool information."""
499
+ pools = await self._get_pool_info()
500
+ return web.json_response(pools)
501
+
502
+ async def _handle_alerts(self, request: web.Request) -> web.Response:
503
+ """Get alerts."""
504
+ return web.json_response(
505
+ {
506
+ "active": [
507
+ {
508
+ "rule_id": alert.rule_id,
509
+ "triggered_at": alert.triggered_at,
510
+ "severity": alert.severity,
511
+ "message": alert.message,
512
+ "duration": alert.duration(),
513
+ }
514
+ for alert in self._active_alerts.values()
515
+ if not alert.resolved
516
+ ],
517
+ "rules": [
518
+ {
519
+ "id": rule.id,
520
+ "name": rule.name,
521
+ "condition": rule.condition,
522
+ "threshold": rule.threshold,
523
+ "severity": rule.severity,
524
+ "enabled": rule.enabled,
525
+ }
526
+ for rule in self._alert_rules.values()
527
+ ],
528
+ "history": [
529
+ {
530
+ "rule_id": alert.rule_id,
531
+ "triggered_at": alert.triggered_at,
532
+ "resolved_at": alert.resolved_at,
533
+ "severity": alert.severity,
534
+ "message": alert.message,
535
+ "duration": alert.duration(),
536
+ }
537
+ for alert in self._alert_history[-20:] # Last 20 alerts
538
+ ],
539
+ }
540
+ )
541
+
542
+ async def _handle_create_alert(self, request: web.Request) -> web.Response:
543
+ """Create new alert rule."""
544
+ data = await request.json()
545
+
546
+ rule = AlertRule(
547
+ id=f"rule_{len(self._alert_rules)}",
548
+ name=data["name"],
549
+ condition=data["condition"],
550
+ threshold=data["threshold"],
551
+ duration_seconds=data.get("duration_seconds", 60),
552
+ severity=data.get("severity", "warning"),
553
+ )
554
+
555
+ self._alert_rules[rule.id] = rule
556
+
557
+ return web.json_response({"id": rule.id})
558
+
559
+ async def _handle_delete_alert(self, request: web.Request) -> web.Response:
560
+ """Delete alert rule."""
561
+ alert_id = request.match_info["alert_id"]
562
+
563
+ if alert_id in self._alert_rules:
564
+ del self._alert_rules[alert_id]
565
+ return web.json_response({"deleted": True})
566
+
567
+ return web.json_response({"error": "Alert not found"}, status=404)
568
+
569
+ async def _handle_history(self, request: web.Request) -> web.Response:
570
+ """Get metric history."""
571
+ metric_name = request.match_info["metric_name"]
572
+ minutes = int(request.query.get("minutes", 60))
573
+
574
+ history = self._cache.get_recent(metric_name, minutes)
575
+
576
+ return web.json_response(history)
577
+
578
+ async def _handle_websocket(self, request: web.Request) -> web.WebSocketResponse:
579
+ """Handle WebSocket connection."""
580
+ ws = web.WebSocketResponse()
581
+ await ws.prepare(request)
582
+
583
+ self._websockets.add(ws)
584
+
585
+ try:
586
+ # Send initial data
587
+ pools = await self._get_pool_info()
588
+ await ws.send_json({"type": "pools", "pools": pools})
589
+
590
+ # Keep connection alive
591
+ async for msg in ws:
592
+ if msg.type == web.WSMsgType.TEXT:
593
+ # Handle client messages if needed
594
+ pass
595
+ elif msg.type == web.WSMsgType.ERROR:
596
+ logger.error(f"WebSocket error: {ws.exception()}")
597
+
598
+ finally:
599
+ self._websockets.discard(ws)
600
+
601
+ return ws
602
+
603
+ async def _update_loop(self):
604
+ """Periodically update metrics and check alerts."""
605
+ while True:
606
+ try:
607
+ # Collect metrics
608
+ metrics = await self._collect_metrics()
609
+ pools = await self._get_pool_info()
610
+
611
+ # Update cache
612
+ for pool_name, pool_data in pools.items():
613
+ self._cache.add(
614
+ f"{pool_name}_utilization",
615
+ {"value": pool_data["utilization"], "pool": pool_name},
616
+ )
617
+
618
+ # Check alerts
619
+ if self.enable_alerts:
620
+ await self._check_alerts(pools)
621
+
622
+ # Broadcast to WebSocket clients
623
+ await self._broadcast({"type": "pools", "pools": pools})
624
+
625
+ # Send sample metrics for chart
626
+ if pools:
627
+ first_pool = next(iter(pools.values()))
628
+ await self._broadcast(
629
+ {
630
+ "type": "metrics",
631
+ "data": {
632
+ "timestamp": time.time(),
633
+ "utilization": first_pool["utilization"],
634
+ },
635
+ }
636
+ )
637
+
638
+ # Send active alerts
639
+ active_alerts = [
640
+ {
641
+ "rule_id": alert.rule_id,
642
+ "triggered_at": alert.triggered_at,
643
+ "severity": alert.severity,
644
+ "message": alert.message,
645
+ }
646
+ for alert in self._active_alerts.values()
647
+ if not alert.resolved
648
+ ]
649
+
650
+ if active_alerts:
651
+ await self._broadcast({"type": "alerts", "alerts": active_alerts})
652
+
653
+ except Exception as e:
654
+ logger.error(f"Error in update loop: {e}")
655
+
656
+ await asyncio.sleep(self.update_interval)
657
+
658
+ async def _collect_metrics(self) -> Dict[str, Any]:
659
+ """Collect metrics from all pools."""
660
+ # This would integrate with the MetricsAggregator
661
+ # For now, return sample data
662
+ return {"timestamp": time.time(), "pools": await self._get_pool_info()}
663
+
664
+ async def _get_pool_info(self) -> Dict[str, Dict[str, Any]]:
665
+ """Get information about all pools."""
666
+ # This would get real data from connection pools
667
+ # For now, return sample data
668
+
669
+ # Try to get real pools from resource registry
670
+ pools_info = {}
671
+
672
+ if hasattr(self, "runtime") and hasattr(self.runtime, "resource_registry"):
673
+ resources = self.runtime.resource_registry.list_resources()
674
+
675
+ for name, resource in resources.items():
676
+ if hasattr(resource, "get_pool_statistics"):
677
+ try:
678
+ stats = await resource.get_pool_statistics()
679
+ pools_info[name] = {
680
+ "health_score": stats.get("health_score", 100),
681
+ "active_connections": stats.get("active_connections", 0),
682
+ "total_connections": stats.get("total_connections", 0),
683
+ "utilization": stats.get("utilization", 0.0),
684
+ "queries_per_second": stats.get("queries_per_second", 0.0),
685
+ "avg_query_time_ms": stats.get("avg_query_time_ms", 0.0),
686
+ "error_rate": stats.get("error_rate", 0.0),
687
+ }
688
+ except Exception as e:
689
+ logger.error(f"Error getting stats for pool {name}: {e}")
690
+
691
+ # If no real pools, return sample data
692
+ if not pools_info:
693
+ pools_info = {
694
+ "main_pool": {
695
+ "health_score": 85,
696
+ "active_connections": 8,
697
+ "total_connections": 10,
698
+ "utilization": 0.8,
699
+ "queries_per_second": 150.5,
700
+ "avg_query_time_ms": 12.3,
701
+ "error_rate": 0.002,
702
+ }
703
+ }
704
+
705
+ return pools_info
706
+
707
+ async def _check_alerts(self, pools: Dict[str, Dict[str, Any]]):
708
+ """Check alert conditions."""
709
+ for rule in self._alert_rules.values():
710
+ if not rule.enabled or rule.is_in_cooldown():
711
+ continue
712
+
713
+ # Simple condition evaluation (in production, use proper parser)
714
+ triggered = False
715
+ metric_value = 0.0
716
+
717
+ for pool_name, pool_data in pools.items():
718
+ if (
719
+ "utilization" in rule.condition
720
+ and pool_data["utilization"] > rule.threshold
721
+ ):
722
+ triggered = True
723
+ metric_value = pool_data["utilization"]
724
+ break
725
+ elif (
726
+ "error_rate" in rule.condition
727
+ and pool_data["error_rate"] > rule.threshold
728
+ ):
729
+ triggered = True
730
+ metric_value = pool_data["error_rate"]
731
+ break
732
+
733
+ # Check if alert should be triggered
734
+ alert_key = f"{rule.id}_{int(time.time() / rule.duration_seconds)}"
735
+
736
+ if triggered:
737
+ if alert_key not in self._active_alerts:
738
+ alert = Alert(
739
+ rule_id=rule.id,
740
+ triggered_at=time.time(),
741
+ severity=rule.severity,
742
+ message=f"{rule.name}: {rule.condition} (value: {metric_value:.2f})",
743
+ metric_value=metric_value,
744
+ )
745
+
746
+ self._active_alerts[alert_key] = alert
747
+ self._alert_history.append(alert)
748
+ rule.last_triggered = time.time()
749
+
750
+ logger.warning(f"Alert triggered: {alert.message}")
751
+ else:
752
+ # Resolve alert if condition no longer met
753
+ if alert_key in self._active_alerts:
754
+ alert = self._active_alerts[alert_key]
755
+ alert.resolved = True
756
+ alert.resolved_at = time.time()
757
+
758
+ logger.info(f"Alert resolved: {rule.name}")
759
+
760
+ async def _broadcast(self, data: Dict[str, Any]):
761
+ """Broadcast data to all WebSocket clients."""
762
+ if not self._websockets:
763
+ return
764
+
765
+ # Send to all connected clients
766
+ disconnected = set()
767
+
768
+ for ws in self._websockets:
769
+ try:
770
+ await ws.send_json(data)
771
+ except ConnectionResetError:
772
+ disconnected.add(ws)
773
+
774
+ # Remove disconnected clients
775
+ self._websockets -= disconnected
776
+
777
+ def _init_default_alerts(self):
778
+ """Initialize default alert rules."""
779
+ default_rules = [
780
+ AlertRule(
781
+ id="high_utilization",
782
+ name="High Pool Utilization",
783
+ condition="pool_utilization > 0.9",
784
+ threshold=0.9,
785
+ duration_seconds=60,
786
+ severity="warning",
787
+ ),
788
+ AlertRule(
789
+ id="high_error_rate",
790
+ name="High Error Rate",
791
+ condition="error_rate > 0.05",
792
+ threshold=0.05,
793
+ duration_seconds=30,
794
+ severity="error",
795
+ ),
796
+ AlertRule(
797
+ id="pool_exhausted",
798
+ name="Pool Exhausted",
799
+ condition="pool_utilization >= 1.0",
800
+ threshold=1.0,
801
+ duration_seconds=10,
802
+ severity="critical",
803
+ ),
804
+ ]
805
+
806
+ for rule in default_rules:
807
+ self._alert_rules[rule.id] = rule
808
+
809
+ def get_status(self) -> Dict[str, Any]:
810
+ """Get dashboard status."""
811
+ return {
812
+ "running": self.app is not None,
813
+ "url": f"http://{self.host}:{self.port}" if self.app else None,
814
+ "websocket_clients": len(self._websockets),
815
+ "active_alerts": len(
816
+ [a for a in self._active_alerts.values() if not a.resolved]
817
+ ),
818
+ "alert_rules": len(self._alert_rules),
819
+ "cached_metrics": len(self._cache._data),
820
+ "update_interval": self.update_interval,
821
+ "retention_hours": self.retention_hours,
822
+ }