kailash 0.6.0__py3-none-any.whl → 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. kailash/__init__.py +1 -1
  2. kailash/access_control/__init__.py +1 -1
  3. kailash/core/actors/adaptive_pool_controller.py +630 -0
  4. kailash/core/actors/connection_actor.py +3 -3
  5. kailash/core/ml/__init__.py +1 -0
  6. kailash/core/ml/query_patterns.py +544 -0
  7. kailash/core/monitoring/__init__.py +19 -0
  8. kailash/core/monitoring/connection_metrics.py +488 -0
  9. kailash/core/optimization/__init__.py +1 -0
  10. kailash/core/resilience/__init__.py +17 -0
  11. kailash/core/resilience/circuit_breaker.py +382 -0
  12. kailash/gateway/api.py +7 -5
  13. kailash/gateway/enhanced_gateway.py +1 -1
  14. kailash/middleware/auth/access_control.py +11 -11
  15. kailash/middleware/communication/ai_chat.py +7 -7
  16. kailash/middleware/communication/api_gateway.py +5 -15
  17. kailash/middleware/gateway/checkpoint_manager.py +45 -8
  18. kailash/middleware/gateway/event_store.py +66 -26
  19. kailash/middleware/mcp/enhanced_server.py +2 -2
  20. kailash/nodes/admin/permission_check.py +110 -30
  21. kailash/nodes/admin/schema.sql +387 -0
  22. kailash/nodes/admin/tenant_isolation.py +249 -0
  23. kailash/nodes/admin/transaction_utils.py +244 -0
  24. kailash/nodes/admin/user_management.py +37 -9
  25. kailash/nodes/ai/ai_providers.py +55 -3
  26. kailash/nodes/ai/llm_agent.py +115 -13
  27. kailash/nodes/data/query_pipeline.py +641 -0
  28. kailash/nodes/data/query_router.py +895 -0
  29. kailash/nodes/data/sql.py +24 -0
  30. kailash/nodes/data/workflow_connection_pool.py +451 -23
  31. kailash/nodes/monitoring/__init__.py +3 -5
  32. kailash/nodes/monitoring/connection_dashboard.py +822 -0
  33. kailash/nodes/rag/__init__.py +1 -3
  34. kailash/resources/registry.py +6 -0
  35. kailash/runtime/async_local.py +7 -0
  36. kailash/utils/export.py +152 -0
  37. kailash/workflow/builder.py +42 -0
  38. kailash/workflow/graph.py +86 -17
  39. kailash/workflow/templates.py +4 -9
  40. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/METADATA +14 -1
  41. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/RECORD +45 -31
  42. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/WHEEL +0 -0
  43. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/entry_points.txt +0 -0
  44. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/licenses/LICENSE +0 -0
  45. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/top_level.txt +0 -0
kailash/core/monitoring/connection_metrics.py
@@ -0,0 +1,488 @@
+ """Comprehensive metrics collection for connection management.
+
+ This module provides detailed metrics collection for database connections,
+ query execution, pool utilization, and health monitoring. It supports
+ multiple metric backends and provides real-time and historical analysis.
+
+ Features:
+ - Connection acquisition time tracking
+ - Query execution latency histograms
+ - Pool utilization monitoring
+ - Health check success rates
+ - Error categorization and analysis
+ - Export to Prometheus, StatsD, CloudWatch
+
+ Example:
+     >>> metrics = ConnectionMetricsCollector("production_pool")
+     >>>
+     >>> # Track connection acquisition
+     >>> with metrics.track_acquisition() as timer:
+     ...     connection = await pool.acquire()
+     >>>
+     >>> # Track query execution
+     >>> with metrics.track_query("SELECT", "users") as timer:
+     ...     result = await connection.execute(query)
+ """
+
+ import asyncio
+ import logging
+ import statistics
+ import time
+ from collections import defaultdict, deque
+ from contextlib import contextmanager
+ from dataclasses import dataclass, field
+ from datetime import datetime, timedelta
+ from enum import Enum
+ from typing import Any, ContextManager, Dict, List, Optional, Tuple
+
+ logger = logging.getLogger(__name__)
+
+
+ class MetricType(Enum):
+     """Types of metrics collected."""
+
+     COUNTER = "counter"
+     GAUGE = "gauge"
+     HISTOGRAM = "histogram"
+     TIMER = "timer"
+
+
+ class ErrorCategory(Enum):
+     """Categories of connection errors."""
+
+     CONNECTION_TIMEOUT = "connection_timeout"
+     CONNECTION_REFUSED = "connection_refused"
+     AUTHENTICATION_FAILED = "authentication_failed"
+     QUERY_TIMEOUT = "query_timeout"
+     QUERY_ERROR = "query_error"
+     POOL_EXHAUSTED = "pool_exhausted"
+     HEALTH_CHECK_FAILED = "health_check_failed"
+     UNKNOWN = "unknown"
+
+
+ @dataclass
+ class MetricPoint:
+     """Single metric data point."""
+
+     timestamp: float
+     value: float
+     labels: Dict[str, str] = field(default_factory=dict)
+
+
+ @dataclass
+ class HistogramData:
+     """Histogram data with percentiles."""
+
+     count: int
+     sum: float
+     min: float
+     max: float
+     p50: float
+     p75: float
+     p90: float
+     p95: float
+     p99: float
+
+     @classmethod
+     def from_values(cls, values: List[float]) -> "HistogramData":
+         """Create histogram data from values."""
+         if not values:
+             return cls(0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
+
+         sorted_values = sorted(values)
+         return cls(
+             count=len(values),
+             sum=sum(values),
+             min=sorted_values[0],
+             max=sorted_values[-1],
+             p50=cls._percentile(sorted_values, 0.50),
+             p75=cls._percentile(sorted_values, 0.75),
+             p90=cls._percentile(sorted_values, 0.90),
+             p95=cls._percentile(sorted_values, 0.95),
+             p99=cls._percentile(sorted_values, 0.99),
+         )
+
+     @staticmethod
+     def _percentile(sorted_values: List[float], percentile: float) -> float:
+         """Calculate percentile from sorted values."""
+         index = int(len(sorted_values) * percentile)
+         return sorted_values[min(index, len(sorted_values) - 1)]
+
+
+ class TimerContext:
+     """Context manager for timing operations."""
+
+     def __init__(self, callback):
+         self.callback = callback
+         self.start_time = None
+         self.duration = None
+
+     def __enter__(self):
+         self.start_time = time.time()
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         self.duration = (time.time() - self.start_time) * 1000  # Convert to ms
+         self.callback(self.duration)
+
+
+ class ConnectionMetricsCollector:
+     """Collects comprehensive metrics for connection management."""
+
+     def __init__(self, pool_name: str, retention_minutes: int = 60):
+         """Initialize metrics collector.
+
+         Args:
+             pool_name: Name of the connection pool
+             retention_minutes: How long to retain detailed metrics
+         """
+         self.pool_name = pool_name
+         self.retention_minutes = retention_minutes
+
+         # Counters
+         self._counters: Dict[str, int] = defaultdict(int)
+
+         # Gauges
+         self._gauges: Dict[str, float] = defaultdict(float)
+
+         # Histograms (using deques for sliding window)
+         self._histograms: Dict[str, deque] = defaultdict(
+             lambda: deque(maxlen=10000)  # Keep last 10k samples
+         )
+
+         # Time series data
+         self._time_series: Dict[str, deque] = defaultdict(
+             lambda: deque(maxlen=retention_minutes * 60)  # 1 sample per second
+         )
+
+         # Error tracking
+         self._errors: Dict[ErrorCategory, int] = defaultdict(int)
+         self._error_details: deque = deque(maxlen=1000)
+
+         # Query tracking
+         self._query_stats: Dict[str, Dict[str, Any]] = defaultdict(
+             lambda: {"count": 0, "total_time": 0.0, "errors": 0}
+         )
+
+         self._start_time = time.time()
+
+     # Connection metrics
+
+     def track_acquisition(self) -> TimerContext:
+         """Track connection acquisition time."""
+
+         def record(duration_ms: float):
+             self._histograms["connection_acquisition_ms"].append(duration_ms)
+             self._counters["connections_acquired"] += 1
+             self._record_time_series("acquisition_time_ms", duration_ms)
+
+         return TimerContext(record)
+
+     def track_release(self, reusable: bool = True):
+         """Track connection release."""
+         self._counters["connections_released"] += 1
+         if reusable:
+             self._counters["connections_reused"] += 1
+         else:
+             self._counters["connections_discarded"] += 1
+
+     def track_creation(self) -> TimerContext:
+         """Track new connection creation time."""
+
+         def record(duration_ms: float):
+             self._histograms["connection_creation_ms"].append(duration_ms)
+             self._counters["connections_created"] += 1
+
+         return TimerContext(record)
+
+     # Query metrics
+
+     def track_query(self, query_type: str, table: Optional[str] = None) -> TimerContext:
+         """Track query execution time.
+
+         Args:
+             query_type: Type of query (SELECT, INSERT, UPDATE, DELETE, etc.)
+             table: Optional table name
+         """
+         query_key = f"{query_type}:{table or 'unknown'}"
+
+         def record(duration_ms: float):
+             self._histograms["query_execution_ms"].append(duration_ms)
+             self._histograms[f"query_{query_type.lower()}_ms"].append(duration_ms)
+             self._counters[f"queries_{query_type.lower()}"] += 1
+             self._counters["queries_total"] += 1
+
+             # Update query stats
+             stats = self._query_stats[query_key]
+             stats["count"] += 1
+             stats["total_time"] += duration_ms
+
+             self._record_time_series("query_rate", 1.0)
+
+         return TimerContext(record)
+
+     def track_query_error(self, query_type: str, error: Exception):
+         """Track query execution error."""
+         self._counters["query_errors"] += 1
+         self._counters[f"query_errors_{query_type.lower()}"] += 1
+
+         error_category = self._categorize_error(error)
+         self._errors[error_category] += 1
+
+         # Store error details
+         self._error_details.append(
+             {
+                 "timestamp": datetime.now().isoformat(),
+                 "query_type": query_type,
+                 "error_type": type(error).__name__,
+                 "error_message": str(error),
+                 "category": error_category.value,
+             }
+         )
+
+     # Pool metrics
+
+     def update_pool_stats(self, active: int, idle: int, total: int):
+         """Update pool utilization statistics."""
+         self._gauges["pool_connections_active"] = active
+         self._gauges["pool_connections_idle"] = idle
+         self._gauges["pool_connections_total"] = total
+         self._gauges["pool_utilization"] = active / total if total > 0 else 0.0
+
+         self._record_time_series("pool_active", active)
+         self._record_time_series("pool_utilization", self._gauges["pool_utilization"])
+
+     def track_pool_exhaustion(self):
+         """Track pool exhaustion event."""
+         self._counters["pool_exhaustion_events"] += 1
+         self._errors[ErrorCategory.POOL_EXHAUSTED] += 1
+
+     # Health metrics
+
+     def track_health_check(self, success: bool, duration_ms: float):
+         """Track health check result."""
+         self._counters["health_checks_total"] += 1
+         if success:
+             self._counters["health_checks_success"] += 1
+         else:
+             self._counters["health_checks_failed"] += 1
+             self._errors[ErrorCategory.HEALTH_CHECK_FAILED] += 1
+
+         self._histograms["health_check_duration_ms"].append(duration_ms)
+
+         # Calculate success rate
+         total = self._counters["health_checks_total"]
+         success_count = self._counters["health_checks_success"]
+         self._gauges["health_check_success_rate"] = (
+             success_count / total if total > 0 else 0.0
+         )
+
+     # Error categorization
+
+     def _categorize_error(self, error: Exception) -> ErrorCategory:
+         """Categorize error for tracking."""
+         error_msg = str(error).lower()
+         error_type = type(error).__name__.lower()
+
+         if "timeout" in error_msg:
+             if "connection" in error_msg:
+                 return ErrorCategory.CONNECTION_TIMEOUT
+             else:
+                 return ErrorCategory.QUERY_TIMEOUT
+         elif "refused" in error_msg or "unavailable" in error_msg:
+             return ErrorCategory.CONNECTION_REFUSED
+         elif "authentication" in error_msg or "password" in error_msg:
+             return ErrorCategory.AUTHENTICATION_FAILED
+         elif "pool" in error_msg and "exhausted" in error_msg:
+             return ErrorCategory.POOL_EXHAUSTED
+         elif "syntax" in error_msg or "column" in error_msg:
+             return ErrorCategory.QUERY_ERROR
+         else:
+             return ErrorCategory.UNKNOWN
+
+     # Time series recording
+
+     def _record_time_series(self, metric_name: str, value: float):
+         """Record time series data point."""
+         self._time_series[metric_name].append(
+             MetricPoint(
+                 timestamp=time.time(), value=value, labels={"pool": self.pool_name}
+             )
+         )
+
+     # Metric retrieval
+
+     def get_histogram(self, metric_name: str) -> Optional[HistogramData]:
+         """Get histogram data for metric."""
+         values = list(self._histograms.get(metric_name, []))
+         if not values:
+             return None
+         return HistogramData.from_values(values)
+
+     def get_time_series(
+         self, metric_name: str, minutes: Optional[int] = None
+     ) -> List[MetricPoint]:
+         """Get time series data for metric."""
+         points = list(self._time_series.get(metric_name, []))
+
+         if minutes:
+             cutoff = time.time() - (minutes * 60)
+             points = [p for p in points if p.timestamp >= cutoff]
+
+         return points
+
+     def get_error_summary(self) -> Dict[str, Any]:
+         """Get error summary statistics."""
+         total_errors = sum(self._errors.values())
+         return {
+             "total_errors": total_errors,
+             "errors_by_category": {
+                 category.value: count
+                 for category, count in self._errors.items()
+                 if count > 0
+             },
+             "error_rate": total_errors / max(1, self._counters["queries_total"]),
+             "recent_errors": list(self._error_details)[-10:],  # Last 10 errors
+         }
+
+     def get_query_summary(self) -> Dict[str, Any]:
+         """Get query execution summary."""
+         summaries = {}
+
+         for query_key, stats in self._query_stats.items():
+             if stats["count"] == 0:
+                 continue
+
+             avg_time = stats["total_time"] / stats["count"]
+             summaries[query_key] = {
+                 "count": stats["count"],
+                 "avg_time_ms": avg_time,
+                 "total_time_ms": stats["total_time"],
+                 "errors": stats["errors"],
+                 "error_rate": stats["errors"] / stats["count"],
+             }
+
+         return summaries
+
+     def get_all_metrics(self) -> Dict[str, Any]:
+         """Get comprehensive metrics snapshot."""
+         uptime_seconds = time.time() - self._start_time
+
+         return {
+             "pool_name": self.pool_name,
+             "uptime_seconds": uptime_seconds,
+             "counters": dict(self._counters),
+             "gauges": dict(self._gauges),
+             "histograms": {
+                 name: self.get_histogram(name).__dict__
+                 for name in self._histograms
+                 if self.get_histogram(name)
+             },
+             "errors": self.get_error_summary(),
+             "queries": self.get_query_summary(),
+             "rates": {
+                 "queries_per_second": self._counters["queries_total"] / uptime_seconds,
+                 "errors_per_second": sum(self._errors.values()) / uptime_seconds,
+                 "connections_per_second": self._counters["connections_created"]
+                 / uptime_seconds,
+             },
+         }
+
+     def export_prometheus(self) -> str:
+         """Export metrics in Prometheus format."""
+         lines = []
+
+         # Add metadata
+         lines.append("# HELP connection_pool_info Connection pool information")
+         lines.append("# TYPE connection_pool_info gauge")
+         lines.append(f'connection_pool_info{{pool="{self.pool_name}"}} 1')
+
+         # Export counters
+         for name, value in self._counters.items():
+             metric_name = f"connection_pool_{name}"
+             lines.append(f"# TYPE {metric_name} counter")
+             lines.append(f'{metric_name}{{pool="{self.pool_name}"}} {value}')
+
+         # Export gauges
+         for name, value in self._gauges.items():
+             metric_name = f"connection_pool_{name}"
+             lines.append(f"# TYPE {metric_name} gauge")
+             lines.append(f'{metric_name}{{pool="{self.pool_name}"}} {value}')
+
+         # Export histograms
+         for name, values in self._histograms.items():
+             if values:
+                 hist = HistogramData.from_values(list(values))
+                 metric_name = f"connection_pool_{name}"
+                 lines.append(f"# TYPE {metric_name} histogram")
+                 lines.append(
+                     f'{metric_name}_count{{pool="{self.pool_name}"}} {hist.count}'
+                 )
+                 lines.append(f'{metric_name}_sum{{pool="{self.pool_name}"}} {hist.sum}')
+
+                 for percentile, value in [
+                     (0.5, hist.p50),
+                     (0.75, hist.p75),
+                     (0.9, hist.p90),
+                     (0.95, hist.p95),
+                     (0.99, hist.p99),
+                 ]:
+                     lines.append(
+                         f'{metric_name}{{pool="{self.pool_name}",quantile="{percentile}"}} {value}'
+                     )
+
+         return "\n".join(lines)
+
+     def reset(self):
+         """Reset all metrics (useful for testing)."""
+         self._counters.clear()
+         self._gauges.clear()
+         self._histograms.clear()
+         self._time_series.clear()
+         self._errors.clear()
+         self._error_details.clear()
+         self._query_stats.clear()
+         self._start_time = time.time()
+
+
+ class MetricsAggregator:
+     """Aggregates metrics from multiple collectors."""
+
+     def __init__(self):
+         """Initialize metrics aggregator."""
+         self._collectors: Dict[str, ConnectionMetricsCollector] = {}
+
+     def register_collector(self, collector: ConnectionMetricsCollector):
+         """Register a metrics collector."""
+         self._collectors[collector.pool_name] = collector
+
+     def get_global_metrics(self) -> Dict[str, Any]:
+         """Get aggregated metrics from all collectors."""
+         total_queries = 0
+         total_errors = 0
+         total_connections = 0
+         all_pool_metrics = {}
+
+         for name, collector in self._collectors.items():
+             metrics = collector.get_all_metrics()
+             all_pool_metrics[name] = metrics
+
+             total_queries += metrics["counters"].get("queries_total", 0)
+             total_errors += metrics["errors"]["total_errors"]
+             total_connections += metrics["gauges"].get("pool_connections_total", 0)
+
+         return {
+             "total_pools": len(self._collectors),
+             "total_queries": total_queries,
+             "total_errors": total_errors,
+             "total_connections": total_connections,
+             "global_error_rate": total_errors / max(1, total_queries),
+             "pools": all_pool_metrics,
+         }
+
+     def export_all_prometheus(self) -> str:
+         """Export all metrics in Prometheus format."""
+         outputs = []
+         for collector in self._collectors.values():
+             outputs.append(collector.export_prometheus())
+         return "\n\n".join(outputs)
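The collector and aggregator added in this hunk are self-contained, so the new API can be exercised directly. The sketch below uses only the classes shown above; the pool name, the sleep calls standing in for real acquisition and query work, and the simulated TimeoutError are illustrative and not part of the package.

    import time

    from kailash.core.monitoring.connection_metrics import (
        ConnectionMetricsCollector,
        MetricsAggregator,
    )

    # One collector per pool; "reporting_pool" is an illustrative name.
    metrics = ConnectionMetricsCollector("reporting_pool", retention_minutes=30)

    # Time a (simulated) connection acquisition; the timer records on __exit__.
    with metrics.track_acquisition():
        time.sleep(0.01)  # stand-in for: connection = await pool.acquire()

    # Time a (simulated) query and record an error for categorization.
    with metrics.track_query("SELECT", "users"):
        time.sleep(0.005)  # stand-in for: result = await connection.execute(query)
    metrics.track_query_error("SELECT", TimeoutError("connection timeout after 5s"))

    # Gauge-style pool statistics and a health check result.
    metrics.update_pool_stats(active=3, idle=7, total=10)
    metrics.track_health_check(success=True, duration_ms=2.4)

    # Aggregate across pools and export in Prometheus exposition format.
    aggregator = MetricsAggregator()
    aggregator.register_collector(metrics)
    print(aggregator.get_global_metrics()["global_error_rate"])
    print(metrics.export_prometheus())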
kailash/core/optimization/__init__.py
@@ -0,0 +1 @@
+ """Optimization components for query and resource management."""
kailash/core/resilience/__init__.py
@@ -0,0 +1,17 @@
+ """Resilience patterns for connection management."""
+
+ from .circuit_breaker import (
+     CircuitBreakerConfig,
+     CircuitBreakerError,
+     CircuitBreakerManager,
+     CircuitState,
+     ConnectionCircuitBreaker,
+ )
+
+ __all__ = [
+     "CircuitBreakerConfig",
+     "CircuitBreakerError",
+     "CircuitBreakerManager",
+     "CircuitState",
+     "ConnectionCircuitBreaker",
+ ]
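The circuit_breaker module these names come from (+382 lines) is not included in this excerpt, so its constructors and method signatures are unknown here. For orientation only, the sketch below illustrates the generic circuit-breaker pattern behind names like CircuitState and ConnectionCircuitBreaker; the class, thresholds, and state strings are hypothetical and are not kailash's API.

    import time

    # Generic illustration of the circuit breaker pattern (not kailash's implementation):
    # after failure_threshold consecutive failures the breaker opens and rejects calls
    # until reset_timeout elapses, then allows a single trial call (half-open).
    class SimpleCircuitBreaker:
        def __init__(self, failure_threshold: int = 5, reset_timeout: float = 30.0):
            self.failure_threshold = failure_threshold
            self.reset_timeout = reset_timeout
            self.failures = 0
            self.state = "closed"  # closed -> open -> half_open -> closed
            self.opened_at = 0.0

        def call(self, fn, *args, **kwargs):
            if self.state == "open":
                if time.time() - self.opened_at >= self.reset_timeout:
                    self.state = "half_open"  # allow one trial call
                else:
                    raise RuntimeError("circuit open: call rejected")
            try:
                result = fn(*args, **kwargs)
            except Exception:
                self.failures += 1
                if self.failures >= self.failure_threshold or self.state == "half_open":
                    self.state = "open"
                    self.opened_at = time.time()
                raise
            else:
                self.failures = 0
                self.state = "closed"
                return result

    # Illustrative usage: wrap any callable that may fail.
    breaker = SimpleCircuitBreaker(failure_threshold=3, reset_timeout=10.0)
    # breaker.call(connect_to_database)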