kailash 0.6.0__py3-none-any.whl → 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. kailash/__init__.py +1 -1
  2. kailash/access_control/__init__.py +1 -1
  3. kailash/core/actors/adaptive_pool_controller.py +630 -0
  4. kailash/core/actors/connection_actor.py +3 -3
  5. kailash/core/ml/__init__.py +1 -0
  6. kailash/core/ml/query_patterns.py +544 -0
  7. kailash/core/monitoring/__init__.py +19 -0
  8. kailash/core/monitoring/connection_metrics.py +488 -0
  9. kailash/core/optimization/__init__.py +1 -0
  10. kailash/core/resilience/__init__.py +17 -0
  11. kailash/core/resilience/circuit_breaker.py +382 -0
  12. kailash/gateway/api.py +7 -5
  13. kailash/gateway/enhanced_gateway.py +1 -1
  14. kailash/middleware/auth/access_control.py +11 -11
  15. kailash/middleware/communication/ai_chat.py +7 -7
  16. kailash/middleware/communication/api_gateway.py +5 -15
  17. kailash/middleware/gateway/checkpoint_manager.py +45 -8
  18. kailash/middleware/gateway/event_store.py +66 -26
  19. kailash/middleware/mcp/enhanced_server.py +2 -2
  20. kailash/nodes/admin/permission_check.py +110 -30
  21. kailash/nodes/admin/schema.sql +387 -0
  22. kailash/nodes/admin/tenant_isolation.py +249 -0
  23. kailash/nodes/admin/transaction_utils.py +244 -0
  24. kailash/nodes/admin/user_management.py +37 -9
  25. kailash/nodes/ai/ai_providers.py +55 -3
  26. kailash/nodes/ai/llm_agent.py +115 -13
  27. kailash/nodes/data/query_pipeline.py +641 -0
  28. kailash/nodes/data/query_router.py +895 -0
  29. kailash/nodes/data/sql.py +24 -0
  30. kailash/nodes/data/workflow_connection_pool.py +451 -23
  31. kailash/nodes/monitoring/__init__.py +3 -5
  32. kailash/nodes/monitoring/connection_dashboard.py +822 -0
  33. kailash/nodes/rag/__init__.py +1 -3
  34. kailash/resources/registry.py +6 -0
  35. kailash/runtime/async_local.py +7 -0
  36. kailash/utils/export.py +152 -0
  37. kailash/workflow/builder.py +42 -0
  38. kailash/workflow/graph.py +86 -17
  39. kailash/workflow/templates.py +4 -9
  40. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/METADATA +14 -1
  41. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/RECORD +45 -31
  42. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/WHEEL +0 -0
  43. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/entry_points.txt +0 -0
  44. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/licenses/LICENSE +0 -0
  45. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/top_level.txt +0 -0
kailash/core/ml/query_patterns.py
@@ -0,0 +1,544 @@
+"""Query pattern learning and prediction for intelligent routing optimization.
+
+This module implements pattern tracking and prediction to optimize query routing
+and connection pre-warming based on historical execution patterns.
+"""
+
+import asyncio
+import logging
+import time
+from collections import defaultdict, deque
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+from typing import Any, Dict, List, Optional, Set, Tuple
+
+import numpy as np
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class QueryExecution:
+    """Record of a single query execution."""
+
+    fingerprint: str
+    timestamp: datetime
+    execution_time_ms: float
+    connection_id: str
+    parameters: Dict[str, Any]
+    success: bool
+    result_size: int
+
+
+@dataclass
+class QueryPattern:
+    """Identified pattern in query execution."""
+
+    fingerprint: str
+    frequency: float  # Queries per minute
+    avg_execution_time: float
+    temporal_pattern: Optional[str]  # e.g., "hourly", "daily", "weekly"
+    common_parameters: Dict[str, List[Any]]
+    typical_result_size: int
+    follows_queries: List[str]  # Queries that often precede this one
+    followed_by_queries: List[str]  # Queries that often follow this one
+
+
+@dataclass
+class PredictedQuery:
+    """Prediction of a future query."""
+
+    fingerprint: str
+    probability: float
+    expected_time: datetime
+    confidence: float
+    reason: str  # Why this was predicted
+
+
+class QueryPatternTracker:
+    """Tracks and analyzes query execution patterns."""
+
+    def __init__(
+        self, retention_hours: int = 168, min_pattern_frequency: int = 5  # 7 days
+    ):
+        self.retention_hours = retention_hours
+        self.min_pattern_frequency = min_pattern_frequency
+
+        # Storage
+        self.executions: deque = deque()  # All executions in time order
+        self.execution_by_fingerprint: Dict[str, List[QueryExecution]] = defaultdict(
+            list
+        )
+        self.sequence_patterns: Dict[str, Dict[str, int]] = defaultdict(
+            lambda: defaultdict(int)
+        )
+
+        # Analysis cache
+        self.pattern_cache: Dict[str, QueryPattern] = {}
+        self.last_analysis_time = datetime.now()
+        self.analysis_interval = timedelta(minutes=5)
+
+    def record_execution(
+        self,
+        fingerprint: str,
+        execution_time_ms: float,
+        connection_id: str,
+        parameters: Optional[Dict[str, Any]] = None,
+        success: bool = True,
+        result_size: int = 0,
+    ):
+        """Record a query execution for pattern analysis."""
+        execution = QueryExecution(
+            fingerprint=fingerprint,
+            timestamp=datetime.now(),
+            execution_time_ms=execution_time_ms,
+            connection_id=connection_id,
+            parameters=parameters or {},
+            success=success,
+            result_size=result_size,
+        )
+
+        # Add to storage
+        self.executions.append(execution)
+        self.execution_by_fingerprint[fingerprint].append(execution)
+
+        # Update sequence patterns
+        if len(self.executions) > 1:
+            prev_execution = self.executions[-2]
+            self.sequence_patterns[prev_execution.fingerprint][fingerprint] += 1
+
+        # Clean old data
+        self._clean_old_data()
+
+        # Trigger analysis if needed
+        if datetime.now() - self.last_analysis_time > self.analysis_interval:
+            self._analyze_patterns()
+
+    def get_pattern(self, fingerprint: str) -> Optional[QueryPattern]:
+        """Get analyzed pattern for a query fingerprint."""
+        if fingerprint in self.pattern_cache:
+            return self.pattern_cache[fingerprint]
+
+        # Try to analyze just this fingerprint
+        pattern = self._analyze_single_pattern(fingerprint)
+        if pattern:
+            self.pattern_cache[fingerprint] = pattern
+
+        return pattern
+
+    def predict_next_queries(
+        self, current_fingerprint: str, time_window_minutes: int = 5
+    ) -> List[PredictedQuery]:
+        """Predict queries likely to follow the current one."""
+        predictions = []
+
+        # Get sequence patterns
+        if current_fingerprint in self.sequence_patterns:
+            followers = self.sequence_patterns[current_fingerprint]
+            total_occurrences = sum(followers.values())
+
+            for next_fingerprint, count in followers.items():
+                if count >= 2:  # Minimum threshold
+                    probability = count / total_occurrences
+
+                    # Get timing information
+                    avg_delay = self._calculate_average_delay(
+                        current_fingerprint, next_fingerprint
+                    )
+
+                    predictions.append(
+                        PredictedQuery(
+                            fingerprint=next_fingerprint,
+                            probability=probability,
+                            expected_time=datetime.now() + avg_delay,
+                            confidence=min(0.9, probability * 2),  # Scale confidence
+                            reason=f"Follows {current_fingerprint} in {count}/{total_occurrences} cases",
+                        )
+                    )
+
+        # Add temporal predictions
+        temporal_predictions = self._predict_temporal_queries(time_window_minutes)
+        predictions.extend(temporal_predictions)
+
+        # Sort by probability
+        predictions.sort(key=lambda p: p.probability, reverse=True)
+
+        return predictions[:10]  # Top 10 predictions
+
+    def get_workload_forecast(self, horizon_minutes: int = 60) -> Dict[str, Any]:
+        """Forecast workload for the specified time horizon."""
+        now = datetime.now()
+        forecast_end = now + timedelta(minutes=horizon_minutes)
+
+        # Analyze historical patterns for this time period
+        historical_load = self._analyze_historical_load(
+            now.time(), forecast_end.time(), now.weekday()
+        )
+
+        # Calculate expected queries
+        expected_queries = []
+        for pattern in self.pattern_cache.values():
+            if pattern.temporal_pattern:
+                expected_count = self._estimate_query_count(pattern, horizon_minutes)
+                if expected_count > 0:
+                    expected_queries.append(
+                        {
+                            "fingerprint": pattern.fingerprint,
+                            "expected_count": expected_count,
+                            "avg_execution_time": pattern.avg_execution_time,
+                        }
+                    )
+
+        return {
+            "horizon_minutes": horizon_minutes,
+            "historical_qps": historical_load.get("avg_qps", 0),
+            "expected_total_queries": sum(
+                q["expected_count"] for q in expected_queries
+            ),
+            "expected_queries": expected_queries,
+            "peak_load_probability": historical_load.get("peak_probability", 0),
+            "recommended_pool_size": self._calculate_recommended_pool_size(
+                expected_queries, historical_load
+            ),
+        }
+
+    def _clean_old_data(self):
+        """Remove data older than retention period."""
+        cutoff_time = datetime.now() - timedelta(hours=self.retention_hours)
+
+        # Clean executions deque
+        while self.executions and self.executions[0].timestamp < cutoff_time:
+            old_execution = self.executions.popleft()
+
+            # Remove from fingerprint index
+            fingerprint_list = self.execution_by_fingerprint[old_execution.fingerprint]
+            fingerprint_list.remove(old_execution)
+            if not fingerprint_list:
+                del self.execution_by_fingerprint[old_execution.fingerprint]
+
+    def _analyze_patterns(self):
+        """Analyze all patterns in the data."""
+        self.pattern_cache.clear()
+
+        for fingerprint, executions in self.execution_by_fingerprint.items():
+            if len(executions) >= self.min_pattern_frequency:
+                pattern = self._analyze_single_pattern(fingerprint)
+                if pattern:
+                    self.pattern_cache[fingerprint] = pattern
+
+        self.last_analysis_time = datetime.now()
+
+    def _analyze_single_pattern(self, fingerprint: str) -> Optional[QueryPattern]:
+        """Analyze pattern for a single query fingerprint."""
+        executions = self.execution_by_fingerprint.get(fingerprint, [])
+
+        if len(executions) < self.min_pattern_frequency:
+            return None
+
+        # Calculate basic statistics
+        execution_times = [e.execution_time_ms for e in executions if e.success]
+        if not execution_times:
+            return None
+
+        avg_execution_time = np.mean(execution_times)
+
+        # Calculate frequency (queries per minute)
+        time_span = (
+            executions[-1].timestamp - executions[0].timestamp
+        ).total_seconds() / 60
+        frequency = len(executions) / time_span if time_span > 0 else 0
+
+        # Analyze temporal patterns
+        temporal_pattern = self._detect_temporal_pattern(executions)
+
+        # Analyze parameters
+        common_parameters = self._analyze_parameters(executions)
+
+        # Calculate typical result size
+        result_sizes = [e.result_size for e in executions if e.result_size > 0]
+        typical_result_size = int(np.median(result_sizes)) if result_sizes else 0
+
+        # Find sequence patterns
+        follows_queries = []
+        followed_by_queries = []
+
+        for other_fingerprint, followers in self.sequence_patterns.items():
+            if fingerprint in followers and followers[fingerprint] >= 2:
+                follows_queries.append(other_fingerprint)
+
+        if fingerprint in self.sequence_patterns:
+            for follower, count in self.sequence_patterns[fingerprint].items():
+                if count >= 2:
+                    followed_by_queries.append(follower)
+
+        return QueryPattern(
+            fingerprint=fingerprint,
+            frequency=frequency,
+            avg_execution_time=avg_execution_time,
+            temporal_pattern=temporal_pattern,
+            common_parameters=common_parameters,
+            typical_result_size=typical_result_size,
+            follows_queries=follows_queries,
+            followed_by_queries=followed_by_queries,
+        )
+
+    def _detect_temporal_pattern(
+        self, executions: List[QueryExecution]
+    ) -> Optional[str]:
+        """Detect temporal patterns in query execution."""
+        if len(executions) < 10:
+            return None
+
+        # Extract hour of day for each execution
+        hours = [e.timestamp.hour for e in executions]
+        hour_counts = defaultdict(int)
+        for hour in hours:
+            hour_counts[hour] += 1
+
+        # Check for hourly pattern (concentrated in specific hours)
+        max_hour_count = max(hour_counts.values())
+        if max_hour_count > len(executions) * 0.3:  # 30% in one hour
+            peak_hours = [
+                h for h, c in hour_counts.items() if c > len(executions) * 0.2
+            ]
+            if len(peak_hours) <= 3:
+                return "hourly"
+
+        # Check for daily pattern (regular daily execution)
+        dates = [e.timestamp.date() for e in executions]
+        unique_dates = len(set(dates))
+        date_span = (dates[-1] - dates[0]).days + 1
+
+        if unique_dates > 5 and unique_dates / date_span > 0.7:
+            return "daily"
+
+        # Check for weekly pattern
+        weekdays = [e.timestamp.weekday() for e in executions]
+        weekday_counts = defaultdict(int)
+        for wd in weekdays:
+            weekday_counts[wd] += 1
+
+        # Business days pattern
+        business_days = sum(weekday_counts[i] for i in range(5))  # Mon-Fri
+        if business_days > len(executions) * 0.8:
+            return "business_days"
+
+        return None
+
+    def _analyze_parameters(
+        self, executions: List[QueryExecution]
+    ) -> Dict[str, List[Any]]:
+        """Analyze common parameter values."""
+        param_values = defaultdict(list)
+
+        for execution in executions:
+            for param_name, param_value in execution.parameters.items():
+                param_values[param_name].append(param_value)
+
+        # Find most common values
+        common_parameters = {}
+        for param_name, values in param_values.items():
+            # Get unique values and their counts
+            unique_values = list(set(values))
+            if len(unique_values) <= 10:  # Only track if limited variety
+                common_parameters[param_name] = unique_values[:5]  # Top 5
+
+        return common_parameters
+
+    def _calculate_average_delay(
+        self, from_fingerprint: str, to_fingerprint: str
+    ) -> timedelta:
+        """Calculate average delay between two queries in sequence."""
+        delays = []
+
+        for i in range(1, len(self.executions)):
+            if (
+                self.executions[i - 1].fingerprint == from_fingerprint
+                and self.executions[i].fingerprint == to_fingerprint
+            ):
+                delay = self.executions[i].timestamp - self.executions[i - 1].timestamp
+                delays.append(delay.total_seconds())
+
+        if delays:
+            avg_delay_seconds = np.mean(delays)
+            return timedelta(seconds=avg_delay_seconds)
+        else:
+            return timedelta(seconds=30)  # Default 30 seconds
+
+    def _predict_temporal_queries(
+        self, time_window_minutes: int
+    ) -> List[PredictedQuery]:
+        """Predict queries based on temporal patterns."""
+        predictions = []
+        now = datetime.now()
+
+        for fingerprint, pattern in self.pattern_cache.items():
+            if pattern.temporal_pattern == "hourly":
+                # Check if we're in a typical execution hour
+                current_hour = now.hour
+                executions = self.execution_by_fingerprint[fingerprint]
+                hour_counts = defaultdict(int)
+                for e in executions:
+                    hour_counts[e.timestamp.hour] += 1
+
+                if hour_counts[current_hour] > len(executions) * 0.2:
+                    predictions.append(
+                        PredictedQuery(
+                            fingerprint=fingerprint,
+                            probability=0.7,
+                            expected_time=now
+                            + timedelta(minutes=time_window_minutes / 2),
+                            confidence=0.6,
+                            reason=f"Hourly pattern at {current_hour}:00",
+                        )
+                    )
+
+            elif pattern.temporal_pattern == "daily":
+                # Check if query typically runs around this time
+                executions = self.execution_by_fingerprint[fingerprint]
+                time_of_day = now.time()
+
+                # Find executions within 30 minutes of current time
+                matching_executions = [
+                    e
+                    for e in executions
+                    if abs(
+                        (e.timestamp.time().hour * 60 + e.timestamp.time().minute)
+                        - (time_of_day.hour * 60 + time_of_day.minute)
+                    )
+                    <= 30
+                ]
+
+                if len(matching_executions) > len(executions) * 0.3:
+                    predictions.append(
+                        PredictedQuery(
+                            fingerprint=fingerprint,
+                            probability=0.6,
+                            expected_time=now
+                            + timedelta(minutes=time_window_minutes / 2),
+                            confidence=0.5,
+                            reason="Daily pattern at this time",
+                        )
+                    )
+
+        return predictions
+
+    def _analyze_historical_load(
+        self, start_time: datetime.time, end_time: datetime.time, weekday: int
+    ) -> Dict[str, Any]:
+        """Analyze historical load for a time period."""
+        matching_executions = []
+
+        for execution in self.executions:
+            exec_time = execution.timestamp.time()
+            exec_weekday = execution.timestamp.weekday()
+
+            # Check if execution falls within time window
+            if start_time <= exec_time <= end_time:
+                # Check weekday match (or adjacent days for more data)
+                if abs(exec_weekday - weekday) <= 1:
+                    matching_executions.append(execution)
+
+        if not matching_executions:
+            return {"avg_qps": 0, "peak_probability": 0}
+
+        # Calculate QPS
+        time_span_minutes = (
+            datetime.combine(datetime.today(), end_time)
+            - datetime.combine(datetime.today(), start_time)
+        ).total_seconds() / 60
+
+        avg_qps = len(matching_executions) / time_span_minutes / 60
+
+        # Detect if this is typically a peak period
+        total_daily_queries = len(
+            [e for e in self.executions if e.timestamp.weekday() == weekday]
+        )
+
+        period_percentage = len(matching_executions) / max(total_daily_queries, 1)
+        peak_probability = min(1.0, period_percentage * 3)  # Scale up
+
+        return {
+            "avg_qps": avg_qps,
+            "peak_probability": peak_probability,
+            "query_count": len(matching_executions),
+        }
+
+    def _estimate_query_count(self, pattern: QueryPattern, horizon_minutes: int) -> int:
+        """Estimate number of queries for a pattern in the time horizon."""
+        # Simple estimation based on frequency
+        return int(pattern.frequency * horizon_minutes)
+
+    def _calculate_recommended_pool_size(
+        self, expected_queries: List[Dict[str, Any]], historical_load: Dict[str, Any]
+    ) -> int:
+        """Calculate recommended connection pool size based on forecast."""
+        if not expected_queries:
+            return 5  # Default minimum
+
+        # Calculate expected concurrent load
+        total_execution_time = sum(
+            q["expected_count"] * q["avg_execution_time"] for q in expected_queries
+        )
+
+        # Convert to concurrent connections needed
+        avg_concurrent = total_execution_time / (60 * 1000)  # Convert to minutes
+
+        # Add buffer for peak loads
+        peak_factor = 1 + historical_load.get("peak_probability", 0.5)
+        recommended = int(avg_concurrent * peak_factor) + 2  # +2 for safety
+
+        # Apply bounds
+        return max(5, min(50, recommended))
+
+
+class PatternLearningOptimizer:
+    """Optimizer that learns from patterns to improve routing decisions."""
+
+    def __init__(self, pattern_tracker: QueryPatternTracker):
+        self.pattern_tracker = pattern_tracker
+        self.optimization_rules: Dict[str, Dict[str, Any]] = {}
+
+    def optimize_routing(
+        self, fingerprint: str, current_decision: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """Optimize routing decision based on learned patterns."""
+        pattern = self.pattern_tracker.get_pattern(fingerprint)
+
+        if not pattern:
+            return current_decision
+
+        optimized = current_decision.copy()
+
+        # Apply pattern-based optimizations
+        if pattern.frequency > 10:  # High-frequency query
+            optimized["connection_affinity"] = True
+            optimized["cache_priority"] = "high"
+
+        if pattern.temporal_pattern == "hourly":
+            optimized["pre_warm_connections"] = True
+
+        if pattern.avg_execution_time > 1000:  # Slow query
+            optimized["dedicated_connection"] = True
+            optimized["timeout_extension"] = 2.0
+
+        if pattern.typical_result_size > 10000:  # Large results
+            optimized["streaming_enabled"] = True
+
+        return optimized
+
+    def suggest_pre_warming(self, current_time: datetime) -> List[str]:
+        """Suggest queries to pre-warm based on predictions."""
+        predictions = []
+
+        # Get predictions for next 5 minutes
+        for pattern in self.pattern_tracker.pattern_cache.values():
+            next_queries = self.pattern_tracker.predict_next_queries(
+                pattern.fingerprint, time_window_minutes=5
+            )
+
+            for prediction in next_queries:
+                if prediction.probability > 0.5 and prediction.confidence > 0.6:
+                    predictions.append(prediction.fingerprint)
+
+        return list(set(predictions))[:10]  # Top 10 unique queries
kailash/core/monitoring/__init__.py
@@ -0,0 +1,19 @@
+"""Monitoring components for connection management."""
+
+from .connection_metrics import (
+    ConnectionMetricsCollector,
+    ErrorCategory,
+    HistogramData,
+    MetricPoint,
+    MetricsAggregator,
+    MetricType,
+)
+
+__all__ = [
+    "ConnectionMetricsCollector",
+    "ErrorCategory",
+    "HistogramData",
+    "MetricPoint",
+    "MetricType",
+    "MetricsAggregator",
+]
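Assuming the wheel is installed, the new subpackage re-exports the metric primitives at kailash.core.monitoring; their constructor signatures live in connection_metrics.py, which is not part of this hunk, so only the import is sketched here.

# Names re-exported by kailash/core/monitoring/__init__.py above.
from kailash.core.monitoring import (
    ConnectionMetricsCollector,
    ErrorCategory,
    HistogramData,
    MetricPoint,
    MetricsAggregator,
    MetricType,
)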