kailash 0.5.0__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74):
  1. kailash/__init__.py +1 -1
  2. kailash/access_control/__init__.py +1 -1
  3. kailash/client/__init__.py +12 -0
  4. kailash/client/enhanced_client.py +306 -0
  5. kailash/core/actors/__init__.py +16 -0
  6. kailash/core/actors/adaptive_pool_controller.py +630 -0
  7. kailash/core/actors/connection_actor.py +566 -0
  8. kailash/core/actors/supervisor.py +364 -0
  9. kailash/core/ml/__init__.py +1 -0
  10. kailash/core/ml/query_patterns.py +544 -0
  11. kailash/core/monitoring/__init__.py +19 -0
  12. kailash/core/monitoring/connection_metrics.py +488 -0
  13. kailash/core/optimization/__init__.py +1 -0
  14. kailash/core/resilience/__init__.py +17 -0
  15. kailash/core/resilience/circuit_breaker.py +382 -0
  16. kailash/edge/__init__.py +16 -0
  17. kailash/edge/compliance.py +834 -0
  18. kailash/edge/discovery.py +659 -0
  19. kailash/edge/location.py +582 -0
  20. kailash/gateway/__init__.py +33 -0
  21. kailash/gateway/api.py +289 -0
  22. kailash/gateway/enhanced_gateway.py +357 -0
  23. kailash/gateway/resource_resolver.py +217 -0
  24. kailash/gateway/security.py +227 -0
  25. kailash/middleware/auth/access_control.py +6 -6
  26. kailash/middleware/auth/models.py +2 -2
  27. kailash/middleware/communication/ai_chat.py +7 -7
  28. kailash/middleware/communication/api_gateway.py +5 -15
  29. kailash/middleware/database/base_models.py +1 -7
  30. kailash/middleware/gateway/__init__.py +22 -0
  31. kailash/middleware/gateway/checkpoint_manager.py +398 -0
  32. kailash/middleware/gateway/deduplicator.py +382 -0
  33. kailash/middleware/gateway/durable_gateway.py +417 -0
  34. kailash/middleware/gateway/durable_request.py +498 -0
  35. kailash/middleware/gateway/event_store.py +499 -0
  36. kailash/middleware/mcp/enhanced_server.py +2 -2
  37. kailash/nodes/admin/permission_check.py +817 -33
  38. kailash/nodes/admin/role_management.py +1242 -108
  39. kailash/nodes/admin/schema_manager.py +438 -0
  40. kailash/nodes/admin/user_management.py +1124 -1582
  41. kailash/nodes/code/__init__.py +8 -1
  42. kailash/nodes/code/async_python.py +1035 -0
  43. kailash/nodes/code/python.py +1 -0
  44. kailash/nodes/data/async_sql.py +9 -3
  45. kailash/nodes/data/query_pipeline.py +641 -0
  46. kailash/nodes/data/query_router.py +895 -0
  47. kailash/nodes/data/sql.py +20 -11
  48. kailash/nodes/data/workflow_connection_pool.py +1071 -0
  49. kailash/nodes/monitoring/__init__.py +3 -5
  50. kailash/nodes/monitoring/connection_dashboard.py +822 -0
  51. kailash/nodes/rag/__init__.py +2 -7
  52. kailash/resources/__init__.py +40 -0
  53. kailash/resources/factory.py +533 -0
  54. kailash/resources/health.py +319 -0
  55. kailash/resources/reference.py +288 -0
  56. kailash/resources/registry.py +392 -0
  57. kailash/runtime/async_local.py +711 -302
  58. kailash/testing/__init__.py +34 -0
  59. kailash/testing/async_test_case.py +353 -0
  60. kailash/testing/async_utils.py +345 -0
  61. kailash/testing/fixtures.py +458 -0
  62. kailash/testing/mock_registry.py +495 -0
  63. kailash/workflow/__init__.py +8 -0
  64. kailash/workflow/async_builder.py +621 -0
  65. kailash/workflow/async_patterns.py +766 -0
  66. kailash/workflow/cyclic_runner.py +107 -16
  67. kailash/workflow/graph.py +7 -2
  68. kailash/workflow/resilience.py +11 -1
  69. {kailash-0.5.0.dist-info → kailash-0.6.1.dist-info}/METADATA +19 -4
  70. {kailash-0.5.0.dist-info → kailash-0.6.1.dist-info}/RECORD +74 -28
  71. {kailash-0.5.0.dist-info → kailash-0.6.1.dist-info}/WHEEL +0 -0
  72. {kailash-0.5.0.dist-info → kailash-0.6.1.dist-info}/entry_points.txt +0 -0
  73. {kailash-0.5.0.dist-info → kailash-0.6.1.dist-info}/licenses/LICENSE +0 -0
  74. {kailash-0.5.0.dist-info → kailash-0.6.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,364 @@
1
+ """Actor supervision for fault tolerance.
2
+
3
+ This module implements supervision strategies for managing actor lifecycles
4
+ and handling failures gracefully.
5
+ """
6
+
7
+ import asyncio
8
+ import logging
9
+ from datetime import UTC, datetime, timedelta
10
+ from enum import Enum
11
+ from typing import Any, Callable, Dict, List, Optional
12
+
13
+ from .connection_actor import ActorConnection, ConnectionState
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class SupervisionStrategy(Enum):
    """Policies a supervisor can apply when one of its actors fails."""

    # Restart only the failed actor.
    ONE_FOR_ONE = "one_for_one"
    # Restart all actors on any failure.
    ONE_FOR_ALL = "one_for_all"
    # Restart the failed actor and all actors registered after it.
    REST_FOR_ONE = "rest_for_one"
24
+
25
+
26
class RestartDecision(Enum):
    """Possible outcomes when the supervisor evaluates a failed actor."""

    # Restart according to the configured supervision strategy.
    RESTART = "restart"
    # Remove the actor from supervision.
    STOP = "stop"
    # Hand the failure to the supervisor-level failure handler.
    ESCALATE = "escalate"
32
+
33
+
34
class ActorSupervisor:
    """
    Supervises a group of actors, handling failures and restarts.

    Inspired by Erlang/OTP supervision trees, this class manages
    actor lifecycles and implements various restart strategies.

    Actors are kept in a registry keyed by the id they were added under.
    A replacement created during recycling carries its own connection id
    (``"<id>_new"``) but is stored under the original registry key, so
    notifications are resolved against both the key and the actor's own id.
    """

    def __init__(
        self,
        name: str,
        strategy: SupervisionStrategy = SupervisionStrategy.ONE_FOR_ONE,
        max_restarts: int = 3,
        restart_window: float = 60.0,
        restart_delay: float = 1.0,
        monitor_interval: float = 10.0,
    ):
        """
        Initialize actor supervisor.

        Args:
            name: Supervisor name
            strategy: Supervision strategy to use
            max_restarts: Maximum restarts within window
            restart_window: Time window for restart counting (seconds)
            restart_delay: Delay between restarts (seconds)
            monitor_interval: Delay between health sweeps (seconds)
        """
        self.name = name
        self.strategy = strategy
        self.max_restarts = max_restarts
        self.restart_window = restart_window
        self.restart_delay = restart_delay
        self.monitor_interval = monitor_interval

        # Supervised actors
        self.actors: Dict[str, ActorConnection] = {}
        self.actor_order: List[str] = []  # For REST_FOR_ONE strategy

        # Restart tracking: registry key -> timestamps of every restart
        self.restart_counts: Dict[str, List[datetime]] = {}

        # Callbacks
        self.on_actor_failure: Optional[
            Callable[[str, Optional[Exception]], None]
        ] = None
        self.on_actor_restart: Optional[Callable[[str, int], None]] = None
        self.on_supervisor_failure: Optional[
            Callable[[Optional[Exception]], None]
        ] = None

        # Supervisor state
        self._running = False
        self._monitor_task = None
        # Strong references to fire-and-forget tasks; the event loop only
        # keeps weak references, so untracked tasks may be collected early.
        self._background_tasks: set = set()

    async def start(self):
        """Start the supervisor and all actors."""
        self._running = True

        # Iterate over a copy: starting an actor can fail and trigger
        # restart handling that touches the registry.
        for actor_id in list(self.actor_order):
            if not self._running:
                # A start failure escalated and stopped the supervisor.
                logger.error(f"Supervisor {self.name} stopped during startup")
                return
            actor = self.actors.get(actor_id)
            if actor is None:
                continue
            actor.supervisor = self
            await self._start_actor(actor)

        # Start monitoring
        self._monitor_task = asyncio.create_task(self._monitor_actors())

        logger.info(f"Supervisor {self.name} started with {len(self.actors)} actors")

    async def stop(self):
        """Stop the supervisor and all actors."""
        self._running = False

        monitor_task, self._monitor_task = self._monitor_task, None
        # When stop() runs inside the monitor task (escalation path), the
        # task must not cancel/await itself; clearing ``_running`` is enough
        # for the monitor loop to exit on its own.
        if monitor_task is not None and monitor_task is not asyncio.current_task():
            monitor_task.cancel()
            try:
                await monitor_task
            except asyncio.CancelledError:
                pass

        # Snapshot the actors: the registry may change while awaiting.
        for actor in list(self.actors.values()):
            await actor.stop()

        logger.info(f"Supervisor {self.name} stopped")

    def add_actor(self, actor: ActorConnection):
        """
        Add an actor to supervision.

        Must be called from within a running event loop when the
        supervisor is already running, because the actor is then started
        in a background task.

        Args:
            actor: Actor to supervise
        """
        self.actors[actor.id] = actor
        self.actor_order.append(actor.id)
        self.restart_counts[actor.id] = []
        actor.supervisor = self

        # Start actor if supervisor is running
        if self._running:
            self._spawn(self._start_actor(actor))

    def remove_actor(self, actor_id: str):
        """
        Remove an actor from supervision and stop it in the background.

        Unknown ids are ignored. Requires a running event loop.

        Args:
            actor_id: ID of actor to remove
        """
        if actor_id not in self.actors:
            return

        self._spawn(self.actors[actor_id].stop())

        del self.actors[actor_id]
        if actor_id in self.actor_order:
            self.actor_order.remove(actor_id)
        self.restart_counts.pop(actor_id, None)

    async def notify_failure(self, actor_id: str, error: Optional[Exception] = None):
        """
        Notify supervisor of actor failure.

        The failure callback always fires. A restart decision is only made
        for actors that are currently supervised; notifications for unknown
        ids (removed actors, unregistered replacements) are logged and
        otherwise ignored.

        Args:
            actor_id: ID of failed actor
            error: Exception that caused failure
        """
        logger.warning(f"Actor {actor_id} failed: {error}")

        # Callback
        if self.on_actor_failure:
            self.on_actor_failure(actor_id, error)

        registry_id = self._resolve_actor_id(actor_id)
        if registry_id is None:
            logger.warning(f"Ignoring failure of unsupervised actor {actor_id}")
            return

        # Decide on restart
        decision = self._decide_restart(registry_id)

        if decision == RestartDecision.RESTART:
            await self._handle_restart(registry_id)
        elif decision == RestartDecision.STOP:
            self.remove_actor(registry_id)
        elif decision == RestartDecision.ESCALATE:
            await self._escalate_failure(error)

    async def notify_recycling(self, actor_id: str):
        """
        Notify supervisor that actor is recycling.

        A replacement with the same configuration is started first and only
        swapped in if it started successfully; otherwise the existing actor
        stays registered.

        Args:
            actor_id: ID of recycling actor
        """
        logger.info(f"Actor {actor_id} is recycling")

        registry_id = self._resolve_actor_id(actor_id)
        if registry_id is None:
            return

        old_actor = self.actors[registry_id]

        # Create new actor with same config
        new_actor = ActorConnection(
            connection_id=f"{actor_id}_new",
            db_config=old_actor.db_config,
            health_check_query=old_actor.health_check_query,
            health_check_interval=old_actor.health_check_interval,
            max_lifetime=old_actor.max_lifetime,
            max_idle_time=old_actor.max_idle_time,
        )

        # Start new actor; never swap in a replacement that failed to start.
        if not await self._start_actor(new_actor):
            logger.error(
                f"Replacement for actor {actor_id} failed to start; "
                f"keeping existing actor"
            )
            return

        # Swap actors
        await self._swap_actors(registry_id, new_actor)

    async def _monitor_actors(self):
        """Monitor actor health periodically."""
        while self._running:
            try:
                await asyncio.sleep(self.monitor_interval)

                for actor_id, actor in list(self.actors.items()):
                    if not self._running:
                        # Supervisor was stopped during this sweep
                        # (e.g. by an escalation); do not restart anything.
                        break
                    if self.actors.get(actor_id) is not actor:
                        # The snapshot entry is stale: this instance was
                        # replaced or removed earlier in the sweep, so its
                        # state no longer describes a supervised actor.
                        continue

                    if actor.state == ConnectionState.FAILED:
                        await self.notify_failure(actor_id)
                    elif actor.state == ConnectionState.TERMINATED:
                        # Actor stopped unexpectedly
                        await self.notify_failure(
                            actor_id, RuntimeError("Actor terminated unexpectedly")
                        )

            except Exception as e:
                logger.error(f"Monitor error in supervisor {self.name}: {e}")

    async def _start_actor(self, actor: ActorConnection) -> bool:
        """
        Start an actor with error handling.

        Returns:
            True if the actor started, False if starting raised (the failure
            has then already been reported through ``notify_failure``).
        """
        try:
            await actor.start()
        except Exception as e:
            logger.error(f"Failed to start actor {actor.id}: {e}")
            await self.notify_failure(actor.id, e)
            return False
        return True

    def _decide_restart(self, actor_id: str) -> RestartDecision:
        """Decide whether to restart a failed actor."""
        # Check restart count within window
        now = datetime.now(UTC)
        window_start = now - timedelta(seconds=self.restart_window)

        # Filter restarts within window; ids without history count as zero.
        recent_restarts = [
            ts for ts in self.restart_counts.get(actor_id, []) if ts > window_start
        ]

        if len(recent_restarts) >= self.max_restarts:
            logger.error(
                f"Actor {actor_id} exceeded max restarts "
                f"({self.max_restarts} in {self.restart_window}s)"
            )
            return RestartDecision.ESCALATE

        return RestartDecision.RESTART

    async def _handle_restart(self, actor_id: str):
        """Handle actor restart based on strategy."""
        # Record restart
        history = self.restart_counts.setdefault(actor_id, [])
        history.append(datetime.now(UTC))

        # Delay before restart
        await asyncio.sleep(self.restart_delay)

        if self.strategy == SupervisionStrategy.ONE_FOR_ONE:
            await self._restart_one(actor_id)
        elif self.strategy == SupervisionStrategy.ONE_FOR_ALL:
            await self._restart_all()
        elif self.strategy == SupervisionStrategy.REST_FOR_ONE:
            await self._restart_rest(actor_id)

        # Callback
        if self.on_actor_restart:
            restart_count = len(self.restart_counts.get(actor_id, []))
            self.on_actor_restart(actor_id, restart_count)

    async def _restart_one(self, actor_id: str):
        """Restart a single actor."""
        if actor_id not in self.actors:
            return

        actor = self.actors[actor_id]

        # Stop the failed actor
        await actor.stop()

        # Create new actor with same config
        new_actor = ActorConnection(
            connection_id=actor_id,
            db_config=actor.db_config,
            health_check_query=actor.health_check_query,
            health_check_interval=actor.health_check_interval,
            max_lifetime=actor.max_lifetime,
            max_idle_time=actor.max_idle_time,
        )

        # Replace and start
        self.actors[actor_id] = new_actor
        new_actor.supervisor = self
        await self._start_actor(new_actor)

    async def _restart_all(self):
        """Restart all actors."""
        # Stop all actors (snapshot: the registry may change while awaiting)
        for actor in list(self.actors.values()):
            await actor.stop()

        # Restart all
        for actor_id in list(self.actor_order):
            await self._restart_one(actor_id)

    async def _restart_rest(self, failed_actor_id: str):
        """Restart failed actor and all actors after it."""
        if failed_actor_id not in self.actor_order:
            return

        failed_index = self.actor_order.index(failed_actor_id)

        # Restart from failed actor onwards; the slice is a copy, so later
        # changes to the ordering do not affect this pass.
        for actor_id in self.actor_order[failed_index:]:
            await self._restart_one(actor_id)

    async def _swap_actors(
        self, old_id: str, new_actor: ActorConnection, drain_timeout: float = 30.0
    ):
        """
        Swap an old actor with a new one once the old one has drained.

        Args:
            old_id: Registry key of the actor being replaced
            new_actor: Already started replacement
            drain_timeout: Maximum time to wait for the old actor to reach
                the TERMINATED state before it is stopped anyway (seconds)
        """
        if old_id not in self.actors:
            # Nothing to replace; stop the replacement so it is not left
            # running without an owner.
            await new_actor.stop()
            return

        old_actor = self.actors[old_id]

        # Wait for old actor to drain
        loop = asyncio.get_running_loop()
        start_time = loop.time()

        while old_actor.state != ConnectionState.TERMINATED:
            if loop.time() - start_time > drain_timeout:
                logger.warning(f"Timeout draining actor {old_id}, forcing stop")
                break
            await asyncio.sleep(0.1)

        if self.actors.get(old_id) is not old_actor:
            # The entry was removed or restarted while draining; installing
            # the replacement now would resurrect or overwrite it.
            logger.warning(f"Actor {old_id} changed while draining, discarding swap")
            await new_actor.stop()
            return

        # Stop old actor
        await old_actor.stop()

        # Replace with new actor
        self.actors[old_id] = new_actor
        new_actor.supervisor = self

        logger.info(f"Swapped actor {old_id} with new instance")

    async def _escalate_failure(self, error: Optional[Exception]):
        """Escalate failure to higher level."""
        logger.critical(f"Supervisor {self.name} escalating failure: {error}")

        if self.on_supervisor_failure:
            self.on_supervisor_failure(error)
        else:
            # Default behavior: stop supervisor
            await self.stop()

    def get_stats(self) -> Dict[str, Any]:
        """Get supervisor statistics."""
        stats = {
            "name": self.name,
            "strategy": self.strategy.value,
            "running": self._running,
            "actors": {},
        }

        for actor_id, actor in self.actors.items():
            stats["actors"][actor_id] = {
                "state": actor.state.value,
                "health_score": actor.stats.health_score,
                "restart_count": len(self.restart_counts.get(actor_id, [])),
            }

        return stats

    def _resolve_actor_id(self, actor_id: str) -> Optional[str]:
        """Map a reported actor id to its registry key, or None if unknown."""
        if actor_id in self.actors:
            return actor_id
        # Recycled replacements are stored under the original key but report
        # with their own connection id.
        for registry_id, actor in self.actors.items():
            if getattr(actor, "id", None) == actor_id:
                return registry_id
        return None

    def _spawn(self, coro):
        """Run a coroutine as a background task while holding a reference."""
        task = asyncio.create_task(coro)
        self._background_tasks.add(task)
        task.add_done_callback(self._on_background_done)
        return task

    def _on_background_done(self, task):
        """Release a finished background task and log its failure, if any."""
        self._background_tasks.discard(task)
        if not task.cancelled() and task.exception() is not None:
            logger.error(
                f"Background task failed in supervisor {self.name}: "
                f"{task.exception()}"
            )
@@ -0,0 +1 @@
1
+ """Machine learning components for intelligent SDK features."""