kailash 0.8.4__py3-none-any.whl → 0.8.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. kailash/__init__.py +1 -7
  2. kailash/cli/__init__.py +11 -1
  3. kailash/cli/validation_audit.py +570 -0
  4. kailash/core/actors/supervisor.py +1 -1
  5. kailash/core/resilience/circuit_breaker.py +71 -1
  6. kailash/core/resilience/health_monitor.py +172 -0
  7. kailash/edge/compliance.py +33 -0
  8. kailash/edge/consistency.py +609 -0
  9. kailash/edge/coordination/__init__.py +30 -0
  10. kailash/edge/coordination/global_ordering.py +355 -0
  11. kailash/edge/coordination/leader_election.py +217 -0
  12. kailash/edge/coordination/partition_detector.py +296 -0
  13. kailash/edge/coordination/raft.py +485 -0
  14. kailash/edge/discovery.py +63 -1
  15. kailash/edge/migration/__init__.py +19 -0
  16. kailash/edge/migration/edge_migrator.py +832 -0
  17. kailash/edge/monitoring/__init__.py +21 -0
  18. kailash/edge/monitoring/edge_monitor.py +736 -0
  19. kailash/edge/prediction/__init__.py +10 -0
  20. kailash/edge/prediction/predictive_warmer.py +591 -0
  21. kailash/edge/resource/__init__.py +102 -0
  22. kailash/edge/resource/cloud_integration.py +796 -0
  23. kailash/edge/resource/cost_optimizer.py +949 -0
  24. kailash/edge/resource/docker_integration.py +919 -0
  25. kailash/edge/resource/kubernetes_integration.py +893 -0
  26. kailash/edge/resource/platform_integration.py +913 -0
  27. kailash/edge/resource/predictive_scaler.py +959 -0
  28. kailash/edge/resource/resource_analyzer.py +824 -0
  29. kailash/edge/resource/resource_pools.py +610 -0
  30. kailash/integrations/dataflow_edge.py +261 -0
  31. kailash/mcp_server/registry_integration.py +1 -1
  32. kailash/monitoring/__init__.py +18 -0
  33. kailash/monitoring/alerts.py +646 -0
  34. kailash/monitoring/metrics.py +677 -0
  35. kailash/nodes/__init__.py +2 -0
  36. kailash/nodes/ai/semantic_memory.py +2 -2
  37. kailash/nodes/base.py +545 -0
  38. kailash/nodes/edge/__init__.py +36 -0
  39. kailash/nodes/edge/base.py +240 -0
  40. kailash/nodes/edge/cloud_node.py +710 -0
  41. kailash/nodes/edge/coordination.py +239 -0
  42. kailash/nodes/edge/docker_node.py +825 -0
  43. kailash/nodes/edge/edge_data.py +582 -0
  44. kailash/nodes/edge/edge_migration_node.py +392 -0
  45. kailash/nodes/edge/edge_monitoring_node.py +421 -0
  46. kailash/nodes/edge/edge_state.py +673 -0
  47. kailash/nodes/edge/edge_warming_node.py +393 -0
  48. kailash/nodes/edge/kubernetes_node.py +652 -0
  49. kailash/nodes/edge/platform_node.py +766 -0
  50. kailash/nodes/edge/resource_analyzer_node.py +378 -0
  51. kailash/nodes/edge/resource_optimizer_node.py +501 -0
  52. kailash/nodes/edge/resource_scaler_node.py +397 -0
  53. kailash/nodes/ports.py +676 -0
  54. kailash/runtime/local.py +344 -1
  55. kailash/runtime/validation/__init__.py +20 -0
  56. kailash/runtime/validation/connection_context.py +119 -0
  57. kailash/runtime/validation/enhanced_error_formatter.py +202 -0
  58. kailash/runtime/validation/error_categorizer.py +164 -0
  59. kailash/runtime/validation/metrics.py +380 -0
  60. kailash/runtime/validation/performance.py +615 -0
  61. kailash/runtime/validation/suggestion_engine.py +212 -0
  62. kailash/testing/fixtures.py +2 -2
  63. kailash/workflow/builder.py +230 -4
  64. kailash/workflow/contracts.py +418 -0
  65. kailash/workflow/edge_infrastructure.py +369 -0
  66. kailash/workflow/migration.py +3 -3
  67. kailash/workflow/type_inference.py +669 -0
  68. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/METADATA +43 -27
  69. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/RECORD +73 -27
  70. kailash/nexus/__init__.py +0 -21
  71. kailash/nexus/cli/__init__.py +0 -5
  72. kailash/nexus/cli/__main__.py +0 -6
  73. kailash/nexus/cli/main.py +0 -176
  74. kailash/nexus/factory.py +0 -413
  75. kailash/nexus/gateway.py +0 -545
  76. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/WHEEL +0 -0
  77. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/entry_points.txt +0 -0
  78. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/licenses/LICENSE +0 -0
  79. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,296 @@
+"""Network partition detection for edge coordination."""
+
+import asyncio
+import logging
+from collections import defaultdict
+from datetime import datetime, timedelta
+from typing import Any, Dict, List, Optional, Set, Tuple
+
+
+class PartitionDetector:
+    """Detects network partitions in distributed edge systems.
+
+    Uses heartbeat monitoring and cluster state analysis to detect:
+    - Network partitions (split-brain scenarios)
+    - Node failures
+    - Connectivity issues
+    - Quorum status
+    """
+
+    def __init__(
+        self,
+        node_id: str,
+        peers: List[str],
+        heartbeat_interval_ms: int = 100,
+        failure_threshold_ms: int = 500,
+    ):
+        """Initialize partition detector.
+
+        Args:
+            node_id: This node's identifier
+            peers: List of peer node IDs
+            heartbeat_interval_ms: Heartbeat interval in milliseconds
+            failure_threshold_ms: Time without heartbeat to consider failure
+        """
+        self.node_id = node_id
+        self.peers = set(peers)
+        self.heartbeat_interval_ms = heartbeat_interval_ms
+        self.failure_threshold_ms = failure_threshold_ms
+
+        # Heartbeat tracking
+        self.last_heartbeats: Dict[str, datetime] = {}
+        self.peer_connections: Dict[str, Set[str]] = defaultdict(set)
+        self.my_connections: Set[str] = set()
+
+        # Partition state
+        self.current_partition: Optional[Set[str]] = None
+        self.partition_start_time: Optional[datetime] = None
+        self.partition_history: List[Dict[str, Any]] = []
+
+        # Monitoring
+        self._monitor_task: Optional[asyncio.Task] = None
+        self._running = False
+        self.logger = logging.getLogger(f"PartitionDetector[{node_id}]")
+
+    async def start(self):
+        """Start partition detection."""
+        self._running = True
+        self._monitor_task = asyncio.create_task(self._monitor_partitions())
+        self.logger.info("Partition detector started")
+
+    async def stop(self):
+        """Stop partition detection."""
+        self._running = False
+        if self._monitor_task:
+            self._monitor_task.cancel()
+            try:
+                await self._monitor_task
+            except asyncio.CancelledError:
+                pass
+        self.logger.info("Partition detector stopped")
+
+    def record_heartbeat(self, from_node: str):
+        """Record heartbeat from a peer node.
+
+        Args:
+            from_node: Node ID that sent heartbeat
+        """
+        self.last_heartbeats[from_node] = datetime.now()
+        self.my_connections.add(from_node)
+
+    def update_peer_connections(self, node_id: str, connections: Set[str]):
+        """Update connectivity information for a peer.
+
+        Args:
+            node_id: Peer node ID
+            connections: Set of nodes the peer can reach
+        """
+        self.peer_connections[node_id] = connections
+
+    def get_partition_status(self) -> Dict[str, Any]:
+        """Get current partition status.
+
+        Returns:
+            Dict with partition information
+        """
+        now = datetime.now()
+        active_peers = self._get_active_peers(now)
+
+        # Check for partition
+        is_partitioned = self._detect_partition(active_peers)
+
+        # Calculate quorum
+        total_nodes = len(self.peers) + 1  # Include self
+        reachable_nodes = len(active_peers) + 1  # Include self
+        has_quorum = reachable_nodes > total_nodes // 2
+
+        # Get partition groups
+        groups = self._identify_partition_groups(active_peers)
+
+        return {
+            "is_partitioned": is_partitioned,
+            "has_quorum": has_quorum,
+            "reachable_nodes": reachable_nodes,
+            "total_nodes": total_nodes,
+            "active_peers": list(active_peers),
+            "unreachable_peers": list(self.peers - active_peers),
+            "partition_groups": groups,
+            "current_partition": (
+                list(self.current_partition) if self.current_partition else None
+            ),
+            "partition_duration": self._get_partition_duration(),
+        }
+
+    def _get_active_peers(self, now: datetime) -> Set[str]:
+        """Get set of currently active peers.
+
+        Args:
+            now: Current time
+
+        Returns:
+            Set of active peer IDs
+        """
+        active = set()
+        threshold = timedelta(milliseconds=self.failure_threshold_ms)
+
+        for peer in self.peers:
+            if peer in self.last_heartbeats:
+                if now - self.last_heartbeats[peer] < threshold:
+                    active.add(peer)
+
+        return active
+
+    def _detect_partition(self, active_peers: Set[str]) -> bool:
+        """Detect if network is partitioned.
+
+        Args:
+            active_peers: Set of active peer IDs
+
+        Returns:
+            True if partition detected
+        """
+        # Simple detection: partition if we can't reach all peers
+        # but some peers can reach each other
+        if len(active_peers) < len(self.peers):
+            # Check if unreachable peers can reach each other
+            unreachable = self.peers - active_peers
+
+            for peer in unreachable:
+                if peer in self.peer_connections:
+                    # Check if this peer can reach other unreachable peers
+                    peer_reach = self.peer_connections[peer]
+                    if peer_reach & unreachable:
+                        # Partition detected
+                        return True
+
+        return False
+
+    def _identify_partition_groups(self, active_peers: Set[str]) -> List[Set[str]]:
+        """Identify partition groups in the network.
+
+        Args:
+            active_peers: Set of active peer IDs
+
+        Returns:
+            List of partition groups (sets of node IDs)
+        """
+        # Build connectivity graph
+        graph = defaultdict(set)
+
+        # Add self connections
+        graph[self.node_id] = active_peers.copy()
+
+        # Add peer connections
+        for peer, connections in self.peer_connections.items():
+            graph[peer] = connections.copy()
+
+        # Find connected components
+        visited = set()
+        groups = []
+
+        def dfs(node: str, group: Set[str]):
+            if node in visited:
+                return
+            visited.add(node)
+            group.add(node)
+
+            for neighbor in graph.get(node, set()):
+                if neighbor not in visited:
+                    dfs(neighbor, group)
+
+        # Start DFS from all nodes
+        all_nodes = {self.node_id} | self.peers
+        for node in all_nodes:
+            if node not in visited:
+                group = set()
+                dfs(node, group)
+                if group:
+                    groups.append(group)
+
+        return groups
+
+    def _get_partition_duration(self) -> Optional[float]:
+        """Get duration of current partition in seconds.
+
+        Returns:
+            Duration in seconds or None if not partitioned
+        """
+        if self.partition_start_time:
+            return (datetime.now() - self.partition_start_time).total_seconds()
+        return None
+
+    async def _monitor_partitions(self):
+        """Background task to monitor for partitions."""
+        while self._running:
+            try:
+                status = self.get_partition_status()
+
+                # Check for partition state change
+                if status["is_partitioned"] and not self.current_partition:
+                    # New partition detected
+                    self.current_partition = set(status["active_peers"])
+                    self.current_partition.add(self.node_id)
+                    self.partition_start_time = datetime.now()
+
+                    self.logger.warning(
+                        f"Network partition detected! In partition with: {self.current_partition}"
+                    )
+
+                    # Record in history
+                    self.partition_history.append(
+                        {
+                            "detected_at": self.partition_start_time,
+                            "partition": list(self.current_partition),
+                            "groups": status["partition_groups"],
+                        }
+                    )
+
+                elif not status["is_partitioned"] and self.current_partition:
+                    # Partition healed
+                    duration = self._get_partition_duration()
+                    self.logger.info(
+                        f"Network partition healed after {duration:.2f} seconds"
+                    )
+
+                    # Update history
+                    if self.partition_history:
+                        self.partition_history[-1]["healed_at"] = datetime.now()
+                        self.partition_history[-1]["duration"] = duration
+
+                    self.current_partition = None
+                    self.partition_start_time = None
+
+                await asyncio.sleep(self.heartbeat_interval_ms / 1000)
+
+            except Exception as e:
+                self.logger.error(f"Partition monitor error: {e}")
+
+    def should_participate_in_election(self) -> bool:
+        """Check if this node should participate in leader election.
+
+        Returns:
+            True if node should participate (has quorum)
+        """
+        status = self.get_partition_status()
+        return status["has_quorum"]
+
+    def get_partition_metrics(self) -> Dict[str, Any]:
+        """Get partition detection metrics.
+
+        Returns:
+            Dict with partition metrics
+        """
+        total_partitions = len(self.partition_history)
+        total_duration = sum(p.get("duration", 0) for p in self.partition_history)
+
+        current_duration = self._get_partition_duration()
+        if current_duration:
+            total_duration += current_duration
+
+        return {
+            "total_partitions": total_partitions,
+            "total_partition_duration": total_duration,
+            "current_partition_duration": current_duration,
+            "partition_history_size": len(self.partition_history),
+            "is_currently_partitioned": self.current_partition is not None,
+        }