kailash 0.8.4__py3-none-any.whl → 0.8.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. kailash/__init__.py +1 -7
  2. kailash/cli/__init__.py +11 -1
  3. kailash/cli/validation_audit.py +570 -0
  4. kailash/core/actors/supervisor.py +1 -1
  5. kailash/core/resilience/circuit_breaker.py +71 -1
  6. kailash/core/resilience/health_monitor.py +172 -0
  7. kailash/edge/compliance.py +33 -0
  8. kailash/edge/consistency.py +609 -0
  9. kailash/edge/coordination/__init__.py +30 -0
  10. kailash/edge/coordination/global_ordering.py +355 -0
  11. kailash/edge/coordination/leader_election.py +217 -0
  12. kailash/edge/coordination/partition_detector.py +296 -0
  13. kailash/edge/coordination/raft.py +485 -0
  14. kailash/edge/discovery.py +63 -1
  15. kailash/edge/migration/__init__.py +19 -0
  16. kailash/edge/migration/edge_migrator.py +832 -0
  17. kailash/edge/monitoring/__init__.py +21 -0
  18. kailash/edge/monitoring/edge_monitor.py +736 -0
  19. kailash/edge/prediction/__init__.py +10 -0
  20. kailash/edge/prediction/predictive_warmer.py +591 -0
  21. kailash/edge/resource/__init__.py +102 -0
  22. kailash/edge/resource/cloud_integration.py +796 -0
  23. kailash/edge/resource/cost_optimizer.py +949 -0
  24. kailash/edge/resource/docker_integration.py +919 -0
  25. kailash/edge/resource/kubernetes_integration.py +893 -0
  26. kailash/edge/resource/platform_integration.py +913 -0
  27. kailash/edge/resource/predictive_scaler.py +959 -0
  28. kailash/edge/resource/resource_analyzer.py +824 -0
  29. kailash/edge/resource/resource_pools.py +610 -0
  30. kailash/integrations/dataflow_edge.py +261 -0
  31. kailash/mcp_server/registry_integration.py +1 -1
  32. kailash/monitoring/__init__.py +18 -0
  33. kailash/monitoring/alerts.py +646 -0
  34. kailash/monitoring/metrics.py +677 -0
  35. kailash/nodes/__init__.py +2 -0
  36. kailash/nodes/ai/semantic_memory.py +2 -2
  37. kailash/nodes/base.py +545 -0
  38. kailash/nodes/edge/__init__.py +36 -0
  39. kailash/nodes/edge/base.py +240 -0
  40. kailash/nodes/edge/cloud_node.py +710 -0
  41. kailash/nodes/edge/coordination.py +239 -0
  42. kailash/nodes/edge/docker_node.py +825 -0
  43. kailash/nodes/edge/edge_data.py +582 -0
  44. kailash/nodes/edge/edge_migration_node.py +392 -0
  45. kailash/nodes/edge/edge_monitoring_node.py +421 -0
  46. kailash/nodes/edge/edge_state.py +673 -0
  47. kailash/nodes/edge/edge_warming_node.py +393 -0
  48. kailash/nodes/edge/kubernetes_node.py +652 -0
  49. kailash/nodes/edge/platform_node.py +766 -0
  50. kailash/nodes/edge/resource_analyzer_node.py +378 -0
  51. kailash/nodes/edge/resource_optimizer_node.py +501 -0
  52. kailash/nodes/edge/resource_scaler_node.py +397 -0
  53. kailash/nodes/ports.py +676 -0
  54. kailash/runtime/local.py +344 -1
  55. kailash/runtime/validation/__init__.py +20 -0
  56. kailash/runtime/validation/connection_context.py +119 -0
  57. kailash/runtime/validation/enhanced_error_formatter.py +202 -0
  58. kailash/runtime/validation/error_categorizer.py +164 -0
  59. kailash/runtime/validation/metrics.py +380 -0
  60. kailash/runtime/validation/performance.py +615 -0
  61. kailash/runtime/validation/suggestion_engine.py +212 -0
  62. kailash/testing/fixtures.py +2 -2
  63. kailash/workflow/builder.py +230 -4
  64. kailash/workflow/contracts.py +418 -0
  65. kailash/workflow/edge_infrastructure.py +369 -0
  66. kailash/workflow/migration.py +3 -3
  67. kailash/workflow/type_inference.py +669 -0
  68. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/METADATA +43 -27
  69. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/RECORD +73 -27
  70. kailash/nexus/__init__.py +0 -21
  71. kailash/nexus/cli/__init__.py +0 -5
  72. kailash/nexus/cli/__main__.py +0 -6
  73. kailash/nexus/cli/main.py +0 -176
  74. kailash/nexus/factory.py +0 -413
  75. kailash/nexus/gateway.py +0 -545
  76. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/WHEEL +0 -0
  77. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/entry_points.txt +0 -0
  78. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/licenses/LICENSE +0 -0
  79. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/top_level.txt +0 -0
kailash/edge/coordination/global_ordering.py (new file)
@@ -0,0 +1,355 @@
+"""Global ordering service for distributed events using hybrid logical clocks."""
+
+import asyncio
+import hashlib
+import json
+from collections import defaultdict
+from datetime import datetime
+from typing import Any, Dict, List, Optional, Tuple
+
+
+class HybridLogicalClock:
+    """Hybrid Logical Clock (HLC) implementation for global ordering.
+
+    Combines physical time with logical counters to provide:
+    - Causally consistent timestamps
+    - Tolerance for clock skew
+    - Total ordering of events
+    """
+
+    def __init__(self, node_id: str):
+        """Initialize HLC.
+
+        Args:
+            node_id: Unique identifier for this node
+        """
+        self.node_id = node_id
+        self.logical_time = 0
+        self.logical_counter = 0
+        self._lock = asyncio.Lock()
+
+    async def now(self) -> Tuple[int, int, str]:
+        """Get current HLC timestamp.
+
+        Returns:
+            Tuple of (logical_time, logical_counter, node_id)
+        """
+        async with self._lock:
+            physical_time = int(datetime.now().timestamp() * 1000)  # Milliseconds
+
+            if physical_time > self.logical_time:
+                self.logical_time = physical_time
+                self.logical_counter = 0
+            else:
+                self.logical_counter += 1
+
+            return (self.logical_time, self.logical_counter, self.node_id)
+
+    async def update(self, remote_time: int, remote_counter: int):
+        """Update clock with remote timestamp.
+
+        Args:
+            remote_time: Remote logical time
+            remote_counter: Remote logical counter
+        """
+        async with self._lock:
+            physical_time = int(datetime.now().timestamp() * 1000)
+
+            if physical_time > max(self.logical_time, remote_time):
+                self.logical_time = physical_time
+                self.logical_counter = 0
+            elif self.logical_time == remote_time:
+                self.logical_counter = max(self.logical_counter, remote_counter) + 1
+            elif self.logical_time < remote_time:
+                self.logical_time = remote_time
+                self.logical_counter = remote_counter + 1
+            else:
+                self.logical_counter += 1
+
+    def compare(self, ts1: Tuple[int, int, str], ts2: Tuple[int, int, str]) -> int:
+        """Compare two HLC timestamps.
+
+        Args:
+            ts1: First timestamp
+            ts2: Second timestamp
+
+        Returns:
+            -1 if ts1 < ts2, 0 if equal, 1 if ts1 > ts2
+        """
+        if ts1[0] != ts2[0]:
+            return -1 if ts1[0] < ts2[0] else 1
+        if ts1[1] != ts2[1]:
+            return -1 if ts1[1] < ts2[1] else 1
+        if ts1[2] != ts2[2]:
+            return -1 if ts1[2] < ts2[2] else 1
+        return 0
+
+
+class GlobalOrderingService:
+    """Global ordering service for distributed events.
+
+    Provides:
+    - Total ordering of events across edge nodes
+    - Causal dependency tracking
+    - Conflict detection and resolution
+    - Event deduplication
+    """
+
+    def __init__(self, node_id: str):
+        """Initialize global ordering service.
+
+        Args:
+            node_id: Unique identifier for this node
+        """
+        self.node_id = node_id
+        self.clock = HybridLogicalClock(node_id)
+        self.event_history: List[Dict[str, Any]] = []
+        self.causal_graph: Dict[str, List[str]] = defaultdict(list)
+        self.seen_events: set = set()
+        self._lock = asyncio.Lock()
+
+    async def order_events(self, events: List[Dict[str, Any]]) -> Dict[str, Any]:
+        """Order a list of events globally.
+
+        Args:
+            events: List of events to order
+
+        Returns:
+            Dict with ordered events and metadata
+        """
+        async with self._lock:
+            ordered_events = []
+
+            for event in events:
+                # Generate event ID if not present
+                if "id" not in event:
+                    event["id"] = self._generate_event_id(event)
+
+                # Skip duplicates
+                if event["id"] in self.seen_events:
+                    continue
+
+                # Assign HLC timestamp
+                timestamp = await self.clock.now()
+                event["hlc_timestamp"] = timestamp
+                event["hlc_time"] = timestamp[0]
+                event["hlc_counter"] = timestamp[1]
+                event["hlc_node"] = timestamp[2]
+
+                # Track causal dependencies
+                if "depends_on" in event:
+                    for dep in event["depends_on"]:
+                        self.causal_graph[event["id"]].append(dep)
+
+                ordered_events.append(event)
+                self.seen_events.add(event["id"])
+
+            # Sort by HLC timestamp
+            ordered_events.sort(
+                key=lambda e: (e["hlc_time"], e["hlc_counter"], e["hlc_node"])
+            )
+
+            # Add to history
+            self.event_history.extend(ordered_events)
+
+            return {
+                "ordered_events": ordered_events,
+                "logical_clock": self.clock.logical_time,
+                "causal_dependencies": dict(self.causal_graph),
+                "total_events": len(self.event_history),
+            }
+
+    async def merge_histories(
+        self, remote_history: List[Dict[str, Any]]
+    ) -> Dict[str, Any]:
+        """Merge remote event history with local history.
+
+        Args:
+            remote_history: Event history from remote node
+
+        Returns:
+            Dict with merged history and conflict information
+        """
+        async with self._lock:
+            conflicts = []
+            merged_events = []
+
+            # Update clock with remote timestamps
+            for event in remote_history:
+                if "hlc_time" in event and "hlc_counter" in event:
+                    await self.clock.update(event["hlc_time"], event["hlc_counter"])
+
+            # Merge histories
+            local_by_id = {e["id"]: e for e in self.event_history if "id" in e}
+
+            for remote_event in remote_history:
+                event_id = remote_event.get("id")
+                if not event_id:
+                    continue
+
+                if event_id in local_by_id:
+                    # Check for conflicts
+                    local_event = local_by_id[event_id]
+                    if self._events_conflict(local_event, remote_event):
+                        conflicts.append(
+                            {
+                                "event_id": event_id,
+                                "local": local_event,
+                                "remote": remote_event,
+                            }
+                        )
+                        # Keep event with higher timestamp
+                        if self._compare_event_timestamps(remote_event, local_event) > 0:
+                            local_by_id[event_id] = remote_event
+                else:
+                    # New event
+                    local_by_id[event_id] = remote_event
+                    self.seen_events.add(event_id)
+
+            # Rebuild ordered history
+            self.event_history = list(local_by_id.values())
+            self.event_history.sort(
+                key=lambda e: (
+                    e.get("hlc_time", 0),
+                    e.get("hlc_counter", 0),
+                    e.get("hlc_node", ""),
+                )
+            )
+
+            return {
+                "merged_events": len(self.event_history),
+                "conflicts": conflicts,
+                "conflict_count": len(conflicts),
+                "logical_clock": self.clock.logical_time,
+            }
+
+    def get_causal_order(self, event_id: str) -> List[str]:
+        """Get causal ordering for an event.

+        Args:
+            event_id: Event ID to get dependencies for
+
+        Returns:
+            List of event IDs that must precede this event
+        """
+        visited = set()
+        order = []
+
+        def dfs(eid: str):
+            if eid in visited:
+                return
+            visited.add(eid)
+
+            for dep in self.causal_graph.get(eid, []):
+                dfs(dep)
+
+            order.append(eid)
+
+        dfs(event_id)
+        return order[:-1]  # Exclude the event itself
+
+    def detect_causal_violations(self) -> List[Dict[str, Any]]:
+        """Detect violations of causal ordering.
+
+        Returns:
+            List of violations found
+        """
+        violations = []
+        event_positions = {
+            e["id"]: i for i, e in enumerate(self.event_history) if "id" in e
+        }
+
+        for event_id, deps in self.causal_graph.items():
+            event_pos = event_positions.get(event_id)
+            if event_pos is None:
+                continue
+
+            for dep in deps:
+                dep_pos = event_positions.get(dep)
+                if dep_pos is None:
+                    violations.append(
+                        {
+                            "type": "missing_dependency",
+                            "event": event_id,
+                            "missing": dep,
+                        }
+                    )
+                elif dep_pos > event_pos:
+                    violations.append(
+                        {
+                            "type": "causal_violation",
+                            "event": event_id,
+                            "dependency": dep,
+                            "event_position": event_pos,
+                            "dependency_position": dep_pos,
+                        }
+                    )
+
+        return violations
+
+    def _generate_event_id(self, event: Dict[str, Any]) -> str:
+        """Generate unique event ID.
+
+        Args:
+            event: Event data
+
+        Returns:
+            Unique event ID
+        """
+        # Create deterministic ID from event content
+        content = json.dumps(event, sort_keys=True)
+        hash_obj = hashlib.sha256(content.encode())
+        return f"event_{hash_obj.hexdigest()[:16]}_{self.node_id}"
+
+    def _events_conflict(self, event1: Dict[str, Any], event2: Dict[str, Any]) -> bool:
+        """Check if two events conflict.
+
+        Args:
+            event1: First event
+            event2: Second event
+
+        Returns:
+            True if events conflict
+        """
+        # Events conflict if they have same ID but different content
+        if event1.get("id") != event2.get("id"):
+            return False
+
+        # Compare non-timestamp fields
+        e1_copy = {
+            k: v
+            for k, v in event1.items()
+            if not k.startswith("hlc_") and k != "timestamp"
+        }
+        e2_copy = {
+            k: v
+            for k, v in event2.items()
+            if not k.startswith("hlc_") and k != "timestamp"
+        }
+
+        return e1_copy != e2_copy
+
+    def _compare_event_timestamps(
+        self, event1: Dict[str, Any], event2: Dict[str, Any]
+    ) -> int:
+        """Compare event timestamps.
+
+        Args:
+            event1: First event
+            event2: Second event
+
+        Returns:
+            -1 if event1 < event2, 0 if equal, 1 if event1 > event2
+        """
+        ts1 = (
+            event1.get("hlc_time", 0),
+            event1.get("hlc_counter", 0),
+            event1.get("hlc_node", ""),
+        )
+        ts2 = (
+            event2.get("hlc_time", 0),
+            event2.get("hlc_counter", 0),
+            event2.get("hlc_node", ""),
+        )
+
+        return self.clock.compare(ts1, ts2)
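
For orientation, here is a minimal usage sketch of the new module (illustrative only, not part of the diff; it assumes the import path implied by the file location above). GlobalOrderingService stamps each event with an HLC timestamp, deduplicates by a content-derived ID, and sorts on the (hlc_time, hlc_counter, hlc_node) triple:

import asyncio

from kailash.edge.coordination.global_ordering import GlobalOrderingService


async def demo():
    svc = GlobalOrderingService(node_id="edge-1")

    # Events without an "id" receive a deterministic SHA-256-derived one;
    # "depends_on" entries are recorded in the causal graph.
    result = await svc.order_events(
        [
            {"type": "write", "key": "x", "value": 1},
            {"type": "write", "key": "y", "value": 2, "depends_on": []},
        ]
    )
    for event in result["ordered_events"]:
        print(event["id"], event["hlc_time"], event["hlc_counter"])

    # Merging a replica's history advances the local clock and reports
    # same-ID, different-content conflicts (an empty history is a no-op).
    merge = await svc.merge_histories(remote_history=[])
    print(merge["conflict_count"])


asyncio.run(demo())
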
kailash/edge/coordination/leader_election.py (new file)
@@ -0,0 +1,217 @@
+"""Edge leader election service using Raft consensus."""
+
+import asyncio
+import logging
+from datetime import datetime, timedelta
+from typing import Any, Dict, List, Optional
+
+from .raft import RaftNode, RaftState
+
+
+class EdgeLeaderElection:
+    """Leader election service for edge nodes using Raft consensus.
+
+    This service manages leader election across edge nodes, providing:
+    - Automatic leader election on startup
+    - Leader failure detection and re-election
+    - Stable leader information for coordination
+    - Network partition handling
+    """
+
+    def __init__(self, raft_nodes: Dict[str, RaftNode]):
+        """Initialize leader election service.
+
+        Args:
+            raft_nodes: Dictionary of node_id -> RaftNode instances
+        """
+        self.raft_nodes = raft_nodes
+        self.current_leader: Optional[str] = None
+        self.current_term: int = 0
+        self.last_leader_change = datetime.now()
+        self.stability_threshold = timedelta(seconds=5)
+        self.logger = logging.getLogger("EdgeLeaderElection")
+
+        # Election monitoring
+        self._monitor_task: Optional[asyncio.Task] = None
+        self._running = False
+
+    async def start(self):
+        """Start leader election monitoring."""
+        self._running = True
+        self._monitor_task = asyncio.create_task(self._monitor_leadership())
+        self.logger.info("Leader election service started")
+
+    async def stop(self):
+        """Stop leader election monitoring."""
+        self._running = False
+        if self._monitor_task:
+            self._monitor_task.cancel()
+            try:
+                await self._monitor_task
+            except asyncio.CancelledError:
+                pass
+        self.logger.info("Leader election service stopped")
+
+    async def start_election(self) -> Dict[str, Any]:
+        """Start a new leader election.
+
+        Returns:
+            Dict with election results including leader and term
+        """
+        self.logger.info("Starting new leader election")
+
+        # Find a candidate node to trigger election
+        candidate_nodes = [
+            node for node in self.raft_nodes.values() if node.state != RaftState.LEADER
+        ]
+
+        if not candidate_nodes:
+            # Current leader still active
+            return self.get_current_leader()
+
+        # Trigger election on first non-leader node
+        candidate = candidate_nodes[0]
+        candidate._become_candidate()
+        await candidate._collect_votes()
+
+        # Wait briefly for election to complete
+        await asyncio.sleep(0.1)
+
+        # Update and return leader info
+        self._update_leader_info()
+        return self.get_current_leader()
+
+    def get_current_leader(self) -> Dict[str, Any]:
+        """Get current leader information.
+
+        Returns:
+            Dict with leader ID, term, and stability status
+        """
+        self._update_leader_info()
+
+        stable = False
+        if self.current_leader:
+            time_since_change = datetime.now() - self.last_leader_change
+            stable = time_since_change > self.stability_threshold
+
+        return {
+            "leader": self.current_leader,
+            "term": self.current_term,
+            "stable": stable,
+            "time_since_change": (
+                datetime.now() - self.last_leader_change
+            ).total_seconds(),
+        }
+
+    def force_election(self) -> None:
+        """Force a new election by demoting current leader."""
+        for node_id, node in self.raft_nodes.items():
+            if node.state == RaftState.LEADER:
+                node._become_follower()
+                self.logger.info(f"Forced leader {node_id} to step down")
+                break
+
+    async def wait_for_stable_leader(self, timeout: float = 10.0) -> Dict[str, Any]:
+        """Wait for a stable leader to be elected.
+
+        Args:
+            timeout: Maximum time to wait in seconds
+
+        Returns:
+            Leader information once stable
+
+        Raises:
+            TimeoutError: If no stable leader within timeout
+        """
+        start_time = datetime.now()
+
+        while (datetime.now() - start_time).total_seconds() < timeout:
+            leader_info = self.get_current_leader()
+
+            if leader_info["leader"] and leader_info["stable"]:
+                return leader_info
+
+            await asyncio.sleep(0.1)
+
+        raise TimeoutError(f"No stable leader elected within {timeout} seconds")
+
+    def _update_leader_info(self):
+        """Update current leader information from Raft nodes."""
+        new_leader = None
+        new_term = 0
+
+        for node_id, node in self.raft_nodes.items():
+            if node.state == RaftState.LEADER:
+                new_leader = node_id
+                new_term = node.current_term
+                break
+
+        # Check if leader changed
+        if new_leader != self.current_leader or new_term != self.current_term:
+            self.current_leader = new_leader
+            self.current_term = new_term
+            self.last_leader_change = datetime.now()
+
+            if new_leader:
+                self.logger.info(f"New leader elected: {new_leader} (term {new_term})")
+            else:
+                self.logger.warning("No leader - cluster in election")
+
+    async def _monitor_leadership(self):
+        """Background task to monitor leadership stability."""
+        while self._running:
+            try:
+                self._update_leader_info()
+
+                # Check if we need to trigger election
+                leader_info = self.get_current_leader()
+                if not leader_info["leader"]:
+                    # No leader for too long
+                    time_without_leader = (
+                        datetime.now() - self.last_leader_change
+                    ).total_seconds()
+                    if time_without_leader > 2.0:  # 2 seconds without leader
+                        self.logger.warning(
+                            "No leader for 2 seconds, triggering election"
+                        )
+                        await self.start_election()
+
+                await asyncio.sleep(0.5)  # Check every 500ms
+
+            except Exception as e:
+                self.logger.error(f"Leadership monitor error: {e}")
+
+    def get_cluster_health(self) -> Dict[str, Any]:
+        """Get health information about the cluster.
+
+        Returns:
+            Dict with cluster health metrics
+        """
+        total_nodes = len(self.raft_nodes)
+        leader_count = sum(
+            1 for node in self.raft_nodes.values() if node.state == RaftState.LEADER
+        )
+        follower_count = sum(
+            1 for node in self.raft_nodes.values() if node.state == RaftState.FOLLOWER
+        )
+        candidate_count = sum(
+            1 for node in self.raft_nodes.values() if node.state == RaftState.CANDIDATE
+        )
+
+        # Check for split brain
+        split_brain = leader_count > 1
+
+        # Check for partitions
+        has_quorum = (follower_count + leader_count) > total_nodes // 2
+
+        return {
+            "total_nodes": total_nodes,
+            "leader_count": leader_count,
+            "follower_count": follower_count,
+            "candidate_count": candidate_count,
+            "split_brain": split_brain,
+            "has_quorum": has_quorum,
+            "current_leader": self.current_leader,
+            "current_term": self.current_term,
+            "healthy": leader_count == 1 and has_quorum and not split_brain,
+        }
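
A sketch of how the election service might be driven (illustrative only; construction of the RaftNode instances is omitted here because raft.py's constructor is not shown in this hunk):

import asyncio

from kailash.edge.coordination.leader_election import EdgeLeaderElection


async def demo(raft_nodes):
    # raft_nodes: dict of node_id -> RaftNode, built elsewhere (see raft.py).
    election = EdgeLeaderElection(raft_nodes)
    await election.start()
    try:
        # Blocks until one node has held LEADER state for more than the
        # 5-second stability threshold, or raises TimeoutError.
        info = await election.wait_for_stable_leader(timeout=10.0)
        print("leader:", info["leader"], "term:", info["term"])

        # "healthy" means exactly one leader, a quorum, and no split brain.
        print(election.get_cluster_health())
    finally:
        await election.stop()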