kailash 0.8.3__py3-none-any.whl → 0.8.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. kailash/__init__.py +1 -7
  2. kailash/cli/__init__.py +11 -1
  3. kailash/cli/validation_audit.py +570 -0
  4. kailash/core/actors/supervisor.py +1 -1
  5. kailash/core/resilience/circuit_breaker.py +71 -1
  6. kailash/core/resilience/health_monitor.py +172 -0
  7. kailash/edge/compliance.py +33 -0
  8. kailash/edge/consistency.py +609 -0
  9. kailash/edge/coordination/__init__.py +30 -0
  10. kailash/edge/coordination/global_ordering.py +355 -0
  11. kailash/edge/coordination/leader_election.py +217 -0
  12. kailash/edge/coordination/partition_detector.py +296 -0
  13. kailash/edge/coordination/raft.py +485 -0
  14. kailash/edge/discovery.py +63 -1
  15. kailash/edge/migration/__init__.py +19 -0
  16. kailash/edge/migration/edge_migrator.py +832 -0
  17. kailash/edge/monitoring/__init__.py +21 -0
  18. kailash/edge/monitoring/edge_monitor.py +736 -0
  19. kailash/edge/prediction/__init__.py +10 -0
  20. kailash/edge/prediction/predictive_warmer.py +591 -0
  21. kailash/edge/resource/__init__.py +102 -0
  22. kailash/edge/resource/cloud_integration.py +796 -0
  23. kailash/edge/resource/cost_optimizer.py +949 -0
  24. kailash/edge/resource/docker_integration.py +919 -0
  25. kailash/edge/resource/kubernetes_integration.py +893 -0
  26. kailash/edge/resource/platform_integration.py +913 -0
  27. kailash/edge/resource/predictive_scaler.py +959 -0
  28. kailash/edge/resource/resource_analyzer.py +824 -0
  29. kailash/edge/resource/resource_pools.py +610 -0
  30. kailash/integrations/dataflow_edge.py +261 -0
  31. kailash/mcp_server/registry_integration.py +1 -1
  32. kailash/monitoring/__init__.py +18 -0
  33. kailash/monitoring/alerts.py +646 -0
  34. kailash/monitoring/metrics.py +677 -0
  35. kailash/nodes/__init__.py +2 -0
  36. kailash/nodes/ai/__init__.py +17 -0
  37. kailash/nodes/ai/a2a.py +1914 -43
  38. kailash/nodes/ai/a2a_backup.py +1807 -0
  39. kailash/nodes/ai/hybrid_search.py +972 -0
  40. kailash/nodes/ai/semantic_memory.py +558 -0
  41. kailash/nodes/ai/streaming_analytics.py +947 -0
  42. kailash/nodes/base.py +545 -0
  43. kailash/nodes/edge/__init__.py +36 -0
  44. kailash/nodes/edge/base.py +240 -0
  45. kailash/nodes/edge/cloud_node.py +710 -0
  46. kailash/nodes/edge/coordination.py +239 -0
  47. kailash/nodes/edge/docker_node.py +825 -0
  48. kailash/nodes/edge/edge_data.py +582 -0
  49. kailash/nodes/edge/edge_migration_node.py +392 -0
  50. kailash/nodes/edge/edge_monitoring_node.py +421 -0
  51. kailash/nodes/edge/edge_state.py +673 -0
  52. kailash/nodes/edge/edge_warming_node.py +393 -0
  53. kailash/nodes/edge/kubernetes_node.py +652 -0
  54. kailash/nodes/edge/platform_node.py +766 -0
  55. kailash/nodes/edge/resource_analyzer_node.py +378 -0
  56. kailash/nodes/edge/resource_optimizer_node.py +501 -0
  57. kailash/nodes/edge/resource_scaler_node.py +397 -0
  58. kailash/nodes/ports.py +676 -0
  59. kailash/runtime/local.py +344 -1
  60. kailash/runtime/validation/__init__.py +20 -0
  61. kailash/runtime/validation/connection_context.py +119 -0
  62. kailash/runtime/validation/enhanced_error_formatter.py +202 -0
  63. kailash/runtime/validation/error_categorizer.py +164 -0
  64. kailash/runtime/validation/metrics.py +380 -0
  65. kailash/runtime/validation/performance.py +615 -0
  66. kailash/runtime/validation/suggestion_engine.py +212 -0
  67. kailash/testing/fixtures.py +2 -2
  68. kailash/workflow/builder.py +234 -8
  69. kailash/workflow/contracts.py +418 -0
  70. kailash/workflow/edge_infrastructure.py +369 -0
  71. kailash/workflow/migration.py +3 -3
  72. kailash/workflow/type_inference.py +669 -0
  73. {kailash-0.8.3.dist-info → kailash-0.8.5.dist-info}/METADATA +44 -27
  74. {kailash-0.8.3.dist-info → kailash-0.8.5.dist-info}/RECORD +78 -28
  75. kailash/nexus/__init__.py +0 -21
  76. kailash/nexus/cli/__init__.py +0 -5
  77. kailash/nexus/cli/__main__.py +0 -6
  78. kailash/nexus/cli/main.py +0 -176
  79. kailash/nexus/factory.py +0 -413
  80. kailash/nexus/gateway.py +0 -545
  81. {kailash-0.8.3.dist-info → kailash-0.8.5.dist-info}/WHEEL +0 -0
  82. {kailash-0.8.3.dist-info → kailash-0.8.5.dist-info}/entry_points.txt +0 -0
  83. {kailash-0.8.3.dist-info → kailash-0.8.5.dist-info}/licenses/LICENSE +0 -0
  84. {kailash-0.8.3.dist-info → kailash-0.8.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,421 @@
1
+ """Edge monitoring node for comprehensive edge observability.
2
+
3
+ This node integrates edge monitoring capabilities into workflows,
4
+ providing metrics collection, health monitoring, alerting, and analytics.
5
+ """
6
+
7
+ import asyncio
8
+ from datetime import datetime, timedelta
9
+ from typing import Any, Dict, List, Optional
10
+
11
+ from kailash.edge.monitoring.edge_monitor import (
12
+ AlertSeverity,
13
+ EdgeMetric,
14
+ EdgeMonitor,
15
+ HealthStatus,
16
+ MetricType,
17
+ )
18
+ from kailash.nodes.base import NodeParameter, register_node
19
+ from kailash.nodes.base_async import AsyncNode
20
+
21
+
22
@register_node()
class EdgeMonitoringNode(AsyncNode):
    """Node for edge monitoring and observability operations.

    The node owns an ``EdgeMonitor`` instance and exposes it to workflows
    through a single ``operation`` parameter. Supported operations are
    ``record_metric``, ``query_metrics``, ``get_health``, ``get_alerts``,
    ``get_analytics``, ``set_threshold``, ``get_summary``, ``start_monitor``
    and ``stop_monitor``.

    Every operation returns a dict with a ``status`` key. Failures raised
    while handling an operation are logged and reported as
    ``{"status": "error", "error": <message>}`` rather than propagated.

    Example:
        >>> # Record a metric
        >>> result = await monitoring_node.execute_async(
        ...     operation="record_metric",
        ...     edge_node="edge-west-1",
        ...     metric_type="latency",
        ...     value=0.250,
        ...     tags={"region": "us-west", "service": "api"}
        ... )

        >>> # Get health status
        >>> result = await monitoring_node.execute_async(
        ...     operation="get_health",
        ...     edge_node="edge-west-1"
        ... )

        >>> # Query metrics
        >>> result = await monitoring_node.execute_async(
        ...     operation="query_metrics",
        ...     edge_node="edge-west-1",
        ...     metric_type="latency",
        ...     time_range_minutes=60
        ... )

        >>> # Get analytics
        >>> result = await monitoring_node.execute_async(
        ...     operation="get_analytics",
        ...     edge_node="edge-west-1"
        ... )
    """

    def __init__(self, **kwargs):
        """Initialize the node and build its ``EdgeMonitor``.

        Args:
            **kwargs: Forwarded unchanged to ``AsyncNode.__init__``. The keys
                ``retention_period`` (default 86400), ``alert_cooldown``
                (default 300), ``health_check_interval`` (default 30) and
                ``anomaly_detection`` (default True) are additionally read
                here and passed to ``EdgeMonitor``.
        """
        super().__init__(**kwargs)

        # Extract configuration
        retention_period = kwargs.get("retention_period", 24 * 60 * 60)
        alert_cooldown = kwargs.get("alert_cooldown", 300)
        health_check_interval = kwargs.get("health_check_interval", 30)
        anomaly_detection = kwargs.get("anomaly_detection", True)

        # Initialize monitor
        self.monitor = EdgeMonitor(
            retention_period=retention_period,
            alert_cooldown=alert_cooldown,
            health_check_interval=health_check_interval,
            anomaly_detection=anomaly_detection,
        )

        # Tracks whether this node has called monitor.start() without a
        # matching monitor.stop(); guards against double start/stop.
        self._monitor_started = False

    @property
    def input_parameters(self) -> Dict[str, NodeParameter]:
        """Define input parameters.

        A fresh dict (including fresh default values) is built on every
        access, so callers may mutate the result safely.
        """
        return {
            "operation": NodeParameter(
                name="operation",
                type=str,
                required=True,
                description="Operation to perform (record_metric, query_metrics, get_health, get_alerts, get_analytics, set_threshold, get_summary, start_monitor, stop_monitor)",
            ),
            # For record_metric
            "edge_node": NodeParameter(
                name="edge_node",
                type=str,
                required=False,
                description="Edge node identifier",
            ),
            "metric_type": NodeParameter(
                name="metric_type",
                type=str,
                required=False,
                description="Type of metric (latency, throughput, error_rate, resource_usage, availability, cache_hit_rate)",
            ),
            "value": NodeParameter(
                name="value", type=float, required=False, description="Metric value"
            ),
            "tags": NodeParameter(
                name="tags",
                type=dict,
                required=False,
                default={},
                description="Metric tags for filtering",
            ),
            # For query_metrics
            "time_range_minutes": NodeParameter(
                name="time_range_minutes",
                type=int,
                required=False,
                default=60,
                description="Time range in minutes for queries",
            ),
            # For get_alerts
            "severity": NodeParameter(
                name="severity",
                type=str,
                required=False,
                description="Alert severity filter (info, warning, error, critical)",
            ),
            "active_only": NodeParameter(
                name="active_only",
                type=bool,
                required=False,
                default=False,
                description="Only return active alerts",
            ),
            # For set_threshold
            "threshold_value": NodeParameter(
                name="threshold_value",
                type=float,
                required=False,
                description="Threshold value to set",
            ),
            # Configuration
            "retention_period": NodeParameter(
                name="retention_period",
                type=int,
                required=False,
                default=24 * 60 * 60,
                description="Metrics retention period (seconds)",
            ),
            "alert_cooldown": NodeParameter(
                name="alert_cooldown",
                type=int,
                required=False,
                default=300,
                description="Alert cooldown period (seconds)",
            ),
            "health_check_interval": NodeParameter(
                name="health_check_interval",
                type=int,
                required=False,
                default=30,
                description="Health check interval (seconds)",
            ),
            "anomaly_detection": NodeParameter(
                name="anomaly_detection",
                type=bool,
                required=False,
                default=True,
                description="Enable anomaly detection",
            ),
        }

    @property
    def output_parameters(self) -> Dict[str, NodeParameter]:
        """Define output parameters."""
        return {
            "status": NodeParameter(
                name="status", type=str, description="Operation status"
            ),
            "metrics": NodeParameter(
                name="metrics", type=list, required=False, description="List of metrics"
            ),
            "health": NodeParameter(
                name="health",
                type=dict,
                required=False,
                description="Health status information",
            ),
            "alerts": NodeParameter(
                name="alerts", type=list, required=False, description="List of alerts"
            ),
            "analytics": NodeParameter(
                name="analytics",
                type=dict,
                required=False,
                description="Analytics summary",
            ),
            "summary": NodeParameter(
                name="summary",
                type=dict,
                required=False,
                description="Overall monitoring summary",
            ),
            "metric_recorded": NodeParameter(
                name="metric_recorded",
                type=bool,
                required=False,
                description="Whether metric was recorded",
            ),
            "monitor_active": NodeParameter(
                name="monitor_active",
                type=bool,
                required=False,
                description="Whether monitor is active",
            ),
        }

    def get_parameters(self) -> Dict[str, NodeParameter]:
        """Get all node parameters for compatibility."""
        return self.input_parameters

    async def async_run(self, **kwargs) -> Dict[str, Any]:
        """Execute the monitoring operation named by ``kwargs["operation"]``.

        Args:
            **kwargs: Node inputs as declared in ``input_parameters``.

        Returns:
            The result dict of the selected operation, or
            ``{"status": "error", "error": <message>}`` when the operation is
            missing, unknown, or raises.
        """
        # Operation name -> zero-argument coroutine factory. The lambdas bind
        # the inputs for handlers that need them; the rest take no arguments.
        handlers = {
            "record_metric": lambda: self._record_metric(kwargs),
            "query_metrics": lambda: self._query_metrics(kwargs),
            "get_health": lambda: self._get_health(kwargs),
            "get_alerts": lambda: self._get_alerts(kwargs),
            "get_analytics": lambda: self._get_analytics(kwargs),
            "set_threshold": lambda: self._set_threshold(kwargs),
            "get_summary": self._get_summary,
            "start_monitor": self._start_monitor,
            "stop_monitor": self._stop_monitor,
        }

        try:
            # Looked up inside the try so a missing or malformed operation is
            # reported through the same error dict as any other failure.
            if "operation" not in kwargs:
                raise ValueError("Missing required parameter: operation")
            operation = kwargs["operation"]

            handler = handlers.get(operation)
            if handler is None:
                raise ValueError(f"Unknown operation: {operation}")
            return await handler()

        except Exception as e:
            # Top-level boundary of the node: convert any failure into an
            # error result so the workflow receives a structured status.
            self.logger.error(f"Edge monitoring operation failed: {str(e)}")
            return {"status": "error", "error": str(e)}

    def _parse_enum(self, enum_cls: type, raw: Any, label: str) -> Optional[Any]:
        """Convert ``raw`` into a member of ``enum_cls``.

        Args:
            enum_cls: Enum class to look the value up in.
            raw: Value supplied by the caller; falsy values mean "not given".
            label: Human-readable name of the parameter, used in the warning.

        Returns:
            The matching enum member, or ``None`` when ``raw`` is falsy or is
            not a valid value of ``enum_cls`` (a warning is logged in the
            latter case so the bad input does not go unnoticed).
        """
        if not raw:
            return None
        try:
            return enum_cls(raw)
        except ValueError:
            self.logger.warning("Unrecognized %s: %r", label, raw)
            return None

    @staticmethod
    def _time_range_minutes(kwargs: Dict[str, Any]) -> Any:
        """Return ``time_range_minutes`` from ``kwargs``, defaulting to 60.

        An explicit ``None`` is treated like a missing key so it never
        reaches ``timedelta``.
        """
        minutes = kwargs.get("time_range_minutes")
        return 60 if minutes is None else minutes

    async def _record_metric(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
        """Record a metric.

        Reads ``metric_type``, ``edge_node`` (default ``"unknown"``),
        ``value`` (default ``0.0``) and ``tags`` (default ``{}``) from
        ``kwargs``. A missing or unrecognized ``metric_type`` is recorded as
        ``MetricType.LATENCY``; unrecognized values also log a warning.

        Returns:
            ``status``, ``metric_recorded`` and the recorded ``metric`` as a
            dict.
        """
        # Create metric
        metric_type = self._parse_enum(
            MetricType, kwargs.get("metric_type"), "metric type"
        )
        if metric_type is None:
            metric_type = MetricType.LATENCY

        metric = EdgeMetric(
            timestamp=datetime.now(),
            edge_node=kwargs.get("edge_node", "unknown"),
            metric_type=metric_type,
            value=kwargs.get("value", 0.0),
            # `or {}` also covers an explicit tags=None
            tags=kwargs.get("tags") or {},
        )

        # Record metric
        await self.monitor.record_metric(metric)

        return {
            "status": "success",
            "metric_recorded": True,
            "metric": metric.to_dict(),
        }

    async def _query_metrics(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
        """Query metrics recorded within the last ``time_range_minutes``.

        Optional filters ``edge_node``, ``metric_type`` and ``tags`` are
        forwarded to the monitor. An unrecognized ``metric_type`` is dropped
        (no metric-type filter is applied) and a warning is logged.

        Returns:
            ``status``, the matching ``metrics`` as dicts, their ``count``
            and the queried ``time_range`` in ISO format.
        """
        # Parse parameters
        edge_node = kwargs.get("edge_node")
        tags = kwargs.get("tags")
        metric_type = self._parse_enum(
            MetricType, kwargs.get("metric_type"), "metric type"
        )

        # Calculate time range
        end_time = datetime.now()
        start_time = end_time - timedelta(minutes=self._time_range_minutes(kwargs))

        # Query metrics
        metrics = await self.monitor.get_metrics(
            edge_node=edge_node,
            metric_type=metric_type,
            start_time=start_time,
            end_time=end_time,
            tags=tags,
        )

        # Format results
        return {
            "status": "success",
            "metrics": [m.to_dict() for m in metrics],
            "count": len(metrics),
            "time_range": {
                "start": start_time.isoformat(),
                "end": end_time.isoformat(),
            },
        }

    async def _get_health(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
        """Get health status for ``edge_node`` (default ``"unknown"``)."""
        edge_node = kwargs.get("edge_node", "unknown")

        health = await self.monitor.get_edge_health(edge_node)

        return {"status": "success", "health": health.to_dict()}

    async def _get_alerts(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
        """Get alerts raised within the last ``time_range_minutes``.

        Optional filters ``edge_node``, ``severity`` and ``active_only`` are
        forwarded to the monitor. An unrecognized ``severity`` is dropped (no
        severity filter is applied) and a warning is logged.

        Returns:
            ``status``, the matching ``alerts`` as dicts, their ``count`` and
            ``active_count``.
        """
        # Parse parameters
        edge_node = kwargs.get("edge_node")
        active_only = kwargs.get("active_only", False)
        severity = self._parse_enum(
            AlertSeverity, kwargs.get("severity"), "alert severity"
        )

        # Calculate time range
        start_time = datetime.now() - timedelta(
            minutes=self._time_range_minutes(kwargs)
        )

        # Get alerts
        alerts = await self.monitor.get_alerts(
            edge_node=edge_node,
            severity=severity,
            start_time=start_time,
            active_only=active_only,
        )

        # Format results
        return {
            "status": "success",
            "alerts": [a.to_dict() for a in alerts],
            "count": len(alerts),
            # TODO: proper active check. Until alerts can be tested for
            # activity here, every returned alert is counted as active.
            "active_count": len(alerts),
        }

    async def _get_analytics(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
        """Get analytics for ``edge_node`` (default ``"unknown"``)."""
        edge_node = kwargs.get("edge_node", "unknown")

        analytics = self.monitor.get_analytics(edge_node)

        return {"status": "success", "analytics": analytics}

    async def _set_threshold(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
        """Set an alert threshold on the monitor.

        Reads ``metric_type`` (default ``"latency"``), ``severity`` (default
        ``"warning"``) and ``threshold_value`` (default ``1.0``).

        Returns:
            A success dict echoing the applied ``metric_type``, ``severity``
            and ``value``, or an error dict when ``metric_type`` is not a
            valid ``MetricType`` value (nothing is changed in that case).
        """
        metric_type_str = kwargs.get("metric_type", "latency")
        severity = kwargs.get("severity", "warning")
        value = kwargs.get("threshold_value", 1.0)

        # Parse metric type; unlike queries, a bad type is a hard error here
        # because silently configuring a different metric would be worse.
        try:
            metric_type = MetricType(metric_type_str)
        except ValueError:
            return {
                "status": "error",
                "error": f"Invalid metric type: {metric_type_str}",
            }

        # Set threshold
        self.monitor.set_threshold(metric_type, severity, value)

        return {
            "status": "success",
            "threshold_set": True,
            "metric_type": metric_type.value,
            "severity": severity,
            "value": value,
        }

    async def _get_summary(self) -> Dict[str, Any]:
        """Get the monitor's overall summary."""
        summary = self.monitor.get_summary()

        return {"status": "success", "summary": summary}

    async def _start_monitor(self) -> Dict[str, Any]:
        """Start the monitoring service if this node has not started it yet."""
        if not self._monitor_started:
            await self.monitor.start()
            self._monitor_started = True

        return {"status": "success", "monitor_active": True}

    async def _stop_monitor(self) -> Dict[str, Any]:
        """Stop the monitoring service if this node started it."""
        if self._monitor_started:
            await self.monitor.stop()
            self._monitor_started = False

        return {"status": "success", "monitor_active": False}

    async def cleanup(self):
        """Clean up resources by stopping the monitor if it is running."""
        if self._monitor_started:
            await self.monitor.stop()
            # Reset the flag so a repeated cleanup() does not stop the
            # monitor twice and a later start_monitor actually restarts it.
            self._monitor_started = False