kailash 0.8.4__py3-none-any.whl → 0.8.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +1 -7
- kailash/cli/__init__.py +11 -1
- kailash/cli/validation_audit.py +570 -0
- kailash/core/actors/supervisor.py +1 -1
- kailash/core/resilience/circuit_breaker.py +71 -1
- kailash/core/resilience/health_monitor.py +172 -0
- kailash/edge/compliance.py +33 -0
- kailash/edge/consistency.py +609 -0
- kailash/edge/coordination/__init__.py +30 -0
- kailash/edge/coordination/global_ordering.py +355 -0
- kailash/edge/coordination/leader_election.py +217 -0
- kailash/edge/coordination/partition_detector.py +296 -0
- kailash/edge/coordination/raft.py +485 -0
- kailash/edge/discovery.py +63 -1
- kailash/edge/migration/__init__.py +19 -0
- kailash/edge/migration/edge_migrator.py +832 -0
- kailash/edge/monitoring/__init__.py +21 -0
- kailash/edge/monitoring/edge_monitor.py +736 -0
- kailash/edge/prediction/__init__.py +10 -0
- kailash/edge/prediction/predictive_warmer.py +591 -0
- kailash/edge/resource/__init__.py +102 -0
- kailash/edge/resource/cloud_integration.py +796 -0
- kailash/edge/resource/cost_optimizer.py +949 -0
- kailash/edge/resource/docker_integration.py +919 -0
- kailash/edge/resource/kubernetes_integration.py +893 -0
- kailash/edge/resource/platform_integration.py +913 -0
- kailash/edge/resource/predictive_scaler.py +959 -0
- kailash/edge/resource/resource_analyzer.py +824 -0
- kailash/edge/resource/resource_pools.py +610 -0
- kailash/integrations/dataflow_edge.py +261 -0
- kailash/mcp_server/registry_integration.py +1 -1
- kailash/monitoring/__init__.py +18 -0
- kailash/monitoring/alerts.py +646 -0
- kailash/monitoring/metrics.py +677 -0
- kailash/nodes/__init__.py +2 -0
- kailash/nodes/ai/semantic_memory.py +2 -2
- kailash/nodes/base.py +545 -0
- kailash/nodes/edge/__init__.py +36 -0
- kailash/nodes/edge/base.py +240 -0
- kailash/nodes/edge/cloud_node.py +710 -0
- kailash/nodes/edge/coordination.py +239 -0
- kailash/nodes/edge/docker_node.py +825 -0
- kailash/nodes/edge/edge_data.py +582 -0
- kailash/nodes/edge/edge_migration_node.py +392 -0
- kailash/nodes/edge/edge_monitoring_node.py +421 -0
- kailash/nodes/edge/edge_state.py +673 -0
- kailash/nodes/edge/edge_warming_node.py +393 -0
- kailash/nodes/edge/kubernetes_node.py +652 -0
- kailash/nodes/edge/platform_node.py +766 -0
- kailash/nodes/edge/resource_analyzer_node.py +378 -0
- kailash/nodes/edge/resource_optimizer_node.py +501 -0
- kailash/nodes/edge/resource_scaler_node.py +397 -0
- kailash/nodes/ports.py +676 -0
- kailash/runtime/local.py +344 -1
- kailash/runtime/validation/__init__.py +20 -0
- kailash/runtime/validation/connection_context.py +119 -0
- kailash/runtime/validation/enhanced_error_formatter.py +202 -0
- kailash/runtime/validation/error_categorizer.py +164 -0
- kailash/runtime/validation/metrics.py +380 -0
- kailash/runtime/validation/performance.py +615 -0
- kailash/runtime/validation/suggestion_engine.py +212 -0
- kailash/testing/fixtures.py +2 -2
- kailash/workflow/builder.py +230 -4
- kailash/workflow/contracts.py +418 -0
- kailash/workflow/edge_infrastructure.py +369 -0
- kailash/workflow/migration.py +3 -3
- kailash/workflow/type_inference.py +669 -0
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/METADATA +43 -27
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/RECORD +73 -27
- kailash/nexus/__init__.py +0 -21
- kailash/nexus/cli/__init__.py +0 -5
- kailash/nexus/cli/__main__.py +0 -6
- kailash/nexus/cli/main.py +0 -176
- kailash/nexus/factory.py +0 -413
- kailash/nexus/gateway.py +0 -545
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/WHEEL +0 -0
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/entry_points.txt +0 -0
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,421 @@
|
|
1
|
+
"""Edge monitoring node for comprehensive edge observability.
|
2
|
+
|
3
|
+
This node integrates edge monitoring capabilities into workflows,
|
4
|
+
providing metrics collection, health monitoring, alerting, and analytics.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import asyncio
|
8
|
+
from datetime import datetime, timedelta
|
9
|
+
from typing import Any, Dict, List, Optional
|
10
|
+
|
11
|
+
from kailash.edge.monitoring.edge_monitor import (
|
12
|
+
AlertSeverity,
|
13
|
+
EdgeMetric,
|
14
|
+
EdgeMonitor,
|
15
|
+
HealthStatus,
|
16
|
+
MetricType,
|
17
|
+
)
|
18
|
+
from kailash.nodes.base import NodeParameter, register_node
|
19
|
+
from kailash.nodes.base_async import AsyncNode
|
20
|
+
|
21
|
+
|
22
|
+
@register_node()
class EdgeMonitoringNode(AsyncNode):
    """Node for edge monitoring and observability operations.

    This node provides comprehensive monitoring capabilities for edge nodes
    including metrics collection, health checks, alerting, and analytics.
    Every operation is delegated to an ``EdgeMonitor`` instance created in
    ``__init__``; results are returned as plain dictionaries whose
    ``"status"`` key is ``"success"`` or ``"error"``.

    Example:
        >>> # Record a metric
        >>> result = await monitoring_node.execute_async(
        ...     operation="record_metric",
        ...     edge_node="edge-west-1",
        ...     metric_type="latency",
        ...     value=0.250,
        ...     tags={"region": "us-west", "service": "api"}
        ... )

        >>> # Get health status
        >>> result = await monitoring_node.execute_async(
        ...     operation="get_health",
        ...     edge_node="edge-west-1"
        ... )

        >>> # Query metrics
        >>> result = await monitoring_node.execute_async(
        ...     operation="query_metrics",
        ...     edge_node="edge-west-1",
        ...     metric_type="latency",
        ...     time_range_minutes=60
        ... )

        >>> # Get analytics
        >>> result = await monitoring_node.execute_async(
        ...     operation="get_analytics",
        ...     edge_node="edge-west-1"
        ... )
    """

    def __init__(self, **kwargs):
        """Initialize edge monitoring node.

        Args:
            **kwargs: Node configuration, forwarded unchanged to the parent
                constructor. The keys ``retention_period``,
                ``alert_cooldown``, ``health_check_interval`` and
                ``anomaly_detection`` are additionally read here to configure
                the underlying ``EdgeMonitor``.
        """
        super().__init__(**kwargs)

        # Initialize monitor from the configuration keys (defaults mirror the
        # ones declared in ``input_parameters``).
        self.monitor = EdgeMonitor(
            retention_period=kwargs.get("retention_period", 24 * 60 * 60),
            alert_cooldown=kwargs.get("alert_cooldown", 300),
            health_check_interval=kwargs.get("health_check_interval", 30),
            anomaly_detection=kwargs.get("anomaly_detection", True),
        )

        # Tracks whether ``self.monitor.start()`` has been awaited without a
        # matching ``stop()``; guards against double start/stop.
        self._monitor_started = False

    @property
    def input_parameters(self) -> Dict[str, NodeParameter]:
        """Define input parameters.

        Returns:
            Mapping of parameter name to its ``NodeParameter`` declaration.
            A fresh mapping is built on every access.
        """
        return {
            "operation": NodeParameter(
                name="operation",
                type=str,
                required=True,
                description="Operation to perform (record_metric, query_metrics, get_health, get_alerts, get_analytics, set_threshold, get_summary, start_monitor, stop_monitor)",
            ),
            # For record_metric
            "edge_node": NodeParameter(
                name="edge_node",
                type=str,
                required=False,
                description="Edge node identifier",
            ),
            "metric_type": NodeParameter(
                name="metric_type",
                type=str,
                required=False,
                description="Type of metric (latency, throughput, error_rate, resource_usage, availability, cache_hit_rate)",
            ),
            "value": NodeParameter(
                name="value", type=float, required=False, description="Metric value"
            ),
            "tags": NodeParameter(
                name="tags",
                type=dict,
                required=False,
                default={},
                description="Metric tags for filtering",
            ),
            # For query_metrics
            "time_range_minutes": NodeParameter(
                name="time_range_minutes",
                type=int,
                required=False,
                default=60,
                description="Time range in minutes for queries",
            ),
            # For get_alerts
            "severity": NodeParameter(
                name="severity",
                type=str,
                required=False,
                description="Alert severity filter (info, warning, error, critical)",
            ),
            "active_only": NodeParameter(
                name="active_only",
                type=bool,
                required=False,
                default=False,
                description="Only return active alerts",
            ),
            # For set_threshold
            "threshold_value": NodeParameter(
                name="threshold_value",
                type=float,
                required=False,
                description="Threshold value to set",
            ),
            # Configuration
            "retention_period": NodeParameter(
                name="retention_period",
                type=int,
                required=False,
                default=24 * 60 * 60,
                description="Metrics retention period (seconds)",
            ),
            "alert_cooldown": NodeParameter(
                name="alert_cooldown",
                type=int,
                required=False,
                default=300,
                description="Alert cooldown period (seconds)",
            ),
            "health_check_interval": NodeParameter(
                name="health_check_interval",
                type=int,
                required=False,
                default=30,
                description="Health check interval (seconds)",
            ),
            "anomaly_detection": NodeParameter(
                name="anomaly_detection",
                type=bool,
                required=False,
                default=True,
                description="Enable anomaly detection",
            ),
        }

    @property
    def output_parameters(self) -> Dict[str, NodeParameter]:
        """Define output parameters.

        Returns:
            Mapping of output name to its ``NodeParameter`` declaration.
        """
        return {
            "status": NodeParameter(
                name="status", type=str, description="Operation status"
            ),
            "metrics": NodeParameter(
                name="metrics", type=list, required=False, description="List of metrics"
            ),
            "health": NodeParameter(
                name="health",
                type=dict,
                required=False,
                description="Health status information",
            ),
            "alerts": NodeParameter(
                name="alerts", type=list, required=False, description="List of alerts"
            ),
            "analytics": NodeParameter(
                name="analytics",
                type=dict,
                required=False,
                description="Analytics summary",
            ),
            "summary": NodeParameter(
                name="summary",
                type=dict,
                required=False,
                description="Overall monitoring summary",
            ),
            "metric_recorded": NodeParameter(
                name="metric_recorded",
                type=bool,
                required=False,
                description="Whether metric was recorded",
            ),
            "monitor_active": NodeParameter(
                name="monitor_active",
                type=bool,
                required=False,
                description="Whether monitor is active",
            ),
        }

    def get_parameters(self) -> Dict[str, NodeParameter]:
        """Get all node parameters for compatibility."""
        return self.input_parameters

    async def async_run(self, **kwargs) -> Dict[str, Any]:
        """Execute monitoring operation.

        Args:
            **kwargs: Must contain ``operation``; the remaining keys are
                interpreted by the selected operation handler.

        Returns:
            The handler's result dictionary, or
            ``{"status": "error", "error": <message>}`` when the operation is
            unknown or the handler raises.

        Raises:
            KeyError: If ``operation`` is missing from ``kwargs``.
        """
        operation = kwargs["operation"]

        # Handlers that consume the call arguments.
        handlers_with_args = {
            "record_metric": self._record_metric,
            "query_metrics": self._query_metrics,
            "get_health": self._get_health,
            "get_alerts": self._get_alerts,
            "get_analytics": self._get_analytics,
            "set_threshold": self._set_threshold,
        }
        # Handlers that take no arguments.
        handlers_without_args = {
            "get_summary": self._get_summary,
            "start_monitor": self._start_monitor,
            "stop_monitor": self._stop_monitor,
        }

        try:
            if operation in handlers_with_args:
                return await handlers_with_args[operation](kwargs)
            if operation in handlers_without_args:
                return await handlers_without_args[operation]()
            raise ValueError(f"Unknown operation: {operation}")

        except Exception as e:
            # Node boundary: report failures as a result payload instead of
            # propagating them to the caller.
            self.logger.error(f"Edge monitoring operation failed: {str(e)}")
            return {"status": "error", "error": str(e)}

    def _parse_metric_type(
        self, raw: Any, fallback: Optional[MetricType] = None
    ) -> Optional[MetricType]:
        """Convert a raw ``metric_type`` argument into a ``MetricType``.

        Args:
            raw: Value supplied by the caller (normally a string).
            fallback: Value returned when ``raw`` is empty or not a valid
                ``MetricType``.

        Returns:
            The parsed ``MetricType``, or ``fallback``.
        """
        if not raw:
            return fallback
        try:
            return MetricType(raw)
        except ValueError:
            # Keep the best-effort fallback, but make it visible in the logs
            # rather than silently substituting a different metric type.
            self.logger.warning(
                "Unrecognized metric_type %r; falling back to %r", raw, fallback
            )
            return fallback

    @staticmethod
    def _time_range_minutes(kwargs: Dict[str, Any]) -> Any:
        """Return ``time_range_minutes`` from ``kwargs``, defaulting to 60.

        An explicit ``None`` is treated like a missing key so that the
        subsequent ``timedelta`` construction does not fail.
        """
        minutes = kwargs.get("time_range_minutes")
        return 60 if minutes is None else minutes

    async def _record_metric(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
        """Record a metric.

        Args:
            kwargs: Call arguments; reads ``metric_type`` (default latency),
                ``edge_node`` (default ``"unknown"``), ``value`` (default
                ``0.0``) and ``tags`` (default empty).

        Returns:
            ``status``, ``metric_recorded`` and the serialized ``metric``.
        """
        # Missing or invalid metric types are recorded as latency.
        metric_type = self._parse_metric_type(
            kwargs.get("metric_type", "latency"), fallback=MetricType.LATENCY
        )

        # Treat an explicit None like a missing key so the documented defaults
        # apply either way.
        edge_node = kwargs.get("edge_node")
        if edge_node is None:
            edge_node = "unknown"
        value = kwargs.get("value")
        if value is None:
            value = 0.0

        metric = EdgeMetric(
            timestamp=datetime.now(),
            edge_node=edge_node,
            metric_type=metric_type,
            value=value,
            # Copy so the stored metric never aliases a caller-owned or
            # default dictionary.
            tags=dict(kwargs.get("tags") or {}),
        )

        # Record metric
        await self.monitor.record_metric(metric)

        return {
            "status": "success",
            "metric_recorded": True,
            "metric": metric.to_dict(),
        }

    async def _query_metrics(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
        """Query metrics.

        Args:
            kwargs: Call arguments; reads the optional filters ``edge_node``,
                ``metric_type`` and ``tags`` plus ``time_range_minutes``
                (default 60), which sets the look-back window ending now.

        Returns:
            ``status``, the serialized ``metrics``, their ``count`` and the
            ISO-formatted ``time_range`` that was queried.
        """
        # An unparseable metric type disables the filter (with a warning).
        metric_type = self._parse_metric_type(kwargs.get("metric_type"))

        # Calculate time range
        end_time = datetime.now()
        start_time = end_time - timedelta(minutes=self._time_range_minutes(kwargs))

        # Query metrics
        metrics = await self.monitor.get_metrics(
            edge_node=kwargs.get("edge_node"),
            metric_type=metric_type,
            start_time=start_time,
            end_time=end_time,
            tags=kwargs.get("tags"),
        )

        # Format results
        return {
            "status": "success",
            "metrics": [m.to_dict() for m in metrics],
            "count": len(metrics),
            "time_range": {
                "start": start_time.isoformat(),
                "end": end_time.isoformat(),
            },
        }

    async def _get_health(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
        """Get health status.

        Args:
            kwargs: Call arguments; reads ``edge_node`` (default
                ``"unknown"``).

        Returns:
            ``status`` and the serialized ``health`` of the edge node.
        """
        edge_node = kwargs.get("edge_node", "unknown")

        health = await self.monitor.get_edge_health(edge_node)

        return {"status": "success", "health": health.to_dict()}

    async def _get_alerts(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
        """Get alerts.

        Args:
            kwargs: Call arguments; reads the optional filters ``edge_node``
                and ``severity``, the ``active_only`` flag (default False) and
                ``time_range_minutes`` (default 60).

        Returns:
            ``status``, the serialized ``alerts``, their ``count`` and
            ``active_count``.
        """
        # Parse severity; an unparseable value disables the filter.
        severity = None
        severity_str = kwargs.get("severity")
        if severity_str:
            try:
                severity = AlertSeverity(severity_str)
            except ValueError:
                self.logger.warning(
                    "Unrecognized alert severity %r; returning all severities",
                    severity_str,
                )

        # Calculate time range
        start_time = datetime.now() - timedelta(
            minutes=self._time_range_minutes(kwargs)
        )

        # Get alerts
        alerts = await self.monitor.get_alerts(
            edge_node=kwargs.get("edge_node"),
            severity=severity,
            start_time=start_time,
            active_only=kwargs.get("active_only", False),
        )

        # Format results
        return {
            "status": "success",
            "alerts": [a.to_dict() for a in alerts],
            "count": len(alerts),
            # TODO: proper active check. Until the alert type exposes an
            # "active" state this equals ``count``; presumably it is only
            # exact when ``active_only`` is true (verify against
            # EdgeMonitor.get_alerts).
            "active_count": len(alerts),
        }

    async def _get_analytics(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
        """Get analytics for an edge node.

        Args:
            kwargs: Call arguments; reads ``edge_node`` (default
                ``"unknown"``).

        Returns:
            ``status`` and the ``analytics`` produced by the monitor.
        """
        edge_node = kwargs.get("edge_node", "unknown")

        analytics = self.monitor.get_analytics(edge_node)

        return {"status": "success", "analytics": analytics}

    async def _set_threshold(self, kwargs: Dict[str, Any]) -> Dict[str, Any]:
        """Set alert threshold.

        Args:
            kwargs: Call arguments; reads ``metric_type`` (default
                ``"latency"``), ``severity`` (default ``"warning"``) and
                ``threshold_value`` (default ``1.0``).

        Returns:
            On success ``status``, ``threshold_set`` and the applied
            ``metric_type``, ``severity`` and ``value``; an error payload if
            ``metric_type`` is not a valid ``MetricType``.
        """
        metric_type_str = kwargs.get("metric_type", "latency")
        # Treat an explicit None like a missing key so a threshold is never
        # registered under a None severity or with a None value.
        severity = kwargs.get("severity") or "warning"
        value = kwargs.get("threshold_value")
        if value is None:
            value = 1.0

        # Parse metric type; unlike the query paths, an invalid value is an
        # error here because there is nothing sensible to fall back to.
        try:
            metric_type = MetricType(metric_type_str)
        except ValueError:
            return {
                "status": "error",
                "error": f"Invalid metric type: {metric_type_str}",
            }

        # Set threshold
        self.monitor.set_threshold(metric_type, severity, value)

        return {
            "status": "success",
            "threshold_set": True,
            "metric_type": metric_type.value,
            "severity": severity,
            "value": value,
        }

    async def _get_summary(self) -> Dict[str, Any]:
        """Get monitoring summary."""
        summary = self.monitor.get_summary()

        return {"status": "success", "summary": summary}

    async def _start_monitor(self) -> Dict[str, Any]:
        """Start monitoring service.

        Starting an already-started monitor is a no-op.
        """
        if not self._monitor_started:
            await self.monitor.start()
            self._monitor_started = True

        return {"status": "success", "monitor_active": True}

    async def _stop_monitor(self) -> Dict[str, Any]:
        """Stop monitoring service.

        Stopping a monitor that was never started is a no-op.
        """
        if self._monitor_started:
            await self.monitor.stop()
            self._monitor_started = False

        return {"status": "success", "monitor_active": False}

    async def cleanup(self):
        """Clean up resources.

        Stops the monitor if it is running and clears the started flag, so a
        later ``start_monitor`` actually restarts it instead of reporting a
        stopped monitor as active.
        """
        await self._stop_monitor()
|