kailash 0.8.4__py3-none-any.whl → 0.8.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. kailash/__init__.py +1 -7
  2. kailash/cli/__init__.py +11 -1
  3. kailash/cli/validation_audit.py +570 -0
  4. kailash/core/actors/supervisor.py +1 -1
  5. kailash/core/resilience/circuit_breaker.py +71 -1
  6. kailash/core/resilience/health_monitor.py +172 -0
  7. kailash/edge/compliance.py +33 -0
  8. kailash/edge/consistency.py +609 -0
  9. kailash/edge/coordination/__init__.py +30 -0
  10. kailash/edge/coordination/global_ordering.py +355 -0
  11. kailash/edge/coordination/leader_election.py +217 -0
  12. kailash/edge/coordination/partition_detector.py +296 -0
  13. kailash/edge/coordination/raft.py +485 -0
  14. kailash/edge/discovery.py +63 -1
  15. kailash/edge/migration/__init__.py +19 -0
  16. kailash/edge/migration/edge_migrator.py +832 -0
  17. kailash/edge/monitoring/__init__.py +21 -0
  18. kailash/edge/monitoring/edge_monitor.py +736 -0
  19. kailash/edge/prediction/__init__.py +10 -0
  20. kailash/edge/prediction/predictive_warmer.py +591 -0
  21. kailash/edge/resource/__init__.py +102 -0
  22. kailash/edge/resource/cloud_integration.py +796 -0
  23. kailash/edge/resource/cost_optimizer.py +949 -0
  24. kailash/edge/resource/docker_integration.py +919 -0
  25. kailash/edge/resource/kubernetes_integration.py +893 -0
  26. kailash/edge/resource/platform_integration.py +913 -0
  27. kailash/edge/resource/predictive_scaler.py +959 -0
  28. kailash/edge/resource/resource_analyzer.py +824 -0
  29. kailash/edge/resource/resource_pools.py +610 -0
  30. kailash/integrations/dataflow_edge.py +261 -0
  31. kailash/mcp_server/registry_integration.py +1 -1
  32. kailash/monitoring/__init__.py +18 -0
  33. kailash/monitoring/alerts.py +646 -0
  34. kailash/monitoring/metrics.py +677 -0
  35. kailash/nodes/__init__.py +2 -0
  36. kailash/nodes/ai/semantic_memory.py +2 -2
  37. kailash/nodes/base.py +545 -0
  38. kailash/nodes/edge/__init__.py +36 -0
  39. kailash/nodes/edge/base.py +240 -0
  40. kailash/nodes/edge/cloud_node.py +710 -0
  41. kailash/nodes/edge/coordination.py +239 -0
  42. kailash/nodes/edge/docker_node.py +825 -0
  43. kailash/nodes/edge/edge_data.py +582 -0
  44. kailash/nodes/edge/edge_migration_node.py +392 -0
  45. kailash/nodes/edge/edge_monitoring_node.py +421 -0
  46. kailash/nodes/edge/edge_state.py +673 -0
  47. kailash/nodes/edge/edge_warming_node.py +393 -0
  48. kailash/nodes/edge/kubernetes_node.py +652 -0
  49. kailash/nodes/edge/platform_node.py +766 -0
  50. kailash/nodes/edge/resource_analyzer_node.py +378 -0
  51. kailash/nodes/edge/resource_optimizer_node.py +501 -0
  52. kailash/nodes/edge/resource_scaler_node.py +397 -0
  53. kailash/nodes/ports.py +676 -0
  54. kailash/runtime/local.py +344 -1
  55. kailash/runtime/validation/__init__.py +20 -0
  56. kailash/runtime/validation/connection_context.py +119 -0
  57. kailash/runtime/validation/enhanced_error_formatter.py +202 -0
  58. kailash/runtime/validation/error_categorizer.py +164 -0
  59. kailash/runtime/validation/metrics.py +380 -0
  60. kailash/runtime/validation/performance.py +615 -0
  61. kailash/runtime/validation/suggestion_engine.py +212 -0
  62. kailash/testing/fixtures.py +2 -2
  63. kailash/workflow/builder.py +230 -4
  64. kailash/workflow/contracts.py +418 -0
  65. kailash/workflow/edge_infrastructure.py +369 -0
  66. kailash/workflow/migration.py +3 -3
  67. kailash/workflow/type_inference.py +669 -0
  68. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/METADATA +43 -27
  69. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/RECORD +73 -27
  70. kailash/nexus/__init__.py +0 -21
  71. kailash/nexus/cli/__init__.py +0 -5
  72. kailash/nexus/cli/__main__.py +0 -6
  73. kailash/nexus/cli/main.py +0 -176
  74. kailash/nexus/factory.py +0 -413
  75. kailash/nexus/gateway.py +0 -545
  76. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/WHEEL +0 -0
  77. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/entry_points.txt +0 -0
  78. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/licenses/LICENSE +0 -0
  79. {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,893 @@
1
+ """Kubernetes integration for edge resource management."""
2
+
3
+ import asyncio
4
+ import json
5
+ from dataclasses import asdict, dataclass
6
+ from datetime import datetime, timedelta
7
+ from enum import Enum
8
+ from typing import Any, Dict, List, Optional, Union
9
+
10
+ import yaml
11
+
12
+ try:
13
+ from kubernetes import client, config, watch
14
+ from kubernetes.client.rest import ApiException
15
+
16
+ KUBERNETES_AVAILABLE = True
17
+ except ImportError:
18
+ KUBERNETES_AVAILABLE = False
19
+
20
+
21
class KubernetesResourceType(Enum):
    """Kinds of Kubernetes objects that a resource definition can describe.

    The member value is the lowercase, snake_case name of the kind.
    """

    # Workload and networking objects
    DEPLOYMENT = "deployment"
    SERVICE = "service"

    # Configuration objects
    CONFIGMAP = "configmap"
    SECRET = "secret"

    POD = "pod"

    # Storage objects
    PERSISTENT_VOLUME = "persistent_volume"
    PERSISTENT_VOLUME_CLAIM = "persistent_volume_claim"

    INGRESS = "ingress"
    HORIZONTAL_POD_AUTOSCALER = "horizontal_pod_autoscaler"

    # Anything not covered by the members above
    CUSTOM_RESOURCE = "custom_resource"
34
+
35
+
36
class ScalingPolicy(Enum):
    """Strategies by which the replica count of a workload may be driven.

    The member value is the short string identifier of the strategy.
    """

    MANUAL = "manual"

    # Autoscaler-backed strategies
    HORIZONTAL_POD_AUTOSCALER = "hpa"
    VERTICAL_POD_AUTOSCALER = "vpa"

    PREDICTIVE = "predictive"
    REACTIVE = "reactive"
44
+
45
+
46
@dataclass
class KubernetesResource:
    """Kubernetes resource definition.

    Holds the identifying metadata, the raw ``spec`` and bookkeeping
    timestamps for one Kubernetes object, and knows how to render itself
    either as a plain dictionary or as a Kubernetes manifest.
    """

    name: str
    namespace: str
    resource_type: KubernetesResourceType
    spec: Dict[str, Any]
    labels: Optional[Dict[str, str]] = None
    annotations: Optional[Dict[str, str]] = None
    # Value written to the "edge-node" node selector when set.
    edge_node: Optional[str] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    def __post_init__(self):
        """Fill in defaults for timestamps, labels and annotations."""
        if self.created_at is None:
            self.created_at = datetime.now()
        if self.updated_at is None:
            self.updated_at = datetime.now()
        if self.labels is None:
            self.labels = {}
        if self.annotations is None:
            self.annotations = {}

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary.

        Returns:
            A dictionary of all fields, with ``resource_type`` replaced by
            its enum value and both timestamps rendered as ISO-8601 strings.
        """
        data = asdict(self)
        data["resource_type"] = self.resource_type.value
        data["created_at"] = self.created_at.isoformat()
        data["updated_at"] = self.updated_at.isoformat()
        return data

    def to_k8s_manifest(self) -> Dict[str, Any]:
        """Convert to Kubernetes manifest.

        When ``edge_node`` is set, an ``edge-node`` entry is added to the
        node selector: under ``spec.template.spec`` if the spec carries a
        pod template, otherwise directly under ``spec``.

        The returned manifest never shares nested structures with
        ``self.spec``; building a manifest leaves this object unchanged.

        Returns:
            The manifest as a dictionary with ``apiVersion``, ``kind``,
            ``metadata`` and ``spec`` keys.
        """
        # Local import: the file-level import block is left untouched.
        import copy

        api_version, kind = self._get_api_version_kind()

        manifest = {
            "apiVersion": api_version,
            "kind": kind,
            "metadata": {
                "name": self.name,
                "namespace": self.namespace,
                "labels": self.labels.copy(),
                "annotations": self.annotations.copy(),
            },
            # Deep copy: the node selector below is written into nested
            # dicts, and a shallow copy would leak it back into self.spec.
            "spec": copy.deepcopy(self.spec),
        }

        # Add edge node selector if specified
        if self.edge_node:
            spec = manifest["spec"]
            if "template" in spec and "spec" in spec["template"]:
                # For Deployments and similar resources
                target = spec["template"]["spec"]
            else:
                # For other resources
                # NOTE(review): this also adds nodeSelector to kinds that are
                # not scheduled onto nodes (e.g. Service) - confirm intent.
                target = spec

            # Tolerate an explicit ``nodeSelector: None`` in the spec.
            node_selector = dict(target.get("nodeSelector") or {})
            node_selector["edge-node"] = self.edge_node
            target["nodeSelector"] = node_selector

        return manifest

    def _get_api_version_kind(self) -> tuple[str, str]:
        """Get API version and kind for resource type.

        Returns:
            ``(apiVersion, kind)``; ``("v1", "Unknown")`` for resource types
            without an entry in the mapping.
        """
        mapping = {
            KubernetesResourceType.DEPLOYMENT: ("apps/v1", "Deployment"),
            KubernetesResourceType.SERVICE: ("v1", "Service"),
            KubernetesResourceType.CONFIGMAP: ("v1", "ConfigMap"),
            KubernetesResourceType.SECRET: ("v1", "Secret"),
            KubernetesResourceType.POD: ("v1", "Pod"),
            KubernetesResourceType.PERSISTENT_VOLUME: ("v1", "PersistentVolume"),
            KubernetesResourceType.PERSISTENT_VOLUME_CLAIM: (
                "v1",
                "PersistentVolumeClaim",
            ),
            KubernetesResourceType.INGRESS: ("networking.k8s.io/v1", "Ingress"),
            KubernetesResourceType.HORIZONTAL_POD_AUTOSCALER: (
                "autoscaling/v2",
                "HorizontalPodAutoscaler",
            ),
        }
        return mapping.get(self.resource_type, ("v1", "Unknown"))
135
+
136
+
137
@dataclass
class PodScalingSpec:
    """Pod scaling specification.

    Utilization targets are fractions (``0.7`` means 70 %); they are
    converted to integer percentages when rendered as an HPA spec.
    ``scale_up_policy`` and ``scale_down_policy`` are stored but are not
    emitted by ``to_hpa_spec``; only ``behavior`` is.
    """

    min_replicas: int
    max_replicas: int
    target_cpu_utilization: float
    target_memory_utilization: Optional[float] = None
    scale_up_policy: Optional[Dict[str, Any]] = None
    scale_down_policy: Optional[Dict[str, Any]] = None
    behavior: Optional[Dict[str, Any]] = None

    @staticmethod
    def _utilization_metric(resource_name: str, fraction: float) -> Dict[str, Any]:
        """Build one ``Resource`` metric entry targeting average utilization."""
        return {
            "type": "Resource",
            "resource": {
                "name": resource_name,
                "target": {
                    "type": "Utilization",
                    # Round rather than truncate: 0.29 * 100 evaluates to
                    # 28.999999999999996, which int() would turn into 28.
                    "averageUtilization": int(round(fraction * 100)),
                },
            },
        }

    def to_hpa_spec(self) -> Dict[str, Any]:
        """Convert to HPA specification.

        Returns:
            A dictionary with ``minReplicas``, ``maxReplicas`` and
            ``metrics`` (CPU always, memory when a non-zero memory target is
            set), plus ``behavior`` when one is configured.
        """
        metrics = [self._utilization_metric("cpu", self.target_cpu_utilization)]

        if self.target_memory_utilization:
            metrics.append(
                self._utilization_metric("memory", self.target_memory_utilization)
            )

        spec = {
            "minReplicas": self.min_replicas,
            "maxReplicas": self.max_replicas,
            "metrics": metrics,
        }

        if self.behavior:
            spec["behavior"] = self.behavior

        return spec
190
+
191
+
192
+ class KubernetesIntegration:
193
+ """Kubernetes integration for edge resource management."""
194
+
195
def __init__(
    self,
    kubeconfig_path: Optional[str] = None,
    context_name: Optional[str] = None,
    namespace: str = "default",
):
    """Set up connection settings and empty bookkeeping state.

    No connection is made here; the API clients start out as None.

    Args:
        kubeconfig_path: Path to kubeconfig file
        context_name: Kubernetes context to use
        namespace: Default namespace

    Raises:
        ImportError: If the kubernetes client package could not be imported.
    """
    if not KUBERNETES_AVAILABLE:
        raise ImportError(
            "Kubernetes client not available. Install with: pip install kubernetes"
        )

    # Connection settings
    self.kubeconfig_path = kubeconfig_path
    self.context_name = context_name
    self.namespace = namespace

    # API client handles, unset until the integration is initialized
    for client_attr in ("core_v1", "apps_v1", "autoscaling_v2", "custom_objects"):
        setattr(self, client_attr, None)

    # Tracked resources and their last observed status, keyed "namespace/name"
    self.resources: Dict[str, KubernetesResource] = {}
    self.resource_status: Dict[str, Dict[str, Any]] = {}

    # Scaling bookkeeping
    self.scaling_policies: Dict[str, PodScalingSpec] = {}
    self.autoscalers: Dict[str, str] = {}  # deployment -> hpa name

    # Handles of background tasks
    self._monitoring_task: Optional[asyncio.Task] = None
    self._scaling_task: Optional[asyncio.Task] = None

    # Tunables (intervals are in seconds)
    self.monitoring_interval = 30
    self.scaling_check_interval = 60
    self.default_scaling_policy = ScalingPolicy.MANUAL
239
+
240
async def initialize(self) -> None:
    """Initialize Kubernetes clients.

    Loads configuration from ``kubeconfig_path`` when one was given;
    otherwise tries in-cluster configuration and falls back to the default
    kubeconfig. The clients are published on ``self`` only after a test
    request succeeds, so a failed attempt leaves them unset and the next
    caller's ``if not self.core_v1`` guard retries initialization.

    Raises:
        RuntimeError: If configuration loading, client construction or the
            connectivity check fails. The original exception is chained.
    """
    try:
        if self.kubeconfig_path:
            config.load_kube_config(
                config_file=self.kubeconfig_path, context=self.context_name
            )
        else:
            # Try in-cluster config first, then kubeconfig
            try:
                config.load_incluster_config()
            except config.ConfigException:
                config.load_kube_config(context=self.context_name)

        # Build clients locally; publish them only once the probe passes.
        core_v1 = client.CoreV1Api()
        apps_v1 = client.AppsV1Api()
        autoscaling_v2 = client.AutoscalingV2Api()
        custom_objects = client.CustomObjectsApi()

        # Test connection
        await asyncio.to_thread(core_v1.list_namespace)

    except Exception as e:
        raise RuntimeError(f"Failed to initialize Kubernetes client: {e}") from e

    self.core_v1 = core_v1
    self.apps_v1 = apps_v1
    self.autoscaling_v2 = autoscaling_v2
    self.custom_objects = custom_objects
265
+
266
async def create_resource(self, resource: KubernetesResource) -> Dict[str, Any]:
    """Create the given resource in the cluster and start tracking it.

    Args:
        resource: Resource to create

    Returns:
        On success, a dict with ``status`` "created" plus name, namespace,
        resource type, uid and creation timestamp. On failure, a dict with
        ``status`` "error" and an ``error`` message (API errors also carry
        ``reason`` and ``code``).
    """
    if not self.core_v1:
        await self.initialize()

    # resource type -> (client attribute on self, name of its create method).
    # Resolved lazily so only the client actually needed is touched.
    creators = {
        KubernetesResourceType.DEPLOYMENT: (
            "apps_v1",
            "create_namespaced_deployment",
        ),
        KubernetesResourceType.SERVICE: ("core_v1", "create_namespaced_service"),
        KubernetesResourceType.CONFIGMAP: (
            "core_v1",
            "create_namespaced_config_map",
        ),
        KubernetesResourceType.SECRET: ("core_v1", "create_namespaced_secret"),
        KubernetesResourceType.POD: ("core_v1", "create_namespaced_pod"),
        KubernetesResourceType.HORIZONTAL_POD_AUTOSCALER: (
            "autoscaling_v2",
            "create_namespaced_horizontal_pod_autoscaler",
        ),
    }

    try:
        manifest = resource.to_k8s_manifest()

        route = creators.get(resource.resource_type)
        if route is None:
            raise ValueError(f"Unsupported resource type: {resource.resource_type}")

        api_attr, method_name = route
        create_call = getattr(getattr(self, api_attr), method_name)
        result = await asyncio.to_thread(
            create_call,
            namespace=resource.namespace,
            body=manifest,
        )

        # Remember the resource under its "namespace/name" key
        self.resources[f"{resource.namespace}/{resource.name}"] = resource

        created_meta = result.metadata
        return {
            "status": "created",
            "name": resource.name,
            "namespace": resource.namespace,
            "resource_type": resource.resource_type.value,
            "uid": getattr(created_meta, "uid", None),
            "creation_timestamp": getattr(created_meta, "creation_timestamp", None),
        }

    except ApiException as e:
        return {
            "status": "error",
            "error": f"Kubernetes API error: {e}",
            "reason": getattr(e, "reason", "Unknown"),
            "code": getattr(e, "status", 500),
        }
    except Exception as e:
        return {"status": "error", "error": f"Failed to create resource: {e}"}
347
+
348
async def update_resource(self, resource: KubernetesResource) -> Dict[str, Any]:
    """Patch an existing resource with the given definition.

    Only deployments, services and config maps can be updated.

    Args:
        resource: Resource to update

    Returns:
        On success, a dict with ``status`` "updated" plus name, namespace,
        resource type and the new ``updated_at`` timestamp. On failure, a
        dict with ``status`` "error" and an ``error`` message (API errors
        also carry ``reason`` and ``code``).
    """
    if not self.core_v1:
        await self.initialize()

    # resource type -> (client attribute on self, name of its patch method)
    patchers = {
        KubernetesResourceType.DEPLOYMENT: ("apps_v1", "patch_namespaced_deployment"),
        KubernetesResourceType.SERVICE: ("core_v1", "patch_namespaced_service"),
        KubernetesResourceType.CONFIGMAP: (
            "core_v1",
            "patch_namespaced_config_map",
        ),
    }

    try:
        manifest = resource.to_k8s_manifest()

        route = patchers.get(resource.resource_type)
        if route is None:
            raise ValueError(
                f"Update not supported for resource type: {resource.resource_type}"
            )

        api_attr, method_name = route
        patch_call = getattr(getattr(self, api_attr), method_name)
        await asyncio.to_thread(
            patch_call,
            name=resource.name,
            namespace=resource.namespace,
            body=manifest,
        )

        # Refresh the tracked copy and stamp the modification time
        resource.updated_at = datetime.now()
        self.resources[f"{resource.namespace}/{resource.name}"] = resource

        return {
            "status": "updated",
            "name": resource.name,
            "namespace": resource.namespace,
            "resource_type": resource.resource_type.value,
            "updated_at": resource.updated_at.isoformat(),
        }

    except ApiException as e:
        return {
            "status": "error",
            "error": f"Kubernetes API error: {e}",
            "reason": getattr(e, "reason", "Unknown"),
            "code": getattr(e, "status", 500),
        }
    except Exception as e:
        return {"status": "error", "error": f"Failed to update resource: {e}"}
411
+
412
async def delete_resource(
    self, name: str, namespace: str, resource_type: KubernetesResourceType
) -> Dict[str, Any]:
    """Delete a resource from the cluster and stop tracking it.

    Deployments, services, config maps, secrets and pods can be deleted.

    Args:
        name: Resource name
        namespace: Resource namespace
        resource_type: Type of resource

    Returns:
        On success, a dict with ``status`` "deleted" plus name, namespace
        and resource type. On failure, a dict with ``status`` "error" and
        an ``error`` message (API errors also carry ``reason`` and
        ``code``).
    """
    if not self.core_v1:
        await self.initialize()

    # resource type -> (client attribute on self, name of its delete method)
    deleters = {
        KubernetesResourceType.DEPLOYMENT: (
            "apps_v1",
            "delete_namespaced_deployment",
        ),
        KubernetesResourceType.SERVICE: ("core_v1", "delete_namespaced_service"),
        KubernetesResourceType.CONFIGMAP: (
            "core_v1",
            "delete_namespaced_config_map",
        ),
        KubernetesResourceType.SECRET: ("core_v1", "delete_namespaced_secret"),
        KubernetesResourceType.POD: ("core_v1", "delete_namespaced_pod"),
    }

    try:
        route = deleters.get(resource_type)
        if route is None:
            raise ValueError(
                f"Delete not supported for resource type: {resource_type}"
            )

        api_attr, method_name = route
        delete_call = getattr(getattr(self, api_attr), method_name)
        await asyncio.to_thread(delete_call, name=name, namespace=namespace)

        # Drop both the tracked definition and its cached status
        cache_key = f"{namespace}/{name}"
        for cache in (self.resources, self.resource_status):
            cache.pop(cache_key, None)

        return {
            "status": "deleted",
            "name": name,
            "namespace": namespace,
            "resource_type": resource_type.value,
        }

    except ApiException as e:
        return {
            "status": "error",
            "error": f"Kubernetes API error: {e}",
            "reason": getattr(e, "reason", "Unknown"),
            "code": getattr(e, "status", 500),
        }
    except Exception as e:
        return {"status": "error", "error": f"Failed to delete resource: {e}"}
483
+
484
async def get_resource_status(
    self, name: str, namespace: str, resource_type: KubernetesResourceType
) -> Dict[str, Any]:
    """Read the live status of a deployment or pod.

    Args:
        name: Resource name
        namespace: Resource namespace
        resource_type: Type of resource

    Returns:
        For deployments: readiness ("ready" when the ready replica count
        equals the replica count), replica counters and conditions.
        For pods: phase, node, IP, start time, conditions and container
        statuses. For any other type: ``status`` "unknown" with a message.
        On failure: ``status`` "error" with an ``error`` message (API
        errors also carry ``reason`` and ``code``).
    """
    if not self.core_v1:
        await self.initialize()

    def summarize_conditions(conditions):
        # Flatten condition objects into plain dicts; None means "no conditions".
        return [
            {
                "type": cond.type,
                "status": cond.status,
                "reason": cond.reason,
                "message": cond.message,
            }
            for cond in (conditions or [])
        ]

    try:
        if resource_type == KubernetesResourceType.DEPLOYMENT:
            deployment = await asyncio.to_thread(
                self.apps_v1.read_namespaced_deployment_status,
                name=name,
                namespace=namespace,
            )
            observed = deployment.status
            if observed.ready_replicas == observed.replicas:
                readiness = "ready"
            else:
                readiness = "not_ready"
            return {
                "status": readiness,
                "replicas": observed.replicas or 0,
                "ready_replicas": observed.ready_replicas or 0,
                "available_replicas": observed.available_replicas or 0,
                "updated_replicas": observed.updated_replicas or 0,
                "conditions": summarize_conditions(observed.conditions),
            }

        if resource_type == KubernetesResourceType.POD:
            pod = await asyncio.to_thread(
                self.core_v1.read_namespaced_pod_status,
                name=name,
                namespace=namespace,
            )
            observed = pod.status
            started = None
            if observed.start_time:
                started = observed.start_time.isoformat()
            return {
                "status": observed.phase,
                "node_name": pod.spec.node_name,
                "pod_ip": observed.pod_ip,
                "start_time": started,
                "conditions": summarize_conditions(observed.conditions),
                "container_statuses": [
                    {
                        "name": entry.name,
                        "ready": entry.ready,
                        "restart_count": entry.restart_count,
                        "state": entry.state.to_dict() if entry.state else None,
                    }
                    for entry in (observed.container_statuses or [])
                ],
            }

        # Every other resource type has no status reader yet
        return {
            "status": "unknown",
            "message": f"Status not implemented for resource type: {resource_type.value}",
        }

    except ApiException as e:
        return {
            "status": "error",
            "error": f"Kubernetes API error: {e}",
            "reason": getattr(e, "reason", "Unknown"),
            "code": getattr(e, "status", 500),
        }
    except Exception as e:
        return {"status": "error", "error": f"Failed to get resource status: {e}"}
578
+
579
async def list_resources(
    self,
    namespace: Optional[str] = None,
    resource_type: Optional[KubernetesResourceType] = None,
    labels: Optional[Dict[str, str]] = None,
) -> List[Dict[str, Any]]:
    """List Kubernetes resources.

    Only deployments, services and pods are listed; other resource types
    yield an empty list.

    Args:
        namespace: Namespace to list. When None (or empty), the instance's
            default namespace (``self.namespace``) is used - this does NOT
            list across all namespaces.
        resource_type: Filter by resource type (deployments, services and
            pods are all listed if None)
        labels: Label selector, combined as ``key=value`` pairs that must
            all match

    Returns:
        List of resources, one dict per object, with common metadata plus
        a few type-specific fields. ``created_at`` is None when the object
        carries no creation timestamp.

    Raises:
        RuntimeError: If the Kubernetes API call or result processing
            fails. The original exception is chained.
    """
    if not self.core_v1:
        await self.initialize()

    def base_entry(item: Any, kind: str) -> Dict[str, Any]:
        # Fields shared by every listed resource type.
        meta = item.metadata
        created = meta.creation_timestamp
        return {
            "name": meta.name,
            "namespace": meta.namespace,
            "resource_type": kind,
            "labels": meta.labels or {},
            "annotations": meta.annotations or {},
            # Guard against objects without a creation timestamp.
            "created_at": created.isoformat() if created else None,
        }

    resources: List[Dict[str, Any]] = []
    label_selector = ",".join(f"{k}={v}" for k, v in (labels or {}).items())
    target_namespace = namespace or self.namespace

    try:
        if not resource_type or resource_type == KubernetesResourceType.DEPLOYMENT:
            deployments = await asyncio.to_thread(
                self.apps_v1.list_namespaced_deployment,
                namespace=target_namespace,
                label_selector=label_selector,
            )
            for deployment in deployments.items:
                entry = base_entry(deployment, "deployment")
                entry["replicas"] = deployment.status.replicas or 0
                entry["ready_replicas"] = deployment.status.ready_replicas or 0
                resources.append(entry)

        if not resource_type or resource_type == KubernetesResourceType.SERVICE:
            services = await asyncio.to_thread(
                self.core_v1.list_namespaced_service,
                namespace=target_namespace,
                label_selector=label_selector,
            )
            for service in services.items:
                entry = base_entry(service, "service")
                entry["cluster_ip"] = service.spec.cluster_ip
                entry["external_ips"] = service.spec.external_i_ps or []
                entry["ports"] = [
                    {
                        "name": port.name,
                        "port": port.port,
                        "target_port": str(port.target_port),
                        "protocol": port.protocol,
                    }
                    for port in (service.spec.ports or [])
                ]
                resources.append(entry)

        if not resource_type or resource_type == KubernetesResourceType.POD:
            pods = await asyncio.to_thread(
                self.core_v1.list_namespaced_pod,
                namespace=target_namespace,
                label_selector=label_selector,
            )
            for pod in pods.items:
                entry = base_entry(pod, "pod")
                entry["phase"] = pod.status.phase
                entry["node_name"] = pod.spec.node_name
                entry["pod_ip"] = pod.status.pod_ip
                resources.append(entry)

        return resources

    except ApiException as e:
        raise RuntimeError(f"Kubernetes API error: {e}") from e
    except Exception as e:
        raise RuntimeError(f"Failed to list resources: {e}") from e
679
+
680
async def create_autoscaler(
    self, deployment_name: str, namespace: str, scaling_spec: PodScalingSpec
) -> Dict[str, Any]:
    """Attach a Horizontal Pod Autoscaler to a deployment.

    The autoscaler is named ``<deployment_name>-hpa`` and, once created,
    is recorded together with its scaling spec under the
    ``namespace/deployment_name`` key.

    Args:
        deployment_name: Target deployment name
        namespace: Namespace
        scaling_spec: Scaling specification

    Returns:
        On success, a dict with ``status`` "created" and the autoscaler's
        key settings. On failure, a dict with ``status`` "error" and an
        ``error`` message (API errors also carry ``reason`` and ``code``).
    """
    if not self.autoscaling_v2:
        await self.initialize()

    hpa_name = f"{deployment_name}-hpa"
    tracking_key = f"{namespace}/{deployment_name}"

    try:
        # Scaling bounds and metrics first, then point the HPA at the deployment
        hpa_spec = scaling_spec.to_hpa_spec()
        hpa_spec["scaleTargetRef"] = {
            "apiVersion": "apps/v1",
            "kind": "Deployment",
            "name": deployment_name,
        }

        metadata = {
            "name": hpa_name,
            "namespace": namespace,
            "labels": {"app": deployment_name, "component": "autoscaler"},
        }

        await asyncio.to_thread(
            self.autoscaling_v2.create_namespaced_horizontal_pod_autoscaler,
            namespace=namespace,
            body={
                "apiVersion": "autoscaling/v2",
                "kind": "HorizontalPodAutoscaler",
                "metadata": metadata,
                "spec": hpa_spec,
            },
        )

        # Record the policy and which HPA serves this deployment
        self.scaling_policies[tracking_key] = scaling_spec
        self.autoscalers[tracking_key] = hpa_name

        return {
            "status": "created",
            "hpa_name": hpa_name,
            "deployment_name": deployment_name,
            "namespace": namespace,
            "min_replicas": scaling_spec.min_replicas,
            "max_replicas": scaling_spec.max_replicas,
            "target_cpu_utilization": scaling_spec.target_cpu_utilization,
        }

    except ApiException as e:
        return {
            "status": "error",
            "error": f"Kubernetes API error: {e}",
            "reason": getattr(e, "reason", "Unknown"),
            "code": getattr(e, "status", 500),
        }
    except Exception as e:
        return {"status": "error", "error": f"Failed to create autoscaler: {e}"}
748
+
749
async def scale_deployment(
    self, deployment_name: str, namespace: str, replicas: int
) -> Dict[str, Any]:
    """Set the replica count of a deployment through its scale subresource.

    Args:
        deployment_name: Deployment name
        namespace: Namespace
        replicas: Target replica count

    Returns:
        On success, a dict with ``status`` "scaled", the target and the
        time of the request. On failure, a dict with ``status`` "error"
        and an ``error`` message (API errors also carry ``reason`` and
        ``code``).
    """
    if not self.apps_v1:
        await self.initialize()

    try:
        scale_patch = {"spec": {"replicas": replicas}}
        await asyncio.to_thread(
            self.apps_v1.patch_namespaced_deployment_scale,
            name=deployment_name,
            namespace=namespace,
            body=scale_patch,
        )
    except ApiException as e:
        return {
            "status": "error",
            "error": f"Kubernetes API error: {e}",
            "reason": getattr(e, "reason", "Unknown"),
            "code": getattr(e, "status", 500),
        }
    except Exception as e:
        return {"status": "error", "error": f"Failed to scale deployment: {e}"}
    else:
        return {
            "status": "scaled",
            "deployment_name": deployment_name,
            "namespace": namespace,
            "target_replicas": replicas,
            "scaled_at": datetime.now().isoformat(),
        }
791
+
792
async def get_cluster_info(self) -> Dict[str, Any]:
    """Get cluster information.

    Fetches the API server's ``/version`` document and the node list.

    Returns:
        On success, a dict with ``cluster_version`` (the decoded
        ``/version`` document), ``nodes`` (name, labels, readiness,
        allocatable and capacity per node), ``total_nodes`` and
        ``ready_nodes``. On failure, a dict with ``status`` "error" and an
        ``error`` message.
    """
    if not self.core_v1:
        await self.initialize()

    def pick_quantities(quantities: Optional[Dict[str, str]]) -> Dict[str, str]:
        # Reduce a node's resource map to cpu / memory / ephemeral storage.
        if not quantities:
            return {}
        return {
            "cpu": quantities.get("cpu", "0"),
            "memory": quantities.get("memory", "0"),
            "storage": quantities.get("ephemeral-storage", "0"),
        }

    try:
        # Get cluster version.
        # Without a response_type, call_api discards the body and returns
        # None as data, so ask for the decoded JSON object explicitly and
        # send the bearer token like the generated API classes do.
        response = await asyncio.to_thread(
            self.core_v1.api_client.call_api,
            "/version",
            "GET",
            response_type="object",
            auth_settings=["BearerToken"],
        )
        payload = response[0] if isinstance(response, tuple) else response
        # Still accept a raw JSON document in case the transport hands one back.
        if isinstance(payload, (str, bytes, bytearray)):
            version_info = json.loads(payload)
        else:
            version_info = payload

        # Get nodes
        nodes = await asyncio.to_thread(self.core_v1.list_node)

        node_info = []
        for node in nodes.items:
            # A node is ready when it reports a "Ready" condition of "True"
            is_ready = any(
                condition.type == "Ready" and condition.status == "True"
                for condition in (node.status.conditions or [])
            )
            node_info.append(
                {
                    "name": node.metadata.name,
                    "labels": node.metadata.labels or {},
                    "ready": is_ready,
                    "allocatable": pick_quantities(node.status.allocatable),
                    "capacity": pick_quantities(node.status.capacity),
                }
            )

        return {
            "cluster_version": version_info,
            "nodes": node_info,
            "total_nodes": len(node_info),
            "ready_nodes": sum(1 for node in node_info if node["ready"]),
        }

    except Exception as e:
        return {"status": "error", "error": f"Failed to get cluster info: {e}"}
855
+
856
async def start_monitoring(self) -> None:
    """Launch the background monitoring task unless one is still running."""
    current = self._monitoring_task
    still_running = bool(current) and not current.done()

    if not still_running:
        self._monitoring_task = asyncio.create_task(self._monitor_resources())
862
+
863
async def stop_monitoring(self) -> None:
    """Cancel the background monitoring task, if active, and wait for it to end."""
    task = self._monitoring_task
    if not task or task.done():
        # Nothing running, nothing to stop
        return

    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        # Expected outcome of the cancellation requested above
        pass
871
+
872
async def _monitor_resources(self) -> None:
    """Monitor resources continuously.

    On every pass, refreshes ``self.resource_status`` for each tracked
    resource, then sleeps for ``self.monitoring_interval`` seconds. The
    loop ends when the task is cancelled; any other error is reported and
    the loop continues after the usual sleep.
    """
    while True:
        try:
            # Update status for all tracked resources.
            # Iterate over a snapshot: the status call awaits, so other
            # coroutines may add or remove tracked resources mid-pass, and
            # mutating a dict during iteration raises RuntimeError.
            for resource_key, resource in list(self.resources.items()):
                status = await self.get_resource_status(
                    resource.name, resource.namespace, resource.resource_type
                )
                if resource_key not in self.resources:
                    # Deleted while its status was being fetched; do not
                    # resurrect a status entry for it.
                    continue
                self.resource_status[resource_key] = {
                    "timestamp": datetime.now().isoformat(),
                    "status": status,
                }

            await asyncio.sleep(self.monitoring_interval)

        except asyncio.CancelledError:
            break
        except Exception as e:
            # Log error and continue monitoring
            print(f"Monitoring error: {e}")
            await asyncio.sleep(self.monitoring_interval)