kailash 0.8.4__py3-none-any.whl → 0.8.5__py3-none-any.whl
This diff shows the changes between publicly available package versions as published to their respective public registries. It is provided for informational purposes only.
- kailash/__init__.py +1 -7
- kailash/cli/__init__.py +11 -1
- kailash/cli/validation_audit.py +570 -0
- kailash/core/actors/supervisor.py +1 -1
- kailash/core/resilience/circuit_breaker.py +71 -1
- kailash/core/resilience/health_monitor.py +172 -0
- kailash/edge/compliance.py +33 -0
- kailash/edge/consistency.py +609 -0
- kailash/edge/coordination/__init__.py +30 -0
- kailash/edge/coordination/global_ordering.py +355 -0
- kailash/edge/coordination/leader_election.py +217 -0
- kailash/edge/coordination/partition_detector.py +296 -0
- kailash/edge/coordination/raft.py +485 -0
- kailash/edge/discovery.py +63 -1
- kailash/edge/migration/__init__.py +19 -0
- kailash/edge/migration/edge_migrator.py +832 -0
- kailash/edge/monitoring/__init__.py +21 -0
- kailash/edge/monitoring/edge_monitor.py +736 -0
- kailash/edge/prediction/__init__.py +10 -0
- kailash/edge/prediction/predictive_warmer.py +591 -0
- kailash/edge/resource/__init__.py +102 -0
- kailash/edge/resource/cloud_integration.py +796 -0
- kailash/edge/resource/cost_optimizer.py +949 -0
- kailash/edge/resource/docker_integration.py +919 -0
- kailash/edge/resource/kubernetes_integration.py +893 -0
- kailash/edge/resource/platform_integration.py +913 -0
- kailash/edge/resource/predictive_scaler.py +959 -0
- kailash/edge/resource/resource_analyzer.py +824 -0
- kailash/edge/resource/resource_pools.py +610 -0
- kailash/integrations/dataflow_edge.py +261 -0
- kailash/mcp_server/registry_integration.py +1 -1
- kailash/monitoring/__init__.py +18 -0
- kailash/monitoring/alerts.py +646 -0
- kailash/monitoring/metrics.py +677 -0
- kailash/nodes/__init__.py +2 -0
- kailash/nodes/ai/semantic_memory.py +2 -2
- kailash/nodes/base.py +545 -0
- kailash/nodes/edge/__init__.py +36 -0
- kailash/nodes/edge/base.py +240 -0
- kailash/nodes/edge/cloud_node.py +710 -0
- kailash/nodes/edge/coordination.py +239 -0
- kailash/nodes/edge/docker_node.py +825 -0
- kailash/nodes/edge/edge_data.py +582 -0
- kailash/nodes/edge/edge_migration_node.py +392 -0
- kailash/nodes/edge/edge_monitoring_node.py +421 -0
- kailash/nodes/edge/edge_state.py +673 -0
- kailash/nodes/edge/edge_warming_node.py +393 -0
- kailash/nodes/edge/kubernetes_node.py +652 -0
- kailash/nodes/edge/platform_node.py +766 -0
- kailash/nodes/edge/resource_analyzer_node.py +378 -0
- kailash/nodes/edge/resource_optimizer_node.py +501 -0
- kailash/nodes/edge/resource_scaler_node.py +397 -0
- kailash/nodes/ports.py +676 -0
- kailash/runtime/local.py +344 -1
- kailash/runtime/validation/__init__.py +20 -0
- kailash/runtime/validation/connection_context.py +119 -0
- kailash/runtime/validation/enhanced_error_formatter.py +202 -0
- kailash/runtime/validation/error_categorizer.py +164 -0
- kailash/runtime/validation/metrics.py +380 -0
- kailash/runtime/validation/performance.py +615 -0
- kailash/runtime/validation/suggestion_engine.py +212 -0
- kailash/testing/fixtures.py +2 -2
- kailash/workflow/builder.py +230 -4
- kailash/workflow/contracts.py +418 -0
- kailash/workflow/edge_infrastructure.py +369 -0
- kailash/workflow/migration.py +3 -3
- kailash/workflow/type_inference.py +669 -0
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/METADATA +43 -27
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/RECORD +73 -27
- kailash/nexus/__init__.py +0 -21
- kailash/nexus/cli/__init__.py +0 -5
- kailash/nexus/cli/__main__.py +0 -6
- kailash/nexus/cli/main.py +0 -176
- kailash/nexus/factory.py +0 -413
- kailash/nexus/gateway.py +0 -545
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/WHEEL +0 -0
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/entry_points.txt +0 -0
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.8.4.dist-info → kailash-0.8.5.dist-info}/top_level.txt +0 -0
kailash/edge/resource/docker_integration.py (new file)
@@ -0,0 +1,919 @@
"""Docker integration for edge resource management."""

import asyncio
import base64
import json
from dataclasses import asdict, dataclass
from datetime import datetime, timedelta
from enum import Enum
from typing import Any, Dict, List, Optional, Union

try:
    from docker.types import EndpointSpec, LogConfig, RestartPolicy, UpdateConfig

    import docker

    DOCKER_AVAILABLE = True
except ImportError:
    DOCKER_AVAILABLE = False


class ContainerState(Enum):
    """Container states."""

    CREATED = "created"
    RUNNING = "running"
    PAUSED = "paused"
    RESTARTING = "restarting"
    REMOVING = "removing"
    EXITED = "exited"
    DEAD = "dead"


class RestartPolicyType(Enum):
    """Container restart policies."""

    NONE = "no"
    ALWAYS = "always"
    UNLESS_STOPPED = "unless-stopped"
    ON_FAILURE = "on-failure"


class NetworkMode(Enum):
    """Docker network modes."""

    BRIDGE = "bridge"
    HOST = "host"
    NONE = "none"
    CONTAINER = "container"
    CUSTOM = "custom"


@dataclass
class ContainerSpec:
    """Docker container specification."""

    name: str
    image: str
    command: Optional[List[str]] = None
    environment: Optional[Dict[str, str]] = None
    ports: Optional[Dict[str, int]] = None  # container_port -> host_port
    volumes: Optional[Dict[str, str]] = None  # host_path -> container_path
    restart_policy: RestartPolicyType = RestartPolicyType.UNLESS_STOPPED
    memory_limit: Optional[str] = None  # e.g., "512m", "1g"
    cpu_limit: Optional[float] = None  # CPU cores
    network_mode: NetworkMode = NetworkMode.BRIDGE
    labels: Optional[Dict[str, str]] = None
    edge_node: Optional[str] = None
    healthcheck: Optional[Dict[str, Any]] = None

    def __post_init__(self):
        if self.environment is None:
            self.environment = {}
        if self.ports is None:
            self.ports = {}
        if self.volumes is None:
            self.volumes = {}
        if self.labels is None:
            self.labels = {}

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary."""
        data = asdict(self)
        data["restart_policy"] = self.restart_policy.value
        data["network_mode"] = self.network_mode.value
        return data

    def to_docker_config(self) -> Dict[str, Any]:
        """Convert to Docker API configuration."""
        config = {
            "image": self.image,
            "name": self.name,
            "environment": list(f"{k}={v}" for k, v in self.environment.items()),
            "labels": self.labels.copy(),
        }

        # Add edge node label if specified
        if self.edge_node:
            config["labels"]["edge-node"] = self.edge_node

        # Command
        if self.command:
            config["command"] = self.command

        # Port bindings
        if self.ports:
            config["ports"] = self.ports
            config["host_config"] = config.get("host_config", {})
            config["host_config"]["port_bindings"] = {
                f"{container_port}/tcp": host_port
                for container_port, host_port in self.ports.items()
            }

        # Volume bindings
        if self.volumes:
            config["host_config"] = config.get("host_config", {})
            config["host_config"]["binds"] = [
                f"{host_path}:{container_path}"
                for host_path, container_path in self.volumes.items()
            ]

        # Restart policy
        if self.restart_policy != RestartPolicyType.NONE:
            config["host_config"] = config.get("host_config", {})
            config["host_config"]["restart_policy"] = {
                "Name": self.restart_policy.value
            }

        # Resource limits
        if self.memory_limit or self.cpu_limit:
            config["host_config"] = config.get("host_config", {})
            if self.memory_limit:
                config["host_config"]["mem_limit"] = self.memory_limit
            if self.cpu_limit:
                config["host_config"]["nano_cpus"] = int(self.cpu_limit * 1e9)

        # Network mode
        if self.network_mode != NetworkMode.BRIDGE:
            config["host_config"] = config.get("host_config", {})
            config["host_config"]["network_mode"] = self.network_mode.value

        # Health check
        if self.healthcheck:
            config["healthcheck"] = self.healthcheck

        return config


@dataclass
class ServiceSpec:
    """Docker Swarm service specification."""

    name: str
    image: str
    replicas: int = 1
    command: Optional[List[str]] = None
    environment: Optional[Dict[str, str]] = None
    ports: Optional[List[Dict[str, Any]]] = None
    volumes: Optional[List[Dict[str, str]]] = None
    constraints: Optional[List[str]] = None
    placement_preferences: Optional[List[Dict[str, Any]]] = None
    restart_policy: Optional[Dict[str, Any]] = None
    update_config: Optional[Dict[str, Any]] = None
    rollback_config: Optional[Dict[str, Any]] = None
    labels: Optional[Dict[str, str]] = None
    edge_node: Optional[str] = None

    def __post_init__(self):
        if self.environment is None:
            self.environment = {}
        if self.ports is None:
            self.ports = []
        if self.volumes is None:
            self.volumes = []
        if self.constraints is None:
            self.constraints = []
        if self.placement_preferences is None:
            self.placement_preferences = []
        if self.labels is None:
            self.labels = {}

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary."""
        return asdict(self)

    def to_docker_service_spec(self) -> Dict[str, Any]:
        """Convert to Docker service specification."""
        task_template = {
            "ContainerSpec": {
                "Image": self.image,
                "Env": [f"{k}={v}" for k, v in self.environment.items()],
                "Labels": self.labels.copy(),
            },
            "Placement": {
                "Constraints": self.constraints.copy(),
                "Preferences": self.placement_preferences.copy(),
            },
        }

        # Add edge node constraint if specified
        if self.edge_node:
            task_template["Placement"]["Constraints"].append(
                f"node.labels.edge-node=={self.edge_node}"
            )

        # Command
        if self.command:
            task_template["ContainerSpec"]["Command"] = self.command

        # Restart policy
        if self.restart_policy:
            task_template["RestartPolicy"] = self.restart_policy

        spec = {
            "Name": self.name,
            "TaskTemplate": task_template,
            "Mode": {"Replicated": {"Replicas": self.replicas}},
            "Labels": self.labels.copy(),
        }

        # Update configuration
        if self.update_config:
            spec["UpdateConfig"] = self.update_config

        # Rollback configuration
        if self.rollback_config:
            spec["RollbackConfig"] = self.rollback_config

        # Endpoint spec for ports
        if self.ports:
            spec["EndpointSpec"] = {"Ports": self.ports}

        return spec


@dataclass
class ContainerMetrics:
    """Container resource metrics."""

    container_id: str
    container_name: str
    timestamp: datetime
    cpu_usage_percent: float
    memory_usage_bytes: int
    memory_limit_bytes: int
    network_rx_bytes: int
    network_tx_bytes: int
    block_read_bytes: int
    block_write_bytes: int

    @property
    def memory_usage_percent(self) -> float:
        """Calculate memory usage percentage."""
        if self.memory_limit_bytes > 0:
            return (self.memory_usage_bytes / self.memory_limit_bytes) * 100
        return 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary."""
        data = asdict(self)
        data["timestamp"] = self.timestamp.isoformat()
        data["memory_usage_percent"] = self.memory_usage_percent
        return data


class DockerIntegration:
    """Docker integration for edge resource management."""

    def __init__(
        self,
        docker_host: Optional[str] = None,
        api_version: str = "auto",
        timeout: int = 60,
    ):
        """Initialize Docker integration.

        Args:
            docker_host: Docker daemon socket (default: system default)
            api_version: Docker API version
            timeout: API timeout in seconds
        """
        if not DOCKER_AVAILABLE:
            raise ImportError(
                "Docker client not available. Install with: pip install docker"
            )

        self.docker_host = docker_host
        self.api_version = api_version
        self.timeout = timeout

        # Docker clients
        self.docker_client: Optional[docker.DockerClient] = None
        self.swarm_enabled = False

        # Container tracking
        self.containers: Dict[str, ContainerSpec] = {}
        self.services: Dict[str, ServiceSpec] = {}
        self.container_metrics: Dict[str, ContainerMetrics] = {}

        # Background tasks
        self._monitoring_task: Optional[asyncio.Task] = None
        self._metrics_task: Optional[asyncio.Task] = None

        # Configuration
        self.monitoring_interval = 30  # seconds
        self.metrics_interval = 10  # seconds
        self.auto_pull_images = True

    async def initialize(self) -> None:
        """Initialize Docker client."""
        try:
            if self.docker_host:
                self.docker_client = docker.DockerClient(
                    base_url=self.docker_host,
                    version=self.api_version,
                    timeout=self.timeout,
                )
            else:
                self.docker_client = docker.from_env(
                    version=self.api_version, timeout=self.timeout
                )

            # Test connection
            await asyncio.to_thread(self.docker_client.ping)

            # Check if Swarm is enabled
            try:
                swarm_info = await asyncio.to_thread(self.docker_client.swarm.attrs)
                self.swarm_enabled = True
            except:
                self.swarm_enabled = False

        except Exception as e:
            raise RuntimeError(f"Failed to initialize Docker client: {e}")

    async def create_container(self, container_spec: ContainerSpec) -> Dict[str, Any]:
        """Create Docker container.

        Args:
            container_spec: Container specification

        Returns:
            Creation result
        """
        if not self.docker_client:
            await self.initialize()

        try:
            # Pull image if auto-pull is enabled
            if self.auto_pull_images:
                try:
                    await asyncio.to_thread(
                        self.docker_client.images.pull, container_spec.image
                    )
                except Exception as e:
                    # Continue if image already exists locally
                    pass

            # Create container
            docker_config = container_spec.to_docker_config()
            container = await asyncio.to_thread(
                self.docker_client.containers.create, **docker_config
            )

            # Store container spec
            self.containers[container.id] = container_spec

            return {
                "status": "created",
                "container_id": container.id,
                "container_name": container_spec.name,
                "image": container_spec.image,
                "created_at": datetime.now().isoformat(),
            }

        except Exception as e:
            return {"status": "error", "error": f"Failed to create container: {e}"}

    async def start_container(self, container_id: str) -> Dict[str, Any]:
        """Start Docker container.

        Args:
            container_id: Container ID or name

        Returns:
            Start result
        """
        if not self.docker_client:
            await self.initialize()

        try:
            container = await asyncio.to_thread(
                self.docker_client.containers.get, container_id
            )
            await asyncio.to_thread(container.start)

            return {
                "status": "started",
                "container_id": container.id,
                "container_name": container.name,
                "started_at": datetime.now().isoformat(),
            }

        except Exception as e:
            return {"status": "error", "error": f"Failed to start container: {e}"}

    async def stop_container(
        self, container_id: str, timeout: int = 10
    ) -> Dict[str, Any]:
        """Stop Docker container.

        Args:
            container_id: Container ID or name
            timeout: Stop timeout in seconds

        Returns:
            Stop result
        """
        if not self.docker_client:
            await self.initialize()

        try:
            container = await asyncio.to_thread(
                self.docker_client.containers.get, container_id
            )
            await asyncio.to_thread(container.stop, timeout=timeout)

            return {
                "status": "stopped",
                "container_id": container.id,
                "container_name": container.name,
                "stopped_at": datetime.now().isoformat(),
            }

        except Exception as e:
            return {"status": "error", "error": f"Failed to stop container: {e}"}

    async def remove_container(
        self, container_id: str, force: bool = False
    ) -> Dict[str, Any]:
        """Remove Docker container.

        Args:
            container_id: Container ID or name
            force: Force removal

        Returns:
            Removal result
        """
        if not self.docker_client:
            await self.initialize()

        try:
            container = await asyncio.to_thread(
                self.docker_client.containers.get, container_id
            )
            await asyncio.to_thread(container.remove, force=force)

            # Remove from tracking
            self.containers.pop(container.id, None)
            self.container_metrics.pop(container.id, None)

            return {
                "status": "removed",
                "container_id": container.id,
                "container_name": container.name,
                "removed_at": datetime.now().isoformat(),
            }

        except Exception as e:
            return {"status": "error", "error": f"Failed to remove container: {e}"}

    async def get_container_status(self, container_id: str) -> Dict[str, Any]:
        """Get container status.

        Args:
            container_id: Container ID or name

        Returns:
            Container status
        """
        if not self.docker_client:
            await self.initialize()

        try:
            container = await asyncio.to_thread(
                self.docker_client.containers.get, container_id
            )
            await asyncio.to_thread(container.reload)

            return {
                "container_id": container.id,
                "container_name": container.name,
                "status": container.status,
                "state": container.attrs["State"],
                "image": (
                    container.image.tags[0]
                    if container.image.tags
                    else container.image.id
                ),
                "created_at": container.attrs["Created"],
                "started_at": container.attrs["State"].get("StartedAt"),
                "finished_at": container.attrs["State"].get("FinishedAt"),
                "ports": container.ports,
                "labels": container.labels,
                "mounts": [
                    {
                        "source": mount["Source"],
                        "destination": mount["Destination"],
                        "mode": mount["Mode"],
                        "type": mount["Type"],
                    }
                    for mount in container.attrs.get("Mounts", [])
                ],
            }

        except Exception as e:
            return {"status": "error", "error": f"Failed to get container status: {e}"}

    async def list_containers(
        self, all_containers: bool = False, filters: Optional[Dict[str, Any]] = None
    ) -> List[Dict[str, Any]]:
        """List Docker containers.

        Args:
            all_containers: Include stopped containers
            filters: Container filters

        Returns:
            List of containers
        """
        if not self.docker_client:
            await self.initialize()

        try:
            containers = await asyncio.to_thread(
                self.docker_client.containers.list,
                all=all_containers,
                filters=filters or {},
            )

            container_list = []
            for container in containers:
                container_info = {
                    "container_id": container.id,
                    "container_name": container.name,
                    "status": container.status,
                    "image": (
                        container.image.tags[0]
                        if container.image.tags
                        else container.image.id
                    ),
                    "created_at": container.attrs["Created"],
                    "labels": container.labels,
                    "ports": container.ports,
                }
                container_list.append(container_info)

            return container_list

        except Exception as e:
            raise RuntimeError(f"Failed to list containers: {e}")

    async def create_service(self, service_spec: ServiceSpec) -> Dict[str, Any]:
        """Create Docker Swarm service.

        Args:
            service_spec: Service specification

        Returns:
            Creation result
        """
        if not self.docker_client:
            await self.initialize()

        if not self.swarm_enabled:
            return {"status": "error", "error": "Docker Swarm is not enabled"}

        try:
            # Pull image if auto-pull is enabled
            if self.auto_pull_images:
                try:
                    await asyncio.to_thread(
                        self.docker_client.images.pull, service_spec.image
                    )
                except Exception:
                    pass

            # Create service
            docker_spec = service_spec.to_docker_service_spec()
            service = await asyncio.to_thread(
                self.docker_client.services.create, **docker_spec
            )

            # Store service spec
            self.services[service.id] = service_spec

            return {
                "status": "created",
                "service_id": service.id,
                "service_name": service_spec.name,
                "image": service_spec.image,
                "replicas": service_spec.replicas,
                "created_at": datetime.now().isoformat(),
            }

        except Exception as e:
            return {"status": "error", "error": f"Failed to create service: {e}"}

    async def update_service(
        self, service_id: str, service_spec: ServiceSpec
    ) -> Dict[str, Any]:
        """Update Docker Swarm service.

        Args:
            service_id: Service ID or name
            service_spec: Updated service specification

        Returns:
            Update result
        """
        if not self.docker_client:
            await self.initialize()

        if not self.swarm_enabled:
            return {"status": "error", "error": "Docker Swarm is not enabled"}

        try:
            service = await asyncio.to_thread(
                self.docker_client.services.get, service_id
            )
            docker_spec = service_spec.to_docker_service_spec()

            await asyncio.to_thread(service.update, **docker_spec)

            # Update stored spec
            self.services[service.id] = service_spec

            return {
                "status": "updated",
                "service_id": service.id,
                "service_name": service_spec.name,
                "updated_at": datetime.now().isoformat(),
            }

        except Exception as e:
            return {"status": "error", "error": f"Failed to update service: {e}"}

    async def scale_service(self, service_id: str, replicas: int) -> Dict[str, Any]:
        """Scale Docker Swarm service.

        Args:
            service_id: Service ID or name
            replicas: Target replica count

        Returns:
            Scaling result
        """
        if not self.docker_client:
            await self.initialize()

        if not self.swarm_enabled:
            return {"status": "error", "error": "Docker Swarm is not enabled"}

        try:
            service = await asyncio.to_thread(
                self.docker_client.services.get, service_id
            )
            await asyncio.to_thread(service.scale, replicas)

            return {
                "status": "scaled",
                "service_id": service.id,
                "service_name": service.name,
                "target_replicas": replicas,
                "scaled_at": datetime.now().isoformat(),
            }

        except Exception as e:
            return {"status": "error", "error": f"Failed to scale service: {e}"}

    async def get_service_status(self, service_id: str) -> Dict[str, Any]:
        """Get service status.

        Args:
            service_id: Service ID or name

        Returns:
            Service status
        """
        if not self.docker_client:
            await self.initialize()

        if not self.swarm_enabled:
            return {"status": "error", "error": "Docker Swarm is not enabled"}

        try:
            service = await asyncio.to_thread(
                self.docker_client.services.get, service_id
            )
            tasks = await asyncio.to_thread(service.tasks)

            running_tasks = sum(
                1 for task in tasks if task.get("Status", {}).get("State") == "running"
            )
            total_tasks = len(tasks)

            return {
                "service_id": service.id,
                "service_name": service.name,
                "mode": service.attrs["Spec"]["Mode"],
                "replicas": service.attrs["Spec"]["Mode"]
                .get("Replicated", {})
                .get("Replicas", 0),
                "running_tasks": running_tasks,
                "total_tasks": total_tasks,
                "image": service.attrs["Spec"]["TaskTemplate"]["ContainerSpec"][
                    "Image"
                ],
                "created_at": service.attrs["CreatedAt"],
                "updated_at": service.attrs["UpdatedAt"],
                "labels": service.attrs["Spec"].get("Labels", {}),
                "tasks": [
                    {
                        "id": task["ID"],
                        "state": task.get("Status", {}).get("State"),
                        "desired_state": task.get("DesiredState"),
                        "node_id": task.get("NodeID"),
                        "timestamp": task.get("Status", {}).get("Timestamp"),
                    }
                    for task in tasks
                ],
            }

        except Exception as e:
            return {"status": "error", "error": f"Failed to get service status: {e}"}

    async def collect_container_metrics(
        self, container_id: str
    ) -> Optional[ContainerMetrics]:
        """Collect container resource metrics.

        Args:
            container_id: Container ID

        Returns:
            Container metrics or None if failed
        """
        if not self.docker_client:
            await self.initialize()

        try:
            container = await asyncio.to_thread(
                self.docker_client.containers.get, container_id
            )
            stats = await asyncio.to_thread(container.stats, stream=False)

            # Calculate CPU usage percentage
            cpu_delta = (
                stats["cpu_stats"]["cpu_usage"]["total_usage"]
                - stats["precpu_stats"]["cpu_usage"]["total_usage"]
            )
            system_delta = (
                stats["cpu_stats"]["system_cpu_usage"]
                - stats["precpu_stats"]["system_cpu_usage"]
            )

            cpu_usage_percent = 0.0
            if system_delta > 0:
                cpu_usage_percent = (
                    (cpu_delta / system_delta)
                    * len(stats["cpu_stats"]["cpu_usage"].get("percpu_usage", [1]))
                    * 100
                )

            # Memory usage
            memory_usage = stats["memory_stats"]["usage"]
            memory_limit = stats["memory_stats"]["limit"]

            # Network I/O
            networks = stats.get("networks", {})
            network_rx = sum(net["rx_bytes"] for net in networks.values())
            network_tx = sum(net["tx_bytes"] for net in networks.values())

            # Block I/O
            blkio_stats = stats.get("blkio_stats", {}).get(
                "io_service_bytes_recursive", []
            )
            block_read = sum(
                entry["value"] for entry in blkio_stats if entry["op"] == "Read"
            )
            block_write = sum(
                entry["value"] for entry in blkio_stats if entry["op"] == "Write"
            )

            metrics = ContainerMetrics(
                container_id=container.id,
                container_name=container.name,
                timestamp=datetime.now(),
                cpu_usage_percent=cpu_usage_percent,
                memory_usage_bytes=memory_usage,
                memory_limit_bytes=memory_limit,
                network_rx_bytes=network_rx,
                network_tx_bytes=network_tx,
                block_read_bytes=block_read,
                block_write_bytes=block_write,
            )

            # Store metrics
            self.container_metrics[container_id] = metrics

            return metrics

        except Exception:
            return None

    async def get_system_info(self) -> Dict[str, Any]:
        """Get Docker system information.

        Returns:
            System information
        """
        if not self.docker_client:
            await self.initialize()

        try:
            info = await asyncio.to_thread(self.docker_client.info)
            version = await asyncio.to_thread(self.docker_client.version)

            return {
                "system_info": {
                    "containers": info.get("Containers", 0),
                    "containers_running": info.get("ContainersRunning", 0),
                    "containers_paused": info.get("ContainersPaused", 0),
                    "containers_stopped": info.get("ContainersStopped", 0),
                    "images": info.get("Images", 0),
                    "driver": info.get("Driver"),
                    "memory_limit": info.get("MemoryLimit"),
                    "swap_limit": info.get("SwapLimit"),
                    "cpus": info.get("NCPU", 0),
                    "memory": info.get("MemTotal", 0),
                    "docker_root_dir": info.get("DockerRootDir"),
                    "swarm": info.get("Swarm", {}),
                },
                "version_info": version,
                "swarm_enabled": self.swarm_enabled,
            }

        except Exception as e:
            return {"status": "error", "error": f"Failed to get system info: {e}"}

    async def start_monitoring(self) -> None:
        """Start container monitoring."""
        if self._monitoring_task and not self._monitoring_task.done():
            return

        self._monitoring_task = asyncio.create_task(self._monitor_containers())
        self._metrics_task = asyncio.create_task(self._collect_metrics())

    async def stop_monitoring(self) -> None:
        """Stop container monitoring."""
        if self._monitoring_task and not self._monitoring_task.done():
            self._monitoring_task.cancel()
            try:
                await self._monitoring_task
            except asyncio.CancelledError:
                pass

        if self._metrics_task and not self._metrics_task.done():
            self._metrics_task.cancel()
            try:
                await self._metrics_task
            except asyncio.CancelledError:
                pass

    async def _monitor_containers(self) -> None:
        """Monitor containers continuously."""
        while True:
            try:
                # Get list of running containers
                containers = await self.list_containers(all_containers=False)

                # Update container status for tracked containers
                for container_id in list(self.containers.keys()):
                    try:
                        status = await self.get_container_status(container_id)
                        # Update internal tracking based on status
                    except Exception:
                        # Container might have been removed
                        self.containers.pop(container_id, None)

                await asyncio.sleep(self.monitoring_interval)

            except asyncio.CancelledError:
                break
            except Exception as e:
                # Log error and continue monitoring
                print(f"Container monitoring error: {e}")
                await asyncio.sleep(self.monitoring_interval)

    async def _collect_metrics(self) -> None:
        """Collect container metrics continuously."""
        while True:
            try:
                # Collect metrics for all running containers
                containers = await self.list_containers(all_containers=False)

                for container_info in containers:
                    container_id = container_info["container_id"]
                    if container_info["status"] == "running":
                        await self.collect_container_metrics(container_id)

                await asyncio.sleep(self.metrics_interval)

            except asyncio.CancelledError:
                break
            except Exception as e:
                # Log error and continue collecting
                print(f"Metrics collection error: {e}")
                await asyncio.sleep(self.metrics_interval)
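For orientation, a minimal usage sketch of the new module follows. It is hypothetical, not taken from the package's documentation or tests; it assumes a reachable local Docker daemon with the optional docker client installed, and the container name, image, and edge node label are made up for illustration.

import asyncio

from kailash.edge.resource.docker_integration import ContainerSpec, DockerIntegration


async def main():
    # Connect to the local daemon and detect whether Swarm mode is active.
    integration = DockerIntegration()
    await integration.initialize()

    # Describe a small nginx container tagged with a (made-up) edge node label.
    spec = ContainerSpec(
        name="edge-nginx",
        image="nginx:alpine",
        ports={"80": 8080},  # container_port -> host_port
        memory_limit="256m",
        cpu_limit=0.5,
        edge_node="edge-west-1",
    )

    created = await integration.create_container(spec)
    if created["status"] == "created":
        await integration.start_container(created["container_id"])
        # Pull a one-off resource snapshot for the new container.
        metrics = await integration.collect_container_metrics(created["container_id"])
        if metrics:
            print(metrics.to_dict())


asyncio.run(main())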