clonebox 1.1.3__py3-none-any.whl → 1.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clonebox/backends/libvirt_backend.py +217 -0
- clonebox/backends/qemu_disk.py +52 -0
- clonebox/backends/subprocess_runner.py +56 -0
- clonebox/cli.py +343 -22
- clonebox/cloner.py +327 -189
- clonebox/di.py +176 -0
- clonebox/health/__init__.py +17 -0
- clonebox/health/manager.py +328 -0
- clonebox/health/models.py +194 -0
- clonebox/health/probes.py +337 -0
- clonebox/interfaces/disk.py +40 -0
- clonebox/interfaces/hypervisor.py +89 -0
- clonebox/interfaces/network.py +33 -0
- clonebox/interfaces/process.py +46 -0
- clonebox/logging.py +125 -0
- clonebox/monitor.py +267 -0
- clonebox/p2p.py +4 -2
- clonebox/resource_monitor.py +162 -0
- clonebox/resources.py +222 -0
- clonebox/rollback.py +172 -0
- clonebox/secrets.py +331 -0
- clonebox/snapshots/__init__.py +12 -0
- clonebox/snapshots/manager.py +349 -0
- clonebox/snapshots/models.py +183 -0
- {clonebox-1.1.3.dist-info → clonebox-1.1.5.dist-info}/METADATA +51 -2
- clonebox-1.1.5.dist-info/RECORD +42 -0
- clonebox-1.1.3.dist-info/RECORD +0 -21
- {clonebox-1.1.3.dist-info → clonebox-1.1.5.dist-info}/WHEEL +0 -0
- {clonebox-1.1.3.dist-info → clonebox-1.1.5.dist-info}/entry_points.txt +0 -0
- {clonebox-1.1.3.dist-info → clonebox-1.1.5.dist-info}/licenses/LICENSE +0 -0
- {clonebox-1.1.3.dist-info → clonebox-1.1.5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Abstract interface for process execution."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class ProcessResult:
    """Outcome of one finished process invocation."""

    # Exit status reported by the process.
    returncode: int
    # Text captured from standard output.
    stdout: str
    # Text captured from standard error.
    stderr: str

    @property
    def success(self) -> bool:
        """Tell whether the process exited with status zero."""
        failed = self.returncode != 0
        return not failed
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ProcessRunner(ABC):
    """Contract that every process-execution backend has to implement."""

    @abstractmethod
    def run(
        self,
        command: List[str],
        capture_output: bool = True,
        timeout: Optional[int] = None,
        check: bool = True,
        cwd: Optional[Path] = None,
        env: Optional[Dict[str, str]] = None,
    ) -> ProcessResult:
        """Execute a command supplied as an argument list.

        The exact meaning of ``capture_output``, ``timeout``, ``check``,
        ``cwd`` and ``env`` is defined by the concrete implementation.

        Returns:
            A ``ProcessResult`` describing the finished process.
        """
        ...

    @abstractmethod
    def run_shell(
        self,
        command: str,
        capture_output: bool = True,
        timeout: Optional[int] = None,
    ) -> ProcessResult:
        """Execute a command supplied as a single shell string.

        Returns:
            A ``ProcessResult`` describing the finished process.
        """
        ...
|
clonebox/logging.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Structured logging for CloneBox using structlog.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import sys
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
import structlog
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def configure_logging(
    level: str = "INFO",
    json_output: bool = False,
    log_file: Optional[Path] = None,
    console_output: bool = True,
) -> None:
    """
    Configure structured logging for CloneBox.

    The function may be called more than once: handlers installed by an
    earlier call are replaced, and the configuration is applied even when
    other code has already attached handlers to the root logger (a plain
    ``logging.basicConfig`` call would silently do nothing in that case).

    Args:
        level: Log level (DEBUG, INFO, WARNING, ERROR)
        json_output: If True, output JSON format (good for log aggregation)
        log_file: Optional file path for log output; missing parent
            directories are created and the file is written as UTF-8
        console_output: If True, also output to console (stderr)

    Raises:
        AttributeError: If ``level`` does not name an attribute of the
            standard ``logging`` module.
    """
    # Resolve the level first so that a bad name fails before any global
    # logging state has been modified.
    numeric_level = getattr(logging, level.upper())

    # Attribute used to recognise handlers that this function installed.
    managed_flag = "_clonebox_managed"

    # Shared processors for all outputs
    shared_processors = [
        structlog.contextvars.merge_contextvars,
        structlog.processors.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.UnicodeDecoder(),
    ]

    if json_output:
        # JSON output for production/aggregation
        renderer = structlog.processors.JSONRenderer()
    else:
        # Human-readable output for development.  Colour escape sequences
        # are only emitted when stderr is an interactive terminal so that
        # redirected or piped output stays free of control characters.
        stderr_isatty = getattr(sys.stderr, "isatty", None)
        renderer = structlog.dev.ConsoleRenderer(
            colors=bool(stderr_isatty and stderr_isatty()),
            exception_formatter=structlog.dev.plain_traceback,
        )

    structlog.configure(
        processors=shared_processors
        + [
            structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
        ],
        wrapper_class=structlog.stdlib.BoundLogger,
        context_class=dict,
        logger_factory=structlog.stdlib.LoggerFactory(),
        cache_logger_on_first_use=True,
    )

    # Build the standard-library handlers that render the structlog events.
    handlers = []

    if console_output:
        console_handler = logging.StreamHandler(sys.stderr)
        console_handler.setFormatter(
            structlog.stdlib.ProcessorFormatter(
                processor=renderer,
                foreign_pre_chain=shared_processors,
            )
        )
        handlers.append(console_handler)

    if log_file:
        log_path = Path(log_file)
        log_path.parent.mkdir(parents=True, exist_ok=True)
        file_handler = logging.FileHandler(log_path, encoding="utf-8")
        file_handler.setFormatter(
            structlog.stdlib.ProcessorFormatter(
                processor=structlog.processors.JSONRenderer(),  # Always JSON for files
                foreign_pre_chain=shared_processors,
            )
        )
        handlers.append(file_handler)

    root_logger = logging.getLogger()

    # Drop only the handlers a previous call installed; handlers owned by
    # other code are left untouched.
    for stale in [h for h in root_logger.handlers if getattr(h, managed_flag, False)]:
        root_logger.removeHandler(stale)
        stale.close()

    for handler in handlers:
        setattr(handler, managed_flag, True)
        root_logger.addHandler(handler)

    root_logger.setLevel(numeric_level)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def get_logger(name: str = "clonebox") -> structlog.stdlib.BoundLogger:
    """Return the structlog logger registered under ``name``."""
    bound_logger = structlog.get_logger(name)
    return bound_logger
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# Context managers for operation tracking
|
|
97
|
+
from contextlib import contextmanager
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@contextmanager
def log_operation(logger: structlog.stdlib.BoundLogger, operation: str, **kwargs):
    """
    Context manager for logging operation start/end.

    Emits ``<operation>.started`` on entry, then either
    ``<operation>.completed`` or ``<operation>.failed`` on exit, each with
    the elapsed time in milliseconds.  Exceptions raised inside the ``with``
    body are logged and re-raised unchanged.

    Args:
        logger: Logger to bind the operation context to.
        operation: Operation name, used as the event prefix.
        **kwargs: Extra key/value pairs bound to every emitted event.

    Yields:
        The bound logger carrying ``operation`` and ``kwargs``.

    Usage:
        with log_operation(log, "create_vm", vm_name="my-vm"):
            # do stuff
    """
    # Imported locally so this block does not depend on the module's
    # top-level import list.
    import time

    log = logger.bind(operation=operation, **kwargs)
    # perf_counter is monotonic, so the reported duration cannot go
    # negative or jump when the system clock is adjusted.
    started = time.perf_counter()

    def elapsed_ms() -> float:
        return round((time.perf_counter() - started) * 1000, 2)

    log.info(f"{operation}.started")

    try:
        yield log
    except Exception as e:
        log.error(
            f"{operation}.failed",
            error=str(e),
            error_type=type(e).__name__,
            duration_ms=elapsed_ms(),
        )
        raise
    else:
        # Kept outside the ``try`` so that a failure while emitting the
        # completion event is not misreported as a failed operation.
        log.info(f"{operation}.completed", duration_ms=elapsed_ms())
|
clonebox/monitor.py
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Real-time resource monitoring for CloneBox VMs and containers.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import subprocess
|
|
8
|
+
import time
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any, Dict, List, Optional
|
|
12
|
+
|
|
13
|
+
try:
|
|
14
|
+
import libvirt
|
|
15
|
+
except ImportError:
|
|
16
|
+
libvirt = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class VMStats:
    """Snapshot of the resource figures collected for one virtual machine."""

    # Identification and lifecycle state of the VM.
    name: str
    state: str

    # CPU utilisation in percent.
    cpu_percent: float

    # Memory figures in megabytes.
    memory_used_mb: int
    memory_total_mb: int

    # Disk figures in gigabytes.
    disk_used_gb: float
    disk_total_gb: float

    # Network traffic counters in bytes.
    network_rx_bytes: int
    network_tx_bytes: int

    # Uptime in seconds.
    uptime_seconds: int
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class ContainerStats:
    """Snapshot of the resource figures collected for one container."""

    # Identification and lifecycle state of the container.
    name: str
    state: str

    # CPU utilisation in percent.
    cpu_percent: float

    # Memory usage and configured limit in megabytes.
    memory_used_mb: int
    memory_limit_mb: int

    # Network traffic counters in bytes.
    network_rx_bytes: int
    network_tx_bytes: int

    # Process count reported for the container.
    pids: int
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class ResourceMonitor:
    """Monitor VM and container resources in real-time.

    VM figures are read through a lazily opened libvirt connection, container
    figures by running the ``podman``/``docker`` command line client.  The
    public getters are best-effort: on failure they return ``None`` or an
    empty list instead of raising.  Instances can be used as context managers
    so that the libvirt connection is closed on exit.
    """

    # Upper-cased size suffixes mapped to byte multipliers.  Decimal suffixes
    # (KB/MB/...) are treated as powers of 1024, which is what this class has
    # always done.
    # NOTE(review): container engines may print decimal suffixes with
    # 1000-based meaning; confirm whether the 1024 interpretation is wanted.
    _UNIT_BYTES = {
        "B": 1,
        "KB": 1024,
        "KIB": 1024,
        "MB": 1024**2,
        "MIB": 1024**2,
        "GB": 1024**3,
        "GIB": 1024**3,
        "TB": 1024**4,
        "TIB": 1024**4,
    }
    _SUFFIX_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

    def __init__(self, conn_uri: str = "qemu:///session"):
        self.conn_uri = conn_uri
        self._conn = None
        # VM name -> (monotonic timestamp, cumulative CPU time) of the
        # previous sample, used to derive a CPU percentage.
        self._prev_cpu: Dict[str, tuple] = {}

    def __enter__(self) -> "ResourceMonitor":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    @property
    def conn(self):
        """Return the libvirt connection, opening it on first access.

        Raises:
            RuntimeError: If the libvirt Python bindings are not installed.
        """
        if self._conn is None:
            if libvirt is None:
                raise RuntimeError("libvirt-python not installed")
            self._conn = libvirt.open(self.conn_uri)
        return self._conn

    def get_vm_stats(self, vm_name: str) -> Optional["VMStats"]:
        """Get resource statistics for a VM.

        Returns:
            A ``VMStats`` snapshot, or ``None`` when the VM cannot be queried
            (unknown name, libvirt unavailable, ...).  The CPU percentage is
            0.0 on the first call for a VM because it needs two samples.
        """
        try:
            dom = self.conn.lookupByName(vm_name)
            info = dom.info()

            state_map = {
                libvirt.VIR_DOMAIN_RUNNING: "running",
                libvirt.VIR_DOMAIN_PAUSED: "paused",
                libvirt.VIR_DOMAIN_SHUTDOWN: "shutdown",
                libvirt.VIR_DOMAIN_SHUTOFF: "shutoff",
                libvirt.VIR_DOMAIN_CRASHED: "crashed",
            }
            state = state_map.get(info[0], "unknown")

            # Memory: fall back to the total when no current figure is given.
            memory_total_mb = info[1] // 1024
            memory_used_mb = info[2] // 1024 if info[2] > 0 else memory_total_mb

            # CPU percentage (requires two samples); info[3] is the vCPU
            # count used for the cap, info[4] the cumulative CPU time.
            cpu_percent = self._cpu_percent(vm_name, info[4], info[3])

            # The domain XML is parsed once and shared by disk and network.
            root = self._domain_xml_root(dom)
            disk_used_gb, disk_total_gb = self._disk_usage_gb(dom, root)
            network_rx, network_tx = self._network_totals(dom, root)

            return VMStats(
                name=vm_name,
                state=state,
                cpu_percent=cpu_percent,
                memory_used_mb=memory_used_mb,
                memory_total_mb=memory_total_mb,
                disk_used_gb=disk_used_gb,
                disk_total_gb=disk_total_gb,
                network_rx_bytes=network_rx,
                network_tx_bytes=network_tx,
                uptime_seconds=0,  # Would need guest agent for accurate uptime
            )

        except Exception:
            # Deliberate best-effort boundary: callers expect None on failure.
            return None

    def _cpu_percent(self, vm_name: str, cpu_time: int, vcpus: int) -> float:
        """Derive a CPU percentage from the previous sample of ``vm_name``.

        The result is clamped to ``[0, vcpus * 100]``; a counter that went
        backwards (for example after a VM restart) therefore yields 0.0.
        """
        now = time.monotonic()
        previous = self._prev_cpu.get(vm_name)
        self._prev_cpu[vm_name] = (now, cpu_time)
        if previous is None:
            return 0.0

        prev_time, prev_cpu = previous
        elapsed = now - prev_time
        if elapsed <= 0:
            return 0.0

        # CPU time is in nanoseconds
        percent = ((cpu_time - prev_cpu) / (elapsed * 1e9)) * 100
        return max(0.0, min(percent, 100.0 * vcpus))  # Cap at vcpus * 100%

    @staticmethod
    def _domain_xml_root(dom):
        """Return the parsed domain XML, or ``None`` if it cannot be read."""
        import xml.etree.ElementTree as ET

        try:
            return ET.fromstring(dom.XMLDesc())
        except Exception:
            # Best-effort: disk and network figures simply stay at zero.
            return None

    @staticmethod
    def _disk_usage_gb(dom, root) -> tuple:
        """Sum ``(used_gb, total_gb)`` over the file-backed disks in ``root``.

        Capacity and allocation come from ``dom.blockInfo``; when that call
        fails the size of the image file is used for both values.
        """
        used_gb = 0.0
        total_gb = 0.0
        if root is None:
            return used_gb, total_gb

        for disk in root.findall(".//disk[@type='file']"):
            source = disk.find(".//source")
            path = source.get("file") if source is not None else None
            if not path:
                continue
            try:
                block_info = dom.blockInfo(path)
                capacity, allocation = block_info[0], block_info[1]
            except Exception:
                try:
                    capacity = allocation = Path(path).stat().st_size
                except OSError:
                    continue
            total_gb += capacity / (1024**3)
            used_gb += allocation / (1024**3)

        return used_gb, total_gb

    @staticmethod
    def _network_totals(dom, root) -> tuple:
        """Sum ``(rx_bytes, tx_bytes)`` over the interfaces listed in ``root``.

        Interfaces are addressed by the ``<target dev=...>`` name from the
        domain XML; interfaces without one are skipped.
        """
        rx_bytes = 0
        tx_bytes = 0
        if root is None:
            return rx_bytes, tx_bytes

        for iface in root.findall(".//interface"):
            target = iface.find("target")
            dev = target.get("dev") if target is not None else None
            if not dev:
                continue
            try:
                counters = dom.interfaceStats(dev)
            except Exception:
                continue
            rx_bytes += counters[0]
            tx_bytes += counters[4]

        return rx_bytes, tx_bytes

    def get_all_vm_stats(self) -> List["VMStats"]:
        """Get stats for all VMs; VMs that cannot be queried are omitted."""
        stats = []
        try:
            for dom in self.conn.listAllDomains():
                vm_stats = self.get_vm_stats(dom.name())
                if vm_stats:
                    stats.append(vm_stats)
        except Exception:
            # Best-effort: return whatever was collected so far.
            pass
        return stats

    def get_container_stats(self, engine: str = "auto") -> List["ContainerStats"]:
        """Get resource statistics for containers.

        Args:
            engine: ``"podman"``, ``"docker"`` or ``"auto"`` (podman when it
                is available, docker otherwise).

        Returns:
            One ``ContainerStats`` per parsable record; an empty list when
            the engine cannot be run or reports an error.
        """
        if engine == "auto":
            engine = "podman" if self._check_engine("podman") else "docker"

        try:
            result = subprocess.run(
                [engine, "stats", "--no-stream", "--format", "json"],
                capture_output=True,
                text=True,
                timeout=10,
            )
        except (OSError, subprocess.SubprocessError):
            return []

        if result.returncode != 0:
            return []

        stats = []
        for record in self._decode_stats_payload(result.stdout):
            try:
                stats.append(self._container_from_record(record))
            except Exception:
                # One malformed record must not discard the other containers.
                continue
        return stats

    @staticmethod
    def _decode_stats_payload(raw: str) -> List[Dict[str, Any]]:
        """Decode ``stats`` output into a list of record dictionaries.

        Accepts a JSON array, a single JSON object, or one JSON object per
        line; anything that is not a JSON object is ignored.
        """
        text = raw.strip() if raw else ""
        if not text:
            return []

        try:
            parsed = json.loads(text)
        except ValueError:
            # Not a single document: treat the output as one object per line.
            parsed = []
            for line in text.splitlines():
                line = line.strip()
                if not line:
                    continue
                try:
                    parsed.append(json.loads(line))
                except ValueError:
                    continue

        if isinstance(parsed, dict):
            parsed = [parsed]
        if not isinstance(parsed, list):
            return []
        return [record for record in parsed if isinstance(record, dict)]

    def _container_from_record(self, record: Dict[str, Any]) -> "ContainerStats":
        """Build a ``ContainerStats`` from one decoded ``stats`` record."""
        # NOTE(review): the key names below are the ones the original code
        # read; confirm they match the output of every supported engine.
        cpu_text = str(record.get("CPUPerc", "0%")).replace("%", "")
        try:
            cpu_percent = float(cpu_text)
        except ValueError:
            cpu_percent = 0.0

        # "used / limit"
        mem_parts = str(record.get("MemUsage", "0MiB / 0MiB")).split("/")
        mem_used = self._parse_memory(mem_parts[0].strip())
        mem_limit = self._parse_memory(mem_parts[1].strip()) if len(mem_parts) > 1 else 0

        # "received / sent"
        net_parts = str(record.get("NetIO", "0B / 0B")).split("/")
        net_rx = self._parse_bytes(net_parts[0].strip())
        net_tx = self._parse_bytes(net_parts[1].strip()) if len(net_parts) > 1 else 0

        try:
            pids = int(record.get("PIDs", 0))
        except (TypeError, ValueError):
            pids = 0

        return ContainerStats(
            name=record.get("Name", record.get("Names", "unknown")),
            state="running",
            cpu_percent=cpu_percent,
            memory_used_mb=mem_used,
            memory_limit_mb=mem_limit,
            network_rx_bytes=net_rx,
            network_tx_bytes=net_tx,
            pids=pids,
        )

    def _check_engine(self, engine: str) -> bool:
        """Check if container engine is available."""
        try:
            result = subprocess.run([engine, "--version"], capture_output=True, timeout=5)
            return result.returncode == 0
        except Exception:
            return False

    @classmethod
    def _size_to_bytes(cls, size_str: str) -> float:
        """Convert text such as ``'1.5GiB'`` to a byte count.

        Raises:
            ValueError: If the numeric part cannot be parsed.
            KeyError: If the suffix is not a known unit.
        """
        text = size_str.strip().upper()
        number = text.rstrip(cls._SUFFIX_CHARS)
        suffix = text[len(number):] or "B"
        return float(number) * cls._UNIT_BYTES[suffix]

    def _parse_memory(self, mem_str: str) -> int:
        """Parse memory string like '100MiB' to MB (0 if unparsable)."""
        try:
            return int(self._size_to_bytes(mem_str) / (1024 * 1024))
        except (KeyError, ValueError, OverflowError):
            return 0

    def _parse_bytes(self, bytes_str: str) -> int:
        """Parse byte string like '1.5GB' to bytes (0 if unparsable)."""
        try:
            return int(self._size_to_bytes(bytes_str))
        except (KeyError, ValueError, OverflowError):
            return 0

    def close(self) -> None:
        """Close the libvirt connection if one was opened."""
        if self._conn is not None:
            self._conn.close()
            self._conn = None
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def format_bytes(num_bytes: int) -> str:
    """Render a byte count with one decimal place and a B/KB/.../PB suffix.

    The value is divided by 1024 until its magnitude drops below 1024 or the
    largest suffix (PB) is reached.
    """
    suffixes = ("B", "KB", "MB", "GB", "TB")
    value = num_bytes
    position = 0
    while position < len(suffixes):
        if abs(value) < 1024.0:
            return f"{value:.1f}{suffixes[position]}"
        value = value / 1024.0
        position += 1
    # Anything that survived all five divisions is expressed in petabytes.
    return f"{value:.1f}PB"
|
clonebox/p2p.py
CHANGED
|
@@ -14,8 +14,10 @@ class P2PManager:
|
|
|
14
14
|
|
|
15
15
|
def __init__(self, ssh_options: Optional[list] = None):
|
|
16
16
|
self.ssh_options = ssh_options or [
|
|
17
|
-
"-o",
|
|
18
|
-
"
|
|
17
|
+
"-o",
|
|
18
|
+
"StrictHostKeyChecking=no",
|
|
19
|
+
"-o",
|
|
20
|
+
"UserKnownHostsFile=/dev/null",
|
|
19
21
|
]
|
|
20
22
|
|
|
21
23
|
def _run_ssh(self, host: str, command: str) -> subprocess.CompletedProcess:
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""Resource monitoring system for CloneBox."""
|
|
2
|
+
|
|
3
|
+
import xml.etree.ElementTree as ET
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from typing import Any, Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
try:
|
|
9
|
+
import libvirt
|
|
10
|
+
except ImportError:
|
|
11
|
+
libvirt = None
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class ResourceUsage:
    """Point-in-time resource usage sample for one VM."""

    # Moment the sample was taken.
    timestamp: datetime

    # CPU: cumulative time in nanoseconds and derived utilisation in percent.
    cpu_time_ns: int
    cpu_percent: float

    # Memory: byte counts and the used/total ratio in percent.
    memory_used_bytes: int
    memory_total_bytes: int
    memory_percent: float
    swap_used_bytes: int

    # Disk I/O counters.
    disk_read_bytes: int
    disk_write_bytes: int
    disk_read_requests: int
    disk_write_requests: int

    # Network I/O counters.
    net_rx_bytes: int
    net_tx_bytes: int
    net_rx_packets: int
    net_tx_packets: int
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class ResourceMonitor:
    """Monitor VM resource usage using libvirt."""

    def __init__(self, conn: Optional[Any] = None):
        """Store the libvirt connection used for all queries.

        Args:
            conn: An open libvirt connection, or ``None``; ``get_usage``
                raises when no connection is available.
        """
        self.conn = conn
        # VM name -> {"cpu_time": ..., "timestamp": ...} of the last sample.
        self._prev_stats: Dict[str, Dict] = {}

    def get_usage(self, vm_name: str) -> "ResourceUsage":
        """Get current resource usage for a VM.

        Raises:
            RuntimeError: If there is no libvirt connection or the VM is not
                running.
        """
        if not self.conn:
            raise RuntimeError("libvirt connection not available")

        domain = self.conn.lookupByName(vm_name)
        if not domain.isActive():
            raise RuntimeError(f"VM '{vm_name}' is not running")

        # CPU stats
        cpu_stats = domain.getCPUStats(True)[0]
        cpu_time = cpu_stats.get("cpu_time", 0)
        cpu_percent = self._calculate_cpu_percent(vm_name, cpu_time)

        # Memory stats
        # Need to ensure memory balloon driver is active for accurate stats
        mem_stats = domain.memoryStats()
        has_actual = "actual" in mem_stats
        # The placeholder of 1 keeps the reported total non-zero when
        # "actual" is missing.
        memory_total = mem_stats.get("actual", 1) * 1024
        if has_actual and "unused" in mem_stats:
            memory_used = max(mem_stats["actual"] - mem_stats["unused"], 0) * 1024
        else:
            # RSS is often most accurate for host view
            memory_used = mem_stats.get("rss", 0) * 1024

        # Without a real total a percentage would be meaningless.
        memory_percent = (memory_used / memory_total * 100) if has_actual and memory_total else 0
        # NOTE(review): "swap_in" is reported as swap usage here; confirm
        # this is the intended statistic.
        swap_used = mem_stats.get("swap_in", 0) * 1024

        # Block and Network stats
        disk_stats = self._get_disk_stats(domain)
        net_stats = self._get_network_stats(domain)

        return ResourceUsage(
            timestamp=datetime.now(),
            cpu_time_ns=cpu_time,
            cpu_percent=cpu_percent,
            memory_used_bytes=memory_used,
            memory_total_bytes=memory_total,
            memory_percent=memory_percent,
            swap_used_bytes=swap_used,
            **disk_stats,
            **net_stats,
        )

    def _calculate_cpu_percent(self, vm_name: str, cpu_time: int) -> float:
        """Calculate CPU percentage from time delta.

        Returns 0.0 for the first sample of a VM and whenever the CPU time
        counter went backwards (the result is never negative).
        """
        import time

        # A monotonic clock keeps the interval valid across clock changes.
        now = time.monotonic()
        prev = self._prev_stats.get(vm_name, {})
        prev_time = prev.get("cpu_time", cpu_time)
        prev_timestamp = prev.get("timestamp", now)

        # Update stored stats
        self._prev_stats[vm_name] = {
            "cpu_time": cpu_time,
            "timestamp": now,
        }

        time_delta = now - prev_timestamp
        if time_delta <= 0:
            return 0.0

        cpu_delta = cpu_time - prev_time
        if cpu_delta <= 0:
            return 0.0

        # cpu_time is in nanoseconds, time_delta in seconds
        # (delta_ns / (delta_sec * 1e9)) * 100
        return (cpu_delta / (time_delta * 1e9)) * 100

    @staticmethod
    def _target_devices(domain, element: str) -> List[str]:
        """List the ``<target dev=...>`` names of all ``element`` nodes.

        Nodes without a target, or whose target has no ``dev`` attribute,
        are skipped.
        """
        tree = ET.fromstring(domain.XMLDesc())
        devices = []
        for node in tree.findall(f".//{element}"):
            target = node.find("target")
            dev = target.get("dev") if target is not None else None
            if dev:
                devices.append(dev)
        return devices

    def _get_disk_stats(self, domain) -> Dict[str, int]:
        """Get aggregated disk stats.

        Devices whose statistics cannot be read are left out of the sums.
        """
        stats = {
            "disk_read_bytes": 0,
            "disk_write_bytes": 0,
            "disk_read_requests": 0,
            "disk_write_requests": 0,
        }

        for dev in self._target_devices(domain, "disk"):
            try:
                # blockStats returns: (read_req, read_bytes, write_req, write_bytes, errs)
                ds = domain.blockStats(dev)
            except Exception:
                continue
            stats["disk_read_requests"] += ds[0]
            stats["disk_read_bytes"] += ds[1]
            stats["disk_write_requests"] += ds[2]
            stats["disk_write_bytes"] += ds[3]

        return stats

    def _get_network_stats(self, domain) -> Dict[str, int]:
        """Get aggregated network stats.

        Interfaces whose statistics cannot be read are left out of the sums.
        """
        stats = {
            "net_rx_bytes": 0,
            "net_tx_bytes": 0,
            "net_rx_packets": 0,
            "net_tx_packets": 0,
        }

        for dev in self._target_devices(domain, "interface"):
            try:
                # interfaceStats returns: (rx_bytes, rx_packets, rx_errs, rx_drop,
                #                          tx_bytes, tx_packets, tx_errs, tx_drop)
                ns = domain.interfaceStats(dev)
            except Exception:
                continue
            stats["net_rx_bytes"] += ns[0]
            stats["net_rx_packets"] += ns[1]
            stats["net_tx_bytes"] += ns[4]
            stats["net_tx_packets"] += ns[5]

        return stats
|