maqet 0.0.1.4__py3-none-any.whl → 0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maqet/__init__.py +50 -6
- maqet/__main__.py +96 -0
- maqet/__version__.py +3 -0
- maqet/api/__init__.py +35 -0
- maqet/api/decorators.py +184 -0
- maqet/api/metadata.py +147 -0
- maqet/api/registry.py +182 -0
- maqet/cli.py +71 -0
- maqet/config/__init__.py +26 -0
- maqet/config/merger.py +237 -0
- maqet/config/parser.py +198 -0
- maqet/config/validators.py +519 -0
- maqet/config_handlers.py +684 -0
- maqet/constants.py +200 -0
- maqet/exceptions.py +226 -0
- maqet/formatters.py +294 -0
- maqet/generators/__init__.py +12 -0
- maqet/generators/base_generator.py +101 -0
- maqet/generators/cli_generator.py +635 -0
- maqet/generators/python_generator.py +247 -0
- maqet/generators/rest_generator.py +58 -0
- maqet/handlers/__init__.py +12 -0
- maqet/handlers/base.py +108 -0
- maqet/handlers/init.py +147 -0
- maqet/handlers/stage.py +196 -0
- maqet/ipc/__init__.py +29 -0
- maqet/ipc/retry.py +265 -0
- maqet/ipc/runner_client.py +285 -0
- maqet/ipc/unix_socket_server.py +239 -0
- maqet/logger.py +160 -55
- maqet/machine.py +884 -0
- maqet/managers/__init__.py +7 -0
- maqet/managers/qmp_manager.py +333 -0
- maqet/managers/snapshot_coordinator.py +327 -0
- maqet/managers/vm_manager.py +683 -0
- maqet/maqet.py +1120 -0
- maqet/os_interactions.py +46 -0
- maqet/process_spawner.py +395 -0
- maqet/qemu_args.py +76 -0
- maqet/qmp/__init__.py +10 -0
- maqet/qmp/commands.py +92 -0
- maqet/qmp/keyboard.py +311 -0
- maqet/qmp/qmp.py +17 -0
- maqet/snapshot.py +473 -0
- maqet/state.py +958 -0
- maqet/storage.py +702 -162
- maqet/validation/__init__.py +9 -0
- maqet/validation/config_validator.py +170 -0
- maqet/vm_runner.py +523 -0
- maqet-0.0.5.dist-info/METADATA +237 -0
- maqet-0.0.5.dist-info/RECORD +55 -0
- {maqet-0.0.1.4.dist-info → maqet-0.0.5.dist-info}/WHEEL +1 -1
- maqet-0.0.5.dist-info/entry_points.txt +2 -0
- maqet-0.0.5.dist-info/licenses/LICENSE +21 -0
- {maqet-0.0.1.4.dist-info → maqet-0.0.5.dist-info}/top_level.txt +0 -1
- maqet/core.py +0 -411
- maqet/functions.py +0 -104
- maqet-0.0.1.4.dist-info/METADATA +0 -6
- maqet-0.0.1.4.dist-info/RECORD +0 -33
- qemu/machine/__init__.py +0 -36
- qemu/machine/console_socket.py +0 -142
- qemu/machine/machine.py +0 -954
- qemu/machine/py.typed +0 -0
- qemu/machine/qtest.py +0 -191
- qemu/qmp/__init__.py +0 -59
- qemu/qmp/error.py +0 -50
- qemu/qmp/events.py +0 -717
- qemu/qmp/legacy.py +0 -319
- qemu/qmp/message.py +0 -209
- qemu/qmp/models.py +0 -146
- qemu/qmp/protocol.py +0 -1057
- qemu/qmp/py.typed +0 -0
- qemu/qmp/qmp_client.py +0 -655
- qemu/qmp/qmp_shell.py +0 -618
- qemu/qmp/qmp_tui.py +0 -655
- qemu/qmp/util.py +0 -219
- qemu/utils/__init__.py +0 -162
- qemu/utils/accel.py +0 -84
- qemu/utils/py.typed +0 -0
- qemu/utils/qemu_ga_client.py +0 -323
- qemu/utils/qom.py +0 -273
- qemu/utils/qom_common.py +0 -175
- qemu/utils/qom_fuse.py +0 -207
@@ -0,0 +1,170 @@
|
|
1
|
+
"""
|
2
|
+
Runtime Configuration Validator for MAQET.
|
3
|
+
|
4
|
+
Performs runtime validation and health checks before starting QEMU instances.
|
5
|
+
Delegates schema validation to config.validators module to avoid duplication.
|
6
|
+
|
7
|
+
This validator focuses on runtime concerns:
|
8
|
+
- Binary health checks (QEMU binary actually works)
|
9
|
+
- Tool availability (qemu-img installed)
|
10
|
+
- System resource validation
|
11
|
+
|
12
|
+
For schema/structure validation, see maqet.config.validators module.
|
13
|
+
"""
|
14
|
+
|
15
|
+
import subprocess
|
16
|
+
from pathlib import Path
|
17
|
+
from typing import Any, Dict
|
18
|
+
|
19
|
+
from ..constants import Timeouts
|
20
|
+
from ..logger import LOG
|
21
|
+
|
22
|
+
|
23
|
+
class ConfigValidationError(Exception):
    """Configuration validation errors."""


class ConfigValidator:
    """
    Runtime validator for VM configuration.

    This validator performs runtime health checks before starting VMs.
    It delegates schema validation to the config.validators module to
    avoid code duplication.

    Separation of Concerns:
    - config.validators: Schema validation + value normalization
    - validation.ConfigValidator: Runtime health checks + pre-start validation

    Use config.validators for:
    - Validating config structure and types
    - Normalizing values (e.g., bytes to "4G")
    - Cross-field validation

    Use validation.ConfigValidator for:
    - Binary health checks (qemu-system-x86_64 --version works)
    - Tool availability checks (qemu-img installed)
    - Pre-start validation orchestration

    Extracted from Machine class to follow single-responsibility principle.
    """

    def validate_config(self, config_data: Dict[str, Any]) -> None:
        """
        Validate VM configuration data using schema validator.

        Delegates to config.validators.validate_config_data() and converts
        its error type into this module's ConfigValidationError.

        Args:
            config_data: VM configuration dictionary

        Raises:
            ConfigValidationError: If configuration is invalid
        """
        # Imported lazily to avoid a circular dependency with config.validators
        from ..config.validators import (
            ConfigValidationError as SchemaValidationError,
        )
        from ..config.validators import validate_config_data

        try:
            # Delegate to schema validator for structure/format validation
            validate_config_data(config_data)
        except SchemaValidationError as e:
            # Re-raise as our own exception type, keeping the original
            # error attached as __cause__ for debugging.
            raise ConfigValidationError(str(e)) from e

    def validate_binary_health(self, binary: str) -> None:
        """
        Perform health check on QEMU binary.

        Verifies the binary exists and works by running it with --version.

        Args:
            binary: Path to QEMU binary

        Raises:
            ConfigValidationError: If the binary is missing, cannot be
                executed, times out, or exits with a non-zero status
        """
        binary_path = Path(binary)

        if not binary_path.exists():
            raise ConfigValidationError(f"QEMU binary not found: {binary}")

        # Only the subprocess call lives inside the try block, so that the
        # health-check failure raised below is not caught and re-wrapped by
        # the generic handler.
        try:
            result = subprocess.run(
                [str(binary_path), '--version'],
                capture_output=True,
                text=True,
                timeout=Timeouts.BINARY_VERSION_CHECK,
            )
        except (FileNotFoundError, PermissionError) as e:
            # FileNotFoundError can still occur after the exists() check
            # (e.g. the file vanished); PermissionError is raised when the
            # path lacks execute permission or is a directory.
            raise ConfigValidationError(
                f"QEMU binary not executable: {binary}\n"
                f"Check file permissions and ensure it's a valid binary."
            ) from e
        except subprocess.TimeoutExpired as e:
            raise ConfigValidationError(
                f"QEMU binary health check timed out: {binary}\n"
                f"Binary may be hung or unresponsive."
            ) from e
        except Exception as e:
            raise ConfigValidationError(
                f"QEMU binary validation failed: {binary}\n"
                f"Error: {e}"
            ) from e

        if result.returncode != 0:
            raise ConfigValidationError(
                f"QEMU binary failed health check: {binary}\n"
                f"Error: {result.stderr.strip()}"
            )

        LOG.debug(f"QEMU binary health check passed: {binary}")

    def validate_qemu_img_available(self) -> None:
        """
        Verify qemu-img tool is available for storage operations.

        Never raises for the handled failure modes: a missing tool, a
        non-zero exit, or a timeout only produces a warning log entry
        (storage auto-creation may fail later).
        """
        try:
            subprocess.run(
                ["qemu-img", "--version"],
                capture_output=True,
                check=True,  # non-zero exit -> CalledProcessError
                timeout=Timeouts.BINARY_VERSION_CHECK,
            )
            LOG.debug("qemu-img utility found and working")
        except (
            subprocess.CalledProcessError,
            FileNotFoundError,
            subprocess.TimeoutExpired,
        ):
            LOG.warning(
                "qemu-img not found - storage auto-creation may fail. "
                "Install QEMU tools (qemu-utils or qemu-img package)."
            )

    def pre_start_validation(self, config_data: Dict[str, Any]) -> None:
        """
        Perform all pre-start validation checks.

        Combines binary health check and qemu-img availability check.

        Args:
            config_data: VM configuration dictionary; the optional
                "binary" key selects the QEMU binary to check

        Raises:
            ConfigValidationError: If the binary health check fails
        """
        # Get binary path (use default if not specified)
        binary = config_data.get("binary", "/usr/bin/qemu-system-x86_64")

        # Perform binary health check (raises on failure)
        self.validate_binary_health(binary)

        # Check qemu-img availability (warning only)
        self.validate_qemu_img_available()
|
maqet/vm_runner.py
ADDED
@@ -0,0 +1,523 @@
|
|
1
|
+
"""
|
2
|
+
VM Runner Process
|
3
|
+
|
4
|
+
Long-running process that manages a single VM's lifecycle.
|
5
|
+
Each VM gets its own persistent Python process running an instance of VMRunner.
|
6
|
+
|
7
|
+
Responsibilities:
|
8
|
+
- Start and monitor QEMU process
|
9
|
+
- Handle QMP communication via QEMUMachine instance
|
10
|
+
- Provide IPC server for CLI commands
|
11
|
+
- Perform periodic DB consistency checks
|
12
|
+
- Handle graceful shutdown on QEMU exit or stop command
|
13
|
+
|
14
|
+
Architecture:
|
15
|
+
One VM = One VMRunner process = One QEMUMachine instance
|
16
|
+
No daemon, no shared state. DB is single source of truth.
|
17
|
+
"""
|
18
|
+
|
19
|
+
import asyncio
|
20
|
+
import json
|
21
|
+
import os
|
22
|
+
import signal
|
23
|
+
import sys
|
24
|
+
import threading
|
25
|
+
import time
|
26
|
+
from pathlib import Path
|
27
|
+
from typing import Any, Dict, Optional
|
28
|
+
|
29
|
+
from .constants import Intervals, Timeouts
|
30
|
+
from .logger import LOG
|
31
|
+
from .machine import Machine
|
32
|
+
from .state import StateManager, VMInstance
|
33
|
+
|
34
|
+
|
35
|
+
class VMRunnerError(Exception):
    """VM runner related errors."""


class VMRunner:
    """
    Long-running process that manages a single VM's lifecycle.

    Each VM gets its own persistent Python process with VMRunner instance.
    The runner creates a Machine (QEMUMachine wrapper) and keeps it alive
    while the VM is running. Provides IPC server for CLI communication.
    """

    def __init__(self, vm_id: str, db_path: Optional[Path] = None):
        """
        Initialize VM runner for specific VM.

        Installs SIGTERM/SIGINT handlers that request a shutdown of the
        event loop.

        Args:
            vm_id: VM identifier
            db_path: Optional path to database (for testing)
        """
        self.vm_id = vm_id
        self.db_path = db_path
        self.machine: Optional[Machine] = None
        self.ipc_server = None
        # Set by _start_ipc_server(); declared here so _cleanup() can rely
        # on the attribute existing.
        self.ipc_thread: Optional[threading.Thread] = None
        self.socket_path: Optional[Path] = None
        self.state_manager: Optional[StateManager] = None

        # Thread-safe stop event (set from signal handlers and IPC thread)
        self._stop_event = threading.Event()

        # Setup signal handlers
        signal.signal(signal.SIGTERM, self._handle_signal)
        signal.signal(signal.SIGINT, self._handle_signal)

        LOG.debug(f"VMRunner initialized for VM {vm_id}")

    def start(self) -> None:
        """
        Initialize VM runner, start VM, run event loop.

        Process:
        1. Load VM from database
        2. Start QEMU via Machine
        3. Update DB with runner PID and socket path
        4. Start IPC server
        5. Run event loop (monitor QEMU, handle IPC, check DB)

        This method does not return normally: it always terminates the
        process via sys.exit() (0 on clean exit, 1 on failure).
        """
        try:
            # Initialize state manager
            data_dir = str(self.db_path.parent) if self.db_path else None
            self.state_manager = StateManager(data_dir)

            # Load VM from database
            vm = self.state_manager.get_vm(self.vm_id)
            if not vm:
                LOG.error(f"VM {self.vm_id} not found in database")
                sys.exit(1)

            LOG.info(f"Starting VM runner for {self.vm_id}")

            # The Machine context manager guarantees Machine.__exit__() runs
            # when this block is left (normally or via exception), which is
            # what stops QEMU and prevents orphaned processes.
            with Machine(
                vm_id=self.vm_id,
                config_data=vm.config_data,
                state_manager=self.state_manager,
            ) as self.machine:
                # Start QEMU process
                self.machine.start()

                # Get QEMU PID
                qemu_pid = self.machine.pid
                if not qemu_pid:
                    raise VMRunnerError("Failed to get QEMU PID after start")

                LOG.info(f"QEMU started with PID {qemu_pid}")

                # Update database with runner PID and socket path
                self.socket_path = self._get_socket_path()
                self.state_manager.update_vm_status(
                    self.vm_id,
                    status="running",
                    pid=qemu_pid,
                    runner_pid=os.getpid(),
                    socket_path=str(self.socket_path),
                )

                LOG.debug(
                    f"Updated DB: runner_pid={os.getpid()}, "
                    f"qemu_pid={qemu_pid}, socket={self.socket_path}"
                )

                self._start_ipc_server()

                # Run event loop (blocks until stop is requested)
                self._run_event_loop()

            # Context manager exited: QEMU has been stopped by Machine.__exit__
            LOG.info(f"VM runner for {self.vm_id} exiting cleanly")
            sys.exit(0)

        except Exception as e:
            # SystemExit is not an Exception subclass, so the sys.exit()
            # calls above are not intercepted here.
            LOG.error(f"VM runner failed to start: {e}")
            self._cleanup()
            sys.exit(1)

    def _start_ipc_server(self) -> None:
        """
        Start the Unix-socket IPC server in a daemon thread and wait for it.

        Raises:
            VMRunnerError: If the socket file does not appear within
                Timeouts.IPC_SOCKET_WAIT seconds or the server thread dies
                before creating it
        """
        from .ipc.unix_socket_server import UnixSocketIPCServer

        self.ipc_server = UnixSocketIPCServer(
            socket_path=self.socket_path, handler=self._handle_ipc_request
        )

        # The server's start() coroutine blocks, so it gets its own thread
        # with its own asyncio event loop.
        def run_ipc_server():
            try:
                asyncio.run(self.ipc_server.start())
            except Exception as e:
                LOG.error(f"IPC server error: {e}")

        self.ipc_thread = threading.Thread(target=run_ipc_server, daemon=True)
        self.ipc_thread.start()

        # Wait for socket to be created; stop polling early if the server
        # thread has already died, since the socket will never appear then.
        deadline = time.time() + Timeouts.IPC_SOCKET_WAIT
        while (
            not self.socket_path.exists()
            and self.ipc_thread.is_alive()
            and time.time() < deadline
        ):
            time.sleep(Intervals.VM_HEALTH_CHECK)

        if not self.socket_path.exists():
            raise VMRunnerError("IPC server failed to start")

        LOG.info(f"IPC server started on {self.socket_path}")

    def _run_event_loop(self) -> None:
        """
        Main event loop: monitor QEMU and check DB until stop is requested.

        Loop tasks:
        - Check if QEMU process still running
        - Periodic DB consistency check (every 5 seconds)
        - Exit on QEMU exit, DB stop condition, or when the stop event is
          set (by a signal handler or the IPC "stop" command)

        IPC requests are served by the IPC thread, not by this loop.
        """
        LOG.debug("Entering event loop")
        last_db_check = time.time()
        db_check_interval = 5  # Check DB every 5 seconds

        while not self._stop_event.is_set():
            try:
                # Check if QEMU process still running
                if not self._is_qemu_running():
                    LOG.warning("QEMU process exited")
                    self._handle_qemu_exit()
                    break

                # Periodic DB state check (detect drift)
                if time.time() - last_db_check >= db_check_interval:
                    if not self._check_db_consistency():
                        LOG.warning("DB consistency check failed, stopping")
                        self._handle_db_stop_command()
                        break
                    last_db_check = time.time()

                # Sleep to avoid busy loop (short for faster detection)
                time.sleep(Intervals.EVENT_LOOP_SLEEP)

            except Exception as e:
                LOG.error(f"Error in event loop: {e}")
                self._stop_event.set()

        # Event loop exited
        LOG.debug("Exiting event loop")
        # Note: QEMU cleanup is handled by context manager (Machine.__exit__)
        # We just need to clean up IPC resources
        self._cleanup()

    def _is_qemu_running(self) -> bool:
        """
        Check if QEMU process is still alive.

        Uses both Machine.is_running and an explicit signal-0 PID probe.

        Returns:
            True if QEMU process is running, False otherwise
        """
        if not self.machine:
            return False

        # First check Machine's is_running property
        if not self.machine.is_running:
            return False

        # Double-check PID exists
        qemu_pid = self.machine.pid
        if not qemu_pid:
            return False

        # Verify PID is alive
        try:
            os.kill(qemu_pid, 0)  # Signal 0 = existence check, nothing is sent
        except ProcessLookupError:
            return False
        except PermissionError:
            # Process exists but we are not allowed to signal it
            return True
        return True

    def _check_db_consistency(self) -> bool:
        """
        Check if DB state matches reality (detect drift).

        Consistency checks:
        1. VM deleted from DB -> stop runner
        2. DB status=stopped -> stop runner
        3. DB has different runner_pid -> stop runner (another runner started)
        4. QEMU PID mismatch -> update DB

        Returns:
            True if consistent (continue running)
            False if inconsistent or the check itself failed (should exit)
        """
        try:
            vm = self.state_manager.get_vm(self.vm_id)

            # Check 1: VM deleted from DB
            if not vm:
                LOG.warning(f"VM {self.vm_id} deleted from DB")
                return False

            # Check 2: DB says stopped
            if vm.status == "stopped":
                LOG.warning(f"DB indicates {self.vm_id} status is stopped")
                return False

            # Check 3: DB has different runner PID
            if vm.runner_pid and vm.runner_pid != os.getpid():
                LOG.warning(
                    f"DB has different runner PID ({vm.runner_pid} "
                    f"vs {os.getpid()})"
                )
                return False

            # Check 4: QEMU PID mismatch (update DB if needed)
            current_qemu_pid = self.machine.pid if self.machine else None
            if vm.pid != current_qemu_pid:
                LOG.debug(
                    f"QEMU PID mismatch, updating DB "
                    f"({vm.pid} -> {current_qemu_pid})"
                )
                self.state_manager.update_vm_status(
                    self.vm_id,
                    status=vm.status,
                    pid=current_qemu_pid,
                    runner_pid=vm.runner_pid,
                    socket_path=vm.socket_path,
                )

            return True  # All consistent

        except Exception as e:
            LOG.error(f"Error checking DB consistency: {e}")
            return False

    def _handle_qemu_exit(self) -> None:
        """
        QEMU process exited: mark the VM stopped in the DB and stop the runner.

        Called when QEMU process is detected as not running. A DB update
        failure is logged and does not prevent the stop event being set.
        """
        LOG.info(f"QEMU for {self.vm_id} exited, cleaning up")

        # Update database
        try:
            self.state_manager.update_vm_status(
                self.vm_id,
                status="stopped",
                pid=None,
                runner_pid=None,
                socket_path=None,
            )
        except Exception as e:
            LOG.error(f"Failed to update DB after QEMU exit: {e}")

        self._stop_event.set()

    def _handle_db_stop_command(self) -> None:
        """
        DB says VM should be stopped (drift or manual stop).

        Called when DB consistency check detects stop condition.
        Asks Machine.stop() to stop QEMU (non-forced) and sets the stop event.
        """
        LOG.info(f"DB indicates {self.vm_id} should stop, shutting down")

        if self.machine and self._is_qemu_running():
            try:
                self.machine.stop(force=False, timeout=Timeouts.VM_GRACEFUL_SHUTDOWN)
            except Exception as e:
                LOG.error(f"Failed to stop QEMU: {e}")

        self._stop_event.set()

    async def _handle_ipc_request(self, request: Dict[str, Any]) -> Dict[str, Any]:
        """
        Handle IPC request from CLI.

        Args:
            request: {
                "method": "qmp" | "stop" | "status" | "ping",
                "args": [...],
                "kwargs": {...}
            }

        Returns:
            {"status": "success", "result": ...} or
            {"status": "error", "error": ...}

        Errors raised while handling the request are reported in the
        response instead of propagating to the IPC server.
        """
        method_name = request.get("method")
        args = request.get("args", [])
        kwargs = request.get("kwargs", {})

        LOG.debug(f"IPC request: method={method_name}, args={args}")

        try:
            if method_name == "qmp":
                # QMP command: args = [command], kwargs = QMP arguments
                if not args:
                    return {"status": "error", "error": "QMP command required"}

                # Report a clear error instead of leaking an AttributeError
                # message when no Machine has been created yet.
                if not self.machine:
                    return {"status": "error", "error": "VM is not running"}

                command = args[0]
                result = self.machine.qmp(command, **kwargs)
                return {"status": "success", "result": result}

            if method_name == "stop":
                LOG.info(f"Stop command received via IPC for {self.vm_id}")

                # Don't stop QEMU here - avoid asyncio conflicts.
                # Just signal the event loop to exit; it handles cleanup.
                self._stop_event.set()

                return {"status": "success", "result": "VM stopping"}

            if method_name == "status":
                status = {
                    "vm_id": self.vm_id,
                    "qemu_pid": self.machine.pid if self.machine else None,
                    "runner_pid": os.getpid(),
                    "running": self._is_qemu_running(),
                    "socket_path": str(self.socket_path) if self.socket_path else None,
                }
                return {"status": "success", "result": status}

            if method_name == "ping":
                # Health check
                return {"status": "success", "result": "pong"}

            return {"status": "error", "error": f"Unknown method: {method_name}"}

        except Exception as e:
            LOG.error(f"Error handling IPC request: {e}")
            return {"status": "error", "error": str(e)}

    def _handle_signal(self, signum: int, frame) -> None:
        """
        Handle termination signals (SIGTERM, SIGINT).

        Args:
            signum: Signal number
            frame: Current stack frame (unused)
        """
        LOG.info(f"Received signal {signum}, initiating shutdown")
        self._stop_event.set()

    def _cleanup(self) -> None:
        """
        Cleanup resources before exit.

        Cleanup tasks:
        - Stop IPC server
        - Remove socket file EXPLICITLY
        - Update database to stopped status

        Each step is best-effort: failures are logged and the remaining
        steps still run. May be called more than once.
        """
        LOG.debug("Cleaning up VM runner resources")

        # Stop IPC server (use sync method for cross-thread safety)
        if self.ipc_server:
            try:
                LOG.debug("Stopping IPC server thread")
                self.ipc_server.stop_sync()
                # Wait for IPC thread to finish (it's a daemon, but let's be clean).
                # getattr() keeps this safe for instances whose __init__ was bypassed.
                ipc_thread = getattr(self, "ipc_thread", None)
                if ipc_thread is not None and ipc_thread.is_alive():
                    LOG.debug("Waiting for IPC thread to finish")
                    ipc_thread.join(timeout=1.0)
                    if ipc_thread.is_alive():
                        LOG.warning("IPC thread did not finish within timeout")
            except Exception as e:
                LOG.error(f"Failed to stop IPC server: {e}")

        # Remove socket file EXPLICITLY (in case IPC server didn't)
        if self.socket_path and self.socket_path.exists():
            try:
                self.socket_path.unlink()
                LOG.debug(f"Removed socket file {self.socket_path}")
            except Exception as e:
                LOG.error(f"Failed to remove socket: {e}")

        # Update DB to stopped status
        if self.state_manager:
            try:
                self.state_manager.update_vm_status(
                    self.vm_id,
                    status="stopped",
                    pid=None,
                    runner_pid=None,
                    socket_path=None,
                )
                LOG.debug(f"Updated DB status to stopped for {self.vm_id}")
            except Exception as e:
                LOG.error(f"Failed to update DB on cleanup: {e}")

    def _get_socket_path(self) -> Path:
        """
        Get Unix socket path for this VM, creating its directory if needed.

        Socket location: XDG_RUNTIME_DIR/maqet/sockets/{vm_id}.sock
        Falls back to /tmp/maqet-{uid}/maqet/sockets/ if the runtime
        directory does not exist.

        Returns:
            Path to Unix socket
        """
        # Prefer XDG_RUNTIME_DIR. `or` (rather than a .get() default) also
        # covers the variable being set but empty: Path("") is the current
        # directory, which would otherwise place sockets in the CWD.
        runtime_dir_base = (
            os.environ.get("XDG_RUNTIME_DIR") or f"/run/user/{os.getuid()}"
        )
        if not Path(runtime_dir_base).exists():
            # Fallback to /tmp
            runtime_dir_base = f"/tmp/maqet-{os.getuid()}"

        socket_dir = Path(runtime_dir_base) / "maqet" / "sockets"
        # Restrict a newly created socket directory to the owner; this
        # matters for the shared /tmp fallback. Path.mkdir applies `mode`
        # to the leaf directory only and leaves existing directories as-is.
        socket_dir.mkdir(mode=0o700, parents=True, exist_ok=True)

        return socket_dir / f"{self.vm_id}.sock"
|
490
|
+
|
491
|
+
|
492
|
+
def main(argv: Optional[list] = None) -> None:
    """
    Entry point for VM runner process.

    Usage: python3 -m maqet.vm_runner <vm_id> [db_path]

    Args:
        argv: Full argument vector including the program name at index 0.
            Defaults to sys.argv, so calling main() with no arguments
            behaves like a normal command-line invocation. Expected
            contents: [prog, vm_id (required), db_path (optional, for
            testing)].

    Exits with status 1 when vm_id is missing or the runner raises, and
    with status 0 on KeyboardInterrupt.
    """
    args = sys.argv if argv is None else argv

    if len(args) < 2:
        print("Usage: python3 -m maqet.vm_runner <vm_id> [db_path]", file=sys.stderr)
        sys.exit(1)

    vm_id = args[1]
    db_path = Path(args[2]) if len(args) > 2 else None

    # Create and start VM runner
    runner = VMRunner(vm_id, db_path)

    try:
        runner.start()
    except KeyboardInterrupt:
        LOG.info("VM runner interrupted by user")
        sys.exit(0)
    except Exception as e:
        LOG.error(f"VM runner error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
|