cube-vm-backend 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: cube-vm-backend
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: VM backend implementations for CUBE desktop-automation benchmarks
|
|
5
|
+
Requires-Dist: cube-standard
|
|
6
|
+
Requires-Dist: requests>=2.28
|
|
7
|
+
Requires-Dist: tqdm>=4.60
|
|
8
|
+
Requires-Dist: pytest>=8.0.0 ; extra == 'dev'
|
|
9
|
+
Requires-Python: >=3.12
|
|
10
|
+
Provides-Extra: dev
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "cube-vm-backend"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "VM backend implementations for CUBE desktop-automation benchmarks"
|
|
5
|
+
requires-python = ">=3.12"
|
|
6
|
+
dependencies = [
|
|
7
|
+
"cube-standard",
|
|
8
|
+
"requests>=2.28",
|
|
9
|
+
"tqdm>=4.60",
|
|
10
|
+
]
|
|
11
|
+
|
|
12
|
+
[project.optional-dependencies]
|
|
13
|
+
dev = [
|
|
14
|
+
"pytest>=8.0.0",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
[build-system]
|
|
18
|
+
requires = ["uv_build>=0.6.0,<0.7.0"]
|
|
19
|
+
build-backend = "uv_build"
|
|
20
|
+
|
|
21
|
+
[tool.uv.build-backend]
|
|
22
|
+
module-name = "cube_vm_backend"
|
|
23
|
+
|
|
24
|
+
[tool.ruff]
|
|
25
|
+
fix = true
|
|
26
|
+
line-length = 120
|
|
27
|
+
indent-width = 4
|
|
28
|
+
|
|
29
|
+
[tool.ruff.format]
|
|
30
|
+
quote-style = "double"
|
|
31
|
+
indent-style = "space"
|
|
32
|
+
skip-magic-trailing-comma = false
|
|
33
|
+
line-ending = "auto"
|
|
34
|
+
|
|
35
|
+
[tool.ruff.lint]
|
|
36
|
+
extend-select = ["I"]
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""cube-vm-backend: VM backend implementations for CUBE desktop-automation benchmarks."""
|
|
2
|
+
|
|
3
|
+
from cube_vm_backend.local import LocalQEMUVM, LocalQEMUVMBackend
|
|
4
|
+
from cube_vm_backend.qemu_manager import QEMUConfig, QEMUManager
|
|
5
|
+
|
|
6
|
+
# Public API of the package: the names re-exported from cube_vm_backend.local
# and cube_vm_backend.qemu_manager by the imports above.
__all__ = [
    "LocalQEMUVM",
    "LocalQEMUVMBackend",
    "QEMUConfig",
    "QEMUManager",
]
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
"""LocalQEMUVMBackend — VMBackend implementation using QEMU/KVM on the local host.
|
|
2
|
+
|
|
3
|
+
Uses a read-only base qcow2 image + per-task copy-on-write overlays.
|
|
4
|
+
Reset strategy: delete overlay + reboot (ResetIsolation.RESTART, ~30s).
|
|
5
|
+
|
|
6
|
+
Image:
|
|
7
|
+
path_to_vm must point to an existing qcow2 base image.
|
|
8
|
+
Benchmarks that need auto-download (e.g. OSWorld) subclass this backend
|
|
9
|
+
and override ensure_resource() to fetch the image before launch.
|
|
10
|
+
|
|
11
|
+
Port forwarding:
|
|
12
|
+
SLIRP user-mode networking — no root or bridge required.
|
|
13
|
+
Each VM gets unique host ports forwarded to guest ports 5000/9222/8006/8080.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import logging
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
from cube.vm import VM, VMBackend, VMConfig
|
|
22
|
+
|
|
23
|
+
from cube_vm_backend.qemu_manager import QEMUConfig, QEMUManager
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
# Default cache location (~/.cube/vm_data) — can be overridden via the
# backend's cache_dir field. Resolved once, at import time, from the current
# user's home directory.
_DEFAULT_CACHE_DIR = Path.home() / ".cube" / "vm_data"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class LocalQEMUVM(VM):
    """Live handle to one QEMU/KVM guest started by LocalQEMUVMBackend.

    The handle is a thin wrapper around a :class:`QEMUManager`; every
    property and method below delegates to it. It is not serializable, and
    whoever holds it is responsible for calling :meth:`stop`.
    """

    def __init__(self, manager: QEMUManager) -> None:
        # The manager owns the QEMU process, its overlay and its ports.
        self._manager = manager

    @property
    def endpoint(self) -> str:
        """URL of the in-VM HTTP agent, in the form ``http://localhost:<port>``."""
        port = self._manager.server_port
        return f"http://localhost:{port}"

    @property
    def chromium_port(self) -> int:
        """Host-side port that is forwarded to Chromium DevTools (guest 9222)."""
        return self._manager.chromium_port

    @property
    def vlc_port(self) -> int:
        """Host-side port that is forwarded to the VLC HTTP interface (guest 8080)."""
        return self._manager.vlc_port

    @property
    def server_port(self) -> int:
        """Host-side port that is forwarded to the Flask agent (guest 5000)."""
        return self._manager.server_port

    def restore_snapshot(self, name: str) -> None:
        """Bring the guest back to the state of the base image.

        There are no named snapshots with the overlay strategy: the manager
        resets the VM (fresh overlay + reboot), which gives RESTART
        isolation (~30s). ``name`` exists only to satisfy the VM interface;
        it is logged and otherwise unused.
        """
        logger.info("Restoring VM snapshot '%s' (overlay reset)", name)
        manager = self._manager
        manager.reset()

    def stop(self) -> None:
        """Power the guest off and let the manager remove its overlay and socket files."""
        manager = self._manager
        manager.stop()
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class LocalQEMUVMBackend(VMBackend):
    """VMBackend that runs QEMU/KVM directly on the local host.

    Attributes
    ----------
    cache_dir : str
        Working directory of the backend. Per-VM overlays are created in
        ``<cache_dir>/overlays``.
    path_to_vm : str | None
        Path to a pre-existing qcow2 base image. This base class does not
        download anything: if the value is None, ``ensure_resource()``
        raises ``ValueError``. Subclasses may override ``ensure_resource()``
        to acquire an image and set this field before launch.
    headless : bool
        Suppress the graphical display (default True).
    memory : str
        RAM allocation passed to QEMU -m (e.g. "4G").
    cpus : int
        Number of vCPUs.
    """

    cache_dir: str = str(_DEFAULT_CACHE_DIR)
    path_to_vm: str | None = None
    headless: bool = True
    memory: str = "4G"
    cpus: int = 4

    def ensure_resource(self, config: VMConfig) -> None:
        """Validate or prepare the base qcow2 image before launch.

        Override in subclasses to add auto-download behaviour (e.g. OSWorld).
        The default implementation only checks that ``path_to_vm`` is set.

        Raises
        ------
        ValueError
            If ``path_to_vm`` is None.
        """
        if self.path_to_vm is None:
            raise ValueError(
                "path_to_vm must be set on LocalQEMUVMBackend. "
                "Provide a path to an existing qcow2 image, or use a subclass "
                "that handles image acquisition (e.g. OSWorldQEMUVMBackend)."
            )

    def launch(self, config: VMConfig) -> LocalQEMUVM:
        """Ensure the image exists, then start a QEMU VM and return a live handle.

        Blocks until the in-VM HTTP agent is reachable.

        Raises
        ------
        ValueError
            If ``path_to_vm`` is still unset after ``ensure_resource()``.
        FileNotFoundError
            If ``path_to_vm`` does not point to an existing file.
        """
        self.ensure_resource(config)

        # ensure_resource() is an override point, so re-check its outcome here
        # instead of letting Path(None) fail with an unhelpful TypeError.
        if self.path_to_vm is None:
            raise ValueError("path_to_vm is still unset after ensure_resource(); cannot launch a VM without an image")

        base_image = Path(self.path_to_vm)
        # Fail before any port is reserved or overlay is created.
        if not base_image.is_file():
            raise FileNotFoundError(f"Base qcow2 image not found: {base_image}")

        vm_dir = Path(self.cache_dir)

        qemu_config = QEMUConfig(
            base_image=base_image,
            overlay_dir=vm_dir / "overlays",
            memory=self.memory,
            cpus=self.cpus,
            headless=self.headless,
            screen_width=config.screen_size[0],
            screen_height=config.screen_size[1],
        )
        manager = QEMUManager(qemu_config)
        manager.start()
        logger.info("VM launched at endpoint http://localhost:%d", manager.server_port)
        return LocalQEMUVM(manager)

    def close(self) -> None:
        """No-op at the backend level — each VM is stopped individually via vm.stop()."""
|
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
"""QEMU/KVM VM lifecycle manager.
|
|
2
|
+
|
|
3
|
+
Uses SLIRP user-mode networking with port forwarding (no root / bridge required).
|
|
4
|
+
Snapshot strategy: read-only base qcow2 + per-instance copy-on-write overlay
|
|
5
|
+
(reset = stop QEMU, delete overlay, create new overlay, start QEMU).
|
|
6
|
+
Communicates with running QEMU via QMP Unix socket for clean shutdown.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import logging
|
|
11
|
+
import os
|
|
12
|
+
import socket
|
|
13
|
+
import subprocess
|
|
14
|
+
import tempfile
|
|
15
|
+
import time
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Optional
|
|
18
|
+
|
|
19
|
+
import requests
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
# Timing knobs used by QEMUManager and the QMP helpers below (all in seconds).
_VM_READY_TIMEOUT = 300  # seconds to wait for VM screenshot endpoint
_VM_READY_POLL_INTERVAL = 2  # seconds between readiness polls
_QMP_TIMEOUT = 10  # seconds to wait for QMP socket (applied as the socket timeout)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class QEMUConfig:
    """Plain settings holder describing one QEMU/KVM guest.

    Parameters
    ----------
    base_image : Path
        Read-only qcow2 disk image every overlay is backed by.
    overlay_dir : Path
        Directory in which per-instance qcow2 overlays are created.
    memory : str
        Value handed to QEMU's ``-m`` option (e.g. ``"4G"``).
    cpus : int
        vCPU count handed to ``-smp``.
    headless : bool
        When True the guest runs with ``-display none``.
    screen_width : int
        Requested horizontal resolution. Informational: it is only stored
        on this object.
    screen_height : int
        Requested vertical resolution (informational, see ``screen_width``).
    enable_kvm : bool
        Allow KVM hardware acceleration when ``/dev/kvm`` is present.
    """

    def __init__(
        self,
        base_image: Path,
        overlay_dir: Path,
        memory: str = "4G",
        cpus: int = 4,
        headless: bool = True,
        screen_width: int = 1920,
        screen_height: int = 1080,
        enable_kvm: bool = True,
    ) -> None:
        # Disk locations — coerced so callers may also pass plain strings.
        self.base_image, self.overlay_dir = Path(base_image), Path(overlay_dir)

        # Machine resources.
        self.memory, self.cpus = memory, cpus

        # Display settings.
        self.headless = headless
        self.screen_width, self.screen_height = screen_width, screen_height

        # Acceleration.
        self.enable_kvm = enable_kvm
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class QEMUManager:
    """Manages the full lifecycle of one QEMU/KVM virtual machine.

    Usage::

        manager = QEMUManager(config)
        manager.start()   # boot VM (allocates ports, creates overlay)
        manager.reset()   # restore initial state (stop → new overlay → boot)
        manager.stop()    # shut down VM cleanly

    After ``start()`` the following properties are available (before
    ``start()`` and after ``stop()`` they raise ``RuntimeError``):

    - :attr:`server_port`   — host port forwarded to guest :5000 (Flask agent)
    - :attr:`chromium_port` — host port forwarded to guest :9222 (Chromium DevTools)
    - :attr:`vnc_port`      — host port forwarded to guest :8006 (VNC/noVNC)
    - :attr:`vlc_port`      — host port forwarded to guest :8080 (VLC HTTP)

    Parameters
    ----------
    config : QEMUConfig
        VM configuration (image paths, memory, CPU, display settings).
    """

    def __init__(self, config: "QEMUConfig") -> None:
        self.config = config
        # QEMU is started with -daemonize, so it is tracked through the PID
        # file rather than through a Popen handle; _process stays None.
        self._process: Optional[subprocess.Popen] = None
        self._pid_file: Optional[Path] = None
        self._overlay_path: Optional[Path] = None
        self._qmp_socket: Optional[Path] = None

        self._server_port: Optional[int] = None
        self._chromium_port: Optional[int] = None
        self._vnc_port: Optional[int] = None
        self._vlc_port: Optional[int] = None

    # ------------------------------------------------------------------
    # Public properties
    # ------------------------------------------------------------------

    @staticmethod
    def _require_port(port: Optional[int]) -> int:
        """Return ``port``, or raise RuntimeError if it has not been allocated."""
        if port is None:
            raise RuntimeError("VM not started — call start() first")
        return port

    @property
    def server_port(self) -> int:
        """Host port forwarded to guest :5000. Raises RuntimeError if not started."""
        return self._require_port(self._server_port)

    @property
    def chromium_port(self) -> int:
        """Host port forwarded to guest :9222. Raises RuntimeError if not started."""
        return self._require_port(self._chromium_port)

    @property
    def vnc_port(self) -> int:
        """Host port forwarded to guest :8006. Raises RuntimeError if not started."""
        return self._require_port(self._vnc_port)

    @property
    def vlc_port(self) -> int:
        """Host port forwarded to guest :8080. Raises RuntimeError if not started."""
        return self._require_port(self._vlc_port)

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def start(self) -> None:
        """Allocate ports, create overlay, launch QEMU, and wait for VM readiness.

        If any step fails (including an interrupt during the readiness wait),
        everything acquired so far — QEMU process, overlay, socket/PID files
        and port reservations — is released via ``stop()`` before the
        original exception is re-raised.

        Raises
        ------
        RuntimeError
            If no port can be reserved, or ``qemu-img`` / QEMU exits non-zero.
        TimeoutError
            If the guest agent does not answer within ``_VM_READY_TIMEOUT``.
        """
        self.config.overlay_dir.mkdir(parents=True, exist_ok=True)

        try:
            self._server_port = _reserve_free_port(5000)
            self._chromium_port = _reserve_free_port(9222)
            self._vnc_port = _reserve_free_port(8006)
            self._vlc_port = _reserve_free_port(8080)

            # The (unique) server port doubles as the per-instance file suffix.
            self._overlay_path = self.config.overlay_dir / f"overlay_{self._server_port}.qcow2"
            self._qmp_socket = Path(tempfile.gettempdir()) / f"qemu_qmp_{self._server_port}.sock"
            self._pid_file = Path(tempfile.gettempdir()) / f"qemu_{self._server_port}.pid"

            self._create_overlay()
            self._launch_qemu()
            self._wait_for_ready()
        except BaseException:
            # The caller never gets a usable manager on failure, so nobody
            # else would release the reservations or stop a half-booted VM.
            self.stop()
            raise

    def reset(self) -> None:
        """Restore the VM to its initial state.

        Stops the running instance, deletes the overlay, creates a fresh one,
        and boots the VM again on the same ports.

        Raises
        ------
        RuntimeError
            If the manager is not started (never started, or already stopped).
        """
        if self._overlay_path is None:
            raise RuntimeError("VM not started — call start() first")
        self._stop_qemu()
        self._overlay_path.unlink(missing_ok=True)
        self._create_overlay()
        self._launch_qemu()
        self._wait_for_ready()

    def stop(self) -> None:
        """Shut down the VM and clean up overlay, socket and PID files.

        Safe to call more than once and on a manager that was never started:
        after the first call all ports and paths are forgotten, so a repeated
        call cannot delete a reservation that meanwhile belongs to another
        worker.
        """
        self._stop_qemu()
        for path in (self._overlay_path, self._qmp_socket, self._pid_file):
            if path is None:
                continue
            try:
                path.unlink(missing_ok=True)
            except OSError as exc:
                # Best effort: a leftover file must not mask the shutdown.
                logger.warning("Could not remove %s: %s", path, exc)
        self._overlay_path = None
        self._qmp_socket = None
        self._pid_file = None

        # Release port reservations so other workers can reuse these ports
        for attr in ("_server_port", "_chromium_port", "_vnc_port", "_vlc_port"):
            port = getattr(self, attr)
            if port is not None:
                _release_port_reservation(port)
                setattr(self, attr, None)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _create_overlay(self) -> None:
        """Create a fresh qcow2 overlay on top of the read-only base image.

        Raises
        ------
        RuntimeError
            If no overlay path has been assigned yet, or ``qemu-img`` fails.
        """
        if self._overlay_path is None:
            raise RuntimeError("VM not started — call start() first")
        if self._overlay_path.exists():
            logger.warning("Stale overlay found at %s — removing before recreating", self._overlay_path)
            self._overlay_path.unlink()
        cmd = [
            "qemu-img",
            "create",
            "-f",
            "qcow2",
            "-b",
            str(self.config.base_image),
            "-F",
            "qcow2",
            str(self._overlay_path),
        ]
        logger.info("Creating overlay: %s", " ".join(cmd))
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(f"qemu-img create failed (exit {result.returncode}):\n{result.stderr}")

    def _launch_qemu(self) -> None:
        """Build the QEMU command line and launch QEMU as a daemon.

        Raises
        ------
        RuntimeError
            If ports/paths have not been allocated, or QEMU exits non-zero.
        """
        if self._overlay_path is None or self._qmp_socket is None or self._pid_file is None:
            raise RuntimeError("VM not started — call start() first")

        for path in (self._qmp_socket, self._pid_file):
            if path.exists():
                logger.warning("Removing stale file before launch: %s", path)
                path.unlink()

        qemu_cmd = ["qemu-system-x86_64"]

        # KVM hardware acceleration
        if not self.config.enable_kvm:
            logger.info("KVM disabled by configuration — running without hardware acceleration (slow)")
        elif os.path.exists("/dev/kvm"):
            qemu_cmd += ["-enable-kvm"]
            logger.info("KVM acceleration enabled")
        else:
            logger.warning("KVM not available — running without hardware acceleration (slow)")

        # Machine resources
        qemu_cmd += ["-m", self.config.memory, "-smp", str(self.config.cpus)]

        # Disk
        qemu_cmd += ["-drive", f"file={self._overlay_path},format=qcow2,if=virtio"]

        # Network: SLIRP user-mode with port forwarding. The properties raise
        # RuntimeError if a port is missing, so "None" never reaches QEMU.
        hostfwds = ",".join(
            [
                f"hostfwd=tcp::{self.server_port}-:5000",
                f"hostfwd=tcp::{self.chromium_port}-:9222",
                f"hostfwd=tcp::{self.vnc_port}-:8006",
                f"hostfwd=tcp::{self.vlc_port}-:8080",
            ]
        )
        qemu_cmd += [
            "-netdev",
            f"user,id=net0,{hostfwds}",
            "-device",
            "virtio-net-pci,netdev=net0",
        ]

        # Display
        if self.config.headless:
            qemu_cmd += ["-display", "none"]
        else:
            qemu_cmd += ["-vga", "virtio"]

        # QMP control socket
        qemu_cmd += ["-qmp", f"unix:{self._qmp_socket},server,nowait"]

        # PID file and daemonize
        qemu_cmd += ["-pidfile", str(self._pid_file), "-daemonize"]

        logger.info("Starting QEMU: %s", " ".join(qemu_cmd))
        result = subprocess.run(qemu_cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(
                f"QEMU failed to start (exit {result.returncode}):\nstdout: {result.stdout}\nstderr: {result.stderr}"
            )
        logger.info("QEMU launched (server_port=%d)", self._server_port)

    def _wait_for_ready(self) -> None:
        """Poll the guest's /screenshot endpoint until it answers with HTTP 200.

        Raises
        ------
        TimeoutError
            If the endpoint is not ready within ``_VM_READY_TIMEOUT`` seconds.
        """
        # Monotonic clock: the deadline must not move if the wall clock is adjusted.
        deadline = time.monotonic() + _VM_READY_TIMEOUT
        url = f"http://localhost:{self.server_port}/screenshot"
        logger.info("Waiting for VM to be ready at %s ...", url)
        while time.monotonic() < deadline:
            try:
                resp = requests.get(url, timeout=(5, 5))
                if resp.status_code == 200:
                    logger.info("VM is ready")
                    return
            except Exception as exc:
                # Connection errors are expected while the guest boots; keep
                # polling, but leave a trace for debugging.
                logger.debug("Readiness probe failed: %s", exc)
            logger.info("VM not ready yet, retrying in %ds...", _VM_READY_POLL_INTERVAL)
            time.sleep(_VM_READY_POLL_INTERVAL)
        raise TimeoutError(f"VM failed to become ready within {_VM_READY_TIMEOUT}s")

    def _stop_qemu(self) -> None:
        """Send QMP 'quit' to the running QEMU instance and wait for it to exit.

        Falls back to SIGTERM/SIGKILL via the PID file when the QMP socket is
        missing, the QMP exchange fails, or QEMU is still alive
        ``_QMP_TIMEOUT`` seconds after 'quit'. Does nothing if no instance is
        known.
        """
        # Read the PID first: the file may be gone once QEMU has exited.
        pid = self._read_pid()

        if self._qmp_socket is None or not self._qmp_socket.exists():
            self._kill_by_pid()
            return

        try:
            _qmp_quit(str(self._qmp_socket))
            logger.info("Sent QMP quit")
        except Exception as exc:
            logger.warning("QMP quit failed (%s), falling back to SIGTERM", exc)
            self._kill_by_pid()
            return

        # 'quit' is asynchronous. Callers delete the overlay and rebind the
        # forwarded ports right after this returns, so wait for the process.
        if pid is not None and not self._wait_for_pid_exit(pid, _QMP_TIMEOUT):
            logger.warning("QEMU pid %d still running after QMP quit, falling back to SIGTERM", pid)
            self._kill_by_pid()

    def _read_pid(self) -> Optional[int]:
        """Return the PID stored in the PID file, or None if it is unavailable."""
        if self._pid_file is None:
            return None
        try:
            return int(self._pid_file.read_text().strip())
        except (OSError, ValueError):
            return None

    @staticmethod
    def _wait_for_pid_exit(pid: int, timeout: float) -> bool:
        """Poll until process ``pid`` is gone; return False if it outlives ``timeout`` seconds."""
        deadline = time.monotonic() + timeout
        while True:
            try:
                os.kill(pid, 0)  # signal 0: existence check only
            except ProcessLookupError:
                return True
            except PermissionError:
                # The PID exists but is not ours to signal, so it cannot be
                # the QEMU this manager launched — treat ours as gone.
                return True
            if time.monotonic() >= deadline:
                return False
            time.sleep(0.1)

    def _kill_by_pid(self) -> None:
        """Terminate the QEMU process via the PID file (SIGTERM, then SIGKILL after 5s)."""
        import signal

        pid = self._read_pid()
        if pid is None:
            return
        try:
            os.kill(pid, signal.SIGTERM)
            logger.info("Sent SIGTERM to QEMU pid %d", pid)
            # Wait up to 5s for clean exit, then SIGKILL
            if self._wait_for_pid_exit(pid, 5.0):
                return
            os.kill(pid, signal.SIGKILL)
            logger.warning("QEMU pid %d did not exit after SIGTERM — sent SIGKILL", pid)
        except ProcessLookupError:
            return  # process already gone — nothing left to kill
        except Exception as exc:
            logger.warning("Failed to kill QEMU by pid: %s", exc)
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
# ------------------------------------------------------------------
|
|
316
|
+
# QMP communication
|
|
317
|
+
# ------------------------------------------------------------------
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def _qmp_quit(socket_path: str) -> None:
    """Ask the QEMU instance behind the QMP Unix socket ``socket_path`` to quit.

    The exchange is: read the greeting, send ``qmp_capabilities`` and read
    its reply, then send ``quit``. Every socket operation uses
    ``_QMP_TIMEOUT``. An ``OSError`` while reading the reply to ``quit`` is
    ignored, because QEMU closes the connection as it exits; errors from
    the earlier steps propagate to the caller.
    """

    def encode_command(name: str) -> bytes:
        # One QMP command with no arguments, serialized as JSON bytes.
        return json.dumps({"execute": name}).encode()

    with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as qmp:
        qmp.settimeout(_QMP_TIMEOUT)
        qmp.connect(socket_path)

        # The server speaks first.
        banner = _qmp_recv(qmp)
        logger.debug("QMP greeting: %s", banner)

        # Capability negotiation, then wait for its reply.
        qmp.sendall(encode_command("qmp_capabilities"))
        _qmp_recv(qmp)

        # Finally the actual shutdown request.
        qmp.sendall(encode_command("quit"))
        try:
            _qmp_recv(qmp)
        except OSError:
            # Includes ConnectionResetError: the peer went away after quit — expected.
            pass
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def _qmp_recv(sock: socket.socket) -> dict:
|
|
343
|
+
"""Read a complete JSON object from a QMP socket."""
|
|
344
|
+
data = b""
|
|
345
|
+
while True:
|
|
346
|
+
chunk = sock.recv(4096)
|
|
347
|
+
if not chunk:
|
|
348
|
+
break
|
|
349
|
+
data += chunk
|
|
350
|
+
try:
|
|
351
|
+
return json.loads(data.decode())
|
|
352
|
+
except json.JSONDecodeError:
|
|
353
|
+
continue
|
|
354
|
+
return {}
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
# ------------------------------------------------------------------
|
|
358
|
+
# Port allocation
|
|
359
|
+
# ------------------------------------------------------------------
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def _reserve_free_port(start: int = 5000) -> int:
|
|
363
|
+
"""Atomically find and reserve a free TCP port at or above ``start``.
|
|
364
|
+
|
|
365
|
+
Uses O_EXCL flag to create a reservation file — this is race-free even
|
|
366
|
+
across multiple processes (e.g. parallel Ray workers), unlike bind+release.
|
|
367
|
+
Call _release_port_reservation() when the port is no longer needed.
|
|
368
|
+
"""
|
|
369
|
+
tmp = Path(tempfile.gettempdir())
|
|
370
|
+
for port in range(start, 65354):
|
|
371
|
+
reservation = tmp / f"qemu_port_{port}.reserved"
|
|
372
|
+
try:
|
|
373
|
+
# Atomically create reservation — fails if another process beat us to it
|
|
374
|
+
fd = os.open(str(reservation), os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o644)
|
|
375
|
+
os.close(fd)
|
|
376
|
+
except FileExistsError:
|
|
377
|
+
continue # Another worker reserved this port
|
|
378
|
+
# Verify the port is actually free on the network
|
|
379
|
+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
|
380
|
+
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
|
381
|
+
try:
|
|
382
|
+
s.bind(("localhost", port))
|
|
383
|
+
return port # Reserved and free
|
|
384
|
+
except OSError:
|
|
385
|
+
reservation.unlink() # Port in use — release reservation and try next
|
|
386
|
+
continue
|
|
387
|
+
raise RuntimeError(f"No free port found starting from {start}")
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
def _release_port_reservation(port: Optional[int]) -> None:
|
|
391
|
+
"""Remove the reservation file for a port allocated by _reserve_free_port."""
|
|
392
|
+
if port is None:
|
|
393
|
+
return
|
|
394
|
+
reservation = Path(tempfile.gettempdir()) / f"qemu_port_{port}.reserved"
|
|
395
|
+
try:
|
|
396
|
+
reservation.unlink(missing_ok=True)
|
|
397
|
+
except OSError:
|
|
398
|
+
pass
|