maqet 0.0.1.4__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. maqet/__init__.py +50 -6
  2. maqet/__main__.py +96 -0
  3. maqet/__version__.py +3 -0
  4. maqet/api/__init__.py +35 -0
  5. maqet/api/decorators.py +184 -0
  6. maqet/api/metadata.py +147 -0
  7. maqet/api/registry.py +182 -0
  8. maqet/cli.py +71 -0
  9. maqet/config/__init__.py +26 -0
  10. maqet/config/merger.py +237 -0
  11. maqet/config/parser.py +198 -0
  12. maqet/config/validators.py +519 -0
  13. maqet/config_handlers.py +684 -0
  14. maqet/constants.py +200 -0
  15. maqet/exceptions.py +226 -0
  16. maqet/formatters.py +294 -0
  17. maqet/generators/__init__.py +12 -0
  18. maqet/generators/base_generator.py +101 -0
  19. maqet/generators/cli_generator.py +635 -0
  20. maqet/generators/python_generator.py +247 -0
  21. maqet/generators/rest_generator.py +58 -0
  22. maqet/handlers/__init__.py +12 -0
  23. maqet/handlers/base.py +108 -0
  24. maqet/handlers/init.py +147 -0
  25. maqet/handlers/stage.py +196 -0
  26. maqet/ipc/__init__.py +29 -0
  27. maqet/ipc/retry.py +265 -0
  28. maqet/ipc/runner_client.py +285 -0
  29. maqet/ipc/unix_socket_server.py +239 -0
  30. maqet/logger.py +160 -55
  31. maqet/machine.py +884 -0
  32. maqet/managers/__init__.py +7 -0
  33. maqet/managers/qmp_manager.py +333 -0
  34. maqet/managers/snapshot_coordinator.py +327 -0
  35. maqet/managers/vm_manager.py +683 -0
  36. maqet/maqet.py +1120 -0
  37. maqet/os_interactions.py +46 -0
  38. maqet/process_spawner.py +395 -0
  39. maqet/qemu_args.py +76 -0
  40. maqet/qmp/__init__.py +10 -0
  41. maqet/qmp/commands.py +92 -0
  42. maqet/qmp/keyboard.py +311 -0
  43. maqet/qmp/qmp.py +17 -0
  44. maqet/snapshot.py +473 -0
  45. maqet/state.py +958 -0
  46. maqet/storage.py +702 -162
  47. maqet/validation/__init__.py +9 -0
  48. maqet/validation/config_validator.py +170 -0
  49. maqet/vm_runner.py +523 -0
  50. maqet-0.0.5.dist-info/METADATA +237 -0
  51. maqet-0.0.5.dist-info/RECORD +55 -0
  52. {maqet-0.0.1.4.dist-info → maqet-0.0.5.dist-info}/WHEEL +1 -1
  53. maqet-0.0.5.dist-info/entry_points.txt +2 -0
  54. maqet-0.0.5.dist-info/licenses/LICENSE +21 -0
  55. {maqet-0.0.1.4.dist-info → maqet-0.0.5.dist-info}/top_level.txt +0 -1
  56. maqet/core.py +0 -411
  57. maqet/functions.py +0 -104
  58. maqet-0.0.1.4.dist-info/METADATA +0 -6
  59. maqet-0.0.1.4.dist-info/RECORD +0 -33
  60. qemu/machine/__init__.py +0 -36
  61. qemu/machine/console_socket.py +0 -142
  62. qemu/machine/machine.py +0 -954
  63. qemu/machine/py.typed +0 -0
  64. qemu/machine/qtest.py +0 -191
  65. qemu/qmp/__init__.py +0 -59
  66. qemu/qmp/error.py +0 -50
  67. qemu/qmp/events.py +0 -717
  68. qemu/qmp/legacy.py +0 -319
  69. qemu/qmp/message.py +0 -209
  70. qemu/qmp/models.py +0 -146
  71. qemu/qmp/protocol.py +0 -1057
  72. qemu/qmp/py.typed +0 -0
  73. qemu/qmp/qmp_client.py +0 -655
  74. qemu/qmp/qmp_shell.py +0 -618
  75. qemu/qmp/qmp_tui.py +0 -655
  76. qemu/qmp/util.py +0 -219
  77. qemu/utils/__init__.py +0 -162
  78. qemu/utils/accel.py +0 -84
  79. qemu/utils/py.typed +0 -0
  80. qemu/utils/qemu_ga_client.py +0 -323
  81. qemu/utils/qom.py +0 -273
  82. qemu/utils/qom_common.py +0 -175
  83. qemu/utils/qom_fuse.py +0 -207
@@ -0,0 +1,683 @@
1
+ """
2
+ VM Manager
3
+
4
+ Manages VM lifecycle operations: add, start, stop, remove, list.
5
+ Extracted from Maqet class to follow Single Responsibility Principle.
6
+ """
7
+
8
+ import os
9
+ import time
10
+ from pathlib import Path
11
+ from typing import Any, Dict, List, Optional, Union
12
+
13
+ from ..config import ConfigMerger
14
+ from ..constants import Intervals, Timeouts
15
+ from ..exceptions import (
16
+ ConfigurationError,
17
+ RunnerSpawnError,
18
+ StateError,
19
+ VMAlreadyExistsError,
20
+ VMLifecycleError,
21
+ VMNotFoundError,
22
+ VMNotRunningError,
23
+ VMStartError,
24
+ VMStopError,
25
+ )
26
+ from ..logger import LOG
27
+ from ..state import StateManager, VMInstance
28
+
29
+ # Legacy exception aliases (backward compatibility): code in this module raises and catches errors under these older names.
30
+ VMManagerError = VMLifecycleError
31
+ ConfigError = ConfigurationError
32
+ StateManagerError = StateError
33
+
34
+
35
+ class VMManager:
36
+ """
37
+ Manages VM lifecycle operations.
38
+
39
+ Responsibilities:
40
+ - Create VMs (add)
41
+ - Start VMs (spawn runner processes)
42
+ - Stop VMs (via IPC or process kill)
43
+ - Remove VMs (from database)
44
+ - List VMs
45
+ - Clean up dead processes
46
+ """
47
+
48
def __init__(self, state_manager: StateManager, config_parser):
    """
    Set up the manager with its two collaborators.

    Args:
        state_manager: Object used for every VM record lookup and update
        config_parser: Object whose validate_config() is applied to the
            merged configuration when a VM is added
    """
    self.state_manager, self.config_parser = state_manager, config_parser
    LOG.debug("VMManager initialized")
59
+
60
def add(
    self,
    config: Optional[Union[str, List[str]]] = None,
    name: Optional[str] = None,
    empty: bool = False,
    **kwargs,
) -> str:
    """
    Create a new VM from configuration file(s) or parameters.

    Args:
        config: Path to YAML configuration file, or list of config
            files for deep-merge
        name: VM name. Falls back to a "name" key in the merged
            configuration, then to an auto-generated name
        empty: Create empty VM without any configuration (won't be
            startable until configured). Cannot be combined with
            config files or configuration parameters
        **kwargs: Additional VM configuration parameters; they are
            deep-merged over the file configuration and take
            precedence. A "_client_cwd" entry is removed and
            discarded before merging

    Returns:
        VM instance ID

    Raises:
        VMManagerError: If VM creation fails

    Examples:
        Single config: add(config="vm.yaml", name="myvm")
        Multiple configs: add(
            config=["base.yaml", "custom.yaml"], name="myvm"
        )
        Config + params: add(config="base.yaml", memory="8G", cpu=4)
        Empty VM: add(name="placeholder-vm", empty=True)
    """
    import uuid

    def _generated_name(prefix: str) -> str:
        # Last UUID4 group, trimmed to 8 hex characters.
        return f"{prefix}-{str(uuid.uuid4()).split('-')[-1][:8]}"

    try:
        # "_client_cwd" must not leak into the VM configuration (and must
        # not trip the "no parameters with --empty" check below).
        kwargs.pop("_client_cwd", None)

        if empty:
            if config:
                raise VMManagerError(
                    "Cannot specify config files with --empty flag"
                )
            if kwargs:
                raise VMManagerError(
                    "Cannot specify configuration parameters "
                    "with --empty flag"
                )
            # Empty VMs skip validation entirely: there is nothing to
            # validate yet.
            return self.state_manager.create_vm(
                name or _generated_name("empty-vm"), {}, None
            )

        # Normal path: load and deep-merge configuration files. When a
        # list is given, the first file is the one recorded for the VM.
        if config:
            config_data = ConfigMerger.load_and_merge_files(config)
            config_file = config if isinstance(config, str) else config[0]
        else:
            config_data = {}
            config_file = None

        # kwargs take precedence over file contents.
        if kwargs:
            config_data = ConfigMerger.deep_merge(config_data, kwargs)

        # Name priority: explicit argument > config "name" > generated.
        if not name:
            name = config_data.get("name")

        # "name" is VM metadata, not QEMU configuration, so it never
        # reaches validation or storage as part of config_data.
        if "name" in config_data:
            config_data = {
                k: v for k, v in config_data.items() if k != "name"
            }

        if not name:
            name = _generated_name("vm")

        config_data = self.config_parser.validate_config(config_data)
        return self.state_manager.create_vm(name, config_data, config_file)

    # Specific handlers first, for better error messages.
    except FileNotFoundError as e:
        raise VMManagerError(
            f"Configuration file not found: {e.filename}. "
            "Check that the file path is correct."
        ) from e
    except PermissionError as e:
        # Kept on one logical field: a newline inside a replacement field
        # of a single-quoted f-string is a SyntaxError before Python 3.12.
        raise VMManagerError(
            "Permission denied accessing configuration file: "
            f"{e.filename}. "
            "Check file permissions and ownership."
        ) from e
    except ConfigError as e:
        raise VMManagerError(f"Configuration error: {e}") from e
    except StateManagerError as e:
        raise VMManagerError(f"Database error: {e}") from e
    except VMManagerError as e:
        # Errors raised deliberately above are expected failures: keep the
        # same message as before but do not log them as "unexpected".
        raise VMManagerError(f"Failed to create VM: {e}") from e
    except Exception as e:
        # Last resort - log unexpected errors with context
        LOG.error(
            f"Unexpected error creating VM: {type(e).__name__}: {e}",
            exc_info=True,
        )
        raise VMManagerError(f"Failed to create VM: {e}") from e
198
+
199
def start(self, vm_id: str) -> VMInstance:
    """
    Start a virtual machine by spawning a detached VM runner process.

    Steps performed:
    - Look the VM up and reject it if a live runner already exists
      (a "running" record with no live runner is reset to "stopped")
    - Require a "binary" entry in the VM configuration
    - Spawn the runner and wait for it to become ready; the runner is
      killed if it does not become ready in time
    - Re-read the VM record and require status "running"

    Args:
        vm_id: VM identifier (name or ID)

    Returns:
        VM instance information, as re-read after the runner is ready

    Raises:
        VMManagerError: If VM start fails
    """
    try:
        vm = self.state_manager.get_vm(vm_id)
        if not vm:
            raise VMManagerError(f"VM '{vm_id}' not found")

        from ..process_spawner import (
            get_socket_path,
            is_runner_alive,
            kill_runner,
            spawn_vm_runner,
            wait_for_vm_ready,
        )

        if vm.status == "running":
            if vm.runner_pid and is_runner_alive(vm.runner_pid):
                raise VMManagerError(
                    f"VM '{vm_id}' is already running "
                    f"(runner PID: {vm.runner_pid})"
                )
            # Stale state - clean up and continue with a fresh start.
            LOG.warning(
                f"VM '{vm_id}' has stale 'running' status, cleaning up"
            )
            self.state_manager.update_vm_status(
                vm_id, "stopped", runner_pid=None, socket_path=None
            )

        # Empty VMs (and configs without a binary) cannot be started.
        if not vm.config_data or not vm.config_data.get("binary"):
            raise VMManagerError(
                f"VM '{vm_id}' cannot be started: missing required "
                f"configuration. Use 'maqet apply {vm_id} "
                f"--config <config.yaml>' to add configuration."
            )

        try:
            db_path = self.state_manager.xdg.database_path
            runner_pid = spawn_vm_runner(
                vm.id, db_path, timeout=Timeouts.PROCESS_SPAWN
            )
            LOG.info(
                f"Spawned VM runner process for '{vm_id}' (PID: {runner_pid})"
            )
        except Exception as e:
            raise VMManagerError(f"Failed to spawn VM runner: {e}") from e

        socket_path = get_socket_path(vm.id)
        if not wait_for_vm_ready(
            vm.id, socket_path, timeout=Timeouts.VM_START
        ):
            # Runner process started but never became ready - do not
            # leave it behind.
            kill_runner(runner_pid, force=True)
            raise VMManagerError(
                "VM runner did not become ready within timeout"
            )

        # Verify the record now says "running".
        vm_updated = self.state_manager.get_vm(vm_id)
        if not vm_updated:
            # Without this guard the failure surfaces as an opaque
            # "'NoneType' object has no attribute 'status'".
            raise VMManagerError(
                f"VM '{vm_id}' is no longer present in the database"
            )
        if vm_updated.status != "running":
            raise VMManagerError(
                f"VM runner started but VM status is '{vm_updated.status}'"
            )

        return vm_updated

    except Exception as e:
        raise VMManagerError(f"Failed to start VM '{vm_id}': {e}") from e
289
+
290
def stop(
    self, vm_id: str, force: bool = False, timeout: int = 30
) -> VMInstance:
    """
    Stop a VM by sending stop command to VM runner or killing runner process.

    Behaviour, in order:
    - A VM that is not "running" is returned as-is, after normalising any
      status other than "stopped" to "stopped"
    - If the runner process is missing or dead, a still-alive QEMU process
      recorded for the VM is signalled and the record is reset
    - Unless force is set, a "stop" command is sent to the runner via IPC
    - If force is set or IPC fails, the runner process is killed

    Args:
        vm_id: VM identifier (name or ID)
        force: If True, skip IPC and kill immediately (SIGKILL for an
            orphaned QEMU process). If False, try a graceful stop first
            (SIGTERM for an orphaned QEMU process)
        timeout: Timeout passed to the IPC "stop" command

    Returns:
        VM instance information

    Raises:
        VMManagerError: If VM stop fails
    """
    import signal

    try:
        vm = self.state_manager.get_vm(vm_id)
        if not vm:
            raise VMManagerError(f"VM '{vm_id}' not found")

        if vm.status != "running":
            LOG.info(f"VM '{vm_id}' is not running (status: {vm.status})")
            # Ensure status is "stopped" (not "created", etc.)
            if vm.status != "stopped":
                self.state_manager.update_vm_status(
                    vm_id, "stopped", pid=None, runner_pid=None, socket_path=None
                )
                vm = self.state_manager.get_vm(vm_id)
            return vm

        from ..process_spawner import is_runner_alive, kill_runner
        from ..ipc.runner_client import RunnerClient, RunnerClientError

        if not vm.runner_pid or not is_runner_alive(vm.runner_pid):
            # Runner missing/dead - check for orphaned QEMU process
            LOG.warning(
                f"VM '{vm_id}' runner process not found, checking for orphaned QEMU"
            )

            if vm.pid:
                try:
                    # Signal 0 only checks that the process exists.
                    os.kill(vm.pid, 0)

                    LOG.warning(
                        f"Found orphaned QEMU process (PID {vm.pid}), "
                        f"terminating it"
                    )
                    os.kill(
                        vm.pid,
                        signal.SIGKILL if force else signal.SIGTERM,
                    )

                    # Wait briefly for process to die
                    time.sleep(0.5)

                except ProcessLookupError:
                    LOG.debug(f"QEMU process {vm.pid} already dead")
                except PermissionError:
                    LOG.error(
                        f"Permission denied when killing QEMU process {vm.pid}"
                    )
                except Exception as e:
                    # Best effort: the record is reset below regardless.
                    LOG.error(f"Failed to kill QEMU process {vm.pid}: {e}")

            self.state_manager.update_vm_status(
                vm_id, "stopped", pid=None, runner_pid=None, socket_path=None
            )
            return self.state_manager.get_vm(vm_id)

        # Try graceful stop via IPC (if not force)
        if not force:
            client = RunnerClient(vm.id, self.state_manager)

            try:
                client.send_command("stop", timeout=timeout)
                LOG.info(f"VM '{vm_id}' stopped gracefully via IPC")

                # Wait briefly for runner to exit and update DB
                time.sleep(Intervals.CLEANUP_WAIT)

                return self.state_manager.get_vm(vm_id)

            except RunnerClientError as e:
                LOG.warning(
                    f"IPC stop failed for '{vm_id}': {e}, "
                    f"falling back to SIGTERM"
                )

        # Fallback (or force): kill runner process directly
        LOG.info(
            f"Killing VM runner process for '{vm_id}' "
            f"(PID: {vm.runner_pid}, force={force})"
        )
        if not kill_runner(vm.runner_pid, force=force):
            raise VMManagerError(
                f"Failed to kill runner process {vm.runner_pid}"
            )

        # Wait briefly for cleanup
        time.sleep(Intervals.CLEANUP_WAIT)

        vm_updated = self.state_manager.get_vm(vm_id)
        if vm_updated.status == "running":
            # Runner didn't update the record on exit - do it manually.
            # NOTE(review): unlike the other resets in this method, the
            # QEMU pid is not cleared here - confirm whether intentional.
            self.state_manager.update_vm_status(
                vm_id, "stopped", runner_pid=None, socket_path=None
            )
            vm_updated = self.state_manager.get_vm(vm_id)

        return vm_updated

    except Exception as e:
        raise VMManagerError(f"Failed to stop VM '{vm_id}': {e}") from e
422
+
423
def remove(
    self,
    vm_id: Optional[str] = None,
    force: bool = False,
    all: bool = False,
    clean_storage: bool = False,
) -> bool:
    """
    Remove one virtual machine, or every virtual machine.

    Exactly one of vm_id and all must be given.

    Args:
        vm_id: VM identifier (name or ID)
        force: Force removal even if VM is running
        all: Remove all virtual machines
        clean_storage: Also delete associated storage files

    Returns:
        Result of the single-VM or bulk removal helper

    Raises:
        VMManagerError: If the arguments are inconsistent or removal fails
    """
    try:
        if all:
            # Bulk mode excludes naming an individual VM.
            if vm_id:
                raise VMManagerError("Cannot specify both vm_id and --all flag")
            return self._remove_all_vms(force, clean_storage)

        if not vm_id:
            raise VMManagerError("Must specify either vm_id or --all flag")

        return self._remove_single_vm(vm_id, force, clean_storage)

    except Exception as exc:
        raise VMManagerError(f"Failed to remove VM(s): {exc}")
461
+
462
def _remove_single_vm(
    self, vm_id: str, force: bool, clean_storage: bool = False
) -> bool:
    """
    Remove a single VM.

    Args:
        vm_id: VM identifier (name or ID)
        force: Stop the VM first if it is running; without it a running
            VM is refused
        clean_storage: Also delete the files named by "file" entries in
            the VM's "storage" configuration (failures are logged, not
            raised)

    Returns:
        True once the VM has been removed from the database

    Raises:
        VMManagerError: If the VM is unknown, is running without force,
            or cannot be removed from the database
    """
    vm = self.state_manager.get_vm(vm_id)
    if not vm:
        raise VMManagerError(f"VM '{vm_id}' not found")

    if vm.status == "running":
        if not force:
            raise VMManagerError(
                f"VM '{vm_id}' is running. Use --force to remove "
                f"running VMs"
            )
        self.stop(vm_id, force=True)

    if clean_storage:
        # Tolerate a missing/None config or storage list (e.g. empty VMs)
        # and entries that are not mappings.
        storage_configs = (vm.config_data or {}).get("storage") or []
        for storage in storage_configs:
            if not isinstance(storage, dict) or "file" not in storage:
                continue
            storage_path = Path(storage["file"])
            if not storage_path.exists():
                continue
            try:
                LOG.info(f"Removing storage file: {storage_path}")
                storage_path.unlink()
            except OSError as e:
                # Message built on one line: a newline inside an f-string
                # replacement field is a SyntaxError before Python 3.12.
                LOG.warning(
                    f"Failed to remove storage file {storage_path}: {e}"
                )

    removed = self.state_manager.remove_vm(vm_id)
    if not removed:
        raise VMManagerError(f"Failed to remove VM '{vm_id}' from database")

    return True
502
+
503
def _remove_all_vms(
    self, force: bool, clean_storage: bool = False
) -> bool:
    """
    Remove all VMs after an interactive confirmation.

    Prints a table of the VMs, asks for confirmation on stdin, then stops
    (forcefully) and removes each VM, printing per-VM progress.

    Args:
        force: Suppresses the "running VMs will be stopped" warning; the
            confirmation prompt is still shown
        clean_storage: Also delete the files named by "file" entries in
            each VM's "storage" configuration. Skipped for a VM that
            could not be stopped

    Returns:
        True if there was nothing to remove or every VM was removed;
        False if the user declined or input was interrupted

    Raises:
        VMManagerError: If one or more VMs could not be removed
    """

    def _purge_storage(vm) -> None:
        # Delete the VM's storage files; failures are logged, not raised.
        config_data = getattr(vm, "config_data", None) or {}
        for storage in config_data.get("storage") or []:
            if not isinstance(storage, dict) or "file" not in storage:
                continue
            storage_path = Path(storage["file"])
            if not storage_path.exists():
                continue
            try:
                LOG.info(f"Removing storage file: {storage_path}")
                storage_path.unlink()
            except OSError as e:
                LOG.warning(
                    f"Failed to remove storage file {storage_path}: {e}"
                )

    all_vms = self.state_manager.list_vms()

    if not all_vms:
        print("No virtual machines found.")
        return True

    # Display VMs that will be removed
    print(f"Found {len(all_vms)} virtual machine(s) to remove:")
    print()

    header = f"{'NAME':<20} {'STATUS':<10} {'PID':<8}"
    separator = "-" * 40
    print(header)
    print(separator)

    running_count = 0
    for vm in all_vms:
        pid_str = str(vm.pid) if vm.pid else "-"
        print(f"{vm.name:<20} {vm.status:<10} {pid_str:<8}")
        if vm.status == "running":
            running_count += 1

    print()

    if running_count > 0 and not force:
        print(
            f"WARNING: {running_count} VM(s) are currently running "
            f"and will be forcefully stopped."
        )
        print("Use --force to skip this warning in the future.")
        print()

    # Confirmation prompt; EOF or Ctrl-C counts as "no".
    try:
        response = (
            input(
                f"Are you sure you want to remove ALL {len(all_vms)} "
                f"virtual machines? [y/N]: "
            )
            .strip()
            .lower()
        )
    except (EOFError, KeyboardInterrupt):
        print("\nOperation cancelled.")
        return False

    if response not in ("y", "yes"):
        print("Operation cancelled.")
        return False

    removed_count = 0
    failed_count = 0

    print()
    print("Removing virtual machines...")

    for vm in all_vms:
        try:
            stopped_ok = True
            if vm.status == "running":
                try:
                    self.stop(vm.id, force=True)
                    print(f" Stopped VM: {vm.name}")
                except Exception as e:
                    stopped_ok = False
                    print(f" Warning: Failed to stop VM '{vm.name}': {e}")

            # Honour clean_storage (previously accepted but ignored), but
            # never delete disk files under a VM that may still be running.
            if clean_storage and stopped_ok:
                _purge_storage(vm)

            removed = self.state_manager.remove_vm(vm.id)
            if removed:
                print(f" Removed VM: {vm.name}")
                removed_count += 1
            else:
                print(f" Failed to remove VM: {vm.name}")
                failed_count += 1

        except Exception as e:
            # One bad VM must not abort the rest of the batch.
            print(f" Error removing VM '{vm.name}': {e}")
            failed_count += 1

    print()
    print(
        f"Removal complete: {removed_count} removed, "
        f"{failed_count} failed"
    )

    if failed_count > 0:
        raise VMManagerError(f"Failed to remove {failed_count} VM(s)")

    return True
600
+
601
def list_vms(self, status: Optional[str] = None) -> List[VMInstance]:
    """
    Return the VMs known to the state manager, refreshing stale entries.

    Any returned VM that is marked "running" with a recorded pid whose
    process is no longer alive is updated to "stopped" (pid cleared),
    both in the database and on the returned object.

    Args:
        status: Filter by status ('running', 'stopped', 'created',
            'failed')

    Returns:
        List of VM instances
    """
    state = self.state_manager
    instances = state.list_vms(status_filter=status)

    for instance in instances:
        # Only "running" entries with a recorded pid can be stale.
        if instance.status != "running" or not instance.pid:
            continue
        if state._is_process_alive(instance.pid):
            continue

        # Process is gone: fix the record and the in-memory copy.
        state.update_vm_status(instance.id, "stopped", pid=None)
        instance.status = "stopped"
        instance.pid = None

    return instances
626
+
627
def cleanup_dead_processes(self) -> List[str]:
    """
    Check for VMs with running status but dead runner processes.
    Update DB to reflect reality.

    For each such VM, a QEMU process recorded for it that is still alive
    is killed (SIGKILL), then the record is reset to "stopped" with its
    pid, runner pid and socket path cleared.

    NOTE(review): intended for cleaning up stale state from crashed
    runners or improperly terminated VMs; this class's __init__ does not
    call it, so it is presumably invoked by the owner of the manager -
    confirm against the caller.

    Returns:
        List of VM IDs that were cleaned up
    """
    import signal

    from ..process_spawner import is_runner_alive

    cleaned = []

    all_vms = self.state_manager.list_vms()
    running_vms = [vm for vm in all_vms if vm.status == "running"]

    for vm in running_vms:
        if vm.runner_pid and is_runner_alive(vm.runner_pid):
            continue

        LOG.warning(
            f"VM '{vm.name}' marked as running but runner process "
            f"(PID: {vm.runner_pid}) is dead"
        )

        # Check for orphaned QEMU process
        if vm.pid:
            try:
                # Signal 0 only checks that the process exists.
                os.kill(vm.pid, 0)

                LOG.warning(
                    f"Found orphaned QEMU process (PID {vm.pid}) for VM "
                    f"'{vm.name}', terminating it"
                )
                os.kill(vm.pid, signal.SIGKILL)
                time.sleep(0.5)

            except ProcessLookupError:
                LOG.debug(f"QEMU process {vm.pid} already dead")
            except PermissionError:
                LOG.error(
                    f"Permission denied when killing QEMU process {vm.pid}"
                )
            except Exception as e:
                # Best effort: the record is reset below regardless.
                LOG.error(f"Failed to kill QEMU process {vm.pid}: {e}")

        # Address the record by ID, matching the value reported back to
        # the caller and the other status updates in this class.
        self.state_manager.update_vm_status(
            vm.id, "stopped", pid=None, runner_pid=None, socket_path=None
        )
        cleaned.append(vm.id)

    return cleaned