cua-computer 0.1.29__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,943 @@
1
+ """
2
+ Lumier VM provider implementation.
3
+
4
+ This provider uses Docker containers running the Lumier image to create
5
+ macOS and Linux VMs. It handles VM lifecycle operations through Docker
6
+ commands and container management.
7
+ """
8
+
9
+ import logging
10
+ import os
11
+ import json
12
+ import asyncio
13
+ from typing import Dict, List, Optional, Any
14
+ import subprocess
15
+ import time
16
+ import re
17
+
18
+ from ..base import BaseVMProvider, VMProviderType
19
+ from ..lume_api import (
20
+ lume_api_get,
21
+ lume_api_run,
22
+ lume_api_stop,
23
+ lume_api_update
24
+ )
25
+
26
+ # Setup logging
27
+ logger = logging.getLogger(__name__)
28
+
29
+ # Check if Docker is available (Lumier VMs run inside Docker containers, so Docker is required)
30
+ try:
31
+ subprocess.run(["docker", "--version"], capture_output=True, check=True)
32
+ HAS_LUMIER = True
33
+ except (subprocess.SubprocessError, FileNotFoundError):
34
+ HAS_LUMIER = False
35
+
36
+
37
+ class LumierProvider(BaseVMProvider):
38
+ """
39
+ Lumier VM Provider implementation using Docker containers.
40
+
41
+ This provider uses Docker to run Lumier containers that can create
42
+ macOS and Linux VMs through containerization.
43
+ """
44
+
45
+ def __init__(
46
+ self,
47
+ port: Optional[int] = 7777,
48
+ host: str = "localhost",
49
+ storage: Optional[str] = None, # Can be a path or 'ephemeral'
50
+ shared_path: Optional[str] = None,
51
+ image: str = "macos-sequoia-cua:latest", # VM image to use
52
+ verbose: bool = False,
53
+ ephemeral: bool = False,
54
+ noVNC_port: Optional[int] = 8006,
55
+ ):
56
+ """Initialize the Lumier VM Provider.
57
+
58
+ Args:
59
+ port: Port for the API server (default: 7777)
60
+ host: Hostname for the API server (default: localhost)
61
+ storage: Path for persistent VM storage
62
+ shared_path: Path for shared folder between host and VM
63
+ image: VM image to use (e.g. "macos-sequoia-cua:latest")
64
+ verbose: Enable verbose logging
65
+ ephemeral: Use ephemeral (temporary) storage
66
+ noVNC_port: Specific port for noVNC interface (default: 8006)
67
+ """
68
+ self.host = host
69
+ # Always ensure api_port has a valid value (7777 is the default)
70
+ self.api_port = 7777 if port is None else port
71
+ self.vnc_port = noVNC_port # Port exposed for the noVNC web interface (defaults to 8006); mapped in run_vm
72
+ self.ephemeral = ephemeral
73
+
74
+ # Handle ephemeral storage (temporary directory)
75
+ if ephemeral:
76
+ self.storage = "ephemeral"
77
+ else:
78
+ self.storage = storage
79
+
80
+ self.shared_path = shared_path
81
+ self.image = image # Store the VM image name to use
82
+ # The container_name will be set in run_vm using the VM name
83
+ self.verbose = verbose
84
+ self._container_id = None
85
+ self._api_url = None # Will be set after container starts
86
+
87
+ @property
88
+ def provider_type(self) -> VMProviderType:
89
+ """Return the provider type."""
90
+ return VMProviderType.LUMIER
91
+
92
+ def _parse_memory(self, memory_str: str) -> int:
93
+ """Parse memory string to MB integer.
94
+
95
+ Examples:
96
+ "8GB" -> 8192
97
+ "1024MB" -> 1024
98
+ "512" -> 512
99
+ """
100
+ if isinstance(memory_str, int):
101
+ return memory_str
102
+
103
+ if isinstance(memory_str, str):
104
+ # Extract number and unit
105
+ match = re.match(r"(\d+)([A-Za-z]*)", memory_str)
106
+ if match:
107
+ value, unit = match.groups()
108
+ value = int(value)
109
+ unit = unit.upper()
110
+
111
+ if unit == "GB" or unit == "G":
112
+ return value * 1024
113
+ elif unit == "MB" or unit == "M" or unit == "":
114
+ return value
115
+
116
+ # Default fallback
117
+ logger.warning(f"Could not parse memory string '{memory_str}', using 8GB default")
118
+ return 8192 # Default to 8GB
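As a quick illustration of the parsing rules above (an editorial example, not part of the package source):

    provider = LumierProvider()
    provider._parse_memory("8GB")      # -> 8192
    provider._parse_memory("1024MB")   # -> 1024
    provider._parse_memory(512)        # -> 512
    provider._parse_memory("invalid")  # -> 8192, after logging a warning about the unparseable value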
119
+
120
+ # Helper methods for interacting with the Lumier API through curl
121
+ # These methods handle the various VM operations via API calls
122
+
123
+ def _get_curl_error_message(self, return_code: int) -> str:
124
+ """Get a descriptive error message for curl return codes.
125
+
126
+ Args:
127
+ return_code: The curl return code
128
+
129
+ Returns:
130
+ A descriptive error message
131
+ """
132
+ # Map common curl error codes to helpful messages
133
+ if return_code == 7:
134
+ return "Failed to connect - API server is starting up"
135
+ elif return_code == 22:
136
+ return "HTTP error returned from API server"
137
+ elif return_code == 28:
138
+ return "Operation timeout - API server is slow to respond"
139
+ elif return_code == 52:
140
+ return "Empty reply from server - API is starting but not ready"
141
+ elif return_code == 56:
142
+ return "Network problem during data transfer"
143
+ else:
144
+ return f"Unknown curl error code: {return_code}"
145
+
146
+
147
+ async def get_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
148
+ """Get VM information by name.
149
+
150
+ Args:
151
+ name: Name of the VM to get information for
152
+ storage: Optional storage path override. If provided, this will be used
153
+ instead of the provider's default storage path.
154
+
155
+ Returns:
156
+ Dictionary with VM information including status, IP address, etc.
157
+ """
158
+ if not HAS_LUMIER:
159
+ logger.error("Docker is not available. Cannot get VM status.")
160
+ return {
161
+ "name": name,
162
+ "status": "unavailable",
163
+ "error": "Docker is not available"
164
+ }
165
+
166
+ # Store the current name for API requests
167
+ self.container_name = name
168
+
169
+ try:
170
+ # Check if the container exists and is running
171
+ check_cmd = ["docker", "ps", "-a", "--filter", f"name={name}", "--format", "{{.Status}}"]
172
+ check_result = subprocess.run(check_cmd, capture_output=True, text=True)
173
+ container_status = check_result.stdout.strip()
174
+
175
+ if not container_status:
176
+ logger.info(f"Container {name} does not exist. Will create when run_vm is called.")
177
+ return {
178
+ "name": name,
179
+ "status": "not_found",
180
+ "message": "Container doesn't exist yet"
181
+ }
182
+
183
+ # Container exists, check if it's running
184
+ is_running = container_status.startswith("Up")
185
+
186
+ if not is_running:
187
+ logger.info(f"Container {name} exists but is not running. Status: {container_status}")
188
+ return {
189
+ "name": name,
190
+ "status": "stopped",
191
+ "container_status": container_status,
192
+ }
193
+
194
+ # Container is running, get the IP address and API status from Lumier API
195
+ logger.info(f"Container {name} is running. Getting VM status from API.")
196
+
197
+ # Use the shared lume_api_get function directly
198
+ vm_info = lume_api_get(
199
+ vm_name=name,
200
+ host=self.host,
201
+ port=self.api_port,
202
+ storage=storage if storage is not None else self.storage,
203
+ debug=self.verbose,
204
+ verbose=self.verbose
205
+ )
206
+
207
+ # Check for API errors
208
+ if "error" in vm_info:
209
+ # Use debug level instead of warning to reduce log noise during polling
210
+ logger.debug(f"API request error: {vm_info['error']}")
211
+ return {
212
+ "name": name,
213
+ "status": "running", # Container is running even if API is not responsive
214
+ "api_status": "error",
215
+ "error": vm_info["error"],
216
+ "container_status": container_status
217
+ }
218
+
219
+ # Process the VM status information
220
+ vm_status = vm_info.get("status", "unknown")
221
+ vnc_url = vm_info.get("vncUrl", "")
222
+ ip_address = vm_info.get("ipAddress", "")
223
+
224
+ # IMPORTANT: Always ensure we have a valid IP address for connectivity
225
+ # If the API doesn't return an IP address, default to localhost (127.0.0.1)
226
+ # This makes the behavior consistent with LumeProvider
227
+ if not ip_address and vm_status == "running":
228
+ ip_address = "127.0.0.1"
229
+ logger.info(f"No IP address returned from API, defaulting to {ip_address}")
230
+ vm_info["ipAddress"] = ip_address
231
+
232
+ logger.info(f"VM {name} status: {vm_status}")
233
+
234
+ if ip_address and vnc_url:
235
+ logger.info(f"VM {name} has IP: {ip_address} and VNC URL: {vnc_url}")
236
+ elif not ip_address and not vnc_url and vm_status != "running":
237
+ # Not running is expected in this case
238
+ logger.info(f"VM {name} is not running yet. Status: {vm_status}")
239
+ else:
240
+ # Missing IP or VNC but status is running - this is unusual but handled with default IP
241
+ logger.warning(f"VM {name} is running but missing expected fields. API response: {vm_info}")
242
+
243
+ # Return the full status information
244
+ return {
245
+ "name": name,
246
+ "status": vm_status,
247
+ "ip_address": ip_address,
248
+ "vnc_url": vnc_url,
249
+ "api_status": "ok",
250
+ "container_status": container_status,
251
+ **vm_info # Include all fields from the API response
252
+ }
253
+ except subprocess.SubprocessError as e:
254
+ logger.error(f"Failed to check container status: {e}")
255
+ return {
256
+ "name": name,
257
+ "status": "error",
258
+ "error": f"Failed to check container status: {str(e)}"
259
+ }
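For orientation, a successful lookup of a running VM resolves to a dictionary shaped roughly like this (values are illustrative; any extra fields returned by the Lumier API, such as vncUrl or ipAddress, are merged in as well):

    {
        "name": "my-vm",
        "status": "running",
        "ip_address": "192.168.64.5",
        "vnc_url": "<noVNC URL reported by the API>",
        "api_status": "ok",
        "container_status": "Up 2 minutes",
    }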
260
+
261
+ async def list_vms(self) -> List[Dict[str, Any]]:
262
+ """List all VMs managed by this provider.
263
+
264
+ The Lumier provider manages a single VM per container, so this returns at most one entry (looked up under the name "default").
265
+ """
266
+ try:
267
+ status = await self.get_vm("default")
268
+ return [status] if status.get("status") != "unknown" else []
269
+ except Exception as e:
270
+ logger.error(f"Failed to list VMs: {e}")
271
+ return []
272
+
273
+ async def run_vm(self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]:
274
+ """Run a VM with the given options.
275
+
276
+ Args:
277
+ image: Name/tag of the VM image to use (currently informational; the image passed to the constructor is what is launched)
278
+ name: Name of the VM to run (used for the container name and the VM_NAME environment variable)
279
+ run_opts: Options for running the VM, including:
280
+ - cpu: Number of CPU cores
281
+ - memory: Amount of memory (e.g. "8GB")
282
+ - noVNC_port: Specific port for noVNC interface
283
+
284
+ Returns:
285
+ Dictionary with VM status information
286
+ """
287
+ # Set the container name using the VM name for consistency
288
+ self.container_name = name
289
+ try:
290
+ # First, check if container already exists and remove it
291
+ try:
292
+ check_cmd = ["docker", "ps", "-a", "--filter", f"name={self.container_name}", "--format", "{{.ID}}"]
293
+ check_result = subprocess.run(check_cmd, capture_output=True, text=True)
294
+ existing_container = check_result.stdout.strip()
295
+
296
+ if existing_container:
297
+ logger.info(f"Removing existing container: {self.container_name}")
298
+ remove_cmd = ["docker", "rm", "-f", self.container_name]
299
+ subprocess.run(remove_cmd, check=True)
300
+ except subprocess.CalledProcessError as e:
301
+ logger.warning(f"Error removing existing container: {e}")
302
+ # Continue anyway, next steps will fail if there's a real problem
303
+
304
+ # Prepare the Docker run command
305
+ cmd = ["docker", "run", "-d", "--name", self.container_name]
306
+
307
+ cmd.extend(["-p", f"{self.vnc_port}:8006"])
308
+ print(f"Using specified noVNC_port: {self.vnc_port}")
309
+
310
+ # Set API URL using the API port
311
+ self._api_url = f"http://{self.host}:{self.api_port}"
312
+
313
+ # Parse memory setting
314
+ memory_mb = self._parse_memory(run_opts.get("memory", "8GB"))
315
+
316
+ # Add storage volume mount if storage is specified (for persistent VM storage)
317
+ if self.storage and self.storage != "ephemeral":
318
+ # Create storage directory if it doesn't exist
319
+ storage_dir = os.path.abspath(os.path.expanduser(self.storage or ""))
320
+ os.makedirs(storage_dir, exist_ok=True)
321
+
322
+ # Add volume mount for storage
323
+ cmd.extend([
324
+ "-v", f"{storage_dir}:/storage",
325
+ "-e", f"HOST_STORAGE_PATH={storage_dir}"
326
+ ])
327
+ print(f"Using persistent storage at: {storage_dir}")
328
+
329
+ # Add shared folder volume mount if shared_path is specified
330
+ if self.shared_path:
331
+ # Create shared directory if it doesn't exist
332
+ shared_dir = os.path.abspath(os.path.expanduser(self.shared_path or ""))
333
+ os.makedirs(shared_dir, exist_ok=True)
334
+
335
+ # Add volume mount for shared folder
336
+ cmd.extend([
337
+ "-v", f"{shared_dir}:/shared",
338
+ "-e", f"HOST_SHARED_PATH={shared_dir}"
339
+ ])
340
+ print(f"Using shared folder at: {shared_dir}")
341
+
342
+ # Add environment variables
343
+ # Always use the container_name as the VM_NAME for consistency
344
+ # Use the VM image passed from the Computer class
345
+ print(f"Using VM image: {self.image}")
346
+
347
+ cmd.extend([
348
+ "-e", f"VM_NAME={self.container_name}",
349
+ "-e", f"VERSION=ghcr.io/trycua/{self.image}",
350
+ "-e", f"CPU_CORES={run_opts.get('cpu', '4')}",
351
+ "-e", f"RAM_SIZE={memory_mb}",
352
+ ])
353
+
354
+ # Specify the Lumier image with the full image name
355
+ lumier_image = "trycua/lumier:latest"
356
+
357
+ # First check if the image exists locally
358
+ try:
359
+ print(f"Checking if Docker image {lumier_image} exists locally...")
360
+ check_image_cmd = ["docker", "image", "inspect", lumier_image]
361
+ subprocess.run(check_image_cmd, capture_output=True, check=True)
362
+ print(f"Docker image {lumier_image} found locally.")
363
+ except subprocess.CalledProcessError:
364
+ # Image doesn't exist locally
365
+ print(f"\nWARNING: Docker image {lumier_image} not found locally.")
366
+ print("The system will attempt to pull it from Docker Hub, which may fail if you have network connectivity issues.")
367
+ print("If the Docker pull fails, you may need to manually pull the image first with:")
368
+ print(f" docker pull {lumier_image}\n")
369
+
370
+ # Add the image to the command
371
+ cmd.append(lumier_image)
372
+
373
+ # Print the Docker command for debugging
374
+ print(f"DOCKER COMMAND: {' '.join(cmd)}")
375
+
376
+ # Run the container with improved error handling
377
+ try:
378
+ result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+ # Record the container ID printed by `docker run -d` so stop_vm and __aexit__ can clean it up
+ self._container_id = result.stdout.strip()
379
+ except subprocess.CalledProcessError as e:
380
+ if "no route to host" in str(e.stderr).lower() or "failed to resolve reference" in str(e.stderr).lower():
381
+ error_msg = (f"Network error while trying to pull Docker image '{lumier_image}'\n"
382
+ f"Error: {e.stderr}\n\n"
383
+ f"SOLUTION: Please try one of the following:\n"
384
+ f"1. Check your internet connection\n"
385
+ f"2. Pull the image manually with: docker pull {lumier_image}\n"
386
+ f"3. Check if Docker is running properly\n")
387
+ logger.error(error_msg)
388
+ raise RuntimeError(error_msg)
389
+ raise
390
+
391
+ # Container started, now check VM status with polling
392
+ print("Container started, checking VM status...")
393
+ print("NOTE: This may take some time while the VM image is being pulled and initialized")
394
+
395
+ # Start a background thread to show container logs in real-time
396
+ import threading
397
+
398
+ def show_container_logs():
399
+ # Give the container a moment to start generating logs
400
+ time.sleep(1)
401
+ print(f"\n---- CONTAINER LOGS FOR '{name}' (LIVE) ----")
402
+ print("Showing logs as they are generated. Press Ctrl+C to stop viewing logs...\n")
403
+
404
+ try:
405
+ # Use docker logs with follow option
406
+ log_cmd = ["docker", "logs", "--tail", "30", "--follow", name]
407
+ process = subprocess.Popen(log_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
408
+ text=True, bufsize=1, universal_newlines=True)
409
+
410
+ # Read and print logs line by line
411
+ for line in process.stdout:
412
+ print(line, end='')
413
+
414
+ # Break if process has exited
415
+ if process.poll() is not None:
416
+ break
417
+ except Exception as e:
418
+ print(f"\nError showing container logs: {e}")
419
+ if self.verbose:
420
+ logger.error(f"Error in log streaming thread: {e}")
421
+ finally:
422
+ print("\n---- LOG STREAMING ENDED ----")
423
+ # Make sure process is terminated
424
+ if 'process' in locals() and process.poll() is None:
425
+ process.terminate()
426
+
427
+ # Start log streaming in a background thread so container output is visible during setup
428
+ log_thread = threading.Thread(target=show_container_logs)
429
+ log_thread.daemon = True # Thread will exit when main program exits
430
+ log_thread.start()
431
+
432
+ # Skip waiting for container readiness and just poll get_vm directly
433
+ # Poll the get_vm method indefinitely until the VM is ready with an IP address
434
+ attempt = 0
435
+ consecutive_errors = 0
436
+ vm_running = False
437
+
438
+ while True: # Wait indefinitely
439
+ try:
440
+ # Use longer delays to give the system time to initialize
441
+ if attempt > 0:
442
+ # Start with 5s delay, then increase gradually up to 30s for later attempts
443
+ # But use shorter delays while we're getting API errors
444
+ if consecutive_errors > 0 and consecutive_errors < 5:
445
+ wait_time = 3 # Use shorter delays when we're getting API errors
446
+ else:
447
+ wait_time = min(30, 5 + (attempt * 2))
448
+
449
+ print(f"Waiting {wait_time}s before retry #{attempt+1}...")
450
+ await asyncio.sleep(wait_time)
451
+
452
+ # Try to get VM status
453
+ print(f"Checking VM status (attempt {attempt+1})...")
454
+ vm_status = await self.get_vm(name)
455
+
456
+ # Check for API errors
457
+ if 'error' in vm_status:
458
+ consecutive_errors += 1
459
+ error_msg = vm_status.get('error', 'Unknown error')
460
+
461
+ # Only print a user-friendly status message, not the raw error
462
+ # since lume_api_get already logged the technical details
463
+ if consecutive_errors == 1 or attempt % 5 == 0:
464
+ if 'Empty reply from server' in error_msg:
465
+ print("API server is starting up - container is running, but API isn't fully initialized yet.")
466
+ print("This is expected during the initial VM setup - will continue polling...")
467
+ else:
468
+ # Don't repeat the exact same error message each time
469
+ logger.debug(f"API request error (attempt {attempt+1}): {error_msg}")
470
+ # Just log that we're still working on it
471
+ if attempt > 3:
472
+ print("Still waiting for the API server to become available...")
473
+
474
+ # If we're getting errors but container is running, that's normal during startup
475
+ if vm_status.get('status') == 'running':
476
+ if not vm_running:
477
+ print("Container is running, waiting for the VM within it to become fully ready...")
478
+ print("This might take a minute while the VM initializes...")
479
+ vm_running = True
480
+
481
+ # Increase counter and continue
482
+ attempt += 1
483
+ continue
484
+
485
+ # Reset consecutive error counter when we get a successful response
486
+ consecutive_errors = 0
487
+
488
+ # If the VM is running, check if it has an IP address (which means it's fully ready)
489
+ if vm_status.get('status') == 'running':
490
+ vm_running = True
491
+
492
+ # Check if we have an IP address, which means the VM is fully ready
493
+ if 'ip_address' in vm_status and vm_status['ip_address']:
494
+ print(f"VM is now fully running with IP: {vm_status.get('ip_address')}")
495
+ if 'vnc_url' in vm_status and vm_status['vnc_url']:
496
+ print(f"VNC URL: {vm_status.get('vnc_url')}")
497
+ return vm_status
498
+ else:
499
+ print("VM is running but still initializing network interfaces...")
500
+ print("Waiting for IP address to be assigned...")
501
+ else:
502
+ # VM exists but might still be starting up
503
+ status = vm_status.get('status', 'unknown')
504
+ print(f"VM found but status is: {status}. Continuing to poll...")
505
+
506
+ # Increase counter for next iteration's delay calculation
507
+ attempt += 1
508
+
509
+ # If we reach a very large number of attempts, give a reassuring message but continue
510
+ if attempt % 10 == 0:
511
+ print(f"Still waiting after {attempt} attempts. This might take several minutes for first-time setup.")
512
+ if not vm_running and attempt >= 20:
513
+ print("\nNOTE: First-time VM initialization can be slow as images are downloaded.")
514
+ print("If this continues for more than 10 minutes, you may want to check:")
515
+ print(" 1. Docker logs with: docker logs " + name)
516
+ print(" 2. If your network can access container registries")
517
+ print("Press Ctrl+C to abort if needed.\n")
518
+
519
+ # After 150 attempts (well over an hour at the 30s maximum interval), return current status
520
+ if attempt >= 150:
521
+ print(f"Reached 150 polling attempts. VM status is: {vm_status.get('status', 'unknown')}")
522
+ print("Returning current VM status, but please check Docker logs if there are issues.")
523
+ return vm_status
524
+
525
+ except Exception as e:
526
+ # Always continue retrying, but with increasing delays
527
+ logger.warning(f"Error checking VM status (attempt {attempt+1}): {e}. Will retry.")
528
+ consecutive_errors += 1
529
+
530
+ # If we've had too many consecutive errors, might be a deeper problem
531
+ if consecutive_errors >= 10:
532
+ print(f"\nWARNING: Encountered {consecutive_errors} consecutive errors while checking VM status.")
533
+ print("You may need to check the Docker container logs or restart the process.")
534
+ print(f"Error details: {str(e)}\n")
535
+
536
+ # Increase attempt counter for next iteration
537
+ attempt += 1
538
+
539
+ # After many consecutive errors, add a delay to avoid hammering the system
540
+ if attempt > 5:
541
+ error_delay = min(30, 10 + attempt)
542
+ print(f"Multiple connection errors, waiting {error_delay}s before next attempt...")
543
+ await asyncio.sleep(error_delay)
544
+
545
+ except subprocess.CalledProcessError as e:
546
+ error_msg = f"Failed to start Lumier container: {e.stderr if hasattr(e, 'stderr') else str(e)}"
547
+ logger.error(error_msg)
548
+ raise RuntimeError(error_msg)
549
+
550
+ async def _wait_for_container_ready(self, container_name: str, timeout: int = 90) -> bool:
551
+ """Wait for the Lumier container to be fully ready with a valid API response.
552
+
553
+ Args:
554
+ container_name: Name of the Docker container to check
555
+ timeout: Maximum time to wait in seconds (default: 90 seconds)
556
+
557
+ Returns:
558
+ True if the container is running, even if API is not fully ready.
559
+ This allows operations to continue with appropriate fallbacks.
560
+ """
561
+ start_time = time.time()
562
+ api_ready = False
563
+ container_running = False
564
+
565
+ print(f"Waiting for container {container_name} to be ready (timeout: {timeout}s)...")
566
+
567
+ while time.time() - start_time < timeout:
568
+ # Check if container is running
569
+ try:
570
+ check_cmd = ["docker", "ps", "--filter", f"name={container_name}", "--format", "{{.Status}}"]
571
+ result = subprocess.run(check_cmd, capture_output=True, text=True, check=True)
572
+ container_status = result.stdout.strip()
573
+
574
+ if container_status and container_status.startswith("Up"):
575
+ container_running = True
576
+ print(f"Container {container_name} is running")
577
+ logger.info(f"Container {container_name} is running with status: {container_status}")
578
+ else:
579
+ logger.warning(f"Container {container_name} not yet running, status: {container_status}")
580
+ # container is not running yet, wait and try again
581
+ await asyncio.sleep(2) # Longer sleep to give Docker time
582
+ continue
583
+ except subprocess.CalledProcessError as e:
584
+ logger.warning(f"Error checking container status: {e}")
585
+ await asyncio.sleep(2)
586
+ continue
587
+
588
+ # Container is running, check if API is responsive
589
+ try:
590
+ # First check the health endpoint
591
+ api_url = f"http://{self.host}:{self.api_port}/health"
592
+ logger.info(f"Checking API health at: {api_url}")
593
+
594
+ # Use longer timeout for API health check since it may still be initializing
595
+ curl_cmd = ["curl", "-s", "--connect-timeout", "5", "--max-time", "10", api_url]
596
+ result = subprocess.run(curl_cmd, capture_output=True, text=True)
597
+
598
+ if result.returncode == 0 and "ok" in result.stdout.lower():
599
+ api_ready = True
600
+ print(f"API is ready at {api_url}")
601
+ logger.info(f"API is ready at {api_url}")
602
+ break
603
+ else:
604
+ # API health check failed, now let's check if the VM status endpoint is responsive
605
+ # This covers cases where the health endpoint isn't implemented but the VM API is working
606
+ vm_api_url = f"http://{self.host}:{self.api_port}/lume/vms/{container_name}"
607
+ if self.storage:
608
+ import urllib.parse
609
+ encoded_storage = urllib.parse.quote_plus(self.storage)
610
+ vm_api_url += f"?storage={encoded_storage}"
611
+
612
+ curl_vm_cmd = ["curl", "-s", "--connect-timeout", "5", "--max-time", "10", vm_api_url]
613
+ vm_result = subprocess.run(curl_vm_cmd, capture_output=True, text=True)
614
+
615
+ if vm_result.returncode == 0 and vm_result.stdout.strip():
616
+ # VM API responded with something - consider the API ready
617
+ api_ready = True
618
+ print(f"VM API is ready at {vm_api_url}")
619
+ logger.info(f"VM API is ready at {vm_api_url}")
620
+ break
621
+ else:
622
+ curl_code = result.returncode
623
+ if curl_code == 0:
624
+ curl_code = vm_result.returncode
625
+
626
+ # Reuse the shared curl error mapping helper defined above
+ curl_error = self._get_curl_error_message(curl_code)
639
+
640
+ print(f"API not ready yet: {curl_error}")
641
+ logger.info(f"API not ready yet: {curl_error}")
642
+ except subprocess.SubprocessError as e:
643
+ logger.warning(f"Error checking API status: {e}")
644
+
645
+ # If the container is running but API is not ready, that's OK - we'll just wait
646
+ # a bit longer before checking again, as the container may still be initializing
647
+ elapsed_seconds = time.time() - start_time
648
+ if int(elapsed_seconds) % 5 == 0: # Only print status every 5 seconds to reduce verbosity
649
+ print(f"Waiting for API to initialize... ({elapsed_seconds:.1f}s / {timeout}s)")
650
+
651
+ await asyncio.sleep(3) # Longer sleep between API checks
652
+
653
+ # Handle timeout - if the container is running but API is not ready, that's not
654
+ # necessarily an error - the API might just need more time to start up
655
+ if not container_running:
656
+ print(f"Timed out waiting for container {container_name} to start")
657
+ logger.warning(f"Timed out waiting for container {container_name} to start")
658
+ return False
659
+
660
+ if not api_ready:
661
+ print(f"Container {container_name} is running, but API is not fully ready yet.")
662
+ print("Proceeding with operations. API will become available shortly.")
663
+ print("NOTE: You may see some 'API request failed' messages while the API initializes.")
664
+ logger.warning(f"Container {container_name} is running, but API is not fully ready yet.")
665
+
666
+ # Return True if container is running, even if API isn't ready yet
667
+ # This allows VM operations to proceed, with appropriate retries for API calls
668
+ return container_running
669
+
670
+ async def stop_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
671
+ """Stop a running VM by stopping the Lumier container."""
672
+ try:
673
+ # Use Docker commands to stop the container directly
674
+ if hasattr(self, '_container_id') and self._container_id:
675
+ logger.info(f"Stopping Lumier container: {self.container_name}")
676
+ cmd = ["docker", "stop", self.container_name]
677
+ result = subprocess.run(cmd, capture_output=True, text=True, check=True)
678
+ logger.info(f"Container stopped: {result.stdout.strip()}")
679
+
680
+ # Return minimal status info
681
+ return {
682
+ "name": name,
683
+ "status": "stopped",
684
+ "container_id": self._container_id,
685
+ }
686
+ else:
687
+ # Try to find the container by name
688
+ check_cmd = ["docker", "ps", "-a", "--filter", f"name={self.container_name}", "--format", "{{.ID}}"]
689
+ check_result = subprocess.run(check_cmd, capture_output=True, text=True)
690
+ container_id = check_result.stdout.strip()
691
+
692
+ if container_id:
693
+ logger.info(f"Found container ID: {container_id}")
694
+ cmd = ["docker", "stop", self.container_name]
695
+ result = subprocess.run(cmd, capture_output=True, text=True, check=True)
696
+ logger.info(f"Container stopped: {result.stdout.strip()}")
697
+
698
+ return {
699
+ "name": name,
700
+ "status": "stopped",
701
+ "container_id": container_id,
702
+ }
703
+ else:
704
+ logger.warning(f"No container found with name {self.container_name}")
705
+ return {
706
+ "name": name,
707
+ "status": "unknown",
708
+ }
709
+ except subprocess.CalledProcessError as e:
710
+ error_msg = f"Failed to stop container: {e.stderr if hasattr(e, 'stderr') else str(e)}"
711
+ logger.error(error_msg)
712
+ raise RuntimeError(f"Failed to stop Lumier container: {error_msg}")
713
+
714
+ # update_vm is not implemented as it's not needed for Lumier
715
+ # The BaseVMProvider requires it, so we provide a minimal implementation
716
+ async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]:
717
+ """Not implemented for Lumier provider."""
718
+ logger.warning("update_vm is not implemented for Lumier provider")
719
+ return {"name": name, "status": "unchanged"}
720
+
721
+ async def get_logs(self, name: str, num_lines: int = 100, follow: bool = False, timeout: Optional[int] = None) -> str:
722
+ """Get the logs from the Lumier container.
723
+
724
+ Args:
725
+ name: Name of the VM/container to get logs for
726
+ num_lines: Number of recent log lines to return (default: 100)
727
+ follow: If True, follow the logs (stream new logs as they are generated)
728
+ timeout: Optional timeout in seconds for follow mode (None means no timeout)
729
+
730
+ Returns:
731
+ Container logs as a string
732
+
733
+ Note:
734
+ If follow=True, this function will continuously stream logs until timeout
735
+ or until interrupted. The output will be printed to console in real-time.
736
+ """
737
+ if not HAS_LUMIER:
738
+ error_msg = "Docker is not available. Cannot get container logs."
739
+ logger.error(error_msg)
740
+ return error_msg
741
+
742
+ # Make sure we have a container name
743
+ container_name = name
744
+
745
+ # Check if the container exists and is running
746
+ try:
747
+ # Check if the container exists
748
+ inspect_cmd = ["docker", "container", "inspect", container_name]
749
+ result = subprocess.run(inspect_cmd, capture_output=True, text=True)
750
+
751
+ if result.returncode != 0:
752
+ error_msg = f"Container '{container_name}' does not exist or is not accessible"
753
+ logger.error(error_msg)
754
+ return error_msg
755
+ except Exception as e:
756
+ error_msg = f"Error checking container status: {str(e)}"
757
+ logger.error(error_msg)
758
+ return error_msg
759
+
760
+ # Base docker logs command
761
+ log_cmd = ["docker", "logs"]
762
+
763
+ # Add tail parameter to limit the number of lines
764
+ log_cmd.extend(["--tail", str(num_lines)])
765
+
766
+ # Handle follow mode with or without timeout
767
+ if follow:
768
+ log_cmd.append("--follow")
769
+
770
+ if timeout is not None:
771
+ # For follow mode with timeout, we'll run the command and handle the timeout
772
+ log_cmd.append(container_name)
773
+ logger.info(f"Following logs for container '{container_name}' with timeout {timeout}s")
774
+ print(f"\n---- CONTAINER LOGS FOR '{container_name}' (LIVE) ----")
775
+ print(f"Press Ctrl+C to stop following logs\n")
776
+
777
+ try:
778
+ # Run with timeout
779
+ process = subprocess.Popen(log_cmd, text=True)
780
+
781
+ # Wait for the specified timeout
782
+ if timeout:
783
+ try:
784
+ process.wait(timeout=timeout)
785
+ except subprocess.TimeoutExpired:
786
+ process.terminate() # Stop after timeout
787
+ print(f"\n---- LOG FOLLOWING STOPPED (timeout {timeout}s reached) ----")
788
+ else:
789
+ # Without timeout, wait for user interruption
790
+ process.wait()
791
+
792
+ return "Logs were displayed to console in follow mode"
793
+ except KeyboardInterrupt:
794
+ process.terminate()
795
+ print("\n---- LOG FOLLOWING STOPPED (user interrupted) ----")
796
+ return "Logs were displayed to console in follow mode (interrupted)"
797
+ else:
798
+ # For follow mode without a timeout, stream logs until the user interrupts
799
+ log_cmd.append(container_name)
800
+ logger.info(f"Following logs for container '{container_name}' indefinitely")
801
+ print(f"\n---- CONTAINER LOGS FOR '{container_name}' (LIVE) ----")
802
+ print(f"Press Ctrl+C to stop following logs\n")
803
+
804
+ try:
805
+ # Run the command and let it run until interrupted
806
+ process = subprocess.Popen(log_cmd, text=True)
807
+ process.wait() # Wait indefinitely (until user interrupts)
808
+ return "Logs were displayed to console in follow mode"
809
+ except KeyboardInterrupt:
810
+ process.terminate()
811
+ print("\n---- LOG FOLLOWING STOPPED (user interrupted) ----")
812
+ return "Logs were displayed to console in follow mode (interrupted)"
813
+ else:
814
+ # For non-follow mode, capture and return the logs as a string
815
+ log_cmd.append(container_name)
816
+ logger.info(f"Getting {num_lines} log lines for container '{container_name}'")
817
+
818
+ try:
819
+ result = subprocess.run(log_cmd, capture_output=True, text=True, check=True)
820
+ logs = result.stdout
821
+
822
+ # Only print header and logs if there's content
823
+ if logs.strip():
824
+ print(f"\n---- CONTAINER LOGS FOR '{container_name}' (LAST {num_lines} LINES) ----\n")
825
+ print(logs)
826
+ print(f"\n---- END OF LOGS ----")
827
+ else:
828
+ print(f"\nNo logs available for container '{container_name}'")
829
+
830
+ return logs
831
+ except subprocess.CalledProcessError as e:
832
+ error_msg = f"Error getting logs: {e.stderr}"
833
+ logger.error(error_msg)
834
+ return error_msg
835
+ except Exception as e:
836
+ error_msg = f"Unexpected error getting logs: {str(e)}"
837
+ logger.error(error_msg)
838
+ return error_msg
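A brief usage sketch for the two log modes (the VM name and values are illustrative):

    # Fetch the last 50 lines as a string
    logs = await provider.get_logs("my-vm", num_lines=50)

    # Stream logs to the console for up to 60 seconds
    await provider.get_logs("my-vm", follow=True, timeout=60)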
839
+
840
+ async def get_ip(self, name: str, storage: Optional[str] = None, retry_delay: int = 2) -> str:
841
+ """Get the IP address of a VM, waiting indefinitely until it's available.
842
+
843
+ Args:
844
+ name: Name of the VM to get the IP for
845
+ storage: Optional storage path override
846
+ retry_delay: Delay between retries in seconds (default: 2)
847
+
848
+ Returns:
849
+ IP address of the VM when it becomes available
850
+ """
851
+ # Use container_name = name for consistency
852
+ self.container_name = name
853
+
854
+ # Track total attempts for logging purposes
855
+ total_attempts = 0
856
+
857
+ # Loop indefinitely until we get a valid IP
858
+ while True:
859
+ total_attempts += 1
860
+
861
+ # Log retry message but not on first attempt
862
+ if total_attempts > 1:
863
+ logger.info(f"Waiting for VM {name} IP address (attempt {total_attempts})...")
864
+
865
+ try:
866
+ # Get VM information
867
+ vm_info = await self.get_vm(name, storage=storage)
868
+
869
+ # Check if we got a valid IP
870
+ ip = vm_info.get("ip_address", None)
871
+ if ip and ip != "unknown" and not ip.startswith("0.0.0.0"):
872
+ logger.info(f"Got valid VM IP address: {ip}")
873
+ return ip
874
+
875
+ # Check the VM status
876
+ status = vm_info.get("status", "unknown")
877
+
878
+ # Special handling for Lumier: it may report "stopped" even when the VM is starting
879
+ # If the VM information contains an IP but status is stopped, it might be a race condition
880
+ if status == "stopped" and "ip_address" in vm_info:
881
+ ip = vm_info.get("ip_address")
882
+ if ip and ip != "unknown" and not ip.startswith("0.0.0.0"):
883
+ logger.info(f"Found valid IP {ip} despite VM status being {status}")
884
+ return ip
885
+ logger.info(f"VM status is {status}, but still waiting for IP to be assigned")
886
+ # If VM is not running yet, log and wait
887
+ elif status != "running":
888
+ logger.info(f"VM is not running yet (status: {status}). Waiting...")
889
+ # If VM is running but no IP yet, wait and retry
890
+ else:
891
+ logger.info("VM is running but no valid IP address yet. Waiting...")
892
+
893
+ except Exception as e:
894
+ logger.warning(f"Error getting VM {name} IP: {e}, continuing to wait...")
895
+
896
+ # Wait before next retry
897
+ await asyncio.sleep(retry_delay)
898
+
899
+ # Add progress log every 10 attempts
900
+ if total_attempts % 10 == 0:
901
+ logger.info(f"Still waiting for VM {name} IP after {total_attempts} attempts...")
902
+
903
+ async def __aenter__(self):
904
+ """Async context manager entry.
905
+
906
+ This method is called when entering an async context manager block.
907
+ Returns self to be used in the context.
908
+ """
909
+ logger.debug("Entering LumierProvider context")
910
+
911
+ # Initialize the API URL with the default value if not already set
912
+ # This ensures get_vm can work before run_vm is called
913
+ if not hasattr(self, '_api_url') or not self._api_url:
914
+ self._api_url = f"http://{self.host}:{self.api_port}"
915
+ logger.info(f"Initialized default Lumier API URL: {self._api_url}")
916
+
917
+ return self
918
+
919
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
920
+ """Async context manager exit.
921
+
922
+ This method is called when exiting an async context manager block.
923
+ It handles proper cleanup of resources, including stopping any running containers.
924
+ """
925
+ logger.debug(f"Exiting LumierProvider context, handling exceptions: {exc_type}")
926
+ try:
927
+ # If we have a container ID, we should stop it to clean up resources
928
+ if hasattr(self, '_container_id') and self._container_id:
929
+ logger.info(f"Stopping Lumier container on context exit: {self.container_name}")
930
+ try:
931
+ cmd = ["docker", "stop", self.container_name]
932
+ subprocess.run(cmd, capture_output=True, text=True, check=True)
933
+ logger.info(f"Container stopped during context exit: {self.container_name}")
934
+ except subprocess.CalledProcessError as e:
935
+ logger.warning(f"Failed to stop container during cleanup: {e.stderr}")
936
+ # Don't raise an exception here, we want to continue with cleanup
937
+ except Exception as e:
938
+ logger.error(f"Error during LumierProvider cleanup: {e}")
939
+ # We don't want to suppress the original exception if there was one
940
+ if exc_type is None:
941
+ raise
942
+ # Return False to indicate that any exception should propagate
943
+ return False
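To tie the lifecycle methods together, a minimal usage sketch (the import path is assumed from the package layout; the VM name, image, and run options are illustrative, and error handling is omitted):

    import asyncio

    # Import path assumed; adjust to wherever the package exposes LumierProvider
    from computer.providers.lumier.provider import LumierProvider

    async def main():
        async with LumierProvider(image="macos-sequoia-cua:latest", shared_path="~/lumier-shared") as provider:
            # Start the container and wait for the VM to come up with an IP address
            await provider.run_vm(
                image="macos-sequoia-cua:latest",
                name="my-vm",
                run_opts={"cpu": 4, "memory": "8GB"},
            )
            ip = await provider.get_ip("my-vm")
            print(f"VM is reachable at {ip}")
            # Stop the container when done (also happens on context exit if a container ID was recorded)
            await provider.stop_vm("my-vm")

    asyncio.run(main())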