golem-vm-provider 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- golem_vm_provider-0.1.0.dist-info/METADATA +398 -0
- golem_vm_provider-0.1.0.dist-info/RECORD +26 -0
- golem_vm_provider-0.1.0.dist-info/WHEEL +4 -0
- golem_vm_provider-0.1.0.dist-info/entry_points.txt +3 -0
- provider/__init__.py +3 -0
- provider/api/__init__.py +19 -0
- provider/api/models.py +108 -0
- provider/api/routes.py +159 -0
- provider/config.py +160 -0
- provider/discovery/__init__.py +6 -0
- provider/discovery/advertiser.py +179 -0
- provider/discovery/resource_tracker.py +152 -0
- provider/main.py +125 -0
- provider/network/port_verifier.py +287 -0
- provider/security/ethereum.py +41 -0
- provider/utils/ascii_art.py +79 -0
- provider/utils/logging.py +82 -0
- provider/utils/port_display.py +204 -0
- provider/utils/retry.py +48 -0
- provider/vm/__init__.py +31 -0
- provider/vm/cloud_init.py +67 -0
- provider/vm/models.py +205 -0
- provider/vm/multipass.py +427 -0
- provider/vm/name_mapper.py +108 -0
- provider/vm/port_manager.py +196 -0
- provider/vm/proxy_manager.py +239 -0
provider/api/routes.py
ADDED
@@ -0,0 +1,159 @@
|
|
1
|
+
import json
|
2
|
+
from typing import List
|
3
|
+
from pathlib import Path
|
4
|
+
from fastapi import APIRouter, HTTPException
|
5
|
+
|
6
|
+
from ..config import settings
|
7
|
+
from ..utils.logging import setup_logger, PROCESS, SUCCESS
|
8
|
+
from ..utils.ascii_art import vm_creation_animation, vm_status_change
|
9
|
+
from ..vm.models import VMInfo, VMStatus, VMAccessInfo, VMConfig, VMResources
|
10
|
+
from .models import CreateVMRequest
|
11
|
+
from ..vm.multipass import MultipassProvider, MultipassError
|
12
|
+
from ..discovery.resource_tracker import ResourceTracker
|
13
|
+
from ..vm.port_manager import PortManager
|
14
|
+
|
15
|
+
logger = setup_logger(__name__)
|
16
|
+
router = APIRouter()
|
17
|
+
|
18
|
+
# Initialize components
|
19
|
+
resource_tracker = ResourceTracker()
|
20
|
+
port_manager = PortManager()
|
21
|
+
provider = MultipassProvider(resource_tracker, port_manager)
|
22
|
+
|
23
|
+
def _ensure_minimum_resources(resources: VMResources) -> None:
    """Raise a 400 if *resources* is below any configured provider minimum."""
    if resources.cpu < settings.MIN_CPU_CORES:
        logger.error(f"❌ CPU cores {resources.cpu} below minimum {settings.MIN_CPU_CORES}")
        raise HTTPException(400, f"Minimum CPU cores required: {settings.MIN_CPU_CORES}")
    if resources.memory < settings.MIN_MEMORY_GB:
        logger.error(f"❌ Memory {resources.memory}GB below minimum {settings.MIN_MEMORY_GB}GB")
        raise HTTPException(400, f"Minimum memory required: {settings.MIN_MEMORY_GB}GB")
    if resources.storage < settings.MIN_STORAGE_GB:
        logger.error(f"❌ Storage {resources.storage}GB below minimum {settings.MIN_STORAGE_GB}GB")
        raise HTTPException(400, f"Minimum storage required: {settings.MIN_STORAGE_GB}GB")


@router.post("/vms", response_model=VMInfo)
async def create_vm(request: CreateVMRequest) -> VMInfo:
    """Create a new VM.

    The requested resources are checked against the configured minimums,
    reserved in the resource tracker, and then handed to the provider. If
    building the config or creating the VM fails, the reservation is released
    before the error propagates.

    Args:
        request: Creation request (name, optional image, resources, SSH key).

    Returns:
        The VMInfo reported by the provider for the new VM.

    Raises:
        HTTPException: 400 when the request is below the minimums or the
            provider lacks capacity; 500 when the provider raises
            MultipassError.
    """
    try:
        logger.info(f"📥 Received VM creation request for '{request.name}'")

        # Determine resources based on request
        resources = request.resources
        if resources is None:
            # This shouldn't happen due to validator, but just in case
            resources = VMResources(cpu=1, memory=1, storage=10)

        logger.info(f"📥 Using resources: {resources.cpu} CPU, {resources.memory}GB RAM, {resources.storage}GB storage")

        # Validate against minimum requirements
        _ensure_minimum_resources(resources)

        # Check and allocate resources
        logger.process("🔄 Allocating resources")
        if not await resource_tracker.allocate(resources):
            logger.error("❌ Insufficient resources available")
            raise HTTPException(400, "Insufficient resources available on provider")

        try:
            config = VMConfig(
                name=request.name,
                image=request.image or settings.DEFAULT_VM_IMAGE,
                resources=resources,
                ssh_key=request.ssh_key,
            )

            logger.process(f"🔄 Creating VM with config: {config}")
            vm_info = await provider.create_vm(config)
        except Exception:
            # Roll back the reservation only when the VM itself was not created.
            logger.warning("⚠️ VM creation failed, deallocating resources")
            await resource_tracker.deallocate(resources)
            raise

    except MultipassError as e:
        logger.error(f"Failed to create VM: {e}")
        raise HTTPException(500, str(e))

    # The animation is purely cosmetic: a failure here must neither release
    # the resources of a VM that is already running nor fail the request.
    try:
        await vm_creation_animation(request.name)
    except Exception as e:
        logger.warning(f"VM creation animation failed: {e}")

    return vm_info
|
79
|
+
|
80
|
+
@router.get("/vms", response_model=List[VMInfo])
async def list_vms() -> List[VMInfo]:
    """Return the provider-reported status of every VM known to the tracker.

    VMs are queried one at a time, in the order the resource tracker lists
    them. A MultipassError from any query turns into a 500 response.
    """
    try:
        logger.info("📋 Listing all VMs")
        return [
            await provider.get_vm_status(tracked_id)
            for tracked_id in resource_tracker.get_allocated_vms()
        ]
    except MultipassError as exc:
        logger.error(f"Failed to list VMs: {exc}")
        raise HTTPException(500, str(exc))
|
93
|
+
|
94
|
+
@router.get("/vms/{requestor_name}", response_model=VMInfo)
async def get_vm_status(requestor_name: str) -> VMInfo:
    """Look up one VM by its requestor-supplied name and return its info.

    The current status is also echoed through ``vm_status_change`` for the
    operator console. A MultipassError is reported as a 500 response.
    """
    try:
        logger.info(f"🔍 Getting status for VM '{requestor_name}'")
        vm_info = await provider.get_vm_status(requestor_name)
        current_state = vm_info.status.value
        vm_status_change(requestor_name, current_state)
        return vm_info
    except MultipassError as exc:
        logger.error(f"Failed to get VM status: {exc}")
        raise HTTPException(500, str(exc))
|
105
|
+
|
106
|
+
@router.get("/vms/{requestor_name}/access", response_model=VMAccessInfo)
async def get_vm_access(requestor_name: str) -> VMAccessInfo:
    """Return the SSH endpoint and both names for a VM.

    Responds 404 when the provider returns no VM or the name mapper has no
    multipass name for it, and 500 when the provider raises MultipassError.
    The host falls back to "localhost" and the port to 22 when unset.
    """
    try:
        vm_info = await provider.get_vm_status(requestor_name)
        if not vm_info:
            raise HTTPException(404, "VM not found")

        mapped_name = await provider.name_mapper.get_multipass_name(requestor_name)
        if not mapped_name:
            raise HTTPException(404, "VM mapping not found")

        host = settings.PUBLIC_IP or "localhost"
        port = vm_info.ssh_port or 22
        return VMAccessInfo(
            ssh_host=host,
            ssh_port=port,
            vm_id=requestor_name,
            multipass_name=mapped_name,
        )
    except MultipassError as exc:
        logger.error(f"Failed to get VM access info: {exc}")
        raise HTTPException(500, str(exc))
|
131
|
+
|
132
|
+
@router.delete("/vms/{requestor_name}")
async def delete_vm(requestor_name: str) -> None:
    """Delete a VM.

    A VM with no multipass name mapping is treated as already deleted and the
    call returns without error.

    Args:
        requestor_name: Name of the VM as provided by requestor

    Raises:
        HTTPException: 500 carrying the underlying error text when the name
            lookup or the deletion fails.
    """
    try:
        logger.process(f"🗑️ Deleting VM '{requestor_name}'")

        # Get multipass name from mapper
        multipass_name = await provider.name_mapper.get_multipass_name(requestor_name)
        if not multipass_name:
            logger.warning(f"No multipass name found for VM '{requestor_name}' (may have been already deleted)")
            return

        vm_status_change(requestor_name, "STOPPING", "Cleanup in progress")
        await provider.delete_vm(requestor_name)
        vm_status_change(requestor_name, "TERMINATED", "Cleanup complete")
        logger.success(f"✨ Successfully deleted VM '{requestor_name}'")

    except HTTPException:
        # Already an HTTP response; re-wrapping it would replace its detail
        # with str(HTTPException) and log the failure a second time.
        raise
    except Exception as e:
        # Covers MultipassError as well: both cases log once and map to 500.
        logger.error(f"Failed to delete VM: {e}")
        raise HTTPException(500, str(e)) from e
|
provider/config.py
ADDED
@@ -0,0 +1,160 @@
|
|
1
|
+
import os
|
2
|
+
from pathlib import Path
|
3
|
+
from typing import Optional
|
4
|
+
import uuid
|
5
|
+
|
6
|
+
from pydantic import BaseSettings, validator
|
7
|
+
|
8
|
+
|
9
|
+
def _resolve_home_dir(value: Optional[str], default_leaf: str) -> str:
    """Return an absolute path for a configurable provider directory.

    An empty value maps to ``~/.golem/provider/<default_leaf>``; a relative
    value is anchored at the user's home directory; an absolute value is kept.
    """
    if not value:
        return str(Path.home() / ".golem" / "provider" / default_leaf)
    path = Path(value)
    if not path.is_absolute():
        path = Path.home() / path
    return str(path)


class Settings(BaseSettings):
    """Provider configuration settings.

    Values are read from environment variables prefixed with
    ``GOLEM_PROVIDER_`` (case-sensitive). Validators that compute a default
    are declared with ``always=True`` because pydantic does not run
    validators for fields whose value was not supplied.
    """

    # API Settings
    DEBUG: bool = True
    HOST: str = "0.0.0.0"
    PORT: int = 7466

    # Provider Settings
    # ETHEREUM_KEY_DIR is declared before PROVIDER_ID on purpose: a validator
    # only sees previously validated fields in ``values``, and the PROVIDER_ID
    # validator reads ETHEREUM_KEY_DIR.
    ETHEREUM_KEY_DIR: str = ""
    PROVIDER_ID: str = ""  # Will be set from Ethereum identity
    PROVIDER_NAME: str = "golem-provider"
    PROVIDER_COUNTRY: str = "SE"

    @validator("ETHEREUM_KEY_DIR", pre=True, always=True)
    def resolve_key_dir(cls, v: str) -> str:
        """Resolve Ethereum key directory path."""
        return _resolve_home_dir(v, "keys")

    @validator("PROVIDER_ID", always=True)
    def get_or_create_provider_id(cls, v: str, values: dict) -> str:
        """Get or create provider ID from Ethereum identity."""
        # If ID provided in env, use it
        if v:
            return v

        # Imported lazily, as in the original code, so the identity module is
        # only loaded when an ID actually has to be derived.
        from provider.security.ethereum import EthereumIdentity

        # Get ID from Ethereum identity
        key_dir = values.get("ETHEREUM_KEY_DIR")
        identity = EthereumIdentity(key_dir)
        return identity.get_or_create_identity()

    # Discovery Service Settings
    DISCOVERY_URL: str = "http://localhost:7465"
    ADVERTISEMENT_INTERVAL: int = 240  # seconds

    # VM Settings
    MAX_VMS: int = 10
    DEFAULT_VM_IMAGE: str = "ubuntu:24.04"
    VM_DATA_DIR: str = ""
    SSH_KEY_DIR: str = ""

    @validator("VM_DATA_DIR", pre=True, always=True)
    def resolve_vm_data_dir(cls, v: str) -> str:
        """Resolve VM data directory path."""
        return _resolve_home_dir(v, "vms")

    @validator("SSH_KEY_DIR", pre=True, always=True)
    def resolve_ssh_key_dir(cls, v: str) -> str:
        """Resolve SSH key directory path."""
        return _resolve_home_dir(v, "ssh")

    # Resource Settings
    MIN_MEMORY_GB: int = 1
    MIN_STORAGE_GB: int = 10
    MIN_CPU_CORES: int = 1

    # Resource Thresholds (%)
    CPU_THRESHOLD: int = 90
    MEMORY_THRESHOLD: int = 85
    STORAGE_THRESHOLD: int = 90

    # Rate Limiting
    RATE_LIMIT_PER_MINUTE: int = 100

    # Multipass Settings
    MULTIPASS_BINARY_PATH: str = ""

    @validator("MULTIPASS_BINARY_PATH", pre=True, always=True)
    def detect_multipass_path(cls, v: str) -> str:
        """Detect and validate Multipass binary path.

        An explicit value must point at an executable file. Without one, the
        common install locations are probed first and then ``PATH``.

        Raises:
            ValueError: If no usable Multipass binary is found.
        """
        if v:
            # Validate the explicitly configured path
            if not os.path.isfile(v):
                raise ValueError(f"Multipass binary not found at: {v}")
            if not os.access(v, os.X_OK):
                raise ValueError(f"Multipass binary at {v} is not executable")
            return v

        import shutil

        binary_name = "multipass"
        # Common Multipass binary locations
        search_paths = [
            "/usr/local/bin",     # Common Unix/Linux
            "/usr/bin",           # Linux
            "/opt/homebrew/bin",  # macOS M1 (Homebrew)
            "/snap/bin",          # Linux (Snap)
        ]

        for directory in search_paths:
            candidate = os.path.join(directory, binary_name)
            if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
                return candidate

        # Fall back to PATH for installs outside the locations above.
        on_path = shutil.which(binary_name)
        if on_path:
            return on_path

        raise ValueError(
            "Multipass binary not found. Please install Multipass or set "
            "GOLEM_PROVIDER_MULTIPASS_BINARY_PATH to your Multipass binary path."
        )

    # Proxy Settings
    PORT_RANGE_START: int = 50800
    PORT_RANGE_END: int = 50900
    PROXY_STATE_DIR: str = ""
    PUBLIC_IP: Optional[str] = None

    @validator("PROXY_STATE_DIR", pre=True, always=True)
    def resolve_proxy_state_dir(cls, v: str) -> str:
        """Resolve proxy state directory path."""
        return _resolve_home_dir(v, "proxy")

    @validator("PUBLIC_IP", pre=True)
    def get_public_ip(cls, v: Optional[str]) -> Optional[str]:
        """Get public IP if set to 'auto'.

        Any other value is returned unchanged. The lookup is best effort:
        every failure yields ``None`` rather than aborting configuration.
        """
        if v != "auto":
            return v
        try:
            import requests

            # Bounded wait: this runs while the settings object is built, so
            # an unreachable service must not hang start-up.
            response = requests.get("https://api.ipify.org", timeout=5)
            response.raise_for_status()
            return response.text.strip() or None
        except Exception:
            return None

    class Config:
        env_prefix = "GOLEM_PROVIDER_"
        case_sensitive = True
|
157
|
+
|
158
|
+
|
159
|
+
# Global settings instance
|
160
|
+
settings = Settings()
|
provider/discovery/advertiser.py
ADDED
@@ -0,0 +1,179 @@
|
|
1
|
+
import aiohttp
|
2
|
+
import asyncio
|
3
|
+
import logging
|
4
|
+
import psutil
|
5
|
+
from datetime import datetime
|
6
|
+
from typing import Dict, Optional
|
7
|
+
|
8
|
+
from ..config import settings
|
9
|
+
from ..utils.retry import async_retry
|
10
|
+
|
11
|
+
logger = logging.getLogger(__name__)
|
12
|
+
|
13
|
+
class ResourceMonitor:
    """Monitor system resources.

    Thin static wrappers around psutil; sizes are whole gigabytes (floor
    division by 1024 ** 3) and storage figures refer to the "/" mount.
    """

    @staticmethod
    def get_cpu_count() -> int:
        """Get number of CPU cores.

        psutil reports ``None`` when the count cannot be determined; fall
        back to 1 so callers can always do arithmetic on the result.
        """
        return psutil.cpu_count() or 1

    @staticmethod
    def get_memory_gb() -> int:
        """Get available memory in GB."""
        return psutil.virtual_memory().available // (1024 ** 3)

    @staticmethod
    def get_storage_gb() -> int:
        """Get available storage in GB."""
        return psutil.disk_usage("/").free // (1024 ** 3)

    @staticmethod
    def get_cpu_percent() -> float:
        """Get CPU usage percentage.

        Blocks for the one-second sampling interval passed to psutil.
        """
        return psutil.cpu_percent(interval=1)

    @staticmethod
    def get_memory_percent() -> float:
        """Get memory usage percentage."""
        return psutil.virtual_memory().percent

    @staticmethod
    def get_storage_percent() -> float:
        """Get storage usage percentage."""
        return psutil.disk_usage("/").percent
|
45
|
+
|
46
|
+
class ResourceAdvertiser:
    """Advertise available resources to discovery service."""

    def __init__(
        self,
        resource_tracker: 'ResourceTracker',
        discovery_url: Optional[str] = None,
        provider_id: Optional[str] = None,
        update_interval: Optional[int] = None
    ):
        """Store collaborators; unset arguments fall back to ``settings``.

        Args:
            resource_tracker: Source of available resources and update events.
            discovery_url: Base URL of the discovery service.
            provider_id: Identifier sent in the ``X-Provider-ID`` header.
            update_interval: Seconds between periodic advertisements.
        """
        self.resource_tracker = resource_tracker
        self.discovery_url = discovery_url or settings.DISCOVERY_URL
        self.provider_id = provider_id or settings.PROVIDER_ID
        self.update_interval = update_interval or settings.ADVERTISEMENT_INTERVAL
        self.session: Optional[aiohttp.ClientSession] = None
        self._stop_event = asyncio.Event()

    async def start(self):
        """Start advertising resources.

        Opens the HTTP session, subscribes to tracker updates and then
        advertises periodically until stop() is called. If the discovery
        service fails its health check, the periodic loop is not started.
        """
        self.session = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=10))
        # Register for resource updates
        self.resource_tracker.on_update(self._post_advertisement)

        # Test discovery service connection with retries
        try:
            await self._check_discovery_health()
        except Exception as e:
            logger.warning(f"Could not connect to discovery service after retries, continuing without advertising: {e}")
            return

        try:
            while not self._stop_event.is_set():
                try:
                    await self._post_advertisement()
                except aiohttp.ClientError as e:
                    logger.error(f"Network error posting advertisement: {e}")
                    pause = min(60, self.update_interval)
                except Exception as e:
                    logger.error(f"Failed to post advertisement: {e}")
                    pause = min(60, self.update_interval)
                else:
                    pause = self.update_interval
                await self._sleep_unless_stopped(pause)
        finally:
            await self.stop()

    async def _sleep_unless_stopped(self, seconds: float) -> None:
        """Sleep up to *seconds*, waking within about a second of stop()."""
        remaining = seconds
        # Short slices keep shutdown responsive without awaiting the event
        # itself, so only is_set() is needed, exactly as in the main loop.
        while remaining > 0 and not self._stop_event.is_set():
            step = min(1.0, remaining)
            await asyncio.sleep(step)
            remaining -= step

    async def stop(self):
        """Stop advertising resources.

        Safe to call more than once: the session is closed only if open.
        """
        self._stop_event.set()
        if self.session:
            await self.session.close()
            self.session = None

    @async_retry(retries=5, delay=1.0, backoff=2.0, exceptions=(aiohttp.ClientError, asyncio.TimeoutError))
    async def _check_discovery_health(self):
        """Check discovery service health with retries.

        Raises:
            RuntimeError: If the session has not been created.
            Exception: If the health endpoint answers with a non-OK status.
        """
        if not self.session:
            raise RuntimeError("Session not initialized")

        async with self.session.get(f"{self.discovery_url}/health") as response:
            if not response.ok:
                raise Exception(f"Discovery service health check failed: {response.status}")

    @async_retry(retries=3, delay=1.0, backoff=2.0, exceptions=(aiohttp.ClientError, asyncio.TimeoutError))
    async def _post_advertisement(self):
        """Post resource advertisement to discovery service.

        Returns without posting when the available resources are below the
        configured minimums or no public IP could be determined.

        Raises:
            RuntimeError: If the session has not been created.
            Exception: If the discovery service answers with a non-OK status.
        """
        if not self.session:
            raise RuntimeError("Session not initialized")

        resources = self.resource_tracker.get_available_resources()

        # Don't advertise if resources are too low
        if not self.resource_tracker._meets_minimum_requirements(resources):
            logger.warning("Resources too low, skipping advertisement")
            return

        # Get public IP with retries
        try:
            ip_address = await self._get_public_ip()
        except Exception as e:
            logger.error(f"Could not get public IP after retries: {e}")
            return

        try:
            async with self.session.post(
                f"{self.discovery_url}/api/v1/advertisements",
                headers={
                    "X-Provider-ID": self.provider_id,
                    "X-Provider-Signature": "signature",  # TODO: Implement signing
                    "Content-Type": "application/json"
                },
                json={
                    "ip_address": ip_address,
                    "country": settings.PROVIDER_COUNTRY,
                    "resources": resources
                },
                timeout=aiohttp.ClientTimeout(total=5)  # 5 second timeout for advertisement
            ) as response:
                if not response.ok:
                    error_text = await response.text()
                    raise Exception(
                        f"Failed to post advertisement: {response.status} - {error_text}"
                    )
                logger.info(
                    f"Posted advertisement with resources: CPU={resources['cpu']}, "
                    f"Memory={resources['memory']}GB, Storage={resources['storage']}GB"
                )
        except asyncio.TimeoutError:
            # Logged here, then re-raised for the retry decorator.
            logger.error("Advertisement request timed out")
            raise

    @async_retry(retries=3, delay=1.0, backoff=2.0, exceptions=(aiohttp.ClientError, asyncio.TimeoutError))
    async def _get_public_ip(self) -> str:
        """Get public IP address with retries.

        Services are tried in order and the first OK response wins.

        Raises:
            RuntimeError: If the session has not been created.
            Exception: If no service returned an OK response; the message
                lists what went wrong for each one.
        """
        if not self.session:
            raise RuntimeError("Session not initialized")

        # Try multiple IP services in case one fails
        services = [
            "https://api.ipify.org",
            "https://ifconfig.me/ip",
            "https://api.my-ip.io/ip"
        ]

        errors = []
        for service in services:
            try:
                async with self.session.get(service) as response:
                    if response.ok:
                        return (await response.text()).strip()
                    # Record rejections too, so the final message is never empty.
                    errors.append(f"{service}: HTTP {response.status}")
            except Exception as e:
                errors.append(f"{service}: {str(e)}")

        raise Exception(f"Failed to get public IP address from all services: {'; '.join(errors)}")
|
provider/discovery/resource_tracker.py
ADDED
@@ -0,0 +1,152 @@
|
|
1
|
+
import asyncio
|
2
|
+
import logging
|
3
|
+
from typing import Dict, List, Callable, Optional
|
4
|
+
from ..vm.models import VMResources
|
5
|
+
from ..config import settings
|
6
|
+
|
7
|
+
logger = logging.getLogger(__name__)
|
8
|
+
|
9
|
+
class ResourceTracker:
    """Track and manage provider resources.

    Totals are sampled once at construction. Allocations are kept as running
    sums plus an optional per-VM map, both mutated under an asyncio lock.
    Update callbacks are awaited after the lock has been released, so a slow
    callback does not hold up other allocations.
    """

    _RESOURCE_KEYS = ("cpu", "memory", "storage")

    def __init__(self):
        """Initialize resource tracker."""
        # Imported here rather than at module level, as in the original code.
        from .advertiser import ResourceMonitor
        self.total_resources = {
            "cpu": ResourceMonitor.get_cpu_count(),
            "memory": ResourceMonitor.get_memory_gb(),
            "storage": ResourceMonitor.get_storage_gb()
        }
        self.allocated_resources = {
            "cpu": 0,
            "memory": 0,
            "storage": 0
        }
        self._lock = asyncio.Lock()
        self._update_callbacks: List[Callable] = []
        self._allocated_vms: Dict[str, VMResources] = {}

    def _can_allocate(self, resources: VMResources) -> bool:
        """Check if resources can be allocated."""
        available = self.get_available_resources()
        return all(
            getattr(resources, key) <= available[key]
            for key in self._RESOURCE_KEYS
        )

    def _meets_minimum_requirements(self, resources: Dict[str, int]) -> bool:
        """Check if available resources meet minimum requirements."""
        return (
            resources["cpu"] >= settings.MIN_CPU_CORES and
            resources["memory"] >= settings.MIN_MEMORY_GB and
            resources["storage"] >= settings.MIN_STORAGE_GB
        )

    async def allocate(self, resources: VMResources, vm_id: Optional[str] = None) -> bool:
        """Allocate resources for a VM.

        Args:
            resources: Amounts to reserve.
            vm_id: When given, the reservation is also recorded under this ID.

        Returns:
            False (with nothing changed) if the request exceeds what is
            available, True once the reservation is recorded.
        """
        async with self._lock:
            if not self._can_allocate(resources):
                return False

            for key in self._RESOURCE_KEYS:
                self.allocated_resources[key] += getattr(resources, key)

            if vm_id:
                self._allocated_vms[vm_id] = resources

            logger.info(
                f"Allocated resources: CPU={resources.cpu}, "
                f"Memory={resources.memory}GB, Storage={resources.storage}GB"
            )

        await self._notify_update()
        return True

    async def deallocate(self, resources: VMResources, vm_id: Optional[str] = None) -> None:
        """Deallocate resources from a VM.

        Running sums are clamped at zero, so releasing more than was
        allocated cannot drive them negative.
        """
        async with self._lock:
            for key in self._RESOURCE_KEYS:
                self.allocated_resources[key] = max(
                    0, self.allocated_resources[key] - getattr(resources, key)
                )

            if vm_id:
                self._allocated_vms.pop(vm_id, None)

            logger.info(
                f"Deallocated resources: CPU={resources.cpu}, "
                f"Memory={resources.memory}GB, Storage={resources.storage}GB"
            )

        await self._notify_update()

    def get_allocated_vms(self) -> List[str]:
        """Get list of allocated VM IDs."""
        return list(self._allocated_vms)

    def get_available_resources(self) -> Dict[str, int]:
        """Get currently available resources."""
        return {
            key: max(0, self.total_resources[key] - self.allocated_resources[key])
            for key in self._RESOURCE_KEYS
        }

    def can_accept_resources(self, resources: VMResources) -> bool:
        """Check if resources can be accepted.

        True when the request fits into what is available and what is
        currently available still meets the configured minimums.
        """
        return (
            self._can_allocate(resources) and
            self._meets_minimum_requirements(self.get_available_resources())
        )

    def on_update(self, callback: Callable) -> None:
        """Register callback for resource updates.

        The callback is awaited with no arguments.
        """
        self._update_callbacks.append(callback)

    async def _notify_update(self) -> None:
        """Notify all registered callbacks of resource update.

        A failing callback is logged and does not stop the remaining ones.
        """
        # Iterate over a snapshot so a registration made while callbacks run
        # cannot disturb the loop.
        for callback in list(self._update_callbacks):
            try:
                await callback()
            except Exception as e:
                logger.error(f"Error in resource update callback: {e}")

    async def sync_with_multipass(self, vm_resources: Dict[str, VMResources]) -> None:
        """Sync resource tracker state with actual multipass VM states.

        Existing sums and the per-VM map are discarded and rebuilt from the
        given mapping.

        Args:
            vm_resources: Dictionary mapping VM names to their resources
        """
        async with self._lock:
            # Reset allocated resources
            self.allocated_resources = {
                "cpu": 0,
                "memory": 0,
                "storage": 0
            }
            self._allocated_vms.clear()

            # Add resources for each running VM
            for vm_name, resources in vm_resources.items():
                for key in self._RESOURCE_KEYS:
                    self.allocated_resources[key] += getattr(resources, key)
                self._allocated_vms[vm_name] = resources

            logger.info(
                f"Synced allocated resources: CPU={self.allocated_resources['cpu']}, "
                f"Memory={self.allocated_resources['memory']}GB, "
                f"Storage={self.allocated_resources['storage']}GB"
            )

        await self._notify_update()
|