golem-vm-provider 0.1.57__py3-none-any.whl → 0.1.59__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: golem-vm-provider
- Version: 0.1.57
+ Version: 0.1.59
  Summary: VM on Golem Provider Node - Run your own provider node to offer VMs on the Golem Network
  Keywords: golem,vm,provider,cloud,decentralized
  Author: Phillip Jensen
@@ -470,6 +470,19 @@ golem-provider start
  GOLEM_PROVIDER_ENVIRONMENT=development golem-provider start --network testnet
  ```

+ Run as a background service (no terminal):
+
+ ```bash
+ # Start in background and write a PID file
+ golem-provider start --daemon [--network testnet|mainnet]
+
+ # Stop the background process
+ golem-provider stop
+
+ # Check environment and port health (unchanged)
+ golem-provider status [--json]
+ ```
+
  ### Mode vs. Network

  - Development Mode (`GOLEM_PROVIDER_ENVIRONMENT=development`)
@@ -1,45 +1,46 @@
  provider/__init__.py,sha256=HO1fkPpZqPO3z8O8-eVIyx8xXSMIVuTR_b1YF0RtXOg,45
  provider/api/__init__.py,sha256=ssX1ugDqEPt8Fn04IymgmG-Ev8PiXLsCSaiZVvHQnec,344
- provider/api/models.py,sha256=CmfgXqSH3m0HLqY6JvUFI-2IrdGf3EhNKtZ5kbIAX-U,4304
- provider/api/routes.py,sha256=tH6_msflEgx4O6nMku_Lgg4OW-JonqXHv89NibDFc94,13678
- provider/config.py,sha256=nQzYBujgn-Z7Rqh6q0eOsTpk6R9-V-YF1OysmPpSH0Q,28993
- provider/container.py,sha256=xN1a9qClciGomppCBnEGuPPNzGQkYIWlw1lzexrjptM,3726
+ provider/api/models.py,sha256=LcEWVUE8zvX_9ByyzbyoZGiDIJf_4MLDYqX7_nRO6B0,4754
+ provider/api/routes.py,sha256=4luagwoqW84PPvQRE3LR-W5XfoHpAbG0NK2fWnE1qSo,20228
+ provider/config.py,sha256=65L47ByUXoyvhD5DSsIYbM3yjlwFB5j4L6-__kAlkW0,29186
+ provider/container.py,sha256=QCYlIvR1m0ediA9PwJ_OXwZU3Ye-R7G6ZGsXFDji_jQ,3957
  provider/data/deployments/l2.json,sha256=XTNN2C5LkBfp4YbDKdUKfWMdp1fKnfv8D3TgcwVWxtQ,249
  provider/discovery/__init__.py,sha256=Y6o8RxGevBpuQS3k32y-zSVbP6HBXG3veBl9ElVPKaU,349
- provider/discovery/advertiser.py,sha256=o-LiDl1j0lXMUU0-zPe3qerjpoD2360EA60Y_V_VeBc,6571
- provider/discovery/golem_base_advertiser.py,sha256=A8bg40b2Ua7PIjx3Y8-SC0s-dUUPWxaiQCzr6AcpYaQ,7334
+ provider/discovery/advertiser.py,sha256=SZ4EN7zChEL9g-Dt4vqED8dAraGS-_4Iyx4F9AiVk9o,7056
+ provider/discovery/golem_base_advertiser.py,sha256=LGvPLljPxh6i-80OeZnezdphtHu3Tf6nkLE8zRYY2b0,7849
  provider/discovery/golem_base_utils.py,sha256=xk7vznhMgzrn0AuGyk6-9N9ukp9oPdBbbk1RI-sVjp0,607
  provider/discovery/multi_advertiser.py,sha256=_J79wA1-XQ4GsLzt9KrKpWigGSGBqtut7DaocIk2fyE,991
  provider/discovery/resource_monitor.py,sha256=AmiEc7yBGEGXCunQ-QKmVgosDX3gOhK1Y58LJZXrwAs,949
  provider/discovery/resource_tracker.py,sha256=MP7IXd3aIMsjB4xz5Oj9zFDTEnvrnw-Cyxpl33xcJcc,6006
- provider/discovery/service.py,sha256=vX_mVSxvn3arnb2cKDM_SeJp1ZgPdImP2aUubeXgdRg,915
- provider/main.py,sha256=2FicpbL8113Gvw3qQzhVHdpYixrNgIYbqwYI0nJaqRI,55746
+ provider/discovery/service.py,sha256=0H4H8HuFP52xE6lrXTOUHfvaIVjbT1uzcnlPxEswjNc,1000
+ provider/jobs/store.py,sha256=gLT5tWS7RmxGFfpjriECl4Kfn7osCP2woA8plKTZM4g,3958
+ provider/main.py,sha256=CFMlTeXAkTnE8zRVEGOdkbICzFYwwoZUjBPsUOLJsoM,64479
  provider/network/port_verifier.py,sha256=mlSzr9Z-W5Z5mL3EYg4zemgGoi8Z5ebNoeFgLGRaoH4,13253
  provider/payments/blockchain_service.py,sha256=4GrzDKwCSUVoENqjD4RLyJ0qwBOJKMyVk5Li-XNsyTc,3567
- provider/payments/monitor.py,sha256=seo8vE622IdbcRE3x69IpvHn2mel_tlMNGt_DxOIoww,5386
+ provider/payments/monitor.py,sha256=qBykIXD_Fks7_VS7Mh3Zb99HMC4x4Z_2PlYaMyfBORc,6069
  provider/payments/stream_map.py,sha256=qk6Y8hS72DplAifZ0ZMWPHBAyc_3IWIQyWUBuCU3_To,1191
  provider/security/ethereum.py,sha256=EwPZj4JR8OEpto6LhKjuuT3Z9pBX6P7-UQaqJtqFkYQ,1242
  provider/security/faucet.py,sha256=8T4lW1fVQgUk8EQILgbrr9UUosw9e7eA40tlZ2_KCPQ,4368
  provider/security/l2_faucet.py,sha256=yRV4xdPBgU8-LDTLqtuAijfgIoe2kYxvXqJLxFd-BVI,2662
- provider/service.py,sha256=hlQn0woppsYFHZDMEgq-40cOjmiPWruiWLy_dQvaCRU,6859
+ provider/service.py,sha256=lX017IDJYyW-zufoKts3GQptiJCR9MziQTKkJzXCk-k,8285
  provider/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  provider/utils/ascii_art.py,sha256=ykBFsztk57GIiz1NJ-EII5UvN74iECqQL4h9VmiW6Z8,3161
  provider/utils/logging.py,sha256=1Br806ohJyYpDIw1i8NsNpg8Xc-8-rUYwKBU4LFomLk,2623
  provider/utils/port_display.py,sha256=u1HWQFA2kPbsM-TnsQfL6Hr4KmjIZWZfsjoxarHpbW0,11981
- provider/utils/pricing.py,sha256=eDEjt0s7REyTR-7b_3D_a_yPCnQ4req2KvtemYrE2Kw,6673
+ provider/utils/pricing.py,sha256=YeIeacjhb4wD0PucIKwimTy565N-8S6KJdmwhksXPtU,6716
  provider/utils/retry.py,sha256=GvBjpr0DpTOgw28M2hI0yt17dpYLRwrxUUqVxWHQPtM,3148
  provider/utils/setup.py,sha256=Z5dLuBQkb5vdoQsu1HJZwXmu9NWsiBYJ7Vq9-C-_tY8,2932
  provider/vm/__init__.py,sha256=LJL504QGbqZvBbMN3G9ixMgAwvOWAKW37zUm_EiaW9M,508
  provider/vm/cloud_init.py,sha256=E5dDH7dqStRcJNDfbarBBe83-c9N63W8B5ycIrHI8eU,4627
  provider/vm/models.py,sha256=hNeXgOnXWyeSiYt07Pdks0B20cDi_VC8jV-tCxULNng,6350
  provider/vm/multipass.py,sha256=rjO3GtuS4O_wXyYXSUiDGWYtQV2LpGxm6kITrA-ghBQ,617
- provider/vm/multipass_adapter.py,sha256=BUC9thQqzKVZqpSWMI9Nbx-YMz-8OeYFj2bAFzFAjg8,10621
+ provider/vm/multipass_adapter.py,sha256=HW4_7cs3O_SCi1unfIvhC44duO561bpO6yrHGkqhLQA,11578
  provider/vm/name_mapper.py,sha256=14nKfCjJ1WkXfC4vnCYIxNGQUwcl2vcxrJYUAz4fL40,4073
  provider/vm/port_manager.py,sha256=iYSwjTjD_ziOhG8aI7juKHw1OwwRUTJQyQoRUNQvz9w,12514
  provider/vm/provider.py,sha256=A7QN89EJjcSS40_SmKeinG1Jp_NGffJaLse-XdKciAs,1164
  provider/vm/proxy_manager.py,sha256=n4NTsyz2rtrvjtf_ceKBk-g2q_mzqPwruB1q7UlQVBc,14928
  provider/vm/service.py,sha256=Ki4SGNIZUq3XmaPMwAOoNzdZzKQsmFXid374wgjFPes,4636
- golem_vm_provider-0.1.57.dist-info/METADATA,sha256=qsWTEj2YWwwpv_NTw4f6cFqFulMm5NZvMUP2iAK8-B8,20932
- golem_vm_provider-0.1.57.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
- golem_vm_provider-0.1.57.dist-info/entry_points.txt,sha256=5Jiie1dIXygmxmDW66bKKxQpmBLJ7leSKRrb8bkQALw,52
- golem_vm_provider-0.1.57.dist-info/RECORD,,
+ golem_vm_provider-0.1.59.dist-info/METADATA,sha256=gk1kKhCSdXFCqD5sHNC81F8nl3PtgNSp1Rwxwpk2CXg,21221
+ golem_vm_provider-0.1.59.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+ golem_vm_provider-0.1.59.dist-info/entry_points.txt,sha256=5Jiie1dIXygmxmDW66bKKxQpmBLJ7leSKRrb8bkQALw,52
+ golem_vm_provider-0.1.59.dist-info/RECORD,,
provider/api/models.py CHANGED
@@ -116,6 +116,9 @@ class ProviderInfoResponse(BaseModel):
  provider_id: str
  stream_payment_address: str
  glm_token_address: str
+ ip_address: Optional[str] = None
+ country: Optional[str] = None
+ platform: Optional[str] = None


  class StreamOnChain(BaseModel):
@@ -144,3 +147,10 @@ class StreamStatus(BaseModel):
  computed: StreamComputed
  verified: bool
  reason: str
+
+
+ class CreateVMJobResponse(BaseModel):
+ """Lightweight response for async VM creation scheduling."""
+ job_id: str = Field(..., description="Server-side job identifier for creation task")
+ vm_id: str = Field(..., description="Requestor VM identifier (name)")
+ status: str = Field("creating", description="Initial status indicator")
provider/api/routes.py CHANGED
@@ -2,17 +2,28 @@ import json
  import os
  from typing import List
  from pathlib import Path
- from fastapi import APIRouter, HTTPException, Request
+ import asyncio
+ import uuid
+ from fastapi import APIRouter, HTTPException, Request, Query
+ from fastapi.responses import JSONResponse

  from dependency_injector.wiring import inject, Provide
  from fastapi import APIRouter, HTTPException, Depends

  from typing import TYPE_CHECKING, Any
  from ..container import Container
+ from ..jobs.store import JobStore
  from ..utils.logging import setup_logger
  from ..utils.ascii_art import vm_creation_animation, vm_status_change
  from ..vm.models import VMInfo, VMAccessInfo, VMConfig, VMResources, VMNotFoundError
- from .models import CreateVMRequest, ProviderInfoResponse, StreamStatus, StreamOnChain, StreamComputed
+ from .models import (
+ CreateVMRequest,
+ ProviderInfoResponse,
+ StreamStatus,
+ StreamOnChain,
+ StreamComputed,
+ CreateVMJobResponse,
+ )
  from ..payments.blockchain_service import StreamPaymentReader
  from ..vm.service import VMService
  from ..vm.multipass_adapter import MultipassError
@@ -20,28 +31,30 @@ from ..vm.multipass_adapter import MultipassError
  logger = setup_logger(__name__)
  router = APIRouter()

+ # Job status persisted in SQLite via JobStore (see Container.job_store)

- @router.post("/vms", response_model=VMInfo)
+
+ @router.post("/vms")
  @inject
  async def create_vm(
  request: CreateVMRequest,
  vm_service: VMService = Depends(Provide[Container.vm_service]),
  settings: Any = Depends(Provide[Container.config]),
  stream_map = Depends(Provide[Container.stream_map]),
- ) -> VMInfo:
- """Create a new VM."""
+ job_store: JobStore = Depends(Provide[Container.job_store]),
+ async_mode: bool = Query(default=False, alias="async"),
+ ) -> Any:
+ """Create a VM (sync by default; async when `?async=true`)."""
  try:
- logger.info(f"📥 Received VM creation request for '{request.name}'")
-
+ logger.info(f"📥 Received VM creation request for '{request.name}' (async={async_mode})")
+
  resources = request.resources or VMResources()

  # If payments are enabled, require a valid stream before starting
- # Determine if we should enforce gating
  enforce = False
  spa = (settings.get("STREAM_PAYMENT_ADDRESS") if isinstance(settings, dict) else getattr(settings, "STREAM_PAYMENT_ADDRESS", None))
  if spa and spa != "0x0000000000000000000000000000000000000000":
  if os.environ.get("PYTEST_CURRENT_TEST"):
- # In pytest, skip gating only when using default deployment address
  try:
  from ..config import Settings as _Cfg # type: ignore
  default_spa, _ = _Cfg._load_l2_deployment() # type: ignore[attr-defined]
@@ -67,39 +80,70 @@ async def create_vm(
  f"start={s['startTime']} stop={s['stopTime']} rate={s['ratePerSecond']} deposit={s['deposit']} withdrawn={s['withdrawn']} remaining={remaining}s"
  )
  except Exception:
- # Best-effort logging; creation will continue/fail based on ok
  pass
  if not ok:
  raise HTTPException(status_code=400, detail=f"invalid stream: {reason}")
-
- # Create VM config
+
  config = VMConfig(
  name=request.name,
  image=request.image or (settings.get("DEFAULT_VM_IMAGE") if isinstance(settings, dict) else getattr(settings, "DEFAULT_VM_IMAGE", "")),
  resources=resources,
- ssh_key=request.ssh_key
+ ssh_key=request.ssh_key,
  )
-
- vm_info = await vm_service.create_vm(config)
- # Persist VM->stream mapping if provided
- if request.stream_id is not None:
+
+ if not async_mode:
+ vm_info = await vm_service.create_vm(config)
+ if request.stream_id is not None:
+ try:
+ await stream_map.set(vm_info.id, int(request.stream_id))
+ except Exception as e: # noqa: BLE001
+ logger.warning(f"failed to persist stream mapping for {vm_info.id}: {e}")
+ await vm_creation_animation(request.name)
+ return vm_info
+
+ # Async path
+ job_id = str(uuid.uuid4())
+ await job_store.create_job(job_id, request.name, status="creating")
+
+ async def _run_creation():
  try:
- await stream_map.set(vm_info.id, int(request.stream_id))
- except Exception as e:
- logger.warning(f"failed to persist stream mapping for {vm_info.id}: {e}")
- await vm_creation_animation(request.name)
- return vm_info
+ vm_info = await vm_service.create_vm(config)
+ if request.stream_id is not None:
+ try:
+ await stream_map.set(vm_info.id, int(request.stream_id))
+ except Exception as e: # noqa: BLE001
+ logger.warning(f"failed to persist stream mapping for {vm_info.id}: {e}")
+ await vm_creation_animation(request.name)
+ await job_store.update_job(job_id, status="ready")
+ except Exception as e: # noqa: BLE001
+ logger.error(f"Create VM job failed: {e}")
+ await job_store.update_job(job_id, status="failed", error=str(e))
+
+ asyncio.create_task(_run_creation(), name=f"create-vm:{request.name}")
+
+ env = CreateVMJobResponse(job_id=job_id, vm_id=request.name, status="creating")
+ return JSONResponse(status_code=202, content=env.model_json_schema() and env.model_dump())
+
  except MultipassError as e:
  logger.error(f"Failed to create VM: {e}")
  raise HTTPException(status_code=500, detail=str(e))
  except HTTPException:
- # Propagate explicit HTTP errors (e.g., payment gating)
  raise
  except Exception as e:
  logger.error(f"An unexpected error occurred: {e}")
  raise HTTPException(status_code=500, detail="An unexpected error occurred")


+ @router.get("/vms/jobs/{job_id}")
+ @inject
+ async def get_create_job(job_id: str, job_store: JobStore = Depends(Provide[Container.job_store])):
+ """Return async creation job status."""
+ job = await job_store.get_job(job_id)
+ if not job:
+ raise HTTPException(status_code=404, detail="job not found")
+ return job
+
+
  @router.get("/vms", response_model=List[VMInfo])
  @inject
  async def list_vms(
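The async path above returns `202 Accepted` with a `CreateVMJobResponse`, and the new `GET /vms/jobs/{job_id}` endpoint exposes the job record kept by the SQLite-backed `JobStore`. A minimal requestor-side sketch of that flow; the base URL, port, VM name, and SSH key are illustrative assumptions, and the `/api/v1` prefix is taken from the `PROVIDER_API_URL` used for the GUI:

```python
# Hypothetical requestor-side polling of the async creation flow (not part of the package).
# BASE_URL, the VM name, and the SSH key are placeholder values.
import time
import requests

BASE_URL = "http://127.0.0.1:7466/api/v1"  # assumed provider address and prefix

resp = requests.post(
    f"{BASE_URL}/vms",
    params={"async": "true"},  # opt in to the 202 + job-id path
    json={"name": "my-vm", "ssh_key": "ssh-ed25519 AAAA... user@host"},
    timeout=30,
)
resp.raise_for_status()
job = resp.json()  # {"job_id": "...", "vm_id": "my-vm", "status": "creating"}

# Poll the job record persisted by JobStore until it leaves "creating".
while True:
    status = requests.get(f"{BASE_URL}/vms/jobs/{job['job_id']}", timeout=10).json()
    if status["status"] in ("ready", "failed"):
        break
    time.sleep(2)

print(status["status"], status.get("error"))
```

Without `?async=true` the endpoint still blocks and returns the full `VMInfo`, so existing clients keep working.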
@@ -157,9 +201,18 @@ async def get_vm_access(
  if not multipass_name:
  raise HTTPException(404, "VM mapping not found")

+ # If ssh_port is not yet assigned, return 202 with a simple status payload
+ if vm.ssh_port is None:
+ return JSONResponse(status_code=202, content={
+ "vm_id": requestor_name,
+ "multipass_name": multipass_name,
+ "status": "creating",
+ "ssh_port": None,
+ })
+
  return VMAccessInfo(
  ssh_host=((settings.get("PUBLIC_IP") if isinstance(settings, dict) else getattr(settings, "PUBLIC_IP", None)) or "localhost"),
- ssh_port=vm.ssh_port,
+ ssh_port=int(vm.ssh_port),
  vm_id=requestor_name,
  multipass_name=multipass_name
  )
@@ -225,10 +278,33 @@ async def delete_vm(
  @router.get("/provider/info", response_model=ProviderInfoResponse)
  @inject
  async def provider_info(settings: Any = Depends(Provide[Container.config])) -> ProviderInfoResponse:
+ # Derive platform similar to advertiser
+ import platform as _plat
+ raw = _plat.machine().lower()
+ platform_str = None
+ try:
+ if 'arm' in raw:
+ platform_str = 'arm64'
+ elif 'x86_64' in raw or 'amd64' in raw or 'x64' in raw:
+ platform_str = 'x86_64'
+ else:
+ platform_str = raw
+ except Exception:
+ platform_str = None
+
+ ip_addr = None
+ try:
+ ip_addr = settings.get("PUBLIC_IP") if isinstance(settings, dict) else getattr(settings, "PUBLIC_IP", None)
+ except Exception:
+ ip_addr = None
+
  return ProviderInfoResponse(
  provider_id=settings["PROVIDER_ID"],
  stream_payment_address=settings["STREAM_PAYMENT_ADDRESS"],
  glm_token_address=settings["GLM_TOKEN_ADDRESS"],
+ ip_address=ip_addr,
+ country=(settings.get("PROVIDER_COUNTRY") if isinstance(settings, dict) else getattr(settings, "PROVIDER_COUNTRY", None)),
+ platform=platform_str,
  )


@@ -299,3 +375,83 @@ async def list_stream_statuses(
  logger.warning(f"stream {stream_id} lookup failed: {e}")
  continue
  return resp
+
+
+ # --- GUI support endpoints ---
+ @router.get("/summary")
+ @inject
+ async def provider_summary(
+ vm_service: VMService = Depends(Provide[Container.vm_service]),
+ settings: Any = Depends(Provide[Container.config]),
+ container: Container = Depends(Provide[Container]),
+ ):
+ """Concise provider summary for GUI: status, resources, pricing, VMs."""
+ try:
+ # Resources
+ rt = container.resource_tracker()
+ total = getattr(rt, "total_resources", {})
+ available = rt.get_available_resources() if hasattr(rt, "get_available_resources") else {}
+
+ # Pricing (both USD and GLM per month per unit)
+ pricing = {
+ "usd_per_core_month": float(settings["PRICE_USD_PER_CORE_MONTH"]) if isinstance(settings, dict) else float(getattr(settings, "PRICE_USD_PER_CORE_MONTH", 0)),
+ "usd_per_gb_ram_month": float(settings["PRICE_USD_PER_GB_RAM_MONTH"]) if isinstance(settings, dict) else float(getattr(settings, "PRICE_USD_PER_GB_RAM_MONTH", 0)),
+ "usd_per_gb_storage_month": float(settings["PRICE_USD_PER_GB_STORAGE_MONTH"]) if isinstance(settings, dict) else float(getattr(settings, "PRICE_USD_PER_GB_STORAGE_MONTH", 0)),
+ "glm_per_core_month": float(settings["PRICE_GLM_PER_CORE_MONTH"]) if isinstance(settings, dict) else float(getattr(settings, "PRICE_GLM_PER_CORE_MONTH", 0)),
+ "glm_per_gb_ram_month": float(settings["PRICE_GLM_PER_GB_RAM_MONTH"]) if isinstance(settings, dict) else float(getattr(settings, "PRICE_GLM_PER_GB_RAM_MONTH", 0)),
+ "glm_per_gb_storage_month": float(settings["PRICE_GLM_PER_GB_STORAGE_MONTH"]) if isinstance(settings, dict) else float(getattr(settings, "PRICE_GLM_PER_GB_STORAGE_MONTH", 0)),
+ }
+
+ # VMs
+ vms = []
+ try:
+ items = await vm_service.list_vms()
+ for vm in items:
+ vms.append({
+ "id": vm.id,
+ "status": vm.status.value if hasattr(vm, "status") else str(getattr(vm, "status", "")),
+ "ssh_port": getattr(vm, "ssh_port", None),
+ "resources": {
+ "cpu": getattr(getattr(vm, "resources", None), "cpu", None),
+ "memory": getattr(getattr(vm, "resources", None), "memory", None),
+ "storage": getattr(getattr(vm, "resources", None), "storage", None),
+ },
+ })
+ except Exception:
+ vms = []
+
+ # Basic environment info
+ env = {
+ "environment": settings["ENVIRONMENT"] if isinstance(settings, dict) else getattr(settings, "ENVIRONMENT", None),
+ "network": settings.get("NETWORK") if isinstance(settings, dict) else getattr(settings, "NETWORK", None),
+ }
+
+ return {
+ "status": "running",
+ "resources": {"total": total, "available": available},
+ "pricing": pricing,
+ "vms": vms,
+ "env": env,
+ }
+ except Exception as e:
+ logger.error(f"summary endpoint failed: {e}")
+ raise HTTPException(status_code=500, detail="failed to collect summary")
+
+
+ @router.post("/admin/shutdown")
+ async def admin_shutdown():
+ """Schedule a graceful provider shutdown. Returns immediately."""
+ try:
+ import asyncio, os, signal
+ loop = asyncio.get_running_loop()
+ # Try to signal our own process for a clean exit shortly after responding
+ def _sig():
+ try:
+ os.kill(os.getpid(), signal.SIGTERM)
+ except Exception:
+ os._exit(0) # last resort
+ loop.call_later(0.2, _sig)
+ return {"ok": True}
+ except Exception as e:
+ logger.error(f"shutdown scheduling failed: {e}")
+ raise HTTPException(status_code=500, detail="failed to schedule shutdown")
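The new GUI support endpoints are plain JSON: `/summary` aggregates status, resources, pricing, and the VM list in a single call, and `/admin/shutdown` replies `{"ok": true}` and then sends SIGTERM to the provider process about 0.2 s later. A rough sketch of a consumer (the base URL is again an assumption for illustration):

```python
# Hypothetical consumer of the GUI support endpoints (not part of the package).
import requests

BASE_URL = "http://127.0.0.1:7466/api/v1"  # assumed provider address and prefix

summary = requests.get(f"{BASE_URL}/summary", timeout=10).json()
print(summary["status"], summary["env"])
for vm in summary["vms"]:
    print(vm["id"], vm["status"], "ssh_port:", vm["ssh_port"])

# Ask the provider to exit; it answers immediately and signals itself shortly after.
requests.post(f"{BASE_URL}/admin/shutdown", timeout=10)
```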
provider/config.py CHANGED
@@ -186,6 +186,12 @@ class Settings(BaseSettings):
  description="Min withdrawable amount (wei) before triggering withdraw"
  )

+ # Shutdown behavior
+ STOP_VMS_ON_EXIT: bool = Field(
+ default=False,
+ description="When true, stop all running VMs on provider shutdown. Default keeps VMs running."
+ )
+
  # Faucet settings (L3 for Golem Base adverts)
  FAUCET_URL: str = "https://ethwarsaw.holesky.golemdb.io/faucet"
  CAPTCHA_URL: str = "https://cap.gobas.me"
provider/container.py CHANGED
@@ -14,6 +14,7 @@ from .vm.name_mapper import VMNameMapper
  from .vm.port_manager import PortManager
  from .vm.proxy_manager import PythonProxyManager
  from .payments.stream_map import StreamMap
+ from .jobs.store import JobStore
  from .payments.blockchain_service import StreamPaymentReader, StreamPaymentClient, StreamPaymentConfig as _SPC
  from .payments.monitor import StreamMonitor

@@ -119,3 +120,9 @@ class Container(containers.DeclarativeContainer):
  advertisement_service=advertisement_service,
  port_manager=port_manager,
  )
+
+ # Async job store for VM creations
+ job_store = providers.Singleton(
+ JobStore,
+ db_path=providers.Callable(lambda base: Path(base) / "jobs.sqlite", config.VM_DATA_DIR),
+ )
provider/discovery/advertiser.py CHANGED
@@ -115,6 +115,16 @@ class DiscoveryServerAdvertiser(Advertiser):
  return

  try:
+ import platform as _plat
+ raw = (_plat.machine() or '').lower()
+ platform_str = None
+ if raw:
+ if 'aarch64' in raw or 'arm64' in raw or raw.startswith('arm'):
+ platform_str = 'arm64'
+ elif 'x86_64' in raw or 'amd64' in raw or 'x64' in raw:
+ platform_str = 'x86_64'
+ else:
+ platform_str = raw
  async with self.session.post(
  f"{self.discovery_url}/api/v1/advertisements",
  headers={
@@ -125,6 +135,7 @@ class DiscoveryServerAdvertiser(Advertiser):
  json={
  "ip_address": ip_address,
  "country": settings.PROVIDER_COUNTRY,
+ "platform": platform_str,
  "resources": resources,
  "pricing": {
  "usd_per_core_month": settings.PRICE_USD_PER_CORE_MONTH,
provider/discovery/golem_base_advertiser.py CHANGED
@@ -65,6 +65,16 @@ class GolemBaseAdvertiser(Advertiser):
  existing_keys = await get_provider_entity_keys(self.client, settings.PROVIDER_ID)

  # String annotations (metadata + prices as strings; on-chain numeric annotations must be ints)
+ import platform as _plat
+ raw = (_plat.machine() or '').lower()
+ platform_str = None
+ if raw:
+ if 'aarch64' in raw or 'arm64' in raw or raw.startswith('arm'):
+ platform_str = 'arm64'
+ elif 'x86_64' in raw or 'amd64' in raw or 'x64' in raw:
+ platform_str = 'x86_64'
+ else:
+ platform_str = raw
  string_annotations = [
  Annotation(key="golem_type", value="provider"),
  Annotation(key="golem_network", value=settings.NETWORK),
@@ -73,6 +83,7 @@ class GolemBaseAdvertiser(Advertiser):
  Annotation(key="golem_ip_address", value=ip_address),
  Annotation(key="golem_country", value=settings.PROVIDER_COUNTRY),
  Annotation(key="golem_provider_name", value=settings.PROVIDER_NAME),
+ Annotation(key="golem_platform", value=platform_str or ""),
  Annotation(key="golem_price_currency", value="USD/GLM"),
  # Prices must be strings to avoid RLP sedes errors (ints only allowed for numeric annotations)
  Annotation(key="golem_price_usd_core_month", value=str(float(settings.PRICE_USD_PER_CORE_MONTH))),
provider/discovery/service.py CHANGED
@@ -20,7 +20,10 @@ class AdvertisementService:
  """Stop the advertiser."""
  if self._task:
  self._task.cancel()
- await self._task
+ try:
+ await self._task
+ except asyncio.CancelledError:
+ pass
  await self.advertiser.stop()

  async def trigger_update(self):
provider/jobs/store.py ADDED
@@ -0,0 +1,116 @@
+ import asyncio
+ import sqlite3
+ from dataclasses import dataclass
+ from datetime import datetime, timezone
+ from pathlib import Path
+ from typing import Optional, Dict, Any
+
+
+ @dataclass
+ class JobRecord:
+ job_id: str
+ vm_id: str
+ status: str
+ error: Optional[str]
+ created_at: str
+ updated_at: str
+
+
+ class JobStore:
+ """SQLite-backed store for VM creation jobs.
+
+ Keeps minimal fields to track progress and errors across restarts.
+ """
+
+ def __init__(self, db_path: Path):
+ self._db_path = Path(db_path)
+ # Ensure parent directory exists
+ self._db_path.parent.mkdir(parents=True, exist_ok=True)
+ self._init_schema()
+
+ def _init_schema(self) -> None:
+ conn = sqlite3.connect(self._db_path, check_same_thread=False)
+ try:
+ with conn:
+ conn.execute(
+ """
+ CREATE TABLE IF NOT EXISTS jobs (
+ job_id TEXT PRIMARY KEY,
+ vm_id TEXT NOT NULL,
+ status TEXT NOT NULL,
+ error TEXT,
+ created_at TEXT NOT NULL,
+ updated_at TEXT NOT NULL
+ )
+ """
+ )
+ finally:
+ conn.close()
+
+ async def create_job(self, job_id: str, vm_id: str, status: str = "creating") -> None:
+ now = datetime.now(timezone.utc).isoformat()
+
+ def _op():
+ conn = sqlite3.connect(self._db_path, check_same_thread=False)
+ try:
+ with conn:
+ conn.execute(
+ "INSERT OR REPLACE INTO jobs (job_id, vm_id, status, error, created_at, updated_at) VALUES (?, ?, ?, NULL, ?, ?)",
+ (job_id, vm_id, status, now, now),
+ )
+ finally:
+ conn.close()
+
+ await asyncio.to_thread(_op)
+
+ async def update_job(self, job_id: str, *, status: Optional[str] = None, error: Optional[str] = None) -> None:
+ now = datetime.now(timezone.utc).isoformat()
+
+ def _op():
+ conn = sqlite3.connect(self._db_path, check_same_thread=False)
+ try:
+ with conn:
+ if status is not None and error is not None:
+ conn.execute(
+ "UPDATE jobs SET status = ?, error = ?, updated_at = ? WHERE job_id = ?",
+ (status, error, now, job_id),
+ )
+ elif status is not None:
+ conn.execute(
+ "UPDATE jobs SET status = ?, updated_at = ? WHERE job_id = ?",
+ (status, now, job_id),
+ )
+ elif error is not None:
+ conn.execute(
+ "UPDATE jobs SET error = ?, updated_at = ? WHERE job_id = ?",
+ (error, now, job_id),
+ )
+ finally:
+ conn.close()
+
+ await asyncio.to_thread(_op)
+
+ async def get_job(self, job_id: str) -> Optional[Dict[str, Any]]:
+ def _op():
+ conn = sqlite3.connect(self._db_path, check_same_thread=False)
+ try:
+ cur = conn.execute(
+ "SELECT job_id, vm_id, status, error, created_at, updated_at FROM jobs WHERE job_id = ?",
+ (job_id,),
+ )
+ row = cur.fetchone()
+ if not row:
+ return None
+ return {
+ "job_id": row[0],
+ "vm_id": row[1],
+ "status": row[2],
+ "error": row[3],
+ "created_at": row[4],
+ "updated_at": row[5],
+ }
+ finally:
+ conn.close()
+
+ return await asyncio.to_thread(_op)
+
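A quick usage sketch of the store outside the provider (the database path here is a throwaway example; in the package the container wires it to `VM_DATA_DIR / "jobs.sqlite"`):

```python
# Illustration: exercising JobStore directly (temporary path assumed, not the packaged default).
import asyncio
import uuid
from pathlib import Path

from provider.jobs.store import JobStore


async def main() -> None:
    store = JobStore(Path("/tmp/golem-jobs-demo.sqlite"))
    job_id = str(uuid.uuid4())
    await store.create_job(job_id, vm_id="my-vm")    # status defaults to "creating"
    await store.update_job(job_id, status="ready")   # or status="failed", error="..."
    print(await store.get_job(job_id))               # dict with status, error and timestamps


asyncio.run(main())
```

The SQLite calls run in a thread via `asyncio.to_thread`, so the store can be awaited from the request handlers without blocking the event loop.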
provider/main.py CHANGED
@@ -137,6 +137,11 @@ async def verify_provider_port(port: int) -> bool:


  import typer
+ import platform as _platform
+ import signal as _signal
+ import time as _time
+ import shutil as _shutil
+ import psutil
  try:
  from importlib import metadata
  except ImportError:
@@ -171,6 +176,91 @@ def _get_latest_version_from_pypi(pkg_name: str) -> Optional[str]:
  # Avoid network in pytest runs
  if os.environ.get("PYTEST_CURRENT_TEST"):
  return None
+
+
+ # ---------------------------
+ # Daemon/PID file management
+ # ---------------------------
+
+ def _pid_dir() -> str:
+ from pathlib import Path
+ plat = _platform.system().lower()
+ if plat.startswith("darwin"):
+ base = Path.home() / "Library" / "Application Support" / "Golem Provider"
+ elif plat.startswith("windows"):
+ base = Path(os.environ.get("APPDATA", str(Path.home() / "AppData" / "Roaming"))) / "Golem Provider"
+ else:
+ base = Path(os.environ.get("XDG_STATE_HOME", str(Path.home() / ".local" / "state"))) / "golem-provider"
+ base.mkdir(parents=True, exist_ok=True)
+ return str(base)
+
+
+ def _pid_path() -> str:
+ from pathlib import Path
+ return str(Path(_pid_dir()) / "provider.pid")
+
+
+ def _write_pid(pid: int) -> None:
+ with open(_pid_path(), "w") as fh:
+ fh.write(str(pid))
+
+
+ def _read_pid() -> int | None:
+ try:
+ with open(_pid_path(), "r") as fh:
+ c = fh.read().strip()
+ return int(c)
+ except Exception:
+ return None
+
+
+ def _remove_pid_file() -> None:
+ try:
+ os.remove(_pid_path())
+ except Exception:
+ pass
+
+
+ def _is_running(pid: int) -> bool:
+ try:
+ return psutil.pid_exists(pid) and psutil.Process(pid).is_running()
+ except Exception:
+ return False
+
+
+ def _spawn_detached(argv: list[str], env: dict | None = None) -> int:
+ import subprocess
+ popen_kwargs = {
+ "stdin": subprocess.DEVNULL,
+ "stdout": subprocess.DEVNULL,
+ "stderr": subprocess.DEVNULL,
+ "env": env or os.environ.copy(),
+ }
+ if _platform.system().lower().startswith("windows"):
+ creationflags = 0
+ for flag in ("CREATE_NEW_PROCESS_GROUP", "DETACHED_PROCESS"):
+ v = getattr(subprocess, flag, 0)
+ if v:
+ creationflags |= v
+ if creationflags:
+ popen_kwargs["creationflags"] = creationflags # type: ignore[assignment]
+ else:
+ popen_kwargs["preexec_fn"] = os.setsid # type: ignore[assignment]
+ proc = subprocess.Popen(argv, **popen_kwargs)
+ return int(proc.pid)
+
+
+ def _self_command(base_args: list[str]) -> list[str]:
+ import sys
+ # When frozen (PyInstaller), sys.executable is the CLI binary
+ if getattr(sys, "frozen", False):
+ return [sys.executable] + base_args
+ # Prefer the console_script when available
+ exe = _shutil.which("golem-provider")
+ if exe:
+ return [exe] + base_args
+ # Fallback to module execution
+ return [sys.executable, "-m", "provider.main"] + base_args
  try:
  import json as _json
  from urllib.request import urlopen
@@ -1149,10 +1239,89 @@ def streams_withdraw(
  @cli.command()
  def start(
  no_verify_port: bool = typer.Option(False, "--no-verify-port", help="Skip provider port verification."),
- network: str = typer.Option(None, "--network", help="Target network: 'testnet' or 'mainnet' (overrides env)")
+ network: str = typer.Option(None, "--network", help="Target network: 'testnet' or 'mainnet' (overrides env)"),
+ gui: bool = typer.Option(False, "--gui/--no-gui", help="Launch Electron GUI (default: no)"),
+ daemon: bool = typer.Option(False, "--daemon", help="Start in background and write a PID file"),
+ stop_vms_on_exit: Optional[bool] = typer.Option(
+ None, "--stop-vms-on-exit/--keep-vms-on-exit",
+ help="On shutdown: stop all VMs (default: keep VMs running)"
+ ),
  ):
  """Start the provider server."""
- run_server(dev_mode=False, no_verify_port=no_verify_port, network=network)
+ if daemon:
+ # If a previous daemon is active, do not start another
+ pid = _read_pid()
+ if pid and _is_running(pid):
+ print(f"Provider already running (pid={pid})")
+ raise typer.Exit(code=0)
+ # Build child command and detach
+ args = ["start"]
+ if no_verify_port:
+ args.append("--no-verify-port")
+ if network:
+ args += ["--network", network]
+ # Force no GUI for daemonized child to avoid duplicates
+ args.append("--no-gui")
+ if stop_vms_on_exit is not None:
+ args.append("--stop-vms-on-exit" if stop_vms_on_exit else "--keep-vms-on-exit")
+ cmd = _self_command(args)
+ # Ensure GUI not auto-launched via env, regardless of defaults
+ env = {**os.environ, "GOLEM_PROVIDER_LAUNCH_GUI": "0"}
+ child_pid = _spawn_detached(cmd, env)
+ _write_pid(child_pid)
+ print(f"Started provider in background (pid={child_pid})")
+ raise typer.Exit(code=0)
+ else:
+ run_server(
+ dev_mode=False,
+ no_verify_port=no_verify_port,
+ network=network,
+ launch_gui=gui,
+ stop_vms_on_exit=stop_vms_on_exit,
+ )
+
+
+ @cli.command()
+ def stop(timeout: int = typer.Option(15, "--timeout", help="Seconds to wait for graceful shutdown")):
+ """Stop a background provider started with --daemon."""
+ pid = _read_pid()
+ if not pid:
+ print("No PID file found; nothing to stop")
+ raise typer.Exit(code=0)
+ if not _is_running(pid):
+ print("No running provider process; cleaning up PID file")
+ _remove_pid_file()
+ raise typer.Exit(code=0)
+ try:
+ p = psutil.Process(pid)
+ p.terminate()
+ except Exception:
+ # Fallback to signal/kill
+ try:
+ if _platform.system().lower().startswith("windows"):
+ os.system(f"taskkill /PID {pid} /T /F >NUL 2>&1")
+ else:
+ os.kill(pid, _signal.SIGTERM)
+ except Exception:
+ pass
+ # Wait for exit
+ start_ts = _time.time()
+ while _time.time() - start_ts < max(0, int(timeout)):
+ if not _is_running(pid):
+ break
+ _time.sleep(0.2)
+ if _is_running(pid):
+ print("Process did not exit in time; sending kill")
+ try:
+ psutil.Process(pid).kill()
+ except Exception:
+ try:
+ if not _platform.system().lower().startswith("windows"):
+ os.kill(pid, _signal.SIGKILL)
+ except Exception:
+ pass
+ _remove_pid_file()
+ print("Provider stopped")

  # Removed separate 'dev' command; use environment GOLEM_PROVIDER_ENVIRONMENT=development instead.

@@ -1288,7 +1457,78 @@ def _print_pricing_examples(glm_usd):
  f"- {name} ({res.cpu}C, {res.memory}GB RAM, {res.storage}GB Disk): ~{usd_str} per month (~{glm_str})"
  )

- def run_server(dev_mode: bool | None = None, no_verify_port: bool = False, network: str | None = None):
+ def _maybe_launch_gui(port: int):
+ import subprocess, shutil
+ import os as _os
+ from pathlib import Path
+ root = Path(__file__).parent.parent.parent
+ gui_dir = root / "provider-gui"
+ if not gui_dir.exists():
+ logger.info("GUI directory not found; running headless")
+ return
+ cmd = None
+ npm = shutil.which("npm")
+ electron_bin = gui_dir / "node_modules" / "electron" / "dist" / ("electron.exe" if _sys.platform.startswith("win") else "electron")
+ try:
+ # Ensure dependencies (electron) are present
+ if npm and not electron_bin.exists():
+ install_cmd = [npm, "ci", "--silent"] if (gui_dir / "package-lock.json").exists() else [npm, "install", "--silent"]
+ logger.info("Installing Provider GUI dependencies…")
+ subprocess.run(install_cmd, cwd=str(gui_dir), env=os.environ, check=True)
+ except Exception as e:
+ logger.warning(f"GUI dependencies install failed: {e}")
+
+ if npm:
+ cmd = [npm, "start", "--silent"]
+ elif shutil.which("electron"):
+ cmd = ["electron", "."]
+ else:
+ logger.info("No npm/electron found; skipping GUI")
+ return
+ env = {**os.environ, "PROVIDER_API_URL": f"http://127.0.0.1:{port}/api/v1"}
+ try:
+ # Detach GUI so it won't receive terminal signals (e.g., Ctrl+C) or
+ # be terminated when the provider process exits.
+ popen_kwargs = {
+ "cwd": str(gui_dir),
+ "env": env,
+ "stdin": subprocess.DEVNULL,
+ "stdout": subprocess.DEVNULL,
+ "stderr": subprocess.DEVNULL,
+ }
+ if _sys.platform.startswith("win"):
+ # Create a new process group and detach from console on Windows
+ creationflags = 0
+ try:
+ creationflags |= getattr(subprocess, "CREATE_NEW_PROCESS_GROUP")
+ except Exception:
+ pass
+ try:
+ creationflags |= getattr(subprocess, "DETACHED_PROCESS")
+ except Exception:
+ pass
+ if creationflags:
+ popen_kwargs["creationflags"] = creationflags # type: ignore[assignment]
+ else:
+ # Start a new session/process group on POSIX
+ try:
+ popen_kwargs["preexec_fn"] = _os.setsid # type: ignore[assignment]
+ except Exception:
+ pass
+
+ subprocess.Popen(cmd, **popen_kwargs)
+ logger.info("Launched Provider GUI")
+ except Exception as e:
+ logger.warning(f"Failed to launch GUI: {e}")
+
+
+ def run_server(
+ dev_mode: bool | None = None,
+ no_verify_port: bool = False,
+ network: str | None = None,
+ launch_gui: bool = False,
+ stop_vms_on_exit: bool | None = None,
+ ):
  """Helper to run the uvicorn server."""
  import sys
  from pathlib import Path
@@ -1306,6 +1546,9 @@ def run_server(dev_mode: bool | None = None, no_verify_port: bool = False, netwo
  # Apply network override early (affects settings and annotations)
  if network:
  os.environ["GOLEM_PROVIDER_NETWORK"] = network
+ # Apply shutdown behavior override early so it is reflected in settings
+ if stop_vms_on_exit is not None:
+ os.environ["GOLEM_PROVIDER_STOP_VMS_ON_EXIT"] = "1" if stop_vms_on_exit else "0"

  # The logic for setting the public IP in dev mode is now handled in config.py
  # The following lines are no longer needed and have been removed.
@@ -1344,9 +1587,15 @@ def run_server(dev_mode: bool | None = None, no_verify_port: bool = False, netwo
  log_config = uvicorn.config.LOGGING_CONFIG
  log_config["formatters"]["access"]["fmt"] = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

+ # Optionally launch GUI (non-blocking) — disabled by default
+ if bool(launch_gui):
+ try:
+ _maybe_launch_gui(int(settings.PORT))
+ except Exception:
+ logger.warning("GUI launch attempt failed; continuing headless")
+
  # Run server
- logger.process(
- f"🚀 Starting provider server on {settings.HOST}:{settings.PORT}")
+ logger.process(f"🚀 Starting provider server on {settings.HOST}:{settings.PORT}")
  uvicorn.run(
  "provider:app",
  host=settings.HOST,
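The `--daemon` flow earlier in this file writes the child PID to `provider.pid` under the platform-specific directory from `_pid_dir`, and `stop` reads it back and terminates the process. A sketch of an external liveness check that reuses that convention, assuming the Linux default location (macOS and Windows use the other branches of `_pid_dir`):

```python
# Illustration: check whether a daemonized provider is alive via its PID file.
# The path mirrors the Linux branch of _pid_dir(); adjust for macOS/Windows.
import os
from pathlib import Path

import psutil

state_dir = Path(os.environ.get("XDG_STATE_HOME", str(Path.home() / ".local" / "state")))
pid_file = state_dir / "golem-provider" / "provider.pid"

if not pid_file.exists():
    print("no PID file; provider was not started with --daemon")
else:
    pid = int(pid_file.read_text().strip())
    alive = psutil.pid_exists(pid) and psutil.Process(pid).is_running()
    print(f"provider pid {pid}: {'running' if alive else 'not running'}")
```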
provider/payments/monitor.py CHANGED
@@ -2,6 +2,7 @@ import asyncio
  from typing import Optional

  from ..utils.logging import setup_logger
+ from ..vm.models import VMNotFoundError

  logger = setup_logger(__name__)

@@ -83,6 +84,16 @@ class StreamMonitor:
  )
  try:
  await self.vm_service.stop_vm(vm_id)
+ except VMNotFoundError as e:
+ # If the VM cannot be found, remove it from the stream map
+ # to avoid repeated stop attempts and log spam.
+ logger.warning(f"stop_vm failed for {vm_id}: {e}")
+ try:
+ await self.stream_map.remove(vm_id)
+ except Exception as rem_err:
+ logger.debug(
+ f"failed to remove vm {vm_id} from stream map after not-found: {rem_err}"
+ )
  except Exception as e:
  logger.warning(f"stop_vm failed for {vm_id}: {e}")
  continue
provider/service.py CHANGED
@@ -17,6 +17,7 @@ class ProviderService:
  self.advertisement_service = advertisement_service
  self.port_manager = port_manager
  self._pricing_updater: PricingAutoUpdater | None = None
+ self._pricing_task: asyncio.Task | None = None
  self._stream_monitor = None

  async def setup(self, app: FastAPI):
@@ -104,7 +105,8 @@ class ProviderService:
  async def _on_price_updated(platform: str, glm_usd):
  await self.advertisement_service.trigger_update()
  self._pricing_updater = PricingAutoUpdater(on_updated_callback=_on_price_updated)
- asyncio.create_task(self._pricing_updater.start())
+ # Keep a handle to the background task so we can cancel it promptly on shutdown
+ self._pricing_task = asyncio.create_task(self._pricing_updater.start(), name="pricing-updater")

  # Start stream monitor if enabled
  from .container import Container
@@ -130,10 +132,49 @@ class ProviderService:
  async def cleanup(self):
  """Cleanup provider components."""
  logger.process("🔄 Cleaning up provider...")
- await self.advertisement_service.stop()
- await self.vm_service.provider.cleanup()
+ from .config import settings
+
+ # Stop advertising loop
+ try:
+ await self.advertisement_service.stop()
+ except Exception:
+ pass
+
+ # Optionally stop all running VMs based on configuration (default: keep running)
+ try:
+ if bool(getattr(settings, "STOP_VMS_ON_EXIT", False)):
+ try:
+ vms = await self.vm_service.list_vms()
+ except Exception:
+ vms = []
+ for vm in vms:
+ try:
+ await self.vm_service.stop_vm(vm.id)
+ except Exception as e:
+ logger.warning(f"Failed to stop VM {getattr(vm, 'id', '?')}: {e}")
+ except Exception:
+ pass
+
+ # Provider cleanup hook
+ try:
+ await self.vm_service.provider.cleanup()
+ except Exception:
+ pass
+
+ # Stop pricing updater promptly (cancel background task and set stop flag)
  if self._pricing_updater:
- self._pricing_updater.stop()
+ try:
+ self._pricing_updater.stop()
+ except Exception:
+ pass
+ if self._pricing_task:
+ try:
+ self._pricing_task.cancel()
+ await self._pricing_task
+ except asyncio.CancelledError:
+ pass
+ except Exception:
+ pass
  if self._stream_monitor:
  await self._stream_monitor.stop()
  logger.success("✨ Provider cleanup complete")
provider/utils/pricing.py CHANGED
@@ -165,7 +165,8 @@ class PricingAutoUpdater:
  update_glm_unit_prices_from_usd(glm_usd)
  if callable(self._on_updated):
  # Inform callback which advertising platform is active
- platform = getattr(settings, "ADVERTISER_TYPE", "discovery_server")
+ _s = _get_settings()
+ platform = getattr(_s, "ADVERTISER_TYPE", "discovery_server")
  await self._on_updated(platform=platform, glm_usd=glm_usd)
  else:
  logger.warning("Skipping pricing update; failed to fetch GLM price")
provider/vm/multipass_adapter.py CHANGED
@@ -157,13 +157,25 @@ class MultipassAdapter(VMProvider):
  async def list_vms(self) -> List[VMInfo]:
  """List all VMs."""
  all_mappings = self.name_mapper.list_mappings()
- vms = []
- for requestor_name in all_mappings.keys():
+ vms: List[VMInfo] = []
+ for requestor_name, multipass_name in list(all_mappings.items()):
  try:
- vm_info = await self.get_vm_status(requestor_name)
+ # get_vm_status expects multipass_name
+ vm_info = await self.get_vm_status(multipass_name)
  vms.append(vm_info)
  except VMNotFoundError:
- logger.warning(f"VM {requestor_name} not found, but a mapping exists. It may have been deleted externally.")
+ logger.warning(
+ f"VM {requestor_name} not found, but a mapping exists. It may have been deleted externally."
+ )
+ # Cleanup stale mapping and proxy allocation to avoid repeated warnings
+ try:
+ await self.proxy_manager.remove_vm(multipass_name)
+ except Exception:
+ pass
+ try:
+ await self.name_mapper.remove_mapping(requestor_name)
+ except Exception:
+ pass
  return vms

  async def start_vm(self, multipass_name: str) -> VMInfo:
@@ -211,8 +223,8 @@ class MultipassAdapter(VMProvider):
  async def get_all_vms_resources(self) -> Dict[str, VMResources]:
  """Get resources for all running VMs."""
  all_mappings = self.name_mapper.list_mappings()
- vm_resources = {}
- for requestor_name, multipass_name in all_mappings.items():
+ vm_resources: Dict[str, VMResources] = {}
+ for requestor_name, multipass_name in list(all_mappings.items()):
  try:
  info = await self._get_vm_info(multipass_name)
  disks_info = info.get("disks", {})
@@ -223,7 +235,18 @@ class MultipassAdapter(VMProvider):
  storage=round(total_storage / (1024**3)) if total_storage > 0 else 10
  )
  except (MultipassError, VMNotFoundError):
- logger.warning(f"Could not retrieve resources for VM {requestor_name} ({multipass_name}). It may have been deleted.")
+ logger.warning(
+ f"Could not retrieve resources for VM {requestor_name} ({multipass_name}). It may have been deleted."
+ )
+ # Cleanup stale mapping and proxy allocation
+ try:
+ await self.proxy_manager.remove_vm(multipass_name)
+ except Exception:
+ pass
+ try:
+ await self.name_mapper.remove_mapping(requestor_name)
+ except Exception:
+ pass
  except Exception as e:
  logger.error(f"Failed to get info for VM {requestor_name}: {e}")
  return vm_resources