portacode 1.4.16.dev10__py3-none-any.whl → 1.4.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,7 +19,7 @@ import time
19
19
  import threading
20
20
  from datetime import datetime, timezone
21
21
  from pathlib import Path
22
- from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Set, Tuple
22
+ from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple
23
23
 
24
24
  import platformdirs
25
25
 
@@ -45,14 +45,38 @@ DNS_SERVER = "1.1.1.1"
45
45
  IFACES_PATH = Path("/etc/network/interfaces")
46
46
  SYSCTL_PATH = Path("/etc/sysctl.d/99-portacode-forward.conf")
47
47
  UNIT_DIR = Path("/etc/systemd/system")
48
- _MANAGED_CONTAINERS_CACHE_TTL_S = 30.0
49
- _MANAGED_CONTAINERS_CACHE: Dict[str, Any] = {"timestamp": 0.0, "summary": None}
50
- _MANAGED_CONTAINERS_CACHE_LOCK = threading.Lock()
48
+ _MANAGED_CONTAINERS_STATE_LOCK = threading.Lock()
49
+ _MANAGED_CONTAINERS_STATE: Dict[str, Any] = {
50
+ "initialized": False,
51
+ "base_summary": None,
52
+ "initial_totals": {"ram_mib": 0, "disk_gib": 0, "cpu_share": 0.0},
53
+ "records": {},
54
+ "pending": {},
55
+ }
51
56
  TEMPLATES_REFRESH_INTERVAL_S = 300
52
57
 
53
58
  ProgressCallback = Callable[[int, int, Dict[str, Any], str, Optional[Dict[str, Any]]], None]
54
59
 
55
60
 
61
def _emit_host_event(
    handler: SyncHandler,
    payload: Dict[str, Any],
) -> None:
    """Schedule ``handler.send_response(payload)`` on the handler's event loop.

    The loop is read from ``handler.context["event_loop"]``.  When it is
    missing or already closed the event is dropped with a debug log.  The
    coroutine is submitted with ``asyncio.run_coroutine_threadsafe`` and is
    not awaited here; a done-callback logs a warning if sending failed.

    Args:
        handler: Handler exposing ``context`` and an awaitable
            ``send_response``.
        payload: Event body; its ``"event"`` key is used in log messages.
    """
    loop = handler.context.get("event_loop")
    if not loop or loop.is_closed():
        logger.debug("host event skipped (no event loop) event=%s", payload.get("event"))
        return

    def _log_failure(fut: Any) -> None:
        # Future.exception() raises CancelledError on a cancelled future, so
        # cancellation has to be ruled out before asking for the exception.
        if fut.cancelled():
            logger.debug("host event cancelled event=%s", payload.get("event"))
            return
        error = fut.exception()
        if error is not None:
            logger.warning("Failed to emit host event %s: %s", payload.get("event"), error)

    future = asyncio.run_coroutine_threadsafe(handler.send_response(payload), loop)
    future.add_done_callback(_log_failure)
78
+
79
+
56
80
  def _emit_progress_event(
57
81
  handler: SyncHandler,
58
82
  *,
@@ -202,67 +226,66 @@ def _current_time_iso() -> str:
202
226
  return datetime.now(timezone.utc).isoformat()
203
227
 
204
228
 
205
- def _to_int(value: Any, default: int = 0) -> int:
229
+ def _parse_iso_timestamp(value: str) -> Optional[datetime]:
230
+ if not value:
231
+ return None
232
+ text = value
233
+ if text.endswith("Z"):
234
+ text = text[:-1] + "+00:00"
206
235
  try:
207
- return int(value)
208
- except (TypeError, ValueError):
209
- return default
236
+ return datetime.fromisoformat(text)
237
+ except ValueError:
238
+ return None
239
+
240
+
241
def _templates_need_refresh(config: Dict[str, Any]) -> bool:
    """Return True when the cached template list is due for a refresh.

    A refresh is never requested without a config or without a
    ``token_value``.  It is always requested when
    ``templates_last_refreshed`` is missing or unparseable, and otherwise
    once at least ``TEMPLATES_REFRESH_INTERVAL_S`` seconds have elapsed.
    """
    if not config or not config.get("token_value"):
        return False
    last = _parse_iso_timestamp(config.get("templates_last_refreshed") or "")
    if not last:
        return True
    if last.tzinfo is None or last.utcoffset() is None:
        # A timestamp stored without an offset parses as naive; subtracting
        # it from an aware "now" would raise TypeError.  Treat it as UTC.
        last = last.replace(tzinfo=timezone.utc)
    elapsed_s = (datetime.now(timezone.utc) - last).total_seconds()
    return elapsed_s >= TEMPLATES_REFRESH_INTERVAL_S
210
248
 
211
249
 
212
- def _to_float(value: Any, default: float = 0.0) -> float:
250
+ def _ensure_templates_refreshed_on_startup(config: Dict[str, Any]) -> None:
251
+ if not _templates_need_refresh(config):
252
+ return
213
253
  try:
214
- return float(value)
215
- except (TypeError, ValueError):
216
- return default
254
+ client = _build_proxmox_client_from_config(config)
255
+ node = config.get("node") or _pick_node(client)
256
+ storages = client.nodes(node).storage.get()
257
+ templates = _list_templates(client, node, storages)
258
+ if templates:
259
+ config["templates"] = templates
260
+ config["templates_last_refreshed"] = _current_time_iso()
261
+ _save_config(config)
262
+ except Exception as exc:
263
+ logger.warning("Unable to refresh Proxmox templates on startup: %s", exc)
217
264
 
218
265
 
219
- def _calculate_available(total: Optional[float], used: float) -> Optional[float]:
220
- if total is None:
221
- return None
222
- delta = total - used
223
- return delta if delta >= 0 else 0.0
266
+ def _pick_storage(storages: Iterable[Dict[str, Any]]) -> str:
267
+ candidates = [s for s in storages if "rootdir" in s.get("content", "") and s.get("avail", 0) > 0]
268
+ if not candidates:
269
+ candidates = [s for s in storages if "rootdir" in s.get("content", "")]
270
+ if not candidates:
271
+ return ""
272
+ candidates.sort(key=lambda entry: entry.get("avail", 0), reverse=True)
273
+ return candidates[0].get("storage", "")
224
274
 
225
275
 
226
- def _normalize_bytes(value: Any) -> float:
227
- if isinstance(value, (int, float)):
228
- return float(value)
229
- text = str(value or "").strip()
230
- if not text:
231
- return 0.0
232
- match = re.match(r"(?i)^\s*([0-9]*\.?[0-9]+)\s*([kmgtp]?i?b?)?\s*$", text)
233
- if not match:
234
- return 0.0
235
- number = match.group(1)
236
- unit = (match.group(2) or "").lower()
276
+ def _bytes_to_gib(value: Any) -> float:
237
277
  try:
238
- value = float(number)
239
- except ValueError:
278
+ return float(value) / 1024**3
279
+ except (TypeError, ValueError):
240
280
  return 0.0
241
- if unit.startswith("k"):
242
- return value * 1024
243
- if unit.startswith("m"):
244
- return value * 1024**2
245
- if unit.startswith("g"):
246
- return value * 1024**3
247
- if unit.startswith("t"):
248
- return value * 1024**4
249
- if unit.startswith("p"):
250
- return value * 1024**5
251
- return value
252
-
253
-
254
- def _bytes_to_mib(value: Any) -> int:
255
- return int(round(_normalize_bytes(value) / (1024**2)))
256
281
 
257
282
 
258
- def _bytes_to_gib(value: Any) -> int:
259
- return int(round(_normalize_bytes(value) / (1024**3)))
260
-
283
+ def _bytes_to_mib(value: Any) -> float:
284
+ try:
285
+ return float(value) / 1024**2
286
+ except (TypeError, ValueError):
287
+ return 0.0
261
288
 
262
- def _normalize_storage_name(name: Any) -> str:
263
- if not name:
264
- return ""
265
- return str(name).strip().lower()
266
289
 
267
290
  def _size_token_to_gib(token: str) -> float:
268
291
  match = re.match(r"^\s*([0-9]+(?:\.[0-9]+)?)\s*([KMGTP])?([iI]?[bB])?\s*$", token)
@@ -272,11 +295,11 @@ def _size_token_to_gib(token: str) -> float:
272
295
  unit = (match.group(2) or "").upper()
273
296
  scale = {
274
297
  "": 1,
275
- "K": 1024 ** 1,
276
- "M": 1024 ** 2,
277
- "G": 1024 ** 3,
278
- "T": 1024 ** 4,
279
- "P": 1024 ** 5,
298
+ "K": 1024**1,
299
+ "M": 1024**2,
300
+ "G": 1024**3,
301
+ "T": 1024**4,
302
+ "P": 1024**5,
280
303
  }.get(unit, 1)
281
304
  return (number * scale) / 1024**3
282
305
 
@@ -292,7 +315,7 @@ def _extract_size_gib(value: Any) -> float:
292
315
  return _size_token_to_gib(text)
293
316
 
294
317
 
295
- def _extract_storage(value: Any) -> str:
318
+ def _extract_storage_token(value: Any) -> str:
296
319
  if not value:
297
320
  return "unknown"
298
321
  text = str(value)
@@ -303,12 +326,12 @@ def _extract_storage(value: Any) -> str:
303
326
 
304
327
  def _storage_from_lxc(cfg: Dict[str, Any], entry: Dict[str, Any]) -> str:
305
328
  rootfs = cfg.get("rootfs") or entry.get("rootfs")
306
- storage = _extract_storage(rootfs)
329
+ storage = _extract_storage_token(rootfs)
307
330
  if storage != "unknown":
308
331
  return storage
309
332
  for idx in range(0, 10):
310
333
  mp_value = cfg.get(f"mp{idx}")
311
- storage = _extract_storage(mp_value)
334
+ storage = _extract_storage_token(mp_value)
312
335
  if storage != "unknown":
313
336
  return storage
314
337
  return "unknown"
@@ -318,7 +341,7 @@ def _storage_from_qemu(cfg: Dict[str, Any]) -> str:
318
341
  preferred_keys: List[str] = []
319
342
  for prefix in ("scsi", "virtio", "sata", "ide"):
320
343
  preferred_keys.extend(f"{prefix}{idx}" for idx in range(0, 6))
321
- seen: Set[str] = set()
344
+ seen = set()
322
345
  for key in preferred_keys:
323
346
  value = cfg.get(key)
324
347
  if value is None:
@@ -327,7 +350,7 @@ def _storage_from_qemu(cfg: Dict[str, Any]) -> str:
327
350
  text = str(value)
328
351
  if "media=cdrom" in text or "cloudinit" in text:
329
352
  continue
330
- storage = _extract_storage(text)
353
+ storage = _extract_storage_token(text)
331
354
  if storage != "unknown":
332
355
  return storage
333
356
  for key in sorted(cfg.keys()):
@@ -341,11 +364,11 @@ def _storage_from_qemu(cfg: Dict[str, Any]) -> str:
341
364
  text = str(value)
342
365
  if "media=cdrom" in text or "cloudinit" in text:
343
366
  continue
344
- storage = _extract_storage(text)
367
+ storage = _extract_storage_token(text)
345
368
  if storage != "unknown":
346
369
  return storage
347
370
  for key in ("efidisk0", "tpmstate0"):
348
- storage = _extract_storage(cfg.get(key))
371
+ storage = _extract_storage_token(cfg.get(key))
349
372
  if storage != "unknown":
350
373
  return storage
351
374
  return "unknown"
@@ -359,7 +382,7 @@ def _primary_qemu_disk(cfg: Dict[str, Any]) -> str:
359
382
  preferred_keys: List[str] = []
360
383
  for prefix in ("scsi", "virtio", "sata", "ide"):
361
384
  preferred_keys.extend(f"{prefix}{idx}" for idx in range(0, 6))
362
- seen: Set[str] = set()
385
+ seen = set()
363
386
  for key in preferred_keys:
364
387
  value = cfg.get(key)
365
388
  if value is None:
@@ -384,8 +407,8 @@ def _primary_qemu_disk(cfg: Dict[str, Any]) -> str:
384
407
  return ""
385
408
 
386
409
 
387
- def _pick_storage(kind: str, cfg: Dict[str, Any], entry: Dict[str, Any]) -> str:
388
- storage = _extract_storage(cfg.get("storage") or entry.get("storage"))
410
+ def _pick_container_storage(kind: str, cfg: Dict[str, Any], entry: Dict[str, Any]) -> str:
411
+ storage = _extract_storage_token(cfg.get("storage") or entry.get("storage"))
389
412
  if storage != "unknown":
390
413
  return storage
391
414
  if kind == "lxc":
@@ -393,7 +416,7 @@ def _pick_storage(kind: str, cfg: Dict[str, Any], entry: Dict[str, Any]) -> str:
393
416
  return _storage_from_qemu(cfg)
394
417
 
395
418
 
396
- def _pick_disk_gib(kind: str, cfg: Dict[str, Any], entry: Dict[str, Any]) -> float:
419
+ def _pick_container_disk_gib(kind: str, cfg: Dict[str, Any], entry: Dict[str, Any]) -> float:
397
420
  if kind == "lxc":
398
421
  size = _extract_size_gib(_primary_lxc_disk(cfg, entry))
399
422
  if size:
@@ -402,68 +425,63 @@ def _pick_disk_gib(kind: str, cfg: Dict[str, Any], entry: Dict[str, Any]) -> flo
402
425
  size = _extract_size_gib(_primary_qemu_disk(cfg))
403
426
  if size:
404
427
  return size
405
- for candidate in (entry.get("maxdisk"), entry.get("disk")):
406
- if candidate in {None, 0}:
428
+ for candidate in (entry.get("maxdisk"), entry.get("disk"), cfg.get("disk")):
429
+ if candidate is None or candidate == 0:
407
430
  continue
408
- return _normalize_bytes(candidate) / 1024**3
409
- cfg_disk = cfg.get("disk")
410
- if cfg_disk not in (None, 0):
411
- return _normalize_bytes(cfg_disk) / 1024**3
431
+ return _bytes_to_gib(candidate)
412
432
  return 0.0
413
433
 
414
434
 
415
- def _parse_bool_flag(value: Any) -> bool:
416
- if isinstance(value, bool):
417
- return value
418
- text = str(value or "").strip().lower()
419
- return text in {"1", "true", "yes", "on"}
420
-
421
-
422
- def _parse_iso_timestamp(value: str) -> Optional[datetime]:
423
- if not value:
424
- return None
425
- text = value
426
- if text.endswith("Z"):
427
- text = text[:-1] + "+00:00"
435
+ def _to_mib(value: Any) -> float:
428
436
  try:
429
- return datetime.fromisoformat(text)
430
- except ValueError:
431
- return None
437
+ val = float(value)
438
+ except (TypeError, ValueError):
439
+ return 0.0
440
+ if val <= 0:
441
+ return 0.0
442
+ # Heuristic: large values are bytes, smaller ones are already MiB.
443
+ return _bytes_to_mib(val) if val > 10000 else val
432
444
 
433
445
 
434
- def _templates_need_refresh(config: Dict[str, Any]) -> bool:
435
- if not config or not config.get("token_value"):
436
- return False
437
- last = _parse_iso_timestamp(config.get("templates_last_refreshed") or "")
438
- if not last:
439
- return True
440
- return (datetime.now(timezone.utc) - last).total_seconds() >= TEMPLATES_REFRESH_INTERVAL_S
446
+ def _pick_container_ram_mib(kind: str, cfg: Dict[str, Any], entry: Dict[str, Any]) -> float:
447
+ for candidate in (cfg.get("memory"), entry.get("maxmem"), entry.get("mem")):
448
+ ram = _to_mib(candidate)
449
+ if ram:
450
+ return ram
451
+ return 0.0
441
452
 
442
453
 
443
- def _ensure_templates_refreshed_on_startup(config: Dict[str, Any]) -> None:
444
- if not _templates_need_refresh(config):
445
- return
454
+ def _safe_float(value: Any) -> float:
446
455
  try:
447
- client = _build_proxmox_client_from_config(config)
448
- node = config.get("node") or _pick_node(client)
449
- storages = client.nodes(node).storage.get()
450
- templates = _list_templates(client, node, storages)
451
- if templates:
452
- config["templates"] = templates
453
- config["templates_last_refreshed"] = _current_time_iso()
454
- _save_config(config)
455
- except Exception as exc:
456
- logger.warning("Unable to refresh Proxmox templates on startup: %s", exc)
456
+ return float(value)
457
+ except (TypeError, ValueError):
458
+ return 0.0
457
459
 
458
460
 
459
- def _pick_storage(storages: Iterable[Dict[str, Any]]) -> str:
460
- candidates = [s for s in storages if "rootdir" in s.get("content", "") and s.get("avail", 0) > 0]
461
- if not candidates:
462
- candidates = [s for s in storages if "rootdir" in s.get("content", "")]
463
- if not candidates:
464
- return ""
465
- candidates.sort(key=lambda entry: entry.get("avail", 0), reverse=True)
466
- return candidates[0].get("storage", "")
461
def _pick_container_cpu_share(kind: str, cfg: Dict[str, Any], entry: Dict[str, Any]) -> float:
    """Return the CPU share assigned to a guest, or 0.0 when none is found.

    For ``"lxc"`` the first non-zero of ``cfg`` keys ``cpulimit``, ``cores``
    and ``cpus`` wins, falling back to ``entry["cpus"]``.  For any other
    kind, ``cores * sockets`` (sockets defaulting to 1) is used when
    ``cores`` is set, then ``cfg["vcpus"]``, then ``entry["cpus"]`` or
    ``entry["maxcpu"]``.
    """
    if kind == "lxc":
        configured = next(
            (
                share
                for share in (_safe_float(cfg.get(key)) for key in ("cpulimit", "cores", "cpus"))
                if share
            ),
            None,
        )
        if configured is not None:
            return configured
        return _safe_float(entry.get("cpus"))

    core_count = _safe_float(cfg.get("cores"))
    socket_count = _safe_float(cfg.get("sockets")) or 1.0
    if core_count:
        return core_count * socket_count

    for fallback in (cfg.get("vcpus"), entry.get("cpus") or entry.get("maxcpu")):
        share = _safe_float(fallback)
        if share:
            return share
    return 0.0
480
+
481
+
482
+ def _parse_onboot_flag(value: Any) -> bool:
483
+ text = str(value).strip().lower()
484
+ return text in {"1", "true", "yes", "on"}
467
485
 
468
486
 
469
487
  def _write_bridge_config(bridge: str) -> None:
@@ -580,10 +598,76 @@ def _ensure_containers_dir() -> None:
580
598
  CONTAINERS_DIR.mkdir(parents=True, exist_ok=True)
581
599
 
582
600
 
583
- def _invalidate_managed_containers_cache() -> None:
584
- with _MANAGED_CONTAINERS_CACHE_LOCK:
585
- _MANAGED_CONTAINERS_CACHE["timestamp"] = 0.0
586
- _MANAGED_CONTAINERS_CACHE["summary"] = None
601
+ def _copy_summary(summary: Dict[str, Any]) -> Dict[str, Any]:
602
+ snapshot = summary.copy()
603
+ containers = summary.get("containers")
604
+ if isinstance(containers, list):
605
+ snapshot["containers"] = [entry.copy() for entry in containers]
606
+ unmanaged = summary.get("unmanaged_containers")
607
+ if isinstance(unmanaged, list):
608
+ snapshot["unmanaged_containers"] = [entry.copy() for entry in unmanaged]
609
+ return snapshot
610
+
611
+
612
+ def _refresh_container_statuses(records: List[Dict[str, Any]], config: Dict[str, Any] | None) -> None:
613
+ if not records or not config:
614
+ return
615
+ try:
616
+ proxmox = _connect_proxmox(config)
617
+ node = _get_node_from_config(config)
618
+ statuses = {
619
+ str(ct.get("vmid")): (ct.get("status") or "unknown").lower()
620
+ for ct in proxmox.nodes(node).lxc.get()
621
+ }
622
+ except Exception as exc: # pragma: no cover - best effort
623
+ logger.debug("Failed to refresh container statuses: %s", exc)
624
+ return
625
+ for record in records:
626
+ vmid = record.get("vmid")
627
+ if vmid is None:
628
+ continue
629
+ try:
630
+ vmid_key = str(int(vmid))
631
+ except (ValueError, TypeError):
632
+ continue
633
+ status = statuses.get(vmid_key)
634
+ if status:
635
+ record["status"] = status
636
+
637
+
638
def _initialize_managed_containers_state(force: bool = False) -> Dict[str, Any]:
    """Build (or reuse) the cached managed-containers state and return a copy.

    When the state is already initialized and *force* is false, a copy of
    the cached base summary is returned (``{}`` if none is stored).
    Otherwise the config and container records are loaded, statuses are
    refreshed, the full summary is rebuilt, and the module state is replaced
    with the new summary, its totals, a vmid-keyed record map and an empty
    ``pending`` map.

    Loading and summary building happen between the two locked sections, so
    the lock is not held while they run.
    """
    with _MANAGED_CONTAINERS_STATE_LOCK:
        already_initialized = _MANAGED_CONTAINERS_STATE["initialized"]
        if already_initialized and not force:
            cached = _MANAGED_CONTAINERS_STATE["base_summary"]
            if not cached:
                return {}
            return _copy_summary(cached)

    config = _load_config()
    records = _load_managed_container_records()
    _refresh_container_statuses(records, config)
    base_summary = _build_full_container_summary(records, config)

    record_map: Dict[str, Dict[str, Any]] = {}
    for record in records:
        raw_vmid = record.get("vmid")
        if raw_vmid is None:
            continue
        try:
            vmid_key = str(int(raw_vmid))
        except (TypeError, ValueError):
            # Records whose vmid is not numeric are left out of the map.
            continue
        record_map[vmid_key] = record

    initial_totals = {
        "ram_mib": int(base_summary.get("total_ram_mib") or 0),
        "disk_gib": int(base_summary.get("total_disk_gib") or 0),
        "cpu_share": float(base_summary.get("total_cpu_share") or 0.0),
    }

    with _MANAGED_CONTAINERS_STATE_LOCK:
        _MANAGED_CONTAINERS_STATE.update(
            initialized=True,
            base_summary=base_summary,
            initial_totals=initial_totals,
            records=record_map,
            pending={},
        )
    return _copy_summary(base_summary)
587
671
 
588
672
 
589
673
  def _load_managed_container_records() -> List[Dict[str, Any]]:
@@ -599,104 +683,35 @@ def _load_managed_container_records() -> List[Dict[str, Any]]:
599
683
  return records
600
684
 
601
685
 
602
- def _extract_host_totals(node_status: Dict[str, Any] | None) -> Tuple[Optional[int], Optional[int], Optional[int]]:
603
- if not node_status:
604
- return None, None, None
605
- memory_total = node_status.get("memory", {}).get("total")
606
- disk_total = node_status.get("disk", {}).get("total")
607
- cpu_cores = node_status.get("cpuinfo", {}).get("cores")
608
- host_ram = _bytes_to_mib(memory_total) if memory_total is not None else None
609
- host_disk = _bytes_to_gib(disk_total) if disk_total is not None else None
610
- host_cpu = _to_int(cpu_cores) if cpu_cores is not None else None
611
- return host_ram, host_disk, host_cpu
612
-
613
-
614
- def _build_unmanaged_container_entry(
615
- ct: Dict[str, Any],
616
- cfg: Dict[str, Any],
617
- vmid: str,
618
- default_storage: str | None,
619
- *,
620
- entry_type: str = "lxc",
621
- ) -> Dict[str, Any]:
622
- ram_mib = _to_int(cfg.get("memory")) or _bytes_to_mib(ct.get("maxmem"))
623
- disk_gib = int(round(_pick_disk_gib(entry_type, cfg, ct)))
624
- cpu_share = _to_float(
625
- cfg.get("cpulimit")
626
- or cfg.get("cpus")
627
- or cfg.get("cores")
628
- or ct.get("cpus")
629
- or ct.get("cpu")
630
- )
631
- hostname = ct.get("name") or cfg.get("hostname") or f"ct{vmid}"
632
- storage = _pick_storage(entry_type, cfg, ct)
633
- status = (ct.get("status") or "unknown").lower()
634
- reserved = _parse_bool_flag(cfg.get("onboot"))
635
- storage_matches_default = _storage_matches_default(storage, default_storage)
636
- return {
637
- "vmid": vmid,
638
- "hostname": hostname,
639
- "template": cfg.get("ostemplate"),
640
- "storage": storage,
641
- "disk_gib": disk_gib,
642
- "ram_mib": ram_mib,
643
- "cpu_share": cpu_share,
644
- "reserve_on_boot": reserved,
645
- "matches_default_storage": storage_matches_default,
646
- "type": entry_type,
647
- "status": status,
648
- "managed": False,
649
- }
650
-
651
-
652
- def _get_storage_snapshot(proxmox: Any, node: str, storage_name: str | None) -> Dict[str, Any] | None:
653
- if not storage_name:
654
- return None
655
- try:
656
- storage = proxmox.nodes(node).storage(storage_name).status.get()
657
- except Exception as exc:
658
- logger.debug("Unable to read storage status %s:%s: %s", node, storage_name, exc)
659
- return None
660
- total_bytes = storage.get("total")
661
- avail_bytes = storage.get("avail")
662
- used_bytes = storage.get("used")
663
- return {
664
- "storage": storage_name,
665
- "total_gib": _bytes_to_gib(total_bytes) if total_bytes is not None else None,
666
- "avail_gib": _bytes_to_gib(avail_bytes) if avail_bytes is not None else None,
667
- "used_gib": _bytes_to_gib(used_bytes) if used_bytes is not None else None,
668
- }
669
-
670
-
671
- def _storage_matches_default(storage_name: Any, default_storage: str | None) -> bool:
672
- if not default_storage:
673
- return True
674
- return _normalize_storage_name(storage_name) == _normalize_storage_name(default_storage)
675
-
676
-
677
- def _build_managed_containers_summary(
678
- records: List[Dict[str, Any]],
679
- unmanaged_records: List[Dict[str, Any]],
680
- node_status: Dict[str, Any] | None,
681
- storage_snapshot: Dict[str, Any] | None,
682
- default_storage: str | None,
683
- ) -> Dict[str, Any]:
684
- managed_containers: List[Dict[str, Any]] = []
686
+ def _build_managed_containers_summary(records: List[Dict[str, Any]]) -> Dict[str, Any]:
685
687
  total_ram = 0
686
688
  total_disk = 0
687
689
  total_cpu_share = 0.0
690
+ containers: List[Dict[str, Any]] = []
691
+
692
+ def _as_int(value: Any) -> int:
693
+ try:
694
+ return int(value)
695
+ except (TypeError, ValueError):
696
+ return 0
688
697
 
689
- for record in sorted(records, key=lambda entry: _to_int(entry.get("vmid"))):
690
- ram_mib = _to_int(record.get("ram_mib"))
691
- disk_gib = _to_int(record.get("disk_gib"))
692
- cpu_share = _to_float(record.get("cpus"))
698
+ def _as_float(value: Any) -> float:
699
+ try:
700
+ return float(value)
701
+ except (TypeError, ValueError):
702
+ return 0.0
703
+
704
+ for record in sorted(records, key=lambda entry: _as_int(entry.get("vmid"))):
705
+ ram_mib = _as_int(record.get("ram_mib"))
706
+ disk_gib = _as_int(record.get("disk_gib"))
707
+ cpu_share = _as_float(record.get("cpus"))
693
708
  total_ram += ram_mib
694
709
  total_disk += disk_gib
695
710
  total_cpu_share += cpu_share
696
711
  status = (record.get("status") or "unknown").lower()
697
- managed_containers.append(
712
+ containers.append(
698
713
  {
699
- "vmid": str(_to_int(record.get("vmid"))) if record.get("vmid") is not None else None,
714
+ "vmid": str(_as_int(record.get("vmid"))) if record.get("vmid") is not None else None,
700
715
  "device_id": record.get("device_id"),
701
716
  "hostname": record.get("hostname"),
702
717
  "template": record.get("template"),
@@ -706,162 +721,279 @@ def _build_managed_containers_summary(
706
721
  "cpu_share": cpu_share,
707
722
  "created_at": record.get("created_at"),
708
723
  "status": status,
709
- "managed": True,
710
- "matches_default_storage": _storage_matches_default(record.get("storage"), default_storage),
711
724
  }
712
725
  )
713
726
 
714
- unmanaged_total_ram = sum(
715
- _to_int(entry.get("ram_mib"))
716
- for entry in unmanaged_records
717
- if entry.get("reserve_on_boot")
718
- )
719
- unmanaged_total_disk = sum(
720
- _to_int(entry.get("disk_gib"))
721
- for entry in unmanaged_records
722
- if entry.get("matches_default_storage")
723
- )
724
- unmanaged_total_cpu = sum(
725
- _to_float(entry.get("cpu_share"))
726
- for entry in unmanaged_records
727
- if entry.get("reserve_on_boot")
728
- )
729
-
730
- allocated_ram = total_ram + unmanaged_total_ram
731
- allocated_disk = total_disk + unmanaged_total_disk
732
- allocated_cpu = total_cpu_share + unmanaged_total_cpu
733
-
734
- host_ram_node, host_disk_node, host_cpu_node = _extract_host_totals(node_status)
735
- storage_host_disk = storage_snapshot.get("total_gib") if storage_snapshot else None
736
- host_ram = host_ram_node
737
- host_disk = storage_host_disk if storage_host_disk is not None else host_disk_node
738
- host_cpu = host_cpu_node
739
-
740
-
741
727
  return {
742
728
  "updated_at": datetime.utcnow().isoformat() + "Z",
743
- "count": len(managed_containers),
729
+ "count": len(containers),
744
730
  "total_ram_mib": total_ram,
745
731
  "total_disk_gib": total_disk,
746
732
  "total_cpu_share": round(total_cpu_share, 2),
747
- "containers": managed_containers,
748
- "unmanaged_containers": unmanaged_records,
749
- "unmanaged_count": len(unmanaged_records),
750
- "allocated_ram_mib": allocated_ram,
751
- "allocated_disk_gib": allocated_disk,
752
- "allocated_cpu_share": round(allocated_cpu, 2),
753
- "available_ram_mib": _calculate_available(host_ram, allocated_ram) if host_ram is not None else None,
754
- "available_disk_gib": _calculate_available(host_disk, allocated_disk) if host_disk is not None else None,
755
- "available_cpu_share": _calculate_available(host_cpu, allocated_cpu) if host_cpu is not None else None,
756
- "host_total_ram_mib": host_ram,
757
- "host_total_disk_gib": host_disk,
758
- "host_total_cpu_cores": host_cpu,
759
- "default_storage": default_storage,
760
- "default_storage_snapshot": storage_snapshot,
733
+ "containers": containers,
761
734
  }
762
735
 
763
736
 
764
- def _get_managed_containers_summary(force: bool = False) -> Dict[str, Any]:
765
- def _refresh_container_statuses(
766
- records: List[Dict[str, Any]],
767
- config: Dict[str, Any] | None,
768
- managed_vmids: Set[str],
769
- default_storage: str | None,
770
- ) -> Tuple[Dict[str, str], List[Dict[str, Any]], Dict[str, Any] | None, Dict[str, Any] | None]:
771
- statuses: Dict[str, str] = {}
772
- unmanaged: List[Dict[str, Any]] = []
773
- node_status: Dict[str, Any] | None = None
774
- if not config:
775
- return statuses, unmanaged, node_status, None
776
- proxmox = None
777
- node = None
778
- try:
779
- proxmox = _connect_proxmox(config)
780
- node = _get_node_from_config(config)
781
- node_status = proxmox.nodes(node).status.get()
782
- for ct in proxmox.nodes(node).lxc.get():
783
- vmid_val = ct.get("vmid")
784
- if vmid_val is None:
785
- continue
786
- vmid_key = str(_to_int(vmid_val))
787
- statuses[vmid_key] = (ct.get("status") or "unknown").lower()
788
- if vmid_key in managed_vmids:
789
- continue
790
- cfg: Dict[str, Any] = {}
791
- try:
792
- cfg = proxmox.nodes(node).lxc(vmid_key).config.get() or {}
793
- except Exception as exc: # pragma: no cover - best effort
794
- logger.debug("Failed to read config for container %s: %s", vmid_key, exc)
795
- description = (cfg.get("description") or "")
796
- if MANAGED_MARKER in description:
797
- continue
798
- unmanaged.append(
799
- _build_unmanaged_container_entry(ct, cfg, vmid_key, default_storage, entry_type="lxc")
800
- )
801
- for vm in proxmox.nodes(node).qemu.get():
802
- vmid_val = vm.get("vmid")
803
- if vmid_val is None:
804
- continue
805
- vmid_key = str(_to_int(vmid_val))
806
- statuses[vmid_key] = (vm.get("status") or "unknown").lower()
807
- if vmid_key in managed_vmids:
808
- continue
809
- cfg: Dict[str, Any] = {}
810
- try:
811
- cfg = proxmox.nodes(node).qemu(vmid_key).config.get() or {}
812
- except Exception as exc:
813
- logger.debug("Failed to read config for VM %s: %s", vmid_key, exc)
814
- description = (cfg.get("description") or "")
815
- if MANAGED_MARKER in description:
816
- continue
817
- unmanaged.append(
818
- _build_unmanaged_container_entry(
819
- vm,
820
- cfg,
821
- vmid_key,
822
- default_storage,
823
- entry_type="qemu",
824
- )
825
- )
826
- except Exception as exc: # pragma: no cover - best effort
827
- logger.debug("Failed to refresh container statuses: %s", exc)
828
- storage_snapshot = (
829
- _get_storage_snapshot(proxmox, node, default_storage)
830
- if proxmox and node
831
- else None
832
- )
833
- return statuses, unmanaged, node_status, storage_snapshot
834
-
835
- now = time.monotonic()
836
- with _MANAGED_CONTAINERS_CACHE_LOCK:
837
- cache_ts = _MANAGED_CONTAINERS_CACHE["timestamp"]
838
- cached = _MANAGED_CONTAINERS_CACHE["summary"]
839
- if not force and cached and now - cache_ts < _MANAGED_CONTAINERS_CACHE_TTL_S:
840
- return cached
841
- config = _load_config()
842
- records = _load_managed_container_records()
843
- managed_vmids: Set[str] = {
844
- str(_to_int(record.get("vmid"))) for record in records if record.get("vmid") is not None
845
- }
846
- default_storage = config.get("default_storage") if config else None
847
- statuses, unmanaged, node_status, storage_snapshot = _refresh_container_statuses(
848
- records, config, managed_vmids, default_storage
849
- )
737
+ def _build_full_container_summary(records: List[Dict[str, Any]], config: Dict[str, Any]) -> Dict[str, Any]:
738
+ base_summary = _build_managed_containers_summary(records)
739
+ if not config or not config.get("token_value"):
740
+ return base_summary
741
+
742
+ try:
743
+ proxmox = _connect_proxmox(config)
744
+ node = _get_node_from_config(config)
745
+ except Exception as exc: # pragma: no cover - best effort
746
+ logger.debug("Unable to extend container summary with Proxmox data: %s", exc)
747
+ return base_summary
748
+
749
+ default_storage = (config.get("default_storage") or "").strip()
750
+ record_map: Dict[str, Dict[str, Any]] = {}
850
751
  for record in records:
851
752
  vmid = record.get("vmid")
852
753
  if vmid is None:
853
754
  continue
854
- vmid_key = str(_to_int(vmid))
855
- status = statuses.get(vmid_key)
856
- if status:
857
- record["status"] = status
858
- summary = _build_managed_containers_summary(records, unmanaged, node_status, storage_snapshot, default_storage)
859
- with _MANAGED_CONTAINERS_CACHE_LOCK:
860
- _MANAGED_CONTAINERS_CACHE["timestamp"] = now
861
- _MANAGED_CONTAINERS_CACHE["summary"] = summary
755
+ try:
756
+ vmid_key = str(int(vmid))
757
+ except (ValueError, TypeError):
758
+ continue
759
+ record_map[vmid_key] = record
760
+
761
+ managed_entries: List[Dict[str, Any]] = []
762
+ unmanaged_entries: List[Dict[str, Any]] = []
763
+ allocated_ram = 0.0
764
+ allocated_disk = 0.0
765
+ allocated_cpu = 0.0
766
+
767
+ def _process_entries(kind: str, getter: str) -> None:
768
+ nonlocal allocated_ram, allocated_disk, allocated_cpu
769
+ entries = getattr(proxmox.nodes(node), getter).get()
770
+ for entry in entries:
771
+ vmid = entry.get("vmid")
772
+ if vmid is None:
773
+ continue
774
+ vmid_str = str(vmid)
775
+ cfg: Dict[str, Any] = {}
776
+ try:
777
+ cfg = getattr(proxmox.nodes(node), getter)(vmid_str).config.get() or {}
778
+ except Exception as exc: # pragma: no cover - best effort
779
+ logger.debug("Failed to load %s config for %s: %s", kind, vmid_str, exc)
780
+ cfg = {}
781
+
782
+ record = record_map.get(vmid_str)
783
+ description = cfg.get("description") or ""
784
+ managed = bool(record) or MANAGED_MARKER in description
785
+ hostname = entry.get("name") or cfg.get("hostname") or (record.get("hostname") if record else None)
786
+ storage = _pick_container_storage(kind, cfg, entry)
787
+ disk_gib = _pick_container_disk_gib(kind, cfg, entry)
788
+ ram_mib = _pick_container_ram_mib(kind, cfg, entry)
789
+ cpu_share = _pick_container_cpu_share(kind, cfg, entry)
790
+ reserve_on_boot = _parse_onboot_flag(cfg.get("onboot"))
791
+ matches_default_storage = bool(default_storage and storage and storage.lower() == default_storage.lower())
792
+
793
+ base_entry = {
794
+ "type": kind,
795
+ "vmid": vmid_str,
796
+ "hostname": hostname,
797
+ "status": (entry.get("status") or "unknown").lower(),
798
+ "storage": storage,
799
+ "disk_gib": disk_gib,
800
+ "ram_mib": ram_mib,
801
+ "cpu_share": cpu_share,
802
+ "reserve_on_boot": reserve_on_boot,
803
+ "matches_default_storage": matches_default_storage,
804
+ "managed": managed,
805
+ }
806
+
807
+ if managed:
808
+ merged = base_entry | {
809
+ "device_id": record.get("device_id") if record else None,
810
+ "template": record.get("template") if record else None,
811
+ "created_at": record.get("created_at") if record else None,
812
+ }
813
+ managed_entries.append(merged)
814
+ else:
815
+ unmanaged_entries.append(base_entry)
816
+
817
+ if managed or reserve_on_boot:
818
+ allocated_ram += ram_mib
819
+ allocated_cpu += cpu_share
820
+ if managed or matches_default_storage:
821
+ allocated_disk += disk_gib
822
+
823
+ _process_entries("lxc", "lxc")
824
+ _process_entries("qemu", "qemu")
825
+
826
+ memory_info = {}
827
+ cpu_info = {}
828
+ try:
829
+ node_status = proxmox.nodes(node).status.get()
830
+ memory_info = node_status.get("memory") or {}
831
+ cpu_info = node_status.get("cpuinfo") or {}
832
+ except Exception as exc: # pragma: no cover - best effort
833
+ logger.debug("Unable to read node status for resource totals: %s", exc)
834
+
835
+ host_total_ram_mib = _bytes_to_mib(memory_info.get("total"))
836
+ used_ram_mib = _bytes_to_mib(memory_info.get("used"))
837
+ available_ram_mib = max(host_total_ram_mib - used_ram_mib, 0.0) if host_total_ram_mib else None
838
+ host_total_cpu_cores = _safe_float(cpu_info.get("cores"))
839
+ available_cpu_share = max(host_total_cpu_cores - allocated_cpu, 0.0) if host_total_cpu_cores else None
840
+
841
+ host_total_disk_gib = None
842
+ available_disk_gib = None
843
+ if default_storage:
844
+ try:
845
+ storage_status = proxmox.nodes(node).storage(default_storage).status.get()
846
+ host_total_disk_gib = _bytes_to_gib(storage_status.get("total"))
847
+ available_disk_gib = _bytes_to_gib(storage_status.get("avail"))
848
+ except Exception as exc: # pragma: no cover - best effort
849
+ logger.debug("Unable to read storage status for %s: %s", default_storage, exc)
850
+
851
+ summary = base_summary.copy()
852
+ summary["containers"] = managed_entries
853
+ summary["count"] = len(managed_entries)
854
+ summary["total_ram_mib"] = int(sum(entry.get("ram_mib") or 0 for entry in managed_entries))
855
+ summary["total_disk_gib"] = int(sum(entry.get("disk_gib") or 0 for entry in managed_entries))
856
+ summary["total_cpu_share"] = round(sum(entry.get("cpu_share") or 0 for entry in managed_entries), 2)
857
+ summary["unmanaged_containers"] = unmanaged_entries
858
+ summary["allocated_ram_mib"] = round(allocated_ram, 2)
859
+ summary["allocated_disk_gib"] = round(allocated_disk, 2)
860
+ summary["allocated_cpu_share"] = round(allocated_cpu, 2)
861
+ summary["host_total_ram_mib"] = int(host_total_ram_mib) if host_total_ram_mib else None
862
+ summary["host_total_disk_gib"] = host_total_disk_gib
863
+ summary["host_total_cpu_cores"] = host_total_cpu_cores if host_total_cpu_cores else None
864
+ summary["available_ram_mib"] = int(available_ram_mib) if available_ram_mib is not None else None
865
+ summary["available_disk_gib"] = available_disk_gib
866
+ summary["available_cpu_share"] = available_cpu_share if available_cpu_share is not None else None
867
+ return summary
868
+
869
+
870
+ def _pending_totals(pending: Iterable[Dict[str, Any]]) -> Tuple[int, int, float]:
871
+ ram_total = 0
872
+ disk_total = 0
873
+ cpu_total = 0.0
874
+ for entry in pending:
875
+ ram_total += int(entry.get("ram_mib") or 0)
876
+ disk_total += int(entry.get("disk_gib") or 0)
877
+ cpu_total += float(entry.get("cpu_share") or 0.0)
878
+ return ram_total, disk_total, cpu_total
879
+
880
+
881
def _compose_managed_containers_summary(
    records: Iterable[Dict[str, Any]],
    pending: Iterable[Dict[str, Any]],
    base_summary: Dict[str, Any],
    initial_totals: Dict[str, Any],
) -> Dict[str, Any]:
    """Overlay the tracked records and pending reservations on a base summary.

    The managed-container list and its totals are rebuilt from ``records``;
    every entry is merged on top of the base-summary entry that shares its
    vmid, when one exists. The ``allocated_*`` and ``available_*`` figures of
    the base summary are then shifted by the change in managed totals since
    ``initial_totals`` was captured, plus whatever ``pending`` currently holds.

    Args:
        records: Container records currently tracked as managed.
        pending: In-flight reservations (``ram_mib``/``disk_gib``/``cpu_share``).
        base_summary: Summary snapshot the result is derived from.
        initial_totals: Managed totals (``ram_mib``, ``disk_gib``,
            ``cpu_share``) that ``base_summary`` already accounts for.

    Returns:
        A new summary dict obtained from ``_copy_summary(base_summary)``.
    """
    managed_summary = _build_managed_containers_summary(list(records))
    summary = _copy_summary(base_summary)

    # Index the base entries by stringified vmid for the merge below.
    base_by_vmid: Dict[str, Dict[str, Any]] = {}
    for known in base_summary.get("containers", []):
        if known.get("vmid") is not None:
            base_by_vmid[str(known.get("vmid"))] = known

    default_storage = summary.get("default_storage")
    merged_containers: List[Dict[str, Any]] = []
    for entry in managed_summary["containers"]:
        raw_vmid = entry.get("vmid")
        base_entry = None if raw_vmid is None else base_by_vmid.get(str(raw_vmid))
        # Record fields win over whatever the base snapshot carried.
        merged = {**base_entry, **entry} if base_entry else entry.copy()
        merged.setdefault("managed", True)
        merged.setdefault("type", "lxc")
        if default_storage and merged.get("storage"):
            storage_name = str(merged["storage"]).lower()
            merged["matches_default_storage"] = storage_name == str(default_storage).lower()
        merged_containers.append(merged)

    for key in ("updated_at", "count", "total_ram_mib", "total_disk_gib", "total_cpu_share"):
        summary[key] = managed_summary[key]
    summary["containers"] = merged_containers

    pending_ram, pending_disk, pending_cpu = _pending_totals(pending)
    delta_ram = managed_summary["total_ram_mib"] - int(initial_totals.get("ram_mib") or 0)
    delta_disk = managed_summary["total_disk_gib"] - int(initial_totals.get("disk_gib") or 0)
    delta_cpu = managed_summary["total_cpu_share"] - float(initial_totals.get("cpu_share") or 0.0)

    adjustments = (
        ("allocated_ram_mib", "available_ram_mib", delta_ram, pending_ram),
        ("allocated_disk_gib", "available_disk_gib", delta_disk, pending_disk),
        ("allocated_cpu_share", "available_cpu_share", delta_cpu, pending_cpu),
    )

    # Figures that are absent or None in the base summary stay untouched.
    for allocated_key, _, delta, reserved in adjustments:
        if summary.get(allocated_key) is not None:
            summary[allocated_key] = round(float(summary[allocated_key]) + delta + reserved, 2)

    for _, available_key, delta, reserved in adjustments:
        if summary.get(available_key) is None:
            continue
        remaining = float(summary[available_key]) - delta - reserved
        if available_key == "available_ram_mib":
            # RAM is reported as a whole number of MiB.
            summary[available_key] = max(int(remaining), 0)
        else:
            summary[available_key] = max(remaining, 0.0)

    return summary
863
941
 
864
942
 
943
def _get_managed_containers_summary(force: bool = False) -> Dict[str, Any]:
    """Return the managed-containers summary derived from the in-memory state.

    ``force`` is forwarded unchanged to ``_initialize_managed_containers_state``.
    The state is snapshotted while holding the state lock; an empty dict is
    returned when no base summary is available.
    """
    _initialize_managed_containers_state(force=force)
    with _MANAGED_CONTAINERS_STATE_LOCK:
        state = _MANAGED_CONTAINERS_STATE
        base_summary = state.get("base_summary") or {}
        records = list(state.get("records", {}).values())
        pending = list(state.get("pending", {}).values())
        initial_totals = state.get("initial_totals", {})
    if base_summary:
        return _compose_managed_containers_summary(records, pending, base_summary, initial_totals)
    return {}
953
+
954
+
955
def _reserve_container_resources(payload: Dict[str, Any], *, device_id: str, request_id: Optional[str]) -> str:
    """Reserve RAM, disk and CPU for a container that is about to be created.

    The requested amounts are read from ``payload`` (``ram_mib``, ``disk_gib``
    and ``cpus``; missing or falsy values count as zero) and compared with the
    ``available_*`` figures of the freshly composed summary. A figure of
    ``None`` disables the corresponding check. On success a pending entry is
    stored under a random reservation id.

    Args:
        payload: Container creation payload.
        device_id: Device the reservation is recorded for.
        request_id: Request identifier recorded with the reservation.

    Returns:
        The id of the new reservation.

    Raises:
        RuntimeError: If the request exceeds the available RAM, disk or CPU.
    """
    _initialize_managed_containers_state()
    ram_mib = int(payload.get("ram_mib") or 0)
    disk_gib = int(payload.get("disk_gib") or 0)
    cpu_share = float(payload.get("cpus") or 0.0)
    reservation_id = secrets.token_hex(8)
    # datetime.utcnow() is deprecated; dropping tzinfo from an aware UTC
    # timestamp serializes to exactly the same "<naive ISO>Z" string.
    created_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    # The availability check and the insert share one critical section so two
    # concurrent requests cannot both pass the check against the same capacity.
    with _MANAGED_CONTAINERS_STATE_LOCK:
        base_summary = _MANAGED_CONTAINERS_STATE.get("base_summary") or {}
        records = list(_MANAGED_CONTAINERS_STATE.get("records", {}).values())
        pending = list(_MANAGED_CONTAINERS_STATE.get("pending", {}).values())
        initial_totals = _MANAGED_CONTAINERS_STATE.get("initial_totals", {})
        summary = _compose_managed_containers_summary(records, pending, base_summary, initial_totals)

        available_ram = summary.get("available_ram_mib")
        if available_ram is not None and ram_mib > available_ram:
            raise RuntimeError("Not enough RAM to create this container.")
        available_disk = summary.get("available_disk_gib")
        if available_disk is not None and disk_gib > available_disk:
            raise RuntimeError("Not enough disk space to create this container.")
        available_cpu = summary.get("available_cpu_share")
        if available_cpu is not None and cpu_share > available_cpu:
            raise RuntimeError("Not enough CPU capacity to create this container.")

        _MANAGED_CONTAINERS_STATE["pending"][reservation_id] = {
            "reservation_id": reservation_id,
            "device_id": device_id,
            "request_id": request_id,
            "ram_mib": ram_mib,
            "disk_gib": disk_gib,
            "cpu_share": cpu_share,
            "created_at": created_at,
        }
    return reservation_id
988
+
989
+
990
+ def _release_container_reservation(reservation_id: Optional[str]) -> None:
991
+ if not reservation_id:
992
+ return
993
+ with _MANAGED_CONTAINERS_STATE_LOCK:
994
+ _MANAGED_CONTAINERS_STATE.get("pending", {}).pop(reservation_id, None)
995
+
996
+
865
997
  def _format_rootfs(storage: str, disk_gib: int, storage_type: str) -> str:
866
998
  if storage_type in ("lvm", "lvmthin"):
867
999
  return f"{storage}:{disk_gib}"
@@ -1145,23 +1277,6 @@ def _start_container(proxmox: Any, node: str, vmid: int) -> Tuple[Dict[str, Any]
1145
1277
  logger.info("Container %s already running (%ss)", vmid, uptime)
1146
1278
  return status, 0.0
1147
1279
 
1148
- node_status = proxmox.nodes(node).status.get()
1149
- mem_total_mb = int(node_status.get("memory", {}).get("total", 0) // (1024**2))
1150
- cores_total = int(node_status.get("cpuinfo", {}).get("cores", 0))
1151
-
1152
- running = _list_running_managed(proxmox, node)
1153
- used_mem_mb = sum(int(cfg.get("memory", 0)) for _, cfg in running)
1154
- used_cores = sum(int(cfg.get("cores", 0)) for _, cfg in running)
1155
-
1156
- target_cfg = proxmox.nodes(node).lxc(vmid).config.get()
1157
- target_mem_mb = int(target_cfg.get("memory", 0))
1158
- target_cores = int(target_cfg.get("cores", 0))
1159
-
1160
- if mem_total_mb and used_mem_mb + target_mem_mb > mem_total_mb:
1161
- raise RuntimeError("Not enough RAM to start this container safely.")
1162
- if cores_total and used_cores + target_cores > cores_total:
1163
- raise RuntimeError("Not enough CPU cores to start this container safely.")
1164
-
1165
1280
  upid = proxmox.nodes(node).lxc(vmid).status.start.post()
1166
1281
  return _wait_for_task(proxmox, node, upid)
1167
1282
 
@@ -1179,11 +1294,25 @@ def _delete_container(proxmox: Any, node: str, vmid: int) -> Tuple[Dict[str, Any
1179
1294
  return _wait_for_task(proxmox, node, upid)
1180
1295
 
1181
1296
 
1182
- def _write_container_record(vmid: int, payload: Dict[str, Any]) -> None:
1297
def _register_container_record(vmid: int, payload: Dict[str, Any], reservation_id: Optional[str] = None) -> None:
    """Track a container record in the in-memory state.

    A shallow copy of ``payload`` is stored under ``str(vmid)``. When a
    ``reservation_id`` is given, the matching pending reservation is removed
    under the same lock acquisition.
    """
    _initialize_managed_containers_state()
    record = payload.copy()
    with _MANAGED_CONTAINERS_STATE_LOCK:
        if reservation_id:
            _MANAGED_CONTAINERS_STATE["pending"].pop(reservation_id, None)
        _MANAGED_CONTAINERS_STATE["records"][str(vmid)] = record
1303
+
1304
+
1305
def _unregister_container_record(vmid: int) -> None:
    """Forget the in-memory record stored under ``str(vmid)``, if there is one."""
    _initialize_managed_containers_state()
    key = str(vmid)
    with _MANAGED_CONTAINERS_STATE_LOCK:
        _MANAGED_CONTAINERS_STATE["records"].pop(key, None)
1309
+
1310
+
1311
def _write_container_record(vmid: int, payload: Dict[str, Any], reservation_id: Optional[str] = None) -> None:
    """Persist a container record as JSON and mirror it into the in-memory state.

    The record is written to ``ct-<vmid>.json`` under ``CONTAINERS_DIR`` and
    then registered via ``_register_container_record``, which receives
    ``reservation_id`` unchanged.
    """
    _ensure_containers_dir()
    record_file = CONTAINERS_DIR / f"ct-{vmid}.json"
    serialized = json.dumps(payload, indent=2)
    record_file.write_text(serialized, encoding="utf-8")
    _register_container_record(vmid, payload, reservation_id=reservation_id)
1187
1316
 
1188
1317
 
1189
1318
  def _read_container_record(vmid: int) -> Dict[str, Any]:
@@ -1203,7 +1332,7 @@ def _remove_container_record(vmid: int) -> None:
1203
1332
  path = CONTAINERS_DIR / f"ct-{vmid}.json"
1204
1333
  if path.exists():
1205
1334
  path.unlink()
1206
- _invalidate_managed_containers_cache()
1335
+ _unregister_container_record(vmid)
1207
1336
 
1208
1337
 
1209
1338
  def _build_container_payload(message: Dict[str, Any], config: Dict[str, Any]) -> Dict[str, Any]:
@@ -1309,6 +1438,15 @@ def _su_command(user: str, command: str) -> str:
1309
1438
  return f"su - {user} -s /bin/sh -c {shlex.quote(command)}"
1310
1439
 
1311
1440
 
1441
def _resolve_portacode_cli_path(vmid: int, user: str) -> str:
    """Resolve the full path to the portacode CLI inside the container.

    Runs ``command -v portacode`` as ``user`` in container ``vmid``. Only an
    absolute path from a successful run is accepted; in every other case the
    bare command name ``"portacode"`` is returned so the caller still relies
    on ``PATH`` lookup.

    Args:
        vmid: Container to query.
        user: Account whose environment is used for the lookup.

    Returns:
        The absolute CLI path, or ``"portacode"`` as a fallback.
    """
    res = _run_pct(vmid, _su_command(user, "command -v portacode"))
    if res.get("returncode") not in (0, None):
        return "portacode"
    # The lookup runs through a login shell, so profile scripts may print
    # banner text ahead of the answer, and `command -v` can report an alias
    # definition instead of a file. Take the last line that is an absolute
    # path rather than trusting the whole of stdout.
    for line in reversed((res.get("stdout") or "").splitlines()):
        candidate = line.strip()
        if candidate.startswith("/"):
            return candidate
    return "portacode"
1448
+
1449
+
1312
1450
  def _run_pct_check(vmid: int, cmd: str) -> Dict[str, Any]:
1313
1451
  res = _run_pct(vmid, cmd)
1314
1452
  if res["returncode"] != 0:
@@ -1751,12 +1889,14 @@ def _allocate_vmid(proxmox: Any) -> int:
1751
1889
  return int(proxmox.cluster.nextid.get())
1752
1890
 
1753
1891
 
1754
- def _instantiate_container(proxmox: Any, node: str, payload: Dict[str, Any]) -> Tuple[int, float]:
1892
+ def _instantiate_container(
1893
+ proxmox: Any, node: str, payload: Dict[str, Any], vmid: Optional[int] = None
1894
+ ) -> Tuple[int, float]:
1755
1895
  from proxmoxer.core import ResourceException
1756
1896
 
1757
1897
  storage_type = _get_storage_type(proxmox.nodes(node).storage.get(), payload["storage"])
1758
1898
  rootfs = _format_rootfs(payload["storage"], payload["disk_gib"], storage_type)
1759
- vmid = _allocate_vmid(proxmox)
1899
+ vmid = vmid or _allocate_vmid(proxmox)
1760
1900
  if not payload.get("hostname"):
1761
1901
  payload["hostname"] = f"ct{vmid}"
1762
1902
  try:
@@ -1788,6 +1928,43 @@ def _instantiate_container(proxmox: Any, node: str, payload: Dict[str, Any]) ->
1788
1928
  raise RuntimeError(f"Failed to create container: {exc}") from exc
1789
1929
 
1790
1930
 
1931
def _cleanup_failed_container(
    proxmox: Any, node: str, vmid: int, provisioning_id: Optional[str]
) -> None:
    """Best-effort removal of a container left behind by a failed creation.

    The container config is read first. If the container is reported as
    missing, there is nothing to do. When ``provisioning_id`` is given and does
    not appear in the container description, cleanup is skipped so that a
    container which does not carry this run's marker is left alone. Otherwise
    the container is stopped if it is running and then deleted. Failures are
    logged as warnings and never raised.
    """
    from proxmoxer.core import ResourceException

    try:
        cfg = proxmox.nodes(node).lxc(str(vmid)).config.get()
    except Exception as exc:  # pragma: no cover - best effort
        if isinstance(exc, ResourceException):
            lowered = str(exc).lower()
            if any(hint in lowered for hint in ("does not exist", "not found")):
                # Nothing was created, so there is nothing to clean up.
                return
        logger.warning("Failed to inspect container %s after create failure: %s", vmid, exc)
        return

    marker_text = (cfg or {}).get("description") or ""
    if provisioning_id and provisioning_id not in marker_text:
        logger.warning(
            "Skipping cleanup for vmid=%s; provisioning marker mismatch", vmid
        )
        return

    try:
        current = proxmox.nodes(node).lxc(str(vmid)).status.current.get()
        if current.get("status") == "running":
            _stop_container(proxmox, node, vmid)
    except Exception as exc:  # pragma: no cover - best effort
        logger.warning("Failed to stop container %s after create failure: %s", vmid, exc)

    try:
        _delete_container(proxmox, node, vmid)
    except Exception as exc:  # pragma: no cover - best effort
        logger.warning("Failed to delete container %s after create failure: %s", vmid, exc)
1966
+
1967
+
1791
1968
  class CreateProxmoxContainerHandler(SyncHandler):
1792
1969
  """Provision a new managed LXC container via the Proxmox API."""
1793
1970
 
@@ -1890,219 +2067,281 @@ class CreateProxmoxContainerHandler(SyncHandler):
1890
2067
  _validate_environment,
1891
2068
  )
1892
2069
 
1893
- def _create_container():
1894
- proxmox = _connect_proxmox(config)
1895
- node = config.get("node") or DEFAULT_NODE_NAME
1896
- payload = _build_container_payload(message, config)
1897
- payload["cpulimit"] = float(payload["cpus"])
1898
- payload["cores"] = int(max(math.ceil(payload["cpus"]), 1))
1899
- payload["memory"] = int(payload["ram_mib"])
1900
- payload["node"] = node
1901
- logger.debug(
1902
- "Provisioning container node=%s template=%s ram=%s cpu=%s storage=%s",
1903
- node,
1904
- payload["template"],
1905
- payload["ram_mib"],
1906
- payload["cpus"],
1907
- payload["storage"],
1908
- )
1909
- vmid, _ = _instantiate_container(proxmox, node, payload)
1910
- payload["vmid"] = vmid
1911
- payload["created_at"] = datetime.utcnow().isoformat() + "Z"
1912
- payload["status"] = "creating"
1913
- payload["device_id"] = device_id
1914
- _write_container_record(vmid, payload)
1915
- return proxmox, node, vmid, payload
1916
-
1917
- proxmox, node, vmid, payload = _run_lifecycle_step(
1918
- "create_container",
1919
- "Creating container",
1920
- "Provisioning the LXC container…",
1921
- "Container created.",
1922
- _create_container,
2070
+ node = config.get("node") or DEFAULT_NODE_NAME
2071
+ payload = _build_container_payload(message, config)
2072
+ payload["cpulimit"] = float(payload["cpus"])
2073
+ payload["cores"] = int(max(math.ceil(payload["cpus"]), 1))
2074
+ payload["memory"] = int(payload["ram_mib"])
2075
+ payload["node"] = node
2076
+
2077
+ reservation_id = _reserve_container_resources(
2078
+ payload, device_id=device_id, request_id=request_id
1923
2079
  )
2080
+ provisioning_id = secrets.token_hex(6)
2081
+ payload["description"] = f"{payload.get('description', MANAGED_MARKER)};provisioning_id={provisioning_id}"
1924
2082
 
1925
- def _start_container_step():
1926
- _start_container(proxmox, node, vmid)
2083
+ def _provision_background() -> None:
2084
+ nonlocal current_step_index
2085
+ proxmox: Any = None
2086
+ vmid: Optional[int] = None
2087
+ created_record = False
2088
+ try:
2089
+ def _create_container():
2090
+ nonlocal proxmox, vmid, created_record
2091
+ proxmox = _connect_proxmox(config)
2092
+ logger.debug(
2093
+ "Provisioning container node=%s template=%s ram=%s cpu=%s storage=%s",
2094
+ node,
2095
+ payload["template"],
2096
+ payload["ram_mib"],
2097
+ payload["cpus"],
2098
+ payload["storage"],
2099
+ )
2100
+ try:
2101
+ vmid = _allocate_vmid(proxmox)
2102
+ vmid, _ = _instantiate_container(proxmox, node, payload, vmid=vmid)
2103
+ except Exception:
2104
+ _release_container_reservation(reservation_id)
2105
+ if vmid is not None:
2106
+ _cleanup_failed_container(proxmox, node, vmid, provisioning_id)
2107
+ raise
2108
+ payload["vmid"] = vmid
2109
+ payload["created_at"] = datetime.utcnow().isoformat() + "Z"
2110
+ payload["status"] = "creating"
2111
+ payload["device_id"] = device_id
2112
+ try:
2113
+ _write_container_record(vmid, payload, reservation_id=reservation_id)
2114
+ created_record = True
2115
+ except Exception:
2116
+ _release_container_reservation(reservation_id)
2117
+ _cleanup_failed_container(proxmox, node, vmid, provisioning_id)
2118
+ raise
2119
+ return proxmox, node, vmid, payload
2120
+
2121
+ proxmox, _, vmid, payload_local = _run_lifecycle_step(
2122
+ "create_container",
2123
+ "Creating container",
2124
+ "Provisioning the LXC container…",
2125
+ "Container created.",
2126
+ _create_container,
2127
+ )
1927
2128
 
1928
- _run_lifecycle_step(
1929
- "start_container",
1930
- "Starting container",
1931
- "Booting the container…",
1932
- "Container startup completed.",
1933
- _start_container_step,
1934
- )
1935
- _update_container_record(vmid, {"status": "running"})
2129
+ def _start_container_step():
2130
+ _start_container(proxmox, node, vmid)
1936
2131
 
1937
- def _bootstrap_progress_callback(
1938
- step_index: int,
1939
- total: int,
1940
- step: Dict[str, Any],
1941
- status: str,
1942
- result: Optional[Dict[str, Any]],
1943
- ):
1944
- label = step.get("display_name") or _friendly_step_label(step.get("name", "bootstrap"))
1945
- error_summary = (result or {}).get("error_summary") or (result or {}).get("error")
1946
- attempt = (result or {}).get("attempt")
1947
- if status == "in_progress":
1948
- message_text = f"{label} is running…"
1949
- elif status == "completed":
1950
- message_text = f"{label} completed."
1951
- elif status == "retrying":
1952
- attempt_desc = f" (attempt {attempt})" if attempt else ""
1953
- message_text = f"{label} failed{attempt_desc}; retrying…"
1954
- else:
1955
- message_text = f"{label} failed"
1956
- if error_summary:
1957
- message_text += f": {error_summary}"
1958
- details: Dict[str, Any] = {}
1959
- if attempt:
1960
- details["attempt"] = attempt
1961
- if error_summary:
1962
- details["error_summary"] = error_summary
1963
- _emit_progress_event(
1964
- self,
1965
- step_index=step_index,
1966
- total_steps=total,
1967
- step_name=step.get("name", "bootstrap"),
1968
- step_label=label,
1969
- status=status,
1970
- message=message_text,
1971
- phase="bootstrap",
1972
- request_id=request_id,
1973
- details=details or None,
1974
- on_behalf_of_device=device_id,
1975
- )
2132
+ _run_lifecycle_step(
2133
+ "start_container",
2134
+ "Starting container",
2135
+ "Booting the container…",
2136
+ "Container startup completed.",
2137
+ _start_container_step,
2138
+ )
2139
+ _update_container_record(vmid, {"status": "running"})
2140
+
2141
+ def _bootstrap_progress_callback(
2142
+ step_index: int,
2143
+ total: int,
2144
+ step: Dict[str, Any],
2145
+ status: str,
2146
+ result: Optional[Dict[str, Any]],
2147
+ ):
2148
+ label = step.get("display_name") or _friendly_step_label(step.get("name", "bootstrap"))
2149
+ error_summary = (result or {}).get("error_summary") or (result or {}).get("error")
2150
+ attempt = (result or {}).get("attempt")
2151
+ if status == "in_progress":
2152
+ message_text = f"{label} is running…"
2153
+ elif status == "completed":
2154
+ message_text = f"{label} completed."
2155
+ elif status == "retrying":
2156
+ attempt_desc = f" (attempt {attempt})" if attempt else ""
2157
+ message_text = f"{label} failed{attempt_desc}; retrying…"
2158
+ else:
2159
+ message_text = f"{label} failed"
2160
+ if error_summary:
2161
+ message_text += f": {error_summary}"
2162
+ details: Dict[str, Any] = {}
2163
+ if attempt:
2164
+ details["attempt"] = attempt
2165
+ if error_summary:
2166
+ details["error_summary"] = error_summary
2167
+ _emit_progress_event(
2168
+ self,
2169
+ step_index=step_index,
2170
+ total_steps=total,
2171
+ step_name=step.get("name", "bootstrap"),
2172
+ step_label=label,
2173
+ status=status,
2174
+ message=message_text,
2175
+ phase="bootstrap",
2176
+ request_id=request_id,
2177
+ details=details or None,
2178
+ on_behalf_of_device=device_id,
2179
+ )
1976
2180
 
1977
- public_key, steps = _bootstrap_portacode(
1978
- vmid,
1979
- payload["username"],
1980
- payload["password"],
1981
- payload["ssh_public_key"],
1982
- steps=bootstrap_steps,
1983
- progress_callback=_bootstrap_progress_callback,
1984
- start_index=current_step_index,
1985
- total_steps=total_steps,
1986
- default_public_key=device_public_key if has_device_keypair else None,
1987
- )
1988
- current_step_index += len(bootstrap_steps)
1989
-
1990
- service_installed = False
1991
- if has_device_keypair:
1992
- logger.info(
1993
- "deploying dashboard-provided Portacode keypair (device_id=%s) into container %s",
1994
- device_id,
1995
- vmid,
1996
- )
1997
- _deploy_device_keypair(
1998
- vmid,
1999
- payload["username"],
2000
- device_private_key,
2001
- device_public_key,
2002
- )
2003
- service_installed = True
2004
- service_start_index = current_step_index
2181
+ public_key, steps = _bootstrap_portacode(
2182
+ vmid,
2183
+ payload_local["username"],
2184
+ payload_local["password"],
2185
+ payload_local["ssh_public_key"],
2186
+ steps=bootstrap_steps,
2187
+ progress_callback=_bootstrap_progress_callback,
2188
+ start_index=current_step_index,
2189
+ total_steps=total_steps,
2190
+ default_public_key=device_public_key if has_device_keypair else None,
2191
+ )
2192
+ current_step_index += len(bootstrap_steps)
2193
+
2194
+ service_installed = False
2195
+ if has_device_keypair:
2196
+ logger.info(
2197
+ "deploying dashboard-provided Portacode keypair (device_id=%s) into container %s",
2198
+ device_id,
2199
+ vmid,
2200
+ )
2201
+ _deploy_device_keypair(
2202
+ vmid,
2203
+ payload_local["username"],
2204
+ device_private_key,
2205
+ device_public_key,
2206
+ )
2207
+ service_installed = True
2208
+ service_start_index = current_step_index
2209
+
2210
+ auth_step_name = "setup_device_authentication"
2211
+ auth_label = "Setting up device authentication"
2212
+ _emit_progress_event(
2213
+ self,
2214
+ step_index=service_start_index,
2215
+ total_steps=total_steps,
2216
+ step_name=auth_step_name,
2217
+ step_label=auth_label,
2218
+ status="in_progress",
2219
+ message="Notifying the server of the new device…",
2220
+ phase="service",
2221
+ request_id=request_id,
2222
+ on_behalf_of_device=device_id,
2223
+ )
2224
+ _emit_progress_event(
2225
+ self,
2226
+ step_index=service_start_index,
2227
+ total_steps=total_steps,
2228
+ step_name=auth_step_name,
2229
+ step_label=auth_label,
2230
+ status="completed",
2231
+ message="Authentication metadata recorded.",
2232
+ phase="service",
2233
+ request_id=request_id,
2234
+ on_behalf_of_device=device_id,
2235
+ )
2005
2236
 
2006
- auth_step_name = "setup_device_authentication"
2007
- auth_label = "Setting up device authentication"
2008
- _emit_progress_event(
2009
- self,
2010
- step_index=service_start_index,
2011
- total_steps=total_steps,
2012
- step_name=auth_step_name,
2013
- step_label=auth_label,
2014
- status="in_progress",
2015
- message="Notifying the server of the new device…",
2016
- phase="service",
2017
- request_id=request_id,
2018
- on_behalf_of_device=device_id,
2019
- )
2020
- _emit_progress_event(
2021
- self,
2022
- step_index=service_start_index,
2023
- total_steps=total_steps,
2024
- step_name=auth_step_name,
2025
- step_label=auth_label,
2026
- status="completed",
2027
- message="Authentication metadata recorded.",
2028
- phase="service",
2029
- request_id=request_id,
2030
- on_behalf_of_device=device_id,
2031
- )
2237
+ install_step = service_start_index + 1
2238
+ install_label = "Launching Portacode service"
2239
+ _emit_progress_event(
2240
+ self,
2241
+ step_index=install_step,
2242
+ total_steps=total_steps,
2243
+ step_name="launch_portacode_service",
2244
+ step_label=install_label,
2245
+ status="in_progress",
2246
+ message="Running sudo portacode service install…",
2247
+ phase="service",
2248
+ request_id=request_id,
2249
+ on_behalf_of_device=device_id,
2250
+ )
2032
2251
 
2033
- install_step = service_start_index + 1
2034
- install_label = "Launching Portacode service"
2035
- _emit_progress_event(
2036
- self,
2037
- step_index=install_step,
2038
- total_steps=total_steps,
2039
- step_name="launch_portacode_service",
2040
- step_label=install_label,
2041
- status="in_progress",
2042
- message="Running sudo portacode service install…",
2043
- phase="service",
2044
- request_id=request_id,
2045
- on_behalf_of_device=device_id,
2046
- )
2252
+ cli_path = _resolve_portacode_cli_path(vmid, payload_local["username"])
2253
+ cmd = _su_command(
2254
+ payload_local["username"],
2255
+ f"sudo -S {shlex.quote(cli_path)} service install",
2256
+ )
2257
+ res = _run_pct(vmid, cmd, input_text=payload_local["password"] + "\n")
2258
+
2259
+ if res["returncode"] != 0:
2260
+ _emit_progress_event(
2261
+ self,
2262
+ step_index=install_step,
2263
+ total_steps=total_steps,
2264
+ step_name="launch_portacode_service",
2265
+ step_label=install_label,
2266
+ status="failed",
2267
+ message=f"{install_label} failed: {res.get('stderr') or res.get('stdout')}",
2268
+ phase="service",
2269
+ request_id=request_id,
2270
+ details={
2271
+ "stderr": res.get("stderr"),
2272
+ "stdout": res.get("stdout"),
2273
+ },
2274
+ on_behalf_of_device=device_id,
2275
+ )
2276
+ raise RuntimeError(res.get("stderr") or res.get("stdout") or "Service install failed")
2277
+
2278
+ _emit_progress_event(
2279
+ self,
2280
+ step_index=install_step,
2281
+ total_steps=total_steps,
2282
+ step_name="launch_portacode_service",
2283
+ step_label=install_label,
2284
+ status="completed",
2285
+ message="Portacode service install finished.",
2286
+ phase="service",
2287
+ request_id=request_id,
2288
+ on_behalf_of_device=device_id,
2289
+ )
2047
2290
 
2048
- cmd = _su_command(payload["username"], "sudo -S portacode service install")
2049
- res = _run_pct(vmid, cmd, input_text=payload["password"] + "\n")
2291
+ logger.info(
2292
+ "create_proxmox_container: portacode service install completed inside ct %s", vmid
2293
+ )
2050
2294
 
2051
- if res["returncode"] != 0:
2052
- _emit_progress_event(
2295
+ current_step_index += 2
2296
+
2297
+ _emit_host_event(
2053
2298
  self,
2054
- step_index=install_step,
2055
- total_steps=total_steps,
2056
- step_name="launch_portacode_service",
2057
- step_label=install_label,
2058
- status="failed",
2059
- message=f"{install_label} failed: {res.get('stderr') or res.get('stdout')}",
2060
- phase="service",
2061
- request_id=request_id,
2062
- details={
2063
- "stderr": res.get("stderr"),
2064
- "stdout": res.get("stdout"),
2299
+ {
2300
+ "event": "proxmox_container_created",
2301
+ "success": True,
2302
+ "message": f"Container {vmid} is ready and Portacode key captured.",
2303
+ "ctid": str(vmid),
2304
+ "public_key": public_key,
2305
+ "container": {
2306
+ "vmid": vmid,
2307
+ "hostname": payload_local["hostname"],
2308
+ "template": payload_local["template"],
2309
+ "storage": payload_local["storage"],
2310
+ "disk_gib": payload_local["disk_gib"],
2311
+ "ram_mib": payload_local["ram_mib"],
2312
+ "cpus": payload_local["cpus"],
2313
+ },
2314
+ "setup_steps": steps,
2315
+ "device_id": device_id,
2316
+ "on_behalf_of_device": device_id,
2317
+ "service_installed": service_installed,
2318
+ "request_id": request_id,
2319
+ },
2320
+ )
2321
+ except Exception as exc:
2322
+ if reservation_id and not created_record:
2323
+ _release_container_reservation(reservation_id)
2324
+ if vmid is not None and proxmox and node:
2325
+ _cleanup_failed_container(proxmox, node, vmid, provisioning_id)
2326
+ _remove_container_record(vmid)
2327
+ _emit_host_event(
2328
+ self,
2329
+ {
2330
+ "event": "error",
2331
+ "message": str(exc),
2332
+ "device_id": device_id,
2333
+ "request_id": request_id,
2065
2334
  },
2066
- on_behalf_of_device=device_id,
2067
2335
  )
2068
- raise RuntimeError(res.get("stderr") or res.get("stdout") or "Service install failed")
2069
-
2070
- _emit_progress_event(
2071
- self,
2072
- step_index=install_step,
2073
- total_steps=total_steps,
2074
- step_name="launch_portacode_service",
2075
- step_label=install_label,
2076
- status="completed",
2077
- message="Portacode service install finished.",
2078
- phase="service",
2079
- request_id=request_id,
2080
- on_behalf_of_device=device_id,
2081
- )
2082
-
2083
- logger.info("create_proxmox_container: portacode service install completed inside ct %s", vmid)
2084
2336
 
2085
- current_step_index += 2
2337
+ threading.Thread(target=_provision_background, daemon=True).start()
2086
2338
 
2087
2339
  return {
2088
- "event": "proxmox_container_created",
2340
+ "event": "proxmox_container_accepted",
2089
2341
  "success": True,
2090
- "message": f"Container {vmid} is ready and Portacode key captured.",
2091
- "ctid": str(vmid),
2092
- "public_key": public_key,
2093
- "container": {
2094
- "vmid": vmid,
2095
- "hostname": payload["hostname"],
2096
- "template": payload["template"],
2097
- "storage": payload["storage"],
2098
- "disk_gib": payload["disk_gib"],
2099
- "ram_mib": payload["ram_mib"],
2100
- "cpus": payload["cpus"],
2101
- },
2102
- "setup_steps": steps,
2342
+ "message": "Provisioning accepted; resources reserved.",
2103
2343
  "device_id": device_id,
2104
- "on_behalf_of_device": device_id,
2105
- "service_installed": service_installed,
2344
+ "request_id": request_id,
2106
2345
  }
2107
2346
 
2108
2347
 
@@ -2177,7 +2416,8 @@ class StartPortacodeServiceHandler(SyncHandler):
2177
2416
  on_behalf_of_device=on_behalf_of_device,
2178
2417
  )
2179
2418
 
2180
- cmd = _su_command(user, "sudo -S portacode service install")
2419
+ cli_path = _resolve_portacode_cli_path(vmid, user)
2420
+ cmd = _su_command(user, f"sudo -S {shlex.quote(cli_path)} service install")
2181
2421
  res = _run_pct(vmid, cmd, input_text=password + "\n")
2182
2422
 
2183
2423
  if res["returncode"] != 0: