gpustack-runtime 0.1.40.post1__py3-none-any.whl → 0.1.41.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. gpustack_runtime/__init__.py +1 -1
  2. gpustack_runtime/__main__.py +5 -3
  3. gpustack_runtime/_version.py +2 -2
  4. gpustack_runtime/_version_appendix.py +1 -1
  5. gpustack_runtime/cmds/__init__.py +5 -3
  6. gpustack_runtime/cmds/__types__.py +1 -1
  7. gpustack_runtime/cmds/deployer.py +140 -18
  8. gpustack_runtime/cmds/detector.py +1 -1
  9. gpustack_runtime/cmds/images.py +1 -1
  10. gpustack_runtime/deployer/__init__.py +28 -2
  11. gpustack_runtime/deployer/__patches__.py +1 -1
  12. gpustack_runtime/deployer/__types__.py +2 -1
  13. gpustack_runtime/deployer/__utils__.py +2 -2
  14. gpustack_runtime/deployer/cdi/__init__.py +86 -5
  15. gpustack_runtime/deployer/cdi/__types__.py +92 -29
  16. gpustack_runtime/deployer/cdi/__utils__.py +180 -0
  17. gpustack_runtime/deployer/cdi/amd.py +146 -0
  18. gpustack_runtime/deployer/cdi/ascend.py +164 -0
  19. gpustack_runtime/deployer/cdi/hygon.py +147 -0
  20. gpustack_runtime/deployer/cdi/iluvatar.py +136 -0
  21. gpustack_runtime/deployer/cdi/metax.py +148 -0
  22. gpustack_runtime/deployer/cdi/thead.py +57 -23
  23. gpustack_runtime/deployer/docker.py +9 -8
  24. gpustack_runtime/deployer/k8s/deviceplugin/__init__.py +325 -0
  25. gpustack_runtime/deployer/k8s/deviceplugin/__types__.py +131 -0
  26. gpustack_runtime/deployer/k8s/deviceplugin/plugin.py +590 -0
  27. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/__init__.py +3 -0
  28. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api.proto +212 -0
  29. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2.py +86 -0
  30. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2.pyi +168 -0
  31. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2_grpc.py +358 -0
  32. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/constants.py +34 -0
  33. gpustack_runtime/deployer/kuberentes.py +50 -4
  34. gpustack_runtime/deployer/podman.py +9 -8
  35. gpustack_runtime/detector/__init__.py +42 -5
  36. gpustack_runtime/detector/__types__.py +8 -24
  37. gpustack_runtime/detector/__utils__.py +46 -39
  38. gpustack_runtime/detector/amd.py +55 -66
  39. gpustack_runtime/detector/ascend.py +29 -41
  40. gpustack_runtime/detector/cambricon.py +3 -3
  41. gpustack_runtime/detector/hygon.py +21 -49
  42. gpustack_runtime/detector/iluvatar.py +44 -60
  43. gpustack_runtime/detector/metax.py +54 -37
  44. gpustack_runtime/detector/mthreads.py +74 -36
  45. gpustack_runtime/detector/nvidia.py +130 -93
  46. gpustack_runtime/detector/pyacl/__init__.py +1 -1
  47. gpustack_runtime/detector/pyamdgpu/__init__.py +1 -1
  48. gpustack_runtime/detector/pyamdsmi/__init__.py +1 -1
  49. gpustack_runtime/detector/pycuda/__init__.py +1 -1
  50. gpustack_runtime/detector/pydcmi/__init__.py +1 -1
  51. gpustack_runtime/detector/pyhsa/__init__.py +1 -1
  52. gpustack_runtime/detector/pymxsml/__init__.py +1553 -1
  53. gpustack_runtime/detector/pyrocmcore/__init__.py +1 -1
  54. gpustack_runtime/detector/pyrocmsmi/__init__.py +1 -1
  55. gpustack_runtime/detector/thead.py +41 -60
  56. gpustack_runtime/envs.py +106 -12
  57. gpustack_runtime/logging.py +6 -2
  58. {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.post1.dist-info}/METADATA +6 -1
  59. gpustack_runtime-0.1.41.post1.dist-info/RECORD +67 -0
  60. gpustack_runtime/detector/pymxsml/mxsml.py +0 -1580
  61. gpustack_runtime/detector/pymxsml/mxsml_extension.py +0 -816
  62. gpustack_runtime/detector/pymxsml/mxsml_mcm.py +0 -476
  63. gpustack_runtime-0.1.40.post1.dist-info/RECORD +0 -55
  64. {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.post1.dist-info}/WHEEL +0 -0
  65. {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.post1.dist-info}/entry_points.txt +0 -0
  66. {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.post1.dist-info}/licenses/LICENSE +0 -0
@@ -1,4 +1,4 @@
1
- from __future__ import annotations
1
+ from __future__ import annotations as __future_annotations__
2
2
 
3
3
  import os
4
4
  import sys
@@ -3,7 +3,7 @@
3
3
  # https://github.com/ROCm/rocm_smi_lib/blob/rocm-6.2.4/python_smi_tools/rsmiBindings.py,
4
4
  # https://rocm.docs.amd.com/projects/rocm_smi_lib/en/latest/doxygen/html/rocm__smi_8h_source.html,
5
5
  # https://rocm.docs.amd.com/projects/rocm_smi_lib/en/latest/doxygen/html/rocm__smi_8h.html.
6
- from __future__ import annotations
6
+ from __future__ import annotations as __future_annotations__
7
7
 
8
8
  import os
9
9
  import sys
@@ -1,4 +1,4 @@
1
- from __future__ import annotations
1
+ from __future__ import annotations as __future_annotations__
2
2
 
3
3
  import contextlib
4
4
  import logging
@@ -22,7 +22,6 @@ from .__utils__ import (
22
22
  bitmask_to_str,
23
23
  byte_to_mebibyte,
24
24
  get_brief_version,
25
- get_device_files,
26
25
  get_numa_node_by_bdf,
27
26
  get_numa_nodeset_size,
28
27
  get_pci_devices,
@@ -40,7 +39,7 @@ class THeadDetector(Detector):
40
39
  """
41
40
 
42
41
  @staticmethod
43
- @lru_cache
42
+ @lru_cache(maxsize=1)
44
43
  def is_supported() -> bool:
45
44
  """
46
45
  Check if the T-Head detector is supported.
@@ -69,7 +68,7 @@ class THeadDetector(Detector):
69
68
  return supported
70
69
 
71
70
  @staticmethod
72
- @lru_cache
71
+ @lru_cache(maxsize=1)
73
72
  def detect_pci_devices() -> dict[str, PCIDevice]:
74
73
  # See https://pcisig.com/membership/member-companies?combine=Alibaba.
75
74
  pci_devs = get_pci_devices(vendor="0x1ded")
@@ -121,17 +120,23 @@ class THeadDetector(Detector):
121
120
  )
122
121
 
123
122
  dev_count = pyhgml.hgmlDeviceGetCount()
124
- dev_files = None
125
123
  for dev_idx in range(dev_count):
126
124
  dev = pyhgml.hgmlDeviceGetHandleByIndex(dev_idx)
127
125
 
128
126
  dev_cc_t = pyhgml.hgmlDeviceGetHggcComputeCapability(dev)
129
127
  dev_cc = ".".join(map(str, dev_cc_t))
130
128
 
131
- dev_bdf = None
132
- with contextlib.suppress(pyhgml.HGMLError):
133
- dev_pci_info = pyhgml.hgmlDeviceGetPciInfo(dev)
134
- dev_bdf = str(dev_pci_info.busIdLegacy).lower()
129
+ dev_pci_info = pyhgml.hgmlDeviceGetPciInfo(dev)
130
+ dev_bdf = str(dev_pci_info.busIdLegacy).lower()
131
+
132
+ dev_numa = get_numa_node_by_bdf(dev_bdf)
133
+ if not dev_numa:
134
+ dev_node_affinity = pyhgml.hgmlDeviceGetMemoryAffinity(
135
+ dev,
136
+ get_numa_nodeset_size(),
137
+ pyhgml.HGML_AFFINITY_SCOPE_NODE,
138
+ )
139
+ dev_numa = bitmask_to_str(list(dev_node_affinity))
135
140
 
136
141
  dev_mig_mode = pyhgml.HGML_DEVICE_MIG_DISABLE
137
142
  with contextlib.suppress(pyhgml.HGMLError):
@@ -142,14 +147,7 @@ class THeadDetector(Detector):
142
147
  if dev_mig_mode == pyhgml.HGML_DEVICE_MIG_DISABLE:
143
148
  dev_index = dev_idx
144
149
  if envs.GPUSTACK_RUNTIME_DETECT_PHYSICAL_INDEX_PRIORITY:
145
- if dev_files is None:
146
- dev_files = get_device_files(
147
- pattern=r"alixpu_ppu(?P<number>\d+)",
148
- )
149
- if len(dev_files) >= dev_count:
150
- dev_file = dev_files[dev_idx]
151
- if dev_file.number is not None:
152
- dev_index = dev_file.number
150
+ dev_index = pyhgml.hgmlDeviceGetMinorNumber(dev)
153
151
 
154
152
  dev_name = pyhgml.hgmlDeviceGetName(dev)
155
153
 
@@ -204,12 +202,9 @@ class THeadDetector(Detector):
204
202
 
205
203
  dev_appendix = {
206
204
  "vgpu": dev_is_vgpu,
205
+ "bdf": dev_bdf,
206
+ "numa": dev_numa,
207
207
  }
208
- if dev_bdf:
209
- dev_appendix["bdf"] = dev_bdf
210
-
211
- if dev_links_state := _get_links_state(dev):
212
- dev_appendix.update(dev_links_state)
213
208
 
214
209
  ret.append(
215
210
  Device(
@@ -273,9 +268,9 @@ class THeadDetector(Detector):
273
268
 
274
269
  mdev_appendix = {
275
270
  "vgpu": True,
271
+ "bdf": dev_bdf,
272
+ "numa": dev_numa,
276
273
  }
277
- if dev_bdf:
278
- mdev_appendix["bdf"] = dev_bdf
279
274
 
280
275
  mdev_gi_id = pyhgml.hgmlDeviceGetGpuInstanceId(mdev)
281
276
  mdev_appendix["gpu_instance_id"] = mdev_gi_id
@@ -403,36 +398,24 @@ class THeadDetector(Detector):
403
398
  for i, dev_i in enumerate(devices):
404
399
  dev_i_handle = pyhgml.hgmlDeviceGetHandleByUUID(dev_i.uuid)
405
400
 
406
- # Get affinity with PCIe BDF if possible.
407
- if dev_i_bdf := dev_i.appendix.get("bdf", ""):
408
- ret.devices_numa_affinities[i] = get_numa_node_by_bdf(
409
- dev_i_bdf,
410
- )
411
- ret.devices_cpu_affinities[i] = map_numa_node_to_cpu_affinity(
412
- ret.devices_numa_affinities[i],
413
- )
414
- # Otherwise, get affinity via IXML.
415
- if not ret.devices_cpu_affinities[i]:
416
- # Get NUMA affinity.
417
- try:
418
- dev_i_memset = pyhgml.hgmlDeviceGetMemoryAffinity(
419
- dev_i_handle,
420
- get_numa_nodeset_size(),
421
- pyhgml.HGML_AFFINITY_SCOPE_NODE,
422
- )
423
- ret.devices_numa_affinities[i] = bitmask_to_str(
424
- list(dev_i_memset),
425
- )
426
- except pyhgml.HGMLError:
427
- debug_log_exception(
428
- logger,
429
- "Failed to get NUMA affinity for device %d",
430
- dev_i.index,
431
- )
432
- # Get CPU affinity.
433
- ret.devices_cpu_affinities[i] = map_numa_node_to_cpu_affinity(
434
- ret.devices_numa_affinities[i],
435
- )
401
+ # Get NUMA and CPU affinities.
402
+ ret.devices_numa_affinities[i] = dev_i.appendix.get("numa", "")
403
+ ret.devices_cpu_affinities[i] = map_numa_node_to_cpu_affinity(
404
+ ret.devices_numa_affinities[i],
405
+ )
406
+
407
+ # Get links state if applicable.
408
+ if dev_i_links_state := _get_links_state(dev_i_handle):
409
+ ret.appendices[i].update(dev_i_links_state)
410
+ # In practice, if a card has an active *Link,
411
+ # then other cards in the same machine should be interconnected with it through the *Link.
412
+ if dev_i_links_state.get("links_active_count", 0) > 0:
413
+ for j, dev_j in enumerate(devices):
414
+ if dev_i.index == dev_j.index:
415
+ continue
416
+ ret.devices_distances[i][j] = TopologyDistanceEnum.LINK
417
+ ret.devices_distances[j][i] = TopologyDistanceEnum.LINK
418
+ continue
436
419
 
437
420
  # Get distances to other devices.
438
421
  for j, dev_j in enumerate(devices):
@@ -447,8 +430,6 @@ class THeadDetector(Detector):
447
430
  dev_i_handle,
448
431
  dev_j_handle,
449
432
  )
450
- if dev_i.appendix.get("links_state", 0) > 0:
451
- distance = TopologyDistanceEnum.LINK
452
433
  except pyhgml.HGMLError:
453
434
  debug_log_exception(
454
435
  logger,
@@ -459,9 +440,6 @@ class THeadDetector(Detector):
459
440
 
460
441
  ret.devices_distances[i][j] = distance
461
442
  ret.devices_distances[j][i] = distance
462
- except pyhgml.HGMLError:
463
- debug_log_exception(logger, "Failed to fetch topology")
464
- raise
465
443
  except Exception:
466
444
  debug_log_exception(logger, "Failed to process topology fetching")
467
445
  raise
@@ -621,17 +599,20 @@ def _get_links_state(
621
599
  return None
622
600
 
623
601
  dev_links_state = 0
602
+ dev_links_active_count = 0
624
603
  try:
625
604
  for link_idx in range(int(dev_links_count)):
626
605
  dev_link_state = pyhgml.hgmlDeviceGetIcnLinkState(dev, link_idx)
627
606
  if dev_link_state:
628
- dev_links_state |= 1 << (link_idx + 1)
607
+ dev_links_state |= 1 << link_idx
608
+ dev_links_active_count += 1
629
609
  except pyhgml.HGMLError:
630
610
  debug_log_warning(logger, "Failed to get ICNLink link state")
631
611
 
632
612
  return {
633
613
  "links_count": dev_links_count,
634
614
  "links_state": dev_links_state,
615
+ "links_active_count": dev_links_active_count,
635
616
  }
636
617
 
637
618
 
gpustack_runtime/envs.py CHANGED
@@ -1,4 +1,4 @@
1
- from __future__ import annotations
1
+ from __future__ import annotations as __future_annotations__
2
2
 
3
3
  import contextlib
4
4
  import os
@@ -147,11 +147,6 @@ if TYPE_CHECKING:
147
147
  """
148
148
  Label prefix for the deployer.
149
149
  """
150
- GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_GENERATE: bool = True
151
- """
152
- During deployment, enable automatic generation of Container Device Interface (CDI) specifications
153
- for detected devices.
154
- """
155
150
  GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY: Path | None = None
156
151
  """
157
152
  During deployment, path of directory containing Container Device Interface (CDI) specifications,
@@ -248,6 +243,15 @@ if TYPE_CHECKING:
248
243
  `Env`: Injects resources using standard environment variable, based on `GPUSTACK_RUNTIME_DEPLOY_RESOURCE_KEY_MAP_RUNTIME_VISIBLE_DEVICES`.
249
244
  `CDI`: Injects resources using CDI, based on `GPUSTACK_RUNTIME_DEPLOY_RESOURCE_KEY_MAP_CDI`.
250
245
  """
246
+ GPUSTACK_RUNTIME_DOCKER_CDI_SPECS_GENERATE: bool = True
247
+ """
248
+ Generate CDI specifications during deployment when using CDI resource injection policy,
249
+ requires `GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY` to be existed.
250
+ Works only when `GPUSTACK_RUNTIME_DOCKER_RESOURCE_INJECTION_POLICY` is set to `CDI`.
251
+ Using internal knowledge to generate the CDI specifications for deployer,
252
+ if the output file conflicts with other tools generating CDI specifications(e.g., NVIDIA Container Toolkit),
253
+ please disable this and remove the output file manually.
254
+ """
251
255
  ## Kubernetes
252
256
  GPUSTACK_RUNTIME_KUBERNETES_NODE_NAME: str | None = None
253
257
  """
@@ -274,6 +278,35 @@ if TYPE_CHECKING:
274
278
  """
275
279
  Deletion propagation policy for Kubernetes resources (e.g., Foreground, Background, Orphan).
276
280
  """
281
+ GPUSTACK_RUNTIME_KUBERNETES_RESOURCE_INJECTION_POLICY: str | None = None
282
+ """
283
+ Resource injection policy for the Kubernetes deployer (e.g., Auto, Env, KDP).
284
+ `Auto`: Automatically choose the resource injection policy based on the environment.
285
+ `Env`: Injects resources using standard environment variable, depends on underlying Container Toolkit, based on `GPUSTACK_RUNTIME_DEPLOY_RESOURCE_KEY_MAP_RUNTIME_VISIBLE_DEVICES`.
286
+ `KDP`: Injects resources using Kubernetes Device Plugin, based on `GPUSTACK_RUNTIME_DEPLOY_RESOURCE_KEY_MAP_CDI`.
287
+ """
288
+ GPUSTACK_RUNTIME_KUBERNETES_KDP_PER_DEVICE_MAX_ALLOCATIONS: int | None = None
289
+ """
290
+ Maximum allocations for one device in Kubernetes Device Plugin.
291
+ If not set, it should be 10.
292
+ """
293
+ GPUSTACK_RUNTIME_KUBERNETES_KDP_DEVICE_ALLOCATION_POLICY: str | None = None
294
+ """
295
+ Device allocation policy for the Kubernetes Device Plugin (e.g., CDI, Env, Opaque).
296
+ `Auto`: Automatically choose the device allocation policy based on the environment.
297
+ `Env`: Allocates devices using runtime-visible environment variables; requires Container Toolkit support.
298
+ `CDI`: Allocates devices using generated CDI specifications, making it easy to debug and troubleshoot; requires `GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY` to exist.
299
+ `Opaque`: Uses internal logic for allocation, which is convenient for deployment but difficult to troubleshoot.
300
+ """
301
+ GPUSTACK_RUNTIME_KUBERNETES_KDP_CDI_SPECS_GENERATE: bool = True
302
+ """
303
+ Generate CDI specifications during deployment,
304
+ requires `GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY` to be existed.
305
+ Works only when `GPUSTACK_RUNTIME_KUBERNETES_KDP_DEVICE_ALLOCATION_POLICY` is set to `CDI`.
306
+ Using internal knowledge to generate the CDI specifications for deployer,
307
+ if the output file conflicts with other tools generating CDI specifications(e.g., NVIDIA Container Toolkit),
308
+ please disable this and remove the output file manually.
309
+ """
277
310
  ## Podman
278
311
  GPUSTACK_RUNTIME_PODMAN_HOST: str | None = None
279
312
  """
@@ -308,6 +341,15 @@ if TYPE_CHECKING:
308
341
  Mute the original healthcheck of the container in Podman.
309
342
  Default is same as `GPUSTACK_RUNTIME_DOCKER_MUTE_ORIGINAL_HEALTHCHECK`.
310
343
  """
344
+ GPUSTACK_RUNTIME_PODMAN_CDI_SPECS_GENERATE: bool = True
345
+ """
346
+ Generate CDI specifications during deployment,
347
+ requires `GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY` to be existed.
348
+ Using internal knowledge to generate the CDI specifications for deployer,
349
+ if the output file conflicts with other tools generating CDI specifications(e.g., NVIDIA Container Toolkit),
350
+ please disable this and remove the output file manually.
351
+ Default is same as `GPUSTACK_RUNTIME_DOCKER_CDI_SPECS_GENERATE`.
352
+ """
311
353
 
312
354
  # --8<-- [start:env-vars-definition]
313
355
 
@@ -479,12 +521,6 @@ variables: dict[str, Callable[[], Any]] = {
479
521
  "GPUSTACK_RUNTIME_DEPLOY_LABEL_PREFIX",
480
522
  "runtime.gpustack.ai",
481
523
  ),
482
- "GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_GENERATE": lambda: to_bool(
483
- getenv(
484
- "GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_GENERATE",
485
- "1",
486
- ),
487
- ),
488
524
  "GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY": lambda: mkdir_path(
489
525
  getenv(
490
526
  "GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY",
@@ -608,6 +644,13 @@ variables: dict[str, Callable[[], Any]] = {
608
644
  options=["Env", "CDI"],
609
645
  default="Env",
610
646
  ),
647
+ "GPUSTACK_RUNTIME_DOCKER_CDI_SPECS_GENERATE": lambda: ternary(
648
+ lambda: (
649
+ getenv("GPUSTACK_RUNTIME_DOCKER_RESOURCE_INJECTION_POLICY", "Env") == "Env"
650
+ ),
651
+ lambda: False,
652
+ lambda: to_bool(getenv("GPUSTACK_RUNTIME_DOCKER_CDI_SPECS_GENERATE", "1")),
653
+ ),
611
654
  ## Kubernetes
612
655
  "GPUSTACK_RUNTIME_KUBERNETES_NODE_NAME": lambda: getenv(
613
656
  "GPUSTACK_RUNTIME_KUBERNETES_NODE_NAME",
@@ -640,6 +683,46 @@ variables: dict[str, Callable[[], Any]] = {
640
683
  options=["Foreground", "Background", "Orphan"],
641
684
  default="Foreground",
642
685
  ),
686
+ "GPUSTACK_RUNTIME_KUBERNETES_RESOURCE_INJECTION_POLICY": lambda: choice(
687
+ getenv(
688
+ "GPUSTACK_RUNTIME_KUBERNETES_RESOURCE_INJECTION_POLICY",
689
+ ),
690
+ options=["Auto", "Env", "KDP"],
691
+ default="Auto",
692
+ ),
693
+ "GPUSTACK_RUNTIME_KUBERNETES_KDP_PER_DEVICE_MAX_ALLOCATIONS": lambda: to_int(
694
+ getenv(
695
+ "GPUSTACK_RUNTIME_KUBERNETES_KDP_PER_DEVICE_MAX_ALLOCATIONS",
696
+ "10",
697
+ ),
698
+ ),
699
+ "GPUSTACK_RUNTIME_KUBERNETES_KDP_DEVICE_ALLOCATION_POLICY": lambda: choice(
700
+ getenv(
701
+ "GPUSTACK_RUNTIME_KUBERNETES_KDP_DEVICE_ALLOCATION_POLICY",
702
+ ),
703
+ options=["Auto", "Env", "CDI", "Opaque"],
704
+ default="Auto",
705
+ ),
706
+ "GPUSTACK_RUNTIME_KUBERNETES_KDP_CDI_SPECS_GENERATE": lambda: ternary(
707
+ lambda: (
708
+ getenv("GPUSTACK_RUNTIME_KUBERNETES_RESOURCE_INJECTION_POLICY", "Auto")
709
+ == "Env"
710
+ ),
711
+ lambda: False,
712
+ lambda: ternary(
713
+ lambda: (
714
+ getenv(
715
+ "GPUSTACK_RUNTIME_KUBERNETES_KDP_DEVICE_ALLOCATION_POLICY",
716
+ "Auto",
717
+ )
718
+ not in ["Auto", "CDI"]
719
+ ),
720
+ lambda: False,
721
+ lambda: to_bool(
722
+ getenv("GPUSTACK_RUNTIME_KUBERNETES_KDP_CDI_SPECS_GENERATE", "1"),
723
+ ),
724
+ ),
725
+ ),
643
726
  ## Podman
644
727
  "GPUSTACK_RUNTIME_PODMAN_HOST": lambda: trim_str(
645
728
  getenvs(
@@ -697,8 +780,19 @@ variables: dict[str, Callable[[], Any]] = {
697
780
  "1",
698
781
  ),
699
782
  ),
783
+ "GPUSTACK_RUNTIME_PODMAN_CDI_SPECS_GENERATE": lambda: to_bool(
784
+ getenvs(
785
+ [
786
+ "GPUSTACK_RUNTIME_PODMAN_CDI_SPECS_GENERATE",
787
+ # Fallback to Docker's setting.
788
+ "GPUSTACK_RUNTIME_DOCKER_CDI_SPECS_GENERATE",
789
+ ],
790
+ "1",
791
+ ),
792
+ ),
700
793
  }
701
794
 
795
+
702
796
  # --8<-- [end:env-vars-definition]
703
797
 
704
798
 
@@ -1,5 +1,5 @@
1
1
  # Borrowed from https://github.com/libp2p/py-libp2p/blob/main/libp2p/utils/logging.py.
2
- from __future__ import annotations
2
+ from __future__ import annotations as __future_annotations__
3
3
 
4
4
  import atexit
5
5
  import logging
@@ -133,7 +133,11 @@ def setup_logging():
133
133
  module_logger.propagate = False
134
134
 
135
135
  # Configure 3rd-party loggers, set slightly higher level than package level
136
- for _3rd in ["docker", "kubernetes"]:
136
+ for _3rd in [
137
+ "docker",
138
+ "kubernetes",
139
+ "grpc",
140
+ ]:
137
141
  _3rd_logger = logging.getLogger(_3rd)
138
142
  _3rd_logger.handlers.clear()
139
143
  _3rd_logger.addHandler(queue_handler)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gpustack-runtime
3
- Version: 0.1.40.post1
3
+ Version: 0.1.41.post1
4
4
  Summary: GPUStack Runtime is library for detecting GPU resources and launching GPU workloads.
5
5
  Project-URL: Homepage, https://github.com/gpustack/runtime
6
6
  Project-URL: Bug Tracker, https://github.com/gpustack/gpustack/issues
@@ -14,14 +14,19 @@ Classifier: Programming Language :: Python :: 3.12
14
14
  Classifier: Programming Language :: Python :: 3.13
15
15
  Requires-Python: >=3.10
16
16
  Requires-Dist: argcomplete>=3.6.3
17
+ Requires-Dist: cachetools>=5.5.2
17
18
  Requires-Dist: docker>=7.1.0
18
19
  Requires-Dist: gpustack-runner>=0.1.24.post1
20
+ Requires-Dist: grpc-interceptor>=0.15.4
21
+ Requires-Dist: grpcio>=1.76.0
19
22
  Requires-Dist: kubernetes>=33.1.0
20
23
  Requires-Dist: mthreads-ml-py>=2.2.10
21
24
  Requires-Dist: nvidia-ml-py>=13.580.65
22
25
  Requires-Dist: podman==5.6.0
26
+ Requires-Dist: protobuf>=5.28.3
23
27
  Requires-Dist: pyyaml
24
28
  Requires-Dist: tqdm
29
+ Requires-Dist: types-protobuf>=6.32.1.20251210
25
30
  Description-Content-Type: text/markdown
26
31
 
27
32
  # GPUStack Runtime
@@ -0,0 +1,67 @@
1
+ gpustack_runtime/__init__.py,sha256=Xw_PVWneitx-8QmW6sJQeymj6zVbEgEndGhIB_km6TI,186
2
+ gpustack_runtime/__main__.py,sha256=O9yJKcN7vg0Ppgc13qesxHwST2wkH3ccOkTQXPWHnNA,3939
3
+ gpustack_runtime/_version.py,sha256=q92nZxyij31xSCTlm3-sXqxxPfJBIy3lPhgOdmDvsNE,792
4
+ gpustack_runtime/_version.pyi,sha256=A42NoSgcqEXVy2OeNm4LXC9CbyonbooYrSUBlPm2lGY,156
5
+ gpustack_runtime/envs.py,sha256=Q8vK42OpkY4T72zN6pOz_eCS_hnQElhAmxZ1wdks0xQ,38794
6
+ gpustack_runtime/logging.py,sha256=wMPriPpOuVsuClsjMh0qwEPQKyJiJa89ggdDjqkk7i0,6934
7
+ gpustack_runtime/cmds/__init__.py,sha256=-_X2O2lBn6KcdLGUzhL3lEjQC4_cwA36fvWDnFAgtVM,1382
8
+ gpustack_runtime/cmds/__types__.py,sha256=TBnUWUqzTkDtJnsMv363kdw-H8fOf-XQYbOvrmQif-M,815
9
+ gpustack_runtime/cmds/deployer.py,sha256=KvhPhU6ZW-UV6vLykI5adKI1ThgVFFJqWaII3n4OhL8,32846
10
+ gpustack_runtime/cmds/detector.py,sha256=AALcoqCiNuwYucKBnyj7r5ScOWc_BSzAhHR2C0QbEHE,8750
11
+ gpustack_runtime/cmds/images.py,sha256=7tb-D3G4yqLPkbS9aSuWI1bD3DYK8BLbPbgqac56blI,594
12
+ gpustack_runtime/deployer/__init__.py,sha256=impMrmvkMjuCBthsn3QUz3LuwpmmNAymHJKJ2o6SZoc,16249
13
+ gpustack_runtime/deployer/__patches__.py,sha256=cTBge8BT6IsY5MzETKY3kN28k3igYfNj7pcpgDzfDzw,17849
14
+ gpustack_runtime/deployer/__types__.py,sha256=PgIWogHOvHKsHoeBjmKFEEM3JrKck89Mmnwlfx01BbE,72248
15
+ gpustack_runtime/deployer/__utils__.py,sha256=paQu2M1UeoSfQPsiskmAqJSiln-8qwibTssEoWFMLec,21109
16
+ gpustack_runtime/deployer/docker.py,sha256=bOaXbTnaalbO42FlyWR1Ha26Y30LGWPzWKPV5Q-Nk7g,85039
17
+ gpustack_runtime/deployer/kuberentes.py,sha256=VkaAvuQJ5rRiNVD6OfM2pE3rmyT_a6oEvp-G8gW8Ojo,89816
18
+ gpustack_runtime/deployer/podman.py,sha256=_qdbsTezacRmiXa3n04OUPUsgVy1pSFgJSKxous4s14,82156
19
+ gpustack_runtime/deployer/cdi/__init__.py,sha256=2wHrxkud3GJokE3ytNc3jvjddemXkNuuz_oIKzxD3-I,4000
20
+ gpustack_runtime/deployer/cdi/__types__.py,sha256=04DKvcogk7OoHS7TU2Bmht3VVMu7iOEBWTEOvxpHt4w,18399
21
+ gpustack_runtime/deployer/cdi/__utils__.py,sha256=mvdOqkbhaSkphl0K-VpNwtFviAkttS9UrmKEA285kRw,3908
22
+ gpustack_runtime/deployer/cdi/amd.py,sha256=-eq_SOlC56VX2QscZXvnoeffWSRindhr8zFZmaIcKrE,4082
23
+ gpustack_runtime/deployer/cdi/ascend.py,sha256=lDs75a9--c0lM34xfJqu-_QbfWNFrf4zE-GXPKReBe4,4538
24
+ gpustack_runtime/deployer/cdi/hygon.py,sha256=h6-vQfv03sgxYjMJAf_JOMq9cHFPaNjK1YbUYIiSXck,4117
25
+ gpustack_runtime/deployer/cdi/iluvatar.py,sha256=6nNECZpU5IPP6-5l-O1rzU-ib-WcuwKvDg7ZV__1NE4,3650
26
+ gpustack_runtime/deployer/cdi/metax.py,sha256=tmJBvr-n9pERAp-dXsa54qv6xmxt0rJoJwY36TFdoWk,4143
27
+ gpustack_runtime/deployer/cdi/thead.py,sha256=SvIDKNYZx7FwMPTTxyJ2RRjlr9LXLN8BUYCUhidmiQk,3671
28
+ gpustack_runtime/deployer/k8s/deviceplugin/__init__.py,sha256=Rid_NUW5a0dAS0fTyi0k_GzSjNVaYqpG_MwiALaYvgk,10397
29
+ gpustack_runtime/deployer/k8s/deviceplugin/__types__.py,sha256=LCkgPDZ64Mra7bo5jmtsAO2Ypbc4qK99lMl6R_nQhnY,3043
30
+ gpustack_runtime/deployer/k8s/deviceplugin/plugin.py,sha256=Lfal3D1wfhTRyjkLMsTUaFNzul1-nfnY3kkpqIMB0b8,18140
31
+ gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/__init__.py,sha256=3rOYmgDIIJ4idEtwgnumGStH7PaK-J7EYrOnLa9A-8o,118
32
+ gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api.proto,sha256=rmB8RDe4LN5FCVkQ608uS-pl32mk5tt6iGe-g2lKtPs,7919
33
+ gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2.py,sha256=DzmeyfwekwE0U0dK6WfwX1NQYLEr2mFa1hdaUTxt2f8,8404
34
+ gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2.pyi,sha256=lq1dbSgBYqJ7zyGfoKKHCyfr6R5vcCGzJxteeyQpbuI,8232
35
+ gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2_grpc.py,sha256=GM6EyCEFeyOjL0XOCisbcHurRoLKqKDUI5obsUyTxpE,17446
36
+ gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/constants.py,sha256=tpNk3e_cvY67C9RwVsdTNl75YuNXBgsn53fSJIzeTR4,828
37
+ gpustack_runtime/detector/__init__.py,sha256=9i6KOd3Qp_BmnSyPURlOBHlHJMSInqlDTh1kpAbs4_U,8104
38
+ gpustack_runtime/detector/__types__.py,sha256=tiYbxPD6gV5wS79K3d2dUzy6btJl4QcsgunyxtJ240E,13162
39
+ gpustack_runtime/detector/__utils__.py,sha256=QdLWXwsU1_EMxXG5Y29psqnttWJyXWMphHDjiC_6Byc,25153
40
+ gpustack_runtime/detector/amd.py,sha256=qh86xGhPJRIXwiKaHmeyIrsxchUDRpyggR6yc0cLuKw,17553
41
+ gpustack_runtime/detector/ascend.py,sha256=E6YPoREI5r2HZIegUaQb0XwC3Qau1mnkNeCRbgtlE5k,17992
42
+ gpustack_runtime/detector/cambricon.py,sha256=GzXlS4et8zape0rr19d1EwSV9cfFEmHgaElTVMjR3IY,3947
43
+ gpustack_runtime/detector/hygon.py,sha256=R4I8h66YHJC00iAtDJhBX772VMKUdZ8nleRXZFPUt3Q,12299
44
+ gpustack_runtime/detector/iluvatar.py,sha256=bqciqjYE_tIxBg2PSOlIzH3WcFYBgTDjfN6nT90LSGg,10206
45
+ gpustack_runtime/detector/metax.py,sha256=W4NSZD7Kf5431B63UBpYnKIk2Jz1SutEpwXkwgYfmfE,10374
46
+ gpustack_runtime/detector/mthreads.py,sha256=GcElUIMvU5C6P4Fx_X_kSOsJps8WZ47tkZ2B2MJZmk4,12131
47
+ gpustack_runtime/detector/nvidia.py,sha256=xdvoMdNx943on5fd_mI3WI_85kMY-0dYm5NU3vqXb9M,33400
48
+ gpustack_runtime/detector/thead.py,sha256=43TGPq78FulpYBUeEMVxDTY-0X3ve2FsX4Hsd0Lswy0,25561
49
+ gpustack_runtime/detector/pyacl/__init__.py,sha256=UQjaBxP7nJNyzr08N8_lH-5wPtnFmUY9pyQhs6vIChU,16232
50
+ gpustack_runtime/detector/pyamdgpu/__init__.py,sha256=x-UO07EpKEgfTLmXQOD6j9f6kibuvDC7riQFof3YGdw,8617
51
+ gpustack_runtime/detector/pyamdsmi/__init__.py,sha256=800-khq2w6HLgXM12RkhcdvXBGeAJ4s1_TWJyHebCMk,955
52
+ gpustack_runtime/detector/pycuda/__init__.py,sha256=p-Na5eBjdsFGXPHCEEUfFj5TQFjPxPQGV0i4oU6ywBA,16922
53
+ gpustack_runtime/detector/pydcmi/__init__.py,sha256=zCeImelWgbPDn51dJgp3LlCK-uGOuHmmPGiOsyYB3cI,37264
54
+ gpustack_runtime/detector/pyhgml/__init__.py,sha256=Yp9s-QhHS4ck7Iq9kd4v6a4BruyJQcuOTYlyYSmnOgI,182896
55
+ gpustack_runtime/detector/pyhgml/libhgml.so,sha256=BPzGVBpzrMX1tSvbXddq8Q0Qhi8w-No2JXX8sRxTioI,2101640
56
+ gpustack_runtime/detector/pyhgml/libuki.so,sha256=EE6v1vIYYT4FSDMMm9rSfAqwrwIPFD-4_6KtP51lSps,702352
57
+ gpustack_runtime/detector/pyhsa/__init__.py,sha256=4DuGnBBMUVOCPa6vTx3XT5mffGrKk6M6CYbUWBoMTJ0,15792
58
+ gpustack_runtime/detector/pyixml/__init__.py,sha256=6ss_Dyl8lIT4WrKpfwmQqzBmg4Bxi38vg_eey_wsSY0,162681
59
+ gpustack_runtime/detector/pymxsml/__init__.py,sha256=YxfNHq7TWd7CpNroP45BGXhcWNpY_sXgVzNGtx68DII,45409
60
+ gpustack_runtime/detector/pyrocmcore/__init__.py,sha256=rgwIdPS-7GG7_5luRMR1XG9QyNM3lJh5ryD7kfZqpWg,2523
61
+ gpustack_runtime/detector/pyrocmsmi/__init__.py,sha256=ACwRtJWVIuJ4NTcBJxk0zrVb_qtDOMkApMdbJoag5g0,11906
62
+ gpustack_runtime/_version_appendix.py,sha256=7IjE8ryt0nhK-ZgkY6TRxoRV0C1ndSI9Pc9LZlSVWd8,23
63
+ gpustack_runtime-0.1.41.post1.dist-info/METADATA,sha256=G5W5svQb4rw_3pQf6A3eqbhF-TsFpFH7PdgF5nG0qfQ,2364
64
+ gpustack_runtime-0.1.41.post1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
65
+ gpustack_runtime-0.1.41.post1.dist-info/entry_points.txt,sha256=bBO_61GxP6dIT74uZwbSDgW5Vt2pTePUS3CgjUJkUgg,68
66
+ gpustack_runtime-0.1.41.post1.dist-info/licenses/LICENSE,sha256=OiPibowBvB-NHV3TP_NOj18XNBlXcshXZFMpa3uvKVE,10362
67
+ gpustack_runtime-0.1.41.post1.dist-info/RECORD,,