gpustack-runtime 0.1.40.post1__py3-none-any.whl → 0.1.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. gpustack_runtime/__init__.py +1 -1
  2. gpustack_runtime/__main__.py +5 -3
  3. gpustack_runtime/_version.py +2 -2
  4. gpustack_runtime/_version_appendix.py +1 -1
  5. gpustack_runtime/cmds/__init__.py +5 -3
  6. gpustack_runtime/cmds/__types__.py +1 -1
  7. gpustack_runtime/cmds/deployer.py +140 -18
  8. gpustack_runtime/cmds/detector.py +1 -1
  9. gpustack_runtime/cmds/images.py +1 -1
  10. gpustack_runtime/deployer/__init__.py +28 -2
  11. gpustack_runtime/deployer/__patches__.py +1 -1
  12. gpustack_runtime/deployer/__types__.py +2 -1
  13. gpustack_runtime/deployer/__utils__.py +2 -2
  14. gpustack_runtime/deployer/cdi/__init__.py +85 -5
  15. gpustack_runtime/deployer/cdi/__types__.py +92 -29
  16. gpustack_runtime/deployer/cdi/__utils__.py +178 -0
  17. gpustack_runtime/deployer/cdi/amd.py +146 -0
  18. gpustack_runtime/deployer/cdi/ascend.py +164 -0
  19. gpustack_runtime/deployer/cdi/hygon.py +147 -0
  20. gpustack_runtime/deployer/cdi/iluvatar.py +136 -0
  21. gpustack_runtime/deployer/cdi/metax.py +148 -0
  22. gpustack_runtime/deployer/cdi/thead.py +57 -23
  23. gpustack_runtime/deployer/docker.py +9 -8
  24. gpustack_runtime/deployer/k8s/deviceplugin/__init__.py +240 -0
  25. gpustack_runtime/deployer/k8s/deviceplugin/__types__.py +131 -0
  26. gpustack_runtime/deployer/k8s/deviceplugin/plugin.py +586 -0
  27. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/__init__.py +3 -0
  28. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api.proto +212 -0
  29. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2.py +86 -0
  30. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2.pyi +168 -0
  31. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2_grpc.py +358 -0
  32. gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/constants.py +34 -0
  33. gpustack_runtime/deployer/kuberentes.py +37 -4
  34. gpustack_runtime/deployer/podman.py +9 -8
  35. gpustack_runtime/detector/__init__.py +42 -5
  36. gpustack_runtime/detector/__types__.py +8 -24
  37. gpustack_runtime/detector/__utils__.py +46 -39
  38. gpustack_runtime/detector/amd.py +55 -66
  39. gpustack_runtime/detector/ascend.py +29 -41
  40. gpustack_runtime/detector/cambricon.py +3 -3
  41. gpustack_runtime/detector/hygon.py +21 -49
  42. gpustack_runtime/detector/iluvatar.py +44 -60
  43. gpustack_runtime/detector/metax.py +54 -37
  44. gpustack_runtime/detector/mthreads.py +74 -36
  45. gpustack_runtime/detector/nvidia.py +130 -93
  46. gpustack_runtime/detector/pyacl/__init__.py +1 -1
  47. gpustack_runtime/detector/pyamdgpu/__init__.py +1 -1
  48. gpustack_runtime/detector/pyamdsmi/__init__.py +1 -1
  49. gpustack_runtime/detector/pycuda/__init__.py +1 -1
  50. gpustack_runtime/detector/pydcmi/__init__.py +1 -1
  51. gpustack_runtime/detector/pyhsa/__init__.py +1 -1
  52. gpustack_runtime/detector/pymxsml/__init__.py +1553 -1
  53. gpustack_runtime/detector/pyrocmcore/__init__.py +1 -1
  54. gpustack_runtime/detector/pyrocmsmi/__init__.py +1 -1
  55. gpustack_runtime/detector/thead.py +41 -60
  56. gpustack_runtime/envs.py +104 -12
  57. gpustack_runtime/logging.py +6 -2
  58. {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.dist-info}/METADATA +6 -1
  59. gpustack_runtime-0.1.41.dist-info/RECORD +67 -0
  60. gpustack_runtime/detector/pymxsml/mxsml.py +0 -1580
  61. gpustack_runtime/detector/pymxsml/mxsml_extension.py +0 -816
  62. gpustack_runtime/detector/pymxsml/mxsml_mcm.py +0 -476
  63. gpustack_runtime-0.1.40.post1.dist-info/RECORD +0 -55
  64. {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.dist-info}/WHEEL +0 -0
  65. {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.dist-info}/entry_points.txt +0 -0
  66. {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.dist-info}/licenses/LICENSE +0 -0
@@ -1,4 +1,4 @@
1
- from __future__ import annotations
1
+ from __future__ import annotations as __future_annotations__
2
2
 
3
3
  import os
4
4
  import sys
@@ -3,7 +3,7 @@
3
3
  # https://github.com/ROCm/rocm_smi_lib/blob/rocm-6.2.4/python_smi_tools/rsmiBindings.py,
4
4
  # https://rocm.docs.amd.com/projects/rocm_smi_lib/en/latest/doxygen/html/rocm__smi_8h_source.html,
5
5
  # https://rocm.docs.amd.com/projects/rocm_smi_lib/en/latest/doxygen/html/rocm__smi_8h.html.
6
- from __future__ import annotations
6
+ from __future__ import annotations as __future_annotations__
7
7
 
8
8
  import os
9
9
  import sys
@@ -1,4 +1,4 @@
1
- from __future__ import annotations
1
+ from __future__ import annotations as __future_annotations__
2
2
 
3
3
  import contextlib
4
4
  import logging
@@ -22,7 +22,6 @@ from .__utils__ import (
22
22
  bitmask_to_str,
23
23
  byte_to_mebibyte,
24
24
  get_brief_version,
25
- get_device_files,
26
25
  get_numa_node_by_bdf,
27
26
  get_numa_nodeset_size,
28
27
  get_pci_devices,
@@ -40,7 +39,7 @@ class THeadDetector(Detector):
40
39
  """
41
40
 
42
41
  @staticmethod
43
- @lru_cache
42
+ @lru_cache(maxsize=1)
44
43
  def is_supported() -> bool:
45
44
  """
46
45
  Check if the T-Head detector is supported.
@@ -69,7 +68,7 @@ class THeadDetector(Detector):
69
68
  return supported
70
69
 
71
70
  @staticmethod
72
- @lru_cache
71
+ @lru_cache(maxsize=1)
73
72
  def detect_pci_devices() -> dict[str, PCIDevice]:
74
73
  # See https://pcisig.com/membership/member-companies?combine=Alibaba.
75
74
  pci_devs = get_pci_devices(vendor="0x1ded")
@@ -121,17 +120,23 @@ class THeadDetector(Detector):
121
120
  )
122
121
 
123
122
  dev_count = pyhgml.hgmlDeviceGetCount()
124
- dev_files = None
125
123
  for dev_idx in range(dev_count):
126
124
  dev = pyhgml.hgmlDeviceGetHandleByIndex(dev_idx)
127
125
 
128
126
  dev_cc_t = pyhgml.hgmlDeviceGetHggcComputeCapability(dev)
129
127
  dev_cc = ".".join(map(str, dev_cc_t))
130
128
 
131
- dev_bdf = None
132
- with contextlib.suppress(pyhgml.HGMLError):
133
- dev_pci_info = pyhgml.hgmlDeviceGetPciInfo(dev)
134
- dev_bdf = str(dev_pci_info.busIdLegacy).lower()
129
+ dev_pci_info = pyhgml.hgmlDeviceGetPciInfo(dev)
130
+ dev_bdf = str(dev_pci_info.busIdLegacy).lower()
131
+
132
+ dev_numa = get_numa_node_by_bdf(dev_bdf)
133
+ if not dev_numa:
134
+ dev_node_affinity = pyhgml.hgmlDeviceGetMemoryAffinity(
135
+ dev,
136
+ get_numa_nodeset_size(),
137
+ pyhgml.HGML_AFFINITY_SCOPE_NODE,
138
+ )
139
+ dev_numa = bitmask_to_str(list(dev_node_affinity))
135
140
 
136
141
  dev_mig_mode = pyhgml.HGML_DEVICE_MIG_DISABLE
137
142
  with contextlib.suppress(pyhgml.HGMLError):
@@ -142,14 +147,7 @@ class THeadDetector(Detector):
142
147
  if dev_mig_mode == pyhgml.HGML_DEVICE_MIG_DISABLE:
143
148
  dev_index = dev_idx
144
149
  if envs.GPUSTACK_RUNTIME_DETECT_PHYSICAL_INDEX_PRIORITY:
145
- if dev_files is None:
146
- dev_files = get_device_files(
147
- pattern=r"alixpu_ppu(?P<number>\d+)",
148
- )
149
- if len(dev_files) >= dev_count:
150
- dev_file = dev_files[dev_idx]
151
- if dev_file.number is not None:
152
- dev_index = dev_file.number
150
+ dev_index = pyhgml.hgmlDeviceGetMinorNumber(dev)
153
151
 
154
152
  dev_name = pyhgml.hgmlDeviceGetName(dev)
155
153
 
@@ -204,12 +202,9 @@ class THeadDetector(Detector):
204
202
 
205
203
  dev_appendix = {
206
204
  "vgpu": dev_is_vgpu,
205
+ "bdf": dev_bdf,
206
+ "numa": dev_numa,
207
207
  }
208
- if dev_bdf:
209
- dev_appendix["bdf"] = dev_bdf
210
-
211
- if dev_links_state := _get_links_state(dev):
212
- dev_appendix.update(dev_links_state)
213
208
 
214
209
  ret.append(
215
210
  Device(
@@ -273,9 +268,9 @@ class THeadDetector(Detector):
273
268
 
274
269
  mdev_appendix = {
275
270
  "vgpu": True,
271
+ "bdf": dev_bdf,
272
+ "numa": dev_numa,
276
273
  }
277
- if dev_bdf:
278
- mdev_appendix["bdf"] = dev_bdf
279
274
 
280
275
  mdev_gi_id = pyhgml.hgmlDeviceGetGpuInstanceId(mdev)
281
276
  mdev_appendix["gpu_instance_id"] = mdev_gi_id
@@ -403,36 +398,24 @@ class THeadDetector(Detector):
403
398
  for i, dev_i in enumerate(devices):
404
399
  dev_i_handle = pyhgml.hgmlDeviceGetHandleByUUID(dev_i.uuid)
405
400
 
406
- # Get affinity with PCIe BDF if possible.
407
- if dev_i_bdf := dev_i.appendix.get("bdf", ""):
408
- ret.devices_numa_affinities[i] = get_numa_node_by_bdf(
409
- dev_i_bdf,
410
- )
411
- ret.devices_cpu_affinities[i] = map_numa_node_to_cpu_affinity(
412
- ret.devices_numa_affinities[i],
413
- )
414
- # Otherwise, get affinity via IXML.
415
- if not ret.devices_cpu_affinities[i]:
416
- # Get NUMA affinity.
417
- try:
418
- dev_i_memset = pyhgml.hgmlDeviceGetMemoryAffinity(
419
- dev_i_handle,
420
- get_numa_nodeset_size(),
421
- pyhgml.HGML_AFFINITY_SCOPE_NODE,
422
- )
423
- ret.devices_numa_affinities[i] = bitmask_to_str(
424
- list(dev_i_memset),
425
- )
426
- except pyhgml.HGMLError:
427
- debug_log_exception(
428
- logger,
429
- "Failed to get NUMA affinity for device %d",
430
- dev_i.index,
431
- )
432
- # Get CPU affinity.
433
- ret.devices_cpu_affinities[i] = map_numa_node_to_cpu_affinity(
434
- ret.devices_numa_affinities[i],
435
- )
401
+ # Get NUMA and CPU affinities.
402
+ ret.devices_numa_affinities[i] = dev_i.appendix.get("numa", "")
403
+ ret.devices_cpu_affinities[i] = map_numa_node_to_cpu_affinity(
404
+ ret.devices_numa_affinities[i],
405
+ )
406
+
407
+ # Get links state if applicable.
408
+ if dev_i_links_state := _get_links_state(dev_i_handle):
409
+ ret.appendices[i].update(dev_i_links_state)
410
+ # In practice, if a card has an active *Link,
411
+ # then other cards in the same machine should be interconnected with it through the *Link.
412
+ if dev_i_links_state.get("links_active_count", 0) > 0:
413
+ for j, dev_j in enumerate(devices):
414
+ if dev_i.index == dev_j.index:
415
+ continue
416
+ ret.devices_distances[i][j] = TopologyDistanceEnum.LINK
417
+ ret.devices_distances[j][i] = TopologyDistanceEnum.LINK
418
+ continue
436
419
 
437
420
  # Get distances to other devices.
438
421
  for j, dev_j in enumerate(devices):
@@ -447,8 +430,6 @@ class THeadDetector(Detector):
447
430
  dev_i_handle,
448
431
  dev_j_handle,
449
432
  )
450
- if dev_i.appendix.get("links_state", 0) > 0:
451
- distance = TopologyDistanceEnum.LINK
452
433
  except pyhgml.HGMLError:
453
434
  debug_log_exception(
454
435
  logger,
@@ -459,9 +440,6 @@ class THeadDetector(Detector):
459
440
 
460
441
  ret.devices_distances[i][j] = distance
461
442
  ret.devices_distances[j][i] = distance
462
- except pyhgml.HGMLError:
463
- debug_log_exception(logger, "Failed to fetch topology")
464
- raise
465
443
  except Exception:
466
444
  debug_log_exception(logger, "Failed to process topology fetching")
467
445
  raise
@@ -621,17 +599,20 @@ def _get_links_state(
621
599
  return None
622
600
 
623
601
  dev_links_state = 0
602
+ dev_links_active_count = 0
624
603
  try:
625
604
  for link_idx in range(int(dev_links_count)):
626
605
  dev_link_state = pyhgml.hgmlDeviceGetIcnLinkState(dev, link_idx)
627
606
  if dev_link_state:
628
- dev_links_state |= 1 << (link_idx + 1)
607
+ dev_links_state |= 1 << link_idx
608
+ dev_links_active_count += 1
629
609
  except pyhgml.HGMLError:
630
610
  debug_log_warning(logger, "Failed to get ICNLink link state")
631
611
 
632
612
  return {
633
613
  "links_count": dev_links_count,
634
614
  "links_state": dev_links_state,
615
+ "links_active_count": dev_links_active_count,
635
616
  }
636
617
 
637
618
 
gpustack_runtime/envs.py CHANGED
@@ -1,4 +1,4 @@
1
- from __future__ import annotations
1
+ from __future__ import annotations as __future_annotations__
2
2
 
3
3
  import contextlib
4
4
  import os
@@ -147,11 +147,6 @@ if TYPE_CHECKING:
147
147
  """
148
148
  Label prefix for the deployer.
149
149
  """
150
- GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_GENERATE: bool = True
151
- """
152
- During deployment, enable automatic generation of Container Device Interface (CDI) specifications
153
- for detected devices.
154
- """
155
150
  GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY: Path | None = None
156
151
  """
157
152
  During deployment, path of directory containing Container Device Interface (CDI) specifications,
@@ -248,6 +243,15 @@ if TYPE_CHECKING:
248
243
  `Env`: Injects resources using standard environment variable, based on `GPUSTACK_RUNTIME_DEPLOY_RESOURCE_KEY_MAP_RUNTIME_VISIBLE_DEVICES`.
249
244
  `CDI`: Injects resources using CDI, based on `GPUSTACK_RUNTIME_DEPLOY_RESOURCE_KEY_MAP_CDI`.
250
245
  """
246
+ GPUSTACK_RUNTIME_DOCKER_CDI_SPECS_GENERATE: bool = True
247
+ """
248
+ Generate CDI specifications during deployment when using CDI resource injection policy,
249
+ requires `GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY` to exist.
250
+ Works only when `GPUSTACK_RUNTIME_DOCKER_RESOURCE_INJECTION_POLICY` is set to `CDI`.
251
+ Using internal knowledge to generate the CDI specifications for deployer,
252
+ if the output file conflicts with other tools generating CDI specifications (e.g., NVIDIA Container Toolkit),
253
+ please disable this and remove the output file manually.
254
+ """
251
255
  ## Kubernetes
252
256
  GPUSTACK_RUNTIME_KUBERNETES_NODE_NAME: str | None = None
253
257
  """
@@ -274,6 +278,33 @@ if TYPE_CHECKING:
274
278
  """
275
279
  Deletion propagation policy for Kubernetes resources (e.g., Foreground, Background, Orphan).
276
280
  """
281
+ GPUSTACK_RUNTIME_KUBERNETES_RESOURCE_INJECTION_POLICY: str | None = None
282
+ """
283
+ Resource injection policy for the Kubernetes deployer (e.g., Env, KDP).
284
+ `Env`: Injects resources using standard environment variable, depends on underlying Container Toolkit, based on `GPUSTACK_RUNTIME_DEPLOY_RESOURCE_KEY_MAP_RUNTIME_VISIBLE_DEVICES`.
285
+ `KDP`: Injects resources using Kubernetes Device Plugin, based on `GPUSTACK_RUNTIME_DEPLOY_RESOURCE_KEY_MAP_CDI`.
286
+ """
287
+ GPUSTACK_RUNTIME_KUBERNETES_KDP_PER_DEVICE_MAX_ALLOCATIONS: int | None = None
288
+ """
289
+ Maximum allocations for one device in Kubernetes Device Plugin.
290
+ Defaults to 10 if not set.
291
+ """
292
+ GPUSTACK_RUNTIME_KUBERNETES_KDP_DEVICE_ALLOCATION_POLICY: str | None = None
293
+ """
294
+ Device allocation policy for the Kubernetes Device Plugin (e.g., CDI, Env, Opaque).
295
+ `CDI`: Allocates devices using generated CDI specifications, making it easy to debug and troubleshoot; requires `GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY` to exist.
296
+ `Env`: Allocates devices using runtime-visible environment variables; requires Container Toolkit support.
297
+ `Opaque`: Uses internal logic for allocation, which is convenient for deployment but difficult to troubleshoot.
298
+ """
299
+ GPUSTACK_RUNTIME_KUBERNETES_KDP_CDI_SPECS_GENERATE: bool = True
300
+ """
301
+ Generate CDI specifications during deployment,
302
+ requires `GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY` to exist.
303
+ Works only when `GPUSTACK_RUNTIME_KUBERNETES_KDP_DEVICE_ALLOCATION_POLICY` is set to `CDI`.
304
+ Using internal knowledge to generate the CDI specifications for deployer,
305
+ if the output file conflicts with other tools generating CDI specifications (e.g., NVIDIA Container Toolkit),
306
+ please disable this and remove the output file manually.
307
+ """
277
308
  ## Podman
278
309
  GPUSTACK_RUNTIME_PODMAN_HOST: str | None = None
279
310
  """
@@ -308,6 +339,15 @@ if TYPE_CHECKING:
308
339
  Mute the original healthcheck of the container in Podman.
309
340
  Default is same as `GPUSTACK_RUNTIME_DOCKER_MUTE_ORIGINAL_HEALTHCHECK`.
310
341
  """
342
+ GPUSTACK_RUNTIME_PODMAN_CDI_SPECS_GENERATE: bool = True
343
+ """
344
+ Generate CDI specifications during deployment,
345
+ requires `GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY` to exist.
346
+ Using internal knowledge to generate the CDI specifications for deployer,
347
+ if the output file conflicts with other tools generating CDI specifications (e.g., NVIDIA Container Toolkit),
348
+ please disable this and remove the output file manually.
349
+ Default is same as `GPUSTACK_RUNTIME_DOCKER_CDI_SPECS_GENERATE`.
350
+ """
311
351
 
312
352
  # --8<-- [start:env-vars-definition]
313
353
 
@@ -479,12 +519,6 @@ variables: dict[str, Callable[[], Any]] = {
479
519
  "GPUSTACK_RUNTIME_DEPLOY_LABEL_PREFIX",
480
520
  "runtime.gpustack.ai",
481
521
  ),
482
- "GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_GENERATE": lambda: to_bool(
483
- getenv(
484
- "GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_GENERATE",
485
- "1",
486
- ),
487
- ),
488
522
  "GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY": lambda: mkdir_path(
489
523
  getenv(
490
524
  "GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY",
@@ -608,6 +642,13 @@ variables: dict[str, Callable[[], Any]] = {
608
642
  options=["Env", "CDI"],
609
643
  default="Env",
610
644
  ),
645
+ "GPUSTACK_RUNTIME_DOCKER_CDI_SPECS_GENERATE": lambda: ternary(
646
+ lambda: (
647
+ getenv("GPUSTACK_RUNTIME_DOCKER_RESOURCE_INJECTION_POLICY", "Env") == "Env"
648
+ ),
649
+ lambda: False,
650
+ lambda: to_bool(getenv("GPUSTACK_RUNTIME_DOCKER_CDI_SPECS_GENERATE", "1")),
651
+ ),
611
652
  ## Kubernetes
612
653
  "GPUSTACK_RUNTIME_KUBERNETES_NODE_NAME": lambda: getenv(
613
654
  "GPUSTACK_RUNTIME_KUBERNETES_NODE_NAME",
@@ -640,6 +681,46 @@ variables: dict[str, Callable[[], Any]] = {
640
681
  options=["Foreground", "Background", "Orphan"],
641
682
  default="Foreground",
642
683
  ),
684
+ "GPUSTACK_RUNTIME_KUBERNETES_RESOURCE_INJECTION_POLICY": lambda: choice(
685
+ getenv(
686
+ "GPUSTACK_RUNTIME_KUBERNETES_RESOURCE_INJECTION_POLICY",
687
+ ),
688
+ options=["Env", "KDP"],
689
+ default="Env",
690
+ ),
691
+ "GPUSTACK_RUNTIME_KUBERNETES_KDP_PER_DEVICE_MAX_ALLOCATIONS": lambda: to_int(
692
+ getenv(
693
+ "GPUSTACK_RUNTIME_KUBERNETES_KDP_PER_DEVICE_MAX_ALLOCATIONS",
694
+ "10",
695
+ ),
696
+ ),
697
+ "GPUSTACK_RUNTIME_KUBERNETES_KDP_DEVICE_ALLOCATION_POLICY": lambda: choice(
698
+ getenv(
699
+ "GPUSTACK_RUNTIME_KUBERNETES_KDP_DEVICE_ALLOCATION_POLICY",
700
+ ),
701
+ options=["CDI", "Env", "Opaque"],
702
+ default="CDI",
703
+ ),
704
+ "GPUSTACK_RUNTIME_KUBERNETES_KDP_CDI_SPECS_GENERATE": lambda: ternary(
705
+ lambda: (
706
+ getenv("GPUSTACK_RUNTIME_KUBERNETES_RESOURCE_INJECTION_POLICY", "Env")
707
+ == "Env"
708
+ ),
709
+ lambda: False,
710
+ lambda: ternary(
711
+ lambda: (
712
+ getenv(
713
+ "GPUSTACK_RUNTIME_KUBERNETES_KDP_DEVICE_ALLOCATION_POLICY",
714
+ "Opaque",
715
+ )
716
+ == "Opaque"
717
+ ),
718
+ lambda: False,
719
+ lambda: to_bool(
720
+ getenv("GPUSTACK_RUNTIME_KUBERNETES_KDP_CDI_SPECS_GENERATE", "1"),
721
+ ),
722
+ ),
723
+ ),
643
724
  ## Podman
644
725
  "GPUSTACK_RUNTIME_PODMAN_HOST": lambda: trim_str(
645
726
  getenvs(
@@ -697,8 +778,19 @@ variables: dict[str, Callable[[], Any]] = {
697
778
  "1",
698
779
  ),
699
780
  ),
781
+ "GPUSTACK_RUNTIME_PODMAN_CDI_SPECS_GENERATE": lambda: to_bool(
782
+ getenvs(
783
+ [
784
+ "GPUSTACK_RUNTIME_PODMAN_CDI_SPECS_GENERATE",
785
+ # Fallback to Docker's setting.
786
+ "GPUSTACK_RUNTIME_DOCKER_CDI_SPECS_GENERATE",
787
+ ],
788
+ "1",
789
+ ),
790
+ ),
700
791
  }
701
792
 
793
+
702
794
  # --8<-- [end:env-vars-definition]
703
795
 
704
796
 
@@ -1,5 +1,5 @@
1
1
  # Borrowed from https://github.com/libp2p/py-libp2p/blob/main/libp2p/utils/logging.py.
2
- from __future__ import annotations
2
+ from __future__ import annotations as __future_annotations__
3
3
 
4
4
  import atexit
5
5
  import logging
@@ -133,7 +133,11 @@ def setup_logging():
133
133
  module_logger.propagate = False
134
134
 
135
135
  # Configure 3rd-party loggers, set slightly higher level than package level
136
- for _3rd in ["docker", "kubernetes"]:
136
+ for _3rd in [
137
+ "docker",
138
+ "kubernetes",
139
+ "grpc",
140
+ ]:
137
141
  _3rd_logger = logging.getLogger(_3rd)
138
142
  _3rd_logger.handlers.clear()
139
143
  _3rd_logger.addHandler(queue_handler)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gpustack-runtime
3
- Version: 0.1.40.post1
3
+ Version: 0.1.41
4
4
  Summary: GPUStack Runtime is library for detecting GPU resources and launching GPU workloads.
5
5
  Project-URL: Homepage, https://github.com/gpustack/runtime
6
6
  Project-URL: Bug Tracker, https://github.com/gpustack/gpustack/issues
@@ -14,14 +14,19 @@ Classifier: Programming Language :: Python :: 3.12
14
14
  Classifier: Programming Language :: Python :: 3.13
15
15
  Requires-Python: >=3.10
16
16
  Requires-Dist: argcomplete>=3.6.3
17
+ Requires-Dist: cachetools>=5.5.2
17
18
  Requires-Dist: docker>=7.1.0
18
19
  Requires-Dist: gpustack-runner>=0.1.24.post1
20
+ Requires-Dist: grpc-interceptor>=0.15.4
21
+ Requires-Dist: grpcio>=1.76.0
19
22
  Requires-Dist: kubernetes>=33.1.0
20
23
  Requires-Dist: mthreads-ml-py>=2.2.10
21
24
  Requires-Dist: nvidia-ml-py>=13.580.65
22
25
  Requires-Dist: podman==5.6.0
26
+ Requires-Dist: protobuf>=5.28.3
23
27
  Requires-Dist: pyyaml
24
28
  Requires-Dist: tqdm
29
+ Requires-Dist: types-protobuf>=6.32.1.20251210
25
30
  Description-Content-Type: text/markdown
26
31
 
27
32
  # GPUStack Runtime
@@ -0,0 +1,67 @@
1
+ gpustack_runtime/__init__.py,sha256=Xw_PVWneitx-8QmW6sJQeymj6zVbEgEndGhIB_km6TI,186
2
+ gpustack_runtime/__main__.py,sha256=O9yJKcN7vg0Ppgc13qesxHwST2wkH3ccOkTQXPWHnNA,3939
3
+ gpustack_runtime/_version.py,sha256=3VyVzUi8Smv4sZH0Q9rdGQXWEqqoTm7L84ArFLnPqZM,777
4
+ gpustack_runtime/_version.pyi,sha256=A42NoSgcqEXVy2OeNm4LXC9CbyonbooYrSUBlPm2lGY,156
5
+ gpustack_runtime/envs.py,sha256=cprrZ-c4owsRLq7M__DlfpK6Ob9zbqMzRGVOsPTMJX0,38583
6
+ gpustack_runtime/logging.py,sha256=wMPriPpOuVsuClsjMh0qwEPQKyJiJa89ggdDjqkk7i0,6934
7
+ gpustack_runtime/cmds/__init__.py,sha256=-_X2O2lBn6KcdLGUzhL3lEjQC4_cwA36fvWDnFAgtVM,1382
8
+ gpustack_runtime/cmds/__types__.py,sha256=TBnUWUqzTkDtJnsMv363kdw-H8fOf-XQYbOvrmQif-M,815
9
+ gpustack_runtime/cmds/deployer.py,sha256=KvhPhU6ZW-UV6vLykI5adKI1ThgVFFJqWaII3n4OhL8,32846
10
+ gpustack_runtime/cmds/detector.py,sha256=AALcoqCiNuwYucKBnyj7r5ScOWc_BSzAhHR2C0QbEHE,8750
11
+ gpustack_runtime/cmds/images.py,sha256=7tb-D3G4yqLPkbS9aSuWI1bD3DYK8BLbPbgqac56blI,594
12
+ gpustack_runtime/deployer/__init__.py,sha256=impMrmvkMjuCBthsn3QUz3LuwpmmNAymHJKJ2o6SZoc,16249
13
+ gpustack_runtime/deployer/__patches__.py,sha256=cTBge8BT6IsY5MzETKY3kN28k3igYfNj7pcpgDzfDzw,17849
14
+ gpustack_runtime/deployer/__types__.py,sha256=PgIWogHOvHKsHoeBjmKFEEM3JrKck89Mmnwlfx01BbE,72248
15
+ gpustack_runtime/deployer/__utils__.py,sha256=paQu2M1UeoSfQPsiskmAqJSiln-8qwibTssEoWFMLec,21109
16
+ gpustack_runtime/deployer/docker.py,sha256=bOaXbTnaalbO42FlyWR1Ha26Y30LGWPzWKPV5Q-Nk7g,85039
17
+ gpustack_runtime/deployer/kuberentes.py,sha256=2M0RgsR1TlzkZ2QsElKZulftUnUMipYQPeQKKSm_Alo,89387
18
+ gpustack_runtime/deployer/podman.py,sha256=_qdbsTezacRmiXa3n04OUPUsgVy1pSFgJSKxous4s14,82156
19
+ gpustack_runtime/deployer/cdi/__init__.py,sha256=y5fFNLhlrVhWzW-GTazT7xXBukHkBQ0DGfncKqKpBzk,3986
20
+ gpustack_runtime/deployer/cdi/__types__.py,sha256=04DKvcogk7OoHS7TU2Bmht3VVMu7iOEBWTEOvxpHt4w,18399
21
+ gpustack_runtime/deployer/cdi/__utils__.py,sha256=cgqub-TQkXEjujlcQC4hJpignmmgxN5Yq9Rn6Z-TAh8,3866
22
+ gpustack_runtime/deployer/cdi/amd.py,sha256=-eq_SOlC56VX2QscZXvnoeffWSRindhr8zFZmaIcKrE,4082
23
+ gpustack_runtime/deployer/cdi/ascend.py,sha256=lDs75a9--c0lM34xfJqu-_QbfWNFrf4zE-GXPKReBe4,4538
24
+ gpustack_runtime/deployer/cdi/hygon.py,sha256=h6-vQfv03sgxYjMJAf_JOMq9cHFPaNjK1YbUYIiSXck,4117
25
+ gpustack_runtime/deployer/cdi/iluvatar.py,sha256=6nNECZpU5IPP6-5l-O1rzU-ib-WcuwKvDg7ZV__1NE4,3650
26
+ gpustack_runtime/deployer/cdi/metax.py,sha256=tmJBvr-n9pERAp-dXsa54qv6xmxt0rJoJwY36TFdoWk,4143
27
+ gpustack_runtime/deployer/cdi/thead.py,sha256=SvIDKNYZx7FwMPTTxyJ2RRjlr9LXLN8BUYCUhidmiQk,3671
28
+ gpustack_runtime/deployer/k8s/deviceplugin/__init__.py,sha256=RTQfArfX3FmS9lXKTwoWnb2gyqTIiDhRHEGe7MPxLzE,8040
29
+ gpustack_runtime/deployer/k8s/deviceplugin/__types__.py,sha256=QaswnGns_hqMdVPeryDqmyhJrAHu_pfAlIC_25NHp4k,3057
30
+ gpustack_runtime/deployer/k8s/deviceplugin/plugin.py,sha256=ktQ44wdNJNnv0Fn-TAnTQN0Jq5ZSqRA09WV407CrPvc,17931
31
+ gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/__init__.py,sha256=3rOYmgDIIJ4idEtwgnumGStH7PaK-J7EYrOnLa9A-8o,118
32
+ gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api.proto,sha256=rmB8RDe4LN5FCVkQ608uS-pl32mk5tt6iGe-g2lKtPs,7919
33
+ gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2.py,sha256=DzmeyfwekwE0U0dK6WfwX1NQYLEr2mFa1hdaUTxt2f8,8404
34
+ gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2.pyi,sha256=lq1dbSgBYqJ7zyGfoKKHCyfr6R5vcCGzJxteeyQpbuI,8232
35
+ gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2_grpc.py,sha256=GM6EyCEFeyOjL0XOCisbcHurRoLKqKDUI5obsUyTxpE,17446
36
+ gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/constants.py,sha256=tpNk3e_cvY67C9RwVsdTNl75YuNXBgsn53fSJIzeTR4,828
37
+ gpustack_runtime/detector/__init__.py,sha256=9i6KOd3Qp_BmnSyPURlOBHlHJMSInqlDTh1kpAbs4_U,8104
38
+ gpustack_runtime/detector/__types__.py,sha256=tiYbxPD6gV5wS79K3d2dUzy6btJl4QcsgunyxtJ240E,13162
39
+ gpustack_runtime/detector/__utils__.py,sha256=QdLWXwsU1_EMxXG5Y29psqnttWJyXWMphHDjiC_6Byc,25153
40
+ gpustack_runtime/detector/amd.py,sha256=qh86xGhPJRIXwiKaHmeyIrsxchUDRpyggR6yc0cLuKw,17553
41
+ gpustack_runtime/detector/ascend.py,sha256=E6YPoREI5r2HZIegUaQb0XwC3Qau1mnkNeCRbgtlE5k,17992
42
+ gpustack_runtime/detector/cambricon.py,sha256=GzXlS4et8zape0rr19d1EwSV9cfFEmHgaElTVMjR3IY,3947
43
+ gpustack_runtime/detector/hygon.py,sha256=R4I8h66YHJC00iAtDJhBX772VMKUdZ8nleRXZFPUt3Q,12299
44
+ gpustack_runtime/detector/iluvatar.py,sha256=bqciqjYE_tIxBg2PSOlIzH3WcFYBgTDjfN6nT90LSGg,10206
45
+ gpustack_runtime/detector/metax.py,sha256=W4NSZD7Kf5431B63UBpYnKIk2Jz1SutEpwXkwgYfmfE,10374
46
+ gpustack_runtime/detector/mthreads.py,sha256=GcElUIMvU5C6P4Fx_X_kSOsJps8WZ47tkZ2B2MJZmk4,12131
47
+ gpustack_runtime/detector/nvidia.py,sha256=xdvoMdNx943on5fd_mI3WI_85kMY-0dYm5NU3vqXb9M,33400
48
+ gpustack_runtime/detector/thead.py,sha256=43TGPq78FulpYBUeEMVxDTY-0X3ve2FsX4Hsd0Lswy0,25561
49
+ gpustack_runtime/detector/pyacl/__init__.py,sha256=UQjaBxP7nJNyzr08N8_lH-5wPtnFmUY9pyQhs6vIChU,16232
50
+ gpustack_runtime/detector/pyamdgpu/__init__.py,sha256=x-UO07EpKEgfTLmXQOD6j9f6kibuvDC7riQFof3YGdw,8617
51
+ gpustack_runtime/detector/pyamdsmi/__init__.py,sha256=800-khq2w6HLgXM12RkhcdvXBGeAJ4s1_TWJyHebCMk,955
52
+ gpustack_runtime/detector/pycuda/__init__.py,sha256=p-Na5eBjdsFGXPHCEEUfFj5TQFjPxPQGV0i4oU6ywBA,16922
53
+ gpustack_runtime/detector/pydcmi/__init__.py,sha256=zCeImelWgbPDn51dJgp3LlCK-uGOuHmmPGiOsyYB3cI,37264
54
+ gpustack_runtime/detector/pyhgml/__init__.py,sha256=Yp9s-QhHS4ck7Iq9kd4v6a4BruyJQcuOTYlyYSmnOgI,182896
55
+ gpustack_runtime/detector/pyhgml/libhgml.so,sha256=BPzGVBpzrMX1tSvbXddq8Q0Qhi8w-No2JXX8sRxTioI,2101640
56
+ gpustack_runtime/detector/pyhgml/libuki.so,sha256=EE6v1vIYYT4FSDMMm9rSfAqwrwIPFD-4_6KtP51lSps,702352
57
+ gpustack_runtime/detector/pyhsa/__init__.py,sha256=4DuGnBBMUVOCPa6vTx3XT5mffGrKk6M6CYbUWBoMTJ0,15792
58
+ gpustack_runtime/detector/pyixml/__init__.py,sha256=6ss_Dyl8lIT4WrKpfwmQqzBmg4Bxi38vg_eey_wsSY0,162681
59
+ gpustack_runtime/detector/pymxsml/__init__.py,sha256=YxfNHq7TWd7CpNroP45BGXhcWNpY_sXgVzNGtx68DII,45409
60
+ gpustack_runtime/detector/pyrocmcore/__init__.py,sha256=rgwIdPS-7GG7_5luRMR1XG9QyNM3lJh5ryD7kfZqpWg,2523
61
+ gpustack_runtime/detector/pyrocmsmi/__init__.py,sha256=ACwRtJWVIuJ4NTcBJxk0zrVb_qtDOMkApMdbJoag5g0,11906
62
+ gpustack_runtime/_version_appendix.py,sha256=C8KJWDWnWAHNEMYzKYXyeA2PTIO31o7aTAhQlRkmhUA,23
63
+ gpustack_runtime-0.1.41.dist-info/METADATA,sha256=RDItHofAkys1Iqn6uwI03D6nqq-QrrMOcaJ43Iu2XH4,2358
64
+ gpustack_runtime-0.1.41.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
65
+ gpustack_runtime-0.1.41.dist-info/entry_points.txt,sha256=bBO_61GxP6dIT74uZwbSDgW5Vt2pTePUS3CgjUJkUgg,68
66
+ gpustack_runtime-0.1.41.dist-info/licenses/LICENSE,sha256=OiPibowBvB-NHV3TP_NOj18XNBlXcshXZFMpa3uvKVE,10362
67
+ gpustack_runtime-0.1.41.dist-info/RECORD,,