gpustack-runtime 0.1.40.post1__py3-none-any.whl → 0.1.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gpustack_runtime/__init__.py +1 -1
- gpustack_runtime/__main__.py +5 -3
- gpustack_runtime/_version.py +2 -2
- gpustack_runtime/_version_appendix.py +1 -1
- gpustack_runtime/cmds/__init__.py +5 -3
- gpustack_runtime/cmds/__types__.py +1 -1
- gpustack_runtime/cmds/deployer.py +140 -18
- gpustack_runtime/cmds/detector.py +1 -1
- gpustack_runtime/cmds/images.py +1 -1
- gpustack_runtime/deployer/__init__.py +28 -2
- gpustack_runtime/deployer/__patches__.py +1 -1
- gpustack_runtime/deployer/__types__.py +2 -1
- gpustack_runtime/deployer/__utils__.py +2 -2
- gpustack_runtime/deployer/cdi/__init__.py +85 -5
- gpustack_runtime/deployer/cdi/__types__.py +92 -29
- gpustack_runtime/deployer/cdi/__utils__.py +178 -0
- gpustack_runtime/deployer/cdi/amd.py +146 -0
- gpustack_runtime/deployer/cdi/ascend.py +164 -0
- gpustack_runtime/deployer/cdi/hygon.py +147 -0
- gpustack_runtime/deployer/cdi/iluvatar.py +136 -0
- gpustack_runtime/deployer/cdi/metax.py +148 -0
- gpustack_runtime/deployer/cdi/thead.py +57 -23
- gpustack_runtime/deployer/docker.py +9 -8
- gpustack_runtime/deployer/k8s/deviceplugin/__init__.py +240 -0
- gpustack_runtime/deployer/k8s/deviceplugin/__types__.py +131 -0
- gpustack_runtime/deployer/k8s/deviceplugin/plugin.py +586 -0
- gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/__init__.py +3 -0
- gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api.proto +212 -0
- gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2.py +86 -0
- gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2.pyi +168 -0
- gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2_grpc.py +358 -0
- gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/constants.py +34 -0
- gpustack_runtime/deployer/kuberentes.py +37 -4
- gpustack_runtime/deployer/podman.py +9 -8
- gpustack_runtime/detector/__init__.py +42 -5
- gpustack_runtime/detector/__types__.py +8 -24
- gpustack_runtime/detector/__utils__.py +46 -39
- gpustack_runtime/detector/amd.py +55 -66
- gpustack_runtime/detector/ascend.py +29 -41
- gpustack_runtime/detector/cambricon.py +3 -3
- gpustack_runtime/detector/hygon.py +21 -49
- gpustack_runtime/detector/iluvatar.py +44 -60
- gpustack_runtime/detector/metax.py +54 -37
- gpustack_runtime/detector/mthreads.py +74 -36
- gpustack_runtime/detector/nvidia.py +130 -93
- gpustack_runtime/detector/pyacl/__init__.py +1 -1
- gpustack_runtime/detector/pyamdgpu/__init__.py +1 -1
- gpustack_runtime/detector/pyamdsmi/__init__.py +1 -1
- gpustack_runtime/detector/pycuda/__init__.py +1 -1
- gpustack_runtime/detector/pydcmi/__init__.py +1 -1
- gpustack_runtime/detector/pyhsa/__init__.py +1 -1
- gpustack_runtime/detector/pymxsml/__init__.py +1553 -1
- gpustack_runtime/detector/pyrocmcore/__init__.py +1 -1
- gpustack_runtime/detector/pyrocmsmi/__init__.py +1 -1
- gpustack_runtime/detector/thead.py +41 -60
- gpustack_runtime/envs.py +104 -12
- gpustack_runtime/logging.py +6 -2
- {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.dist-info}/METADATA +6 -1
- gpustack_runtime-0.1.41.dist-info/RECORD +67 -0
- gpustack_runtime/detector/pymxsml/mxsml.py +0 -1580
- gpustack_runtime/detector/pymxsml/mxsml_extension.py +0 -816
- gpustack_runtime/detector/pymxsml/mxsml_mcm.py +0 -476
- gpustack_runtime-0.1.40.post1.dist-info/RECORD +0 -55
- {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.dist-info}/WHEEL +0 -0
- {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.dist-info}/entry_points.txt +0 -0
- {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.dist-info}/licenses/LICENSE +0 -0
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
# https://github.com/ROCm/rocm_smi_lib/blob/rocm-6.2.4/python_smi_tools/rsmiBindings.py,
|
|
4
4
|
# https://rocm.docs.amd.com/projects/rocm_smi_lib/en/latest/doxygen/html/rocm__smi_8h_source.html,
|
|
5
5
|
# https://rocm.docs.amd.com/projects/rocm_smi_lib/en/latest/doxygen/html/rocm__smi_8h.html.
|
|
6
|
-
from __future__ import annotations
|
|
6
|
+
from __future__ import annotations as __future_annotations__
|
|
7
7
|
|
|
8
8
|
import os
|
|
9
9
|
import sys
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
1
|
+
from __future__ import annotations as __future_annotations__
|
|
2
2
|
|
|
3
3
|
import contextlib
|
|
4
4
|
import logging
|
|
@@ -22,7 +22,6 @@ from .__utils__ import (
|
|
|
22
22
|
bitmask_to_str,
|
|
23
23
|
byte_to_mebibyte,
|
|
24
24
|
get_brief_version,
|
|
25
|
-
get_device_files,
|
|
26
25
|
get_numa_node_by_bdf,
|
|
27
26
|
get_numa_nodeset_size,
|
|
28
27
|
get_pci_devices,
|
|
@@ -40,7 +39,7 @@ class THeadDetector(Detector):
|
|
|
40
39
|
"""
|
|
41
40
|
|
|
42
41
|
@staticmethod
|
|
43
|
-
@lru_cache
|
|
42
|
+
@lru_cache(maxsize=1)
|
|
44
43
|
def is_supported() -> bool:
|
|
45
44
|
"""
|
|
46
45
|
Check if the T-Head detector is supported.
|
|
@@ -69,7 +68,7 @@ class THeadDetector(Detector):
|
|
|
69
68
|
return supported
|
|
70
69
|
|
|
71
70
|
@staticmethod
|
|
72
|
-
@lru_cache
|
|
71
|
+
@lru_cache(maxsize=1)
|
|
73
72
|
def detect_pci_devices() -> dict[str, PCIDevice]:
|
|
74
73
|
# See https://pcisig.com/membership/member-companies?combine=Alibaba.
|
|
75
74
|
pci_devs = get_pci_devices(vendor="0x1ded")
|
|
@@ -121,17 +120,23 @@ class THeadDetector(Detector):
|
|
|
121
120
|
)
|
|
122
121
|
|
|
123
122
|
dev_count = pyhgml.hgmlDeviceGetCount()
|
|
124
|
-
dev_files = None
|
|
125
123
|
for dev_idx in range(dev_count):
|
|
126
124
|
dev = pyhgml.hgmlDeviceGetHandleByIndex(dev_idx)
|
|
127
125
|
|
|
128
126
|
dev_cc_t = pyhgml.hgmlDeviceGetHggcComputeCapability(dev)
|
|
129
127
|
dev_cc = ".".join(map(str, dev_cc_t))
|
|
130
128
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
129
|
+
dev_pci_info = pyhgml.hgmlDeviceGetPciInfo(dev)
|
|
130
|
+
dev_bdf = str(dev_pci_info.busIdLegacy).lower()
|
|
131
|
+
|
|
132
|
+
dev_numa = get_numa_node_by_bdf(dev_bdf)
|
|
133
|
+
if not dev_numa:
|
|
134
|
+
dev_node_affinity = pyhgml.hgmlDeviceGetMemoryAffinity(
|
|
135
|
+
dev,
|
|
136
|
+
get_numa_nodeset_size(),
|
|
137
|
+
pyhgml.HGML_AFFINITY_SCOPE_NODE,
|
|
138
|
+
)
|
|
139
|
+
dev_numa = bitmask_to_str(list(dev_node_affinity))
|
|
135
140
|
|
|
136
141
|
dev_mig_mode = pyhgml.HGML_DEVICE_MIG_DISABLE
|
|
137
142
|
with contextlib.suppress(pyhgml.HGMLError):
|
|
@@ -142,14 +147,7 @@ class THeadDetector(Detector):
|
|
|
142
147
|
if dev_mig_mode == pyhgml.HGML_DEVICE_MIG_DISABLE:
|
|
143
148
|
dev_index = dev_idx
|
|
144
149
|
if envs.GPUSTACK_RUNTIME_DETECT_PHYSICAL_INDEX_PRIORITY:
|
|
145
|
-
|
|
146
|
-
dev_files = get_device_files(
|
|
147
|
-
pattern=r"alixpu_ppu(?P<number>\d+)",
|
|
148
|
-
)
|
|
149
|
-
if len(dev_files) >= dev_count:
|
|
150
|
-
dev_file = dev_files[dev_idx]
|
|
151
|
-
if dev_file.number is not None:
|
|
152
|
-
dev_index = dev_file.number
|
|
150
|
+
dev_index = pyhgml.hgmlDeviceGetMinorNumber(dev)
|
|
153
151
|
|
|
154
152
|
dev_name = pyhgml.hgmlDeviceGetName(dev)
|
|
155
153
|
|
|
@@ -204,12 +202,9 @@ class THeadDetector(Detector):
|
|
|
204
202
|
|
|
205
203
|
dev_appendix = {
|
|
206
204
|
"vgpu": dev_is_vgpu,
|
|
205
|
+
"bdf": dev_bdf,
|
|
206
|
+
"numa": dev_numa,
|
|
207
207
|
}
|
|
208
|
-
if dev_bdf:
|
|
209
|
-
dev_appendix["bdf"] = dev_bdf
|
|
210
|
-
|
|
211
|
-
if dev_links_state := _get_links_state(dev):
|
|
212
|
-
dev_appendix.update(dev_links_state)
|
|
213
208
|
|
|
214
209
|
ret.append(
|
|
215
210
|
Device(
|
|
@@ -273,9 +268,9 @@ class THeadDetector(Detector):
|
|
|
273
268
|
|
|
274
269
|
mdev_appendix = {
|
|
275
270
|
"vgpu": True,
|
|
271
|
+
"bdf": dev_bdf,
|
|
272
|
+
"numa": dev_numa,
|
|
276
273
|
}
|
|
277
|
-
if dev_bdf:
|
|
278
|
-
mdev_appendix["bdf"] = dev_bdf
|
|
279
274
|
|
|
280
275
|
mdev_gi_id = pyhgml.hgmlDeviceGetGpuInstanceId(mdev)
|
|
281
276
|
mdev_appendix["gpu_instance_id"] = mdev_gi_id
|
|
@@ -403,36 +398,24 @@ class THeadDetector(Detector):
|
|
|
403
398
|
for i, dev_i in enumerate(devices):
|
|
404
399
|
dev_i_handle = pyhgml.hgmlDeviceGetHandleByUUID(dev_i.uuid)
|
|
405
400
|
|
|
406
|
-
# Get
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
#
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
list(dev_i_memset),
|
|
425
|
-
)
|
|
426
|
-
except pyhgml.HGMLError:
|
|
427
|
-
debug_log_exception(
|
|
428
|
-
logger,
|
|
429
|
-
"Failed to get NUMA affinity for device %d",
|
|
430
|
-
dev_i.index,
|
|
431
|
-
)
|
|
432
|
-
# Get CPU affinity.
|
|
433
|
-
ret.devices_cpu_affinities[i] = map_numa_node_to_cpu_affinity(
|
|
434
|
-
ret.devices_numa_affinities[i],
|
|
435
|
-
)
|
|
401
|
+
# Get NUMA and CPU affinities.
|
|
402
|
+
ret.devices_numa_affinities[i] = dev_i.appendix.get("numa", "")
|
|
403
|
+
ret.devices_cpu_affinities[i] = map_numa_node_to_cpu_affinity(
|
|
404
|
+
ret.devices_numa_affinities[i],
|
|
405
|
+
)
|
|
406
|
+
|
|
407
|
+
# Get links state if applicable.
|
|
408
|
+
if dev_i_links_state := _get_links_state(dev_i_handle):
|
|
409
|
+
ret.appendices[i].update(dev_i_links_state)
|
|
410
|
+
# In practice, if a card has an active *Link,
|
|
411
|
+
# then other cards in the same machine should be interconnected with it through the *Link.
|
|
412
|
+
if dev_i_links_state.get("links_active_count", 0) > 0:
|
|
413
|
+
for j, dev_j in enumerate(devices):
|
|
414
|
+
if dev_i.index == dev_j.index:
|
|
415
|
+
continue
|
|
416
|
+
ret.devices_distances[i][j] = TopologyDistanceEnum.LINK
|
|
417
|
+
ret.devices_distances[j][i] = TopologyDistanceEnum.LINK
|
|
418
|
+
continue
|
|
436
419
|
|
|
437
420
|
# Get distances to other devices.
|
|
438
421
|
for j, dev_j in enumerate(devices):
|
|
@@ -447,8 +430,6 @@ class THeadDetector(Detector):
|
|
|
447
430
|
dev_i_handle,
|
|
448
431
|
dev_j_handle,
|
|
449
432
|
)
|
|
450
|
-
if dev_i.appendix.get("links_state", 0) > 0:
|
|
451
|
-
distance = TopologyDistanceEnum.LINK
|
|
452
433
|
except pyhgml.HGMLError:
|
|
453
434
|
debug_log_exception(
|
|
454
435
|
logger,
|
|
@@ -459,9 +440,6 @@ class THeadDetector(Detector):
|
|
|
459
440
|
|
|
460
441
|
ret.devices_distances[i][j] = distance
|
|
461
442
|
ret.devices_distances[j][i] = distance
|
|
462
|
-
except pyhgml.HGMLError:
|
|
463
|
-
debug_log_exception(logger, "Failed to fetch topology")
|
|
464
|
-
raise
|
|
465
443
|
except Exception:
|
|
466
444
|
debug_log_exception(logger, "Failed to process topology fetching")
|
|
467
445
|
raise
|
|
@@ -621,17 +599,20 @@ def _get_links_state(
|
|
|
621
599
|
return None
|
|
622
600
|
|
|
623
601
|
dev_links_state = 0
|
|
602
|
+
dev_links_active_count = 0
|
|
624
603
|
try:
|
|
625
604
|
for link_idx in range(int(dev_links_count)):
|
|
626
605
|
dev_link_state = pyhgml.hgmlDeviceGetIcnLinkState(dev, link_idx)
|
|
627
606
|
if dev_link_state:
|
|
628
|
-
dev_links_state |= 1 <<
|
|
607
|
+
dev_links_state |= 1 << link_idx
|
|
608
|
+
dev_links_active_count += 1
|
|
629
609
|
except pyhgml.HGMLError:
|
|
630
610
|
debug_log_warning(logger, "Failed to get ICNLink link state")
|
|
631
611
|
|
|
632
612
|
return {
|
|
633
613
|
"links_count": dev_links_count,
|
|
634
614
|
"links_state": dev_links_state,
|
|
615
|
+
"links_active_count": dev_links_active_count,
|
|
635
616
|
}
|
|
636
617
|
|
|
637
618
|
|
gpustack_runtime/envs.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
1
|
+
from __future__ import annotations as __future_annotations__
|
|
2
2
|
|
|
3
3
|
import contextlib
|
|
4
4
|
import os
|
|
@@ -147,11 +147,6 @@ if TYPE_CHECKING:
|
|
|
147
147
|
"""
|
|
148
148
|
Label prefix for the deployer.
|
|
149
149
|
"""
|
|
150
|
-
GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_GENERATE: bool = True
|
|
151
|
-
"""
|
|
152
|
-
During deployment, enable automatic generation of Container Device Interface (CDI) specifications
|
|
153
|
-
for detected devices.
|
|
154
|
-
"""
|
|
155
150
|
GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY: Path | None = None
|
|
156
151
|
"""
|
|
157
152
|
During deployment, path of directory containing Container Device Interface (CDI) specifications,
|
|
@@ -248,6 +243,15 @@ if TYPE_CHECKING:
|
|
|
248
243
|
`Env`: Injects resources using standard environment variable, based on `GPUSTACK_RUNTIME_DEPLOY_RESOURCE_KEY_MAP_RUNTIME_VISIBLE_DEVICES`.
|
|
249
244
|
`CDI`: Injects resources using CDI, based on `GPUSTACK_RUNTIME_DEPLOY_RESOURCE_KEY_MAP_CDI`.
|
|
250
245
|
"""
|
|
246
|
+
GPUSTACK_RUNTIME_DOCKER_CDI_SPECS_GENERATE: bool = True
|
|
247
|
+
"""
|
|
248
|
+
Generate CDI specifications during deployment when using CDI resource injection policy,
|
|
249
|
+
requires `GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY` to be existed.
|
|
250
|
+
Works only when `GPUSTACK_RUNTIME_DOCKER_RESOURCE_INJECTION_POLICY` is set to `CDI`.
|
|
251
|
+
Using internal knowledge to generate the CDI specifications for deployer,
|
|
252
|
+
if the output file conflicts with other tools generating CDI specifications(e.g., NVIDIA Container Toolkit),
|
|
253
|
+
please disable this and remove the output file manually.
|
|
254
|
+
"""
|
|
251
255
|
## Kubernetes
|
|
252
256
|
GPUSTACK_RUNTIME_KUBERNETES_NODE_NAME: str | None = None
|
|
253
257
|
"""
|
|
@@ -274,6 +278,33 @@ if TYPE_CHECKING:
|
|
|
274
278
|
"""
|
|
275
279
|
Deletion propagation policy for Kubernetes resources (e.g., Foreground, Background, Orphan).
|
|
276
280
|
"""
|
|
281
|
+
GPUSTACK_RUNTIME_KUBERNETES_RESOURCE_INJECTION_POLICY: str | None = None
|
|
282
|
+
"""
|
|
283
|
+
Resource injection policy for the Kubernetes deployer (e.g., Env, KDP).
|
|
284
|
+
`Env`: Injects resources using standard environment variable, depends on underlying Container Toolkit, based on `GPUSTACK_RUNTIME_DEPLOY_RESOURCE_KEY_MAP_RUNTIME_VISIBLE_DEVICES`.
|
|
285
|
+
`KDP`: Injects resources using Kubernetes Device Plugin, based on `GPUSTACK_RUNTIME_DEPLOY_RESOURCE_KEY_MAP_CDI`.
|
|
286
|
+
"""
|
|
287
|
+
GPUSTACK_RUNTIME_KUBERNETES_KDP_PER_DEVICE_MAX_ALLOCATIONS: int | None = None
|
|
288
|
+
"""
|
|
289
|
+
Maximum allocations for one device in Kubernetes Device Plugin.
|
|
290
|
+
If not set, it should be 10.
|
|
291
|
+
"""
|
|
292
|
+
GPUSTACK_RUNTIME_KUBERNETES_KDP_DEVICE_ALLOCATION_POLICY: str | None = None
|
|
293
|
+
"""
|
|
294
|
+
Device allocation policy for the Kubernetes Device Plugin (e.g., CDI, Env, Opaque).
|
|
295
|
+
`CDI`: Allocates devices using generated CDI specifications, making it easy to debug and troubleshoot; requires `GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY` to exist.
|
|
296
|
+
`Env`: Allocates devices using runtime-visible environment variables; requires Container Toolkit support.
|
|
297
|
+
`Opaque`: Uses internal logic for allocation, which is convenient for deployment but difficult to troubleshoot.
|
|
298
|
+
"""
|
|
299
|
+
GPUSTACK_RUNTIME_KUBERNETES_KDP_CDI_SPECS_GENERATE: bool = True
|
|
300
|
+
"""
|
|
301
|
+
Generate CDI specifications during deployment,
|
|
302
|
+
requires `GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY` to be existed.
|
|
303
|
+
Works only when `GPUSTACK_RUNTIME_KUBERNETES_KDP_DEVICE_ALLOCATION_POLICY` is set to `CDI`.
|
|
304
|
+
Using internal knowledge to generate the CDI specifications for deployer,
|
|
305
|
+
if the output file conflicts with other tools generating CDI specifications(e.g., NVIDIA Container Toolkit),
|
|
306
|
+
please disable this and remove the output file manually.
|
|
307
|
+
"""
|
|
277
308
|
## Podman
|
|
278
309
|
GPUSTACK_RUNTIME_PODMAN_HOST: str | None = None
|
|
279
310
|
"""
|
|
@@ -308,6 +339,15 @@ if TYPE_CHECKING:
|
|
|
308
339
|
Mute the original healthcheck of the container in Podman.
|
|
309
340
|
Default is same as `GPUSTACK_RUNTIME_DOCKER_MUTE_ORIGINAL_HEALTHCHECK`.
|
|
310
341
|
"""
|
|
342
|
+
GPUSTACK_RUNTIME_PODMAN_CDI_SPECS_GENERATE: bool = True
|
|
343
|
+
"""
|
|
344
|
+
Generate CDI specifications during deployment,
|
|
345
|
+
requires `GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY` to be existed.
|
|
346
|
+
Using internal knowledge to generate the CDI specifications for deployer,
|
|
347
|
+
if the output file conflicts with other tools generating CDI specifications(e.g., NVIDIA Container Toolkit),
|
|
348
|
+
please disable this and remove the output file manually.
|
|
349
|
+
Default is same as `GPUSTACK_RUNTIME_DOCKER_CDI_SPECS_GENERATE`.
|
|
350
|
+
"""
|
|
311
351
|
|
|
312
352
|
# --8<-- [start:env-vars-definition]
|
|
313
353
|
|
|
@@ -479,12 +519,6 @@ variables: dict[str, Callable[[], Any]] = {
|
|
|
479
519
|
"GPUSTACK_RUNTIME_DEPLOY_LABEL_PREFIX",
|
|
480
520
|
"runtime.gpustack.ai",
|
|
481
521
|
),
|
|
482
|
-
"GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_GENERATE": lambda: to_bool(
|
|
483
|
-
getenv(
|
|
484
|
-
"GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_GENERATE",
|
|
485
|
-
"1",
|
|
486
|
-
),
|
|
487
|
-
),
|
|
488
522
|
"GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY": lambda: mkdir_path(
|
|
489
523
|
getenv(
|
|
490
524
|
"GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY",
|
|
@@ -608,6 +642,13 @@ variables: dict[str, Callable[[], Any]] = {
|
|
|
608
642
|
options=["Env", "CDI"],
|
|
609
643
|
default="Env",
|
|
610
644
|
),
|
|
645
|
+
"GPUSTACK_RUNTIME_DOCKER_CDI_SPECS_GENERATE": lambda: ternary(
|
|
646
|
+
lambda: (
|
|
647
|
+
getenv("GPUSTACK_RUNTIME_DOCKER_RESOURCE_INJECTION_POLICY", "Env") == "Env"
|
|
648
|
+
),
|
|
649
|
+
lambda: False,
|
|
650
|
+
lambda: to_bool(getenv("GPUSTACK_RUNTIME_DOCKER_CDI_SPECS_GENERATE", "1")),
|
|
651
|
+
),
|
|
611
652
|
## Kubernetes
|
|
612
653
|
"GPUSTACK_RUNTIME_KUBERNETES_NODE_NAME": lambda: getenv(
|
|
613
654
|
"GPUSTACK_RUNTIME_KUBERNETES_NODE_NAME",
|
|
@@ -640,6 +681,46 @@ variables: dict[str, Callable[[], Any]] = {
|
|
|
640
681
|
options=["Foreground", "Background", "Orphan"],
|
|
641
682
|
default="Foreground",
|
|
642
683
|
),
|
|
684
|
+
"GPUSTACK_RUNTIME_KUBERNETES_RESOURCE_INJECTION_POLICY": lambda: choice(
|
|
685
|
+
getenv(
|
|
686
|
+
"GPUSTACK_RUNTIME_KUBERNETES_RESOURCE_INJECTION_POLICY",
|
|
687
|
+
),
|
|
688
|
+
options=["Env", "KDP"],
|
|
689
|
+
default="Env",
|
|
690
|
+
),
|
|
691
|
+
"GPUSTACK_RUNTIME_KUBERNETES_KDP_PER_DEVICE_MAX_ALLOCATIONS": lambda: to_int(
|
|
692
|
+
getenv(
|
|
693
|
+
"GPUSTACK_RUNTIME_KUBERNETES_KDP_PER_DEVICE_MAX_ALLOCATIONS",
|
|
694
|
+
"10",
|
|
695
|
+
),
|
|
696
|
+
),
|
|
697
|
+
"GPUSTACK_RUNTIME_KUBERNETES_KDP_DEVICE_ALLOCATION_POLICY": lambda: choice(
|
|
698
|
+
getenv(
|
|
699
|
+
"GPUSTACK_RUNTIME_KUBERNETES_KDP_DEVICE_ALLOCATION_POLICY",
|
|
700
|
+
),
|
|
701
|
+
options=["CDI", "Env", "Opaque"],
|
|
702
|
+
default="CDI",
|
|
703
|
+
),
|
|
704
|
+
"GPUSTACK_RUNTIME_KUBERNETES_KDP_CDI_SPECS_GENERATE": lambda: ternary(
|
|
705
|
+
lambda: (
|
|
706
|
+
getenv("GPUSTACK_RUNTIME_KUBERNETES_RESOURCE_INJECTION_POLICY", "Env")
|
|
707
|
+
== "Env"
|
|
708
|
+
),
|
|
709
|
+
lambda: False,
|
|
710
|
+
lambda: ternary(
|
|
711
|
+
lambda: (
|
|
712
|
+
getenv(
|
|
713
|
+
"GPUSTACK_RUNTIME_KUBERNETES_KDP_DEVICE_ALLOCATION_POLICY",
|
|
714
|
+
"Opaque",
|
|
715
|
+
)
|
|
716
|
+
== "Opaque"
|
|
717
|
+
),
|
|
718
|
+
lambda: False,
|
|
719
|
+
lambda: to_bool(
|
|
720
|
+
getenv("GPUSTACK_RUNTIME_KUBERNETES_KDP_CDI_SPECS_GENERATE", "1"),
|
|
721
|
+
),
|
|
722
|
+
),
|
|
723
|
+
),
|
|
643
724
|
## Podman
|
|
644
725
|
"GPUSTACK_RUNTIME_PODMAN_HOST": lambda: trim_str(
|
|
645
726
|
getenvs(
|
|
@@ -697,8 +778,19 @@ variables: dict[str, Callable[[], Any]] = {
|
|
|
697
778
|
"1",
|
|
698
779
|
),
|
|
699
780
|
),
|
|
781
|
+
"GPUSTACK_RUNTIME_PODMAN_CDI_SPECS_GENERATE": lambda: to_bool(
|
|
782
|
+
getenvs(
|
|
783
|
+
[
|
|
784
|
+
"GPUSTACK_RUNTIME_PODMAN_CDI_SPECS_GENERATE",
|
|
785
|
+
# Fallback to Docker's setting.
|
|
786
|
+
"GPUSTACK_RUNTIME_DOCKER_CDI_SPECS_GENERATE",
|
|
787
|
+
],
|
|
788
|
+
"1",
|
|
789
|
+
),
|
|
790
|
+
),
|
|
700
791
|
}
|
|
701
792
|
|
|
793
|
+
|
|
702
794
|
# --8<-- [end:env-vars-definition]
|
|
703
795
|
|
|
704
796
|
|
gpustack_runtime/logging.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# Borrowed from https://github.com/libp2p/py-libp2p/blob/main/libp2p/utils/logging.py.
|
|
2
|
-
from __future__ import annotations
|
|
2
|
+
from __future__ import annotations as __future_annotations__
|
|
3
3
|
|
|
4
4
|
import atexit
|
|
5
5
|
import logging
|
|
@@ -133,7 +133,11 @@ def setup_logging():
|
|
|
133
133
|
module_logger.propagate = False
|
|
134
134
|
|
|
135
135
|
# Configure 3rd-party loggers, set slightly higher level than package level
|
|
136
|
-
for _3rd in [
|
|
136
|
+
for _3rd in [
|
|
137
|
+
"docker",
|
|
138
|
+
"kubernetes",
|
|
139
|
+
"grpc",
|
|
140
|
+
]:
|
|
137
141
|
_3rd_logger = logging.getLogger(_3rd)
|
|
138
142
|
_3rd_logger.handlers.clear()
|
|
139
143
|
_3rd_logger.addHandler(queue_handler)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gpustack-runtime
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.41
|
|
4
4
|
Summary: GPUStack Runtime is library for detecting GPU resources and launching GPU workloads.
|
|
5
5
|
Project-URL: Homepage, https://github.com/gpustack/runtime
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/gpustack/gpustack/issues
|
|
@@ -14,14 +14,19 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.13
|
|
15
15
|
Requires-Python: >=3.10
|
|
16
16
|
Requires-Dist: argcomplete>=3.6.3
|
|
17
|
+
Requires-Dist: cachetools>=5.5.2
|
|
17
18
|
Requires-Dist: docker>=7.1.0
|
|
18
19
|
Requires-Dist: gpustack-runner>=0.1.24.post1
|
|
20
|
+
Requires-Dist: grpc-interceptor>=0.15.4
|
|
21
|
+
Requires-Dist: grpcio>=1.76.0
|
|
19
22
|
Requires-Dist: kubernetes>=33.1.0
|
|
20
23
|
Requires-Dist: mthreads-ml-py>=2.2.10
|
|
21
24
|
Requires-Dist: nvidia-ml-py>=13.580.65
|
|
22
25
|
Requires-Dist: podman==5.6.0
|
|
26
|
+
Requires-Dist: protobuf>=5.28.3
|
|
23
27
|
Requires-Dist: pyyaml
|
|
24
28
|
Requires-Dist: tqdm
|
|
29
|
+
Requires-Dist: types-protobuf>=6.32.1.20251210
|
|
25
30
|
Description-Content-Type: text/markdown
|
|
26
31
|
|
|
27
32
|
# GPUStack Runtime
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
gpustack_runtime/__init__.py,sha256=Xw_PVWneitx-8QmW6sJQeymj6zVbEgEndGhIB_km6TI,186
|
|
2
|
+
gpustack_runtime/__main__.py,sha256=O9yJKcN7vg0Ppgc13qesxHwST2wkH3ccOkTQXPWHnNA,3939
|
|
3
|
+
gpustack_runtime/_version.py,sha256=3VyVzUi8Smv4sZH0Q9rdGQXWEqqoTm7L84ArFLnPqZM,777
|
|
4
|
+
gpustack_runtime/_version.pyi,sha256=A42NoSgcqEXVy2OeNm4LXC9CbyonbooYrSUBlPm2lGY,156
|
|
5
|
+
gpustack_runtime/envs.py,sha256=cprrZ-c4owsRLq7M__DlfpK6Ob9zbqMzRGVOsPTMJX0,38583
|
|
6
|
+
gpustack_runtime/logging.py,sha256=wMPriPpOuVsuClsjMh0qwEPQKyJiJa89ggdDjqkk7i0,6934
|
|
7
|
+
gpustack_runtime/cmds/__init__.py,sha256=-_X2O2lBn6KcdLGUzhL3lEjQC4_cwA36fvWDnFAgtVM,1382
|
|
8
|
+
gpustack_runtime/cmds/__types__.py,sha256=TBnUWUqzTkDtJnsMv363kdw-H8fOf-XQYbOvrmQif-M,815
|
|
9
|
+
gpustack_runtime/cmds/deployer.py,sha256=KvhPhU6ZW-UV6vLykI5adKI1ThgVFFJqWaII3n4OhL8,32846
|
|
10
|
+
gpustack_runtime/cmds/detector.py,sha256=AALcoqCiNuwYucKBnyj7r5ScOWc_BSzAhHR2C0QbEHE,8750
|
|
11
|
+
gpustack_runtime/cmds/images.py,sha256=7tb-D3G4yqLPkbS9aSuWI1bD3DYK8BLbPbgqac56blI,594
|
|
12
|
+
gpustack_runtime/deployer/__init__.py,sha256=impMrmvkMjuCBthsn3QUz3LuwpmmNAymHJKJ2o6SZoc,16249
|
|
13
|
+
gpustack_runtime/deployer/__patches__.py,sha256=cTBge8BT6IsY5MzETKY3kN28k3igYfNj7pcpgDzfDzw,17849
|
|
14
|
+
gpustack_runtime/deployer/__types__.py,sha256=PgIWogHOvHKsHoeBjmKFEEM3JrKck89Mmnwlfx01BbE,72248
|
|
15
|
+
gpustack_runtime/deployer/__utils__.py,sha256=paQu2M1UeoSfQPsiskmAqJSiln-8qwibTssEoWFMLec,21109
|
|
16
|
+
gpustack_runtime/deployer/docker.py,sha256=bOaXbTnaalbO42FlyWR1Ha26Y30LGWPzWKPV5Q-Nk7g,85039
|
|
17
|
+
gpustack_runtime/deployer/kuberentes.py,sha256=2M0RgsR1TlzkZ2QsElKZulftUnUMipYQPeQKKSm_Alo,89387
|
|
18
|
+
gpustack_runtime/deployer/podman.py,sha256=_qdbsTezacRmiXa3n04OUPUsgVy1pSFgJSKxous4s14,82156
|
|
19
|
+
gpustack_runtime/deployer/cdi/__init__.py,sha256=y5fFNLhlrVhWzW-GTazT7xXBukHkBQ0DGfncKqKpBzk,3986
|
|
20
|
+
gpustack_runtime/deployer/cdi/__types__.py,sha256=04DKvcogk7OoHS7TU2Bmht3VVMu7iOEBWTEOvxpHt4w,18399
|
|
21
|
+
gpustack_runtime/deployer/cdi/__utils__.py,sha256=cgqub-TQkXEjujlcQC4hJpignmmgxN5Yq9Rn6Z-TAh8,3866
|
|
22
|
+
gpustack_runtime/deployer/cdi/amd.py,sha256=-eq_SOlC56VX2QscZXvnoeffWSRindhr8zFZmaIcKrE,4082
|
|
23
|
+
gpustack_runtime/deployer/cdi/ascend.py,sha256=lDs75a9--c0lM34xfJqu-_QbfWNFrf4zE-GXPKReBe4,4538
|
|
24
|
+
gpustack_runtime/deployer/cdi/hygon.py,sha256=h6-vQfv03sgxYjMJAf_JOMq9cHFPaNjK1YbUYIiSXck,4117
|
|
25
|
+
gpustack_runtime/deployer/cdi/iluvatar.py,sha256=6nNECZpU5IPP6-5l-O1rzU-ib-WcuwKvDg7ZV__1NE4,3650
|
|
26
|
+
gpustack_runtime/deployer/cdi/metax.py,sha256=tmJBvr-n9pERAp-dXsa54qv6xmxt0rJoJwY36TFdoWk,4143
|
|
27
|
+
gpustack_runtime/deployer/cdi/thead.py,sha256=SvIDKNYZx7FwMPTTxyJ2RRjlr9LXLN8BUYCUhidmiQk,3671
|
|
28
|
+
gpustack_runtime/deployer/k8s/deviceplugin/__init__.py,sha256=RTQfArfX3FmS9lXKTwoWnb2gyqTIiDhRHEGe7MPxLzE,8040
|
|
29
|
+
gpustack_runtime/deployer/k8s/deviceplugin/__types__.py,sha256=QaswnGns_hqMdVPeryDqmyhJrAHu_pfAlIC_25NHp4k,3057
|
|
30
|
+
gpustack_runtime/deployer/k8s/deviceplugin/plugin.py,sha256=ktQ44wdNJNnv0Fn-TAnTQN0Jq5ZSqRA09WV407CrPvc,17931
|
|
31
|
+
gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/__init__.py,sha256=3rOYmgDIIJ4idEtwgnumGStH7PaK-J7EYrOnLa9A-8o,118
|
|
32
|
+
gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api.proto,sha256=rmB8RDe4LN5FCVkQ608uS-pl32mk5tt6iGe-g2lKtPs,7919
|
|
33
|
+
gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2.py,sha256=DzmeyfwekwE0U0dK6WfwX1NQYLEr2mFa1hdaUTxt2f8,8404
|
|
34
|
+
gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2.pyi,sha256=lq1dbSgBYqJ7zyGfoKKHCyfr6R5vcCGzJxteeyQpbuI,8232
|
|
35
|
+
gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2_grpc.py,sha256=GM6EyCEFeyOjL0XOCisbcHurRoLKqKDUI5obsUyTxpE,17446
|
|
36
|
+
gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/constants.py,sha256=tpNk3e_cvY67C9RwVsdTNl75YuNXBgsn53fSJIzeTR4,828
|
|
37
|
+
gpustack_runtime/detector/__init__.py,sha256=9i6KOd3Qp_BmnSyPURlOBHlHJMSInqlDTh1kpAbs4_U,8104
|
|
38
|
+
gpustack_runtime/detector/__types__.py,sha256=tiYbxPD6gV5wS79K3d2dUzy6btJl4QcsgunyxtJ240E,13162
|
|
39
|
+
gpustack_runtime/detector/__utils__.py,sha256=QdLWXwsU1_EMxXG5Y29psqnttWJyXWMphHDjiC_6Byc,25153
|
|
40
|
+
gpustack_runtime/detector/amd.py,sha256=qh86xGhPJRIXwiKaHmeyIrsxchUDRpyggR6yc0cLuKw,17553
|
|
41
|
+
gpustack_runtime/detector/ascend.py,sha256=E6YPoREI5r2HZIegUaQb0XwC3Qau1mnkNeCRbgtlE5k,17992
|
|
42
|
+
gpustack_runtime/detector/cambricon.py,sha256=GzXlS4et8zape0rr19d1EwSV9cfFEmHgaElTVMjR3IY,3947
|
|
43
|
+
gpustack_runtime/detector/hygon.py,sha256=R4I8h66YHJC00iAtDJhBX772VMKUdZ8nleRXZFPUt3Q,12299
|
|
44
|
+
gpustack_runtime/detector/iluvatar.py,sha256=bqciqjYE_tIxBg2PSOlIzH3WcFYBgTDjfN6nT90LSGg,10206
|
|
45
|
+
gpustack_runtime/detector/metax.py,sha256=W4NSZD7Kf5431B63UBpYnKIk2Jz1SutEpwXkwgYfmfE,10374
|
|
46
|
+
gpustack_runtime/detector/mthreads.py,sha256=GcElUIMvU5C6P4Fx_X_kSOsJps8WZ47tkZ2B2MJZmk4,12131
|
|
47
|
+
gpustack_runtime/detector/nvidia.py,sha256=xdvoMdNx943on5fd_mI3WI_85kMY-0dYm5NU3vqXb9M,33400
|
|
48
|
+
gpustack_runtime/detector/thead.py,sha256=43TGPq78FulpYBUeEMVxDTY-0X3ve2FsX4Hsd0Lswy0,25561
|
|
49
|
+
gpustack_runtime/detector/pyacl/__init__.py,sha256=UQjaBxP7nJNyzr08N8_lH-5wPtnFmUY9pyQhs6vIChU,16232
|
|
50
|
+
gpustack_runtime/detector/pyamdgpu/__init__.py,sha256=x-UO07EpKEgfTLmXQOD6j9f6kibuvDC7riQFof3YGdw,8617
|
|
51
|
+
gpustack_runtime/detector/pyamdsmi/__init__.py,sha256=800-khq2w6HLgXM12RkhcdvXBGeAJ4s1_TWJyHebCMk,955
|
|
52
|
+
gpustack_runtime/detector/pycuda/__init__.py,sha256=p-Na5eBjdsFGXPHCEEUfFj5TQFjPxPQGV0i4oU6ywBA,16922
|
|
53
|
+
gpustack_runtime/detector/pydcmi/__init__.py,sha256=zCeImelWgbPDn51dJgp3LlCK-uGOuHmmPGiOsyYB3cI,37264
|
|
54
|
+
gpustack_runtime/detector/pyhgml/__init__.py,sha256=Yp9s-QhHS4ck7Iq9kd4v6a4BruyJQcuOTYlyYSmnOgI,182896
|
|
55
|
+
gpustack_runtime/detector/pyhgml/libhgml.so,sha256=BPzGVBpzrMX1tSvbXddq8Q0Qhi8w-No2JXX8sRxTioI,2101640
|
|
56
|
+
gpustack_runtime/detector/pyhgml/libuki.so,sha256=EE6v1vIYYT4FSDMMm9rSfAqwrwIPFD-4_6KtP51lSps,702352
|
|
57
|
+
gpustack_runtime/detector/pyhsa/__init__.py,sha256=4DuGnBBMUVOCPa6vTx3XT5mffGrKk6M6CYbUWBoMTJ0,15792
|
|
58
|
+
gpustack_runtime/detector/pyixml/__init__.py,sha256=6ss_Dyl8lIT4WrKpfwmQqzBmg4Bxi38vg_eey_wsSY0,162681
|
|
59
|
+
gpustack_runtime/detector/pymxsml/__init__.py,sha256=YxfNHq7TWd7CpNroP45BGXhcWNpY_sXgVzNGtx68DII,45409
|
|
60
|
+
gpustack_runtime/detector/pyrocmcore/__init__.py,sha256=rgwIdPS-7GG7_5luRMR1XG9QyNM3lJh5ryD7kfZqpWg,2523
|
|
61
|
+
gpustack_runtime/detector/pyrocmsmi/__init__.py,sha256=ACwRtJWVIuJ4NTcBJxk0zrVb_qtDOMkApMdbJoag5g0,11906
|
|
62
|
+
gpustack_runtime/_version_appendix.py,sha256=C8KJWDWnWAHNEMYzKYXyeA2PTIO31o7aTAhQlRkmhUA,23
|
|
63
|
+
gpustack_runtime-0.1.41.dist-info/METADATA,sha256=RDItHofAkys1Iqn6uwI03D6nqq-QrrMOcaJ43Iu2XH4,2358
|
|
64
|
+
gpustack_runtime-0.1.41.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
65
|
+
gpustack_runtime-0.1.41.dist-info/entry_points.txt,sha256=bBO_61GxP6dIT74uZwbSDgW5Vt2pTePUS3CgjUJkUgg,68
|
|
66
|
+
gpustack_runtime-0.1.41.dist-info/licenses/LICENSE,sha256=OiPibowBvB-NHV3TP_NOj18XNBlXcshXZFMpa3uvKVE,10362
|
|
67
|
+
gpustack_runtime-0.1.41.dist-info/RECORD,,
|