gpustack_runtime-0.1.40.post1-py3-none-any.whl → gpustack_runtime-0.1.41-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- gpustack_runtime/__init__.py +1 -1
- gpustack_runtime/__main__.py +5 -3
- gpustack_runtime/_version.py +2 -2
- gpustack_runtime/_version_appendix.py +1 -1
- gpustack_runtime/cmds/__init__.py +5 -3
- gpustack_runtime/cmds/__types__.py +1 -1
- gpustack_runtime/cmds/deployer.py +140 -18
- gpustack_runtime/cmds/detector.py +1 -1
- gpustack_runtime/cmds/images.py +1 -1
- gpustack_runtime/deployer/__init__.py +28 -2
- gpustack_runtime/deployer/__patches__.py +1 -1
- gpustack_runtime/deployer/__types__.py +2 -1
- gpustack_runtime/deployer/__utils__.py +2 -2
- gpustack_runtime/deployer/cdi/__init__.py +85 -5
- gpustack_runtime/deployer/cdi/__types__.py +92 -29
- gpustack_runtime/deployer/cdi/__utils__.py +178 -0
- gpustack_runtime/deployer/cdi/amd.py +146 -0
- gpustack_runtime/deployer/cdi/ascend.py +164 -0
- gpustack_runtime/deployer/cdi/hygon.py +147 -0
- gpustack_runtime/deployer/cdi/iluvatar.py +136 -0
- gpustack_runtime/deployer/cdi/metax.py +148 -0
- gpustack_runtime/deployer/cdi/thead.py +57 -23
- gpustack_runtime/deployer/docker.py +9 -8
- gpustack_runtime/deployer/k8s/deviceplugin/__init__.py +240 -0
- gpustack_runtime/deployer/k8s/deviceplugin/__types__.py +131 -0
- gpustack_runtime/deployer/k8s/deviceplugin/plugin.py +586 -0
- gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/__init__.py +3 -0
- gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api.proto +212 -0
- gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2.py +86 -0
- gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2.pyi +168 -0
- gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/api_pb2_grpc.py +358 -0
- gpustack_runtime/deployer/k8s/types/kubelet/deviceplugin/v1beta1/constants.py +34 -0
- gpustack_runtime/deployer/kuberentes.py +37 -4
- gpustack_runtime/deployer/podman.py +9 -8
- gpustack_runtime/detector/__init__.py +42 -5
- gpustack_runtime/detector/__types__.py +8 -24
- gpustack_runtime/detector/__utils__.py +46 -39
- gpustack_runtime/detector/amd.py +55 -66
- gpustack_runtime/detector/ascend.py +29 -41
- gpustack_runtime/detector/cambricon.py +3 -3
- gpustack_runtime/detector/hygon.py +21 -49
- gpustack_runtime/detector/iluvatar.py +44 -60
- gpustack_runtime/detector/metax.py +54 -37
- gpustack_runtime/detector/mthreads.py +74 -36
- gpustack_runtime/detector/nvidia.py +130 -93
- gpustack_runtime/detector/pyacl/__init__.py +1 -1
- gpustack_runtime/detector/pyamdgpu/__init__.py +1 -1
- gpustack_runtime/detector/pyamdsmi/__init__.py +1 -1
- gpustack_runtime/detector/pycuda/__init__.py +1 -1
- gpustack_runtime/detector/pydcmi/__init__.py +1 -1
- gpustack_runtime/detector/pyhsa/__init__.py +1 -1
- gpustack_runtime/detector/pymxsml/__init__.py +1553 -1
- gpustack_runtime/detector/pyrocmcore/__init__.py +1 -1
- gpustack_runtime/detector/pyrocmsmi/__init__.py +1 -1
- gpustack_runtime/detector/thead.py +41 -60
- gpustack_runtime/envs.py +104 -12
- gpustack_runtime/logging.py +6 -2
- {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.dist-info}/METADATA +6 -1
- gpustack_runtime-0.1.41.dist-info/RECORD +67 -0
- gpustack_runtime/detector/pymxsml/mxsml.py +0 -1580
- gpustack_runtime/detector/pymxsml/mxsml_extension.py +0 -816
- gpustack_runtime/detector/pymxsml/mxsml_mcm.py +0 -476
- gpustack_runtime-0.1.40.post1.dist-info/RECORD +0 -55
- {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.dist-info}/WHEEL +0 -0
- {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.dist-info}/entry_points.txt +0 -0
- {gpustack_runtime-0.1.40.post1.dist-info → gpustack_runtime-0.1.41.dist-info}/licenses/LICENSE +0 -0
gpustack_runtime/detector/nvidia.py

```diff
@@ -1,4 +1,4 @@
-from __future__ import annotations
+from __future__ import annotations as __future_annotations__
 
 import contextlib
 import logging
@@ -6,6 +6,8 @@ import math
 import time
 from _ctypes import byref
 from functools import lru_cache
+from pathlib import Path
+from typing import re
 
 import pynvml
 
@@ -18,7 +20,6 @@ from .__utils__ import (
     bitmask_to_str,
     byte_to_mebibyte,
     get_brief_version,
-    get_device_files,
     get_memory,
     get_numa_node_by_bdf,
     get_numa_nodeset_size,
@@ -37,7 +38,7 @@ class NVIDIADetector(Detector):
     """
 
     @staticmethod
-    @lru_cache
+    @lru_cache(maxsize=1)
     def is_supported() -> bool:
         """
         Check if NVIDIA detection is supported.
@@ -66,7 +67,7 @@ class NVIDIADetector(Detector):
         return supported
 
     @staticmethod
-    @lru_cache
+    @lru_cache(maxsize=1)
     def detect_pci_devices() -> dict[str, PCIDevice]:
         # See https://pcisig.com/membership/member-companies?combine=NVIDIA.
         pci_devs = get_pci_devices(vendor="0x10de")
```
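Both cached entry points move from a bare `@lru_cache` to `@lru_cache(maxsize=1)`. For a zero-argument callable the two are functionally equivalent, since only one cache entry can ever exist; the explicit bound simply documents that a single result is cached. A minimal standalone sketch (not taken from the package) of the memoization effect:

```python
from functools import lru_cache


@lru_cache(maxsize=1)
def is_supported() -> bool:
    # Stand-in for an expensive driver probe; the print shows it runs only once.
    print("probing driver ...")
    return True


print(is_supported())             # probing driver ... then True
print(is_supported())             # True, served from the cache
print(is_supported.cache_info())  # CacheInfo(hits=1, misses=1, maxsize=1, currsize=1)
```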
```diff
@@ -122,36 +123,35 @@ class NVIDIADetector(Detector):
         )
 
         dev_count = pynvml.nvmlDeviceGetCount()
-        dev_files = None
         for dev_idx in range(dev_count):
             dev = pynvml.nvmlDeviceGetHandleByIndex(dev_idx)
 
             dev_cc_t = pynvml.nvmlDeviceGetCudaComputeCapability(dev)
             dev_cc = ".".join(map(str, dev_cc_t))
 
-
-
-
-
+            dev_pci_info = pynvml.nvmlDeviceGetPciInfo(dev)
+            dev_bdf = str(dev_pci_info.busIdLegacy).lower()
+
+            dev_numa = get_numa_node_by_bdf(dev_bdf)
+            if not dev_numa:
+                dev_node_affinity = pynvml.nvmlDeviceGetMemoryAffinity(
+                    dev,
+                    get_numa_nodeset_size(),
+                    pynvml.NVML_AFFINITY_SCOPE_NODE,
+                )
+                dev_numa = bitmask_to_str(list(dev_node_affinity))
 
             dev_mig_mode = pynvml.NVML_DEVICE_MIG_DISABLE
             with contextlib.suppress(pynvml.NVMLError):
                 dev_mig_mode, _ = pynvml.nvmlDeviceGetMigMode(dev)
 
+            dev_index = dev_idx
+            if envs.GPUSTACK_RUNTIME_DETECT_PHYSICAL_INDEX_PRIORITY:
+                dev_index = pynvml.nvmlDeviceGetMinorNumber(dev)
+
             # With MIG disabled, treat as a single device.
 
             if dev_mig_mode == pynvml.NVML_DEVICE_MIG_DISABLE:
-                dev_index = dev_idx
-                if envs.GPUSTACK_RUNTIME_DETECT_PHYSICAL_INDEX_PRIORITY:
-                    if dev_files is None:
-                        dev_files = get_device_files(
-                            pattern=r"nvidia(?P<number>\d+)",
-                        )
-                    if len(dev_files) >= dev_count:
-                        dev_file = dev_files[dev_idx]
-                        if dev_file.number is not None:
-                            dev_index = dev_file.number
-
                 dev_name = pynvml.nvmlDeviceGetName(dev)
 
                 dev_uuid = pynvml.nvmlDeviceGetUUID(dev)
```
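The device loop now resolves each GPU's PCI BDF and NUMA node eagerly (replacing the previous device-file scan), falls back to NVML's memory-affinity node mask when no NUMA node is known for the BDF, and takes the physical index from the device's minor number. The pynvml calls below are real NVML bindings from `nvidia-ml-py`; the sysfs lookup is an illustrative stand-in for the package's `get_numa_node_by_bdf`/`bitmask_to_str` helpers, whose implementations are not part of this diff:

```python
from pathlib import Path

import pynvml

pynvml.nvmlInit()
try:
    handle = pynvml.nvmlDeviceGetHandleByIndex(0)

    # PCI bus/device/function in legacy "0000:65:00.0" form, lower-cased.
    bdf_raw = pynvml.nvmlDeviceGetPciInfo(handle).busIdLegacy
    bdf = (bdf_raw.decode() if isinstance(bdf_raw, bytes) else str(bdf_raw)).lower()

    # Preferred source: the kernel's NUMA node for that PCI function.
    numa_path = Path(f"/sys/bus/pci/devices/{bdf}/numa_node")
    numa = numa_path.read_text().strip() if numa_path.exists() else ""

    if not numa or numa == "-1":
        # Fallback: NVML memory-affinity bitmask over NUMA nodes.
        nodeset = pynvml.nvmlDeviceGetMemoryAffinity(
            handle,
            1,  # one 64-bit word covers hosts with <= 64 NUMA nodes
            pynvml.NVML_AFFINITY_SCOPE_NODE,
        )
        numa = ",".join(
            str(64 * word_idx + bit)
            for word_idx, word in enumerate(nodeset)
            for bit in range(64)
            if word >> bit & 1
        )

    # Stable "physical" index: the /dev/nvidia<minor> number.
    minor = pynvml.nvmlDeviceGetMinorNumber(handle)
    print(bdf, numa, minor)
finally:
    pynvml.nvmlShutdown()
```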
```diff
@@ -208,18 +208,15 @@ class NVIDIADetector(Detector):
                 )  # mW to W
 
                 dev_is_vgpu = False
-                if dev_bdf
+                if dev_bdf in pci_devs:
                     dev_is_vgpu = _is_vgpu(pci_devs[dev_bdf].config)
 
                 dev_appendix = {
                     "arch_family": _get_arch_family(dev_cc_t),
                     "vgpu": dev_is_vgpu,
+                    "bdf": dev_bdf,
+                    "numa": dev_numa,
                 }
-                if dev_bdf:
-                    dev_appendix["bdf"] = dev_bdf
-
-                if dev_links_state := _get_links_state(dev):
-                    dev_appendix.update(dev_links_state)
 
                 if dev_fabric_info := _get_fabric_info(dev):
                     dev_appendix.update(dev_fabric_info)
@@ -251,6 +248,8 @@ class NVIDIADetector(Detector):
             # Otherwise, get MIG devices,
             # inspired by https://github.com/NVIDIA/go-nvlib/blob/fdfe25d0ffc9d7a8c166f4639ef236da81116262/pkg/nvlib/device/mig_device.go#L61-L154.
 
+            dev_mig_minors = _get_mig_minors()
+
             mdev_name = ""
             mdev_cores = None
             mdev_count = pynvml.nvmlDeviceGetMaxMigDeviceCount(dev)
@@ -288,14 +287,21 @@ class NVIDIADetector(Detector):
                 mdev_appendix = {
                     "arch_family": _get_arch_family(dev_cc_t),
                     "vgpu": True,
+                    "bdf": dev_bdf,
+                    "numa": dev_numa,
                 }
-                if dev_bdf:
-                    mdev_appendix["bdf"] = dev_bdf
 
                 mdev_gi_id = pynvml.nvmlDeviceGetGpuInstanceId(mdev)
                 mdev_appendix["gpu_instance_id"] = mdev_gi_id
                 mdev_ci_id = pynvml.nvmlDeviceGetComputeInstanceId(mdev)
                 mdev_appendix["compute_instance_id"] = mdev_ci_id
+                if envs.GPUSTACK_RUNTIME_DETECT_PHYSICAL_INDEX_PRIORITY:
+                    mdev_appendix["gpu_instance_index"] = dev_mig_minors.get(
+                        (dev_index, mdev_gi_id, None),
+                    )
+                    mdev_appendix["compute_instance_index"] = dev_mig_minors.get(
+                        (dev_index, mdev_gi_id, mdev_ci_id),
+                    )
 
                 mdev_cores_util = _get_sm_util_from_gpm_metrics(dev, mdev_gi_id)
 
@@ -426,36 +432,24 @@ class NVIDIADetector(Detector):
             for i, dev_i in enumerate(devices):
                 dev_i_handle = pynvml.nvmlDeviceGetHandleByUUID(dev_i.uuid)
 
-                # Get
-
-
-
-
-
-
-
-
-                #
-
-
-
-
-
-
-                        list(dev_i_memset),
-                    )
-                except pynvml.NVMLError:
-                    debug_log_exception(
-                        logger,
-                        "Failed to get NUMA affinity for device %d",
-                        dev_i.index,
-                    )
-                # Get CPU affinity.
-                ret.devices_cpu_affinities[i] = map_numa_node_to_cpu_affinity(
-                    ret.devices_numa_affinities[i],
-                )
+                # Get NUMA and CPU affinities.
+                ret.devices_numa_affinities[i] = dev_i.appendix.get("numa", "")
+                ret.devices_cpu_affinities[i] = map_numa_node_to_cpu_affinity(
+                    ret.devices_numa_affinities[i],
+                )
+
+                # Get links state if applicable.
+                if dev_i_links_state := _get_links_state(dev_i_handle):
+                    ret.appendices[i].update(dev_i_links_state)
+                    # In practice, if a card has an active *Link,
+                    # then other cards in the same machine should be interconnected with it through the *Link.
+                    if dev_i_links_state.get("links_active_count", 0) > 0:
+                        for j, dev_j in enumerate(devices):
+                            if dev_i.index == dev_j.index:
+                                continue
+                            ret.devices_distances[i][j] = TopologyDistanceEnum.LINK
+                            ret.devices_distances[j][i] = TopologyDistanceEnum.LINK
+                        continue
 
                 # Get distances to other devices.
                 for j, dev_j in enumerate(devices):
@@ -470,8 +464,6 @@ class NVIDIADetector(Detector):
                             dev_i_handle,
                             dev_j_handle,
                         )
-                        if dev_i.appendix.get("links_state", 0) > 0:
-                            distance = TopologyDistanceEnum.LINK
                     except pynvml.NVMLError:
                         debug_log_exception(
                             logger,
@@ -482,9 +474,6 @@ class NVIDIADetector(Detector):
 
                     ret.devices_distances[i][j] = distance
                     ret.devices_distances[j][i] = distance
-        except pynvml.NVMLError:
-            debug_log_exception(logger, "Failed to fetch topology")
-            raise
         except Exception:
             debug_log_exception(logger, "Failed to process topology fetching")
             raise
```
```diff
@@ -619,6 +608,37 @@ def _extract_field_value(
     return None
 
 
+def _get_fabric_info(
+    dev: pynvml.c_nvmlDevice_t,
+) -> dict | None:
+    """
+    Get the NVSwitch fabric information for a device.
+
+    Args:
+        dev:
+            The NVML device handle.
+
+    Returns:
+        A dict includes fabric info or None if failed.
+
+    """
+    try:
+        dev_fabric = pynvml.c_nvmlGpuFabricInfoV_t()
+        ret = pynvml.nvmlDeviceGetGpuFabricInfoV(dev, byref(dev_fabric))
+        if ret != pynvml.NVML_SUCCESS:
+            return None
+        if dev_fabric.state != pynvml.NVML_GPU_FABRIC_STATE_COMPLETED:
+            return None
+        return {
+            "fabric_cluster_uuid": stringify_uuid(bytes(dev_fabric.clusterUuid)),
+            "fabric_clique_id": dev_fabric.cliqueId,
+        }
+    except pynvml.NVMLError:
+        debug_log_warning(logger, "Failed to get NVSwitch fabric info")
+
+    return None
+
+
 def _get_links_state(
     dev: pynvml.c_nvmlDevice_t,
 ) -> dict | None:
```
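`_get_fabric_info` is relocated above `_get_links_state` (its old definition is removed in the next hunk) and still feeds `fabric_cluster_uuid` and `fabric_clique_id` into each device's appendix. Devices reporting the same cluster UUID and clique ID belong to the same NVSwitch fabric partition, which is the natural key for a consumer to group on. A small hedged sketch of such a grouping; the `appendices` shape here is illustrative, not the package's actual device type:

```python
from collections import defaultdict

# Hypothetical detection output: one appendix dict per device, using the
# fabric keys _get_fabric_info produces in this release.
appendices = [
    {"index": 0, "fabric_cluster_uuid": "8b6a…", "fabric_clique_id": 1},
    {"index": 1, "fabric_cluster_uuid": "8b6a…", "fabric_clique_id": 1},
    {"index": 2},  # fabric state not COMPLETED -> no fabric keys
]

cliques: dict[tuple, list[int]] = defaultdict(list)
for appendix in appendices:
    uuid = appendix.get("fabric_cluster_uuid")
    clique = appendix.get("fabric_clique_id")
    if uuid is not None and clique is not None:
        # Devices sharing (cluster UUID, clique ID) are fabric-connected peers.
        cliques[(uuid, clique)].append(appendix["index"])

print(dict(cliques))  # {('8b6a…', 1): [0, 1]}
```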
```diff
@@ -646,49 +666,23 @@ def _get_links_state(
         return None
 
     dev_links_state = 0
+    dev_links_active_count = 0
     try:
         for link_idx in range(int(dev_links_count)):
             dev_link_state = pynvml.nvmlDeviceGetNvLinkState(dev, link_idx)
             if dev_link_state:
-                dev_links_state |= 1 <<
+                dev_links_state |= 1 << link_idx
+                dev_links_active_count += 1
     except pynvml.NVMLError:
         debug_log_warning(logger, "Failed to get NVLink link state")
 
     return {
         "links_count": dev_links_count,
         "links_state": dev_links_state,
+        "links_active_count": dev_links_active_count,
     }
 
 
-def _get_fabric_info(
-    dev: pynvml.c_nvmlDevice_t,
-) -> dict | None:
-    """
-    Get the NVSwitch fabric information for a device.
-
-    Args:
-        dev:
-            The NVML device handle.
-
-    Returns:
-        A dict includes fabric info or None if failed.
-
-    """
-    try:
-        dev_fabric = pynvml.c_nvmlGpuFabricInfoV_t()
-        ret = pynvml.nvmlDeviceGetGpuFabricInfoV(dev, byref(dev_fabric))
-        if ret != pynvml.NVML_SUCCESS:
-            return None
-        if dev_fabric.state != pynvml.NVML_GPU_FABRIC_STATE_COMPLETED:
-            return None
-        return {
-            "fabric_cluster_uuid": stringify_uuid(bytes(dev_fabric.clusterUuid)),
-            "fabric_clique_id": dev_fabric.cliqueId,
-        }
-    except pynvml.NVMLError:
-        debug_log_warning(logger, "Failed to get NVSwitch fabric info")
-
-
 def _get_arch_family(dev_cc_t: list[int]) -> str:
     """
     Get the architecture family based on the CUDA compute capability.
```
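`_get_links_state` now also returns `links_active_count` alongside the per-link `links_state` bitmask, and the topology pass above keys off that count (instead of the bitmask value) to mark a device as *Link-connected. A tiny standalone illustration of the bookkeeping, with made-up link states in place of `nvmlDeviceGetNvLinkState` calls:

```python
# Pretend link states for a 6-link GPU: True means the link is up.
link_states = [True, False, True, True, False, False]

links_state = 0
links_active_count = 0
for link_idx, up in enumerate(link_states):
    if up:
        links_state |= 1 << link_idx  # set bit link_idx in the bitmask
        links_active_count += 1

print(
    {
        "links_count": len(link_states),
        "links_state": bin(links_state),           # 0b1101 -> links 0, 2, 3 active
        "links_active_count": links_active_count,  # 3 -> treated as *Link-connected
    },
)
```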
```diff
@@ -917,3 +911,46 @@ def _is_vgpu(dev_config: bytes) -> bool:
     # Check for vGPU signature,
     # which is either 0x56 (NVIDIA vGPU) or 0x46 (NVIDIA GRID).
     return dev_cap[3] == 0x56 or dev_cap[4] == 0x46
+
+
+def _get_mig_minors() -> dict[tuple, int] | None:
+    """
+    Get the minor mapping for MIG capability devices.
+
+    Returns:
+        A dict mapping (gpu_id, gi_id, ci_id) to minor number,
+        or None if not supported.
+
+    """
+    mig_minors_path = Path("/proc/driver/nvidia-caps/mig-minors")
+    if not mig_minors_path.exists():
+        return None
+
+    ret = {}
+    for _line in mig_minors_path.read_text(encoding="utf-8").splitlines():
+        line = _line.strip()
+        if not line:
+            continue
+
+        # Scan lines like:
+        # gpu%d/gi%d/ci%d/access %d
+        m = re.match(r"gpu(\d+)/gi(\d+)/ci(\d+)/access (\d+)", line)
+        if m:
+            gpu_id = int(m.group(1))
+            gi_id = int(m.group(2))
+            ci_id = int(m.group(3))
+            minor = int(m.group(4))
+            ret[(gpu_id, gi_id, ci_id)] = minor
+            continue
+
+        # Scan lines like:
+        # gpu%d/gi%d/access %d
+        m = re.match(r"gpu(\d+)/gi(\d+)/access (\d+)", line)
+        if m:
+            gpu_id = int(m.group(1))
+            gi_id = int(m.group(2))
+            minor = int(m.group(3))
+            ret[(gpu_id, gi_id, None)] = minor
+            continue
+
+    return ret
```
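The new `_get_mig_minors` helper reads `/proc/driver/nvidia-caps/mig-minors`, where the NVIDIA driver exposes MIG capability minors, including an `access` entry per GPU instance (`gpu<g>/gi<i>/access <minor>`) and per compute instance (`gpu<g>/gi<i>/ci<c>/access <minor>`), and keys the result by `(gpu_id, gi_id, ci_id)` with `ci_id=None` for GI-level entries. A self-contained sketch of the same parsing over sample text; the sample minor numbers are made up:

```python
import re

# Illustrative excerpt in the /proc/driver/nvidia-caps/mig-minors format.
sample = """\
gpu0/gi1/access 30
gpu0/gi1/ci0/access 31
gpu0/gi2/access 39
"""

minors: dict[tuple, int] = {}
for line in sample.splitlines():
    line = line.strip()
    if m := re.match(r"gpu(\d+)/gi(\d+)/ci(\d+)/access (\d+)", line):
        # Compute-instance entry: key includes the CI id.
        minors[(int(m[1]), int(m[2]), int(m[3]))] = int(m[4])
    elif m := re.match(r"gpu(\d+)/gi(\d+)/access (\d+)", line):
        # GPU-instance entry: CI id is None.
        minors[(int(m[1]), int(m[2]), None)] = int(m[3])

print(minors.get((0, 1, None)))  # 30 -> gpu_instance_index for gpu0/gi1
print(minors.get((0, 1, 0)))     # 31 -> compute_instance_index for gpu0/gi1/ci0
```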
gpustack_runtime/detector/pyamdsmi/__init__.py

```diff
@@ -1,7 +1,7 @@
 # Bridge amdsmi module to avoid import errors when amdsmi is not installed
 # This module raises an exception when amdsmi_init is called
 # and does nothing when amdsmi_shut_down is called.
-from __future__ import annotations
+from __future__ import annotations as __future_annotations__
 
 import contextlib
 import os
```