gpustack-runtime 0.1.39.post2-py3-none-any.whl → 0.1.39.post3-py3-none-any.whl
- gpustack_runtime/_version.py +2 -2
- gpustack_runtime/_version_appendix.py +1 -1
- gpustack_runtime/deployer/docker.py +6 -2
- gpustack_runtime/deployer/podman.py +6 -2
- gpustack_runtime/detector/__utils__.py +23 -0
- gpustack_runtime/detector/amd.py +17 -9
- gpustack_runtime/detector/hygon.py +6 -1
- gpustack_runtime/detector/iluvatar.py +10 -2
- gpustack_runtime/detector/mthreads.py +8 -12
- gpustack_runtime/detector/nvidia.py +194 -86
- gpustack_runtime/detector/pyrocmsmi/__init__.py +3 -9
- gpustack_runtime/envs.py +1 -1
- {gpustack_runtime-0.1.39.post2.dist-info → gpustack_runtime-0.1.39.post3.dist-info}/METADATA +3 -2
- {gpustack_runtime-0.1.39.post2.dist-info → gpustack_runtime-0.1.39.post3.dist-info}/RECORD +17 -18
- gpustack_runtime/detector/pymtml/__init__.py +0 -770
- {gpustack_runtime-0.1.39.post2.dist-info → gpustack_runtime-0.1.39.post3.dist-info}/WHEEL +0 -0
- {gpustack_runtime-0.1.39.post2.dist-info → gpustack_runtime-0.1.39.post3.dist-info}/entry_points.txt +0 -0
- {gpustack_runtime-0.1.39.post2.dist-info → gpustack_runtime-0.1.39.post3.dist-info}/licenses/LICENSE +0 -0
gpustack_runtime/_version.py
CHANGED

```diff
@@ -27,8 +27,8 @@ version_tuple: VERSION_TUPLE
     __commit_id__: COMMIT_ID
     commit_id: COMMIT_ID
 
-__version__ = version = '0.1.39.post2'
-__version_tuple__ = version_tuple = (0, 1, 39, 'post2')
+__version__ = version = '0.1.39.post3'
+__version_tuple__ = version_tuple = (0, 1, 39, 'post3')
 
 try:
     from ._version_appendix import git_commit
     __commit_id__ = commit_id = git_commit
```

gpustack_runtime/_version_appendix.py
CHANGED

```diff
@@ -1 +1 @@
-git_commit = "
+git_commit = "d65920e"
```
gpustack_runtime/deployer/docker.py
CHANGED

```diff
@@ -1213,8 +1213,12 @@ class DockerDeployer(EndoscopicDeployer):
         self_container_envs: dict[str, str] = dict(
             item.split("=", 1) for item in self_container.attrs["Config"].get("Env", [])
         )
-        self_image_envs: dict[str, str] = dict(
-            item.split("=", 1) for item in self_image.attrs["Config"].get("Env", [])
+        self_image_envs: dict[str, str] = (
+            dict(
+                item.split("=", 1) for item in self_image.attrs["Config"].get("Env", [])
+            )
+            if self_image.attrs["Config"]
+            else {}
         )
         mirrored_envs: dict[str, str] = {
             # Filter out gpustack-internal envs and same-as-image envs.
```
gpustack_runtime/deployer/podman.py
CHANGED

```diff
@@ -1189,8 +1189,12 @@ class PodmanDeployer(EndoscopicDeployer):
         self_container_envs: dict[str, str] = dict(
             item.split("=", 1) for item in self_container.attrs["Config"].get("Env", [])
         )
-        self_image_envs: dict[str, str] = dict(
-            item.split("=", 1) for item in self_image.attrs["Config"].get("Env", [])
+        self_image_envs: dict[str, str] = (
+            dict(
+                item.split("=", 1) for item in self_image.attrs["Config"].get("Env", [])
+            )
+            if self_image.attrs["Config"]
+            else {}
         )
         mirrored_envs: dict[str, str] = {
             # Filter out gpustack-internal envs and same-as-image envs.
```
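Both deployer changes guard against images whose `Config` attribute is empty. A minimal sketch of the failure mode the guard avoids; treating `Config` as possibly `None` is an assumption about what docker-py/podman-py can report for some images:

```python
# With config = None, the old expression config.get("Env", []) would raise
# AttributeError before dict() ever ran; the new conditional short-circuits.
config = None  # hypothetical attrs["Config"] value

image_envs = (
    dict(item.split("=", 1) for item in config.get("Env", []))
    if config
    else {}
)
assert image_envs == {}
```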
gpustack_runtime/detector/__utils__.py
CHANGED

```diff
@@ -951,3 +951,26 @@ def bitmask_to_str(bitmask_list: list) -> str:
         offset += get_bits_size()
 
     return list_to_range_str(sorted(bits_lists))
+
+
+def get_physical_function_by_bdf(bdf: str) -> str:
+    """
+    Get the physical function BDF for a given PCI device BDF address.
+
+    Args:
+        bdf:
+            The PCI device BDF address (e.g., "0000:00:1f.0").
+
+    Returns:
+        The physical function BDF if found, otherwise returns the original BDF.
+
+    """
+    if bdf:
+        with contextlib.suppress(Exception):
+            dev_path = Path(f"/sys/bus/pci/devices/{bdf}")
+            if dev_path.exists():
+                physfn_path = dev_path / "physfn"
+                if physfn_path.exists():
+                    physfn_realpath = physfn_path.resolve()
+                    return physfn_realpath.name
+    return bdf
```
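The helper leans on the sysfs layout for SR-IOV: a virtual function's device node carries a `physfn` symlink that resolves to its physical function, so a device whose resolved parent BDF differs from its own BDF is a VF. The detectors below use exactly that comparison to set the `vgpu` flag. A standalone sketch of the same check (the sample BDF is made up):

```python
import contextlib
from pathlib import Path


def is_sriov_virtual_function(bdf: str) -> bool:
    """Return True if the PCI device at `bdf` is an SR-IOV VF.

    Mirrors the physfn lookup added in __utils__.py: a VF exposes
    /sys/bus/pci/devices/<bdf>/physfn pointing at its parent PF.
    """
    with contextlib.suppress(OSError):
        physfn = Path(f"/sys/bus/pci/devices/{bdf}/physfn")
        if physfn.exists():
            return physfn.resolve().name != bdf
    return False


# Hypothetical BDF for illustration only.
print(is_sriov_virtual_function("0000:3b:00.1"))
```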
gpustack_runtime/detector/amd.py
CHANGED

```diff
@@ -16,6 +16,7 @@ from .__utils__ import (
     get_brief_version,
     get_numa_node_by_bdf,
     get_pci_devices,
+    get_physical_function_by_bdf,
     get_utilization,
     map_numa_node_to_cpu_affinity,
 )
@@ -107,7 +108,11 @@ class AMDDetector(Detector):
                 asic_serial = dev_gpu_asic_info.get("asic_serial")
                 dev_uuid = f"GPU-{(asic_serial[2:]).lower()}"
             else:
-                dev_uuid =
+                dev_uuid = ""
+                with contextlib.suppress(pyrocmsmi.ROCMSMIError):
+                    dev_uuid = (
+                        f"GPU-{pyrocmsmi.rsmi_dev_unique_id_get(dev_idx)[2:]}"
+                    )
             dev_hsa_agent = hsa_agents.get(dev_uuid, pyhsa.Agent())
 
             dev_gpu_driver_info = pyamdsmi.amdsmi_get_gpu_driver_info(dev)
@@ -119,8 +124,13 @@ class AMDDetector(Detector):
 
             dev_cc = dev_hsa_agent.compute_capability
             if not dev_cc:
-
-                dev_cc =
+                if "target_graphics_version" in dev_gpu_asic_info:
+                    dev_cc = dev_gpu_asic_info.get("target_graphics_version")
+                else:
+                    with contextlib.suppress(pyrocmsmi.ROCMSMIError):
+                        dev_cc = pyrocmsmi.rsmi_dev_target_graphics_version_get(
+                            dev_idx,
+                        )
 
             dev_bdf = None
             dev_card_id = None
@@ -195,15 +205,13 @@
             dev_power = pyrocmsmi.rsmi_dev_power_cap_get(dev_idx)
             dev_power_used = pyrocmsmi.rsmi_dev_power_get(dev_idx)
 
-
-
-
-                dev,
-            )
+            dev_is_vgpu = False
+            if dev_bdf:
+                dev_is_vgpu = get_physical_function_by_bdf(dev_bdf) != dev_bdf
 
             dev_appendix = {
                 "arch_family": _get_arch_family(dev_asic_family_id),
-                "vgpu":
+                "vgpu": dev_is_vgpu,
             }
             if dev_bdf:
                 dev_appendix["bdf"] = dev_bdf
```
gpustack_runtime/detector/hygon.py
CHANGED

```diff
@@ -16,6 +16,7 @@ from .__utils__ import (
     get_brief_version,
     get_numa_node_by_bdf,
     get_pci_devices,
+    get_physical_function_by_bdf,
     get_utilization,
     map_numa_node_to_cpu_affinity,
 )
@@ -156,8 +157,12 @@ class HygonDetector(Detector):
             dev_power = pyrocmsmi.rsmi_dev_power_cap_get(dev_idx)
             dev_power_used = pyrocmsmi.rsmi_dev_power_get(dev_idx)
 
+            dev_is_vgpu = False
+            if dev_bdf:
+                dev_is_vgpu = get_physical_function_by_bdf(dev_bdf) != dev_bdf
+
             dev_appendix = {
-                "vgpu":
+                "vgpu": dev_is_vgpu,
             }
             if dev_bdf is not None:
                 dev_appendix["bdf"] = dev_bdf
```
gpustack_runtime/detector/iluvatar.py
CHANGED

```diff
@@ -23,6 +23,7 @@ from .__utils__ import (
     get_numa_node_by_bdf,
     get_numa_nodeset_size,
     get_pci_devices,
+    get_physical_function_by_bdf,
     get_utilization,
     map_numa_node_to_cpu_affinity,
     support_command,
@@ -165,13 +166,20 @@ class IluvatarDetector(Detector):
             if dev_cc_t:
                 dev_cc = ".".join(map(str, dev_cc_t))
 
+            dev_bdf = None
+            with contextlib.suppress(pyixml.NVMLError):
+                dev_pci_info = pyixml.nvmlDeviceGetPciInfo(dev)
+                dev_bdf = str(dev_pci_info.busIdLegacy).lower()
+
             dev_is_vgpu = False
-
+            if dev_bdf:
+                dev_is_vgpu = get_physical_function_by_bdf(dev_bdf) != dev_bdf
 
             dev_appendix = {
                 "vgpu": dev_is_vgpu,
-                "bdf": str(dev_pci_info.busIdLegacy).lower(),
             }
+            if dev_bdf:
+                dev_appendix["bdf"] = dev_bdf
 
             ret.append(
                 Device(
```
gpustack_runtime/detector/mthreads.py
CHANGED

```diff
@@ -3,9 +3,10 @@ from __future__ import annotations
 import logging
 from functools import lru_cache
 
+import pymtml
+
 from .. import envs
 from ..logging import debug_log_exception, debug_log_warning
-from . import pymtml
 from .__types__ import (
     Detector,
     Device,
@@ -105,9 +106,8 @@ class MThreadsDetector(Detector):
 
        try:
            pymtml.mtmlLibraryInit()
-
-            sys_driver_ver = pymtml.mtmlSystemGetDriverVersion()
-
+            system = pymtml.mtmlLibraryInitSystem()
+            sys_driver_ver = pymtml.mtmlSystemGetDriverVersion(system)
            dev_count = pymtml.mtmlLibraryCountDevice()
            for dev_idx in range(dev_count):
                dev_index = dev_idx
@@ -139,25 +139,20 @@ class MThreadsDetector(Detector):
 
             dev_mem = 0
             dev_mem_used = 0
-
-            try:
+            with pymtml.mtmlMemoryContext(dev) as devmem:
                 dev_mem = byte_to_mebibyte(  # byte to MiB
                     pymtml.mtmlMemoryGetTotal(devmem),
                 )
                 dev_mem_used = byte_to_mebibyte(  # byte to MiB
                     pymtml.mtmlMemoryGetUsed(devmem),
                 )
-            finally:
-                pymtml.mtmlDeviceFreeMemory(devmem)
 
             dev_cores_util = None
             dev_temp = None
-
-            try:
+            with pymtml.mtmlGpuContext(dev) as devgpu:
                 dev_cores_util = pymtml.mtmlGpuGetUtilization(devgpu)
                 dev_temp = pymtml.mtmlGpuGetTemperature(devgpu)
-
-                pymtml.mtmlDeviceFreeGpu(devgpu)
+
             if dev_cores_util is None:
                 debug_log_warning(
                     logger,
@@ -198,6 +193,7 @@ class MThreadsDetector(Detector):
             debug_log_exception(logger, "Failed to process devices fetching")
             raise
         finally:
+            pymtml.mtmlLibraryFreeSystem(system)
             pymtml.mtmlLibraryShutDown()
 
         return ret
```
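With the vendored `gpustack_runtime/detector/pymtml` module deleted (the +0 -770 entry in the file list above) in favor of the external `mthreads-ml-py` dependency, the detector now owns an explicit system handle and uses scoped context managers for per-device queries. A sketch of the lifecycle under those assumptions; it uses only calls visible in this diff, except the device accessor, which is hypothetical because the diff does not show how `dev` is obtained:

```python
# Sketch of the MTML init/teardown flow the detector now follows.
import pymtml  # provided by the new mthreads-ml-py dependency

pymtml.mtmlLibraryInit()
system = pymtml.mtmlLibraryInitSystem()
try:
    print("driver:", pymtml.mtmlSystemGetDriverVersion(system))
    for idx in range(pymtml.mtmlLibraryCountDevice()):
        dev = pymtml.mtmlLibraryInitDeviceByIndex(idx)  # hypothetical accessor
        # Context managers replace the old try/finally + mtmlDeviceFree* pairs.
        with pymtml.mtmlMemoryContext(dev) as mem:
            print("total bytes:", pymtml.mtmlMemoryGetTotal(mem))
        with pymtml.mtmlGpuContext(dev) as gpu:
            print("util %:", pymtml.mtmlGpuGetUtilization(gpu))
finally:
    pymtml.mtmlLibraryFreeSystem(system)
    pymtml.mtmlLibraryShutDown()
```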
gpustack_runtime/detector/nvidia.py
CHANGED

```diff
@@ -2,6 +2,8 @@ from __future__ import annotations
 
 import contextlib
 import logging
+import math
+import time
 from _ctypes import byref
 from functools import lru_cache
 from math import ceil
@@ -76,7 +78,7 @@
     def __init__(self):
        super().__init__(ManufacturerEnum.NVIDIA)
 
-    def detect(self) -> Devices | None:
+    def detect(self) -> Devices | None:  # noqa: PLR0915
        """
        Detect NVIDIA GPUs using pynvml.
 
@@ -125,103 +127,110 @@
         for dev_idx in range(dev_count):
             dev = pynvml.nvmlDeviceGetHandleByIndex(dev_idx)
 
-            dev_index = dev_idx
-            if envs.GPUSTACK_RUNTIME_DETECT_PHYSICAL_INDEX_PRIORITY:
-                if dev_files is None:
-                    dev_files = get_device_files(pattern=r"nvidia(?P<number>\d+)")
-                if len(dev_files) >= dev_count:
-                    dev_file = dev_files[dev_idx]
-                    if dev_file.number is not None:
-                        dev_index = dev_file.number
-            dev_uuid = pynvml.nvmlDeviceGetUUID(dev)
-
-            dev_cores = None
-            if not envs.GPUSTACK_RUNTIME_DETECT_NO_TOOLKIT_CALL:
-                with contextlib.suppress(pycuda.CUDAError):
-                    dev_gpudev = pycuda.cuDeviceGet(dev_idx)
-                    dev_cores = pycuda.cuDeviceGetAttribute(
-                        dev_gpudev,
-                        pycuda.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
-                    )
+            dev_cc_t = pynvml.nvmlDeviceGetCudaComputeCapability(dev)
+            dev_cc = ".".join(map(str, dev_cc_t))
 
-            dev_mem = 0
-            dev_mem_used = 0
+            dev_bdf = None
             with contextlib.suppress(pynvml.NVMLError):
-                dev_mem_info = pynvml.nvmlDeviceGetMemoryInfo(dev)
-                dev_mem = byte_to_mebibyte(  # byte to MiB
-                    dev_mem_info.total,
-                )
-                dev_mem_used = byte_to_mebibyte(  # byte to MiB
-                    dev_mem_info.used,
-                )
-            if dev_mem == 0:
-                dev_mem, dev_mem_used = get_memory()
+                dev_pci_info = pynvml.nvmlDeviceGetPciInfo(dev)
+                dev_bdf = str(dev_pci_info.busIdLegacy).lower()
 
-            dev_cores_util = None
-            with contextlib.suppress(pynvml.NVMLError):
-                dev_util_rates = pynvml.nvmlDeviceGetUtilizationRates(dev)
-                dev_cores_util = dev_util_rates.gpu
-            if dev_cores_util is None:
-                debug_log_warning(
-                    logger,
-                    "Failed to get device %d cores utilization, setting to 0",
-                    dev_index,
-                )
-                dev_cores_util = 0
-
-            dev_temp = None
+            dev_mig_mode = pynvml.NVML_DEVICE_MIG_DISABLE
             with contextlib.suppress(pynvml.NVMLError):
-                dev_temp = pynvml.nvmlDeviceGetTemperature(
-                    dev,
-                    pynvml.NVML_TEMPERATURE_GPU,
-                )
+                dev_mig_mode, _ = pynvml.nvmlDeviceGetMigMode(dev)
 
-            dev_power = None
-            dev_power_used = None
-            with contextlib.suppress(pynvml.NVMLError):
-                dev_power = pynvml.nvmlDeviceGetPowerManagementDefaultLimit(dev)
-                dev_power = dev_power // 1000  # mW to W
-                dev_power_used = (
-                    pynvml.nvmlDeviceGetPowerUsage(dev) // 1000
-                )  # mW to W
+            # With MIG disabled, treat as a single device.
+            if dev_mig_mode == pynvml.NVML_DEVICE_MIG_DISABLE:
+                dev_index = dev_idx
+                if envs.GPUSTACK_RUNTIME_DETECT_PHYSICAL_INDEX_PRIORITY:
+                    if dev_files is None:
+                        dev_files = get_device_files(
+                            pattern=r"nvidia(?P<number>\d+)",
+                        )
+                    if len(dev_files) >= dev_count:
+                        dev_file = dev_files[dev_idx]
+                        if dev_file.number is not None:
+                            dev_index = dev_file.number
 
-            dev_cc_t = pynvml.nvmlDeviceGetCudaComputeCapability(dev)
-            dev_cc = ".".join(map(str, dev_cc_t))
+                dev_name = pynvml.nvmlDeviceGetName(dev)
 
-
-
-
-            if
-
-
+                dev_uuid = pynvml.nvmlDeviceGetUUID(dev)
+
+                dev_cores = None
+                if not envs.GPUSTACK_RUNTIME_DETECT_NO_TOOLKIT_CALL:
+                    with contextlib.suppress(pycuda.CUDAError):
+                        dev_gpudev = pycuda.cuDeviceGet(dev_idx)
+                        dev_cores = pycuda.cuDeviceGetAttribute(
+                            dev_gpudev,
+                            pycuda.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+                        )
+
+                dev_cores_util = _get_sm_util_from_gpm_metrics(dev)
+                if dev_cores_util is None:
+                    with contextlib.suppress(pynvml.NVMLError):
+                        dev_util_rates = pynvml.nvmlDeviceGetUtilizationRates(dev)
+                        dev_cores_util = dev_util_rates.gpu
+                if dev_cores_util is None:
+                    debug_log_warning(
+                        logger,
+                        "Failed to get device %d cores utilization, setting to 0",
+                        dev_index,
+                    )
+                    dev_cores_util = 0
 
-
-
-
-
-
+                dev_mem = 0
+                dev_mem_used = 0
+                with contextlib.suppress(pynvml.NVMLError):
+                    dev_mem_info = pynvml.nvmlDeviceGetMemoryInfo(dev)
+                    dev_mem = byte_to_mebibyte(  # byte to MiB
+                        dev_mem_info.total,
+                    )
+                    dev_mem_used = byte_to_mebibyte(  # byte to MiB
+                        dev_mem_info.used,
+                    )
+                if dev_mem == 0:
+                    dev_mem, dev_mem_used = get_memory()
 
-            with contextlib.suppress(pynvml.NVMLError):
-                dev_fabric = pynvml.c_nvmlGpuFabricInfoV_t()
-                r = pynvml.nvmlDeviceGetGpuFabricInfoV(dev, byref(dev_fabric))
-                if r != pynvml.NVML_SUCCESS:
-                    dev_fabric = None
-                if dev_fabric.state != pynvml.NVML_GPU_FABRIC_STATE_COMPLETED:
-                    dev_fabric = None
-                if dev_fabric:
-                    dev_appendix["fabric_cluster_uuid"] = stringify_uuid(
-                        bytes(dev_fabric.clusterUuid),
+                dev_temp = None
+                with contextlib.suppress(pynvml.NVMLError):
+                    dev_temp = pynvml.nvmlDeviceGetTemperature(
+                        dev,
+                        pynvml.NVML_TEMPERATURE_GPU,
                     )
-                    dev_appendix["fabric_clique_id"] = dev_fabric.cliqueId
 
-
-
-
+                dev_power = None
+                dev_power_used = None
+                with contextlib.suppress(pynvml.NVMLError):
+                    dev_power = pynvml.nvmlDeviceGetPowerManagementDefaultLimit(dev)
+                    dev_power = dev_power // 1000  # mW to W
+                    dev_power_used = (
+                        pynvml.nvmlDeviceGetPowerUsage(dev) // 1000
+                    )  # mW to W
+
+                dev_is_vgpu = False
+                if dev_bdf and dev_bdf in pci_devs:
+                    dev_is_vgpu = _is_vgpu(pci_devs[dev_bdf].config)
+
+                dev_appendix = {
+                    "arch_family": _get_arch_family(dev_cc_t),
+                    "vgpu": dev_is_vgpu,
+                }
+                if dev_bdf:
+                    dev_appendix["bdf"] = dev_bdf
 
-
+                with contextlib.suppress(pynvml.NVMLError):
+                    dev_fabric = pynvml.c_nvmlGpuFabricInfoV_t()
+                    r = pynvml.nvmlDeviceGetGpuFabricInfoV(dev, byref(dev_fabric))
+                    if r != pynvml.NVML_SUCCESS:
+                        dev_fabric = None
+                    if dev_fabric.state != pynvml.NVML_GPU_FABRIC_STATE_COMPLETED:
+                        dev_fabric = None
+                    if dev_fabric:
+                        dev_appendix["fabric_cluster_uuid"] = stringify_uuid(
+                            bytes(dev_fabric.clusterUuid),
+                        )
+                        dev_appendix["fabric_clique_id"] = dev_fabric.cliqueId
 
-            if dev_mig_mode == pynvml.NVML_DEVICE_MIG_DISABLE:
-                dev_name = pynvml.nvmlDeviceGetName(dev)
                 ret.append(
                     Device(
                         manufacturer=self.manufacturer,
@@ -283,13 +292,20 @@
                         pynvml.nvmlDeviceGetPowerUsage(mdev) // 1000
                     )  # mW to W
 
-                    mdev_appendix =
+                    mdev_appendix = {
+                        "arch_family": _get_arch_family(dev_cc_t),
+                        "vgpu": True,
+                    }
+                    if dev_bdf:
+                        mdev_appendix["bdf"] = dev_bdf
 
                     mdev_gi_id = pynvml.nvmlDeviceGetGpuInstanceId(mdev)
                     mdev_appendix["gpu_instance_id"] = mdev_gi_id
                     mdev_ci_id = pynvml.nvmlDeviceGetComputeInstanceId(mdev)
                     mdev_appendix["compute_instance_id"] = mdev_ci_id
 
+                    mdev_cores_util = _get_sm_util_from_gpm_metrics(dev, mdev_gi_id)
+
                     if not mdev_name:
                         mdev_attrs = pynvml.nvmlDeviceGetAttributes(mdev)
 
@@ -374,6 +390,7 @@
                             runtime_version_original=sys_runtime_ver_original,
                             compute_capability=dev_cc,
                             cores=mdev_cores,
+                            cores_utilization=mdev_cores_util,
                             memory=mdev_mem,
                             memory_used=mdev_mem_used,
                             memory_utilization=get_utilization(mdev_mem_used, mdev_mem),
@@ -492,6 +509,97 @@
         return ret
 
 
+def _get_gpm_metrics(
+    metrics: list[int],
+    dev: pynvml.c_nvmlDevice_t,
+    gpu_instance_id: int | None = None,
+    interval: float = 0.1,
+) -> list[pynvml.c_nvmlGpmMetric_t] | None:
+    """
+    Get GPM metrics for a device or a MIG GPU instance.
+
+    Args:
+        metrics:
+            A list of GPM metric IDs to query.
+        dev:
+            The NVML device handle.
+        gpu_instance_id:
+            The GPU instance ID for MIG devices.
+        interval:
+            Interval in seconds between two samples.
+
+    Returns:
+        A list of GPM metric structures, or None if failed.
+
+    """
+    try:
+        dev_gpm_support = pynvml.nvmlGpmQueryDeviceSupport(dev)
+        if not bool(dev_gpm_support.isSupportedDevice):
+            return None
+    except pynvml.NVMLError:
+        debug_log_warning(logger, "Unsupported GPM query")
+        return None
+
+    dev_gpm_metrics = pynvml.c_nvmlGpmMetricsGet_t()
+    try:
+        dev_gpm_metrics.sample1 = pynvml.nvmlGpmSampleAlloc()
+        dev_gpm_metrics.sample2 = pynvml.nvmlGpmSampleAlloc()
+        if gpu_instance_id is None:
+            pynvml.nvmlGpmSampleGet(dev, dev_gpm_metrics.sample1)
+            time.sleep(interval)
+            pynvml.nvmlGpmSampleGet(dev, dev_gpm_metrics.sample2)
+        else:
+            pynvml.nvmlGpmMigSampleGet(dev, gpu_instance_id, dev_gpm_metrics.sample1)
+            time.sleep(interval)
+            pynvml.nvmlGpmMigSampleGet(dev, gpu_instance_id, dev_gpm_metrics.sample2)
+        dev_gpm_metrics.version = pynvml.NVML_GPM_METRICS_GET_VERSION
+        dev_gpm_metrics.numMetrics = len(metrics)
+        for metric_idx, metric in enumerate(metrics):
+            dev_gpm_metrics.metrics[metric_idx].metricId = metric
+        pynvml.nvmlGpmMetricsGet(dev_gpm_metrics)
+    except pynvml.NVMLError:
+        debug_log_exception(logger, "Failed to get GPM metrics")
+        return None
+    finally:
+        if dev_gpm_metrics.sample1:
+            pynvml.nvmlGpmSampleFree(dev_gpm_metrics.sample1)
+        if dev_gpm_metrics.sample2:
+            pynvml.nvmlGpmSampleFree(dev_gpm_metrics.sample2)
+    return list(dev_gpm_metrics.metrics)
+
+
+def _get_sm_util_from_gpm_metrics(
+    dev: pynvml.c_nvmlDevice_t,
+    gpu_instance_id: int | None = None,
+    interval: float = 0.1,
+) -> int | None:
+    """
+    Get SM utilization from GPM metrics.
+
+    Args:
+        dev:
+            The NVML device handle.
+        gpu_instance_id:
+            The GPU instance ID for MIG devices.
+        interval:
+            Interval in seconds between two samples.
+
+    Returns:
+        The SM utilization as an integer percentage, or None if failed.
+
+    """
+    dev_gpm_metrics = _get_gpm_metrics(
+        metrics=[pynvml.NVML_GPM_METRIC_SM_UTIL],
+        dev=dev,
+        gpu_instance_id=gpu_instance_id,
+        interval=interval,
+    )
+    if dev_gpm_metrics and not math.isnan(dev_gpm_metrics[0].value):
+        return int(dev_gpm_metrics[0].value)
+
+    return None
+
+
 def _get_arch_family(dev_cc_t: list[int]) -> str:
     """
     Get the architecture family based on the CUDA compute capability.
```
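GPM (GPU Performance Monitoring) derives rates from two snapshots, which is why the new helper allocates a sample pair, waits `interval` seconds between `nvmlGpmSampleGet` calls, and lets `nvmlGpmMetricsGet` compute the SM utilization. A standalone sketch of the same pattern; it uses only NVML calls that appear in the diff plus standard init/shutdown, and index 0 is just an example device:

```python
import math
import time

import pynvml

pynvml.nvmlInit()
try:
    dev = pynvml.nvmlDeviceGetHandleByIndex(0)
    # GPM is only available on supported (Hopper-era and newer) devices.
    if bool(pynvml.nvmlGpmQueryDeviceSupport(dev).isSupportedDevice):
        m = pynvml.c_nvmlGpmMetricsGet_t()
        m.sample1 = pynvml.nvmlGpmSampleAlloc()
        m.sample2 = pynvml.nvmlGpmSampleAlloc()
        try:
            # Two samples taken 100 ms apart; the metric is the rate between them.
            pynvml.nvmlGpmSampleGet(dev, m.sample1)
            time.sleep(0.1)
            pynvml.nvmlGpmSampleGet(dev, m.sample2)
            m.version = pynvml.NVML_GPM_METRICS_GET_VERSION
            m.numMetrics = 1
            m.metrics[0].metricId = pynvml.NVML_GPM_METRIC_SM_UTIL
            pynvml.nvmlGpmMetricsGet(m)
            value = m.metrics[0].value
            if not math.isnan(value):
                print(f"SM utilization: {int(value)}%")
        finally:
            pynvml.nvmlGpmSampleFree(m.sample1)
            pynvml.nvmlGpmSampleFree(m.sample2)
finally:
    pynvml.nvmlShutdown()
```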
gpustack_runtime/detector/pyrocmsmi/__init__.py
CHANGED

```diff
@@ -223,15 +223,9 @@ def rsmi_dev_target_graphics_version_get(device=0):
         c_version = c_uint64()
         ret = rocmsmiLib.rsmi_dev_target_graphics_version_get(device, byref(c_version))
         _rocmsmiCheckReturn(ret)
-
-
-
-        if "Instinct MI2" in dev_name:
-            hex_part = str(hex(int(version[2:]))).replace("0x", "")
-            version = version[:2] + hex_part
-        else:
-            version = str(c_version.value // 10 + c_version.value % 10)
-        return "gfx" + version
+        if c_version.value < 2000:
+            return "gfx" + str(c_version.value)
+        return "gfx" + hex(c_version.value)[2:]
     except AttributeError:
         return None
 
```
gpustack_runtime/envs.py
CHANGED

```diff
@@ -476,7 +476,7 @@ variables: dict[str, Callable[[], Any]] = {
         "hygon.com/devices=HIP_VISIBLE_DEVICES;"
         "iluvatar.ai/devices=CUDA_VISIBLE_DEVICES;"
         "metax-tech.com/devices=CUDA_VISIBLE_DEVICES;"
-        "mthreads.com/devices=CUDA_VISIBLE_DEVICES;"
+        "mthreads.com/devices=CUDA_VISIBLE_DEVICES,MUSA_VISIBLE_DEVICES;"
         "nvidia.com/devices=CUDA_VISIBLE_DEVICES;",
     ),
     list_sep=",",
```
{gpustack_runtime-0.1.39.post2.dist-info → gpustack_runtime-0.1.39.post3.dist-info}/METADATA
RENAMED

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gpustack-runtime
-Version: 0.1.39.post2
+Version: 0.1.39.post3
 Summary: GPUStack Runtime is library for detecting GPU resources and launching GPU workloads.
 Project-URL: Homepage, https://github.com/gpustack/runtime
 Project-URL: Bug Tracker, https://github.com/gpustack/gpustack/issues
@@ -15,8 +15,9 @@ Classifier: Programming Language :: Python :: 3.13
 Requires-Python: >=3.10
 Requires-Dist: argcomplete>=3.6.3
 Requires-Dist: docker>=7.1.0
-Requires-Dist: gpustack-runner>=0.1.23.
+Requires-Dist: gpustack-runner>=0.1.23.post5
 Requires-Dist: kubernetes>=33.1.0
+Requires-Dist: mthreads-ml-py>=2.2.10
 Requires-Dist: nvidia-ml-py>=13.580.65
 Requires-Dist: podman==5.6.0
 Requires-Dist: pyyaml
```