PyPI - gpustack-runtime - Versions diffs - 0.1.39.post1__py3-none-any.whl → 0.1.39.post3__py3-none-any.whl - Mend

gpustack-runtime 0.1.39.post1__py3-none-any.whl → 0.1.39.post3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

gpustack_runtime/__main__.py +6 -0
gpustack_runtime/_version.py +2 -2
gpustack_runtime/_version_appendix.py +1 -1
gpustack_runtime/cmds/__init__.py +6 -0
gpustack_runtime/cmds/deployer.py +170 -40
gpustack_runtime/deployer/__init__.py +197 -0
gpustack_runtime/deployer/__types__.py +382 -17
gpustack_runtime/deployer/__utils__.py +34 -0
gpustack_runtime/deployer/docker.py +280 -66
gpustack_runtime/deployer/kuberentes.py +288 -45
gpustack_runtime/deployer/podman.py +290 -66
gpustack_runtime/detector/__utils__.py +23 -0
gpustack_runtime/detector/amd.py +18 -10
gpustack_runtime/detector/hygon.py +7 -2
gpustack_runtime/detector/iluvatar.py +10 -2
gpustack_runtime/detector/mthreads.py +8 -12
gpustack_runtime/detector/nvidia.py +194 -86
gpustack_runtime/detector/pyhsa/__init__.py +7 -7
gpustack_runtime/detector/pyrocmsmi/__init__.py +3 -9
gpustack_runtime/envs.py +30 -18
{gpustack_runtime-0.1.39.post1.dist-info → gpustack_runtime-0.1.39.post3.dist-info}/METADATA +3 -2
{gpustack_runtime-0.1.39.post1.dist-info → gpustack_runtime-0.1.39.post3.dist-info}/RECORD +25 -26
gpustack_runtime/detector/pymtml/__init__.py +0 -770
{gpustack_runtime-0.1.39.post1.dist-info → gpustack_runtime-0.1.39.post3.dist-info}/WHEEL +0 -0
{gpustack_runtime-0.1.39.post1.dist-info → gpustack_runtime-0.1.39.post3.dist-info}/entry_points.txt +0 -0
{gpustack_runtime-0.1.39.post1.dist-info → gpustack_runtime-0.1.39.post3.dist-info}/licenses/LICENSE +0 -0

gpustack_runtime/deployer/podman.py CHANGED Viewed

@@ -28,7 +28,7 @@ from podman.domain.containers_create import CreateMixin
 from tqdm import tqdm
 from .. import envs
-from ..logging import debug_log_exception
+from ..logging import debug_log_exception, debug_log_warning
 from .__patches__ import patch_render_payload
 from .__types__ import (
     Container,
@@ -37,7 +37,7 @@ from .__types__ import (
     ContainerMountModeEnum,
     ContainerProfileEnum,
     ContainerRestartPolicyEnum,
-    Deployer,
+    EndoscopicDeployer,
     OperationError,
     UnsupportedError,
     WorkloadExecStream,
@@ -49,7 +49,13 @@ from .__types__ import (
     WorkloadStatusOperation,
     WorkloadStatusStateEnum,
 )
-from .__utils__ import _MiB, bytes_to_human_readable, replace_image_with, safe_json
+from .__utils__ import (
+    _MiB,
+    bytes_to_human_readable,
+    replace_image_with,
+    safe_json,
+    sensitive_env_var,
+)
 if TYPE_CHECKING:
     from collections.abc import Callable, Generator
@@ -144,7 +150,7 @@ class PodmanWorkloadPlan(WorkloadPlan):
         super().validate_and_default()
         # Adjust default image namespace if needed.
-        if namespace := envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_NAMESPACE:
+        if namespace := envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_NAMESPACE:
             self.pause_image = replace_image_with(
                 image=self.pause_image,
                 namespace=namespace,
@@ -299,7 +305,7 @@ Name of the Podman deployer.
 """
-class PodmanDeployer(Deployer):
+class PodmanDeployer(EndoscopicDeployer):
     """
     Deployer implementation for Podman containers.
     """
@@ -429,12 +435,12 @@ class PodmanDeployer(Deployer):
             tag = tag or "latest"
             auth_config = None
             if (
-                envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY_USERNAME
-                and envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY_PASSWORD
+                envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_USERNAME
+                and envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_PASSWORD
             ):
                 auth_config = {
-                    "username": envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY_USERNAME,
-                    "password": envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY_PASSWORD,
+                    "username": envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_USERNAME,
+                    "password": envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_PASSWORD,
                 }
             logs = self._client.api.pull(
@@ -1150,39 +1156,29 @@ class PodmanDeployer(Deployer):
         super().__init__(_NAME)
         self._client = self._get_client()
-    def _prepare_create(self):
+    def _prepare_mirrored_deployment(self):
         """
-        Prepare for creation.
+        Prepare for mirrored deployment.
         """
         # Prepare mirrored deployment if enabled.
         if self._mutate_create_options:
             return
         self._mutate_create_options = lambda o: o
-        if not envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT:
-            logger.debug("Mirrored deployment disabled")
-            return
         # Retrieve self-container info.
-        ## - Get Container name, default to hostname if not set.
-        self_container_id = envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME
-        if not self_container_id:
-            self_container_id = socket.gethostname()
-            logger.warning(
-                "Mirrored deployment enabled, but no Container name set, using hostname(%s) instead",
-                self_container_id,
-            )
         try:
-            self_container = self._find_self_container(self_container_id)
+            self_container = self._find_self_container()
+            if not self_container:
+                return
             logger.info(
                 "Mirrored deployment enabled, using self Container %s for options mirroring",
-                self_container.id[:12],
+                self_container.short_id,
             )
             self_image = self_container.image
         except podman.errors.APIError:
             logger.exception(
-                "Mirrored deployment enabled, but failed to get self Container %s, skipping",
-                self_container_id,
+                "Mirrored deployment enabled, but failed to get self Container, skipping",
             )
             return
@@ -1193,8 +1189,12 @@ class PodmanDeployer(Deployer):
         self_container_envs: dict[str, str] = dict(
             item.split("=", 1) for item in self_container.attrs["Config"].get("Env", [])
         )
-        self_image_envs: dict[str, str] = dict(
-            item.split("=", 1) for item in self_image.attrs["Config"].get("Env", [])
+        self_image_envs: dict[str, str] = (
+            dict(
+                item.split("=", 1) for item in self_image.attrs["Config"].get("Env", [])
+            )
+            if self_image.attrs["Config"]
+            else {}
         )
         mirrored_envs: dict[str, str] = {
             # Filter out gpustack-internal envs and same-as-image envs.
@@ -1342,17 +1342,10 @@ class PodmanDeployer(Deployer):
         self._mutate_create_options = mutate_create_options
-    def _find_self_container(
-        self,
-        self_container_id: str,
-    ) -> podman.domain.containers.Container:
+    def _find_self_container(self) -> podman.domain.containers.Container | None:
         """
         Find the current container if running inside a Podman container.
-        Args:
-            self_container_id:
-                The container name or ID to find.
         Returns:
             The Podman container if found, None otherwise.
@@ -1360,38 +1353,54 @@ class PodmanDeployer(Deployer):
             If failed to find itself.
         """
-        if envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME:
-            # Directly get container by name or ID.
-            return self._client.containers.get(self_container_id)
-        # Find containers that matches the hostname.
-        containers: list[podman.domain.containers.Container] = []
-        for c in self._client.containers.list(compatible=True):
-            # Ignore workload containers with host network enabled.
-            if _LABEL_WORKLOAD in c.labels:
-                continue
-            # Ignore containers that do not match the hostname.
-            if c.attrs["Config"].get("Hostname", "") != self_container_id:
-                continue
-            # Ignore containers that do not match the filter labels.
-            if envs.GPUSTACK_RUNTIME_PODMAN_MIRRORED_NAME_FILTER_LABELS and any(
-                c.labels.get(k) != v
-                for k, v in envs.GPUSTACK_RUNTIME_PODMAN_MIRRORED_NAME_FILTER_LABELS.items()
-            ):
-                continue
-            containers.append(c)
+        if not envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT:
+            logger.debug("Mirrored deployment disabled")
+            return None
-        # Validate found containers.
-        if len(containers) != 1:
-            msg = (
-                f"Found multiple Containers with the same hostname {self_container_id}, "
-                if len(containers) > 1
-                else f"Not found Container with hostname {self_container_id}, "
-                "please use `--env GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME=...` to specify the exact container name"
+        # Get container ID or hostname.
+        self_container_id = envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME
+        if not self_container_id:
+            self_container_id = socket.gethostname()
+            debug_log_warning(
+                logger,
+                "Mirrored deployment enabled, but no Container name set, using hostname(%s) instead",
+                self_container_id,
             )
-            raise podman.errors.NotFound(msg)
-        return containers[0]
+        if envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME:
+            # Directly get container.
+            self_container = self._client.containers.get(self_container_id)
+        else:
+            # Find containers that matches the hostname.
+            containers: list[podman.domain.containers.Container] = []
+            for c in self._client.containers.list(compatible=True):
+                # Ignore workload containers with host network enabled.
+                if _LABEL_WORKLOAD in c.labels:
+                    continue
+                # Ignore containers that do not match the hostname.
+                if c.attrs["Config"].get("Hostname", "") != self_container_id:
+                    continue
+                # Ignore containers that do not match the filter labels.
+                if envs.GPUSTACK_RUNTIME_PODMAN_MIRRORED_NAME_FILTER_LABELS and any(
+                    c.labels.get(k) != v
+                    for k, v in envs.GPUSTACK_RUNTIME_PODMAN_MIRRORED_NAME_FILTER_LABELS.items()
+                ):
+                    continue
+                containers.append(c)
+            # Validate found containers.
+            if len(containers) != 1:
+                msg = (
+                    f"Found multiple Containers with the same hostname {self_container_id}, "
+                    if len(containers) > 1
+                    else f"Not found Container with hostname {self_container_id}, "
+                    "please use `--env GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME=...` to specify the exact Container name"
+                )
+                raise podman.errors.NotFound(msg)
+            self_container = containers[0]
+        return self_container
     @_supported
     def _create(self, workload: WorkloadPlan):
@@ -1417,7 +1426,7 @@ class PodmanDeployer(Deployer):
             msg = f"Invalid workload type: {type(workload)}"
             raise TypeError(msg)
-        self._prepare_create()
+        self._prepare_mirrored_deployment()
         if isinstance(workload, WorkloadPlan):
             workload = PodmanWorkloadPlan(**workload.__dict__)
@@ -1731,6 +1740,11 @@ class PodmanDeployer(Deployer):
             msg = f"Failed to fetch logs for container {container.name} of workload {name}{_detail_api_call_error(e)}"
             raise OperationError(msg) from e
         else:
+            if not follow:
+                result = bytearray()
+                for chunk in output:
+                    result.extend(chunk)
+                return result.decode("utf-8")
             return output
     @_supported
@@ -1815,6 +1829,216 @@ class PodmanDeployer(Deployer):
                 return output
             return PodmanWorkloadExecStream(output)
+    @_supported
+    def _inspect(
+        self,
+        name: WorkloadName,
+        namespace: WorkloadNamespace | None = None,
+    ) -> str | None:
+        """
+        Inspect a Podman workload.
+        Args:
+            name:
+                The name of the workload.
+            namespace:
+                The namespace of the workload.
+        Returns:
+            The inspection result as a JSON string. None if not found.
+        Raises:
+            UnsupportedError:
+                If Podman is not supported in the current environment.
+            OperationError:
+                If the Podman workload fails to inspect.
+        """
+        workload = self._get(name=name, namespace=namespace)
+        if not workload:
+            return None
+        d_containers = getattr(workload, "_d_containers", [])
+        if not d_containers:
+            return None
+        result = []
+        for c in d_containers:
+            c_attrs = c.attrs
+            # Mask sensitive environment variables
+            if "Env" in c_attrs["Config"]:
+                for i, env in enumerate(c_attrs["Config"]["Env"] or []):
+                    env_name, _ = env.split("=", maxsplit=1)
+                    if sensitive_env_var(env_name):
+                        c_attrs["Config"]["Env"][i] = f"{env_name}=******"
+            result.append(c_attrs)
+        return safe_json(result, indent=2)
+    def _find_self_container_for_endoscopy(self) -> podman.domain.containers.Container:
+        """
+        Find the self container for endoscopy.
+        Only works in mirrored deployment mode.
+        Returns:
+            The self container object.
+        Raises:
+            UnsupportedError:
+                If endoscopy is not supported in the current environment.
+        """
+        try:
+            self_container = self._find_self_container()
+        except podman.errors.APIError as e:
+            msg = "Endoscopy is not supported in the current environment: Mirrored deployment enabled, but failed to get self Container"
+            raise UnsupportedError(msg) from e
+        except Exception as e:
+            msg = "Endoscopy is not supported in the current environment: Failed to get self Container"
+            raise UnsupportedError(msg) from e
+        if not self_container:
+            msg = "Endoscopy is not supported in the current environment: Mirrored deployment disabled"
+            raise UnsupportedError(msg)
+        return self_container
+    def _endoscopic_logs(
+        self,
+        timestamps: bool = False,
+        tail: int | None = None,
+        since: int | None = None,
+        follow: bool = False,
+    ) -> Generator[bytes | str, None, None] | bytes | str:
+        """
+        Get the logs of the deployer itself.
+        Only works in mirrored deployment mode.
+        Args:
+            timestamps:
+                Show timestamps in the logs.
+            tail:
+                Number of lines to show from the end of the logs.
+            since:
+                Show logs since the given epoch in seconds.
+            follow:
+                Whether to follow the logs.
+        Returns:
+            The logs as a byte string or a generator yielding byte strings if follow is True.
+        Raises:
+            UnsupportedError:
+                If endoscopy is not supported in the current environment.
+            OperationError:
+                If the deployer fails to get logs.
+        """
+        self_container = self._find_self_container_for_endoscopy()
+        logs_options = {
+            "timestamps": timestamps,
+            "tail": tail if tail >= 0 else None,
+            "since": since,
+            "follow": follow,
+        }
+        try:
+            output = self_container.logs(
+                stream=follow,
+                **logs_options,
+            )
+        except podman.errors.APIError as e:
+            msg = f"Failed to fetch logs for self Container {self_container.short_id}{_detail_api_call_error(e)}"
+            raise OperationError(msg) from e
+        else:
+            if not follow:
+                result = bytearray()
+                for chunk in output:
+                    result.extend(chunk)
+                return result.decode("utf-8")
+            return output
+    def _endoscopic_exec(
+        self,
+        detach: bool = True,
+        command: list[str] | None = None,
+        args: list[str] | None = None,
+    ) -> WorkloadExecStream | bytes | str:
+        """
+        Execute a command in the deployer itself.
+        Only works in mirrored deployment mode.
+        Args:
+            detach:
+                Whether to detach from the command.
+            command:
+                The command to execute.
+                If not specified, use /bin/sh and implicitly attach.
+            args:
+                The arguments to pass to the command.
+        Returns:
+            If detach is False, return a WorkloadExecStream.
+            otherwise, return the output of the command as a byte string or string.
+        Raises:
+            UnsupportedError:
+                If endoscopy is not supported in the current environment.
+            OperationError:
+                If the deployer fails to execute the command.
+        """
+        self_container = self._find_self_container_for_endoscopy()
+        attach = not detach or not command
+        exec_options = {
+            "stdout": True,
+            "stderr": True,
+            "stdin": attach,
+            "socket": attach,
+            "tty": attach,
+            "cmd": [*command, *(args or [])] if command else ["/bin/sh"],
+        }
+        try:
+            _, output = self_container.exec_run(
+                detach=False,
+                **exec_options,
+            )
+        except podman.errors.APIError as e:
+            msg = f"Failed to exec command in self Container {self_container.short_id}{_detail_api_call_error(e)}"
+            raise OperationError(msg) from e
+        else:
+            if not attach:
+                return output
+            return PodmanWorkloadExecStream(output)
+    def _endoscopic_inspect(self) -> str:
+        """
+        Inspect the deployer itself.
+        Only works in mirrored deployment mode.
+        Returns:
+            The inspection result.
+        Raises:
+            UnsupportedError:
+                If endoscopy is not supported in the current environment.
+            OperationError:
+                If the deployer fails to execute the command.
+        """
+        self_container = self._find_self_container_for_endoscopy()
+        c_attrs = self_container.attrs
+        # Mask sensitive environment variables
+        if "Env" in c_attrs["Config"]:
+            for i, env in enumerate(c_attrs["Config"]["Env"] or []):
+                env_name, _ = env.split("=", maxsplit=1)
+                if sensitive_env_var(env_name):
+                    c_attrs["Config"]["Env"][i] = f"{env_name}=******"
+        return safe_json(c_attrs, indent=2)
 def _has_restart_policy(
     container: podman.domain.containers.Container,

gpustack_runtime/detector/__utils__.py CHANGED Viewed

@@ -951,3 +951,26 @@ def bitmask_to_str(bitmask_list: list) -> str:
         offset += get_bits_size()
     return list_to_range_str(sorted(bits_lists))
+def get_physical_function_by_bdf(bdf: str) -> str:
+    """
+    Get the physical function BDF for a given PCI device BDF address.
+    Args:
+        bdf:
+            The PCI device BDF address (e.g., "0000:00:1f.0").
+    Returns:
+        The physical function BDF if found, otherwise returns the original BDF.
+    """
+    if bdf:
+        with contextlib.suppress(Exception):
+            dev_path = Path(f"/sys/bus/pci/devices/{bdf}")
+            if dev_path.exists():
+                physfn_path = dev_path / "physfn"
+                if physfn_path.exists():
+                    physfn_realpath = physfn_path.resolve()
+                    return physfn_realpath.name
+    return bdf

gpustack_runtime/detector/amd.py CHANGED Viewed

@@ -16,6 +16,7 @@ from .__utils__ import (
     get_brief_version,
     get_numa_node_by_bdf,
     get_pci_devices,
+    get_physical_function_by_bdf,
     get_utilization,
     map_numa_node_to_cpu_affinity,
 )
@@ -107,8 +108,12 @@ class AMDDetector(Detector):
                     asic_serial = dev_gpu_asic_info.get("asic_serial")
                     dev_uuid = f"GPU-{(asic_serial[2:]).lower()}"
                 else:
-                    dev_uuid = f"GPU-{pyrocmsmi.rsmi_dev_unique_id_get(dev_idx)[2:]}"
-                dev_hsa_agent = hsa_agents.get(dev_uuid)
+                    dev_uuid = ""
+                    with contextlib.suppress(pyrocmsmi.ROCMSMIError):
+                        dev_uuid = (
+                            f"GPU-{pyrocmsmi.rsmi_dev_unique_id_get(dev_idx)[2:]}"
+                        )
+                dev_hsa_agent = hsa_agents.get(dev_uuid, pyhsa.Agent())
                 dev_gpu_driver_info = pyamdsmi.amdsmi_get_gpu_driver_info(dev)
                 dev_driver_ver = dev_gpu_driver_info.get("driver_version")
@@ -119,8 +124,13 @@ class AMDDetector(Detector):
                 dev_cc = dev_hsa_agent.compute_capability
                 if not dev_cc:
-                    with contextlib.suppress(pyrocmsmi.ROCMSMIError):
-                        dev_cc = pyrocmsmi.rsmi_dev_target_graphics_version_get(dev_idx)
+                    if "target_graphics_version" in dev_gpu_asic_info:
+                        dev_cc = dev_gpu_asic_info.get("target_graphics_version")
+                    else:
+                        with contextlib.suppress(pyrocmsmi.ROCMSMIError):
+                            dev_cc = pyrocmsmi.rsmi_dev_target_graphics_version_get(
+                                dev_idx,
+                            )
                 dev_bdf = None
                 dev_card_id = None
@@ -195,15 +205,13 @@ class AMDDetector(Detector):
                         dev_power = pyrocmsmi.rsmi_dev_power_cap_get(dev_idx)
                         dev_power_used = pyrocmsmi.rsmi_dev_power_get(dev_idx)
-                dev_compute_partition = None
-                with contextlib.suppress(pyamdsmi.AmdSmiException):
-                    dev_compute_partition = pyamdsmi.amdsmi_get_gpu_compute_partition(
-                        dev,
-                    )
+                dev_is_vgpu = False
+                if dev_bdf:
+                    dev_is_vgpu = get_physical_function_by_bdf(dev_bdf) != dev_bdf
                 dev_appendix = {
                     "arch_family": _get_arch_family(dev_asic_family_id),
-                    "vgpu": dev_compute_partition is not None,
+                    "vgpu": dev_is_vgpu,
                 }
                 if dev_bdf:
                     dev_appendix["bdf"] = dev_bdf

gpustack_runtime/detector/hygon.py CHANGED Viewed

@@ -16,6 +16,7 @@ from .__utils__ import (
     get_brief_version,
     get_numa_node_by_bdf,
     get_pci_devices,
+    get_physical_function_by_bdf,
     get_utilization,
     map_numa_node_to_cpu_affinity,
 )
@@ -108,7 +109,7 @@ class HygonDetector(Detector):
                 dev_index = dev_idx
                 dev_uuid = f"GPU-{pyrocmsmi.rsmi_dev_unique_id_get(dev_idx)[2:]}"
-                dev_hsa_agent = hsa_agents.get(dev_uuid)
+                dev_hsa_agent = hsa_agents.get(dev_uuid, pyhsa.Agent())
                 dev_name = dev_hsa_agent.name
                 if not dev_name:
@@ -156,8 +157,12 @@ class HygonDetector(Detector):
                 dev_power = pyrocmsmi.rsmi_dev_power_cap_get(dev_idx)
                 dev_power_used = pyrocmsmi.rsmi_dev_power_get(dev_idx)
+                dev_is_vgpu = False
+                if dev_bdf:
+                    dev_is_vgpu = get_physical_function_by_bdf(dev_bdf) != dev_bdf
                 dev_appendix = {
-                    "vgpu": False,
+                    "vgpu": dev_is_vgpu,
                 }
                 if dev_bdf is not None:
                     dev_appendix["bdf"] = dev_bdf

gpustack_runtime/detector/iluvatar.py CHANGED Viewed

@@ -23,6 +23,7 @@ from .__utils__ import (
     get_numa_node_by_bdf,
     get_numa_nodeset_size,
     get_pci_devices,
+    get_physical_function_by_bdf,
     get_utilization,
     map_numa_node_to_cpu_affinity,
     support_command,
@@ -165,13 +166,20 @@ class IluvatarDetector(Detector):
                     if dev_cc_t:
                         dev_cc = ".".join(map(str, dev_cc_t))
+                dev_bdf = None
+                with contextlib.suppress(pyixml.NVMLError):
+                    dev_pci_info = pyixml.nvmlDeviceGetPciInfo(dev)
+                    dev_bdf = str(dev_pci_info.busIdLegacy).lower()
                 dev_is_vgpu = False
-                dev_pci_info = pyixml.nvmlDeviceGetPciInfo(dev)
+                if dev_bdf:
+                    dev_is_vgpu = get_physical_function_by_bdf(dev_bdf) != dev_bdf
                 dev_appendix = {
                     "vgpu": dev_is_vgpu,
-                    "bdf": str(dev_pci_info.busIdLegacy).lower(),
                 }
+                if dev_bdf:
+                    dev_appendix["bdf"] = dev_bdf
                 ret.append(
                     Device(

gpustack_runtime/detector/mthreads.py CHANGED Viewed

@@ -3,9 +3,10 @@ from __future__ import annotations
 import logging
 from functools import lru_cache
+import pymtml
 from .. import envs
 from ..logging import debug_log_exception, debug_log_warning
-from . import pymtml
 from .__types__ import (
     Detector,
     Device,
@@ -105,9 +106,8 @@ class MThreadsDetector(Detector):
         try:
             pymtml.mtmlLibraryInit()
-            sys_driver_ver = pymtml.mtmlSystemGetDriverVersion()
+            system = pymtml.mtmlLibraryInitSystem()
+            sys_driver_ver = pymtml.mtmlSystemGetDriverVersion(system)
             dev_count = pymtml.mtmlLibraryCountDevice()
             for dev_idx in range(dev_count):
                 dev_index = dev_idx
@@ -139,25 +139,20 @@ class MThreadsDetector(Detector):
                 dev_mem = 0
                 dev_mem_used = 0
-                devmem = pymtml.mtmlDeviceInitMemory(dev)
-                try:
+                with pymtml.mtmlMemoryContext(dev) as devmem:
                     dev_mem = byte_to_mebibyte(  # byte to MiB
                         pymtml.mtmlMemoryGetTotal(devmem),
                     )
                     dev_mem_used = byte_to_mebibyte(  # byte to MiB
                         pymtml.mtmlMemoryGetUsed(devmem),
                     )
-                finally:
-                    pymtml.mtmlDeviceFreeMemory(devmem)
                 dev_cores_util = None
                 dev_temp = None
-                devgpu = pymtml.mtmlDeviceInitGpu(dev)
-                try:
+                with pymtml.mtmlGpuContext(dev) as devgpu:
                     dev_cores_util = pymtml.mtmlGpuGetUtilization(devgpu)
                     dev_temp = pymtml.mtmlGpuGetTemperature(devgpu)
-                finally:
-                    pymtml.mtmlDeviceFreeGpu(devgpu)
                 if dev_cores_util is None:
                     debug_log_warning(
                         logger,
@@ -198,6 +193,7 @@ class MThreadsDetector(Detector):
             debug_log_exception(logger, "Failed to process devices fetching")
             raise
         finally:
+            pymtml.mtmlLibraryFreeSystem(system)
             pymtml.mtmlLibraryShutDown()
         return ret

gpustack-runtime 0.1.39.post1__py3-none-any.whl → 0.1.39.post3__py3-none-any.whl

gpustack-runtime 0.1.39.post1__py3-none-any.whl → 0.1.39.post3__py3-none-any.whl