gpustack-runtime 0.1.41.post3-py3-none-any.whl → 0.1.42.post1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gpustack_runtime/_version.py +2 -2
- gpustack_runtime/_version_appendix.py +1 -1
- gpustack_runtime/cmds/detector.py +4 -2
- gpustack_runtime/deployer/__types__.py +314 -233
- gpustack_runtime/deployer/cdi/__init__.py +1 -1
- gpustack_runtime/deployer/cdi/__types__.py +2 -2
- gpustack_runtime/deployer/cdi/__utils__.py +4 -1
- gpustack_runtime/deployer/cdi/amd.py +6 -8
- gpustack_runtime/deployer/cdi/ascend.py +7 -9
- gpustack_runtime/deployer/cdi/hygon.py +6 -8
- gpustack_runtime/deployer/cdi/iluvatar.py +6 -8
- gpustack_runtime/deployer/cdi/metax.py +6 -8
- gpustack_runtime/deployer/cdi/thead.py +6 -8
- gpustack_runtime/deployer/docker.py +133 -146
- gpustack_runtime/deployer/k8s/deviceplugin/__init__.py +13 -8
- gpustack_runtime/deployer/k8s/deviceplugin/plugin.py +26 -21
- gpustack_runtime/deployer/kuberentes.py +89 -108
- gpustack_runtime/deployer/podman.py +113 -120
- gpustack_runtime/detector/__init__.py +2 -0
- gpustack_runtime/detector/__types__.py +26 -0
- gpustack_runtime/detector/__utils__.py +3 -0
- gpustack_runtime/detector/amd.py +32 -10
- gpustack_runtime/detector/ascend.py +67 -13
- gpustack_runtime/detector/cambricon.py +3 -0
- gpustack_runtime/detector/hygon.py +22 -3
- gpustack_runtime/detector/iluvatar.py +15 -7
- gpustack_runtime/detector/metax.py +16 -6
- gpustack_runtime/detector/mthreads.py +22 -8
- gpustack_runtime/detector/nvidia.py +148 -140
- gpustack_runtime/detector/pyacl/__init__.py +34 -14
- gpustack_runtime/detector/pydcmi/__init__.py +4 -2
- gpustack_runtime/detector/pyixml/__init__.py +16 -0
- gpustack_runtime/detector/pyrocmsmi/__init__.py +14 -0
- gpustack_runtime/detector/thead.py +145 -134
- gpustack_runtime/envs.py +7 -6
- {gpustack_runtime-0.1.41.post3.dist-info → gpustack_runtime-0.1.42.post1.dist-info}/METADATA +2 -2
- gpustack_runtime-0.1.42.post1.dist-info/RECORD +67 -0
- gpustack_runtime-0.1.41.post3.dist-info/RECORD +0 -67
- {gpustack_runtime-0.1.41.post3.dist-info → gpustack_runtime-0.1.42.post1.dist-info}/WHEEL +0 -0
- {gpustack_runtime-0.1.41.post3.dist-info → gpustack_runtime-0.1.42.post1.dist-info}/entry_points.txt +0 -0
- {gpustack_runtime-0.1.41.post3.dist-info → gpustack_runtime-0.1.42.post1.dist-info}/licenses/LICENSE +0 -0
@@ -82,7 +82,7 @@ def dump_config(
     if cdi_path.exists():
         actual = cdi_path.read_text(encoding="utf-8")
         if actual == expected:
-            return expected,
+            return expected, None

     cdi_path.write_text(expected, encoding="utf-8")
     return expected, str(cdi_path)

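The changed return contract is small but load-bearing: callers later in this release branch on whether the second element is a path or `None`. A standalone sketch of that contract (hypothetical `dump_config_sketch`, reduced to the lines shown above):

```python
from pathlib import Path


def dump_config_sketch(expected: str, cdi_path: Path) -> tuple[str, str | None]:
    # Second element is None when the on-disk spec already matches,
    # otherwise the path that was (re)written.
    if cdi_path.exists():
        actual = cdi_path.read_text(encoding="utf-8")
        if actual == expected:
            return expected, None
    cdi_path.write_text(expected, encoding="utf-8")
    return expected, str(cdi_path)


spec = '{"cdiVersion": "0.6.0"}'
target = Path("/tmp/example-cdi.json")
print(dump_config_sketch(spec, target))  # first call writes: (spec, '/tmp/example-cdi.json')
print(dump_config_sketch(spec, target))  # second call is a no-op: (spec, None)
```
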
@@ -527,7 +527,7 @@ class Config(dict):
         self,
         kind: str,
         devices: list[ConfigDevice],
-        container_edits:
+        container_edits: ConfigContainerEdits | None = None,
         cdi_version: str = _DEFAULT_CDI_VERSION,
         annotations: dict[str, str] | None = None,
     ):

@@ -581,7 +581,7 @@ class Config(dict):
         return self["kind"]

     @property
-    def container_edits(self) ->
+    def container_edits(self) -> ConfigContainerEdits | None:
         """
         Return the list of container edits in the CDI configuration.

@@ -147,6 +147,7 @@ def path_to_cdi_mount(
     path: str,
     container_path: str | None = None,
     options: list[str] | None = None,
+    ignore_notfound: bool = False,
 ) -> ConfigMount | None:
     """
     Convert a file/directory path to a ConfigMount.

@@ -158,13 +159,15 @@ def path_to_cdi_mount(
             Path to the file or directory inside the container.
         options:
             Mount options.
+        ignore_notfound:
+            Whether to ignore if the path does not exist.

     Returns:
         The ConfigMount object.
         None if the path does not exist.

     """
-    if not Path(path).exists():
+    if not Path(path).exists() and not ignore_notfound:
         return None

     if container_path is None:

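A minimal standalone sketch of the new guard, assuming the same semantics as above; `path_to_mount_sketch` is hypothetical and returns a plain dict in place of `ConfigMount`:

```python
from pathlib import Path


def path_to_mount_sketch(
    path: str,
    container_path: str | None = None,
    ignore_notfound: bool = False,
) -> dict | None:
    # A missing host path only short-circuits when ignore_notfound is False.
    if not Path(path).exists() and not ignore_notfound:
        return None
    # Default the container path to the host path.
    if container_path is None:
        container_path = path
    return {"hostPath": path, "containerPath": container_path}


# A non-existent path now still yields a mount entry when explicitly requested.
print(path_to_mount_sketch("/dev/does-not-exist"))                        # None
print(path_to_mount_sketch("/dev/does-not-exist", ignore_notfound=True))  # dict
```
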
@@ -135,12 +135,10 @@ class AMDGenerator(Generator):
         return Config(
             kind=kind,
             devices=cdi_devices,
-            container_edits=
-
-
-
-
-
-            ),
-            ],
+            container_edits=ConfigContainerEdits(
+                env=[
+                    f"{runtime_env}=void",
+                ],
+                device_nodes=common_device_nodes,
+            ),
         )

@@ -152,13 +152,11 @@ class AscendGenerator(Generator):
         return Config(
             kind=kind,
             devices=cdi_devices,
-            container_edits=
-
-
-
-
-
-
-            ),
-            ],
+            container_edits=ConfigContainerEdits(
+                env=[
+                    f"{runtime_env}=void",
+                ],
+                device_nodes=common_device_nodes,
+                mounts=common_mounts,
+            ),
         )

@@ -136,12 +136,10 @@ class HygonGenerator(Generator):
         return Config(
             kind=kind,
             devices=cdi_devices,
-            container_edits=
-
-
-
-
-
-            ),
-            ],
+            container_edits=ConfigContainerEdits(
+                env=[
+                    f"{runtime_env}=void",
+                ],
+                device_nodes=common_device_nodes,
+            ),
         )

@@ -125,12 +125,10 @@ class IluvatarGenerator(Generator):
         return Config(
             kind=kind,
             devices=cdi_devices,
-            container_edits=
-
-
-
-
-
-            ),
-            ],
+            container_edits=ConfigContainerEdits(
+                env=[
+                    f"{runtime_env}=void",
+                ],
+                device_nodes=common_device_nodes,
+            ),
         )

@@ -137,12 +137,10 @@ class MetaXGenerator(Generator):
         return Config(
             kind=kind,
             devices=cdi_devices,
-            container_edits=
-
-
-
-
-
-            ),
-            ],
+            container_edits=ConfigContainerEdits(
+                env=[
+                    f"{runtime_env}=void",
+                ],
+                device_nodes=common_device_nodes,
+            ),
         )

@@ -126,12 +126,10 @@ class THeadGenerator(Generator):
         return Config(
             kind=kind,
             devices=cdi_devices,
-            container_edits=
-
-
-
-
-
-            ),
-            ],
+            container_edits=ConfigContainerEdits(
+                env=[
+                    f"{runtime_env}=void",
+                ],
+                device_nodes=common_device_nodes,
+            ),
         )

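All six generators now build their global container edits through `ConfigContainerEdits` instead of hand-assembled structures. For orientation, the CDI document such edits serialize to has roughly this shape (field names follow the CDI specification; the kind, env name, and device-node paths below are illustrative, not taken from the package):

```python
# Illustrative CDI spec shape; values are examples, not the package's exact output.
cdi_spec = {
    "cdiVersion": "0.6.0",
    "kind": "amd.com/gpu",  # illustrative kind
    "devices": [
        {
            "name": "0",
            "containerEdits": {"deviceNodes": [{"path": "/dev/dri/renderD128"}]},
        },
    ],
    # Global edits applied to every container that requests a device of this kind,
    # mirroring ConfigContainerEdits(env=[...], device_nodes=...) above.
    "containerEdits": {
        "env": ["EXAMPLE_VISIBLE_DEVICES=void"],  # mirrors f"{runtime_env}=void"
        "deviceNodes": [{"path": "/dev/kfd"}],
    },
}
```
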
@@ -4,13 +4,11 @@ import contextlib
 import io
 import json
 import logging
-import operator
 import os
 import socket
 import sys
 import tarfile
 from dataclasses import dataclass, field
-from functools import reduce
 from math import ceil
 from pathlib import Path
 from typing import TYPE_CHECKING, Any

@@ -81,17 +79,6 @@ class DockerWorkloadPlan(WorkloadPlan):
             Image used for the pause container.
         unhealthy_restart_image (str):
             Image used for unhealthy restart container.
-        resource_key_runtime_env_mapping: (dict[str, str]):
-            Mapping from resource names to environment variable names for device allocation,
-            which is used to tell the Container Runtime which GPUs to mount into the container.
-            For example, {"nvidia.com/gpu": "NVIDIA_VISIBLE_DEVICES"},
-            which sets the "NVIDIA_VISIBLE_DEVICES" environment variable to the allocated GPU device IDs.
-            With privileged mode, the container can access all GPUs even if specified.
-        resource_key_backend_env_mapping: (dict[str, list[str]]):
-            Mapping from resource names to environment variable names for device runtime,
-            which is used to tell the Device Runtime (e.g., ROCm, CUDA, OneAPI) which GPUs to use inside the container.
-            For example, {"nvidia.com/gpu": ["CUDA_VISIBLE_DEVICES"]},
-            which sets the "CUDA_VISIBLE_DEVICES" environment variable to the allocated GPU device IDs.
         namespace (str | None):
             Namespace of the workload.
         name (str):

@@ -845,7 +832,7 @@ class DockerDeployer(EndoscopicDeployer):
             msg = f"Failed to upload ephemeral files to container {container.name}"
             raise OperationError(msg)

-    def _create_containers(
+    def _create_containers(
         self,
         workload: DockerWorkloadPlan,
         ephemeral_volume_name_mapping: dict[str, str],

@@ -955,146 +942,146 @@ class DockerDeployer(EndoscopicDeployer):
                 envs.GPUSTACK_RUNTIME_DOCKER_RESOURCE_INJECTION_POLICY.lower()
                 == "cdi"
             )
+            fmt = "plain" if not cdi else "cdi"

-            r_k_runtime_env = workload.resource_key_runtime_env_mapping or {}
-            r_k_backend_env = workload.resource_key_backend_env_mapping or {}
-            vd_manus, vd_env, vd_cdis, vd_values = (
-                self.get_visible_devices_materials()
-            )
             for r_k, r_v in c.resources.items():
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                if r_k in r_k_runtime_env:
-                    # Set env if resource key is mapped.
-                    runtime_env = [r_k_runtime_env[r_k]]
-                elif (
-                    r_k == envs.GPUSTACK_RUNTIME_DEPLOY_AUTOMAP_RESOURCE_KEY
-                ):
-                    # Set env if auto-mapping key is matched.
-                    runtime_env = list(vd_env.keys())
-                else:
-                    continue
-
-                if r_k in r_k_backend_env:
-                    # Set env if resource key is mapped.
-                    backend_env = r_k_backend_env[r_k]
-                else:
-                    # Otherwise, use the default backend env names.
-                    backend_env = reduce(
-                        operator.add,
-                        list(vd_env.values()),
-                    )
+                if r_k == "cpu":
+                    if isinstance(r_v, int | float):
+                        create_options["cpu_shares"] = ceil(r_v * 1024)
+                    elif isinstance(r_v, str) and r_v.isdigit():
+                        create_options["cpu_shares"] = ceil(float(r_v) * 1024)
+                    continue
+                if r_k == "memory":
+                    if isinstance(r_v, int):
+                        create_options["mem_limit"] = r_v
+                        create_options["mem_reservation"] = r_v
+                        create_options["memswap_limit"] = r_v
+                    elif isinstance(r_v, str):
+                        v = r_v.lower().removesuffix("i")
+                        create_options["mem_limit"] = v
+                        create_options["mem_reservation"] = v
+                        create_options["memswap_limit"] = v
+                    continue

-
+                if (
+                    r_k
+                    in envs.GPUSTACK_RUNTIME_DEPLOY_RESOURCE_KEY_MAP_RUNTIME_VISIBLE_DEVICES
+                ):
+                    # Set env if resource key is mapped.
+                    runtime_envs = [
+                        envs.GPUSTACK_RUNTIME_DEPLOY_RESOURCE_KEY_MAP_RUNTIME_VISIBLE_DEVICES[
+                            r_k
+                        ],
+                    ]
+                elif r_k == envs.GPUSTACK_RUNTIME_DEPLOY_AUTOMAP_RESOURCE_KEY:
+                    # Set env if auto-mapping key is matched.
+                    runtime_envs = self.get_runtime_envs()
+                else:
+                    continue

-
-
-
-
-
-
+                privileged = create_options.get("privileged", False)
+                resource_values = [x.strip() for x in r_v.split(",")]
+
+                # Generate CDI config if not yet.
+                if cdi and envs.GPUSTACK_RUNTIME_DOCKER_CDI_SPECS_GENERATE:
+                    for ren in runtime_envs:
+                        manu = self.get_manufacturer(ren)
+                        cdi_config, cdi_config_path = cdi_dump_config(
+                            manufacturer=manu,
+                            output=envs.GPUSTACK_RUNTIME_DEPLOY_CDI_SPECS_DIRECTORY,
+                        )
+                        if cdi_config and cdi_config_path:
+                            if logger.isEnabledFor(logging.DEBUG):
+                                logger.debug(
+                                    "Generated CDI configuration for '%s' at '%s':\n%s",
+                                    manu,
+                                    cdi_config_path,
+                                    cdi_config,
                                 )
-
-
-
-
-
-                # Then, set container backend visible devices env to all devices,
-                # so that the container backend (e.g., NVIDIA Container Toolkit) can handle it,
-                # and mount corresponding libs if needed.
-                for re in runtime_env:
-                    # Request device via CDI.
-                    if cdi:
-                        rv = [
-                            f"{vd_cdis[re]}={v}"
-                            for v in (vd_values.get(re) or ["all"])
-                        ]
-                        if "device_requests" not in create_options:
-                            create_options["device_requests"] = []
-                        create_options["device_requests"].append(
-                            docker.types.DeviceRequest(
-                                driver="cdi",
-                                count=0,
-                                device_ids=rv,
-                            ),
-                        )
-                        continue
-                    # Request device via visible devices env.
-                    rv = ",".join(vd_values.get(re) or ["all"])
-                    create_options["environment"][re] = rv
-                else:
-                    # Set env to the allocated device IDs if no privileged,
-                    # otherwise, set container backend visible devices env to all devices,
-                    # so that the container backend (e.g., NVIDIA Container Toolkit) can handle it,
-                    # and mount corresponding libs if needed.
-                    for re in runtime_env:
-                        # Request device via CDI.
-                        if cdi:
-                            if not privileged:
-                                rv = [
-                                    f"{vd_cdis[re]}={v.strip()}"
-                                    for v in r_v.split(",")
-                                ]
-                            else:
-                                rv = [
-                                    f"{vd_cdis[re]}={v}"
-                                    for v in (vd_values.get(re) or ["all"])
-                                ]
-                            if "device_requests" not in create_options:
-                                create_options["device_requests"] = []
-                            create_options["device_requests"].append(
-                                docker.types.DeviceRequest(
-                                    driver="cdi",
-                                    count=0,
-                                    device_ids=rv,
-                                ),
-                            )
-                            continue
-                        # Request device via visible devices env.
-                        if not privileged:
-                            rv = str(r_v)
-                        else:
-                            rv = ",".join(vd_values.get(re) or ["all"])
-                        create_options["environment"][re] = rv
-
-                # Configure runtime device access environment variables.
-                if r_v != "all" and privileged:
-                    for be in backend_env:
-                        create_options["environment"][be] = (
-                            self.align_backend_visible_devices_env_values(
-                                be,
-                                str(r_v),
-                            )
+                            else:
+                                logger.info(
+                                    "Generated CDI configuration for '%s' at '%s'",
+                                    manu,
+                                    cdi_config_path,
                                 )
+                        elif cdi_config:
+                            logger.info(
+                                "Reuse generated CDI configuration for '%s'",
+                                manu,
+                            )
+                        else:
+                            logger.warning(
+                                "Delegated CDI configuration by other tools for '%s', "
+                                "e.g. for NVIDIA devices, please follow NVIDIA Container Toolkit Manual CDI Specification Generation to generate the CDI configuration, "
+                                "see https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html#manual-cdi-specification-generation",
+                                manu,
+                            )

-
-
-
-
-
-
-
-
+                # Request devices.
+                if r_v == "all":
+                    # Configure privileged.
+                    create_options["privileged"] = True
+                    # Request all devices.
+                    for ren in runtime_envs:
+                        r_vs = self.get_runtime_visible_devices(ren, fmt)
+                        # Request device via CDI.
+                        if cdi:
+                            if "device_requests" not in create_options:
+                                create_options["device_requests"] = []
+                            create_options["device_requests"].append(
+                                docker.types.DeviceRequest(
+                                    driver="cdi",
+                                    count=0,
+                                    device_ids=r_vs,
+                                ),
+                            )
+                            continue
+                        # Request device via visible devices env.
+                        create_options["environment"][ren] = ",".join(r_vs)
+                else:
+                    # Request specific devices.
+                    for ren in runtime_envs:
+                        # Request all devices if privileged,
+                        # otherwise, normalize requested devices.
+                        if privileged:
+                            r_vs = self.get_runtime_visible_devices(ren, fmt)
+                        else:
+                            r_vs = self.map_runtime_visible_devices(
+                                ren,
+                                resource_values,
+                                fmt,
                             )
-
-
-                        if
-                        create_options["
+                        # Request device via CDI.
+                        if cdi:
+                            if "device_requests" not in create_options:
+                                create_options["device_requests"] = []
+                            create_options["device_requests"].append(
+                                docker.types.DeviceRequest(
+                                    driver="cdi",
+                                    count=0,
+                                    device_ids=r_vs,
+                                ),
+                            )
+                            continue
+                        # Request device via visible devices env.
+                        create_options["environment"][ren] = ",".join(r_vs)
+
+                # If not requesting all devices but privileged,
+                # must configure visible devices.
+                if r_v != "all" and privileged:
+                    b_vs = self.map_backend_visible_devices(
+                        runtime_envs,
+                        resource_values,
+                    )
+                    create_options["environment"].update(b_vs)
+
+                # Configure affinity if applicable.
+                create_options.update(
+                    self.map_visible_devices_affinities(
+                        runtime_envs,
+                        resource_values,
+                    ),
+                )

             # Parameterize mounts.
             self._append_container_mounts(

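The rewritten resource loop converts `cpu`/`memory` into Docker resource options and everything else into either a CDI `DeviceRequest` or a visible-devices environment variable. A minimal sketch of the same calls through the Docker SDK for Python, assuming a daemon with CDI support enabled; the image and device IDs are illustrative:

```python
import docker

client = docker.from_env()

# cpu/memory resources map onto create options; device resources become a CDI
# DeviceRequest when the injection policy is "cdi" (with the "plain" policy the
# deployer sets the runtime's visible-devices env var instead).
container = client.containers.create(
    image="ubuntu:24.04",
    command="sleep infinity",
    cpu_shares=2048,       # ceil(2 CPUs * 1024)
    mem_limit="8g",
    mem_reservation="8g",
    device_requests=[
        docker.types.DeviceRequest(
            driver="cdi",
            count=0,
            device_ids=["nvidia.com/gpu=0", "nvidia.com/gpu=1"],  # illustrative IDs
        ),
    ],
)
print(container.id)
```
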
@@ -78,28 +78,33 @@ async def serve_async(
         allocation_policy == "cdi"
         and envs.GPUSTACK_RUNTIME_KUBERNETES_KDP_CDI_SPECS_GENERATE
     ):
-
+        cdi_config, cdi_config_path = cdi_dump_config(
             manufacturer=manu,
             output=cdi_generation_output,
         )
-        if
+        if cdi_config and cdi_config_path:
             if logger.isEnabledFor(logging.DEBUG):
                 logger.debug(
                     "Generated CDI configuration for '%s' at '%s':\n%s",
                     manu,
-
-
+                    cdi_config_path,
+                    cdi_config,
                 )
             else:
                 logger.info(
                     "Generated CDI configuration for '%s' at '%s'",
                     manu,
-
+                    cdi_config_path,
                 )
+        elif cdi_config:
+            logger.info(
+                "Reuse generated CDI configuration for '%s'",
+                manu,
+            )
         else:
             logger.warning(
-                "Delegated CDI configuration by other tools for
-                "e.g. NVIDIA Container Toolkit Manual CDI Specification Generation, "
+                "Delegated CDI configuration by other tools for '%s', "
+                "e.g. for NVIDIA devices, please follow NVIDIA Container Toolkit Manual CDI Specification Generation to generate the CDI configuration, "
                 "see https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html#manual-cdi-specification-generation",
                 manu,
             )

@@ -323,7 +328,7 @@ def get_device_allocation_policy(

     if manufacturer in [
         ManufacturerEnum.AMD,
-
+        ManufacturerEnum.ASCEND,
         ManufacturerEnum.HYGON,
         ManufacturerEnum.ILUVATAR,
         ManufacturerEnum.METAX,

@@ -11,7 +11,8 @@ import grpc
 from grpc_interceptor import AsyncServerInterceptor
 from grpc_interceptor.exceptions import GrpcException

-from ....detector import Device, str_range_to_list
+from ....detector import Device, DeviceMemoryStatusEnum, str_range_to_list
+from ....detector.__utils__ import get_numa_node_size
 from ...cdi import (
     generate_config,
     manufacturer_to_cdi_kind,

@@ -40,6 +41,7 @@ from ..types.kubelet.deviceplugin.v1beta1 import (
     RegisterRequest,
     RegistrationStub,
     TopologyInfo,
+    Unhealthy,
     Version,
     add_DevicePluginServicer_to_server,
 )

@@ -159,7 +161,7 @@ class SharableDevicePlugin(PluginServer, DevicePluginServicer):
         self._runtime_env = manufacturer_to_runtime_env(device.manufacturer)
         self._kdp_resource = cdi_kind_to_kdp_resource(
             cdi_kind=self._cdi_kind,
-            device_index=device.index,
+            device_index=str(device.index),
         )

         super().__init__(self._kdp_resource)

@@ -334,25 +336,31 @@ class SharableDevicePlugin(PluginServer, DevicePluginServicer):
             The response containing the list of devices.

         """
-        device_id = (
-            self._device.uuid if self._id_by == "uuid" else str(self._device.index)
-        )
-
         dp_devices: list[DevicePluginDevice] = []
-        dp_device_health =
+        dp_device_health = (
+            Healthy
+            if self._device.memory_status == DeviceMemoryStatusEnum.HEALTHY
+            else Unhealthy
+        )
         dp_device_topo = TopologyInfo(
             nodes=[
                 NUMANode(
                     ID=node_id,
                 )
-                for node_id in
-
+                for node_id in (
+                    str_range_to_list(
+                        self._device.appendix.get("numa", ""),
+                    )
+                    or list(range(get_numa_node_size()))
                 )
             ],
         )

         for device_replica in range(1, self._max_allocations + 1):
-            dp_device_id = _to_device_plugin_device_id(
+            dp_device_id = _to_device_plugin_device_id(
+                str(self._device.index),
+                device_replica,
+            )
             dp_devices.append(
                 DevicePluginDevice(
                     ID=dp_device_id,

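The NUMA topology now falls back to every node when the device reports no `numa` range. A hypothetical re-implementation of a `str_range_to_list`-style helper to make that fallback concrete (the real helper lives in `gpustack_runtime.detector`; its exact behaviour is assumed here):

```python
def str_range_to_list_sketch(value: str) -> list[int]:
    # Assumed semantics: "0-1,3" -> [0, 1, 3]; empty input -> [].
    result: list[int] = []
    for part in value.split(","):
        part = part.strip()
        if not part:
            continue
        if "-" in part:
            lo, hi = part.split("-", 1)
            result.extend(range(int(lo), int(hi) + 1))
        else:
            result.append(int(part))
    return result


numa_node_count = 2  # stand-in for get_numa_node_size()
# Mirrors the fallback above: an empty "numa" appendix maps to every node.
print(str_range_to_list_sketch("") or list(range(numa_node_count)))  # [0, 1]
print(str_range_to_list_sketch("0-1,3"))                             # [0, 1, 3]
```
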
@@ -419,28 +427,25 @@ class SharableDevicePlugin(PluginServer, DevicePluginServicer):
         req: ContainerAllocateRequest,
     ) -> ContainerAllocateResponse:
         policy = self._allocation_policy
-
+        device_id = self._device.uuid
+        if self._id_by == "index":
+            device_id = str(self._device.index)

         # CDI device allocation.
         if policy == "cdi":
-
-
-            device_id, _ = _from_device_plugin_device_id(dp_device_id)
-            cdi_devices.append(
+            return ContainerAllocateResponse(
+                cdi_devices=[
                     CDIDevice(
                         name=f"{self._cdi_kind}={device_id}",
                     ),
-
-
-            return ContainerAllocateResponse(
-                cdi_devices=cdi_devices,
+                ],
             )

         # Environment variable device allocation.
         if policy == "env":
             return ContainerAllocateResponse(
                 envs={
-                    self._runtime_env:
+                    self._runtime_env: device_id,
                 },
             )

@@ -509,7 +514,7 @@ class SharableDevicePlugin(PluginServer, DevicePluginServicer):
 @lru_cache
 def cdi_kind_to_kdp_resource(
     cdi_kind: str,
-    device_index:
+    device_index: str,
 ) -> str:
     """
     Map CDI kind and device index to a Kubernetes Device Plugin resource name.