PyPI - gpustack-runtime - Versions diffs - 0.1.39.post1__py3-none-any.whl → 0.1.39.post3__py3-none-any.whl - Mend

gpustack-runtime 0.1.39.post1__py3-none-any.whl → 0.1.39.post3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

gpustack_runtime/__main__.py +6 -0
gpustack_runtime/_version.py +2 -2
gpustack_runtime/_version_appendix.py +1 -1
gpustack_runtime/cmds/__init__.py +6 -0
gpustack_runtime/cmds/deployer.py +170 -40
gpustack_runtime/deployer/__init__.py +197 -0
gpustack_runtime/deployer/__types__.py +382 -17
gpustack_runtime/deployer/__utils__.py +34 -0
gpustack_runtime/deployer/docker.py +280 -66
gpustack_runtime/deployer/kuberentes.py +288 -45
gpustack_runtime/deployer/podman.py +290 -66
gpustack_runtime/detector/__utils__.py +23 -0
gpustack_runtime/detector/amd.py +18 -10
gpustack_runtime/detector/hygon.py +7 -2
gpustack_runtime/detector/iluvatar.py +10 -2
gpustack_runtime/detector/mthreads.py +8 -12
gpustack_runtime/detector/nvidia.py +194 -86
gpustack_runtime/detector/pyhsa/__init__.py +7 -7
gpustack_runtime/detector/pyrocmsmi/__init__.py +3 -9
gpustack_runtime/envs.py +30 -18
{gpustack_runtime-0.1.39.post1.dist-info → gpustack_runtime-0.1.39.post3.dist-info}/METADATA +3 -2
{gpustack_runtime-0.1.39.post1.dist-info → gpustack_runtime-0.1.39.post3.dist-info}/RECORD +25 -26
gpustack_runtime/detector/pymtml/__init__.py +0 -770
{gpustack_runtime-0.1.39.post1.dist-info → gpustack_runtime-0.1.39.post3.dist-info}/WHEEL +0 -0
{gpustack_runtime-0.1.39.post1.dist-info → gpustack_runtime-0.1.39.post3.dist-info}/entry_points.txt +0 -0
{gpustack_runtime-0.1.39.post1.dist-info → gpustack_runtime-0.1.39.post3.dist-info}/licenses/LICENSE +0 -0

gpustack_runtime/deployer/docker.py CHANGED Viewed

@@ -26,7 +26,7 @@ from docker.utils import parse_repository_tag
 from tqdm import tqdm
 from .. import envs
-from ..logging import debug_log_exception
+from ..logging import debug_log_exception, debug_log_warning
 from .__types__ import (
     Container,
     ContainerCheck,
@@ -34,7 +34,7 @@ from .__types__ import (
     ContainerMountModeEnum,
     ContainerProfileEnum,
     ContainerRestartPolicyEnum,
-    Deployer,
+    EndoscopicDeployer,
     OperationError,
     UnsupportedError,
     WorkloadExecStream,
@@ -46,7 +46,13 @@ from .__types__ import (
     WorkloadStatusOperation,
     WorkloadStatusStateEnum,
 )
-from .__utils__ import _MiB, bytes_to_human_readable, replace_image_with, safe_json
+from .__utils__ import (
+    _MiB,
+    bytes_to_human_readable,
+    replace_image_with,
+    safe_json,
+    sensitive_env_var,
+)
 if TYPE_CHECKING:
     from collections.abc import Callable, Generator
@@ -141,7 +147,7 @@ class DockerWorkloadPlan(WorkloadPlan):
         super().validate_and_default()
         # Adjust default image namespace if needed.
-        if namespace := envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_NAMESPACE:
+        if namespace := envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_NAMESPACE:
             self.pause_image = replace_image_with(
                 image=self.pause_image,
                 namespace=namespace,
@@ -296,7 +302,7 @@ Name of the Docker deployer.
 """
-class DockerDeployer(Deployer):
+class DockerDeployer(EndoscopicDeployer):
     """
     Deployer implementation for Docker containers.
     """
@@ -426,12 +432,12 @@ class DockerDeployer(Deployer):
             tag = tag or "latest"
             auth_config = None
             if (
-                envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY_USERNAME
-                and envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY_PASSWORD
+                envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_USERNAME
+                and envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_PASSWORD
             ):
                 auth_config = {
-                    "username": envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY_USERNAME,
-                    "password": envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY_PASSWORD,
+                    "username": envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_USERNAME,
+                    "password": envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_PASSWORD,
                 }
             logs = self._client.api.pull(
@@ -1174,39 +1180,29 @@ class DockerDeployer(Deployer):
         super().__init__(_NAME)
         self._client = self._get_client()
-    def _prepare_create(self):
+    def _prepare_mirrored_deployment(self):
         """
-        Prepare for creation.
+        Prepare for mirrored deployment.
         """
         # Prepare mirrored deployment if enabled.
         if self._mutate_create_options:
             return
         self._mutate_create_options = lambda o: o
-        if not envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT:
-            logger.debug("Mirrored deployment disabled")
-            return
         # Retrieve self-container info.
-        ## - Get Container name, default to hostname if not set.
-        self_container_id = envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME
-        if not self_container_id:
-            self_container_id = socket.gethostname()
-            logger.warning(
-                "Mirrored deployment enabled, but no Container name set, using hostname(%s) instead",
-                self_container_id,
-            )
         try:
-            self_container = self._find_self_container(self_container_id)
+            self_container = self._find_self_container()
+            if not self_container:
+                return
             logger.info(
                 "Mirrored deployment enabled, using self Container %s for options mirroring",
-                self_container.id[:12],
+                self_container.short_id,
             )
             self_image = self_container.image
         except docker.errors.APIError:
             logger.exception(
-                "Mirrored deployment enabled, but failed to get self Container %s, skipping",
-                self_container_id,
+                "Mirrored deployment enabled, but failed to get self Container, skipping",
             )
             return
@@ -1217,8 +1213,12 @@ class DockerDeployer(Deployer):
         self_container_envs: dict[str, str] = dict(
             item.split("=", 1) for item in self_container.attrs["Config"].get("Env", [])
         )
-        self_image_envs: dict[str, str] = dict(
-            item.split("=", 1) for item in self_image.attrs["Config"].get("Env", [])
+        self_image_envs: dict[str, str] = (
+            dict(
+                item.split("=", 1) for item in self_image.attrs["Config"].get("Env", [])
+            )
+            if self_image.attrs["Config"]
+            else {}
         )
         mirrored_envs: dict[str, str] = {
             # Filter out gpustack-internal envs and same-as-image envs.
@@ -1406,17 +1406,10 @@ class DockerDeployer(Deployer):
         self._mutate_create_options = mutate_create_options
-    def _find_self_container(
-        self,
-        self_container_id: str,
-    ) -> docker.models.containers.Container:
+    def _find_self_container(self) -> docker.models.containers.Container | None:
         """
         Find the current container if running inside a Docker container.
-        Args:
-            self_container_id:
-                The container name or ID to find.
         Returns:
             The Docker container if found, None otherwise.
@@ -1424,38 +1417,54 @@ class DockerDeployer(Deployer):
             If failed to find itself.
         """
-        if envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME:
-            # Directly get container by name or ID.
-            return self._client.containers.get(self_container_id)
-        # Find containers that matches the hostname.
-        containers: list[docker.models.containers.Container] = []
-        for c in self._client.containers.list():
-            # Ignore workload containers with host network enabled.
-            if _LABEL_WORKLOAD in c.labels:
-                continue
-            # Ignore containers that do not match the hostname.
-            if c.attrs["Config"].get("Hostname", "") != self_container_id:
-                continue
-            # Ignore containers that do not match the filter labels.
-            if envs.GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS and any(
-                c.labels.get(k) != v
-                for k, v in envs.GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS.items()
-            ):
-                continue
-            containers.append(c)
+        if not envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT:
+            logger.debug("Mirrored deployment disabled")
+            return None
-        # Validate found containers.
-        if len(containers) != 1:
-            msg = (
-                f"Found multiple Containers with the same hostname {self_container_id}, "
-                if len(containers) > 1
-                else f"Not found Container with hostname {self_container_id}, "
-                "please use `--env GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME=...` to specify the exact container name"
+        # Get container ID or hostname.
+        self_container_id = envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME
+        if not self_container_id:
+            self_container_id = socket.gethostname()
+            debug_log_warning(
+                logger,
+                "Mirrored deployment enabled, but no Container name set, using hostname(%s) instead",
+                self_container_id,
             )
-            raise docker.errors.NotFound(msg)
-        return containers[0]
+        if envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME:
+            # Directly get container.
+            self_container = self._client.containers.get(self_container_id)
+        else:
+            # Find containers that matches the hostname.
+            containers: list[docker.models.containers.Container] = []
+            for c in self._client.containers.list():
+                # Ignore workload containers with host network enabled.
+                if _LABEL_WORKLOAD in c.labels:
+                    continue
+                # Ignore containers that do not match the hostname.
+                if c.attrs["Config"].get("Hostname", "") != self_container_id:
+                    continue
+                # Ignore containers that do not match the filter labels.
+                if envs.GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS and any(
+                    c.labels.get(k) != v
+                    for k, v in envs.GPUSTACK_RUNTIME_DOCKER_MIRRORED_NAME_FILTER_LABELS.items()
+                ):
+                    continue
+                containers.append(c)
+            # Validate found containers.
+            if len(containers) != 1:
+                msg = (
+                    f"Found multiple Containers with the same hostname {self_container_id}, "
+                    if len(containers) > 1
+                    else f"Not found Container with hostname {self_container_id}, "
+                    "please use `--env GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME=...` to specify the exact Container name"
+                )
+                raise docker.errors.NotFound(msg)
+            self_container = containers[0]
+        return self_container
     @_supported
     def _create(self, workload: WorkloadPlan):
@@ -1481,7 +1490,7 @@ class DockerDeployer(Deployer):
             msg = f"Invalid workload type: {type(workload)}"
             raise TypeError(msg)
-        self._prepare_create()
+        self._prepare_mirrored_deployment()
         if isinstance(workload, WorkloadPlan):
             workload = DockerWorkloadPlan(**workload.__dict__)
@@ -1881,6 +1890,211 @@ class DockerDeployer(Deployer):
                 return output
             return DockerWorkloadExecStream(output)
+    @_supported
+    def _inspect(
+        self,
+        name: WorkloadName,
+        namespace: WorkloadNamespace | None = None,
+    ) -> str | None:
+        """
+        Inspect a Docker workload.
+        Looks the workload up via `_get` and dumps the attributes of each of its
+        Docker containers, masking the value of every environment variable that
+        `sensitive_env_var` flags.
+        Args:
+            name:
+                The name of the workload.
+            namespace:
+                The namespace of the workload.
+        Returns:
+            The inspection result as a JSON string. None if not found.
+        Raises:
+            UnsupportedError:
+                If Docker is not supported in the current environment.
+            OperationError:
+                If the Docker workload fails to inspect.
+        """
+        workload = self._get(name=name, namespace=namespace)
+        if not workload:
+            return None
+        d_containers = getattr(workload, "_d_containers", [])
+        if not d_containers:
+            return None
+        result = []
+        for c in d_containers:
+            c_attrs = c.attrs
+            # "Config" or its "Env" may be missing or null; fall back to an empty list.
+            c_envs = (c_attrs.get("Config") or {}).get("Env") or []
+            # Mask sensitive environment variables, in place on the attrs being dumped.
+            for i, env in enumerate(c_envs):
+                # `partition` tolerates an entry without "=", where unpacking `split` raises.
+                env_name, sep, _ = env.partition("=")
+                if sep and sensitive_env_var(env_name):
+                    c_envs[i] = f"{env_name}=******"
+            result.append(c_attrs)
+        return safe_json(result, indent=2)
+    def _find_self_container_for_endoscopy(self) -> docker.models.containers.Container:
+        """
+        Resolve the Container this deployer runs in, for endoscopic operations.
+        Delegates to `_find_self_container` and turns every failure, as well as
+        an empty result, into an UnsupportedError.
+        Returns:
+            The self container object.
+        Raises:
+            UnsupportedError:
+                If endoscopy is not supported in the current environment.
+        """
+        try:
+            container = self._find_self_container()
+        except Exception as e:
+            # Docker API failures get a dedicated message; anything else is generic.
+            if isinstance(e, docker.errors.APIError):
+                msg = "Endoscopy is not supported in the current environment: Mirrored deployment enabled, but failed to get self Container"
+            else:
+                msg = "Endoscopy is not supported in the current environment: Failed to get self Container"
+            raise UnsupportedError(msg) from e
+        if container:
+            return container
+        # An empty result is how `_find_self_container` reports mirrored deployment being off.
+        msg = "Endoscopy is not supported in the current environment: Mirrored deployment disabled"
+        raise UnsupportedError(msg)
+    def _endoscopic_logs(
+        self,
+        timestamps: bool = False,
+        tail: int | None = None,
+        since: int | None = None,
+        follow: bool = False,
+    ) -> Generator[bytes | str, None, None] | bytes | str:
+        """
+        Get the logs of the deployer itself.
+        Only works in mirrored deployment mode.
+        Args:
+            timestamps:
+                Show timestamps in the logs.
+            tail:
+                Number of lines to show from the end of the logs.
+                None or a negative value is forwarded to Docker as None.
+            since:
+                Show logs since the given epoch in seconds.
+            follow:
+                Whether to follow the logs.
+        Returns:
+            The logs as a byte string or a generator yielding byte strings if follow is True.
+        Raises:
+            UnsupportedError:
+                If endoscopy is not supported in the current environment.
+            OperationError:
+                If the deployer fails to get logs.
+        """
+        self_container = self._find_self_container_for_endoscopy()
+        logs_options = {
+            "timestamps": timestamps,
+            # `tail` defaults to None, so guard before comparing: `None >= 0` raises TypeError.
+            "tail": tail if tail is not None and tail >= 0 else None,
+            "since": since,
+            "follow": follow,
+        }
+        try:
+            # Stream only when following, so a one-shot call returns the logs directly.
+            output = self_container.logs(
+                stream=follow,
+                **logs_options,
+            )
+        except docker.errors.APIError as e:
+            msg = f"Failed to fetch logs for self Container {self_container.short_id}{_detail_api_call_error(e)}"
+            raise OperationError(msg) from e
+        else:
+            return output
+    def _endoscopic_exec(
+        self,
+        detach: bool = True,
+        command: list[str] | None = None,
+        args: list[str] | None = None,
+    ) -> WorkloadExecStream | bytes | str:
+        """
+        Run a command inside the Container hosting this deployer.
+        Only works in mirrored deployment mode.
+        Args:
+            detach:
+                Whether to detach from the command.
+            command:
+                The command to execute.
+                If not specified, use /bin/sh and implicitly attach.
+            args:
+                The arguments to pass to the command.
+        Returns:
+            A WorkloadExecStream when attached (detach is False or no command given),
+            otherwise the output of the command as a byte string or string.
+        Raises:
+            UnsupportedError:
+                If endoscopy is not supported in the current environment.
+            OperationError:
+                If the deployer fails to execute the command.
+        """
+        container = self._find_self_container_for_endoscopy()
+        # Attach unless the caller asked to detach and supplied a command.
+        interactive = not (detach and command)
+        if command:
+            cmd = [*command, *(args or [])]
+        else:
+            cmd = ["/bin/sh"]
+        try:
+            _exit_code, output = container.exec_run(
+                detach=False,
+                stdout=True,
+                stderr=True,
+                stdin=interactive,
+                socket=interactive,
+                tty=interactive,
+                cmd=cmd,
+            )
+        except docker.errors.APIError as e:
+            msg = f"Failed to exec command in self Container {container.short_id}{_detail_api_call_error(e)}"
+            raise OperationError(msg) from e
+        return DockerWorkloadExecStream(output) if interactive else output
+    def _endoscopic_inspect(self) -> str:
+        """
+        Inspect the deployer itself.
+        Only works in mirrored deployment mode.
+        Dumps the attributes of the self Container, masking the value of every
+        environment variable that `sensitive_env_var` flags.
+        Returns:
+            The inspection result.
+        Raises:
+            UnsupportedError:
+                If endoscopy is not supported in the current environment.
+            OperationError:
+                If the deployer fails to inspect itself.
+        """
+        self_container = self._find_self_container_for_endoscopy()
+        c_attrs = self_container.attrs
+        # "Config" or its "Env" may be missing or null; fall back to an empty list.
+        c_envs = (c_attrs.get("Config") or {}).get("Env") or []
+        # Mask sensitive environment variables, in place on the attrs being dumped.
+        for i, env in enumerate(c_envs):
+            # `partition` tolerates an entry without "=", where unpacking `split` raises.
+            env_name, sep, _ = env.partition("=")
+            if sep and sensitive_env_var(env_name):
+                c_envs[i] = f"{env_name}=******"
+        return safe_json(c_attrs, indent=2)
 def _has_restart_policy(
     container: docker.models.containers.Container,

gpustack-runtime 0.1.39.post1__py3-none-any.whl → 0.1.39.post3__py3-none-any.whl

gpustack-runtime 0.1.39.post1__py3-none-any.whl → 0.1.39.post3__py3-none-any.whl