gpustack-runtime 0.1.39__py3-none-any.whl → 0.1.39.post2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,6 @@ import json
5
5
  import logging
6
6
  import operator
7
7
  import os
8
- import socket
9
8
  from dataclasses import dataclass, field
10
9
  from enum import Enum
11
10
  from functools import lru_cache, reduce
@@ -24,7 +23,7 @@ from .__types__ import (
24
23
  Container,
25
24
  ContainerPort,
26
25
  ContainerProfileEnum,
27
- Deployer,
26
+ EndoscopicDeployer,
28
27
  UnsupportedError,
29
28
  WorkloadExecStream,
30
29
  WorkloadName,
@@ -40,6 +39,7 @@ from .__utils__ import (
40
39
  fnv1a_32_hex,
41
40
  fnv1a_64_hex,
42
41
  safe_yaml,
42
+ sensitive_env_var,
43
43
  validate_rfc1123_domain_name,
44
44
  )
45
45
 
@@ -277,7 +277,7 @@ Name of the Kubernetes deployer.
277
277
  """
278
278
 
279
279
 
280
- class KubernetesDeployer(Deployer):
280
+ class KubernetesDeployer(EndoscopicDeployer):
281
281
  """
282
282
  Deployer implementation for Kubernetes.
283
283
  """
@@ -1195,9 +1195,9 @@ class KubernetesDeployer(Deployer):
1195
1195
  self._client = self._get_client()
1196
1196
  self._node_name = envs.GPUSTACK_RUNTIME_KUBERNETES_NODE_NAME
1197
1197
 
1198
- def _prepare_create(self):
1198
+ def _prepare_mirrored_deployment(self):
1199
1199
  """
1200
- Prepare for creation.
1200
+ Prepare for mirrored deployment.
1201
1201
 
1202
1202
  """
1203
1203
  # Get the first node name of the cluster if not configured.
@@ -1207,61 +1207,39 @@ class KubernetesDeployer(Deployer):
1207
1207
 
1208
1208
  # Create image pull secrets if default registry credentials are set.
1209
1209
  if not self._image_pull_secret and (
1210
- envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY_USERNAME
1211
- and envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY_PASSWORD
1210
+ envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_USERNAME
1211
+ and envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_PASSWORD
1212
1212
  ):
1213
1213
  registry = (
1214
- envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY or "index.docker.io"
1214
+ envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY
1215
+ or "index.docker.io"
1215
1216
  )
1216
1217
  self._image_pull_secret = self._apply_image_pull_secret(
1217
1218
  registry=f"https://{registry}/v1/",
1218
- username=envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY_USERNAME,
1219
- password=envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_IMAGE_REGISTRY_PASSWORD,
1219
+ username=envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_USERNAME,
1220
+ password=envs.GPUSTACK_RUNTIME_DEPLOY_DEFAULT_CONTAINER_REGISTRY_PASSWORD,
1220
1221
  )
1221
1222
 
1222
1223
  # Prepare mirrored deployment if enabled.
1223
1224
  if self._mutate_create_pod:
1224
1225
  return
1225
1226
  self._mutate_create_pod = lambda o: o
1226
- if not envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT:
1227
- logger.debug("Mirrored deployment disabled")
1228
- return
1229
1227
 
1230
1228
  # Retrieve self-pod info.
1231
- core_api = kubernetes.client.CoreV1Api(self._client)
1232
- ## - Get Pod name, default to hostname if not set.
1233
- self_pod_name = envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME
1234
- if not self_pod_name:
1235
- self_pod_name = socket.gethostname()
1236
- logger.warning(
1237
- "Mirrored deployment enabled, but no Pod name set, using hostname(%s) instead",
1238
- self_pod_name,
1239
- )
1240
- ## - Get Pod namespace, default to "default" if not found.
1241
- try:
1242
- self_pod_namespace_f = Path(
1243
- "/var/run/secrets/kubernetes.io/serviceaccount/namespace",
1244
- )
1245
- self_pod_namespace = self_pod_namespace_f.read_text(
1246
- encoding="utf-8",
1247
- ).strip()
1248
- except (FileNotFoundError, OSError):
1249
- self_pod_namespace = "default"
1250
- logger.warning(
1251
- "Mirrored deployment enabled, but no Pod namespace found, using 'default' instead",
1252
- )
1253
1229
  try:
1254
- self_pod = core_api.read_namespaced_pod(
1255
- name=self_pod_name,
1256
- namespace=self_pod_namespace,
1257
- )
1230
+ self_pod = self._find_self_pod()
1231
+ if not self_pod:
1232
+ return
1258
1233
  except kubernetes.client.exceptions.ApiException:
1259
1234
  logger.exception(
1260
- "Mirrored deployment enabled, but failed to get self Pod %s/%s, skipping",
1261
- self_pod_namespace,
1262
- self_pod_name,
1235
+ "Mirrored deployment enabled, but failed to get self Pod, skipping",
1263
1236
  )
1264
1237
  return
1238
+
1239
+ self_pod_name = self_pod.metadata.name
1240
+ self_pod_namespace = self_pod.metadata.namespace
1241
+
1242
+ # Retrieve self-container
1265
1243
  ## - Get the first Container, or the Container named "default" if exists.
1266
1244
  self_container = next(
1267
1245
  (c for c in self_pod.spec.containers if c.name == "default"),
@@ -1273,11 +1251,14 @@ class KubernetesDeployer(Deployer):
1273
1251
  "Mirrored deployment enabled, but no Container named 'default' found, using the first Container instead",
1274
1252
  )
1275
1253
  logger.info(
1276
- "Mirrored deployment enabled, using self Container %s of self Pod %s for options mirroring",
1254
+ "Mirrored deployment enabled, using self Container %s of self Pod %s/%s for options mirroring",
1277
1255
  self_container.name,
1278
- f"{self_pod_namespace}/{self_pod_name}",
1256
+ self_pod_namespace,
1257
+ self_pod_name,
1279
1258
  )
1280
1259
 
1260
+ core_api = kubernetes.client.CoreV1Api(self._client)
1261
+
1281
1262
  # Preprocess mirrored deployment options.
1282
1263
  in_same_namespace = (
1283
1264
  self_pod_namespace == envs.GPUSTACK_RUNTIME_KUBERNETES_NAMESPACE
@@ -1460,6 +1441,51 @@ class KubernetesDeployer(Deployer):
1460
1441
 
1461
1442
  self._mutate_create_pod = mutate_create_pod
1462
1443
 
1444
def _find_self_pod(self) -> kubernetes.client.V1Pod | None:
    """
    Look up the Pod this deployer is running in.

    Returns:
        The Pod object read from the cluster,
        or None when mirrored deployment is disabled.

    Raises:
        kubernetes.client.exceptions.ApiException:
            If no Pod name is configured (raised here with status 404).
            Errors from the Pod read itself are not caught here and
            propagate to the caller.

    """
    if not envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_DEPLOYMENT:
        logger.debug("Mirrored deployment disabled")
        return None

    # The Pod name must be configured explicitly; there is no fallback.
    pod_name = envs.GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME
    if not pod_name:
        raise kubernetes.client.exceptions.ApiException(
            status=404,
            reason="Please use env `GPUSTACK_RUNTIME_DEPLOY_MIRRORED_NAME` to specify the exact Pod name",
        )

    # Read the namespace from the service account mount,
    # falling back to "default" when the file is unreadable.
    namespace_file = Path(
        "/var/run/secrets/kubernetes.io/serviceaccount/namespace",
    )
    try:
        pod_namespace = namespace_file.read_text(encoding="utf-8").strip()
    except OSError:  # FileNotFoundError is a subclass of OSError.
        pod_namespace = "default"
        logger.warning(
            "Mirrored deployment enabled, but no Pod namespace found, using 'default' instead",
        )

    return kubernetes.client.CoreV1Api(self._client).read_namespaced_pod(
        name=pod_name,
        namespace=pod_namespace,
    )
1488
+
1463
1489
  @_supported
1464
1490
  def _create(self, workload: WorkloadPlan):
1465
1491
  """
@@ -1484,7 +1510,7 @@ class KubernetesDeployer(Deployer):
1484
1510
  msg = f"Invalid workload plan type: {type(workload)}"
1485
1511
  raise TypeError(msg)
1486
1512
 
1487
- self._prepare_create()
1513
+ self._prepare_mirrored_deployment()
1488
1514
 
1489
1515
  if isinstance(workload, WorkloadPlan):
1490
1516
  workload = KubernetesWorkloadPlan(**workload.__dict__)
@@ -1930,6 +1956,223 @@ class KubernetesDeployer(Deployer):
1930
1956
  return result
1931
1957
  return KubernetesWorkloadExecStream(result)
1932
1958
 
1959
@_supported
def _inspect(
    self,
    name: WorkloadName,
    namespace: WorkloadNamespace | None = None,
) -> str | None:
    """
    Inspect a Kubernetes workload.

    The Pod attached to the workload is dumped as YAML after dropping
    its managed fields and masking the values of sensitive environment
    variables on both init containers and regular containers.

    Args:
        name:
            The name of the workload.
        namespace:
            The namespace of the workload.

    Returns:
        The inspection result as a YAML string if found, None otherwise.
        None is also returned when the workload carries no `_k_pod`.

    Raises:
        UnsupportedError:
            If Kubernetes is not supported in the current environment.
        OperationError:
            If the Kubernetes workload fails to inspect.

    """
    workload = self._get(name=name, namespace=namespace)
    if not workload:
        return None

    k_pod = getattr(workload, "_k_pod", None)
    if not k_pod:
        return None

    # Remove managed fields to reduce output size
    k_pod.metadata.managed_fields = None
    # Mask sensitive environment variables.
    # Init containers are covered as well: their env values would
    # otherwise be dumped in clear text.
    containers = [
        *(getattr(k_pod.spec, "init_containers", None) or []),
        *(k_pod.spec.containers or []),
    ]
    for c in containers:
        for env in c.env or []:
            if sensitive_env_var(env.name):
                env.value = "******"

    return safe_yaml(k_pod, indent=2, sort_keys=False)
2001
+
2002
def _find_self_pod_for_endoscopy(self) -> kubernetes.client.V1Pod:
    """
    Resolve the self Pod on behalf of the endoscopic operations.
    Only works in mirrored deployment mode.

    Returns:
        The self Pod object, never None.

    Raises:
        UnsupportedError:
            If the self Pod lookup raises, or if it yields nothing
            because mirrored deployment is disabled.

    """
    try:
        pod = self._find_self_pod()
    except kubernetes.client.exceptions.ApiException as err:
        raise UnsupportedError(
            "Endoscopy is not supported in the current environment: Mirrored deployment enabled, but failed to get self Pod",
        ) from err
    except Exception as err:
        raise UnsupportedError(
            "Endoscopy is not supported in the current environment: Failed to get self Pod",
        ) from err

    if pod:
        return pod

    # An empty lookup result means mirrored deployment is turned off.
    raise UnsupportedError(
        "Endoscopy is not supported in the current environment: Mirrored deployment disabled",
    )
2028
+
2029
def _endoscopic_logs(
    self,
    timestamps: bool = False,
    tail: int | None = None,
    since: int | None = None,
    follow: bool = False,
) -> Generator[bytes | str, None, None] | bytes | str:
    """
    Get the logs of the deployer itself.
    Only works in mirrored deployment mode.

    Args:
        timestamps:
            Show timestamps in the logs.
        tail:
            Number of lines to show from the end of the logs.
            None or a negative value means no limit.
        since:
            Forwarded unchanged as `since_seconds` of the log request.
            NOTE(review): this was previously documented as an epoch in
            seconds, while Kubernetes treats `sinceSeconds` as a duration
            relative to now - confirm what callers pass.
        follow:
            Whether to follow the logs.

    Returns:
        The logs as a byte string or a generator yielding byte strings if follow is True.

    Raises:
        UnsupportedError:
            If endoscopy is not supported in the current environment.
        OperationError:
            If the deployer fails to get logs.

    """
    self_pod = self._find_self_pod_for_endoscopy()

    # `tail` defaults to None, so guard the comparison: `None >= 0`
    # raises TypeError.
    tail_lines = tail if tail is not None and tail >= 0 else None

    logs_options = {
        "timestamps": timestamps,
        "tail_lines": tail_lines,
        "since_seconds": since,
        "follow": follow,
        # When following, skip preloading so the response can be streamed.
        "_preload_content": not follow,
    }

    self_pod_name = self_pod.metadata.name
    self_pod_namespace = self_pod.metadata.namespace

    core_api = kubernetes.client.CoreV1Api(self._client)

    try:
        output = core_api.read_namespaced_pod_log(
            namespace=self_pod_namespace,
            name=self_pod_name,
            **logs_options,
        )
    except kubernetes.client.exceptions.ApiException as e:
        msg = f"Failed to fetch logs for self Pod {self_pod_namespace}/{self_pod_name}{_detail_api_call_error(e)}"
        raise OperationError(msg) from e
    else:
        return output
2086
+
2087
def _endoscopic_exec(
    self,
    detach: bool = True,
    command: list[str] | None = None,
    args: list[str] | None = None,
) -> WorkloadExecStream | bytes | str:
    """
    Run a command inside the deployer's own Pod.
    Only works in mirrored deployment mode.

    Args:
        detach:
            Whether to detach from the command.
        command:
            The command to execute.
            When omitted, /bin/sh is started and the session is attached
            regardless of `detach`.
        args:
            Extra arguments appended to the command.
            Not used when no command is given.

    Returns:
        A WorkloadExecStream when the session is attached,
        otherwise the output of the command as a byte string or string.

    Raises:
        UnsupportedError:
            If endoscopy is not supported in the current environment.
        OperationError:
            If the deployer fails to execute the command.

    """
    pod = self._find_self_pod_for_endoscopy()

    # Attach unless the caller asked to detach AND supplied a command.
    interactive = not (detach and command)
    full_command = [*command, *(args or [])] if command else ["/bin/sh"]

    pod_name = pod.metadata.name
    pod_namespace = pod.metadata.namespace
    api = kubernetes.client.CoreV1Api(self._client)

    try:
        outcome = kubernetes.stream.stream(
            api.connect_get_namespaced_pod_exec,
            namespace=pod_namespace,
            name=pod_name,
            stdout=True,
            stderr=True,
            stdin=interactive,
            tty=interactive,
            command=full_command,
            _preload_content=not interactive,
        )
    except kubernetes.client.exceptions.ApiException as err:
        msg = f"Failed to exec command in self Pod {pod_namespace}/{pod_name}{_detail_api_call_error(err)}"
        raise OperationError(msg) from err

    if interactive:
        return KubernetesWorkloadExecStream(outcome)
    return outcome
2148
+
2149
def _endoscopic_inspect(self) -> str:
    """
    Inspect the deployer itself.
    Only works in mirrored deployment mode.

    The self Pod is dumped as YAML after dropping its managed fields and
    masking the values of sensitive environment variables on both init
    containers and regular containers.

    Returns:
        The inspection result as a YAML string.

    Raises:
        UnsupportedError:
            If endoscopy is not supported in the current environment.

    """
    self_pod = self._find_self_pod_for_endoscopy()

    # Remove managed fields to reduce output size
    self_pod.metadata.managed_fields = None
    # Mask sensitive environment variables.
    # Init containers are covered as well: their env values would
    # otherwise be dumped in clear text.
    containers = [
        *(getattr(self_pod.spec, "init_containers", None) or []),
        *(self_pod.spec.containers or []),
    ]
    for c in containers:
        for env in c.env or []:
            if sensitive_env_var(env.name):
                env.value = "******"

    return safe_yaml(self_pod, indent=2, sort_keys=False)
2175
+
1933
2176
 
1934
2177
  def equal_config_maps(
1935
2178
  a: kubernetes.client.V1ConfigMap,