kubetorch 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kubetorch might be problematic. Click here for more details.

Files changed (93) hide show
  1. kubetorch/__init__.py +60 -0
  2. kubetorch/cli.py +1985 -0
  3. kubetorch/cli_utils.py +1025 -0
  4. kubetorch/config.py +453 -0
  5. kubetorch/constants.py +18 -0
  6. kubetorch/docs/Makefile +18 -0
  7. kubetorch/docs/__init__.py +0 -0
  8. kubetorch/docs/_ext/json_globaltoc.py +42 -0
  9. kubetorch/docs/api/cli.rst +10 -0
  10. kubetorch/docs/api/python/app.rst +21 -0
  11. kubetorch/docs/api/python/cls.rst +19 -0
  12. kubetorch/docs/api/python/compute.rst +25 -0
  13. kubetorch/docs/api/python/config.rst +11 -0
  14. kubetorch/docs/api/python/fn.rst +19 -0
  15. kubetorch/docs/api/python/image.rst +14 -0
  16. kubetorch/docs/api/python/secret.rst +18 -0
  17. kubetorch/docs/api/python/volumes.rst +13 -0
  18. kubetorch/docs/api/python.rst +101 -0
  19. kubetorch/docs/conf.py +69 -0
  20. kubetorch/docs/index.rst +20 -0
  21. kubetorch/docs/requirements.txt +5 -0
  22. kubetorch/globals.py +285 -0
  23. kubetorch/logger.py +59 -0
  24. kubetorch/resources/__init__.py +0 -0
  25. kubetorch/resources/callables/__init__.py +0 -0
  26. kubetorch/resources/callables/cls/__init__.py +0 -0
  27. kubetorch/resources/callables/cls/cls.py +157 -0
  28. kubetorch/resources/callables/fn/__init__.py +0 -0
  29. kubetorch/resources/callables/fn/fn.py +133 -0
  30. kubetorch/resources/callables/module.py +1416 -0
  31. kubetorch/resources/callables/utils.py +174 -0
  32. kubetorch/resources/compute/__init__.py +0 -0
  33. kubetorch/resources/compute/app.py +261 -0
  34. kubetorch/resources/compute/compute.py +2596 -0
  35. kubetorch/resources/compute/decorators.py +139 -0
  36. kubetorch/resources/compute/rbac.py +74 -0
  37. kubetorch/resources/compute/utils.py +1114 -0
  38. kubetorch/resources/compute/websocket.py +137 -0
  39. kubetorch/resources/images/__init__.py +1 -0
  40. kubetorch/resources/images/image.py +414 -0
  41. kubetorch/resources/images/images.py +74 -0
  42. kubetorch/resources/secrets/__init__.py +2 -0
  43. kubetorch/resources/secrets/kubernetes_secrets_client.py +412 -0
  44. kubetorch/resources/secrets/provider_secrets/__init__.py +0 -0
  45. kubetorch/resources/secrets/provider_secrets/anthropic_secret.py +12 -0
  46. kubetorch/resources/secrets/provider_secrets/aws_secret.py +16 -0
  47. kubetorch/resources/secrets/provider_secrets/azure_secret.py +14 -0
  48. kubetorch/resources/secrets/provider_secrets/cohere_secret.py +12 -0
  49. kubetorch/resources/secrets/provider_secrets/gcp_secret.py +16 -0
  50. kubetorch/resources/secrets/provider_secrets/github_secret.py +13 -0
  51. kubetorch/resources/secrets/provider_secrets/huggingface_secret.py +20 -0
  52. kubetorch/resources/secrets/provider_secrets/kubeconfig_secret.py +12 -0
  53. kubetorch/resources/secrets/provider_secrets/lambda_secret.py +13 -0
  54. kubetorch/resources/secrets/provider_secrets/langchain_secret.py +12 -0
  55. kubetorch/resources/secrets/provider_secrets/openai_secret.py +11 -0
  56. kubetorch/resources/secrets/provider_secrets/pinecone_secret.py +12 -0
  57. kubetorch/resources/secrets/provider_secrets/providers.py +93 -0
  58. kubetorch/resources/secrets/provider_secrets/ssh_secret.py +12 -0
  59. kubetorch/resources/secrets/provider_secrets/wandb_secret.py +11 -0
  60. kubetorch/resources/secrets/secret.py +238 -0
  61. kubetorch/resources/secrets/secret_factory.py +70 -0
  62. kubetorch/resources/secrets/utils.py +209 -0
  63. kubetorch/resources/volumes/__init__.py +0 -0
  64. kubetorch/resources/volumes/volume.py +365 -0
  65. kubetorch/servers/__init__.py +0 -0
  66. kubetorch/servers/http/__init__.py +0 -0
  67. kubetorch/servers/http/distributed_utils.py +3223 -0
  68. kubetorch/servers/http/http_client.py +730 -0
  69. kubetorch/servers/http/http_server.py +1788 -0
  70. kubetorch/servers/http/server_metrics.py +278 -0
  71. kubetorch/servers/http/utils.py +728 -0
  72. kubetorch/serving/__init__.py +0 -0
  73. kubetorch/serving/autoscaling.py +173 -0
  74. kubetorch/serving/base_service_manager.py +363 -0
  75. kubetorch/serving/constants.py +83 -0
  76. kubetorch/serving/deployment_service_manager.py +478 -0
  77. kubetorch/serving/knative_service_manager.py +519 -0
  78. kubetorch/serving/raycluster_service_manager.py +582 -0
  79. kubetorch/serving/service_manager.py +18 -0
  80. kubetorch/serving/templates/deployment_template.yaml +17 -0
  81. kubetorch/serving/templates/knative_service_template.yaml +19 -0
  82. kubetorch/serving/templates/kt_setup_template.sh.j2 +81 -0
  83. kubetorch/serving/templates/pod_template.yaml +194 -0
  84. kubetorch/serving/templates/raycluster_service_template.yaml +42 -0
  85. kubetorch/serving/templates/raycluster_template.yaml +35 -0
  86. kubetorch/serving/templates/service_template.yaml +21 -0
  87. kubetorch/serving/templates/workerset_template.yaml +36 -0
  88. kubetorch/serving/utils.py +377 -0
  89. kubetorch/utils.py +284 -0
  90. kubetorch-0.2.0.dist-info/METADATA +121 -0
  91. kubetorch-0.2.0.dist-info/RECORD +93 -0
  92. kubetorch-0.2.0.dist-info/WHEEL +4 -0
  93. kubetorch-0.2.0.dist-info/entry_points.txt +5 -0
@@ -0,0 +1,12 @@
1
+ from .. import Secret
2
+
3
+
4
class SSHSecret(Secret):
    """Built-in secret whose defaults point at the ``id_rsa`` file under ``~/.ssh``.

    .. note::
        To create a SSHSecret, please use the factory method :func:`secret` with ``provider="ssh"``.
    """

    # Provider identifier for this built-in secret type.
    _PROVIDER = "ssh"
    # Default directory and file name(s) the secret values are read from.
    _DEFAULT_PATH = "~/.ssh"
    _DEFAULT_FILENAMES = ["id_rsa"]
@@ -0,0 +1,11 @@
1
+ from .. import Secret
2
+
3
+
4
class WandBSecret(Secret):
    """Built-in secret for Weights & Biases, sourced from the ``WANDB_API_KEY`` env var.

    .. note::
        To create a WandBSecret, please use the factory method :func:`secret` with ``provider="wandb"``.
    """

    # Secret key -> environment variable name holding its value.
    _DEFAULT_ENV_VARS = {"api_key": "WANDB_API_KEY"}
    # Provider identifier for this built-in secret type.
    _PROVIDER = "wandb"
@@ -0,0 +1,238 @@
1
+ import os
2
+ from typing import Dict, List, Optional, Tuple
3
+
4
+ from kubetorch.globals import config
5
+
6
+ from kubetorch.resources.secrets.utils import read_files_as_secrets_dict
7
+
8
+
9
class Secret:
    """Credentials sourced from explicit values, local files, or environment variables.

    Provider subclasses customise behaviour through the class-level defaults
    below. Instances are usually created through the ``from_*`` constructors.
    """

    # Directory searched when no explicit source is passed to ``__init__``.
    _DEFAULT_PATH = None
    # File names read from ``_DEFAULT_PATH``.
    _DEFAULT_FILENAMES = None
    # Secret key -> environment variable name; tried when nothing is found on disk.
    _DEFAULT_ENV_VARS = {}
    # File name -> environment variable name under which that file's content is stored.
    _MAP_FILENAMES_TO_ENV_VARS = {}
    # Provider identifier, e.g. "ssh" or "wandb".
    _PROVIDER = None

    def __init__(
        self,
        name: Optional[str] = None,
        provider: Optional[str] = None,
        values: Optional[Dict] = None,
        path: Optional[str] = None,
        env_vars: Optional[Dict] = None,
        override: bool = False,
        **kwargs,
    ):
        """
        Secret class. Built-in provider classes contain default path and/or environment variable mappings,
        based on their expected usage.

        Note:
            Currently supported built-in providers:
            anthropic, aws, azure, gcp, github, huggingface, lambda, langchain, openai, pinecone, ssh, wandb.

        Args:
            name (str, optional): Name to assign the Kubetorch secret.
            provider (str, optional): Provider corresponding to the secret (e.g. "aws", "gcp").
            values (Dict, optional): Dictionary mapping secret keys to the corresponding secret values.
            path (str, optional): Path where the secret values are held.
            env_vars (Dict, optional): Dictionary mapping secret keys to the corresponding environment variable key.
            override (bool, optional): If True, override the secret's values in Kubernetes if a secret with the same
                name already exists.
            **kwargs: ``namespace`` (overrides the configured namespace) and ``filenames`` (file names to read
                from ``path``) are recognised.

        Raises:
            ValueError: If no source is given and nothing can be read from the default path or the default
                environment variables.
        """
        # The username prefix keeps different users from colliding on the same provider secret.
        name_prefix = f"{config.username}-" if config.username else ""
        # Resolve the provider *before* formatting: an f-string is always truthy, so chaining
        # two formatted names with ``or`` would never reach the class-level provider.
        resolved_provider = provider or self._PROVIDER
        self._name = name or f"{name_prefix}{resolved_provider}"
        # Underscores are replaced so the name matches Kubernetes naming standards.
        self._name = self._name.replace("_", "-")
        self._namespace = kwargs.get("namespace", None) or config.namespace
        self._values = values

        self.provider = resolved_provider
        self.path = path
        # Always defined, so later attribute access never raises AttributeError.
        self.filenames = None
        if path:
            # ``filenames`` may arrive as a kwarg when the secret is loaded by name or from config.
            self.path, self.filenames = self._split_path_if_needed(
                path=path, filenames=kwargs.get("filenames", None)
            )
        self.env_vars = env_vars
        self._override = override

        if not any([values, path, env_vars]):
            # No explicit source: try the default path first, then the default env vars.
            if self._values_from_path():
                pass
            elif self._values_from_env(self._DEFAULT_ENV_VARS):
                # Copy so that instance-level changes never leak into the class default.
                self.env_vars = dict(self._DEFAULT_ENV_VARS)
            else:
                raise ValueError(
                    "Secrets values not provided and could not be extracted from default file "
                    f"({self._DEFAULT_PATH}) or env vars ({list(self._DEFAULT_ENV_VARS.values())}) locations."
                )

    @property
    def name(self):
        """Name of the secret."""
        return self._name

    @property
    def override(self):
        """Should we override secret's values in Kubernetes if a secret with the same name already exists"""
        return self._override

    @property
    def values(self):
        """Secret values: explicit/cached values first, then files under ``path``, then environment variables."""
        if self._values:
            return self._values
        if self.path:
            return self._values_from_path(self.path)
        if self.env_vars:
            return self._values_from_env(self.env_vars)
        return {}

    def _values_from_env(self, env_vars: Optional[Dict] = None):
        """Read secret values from the process environment.

        Args:
            env_vars: Either a mapping of secret key -> environment variable name, or a plain
                iterable of environment variable names. Defaults to ``self.env_vars``.

        Returns:
            Dict of the entries that are set in the environment, keyed by secret key (or by
            variable name when an iterable of names is given). Unset variables are skipped, so
            an empty dict means nothing could be resolved.
        """
        env_vars = env_vars or self.env_vars
        if not env_vars:
            return {}

        if isinstance(env_vars, dict):
            lookups = list(env_vars.items())
        else:
            lookups = [(var_name, var_name) for var_name in env_vars]

        resolved = {}
        for key, var_name in lookups:
            # Prefer the mapped variable name. Fall back to the key itself, which is the
            # lookup older callers relied on (keys that are themselves variable names).
            for candidate in (var_name, key):
                if isinstance(candidate, str) and candidate in os.environ:
                    resolved[key] = os.environ[candidate]
                    break
        return resolved

    def _values_from_path(self, path: Optional[str] = None):
        """Read secret values from files under ``path``, ``self.path``, or the class default path.

        When values are found they are either re-keyed by environment variable name (for classes
        defining ``_MAP_FILENAMES_TO_ENV_VARS``) and cached on the instance, or the resolved
        directory and file names are recorded on the instance.

        Returns:
            Dict of the values that were read; empty when no path is available or nothing was read.
        """
        path = path or self.path or self._DEFAULT_PATH
        if not path:
            return {}

        # Normalise to a (directory, filenames) pair in case a single file path was given.
        path, filenames = self._split_path_if_needed(path)

        values = read_files_as_secrets_dict(path=path, filenames=filenames)
        if values:
            # Instance state is only updated when values were successfully found.
            if self._MAP_FILENAMES_TO_ENV_VARS:
                env_vars = []
                for filename, env_var in self._MAP_FILENAMES_TO_ENV_VARS.items():
                    if filename in values:
                        # pop-then-assign keeps the value even when filename == env_var.
                        values[env_var] = values.pop(filename).strip()
                        env_vars.append(env_var)
                if env_vars:
                    self.env_vars = env_vars
                    self._values = values
                    return values

            self.path = path
            self.filenames = filenames
        return values

    def _split_path_if_needed(
        self, path: str, filenames: Optional[List[str]] = None
    ) -> Tuple[str, List[str]]:
        """Split path into a directory and file names if a single file is specified as a full path.

        File names already stored on the instance take precedence over the ``filenames`` argument.
        If neither is available, the class default file names are used for the class default path;
        any other path is treated as a single file and split into its directory and base name.
        """
        updated_path = path
        updated_filenames = getattr(self, "filenames", None) or filenames
        if not updated_filenames:
            is_default_path = updated_path == self._DEFAULT_PATH
            if is_default_path and self._DEFAULT_FILENAMES:
                updated_filenames = self._DEFAULT_FILENAMES
            else:
                # Reform a single-file path into a directory and a one-element filenames list.
                updated_filenames = [os.path.basename(path)]
                updated_path = os.path.dirname(path)
        return updated_path, updated_filenames

    @classmethod
    def from_config(cls, config: dict):
        """Build a secret from a configuration dictionary.

        Args:
            config (dict): Keyword arguments for the constructor. ``override`` may be a bool or a
                "true"/"false" string. When ``provider`` is set, the matching built-in provider
                class is instantiated, falling back to ``cls`` for unknown providers.

        The passed dictionary is not modified.
        """
        settings = dict(config)
        # str() makes this safe for real booleans as well as serialized "True"/"False".
        settings["override"] = str(settings.get("override", False)).lower() == "true"

        secret_class = cls
        provider = settings.get("provider")
        if provider:
            from .provider_secrets.providers import _get_provider_class

            secret_class = _get_provider_class(provider) or cls
        # Instantiate directly: dispatching to ``from_config`` again would re-enter this method.
        return secret_class(**settings)

    @classmethod
    def from_name(cls, name, namespace: Optional[str] = None):
        """Load an existing secret by name through the Kubernetes secrets client.

        Args:
            name (str): Name of the secret to load.
            namespace (str, optional): Namespace to load from. Defaults to the configured namespace,
                resolved at call time rather than at import time.
        """
        from kubetorch.resources.secrets.kubernetes_secrets_client import (
            KubernetesSecretsClient,
        )

        secrets_client = KubernetesSecretsClient(
            namespace=namespace or config.namespace
        )
        return secrets_client.load_secret(name=name)

    @classmethod
    def builtin_providers(cls, as_str: bool = False) -> List:
        """Return list of all Kubetorch providers (as class objects) supported out of the box.

        Args:
            as_str (bool, optional): Whether to return the providers as a string or as a class.
                (Default: ``False``)
        """
        from .provider_secrets.providers import _str_to_provider_class

        if as_str:
            return list(_str_to_provider_class.keys())
        return list(_str_to_provider_class.values())

    @classmethod
    def from_provider(
        cls,
        provider: str,
        name: Optional[str] = None,
        path: Optional[str] = None,
        override: bool = False,
    ):
        """Return kubetorch provider secret object

        Args:
            provider (str): Provider's name
            name (str, Optional): Secret name
            path (str, optional): Path where the secret values are held.
            override (Bool, optional): If True, override the secret's values in Kubernetes if a secret with the same name already exists.

        Raises:
            ValueError: If ``provider`` is not one of the built-in providers.
        """
        from .provider_secrets.providers import _get_provider_class

        secret_class = _get_provider_class(provider)
        if not secret_class:
            raise ValueError(
                f"{provider} is not a supported provider: {Secret.builtin_providers(as_str=True)}"
            )
        return secret_class(name=name, provider=provider, path=path, override=override)

    @classmethod
    def from_path(cls, path: str, name: Optional[str] = None, override: bool = False):
        """Return kubetorch provider secret object

        Args:
            path (str): Local path to the secret values file
            name (str, Optional): Secret name
            override (Bool, optional): If True, override the secret's values in Kubernetes if a secret with the same name already exists.

        Raises:
            ValueError: If no provider class is resolved and no ``name`` is given.
        """
        from .provider_secrets.providers import _get_provider_class

        # NOTE(review): the path itself is handed to the provider lookup; confirm that
        # `_get_provider_class` is meant to resolve a provider from a path.
        secret_class = _get_provider_class(path) or Secret
        if not secret_class._PROVIDER and not name:
            raise ValueError("secret name must be provided.")

        return secret_class(name=name, path=path, override=override)

    @classmethod
    def from_env(cls, env_vars: dict, name: Optional[str] = None, override: bool = False):
        """Return kubetorch provider secret object

        Args:
            env_vars (dict): Dictionary mapping secret keys to the corresponding
                environment variable key.
            name (str, Optional): Secret name
            override (Bool, optional): If True, override the secret's values in Kubernetes if a secret with the same name already exists.
        """
        from .provider_secrets.providers import _get_provider_class

        # NOTE(review): the env_vars mapping is handed to the provider lookup; confirm that
        # `_get_provider_class` accepts a dict.
        secret_class = _get_provider_class(env_vars) or Secret
        return secret_class(name=name, env_vars=env_vars, override=override)
@@ -0,0 +1,70 @@
1
+ from typing import Dict, Optional
2
+
3
+ from kubetorch.globals import config
4
+
5
+ from .secret import Secret
6
+
7
+
8
def secret(
    name: Optional[str] = None,
    provider: Optional[str] = None,
    path: Optional[str] = None,
    env_vars: Optional[Dict] = None,
    namespace: Optional[str] = None,
    override: Optional[bool] = False,
) -> "Secret":
    """
    Builds an instance of :class:`Secret`. Exactly one of `provider`, `path`, or `env_vars` should be provided, to
    maintain one source of truth; the only accepted combination is `provider` together with `path`, which reads that
    provider's secret from a custom location. For a provider, the values are inferred from the default path or
    environment variables for that provider. To load a secret by name, provide only its name (and optionally a
    namespace).

    Args:
        namespace (str, optional): Namespace to load the secret from, if we create a secret from name.
            Defaults to the configured namespace, resolved at call time.
        name (str, optional): Name to assign the resource. If none is provided, resource name defaults to the
            provider name.
        provider (str, optional): Provider corresponding to the secret (e.g. "aws", "gcp"). To see all supported provider
            types, run ``kt.Secret.builtin_providers(as_str=True)``.
        path (str, optional): Path where the secret values are held.
        env_vars (Dict, optional): Dictionary mapping secret keys to the corresponding
            environment variable key.
        override (Bool, optional): If True, override the secret's values in Kubernetes if a secret with the same name already exists.

    Returns:
        Secret: The resulting secret object.

    Raises:
        ValueError: If the combination of `provider`, `path`, `env_vars` and `name` is not one of the
            supported forms described above.

    Examples:

    .. code-block:: python

        import kubetorch as kt

        existing_secret = kt.secret(name="my-secret")  # load an existing secret by name
        aws_secret = kt.secret(provider="aws")
        gcp_secret = kt.secret(name="my-gcp-secret", path="~/.gcp/credentials")
        lambda_secret = kt.secret(name="my-lambda-secret", env_vars={"api_key": "LAMBDA_API_KEY"})
    """
    invalid_input_message = (
        "You must provide exactly one of: `provider`, `path`, or `env_vars`. "
        "Alternatively, you may provide `name` to load a secret from name."
    )

    has_provider, has_path, has_env_vars = bool(provider), bool(path), bool(env_vars)
    source_count = has_provider + has_path + has_env_vars

    if source_count == 0:
        # No source at all: the only valid request is loading an existing secret by name.
        if not name:
            raise ValueError(invalid_input_message)
        return Secret.from_name(name=name, namespace=namespace or config.namespace)

    # provider + path is the single supported combination; anything else with more than one
    # source is rejected, so a conflicting `env_vars` is never silently ignored.
    provider_with_path = has_provider and has_path and not has_env_vars
    if source_count != 1 and not provider_with_path:
        raise ValueError(invalid_input_message)

    if has_provider:
        return Secret.from_provider(
            provider=provider, name=name, path=path, override=override
        )
    if has_path:
        return Secret.from_path(path=path, name=name, override=override)
    return Secret.from_env(env_vars=env_vars, name=name, override=override)
@@ -0,0 +1,209 @@
1
+ import os
2
+ import re
3
+ import time
4
+ from typing import List, Optional
5
+
6
+ from kubernetes import client, config
7
+ from kubernetes.client import V1Pod, V1TokenReview, V1TokenReviewSpec
8
+ from kubernetes.stream import stream
9
+
10
+ from kubetorch.globals import config as kt_config
11
+
12
+ from kubetorch.logger import get_logger
13
+ from kubetorch.servers.http.utils import is_running_in_kubernetes
14
+
15
+ logger = get_logger(__name__)
16
+
17
+
18
def get_k8s_identity_name() -> Optional[str]:
    """Best-effort lookup of a short identity name for the current Kubernetes credentials.

    The bearer token from the loaded client configuration is submitted in a
    TokenReview, and the reviewed username is turned into a prefixed name:
    ``role-<name>`` / ``user-<name>`` for usernames ending in ``amazonaws.com``
    and ``sa-<name>`` for service accounts.

    Returns:
        The lower-cased identity name, or ``None`` when the token is not
        authenticated, the username matches no known form, or any step fails
        (failures are logged at info level and swallowed).
    """
    try:
        load_config = (
            config.load_incluster_config
            if is_running_in_kubernetes()
            else config.load_kube_config
        )
        load_config()

        raw_token = client.Configuration.get_default_copy().api_key.get(
            "authorization", ""
        )
        bearer_token = re.sub(r"^Bearer\s", "", raw_token)

        review_request = V1TokenReview(spec=V1TokenReviewSpec(token=bearer_token))
        review = client.AuthenticationV1Api().create_token_review(review_request)

        if not review.status.authenticated:
            return None

        user = review.status.user
        if not hasattr(user, "username"):
            return None

        # For EKS IAM users/roles.
        # NOTE(review): the ARN examples below do not end in "amazonaws.com";
        # confirm which username form this check is meant to match.
        if user.username.endswith("amazonaws.com"):
            # ARN format: arn:aws:iam::ACCOUNT_ID:user/USERNAME or
            # arn:aws:sts::ACCOUNT_ID:assumed-role/ROLE_NAME/SESSION_NAME
            segments = user.username.split("/")
            if "assumed-role" in user.username:
                return "role-" + segments[-2].lower()  # ROLE_NAME
            return "user-" + segments[-1].lower()  # USERNAME for IAM users

        # For Kubernetes service accounts (works for both GKE and EKS)
        if "system:serviceaccount:" in user.username:
            return "sa-" + user.username.split(":")[-1].lower()  # service account name

    except Exception as exc:
        logger.info(f"Failed to get identity name: {exc}")

    return None
56
+
57
+
58
def read_files_as_secrets_dict(path: str, filenames: List[str]):
    """Read the given files from a directory into a ``{filename: content}`` dict.

    Args:
        path: Directory holding the files; a leading ``~`` is expanded.
        filenames: Names of the files to read from that directory.

    Returns:
        Dict keyed by file name. Files that are missing or empty are left out.
        Contents are stored as read (not base64-encoded).
    """
    base_dir = os.path.expanduser(path)
    contents = (
        (fname, _read_file_if_exists(os.path.join(base_dir, fname)))
        for fname in filenames
    )
    return {fname: text for fname, text in contents if text}
73
+
74
+
75
def _read_file_if_exists(file_path: str) -> Optional[str]:
    """Return the text content of ``file_path``, or ``None`` if the file does not exist.

    A missing file is logged and not raised. Other I/O errors propagate.
    """
    try:
        # Text mode; switch to "rb" if the content is ever base64-encoded by the caller.
        handle = open(file_path, "r")
    except FileNotFoundError:
        logger.error(f"Warning: {file_path} not found, using empty content")
        return None
    with handle:
        return handle.read()
82
+
83
+
84
+ # ------------------------------------------------------------------------------------------------
85
+ # Secret testing utils
86
+ # ------------------------------------------------------------------------------------------------
87
+
88
+
89
def check_path_on_kubernetes_pods(
    path: str, service_name: str, namespace: str = None
) -> bool:
    """
    Check that a file exists at ``path`` on every running pod of a service.

    Each pod's ``kubetorch`` container is probed with a ``[ -f <path> ]`` shell test.

    Args:
        path: File path to test inside the container.
        service_name: Service whose pods are checked.
        namespace: Namespace of the service; defaults to the configured namespace.

    Returns:
        True only if every pod reports the file. False if no pods are found, if any
        pod lacks the file, or if the exec call fails on any pod.
    """
    namespace = namespace or kt_config.namespace
    config.load_kube_config()
    core_v1_api = client.CoreV1Api()

    pods = _fetch_pods_for_kubernetes_service(service_name, namespace, core_v1_api)
    if not pods:
        logger.error(
            f"No pods found for service {service_name} in namespace {namespace}"
        )
        return False

    shell_probe = f"[ -f {path} ] && echo yes || echo no"
    missing_somewhere = False
    # Every pod is probed, even after a miss.
    for pod in pods:
        pod_name = pod.metadata.name
        try:
            output = stream(
                core_v1_api.connect_get_namespaced_pod_exec,
                name=pod_name,
                namespace=namespace,
                command=["/bin/bash", "-c", shell_probe],
                container="kubetorch",
                stderr=True,
                stdout=True,
            )
        except client.exceptions.ApiException as exc:
            logger.error(f"Error executing command on pod {pod_name}: {exc}")
            missing_somewhere = True
            continue

        if "yes" not in output:
            missing_somewhere = True

    return not missing_somewhere
130
+
131
+
132
def check_env_vars_on_kubernetes_pods(
    env_vars: list, service_name: str, namespace: str = None
) -> dict:
    """
    Look up environment variables on the running pods of a service.

    Each variable is echoed in a pod's ``kubetorch`` container. A variable counts as
    found once any pod returns a non-empty value for it.

    :param env_vars: Names of the environment variables to look for
    :param service_name: Name of the service whose pods are inspected
    :param namespace: Kubernetes namespace; defaults to the configured namespace
    :return: Dictionary mapping each found variable name to its stripped value
        (empty if no pods are found)
    """
    namespace = namespace or kt_config.namespace
    config.load_kube_config()
    core_v1_api = client.CoreV1Api()

    pods = _fetch_pods_for_kubernetes_service(service_name, namespace, core_v1_api)
    if not pods:
        logger.error(
            f"No pods found for service {service_name} in namespace {namespace}"
        )
        return {}

    discovered = {}

    for pod in pods:
        for env_var in env_vars:
            # Already resolved on an earlier pod: nothing left to do for this variable.
            if discovered.get(env_var):
                continue
            pod_name = pod.metadata.name
            try:
                output = stream(
                    core_v1_api.connect_get_namespaced_pod_exec,
                    name=pod_name,
                    namespace=namespace,
                    command=["/bin/bash", "-c", f"echo ${env_var}"],
                    container="kubetorch",
                    stderr=True,
                    stdout=True,
                )
                value = output.strip()
                if len(value) > 0:
                    discovered[env_var] = value
            except client.exceptions.ApiException as exc:
                logger.error(f"Error executing command: {exc}")

        # Stop early once every requested variable has a value.
        if set(discovered) == set(env_vars):
            break

    return discovered
184
+
185
+
186
def _fetch_pods_for_kubernetes_service(
    service_name: str, namespace: str, client_api: client.CoreV1Api
) -> List[V1Pod]:
    """
    Poll for the running pods of a service, giving up after 30 seconds.

    Pods are selected by the ``kubetorch.com/service=<service_name>`` label and kept
    only if their phase is ``Running``. Listing errors are logged and retried.

    Returns:
        The running pods from the first poll that finds any, or an empty list on timeout.
    """
    selector = f"kubetorch.com/service={service_name}"
    deadline = time.time() + 30

    while time.time() < deadline:
        try:
            listing = client_api.list_namespaced_pod(
                namespace=namespace,
                label_selector=selector,
            )
            running = [
                candidate
                for candidate in listing.items
                if candidate.status.phase == "Running"
            ]
            if running:
                return running
        except Exception as exc:
            logger.error(
                f"Error fetching pods for service {service_name} in namespace {namespace}: {exc}"
            )
        # Wait a second between polls.
        time.sleep(1)

    return []
File without changes