kubetorch 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kubetorch/__init__.py +59 -0
- kubetorch/cli.py +1939 -0
- kubetorch/cli_utils.py +967 -0
- kubetorch/config.py +453 -0
- kubetorch/constants.py +18 -0
- kubetorch/docs/Makefile +18 -0
- kubetorch/docs/__init__.py +0 -0
- kubetorch/docs/_ext/json_globaltoc.py +42 -0
- kubetorch/docs/api/cli.rst +10 -0
- kubetorch/docs/api/python/app.rst +21 -0
- kubetorch/docs/api/python/cls.rst +19 -0
- kubetorch/docs/api/python/compute.rst +25 -0
- kubetorch/docs/api/python/config.rst +11 -0
- kubetorch/docs/api/python/fn.rst +19 -0
- kubetorch/docs/api/python/image.rst +14 -0
- kubetorch/docs/api/python/secret.rst +18 -0
- kubetorch/docs/api/python/volumes.rst +13 -0
- kubetorch/docs/api/python.rst +101 -0
- kubetorch/docs/conf.py +69 -0
- kubetorch/docs/index.rst +20 -0
- kubetorch/docs/requirements.txt +5 -0
- kubetorch/globals.py +269 -0
- kubetorch/logger.py +59 -0
- kubetorch/resources/__init__.py +0 -0
- kubetorch/resources/callables/__init__.py +0 -0
- kubetorch/resources/callables/cls/__init__.py +0 -0
- kubetorch/resources/callables/cls/cls.py +159 -0
- kubetorch/resources/callables/fn/__init__.py +0 -0
- kubetorch/resources/callables/fn/fn.py +140 -0
- kubetorch/resources/callables/module.py +1315 -0
- kubetorch/resources/callables/utils.py +203 -0
- kubetorch/resources/compute/__init__.py +0 -0
- kubetorch/resources/compute/app.py +253 -0
- kubetorch/resources/compute/compute.py +2414 -0
- kubetorch/resources/compute/decorators.py +137 -0
- kubetorch/resources/compute/utils.py +1026 -0
- kubetorch/resources/compute/websocket.py +135 -0
- kubetorch/resources/images/__init__.py +1 -0
- kubetorch/resources/images/image.py +412 -0
- kubetorch/resources/images/images.py +64 -0
- kubetorch/resources/secrets/__init__.py +2 -0
- kubetorch/resources/secrets/kubernetes_secrets_client.py +377 -0
- kubetorch/resources/secrets/provider_secrets/__init__.py +0 -0
- kubetorch/resources/secrets/provider_secrets/anthropic_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/aws_secret.py +16 -0
- kubetorch/resources/secrets/provider_secrets/azure_secret.py +14 -0
- kubetorch/resources/secrets/provider_secrets/cohere_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/gcp_secret.py +16 -0
- kubetorch/resources/secrets/provider_secrets/github_secret.py +13 -0
- kubetorch/resources/secrets/provider_secrets/huggingface_secret.py +20 -0
- kubetorch/resources/secrets/provider_secrets/kubeconfig_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/lambda_secret.py +13 -0
- kubetorch/resources/secrets/provider_secrets/langchain_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/openai_secret.py +11 -0
- kubetorch/resources/secrets/provider_secrets/pinecone_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/providers.py +92 -0
- kubetorch/resources/secrets/provider_secrets/ssh_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/wandb_secret.py +11 -0
- kubetorch/resources/secrets/secret.py +224 -0
- kubetorch/resources/secrets/secret_factory.py +64 -0
- kubetorch/resources/secrets/utils.py +222 -0
- kubetorch/resources/volumes/__init__.py +0 -0
- kubetorch/resources/volumes/volume.py +340 -0
- kubetorch/servers/__init__.py +0 -0
- kubetorch/servers/http/__init__.py +0 -0
- kubetorch/servers/http/distributed_utils.py +2968 -0
- kubetorch/servers/http/http_client.py +802 -0
- kubetorch/servers/http/http_server.py +1622 -0
- kubetorch/servers/http/server_metrics.py +255 -0
- kubetorch/servers/http/utils.py +722 -0
- kubetorch/serving/__init__.py +0 -0
- kubetorch/serving/autoscaling.py +153 -0
- kubetorch/serving/base_service_manager.py +344 -0
- kubetorch/serving/constants.py +77 -0
- kubetorch/serving/deployment_service_manager.py +431 -0
- kubetorch/serving/knative_service_manager.py +487 -0
- kubetorch/serving/raycluster_service_manager.py +526 -0
- kubetorch/serving/service_manager.py +18 -0
- kubetorch/serving/templates/deployment_template.yaml +17 -0
- kubetorch/serving/templates/knative_service_template.yaml +19 -0
- kubetorch/serving/templates/kt_setup_template.sh.j2 +91 -0
- kubetorch/serving/templates/pod_template.yaml +198 -0
- kubetorch/serving/templates/raycluster_service_template.yaml +42 -0
- kubetorch/serving/templates/raycluster_template.yaml +35 -0
- kubetorch/serving/templates/service_template.yaml +21 -0
- kubetorch/serving/templates/workerset_template.yaml +36 -0
- kubetorch/serving/utils.py +344 -0
- kubetorch/utils.py +263 -0
- kubetorch-0.2.5.dist-info/METADATA +75 -0
- kubetorch-0.2.5.dist-info/RECORD +92 -0
- kubetorch-0.2.5.dist-info/WHEEL +4 -0
- kubetorch-0.2.5.dist-info/entry_points.txt +5 -0
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
import subprocess
|
|
2
|
+
import textwrap
|
|
3
|
+
import uuid
|
|
4
|
+
|
|
5
|
+
from functools import cached_property
|
|
6
|
+
from typing import Dict
|
|
7
|
+
|
|
8
|
+
from kubernetes import client
|
|
9
|
+
from kubernetes.client import ApiException, V1PersistentVolumeClaim
|
|
10
|
+
|
|
11
|
+
from kubetorch.constants import DEFAULT_VOLUME_ACCESS_MODE, KT_MOUNT_FOLDER
|
|
12
|
+
from kubetorch.globals import config
|
|
13
|
+
from kubetorch.logger import get_logger
|
|
14
|
+
from kubetorch.utils import load_kubeconfig
|
|
15
|
+
|
|
16
|
+
# Module-level logger, obtained from kubetorch's `get_logger` helper and keyed by this module's name.
logger = get_logger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Volume:
    """
    Manages persistent storage for Kubetorch services and deployments.

    A ``Volume`` is a thin wrapper around a Kubernetes PersistentVolumeClaim (PVC)
    of the same name. It can look up, create, delete and check the existence of
    that PVC, and render the pod-template fragment needed to mount it.
    """

    # Provisioners considered ReadWriteMany-capable when a storage class has to be
    # auto-selected for a volume whose access mode is "ReadWriteMany".
    _RWX_PROVISIONERS = frozenset(
        {
            "csi.juicefs.com",
            "nfs.csi.k8s.io",
            "cephfs.csi.ceph.com",
        }
    )

    def __init__(
        self,
        name: str,
        size: str,
        storage_class: str = None,
        mount_path: str = None,
        access_mode: str = None,
        namespace: str = None,
        core_v1: client.CoreV1Api = None,
    ):
        """
        Kubetorch Volume object, specifying persistent storage properties.

        Args:
            name (str): Name of the volume. Also used as the PVC name.
            size (str): Size of the volume.
            storage_class (str, optional): Storage class to use for the volume. When omitted,
                one is resolved lazily from the cluster (see ``storage_class``).
            mount_path (str, optional): Mount path for the volume. Defaults to
                ``/<KT_MOUNT_FOLDER>/<name>``.
            access_mode (str, optional): Access mode for the volume. Defaults to
                ``DEFAULT_VOLUME_ACCESS_MODE``.
            namespace (str, optional): Namespace for the volume.
            core_v1 (client.CoreV1Api, optional): Kubernetes core API client to use. When omitted,
                the kubeconfig is loaded and a new ``CoreV1Api`` client is created.

        Example:

        .. code-block:: python

            import kubetorch as kt

            kt.Volume(name="my-data", size="5Gi")  # Standard volume (ReadWriteOnce)

            # Shared volume (ReadWriteMany, requires JuiceFS or similar)
            kt.Volume(name="shared-data", size="10Gi", storage_class="juicefs-sc-shared", access_mode="ReadWriteMany")

            # uv cache
            compute = kt.Compute(
                cpus=".01",
                env_vars={
                    "UV_CACHE_DIR": "/ktfs/kt-global-cache/uv_cache",
                    "HF_HOME": "/ktfs/kt-global-cache/hf_cache",
                },
                volumes=[kt.Volume("kt-global-cache", size="10Gi")],
            )

        """
        self._storage_class = storage_class
        if core_v1 is None:
            # No client supplied: make sure cluster credentials are loaded before
            # the default CoreV1Api client is constructed below.
            load_kubeconfig()

        self.size = size
        self.access_mode = access_mode or DEFAULT_VOLUME_ACCESS_MODE
        self.mount_path = mount_path or f"/{KT_MOUNT_FOLDER}/{name}"

        self.name = name
        self.namespace = namespace
        self.core_v1 = core_v1 or client.CoreV1Api()

    @property
    def pvc_name(self) -> str:
        """Name of the backing PVC (identical to the volume name)."""
        return self.name

    @cached_property
    def storage_class(self) -> str:
        """
        Get storage class - either specified or cluster default.

        Resolution order:

        1. The ``storage_class`` passed to the constructor, if any.
        2. For ``ReadWriteMany`` volumes, the first storage class whose provisioner is
           in ``_RWX_PROVISIONERS``.
        3. The storage class annotated as the cluster default.
        4. The first storage class returned by the cluster.

        The result is cached on the instance after the first access.

        Raises:
            ValueError: If RWX is requested and no RWX-capable class exists, or if the
                cluster has no storage classes at all.
        """
        if self._storage_class:
            return self._storage_class

        try:
            storage_v1 = client.StorageV1Api()
            storage_classes = storage_v1.list_storage_class().items or []

            # If RWX is requested, prefer RWX-capable classes
            if self.access_mode == "ReadWriteMany":
                for sc in storage_classes:
                    provisioner = getattr(sc, "provisioner", "")
                    if provisioner in self._RWX_PROVISIONERS:
                        return sc.metadata.name
                raise ValueError("No RWX-capable storage class found")

            # Otherwise, pick the default StorageClass
            for sc in storage_classes:
                annotations = sc.metadata.annotations or {}
                if annotations.get("storageclass.kubernetes.io/is-default-class") == "true":
                    logger.info(f"Using default storage class: {sc.metadata.name}")
                    return sc.metadata.name

            # No default found, fall back to first available
            available_classes = [sc.metadata.name for sc in storage_classes]
            if not available_classes:
                # Fail with an explicit message instead of an IndexError on [0].
                raise ValueError(
                    "No storage classes found in the cluster. Create one or specify the storage_class parameter."
                )

            first_sc = available_classes[0]
            if len(available_classes) == 1:
                logger.info(f"No default storage class found, using only available one: {first_sc}")
            else:
                logger.warning(
                    f"No default storage class found, using first available: {first_sc}. "
                    f"Available: {available_classes}. Consider setting a default or specifying storage_class parameter."
                )
            return first_sc

        except Exception as e:
            logger.error(f"Failed to get storage classes: {e}")
            raise

    @classmethod
    def from_name(
        cls,
        name: str,
        create_if_missing: bool = False,
        namespace: str = None,
        core_v1: client.CoreV1Api = None,
        size: str = None,
    ) -> "Volume":
        """
        Get existing volume or optionally create it.

        Args:
            name (str): Name of the volume (and of its PVC).
            create_if_missing (bool, optional): Create the PVC when it does not exist.
                Requires ``size``. Defaults to ``False``.
            namespace (str, optional): Namespace to look in. Defaults to ``config.namespace``.
            core_v1 (client.CoreV1Api, optional): Kubernetes core API client to use. When omitted,
                the kubeconfig is loaded and a new client is created.
            size (str, optional): Size for the new volume. Only used when the PVC is missing
                and ``create_if_missing`` is ``True``.

        Returns:
            Volume: A volume populated from the existing PVC, or the newly created one.

        Raises:
            ValueError: If the PVC does not exist and ``create_if_missing`` is ``False``, or if
                it must be created but no ``size`` was given.
            ApiException: For any Kubernetes API error other than 404.
        """
        if core_v1 is None:
            load_kubeconfig()
            core_v1 = client.CoreV1Api()

        namespace = namespace or config.namespace
        pvc_name = name

        try:
            pvc = core_v1.read_namespaced_persistent_volume_claim(pvc_name, namespace)
        except ApiException as e:
            if e.status != 404:
                # Some other API error
                raise

            # PVC doesn't exist
            if not create_if_missing:
                raise ValueError(f"Volume '{name}' (PVC: {pvc_name}) does not exist in namespace '{namespace}'")

            if size is None:
                # The constructor requires a size; surface that clearly rather than
                # failing with a TypeError from the missing positional argument.
                raise ValueError(
                    f"Volume '{name}' does not exist in namespace '{namespace}' and cannot be created "
                    f"without a size. Pass size=... to from_name()."
                )

            vol = cls(name, size=size, namespace=namespace, core_v1=core_v1)
            vol.create()
            return vol

        storage_class = pvc.spec.storage_class_name
        requests = pvc.spec.resources.requests or {}
        size = requests.get("storage")
        access_mode = pvc.spec.access_modes[0] if pvc.spec.access_modes else DEFAULT_VOLUME_ACCESS_MODE

        annotations = pvc.metadata.annotations or {}
        mount_path = annotations.get("kubetorch.com/mount-path", f"/{KT_MOUNT_FOLDER}/{name}")

        # Create Volume with actual attributes from PVC
        vol = cls(
            name=name,
            storage_class=storage_class,
            mount_path=mount_path,
            size=size,
            access_mode=access_mode,
            namespace=namespace,
            core_v1=core_v1,
        )

        logger.debug(f"Loaded existing PVC {pvc_name} with storage_class={storage_class}")
        return vol

    def config(self) -> Dict[str, str]:
        """
        Get configuration for this volume.

        Note that reading ``storage_class`` may query the cluster when no storage class
        was specified explicitly.
        """
        return {
            "name": self.name,
            "size": self.size,
            "access_mode": self.access_mode,
            "mount_path": self.mount_path,
            "storage_class": self.storage_class,
            "namespace": self.namespace,
        }

    def pod_template_spec(self) -> dict:
        """Convert to Kubernetes volume spec for pod template."""
        return {
            "name": self.name,
            "persistentVolumeClaim": {"claimName": self.pvc_name},
        }

    def create(self) -> V1PersistentVolumeClaim:
        """
        Create PVC if it doesn't exist.

        Returns:
            V1PersistentVolumeClaim: The already existing PVC if one is found, otherwise the
            newly created PVC.

        Raises:
            Exception: Any error raised while reading or creating the PVC is logged and re-raised.
        """
        try:
            try:
                # Check if PVC already exists
                existing_pvc = self.core_v1.read_namespaced_persistent_volume_claim(
                    name=self.pvc_name, namespace=self.namespace
                )
                logger.debug(f"PVC {self.pvc_name} already exists in namespace {self.namespace}")
                return existing_pvc
            except ApiException as e:
                if e.status != 404:
                    # Some other error occurred
                    raise
                # 404 means the PVC is missing: fall through and create it.

            logger.info(f"Creating new PVC with name: {self.pvc_name}")

            storage_class_name = self.storage_class

            pvc_spec = client.V1PersistentVolumeClaimSpec(
                access_modes=[self.access_mode],
                resources=client.V1ResourceRequirements(requests={"storage": self.size}),
                storage_class_name=storage_class_name,
            )

            pvc_metadata = client.V1ObjectMeta(
                name=self.pvc_name,
                labels={
                    "app": "kubetorch",
                    "kubetorch.com/volume": self.name,
                },
                # Persist the mount path so from_name() can restore it later.
                annotations={"kubetorch.com/mount-path": self.mount_path},
            )

            pvc = client.V1PersistentVolumeClaim(
                api_version="v1",
                kind="PersistentVolumeClaim",
                metadata=pvc_metadata,
                spec=pvc_spec,
            )

            created_pvc = self.core_v1.create_namespaced_persistent_volume_claim(namespace=self.namespace, body=pvc)

            logger.info(
                f"Successfully created PVC {self.pvc_name} in namespace {self.namespace} with "
                f"storage class {storage_class_name}"
            )
            return created_pvc

        except Exception as e:
            logger.error(f"Failed to create PVC {self.pvc_name}: {e}")
            raise

    def delete(self) -> None:
        """
        Delete the PVC.

        A missing PVC (404) is logged as a warning and otherwise ignored; any other
        API error is logged and re-raised.
        """
        try:
            self.core_v1.delete_namespaced_persistent_volume_claim(name=self.pvc_name, namespace=self.namespace)
            logger.debug(f"Successfully deleted PVC {self.pvc_name}")
        except ApiException as e:
            if e.status == 404:
                logger.warning(f"PVC {self.pvc_name} not found")
            else:
                logger.error(f"Failed to delete PVC {self.pvc_name}: {e}")
                raise

    def exists(self) -> bool:
        """
        Check if the PVC exists.

        Returns:
            bool: ``True`` if the PVC can be read, ``False`` if the API answers 404.

        Raises:
            ApiException: For any API error other than 404.
        """
        try:
            self.core_v1.read_namespaced_persistent_volume_claim(name=self.pvc_name, namespace=self.namespace)
            return True
        except ApiException as e:
            if e.status == 404:
                return False
            # Some other API error, re-raise
            raise

    def ssh(self, image: str = "alpine:latest"):
        """
        Launch an interactive debug shell with this volume mounted.

        This method creates a temporary Kubernetes pod that mounts the
        PersistentVolumeClaim (PVC) backing this Volume at the same path
        (`self.mount_path`) used by Kubetorch services.

        Args:
            image (str, optional): Container image to use for the debug pod.
                Must include a shell (e.g., `alpine:3.18`, `ubuntu:22.04`,
                or a custom tools image). Defaults to `alpine:latest`.

        Example:

        .. code-block:: python

            import kubetorch as kt

            vol = kt.Volume.from_name("kt-global-cache")
            vol.ssh()
        """
        # Local import keeps this block self-contained; json is only needed here.
        import json

        pod_name = f"debug-{self.name}-{uuid.uuid4().hex[:6]}"

        # `config` here is the module-level kubetorch config (method bodies do not see
        # the class-level `config` method). Fall back to it so a Volume constructed
        # without a namespace does not put None into the kubectl argument list.
        namespace = self.namespace or config.namespace

        # Build the pod override as a data structure and serialize it, so values
        # containing quotes or backslashes cannot corrupt the JSON document.
        overrides = {
            "apiVersion": "v1",
            "spec": {
                "containers": [
                    {
                        "name": "debug",
                        "image": image,
                        "stdin": True,
                        "tty": True,
                        "volumeMounts": [
                            {
                                "name": "vol",
                                "mountPath": self.mount_path,
                            }
                        ],
                    }
                ],
                "volumes": [
                    {
                        "name": "vol",
                        "persistentVolumeClaim": {
                            "claimName": self.pvc_name,
                        },
                    }
                ],
            },
        }

        cmd = [
            "kubectl",
            "run",
            pod_name,
            "--rm",
            "-it",
            "--namespace",
            namespace,
            "--image",
            image,
            "--restart=Never",
            "--overrides",
            json.dumps(overrides),
        ]

        # Suppress noisy "write on closed stream" error when exiting
        subprocess.run(cmd, stderr=subprocess.DEVNULL)
|
|
File without changes
|
|
File without changes
|