kubetorch 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. kubetorch/__init__.py +59 -0
  2. kubetorch/cli.py +1939 -0
  3. kubetorch/cli_utils.py +967 -0
  4. kubetorch/config.py +453 -0
  5. kubetorch/constants.py +18 -0
  6. kubetorch/docs/Makefile +18 -0
  7. kubetorch/docs/__init__.py +0 -0
  8. kubetorch/docs/_ext/json_globaltoc.py +42 -0
  9. kubetorch/docs/api/cli.rst +10 -0
  10. kubetorch/docs/api/python/app.rst +21 -0
  11. kubetorch/docs/api/python/cls.rst +19 -0
  12. kubetorch/docs/api/python/compute.rst +25 -0
  13. kubetorch/docs/api/python/config.rst +11 -0
  14. kubetorch/docs/api/python/fn.rst +19 -0
  15. kubetorch/docs/api/python/image.rst +14 -0
  16. kubetorch/docs/api/python/secret.rst +18 -0
  17. kubetorch/docs/api/python/volumes.rst +13 -0
  18. kubetorch/docs/api/python.rst +101 -0
  19. kubetorch/docs/conf.py +69 -0
  20. kubetorch/docs/index.rst +20 -0
  21. kubetorch/docs/requirements.txt +5 -0
  22. kubetorch/globals.py +269 -0
  23. kubetorch/logger.py +59 -0
  24. kubetorch/resources/__init__.py +0 -0
  25. kubetorch/resources/callables/__init__.py +0 -0
  26. kubetorch/resources/callables/cls/__init__.py +0 -0
  27. kubetorch/resources/callables/cls/cls.py +159 -0
  28. kubetorch/resources/callables/fn/__init__.py +0 -0
  29. kubetorch/resources/callables/fn/fn.py +140 -0
  30. kubetorch/resources/callables/module.py +1315 -0
  31. kubetorch/resources/callables/utils.py +203 -0
  32. kubetorch/resources/compute/__init__.py +0 -0
  33. kubetorch/resources/compute/app.py +253 -0
  34. kubetorch/resources/compute/compute.py +2414 -0
  35. kubetorch/resources/compute/decorators.py +137 -0
  36. kubetorch/resources/compute/utils.py +1026 -0
  37. kubetorch/resources/compute/websocket.py +135 -0
  38. kubetorch/resources/images/__init__.py +1 -0
  39. kubetorch/resources/images/image.py +412 -0
  40. kubetorch/resources/images/images.py +64 -0
  41. kubetorch/resources/secrets/__init__.py +2 -0
  42. kubetorch/resources/secrets/kubernetes_secrets_client.py +377 -0
  43. kubetorch/resources/secrets/provider_secrets/__init__.py +0 -0
  44. kubetorch/resources/secrets/provider_secrets/anthropic_secret.py +12 -0
  45. kubetorch/resources/secrets/provider_secrets/aws_secret.py +16 -0
  46. kubetorch/resources/secrets/provider_secrets/azure_secret.py +14 -0
  47. kubetorch/resources/secrets/provider_secrets/cohere_secret.py +12 -0
  48. kubetorch/resources/secrets/provider_secrets/gcp_secret.py +16 -0
  49. kubetorch/resources/secrets/provider_secrets/github_secret.py +13 -0
  50. kubetorch/resources/secrets/provider_secrets/huggingface_secret.py +20 -0
  51. kubetorch/resources/secrets/provider_secrets/kubeconfig_secret.py +12 -0
  52. kubetorch/resources/secrets/provider_secrets/lambda_secret.py +13 -0
  53. kubetorch/resources/secrets/provider_secrets/langchain_secret.py +12 -0
  54. kubetorch/resources/secrets/provider_secrets/openai_secret.py +11 -0
  55. kubetorch/resources/secrets/provider_secrets/pinecone_secret.py +12 -0
  56. kubetorch/resources/secrets/provider_secrets/providers.py +92 -0
  57. kubetorch/resources/secrets/provider_secrets/ssh_secret.py +12 -0
  58. kubetorch/resources/secrets/provider_secrets/wandb_secret.py +11 -0
  59. kubetorch/resources/secrets/secret.py +224 -0
  60. kubetorch/resources/secrets/secret_factory.py +64 -0
  61. kubetorch/resources/secrets/utils.py +222 -0
  62. kubetorch/resources/volumes/__init__.py +0 -0
  63. kubetorch/resources/volumes/volume.py +340 -0
  64. kubetorch/servers/__init__.py +0 -0
  65. kubetorch/servers/http/__init__.py +0 -0
  66. kubetorch/servers/http/distributed_utils.py +2968 -0
  67. kubetorch/servers/http/http_client.py +802 -0
  68. kubetorch/servers/http/http_server.py +1622 -0
  69. kubetorch/servers/http/server_metrics.py +255 -0
  70. kubetorch/servers/http/utils.py +722 -0
  71. kubetorch/serving/__init__.py +0 -0
  72. kubetorch/serving/autoscaling.py +153 -0
  73. kubetorch/serving/base_service_manager.py +344 -0
  74. kubetorch/serving/constants.py +77 -0
  75. kubetorch/serving/deployment_service_manager.py +431 -0
  76. kubetorch/serving/knative_service_manager.py +487 -0
  77. kubetorch/serving/raycluster_service_manager.py +526 -0
  78. kubetorch/serving/service_manager.py +18 -0
  79. kubetorch/serving/templates/deployment_template.yaml +17 -0
  80. kubetorch/serving/templates/knative_service_template.yaml +19 -0
  81. kubetorch/serving/templates/kt_setup_template.sh.j2 +91 -0
  82. kubetorch/serving/templates/pod_template.yaml +198 -0
  83. kubetorch/serving/templates/raycluster_service_template.yaml +42 -0
  84. kubetorch/serving/templates/raycluster_template.yaml +35 -0
  85. kubetorch/serving/templates/service_template.yaml +21 -0
  86. kubetorch/serving/templates/workerset_template.yaml +36 -0
  87. kubetorch/serving/utils.py +344 -0
  88. kubetorch/utils.py +263 -0
  89. kubetorch-0.2.5.dist-info/METADATA +75 -0
  90. kubetorch-0.2.5.dist-info/RECORD +92 -0
  91. kubetorch-0.2.5.dist-info/WHEEL +4 -0
  92. kubetorch-0.2.5.dist-info/entry_points.txt +5 -0
@@ -0,0 +1,340 @@
1
+ import subprocess
2
+ import textwrap
3
+ import uuid
4
+
5
+ from functools import cached_property
6
+ from typing import Dict
7
+
8
+ from kubernetes import client
9
+ from kubernetes.client import ApiException, V1PersistentVolumeClaim
10
+
11
+ from kubetorch.constants import DEFAULT_VOLUME_ACCESS_MODE, KT_MOUNT_FOLDER
12
+ from kubetorch.globals import config
13
+ from kubetorch.logger import get_logger
14
+ from kubetorch.utils import load_kubeconfig
15
+
16
+ logger = get_logger(__name__)
17
+
18
+
19
class Volume:
    """
    Manages persistent storage for Kubetorch services and deployments.

    A ``Volume`` wraps a single Kubernetes PersistentVolumeClaim (PVC). The PVC
    name is always the same as the volume name (see :attr:`pvc_name`).
    """

    def __init__(
        self,
        name: str,
        size: str,
        storage_class: str = None,
        mount_path: str = None,
        access_mode: str = None,
        namespace: str = None,
        core_v1: "client.CoreV1Api" = None,
    ):
        """
        Kubetorch Volume object, specifying persistent storage properties.

        Args:
            name (str): Name of the volume.
            size (str): Size of the volume.
            storage_class (str, optional): Storage class to use for the volume. When omitted,
                one is looked up in the cluster the first time ``storage_class`` is read.
            mount_path (str, optional): Mount path for the volume. Defaults to
                ``/<KT_MOUNT_FOLDER>/<name>``.
            access_mode (str, optional): Access mode for the volume. Defaults to
                ``DEFAULT_VOLUME_ACCESS_MODE``.
            namespace (str, optional): Namespace for the volume.
            core_v1 (client.CoreV1Api, optional): Kubernetes CoreV1 API client to use. When
                omitted, the kubeconfig is loaded and a new client is created.

        Example:

        .. code-block:: python

            import kubetorch as kt

            kt.Volume(name="my-data", size="5Gi")  # Standard volume (ReadWriteOnce)

            # Shared volume (ReadWriteMany, requires JuiceFS or similar)
            kt.Volume(name="shared-data", size="10Gi", storage_class="juicefs-sc-shared", access_mode="ReadWriteMany")

            # uv cache
            compute = kt.Compute(
                cpus=".01",
                env_vars={
                    "UV_CACHE_DIR": "/ktfs/kt-global-cache/uv_cache",
                    "HF_HOME": "/ktfs/kt-global-cache/hf_cache",
                },
                volumes=[kt.Volume("kt-global-cache", size="10Gi")],
            )

        """
        self._storage_class = storage_class
        if core_v1 is None:
            # Only load the kubeconfig when we have to build our own client.
            load_kubeconfig()

        self.size = size
        self.access_mode = access_mode or DEFAULT_VOLUME_ACCESS_MODE
        self.mount_path = mount_path or f"/{KT_MOUNT_FOLDER}/{name}"

        self.name = name
        self.namespace = namespace
        self.core_v1 = core_v1 or client.CoreV1Api()

    @property
    def pvc_name(self) -> str:
        """Name of the PVC backing this volume (identical to the volume name)."""
        return self.name

    @cached_property
    def storage_class(self) -> str:
        """
        Get storage class - either specified or cluster default.

        Resolution order when no storage class was given explicitly:

        1. For ``ReadWriteMany`` volumes, the first class whose provisioner is one of the
           known RWX-capable provisioners.
        2. The class annotated as the cluster default.
        3. The first class returned by the API.

        The result is cached on the instance after the first lookup.

        Raises:
            ValueError: If RWX is requested and no RWX-capable class exists, or if the
                cluster has no storage classes at all.
        """
        if self._storage_class:
            return self._storage_class

        try:
            storage_v1 = client.StorageV1Api()
            storage_classes = storage_v1.list_storage_class().items

            # If RWX is requested, prefer RWX-capable classes
            if self.access_mode == "ReadWriteMany":
                for sc in storage_classes:
                    provisioner = getattr(sc, "provisioner", "")
                    if provisioner in {
                        "csi.juicefs.com",
                        "nfs.csi.k8s.io",
                        "cephfs.csi.ceph.com",
                    }:
                        return sc.metadata.name
                raise ValueError("No RWX-capable storage class found")

            # Otherwise, pick the default StorageClass
            for sc in storage_classes:
                annotations = sc.metadata.annotations or {}
                if annotations.get("storageclass.kubernetes.io/is-default-class") == "true":
                    logger.info(f"Using default storage class: {sc.metadata.name}")
                    return sc.metadata.name

            # No default found, fall back to first available
            available_classes = [sc.metadata.name for sc in storage_classes]
            if not available_classes:
                # Guard the index below: an empty cluster would otherwise surface
                # as an opaque IndexError.
                raise ValueError(
                    "No storage classes found in the cluster. Create one or specify the storage_class parameter."
                )

            first_sc = available_classes[0]
            if len(available_classes) == 1:
                logger.info(f"No default storage class found, using only available one: {first_sc}")
            else:
                logger.warning(
                    f"No default storage class found, using first available: {first_sc}. "
                    f"Available: {available_classes}. Consider setting a default or specifying storage_class parameter."
                )
            return first_sc

        except Exception as e:
            logger.error(f"Failed to get storage classes: {e}")
            raise

    @classmethod
    def from_name(
        cls,
        name: str,
        create_if_missing: bool = False,
        namespace: str = None,
        core_v1: "client.CoreV1Api" = None,
        size: str = None,
    ) -> "Volume":
        """
        Get existing volume or optionally create it.

        Args:
            name (str): Name of the volume (and of its PVC).
            create_if_missing (bool, optional): Create the PVC when it does not exist.
                Requires ``size``. (Default: ``False``)
            namespace (str, optional): Namespace to look in. Defaults to ``config.namespace``.
            core_v1 (client.CoreV1Api, optional): Kubernetes CoreV1 API client to use.
            size (str, optional): Size to request when the volume has to be created.
                Ignored when the PVC already exists.

        Returns:
            Volume: A volume populated from the existing PVC, or the newly created one.

        Raises:
            ValueError: If the PVC does not exist and ``create_if_missing`` is false, or if
                it has to be created but no ``size`` was given.
            ApiException: For any API error other than 404.
        """
        if core_v1 is None:
            load_kubeconfig()
            core_v1 = client.CoreV1Api()

        namespace = namespace or config.namespace
        pvc_name = name

        try:
            pvc = core_v1.read_namespaced_persistent_volume_claim(pvc_name, namespace)
        except ApiException as e:
            if e.status != 404:
                # Some other API error
                raise

            # PVC doesn't exist
            if not create_if_missing:
                raise ValueError(f"Volume '{name}' (PVC: {pvc_name}) does not exist in namespace '{namespace}'")

            if size is None:
                # The constructor requires a size; fail with an actionable message
                # instead of a TypeError from the missing positional argument.
                raise ValueError(
                    f"Volume '{name}' does not exist in namespace '{namespace}' and cannot be created "
                    f"without a size. Pass size=... to from_name()."
                )

            vol = cls(name, size=size, namespace=namespace, core_v1=core_v1)
            vol.create()
            return vol

        storage_class = pvc.spec.storage_class_name
        size = pvc.spec.resources.requests.get("storage")
        access_mode = pvc.spec.access_modes[0] if pvc.spec.access_modes else DEFAULT_VOLUME_ACCESS_MODE

        annotations = pvc.metadata.annotations or {}
        mount_path = annotations.get("kubetorch.com/mount-path", f"/{KT_MOUNT_FOLDER}/{name}")

        # Create Volume with actual attributes from PVC
        vol = cls(
            name=name,
            storage_class=storage_class,
            mount_path=mount_path,
            size=size,
            access_mode=access_mode,
            namespace=namespace,
            core_v1=core_v1,
        )

        logger.debug(f"Loaded existing PVC {pvc_name} with storage_class={storage_class}")
        return vol

    def config(self) -> Dict[str, str]:
        """
        Get configuration for this volume.

        Reading ``storage_class`` here may query the cluster if no storage class was
        specified explicitly.
        """
        return {
            "name": self.name,
            "size": self.size,
            "access_mode": self.access_mode,
            "mount_path": self.mount_path,
            "storage_class": self.storage_class,
            "namespace": self.namespace,
        }

    def pod_template_spec(self) -> dict:
        """Convert to Kubernetes volume spec for pod template"""
        return {
            "name": self.name,
            "persistentVolumeClaim": {"claimName": self.pvc_name},
        }

    def create(self) -> "V1PersistentVolumeClaim":
        """
        Create PVC if it doesn't exist.

        Returns:
            V1PersistentVolumeClaim: The existing PVC if one is already present, otherwise
            the newly created PVC.

        Raises:
            Exception: Any failure is logged and re-raised unchanged.
        """
        try:
            try:
                # Check if PVC already exists
                existing_pvc = self.core_v1.read_namespaced_persistent_volume_claim(
                    name=self.pvc_name, namespace=self.namespace
                )
                logger.debug(f"PVC {self.pvc_name} already exists in namespace {self.namespace}")
                return existing_pvc
            except ApiException as e:
                if e.status != 404:
                    # Some other error occurred
                    raise

            logger.info(f"Creating new PVC with name: {self.pvc_name}")

            storage_class_name = self.storage_class

            pvc_spec = client.V1PersistentVolumeClaimSpec(
                access_modes=[self.access_mode],
                resources=client.V1ResourceRequirements(requests={"storage": self.size}),
                storage_class_name=storage_class_name,
            )

            pvc_metadata = client.V1ObjectMeta(
                name=self.pvc_name,
                labels={
                    "app": "kubetorch",
                    "kubetorch.com/volume": self.name,
                },
                # Persist the mount path so from_name() can restore it later.
                annotations={"kubetorch.com/mount-path": self.mount_path},
            )

            pvc = client.V1PersistentVolumeClaim(
                api_version="v1",
                kind="PersistentVolumeClaim",
                metadata=pvc_metadata,
                spec=pvc_spec,
            )

            created_pvc = self.core_v1.create_namespaced_persistent_volume_claim(namespace=self.namespace, body=pvc)

            logger.info(
                f"Successfully created PVC {self.pvc_name} in namespace {self.namespace} with "
                f"storage class {storage_class_name}"
            )
            return created_pvc

        except Exception as e:
            logger.error(f"Failed to create PVC {self.pvc_name}: {e}")
            raise

    def delete(self) -> None:
        """
        Delete the PVC.

        A missing PVC (404) is logged as a warning and otherwise ignored; any other API
        error is logged and re-raised.
        """
        try:
            self.core_v1.delete_namespaced_persistent_volume_claim(name=self.pvc_name, namespace=self.namespace)
            logger.debug(f"Successfully deleted PVC {self.pvc_name}")
        except ApiException as e:
            if e.status == 404:
                logger.warning(f"PVC {self.pvc_name} not found")
            else:
                logger.error(f"Failed to delete PVC {self.pvc_name}: {e}")
                raise

    def exists(self) -> bool:
        """
        Check if the PVC exists.

        Returns:
            bool: ``True`` if the PVC can be read, ``False`` on a 404.

        Raises:
            ApiException: For any API error other than 404.
        """
        try:
            self.core_v1.read_namespaced_persistent_volume_claim(name=self.pvc_name, namespace=self.namespace)
            return True
        except ApiException as e:
            if e.status == 404:
                return False
            # Some other API error, re-raise
            raise

    def _debug_pod_overrides(self, image: str) -> str:
        """Build the JSON passed to ``kubectl run --overrides`` for the debug pod."""
        import json

        overrides = {
            "apiVersion": "v1",
            "spec": {
                "containers": [
                    {
                        "name": "debug",
                        "image": image,
                        "stdin": True,
                        "tty": True,
                        "volumeMounts": [{"name": "vol", "mountPath": self.mount_path}],
                    }
                ],
                "volumes": [
                    {
                        "name": "vol",
                        "persistentVolumeClaim": {"claimName": self.pvc_name},
                    }
                ],
            },
        }
        # Serialize with json.dumps so quotes/backslashes in the image or mount path
        # are escaped and the override is always valid JSON.
        return json.dumps(overrides)

    def ssh(self, image: str = "alpine:latest"):
        """
        Launch an interactive debug shell with this volume mounted.

        This method creates a temporary Kubernetes pod that mounts the
        PersistentVolumeClaim (PVC) backing this Volume at the same path
        (`self.mount_path`) used by Kubetorch services.

        Args:
            image (str, optional): Container image to use for the debug pod.
                Must include a shell (e.g., `alpine:3.18`, `ubuntu:22.04`,
                or a custom tools image). Defaults to `alpine:latest`.

        Example:

        .. code-block:: python

            import kubetorch as kt

            vol = kt.Volume.from_name("kt-global-cache")
            vol.ssh()
        """
        pod_name = f"debug-{self.name}-{uuid.uuid4().hex[:6]}"

        # A Volume constructed directly may carry no namespace; fall back to the
        # configured one (as from_name does) rather than passing None to subprocess.
        namespace = self.namespace or config.namespace

        cmd = [
            "kubectl",
            "run",
            pod_name,
            "--rm",
            "-it",
            "--namespace",
            namespace,
            "--image",
            image,
            "--restart=Never",
            "--overrides",
            self._debug_pod_overrides(image),
        ]

        # Suppress noisy "write on closed stream" error when exiting
        subprocess.run(cmd, stderr=subprocess.DEVNULL)
File without changes
File without changes