kubetorch 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kubetorch/__init__.py +59 -0
- kubetorch/cli.py +1939 -0
- kubetorch/cli_utils.py +967 -0
- kubetorch/config.py +453 -0
- kubetorch/constants.py +18 -0
- kubetorch/docs/Makefile +18 -0
- kubetorch/docs/__init__.py +0 -0
- kubetorch/docs/_ext/json_globaltoc.py +42 -0
- kubetorch/docs/api/cli.rst +10 -0
- kubetorch/docs/api/python/app.rst +21 -0
- kubetorch/docs/api/python/cls.rst +19 -0
- kubetorch/docs/api/python/compute.rst +25 -0
- kubetorch/docs/api/python/config.rst +11 -0
- kubetorch/docs/api/python/fn.rst +19 -0
- kubetorch/docs/api/python/image.rst +14 -0
- kubetorch/docs/api/python/secret.rst +18 -0
- kubetorch/docs/api/python/volumes.rst +13 -0
- kubetorch/docs/api/python.rst +101 -0
- kubetorch/docs/conf.py +69 -0
- kubetorch/docs/index.rst +20 -0
- kubetorch/docs/requirements.txt +5 -0
- kubetorch/globals.py +269 -0
- kubetorch/logger.py +59 -0
- kubetorch/resources/__init__.py +0 -0
- kubetorch/resources/callables/__init__.py +0 -0
- kubetorch/resources/callables/cls/__init__.py +0 -0
- kubetorch/resources/callables/cls/cls.py +159 -0
- kubetorch/resources/callables/fn/__init__.py +0 -0
- kubetorch/resources/callables/fn/fn.py +140 -0
- kubetorch/resources/callables/module.py +1315 -0
- kubetorch/resources/callables/utils.py +203 -0
- kubetorch/resources/compute/__init__.py +0 -0
- kubetorch/resources/compute/app.py +253 -0
- kubetorch/resources/compute/compute.py +2414 -0
- kubetorch/resources/compute/decorators.py +137 -0
- kubetorch/resources/compute/utils.py +1026 -0
- kubetorch/resources/compute/websocket.py +135 -0
- kubetorch/resources/images/__init__.py +1 -0
- kubetorch/resources/images/image.py +412 -0
- kubetorch/resources/images/images.py +64 -0
- kubetorch/resources/secrets/__init__.py +2 -0
- kubetorch/resources/secrets/kubernetes_secrets_client.py +377 -0
- kubetorch/resources/secrets/provider_secrets/__init__.py +0 -0
- kubetorch/resources/secrets/provider_secrets/anthropic_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/aws_secret.py +16 -0
- kubetorch/resources/secrets/provider_secrets/azure_secret.py +14 -0
- kubetorch/resources/secrets/provider_secrets/cohere_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/gcp_secret.py +16 -0
- kubetorch/resources/secrets/provider_secrets/github_secret.py +13 -0
- kubetorch/resources/secrets/provider_secrets/huggingface_secret.py +20 -0
- kubetorch/resources/secrets/provider_secrets/kubeconfig_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/lambda_secret.py +13 -0
- kubetorch/resources/secrets/provider_secrets/langchain_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/openai_secret.py +11 -0
- kubetorch/resources/secrets/provider_secrets/pinecone_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/providers.py +92 -0
- kubetorch/resources/secrets/provider_secrets/ssh_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/wandb_secret.py +11 -0
- kubetorch/resources/secrets/secret.py +224 -0
- kubetorch/resources/secrets/secret_factory.py +64 -0
- kubetorch/resources/secrets/utils.py +222 -0
- kubetorch/resources/volumes/__init__.py +0 -0
- kubetorch/resources/volumes/volume.py +340 -0
- kubetorch/servers/__init__.py +0 -0
- kubetorch/servers/http/__init__.py +0 -0
- kubetorch/servers/http/distributed_utils.py +2968 -0
- kubetorch/servers/http/http_client.py +802 -0
- kubetorch/servers/http/http_server.py +1622 -0
- kubetorch/servers/http/server_metrics.py +255 -0
- kubetorch/servers/http/utils.py +722 -0
- kubetorch/serving/__init__.py +0 -0
- kubetorch/serving/autoscaling.py +153 -0
- kubetorch/serving/base_service_manager.py +344 -0
- kubetorch/serving/constants.py +77 -0
- kubetorch/serving/deployment_service_manager.py +431 -0
- kubetorch/serving/knative_service_manager.py +487 -0
- kubetorch/serving/raycluster_service_manager.py +526 -0
- kubetorch/serving/service_manager.py +18 -0
- kubetorch/serving/templates/deployment_template.yaml +17 -0
- kubetorch/serving/templates/knative_service_template.yaml +19 -0
- kubetorch/serving/templates/kt_setup_template.sh.j2 +91 -0
- kubetorch/serving/templates/pod_template.yaml +198 -0
- kubetorch/serving/templates/raycluster_service_template.yaml +42 -0
- kubetorch/serving/templates/raycluster_template.yaml +35 -0
- kubetorch/serving/templates/service_template.yaml +21 -0
- kubetorch/serving/templates/workerset_template.yaml +36 -0
- kubetorch/serving/utils.py +344 -0
- kubetorch/utils.py +263 -0
- kubetorch-0.2.5.dist-info/METADATA +75 -0
- kubetorch-0.2.5.dist-info/RECORD +92 -0
- kubetorch-0.2.5.dist-info/WHEEL +4 -0
- kubetorch-0.2.5.dist-info/entry_points.txt +5 -0
kubetorch/config.py
ADDED
|
@@ -0,0 +1,453 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from functools import cached_property
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import yaml
|
|
6
|
+
|
|
7
|
+
from kubetorch.logger import get_logger
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
logger = get_logger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Config key -> name of the environment variable that can supply its value.
ENV_MAPPINGS = dict(
    username="KT_USERNAME",
    license_key="KT_LICENSE_KEY",
    namespace="KT_NAMESPACE",
    install_namespace="KT_INSTALL_NAMESPACE",
    install_url="KT_INSTALL_URL",
    stream_logs="KT_STREAM_LOGS",
    stream_metrics="KT_STREAM_METRICS",
    log_verbosity="KT_LOG_VERBOSITY",
    queue="KT_QUEUE",
    volumes="KT_VOLUMES",
    api_url="KT_API_URL",
    cluster_config="KT_CLUSTER_CONFIG",
)

# Install namespace used when none is configured.
DEFAULT_INSTALL_NAMESPACE = "kubetorch"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class KubetorchConfig:
    """Lazily resolved Kubetorch settings.

    Every key in ``ENV_MAPPINGS`` is exposed as a property. A property returns the value assigned
    in this process if there is one; otherwise it is resolved on first access from the matching
    ``KT_*`` environment variable and then from ``~/.kt/config.yaml``, and the result is kept on
    the instance. ``namespace`` is the exception: a non-default ``install_namespace`` always wins.
    """

    CONFIG_FILE = Path("~/.kt/config.yaml")

    def __init__(self):
        # ``None`` means "not resolved yet"; the properties below fill these in on demand.
        self._api_url = None
        self._cluster_config = None
        self._install_namespace = None
        self._install_url = None
        self._license_key = None
        self._log_verbosity = None
        self._namespace = None
        self._queue = None
        self._stream_logs = None
        self._stream_metrics = None
        self._username = None
        self._volumes = None

    @cached_property
    def file_cache(self):
        """Contents of the config file, read once and cached until ``write`` invalidates it."""
        return self._load_from_file()

    @cached_property
    def current_context(self):
        """Namespace of the current Kubernetes context, or ``"default"`` if it cannot be determined.

        Inside a cluster the namespace is read from the mounted service account; otherwise it is
        taken from the active kubeconfig context.
        """
        try:
            from kubetorch.servers.http.utils import is_running_in_kubernetes

            if is_running_in_kubernetes():
                try:
                    with open("/var/run/secrets/kubernetes.io/serviceaccount/namespace") as f:
                        return f.read().strip()
                except FileNotFoundError:
                    return "default"

            else:
                from kubernetes import config

                from kubetorch.utils import load_kubeconfig

                load_kubeconfig()
                _, active_context = config.list_kube_config_contexts()
                return active_context.get("context", {}).get("namespace", "default")

        except Exception:
            # Best effort: any failure while probing the environment falls back to "default".
            return "default"

    @property
    def username(self):
        """Username to use for Kubetorch deployments.

        Used for authentication and resource naming. Will be validated to ensure Kubernetes compatibility.
        """
        if not self._username:
            self._username = self._env_or_file("username")
        return self._username

    @username.setter
    def username(self, value):
        """Set kubetorch username for current process."""
        from kubetorch.utils import validate_username

        validated = validate_username(value)
        if validated != value:
            logger.info(f"Username was validated and changed to {validated} to be Kubernetes-compatible.")
        self._username = validated

    @property
    def license_key(self):
        """License key for authentication and billing.

        Required for usage reporting and cluster authentication.
        Can be found in the `basic install guide <https://www.run.house/kubetorch/installation>`_.
        """
        if not self._license_key:
            self._license_key = self._env_or_file("license_key")
        return self._license_key

    @license_key.setter
    def license_key(self, value: str):
        """Set kubetorch license key for current process."""
        self._license_key = value

    @property
    def queue(self):
        """Default queue name for scheduling services.

        Controls how cluster resources are allocated and prioritized for services.
        See `scheduling and queues <https://www.run.house/kubetorch/advanced-installation#scheduling-and-queues>`_ for more info.
        """
        if not self._queue:
            self._queue = self._env_or_file("queue")
        return self._queue

    @queue.setter
    def queue(self, value: str):
        """Set the default queue name for current process."""
        self._queue = value

    @property
    def volumes(self):
        """Default volumes, as a list of names (or ``None`` when unset).

        A comma-separated string coming from the environment variable or the config file is
        normalized exactly like a string passed to the setter, so the result does not depend on
        where the value came from.
        """
        if not self._volumes:
            raw = self._env_or_file("volumes")
            # Non-string values (e.g. a YAML list from the config file) are passed through untouched.
            self._volumes = self._parse_volumes(raw) if isinstance(raw, str) else raw
        return self._volumes

    @volumes.setter
    def volumes(self, values):
        """Set default volumes from a list of strings or a comma-separated string.

        Raises:
            ValueError: If ``values`` is neither ``None``, a string, nor a list.
        """
        if values is None:
            self._volumes = None
        elif isinstance(values, str):
            self._volumes = self._parse_volumes(values)
        elif isinstance(values, list):
            self._volumes = values
        else:
            raise ValueError("volumes must be a list of strings or comma-separated string")

    @staticmethod
    def _parse_volumes(values):
        """Split a comma-separated string into stripped, non-empty names; ``"None"`` means unset."""
        if values == "None":
            return None
        return [v.strip() for v in values.split(",") if v.strip()]

    @property
    def api_url(self):
        """API URL, resolved from the environment variable and then the config file."""
        if not self._api_url:
            self._api_url = self._env_or_file("api_url")
        return self._api_url

    @api_url.setter
    def api_url(self, value: str):
        """Set the API URL for current process."""
        self._api_url = value

    @property
    def namespace(self):
        """Default Kubernetes namespace for Kubetorch deployments.

        All services will be deployed to this namespace unless overridden in the
        Compute resource constructor. If `install_namespace` is set, it will override this namespace.

        Priority:
            1. Explicit override
            2. Environment variable
            3. File cache
            4. In-cluster namespace or kubeconfig current context
        """
        if self.install_namespace and self.install_namespace != DEFAULT_INSTALL_NAMESPACE:
            # A non-default install namespace takes precedence over everything else.
            self._namespace = self.install_namespace
        elif self._namespace is None:
            ns = self._env_or_file("namespace")
            self._namespace = ns or self.current_context
        return self._namespace

    @namespace.setter
    def namespace(self, value):
        """Set namespace for current process."""
        self._namespace = value

    @property
    def install_namespace(self):
        """Namespace for Kubetorch installation. Used for Kubetorch Cloud clients.

        Priority:
            1. Explicit override
            2. Environment variable
            3. File cache
            4. Default install namespace
        """
        if self._install_namespace is None:
            ns = self._env_or_file("install_namespace")
            self._install_namespace = ns or DEFAULT_INSTALL_NAMESPACE
        return self._install_namespace

    @install_namespace.setter
    def install_namespace(self, value):
        """Set install namespace for current process."""
        self._install_namespace = value

    @property
    def install_url(self):
        """URL of the Kubetorch version to install.

        Used when installing Kubetorch in a Docker image or remote environment.
        Can be found in the `basic install guide <https://www.run.house/kubetorch/installation>`_.
        """
        if self._install_url is None:
            self._install_url = self._env_or_file("install_url")
        return self._install_url

    @install_url.setter
    def install_url(self, value):
        """Set default kubetorch install url in current process."""
        self._install_url = value

    @property
    def log_verbosity(self):
        """Verbosity of logs streamed from a remote deployment.
        Log levels include ``debug``, ``info``, and ``critical``. Default is ``info``.

        Note:
            Only relevant when ``stream_logs`` is set to ``true``.
        """
        from kubetorch.utils import LogVerbosity

        default_verbosity = LogVerbosity.INFO.value

        if self._log_verbosity is None:
            verbosity_env_var = self._get_env_var("log_verbosity")
            if verbosity_env_var:
                try:
                    verbosity_env_var = LogVerbosity(verbosity_env_var).value
                except ValueError:
                    # An unrecognized value in the environment falls back to the default.
                    verbosity_env_var = default_verbosity

                self._log_verbosity = verbosity_env_var
            else:
                self._log_verbosity = self.file_cache.get("log_verbosity", default_verbosity)

        return self._log_verbosity

    @log_verbosity.setter
    def log_verbosity(self, value):
        """Set log verbosity.

        Raises:
            ValueError: If ``value`` is truthy but not a valid ``LogVerbosity`` value.
        """
        from kubetorch.utils import LogVerbosity

        try:
            # In case we are unsetting log_verbosity, None is a valid value
            verbosity = LogVerbosity(value).value if value else None
        except ValueError:
            raise ValueError("Invalid log verbosity value. Must be one of: 'debug', 'info', 'critical'.")

        self._log_verbosity = verbosity

    @property
    def stream_logs(self):
        """Whether to stream logs for Kubetorch services.

        When enabled, logs from remote services are streamed back to your local environment
        in real-time. Verbosity of the streamed logs can be controlled with ``log_verbosity``.
        Default is ``True``.

        When disabled, logs remain accessible in-cluster but are not streamed to the client.

        Note:
            Requires logging to be configured in the cluster (``logStreaming.enabled: true`` in the Helm chart)
        """
        if self._stream_logs is None:
            env_value = self._get_env_var("stream_logs")
            if env_value:
                self._stream_logs = env_value.lower() == "true"
            else:
                self._stream_logs = self.file_cache.get("stream_logs", True)  # Default to True
        return self._stream_logs

    @stream_logs.setter
    def stream_logs(self, value):
        """Set log streaming for current process.

        Accepts a bool, the strings ``"true"``/``"false"`` (case-insensitive), or ``None`` to unset.

        Raises:
            ValueError: If ``value`` has any other form, or if streaming is being enabled while
                ``check_loki_enabled()`` reports that it is not available.
        """
        from kubetorch.serving.utils import check_loki_enabled

        bool_value = value

        if not isinstance(value, bool):
            if value is None:
                pass  # case we are unsetting stream_logs, so None is a valid value
            elif isinstance(value, str) and value.lower() in ["true", "false"]:
                bool_value = value.lower() == "true"
            else:
                raise ValueError("stream_logs must be a boolean value")
        if bool_value:
            # Check if the cluster has loki enabled
            if not check_loki_enabled():
                raise ValueError(
                    "Log streaming is not enabled in the cluster. Set `stream_logs` to False or "
                    "re-install the Kubetorch Helm chart with `logStreaming.enabled = true`"
                )
        self._stream_logs = bool_value

    @property
    def stream_metrics(self):
        """Whether to stream metrics during execution of Kubetorch services.

        When enabled, real-time CPU, memory, and GPU utilization metrics from remote Kubetorch services
        are streamed back to the local environment for live monitoring.
        Default is ``True``.

        When disabled, metrics are not collected.

        Note:
            Requires monitoring to be configured in the cluster (``metrics.enabled: true`` in the Helm chart)
        """
        if self._stream_metrics is None:
            env_value = self._get_env_var("stream_metrics")
            if env_value:
                self._stream_metrics = env_value.lower() == "true"
            else:
                self._stream_metrics = self.file_cache.get("stream_metrics", True)  # Default to True
        return self._stream_metrics

    @stream_metrics.setter
    def stream_metrics(self, value):
        """Set metrics streaming for current process.

        Accepts a bool, the strings ``"true"``/``"false"`` (case-insensitive), or ``None`` to unset.

        Raises:
            ValueError: If ``value`` has any other form, or if streaming is being enabled while
                ``check_prometheus_enabled()`` reports that it is not available.
        """
        from kubetorch.serving.utils import check_prometheus_enabled

        bool_value = value

        if not isinstance(value, bool):
            if value is None:
                pass  # case we are unsetting stream_metrics, so None is a valid value
            elif isinstance(value, str) and value.lower() in ["true", "false"]:
                bool_value = value.lower() == "true"
            else:
                raise ValueError("stream_metrics must be a boolean value")
        if bool_value:
            # Check if the cluster has prometheus enabled
            if not check_prometheus_enabled():
                raise ValueError(
                    "Metrics is not enabled in the cluster. Set `stream_metrics` to False or "
                    "re-install the Kubetorch Helm chart with `metrics.enabled = true`"
                )
        self._stream_metrics = bool_value

    @property
    def cluster_config(self):
        """Cluster Config.
        Default is ``{}``.
        """
        from kubetorch.utils import string_to_dict

        config = self._cluster_config
        if self._cluster_config is None:
            config = string_to_dict(self._get_env_var("cluster_config") or "")
            if not config:
                config = string_to_dict(self.file_cache.get("cluster_config", "{}"))
            self._cluster_config = config
        return config

    @cluster_config.setter
    def cluster_config(self, value):
        """Set Cluster Config.

        A dict is stored as-is, a string is converted with ``string_to_dict``, and any other
        value resets the config to ``{}``.
        """
        from kubetorch.utils import string_to_dict

        new_value = value
        if not isinstance(new_value, dict):
            if isinstance(new_value, str):
                new_value = string_to_dict(new_value)
            else:
                new_value = {}  # Default to empty dict
        self._cluster_config = new_value

    def __iter__(self):
        """Yield ``(key, value)`` for every known config key, mapping the string ``"None"`` to ``None``."""
        for key in ENV_MAPPINGS:
            value = getattr(self, key)
            if value == "None":
                value = None
            yield key, value

    def set(self, key, value):
        """Assign ``value`` to config ``key`` through its property setter and return the stored value.

        Raises:
            ValueError: If ``key`` is not a known config key.
        """
        if key not in ENV_MAPPINGS:
            raise ValueError(f"Unknown config key: {key}")
        setattr(self, key, value)
        # If value is None (e.g. unsetting username), reading the property back would re-resolve it
        # from the environment or file cache, so return None rather than that re-resolved value.
        new_value = value if value is None else getattr(self, key)
        return new_value

    def get(self, key):
        """Return the resolved value of config ``key``.

        Raises:
            ValueError: If ``key`` is not a known config key.
        """
        if key not in ENV_MAPPINGS:
            raise ValueError(f"Unknown config key: {key}")
        return getattr(self, key)

    def write(self, values: dict = None):
        """Write out config to local ``~/.kt/config.yaml``, to be used globally.

        Args:
            values (optional): Dict of key-value pairs to write/update in the filesystem.
                If provided, only these keys will be updated. None values will remove the key from the file.

        Raises:
            ValueError: If ``values`` contains a key that is not a known config key.
        """
        config_path = self.CONFIG_FILE.expanduser()

        # Ensure directory exists
        config_path.parent.mkdir(parents=True, exist_ok=True)

        if values:
            # Merge into what is currently on disk so unrelated keys are preserved.
            values_to_write = self._load_from_file()
            for k, v in values.items():
                if k not in ENV_MAPPINGS:
                    raise ValueError(f"Unknown config key: {k}")
                if v is None:
                    values_to_write.pop(k, None)
                else:
                    values_to_write[k] = str(v) if isinstance(v, dict) else v
        else:
            # No explicit values: persist every currently resolved, non-None setting.
            values_to_write = {k: str(v) if isinstance(v, dict) else v for k, v in dict(self).items() if v is not None}

        # Write to file
        with config_path.open("w") as stream:
            yaml.safe_dump(values_to_write, stream)

        # Invalidate file cache so it reloads on next access
        self.__dict__.pop("file_cache", None)

    def _get_env_var(self, key):
        """Return the value of the environment variable mapped to ``key``, or ``None`` if unset."""
        return os.getenv(ENV_MAPPINGS[key])

    def _env_or_file(self, key):
        """Return the environment value for ``key`` if it is non-empty, else the config-file value."""
        return self._get_env_var(key) or self.file_cache.get(key)

    def _get_config_env_vars(self):
        """Get config values as environment variables with proper KT_ prefixes."""
        env_vars = {}
        for key, value in dict(self).items():
            if value is not None and key in ENV_MAPPINGS:
                env_vars[ENV_MAPPINGS[key]] = value
        return env_vars

    def _load_from_file(self):
        """Load the config file and return its contents as a dict.

        Returns ``{}`` when the file does not exist, is empty, or does not contain a YAML mapping.
        """
        config_path = self.CONFIG_FILE.expanduser()
        try:
            # Open directly instead of checking exists() first, so a file removed in between
            # is treated as "no config" rather than raising.
            with config_path.open("r") as stream:
                loaded = yaml.safe_load(stream)
        except FileNotFoundError:
            return {}

        if not loaded:
            return {}
        if not isinstance(loaded, dict):
            # Every consumer calls .get() on the result, so a non-mapping document is unusable.
            logger.warning(f"Ignoring config file {config_path}: expected a YAML mapping.")
            return {}
        return loaded
|
kubetorch/constants.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
LOCALHOST: str = "127.0.0.1"
DEFAULT_KUBECONFIG_PATH: str = "~/.kube/config"
MAX_PORT_TRIES: int = 10

# CLI constants
DOUBLE_SPACE_UNICODE: str = "\u00a0" * 2  # two NO-BREAK SPACE characters
BULLET_UNICODE: str = "\N{BULLET}"

MAX_USERNAME_LENGTH: int = 16

CPU_RATE: float = 0.01
GPU_RATE: float = 0.05

KT_MOUNT_FOLDER: str = "ktfs"
DEFAULT_VOLUME_ACCESS_MODE: str = "ReadWriteMany"

DASHBOARD_PORT: int = 3001
GRAFANA_PORT: int = 3000
|
kubetorch/docs/Makefile
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Makefile for Sphinx documentation

# You can override these from the command line:
SPHINXBUILD ?= sphinx-build
SPHINXOPTS ?=
SOURCEDIR = .
BUILDDIR = _build

# None of these targets creates a file with its own name. Declaring them phony
# keeps a stray file or directory called `help` or `json` from making the
# target look up to date. `Makefile` is phony so the catch-all rule below,
# which depends on it, always runs.
.PHONY: help json Makefile

# Default target
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS)

# JSON output; SPHINXOPTS is intentionally not forwarded here (as before).
json:
	@$(SPHINXBUILD) -M json "$(SOURCEDIR)" "$(BUILDDIR)" -b json -t json

# Catch-all: route targets like `make html`, `make latexpdf`, etc.
# $@ is the name of the requested target, passed to sphinx-build as the builder.
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS)
|
|
File without changes
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from typing import Any, Dict
|
|
2
|
+
|
|
3
|
+
from sphinx.application import Sphinx
|
|
4
|
+
from sphinx.environment.adapters.toctree import TocTree
|
|
5
|
+
from sphinxcontrib.serializinghtml import JSONHTMLBuilder
|
|
6
|
+
|
|
7
|
+
__version__ = "0.0.1"


def setup(app: Sphinx) -> Dict[str, Any]:
    """Sphinx extension entry point: register this module's JSON builder.

    The builder is added with ``override=True``. The returned dict is the
    extension metadata: this module's version and a flag marking the extension
    as safe for parallel reading.
    """
    app.add_builder(SphinxGlobalTOCJSONHTMLBuilder, override=True)

    extension_metadata: Dict[str, Any] = {
        "version": __version__,
        "parallel_read_safe": True,
    }
    return extension_metadata
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class SphinxGlobalTOCJSONHTMLBuilder(JSONHTMLBuilder):
    """``JSONHTMLBuilder`` variant that adds a ``globaltoc`` entry to every page context."""

    name: str = "json"

    def get_doc_context(self, docname: str, body: str, metatags: str) -> Dict[str, Any]:
        """
        Extends :py:class:`sphinxcontrib.serializinghtml.JSONHTMLBuilder`.

        Returns the parent builder's context for the page with one extra key,
        ``globaltoc``, holding the rendered HTML of the global table of contents.

        Note:

            The **full global toc** of the whole documentation set is rendered
            into every page. That makes the toc trivial to display on each page
            and lets each branch and repo version carry its own toc.
        """
        context = super().get_doc_context(docname, body, metatags)

        # ``collapse`` is the third positional argument of ``get_toctree_for``.
        # False yields the entire doctree; True would include a submenu's HTML
        # only on pages that live inside that submenu.
        collapse = False
        toc_options = {"titles_only": True, "includehidden": False, "maxdepth": 2}
        global_toc_node = TocTree(self.env).get_toctree_for("index", self, collapse, **toc_options)

        context["globaltoc"] = self.render_partial(global_toc_node)["fragment"]
        return context
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
Command Line Interface
|
|
2
|
+
----------------------
|
|
3
|
+
|
|
4
|
+
Kubetorch provides a rich set of commands that give you insight into running workloads at the individual and cluster level.
|
|
5
|
+
For more details on the inputs, you can run ``kt <method> --help``.
|
|
6
|
+
|
|
7
|
+
.. automodule:: kubetorch.cli
|
|
8
|
+
:members:
|
|
9
|
+
:show-inheritance:
|
|
10
|
+
:exclude-members: kt_logs, kt_billing, kt_queues, kt_metrics, kt_dashboard, kt_status, kt_docs
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
App
|
|
2
|
+
===
|
|
3
|
+
|
|
4
|
+
The ``App`` class wraps a Python CLI command. It syncs over the file and any necessary requirements to the specified
|
|
5
|
+
compute, where it runs your file remotely. The file can be any Python file: a basic training script, a script that
|
|
6
|
+
uses kubetorch to deploy further services, or a FastAPI app.
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
Factory Method
|
|
10
|
+
~~~~~~~~~~~~~~
|
|
11
|
+
|
|
12
|
+
.. autofunction:: kubetorch.app
|
|
13
|
+
|
|
14
|
+
App Class
|
|
15
|
+
~~~~~~~~~
|
|
16
|
+
|
|
17
|
+
.. autoclass:: kubetorch.App
|
|
18
|
+
:members:
|
|
19
|
+
:exclude-members: from_name
|
|
20
|
+
|
|
21
|
+
.. automethod:: __init__
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Cls
|
|
2
|
+
===
|
|
3
|
+
|
|
4
|
+
The ``Cls`` class is a wrapper around your local Python classes. It can be sent to and live remotely on your compute,
|
|
5
|
+
then be called natively in Python from your local environment, while being run on remote compute.
|
|
6
|
+
|
|
7
|
+
Factory Method
|
|
8
|
+
~~~~~~~~~~~~~~
|
|
9
|
+
|
|
10
|
+
.. autofunction:: kubetorch.cls
|
|
11
|
+
|
|
12
|
+
Class
|
|
13
|
+
~~~~~
|
|
14
|
+
|
|
15
|
+
.. autoclass:: kubetorch.Cls
|
|
16
|
+
:members:
|
|
17
|
+
:inherited-members:
|
|
18
|
+
|
|
19
|
+
.. automethod:: __init__
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
Compute
|
|
2
|
+
=======
|
|
3
|
+
|
|
4
|
+
The ``Compute`` class lets you specify the right resources to request for your workloads, and control how that compute
|
|
5
|
+
behaves.
|
|
6
|
+
|
|
7
|
+
Compute Class
|
|
8
|
+
~~~~~~~~~~~~~~
|
|
9
|
+
|
|
10
|
+
.. autoclass:: kubetorch.Compute
|
|
11
|
+
:members:
|
|
12
|
+
:exclude-members: autoscale, distribute
|
|
13
|
+
|
|
14
|
+
.. automethod:: __init__
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
Autoscaling
|
|
18
|
+
~~~~~~~~~~~
|
|
19
|
+
|
|
20
|
+
.. automethod:: kubetorch.Compute.autoscale
|
|
21
|
+
|
|
22
|
+
Distributed
|
|
23
|
+
~~~~~~~~~~~
|
|
24
|
+
|
|
25
|
+
.. automethod:: kubetorch.Compute.distribute
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Config
|
|
2
|
+
=======
|
|
3
|
+
|
|
4
|
+
Certain configuration settings can be set globally for Kubetorch, such as a unique username, default namespace, or
|
|
5
|
+
installation url to use. More options to be added soon.
|
|
6
|
+
|
|
7
|
+
Config Class
|
|
8
|
+
~~~~~~~~~~~~~~
|
|
9
|
+
|
|
10
|
+
.. autoclass:: kubetorch.config.KubetorchConfig
|
|
11
|
+
:members:
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Fn
|
|
2
|
+
===
|
|
3
|
+
|
|
4
|
+
The ``Fn`` class is a wrapper around your local Python functions. It can be sent to and live remotely on your compute,
|
|
5
|
+
then be called natively in Python from your local environment, while being run on remote compute.
|
|
6
|
+
|
|
7
|
+
Factory Method
|
|
8
|
+
~~~~~~~~~~~~~~
|
|
9
|
+
|
|
10
|
+
.. autofunction:: kubetorch.fn
|
|
11
|
+
|
|
12
|
+
Class
|
|
13
|
+
~~~~~
|
|
14
|
+
|
|
15
|
+
.. autoclass:: kubetorch.Fn
|
|
16
|
+
:members:
|
|
17
|
+
:inherited-members:
|
|
18
|
+
|
|
19
|
+
.. automethod:: __init__
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
Image
|
|
2
|
+
=====
|
|
3
|
+
|
|
4
|
+
The ``Image`` class lets you specify a pre-built base image to use
|
|
5
|
+
at launch time, as well as additional setup steps required for your program, such as installs and env vars.
|
|
6
|
+
|
|
7
|
+
Image Class
|
|
8
|
+
~~~~~~~~~~~
|
|
9
|
+
|
|
10
|
+
.. autoclass:: kubetorch.Image
|
|
11
|
+
:members:
|
|
12
|
+
:undoc-members:
|
|
13
|
+
|
|
14
|
+
.. automethod:: __init__
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
Secret
|
|
2
|
+
======
|
|
3
|
+
|
|
4
|
+
Secrets such as provider keys and environment variables can be set when defining compute. These are set at launch time
|
|
5
|
+
and accessible during the scope of your program.
|
|
6
|
+
|
|
7
|
+
Factory Method
|
|
8
|
+
~~~~~~~~~~~~~~
|
|
9
|
+
|
|
10
|
+
.. autofunction:: kubetorch.secret
|
|
11
|
+
|
|
12
|
+
Class
|
|
13
|
+
~~~~~
|
|
14
|
+
|
|
15
|
+
.. autoclass:: kubetorch.Secret
|
|
16
|
+
:members:
|
|
17
|
+
|
|
18
|
+
.. automethod:: __init__
|