kubetorch 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kubetorch/__init__.py +59 -0
- kubetorch/cli.py +1939 -0
- kubetorch/cli_utils.py +967 -0
- kubetorch/config.py +453 -0
- kubetorch/constants.py +18 -0
- kubetorch/docs/Makefile +18 -0
- kubetorch/docs/__init__.py +0 -0
- kubetorch/docs/_ext/json_globaltoc.py +42 -0
- kubetorch/docs/api/cli.rst +10 -0
- kubetorch/docs/api/python/app.rst +21 -0
- kubetorch/docs/api/python/cls.rst +19 -0
- kubetorch/docs/api/python/compute.rst +25 -0
- kubetorch/docs/api/python/config.rst +11 -0
- kubetorch/docs/api/python/fn.rst +19 -0
- kubetorch/docs/api/python/image.rst +14 -0
- kubetorch/docs/api/python/secret.rst +18 -0
- kubetorch/docs/api/python/volumes.rst +13 -0
- kubetorch/docs/api/python.rst +101 -0
- kubetorch/docs/conf.py +69 -0
- kubetorch/docs/index.rst +20 -0
- kubetorch/docs/requirements.txt +5 -0
- kubetorch/globals.py +269 -0
- kubetorch/logger.py +59 -0
- kubetorch/resources/__init__.py +0 -0
- kubetorch/resources/callables/__init__.py +0 -0
- kubetorch/resources/callables/cls/__init__.py +0 -0
- kubetorch/resources/callables/cls/cls.py +159 -0
- kubetorch/resources/callables/fn/__init__.py +0 -0
- kubetorch/resources/callables/fn/fn.py +140 -0
- kubetorch/resources/callables/module.py +1315 -0
- kubetorch/resources/callables/utils.py +203 -0
- kubetorch/resources/compute/__init__.py +0 -0
- kubetorch/resources/compute/app.py +253 -0
- kubetorch/resources/compute/compute.py +2414 -0
- kubetorch/resources/compute/decorators.py +137 -0
- kubetorch/resources/compute/utils.py +1026 -0
- kubetorch/resources/compute/websocket.py +135 -0
- kubetorch/resources/images/__init__.py +1 -0
- kubetorch/resources/images/image.py +412 -0
- kubetorch/resources/images/images.py +64 -0
- kubetorch/resources/secrets/__init__.py +2 -0
- kubetorch/resources/secrets/kubernetes_secrets_client.py +377 -0
- kubetorch/resources/secrets/provider_secrets/__init__.py +0 -0
- kubetorch/resources/secrets/provider_secrets/anthropic_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/aws_secret.py +16 -0
- kubetorch/resources/secrets/provider_secrets/azure_secret.py +14 -0
- kubetorch/resources/secrets/provider_secrets/cohere_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/gcp_secret.py +16 -0
- kubetorch/resources/secrets/provider_secrets/github_secret.py +13 -0
- kubetorch/resources/secrets/provider_secrets/huggingface_secret.py +20 -0
- kubetorch/resources/secrets/provider_secrets/kubeconfig_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/lambda_secret.py +13 -0
- kubetorch/resources/secrets/provider_secrets/langchain_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/openai_secret.py +11 -0
- kubetorch/resources/secrets/provider_secrets/pinecone_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/providers.py +92 -0
- kubetorch/resources/secrets/provider_secrets/ssh_secret.py +12 -0
- kubetorch/resources/secrets/provider_secrets/wandb_secret.py +11 -0
- kubetorch/resources/secrets/secret.py +224 -0
- kubetorch/resources/secrets/secret_factory.py +64 -0
- kubetorch/resources/secrets/utils.py +222 -0
- kubetorch/resources/volumes/__init__.py +0 -0
- kubetorch/resources/volumes/volume.py +340 -0
- kubetorch/servers/__init__.py +0 -0
- kubetorch/servers/http/__init__.py +0 -0
- kubetorch/servers/http/distributed_utils.py +2968 -0
- kubetorch/servers/http/http_client.py +802 -0
- kubetorch/servers/http/http_server.py +1622 -0
- kubetorch/servers/http/server_metrics.py +255 -0
- kubetorch/servers/http/utils.py +722 -0
- kubetorch/serving/__init__.py +0 -0
- kubetorch/serving/autoscaling.py +153 -0
- kubetorch/serving/base_service_manager.py +344 -0
- kubetorch/serving/constants.py +77 -0
- kubetorch/serving/deployment_service_manager.py +431 -0
- kubetorch/serving/knative_service_manager.py +487 -0
- kubetorch/serving/raycluster_service_manager.py +526 -0
- kubetorch/serving/service_manager.py +18 -0
- kubetorch/serving/templates/deployment_template.yaml +17 -0
- kubetorch/serving/templates/knative_service_template.yaml +19 -0
- kubetorch/serving/templates/kt_setup_template.sh.j2 +91 -0
- kubetorch/serving/templates/pod_template.yaml +198 -0
- kubetorch/serving/templates/raycluster_service_template.yaml +42 -0
- kubetorch/serving/templates/raycluster_template.yaml +35 -0
- kubetorch/serving/templates/service_template.yaml +21 -0
- kubetorch/serving/templates/workerset_template.yaml +36 -0
- kubetorch/serving/utils.py +344 -0
- kubetorch/utils.py +263 -0
- kubetorch-0.2.5.dist-info/METADATA +75 -0
- kubetorch-0.2.5.dist-info/RECORD +92 -0
- kubetorch-0.2.5.dist-info/WHEEL +4 -0
- kubetorch-0.2.5.dist-info/entry_points.txt +5 -0
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import random
|
|
2
|
+
import socket
|
|
3
|
+
import threading
|
|
4
|
+
import time
|
|
5
|
+
|
|
6
|
+
import websocket
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class WebSocketRsyncTunnel:
    """Context manager that exposes a WebSocket endpoint as a local TCP port.

    On entry, a listening socket is bound on ``127.0.0.1`` at a free port at or
    above ``local_port`` and the chosen port is stored in ``self.local_port``.
    Each TCP connection accepted on that port gets its own WebSocket connection
    to ``ws_url``; bytes are relayed in both directions by two daemon threads.
    On exit, the listening socket is closed and the relay loops are told to stop.

    Args:
        local_port (int): Preferred local port. A random offset of 0-50 is added
            and up to 100 consecutive ports are scanned per attempt.
        ws_url (str): WebSocket URL that every accepted connection is relayed to.
    """

    def __init__(self, local_port: int, ws_url: str):
        self.requested_port = local_port
        self.local_port = None  # Will be set in __enter__
        self.ws_url = ws_url
        self.running = False
        self.server_socket = None

    def __enter__(self):
        """Bind the local listener, start accepting, and return ``self``.

        Raises:
            RuntimeError: If no port could be bound after all attempts, or if the
                listener never became connectable.
            OSError: If binding fails for a reason other than "address in use".
        """
        self.running = True

        # Randomised start ports spread out concurrent tunnels that were all
        # given the same requested port.
        max_attempts = 20
        port_range = 100  # Much wider range to avoid collisions

        for _ in range(max_attempts):
            start_port = self.requested_port + random.randint(0, 50)
            if self._bind_in_range(start_port, port_range):
                break
            # Small random delay to reduce simultaneous collision probability
            time.sleep(random.uniform(0.01, 0.05))
        else:
            self.running = False
            raise RuntimeError(
                f"Could not find available port after {max_attempts} attempts starting from {self.requested_port}"
            )

        threading.Thread(target=self._accept_loop, daemon=True).start()

        # Wait for ready: probe the listener until a connection succeeds.
        for _ in range(50):
            try:
                with socket.socket() as s:
                    s.settimeout(0.1)
                    if s.connect_ex(("127.0.0.1", self.local_port)) == 0:
                        return self
            except OSError:
                # Only socket-level failures are retried; KeyboardInterrupt and
                # friends must propagate instead of being swallowed.
                pass
            time.sleep(0.1)

        # __exit__ is not invoked when __enter__ raises, so release the
        # listener here instead of leaking it.
        self.__exit__(None, None, None)
        raise RuntimeError("Tunnel failed to start")

    def __exit__(self, *args):
        """Stop the relay loops and close the listening socket."""
        self.running = False
        if self.server_socket:
            try:
                self.server_socket.close()
            except OSError:
                pass  # Already closed

    def _bind_in_range(self, start_port, port_range):
        """Bind and listen on the first free port in ``[start_port, start_port + port_range)``.

        On success stores the socket in ``self.server_socket`` and the bound port
        in ``self.local_port`` and returns ``True``; returns ``False`` when every
        candidate port is already in use.
        """
        import errno  # local import keeps this block self-contained

        # Ports above 65535 are invalid; clamp so they are never handed to bind().
        for port in range(start_port, min(start_port + port_range, 65536)):
            # A fresh socket per candidate: a socket that bound successfully but
            # failed in listen() cannot be bound again.
            candidate = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                candidate.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                candidate.bind(("127.0.0.1", port))
                candidate.listen(5)
            except OSError as e:
                candidate.close()
                # errno.EADDRINUSE is platform-correct (98 is only the Linux value).
                if e.errno == errno.EADDRINUSE:
                    continue
                raise
            except Exception:
                candidate.close()
                raise

            self.server_socket = candidate
            self.local_port = candidate.getsockname()[1]
            return True
        return False

    def _accept_loop(self):
        """Accept local connections and hand each one to its own daemon thread."""
        while self.running:
            try:
                client_sock, _ = self.server_socket.accept()
                threading.Thread(target=self._handle_client, args=(client_sock,), daemon=True).start()
            except Exception:
                # accept() fails once the listener is closed in __exit__.
                break

    def _handle_client(self, client_sock):
        """Relay one accepted TCP connection to a new WebSocket connection and back."""
        ws = None
        try:
            ws = websocket.create_connection(self.ws_url)

            def tcp_to_ws():
                while self.running:
                    try:
                        data = client_sock.recv(65536)
                        if not data:
                            break  # local peer closed its side
                        ws.send_binary(data)
                    except Exception:
                        break

            def ws_to_tcp():
                while self.running:
                    try:
                        data = ws.recv()
                        if isinstance(data, bytes):
                            # sendall: a plain send() may write only part of the
                            # buffer and silently drop the remainder.
                            client_sock.sendall(data)
                    except Exception:
                        break

            # NOTE(review): both pumps must finish before cleanup runs, so a pump
            # blocked in recv() keeps this handler alive after the other direction
            # has ended - confirm whether an early close of the peer is wanted.
            t1 = threading.Thread(target=tcp_to_ws, daemon=True)
            t2 = threading.Thread(target=ws_to_tcp, daemon=True)
            t1.start()
            t2.start()
            t1.join()
            t2.join()

        except Exception as e:
            print(f"WebSocket connection error: {e}")

        finally:
            # close both connections
            for conn in [client_sock, ws]:
                if conn:
                    try:
                        conn.close()
                    except Exception:
                        pass
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .images import * # noqa: F403
|
|
@@ -0,0 +1,412 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
from typing import Any, Dict, List, Union
|
|
3
|
+
|
|
4
|
+
# Internal class to represent the image construction process
|
|
5
|
+
class ImageSetupStepType(Enum):
    """Closed set of step kinds that an ``ImageSetupStep`` can carry.

    The string values are what gets written into a serialized step config.
    """

    CMD_RUN = "cmd_run"  # shell command
    RSYNC = "rsync"  # file / directory sync
    PIP_INSTALL = "pip_install"  # package installation
    SYNC_PACKAGE = "sync_package"  # local package sync
    SET_ENV_VARS = "set_env_vars"  # environment variables
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ImageSetupStep:
    def __init__(
        self,
        step_type: ImageSetupStepType,
        **kwargs: Any,
    ):
        """
        A component of the Kubetorch Image, consisting of the step type (e.g. pip_install, set_env_vars),
        along with arguments to provide to the function corresponding to the step type.

        Args:
            step_type (ImageSetupStepType): Type of setup step used to provide the Image.
            **kwargs (Any): Keyword arguments for the step, stored as a dict on ``self.kwargs``.
                Please refer to the corresponding functions in ``Image`` to determine
                the correct keyword arguments to provide.
        """
        self.step_type = step_type
        # ``**kwargs`` is already collected into a dict; each value may be of any type.
        self.kwargs = kwargs

    def __repr__(self) -> str:
        """Return a debugging representation showing the step type and its arguments."""
        return f"{type(self).__name__}(step_type={self.step_type!r}, kwargs={self.kwargs!r})"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class Image:
    def __init__(
        self,
        name: str = None,
        image_id: str = None,
        python_path: str = None,
        install_cmd: str = None,
    ):
        """
        Kubetorch Image: base image properties plus an ordered list of setup steps.

        Args:
            name (str, optional): Name to assign the Kubetorch image.
            image_id (str, optional): Machine image to use, if any. (Default: ``None``)
            python_path (str, optional): Absolute path to the Python executable to use for remote server
                and installs. (Default: ``None``)
            install_cmd (str, optional): Custom pip/uv install command to use for package installations.
                If not provided, will be inferred based on python_path and available tools (preferring to use uv).
                Examples: "uv pip install", "python -m pip install", "/path/to/.venv/bin/python -m uv pip install"
                (Default: ``None``)

        Note:
            For convenience, Kubetorch provides ready-to-use base images under ``kt.images``.
            These cover common environments like Python, CUDA, and Ray:

            * ``kt.images.Python310()``, ``kt.images.Python311()``, ``kt.images.Python312()``
            * ``kt.images.Debian()``
            * ``kt.images.Ubuntu()``
            * ``kt.images.Ray()`` (defaults to the latest Ray release)

            You can also use flexible factories:

            * ``kt.images.python("3.12")``
            * ``kt.images.pytorch()`` (defaults to "nvcr.io/nvidia/pytorch:23.12-py3")
            * ``kt.images.ray("2.32.0-py311")``

            These base images can be further customized with methods like
            ``.pip_install()``, ``.set_env_vars()``, ``.sync_package()``, etc.

        Example:

        .. code-block:: python

            import kubetorch as kt

            custom_image = (
                kt.Image(name="base_image")
                .pip_install(["numpy", "pandas"])
                .set_env_vars({"OMP_NUM_THREADS": 1})
            )
            debian_image = (
                kt.images.Debian()
                .pip_install(["numpy", "pandas"])
                .set_env_vars({"OMP_NUM_THREADS": 1})
            )
        """
        # Builder methods append to this list, so steps keep the order they were declared in.
        self.setup_steps = []
        self.docker_secret = None

        self.name = name
        self.image_id = image_id
        self.python_path = python_path
        self.install_cmd = install_cmd

    @staticmethod
    def _setup_step_config(step: ImageSetupStep):
        """Serialize an ImageSetupStep into a plain dict (step type value + kwargs)."""
        return {"step_type": step.step_type.value, "kwargs": step.kwargs}

    @staticmethod
    def _setup_step_from_config(step: Dict):
        """Rebuild an ImageSetupStep object from its config dict."""
        type_value, step_kwargs = step["step_type"], step["kwargs"]
        return ImageSetupStep(step_type=ImageSetupStepType(type_value), **step_kwargs)

    def _append_step(self, step_type, **step_kwargs):
        """Record one setup step of ``step_type`` and return ``self`` for chaining."""
        self.setup_steps.append(ImageSetupStep(step_type=step_type, **step_kwargs))
        return self

    def from_docker(self, image_id: str):
        """Use an existing Docker image as the base of this image.

        Args:
            image_id (str): Docker image in the following format ``"<registry>/<image>:<tag>"``

        Raises:
            ValueError: If an image id has already been set on this image.
        """
        if not self.image_id:
            self.image_id = image_id
            return self
        raise ValueError("Setting both a machine image and docker image is not yet supported.")

    ########################################################
    # Steps to build the image
    ########################################################

    def pip_install(
        self,
        reqs: List[Union["Package", str]],
        force: bool = False,
    ):
        """Add a step that pip installs the given packages.

        Args:
            reqs (List[Package or str]): List of packages to pip install on cluster and env.
                Each string is passed directly to the pip/uv command, allowing full control
                over pip arguments. Examples:

                - Simple package: ``"numpy"``
                - Version constraint: ``"pandas>=1.2.0"``
                - With pip flags: ``"--pre torch==2.0.0rc1"``
                - Multiple flags: ``"--index-url https://pypi.org/simple torch"``
            force (bool, optional): Whether to force re-install a package, if it already exists on the
                compute. (Default: ``False``)

        Example:

        .. code-block:: python

            import kubetorch as kt

            image = (
                kt.images.Debian()
                .pip_install([
                    "numpy>=1.20",
                    "pandas",
                    "--pre torchmonarch==0.1.0rc7",  # Install pre-release
                    "--index-url https://test.pypi.org/simple/ mypackage"
                ])
            )
        """
        return self._append_step(ImageSetupStepType.PIP_INSTALL, reqs=reqs, force=force)

    def set_env_vars(self, env_vars: Dict):
        """Add a step that sets environment variables, with support for variable expansion.

        Environment variables can reference other variables using shell-style expansion:

        - ``$VAR`` or ``${VAR}`` syntax to reference existing variables
        - Variables are expanded when the container starts
        - Variables are expanded in the order they are defined

        Args:
            env_vars (Dict): Dict of environment variables and values to set.
                Values can include references to other environment variables.

        Example:

        .. code-block:: python

            import kubetorch as kt

            image = (
                kt.images.Debian()
                .set_env_vars({
                    "BASE_PATH": "/usr/local",
                    "BIN_PATH": "$BASE_PATH/bin",  # Expands to /usr/local/bin
                    "PATH": "$BIN_PATH:$PATH",  # Prepends to existing PATH
                    "LD_LIBRARY_PATH": "/opt/lib:${LD_LIBRARY_PATH}",  # Appends to existing
                    "CUSTOM": "${HOME}/data",  # Uses HOME from container
                })
            )

        Note:
            - Variables are expanded using Python's ``os.path.expandvars()``
            - Undefined variables remain as literal strings (e.g., ``$UNDEFINED`` stays as ``$UNDEFINED``)
            - To include a literal ``$``, escape it with backslash: ``\\$``
        """
        # TODO - support .env files
        return self._append_step(ImageSetupStepType.SET_ENV_VARS, env_vars=env_vars)

    def sync_package(
        self,
        package: str,
        force: bool = False,
    ):
        """Add a step that syncs a local package over and adds it to the path.

        Args:
            package (Package or str): Package to sync. Either the name of a local editably installed package, or
                the path to the folder to sync over.
            force (bool, optional): Whether to re-sync the package over, if already previously synced over.
                (Default: ``False``)
        """
        return self._append_step(ImageSetupStepType.SYNC_PACKAGE, package=package, force=force)

    def run_bash(
        self,
        command: str,
        force: bool = False,
    ):
        """Add a step that runs bash commands during image setup.

        Executes shell commands during container initialization. Commands run in the
        order they are defined and can be used to install software, configure the
        environment, or start background services.

        Args:
            command (str): Shell command(s) to run on the cluster. Supports:

                - Single commands: ``"apt-get update"``
                - Chained commands: ``"apt-get update && apt-get install -y curl"``
                - Background processes: ``"jupyter notebook --no-browser &"``
            force (bool): Whether to rerun the command on the cluster, if previously run in image setup
                already. (Default: ``False``)

        Example:

        .. code-block:: python

            import kubetorch as kt

            image = (
                kt.images.Debian()
                .run_bash("apt-get update && apt-get install -y vim")
                .run_bash("pip install jupyter")
                .run_bash("jupyter notebook --no-browser --port=8888 &")  # Runs in background
            )

        Note:
            - Commands ending with ``&`` run in the background and won't block image setup
            - Background processes continue running after setup completes
            - The setup waits 0.5s to catch immediate failures in background processes
            - Use ``&&`` to chain commands that depend on each other
            - Use ``;`` to run commands sequentially regardless of success/failure
        """
        return self._append_step(ImageSetupStepType.CMD_RUN, command=command, force=force)

    def rsync(
        self,
        source: str,
        dest: str = None,
        contents: bool = False,
        filter_options: str = None,
        force: bool = False,
    ):
        """Add a step that syncs files or directories from the local machine to the remote container.

        This method efficiently transfers files to remote containers using rsync,
        which only copies changed files and supports compression. Files are first
        uploaded to a jump pod and then distributed to worker pods on startup.

        Args:
            source (str): Path to the local file or directory to sync. Supports:

                - Absolute paths: ``/path/to/file``
                - Relative paths: ``./data/file.txt``
                - Home directory paths: ``~/documents/data.csv``
            dest (str, optional): Target path on the remote container. Supports:

                - Absolute paths: ``/data/config.yaml`` (places file at exact location)
                - Relative paths: ``configs/settings.json`` (relative to working directory)
                - Tilde paths: ``~/results/output.txt`` (relative to working directory, ~ is stripped)
                - None: Uses the basename of source in working directory
            contents (bool, optional): For directories only - whether to copy the contents
                or the directory itself.
                If ``True`` the contents of the source directory are copied to the destination,
                and the source directory itself is not created at the destination.
                If ``False`` the source directory along with its contents are copied to the
                destination, creating an additional directory layer at the destination.
                (Default: ``False``)
            filter_options (str, optional): Additional rsync filter options. These are added
                to (not replacing) the default filters. By default, rsync excludes:

                - Files from ``.gitignore`` (if present)
                - Files from ``.ktignore`` (if present)
                - Common Python artifacts: ``*.pyc``, ``__pycache__``
                - Virtual environments: ``.venv``
                - Git metadata: ``.git``

                Your filter_options are appended after these defaults. Examples:

                - Exclude more patterns: ``"--exclude='*.log' --exclude='temp/'"``
                - Include specific files: ``"--include='important.log' --exclude='*.log'"``
                - Override all defaults: Set ``KT_RSYNC_FILTERS`` environment variable

                (Default: ``None``)
            force (bool, optional): When ``True``, forces rsync to transfer all files
                regardless of modification times by using ``--ignore-times`` flag. This ensures
                all files are copied even if timestamps suggest they haven't changed.
                Useful when timestamp-based change detection is unreliable.
                Note: Files are always synced when deploying with ``.to()``, this flag just
                affects how rsync determines which files need updating.
                (Default: ``False``)

        Returns:
            Image: Returns self for method chaining.

        Examples:

        .. code-block:: python

            import kubetorch as kt

            # Basic file sync
            image = (
                kt.images.Debian()
                .rsync("./config.yaml", "app/config.yaml")
            )

            # Sync to absolute path
            image = (
                kt.images.Python312()
                .rsync("./model_weights.pth", "/models/weights.pth")
            )

            # No destination specified - uses basename
            image = (
                kt.images.Ubuntu()
                .rsync("/local/data/dataset.csv")  # Goes to ./dataset.csv
            )

            # Directory sync - copy directory itself
            image = (
                kt.images.Debian()
                .rsync("./src", "app")  # Creates app/src/
            )

            # Directory sync - copy contents only
            image = (
                kt.images.Debian()
                .rsync("./src", "app", contents=True)  # Contents go directly into app/
            )

            # Multiple rsync operations with filtering
            image = (
                kt.images.Python312()
                .rsync("./data", "/data", filter_options="--exclude='*.tmp'")
                .rsync("./configs", "~/configs")
                .rsync("./scripts")
                .pip_install(["numpy", "pandas"])
            )

            # Force re-sync for development
            image = (
                kt.images.Debian()
                .rsync("./rapidly_changing_code", "app", force=True)
            )

        Note:
            - Absolute destination paths (starting with ``/``) place files at exact locations
            - Relative paths and ``~/`` paths are relative to the container's working directory
            - The ``contents`` parameter only affects directory sources, not files
            - Default exclusions include ``.gitignore`` patterns, ``__pycache__``, ``.venv``, and ``.git``
            - User-provided ``filter_options`` are added to (not replacing) the default filters
            - To completely override filters, set the ``KT_RSYNC_FILTERS`` environment variable
            - Use ``force=True`` to bypass timestamp checks and transfer all files
        """
        return self._append_step(
            ImageSetupStepType.RSYNC,
            source=source,
            dest=dest,
            contents=contents,
            filter_options=filter_options,
            force=force,
        )
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
from .image import Image
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def debian() -> Image:
    """Build the Kubetorch Debian slim image.

    The image id is the default Kubetorch server image, which is built on top
    of a minimal Debian base.
    """
    # Imported lazily, at call time rather than at module import.
    import kubetorch.serving.constants as kt_constants

    base_image_id = kt_constants.SERVER_IMAGE_MINIMAL
    return Image(name="debian", image_id=base_image_id)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def ubuntu() -> Image:
    """Build the Kubetorch ubuntu image."""
    # Imported lazily, at call time rather than at module import.
    import kubetorch.serving.constants as kt_constants

    base_image_id = kt_constants.UBUNTU_IMAGE_MINIMAL
    return Image(name="ubuntu", image_id=base_image_id)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def python(version: str) -> Image:
    """Build a Python slim base image for ``version``, e.g. ``python('3.12')``.

    The image name is ``python`` followed by the version with its dots removed;
    the image id is ``python:<version>-slim``.
    """
    compact_version = "".join(version.split("."))
    image_name = f"python{compact_version}"
    image_ref = f"python:{version}-slim"
    return Image(name=image_name, image_id=image_ref)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def ray(version: str = "latest") -> Image:
    """Build a Ray base image; the default is ``ray:latest``.

    The image name is ``ray`` plus the version, except that ``"latest"``
    contributes no suffix.
    """
    if version != "latest":
        name_suffix = version
    else:
        name_suffix = ""
    image_name = f"ray{name_suffix}".strip()
    return Image(name=image_name, image_id=f"rayproject/ray:{version}")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def pytorch(version: str = "23.12-py3") -> Image:
    """Build an NVIDIA PyTorch base image. Defaults to ``nvcr.io/nvidia/pytorch:23.12-py3``.

    The image name is ``pytorch`` followed by the version with dots and dashes removed.
    """
    compact_version = version
    for separator in (".", "-"):
        compact_version = compact_version.replace(separator, "")
    image_ref = f"nvcr.io/nvidia/pytorch:{version}"
    return Image(name=f"pytorch{compact_version}", image_id=image_ref)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Predefined convenience aliases for common versions.
# Defined with ``def`` rather than as lambdas bound to names, so that each alias
# has a real ``__name__`` and a docstring, and can be pickled by reference.
def Python310() -> "Image":
    """Return the Python 3.10 slim base image (``python("3.10")``)."""
    return python("3.10")


def Python311() -> "Image":
    """Return the Python 3.11 slim base image (``python("3.11")``)."""
    return python("3.11")


def Python312() -> "Image":
    """Return the Python 3.12 slim base image (``python("3.12")``)."""
    return python("3.12")


def Ray() -> "Image":
    """Return the Ray base image at the ``latest`` tag (``ray("latest")``)."""
    return ray("latest")


def Pytorch2312() -> "Image":
    """Return the NVIDIA PyTorch 23.12 base image (``pytorch("23.12-py3")``)."""
    return pytorch("23.12-py3")


def Debian() -> "Image":
    """Return the Kubetorch Debian image (``debian()``)."""
    return debian()


def Ubuntu() -> "Image":
    """Return the Kubetorch ubuntu image (``ubuntu()``)."""
    return ubuntu()
|
|
50
|
+
|
|
51
|
+
# Public names exported by ``from .images import *``.
__all__ = [
    # factory functions
    "python", "ray", "pytorch", "debian", "ubuntu",
    # zero-argument convenience aliases
    "Python310", "Python311", "Python312", "Ray", "Pytorch2312", "Debian", "Ubuntu",
]
|