kubetorch 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kubetorch might be problematic. Click here for more details.

Files changed (93) hide show
  1. kubetorch/__init__.py +60 -0
  2. kubetorch/cli.py +1985 -0
  3. kubetorch/cli_utils.py +1025 -0
  4. kubetorch/config.py +453 -0
  5. kubetorch/constants.py +18 -0
  6. kubetorch/docs/Makefile +18 -0
  7. kubetorch/docs/__init__.py +0 -0
  8. kubetorch/docs/_ext/json_globaltoc.py +42 -0
  9. kubetorch/docs/api/cli.rst +10 -0
  10. kubetorch/docs/api/python/app.rst +21 -0
  11. kubetorch/docs/api/python/cls.rst +19 -0
  12. kubetorch/docs/api/python/compute.rst +25 -0
  13. kubetorch/docs/api/python/config.rst +11 -0
  14. kubetorch/docs/api/python/fn.rst +19 -0
  15. kubetorch/docs/api/python/image.rst +14 -0
  16. kubetorch/docs/api/python/secret.rst +18 -0
  17. kubetorch/docs/api/python/volumes.rst +13 -0
  18. kubetorch/docs/api/python.rst +101 -0
  19. kubetorch/docs/conf.py +69 -0
  20. kubetorch/docs/index.rst +20 -0
  21. kubetorch/docs/requirements.txt +5 -0
  22. kubetorch/globals.py +285 -0
  23. kubetorch/logger.py +59 -0
  24. kubetorch/resources/__init__.py +0 -0
  25. kubetorch/resources/callables/__init__.py +0 -0
  26. kubetorch/resources/callables/cls/__init__.py +0 -0
  27. kubetorch/resources/callables/cls/cls.py +157 -0
  28. kubetorch/resources/callables/fn/__init__.py +0 -0
  29. kubetorch/resources/callables/fn/fn.py +133 -0
  30. kubetorch/resources/callables/module.py +1416 -0
  31. kubetorch/resources/callables/utils.py +174 -0
  32. kubetorch/resources/compute/__init__.py +0 -0
  33. kubetorch/resources/compute/app.py +261 -0
  34. kubetorch/resources/compute/compute.py +2596 -0
  35. kubetorch/resources/compute/decorators.py +139 -0
  36. kubetorch/resources/compute/rbac.py +74 -0
  37. kubetorch/resources/compute/utils.py +1114 -0
  38. kubetorch/resources/compute/websocket.py +137 -0
  39. kubetorch/resources/images/__init__.py +1 -0
  40. kubetorch/resources/images/image.py +414 -0
  41. kubetorch/resources/images/images.py +74 -0
  42. kubetorch/resources/secrets/__init__.py +2 -0
  43. kubetorch/resources/secrets/kubernetes_secrets_client.py +412 -0
  44. kubetorch/resources/secrets/provider_secrets/__init__.py +0 -0
  45. kubetorch/resources/secrets/provider_secrets/anthropic_secret.py +12 -0
  46. kubetorch/resources/secrets/provider_secrets/aws_secret.py +16 -0
  47. kubetorch/resources/secrets/provider_secrets/azure_secret.py +14 -0
  48. kubetorch/resources/secrets/provider_secrets/cohere_secret.py +12 -0
  49. kubetorch/resources/secrets/provider_secrets/gcp_secret.py +16 -0
  50. kubetorch/resources/secrets/provider_secrets/github_secret.py +13 -0
  51. kubetorch/resources/secrets/provider_secrets/huggingface_secret.py +20 -0
  52. kubetorch/resources/secrets/provider_secrets/kubeconfig_secret.py +12 -0
  53. kubetorch/resources/secrets/provider_secrets/lambda_secret.py +13 -0
  54. kubetorch/resources/secrets/provider_secrets/langchain_secret.py +12 -0
  55. kubetorch/resources/secrets/provider_secrets/openai_secret.py +11 -0
  56. kubetorch/resources/secrets/provider_secrets/pinecone_secret.py +12 -0
  57. kubetorch/resources/secrets/provider_secrets/providers.py +93 -0
  58. kubetorch/resources/secrets/provider_secrets/ssh_secret.py +12 -0
  59. kubetorch/resources/secrets/provider_secrets/wandb_secret.py +11 -0
  60. kubetorch/resources/secrets/secret.py +238 -0
  61. kubetorch/resources/secrets/secret_factory.py +70 -0
  62. kubetorch/resources/secrets/utils.py +209 -0
  63. kubetorch/resources/volumes/__init__.py +0 -0
  64. kubetorch/resources/volumes/volume.py +365 -0
  65. kubetorch/servers/__init__.py +0 -0
  66. kubetorch/servers/http/__init__.py +0 -0
  67. kubetorch/servers/http/distributed_utils.py +3223 -0
  68. kubetorch/servers/http/http_client.py +730 -0
  69. kubetorch/servers/http/http_server.py +1788 -0
  70. kubetorch/servers/http/server_metrics.py +278 -0
  71. kubetorch/servers/http/utils.py +728 -0
  72. kubetorch/serving/__init__.py +0 -0
  73. kubetorch/serving/autoscaling.py +173 -0
  74. kubetorch/serving/base_service_manager.py +363 -0
  75. kubetorch/serving/constants.py +83 -0
  76. kubetorch/serving/deployment_service_manager.py +478 -0
  77. kubetorch/serving/knative_service_manager.py +519 -0
  78. kubetorch/serving/raycluster_service_manager.py +582 -0
  79. kubetorch/serving/service_manager.py +18 -0
  80. kubetorch/serving/templates/deployment_template.yaml +17 -0
  81. kubetorch/serving/templates/knative_service_template.yaml +19 -0
  82. kubetorch/serving/templates/kt_setup_template.sh.j2 +81 -0
  83. kubetorch/serving/templates/pod_template.yaml +194 -0
  84. kubetorch/serving/templates/raycluster_service_template.yaml +42 -0
  85. kubetorch/serving/templates/raycluster_template.yaml +35 -0
  86. kubetorch/serving/templates/service_template.yaml +21 -0
  87. kubetorch/serving/templates/workerset_template.yaml +36 -0
  88. kubetorch/serving/utils.py +377 -0
  89. kubetorch/utils.py +284 -0
  90. kubetorch-0.2.0.dist-info/METADATA +121 -0
  91. kubetorch-0.2.0.dist-info/RECORD +93 -0
  92. kubetorch-0.2.0.dist-info/WHEEL +4 -0
  93. kubetorch-0.2.0.dist-info/entry_points.txt +5 -0
@@ -0,0 +1,174 @@
1
+ import importlib.metadata as metadata
2
+ import inspect
3
+ import json
4
+ import os
5
+ from pathlib import Path
6
+ from typing import Callable, Optional, Type, Union
7
+
8
+ from kubetorch.logger import get_logger
9
+
10
+ logger = get_logger(__name__)
11
+
12
+ SHELL_COMMANDS = {"ssh", "run_bash", "rsync"}
13
+
14
+
15
def _extract_pointers(raw_cls_or_fn: Union[Type, Callable]):
    """Return the import pointers for a class or function.

    The pointers are the ``(root_path, module_name, cls_or_fn_name)`` tuple
    produced by :func:`_get_module_import_info`, i.e. what is needed to import
    the object on the server.

    Args:
        raw_cls_or_fn: The class or callable to locate.

    Raises:
        TypeError: If ``raw_cls_or_fn`` is neither a type nor a callable.
    """
    is_type = isinstance(raw_cls_or_fn, Type)
    if not is_type and not isinstance(raw_cls_or_fn, Callable):
        raise TypeError(f"Expected Type or Callable but received {type(raw_cls_or_fn)}")

    # (root_path, module_name, cls_or_fn_name)
    pointers = _get_module_import_info(raw_cls_or_fn)
    return pointers
22
+
23
+
24
def _get_module_import_info(raw_cls_or_fn: Union[Type, Callable]):
    """
    Collect the information another Python process needs to import a class or function.

    Args:
        raw_cls_or_fn: The class or callable to locate.

    Returns:
        A ``(root_path, module_name, cls_or_fn_name)`` tuple. For objects whose
        module has no resolvable file (or whose file is a notebook), the tuple is
        ``(os.getcwd(), "notebook", <object __name__>)``.

    Raises:
        Exception: If the defining module belongs to a package and exactly one
            package directory containing the module file cannot be identified.
    """
    # Background on all these dunders: https://docs.python.org/3/reference/import.html
    source_module = inspect.getmodule(raw_cls_or_fn)

    # Need to resolve in case just filename is given
    module_path = _extract_module_path(raw_cls_or_fn)

    # TODO better way of detecting if in a notebook or interactive Python env
    if not module_path or module_path.endswith("ipynb"):
        # The only time __file__ wouldn't be present is if the function is defined in an interactive
        # interpreter or a notebook. We can't import on the server in that case, so we need to cloudpickle
        # the fn to send it over. The __call__ function will serialize the function if we return it this way.
        # This is a short-term hack.
        root_path, module_name = os.getcwd(), "notebook"
        cls_or_fn_name = raw_cls_or_fn.__name__
    else:
        root_path = os.path.dirname(module_path)
        module_name = inspect.getmodulename(module_path)
        # TODO __qualname__ doesn't work when fn is aliased funnily, like torch.sum
        cls_or_fn_name = getattr(raw_cls_or_fn, "__qualname__", raw_cls_or_fn.__name__)

        # Adapted from https://github.com/modal-labs/modal-client/blob/main/modal/_function_utils.py#L94
        if getattr(source_module, "__package__", None):
            module_path = os.path.abspath(source_module.__file__)
            # __import__ on a dotted name returns the top-level package, whose
            # __path__ entries are the candidate package directories.
            top_level_package = __import__(source_module.__package__)

            base_dirs = []
            for package_dir in top_level_package.__path__:
                candidate = os.path.abspath(package_dir)
                if os.path.commonpath((candidate, module_path)) == candidate:
                    base_dirs.append(candidate)

            if len(base_dirs) != 1:
                raise Exception("Wasn't able to find the package directory!")

            # Import root is the directory that contains the package directory.
            root_path = os.path.dirname(base_dirs[0])
            module_name = source_module.__spec__.name

    return root_path, module_name, cls_or_fn_name
69
+
70
+
71
def _extract_module_path(raw_cls_or_fn: Union[Type, Callable]):
    """Return the resolved file path of the module that defines ``raw_cls_or_fn``.

    Args:
        raw_cls_or_fn: The class or callable whose defining module is looked up.

    Returns:
        The resolved path as a string, or ``None`` when the module found by
        ``inspect.getmodule`` has no ``__file__`` attribute.
    """
    owning_module = inspect.getmodule(raw_cls_or_fn)
    if not hasattr(owning_module, "__file__"):
        return None

    # Need to resolve in case just filename is given
    source_file = Path(inspect.getfile(owning_module))
    return str(source_file.resolve())
82
+
83
+
84
def locate_working_dir(start_dir=None):
    """Find the nearest directory, at or above ``start_dir``, that looks like a project root.

    Args:
        start_dir: Directory to start searching from. Defaults to the current
            working directory.

    Returns:
        The directory found by ``_find_directory_containing_any_file`` for the
        marker files below, or the starting directory when none is found.
    """
    search_root = os.getcwd() if start_dir is None else start_dir

    # Search first for anything that represents a Python package
    project_markers = [
        ".git",
        "setup.py",
        "setup.cfg",
        "pyproject.toml",
        "requirements.txt",
    ]

    match = _find_directory_containing_any_file(
        search_root, project_markers, searched_dirs=set()
    )
    if match is None:
        return search_root
    return match
102
+
103
+
104
def _find_directory_containing_any_file(dir_path, files, searched_dirs=None):
    """Walk upwards from ``dir_path`` until a directory contains one of ``files``.

    The search stops without a match when it reaches the user's home directory
    or the filesystem root ``/``; neither of those directories is inspected.

    Args:
        dir_path: Directory (``str`` or ``Path``) to start from.
        files: Names of files or directories to look for in each directory.
        searched_dirs: Optional set used to record visited directories. A fresh
            set is created when omitted.

    Returns:
        The first matching directory as a string, or ``None`` if no directory
        on the way up contains any of ``files``.
    """
    if searched_dirs is None:
        # The previous default of None crashed on the first `.add` call.
        searched_dirs = set()

    home_dir = Path.home()
    root_dir = Path("/")

    candidate = dir_path
    while True:
        # Normalise to Path so the home/root comparisons also work for str input.
        current = Path(candidate)
        if current == home_dir or current == root_dir:
            return None

        if any((current / file).exists() for file in files):
            return str(candidate)

        searched_dirs.add(current)
        parent = current.parent
        # Guards against fixed points such as Path(".").parent == Path(".").
        if parent in searched_dirs:
            return None
        candidate = parent
118
+
119
+
120
def get_local_install_path(package_name: str) -> Optional[str]:
    """Return the local source path of a package installed from a ``file://`` URL.

    Scans installed distributions for one whose name matches ``package_name``
    (case-insensitively) and which ships a ``direct_url.json`` record.

    Args:
        package_name: Distribution name to look for.

    Returns:
        The ``url`` from ``direct_url.json`` with its ``file://`` prefix removed,
        or ``None`` when no matching distribution with such a URL is found.
    """
    from importlib.metadata import distributions

    file_scheme = "file://"

    for dist in distributions():
        direct_url_json = dist.read_text("direct_url.json")
        if not direct_url_json:
            continue
        if dist.metadata["Name"].lower() != package_name.lower():
            continue

        try:
            url = json.loads(direct_url_json).get("url", None)
        except json.JSONDecodeError:
            # Unparseable record: move on to the next distribution.
            continue

        if url and url.startswith(file_scheme):
            return url[len(file_scheme) :]

    return None
134
+
135
+
136
def find_locally_installed_version(package_name: str) -> Optional[str]:
    """Return the installed version of ``package_name``, or ``None`` if it is not installed."""
    try:
        installed_version = metadata.version(package_name)
    except metadata.PackageNotFoundError:
        installed_version = None
    return installed_version
141
+
142
+
143
def get_names_for_reload_fallbacks(name: str, prefixes: Optional[list[str]] = None):
    """Build the ordered list of service names to try when reloading ``name``.

    Args:
        name: Base (unprefixed) service name.
        prefixes: Explicit prefixes to try. When given (and non-empty), only
            ``<prefix>-<name>`` candidates are returned, in the given order.
            When omitted or empty, the configured username and the validated
            current git branch are used as prefixes (in that order), and the
            bare ``name`` is appended last.

    Returns:
        A list of candidate names, each prefixed candidate passed through
        ``clean_and_validate_k8s_name``.
    """
    from kubetorch.globals import config
    from kubetorch.servers.http.utils import clean_and_validate_k8s_name
    from kubetorch.utils import current_git_branch, validate_username

    current_prefix = config.username

    if prefixes:
        fallback_prefixes = prefixes
    else:
        # try reloading based on current username or current git branch (in that order)
        branch = current_git_branch()
        if branch:
            # Ensure that we use the truncated branch name that was used to create the service initially
            valid_branch = validate_username(branch)
            # Note: username/prefix takes precedence over branch (in the event they differ)
            fallback_prefixes = [
                v for v in (current_prefix, valid_branch) if v is not None
            ]
        else:
            fallback_prefixes = [current_prefix] if current_prefix else []

    potential_names = [
        clean_and_validate_k8s_name(f"{prefix}-{name}", allow_full_length=True)
        for prefix in fallback_prefixes
    ]
    if not prefixes and name not in potential_names:
        # try loading the bare name (i.e. prod mode) last, but only if we're not looking for specific prefixes
        potential_names.append(name)

    return potential_names
File without changes
@@ -0,0 +1,261 @@
1
+ import os
2
+ import re
3
+ import signal
4
+ import sys
5
+ import threading
6
+ import time
7
+ from datetime import datetime, timezone
8
+ from typing import Dict
9
+
10
+ from kubetorch.globals import config
11
+ from kubetorch.logger import get_logger
12
+
13
+ from kubetorch.resources.callables.module import Module
14
+ from kubetorch.resources.compute.compute import Compute
15
+ from kubetorch.resources.compute.utils import ServiceTimeoutError
16
+ from kubetorch.servers.http.utils import is_running_in_kubernetes
17
+ from kubetorch.utils import get_kt_install_url
18
+
19
+ logger = get_logger(__name__)
20
+
21
+
22
class App(Module):
    """A CLI command deployed and run on remote compute via ``kt run``.

    Relies on attributes and helpers that are not defined in this class
    (``compute``, ``service_name``, ``remote_pointers``, ``base_endpoint``,
    ``_client``, ``_rsync_repo_and_image_patches``) and are presumably provided
    by :class:`Module`.
    """

    MODULE_TYPE = "app"

    def __init__(
        self,
        compute: Compute,
        cli_command: str,
        pointers: tuple,
        name: str = None,
        run_async: bool = False,
    ):
        """
        Initialize an App object for remote execution.

        .. note::

            To create an App, please use the factory method :func:`app` in conjunction with the `kt run` CLI command.

        Args:
            compute (Compute): Compute
            cli_command (str): CLI command to run on the compute.
            pointers (tuple): A tuple containing references needed to locate the app file, of the format
                (current working directory, path of file relative to cwd, None)
            name (str, optional): Name to assign the app. If not provided, will be based on the name of the file in
                which the app was defined.
            run_async (bool, optional): Whether to run the app async. (Default: ``False``)
        """
        super().__init__(name=name, pointers=pointers)
        self.cli_command = cli_command
        self.pointers = pointers
        self.name = name or self.module_name
        self._compute = compute
        self._run_async = run_async
        self._remote_pointers = None

        self._http_client = None

    @property
    def module_name(self):
        """Relative path of the app file (``pointers[1]``) with its extension removed."""
        return os.path.splitext(self.pointers[1])[0]

    def from_name(self):
        """Reloading an app by name is not supported; always raises ``ValueError``."""
        raise ValueError("Reloading app is not supported.")

    def setup_signal_handlers(self):
        """Route SIGINT and SIGTERM to :meth:`handle_termination_signal`."""
        signal.signal(signal.SIGINT, self.handle_termination_signal)
        signal.signal(signal.SIGTERM, self.handle_termination_signal)

    def handle_termination_signal(self, signum, frame):
        """Log the received signal and follow-up commands, then exit with status 0."""
        # ANSI escape codes used to colour the log line red.
        red = "\u001b[31m"
        reset = "\u001b[0m"

        logger.info(
            f"{red}Received {signal.Signals(signum).name}. Exiting parent process.{reset}"
        )
        self._print_kt_cmds()
        sys.exit(0)

    def deploy(self):
        """
        Deploy the app to the compute specified by the app arguments.
        """
        self.compute.service_name = self.service_name

        install_url, use_editable = get_kt_install_url(self.compute.freeze)
        if not self.compute.freeze:
            # Only non-frozen compute gets a code sync and a deployment timestamp.
            deployment_timestamp = datetime.now(timezone.utc).isoformat()
            self._rsync_repo_and_image_patches(install_url, use_editable, init_args={})
        else:
            deployment_timestamp = None

        self.setup_signal_handlers()

        # Logs are streamed only when the app runs synchronously.
        stream_logs = not self._run_async
        self._launch_service(
            install_url, use_editable, deployment_timestamp, stream_logs
        )

    def _get_service_dockerfile(self, metadata_env_vars):
        """Return the parent's image instructions with a ``CMD`` line for the app appended.

        Occurrences of the app file's relative path in ``cli_command`` are
        rewritten to its location under the remote root (``remote_pointers[0]``).
        """
        image_instructions = super()._get_service_dockerfile(metadata_env_vars)

        remote_script = os.path.join(self.remote_pointers[0], self.remote_pointers[1])
        local_script = r"\b" + re.escape(self.remote_pointers[1]) + r"\b"
        # A callable replacement inserts the path literally; passing the path as a
        # replacement string would make re.sub interpret any backslash sequences in it.
        remote_cmd = re.sub(local_script, lambda _match: remote_script, self.cli_command)

        image_instructions += f"CMD {remote_cmd}\n"
        return image_instructions

    def _launch_service(
        self,
        install_url,
        use_editable,
        deployment_timestamp,
        stream_logs,
    ):
        """Launch the service, and trigger a reload if the compute was already up.

        In async mode the parent's ``_launch_service`` runs in a background
        thread; if the compute was not already up, this method then polls
        ``compute.is_up()`` for up to 60 seconds.

        Raises:
            ServiceTimeoutError: In async mode, if the compute is still not up
                after the 60 second wait.
        """
        # Captured before launching: a service that is already up needs an explicit reload.
        trigger_reload = self.compute.is_up()
        if self._run_async:
            thread = threading.Thread(
                target=super()._launch_service,
                args=(
                    install_url,
                    use_editable,
                    {},
                    deployment_timestamp,
                    stream_logs,
                    config.log_verbosity,
                    False,
                ),
            )
            thread.start()

            if trigger_reload:
                # NOTE: in async mode _update_service ends with sys.exit().
                self._update_service(stream_logs, deployment_timestamp)
                time.sleep(1)
            else:
                # wait for pods to be ready before exiting out
                start_time = time.time()
                while not self.compute.is_up() and time.time() - start_time < 60:
                    time.sleep(5)

                if not self.compute.is_up():
                    raise ServiceTimeoutError(
                        f"Service {self.service_name} is not up after 60 seconds."
                    )
        else:
            super()._launch_service(
                install_url,
                use_editable,
                init_args={},
                deployment_timestamp=deployment_timestamp,
                stream_logs=stream_logs,
                verbosity=config.log_verbosity,
                dryrun=False,
            )

            if trigger_reload:
                self._update_service(stream_logs, deployment_timestamp)

    def _update_service(self, stream_logs, deployment_timestamp):
        """Call the service's reload endpoint with the deployment timestamp header.

        In async mode the call is made from a background thread and, after a
        one second pause, the current process is exited via ``sys.exit()``.
        """
        client = self._client()

        if self._run_async:
            thread = threading.Thread(
                target=client.call_method,
                args=(
                    self.endpoint(),
                    stream_logs,
                ),
                kwargs={"headers": {"X-Deployed-As-Of": deployment_timestamp}},
            )
            thread.start()
            time.sleep(1)
            sys.exit()
        else:
            client.call_method(
                self.endpoint(),
                stream_logs=stream_logs,
                headers={"X-Deployed-As-Of": deployment_timestamp},
            )

    def _print_kt_cmds(self):
        """Log the ``kt`` commands for viewing logs and tearing down this service."""
        logger.info(f"To see logs, run: kt logs {self.service_name}.")
        logger.info(f"To teardown service, run: kt teardown {self.service_name}")

    def endpoint(self):
        """Return the URL of the ``_reload_image`` route under ``base_endpoint``."""
        return f"{self.base_endpoint}/_reload_image"
188
+
189
+
190
def app(
    name: str = None,
    port: int = None,
    health_check: str = None,
    **kwargs: Dict,
):
    """
    Builds and deploys an instance of :class:`App`.

    Returns ``None`` without doing anything unless the ``KT_RUN`` environment
    variable is ``"1"`` and the process is not running inside Kubernetes.

    Args:
        name (str, optional): Name to give the remote app. If not provided, will be based off the name of the file in
            which the app was defined.
        port (int, optional): Server port to expose, if the app starts an HTTP server.
        health_check (str, optional): Health check endpoint, if running a server, to check when server is up and ready.
        **kwargs: Compute kwargs, to define the compute on which to run the app on.

    Raises:
        ValueError: If both ``name`` and the ``KT_RUN_NAME`` environment variable are set and they differ.

    Examples:

    Define the ``kt.app`` object and compute in your Python file:

    .. code-block:: python

        import kubetorch as kt

        # Define the app at the top of the Python file to deploy
        # train.py
        kt.app(name="my-app", image=kt.Image("docker-latest"), cpus="0.01")

        if __name__ == "__main__":
            ...

    Deploy and run the app remotely using the ``kt run`` CLI command:

    .. code-block:: bash

        kt run python train.py --epochs 5
        kt run fastapi run my_app.py --name fastapi-app
    """
    if not os.getenv("KT_RUN") == "1" or is_running_in_kubernetes():
        return None

    run_name = os.getenv("KT_RUN_NAME")
    if name and run_name and name != run_name:
        raise ValueError(
            f"Name mismatch between kt.App definition ({name}) and kt run command ({run_name})."
        )
    name = name or run_name
    cli_command = os.getenv("KT_RUN_CMD")  # set in kt run
    # Environment values are strings, so compare with "1"; comparing with the int 1 is always False.
    run_async = os.getenv("KT_RUN_ASYNC") == "1"

    # Copy so the caller's env_vars dict is not modified.
    env_vars = dict(kwargs.get("env_vars") or {})
    if port:
        env_vars["KT_APP_PORT"] = port
    if health_check:
        env_vars["KT_APP_HEALTHCHECK"] = health_check
    kwargs["env_vars"] = env_vars
    compute = Compute(**kwargs)

    main_file = os.getenv("KT_RUN_FILE") or os.path.abspath(
        sys.modules["__main__"].__file__
    )
    working_dir = os.getcwd()
    relative_path = os.path.relpath(main_file, working_dir)
    pointers = [working_dir, relative_path, None]
    # Plain substring replacement: the path must not be interpreted as a regular expression.
    relative_cli_command = cli_command.replace(main_file, relative_path)

    kt_app = App(
        compute=compute,
        cli_command=relative_cli_command,
        pointers=pointers,
        name=name,
        run_async=run_async,
    )
    return kt_app