konduktor-nightly 0.1.0.dev20251128104812__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- konduktor/__init__.py +49 -0
- konduktor/adaptors/__init__.py +0 -0
- konduktor/adaptors/aws.py +221 -0
- konduktor/adaptors/common.py +118 -0
- konduktor/adaptors/gcp.py +126 -0
- konduktor/authentication.py +124 -0
- konduktor/backends/__init__.py +6 -0
- konduktor/backends/backend.py +86 -0
- konduktor/backends/constants.py +21 -0
- konduktor/backends/deployment.py +204 -0
- konduktor/backends/deployment_utils.py +1351 -0
- konduktor/backends/jobset.py +225 -0
- konduktor/backends/jobset_utils.py +726 -0
- konduktor/backends/pod_utils.py +501 -0
- konduktor/check.py +184 -0
- konduktor/cli.py +1945 -0
- konduktor/config.py +420 -0
- konduktor/constants.py +36 -0
- konduktor/controller/__init__.py +0 -0
- konduktor/controller/constants.py +56 -0
- konduktor/controller/launch.py +44 -0
- konduktor/controller/node.py +116 -0
- konduktor/controller/parse.py +111 -0
- konduktor/dashboard/README.md +30 -0
- konduktor/dashboard/backend/main.py +169 -0
- konduktor/dashboard/backend/sockets.py +154 -0
- konduktor/dashboard/frontend/.eslintrc.json +3 -0
- konduktor/dashboard/frontend/.gitignore +36 -0
- konduktor/dashboard/frontend/app/api/jobs/route.js +71 -0
- konduktor/dashboard/frontend/app/api/namespaces/route.js +69 -0
- konduktor/dashboard/frontend/app/components/Grafana.jsx +66 -0
- konduktor/dashboard/frontend/app/components/JobsData.jsx +197 -0
- konduktor/dashboard/frontend/app/components/LogsData.jsx +139 -0
- konduktor/dashboard/frontend/app/components/NavMenu.jsx +39 -0
- konduktor/dashboard/frontend/app/components/NavTabs.jsx +73 -0
- konduktor/dashboard/frontend/app/components/NavTabs2.jsx +30 -0
- konduktor/dashboard/frontend/app/components/SelectBtn.jsx +27 -0
- konduktor/dashboard/frontend/app/components/lib/utils.js +6 -0
- konduktor/dashboard/frontend/app/components/ui/chip-select.jsx +78 -0
- konduktor/dashboard/frontend/app/components/ui/input.jsx +19 -0
- konduktor/dashboard/frontend/app/components/ui/navigation-menu.jsx +104 -0
- konduktor/dashboard/frontend/app/components/ui/select.jsx +120 -0
- konduktor/dashboard/frontend/app/favicon.ico +0 -0
- konduktor/dashboard/frontend/app/globals.css +120 -0
- konduktor/dashboard/frontend/app/jobs/page.js +10 -0
- konduktor/dashboard/frontend/app/layout.js +22 -0
- konduktor/dashboard/frontend/app/logs/page.js +11 -0
- konduktor/dashboard/frontend/app/page.js +12 -0
- konduktor/dashboard/frontend/jsconfig.json +7 -0
- konduktor/dashboard/frontend/next.config.mjs +4 -0
- konduktor/dashboard/frontend/package-lock.json +6687 -0
- konduktor/dashboard/frontend/package.json +37 -0
- konduktor/dashboard/frontend/postcss.config.mjs +8 -0
- konduktor/dashboard/frontend/server.js +64 -0
- konduktor/dashboard/frontend/tailwind.config.js +17 -0
- konduktor/data/__init__.py +9 -0
- konduktor/data/aws/__init__.py +15 -0
- konduktor/data/aws/s3.py +1138 -0
- konduktor/data/constants.py +7 -0
- konduktor/data/data_utils.py +268 -0
- konduktor/data/gcp/__init__.py +19 -0
- konduktor/data/gcp/constants.py +42 -0
- konduktor/data/gcp/gcs.py +994 -0
- konduktor/data/gcp/utils.py +9 -0
- konduktor/data/registry.py +19 -0
- konduktor/data/storage.py +812 -0
- konduktor/data/storage_utils.py +535 -0
- konduktor/execution.py +447 -0
- konduktor/kube_client.py +237 -0
- konduktor/logging.py +111 -0
- konduktor/manifests/aibrix-setup.yaml +430 -0
- konduktor/manifests/apoxy-setup.yaml +184 -0
- konduktor/manifests/apoxy-setup2.yaml +98 -0
- konduktor/manifests/controller_deployment.yaml +69 -0
- konduktor/manifests/dashboard_deployment.yaml +131 -0
- konduktor/manifests/dmesg_daemonset.yaml +57 -0
- konduktor/manifests/pod_cleanup_controller.yaml +129 -0
- konduktor/resource.py +546 -0
- konduktor/serving.py +153 -0
- konduktor/task.py +949 -0
- konduktor/templates/deployment.yaml.j2 +191 -0
- konduktor/templates/jobset.yaml.j2 +43 -0
- konduktor/templates/pod.yaml.j2 +563 -0
- konduktor/usage/__init__.py +0 -0
- konduktor/usage/constants.py +21 -0
- konduktor/utils/__init__.py +0 -0
- konduktor/utils/accelerator_registry.py +17 -0
- konduktor/utils/annotations.py +62 -0
- konduktor/utils/base64_utils.py +95 -0
- konduktor/utils/common_utils.py +426 -0
- konduktor/utils/constants.py +5 -0
- konduktor/utils/env_options.py +55 -0
- konduktor/utils/exceptions.py +234 -0
- konduktor/utils/kubernetes_enums.py +8 -0
- konduktor/utils/kubernetes_utils.py +763 -0
- konduktor/utils/log_utils.py +467 -0
- konduktor/utils/loki_utils.py +102 -0
- konduktor/utils/rich_utils.py +123 -0
- konduktor/utils/schemas.py +625 -0
- konduktor/utils/subprocess_utils.py +273 -0
- konduktor/utils/ux_utils.py +247 -0
- konduktor/utils/validator.py +461 -0
- konduktor_nightly-0.1.0.dev20251128104812.dist-info/LICENSE +91 -0
- konduktor_nightly-0.1.0.dev20251128104812.dist-info/METADATA +98 -0
- konduktor_nightly-0.1.0.dev20251128104812.dist-info/RECORD +107 -0
- konduktor_nightly-0.1.0.dev20251128104812.dist-info/WHEEL +4 -0
- konduktor_nightly-0.1.0.dev20251128104812.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
# Proprietary Changes made for Trainy under the Trainy Software License
|
|
2
|
+
# Original source: skypilot: https://github.com/skypilot-org/skypilot
|
|
3
|
+
# which is Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
"""Exceptions."""
|
|
14
|
+
|
|
15
|
+
import builtins
|
|
16
|
+
import traceback
|
|
17
|
+
import types
|
|
18
|
+
from typing import Any, Dict
|
|
19
|
+
|
|
20
|
+
# Process return codes that the rest of the package compares against.

# Keyboard interruption and SIGTSTP.
KEYBOARD_INTERRUPT_CODE = 130
SIGTSTP_CODE = 146
RSYNC_FILE_NOT_FOUND_CODE = 23
# Arbitrarily chosen value; used in SkyPilot's storage mounting scripts.
MOUNT_PATH_NON_EMPTY_CODE = 42
# Arbitrarily chosen value; used to provision Kubernetes instances in SkyPilot.
INSUFFICIENT_PRIVILEGES_CODE = 52
# Returned when a git command is run in a directory that is not a git repo.
GIT_FATAL_EXIT_CODE = 128
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def is_safe_exception(exc: Exception) -> bool:
    """Return True if the exception is safe to send to clients.

    The decision is based solely on the module in which the exception's
    class is defined. Safe exceptions are:
        1. Built-in exceptions (class defined in ``builtins``).
        2. Konduktor's own exceptions (class defined in the ``konduktor``
           package root or in any of its submodules).

    Args:
        exc: The exception instance to classify.

    Returns:
        True when the exception class is a built-in or belongs to the
        ``konduktor`` package, False otherwise.
    """
    module = type(exc).__module__

    # Builtin exceptions (e.g., ValueError, RuntimeError)
    if module == 'builtins':
        return True

    # Konduktor's own exceptions. The package root is matched explicitly:
    # a bare startswith('konduktor.') check would reject a class defined in
    # konduktor/__init__.py, while a bare startswith('konduktor') check
    # would wrongly accept unrelated packages such as 'konduktor_fake'.
    return module == 'konduktor' or module.startswith('konduktor.')
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def wrap_exception(exc: Exception) -> Exception:
    """Convert an exception that clients cannot deserialize into a CloudError.

    Exceptions that ``is_safe_exception`` accepts are handed back untouched.
    Anything else (for example an error raised by a cloud provider package
    that is not available at clients) is replaced by a ``CloudError``
    carrying the original message, the top-level package name of the
    original class as the provider, and the original class name as the
    error type.
    """
    if not is_safe_exception(exc):
        origin = type(exc)
        top_level_package = origin.__module__.split('.')[0]
        return CloudError(
            message=str(exc),
            cloud_provider=top_level_package,
            error_type=origin.__name__,
        )

    return exc
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def serialize_exception(e: Exception) -> Dict[str, Any]:
    """Turn an exception into a plain dictionary.

    Unsafe exceptions (e.g., cloud exceptions) are first wrapped into
    Konduktor's CloudError so that clients can deserialize the result
    without the cloud provider packages installed.

    The returned dictionary has the keys ``type`` (class name), ``message``
    (``str`` of the exception), ``args``, ``attributes`` (instance
    attributes other than ``stacktrace``) and ``stacktrace`` (the
    ``stacktrace`` attribute, or None when the exception has none).
    """
    # Wrap unsafe exceptions before serialization
    wrapped = wrap_exception(e)

    # Traceback objects are replaced by their formatted frame list; the
    # 'stacktrace' attribute is reported under its own key instead.
    extra_attrs = {
        name: (
            traceback.format_tb(value)
            if isinstance(value, types.TracebackType)
            else value
        )
        for name, value in wrapped.__dict__.items()
        if name != 'stacktrace'
    }

    return {
        'type': wrapped.__class__.__name__,
        'message': str(wrapped),
        'args': wrapped.args,
        'attributes': extra_attrs,
        'stacktrace': getattr(wrapped, 'stacktrace', None),
    }
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def deserialize_exception(serialized: Dict[str, Any]) -> Exception:
    """Rebuild an exception from the dictionary form used for transport.

    Args:
        serialized: Mapping with the keys ``type`` (exception class name),
            ``message``, ``args`` (positional constructor arguments),
            ``attributes`` (extra instance attributes) and ``stacktrace``.

    Returns:
        An instance of the named exception class, looked up first among the
        built-ins and then among this module's globals. When the name does
        not resolve to an exception class, a plain ``Exception`` whose
        message is ``'<type>: <message>'`` is returned instead.

    Raises:
        KeyError: If a required key is missing from ``serialized``.
        TypeError: If the resolved class cannot be constructed from the
            serialized positional arguments.
    """
    exception_type = serialized['type']
    exception_class = getattr(builtins, exception_type, None)
    if exception_class is None:
        exception_class = globals().get(exception_type, None)

    # The type name comes off the wire, so only accept real exception
    # classes. Without this check a name such as 'print', 'open' or 'eval'
    # (or any function/module visible in this file) would be *called* with
    # wire-supplied arguments.
    is_exception_class = isinstance(exception_class, type) and issubclass(
        exception_class, BaseException
    )
    if not is_exception_class:
        # Unknown exception type.
        return Exception(f'{exception_type}: {serialized["message"]}')

    args = serialized['args']
    attributes = serialized['attributes']
    try:
        e = exception_class(*args, **attributes)
    except TypeError:
        # Built-in exceptions (and any class whose __init__ does not take
        # its attributes as keywords) reject keyword arguments. This is hit
        # for attributes attached after construction, e.g. ``__notes__``.
        # Rebuild from the positional args and restore attributes directly.
        e = exception_class(*args)
        for attr_name, attr_value in attributes.items():
            setattr(e, attr_name, attr_value)

    if serialized['stacktrace'] is not None:
        setattr(e, 'stacktrace', serialized['stacktrace'])
    return e
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class CloudError(Exception):
    """Wraps cloud-specific errors into a Konduktor exception.

    Args:
        message: Text of the original error; becomes the sole entry of
            ``args``.
        cloud_provider: Label of the provider the error came from.
        error_type: Class name of the original error.
    """

    def __init__(self, message: str, cloud_provider: str, error_type: str):
        super().__init__(message)
        self.cloud_provider = cloud_provider
        self.error_type = error_type

    def __reduce__(self):
        """Support ``copy`` and ``pickle``.

        The inherited implementation rebuilds the object from ``args``
        alone, i.e. ``CloudError(message)``, which fails because
        ``__init__`` needs all three arguments. The instance dictionary is
        passed as state so extra attributes (e.g. ``stacktrace``) survive.
        """
        message = self.args[0] if self.args else ''
        init_args = (message, self.cloud_provider, self.error_type)
        return (type(self), init_args, dict(self.__dict__))

    def __str__(self):
        detail = super().__str__()
        return f'{self.cloud_provider} error ({self.error_type}): {detail}'
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class CommandError(Exception):
    """Command-related error (inferred from the name; raise sites are not in this file)."""
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class CreateSecretError(Exception):
    """Secret-creation error (inferred from the name; raise sites are not in this file)."""
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class MissingSecretError(Exception):
    """Missing-secret error (inferred from the name; raise sites are not in this file)."""
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class NotSupportedError(Exception):
    """Signals that the requested feature is not supported."""
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class StorageError(Exception):
    """Common base class for the storage runtime errors defined in this module."""
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
class StorageSpecError(ValueError):
    """Raised due to an invalid specification of the Storage object."""
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class StorageInitError(StorageError):
    """Raised when initialization fails.

    The failure may be due to permissions, an unavailable name, or other
    reasons.
    """
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class StorageBucketCreateError(StorageInitError):
    """Raised when bucket creation fails."""
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
class StorageBucketGetError(StorageInitError):
    """Raised if an attempt to fetch an existing bucket fails."""
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
class StorageBucketDeleteError(StorageError):
    """Raised if an attempt to delete an existing bucket fails."""
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
class StorageUploadError(StorageError):
    """Raised when the bucket is successfully initialized but upload fails.

    The failure may be due to permissions, ctrl-c, or other reasons.
    """
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
class StorageSourceError(StorageSpecError):
    """Raised when the source of the storage is invalid.

    E.g., it does not exist, is a malformed path, or other reasons.
    """
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
class StorageNameError(StorageSpecError):
    """Raised when the name of the storage is invalid.

    NOTE(review): the previous comment was copied verbatim from
    StorageSourceError and described the storage *source*; the wording here
    follows the class name - confirm against the raise sites.
    """
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
class StorageModeError(StorageSpecError):
    """Raised when the storage mode is invalid or unsupported.

    Covers a mode that does not support the requested operation (e.g.,
    passing a file as source to MOUNT mode).
    """
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
class StorageExternalDeletionError(StorageBucketGetError):
    """Raised when fetching a bucket that has been deleted externally."""
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
class NonExistentStorageAccountError(StorageExternalDeletionError):
    """Raised when the configured storage account does not exist.

    The account is either provided through config.yaml or read from the
    store handle (local db).
    """
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
class NetworkError(Exception):
    """Signals a network failure."""
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
class CloudUserIdentityError(Exception):
    """Signals that the cloud identity is invalid."""
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
class ClusterOwnerIdentityMismatchError(Exception):
    """Signals that the cluster's owner identity differs from the current user's."""
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
class NoCloudAccessError(Exception):
    """Signals that all clouds are disabled."""
|