konduktor-nightly 0.1.0.dev20251128104812__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107):
  1. konduktor/__init__.py +49 -0
  2. konduktor/adaptors/__init__.py +0 -0
  3. konduktor/adaptors/aws.py +221 -0
  4. konduktor/adaptors/common.py +118 -0
  5. konduktor/adaptors/gcp.py +126 -0
  6. konduktor/authentication.py +124 -0
  7. konduktor/backends/__init__.py +6 -0
  8. konduktor/backends/backend.py +86 -0
  9. konduktor/backends/constants.py +21 -0
  10. konduktor/backends/deployment.py +204 -0
  11. konduktor/backends/deployment_utils.py +1351 -0
  12. konduktor/backends/jobset.py +225 -0
  13. konduktor/backends/jobset_utils.py +726 -0
  14. konduktor/backends/pod_utils.py +501 -0
  15. konduktor/check.py +184 -0
  16. konduktor/cli.py +1945 -0
  17. konduktor/config.py +420 -0
  18. konduktor/constants.py +36 -0
  19. konduktor/controller/__init__.py +0 -0
  20. konduktor/controller/constants.py +56 -0
  21. konduktor/controller/launch.py +44 -0
  22. konduktor/controller/node.py +116 -0
  23. konduktor/controller/parse.py +111 -0
  24. konduktor/dashboard/README.md +30 -0
  25. konduktor/dashboard/backend/main.py +169 -0
  26. konduktor/dashboard/backend/sockets.py +154 -0
  27. konduktor/dashboard/frontend/.eslintrc.json +3 -0
  28. konduktor/dashboard/frontend/.gitignore +36 -0
  29. konduktor/dashboard/frontend/app/api/jobs/route.js +71 -0
  30. konduktor/dashboard/frontend/app/api/namespaces/route.js +69 -0
  31. konduktor/dashboard/frontend/app/components/Grafana.jsx +66 -0
  32. konduktor/dashboard/frontend/app/components/JobsData.jsx +197 -0
  33. konduktor/dashboard/frontend/app/components/LogsData.jsx +139 -0
  34. konduktor/dashboard/frontend/app/components/NavMenu.jsx +39 -0
  35. konduktor/dashboard/frontend/app/components/NavTabs.jsx +73 -0
  36. konduktor/dashboard/frontend/app/components/NavTabs2.jsx +30 -0
  37. konduktor/dashboard/frontend/app/components/SelectBtn.jsx +27 -0
  38. konduktor/dashboard/frontend/app/components/lib/utils.js +6 -0
  39. konduktor/dashboard/frontend/app/components/ui/chip-select.jsx +78 -0
  40. konduktor/dashboard/frontend/app/components/ui/input.jsx +19 -0
  41. konduktor/dashboard/frontend/app/components/ui/navigation-menu.jsx +104 -0
  42. konduktor/dashboard/frontend/app/components/ui/select.jsx +120 -0
  43. konduktor/dashboard/frontend/app/favicon.ico +0 -0
  44. konduktor/dashboard/frontend/app/globals.css +120 -0
  45. konduktor/dashboard/frontend/app/jobs/page.js +10 -0
  46. konduktor/dashboard/frontend/app/layout.js +22 -0
  47. konduktor/dashboard/frontend/app/logs/page.js +11 -0
  48. konduktor/dashboard/frontend/app/page.js +12 -0
  49. konduktor/dashboard/frontend/jsconfig.json +7 -0
  50. konduktor/dashboard/frontend/next.config.mjs +4 -0
  51. konduktor/dashboard/frontend/package-lock.json +6687 -0
  52. konduktor/dashboard/frontend/package.json +37 -0
  53. konduktor/dashboard/frontend/postcss.config.mjs +8 -0
  54. konduktor/dashboard/frontend/server.js +64 -0
  55. konduktor/dashboard/frontend/tailwind.config.js +17 -0
  56. konduktor/data/__init__.py +9 -0
  57. konduktor/data/aws/__init__.py +15 -0
  58. konduktor/data/aws/s3.py +1138 -0
  59. konduktor/data/constants.py +7 -0
  60. konduktor/data/data_utils.py +268 -0
  61. konduktor/data/gcp/__init__.py +19 -0
  62. konduktor/data/gcp/constants.py +42 -0
  63. konduktor/data/gcp/gcs.py +994 -0
  64. konduktor/data/gcp/utils.py +9 -0
  65. konduktor/data/registry.py +19 -0
  66. konduktor/data/storage.py +812 -0
  67. konduktor/data/storage_utils.py +535 -0
  68. konduktor/execution.py +447 -0
  69. konduktor/kube_client.py +237 -0
  70. konduktor/logging.py +111 -0
  71. konduktor/manifests/aibrix-setup.yaml +430 -0
  72. konduktor/manifests/apoxy-setup.yaml +184 -0
  73. konduktor/manifests/apoxy-setup2.yaml +98 -0
  74. konduktor/manifests/controller_deployment.yaml +69 -0
  75. konduktor/manifests/dashboard_deployment.yaml +131 -0
  76. konduktor/manifests/dmesg_daemonset.yaml +57 -0
  77. konduktor/manifests/pod_cleanup_controller.yaml +129 -0
  78. konduktor/resource.py +546 -0
  79. konduktor/serving.py +153 -0
  80. konduktor/task.py +949 -0
  81. konduktor/templates/deployment.yaml.j2 +191 -0
  82. konduktor/templates/jobset.yaml.j2 +43 -0
  83. konduktor/templates/pod.yaml.j2 +563 -0
  84. konduktor/usage/__init__.py +0 -0
  85. konduktor/usage/constants.py +21 -0
  86. konduktor/utils/__init__.py +0 -0
  87. konduktor/utils/accelerator_registry.py +17 -0
  88. konduktor/utils/annotations.py +62 -0
  89. konduktor/utils/base64_utils.py +95 -0
  90. konduktor/utils/common_utils.py +426 -0
  91. konduktor/utils/constants.py +5 -0
  92. konduktor/utils/env_options.py +55 -0
  93. konduktor/utils/exceptions.py +234 -0
  94. konduktor/utils/kubernetes_enums.py +8 -0
  95. konduktor/utils/kubernetes_utils.py +763 -0
  96. konduktor/utils/log_utils.py +467 -0
  97. konduktor/utils/loki_utils.py +102 -0
  98. konduktor/utils/rich_utils.py +123 -0
  99. konduktor/utils/schemas.py +625 -0
  100. konduktor/utils/subprocess_utils.py +273 -0
  101. konduktor/utils/ux_utils.py +247 -0
  102. konduktor/utils/validator.py +461 -0
  103. konduktor_nightly-0.1.0.dev20251128104812.dist-info/LICENSE +91 -0
  104. konduktor_nightly-0.1.0.dev20251128104812.dist-info/METADATA +98 -0
  105. konduktor_nightly-0.1.0.dev20251128104812.dist-info/RECORD +107 -0
  106. konduktor_nightly-0.1.0.dev20251128104812.dist-info/WHEEL +4 -0
  107. konduktor_nightly-0.1.0.dev20251128104812.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,234 @@
1
+ # Proprietary Changes made for Trainy under the Trainy Software License
2
+ # Original source: skypilot: https://github.com/skypilot-org/skypilot
3
+ # which is Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ """Exceptions."""
14
+
15
+ import builtins
16
+ import traceback
17
+ import types
18
+ from typing import Any, Dict
19
+
20
# Return codes for keyboard interruption and SIGTSTP
KEYBOARD_INTERRUPT_CODE = 130
SIGTSTP_CODE = 146
RSYNC_FILE_NOT_FOUND_CODE = 23
# Arbitrarily chosen value. Used in SkyPilot's storage mounting scripts.
MOUNT_PATH_NON_EMPTY_CODE = 42
# Arbitrarily chosen value. Used to provision Kubernetes instances in SkyPilot.
INSUFFICIENT_PRIVILEGES_CODE = 52
# Return code when a git command is run in a directory that is not a git repo.
GIT_FATAL_EXIT_CODE = 128
30
+
31
+
32
def is_safe_exception(exc: Exception) -> bool:
    """Tell whether an exception can be sent to clients unchanged.

    An exception is considered safe when its class is defined in:
        1. ``builtins`` (e.g., ValueError, RuntimeError), or
        2. a module whose dotted name starts with ``konduktor.``.

    Args:
        exc: The exception instance to classify.

    Returns:
        True if the exception's class comes from one of the modules above,
        False otherwise.
    """
    origin = type(exc).__module__
    return origin == 'builtins' or origin.startswith('konduktor.')
50
+
51
+
52
def wrap_exception(exc: Exception) -> Exception:
    """Convert a non-safe exception into a Konduktor ``CloudError``.

    Exceptions that are not safe to deserialize at clients (for example,
    exceptions from cloud providers whose packages are not available at
    clients) are replaced; safe exceptions are handed back untouched.

    Args:
        exc: The exception to inspect.

    Returns:
        ``exc`` itself when ``is_safe_exception(exc)`` is true; otherwise a
        ``CloudError`` carrying ``str(exc)``, the first component of the
        exception class's module path, and the exception class name.
    """
    if not is_safe_exception(exc):
        text = str(exc)
        exc_cls = type(exc)
        # The leading component of the module path identifies the provider.
        provider = exc_cls.__module__.split('.')[0]
        return CloudError(
            message=text,
            cloud_provider=provider,
            error_type=exc_cls.__name__,
        )
    return exc
68
+
69
+
70
def serialize_exception(e: Exception) -> Dict[str, Any]:
    """Turn an exception into a plain dictionary.

    Unsafe exceptions (e.g., cloud exceptions) are first wrapped into
    Konduktor's CloudError so that clients can deserialize the result without
    needing cloud provider packages installed.

    Args:
        e: The exception to serialize.

    Returns:
        A dict with the keys 'type' (class name), 'message' (``str`` of the
        exception), 'args', 'attributes' (instance attributes other than
        'stacktrace') and 'stacktrace' (``None`` when the exception has no
        such attribute).
    """
    wrapped = wrap_exception(e)
    stacktrace = getattr(wrapped, 'stacktrace', None)

    # 'stacktrace' is reported under its own key; traceback objects are
    # replaced by their formatted frame lines.
    attributes = {
        name: (
            traceback.format_tb(value)
            if isinstance(value, types.TracebackType)
            else value
        )
        for name, value in wrapped.__dict__.items()
        if name != 'stacktrace'
    }

    return {
        'type': wrapped.__class__.__name__,
        'message': str(wrapped),
        'args': wrapped.args,
        'attributes': attributes,
        'stacktrace': stacktrace,
    }
97
+
98
+
99
def deserialize_exception(serialized: Dict[str, Any]) -> Exception:
    """Rebuild an exception from its serialized dictionary form.

    The type name is looked up first among the builtins and then among the
    names defined in this module. Only classes deriving from
    ``BaseException`` are accepted, so a payload naming an arbitrary builtin
    callable (``eval``, ``exec``, ``__import__``, ...) is never invoked with
    the serialized arguments.

    Args:
        serialized: A dict with the keys 'type', 'message', 'args',
            'attributes' and 'stacktrace'.

    Returns:
        An instance of the named exception class, or a generic ``Exception``
        whose message is ``'<type>: <message>'`` when the type is unknown or
        is not an exception class.
    """
    exception_type = serialized['type']

    # Builtins take precedence over names defined in this module.
    exception_class = getattr(builtins, exception_type, None)
    if exception_class is None:
        exception_class = globals().get(exception_type, None)

    is_exception_class = isinstance(exception_class, type) and issubclass(
        exception_class, BaseException
    )
    if not is_exception_class:
        # Unknown exception type, or a name that is not an exception at all.
        return Exception(f'{exception_type}: {serialized["message"]}')

    args = serialized['args']
    attributes = serialized['attributes']
    try:
        e = exception_class(*args, **attributes)
    except TypeError:
        # The constructor does not accept the attributes as keyword arguments
        # (true for builtin exceptions and plain subclasses), so restore them
        # directly on the instance instead.
        e = exception_class(*args)
        for attr_name, attr_value in attributes.items():
            setattr(e, attr_name, attr_value)

    if serialized['stacktrace'] is not None:
        setattr(e, 'stacktrace', serialized['stacktrace'])
    return e
113
+
114
+
115
class CloudError(Exception):
    """Wraps cloud-specific errors into a Konduktor exception.

    Only ``message`` is stored in ``args``; ``cloud_provider`` and
    ``error_type`` are kept as instance attributes and are included in the
    string form of the error.
    """

    def __init__(self, message: str, cloud_provider: str, error_type: str):
        super().__init__(message)
        self.cloud_provider = cloud_provider
        self.error_type = error_type

    def __reduce__(self):
        # The default reduction rebuilds the instance from ``args`` alone,
        # i.e. ``CloudError(message)``, which fails because ``__init__`` needs
        # three arguments. Supply all of them so copy/pickle round-trip.
        message = self.args[0] if self.args else ''
        return (
            type(self),
            (message, self.cloud_provider, self.error_type),
            self.__dict__.copy(),
        )

    def __str__(self) -> str:
        return (
            f'{self.cloud_provider} error ({self.error_type}): ' f'{super().__str__()}'
        )
127
+
128
+
129
class CommandError(Exception):
    """Command-related error; adds nothing on top of ``Exception``."""
131
+
132
+
133
class CreateSecretError(Exception):
    """Secret-creation error; adds nothing on top of ``Exception``."""
135
+
136
+
137
class MissingSecretError(Exception):
    """Missing-secret error; adds nothing on top of ``Exception``."""
139
+
140
+
141
class NotSupportedError(Exception):
    """Signals that a requested feature is not supported."""
145
+
146
+
147
class StorageError(Exception):
    """Storage-related error; adds nothing on top of ``Exception``."""
149
+
150
+
151
class StorageSpecError(ValueError):
    """Raised due to an invalid specification of the Storage object."""
154
+
155
+
156
class StorageInitError(StorageError):
    """Raised when initialization fails.

    The failure may be due to permissions, an unavailable name, or other
    reasons.
    """
160
+
161
+
162
class StorageBucketCreateError(StorageInitError):
    """Raised when bucket creation fails."""
165
+
166
+
167
class StorageBucketGetError(StorageInitError):
    """Raised if an attempt to fetch an existing bucket fails."""
170
+
171
+
172
class StorageBucketDeleteError(StorageError):
    """Raised if an attempt to delete an existing bucket fails."""
175
+
176
+
177
class StorageUploadError(StorageError):
    """Raised when the bucket is successfully initialized but upload fails.

    The failure may be due to permissions, ctrl-c, or other reasons.
    """
181
+
182
+
183
class StorageSourceError(StorageSpecError):
    """Raised when the source of the storage is invalid.

    E.g., the source does not exist, the path is malformed, or other reasons.
    """
187
+
188
+
189
class StorageNameError(StorageSpecError):
    """Storage specification error concerning the storage name.

    NOTE(review): the previous comment here was a verbatim copy of the one on
    StorageSourceError and described an invalid *source*. Judging by the class
    name this error is about an invalid storage *name* -- confirm against the
    raise sites.
    """
193
+
194
+
195
class StorageModeError(StorageSpecError):
    """Raised when the storage mode is invalid or unsuitable.

    This covers a mode that does not support the requested operation (e.g.,
    passing a file as source to MOUNT mode).
    """
199
+
200
+
201
class StorageExternalDeletionError(StorageBucketGetError):
    """Raised when fetching a bucket that has been deleted externally."""
205
+
206
+
207
class NonExistentStorageAccountError(StorageExternalDeletionError):
    """Raised when the configured storage account does not exist.

    The storage account is the one provided through config.yaml or read from
    the store handle (local db).
    """
211
+
212
+
213
class NetworkError(Exception):
    """Signals a network failure."""
217
+
218
+
219
class CloudUserIdentityError(Exception):
    """Signals that the cloud identity is invalid."""
223
+
224
+
225
class ClusterOwnerIdentityMismatchError(Exception):
    """Signals that the cluster's owner identity differs from the current user's."""
229
+
230
+
231
class NoCloudAccessError(Exception):
    """Signals that all clouds are disabled."""
@@ -0,0 +1,8 @@
1
+ import enum
2
+
3
+
4
class KubernetesAutoscalerType(enum.Enum):
    """The kinds of cluster autoscaler distinguished for Kubernetes.

    Each member's value is the lowercase string form of its name.
    """

    GKE = 'gke'
    GENERIC = 'generic'