konduktor-nightly 0.1.0.dev20251128104812__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. konduktor/__init__.py +49 -0
  2. konduktor/adaptors/__init__.py +0 -0
  3. konduktor/adaptors/aws.py +221 -0
  4. konduktor/adaptors/common.py +118 -0
  5. konduktor/adaptors/gcp.py +126 -0
  6. konduktor/authentication.py +124 -0
  7. konduktor/backends/__init__.py +6 -0
  8. konduktor/backends/backend.py +86 -0
  9. konduktor/backends/constants.py +21 -0
  10. konduktor/backends/deployment.py +204 -0
  11. konduktor/backends/deployment_utils.py +1351 -0
  12. konduktor/backends/jobset.py +225 -0
  13. konduktor/backends/jobset_utils.py +726 -0
  14. konduktor/backends/pod_utils.py +501 -0
  15. konduktor/check.py +184 -0
  16. konduktor/cli.py +1945 -0
  17. konduktor/config.py +420 -0
  18. konduktor/constants.py +36 -0
  19. konduktor/controller/__init__.py +0 -0
  20. konduktor/controller/constants.py +56 -0
  21. konduktor/controller/launch.py +44 -0
  22. konduktor/controller/node.py +116 -0
  23. konduktor/controller/parse.py +111 -0
  24. konduktor/dashboard/README.md +30 -0
  25. konduktor/dashboard/backend/main.py +169 -0
  26. konduktor/dashboard/backend/sockets.py +154 -0
  27. konduktor/dashboard/frontend/.eslintrc.json +3 -0
  28. konduktor/dashboard/frontend/.gitignore +36 -0
  29. konduktor/dashboard/frontend/app/api/jobs/route.js +71 -0
  30. konduktor/dashboard/frontend/app/api/namespaces/route.js +69 -0
  31. konduktor/dashboard/frontend/app/components/Grafana.jsx +66 -0
  32. konduktor/dashboard/frontend/app/components/JobsData.jsx +197 -0
  33. konduktor/dashboard/frontend/app/components/LogsData.jsx +139 -0
  34. konduktor/dashboard/frontend/app/components/NavMenu.jsx +39 -0
  35. konduktor/dashboard/frontend/app/components/NavTabs.jsx +73 -0
  36. konduktor/dashboard/frontend/app/components/NavTabs2.jsx +30 -0
  37. konduktor/dashboard/frontend/app/components/SelectBtn.jsx +27 -0
  38. konduktor/dashboard/frontend/app/components/lib/utils.js +6 -0
  39. konduktor/dashboard/frontend/app/components/ui/chip-select.jsx +78 -0
  40. konduktor/dashboard/frontend/app/components/ui/input.jsx +19 -0
  41. konduktor/dashboard/frontend/app/components/ui/navigation-menu.jsx +104 -0
  42. konduktor/dashboard/frontend/app/components/ui/select.jsx +120 -0
  43. konduktor/dashboard/frontend/app/favicon.ico +0 -0
  44. konduktor/dashboard/frontend/app/globals.css +120 -0
  45. konduktor/dashboard/frontend/app/jobs/page.js +10 -0
  46. konduktor/dashboard/frontend/app/layout.js +22 -0
  47. konduktor/dashboard/frontend/app/logs/page.js +11 -0
  48. konduktor/dashboard/frontend/app/page.js +12 -0
  49. konduktor/dashboard/frontend/jsconfig.json +7 -0
  50. konduktor/dashboard/frontend/next.config.mjs +4 -0
  51. konduktor/dashboard/frontend/package-lock.json +6687 -0
  52. konduktor/dashboard/frontend/package.json +37 -0
  53. konduktor/dashboard/frontend/postcss.config.mjs +8 -0
  54. konduktor/dashboard/frontend/server.js +64 -0
  55. konduktor/dashboard/frontend/tailwind.config.js +17 -0
  56. konduktor/data/__init__.py +9 -0
  57. konduktor/data/aws/__init__.py +15 -0
  58. konduktor/data/aws/s3.py +1138 -0
  59. konduktor/data/constants.py +7 -0
  60. konduktor/data/data_utils.py +268 -0
  61. konduktor/data/gcp/__init__.py +19 -0
  62. konduktor/data/gcp/constants.py +42 -0
  63. konduktor/data/gcp/gcs.py +994 -0
  64. konduktor/data/gcp/utils.py +9 -0
  65. konduktor/data/registry.py +19 -0
  66. konduktor/data/storage.py +812 -0
  67. konduktor/data/storage_utils.py +535 -0
  68. konduktor/execution.py +447 -0
  69. konduktor/kube_client.py +237 -0
  70. konduktor/logging.py +111 -0
  71. konduktor/manifests/aibrix-setup.yaml +430 -0
  72. konduktor/manifests/apoxy-setup.yaml +184 -0
  73. konduktor/manifests/apoxy-setup2.yaml +98 -0
  74. konduktor/manifests/controller_deployment.yaml +69 -0
  75. konduktor/manifests/dashboard_deployment.yaml +131 -0
  76. konduktor/manifests/dmesg_daemonset.yaml +57 -0
  77. konduktor/manifests/pod_cleanup_controller.yaml +129 -0
  78. konduktor/resource.py +546 -0
  79. konduktor/serving.py +153 -0
  80. konduktor/task.py +949 -0
  81. konduktor/templates/deployment.yaml.j2 +191 -0
  82. konduktor/templates/jobset.yaml.j2 +43 -0
  83. konduktor/templates/pod.yaml.j2 +563 -0
  84. konduktor/usage/__init__.py +0 -0
  85. konduktor/usage/constants.py +21 -0
  86. konduktor/utils/__init__.py +0 -0
  87. konduktor/utils/accelerator_registry.py +17 -0
  88. konduktor/utils/annotations.py +62 -0
  89. konduktor/utils/base64_utils.py +95 -0
  90. konduktor/utils/common_utils.py +426 -0
  91. konduktor/utils/constants.py +5 -0
  92. konduktor/utils/env_options.py +55 -0
  93. konduktor/utils/exceptions.py +234 -0
  94. konduktor/utils/kubernetes_enums.py +8 -0
  95. konduktor/utils/kubernetes_utils.py +763 -0
  96. konduktor/utils/log_utils.py +467 -0
  97. konduktor/utils/loki_utils.py +102 -0
  98. konduktor/utils/rich_utils.py +123 -0
  99. konduktor/utils/schemas.py +625 -0
  100. konduktor/utils/subprocess_utils.py +273 -0
  101. konduktor/utils/ux_utils.py +247 -0
  102. konduktor/utils/validator.py +461 -0
  103. konduktor_nightly-0.1.0.dev20251128104812.dist-info/LICENSE +91 -0
  104. konduktor_nightly-0.1.0.dev20251128104812.dist-info/METADATA +98 -0
  105. konduktor_nightly-0.1.0.dev20251128104812.dist-info/RECORD +107 -0
  106. konduktor_nightly-0.1.0.dev20251128104812.dist-info/WHEEL +4 -0
  107. konduktor_nightly-0.1.0.dev20251128104812.dist-info/entry_points.txt +3 -0
konduktor/utils/base64_utils.py
@@ -0,0 +1,95 @@
+ """
+ Utility for (un)zip and encode/decoding k8s secrets in base64
+ """
+
+ import base64
+ import os
+ import shutil
+ import tempfile
+ import zipfile
+ from typing import List
+
+
+ def zip_base64encode(files: List[str]) -> str:
+     """Zips files and encodes them in base64.
+
+     Args:
+         files: List of file paths to zip. Can include files and directories.
+
+     Returns:
+         Base64 encoded string of the zipped files.
+     """
+     with tempfile.TemporaryDirectory() as temp_dir:
+         # Copy all files/directories to temp dir preserving structure
+         for file_path in files:
+             src_path = os.path.expanduser(file_path)
+             if not os.path.exists(src_path):
+                 continue
+             dst_path = os.path.join(temp_dir, os.path.basename(file_path))
+
+             if os.path.isdir(src_path):
+                 shutil.copytree(src_path, dst_path)
+             else:
+                 shutil.copy2(src_path, dst_path)
+
+         # Create zip file
+         zip_path = os.path.join(temp_dir, 'archive.zip')
+         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
+             for item in os.listdir(temp_dir):
+                 if item == 'archive.zip':
+                     continue
+                 item_path = os.path.join(temp_dir, item)
+                 if os.path.isfile(item_path):
+                     zipf.write(item_path, item)
+                 else:
+                     for root, _, files in os.walk(item_path):
+                         for file in files:
+                             if file == '.DS_Store':
+                                 continue
+                             file_path = os.path.join(root, file)
+                             arcname = os.path.relpath(file_path, temp_dir)
+                             zipf.write(file_path, arcname)
+
+         # Read and encode zip file
+         with open(zip_path, 'rb') as f:
+             zip_str = f.read()
+         secret_value = base64.b64encode(zip_str).decode('utf-8')
+         # print("encoding")
+         # print(type(secret_value))
+         # print(len(secret_value))
+         # print(secret_value[-20:])
+         return secret_value
+
+
+ def base64decode_unzip(secret_value: str, output_path: str) -> str:
+     """Decodes a base64 encoded string and unzips the files.
+
+     Args:
+         secret_value: Base64 encoded string of the zipped files.
+         output_path: Path where to extract the unzipped files.
+
+     Returns:
+         Path to the unzipped files.
+     """
+     # TODO(asaiacai): this is messy I know...
+     # Decode base64 string
+     # print("decoding")
+     # print(type(secret_value))
+     # print(len(secret_value))
+     # print(secret_value[-20:])
+     decoded_data = base64.b64decode(secret_value)
+
+     # Write decoded data to temporary zip file
+     with tempfile.TemporaryDirectory() as temp_dir:
+         zip_path = os.path.join(temp_dir, 'archive.zip')
+
+         with zipfile.ZipFile(zip_path, 'w') as zipf:
+             zipf.writestr('data.zip', decoded_data)
+
+         with zipfile.ZipFile(zip_path, 'r') as zipf:
+             zipf.extractall(path=output_path)
+
+         with zipfile.ZipFile(os.path.join(output_path, 'data.zip'), 'r') as zipf:
+             zipf.extractall(path=output_path)
+
+     return output_path
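
For orientation, here is a minimal round-trip sketch of how the two helpers above could be exercised together. It is illustrative only: the file name, its contents, and the directories are made up, and the import path konduktor.utils.base64_utils is inferred from the file list above.

import os
import tempfile

from konduktor.utils import base64_utils

with tempfile.TemporaryDirectory() as workdir:
    # A small file standing in for something like an SSH key or kubeconfig.
    src = os.path.join(workdir, 'id_rsa.pub')
    with open(src, 'w', encoding='utf-8') as f:
        f.write('ssh-rsa AAAA... user@host\n')

    # Zip and base64-encode, producing the string stored as a k8s secret value.
    encoded = base64_utils.zip_base64encode([src])

    # Decode and unzip into a target directory, as a pod would on startup.
    out_dir = os.path.join(workdir, 'restored')
    base64_utils.base64decode_unzip(encoded, out_dir)
    print(sorted(os.listdir(out_dir)))  # ['data.zip', 'id_rsa.pub']

Note that the intermediate data.zip archive is extracted alongside the payload and left in place, which appears to be what the TODO in base64decode_unzip refers to.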
konduktor/utils/common_utils.py
@@ -0,0 +1,426 @@
+ # Proprietary Changes made for Trainy under the Trainy Software License
+ # Original source: skypilot: https://github.com/skypilot-org/skypilot
+ # which is Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ # http://www.apache.org/licenses/LICENSE-2.0
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import datetime
+ import difflib
+ import functools
+ import getpass
+ import hashlib
+ import inspect
+ import os
+ import random
+ import re
+ import socket
+ import sys
+ import uuid
+ from typing import Any, Callable, Dict, List, Optional, Union
+
+ import jinja2
+ import jsonschema
+ import yaml  # type: ignore
+
+ from konduktor.utils import annotations, constants, ux_utils, validator
+
+ _USER_HASH_FILE = os.path.expanduser('~/.konduktor/user_hash')
+ _usage_run_id = None
+ _VALID_ENV_VAR_REGEX = '[a-zA-Z_][a-zA-Z0-9_]*'
+ USER_HASH_LENGTH = 8
+ USER_HASH_LENGTH_IN_CLUSTER_NAME = 4
+
+
+ def get_timestamp() -> str:
+     return datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
+
+
+ def user_and_hostname_hash() -> str:
+     """Returns a string containing <user>-<hostname hash last 4 chars>.
+
+     For uniquefying user workloads on a shared-k8s cluster.
+
+     Using uuid.getnode() instead of gethostname() is incorrect; observed to
+     collide on Macs.
+     """
+     hostname_hash = hashlib.md5(socket.gethostname().encode()).hexdigest()[-4:]
+     return f'{getpass.getuser()}-{hostname_hash}'
+
+
+ def base36_encode(hex_str: str) -> str:
+     """Converts a hex string to a base36 string."""
+     int_value = int(hex_str, 16)
+
+     def _base36_encode(num: int) -> str:
+         if num == 0:
+             return '0'
+         alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
+         base36 = ''
+         while num != 0:
+             num, i = divmod(num, 36)
+             base36 = alphabet[i] + base36
+         return base36
+
+     return _base36_encode(int_value)
+
+
+ def get_cleaned_username(username: str = '') -> str:
+     """Cleans the username. Underscores are allowed, as we will
+     handle it when mapping to the cluster_name_on_cloud in
+     common_utils.make_cluster_name_on_cloud.
+
+     Clean up includes:
+      1. Making all characters lowercase
+      2. Removing any non-alphanumeric characters (excluding hyphens and
+         underscores)
+      3. Removing any numbers and/or hyphens at the start of the username.
+      4. Removing any hyphens at the end of the username
+      5. Truncate the username to 63 characters, as requested by GCP labels
+
+     Dots are removed due to: https://cloud.google.com/compute/docs/labeling-resources#requirements
+
+     e.g. 1SkY-PiLot2- becomes sky-pilot2
+
+     Returns:
+         A cleaned username.
+     """  # noqa: E501
+     username = username or getpass.getuser()
+     username = username.lower()
+     username = re.sub(r'[^a-z0-9-_]', '', username)
+     username = re.sub(r'^[0-9-]+', '', username)
+     username = re.sub(r'-$', '', username)
+     username = username[:63]
+     return username
+
+
+ def is_valid_env_var(name: str) -> bool:
+     """Checks if the task environment variable name is valid."""
+     return bool(re.fullmatch(_VALID_ENV_VAR_REGEX, name))
+
+
+ def get_pretty_entry_point() -> str:
+     """Returns the prettified entry point of this process (sys.argv).
+
+     Example return values:
+         $ konduktor launch app.yaml  # 'konduktor launch app.yaml'
+         $ python examples/app.py  # 'app.py'
+     """
+     argv = sys.argv
+     basename = os.path.basename(argv[0])
+     if basename == 'konduktor':
+         # Turn '/.../anaconda/envs/py36/bin/sky' into 'konduktor', but keep other
+         # things like 'examples/app.py'.
+         argv[0] = basename
+     return ' '.join(argv)
+
+
+ @annotations.lru_cache(scope='request')
+ def get_usage_run_id() -> str:
+     """Returns a unique run id for each 'run'.
+
+     A run is defined as the lifetime of a process that has imported `sky`
+     and has called its CLI or programmatic APIs. For example, two successive
+     `sky launch` are two runs.
+     """
+     global _usage_run_id
+     if _usage_run_id is None:
+         _usage_run_id = str(uuid.uuid4())
+     return _usage_run_id
+
+
+ def make_decorator(cls, name_or_fn: Union[str, Callable], **ctx_kwargs) -> Callable:
+     """Make the cls a decorator.
+
+     class cls:
+         def __init__(self, name, **kwargs):
+             pass
+         def __enter__(self):
+             pass
+         def __exit__(self, exc_type, exc_value, traceback):
+             pass
+
+     Args:
+         name_or_fn: The name of the event or the function to be wrapped.
+         message: The message attached to the event.
+     """
+     if isinstance(name_or_fn, str):
+
+         def _wrapper(f):
+             @functools.wraps(f)
+             def _record(*args, **kwargs):
+                 with cls(name_or_fn, **ctx_kwargs):
+                     return f(*args, **kwargs)
+
+             return _record
+
+         return _wrapper
+     else:
+         if not inspect.isfunction(name_or_fn):
+             raise ValueError('Should directly apply the decorator to a function.')
+
+         @functools.wraps(name_or_fn)
+         def _record(*args, **kwargs):
+             f = name_or_fn
+             func_name = getattr(f, '__qualname__', f.__name__)
+             module_name = getattr(f, '__module__', '')
+             if module_name:
+                 full_name = f'{module_name}.{func_name}'
+             else:
+                 full_name = func_name
+             with cls(full_name, **ctx_kwargs):
+                 return f(*args, **kwargs)
+
+         return _record
+
+
+ def get_user_hash(force_fresh_hash: bool = False) -> str:
+     """Returns a unique user-machine specific hash as a user id.
+
+     We cache the user hash in a file to avoid potential user_name or
+     hostname changes causing a new user hash to be generated.
+
+     Args:
+         force_fresh_hash: Bypasses the cached hash in USER_HASH_FILE and the
+             hash in the USER_ID_ENV_VAR and forces a fresh user-machine hash
+             to be generated. Used by `kubernetes.ssh_key_secret_field_name` to
+             avoid controllers sharing the same ssh key field name as the
+             local client.
+     """
+
+     override = os.environ.get('KONDUKTOR_TEST_USER_HASH')
+     if override:
+         return override
+
+     def _is_valid_user_hash(user_hash: Optional[str]) -> bool:
+         if user_hash is None:
+             return False
+         try:
+             int(user_hash, 16)
+         except (TypeError, ValueError):
+             return False
+         return len(user_hash) == USER_HASH_LENGTH
+
+     if not force_fresh_hash:
+         user_hash = os.getenv(constants.USER_ID_ENV_VAR)
+         if _is_valid_user_hash(user_hash):
+             assert user_hash is not None
+             return user_hash
+
+     if not force_fresh_hash and os.path.exists(_USER_HASH_FILE):
+         # Read from cached user hash file.
+         with open(_USER_HASH_FILE, 'r', encoding='utf-8') as f:
+             # Remove invalid characters.
+             user_hash = f.read().strip()
+         if _is_valid_user_hash(user_hash):
+             return user_hash
+
+     hash_str = user_and_hostname_hash()
+     user_hash = hashlib.md5(hash_str.encode()).hexdigest()[:USER_HASH_LENGTH]
+     if not _is_valid_user_hash(user_hash):
+         # A fallback in case the hash is invalid.
+         user_hash = uuid.uuid4().hex[:USER_HASH_LENGTH]
+     os.makedirs(os.path.dirname(_USER_HASH_FILE), exist_ok=True)
+     if not force_fresh_hash:
+         # Do not cache to file if force_fresh_hash is True since the file may
+         # be intentionally using a different hash, e.g. we want to keep the
+         # user_hash for usage collection the same on the jobs/serve controller
+         # as users' local client.
+         with open(_USER_HASH_FILE, 'w', encoding='utf-8') as f:
+             f.write(user_hash)
+     return user_hash
+
+
+ def read_yaml(path: str) -> Dict[str, Any]:
+     with open(path, 'r', encoding='utf-8') as f:
+         config = yaml.safe_load(f)
+     return config
+
+
+ def read_yaml_all(path: str) -> List[Dict[str, Any]]:
+     with open(path, 'r', encoding='utf-8') as f:
+         config = yaml.safe_load_all(f)
+         configs = list(config)
+         if not configs:
+             # Empty YAML file.
+             return [{}]
+         return configs
+
+
+ def validate_schema(obj, schema, err_msg_prefix='', skip_none=True):
+     """Validates an object against a given JSON schema.
+
+     Args:
+         obj: The object to validate.
+         schema: The JSON schema against which to validate the object.
+         err_msg_prefix: The string to prepend to the error message if
+             validation fails.
+         skip_none: If True, removes fields with value None from the object
+             before validation. This is useful for objects that will never contain
+             None because yaml.safe_load() loads empty fields as None.
+
+     Raises:
+         ValueError: if the object does not match the schema.
+     """
+     if skip_none:
+         obj = {k: v for k, v in obj.items() if v is not None}
+     err_msg = None
+     try:
+         validator.SchemaValidator(schema).validate(obj)
+     except jsonschema.ValidationError as e:
+         if e.validator == 'additionalProperties':
+             if tuple(e.schema_path) == ('properties', 'envs', 'additionalProperties'):
+                 # Hack. Here the error is Task.envs having some invalid keys. So
+                 # we should not print "unsupported field".
+                 #
+                 # This will print something like:
+                 # 'hello world' does not match any of the regexes: <regex>
+                 err_msg = (
+                     err_msg_prefix
+                     + 'The `envs` field contains invalid keys:\n'
+                     + e.message
+                 )
+             else:
+                 err_msg = err_msg_prefix
+                 assert isinstance(e.schema, dict), 'Schema must be a dictionary'
+                 known_fields = set(e.schema.get('properties', {}).keys())
+                 assert isinstance(e.instance, dict), 'Instance must be a dictionary'
+                 for field in e.instance:
+                     if field not in known_fields:
+                         most_similar_field = difflib.get_close_matches(
+                             field, known_fields, 1
+                         )
+                         if most_similar_field:
+                             err_msg += (
+                                 f'Instead of {field!r}, did you mean '
+                                 f'{most_similar_field[0]!r}?'
+                             )
+                         else:
+                             err_msg += f'Found unsupported field {field!r}.'
+         else:
+             message = e.message
+             # Object in jsonschema is represented as dict in Python. Replace
+             # 'object' with 'dict' for better readability.
+             message = message.replace("type 'object'", "type 'dict'")
+             # Example e.json_path value: '$.resources'
+             err_msg = (
+                 err_msg_prefix
+                 + message
+                 + f'. Check problematic field(s): {e.json_path}'
+             )
+
+     if err_msg:
+         with ux_utils.print_exception_no_traceback():
+             raise ValueError(err_msg)
+
+
+ def dump_yaml(path: str, config: Union[List[Dict[str, Any]], Dict[str, Any]]) -> None:
+     with open(path, 'w', encoding='utf-8') as f:
+         f.write(dump_yaml_str(config))
+
+
+ def dump_yaml_str(config: Union[List[Dict[str, Any]], Dict[str, Any]]) -> str:
+     # https://github.com/yaml/pyyaml/issues/127
+     class LineBreakDumper(yaml.SafeDumper):
+         def write_line_break(self, data=None):
+             super().write_line_break(data)
+             if len(self.indents) == 1:
+                 super().write_line_break()
+
+     if isinstance(config, list):
+         dump_func = yaml.dump_all  # type: ignore
+     else:
+         dump_func = yaml.dump  # type: ignore
+     return dump_func(
+         config, Dumper=LineBreakDumper, sort_keys=False, default_flow_style=False
+     )
+
+
+ def fill_template(
+     template_name: str, variables: Dict[str, Any], output_path: str
+ ) -> None:
+     """Create a file from a Jinja template and return the filename."""
+     assert template_name.endswith('.j2'), template_name
+     root_dir = os.path.dirname(os.path.dirname(__file__))
+     template_path = os.path.join(root_dir, 'templates', template_name)
+     if not os.path.exists(template_path):
+         raise FileNotFoundError(f'Template "{template_name}" does not exist.')
+     with open(template_path, 'r', encoding='utf-8') as fin:
+         template = fin.read()
+     output_path = os.path.abspath(os.path.expanduser(output_path))
+     os.makedirs(os.path.dirname(output_path), exist_ok=True)
+
+     # Write out yaml config.
+     j2_template = jinja2.Template(template)
+     content = j2_template.render(**variables)
+     with open(output_path, 'w', encoding='utf-8') as fout:
+         fout.write(content)
+
+
+ def class_fullname(cls, skip_builtins: bool = True):
+     """Get the full name of a class.
+
+     Example:
+         >>> e = konduktor.exceptions.FetchClusterInfoError()
+         >>> class_fullname(e.__class__)
+         'konduktor.exceptions.FetchClusterInfoError'
+
+     Args:
+         cls: The class to get the full name.
+
+     Returns:
+         The full name of the class.
+     """
+     module_name = getattr(cls, '__module__', '')
+     if not module_name or (module_name == 'builtins' and skip_builtins):
+         return cls.__name__
+     return f'{cls.__module__}.{cls.__name__}'
+
+
+ def format_exception(
+     e: Union[Exception, SystemExit, KeyboardInterrupt], use_bracket: bool = False
+ ) -> str:
+     """Format an exception to a string.
+
+     Args:
+         e: The exception to format.
+
+     Returns:
+         A string that represents the exception.
+     """
+     if use_bracket:
+         return f'[{class_fullname(e.__class__)}] {e}'
+     return f'{class_fullname(e.__class__)}: {e}'
+
+
+ class Backoff:
+     """Exponential backoff with jittering."""
+
+     MULTIPLIER = 1.6
+     JITTER = 0.4
+
+     def __init__(self, initial_backoff: float = 5, max_backoff_factor: int = 5):
+         self._initial = True
+         self._backoff = 0.0
+         self._initial_backoff = initial_backoff
+         self._max_backoff = max_backoff_factor * self._initial_backoff
+
+     # https://github.com/grpc/grpc/blob/2d4f3c56001cd1e1f85734b2f7c5ce5f2797c38a/doc/connection-backoff.md
+     # https://github.com/grpc/grpc/blob/5fc3ff82032d0ebc4bf252a170ebe66aacf9ed9d/src/core/lib/backoff/backoff.cc
+
+     def current_backoff(self) -> float:
+         """Backs off once and returns the current backoff in seconds."""
+         if self._initial:
+             self._initial = False
+             self._backoff = min(self._initial_backoff, self._max_backoff)
+         else:
+             self._backoff = min(self._backoff * self.MULTIPLIER, self._max_backoff)
+         self._backoff += random.uniform(
+             -self.JITTER * self._backoff, self.JITTER * self._backoff
+         )
+         return self._backoff
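
Two of the smaller utilities above lend themselves to a quick usage sketch. The snippet below is hypothetical: flaky_call is a stand-in, not part of the package; only Backoff and get_cleaned_username come from the module shown in this hunk.

import random
import time

from konduktor.utils import common_utils

# Docstring example: leading digits/hyphens and the trailing hyphen are stripped.
print(common_utils.get_cleaned_username('1SkY-PiLot2-'))  # 'sky-pilot2'


def flaky_call() -> bool:
    """Stand-in for an operation that fails transiently (e.g. a k8s API call)."""
    return random.random() > 0.7


backoff = common_utils.Backoff(initial_backoff=1, max_backoff_factor=5)
for attempt in range(5):
    if flaky_call():
        print(f'succeeded on attempt {attempt + 1}')
        break
    # Delay grows ~1.6x per failure with +/-40% jitter, capped at 5 seconds here.
    delay = backoff.current_backoff()
    print(f'attempt {attempt + 1} failed, retrying in {delay:.1f}s')
    time.sleep(delay)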
konduktor/utils/constants.py
@@ -0,0 +1,5 @@
+ # The name for the environment variable that stores KONDUKTOR user hash
+ USER_ID_ENV_VAR = 'KONDUKTOR_USER_ID'
+
+ # The name for the environment variable that stores KONDUKTOR user name.
+ USER_ENV_VAR = 'KONDUKTOR_USER'
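
A brief sketch of how this constant interacts with get_user_hash() from common_utils above. The value 'deadbeef' is arbitrary, chosen only because it is 8 hex characters and therefore passes the hash validity check.

import os

from konduktor.utils import common_utils, constants

# Pin the user id via KONDUKTOR_USER_ID instead of the cached ~/.konduktor/user_hash.
os.environ[constants.USER_ID_ENV_VAR] = 'deadbeef'
assert common_utils.get_user_hash() == 'deadbeef'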
konduktor/utils/env_options.py
@@ -0,0 +1,55 @@
+ # Proprietary Changes made for Trainy under the Trainy Software License
+ # Original source: skypilot: https://github.com/skypilot-org/skypilot
+ # which is Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ # http://www.apache.org/licenses/LICENSE-2.0
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Global environment options for konduktor."""
+
+ import enum
+ import os
+ from typing import Dict
+
+
+ class Options(enum.Enum):
+     """Environment variables for SkyPilot."""
+
+     # (env var name, default value)
+     IS_DEVELOPER = ('KONDUKTOR_DEV', False)
+     SHOW_DEBUG_INFO = ('KONDUKTOR_DEBUG', True)
+     DISABLE_LOGGING = ('KONDUKTOR_DISABLE_USAGE_COLLECTION', False)
+     MINIMIZE_LOGGING = ('KONDUKTOR_MINIMIZE_LOGGING', False)
+     SUPPRESS_SENSITIVE_LOG = ('KONDUKTOR_SUPPRESS_SENSITIVE_LOG', False)
+     # Internal: this is used to skip the cloud user identity check, which is
+     # used to protect cluster operations in a multi-identity scenario.
+     # Currently, this is only used in the job and serve controller, as there
+     # will not be multiple identities, and skipping the check can increase
+     # robustness.
+     SKIP_CLOUD_IDENTITY_CHECK = ('KONDUKTOR_SKIP_CLOUD_IDENTITY_CHECK', False)
+
+     def __init__(self, env_var: str, default: bool) -> None:
+         self.env_var = env_var
+         self.default = default
+
+     def __repr__(self) -> str:
+         return self.env_var
+
+     def get(self) -> bool:
+         """Check if an environment variable is set to True."""
+         return os.getenv(self.env_var, str(self.default)).lower() in ('true', '1')
+
+     @property
+     def env_key(self) -> str:
+         """The environment variable key name."""
+         return self.value[0]
+
+     @classmethod
+     def all_options(cls) -> Dict[str, bool]:
+         """Returns all options as a dictionary."""
+         return {option.env_key: option.get() for option in list(Options)}
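
To illustrate, a short sketch of how these flags could be read at runtime, based on the Options enum defined above; setting KONDUKTOR_DEBUG in-process is done here purely for demonstration.

import os

from konduktor.utils import env_options

os.environ['KONDUKTOR_DEBUG'] = '0'

# Only 'true' / '1' (case-insensitive) count as enabled; anything else is False.
print(env_options.Options.SHOW_DEBUG_INFO.get())  # False
print(env_options.Options.IS_DEVELOPER.env_key)   # 'KONDUKTOR_DEV'
print(env_options.Options.all_options())          # {'KONDUKTOR_DEV': False, ...}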