konduktor-nightly 0.1.0.dev20251128104812__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. konduktor/__init__.py +49 -0
  2. konduktor/adaptors/__init__.py +0 -0
  3. konduktor/adaptors/aws.py +221 -0
  4. konduktor/adaptors/common.py +118 -0
  5. konduktor/adaptors/gcp.py +126 -0
  6. konduktor/authentication.py +124 -0
  7. konduktor/backends/__init__.py +6 -0
  8. konduktor/backends/backend.py +86 -0
  9. konduktor/backends/constants.py +21 -0
  10. konduktor/backends/deployment.py +204 -0
  11. konduktor/backends/deployment_utils.py +1351 -0
  12. konduktor/backends/jobset.py +225 -0
  13. konduktor/backends/jobset_utils.py +726 -0
  14. konduktor/backends/pod_utils.py +501 -0
  15. konduktor/check.py +184 -0
  16. konduktor/cli.py +1945 -0
  17. konduktor/config.py +420 -0
  18. konduktor/constants.py +36 -0
  19. konduktor/controller/__init__.py +0 -0
  20. konduktor/controller/constants.py +56 -0
  21. konduktor/controller/launch.py +44 -0
  22. konduktor/controller/node.py +116 -0
  23. konduktor/controller/parse.py +111 -0
  24. konduktor/dashboard/README.md +30 -0
  25. konduktor/dashboard/backend/main.py +169 -0
  26. konduktor/dashboard/backend/sockets.py +154 -0
  27. konduktor/dashboard/frontend/.eslintrc.json +3 -0
  28. konduktor/dashboard/frontend/.gitignore +36 -0
  29. konduktor/dashboard/frontend/app/api/jobs/route.js +71 -0
  30. konduktor/dashboard/frontend/app/api/namespaces/route.js +69 -0
  31. konduktor/dashboard/frontend/app/components/Grafana.jsx +66 -0
  32. konduktor/dashboard/frontend/app/components/JobsData.jsx +197 -0
  33. konduktor/dashboard/frontend/app/components/LogsData.jsx +139 -0
  34. konduktor/dashboard/frontend/app/components/NavMenu.jsx +39 -0
  35. konduktor/dashboard/frontend/app/components/NavTabs.jsx +73 -0
  36. konduktor/dashboard/frontend/app/components/NavTabs2.jsx +30 -0
  37. konduktor/dashboard/frontend/app/components/SelectBtn.jsx +27 -0
  38. konduktor/dashboard/frontend/app/components/lib/utils.js +6 -0
  39. konduktor/dashboard/frontend/app/components/ui/chip-select.jsx +78 -0
  40. konduktor/dashboard/frontend/app/components/ui/input.jsx +19 -0
  41. konduktor/dashboard/frontend/app/components/ui/navigation-menu.jsx +104 -0
  42. konduktor/dashboard/frontend/app/components/ui/select.jsx +120 -0
  43. konduktor/dashboard/frontend/app/favicon.ico +0 -0
  44. konduktor/dashboard/frontend/app/globals.css +120 -0
  45. konduktor/dashboard/frontend/app/jobs/page.js +10 -0
  46. konduktor/dashboard/frontend/app/layout.js +22 -0
  47. konduktor/dashboard/frontend/app/logs/page.js +11 -0
  48. konduktor/dashboard/frontend/app/page.js +12 -0
  49. konduktor/dashboard/frontend/jsconfig.json +7 -0
  50. konduktor/dashboard/frontend/next.config.mjs +4 -0
  51. konduktor/dashboard/frontend/package-lock.json +6687 -0
  52. konduktor/dashboard/frontend/package.json +37 -0
  53. konduktor/dashboard/frontend/postcss.config.mjs +8 -0
  54. konduktor/dashboard/frontend/server.js +64 -0
  55. konduktor/dashboard/frontend/tailwind.config.js +17 -0
  56. konduktor/data/__init__.py +9 -0
  57. konduktor/data/aws/__init__.py +15 -0
  58. konduktor/data/aws/s3.py +1138 -0
  59. konduktor/data/constants.py +7 -0
  60. konduktor/data/data_utils.py +268 -0
  61. konduktor/data/gcp/__init__.py +19 -0
  62. konduktor/data/gcp/constants.py +42 -0
  63. konduktor/data/gcp/gcs.py +994 -0
  64. konduktor/data/gcp/utils.py +9 -0
  65. konduktor/data/registry.py +19 -0
  66. konduktor/data/storage.py +812 -0
  67. konduktor/data/storage_utils.py +535 -0
  68. konduktor/execution.py +447 -0
  69. konduktor/kube_client.py +237 -0
  70. konduktor/logging.py +111 -0
  71. konduktor/manifests/aibrix-setup.yaml +430 -0
  72. konduktor/manifests/apoxy-setup.yaml +184 -0
  73. konduktor/manifests/apoxy-setup2.yaml +98 -0
  74. konduktor/manifests/controller_deployment.yaml +69 -0
  75. konduktor/manifests/dashboard_deployment.yaml +131 -0
  76. konduktor/manifests/dmesg_daemonset.yaml +57 -0
  77. konduktor/manifests/pod_cleanup_controller.yaml +129 -0
  78. konduktor/resource.py +546 -0
  79. konduktor/serving.py +153 -0
  80. konduktor/task.py +949 -0
  81. konduktor/templates/deployment.yaml.j2 +191 -0
  82. konduktor/templates/jobset.yaml.j2 +43 -0
  83. konduktor/templates/pod.yaml.j2 +563 -0
  84. konduktor/usage/__init__.py +0 -0
  85. konduktor/usage/constants.py +21 -0
  86. konduktor/utils/__init__.py +0 -0
  87. konduktor/utils/accelerator_registry.py +17 -0
  88. konduktor/utils/annotations.py +62 -0
  89. konduktor/utils/base64_utils.py +95 -0
  90. konduktor/utils/common_utils.py +426 -0
  91. konduktor/utils/constants.py +5 -0
  92. konduktor/utils/env_options.py +55 -0
  93. konduktor/utils/exceptions.py +234 -0
  94. konduktor/utils/kubernetes_enums.py +8 -0
  95. konduktor/utils/kubernetes_utils.py +763 -0
  96. konduktor/utils/log_utils.py +467 -0
  97. konduktor/utils/loki_utils.py +102 -0
  98. konduktor/utils/rich_utils.py +123 -0
  99. konduktor/utils/schemas.py +625 -0
  100. konduktor/utils/subprocess_utils.py +273 -0
  101. konduktor/utils/ux_utils.py +247 -0
  102. konduktor/utils/validator.py +461 -0
  103. konduktor_nightly-0.1.0.dev20251128104812.dist-info/LICENSE +91 -0
  104. konduktor_nightly-0.1.0.dev20251128104812.dist-info/METADATA +98 -0
  105. konduktor_nightly-0.1.0.dev20251128104812.dist-info/RECORD +107 -0
  106. konduktor_nightly-0.1.0.dev20251128104812.dist-info/WHEEL +4 -0
  107. konduktor_nightly-0.1.0.dev20251128104812.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,273 @@
1
+ # Proprietary Changes made for Trainy under the Trainy Software License
2
+ # Original source: skypilot: https://github.com/skypilot-org/skypilot
3
+ # which is Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ """Utility functions for subprocesses."""
14
+
15
+ import os
16
+ import resource
17
+ import subprocess
18
+ from multiprocessing import pool
19
+ from typing import Any, Callable, Dict, List, Optional, Union
20
+
21
+ import colorama
22
+ import psutil
23
+
24
+ from konduktor import constants, logging
25
+ from konduktor.utils import exceptions, log_utils, ux_utils
26
+
27
# Module-level logger obtained from konduktor's logging helper.
logger = logging.get_logger(__name__)

# Flipped to True by get_max_workers_for_file_mounts() once the "file
# descriptor limit is low" warning has been logged, so it is emitted only once
# per process.
_fd_limit_warning_shown = False
30
+
31
+
32
def run(cmd, **kwargs):
    """Thin wrapper around ``subprocess.run`` with shell-friendly defaults.

    Defaults to ``shell=True``, ``check=True`` and ``/bin/bash`` as the shell
    executable. When ``shell`` is falsy the executable is forced to ``None``.
    All remaining keyword arguments are forwarded to ``subprocess.run``.

    Be careful when using this function: the spawned child process may keep
    running in the background after the current program is killed. To avoid
    that, use `log_utils.run_with_log` instead.
    """
    use_shell = kwargs.pop('shell', True)
    must_succeed = kwargs.pop('check', True)
    shell_binary = kwargs.pop('executable', '/bin/bash')
    return subprocess.run(
        cmd,
        shell=use_shell,
        check=must_succeed,
        # A shell executable is only meaningful when a shell is used.
        executable=shell_binary if use_shell else None,
        **kwargs,
    )
44
+
45
+
46
def run_no_outputs(cmd, **kwargs):
    """Run ``cmd`` via ``run`` with stdout and stderr sent to DEVNULL."""
    devnull = subprocess.DEVNULL
    return run(cmd, stdout=devnull, stderr=devnull, **kwargs)
48
+
49
+
50
+ def _get_thread_multiplier(cloud_str: Optional[str] = None) -> int:
51
+ # If using Kubernetes, we use 4x the number of cores.
52
+ if cloud_str and cloud_str.lower() == 'kubernetes':
53
+ return 4
54
+ return 1
55
+
56
+
57
def get_max_workers_for_file_mounts(
    common_file_mounts: Dict[str, str], cloud_str: Optional[str] = None
) -> int:
    """Pick how many workers to use for syncing file mounts.

    The count is bounded by the soft open-file-descriptor limit of this
    process (minus a fixed reserve, divided by an estimated per-rsync cost)
    and by ``get_parallel_threads(cloud_str)``. It is never less than 1.

    Args:
        common_file_mounts: Mapping whose values are the local source paths.
        cloud_str: Passed through to ``get_parallel_threads``.

    Returns:
        The number of workers to use.
    """
    global _fd_limit_warning_shown
    soft_limit = resource.getrlimit(resource.RLIMIT_NOFILE)[0]

    # Warn about a low limit, but only the first time it is observed.
    if not _fd_limit_warning_shown and soft_limit < 1024:
        logger.warning(
            f'Open file descriptor limit ({soft_limit}) is low. File sync to '
            'remote clusters may be slow. Consider increasing the limit using '
            '`ulimit -n <number>` or modifying system limits.'
        )
        _fd_limit_warning_shown = True

    # Each entry directly under a source directory is assumed to cost 5 file
    # descriptors on average; the most expensive directory sets the
    # per-rsync cost, with a floor of 5.
    directory_costs = [
        len(os.listdir(src)) * 5
        for src in common_file_mounts.values()
        if os.path.isdir(src)
    ]
    fd_per_rsync = max([5, *directory_costs])

    # Keep some file descriptors back for the system and other processes.
    reserved_fds = 100
    budget_workers = (soft_limit - reserved_fds) // fd_per_rsync

    # At least 1 worker, and no more than the parallel-thread cap so the
    # system is not overloaded.
    thread_cap = get_parallel_threads(cloud_str)
    max_workers = min(max(budget_workers, 1), thread_cap)
    logger.debug(f'Using {max_workers} workers for file mounts.')
    return max_workers
88
+
89
+
90
def get_parallel_threads(cloud_str: Optional[str] = None) -> int:
    """Returns the number of threads to use for parallel execution.

    The result is ``max(4, cpu_count - 1)`` scaled by
    ``_get_thread_multiplier(cloud_str)``; the CPU count is taken as 1 when
    ``os.cpu_count()`` cannot determine it.

    Args:
        cloud_str: The cloud name, forwarded to ``_get_thread_multiplier``.
    """
    detected = os.cpu_count()
    cores = 1 if detected is None else detected
    base_threads = max(4, cores - 1)
    return base_threads * _get_thread_multiplier(cloud_str)
100
+
101
+
102
def run_in_parallel(
    func: Callable, args: List[Any], num_threads: Optional[int] = None
) -> List[Any]:
    """Run a function in parallel on a list of arguments.

    Args:
        func: The function to run in parallel. It is called with one element
            of ``args`` at a time.
        args: Iterable of arguments to pass to func. Any iterable is accepted
            (lists, tuples, generators, dict views, ...); it is materialized
            into a list before dispatch.
        num_threads: Number of threads to use. If None, uses
            get_parallel_threads()

    Returns:
        A list of the return values of the function func, in the same order as
        the arguments.

    Raises:
        Exception: The first exception encountered.
    """
    # The short-circuits below need len() and indexing, which one-shot
    # iterables such as generators do not support. Materialize once up front.
    items = list(args)

    # Short-circuit for short lists: no thread pool needed.
    if not items:
        return []
    if len(items) == 1:
        return [func(items[0])]

    processes = num_threads if num_threads is not None else get_parallel_threads()

    with pool.ThreadPool(processes=processes) as p:
        # imap yields results in input order.
        return list(p.imap(func, items))
131
+
132
+
133
def handle_returncode(
    returncode: int,
    command: str,
    error_msg: Union[str, Callable[[], str]],
    stderr: Optional[str] = None,
    stream_logs: bool = True,
) -> None:
    """Raise a ``CommandError`` if a command exited with a non-zero code.

    Args:
        returncode: The returncode of the command.
        command: The command that was run.
        error_msg: The error message, or a zero-argument callable producing
            it. The callable is only invoked when the command failed.
        stderr: The stderr of the command; logged before raising when given.
        stream_logs: If True, stderr is logged at error level, otherwise at
            debug level.

    Raises:
        exceptions.CommandError: If ``returncode`` is non-zero. The message
            is wrapped in red colour codes.
    """
    if stream_logs:
        echo = logger.error
    else:
        echo = logger.debug

    if returncode == 0:
        return

    if stderr is not None:
        echo(stderr)

    message = error_msg() if callable(error_msg) else error_msg
    format_err_msg = f'{colorama.Fore.RED}{message}{colorama.Style.RESET_ALL}'
    with ux_utils.print_exception_no_traceback():
        raise exceptions.CommandError(returncode, command, format_err_msg, stderr)
159
+
160
+
161
def kill_children_processes(
    parent_pids: Optional[Union[int, List[Optional[int]]]] = None, force: bool = False
) -> None:
    """Kill children processes recursively.

    We need to kill the children, so that
    1. The underlying subprocess will not print the logs to the terminal,
       after this program exits.
    2. The underlying subprocess will not continue with starting a cluster
       etc. while we are cleaning up the clusters.

    When ``parent_pids`` is None, only the descendants of the current process
    are killed. When PIDs are given, each listed process is killed first and
    then its descendants.

    Args:
        parent_pids: Optional PIDs of a series of processes. The processes and
            their children will be killed. If a list of PID is specified, it is
            killed by the order in the list. This is for guaranteeing the order
            of cleaning up and suppress flaky errors.
        force: bool, send SIGKILL if force, otherwise, use SIGTERM for
            gracefully kill the process.

    Raises:
        psutil.TimeoutExpired: If a process is still alive 5 seconds after the
            SIGKILL fallback.
    """
    if isinstance(parent_pids, int):
        parent_pids = [parent_pids]

    def kill(proc: psutil.Process):
        if not proc.is_running():
            # Skip if the process is not running.
            return
        logger.debug(f'Killing process {proc.pid}')
        try:
            if force:
                proc.kill()
            else:
                proc.terminate()
            proc.wait(timeout=10)
        except psutil.NoSuchProcess:
            # The child process may have already been terminated.
            pass
        except psutil.TimeoutExpired:
            logger.debug(f'Process {proc.pid} did not terminate after 10 seconds')
            # Attempt to force kill if the normal termination fails
            if not force:
                logger.debug(f'Force killing process {proc.pid}')
                try:
                    proc.kill()
                    proc.wait(timeout=5)  # Shorter timeout after force kill
                except psutil.NoSuchProcess:
                    # The process exited between the timeout and the
                    # SIGKILL; that is the outcome we wanted.
                    pass

    parent_processes = []
    if parent_pids is None:
        parent_processes = [psutil.Process()]
    else:
        # NOTE(review): the annotation allows None entries; psutil documents
        # that Process(None) refers to the *current* process, which would then
        # be killed below. Confirm callers never pass None entries.
        for pid in parent_pids:
            try:
                process = psutil.Process(pid)
            except psutil.NoSuchProcess:
                continue
            parent_processes.append(process)

    for parent_process in parent_processes:
        try:
            # Snapshot the descendants BEFORE killing the parent, otherwise
            # they could no longer be found through it.
            child_processes = parent_process.children(recursive=True)
        except psutil.NoSuchProcess:
            # The parent disappeared after it was looked up; treat it like a
            # PID that never resolved and move on to the next one.
            continue
        if parent_pids is not None:
            kill(parent_process)
        logger.debug(f'Killing child processes: {child_processes}')
        for child in child_processes:
            kill(child)
223
+
224
+
225
def kill_process_daemon(process_pid: int) -> None:
    """Start a daemon as a safety net to kill the process.

    Launches ``subprocess_daemon.py`` (looked up next to the ``log_utils``
    module) with the current PID, the target PID and a snapshot of the
    target's descendants. The daemon's stdin/stdout/stderr are all DEVNULL.

    Args:
        process_pid: The PID of the process to kill.
    """
    # Snapshot the descendants now; an already-gone target yields no children.
    try:
        target = psutil.Process(process_pid)
        descendant_pids = [child.pid for child in target.children(recursive=True)]
    except psutil.NoSuchProcess:
        descendant_pids = []

    own_pid = os.getpid()
    # NOTE(review): the package file list for this wheel shows no
    # konduktor/utils/subprocess_daemon.py; confirm the script is shipped,
    # otherwise the spawned interpreter exits immediately and silently.
    utils_dir = os.path.dirname(os.path.abspath(log_utils.__file__))
    script_path = os.path.join(utils_dir, 'subprocess_daemon.py')
    interpreter = subprocess.check_output(
        constants.GET_PYTHON_PATH_CMD,
        shell=True,
        stderr=subprocess.DEVNULL,
        encoding='utf-8',
    ).strip()

    # The initial children are passed explicitly to avoid the race where
    # process_pid terminates before the daemon starts and can read them itself.
    children_arg = ','.join(str(pid) for pid in descendant_pids)
    argv = [
        interpreter,
        script_path,
        '--parent-pid',
        str(own_pid),
        '--proc-pid',
        str(process_pid),
        '--initial-children',
        children_arg,
    ]

    # `start_new_session=True` is not set here: per the original note, the
    # daemon script detaches itself from the parent with fork so that it is
    # not killed together with the parent process.
    subprocess.Popen(
        argv,
        stdin=subprocess.DEVNULL,  # Disable input
        stdout=subprocess.DEVNULL,  # Suppress output
        stderr=subprocess.DEVNULL,
    )
@@ -0,0 +1,247 @@
1
+ # Proprietary Changes made for Trainy under the Trainy Software License
2
+ # Original source: skypilot: https://github.com/skypilot-org/skypilot
3
+ # which is Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+ """Utility functions for UX."""
13
+
14
+ import contextlib
15
+ import enum
16
+ import os
17
+ import sys
18
+ import traceback
19
+ import typing
20
+ from typing import Callable, List, Optional, Union
21
+
22
+ import colorama
23
+ import rich.console as rich_console
24
+
25
+ from konduktor import config
26
+ from konduktor import logging as konduktor_logging
27
+
28
+ if typing.TYPE_CHECKING:
29
+ import pathlib
30
+
31
# Shared rich console; used by console_newline() below.
console = rich_console.Console()

# Dimmed tree-branch prefixes for indented hint lines ("├── " for an entry,
# "└── " for the last entry).
INDENT_SYMBOL = f'{colorama.Style.DIM}├── {colorama.Style.RESET_ALL}'
INDENT_LAST_SYMBOL = f'{colorama.Style.DIM}└── {colorama.Style.RESET_ALL}'

# Console formatting constants (raw ANSI escape sequences).
BOLD = '\033[1m'
RESET_BOLD = '\033[0m'

# Log path hint in the spinner during launching. `{{log_path}}` is an escaped
# brace pair: it survives the f-string as a literal `{log_path}` placeholder
# that log_path_hint() later fills in with str.format().
_LOG_PATH_HINT = (
    f'{colorama.Style.DIM}View logs at: {{log_path}}' f'{colorama.Style.RESET_ALL}'
)
44
+
45
+
46
def console_newline():
    """Emit an empty line through the shared rich console.

    Handy when an exception is caught inside console.status().
    """
    console.print()
52
+
53
+
54
@contextlib.contextmanager
def print_exception_no_traceback():
    """A context manager that prints out an exception without traceback.

    Mainly for UX: user-facing errors, e.g., ValueError, should suppress long
    tracebacks.

    If KONDUKTOR_DEBUG environment variable is set, this context manager is a
    no-op and the full traceback will be shown.

    Example usage:

        with print_exception_no_traceback():
            if error():
                raise ValueError('...')
    """
    if os.environ.get('KONDUKTOR_DEBUG'):
        # Debug mode: leave sys.tracebacklimit alone so full tracebacks show.
        yield
        return

    saved_limit = getattr(sys, 'tracebacklimit', 1000)
    sys.tracebacklimit = 0
    # No try/finally around the yield: when the body raises, the restore below
    # is skipped and the limit stays at 0 while the exception propagates.
    # Presumably intentional, since that is what hides the traceback.
    yield
    sys.tracebacklimit = saved_limit
78
+
79
+
80
@contextlib.contextmanager
def enable_traceback():
    """Temporarily undo print_exception_no_traceback().

    Sets ``sys.tracebacklimit`` to 1000 for the duration of the block and puts
    the previous value back on normal exit. Used for usage_lib to collect the
    full traceback.
    """
    previous_limit = getattr(sys, 'tracebacklimit', 1000)
    sys.tracebacklimit = 1000
    yield
    sys.tracebacklimit = previous_limit
90
+
91
+
92
class RedirectOutputForProcess:
    """Redirects stdout and stderr to a file.

    This class enabled output redirect for multiprocessing.Process.
    Example usage:

        p = multiprocessing.Process(
            target=RedirectOutputForProcess(func, file_name).run, args=...)

    This is equal to:

        p = multiprocessing.Process(target=func, args=...)

    Plus redirect all stdout/stderr to file_name.
    """

    def __init__(self, func: Callable, file: str, mode: str = 'w') -> None:
        """Store the target callable and the redirect destination.

        Args:
            func: The callable to invoke from ``run``.
            file: Path of the file that receives stdout/stderr.
            mode: Mode used to open ``file`` (default ``'w'``).
        """
        self.func = func
        self.file = file
        self.mode = mode

    def run(self, *args, **kwargs):
        """Call ``func(*args, **kwargs)`` with stdout/stderr sent to the file.

        ``sys.stdout`` / ``sys.stderr`` are rebound to the opened file and are
        not restored afterwards. Any exception from ``func`` is logged (with
        its traceback) and re-raised.
        """
        with open(self.file, self.mode, encoding='utf-8') as f:
            sys.stdout = f
            sys.stderr = f
            # reconfigure logger since the logger is initialized before
            # with previous stdout/stderr
            konduktor_logging.reload_logger()
            logger = konduktor_logging.init_logger(__name__)
            # The subprocess_util.run('konduktor status') inside
            # konduktor.execution::_execute cannot be redirect, since we cannot
            # directly operate on the stdout/stderr of the subprocess. This
            # is because some code in konduktor will specify the stdout/stderr
            # of the subprocess.
            try:
                self.func(*args, **kwargs)
            except Exception as e:  # pylint: disable=broad-except
                # functools.partial objects and callable instances have no
                # __name__; fall back to repr() so that reporting the failure
                # cannot itself raise and mask the original exception.
                func_name = getattr(self.func, '__name__', repr(self.func))
                logger.error(f'Failed to run {func_name}. ' f'Details: {e}')
                with enable_traceback():
                    logger.error(f' Traceback:\n{traceback.format_exc()}')
                raise
133
+
134
+
135
def log_path_hint(log_path: Union[str, 'pathlib.Path']) -> str:
    """Gets the log path hint for the given log path.

    A path that is the user's home directory, or lies inside it, is shown
    with the home prefix abbreviated to ``~``. Other paths are shown as is.

    Args:
        log_path: Path of the log file.

    Returns:
        The formatted (dimmed) "View logs at: ..." hint.
    """
    log_path = str(log_path)
    # Strip a trailing separator so that HOME='/home/u/' behaves like
    # '/home/u'. If home is the filesystem root this leaves an empty string
    # and no abbreviation is attempted.
    home = os.path.expanduser('~').rstrip(os.sep)
    # Only abbreviate on a path-component boundary: '/home/user2/x' must not
    # be rewritten to '~2/x' when home is '/home/user'.
    if home and (log_path == home or log_path.startswith(home + os.sep)):
        log_path = '~' + log_path[len(home) :]
    return _LOG_PATH_HINT.format(log_path=log_path)
142
+
143
+
144
def starting_message(message: str) -> str:
    """Format ``message`` as a "starting" line with a gear prefix."""
    # The colour is reset first: a previous dimmed spinner that overflowed in
    # a narrow terminal could otherwise leave the colour messed up.
    reset = colorama.Style.RESET_ALL
    return f'{reset}⚙︎ {message}'
150
+
151
+
152
def finishing_message(
    message: str, log_path: Optional[Union[str, 'pathlib.Path']] = None
) -> str:
    """Format ``message`` as a green success line.

    When ``log_path`` is given, the log path hint is appended after a space.
    """
    # The colour is reset first: a previous dimmed spinner that overflowed in
    # a narrow terminal could otherwise leave the colour messed up.
    reset = colorama.Style.RESET_ALL
    success_prefix = f'{reset}{colorama.Fore.GREEN}✓ {message}{reset}'
    if log_path is not None:
        return f'{success_prefix} {log_path_hint(log_path)}'
    return success_prefix
167
+
168
+
169
def error_message(
    message: str, log_path: Optional[Union[str, 'pathlib.Path']] = None
) -> str:
    """Format ``message`` as an error line with a red cross prefix.

    When ``log_path`` is given, the log path hint is appended after a space.
    """
    # The colour is reset first: a previous dimmed spinner that overflowed in
    # a narrow terminal could otherwise leave the colour messed up.
    reset = colorama.Style.RESET_ALL
    error_prefix = f'{reset}{colorama.Fore.RED}⨯{reset} {message}'
    if log_path is not None:
        return f'{error_prefix} {log_path_hint(log_path)}'
    return error_prefix
184
+
185
+
186
def retry_message(message: str) -> str:
    """Format ``message`` as a retry line with a yellow retry-arrow prefix."""
    # The colour is reset first: a previous dimmed spinner that overflowed in
    # a narrow terminal could otherwise leave the colour messed up.
    reset = colorama.Style.RESET_ALL
    return f'{reset}{colorama.Fore.YELLOW}↺{reset} {message}'
195
+
196
+
197
def spinner_message(
    message: str, log_path: Optional[Union[str, 'pathlib.Path']] = None
) -> str:
    """Build the spinner text for ``message``.

    The message is wrapped in ``[bold cyan]...[/]`` markup. When ``log_path``
    is given, the log path hint is appended after a space.
    """
    colored_spinner = f'[bold cyan]{message}[/]'
    if log_path is not None:
        return f'{colored_spinner} {log_path_hint(log_path)}'
    return colored_spinner
206
+
207
+
208
class CommandHintType(enum.Enum):
    """Selects which set of command hints command_hint_messages() renders."""

    # Hints for a single job: logs, status, stop and down commands.
    JOB = 'JOB'
    # Hint shown for stopped jobs: how to resume them.
    JOB_STOP = 'JOB_STOP'
211
+
212
+
213
def command_hint_messages(
    hint_type: CommandHintType,
    job_id: Union[str, List[str]],
) -> str:
    """Gets the command hint messages for the given job id.

    Args:
        hint_type: Which hints to render. ``JOB`` renders the logs / status /
            stop / down commands for one job; ``JOB_STOP`` renders the command
            to resume the given jobs.
        job_id: A single job id, or a list of job ids (the form used with
            ``JOB_STOP``).

    Returns:
        The multi-line hint string. If the ``tailscale.secret_name`` config
        entry is set, one tailscale ssh hint line per job id is appended.

    Raises:
        ValueError: If ``hint_type`` is not a known CommandHintType.
    """
    # Normalize to a list once. A bare string is accepted everywhere, so no
    # `assert` (stripped under -O) is needed to guard the join below, and the
    # ssh hint never interpolates a list repr into a host name.
    job_ids = [job_id] if isinstance(job_id, str) else list(job_id)

    hint_str = '\n📋 Useful Commands'
    if hint_type == CommandHintType.JOB:
        job_hint_str = (
            f'\nJob ID: {job_id}'
            f'\n{INDENT_SYMBOL}To stream job logs:\t\t'
            f'{BOLD}konduktor logs {job_id} {RESET_BOLD}'
            f'\n{INDENT_SYMBOL}To list all jobs:\t\t'
            f'{BOLD}konduktor status{RESET_BOLD}'
            f'\n{INDENT_SYMBOL}To suspend the job:\t\t'
            f'{BOLD}konduktor stop {job_id} {RESET_BOLD}'
            f'\n{INDENT_SYMBOL}{colorama.Fore.RED}To delete the job:\t\t'
            f'{BOLD}konduktor down {job_id} {RESET_BOLD}{colorama.Style.RESET_ALL}'
        )
        hint_str += f'{job_hint_str}'
    elif hint_type == CommandHintType.JOB_STOP:
        job_ids_str = ' '.join(job_ids)
        hint_str += (
            f'\n{INDENT_SYMBOL}To resume the following jobs:\t\t'
            f'{BOLD}konduktor start {job_ids_str} {RESET_BOLD}'
        )
    else:
        raise ValueError(f'Invalid hint type: {hint_type}')

    if config.get_nested(('tailscale', 'secret_name'), None) is not None:
        # One line per job: the worker host name is derived from a single id.
        for single_job_id in job_ids:
            hint_str += (
                f'\n{INDENT_SYMBOL}To tailscale ssh:\t\t'
                f'{BOLD}ssh root@{single_job_id}-workers-0-0 {RESET_BOLD}'
            )
    return hint_str