skypilot-nightly 1.0.0.dev20241012__py3-none-any.whl → 1.0.0.dev20241013__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/azure.py +3 -1
- sky/adaptors/common.py +6 -2
- sky/backends/backend.py +9 -4
- sky/backends/backend_utils.py +13 -16
- sky/backends/cloud_vm_ray_backend.py +207 -161
- sky/backends/local_docker_backend.py +3 -1
- sky/benchmark/benchmark_utils.py +5 -4
- sky/cli.py +36 -28
- sky/clouds/service_catalog/aws_catalog.py +6 -7
- sky/clouds/service_catalog/common.py +4 -3
- sky/clouds/service_catalog/cudo_catalog.py +11 -1
- sky/core.py +4 -2
- sky/data/storage.py +44 -32
- sky/data/storage_utils.py +8 -4
- sky/exceptions.py +5 -0
- sky/execution.py +10 -24
- sky/jobs/core.py +9 -7
- sky/jobs/utils.py +15 -10
- sky/optimizer.py +50 -37
- sky/provision/aws/config.py +15 -6
- sky/provision/azure/config.py +14 -3
- sky/provision/azure/instance.py +15 -9
- sky/provision/kubernetes/instance.py +3 -1
- sky/provision/provisioner.py +63 -74
- sky/serve/core.py +42 -40
- sky/sky_logging.py +9 -5
- sky/skylet/log_lib.py +5 -4
- sky/skylet/providers/lambda_cloud/node_provider.py +1 -1
- sky/utils/command_runner.py +11 -11
- sky/utils/common_utils.py +2 -5
- sky/utils/controller_utils.py +78 -29
- sky/utils/env_options.py +22 -7
- sky/utils/log_utils.py +39 -24
- sky/utils/resources_utils.py +23 -0
- sky/utils/rich_utils.py +55 -5
- sky/utils/ux_utils.py +63 -4
- {skypilot_nightly-1.0.0.dev20241012.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20241012.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/RECORD +43 -43
- {skypilot_nightly-1.0.0.dev20241012.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20241012.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20241012.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20241012.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/top_level.txt +0 -0
sky/serve/core.py
CHANGED
@@ -129,8 +129,10 @@ def up(
|
|
129
129
|
task, use_mutated_config_in_current_request=False)
|
130
130
|
task = dag.tasks[0]
|
131
131
|
|
132
|
-
|
133
|
-
|
132
|
+
with rich_utils.safe_status(
|
133
|
+
ux_utils.spinner_message('Initializing service')):
|
134
|
+
controller_utils.maybe_translate_local_file_mounts_and_sync_up(
|
135
|
+
task, path='serve')
|
134
136
|
|
135
137
|
with tempfile.NamedTemporaryFile(
|
136
138
|
prefix=f'service-task-{service_name}-',
|
@@ -215,7 +217,8 @@ def up(
|
|
215
217
|
# TODO(tian): Cache endpoint locally to speedup. Endpoint won't
|
216
218
|
# change after the first time, so there is no consistency issue.
|
217
219
|
with rich_utils.safe_status(
|
218
|
-
|
220
|
+
ux_utils.spinner_message(
|
221
|
+
'Waiting for the service to register')):
|
219
222
|
# This function will check the controller job id in the database
|
220
223
|
# and return the endpoint if the job id matches. Otherwise it will
|
221
224
|
# return None.
|
@@ -274,34 +277,31 @@ def up(
|
|
274
277
|
f'{style.BRIGHT}{service_name}{style.RESET_ALL}'
|
275
278
|
f'\n{fore.CYAN}Endpoint URL: '
|
276
279
|
f'{style.BRIGHT}{endpoint}{style.RESET_ALL}'
|
277
|
-
'\
|
278
|
-
f'{
|
279
|
-
f'
|
280
|
-
'
|
281
|
-
f'{
|
282
|
-
f'{
|
283
|
-
'
|
284
|
-
'\
|
285
|
-
f'{
|
286
|
-
f'{
|
287
|
-
'\
|
288
|
-
f'{
|
289
|
-
f'{
|
290
|
-
'\
|
291
|
-
f'{
|
292
|
-
f'{
|
293
|
-
'\n'
|
294
|
-
'
|
295
|
-
f'{
|
296
|
-
f'{
|
297
|
-
'
|
298
|
-
f'{
|
299
|
-
|
300
|
-
'
|
301
|
-
|
302
|
-
f'{style.RESET_ALL}'
|
303
|
-
f'\n{fore.GREEN}The replicas should be ready within a '
|
304
|
-
f'short time.{style.RESET_ALL}')
|
280
|
+
f'\n📋 Useful Commands'
|
281
|
+
f'\n{ux_utils.INDENT_SYMBOL}To check service status:\t'
|
282
|
+
f'{ux_utils.BOLD}sky serve status {service_name} '
|
283
|
+
f'[--endpoint]{ux_utils.RESET_BOLD}'
|
284
|
+
f'\n{ux_utils.INDENT_SYMBOL}To teardown the service:\t'
|
285
|
+
f'{ux_utils.BOLD}sky serve down {service_name}'
|
286
|
+
f'{ux_utils.RESET_BOLD}'
|
287
|
+
f'\n{ux_utils.INDENT_SYMBOL}To see replica logs:\t'
|
288
|
+
f'{ux_utils.BOLD}sky serve logs {service_name} [REPLICA_ID]'
|
289
|
+
f'{ux_utils.RESET_BOLD}'
|
290
|
+
f'\n{ux_utils.INDENT_SYMBOL}To see load balancer logs:\t'
|
291
|
+
f'{ux_utils.BOLD}sky serve logs --load-balancer {service_name}'
|
292
|
+
f'{ux_utils.RESET_BOLD}'
|
293
|
+
f'\n{ux_utils.INDENT_SYMBOL}To see controller logs:\t'
|
294
|
+
f'{ux_utils.BOLD}sky serve logs --controller {service_name}'
|
295
|
+
f'{ux_utils.RESET_BOLD}'
|
296
|
+
f'\n{ux_utils.INDENT_SYMBOL}To monitor the status:\t'
|
297
|
+
f'{ux_utils.BOLD}watch -n10 sky serve status {service_name}'
|
298
|
+
f'{ux_utils.RESET_BOLD}'
|
299
|
+
f'\n{ux_utils.INDENT_LAST_SYMBOL}To send a test request:\t'
|
300
|
+
f'{ux_utils.BOLD}curl {endpoint}'
|
301
|
+
f'{ux_utils.RESET_BOLD}'
|
302
|
+
'\n\n' +
|
303
|
+
ux_utils.finishing_message('Service is spinning up and replicas '
|
304
|
+
'will be ready shortly.'))
|
305
305
|
return service_name, endpoint
|
306
306
|
|
307
307
|
|
@@ -323,11 +323,11 @@ def update(
|
|
323
323
|
controller=controller_utils.Controllers.SKY_SERVE_CONTROLLER,
|
324
324
|
stopped_message=
|
325
325
|
'Service controller is stopped. There is no service to update. '
|
326
|
-
f'To spin up a new service, use {
|
327
|
-
f'sky serve up{
|
326
|
+
f'To spin up a new service, use {ux_utils.BOLD}'
|
327
|
+
f'sky serve up{ux_utils.RESET_BOLD}',
|
328
328
|
non_existent_message='Service does not exist. '
|
329
329
|
'To spin up a new service, '
|
330
|
-
f'use {
|
330
|
+
f'use {ux_utils.BOLD}sky serve up{ux_utils.RESET_BOLD}',
|
331
331
|
)
|
332
332
|
|
333
333
|
backend = backend_utils.get_backend_from_handle(handle)
|
@@ -353,8 +353,8 @@ def update(
|
|
353
353
|
if len(service_statuses) == 0:
|
354
354
|
with ux_utils.print_exception_no_traceback():
|
355
355
|
raise RuntimeError(f'Cannot find service {service_name!r}.'
|
356
|
-
f'To spin up a service, use {
|
357
|
-
f'sky serve up{
|
356
|
+
f'To spin up a service, use {ux_utils.BOLD}'
|
357
|
+
f'sky serve up{ux_utils.RESET_BOLD}')
|
358
358
|
|
359
359
|
if len(service_statuses) > 1:
|
360
360
|
with ux_utils.print_exception_no_traceback():
|
@@ -374,8 +374,10 @@ def update(
|
|
374
374
|
with ux_utils.print_exception_no_traceback():
|
375
375
|
raise RuntimeError(prompt)
|
376
376
|
|
377
|
-
|
378
|
-
|
377
|
+
with rich_utils.safe_status(
|
378
|
+
ux_utils.spinner_message('Initializing service')):
|
379
|
+
controller_utils.maybe_translate_local_file_mounts_and_sync_up(
|
380
|
+
task, path='serve')
|
379
381
|
|
380
382
|
code = serve_utils.ServeCodeGen.add_version(service_name)
|
381
383
|
returncode, version_string_payload, stderr = backend.run_on_head(
|
@@ -433,8 +435,8 @@ def update(
|
|
433
435
|
|
434
436
|
print(f'{colorama.Fore.GREEN}Service {service_name!r} update scheduled.'
|
435
437
|
f'{colorama.Style.RESET_ALL}\n'
|
436
|
-
f'Please use {
|
437
|
-
f'{
|
438
|
+
f'Please use {ux_utils.BOLD}sky serve status {service_name} '
|
439
|
+
f'{ux_utils.RESET_BOLD}to check the latest status.')
|
438
440
|
|
439
441
|
|
440
442
|
@usage_lib.entrypoint
|
sky/sky_logging.py
CHANGED
@@ -10,10 +10,10 @@ import colorama
|
|
10
10
|
from sky.utils import env_options
|
11
11
|
from sky.utils import rich_utils
|
12
12
|
|
13
|
-
#
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
# UX: Should we show logging prefixes and some extra information in optimizer?
|
14
|
+
_show_logging_prefix = (env_options.Options.SHOW_DEBUG_INFO.get() or
|
15
|
+
not env_options.Options.MINIMIZE_LOGGING.get())
|
16
|
+
_FORMAT = '%(levelname).1s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
|
17
17
|
_DATE_FORMAT = '%m-%d %H:%M:%S'
|
18
18
|
|
19
19
|
|
@@ -45,6 +45,7 @@ _root_logger = logging.getLogger('sky')
|
|
45
45
|
_default_handler = None
|
46
46
|
_logging_config = threading.local()
|
47
47
|
|
48
|
+
NO_PREFIX_FORMATTER = NewLineFormatter(None, datefmt=_DATE_FORMAT)
|
48
49
|
FORMATTER = NewLineFormatter(_FORMAT, datefmt=_DATE_FORMAT)
|
49
50
|
DIM_FORMATTER = NewLineFormatter(_FORMAT, datefmt=_DATE_FORMAT, dim=True)
|
50
51
|
|
@@ -67,7 +68,10 @@ def _setup_logger():
|
|
67
68
|
else:
|
68
69
|
_default_handler.setLevel(logging.INFO)
|
69
70
|
_root_logger.addHandler(_default_handler)
|
70
|
-
|
71
|
+
if _show_logging_prefix:
|
72
|
+
_default_handler.setFormatter(FORMATTER)
|
73
|
+
else:
|
74
|
+
_default_handler.setFormatter(NO_PREFIX_FORMATTER)
|
71
75
|
# Setting this will avoid the message
|
72
76
|
# being propagated to the parent logger.
|
73
77
|
_root_logger.propagate = False
|
sky/skylet/log_lib.py
CHANGED
@@ -21,6 +21,7 @@ from sky.skylet import constants
|
|
21
21
|
from sky.skylet import job_lib
|
22
22
|
from sky.utils import log_utils
|
23
23
|
from sky.utils import subprocess_utils
|
24
|
+
from sky.utils import ux_utils
|
24
25
|
|
25
26
|
_SKY_LOG_WAITING_GAP_SECONDS = 1
|
26
27
|
_SKY_LOG_WAITING_MAX_RETRY = 5
|
@@ -377,7 +378,9 @@ def _follow_job_logs(file,
|
|
377
378
|
wait_last_logs = False
|
378
379
|
continue
|
379
380
|
status_str = status.value if status is not None else 'None'
|
380
|
-
print(
|
381
|
+
print(
|
382
|
+
ux_utils.finishing_message(
|
383
|
+
f'Job finished (status: {status_str}).'))
|
381
384
|
return
|
382
385
|
|
383
386
|
time.sleep(_SKY_LOG_TAILING_GAP_SECONDS)
|
@@ -412,8 +415,6 @@ def tail_logs(job_id: Optional[int],
|
|
412
415
|
return
|
413
416
|
logger.debug(f'Tailing logs for job, real job_id {job_id}, managed_job_id '
|
414
417
|
f'{managed_job_id}.')
|
415
|
-
logger.info(f'{colorama.Fore.YELLOW}Start streaming logs for {job_str}.'
|
416
|
-
f'{colorama.Style.RESET_ALL}')
|
417
418
|
log_path = os.path.join(log_dir, 'run.log')
|
418
419
|
log_path = os.path.expanduser(log_path)
|
419
420
|
|
@@ -437,7 +438,7 @@ def tail_logs(job_id: Optional[int],
|
|
437
438
|
time.sleep(_SKY_LOG_WAITING_GAP_SECONDS)
|
438
439
|
status = job_lib.update_job_status([job_id], silent=True)[0]
|
439
440
|
|
440
|
-
start_stream_at = '
|
441
|
+
start_stream_at = 'Waiting for task resources on '
|
441
442
|
if follow and status in [
|
442
443
|
job_lib.JobStatus.SETTING_UP,
|
443
444
|
job_lib.JobStatus.PENDING,
|
@@ -25,7 +25,7 @@ _TAG_PATH_PREFIX = '~/.sky/generated/lambda_cloud/metadata'
|
|
25
25
|
_REMOTE_SSH_KEY_NAME = '~/.lambda_cloud/ssh_key_name'
|
26
26
|
_REMOTE_RAY_SSH_KEY = '~/ray_bootstrap_key.pem'
|
27
27
|
_REMOTE_RAY_YAML = '~/ray_bootstrap_config.yaml'
|
28
|
-
_GET_INTERNAL_IP_CMD = 'ip -4 -br addr show | grep UP | grep -Eo "(10\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|172\.(1[6-9]|2[0-9]|3[0-1]))\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"'
|
28
|
+
_GET_INTERNAL_IP_CMD = 's=$(ip -4 -br addr show | grep UP); echo "$s"; echo "$s" | grep -Eo "(10\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|172\.(1[6-9]|2[0-9]|3[0-1])|104\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"'
|
29
29
|
|
30
30
|
logger = logging.getLogger(__name__)
|
31
31
|
|
sky/utils/command_runner.py
CHANGED
@@ -171,7 +171,7 @@ class CommandRunner:
|
|
171
171
|
cmd: Union[str, List[str]],
|
172
172
|
process_stream: bool,
|
173
173
|
separate_stderr: bool,
|
174
|
-
|
174
|
+
skip_num_lines: int,
|
175
175
|
source_bashrc: bool = False,
|
176
176
|
) -> str:
|
177
177
|
"""Returns the command to run."""
|
@@ -203,12 +203,12 @@ class CommandRunner:
|
|
203
203
|
]
|
204
204
|
if not separate_stderr:
|
205
205
|
command.append('2>&1')
|
206
|
-
if not process_stream and
|
206
|
+
if not process_stream and skip_num_lines:
|
207
207
|
command += [
|
208
208
|
# A hack to remove the following bash warnings (twice):
|
209
209
|
# bash: cannot set terminal process group
|
210
210
|
# bash: no job control in this shell
|
211
|
-
f'| stdbuf -o0 tail -n +{
|
211
|
+
f'| stdbuf -o0 tail -n +{skip_num_lines}',
|
212
212
|
# This is required to make sure the executor of command can get
|
213
213
|
# correct returncode, since linux pipe is used.
|
214
214
|
'; exit ${PIPESTATUS[0]}'
|
@@ -320,7 +320,7 @@ class CommandRunner:
|
|
320
320
|
separate_stderr: bool = False,
|
321
321
|
connect_timeout: Optional[int] = None,
|
322
322
|
source_bashrc: bool = False,
|
323
|
-
|
323
|
+
skip_num_lines: int = 0,
|
324
324
|
**kwargs) -> Union[int, Tuple[int, str, str]]:
|
325
325
|
"""Runs the command on the cluster.
|
326
326
|
|
@@ -335,7 +335,7 @@ class CommandRunner:
|
|
335
335
|
connect_timeout: timeout in seconds for the ssh connection.
|
336
336
|
source_bashrc: Whether to source the ~/.bashrc before running the
|
337
337
|
command.
|
338
|
-
|
338
|
+
skip_num_lines: The number of lines to skip at the beginning of the
|
339
339
|
output. This is used when the output is not processed by
|
340
340
|
SkyPilot but we still want to get rid of some warning messages,
|
341
341
|
such as SSH warnings.
|
@@ -529,7 +529,7 @@ class SSHCommandRunner(CommandRunner):
|
|
529
529
|
separate_stderr: bool = False,
|
530
530
|
connect_timeout: Optional[int] = None,
|
531
531
|
source_bashrc: bool = False,
|
532
|
-
|
532
|
+
skip_num_lines: int = 0,
|
533
533
|
**kwargs) -> Union[int, Tuple[int, str, str]]:
|
534
534
|
"""Uses 'ssh' to run 'cmd' on a node with ip.
|
535
535
|
|
@@ -550,7 +550,7 @@ class SSHCommandRunner(CommandRunner):
|
|
550
550
|
connect_timeout: timeout in seconds for the ssh connection.
|
551
551
|
source_bashrc: Whether to source the bashrc before running the
|
552
552
|
command.
|
553
|
-
|
553
|
+
skip_num_lines: The number of lines to skip at the beginning of the
|
554
554
|
output. This is used when the output is not processed by
|
555
555
|
SkyPilot but we still want to get rid of some warning messages,
|
556
556
|
such as SSH warnings.
|
@@ -573,7 +573,7 @@ class SSHCommandRunner(CommandRunner):
|
|
573
573
|
command_str = self._get_command_to_run(cmd,
|
574
574
|
process_stream,
|
575
575
|
separate_stderr,
|
576
|
-
|
576
|
+
skip_num_lines=skip_num_lines,
|
577
577
|
source_bashrc=source_bashrc)
|
578
578
|
command = base_ssh_command + [shlex.quote(command_str)]
|
579
579
|
|
@@ -693,7 +693,7 @@ class KubernetesCommandRunner(CommandRunner):
|
|
693
693
|
separate_stderr: bool = False,
|
694
694
|
connect_timeout: Optional[int] = None,
|
695
695
|
source_bashrc: bool = False,
|
696
|
-
|
696
|
+
skip_num_lines: int = 0,
|
697
697
|
**kwargs) -> Union[int, Tuple[int, str, str]]:
|
698
698
|
"""Uses 'kubectl exec' to run 'cmd' on a pod by its name and namespace.
|
699
699
|
|
@@ -713,7 +713,7 @@ class KubernetesCommandRunner(CommandRunner):
|
|
713
713
|
connect_timeout: timeout in seconds for the pod connection.
|
714
714
|
source_bashrc: Whether to source the bashrc before running the
|
715
715
|
command.
|
716
|
-
|
716
|
+
skip_num_lines: The number of lines to skip at the beginning of the
|
717
717
|
output. This is used when the output is not processed by
|
718
718
|
SkyPilot but we still want to get rid of some warning messages,
|
719
719
|
such as SSH warnings.
|
@@ -751,7 +751,7 @@ class KubernetesCommandRunner(CommandRunner):
|
|
751
751
|
command_str = self._get_command_to_run(cmd,
|
752
752
|
process_stream,
|
753
753
|
separate_stderr,
|
754
|
-
|
754
|
+
skip_num_lines=skip_num_lines,
|
755
755
|
source_bashrc=source_bashrc)
|
756
756
|
command = kubectl_base_command + [
|
757
757
|
# It is important to use /bin/bash -c here to make sure we quote the
|
sky/utils/common_utils.py
CHANGED
@@ -16,7 +16,6 @@ import time
|
|
16
16
|
from typing import Any, Callable, Dict, List, Optional, Union
|
17
17
|
import uuid
|
18
18
|
|
19
|
-
import colorama
|
20
19
|
import jinja2
|
21
20
|
import jsonschema
|
22
21
|
import yaml
|
@@ -479,11 +478,9 @@ def format_exception(e: Union[Exception, SystemExit, KeyboardInterrupt],
|
|
479
478
|
Returns:
|
480
479
|
A string that represents the exception.
|
481
480
|
"""
|
482
|
-
bright = colorama.Style.BRIGHT
|
483
|
-
reset = colorama.Style.RESET_ALL
|
484
481
|
if use_bracket:
|
485
|
-
return f'
|
486
|
-
return f'{
|
482
|
+
return f'[{class_fullname(e.__class__)}] {e}'
|
483
|
+
return f'{class_fullname(e.__class__)}: {e}'
|
487
484
|
|
488
485
|
|
489
486
|
def remove_color(s: str):
|
sky/utils/controller_utils.py
CHANGED
@@ -28,6 +28,7 @@ from sky.serve import serve_utils
|
|
28
28
|
from sky.skylet import constants
|
29
29
|
from sky.utils import common_utils
|
30
30
|
from sky.utils import env_options
|
31
|
+
from sky.utils import rich_utils
|
31
32
|
from sky.utils import ux_utils
|
32
33
|
|
33
34
|
if typing.TYPE_CHECKING:
|
@@ -192,7 +193,11 @@ def _get_cloud_dependencies_installation_commands(
|
|
192
193
|
# TODO(tian): Make dependency installation command a method of cloud
|
193
194
|
# class and get all installation command for enabled clouds.
|
194
195
|
commands = []
|
195
|
-
|
196
|
+
# We use <step>/<total> instead of string formatting, as we need to update
|
197
|
+
# the <total> at the end of the for loop, and python does not support
|
198
|
+
# partial string formatting.
|
199
|
+
prefix_str = ('[<step>/<total>] Check & install cloud dependencies '
|
200
|
+
'on controller: ')
|
196
201
|
# This is to make sure the shorter checking message does not have junk
|
197
202
|
# characters from the previous message.
|
198
203
|
empty_str = ' ' * 10
|
@@ -203,6 +208,7 @@ def _get_cloud_dependencies_installation_commands(
|
|
203
208
|
# other clouds will install boto3 but not awscli.
|
204
209
|
'pip list | grep awscli> /dev/null 2>&1 || pip install "urllib3<2" '
|
205
210
|
'awscli>=1.27.10 "colorama<0.4.5" > /dev/null 2>&1')
|
211
|
+
setup_clouds: List[str] = []
|
206
212
|
for cloud in sky_check.get_cached_enabled_clouds_or_refresh():
|
207
213
|
if isinstance(
|
208
214
|
clouds,
|
@@ -211,11 +217,16 @@ def _get_cloud_dependencies_installation_commands(
|
|
211
217
|
# fluidstack and paperspace
|
212
218
|
continue
|
213
219
|
if isinstance(cloud, clouds.AWS):
|
214
|
-
|
220
|
+
step_prefix = prefix_str.replace('<step>',
|
221
|
+
str(len(setup_clouds) + 1))
|
222
|
+
commands.append(f'echo -en "\\r{step_prefix}AWS{empty_str}" && ' +
|
215
223
|
aws_dependencies_installation)
|
224
|
+
setup_clouds.append(str(cloud))
|
216
225
|
elif isinstance(cloud, clouds.Azure):
|
226
|
+
step_prefix = prefix_str.replace('<step>',
|
227
|
+
str(len(setup_clouds) + 1))
|
217
228
|
commands.append(
|
218
|
-
f'echo -en "\\r{
|
229
|
+
f'echo -en "\\r{step_prefix}Azure{empty_str}" && '
|
219
230
|
'pip list | grep azure-cli > /dev/null 2>&1 || '
|
220
231
|
'pip install "azure-cli>=2.31.0" azure-core '
|
221
232
|
'"azure-identity>=1.13.0" azure-mgmt-network > /dev/null 2>&1')
|
@@ -225,9 +236,12 @@ def _get_cloud_dependencies_installation_commands(
|
|
225
236
|
commands.append(
|
226
237
|
'pip list | grep azure-storage-blob > /dev/null 2>&1 || '
|
227
238
|
'pip install azure-storage-blob msgraph-sdk > /dev/null 2>&1')
|
239
|
+
setup_clouds.append(str(cloud))
|
228
240
|
elif isinstance(cloud, clouds.GCP):
|
241
|
+
step_prefix = prefix_str.replace('<step>',
|
242
|
+
str(len(setup_clouds) + 1))
|
229
243
|
commands.append(
|
230
|
-
f'echo -en "\\r{
|
244
|
+
f'echo -en "\\r{step_prefix}GCP{empty_str}" && '
|
231
245
|
'pip list | grep google-api-python-client > /dev/null 2>&1 || '
|
232
246
|
'pip install "google-api-python-client>=2.69.0" '
|
233
247
|
'> /dev/null 2>&1')
|
@@ -238,9 +252,12 @@ def _get_cloud_dependencies_installation_commands(
|
|
238
252
|
'pip list | grep google-cloud-storage > /dev/null 2>&1 || '
|
239
253
|
'pip install google-cloud-storage > /dev/null 2>&1')
|
240
254
|
commands.append(f'{gcp.GOOGLE_SDK_INSTALLATION_COMMAND}')
|
255
|
+
setup_clouds.append(str(cloud))
|
241
256
|
elif isinstance(cloud, clouds.Kubernetes):
|
257
|
+
step_prefix = prefix_str.replace('<step>',
|
258
|
+
str(len(setup_clouds) + 1))
|
242
259
|
commands.append(
|
243
|
-
f'echo -en "\\r{
|
260
|
+
f'echo -en "\\r{step_prefix}Kubernetes{empty_str}" && '
|
244
261
|
'pip list | grep kubernetes > /dev/null 2>&1 || '
|
245
262
|
'pip install "kubernetes>=20.0.0" > /dev/null 2>&1 &&'
|
246
263
|
# Install k8s + skypilot dependencies
|
@@ -248,8 +265,8 @@ def _get_cloud_dependencies_installation_commands(
|
|
248
265
|
'! command -v curl &> /dev/null || '
|
249
266
|
'! command -v socat &> /dev/null || '
|
250
267
|
'! command -v netcat &> /dev/null; '
|
251
|
-
'then apt update
|
252
|
-
'&> /dev/null; '
|
268
|
+
'then apt update &> /dev/null && '
|
269
|
+
'apt install curl socat netcat -y &> /dev/null; '
|
253
270
|
'fi" && '
|
254
271
|
# Install kubectl
|
255
272
|
'(command -v kubectl &>/dev/null || '
|
@@ -258,34 +275,55 @@ def _get_cloud_dependencies_installation_commands(
|
|
258
275
|
'/bin/linux/amd64/kubectl" && '
|
259
276
|
'sudo install -o root -g root -m 0755 '
|
260
277
|
'kubectl /usr/local/bin/kubectl))')
|
278
|
+
setup_clouds.append(str(cloud))
|
261
279
|
elif isinstance(cloud, clouds.Cudo):
|
280
|
+
step_prefix = prefix_str.replace('<step>',
|
281
|
+
str(len(setup_clouds) + 1))
|
262
282
|
commands.append(
|
263
|
-
f'echo -en "\\r{
|
283
|
+
f'echo -en "\\r{step_prefix}Cudo{empty_str}" && '
|
264
284
|
'pip list | grep cudo-compute > /dev/null 2>&1 || '
|
265
285
|
'pip install "cudo-compute>=0.1.10" > /dev/null 2>&1 && '
|
266
286
|
'wget https://download.cudo.org/compute/cudoctl-0.3.2-amd64.deb -O ~/cudoctl.deb > /dev/null 2>&1 && ' # pylint: disable=line-too-long
|
267
287
|
'sudo dpkg -i ~/cudoctl.deb > /dev/null 2>&1')
|
288
|
+
setup_clouds.append(str(cloud))
|
268
289
|
elif isinstance(cloud, clouds.RunPod):
|
269
|
-
|
290
|
+
step_prefix = prefix_str.replace('<step>',
|
291
|
+
str(len(setup_clouds) + 1))
|
292
|
+
commands.append(f'echo -en "\\r{step_prefix}RunPod{empty_str}" && '
|
270
293
|
'pip list | grep runpod > /dev/null 2>&1 || '
|
271
294
|
'pip install "runpod>=1.5.1" > /dev/null 2>&1')
|
295
|
+
setup_clouds.append(str(cloud))
|
272
296
|
if controller == Controllers.JOBS_CONTROLLER:
|
273
297
|
if isinstance(cloud, clouds.IBM):
|
298
|
+
step_prefix = prefix_str.replace('<step>',
|
299
|
+
str(len(setup_clouds) + 1))
|
274
300
|
commands.append(
|
275
|
-
f'echo -en "\\r{
|
301
|
+
f'echo -en "\\r{step_prefix}IBM{empty_str}" '
|
276
302
|
'&& pip list | grep ibm-cloud-sdk-core > /dev/null 2>&1 || '
|
277
303
|
'pip install ibm-cloud-sdk-core ibm-vpc '
|
278
304
|
'ibm-platform-services ibm-cos-sdk > /dev/null 2>&1')
|
305
|
+
setup_clouds.append(str(cloud))
|
279
306
|
elif isinstance(cloud, clouds.OCI):
|
307
|
+
step_prefix = prefix_str.replace('<step>',
|
308
|
+
str(len(setup_clouds) + 1))
|
280
309
|
commands.append(f'echo -en "\\r{prefix_str}OCI{empty_str}" && '
|
281
310
|
'pip list | grep oci > /dev/null 2>&1 || '
|
282
311
|
'pip install oci > /dev/null 2>&1')
|
312
|
+
setup_clouds.append(str(cloud))
|
283
313
|
if (cloudflare.NAME
|
284
314
|
in storage_lib.get_cached_enabled_storage_clouds_or_refresh()):
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
315
|
+
step_prefix = prefix_str.replace('<step>', str(len(setup_clouds) + 1))
|
316
|
+
commands.append(
|
317
|
+
f'echo -en "\\r{step_prefix}Cloudflare{empty_str}" && ' +
|
318
|
+
aws_dependencies_installation)
|
319
|
+
setup_clouds.append(cloudflare.NAME)
|
320
|
+
|
321
|
+
finish_prefix = prefix_str.replace('[<step>/<total>] ', ' ')
|
322
|
+
commands.append(f'echo -e "\\r{finish_prefix}done.{empty_str}"')
|
323
|
+
commands = [
|
324
|
+
command.replace('<total>', str(len(setup_clouds)))
|
325
|
+
for command in commands
|
326
|
+
]
|
289
327
|
return commands
|
290
328
|
|
291
329
|
|
@@ -388,7 +426,7 @@ def shared_controller_vars_to_fill(
|
|
388
426
|
'local_user_config_path': local_user_config_path,
|
389
427
|
}
|
390
428
|
env_vars: Dict[str, str] = {
|
391
|
-
env.
|
429
|
+
env.env_key: str(int(env.get())) for env in env_options.Options
|
392
430
|
}
|
393
431
|
env_vars.update({
|
394
432
|
# Should not use $USER here, as that env var can be empty when
|
@@ -396,7 +434,9 @@ def shared_controller_vars_to_fill(
|
|
396
434
|
constants.USER_ENV_VAR: getpass.getuser(),
|
397
435
|
constants.USER_ID_ENV_VAR: common_utils.get_user_hash(),
|
398
436
|
# Skip cloud identity check to avoid the overhead.
|
399
|
-
env_options.Options.SKIP_CLOUD_IDENTITY_CHECK.
|
437
|
+
env_options.Options.SKIP_CLOUD_IDENTITY_CHECK.env_key: '1',
|
438
|
+
# Disable minimize logging to get more details on the controller.
|
439
|
+
env_options.Options.MINIMIZE_LOGGING.env_key: '0',
|
400
440
|
})
|
401
441
|
if skypilot_config.loaded():
|
402
442
|
# Only set the SKYPILOT_CONFIG env var if the user has a config file.
|
@@ -599,6 +639,7 @@ def maybe_translate_local_file_mounts_and_sync_up(task: 'task_lib.Task',
|
|
599
639
|
# ================================================================
|
600
640
|
# Translate the workdir and local file mounts to cloud file mounts.
|
601
641
|
# ================================================================
|
642
|
+
|
602
643
|
run_id = common_utils.get_usage_run_id()[:8]
|
603
644
|
original_file_mounts = task.file_mounts if task.file_mounts else {}
|
604
645
|
original_storage_mounts = task.storage_mounts if task.storage_mounts else {}
|
@@ -618,8 +659,12 @@ def maybe_translate_local_file_mounts_and_sync_up(task: 'task_lib.Task',
|
|
618
659
|
elif has_local_source_paths_workdir:
|
619
660
|
msg = 'workdir'
|
620
661
|
if msg:
|
621
|
-
logger.info(
|
622
|
-
|
662
|
+
logger.info(
|
663
|
+
ux_utils.starting_message(f'Translating {msg} to '
|
664
|
+
'SkyPilot Storage...'))
|
665
|
+
rich_utils.force_update_status(
|
666
|
+
ux_utils.spinner_message(
|
667
|
+
f'Translating {msg} to SkyPilot Storage...'))
|
623
668
|
|
624
669
|
# Step 1: Translate the workdir to SkyPilot storage.
|
625
670
|
new_storage_mounts = {}
|
@@ -643,8 +688,8 @@ def maybe_translate_local_file_mounts_and_sync_up(task: 'task_lib.Task',
|
|
643
688
|
})
|
644
689
|
# Check of the existence of the workdir in file_mounts is done in
|
645
690
|
# the task construction.
|
646
|
-
logger.info(f'Workdir {workdir!r}
|
647
|
-
f'{bucket_name!r}.')
|
691
|
+
logger.info(f' {colorama.Style.DIM}Workdir: {workdir!r} '
|
692
|
+
f'-> storage: {bucket_name!r}.{colorama.Style.RESET_ALL}')
|
648
693
|
|
649
694
|
# Step 2: Translate the local file mounts with folder in src to SkyPilot
|
650
695
|
# storage.
|
@@ -668,9 +713,8 @@ def maybe_translate_local_file_mounts_and_sync_up(task: 'task_lib.Task',
|
|
668
713
|
'persistent': False,
|
669
714
|
'mode': 'COPY',
|
670
715
|
})
|
671
|
-
logger.info(
|
672
|
-
|
673
|
-
f'storage {bucket_name}.')
|
716
|
+
logger.info(f' {colorama.Style.DIM}Folder : {src!r} '
|
717
|
+
f'-> storage: {bucket_name!r}.{colorama.Style.RESET_ALL}')
|
674
718
|
|
675
719
|
# Step 3: Translate local file mounts with file in src to SkyPilot storage.
|
676
720
|
# Hard link the files in src to a temporary directory, and upload folder.
|
@@ -703,10 +747,12 @@ def maybe_translate_local_file_mounts_and_sync_up(task: 'task_lib.Task',
|
|
703
747
|
f'destination {file_mount_remote_tmp_dir} '
|
704
748
|
'being taken.')
|
705
749
|
sources = list(src_to_file_id.keys())
|
706
|
-
sources_str = '\n
|
707
|
-
logger.info('
|
708
|
-
f'
|
709
|
-
f'\n
|
750
|
+
sources_str = '\n '.join(sources)
|
751
|
+
logger.info(f' {colorama.Style.DIM}Files (listed below) '
|
752
|
+
f' -> storage: {file_bucket_name}:'
|
753
|
+
f'\n {sources_str}{colorama.Style.RESET_ALL}')
|
754
|
+
rich_utils.force_update_status(
|
755
|
+
ux_utils.spinner_message('Uploading translated local files/folders'))
|
710
756
|
task.update_storage_mounts(new_storage_mounts)
|
711
757
|
|
712
758
|
# Step 4: Upload storage from sources
|
@@ -716,8 +762,9 @@ def maybe_translate_local_file_mounts_and_sync_up(task: 'task_lib.Task',
|
|
716
762
|
if task.storage_mounts:
|
717
763
|
# There may be existing (non-translated) storage mounts, so log this
|
718
764
|
# whenever task.storage_mounts is non-empty.
|
719
|
-
|
720
|
-
|
765
|
+
rich_utils.force_update_status(
|
766
|
+
ux_utils.spinner_message('Uploading local sources to storage[/] '
|
767
|
+
'[dim]View storages: sky storage ls'))
|
721
768
|
try:
|
722
769
|
task.sync_storage_mounts()
|
723
770
|
except ValueError as e:
|
@@ -800,3 +847,5 @@ def maybe_translate_local_file_mounts_and_sync_up(task: 'task_lib.Task',
|
|
800
847
|
})
|
801
848
|
updated_mount_storages[storage_path] = new_storage
|
802
849
|
task.update_storage_mounts(updated_mount_storages)
|
850
|
+
if msg:
|
851
|
+
logger.info(ux_utils.finishing_message('Uploaded local files/folders.'))
|
sky/utils/env_options.py
CHANGED
@@ -5,17 +5,32 @@ import os
|
|
5
5
|
|
6
6
|
class Options(enum.Enum):
|
7
7
|
"""Environment variables for SkyPilot."""
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
8
|
+
|
9
|
+
# (env var name, default value)
|
10
|
+
IS_DEVELOPER = ('SKYPILOT_DEV', False)
|
11
|
+
SHOW_DEBUG_INFO = ('SKYPILOT_DEBUG', False)
|
12
|
+
DISABLE_LOGGING = ('SKYPILOT_DISABLE_USAGE_COLLECTION', False)
|
13
|
+
MINIMIZE_LOGGING = ('SKYPILOT_MINIMIZE_LOGGING', True)
|
12
14
|
# Internal: this is used to skip the cloud user identity check, which is
|
13
15
|
# used to protect cluster operations in a multi-identity scenario.
|
14
16
|
# Currently, this is only used in the job and serve controller, as there
|
15
17
|
# will not be multiple identities, and skipping the check can increase
|
16
18
|
# robustness.
|
17
|
-
SKIP_CLOUD_IDENTITY_CHECK = 'SKYPILOT_SKIP_CLOUD_IDENTITY_CHECK'
|
19
|
+
SKIP_CLOUD_IDENTITY_CHECK = ('SKYPILOT_SKIP_CLOUD_IDENTITY_CHECK', False)
|
20
|
+
|
21
|
+
def __init__(self, env_var: str, default: bool) -> None:
|
22
|
+
self.env_var = env_var
|
23
|
+
self.default = default
|
18
24
|
|
19
|
-
def
|
25
|
+
def __repr__(self) -> str:
|
26
|
+
return self.env_var
|
27
|
+
|
28
|
+
def get(self) -> bool:
|
20
29
|
"""Check if an environment variable is set to True."""
|
21
|
-
return os.getenv(self.
|
30
|
+
return os.getenv(self.env_var,
|
31
|
+
str(self.default)).lower() in ('true', '1')
|
32
|
+
|
33
|
+
@property
|
34
|
+
def env_key(self) -> str:
|
35
|
+
"""The environment variable key name."""
|
36
|
+
return self.value[0]
|