skypilot-nightly 1.0.0.dev20241012__py3-none-any.whl → 1.0.0.dev20241014__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/azure.py +3 -1
  3. sky/adaptors/common.py +6 -2
  4. sky/backends/backend.py +9 -4
  5. sky/backends/backend_utils.py +13 -16
  6. sky/backends/cloud_vm_ray_backend.py +207 -161
  7. sky/backends/local_docker_backend.py +3 -1
  8. sky/benchmark/benchmark_utils.py +5 -4
  9. sky/cli.py +36 -28
  10. sky/clouds/oci.py +17 -2
  11. sky/clouds/service_catalog/aws_catalog.py +6 -7
  12. sky/clouds/service_catalog/common.py +4 -3
  13. sky/clouds/service_catalog/cudo_catalog.py +11 -1
  14. sky/core.py +4 -2
  15. sky/data/storage.py +44 -32
  16. sky/data/storage_utils.py +8 -4
  17. sky/exceptions.py +5 -0
  18. sky/execution.py +10 -24
  19. sky/jobs/core.py +9 -7
  20. sky/jobs/utils.py +15 -10
  21. sky/optimizer.py +50 -37
  22. sky/provision/aws/config.py +15 -6
  23. sky/provision/azure/config.py +14 -3
  24. sky/provision/azure/instance.py +15 -9
  25. sky/provision/kubernetes/instance.py +3 -1
  26. sky/provision/provisioner.py +63 -74
  27. sky/serve/core.py +42 -40
  28. sky/sky_logging.py +9 -5
  29. sky/skylet/job_lib.py +15 -0
  30. sky/skylet/log_lib.py +5 -4
  31. sky/skylet/providers/lambda_cloud/node_provider.py +1 -1
  32. sky/utils/command_runner.py +11 -11
  33. sky/utils/common_utils.py +2 -5
  34. sky/utils/controller_utils.py +78 -29
  35. sky/utils/env_options.py +22 -7
  36. sky/utils/log_utils.py +39 -24
  37. sky/utils/resources_utils.py +23 -0
  38. sky/utils/rich_utils.py +55 -5
  39. sky/utils/ux_utils.py +63 -4
  40. {skypilot_nightly-1.0.0.dev20241012.dist-info → skypilot_nightly-1.0.0.dev20241014.dist-info}/METADATA +1 -1
  41. {skypilot_nightly-1.0.0.dev20241012.dist-info → skypilot_nightly-1.0.0.dev20241014.dist-info}/RECORD +45 -45
  42. {skypilot_nightly-1.0.0.dev20241012.dist-info → skypilot_nightly-1.0.0.dev20241014.dist-info}/LICENSE +0 -0
  43. {skypilot_nightly-1.0.0.dev20241012.dist-info → skypilot_nightly-1.0.0.dev20241014.dist-info}/WHEEL +0 -0
  44. {skypilot_nightly-1.0.0.dev20241012.dist-info → skypilot_nightly-1.0.0.dev20241014.dist-info}/entry_points.txt +0 -0
  45. {skypilot_nightly-1.0.0.dev20241012.dist-info → skypilot_nightly-1.0.0.dev20241014.dist-info}/top_level.txt +0 -0
sky/serve/core.py CHANGED
@@ -129,8 +129,10 @@ def up(
             task, use_mutated_config_in_current_request=False)
     task = dag.tasks[0]
 
-    controller_utils.maybe_translate_local_file_mounts_and_sync_up(task,
-                                                                   path='serve')
+    with rich_utils.safe_status(
+            ux_utils.spinner_message('Initializing service')):
+        controller_utils.maybe_translate_local_file_mounts_and_sync_up(
+            task, path='serve')
 
     with tempfile.NamedTemporaryFile(
             prefix=f'service-task-{service_name}-',
@@ -215,7 +217,8 @@ def up(
         # TODO(tian): Cache endpoint locally to speedup. Endpoint won't
         # change after the first time, so there is no consistency issue.
         with rich_utils.safe_status(
-                '[cyan]Waiting for the service to register[/]'):
+                ux_utils.spinner_message(
+                    'Waiting for the service to register')):
             # This function will check the controller job id in the database
             # and return the endpoint if the job id matches. Otherwise it will
             # return None.
@@ -274,34 +277,31 @@ def up(
        f'{style.BRIGHT}{service_name}{style.RESET_ALL}'
        f'\n{fore.CYAN}Endpoint URL: '
        f'{style.BRIGHT}{endpoint}{style.RESET_ALL}'
-        '\nTo see detailed info:\t\t'
-        f'{backend_utils.BOLD}sky serve status {service_name} '
-        f'[--endpoint]{backend_utils.RESET_BOLD}'
-        '\nTo teardown the service:\t'
-        f'{backend_utils.BOLD}sky serve down {service_name}'
-        f'{backend_utils.RESET_BOLD}'
-        '\n'
-        '\nTo see logs of a replica:\t'
-        f'{backend_utils.BOLD}sky serve logs {service_name} [REPLICA_ID]'
-        f'{backend_utils.RESET_BOLD}'
-        '\nTo see logs of load balancer:\t'
-        f'{backend_utils.BOLD}sky serve logs --load-balancer {service_name}'
-        f'{backend_utils.RESET_BOLD}'
-        '\nTo see logs of controller:\t'
-        f'{backend_utils.BOLD}sky serve logs --controller {service_name}'
-        f'{backend_utils.RESET_BOLD}'
-        '\n'
-        '\nTo monitor replica status:\t'
-        f'{backend_utils.BOLD}watch -n10 sky serve status {service_name}'
-        f'{backend_utils.RESET_BOLD}'
-        '\nTo send a test request:\t\t'
-        f'{backend_utils.BOLD}curl {endpoint}'
-        f'{backend_utils.RESET_BOLD}'
-        '\n'
-        f'\n{fore.GREEN}SkyServe is spinning up your service now.'
-        f'{style.RESET_ALL}'
-        f'\n{fore.GREEN}The replicas should be ready within a '
-        f'short time.{style.RESET_ALL}')
+        f'\n📋 Useful Commands'
+        f'\n{ux_utils.INDENT_SYMBOL}To check service status:\t'
+        f'{ux_utils.BOLD}sky serve status {service_name} '
+        f'[--endpoint]{ux_utils.RESET_BOLD}'
+        f'\n{ux_utils.INDENT_SYMBOL}To teardown the service:\t'
+        f'{ux_utils.BOLD}sky serve down {service_name}'
+        f'{ux_utils.RESET_BOLD}'
+        f'\n{ux_utils.INDENT_SYMBOL}To see replica logs:\t'
+        f'{ux_utils.BOLD}sky serve logs {service_name} [REPLICA_ID]'
+        f'{ux_utils.RESET_BOLD}'
+        f'\n{ux_utils.INDENT_SYMBOL}To see load balancer logs:\t'
+        f'{ux_utils.BOLD}sky serve logs --load-balancer {service_name}'
+        f'{ux_utils.RESET_BOLD}'
+        f'\n{ux_utils.INDENT_SYMBOL}To see controller logs:\t'
+        f'{ux_utils.BOLD}sky serve logs --controller {service_name}'
+        f'{ux_utils.RESET_BOLD}'
+        f'\n{ux_utils.INDENT_SYMBOL}To monitor the status:\t'
+        f'{ux_utils.BOLD}watch -n10 sky serve status {service_name}'
+        f'{ux_utils.RESET_BOLD}'
+        f'\n{ux_utils.INDENT_LAST_SYMBOL}To send a test request:\t'
+        f'{ux_utils.BOLD}curl {endpoint}'
+        f'{ux_utils.RESET_BOLD}'
+        '\n\n' +
+        ux_utils.finishing_message('Service is spinning up and replicas '
+                                   'will be ready shortly.'))
     return service_name, endpoint
 
 
@@ -323,11 +323,11 @@ def update(
         controller=controller_utils.Controllers.SKY_SERVE_CONTROLLER,
         stopped_message=
         'Service controller is stopped. There is no service to update. '
-        f'To spin up a new service, use {backend_utils.BOLD}'
-        f'sky serve up{backend_utils.RESET_BOLD}',
+        f'To spin up a new service, use {ux_utils.BOLD}'
+        f'sky serve up{ux_utils.RESET_BOLD}',
         non_existent_message='Service does not exist. '
         'To spin up a new service, '
-        f'use {backend_utils.BOLD}sky serve up{backend_utils.RESET_BOLD}',
+        f'use {ux_utils.BOLD}sky serve up{ux_utils.RESET_BOLD}',
     )
 
     backend = backend_utils.get_backend_from_handle(handle)
@@ -353,8 +353,8 @@ def update(
     if len(service_statuses) == 0:
         with ux_utils.print_exception_no_traceback():
             raise RuntimeError(f'Cannot find service {service_name!r}.'
-                               f'To spin up a service, use {backend_utils.BOLD}'
-                               f'sky serve up{backend_utils.RESET_BOLD}')
+                               f'To spin up a service, use {ux_utils.BOLD}'
+                               f'sky serve up{ux_utils.RESET_BOLD}')
 
     if len(service_statuses) > 1:
         with ux_utils.print_exception_no_traceback():
@@ -374,8 +374,10 @@ def update(
         with ux_utils.print_exception_no_traceback():
             raise RuntimeError(prompt)
 
-    controller_utils.maybe_translate_local_file_mounts_and_sync_up(task,
-                                                                   path='serve')
+    with rich_utils.safe_status(
+            ux_utils.spinner_message('Initializing service')):
+        controller_utils.maybe_translate_local_file_mounts_and_sync_up(
+            task, path='serve')
 
     code = serve_utils.ServeCodeGen.add_version(service_name)
     returncode, version_string_payload, stderr = backend.run_on_head(
@@ -433,8 +435,8 @@ def update(
 
     print(f'{colorama.Fore.GREEN}Service {service_name!r} update scheduled.'
          f'{colorama.Style.RESET_ALL}\n'
-          f'Please use {backend_utils.BOLD}sky serve status {service_name} '
-          f'{backend_utils.RESET_BOLD}to check the latest status.')
+          f'Please use {ux_utils.BOLD}sky serve status {service_name} '
+          f'{ux_utils.RESET_BOLD}to check the latest status.')
 
 
 @usage_lib.entrypoint
sky/sky_logging.py CHANGED
@@ -10,10 +10,10 @@ import colorama
 from sky.utils import env_options
 from sky.utils import rich_utils
 
-# If the SKYPILOT_MINIMIZE_LOGGING environment variable is set to True,
-# remove logging prefixes and unnecessary information in optimizer
-_FORMAT = (None if env_options.Options.MINIMIZE_LOGGING.get() else
-           '%(levelname).1s %(asctime)s %(filename)s:%(lineno)d] %(message)s')
+# UX: Should we show logging prefixes and some extra information in optimizer?
+_show_logging_prefix = (env_options.Options.SHOW_DEBUG_INFO.get() or
+                        not env_options.Options.MINIMIZE_LOGGING.get())
+_FORMAT = '%(levelname).1s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
 _DATE_FORMAT = '%m-%d %H:%M:%S'
 
 
@@ -45,6 +45,7 @@ _root_logger = logging.getLogger('sky')
 _default_handler = None
 _logging_config = threading.local()
 
+NO_PREFIX_FORMATTER = NewLineFormatter(None, datefmt=_DATE_FORMAT)
 FORMATTER = NewLineFormatter(_FORMAT, datefmt=_DATE_FORMAT)
 DIM_FORMATTER = NewLineFormatter(_FORMAT, datefmt=_DATE_FORMAT, dim=True)
 
@@ -67,7 +68,10 @@ def _setup_logger():
     else:
         _default_handler.setLevel(logging.INFO)
         _root_logger.addHandler(_default_handler)
-    _default_handler.setFormatter(FORMATTER)
+    if _show_logging_prefix:
+        _default_handler.setFormatter(FORMATTER)
+    else:
+        _default_handler.setFormatter(NO_PREFIX_FORMATTER)
     # Setting this will avoid the message
     # being propagated to the parent logger.
     _root_logger.propagate = False
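Note on the formatter swap above: the standard library treats a None format string as plain '%(message)s', which is presumably what passing None to NewLineFormatter (a logging.Formatter subclass in SkyPilot) relies on to drop the level/time/file prefix. A minimal standalone sketch, not SkyPilot code:

import logging

# fmt=None falls back to '%(message)s', so the prefix disappears.
prefixed = logging.Formatter(
    '%(levelname).1s %(asctime)s %(filename)s:%(lineno)d] %(message)s',
    datefmt='%m-%d %H:%M:%S')
no_prefix = logging.Formatter(None, datefmt='%m-%d %H:%M:%S')

handler = logging.StreamHandler()
logger = logging.getLogger('demo')
logger.addHandler(handler)
logger.setLevel(logging.INFO)

handler.setFormatter(prefixed)
logger.info('hello')   # e.g.: I 10-14 09:30:00 demo.py:15] hello
handler.setFormatter(no_prefix)
logger.info('hello')   # hello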
sky/skylet/job_lib.py CHANGED
@@ -8,6 +8,7 @@ import json
 import os
 import pathlib
 import shlex
+import sqlite3
 import subprocess
 import time
 import typing
@@ -55,6 +56,20 @@ os.makedirs(pathlib.Path(_DB_PATH).parents[0], exist_ok=True)
 
 
 def create_table(cursor, conn):
+    # Enable WAL mode to avoid locking issues.
+    # See: issue #3863, #1441 and PR #1509
+    # https://github.com/microsoft/WSL/issues/2395
+    # TODO(romilb): We do not enable WAL for WSL because of known issue in WSL.
+    # This may cause the database locked problem from WSL issue #1441.
+    if not common_utils.is_wsl():
+        try:
+            cursor.execute('PRAGMA journal_mode=WAL')
+        except sqlite3.OperationalError as e:
+            if 'database is locked' not in str(e):
+                raise
+            # If the database is locked, it is OK to continue, as the WAL mode
+            # is not critical and is likely to be enabled by other processes.
+
     cursor.execute("""\
         CREATE TABLE IF NOT EXISTS jobs (
         job_id INTEGER PRIMARY KEY AUTOINCREMENT,
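For context on the create_table change above, this is roughly what enabling write-ahead logging looks like in isolation; the 'database is locked' error is tolerated because another process may be holding the file and has likely enabled WAL already (standalone sketch, not SkyPilot code; 'jobs.db' is just an example path):

import sqlite3

conn = sqlite3.connect('jobs.db')
cursor = conn.cursor()
try:
    cursor.execute('PRAGMA journal_mode=WAL')
    print(cursor.fetchone())  # ('wal',)
except sqlite3.OperationalError as e:
    if 'database is locked' not in str(e):
        raise  # any other error is still fatal
conn.close()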
sky/skylet/log_lib.py CHANGED
@@ -21,6 +21,7 @@ from sky.skylet import constants
 from sky.skylet import job_lib
 from sky.utils import log_utils
 from sky.utils import subprocess_utils
+from sky.utils import ux_utils
 
 _SKY_LOG_WAITING_GAP_SECONDS = 1
 _SKY_LOG_WAITING_MAX_RETRY = 5
@@ -377,7 +378,9 @@ def _follow_job_logs(file,
                 wait_last_logs = False
                 continue
             status_str = status.value if status is not None else 'None'
-            print(f'INFO: Job finished (status: {status_str}).')
+            print(
+                ux_utils.finishing_message(
+                    f'Job finished (status: {status_str}).'))
             return
 
         time.sleep(_SKY_LOG_TAILING_GAP_SECONDS)
@@ -412,8 +415,6 @@ def tail_logs(job_id: Optional[int],
         return
     logger.debug(f'Tailing logs for job, real job_id {job_id}, managed_job_id '
                  f'{managed_job_id}.')
-    logger.info(f'{colorama.Fore.YELLOW}Start streaming logs for {job_str}.'
-                f'{colorama.Style.RESET_ALL}')
     log_path = os.path.join(log_dir, 'run.log')
     log_path = os.path.expanduser(log_path)
 
@@ -437,7 +438,7 @@
         time.sleep(_SKY_LOG_WAITING_GAP_SECONDS)
         status = job_lib.update_job_status([job_id], silent=True)[0]
 
-    start_stream_at = 'INFO: Tip: use Ctrl-C to exit log'
+    start_stream_at = 'Waiting for task resources on '
     if follow and status in [
             job_lib.JobStatus.SETTING_UP,
             job_lib.JobStatus.PENDING,
sky/skylet/providers/lambda_cloud/node_provider.py CHANGED
@@ -25,7 +25,7 @@ _TAG_PATH_PREFIX = '~/.sky/generated/lambda_cloud/metadata'
 _REMOTE_SSH_KEY_NAME = '~/.lambda_cloud/ssh_key_name'
 _REMOTE_RAY_SSH_KEY = '~/ray_bootstrap_key.pem'
 _REMOTE_RAY_YAML = '~/ray_bootstrap_config.yaml'
-_GET_INTERNAL_IP_CMD = 'ip -4 -br addr show | grep UP | grep -Eo "(10\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|172\.(1[6-9]|2[0-9]|3[0-1]))\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"'
+_GET_INTERNAL_IP_CMD = 's=$(ip -4 -br addr show | grep UP); echo "$s"; echo "$s" | grep -Eo "(10\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|172\.(1[6-9]|2[0-9]|3[0-1])|104\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"'
 
 logger = logging.getLogger(__name__)
 
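The updated _GET_INTERNAL_IP_CMD now also echoes the raw `ip -4 -br addr show` listing and additionally accepts addresses starting with 104., on top of the previous 10.x.x.x and 172.16-31.x.x ranges. A quick check of the address pattern with Python's re module (illustrative only; the real command runs it through grep -Eo):

import re

_IP_PATTERN = (r'(10\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
               r'|172\.(1[6-9]|2[0-9]|3[0-1])'
               r'|104\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))'
               r'\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
               r'\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)')

for addr in ('10.19.83.12', '172.17.0.5', '104.171.200.10', '192.168.1.1'):
    match = re.search(_IP_PATTERN, addr)
    print(addr, '->', match.group(0) if match else 'no match')
# 192.168.1.1 is rejected; the 10.x, 172.17.x and (newly) 104.x samples match.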
sky/utils/command_runner.py CHANGED
@@ -171,7 +171,7 @@ class CommandRunner:
             cmd: Union[str, List[str]],
             process_stream: bool,
             separate_stderr: bool,
-            skip_lines: int,
+            skip_num_lines: int,
             source_bashrc: bool = False,
     ) -> str:
         """Returns the command to run."""
@@ -203,12 +203,12 @@
         ]
         if not separate_stderr:
             command.append('2>&1')
-        if not process_stream and skip_lines:
+        if not process_stream and skip_num_lines:
             command += [
                 # A hack to remove the following bash warnings (twice):
                 # bash: cannot set terminal process group
                 # bash: no job control in this shell
-                f'| stdbuf -o0 tail -n +{skip_lines}',
+                f'| stdbuf -o0 tail -n +{skip_num_lines}',
                 # This is required to make sure the executor of command can get
                 # correct returncode, since linux pipe is used.
                 '; exit ${PIPESTATUS[0]}'
@@ -320,7 +320,7 @@
             separate_stderr: bool = False,
             connect_timeout: Optional[int] = None,
             source_bashrc: bool = False,
-            skip_lines: int = 0,
+            skip_num_lines: int = 0,
             **kwargs) -> Union[int, Tuple[int, str, str]]:
         """Runs the command on the cluster.
 
@@ -335,7 +335,7 @@
             connect_timeout: timeout in seconds for the ssh connection.
             source_bashrc: Whether to source the ~/.bashrc before running the
                 command.
-            skip_lines: The number of lines to skip at the beginning of the
+            skip_num_lines: The number of lines to skip at the beginning of the
                 output. This is used when the output is not processed by
                 SkyPilot but we still want to get rid of some warning messages,
                 such as SSH warnings.
@@ -529,7 +529,7 @@ class SSHCommandRunner(CommandRunner):
             separate_stderr: bool = False,
             connect_timeout: Optional[int] = None,
             source_bashrc: bool = False,
-            skip_lines: int = 0,
+            skip_num_lines: int = 0,
             **kwargs) -> Union[int, Tuple[int, str, str]]:
         """Uses 'ssh' to run 'cmd' on a node with ip.
 
@@ -550,7 +550,7 @@
             connect_timeout: timeout in seconds for the ssh connection.
             source_bashrc: Whether to source the bashrc before running the
                 command.
-            skip_lines: The number of lines to skip at the beginning of the
+            skip_num_lines: The number of lines to skip at the beginning of the
                 output. This is used when the output is not processed by
                 SkyPilot but we still want to get rid of some warning messages,
                 such as SSH warnings.
@@ -573,7 +573,7 @@
         command_str = self._get_command_to_run(cmd,
                                                process_stream,
                                                separate_stderr,
-                                               skip_lines=skip_lines,
+                                               skip_num_lines=skip_num_lines,
                                                source_bashrc=source_bashrc)
         command = base_ssh_command + [shlex.quote(command_str)]
 
@@ -693,7 +693,7 @@ class KubernetesCommandRunner(CommandRunner):
             separate_stderr: bool = False,
             connect_timeout: Optional[int] = None,
             source_bashrc: bool = False,
-            skip_lines: int = 0,
+            skip_num_lines: int = 0,
             **kwargs) -> Union[int, Tuple[int, str, str]]:
         """Uses 'kubectl exec' to run 'cmd' on a pod by its name and namespace.
 
@@ -713,7 +713,7 @@
             connect_timeout: timeout in seconds for the pod connection.
             source_bashrc: Whether to source the bashrc before running the
                 command.
-            skip_lines: The number of lines to skip at the beginning of the
+            skip_num_lines: The number of lines to skip at the beginning of the
                 output. This is used when the output is not processed by
                 SkyPilot but we still want to get rid of some warning messages,
                 such as SSH warnings.
@@ -751,7 +751,7 @@
         command_str = self._get_command_to_run(cmd,
                                                process_stream,
                                                separate_stderr,
-                                               skip_lines=skip_lines,
+                                               skip_num_lines=skip_num_lines,
                                                source_bashrc=source_bashrc)
         command = kubectl_base_command + [
             # It is important to use /bin/bash -c here to make sure we quote the
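The renamed skip_num_lines parameter feeds the `tail -n +N` / PIPESTATUS pipeline shown above. A self-contained sketch of that shell pattern (assumptions: GNU stdbuf is available; the echoed warnings and the exit code 3 are made up for illustration):

import shlex
import subprocess

# 'tail -n +K' starts printing at line K, dropping the leading warning lines;
# 'exit ${PIPESTATUS[0]}' restores the piped command's own exit code, which
# the pipeline would otherwise replace with tail's. stdbuf -o0 just keeps the
# output unbuffered so lines stream through promptly.
inner = 'echo "warning 1"; echo "warning 2"; echo "real output"; exit 3'
skip_num_lines = 3
command = (f'bash -c {shlex.quote(inner)} 2>&1 '
           f'| stdbuf -o0 tail -n +{skip_num_lines}; exit ${{PIPESTATUS[0]}}')

proc = subprocess.run(['bash', '-c', command], capture_output=True, text=True)
print(proc.stdout.strip())  # real output
print(proc.returncode)      # 3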
sky/utils/common_utils.py CHANGED
@@ -16,7 +16,6 @@ import time
 from typing import Any, Callable, Dict, List, Optional, Union
 import uuid
 
-import colorama
 import jinja2
 import jsonschema
 import yaml
@@ -479,11 +478,9 @@ def format_exception(e: Union[Exception, SystemExit, KeyboardInterrupt],
     Returns:
         A string that represents the exception.
     """
-    bright = colorama.Style.BRIGHT
-    reset = colorama.Style.RESET_ALL
     if use_bracket:
-        return f'{bright}[{class_fullname(e.__class__)}]{reset} {e}'
-    return f'{bright}{class_fullname(e.__class__)}:{reset} {e}'
+        return f'[{class_fullname(e.__class__)}] {e}'
+    return f'{class_fullname(e.__class__)}: {e}'
 
 
 def remove_color(s: str):
sky/utils/controller_utils.py CHANGED
@@ -28,6 +28,7 @@ from sky.serve import serve_utils
 from sky.skylet import constants
 from sky.utils import common_utils
 from sky.utils import env_options
+from sky.utils import rich_utils
 from sky.utils import ux_utils
 
 if typing.TYPE_CHECKING:
@@ -192,7 +193,11 @@ def _get_cloud_dependencies_installation_commands(
     # TODO(tian): Make dependency installation command a method of cloud
     # class and get all installation command for enabled clouds.
     commands = []
-    prefix_str = 'Check & install cloud dependencies on controller: '
+    # We use <step>/<total> instead of strong formatting, as we need to update
+    # the <total> at the end of the for loop, and python does not support
+    # partial string formatting.
+    prefix_str = ('[<step>/<total>] Check & install cloud dependencies '
+                  'on controller: ')
     # This is to make sure the shorter checking message does not have junk
     # characters from the previous message.
     empty_str = ' ' * 10
@@ -203,6 +208,7 @@
         # other clouds will install boto3 but not awscli.
         'pip list | grep awscli> /dev/null 2>&1 || pip install "urllib3<2" '
         'awscli>=1.27.10 "colorama<0.4.5" > /dev/null 2>&1')
+    setup_clouds: List[str] = []
     for cloud in sky_check.get_cached_enabled_clouds_or_refresh():
         if isinstance(
                 clouds,
@@ -211,11 +217,16 @@
                 # fluidstack and paperspace
             continue
         if isinstance(cloud, clouds.AWS):
-            commands.append(f'echo -en "\\r{prefix_str}AWS{empty_str}" && ' +
+            step_prefix = prefix_str.replace('<step>',
+                                             str(len(setup_clouds) + 1))
+            commands.append(f'echo -en "\\r{step_prefix}AWS{empty_str}" && ' +
                             aws_dependencies_installation)
+            setup_clouds.append(str(cloud))
         elif isinstance(cloud, clouds.Azure):
+            step_prefix = prefix_str.replace('<step>',
+                                             str(len(setup_clouds) + 1))
             commands.append(
-                f'echo -en "\\r{prefix_str}Azure{empty_str}" && '
+                f'echo -en "\\r{step_prefix}Azure{empty_str}" && '
                 'pip list | grep azure-cli > /dev/null 2>&1 || '
                 'pip install "azure-cli>=2.31.0" azure-core '
                 '"azure-identity>=1.13.0" azure-mgmt-network > /dev/null 2>&1')
@@ -225,9 +236,12 @@
             commands.append(
                 'pip list | grep azure-storage-blob > /dev/null 2>&1 || '
                 'pip install azure-storage-blob msgraph-sdk > /dev/null 2>&1')
+            setup_clouds.append(str(cloud))
         elif isinstance(cloud, clouds.GCP):
+            step_prefix = prefix_str.replace('<step>',
+                                             str(len(setup_clouds) + 1))
             commands.append(
-                f'echo -en "\\r{prefix_str}GCP{empty_str}" && '
+                f'echo -en "\\r{step_prefix}GCP{empty_str}" && '
                 'pip list | grep google-api-python-client > /dev/null 2>&1 || '
                 'pip install "google-api-python-client>=2.69.0" '
                 '> /dev/null 2>&1')
@@ -238,9 +252,12 @@
                 'pip list | grep google-cloud-storage > /dev/null 2>&1 || '
                 'pip install google-cloud-storage > /dev/null 2>&1')
             commands.append(f'{gcp.GOOGLE_SDK_INSTALLATION_COMMAND}')
+            setup_clouds.append(str(cloud))
         elif isinstance(cloud, clouds.Kubernetes):
+            step_prefix = prefix_str.replace('<step>',
+                                             str(len(setup_clouds) + 1))
             commands.append(
-                f'echo -en "\\r{prefix_str}Kubernetes{empty_str}" && '
+                f'echo -en "\\r{step_prefix}Kubernetes{empty_str}" && '
                 'pip list | grep kubernetes > /dev/null 2>&1 || '
                 'pip install "kubernetes>=20.0.0" > /dev/null 2>&1 &&'
                 # Install k8s + skypilot dependencies
@@ -248,8 +265,8 @@
                 '! command -v curl &> /dev/null || '
                 '! command -v socat &> /dev/null || '
                 '! command -v netcat &> /dev/null; '
-                'then apt update && apt install curl socat netcat -y '
-                '&> /dev/null; '
+                'then apt update &> /dev/null && '
+                'apt install curl socat netcat -y &> /dev/null; '
                 'fi" && '
                 # Install kubectl
                 '(command -v kubectl &>/dev/null || '
@@ -258,34 +275,55 @@
                 '/bin/linux/amd64/kubectl" && '
                 'sudo install -o root -g root -m 0755 '
                 'kubectl /usr/local/bin/kubectl))')
+            setup_clouds.append(str(cloud))
         elif isinstance(cloud, clouds.Cudo):
+            step_prefix = prefix_str.replace('<step>',
+                                             str(len(setup_clouds) + 1))
             commands.append(
-                f'echo -en "\\r{prefix_str}Cudo{empty_str}" && '
+                f'echo -en "\\r{step_prefix}Cudo{empty_str}" && '
                 'pip list | grep cudo-compute > /dev/null 2>&1 || '
                 'pip install "cudo-compute>=0.1.10" > /dev/null 2>&1 && '
                 'wget https://download.cudo.org/compute/cudoctl-0.3.2-amd64.deb -O ~/cudoctl.deb > /dev/null 2>&1 && '  # pylint: disable=line-too-long
                 'sudo dpkg -i ~/cudoctl.deb > /dev/null 2>&1')
+            setup_clouds.append(str(cloud))
         elif isinstance(cloud, clouds.RunPod):
-            commands.append(f'echo -en "\\r{prefix_str}RunPod{empty_str}" && '
+            step_prefix = prefix_str.replace('<step>',
+                                             str(len(setup_clouds) + 1))
+            commands.append(f'echo -en "\\r{step_prefix}RunPod{empty_str}" && '
                             'pip list | grep runpod > /dev/null 2>&1 || '
                             'pip install "runpod>=1.5.1" > /dev/null 2>&1')
+            setup_clouds.append(str(cloud))
         if controller == Controllers.JOBS_CONTROLLER:
             if isinstance(cloud, clouds.IBM):
+                step_prefix = prefix_str.replace('<step>',
+                                                 str(len(setup_clouds) + 1))
                 commands.append(
-                    f'echo -en "\\r{prefix_str}IBM{empty_str}" '
+                    f'echo -en "\\r{step_prefix}IBM{empty_str}" '
                    '&& pip list | grep ibm-cloud-sdk-core > /dev/null 2>&1 || '
                    'pip install ibm-cloud-sdk-core ibm-vpc '
                    'ibm-platform-services ibm-cos-sdk > /dev/null 2>&1')
+                setup_clouds.append(str(cloud))
             elif isinstance(cloud, clouds.OCI):
+                step_prefix = prefix_str.replace('<step>',
+                                                 str(len(setup_clouds) + 1))
                 commands.append(f'echo -en "\\r{prefix_str}OCI{empty_str}" && '
                                 'pip list | grep oci > /dev/null 2>&1 || '
                                 'pip install oci > /dev/null 2>&1')
+                setup_clouds.append(str(cloud))
     if (cloudflare.NAME
             in storage_lib.get_cached_enabled_storage_clouds_or_refresh()):
-        commands.append(f'echo -en "\\r{prefix_str}Cloudflare{empty_str}" && ' +
-                        aws_dependencies_installation)
-    commands.append(f'echo -e "\\r{prefix_str}Done for {len(commands)} '
-                    'clouds."')
+        step_prefix = prefix_str.replace('<step>', str(len(setup_clouds) + 1))
+        commands.append(
+            f'echo -en "\\r{step_prefix}Cloudflare{empty_str}" && ' +
+            aws_dependencies_installation)
+        setup_clouds.append(cloudflare.NAME)
+
+    finish_prefix = prefix_str.replace('[<step>/<total>] ', ' ')
+    commands.append(f'echo -e "\\r{finish_prefix}done.{empty_str}"')
+    commands = [
+        command.replace('<total>', str(len(setup_clouds)))
+        for command in commands
+    ]
     return commands
 
 
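The <step>/<total> placeholders above exist because the total number of clouds is only known once the loop has finished, so <step> is substituted per cloud and <total> is patched into every command afterwards. A minimal sketch of the same pattern, using a hypothetical cloud list purely for illustration:

clouds_to_set_up = ['AWS', 'GCP', 'Kubernetes']  # hypothetical input

prefix_str = '[<step>/<total>] Check & install cloud dependencies on controller: '
commands = []
setup_clouds = []
for cloud in clouds_to_set_up:
    step_prefix = prefix_str.replace('<step>', str(len(setup_clouds) + 1))
    commands.append(f'echo -en "\\r{step_prefix}{cloud}"')
    setup_clouds.append(cloud)

# Only now is the total known, so patch it into every command at the end.
commands = [c.replace('<total>', str(len(setup_clouds))) for c in commands]
print(commands[0])
# echo -en "\r[1/3] Check & install cloud dependencies on controller: AWS"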
@@ -388,7 +426,7 @@ def shared_controller_vars_to_fill(
         'local_user_config_path': local_user_config_path,
     }
     env_vars: Dict[str, str] = {
-        env.value: '1' for env in env_options.Options if env.get()
+        env.env_key: str(int(env.get())) for env in env_options.Options
     }
     env_vars.update({
         # Should not use $USER here, as that env var can be empty when
@@ -396,7 +434,9 @@
         constants.USER_ENV_VAR: getpass.getuser(),
         constants.USER_ID_ENV_VAR: common_utils.get_user_hash(),
         # Skip cloud identity check to avoid the overhead.
-        env_options.Options.SKIP_CLOUD_IDENTITY_CHECK.value: '1',
+        env_options.Options.SKIP_CLOUD_IDENTITY_CHECK.env_key: '1',
+        # Disable minimize logging to get more details on the controller.
+        env_options.Options.MINIMIZE_LOGGING.env_key: '0',
     })
     if skypilot_config.loaded():
         # Only set the SKYPILOT_CONFIG env var if the user has a config file.
@@ -599,6 +639,7 @@ def maybe_translate_local_file_mounts_and_sync_up(task: 'task_lib.Task',
     # ================================================================
     # Translate the workdir and local file mounts to cloud file mounts.
     # ================================================================
+
     run_id = common_utils.get_usage_run_id()[:8]
     original_file_mounts = task.file_mounts if task.file_mounts else {}
     original_storage_mounts = task.storage_mounts if task.storage_mounts else {}
@@ -618,8 +659,12 @@
     elif has_local_source_paths_workdir:
         msg = 'workdir'
     if msg:
-        logger.info(f'{colorama.Fore.YELLOW}Translating {msg} to SkyPilot '
-                    f'Storage...{colorama.Style.RESET_ALL}')
+        logger.info(
+            ux_utils.starting_message(f'Translating {msg} to '
+                                      'SkyPilot Storage...'))
+        rich_utils.force_update_status(
+            ux_utils.spinner_message(
+                f'Translating {msg} to SkyPilot Storage...'))
 
     # Step 1: Translate the workdir to SkyPilot storage.
     new_storage_mounts = {}
@@ -643,8 +688,8 @@
         })
         # Check of the existence of the workdir in file_mounts is done in
         # the task construction.
-        logger.info(f'Workdir {workdir!r} will be synced to cloud storage '
-                    f'{bucket_name!r}.')
+        logger.info(f' {colorama.Style.DIM}Workdir: {workdir!r} '
+                    f'-> storage: {bucket_name!r}.{colorama.Style.RESET_ALL}')
 
     # Step 2: Translate the local file mounts with folder in src to SkyPilot
     # storage.
@@ -668,9 +713,8 @@
             'persistent': False,
             'mode': 'COPY',
         })
-        logger.info(
-            f'Folder in local file mount {src!r} will be synced to SkyPilot '
-            f'storage {bucket_name}.')
+        logger.info(f' {colorama.Style.DIM}Folder : {src!r} '
+                    f'-> storage: {bucket_name!r}.{colorama.Style.RESET_ALL}')
 
     # Step 3: Translate local file mounts with file in src to SkyPilot storage.
     # Hard link the files in src to a temporary directory, and upload folder.
@@ -703,10 +747,12 @@
                 f'destination {file_mount_remote_tmp_dir} '
                 'being taken.')
         sources = list(src_to_file_id.keys())
-        sources_str = '\n\t'.join(sources)
-        logger.info('Source files in file_mounts will be synced to '
-                    f'cloud storage {file_bucket_name}:'
-                    f'\n\t{sources_str}')
+        sources_str = '\n '.join(sources)
+        logger.info(f' {colorama.Style.DIM}Files (listed below) '
+                    f' -> storage: {file_bucket_name}:'
+                    f'\n {sources_str}{colorama.Style.RESET_ALL}')
+        rich_utils.force_update_status(
+            ux_utils.spinner_message('Uploading translated local files/folders'))
     task.update_storage_mounts(new_storage_mounts)
 
     # Step 4: Upload storage from sources
@@ -716,8 +762,9 @@
     if task.storage_mounts:
         # There may be existing (non-translated) storage mounts, so log this
         # whenever task.storage_mounts is non-empty.
-        logger.info(f'{colorama.Fore.YELLOW}Uploading sources to cloud storage.'
-                    f'{colorama.Style.RESET_ALL} See: sky storage ls')
+        rich_utils.force_update_status(
+            ux_utils.spinner_message('Uploading local sources to storage[/] '
+                                     '[dim]View storages: sky storage ls'))
     try:
         task.sync_storage_mounts()
     except ValueError as e:
@@ -800,3 +847,5 @@
             })
             updated_mount_storages[storage_path] = new_storage
     task.update_storage_mounts(updated_mount_storages)
+    if msg:
+        logger.info(ux_utils.finishing_message('Uploaded local files/folders.'))
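rich_utils.force_update_status and ux_utils.spinner_message are SkyPilot helpers; assuming they update the text of the active rich spinner, the calls sprinkled through maybe_translate_local_file_mounts_and_sync_up boil down to something like this sketch built directly on the rich library (not SkyPilot's implementation; the sleeps stand in for the real work):

import time

from rich.console import Console

console = Console()
# One live spinner whose text is refreshed as sub-steps progress, instead of
# printing a new log line per step.
with console.status('Translating workdir to SkyPilot Storage...') as status:
    time.sleep(1)
    status.update('Uploading translated local files/folders...')
    time.sleep(1)
    status.update('Uploading local sources to storage...')
    time.sleep(1)
console.print('Uploaded local files/folders.')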
sky/utils/env_options.py CHANGED
@@ -5,17 +5,32 @@ import os
 
 class Options(enum.Enum):
     """Environment variables for SkyPilot."""
-    IS_DEVELOPER = 'SKYPILOT_DEV'
-    SHOW_DEBUG_INFO = 'SKYPILOT_DEBUG'
-    DISABLE_LOGGING = 'SKYPILOT_DISABLE_USAGE_COLLECTION'
-    MINIMIZE_LOGGING = 'SKYPILOT_MINIMIZE_LOGGING'
+
+    # (env var name, default value)
+    IS_DEVELOPER = ('SKYPILOT_DEV', False)
+    SHOW_DEBUG_INFO = ('SKYPILOT_DEBUG', False)
+    DISABLE_LOGGING = ('SKYPILOT_DISABLE_USAGE_COLLECTION', False)
+    MINIMIZE_LOGGING = ('SKYPILOT_MINIMIZE_LOGGING', True)
     # Internal: this is used to skip the cloud user identity check, which is
     # used to protect cluster operations in a multi-identity scenario.
     # Currently, this is only used in the job and serve controller, as there
     # will not be multiple identities, and skipping the check can increase
     # robustness.
-    SKIP_CLOUD_IDENTITY_CHECK = 'SKYPILOT_SKIP_CLOUD_IDENTITY_CHECK'
+    SKIP_CLOUD_IDENTITY_CHECK = ('SKYPILOT_SKIP_CLOUD_IDENTITY_CHECK', False)
+
+    def __init__(self, env_var: str, default: bool) -> None:
+        self.env_var = env_var
+        self.default = default
 
-    def get(self):
+    def __repr__(self) -> str:
+        return self.env_var
+
+    def get(self) -> bool:
         """Check if an environment variable is set to True."""
-        return os.getenv(self.value, 'False').lower() in ('true', '1')
+        return os.getenv(self.env_var,
+                         str(self.default)).lower() in ('true', '1')
+
+    @property
+    def env_key(self) -> str:
+        """The environment variable key name."""
+        return self.value[0]
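The tuple-valued members above rely on a standard enum.Enum behavior: the arguments used to construct each member are passed to __init__, while .value stays the whole tuple (hence env_key reading self.value[0]). A minimal self-contained sketch with two of the options, also showing the dict comprehension that controller_utils now uses to forward every option as an explicit '0'/'1':

import enum
import os


class Options(enum.Enum):
    """Two of the options above, reduced to a standalone example."""
    SHOW_DEBUG_INFO = ('SKYPILOT_DEBUG', False)
    MINIMIZE_LOGGING = ('SKYPILOT_MINIMIZE_LOGGING', True)

    def __init__(self, env_var: str, default: bool) -> None:
        self.env_var = env_var
        self.default = default

    def get(self) -> bool:
        return os.getenv(self.env_var,
                         str(self.default)).lower() in ('true', '1')

    @property
    def env_key(self) -> str:
        return self.value[0]


print(Options.MINIMIZE_LOGGING.env_key)  # SKYPILOT_MINIMIZE_LOGGING
print(Options.MINIMIZE_LOGGING.get())    # True, unless the env var overrides it
print({opt.env_key: str(int(opt.get())) for opt in Options})
# {'SKYPILOT_DEBUG': '0', 'SKYPILOT_MINIMIZE_LOGGING': '1'}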