skypilot-nightly 1.0.0.dev20241011__py3-none-any.whl → 1.0.0.dev20241013__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/azure.py +3 -1
  3. sky/adaptors/common.py +6 -2
  4. sky/backends/backend.py +9 -4
  5. sky/backends/backend_utils.py +13 -16
  6. sky/backends/cloud_vm_ray_backend.py +207 -161
  7. sky/backends/local_docker_backend.py +3 -1
  8. sky/benchmark/benchmark_utils.py +5 -4
  9. sky/cli.py +128 -31
  10. sky/clouds/service_catalog/aws_catalog.py +6 -7
  11. sky/clouds/service_catalog/common.py +4 -3
  12. sky/clouds/service_catalog/cudo_catalog.py +11 -1
  13. sky/core.py +4 -2
  14. sky/data/storage.py +44 -32
  15. sky/data/storage_utils.py +12 -7
  16. sky/exceptions.py +5 -0
  17. sky/execution.py +10 -24
  18. sky/jobs/__init__.py +2 -0
  19. sky/jobs/core.py +87 -7
  20. sky/jobs/utils.py +35 -19
  21. sky/optimizer.py +50 -37
  22. sky/provision/aws/config.py +15 -6
  23. sky/provision/azure/config.py +14 -3
  24. sky/provision/azure/instance.py +15 -9
  25. sky/provision/kubernetes/instance.py +3 -1
  26. sky/provision/kubernetes/utils.py +25 -0
  27. sky/provision/provisioner.py +63 -74
  28. sky/serve/core.py +42 -40
  29. sky/sky_logging.py +9 -5
  30. sky/skylet/log_lib.py +5 -4
  31. sky/skylet/providers/lambda_cloud/node_provider.py +1 -1
  32. sky/utils/cli_utils/status_utils.py +168 -21
  33. sky/utils/command_runner.py +11 -11
  34. sky/utils/common_utils.py +22 -5
  35. sky/utils/controller_utils.py +78 -29
  36. sky/utils/env_options.py +22 -7
  37. sky/utils/log_utils.py +39 -24
  38. sky/utils/resources_utils.py +23 -0
  39. sky/utils/rich_utils.py +55 -5
  40. sky/utils/ux_utils.py +63 -4
  41. {skypilot_nightly-1.0.0.dev20241011.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/METADATA +1 -1
  42. {skypilot_nightly-1.0.0.dev20241011.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/RECORD +46 -46
  43. {skypilot_nightly-1.0.0.dev20241011.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/LICENSE +0 -0
  44. {skypilot_nightly-1.0.0.dev20241011.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/WHEEL +0 -0
  45. {skypilot_nightly-1.0.0.dev20241011.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/entry_points.txt +0 -0
  46. {skypilot_nightly-1.0.0.dev20241011.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/top_level.txt +0 -0
sky/backends/local_docker_backend.py CHANGED
@@ -14,6 +14,7 @@ from sky.backends import backend_utils
14
14
  from sky.backends import docker_utils
15
15
  from sky.data import storage as storage_lib
16
16
  from sky.utils import rich_utils
17
+ from sky.utils import ux_utils
17
18
 
18
19
  if typing.TYPE_CHECKING:
19
20
  from sky import resources
@@ -159,7 +160,8 @@ class LocalDockerBackend(backends.Backend['LocalDockerResourceHandle']):
159
160
  handle = LocalDockerResourceHandle(cluster_name)
160
161
  logger.info(f'Building docker image for task {task.name}. '
161
162
  'This might take some time.')
162
- with rich_utils.safe_status('[bold cyan]Building Docker image[/]'):
163
+ with rich_utils.safe_status(
164
+ ux_utils.spinner_message('Building Docker image')):
163
165
  image_tag, metadata = docker_utils.build_dockerimage_from_task(task)
164
166
  self.images[handle] = (image_tag, metadata)
165
167
  logger.info(f'Image {image_tag} built.')
sky/benchmark/benchmark_utils.py CHANGED
@@ -595,7 +595,8 @@ def update_benchmark_state(benchmark: str) -> None:
595
595
  remote_dir = os.path.join(bucket_name, benchmark)
596
596
  local_dir = os.path.join(_SKY_LOCAL_BENCHMARK_DIR, benchmark)
597
597
  os.makedirs(local_dir, exist_ok=True)
598
- with rich_utils.safe_status('[bold cyan]Downloading benchmark logs[/]'):
598
+ with rich_utils.safe_status(
599
+ ux_utils.spinner_message('Downloading benchmark logs')):
599
600
  _download_remote_dir(remote_dir, local_dir, bucket_type)
600
601
 
601
602
  # Update the benchmark results in parallel.
@@ -604,9 +605,9 @@ def update_benchmark_state(benchmark: str) -> None:
604
605
  progress = rich_progress.Progress(transient=True,
605
606
  redirect_stdout=False,
606
607
  redirect_stderr=False)
607
- task = progress.add_task(
608
- f'[bold cyan]Processing {num_candidates} benchmark result{plural}[/]',
609
- total=num_candidates)
608
+ task = progress.add_task(ux_utils.spinner_message(
609
+ f'Processing {num_candidates} benchmark result{plural}'),
610
+ total=num_candidates)
610
611
 
611
612
  def _update_with_progress_bar(arg: Any) -> None:
612
613
  message = _update_benchmark_result(arg)
sky/cli.py CHANGED
@@ -1458,6 +1458,79 @@ def _get_services(service_names: Optional[List[str]],
1458
1458
  return num_services, msg
1459
1459
 
1460
1460
 
1461
+ def _status_kubernetes(show_all: bool):
1462
+ """Show all SkyPilot resources in the current Kubernetes context.
1463
+
1464
+ Args:
1465
+ show_all (bool): Show all job information (e.g., start time, failures).
1466
+ """
1467
+ context = kubernetes_utils.get_current_kube_config_context_name()
1468
+ try:
1469
+ pods = kubernetes_utils.get_skypilot_pods(context)
1470
+ except exceptions.ResourcesUnavailableError as e:
1471
+ with ux_utils.print_exception_no_traceback():
1472
+ raise ValueError('Failed to get SkyPilot pods from '
1473
+ f'Kubernetes: {str(e)}') from e
1474
+ all_clusters, jobs_controllers, serve_controllers = (
1475
+ status_utils.process_skypilot_pods(pods, context))
1476
+ all_jobs = []
1477
+ with rich_utils.safe_status(
1478
+ '[bold cyan]Checking in-progress managed jobs[/]') as spinner:
1479
+ for i, (_, job_controller_info) in enumerate(jobs_controllers.items()):
1480
+ user = job_controller_info['user']
1481
+ pod = job_controller_info['pods'][0]
1482
+ status_message = ('[bold cyan]Checking managed jobs controller')
1483
+ if len(jobs_controllers) > 1:
1484
+ status_message += f's ({i+1}/{len(jobs_controllers)})'
1485
+ spinner.update(f'{status_message}[/]')
1486
+ try:
1487
+ job_list = managed_jobs.queue_from_kubernetes_pod(
1488
+ pod.metadata.name)
1489
+ except RuntimeError as e:
1490
+ logger.warning('Failed to get managed jobs from controller '
1491
+ f'{pod.metadata.name}: {str(e)}')
1492
+ job_list = []
1493
+ # Add user field to jobs
1494
+ for job in job_list:
1495
+ job['user'] = user
1496
+ all_jobs.extend(job_list)
1497
+ # Reconcile cluster state between managed jobs and clusters:
1498
+ # To maintain a clear separation between regular SkyPilot clusters
1499
+ # and those from managed jobs, we need to exclude the latter from
1500
+ # the main cluster list.
1501
+ # We do this by reconstructing managed job cluster names from each
1502
+ # job's name and ID. We then use this set to filter out managed
1503
+ # clusters from the main cluster list. This is necessary because there
1504
+ # are no identifiers distinguishing clusters from managed jobs from
1505
+ # regular clusters.
1506
+ managed_job_cluster_names = set()
1507
+ for job in all_jobs:
1508
+ # Managed job cluster name is <job_name>-<job_id>
1509
+ managed_cluster_name = f'{job["job_name"]}-{job["job_id"]}'
1510
+ managed_job_cluster_names.add(managed_cluster_name)
1511
+ unmanaged_clusters = [
1512
+ c for c in all_clusters
1513
+ if c['cluster_name'] not in managed_job_cluster_names
1514
+ ]
1515
+ click.echo(f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
1516
+ f'Kubernetes cluster state (context: {context})'
1517
+ f'{colorama.Style.RESET_ALL}')
1518
+ status_utils.show_kubernetes_cluster_status_table(unmanaged_clusters,
1519
+ show_all)
1520
+ if all_jobs:
1521
+ click.echo(f'\n{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
1522
+ f'Managed jobs'
1523
+ f'{colorama.Style.RESET_ALL}')
1524
+ msg = managed_jobs.format_job_table(all_jobs, show_all=show_all)
1525
+ click.echo(msg)
1526
+ if serve_controllers:
1527
+ # TODO: Parse serve controllers and show services separately.
1528
+ # Currently we show a hint that services are shown as clusters.
1529
+ click.echo(f'\n{colorama.Style.DIM}Hint: SkyServe replica pods are '
1530
+ 'shown in the "SkyPilot clusters" section.'
1531
+ f'{colorama.Style.RESET_ALL}')
1532
+
1533
+
1461
1534
  @cli.command()
1462
1535
  @click.option('--all',
1463
1536
  '-a',
@@ -1503,6 +1576,14 @@ def _get_services(service_names: Optional[List[str]],
1503
1576
  is_flag=True,
1504
1577
  required=False,
1505
1578
  help='Also show sky serve services, if any.')
1579
+ @click.option(
1580
+ '--kubernetes',
1581
+ '--k8s',
1582
+ default=False,
1583
+ is_flag=True,
1584
+ required=False,
1585
+ help='[Experimental] Show all SkyPilot resources (including from other '
1586
+ 'users) in the current Kubernetes context.')
1506
1587
  @click.argument('clusters',
1507
1588
  required=False,
1508
1589
  type=str,
@@ -1512,7 +1593,7 @@ def _get_services(service_names: Optional[List[str]],
1512
1593
  # pylint: disable=redefined-builtin
1513
1594
  def status(all: bool, refresh: bool, ip: bool, endpoints: bool,
1514
1595
  endpoint: Optional[int], show_managed_jobs: bool,
1515
- show_services: bool, clusters: List[str]):
1596
+ show_services: bool, kubernetes: bool, clusters: List[str]):
1516
1597
  # NOTE(dev): Keep the docstring consistent between the Python API and CLI.
1517
1598
  """Show clusters.
1518
1599
 
@@ -1571,6 +1652,9 @@ def status(all: bool, refresh: bool, ip: bool, endpoints: bool,
1571
1652
  or for autostop-enabled clusters, use ``--refresh`` to query the latest
1572
1653
  cluster statuses from the cloud providers.
1573
1654
  """
1655
+ if kubernetes:
1656
+ _status_kubernetes(all)
1657
+ return
1574
1658
  # Using a pool with 2 worker to run the managed job query and sky serve
1575
1659
  # service query in parallel to speed up. The pool provides a AsyncResult
1576
1660
  # object that can be used as a future.
@@ -1730,7 +1814,8 @@ def status(all: bool, refresh: bool, ip: bool, endpoints: bool,
1730
1814
  if show_managed_jobs:
1731
1815
  click.echo(f'\n{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
1732
1816
  f'Managed jobs{colorama.Style.RESET_ALL}')
1733
- with rich_utils.safe_status('[cyan]Checking managed jobs[/]'):
1817
+ with rich_utils.safe_status(
1818
+ ux_utils.spinner_message('Checking managed jobs')):
1734
1819
  managed_jobs_query_interrupted, result = _try_get_future_result(
1735
1820
  managed_jobs_future)
1736
1821
  if managed_jobs_query_interrupted:
@@ -1771,7 +1856,8 @@ def status(all: bool, refresh: bool, ip: bool, endpoints: bool,
1771
1856
  # The pool is terminated, so we cannot run the service query.
1772
1857
  msg = 'KeyboardInterrupt'
1773
1858
  else:
1774
- with rich_utils.safe_status('[cyan]Checking services[/]'):
1859
+ with rich_utils.safe_status(
1860
+ ux_utils.spinner_message('Checking services')):
1775
1861
  interrupted, result = _try_get_future_result(
1776
1862
  services_future)
1777
1863
  if interrupted:
@@ -2467,8 +2553,8 @@ def start(
2467
2553
  'is currently not supported.\n'
2468
2554
  'Please start the former independently.')
2469
2555
  if controllers:
2470
- bold = backend_utils.BOLD
2471
- reset_bold = backend_utils.RESET_BOLD
2556
+ bold = ux_utils.BOLD
2557
+ reset_bold = ux_utils.RESET_BOLD
2472
2558
  if len(controllers) != 1:
2473
2559
  raise click.UsageError(
2474
2560
  'Starting multiple controllers is currently not supported.\n'
@@ -2589,7 +2675,7 @@ def _hint_or_raise_for_down_jobs_controller(controller_name: str):
2589
2675
  assert controller is not None, controller_name
2590
2676
 
2591
2677
  with rich_utils.safe_status(
2592
- '[bold cyan]Checking for in-progress managed jobs[/]'):
2678
+ ux_utils.spinner_message('Checking for in-progress managed jobs')):
2593
2679
  try:
2594
2680
  managed_jobs_ = managed_jobs.queue(refresh=False,
2595
2681
  skip_finished=True)
@@ -2641,7 +2727,8 @@ def _hint_or_raise_for_down_sky_serve_controller(controller_name: str):
2641
2727
  """
2642
2728
  controller = controller_utils.Controllers.from_name(controller_name)
2643
2729
  assert controller is not None, controller_name
2644
- with rich_utils.safe_status('[bold cyan]Checking for live services[/]'):
2730
+ with rich_utils.safe_status(
2731
+ ux_utils.spinner_message('Checking for live services')):
2645
2732
  try:
2646
2733
  services = serve_lib.status()
2647
2734
  except exceptions.ClusterNotUpError as e:
@@ -2825,9 +2912,9 @@ def _down_or_stop_clusters(
2825
2912
  progress = rich_progress.Progress(transient=True,
2826
2913
  redirect_stdout=False,
2827
2914
  redirect_stderr=False)
2828
- task = progress.add_task(
2829
- f'[bold cyan]{operation} {len(clusters)} cluster{plural}[/]',
2830
- total=len(clusters))
2915
+ task = progress.add_task(ux_utils.spinner_message(
2916
+ f'{operation} {len(clusters)} cluster{plural}'),
2917
+ total=len(clusters))
2831
2918
 
2832
2919
  def _down_or_stop(name: str):
2833
2920
  success_progress = False
@@ -3113,7 +3200,12 @@ def show_gpus(
3113
3200
  print_section_titles = False
3114
3201
  # If cloud is kubernetes, we want to show real-time capacity
3115
3202
  if kubernetes_is_enabled and (cloud is None or cloud_is_kubernetes):
3116
- context = region
3203
+ if region:
3204
+ context = region
3205
+ else:
3206
+ # If region is not specified, we use the current context
3207
+ context = (
3208
+ kubernetes_utils.get_current_kube_config_context_name())
3117
3209
  try:
3118
3210
  # If --cloud kubernetes is not specified, we want to catch
3119
3211
  # the case where no GPUs are available on the cluster and
@@ -3128,7 +3220,7 @@ def show_gpus(
3128
3220
  else:
3129
3221
  print_section_titles = True
3130
3222
  yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
3131
- f'Kubernetes GPUs (Context: {context})'
3223
+ f'Kubernetes GPUs (context: {context})'
3132
3224
  f'{colorama.Style.RESET_ALL}\n')
3133
3225
  yield from k8s_realtime_table.get_string()
3134
3226
  k8s_node_table = _get_kubernetes_node_info_table(context)
@@ -3591,7 +3683,7 @@ def jobs_launch(
3591
3683
  dag_utils.fill_default_config_in_dag_for_job_launch(dag)
3592
3684
 
3593
3685
  click.secho(f'Managed job {dag.name!r} will be launched on (estimated):',
3594
- fg='yellow')
3686
+ fg='cyan')
3595
3687
  dag = sky.optimize(dag)
3596
3688
 
3597
3689
  if not yes:
@@ -3685,7 +3777,8 @@ def jobs_queue(all: bool, refresh: bool, skip_finished: bool):
3685
3777
 
3686
3778
  """
3687
3779
  click.secho('Fetching managed job statuses...', fg='yellow')
3688
- with rich_utils.safe_status('[cyan]Checking managed jobs[/]'):
3780
+ with rich_utils.safe_status(
3781
+ ux_utils.spinner_message('Checking managed jobs')):
3689
3782
  _, msg = _get_managed_jobs(refresh=refresh,
3690
3783
  skip_finished=skip_finished,
3691
3784
  show_all=all,
@@ -3736,10 +3829,12 @@ def jobs_cancel(name: Optional[str], job_ids: Tuple[int], all: bool, yes: bool):
3736
3829
  # Cancel managed jobs with IDs 1, 2, 3
3737
3830
  $ sky jobs cancel 1 2 3
3738
3831
  """
3739
- backend_utils.is_controller_accessible(
3740
- controller=controller_utils.Controllers.JOBS_CONTROLLER,
3741
- stopped_message='All managed jobs should have finished.',
3742
- exit_if_not_accessible=True)
3832
+ with rich_utils.safe_status(
3833
+ ux_utils.spinner_message('Checking managed jobs')):
3834
+ backend_utils.is_controller_accessible(
3835
+ controller=controller_utils.Controllers.JOBS_CONTROLLER,
3836
+ stopped_message='All managed jobs should have finished.',
3837
+ exit_if_not_accessible=True)
3743
3838
 
3744
3839
  job_id_str = ','.join(map(str, job_ids))
3745
3840
  if sum([len(job_ids) > 0, name is not None, all]) != 1:
@@ -4301,7 +4396,7 @@ def serve_status(all: bool, endpoint: bool, service_names: List[str]):
4301
4396
  sky serve status my-service
4302
4397
  """
4303
4398
  # This won't pollute the output of --endpoint.
4304
- with rich_utils.safe_status('[cyan]Checking services[/]'):
4399
+ with rich_utils.safe_status(ux_utils.spinner_message('Checking services')):
4305
4400
  _, msg = _get_services(service_names,
4306
4401
  show_all=all,
4307
4402
  show_endpoint=endpoint,
@@ -4725,11 +4820,11 @@ def benchmark_launch(
4725
4820
  f'\n{colorama.Fore.CYAN}Benchmark name: '
4726
4821
  f'{colorama.Style.BRIGHT}{benchmark}{colorama.Style.RESET_ALL}'
4727
4822
  '\nTo see the benchmark results: '
4728
- f'{backend_utils.BOLD}sky bench show '
4729
- f'{benchmark}{backend_utils.RESET_BOLD}'
4823
+ f'{ux_utils.BOLD}sky bench show '
4824
+ f'{benchmark}{ux_utils.RESET_BOLD}'
4730
4825
  '\nTo teardown the clusters: '
4731
- f'{backend_utils.BOLD}sky bench down '
4732
- f'{benchmark}{backend_utils.RESET_BOLD}')
4826
+ f'{ux_utils.BOLD}sky bench down '
4827
+ f'{benchmark}{ux_utils.RESET_BOLD}')
4733
4828
  subprocess_utils.run('sky bench ls')
4734
4829
  else:
4735
4830
  logger.error('No benchmarking clusters are created.')
@@ -5020,9 +5115,9 @@ def benchmark_delete(benchmarks: Tuple[str], all: Optional[bool],
5020
5115
  progress = rich_progress.Progress(transient=True,
5021
5116
  redirect_stdout=False,
5022
5117
  redirect_stderr=False)
5023
- task = progress.add_task(
5024
- f'[bold cyan]Deleting {len(to_delete)} benchmark{plural}: ',
5025
- total=len(to_delete))
5118
+ task = progress.add_task(ux_utils.spinner_message(
5119
+ f'Deleting {len(to_delete)} benchmark{plural}'),
5120
+ total=len(to_delete))
5026
5121
 
5027
5122
  def _delete_benchmark(benchmark: str) -> None:
5028
5123
  clusters = benchmark_state.get_benchmark_clusters(benchmark)
@@ -5037,8 +5132,8 @@ def benchmark_delete(benchmarks: Tuple[str], all: Optional[bool],
5037
5132
  message = (f'{colorama.Fore.YELLOW}Benchmark {benchmark} '
5038
5133
  f'has {num_clusters} un-terminated cluster{plural}. '
5039
5134
  f'Terminate the cluster{plural} with '
5040
- f'{backend_utils.BOLD} sky bench down {benchmark} '
5041
- f'{backend_utils.RESET_BOLD} '
5135
+ f'{ux_utils.BOLD} sky bench down {benchmark} '
5136
+ f'{ux_utils.RESET_BOLD} '
5042
5137
  'before deleting the benchmark report.')
5043
5138
  success = False
5044
5139
  else:
@@ -5139,7 +5234,7 @@ def _deploy_local_cluster(gpus: bool):
5139
5234
  f'Full log: {log_path}'
5140
5235
  f'\nError: {style.BRIGHT}{stderr}{style.RESET_ALL}')
5141
5236
  # Run sky check
5142
- with rich_utils.safe_status('[bold cyan]Running sky check...'):
5237
+ with rich_utils.safe_status(ux_utils.spinner_message('Running sky check')):
5143
5238
  sky_check.check(clouds=['kubernetes'], quiet=True)
5144
5239
  if cluster_created:
5145
5240
  # Prepare completion message which shows CPU and GPU count
@@ -5336,7 +5431,8 @@ def local_down():
5336
5431
  'local_down.log')
5337
5432
  tail_cmd = 'tail -n100 -f ' + log_path
5338
5433
 
5339
- with rich_utils.safe_status('[bold cyan]Removing local cluster...'):
5434
+ with rich_utils.safe_status(
5435
+ ux_utils.spinner_message('Removing local cluster')):
5340
5436
  style = colorama.Style
5341
5437
  click.echo('To view detailed progress: '
5342
5438
  f'{style.BRIGHT}{tail_cmd}{style.RESET_ALL}')
@@ -5359,7 +5455,8 @@ def local_down():
5359
5455
  f'\nError: {style.BRIGHT}{stderr}{style.RESET_ALL}')
5360
5456
  if cluster_removed:
5361
5457
  # Run sky check
5362
- with rich_utils.safe_status('[bold cyan]Running sky check...'):
5458
+ with rich_utils.safe_status(
5459
+ ux_utils.spinner_message('Running sky check')):
5363
5460
  sky_check.check(clouds=['kubernetes'], quiet=True)
5364
5461
  click.echo(
5365
5462
  f'{colorama.Fore.GREEN}Local cluster removed.{style.RESET_ALL}')
sky/clouds/service_catalog/aws_catalog.py CHANGED
@@ -10,8 +10,6 @@ import threading
10
10
  import typing
11
11
  from typing import Dict, List, Optional, Tuple
12
12
 
13
- import colorama
14
-
15
13
  from sky import exceptions
16
14
  from sky import sky_logging
17
15
  from sky.adaptors import common as adaptors_common
@@ -21,6 +19,8 @@ from sky.clouds.service_catalog import config
21
19
  from sky.clouds.service_catalog.data_fetchers import fetch_aws
22
20
  from sky.utils import common_utils
23
21
  from sky.utils import resources_utils
22
+ from sky.utils import rich_utils
23
+ from sky.utils import ux_utils
24
24
 
25
25
  if typing.TYPE_CHECKING:
26
26
  import pandas as pd
@@ -82,11 +82,10 @@ def _get_az_mappings(aws_user_hash: str) -> Optional['pd.DataFrame']:
82
82
  az_mappings = None
83
83
  if aws_user_hash != 'default':
84
84
  # Fetch az mapping from AWS.
85
- print(
86
- f'\r{colorama.Style.DIM}AWS: Fetching availability zones '
87
- f'mapping...{colorama.Style.RESET_ALL}',
88
- end='')
89
- az_mappings = fetch_aws.fetch_availability_zone_mappings()
85
+ with rich_utils.safe_status(
86
+ ux_utils.spinner_message('AWS: Fetching availability '
87
+ 'zones mapping')):
88
+ az_mappings = fetch_aws.fetch_availability_zone_mappings()
90
89
  else:
91
90
  return None
92
91
  az_mappings.to_csv(az_mapping_path, index=False)
sky/clouds/service_catalog/common.py CHANGED
@@ -198,9 +198,10 @@ def read_catalog(filename: str,
198
198
  if pull_frequency_hours is not None:
199
199
  update_frequency_str = (
200
200
  f' (every {pull_frequency_hours} hours)')
201
- with rich_utils.safe_status((f'Updating {cloud} catalog: '
202
- f'{filename}'
203
- f'{update_frequency_str}')):
201
+ with rich_utils.safe_status(
202
+ ux_utils.spinner_message(
203
+ f'Updating {cloud} catalog: {filename}') +
204
+ f'{update_frequency_str}'):
204
205
  try:
205
206
  r = requests.get(url)
206
207
  r.raise_for_status()
sky/clouds/service_catalog/cudo_catalog.py CHANGED
@@ -14,6 +14,9 @@ _PULL_FREQUENCY_HOURS = 1
14
14
  _df = common.read_catalog(cudo_mt.VMS_CSV,
15
15
  pull_frequency_hours=_PULL_FREQUENCY_HOURS)
16
16
 
17
+ _DEFAULT_NUM_VCPUS = 8
18
+ _DEFAULT_MEMORY_CPU_RATIO = 2
19
+
17
20
 
18
21
  def instance_type_exists(instance_type: str) -> bool:
19
22
  return common.instance_type_exists_impl(_df, instance_type)
@@ -52,7 +55,14 @@ def get_default_instance_type(cpus: Optional[str] = None,
52
55
  del disk_tier
53
56
  # NOTE: After expanding catalog to multiple entries, you may
54
57
  # want to specify a default instance type or family.
55
- return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory)
58
+ if cpus is None and memory is None:
59
+ cpus = f'{_DEFAULT_NUM_VCPUS}+'
60
+
61
+ memory_gb_or_ratio = memory
62
+ if memory is None:
63
+ memory_gb_or_ratio = f'{_DEFAULT_MEMORY_CPU_RATIO}x'
64
+ return common.get_instance_type_for_cpus_mem_impl(_df, cpus,
65
+ memory_gb_or_ratio)
56
66
 
57
67
 
58
68
  def get_accelerators_from_instance_type(
sky/core.py CHANGED
@@ -21,6 +21,7 @@ from sky.usage import usage_lib
21
21
  from sky.utils import controller_utils
22
22
  from sky.utils import rich_utils
23
23
  from sky.utils import subprocess_utils
24
+ from sky.utils import ux_utils
24
25
 
25
26
  if typing.TYPE_CHECKING:
26
27
  from sky import resources as resources_lib
@@ -127,8 +128,9 @@ def endpoints(cluster: str,
127
128
  RuntimeError: if the cluster has no ports to be exposed or no endpoints
128
129
  are exposed yet.
129
130
  """
130
- with rich_utils.safe_status('[bold cyan]Fetching endpoints for cluster '
131
- f'{cluster}...[/]'):
131
+ with rich_utils.safe_status(
132
+ ux_utils.spinner_message(
133
+ f'Fetching endpoints for cluster {cluster}')):
132
134
  return backend_utils.get_endpoints(cluster=cluster, port=port)
133
135
 
134
136
 
sky/data/storage.py CHANGED
@@ -1317,8 +1317,8 @@ class S3Store(AbstractStore):
1317
1317
  source_message = source_path_list[0]
1318
1318
 
1319
1319
  with rich_utils.safe_status(
1320
- f'[bold cyan]Syncing '
1321
- f'[green]{source_message}[/] to [green]s3://{self.name}/[/]'):
1320
+ ux_utils.spinner_message(f'Syncing {source_message} -> '
1321
+ f's3://{self.name}/')):
1322
1322
  data_utils.parallel_upload(
1323
1323
  source_path_list,
1324
1324
  get_file_sync_command,
@@ -1445,7 +1445,8 @@ class S3Store(AbstractStore):
1445
1445
  }
1446
1446
  s3_client.create_bucket(**create_bucket_config)
1447
1447
  logger.info(
1448
- f'Created S3 bucket {bucket_name!r} in {region or "us-east-1"}')
1448
+ f' {colorama.Style.DIM}Created S3 bucket {bucket_name!r} in '
1449
+ f'{region or "us-east-1"}{colorama.Style.RESET_ALL}')
1449
1450
 
1450
1451
  # Add AWS tags configured in config.yaml to the bucket.
1451
1452
  # This is useful for cost tracking and external cleanup.
@@ -1486,7 +1487,8 @@ class S3Store(AbstractStore):
1486
1487
  remove_command = f'aws s3 rb s3://{bucket_name} --force'
1487
1488
  try:
1488
1489
  with rich_utils.safe_status(
1489
- f'[bold cyan]Deleting S3 bucket {bucket_name}[/]'):
1490
+ ux_utils.spinner_message(
1491
+ f'Deleting S3 bucket [green]{bucket_name}')):
1490
1492
  subprocess.check_output(remove_command.split(' '),
1491
1493
  stderr=subprocess.STDOUT)
1492
1494
  except subprocess.CalledProcessError as e:
@@ -1726,8 +1728,8 @@ class GcsStore(AbstractStore):
1726
1728
  f'cp -e -n -r -I gs://{self.name}')
1727
1729
 
1728
1730
  with rich_utils.safe_status(
1729
- f'[bold cyan]Syncing '
1730
- f'[green]{source_message}[/] to [green]gs://{self.name}/[/]'):
1731
+ ux_utils.spinner_message(f'Syncing {source_message} -> '
1732
+ f'gs://{self.name}/')):
1731
1733
  data_utils.run_upload_cli(sync_command,
1732
1734
  self._ACCESS_DENIED_MESSAGE,
1733
1735
  bucket_name=self.name)
@@ -1781,8 +1783,8 @@ class GcsStore(AbstractStore):
1781
1783
  source_message = source_path_list[0]
1782
1784
 
1783
1785
  with rich_utils.safe_status(
1784
- f'[bold cyan]Syncing '
1785
- f'[green]{source_message}[/] to [green]gs://{self.name}/[/]'):
1786
+ ux_utils.spinner_message(f'Syncing {source_message} -> '
1787
+ f'gs://{self.name}/')):
1786
1788
  data_utils.parallel_upload(
1787
1789
  source_path_list,
1788
1790
  get_file_sync_command,
@@ -1904,8 +1906,9 @@ class GcsStore(AbstractStore):
1904
1906
  f'Attempted to create a bucket {self.name} but failed.'
1905
1907
  ) from e
1906
1908
  logger.info(
1907
- f'Created GCS bucket {new_bucket.name} in {new_bucket.location} '
1908
- f'with storage class {new_bucket.storage_class}')
1909
+ f' {colorama.Style.DIM}Created GCS bucket {new_bucket.name!r} in '
1910
+ f'{new_bucket.location} with storage class '
1911
+ f'{new_bucket.storage_class}{colorama.Style.RESET_ALL}')
1909
1912
  return new_bucket
1910
1913
 
1911
1914
  def _delete_gcs_bucket(self, bucket_name: str) -> bool:
@@ -1919,7 +1922,8 @@ class GcsStore(AbstractStore):
1919
1922
  """
1920
1923
 
1921
1924
  with rich_utils.safe_status(
1922
- f'[bold cyan]Deleting GCS bucket {bucket_name}[/]'):
1925
+ ux_utils.spinner_message(
1926
+ f'Deleting GCS bucket [green]{bucket_name}')):
1923
1927
  try:
1924
1928
  self.client.get_bucket(bucket_name)
1925
1929
  except gcp.forbidden_exception() as e:
@@ -2306,11 +2310,12 @@ class AzureBlobStore(AbstractStore):
2306
2310
  resource_group_name)
2307
2311
  except azure.exceptions().ResourceNotFoundError:
2308
2312
  with rich_utils.safe_status(
2309
- '[bold cyan]Setting up resource group: '
2310
- f'{resource_group_name}'):
2313
+ ux_utils.spinner_message(
2314
+ f'Setting up resource group: '
2315
+ f'{resource_group_name}')):
2311
2316
  self.resource_client.resource_groups.create_or_update(
2312
2317
  resource_group_name, {'location': self.region})
2313
- logger.info('Created Azure resource group '
2318
+ logger.info(' Created Azure resource group '
2314
2319
  f'{resource_group_name!r}.')
2315
2320
  # check if the storage account name already exists under the
2316
2321
  # given resource group name.
@@ -2319,13 +2324,14 @@ class AzureBlobStore(AbstractStore):
2319
2324
  resource_group_name, storage_account_name)
2320
2325
  except azure.exceptions().ResourceNotFoundError:
2321
2326
  with rich_utils.safe_status(
2322
- '[bold cyan]Setting up storage account: '
2323
- f'{storage_account_name}'):
2327
+ ux_utils.spinner_message(
2328
+ f'Setting up storage account: '
2329
+ f'{storage_account_name}')):
2324
2330
  self._create_storage_account(resource_group_name,
2325
2331
  storage_account_name)
2326
2332
  # wait until new resource creation propagates to Azure.
2327
2333
  time.sleep(1)
2328
- logger.info('Created Azure storage account '
2334
+ logger.info(' Created Azure storage account '
2329
2335
  f'{storage_account_name!r}.')
2330
2336
 
2331
2337
  return storage_account_name, resource_group_name
@@ -2514,9 +2520,9 @@ class AzureBlobStore(AbstractStore):
2514
2520
  container_endpoint = data_utils.AZURE_CONTAINER_URL.format(
2515
2521
  storage_account_name=self.storage_account_name,
2516
2522
  container_name=self.name)
2517
- with rich_utils.safe_status(f'[bold cyan]Syncing '
2518
- f'[green]{source_message}[/] to '
2519
- f'[green]{container_endpoint}/[/]'):
2523
+ with rich_utils.safe_status(
2524
+ ux_utils.spinner_message(
2525
+ f'Syncing {source_message} -> {container_endpoint}/')):
2520
2526
  data_utils.parallel_upload(
2521
2527
  source_path_list,
2522
2528
  get_file_sync_command,
@@ -2665,9 +2671,10 @@ class AzureBlobStore(AbstractStore):
2665
2671
  self.storage_account_name,
2666
2672
  container_name,
2667
2673
  blob_container={})
2668
- logger.info('Created AZ Container '
2674
+ logger.info(f' {colorama.Style.DIM}Created AZ Container '
2669
2675
  f'{container_name!r} in {self.region!r} under storage '
2670
- f'account {self.storage_account_name!r}.')
2676
+ f'account {self.storage_account_name!r}.'
2677
+ f'{colorama.Style.RESET_ALL}')
2671
2678
  except azure.exceptions().ResourceExistsError as e:
2672
2679
  if 'container is being deleted' in e.error.message:
2673
2680
  with ux_utils.print_exception_no_traceback():
@@ -2700,7 +2707,8 @@ class AzureBlobStore(AbstractStore):
2700
2707
  """
2701
2708
  try:
2702
2709
  with rich_utils.safe_status(
2703
- f'[bold cyan]Deleting Azure container {container_name}[/]'):
2710
+ ux_utils.spinner_message(
2711
+ f'Deleting Azure container {container_name}')):
2704
2712
  # Check for the existance of the container before deletion.
2705
2713
  self.storage_client.blob_containers.get(
2706
2714
  self.resource_group_name,
@@ -2916,8 +2924,8 @@ class R2Store(AbstractStore):
2916
2924
  source_message = source_path_list[0]
2917
2925
 
2918
2926
  with rich_utils.safe_status(
2919
- f'[bold cyan]Syncing '
2920
- f'[green]{source_message}[/] to [green]r2://{self.name}/[/]'):
2927
+ ux_utils.spinner_message(
2928
+ f'Syncing {source_message} -> r2://{self.name}/')):
2921
2929
  data_utils.parallel_upload(
2922
2930
  source_path_list,
2923
2931
  get_file_sync_command,
@@ -3055,7 +3063,9 @@ class R2Store(AbstractStore):
3055
3063
  location = {'LocationConstraint': region}
3056
3064
  r2_client.create_bucket(Bucket=bucket_name,
3057
3065
  CreateBucketConfiguration=location)
3058
- logger.info(f'Created R2 bucket {bucket_name} in {region}')
3066
+ logger.info(f' {colorama.Style.DIM}Created R2 bucket '
3067
+ f'{bucket_name!r} in {region}'
3068
+ f'{colorama.Style.RESET_ALL}')
3059
3069
  except aws.botocore_exceptions().ClientError as e:
3060
3070
  with ux_utils.print_exception_no_traceback():
3061
3071
  raise exceptions.StorageBucketCreateError(
@@ -3087,7 +3097,8 @@ class R2Store(AbstractStore):
3087
3097
  f'--profile={cloudflare.R2_PROFILE_NAME}')
3088
3098
  try:
3089
3099
  with rich_utils.safe_status(
3090
- f'[bold cyan]Deleting R2 bucket {bucket_name}[/]'):
3100
+ ux_utils.spinner_message(
3101
+ f'Deleting R2 bucket {bucket_name}')):
3091
3102
  subprocess.check_output(remove_command,
3092
3103
  stderr=subprocess.STDOUT,
3093
3104
  shell=True)
@@ -3354,9 +3365,8 @@ class IBMCosStore(AbstractStore):
3354
3365
  source_message = source_path_list[0]
3355
3366
 
3356
3367
  with rich_utils.safe_status(
3357
- f'[bold cyan]Syncing '
3358
- f'[green]{source_message}[/] to '
3359
- f'[green]cos://{self.region}/{self.name}/[/]'):
3368
+ ux_utils.spinner_message(f'Syncing {source_message} -> '
3369
+ f'cos://{self.region}/{self.name}/')):
3360
3370
  data_utils.parallel_upload(
3361
3371
  source_path_list,
3362
3372
  get_file_sync_command,
@@ -3490,8 +3500,10 @@ class IBMCosStore(AbstractStore):
3490
3500
  CreateBucketConfiguration={
3491
3501
  'LocationConstraint': f'{region}-smart'
3492
3502
  })
3493
- logger.info(f'Created IBM COS bucket {bucket_name} in {region} '
3494
- f'with storage class smart tier')
3503
+ logger.info(f' {colorama.Style.DIM}Created IBM COS bucket '
3504
+ f'{bucket_name!r} in {region} '
3505
+ 'with storage class smart tier'
3506
+ f'{colorama.Style.RESET_ALL}')
3495
3507
  self.bucket = self.s3_resource.Bucket(bucket_name)
3496
3508
 
3497
3509
  except ibm.ibm_botocore.exceptions.ClientError as e: # type: ignore[union-attr] # pylint: disable=line-too-long