skypilot-nightly 1.0.0.dev20241011__py3-none-any.whl → 1.0.0.dev20241013__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/azure.py +3 -1
  3. sky/adaptors/common.py +6 -2
  4. sky/backends/backend.py +9 -4
  5. sky/backends/backend_utils.py +13 -16
  6. sky/backends/cloud_vm_ray_backend.py +207 -161
  7. sky/backends/local_docker_backend.py +3 -1
  8. sky/benchmark/benchmark_utils.py +5 -4
  9. sky/cli.py +128 -31
  10. sky/clouds/service_catalog/aws_catalog.py +6 -7
  11. sky/clouds/service_catalog/common.py +4 -3
  12. sky/clouds/service_catalog/cudo_catalog.py +11 -1
  13. sky/core.py +4 -2
  14. sky/data/storage.py +44 -32
  15. sky/data/storage_utils.py +12 -7
  16. sky/exceptions.py +5 -0
  17. sky/execution.py +10 -24
  18. sky/jobs/__init__.py +2 -0
  19. sky/jobs/core.py +87 -7
  20. sky/jobs/utils.py +35 -19
  21. sky/optimizer.py +50 -37
  22. sky/provision/aws/config.py +15 -6
  23. sky/provision/azure/config.py +14 -3
  24. sky/provision/azure/instance.py +15 -9
  25. sky/provision/kubernetes/instance.py +3 -1
  26. sky/provision/kubernetes/utils.py +25 -0
  27. sky/provision/provisioner.py +63 -74
  28. sky/serve/core.py +42 -40
  29. sky/sky_logging.py +9 -5
  30. sky/skylet/log_lib.py +5 -4
  31. sky/skylet/providers/lambda_cloud/node_provider.py +1 -1
  32. sky/utils/cli_utils/status_utils.py +168 -21
  33. sky/utils/command_runner.py +11 -11
  34. sky/utils/common_utils.py +22 -5
  35. sky/utils/controller_utils.py +78 -29
  36. sky/utils/env_options.py +22 -7
  37. sky/utils/log_utils.py +39 -24
  38. sky/utils/resources_utils.py +23 -0
  39. sky/utils/rich_utils.py +55 -5
  40. sky/utils/ux_utils.py +63 -4
  41. {skypilot_nightly-1.0.0.dev20241011.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/METADATA +1 -1
  42. {skypilot_nightly-1.0.0.dev20241011.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/RECORD +46 -46
  43. {skypilot_nightly-1.0.0.dev20241011.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/LICENSE +0 -0
  44. {skypilot_nightly-1.0.0.dev20241011.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/WHEEL +0 -0
  45. {skypilot_nightly-1.0.0.dev20241011.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/entry_points.txt +0 -0
  46. {skypilot_nightly-1.0.0.dev20241011.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/top_level.txt +0 -0
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = 'd0d221fae659ccce73df5684fca53e0719dab814'
8
+ _SKYPILOT_COMMIT_SHA = 'd63497c267b62ebc6cb952d25312f98852ca6c8d'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20241011'
38
+ __version__ = '1.0.0.dev20241013'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
sky/adaptors/azure.py CHANGED
@@ -20,7 +20,9 @@ from sky.utils import ux_utils
20
20
  azure = common.LazyImport(
21
21
  'azure',
22
22
  import_error_message=('Failed to import dependencies for Azure.'
23
- 'Try pip install "skypilot[azure]"'))
23
+ 'Try pip install "skypilot[azure]"'),
24
+ set_loggers=lambda: logging.getLogger('azure.identity').setLevel(logging.
25
+ ERROR))
24
26
  Client = Any
25
27
  sky_logger = sky_logging.init_logger(__name__)
26
28
 
sky/adaptors/common.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """Lazy import for modules to avoid import error when not used."""
2
2
  import functools
3
3
  import importlib
4
- from typing import Any, Optional, Tuple
4
+ from typing import Any, Callable, Optional, Tuple
5
5
 
6
6
 
7
7
  class LazyImport:
@@ -18,15 +18,19 @@ class LazyImport:
18
18
 
19
19
  def __init__(self,
20
20
  module_name: str,
21
- import_error_message: Optional[str] = None):
21
+ import_error_message: Optional[str] = None,
22
+ set_loggers: Optional[Callable] = None):
22
23
  self._module_name = module_name
23
24
  self._module = None
24
25
  self._import_error_message = import_error_message
26
+ self._set_loggers = set_loggers
25
27
 
26
28
  def load_module(self):
27
29
  if self._module is None:
28
30
  try:
29
31
  self._module = importlib.import_module(self._module_name)
32
+ if self._set_loggers is not None:
33
+ self._set_loggers()
30
34
  except ImportError as e:
31
35
  if self._import_error_message is not None:
32
36
  raise ImportError(self._import_error_message) from e
sky/backends/backend.py CHANGED
@@ -4,7 +4,9 @@ from typing import Dict, Generic, Optional
4
4
 
5
5
  import sky
6
6
  from sky.usage import usage_lib
7
+ from sky.utils import rich_utils
7
8
  from sky.utils import timeline
9
+ from sky.utils import ux_utils
8
10
 
9
11
  if typing.TYPE_CHECKING:
10
12
  from sky import resources
@@ -54,8 +56,9 @@ class Backend(Generic[_ResourceHandleType]):
54
56
  cluster_name = sky.backends.backend_utils.generate_cluster_name()
55
57
  usage_lib.record_cluster_name_for_current_operation(cluster_name)
56
58
  usage_lib.messages.usage.update_actual_task(task)
57
- return self._provision(task, to_provision, dryrun, stream_logs,
58
- cluster_name, retry_until_up)
59
+ with rich_utils.safe_status(ux_utils.spinner_message('Launching')):
60
+ return self._provision(task, to_provision, dryrun, stream_logs,
61
+ cluster_name, retry_until_up)
59
62
 
60
63
  @timeline.event
61
64
  @usage_lib.messages.usage.update_runtime('sync_workdir')
@@ -76,7 +79,8 @@ class Backend(Generic[_ResourceHandleType]):
76
79
  @usage_lib.messages.usage.update_runtime('setup')
77
80
  def setup(self, handle: _ResourceHandleType, task: 'task_lib.Task',
78
81
  detach_setup: bool) -> None:
79
- return self._setup(handle, task, detach_setup)
82
+ with rich_utils.safe_status(ux_utils.spinner_message('Running setup')):
83
+ return self._setup(handle, task, detach_setup)
80
84
 
81
85
  def add_storage_objects(self, task: 'task_lib.Task') -> None:
82
86
  raise NotImplementedError
@@ -96,7 +100,8 @@ class Backend(Generic[_ResourceHandleType]):
96
100
  usage_lib.record_cluster_name_for_current_operation(
97
101
  handle.get_cluster_name())
98
102
  usage_lib.messages.usage.update_actual_task(task)
99
- return self._execute(handle, task, detach_run, dryrun)
103
+ with rich_utils.safe_status(ux_utils.spinner_message('Submitting job')):
104
+ return self._execute(handle, task, detach_run, dryrun)
100
105
 
101
106
  @timeline.event
102
107
  def post_execute(self, handle: _ResourceHandleType, down: bool) -> None:
sky/backends/backend_utils.py CHANGED
@@ -70,9 +70,6 @@ IP_ADDR_REGEX = r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?!/\d{1,2})\b'
70
70
  SKY_REMOTE_PATH = '~/.sky/wheels'
71
71
  SKY_USER_FILE_PATH = '~/.sky/generated'
72
72
 
73
- BOLD = '\033[1m'
74
- RESET_BOLD = '\033[0m'
75
-
76
73
  # Do not use /tmp because it gets cleared on VM restart.
77
74
  _SKY_REMOTE_FILE_MOUNTS_DIR = '~/.sky/file_mounts/'
78
75
 
@@ -1171,7 +1168,8 @@ def wait_until_ray_cluster_ready(
1171
1168
  runner = command_runner.SSHCommandRunner(node=(head_ip, 22),
1172
1169
  **ssh_credentials)
1173
1170
  with rich_utils.safe_status(
1174
- '[bold cyan]Waiting for workers...') as worker_status:
1171
+ ux_utils.spinner_message('Waiting for workers',
1172
+ log_path=log_path)) as worker_status:
1175
1173
  while True:
1176
1174
  rc, output, stderr = runner.run(
1177
1175
  instance_setup.RAY_STATUS_WITH_SKY_RAY_PORT_COMMAND,
@@ -1187,9 +1185,11 @@ def wait_until_ray_cluster_ready(
1187
1185
  ready_head, ready_workers = _count_healthy_nodes_from_ray(
1188
1186
  output, is_local_cloud=is_local_cloud)
1189
1187
 
1190
- worker_status.update('[bold cyan]'
1191
- f'{ready_workers} out of {num_nodes - 1} '
1192
- 'workers ready')
1188
+ worker_status.update(
1189
+ ux_utils.spinner_message(
1190
+ f'{ready_workers} out of {num_nodes - 1} '
1191
+ 'workers ready',
1192
+ log_path=log_path))
1193
1193
 
1194
1194
  # In the local case, ready_head=0 and ready_workers=num_nodes. This
1195
1195
  # is because there is no matching regex for _LAUNCHED_HEAD_PATTERN.
@@ -1304,7 +1304,6 @@ def parallel_data_transfer_to_nodes(
1304
1304
  stream_logs: bool; Whether to stream logs to stdout
1305
1305
  source_bashrc: bool; Source bashrc before running the command.
1306
1306
  """
1307
- fore = colorama.Fore
1308
1307
  style = colorama.Style
1309
1308
 
1310
1309
  origin_source = source
@@ -1341,12 +1340,10 @@ def parallel_data_transfer_to_nodes(
1341
1340
 
1342
1341
  num_nodes = len(runners)
1343
1342
  plural = 's' if num_nodes > 1 else ''
1344
- message = (f'{fore.CYAN}{action_message} (to {num_nodes} node{plural})'
1345
- f': {style.BRIGHT}{origin_source}{style.RESET_ALL} -> '
1346
- f'{style.BRIGHT}{target}{style.RESET_ALL}')
1343
+ message = (f' {style.DIM}{action_message} (to {num_nodes} node{plural})'
1344
+ f': {origin_source} -> {target}{style.RESET_ALL}')
1347
1345
  logger.info(message)
1348
- with rich_utils.safe_status(f'[bold cyan]{action_message}[/]'):
1349
- subprocess_utils.run_in_parallel(_sync_node, runners)
1346
+ subprocess_utils.run_in_parallel(_sync_node, runners)
1350
1347
 
1351
1348
 
1352
1349
  def check_local_gpus() -> bool:
@@ -2488,9 +2485,9 @@ def get_clusters(
2488
2485
  progress = rich_progress.Progress(transient=True,
2489
2486
  redirect_stdout=False,
2490
2487
  redirect_stderr=False)
2491
- task = progress.add_task(
2492
- f'[bold cyan]Refreshing status for {len(records)} cluster{plural}[/]',
2493
- total=len(records))
2488
+ task = progress.add_task(ux_utils.spinner_message(
2489
+ f'Refreshing status for {len(records)} cluster{plural}'),
2490
+ total=len(records))
2494
2491
 
2495
2492
  def _refresh_cluster(cluster_name):
2496
2493
  try: