skypilot-nightly 1.0.0.dev20241012__py3-none-any.whl → 1.0.0.dev20241013__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/azure.py +3 -1
- sky/adaptors/common.py +6 -2
- sky/backends/backend.py +9 -4
- sky/backends/backend_utils.py +13 -16
- sky/backends/cloud_vm_ray_backend.py +207 -161
- sky/backends/local_docker_backend.py +3 -1
- sky/benchmark/benchmark_utils.py +5 -4
- sky/cli.py +36 -28
- sky/clouds/service_catalog/aws_catalog.py +6 -7
- sky/clouds/service_catalog/common.py +4 -3
- sky/clouds/service_catalog/cudo_catalog.py +11 -1
- sky/core.py +4 -2
- sky/data/storage.py +44 -32
- sky/data/storage_utils.py +8 -4
- sky/exceptions.py +5 -0
- sky/execution.py +10 -24
- sky/jobs/core.py +9 -7
- sky/jobs/utils.py +15 -10
- sky/optimizer.py +50 -37
- sky/provision/aws/config.py +15 -6
- sky/provision/azure/config.py +14 -3
- sky/provision/azure/instance.py +15 -9
- sky/provision/kubernetes/instance.py +3 -1
- sky/provision/provisioner.py +63 -74
- sky/serve/core.py +42 -40
- sky/sky_logging.py +9 -5
- sky/skylet/log_lib.py +5 -4
- sky/skylet/providers/lambda_cloud/node_provider.py +1 -1
- sky/utils/command_runner.py +11 -11
- sky/utils/common_utils.py +2 -5
- sky/utils/controller_utils.py +78 -29
- sky/utils/env_options.py +22 -7
- sky/utils/log_utils.py +39 -24
- sky/utils/resources_utils.py +23 -0
- sky/utils/rich_utils.py +55 -5
- sky/utils/ux_utils.py +63 -4
- {skypilot_nightly-1.0.0.dev20241012.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20241012.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/RECORD +43 -43
- {skypilot_nightly-1.0.0.dev20241012.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20241012.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20241012.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20241012.dist-info → skypilot_nightly-1.0.0.dev20241013.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = 'd63497c267b62ebc6cb952d25312f98852ca6c8d'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.dev20241012'
|
38
|
+
__version__ = '1.0.0.dev20241013'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
sky/adaptors/azure.py
CHANGED
@@ -20,7 +20,9 @@ from sky.utils import ux_utils
|
|
20
20
|
azure = common.LazyImport(
|
21
21
|
'azure',
|
22
22
|
import_error_message=('Failed to import dependencies for Azure.'
|
23
|
-
'Try pip install "skypilot[azure]"')
|
23
|
+
'Try pip install "skypilot[azure]"'),
|
24
|
+
set_loggers=lambda: logging.getLogger('azure.identity').setLevel(logging.
|
25
|
+
ERROR))
|
24
26
|
Client = Any
|
25
27
|
sky_logger = sky_logging.init_logger(__name__)
|
26
28
|
|
sky/adaptors/common.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
"""Lazy import for modules to avoid import error when not used."""
|
2
2
|
import functools
|
3
3
|
import importlib
|
4
|
-
from typing import Any, Optional, Tuple
|
4
|
+
from typing import Any, Callable, Optional, Tuple
|
5
5
|
|
6
6
|
|
7
7
|
class LazyImport:
|
@@ -18,15 +18,19 @@ class LazyImport:
|
|
18
18
|
|
19
19
|
def __init__(self,
|
20
20
|
module_name: str,
|
21
|
-
import_error_message: Optional[str] = None
|
21
|
+
import_error_message: Optional[str] = None,
|
22
|
+
set_loggers: Optional[Callable] = None):
|
22
23
|
self._module_name = module_name
|
23
24
|
self._module = None
|
24
25
|
self._import_error_message = import_error_message
|
26
|
+
self._set_loggers = set_loggers
|
25
27
|
|
26
28
|
def load_module(self):
|
27
29
|
if self._module is None:
|
28
30
|
try:
|
29
31
|
self._module = importlib.import_module(self._module_name)
|
32
|
+
if self._set_loggers is not None:
|
33
|
+
self._set_loggers()
|
30
34
|
except ImportError as e:
|
31
35
|
if self._import_error_message is not None:
|
32
36
|
raise ImportError(self._import_error_message) from e
|
sky/backends/backend.py
CHANGED
@@ -4,7 +4,9 @@ from typing import Dict, Generic, Optional
|
|
4
4
|
|
5
5
|
import sky
|
6
6
|
from sky.usage import usage_lib
|
7
|
+
from sky.utils import rich_utils
|
7
8
|
from sky.utils import timeline
|
9
|
+
from sky.utils import ux_utils
|
8
10
|
|
9
11
|
if typing.TYPE_CHECKING:
|
10
12
|
from sky import resources
|
@@ -54,8 +56,9 @@ class Backend(Generic[_ResourceHandleType]):
|
|
54
56
|
cluster_name = sky.backends.backend_utils.generate_cluster_name()
|
55
57
|
usage_lib.record_cluster_name_for_current_operation(cluster_name)
|
56
58
|
usage_lib.messages.usage.update_actual_task(task)
|
57
|
-
|
58
|
-
|
59
|
+
with rich_utils.safe_status(ux_utils.spinner_message('Launching')):
|
60
|
+
return self._provision(task, to_provision, dryrun, stream_logs,
|
61
|
+
cluster_name, retry_until_up)
|
59
62
|
|
60
63
|
@timeline.event
|
61
64
|
@usage_lib.messages.usage.update_runtime('sync_workdir')
|
@@ -76,7 +79,8 @@ class Backend(Generic[_ResourceHandleType]):
|
|
76
79
|
@usage_lib.messages.usage.update_runtime('setup')
|
77
80
|
def setup(self, handle: _ResourceHandleType, task: 'task_lib.Task',
|
78
81
|
detach_setup: bool) -> None:
|
79
|
-
|
82
|
+
with rich_utils.safe_status(ux_utils.spinner_message('Running setup')):
|
83
|
+
return self._setup(handle, task, detach_setup)
|
80
84
|
|
81
85
|
def add_storage_objects(self, task: 'task_lib.Task') -> None:
|
82
86
|
raise NotImplementedError
|
@@ -96,7 +100,8 @@ class Backend(Generic[_ResourceHandleType]):
|
|
96
100
|
usage_lib.record_cluster_name_for_current_operation(
|
97
101
|
handle.get_cluster_name())
|
98
102
|
usage_lib.messages.usage.update_actual_task(task)
|
99
|
-
|
103
|
+
with rich_utils.safe_status(ux_utils.spinner_message('Submitting job')):
|
104
|
+
return self._execute(handle, task, detach_run, dryrun)
|
100
105
|
|
101
106
|
@timeline.event
|
102
107
|
def post_execute(self, handle: _ResourceHandleType, down: bool) -> None:
|
sky/backends/backend_utils.py
CHANGED
@@ -70,9 +70,6 @@ IP_ADDR_REGEX = r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?!/\d{1,2})\b'
|
|
70
70
|
SKY_REMOTE_PATH = '~/.sky/wheels'
|
71
71
|
SKY_USER_FILE_PATH = '~/.sky/generated'
|
72
72
|
|
73
|
-
BOLD = '\033[1m'
|
74
|
-
RESET_BOLD = '\033[0m'
|
75
|
-
|
76
73
|
# Do not use /tmp because it gets cleared on VM restart.
|
77
74
|
_SKY_REMOTE_FILE_MOUNTS_DIR = '~/.sky/file_mounts/'
|
78
75
|
|
@@ -1171,7 +1168,8 @@ def wait_until_ray_cluster_ready(
|
|
1171
1168
|
runner = command_runner.SSHCommandRunner(node=(head_ip, 22),
|
1172
1169
|
**ssh_credentials)
|
1173
1170
|
with rich_utils.safe_status(
|
1174
|
-
'
|
1171
|
+
ux_utils.spinner_message('Waiting for workers',
|
1172
|
+
log_path=log_path)) as worker_status:
|
1175
1173
|
while True:
|
1176
1174
|
rc, output, stderr = runner.run(
|
1177
1175
|
instance_setup.RAY_STATUS_WITH_SKY_RAY_PORT_COMMAND,
|
@@ -1187,9 +1185,11 @@ def wait_until_ray_cluster_ready(
|
|
1187
1185
|
ready_head, ready_workers = _count_healthy_nodes_from_ray(
|
1188
1186
|
output, is_local_cloud=is_local_cloud)
|
1189
1187
|
|
1190
|
-
worker_status.update(
|
1191
|
-
|
1192
|
-
|
1188
|
+
worker_status.update(
|
1189
|
+
ux_utils.spinner_message(
|
1190
|
+
f'{ready_workers} out of {num_nodes - 1} '
|
1191
|
+
'workers ready',
|
1192
|
+
log_path=log_path))
|
1193
1193
|
|
1194
1194
|
# In the local case, ready_head=0 and ready_workers=num_nodes. This
|
1195
1195
|
# is because there is no matching regex for _LAUNCHED_HEAD_PATTERN.
|
@@ -1304,7 +1304,6 @@ def parallel_data_transfer_to_nodes(
|
|
1304
1304
|
stream_logs: bool; Whether to stream logs to stdout
|
1305
1305
|
source_bashrc: bool; Source bashrc before running the command.
|
1306
1306
|
"""
|
1307
|
-
fore = colorama.Fore
|
1308
1307
|
style = colorama.Style
|
1309
1308
|
|
1310
1309
|
origin_source = source
|
@@ -1341,12 +1340,10 @@ def parallel_data_transfer_to_nodes(
|
|
1341
1340
|
|
1342
1341
|
num_nodes = len(runners)
|
1343
1342
|
plural = 's' if num_nodes > 1 else ''
|
1344
|
-
message = (f'{
|
1345
|
-
f': {
|
1346
|
-
f'{style.BRIGHT}{target}{style.RESET_ALL}')
|
1343
|
+
message = (f' {style.DIM}{action_message} (to {num_nodes} node{plural})'
|
1344
|
+
f': {origin_source} -> {target}{style.RESET_ALL}')
|
1347
1345
|
logger.info(message)
|
1348
|
-
|
1349
|
-
subprocess_utils.run_in_parallel(_sync_node, runners)
|
1346
|
+
subprocess_utils.run_in_parallel(_sync_node, runners)
|
1350
1347
|
|
1351
1348
|
|
1352
1349
|
def check_local_gpus() -> bool:
|
@@ -2488,9 +2485,9 @@ def get_clusters(
|
|
2488
2485
|
progress = rich_progress.Progress(transient=True,
|
2489
2486
|
redirect_stdout=False,
|
2490
2487
|
redirect_stderr=False)
|
2491
|
-
task = progress.add_task(
|
2492
|
-
f'
|
2493
|
-
|
2488
|
+
task = progress.add_task(ux_utils.spinner_message(
|
2489
|
+
f'Refreshing status for {len(records)} cluster{plural}'),
|
2490
|
+
total=len(records))
|
2494
2491
|
|
2495
2492
|
def _refresh_cluster(cluster_name):
|
2496
2493
|
try:
|