skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +64 -32
- sky/adaptors/aws.py +23 -6
- sky/adaptors/azure.py +432 -15
- sky/adaptors/cloudflare.py +5 -5
- sky/adaptors/common.py +19 -9
- sky/adaptors/do.py +20 -0
- sky/adaptors/gcp.py +3 -2
- sky/adaptors/kubernetes.py +122 -88
- sky/adaptors/nebius.py +100 -0
- sky/adaptors/oci.py +39 -1
- sky/adaptors/vast.py +29 -0
- sky/admin_policy.py +101 -0
- sky/authentication.py +117 -98
- sky/backends/backend.py +52 -20
- sky/backends/backend_utils.py +669 -557
- sky/backends/cloud_vm_ray_backend.py +1099 -808
- sky/backends/local_docker_backend.py +14 -8
- sky/backends/wheel_utils.py +38 -20
- sky/benchmark/benchmark_utils.py +22 -23
- sky/check.py +76 -27
- sky/cli.py +1586 -1139
- sky/client/__init__.py +1 -0
- sky/client/cli.py +5683 -0
- sky/client/common.py +345 -0
- sky/client/sdk.py +1765 -0
- sky/cloud_stores.py +283 -19
- sky/clouds/__init__.py +7 -2
- sky/clouds/aws.py +303 -112
- sky/clouds/azure.py +185 -179
- sky/clouds/cloud.py +115 -37
- sky/clouds/cudo.py +29 -22
- sky/clouds/do.py +313 -0
- sky/clouds/fluidstack.py +44 -54
- sky/clouds/gcp.py +206 -65
- sky/clouds/ibm.py +26 -21
- sky/clouds/kubernetes.py +345 -91
- sky/clouds/lambda_cloud.py +40 -29
- sky/clouds/nebius.py +297 -0
- sky/clouds/oci.py +129 -90
- sky/clouds/paperspace.py +22 -18
- sky/clouds/runpod.py +53 -34
- sky/clouds/scp.py +28 -24
- sky/clouds/service_catalog/__init__.py +19 -13
- sky/clouds/service_catalog/aws_catalog.py +29 -12
- sky/clouds/service_catalog/azure_catalog.py +33 -6
- sky/clouds/service_catalog/common.py +95 -75
- sky/clouds/service_catalog/constants.py +3 -3
- sky/clouds/service_catalog/cudo_catalog.py +13 -3
- sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
- sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
- sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
- sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
- sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
- sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
- sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
- sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
- sky/clouds/service_catalog/do_catalog.py +111 -0
- sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
- sky/clouds/service_catalog/gcp_catalog.py +16 -2
- sky/clouds/service_catalog/ibm_catalog.py +2 -2
- sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
- sky/clouds/service_catalog/lambda_catalog.py +8 -3
- sky/clouds/service_catalog/nebius_catalog.py +116 -0
- sky/clouds/service_catalog/oci_catalog.py +31 -4
- sky/clouds/service_catalog/paperspace_catalog.py +2 -2
- sky/clouds/service_catalog/runpod_catalog.py +2 -2
- sky/clouds/service_catalog/scp_catalog.py +2 -2
- sky/clouds/service_catalog/vast_catalog.py +104 -0
- sky/clouds/service_catalog/vsphere_catalog.py +2 -2
- sky/clouds/utils/aws_utils.py +65 -0
- sky/clouds/utils/azure_utils.py +91 -0
- sky/clouds/utils/gcp_utils.py +5 -9
- sky/clouds/utils/oci_utils.py +47 -5
- sky/clouds/utils/scp_utils.py +4 -3
- sky/clouds/vast.py +280 -0
- sky/clouds/vsphere.py +22 -18
- sky/core.py +361 -107
- sky/dag.py +41 -28
- sky/data/data_transfer.py +37 -0
- sky/data/data_utils.py +211 -32
- sky/data/mounting_utils.py +182 -30
- sky/data/storage.py +2118 -270
- sky/data/storage_utils.py +126 -5
- sky/exceptions.py +179 -8
- sky/execution.py +158 -85
- sky/global_user_state.py +150 -34
- sky/jobs/__init__.py +12 -10
- sky/jobs/client/__init__.py +0 -0
- sky/jobs/client/sdk.py +302 -0
- sky/jobs/constants.py +49 -11
- sky/jobs/controller.py +161 -99
- sky/jobs/dashboard/dashboard.py +171 -25
- sky/jobs/dashboard/templates/index.html +572 -60
- sky/jobs/recovery_strategy.py +157 -156
- sky/jobs/scheduler.py +307 -0
- sky/jobs/server/__init__.py +1 -0
- sky/jobs/server/core.py +598 -0
- sky/jobs/server/dashboard_utils.py +69 -0
- sky/jobs/server/server.py +190 -0
- sky/jobs/state.py +627 -122
- sky/jobs/utils.py +615 -206
- sky/models.py +27 -0
- sky/optimizer.py +142 -83
- sky/provision/__init__.py +20 -5
- sky/provision/aws/config.py +124 -42
- sky/provision/aws/instance.py +130 -53
- sky/provision/azure/__init__.py +7 -0
- sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
- sky/provision/azure/config.py +220 -0
- sky/provision/azure/instance.py +1012 -37
- sky/provision/common.py +31 -3
- sky/provision/constants.py +25 -0
- sky/provision/cudo/__init__.py +2 -1
- sky/provision/cudo/cudo_utils.py +112 -0
- sky/provision/cudo/cudo_wrapper.py +37 -16
- sky/provision/cudo/instance.py +28 -12
- sky/provision/do/__init__.py +11 -0
- sky/provision/do/config.py +14 -0
- sky/provision/do/constants.py +10 -0
- sky/provision/do/instance.py +287 -0
- sky/provision/do/utils.py +301 -0
- sky/provision/docker_utils.py +82 -46
- sky/provision/fluidstack/fluidstack_utils.py +57 -125
- sky/provision/fluidstack/instance.py +15 -43
- sky/provision/gcp/config.py +19 -9
- sky/provision/gcp/constants.py +7 -1
- sky/provision/gcp/instance.py +55 -34
- sky/provision/gcp/instance_utils.py +339 -80
- sky/provision/gcp/mig_utils.py +210 -0
- sky/provision/instance_setup.py +172 -133
- sky/provision/kubernetes/__init__.py +1 -0
- sky/provision/kubernetes/config.py +104 -90
- sky/provision/kubernetes/constants.py +8 -0
- sky/provision/kubernetes/instance.py +680 -325
- sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
- sky/provision/kubernetes/network.py +54 -20
- sky/provision/kubernetes/network_utils.py +70 -21
- sky/provision/kubernetes/utils.py +1370 -251
- sky/provision/lambda_cloud/__init__.py +11 -0
- sky/provision/lambda_cloud/config.py +10 -0
- sky/provision/lambda_cloud/instance.py +265 -0
- sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
- sky/provision/logging.py +1 -1
- sky/provision/nebius/__init__.py +11 -0
- sky/provision/nebius/config.py +11 -0
- sky/provision/nebius/instance.py +285 -0
- sky/provision/nebius/utils.py +318 -0
- sky/provision/oci/__init__.py +15 -0
- sky/provision/oci/config.py +51 -0
- sky/provision/oci/instance.py +436 -0
- sky/provision/oci/query_utils.py +681 -0
- sky/provision/paperspace/constants.py +6 -0
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/paperspace/utils.py +2 -0
- sky/provision/provisioner.py +207 -130
- sky/provision/runpod/__init__.py +1 -0
- sky/provision/runpod/api/__init__.py +3 -0
- sky/provision/runpod/api/commands.py +119 -0
- sky/provision/runpod/api/pods.py +142 -0
- sky/provision/runpod/instance.py +64 -8
- sky/provision/runpod/utils.py +239 -23
- sky/provision/vast/__init__.py +10 -0
- sky/provision/vast/config.py +11 -0
- sky/provision/vast/instance.py +247 -0
- sky/provision/vast/utils.py +162 -0
- sky/provision/vsphere/common/vim_utils.py +1 -1
- sky/provision/vsphere/instance.py +8 -18
- sky/provision/vsphere/vsphere_utils.py +1 -1
- sky/resources.py +247 -102
- sky/serve/__init__.py +9 -9
- sky/serve/autoscalers.py +361 -299
- sky/serve/client/__init__.py +0 -0
- sky/serve/client/sdk.py +366 -0
- sky/serve/constants.py +12 -3
- sky/serve/controller.py +106 -36
- sky/serve/load_balancer.py +63 -12
- sky/serve/load_balancing_policies.py +84 -2
- sky/serve/replica_managers.py +42 -34
- sky/serve/serve_state.py +62 -32
- sky/serve/serve_utils.py +271 -160
- sky/serve/server/__init__.py +0 -0
- sky/serve/{core.py → server/core.py} +271 -90
- sky/serve/server/server.py +112 -0
- sky/serve/service.py +52 -16
- sky/serve/service_spec.py +95 -32
- sky/server/__init__.py +1 -0
- sky/server/common.py +430 -0
- sky/server/constants.py +21 -0
- sky/server/html/log.html +174 -0
- sky/server/requests/__init__.py +0 -0
- sky/server/requests/executor.py +472 -0
- sky/server/requests/payloads.py +487 -0
- sky/server/requests/queues/__init__.py +0 -0
- sky/server/requests/queues/mp_queue.py +76 -0
- sky/server/requests/requests.py +567 -0
- sky/server/requests/serializers/__init__.py +0 -0
- sky/server/requests/serializers/decoders.py +192 -0
- sky/server/requests/serializers/encoders.py +166 -0
- sky/server/server.py +1106 -0
- sky/server/stream_utils.py +141 -0
- sky/setup_files/MANIFEST.in +2 -5
- sky/setup_files/dependencies.py +159 -0
- sky/setup_files/setup.py +14 -125
- sky/sky_logging.py +59 -14
- sky/skylet/autostop_lib.py +2 -2
- sky/skylet/constants.py +183 -50
- sky/skylet/events.py +22 -10
- sky/skylet/job_lib.py +403 -258
- sky/skylet/log_lib.py +111 -71
- sky/skylet/log_lib.pyi +6 -0
- sky/skylet/providers/command_runner.py +6 -8
- sky/skylet/providers/ibm/node_provider.py +2 -2
- sky/skylet/providers/scp/config.py +11 -3
- sky/skylet/providers/scp/node_provider.py +8 -8
- sky/skylet/skylet.py +3 -1
- sky/skylet/subprocess_daemon.py +69 -17
- sky/skypilot_config.py +119 -57
- sky/task.py +205 -64
- sky/templates/aws-ray.yml.j2 +37 -7
- sky/templates/azure-ray.yml.j2 +27 -82
- sky/templates/cudo-ray.yml.j2 +7 -3
- sky/templates/do-ray.yml.j2 +98 -0
- sky/templates/fluidstack-ray.yml.j2 +7 -4
- sky/templates/gcp-ray.yml.j2 +26 -6
- sky/templates/ibm-ray.yml.j2 +3 -2
- sky/templates/jobs-controller.yaml.j2 +46 -11
- sky/templates/kubernetes-ingress.yml.j2 +7 -0
- sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
- sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
- sky/templates/kubernetes-ray.yml.j2 +292 -25
- sky/templates/lambda-ray.yml.j2 +30 -40
- sky/templates/nebius-ray.yml.j2 +79 -0
- sky/templates/oci-ray.yml.j2 +18 -57
- sky/templates/paperspace-ray.yml.j2 +10 -6
- sky/templates/runpod-ray.yml.j2 +26 -4
- sky/templates/scp-ray.yml.j2 +3 -2
- sky/templates/sky-serve-controller.yaml.j2 +12 -1
- sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
- sky/templates/vast-ray.yml.j2 +70 -0
- sky/templates/vsphere-ray.yml.j2 +8 -3
- sky/templates/websocket_proxy.py +64 -0
- sky/usage/constants.py +10 -1
- sky/usage/usage_lib.py +130 -37
- sky/utils/accelerator_registry.py +35 -51
- sky/utils/admin_policy_utils.py +147 -0
- sky/utils/annotations.py +51 -0
- sky/utils/cli_utils/status_utils.py +81 -23
- sky/utils/cluster_utils.py +356 -0
- sky/utils/command_runner.py +452 -89
- sky/utils/command_runner.pyi +77 -3
- sky/utils/common.py +54 -0
- sky/utils/common_utils.py +319 -108
- sky/utils/config_utils.py +204 -0
- sky/utils/control_master_utils.py +48 -0
- sky/utils/controller_utils.py +548 -266
- sky/utils/dag_utils.py +93 -32
- sky/utils/db_utils.py +18 -4
- sky/utils/env_options.py +29 -7
- sky/utils/kubernetes/create_cluster.sh +8 -60
- sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
- sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
- sky/utils/kubernetes/gpu_labeler.py +4 -4
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
- sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
- sky/utils/kubernetes/rsync_helper.sh +24 -0
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
- sky/utils/log_utils.py +240 -33
- sky/utils/message_utils.py +81 -0
- sky/utils/registry.py +127 -0
- sky/utils/resources_utils.py +94 -22
- sky/utils/rich_utils.py +247 -18
- sky/utils/schemas.py +284 -64
- sky/{status_lib.py → utils/status_lib.py} +12 -7
- sky/utils/subprocess_utils.py +212 -46
- sky/utils/timeline.py +12 -7
- sky/utils/ux_utils.py +168 -15
- skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
- skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
- sky/clouds/cloud_registry.py +0 -31
- sky/jobs/core.py +0 -330
- sky/skylet/providers/azure/__init__.py +0 -2
- sky/skylet/providers/azure/azure-vm-template.json +0 -301
- sky/skylet/providers/azure/config.py +0 -170
- sky/skylet/providers/azure/node_provider.py +0 -466
- sky/skylet/providers/lambda_cloud/__init__.py +0 -2
- sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
- sky/skylet/providers/oci/__init__.py +0 -2
- sky/skylet/providers/oci/node_provider.py +0 -488
- sky/skylet/providers/oci/query_helper.py +0 -383
- sky/skylet/providers/oci/utils.py +0 -21
- sky/utils/cluster_yaml_utils.py +0 -24
- sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
- skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
- skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
sky/data/storage_utils.py
CHANGED
@@ -1,15 +1,20 @@
|
|
1
1
|
"""Utility functions for the storage module."""
|
2
|
+
import glob
|
2
3
|
import os
|
4
|
+
import pathlib
|
3
5
|
import shlex
|
4
6
|
import subprocess
|
5
|
-
from typing import Any, Dict, List
|
7
|
+
from typing import Any, Dict, List, Optional, TextIO, Union
|
8
|
+
import warnings
|
9
|
+
import zipfile
|
6
10
|
|
7
11
|
import colorama
|
8
12
|
|
9
13
|
from sky import exceptions
|
10
14
|
from sky import sky_logging
|
15
|
+
from sky.skylet import constants
|
16
|
+
from sky.utils import common_utils
|
11
17
|
from sky.utils import log_utils
|
12
|
-
from sky.utils.cli_utils import status_utils
|
13
18
|
|
14
19
|
logger = sky_logging.init_logger(__name__)
|
15
20
|
|
@@ -19,6 +24,8 @@ _FILE_EXCLUSION_FROM_GITIGNORE_FAILURE_MSG = (
|
|
19
24
|
'to the cloud storage for {path!r}'
|
20
25
|
'due to the following error: {error_msg!r}')
|
21
26
|
|
27
|
+
_LAST_USE_TRUNC_LENGTH = 25
|
28
|
+
|
22
29
|
|
23
30
|
def format_storage_table(storages: List[Dict[str, Any]],
|
24
31
|
show_all: bool = False) -> str:
|
@@ -43,8 +50,8 @@ def format_storage_table(storages: List[Dict[str, Any]],
|
|
43
50
|
if show_all:
|
44
51
|
command = row['last_use']
|
45
52
|
else:
|
46
|
-
command =
|
47
|
-
|
53
|
+
command = common_utils.truncate_long_string(row['last_use'],
|
54
|
+
_LAST_USE_TRUNC_LENGTH)
|
48
55
|
storage_table.add_row([
|
49
56
|
# NAME
|
50
57
|
row['name'],
|
@@ -63,6 +70,42 @@ def format_storage_table(storages: List[Dict[str, Any]],
|
|
63
70
|
return 'No existing storage.'
|
64
71
|
|
65
72
|
|
73
|
+
def get_excluded_files_from_skyignore(src_dir_path: str) -> List[str]:
    """Collect the paths excluded by the .skyignore file of a directory.

    Each non-empty, non-comment line of the ignore file is treated as a glob
    pattern and expanded against the source directory.

    Args:
        src_dir_path: Directory that contains the ignore file; `~` is
            expanded.

    Returns:
        Paths of all matching files, relative to the source directory. The
        list is empty (and a warning is logged) if the ignore file cannot be
        read.
    """
    root_dir = os.path.expanduser(src_dir_path)
    skyignore_path = os.path.join(root_dir, constants.SKY_IGNORE_FILE)
    excluded_list: List[str] = []

    try:
        with open(skyignore_path, 'r', encoding='utf-8') as skyignore:
            for raw_line in skyignore:
                pattern = raw_line.strip()
                # Skip blank lines and comments.
                if not pattern or pattern.startswith('#'):
                    continue
                # Keep parsing consistent with rsync, which uses '/' as the
                # current directory: anchored patterns are resolved from the
                # source root, everything else may match at any depth.
                if pattern.startswith('/'):
                    pattern = '.' + pattern
                else:
                    pattern = '**/' + pattern
                matches = glob.glob(os.path.join(root_dir, pattern),
                                    recursive=True)
                # Cloud rsync expects paths relative to the source directory.
                excluded_list.extend(
                    [os.path.relpath(match, root_dir) for match in matches])
    except IOError as e:
        logger.warning(f'Error reading {skyignore_path}: '
                       f'{common_utils.format_exception(e, use_bracket=True)}')

    return excluded_list
|
107
|
+
|
108
|
+
|
66
109
|
def get_excluded_files_from_gitignore(src_dir_path: str) -> List[str]:
|
67
110
|
""" Lists files and patterns ignored by git in the source directory
|
68
111
|
|
@@ -78,7 +121,8 @@ def get_excluded_files_from_gitignore(src_dir_path: str) -> List[str]:
|
|
78
121
|
expand_src_dir_path = os.path.expanduser(src_dir_path)
|
79
122
|
|
80
123
|
git_exclude_path = os.path.join(expand_src_dir_path, '.git/info/exclude')
|
81
|
-
gitignore_path = os.path.join(expand_src_dir_path,
|
124
|
+
gitignore_path = os.path.join(expand_src_dir_path,
|
125
|
+
constants.GIT_IGNORE_FILE)
|
82
126
|
|
83
127
|
git_exclude_exists = os.path.isfile(git_exclude_path)
|
84
128
|
gitignore_exists = os.path.isfile(gitignore_path)
|
@@ -162,3 +206,80 @@ def get_excluded_files_from_gitignore(src_dir_path: str) -> List[str]:
|
|
162
206
|
to_be_excluded += '*'
|
163
207
|
excluded_list.append(to_be_excluded)
|
164
208
|
return excluded_list
|
209
|
+
|
210
|
+
|
211
|
+
def get_excluded_files(src_dir_path: str) -> List[str]:
    """List files and directories to be excluded.

    The .skyignore file takes precedence when it exists in the source
    directory; otherwise the git ignore rules are used.

    Args:
        src_dir_path: Source directory to inspect; `~` is expanded.

    Returns:
        The excluded entries reported by the selected ignore mechanism.
    """
    # TODO: this could return a huge list of files,
    # should think of ways to optimize.
    skyignore_path = os.path.join(os.path.expanduser(src_dir_path),
                                  constants.SKY_IGNORE_FILE)
    if os.path.exists(skyignore_path):
        ignore_file = constants.SKY_IGNORE_FILE
        collect_excluded = get_excluded_files_from_skyignore
    else:
        ignore_file = constants.GIT_IGNORE_FILE
        collect_excluded = get_excluded_files_from_gitignore

    logger.debug(f' {colorama.Style.DIM}'
                 f'Excluded files to sync to cluster based on '
                 f'{ignore_file}.'
                 f'{colorama.Style.RESET_ALL}')
    return collect_excluded(src_dir_path)
|
229
|
+
|
230
|
+
|
231
|
+
def zip_files_and_folders(items: List[str],
                          output_file: Union[str, pathlib.Path],
                          log_file: Optional[TextIO] = None):
    """Archive the given files and directories into a zip file.

    Entries reported by `get_excluded_files` for an item are left out of the
    archive. Excluded directories are pruned from the traversal, so nothing
    beneath them is archived either. Symlinks are stored as symlinks instead
    of being followed.

    Args:
        items: Paths of files or directories to archive; `~` is expanded.
        output_file: Path of the zip file to create.
        log_file: Optional text stream that receives one line per zipped
            item.

    Raises:
        ValueError: If an item is neither an existing file nor a directory.
    """

    def _store_symlink(zipf, path: str, is_dir: bool):
        # Get the target of the symlink
        target = os.readlink(path)
        # Use relative path as absolute path will not be able to resolve on
        # remote API server.
        if os.path.isabs(target):
            target = os.path.relpath(target, os.path.dirname(path))
        # Create a ZipInfo instance
        zi = zipfile.ZipInfo(path + '/') if is_dir else zipfile.ZipInfo(path)
        # Set external attributes to mark as symlink: the upper 16 bits hold
        # the Unix mode, 0o120755 (S_IFLNK | rwxr-xr-x).
        zi.external_attr = 0xA1ED0000
        # Write symlink target as content
        zipf.writestr(zi, target)

    with warnings.catch_warnings():
        warnings.filterwarnings('ignore',
                                category=UserWarning,
                                message='Duplicate name:')
        with zipfile.ZipFile(output_file, 'w') as zipf:
            for item in items:
                item = os.path.expanduser(item)
                if not os.path.isfile(item) and not os.path.isdir(item):
                    raise ValueError(f'{item} does not exist.')
                excluded_files = {
                    os.path.join(item, f) for f in get_excluded_files(item)
                }
                if os.path.isfile(item) and item not in excluded_files:
                    zipf.write(item)
                elif os.path.isdir(item):
                    for root, dirs, files in os.walk(item, followlinks=False):
                        # Prune excluded directories in place so that
                        # os.walk() does not descend into them; otherwise
                        # their contents would still end up in the archive.
                        dirs[:] = [
                            dir_name for dir_name in dirs
                            if os.path.join(root, dir_name) not in
                            excluded_files
                        ]
                        # Store directory entries (important for empty
                        # directories)
                        for dir_name in dirs:
                            dir_path = os.path.join(root, dir_name)
                            # If it's a symlink, store it as a symlink
                            if os.path.islink(dir_path):
                                _store_symlink(zipf, dir_path, is_dir=True)
                            else:
                                zipf.write(dir_path)

                        for file in files:
                            file_path = os.path.join(root, file)
                            if file_path in excluded_files:
                                continue
                            if os.path.islink(file_path):
                                _store_symlink(zipf, file_path, is_dir=False)
                            else:
                                zipf.write(file_path)
                if log_file is not None:
                    log_file.write(f'Zipped {item}\n')
|
sky/exceptions.py
CHANGED
@@ -1,11 +1,16 @@
|
|
1
1
|
"""Exceptions."""
|
2
|
+
import builtins
|
2
3
|
import enum
|
4
|
+
import traceback
|
5
|
+
import types
|
3
6
|
import typing
|
4
|
-
from typing import List, Optional
|
7
|
+
from typing import Any, Dict, List, Optional, Sequence
|
8
|
+
|
9
|
+
from sky.utils import env_options
|
5
10
|
|
6
11
|
if typing.TYPE_CHECKING:
|
7
|
-
from sky import status_lib
|
8
12
|
from sky.backends import backend
|
13
|
+
from sky.utils import status_lib
|
9
14
|
|
10
15
|
# Return code for keyboard interruption and SIGTSTP
|
11
16
|
KEYBOARD_INTERRUPT_CODE = 130
|
@@ -19,6 +24,107 @@ INSUFFICIENT_PRIVILEGES_CODE = 52
|
|
19
24
|
GIT_FATAL_EXIT_CODE = 128
|
20
25
|
|
21
26
|
|
27
|
+
def is_safe_exception(exc: Exception) -> bool:
    """Returns True if the exception is safe to send to clients.

    Safe exceptions are:
    1. Built-in exceptions
    2. SkyPilot's own exceptions

    Args:
        exc: The exception instance to classify.

    Returns:
        True if the exception class is defined in `builtins` or in the `sky`
        package (including the top-level `sky` module itself), else False.
    """
    module = type(exc).__module__

    # Builtin exceptions (e.g., ValueError, RuntimeError)
    if module == 'builtins':
        return True

    # SkyPilot exceptions. The exact match covers classes defined directly in
    # the top-level `sky` module; the trailing dot in the prefix keeps
    # unrelated packages such as `skyfoo` out.
    if module == 'sky' or module.startswith('sky.'):
        return True

    return False
|
45
|
+
|
46
|
+
|
47
|
+
def wrap_exception(exc: Exception) -> Exception:
    """Converts an exception that is unsafe for clients into a CloudError.

    Exceptions that `is_safe_exception` accepts are returned unchanged. Any
    other exception (for example one raised by a cloud provider package that
    is not installed on the client) is replaced by a CloudError carrying its
    message, the top-level package name of its class, and its class name.

    Args:
        exc: The exception to check.

    Returns:
        `exc` itself if it is safe, otherwise a new CloudError.
    """
    if not is_safe_exception(exc):
        exc_type = type(exc)
        message = str(exc)
        # The first component of the module path names the provider package.
        provider = exc_type.__module__.split('.')[0]
        return CloudError(message=message,
                          cloud_provider=provider,
                          error_type=exc_type.__name__)
    return exc
|
61
|
+
|
62
|
+
|
63
|
+
def serialize_exception(e: Exception) -> Dict[str, Any]:
    """Serialize the exception into a plain dictionary.

    Unsafe exceptions (e.g., cloud exceptions) are first wrapped into
    SkyPilot's CloudError so that clients can deserialize them without having
    the cloud provider packages installed.

    Args:
        e: The exception to serialize.

    Returns:
        A dict with the keys 'type', 'message', 'args', 'attributes' and
        'stacktrace'.
    """
    # Wrap unsafe exceptions before serialization
    e = wrap_exception(e)

    stacktrace = getattr(e, 'stacktrace', None)
    # The stacktrace travels in its own field; traceback objects are replaced
    # by their formatted representation.
    attributes = {
        name: (traceback.format_tb(value) if isinstance(
            value, types.TracebackType) else value)
        for name, value in e.__dict__.items()
        if name != 'stacktrace'
    }

    # Exceptions that build their message from their own arguments must not
    # carry `args`, otherwise reconstruction would pass them twice.
    if isinstance(e, SkyPilotExcludeArgsBaseException):
        args = tuple()
    else:
        args = e.args

    return {
        'type': e.__class__.__name__,
        'message': str(e),
        'args': args,
        'attributes': attributes,
        'stacktrace': stacktrace,
    }
|
92
|
+
|
93
|
+
|
94
|
+
def deserialize_exception(serialized: Dict[str, Any]) -> Exception:
    """Deserialize the exception.

    The type name is looked up among the built-ins first and then among the
    names defined in this module. Only exception classes are ever
    instantiated: a name that resolves to anything else (for example the
    built-in functions `exec`, `eval` or `open`) is treated like an unknown
    type, so a malformed or malicious payload cannot cause an arbitrary
    callable to be invoked with payload-controlled arguments.

    Args:
        serialized: A dict with the keys 'type', 'message', 'args',
            'attributes' and 'stacktrace'.

    Returns:
        The reconstructed exception, or a generic Exception whose message is
        '<type>: <message>' if the type is unknown or not an exception class.
    """
    exception_type = serialized['type']
    exception_class = getattr(builtins, exception_type, None)
    if exception_class is None:
        exception_class = globals().get(exception_type, None)
    if not (isinstance(exception_class, type) and
            issubclass(exception_class, BaseException)):
        # Unknown exception type, or a name that is not an exception class.
        return Exception(f'{exception_type}: {serialized["message"]}')
    e = exception_class(*serialized['args'], **serialized['attributes'])
    if serialized['stacktrace'] is not None:
        setattr(e, 'stacktrace', serialized['stacktrace'])
    return e
|
108
|
+
|
109
|
+
|
110
|
+
class CloudError(Exception):
    """SkyPilot exception that stands in for a cloud-specific error.

    Args:
        message: The message of the original error.
        cloud_provider: Name identifying the provider the error came from.
        error_type: Class name of the original error.
    """

    def __init__(self, message: str, cloud_provider: str, error_type: str):
        super().__init__(message)
        self.cloud_provider = cloud_provider
        self.error_type = error_type

    def __str__(self):
        # Prefix the original message with its origin.
        original_message = super().__str__()
        origin = f'{self.cloud_provider} error ({self.error_type})'
        return f'{origin}: {original_message}'
|
121
|
+
|
122
|
+
|
123
|
+
class InvalidSkyPilotConfigError(ValueError):
    """Signals that the SkyPilot config is invalid."""
|
126
|
+
|
127
|
+
|
22
128
|
class ResourcesUnavailableError(Exception):
|
23
129
|
"""Raised when resources are unavailable.
|
24
130
|
|
@@ -61,12 +167,12 @@ class ProvisionPrechecksError(Exception):
|
|
61
167
|
the error will be raised.
|
62
168
|
|
63
169
|
Args:
|
64
|
-
reasons: (
|
170
|
+
reasons: (Sequence[Exception]) The reasons why the prechecks failed.
|
65
171
|
"""
|
66
172
|
|
67
|
-
def __init__(self, reasons:
|
173
|
+
def __init__(self, reasons: Sequence[Exception]) -> None:
|
68
174
|
super().__init__()
|
69
|
-
self.reasons =
|
175
|
+
self.reasons = reasons
|
70
176
|
|
71
177
|
|
72
178
|
class ManagedJobReachedMaxRetriesError(Exception):
|
@@ -79,12 +185,34 @@ class ManagedJobReachedMaxRetriesError(Exception):
|
|
79
185
|
pass
|
80
186
|
|
81
187
|
|
188
|
+
class ManagedJobStatusError(Exception):
    """Signals an invalid status update of a managed job task.

    Example: a RUNNING job cannot become SUBMITTED.
    """
|
194
|
+
|
195
|
+
|
82
196
|
class ResourcesMismatchError(Exception):
    """Signals that resources are mismatched."""
|
85
199
|
|
86
200
|
|
87
|
-
class
|
201
|
+
class SkyPilotExcludeArgsBaseException(Exception):
    """Base class for exceptions whose `args` are omitted when serialized.

    Because of how exceptions are serialized and deserialized, `args` must
    not be included in the serialized form of an exception whose __init__
    does not accept `args` as its arguments.

    This applies to exceptions that construct their error message from the
    arguments passed to __init__, rather than taking the error message
    directly as the first argument. Refer to `CommandError` for an example.
    """
|
213
|
+
|
214
|
+
|
215
|
+
class CommandError(SkyPilotExcludeArgsBaseException):
|
88
216
|
"""Raised when a command fails.
|
89
217
|
|
90
218
|
Args:
|
@@ -100,9 +228,14 @@ class CommandError(Exception):
|
|
100
228
|
self.command = command
|
101
229
|
self.error_msg = error_msg
|
102
230
|
self.detailed_reason = detailed_reason
|
231
|
+
|
103
232
|
if not command:
|
104
233
|
message = error_msg
|
105
234
|
else:
|
235
|
+
if (len(command) > 100 and
|
236
|
+
not env_options.Options.SHOW_DEBUG_INFO.get()):
|
237
|
+
# Truncate the command to avoid overflow.
|
238
|
+
command = command[:100] + '...'
|
106
239
|
message = (f'Command {command} failed with return code '
|
107
240
|
f'{returncode}.\n{error_msg}')
|
108
241
|
super().__init__(message)
|
@@ -113,7 +246,7 @@ class ClusterNotUpError(Exception):
|
|
113
246
|
|
114
247
|
def __init__(self,
|
115
248
|
message: str,
|
116
|
-
cluster_status: Optional['status_lib.ClusterStatus'],
|
249
|
+
cluster_status: Optional['status_lib.ClusterStatus'] = None,
|
117
250
|
handle: Optional['backend.ResourceHandle'] = None) -> None:
|
118
251
|
super().__init__(message)
|
119
252
|
self.cluster_status = cluster_status
|
@@ -125,6 +258,13 @@ class ClusterSetUpError(Exception):
|
|
125
258
|
pass
|
126
259
|
|
127
260
|
|
261
|
+
class ClusterDoesNotExist(ValueError):
    """Raised when trying to operate on a cluster that does not exist."""
    # Derives from ValueError for compatibility reasons: a plain ValueError
    # used to be thrown in this situation.
|
266
|
+
|
267
|
+
|
128
268
|
class NotSupportedError(Exception):
|
129
269
|
"""Raised when a feature is not supported."""
|
130
270
|
pass
|
@@ -190,6 +330,12 @@ class StorageExternalDeletionError(StorageBucketGetError):
|
|
190
330
|
pass
|
191
331
|
|
192
332
|
|
333
|
+
class NonExistentStorageAccountError(StorageExternalDeletionError):
    # Raised when the storage account provided through config.yaml or read
    # from the store handle (local db) does not exist.
    pass
|
337
|
+
|
338
|
+
|
193
339
|
class FetchClusterInfoError(Exception):
|
194
340
|
"""Raised when fetching the cluster info fails."""
|
195
341
|
|
@@ -237,7 +383,7 @@ class NoCloudAccessError(Exception):
|
|
237
383
|
pass
|
238
384
|
|
239
385
|
|
240
|
-
class AWSAzFetchingError(
|
386
|
+
class AWSAzFetchingError(SkyPilotExcludeArgsBaseException):
|
241
387
|
"""Raised when fetching the AWS availability zone fails."""
|
242
388
|
|
243
389
|
class Reason(enum.Enum):
|
@@ -276,3 +422,28 @@ class ServeUserTerminatedError(Exception):
|
|
276
422
|
|
277
423
|
class PortDoesNotExistError(Exception):
    """Signals that the port does not exist."""
|
425
|
+
|
426
|
+
|
427
|
+
class UserRequestRejectedByPolicy(Exception):
    """Signals that an admin policy rejected a user request."""
|
430
|
+
|
431
|
+
|
432
|
+
class NoClusterLaunchedError(Exception):
    """Signals that no cluster was launched.

    Cleanup can therefore be skipped during failover.
    """
|
435
|
+
|
436
|
+
|
437
|
+
class RequestCancelled(Exception):
    """Signals that a request was cancelled."""
|
440
|
+
|
441
|
+
|
442
|
+
class ApiServerConnectionError(RuntimeError):
    """Signals that the API server cannot be connected.

    Args:
        server_url: URL of the API server that could not be reached; it is
            embedded in the error message together with a health-check hint.
    """

    def __init__(self, server_url: str):
        message = (f'Could not connect to SkyPilot API server at {server_url}. '
                   f'Please ensure that the server is running. '
                   f'Try: curl {server_url}/api/health')
        super().__init__(message)
|