skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299)
  1. sky/__init__.py +64 -32
  2. sky/adaptors/aws.py +23 -6
  3. sky/adaptors/azure.py +432 -15
  4. sky/adaptors/cloudflare.py +5 -5
  5. sky/adaptors/common.py +19 -9
  6. sky/adaptors/do.py +20 -0
  7. sky/adaptors/gcp.py +3 -2
  8. sky/adaptors/kubernetes.py +122 -88
  9. sky/adaptors/nebius.py +100 -0
  10. sky/adaptors/oci.py +39 -1
  11. sky/adaptors/vast.py +29 -0
  12. sky/admin_policy.py +101 -0
  13. sky/authentication.py +117 -98
  14. sky/backends/backend.py +52 -20
  15. sky/backends/backend_utils.py +669 -557
  16. sky/backends/cloud_vm_ray_backend.py +1099 -808
  17. sky/backends/local_docker_backend.py +14 -8
  18. sky/backends/wheel_utils.py +38 -20
  19. sky/benchmark/benchmark_utils.py +22 -23
  20. sky/check.py +76 -27
  21. sky/cli.py +1586 -1139
  22. sky/client/__init__.py +1 -0
  23. sky/client/cli.py +5683 -0
  24. sky/client/common.py +345 -0
  25. sky/client/sdk.py +1765 -0
  26. sky/cloud_stores.py +283 -19
  27. sky/clouds/__init__.py +7 -2
  28. sky/clouds/aws.py +303 -112
  29. sky/clouds/azure.py +185 -179
  30. sky/clouds/cloud.py +115 -37
  31. sky/clouds/cudo.py +29 -22
  32. sky/clouds/do.py +313 -0
  33. sky/clouds/fluidstack.py +44 -54
  34. sky/clouds/gcp.py +206 -65
  35. sky/clouds/ibm.py +26 -21
  36. sky/clouds/kubernetes.py +345 -91
  37. sky/clouds/lambda_cloud.py +40 -29
  38. sky/clouds/nebius.py +297 -0
  39. sky/clouds/oci.py +129 -90
  40. sky/clouds/paperspace.py +22 -18
  41. sky/clouds/runpod.py +53 -34
  42. sky/clouds/scp.py +28 -24
  43. sky/clouds/service_catalog/__init__.py +19 -13
  44. sky/clouds/service_catalog/aws_catalog.py +29 -12
  45. sky/clouds/service_catalog/azure_catalog.py +33 -6
  46. sky/clouds/service_catalog/common.py +95 -75
  47. sky/clouds/service_catalog/constants.py +3 -3
  48. sky/clouds/service_catalog/cudo_catalog.py +13 -3
  49. sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
  50. sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
  51. sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
  52. sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
  53. sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
  54. sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
  55. sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
  56. sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
  57. sky/clouds/service_catalog/do_catalog.py +111 -0
  58. sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
  59. sky/clouds/service_catalog/gcp_catalog.py +16 -2
  60. sky/clouds/service_catalog/ibm_catalog.py +2 -2
  61. sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
  62. sky/clouds/service_catalog/lambda_catalog.py +8 -3
  63. sky/clouds/service_catalog/nebius_catalog.py +116 -0
  64. sky/clouds/service_catalog/oci_catalog.py +31 -4
  65. sky/clouds/service_catalog/paperspace_catalog.py +2 -2
  66. sky/clouds/service_catalog/runpod_catalog.py +2 -2
  67. sky/clouds/service_catalog/scp_catalog.py +2 -2
  68. sky/clouds/service_catalog/vast_catalog.py +104 -0
  69. sky/clouds/service_catalog/vsphere_catalog.py +2 -2
  70. sky/clouds/utils/aws_utils.py +65 -0
  71. sky/clouds/utils/azure_utils.py +91 -0
  72. sky/clouds/utils/gcp_utils.py +5 -9
  73. sky/clouds/utils/oci_utils.py +47 -5
  74. sky/clouds/utils/scp_utils.py +4 -3
  75. sky/clouds/vast.py +280 -0
  76. sky/clouds/vsphere.py +22 -18
  77. sky/core.py +361 -107
  78. sky/dag.py +41 -28
  79. sky/data/data_transfer.py +37 -0
  80. sky/data/data_utils.py +211 -32
  81. sky/data/mounting_utils.py +182 -30
  82. sky/data/storage.py +2118 -270
  83. sky/data/storage_utils.py +126 -5
  84. sky/exceptions.py +179 -8
  85. sky/execution.py +158 -85
  86. sky/global_user_state.py +150 -34
  87. sky/jobs/__init__.py +12 -10
  88. sky/jobs/client/__init__.py +0 -0
  89. sky/jobs/client/sdk.py +302 -0
  90. sky/jobs/constants.py +49 -11
  91. sky/jobs/controller.py +161 -99
  92. sky/jobs/dashboard/dashboard.py +171 -25
  93. sky/jobs/dashboard/templates/index.html +572 -60
  94. sky/jobs/recovery_strategy.py +157 -156
  95. sky/jobs/scheduler.py +307 -0
  96. sky/jobs/server/__init__.py +1 -0
  97. sky/jobs/server/core.py +598 -0
  98. sky/jobs/server/dashboard_utils.py +69 -0
  99. sky/jobs/server/server.py +190 -0
  100. sky/jobs/state.py +627 -122
  101. sky/jobs/utils.py +615 -206
  102. sky/models.py +27 -0
  103. sky/optimizer.py +142 -83
  104. sky/provision/__init__.py +20 -5
  105. sky/provision/aws/config.py +124 -42
  106. sky/provision/aws/instance.py +130 -53
  107. sky/provision/azure/__init__.py +7 -0
  108. sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
  109. sky/provision/azure/config.py +220 -0
  110. sky/provision/azure/instance.py +1012 -37
  111. sky/provision/common.py +31 -3
  112. sky/provision/constants.py +25 -0
  113. sky/provision/cudo/__init__.py +2 -1
  114. sky/provision/cudo/cudo_utils.py +112 -0
  115. sky/provision/cudo/cudo_wrapper.py +37 -16
  116. sky/provision/cudo/instance.py +28 -12
  117. sky/provision/do/__init__.py +11 -0
  118. sky/provision/do/config.py +14 -0
  119. sky/provision/do/constants.py +10 -0
  120. sky/provision/do/instance.py +287 -0
  121. sky/provision/do/utils.py +301 -0
  122. sky/provision/docker_utils.py +82 -46
  123. sky/provision/fluidstack/fluidstack_utils.py +57 -125
  124. sky/provision/fluidstack/instance.py +15 -43
  125. sky/provision/gcp/config.py +19 -9
  126. sky/provision/gcp/constants.py +7 -1
  127. sky/provision/gcp/instance.py +55 -34
  128. sky/provision/gcp/instance_utils.py +339 -80
  129. sky/provision/gcp/mig_utils.py +210 -0
  130. sky/provision/instance_setup.py +172 -133
  131. sky/provision/kubernetes/__init__.py +1 -0
  132. sky/provision/kubernetes/config.py +104 -90
  133. sky/provision/kubernetes/constants.py +8 -0
  134. sky/provision/kubernetes/instance.py +680 -325
  135. sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
  136. sky/provision/kubernetes/network.py +54 -20
  137. sky/provision/kubernetes/network_utils.py +70 -21
  138. sky/provision/kubernetes/utils.py +1370 -251
  139. sky/provision/lambda_cloud/__init__.py +11 -0
  140. sky/provision/lambda_cloud/config.py +10 -0
  141. sky/provision/lambda_cloud/instance.py +265 -0
  142. sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
  143. sky/provision/logging.py +1 -1
  144. sky/provision/nebius/__init__.py +11 -0
  145. sky/provision/nebius/config.py +11 -0
  146. sky/provision/nebius/instance.py +285 -0
  147. sky/provision/nebius/utils.py +318 -0
  148. sky/provision/oci/__init__.py +15 -0
  149. sky/provision/oci/config.py +51 -0
  150. sky/provision/oci/instance.py +436 -0
  151. sky/provision/oci/query_utils.py +681 -0
  152. sky/provision/paperspace/constants.py +6 -0
  153. sky/provision/paperspace/instance.py +4 -3
  154. sky/provision/paperspace/utils.py +2 -0
  155. sky/provision/provisioner.py +207 -130
  156. sky/provision/runpod/__init__.py +1 -0
  157. sky/provision/runpod/api/__init__.py +3 -0
  158. sky/provision/runpod/api/commands.py +119 -0
  159. sky/provision/runpod/api/pods.py +142 -0
  160. sky/provision/runpod/instance.py +64 -8
  161. sky/provision/runpod/utils.py +239 -23
  162. sky/provision/vast/__init__.py +10 -0
  163. sky/provision/vast/config.py +11 -0
  164. sky/provision/vast/instance.py +247 -0
  165. sky/provision/vast/utils.py +162 -0
  166. sky/provision/vsphere/common/vim_utils.py +1 -1
  167. sky/provision/vsphere/instance.py +8 -18
  168. sky/provision/vsphere/vsphere_utils.py +1 -1
  169. sky/resources.py +247 -102
  170. sky/serve/__init__.py +9 -9
  171. sky/serve/autoscalers.py +361 -299
  172. sky/serve/client/__init__.py +0 -0
  173. sky/serve/client/sdk.py +366 -0
  174. sky/serve/constants.py +12 -3
  175. sky/serve/controller.py +106 -36
  176. sky/serve/load_balancer.py +63 -12
  177. sky/serve/load_balancing_policies.py +84 -2
  178. sky/serve/replica_managers.py +42 -34
  179. sky/serve/serve_state.py +62 -32
  180. sky/serve/serve_utils.py +271 -160
  181. sky/serve/server/__init__.py +0 -0
  182. sky/serve/{core.py → server/core.py} +271 -90
  183. sky/serve/server/server.py +112 -0
  184. sky/serve/service.py +52 -16
  185. sky/serve/service_spec.py +95 -32
  186. sky/server/__init__.py +1 -0
  187. sky/server/common.py +430 -0
  188. sky/server/constants.py +21 -0
  189. sky/server/html/log.html +174 -0
  190. sky/server/requests/__init__.py +0 -0
  191. sky/server/requests/executor.py +472 -0
  192. sky/server/requests/payloads.py +487 -0
  193. sky/server/requests/queues/__init__.py +0 -0
  194. sky/server/requests/queues/mp_queue.py +76 -0
  195. sky/server/requests/requests.py +567 -0
  196. sky/server/requests/serializers/__init__.py +0 -0
  197. sky/server/requests/serializers/decoders.py +192 -0
  198. sky/server/requests/serializers/encoders.py +166 -0
  199. sky/server/server.py +1106 -0
  200. sky/server/stream_utils.py +141 -0
  201. sky/setup_files/MANIFEST.in +2 -5
  202. sky/setup_files/dependencies.py +159 -0
  203. sky/setup_files/setup.py +14 -125
  204. sky/sky_logging.py +59 -14
  205. sky/skylet/autostop_lib.py +2 -2
  206. sky/skylet/constants.py +183 -50
  207. sky/skylet/events.py +22 -10
  208. sky/skylet/job_lib.py +403 -258
  209. sky/skylet/log_lib.py +111 -71
  210. sky/skylet/log_lib.pyi +6 -0
  211. sky/skylet/providers/command_runner.py +6 -8
  212. sky/skylet/providers/ibm/node_provider.py +2 -2
  213. sky/skylet/providers/scp/config.py +11 -3
  214. sky/skylet/providers/scp/node_provider.py +8 -8
  215. sky/skylet/skylet.py +3 -1
  216. sky/skylet/subprocess_daemon.py +69 -17
  217. sky/skypilot_config.py +119 -57
  218. sky/task.py +205 -64
  219. sky/templates/aws-ray.yml.j2 +37 -7
  220. sky/templates/azure-ray.yml.j2 +27 -82
  221. sky/templates/cudo-ray.yml.j2 +7 -3
  222. sky/templates/do-ray.yml.j2 +98 -0
  223. sky/templates/fluidstack-ray.yml.j2 +7 -4
  224. sky/templates/gcp-ray.yml.j2 +26 -6
  225. sky/templates/ibm-ray.yml.j2 +3 -2
  226. sky/templates/jobs-controller.yaml.j2 +46 -11
  227. sky/templates/kubernetes-ingress.yml.j2 +7 -0
  228. sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
  229. sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
  230. sky/templates/kubernetes-ray.yml.j2 +292 -25
  231. sky/templates/lambda-ray.yml.j2 +30 -40
  232. sky/templates/nebius-ray.yml.j2 +79 -0
  233. sky/templates/oci-ray.yml.j2 +18 -57
  234. sky/templates/paperspace-ray.yml.j2 +10 -6
  235. sky/templates/runpod-ray.yml.j2 +26 -4
  236. sky/templates/scp-ray.yml.j2 +3 -2
  237. sky/templates/sky-serve-controller.yaml.j2 +12 -1
  238. sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
  239. sky/templates/vast-ray.yml.j2 +70 -0
  240. sky/templates/vsphere-ray.yml.j2 +8 -3
  241. sky/templates/websocket_proxy.py +64 -0
  242. sky/usage/constants.py +10 -1
  243. sky/usage/usage_lib.py +130 -37
  244. sky/utils/accelerator_registry.py +35 -51
  245. sky/utils/admin_policy_utils.py +147 -0
  246. sky/utils/annotations.py +51 -0
  247. sky/utils/cli_utils/status_utils.py +81 -23
  248. sky/utils/cluster_utils.py +356 -0
  249. sky/utils/command_runner.py +452 -89
  250. sky/utils/command_runner.pyi +77 -3
  251. sky/utils/common.py +54 -0
  252. sky/utils/common_utils.py +319 -108
  253. sky/utils/config_utils.py +204 -0
  254. sky/utils/control_master_utils.py +48 -0
  255. sky/utils/controller_utils.py +548 -266
  256. sky/utils/dag_utils.py +93 -32
  257. sky/utils/db_utils.py +18 -4
  258. sky/utils/env_options.py +29 -7
  259. sky/utils/kubernetes/create_cluster.sh +8 -60
  260. sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
  261. sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
  262. sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
  263. sky/utils/kubernetes/gpu_labeler.py +4 -4
  264. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
  265. sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
  266. sky/utils/kubernetes/rsync_helper.sh +24 -0
  267. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
  268. sky/utils/log_utils.py +240 -33
  269. sky/utils/message_utils.py +81 -0
  270. sky/utils/registry.py +127 -0
  271. sky/utils/resources_utils.py +94 -22
  272. sky/utils/rich_utils.py +247 -18
  273. sky/utils/schemas.py +284 -64
  274. sky/{status_lib.py → utils/status_lib.py} +12 -7
  275. sky/utils/subprocess_utils.py +212 -46
  276. sky/utils/timeline.py +12 -7
  277. sky/utils/ux_utils.py +168 -15
  278. skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
  279. skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
  280. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
  281. sky/clouds/cloud_registry.py +0 -31
  282. sky/jobs/core.py +0 -330
  283. sky/skylet/providers/azure/__init__.py +0 -2
  284. sky/skylet/providers/azure/azure-vm-template.json +0 -301
  285. sky/skylet/providers/azure/config.py +0 -170
  286. sky/skylet/providers/azure/node_provider.py +0 -466
  287. sky/skylet/providers/lambda_cloud/__init__.py +0 -2
  288. sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
  289. sky/skylet/providers/oci/__init__.py +0 -2
  290. sky/skylet/providers/oci/node_provider.py +0 -488
  291. sky/skylet/providers/oci/query_helper.py +0 -383
  292. sky/skylet/providers/oci/utils.py +0 -21
  293. sky/utils/cluster_yaml_utils.py +0 -24
  294. sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
  295. skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
  296. skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
  297. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
  298. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
  299. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
sky/data/storage_utils.py CHANGED
@@ -1,15 +1,20 @@
1
1
  """Utility functions for the storage module."""
2
+ import glob
2
3
  import os
4
+ import pathlib
3
5
  import shlex
4
6
  import subprocess
5
- from typing import Any, Dict, List
7
+ from typing import Any, Dict, List, Optional, TextIO, Union
8
+ import warnings
9
+ import zipfile
6
10
 
7
11
  import colorama
8
12
 
9
13
  from sky import exceptions
10
14
  from sky import sky_logging
15
+ from sky.skylet import constants
16
+ from sky.utils import common_utils
11
17
  from sky.utils import log_utils
12
- from sky.utils.cli_utils import status_utils
13
18
 
14
19
  logger = sky_logging.init_logger(__name__)
15
20
 
@@ -19,6 +24,8 @@ _FILE_EXCLUSION_FROM_GITIGNORE_FAILURE_MSG = (
19
24
  'to the cloud storage for {path!r}'
20
25
  'due to the following error: {error_msg!r}')
21
26
 
27
+ _LAST_USE_TRUNC_LENGTH = 25
28
+
22
29
 
23
30
  def format_storage_table(storages: List[Dict[str, Any]],
24
31
  show_all: bool = False) -> str:
@@ -43,8 +50,8 @@ def format_storage_table(storages: List[Dict[str, Any]],
43
50
  if show_all:
44
51
  command = row['last_use']
45
52
  else:
46
- command = status_utils.truncate_long_string(
47
- row['last_use'], status_utils.COMMAND_TRUNC_LENGTH)
53
+ command = common_utils.truncate_long_string(row['last_use'],
54
+ _LAST_USE_TRUNC_LENGTH)
48
55
  storage_table.add_row([
49
56
  # NAME
50
57
  row['name'],
@@ -63,6 +70,42 @@ def format_storage_table(storages: List[Dict[str, Any]],
63
70
  return 'No existing storage.'
64
71
 
65
72
 
73
+ def get_excluded_files_from_skyignore(src_dir_path: str) -> List[str]:
74
+ """List files and patterns ignored by the .skyignore file
75
+ in the given source directory.
76
+ """
77
+ excluded_list: List[str] = []
78
+ expand_src_dir_path = os.path.expanduser(src_dir_path)
79
+ skyignore_path = os.path.join(expand_src_dir_path,
80
+ constants.SKY_IGNORE_FILE)
81
+
82
+ try:
83
+ with open(skyignore_path, 'r', encoding='utf-8') as f:
84
+ for line in f:
85
+ line = line.strip()
86
+ if line and not line.startswith('#'):
87
+ # Make parsing consistent with rsync.
88
+ # Rsync uses '/' as current directory.
89
+ if line.startswith('/'):
90
+ line = '.' + line
91
+ else:
92
+ line = '**/' + line
93
+ # Find all files matching the pattern.
94
+ matching_files = glob.glob(os.path.join(
95
+ expand_src_dir_path, line),
96
+ recursive=True)
97
+ # Process filenames to comply with cloud rsync format.
98
+ for i in range(len(matching_files)):
99
+ matching_files[i] = os.path.relpath(
100
+ matching_files[i], expand_src_dir_path)
101
+ excluded_list.extend(matching_files)
102
+ except IOError as e:
103
+ logger.warning(f'Error reading {skyignore_path}: '
104
+ f'{common_utils.format_exception(e, use_bracket=True)}')
105
+
106
+ return excluded_list
107
+
108
+
66
109
  def get_excluded_files_from_gitignore(src_dir_path: str) -> List[str]:
67
110
  """ Lists files and patterns ignored by git in the source directory
68
111
 
@@ -78,7 +121,8 @@ def get_excluded_files_from_gitignore(src_dir_path: str) -> List[str]:
78
121
  expand_src_dir_path = os.path.expanduser(src_dir_path)
79
122
 
80
123
  git_exclude_path = os.path.join(expand_src_dir_path, '.git/info/exclude')
81
- gitignore_path = os.path.join(expand_src_dir_path, '.gitignore')
124
+ gitignore_path = os.path.join(expand_src_dir_path,
125
+ constants.GIT_IGNORE_FILE)
82
126
 
83
127
  git_exclude_exists = os.path.isfile(git_exclude_path)
84
128
  gitignore_exists = os.path.isfile(gitignore_path)
@@ -162,3 +206,80 @@ def get_excluded_files_from_gitignore(src_dir_path: str) -> List[str]:
162
206
  to_be_excluded += '*'
163
207
  excluded_list.append(to_be_excluded)
164
208
  return excluded_list
209
+
210
+
211
+ def get_excluded_files(src_dir_path: str) -> List[str]:
212
+ # TODO: this could return a huge list of files,
213
+ # should think of ways to optimize.
214
+ """ List files and directories to be excluded."""
215
+ expand_src_dir_path = os.path.expanduser(src_dir_path)
216
+ skyignore_path = os.path.join(expand_src_dir_path,
217
+ constants.SKY_IGNORE_FILE)
218
+ if os.path.exists(skyignore_path):
219
+ logger.debug(f' {colorama.Style.DIM}'
220
+ f'Excluded files to sync to cluster based on '
221
+ f'{constants.SKY_IGNORE_FILE}.'
222
+ f'{colorama.Style.RESET_ALL}')
223
+ return get_excluded_files_from_skyignore(src_dir_path)
224
+ logger.debug(f' {colorama.Style.DIM}'
225
+ f'Excluded files to sync to cluster based on '
226
+ f'{constants.GIT_IGNORE_FILE}.'
227
+ f'{colorama.Style.RESET_ALL}')
228
+ return get_excluded_files_from_gitignore(src_dir_path)
229
+
230
+
231
+ def zip_files_and_folders(items: List[str],
232
+ output_file: Union[str, pathlib.Path],
233
+ log_file: Optional[TextIO] = None):
234
+
235
+ def _store_symlink(zipf, path: str, is_dir: bool):
236
+ # Get the target of the symlink
237
+ target = os.readlink(path)
238
+ # Use relative path as absolute path will not be able to resolve on
239
+ # remote API server.
240
+ if os.path.isabs(target):
241
+ target = os.path.relpath(target, os.path.dirname(path))
242
+ # Create a ZipInfo instance
243
+ zi = zipfile.ZipInfo(path + '/') if is_dir else zipfile.ZipInfo(path)
244
+ # Set external attributes to mark as symlink
245
+ zi.external_attr = 0xA1ED0000
246
+ # Write symlink target as content
247
+ zipf.writestr(zi, target)
248
+
249
+ with warnings.catch_warnings():
250
+ warnings.filterwarnings('ignore',
251
+ category=UserWarning,
252
+ message='Duplicate name:')
253
+ with zipfile.ZipFile(output_file, 'w') as zipf:
254
+ for item in items:
255
+ item = os.path.expanduser(item)
256
+ if not os.path.isfile(item) and not os.path.isdir(item):
257
+ raise ValueError(f'{item} does not exist.')
258
+ excluded_files = set(
259
+ [os.path.join(item, f) for f in get_excluded_files(item)])
260
+ if os.path.isfile(item) and item not in excluded_files:
261
+ zipf.write(item)
262
+ elif os.path.isdir(item):
263
+ for root, dirs, files in os.walk(item, followlinks=False):
264
+ # Store directory entries (important for empty
265
+ # directories)
266
+ for dir_name in dirs:
267
+ dir_path = os.path.join(root, dir_name)
268
+ if dir_path in excluded_files:
269
+ continue
270
+ # If it's a symlink, store it as a symlink
271
+ if os.path.islink(dir_path):
272
+ _store_symlink(zipf, dir_path, is_dir=True)
273
+ else:
274
+ zipf.write(dir_path)
275
+
276
+ for file in files:
277
+ file_path = os.path.join(root, file)
278
+ if file_path in excluded_files:
279
+ continue
280
+ if os.path.islink(file_path):
281
+ _store_symlink(zipf, file_path, is_dir=False)
282
+ else:
283
+ zipf.write(file_path)
284
+ if log_file is not None:
285
+ log_file.write(f'Zipped {item}\n')
sky/exceptions.py CHANGED
@@ -1,11 +1,16 @@
1
1
  """Exceptions."""
2
+ import builtins
2
3
  import enum
4
+ import traceback
5
+ import types
3
6
  import typing
4
- from typing import List, Optional
7
+ from typing import Any, Dict, List, Optional, Sequence
8
+
9
+ from sky.utils import env_options
5
10
 
6
11
  if typing.TYPE_CHECKING:
7
- from sky import status_lib
8
12
  from sky.backends import backend
13
+ from sky.utils import status_lib
9
14
 
10
15
  # Return code for keyboard interruption and SIGTSTP
11
16
  KEYBOARD_INTERRUPT_CODE = 130
@@ -19,6 +24,107 @@ INSUFFICIENT_PRIVILEGES_CODE = 52
19
24
  GIT_FATAL_EXIT_CODE = 128
20
25
 
21
26
 
27
+ def is_safe_exception(exc: Exception) -> bool:
28
+ """Returns True if the exception is safe to send to clients.
29
+
30
+ Safe exceptions are:
31
+ 1. Built-in exceptions
32
+ 2. SkyPilot's own exceptions
33
+ """
34
+ module = type(exc).__module__
35
+
36
+ # Builtin exceptions (e.g., ValueError, RuntimeError)
37
+ if module == 'builtins':
38
+ return True
39
+
40
+ # SkyPilot exceptions
41
+ if module.startswith('sky.'):
42
+ return True
43
+
44
+ return False
45
+
46
+
47
+ def wrap_exception(exc: Exception) -> Exception:
48
+ """Wraps non-safe exceptions into SkyPilot exceptions
49
+
50
+ This is used to wrap exceptions that are not safe to deserialize at clients.
51
+
52
+ Examples include exceptions from cloud providers whose packages are not
53
+ available at clients.
54
+ """
55
+ if is_safe_exception(exc):
56
+ return exc
57
+
58
+ return CloudError(message=str(exc),
59
+ cloud_provider=type(exc).__module__.split('.')[0],
60
+ error_type=type(exc).__name__)
61
+
62
+
63
+ def serialize_exception(e: Exception) -> Dict[str, Any]:
64
+ """Serialize the exception.
65
+
66
+ This function also wraps any unsafe exceptions (e.g., cloud exceptions)
67
+ into SkyPilot's CloudError before serialization to ensure clients can
68
+ deserialize them without needing cloud provider packages installed.
69
+ """
70
+ # Wrap unsafe exceptions before serialization
71
+ e = wrap_exception(e)
72
+
73
+ stacktrace = getattr(e, 'stacktrace', None)
74
+ attributes = e.__dict__.copy()
75
+ if 'stacktrace' in attributes:
76
+ del attributes['stacktrace']
77
+ for attr_k in list(attributes.keys()):
78
+ attr_v = attributes[attr_k]
79
+ if isinstance(attr_v, types.TracebackType):
80
+ attributes[attr_k] = traceback.format_tb(attr_v)
81
+
82
+ data = {
83
+ 'type': e.__class__.__name__,
84
+ 'message': str(e),
85
+ 'args': e.args,
86
+ 'attributes': attributes,
87
+ 'stacktrace': stacktrace,
88
+ }
89
+ if isinstance(e, SkyPilotExcludeArgsBaseException):
90
+ data['args'] = tuple()
91
+ return data
92
+
93
+
94
+ def deserialize_exception(serialized: Dict[str, Any]) -> Exception:
95
+ """Deserialize the exception."""
96
+ exception_type = serialized['type']
97
+ if hasattr(builtins, exception_type):
98
+ exception_class = getattr(builtins, exception_type)
99
+ else:
100
+ exception_class = globals().get(exception_type, None)
101
+ if exception_class is None:
102
+ # Unknown exception type.
103
+ return Exception(f'{exception_type}: {serialized["message"]}')
104
+ e = exception_class(*serialized['args'], **serialized['attributes'])
105
+ if serialized['stacktrace'] is not None:
106
+ setattr(e, 'stacktrace', serialized['stacktrace'])
107
+ return e
108
+
109
+
110
+ class CloudError(Exception):
111
+ """Wraps cloud-specific errors into a SkyPilot exception."""
112
+
113
+ def __init__(self, message: str, cloud_provider: str, error_type: str):
114
+ super().__init__(message)
115
+ self.cloud_provider = cloud_provider
116
+ self.error_type = error_type
117
+
118
+ def __str__(self):
119
+ return (f'{self.cloud_provider} error ({self.error_type}): '
120
+ f'{super().__str__()}')
121
+
122
+
123
+ class InvalidSkyPilotConfigError(ValueError):
124
+ """Raised when the SkyPilot config is invalid."""
125
+ pass
126
+
127
+
22
128
  class ResourcesUnavailableError(Exception):
23
129
  """Raised when resources are unavailable.
24
130
 
@@ -61,12 +167,12 @@ class ProvisionPrechecksError(Exception):
61
167
  the error will be raised.
62
168
 
63
169
  Args:
64
- reasons: (List[Exception]) The reasons why the prechecks failed.
170
+ reasons: (Sequence[Exception]) The reasons why the prechecks failed.
65
171
  """
66
172
 
67
- def __init__(self, reasons: List[Exception]) -> None:
173
+ def __init__(self, reasons: Sequence[Exception]) -> None:
68
174
  super().__init__()
69
- self.reasons = list(reasons)
175
+ self.reasons = reasons
70
176
 
71
177
 
72
178
  class ManagedJobReachedMaxRetriesError(Exception):
@@ -79,12 +185,34 @@ class ManagedJobReachedMaxRetriesError(Exception):
79
185
  pass
80
186
 
81
187
 
188
+ class ManagedJobStatusError(Exception):
189
+ """Raised when a managed job task status update is invalid.
190
+
191
+ For instance, a RUNNING job cannot become SUBMITTED.
192
+ """
193
+ pass
194
+
195
+
82
196
  class ResourcesMismatchError(Exception):
83
197
  """Raised when resources are mismatched."""
84
198
  pass
85
199
 
86
200
 
87
- class CommandError(Exception):
201
+ class SkyPilotExcludeArgsBaseException(Exception):
202
+ """Base class for exceptions that don't need args while serialization.
203
+
204
+ Due to our serialization/deserialization logic, when an exception does
205
+ not take `args` as an argument in __init__, `args` should not be included
206
+ in the serialized exception.
207
+
208
+ This is useful when an exception needs to construct the error message based
209
+ on the arguments passed in instead of directly having the error message as
210
+ the first argument in __init__. Refer to `CommandError` for an example.
211
+ """
212
+ pass
213
+
214
+
215
+ class CommandError(SkyPilotExcludeArgsBaseException):
88
216
  """Raised when a command fails.
89
217
 
90
218
  Args:
@@ -100,9 +228,14 @@ class CommandError(Exception):
100
228
  self.command = command
101
229
  self.error_msg = error_msg
102
230
  self.detailed_reason = detailed_reason
231
+
103
232
  if not command:
104
233
  message = error_msg
105
234
  else:
235
+ if (len(command) > 100 and
236
+ not env_options.Options.SHOW_DEBUG_INFO.get()):
237
+ # Chunck the command to avoid overflow.
238
+ command = command[:100] + '...'
106
239
  message = (f'Command {command} failed with return code '
107
240
  f'{returncode}.\n{error_msg}')
108
241
  super().__init__(message)
@@ -113,7 +246,7 @@ class ClusterNotUpError(Exception):
113
246
 
114
247
  def __init__(self,
115
248
  message: str,
116
- cluster_status: Optional['status_lib.ClusterStatus'],
249
+ cluster_status: Optional['status_lib.ClusterStatus'] = None,
117
250
  handle: Optional['backend.ResourceHandle'] = None) -> None:
118
251
  super().__init__(message)
119
252
  self.cluster_status = cluster_status
@@ -125,6 +258,13 @@ class ClusterSetUpError(Exception):
125
258
  pass
126
259
 
127
260
 
261
+ class ClusterDoesNotExist(ValueError):
262
+ """Raise when trying to operate on a cluster that does not exist."""
263
+ # This extends ValueError for compatibility reasons - we used to throw
264
+ # ValueError instead of this.
265
+ pass
266
+
267
+
128
268
  class NotSupportedError(Exception):
129
269
  """Raised when a feature is not supported."""
130
270
  pass
@@ -190,6 +330,12 @@ class StorageExternalDeletionError(StorageBucketGetError):
190
330
  pass
191
331
 
192
332
 
333
+ class NonExistentStorageAccountError(StorageExternalDeletionError):
334
+ # Error raise when storage account provided through config.yaml or read
335
+ # from store handle(local db) does not exist.
336
+ pass
337
+
338
+
193
339
  class FetchClusterInfoError(Exception):
194
340
  """Raised when fetching the cluster info fails."""
195
341
 
@@ -237,7 +383,7 @@ class NoCloudAccessError(Exception):
237
383
  pass
238
384
 
239
385
 
240
- class AWSAzFetchingError(Exception):
386
+ class AWSAzFetchingError(SkyPilotExcludeArgsBaseException):
241
387
  """Raised when fetching the AWS availability zone fails."""
242
388
 
243
389
  class Reason(enum.Enum):
@@ -276,3 +422,28 @@ class ServeUserTerminatedError(Exception):
276
422
 
277
423
  class PortDoesNotExistError(Exception):
278
424
  """Raised when the port does not exist."""
425
+
426
+
427
+ class UserRequestRejectedByPolicy(Exception):
428
+ """Raised when a user request is rejected by an admin policy."""
429
+ pass
430
+
431
+
432
+ class NoClusterLaunchedError(Exception):
433
+ """No cluster launched, so cleanup can be skipped during failover."""
434
+ pass
435
+
436
+
437
+ class RequestCancelled(Exception):
438
+ """Raised when a request is cancelled."""
439
+ pass
440
+
441
+
442
+ class ApiServerConnectionError(RuntimeError):
443
+ """Raised when the API server cannot be connected."""
444
+
445
+ def __init__(self, server_url: str):
446
+ super().__init__(
447
+ f'Could not connect to SkyPilot API server at {server_url}. '
448
+ f'Please ensure that the server is running. '
449
+ f'Try: curl {server_url}/api/health')