skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299)
  1. sky/__init__.py +64 -32
  2. sky/adaptors/aws.py +23 -6
  3. sky/adaptors/azure.py +432 -15
  4. sky/adaptors/cloudflare.py +5 -5
  5. sky/adaptors/common.py +19 -9
  6. sky/adaptors/do.py +20 -0
  7. sky/adaptors/gcp.py +3 -2
  8. sky/adaptors/kubernetes.py +122 -88
  9. sky/adaptors/nebius.py +100 -0
  10. sky/adaptors/oci.py +39 -1
  11. sky/adaptors/vast.py +29 -0
  12. sky/admin_policy.py +101 -0
  13. sky/authentication.py +117 -98
  14. sky/backends/backend.py +52 -20
  15. sky/backends/backend_utils.py +669 -557
  16. sky/backends/cloud_vm_ray_backend.py +1099 -808
  17. sky/backends/local_docker_backend.py +14 -8
  18. sky/backends/wheel_utils.py +38 -20
  19. sky/benchmark/benchmark_utils.py +22 -23
  20. sky/check.py +76 -27
  21. sky/cli.py +1586 -1139
  22. sky/client/__init__.py +1 -0
  23. sky/client/cli.py +5683 -0
  24. sky/client/common.py +345 -0
  25. sky/client/sdk.py +1765 -0
  26. sky/cloud_stores.py +283 -19
  27. sky/clouds/__init__.py +7 -2
  28. sky/clouds/aws.py +303 -112
  29. sky/clouds/azure.py +185 -179
  30. sky/clouds/cloud.py +115 -37
  31. sky/clouds/cudo.py +29 -22
  32. sky/clouds/do.py +313 -0
  33. sky/clouds/fluidstack.py +44 -54
  34. sky/clouds/gcp.py +206 -65
  35. sky/clouds/ibm.py +26 -21
  36. sky/clouds/kubernetes.py +345 -91
  37. sky/clouds/lambda_cloud.py +40 -29
  38. sky/clouds/nebius.py +297 -0
  39. sky/clouds/oci.py +129 -90
  40. sky/clouds/paperspace.py +22 -18
  41. sky/clouds/runpod.py +53 -34
  42. sky/clouds/scp.py +28 -24
  43. sky/clouds/service_catalog/__init__.py +19 -13
  44. sky/clouds/service_catalog/aws_catalog.py +29 -12
  45. sky/clouds/service_catalog/azure_catalog.py +33 -6
  46. sky/clouds/service_catalog/common.py +95 -75
  47. sky/clouds/service_catalog/constants.py +3 -3
  48. sky/clouds/service_catalog/cudo_catalog.py +13 -3
  49. sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
  50. sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
  51. sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
  52. sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
  53. sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
  54. sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
  55. sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
  56. sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
  57. sky/clouds/service_catalog/do_catalog.py +111 -0
  58. sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
  59. sky/clouds/service_catalog/gcp_catalog.py +16 -2
  60. sky/clouds/service_catalog/ibm_catalog.py +2 -2
  61. sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
  62. sky/clouds/service_catalog/lambda_catalog.py +8 -3
  63. sky/clouds/service_catalog/nebius_catalog.py +116 -0
  64. sky/clouds/service_catalog/oci_catalog.py +31 -4
  65. sky/clouds/service_catalog/paperspace_catalog.py +2 -2
  66. sky/clouds/service_catalog/runpod_catalog.py +2 -2
  67. sky/clouds/service_catalog/scp_catalog.py +2 -2
  68. sky/clouds/service_catalog/vast_catalog.py +104 -0
  69. sky/clouds/service_catalog/vsphere_catalog.py +2 -2
  70. sky/clouds/utils/aws_utils.py +65 -0
  71. sky/clouds/utils/azure_utils.py +91 -0
  72. sky/clouds/utils/gcp_utils.py +5 -9
  73. sky/clouds/utils/oci_utils.py +47 -5
  74. sky/clouds/utils/scp_utils.py +4 -3
  75. sky/clouds/vast.py +280 -0
  76. sky/clouds/vsphere.py +22 -18
  77. sky/core.py +361 -107
  78. sky/dag.py +41 -28
  79. sky/data/data_transfer.py +37 -0
  80. sky/data/data_utils.py +211 -32
  81. sky/data/mounting_utils.py +182 -30
  82. sky/data/storage.py +2118 -270
  83. sky/data/storage_utils.py +126 -5
  84. sky/exceptions.py +179 -8
  85. sky/execution.py +158 -85
  86. sky/global_user_state.py +150 -34
  87. sky/jobs/__init__.py +12 -10
  88. sky/jobs/client/__init__.py +0 -0
  89. sky/jobs/client/sdk.py +302 -0
  90. sky/jobs/constants.py +49 -11
  91. sky/jobs/controller.py +161 -99
  92. sky/jobs/dashboard/dashboard.py +171 -25
  93. sky/jobs/dashboard/templates/index.html +572 -60
  94. sky/jobs/recovery_strategy.py +157 -156
  95. sky/jobs/scheduler.py +307 -0
  96. sky/jobs/server/__init__.py +1 -0
  97. sky/jobs/server/core.py +598 -0
  98. sky/jobs/server/dashboard_utils.py +69 -0
  99. sky/jobs/server/server.py +190 -0
  100. sky/jobs/state.py +627 -122
  101. sky/jobs/utils.py +615 -206
  102. sky/models.py +27 -0
  103. sky/optimizer.py +142 -83
  104. sky/provision/__init__.py +20 -5
  105. sky/provision/aws/config.py +124 -42
  106. sky/provision/aws/instance.py +130 -53
  107. sky/provision/azure/__init__.py +7 -0
  108. sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
  109. sky/provision/azure/config.py +220 -0
  110. sky/provision/azure/instance.py +1012 -37
  111. sky/provision/common.py +31 -3
  112. sky/provision/constants.py +25 -0
  113. sky/provision/cudo/__init__.py +2 -1
  114. sky/provision/cudo/cudo_utils.py +112 -0
  115. sky/provision/cudo/cudo_wrapper.py +37 -16
  116. sky/provision/cudo/instance.py +28 -12
  117. sky/provision/do/__init__.py +11 -0
  118. sky/provision/do/config.py +14 -0
  119. sky/provision/do/constants.py +10 -0
  120. sky/provision/do/instance.py +287 -0
  121. sky/provision/do/utils.py +301 -0
  122. sky/provision/docker_utils.py +82 -46
  123. sky/provision/fluidstack/fluidstack_utils.py +57 -125
  124. sky/provision/fluidstack/instance.py +15 -43
  125. sky/provision/gcp/config.py +19 -9
  126. sky/provision/gcp/constants.py +7 -1
  127. sky/provision/gcp/instance.py +55 -34
  128. sky/provision/gcp/instance_utils.py +339 -80
  129. sky/provision/gcp/mig_utils.py +210 -0
  130. sky/provision/instance_setup.py +172 -133
  131. sky/provision/kubernetes/__init__.py +1 -0
  132. sky/provision/kubernetes/config.py +104 -90
  133. sky/provision/kubernetes/constants.py +8 -0
  134. sky/provision/kubernetes/instance.py +680 -325
  135. sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
  136. sky/provision/kubernetes/network.py +54 -20
  137. sky/provision/kubernetes/network_utils.py +70 -21
  138. sky/provision/kubernetes/utils.py +1370 -251
  139. sky/provision/lambda_cloud/__init__.py +11 -0
  140. sky/provision/lambda_cloud/config.py +10 -0
  141. sky/provision/lambda_cloud/instance.py +265 -0
  142. sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
  143. sky/provision/logging.py +1 -1
  144. sky/provision/nebius/__init__.py +11 -0
  145. sky/provision/nebius/config.py +11 -0
  146. sky/provision/nebius/instance.py +285 -0
  147. sky/provision/nebius/utils.py +318 -0
  148. sky/provision/oci/__init__.py +15 -0
  149. sky/provision/oci/config.py +51 -0
  150. sky/provision/oci/instance.py +436 -0
  151. sky/provision/oci/query_utils.py +681 -0
  152. sky/provision/paperspace/constants.py +6 -0
  153. sky/provision/paperspace/instance.py +4 -3
  154. sky/provision/paperspace/utils.py +2 -0
  155. sky/provision/provisioner.py +207 -130
  156. sky/provision/runpod/__init__.py +1 -0
  157. sky/provision/runpod/api/__init__.py +3 -0
  158. sky/provision/runpod/api/commands.py +119 -0
  159. sky/provision/runpod/api/pods.py +142 -0
  160. sky/provision/runpod/instance.py +64 -8
  161. sky/provision/runpod/utils.py +239 -23
  162. sky/provision/vast/__init__.py +10 -0
  163. sky/provision/vast/config.py +11 -0
  164. sky/provision/vast/instance.py +247 -0
  165. sky/provision/vast/utils.py +162 -0
  166. sky/provision/vsphere/common/vim_utils.py +1 -1
  167. sky/provision/vsphere/instance.py +8 -18
  168. sky/provision/vsphere/vsphere_utils.py +1 -1
  169. sky/resources.py +247 -102
  170. sky/serve/__init__.py +9 -9
  171. sky/serve/autoscalers.py +361 -299
  172. sky/serve/client/__init__.py +0 -0
  173. sky/serve/client/sdk.py +366 -0
  174. sky/serve/constants.py +12 -3
  175. sky/serve/controller.py +106 -36
  176. sky/serve/load_balancer.py +63 -12
  177. sky/serve/load_balancing_policies.py +84 -2
  178. sky/serve/replica_managers.py +42 -34
  179. sky/serve/serve_state.py +62 -32
  180. sky/serve/serve_utils.py +271 -160
  181. sky/serve/server/__init__.py +0 -0
  182. sky/serve/{core.py → server/core.py} +271 -90
  183. sky/serve/server/server.py +112 -0
  184. sky/serve/service.py +52 -16
  185. sky/serve/service_spec.py +95 -32
  186. sky/server/__init__.py +1 -0
  187. sky/server/common.py +430 -0
  188. sky/server/constants.py +21 -0
  189. sky/server/html/log.html +174 -0
  190. sky/server/requests/__init__.py +0 -0
  191. sky/server/requests/executor.py +472 -0
  192. sky/server/requests/payloads.py +487 -0
  193. sky/server/requests/queues/__init__.py +0 -0
  194. sky/server/requests/queues/mp_queue.py +76 -0
  195. sky/server/requests/requests.py +567 -0
  196. sky/server/requests/serializers/__init__.py +0 -0
  197. sky/server/requests/serializers/decoders.py +192 -0
  198. sky/server/requests/serializers/encoders.py +166 -0
  199. sky/server/server.py +1106 -0
  200. sky/server/stream_utils.py +141 -0
  201. sky/setup_files/MANIFEST.in +2 -5
  202. sky/setup_files/dependencies.py +159 -0
  203. sky/setup_files/setup.py +14 -125
  204. sky/sky_logging.py +59 -14
  205. sky/skylet/autostop_lib.py +2 -2
  206. sky/skylet/constants.py +183 -50
  207. sky/skylet/events.py +22 -10
  208. sky/skylet/job_lib.py +403 -258
  209. sky/skylet/log_lib.py +111 -71
  210. sky/skylet/log_lib.pyi +6 -0
  211. sky/skylet/providers/command_runner.py +6 -8
  212. sky/skylet/providers/ibm/node_provider.py +2 -2
  213. sky/skylet/providers/scp/config.py +11 -3
  214. sky/skylet/providers/scp/node_provider.py +8 -8
  215. sky/skylet/skylet.py +3 -1
  216. sky/skylet/subprocess_daemon.py +69 -17
  217. sky/skypilot_config.py +119 -57
  218. sky/task.py +205 -64
  219. sky/templates/aws-ray.yml.j2 +37 -7
  220. sky/templates/azure-ray.yml.j2 +27 -82
  221. sky/templates/cudo-ray.yml.j2 +7 -3
  222. sky/templates/do-ray.yml.j2 +98 -0
  223. sky/templates/fluidstack-ray.yml.j2 +7 -4
  224. sky/templates/gcp-ray.yml.j2 +26 -6
  225. sky/templates/ibm-ray.yml.j2 +3 -2
  226. sky/templates/jobs-controller.yaml.j2 +46 -11
  227. sky/templates/kubernetes-ingress.yml.j2 +7 -0
  228. sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
  229. sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
  230. sky/templates/kubernetes-ray.yml.j2 +292 -25
  231. sky/templates/lambda-ray.yml.j2 +30 -40
  232. sky/templates/nebius-ray.yml.j2 +79 -0
  233. sky/templates/oci-ray.yml.j2 +18 -57
  234. sky/templates/paperspace-ray.yml.j2 +10 -6
  235. sky/templates/runpod-ray.yml.j2 +26 -4
  236. sky/templates/scp-ray.yml.j2 +3 -2
  237. sky/templates/sky-serve-controller.yaml.j2 +12 -1
  238. sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
  239. sky/templates/vast-ray.yml.j2 +70 -0
  240. sky/templates/vsphere-ray.yml.j2 +8 -3
  241. sky/templates/websocket_proxy.py +64 -0
  242. sky/usage/constants.py +10 -1
  243. sky/usage/usage_lib.py +130 -37
  244. sky/utils/accelerator_registry.py +35 -51
  245. sky/utils/admin_policy_utils.py +147 -0
  246. sky/utils/annotations.py +51 -0
  247. sky/utils/cli_utils/status_utils.py +81 -23
  248. sky/utils/cluster_utils.py +356 -0
  249. sky/utils/command_runner.py +452 -89
  250. sky/utils/command_runner.pyi +77 -3
  251. sky/utils/common.py +54 -0
  252. sky/utils/common_utils.py +319 -108
  253. sky/utils/config_utils.py +204 -0
  254. sky/utils/control_master_utils.py +48 -0
  255. sky/utils/controller_utils.py +548 -266
  256. sky/utils/dag_utils.py +93 -32
  257. sky/utils/db_utils.py +18 -4
  258. sky/utils/env_options.py +29 -7
  259. sky/utils/kubernetes/create_cluster.sh +8 -60
  260. sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
  261. sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
  262. sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
  263. sky/utils/kubernetes/gpu_labeler.py +4 -4
  264. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
  265. sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
  266. sky/utils/kubernetes/rsync_helper.sh +24 -0
  267. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
  268. sky/utils/log_utils.py +240 -33
  269. sky/utils/message_utils.py +81 -0
  270. sky/utils/registry.py +127 -0
  271. sky/utils/resources_utils.py +94 -22
  272. sky/utils/rich_utils.py +247 -18
  273. sky/utils/schemas.py +284 -64
  274. sky/{status_lib.py → utils/status_lib.py} +12 -7
  275. sky/utils/subprocess_utils.py +212 -46
  276. sky/utils/timeline.py +12 -7
  277. sky/utils/ux_utils.py +168 -15
  278. skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
  279. skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
  280. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
  281. sky/clouds/cloud_registry.py +0 -31
  282. sky/jobs/core.py +0 -330
  283. sky/skylet/providers/azure/__init__.py +0 -2
  284. sky/skylet/providers/azure/azure-vm-template.json +0 -301
  285. sky/skylet/providers/azure/config.py +0 -170
  286. sky/skylet/providers/azure/node_provider.py +0 -466
  287. sky/skylet/providers/lambda_cloud/__init__.py +0 -2
  288. sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
  289. sky/skylet/providers/oci/__init__.py +0 -2
  290. sky/skylet/providers/oci/node_provider.py +0 -488
  291. sky/skylet/providers/oci/query_helper.py +0 -383
  292. sky/skylet/providers/oci/utils.py +0 -21
  293. sky/utils/cluster_yaml_utils.py +0 -24
  294. sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
  295. skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
  296. skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
  297. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
  298. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
  299. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
@@ -14,6 +14,7 @@ from sky.backends import backend_utils
14
14
  from sky.backends import docker_utils
15
15
  from sky.data import storage as storage_lib
16
16
  from sky.utils import rich_utils
17
+ from sky.utils import ux_utils
17
18
 
18
19
  if typing.TYPE_CHECKING:
19
20
  from sky import resources
@@ -130,13 +131,14 @@ class LocalDockerBackend(backends.Backend['LocalDockerResourceHandle']):
130
131
  pass
131
132
 
132
133
  def _provision(
133
- self,
134
- task: 'task_lib.Task',
135
- to_provision: Optional['resources.Resources'],
136
- dryrun: bool,
137
- stream_logs: bool,
138
- cluster_name: str,
139
- retry_until_up: bool = False
134
+ self,
135
+ task: 'task_lib.Task',
136
+ to_provision: Optional['resources.Resources'],
137
+ dryrun: bool,
138
+ stream_logs: bool,
139
+ cluster_name: str,
140
+ retry_until_up: bool = False,
141
+ skip_unnecessary_provisioning: bool = False,
140
142
  ) -> Optional[LocalDockerResourceHandle]:
141
143
  """Builds docker image for the task and returns cluster name as handle.
142
144
 
@@ -152,6 +154,9 @@ class LocalDockerBackend(backends.Backend['LocalDockerResourceHandle']):
152
154
  logger.warning(
153
155
  f'Retrying until up is not supported in backend: {self.NAME}. '
154
156
  'Ignored the flag.')
157
+ if skip_unnecessary_provisioning:
158
+ logger.warning(f'skip_unnecessary_provisioning is not supported in '
159
+ f'backend: {self.NAME}. Ignored the flag.')
155
160
  if stream_logs:
156
161
  logger.info(
157
162
  'Streaming build logs is not supported in LocalDockerBackend. '
@@ -159,7 +164,8 @@ class LocalDockerBackend(backends.Backend['LocalDockerResourceHandle']):
159
164
  handle = LocalDockerResourceHandle(cluster_name)
160
165
  logger.info(f'Building docker image for task {task.name}. '
161
166
  'This might take some time.')
162
- with rich_utils.safe_status('[bold cyan]Building Docker image[/]'):
167
+ with rich_utils.safe_status(
168
+ ux_utils.spinner_message('Building Docker image')):
163
169
  image_tag, metadata = docker_utils.build_dockerimage_from_task(task)
164
170
  self.images[handle] = (image_tag, metadata)
165
171
  logger.info(f'Image {image_tag} built.')
@@ -39,29 +39,30 @@ _WHEEL_PATTERN = (f'{_PACKAGE_WHEEL_NAME}-'
39
39
  f'{version.parse(sky.__version__)}-*.whl')
40
40
 
41
41
 
42
- def _get_latest_wheel_and_remove_all_others() -> pathlib.Path:
43
- wheel_name = (f'**/{_WHEEL_PATTERN}')
42
+ def _remove_stale_wheels(latest_wheel_dir: pathlib.Path) -> None:
43
+ """Remove all wheels except the latest one."""
44
+ for f in WHEEL_DIR.iterdir():
45
+ if f != latest_wheel_dir:
46
+ if f.is_dir() and not f.is_symlink():
47
+ shutil.rmtree(f, ignore_errors=True)
48
+
49
+
50
+ def _get_latest_wheel() -> pathlib.Path:
51
+ wheel_name = f'**/{_WHEEL_PATTERN}'
44
52
  try:
45
53
  latest_wheel = max(WHEEL_DIR.glob(wheel_name), key=os.path.getctime)
46
54
  except ValueError:
47
55
  raise FileNotFoundError(
48
56
  'Could not find built SkyPilot wheels with glob pattern '
49
57
  f'{wheel_name} under {WHEEL_DIR!r}') from None
50
-
51
- latest_wheel_dir_name = latest_wheel.parent
52
- # Cleanup older wheels.
53
- for f in WHEEL_DIR.iterdir():
54
- if f != latest_wheel_dir_name:
55
- if f.is_dir() and not f.is_symlink():
56
- shutil.rmtree(f, ignore_errors=True)
57
58
  return latest_wheel
58
59
 
59
60
 
60
- def _build_sky_wheel():
61
- """Build a wheel for SkyPilot."""
62
- with tempfile.TemporaryDirectory() as tmp_dir:
61
+ def _build_sky_wheel() -> pathlib.Path:
62
+ """Build a wheel for SkyPilot and return the path to the wheel."""
63
+ with tempfile.TemporaryDirectory() as tmp_dir_str:
63
64
  # prepare files
64
- tmp_dir = pathlib.Path(tmp_dir)
65
+ tmp_dir = pathlib.Path(tmp_dir_str)
65
66
  sky_tmp_dir = tmp_dir / 'sky'
66
67
  sky_tmp_dir.mkdir()
67
68
  for item in SKY_PACKAGE_PATH.iterdir():
@@ -128,7 +129,12 @@ def _build_sky_wheel():
128
129
 
129
130
  wheel_dir = WHEEL_DIR / hash_of_latest_wheel
130
131
  wheel_dir.mkdir(parents=True, exist_ok=True)
131
- shutil.move(str(wheel_path), wheel_dir)
132
+ # shutil.move will fail when the file already exists and is being
133
+ # moved across filesystems.
134
+ if not os.path.exists(
135
+ os.path.join(wheel_dir, os.path.basename(wheel_path))):
136
+ shutil.move(str(wheel_path), wheel_dir)
137
+ return wheel_dir / wheel_path.name
132
138
 
133
139
 
134
140
  def build_sky_wheel() -> Tuple[pathlib.Path, str]:
@@ -147,7 +153,10 @@ def build_sky_wheel() -> Tuple[pathlib.Path, str]:
147
153
  if not path.exists():
148
154
  return -1.
149
155
  try:
150
- return max(os.path.getmtime(root) for root, _, _ in os.walk(path))
156
+ return max(
157
+ os.path.getmtime(os.path.join(root, f))
158
+ for root, dirs, files in os.walk(path)
159
+ for f in (*dirs, *files))
151
160
  except ValueError:
152
161
  return -1.
153
162
 
@@ -161,13 +170,22 @@ def build_sky_wheel() -> Tuple[pathlib.Path, str]:
161
170
  last_modification_time = _get_latest_modification_time(SKY_PACKAGE_PATH)
162
171
  last_wheel_modification_time = _get_latest_modification_time(WHEEL_DIR)
163
172
 
164
- # only build wheels if the wheel is outdated
165
- if last_wheel_modification_time < last_modification_time:
173
+ # Only build wheels if the wheel is outdated or wheel does not exist
174
+ # for the requested version.
175
+ if (last_wheel_modification_time < last_modification_time) or not any(
176
+ WHEEL_DIR.glob(f'**/{_WHEEL_PATTERN}')):
166
177
  if not WHEEL_DIR.exists():
167
178
  WHEEL_DIR.mkdir(parents=True, exist_ok=True)
168
- _build_sky_wheel()
169
-
170
- latest_wheel = _get_latest_wheel_and_remove_all_others()
179
+ latest_wheel = _build_sky_wheel()
180
+ else:
181
+ latest_wheel = _get_latest_wheel()
182
+
183
+ # We remove all wheels except the latest one for garbage collection.
184
+ # Otherwise stale wheels will accumulate over time.
185
+ # TODO(romilb): If the user switches versions every alternate launch,
186
+ # the wheel will be rebuilt every time. At the risk of adding
187
+ # complexity, we can consider TTL caching wheels by version here.
188
+ _remove_stale_wheels(latest_wheel.parent)
171
189
 
172
190
  wheel_hash = latest_wheel.parent.name
173
191
 
@@ -20,10 +20,11 @@ from rich import progress as rich_progress
20
20
 
21
21
  import sky
22
22
  from sky import backends
23
+ from sky import clouds
23
24
  from sky import data
24
25
  from sky import global_user_state
26
+ from sky import optimizer
25
27
  from sky import sky_logging
26
- from sky import status_lib
27
28
  from sky.backends import backend_utils
28
29
  from sky.benchmark import benchmark_state
29
30
  from sky.data import storage as storage_lib
@@ -33,6 +34,7 @@ from sky.skylet import log_lib
33
34
  from sky.utils import common_utils
34
35
  from sky.utils import log_utils
35
36
  from sky.utils import rich_utils
37
+ from sky.utils import status_lib
36
38
  from sky.utils import subprocess_utils
37
39
  from sky.utils import ux_utils
38
40
 
@@ -99,7 +101,9 @@ def _get_optimized_resources(
99
101
  task = sky.Task()
100
102
  task.set_resources(resources)
101
103
 
102
- dag = sky.optimize(dag, quiet=True)
104
+ # Do not use `sky.optimize` here, as this should be called on the API
105
+ # server side.
106
+ dag = optimizer.Optimizer.optimize(dag, quiet=True)
103
107
  task = dag.tasks[0]
104
108
  optimized_resources.append(task.best_resources)
105
109
  return optimized_resources
@@ -170,13 +174,19 @@ def _create_benchmark_bucket() -> Tuple[str, str]:
170
174
  # Select the bucket type.
171
175
  enabled_clouds = storage_lib.get_cached_enabled_storage_clouds_or_refresh(
172
176
  raise_if_no_cloud_access=True)
173
- # Already checked by raise_if_no_cloud_access=True.
174
- assert enabled_clouds
177
+ # Sky Benchmark only supports S3 (see _download_remote_dir and
178
+ # _delete_remote_dir).
179
+ enabled_clouds = [
180
+ cloud for cloud in enabled_clouds if cloud in [str(clouds.AWS())]
181
+ ]
182
+ assert enabled_clouds, ('No enabled cloud storage found. Sky Benchmark '
183
+ 'requires GCP or AWS to store logs.')
175
184
  bucket_type = data.StoreType.from_cloud(enabled_clouds[0]).value
176
185
 
177
186
  # Create a benchmark bucket.
178
187
  logger.info(f'Creating a bucket {bucket_name} to save the benchmark logs.')
179
188
  storage = data.Storage(bucket_name, source=None, persistent=True)
189
+ storage.construct()
180
190
  storage.add_store(bucket_type)
181
191
 
182
192
  # Save the bucket name and type to the config.
@@ -242,14 +252,8 @@ def _download_remote_dir(remote_dir: str, local_dir: str,
242
252
  stdout=subprocess.DEVNULL,
243
253
  stderr=subprocess.DEVNULL,
244
254
  check=True)
245
- elif bucket_type == data.StoreType.GCS:
246
- remote_dir = f'gs://{remote_dir}'
247
- subprocess.run(['gsutil', '-m', 'cp', '-r', remote_dir, local_dir],
248
- stdout=subprocess.DEVNULL,
249
- stderr=subprocess.DEVNULL,
250
- check=True)
251
255
  else:
252
- raise RuntimeError('Azure Blob Storage is not supported yet.')
256
+ raise RuntimeError(f'{bucket_type} is not supported yet.')
253
257
 
254
258
 
255
259
  def _delete_remote_dir(remote_dir: str, bucket_type: data.StoreType) -> None:
@@ -260,14 +264,8 @@ def _delete_remote_dir(remote_dir: str, bucket_type: data.StoreType) -> None:
260
264
  stdout=subprocess.DEVNULL,
261
265
  stderr=subprocess.DEVNULL,
262
266
  check=True)
263
- elif bucket_type == data.StoreType.GCS:
264
- remote_dir = f'gs://{remote_dir}'
265
- subprocess.run(['gsutil', '-m', 'rm', '-r', remote_dir],
266
- stdout=subprocess.DEVNULL,
267
- stderr=subprocess.DEVNULL,
268
- check=True)
269
267
  else:
270
- raise RuntimeError('Azure Blob Storage is not supported yet.')
268
+ raise RuntimeError(f'{bucket_type} is not supported yet.')
271
269
 
272
270
 
273
271
  def _read_timestamp(path: str) -> float:
@@ -541,7 +539,7 @@ def launch_benchmark_clusters(benchmark: str, clusters: List[str],
541
539
  for yaml_fd, cluster in zip(yaml_fds, clusters)]
542
540
 
543
541
  # Save stdout/stderr from cluster launches.
544
- run_timestamp = backend_utils.get_run_timestamp()
542
+ run_timestamp = sky_logging.get_run_timestamp()
545
543
  log_dir = os.path.join(constants.SKY_LOGS_DIRECTORY, run_timestamp)
546
544
  log_dir = os.path.expanduser(log_dir)
547
545
  logger.info(
@@ -601,7 +599,8 @@ def update_benchmark_state(benchmark: str) -> None:
601
599
  remote_dir = os.path.join(bucket_name, benchmark)
602
600
  local_dir = os.path.join(_SKY_LOCAL_BENCHMARK_DIR, benchmark)
603
601
  os.makedirs(local_dir, exist_ok=True)
604
- with rich_utils.safe_status('[bold cyan]Downloading benchmark logs[/]'):
602
+ with rich_utils.safe_status(
603
+ ux_utils.spinner_message('Downloading benchmark logs')):
605
604
  _download_remote_dir(remote_dir, local_dir, bucket_type)
606
605
 
607
606
  # Update the benchmark results in parallel.
@@ -610,9 +609,9 @@ def update_benchmark_state(benchmark: str) -> None:
610
609
  progress = rich_progress.Progress(transient=True,
611
610
  redirect_stdout=False,
612
611
  redirect_stderr=False)
613
- task = progress.add_task(
614
- f'[bold cyan]Processing {num_candidates} benchmark result{plural}[/]',
615
- total=num_candidates)
612
+ task = progress.add_task(ux_utils.spinner_message(
613
+ f'Processing {num_candidates} benchmark result{plural}'),
614
+ total=num_candidates)
616
615
 
617
616
  def _update_with_progress_bar(arg: Any) -> None:
618
617
  message = _update_benchmark_result(arg)
sky/check.py CHANGED
@@ -1,26 +1,33 @@
1
1
  """Credential checks: check cloud credentials and enable clouds."""
2
+ import os
2
3
  import traceback
3
4
  from types import ModuleType
4
5
  from typing import Dict, Iterable, List, Optional, Tuple, Union
5
6
 
6
7
  import click
7
8
  import colorama
8
- import rich
9
9
 
10
10
  from sky import clouds as sky_clouds
11
11
  from sky import exceptions
12
12
  from sky import global_user_state
13
13
  from sky import skypilot_config
14
14
  from sky.adaptors import cloudflare
15
+ from sky.utils import registry
16
+ from sky.utils import rich_utils
15
17
  from sky.utils import ux_utils
16
18
 
19
+ CHECK_MARK_EMOJI = '\U00002714' # Heavy check mark unicode
20
+ PARTY_POPPER_EMOJI = '\U0001F389' # Party popper unicode
21
+
17
22
 
18
23
  def check(
19
24
  quiet: bool = False,
20
25
  verbose: bool = False,
21
26
  clouds: Optional[Iterable[str]] = None,
22
- ) -> None:
23
- echo = (lambda *_args, **_kwargs: None) if quiet else click.echo
27
+ ) -> List[str]:
28
+ echo = (lambda *_args, **_kwargs: None
29
+ ) if quiet else lambda *args, **kwargs: click.echo(
30
+ *args, **kwargs, color=True)
24
31
  echo('Checking credentials to enable clouds for SkyPilot.')
25
32
  enabled_clouds = []
26
33
  disabled_clouds = []
@@ -29,14 +36,13 @@ def check(
29
36
  cloud_tuple: Tuple[str, Union[sky_clouds.Cloud,
30
37
  ModuleType]]) -> None:
31
38
  cloud_repr, cloud = cloud_tuple
32
- echo(f' Checking {cloud_repr}...', nl=False)
33
- try:
34
- ok, reason = cloud.check_credentials()
35
- except Exception: # pylint: disable=broad-except
36
- # Catch all exceptions to prevent a single cloud from blocking the
37
- # check for other clouds.
38
- ok, reason = False, traceback.format_exc()
39
- echo('\r', nl=False)
39
+ with rich_utils.safe_status(f'Checking {cloud_repr}...'):
40
+ try:
41
+ ok, reason = cloud.check_credentials()
42
+ except Exception: # pylint: disable=broad-except
43
+ # Catch all exceptions to prevent a single cloud from blocking
44
+ # the check for other clouds.
45
+ ok, reason = False, traceback.format_exc()
40
46
  status_msg = 'enabled' if ok else 'disabled'
41
47
  styles = {'fg': 'green', 'bold': False} if ok else {'dim': True}
42
48
  echo(' ' + click.style(f'{cloud_repr}: {status_msg}', **styles) +
@@ -44,7 +50,7 @@ def check(
44
50
  if ok:
45
51
  enabled_clouds.append(cloud_repr)
46
52
  if verbose and cloud is not cloudflare:
47
- activated_account = cloud.get_current_user_identity_str()
53
+ activated_account = cloud.get_active_user_identity_str()
48
54
  if activated_account is not None:
49
55
  echo(f' Activated account: {activated_account}')
50
56
  if reason is not None:
@@ -60,12 +66,12 @@ def check(
60
66
  if cloud_name.lower().startswith('cloudflare'):
61
67
  return cloudflare.SKY_CHECK_NAME, cloudflare
62
68
  else:
63
- cloud_obj = sky_clouds.CLOUD_REGISTRY.from_str(cloud_name)
69
+ cloud_obj = registry.CLOUD_REGISTRY.from_str(cloud_name)
64
70
  assert cloud_obj is not None, f'Cloud {cloud_name!r} not found'
65
71
  return repr(cloud_obj), cloud_obj
66
72
 
67
73
  def get_all_clouds():
68
- return tuple([repr(c) for c in sky_clouds.CLOUD_REGISTRY.values()] +
74
+ return tuple([repr(c) for c in registry.CLOUD_REGISTRY.values()] +
69
75
  [cloudflare.SKY_CHECK_NAME])
70
76
 
71
77
  if clouds is not None:
@@ -77,8 +83,8 @@ def check(
77
83
  # Use allowed_clouds from config if it exists, otherwise check all clouds.
78
84
  # Also validate names with get_cloud_tuple.
79
85
  config_allowed_cloud_names = [
80
- get_cloud_tuple(c)[0] for c in skypilot_config.get_nested(
81
- ['allowed_clouds'], get_all_clouds())
86
+ get_cloud_tuple(c)[0] for c in skypilot_config.get_nested((
87
+ 'allowed_clouds',), get_all_clouds())
82
88
  ]
83
89
  # Use disallowed_cloud_names for logging the clouds that will be disabled
84
90
  # because they are not included in allowed_clouds in config.yaml.
@@ -93,7 +99,7 @@ def check(
93
99
  for cloud_tuple in sorted(clouds_to_check):
94
100
  check_one_cloud(cloud_tuple)
95
101
 
96
- # Cloudflare is not a real cloud in sky_clouds.CLOUD_REGISTRY, and should
102
+ # Cloudflare is not a real cloud in registry.CLOUD_REGISTRY, and should
97
103
  # not be inserted into the DB (otherwise `sky launch` and other code would
98
104
  # error out when it's trying to look it up in the registry).
99
105
  enabled_clouds_set = {
@@ -126,7 +132,7 @@ def check(
126
132
  '\nNote: The following clouds were disabled because they were not '
127
133
  'included in allowed_clouds in ~/.sky/config.yaml: '
128
134
  f'{", ".join([c for c in disallowed_cloud_names])}')
129
- if len(all_enabled_clouds) == 0:
135
+ if not all_enabled_clouds:
130
136
  echo(
131
137
  click.style(
132
138
  'No cloud is enabled. SkyPilot will not be able to run any '
@@ -145,7 +151,7 @@ def check(
145
151
  dim=True) + click.style(f'sky check{clouds_arg}', bold=True) +
146
152
  '\n' + click.style(
147
153
  'If any problems remain, refer to detailed docs at: '
148
- 'https://skypilot.readthedocs.io/en/latest/getting-started/installation.html', # pylint: disable=line-too-long
154
+ 'https://docs.skypilot.co/en/latest/getting-started/installation.html', # pylint: disable=line-too-long
149
155
  dim=True))
150
156
 
151
157
  if disallowed_clouds_hint:
@@ -153,10 +159,14 @@ def check(
153
159
 
154
160
  # Pretty print for UX.
155
161
  if not quiet:
156
- enabled_clouds_str = '\n :heavy_check_mark: '.join(
157
- [''] + sorted(all_enabled_clouds))
158
- rich.print('\n[green]:tada: Enabled clouds :tada:'
159
- f'{enabled_clouds_str}[/green]')
162
+ enabled_clouds_str = '\n ' + '\n '.join([
163
+ _format_enabled_cloud(cloud)
164
+ for cloud in sorted(all_enabled_clouds)
165
+ ])
166
+ echo(f'\n{colorama.Fore.GREEN}{PARTY_POPPER_EMOJI} '
167
+ f'Enabled clouds {PARTY_POPPER_EMOJI}'
168
+ f'{colorama.Style.RESET_ALL}{enabled_clouds_str}')
169
+ return enabled_clouds
160
170
 
161
171
 
162
172
  def get_cached_enabled_clouds_or_refresh(
@@ -194,19 +204,25 @@ def get_cached_enabled_clouds_or_refresh(
194
204
  def get_cloud_credential_file_mounts(
195
205
  excluded_clouds: Optional[Iterable[sky_clouds.Cloud]]
196
206
  ) -> Dict[str, str]:
197
- """Returns the files necessary to access all enabled clouds.
207
+ """Returns the files necessary to access all clouds.
198
208
 
199
209
  Returns a dictionary that will be added to a task's file mounts
200
210
  and a list of patterns that will be excluded (used as rsync_exclude).
201
211
  """
202
- enabled_clouds = get_cached_enabled_clouds_or_refresh()
212
+ # Uploading credentials for all clouds instead of only sky check
213
+ # enabled clouds because users may have partial credentials for some
214
+ # clouds to access their specific resources (e.g. cloud storage) but
215
+ # not have the complete credentials to pass sky check.
216
+ clouds = registry.CLOUD_REGISTRY.values()
203
217
  file_mounts = {}
204
- for cloud in enabled_clouds:
218
+ for cloud in clouds:
205
219
  if (excluded_clouds is not None and
206
220
  sky_clouds.cloud_in_iterable(cloud, excluded_clouds)):
207
221
  continue
208
222
  cloud_file_mounts = cloud.get_credential_file_mounts()
209
- file_mounts.update(cloud_file_mounts)
223
+ for remote_path, local_path in cloud_file_mounts.items():
224
+ if os.path.exists(os.path.expanduser(local_path)):
225
+ file_mounts[remote_path] = local_path
210
226
  # Currently, get_cached_enabled_clouds_or_refresh() does not support r2 as
211
227
  # only clouds with computing instances are marked as enabled by skypilot.
212
228
  # This will be removed when cloudflare/r2 is added as a 'cloud'.
@@ -215,3 +231,36 @@ def get_cloud_credential_file_mounts(
215
231
  r2_credential_mounts = cloudflare.get_credential_file_mounts()
216
232
  file_mounts.update(r2_credential_mounts)
217
233
  return file_mounts
234
+
235
+
236
+ def _format_enabled_cloud(cloud_name: str) -> str:
237
+
238
+ def _green_color(cloud_name: str) -> str:
239
+ return f'{colorama.Fore.GREEN}{cloud_name}{colorama.Style.RESET_ALL}'
240
+
241
+ if cloud_name == repr(sky_clouds.Kubernetes()):
242
+ # Get enabled contexts for Kubernetes
243
+ existing_contexts = sky_clouds.Kubernetes.existing_allowed_contexts()
244
+ if not existing_contexts:
245
+ return _green_color(cloud_name)
246
+
247
+ # Check if allowed_contexts is explicitly set in config
248
+ allowed_contexts = skypilot_config.get_nested(
249
+ ('kubernetes', 'allowed_contexts'), None)
250
+
251
+ # Format the context info with consistent styling
252
+ if allowed_contexts is not None:
253
+ contexts_formatted = []
254
+ for i, context in enumerate(existing_contexts):
255
+ symbol = (ux_utils.INDENT_LAST_SYMBOL
256
+ if i == len(existing_contexts) -
257
+ 1 else ux_utils.INDENT_SYMBOL)
258
+ contexts_formatted.append(f'\n {symbol}{context}')
259
+ context_info = f'Allowed contexts:{"".join(contexts_formatted)}'
260
+ else:
261
+ context_info = f'Active context: {existing_contexts[0]}'
262
+
263
+ return (f'{_green_color(cloud_name)}\n'
264
+ f' {colorama.Style.DIM}{context_info}'
265
+ f'{colorama.Style.RESET_ALL}')
266
+ return _green_color(cloud_name)