skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299)
  1. sky/__init__.py +64 -32
  2. sky/adaptors/aws.py +23 -6
  3. sky/adaptors/azure.py +432 -15
  4. sky/adaptors/cloudflare.py +5 -5
  5. sky/adaptors/common.py +19 -9
  6. sky/adaptors/do.py +20 -0
  7. sky/adaptors/gcp.py +3 -2
  8. sky/adaptors/kubernetes.py +122 -88
  9. sky/adaptors/nebius.py +100 -0
  10. sky/adaptors/oci.py +39 -1
  11. sky/adaptors/vast.py +29 -0
  12. sky/admin_policy.py +101 -0
  13. sky/authentication.py +117 -98
  14. sky/backends/backend.py +52 -20
  15. sky/backends/backend_utils.py +669 -557
  16. sky/backends/cloud_vm_ray_backend.py +1099 -808
  17. sky/backends/local_docker_backend.py +14 -8
  18. sky/backends/wheel_utils.py +38 -20
  19. sky/benchmark/benchmark_utils.py +22 -23
  20. sky/check.py +76 -27
  21. sky/cli.py +1586 -1139
  22. sky/client/__init__.py +1 -0
  23. sky/client/cli.py +5683 -0
  24. sky/client/common.py +345 -0
  25. sky/client/sdk.py +1765 -0
  26. sky/cloud_stores.py +283 -19
  27. sky/clouds/__init__.py +7 -2
  28. sky/clouds/aws.py +303 -112
  29. sky/clouds/azure.py +185 -179
  30. sky/clouds/cloud.py +115 -37
  31. sky/clouds/cudo.py +29 -22
  32. sky/clouds/do.py +313 -0
  33. sky/clouds/fluidstack.py +44 -54
  34. sky/clouds/gcp.py +206 -65
  35. sky/clouds/ibm.py +26 -21
  36. sky/clouds/kubernetes.py +345 -91
  37. sky/clouds/lambda_cloud.py +40 -29
  38. sky/clouds/nebius.py +297 -0
  39. sky/clouds/oci.py +129 -90
  40. sky/clouds/paperspace.py +22 -18
  41. sky/clouds/runpod.py +53 -34
  42. sky/clouds/scp.py +28 -24
  43. sky/clouds/service_catalog/__init__.py +19 -13
  44. sky/clouds/service_catalog/aws_catalog.py +29 -12
  45. sky/clouds/service_catalog/azure_catalog.py +33 -6
  46. sky/clouds/service_catalog/common.py +95 -75
  47. sky/clouds/service_catalog/constants.py +3 -3
  48. sky/clouds/service_catalog/cudo_catalog.py +13 -3
  49. sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
  50. sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
  51. sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
  52. sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
  53. sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
  54. sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
  55. sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
  56. sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
  57. sky/clouds/service_catalog/do_catalog.py +111 -0
  58. sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
  59. sky/clouds/service_catalog/gcp_catalog.py +16 -2
  60. sky/clouds/service_catalog/ibm_catalog.py +2 -2
  61. sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
  62. sky/clouds/service_catalog/lambda_catalog.py +8 -3
  63. sky/clouds/service_catalog/nebius_catalog.py +116 -0
  64. sky/clouds/service_catalog/oci_catalog.py +31 -4
  65. sky/clouds/service_catalog/paperspace_catalog.py +2 -2
  66. sky/clouds/service_catalog/runpod_catalog.py +2 -2
  67. sky/clouds/service_catalog/scp_catalog.py +2 -2
  68. sky/clouds/service_catalog/vast_catalog.py +104 -0
  69. sky/clouds/service_catalog/vsphere_catalog.py +2 -2
  70. sky/clouds/utils/aws_utils.py +65 -0
  71. sky/clouds/utils/azure_utils.py +91 -0
  72. sky/clouds/utils/gcp_utils.py +5 -9
  73. sky/clouds/utils/oci_utils.py +47 -5
  74. sky/clouds/utils/scp_utils.py +4 -3
  75. sky/clouds/vast.py +280 -0
  76. sky/clouds/vsphere.py +22 -18
  77. sky/core.py +361 -107
  78. sky/dag.py +41 -28
  79. sky/data/data_transfer.py +37 -0
  80. sky/data/data_utils.py +211 -32
  81. sky/data/mounting_utils.py +182 -30
  82. sky/data/storage.py +2118 -270
  83. sky/data/storage_utils.py +126 -5
  84. sky/exceptions.py +179 -8
  85. sky/execution.py +158 -85
  86. sky/global_user_state.py +150 -34
  87. sky/jobs/__init__.py +12 -10
  88. sky/jobs/client/__init__.py +0 -0
  89. sky/jobs/client/sdk.py +302 -0
  90. sky/jobs/constants.py +49 -11
  91. sky/jobs/controller.py +161 -99
  92. sky/jobs/dashboard/dashboard.py +171 -25
  93. sky/jobs/dashboard/templates/index.html +572 -60
  94. sky/jobs/recovery_strategy.py +157 -156
  95. sky/jobs/scheduler.py +307 -0
  96. sky/jobs/server/__init__.py +1 -0
  97. sky/jobs/server/core.py +598 -0
  98. sky/jobs/server/dashboard_utils.py +69 -0
  99. sky/jobs/server/server.py +190 -0
  100. sky/jobs/state.py +627 -122
  101. sky/jobs/utils.py +615 -206
  102. sky/models.py +27 -0
  103. sky/optimizer.py +142 -83
  104. sky/provision/__init__.py +20 -5
  105. sky/provision/aws/config.py +124 -42
  106. sky/provision/aws/instance.py +130 -53
  107. sky/provision/azure/__init__.py +7 -0
  108. sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
  109. sky/provision/azure/config.py +220 -0
  110. sky/provision/azure/instance.py +1012 -37
  111. sky/provision/common.py +31 -3
  112. sky/provision/constants.py +25 -0
  113. sky/provision/cudo/__init__.py +2 -1
  114. sky/provision/cudo/cudo_utils.py +112 -0
  115. sky/provision/cudo/cudo_wrapper.py +37 -16
  116. sky/provision/cudo/instance.py +28 -12
  117. sky/provision/do/__init__.py +11 -0
  118. sky/provision/do/config.py +14 -0
  119. sky/provision/do/constants.py +10 -0
  120. sky/provision/do/instance.py +287 -0
  121. sky/provision/do/utils.py +301 -0
  122. sky/provision/docker_utils.py +82 -46
  123. sky/provision/fluidstack/fluidstack_utils.py +57 -125
  124. sky/provision/fluidstack/instance.py +15 -43
  125. sky/provision/gcp/config.py +19 -9
  126. sky/provision/gcp/constants.py +7 -1
  127. sky/provision/gcp/instance.py +55 -34
  128. sky/provision/gcp/instance_utils.py +339 -80
  129. sky/provision/gcp/mig_utils.py +210 -0
  130. sky/provision/instance_setup.py +172 -133
  131. sky/provision/kubernetes/__init__.py +1 -0
  132. sky/provision/kubernetes/config.py +104 -90
  133. sky/provision/kubernetes/constants.py +8 -0
  134. sky/provision/kubernetes/instance.py +680 -325
  135. sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
  136. sky/provision/kubernetes/network.py +54 -20
  137. sky/provision/kubernetes/network_utils.py +70 -21
  138. sky/provision/kubernetes/utils.py +1370 -251
  139. sky/provision/lambda_cloud/__init__.py +11 -0
  140. sky/provision/lambda_cloud/config.py +10 -0
  141. sky/provision/lambda_cloud/instance.py +265 -0
  142. sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
  143. sky/provision/logging.py +1 -1
  144. sky/provision/nebius/__init__.py +11 -0
  145. sky/provision/nebius/config.py +11 -0
  146. sky/provision/nebius/instance.py +285 -0
  147. sky/provision/nebius/utils.py +318 -0
  148. sky/provision/oci/__init__.py +15 -0
  149. sky/provision/oci/config.py +51 -0
  150. sky/provision/oci/instance.py +436 -0
  151. sky/provision/oci/query_utils.py +681 -0
  152. sky/provision/paperspace/constants.py +6 -0
  153. sky/provision/paperspace/instance.py +4 -3
  154. sky/provision/paperspace/utils.py +2 -0
  155. sky/provision/provisioner.py +207 -130
  156. sky/provision/runpod/__init__.py +1 -0
  157. sky/provision/runpod/api/__init__.py +3 -0
  158. sky/provision/runpod/api/commands.py +119 -0
  159. sky/provision/runpod/api/pods.py +142 -0
  160. sky/provision/runpod/instance.py +64 -8
  161. sky/provision/runpod/utils.py +239 -23
  162. sky/provision/vast/__init__.py +10 -0
  163. sky/provision/vast/config.py +11 -0
  164. sky/provision/vast/instance.py +247 -0
  165. sky/provision/vast/utils.py +162 -0
  166. sky/provision/vsphere/common/vim_utils.py +1 -1
  167. sky/provision/vsphere/instance.py +8 -18
  168. sky/provision/vsphere/vsphere_utils.py +1 -1
  169. sky/resources.py +247 -102
  170. sky/serve/__init__.py +9 -9
  171. sky/serve/autoscalers.py +361 -299
  172. sky/serve/client/__init__.py +0 -0
  173. sky/serve/client/sdk.py +366 -0
  174. sky/serve/constants.py +12 -3
  175. sky/serve/controller.py +106 -36
  176. sky/serve/load_balancer.py +63 -12
  177. sky/serve/load_balancing_policies.py +84 -2
  178. sky/serve/replica_managers.py +42 -34
  179. sky/serve/serve_state.py +62 -32
  180. sky/serve/serve_utils.py +271 -160
  181. sky/serve/server/__init__.py +0 -0
  182. sky/serve/{core.py → server/core.py} +271 -90
  183. sky/serve/server/server.py +112 -0
  184. sky/serve/service.py +52 -16
  185. sky/serve/service_spec.py +95 -32
  186. sky/server/__init__.py +1 -0
  187. sky/server/common.py +430 -0
  188. sky/server/constants.py +21 -0
  189. sky/server/html/log.html +174 -0
  190. sky/server/requests/__init__.py +0 -0
  191. sky/server/requests/executor.py +472 -0
  192. sky/server/requests/payloads.py +487 -0
  193. sky/server/requests/queues/__init__.py +0 -0
  194. sky/server/requests/queues/mp_queue.py +76 -0
  195. sky/server/requests/requests.py +567 -0
  196. sky/server/requests/serializers/__init__.py +0 -0
  197. sky/server/requests/serializers/decoders.py +192 -0
  198. sky/server/requests/serializers/encoders.py +166 -0
  199. sky/server/server.py +1106 -0
  200. sky/server/stream_utils.py +141 -0
  201. sky/setup_files/MANIFEST.in +2 -5
  202. sky/setup_files/dependencies.py +159 -0
  203. sky/setup_files/setup.py +14 -125
  204. sky/sky_logging.py +59 -14
  205. sky/skylet/autostop_lib.py +2 -2
  206. sky/skylet/constants.py +183 -50
  207. sky/skylet/events.py +22 -10
  208. sky/skylet/job_lib.py +403 -258
  209. sky/skylet/log_lib.py +111 -71
  210. sky/skylet/log_lib.pyi +6 -0
  211. sky/skylet/providers/command_runner.py +6 -8
  212. sky/skylet/providers/ibm/node_provider.py +2 -2
  213. sky/skylet/providers/scp/config.py +11 -3
  214. sky/skylet/providers/scp/node_provider.py +8 -8
  215. sky/skylet/skylet.py +3 -1
  216. sky/skylet/subprocess_daemon.py +69 -17
  217. sky/skypilot_config.py +119 -57
  218. sky/task.py +205 -64
  219. sky/templates/aws-ray.yml.j2 +37 -7
  220. sky/templates/azure-ray.yml.j2 +27 -82
  221. sky/templates/cudo-ray.yml.j2 +7 -3
  222. sky/templates/do-ray.yml.j2 +98 -0
  223. sky/templates/fluidstack-ray.yml.j2 +7 -4
  224. sky/templates/gcp-ray.yml.j2 +26 -6
  225. sky/templates/ibm-ray.yml.j2 +3 -2
  226. sky/templates/jobs-controller.yaml.j2 +46 -11
  227. sky/templates/kubernetes-ingress.yml.j2 +7 -0
  228. sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
  229. sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
  230. sky/templates/kubernetes-ray.yml.j2 +292 -25
  231. sky/templates/lambda-ray.yml.j2 +30 -40
  232. sky/templates/nebius-ray.yml.j2 +79 -0
  233. sky/templates/oci-ray.yml.j2 +18 -57
  234. sky/templates/paperspace-ray.yml.j2 +10 -6
  235. sky/templates/runpod-ray.yml.j2 +26 -4
  236. sky/templates/scp-ray.yml.j2 +3 -2
  237. sky/templates/sky-serve-controller.yaml.j2 +12 -1
  238. sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
  239. sky/templates/vast-ray.yml.j2 +70 -0
  240. sky/templates/vsphere-ray.yml.j2 +8 -3
  241. sky/templates/websocket_proxy.py +64 -0
  242. sky/usage/constants.py +10 -1
  243. sky/usage/usage_lib.py +130 -37
  244. sky/utils/accelerator_registry.py +35 -51
  245. sky/utils/admin_policy_utils.py +147 -0
  246. sky/utils/annotations.py +51 -0
  247. sky/utils/cli_utils/status_utils.py +81 -23
  248. sky/utils/cluster_utils.py +356 -0
  249. sky/utils/command_runner.py +452 -89
  250. sky/utils/command_runner.pyi +77 -3
  251. sky/utils/common.py +54 -0
  252. sky/utils/common_utils.py +319 -108
  253. sky/utils/config_utils.py +204 -0
  254. sky/utils/control_master_utils.py +48 -0
  255. sky/utils/controller_utils.py +548 -266
  256. sky/utils/dag_utils.py +93 -32
  257. sky/utils/db_utils.py +18 -4
  258. sky/utils/env_options.py +29 -7
  259. sky/utils/kubernetes/create_cluster.sh +8 -60
  260. sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
  261. sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
  262. sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
  263. sky/utils/kubernetes/gpu_labeler.py +4 -4
  264. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
  265. sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
  266. sky/utils/kubernetes/rsync_helper.sh +24 -0
  267. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
  268. sky/utils/log_utils.py +240 -33
  269. sky/utils/message_utils.py +81 -0
  270. sky/utils/registry.py +127 -0
  271. sky/utils/resources_utils.py +94 -22
  272. sky/utils/rich_utils.py +247 -18
  273. sky/utils/schemas.py +284 -64
  274. sky/{status_lib.py → utils/status_lib.py} +12 -7
  275. sky/utils/subprocess_utils.py +212 -46
  276. sky/utils/timeline.py +12 -7
  277. sky/utils/ux_utils.py +168 -15
  278. skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
  279. skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
  280. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
  281. sky/clouds/cloud_registry.py +0 -31
  282. sky/jobs/core.py +0 -330
  283. sky/skylet/providers/azure/__init__.py +0 -2
  284. sky/skylet/providers/azure/azure-vm-template.json +0 -301
  285. sky/skylet/providers/azure/config.py +0 -170
  286. sky/skylet/providers/azure/node_provider.py +0 -466
  287. sky/skylet/providers/lambda_cloud/__init__.py +0 -2
  288. sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
  289. sky/skylet/providers/oci/__init__.py +0 -2
  290. sky/skylet/providers/oci/node_provider.py +0 -488
  291. sky/skylet/providers/oci/query_helper.py +0 -383
  292. sky/skylet/providers/oci/utils.py +0 -21
  293. sky/utils/cluster_yaml_utils.py +0 -24
  294. sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
  295. skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
  296. skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
  297. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
  298. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
  299. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
sky/cloud_stores.py CHANGED
@@ -7,15 +7,26 @@ TODO:
7
7
  * Better interface.
8
8
  * Better implementation (e.g., fsspec, smart_open, using each cloud's SDK).
9
9
  """
10
+ import os
11
+ import shlex
10
12
  import subprocess
13
+ import time
11
14
  import urllib.parse
12
15
 
16
+ from sky import exceptions as sky_exceptions
17
+ from sky import sky_logging
13
18
  from sky.adaptors import aws
19
+ from sky.adaptors import azure
14
20
  from sky.adaptors import cloudflare
15
21
  from sky.adaptors import ibm
22
+ from sky.adaptors import oci
16
23
  from sky.clouds import gcp
17
24
  from sky.data import data_utils
18
25
  from sky.data.data_utils import Rclone
26
+ from sky.skylet import constants
27
+ from sky.utils import ux_utils
28
+
29
+ logger = sky_logging.init_logger(__name__)
19
30
 
20
31
 
21
32
  class CloudStorage:
@@ -43,7 +54,8 @@ class S3CloudStorage(CloudStorage):
43
54
 
44
55
  # List of commands to install AWS CLI
45
56
  _GET_AWSCLI = [
46
- 'aws --version >/dev/null 2>&1 || pip3 install awscli',
57
+ 'aws --version >/dev/null 2>&1 || '
58
+ f'{constants.SKY_UV_PIP_CMD} install awscli',
47
59
  ]
48
60
 
49
61
  def is_directory(self, url: str) -> bool:
@@ -73,7 +85,8 @@ class S3CloudStorage(CloudStorage):
73
85
  # AWS Sync by default uses 10 threads to upload files to the bucket.
74
86
  # To increase parallelism, modify max_concurrent_requests in your
75
87
  # aws config file (Default path: ~/.aws/config).
76
- download_via_awscli = ('aws s3 sync --no-follow-symlinks '
88
+ download_via_awscli = (f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
89
+ 'sync --no-follow-symlinks '
77
90
  f'{source} {destination}')
78
91
 
79
92
  all_commands = list(self._GET_AWSCLI)
@@ -82,7 +95,8 @@ class S3CloudStorage(CloudStorage):
82
95
 
83
96
  def make_sync_file_command(self, source: str, destination: str) -> str:
84
97
  """Downloads a file using AWS CLI."""
85
- download_via_awscli = f'aws s3 cp {source} {destination}'
98
+ download_via_awscli = (f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
99
+ f'cp {source} {destination}')
86
100
 
87
101
  all_commands = list(self._GET_AWSCLI)
88
102
  all_commands.append(download_via_awscli)
@@ -102,8 +116,16 @@ class GcsCloudStorage(CloudStorage):
102
116
  @property
103
117
  def _gsutil_command(self):
104
118
  gsutil_alias, alias_gen = data_utils.get_gsutil_command()
105
- return (f'{alias_gen}; GOOGLE_APPLICATION_CREDENTIALS='
106
- f'{gcp.DEFAULT_GCP_APPLICATION_CREDENTIAL_PATH} {gsutil_alias}')
119
+ return (
120
+ f'{alias_gen}; GOOGLE_APPLICATION_CREDENTIALS='
121
+ f'{gcp.DEFAULT_GCP_APPLICATION_CREDENTIAL_PATH}; '
122
+ # Explicitly activate service account. Unlike the gcp packages
123
+ # and other GCP commands, gsutil does not automatically pick up
124
+ # the default credential keys when it is a service account.
125
+ 'gcloud auth activate-service-account '
126
+ '--key-file=$GOOGLE_APPLICATION_CREDENTIALS '
127
+ '2> /dev/null || true; '
128
+ f'{gsutil_alias}')
107
129
 
108
130
  def is_directory(self, url: str) -> bool:
109
131
  """Returns whether 'url' is a directory.
@@ -124,7 +146,7 @@ class GcsCloudStorage(CloudStorage):
124
146
  # If <url> is a bucket root, then we only need `gsutil` to succeed
125
147
  # to make sure the bucket exists. It is already a directory.
126
148
  _, key = data_utils.split_gcs_path(url)
127
- if len(key) == 0:
149
+ if not key:
128
150
  return True
129
151
  # Otherwise, gsutil ls -d url will return:
130
152
  # --> url.rstrip('/') if url is not a directory
@@ -153,12 +175,190 @@ class GcsCloudStorage(CloudStorage):
153
175
  return ' && '.join(all_commands)
154
176
 
155
177
 
178
+ class AzureBlobCloudStorage(CloudStorage):
179
+ """Azure Blob Storage."""
180
+ # AzCopy is utilized for downloading data from Azure Blob Storage
181
+ # containers to remote systems due to its superior performance compared to
182
+ # az-cli. While az-cli's `az storage blob sync` can synchronize data from
183
+ # local to container, it lacks support for container-to-remote
184
+ # synchronization. Moreover, `az storage blob download-batch` in az-cli
185
+ # does not leverage AzCopy's efficient multi-threaded capabilities, leading
186
+ # to slower performance.
187
+ #
188
+ # AzCopy requires appending SAS tokens directly in commands, as it does not
189
+ # support using STORAGE_ACCOUNT_KEY, unlike az-cli, which can generate
190
+ # SAS tokens but lacks direct multi-threading support like AzCopy.
191
+ # Hence, az-cli for SAS token generation is run on the local machine and
192
+ # AzCopy is installed at the remote machine for efficient data transfer
193
+ # from containers to remote systems.
194
+ # Note that on Azure instances, both az-cli and AzCopy are typically
195
+ # pre-installed. Installing both is only necessary when an AZ container is
196
+ # used from non-Azure instances.
197
+
198
+ _GET_AZCOPY = [
199
+ 'azcopy --version > /dev/null 2>&1 || '
200
+ '(mkdir -p /usr/local/bin; '
201
+ 'curl -L https://aka.ms/downloadazcopy-v10-linux -o azcopy.tar.gz; '
202
+ 'sudo tar -xvzf azcopy.tar.gz --strip-components=1 -C /usr/local/bin --exclude=*.txt; ' # pylint: disable=line-too-long
203
+ 'sudo chmod +x /usr/local/bin/azcopy; '
204
+ 'rm azcopy.tar.gz)'
205
+ ]
206
+
207
+ def is_directory(self, url: str) -> bool:
208
+ """Returns whether 'url' of the AZ Container is a directory.
209
+
210
+ In cloud object stores, a "directory" refers to a regular object whose
211
+ name is a prefix of other objects.
212
+
213
+ Args:
214
+ url: Endpoint url of the container/blob.
215
+
216
+ Returns:
217
+ True if the url is an endpoint of a directory and False if it
218
+ is a blob(file).
219
+
220
+ Raises:
221
+ azure.core.exceptions.HttpResponseError: If the user's Azure
222
+ account does not have sufficient IAM role for the given
223
+ storage account.
224
+ StorageBucketGetError: Provided container name does not exist.
225
+ TimeoutError: If unable to determine the container path status
226
+ in time.
227
+ """
228
+ storage_account_name, container_name, path = data_utils.split_az_path(
229
+ url)
230
+
231
+ # If there are more, we need to check if it is a directory or a file.
232
+ container_url = data_utils.AZURE_CONTAINER_URL.format(
233
+ storage_account_name=storage_account_name,
234
+ container_name=container_name)
235
+ resource_group_name = azure.get_az_resource_group(storage_account_name)
236
+ role_assignment_start = time.time()
237
+ refresh_client = False
238
+ role_assigned = False
239
+
240
+ # 1. List blobs in the container_url to decide whether it is a directory
241
+ # 2. If it fails due to permission issues, try to assign a permissive
242
+ # role for the storage account to the current Azure account
243
+ # 3. Wait for the role assignment to propagate and retry.
244
+ while (time.time() - role_assignment_start <
245
+ constants.WAIT_FOR_STORAGE_ACCOUNT_ROLE_ASSIGNMENT):
246
+ container_client = data_utils.create_az_client(
247
+ client_type='container',
248
+ container_url=container_url,
249
+ storage_account_name=storage_account_name,
250
+ resource_group_name=resource_group_name,
251
+ refresh_client=refresh_client)
252
+
253
+ if not container_client.exists():
254
+ with ux_utils.print_exception_no_traceback():
255
+ raise sky_exceptions.StorageBucketGetError(
256
+ f'The provided container {container_name!r} from the '
257
+ f'passed endpoint url {url!r} does not exist. Please '
258
+ 'check if the name is correct.')
259
+
260
+ # If there aren't more than just container name and storage account,
261
+ # that's a directory.
262
+ # Note: This must be run after existence of the storage account is
263
+ # checked while obtaining container client.
264
+ if not path:
265
+ return True
266
+
267
+ num_objects = 0
268
+ try:
269
+ for blob in container_client.list_blobs(name_starts_with=path):
270
+ if blob.name == path:
271
+ return False
272
+ num_objects += 1
273
+ if num_objects > 1:
274
+ return True
275
+ # A directory with few or no items
276
+ return True
277
+ except azure.exceptions().HttpResponseError as e:
278
+ # Handle case where user lacks sufficient IAM role for
279
+ # a private container in the same subscription. Attempt to
280
+ # assign appropriate role to current user.
281
+ if 'AuthorizationPermissionMismatch' in str(e):
282
+ if not role_assigned:
283
+ logger.info('Failed to list blobs in container '
284
+ f'{container_url!r}. This implies '
285
+ 'insufficient IAM role for storage account'
286
+ f' {storage_account_name!r}.')
287
+ azure.assign_storage_account_iam_role(
288
+ storage_account_name=storage_account_name,
289
+ resource_group_name=resource_group_name)
290
+ role_assigned = True
291
+ refresh_client = True
292
+ else:
293
+ logger.info(
294
+ 'Waiting due to the propagation delay of IAM '
295
+ 'role assignment to the storage account '
296
+ f'{storage_account_name!r}.')
297
+ time.sleep(
298
+ constants.RETRY_INTERVAL_AFTER_ROLE_ASSIGNMENT)
299
+ continue
300
+ raise
301
+ else:
302
+ raise TimeoutError(
303
+ 'Failed to determine the container path status within '
304
+ f'{constants.WAIT_FOR_STORAGE_ACCOUNT_ROLE_ASSIGNMENT}'
305
+ 'seconds.')
306
+
307
+ def _get_azcopy_source(self, source: str, is_dir: bool) -> str:
308
+ """Converts the source so it can be used as an argument for azcopy."""
309
+ storage_account_name, container_name, blob_path = (
310
+ data_utils.split_az_path(source))
311
+ storage_account_key = data_utils.get_az_storage_account_key(
312
+ storage_account_name)
313
+
314
+ if storage_account_key is None:
315
+ # public containers do not require SAS token for access
316
+ sas_token = ''
317
+ else:
318
+ if is_dir:
319
+ sas_token = azure.get_az_container_sas_token(
320
+ storage_account_name, storage_account_key, container_name)
321
+ else:
322
+ sas_token = azure.get_az_blob_sas_token(storage_account_name,
323
+ storage_account_key,
324
+ container_name,
325
+ blob_path)
326
+ # "?" is a delimiter character used when SAS token is attached to the
327
+ # container endpoint.
328
+ # Reference: https://learn.microsoft.com/en-us/azure/ai-services/translator/document-translation/how-to-guides/create-sas-tokens?tabs=Containers # pylint: disable=line-too-long
329
+ converted_source = f'{source}?{sas_token}' if sas_token else source
330
+
331
+ return shlex.quote(converted_source)
332
+
333
+ def make_sync_dir_command(self, source: str, destination: str) -> str:
334
+ """Fetches a directory using AZCOPY from storage to remote instance."""
335
+ source = self._get_azcopy_source(source, is_dir=True)
336
+ # destination is guaranteed to not have '/' at the end of the string
337
+ # by task.py::set_file_mounts(). It is necessary to add from this
338
+ # method due to syntax of azcopy.
339
+ destination = f'{destination}/'
340
+ download_command = (f'azcopy sync {source} {destination} '
341
+ '--recursive --delete-destination=false')
342
+ all_commands = list(self._GET_AZCOPY)
343
+ all_commands.append(download_command)
344
+ return ' && '.join(all_commands)
345
+
346
+ def make_sync_file_command(self, source: str, destination: str) -> str:
347
+ """Fetches a file using AZCOPY from storage to remote instance."""
348
+ source = self._get_azcopy_source(source, is_dir=False)
349
+ download_command = f'azcopy copy {source} {destination}'
350
+ all_commands = list(self._GET_AZCOPY)
351
+ all_commands.append(download_command)
352
+ return ' && '.join(all_commands)
353
+
354
+
156
355
  class R2CloudStorage(CloudStorage):
157
356
  """Cloudflare Cloud Storage."""
158
357
 
159
358
  # List of commands to install AWS CLI
160
359
  _GET_AWSCLI = [
161
- 'aws --version >/dev/null 2>&1 || pip3 install awscli',
360
+ 'aws --version >/dev/null 2>&1 || '
361
+ f'{constants.SKY_UV_PIP_CMD} install awscli',
162
362
  ]
163
363
 
164
364
  def is_directory(self, url: str) -> bool:
@@ -193,7 +393,8 @@ class R2CloudStorage(CloudStorage):
193
393
  source = source.replace('r2://', 's3://')
194
394
  download_via_awscli = ('AWS_SHARED_CREDENTIALS_FILE='
195
395
  f'{cloudflare.R2_CREDENTIALS_PATH} '
196
- 'aws s3 sync --no-follow-symlinks '
396
+ f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
397
+ 'sync --no-follow-symlinks '
197
398
  f'{source} {destination} '
198
399
  f'--endpoint {endpoint_url} '
199
400
  f'--profile={cloudflare.R2_PROFILE_NAME}')
@@ -209,7 +410,8 @@ class R2CloudStorage(CloudStorage):
209
410
  source = source.replace('r2://', 's3://')
210
411
  download_via_awscli = ('AWS_SHARED_CREDENTIALS_FILE='
211
412
  f'{cloudflare.R2_CREDENTIALS_PATH} '
212
- f'aws s3 cp {source} {destination} '
413
+ f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
414
+ f'cp {source} {destination} '
213
415
  f'--endpoint {endpoint_url} '
214
416
  f'--profile={cloudflare.R2_PROFILE_NAME}')
215
417
 
@@ -218,16 +420,6 @@ class R2CloudStorage(CloudStorage):
218
420
  return ' && '.join(all_commands)
219
421
 
220
422
 
221
- def get_storage_from_path(url: str) -> CloudStorage:
222
- """Returns a CloudStorage by identifying the scheme:// in a URL."""
223
- result = urllib.parse.urlsplit(url)
224
-
225
- if result.scheme not in _REGISTRY:
226
- assert False, (f'Scheme {result.scheme} not found in'
227
- f' supported storage ({_REGISTRY.keys()}); path {url}')
228
- return _REGISTRY[result.scheme]
229
-
230
-
231
423
  class IBMCosCloudStorage(CloudStorage):
232
424
  """IBM Cloud Storage."""
233
425
  # install rclone if package isn't already installed
@@ -294,10 +486,82 @@ class IBMCosCloudStorage(CloudStorage):
294
486
  return self.make_sync_dir_command(source, destination)
295
487
 
296
488
 
489
+ class OciCloudStorage(CloudStorage):
490
+ """OCI Cloud Storage."""
491
+
492
+ def is_directory(self, url: str) -> bool:
493
+ """Returns whether OCI 'url' is a directory.
494
+ In cloud object stores, a "directory" refers to a regular object whose
495
+ name is a prefix of other objects.
496
+ """
497
+ bucket_name, path = data_utils.split_oci_path(url)
498
+
499
+ client = oci.get_object_storage_client()
500
+ namespace = client.get_namespace(
501
+ compartment_id=oci.get_oci_config()['tenancy']).data
502
+
503
+ objects = client.list_objects(namespace_name=namespace,
504
+ bucket_name=bucket_name,
505
+ prefix=path).data.objects
506
+
507
+ if len(objects) == 0:
508
+ # A directory with no items
509
+ return True
510
+
511
+ if len(objects) > 1:
512
+ # A directory with more than 1 item
513
+ return True
514
+
515
+ object_name = objects[0].name
516
+ if path.endswith(object_name):
517
+ # An object path
518
+ return False
519
+
520
+ # A directory with only 1 item
521
+ return True
522
+
523
+ @oci.with_oci_env
524
+ def make_sync_dir_command(self, source: str, destination: str) -> str:
525
+ """Downloads using OCI CLI."""
526
+ bucket_name, path = data_utils.split_oci_path(source)
527
+
528
+ download_via_ocicli = (f'oci os object sync --no-follow-symlinks '
529
+ f'--bucket-name {bucket_name} '
530
+ f'--prefix "{path}" --dest-dir "{destination}"')
531
+
532
+ return download_via_ocicli
533
+
534
+ @oci.with_oci_env
535
+ def make_sync_file_command(self, source: str, destination: str) -> str:
536
+ """Downloads a file using OCI CLI."""
537
+ bucket_name, path = data_utils.split_oci_path(source)
538
+ filename = os.path.basename(path)
539
+ destination = os.path.join(destination, filename)
540
+
541
+ download_via_ocicli = (f'oci os object get --bucket-name {bucket_name} '
542
+ f'--name "{path}" --file "{destination}"')
543
+
544
+ return download_via_ocicli
545
+
546
+
547
+ def get_storage_from_path(url: str) -> CloudStorage:
548
+ """Returns a CloudStorage by identifying the scheme:// in a URL."""
549
+ result = urllib.parse.urlsplit(url)
550
+ if result.scheme not in _REGISTRY:
551
+ assert False, (f'Scheme {result.scheme} not found in'
552
+ f' supported storage ({_REGISTRY.keys()}); path {url}')
553
+ return _REGISTRY[result.scheme]
554
+
555
+
297
556
  # Maps bucket's URIs prefix(scheme) to its corresponding storage class
298
557
  _REGISTRY = {
299
558
  'gs': GcsCloudStorage(),
300
559
  's3': S3CloudStorage(),
301
560
  'r2': R2CloudStorage(),
302
561
  'cos': IBMCosCloudStorage(),
562
+ 'oci': OciCloudStorage(),
563
+ # TODO: This is a hack, as Azure URL starts with https://, we should
564
+ # refactor the registry to be able to take regex, so that Azure blob can
565
+ # be identified with `https://(.*?)\.blob\.core\.windows\.net`
566
+ 'https': AzureBlobCloudStorage()
303
567
  }
sky/clouds/__init__.py CHANGED
@@ -3,26 +3,29 @@
3
3
  from sky.clouds.cloud import Cloud
4
4
  from sky.clouds.cloud import cloud_in_iterable
5
5
  from sky.clouds.cloud import CloudImplementationFeatures
6
+ from sky.clouds.cloud import OpenPortsVersion
6
7
  from sky.clouds.cloud import ProvisionerVersion
7
8
  from sky.clouds.cloud import Region
8
9
  from sky.clouds.cloud import StatusVersion
9
10
  from sky.clouds.cloud import Zone
10
- from sky.clouds.cloud_registry import CLOUD_REGISTRY
11
11
 
12
12
  # NOTE: import the above first to avoid circular imports.
13
13
  # isort: split
14
14
  from sky.clouds.aws import AWS
15
15
  from sky.clouds.azure import Azure
16
16
  from sky.clouds.cudo import Cudo
17
+ from sky.clouds.do import DO
17
18
  from sky.clouds.fluidstack import Fluidstack
18
19
  from sky.clouds.gcp import GCP
19
20
  from sky.clouds.ibm import IBM
20
21
  from sky.clouds.kubernetes import Kubernetes
21
22
  from sky.clouds.lambda_cloud import Lambda
23
+ from sky.clouds.nebius import Nebius
22
24
  from sky.clouds.oci import OCI
23
25
  from sky.clouds.paperspace import Paperspace
24
26
  from sky.clouds.runpod import RunPod
25
27
  from sky.clouds.scp import SCP
28
+ from sky.clouds.vast import Vast
26
29
  from sky.clouds.vsphere import Vsphere
27
30
 
28
31
  __all__ = [
@@ -33,19 +36,21 @@ __all__ = [
33
36
  'Cudo',
34
37
  'GCP',
35
38
  'Lambda',
39
+ 'DO',
36
40
  'Paperspace',
37
41
  'SCP',
38
42
  'RunPod',
43
+ 'Vast',
39
44
  'OCI',
40
45
  'Vsphere',
41
46
  'Kubernetes',
42
47
  'CloudImplementationFeatures',
43
48
  'Region',
44
49
  'Zone',
45
- 'CLOUD_REGISTRY',
46
50
  'ProvisionerVersion',
47
51
  'StatusVersion',
48
52
  'Fluidstack',
53
+ 'Nebius',
49
54
  # Utility functions
50
55
  'cloud_in_iterable',
51
56
  ]