skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +64 -32
- sky/adaptors/aws.py +23 -6
- sky/adaptors/azure.py +432 -15
- sky/adaptors/cloudflare.py +5 -5
- sky/adaptors/common.py +19 -9
- sky/adaptors/do.py +20 -0
- sky/adaptors/gcp.py +3 -2
- sky/adaptors/kubernetes.py +122 -88
- sky/adaptors/nebius.py +100 -0
- sky/adaptors/oci.py +39 -1
- sky/adaptors/vast.py +29 -0
- sky/admin_policy.py +101 -0
- sky/authentication.py +117 -98
- sky/backends/backend.py +52 -20
- sky/backends/backend_utils.py +669 -557
- sky/backends/cloud_vm_ray_backend.py +1099 -808
- sky/backends/local_docker_backend.py +14 -8
- sky/backends/wheel_utils.py +38 -20
- sky/benchmark/benchmark_utils.py +22 -23
- sky/check.py +76 -27
- sky/cli.py +1586 -1139
- sky/client/__init__.py +1 -0
- sky/client/cli.py +5683 -0
- sky/client/common.py +345 -0
- sky/client/sdk.py +1765 -0
- sky/cloud_stores.py +283 -19
- sky/clouds/__init__.py +7 -2
- sky/clouds/aws.py +303 -112
- sky/clouds/azure.py +185 -179
- sky/clouds/cloud.py +115 -37
- sky/clouds/cudo.py +29 -22
- sky/clouds/do.py +313 -0
- sky/clouds/fluidstack.py +44 -54
- sky/clouds/gcp.py +206 -65
- sky/clouds/ibm.py +26 -21
- sky/clouds/kubernetes.py +345 -91
- sky/clouds/lambda_cloud.py +40 -29
- sky/clouds/nebius.py +297 -0
- sky/clouds/oci.py +129 -90
- sky/clouds/paperspace.py +22 -18
- sky/clouds/runpod.py +53 -34
- sky/clouds/scp.py +28 -24
- sky/clouds/service_catalog/__init__.py +19 -13
- sky/clouds/service_catalog/aws_catalog.py +29 -12
- sky/clouds/service_catalog/azure_catalog.py +33 -6
- sky/clouds/service_catalog/common.py +95 -75
- sky/clouds/service_catalog/constants.py +3 -3
- sky/clouds/service_catalog/cudo_catalog.py +13 -3
- sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
- sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
- sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
- sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
- sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
- sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
- sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
- sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
- sky/clouds/service_catalog/do_catalog.py +111 -0
- sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
- sky/clouds/service_catalog/gcp_catalog.py +16 -2
- sky/clouds/service_catalog/ibm_catalog.py +2 -2
- sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
- sky/clouds/service_catalog/lambda_catalog.py +8 -3
- sky/clouds/service_catalog/nebius_catalog.py +116 -0
- sky/clouds/service_catalog/oci_catalog.py +31 -4
- sky/clouds/service_catalog/paperspace_catalog.py +2 -2
- sky/clouds/service_catalog/runpod_catalog.py +2 -2
- sky/clouds/service_catalog/scp_catalog.py +2 -2
- sky/clouds/service_catalog/vast_catalog.py +104 -0
- sky/clouds/service_catalog/vsphere_catalog.py +2 -2
- sky/clouds/utils/aws_utils.py +65 -0
- sky/clouds/utils/azure_utils.py +91 -0
- sky/clouds/utils/gcp_utils.py +5 -9
- sky/clouds/utils/oci_utils.py +47 -5
- sky/clouds/utils/scp_utils.py +4 -3
- sky/clouds/vast.py +280 -0
- sky/clouds/vsphere.py +22 -18
- sky/core.py +361 -107
- sky/dag.py +41 -28
- sky/data/data_transfer.py +37 -0
- sky/data/data_utils.py +211 -32
- sky/data/mounting_utils.py +182 -30
- sky/data/storage.py +2118 -270
- sky/data/storage_utils.py +126 -5
- sky/exceptions.py +179 -8
- sky/execution.py +158 -85
- sky/global_user_state.py +150 -34
- sky/jobs/__init__.py +12 -10
- sky/jobs/client/__init__.py +0 -0
- sky/jobs/client/sdk.py +302 -0
- sky/jobs/constants.py +49 -11
- sky/jobs/controller.py +161 -99
- sky/jobs/dashboard/dashboard.py +171 -25
- sky/jobs/dashboard/templates/index.html +572 -60
- sky/jobs/recovery_strategy.py +157 -156
- sky/jobs/scheduler.py +307 -0
- sky/jobs/server/__init__.py +1 -0
- sky/jobs/server/core.py +598 -0
- sky/jobs/server/dashboard_utils.py +69 -0
- sky/jobs/server/server.py +190 -0
- sky/jobs/state.py +627 -122
- sky/jobs/utils.py +615 -206
- sky/models.py +27 -0
- sky/optimizer.py +142 -83
- sky/provision/__init__.py +20 -5
- sky/provision/aws/config.py +124 -42
- sky/provision/aws/instance.py +130 -53
- sky/provision/azure/__init__.py +7 -0
- sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
- sky/provision/azure/config.py +220 -0
- sky/provision/azure/instance.py +1012 -37
- sky/provision/common.py +31 -3
- sky/provision/constants.py +25 -0
- sky/provision/cudo/__init__.py +2 -1
- sky/provision/cudo/cudo_utils.py +112 -0
- sky/provision/cudo/cudo_wrapper.py +37 -16
- sky/provision/cudo/instance.py +28 -12
- sky/provision/do/__init__.py +11 -0
- sky/provision/do/config.py +14 -0
- sky/provision/do/constants.py +10 -0
- sky/provision/do/instance.py +287 -0
- sky/provision/do/utils.py +301 -0
- sky/provision/docker_utils.py +82 -46
- sky/provision/fluidstack/fluidstack_utils.py +57 -125
- sky/provision/fluidstack/instance.py +15 -43
- sky/provision/gcp/config.py +19 -9
- sky/provision/gcp/constants.py +7 -1
- sky/provision/gcp/instance.py +55 -34
- sky/provision/gcp/instance_utils.py +339 -80
- sky/provision/gcp/mig_utils.py +210 -0
- sky/provision/instance_setup.py +172 -133
- sky/provision/kubernetes/__init__.py +1 -0
- sky/provision/kubernetes/config.py +104 -90
- sky/provision/kubernetes/constants.py +8 -0
- sky/provision/kubernetes/instance.py +680 -325
- sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
- sky/provision/kubernetes/network.py +54 -20
- sky/provision/kubernetes/network_utils.py +70 -21
- sky/provision/kubernetes/utils.py +1370 -251
- sky/provision/lambda_cloud/__init__.py +11 -0
- sky/provision/lambda_cloud/config.py +10 -0
- sky/provision/lambda_cloud/instance.py +265 -0
- sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
- sky/provision/logging.py +1 -1
- sky/provision/nebius/__init__.py +11 -0
- sky/provision/nebius/config.py +11 -0
- sky/provision/nebius/instance.py +285 -0
- sky/provision/nebius/utils.py +318 -0
- sky/provision/oci/__init__.py +15 -0
- sky/provision/oci/config.py +51 -0
- sky/provision/oci/instance.py +436 -0
- sky/provision/oci/query_utils.py +681 -0
- sky/provision/paperspace/constants.py +6 -0
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/paperspace/utils.py +2 -0
- sky/provision/provisioner.py +207 -130
- sky/provision/runpod/__init__.py +1 -0
- sky/provision/runpod/api/__init__.py +3 -0
- sky/provision/runpod/api/commands.py +119 -0
- sky/provision/runpod/api/pods.py +142 -0
- sky/provision/runpod/instance.py +64 -8
- sky/provision/runpod/utils.py +239 -23
- sky/provision/vast/__init__.py +10 -0
- sky/provision/vast/config.py +11 -0
- sky/provision/vast/instance.py +247 -0
- sky/provision/vast/utils.py +162 -0
- sky/provision/vsphere/common/vim_utils.py +1 -1
- sky/provision/vsphere/instance.py +8 -18
- sky/provision/vsphere/vsphere_utils.py +1 -1
- sky/resources.py +247 -102
- sky/serve/__init__.py +9 -9
- sky/serve/autoscalers.py +361 -299
- sky/serve/client/__init__.py +0 -0
- sky/serve/client/sdk.py +366 -0
- sky/serve/constants.py +12 -3
- sky/serve/controller.py +106 -36
- sky/serve/load_balancer.py +63 -12
- sky/serve/load_balancing_policies.py +84 -2
- sky/serve/replica_managers.py +42 -34
- sky/serve/serve_state.py +62 -32
- sky/serve/serve_utils.py +271 -160
- sky/serve/server/__init__.py +0 -0
- sky/serve/{core.py → server/core.py} +271 -90
- sky/serve/server/server.py +112 -0
- sky/serve/service.py +52 -16
- sky/serve/service_spec.py +95 -32
- sky/server/__init__.py +1 -0
- sky/server/common.py +430 -0
- sky/server/constants.py +21 -0
- sky/server/html/log.html +174 -0
- sky/server/requests/__init__.py +0 -0
- sky/server/requests/executor.py +472 -0
- sky/server/requests/payloads.py +487 -0
- sky/server/requests/queues/__init__.py +0 -0
- sky/server/requests/queues/mp_queue.py +76 -0
- sky/server/requests/requests.py +567 -0
- sky/server/requests/serializers/__init__.py +0 -0
- sky/server/requests/serializers/decoders.py +192 -0
- sky/server/requests/serializers/encoders.py +166 -0
- sky/server/server.py +1106 -0
- sky/server/stream_utils.py +141 -0
- sky/setup_files/MANIFEST.in +2 -5
- sky/setup_files/dependencies.py +159 -0
- sky/setup_files/setup.py +14 -125
- sky/sky_logging.py +59 -14
- sky/skylet/autostop_lib.py +2 -2
- sky/skylet/constants.py +183 -50
- sky/skylet/events.py +22 -10
- sky/skylet/job_lib.py +403 -258
- sky/skylet/log_lib.py +111 -71
- sky/skylet/log_lib.pyi +6 -0
- sky/skylet/providers/command_runner.py +6 -8
- sky/skylet/providers/ibm/node_provider.py +2 -2
- sky/skylet/providers/scp/config.py +11 -3
- sky/skylet/providers/scp/node_provider.py +8 -8
- sky/skylet/skylet.py +3 -1
- sky/skylet/subprocess_daemon.py +69 -17
- sky/skypilot_config.py +119 -57
- sky/task.py +205 -64
- sky/templates/aws-ray.yml.j2 +37 -7
- sky/templates/azure-ray.yml.j2 +27 -82
- sky/templates/cudo-ray.yml.j2 +7 -3
- sky/templates/do-ray.yml.j2 +98 -0
- sky/templates/fluidstack-ray.yml.j2 +7 -4
- sky/templates/gcp-ray.yml.j2 +26 -6
- sky/templates/ibm-ray.yml.j2 +3 -2
- sky/templates/jobs-controller.yaml.j2 +46 -11
- sky/templates/kubernetes-ingress.yml.j2 +7 -0
- sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
- sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
- sky/templates/kubernetes-ray.yml.j2 +292 -25
- sky/templates/lambda-ray.yml.j2 +30 -40
- sky/templates/nebius-ray.yml.j2 +79 -0
- sky/templates/oci-ray.yml.j2 +18 -57
- sky/templates/paperspace-ray.yml.j2 +10 -6
- sky/templates/runpod-ray.yml.j2 +26 -4
- sky/templates/scp-ray.yml.j2 +3 -2
- sky/templates/sky-serve-controller.yaml.j2 +12 -1
- sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
- sky/templates/vast-ray.yml.j2 +70 -0
- sky/templates/vsphere-ray.yml.j2 +8 -3
- sky/templates/websocket_proxy.py +64 -0
- sky/usage/constants.py +10 -1
- sky/usage/usage_lib.py +130 -37
- sky/utils/accelerator_registry.py +35 -51
- sky/utils/admin_policy_utils.py +147 -0
- sky/utils/annotations.py +51 -0
- sky/utils/cli_utils/status_utils.py +81 -23
- sky/utils/cluster_utils.py +356 -0
- sky/utils/command_runner.py +452 -89
- sky/utils/command_runner.pyi +77 -3
- sky/utils/common.py +54 -0
- sky/utils/common_utils.py +319 -108
- sky/utils/config_utils.py +204 -0
- sky/utils/control_master_utils.py +48 -0
- sky/utils/controller_utils.py +548 -266
- sky/utils/dag_utils.py +93 -32
- sky/utils/db_utils.py +18 -4
- sky/utils/env_options.py +29 -7
- sky/utils/kubernetes/create_cluster.sh +8 -60
- sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
- sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
- sky/utils/kubernetes/gpu_labeler.py +4 -4
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
- sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
- sky/utils/kubernetes/rsync_helper.sh +24 -0
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
- sky/utils/log_utils.py +240 -33
- sky/utils/message_utils.py +81 -0
- sky/utils/registry.py +127 -0
- sky/utils/resources_utils.py +94 -22
- sky/utils/rich_utils.py +247 -18
- sky/utils/schemas.py +284 -64
- sky/{status_lib.py → utils/status_lib.py} +12 -7
- sky/utils/subprocess_utils.py +212 -46
- sky/utils/timeline.py +12 -7
- sky/utils/ux_utils.py +168 -15
- skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
- skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
- sky/clouds/cloud_registry.py +0 -31
- sky/jobs/core.py +0 -330
- sky/skylet/providers/azure/__init__.py +0 -2
- sky/skylet/providers/azure/azure-vm-template.json +0 -301
- sky/skylet/providers/azure/config.py +0 -170
- sky/skylet/providers/azure/node_provider.py +0 -466
- sky/skylet/providers/lambda_cloud/__init__.py +0 -2
- sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
- sky/skylet/providers/oci/__init__.py +0 -2
- sky/skylet/providers/oci/node_provider.py +0 -488
- sky/skylet/providers/oci/query_helper.py +0 -383
- sky/skylet/providers/oci/utils.py +0 -21
- sky/utils/cluster_yaml_utils.py +0 -24
- sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
- skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
- skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
sky/cloud_stores.py
CHANGED
@@ -7,15 +7,26 @@ TODO:
|
|
7
7
|
* Better interface.
|
8
8
|
* Better implementation (e.g., fsspec, smart_open, using each cloud's SDK).
|
9
9
|
"""
|
10
|
+
import os
|
11
|
+
import shlex
|
10
12
|
import subprocess
|
13
|
+
import time
|
11
14
|
import urllib.parse
|
12
15
|
|
16
|
+
from sky import exceptions as sky_exceptions
|
17
|
+
from sky import sky_logging
|
13
18
|
from sky.adaptors import aws
|
19
|
+
from sky.adaptors import azure
|
14
20
|
from sky.adaptors import cloudflare
|
15
21
|
from sky.adaptors import ibm
|
22
|
+
from sky.adaptors import oci
|
16
23
|
from sky.clouds import gcp
|
17
24
|
from sky.data import data_utils
|
18
25
|
from sky.data.data_utils import Rclone
|
26
|
+
from sky.skylet import constants
|
27
|
+
from sky.utils import ux_utils
|
28
|
+
|
29
|
+
logger = sky_logging.init_logger(__name__)
|
19
30
|
|
20
31
|
|
21
32
|
class CloudStorage:
|
@@ -43,7 +54,8 @@ class S3CloudStorage(CloudStorage):
|
|
43
54
|
|
44
55
|
# List of commands to install AWS CLI
|
45
56
|
_GET_AWSCLI = [
|
46
|
-
'aws --version >/dev/null 2>&1 ||
|
57
|
+
'aws --version >/dev/null 2>&1 || '
|
58
|
+
f'{constants.SKY_UV_PIP_CMD} install awscli',
|
47
59
|
]
|
48
60
|
|
49
61
|
def is_directory(self, url: str) -> bool:
|
@@ -73,7 +85,8 @@ class S3CloudStorage(CloudStorage):
|
|
73
85
|
# AWS Sync by default uses 10 threads to upload files to the bucket.
|
74
86
|
# To increase parallelism, modify max_concurrent_requests in your
|
75
87
|
# aws config file (Default path: ~/.aws/config).
|
76
|
-
download_via_awscli = ('aws s3
|
88
|
+
download_via_awscli = (f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
|
89
|
+
'sync --no-follow-symlinks '
|
77
90
|
f'{source} {destination}')
|
78
91
|
|
79
92
|
all_commands = list(self._GET_AWSCLI)
|
@@ -82,7 +95,8 @@ class S3CloudStorage(CloudStorage):
|
|
82
95
|
|
83
96
|
def make_sync_file_command(self, source: str, destination: str) -> str:
|
84
97
|
"""Downloads a file using AWS CLI."""
|
85
|
-
download_via_awscli = f'aws s3
|
98
|
+
download_via_awscli = (f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
|
99
|
+
f'cp {source} {destination}')
|
86
100
|
|
87
101
|
all_commands = list(self._GET_AWSCLI)
|
88
102
|
all_commands.append(download_via_awscli)
|
@@ -102,8 +116,16 @@ class GcsCloudStorage(CloudStorage):
|
|
102
116
|
@property
|
103
117
|
def _gsutil_command(self):
|
104
118
|
gsutil_alias, alias_gen = data_utils.get_gsutil_command()
|
105
|
-
return (
|
106
|
-
|
119
|
+
return (
|
120
|
+
f'{alias_gen}; GOOGLE_APPLICATION_CREDENTIALS='
|
121
|
+
f'{gcp.DEFAULT_GCP_APPLICATION_CREDENTIAL_PATH}; '
|
122
|
+
# Explicitly activate service account. Unlike the gcp packages
|
123
|
+
# and other GCP commands, gsutil does not automatically pick up
|
124
|
+
# the default credential keys when it is a service account.
|
125
|
+
'gcloud auth activate-service-account '
|
126
|
+
'--key-file=$GOOGLE_APPLICATION_CREDENTIALS '
|
127
|
+
'2> /dev/null || true; '
|
128
|
+
f'{gsutil_alias}')
|
107
129
|
|
108
130
|
def is_directory(self, url: str) -> bool:
|
109
131
|
"""Returns whether 'url' is a directory.
|
@@ -124,7 +146,7 @@ class GcsCloudStorage(CloudStorage):
|
|
124
146
|
# If <url> is a bucket root, then we only need `gsutil` to succeed
|
125
147
|
# to make sure the bucket exists. It is already a directory.
|
126
148
|
_, key = data_utils.split_gcs_path(url)
|
127
|
-
if
|
149
|
+
if not key:
|
128
150
|
return True
|
129
151
|
# Otherwise, gsutil ls -d url will return:
|
130
152
|
# --> url.rstrip('/') if url is not a directory
|
@@ -153,12 +175,190 @@ class GcsCloudStorage(CloudStorage):
|
|
153
175
|
return ' && '.join(all_commands)
|
154
176
|
|
155
177
|
|
178
|
+
class AzureBlobCloudStorage(CloudStorage):
|
179
|
+
"""Azure Blob Storage."""
|
180
|
+
# AzCopy is utilized for downloading data from Azure Blob Storage
|
181
|
+
# containers to remote systems due to its superior performance compared to
|
182
|
+
# az-cli. While az-cli's `az storage blob sync` can synchronize data from
|
183
|
+
# local to container, it lacks support for container to remote
|
184
|
+
# synchronization. Moreover, `az storage blob download-batch` in az-cli
|
185
|
+
# does not leverage AzCopy's efficient multi-threaded capabilities, leading
|
186
|
+
# to slower performance.
|
187
|
+
#
|
188
|
+
# AzCopy requires appending SAS tokens directly in commands, as it does not
|
189
|
+
# support using STORAGE_ACCOUNT_KEY, unlike az-cli, which can generate
|
190
|
+
# SAS tokens but lacks direct multi-threading support like AzCopy.
|
191
|
+
# Hence, az-cli for SAS token generation is run on the local machine and
|
192
|
+
# AzCopy is installed at the remote machine for efficient data transfer
|
193
|
+
# from containers to remote systems.
|
194
|
+
# Note that on Azure instances, both az-cli and AzCopy are typically
|
195
|
+
# pre-installed. Installing both is needed only when an AZ container is
|
196
|
+
# used from non-Azure instances.
|
197
|
+
|
198
|
+
_GET_AZCOPY = [
|
199
|
+
'azcopy --version > /dev/null 2>&1 || '
|
200
|
+
'(mkdir -p /usr/local/bin; '
|
201
|
+
'curl -L https://aka.ms/downloadazcopy-v10-linux -o azcopy.tar.gz; '
|
202
|
+
'sudo tar -xvzf azcopy.tar.gz --strip-components=1 -C /usr/local/bin --exclude=*.txt; ' # pylint: disable=line-too-long
|
203
|
+
'sudo chmod +x /usr/local/bin/azcopy; '
|
204
|
+
'rm azcopy.tar.gz)'
|
205
|
+
]
|
206
|
+
|
207
|
+
def is_directory(self, url: str) -> bool:
|
208
|
+
"""Returns whether 'url' of the AZ Container is a directory.
|
209
|
+
|
210
|
+
In cloud object stores, a "directory" refers to a regular object whose
|
211
|
+
name is a prefix of other objects.
|
212
|
+
|
213
|
+
Args:
|
214
|
+
url: Endpoint url of the container/blob.
|
215
|
+
|
216
|
+
Returns:
|
217
|
+
True if the url is an endpoint of a directory and False if it
|
218
|
+
is a blob(file).
|
219
|
+
|
220
|
+
Raises:
|
221
|
+
azure.core.exceptions.HttpResponseError: If the user's Azure
|
222
|
+
account does not have sufficient IAM role for the given
|
223
|
+
storage account.
|
224
|
+
StorageBucketGetError: Provided container name does not exist.
|
225
|
+
TimeoutError: If unable to determine the container path status
|
226
|
+
in time.
|
227
|
+
"""
|
228
|
+
storage_account_name, container_name, path = data_utils.split_az_path(
|
229
|
+
url)
|
230
|
+
|
231
|
+
# If there are more, we need to check if it is a directory or a file.
|
232
|
+
container_url = data_utils.AZURE_CONTAINER_URL.format(
|
233
|
+
storage_account_name=storage_account_name,
|
234
|
+
container_name=container_name)
|
235
|
+
resource_group_name = azure.get_az_resource_group(storage_account_name)
|
236
|
+
role_assignment_start = time.time()
|
237
|
+
refresh_client = False
|
238
|
+
role_assigned = False
|
239
|
+
|
240
|
+
# 1. List blobs in the container_url to decide whether it is a directory
|
241
|
+
# 2. If it fails due to permission issues, try to assign a permissive
|
242
|
+
# role for the storage account to the current Azure account
|
243
|
+
# 3. Wait for the role assignment to propagate and retry.
|
244
|
+
while (time.time() - role_assignment_start <
|
245
|
+
constants.WAIT_FOR_STORAGE_ACCOUNT_ROLE_ASSIGNMENT):
|
246
|
+
container_client = data_utils.create_az_client(
|
247
|
+
client_type='container',
|
248
|
+
container_url=container_url,
|
249
|
+
storage_account_name=storage_account_name,
|
250
|
+
resource_group_name=resource_group_name,
|
251
|
+
refresh_client=refresh_client)
|
252
|
+
|
253
|
+
if not container_client.exists():
|
254
|
+
with ux_utils.print_exception_no_traceback():
|
255
|
+
raise sky_exceptions.StorageBucketGetError(
|
256
|
+
f'The provided container {container_name!r} from the '
|
257
|
+
f'passed endpoint url {url!r} does not exist. Please '
|
258
|
+
'check if the name is correct.')
|
259
|
+
|
260
|
+
# If there aren't more than just container name and storage account,
|
261
|
+
# that's a directory.
|
262
|
+
# Note: This must be run after the existence of the storage account is
|
263
|
+
# checked while obtaining container client.
|
264
|
+
if not path:
|
265
|
+
return True
|
266
|
+
|
267
|
+
num_objects = 0
|
268
|
+
try:
|
269
|
+
for blob in container_client.list_blobs(name_starts_with=path):
|
270
|
+
if blob.name == path:
|
271
|
+
return False
|
272
|
+
num_objects += 1
|
273
|
+
if num_objects > 1:
|
274
|
+
return True
|
275
|
+
# A directory with few or no items
|
276
|
+
return True
|
277
|
+
except azure.exceptions().HttpResponseError as e:
|
278
|
+
# Handle case where user lacks sufficient IAM role for
|
279
|
+
# a private container in the same subscription. Attempt to
|
280
|
+
# assign appropriate role to current user.
|
281
|
+
if 'AuthorizationPermissionMismatch' in str(e):
|
282
|
+
if not role_assigned:
|
283
|
+
logger.info('Failed to list blobs in container '
|
284
|
+
f'{container_url!r}. This implies '
|
285
|
+
'insufficient IAM role for storage account'
|
286
|
+
f' {storage_account_name!r}.')
|
287
|
+
azure.assign_storage_account_iam_role(
|
288
|
+
storage_account_name=storage_account_name,
|
289
|
+
resource_group_name=resource_group_name)
|
290
|
+
role_assigned = True
|
291
|
+
refresh_client = True
|
292
|
+
else:
|
293
|
+
logger.info(
|
294
|
+
'Waiting due to the propagation delay of IAM '
|
295
|
+
'role assignment to the storage account '
|
296
|
+
f'{storage_account_name!r}.')
|
297
|
+
time.sleep(
|
298
|
+
constants.RETRY_INTERVAL_AFTER_ROLE_ASSIGNMENT)
|
299
|
+
continue
|
300
|
+
raise
|
301
|
+
else:
|
302
|
+
raise TimeoutError(
|
303
|
+
'Failed to determine the container path status within '
|
304
|
+
f'{constants.WAIT_FOR_STORAGE_ACCOUNT_ROLE_ASSIGNMENT}'
|
305
|
+
'seconds.')
|
306
|
+
|
307
|
+
def _get_azcopy_source(self, source: str, is_dir: bool) -> str:
|
308
|
+
"""Converts the source so it can be used as an argument for azcopy."""
|
309
|
+
storage_account_name, container_name, blob_path = (
|
310
|
+
data_utils.split_az_path(source))
|
311
|
+
storage_account_key = data_utils.get_az_storage_account_key(
|
312
|
+
storage_account_name)
|
313
|
+
|
314
|
+
if storage_account_key is None:
|
315
|
+
# public containers do not require SAS token for access
|
316
|
+
sas_token = ''
|
317
|
+
else:
|
318
|
+
if is_dir:
|
319
|
+
sas_token = azure.get_az_container_sas_token(
|
320
|
+
storage_account_name, storage_account_key, container_name)
|
321
|
+
else:
|
322
|
+
sas_token = azure.get_az_blob_sas_token(storage_account_name,
|
323
|
+
storage_account_key,
|
324
|
+
container_name,
|
325
|
+
blob_path)
|
326
|
+
# "?" is a delimiter character used when SAS token is attached to the
|
327
|
+
# container endpoint.
|
328
|
+
# Reference: https://learn.microsoft.com/en-us/azure/ai-services/translator/document-translation/how-to-guides/create-sas-tokens?tabs=Containers # pylint: disable=line-too-long
|
329
|
+
converted_source = f'{source}?{sas_token}' if sas_token else source
|
330
|
+
|
331
|
+
return shlex.quote(converted_source)
|
332
|
+
|
333
|
+
def make_sync_dir_command(self, source: str, destination: str) -> str:
|
334
|
+
"""Fetches a directory using AZCOPY from storage to remote instance."""
|
335
|
+
source = self._get_azcopy_source(source, is_dir=True)
|
336
|
+
# destination is guaranteed to not have '/' at the end of the string
|
337
|
+
# by task.py::set_file_mounts(). It is necessary to add it in this
|
338
|
+
# method due to syntax of azcopy.
|
339
|
+
destination = f'{destination}/'
|
340
|
+
download_command = (f'azcopy sync {source} {destination} '
|
341
|
+
'--recursive --delete-destination=false')
|
342
|
+
all_commands = list(self._GET_AZCOPY)
|
343
|
+
all_commands.append(download_command)
|
344
|
+
return ' && '.join(all_commands)
|
345
|
+
|
346
|
+
def make_sync_file_command(self, source: str, destination: str) -> str:
|
347
|
+
"""Fetches a file using AZCOPY from storage to remote instance."""
|
348
|
+
source = self._get_azcopy_source(source, is_dir=False)
|
349
|
+
download_command = f'azcopy copy {source} {destination}'
|
350
|
+
all_commands = list(self._GET_AZCOPY)
|
351
|
+
all_commands.append(download_command)
|
352
|
+
return ' && '.join(all_commands)
|
353
|
+
|
354
|
+
|
156
355
|
class R2CloudStorage(CloudStorage):
|
157
356
|
"""Cloudflare Cloud Storage."""
|
158
357
|
|
159
358
|
# List of commands to install AWS CLI
|
160
359
|
_GET_AWSCLI = [
|
161
|
-
'aws --version >/dev/null 2>&1 ||
|
360
|
+
'aws --version >/dev/null 2>&1 || '
|
361
|
+
f'{constants.SKY_UV_PIP_CMD} install awscli',
|
162
362
|
]
|
163
363
|
|
164
364
|
def is_directory(self, url: str) -> bool:
|
@@ -193,7 +393,8 @@ class R2CloudStorage(CloudStorage):
|
|
193
393
|
source = source.replace('r2://', 's3://')
|
194
394
|
download_via_awscli = ('AWS_SHARED_CREDENTIALS_FILE='
|
195
395
|
f'{cloudflare.R2_CREDENTIALS_PATH} '
|
196
|
-
'aws s3
|
396
|
+
f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
|
397
|
+
'sync --no-follow-symlinks '
|
197
398
|
f'{source} {destination} '
|
198
399
|
f'--endpoint {endpoint_url} '
|
199
400
|
f'--profile={cloudflare.R2_PROFILE_NAME}')
|
@@ -209,7 +410,8 @@ class R2CloudStorage(CloudStorage):
|
|
209
410
|
source = source.replace('r2://', 's3://')
|
210
411
|
download_via_awscli = ('AWS_SHARED_CREDENTIALS_FILE='
|
211
412
|
f'{cloudflare.R2_CREDENTIALS_PATH} '
|
212
|
-
f'aws s3
|
413
|
+
f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
|
414
|
+
f'cp {source} {destination} '
|
213
415
|
f'--endpoint {endpoint_url} '
|
214
416
|
f'--profile={cloudflare.R2_PROFILE_NAME}')
|
215
417
|
|
@@ -218,16 +420,6 @@ class R2CloudStorage(CloudStorage):
|
|
218
420
|
return ' && '.join(all_commands)
|
219
421
|
|
220
422
|
|
221
|
-
def get_storage_from_path(url: str) -> CloudStorage:
|
222
|
-
"""Returns a CloudStorage by identifying the scheme:// in a URL."""
|
223
|
-
result = urllib.parse.urlsplit(url)
|
224
|
-
|
225
|
-
if result.scheme not in _REGISTRY:
|
226
|
-
assert False, (f'Scheme {result.scheme} not found in'
|
227
|
-
f' supported storage ({_REGISTRY.keys()}); path {url}')
|
228
|
-
return _REGISTRY[result.scheme]
|
229
|
-
|
230
|
-
|
231
423
|
class IBMCosCloudStorage(CloudStorage):
|
232
424
|
"""IBM Cloud Storage."""
|
233
425
|
# install rclone if package isn't already installed
|
@@ -294,10 +486,82 @@ class IBMCosCloudStorage(CloudStorage):
|
|
294
486
|
return self.make_sync_dir_command(source, destination)
|
295
487
|
|
296
488
|
|
489
|
+
class OciCloudStorage(CloudStorage):
|
490
|
+
"""OCI Cloud Storage."""
|
491
|
+
|
492
|
+
def is_directory(self, url: str) -> bool:
|
493
|
+
"""Returns whether OCI 'url' is a directory.
|
494
|
+
In cloud object stores, a "directory" refers to a regular object whose
|
495
|
+
name is a prefix of other objects.
|
496
|
+
"""
|
497
|
+
bucket_name, path = data_utils.split_oci_path(url)
|
498
|
+
|
499
|
+
client = oci.get_object_storage_client()
|
500
|
+
namespace = client.get_namespace(
|
501
|
+
compartment_id=oci.get_oci_config()['tenancy']).data
|
502
|
+
|
503
|
+
objects = client.list_objects(namespace_name=namespace,
|
504
|
+
bucket_name=bucket_name,
|
505
|
+
prefix=path).data.objects
|
506
|
+
|
507
|
+
if len(objects) == 0:
|
508
|
+
# A directory with few or no items
|
509
|
+
return True
|
510
|
+
|
511
|
+
if len(objects) > 1:
|
512
|
+
# A directory with more than 1 item
|
513
|
+
return True
|
514
|
+
|
515
|
+
object_name = objects[0].name
|
516
|
+
if path.endswith(object_name):
|
517
|
+
# An object path
|
518
|
+
return False
|
519
|
+
|
520
|
+
# A directory with only 1 item
|
521
|
+
return True
|
522
|
+
|
523
|
+
@oci.with_oci_env
|
524
|
+
def make_sync_dir_command(self, source: str, destination: str) -> str:
|
525
|
+
"""Downloads using OCI CLI."""
|
526
|
+
bucket_name, path = data_utils.split_oci_path(source)
|
527
|
+
|
528
|
+
download_via_ocicli = (f'oci os object sync --no-follow-symlinks '
|
529
|
+
f'--bucket-name {bucket_name} '
|
530
|
+
f'--prefix "{path}" --dest-dir "{destination}"')
|
531
|
+
|
532
|
+
return download_via_ocicli
|
533
|
+
|
534
|
+
@oci.with_oci_env
|
535
|
+
def make_sync_file_command(self, source: str, destination: str) -> str:
|
536
|
+
"""Downloads a file using OCI CLI."""
|
537
|
+
bucket_name, path = data_utils.split_oci_path(source)
|
538
|
+
filename = os.path.basename(path)
|
539
|
+
destination = os.path.join(destination, filename)
|
540
|
+
|
541
|
+
download_via_ocicli = (f'oci os object get --bucket-name {bucket_name} '
|
542
|
+
f'--name "{path}" --file "{destination}"')
|
543
|
+
|
544
|
+
return download_via_ocicli
|
545
|
+
|
546
|
+
|
547
|
+
def get_storage_from_path(url: str) -> CloudStorage:
|
548
|
+
"""Returns a CloudStorage by identifying the scheme:// in a URL."""
|
549
|
+
result = urllib.parse.urlsplit(url)
|
550
|
+
if result.scheme not in _REGISTRY:
|
551
|
+
assert False, (f'Scheme {result.scheme} not found in'
|
552
|
+
f' supported storage ({_REGISTRY.keys()}); path {url}')
|
553
|
+
return _REGISTRY[result.scheme]
|
554
|
+
|
555
|
+
|
297
556
|
# Maps bucket's URIs prefix(scheme) to its corresponding storage class
|
298
557
|
_REGISTRY = {
|
299
558
|
'gs': GcsCloudStorage(),
|
300
559
|
's3': S3CloudStorage(),
|
301
560
|
'r2': R2CloudStorage(),
|
302
561
|
'cos': IBMCosCloudStorage(),
|
562
|
+
'oci': OciCloudStorage(),
|
563
|
+
# TODO: This is a hack, as Azure URL starts with https://, we should
|
564
|
+
# refactor the registry to be able to take regex, so that Azure blob can
|
565
|
+
# be identified with `https://(.*?)\.blob\.core\.windows\.net`
|
566
|
+
'https': AzureBlobCloudStorage()
|
303
567
|
}
|
sky/clouds/__init__.py
CHANGED
@@ -3,26 +3,29 @@
|
|
3
3
|
from sky.clouds.cloud import Cloud
|
4
4
|
from sky.clouds.cloud import cloud_in_iterable
|
5
5
|
from sky.clouds.cloud import CloudImplementationFeatures
|
6
|
+
from sky.clouds.cloud import OpenPortsVersion
|
6
7
|
from sky.clouds.cloud import ProvisionerVersion
|
7
8
|
from sky.clouds.cloud import Region
|
8
9
|
from sky.clouds.cloud import StatusVersion
|
9
10
|
from sky.clouds.cloud import Zone
|
10
|
-
from sky.clouds.cloud_registry import CLOUD_REGISTRY
|
11
11
|
|
12
12
|
# NOTE: import the above first to avoid circular imports.
|
13
13
|
# isort: split
|
14
14
|
from sky.clouds.aws import AWS
|
15
15
|
from sky.clouds.azure import Azure
|
16
16
|
from sky.clouds.cudo import Cudo
|
17
|
+
from sky.clouds.do import DO
|
17
18
|
from sky.clouds.fluidstack import Fluidstack
|
18
19
|
from sky.clouds.gcp import GCP
|
19
20
|
from sky.clouds.ibm import IBM
|
20
21
|
from sky.clouds.kubernetes import Kubernetes
|
21
22
|
from sky.clouds.lambda_cloud import Lambda
|
23
|
+
from sky.clouds.nebius import Nebius
|
22
24
|
from sky.clouds.oci import OCI
|
23
25
|
from sky.clouds.paperspace import Paperspace
|
24
26
|
from sky.clouds.runpod import RunPod
|
25
27
|
from sky.clouds.scp import SCP
|
28
|
+
from sky.clouds.vast import Vast
|
26
29
|
from sky.clouds.vsphere import Vsphere
|
27
30
|
|
28
31
|
__all__ = [
|
@@ -33,19 +36,21 @@ __all__ = [
|
|
33
36
|
'Cudo',
|
34
37
|
'GCP',
|
35
38
|
'Lambda',
|
39
|
+
'DO',
|
36
40
|
'Paperspace',
|
37
41
|
'SCP',
|
38
42
|
'RunPod',
|
43
|
+
'Vast',
|
39
44
|
'OCI',
|
40
45
|
'Vsphere',
|
41
46
|
'Kubernetes',
|
42
47
|
'CloudImplementationFeatures',
|
43
48
|
'Region',
|
44
49
|
'Zone',
|
45
|
-
'CLOUD_REGISTRY',
|
46
50
|
'ProvisionerVersion',
|
47
51
|
'StatusVersion',
|
48
52
|
'Fluidstack',
|
53
|
+
'Nebius',
|
49
54
|
# Utility functions
|
50
55
|
'cloud_in_iterable',
|
51
56
|
]
|