skypilot-nightly 1.0.0.dev20250319__py3-none-any.whl → 1.0.0.dev20250321__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/cloudflare.py +19 -3
- sky/adaptors/kubernetes.py +2 -1
- sky/adaptors/nebius.py +128 -6
- sky/backends/cloud_vm_ray_backend.py +3 -1
- sky/benchmark/benchmark_utils.py +3 -2
- sky/check.py +89 -55
- sky/cloud_stores.py +66 -0
- sky/clouds/aws.py +14 -2
- sky/clouds/azure.py +13 -1
- sky/clouds/cloud.py +37 -2
- sky/clouds/cudo.py +3 -2
- sky/clouds/do.py +3 -2
- sky/clouds/fluidstack.py +3 -2
- sky/clouds/gcp.py +55 -34
- sky/clouds/ibm.py +15 -1
- sky/clouds/kubernetes.py +3 -1
- sky/clouds/lambda_cloud.py +3 -1
- sky/clouds/nebius.py +7 -3
- sky/clouds/oci.py +15 -1
- sky/clouds/paperspace.py +3 -2
- sky/clouds/runpod.py +7 -1
- sky/clouds/scp.py +3 -1
- sky/clouds/service_catalog/kubernetes_catalog.py +3 -1
- sky/clouds/utils/gcp_utils.py +11 -1
- sky/clouds/vast.py +3 -2
- sky/clouds/vsphere.py +3 -2
- sky/core.py +6 -2
- sky/data/data_transfer.py +75 -0
- sky/data/data_utils.py +34 -0
- sky/data/mounting_utils.py +18 -0
- sky/data/storage.py +542 -16
- sky/data/storage_utils.py +102 -84
- sky/exceptions.py +2 -0
- sky/global_user_state.py +15 -6
- sky/jobs/server/core.py +1 -1
- sky/jobs/utils.py +5 -0
- sky/optimizer.py +8 -2
- sky/provision/gcp/config.py +3 -3
- sky/provision/gcp/constants.py +16 -2
- sky/provision/gcp/instance.py +4 -1
- sky/provision/kubernetes/utils.py +26 -21
- sky/resources.py +6 -1
- sky/serve/replica_managers.py +10 -1
- sky/setup_files/dependencies.py +3 -1
- sky/task.py +16 -5
- sky/utils/command_runner.py +2 -0
- sky/utils/controller_utils.py +13 -4
- sky/utils/kubernetes/kubernetes_deploy_utils.py +4 -1
- {skypilot_nightly-1.0.0.dev20250319.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/METADATA +13 -2
- {skypilot_nightly-1.0.0.dev20250319.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/RECORD +55 -55
- {skypilot_nightly-1.0.0.dev20250319.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/WHEEL +1 -1
- {skypilot_nightly-1.0.0.dev20250319.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250319.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info/licenses}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250319.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = 'a7f92951b96fdca825348d8291d01bd88f6f9dfe'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.
|
38
|
+
__version__ = '1.0.0.dev20250321'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
sky/adaptors/cloudflare.py
CHANGED
@@ -6,7 +6,9 @@ import os
|
|
6
6
|
import threading
|
7
7
|
from typing import Dict, Optional, Tuple
|
8
8
|
|
9
|
+
from sky import exceptions
|
9
10
|
from sky.adaptors import common
|
11
|
+
from sky.clouds import cloud
|
10
12
|
from sky.utils import annotations
|
11
13
|
from sky.utils import ux_utils
|
12
14
|
|
@@ -130,8 +132,8 @@ def client(service_name: str, region):
|
|
130
132
|
@common.load_lazy_modules(_LAZY_MODULES)
|
131
133
|
def botocore_exceptions():
|
132
134
|
"""AWS botocore exception."""
|
133
|
-
from botocore import exceptions
|
134
|
-
return
|
135
|
+
from botocore import exceptions as boto_exceptions
|
136
|
+
return boto_exceptions
|
135
137
|
|
136
138
|
|
137
139
|
def create_endpoint():
|
@@ -148,7 +150,21 @@ def create_endpoint():
|
|
148
150
|
return endpoint
|
149
151
|
|
150
152
|
|
151
|
-
def check_credentials(
|
153
|
+
def check_credentials(
|
154
|
+
cloud_capability: cloud.CloudCapability) -> Tuple[bool, Optional[str]]:
|
155
|
+
if cloud_capability == cloud.CloudCapability.COMPUTE:
|
156
|
+
# for backward compatibility,
|
157
|
+
# we check storage credentials for compute.
|
158
|
+
# TODO(seungjin): properly return not supported error for compute.
|
159
|
+
return check_storage_credentials()
|
160
|
+
elif cloud_capability == cloud.CloudCapability.STORAGE:
|
161
|
+
return check_storage_credentials()
|
162
|
+
else:
|
163
|
+
raise exceptions.NotSupportedError(
|
164
|
+
f'{NAME} does not support {cloud_capability}.')
|
165
|
+
|
166
|
+
|
167
|
+
def check_storage_credentials() -> Tuple[bool, Optional[str]]:
|
152
168
|
"""Checks if the user has access credentials to Cloudflare R2.
|
153
169
|
|
154
170
|
Returns:
|
sky/adaptors/kubernetes.py
CHANGED
@@ -79,10 +79,11 @@ def _load_config(context: Optional[str] = None):
|
|
79
79
|
' If you were running a local Kubernetes '
|
80
80
|
'cluster, run `sky local up` to start the cluster.')
|
81
81
|
else:
|
82
|
+
kubeconfig_path = os.environ.get('KUBECONFIG', '~/.kube/config')
|
82
83
|
err_str = (
|
83
84
|
f'Failed to load Kubernetes configuration for {context!r}. '
|
84
85
|
'Please check if your kubeconfig file exists at '
|
85
|
-
f'
|
86
|
+
f'{kubeconfig_path} and is valid.\n{suffix}')
|
86
87
|
err_str += '\nTo disable Kubernetes for SkyPilot: run `sky check`.'
|
87
88
|
with ux_utils.print_exception_no_traceback():
|
88
89
|
raise ValueError(err_str) from None
|
sky/adaptors/nebius.py
CHANGED
@@ -1,7 +1,11 @@
|
|
1
1
|
"""Nebius cloud adaptor."""
|
2
2
|
import os
|
3
|
+
import threading
|
4
|
+
from typing import Optional
|
3
5
|
|
4
6
|
from sky.adaptors import common
|
7
|
+
from sky.utils import annotations
|
8
|
+
from sky.utils import ux_utils
|
5
9
|
|
6
10
|
NEBIUS_TENANT_ID_FILENAME = 'NEBIUS_TENANT_ID.txt'
|
7
11
|
NEBIUS_IAM_TOKEN_FILENAME = 'NEBIUS_IAM_TOKEN.txt'
|
@@ -12,6 +16,10 @@ NEBIUS_IAM_TOKEN_PATH = '~/.nebius/' + NEBIUS_IAM_TOKEN_FILENAME
|
|
12
16
|
NEBIUS_PROJECT_ID_PATH = '~/.nebius/' + NEBIUS_PROJECT_ID_FILENAME
|
13
17
|
NEBIUS_CREDENTIALS_PATH = '~/.nebius/' + NEBIUS_CREDENTIALS_FILENAME
|
14
18
|
|
19
|
+
DEFAULT_REGION = 'eu-north1'
|
20
|
+
|
21
|
+
NEBIUS_PROFILE_NAME = 'nebius'
|
22
|
+
|
15
23
|
MAX_RETRIES_TO_DISK_CREATE = 120
|
16
24
|
MAX_RETRIES_TO_INSTANCE_STOP = 120
|
17
25
|
MAX_RETRIES_TO_INSTANCE_START = 120
|
@@ -23,15 +31,27 @@ MAX_RETRIES_TO_INSTANCE_WAIT = 120 # Maximum number of retries
|
|
23
31
|
POLL_INTERVAL = 5
|
24
32
|
|
25
33
|
_iam_token = None
|
34
|
+
_sdk = None
|
26
35
|
_tenant_id = None
|
27
36
|
_project_id = None
|
28
37
|
|
38
|
+
_IMPORT_ERROR_MESSAGE = ('Failed to import dependencies for Nebius AI Cloud.'
|
39
|
+
'Try pip install "skypilot[nebius]"')
|
40
|
+
|
29
41
|
nebius = common.LazyImport(
|
30
42
|
'nebius',
|
31
|
-
import_error_message=
|
32
|
-
'Try running: pip install "skypilot[nebius]"',
|
43
|
+
import_error_message=_IMPORT_ERROR_MESSAGE,
|
33
44
|
# https://github.com/grpc/grpc/issues/37642 to avoid spam in console
|
34
45
|
set_loggers=lambda: os.environ.update({'GRPC_VERBOSITY': 'NONE'}))
|
46
|
+
boto3 = common.LazyImport('boto3', import_error_message=_IMPORT_ERROR_MESSAGE)
|
47
|
+
botocore = common.LazyImport('botocore',
|
48
|
+
import_error_message=_IMPORT_ERROR_MESSAGE)
|
49
|
+
|
50
|
+
_LAZY_MODULES = (boto3, botocore, nebius)
|
51
|
+
_session_creation_lock = threading.RLock()
|
52
|
+
_INDENT_PREFIX = ' '
|
53
|
+
NAME = 'Nebius'
|
54
|
+
SKY_CHECK_NAME = 'Nebius (for Nebius Object Storae)'
|
35
55
|
|
36
56
|
|
37
57
|
def request_error():
|
@@ -104,7 +124,109 @@ def get_tenant_id():
|
|
104
124
|
|
105
125
|
|
106
126
|
def sdk():
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
127
|
+
global _sdk
|
128
|
+
if _sdk is None:
|
129
|
+
if get_iam_token() is not None:
|
130
|
+
_sdk = nebius.sdk.SDK(credentials=get_iam_token())
|
131
|
+
return _sdk
|
132
|
+
_sdk = nebius.sdk.SDK(
|
133
|
+
credentials_file_name=os.path.expanduser(NEBIUS_CREDENTIALS_PATH))
|
134
|
+
return _sdk
|
135
|
+
|
136
|
+
|
137
|
+
def get_nebius_credentials(boto3_session):
|
138
|
+
"""Gets the Nebius credentials from the boto3 session object.
|
139
|
+
|
140
|
+
Args:
|
141
|
+
boto3_session: The boto3 session object.
|
142
|
+
Returns:
|
143
|
+
botocore.credentials.ReadOnlyCredentials object with the Nebius credentials.
|
144
|
+
"""
|
145
|
+
nebius_credentials = boto3_session.get_credentials()
|
146
|
+
if nebius_credentials is None:
|
147
|
+
with ux_utils.print_exception_no_traceback():
|
148
|
+
raise ValueError('Nebius credentials not found. Run '
|
149
|
+
'`sky check` to verify credentials are '
|
150
|
+
'correctly set up.')
|
151
|
+
return nebius_credentials.get_frozen_credentials()
|
152
|
+
|
153
|
+
|
154
|
+
# lru_cache() is thread-safe and it will return the same session object
|
155
|
+
# for different threads.
|
156
|
+
# Reference: https://docs.python.org/3/library/functools.html#functools.lru_cache # pylint: disable=line-too-long
|
157
|
+
@annotations.lru_cache(scope='global')
|
158
|
+
def session():
|
159
|
+
"""Create a boto3 session for the Nebius profile."""
|
160
|
+
# Creating the session object is not thread-safe for boto3,
|
161
|
+
# so we add a reentrant lock to synchronize the session creation.
|
162
|
+
# Reference: https://github.com/boto/boto3/issues/1592
|
163
|
+
# However, the session object itself is thread-safe, so we are
|
164
|
+
# able to use lru_cache() to cache the session object.
|
165
|
+
with _session_creation_lock:
|
166
|
+
session_ = boto3.session.Session(profile_name=NEBIUS_PROFILE_NAME)
|
167
|
+
return session_
|
168
|
+
|
169
|
+
|
170
|
+
@annotations.lru_cache(scope='global')
|
171
|
+
def resource(resource_name: str, region: str = DEFAULT_REGION, **kwargs):
|
172
|
+
"""Create a Nebius resource.
|
173
|
+
|
174
|
+
Args:
|
175
|
+
resource_name: Nebius resource name (e.g., 's3').
|
176
|
+
kwargs: Other options.
|
177
|
+
"""
|
178
|
+
# Need to use the resource retrieved from the per-thread session
|
179
|
+
# to avoid thread-safety issues (Directly creating the resource
|
180
|
+
# with boto3.resource() is not thread-safe).
|
181
|
+
# Reference: https://stackoverflow.com/a/59635814
|
182
|
+
|
183
|
+
session_ = session()
|
184
|
+
nebius_credentials = get_nebius_credentials(session_)
|
185
|
+
endpoint = create_endpoint(region)
|
186
|
+
|
187
|
+
return session_.resource(
|
188
|
+
resource_name,
|
189
|
+
endpoint_url=endpoint,
|
190
|
+
aws_access_key_id=nebius_credentials.access_key,
|
191
|
+
aws_secret_access_key=nebius_credentials.secret_key,
|
192
|
+
region_name=region,
|
193
|
+
**kwargs)
|
194
|
+
|
195
|
+
|
196
|
+
@annotations.lru_cache(scope='global')
|
197
|
+
def client(service_name: str, region):
|
198
|
+
"""Create a Nebius client of a certain service.
|
199
|
+
|
200
|
+
Args:
|
201
|
+
service_name: Nebius service name (e.g., 's3').
|
202
|
+
region: Nebius region used to build the endpoint URL and passed as region_name.
|
203
|
+
"""
|
204
|
+
# Need to use the client retrieved from the per-thread session
|
205
|
+
# to avoid thread-safety issues (Directly creating the client
|
206
|
+
# with boto3.client() is not thread-safe).
|
207
|
+
# Reference: https://stackoverflow.com/a/59635814
|
208
|
+
|
209
|
+
session_ = session()
|
210
|
+
nebius_credentials = get_nebius_credentials(session_)
|
211
|
+
endpoint = create_endpoint(region)
|
212
|
+
|
213
|
+
return session_.client(service_name,
|
214
|
+
endpoint_url=endpoint,
|
215
|
+
aws_access_key_id=nebius_credentials.access_key,
|
216
|
+
aws_secret_access_key=nebius_credentials.secret_key,
|
217
|
+
region_name=region)
|
218
|
+
|
219
|
+
|
220
|
+
@common.load_lazy_modules(_LAZY_MODULES)
|
221
|
+
def botocore_exceptions():
|
222
|
+
"""AWS botocore exception."""
|
223
|
+
# pylint: disable=import-outside-toplevel
|
224
|
+
from botocore import exceptions
|
225
|
+
return exceptions
|
226
|
+
|
227
|
+
|
228
|
+
def create_endpoint(region: Optional[str] = DEFAULT_REGION) -> str:
|
229
|
+
"""Returns the Nebius Object Storage endpoint URL for the given region."""
|
230
|
+
if region is None:
|
231
|
+
region = DEFAULT_REGION
|
232
|
+
return f'https://storage.{region}.nebius.cloud:443'
|
@@ -38,6 +38,7 @@ from sky import sky_logging
|
|
38
38
|
from sky import task as task_lib
|
39
39
|
from sky.backends import backend_utils
|
40
40
|
from sky.backends import wheel_utils
|
41
|
+
from sky.clouds import cloud as sky_cloud
|
41
42
|
from sky.clouds import service_catalog
|
42
43
|
from sky.clouds.utils import gcp_utils
|
43
44
|
from sky.data import data_utils
|
@@ -1981,7 +1982,8 @@ class RetryingVmProvisioner(object):
|
|
1981
1982
|
# is running. Here we check the enabled clouds and expiring credentials
|
1982
1983
|
# and raise a warning to the user.
|
1983
1984
|
if task.is_controller_task():
|
1984
|
-
enabled_clouds = sky_check.get_cached_enabled_clouds_or_refresh(
|
1985
|
+
enabled_clouds = sky_check.get_cached_enabled_clouds_or_refresh(
|
1986
|
+
sky_cloud.CloudCapability.COMPUTE)
|
1985
1987
|
expirable_clouds = backend_utils.get_expirable_clouds(
|
1986
1988
|
enabled_clouds)
|
1987
1989
|
|
sky/benchmark/benchmark_utils.py
CHANGED
@@ -172,8 +172,9 @@ def _create_benchmark_bucket() -> Tuple[str, str]:
|
|
172
172
|
bucket_name = f'sky-bench-{uuid.uuid4().hex[:4]}-{getpass.getuser()}'
|
173
173
|
|
174
174
|
# Select the bucket type.
|
175
|
-
enabled_clouds =
|
176
|
-
|
175
|
+
enabled_clouds = (
|
176
|
+
storage_lib.get_cached_enabled_storage_cloud_names_or_refresh(
|
177
|
+
raise_if_no_cloud_access=True))
|
177
178
|
# Sky Benchmark only supports S3 (see _download_remote_dir and
|
178
179
|
# _delete_remote_dir).
|
179
180
|
enabled_clouds = [
|
sky/check.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
import os
|
3
3
|
import traceback
|
4
4
|
from types import ModuleType
|
5
|
-
from typing import Dict, Iterable, List, Optional, Tuple, Union
|
5
|
+
from typing import Dict, Iterable, List, Optional, Set, Tuple, Union
|
6
6
|
|
7
7
|
import click
|
8
8
|
import colorama
|
@@ -12,6 +12,7 @@ from sky import exceptions
|
|
12
12
|
from sky import global_user_state
|
13
13
|
from sky import skypilot_config
|
14
14
|
from sky.adaptors import cloudflare
|
15
|
+
from sky.clouds import cloud as sky_cloud
|
15
16
|
from sky.utils import registry
|
16
17
|
from sky.utils import rich_utils
|
17
18
|
from sky.utils import ux_utils
|
@@ -20,44 +21,52 @@ CHECK_MARK_EMOJI = '\U00002714' # Heavy check mark unicode
|
|
20
21
|
PARTY_POPPER_EMOJI = '\U0001F389' # Party popper unicode
|
21
22
|
|
22
23
|
|
23
|
-
def
|
24
|
+
def check_capabilities(
|
24
25
|
quiet: bool = False,
|
25
26
|
verbose: bool = False,
|
26
27
|
clouds: Optional[Iterable[str]] = None,
|
27
|
-
|
28
|
+
capabilities: Optional[List[sky_cloud.CloudCapability]] = None,
|
29
|
+
) -> Dict[str, List[sky_cloud.CloudCapability]]:
|
28
30
|
echo = (lambda *_args, **_kwargs: None
|
29
31
|
) if quiet else lambda *args, **kwargs: click.echo(
|
30
32
|
*args, **kwargs, color=True)
|
31
33
|
echo('Checking credentials to enable clouds for SkyPilot.')
|
32
|
-
|
33
|
-
|
34
|
+
if capabilities is None:
|
35
|
+
capabilities = sky_cloud.ALL_CAPABILITIES
|
36
|
+
assert capabilities is not None
|
37
|
+
enabled_clouds: Dict[str, List[sky_cloud.CloudCapability]] = {}
|
38
|
+
disabled_clouds: Dict[str, List[sky_cloud.CloudCapability]] = {}
|
34
39
|
|
35
40
|
def check_one_cloud(
|
36
41
|
cloud_tuple: Tuple[str, Union[sky_clouds.Cloud,
|
37
42
|
ModuleType]]) -> None:
|
38
43
|
cloud_repr, cloud = cloud_tuple
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
44
|
+
assert capabilities is not None
|
45
|
+
for capability in capabilities:
|
46
|
+
with rich_utils.safe_status(f'Checking {cloud_repr}...'):
|
47
|
+
try:
|
48
|
+
ok, reason = cloud.check_credentials(capability)
|
49
|
+
except exceptions.NotSupportedError:
|
50
|
+
continue
|
51
|
+
except Exception: # pylint: disable=broad-except
|
52
|
+
# Catch all exceptions to prevent a single cloud
|
53
|
+
# from blocking the check for other clouds.
|
54
|
+
ok, reason = False, traceback.format_exc()
|
55
|
+
status_msg = ('enabled' if ok else 'disabled')
|
56
|
+
styles = {'fg': 'green', 'bold': False} if ok else {'dim': True}
|
57
|
+
echo(' ' + click.style(f'{cloud_repr}: {status_msg}', **styles) +
|
58
|
+
' ' * 30)
|
59
|
+
if ok:
|
60
|
+
enabled_clouds.setdefault(cloud_repr, []).append(capability)
|
61
|
+
if verbose and cloud is not cloudflare:
|
62
|
+
activated_account = cloud.get_active_user_identity_str()
|
63
|
+
if activated_account is not None:
|
64
|
+
echo(f' Activated account: {activated_account}')
|
65
|
+
if reason is not None:
|
66
|
+
echo(f' Hint: {reason}')
|
67
|
+
else:
|
68
|
+
disabled_clouds.setdefault(cloud_repr, []).append(capability)
|
69
|
+
echo(f' Reason: {reason}')
|
61
70
|
|
62
71
|
def get_cloud_tuple(
|
63
72
|
cloud_name: str) -> Tuple[str, Union[sky_clouds.Cloud, ModuleType]]:
|
@@ -99,33 +108,39 @@ def check(
|
|
99
108
|
for cloud_tuple in sorted(clouds_to_check):
|
100
109
|
check_one_cloud(cloud_tuple)
|
101
110
|
|
102
|
-
# Cloudflare is not a real cloud in registry.CLOUD_REGISTRY, and should
|
103
|
-
# not be inserted into the DB (otherwise `sky launch` and other code would
|
104
|
-
# error out when it's trying to look it up in the registry).
|
105
|
-
enabled_clouds_set = {
|
106
|
-
cloud for cloud in enabled_clouds if not cloud.startswith('Cloudflare')
|
107
|
-
}
|
108
|
-
disabled_clouds_set = {
|
109
|
-
cloud for cloud in disabled_clouds if not cloud.startswith('Cloudflare')
|
110
|
-
}
|
111
|
-
config_allowed_clouds_set = {
|
112
|
-
cloud for cloud in config_allowed_cloud_names
|
113
|
-
if not cloud.startswith('Cloudflare')
|
114
|
-
}
|
115
|
-
previously_enabled_clouds_set = {
|
116
|
-
repr(cloud) for cloud in global_user_state.get_cached_enabled_clouds()
|
117
|
-
}
|
118
|
-
|
119
111
|
# Determine the set of enabled clouds: (previously enabled clouds + newly
|
120
112
|
# enabled clouds - newly disabled clouds) intersected with
|
121
113
|
# config_allowed_clouds, if specified in config.yaml.
|
122
114
|
# This means that if a cloud is already enabled and is not included in
|
123
115
|
# allowed_clouds in config.yaml, it will be disabled.
|
124
|
-
all_enabled_clouds = (
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
116
|
+
all_enabled_clouds: Set[str] = set()
|
117
|
+
for capability in capabilities:
|
118
|
+
# Cloudflare is not a real cloud in registry.CLOUD_REGISTRY, and should
|
119
|
+
# not be inserted into the DB (otherwise `sky launch` and other code
|
120
|
+
# would error out when it's trying to look it up in the registry).
|
121
|
+
enabled_clouds_set = {
|
122
|
+
cloud for cloud, capabilities in enabled_clouds.items()
|
123
|
+
if capability in capabilities and not cloud.startswith('Cloudflare')
|
124
|
+
}
|
125
|
+
disabled_clouds_set = {
|
126
|
+
cloud for cloud, capabilities in disabled_clouds.items()
|
127
|
+
if capability in capabilities and not cloud.startswith('Cloudflare')
|
128
|
+
}
|
129
|
+
config_allowed_clouds_set = {
|
130
|
+
cloud for cloud in config_allowed_cloud_names
|
131
|
+
if not cloud.startswith('Cloudflare')
|
132
|
+
}
|
133
|
+
previously_enabled_clouds_set = {
|
134
|
+
repr(cloud)
|
135
|
+
for cloud in global_user_state.get_cached_enabled_clouds(capability)
|
136
|
+
}
|
137
|
+
enabled_clouds_for_capability = (config_allowed_clouds_set & (
|
138
|
+
(previously_enabled_clouds_set | enabled_clouds_set) -
|
139
|
+
disabled_clouds_set))
|
140
|
+
global_user_state.set_enabled_clouds(
|
141
|
+
list(enabled_clouds_for_capability), capability)
|
142
|
+
all_enabled_clouds = all_enabled_clouds.union(
|
143
|
+
enabled_clouds_for_capability)
|
129
144
|
disallowed_clouds_hint = None
|
130
145
|
if disallowed_cloud_names:
|
131
146
|
disallowed_clouds_hint = (
|
@@ -160,8 +175,7 @@ def check(
|
|
160
175
|
# Pretty print for UX.
|
161
176
|
if not quiet:
|
162
177
|
enabled_clouds_str = '\n ' + '\n '.join([
|
163
|
-
_format_enabled_cloud(cloud)
|
164
|
-
for cloud in sorted(all_enabled_clouds)
|
178
|
+
_format_enabled_cloud(cloud) for cloud in sorted(enabled_clouds)
|
165
179
|
])
|
166
180
|
echo(f'\n{colorama.Fore.GREEN}{PARTY_POPPER_EMOJI} '
|
167
181
|
f'Enabled clouds {PARTY_POPPER_EMOJI}'
|
@@ -169,7 +183,25 @@ def check(
|
|
169
183
|
return enabled_clouds
|
170
184
|
|
171
185
|
|
186
|
+
# 'sky check' command and associated '/check' server endpoint
|
187
|
+
# only checks compute capability for backward compatibility.
|
188
|
+
# This necessitates setting default capability to CloudCapability.COMPUTE.
|
189
|
+
def check(
|
190
|
+
quiet: bool = False,
|
191
|
+
verbose: bool = False,
|
192
|
+
clouds: Optional[Iterable[str]] = None,
|
193
|
+
capability: sky_cloud.CloudCapability = sky_cloud.CloudCapability.COMPUTE,
|
194
|
+
) -> List[str]:
|
195
|
+
clouds_with_capability = []
|
196
|
+
enabled_clouds = check_capabilities(quiet, verbose, clouds, [capability])
|
197
|
+
for cloud, capabilities in enabled_clouds.items():
|
198
|
+
if capability in capabilities:
|
199
|
+
clouds_with_capability.append(cloud)
|
200
|
+
return clouds_with_capability
|
201
|
+
|
202
|
+
|
172
203
|
def get_cached_enabled_clouds_or_refresh(
|
204
|
+
capability: sky_cloud.CloudCapability,
|
173
205
|
raise_if_no_cloud_access: bool = False) -> List[sky_clouds.Cloud]:
|
174
206
|
"""Returns cached enabled clouds and if no cloud is enabled, refresh.
|
175
207
|
|
@@ -183,16 +215,18 @@ def get_cached_enabled_clouds_or_refresh(
|
|
183
215
|
exceptions.NoCloudAccessError: if no public cloud is enabled and
|
184
216
|
raise_if_no_cloud_access is set to True.
|
185
217
|
"""
|
186
|
-
cached_enabled_clouds = global_user_state.get_cached_enabled_clouds(
|
218
|
+
cached_enabled_clouds = global_user_state.get_cached_enabled_clouds(
|
219
|
+
capability)
|
187
220
|
if not cached_enabled_clouds:
|
188
221
|
try:
|
189
|
-
check(quiet=True)
|
222
|
+
check(quiet=True, capability=capability)
|
190
223
|
except SystemExit:
|
191
224
|
# If no cloud is enabled, check() will raise SystemExit.
|
192
225
|
# Here we catch it and raise the exception later only if
|
193
226
|
# raise_if_no_cloud_access is set to True.
|
194
227
|
pass
|
195
|
-
cached_enabled_clouds = global_user_state.get_cached_enabled_clouds(
|
228
|
+
cached_enabled_clouds = global_user_state.get_cached_enabled_clouds(
|
229
|
+
capability)
|
196
230
|
if raise_if_no_cloud_access and not cached_enabled_clouds:
|
197
231
|
with ux_utils.print_exception_no_traceback():
|
198
232
|
raise exceptions.NoCloudAccessError(
|
@@ -226,7 +260,7 @@ def get_cloud_credential_file_mounts(
|
|
226
260
|
# Currently, get_cached_enabled_clouds_or_refresh() does not support r2 as
|
227
261
|
# only clouds with computing instances are marked as enabled by skypilot.
|
228
262
|
# This will be removed when cloudflare/r2 is added as a 'cloud'.
|
229
|
-
r2_is_enabled, _ = cloudflare.
|
263
|
+
r2_is_enabled, _ = cloudflare.check_storage_credentials()
|
230
264
|
if r2_is_enabled:
|
231
265
|
r2_credential_mounts = cloudflare.get_credential_file_mounts()
|
232
266
|
file_mounts.update(r2_credential_mounts)
|
sky/cloud_stores.py
CHANGED
@@ -19,6 +19,7 @@ from sky.adaptors import aws
|
|
19
19
|
from sky.adaptors import azure
|
20
20
|
from sky.adaptors import cloudflare
|
21
21
|
from sky.adaptors import ibm
|
22
|
+
from sky.adaptors import nebius
|
22
23
|
from sky.adaptors import oci
|
23
24
|
from sky.clouds import gcp
|
24
25
|
from sky.data import data_utils
|
@@ -543,6 +544,70 @@ class OciCloudStorage(CloudStorage):
|
|
543
544
|
return download_via_ocicli
|
544
545
|
|
545
546
|
|
547
|
+
class NebiusCloudStorage(CloudStorage):
|
548
|
+
"""Nebius Cloud Storage."""
|
549
|
+
|
550
|
+
# List of commands to install AWS CLI
|
551
|
+
_GET_AWSCLI = [
|
552
|
+
'aws --version >/dev/null 2>&1 || '
|
553
|
+
f'{constants.SKY_UV_PIP_CMD} install awscli',
|
554
|
+
]
|
555
|
+
|
556
|
+
def is_directory(self, url: str) -> bool:
|
557
|
+
"""Returns whether nebius 'url' is a directory.
|
558
|
+
|
559
|
+
In cloud object stores, a "directory" refers to a regular object whose
|
560
|
+
name is a prefix of other objects.
|
561
|
+
"""
|
562
|
+
nebius_s3 = nebius.resource('s3')
|
563
|
+
bucket_name, path = data_utils.split_nebius_path(url)
|
564
|
+
bucket = nebius_s3.Bucket(bucket_name)
|
565
|
+
|
566
|
+
num_objects = 0
|
567
|
+
for obj in bucket.objects.filter(Prefix=path):
|
568
|
+
num_objects += 1
|
569
|
+
if obj.key == path:
|
570
|
+
return False
|
571
|
+
# If there are more than 1 object in filter, then it is a directory
|
572
|
+
if num_objects == 3:
|
573
|
+
return True
|
574
|
+
|
575
|
+
# A directory with few or no items
|
576
|
+
return True
|
577
|
+
|
578
|
+
def make_sync_dir_command(self, source: str, destination: str) -> str:
|
579
|
+
"""Downloads using AWS CLI."""
|
580
|
+
# AWS Sync by default uses 10 threads to upload files to the bucket.
|
581
|
+
# To increase parallelism, modify max_concurrent_requests in your
|
582
|
+
# aws config file (Default path: ~/.aws/config).
|
583
|
+
endpoint_url = nebius.create_endpoint()
|
584
|
+
assert 'nebius://' in source, 'nebius:// is not in source'
|
585
|
+
source = source.replace('nebius://', 's3://')
|
586
|
+
download_via_awscli = (f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
|
587
|
+
'sync --no-follow-symlinks '
|
588
|
+
f'{source} {destination} '
|
589
|
+
f'--endpoint {endpoint_url} '
|
590
|
+
f'--profile={nebius.NEBIUS_PROFILE_NAME}')
|
591
|
+
|
592
|
+
all_commands = list(self._GET_AWSCLI)
|
593
|
+
all_commands.append(download_via_awscli)
|
594
|
+
return ' && '.join(all_commands)
|
595
|
+
|
596
|
+
def make_sync_file_command(self, source: str, destination: str) -> str:
|
597
|
+
"""Downloads a file using AWS CLI."""
|
598
|
+
endpoint_url = nebius.create_endpoint()
|
599
|
+
assert 'nebius://' in source, 'nebius:// is not in source'
|
600
|
+
source = source.replace('nebius://', 's3://')
|
601
|
+
download_via_awscli = (f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
|
602
|
+
f'cp {source} {destination} '
|
603
|
+
f'--endpoint {endpoint_url} '
|
604
|
+
f'--profile={nebius.NEBIUS_PROFILE_NAME}')
|
605
|
+
|
606
|
+
all_commands = list(self._GET_AWSCLI)
|
607
|
+
all_commands.append(download_via_awscli)
|
608
|
+
return ' && '.join(all_commands)
|
609
|
+
|
610
|
+
|
546
611
|
def get_storage_from_path(url: str) -> CloudStorage:
|
547
612
|
"""Returns a CloudStorage by identifying the scheme:// in a URL."""
|
548
613
|
result = urllib.parse.urlsplit(url)
|
@@ -559,6 +624,7 @@ _REGISTRY = {
|
|
559
624
|
'r2': R2CloudStorage(),
|
560
625
|
'cos': IBMCosCloudStorage(),
|
561
626
|
'oci': OciCloudStorage(),
|
627
|
+
'nebius': NebiusCloudStorage(),
|
562
628
|
# TODO: This is a hack, as Azure URL starts with https://, we should
|
563
629
|
# refactor the registry to be able to take regex, so that Azure blob can
|
564
630
|
# be identified with `https://(.*?)\.blob\.core\.windows\.net`
|
sky/clouds/aws.py
CHANGED
@@ -558,11 +558,23 @@ class AWS(clouds.Cloud):
|
|
558
558
|
return resources_utils.FeasibleResources(_make(instance_list),
|
559
559
|
fuzzy_candidate_list, None)
|
560
560
|
|
561
|
+
@classmethod
|
562
|
+
def _check_compute_credentials(cls) -> Tuple[bool, Optional[str]]:
|
563
|
+
"""Checks if the user has access credentials to AWS's compute service."""
|
564
|
+
return cls._check_credentials()
|
565
|
+
|
566
|
+
@classmethod
|
567
|
+
def _check_storage_credentials(cls) -> Tuple[bool, Optional[str]]:
|
568
|
+
"""Checks if the user has access credentials to AWS's storage service."""
|
569
|
+
# TODO(seungjin): Implement separate check for
|
570
|
+
# if the user has access to S3.
|
571
|
+
return cls._check_credentials()
|
572
|
+
|
561
573
|
@classmethod
|
562
574
|
@annotations.lru_cache(scope='global',
|
563
575
|
maxsize=1) # Cache since getting identity is slow.
|
564
|
-
def
|
565
|
-
"""Checks if the user has access credentials to
|
576
|
+
def _check_credentials(cls) -> Tuple[bool, Optional[str]]:
|
577
|
+
"""Checks if the user has access credentials to AWS."""
|
566
578
|
|
567
579
|
dependency_installation_hints = (
|
568
580
|
'AWS dependencies are not installed. '
|
sky/clouds/azure.py
CHANGED
@@ -512,7 +512,19 @@ class Azure(clouds.Cloud):
|
|
512
512
|
fuzzy_candidate_list, None)
|
513
513
|
|
514
514
|
@classmethod
|
515
|
-
def
|
515
|
+
def _check_compute_credentials(cls) -> Tuple[bool, Optional[str]]:
|
516
|
+
"""Checks if the user has access credentials to this cloud's compute service."""
|
517
|
+
return cls._check_credentials()
|
518
|
+
|
519
|
+
@classmethod
|
520
|
+
def _check_storage_credentials(cls) -> Tuple[bool, Optional[str]]:
|
521
|
+
"""Checks if the user has access credentials to this cloud's storage service."""
|
522
|
+
# TODO(seungjin): Implement separate check for
|
523
|
+
# if the user has access to Azure Blob Storage.
|
524
|
+
return cls._check_credentials()
|
525
|
+
|
526
|
+
@classmethod
|
527
|
+
def _check_credentials(cls) -> Tuple[bool, Optional[str]]:
|
516
528
|
"""Checks if the user has access credentials to this cloud."""
|
517
529
|
help_str = (
|
518
530
|
' Run the following commands:'
|