skypilot-nightly 1.0.0.dev20250320__py3-none-any.whl → 1.0.0.dev20250321__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/cloudflare.py +16 -4
- sky/adaptors/kubernetes.py +2 -1
- sky/adaptors/nebius.py +128 -6
- sky/backends/cloud_vm_ray_backend.py +3 -1
- sky/benchmark/benchmark_utils.py +3 -2
- sky/check.py +18 -87
- sky/cloud_stores.py +66 -0
- sky/clouds/aws.py +14 -7
- sky/clouds/azure.py +13 -6
- sky/clouds/cloud.py +33 -10
- sky/clouds/cudo.py +3 -2
- sky/clouds/do.py +3 -2
- sky/clouds/fluidstack.py +3 -2
- sky/clouds/gcp.py +8 -9
- sky/clouds/ibm.py +15 -6
- sky/clouds/kubernetes.py +3 -1
- sky/clouds/lambda_cloud.py +3 -1
- sky/clouds/nebius.py +7 -3
- sky/clouds/oci.py +15 -6
- sky/clouds/paperspace.py +3 -2
- sky/clouds/runpod.py +7 -1
- sky/clouds/scp.py +3 -1
- sky/clouds/service_catalog/kubernetes_catalog.py +3 -1
- sky/clouds/vast.py +3 -2
- sky/clouds/vsphere.py +3 -2
- sky/core.py +4 -2
- sky/data/data_transfer.py +75 -0
- sky/data/data_utils.py +34 -0
- sky/data/mounting_utils.py +18 -0
- sky/data/storage.py +537 -9
- sky/data/storage_utils.py +102 -84
- sky/exceptions.py +2 -0
- sky/global_user_state.py +12 -33
- sky/jobs/server/core.py +1 -1
- sky/jobs/utils.py +5 -0
- sky/optimizer.py +7 -2
- sky/resources.py +6 -1
- sky/setup_files/dependencies.py +3 -1
- sky/task.py +16 -5
- sky/utils/command_runner.py +2 -0
- sky/utils/controller_utils.py +8 -5
- sky/utils/kubernetes/kubernetes_deploy_utils.py +2 -1
- {skypilot_nightly-1.0.0.dev20250320.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/METADATA +11 -1
- {skypilot_nightly-1.0.0.dev20250320.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/RECORD +49 -49
- {skypilot_nightly-1.0.0.dev20250320.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/WHEEL +1 -1
- {skypilot_nightly-1.0.0.dev20250320.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250320.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250320.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/top_level.txt +0 -0
sky/__init__.py
CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
|
|
5
5
|
import urllib.request
|
6
6
|
|
7
7
|
# Replaced with the current commit when building the wheels.
|
8
|
-
_SKYPILOT_COMMIT_SHA = '
|
8
|
+
_SKYPILOT_COMMIT_SHA = 'a7f92951b96fdca825348d8291d01bd88f6f9dfe'
|
9
9
|
|
10
10
|
|
11
11
|
def _get_git_commit():
|
@@ -35,7 +35,7 @@ def _get_git_commit():
|
|
35
35
|
|
36
36
|
|
37
37
|
__commit__ = _get_git_commit()
|
38
|
-
__version__ = '1.0.0.
|
38
|
+
__version__ = '1.0.0.dev20250321'
|
39
39
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
40
40
|
|
41
41
|
|
sky/adaptors/cloudflare.py
CHANGED
@@ -6,7 +6,9 @@ import os
|
|
6
6
|
import threading
|
7
7
|
from typing import Dict, Optional, Tuple
|
8
8
|
|
9
|
+
from sky import exceptions
|
9
10
|
from sky.adaptors import common
|
11
|
+
from sky.clouds import cloud
|
10
12
|
from sky.utils import annotations
|
11
13
|
from sky.utils import ux_utils
|
12
14
|
|
@@ -130,8 +132,8 @@ def client(service_name: str, region):
|
|
130
132
|
@common.load_lazy_modules(_LAZY_MODULES)
|
131
133
|
def botocore_exceptions():
|
132
134
|
"""AWS botocore exception."""
|
133
|
-
from botocore import exceptions
|
134
|
-
return
|
135
|
+
from botocore import exceptions as boto_exceptions
|
136
|
+
return boto_exceptions
|
135
137
|
|
136
138
|
|
137
139
|
def create_endpoint():
|
@@ -148,8 +150,18 @@ def create_endpoint():
|
|
148
150
|
return endpoint
|
149
151
|
|
150
152
|
|
151
|
-
def check_credentials(
|
152
|
-
|
153
|
+
def check_credentials(
|
154
|
+
cloud_capability: cloud.CloudCapability) -> Tuple[bool, Optional[str]]:
|
155
|
+
if cloud_capability == cloud.CloudCapability.COMPUTE:
|
156
|
+
# for backward compatibility,
|
157
|
+
# we check storage credentials for compute.
|
158
|
+
# TODO(seungjin): properly return not supported error for compute.
|
159
|
+
return check_storage_credentials()
|
160
|
+
elif cloud_capability == cloud.CloudCapability.STORAGE:
|
161
|
+
return check_storage_credentials()
|
162
|
+
else:
|
163
|
+
raise exceptions.NotSupportedError(
|
164
|
+
f'{NAME} does not support {cloud_capability}.')
|
153
165
|
|
154
166
|
|
155
167
|
def check_storage_credentials() -> Tuple[bool, Optional[str]]:
|
sky/adaptors/kubernetes.py
CHANGED
@@ -79,10 +79,11 @@ def _load_config(context: Optional[str] = None):
|
|
79
79
|
' If you were running a local Kubernetes '
|
80
80
|
'cluster, run `sky local up` to start the cluster.')
|
81
81
|
else:
|
82
|
+
kubeconfig_path = os.environ.get('KUBECONFIG', '~/.kube/config')
|
82
83
|
err_str = (
|
83
84
|
f'Failed to load Kubernetes configuration for {context!r}. '
|
84
85
|
'Please check if your kubeconfig file exists at '
|
85
|
-
f'
|
86
|
+
f'{kubeconfig_path} and is valid.\n{suffix}')
|
86
87
|
err_str += '\nTo disable Kubernetes for SkyPilot: run `sky check`.'
|
87
88
|
with ux_utils.print_exception_no_traceback():
|
88
89
|
raise ValueError(err_str) from None
|
sky/adaptors/nebius.py
CHANGED
@@ -1,7 +1,11 @@
|
|
1
1
|
"""Nebius cloud adaptor."""
|
2
2
|
import os
|
3
|
+
import threading
|
4
|
+
from typing import Optional
|
3
5
|
|
4
6
|
from sky.adaptors import common
|
7
|
+
from sky.utils import annotations
|
8
|
+
from sky.utils import ux_utils
|
5
9
|
|
6
10
|
NEBIUS_TENANT_ID_FILENAME = 'NEBIUS_TENANT_ID.txt'
|
7
11
|
NEBIUS_IAM_TOKEN_FILENAME = 'NEBIUS_IAM_TOKEN.txt'
|
@@ -12,6 +16,10 @@ NEBIUS_IAM_TOKEN_PATH = '~/.nebius/' + NEBIUS_IAM_TOKEN_FILENAME
|
|
12
16
|
NEBIUS_PROJECT_ID_PATH = '~/.nebius/' + NEBIUS_PROJECT_ID_FILENAME
|
13
17
|
NEBIUS_CREDENTIALS_PATH = '~/.nebius/' + NEBIUS_CREDENTIALS_FILENAME
|
14
18
|
|
19
|
+
DEFAULT_REGION = 'eu-north1'
|
20
|
+
|
21
|
+
NEBIUS_PROFILE_NAME = 'nebius'
|
22
|
+
|
15
23
|
MAX_RETRIES_TO_DISK_CREATE = 120
|
16
24
|
MAX_RETRIES_TO_INSTANCE_STOP = 120
|
17
25
|
MAX_RETRIES_TO_INSTANCE_START = 120
|
@@ -23,15 +31,27 @@ MAX_RETRIES_TO_INSTANCE_WAIT = 120 # Maximum number of retries
|
|
23
31
|
POLL_INTERVAL = 5
|
24
32
|
|
25
33
|
_iam_token = None
|
34
|
+
_sdk = None
|
26
35
|
_tenant_id = None
|
27
36
|
_project_id = None
|
28
37
|
|
38
|
+
_IMPORT_ERROR_MESSAGE = ('Failed to import dependencies for Nebius AI Cloud.'
|
39
|
+
'Try pip install "skypilot[nebius]"')
|
40
|
+
|
29
41
|
nebius = common.LazyImport(
|
30
42
|
'nebius',
|
31
|
-
import_error_message=
|
32
|
-
'Try running: pip install "skypilot[nebius]"',
|
43
|
+
import_error_message=_IMPORT_ERROR_MESSAGE,
|
33
44
|
# https://github.com/grpc/grpc/issues/37642 to avoid spam in console
|
34
45
|
set_loggers=lambda: os.environ.update({'GRPC_VERBOSITY': 'NONE'}))
|
46
|
+
boto3 = common.LazyImport('boto3', import_error_message=_IMPORT_ERROR_MESSAGE)
|
47
|
+
botocore = common.LazyImport('botocore',
|
48
|
+
import_error_message=_IMPORT_ERROR_MESSAGE)
|
49
|
+
|
50
|
+
_LAZY_MODULES = (boto3, botocore, nebius)
|
51
|
+
_session_creation_lock = threading.RLock()
|
52
|
+
_INDENT_PREFIX = ' '
|
53
|
+
NAME = 'Nebius'
|
54
|
+
SKY_CHECK_NAME = 'Nebius (for Nebius Object Storae)'
|
35
55
|
|
36
56
|
|
37
57
|
def request_error():
|
@@ -104,7 +124,109 @@ def get_tenant_id():
|
|
104
124
|
|
105
125
|
|
106
126
|
def sdk():
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
127
|
+
global _sdk
|
128
|
+
if _sdk is None:
|
129
|
+
if get_iam_token() is not None:
|
130
|
+
_sdk = nebius.sdk.SDK(credentials=get_iam_token())
|
131
|
+
return _sdk
|
132
|
+
_sdk = nebius.sdk.SDK(
|
133
|
+
credentials_file_name=os.path.expanduser(NEBIUS_CREDENTIALS_PATH))
|
134
|
+
return _sdk
|
135
|
+
|
136
|
+
|
137
|
+
def get_nebius_credentials(boto3_session):
|
138
|
+
"""Gets the Nebius credentials from the boto3 session object.
|
139
|
+
|
140
|
+
Args:
|
141
|
+
boto3_session: The boto3 session object.
|
142
|
+
Returns:
|
143
|
+
botocore.credentials.ReadOnlyCredentials object with the Nebius credentials.
|
144
|
+
"""
|
145
|
+
nebius_credentials = boto3_session.get_credentials()
|
146
|
+
if nebius_credentials is None:
|
147
|
+
with ux_utils.print_exception_no_traceback():
|
148
|
+
raise ValueError('Nebius credentials not found. Run '
|
149
|
+
'`sky check` to verify credentials are '
|
150
|
+
'correctly set up.')
|
151
|
+
return nebius_credentials.get_frozen_credentials()
|
152
|
+
|
153
|
+
|
154
|
+
# lru_cache() is thread-safe and it will return the same session object
|
155
|
+
# for different threads.
|
156
|
+
# Reference: https://docs.python.org/3/library/functools.html#functools.lru_cache # pylint: disable=line-too-long
|
157
|
+
@annotations.lru_cache(scope='global')
|
158
|
+
def session():
|
159
|
+
"""Create a boto3 session using the Nebius profile."""
|
160
|
+
# Creating the session object is not thread-safe for boto3,
|
161
|
+
# so we add a reentrant lock to synchronize the session creation.
|
162
|
+
# Reference: https://github.com/boto/boto3/issues/1592
|
163
|
+
# However, the session object itself is thread-safe, so we are
|
164
|
+
# able to use lru_cache() to cache the session object.
|
165
|
+
with _session_creation_lock:
|
166
|
+
session_ = boto3.session.Session(profile_name=NEBIUS_PROFILE_NAME)
|
167
|
+
return session_
|
168
|
+
|
169
|
+
|
170
|
+
@annotations.lru_cache(scope='global')
|
171
|
+
def resource(resource_name: str, region: str = DEFAULT_REGION, **kwargs):
|
172
|
+
"""Create a Nebius resource.
|
173
|
+
|
174
|
+
Args:
|
175
|
+
resource_name: Nebius resource name (e.g., 's3').
|
176
|
+
kwargs: Other options.
|
177
|
+
"""
|
178
|
+
# Need to use the resource retrieved from the per-thread session
|
179
|
+
# to avoid thread-safety issues (Directly creating the client
|
180
|
+
# with boto3.resource() is not thread-safe).
|
181
|
+
# Reference: https://stackoverflow.com/a/59635814
|
182
|
+
|
183
|
+
session_ = session()
|
184
|
+
nebius_credentials = get_nebius_credentials(session_)
|
185
|
+
endpoint = create_endpoint(region)
|
186
|
+
|
187
|
+
return session_.resource(
|
188
|
+
resource_name,
|
189
|
+
endpoint_url=endpoint,
|
190
|
+
aws_access_key_id=nebius_credentials.access_key,
|
191
|
+
aws_secret_access_key=nebius_credentials.secret_key,
|
192
|
+
region_name=region,
|
193
|
+
**kwargs)
|
194
|
+
|
195
|
+
|
196
|
+
@annotations.lru_cache(scope='global')
|
197
|
+
def client(service_name: str, region):
|
198
|
+
"""Create a Nebius client of a certain service.
|
199
|
+
|
200
|
+
Args:
|
201
|
+
service_name: Nebius service name (e.g., 's3').
|
202
|
+
region: Region used to build the endpoint URL and passed as region_name.
|
203
|
+
"""
|
204
|
+
# Need to use the client retrieved from the per-thread session
|
205
|
+
# to avoid thread-safety issues (Directly creating the client
|
206
|
+
# with boto3.client() is not thread-safe).
|
207
|
+
# Reference: https://stackoverflow.com/a/59635814
|
208
|
+
|
209
|
+
session_ = session()
|
210
|
+
nebius_credentials = get_nebius_credentials(session_)
|
211
|
+
endpoint = create_endpoint(region)
|
212
|
+
|
213
|
+
return session_.client(service_name,
|
214
|
+
endpoint_url=endpoint,
|
215
|
+
aws_access_key_id=nebius_credentials.access_key,
|
216
|
+
aws_secret_access_key=nebius_credentials.secret_key,
|
217
|
+
region_name=region)
|
218
|
+
|
219
|
+
|
220
|
+
@common.load_lazy_modules(_LAZY_MODULES)
|
221
|
+
def botocore_exceptions():
|
222
|
+
"""AWS botocore exception."""
|
223
|
+
# pylint: disable=import-outside-toplevel
|
224
|
+
from botocore import exceptions
|
225
|
+
return exceptions
|
226
|
+
|
227
|
+
|
228
|
+
def create_endpoint(region: Optional[str] = DEFAULT_REGION) -> str:
|
229
|
+
"""Returns the Nebius Object Storage endpoint URL for the given region."""
|
230
|
+
if region is None:
|
231
|
+
region = DEFAULT_REGION
|
232
|
+
return f'https://storage.{region}.nebius.cloud:443'
|
@@ -38,6 +38,7 @@ from sky import sky_logging
|
|
38
38
|
from sky import task as task_lib
|
39
39
|
from sky.backends import backend_utils
|
40
40
|
from sky.backends import wheel_utils
|
41
|
+
from sky.clouds import cloud as sky_cloud
|
41
42
|
from sky.clouds import service_catalog
|
42
43
|
from sky.clouds.utils import gcp_utils
|
43
44
|
from sky.data import data_utils
|
@@ -1981,7 +1982,8 @@ class RetryingVmProvisioner(object):
|
|
1981
1982
|
# is running. Here we check the enabled clouds and expiring credentials
|
1982
1983
|
# and raise a warning to the user.
|
1983
1984
|
if task.is_controller_task():
|
1984
|
-
enabled_clouds = sky_check.get_cached_enabled_clouds_or_refresh(
|
1985
|
+
enabled_clouds = sky_check.get_cached_enabled_clouds_or_refresh(
|
1986
|
+
sky_cloud.CloudCapability.COMPUTE)
|
1985
1987
|
expirable_clouds = backend_utils.get_expirable_clouds(
|
1986
1988
|
enabled_clouds)
|
1987
1989
|
|
sky/benchmark/benchmark_utils.py
CHANGED
@@ -172,8 +172,9 @@ def _create_benchmark_bucket() -> Tuple[str, str]:
|
|
172
172
|
bucket_name = f'sky-bench-{uuid.uuid4().hex[:4]}-{getpass.getuser()}'
|
173
173
|
|
174
174
|
# Select the bucket type.
|
175
|
-
enabled_clouds =
|
176
|
-
|
175
|
+
enabled_clouds = (
|
176
|
+
storage_lib.get_cached_enabled_storage_cloud_names_or_refresh(
|
177
|
+
raise_if_no_cloud_access=True))
|
177
178
|
# Sky Benchmark only supports S3 (see _download_remote_dir and
|
178
179
|
# _delete_remote_dir).
|
179
180
|
enabled_clouds = [
|
sky/check.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
"""Credential checks: check cloud credentials and enable clouds."""
|
2
|
-
import enum
|
3
2
|
import os
|
4
3
|
import traceback
|
5
4
|
from types import ModuleType
|
@@ -13,6 +12,7 @@ from sky import exceptions
|
|
13
12
|
from sky import global_user_state
|
14
13
|
from sky import skypilot_config
|
15
14
|
from sky.adaptors import cloudflare
|
15
|
+
from sky.clouds import cloud as sky_cloud
|
16
16
|
from sky.utils import registry
|
17
17
|
from sky.utils import rich_utils
|
18
18
|
from sky.utils import ux_utils
|
@@ -21,60 +21,21 @@ CHECK_MARK_EMOJI = '\U00002714' # Heavy check mark unicode
|
|
21
21
|
PARTY_POPPER_EMOJI = '\U0001F389' # Party popper unicode
|
22
22
|
|
23
23
|
|
24
|
-
# Declaring CloudCapability as a subclass of str
|
25
|
-
# allows it to be JSON serializable.
|
26
|
-
class CloudCapability(str, enum.Enum):
|
27
|
-
# Compute capability.
|
28
|
-
COMPUTE = 'compute'
|
29
|
-
# Storage capability.
|
30
|
-
STORAGE = 'storage'
|
31
|
-
|
32
|
-
|
33
|
-
ALL_CAPABILITIES = [CloudCapability.COMPUTE, CloudCapability.STORAGE]
|
34
|
-
|
35
|
-
|
36
24
|
def check_capabilities(
|
37
25
|
quiet: bool = False,
|
38
26
|
verbose: bool = False,
|
39
27
|
clouds: Optional[Iterable[str]] = None,
|
40
|
-
capabilities: Optional[List[CloudCapability]] = None,
|
41
|
-
) -> Dict[str, List[CloudCapability]]:
|
28
|
+
capabilities: Optional[List[sky_cloud.CloudCapability]] = None,
|
29
|
+
) -> Dict[str, List[sky_cloud.CloudCapability]]:
|
42
30
|
echo = (lambda *_args, **_kwargs: None
|
43
31
|
) if quiet else lambda *args, **kwargs: click.echo(
|
44
32
|
*args, **kwargs, color=True)
|
45
33
|
echo('Checking credentials to enable clouds for SkyPilot.')
|
46
34
|
if capabilities is None:
|
47
|
-
capabilities = ALL_CAPABILITIES
|
35
|
+
capabilities = sky_cloud.ALL_CAPABILITIES
|
48
36
|
assert capabilities is not None
|
49
|
-
enabled_clouds: Dict[str, List[CloudCapability]] = {}
|
50
|
-
disabled_clouds: Dict[str, List[CloudCapability]] = {}
|
51
|
-
|
52
|
-
def check_credentials(
|
53
|
-
cloud: Union[sky_clouds.Cloud, ModuleType],
|
54
|
-
capability: CloudCapability) -> Tuple[bool, Optional[str]]:
|
55
|
-
if capability == CloudCapability.COMPUTE:
|
56
|
-
return cloud.check_credentials()
|
57
|
-
elif capability == CloudCapability.STORAGE:
|
58
|
-
return cloud.check_storage_credentials()
|
59
|
-
else:
|
60
|
-
raise ValueError(f'Invalid capability: {capability}')
|
61
|
-
|
62
|
-
def get_cached_state(capability: CloudCapability) -> List[sky_clouds.Cloud]:
|
63
|
-
if capability == CloudCapability.COMPUTE:
|
64
|
-
return global_user_state.get_cached_enabled_clouds()
|
65
|
-
elif capability == CloudCapability.STORAGE:
|
66
|
-
return global_user_state.get_cached_enabled_storage_clouds()
|
67
|
-
else:
|
68
|
-
raise ValueError(f'Invalid capability: {capability}')
|
69
|
-
|
70
|
-
def set_cached_state(clouds: List[str],
|
71
|
-
capability: CloudCapability) -> None:
|
72
|
-
if capability == CloudCapability.COMPUTE:
|
73
|
-
global_user_state.set_enabled_clouds(clouds)
|
74
|
-
elif capability == CloudCapability.STORAGE:
|
75
|
-
global_user_state.set_enabled_storage_clouds(clouds)
|
76
|
-
else:
|
77
|
-
raise ValueError(f'Invalid capability: {capability}')
|
37
|
+
enabled_clouds: Dict[str, List[sky_cloud.CloudCapability]] = {}
|
38
|
+
disabled_clouds: Dict[str, List[sky_cloud.CloudCapability]] = {}
|
78
39
|
|
79
40
|
def check_one_cloud(
|
80
41
|
cloud_tuple: Tuple[str, Union[sky_clouds.Cloud,
|
@@ -84,7 +45,7 @@ def check_capabilities(
|
|
84
45
|
for capability in capabilities:
|
85
46
|
with rich_utils.safe_status(f'Checking {cloud_repr}...'):
|
86
47
|
try:
|
87
|
-
ok, reason = check_credentials(
|
48
|
+
ok, reason = cloud.check_credentials(capability)
|
88
49
|
except exceptions.NotSupportedError:
|
89
50
|
continue
|
90
51
|
except Exception: # pylint: disable=broad-except
|
@@ -170,12 +131,14 @@ def check_capabilities(
|
|
170
131
|
if not cloud.startswith('Cloudflare')
|
171
132
|
}
|
172
133
|
previously_enabled_clouds_set = {
|
173
|
-
repr(cloud)
|
134
|
+
repr(cloud)
|
135
|
+
for cloud in global_user_state.get_cached_enabled_clouds(capability)
|
174
136
|
}
|
175
137
|
enabled_clouds_for_capability = (config_allowed_clouds_set & (
|
176
138
|
(previously_enabled_clouds_set | enabled_clouds_set) -
|
177
139
|
disabled_clouds_set))
|
178
|
-
|
140
|
+
global_user_state.set_enabled_clouds(
|
141
|
+
list(enabled_clouds_for_capability), capability)
|
179
142
|
all_enabled_clouds = all_enabled_clouds.union(
|
180
143
|
enabled_clouds_for_capability)
|
181
144
|
disallowed_clouds_hint = None
|
@@ -227,7 +190,7 @@ def check(
|
|
227
190
|
quiet: bool = False,
|
228
191
|
verbose: bool = False,
|
229
192
|
clouds: Optional[Iterable[str]] = None,
|
230
|
-
capability: CloudCapability = CloudCapability.COMPUTE,
|
193
|
+
capability: sky_cloud.CloudCapability = sky_cloud.CloudCapability.COMPUTE,
|
231
194
|
) -> List[str]:
|
232
195
|
clouds_with_capability = []
|
233
196
|
enabled_clouds = check_capabilities(quiet, verbose, clouds, [capability])
|
@@ -238,6 +201,7 @@ def check(
|
|
238
201
|
|
239
202
|
|
240
203
|
def get_cached_enabled_clouds_or_refresh(
|
204
|
+
capability: sky_cloud.CloudCapability,
|
241
205
|
raise_if_no_cloud_access: bool = False) -> List[sky_clouds.Cloud]:
|
242
206
|
"""Returns cached enabled clouds and if no cloud is enabled, refresh.
|
243
207
|
|
@@ -251,16 +215,18 @@ def get_cached_enabled_clouds_or_refresh(
|
|
251
215
|
exceptions.NoCloudAccessError: if no public cloud is enabled and
|
252
216
|
raise_if_no_cloud_access is set to True.
|
253
217
|
"""
|
254
|
-
cached_enabled_clouds = global_user_state.get_cached_enabled_clouds(
|
218
|
+
cached_enabled_clouds = global_user_state.get_cached_enabled_clouds(
|
219
|
+
capability)
|
255
220
|
if not cached_enabled_clouds:
|
256
221
|
try:
|
257
|
-
check(quiet=True, capability=
|
222
|
+
check(quiet=True, capability=capability)
|
258
223
|
except SystemExit:
|
259
224
|
# If no cloud is enabled, check() will raise SystemExit.
|
260
225
|
# Here we catch it and raise the exception later only if
|
261
226
|
# raise_if_no_cloud_access is set to True.
|
262
227
|
pass
|
263
|
-
cached_enabled_clouds = global_user_state.get_cached_enabled_clouds(
|
228
|
+
cached_enabled_clouds = global_user_state.get_cached_enabled_clouds(
|
229
|
+
capability)
|
264
230
|
if raise_if_no_cloud_access and not cached_enabled_clouds:
|
265
231
|
with ux_utils.print_exception_no_traceback():
|
266
232
|
raise exceptions.NoCloudAccessError(
|
@@ -269,41 +235,6 @@ def get_cached_enabled_clouds_or_refresh(
|
|
269
235
|
return cached_enabled_clouds
|
270
236
|
|
271
237
|
|
272
|
-
def get_cached_enabled_storage_clouds_or_refresh(
|
273
|
-
raise_if_no_cloud_access: bool = False) -> List[sky_clouds.Cloud]:
|
274
|
-
"""Returns cached enabled storage clouds and if no cloud is enabled,
|
275
|
-
refresh.
|
276
|
-
|
277
|
-
This function will perform a refresh if no public cloud is enabled.
|
278
|
-
|
279
|
-
Args:
|
280
|
-
raise_if_no_cloud_access: if True, raise an exception if no public
|
281
|
-
cloud is enabled.
|
282
|
-
|
283
|
-
Raises:
|
284
|
-
exceptions.NoCloudAccessError: if no public cloud is enabled and
|
285
|
-
raise_if_no_cloud_access is set to True.
|
286
|
-
"""
|
287
|
-
cached_enabled_storage_clouds = (
|
288
|
-
global_user_state.get_cached_enabled_storage_clouds())
|
289
|
-
if not cached_enabled_storage_clouds:
|
290
|
-
try:
|
291
|
-
check(quiet=True, capability=CloudCapability.STORAGE)
|
292
|
-
except SystemExit:
|
293
|
-
# If no cloud is enabled, check() will raise SystemExit.
|
294
|
-
# Here we catch it and raise the exception later only if
|
295
|
-
# raise_if_no_cloud_access is set to True.
|
296
|
-
pass
|
297
|
-
cached_enabled_storage_clouds = (
|
298
|
-
global_user_state.get_cached_enabled_storage_clouds())
|
299
|
-
if raise_if_no_cloud_access and not cached_enabled_storage_clouds:
|
300
|
-
with ux_utils.print_exception_no_traceback():
|
301
|
-
raise exceptions.NoCloudAccessError(
|
302
|
-
'Cloud access is not set up. Run: '
|
303
|
-
f'{colorama.Style.BRIGHT}sky check{colorama.Style.RESET_ALL}')
|
304
|
-
return cached_enabled_storage_clouds
|
305
|
-
|
306
|
-
|
307
238
|
def get_cloud_credential_file_mounts(
|
308
239
|
excluded_clouds: Optional[Iterable[sky_clouds.Cloud]]
|
309
240
|
) -> Dict[str, str]:
|
sky/cloud_stores.py
CHANGED
@@ -19,6 +19,7 @@ from sky.adaptors import aws
|
|
19
19
|
from sky.adaptors import azure
|
20
20
|
from sky.adaptors import cloudflare
|
21
21
|
from sky.adaptors import ibm
|
22
|
+
from sky.adaptors import nebius
|
22
23
|
from sky.adaptors import oci
|
23
24
|
from sky.clouds import gcp
|
24
25
|
from sky.data import data_utils
|
@@ -543,6 +544,70 @@ class OciCloudStorage(CloudStorage):
|
|
543
544
|
return download_via_ocicli
|
544
545
|
|
545
546
|
|
547
|
+
class NebiusCloudStorage(CloudStorage):
|
548
|
+
"""Nebius Cloud Storage."""
|
549
|
+
|
550
|
+
# List of commands to install AWS CLI
|
551
|
+
_GET_AWSCLI = [
|
552
|
+
'aws --version >/dev/null 2>&1 || '
|
553
|
+
f'{constants.SKY_UV_PIP_CMD} install awscli',
|
554
|
+
]
|
555
|
+
|
556
|
+
def is_directory(self, url: str) -> bool:
|
557
|
+
"""Returns whether nebius 'url' is a directory.
|
558
|
+
|
559
|
+
In cloud object stores, a "directory" refers to a regular object whose
|
560
|
+
name is a prefix of other objects.
|
561
|
+
"""
|
562
|
+
nebius_s3 = nebius.resource('s3')
|
563
|
+
bucket_name, path = data_utils.split_nebius_path(url)
|
564
|
+
bucket = nebius_s3.Bucket(bucket_name)
|
565
|
+
|
566
|
+
num_objects = 0
|
567
|
+
for obj in bucket.objects.filter(Prefix=path):
|
568
|
+
num_objects += 1
|
569
|
+
if obj.key == path:
|
570
|
+
return False
|
571
|
+
# If there are more than 1 object in filter, then it is a directory
|
572
|
+
if num_objects == 3:
|
573
|
+
return True
|
574
|
+
|
575
|
+
# A directory with few or no items
|
576
|
+
return True
|
577
|
+
|
578
|
+
def make_sync_dir_command(self, source: str, destination: str) -> str:
|
579
|
+
"""Downloads using AWS CLI."""
|
580
|
+
# AWS Sync by default uses 10 threads to upload files to the bucket.
|
581
|
+
# To increase parallelism, modify max_concurrent_requests in your
|
582
|
+
# aws config file (Default path: ~/.aws/config).
|
583
|
+
endpoint_url = nebius.create_endpoint()
|
584
|
+
assert 'nebius://' in source, 'nebius:// is not in source'
|
585
|
+
source = source.replace('nebius://', 's3://')
|
586
|
+
download_via_awscli = (f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
|
587
|
+
'sync --no-follow-symlinks '
|
588
|
+
f'{source} {destination} '
|
589
|
+
f'--endpoint {endpoint_url} '
|
590
|
+
f'--profile={nebius.NEBIUS_PROFILE_NAME}')
|
591
|
+
|
592
|
+
all_commands = list(self._GET_AWSCLI)
|
593
|
+
all_commands.append(download_via_awscli)
|
594
|
+
return ' && '.join(all_commands)
|
595
|
+
|
596
|
+
def make_sync_file_command(self, source: str, destination: str) -> str:
|
597
|
+
"""Downloads a file using AWS CLI."""
|
598
|
+
endpoint_url = nebius.create_endpoint()
|
599
|
+
assert 'nebius://' in source, 'nebius:// is not in source'
|
600
|
+
source = source.replace('nebius://', 's3://')
|
601
|
+
download_via_awscli = (f'{constants.SKY_REMOTE_PYTHON_ENV}/bin/aws s3 '
|
602
|
+
f'cp {source} {destination} '
|
603
|
+
f'--endpoint {endpoint_url} '
|
604
|
+
f'--profile={nebius.NEBIUS_PROFILE_NAME}')
|
605
|
+
|
606
|
+
all_commands = list(self._GET_AWSCLI)
|
607
|
+
all_commands.append(download_via_awscli)
|
608
|
+
return ' && '.join(all_commands)
|
609
|
+
|
610
|
+
|
546
611
|
def get_storage_from_path(url: str) -> CloudStorage:
|
547
612
|
"""Returns a CloudStorage by identifying the scheme:// in a URL."""
|
548
613
|
result = urllib.parse.urlsplit(url)
|
@@ -559,6 +624,7 @@ _REGISTRY = {
|
|
559
624
|
'r2': R2CloudStorage(),
|
560
625
|
'cos': IBMCosCloudStorage(),
|
561
626
|
'oci': OciCloudStorage(),
|
627
|
+
'nebius': NebiusCloudStorage(),
|
562
628
|
# TODO: This is a hack, as Azure URL starts with https://, we should
|
563
629
|
# refactor the registry to be able to take regex, so that Azure blob can
|
564
630
|
# be identified with `https://(.*?)\.blob\.core\.windows\.net`
|
sky/clouds/aws.py
CHANGED
@@ -558,11 +558,23 @@ class AWS(clouds.Cloud):
|
|
558
558
|
return resources_utils.FeasibleResources(_make(instance_list),
|
559
559
|
fuzzy_candidate_list, None)
|
560
560
|
|
561
|
+
@classmethod
|
562
|
+
def _check_compute_credentials(cls) -> Tuple[bool, Optional[str]]:
|
563
|
+
"""Checks if the user has access credentials to AWS's compute service."""
|
564
|
+
return cls._check_credentials()
|
565
|
+
|
566
|
+
@classmethod
|
567
|
+
def _check_storage_credentials(cls) -> Tuple[bool, Optional[str]]:
|
568
|
+
"""Checks if the user has access credentials to AWS's storage service."""
|
569
|
+
# TODO(seungjin): Implement separate check for
|
570
|
+
# if the user has access to S3.
|
571
|
+
return cls._check_credentials()
|
572
|
+
|
561
573
|
@classmethod
|
562
574
|
@annotations.lru_cache(scope='global',
|
563
575
|
maxsize=1) # Cache since getting identity is slow.
|
564
|
-
def
|
565
|
-
"""Checks if the user has access credentials to
|
576
|
+
def _check_credentials(cls) -> Tuple[bool, Optional[str]]:
|
577
|
+
"""Checks if the user has access credentials to AWS."""
|
566
578
|
|
567
579
|
dependency_installation_hints = (
|
568
580
|
'AWS dependencies are not installed. '
|
@@ -666,11 +678,6 @@ class AWS(clouds.Cloud):
|
|
666
678
|
f'{common_utils.format_exception(e, use_bracket=True)}')
|
667
679
|
return True, hints
|
668
680
|
|
669
|
-
@classmethod
|
670
|
-
def check_storage_credentials(cls) -> Tuple[bool, Optional[str]]:
|
671
|
-
# TODO(seungjin): Check if the user has access to S3.
|
672
|
-
return cls.check_credentials()
|
673
|
-
|
674
681
|
@classmethod
|
675
682
|
def _current_identity_type(cls) -> Optional[AWSIdentityType]:
|
676
683
|
stdout = cls._aws_configure_list()
|
sky/clouds/azure.py
CHANGED
@@ -512,7 +512,19 @@ class Azure(clouds.Cloud):
|
|
512
512
|
fuzzy_candidate_list, None)
|
513
513
|
|
514
514
|
@classmethod
|
515
|
-
def
|
515
|
+
def _check_compute_credentials(cls) -> Tuple[bool, Optional[str]]:
|
516
|
+
"""Checks if the user has access credentials to this cloud's compute service."""
|
517
|
+
return cls._check_credentials()
|
518
|
+
|
519
|
+
@classmethod
|
520
|
+
def _check_storage_credentials(cls) -> Tuple[bool, Optional[str]]:
|
521
|
+
"""Checks if the user has access credentials to this cloud's storage service."""
|
522
|
+
# TODO(seungjin): Implement separate check for
|
523
|
+
# if the user has access to Azure Blob Storage.
|
524
|
+
return cls._check_credentials()
|
525
|
+
|
526
|
+
@classmethod
|
527
|
+
def _check_credentials(cls) -> Tuple[bool, Optional[str]]:
|
516
528
|
"""Checks if the user has access credentials to this cloud."""
|
517
529
|
help_str = (
|
518
530
|
' Run the following commands:'
|
@@ -574,11 +586,6 @@ class Azure(clouds.Cloud):
|
|
574
586
|
return service_catalog.instance_type_exists(instance_type,
|
575
587
|
clouds='azure')
|
576
588
|
|
577
|
-
@classmethod
|
578
|
-
def check_storage_credentials(cls) -> Tuple[bool, Optional[str]]:
|
579
|
-
# TODO(seungjin): Check if the user has access to Azure Blob Storage.
|
580
|
-
return cls.check_credentials()
|
581
|
-
|
582
589
|
@classmethod
|
583
590
|
@annotations.lru_cache(scope='global',
|
584
591
|
maxsize=1) # Cache since getting identity is slow.
|