skypilot-nightly 1.0.0.dev20250319__py3-none-any.whl → 1.0.0.dev20250321__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/cloudflare.py +19 -3
- sky/adaptors/kubernetes.py +2 -1
- sky/adaptors/nebius.py +128 -6
- sky/backends/cloud_vm_ray_backend.py +3 -1
- sky/benchmark/benchmark_utils.py +3 -2
- sky/check.py +89 -55
- sky/cloud_stores.py +66 -0
- sky/clouds/aws.py +14 -2
- sky/clouds/azure.py +13 -1
- sky/clouds/cloud.py +37 -2
- sky/clouds/cudo.py +3 -2
- sky/clouds/do.py +3 -2
- sky/clouds/fluidstack.py +3 -2
- sky/clouds/gcp.py +55 -34
- sky/clouds/ibm.py +15 -1
- sky/clouds/kubernetes.py +3 -1
- sky/clouds/lambda_cloud.py +3 -1
- sky/clouds/nebius.py +7 -3
- sky/clouds/oci.py +15 -1
- sky/clouds/paperspace.py +3 -2
- sky/clouds/runpod.py +7 -1
- sky/clouds/scp.py +3 -1
- sky/clouds/service_catalog/kubernetes_catalog.py +3 -1
- sky/clouds/utils/gcp_utils.py +11 -1
- sky/clouds/vast.py +3 -2
- sky/clouds/vsphere.py +3 -2
- sky/core.py +6 -2
- sky/data/data_transfer.py +75 -0
- sky/data/data_utils.py +34 -0
- sky/data/mounting_utils.py +18 -0
- sky/data/storage.py +542 -16
- sky/data/storage_utils.py +102 -84
- sky/exceptions.py +2 -0
- sky/global_user_state.py +15 -6
- sky/jobs/server/core.py +1 -1
- sky/jobs/utils.py +5 -0
- sky/optimizer.py +8 -2
- sky/provision/gcp/config.py +3 -3
- sky/provision/gcp/constants.py +16 -2
- sky/provision/gcp/instance.py +4 -1
- sky/provision/kubernetes/utils.py +26 -21
- sky/resources.py +6 -1
- sky/serve/replica_managers.py +10 -1
- sky/setup_files/dependencies.py +3 -1
- sky/task.py +16 -5
- sky/utils/command_runner.py +2 -0
- sky/utils/controller_utils.py +13 -4
- sky/utils/kubernetes/kubernetes_deploy_utils.py +4 -1
- {skypilot_nightly-1.0.0.dev20250319.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/METADATA +13 -2
- {skypilot_nightly-1.0.0.dev20250319.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/RECORD +55 -55
- {skypilot_nightly-1.0.0.dev20250319.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/WHEEL +1 -1
- {skypilot_nightly-1.0.0.dev20250319.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250319.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info/licenses}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250319.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/top_level.txt +0 -0
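Nearly all of the churn in this release adds Nebius Object Storage as a storage backend (a new `sky/adaptors/nebius.py`, `nebius://` handling across `sky/data/*`, and a `NebiusStore` class in `sky/data/storage.py`, shown below), and threads a per-capability credential check through `sky/check.py`. As a minimal sketch of the latter, the call below is lifted verbatim from the storage.py diff; invoking it outside that module is an assumption on my part, not something this diff documents:

    from sky import check as sky_check
    from sky.clouds import cloud as sky_cloud

    # Re-run the credential check, but only for the clouds' storage
    # capability, mirroring how sky/data/storage.py now invokes it.
    sky_check.check(quiet=True, capability=sky_cloud.CloudCapability.STORAGE)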
sky/data/storage.py  CHANGED
@@ -23,7 +23,9 @@ from sky.adaptors import azure
 from sky.adaptors import cloudflare
 from sky.adaptors import gcp
 from sky.adaptors import ibm
+from sky.adaptors import nebius
 from sky.adaptors import oci
+from sky.clouds import cloud as sky_cloud
 from sky.data import data_transfer
 from sky.data import data_utils
 from sky.data import mounting_utils
@@ -57,6 +59,7 @@ STORE_ENABLED_CLOUDS: List[str] = [
     str(clouds.Azure()),
     str(clouds.IBM()),
     str(clouds.OCI()),
+    str(clouds.Nebius()),
     cloudflare.NAME,
 ]
 
@@ -78,34 +81,34 @@ _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE = (
 _STORAGE_LOG_FILE_NAME = 'storage_sync.log'
 
 
-def get_cached_enabled_storage_clouds_or_refresh(
+def get_cached_enabled_storage_cloud_names_or_refresh(
         raise_if_no_cloud_access: bool = False) -> List[str]:
     # This is a temporary solution until https://github.com/skypilot-org/skypilot/issues/1943 # pylint: disable=line-too-long
     # is resolved by implementing separate 'enabled_storage_clouds'
-    enabled_clouds = sky_check.get_cached_enabled_clouds_or_refresh()
+    enabled_clouds = sky_check.get_cached_enabled_clouds_or_refresh(
+        sky_cloud.CloudCapability.STORAGE)
     enabled_clouds = [str(cloud) for cloud in enabled_clouds]
 
-    enabled_storage_clouds = [
-        cloud for cloud in enabled_clouds if cloud in STORE_ENABLED_CLOUDS
-    ]
-    r2_is_enabled, _ = cloudflare.check_credentials()
+    r2_is_enabled, _ = cloudflare.check_storage_credentials()
     if r2_is_enabled:
-        enabled_storage_clouds.append(cloudflare.NAME)
-    if raise_if_no_cloud_access and not enabled_storage_clouds:
+        enabled_clouds.append(cloudflare.NAME)
+    if raise_if_no_cloud_access and not enabled_clouds:
         raise exceptions.NoCloudAccessError(
             'No cloud access available for storage. '
             'Please check your cloud credentials.')
-    return enabled_storage_clouds
+    return enabled_clouds
 
 
 def _is_storage_cloud_enabled(cloud_name: str,
                               try_fix_with_sky_check: bool = True) -> bool:
-    enabled_storage_clouds = get_cached_enabled_storage_clouds_or_refresh()
-    if cloud_name in enabled_storage_clouds:
+    enabled_storage_cloud_names = (
+        get_cached_enabled_storage_cloud_names_or_refresh())
+    if cloud_name in enabled_storage_cloud_names:
         return True
     if try_fix_with_sky_check:
         # TODO(zhwu): Only check the specified cloud to speed up.
-        sky_check.check(quiet=True)
+        sky_check.check(quiet=True,
+                        capability=sky_cloud.CloudCapability.STORAGE)
         return _is_storage_cloud_enabled(cloud_name,
                                          try_fix_with_sky_check=False)
     return False
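For callers, the practical effect of the hunk above is a rename plus capability scoping. A small sketch of how the renamed, module-level helper is meant to be consumed (the surrounding script is illustrative, not part of the diff):

    from sky.data import storage

    # Returns cloud names (strings) that currently have storage access,
    # e.g. ['AWS', 'Nebius', 'Cloudflare'], refreshing the credential
    # cache if needed.
    names = storage.get_cached_enabled_storage_cloud_names_or_refresh(
        raise_if_no_cloud_access=False)
    print(names)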
@@ -119,6 +122,7 @@ class StoreType(enum.Enum):
     R2 = 'R2'
     IBM = 'IBM'
     OCI = 'OCI'
+    NEBIUS = 'NEBIUS'
 
     @classmethod
     def from_cloud(cls, cloud: str) -> 'StoreType':
@@ -134,6 +138,8 @@ class StoreType(enum.Enum):
             return StoreType.AZURE
         elif cloud.lower() == str(clouds.OCI()).lower():
             return StoreType.OCI
+        elif cloud.lower() == str(clouds.Nebius()).lower():
+            return StoreType.NEBIUS
         elif cloud.lower() == str(clouds.Lambda()).lower():
             with ux_utils.print_exception_no_traceback():
                 raise ValueError('Lambda Cloud does not provide cloud storage.')
@@ -173,6 +179,8 @@ class StoreType(enum.Enum):
             return StoreType.IBM
         elif isinstance(store, OciStore):
             return StoreType.OCI
+        elif isinstance(store, NebiusStore):
+            return StoreType.NEBIUS
         else:
             with ux_utils.print_exception_no_traceback():
                 raise ValueError(f'Unknown store type: {store}')
@@ -191,6 +199,9 @@ class StoreType(enum.Enum):
             return 'cos://'
         elif self == StoreType.OCI:
             return 'oci://'
+        # Nebius storages use 's3://' as a prefix for various aws cli commands
+        elif self == StoreType.NEBIUS:
+            return 's3://'
         else:
             with ux_utils.print_exception_no_traceback():
                 raise ValueError(f'Unknown store type: {self}')
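Note that `StoreType.NEBIUS` deliberately maps to the `s3://` prefix because Nebius Object Storage is driven through the AWS CLI / S3 API. A quick sketch of the enum round-trip (the `store_prefix()` method name is inferred from the surrounding storage.py code and should be treated as an assumption):

    from sky.data.storage import StoreType

    st = StoreType.from_cloud('nebius')
    assert st == StoreType.NEBIUS
    # Bucket URIs handed to `aws s3 ...` use the plain s3:// scheme.
    assert st.store_prefix() == 's3://'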
@@ -728,6 +739,8 @@ class Storage(object):
                 self.add_store(StoreType.IBM)
             elif self.source.startswith('oci://'):
                 self.add_store(StoreType.OCI)
+            elif self.source.startswith('nebius://'):
+                self.add_store(StoreType.NEBIUS)
 
     def get_bucket_sub_path_prefix(self, blob_path: str) -> str:
         """Adds the bucket sub path prefix to the blob path."""
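With the branch above in place, a `nebius://` source is enough for `Storage` to select the Nebius backend via `add_store(StoreType.NEBIUS)`. A hedged usage sketch (the bucket name is a placeholder and the keyword-only construction is an assumption; only the `nebius://` scheme handling is established by this diff):

    from sky.data import storage

    # 'my-bucket' stands in for an existing Nebius Object Storage bucket.
    store = storage.Storage(source='nebius://my-bucket')
    # The source-dispatch branch shown above picks StoreType.NEBIUS for it.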
@@ -814,7 +827,9 @@ class Storage(object):
                     'using a bucket by writing <destination_path>: '
                     f'{source} in the file_mounts section of your YAML')
             is_local_source = True
-        elif split_path.scheme in ['s3', 'gs', 'https', 'r2', 'cos', 'oci']:
+        elif split_path.scheme in [
+                's3', 'gs', 'https', 'r2', 'cos', 'oci', 'nebius'
+        ]:
             is_local_source = False
             # Storage mounting does not support mounting specific files from
             # cloud store - ensure path points to only a directory
@@ -838,7 +853,7 @@ class Storage(object):
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageSourceError(
                     f'Supported paths: local, s3://, gs://, https://, '
-                    f'r2://, cos://, oci://. Got: {source}')
+                    f'r2://, cos://, oci://, nebius://. Got: {source}')
         return source, is_local_source
 
     def _validate_storage_spec(self, name: Optional[str]) -> None:
@@ -853,7 +868,7 @@ class Storage(object):
             """
             prefix = name.split('://')[0]
             prefix = prefix.lower()
-            if prefix in ['s3', 'gs', 'https', 'r2', 'cos', 'oci']:
+            if prefix in ['s3', 'gs', 'https', 'r2', 'cos', 'oci', 'nebius']:
                 with ux_utils.print_exception_no_traceback():
                     raise exceptions.StorageNameError(
                         'Prefix detected: `name` cannot start with '
@@ -979,6 +994,12 @@ class Storage(object):
                     source=self.source,
                     sync_on_reconstruction=self.sync_on_reconstruction,
                     _bucket_sub_path=self._bucket_sub_path)
+            elif s_type == StoreType.NEBIUS:
+                store = NebiusStore.from_metadata(
+                    s_metadata,
+                    source=self.source,
+                    sync_on_reconstruction=self.sync_on_reconstruction,
+                    _bucket_sub_path=self._bucket_sub_path)
             else:
                 with ux_utils.print_exception_no_traceback():
                     raise ValueError(f'Unknown store type: {s_type}')
@@ -1073,6 +1094,8 @@ class Storage(object):
             store_cls = IBMCosStore
         elif store_type == StoreType.OCI:
             store_cls = OciStore
+        elif store_type == StoreType.NEBIUS:
+            store_cls = NebiusStore
         else:
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageSpecError(
@@ -1106,7 +1129,7 @@ class Storage(object):
                     f'name {self.name}. General initialization error.')
             raise
         except exceptions.StorageSpecError:
-            logger.error(f'Could not mount externally created {store_type}'
+            logger.error(f'Could not mount externally created {store_type} '
                          f'store with name {self.name!r}.')
             raise
 
@@ -1361,6 +1384,15 @@ class S3Store(AbstractStore):
                 assert data_utils.verify_r2_bucket(self.name), (
                     f'Source specified as {self.source}, a R2 bucket. ',
                     'R2 Bucket should exist.')
+            elif self.source.startswith('nebius://'):
+                assert self.name == data_utils.split_nebius_path(
+                    self.source)[0], (
+                        'Nebius Object Storage is specified as path, the name '
+                        'should be the same as Nebius Object Storage bucket.')
+                assert data_utils.verify_nebius_bucket(self.name), (
+                    f'Source specified as {self.source}, a Nebius Object '
+                    f'Storage bucket. Nebius Object Storage Bucket should'
+                    f' exist.')
             elif self.source.startswith('cos://'):
                 assert self.name == data_utils.split_cos_path(self.source)[0], (
                     'COS Bucket is specified as path, the name should be '
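The validation above leans on two helpers added to `sky/data/data_utils.py` in this same release (`split_nebius_path`, `verify_nebius_bucket`); their exact signatures are not shown in this diff. The sketch below assumes they mirror the existing `split_s3_path` / `verify_s3_bucket` pair:

    from sky.data import data_utils

    # Assumed to behave like split_s3_path:
    #   'nebius://<bucket>/<key>' -> (bucket, key)
    bucket, key = data_utils.split_nebius_path('nebius://my-bucket/train/data.csv')
    assert bucket == 'my-bucket'
    # Assumed to return True iff the bucket exists and is reachable.
    exists = data_utils.verify_nebius_bucket(bucket)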
@@ -1484,6 +1516,8 @@ class S3Store(AbstractStore):
                     self._transfer_to_s3()
                 elif self.source.startswith('oci://'):
                     self._transfer_to_s3()
+                elif self.source.startswith('nebius://'):
+                    self._transfer_to_s3()
                 else:
                     self.batch_aws_rsync([self.source])
         except exceptions.StorageUploadError:
@@ -1848,6 +1882,15 @@ class GcsStore(AbstractStore):
                 assert data_utils.verify_r2_bucket(self.name), (
                     f'Source specified as {self.source}, a R2 bucket. ',
                     'R2 Bucket should exist.')
+            elif self.source.startswith('nebius://'):
+                assert self.name == data_utils.split_nebius_path(
+                    self.source)[0], (
+                        'Nebius Object Storage is specified as path, the name '
+                        'should be the same as R2 bucket.')
+                assert data_utils.verify_nebius_bucket(self.name), (
+                    f'Source specified as {self.source}, a Nebius Object '
+                    f'Storage bucket. Nebius Object Storage Bucket should '
+                    f'exist.')
             elif self.source.startswith('cos://'):
                 assert self.name == data_utils.split_cos_path(self.source)[0], (
                     'COS Bucket is specified as path, the name should be '
@@ -2439,6 +2482,15 @@ class AzureBlobStore(AbstractStore):
                 assert data_utils.verify_r2_bucket(self.name), (
                     f'Source specified as {self.source}, a R2 bucket. ',
                     'R2 Bucket should exist.')
+            elif self.source.startswith('nebius://'):
+                assert self.name == data_utils.split_nebius_path(
+                    self.source)[0], (
+                        'Nebius Object Storage is specified as path, the name '
+                        'should be the same as Nebius Object Storage bucket.')
+                assert data_utils.verify_nebius_bucket(self.name), (
+                    f'Source specified as {self.source}, a Nebius Object '
+                    f'Storage bucket. Nebius Object Storage Bucket should '
+                    f'exist.')
             elif self.source.startswith('cos://'):
                 assert self.name == data_utils.split_cos_path(self.source)[0], (
                     'COS Bucket is specified as path, the name should be '
@@ -2814,6 +2866,8 @@ class AzureBlobStore(AbstractStore):
                     raise NotImplementedError(error_message.format('IBM COS'))
                 elif self.source.startswith('oci://'):
                     raise NotImplementedError(error_message.format('OCI'))
+                elif self.source.startswith('nebius://'):
+                    raise NotImplementedError(error_message.format('NEBIUS'))
                 else:
                     self.batch_az_blob_sync([self.source])
         except exceptions.StorageUploadError:
@@ -3202,6 +3256,15 @@ class R2Store(AbstractStore):
                 assert self.name == data_utils.split_r2_path(self.source)[0], (
                     'R2 Bucket is specified as path, the name should be '
                     'the same as R2 bucket.')
+            elif self.source.startswith('nebius://'):
+                assert self.name == data_utils.split_nebius_path(
+                    self.source)[0], (
+                        'Nebius Object Storage is specified as path, the name '
+                        'should be the same as Nebius Object Storage bucket.')
+                assert data_utils.verify_nebius_bucket(self.name), (
+                    f'Source specified as {self.source}, a Nebius Object '
+                    f'Storage bucket. Nebius Object Storage Bucket should '
+                    f'exist.')
             elif self.source.startswith('cos://'):
                 assert self.name == data_utils.split_cos_path(self.source)[0], (
                     'IBM COS Bucket is specified as path, the name should be '
@@ -3266,6 +3329,8 @@ class R2Store(AbstractStore):
                     pass
                 elif self.source.startswith('oci://'):
                     self._transfer_to_r2()
+                elif self.source.startswith('nebius://'):
+                    self._transfer_to_r2()
                 else:
                     self.batch_aws_rsync([self.source])
         except exceptions.StorageUploadError:
@@ -3392,6 +3457,8 @@ class R2Store(AbstractStore):
             data_transfer.gcs_to_r2(self.name, self.name)
         elif self.source.startswith('s3://'):
             data_transfer.s3_to_r2(self.name, self.name)
+        elif self.source.startswith('nebius://'):
+            data_transfer.s3_to_r2(self.name, self.name)
 
     def _get_bucket(self) -> Tuple[StorageHandle, bool]:
         """Obtains the R2 bucket.
@@ -3651,6 +3718,15 @@ class IBMCosStore(AbstractStore):
                 assert data_utils.verify_r2_bucket(self.name), (
                     f'Source specified as {self.source}, a R2 bucket. ',
                     'R2 Bucket should exist.')
+            elif self.source.startswith('nebius://'):
+                assert self.name == data_utils.split_nebius_path(
+                    self.source)[0], (
+                        'Nebius Object Storage is specified as path, the name '
+                        'should be the same as Nebius Object Storage bucket.')
+                assert data_utils.verify_nebius_bucket(self.name), (
+                    f'Source specified as {self.source}, a Nebius Object '
+                    f'Storage bucket. Nebius Object Storage Bucket should '
+                    f'exist.')
             elif self.source.startswith('cos://'):
                 assert self.name == data_utils.split_cos_path(self.source)[0], (
                     'COS Bucket is specified as path, the name should be '
@@ -3744,6 +3820,9 @@ class IBMCosStore(AbstractStore):
             elif self.source.startswith('s3://'):
                 raise Exception('IBM COS currently not supporting'
                                 'data transfers between COS and S3')
+            elif self.source.startswith('nebius://'):
+                raise Exception('IBM COS currently not supporting'
+                                'data transfers between COS and Nebius')
             elif self.source.startswith('gs://'):
                 raise Exception('IBM COS currently not supporting'
                                 'data transfers between COS and GS')
@@ -4510,3 +4589,450 @@ class OciStore(AbstractStore):
                 raise exceptions.StorageBucketDeleteError(
                     f'Failed to delete OCI bucket {bucket_name}.')
         return True
+
+
+class NebiusStore(AbstractStore):
+    """NebiusStore inherits from Storage Object and represents the backend
+    for S3 buckets.
+    """
+
+    _ACCESS_DENIED_MESSAGE = 'Access Denied'
+    _TIMEOUT_TO_PROPAGATES = 20
+
+    def __init__(self,
+                 name: str,
+                 source: str,
+                 region: Optional[str] = None,
+                 is_sky_managed: Optional[bool] = None,
+                 sync_on_reconstruction: bool = True,
+                 _bucket_sub_path: Optional[str] = None):
+        self.client: 'boto3.client.Client'
+        self.bucket: 'StorageHandle'
+        self.region = region if region is not None else nebius.DEFAULT_REGION
+        super().__init__(name, source, region, is_sky_managed,
+                         sync_on_reconstruction, _bucket_sub_path)
+
+    def _validate(self):
+        if self.source is not None and isinstance(self.source, str):
+            if self.source.startswith('s3://'):
+                assert self.name == data_utils.split_s3_path(self.source)[0], (
+                    'S3 Bucket is specified as path, the name should be the'
+                    ' same as S3 bucket.')
+            elif self.source.startswith('gs://'):
+                assert self.name == data_utils.split_gcs_path(self.source)[0], (
+                    'GCS Bucket is specified as path, the name should be '
+                    'the same as GCS bucket.')
+                assert data_utils.verify_gcs_bucket(self.name), (
+                    f'Source specified as {self.source}, a GCS bucket. ',
+                    'GCS Bucket should exist.')
+            elif data_utils.is_az_container_endpoint(self.source):
+                storage_account_name, container_name, _ = (
+                    data_utils.split_az_path(self.source))
+                assert self.name == container_name, (
+                    'Azure bucket is specified as path, the name should be '
+                    'the same as Azure bucket.')
+                assert data_utils.verify_az_bucket(
+                    storage_account_name, self.name), (
+                        f'Source specified as {self.source}, an Azure bucket. '
+                        'Azure bucket should exist.')
+            elif self.source.startswith('r2://'):
+                assert self.name == data_utils.split_r2_path(self.source)[0], (
+                    'R2 Bucket is specified as path, the name should be '
+                    'the same as R2 bucket.')
+                assert data_utils.verify_r2_bucket(self.name), (
+                    f'Source specified as {self.source}, a R2 bucket. ',
+                    'R2 Bucket should exist.')
+            elif self.source.startswith('nebius://'):
+                assert self.name == data_utils.split_nebius_path(
+                    self.source)[0], (
+                        'Nebius Object Storage is specified as path, the name '
+                        'should be the same as Nebius Object Storage bucket.')
+            elif self.source.startswith('cos://'):
+                assert self.name == data_utils.split_cos_path(self.source)[0], (
+                    'COS Bucket is specified as path, the name should be '
+                    'the same as COS bucket.')
+                assert data_utils.verify_ibm_cos_bucket(self.name), (
+                    f'Source specified as {self.source}, a COS bucket. ',
+                    'COS Bucket should exist.')
+            elif self.source.startswith('oci://'):
+                raise NotImplementedError(
+                    'Moving data from OCI to S3 is currently not supported.')
+        # Validate name
+        self.name = S3Store.validate_name(self.name)
+
+        # Check if the storage is enabled
+        if not _is_storage_cloud_enabled(str(clouds.Nebius())):
+            with ux_utils.print_exception_no_traceback():
+                raise exceptions.ResourcesUnavailableError((
+                    'Storage \'store: nebius\' specified, but '
+                    'Nebius access is disabled. To fix, enable '
+                    'Nebius by running `sky check`. More info: '
+                    'https://docs.skypilot.co/en/latest/getting-started/installation.html.'  # pylint: disable=line-too-long
+                ))
+
+    def initialize(self):
+        """Initializes the Nebius Object Storage on the cloud.
+
+        Initialization involves fetching bucket if exists, or creating it if
+        it does not.
+
+        Raises:
+          StorageBucketCreateError: If bucket creation fails
+          StorageBucketGetError: If fetching existing bucket fails
+          StorageInitError: If general initialization fails.
+        """
+        self.client = data_utils.create_nebius_client(self.region)
+        self.bucket, is_new_bucket = self._get_bucket()
+        if self.is_sky_managed is None:
+            # If is_sky_managed is not specified, then this is a new storage
+            # object (i.e., did not exist in global_user_state) and we should
+            # set the is_sky_managed property.
+            # If is_sky_managed is specified, then we take no action.
+            self.is_sky_managed = is_new_bucket
+
+    def upload(self):
+        """Uploads source to store bucket.
+
+        Upload must be called by the Storage handler - it is not called on
+        Store initialization.
+
+        Raises:
+            StorageUploadError: if upload fails.
+        """
+        try:
+            if isinstance(self.source, list):
+                self.batch_aws_rsync(self.source, create_dirs=True)
+            elif self.source is not None:
+                if self.source.startswith('nebius://'):
+                    pass
+                elif self.source.startswith('s3://'):
+                    self._transfer_to_nebius()
+                elif self.source.startswith('gs://'):
+                    self._transfer_to_nebius()
+                elif self.source.startswith('r2://'):
+                    self._transfer_to_nebius()
+                elif self.source.startswith('oci://'):
+                    self._transfer_to_nebius()
+                else:
+                    self.batch_aws_rsync([self.source])
+        except exceptions.StorageUploadError:
+            raise
+        except Exception as e:
+            raise exceptions.StorageUploadError(
+                f'Upload failed for store {self.name}') from e
+
+    def delete(self) -> None:
+        if self._bucket_sub_path is not None and not self.is_sky_managed:
+            return self._delete_sub_path()
+
+        deleted_by_skypilot = self._delete_nebius_bucket(self.name)
+        if deleted_by_skypilot:
+            msg_str = f'Deleted Nebius bucket {self.name}.'
+        else:
+            msg_str = (f'Nebius bucket {self.name} may have been deleted '
+                       f'externally. Removing from local state.')
+        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
+                    f'{colorama.Style.RESET_ALL}')
+
+    def _delete_sub_path(self) -> None:
+        assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
+        deleted_by_skypilot = self._delete_nebius_bucket_sub_path(
+            self.name, self._bucket_sub_path)
+        if deleted_by_skypilot:
+            msg_str = (f'Removed objects from S3 bucket '
+                       f'{self.name}/{self._bucket_sub_path}.')
+        else:
+            msg_str = (f'Failed to remove objects from S3 bucket '
+                       f'{self.name}/{self._bucket_sub_path}.')
+        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
+                    f'{colorama.Style.RESET_ALL}')
+
+    def get_handle(self) -> StorageHandle:
+        return nebius.resource('s3').Bucket(self.name)
+
+    def batch_aws_rsync(self,
+                        source_path_list: List[Path],
+                        create_dirs: bool = False) -> None:
+        """Invokes aws s3 sync to batch upload a list of local paths to S3
+
+        AWS Sync by default uses 10 threads to upload files to the bucket. To
+        increase parallelism, modify max_concurrent_requests in your aws config
+        file (Default path: ~/.aws/config).
+
+        Since aws s3 sync does not support batch operations, we construct
+        multiple commands to be run in parallel.
+
+        Args:
+            source_path_list: List of paths to local files or directories
+            create_dirs: If the local_path is a directory and this is set to
+                False, the contents of the directory are directly uploaded to
+                root of the bucket. If the local_path is a directory and this is
+                set to True, the directory is created in the bucket root and
+                contents are uploaded to it.
+        """
+        sub_path = (f'/{self._bucket_sub_path}'
+                    if self._bucket_sub_path else '')
+
+        def get_file_sync_command(base_dir_path, file_names):
+            includes = ' '.join([
+                f'--include {shlex.quote(file_name)}'
+                for file_name in file_names
+            ])
+            endpoint_url = nebius.create_endpoint(self.region)
+            base_dir_path = shlex.quote(base_dir_path)
+            sync_command = ('aws s3 sync --no-follow-symlinks --exclude="*" '
+                            f'{includes} {base_dir_path} '
+                            f's3://{self.name}{sub_path} '
+                            f'--endpoint={endpoint_url} '
+                            f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+            return sync_command
+
+        def get_dir_sync_command(src_dir_path, dest_dir_name):
+            # we exclude .git directory from the sync
+            excluded_list = storage_utils.get_excluded_files(src_dir_path)
+            excluded_list.append('.git/*')
+            excludes = ' '.join([
+                f'--exclude {shlex.quote(file_name)}'
+                for file_name in excluded_list
+            ])
+            endpoint_url = nebius.create_endpoint(self.region)
+            src_dir_path = shlex.quote(src_dir_path)
+            sync_command = (f'aws s3 sync --no-follow-symlinks {excludes} '
+                            f'{src_dir_path} '
+                            f's3://{self.name}{sub_path}/{dest_dir_name} '
+                            f'--endpoint={endpoint_url} '
+                            f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+            return sync_command
+
+        # Generate message for upload
+        if len(source_path_list) > 1:
+            source_message = f'{len(source_path_list)} paths'
+        else:
+            source_message = source_path_list[0]
+
+        log_path = sky_logging.generate_tmp_logging_file_path(
+            _STORAGE_LOG_FILE_NAME)
+        sync_path = f'{source_message} -> nebius://{self.name}{sub_path}/'
+        with rich_utils.safe_status(
+                ux_utils.spinner_message(f'Syncing {sync_path}',
+                                         log_path=log_path)):
+            data_utils.parallel_upload(
+                source_path_list,
+                get_file_sync_command,
+                get_dir_sync_command,
+                log_path,
+                self.name,
+                self._ACCESS_DENIED_MESSAGE,
+                create_dirs=create_dirs,
+                max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS)
+            logger.info(
+                ux_utils.finishing_message(f'Storage synced: {sync_path}',
+                                           log_path))
+
+    def _transfer_to_nebius(self) -> None:
+        assert isinstance(self.source, str), self.source
+        if self.source.startswith('gs://'):
+            data_transfer.gcs_to_nebius(self.name, self.name)
+        elif self.source.startswith('r2://'):
+            data_transfer.r2_to_nebius(self.name, self.name)
+        elif self.source.startswith('s3://'):
+            data_transfer.s3_to_nebius(self.name, self.name)
+
+    def _get_bucket(self) -> Tuple[StorageHandle, bool]:
+        """Obtains the S3 bucket.
+
+        If the bucket exists, this method will return the bucket.
+        If the bucket does not exist, there are three cases:
+          1) Raise an error if the bucket source starts with s3://
+          2) Return None if bucket has been externally deleted and
+             sync_on_reconstruction is False
+          3) Create and return a new bucket otherwise
+
+        Raises:
+            StorageSpecError: If externally created bucket is attempted to be
+                mounted without specifying storage source.
+            StorageBucketCreateError: If creating the bucket fails
+            StorageBucketGetError: If fetching a bucket fails
+            StorageExternalDeletionError: If externally deleted storage is
+                attempted to be fetched while reconstructing the storage for
+                'sky storage delete' or 'sky start'
+        """
+        nebius_s = nebius.resource('s3')
+        bucket = nebius_s.Bucket(self.name)
+        endpoint_url = nebius.create_endpoint(self.region)
+        try:
+            # Try Public bucket case.
+            # This line does not error out if the bucket is an external public
+            # bucket or if it is a user's bucket that is publicly
+            # accessible.
+            self.client.head_bucket(Bucket=self.name)
+            self._validate_existing_bucket()
+            return bucket, False
+        except aws.botocore_exceptions().ClientError as e:
+            error_code = e.response['Error']['Code']
+            # AccessDenied error for buckets that are private and not owned by
+            # user.
+            if error_code == '403':
+                command = (f'aws s3 ls s3://{self.name} '
+                           f'--endpoint={endpoint_url} '
+                           f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+                with ux_utils.print_exception_no_traceback():
+                    raise exceptions.StorageBucketGetError(
+                        _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
+                        f' To debug, consider running `{command}`.') from e
+
+        if isinstance(self.source, str) and self.source.startswith('nebius://'):
+            with ux_utils.print_exception_no_traceback():
+                raise exceptions.StorageBucketGetError(
+                    'Attempted to use a non-existent bucket as a source: '
+                    f'{self.source}. Consider using `aws s3 ls '
+                    f'{self.source} --endpoint={endpoint_url}`'
+                    f'--profile={nebius.NEBIUS_PROFILE_NAME} to debug.')
+
+        # If bucket cannot be found in both private and public settings,
+        # the bucket is to be created by Sky. However, creation is skipped if
+        # Store object is being reconstructed for deletion or re-mount with
+        # sky start, and error is raised instead.
+        if self.sync_on_reconstruction:
+            bucket = self._create_nebius_bucket(self.name, self.region)
+            return bucket, True
+        else:
+            # Raised when Storage object is reconstructed for sky storage
+            # delete or to re-mount Storages with sky start but the storage
+            # is already removed externally.
+            raise exceptions.StorageExternalDeletionError(
+                'Attempted to fetch a non-existent bucket: '
+                f'{self.name}')
+
+    def _download_file(self, remote_path: str, local_path: str) -> None:
+        """Downloads file from remote to local on s3 bucket
+        using the boto3 API
+
+        Args:
+          remote_path: str; Remote path on S3 bucket
+          local_path: str; Local path on user's device
+        """
+        self.bucket.download_file(remote_path, local_path)
+
+    def mount_command(self, mount_path: str) -> str:
+        """Returns the command to mount the bucket to the mount_path.
+
+        Uses goofys to mount the bucket.
+
+        Args:
+          mount_path: str; Path to mount the bucket to.
+        """
+        install_cmd = mounting_utils.get_s3_mount_install_cmd()
+        endpoint_url = nebius.create_endpoint(self.region)
+        nebius_profile_name = nebius.NEBIUS_PROFILE_NAME
+        mount_cmd = mounting_utils.get_nebius_mount_cmd(nebius_profile_name,
+                                                        endpoint_url,
+                                                        self.bucket.name,
+                                                        mount_path,
+                                                        self._bucket_sub_path)
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cmd)
+
+    def _create_nebius_bucket(self,
+                              bucket_name: str,
+                              region='auto') -> StorageHandle:
+        """Creates S3 bucket with specific name in specific region
+
+        Args:
+          bucket_name: str; Name of bucket
+          region: str; Region name, e.g. us-west-1, us-east-2
+        Raises:
+            StorageBucketCreateError: If bucket creation fails.
+        """
+        nebius_client = self.client
+        try:
+            if region is None:
+                nebius_client.create_bucket(Bucket=bucket_name)
+            else:
+                location = {'LocationConstraint': region}
+                nebius_client.create_bucket(Bucket=bucket_name,
+                                            CreateBucketConfiguration=location)
+                logger.info(f' {colorama.Style.DIM}Created Nebius bucket '
+                            f'{bucket_name!r} in {region}'
+                            f'{colorama.Style.RESET_ALL}')
+        except aws.botocore_exceptions().ClientError as e:
+            with ux_utils.print_exception_no_traceback():
+                raise exceptions.StorageBucketCreateError(
+                    f'Attempted to create a bucket '
+                    f'{self.name} but failed.') from e
+        return nebius.resource('s3').Bucket(bucket_name)
+
+    def _execute_nebius_remove_command(self, command: str, bucket_name: str,
+                                       hint_operating: str,
+                                       hint_failed: str) -> bool:
+        try:
+            with rich_utils.safe_status(
+                    ux_utils.spinner_message(hint_operating)):
+                subprocess.check_output(command.split(' '),
+                                        stderr=subprocess.STDOUT)
+        except subprocess.CalledProcessError as e:
+            if 'NoSuchBucket' in e.output.decode('utf-8'):
+                logger.debug(
+                    _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
+                        bucket_name=bucket_name))
+                return False
+            else:
+                with ux_utils.print_exception_no_traceback():
+                    raise exceptions.StorageBucketDeleteError(
+                        f'{hint_failed}'
+                        f'Detailed error: {e.output}')
+        return True
+
+    def _delete_nebius_bucket(self, bucket_name: str) -> bool:
+        """Deletes S3 bucket, including all objects in bucket
+
+        Args:
+          bucket_name: str; Name of bucket
+
+        Returns:
+         bool; True if bucket was deleted, False if it was deleted externally.
+
+        Raises:
+            StorageBucketDeleteError: If deleting the bucket fails.
+        """
+        # Deleting objects is very slow programatically
+        # (i.e. bucket.objects.all().delete() is slow).
+        # In addition, standard delete operations (i.e. via `aws s3 rm`)
+        # are slow, since AWS puts deletion markers.
+        # https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
+        # The fastest way to delete is to run `aws s3 rb --force`,
+        # which removes the bucket by force.
+        endpoint_url = nebius.create_endpoint(self.region)
+        remove_command = (f'aws s3 rb s3://{bucket_name} --force '
+                          f'--endpoint {endpoint_url} '
+                          f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+
+        success = self._execute_nebius_remove_command(
+            remove_command, bucket_name,
+            f'Deleting Nebius bucket {bucket_name}',
+            f'Failed to delete Nebius bucket {bucket_name}.')
+        if not success:
+            return False
+
+        # Wait until bucket deletion propagates on Nebius servers
+        start_time = time.time()
+        while data_utils.verify_nebius_bucket(bucket_name):
+            if time.time() - start_time > self._TIMEOUT_TO_PROPAGATES:
+                raise TimeoutError(
+                    f'Timeout while verifying {bucket_name} Nebius bucket.')
+            time.sleep(0.1)
+        return True
+
+    def _delete_nebius_bucket_sub_path(self, bucket_name: str,
+                                       sub_path: str) -> bool:
+        """Deletes the sub path from the bucket."""
+        endpoint_url = nebius.create_endpoint(self.region)
+        remove_command = (
+            f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive '
+            f'--endpoint {endpoint_url} '
+            f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+        return self._execute_nebius_remove_command(
+            remove_command, bucket_name, f'Removing objects from '
+            f'Nebius bucket {bucket_name}/{sub_path}',
+            f'Failed to remove objects from '
+            f'Nebius bucket {bucket_name}/{sub_path}.')