skypilot-nightly 1.0.0.dev20250320__py3-none-any.whl → 1.0.0.dev20250321__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/cloudflare.py +16 -4
- sky/adaptors/kubernetes.py +2 -1
- sky/adaptors/nebius.py +128 -6
- sky/backends/cloud_vm_ray_backend.py +3 -1
- sky/benchmark/benchmark_utils.py +3 -2
- sky/check.py +18 -87
- sky/cloud_stores.py +66 -0
- sky/clouds/aws.py +14 -7
- sky/clouds/azure.py +13 -6
- sky/clouds/cloud.py +33 -10
- sky/clouds/cudo.py +3 -2
- sky/clouds/do.py +3 -2
- sky/clouds/fluidstack.py +3 -2
- sky/clouds/gcp.py +8 -9
- sky/clouds/ibm.py +15 -6
- sky/clouds/kubernetes.py +3 -1
- sky/clouds/lambda_cloud.py +3 -1
- sky/clouds/nebius.py +7 -3
- sky/clouds/oci.py +15 -6
- sky/clouds/paperspace.py +3 -2
- sky/clouds/runpod.py +7 -1
- sky/clouds/scp.py +3 -1
- sky/clouds/service_catalog/kubernetes_catalog.py +3 -1
- sky/clouds/vast.py +3 -2
- sky/clouds/vsphere.py +3 -2
- sky/core.py +4 -2
- sky/data/data_transfer.py +75 -0
- sky/data/data_utils.py +34 -0
- sky/data/mounting_utils.py +18 -0
- sky/data/storage.py +537 -9
- sky/data/storage_utils.py +102 -84
- sky/exceptions.py +2 -0
- sky/global_user_state.py +12 -33
- sky/jobs/server/core.py +1 -1
- sky/jobs/utils.py +5 -0
- sky/optimizer.py +7 -2
- sky/resources.py +6 -1
- sky/setup_files/dependencies.py +3 -1
- sky/task.py +16 -5
- sky/utils/command_runner.py +2 -0
- sky/utils/controller_utils.py +8 -5
- sky/utils/kubernetes/kubernetes_deploy_utils.py +2 -1
- {skypilot_nightly-1.0.0.dev20250320.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/METADATA +11 -1
- {skypilot_nightly-1.0.0.dev20250320.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/RECORD +49 -49
- {skypilot_nightly-1.0.0.dev20250320.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/WHEEL +1 -1
- {skypilot_nightly-1.0.0.dev20250320.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250320.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250320.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/top_level.txt +0 -0
sky/data/storage.py
CHANGED
@@ -23,7 +23,9 @@ from sky.adaptors import azure
 from sky.adaptors import cloudflare
 from sky.adaptors import gcp
 from sky.adaptors import ibm
+from sky.adaptors import nebius
 from sky.adaptors import oci
+from sky.clouds import cloud as sky_cloud
 from sky.data import data_transfer
 from sky.data import data_utils
 from sky.data import mounting_utils
@@ -57,6 +59,7 @@ STORE_ENABLED_CLOUDS: List[str] = [
     str(clouds.Azure()),
     str(clouds.IBM()),
     str(clouds.OCI()),
+    str(clouds.Nebius()),
     cloudflare.NAME,
 ]
 
@@ -78,11 +81,12 @@ _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE = (
 _STORAGE_LOG_FILE_NAME = 'storage_sync.log'
 
 
-def
+def get_cached_enabled_storage_cloud_names_or_refresh(
         raise_if_no_cloud_access: bool = False) -> List[str]:
     # This is a temporary solution until https://github.com/skypilot-org/skypilot/issues/1943 # pylint: disable=line-too-long
     # is resolved by implementing separate 'enabled_storage_clouds'
-    enabled_clouds = sky_check.
+    enabled_clouds = sky_check.get_cached_enabled_clouds_or_refresh(
+        sky_cloud.CloudCapability.STORAGE)
     enabled_clouds = [str(cloud) for cloud in enabled_clouds]
 
     r2_is_enabled, _ = cloudflare.check_storage_credentials()
@@ -97,13 +101,14 @@ def get_cached_enabled_storage_clouds_or_refresh(
 
 def _is_storage_cloud_enabled(cloud_name: str,
                               try_fix_with_sky_check: bool = True) -> bool:
-
-
+    enabled_storage_cloud_names = (
+        get_cached_enabled_storage_cloud_names_or_refresh())
+    if cloud_name in enabled_storage_cloud_names:
         return True
     if try_fix_with_sky_check:
         # TODO(zhwu): Only check the specified cloud to speed up.
         sky_check.check(quiet=True,
-                        capability=
+                        capability=sky_cloud.CloudCapability.STORAGE)
         return _is_storage_cloud_enabled(cloud_name,
                                          try_fix_with_sky_check=False)
     return False
@@ -117,6 +122,7 @@ class StoreType(enum.Enum):
     R2 = 'R2'
     IBM = 'IBM'
     OCI = 'OCI'
+    NEBIUS = 'NEBIUS'
 
     @classmethod
     def from_cloud(cls, cloud: str) -> 'StoreType':
@@ -132,6 +138,8 @@ class StoreType(enum.Enum):
             return StoreType.AZURE
         elif cloud.lower() == str(clouds.OCI()).lower():
             return StoreType.OCI
+        elif cloud.lower() == str(clouds.Nebius()).lower():
+            return StoreType.NEBIUS
         elif cloud.lower() == str(clouds.Lambda()).lower():
             with ux_utils.print_exception_no_traceback():
                 raise ValueError('Lambda Cloud does not provide cloud storage.')
@@ -171,6 +179,8 @@ class StoreType(enum.Enum):
             return StoreType.IBM
         elif isinstance(store, OciStore):
             return StoreType.OCI
+        elif isinstance(store, NebiusStore):
+            return StoreType.NEBIUS
         else:
             with ux_utils.print_exception_no_traceback():
                 raise ValueError(f'Unknown store type: {store}')
@@ -189,6 +199,9 @@ class StoreType(enum.Enum):
             return 'cos://'
         elif self == StoreType.OCI:
             return 'oci://'
+        # Nebius storages use 's3://' as a prefix for various aws cli commands
+        elif self == StoreType.NEBIUS:
+            return 's3://'
         else:
             with ux_utils.print_exception_no_traceback():
                 raise ValueError(f'Unknown store type: {self}')
@@ -726,6 +739,8 @@ class Storage(object):
             self.add_store(StoreType.IBM)
         elif self.source.startswith('oci://'):
             self.add_store(StoreType.OCI)
+        elif self.source.startswith('nebius://'):
+            self.add_store(StoreType.NEBIUS)
 
     def get_bucket_sub_path_prefix(self, blob_path: str) -> str:
         """Adds the bucket sub path prefix to the blob path."""
@@ -812,7 +827,9 @@ class Storage(object):
                         'using a bucket by writing <destination_path>: '
                         f'{source} in the file_mounts section of your YAML')
                 is_local_source = True
-            elif split_path.scheme in [
+            elif split_path.scheme in [
+                    's3', 'gs', 'https', 'r2', 'cos', 'oci', 'nebius'
+            ]:
                 is_local_source = False
                 # Storage mounting does not support mounting specific files from
                 # cloud store - ensure path points to only a directory
@@ -836,7 +853,7 @@ class Storage(object):
                 with ux_utils.print_exception_no_traceback():
                     raise exceptions.StorageSourceError(
                         f'Supported paths: local, s3://, gs://, https://, '
-                        f'r2://, cos://, oci://. Got: {source}')
+                        f'r2://, cos://, oci://, nebius://. Got: {source}')
         return source, is_local_source
 
     def _validate_storage_spec(self, name: Optional[str]) -> None:
@@ -851,7 +868,7 @@ class Storage(object):
             """
             prefix = name.split('://')[0]
             prefix = prefix.lower()
-            if prefix in ['s3', 'gs', 'https', 'r2', 'cos', 'oci']:
+            if prefix in ['s3', 'gs', 'https', 'r2', 'cos', 'oci', 'nebius']:
                 with ux_utils.print_exception_no_traceback():
                     raise exceptions.StorageNameError(
                         'Prefix detected: `name` cannot start with '
@@ -977,6 +994,12 @@ class Storage(object):
                     source=self.source,
                     sync_on_reconstruction=self.sync_on_reconstruction,
                     _bucket_sub_path=self._bucket_sub_path)
+            elif s_type == StoreType.NEBIUS:
+                store = NebiusStore.from_metadata(
+                    s_metadata,
+                    source=self.source,
+                    sync_on_reconstruction=self.sync_on_reconstruction,
+                    _bucket_sub_path=self._bucket_sub_path)
             else:
                 with ux_utils.print_exception_no_traceback():
                     raise ValueError(f'Unknown store type: {s_type}')
@@ -1071,6 +1094,8 @@ class Storage(object):
             store_cls = IBMCosStore
         elif store_type == StoreType.OCI:
             store_cls = OciStore
+        elif store_type == StoreType.NEBIUS:
+            store_cls = NebiusStore
         else:
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageSpecError(
@@ -1104,7 +1129,7 @@ class Storage(object):
                     f'name {self.name}. General initialization error.')
                 raise
         except exceptions.StorageSpecError:
-            logger.error(f'Could not mount externally created {store_type}'
+            logger.error(f'Could not mount externally created {store_type} '
                          f'store with name {self.name!r}.')
             raise
 
@@ -1359,6 +1384,15 @@ class S3Store(AbstractStore):
                 assert data_utils.verify_r2_bucket(self.name), (
                     f'Source specified as {self.source}, a R2 bucket. ',
                     'R2 Bucket should exist.')
+            elif self.source.startswith('nebius://'):
+                assert self.name == data_utils.split_nebius_path(
+                    self.source)[0], (
+                        'Nebius Object Storage is specified as path, the name '
+                        'should be the same as Nebius Object Storage bucket.')
+                assert data_utils.verify_nebius_bucket(self.name), (
+                    f'Source specified as {self.source}, a Nebius Object '
+                    f'Storage bucket. Nebius Object Storage Bucket should'
+                    f' exist.')
             elif self.source.startswith('cos://'):
                 assert self.name == data_utils.split_cos_path(self.source)[0], (
                     'COS Bucket is specified as path, the name should be '
@@ -1482,6 +1516,8 @@ class S3Store(AbstractStore):
                     self._transfer_to_s3()
                 elif self.source.startswith('oci://'):
                     self._transfer_to_s3()
+                elif self.source.startswith('nebius://'):
+                    self._transfer_to_s3()
                 else:
                     self.batch_aws_rsync([self.source])
         except exceptions.StorageUploadError:
@@ -1846,6 +1882,15 @@ class GcsStore(AbstractStore):
                 assert data_utils.verify_r2_bucket(self.name), (
                     f'Source specified as {self.source}, a R2 bucket. ',
                     'R2 Bucket should exist.')
+            elif self.source.startswith('nebius://'):
+                assert self.name == data_utils.split_nebius_path(
+                    self.source)[0], (
+                        'Nebius Object Storage is specified as path, the name '
+                        'should be the same as R2 bucket.')
+                assert data_utils.verify_nebius_bucket(self.name), (
+                    f'Source specified as {self.source}, a Nebius Object '
+                    f'Storage bucket. Nebius Object Storage Bucket should '
+                    f'exist.')
             elif self.source.startswith('cos://'):
                 assert self.name == data_utils.split_cos_path(self.source)[0], (
                     'COS Bucket is specified as path, the name should be '
@@ -2437,6 +2482,15 @@ class AzureBlobStore(AbstractStore):
                 assert data_utils.verify_r2_bucket(self.name), (
                     f'Source specified as {self.source}, a R2 bucket. ',
                     'R2 Bucket should exist.')
+            elif self.source.startswith('nebius://'):
+                assert self.name == data_utils.split_nebius_path(
+                    self.source)[0], (
+                        'Nebius Object Storage is specified as path, the name '
+                        'should be the same as Nebius Object Storage bucket.')
+                assert data_utils.verify_nebius_bucket(self.name), (
+                    f'Source specified as {self.source}, a Nebius Object '
+                    f'Storage bucket. Nebius Object Storage Bucket should '
+                    f'exist.')
             elif self.source.startswith('cos://'):
                 assert self.name == data_utils.split_cos_path(self.source)[0], (
                     'COS Bucket is specified as path, the name should be '
@@ -2812,6 +2866,8 @@ class AzureBlobStore(AbstractStore):
                     raise NotImplementedError(error_message.format('IBM COS'))
                 elif self.source.startswith('oci://'):
                     raise NotImplementedError(error_message.format('OCI'))
+                elif self.source.startswith('nebius://'):
+                    raise NotImplementedError(error_message.format('NEBIUS'))
                 else:
                     self.batch_az_blob_sync([self.source])
         except exceptions.StorageUploadError:
@@ -3200,6 +3256,15 @@ class R2Store(AbstractStore):
                 assert self.name == data_utils.split_r2_path(self.source)[0], (
                     'R2 Bucket is specified as path, the name should be '
                     'the same as R2 bucket.')
+            elif self.source.startswith('nebius://'):
+                assert self.name == data_utils.split_nebius_path(
+                    self.source)[0], (
+                        'Nebius Object Storage is specified as path, the name '
+                        'should be the same as Nebius Object Storage bucket.')
+                assert data_utils.verify_nebius_bucket(self.name), (
+                    f'Source specified as {self.source}, a Nebius Object '
+                    f'Storage bucket. Nebius Object Storage Bucket should '
+                    f'exist.')
             elif self.source.startswith('cos://'):
                 assert self.name == data_utils.split_cos_path(self.source)[0], (
                     'IBM COS Bucket is specified as path, the name should be '
@@ -3264,6 +3329,8 @@ class R2Store(AbstractStore):
                     pass
                 elif self.source.startswith('oci://'):
                     self._transfer_to_r2()
+                elif self.source.startswith('nebius://'):
+                    self._transfer_to_r2()
                 else:
                     self.batch_aws_rsync([self.source])
         except exceptions.StorageUploadError:
@@ -3390,6 +3457,8 @@ class R2Store(AbstractStore):
             data_transfer.gcs_to_r2(self.name, self.name)
         elif self.source.startswith('s3://'):
             data_transfer.s3_to_r2(self.name, self.name)
+        elif self.source.startswith('nebius://'):
+            data_transfer.s3_to_r2(self.name, self.name)
 
     def _get_bucket(self) -> Tuple[StorageHandle, bool]:
         """Obtains the R2 bucket.
@@ -3649,6 +3718,15 @@ class IBMCosStore(AbstractStore):
                 assert data_utils.verify_r2_bucket(self.name), (
                     f'Source specified as {self.source}, a R2 bucket. ',
                     'R2 Bucket should exist.')
+            elif self.source.startswith('nebius://'):
+                assert self.name == data_utils.split_nebius_path(
+                    self.source)[0], (
+                        'Nebius Object Storage is specified as path, the name '
+                        'should be the same as Nebius Object Storage bucket.')
+                assert data_utils.verify_nebius_bucket(self.name), (
+                    f'Source specified as {self.source}, a Nebius Object '
+                    f'Storage bucket. Nebius Object Storage Bucket should '
+                    f'exist.')
             elif self.source.startswith('cos://'):
                 assert self.name == data_utils.split_cos_path(self.source)[0], (
                     'COS Bucket is specified as path, the name should be '
@@ -3742,6 +3820,9 @@ class IBMCosStore(AbstractStore):
         elif self.source.startswith('s3://'):
             raise Exception('IBM COS currently not supporting'
                             'data transfers between COS and S3')
+        elif self.source.startswith('nebius://'):
+            raise Exception('IBM COS currently not supporting'
+                            'data transfers between COS and Nebius')
         elif self.source.startswith('gs://'):
             raise Exception('IBM COS currently not supporting'
                             'data transfers between COS and GS')
@@ -4508,3 +4589,450 @@ class OciStore(AbstractStore):
                 raise exceptions.StorageBucketDeleteError(
                     f'Failed to delete OCI bucket {bucket_name}.')
         return True
+
+
+class NebiusStore(AbstractStore):
+    """NebiusStore inherits from Storage Object and represents the backend
+    for S3 buckets.
+    """
+
+    _ACCESS_DENIED_MESSAGE = 'Access Denied'
+    _TIMEOUT_TO_PROPAGATES = 20
+
+    def __init__(self,
+                 name: str,
+                 source: str,
+                 region: Optional[str] = None,
+                 is_sky_managed: Optional[bool] = None,
+                 sync_on_reconstruction: bool = True,
+                 _bucket_sub_path: Optional[str] = None):
+        self.client: 'boto3.client.Client'
+        self.bucket: 'StorageHandle'
+        self.region = region if region is not None else nebius.DEFAULT_REGION
+        super().__init__(name, source, region, is_sky_managed,
+                         sync_on_reconstruction, _bucket_sub_path)
+
+    def _validate(self):
+        if self.source is not None and isinstance(self.source, str):
+            if self.source.startswith('s3://'):
+                assert self.name == data_utils.split_s3_path(self.source)[0], (
+                    'S3 Bucket is specified as path, the name should be the'
+                    ' same as S3 bucket.')
+            elif self.source.startswith('gs://'):
+                assert self.name == data_utils.split_gcs_path(self.source)[0], (
+                    'GCS Bucket is specified as path, the name should be '
+                    'the same as GCS bucket.')
+                assert data_utils.verify_gcs_bucket(self.name), (
+                    f'Source specified as {self.source}, a GCS bucket. ',
+                    'GCS Bucket should exist.')
+            elif data_utils.is_az_container_endpoint(self.source):
+                storage_account_name, container_name, _ = (
+                    data_utils.split_az_path(self.source))
+                assert self.name == container_name, (
+                    'Azure bucket is specified as path, the name should be '
+                    'the same as Azure bucket.')
+                assert data_utils.verify_az_bucket(
+                    storage_account_name, self.name), (
+                        f'Source specified as {self.source}, an Azure bucket. '
+                        'Azure bucket should exist.')
+            elif self.source.startswith('r2://'):
+                assert self.name == data_utils.split_r2_path(self.source)[0], (
+                    'R2 Bucket is specified as path, the name should be '
+                    'the same as R2 bucket.')
+                assert data_utils.verify_r2_bucket(self.name), (
+                    f'Source specified as {self.source}, a R2 bucket. ',
+                    'R2 Bucket should exist.')
+            elif self.source.startswith('nebius://'):
+                assert self.name == data_utils.split_nebius_path(
+                    self.source)[0], (
+                        'Nebius Object Storage is specified as path, the name '
+                        'should be the same as Nebius Object Storage bucket.')
+            elif self.source.startswith('cos://'):
+                assert self.name == data_utils.split_cos_path(self.source)[0], (
+                    'COS Bucket is specified as path, the name should be '
+                    'the same as COS bucket.')
+                assert data_utils.verify_ibm_cos_bucket(self.name), (
+                    f'Source specified as {self.source}, a COS bucket. ',
+                    'COS Bucket should exist.')
+            elif self.source.startswith('oci://'):
+                raise NotImplementedError(
+                    'Moving data from OCI to S3 is currently not supported.')
+        # Validate name
+        self.name = S3Store.validate_name(self.name)
+
+        # Check if the storage is enabled
+        if not _is_storage_cloud_enabled(str(clouds.Nebius())):
+            with ux_utils.print_exception_no_traceback():
+                raise exceptions.ResourcesUnavailableError((
+                    'Storage \'store: nebius\' specified, but '
+                    'Nebius access is disabled. To fix, enable '
+                    'Nebius by running `sky check`. More info: '
+                    'https://docs.skypilot.co/en/latest/getting-started/installation.html.'  # pylint: disable=line-too-long
+                ))
+
+    def initialize(self):
+        """Initializes the Nebius Object Storage on the cloud.
+
+        Initialization involves fetching bucket if exists, or creating it if
+        it does not.
+
+        Raises:
+          StorageBucketCreateError: If bucket creation fails
+          StorageBucketGetError: If fetching existing bucket fails
+          StorageInitError: If general initialization fails.
+        """
+        self.client = data_utils.create_nebius_client(self.region)
+        self.bucket, is_new_bucket = self._get_bucket()
+        if self.is_sky_managed is None:
+            # If is_sky_managed is not specified, then this is a new storage
+            # object (i.e., did not exist in global_user_state) and we should
+            # set the is_sky_managed property.
+            # If is_sky_managed is specified, then we take no action.
+            self.is_sky_managed = is_new_bucket
+
+    def upload(self):
+        """Uploads source to store bucket.
+
+        Upload must be called by the Storage handler - it is not called on
+        Store initialization.
+
+        Raises:
+            StorageUploadError: if upload fails.
+        """
+        try:
+            if isinstance(self.source, list):
+                self.batch_aws_rsync(self.source, create_dirs=True)
+            elif self.source is not None:
+                if self.source.startswith('nebius://'):
+                    pass
+                elif self.source.startswith('s3://'):
+                    self._transfer_to_nebius()
+                elif self.source.startswith('gs://'):
+                    self._transfer_to_nebius()
+                elif self.source.startswith('r2://'):
+                    self._transfer_to_nebius()
+                elif self.source.startswith('oci://'):
+                    self._transfer_to_nebius()
+                else:
+                    self.batch_aws_rsync([self.source])
+        except exceptions.StorageUploadError:
+            raise
+        except Exception as e:
+            raise exceptions.StorageUploadError(
+                f'Upload failed for store {self.name}') from e
+
+    def delete(self) -> None:
+        if self._bucket_sub_path is not None and not self.is_sky_managed:
+            return self._delete_sub_path()
+
+        deleted_by_skypilot = self._delete_nebius_bucket(self.name)
+        if deleted_by_skypilot:
+            msg_str = f'Deleted Nebius bucket {self.name}.'
+        else:
+            msg_str = (f'Nebius bucket {self.name} may have been deleted '
+                       f'externally. Removing from local state.')
+        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
+                    f'{colorama.Style.RESET_ALL}')
+
+    def _delete_sub_path(self) -> None:
+        assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
+        deleted_by_skypilot = self._delete_nebius_bucket_sub_path(
+            self.name, self._bucket_sub_path)
+        if deleted_by_skypilot:
+            msg_str = (f'Removed objects from S3 bucket '
+                       f'{self.name}/{self._bucket_sub_path}.')
+        else:
+            msg_str = (f'Failed to remove objects from S3 bucket '
+                       f'{self.name}/{self._bucket_sub_path}.')
+        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
+                    f'{colorama.Style.RESET_ALL}')
+
+    def get_handle(self) -> StorageHandle:
+        return nebius.resource('s3').Bucket(self.name)
+
+    def batch_aws_rsync(self,
+                        source_path_list: List[Path],
+                        create_dirs: bool = False) -> None:
+        """Invokes aws s3 sync to batch upload a list of local paths to S3
+
+        AWS Sync by default uses 10 threads to upload files to the bucket. To
+        increase parallelism, modify max_concurrent_requests in your aws config
+        file (Default path: ~/.aws/config).
+
+        Since aws s3 sync does not support batch operations, we construct
+        multiple commands to be run in parallel.
+
+        Args:
+            source_path_list: List of paths to local files or directories
+            create_dirs: If the local_path is a directory and this is set to
+                False, the contents of the directory are directly uploaded to
+                root of the bucket. If the local_path is a directory and this is
+                set to True, the directory is created in the bucket root and
+                contents are uploaded to it.
+        """
+        sub_path = (f'/{self._bucket_sub_path}'
+                    if self._bucket_sub_path else '')
+
+        def get_file_sync_command(base_dir_path, file_names):
+            includes = ' '.join([
+                f'--include {shlex.quote(file_name)}'
+                for file_name in file_names
+            ])
+            endpoint_url = nebius.create_endpoint(self.region)
+            base_dir_path = shlex.quote(base_dir_path)
+            sync_command = ('aws s3 sync --no-follow-symlinks --exclude="*" '
+                            f'{includes} {base_dir_path} '
+                            f's3://{self.name}{sub_path} '
+                            f'--endpoint={endpoint_url} '
+                            f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+            return sync_command
+
+        def get_dir_sync_command(src_dir_path, dest_dir_name):
+            # we exclude .git directory from the sync
+            excluded_list = storage_utils.get_excluded_files(src_dir_path)
+            excluded_list.append('.git/*')
+            excludes = ' '.join([
+                f'--exclude {shlex.quote(file_name)}'
+                for file_name in excluded_list
+            ])
+            endpoint_url = nebius.create_endpoint(self.region)
+            src_dir_path = shlex.quote(src_dir_path)
+            sync_command = (f'aws s3 sync --no-follow-symlinks {excludes} '
+                            f'{src_dir_path} '
+                            f's3://{self.name}{sub_path}/{dest_dir_name} '
+                            f'--endpoint={endpoint_url} '
+                            f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+            return sync_command
+
+        # Generate message for upload
+        if len(source_path_list) > 1:
+            source_message = f'{len(source_path_list)} paths'
+        else:
+            source_message = source_path_list[0]
+
+        log_path = sky_logging.generate_tmp_logging_file_path(
+            _STORAGE_LOG_FILE_NAME)
+        sync_path = f'{source_message} -> nebius://{self.name}{sub_path}/'
+        with rich_utils.safe_status(
+                ux_utils.spinner_message(f'Syncing {sync_path}',
+                                         log_path=log_path)):
+            data_utils.parallel_upload(
+                source_path_list,
+                get_file_sync_command,
+                get_dir_sync_command,
+                log_path,
+                self.name,
+                self._ACCESS_DENIED_MESSAGE,
+                create_dirs=create_dirs,
+                max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS)
+        logger.info(
+            ux_utils.finishing_message(f'Storage synced: {sync_path}',
+                                       log_path))
+
+    def _transfer_to_nebius(self) -> None:
+        assert isinstance(self.source, str), self.source
+        if self.source.startswith('gs://'):
+            data_transfer.gcs_to_nebius(self.name, self.name)
+        elif self.source.startswith('r2://'):
+            data_transfer.r2_to_nebius(self.name, self.name)
+        elif self.source.startswith('s3://'):
+            data_transfer.s3_to_nebius(self.name, self.name)
+
+    def _get_bucket(self) -> Tuple[StorageHandle, bool]:
+        """Obtains the S3 bucket.
+
+        If the bucket exists, this method will return the bucket.
+        If the bucket does not exist, there are three cases:
+          1) Raise an error if the bucket source starts with s3://
+          2) Return None if bucket has been externally deleted and
+             sync_on_reconstruction is False
+          3) Create and return a new bucket otherwise
+
+        Raises:
+            StorageSpecError: If externally created bucket is attempted to be
+                mounted without specifying storage source.
+            StorageBucketCreateError: If creating the bucket fails
+            StorageBucketGetError: If fetching a bucket fails
+            StorageExternalDeletionError: If externally deleted storage is
+                attempted to be fetched while reconstructing the storage for
+                'sky storage delete' or 'sky start'
+        """
+        nebius_s = nebius.resource('s3')
+        bucket = nebius_s.Bucket(self.name)
+        endpoint_url = nebius.create_endpoint(self.region)
+        try:
+            # Try Public bucket case.
+            # This line does not error out if the bucket is an external public
+            # bucket or if it is a user's bucket that is publicly
+            # accessible.
+            self.client.head_bucket(Bucket=self.name)
+            self._validate_existing_bucket()
+            return bucket, False
+        except aws.botocore_exceptions().ClientError as e:
+            error_code = e.response['Error']['Code']
+            # AccessDenied error for buckets that are private and not owned by
+            # user.
+            if error_code == '403':
+                command = (f'aws s3 ls s3://{self.name} '
+                           f'--endpoint={endpoint_url} '
+                           f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+                with ux_utils.print_exception_no_traceback():
+                    raise exceptions.StorageBucketGetError(
+                        _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
+                        f' To debug, consider running `{command}`.') from e
+
+        if isinstance(self.source, str) and self.source.startswith('nebius://'):
+            with ux_utils.print_exception_no_traceback():
+                raise exceptions.StorageBucketGetError(
+                    'Attempted to use a non-existent bucket as a source: '
+                    f'{self.source}. Consider using `aws s3 ls '
+                    f'{self.source} --endpoint={endpoint_url}`'
+                    f'--profile={nebius.NEBIUS_PROFILE_NAME} to debug.')
+
+        # If bucket cannot be found in both private and public settings,
+        # the bucket is to be created by Sky. However, creation is skipped if
+        # Store object is being reconstructed for deletion or re-mount with
+        # sky start, and error is raised instead.
+        if self.sync_on_reconstruction:
+            bucket = self._create_nebius_bucket(self.name, self.region)
+            return bucket, True
+        else:
+            # Raised when Storage object is reconstructed for sky storage
+            # delete or to re-mount Storages with sky start but the storage
+            # is already removed externally.
+            raise exceptions.StorageExternalDeletionError(
+                'Attempted to fetch a non-existent bucket: '
+                f'{self.name}')
+
+    def _download_file(self, remote_path: str, local_path: str) -> None:
+        """Downloads file from remote to local on s3 bucket
+        using the boto3 API
+
+        Args:
+            remote_path: str; Remote path on S3 bucket
+            local_path: str; Local path on user's device
+        """
+        self.bucket.download_file(remote_path, local_path)
+
+    def mount_command(self, mount_path: str) -> str:
+        """Returns the command to mount the bucket to the mount_path.
+
+        Uses goofys to mount the bucket.
+
+        Args:
+            mount_path: str; Path to mount the bucket to.
+        """
+        install_cmd = mounting_utils.get_s3_mount_install_cmd()
+        endpoint_url = nebius.create_endpoint(self.region)
+        nebius_profile_name = nebius.NEBIUS_PROFILE_NAME
+        mount_cmd = mounting_utils.get_nebius_mount_cmd(nebius_profile_name,
+                                                        endpoint_url,
+                                                        self.bucket.name,
+                                                        mount_path,
+                                                        self._bucket_sub_path)
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cmd)
+
+    def _create_nebius_bucket(self,
+                              bucket_name: str,
+                              region='auto') -> StorageHandle:
+        """Creates S3 bucket with specific name in specific region
+
+        Args:
+            bucket_name: str; Name of bucket
+            region: str; Region name, e.g. us-west-1, us-east-2
+        Raises:
+            StorageBucketCreateError: If bucket creation fails.
+        """
+        nebius_client = self.client
+        try:
+            if region is None:
+                nebius_client.create_bucket(Bucket=bucket_name)
+            else:
+                location = {'LocationConstraint': region}
+                nebius_client.create_bucket(Bucket=bucket_name,
+                                            CreateBucketConfiguration=location)
+                logger.info(f'  {colorama.Style.DIM}Created Nebius bucket '
+                            f'{bucket_name!r} in {region}'
+                            f'{colorama.Style.RESET_ALL}')
+        except aws.botocore_exceptions().ClientError as e:
+            with ux_utils.print_exception_no_traceback():
+                raise exceptions.StorageBucketCreateError(
+                    f'Attempted to create a bucket '
+                    f'{self.name} but failed.') from e
+        return nebius.resource('s3').Bucket(bucket_name)
+
+    def _execute_nebius_remove_command(self, command: str, bucket_name: str,
+                                       hint_operating: str,
+                                       hint_failed: str) -> bool:
+        try:
+            with rich_utils.safe_status(
+                    ux_utils.spinner_message(hint_operating)):
+                subprocess.check_output(command.split(' '),
+                                        stderr=subprocess.STDOUT)
+        except subprocess.CalledProcessError as e:
+            if 'NoSuchBucket' in e.output.decode('utf-8'):
+                logger.debug(
+                    _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
+                        bucket_name=bucket_name))
+                return False
+            else:
+                with ux_utils.print_exception_no_traceback():
+                    raise exceptions.StorageBucketDeleteError(
+                        f'{hint_failed}'
+                        f'Detailed error: {e.output}')
+        return True
+
+    def _delete_nebius_bucket(self, bucket_name: str) -> bool:
+        """Deletes S3 bucket, including all objects in bucket
+
+        Args:
+            bucket_name: str; Name of bucket
+
+        Returns:
+            bool; True if bucket was deleted, False if it was deleted externally.
+
+        Raises:
+            StorageBucketDeleteError: If deleting the bucket fails.
+        """
+        # Deleting objects is very slow programatically
+        # (i.e. bucket.objects.all().delete() is slow).
+        # In addition, standard delete operations (i.e. via `aws s3 rm`)
+        # are slow, since AWS puts deletion markers.
+        # https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
+        # The fastest way to delete is to run `aws s3 rb --force`,
+        # which removes the bucket by force.
+        endpoint_url = nebius.create_endpoint(self.region)
+        remove_command = (f'aws s3 rb s3://{bucket_name} --force '
+                          f'--endpoint {endpoint_url} '
+                          f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+
+        success = self._execute_nebius_remove_command(
+            remove_command, bucket_name,
+            f'Deleting Nebius bucket {bucket_name}',
+            f'Failed to delete Nebius bucket {bucket_name}.')
+        if not success:
+            return False
+
+        # Wait until bucket deletion propagates on Nebius servers
+        start_time = time.time()
+        while data_utils.verify_nebius_bucket(bucket_name):
+            if time.time() - start_time > self._TIMEOUT_TO_PROPAGATES:
+                raise TimeoutError(
+                    f'Timeout while verifying {bucket_name} Nebius bucket.')
+            time.sleep(0.1)
+        return True
+
+    def _delete_nebius_bucket_sub_path(self, bucket_name: str,
+                                       sub_path: str) -> bool:
+        """Deletes the sub path from the bucket."""
+        endpoint_url = nebius.create_endpoint(self.region)
+        remove_command = (
+            f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive '
+            f'--endpoint {endpoint_url} '
+            f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+        return self._execute_nebius_remove_command(
+            remove_command, bucket_name, f'Removing objects from '
+            f'Nebius bucket {bucket_name}/{sub_path}',
+            f'Failed to remove objects from '
+            f'Nebius bucket {bucket_name}/{sub_path}.')