skypilot-nightly 1.0.0.dev20241229__py3-none-any.whl → 1.0.0.dev20241230__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/data/mounting_utils.py +49 -14
- sky/data/storage.py +453 -124
- sky/skylet/constants.py +7 -3
- sky/task.py +18 -1
- sky/utils/controller_utils.py +111 -39
- sky/utils/schemas.py +11 -0
- {skypilot_nightly-1.0.0.dev20241229.dist-info → skypilot_nightly-1.0.0.dev20241230.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20241229.dist-info → skypilot_nightly-1.0.0.dev20241230.dist-info}/RECORD +13 -13
- {skypilot_nightly-1.0.0.dev20241229.dist-info → skypilot_nightly-1.0.0.dev20241230.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20241229.dist-info → skypilot_nightly-1.0.0.dev20241230.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20241229.dist-info → skypilot_nightly-1.0.0.dev20241230.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20241229.dist-info → skypilot_nightly-1.0.0.dev20241230.dist-info}/top_level.txt +0 -0
sky/data/storage.py
CHANGED
@@ -200,6 +200,45 @@ class StoreType(enum.Enum):
         bucket_endpoint_url = f'{store_type.store_prefix()}{path}'
         return bucket_endpoint_url

+    @classmethod
+    def get_fields_from_store_url(
+        cls, store_url: str
+    ) -> Tuple['StoreType', Type['AbstractStore'], str, str, Optional[str],
+               Optional[str]]:
+        """Returns the store type, store class, bucket name, and sub path from
+        a store URL, and the storage account name and region if applicable.
+
+        Args:
+            store_url: str; The store URL.
+        """
+        # The full path from the user config of IBM COS contains the region,
+        # and Azure Blob Storage contains the storage account name, we need to
+        # pass these information to the store constructor.
+        storage_account_name = None
+        region = None
+        for store_type in StoreType:
+            if store_url.startswith(store_type.store_prefix()):
+                if store_type == StoreType.AZURE:
+                    storage_account_name, bucket_name, sub_path = \
+                        data_utils.split_az_path(store_url)
+                    store_cls: Type['AbstractStore'] = AzureBlobStore
+                elif store_type == StoreType.IBM:
+                    bucket_name, sub_path, region = data_utils.split_cos_path(
+                        store_url)
+                    store_cls = IBMCosStore
+                elif store_type == StoreType.R2:
+                    bucket_name, sub_path = data_utils.split_r2_path(store_url)
+                    store_cls = R2Store
+                elif store_type == StoreType.GCS:
+                    bucket_name, sub_path = data_utils.split_gcs_path(store_url)
+                    store_cls = GcsStore
+                elif store_type == StoreType.S3:
+                    bucket_name, sub_path = data_utils.split_s3_path(store_url)
+                    store_cls = S3Store
+                return store_type, store_cls,bucket_name, \
+                    sub_path, storage_account_name, region
+        raise ValueError(f'Unknown store URL: {store_url}')
+

 class StorageMode(enum.Enum):
     MOUNT = 'MOUNT'
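Illustration (not part of the diff): a minimal sketch of how the new StoreType.get_fields_from_store_url classmethod might be called; the bucket URL below is hypothetical.

    from sky.data import storage as storage_lib

    (store_type, store_cls, bucket_name, sub_path,
     storage_account_name, region) = (
         storage_lib.StoreType.get_fields_from_store_url(
             's3://my-bucket/train-data'))
    # For an S3 URL like this, store_type is StoreType.S3 and store_cls is
    # S3Store; bucket_name and sub_path come from data_utils.split_s3_path(),
    # while storage_account_name and region stay None.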
@@ -226,25 +265,29 @@ class AbstractStore:
                      name: str,
                      source: Optional[SourceType],
                      region: Optional[str] = None,
-                     is_sky_managed: Optional[bool] = None):
+                     is_sky_managed: Optional[bool] = None,
+                     _bucket_sub_path: Optional[str] = None):
             self.name = name
             self.source = source
             self.region = region
             self.is_sky_managed = is_sky_managed
+            self._bucket_sub_path = _bucket_sub_path

         def __repr__(self):
             return (f'StoreMetadata('
                     f'\n\tname={self.name},'
                     f'\n\tsource={self.source},'
                     f'\n\tregion={self.region},'
-                    f'\n\tis_sky_managed={self.is_sky_managed})')
+                    f'\n\tis_sky_managed={self.is_sky_managed},'
+                    f'\n\t_bucket_sub_path={self._bucket_sub_path})')

     def __init__(self,
                  name: str,
                  source: Optional[SourceType],
                  region: Optional[str] = None,
                  is_sky_managed: Optional[bool] = None,
-                 sync_on_reconstruction: Optional[bool] = True):
+                 sync_on_reconstruction: Optional[bool] = True,
+                 _bucket_sub_path: Optional[str] = None):  # pylint: disable=invalid-name
         """Initialize AbstractStore

         Args:
@@ -258,7 +301,11 @@ class AbstractStore:
            there. This is set to false when the Storage object is created not
            for direct use, e.g. for 'sky storage delete', or the storage is
            being re-used, e.g., for `sky start` on a stopped cluster.
-
+          _bucket_sub_path: str; The prefix of the bucket directory to be
+            created in the store, e.g. if _bucket_sub_path=my-dir, the files
+            will be uploaded to s3://<bucket>/my-dir/.
+            This only works if source is a local directory.
+            # TODO(zpoint): Add support for non-local source.
         Raises:
           StorageBucketCreateError: If bucket creation fails
           StorageBucketGetError: If fetching existing bucket fails
@@ -269,10 +316,29 @@ class AbstractStore:
         self.region = region
         self.is_sky_managed = is_sky_managed
         self.sync_on_reconstruction = sync_on_reconstruction
+
+        # To avoid mypy error
+        self._bucket_sub_path: Optional[str] = None
+        # Trigger the setter to strip any leading/trailing slashes.
+        self.bucket_sub_path = _bucket_sub_path
         # Whether sky is responsible for the lifecycle of the Store.
         self._validate()
         self.initialize()

+    @property
+    def bucket_sub_path(self) -> Optional[str]:
+        """Get the bucket_sub_path."""
+        return self._bucket_sub_path
+
+    @bucket_sub_path.setter
+    # pylint: disable=invalid-name
+    def bucket_sub_path(self, bucket_sub_path: Optional[str]) -> None:
+        """Set the bucket_sub_path, stripping any leading/trailing slashes."""
+        if bucket_sub_path is not None:
+            self._bucket_sub_path = bucket_sub_path.strip('/')
+        else:
+            self._bucket_sub_path = None
+
     @classmethod
     def from_metadata(cls, metadata: StoreMetadata, **override_args):
         """Create a Store from a StoreMetadata object.
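Illustration (not part of the diff): the new property setter normalizes the sub path, so leading and trailing slashes are dropped. A tiny hedged sketch, where `store` stands for any AbstractStore subclass instance:

    store.bucket_sub_path = '/experiment-1/'
    assert store.bucket_sub_path == 'experiment-1'
    store.bucket_sub_path = None  # clearing the sub path is also allowed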
@@ -280,19 +346,26 @@ class AbstractStore:
         Used when reconstructing Storage and Store objects from
         global_user_state.
         """
-        return cls(
-
-
-
-
-
-
+        return cls(
+            name=override_args.get('name', metadata.name),
+            source=override_args.get('source', metadata.source),
+            region=override_args.get('region', metadata.region),
+            is_sky_managed=override_args.get('is_sky_managed',
+                                             metadata.is_sky_managed),
+            sync_on_reconstruction=override_args.get('sync_on_reconstruction',
+                                                     True),
+            # backward compatibility
+            _bucket_sub_path=override_args.get(
+                '_bucket_sub_path',
+                metadata._bucket_sub_path  # pylint: disable=protected-access
+            ) if hasattr(metadata, '_bucket_sub_path') else None)

     def get_metadata(self) -> StoreMetadata:
         return self.StoreMetadata(name=self.name,
                                   source=self.source,
                                   region=self.region,
-                                  is_sky_managed=self.is_sky_managed)
+                                  is_sky_managed=self.is_sky_managed,
+                                  _bucket_sub_path=self._bucket_sub_path)

     def initialize(self):
         """Initializes the Store object on the cloud.
@@ -320,7 +393,11 @@ class AbstractStore:
         raise NotImplementedError

     def delete(self) -> None:
-        """Removes the Storage
+        """Removes the Storage from the cloud."""
+        raise NotImplementedError
+
+    def _delete_sub_path(self) -> None:
+        """Removes objects from the sub path in the bucket."""
         raise NotImplementedError

     def get_handle(self) -> StorageHandle:
@@ -464,13 +541,19 @@ class Storage(object):
         if storetype in self.sky_stores:
             del self.sky_stores[storetype]

-    def __init__(
-
-
-
-
-
-
+    def __init__(
+        self,
+        name: Optional[str] = None,
+        source: Optional[SourceType] = None,
+        stores: Optional[Dict[StoreType, AbstractStore]] = None,
+        persistent: Optional[bool] = True,
+        mode: StorageMode = StorageMode.MOUNT,
+        sync_on_reconstruction: bool = True,
+        # pylint: disable=invalid-name
+        _is_sky_managed: Optional[bool] = None,
+        # pylint: disable=invalid-name
+        _bucket_sub_path: Optional[str] = None
+    ) -> None:
         """Initializes a Storage object.

         Three fields are required: the name of the storage, the source
@@ -508,6 +591,18 @@ class Storage(object):
            there. This is set to false when the Storage object is created not
            for direct use, e.g. for 'sky storage delete', or the storage is
            being re-used, e.g., for `sky start` on a stopped cluster.
+          _is_sky_managed: Optional[bool]; Indicates if the storage is managed
+            by Sky. Without this argument, the controller's behavior differs
+            from the local machine. For example, if a bucket does not exist:
+            Local Machine (is_sky_managed=True) →
+            Controller (is_sky_managed=False).
+            With this argument, the controller aligns with the local machine,
+            ensuring it retains the is_sky_managed information from the YAML.
+            During teardown, if is_sky_managed is True, the controller should
+            delete the bucket. Otherwise, it might mistakenly delete only the
+            sub-path, assuming is_sky_managed is False.
+          _bucket_sub_path: Optional[str]; The subdirectory to use for the
+            storage object.
         """
         self.name: str
         self.source = source
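Illustration (not part of the diff): a hedged sketch of passing the two new private arguments when building a Storage object programmatically; the bucket name and local source are hypothetical.

    from sky.data import storage as storage_lib

    store = storage_lib.Storage(
        name='my-bucket',                  # hypothetical bucket name
        source='~/dataset',                # local directory to upload
        mode=storage_lib.StorageMode.MOUNT,
        _is_sky_managed=True,              # keep controller/local behavior aligned
        _bucket_sub_path='experiment-1')   # upload under <bucket>/experiment-1/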
@@ -515,6 +610,8 @@ class Storage(object):
         self.mode = mode
         assert mode in StorageMode
         self.sync_on_reconstruction = sync_on_reconstruction
+        self._is_sky_managed = _is_sky_managed
+        self._bucket_sub_path = _bucket_sub_path

         # TODO(romilb, zhwu): This is a workaround to support storage deletion
         # for spot. Once sky storage supports forced management for external
@@ -577,6 +674,12 @@ class Storage(object):
             elif self.source.startswith('oci://'):
                 self.add_store(StoreType.OCI)

+    def get_bucket_sub_path_prefix(self, blob_path: str) -> str:
+        """Adds the bucket sub path prefix to the blob path."""
+        if self._bucket_sub_path is not None:
+            return f'{blob_path}/{self._bucket_sub_path}'
+        return blob_path
+
     @staticmethod
     def _validate_source(
             source: SourceType, mode: StorageMode,
@@ -787,34 +890,40 @@ class Storage(object):
                 store = S3Store.from_metadata(
                     s_metadata,
                     source=self.source,
-                    sync_on_reconstruction=self.sync_on_reconstruction)
+                    sync_on_reconstruction=self.sync_on_reconstruction,
+                    _bucket_sub_path=self._bucket_sub_path)
             elif s_type == StoreType.GCS:
                 store = GcsStore.from_metadata(
                     s_metadata,
                     source=self.source,
-                    sync_on_reconstruction=self.sync_on_reconstruction)
+                    sync_on_reconstruction=self.sync_on_reconstruction,
+                    _bucket_sub_path=self._bucket_sub_path)
             elif s_type == StoreType.AZURE:
                 assert isinstance(s_metadata,
                                   AzureBlobStore.AzureBlobStoreMetadata)
                 store = AzureBlobStore.from_metadata(
                     s_metadata,
                     source=self.source,
-                    sync_on_reconstruction=self.sync_on_reconstruction)
+                    sync_on_reconstruction=self.sync_on_reconstruction,
+                    _bucket_sub_path=self._bucket_sub_path)
             elif s_type == StoreType.R2:
                 store = R2Store.from_metadata(
                     s_metadata,
                     source=self.source,
-                    sync_on_reconstruction=self.sync_on_reconstruction)
+                    sync_on_reconstruction=self.sync_on_reconstruction,
+                    _bucket_sub_path=self._bucket_sub_path)
             elif s_type == StoreType.IBM:
                 store = IBMCosStore.from_metadata(
                     s_metadata,
                     source=self.source,
-                    sync_on_reconstruction=self.sync_on_reconstruction)
+                    sync_on_reconstruction=self.sync_on_reconstruction,
+                    _bucket_sub_path=self._bucket_sub_path)
             elif s_type == StoreType.OCI:
                 store = OciStore.from_metadata(
                     s_metadata,
                     source=self.source,
-                    sync_on_reconstruction=self.sync_on_reconstruction)
+                    sync_on_reconstruction=self.sync_on_reconstruction,
+                    _bucket_sub_path=self._bucket_sub_path)
             else:
                 with ux_utils.print_exception_no_traceback():
                     raise ValueError(f'Unknown store type: {s_type}')
@@ -834,7 +943,6 @@ class Storage(object):
                                'to be reconstructed while the corresponding '
                                'bucket was externally deleted.')
                 continue
-
             self._add_store(store, is_reconstructed=True)

     @classmethod
@@ -890,6 +998,7 @@ class Storage(object):
                             f'storage account {storage_account_name!r}.')
             else:
                 logger.info(f'Storage type {store_type} already exists.')
+
             return self.stores[store_type]

         store_cls: Type[AbstractStore]
@@ -909,21 +1018,24 @@ class Storage(object):
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageSpecError(
                     f'{store_type} not supported as a Store.')
-
-        # Initialize store object and get/create bucket
         try:
             store = store_cls(
                 name=self.name,
                 source=self.source,
                 region=region,
-                sync_on_reconstruction=self.sync_on_reconstruction)
+                sync_on_reconstruction=self.sync_on_reconstruction,
+                is_sky_managed=self._is_sky_managed,
+                _bucket_sub_path=self._bucket_sub_path)
         except exceptions.StorageBucketCreateError:
             # Creation failed, so this must be sky managed store. Add failure
             # to state.
             logger.error(f'Could not create {store_type} store '
                         f'with name {self.name}.')
-
-
+            try:
+                global_user_state.set_storage_status(self.name,
+                                                     StorageStatus.INIT_FAILED)
+            except ValueError as e:
+                logger.error(f'Error setting storage status: {e}')
             raise
         except exceptions.StorageBucketGetError:
             # Bucket get failed, so this is not sky managed. Do not update state
@@ -1039,12 +1151,15 @@ class Storage(object):
     def from_yaml_config(cls, config: Dict[str, Any]) -> 'Storage':
         common_utils.validate_schema(config, schemas.get_storage_schema(),
                                      'Invalid storage YAML: ')
-
         name = config.pop('name', None)
         source = config.pop('source', None)
         store = config.pop('store', None)
         mode_str = config.pop('mode', None)
         force_delete = config.pop('_force_delete', None)
+        # pylint: disable=invalid-name
+        _is_sky_managed = config.pop('_is_sky_managed', None)
+        # pylint: disable=invalid-name
+        _bucket_sub_path = config.pop('_bucket_sub_path', None)
         if force_delete is None:
             force_delete = False

@@ -1064,7 +1179,9 @@ class Storage(object):
         storage_obj = cls(name=name,
                           source=source,
                           persistent=persistent,
-                          mode=mode)
+                          mode=mode,
+                          _is_sky_managed=_is_sky_managed,
+                          _bucket_sub_path=_bucket_sub_path)
         if store is not None:
             storage_obj.add_store(StoreType(store.upper()))

@@ -1072,7 +1189,7 @@ class Storage(object):
         storage_obj.force_delete = force_delete
         return storage_obj

-    def to_yaml_config(self) -> Dict[str,
+    def to_yaml_config(self) -> Dict[str, Any]:
         config = {}

         def add_if_not_none(key: str, value: Optional[Any]):
@@ -1088,13 +1205,18 @@ class Storage(object):
         add_if_not_none('source', self.source)

         stores = None
+        is_sky_managed = self._is_sky_managed
         if self.stores:
             stores = ','.join([store.value for store in self.stores])
+            is_sky_managed = list(self.stores.values())[0].is_sky_managed
         add_if_not_none('store', stores)
+        add_if_not_none('_is_sky_managed', is_sky_managed)
         add_if_not_none('persistent', self.persistent)
         add_if_not_none('mode', self.mode.value)
         if self.force_delete:
             config['_force_delete'] = True
+        if self._bucket_sub_path is not None:
+            config['_bucket_sub_path'] = self._bucket_sub_path
         return config

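Illustration (not part of the diff): a hedged sketch of the config dict shape suggested by from_yaml_config()/to_yaml_config() in this diff, including the two new private keys; all field values are hypothetical.

    config = {
        'name': 'my-bucket',
        'source': '~/dataset',
        'persistent': True,
        'mode': 'MOUNT',
        '_is_sky_managed': True,
        '_bucket_sub_path': 'experiment-1',
    }
    # from_yaml_config() pops '_is_sky_managed' and '_bucket_sub_path' and
    # forwards them to the Storage constructor; to_yaml_config() writes them
    # back out so a controller sees the same values as the local machine.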
@@ -1116,7 +1238,8 @@ class S3Store(AbstractStore):
                  source: str,
                  region: Optional[str] = _DEFAULT_REGION,
                  is_sky_managed: Optional[bool] = None,
-                 sync_on_reconstruction: bool = True):
+                 sync_on_reconstruction: bool = True,
+                 _bucket_sub_path: Optional[str] = None):
         self.client: 'boto3.client.Client'
         self.bucket: 'StorageHandle'
         # TODO(romilb): This is purely a stopgap fix for
@@ -1129,7 +1252,7 @@ class S3Store(AbstractStore):
                            f'{self._DEFAULT_REGION} for bucket {name!r}.')
             region = self._DEFAULT_REGION
         super().__init__(name, source, region, is_sky_managed,
-                         sync_on_reconstruction)
+                         sync_on_reconstruction, _bucket_sub_path)

     def _validate(self):
         if self.source is not None and isinstance(self.source, str):
@@ -1293,6 +1416,9 @@ class S3Store(AbstractStore):
                     f'Upload failed for store {self.name}') from e

     def delete(self) -> None:
+        if self._bucket_sub_path is not None and not self.is_sky_managed:
+            return self._delete_sub_path()
+
         deleted_by_skypilot = self._delete_s3_bucket(self.name)
         if deleted_by_skypilot:
             msg_str = f'Deleted S3 bucket {self.name}.'
@@ -1302,6 +1428,19 @@ class S3Store(AbstractStore):
         logger.info(f'{colorama.Fore.GREEN}{msg_str}'
                     f'{colorama.Style.RESET_ALL}')

+    def _delete_sub_path(self) -> None:
+        assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
+        deleted_by_skypilot = self._delete_s3_bucket_sub_path(
+            self.name, self._bucket_sub_path)
+        if deleted_by_skypilot:
+            msg_str = f'Removed objects from S3 bucket ' \
+                      f'{self.name}/{self._bucket_sub_path}.'
+        else:
+            msg_str = f'Failed to remove objects from S3 bucket ' \
+                      f'{self.name}/{self._bucket_sub_path}.'
+        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
+                    f'{colorama.Style.RESET_ALL}')
+
     def get_handle(self) -> StorageHandle:
         return aws.resource('s3').Bucket(self.name)

@@ -1332,9 +1471,11 @@ class S3Store(AbstractStore):
                 for file_name in file_names
             ])
             base_dir_path = shlex.quote(base_dir_path)
+            sub_path = (f'/{self._bucket_sub_path}'
+                        if self._bucket_sub_path else '')
             sync_command = ('aws s3 sync --no-follow-symlinks --exclude="*" '
                             f'{includes} {base_dir_path} '
-                            f's3://{self.name}')
+                            f's3://{self.name}{sub_path}')
             return sync_command

         def get_dir_sync_command(src_dir_path, dest_dir_name):
@@ -1346,9 +1487,11 @@ class S3Store(AbstractStore):
                 for file_name in excluded_list
             ])
             src_dir_path = shlex.quote(src_dir_path)
+            sub_path = (f'/{self._bucket_sub_path}'
+                        if self._bucket_sub_path else '')
             sync_command = (f'aws s3 sync --no-follow-symlinks {excludes} '
                             f'{src_dir_path} '
-                            f's3://{self.name}/{dest_dir_name}')
+                            f's3://{self.name}{sub_path}/{dest_dir_name}')
             return sync_command

         # Generate message for upload
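Illustration (not part of the diff): a tiny runnable sketch of the destination the updated sync commands now target when a sub path is set; the bucket and directory names are hypothetical.

    bucket_name = 'my-bucket'
    bucket_sub_path = 'experiment-1'
    sub_path = f'/{bucket_sub_path}' if bucket_sub_path else ''
    dest = f's3://{bucket_name}{sub_path}/outputs'
    # dest == 's3://my-bucket/experiment-1/outputs'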
@@ -1466,7 +1609,8 @@ class S3Store(AbstractStore):
         """
         install_cmd = mounting_utils.get_s3_mount_install_cmd()
         mount_cmd = mounting_utils.get_s3_mount_cmd(self.bucket.name,
-                                                    mount_path)
+                                                    mount_path,
+                                                    self._bucket_sub_path)
         return mounting_utils.get_mounting_command(mount_path, install_cmd,
                                                    mount_cmd)

@@ -1516,6 +1660,27 @@ class S3Store(AbstractStore):
                 ) from e
         return aws.resource('s3').Bucket(bucket_name)

+    def _execute_s3_remove_command(self, command: str, bucket_name: str,
+                                   hint_operating: str,
+                                   hint_failed: str) -> bool:
+        try:
+            with rich_utils.safe_status(
+                    ux_utils.spinner_message(hint_operating)):
+                subprocess.check_output(command.split(' '),
+                                        stderr=subprocess.STDOUT)
+        except subprocess.CalledProcessError as e:
+            if 'NoSuchBucket' in e.output.decode('utf-8'):
+                logger.debug(
+                    _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
+                        bucket_name=bucket_name))
+                return False
+            else:
+                with ux_utils.print_exception_no_traceback():
+                    raise exceptions.StorageBucketDeleteError(
+                        f'{hint_failed}'
+                        f'Detailed error: {e.output}')
+        return True
+
     def _delete_s3_bucket(self, bucket_name: str) -> bool:
         """Deletes S3 bucket, including all objects in bucket

@@ -1533,29 +1698,28 @@ class S3Store(AbstractStore):
         # The fastest way to delete is to run `aws s3 rb --force`,
         # which removes the bucket by force.
         remove_command = f'aws s3 rb s3://{bucket_name} --force'
-
-
-
-
-
-
-        except subprocess.CalledProcessError as e:
-            if 'NoSuchBucket' in e.output.decode('utf-8'):
-                logger.debug(
-                    _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
-                        bucket_name=bucket_name))
-                return False
-            else:
-                with ux_utils.print_exception_no_traceback():
-                    raise exceptions.StorageBucketDeleteError(
-                        f'Failed to delete S3 bucket {bucket_name}.'
-                        f'Detailed error: {e.output}')
+
+        success = self._execute_s3_remove_command(
+            remove_command, bucket_name,
+            f'Deleting S3 bucket [green]{bucket_name}[/]',
+            f'Failed to delete S3 bucket {bucket_name}.')
+        if not success:
+            return False

         # Wait until bucket deletion propagates on AWS servers
         while data_utils.verify_s3_bucket(bucket_name):
             time.sleep(0.1)
         return True

+    def _delete_s3_bucket_sub_path(self, bucket_name: str,
+                                   sub_path: str) -> bool:
+        """Deletes the sub path from the bucket."""
+        remove_command = f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive'
+        return self._execute_s3_remove_command(
+            remove_command, bucket_name, f'Removing objects from S3 bucket '
+            f'[green]{bucket_name}/{sub_path}[/]',
+            f'Failed to remove objects from S3 bucket {bucket_name}/{sub_path}.'
+        )
+

 class GcsStore(AbstractStore):
     """GcsStore inherits from Storage Object and represents the backend
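Illustration (not part of the diff): the removal command the new _delete_s3_bucket_sub_path helper assembles before handing it to _execute_s3_remove_command; bucket and sub path names are hypothetical.

    bucket_name, sub_path = 'my-bucket', 'experiment-1'
    remove_command = f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive'
    # remove_command == 'aws s3 rm s3://my-bucket/experiment-1/ --recursive'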
@@ -1569,11 +1733,12 @@ class GcsStore(AbstractStore):
                  source: str,
                  region: Optional[str] = 'us-central1',
                  is_sky_managed: Optional[bool] = None,
-                 sync_on_reconstruction: Optional[bool] = True):
+                 sync_on_reconstruction: Optional[bool] = True,
+                 _bucket_sub_path: Optional[str] = None):
         self.client: 'storage.Client'
         self.bucket: StorageHandle
         super().__init__(name, source, region, is_sky_managed,
-                         sync_on_reconstruction)
+                         sync_on_reconstruction, _bucket_sub_path)

     def _validate(self):
         if self.source is not None and isinstance(self.source, str):
@@ -1736,6 +1901,9 @@ class GcsStore(AbstractStore):
                     f'Upload failed for store {self.name}') from e

     def delete(self) -> None:
+        if self._bucket_sub_path is not None and not self.is_sky_managed:
+            return self._delete_sub_path()
+
         deleted_by_skypilot = self._delete_gcs_bucket(self.name)
         if deleted_by_skypilot:
             msg_str = f'Deleted GCS bucket {self.name}.'
@@ -1745,6 +1913,19 @@ class GcsStore(AbstractStore):
         logger.info(f'{colorama.Fore.GREEN}{msg_str}'
                     f'{colorama.Style.RESET_ALL}')

+    def _delete_sub_path(self) -> None:
+        assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
+        deleted_by_skypilot = self._delete_gcs_bucket(self.name,
+                                                      self._bucket_sub_path)
+        if deleted_by_skypilot:
+            msg_str = f'Deleted objects in GCS bucket ' \
+                      f'{self.name}/{self._bucket_sub_path}.'
+        else:
+            msg_str = f'GCS bucket {self.name} may have ' \
+                      'been deleted externally.'
+        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
+                    f'{colorama.Style.RESET_ALL}')
+
     def get_handle(self) -> StorageHandle:
         return self.client.get_bucket(self.name)

@@ -1818,9 +1999,11 @@ class GcsStore(AbstractStore):
             sync_format = '|'.join(file_names)
             gsutil_alias, alias_gen = data_utils.get_gsutil_command()
             base_dir_path = shlex.quote(base_dir_path)
+            sub_path = (f'/{self._bucket_sub_path}'
+                        if self._bucket_sub_path else '')
             sync_command = (f'{alias_gen}; {gsutil_alias} '
                             f'rsync -e -x \'^(?!{sync_format}$).*\' '
-                            f'{base_dir_path} gs://{self.name}')
+                            f'{base_dir_path} gs://{self.name}{sub_path}')
             return sync_command

         def get_dir_sync_command(src_dir_path, dest_dir_name):
@@ -1830,9 +2013,11 @@ class GcsStore(AbstractStore):
             excludes = '|'.join(excluded_list)
             gsutil_alias, alias_gen = data_utils.get_gsutil_command()
             src_dir_path = shlex.quote(src_dir_path)
+            sub_path = (f'/{self._bucket_sub_path}'
+                        if self._bucket_sub_path else '')
             sync_command = (f'{alias_gen}; {gsutil_alias} '
                             f'rsync -e -r -x \'({excludes})\' {src_dir_path} '
-                            f'gs://{self.name}/{dest_dir_name}')
+                            f'gs://{self.name}{sub_path}/{dest_dir_name}')
             return sync_command

         # Generate message for upload
@@ -1937,7 +2122,8 @@ class GcsStore(AbstractStore):
         """
         install_cmd = mounting_utils.get_gcs_mount_install_cmd()
         mount_cmd = mounting_utils.get_gcs_mount_cmd(self.bucket.name,
-                                                     mount_path)
+                                                     mount_path,
+                                                     self._bucket_sub_path)
         version_check_cmd = (
             f'gcsfuse --version | grep -q {mounting_utils.GCSFUSE_VERSION}')
         return mounting_utils.get_mounting_command(mount_path, install_cmd,
@@ -1977,19 +2163,33 @@ class GcsStore(AbstractStore):
                     f'{new_bucket.storage_class}{colorama.Style.RESET_ALL}')
         return new_bucket

-    def _delete_gcs_bucket(
-
+    def _delete_gcs_bucket(
+        self,
+        bucket_name: str,
+        # pylint: disable=invalid-name
+        _bucket_sub_path: Optional[str] = None
+    ) -> bool:
+        """Deletes objects in GCS bucket

         Args:
          bucket_name: str; Name of bucket
+          _bucket_sub_path: str; Sub path in the bucket, if provided only
+            objects in the sub path will be deleted, else the whole bucket will
+            be deleted

         Returns:
          bool; True if bucket was deleted, False if it was deleted externally.
         """
-
+        if _bucket_sub_path is not None:
+            command_suffix = f'/{_bucket_sub_path}'
+            hint_text = 'objects in '
+        else:
+            command_suffix = ''
+            hint_text = ''
         with rich_utils.safe_status(
                 ux_utils.spinner_message(
-                    f'Deleting GCS bucket
+                    f'Deleting {hint_text}GCS bucket '
+                    f'[green]{bucket_name}{command_suffix}[/]')):
             try:
                 self.client.get_bucket(bucket_name)
             except gcp.forbidden_exception() as e:
@@ -2007,8 +2207,9 @@ class GcsStore(AbstractStore):
                 return False
             try:
                 gsutil_alias, alias_gen = data_utils.get_gsutil_command()
-                remove_obj_command = (
-
+                remove_obj_command = (
+                    f'{alias_gen};{gsutil_alias} '
+                    f'rm -r gs://{bucket_name}{command_suffix}')
                 subprocess.check_output(remove_obj_command,
                                         stderr=subprocess.STDOUT,
                                         shell=True,
@@ -2017,7 +2218,8 @@ class GcsStore(AbstractStore):
             except subprocess.CalledProcessError as e:
                 with ux_utils.print_exception_no_traceback():
                     raise exceptions.StorageBucketDeleteError(
-                        f'Failed to delete GCS bucket
+                        f'Failed to delete {hint_text}GCS bucket '
+                        f'{bucket_name}{command_suffix}.'
                         f'Detailed error: {e.output}')

@@ -2069,7 +2271,8 @@ class AzureBlobStore(AbstractStore):
                  storage_account_name: str = '',
                  region: Optional[str] = 'eastus',
                  is_sky_managed: Optional[bool] = None,
-                 sync_on_reconstruction: bool = True):
+                 sync_on_reconstruction: bool = True,
+                 _bucket_sub_path: Optional[str] = None):
         self.storage_client: 'storage.Client'
         self.resource_client: 'storage.Client'
         self.container_name: str
@@ -2081,7 +2284,7 @@ class AzureBlobStore(AbstractStore):
         if region is None:
             region = 'eastus'
         super().__init__(name, source, region, is_sky_managed,
-                         sync_on_reconstruction)
+                         sync_on_reconstruction, _bucket_sub_path)

     @classmethod
     def from_metadata(cls, metadata: AbstractStore.StoreMetadata,
@@ -2231,6 +2434,17 @@ class AzureBlobStore(AbstractStore):
         """
         self.storage_client = data_utils.create_az_client('storage')
         self.resource_client = data_utils.create_az_client('resource')
+        self._update_storage_account_name_and_resource()
+
+        self.container_name, is_new_bucket = self._get_bucket()
+        if self.is_sky_managed is None:
+            # If is_sky_managed is not specified, then this is a new storage
+            # object (i.e., did not exist in global_user_state) and we should
+            # set the is_sky_managed property.
+            # If is_sky_managed is specified, then we take no action.
+            self.is_sky_managed = is_new_bucket
+
+    def _update_storage_account_name_and_resource(self):
         self.storage_account_name, self.resource_group_name = (
             self._get_storage_account_and_resource_group())

@@ -2241,13 +2455,13 @@ class AzureBlobStore(AbstractStore):
             self.storage_account_name, self.resource_group_name,
             self.storage_client, self.resource_client)

-
-
-
-
-
-
-
+    def update_storage_attributes(self, **kwargs: Dict[str, Any]):
+        assert 'storage_account_name' in kwargs, (
+            'only storage_account_name supported')
+        assert isinstance(kwargs['storage_account_name'],
+                          str), ('storage_account_name must be a string')
+        self.storage_account_name = kwargs['storage_account_name']
+        self._update_storage_account_name_and_resource()

     @staticmethod
     def get_default_storage_account_name(region: Optional[str]) -> str:
@@ -2518,6 +2732,9 @@ class AzureBlobStore(AbstractStore):

     def delete(self) -> None:
         """Deletes the storage."""
+        if self._bucket_sub_path is not None and not self.is_sky_managed:
+            return self._delete_sub_path()
+
         deleted_by_skypilot = self._delete_az_bucket(self.name)
         if deleted_by_skypilot:
             msg_str = (f'Deleted AZ Container {self.name!r} under storage '
@@ -2528,6 +2745,32 @@ class AzureBlobStore(AbstractStore):
         logger.info(f'{colorama.Fore.GREEN}{msg_str}'
                     f'{colorama.Style.RESET_ALL}')

+    def _delete_sub_path(self) -> None:
+        assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
+        try:
+            container_url = data_utils.AZURE_CONTAINER_URL.format(
+                storage_account_name=self.storage_account_name,
+                container_name=self.name)
+            container_client = data_utils.create_az_client(
+                client_type='container',
+                container_url=container_url,
+                storage_account_name=self.storage_account_name,
+                resource_group_name=self.resource_group_name)
+            # List and delete blobs in the specified directory
+            blobs = container_client.list_blobs(
+                name_starts_with=self._bucket_sub_path + '/')
+            for blob in blobs:
+                container_client.delete_blob(blob.name)
+            logger.info(
+                f'Deleted objects from sub path {self._bucket_sub_path} '
+                f'in container {self.name}.')
+        except Exception as e:  # pylint: disable=broad-except
+            logger.error(
+                f'Failed to delete objects from sub path '
+                f'{self._bucket_sub_path} in container {self.name}. '
+                f'Details: {common_utils.format_exception(e, use_bracket=True)}'
+            )
+
     def get_handle(self) -> StorageHandle:
         """Returns the Storage Handle object."""
         return self.storage_client.blob_containers.get(
@@ -2554,13 +2797,15 @@ class AzureBlobStore(AbstractStore):
             includes_list = ';'.join(file_names)
             includes = f'--include-pattern "{includes_list}"'
             base_dir_path = shlex.quote(base_dir_path)
+            container_path = (f'{self.container_name}/{self._bucket_sub_path}'
+                              if self._bucket_sub_path else self.container_name)
             sync_command = (f'az storage blob sync '
                             f'--account-name {self.storage_account_name} '
                             f'--account-key {self.storage_account_key} '
                             f'{includes} '
                             '--delete-destination false '
                             f'--source {base_dir_path} '
-                            f'--container {
+                            f'--container {container_path}')
             return sync_command

         def get_dir_sync_command(src_dir_path, dest_dir_name) -> str:
@@ -2571,8 +2816,11 @@ class AzureBlobStore(AbstractStore):
                 [file_name.rstrip('*') for file_name in excluded_list])
             excludes = f'--exclude-path "{excludes_list}"'
             src_dir_path = shlex.quote(src_dir_path)
-            container_path = (f'{self.container_name}/{
-                              if
+            container_path = (f'{self.container_name}/{self._bucket_sub_path}'
+                              if self._bucket_sub_path else
+                              f'{self.container_name}')
+            if dest_dir_name:
+                container_path = f'{container_path}/{dest_dir_name}'
             sync_command = (f'az storage blob sync '
                             f'--account-name {self.storage_account_name} '
                             f'--account-key {self.storage_account_key} '
@@ -2695,6 +2943,7 @@ class AzureBlobStore(AbstractStore):
                     f'{self.storage_account_name!r}.'
                     'Details: '
                     f'{common_utils.format_exception(e, use_bracket=True)}')
+
         # If the container cannot be found in both private and public settings,
         # the container is to be created by Sky. However, creation is skipped
         # if Store object is being reconstructed for deletion or re-mount with
@@ -2725,7 +2974,8 @@ class AzureBlobStore(AbstractStore):
         mount_cmd = mounting_utils.get_az_mount_cmd(self.container_name,
                                                     self.storage_account_name,
                                                     mount_path,
-                                                    self.storage_account_key)
+                                                    self.storage_account_key,
+                                                    self._bucket_sub_path)
         return mounting_utils.get_mounting_command(mount_path, install_cmd,
                                                    mount_cmd)

@@ -2824,11 +3074,12 @@ class R2Store(AbstractStore):
                  source: str,
                  region: Optional[str] = 'auto',
                  is_sky_managed: Optional[bool] = None,
-                 sync_on_reconstruction: Optional[bool] = True):
+                 sync_on_reconstruction: Optional[bool] = True,
+                 _bucket_sub_path: Optional[str] = None):
         self.client: 'boto3.client.Client'
         self.bucket: 'StorageHandle'
         super().__init__(name, source, region, is_sky_managed,
-                         sync_on_reconstruction)
+                         sync_on_reconstruction, _bucket_sub_path)

     def _validate(self):
         if self.source is not None and isinstance(self.source, str):
@@ -2933,6 +3184,9 @@ class R2Store(AbstractStore):
                     f'Upload failed for store {self.name}') from e

     def delete(self) -> None:
+        if self._bucket_sub_path is not None and not self.is_sky_managed:
+            return self._delete_sub_path()
+
         deleted_by_skypilot = self._delete_r2_bucket(self.name)
         if deleted_by_skypilot:
             msg_str = f'Deleted R2 bucket {self.name}.'
@@ -2942,6 +3196,19 @@ class R2Store(AbstractStore):
         logger.info(f'{colorama.Fore.GREEN}{msg_str}'
                     f'{colorama.Style.RESET_ALL}')

+    def _delete_sub_path(self) -> None:
+        assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
+        deleted_by_skypilot = self._delete_r2_bucket_sub_path(
+            self.name, self._bucket_sub_path)
+        if deleted_by_skypilot:
+            msg_str = f'Removed objects from R2 bucket ' \
+                      f'{self.name}/{self._bucket_sub_path}.'
+        else:
+            msg_str = f'Failed to remove objects from R2 bucket ' \
+                      f'{self.name}/{self._bucket_sub_path}.'
+        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
+                    f'{colorama.Style.RESET_ALL}')
+
     def get_handle(self) -> StorageHandle:
         return cloudflare.resource('s3').Bucket(self.name)

@@ -2973,11 +3240,13 @@ class R2Store(AbstractStore):
             ])
             endpoint_url = cloudflare.create_endpoint()
             base_dir_path = shlex.quote(base_dir_path)
+            sub_path = (f'/{self._bucket_sub_path}'
+                        if self._bucket_sub_path else '')
             sync_command = ('AWS_SHARED_CREDENTIALS_FILE='
                             f'{cloudflare.R2_CREDENTIALS_PATH} '
                             'aws s3 sync --no-follow-symlinks --exclude="*" '
                             f'{includes} {base_dir_path} '
-                            f's3://{self.name} '
+                            f's3://{self.name}{sub_path} '
                             f'--endpoint {endpoint_url} '
                             f'--profile={cloudflare.R2_PROFILE_NAME}')
             return sync_command
@@ -2992,11 +3261,13 @@ class R2Store(AbstractStore):
             ])
             endpoint_url = cloudflare.create_endpoint()
             src_dir_path = shlex.quote(src_dir_path)
+            sub_path = (f'/{self._bucket_sub_path}'
+                        if self._bucket_sub_path else '')
             sync_command = ('AWS_SHARED_CREDENTIALS_FILE='
                             f'{cloudflare.R2_CREDENTIALS_PATH} '
                             f'aws s3 sync --no-follow-symlinks {excludes} '
                             f'{src_dir_path} '
-                            f's3://{self.name}/{dest_dir_name} '
+                            f's3://{self.name}{sub_path}/{dest_dir_name} '
                             f'--endpoint {endpoint_url} '
                             f'--profile={cloudflare.R2_PROFILE_NAME}')
             return sync_command
@@ -3127,11 +3398,9 @@ class R2Store(AbstractStore):
         endpoint_url = cloudflare.create_endpoint()
         r2_credential_path = cloudflare.R2_CREDENTIALS_PATH
         r2_profile_name = cloudflare.R2_PROFILE_NAME
-        mount_cmd = mounting_utils.get_r2_mount_cmd(
-
-
-                                                    self.bucket.name,
-                                                    mount_path)
+        mount_cmd = mounting_utils.get_r2_mount_cmd(
+            r2_credential_path, r2_profile_name, endpoint_url, self.bucket.name,
+            mount_path, self._bucket_sub_path)
         return mounting_utils.get_mounting_command(mount_path, install_cmd,
                                                    mount_cmd)

@@ -3164,6 +3433,43 @@ class R2Store(AbstractStore):
                              f'{self.name} but failed.') from e
         return cloudflare.resource('s3').Bucket(bucket_name)

+    def _execute_r2_remove_command(self, command: str, bucket_name: str,
+                                   hint_operating: str,
+                                   hint_failed: str) -> bool:
+        try:
+            with rich_utils.safe_status(
+                    ux_utils.spinner_message(hint_operating)):
+                subprocess.check_output(command.split(' '),
+                                        stderr=subprocess.STDOUT,
+                                        shell=True)
+        except subprocess.CalledProcessError as e:
+            if 'NoSuchBucket' in e.output.decode('utf-8'):
+                logger.debug(
+                    _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
+                        bucket_name=bucket_name))
+                return False
+            else:
+                with ux_utils.print_exception_no_traceback():
+                    raise exceptions.StorageBucketDeleteError(
+                        f'{hint_failed}'
+                        f'Detailed error: {e.output}')
+        return True
+
+    def _delete_r2_bucket_sub_path(self, bucket_name: str,
+                                   sub_path: str) -> bool:
+        """Deletes the sub path from the bucket."""
+        endpoint_url = cloudflare.create_endpoint()
+        remove_command = (
+            f'AWS_SHARED_CREDENTIALS_FILE={cloudflare.R2_CREDENTIALS_PATH} '
+            f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive '
+            f'--endpoint {endpoint_url} '
+            f'--profile={cloudflare.R2_PROFILE_NAME}')
+        return self._execute_r2_remove_command(
+            remove_command, bucket_name,
+            f'Removing objects from R2 bucket {bucket_name}/{sub_path}',
+            f'Failed to remove objects from R2 bucket {bucket_name}/{sub_path}.'
+        )
+
     def _delete_r2_bucket(self, bucket_name: str) -> bool:
         """Deletes R2 bucket, including all objects in bucket

@@ -3186,24 +3492,12 @@ class R2Store(AbstractStore):
                           f'aws s3 rb s3://{bucket_name} --force '
                           f'--endpoint {endpoint_url} '
                           f'--profile={cloudflare.R2_PROFILE_NAME}')
-
-
-
-
-
-
-                                        shell=True)
-        except subprocess.CalledProcessError as e:
-            if 'NoSuchBucket' in e.output.decode('utf-8'):
-                logger.debug(
-                    _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
-                        bucket_name=bucket_name))
-                return False
-            else:
-                with ux_utils.print_exception_no_traceback():
-                    raise exceptions.StorageBucketDeleteError(
-                        f'Failed to delete R2 bucket {bucket_name}.'
-                        f'Detailed error: {e.output}')
+
+        success = self._execute_r2_remove_command(
+            remove_command, bucket_name, f'Deleting R2 bucket {bucket_name}',
+            f'Failed to delete R2 bucket {bucket_name}.')
+        if not success:
+            return False

         # Wait until bucket deletion propagates on AWS servers
         while data_utils.verify_r2_bucket(bucket_name):
@@ -3222,11 +3516,12 @@ class IBMCosStore(AbstractStore):
                  source: str,
                  region: Optional[str] = 'us-east',
                  is_sky_managed: Optional[bool] = None,
-                 sync_on_reconstruction: bool = True):
+                 sync_on_reconstruction: bool = True,
+                 _bucket_sub_path: Optional[str] = None):
         self.client: 'storage.Client'
         self.bucket: 'StorageHandle'
         super().__init__(name, source, region, is_sky_managed,
-                         sync_on_reconstruction)
+                         sync_on_reconstruction, _bucket_sub_path)
         self.bucket_rclone_profile = \
             Rclone.generate_rclone_bucket_profile_name(
                 self.name, Rclone.RcloneClouds.IBM)
@@ -3371,10 +3666,22 @@ class IBMCosStore(AbstractStore):
                     f'Upload failed for store {self.name}') from e

     def delete(self) -> None:
+        if self._bucket_sub_path is not None and not self.is_sky_managed:
+            return self._delete_sub_path()
+
         self._delete_cos_bucket()
         logger.info(f'{colorama.Fore.GREEN}Deleted COS bucket {self.name}.'
                     f'{colorama.Style.RESET_ALL}')

+    def _delete_sub_path(self) -> None:
+        assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
+        bucket = self.s3_resource.Bucket(self.name)
+        try:
+            self._delete_cos_bucket_objects(bucket, self._bucket_sub_path + '/')
+        except ibm.ibm_botocore.exceptions.ClientError as e:
+            if e.__class__.__name__ == 'NoSuchBucket':
+                logger.debug('bucket already removed')
+
     def get_handle(self) -> StorageHandle:
         return self.s3_resource.Bucket(self.name)

@@ -3415,10 +3722,13 @@ class IBMCosStore(AbstractStore):
             # .git directory is excluded from the sync
             # wrapping src_dir_path with "" to support path with spaces
             src_dir_path = shlex.quote(src_dir_path)
+            sub_path = (f'/{self._bucket_sub_path}'
+                        if self._bucket_sub_path else '')
             sync_command = (
                 'rclone copy --exclude ".git/*" '
                 f'{src_dir_path} '
-                f'{self.bucket_rclone_profile}:{self.name}
+                f'{self.bucket_rclone_profile}:{self.name}{sub_path}'
+                f'/{dest_dir_name}')
             return sync_command

         def get_file_sync_command(base_dir_path, file_names) -> str:
@@ -3444,9 +3754,12 @@ class IBMCosStore(AbstractStore):
                 for file_name in file_names
             ])
             base_dir_path = shlex.quote(base_dir_path)
-
-
-
+            sub_path = (f'/{self._bucket_sub_path}'
+                        if self._bucket_sub_path else '')
+            sync_command = (
+                'rclone copy '
+                f'{includes} {base_dir_path} '
+                f'{self.bucket_rclone_profile}:{self.name}{sub_path}')
             return sync_command

         # Generate message for upload
@@ -3531,6 +3844,7 @@ class IBMCosStore(AbstractStore):
             Rclone.RcloneClouds.IBM,
             self.region,  # type: ignore
         )
+
         if not bucket_region and self.sync_on_reconstruction:
             # bucket doesn't exist
             return self._create_cos_bucket(self.name, self.region), True
@@ -3577,7 +3891,8 @@ class IBMCosStore(AbstractStore):
             Rclone.RCLONE_CONFIG_PATH,
             self.bucket_rclone_profile,
             self.bucket.name,
-            mount_path)
+            mount_path,
+            self._bucket_sub_path)
         return mounting_utils.get_mounting_command(mount_path, install_cmd,
                                                    mount_cmd)

@@ -3615,15 +3930,27 @@ class IBMCosStore(AbstractStore):

         return self.bucket

-    def
-
-
-
-
+    def _delete_cos_bucket_objects(self,
+                                   bucket: Any,
+                                   prefix: Optional[str] = None):
+        bucket_versioning = self.s3_resource.BucketVersioning(bucket.name)
+        if bucket_versioning.status == 'Enabled':
+            if prefix is not None:
+                res = list(
+                    bucket.object_versions.filter(Prefix=prefix).delete())
+            else:
                 res = list(bucket.object_versions.delete())
+        else:
+            if prefix is not None:
+                res = list(bucket.objects.filter(Prefix=prefix).delete())
             else:
                 res = list(bucket.objects.delete())
-
+        logger.debug(f'Deleted bucket\'s content:\n{res}, prefix: {prefix}')
+
+    def _delete_cos_bucket(self):
+        bucket = self.s3_resource.Bucket(self.name)
+        try:
+            self._delete_cos_bucket_objects(bucket)
             bucket.delete()
             bucket.wait_until_not_exists()
         except ibm.ibm_botocore.exceptions.ClientError as e:
@@ -3644,7 +3971,8 @@ class OciStore(AbstractStore):
                  source: str,
                  region: Optional[str] = None,
                  is_sky_managed: Optional[bool] = None,
-                 sync_on_reconstruction: Optional[bool] = True):
+                 sync_on_reconstruction: Optional[bool] = True,
+                 _bucket_sub_path: Optional[str] = None):
         self.client: Any
         self.bucket: StorageHandle
         self.oci_config_file: str
@@ -3656,7 +3984,8 @@ class OciStore(AbstractStore):
             region = oci.get_oci_config()['region']

         super().__init__(name, source, region, is_sky_managed,
-                         sync_on_reconstruction)
+                         sync_on_reconstruction, _bucket_sub_path)
+        # TODO(zpoint): add _bucket_sub_path to the sync/mount/delete commands

     def _validate(self):
         if self.source is not None and isinstance(self.source, str):