skypilot-nightly 1.0.0.dev20241228__py3-none-any.whl → 1.0.0.dev20241230__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
sky/data/storage.py CHANGED
@@ -24,6 +24,7 @@ from sky.adaptors import azure
24
24
  from sky.adaptors import cloudflare
25
25
  from sky.adaptors import gcp
26
26
  from sky.adaptors import ibm
27
+ from sky.adaptors import oci
27
28
  from sky.data import data_transfer
28
29
  from sky.data import data_utils
29
30
  from sky.data import mounting_utils
@@ -54,7 +55,9 @@ STORE_ENABLED_CLOUDS: List[str] = [
54
55
  str(clouds.AWS()),
55
56
  str(clouds.GCP()),
56
57
  str(clouds.Azure()),
57
- str(clouds.IBM()), cloudflare.NAME
58
+ str(clouds.IBM()),
59
+ str(clouds.OCI()),
60
+ cloudflare.NAME,
58
61
  ]
59
62
 
60
63
  # Maximum number of concurrent rsync upload processes
@@ -115,6 +118,7 @@ class StoreType(enum.Enum):
115
118
  AZURE = 'AZURE'
116
119
  R2 = 'R2'
117
120
  IBM = 'IBM'
121
+ OCI = 'OCI'
118
122
 
119
123
  @classmethod
120
124
  def from_cloud(cls, cloud: str) -> 'StoreType':
@@ -128,6 +132,8 @@ class StoreType(enum.Enum):
128
132
  return StoreType.R2
129
133
  elif cloud.lower() == str(clouds.Azure()).lower():
130
134
  return StoreType.AZURE
135
+ elif cloud.lower() == str(clouds.OCI()).lower():
136
+ return StoreType.OCI
131
137
  elif cloud.lower() == str(clouds.Lambda()).lower():
132
138
  with ux_utils.print_exception_no_traceback():
133
139
  raise ValueError('Lambda Cloud does not provide cloud storage.')
@@ -149,6 +155,8 @@ class StoreType(enum.Enum):
149
155
  return StoreType.R2
150
156
  elif isinstance(store, IBMCosStore):
151
157
  return StoreType.IBM
158
+ elif isinstance(store, OciStore):
159
+ return StoreType.OCI
152
160
  else:
153
161
  with ux_utils.print_exception_no_traceback():
154
162
  raise ValueError(f'Unknown store type: {store}')
@@ -165,6 +173,8 @@ class StoreType(enum.Enum):
165
173
  return 'r2://'
166
174
  elif self == StoreType.IBM:
167
175
  return 'cos://'
176
+ elif self == StoreType.OCI:
177
+ return 'oci://'
168
178
  else:
169
179
  with ux_utils.print_exception_no_traceback():
170
180
  raise ValueError(f'Unknown store type: {self}')
@@ -190,6 +200,45 @@ class StoreType(enum.Enum):
190
200
  bucket_endpoint_url = f'{store_type.store_prefix()}{path}'
191
201
  return bucket_endpoint_url
192
202
 
203
+ @classmethod
204
+ def get_fields_from_store_url(
205
+ cls, store_url: str
206
+ ) -> Tuple['StoreType', Type['AbstractStore'], str, str, Optional[str],
207
+ Optional[str]]:
208
+ """Returns the store type, store class, bucket name, and sub path from
209
+ a store URL, and the storage account name and region if applicable.
210
+
211
+ Args:
212
+ store_url: str; The store URL.
213
+ """
214
+ # The full path from the user config of IBM COS contains the region,
215
+ # and Azure Blob Storage contains the storage account name; we need to
216
+ # pass this information to the store constructor.
217
+ storage_account_name = None
218
+ region = None
219
+ for store_type in StoreType:
220
+ if store_url.startswith(store_type.store_prefix()):
221
+ if store_type == StoreType.AZURE:
222
+ storage_account_name, bucket_name, sub_path = \
223
+ data_utils.split_az_path(store_url)
224
+ store_cls: Type['AbstractStore'] = AzureBlobStore
225
+ elif store_type == StoreType.IBM:
226
+ bucket_name, sub_path, region = data_utils.split_cos_path(
227
+ store_url)
228
+ store_cls = IBMCosStore
229
+ elif store_type == StoreType.R2:
230
+ bucket_name, sub_path = data_utils.split_r2_path(store_url)
231
+ store_cls = R2Store
232
+ elif store_type == StoreType.GCS:
233
+ bucket_name, sub_path = data_utils.split_gcs_path(store_url)
234
+ store_cls = GcsStore
235
+ elif store_type == StoreType.S3:
236
+ bucket_name, sub_path = data_utils.split_s3_path(store_url)
237
+ store_cls = S3Store
238
+ return store_type, store_cls, bucket_name, \
239
+ sub_path, storage_account_name, region
240
+ raise ValueError(f'Unknown store URL: {store_url}')
241
+
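For orientation, a minimal usage sketch of the new classmethod; the URL, bucket name, and exact return values below are illustrative assumptions, not taken from this diff:

    # Hedged sketch: splits a store URL into its components.
    store_type, store_cls, bucket, sub_path, account, region = (
        StoreType.get_fields_from_store_url('s3://my-bucket/checkpoints'))
    # Roughly: (StoreType.S3, S3Store, 'my-bucket', 'checkpoints', None, None);
    # the storage account name and region are only populated for Azure and
    # IBM COS URLs.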
193
242
 
194
243
  class StorageMode(enum.Enum):
195
244
  MOUNT = 'MOUNT'
@@ -216,25 +265,29 @@ class AbstractStore:
216
265
  name: str,
217
266
  source: Optional[SourceType],
218
267
  region: Optional[str] = None,
219
- is_sky_managed: Optional[bool] = None):
268
+ is_sky_managed: Optional[bool] = None,
269
+ _bucket_sub_path: Optional[str] = None):
220
270
  self.name = name
221
271
  self.source = source
222
272
  self.region = region
223
273
  self.is_sky_managed = is_sky_managed
274
+ self._bucket_sub_path = _bucket_sub_path
224
275
 
225
276
  def __repr__(self):
226
277
  return (f'StoreMetadata('
227
278
  f'\n\tname={self.name},'
228
279
  f'\n\tsource={self.source},'
229
280
  f'\n\tregion={self.region},'
230
- f'\n\tis_sky_managed={self.is_sky_managed})')
281
+ f'\n\tis_sky_managed={self.is_sky_managed},'
282
+ f'\n\t_bucket_sub_path={self._bucket_sub_path})')
231
283
 
232
284
  def __init__(self,
233
285
  name: str,
234
286
  source: Optional[SourceType],
235
287
  region: Optional[str] = None,
236
288
  is_sky_managed: Optional[bool] = None,
237
- sync_on_reconstruction: Optional[bool] = True):
289
+ sync_on_reconstruction: Optional[bool] = True,
290
+ _bucket_sub_path: Optional[str] = None): # pylint: disable=invalid-name
238
291
  """Initialize AbstractStore
239
292
 
240
293
  Args:
@@ -248,7 +301,11 @@ class AbstractStore:
248
301
  there. This is set to false when the Storage object is created not
249
302
  for direct use, e.g. for 'sky storage delete', or the storage is
250
303
  being re-used, e.g., for `sky start` on a stopped cluster.
251
-
304
+ _bucket_sub_path: str; The prefix of the bucket directory to be
305
+ created in the store, e.g. if _bucket_sub_path=my-dir, the files
306
+ will be uploaded to s3://<bucket>/my-dir/.
307
+ This only works if source is a local directory.
308
+ # TODO(zpoint): Add support for non-local source.
252
309
  Raises:
253
310
  StorageBucketCreateError: If bucket creation fails
254
311
  StorageBucketGetError: If fetching existing bucket fails
@@ -259,10 +316,29 @@ class AbstractStore:
259
316
  self.region = region
260
317
  self.is_sky_managed = is_sky_managed
261
318
  self.sync_on_reconstruction = sync_on_reconstruction
319
+
320
+ # To avoid mypy error
321
+ self._bucket_sub_path: Optional[str] = None
322
+ # Trigger the setter to strip any leading/trailing slashes.
323
+ self.bucket_sub_path = _bucket_sub_path
262
324
  # Whether sky is responsible for the lifecycle of the Store.
263
325
  self._validate()
264
326
  self.initialize()
265
327
 
328
+ @property
329
+ def bucket_sub_path(self) -> Optional[str]:
330
+ """Get the bucket_sub_path."""
331
+ return self._bucket_sub_path
332
+
333
+ @bucket_sub_path.setter
334
+ # pylint: disable=invalid-name
335
+ def bucket_sub_path(self, bucket_sub_path: Optional[str]) -> None:
336
+ """Set the bucket_sub_path, stripping any leading/trailing slashes."""
337
+ if bucket_sub_path is not None:
338
+ self._bucket_sub_path = bucket_sub_path.strip('/')
339
+ else:
340
+ self._bucket_sub_path = None
341
+
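A small sketch of the setter behavior added above; 'store' stands for any AbstractStore subclass instance and the paths are made up:

    store.bucket_sub_path = '/experiments/run-1/'   # slashes are stripped
    assert store.bucket_sub_path == 'experiments/run-1'
    store.bucket_sub_path = None                    # clears the sub path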
266
342
  @classmethod
267
343
  def from_metadata(cls, metadata: StoreMetadata, **override_args):
268
344
  """Create a Store from a StoreMetadata object.
@@ -270,19 +346,26 @@ class AbstractStore:
270
346
  Used when reconstructing Storage and Store objects from
271
347
  global_user_state.
272
348
  """
273
- return cls(name=override_args.get('name', metadata.name),
274
- source=override_args.get('source', metadata.source),
275
- region=override_args.get('region', metadata.region),
276
- is_sky_managed=override_args.get('is_sky_managed',
277
- metadata.is_sky_managed),
278
- sync_on_reconstruction=override_args.get(
279
- 'sync_on_reconstruction', True))
349
+ return cls(
350
+ name=override_args.get('name', metadata.name),
351
+ source=override_args.get('source', metadata.source),
352
+ region=override_args.get('region', metadata.region),
353
+ is_sky_managed=override_args.get('is_sky_managed',
354
+ metadata.is_sky_managed),
355
+ sync_on_reconstruction=override_args.get('sync_on_reconstruction',
356
+ True),
357
+ # backward compatibility
358
+ _bucket_sub_path=override_args.get(
359
+ '_bucket_sub_path',
360
+ metadata._bucket_sub_path # pylint: disable=protected-access
361
+ ) if hasattr(metadata, '_bucket_sub_path') else None)
280
362
 
281
363
  def get_metadata(self) -> StoreMetadata:
282
364
  return self.StoreMetadata(name=self.name,
283
365
  source=self.source,
284
366
  region=self.region,
285
- is_sky_managed=self.is_sky_managed)
367
+ is_sky_managed=self.is_sky_managed,
368
+ _bucket_sub_path=self._bucket_sub_path)
286
369
 
287
370
  def initialize(self):
288
371
  """Initializes the Store object on the cloud.
@@ -310,7 +393,11 @@ class AbstractStore:
310
393
  raise NotImplementedError
311
394
 
312
395
  def delete(self) -> None:
313
- """Removes the Storage object from the cloud."""
396
+ """Removes the Storage from the cloud."""
397
+ raise NotImplementedError
398
+
399
+ def _delete_sub_path(self) -> None:
400
+ """Removes objects from the sub path in the bucket."""
314
401
  raise NotImplementedError
315
402
 
316
403
  def get_handle(self) -> StorageHandle:
@@ -454,13 +541,19 @@ class Storage(object):
454
541
  if storetype in self.sky_stores:
455
542
  del self.sky_stores[storetype]
456
543
 
457
- def __init__(self,
458
- name: Optional[str] = None,
459
- source: Optional[SourceType] = None,
460
- stores: Optional[Dict[StoreType, AbstractStore]] = None,
461
- persistent: Optional[bool] = True,
462
- mode: StorageMode = StorageMode.MOUNT,
463
- sync_on_reconstruction: bool = True) -> None:
544
+ def __init__(
545
+ self,
546
+ name: Optional[str] = None,
547
+ source: Optional[SourceType] = None,
548
+ stores: Optional[Dict[StoreType, AbstractStore]] = None,
549
+ persistent: Optional[bool] = True,
550
+ mode: StorageMode = StorageMode.MOUNT,
551
+ sync_on_reconstruction: bool = True,
552
+ # pylint: disable=invalid-name
553
+ _is_sky_managed: Optional[bool] = None,
554
+ # pylint: disable=invalid-name
555
+ _bucket_sub_path: Optional[str] = None
556
+ ) -> None:
464
557
  """Initializes a Storage object.
465
558
 
466
559
  Three fields are required: the name of the storage, the source
@@ -498,6 +591,18 @@ class Storage(object):
498
591
  there. This is set to false when the Storage object is created not
499
592
  for direct use, e.g. for 'sky storage delete', or the storage is
500
593
  being re-used, e.g., for `sky start` on a stopped cluster.
594
+ _is_sky_managed: Optional[bool]; Indicates if the storage is managed
595
+ by Sky. Without this argument, the controller's behavior differs
596
+ from the local machine. For example, if a bucket does not exist:
597
+ Local Machine (is_sky_managed=True) →
598
+ Controller (is_sky_managed=False).
599
+ With this argument, the controller aligns with the local machine,
600
+ ensuring it retains the is_sky_managed information from the YAML.
601
+ During teardown, if is_sky_managed is True, the controller should
602
+ delete the bucket; without this argument it would assume
603
+ is_sky_managed is False and mistakenly delete only the sub-path.
604
+ _bucket_sub_path: Optional[str]; The subdirectory to use for the
605
+ storage object.
501
606
  """
502
607
  self.name: str
503
608
  self.source = source
@@ -505,6 +610,8 @@ class Storage(object):
505
610
  self.mode = mode
506
611
  assert mode in StorageMode
507
612
  self.sync_on_reconstruction = sync_on_reconstruction
613
+ self._is_sky_managed = _is_sky_managed
614
+ self._bucket_sub_path = _bucket_sub_path
508
615
 
509
616
  # TODO(romilb, zhwu): This is a workaround to support storage deletion
510
617
  # for spot. Once sky storage supports forced management for external
@@ -564,6 +671,14 @@ class Storage(object):
564
671
  self.add_store(StoreType.R2)
565
672
  elif self.source.startswith('cos://'):
566
673
  self.add_store(StoreType.IBM)
674
+ elif self.source.startswith('oci://'):
675
+ self.add_store(StoreType.OCI)
676
+
677
+ def get_bucket_sub_path_prefix(self, blob_path: str) -> str:
678
+ """Adds the bucket sub path prefix to the blob path."""
679
+ if self._bucket_sub_path is not None:
680
+ return f'{blob_path}/{self._bucket_sub_path}'
681
+ return blob_path
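A hedged sketch of the helper above; the bucket name, local source, and sub path are made up, and the local source is assumed to exist so that validation passes:

    storage = Storage(name='my-bucket', source='~/data',
                      _bucket_sub_path='outputs')
    storage.get_bucket_sub_path_prefix('s3://my-bucket')
    # -> 's3://my-bucket/outputs'; with no sub path configured,
    # the blob path is returned unchanged.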
567
682
 
568
683
  @staticmethod
569
684
  def _validate_source(
@@ -644,7 +759,7 @@ class Storage(object):
644
759
  'using a bucket by writing <destination_path>: '
645
760
  f'{source} in the file_mounts section of your YAML')
646
761
  is_local_source = True
647
- elif split_path.scheme in ['s3', 'gs', 'https', 'r2', 'cos']:
762
+ elif split_path.scheme in ['s3', 'gs', 'https', 'r2', 'cos', 'oci']:
648
763
  is_local_source = False
649
764
  # Storage mounting does not support mounting specific files from
650
765
  # cloud store - ensure path points to only a directory
@@ -668,7 +783,7 @@ class Storage(object):
668
783
  with ux_utils.print_exception_no_traceback():
669
784
  raise exceptions.StorageSourceError(
670
785
  f'Supported paths: local, s3://, gs://, https://, '
671
- f'r2://, cos://. Got: {source}')
786
+ f'r2://, cos://, oci://. Got: {source}')
672
787
  return source, is_local_source
673
788
 
674
789
  def _validate_storage_spec(self, name: Optional[str]) -> None:
@@ -683,7 +798,7 @@ class Storage(object):
683
798
  """
684
799
  prefix = name.split('://')[0]
685
800
  prefix = prefix.lower()
686
- if prefix in ['s3', 'gs', 'https', 'r2', 'cos']:
801
+ if prefix in ['s3', 'gs', 'https', 'r2', 'cos', 'oci']:
687
802
  with ux_utils.print_exception_no_traceback():
688
803
  raise exceptions.StorageNameError(
689
804
  'Prefix detected: `name` cannot start with '
@@ -775,29 +890,40 @@ class Storage(object):
775
890
  store = S3Store.from_metadata(
776
891
  s_metadata,
777
892
  source=self.source,
778
- sync_on_reconstruction=self.sync_on_reconstruction)
893
+ sync_on_reconstruction=self.sync_on_reconstruction,
894
+ _bucket_sub_path=self._bucket_sub_path)
779
895
  elif s_type == StoreType.GCS:
780
896
  store = GcsStore.from_metadata(
781
897
  s_metadata,
782
898
  source=self.source,
783
- sync_on_reconstruction=self.sync_on_reconstruction)
899
+ sync_on_reconstruction=self.sync_on_reconstruction,
900
+ _bucket_sub_path=self._bucket_sub_path)
784
901
  elif s_type == StoreType.AZURE:
785
902
  assert isinstance(s_metadata,
786
903
  AzureBlobStore.AzureBlobStoreMetadata)
787
904
  store = AzureBlobStore.from_metadata(
788
905
  s_metadata,
789
906
  source=self.source,
790
- sync_on_reconstruction=self.sync_on_reconstruction)
907
+ sync_on_reconstruction=self.sync_on_reconstruction,
908
+ _bucket_sub_path=self._bucket_sub_path)
791
909
  elif s_type == StoreType.R2:
792
910
  store = R2Store.from_metadata(
793
911
  s_metadata,
794
912
  source=self.source,
795
- sync_on_reconstruction=self.sync_on_reconstruction)
913
+ sync_on_reconstruction=self.sync_on_reconstruction,
914
+ _bucket_sub_path=self._bucket_sub_path)
796
915
  elif s_type == StoreType.IBM:
797
916
  store = IBMCosStore.from_metadata(
798
917
  s_metadata,
799
918
  source=self.source,
800
- sync_on_reconstruction=self.sync_on_reconstruction)
919
+ sync_on_reconstruction=self.sync_on_reconstruction,
920
+ _bucket_sub_path=self._bucket_sub_path)
921
+ elif s_type == StoreType.OCI:
922
+ store = OciStore.from_metadata(
923
+ s_metadata,
924
+ source=self.source,
925
+ sync_on_reconstruction=self.sync_on_reconstruction,
926
+ _bucket_sub_path=self._bucket_sub_path)
801
927
  else:
802
928
  with ux_utils.print_exception_no_traceback():
803
929
  raise ValueError(f'Unknown store type: {s_type}')
@@ -817,7 +943,6 @@ class Storage(object):
817
943
  'to be reconstructed while the corresponding '
818
944
  'bucket was externally deleted.')
819
945
  continue
820
-
821
946
  self._add_store(store, is_reconstructed=True)
822
947
 
823
948
  @classmethod
@@ -873,6 +998,7 @@ class Storage(object):
873
998
  f'storage account {storage_account_name!r}.')
874
999
  else:
875
1000
  logger.info(f'Storage type {store_type} already exists.')
1001
+
876
1002
  return self.stores[store_type]
877
1003
 
878
1004
  store_cls: Type[AbstractStore]
@@ -886,25 +1012,30 @@ class Storage(object):
886
1012
  store_cls = R2Store
887
1013
  elif store_type == StoreType.IBM:
888
1014
  store_cls = IBMCosStore
1015
+ elif store_type == StoreType.OCI:
1016
+ store_cls = OciStore
889
1017
  else:
890
1018
  with ux_utils.print_exception_no_traceback():
891
1019
  raise exceptions.StorageSpecError(
892
1020
  f'{store_type} not supported as a Store.')
893
-
894
- # Initialize store object and get/create bucket
895
1021
  try:
896
1022
  store = store_cls(
897
1023
  name=self.name,
898
1024
  source=self.source,
899
1025
  region=region,
900
- sync_on_reconstruction=self.sync_on_reconstruction)
1026
+ sync_on_reconstruction=self.sync_on_reconstruction,
1027
+ is_sky_managed=self._is_sky_managed,
1028
+ _bucket_sub_path=self._bucket_sub_path)
901
1029
  except exceptions.StorageBucketCreateError:
902
1030
  # Creation failed, so this must be sky managed store. Add failure
903
1031
  # to state.
904
1032
  logger.error(f'Could not create {store_type} store '
905
1033
  f'with name {self.name}.')
906
- global_user_state.set_storage_status(self.name,
907
- StorageStatus.INIT_FAILED)
1034
+ try:
1035
+ global_user_state.set_storage_status(self.name,
1036
+ StorageStatus.INIT_FAILED)
1037
+ except ValueError as e:
1038
+ logger.error(f'Error setting storage status: {e}')
908
1039
  raise
909
1040
  except exceptions.StorageBucketGetError:
910
1041
  # Bucket get failed, so this is not sky managed. Do not update state
@@ -1020,12 +1151,15 @@ class Storage(object):
1020
1151
  def from_yaml_config(cls, config: Dict[str, Any]) -> 'Storage':
1021
1152
  common_utils.validate_schema(config, schemas.get_storage_schema(),
1022
1153
  'Invalid storage YAML: ')
1023
-
1024
1154
  name = config.pop('name', None)
1025
1155
  source = config.pop('source', None)
1026
1156
  store = config.pop('store', None)
1027
1157
  mode_str = config.pop('mode', None)
1028
1158
  force_delete = config.pop('_force_delete', None)
1159
+ # pylint: disable=invalid-name
1160
+ _is_sky_managed = config.pop('_is_sky_managed', None)
1161
+ # pylint: disable=invalid-name
1162
+ _bucket_sub_path = config.pop('_bucket_sub_path', None)
1029
1163
  if force_delete is None:
1030
1164
  force_delete = False
1031
1165
 
@@ -1045,7 +1179,9 @@ class Storage(object):
1045
1179
  storage_obj = cls(name=name,
1046
1180
  source=source,
1047
1181
  persistent=persistent,
1048
- mode=mode)
1182
+ mode=mode,
1183
+ _is_sky_managed=_is_sky_managed,
1184
+ _bucket_sub_path=_bucket_sub_path)
1049
1185
  if store is not None:
1050
1186
  storage_obj.add_store(StoreType(store.upper()))
1051
1187
 
@@ -1053,7 +1189,7 @@ class Storage(object):
1053
1189
  storage_obj.force_delete = force_delete
1054
1190
  return storage_obj
1055
1191
 
1056
- def to_yaml_config(self) -> Dict[str, str]:
1192
+ def to_yaml_config(self) -> Dict[str, Any]:
1057
1193
  config = {}
1058
1194
 
1059
1195
  def add_if_not_none(key: str, value: Optional[Any]):
@@ -1069,13 +1205,18 @@ class Storage(object):
1069
1205
  add_if_not_none('source', self.source)
1070
1206
 
1071
1207
  stores = None
1208
+ is_sky_managed = self._is_sky_managed
1072
1209
  if self.stores:
1073
1210
  stores = ','.join([store.value for store in self.stores])
1211
+ is_sky_managed = list(self.stores.values())[0].is_sky_managed
1074
1212
  add_if_not_none('store', stores)
1213
+ add_if_not_none('_is_sky_managed', is_sky_managed)
1075
1214
  add_if_not_none('persistent', self.persistent)
1076
1215
  add_if_not_none('mode', self.mode.value)
1077
1216
  if self.force_delete:
1078
1217
  config['_force_delete'] = True
1218
+ if self._bucket_sub_path is not None:
1219
+ config['_bucket_sub_path'] = self._bucket_sub_path
1079
1220
  return config
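Taken together, from_yaml_config() and to_yaml_config() now round-trip the two private fields. A hedged sketch, assuming the storage schema in this release accepts the new keys and that the local source path exists (all values are made up):

    config = {
        'name': 'my-bucket',
        'source': '~/data',
        'mode': 'MOUNT',
        '_is_sky_managed': False,
        '_bucket_sub_path': 'job-42/outputs',
    }
    storage = Storage.from_yaml_config(config)   # pops the private keys
    storage.to_yaml_config()                     # writes them back once set
    # If any store has been added, _is_sky_managed is taken from that store.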
1080
1221
 
1081
1222
 
@@ -1097,7 +1238,8 @@ class S3Store(AbstractStore):
1097
1238
  source: str,
1098
1239
  region: Optional[str] = _DEFAULT_REGION,
1099
1240
  is_sky_managed: Optional[bool] = None,
1100
- sync_on_reconstruction: bool = True):
1241
+ sync_on_reconstruction: bool = True,
1242
+ _bucket_sub_path: Optional[str] = None):
1101
1243
  self.client: 'boto3.client.Client'
1102
1244
  self.bucket: 'StorageHandle'
1103
1245
  # TODO(romilb): This is purely a stopgap fix for
@@ -1110,7 +1252,7 @@ class S3Store(AbstractStore):
1110
1252
  f'{self._DEFAULT_REGION} for bucket {name!r}.')
1111
1253
  region = self._DEFAULT_REGION
1112
1254
  super().__init__(name, source, region, is_sky_managed,
1113
- sync_on_reconstruction)
1255
+ sync_on_reconstruction, _bucket_sub_path)
1114
1256
 
1115
1257
  def _validate(self):
1116
1258
  if self.source is not None and isinstance(self.source, str):
@@ -1149,6 +1291,9 @@ class S3Store(AbstractStore):
1149
1291
  assert data_utils.verify_ibm_cos_bucket(self.name), (
1150
1292
  f'Source specified as {self.source}, a COS bucket. ',
1151
1293
  'COS Bucket should exist.')
1294
+ elif self.source.startswith('oci://'):
1295
+ raise NotImplementedError(
1296
+ 'Moving data from OCI to S3 is currently not supported.')
1152
1297
  # Validate name
1153
1298
  self.name = self.validate_name(self.name)
1154
1299
 
@@ -1260,6 +1405,8 @@ class S3Store(AbstractStore):
1260
1405
  self._transfer_to_s3()
1261
1406
  elif self.source.startswith('r2://'):
1262
1407
  self._transfer_to_s3()
1408
+ elif self.source.startswith('oci://'):
1409
+ self._transfer_to_s3()
1263
1410
  else:
1264
1411
  self.batch_aws_rsync([self.source])
1265
1412
  except exceptions.StorageUploadError:
@@ -1269,6 +1416,9 @@ class S3Store(AbstractStore):
1269
1416
  f'Upload failed for store {self.name}') from e
1270
1417
 
1271
1418
  def delete(self) -> None:
1419
+ if self._bucket_sub_path is not None and not self.is_sky_managed:
1420
+ return self._delete_sub_path()
1421
+
1272
1422
  deleted_by_skypilot = self._delete_s3_bucket(self.name)
1273
1423
  if deleted_by_skypilot:
1274
1424
  msg_str = f'Deleted S3 bucket {self.name}.'
@@ -1278,6 +1428,19 @@ class S3Store(AbstractStore):
1278
1428
  logger.info(f'{colorama.Fore.GREEN}{msg_str}'
1279
1429
  f'{colorama.Style.RESET_ALL}')
1280
1430
 
1431
+ def _delete_sub_path(self) -> None:
1432
+ assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
1433
+ deleted_by_skypilot = self._delete_s3_bucket_sub_path(
1434
+ self.name, self._bucket_sub_path)
1435
+ if deleted_by_skypilot:
1436
+ msg_str = f'Removed objects from S3 bucket ' \
1437
+ f'{self.name}/{self._bucket_sub_path}.'
1438
+ else:
1439
+ msg_str = f'Failed to remove objects from S3 bucket ' \
1440
+ f'{self.name}/{self._bucket_sub_path}.'
1441
+ logger.info(f'{colorama.Fore.GREEN}{msg_str}'
1442
+ f'{colorama.Style.RESET_ALL}')
1443
+
1281
1444
  def get_handle(self) -> StorageHandle:
1282
1445
  return aws.resource('s3').Bucket(self.name)
1283
1446
 
@@ -1308,9 +1471,11 @@ class S3Store(AbstractStore):
1308
1471
  for file_name in file_names
1309
1472
  ])
1310
1473
  base_dir_path = shlex.quote(base_dir_path)
1474
+ sub_path = (f'/{self._bucket_sub_path}'
1475
+ if self._bucket_sub_path else '')
1311
1476
  sync_command = ('aws s3 sync --no-follow-symlinks --exclude="*" '
1312
1477
  f'{includes} {base_dir_path} '
1313
- f's3://{self.name}')
1478
+ f's3://{self.name}{sub_path}')
1314
1479
  return sync_command
1315
1480
 
1316
1481
  def get_dir_sync_command(src_dir_path, dest_dir_name):
@@ -1322,9 +1487,11 @@ class S3Store(AbstractStore):
1322
1487
  for file_name in excluded_list
1323
1488
  ])
1324
1489
  src_dir_path = shlex.quote(src_dir_path)
1490
+ sub_path = (f'/{self._bucket_sub_path}'
1491
+ if self._bucket_sub_path else '')
1325
1492
  sync_command = (f'aws s3 sync --no-follow-symlinks {excludes} '
1326
1493
  f'{src_dir_path} '
1327
- f's3://{self.name}/{dest_dir_name}')
1494
+ f's3://{self.name}{sub_path}/{dest_dir_name}')
1328
1495
  return sync_command
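With a sub path configured (say _bucket_sub_path='job-42', a made-up value), the generated directory sync command now targets the prefix rather than the bucket root, roughly 'aws s3 sync --no-follow-symlinks ... ./ckpt s3://my-bucket/job-42/ckpt'; the file-sync variant above gains the same suffix.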
1329
1496
 
1330
1497
  # Generate message for upload
@@ -1442,7 +1609,8 @@ class S3Store(AbstractStore):
1442
1609
  """
1443
1610
  install_cmd = mounting_utils.get_s3_mount_install_cmd()
1444
1611
  mount_cmd = mounting_utils.get_s3_mount_cmd(self.bucket.name,
1445
- mount_path)
1612
+ mount_path,
1613
+ self._bucket_sub_path)
1446
1614
  return mounting_utils.get_mounting_command(mount_path, install_cmd,
1447
1615
  mount_cmd)
1448
1616
 
@@ -1492,6 +1660,27 @@ class S3Store(AbstractStore):
1492
1660
  ) from e
1493
1661
  return aws.resource('s3').Bucket(bucket_name)
1494
1662
 
1663
+ def _execute_s3_remove_command(self, command: str, bucket_name: str,
1664
+ hint_operating: str,
1665
+ hint_failed: str) -> bool:
1666
+ try:
1667
+ with rich_utils.safe_status(
1668
+ ux_utils.spinner_message(hint_operating)):
1669
+ subprocess.check_output(command.split(' '),
1670
+ stderr=subprocess.STDOUT)
1671
+ except subprocess.CalledProcessError as e:
1672
+ if 'NoSuchBucket' in e.output.decode('utf-8'):
1673
+ logger.debug(
1674
+ _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
1675
+ bucket_name=bucket_name))
1676
+ return False
1677
+ else:
1678
+ with ux_utils.print_exception_no_traceback():
1679
+ raise exceptions.StorageBucketDeleteError(
1680
+ f'{hint_failed}'
1681
+ f'Detailed error: {e.output}')
1682
+ return True
1683
+
1495
1684
  def _delete_s3_bucket(self, bucket_name: str) -> bool:
1496
1685
  """Deletes S3 bucket, including all objects in bucket
1497
1686
 
@@ -1509,29 +1698,28 @@ class S3Store(AbstractStore):
1509
1698
  # The fastest way to delete is to run `aws s3 rb --force`,
1510
1699
  # which removes the bucket by force.
1511
1700
  remove_command = f'aws s3 rb s3://{bucket_name} --force'
1512
- try:
1513
- with rich_utils.safe_status(
1514
- ux_utils.spinner_message(
1515
- f'Deleting S3 bucket [green]{bucket_name}')):
1516
- subprocess.check_output(remove_command.split(' '),
1517
- stderr=subprocess.STDOUT)
1518
- except subprocess.CalledProcessError as e:
1519
- if 'NoSuchBucket' in e.output.decode('utf-8'):
1520
- logger.debug(
1521
- _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
1522
- bucket_name=bucket_name))
1523
- return False
1524
- else:
1525
- with ux_utils.print_exception_no_traceback():
1526
- raise exceptions.StorageBucketDeleteError(
1527
- f'Failed to delete S3 bucket {bucket_name}.'
1528
- f'Detailed error: {e.output}')
1701
+ success = self._execute_s3_remove_command(
1702
+ remove_command, bucket_name,
1703
+ f'Deleting S3 bucket [green]{bucket_name}[/]',
1704
+ f'Failed to delete S3 bucket {bucket_name}.')
1705
+ if not success:
1706
+ return False
1529
1707
 
1530
1708
  # Wait until bucket deletion propagates on AWS servers
1531
1709
  while data_utils.verify_s3_bucket(bucket_name):
1532
1710
  time.sleep(0.1)
1533
1711
  return True
1534
1712
 
1713
+ def _delete_s3_bucket_sub_path(self, bucket_name: str,
1714
+ sub_path: str) -> bool:
1715
+ """Deletes the sub path from the bucket."""
1716
+ remove_command = f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive'
1717
+ return self._execute_s3_remove_command(
1718
+ remove_command, bucket_name, f'Removing objects from S3 bucket '
1719
+ f'[green]{bucket_name}/{sub_path}[/]',
1720
+ f'Failed to remove objects from S3 bucket {bucket_name}/{sub_path}.'
1721
+ )
1722
+
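For the same made-up values, the sub-path delete issues a recursive object removal, roughly 'aws s3 rm s3://my-bucket/job-42/ --recursive', so only the prefix is cleared and the bucket itself is left in place.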
1535
1723
 
1536
1724
  class GcsStore(AbstractStore):
1537
1725
  """GcsStore inherits from Storage Object and represents the backend
@@ -1545,11 +1733,12 @@ class GcsStore(AbstractStore):
1545
1733
  source: str,
1546
1734
  region: Optional[str] = 'us-central1',
1547
1735
  is_sky_managed: Optional[bool] = None,
1548
- sync_on_reconstruction: Optional[bool] = True):
1736
+ sync_on_reconstruction: Optional[bool] = True,
1737
+ _bucket_sub_path: Optional[str] = None):
1549
1738
  self.client: 'storage.Client'
1550
1739
  self.bucket: StorageHandle
1551
1740
  super().__init__(name, source, region, is_sky_managed,
1552
- sync_on_reconstruction)
1741
+ sync_on_reconstruction, _bucket_sub_path)
1553
1742
 
1554
1743
  def _validate(self):
1555
1744
  if self.source is not None and isinstance(self.source, str):
@@ -1588,6 +1777,9 @@ class GcsStore(AbstractStore):
1588
1777
  assert data_utils.verify_ibm_cos_bucket(self.name), (
1589
1778
  f'Source specified as {self.source}, a COS bucket. ',
1590
1779
  'COS Bucket should exist.')
1780
+ elif self.source.startswith('oci://'):
1781
+ raise NotImplementedError(
1782
+ 'Moving data from OCI to GCS is currently not supported.')
1591
1783
  # Validate name
1592
1784
  self.name = self.validate_name(self.name)
1593
1785
  # Check if the storage is enabled
@@ -1696,6 +1888,8 @@ class GcsStore(AbstractStore):
1696
1888
  self._transfer_to_gcs()
1697
1889
  elif self.source.startswith('r2://'):
1698
1890
  self._transfer_to_gcs()
1891
+ elif self.source.startswith('oci://'):
1892
+ self._transfer_to_gcs()
1699
1893
  else:
1700
1894
  # If a single directory is specified in source, upload
1701
1895
  # contents to root of bucket by suffixing /*.
@@ -1707,6 +1901,9 @@ class GcsStore(AbstractStore):
1707
1901
  f'Upload failed for store {self.name}') from e
1708
1902
 
1709
1903
  def delete(self) -> None:
1904
+ if self._bucket_sub_path is not None and not self.is_sky_managed:
1905
+ return self._delete_sub_path()
1906
+
1710
1907
  deleted_by_skypilot = self._delete_gcs_bucket(self.name)
1711
1908
  if deleted_by_skypilot:
1712
1909
  msg_str = f'Deleted GCS bucket {self.name}.'
@@ -1716,6 +1913,19 @@ class GcsStore(AbstractStore):
1716
1913
  logger.info(f'{colorama.Fore.GREEN}{msg_str}'
1717
1914
  f'{colorama.Style.RESET_ALL}')
1718
1915
 
1916
+ def _delete_sub_path(self) -> None:
1917
+ assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
1918
+ deleted_by_skypilot = self._delete_gcs_bucket(self.name,
1919
+ self._bucket_sub_path)
1920
+ if deleted_by_skypilot:
1921
+ msg_str = f'Deleted objects in GCS bucket ' \
1922
+ f'{self.name}/{self._bucket_sub_path}.'
1923
+ else:
1924
+ msg_str = f'GCS bucket {self.name} may have ' \
1925
+ 'been deleted externally.'
1926
+ logger.info(f'{colorama.Fore.GREEN}{msg_str}'
1927
+ f'{colorama.Style.RESET_ALL}')
1928
+
1719
1929
  def get_handle(self) -> StorageHandle:
1720
1930
  return self.client.get_bucket(self.name)
1721
1931
 
@@ -1789,9 +1999,11 @@ class GcsStore(AbstractStore):
1789
1999
  sync_format = '|'.join(file_names)
1790
2000
  gsutil_alias, alias_gen = data_utils.get_gsutil_command()
1791
2001
  base_dir_path = shlex.quote(base_dir_path)
2002
+ sub_path = (f'/{self._bucket_sub_path}'
2003
+ if self._bucket_sub_path else '')
1792
2004
  sync_command = (f'{alias_gen}; {gsutil_alias} '
1793
2005
  f'rsync -e -x \'^(?!{sync_format}$).*\' '
1794
- f'{base_dir_path} gs://{self.name}')
2006
+ f'{base_dir_path} gs://{self.name}{sub_path}')
1795
2007
  return sync_command
1796
2008
 
1797
2009
  def get_dir_sync_command(src_dir_path, dest_dir_name):
@@ -1801,9 +2013,11 @@ class GcsStore(AbstractStore):
1801
2013
  excludes = '|'.join(excluded_list)
1802
2014
  gsutil_alias, alias_gen = data_utils.get_gsutil_command()
1803
2015
  src_dir_path = shlex.quote(src_dir_path)
2016
+ sub_path = (f'/{self._bucket_sub_path}'
2017
+ if self._bucket_sub_path else '')
1804
2018
  sync_command = (f'{alias_gen}; {gsutil_alias} '
1805
2019
  f'rsync -e -r -x \'({excludes})\' {src_dir_path} '
1806
- f'gs://{self.name}/{dest_dir_name}')
2020
+ f'gs://{self.name}{sub_path}/{dest_dir_name}')
1807
2021
  return sync_command
1808
2022
 
1809
2023
  # Generate message for upload
@@ -1908,7 +2122,8 @@ class GcsStore(AbstractStore):
1908
2122
  """
1909
2123
  install_cmd = mounting_utils.get_gcs_mount_install_cmd()
1910
2124
  mount_cmd = mounting_utils.get_gcs_mount_cmd(self.bucket.name,
1911
- mount_path)
2125
+ mount_path,
2126
+ self._bucket_sub_path)
1912
2127
  version_check_cmd = (
1913
2128
  f'gcsfuse --version | grep -q {mounting_utils.GCSFUSE_VERSION}')
1914
2129
  return mounting_utils.get_mounting_command(mount_path, install_cmd,
@@ -1948,19 +2163,33 @@ class GcsStore(AbstractStore):
1948
2163
  f'{new_bucket.storage_class}{colorama.Style.RESET_ALL}')
1949
2164
  return new_bucket
1950
2165
 
1951
- def _delete_gcs_bucket(self, bucket_name: str) -> bool:
1952
- """Deletes GCS bucket, including all objects in bucket
2166
+ def _delete_gcs_bucket(
2167
+ self,
2168
+ bucket_name: str,
2169
+ # pylint: disable=invalid-name
2170
+ _bucket_sub_path: Optional[str] = None
2171
+ ) -> bool:
2172
+ """Deletes objects in GCS bucket
1953
2173
 
1954
2174
  Args:
1955
2175
  bucket_name: str; Name of bucket
2176
+ _bucket_sub_path: str; Sub path in the bucket. If provided, only
2177
+ objects under the sub path are deleted; otherwise the whole bucket
2178
+ is deleted.
1956
2179
 
1957
2180
  Returns:
1958
2181
  bool; True if bucket was deleted, False if it was deleted externally.
1959
2182
  """
1960
-
2183
+ if _bucket_sub_path is not None:
2184
+ command_suffix = f'/{_bucket_sub_path}'
2185
+ hint_text = 'objects in '
2186
+ else:
2187
+ command_suffix = ''
2188
+ hint_text = ''
1961
2189
  with rich_utils.safe_status(
1962
2190
  ux_utils.spinner_message(
1963
- f'Deleting GCS bucket [green]{bucket_name}')):
2191
+ f'Deleting {hint_text}GCS bucket '
2192
+ f'[green]{bucket_name}{command_suffix}[/]')):
1964
2193
  try:
1965
2194
  self.client.get_bucket(bucket_name)
1966
2195
  except gcp.forbidden_exception() as e:
@@ -1978,8 +2207,9 @@ class GcsStore(AbstractStore):
1978
2207
  return False
1979
2208
  try:
1980
2209
  gsutil_alias, alias_gen = data_utils.get_gsutil_command()
1981
- remove_obj_command = (f'{alias_gen};{gsutil_alias} '
1982
- f'rm -r gs://{bucket_name}')
2210
+ remove_obj_command = (
2211
+ f'{alias_gen};{gsutil_alias} '
2212
+ f'rm -r gs://{bucket_name}{command_suffix}')
1983
2213
  subprocess.check_output(remove_obj_command,
1984
2214
  stderr=subprocess.STDOUT,
1985
2215
  shell=True,
@@ -1988,7 +2218,8 @@ class GcsStore(AbstractStore):
1988
2218
  except subprocess.CalledProcessError as e:
1989
2219
  with ux_utils.print_exception_no_traceback():
1990
2220
  raise exceptions.StorageBucketDeleteError(
1991
- f'Failed to delete GCS bucket {bucket_name}.'
2221
+ f'Failed to delete {hint_text}GCS bucket '
2222
+ f'{bucket_name}{command_suffix}.'
1992
2223
  f'Detailed error: {e.output}')
1993
2224
 
1994
2225
 
@@ -2040,7 +2271,8 @@ class AzureBlobStore(AbstractStore):
2040
2271
  storage_account_name: str = '',
2041
2272
  region: Optional[str] = 'eastus',
2042
2273
  is_sky_managed: Optional[bool] = None,
2043
- sync_on_reconstruction: bool = True):
2274
+ sync_on_reconstruction: bool = True,
2275
+ _bucket_sub_path: Optional[str] = None):
2044
2276
  self.storage_client: 'storage.Client'
2045
2277
  self.resource_client: 'storage.Client'
2046
2278
  self.container_name: str
@@ -2052,7 +2284,7 @@ class AzureBlobStore(AbstractStore):
2052
2284
  if region is None:
2053
2285
  region = 'eastus'
2054
2286
  super().__init__(name, source, region, is_sky_managed,
2055
- sync_on_reconstruction)
2287
+ sync_on_reconstruction, _bucket_sub_path)
2056
2288
 
2057
2289
  @classmethod
2058
2290
  def from_metadata(cls, metadata: AbstractStore.StoreMetadata,
@@ -2122,6 +2354,9 @@ class AzureBlobStore(AbstractStore):
2122
2354
  assert data_utils.verify_ibm_cos_bucket(self.name), (
2123
2355
  f'Source specified as {self.source}, a COS bucket. ',
2124
2356
  'COS Bucket should exist.')
2357
+ elif self.source.startswith('oci://'):
2358
+ raise NotImplementedError(
2359
+ 'Moving data from OCI to Azure Blob is not supported.')
2125
2360
  # Validate name
2126
2361
  self.name = self.validate_name(self.name)
2127
2362
 
@@ -2199,6 +2434,17 @@ class AzureBlobStore(AbstractStore):
2199
2434
  """
2200
2435
  self.storage_client = data_utils.create_az_client('storage')
2201
2436
  self.resource_client = data_utils.create_az_client('resource')
2437
+ self._update_storage_account_name_and_resource()
2438
+
2439
+ self.container_name, is_new_bucket = self._get_bucket()
2440
+ if self.is_sky_managed is None:
2441
+ # If is_sky_managed is not specified, then this is a new storage
2442
+ # object (i.e., did not exist in global_user_state) and we should
2443
+ # set the is_sky_managed property.
2444
+ # If is_sky_managed is specified, then we take no action.
2445
+ self.is_sky_managed = is_new_bucket
2446
+
2447
+ def _update_storage_account_name_and_resource(self):
2202
2448
  self.storage_account_name, self.resource_group_name = (
2203
2449
  self._get_storage_account_and_resource_group())
2204
2450
 
@@ -2209,13 +2455,13 @@ class AzureBlobStore(AbstractStore):
2209
2455
  self.storage_account_name, self.resource_group_name,
2210
2456
  self.storage_client, self.resource_client)
2211
2457
 
2212
- self.container_name, is_new_bucket = self._get_bucket()
2213
- if self.is_sky_managed is None:
2214
- # If is_sky_managed is not specified, then this is a new storage
2215
- # object (i.e., did not exist in global_user_state) and we should
2216
- # set the is_sky_managed property.
2217
- # If is_sky_managed is specified, then we take no action.
2218
- self.is_sky_managed = is_new_bucket
2458
+ def update_storage_attributes(self, **kwargs: Dict[str, Any]):
2459
+ assert 'storage_account_name' in kwargs, (
2460
+ 'only storage_account_name supported')
2461
+ assert isinstance(kwargs['storage_account_name'],
2462
+ str), ('storage_account_name must be a string')
2463
+ self.storage_account_name = kwargs['storage_account_name']
2464
+ self._update_storage_account_name_and_resource()
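A hedged usage sketch of this new hook; the account name is made up and 'store' stands for an existing AzureBlobStore instance:

    store.update_storage_attributes(storage_account_name='mystorageacct123')
    # Re-resolves the resource group (and any required registration) for
    # the newly assigned storage account.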
2219
2465
 
2220
2466
  @staticmethod
2221
2467
  def get_default_storage_account_name(region: Optional[str]) -> str:
@@ -2474,6 +2720,8 @@ class AzureBlobStore(AbstractStore):
2474
2720
  raise NotImplementedError(error_message.format('R2'))
2475
2721
  elif self.source.startswith('cos://'):
2476
2722
  raise NotImplementedError(error_message.format('IBM COS'))
2723
+ elif self.source.startswith('oci://'):
2724
+ raise NotImplementedError(error_message.format('OCI'))
2477
2725
  else:
2478
2726
  self.batch_az_blob_sync([self.source])
2479
2727
  except exceptions.StorageUploadError:
@@ -2484,6 +2732,9 @@ class AzureBlobStore(AbstractStore):
2484
2732
 
2485
2733
  def delete(self) -> None:
2486
2734
  """Deletes the storage."""
2735
+ if self._bucket_sub_path is not None and not self.is_sky_managed:
2736
+ return self._delete_sub_path()
2737
+
2487
2738
  deleted_by_skypilot = self._delete_az_bucket(self.name)
2488
2739
  if deleted_by_skypilot:
2489
2740
  msg_str = (f'Deleted AZ Container {self.name!r} under storage '
@@ -2494,6 +2745,32 @@ class AzureBlobStore(AbstractStore):
2494
2745
  logger.info(f'{colorama.Fore.GREEN}{msg_str}'
2495
2746
  f'{colorama.Style.RESET_ALL}')
2496
2747
 
2748
+ def _delete_sub_path(self) -> None:
2749
+ assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
2750
+ try:
2751
+ container_url = data_utils.AZURE_CONTAINER_URL.format(
2752
+ storage_account_name=self.storage_account_name,
2753
+ container_name=self.name)
2754
+ container_client = data_utils.create_az_client(
2755
+ client_type='container',
2756
+ container_url=container_url,
2757
+ storage_account_name=self.storage_account_name,
2758
+ resource_group_name=self.resource_group_name)
2759
+ # List and delete blobs in the specified directory
2760
+ blobs = container_client.list_blobs(
2761
+ name_starts_with=self._bucket_sub_path + '/')
2762
+ for blob in blobs:
2763
+ container_client.delete_blob(blob.name)
2764
+ logger.info(
2765
+ f'Deleted objects from sub path {self._bucket_sub_path} '
2766
+ f'in container {self.name}.')
2767
+ except Exception as e: # pylint: disable=broad-except
2768
+ logger.error(
2769
+ f'Failed to delete objects from sub path '
2770
+ f'{self._bucket_sub_path} in container {self.name}. '
2771
+ f'Details: {common_utils.format_exception(e, use_bracket=True)}'
2772
+ )
2773
+
2497
2774
  def get_handle(self) -> StorageHandle:
2498
2775
  """Returns the Storage Handle object."""
2499
2776
  return self.storage_client.blob_containers.get(
@@ -2520,13 +2797,15 @@ class AzureBlobStore(AbstractStore):
2520
2797
  includes_list = ';'.join(file_names)
2521
2798
  includes = f'--include-pattern "{includes_list}"'
2522
2799
  base_dir_path = shlex.quote(base_dir_path)
2800
+ container_path = (f'{self.container_name}/{self._bucket_sub_path}'
2801
+ if self._bucket_sub_path else self.container_name)
2523
2802
  sync_command = (f'az storage blob sync '
2524
2803
  f'--account-name {self.storage_account_name} '
2525
2804
  f'--account-key {self.storage_account_key} '
2526
2805
  f'{includes} '
2527
2806
  '--delete-destination false '
2528
2807
  f'--source {base_dir_path} '
2529
- f'--container {self.container_name}')
2808
+ f'--container {container_path}')
2530
2809
  return sync_command
2531
2810
 
2532
2811
  def get_dir_sync_command(src_dir_path, dest_dir_name) -> str:
@@ -2537,8 +2816,11 @@ class AzureBlobStore(AbstractStore):
2537
2816
  [file_name.rstrip('*') for file_name in excluded_list])
2538
2817
  excludes = f'--exclude-path "{excludes_list}"'
2539
2818
  src_dir_path = shlex.quote(src_dir_path)
2540
- container_path = (f'{self.container_name}/{dest_dir_name}'
2541
- if dest_dir_name else self.container_name)
2819
+ container_path = (f'{self.container_name}/{self._bucket_sub_path}'
2820
+ if self._bucket_sub_path else
2821
+ f'{self.container_name}')
2822
+ if dest_dir_name:
2823
+ container_path = f'{container_path}/{dest_dir_name}'
2542
2824
  sync_command = (f'az storage blob sync '
2543
2825
  f'--account-name {self.storage_account_name} '
2544
2826
  f'--account-key {self.storage_account_key} '
@@ -2661,6 +2943,7 @@ class AzureBlobStore(AbstractStore):
2661
2943
  f'{self.storage_account_name!r}.'
2662
2944
  'Details: '
2663
2945
  f'{common_utils.format_exception(e, use_bracket=True)}')
2946
+
2664
2947
  # If the container cannot be found in both private and public settings,
2665
2948
  # the container is to be created by Sky. However, creation is skipped
2666
2949
  # if Store object is being reconstructed for deletion or re-mount with
@@ -2691,7 +2974,8 @@ class AzureBlobStore(AbstractStore):
2691
2974
  mount_cmd = mounting_utils.get_az_mount_cmd(self.container_name,
2692
2975
  self.storage_account_name,
2693
2976
  mount_path,
2694
- self.storage_account_key)
2977
+ self.storage_account_key,
2978
+ self._bucket_sub_path)
2695
2979
  return mounting_utils.get_mounting_command(mount_path, install_cmd,
2696
2980
  mount_cmd)
2697
2981
 
@@ -2790,11 +3074,12 @@ class R2Store(AbstractStore):
2790
3074
  source: str,
2791
3075
  region: Optional[str] = 'auto',
2792
3076
  is_sky_managed: Optional[bool] = None,
2793
- sync_on_reconstruction: Optional[bool] = True):
3077
+ sync_on_reconstruction: Optional[bool] = True,
3078
+ _bucket_sub_path: Optional[str] = None):
2794
3079
  self.client: 'boto3.client.Client'
2795
3080
  self.bucket: 'StorageHandle'
2796
3081
  super().__init__(name, source, region, is_sky_managed,
2797
- sync_on_reconstruction)
3082
+ sync_on_reconstruction, _bucket_sub_path)
2798
3083
 
2799
3084
  def _validate(self):
2800
3085
  if self.source is not None and isinstance(self.source, str):
@@ -2833,6 +3118,10 @@ class R2Store(AbstractStore):
2833
3118
  assert data_utils.verify_ibm_cos_bucket(self.name), (
2834
3119
  f'Source specified as {self.source}, a COS bucket. ',
2835
3120
  'COS Bucket should exist.')
3121
+ elif self.source.startswith('oci://'):
3122
+ raise NotImplementedError(
3123
+ 'Moving data from OCI to R2 is currently not supported.')
3124
+
2836
3125
  # Validate name
2837
3126
  self.name = S3Store.validate_name(self.name)
2838
3127
  # Check if the storage is enabled
@@ -2884,6 +3173,8 @@ class R2Store(AbstractStore):
2884
3173
  self._transfer_to_r2()
2885
3174
  elif self.source.startswith('r2://'):
2886
3175
  pass
3176
+ elif self.source.startswith('oci://'):
3177
+ self._transfer_to_r2()
2887
3178
  else:
2888
3179
  self.batch_aws_rsync([self.source])
2889
3180
  except exceptions.StorageUploadError:
@@ -2893,6 +3184,9 @@ class R2Store(AbstractStore):
2893
3184
  f'Upload failed for store {self.name}') from e
2894
3185
 
2895
3186
  def delete(self) -> None:
3187
+ if self._bucket_sub_path is not None and not self.is_sky_managed:
3188
+ return self._delete_sub_path()
3189
+
2896
3190
  deleted_by_skypilot = self._delete_r2_bucket(self.name)
2897
3191
  if deleted_by_skypilot:
2898
3192
  msg_str = f'Deleted R2 bucket {self.name}.'
@@ -2902,6 +3196,19 @@ class R2Store(AbstractStore):
2902
3196
  logger.info(f'{colorama.Fore.GREEN}{msg_str}'
2903
3197
  f'{colorama.Style.RESET_ALL}')
2904
3198
 
3199
+ def _delete_sub_path(self) -> None:
3200
+ assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
3201
+ deleted_by_skypilot = self._delete_r2_bucket_sub_path(
3202
+ self.name, self._bucket_sub_path)
3203
+ if deleted_by_skypilot:
3204
+ msg_str = f'Removed objects from R2 bucket ' \
3205
+ f'{self.name}/{self._bucket_sub_path}.'
3206
+ else:
3207
+ msg_str = f'Failed to remove objects from R2 bucket ' \
3208
+ f'{self.name}/{self._bucket_sub_path}.'
3209
+ logger.info(f'{colorama.Fore.GREEN}{msg_str}'
3210
+ f'{colorama.Style.RESET_ALL}')
3211
+
2905
3212
  def get_handle(self) -> StorageHandle:
2906
3213
  return cloudflare.resource('s3').Bucket(self.name)
2907
3214
 
@@ -2933,11 +3240,13 @@ class R2Store(AbstractStore):
2933
3240
  ])
2934
3241
  endpoint_url = cloudflare.create_endpoint()
2935
3242
  base_dir_path = shlex.quote(base_dir_path)
3243
+ sub_path = (f'/{self._bucket_sub_path}'
3244
+ if self._bucket_sub_path else '')
2936
3245
  sync_command = ('AWS_SHARED_CREDENTIALS_FILE='
2937
3246
  f'{cloudflare.R2_CREDENTIALS_PATH} '
2938
3247
  'aws s3 sync --no-follow-symlinks --exclude="*" '
2939
3248
  f'{includes} {base_dir_path} '
2940
- f's3://{self.name} '
3249
+ f's3://{self.name}{sub_path} '
2941
3250
  f'--endpoint {endpoint_url} '
2942
3251
  f'--profile={cloudflare.R2_PROFILE_NAME}')
2943
3252
  return sync_command
@@ -2952,11 +3261,13 @@ class R2Store(AbstractStore):
2952
3261
  ])
2953
3262
  endpoint_url = cloudflare.create_endpoint()
2954
3263
  src_dir_path = shlex.quote(src_dir_path)
3264
+ sub_path = (f'/{self._bucket_sub_path}'
3265
+ if self._bucket_sub_path else '')
2955
3266
  sync_command = ('AWS_SHARED_CREDENTIALS_FILE='
2956
3267
  f'{cloudflare.R2_CREDENTIALS_PATH} '
2957
3268
  f'aws s3 sync --no-follow-symlinks {excludes} '
2958
3269
  f'{src_dir_path} '
2959
- f's3://{self.name}/{dest_dir_name} '
3270
+ f's3://{self.name}{sub_path}/{dest_dir_name} '
2960
3271
  f'--endpoint {endpoint_url} '
2961
3272
  f'--profile={cloudflare.R2_PROFILE_NAME}')
2962
3273
  return sync_command
@@ -3087,11 +3398,9 @@ class R2Store(AbstractStore):
3087
3398
  endpoint_url = cloudflare.create_endpoint()
3088
3399
  r2_credential_path = cloudflare.R2_CREDENTIALS_PATH
3089
3400
  r2_profile_name = cloudflare.R2_PROFILE_NAME
3090
- mount_cmd = mounting_utils.get_r2_mount_cmd(r2_credential_path,
3091
- r2_profile_name,
3092
- endpoint_url,
3093
- self.bucket.name,
3094
- mount_path)
3401
+ mount_cmd = mounting_utils.get_r2_mount_cmd(
3402
+ r2_credential_path, r2_profile_name, endpoint_url, self.bucket.name,
3403
+ mount_path, self._bucket_sub_path)
3095
3404
  return mounting_utils.get_mounting_command(mount_path, install_cmd,
3096
3405
  mount_cmd)
3097
3406
 
@@ -3124,6 +3433,43 @@ class R2Store(AbstractStore):
3124
3433
  f'{self.name} but failed.') from e
3125
3434
  return cloudflare.resource('s3').Bucket(bucket_name)
3126
3435
 
3436
+ def _execute_r2_remove_command(self, command: str, bucket_name: str,
3437
+ hint_operating: str,
3438
+ hint_failed: str) -> bool:
3439
+ try:
3440
+ with rich_utils.safe_status(
3441
+ ux_utils.spinner_message(hint_operating)):
3442
+ subprocess.check_output(command,
3443
+ stderr=subprocess.STDOUT,
3444
+ shell=True)
3445
+ except subprocess.CalledProcessError as e:
3446
+ if 'NoSuchBucket' in e.output.decode('utf-8'):
3447
+ logger.debug(
3448
+ _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
3449
+ bucket_name=bucket_name))
3450
+ return False
3451
+ else:
3452
+ with ux_utils.print_exception_no_traceback():
3453
+ raise exceptions.StorageBucketDeleteError(
3454
+ f'{hint_failed}'
3455
+ f'Detailed error: {e.output}')
3456
+ return True
3457
+
3458
+ def _delete_r2_bucket_sub_path(self, bucket_name: str,
3459
+ sub_path: str) -> bool:
3460
+ """Deletes the sub path from the bucket."""
3461
+ endpoint_url = cloudflare.create_endpoint()
3462
+ remove_command = (
3463
+ f'AWS_SHARED_CREDENTIALS_FILE={cloudflare.R2_CREDENTIALS_PATH} '
3464
+ f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive '
3465
+ f'--endpoint {endpoint_url} '
3466
+ f'--profile={cloudflare.R2_PROFILE_NAME}')
3467
+ return self._execute_r2_remove_command(
3468
+ remove_command, bucket_name,
3469
+ f'Removing objects from R2 bucket {bucket_name}/{sub_path}',
3470
+ f'Failed to remove objects from R2 bucket {bucket_name}/{sub_path}.'
3471
+ )
3472
+
3127
3473
  def _delete_r2_bucket(self, bucket_name: str) -> bool:
3128
3474
  """Deletes R2 bucket, including all objects in bucket
3129
3475
 
@@ -3146,24 +3492,12 @@ class R2Store(AbstractStore):
3146
3492
  f'aws s3 rb s3://{bucket_name} --force '
3147
3493
  f'--endpoint {endpoint_url} '
3148
3494
  f'--profile={cloudflare.R2_PROFILE_NAME}')
3149
- try:
3150
- with rich_utils.safe_status(
3151
- ux_utils.spinner_message(
3152
- f'Deleting R2 bucket {bucket_name}')):
3153
- subprocess.check_output(remove_command,
3154
- stderr=subprocess.STDOUT,
3155
- shell=True)
3156
- except subprocess.CalledProcessError as e:
3157
- if 'NoSuchBucket' in e.output.decode('utf-8'):
3158
- logger.debug(
3159
- _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
3160
- bucket_name=bucket_name))
3161
- return False
3162
- else:
3163
- with ux_utils.print_exception_no_traceback():
3164
- raise exceptions.StorageBucketDeleteError(
3165
- f'Failed to delete R2 bucket {bucket_name}.'
3166
- f'Detailed error: {e.output}')
3495
+
3496
+ success = self._execute_r2_remove_command(
3497
+ remove_command, bucket_name, f'Deleting R2 bucket {bucket_name}',
3498
+ f'Failed to delete R2 bucket {bucket_name}.')
3499
+ if not success:
3500
+ return False
3167
3501
 
3168
3502
  # Wait until bucket deletion propagates on AWS servers
3169
3503
  while data_utils.verify_r2_bucket(bucket_name):
@@ -3182,11 +3516,12 @@ class IBMCosStore(AbstractStore):
3182
3516
  source: str,
3183
3517
  region: Optional[str] = 'us-east',
3184
3518
  is_sky_managed: Optional[bool] = None,
3185
- sync_on_reconstruction: bool = True):
3519
+ sync_on_reconstruction: bool = True,
3520
+ _bucket_sub_path: Optional[str] = None):
3186
3521
  self.client: 'storage.Client'
3187
3522
  self.bucket: 'StorageHandle'
3188
3523
  super().__init__(name, source, region, is_sky_managed,
3189
- sync_on_reconstruction)
3524
+ sync_on_reconstruction, _bucket_sub_path)
3190
3525
  self.bucket_rclone_profile = \
3191
3526
  Rclone.generate_rclone_bucket_profile_name(
3192
3527
  self.name, Rclone.RcloneClouds.IBM)
@@ -3331,10 +3666,22 @@ class IBMCosStore(AbstractStore):
3331
3666
  f'Upload failed for store {self.name}') from e
3332
3667
 
3333
3668
  def delete(self) -> None:
3669
+ if self._bucket_sub_path is not None and not self.is_sky_managed:
3670
+ return self._delete_sub_path()
3671
+
3334
3672
  self._delete_cos_bucket()
3335
3673
  logger.info(f'{colorama.Fore.GREEN}Deleted COS bucket {self.name}.'
3336
3674
  f'{colorama.Style.RESET_ALL}')
3337
3675
 
3676
+ def _delete_sub_path(self) -> None:
3677
+ assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
3678
+ bucket = self.s3_resource.Bucket(self.name)
3679
+ try:
3680
+ self._delete_cos_bucket_objects(bucket, self._bucket_sub_path + '/')
3681
+ except ibm.ibm_botocore.exceptions.ClientError as e:
3682
+ if e.__class__.__name__ == 'NoSuchBucket':
3683
+ logger.debug('bucket already removed')
3684
+
3338
3685
  def get_handle(self) -> StorageHandle:
3339
3686
  return self.s3_resource.Bucket(self.name)
3340
3687
 
@@ -3375,10 +3722,13 @@ class IBMCosStore(AbstractStore):
3375
3722
  # .git directory is excluded from the sync
3376
3723
  # wrapping src_dir_path with "" to support path with spaces
3377
3724
  src_dir_path = shlex.quote(src_dir_path)
3725
+ sub_path = (f'/{self._bucket_sub_path}'
3726
+ if self._bucket_sub_path else '')
3378
3727
  sync_command = (
3379
3728
  'rclone copy --exclude ".git/*" '
3380
3729
  f'{src_dir_path} '
3381
- f'{self.bucket_rclone_profile}:{self.name}/{dest_dir_name}')
3730
+ f'{self.bucket_rclone_profile}:{self.name}{sub_path}'
3731
+ f'/{dest_dir_name}')
3382
3732
  return sync_command
3383
3733
 
3384
3734
  def get_file_sync_command(base_dir_path, file_names) -> str:
@@ -3404,9 +3754,12 @@ class IBMCosStore(AbstractStore):
3404
3754
  for file_name in file_names
3405
3755
  ])
3406
3756
  base_dir_path = shlex.quote(base_dir_path)
3407
- sync_command = ('rclone copy '
3408
- f'{includes} {base_dir_path} '
3409
- f'{self.bucket_rclone_profile}:{self.name}')
3757
+ sub_path = (f'/{self._bucket_sub_path}'
3758
+ if self._bucket_sub_path else '')
3759
+ sync_command = (
3760
+ 'rclone copy '
3761
+ f'{includes} {base_dir_path} '
3762
+ f'{self.bucket_rclone_profile}:{self.name}{sub_path}')
3410
3763
  return sync_command
3411
3764
 
3412
3765
  # Generate message for upload
@@ -3491,6 +3844,7 @@ class IBMCosStore(AbstractStore):
3491
3844
  Rclone.RcloneClouds.IBM,
3492
3845
  self.region, # type: ignore
3493
3846
  )
3847
+
3494
3848
  if not bucket_region and self.sync_on_reconstruction:
3495
3849
  # bucket doesn't exist
3496
3850
  return self._create_cos_bucket(self.name, self.region), True
@@ -3537,7 +3891,8 @@ class IBMCosStore(AbstractStore):
3537
3891
  Rclone.RCLONE_CONFIG_PATH,
3538
3892
  self.bucket_rclone_profile,
3539
3893
  self.bucket.name,
3540
- mount_path)
3894
+ mount_path,
3895
+ self._bucket_sub_path)
3541
3896
  return mounting_utils.get_mounting_command(mount_path, install_cmd,
3542
3897
  mount_cmd)
3543
3898
 
@@ -3575,18 +3930,442 @@ class IBMCosStore(AbstractStore):
3575
3930
 
3576
3931
  return self.bucket
3577
3932
 
3578
- def _delete_cos_bucket(self):
3579
- bucket = self.s3_resource.Bucket(self.name)
3580
- try:
3581
- bucket_versioning = self.s3_resource.BucketVersioning(self.name)
3582
- if bucket_versioning.status == 'Enabled':
3933
+ def _delete_cos_bucket_objects(self,
3934
+ bucket: Any,
3935
+ prefix: Optional[str] = None):
3936
+ bucket_versioning = self.s3_resource.BucketVersioning(bucket.name)
3937
+ if bucket_versioning.status == 'Enabled':
3938
+ if prefix is not None:
3939
+ res = list(
3940
+ bucket.object_versions.filter(Prefix=prefix).delete())
3941
+ else:
3583
3942
  res = list(bucket.object_versions.delete())
3943
+ else:
3944
+ if prefix is not None:
3945
+ res = list(bucket.objects.filter(Prefix=prefix).delete())
3584
3946
  else:
3585
3947
  res = list(bucket.objects.delete())
3586
- logger.debug(f'Deleted bucket\'s content:\n{res}')
3948
+ logger.debug(f'Deleted bucket\'s content:\n{res}, prefix: {prefix}')
3949
+
3950
+ def _delete_cos_bucket(self):
3951
+ bucket = self.s3_resource.Bucket(self.name)
3952
+ try:
3953
+ self._delete_cos_bucket_objects(bucket)
3587
3954
  bucket.delete()
3588
3955
  bucket.wait_until_not_exists()
3589
3956
  except ibm.ibm_botocore.exceptions.ClientError as e:
3590
3957
  if e.__class__.__name__ == 'NoSuchBucket':
3591
3958
  logger.debug('bucket already removed')
3592
3959
  Rclone.delete_rclone_bucket_profile(self.name, Rclone.RcloneClouds.IBM)
3960
+
3961
+
3962
+ class OciStore(AbstractStore):
3963
+ """OciStore inherits from Storage Object and represents the backend
3964
+ for OCI buckets.
3965
+ """
3966
+
3967
+ _ACCESS_DENIED_MESSAGE = 'AccessDeniedException'
3968
+
3969
+ def __init__(self,
3970
+ name: str,
3971
+ source: str,
3972
+ region: Optional[str] = None,
3973
+ is_sky_managed: Optional[bool] = None,
3974
+ sync_on_reconstruction: Optional[bool] = True,
3975
+ _bucket_sub_path: Optional[str] = None):
3976
+ self.client: Any
3977
+ self.bucket: StorageHandle
3978
+ self.oci_config_file: str
3979
+ self.config_profile: str
3980
+ self.compartment: str
3981
+ self.namespace: str
3982
+
3983
+ # Bucket region should be consistent with the OCI config file
3984
+ region = oci.get_oci_config()['region']
3985
+
3986
+ super().__init__(name, source, region, is_sky_managed,
3987
+ sync_on_reconstruction, _bucket_sub_path)
3988
+ # TODO(zpoint): add _bucket_sub_path to the sync/mount/delete commands
3989
+
+ def _validate(self):
+ if self.source is not None and isinstance(self.source, str):
+ if self.source.startswith('oci://'):
+ assert self.name == data_utils.split_oci_path(self.source)[0], (
+ 'OCI bucket is specified as a path; the name should be '
+ 'the same as the OCI bucket name.')
+ elif not re.search(r'^\w+://', self.source):
+ # Treat it as local path.
+ pass
+ else:
+ raise NotImplementedError(
+ f'Moving data from {self.source} to OCI is not supported.')
+
+ # Validate name
+ self.name = self.validate_name(self.name)
+ # Check if the storage is enabled
+ if not _is_storage_cloud_enabled(str(clouds.OCI())):
+ with ux_utils.print_exception_no_traceback():
+ raise exceptions.ResourcesUnavailableError(
+ 'Storage \'store: oci\' specified, but ' \
+ 'OCI access is disabled. To fix, enable '\
+ 'OCI by running `sky check`. '\
+ 'More info: https://skypilot.readthedocs.io/en/latest/getting-started/installation.html.' # pylint: disable=line-too-long
+ )
+
+ @classmethod
+ def validate_name(cls, name) -> str:
+ """Validates the name of the OCI store.
+
+ Source for rules: https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/managingbuckets.htm#Managing_Buckets # pylint: disable=line-too-long
+ """
+
+ def _raise_no_traceback_name_error(err_str):
+ with ux_utils.print_exception_no_traceback():
+ raise exceptions.StorageNameError(err_str)
+
+ if name is not None and isinstance(name, str):
+ # Check for overall length
+ if not 1 <= len(name) <= 256:
+ _raise_no_traceback_name_error(
+ f'Invalid store name: name {name} must contain 1-256 '
+ 'characters.')
+
+ # Check for valid characters and start/end with a number or letter
+ pattern = r'^[A-Za-z0-9-._]+$'
+ if not re.match(pattern, name):
+ _raise_no_traceback_name_error(
+ f'Invalid store name: name {name} can only contain '
+ 'upper or lower case letters, numeric characters, hyphens '
+ '(-), underscores (_), and dots (.). Spaces are not '
+ 'allowed. Names must start and end with a number or '
+ 'letter.')
+ else:
+ _raise_no_traceback_name_error('Store name must be specified.')
+ return name
+
+ def initialize(self):
+ """Initializes the OCI store object on the cloud.
+
+ Initialization involves fetching the bucket if it exists, or creating
+ it if it does not.
+
+ Raises:
+ StorageBucketCreateError: If bucket creation fails
+ StorageBucketGetError: If fetching existing bucket fails
+ StorageInitError: If general initialization fails.
+ """
+ # pylint: disable=import-outside-toplevel
+ from sky.clouds.utils import oci_utils
+ from sky.provision.oci.query_utils import query_helper
+
+ self.oci_config_file = oci.get_config_file()
+ self.config_profile = oci_utils.oci_config.get_profile()
+
+ ## pylint: disable=line-too-long
+ # What is a compartment? See https://docs.oracle.com/en/cloud/foundation/cloud_architecture/governance/compartments.html
+ self.compartment = query_helper.find_compartment(self.region)
+ self.client = oci.get_object_storage_client(region=self.region,
+ profile=self.config_profile)
+ self.namespace = self.client.get_namespace(
+ compartment_id=oci.get_oci_config()['tenancy']).data
+
+ self.bucket, is_new_bucket = self._get_bucket()
+ if self.is_sky_managed is None:
+ # If is_sky_managed is not specified, then this is a new storage
+ # object (i.e., did not exist in global_user_state) and we should
+ # set the is_sky_managed property.
+ # If is_sky_managed is specified, then we take no action.
+ self.is_sky_managed = is_new_bucket
+
+ def upload(self):
+ """Uploads source to store bucket.
+
+ Upload must be called by the Storage handler - it is not called on
+ Store initialization.
+
+ Raises:
+ StorageUploadError: if upload fails.
+ """
+ try:
+ if isinstance(self.source, list):
+ self.batch_oci_rsync(self.source, create_dirs=True)
+ elif self.source is not None:
+ if self.source.startswith('oci://'):
+ pass
+ else:
+ self.batch_oci_rsync([self.source])
+ except exceptions.StorageUploadError:
+ raise
+ except Exception as e:
+ raise exceptions.StorageUploadError(
+ f'Upload failed for store {self.name}') from e
+
+ def delete(self) -> None:
+ deleted_by_skypilot = self._delete_oci_bucket(self.name)
+ if deleted_by_skypilot:
+ msg_str = f'Deleted OCI bucket {self.name}.'
+ else:
+ msg_str = (f'OCI bucket {self.name} may have been deleted '
+ f'externally. Removing from local state.')
+ logger.info(f'{colorama.Fore.GREEN}{msg_str}'
+ f'{colorama.Style.RESET_ALL}')
+
+ def get_handle(self) -> StorageHandle:
+ return self.client.get_bucket(namespace_name=self.namespace,
+ bucket_name=self.name).data
+
+ def batch_oci_rsync(self,
+ source_path_list: List[Path],
+ create_dirs: bool = False) -> None:
+ """Invokes the OCI CLI to batch upload a list of local paths to the bucket.
+
+ Uses the OCI bulk-upload operation to batch process the file upload.
+
+ Args:
+ source_path_list: List of paths to local files or directories
+ create_dirs: If the local_path is a directory and this is set to
+ False, the contents of the directory are directly uploaded to
+ root of the bucket. If the local_path is a directory and this is
+ set to True, the directory is created in the bucket root and
+ contents are uploaded to it.
+ """
+
+ @oci.with_oci_env
+ def get_file_sync_command(base_dir_path, file_names):
+ includes = ' '.join(
+ [f'--include "{file_name}"' for file_name in file_names])
+ sync_command = (
+ 'oci os object bulk-upload --no-follow-symlinks --overwrite '
+ f'--bucket-name {self.name} --namespace-name {self.namespace} '
+ f'--src-dir "{base_dir_path}" {includes}')
+
+ return sync_command
+
+ @oci.with_oci_env
+ def get_dir_sync_command(src_dir_path, dest_dir_name):
+ if dest_dir_name and not str(dest_dir_name).endswith('/'):
+ dest_dir_name = f'{dest_dir_name}/'
+
+ excluded_list = storage_utils.get_excluded_files(src_dir_path)
+ excluded_list.append('.git/*')
+ excludes = ' '.join([
+ f'--exclude {shlex.quote(file_name)}'
+ for file_name in excluded_list
+ ])
+
+ # we exclude .git directory from the sync
+ sync_command = (
+ 'oci os object bulk-upload --no-follow-symlinks --overwrite '
+ f'--bucket-name {self.name} --namespace-name {self.namespace} '
+ f'--object-prefix "{dest_dir_name}" --src-dir "{src_dir_path}" '
+ f'{excludes} ')
+
+ return sync_command
+
+ # Generate message for upload
+ if len(source_path_list) > 1:
+ source_message = f'{len(source_path_list)} paths'
+ else:
+ source_message = source_path_list[0]
+
+ log_path = sky_logging.generate_tmp_logging_file_path(
+ _STORAGE_LOG_FILE_NAME)
+ sync_path = f'{source_message} -> oci://{self.name}/'
+ with rich_utils.safe_status(
+ ux_utils.spinner_message(f'Syncing {sync_path}',
+ log_path=log_path)):
+ data_utils.parallel_upload(
+ source_path_list=source_path_list,
+ filesync_command_generator=get_file_sync_command,
+ dirsync_command_generator=get_dir_sync_command,
+ log_path=log_path,
+ bucket_name=self.name,
+ access_denied_message=self._ACCESS_DENIED_MESSAGE,
+ create_dirs=create_dirs,
+ max_concurrent_uploads=1)
+
+ logger.info(
+ ux_utils.finishing_message(f'Storage synced: {sync_path}',
+ log_path))
+
+ def _get_bucket(self) -> Tuple[StorageHandle, bool]:
+ """Obtains the OCI bucket.
+ If the bucket exists, this method will connect to the bucket.
+
+ If the bucket does not exist, there are three cases:
+ 1) Raise an error if the bucket source starts with oci://
+ 2) Return None if bucket has been externally deleted and
+ sync_on_reconstruction is False
+ 3) Create and return a new bucket otherwise
+
+ Return tuple (Bucket, Boolean): The first item is the bucket
+ json payload from the OCI API call; the second item indicates
+ whether this is a newly created bucket (True) or an existing
+ bucket (False).
+
+ Raises:
+ StorageBucketCreateError: If creating the bucket fails
+ StorageBucketGetError: If fetching a bucket fails
+ """
+ try:
+ get_bucket_response = self.client.get_bucket(
+ namespace_name=self.namespace, bucket_name=self.name)
+ bucket = get_bucket_response.data
+ return bucket, False
+ except oci.service_exception() as e:
+ if e.status == 404: # Not Found
+ if isinstance(self.source,
+ str) and self.source.startswith('oci://'):
+ with ux_utils.print_exception_no_traceback():
+ raise exceptions.StorageBucketGetError(
+ 'Attempted to connect to a non-existent bucket: '
+ f'{self.source}') from e
+ else:
+ # If bucket cannot be found (i.e., does not exist), it is
+ # to be created by Sky. However, creation is skipped if
+ # Store object is being reconstructed for deletion.
+ if self.sync_on_reconstruction:
+ bucket = self._create_oci_bucket(self.name)
+ return bucket, True
+ else:
+ return None, False
+ elif e.status == 401: # Unauthorized
+ # AccessDenied error for buckets that are private and not
+ # owned by user.
+ command = (
+ f'oci os object list --namespace-name {self.namespace} '
+ f'--bucket-name {self.name}')
+ with ux_utils.print_exception_no_traceback():
+ raise exceptions.StorageBucketGetError(
+ _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
+ f' To debug, consider running `{command}`.') from e
+ else:
+ # Unknown / unexpected error happened. This might happen when
+ # the Object Storage service itself is not functioning normally
+ # (e.g., a maintenance event causes an internal server error or
+ # a request timeout).
+ with ux_utils.print_exception_no_traceback():
+ raise exceptions.StorageBucketGetError(
+ f'Failed to connect to OCI bucket {self.name}') from e
+
+ def mount_command(self, mount_path: str) -> str:
+ """Returns the command to mount the bucket to the mount_path.
+
+ Uses Rclone to mount the bucket.
+
+ Args:
+ mount_path: str; Path to mount the bucket to.
+ """
+ install_cmd = mounting_utils.get_rclone_install_cmd()
+ mount_cmd = mounting_utils.get_oci_mount_cmd(
+ mount_path=mount_path,
+ store_name=self.name,
+ region=str(self.region),
+ namespace=self.namespace,
+ compartment=self.bucket.compartment_id,
+ config_file=self.oci_config_file,
+ config_profile=self.config_profile)
+ version_check_cmd = mounting_utils.get_rclone_version_check_cmd()
+
+ return mounting_utils.get_mounting_command(mount_path, install_cmd,
+ mount_cmd, version_check_cmd)
+
+ def _download_file(self, remote_path: str, local_path: str) -> None:
+ """Downloads a file from the OCI bucket to a local path.
+
+ Args:
+ remote_path: str; Remote path on OCI bucket
+ local_path: str; Local path on user's device
+ """
+ if remote_path.startswith(f'/{self.name}'):
+ # If the remote path is /bucket_name, we need to
+ # remove the leading /
+ remote_path = remote_path.lstrip('/')
+
+ filename = os.path.basename(remote_path)
+ if not local_path.endswith(filename):
+ local_path = os.path.join(local_path, filename)
+
+ @oci.with_oci_env
+ def get_file_download_command(remote_path, local_path):
+ download_command = (f'oci os object get --bucket-name {self.name} '
+ f'--namespace-name {self.namespace} '
+ f'--name {remote_path} --file {local_path}')
+
+ return download_command
+
+ download_command = get_file_download_command(remote_path, local_path)
+
+ try:
+ with rich_utils.safe_status(
+ f'[bold cyan]Downloading: {remote_path} -> {local_path}[/]'
+ ):
+ subprocess.check_output(download_command,
+ stderr=subprocess.STDOUT,
+ shell=True)
+ except subprocess.CalledProcessError as e:
+ logger.error(f'Download failed: {remote_path} -> {local_path}.\n'
+ f'Detailed errors: {e.output}')
+ with ux_utils.print_exception_no_traceback():
+ raise exceptions.StorageBucketDeleteError(
+ f'Failed to download file {self.name}:{remote_path}.') from e
+
+ def _create_oci_bucket(self, bucket_name: str) -> StorageHandle:
+ """Creates an OCI bucket with the given name in the store's region.
+
+ Args:
+ bucket_name: str; Name of bucket
+ """
+ logger.debug(f'_create_oci_bucket: {bucket_name}')
+ try:
+ create_bucket_response = self.client.create_bucket(
+ namespace_name=self.namespace,
+ create_bucket_details=oci.oci.object_storage.models.
+ CreateBucketDetails(
+ name=bucket_name,
+ compartment_id=self.compartment,
+ ))
+ bucket = create_bucket_response.data
+ return bucket
+ except oci.service_exception() as e:
+ with ux_utils.print_exception_no_traceback():
+ raise exceptions.StorageBucketCreateError(
+ f'Failed to create OCI bucket: {self.name}') from e
+
+ def _delete_oci_bucket(self, bucket_name: str) -> bool:
+ """Deletes the OCI bucket, including all objects in the bucket.
+
+ Args:
+ bucket_name: str; Name of bucket
+
+ Returns:
+ bool; True if bucket was deleted, False if it was deleted externally.
+ """
+ logger.debug(f'_delete_oci_bucket: {bucket_name}')
+
+ @oci.with_oci_env
+ def get_bucket_delete_command(bucket_name):
+ remove_command = (f'oci os bucket delete --bucket-name '
+ f'{bucket_name} --empty --force')
+
+ return remove_command
+
+ remove_command = get_bucket_delete_command(bucket_name)
+
+ try:
+ with rich_utils.safe_status(
+ f'[bold cyan]Deleting OCI bucket {bucket_name}[/]'):
+ subprocess.check_output(remove_command.split(' '),
+ stderr=subprocess.STDOUT)
+ except subprocess.CalledProcessError as e:
+ if 'BucketNotFound' in e.output.decode('utf-8'):
+ logger.debug(
+ _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
+ bucket_name=bucket_name))
+ return False
+ else:
+ logger.error(e.output)
+ with ux_utils.print_exception_no_traceback():
+ raise exceptions.StorageBucketDeleteError(
+ f'Failed to delete OCI bucket {bucket_name}.')
+ return True
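To make the new `OciStore.batch_oci_rsync` path concrete: a minimal sketch of the `oci os object bulk-upload` command string that `get_dir_sync_command` above assembles. The bucket name, namespace, and paths are illustrative assumptions, not values from the diff; only the string construction mirrors the added code.

```python
import shlex

bucket_name = 'sky-demo-bucket'        # assumed bucket name
namespace = 'mytenancynamespace'       # assumed OCI Object Storage namespace
dest_dir_name = 'project'              # prefix under which objects are placed
src_dir_path = '/home/user/project'    # local directory to upload
excluded_list = ['.git/*']             # the code above always excludes .git

if dest_dir_name and not dest_dir_name.endswith('/'):
    dest_dir_name = f'{dest_dir_name}/'
excludes = ' '.join(
    f'--exclude {shlex.quote(p)}' for p in excluded_list)
sync_command = (
    'oci os object bulk-upload --no-follow-symlinks --overwrite '
    f'--bucket-name {bucket_name} --namespace-name {namespace} '
    f'--object-prefix "{dest_dir_name}" --src-dir "{src_dir_path}" '
    f'{excludes} ')
print(sync_command)
# oci os object bulk-upload --no-follow-symlinks --overwrite
#   --bucket-name sky-demo-bucket --namespace-name mytenancynamespace
#   --object-prefix "project/" --src-dir "/home/user/project" --exclude '.git/*'
```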