skypilot-nightly 1.0.0.dev20241227__py3-none-any.whl → 1.0.0.dev20250124__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/common.py +15 -9
- sky/adaptors/do.py +20 -0
- sky/adaptors/oci.py +32 -1
- sky/authentication.py +20 -8
- sky/backends/backend_utils.py +44 -0
- sky/backends/cloud_vm_ray_backend.py +202 -41
- sky/backends/wheel_utils.py +4 -1
- sky/check.py +31 -1
- sky/cli.py +39 -43
- sky/cloud_stores.py +71 -2
- sky/clouds/__init__.py +2 -0
- sky/clouds/aws.py +137 -50
- sky/clouds/cloud.py +4 -0
- sky/clouds/do.py +303 -0
- sky/clouds/gcp.py +9 -0
- sky/clouds/kubernetes.py +3 -3
- sky/clouds/oci.py +20 -9
- sky/clouds/service_catalog/__init__.py +7 -3
- sky/clouds/service_catalog/constants.py +1 -1
- sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +10 -51
- sky/clouds/service_catalog/do_catalog.py +111 -0
- sky/clouds/service_catalog/kubernetes_catalog.py +14 -0
- sky/clouds/utils/oci_utils.py +15 -2
- sky/core.py +8 -5
- sky/data/data_transfer.py +37 -0
- sky/data/data_utils.py +19 -4
- sky/data/mounting_utils.py +99 -15
- sky/data/storage.py +961 -130
- sky/global_user_state.py +1 -1
- sky/jobs/__init__.py +2 -0
- sky/jobs/constants.py +8 -7
- sky/jobs/controller.py +19 -22
- sky/jobs/core.py +46 -2
- sky/jobs/recovery_strategy.py +114 -143
- sky/jobs/scheduler.py +283 -0
- sky/jobs/state.py +290 -21
- sky/jobs/utils.py +346 -95
- sky/optimizer.py +6 -3
- sky/provision/aws/config.py +59 -29
- sky/provision/azure/instance.py +1 -1
- sky/provision/do/__init__.py +11 -0
- sky/provision/do/config.py +14 -0
- sky/provision/do/constants.py +10 -0
- sky/provision/do/instance.py +287 -0
- sky/provision/do/utils.py +306 -0
- sky/provision/docker_utils.py +22 -11
- sky/provision/gcp/instance_utils.py +15 -9
- sky/provision/kubernetes/instance.py +3 -2
- sky/provision/kubernetes/utils.py +125 -20
- sky/provision/oci/query_utils.py +17 -14
- sky/provision/provisioner.py +0 -1
- sky/provision/runpod/instance.py +10 -1
- sky/provision/runpod/utils.py +170 -13
- sky/resources.py +1 -1
- sky/serve/autoscalers.py +359 -301
- sky/serve/controller.py +10 -8
- sky/serve/core.py +84 -7
- sky/serve/load_balancer.py +27 -10
- sky/serve/replica_managers.py +1 -3
- sky/serve/serve_state.py +10 -5
- sky/serve/serve_utils.py +28 -1
- sky/serve/service.py +4 -3
- sky/serve/service_spec.py +31 -0
- sky/setup_files/dependencies.py +4 -1
- sky/skylet/constants.py +8 -4
- sky/skylet/events.py +7 -3
- sky/skylet/job_lib.py +10 -30
- sky/skylet/log_lib.py +8 -8
- sky/skylet/log_lib.pyi +3 -0
- sky/skylet/providers/command_runner.py +5 -7
- sky/skylet/skylet.py +1 -1
- sky/task.py +28 -1
- sky/templates/do-ray.yml.j2 +98 -0
- sky/templates/jobs-controller.yaml.j2 +41 -7
- sky/templates/runpod-ray.yml.j2 +13 -0
- sky/templates/sky-serve-controller.yaml.j2 +4 -0
- sky/usage/usage_lib.py +10 -2
- sky/utils/accelerator_registry.py +12 -8
- sky/utils/controller_utils.py +114 -39
- sky/utils/db_utils.py +18 -4
- sky/utils/kubernetes/deploy_remote_cluster.sh +5 -5
- sky/utils/log_utils.py +2 -0
- sky/utils/resources_utils.py +25 -21
- sky/utils/schemas.py +27 -0
- sky/utils/subprocess_utils.py +54 -10
- {skypilot_nightly-1.0.0.dev20241227.dist-info → skypilot_nightly-1.0.0.dev20250124.dist-info}/METADATA +23 -4
- {skypilot_nightly-1.0.0.dev20241227.dist-info → skypilot_nightly-1.0.0.dev20250124.dist-info}/RECORD +92 -82
- {skypilot_nightly-1.0.0.dev20241227.dist-info → skypilot_nightly-1.0.0.dev20250124.dist-info}/WHEEL +1 -1
- {skypilot_nightly-1.0.0.dev20241227.dist-info → skypilot_nightly-1.0.0.dev20250124.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20241227.dist-info → skypilot_nightly-1.0.0.dev20250124.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20241227.dist-info → skypilot_nightly-1.0.0.dev20250124.dist-info}/top_level.txt +0 -0
sky/data/storage.py
CHANGED
@@ -24,6 +24,7 @@ from sky.adaptors import azure
 from sky.adaptors import cloudflare
 from sky.adaptors import gcp
 from sky.adaptors import ibm
+from sky.adaptors import oci
 from sky.data import data_transfer
 from sky.data import data_utils
 from sky.data import mounting_utils
@@ -54,7 +55,9 @@ STORE_ENABLED_CLOUDS: List[str] = [
     str(clouds.AWS()),
     str(clouds.GCP()),
     str(clouds.Azure()),
-    str(clouds.IBM()),
+    str(clouds.IBM()),
+    str(clouds.OCI()),
+    cloudflare.NAME,
 ]

 # Maximum number of concurrent rsync upload processes
@@ -115,6 +118,7 @@ class StoreType(enum.Enum):
     AZURE = 'AZURE'
     R2 = 'R2'
     IBM = 'IBM'
+    OCI = 'OCI'

     @classmethod
     def from_cloud(cls, cloud: str) -> 'StoreType':
@@ -128,6 +132,8 @@ class StoreType(enum.Enum):
             return StoreType.R2
         elif cloud.lower() == str(clouds.Azure()).lower():
             return StoreType.AZURE
+        elif cloud.lower() == str(clouds.OCI()).lower():
+            return StoreType.OCI
         elif cloud.lower() == str(clouds.Lambda()).lower():
             with ux_utils.print_exception_no_traceback():
                 raise ValueError('Lambda Cloud does not provide cloud storage.')
@@ -149,6 +155,8 @@ class StoreType(enum.Enum):
             return StoreType.R2
         elif isinstance(store, IBMCosStore):
             return StoreType.IBM
+        elif isinstance(store, OciStore):
+            return StoreType.OCI
         else:
             with ux_utils.print_exception_no_traceback():
                 raise ValueError(f'Unknown store type: {store}')
@@ -165,6 +173,8 @@ class StoreType(enum.Enum):
             return 'r2://'
         elif self == StoreType.IBM:
             return 'cos://'
+        elif self == StoreType.OCI:
+            return 'oci://'
         else:
             with ux_utils.print_exception_no_traceback():
                 raise ValueError(f'Unknown store type: {self}')
@@ -190,6 +200,45 @@ class StoreType(enum.Enum):
         bucket_endpoint_url = f'{store_type.store_prefix()}{path}'
         return bucket_endpoint_url

+    @classmethod
+    def get_fields_from_store_url(
+        cls, store_url: str
+    ) -> Tuple['StoreType', Type['AbstractStore'], str, str, Optional[str],
+               Optional[str]]:
+        """Returns the store type, store class, bucket name, and sub path from
+        a store URL, and the storage account name and region if applicable.
+
+        Args:
+            store_url: str; The store URL.
+        """
+        # The full path from the user config of IBM COS contains the region,
+        # and Azure Blob Storage contains the storage account name, we need to
+        # pass these information to the store constructor.
+        storage_account_name = None
+        region = None
+        for store_type in StoreType:
+            if store_url.startswith(store_type.store_prefix()):
+                if store_type == StoreType.AZURE:
+                    storage_account_name, bucket_name, sub_path = \
+                        data_utils.split_az_path(store_url)
+                    store_cls: Type['AbstractStore'] = AzureBlobStore
+                elif store_type == StoreType.IBM:
+                    bucket_name, sub_path, region = data_utils.split_cos_path(
+                        store_url)
+                    store_cls = IBMCosStore
+                elif store_type == StoreType.R2:
+                    bucket_name, sub_path = data_utils.split_r2_path(store_url)
+                    store_cls = R2Store
+                elif store_type == StoreType.GCS:
+                    bucket_name, sub_path = data_utils.split_gcs_path(store_url)
+                    store_cls = GcsStore
+                elif store_type == StoreType.S3:
+                    bucket_name, sub_path = data_utils.split_s3_path(store_url)
+                    store_cls = S3Store
+                return store_type, store_cls, bucket_name, \
+                    sub_path, storage_account_name, region
+        raise ValueError(f'Unknown store URL: {store_url}')
+

 class StorageMode(enum.Enum):
     MOUNT = 'MOUNT'
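
The new StoreType.get_fields_from_store_url helper above maps a store URL to the matching store class plus the parsed bucket name, sub path, and (where applicable) storage account and region. A minimal illustrative sketch of calling it; the bucket URL is a made-up example and the exact sub-path format depends on the split helpers in data_utils:

    from sky.data import storage

    # Other prefixes (gs://, r2://, cos://, azure) dispatch to their
    # corresponding store classes in the same way.
    fields = storage.StoreType.get_fields_from_store_url('s3://my-bucket/train-data')
    store_type, store_cls, bucket_name, sub_path, storage_account, region = fields
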
@@ -216,25 +265,29 @@ class AbstractStore:
                      name: str,
                      source: Optional[SourceType],
                      region: Optional[str] = None,
-                     is_sky_managed: Optional[bool] = None):
+                     is_sky_managed: Optional[bool] = None,
+                     _bucket_sub_path: Optional[str] = None):
             self.name = name
             self.source = source
             self.region = region
             self.is_sky_managed = is_sky_managed
+            self._bucket_sub_path = _bucket_sub_path

         def __repr__(self):
             return (f'StoreMetadata('
                     f'\n\tname={self.name},'
                     f'\n\tsource={self.source},'
                     f'\n\tregion={self.region},'
-                    f'\n\tis_sky_managed={self.is_sky_managed})')
+                    f'\n\tis_sky_managed={self.is_sky_managed},'
+                    f'\n\t_bucket_sub_path={self._bucket_sub_path})')

     def __init__(self,
                  name: str,
                  source: Optional[SourceType],
                  region: Optional[str] = None,
                  is_sky_managed: Optional[bool] = None,
-                 sync_on_reconstruction: Optional[bool] = True):
+                 sync_on_reconstruction: Optional[bool] = True,
+                 _bucket_sub_path: Optional[str] = None):  # pylint: disable=invalid-name
         """Initialize AbstractStore

         Args:
@@ -248,7 +301,11 @@ class AbstractStore:
            there. This is set to false when the Storage object is created not
            for direct use, e.g. for 'sky storage delete', or the storage is
            being re-used, e.g., for `sky start` on a stopped cluster.
-
+          _bucket_sub_path: str; The prefix of the bucket directory to be
+            created in the store, e.g. if _bucket_sub_path=my-dir, the files
+            will be uploaded to s3://<bucket>/my-dir/.
+            This only works if source is a local directory.
+            # TODO(zpoint): Add support for non-local source.
         Raises:
             StorageBucketCreateError: If bucket creation fails
             StorageBucketGetError: If fetching existing bucket fails
@@ -259,10 +316,29 @@ class AbstractStore:
         self.region = region
         self.is_sky_managed = is_sky_managed
         self.sync_on_reconstruction = sync_on_reconstruction
+
+        # To avoid mypy error
+        self._bucket_sub_path: Optional[str] = None
+        # Trigger the setter to strip any leading/trailing slashes.
+        self.bucket_sub_path = _bucket_sub_path
         # Whether sky is responsible for the lifecycle of the Store.
         self._validate()
         self.initialize()

+    @property
+    def bucket_sub_path(self) -> Optional[str]:
+        """Get the bucket_sub_path."""
+        return self._bucket_sub_path
+
+    @bucket_sub_path.setter
+    # pylint: disable=invalid-name
+    def bucket_sub_path(self, bucket_sub_path: Optional[str]) -> None:
+        """Set the bucket_sub_path, stripping any leading/trailing slashes."""
+        if bucket_sub_path is not None:
+            self._bucket_sub_path = bucket_sub_path.strip('/')
+        else:
+            self._bucket_sub_path = None
+
     @classmethod
     def from_metadata(cls, metadata: StoreMetadata, **override_args):
         """Create a Store from a StoreMetadata object.
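
The bucket_sub_path property and setter added above normalize the user-supplied prefix before it is used to build bucket URLs. A standalone sketch of the same stripping rule (not SkyPilot code, just the normalization the setter applies):

    from typing import Optional

    def normalize_bucket_sub_path(bucket_sub_path: Optional[str]) -> Optional[str]:
        # Mirror of the setter: strip leading/trailing slashes, preserve None.
        if bucket_sub_path is not None:
            return bucket_sub_path.strip('/')
        return None

    assert normalize_bucket_sub_path('/my-dir/nested/') == 'my-dir/nested'
    assert normalize_bucket_sub_path(None) is None
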
@@ -270,19 +346,26 @@ class AbstractStore:
         Used when reconstructing Storage and Store objects from
         global_user_state.
         """
-        return cls(name=override_args.get('name', metadata.name),
-                   source=override_args.get('source', metadata.source),
-                   region=override_args.get('region', metadata.region),
-                   is_sky_managed=override_args.get('is_sky_managed',
-                                                    metadata.is_sky_managed),
-                   sync_on_reconstruction=override_args.get(
-                       'sync_on_reconstruction', True))
+        return cls(
+            name=override_args.get('name', metadata.name),
+            source=override_args.get('source', metadata.source),
+            region=override_args.get('region', metadata.region),
+            is_sky_managed=override_args.get('is_sky_managed',
+                                             metadata.is_sky_managed),
+            sync_on_reconstruction=override_args.get('sync_on_reconstruction',
+                                                     True),
+            # backward compatibility
+            _bucket_sub_path=override_args.get(
+                '_bucket_sub_path',
+                metadata._bucket_sub_path  # pylint: disable=protected-access
+            ) if hasattr(metadata, '_bucket_sub_path') else None)

     def get_metadata(self) -> StoreMetadata:
         return self.StoreMetadata(name=self.name,
                                   source=self.source,
                                   region=self.region,
-                                  is_sky_managed=self.is_sky_managed)
+                                  is_sky_managed=self.is_sky_managed,
+                                  _bucket_sub_path=self._bucket_sub_path)

     def initialize(self):
         """Initializes the Store object on the cloud.
@@ -310,7 +393,11 @@ class AbstractStore:
         raise NotImplementedError

     def delete(self) -> None:
-        """Removes the Storage
+        """Removes the Storage from the cloud."""
+        raise NotImplementedError
+
+    def _delete_sub_path(self) -> None:
+        """Removes objects from the sub path in the bucket."""
         raise NotImplementedError

     def get_handle(self) -> StorageHandle:
@@ -454,13 +541,19 @@ class Storage(object):
             if storetype in self.sky_stores:
                 del self.sky_stores[storetype]

-    def __init__(self,
-                 name: Optional[str] = None,
-                 source: Optional[SourceType] = None,
-                 stores: Optional[Dict[StoreType, AbstractStore]] = None,
-                 persistent: Optional[bool] = True,
-                 mode: StorageMode = StorageMode.MOUNT,
-                 sync_on_reconstruction: bool = True) -> None:
+    def __init__(
+        self,
+        name: Optional[str] = None,
+        source: Optional[SourceType] = None,
+        stores: Optional[Dict[StoreType, AbstractStore]] = None,
+        persistent: Optional[bool] = True,
+        mode: StorageMode = StorageMode.MOUNT,
+        sync_on_reconstruction: bool = True,
+        # pylint: disable=invalid-name
+        _is_sky_managed: Optional[bool] = None,
+        # pylint: disable=invalid-name
+        _bucket_sub_path: Optional[str] = None
+    ) -> None:
         """Initializes a Storage object.

         Three fields are required: the name of the storage, the source
@@ -498,6 +591,18 @@ class Storage(object):
            there. This is set to false when the Storage object is created not
            for direct use, e.g. for 'sky storage delete', or the storage is
            being re-used, e.g., for `sky start` on a stopped cluster.
+          _is_sky_managed: Optional[bool]; Indicates if the storage is managed
+            by Sky. Without this argument, the controller's behavior differs
+            from the local machine. For example, if a bucket does not exist:
+            Local Machine (is_sky_managed=True) →
+            Controller (is_sky_managed=False).
+            With this argument, the controller aligns with the local machine,
+            ensuring it retains the is_sky_managed information from the YAML.
+            During teardown, if is_sky_managed is True, the controller should
+            delete the bucket. Otherwise, it might mistakenly delete only the
+            sub-path, assuming is_sky_managed is False.
+          _bucket_sub_path: Optional[str]; The subdirectory to use for the
+            storage object.
         """
         self.name: str
         self.source = source
@@ -505,6 +610,8 @@ class Storage(object):
         self.mode = mode
         assert mode in StorageMode
         self.sync_on_reconstruction = sync_on_reconstruction
+        self._is_sky_managed = _is_sky_managed
+        self._bucket_sub_path = _bucket_sub_path

         # TODO(romilb, zhwu): This is a workaround to support storage deletion
         # for spot. Once sky storage supports forced management for external
@@ -564,6 +671,14 @@ class Storage(object):
             self.add_store(StoreType.R2)
         elif self.source.startswith('cos://'):
             self.add_store(StoreType.IBM)
+        elif self.source.startswith('oci://'):
+            self.add_store(StoreType.OCI)
+
+    def get_bucket_sub_path_prefix(self, blob_path: str) -> str:
+        """Adds the bucket sub path prefix to the blob path."""
+        if self._bucket_sub_path is not None:
+            return f'{blob_path}/{self._bucket_sub_path}'
+        return blob_path

     @staticmethod
     def _validate_source(
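
Storage.get_bucket_sub_path_prefix above simply appends the sub path to a blob path when one is set. An equivalent standalone sketch (bucket and sub path names are placeholders):

    from typing import Optional

    def get_bucket_sub_path_prefix(blob_path: str,
                                   bucket_sub_path: Optional[str]) -> str:
        # Same rule as the method above: append '/<sub_path>' only when set.
        if bucket_sub_path is not None:
            return f'{blob_path}/{bucket_sub_path}'
        return blob_path

    assert get_bucket_sub_path_prefix('s3://my-bucket', 'job-42') == 's3://my-bucket/job-42'
    assert get_bucket_sub_path_prefix('s3://my-bucket', None) == 's3://my-bucket'
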
@@ -644,7 +759,7 @@ class Storage(object):
                     'using a bucket by writing <destination_path>: '
                     f'{source} in the file_mounts section of your YAML')
             is_local_source = True
-        elif split_path.scheme in ['s3', 'gs', 'https', 'r2', 'cos']:
+        elif split_path.scheme in ['s3', 'gs', 'https', 'r2', 'cos', 'oci']:
             is_local_source = False
             # Storage mounting does not support mounting specific files from
             # cloud store - ensure path points to only a directory
@@ -668,7 +783,7 @@ class Storage(object):
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageSourceError(
                     f'Supported paths: local, s3://, gs://, https://, '
-                    f'r2://, cos://. Got: {source}')
+                    f'r2://, cos://, oci://. Got: {source}')
         return source, is_local_source

     def _validate_storage_spec(self, name: Optional[str]) -> None:
@@ -683,7 +798,7 @@ class Storage(object):
             """
             prefix = name.split('://')[0]
             prefix = prefix.lower()
-            if prefix in ['s3', 'gs', 'https', 'r2', 'cos']:
+            if prefix in ['s3', 'gs', 'https', 'r2', 'cos', 'oci']:
                 with ux_utils.print_exception_no_traceback():
                     raise exceptions.StorageNameError(
                         'Prefix detected: `name` cannot start with '
@@ -775,29 +890,40 @@ class Storage(object):
                 store = S3Store.from_metadata(
                     s_metadata,
                     source=self.source,
-                    sync_on_reconstruction=self.sync_on_reconstruction)
+                    sync_on_reconstruction=self.sync_on_reconstruction,
+                    _bucket_sub_path=self._bucket_sub_path)
             elif s_type == StoreType.GCS:
                 store = GcsStore.from_metadata(
                     s_metadata,
                     source=self.source,
-                    sync_on_reconstruction=self.sync_on_reconstruction)
+                    sync_on_reconstruction=self.sync_on_reconstruction,
+                    _bucket_sub_path=self._bucket_sub_path)
             elif s_type == StoreType.AZURE:
                 assert isinstance(s_metadata,
                                   AzureBlobStore.AzureBlobStoreMetadata)
                 store = AzureBlobStore.from_metadata(
                     s_metadata,
                     source=self.source,
-                    sync_on_reconstruction=self.sync_on_reconstruction)
+                    sync_on_reconstruction=self.sync_on_reconstruction,
+                    _bucket_sub_path=self._bucket_sub_path)
             elif s_type == StoreType.R2:
                 store = R2Store.from_metadata(
                     s_metadata,
                     source=self.source,
-                    sync_on_reconstruction=self.sync_on_reconstruction)
+                    sync_on_reconstruction=self.sync_on_reconstruction,
+                    _bucket_sub_path=self._bucket_sub_path)
             elif s_type == StoreType.IBM:
                 store = IBMCosStore.from_metadata(
                     s_metadata,
                     source=self.source,
-                    sync_on_reconstruction=self.sync_on_reconstruction)
+                    sync_on_reconstruction=self.sync_on_reconstruction,
+                    _bucket_sub_path=self._bucket_sub_path)
+            elif s_type == StoreType.OCI:
+                store = OciStore.from_metadata(
+                    s_metadata,
+                    source=self.source,
+                    sync_on_reconstruction=self.sync_on_reconstruction,
+                    _bucket_sub_path=self._bucket_sub_path)
             else:
                 with ux_utils.print_exception_no_traceback():
                     raise ValueError(f'Unknown store type: {s_type}')
@@ -817,7 +943,6 @@ class Storage(object):
                     'to be reconstructed while the corresponding '
                     'bucket was externally deleted.')
                 continue
-
             self._add_store(store, is_reconstructed=True)

     @classmethod
@@ -873,6 +998,7 @@ class Storage(object):
                             f'storage account {storage_account_name!r}.')
             else:
                 logger.info(f'Storage type {store_type} already exists.')
+
             return self.stores[store_type]

         store_cls: Type[AbstractStore]
@@ -886,25 +1012,30 @@ class Storage(object):
             store_cls = R2Store
         elif store_type == StoreType.IBM:
             store_cls = IBMCosStore
+        elif store_type == StoreType.OCI:
+            store_cls = OciStore
         else:
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageSpecError(
                     f'{store_type} not supported as a Store.')
-
-        # Initialize store object and get/create bucket
         try:
             store = store_cls(
                 name=self.name,
                 source=self.source,
                 region=region,
-                sync_on_reconstruction=self.sync_on_reconstruction)
+                sync_on_reconstruction=self.sync_on_reconstruction,
+                is_sky_managed=self._is_sky_managed,
+                _bucket_sub_path=self._bucket_sub_path)
         except exceptions.StorageBucketCreateError:
             # Creation failed, so this must be sky managed store. Add failure
             # to state.
             logger.error(f'Could not create {store_type} store '
                          f'with name {self.name}.')
-            global_user_state.set_storage_status(self.name,
-                                                 StorageStatus.INIT_FAILED)
+            try:
+                global_user_state.set_storage_status(self.name,
+                                                     StorageStatus.INIT_FAILED)
+            except ValueError as e:
+                logger.error(f'Error setting storage status: {e}')
             raise
         except exceptions.StorageBucketGetError:
             # Bucket get failed, so this is not sky managed. Do not update state
@@ -952,18 +1083,16 @@ class Storage(object):
         if not self.stores:
             logger.info('No backing stores found. Deleting storage.')
             global_user_state.remove_storage(self.name)
-        if store_type:
+        if store_type is not None:
             store = self.stores[store_type]
-            is_sky_managed = store.is_sky_managed
             # We delete a store from the cloud if it's sky managed. Else just
             # remove handle and return
-            if is_sky_managed:
+            if store.is_sky_managed:
                 self.handle.remove_store(store)
                 store.delete()
                 # Check remaining stores - if none is sky managed, remove
                 # the storage from global_user_state.
-                delete = all(
-                    s.is_sky_managed is False for s in self.stores.values())
+                delete = all(not s.is_sky_managed for s in self.stores.values())
                 if delete:
                     global_user_state.remove_storage(self.name)
                 else:
@@ -1020,12 +1149,15 @@ class Storage(object):
     def from_yaml_config(cls, config: Dict[str, Any]) -> 'Storage':
         common_utils.validate_schema(config, schemas.get_storage_schema(),
                                      'Invalid storage YAML: ')
-
         name = config.pop('name', None)
         source = config.pop('source', None)
         store = config.pop('store', None)
         mode_str = config.pop('mode', None)
         force_delete = config.pop('_force_delete', None)
+        # pylint: disable=invalid-name
+        _is_sky_managed = config.pop('_is_sky_managed', None)
+        # pylint: disable=invalid-name
+        _bucket_sub_path = config.pop('_bucket_sub_path', None)
         if force_delete is None:
             force_delete = False

@@ -1045,7 +1177,9 @@ class Storage(object):
         storage_obj = cls(name=name,
                           source=source,
                           persistent=persistent,
-                          mode=mode)
+                          mode=mode,
+                          _is_sky_managed=_is_sky_managed,
+                          _bucket_sub_path=_bucket_sub_path)
         if store is not None:
             storage_obj.add_store(StoreType(store.upper()))

@@ -1053,7 +1187,7 @@ class Storage(object):
         storage_obj.force_delete = force_delete
         return storage_obj

-    def to_yaml_config(self) -> Dict[str,
+    def to_yaml_config(self) -> Dict[str, Any]:
         config = {}

         def add_if_not_none(key: str, value: Optional[Any]):
@@ -1069,13 +1203,18 @@ class Storage(object):
         add_if_not_none('source', self.source)

         stores = None
+        is_sky_managed = self._is_sky_managed
         if self.stores:
             stores = ','.join([store.value for store in self.stores])
+            is_sky_managed = list(self.stores.values())[0].is_sky_managed
         add_if_not_none('store', stores)
+        add_if_not_none('_is_sky_managed', is_sky_managed)
         add_if_not_none('persistent', self.persistent)
         add_if_not_none('mode', self.mode.value)
         if self.force_delete:
             config['_force_delete'] = True
+        if self._bucket_sub_path is not None:
+            config['_bucket_sub_path'] = self._bucket_sub_path
         return config


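
With the two private keys now round-tripped by from_yaml_config/to_yaml_config above, a storage spec can carry them from the local machine to the controller. A hedged sketch of such a config dict; the bucket name and local source are made up, and it assumes the updated storage schema accepts the two underscore-prefixed keys and that the source path exists locally:

    from sky.data import storage

    config = {
        'name': 'my-bucket',             # hypothetical bucket name
        'source': '~/datasets',          # hypothetical local source directory
        'mode': 'MOUNT',
        'persistent': True,
        '_is_sky_managed': False,        # preserve the local machine's decision
        '_bucket_sub_path': 'job-42',    # objects land under <bucket>/job-42/
    }
    storage_obj = storage.Storage.from_yaml_config(config)
    assert storage_obj.to_yaml_config().get('_bucket_sub_path') == 'job-42'
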
@@ -1097,7 +1236,8 @@ class S3Store(AbstractStore):
                 source: str,
                 region: Optional[str] = _DEFAULT_REGION,
                 is_sky_managed: Optional[bool] = None,
-                sync_on_reconstruction: bool = True):
+                sync_on_reconstruction: bool = True,
+                _bucket_sub_path: Optional[str] = None):
         self.client: 'boto3.client.Client'
         self.bucket: 'StorageHandle'
         # TODO(romilb): This is purely a stopgap fix for
@@ -1110,7 +1250,7 @@ class S3Store(AbstractStore):
                            f'{self._DEFAULT_REGION} for bucket {name!r}.')
             region = self._DEFAULT_REGION
         super().__init__(name, source, region, is_sky_managed,
-                         sync_on_reconstruction)
+                         sync_on_reconstruction, _bucket_sub_path)

     def _validate(self):
         if self.source is not None and isinstance(self.source, str):
@@ -1149,6 +1289,9 @@ class S3Store(AbstractStore):
                 assert data_utils.verify_ibm_cos_bucket(self.name), (
                     f'Source specified as {self.source}, a COS bucket. ',
                     'COS Bucket should exist.')
+            elif self.source.startswith('oci://'):
+                raise NotImplementedError(
+                    'Moving data from OCI to S3 is currently not supported.')
         # Validate name
         self.name = self.validate_name(self.name)

@@ -1260,6 +1403,8 @@ class S3Store(AbstractStore):
                     self._transfer_to_s3()
                 elif self.source.startswith('r2://'):
                     self._transfer_to_s3()
+                elif self.source.startswith('oci://'):
+                    self._transfer_to_s3()
                 else:
                     self.batch_aws_rsync([self.source])
             except exceptions.StorageUploadError:
@@ -1269,6 +1414,9 @@ class S3Store(AbstractStore):
                     f'Upload failed for store {self.name}') from e

     def delete(self) -> None:
+        if self._bucket_sub_path is not None and not self.is_sky_managed:
+            return self._delete_sub_path()
+
         deleted_by_skypilot = self._delete_s3_bucket(self.name)
         if deleted_by_skypilot:
             msg_str = f'Deleted S3 bucket {self.name}.'
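
Each store's delete() now short-circuits to a sub-path deletion when only a prefix is managed and the bucket itself is not Sky-managed; the same pattern repeats for GCS, Azure, R2, and IBM COS below. A schematic, store-agnostic sketch of that dispatch (the full-bucket branch uses a hypothetical helper name standing in for the per-store method):

    def delete(store) -> None:
        # Prefix-only cleanup: the bucket is externally owned, so remove just
        # the objects under the managed sub path.
        if store._bucket_sub_path is not None and not store.is_sky_managed:
            store._delete_sub_path()
            return
        # Otherwise fall through to the store-specific full-bucket deletion
        # (e.g. _delete_s3_bucket / _delete_gcs_bucket / _delete_r2_bucket).
        store._delete_bucket()  # hypothetical stand-in for the per-store method
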
@@ -1278,6 +1426,19 @@ class S3Store(AbstractStore):
             logger.info(f'{colorama.Fore.GREEN}{msg_str}'
                         f'{colorama.Style.RESET_ALL}')

+    def _delete_sub_path(self) -> None:
+        assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
+        deleted_by_skypilot = self._delete_s3_bucket_sub_path(
+            self.name, self._bucket_sub_path)
+        if deleted_by_skypilot:
+            msg_str = f'Removed objects from S3 bucket ' \
+                      f'{self.name}/{self._bucket_sub_path}.'
+        else:
+            msg_str = f'Failed to remove objects from S3 bucket ' \
+                      f'{self.name}/{self._bucket_sub_path}.'
+        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
+                    f'{colorama.Style.RESET_ALL}')
+
     def get_handle(self) -> StorageHandle:
         return aws.resource('s3').Bucket(self.name)

@@ -1308,9 +1469,11 @@ class S3Store(AbstractStore):
                 for file_name in file_names
             ])
             base_dir_path = shlex.quote(base_dir_path)
+            sub_path = (f'/{self._bucket_sub_path}'
+                        if self._bucket_sub_path else '')
             sync_command = ('aws s3 sync --no-follow-symlinks --exclude="*" '
                             f'{includes} {base_dir_path} '
-                            f's3://{self.name}')
+                            f's3://{self.name}{sub_path}')
             return sync_command

         def get_dir_sync_command(src_dir_path, dest_dir_name):
@@ -1322,9 +1485,11 @@ class S3Store(AbstractStore):
                 for file_name in excluded_list
             ])
             src_dir_path = shlex.quote(src_dir_path)
+            sub_path = (f'/{self._bucket_sub_path}'
+                        if self._bucket_sub_path else '')
             sync_command = (f'aws s3 sync --no-follow-symlinks {excludes} '
                             f'{src_dir_path} '
-                            f's3://{self.name}/{dest_dir_name}')
+                            f's3://{self.name}{sub_path}/{dest_dir_name}')
             return sync_command

         # Generate message for upload
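
After the change, the generated aws s3 sync destination places the optional sub path between the bucket and the destination directory. A tiny sketch of the resulting URL shape (placeholder values):

    name, dest_dir_name = 'my-bucket', 'outputs'
    for bucket_sub_path in (None, 'job-42'):
        sub_path = f'/{bucket_sub_path}' if bucket_sub_path else ''
        print(f's3://{name}{sub_path}/{dest_dir_name}')
    # -> s3://my-bucket/outputs
    # -> s3://my-bucket/job-42/outputs
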
@@ -1442,7 +1607,8 @@ class S3Store(AbstractStore):
         """
         install_cmd = mounting_utils.get_s3_mount_install_cmd()
         mount_cmd = mounting_utils.get_s3_mount_cmd(self.bucket.name,
-                                                    mount_path)
+                                                    mount_path,
+                                                    self._bucket_sub_path)
         return mounting_utils.get_mounting_command(mount_path, install_cmd,
                                                     mount_cmd)

@@ -1492,6 +1658,27 @@ class S3Store(AbstractStore):
             ) from e
         return aws.resource('s3').Bucket(bucket_name)

+    def _execute_s3_remove_command(self, command: str, bucket_name: str,
+                                   hint_operating: str,
+                                   hint_failed: str) -> bool:
+        try:
+            with rich_utils.safe_status(
+                    ux_utils.spinner_message(hint_operating)):
+                subprocess.check_output(command.split(' '),
+                                        stderr=subprocess.STDOUT)
+        except subprocess.CalledProcessError as e:
+            if 'NoSuchBucket' in e.output.decode('utf-8'):
+                logger.debug(
+                    _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
+                        bucket_name=bucket_name))
+                return False
+            else:
+                with ux_utils.print_exception_no_traceback():
+                    raise exceptions.StorageBucketDeleteError(
+                        f'{hint_failed}'
+                        f'Detailed error: {e.output}')
+        return True
+
     def _delete_s3_bucket(self, bucket_name: str) -> bool:
         """Deletes S3 bucket, including all objects in bucket

@@ -1500,6 +1687,9 @@ class S3Store(AbstractStore):

         Returns:
             bool; True if bucket was deleted, False if it was deleted externally.
+
+        Raises:
+            StorageBucketDeleteError: If deleting the bucket fails.
         """
         # Deleting objects is very slow programatically
         # (i.e. bucket.objects.all().delete() is slow).
@@ -1509,29 +1699,28 @@ class S3Store(AbstractStore):
         # The fastest way to delete is to run `aws s3 rb --force`,
         # which removes the bucket by force.
         remove_command = f'aws s3 rb s3://{bucket_name} --force'
-        try:
-            with rich_utils.safe_status(
-                    ux_utils.spinner_message(
-                        f'Deleting S3 bucket [green]{bucket_name}[/]')):
-                subprocess.check_output(remove_command.split(' '),
-                                        stderr=subprocess.STDOUT)
-        except subprocess.CalledProcessError as e:
-            if 'NoSuchBucket' in e.output.decode('utf-8'):
-                logger.debug(
-                    _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
-                        bucket_name=bucket_name))
-                return False
-            else:
-                with ux_utils.print_exception_no_traceback():
-                    raise exceptions.StorageBucketDeleteError(
-                        f'Failed to delete S3 bucket {bucket_name}.'
-                        f'Detailed error: {e.output}')
+        success = self._execute_s3_remove_command(
+            remove_command, bucket_name,
+            f'Deleting S3 bucket [green]{bucket_name}[/]',
+            f'Failed to delete S3 bucket {bucket_name}.')
+        if not success:
+            return False

         # Wait until bucket deletion propagates on AWS servers
         while data_utils.verify_s3_bucket(bucket_name):
             time.sleep(0.1)
         return True

+    def _delete_s3_bucket_sub_path(self, bucket_name: str,
+                                   sub_path: str) -> bool:
+        """Deletes the sub path from the bucket."""
+        remove_command = f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive'
+        return self._execute_s3_remove_command(
+            remove_command, bucket_name, f'Removing objects from S3 bucket '
+            f'[green]{bucket_name}/{sub_path}[/]',
+            f'Failed to remove objects from S3 bucket {bucket_name}/{sub_path}.'
+        )
+

 class GcsStore(AbstractStore):
     """GcsStore inherits from Storage Object and represents the backend
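
The refactor above funnels both removal paths through _execute_s3_remove_command; only the AWS CLI command differs. Side by side, with placeholder bucket and sub path names:

    bucket_name, sub_path = 'my-bucket', 'job-42'
    # Full bucket removal (unchanged behavior):
    rb_command = f'aws s3 rb s3://{bucket_name} --force'
    # New prefix-only removal used by _delete_s3_bucket_sub_path:
    rm_command = f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive'
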
@@ -1545,11 +1734,12 @@ class GcsStore(AbstractStore):
                 source: str,
                 region: Optional[str] = 'us-central1',
                 is_sky_managed: Optional[bool] = None,
-                sync_on_reconstruction: Optional[bool] = True):
+                sync_on_reconstruction: Optional[bool] = True,
+                _bucket_sub_path: Optional[str] = None):
         self.client: 'storage.Client'
         self.bucket: StorageHandle
         super().__init__(name, source, region, is_sky_managed,
-                         sync_on_reconstruction)
+                         sync_on_reconstruction, _bucket_sub_path)

     def _validate(self):
         if self.source is not None and isinstance(self.source, str):
@@ -1588,6 +1778,9 @@ class GcsStore(AbstractStore):
                 assert data_utils.verify_ibm_cos_bucket(self.name), (
                     f'Source specified as {self.source}, a COS bucket. ',
                     'COS Bucket should exist.')
+            elif self.source.startswith('oci://'):
+                raise NotImplementedError(
+                    'Moving data from OCI to GCS is currently not supported.')
         # Validate name
         self.name = self.validate_name(self.name)
         # Check if the storage is enabled
@@ -1696,6 +1889,8 @@ class GcsStore(AbstractStore):
                     self._transfer_to_gcs()
                 elif self.source.startswith('r2://'):
                     self._transfer_to_gcs()
+                elif self.source.startswith('oci://'):
+                    self._transfer_to_gcs()
                 else:
                     # If a single directory is specified in source, upload
                     # contents to root of bucket by suffixing /*.
@@ -1707,6 +1902,9 @@ class GcsStore(AbstractStore):
                     f'Upload failed for store {self.name}') from e

     def delete(self) -> None:
+        if self._bucket_sub_path is not None and not self.is_sky_managed:
+            return self._delete_sub_path()
+
         deleted_by_skypilot = self._delete_gcs_bucket(self.name)
         if deleted_by_skypilot:
             msg_str = f'Deleted GCS bucket {self.name}.'
@@ -1716,6 +1914,19 @@ class GcsStore(AbstractStore):
             logger.info(f'{colorama.Fore.GREEN}{msg_str}'
                         f'{colorama.Style.RESET_ALL}')

+    def _delete_sub_path(self) -> None:
+        assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
+        deleted_by_skypilot = self._delete_gcs_bucket(self.name,
+                                                      self._bucket_sub_path)
+        if deleted_by_skypilot:
+            msg_str = f'Deleted objects in GCS bucket ' \
+                      f'{self.name}/{self._bucket_sub_path}.'
+        else:
+            msg_str = f'GCS bucket {self.name} may have ' \
+                      'been deleted externally.'
+        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
+                    f'{colorama.Style.RESET_ALL}')
+
     def get_handle(self) -> StorageHandle:
         return self.client.get_bucket(self.name)

@@ -1789,9 +2000,11 @@ class GcsStore(AbstractStore):
             sync_format = '|'.join(file_names)
             gsutil_alias, alias_gen = data_utils.get_gsutil_command()
             base_dir_path = shlex.quote(base_dir_path)
+            sub_path = (f'/{self._bucket_sub_path}'
+                        if self._bucket_sub_path else '')
             sync_command = (f'{alias_gen}; {gsutil_alias} '
                             f'rsync -e -x \'^(?!{sync_format}$).*\' '
-                            f'{base_dir_path} gs://{self.name}')
+                            f'{base_dir_path} gs://{self.name}{sub_path}')
             return sync_command

         def get_dir_sync_command(src_dir_path, dest_dir_name):
@@ -1801,9 +2014,11 @@ class GcsStore(AbstractStore):
             excludes = '|'.join(excluded_list)
             gsutil_alias, alias_gen = data_utils.get_gsutil_command()
             src_dir_path = shlex.quote(src_dir_path)
+            sub_path = (f'/{self._bucket_sub_path}'
+                        if self._bucket_sub_path else '')
             sync_command = (f'{alias_gen}; {gsutil_alias} '
                             f'rsync -e -r -x \'({excludes})\' {src_dir_path} '
-                            f'gs://{self.name}/{dest_dir_name}')
+                            f'gs://{self.name}{sub_path}/{dest_dir_name}')
             return sync_command

         # Generate message for upload
@@ -1908,7 +2123,8 @@ class GcsStore(AbstractStore):
         """
         install_cmd = mounting_utils.get_gcs_mount_install_cmd()
         mount_cmd = mounting_utils.get_gcs_mount_cmd(self.bucket.name,
-                                                     mount_path)
+                                                     mount_path,
+                                                     self._bucket_sub_path)
         version_check_cmd = (
             f'gcsfuse --version | grep -q {mounting_utils.GCSFUSE_VERSION}')
         return mounting_utils.get_mounting_command(mount_path, install_cmd,
@@ -1948,19 +2164,38 @@ class GcsStore(AbstractStore):
                    f'{new_bucket.storage_class}{colorama.Style.RESET_ALL}')
         return new_bucket

-    def _delete_gcs_bucket(self, bucket_name: str) -> bool:
-        """Deletes GCS bucket, including all objects in bucket
+    def _delete_gcs_bucket(
+        self,
+        bucket_name: str,
+        # pylint: disable=invalid-name
+        _bucket_sub_path: Optional[str] = None
+    ) -> bool:
+        """Deletes objects in GCS bucket

         Args:
             bucket_name: str; Name of bucket
+            _bucket_sub_path: str; Sub path in the bucket, if provided only
+                objects in the sub path will be deleted, else the whole bucket will
+                be deleted

         Returns:
             bool; True if bucket was deleted, False if it was deleted externally.
-        """

+        Raises:
+            StorageBucketDeleteError: If deleting the bucket fails.
+            PermissionError: If the bucket is external and the user is not
+                allowed to delete it.
+        """
+        if _bucket_sub_path is not None:
+            command_suffix = f'/{_bucket_sub_path}'
+            hint_text = 'objects in '
+        else:
+            command_suffix = ''
+            hint_text = ''
         with rich_utils.safe_status(
                 ux_utils.spinner_message(
-                    f'Deleting GCS bucket [green]{bucket_name}[/]')):
+                    f'Deleting {hint_text}GCS bucket '
+                    f'[green]{bucket_name}{command_suffix}[/]')):
             try:
                 self.client.get_bucket(bucket_name)
             except gcp.forbidden_exception() as e:
@@ -1978,8 +2213,9 @@ class GcsStore(AbstractStore):
                 return False
         try:
             gsutil_alias, alias_gen = data_utils.get_gsutil_command()
-            remove_obj_command = (f'{alias_gen};{gsutil_alias} '
-                                  f'rm -r gs://{bucket_name}')
+            remove_obj_command = (
+                f'{alias_gen};{gsutil_alias} '
+                f'rm -r gs://{bucket_name}{command_suffix}')
             subprocess.check_output(remove_obj_command,
                                     stderr=subprocess.STDOUT,
                                     shell=True,
@@ -1988,7 +2224,8 @@ class GcsStore(AbstractStore):
         except subprocess.CalledProcessError as e:
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageBucketDeleteError(
-                    f'Failed to delete GCS bucket {bucket_name}.'
+                    f'Failed to delete {hint_text}GCS bucket '
+                    f'{bucket_name}{command_suffix}.'
                     f'Detailed error: {e.output}')


@@ -2040,7 +2277,8 @@ class AzureBlobStore(AbstractStore):
                 storage_account_name: str = '',
                 region: Optional[str] = 'eastus',
                 is_sky_managed: Optional[bool] = None,
-                sync_on_reconstruction: bool = True):
+                sync_on_reconstruction: bool = True,
+                _bucket_sub_path: Optional[str] = None):
         self.storage_client: 'storage.Client'
         self.resource_client: 'storage.Client'
         self.container_name: str
@@ -2052,7 +2290,7 @@ class AzureBlobStore(AbstractStore):
         if region is None:
             region = 'eastus'
         super().__init__(name, source, region, is_sky_managed,
-                         sync_on_reconstruction)
+                         sync_on_reconstruction, _bucket_sub_path)

     @classmethod
     def from_metadata(cls, metadata: AbstractStore.StoreMetadata,
@@ -2122,6 +2360,9 @@ class AzureBlobStore(AbstractStore):
                 assert data_utils.verify_ibm_cos_bucket(self.name), (
                     f'Source specified as {self.source}, a COS bucket. ',
                     'COS Bucket should exist.')
+            elif self.source.startswith('oci://'):
+                raise NotImplementedError(
+                    'Moving data from OCI to AZureBlob is not supported.')
         # Validate name
         self.name = self.validate_name(self.name)

@@ -2199,6 +2440,17 @@ class AzureBlobStore(AbstractStore):
         """
         self.storage_client = data_utils.create_az_client('storage')
         self.resource_client = data_utils.create_az_client('resource')
+        self._update_storage_account_name_and_resource()
+
+        self.container_name, is_new_bucket = self._get_bucket()
+        if self.is_sky_managed is None:
+            # If is_sky_managed is not specified, then this is a new storage
+            # object (i.e., did not exist in global_user_state) and we should
+            # set the is_sky_managed property.
+            # If is_sky_managed is specified, then we take no action.
+            self.is_sky_managed = is_new_bucket
+
+    def _update_storage_account_name_and_resource(self):
         self.storage_account_name, self.resource_group_name = (
             self._get_storage_account_and_resource_group())

@@ -2209,13 +2461,13 @@ class AzureBlobStore(AbstractStore):
             self.storage_account_name, self.resource_group_name,
             self.storage_client, self.resource_client)

-        self.container_name, is_new_bucket = self._get_bucket()
-        if self.is_sky_managed is None:
-            # If is_sky_managed is not specified, then this is a new storage
-            # object (i.e., did not exist in global_user_state) and we should
-            # set the is_sky_managed property.
-            # If is_sky_managed is specified, then we take no action.
-            self.is_sky_managed = is_new_bucket
+    def update_storage_attributes(self, **kwargs: Dict[str, Any]):
+        assert 'storage_account_name' in kwargs, (
+            'only storage_account_name supported')
+        assert isinstance(kwargs['storage_account_name'],
+                          str), ('storage_account_name must be a string')
+        self.storage_account_name = kwargs['storage_account_name']
+        self._update_storage_account_name_and_resource()

     @staticmethod
     def get_default_storage_account_name(region: Optional[str]) -> str:
@@ -2474,6 +2726,8 @@ class AzureBlobStore(AbstractStore):
                     raise NotImplementedError(error_message.format('R2'))
                 elif self.source.startswith('cos://'):
                     raise NotImplementedError(error_message.format('IBM COS'))
+                elif self.source.startswith('oci://'):
+                    raise NotImplementedError(error_message.format('OCI'))
                 else:
                     self.batch_az_blob_sync([self.source])
             except exceptions.StorageUploadError:
@@ -2484,6 +2738,9 @@ class AzureBlobStore(AbstractStore):

     def delete(self) -> None:
         """Deletes the storage."""
+        if self._bucket_sub_path is not None and not self.is_sky_managed:
+            return self._delete_sub_path()
+
         deleted_by_skypilot = self._delete_az_bucket(self.name)
         if deleted_by_skypilot:
             msg_str = (f'Deleted AZ Container {self.name!r} under storage '
@@ -2494,6 +2751,32 @@ class AzureBlobStore(AbstractStore):
             logger.info(f'{colorama.Fore.GREEN}{msg_str}'
                         f'{colorama.Style.RESET_ALL}')

+    def _delete_sub_path(self) -> None:
+        assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
+        try:
+            container_url = data_utils.AZURE_CONTAINER_URL.format(
+                storage_account_name=self.storage_account_name,
+                container_name=self.name)
+            container_client = data_utils.create_az_client(
+                client_type='container',
+                container_url=container_url,
+                storage_account_name=self.storage_account_name,
+                resource_group_name=self.resource_group_name)
+            # List and delete blobs in the specified directory
+            blobs = container_client.list_blobs(
+                name_starts_with=self._bucket_sub_path + '/')
+            for blob in blobs:
+                container_client.delete_blob(blob.name)
+            logger.info(
+                f'Deleted objects from sub path {self._bucket_sub_path} '
+                f'in container {self.name}.')
+        except Exception as e:  # pylint: disable=broad-except
+            logger.error(
+                f'Failed to delete objects from sub path '
+                f'{self._bucket_sub_path} in container {self.name}. '
+                f'Details: {common_utils.format_exception(e, use_bracket=True)}'
+            )
+

     def get_handle(self) -> StorageHandle:
         """Returns the Storage Handle object."""
         return self.storage_client.blob_containers.get(
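
The Azure _delete_sub_path above lists blobs by prefix and deletes them one by one through SkyPilot's data_utils client helper. A hedged sketch of the same pattern using the azure-storage-blob SDK directly; the container URL and SAS token are placeholders:

    from azure.storage.blob import ContainerClient

    container_client = ContainerClient.from_container_url(
        'https://myaccount.blob.core.windows.net/my-container?<sas-token>')
    # Delete every blob under the managed prefix, mirroring the diff's loop.
    for blob in container_client.list_blobs(name_starts_with='job-42/'):
        container_client.delete_blob(blob.name)
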
@@ -2520,13 +2803,15 @@ class AzureBlobStore(AbstractStore):
             includes_list = ';'.join(file_names)
             includes = f'--include-pattern "{includes_list}"'
             base_dir_path = shlex.quote(base_dir_path)
+            container_path = (f'{self.container_name}/{self._bucket_sub_path}'
+                              if self._bucket_sub_path else self.container_name)
             sync_command = (f'az storage blob sync '
                             f'--account-name {self.storage_account_name} '
                             f'--account-key {self.storage_account_key} '
                             f'{includes} '
                             '--delete-destination false '
                             f'--source {base_dir_path} '
-                            f'--container {self.container_name}')
+                            f'--container {container_path}')
             return sync_command

         def get_dir_sync_command(src_dir_path, dest_dir_name) -> str:
@@ -2537,8 +2822,11 @@ class AzureBlobStore(AbstractStore):
                 [file_name.rstrip('*') for file_name in excluded_list])
             excludes = f'--exclude-path "{excludes_list}"'
             src_dir_path = shlex.quote(src_dir_path)
-            container_path = (f'{self.container_name}/{dest_dir_name}'
-                              if dest_dir_name else self.container_name)
+            container_path = (f'{self.container_name}/{self._bucket_sub_path}'
+                              if self._bucket_sub_path else
+                              f'{self.container_name}')
+            if dest_dir_name:
+                container_path = f'{container_path}/{dest_dir_name}'
             sync_command = (f'az storage blob sync '
                             f'--account-name {self.storage_account_name} '
                             f'--account-key {self.storage_account_key} '
@@ -2661,6 +2949,7 @@ class AzureBlobStore(AbstractStore):
                             f'{self.storage_account_name!r}.'
                             'Details: '
                             f'{common_utils.format_exception(e, use_bracket=True)}')
+
         # If the container cannot be found in both private and public settings,
         # the container is to be created by Sky. However, creation is skipped
         # if Store object is being reconstructed for deletion or re-mount with
@@ -2691,7 +2980,8 @@ class AzureBlobStore(AbstractStore):
         mount_cmd = mounting_utils.get_az_mount_cmd(self.container_name,
                                                     self.storage_account_name,
                                                     mount_path,
-                                                    self.storage_account_key)
+                                                    self.storage_account_key,
+                                                    self._bucket_sub_path)
         return mounting_utils.get_mounting_command(mount_path, install_cmd,
                                                     mount_cmd)

@@ -2790,11 +3080,12 @@ class R2Store(AbstractStore):
                 source: str,
                 region: Optional[str] = 'auto',
                 is_sky_managed: Optional[bool] = None,
-                sync_on_reconstruction: Optional[bool] = True):
+                sync_on_reconstruction: Optional[bool] = True,
+                _bucket_sub_path: Optional[str] = None):
         self.client: 'boto3.client.Client'
         self.bucket: 'StorageHandle'
         super().__init__(name, source, region, is_sky_managed,
-                         sync_on_reconstruction)
+                         sync_on_reconstruction, _bucket_sub_path)

     def _validate(self):
         if self.source is not None and isinstance(self.source, str):
@@ -2833,6 +3124,10 @@ class R2Store(AbstractStore):
                 assert data_utils.verify_ibm_cos_bucket(self.name), (
                     f'Source specified as {self.source}, a COS bucket. ',
                     'COS Bucket should exist.')
+            elif self.source.startswith('oci://'):
+                raise NotImplementedError(
+                    'Moving data from OCI to R2 is currently not supported.')
+
         # Validate name
         self.name = S3Store.validate_name(self.name)
         # Check if the storage is enabled
@@ -2884,6 +3179,8 @@ class R2Store(AbstractStore):
                     self._transfer_to_r2()
                 elif self.source.startswith('r2://'):
                     pass
+                elif self.source.startswith('oci://'):
+                    self._transfer_to_r2()
                 else:
                     self.batch_aws_rsync([self.source])
             except exceptions.StorageUploadError:
@@ -2893,6 +3190,9 @@ class R2Store(AbstractStore):
                     f'Upload failed for store {self.name}') from e

     def delete(self) -> None:
+        if self._bucket_sub_path is not None and not self.is_sky_managed:
+            return self._delete_sub_path()
+
         deleted_by_skypilot = self._delete_r2_bucket(self.name)
         if deleted_by_skypilot:
             msg_str = f'Deleted R2 bucket {self.name}.'
@@ -2902,6 +3202,19 @@ class R2Store(AbstractStore):
             logger.info(f'{colorama.Fore.GREEN}{msg_str}'
                         f'{colorama.Style.RESET_ALL}')

+    def _delete_sub_path(self) -> None:
+        assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
+        deleted_by_skypilot = self._delete_r2_bucket_sub_path(
+            self.name, self._bucket_sub_path)
+        if deleted_by_skypilot:
+            msg_str = f'Removed objects from R2 bucket ' \
+                      f'{self.name}/{self._bucket_sub_path}.'
+        else:
+            msg_str = f'Failed to remove objects from R2 bucket ' \
+                      f'{self.name}/{self._bucket_sub_path}.'
+        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
+                    f'{colorama.Style.RESET_ALL}')
+
     def get_handle(self) -> StorageHandle:
         return cloudflare.resource('s3').Bucket(self.name)

@@ -2933,11 +3246,13 @@ class R2Store(AbstractStore):
             ])
             endpoint_url = cloudflare.create_endpoint()
             base_dir_path = shlex.quote(base_dir_path)
+            sub_path = (f'/{self._bucket_sub_path}'
+                        if self._bucket_sub_path else '')
             sync_command = ('AWS_SHARED_CREDENTIALS_FILE='
                             f'{cloudflare.R2_CREDENTIALS_PATH} '
                             'aws s3 sync --no-follow-symlinks --exclude="*" '
                             f'{includes} {base_dir_path} '
-                            f's3://{self.name} '
+                            f's3://{self.name}{sub_path} '
                             f'--endpoint {endpoint_url} '
                             f'--profile={cloudflare.R2_PROFILE_NAME}')
             return sync_command
@@ -2952,11 +3267,13 @@ class R2Store(AbstractStore):
             ])
             endpoint_url = cloudflare.create_endpoint()
             src_dir_path = shlex.quote(src_dir_path)
+            sub_path = (f'/{self._bucket_sub_path}'
+                        if self._bucket_sub_path else '')
             sync_command = ('AWS_SHARED_CREDENTIALS_FILE='
                             f'{cloudflare.R2_CREDENTIALS_PATH} '
                             f'aws s3 sync --no-follow-symlinks {excludes} '
                             f'{src_dir_path} '
-                            f's3://{self.name}/{dest_dir_name} '
+                            f's3://{self.name}{sub_path}/{dest_dir_name} '
                             f'--endpoint {endpoint_url} '
                             f'--profile={cloudflare.R2_PROFILE_NAME}')
             return sync_command
@@ -3087,11 +3404,9 @@ class R2Store(AbstractStore):
         endpoint_url = cloudflare.create_endpoint()
         r2_credential_path = cloudflare.R2_CREDENTIALS_PATH
         r2_profile_name = cloudflare.R2_PROFILE_NAME
-        mount_cmd = mounting_utils.get_r2_mount_cmd(r2_credential_path,
-                                                    r2_profile_name,
-                                                    endpoint_url,
-                                                    self.bucket.name,
-                                                    mount_path)
+        mount_cmd = mounting_utils.get_r2_mount_cmd(
+            r2_credential_path, r2_profile_name, endpoint_url, self.bucket.name,
+            mount_path, self._bucket_sub_path)
         return mounting_utils.get_mounting_command(mount_path, install_cmd,
                                                     mount_cmd)

@@ -3124,6 +3439,43 @@ class R2Store(AbstractStore):
             f'{self.name} but failed.') from e
         return cloudflare.resource('s3').Bucket(bucket_name)

+    def _execute_r2_remove_command(self, command: str, bucket_name: str,
+                                   hint_operating: str,
+                                   hint_failed: str) -> bool:
+        try:
+            with rich_utils.safe_status(
+                    ux_utils.spinner_message(hint_operating)):
+                subprocess.check_output(command.split(' '),
+                                        stderr=subprocess.STDOUT,
+                                        shell=True)
+        except subprocess.CalledProcessError as e:
+            if 'NoSuchBucket' in e.output.decode('utf-8'):
+                logger.debug(
+                    _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
+                        bucket_name=bucket_name))
+                return False
+            else:
+                with ux_utils.print_exception_no_traceback():
+                    raise exceptions.StorageBucketDeleteError(
+                        f'{hint_failed}'
+                        f'Detailed error: {e.output}')
+        return True
+
+    def _delete_r2_bucket_sub_path(self, bucket_name: str,
+                                   sub_path: str) -> bool:
+        """Deletes the sub path from the bucket."""
+        endpoint_url = cloudflare.create_endpoint()
+        remove_command = (
+            f'AWS_SHARED_CREDENTIALS_FILE={cloudflare.R2_CREDENTIALS_PATH} '
+            f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive '
+            f'--endpoint {endpoint_url} '
+            f'--profile={cloudflare.R2_PROFILE_NAME}')
+        return self._execute_r2_remove_command(
+            remove_command, bucket_name,
+            f'Removing objects from R2 bucket {bucket_name}/{sub_path}',
+            f'Failed to remove objects from R2 bucket {bucket_name}/{sub_path}.'
+        )
+
     def _delete_r2_bucket(self, bucket_name: str) -> bool:
         """Deletes R2 bucket, including all objects in bucket

@@ -3132,6 +3484,9 @@ class R2Store(AbstractStore):

         Returns:
             bool; True if bucket was deleted, False if it was deleted externally.
+
+        Raises:
+            StorageBucketDeleteError: If deleting the bucket fails.
         """
         # Deleting objects is very slow programatically
         # (i.e. bucket.objects.all().delete() is slow).
@@ -3146,24 +3501,12 @@ class R2Store(AbstractStore):
                           f'aws s3 rb s3://{bucket_name} --force '
                           f'--endpoint {endpoint_url} '
                           f'--profile={cloudflare.R2_PROFILE_NAME}')
-
-
-
-
-
-
-                                        shell=True)
-        except subprocess.CalledProcessError as e:
-            if 'NoSuchBucket' in e.output.decode('utf-8'):
-                logger.debug(
-                    _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
-                        bucket_name=bucket_name))
-                return False
-            else:
-                with ux_utils.print_exception_no_traceback():
-                    raise exceptions.StorageBucketDeleteError(
-                        f'Failed to delete R2 bucket {bucket_name}.'
-                        f'Detailed error: {e.output}')
+
+        success = self._execute_r2_remove_command(
+            remove_command, bucket_name, f'Deleting R2 bucket {bucket_name}',
+            f'Failed to delete R2 bucket {bucket_name}.')
+        if not success:
+            return False
 
         # Wait until bucket deletion propagates on AWS servers
         while data_utils.verify_r2_bucket(bucket_name):
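With this refactor, `_delete_r2_bucket` delegates the subprocess and error handling to the shared helper and only keeps the propagation wait. A compact sketch of that control flow, where `run_remove` and `bucket_still_exists` are stand-ins for the helper call and `data_utils.verify_r2_bucket`, and the sleep interval is purely illustrative:

```python
import time
from typing import Callable


def delete_bucket(run_remove: Callable[[], bool],
                  bucket_still_exists: Callable[[], bool]) -> bool:
    # Sketch of the post-refactor flow in _delete_r2_bucket.
    if not run_remove():
        # Helper reported the bucket was already deleted externally.
        return False
    while bucket_still_exists():
        # Wait until the deletion propagates on the provider side.
        time.sleep(0.1)
    return True
```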
@@ -3182,11 +3525,12 @@ class IBMCosStore(AbstractStore):
                  source: str,
                  region: Optional[str] = 'us-east',
                  is_sky_managed: Optional[bool] = None,
-                 sync_on_reconstruction: bool = True
+                 sync_on_reconstruction: bool = True,
+                 _bucket_sub_path: Optional[str] = None):
         self.client: 'storage.Client'
         self.bucket: 'StorageHandle'
         super().__init__(name, source, region, is_sky_managed,
-                         sync_on_reconstruction)
+                         sync_on_reconstruction, _bucket_sub_path)
         self.bucket_rclone_profile = \
             Rclone.generate_rclone_bucket_profile_name(
                 self.name, Rclone.RcloneClouds.IBM)
@@ -3331,10 +3675,22 @@ class IBMCosStore(AbstractStore):
                 f'Upload failed for store {self.name}') from e
 
     def delete(self) -> None:
+        if self._bucket_sub_path is not None and not self.is_sky_managed:
+            return self._delete_sub_path()
+
         self._delete_cos_bucket()
         logger.info(f'{colorama.Fore.GREEN}Deleted COS bucket {self.name}.'
                     f'{colorama.Style.RESET_ALL}')
 
+    def _delete_sub_path(self) -> None:
+        assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
+        bucket = self.s3_resource.Bucket(self.name)
+        try:
+            self._delete_cos_bucket_objects(bucket, self._bucket_sub_path + '/')
+        except ibm.ibm_botocore.exceptions.ClientError as e:
+            if e.__class__.__name__ == 'NoSuchBucket':
+                logger.debug('bucket already removed')
+
     def get_handle(self) -> StorageHandle:
         return self.s3_resource.Bucket(self.name)
 
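The IBM COS `delete()` above now dispatches on whether the store only manages a sub-path inside a bucket it does not own. A simplified sketch of that decision (the function and parameter names are hypothetical, not the IBMCosStore API):

```python
def delete_store(bucket_sub_path, is_sky_managed,
                 delete_prefix_objects, delete_whole_bucket):
    # Sketch of the dispatch added above.
    if bucket_sub_path is not None and not is_sky_managed:
        # Only objects under '<sub_path>/' are removed; the bucket itself,
        # which SkyPilot does not own, is left in place.
        return delete_prefix_objects(bucket_sub_path + '/')
    # Sky-managed store (or no sub-path): remove the whole bucket.
    return delete_whole_bucket()
```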
@@ -3375,10 +3731,13 @@ class IBMCosStore(AbstractStore):
             # .git directory is excluded from the sync
             # wrapping src_dir_path with "" to support path with spaces
             src_dir_path = shlex.quote(src_dir_path)
+            sub_path = (f'/{self._bucket_sub_path}'
+                        if self._bucket_sub_path else '')
             sync_command = (
                 'rclone copy --exclude ".git/*" '
                 f'{src_dir_path} '
-                f'{self.bucket_rclone_profile}:{self.name}
+                f'{self.bucket_rclone_profile}:{self.name}{sub_path}'
+                f'/{dest_dir_name}')
             return sync_command
 
         def get_file_sync_command(base_dir_path, file_names) -> str:
@@ -3404,9 +3763,12 @@ class IBMCosStore(AbstractStore):
                 for file_name in file_names
             ])
             base_dir_path = shlex.quote(base_dir_path)
-
-
-
+            sub_path = (f'/{self._bucket_sub_path}'
+                        if self._bucket_sub_path else '')
+            sync_command = (
+                'rclone copy '
+                f'{includes} {base_dir_path} '
+                f'{self.bucket_rclone_profile}:{self.name}{sub_path}')
             return sync_command
 
         # Generate message for upload
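Both rclone command generators now append the same optional sub-path to the remote target. A small sketch of the resulting file-sync command shape, where the profile, bucket, includes, and sub-path values are hypothetical examples rather than values produced by IBMCosStore:

```python
from typing import Optional


def rclone_file_sync_command(includes: str, base_dir_path: str,
                             rclone_profile: str, bucket: str,
                             sub_path: Optional[str] = None) -> str:
    # Mirrors the f-string pattern used in the generator above.
    suffix = f'/{sub_path}' if sub_path else ''
    return ('rclone copy '
            f'{includes} {base_dir_path} '
            f'{rclone_profile}:{bucket}{suffix}')


print(rclone_file_sync_command('--include "a.txt"', '/tmp/src',
                               'sky-ibm-profile', 'my-bucket', 'job-42'))
# rclone copy --include "a.txt" /tmp/src sky-ibm-profile:my-bucket/job-42
```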
@@ -3491,6 +3853,7 @@ class IBMCosStore(AbstractStore):
             Rclone.RcloneClouds.IBM,
             self.region,  # type: ignore
         )
+
         if not bucket_region and self.sync_on_reconstruction:
             # bucket doesn't exist
             return self._create_cos_bucket(self.name, self.region), True
@@ -3537,7 +3900,8 @@ class IBMCosStore(AbstractStore):
             Rclone.RCLONE_CONFIG_PATH,
             self.bucket_rclone_profile,
             self.bucket.name,
-            mount_path
+            mount_path,
+            self._bucket_sub_path)
         return mounting_utils.get_mounting_command(mount_path, install_cmd,
                                                    mount_cmd)
 
@@ -3575,18 +3939,485 @@ class IBMCosStore(AbstractStore):
 
         return self.bucket
 
-    def
-
-
-
-
+    def _delete_cos_bucket_objects(self,
+                                   bucket: Any,
+                                   prefix: Optional[str] = None) -> None:
+        bucket_versioning = self.s3_resource.BucketVersioning(bucket.name)
+        if bucket_versioning.status == 'Enabled':
+            if prefix is not None:
+                res = list(
+                    bucket.object_versions.filter(Prefix=prefix).delete())
+            else:
                 res = list(bucket.object_versions.delete())
+        else:
+            if prefix is not None:
+                res = list(bucket.objects.filter(Prefix=prefix).delete())
             else:
                 res = list(bucket.objects.delete())
-
+        logger.debug(f'Deleted bucket\'s content:\n{res}, prefix: {prefix}')
+
+    def _delete_cos_bucket(self) -> None:
+        bucket = self.s3_resource.Bucket(self.name)
+        try:
+            self._delete_cos_bucket_objects(bucket)
             bucket.delete()
             bucket.wait_until_not_exists()
         except ibm.ibm_botocore.exceptions.ClientError as e:
             if e.__class__.__name__ == 'NoSuchBucket':
                 logger.debug('bucket already removed')
         Rclone.delete_rclone_bucket_profile(self.name, Rclone.RcloneClouds.IBM)
+
+
+class OciStore(AbstractStore):
+    """OciStore inherits from Storage Object and represents the backend
+    for OCI buckets.
+    """
+
+    _ACCESS_DENIED_MESSAGE = 'AccessDeniedException'
+
+    def __init__(self,
+                 name: str,
+                 source: Optional[SourceType],
+                 region: Optional[str] = None,
+                 is_sky_managed: Optional[bool] = None,
+                 sync_on_reconstruction: Optional[bool] = True,
+                 _bucket_sub_path: Optional[str] = None):
+        self.client: Any
+        self.bucket: StorageHandle
+        self.oci_config_file: str
+        self.config_profile: str
+        self.compartment: str
+        self.namespace: str
+
+        # Region is from the specified name in <bucket>@<region> format.
+        # Another case is name can also be set by the source, for example:
+        # /datasets-storage:
+        #     source: oci://RAGData@us-sanjose-1
+        # The name in above mount will be set to RAGData@us-sanjose-1
+        region_in_name = None
+        if name is not None and '@' in name:
+            self._validate_bucket_expr(name)
+            name, region_in_name = name.split('@')
+
+        # Region is from the specified source in oci://<bucket>@<region> format
+        region_in_source = None
+        if isinstance(source,
+                      str) and source.startswith('oci://') and '@' in source:
+            self._validate_bucket_expr(source)
+            source, region_in_source = source.split('@')
+
+        if region_in_name is not None and region_in_source is not None:
+            # This should never happen because name and source will never be
+            # the remote bucket at the same time.
+            assert region_in_name == region_in_source, (
+                f'Mismatch region specified. Region in name {region_in_name}, '
+                f'but region in source is {region_in_source}')
+
+        if region_in_name is not None:
+            region = region_in_name
+        elif region_in_source is not None:
+            region = region_in_source
+
+        # Default region set to what specified in oci config.
+        if region is None:
+            region = oci.get_oci_config()['region']
+
+        # So far from now on, the name and source are canonical, means there
+        # is no region (@<region> suffix) associated with them anymore.
+
+        super().__init__(name, source, region, is_sky_managed,
+                         sync_on_reconstruction, _bucket_sub_path)
+        # TODO(zpoint): add _bucket_sub_path to the sync/mount/delete commands
+
+    def _validate_bucket_expr(self, bucket_expr: str):
+        pattern = r'^(\w+://)?[A-Za-z0-9-._]+(@\w{2}-\w+-\d{1})$'
+        if not re.match(pattern, bucket_expr):
+            raise ValueError(
+                'The format for the bucket portion is <bucket>@<region> '
+                'when specify a region with a bucket.')
+
+    def _validate(self):
+        if self.source is not None and isinstance(self.source, str):
+            if self.source.startswith('oci://'):
+                assert self.name == data_utils.split_oci_path(self.source)[0], (
+                    'OCI Bucket is specified as path, the name should be '
+                    'the same as OCI bucket.')
+            elif not re.search(r'^\w+://', self.source):
+                # Treat it as local path.
+                pass
+            else:
+                raise NotImplementedError(
+                    f'Moving data from {self.source} to OCI is not supported.')
+
+        # Validate name
+        self.name = self.validate_name(self.name)
+        # Check if the storage is enabled
+        if not _is_storage_cloud_enabled(str(clouds.OCI())):
+            with ux_utils.print_exception_no_traceback():
+                raise exceptions.ResourcesUnavailableError(
+                    'Storage \'store: oci\' specified, but ' \
+                    'OCI access is disabled. To fix, enable '\
+                    'OCI by running `sky check`. '\
+                    'More info: https://skypilot.readthedocs.io/en/latest/getting-started/installation.html.'  # pylint: disable=line-too-long
+                )
+
+    @classmethod
+    def validate_name(cls, name) -> str:
+        """Validates the name of the OCI store.
+
+        Source for rules: https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/managingbuckets.htm#Managing_Buckets  # pylint: disable=line-too-long
+        """
+
+        def _raise_no_traceback_name_error(err_str):
+            with ux_utils.print_exception_no_traceback():
+                raise exceptions.StorageNameError(err_str)
+
+        if name is not None and isinstance(name, str):
+            # Check for overall length
+            if not 1 <= len(name) <= 256:
+                _raise_no_traceback_name_error(
+                    f'Invalid store name: name {name} must contain 1-256 '
+                    'characters.')
+
+            # Check for valid characters and start/end with a number or letter
+            pattern = r'^[A-Za-z0-9-._]+$'
+            if not re.match(pattern, name):
+                _raise_no_traceback_name_error(
+                    f'Invalid store name: name {name} can only contain '
+                    'upper or lower case letters, numeric characters, hyphens '
+                    '(-), underscores (_), and dots (.). Spaces are not '
+                    'allowed. Names must start and end with a number or '
+                    'letter.')
+        else:
+            _raise_no_traceback_name_error('Store name must be specified.')
+        return name
+
+    def initialize(self):
+        """Initializes the OCI store object on the cloud.
+
+        Initialization involves fetching bucket if exists, or creating it if
+        it does not.
+
+        Raises:
+          StorageBucketCreateError: If bucket creation fails
+          StorageBucketGetError: If fetching existing bucket fails
+          StorageInitError: If general initialization fails.
+        """
+        # pylint: disable=import-outside-toplevel
+        from sky.clouds.utils import oci_utils
+        from sky.provision.oci.query_utils import query_helper
+
+        self.oci_config_file = oci.get_config_file()
+        self.config_profile = oci_utils.oci_config.get_profile()
+
+        ## pylint: disable=line-too-long
+        # What's compartment? See thttps://docs.oracle.com/en/cloud/foundation/cloud_architecture/governance/compartments.html
+        self.compartment = query_helper.find_compartment(self.region)
+        self.client = oci.get_object_storage_client(region=self.region,
+                                                    profile=self.config_profile)
+        self.namespace = self.client.get_namespace(
+            compartment_id=oci.get_oci_config()['tenancy']).data
+
+        self.bucket, is_new_bucket = self._get_bucket()
+        if self.is_sky_managed is None:
+            # If is_sky_managed is not specified, then this is a new storage
+            # object (i.e., did not exist in global_user_state) and we should
+            # set the is_sky_managed property.
+            # If is_sky_managed is specified, then we take no action.
+            self.is_sky_managed = is_new_bucket
+
+    def upload(self):
+        """Uploads source to store bucket.
+
+        Upload must be called by the Storage handler - it is not called on
+        Store initialization.
+
+        Raises:
+            StorageUploadError: if upload fails.
+        """
+        try:
+            if isinstance(self.source, list):
+                self.batch_oci_rsync(self.source, create_dirs=True)
+            elif self.source is not None:
+                if self.source.startswith('oci://'):
+                    pass
+                else:
+                    self.batch_oci_rsync([self.source])
+        except exceptions.StorageUploadError:
+            raise
+        except Exception as e:
+            raise exceptions.StorageUploadError(
+                f'Upload failed for store {self.name}') from e
+
+    def delete(self) -> None:
+        deleted_by_skypilot = self._delete_oci_bucket(self.name)
+        if deleted_by_skypilot:
+            msg_str = f'Deleted OCI bucket {self.name}.'
+        else:
+            msg_str = (f'OCI bucket {self.name} may have been deleted '
+                       f'externally. Removing from local state.')
+        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
+                    f'{colorama.Style.RESET_ALL}')
+
+    def get_handle(self) -> StorageHandle:
+        return self.client.get_bucket(namespace_name=self.namespace,
+                                      bucket_name=self.name).data
+
+    def batch_oci_rsync(self,
+                        source_path_list: List[Path],
+                        create_dirs: bool = False) -> None:
+        """Invokes oci sync to batch upload a list of local paths to Bucket
+
+        Use OCI bulk operation to batch process the file upload
+
+        Args:
+          source_path_list: List of paths to local files or directories
+          create_dirs: If the local_path is a directory and this is set to
+            False, the contents of the directory are directly uploaded to
+            root of the bucket. If the local_path is a directory and this is
+            set to True, the directory is created in the bucket root and
+            contents are uploaded to it.
+        """
+
+        @oci.with_oci_env
+        def get_file_sync_command(base_dir_path, file_names):
+            includes = ' '.join(
+                [f'--include "{file_name}"' for file_name in file_names])
+            sync_command = (
+                'oci os object bulk-upload --no-follow-symlinks --overwrite '
+                f'--bucket-name {self.name} --namespace-name {self.namespace} '
+                f'--region {self.region} --src-dir "{base_dir_path}" '
+                f'{includes}')
+
+            return sync_command
+
+        @oci.with_oci_env
+        def get_dir_sync_command(src_dir_path, dest_dir_name):
+            if dest_dir_name and not str(dest_dir_name).endswith('/'):
+                dest_dir_name = f'{dest_dir_name}/'
+
+            excluded_list = storage_utils.get_excluded_files(src_dir_path)
+            excluded_list.append('.git/*')
+            excludes = ' '.join([
+                f'--exclude {shlex.quote(file_name)}'
+                for file_name in excluded_list
+            ])
+
+            # we exclude .git directory from the sync
+            sync_command = (
+                'oci os object bulk-upload --no-follow-symlinks --overwrite '
+                f'--bucket-name {self.name} --namespace-name {self.namespace} '
+                f'--region {self.region} --object-prefix "{dest_dir_name}" '
+                f'--src-dir "{src_dir_path}" {excludes}')
+
+            return sync_command
+
+        # Generate message for upload
+        if len(source_path_list) > 1:
+            source_message = f'{len(source_path_list)} paths'
+        else:
+            source_message = source_path_list[0]
+
+        log_path = sky_logging.generate_tmp_logging_file_path(
+            _STORAGE_LOG_FILE_NAME)
+        sync_path = f'{source_message} -> oci://{self.name}/'
+        with rich_utils.safe_status(
+                ux_utils.spinner_message(f'Syncing {sync_path}',
+                                         log_path=log_path)):
+            data_utils.parallel_upload(
+                source_path_list=source_path_list,
+                filesync_command_generator=get_file_sync_command,
+                dirsync_command_generator=get_dir_sync_command,
+                log_path=log_path,
+                bucket_name=self.name,
+                access_denied_message=self._ACCESS_DENIED_MESSAGE,
+                create_dirs=create_dirs,
+                max_concurrent_uploads=1)
+
+        logger.info(
+            ux_utils.finishing_message(f'Storage synced: {sync_path}',
+                                       log_path))
+
+    def _get_bucket(self) -> Tuple[StorageHandle, bool]:
+        """Obtains the OCI bucket.
+        If the bucket exists, this method will connect to the bucket.
+
+        If the bucket does not exist, there are three cases:
+          1) Raise an error if the bucket source starts with oci://
+          2) Return None if bucket has been externally deleted and
+             sync_on_reconstruction is False
+          3) Create and return a new bucket otherwise
+
+        Return tuple (Bucket, Boolean): The first item is the bucket
+        json payload from the OCI API call, the second item indicates
+        if this is a new created bucket(True) or an existing bucket(False).
+
+        Raises:
+            StorageBucketCreateError: If creating the bucket fails
+            StorageBucketGetError: If fetching a bucket fails
+        """
+        try:
+            get_bucket_response = self.client.get_bucket(
+                namespace_name=self.namespace, bucket_name=self.name)
+            bucket = get_bucket_response.data
+            return bucket, False
+        except oci.service_exception() as e:
+            if e.status == 404:  # Not Found
+                if isinstance(self.source,
+                              str) and self.source.startswith('oci://'):
+                    with ux_utils.print_exception_no_traceback():
+                        raise exceptions.StorageBucketGetError(
+                            'Attempted to connect to a non-existent bucket: '
+                            f'{self.source}') from e
+                else:
+                    # If bucket cannot be found (i.e., does not exist), it is
+                    # to be created by Sky. However, creation is skipped if
+                    # Store object is being reconstructed for deletion.
+                    if self.sync_on_reconstruction:
+                        bucket = self._create_oci_bucket(self.name)
+                        return bucket, True
+                    else:
+                        return None, False
+            elif e.status == 401:  # Unauthorized
+                # AccessDenied error for buckets that are private and not
+                # owned by user.
+                command = (
+                    f'oci os object list --namespace-name {self.namespace} '
+                    f'--bucket-name {self.name}')
+                with ux_utils.print_exception_no_traceback():
+                    raise exceptions.StorageBucketGetError(
+                        _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
+                        f' To debug, consider running `{command}`.') from e
+            else:
+                # Unknown / unexpected error happened. This might happen when
+                # Object storage service itself functions not normal (e.g.
+                # maintainance event causes internal server error or request
+                # timeout, etc).
+                with ux_utils.print_exception_no_traceback():
+                    raise exceptions.StorageBucketGetError(
+                        f'Failed to connect to OCI bucket {self.name}') from e
+
+    def mount_command(self, mount_path: str) -> str:
+        """Returns the command to mount the bucket to the mount_path.
+
+        Uses Rclone to mount the bucket.
+
+        Args:
+          mount_path: str; Path to mount the bucket to.
+        """
+        install_cmd = mounting_utils.get_rclone_install_cmd()
+        mount_cmd = mounting_utils.get_oci_mount_cmd(
+            mount_path=mount_path,
+            store_name=self.name,
+            region=str(self.region),
+            namespace=self.namespace,
+            compartment=self.bucket.compartment_id,
+            config_file=self.oci_config_file,
+            config_profile=self.config_profile)
+        version_check_cmd = mounting_utils.get_rclone_version_check_cmd()
+
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cmd, version_check_cmd)
+
+    def _download_file(self, remote_path: str, local_path: str) -> None:
+        """Downloads file from remote to local on OCI bucket
+
+        Args:
+          remote_path: str; Remote path on OCI bucket
+          local_path: str; Local path on user's device
+        """
+        if remote_path.startswith(f'/{self.name}'):
+            # If the remote path is /bucket_name, we need to
+            # remove the leading /
+            remote_path = remote_path.lstrip('/')
+
+        filename = os.path.basename(remote_path)
+        if not local_path.endswith(filename):
+            local_path = os.path.join(local_path, filename)
+
+        @oci.with_oci_env
+        def get_file_download_command(remote_path, local_path):
+            download_command = (f'oci os object get --bucket-name {self.name} '
+                                f'--namespace-name {self.namespace} '
+                                f'--region {self.region} --name {remote_path} '
+                                f'--file {local_path}')
+
+            return download_command
+
+        download_command = get_file_download_command(remote_path, local_path)
+
+        try:
+            with rich_utils.safe_status(
+                    f'[bold cyan]Downloading: {remote_path} -> {local_path}[/]'
+            ):
+                subprocess.check_output(download_command,
+                                        stderr=subprocess.STDOUT,
+                                        shell=True)
+        except subprocess.CalledProcessError as e:
+            logger.error(f'Download failed: {remote_path} -> {local_path}.\n'
+                         f'Detail errors: {e.output}')
+            with ux_utils.print_exception_no_traceback():
+                raise exceptions.StorageBucketDeleteError(
+                    f'Failed download file {self.name}:{remote_path}.') from e
+
+    def _create_oci_bucket(self, bucket_name: str) -> StorageHandle:
+        """Creates OCI bucket with specific name in specific region
+
+        Args:
+          bucket_name: str; Name of bucket
+          region: str; Region name, e.g. us-central1, us-west1
+        """
+        logger.debug(f'_create_oci_bucket: {bucket_name}')
+        try:
+            create_bucket_response = self.client.create_bucket(
+                namespace_name=self.namespace,
+                create_bucket_details=oci.oci.object_storage.models.
+                CreateBucketDetails(
+                    name=bucket_name,
+                    compartment_id=self.compartment,
+                ))
+            bucket = create_bucket_response.data
+            return bucket
+        except oci.service_exception() as e:
+            with ux_utils.print_exception_no_traceback():
+                raise exceptions.StorageBucketCreateError(
+                    f'Failed to create OCI bucket: {self.name}') from e
+
+    def _delete_oci_bucket(self, bucket_name: str) -> bool:
+        """Deletes OCI bucket, including all objects in bucket
+
+        Args:
+          bucket_name: str; Name of bucket
+
+        Returns:
+          bool; True if bucket was deleted, False if it was deleted externally.
+        """
+        logger.debug(f'_delete_oci_bucket: {bucket_name}')
+
+        @oci.with_oci_env
+        def get_bucket_delete_command(bucket_name):
+            remove_command = (f'oci os bucket delete --bucket-name '
+                              f'--region {self.region} '
+                              f'{bucket_name} --empty --force')
+
+            return remove_command
+
+        remove_command = get_bucket_delete_command(bucket_name)
+
+        try:
+            with rich_utils.safe_status(
+                    f'[bold cyan]Deleting OCI bucket {bucket_name}[/]'):
+                subprocess.check_output(remove_command.split(' '),
+                                        stderr=subprocess.STDOUT)
+        except subprocess.CalledProcessError as e:
+            if 'BucketNotFound' in e.output.decode('utf-8'):
+                logger.debug(
+                    _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
+                        bucket_name=bucket_name))
+                return False
+            else:
+                logger.error(e.output)
+                with ux_utils.print_exception_no_traceback():
+                    raise exceptions.StorageBucketDeleteError(
+                        f'Failed to delete OCI bucket {bucket_name}.')
+        return True
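The new `OciStore` resolves its region from a `@<region>` suffix on the store name, from an `oci://<bucket>@<region>` source, or, failing both, from the local OCI config. A standalone sketch of that precedence under the same `<bucket>@<region>` validation pattern; the bucket names, regions, and the `default_region` fallback below are hypothetical examples, and the real constructor additionally canonicalizes `source`:

```python
import re
from typing import Optional, Tuple

# Same validation pattern as OciStore._validate_bucket_expr.
_BUCKET_EXPR = re.compile(r'^(\w+://)?[A-Za-z0-9-._]+(@\w{2}-\w+-\d{1})$')


def resolve_bucket_and_region(name: str,
                              source: Optional[str],
                              default_region: str) -> Tuple[str, str]:
    """Simplified sketch of OciStore.__init__'s region resolution."""
    region_in_name = region_in_source = None
    if '@' in name:
        assert _BUCKET_EXPR.match(name), 'expected <bucket>@<region>'
        name, region_in_name = name.split('@')
    if source and source.startswith('oci://') and '@' in source:
        assert _BUCKET_EXPR.match(source), 'expected oci://<bucket>@<region>'
        source, region_in_source = source.split('@')
    # Precedence: name suffix, then source suffix, then the config default.
    region = region_in_name or region_in_source or default_region
    return name, region


# Hypothetical inputs for illustration only.
assert resolve_bucket_and_region('RAGData@us-sanjose-1', None,
                                 'us-ashburn-1') == ('RAGData', 'us-sanjose-1')
assert resolve_bucket_and_region('mybucket', 'oci://mybucket@us-phoenix-1',
                                 'us-ashburn-1') == ('mybucket', 'us-phoenix-1')
```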