skypilot-nightly 1.0.0.dev20250320__py3-none-any.whl → 1.0.0.dev20250322__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/cloudflare.py +16 -5
  3. sky/adaptors/kubernetes.py +2 -1
  4. sky/adaptors/nebius.py +128 -6
  5. sky/backends/cloud_vm_ray_backend.py +3 -1
  6. sky/benchmark/benchmark_utils.py +3 -2
  7. sky/check.py +114 -114
  8. sky/cloud_stores.py +66 -0
  9. sky/clouds/aws.py +14 -7
  10. sky/clouds/azure.py +13 -6
  11. sky/clouds/cloud.py +34 -10
  12. sky/clouds/cudo.py +3 -2
  13. sky/clouds/do.py +3 -2
  14. sky/clouds/fluidstack.py +3 -2
  15. sky/clouds/gcp.py +8 -9
  16. sky/clouds/ibm.py +15 -6
  17. sky/clouds/kubernetes.py +3 -1
  18. sky/clouds/lambda_cloud.py +3 -1
  19. sky/clouds/nebius.py +59 -11
  20. sky/clouds/oci.py +15 -6
  21. sky/clouds/paperspace.py +3 -2
  22. sky/clouds/runpod.py +7 -1
  23. sky/clouds/scp.py +3 -1
  24. sky/clouds/service_catalog/kubernetes_catalog.py +3 -1
  25. sky/clouds/vast.py +3 -2
  26. sky/clouds/vsphere.py +3 -2
  27. sky/core.py +6 -4
  28. sky/data/data_transfer.py +75 -0
  29. sky/data/data_utils.py +34 -0
  30. sky/data/mounting_utils.py +18 -0
  31. sky/data/storage.py +540 -10
  32. sky/data/storage_utils.py +102 -84
  33. sky/exceptions.py +2 -0
  34. sky/global_user_state.py +12 -33
  35. sky/jobs/server/core.py +1 -1
  36. sky/jobs/utils.py +5 -0
  37. sky/optimizer.py +10 -5
  38. sky/resources.py +6 -1
  39. sky/setup_files/dependencies.py +3 -1
  40. sky/task.py +16 -5
  41. sky/utils/command_runner.py +2 -0
  42. sky/utils/controller_utils.py +8 -5
  43. sky/utils/kubernetes/gpu_labeler.py +4 -4
  44. sky/utils/kubernetes/kubernetes_deploy_utils.py +4 -3
  45. {skypilot_nightly-1.0.0.dev20250320.dist-info → skypilot_nightly-1.0.0.dev20250322.dist-info}/METADATA +16 -7
  46. {skypilot_nightly-1.0.0.dev20250320.dist-info → skypilot_nightly-1.0.0.dev20250322.dist-info}/RECORD +50 -50
  47. {skypilot_nightly-1.0.0.dev20250320.dist-info → skypilot_nightly-1.0.0.dev20250322.dist-info}/WHEEL +1 -1
  48. {skypilot_nightly-1.0.0.dev20250320.dist-info → skypilot_nightly-1.0.0.dev20250322.dist-info}/entry_points.txt +0 -0
  49. {skypilot_nightly-1.0.0.dev20250320.dist-info → skypilot_nightly-1.0.0.dev20250322.dist-info}/licenses/LICENSE +0 -0
  50. {skypilot_nightly-1.0.0.dev20250320.dist-info → skypilot_nightly-1.0.0.dev20250322.dist-info}/top_level.txt +0 -0
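Most of the churn in this release is the new Nebius Object Storage backend: a sky.adaptors.nebius module, a NebiusStore class in sky/data/storage.py, nebius paths in data_transfer, data_utils and mounting_utils, and a move to capability-based cloud checks (sky_check.check_capability). The sketch below is only an illustration of how the new store type could be exercised through the Storage class and StoreType enum that appear in the diff that follows; the constructor arguments and credential setup shown are assumptions, not something this diff documents.

    # Sketch, not an official example: Storage and StoreType are the names
    # added/extended in sky/data/storage.py below. Assumes Nebius credentials
    # are already configured and the cloud is enabled (e.g. via `sky check`).
    from sky.data.storage import Storage, StoreType

    # A 'nebius://' source is now accepted alongside s3://, gs://, r2://, etc.
    bucket_backed = Storage(source='nebius://my-existing-bucket')

    # A local directory can be synced into a Nebius-backed store.
    local_backed = Storage(name='my-data', source='./data')
    local_backed.add_store(StoreType.NEBIUS)  # creates the bucket if needed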
sky/data/storage.py CHANGED
@@ -23,7 +23,9 @@ from sky.adaptors import azure
 from sky.adaptors import cloudflare
 from sky.adaptors import gcp
 from sky.adaptors import ibm
+from sky.adaptors import nebius
 from sky.adaptors import oci
+from sky.clouds import cloud as sky_cloud
 from sky.data import data_transfer
 from sky.data import data_utils
 from sky.data import mounting_utils
@@ -57,6 +59,7 @@ STORE_ENABLED_CLOUDS: List[str] = [
     str(clouds.Azure()),
     str(clouds.IBM()),
     str(clouds.OCI()),
+    str(clouds.Nebius()),
     cloudflare.NAME,
 ]
 
@@ -78,11 +81,12 @@ _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE = (
 _STORAGE_LOG_FILE_NAME = 'storage_sync.log'
 
 
-def get_cached_enabled_storage_clouds_or_refresh(
+def get_cached_enabled_storage_cloud_names_or_refresh(
         raise_if_no_cloud_access: bool = False) -> List[str]:
     # This is a temporary solution until https://github.com/skypilot-org/skypilot/issues/1943  # pylint: disable=line-too-long
     # is resolved by implementing separate 'enabled_storage_clouds'
-    enabled_clouds = sky_check.get_cached_enabled_storage_clouds_or_refresh()
+    enabled_clouds = sky_check.get_cached_enabled_clouds_or_refresh(
+        sky_cloud.CloudCapability.STORAGE)
     enabled_clouds = [str(cloud) for cloud in enabled_clouds]
 
     r2_is_enabled, _ = cloudflare.check_storage_credentials()
@@ -97,13 +101,16 @@ def get_cached_enabled_storage_cloud_names_or_refresh(
 
 def _is_storage_cloud_enabled(cloud_name: str,
                               try_fix_with_sky_check: bool = True) -> bool:
-    enabled_storage_clouds = get_cached_enabled_storage_clouds_or_refresh()
-    if cloud_name in enabled_storage_clouds:
+    enabled_storage_cloud_names = (
+        get_cached_enabled_storage_cloud_names_or_refresh())
+    if cloud_name in enabled_storage_cloud_names:
         return True
     if try_fix_with_sky_check:
         # TODO(zhwu): Only check the specified cloud to speed up.
-        sky_check.check(quiet=True,
-                        capability=sky_check.CloudCapability.STORAGE)
+        sky_check.check_capability(
+            sky_cloud.CloudCapability.STORAGE,
+            quiet=True,
+        )
         return _is_storage_cloud_enabled(cloud_name,
                                          try_fix_with_sky_check=False)
     return False
@@ -117,6 +124,7 @@ class StoreType(enum.Enum):
     R2 = 'R2'
     IBM = 'IBM'
     OCI = 'OCI'
+    NEBIUS = 'NEBIUS'
 
     @classmethod
     def from_cloud(cls, cloud: str) -> 'StoreType':
@@ -132,6 +140,8 @@ class StoreType(enum.Enum):
             return StoreType.AZURE
         elif cloud.lower() == str(clouds.OCI()).lower():
             return StoreType.OCI
+        elif cloud.lower() == str(clouds.Nebius()).lower():
+            return StoreType.NEBIUS
         elif cloud.lower() == str(clouds.Lambda()).lower():
             with ux_utils.print_exception_no_traceback():
                 raise ValueError('Lambda Cloud does not provide cloud storage.')
@@ -171,6 +181,8 @@ class StoreType(enum.Enum):
             return StoreType.IBM
         elif isinstance(store, OciStore):
             return StoreType.OCI
+        elif isinstance(store, NebiusStore):
+            return StoreType.NEBIUS
         else:
             with ux_utils.print_exception_no_traceback():
                 raise ValueError(f'Unknown store type: {store}')
@@ -189,6 +201,9 @@ class StoreType(enum.Enum):
             return 'cos://'
         elif self == StoreType.OCI:
             return 'oci://'
+        # Nebius storages use 's3://' as a prefix for various aws cli commands
+        elif self == StoreType.NEBIUS:
+            return 's3://'
         else:
             with ux_utils.print_exception_no_traceback():
                 raise ValueError(f'Unknown store type: {self}')
@@ -726,6 +741,8 @@ class Storage(object):
                 self.add_store(StoreType.IBM)
             elif self.source.startswith('oci://'):
                 self.add_store(StoreType.OCI)
+            elif self.source.startswith('nebius://'):
+                self.add_store(StoreType.NEBIUS)
 
     def get_bucket_sub_path_prefix(self, blob_path: str) -> str:
         """Adds the bucket sub path prefix to the blob path."""
@@ -812,7 +829,9 @@ class Storage(object):
                         'using a bucket by writing <destination_path>: '
                         f'{source} in the file_mounts section of your YAML')
                 is_local_source = True
-            elif split_path.scheme in ['s3', 'gs', 'https', 'r2', 'cos', 'oci']:
+            elif split_path.scheme in [
+                    's3', 'gs', 'https', 'r2', 'cos', 'oci', 'nebius'
+            ]:
                 is_local_source = False
                 # Storage mounting does not support mounting specific files from
                 # cloud store - ensure path points to only a directory
@@ -836,7 +855,7 @@ class Storage(object):
                 with ux_utils.print_exception_no_traceback():
                     raise exceptions.StorageSourceError(
                         f'Supported paths: local, s3://, gs://, https://, '
-                        f'r2://, cos://, oci://. Got: {source}')
+                        f'r2://, cos://, oci://, nebius://. Got: {source}')
         return source, is_local_source
 
     def _validate_storage_spec(self, name: Optional[str]) -> None:
@@ -851,7 +870,7 @@ class Storage(object):
             """
             prefix = name.split('://')[0]
             prefix = prefix.lower()
-            if prefix in ['s3', 'gs', 'https', 'r2', 'cos', 'oci']:
+            if prefix in ['s3', 'gs', 'https', 'r2', 'cos', 'oci', 'nebius']:
                 with ux_utils.print_exception_no_traceback():
                     raise exceptions.StorageNameError(
                         'Prefix detected: `name` cannot start with '
@@ -977,6 +996,12 @@ class Storage(object):
                         source=self.source,
                         sync_on_reconstruction=self.sync_on_reconstruction,
                         _bucket_sub_path=self._bucket_sub_path)
+                elif s_type == StoreType.NEBIUS:
+                    store = NebiusStore.from_metadata(
+                        s_metadata,
+                        source=self.source,
+                        sync_on_reconstruction=self.sync_on_reconstruction,
+                        _bucket_sub_path=self._bucket_sub_path)
                 else:
                     with ux_utils.print_exception_no_traceback():
                         raise ValueError(f'Unknown store type: {s_type}')
@@ -1071,6 +1096,8 @@ class Storage(object):
             store_cls = IBMCosStore
         elif store_type == StoreType.OCI:
             store_cls = OciStore
+        elif store_type == StoreType.NEBIUS:
+            store_cls = NebiusStore
         else:
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageSpecError(
@@ -1104,7 +1131,7 @@ class Storage(object):
                          f'name {self.name}. General initialization error.')
             raise
         except exceptions.StorageSpecError:
-            logger.error(f'Could not mount externally created {store_type}'
+            logger.error(f'Could not mount externally created {store_type} '
                          f'store with name {self.name!r}.')
             raise
 
@@ -1359,6 +1386,15 @@ class S3Store(AbstractStore):
                 assert data_utils.verify_r2_bucket(self.name), (
                     f'Source specified as {self.source}, a R2 bucket. ',
                     'R2 Bucket should exist.')
+            elif self.source.startswith('nebius://'):
+                assert self.name == data_utils.split_nebius_path(
+                    self.source)[0], (
+                        'Nebius Object Storage is specified as path, the name '
+                        'should be the same as Nebius Object Storage bucket.')
+                assert data_utils.verify_nebius_bucket(self.name), (
+                    f'Source specified as {self.source}, a Nebius Object '
+                    f'Storage bucket. Nebius Object Storage Bucket should'
+                    f' exist.')
             elif self.source.startswith('cos://'):
                 assert self.name == data_utils.split_cos_path(self.source)[0], (
                     'COS Bucket is specified as path, the name should be '
@@ -1482,6 +1518,8 @@ class S3Store(AbstractStore):
                     self._transfer_to_s3()
                 elif self.source.startswith('oci://'):
                     self._transfer_to_s3()
+                elif self.source.startswith('nebius://'):
+                    self._transfer_to_s3()
                 else:
                     self.batch_aws_rsync([self.source])
         except exceptions.StorageUploadError:
@@ -1846,6 +1884,15 @@ class GcsStore(AbstractStore):
                 assert data_utils.verify_r2_bucket(self.name), (
                     f'Source specified as {self.source}, a R2 bucket. ',
                     'R2 Bucket should exist.')
+            elif self.source.startswith('nebius://'):
+                assert self.name == data_utils.split_nebius_path(
+                    self.source)[0], (
+                        'Nebius Object Storage is specified as path, the name '
+                        'should be the same as R2 bucket.')
+                assert data_utils.verify_nebius_bucket(self.name), (
+                    f'Source specified as {self.source}, a Nebius Object '
+                    f'Storage bucket. Nebius Object Storage Bucket should '
+                    f'exist.')
             elif self.source.startswith('cos://'):
                 assert self.name == data_utils.split_cos_path(self.source)[0], (
                     'COS Bucket is specified as path, the name should be '
@@ -2437,6 +2484,15 @@ class AzureBlobStore(AbstractStore):
                 assert data_utils.verify_r2_bucket(self.name), (
                     f'Source specified as {self.source}, a R2 bucket. ',
                     'R2 Bucket should exist.')
+            elif self.source.startswith('nebius://'):
+                assert self.name == data_utils.split_nebius_path(
+                    self.source)[0], (
+                        'Nebius Object Storage is specified as path, the name '
+                        'should be the same as Nebius Object Storage bucket.')
+                assert data_utils.verify_nebius_bucket(self.name), (
+                    f'Source specified as {self.source}, a Nebius Object '
+                    f'Storage bucket. Nebius Object Storage Bucket should '
+                    f'exist.')
             elif self.source.startswith('cos://'):
                 assert self.name == data_utils.split_cos_path(self.source)[0], (
                     'COS Bucket is specified as path, the name should be '
@@ -2812,6 +2868,8 @@ class AzureBlobStore(AbstractStore):
                     raise NotImplementedError(error_message.format('IBM COS'))
                 elif self.source.startswith('oci://'):
                     raise NotImplementedError(error_message.format('OCI'))
+                elif self.source.startswith('nebius://'):
+                    raise NotImplementedError(error_message.format('NEBIUS'))
                 else:
                     self.batch_az_blob_sync([self.source])
         except exceptions.StorageUploadError:
@@ -3200,6 +3258,15 @@ class R2Store(AbstractStore):
                 assert self.name == data_utils.split_r2_path(self.source)[0], (
                     'R2 Bucket is specified as path, the name should be '
                     'the same as R2 bucket.')
+            elif self.source.startswith('nebius://'):
+                assert self.name == data_utils.split_nebius_path(
+                    self.source)[0], (
+                        'Nebius Object Storage is specified as path, the name '
+                        'should be the same as Nebius Object Storage bucket.')
+                assert data_utils.verify_nebius_bucket(self.name), (
+                    f'Source specified as {self.source}, a Nebius Object '
+                    f'Storage bucket. Nebius Object Storage Bucket should '
+                    f'exist.')
             elif self.source.startswith('cos://'):
                 assert self.name == data_utils.split_cos_path(self.source)[0], (
                     'IBM COS Bucket is specified as path, the name should be '
@@ -3264,6 +3331,8 @@ class R2Store(AbstractStore):
                     pass
                 elif self.source.startswith('oci://'):
                     self._transfer_to_r2()
+                elif self.source.startswith('nebius://'):
+                    self._transfer_to_r2()
                 else:
                     self.batch_aws_rsync([self.source])
         except exceptions.StorageUploadError:
@@ -3390,6 +3459,8 @@ class R2Store(AbstractStore):
             data_transfer.gcs_to_r2(self.name, self.name)
         elif self.source.startswith('s3://'):
             data_transfer.s3_to_r2(self.name, self.name)
+        elif self.source.startswith('nebius://'):
+            data_transfer.s3_to_r2(self.name, self.name)
 
     def _get_bucket(self) -> Tuple[StorageHandle, bool]:
         """Obtains the R2 bucket.
@@ -3649,6 +3720,15 @@ class IBMCosStore(AbstractStore):
                 assert data_utils.verify_r2_bucket(self.name), (
                     f'Source specified as {self.source}, a R2 bucket. ',
                     'R2 Bucket should exist.')
+            elif self.source.startswith('nebius://'):
+                assert self.name == data_utils.split_nebius_path(
+                    self.source)[0], (
+                        'Nebius Object Storage is specified as path, the name '
+                        'should be the same as Nebius Object Storage bucket.')
+                assert data_utils.verify_nebius_bucket(self.name), (
+                    f'Source specified as {self.source}, a Nebius Object '
+                    f'Storage bucket. Nebius Object Storage Bucket should '
+                    f'exist.')
             elif self.source.startswith('cos://'):
                 assert self.name == data_utils.split_cos_path(self.source)[0], (
                     'COS Bucket is specified as path, the name should be '
@@ -3742,6 +3822,9 @@ class IBMCosStore(AbstractStore):
                 elif self.source.startswith('s3://'):
                     raise Exception('IBM COS currently not supporting'
                                     'data transfers between COS and S3')
+                elif self.source.startswith('nebius://'):
+                    raise Exception('IBM COS currently not supporting'
+                                    'data transfers between COS and Nebius')
                 elif self.source.startswith('gs://'):
                     raise Exception('IBM COS currently not supporting'
                                     'data transfers between COS and GS')
@@ -4508,3 +4591,450 @@ class OciStore(AbstractStore):
             raise exceptions.StorageBucketDeleteError(
                 f'Failed to delete OCI bucket {bucket_name}.')
         return True
+
+
+class NebiusStore(AbstractStore):
+    """NebiusStore inherits from Storage Object and represents the backend
+    for S3 buckets.
+    """
+
+    _ACCESS_DENIED_MESSAGE = 'Access Denied'
+    _TIMEOUT_TO_PROPAGATES = 20
+
+    def __init__(self,
+                 name: str,
+                 source: str,
+                 region: Optional[str] = None,
+                 is_sky_managed: Optional[bool] = None,
+                 sync_on_reconstruction: bool = True,
+                 _bucket_sub_path: Optional[str] = None):
+        self.client: 'boto3.client.Client'
+        self.bucket: 'StorageHandle'
+        self.region = region if region is not None else nebius.DEFAULT_REGION
+        super().__init__(name, source, region, is_sky_managed,
+                         sync_on_reconstruction, _bucket_sub_path)
+
+    def _validate(self):
+        if self.source is not None and isinstance(self.source, str):
+            if self.source.startswith('s3://'):
+                assert self.name == data_utils.split_s3_path(self.source)[0], (
+                    'S3 Bucket is specified as path, the name should be the'
+                    ' same as S3 bucket.')
+            elif self.source.startswith('gs://'):
+                assert self.name == data_utils.split_gcs_path(self.source)[0], (
+                    'GCS Bucket is specified as path, the name should be '
+                    'the same as GCS bucket.')
+                assert data_utils.verify_gcs_bucket(self.name), (
+                    f'Source specified as {self.source}, a GCS bucket. ',
+                    'GCS Bucket should exist.')
+            elif data_utils.is_az_container_endpoint(self.source):
+                storage_account_name, container_name, _ = (
+                    data_utils.split_az_path(self.source))
+                assert self.name == container_name, (
+                    'Azure bucket is specified as path, the name should be '
+                    'the same as Azure bucket.')
+                assert data_utils.verify_az_bucket(
+                    storage_account_name, self.name), (
+                        f'Source specified as {self.source}, an Azure bucket. '
+                        'Azure bucket should exist.')
+            elif self.source.startswith('r2://'):
+                assert self.name == data_utils.split_r2_path(self.source)[0], (
+                    'R2 Bucket is specified as path, the name should be '
+                    'the same as R2 bucket.')
+                assert data_utils.verify_r2_bucket(self.name), (
+                    f'Source specified as {self.source}, a R2 bucket. ',
+                    'R2 Bucket should exist.')
+            elif self.source.startswith('nebius://'):
+                assert self.name == data_utils.split_nebius_path(
+                    self.source)[0], (
+                        'Nebius Object Storage is specified as path, the name '
+                        'should be the same as Nebius Object Storage bucket.')
+            elif self.source.startswith('cos://'):
+                assert self.name == data_utils.split_cos_path(self.source)[0], (
+                    'COS Bucket is specified as path, the name should be '
+                    'the same as COS bucket.')
+                assert data_utils.verify_ibm_cos_bucket(self.name), (
+                    f'Source specified as {self.source}, a COS bucket. ',
+                    'COS Bucket should exist.')
+            elif self.source.startswith('oci://'):
+                raise NotImplementedError(
+                    'Moving data from OCI to S3 is currently not supported.')
+        # Validate name
+        self.name = S3Store.validate_name(self.name)
+
+        # Check if the storage is enabled
+        if not _is_storage_cloud_enabled(str(clouds.Nebius())):
+            with ux_utils.print_exception_no_traceback():
+                raise exceptions.ResourcesUnavailableError((
+                    'Storage \'store: nebius\' specified, but '
+                    'Nebius access is disabled. To fix, enable '
+                    'Nebius by running `sky check`. More info: '
+                    'https://docs.skypilot.co/en/latest/getting-started/installation.html.'  # pylint: disable=line-too-long
+                ))
+
+    def initialize(self):
+        """Initializes the Nebius Object Storage on the cloud.
+
+        Initialization involves fetching bucket if exists, or creating it if
+        it does not.
+
+        Raises:
+            StorageBucketCreateError: If bucket creation fails
+            StorageBucketGetError: If fetching existing bucket fails
+            StorageInitError: If general initialization fails.
+        """
+        self.client = data_utils.create_nebius_client(self.region)
+        self.bucket, is_new_bucket = self._get_bucket()
+        if self.is_sky_managed is None:
+            # If is_sky_managed is not specified, then this is a new storage
+            # object (i.e., did not exist in global_user_state) and we should
+            # set the is_sky_managed property.
+            # If is_sky_managed is specified, then we take no action.
+            self.is_sky_managed = is_new_bucket
+
+    def upload(self):
+        """Uploads source to store bucket.
+
+        Upload must be called by the Storage handler - it is not called on
+        Store initialization.
+
+        Raises:
+            StorageUploadError: if upload fails.
+        """
+        try:
+            if isinstance(self.source, list):
+                self.batch_aws_rsync(self.source, create_dirs=True)
+            elif self.source is not None:
+                if self.source.startswith('nebius://'):
+                    pass
+                elif self.source.startswith('s3://'):
+                    self._transfer_to_nebius()
+                elif self.source.startswith('gs://'):
+                    self._transfer_to_nebius()
+                elif self.source.startswith('r2://'):
+                    self._transfer_to_nebius()
+                elif self.source.startswith('oci://'):
+                    self._transfer_to_nebius()
+                else:
+                    self.batch_aws_rsync([self.source])
+        except exceptions.StorageUploadError:
+            raise
+        except Exception as e:
+            raise exceptions.StorageUploadError(
+                f'Upload failed for store {self.name}') from e
+
+    def delete(self) -> None:
+        if self._bucket_sub_path is not None and not self.is_sky_managed:
+            return self._delete_sub_path()
+
+        deleted_by_skypilot = self._delete_nebius_bucket(self.name)
+        if deleted_by_skypilot:
+            msg_str = f'Deleted Nebius bucket {self.name}.'
+        else:
+            msg_str = (f'Nebius bucket {self.name} may have been deleted '
+                       f'externally. Removing from local state.')
+        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
+                    f'{colorama.Style.RESET_ALL}')
+
+    def _delete_sub_path(self) -> None:
+        assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
+        deleted_by_skypilot = self._delete_nebius_bucket_sub_path(
+            self.name, self._bucket_sub_path)
+        if deleted_by_skypilot:
+            msg_str = (f'Removed objects from S3 bucket '
+                       f'{self.name}/{self._bucket_sub_path}.')
+        else:
+            msg_str = (f'Failed to remove objects from S3 bucket '
+                       f'{self.name}/{self._bucket_sub_path}.')
+        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
+                    f'{colorama.Style.RESET_ALL}')
+
+    def get_handle(self) -> StorageHandle:
+        return nebius.resource('s3').Bucket(self.name)
+
+    def batch_aws_rsync(self,
+                        source_path_list: List[Path],
+                        create_dirs: bool = False) -> None:
+        """Invokes aws s3 sync to batch upload a list of local paths to S3
+
+        AWS Sync by default uses 10 threads to upload files to the bucket. To
+        increase parallelism, modify max_concurrent_requests in your aws config
+        file (Default path: ~/.aws/config).
+
+        Since aws s3 sync does not support batch operations, we construct
+        multiple commands to be run in parallel.
+
+        Args:
+            source_path_list: List of paths to local files or directories
+            create_dirs: If the local_path is a directory and this is set to
+                False, the contents of the directory are directly uploaded to
+                root of the bucket. If the local_path is a directory and this is
+                set to True, the directory is created in the bucket root and
+                contents are uploaded to it.
+        """
+        sub_path = (f'/{self._bucket_sub_path}'
+                    if self._bucket_sub_path else '')
+
+        def get_file_sync_command(base_dir_path, file_names):
+            includes = ' '.join([
+                f'--include {shlex.quote(file_name)}'
+                for file_name in file_names
+            ])
+            endpoint_url = nebius.create_endpoint(self.region)
+            base_dir_path = shlex.quote(base_dir_path)
+            sync_command = ('aws s3 sync --no-follow-symlinks --exclude="*" '
+                            f'{includes} {base_dir_path} '
+                            f's3://{self.name}{sub_path} '
+                            f'--endpoint={endpoint_url} '
+                            f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+            return sync_command
+
+        def get_dir_sync_command(src_dir_path, dest_dir_name):
+            # we exclude .git directory from the sync
+            excluded_list = storage_utils.get_excluded_files(src_dir_path)
+            excluded_list.append('.git/*')
+            excludes = ' '.join([
+                f'--exclude {shlex.quote(file_name)}'
+                for file_name in excluded_list
+            ])
+            endpoint_url = nebius.create_endpoint(self.region)
+            src_dir_path = shlex.quote(src_dir_path)
+            sync_command = (f'aws s3 sync --no-follow-symlinks {excludes} '
+                            f'{src_dir_path} '
+                            f's3://{self.name}{sub_path}/{dest_dir_name} '
+                            f'--endpoint={endpoint_url} '
+                            f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+            return sync_command
+
+        # Generate message for upload
+        if len(source_path_list) > 1:
+            source_message = f'{len(source_path_list)} paths'
+        else:
+            source_message = source_path_list[0]
+
+        log_path = sky_logging.generate_tmp_logging_file_path(
+            _STORAGE_LOG_FILE_NAME)
+        sync_path = f'{source_message} -> nebius://{self.name}{sub_path}/'
+        with rich_utils.safe_status(
+                ux_utils.spinner_message(f'Syncing {sync_path}',
+                                         log_path=log_path)):
+            data_utils.parallel_upload(
+                source_path_list,
+                get_file_sync_command,
+                get_dir_sync_command,
+                log_path,
+                self.name,
+                self._ACCESS_DENIED_MESSAGE,
+                create_dirs=create_dirs,
+                max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS)
+        logger.info(
+            ux_utils.finishing_message(f'Storage synced: {sync_path}',
+                                       log_path))
+
+    def _transfer_to_nebius(self) -> None:
+        assert isinstance(self.source, str), self.source
+        if self.source.startswith('gs://'):
+            data_transfer.gcs_to_nebius(self.name, self.name)
+        elif self.source.startswith('r2://'):
+            data_transfer.r2_to_nebius(self.name, self.name)
+        elif self.source.startswith('s3://'):
+            data_transfer.s3_to_nebius(self.name, self.name)
+
+    def _get_bucket(self) -> Tuple[StorageHandle, bool]:
+        """Obtains the S3 bucket.
+
+        If the bucket exists, this method will return the bucket.
+        If the bucket does not exist, there are three cases:
+          1) Raise an error if the bucket source starts with s3://
+          2) Return None if bucket has been externally deleted and
+             sync_on_reconstruction is False
+          3) Create and return a new bucket otherwise
+
+        Raises:
+            StorageSpecError: If externally created bucket is attempted to be
+                mounted without specifying storage source.
+            StorageBucketCreateError: If creating the bucket fails
+            StorageBucketGetError: If fetching a bucket fails
+            StorageExternalDeletionError: If externally deleted storage is
+                attempted to be fetched while reconstructing the storage for
+                'sky storage delete' or 'sky start'
+        """
+        nebius_s = nebius.resource('s3')
+        bucket = nebius_s.Bucket(self.name)
+        endpoint_url = nebius.create_endpoint(self.region)
+        try:
+            # Try Public bucket case.
+            # This line does not error out if the bucket is an external public
+            # bucket or if it is a user's bucket that is publicly
+            # accessible.
+            self.client.head_bucket(Bucket=self.name)
+            self._validate_existing_bucket()
+            return bucket, False
+        except aws.botocore_exceptions().ClientError as e:
+            error_code = e.response['Error']['Code']
+            # AccessDenied error for buckets that are private and not owned by
+            # user.
+            if error_code == '403':
+                command = (f'aws s3 ls s3://{self.name} '
+                           f'--endpoint={endpoint_url} '
+                           f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+                with ux_utils.print_exception_no_traceback():
+                    raise exceptions.StorageBucketGetError(
+                        _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
+                        f' To debug, consider running `{command}`.') from e
+
+        if isinstance(self.source, str) and self.source.startswith('nebius://'):
+            with ux_utils.print_exception_no_traceback():
+                raise exceptions.StorageBucketGetError(
+                    'Attempted to use a non-existent bucket as a source: '
+                    f'{self.source}. Consider using `aws s3 ls '
+                    f'{self.source} --endpoint={endpoint_url}`'
+                    f'--profile={nebius.NEBIUS_PROFILE_NAME} to debug.')
+
+        # If bucket cannot be found in both private and public settings,
+        # the bucket is to be created by Sky. However, creation is skipped if
+        # Store object is being reconstructed for deletion or re-mount with
+        # sky start, and error is raised instead.
+        if self.sync_on_reconstruction:
+            bucket = self._create_nebius_bucket(self.name, self.region)
+            return bucket, True
+        else:
+            # Raised when Storage object is reconstructed for sky storage
+            # delete or to re-mount Storages with sky start but the storage
+            # is already removed externally.
+            raise exceptions.StorageExternalDeletionError(
+                'Attempted to fetch a non-existent bucket: '
+                f'{self.name}')
+
+    def _download_file(self, remote_path: str, local_path: str) -> None:
+        """Downloads file from remote to local on s3 bucket
+        using the boto3 API
+
+        Args:
+            remote_path: str; Remote path on S3 bucket
+            local_path: str; Local path on user's device
+        """
+        self.bucket.download_file(remote_path, local_path)
+
+    def mount_command(self, mount_path: str) -> str:
+        """Returns the command to mount the bucket to the mount_path.
+
+        Uses goofys to mount the bucket.
+
+        Args:
+            mount_path: str; Path to mount the bucket to.
+        """
+        install_cmd = mounting_utils.get_s3_mount_install_cmd()
+        endpoint_url = nebius.create_endpoint(self.region)
+        nebius_profile_name = nebius.NEBIUS_PROFILE_NAME
+        mount_cmd = mounting_utils.get_nebius_mount_cmd(nebius_profile_name,
+                                                        endpoint_url,
+                                                        self.bucket.name,
+                                                        mount_path,
+                                                        self._bucket_sub_path)
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cmd)
+
+    def _create_nebius_bucket(self,
+                              bucket_name: str,
+                              region='auto') -> StorageHandle:
+        """Creates S3 bucket with specific name in specific region
+
+        Args:
+            bucket_name: str; Name of bucket
+            region: str; Region name, e.g. us-west-1, us-east-2
+        Raises:
+            StorageBucketCreateError: If bucket creation fails.
+        """
+        nebius_client = self.client
+        try:
+            if region is None:
+                nebius_client.create_bucket(Bucket=bucket_name)
+            else:
+                location = {'LocationConstraint': region}
+                nebius_client.create_bucket(Bucket=bucket_name,
+                                            CreateBucketConfiguration=location)
+                logger.info(f'  {colorama.Style.DIM}Created Nebius bucket '
+                            f'{bucket_name!r} in {region}'
+                            f'{colorama.Style.RESET_ALL}')
+        except aws.botocore_exceptions().ClientError as e:
+            with ux_utils.print_exception_no_traceback():
+                raise exceptions.StorageBucketCreateError(
+                    f'Attempted to create a bucket '
+                    f'{self.name} but failed.') from e
+        return nebius.resource('s3').Bucket(bucket_name)
+
+    def _execute_nebius_remove_command(self, command: str, bucket_name: str,
+                                       hint_operating: str,
+                                       hint_failed: str) -> bool:
+        try:
+            with rich_utils.safe_status(
+                    ux_utils.spinner_message(hint_operating)):
+                subprocess.check_output(command.split(' '),
+                                        stderr=subprocess.STDOUT)
+        except subprocess.CalledProcessError as e:
+            if 'NoSuchBucket' in e.output.decode('utf-8'):
+                logger.debug(
+                    _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
+                        bucket_name=bucket_name))
+                return False
+            else:
+                with ux_utils.print_exception_no_traceback():
+                    raise exceptions.StorageBucketDeleteError(
+                        f'{hint_failed}'
+                        f'Detailed error: {e.output}')
+        return True
+
+    def _delete_nebius_bucket(self, bucket_name: str) -> bool:
+        """Deletes S3 bucket, including all objects in bucket
+
+        Args:
+            bucket_name: str; Name of bucket
+
+        Returns:
+            bool; True if bucket was deleted, False if it was deleted externally.
+
+        Raises:
+            StorageBucketDeleteError: If deleting the bucket fails.
+        """
+        # Deleting objects is very slow programatically
+        # (i.e. bucket.objects.all().delete() is slow).
+        # In addition, standard delete operations (i.e. via `aws s3 rm`)
+        # are slow, since AWS puts deletion markers.
+        # https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
+        # The fastest way to delete is to run `aws s3 rb --force`,
+        # which removes the bucket by force.
+        endpoint_url = nebius.create_endpoint(self.region)
+        remove_command = (f'aws s3 rb s3://{bucket_name} --force '
+                          f'--endpoint {endpoint_url} '
+                          f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+
+        success = self._execute_nebius_remove_command(
+            remove_command, bucket_name,
+            f'Deleting Nebius bucket {bucket_name}',
+            f'Failed to delete Nebius bucket {bucket_name}.')
+        if not success:
+            return False
+
+        # Wait until bucket deletion propagates on Nebius servers
+        start_time = time.time()
+        while data_utils.verify_nebius_bucket(bucket_name):
+            if time.time() - start_time > self._TIMEOUT_TO_PROPAGATES:
+                raise TimeoutError(
+                    f'Timeout while verifying {bucket_name} Nebius bucket.')
+            time.sleep(0.1)
+        return True
+
+    def _delete_nebius_bucket_sub_path(self, bucket_name: str,
+                                       sub_path: str) -> bool:
+        """Deletes the sub path from the bucket."""
+        endpoint_url = nebius.create_endpoint(self.region)
+        remove_command = (
+            f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive '
+            f'--endpoint {endpoint_url} '
+            f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+        return self._execute_nebius_remove_command(
+            remove_command, bucket_name, f'Removing objects from '
+            f'Nebius bucket {bucket_name}/{sub_path}',
+            f'Failed to remove objects from '
+            f'Nebius bucket {bucket_name}/{sub_path}.')
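A closing note on the new NebiusStore above: it drives the standard AWS tooling (the aws s3 CLI and boto3) against a Nebius endpoint selected with --endpoint and a dedicated AWS profile taken from the nebius adaptor. The sketch below only mirrors the f-string assembled in get_dir_sync_command; the bucket, endpoint and profile values are hypothetical stand-ins for what nebius.create_endpoint() and nebius.NEBIUS_PROFILE_NAME actually supply.

    # Sketch with made-up values; mirrors get_dir_sync_command above.
    bucket_name = 'my-bucket'                       # hypothetical
    endpoint_url = 'https://storage.example-nebius-region.example'  # hypothetical
    profile_name = 'nebius-profile'                 # stand-in for NEBIUS_PROFILE_NAME

    sync_command = ('aws s3 sync --no-follow-symlinks '
                    f'./data s3://{bucket_name} '
                    f'--endpoint={endpoint_url} '
                    f'--profile={profile_name}')
    print(sync_command)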