skypilot-nightly 1.0.0.dev20250319__py3-none-any.whl → 1.0.0.dev20250321__py3-none-any.whl

This diff compares the contents of two publicly released versions of this package, as published to a public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Files changed (55)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/cloudflare.py +19 -3
  3. sky/adaptors/kubernetes.py +2 -1
  4. sky/adaptors/nebius.py +128 -6
  5. sky/backends/cloud_vm_ray_backend.py +3 -1
  6. sky/benchmark/benchmark_utils.py +3 -2
  7. sky/check.py +89 -55
  8. sky/cloud_stores.py +66 -0
  9. sky/clouds/aws.py +14 -2
  10. sky/clouds/azure.py +13 -1
  11. sky/clouds/cloud.py +37 -2
  12. sky/clouds/cudo.py +3 -2
  13. sky/clouds/do.py +3 -2
  14. sky/clouds/fluidstack.py +3 -2
  15. sky/clouds/gcp.py +55 -34
  16. sky/clouds/ibm.py +15 -1
  17. sky/clouds/kubernetes.py +3 -1
  18. sky/clouds/lambda_cloud.py +3 -1
  19. sky/clouds/nebius.py +7 -3
  20. sky/clouds/oci.py +15 -1
  21. sky/clouds/paperspace.py +3 -2
  22. sky/clouds/runpod.py +7 -1
  23. sky/clouds/scp.py +3 -1
  24. sky/clouds/service_catalog/kubernetes_catalog.py +3 -1
  25. sky/clouds/utils/gcp_utils.py +11 -1
  26. sky/clouds/vast.py +3 -2
  27. sky/clouds/vsphere.py +3 -2
  28. sky/core.py +6 -2
  29. sky/data/data_transfer.py +75 -0
  30. sky/data/data_utils.py +34 -0
  31. sky/data/mounting_utils.py +18 -0
  32. sky/data/storage.py +542 -16
  33. sky/data/storage_utils.py +102 -84
  34. sky/exceptions.py +2 -0
  35. sky/global_user_state.py +15 -6
  36. sky/jobs/server/core.py +1 -1
  37. sky/jobs/utils.py +5 -0
  38. sky/optimizer.py +8 -2
  39. sky/provision/gcp/config.py +3 -3
  40. sky/provision/gcp/constants.py +16 -2
  41. sky/provision/gcp/instance.py +4 -1
  42. sky/provision/kubernetes/utils.py +26 -21
  43. sky/resources.py +6 -1
  44. sky/serve/replica_managers.py +10 -1
  45. sky/setup_files/dependencies.py +3 -1
  46. sky/task.py +16 -5
  47. sky/utils/command_runner.py +2 -0
  48. sky/utils/controller_utils.py +13 -4
  49. sky/utils/kubernetes/kubernetes_deploy_utils.py +4 -1
  50. {skypilot_nightly-1.0.0.dev20250319.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/METADATA +13 -2
  51. {skypilot_nightly-1.0.0.dev20250319.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/RECORD +55 -55
  52. {skypilot_nightly-1.0.0.dev20250319.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/WHEEL +1 -1
  53. {skypilot_nightly-1.0.0.dev20250319.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/entry_points.txt +0 -0
  54. {skypilot_nightly-1.0.0.dev20250319.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info/licenses}/LICENSE +0 -0
  55. {skypilot_nightly-1.0.0.dev20250319.dist-info → skypilot_nightly-1.0.0.dev20250321.dist-info}/top_level.txt +0 -0
sky/data/storage.py CHANGED
@@ -23,7 +23,9 @@ from sky.adaptors import azure
  from sky.adaptors import cloudflare
  from sky.adaptors import gcp
  from sky.adaptors import ibm
+ from sky.adaptors import nebius
  from sky.adaptors import oci
+ from sky.clouds import cloud as sky_cloud
  from sky.data import data_transfer
  from sky.data import data_utils
  from sky.data import mounting_utils
@@ -57,6 +59,7 @@ STORE_ENABLED_CLOUDS: List[str] = [
  str(clouds.Azure()),
  str(clouds.IBM()),
  str(clouds.OCI()),
+ str(clouds.Nebius()),
  cloudflare.NAME,
  ]

@@ -78,34 +81,34 @@ _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE = (
  _STORAGE_LOG_FILE_NAME = 'storage_sync.log'


- def get_cached_enabled_storage_clouds_or_refresh(
+ def get_cached_enabled_storage_cloud_names_or_refresh(
  raise_if_no_cloud_access: bool = False) -> List[str]:
  # This is a temporary solution until https://github.com/skypilot-org/skypilot/issues/1943 # pylint: disable=line-too-long
  # is resolved by implementing separate 'enabled_storage_clouds'
- enabled_clouds = sky_check.get_cached_enabled_clouds_or_refresh()
+ enabled_clouds = sky_check.get_cached_enabled_clouds_or_refresh(
+ sky_cloud.CloudCapability.STORAGE)
  enabled_clouds = [str(cloud) for cloud in enabled_clouds]

- enabled_storage_clouds = [
- cloud for cloud in enabled_clouds if cloud in STORE_ENABLED_CLOUDS
- ]
- r2_is_enabled, _ = cloudflare.check_credentials()
+ r2_is_enabled, _ = cloudflare.check_storage_credentials()
  if r2_is_enabled:
- enabled_storage_clouds.append(cloudflare.NAME)
- if raise_if_no_cloud_access and not enabled_storage_clouds:
+ enabled_clouds.append(cloudflare.NAME)
+ if raise_if_no_cloud_access and not enabled_clouds:
  raise exceptions.NoCloudAccessError(
  'No cloud access available for storage. '
  'Please check your cloud credentials.')
- return enabled_storage_clouds
+ return enabled_clouds


  def _is_storage_cloud_enabled(cloud_name: str,
  try_fix_with_sky_check: bool = True) -> bool:
- enabled_storage_clouds = get_cached_enabled_storage_clouds_or_refresh()
- if cloud_name in enabled_storage_clouds:
+ enabled_storage_cloud_names = (
+ get_cached_enabled_storage_cloud_names_or_refresh())
+ if cloud_name in enabled_storage_cloud_names:
  return True
  if try_fix_with_sky_check:
  # TODO(zhwu): Only check the specified cloud to speed up.
- sky_check.check(quiet=True)
+ sky_check.check(quiet=True,
+ capability=sky_cloud.CloudCapability.STORAGE)
  return _is_storage_cloud_enabled(cloud_name,
  try_fix_with_sky_check=False)
  return False
@@ -119,6 +122,7 @@ class StoreType(enum.Enum):
  R2 = 'R2'
  IBM = 'IBM'
  OCI = 'OCI'
+ NEBIUS = 'NEBIUS'

  @classmethod
  def from_cloud(cls, cloud: str) -> 'StoreType':
@@ -134,6 +138,8 @@ class StoreType(enum.Enum):
  return StoreType.AZURE
  elif cloud.lower() == str(clouds.OCI()).lower():
  return StoreType.OCI
+ elif cloud.lower() == str(clouds.Nebius()).lower():
+ return StoreType.NEBIUS
  elif cloud.lower() == str(clouds.Lambda()).lower():
  with ux_utils.print_exception_no_traceback():
  raise ValueError('Lambda Cloud does not provide cloud storage.')
@@ -173,6 +179,8 @@ class StoreType(enum.Enum):
  return StoreType.IBM
  elif isinstance(store, OciStore):
  return StoreType.OCI
+ elif isinstance(store, NebiusStore):
+ return StoreType.NEBIUS
  else:
  with ux_utils.print_exception_no_traceback():
  raise ValueError(f'Unknown store type: {store}')
@@ -191,6 +199,9 @@ class StoreType(enum.Enum):
  return 'cos://'
  elif self == StoreType.OCI:
  return 'oci://'
+ # Nebius storages use 's3://' as a prefix for various aws cli commands
+ elif self == StoreType.NEBIUS:
+ return 's3://'
  else:
  with ux_utils.print_exception_no_traceback():
  raise ValueError(f'Unknown store type: {self}')
@@ -728,6 +739,8 @@ class Storage(object):
  self.add_store(StoreType.IBM)
  elif self.source.startswith('oci://'):
  self.add_store(StoreType.OCI)
+ elif self.source.startswith('nebius://'):
+ self.add_store(StoreType.NEBIUS)

  def get_bucket_sub_path_prefix(self, blob_path: str) -> str:
  """Adds the bucket sub path prefix to the blob path."""
@@ -814,7 +827,9 @@ class Storage(object):
  'using a bucket by writing <destination_path>: '
  f'{source} in the file_mounts section of your YAML')
  is_local_source = True
- elif split_path.scheme in ['s3', 'gs', 'https', 'r2', 'cos', 'oci']:
+ elif split_path.scheme in [
+ 's3', 'gs', 'https', 'r2', 'cos', 'oci', 'nebius'
+ ]:
  is_local_source = False
  # Storage mounting does not support mounting specific files from
  # cloud store - ensure path points to only a directory
@@ -838,7 +853,7 @@ class Storage(object):
  with ux_utils.print_exception_no_traceback():
  raise exceptions.StorageSourceError(
  f'Supported paths: local, s3://, gs://, https://, '
- f'r2://, cos://, oci://. Got: {source}')
+ f'r2://, cos://, oci://, nebius://. Got: {source}')
  return source, is_local_source

  def _validate_storage_spec(self, name: Optional[str]) -> None:
@@ -853,7 +868,7 @@ class Storage(object):
  """
  prefix = name.split('://')[0]
  prefix = prefix.lower()
- if prefix in ['s3', 'gs', 'https', 'r2', 'cos', 'oci']:
+ if prefix in ['s3', 'gs', 'https', 'r2', 'cos', 'oci', 'nebius']:
  with ux_utils.print_exception_no_traceback():
  raise exceptions.StorageNameError(
  'Prefix detected: `name` cannot start with '
@@ -979,6 +994,12 @@ class Storage(object):
  source=self.source,
  sync_on_reconstruction=self.sync_on_reconstruction,
  _bucket_sub_path=self._bucket_sub_path)
+ elif s_type == StoreType.NEBIUS:
+ store = NebiusStore.from_metadata(
+ s_metadata,
+ source=self.source,
+ sync_on_reconstruction=self.sync_on_reconstruction,
+ _bucket_sub_path=self._bucket_sub_path)
  else:
  with ux_utils.print_exception_no_traceback():
  raise ValueError(f'Unknown store type: {s_type}')
@@ -1073,6 +1094,8 @@ class Storage(object):
  store_cls = IBMCosStore
  elif store_type == StoreType.OCI:
  store_cls = OciStore
+ elif store_type == StoreType.NEBIUS:
+ store_cls = NebiusStore
  else:
  with ux_utils.print_exception_no_traceback():
  raise exceptions.StorageSpecError(
@@ -1106,7 +1129,7 @@ class Storage(object):
  f'name {self.name}. General initialization error.')
  raise
  except exceptions.StorageSpecError:
- logger.error(f'Could not mount externally created {store_type}'
+ logger.error(f'Could not mount externally created {store_type} '
  f'store with name {self.name!r}.')
  raise

@@ -1361,6 +1384,15 @@ class S3Store(AbstractStore):
  assert data_utils.verify_r2_bucket(self.name), (
  f'Source specified as {self.source}, a R2 bucket. ',
  'R2 Bucket should exist.')
+ elif self.source.startswith('nebius://'):
+ assert self.name == data_utils.split_nebius_path(
+ self.source)[0], (
+ 'Nebius Object Storage is specified as path, the name '
+ 'should be the same as Nebius Object Storage bucket.')
+ assert data_utils.verify_nebius_bucket(self.name), (
+ f'Source specified as {self.source}, a Nebius Object '
+ f'Storage bucket. Nebius Object Storage Bucket should'
+ f' exist.')
  elif self.source.startswith('cos://'):
  assert self.name == data_utils.split_cos_path(self.source)[0], (
  'COS Bucket is specified as path, the name should be '
@@ -1484,6 +1516,8 @@ class S3Store(AbstractStore):
  self._transfer_to_s3()
  elif self.source.startswith('oci://'):
  self._transfer_to_s3()
+ elif self.source.startswith('nebius://'):
+ self._transfer_to_s3()
  else:
  self.batch_aws_rsync([self.source])
  except exceptions.StorageUploadError:
@@ -1848,6 +1882,15 @@ class GcsStore(AbstractStore):
  assert data_utils.verify_r2_bucket(self.name), (
  f'Source specified as {self.source}, a R2 bucket. ',
  'R2 Bucket should exist.')
+ elif self.source.startswith('nebius://'):
+ assert self.name == data_utils.split_nebius_path(
+ self.source)[0], (
+ 'Nebius Object Storage is specified as path, the name '
+ 'should be the same as R2 bucket.')
+ assert data_utils.verify_nebius_bucket(self.name), (
+ f'Source specified as {self.source}, a Nebius Object '
+ f'Storage bucket. Nebius Object Storage Bucket should '
+ f'exist.')
  elif self.source.startswith('cos://'):
  assert self.name == data_utils.split_cos_path(self.source)[0], (
  'COS Bucket is specified as path, the name should be '
@@ -2439,6 +2482,15 @@ class AzureBlobStore(AbstractStore):
  assert data_utils.verify_r2_bucket(self.name), (
  f'Source specified as {self.source}, a R2 bucket. ',
  'R2 Bucket should exist.')
+ elif self.source.startswith('nebius://'):
+ assert self.name == data_utils.split_nebius_path(
+ self.source)[0], (
+ 'Nebius Object Storage is specified as path, the name '
+ 'should be the same as Nebius Object Storage bucket.')
+ assert data_utils.verify_nebius_bucket(self.name), (
+ f'Source specified as {self.source}, a Nebius Object '
+ f'Storage bucket. Nebius Object Storage Bucket should '
+ f'exist.')
  elif self.source.startswith('cos://'):
  assert self.name == data_utils.split_cos_path(self.source)[0], (
  'COS Bucket is specified as path, the name should be '
@@ -2814,6 +2866,8 @@ class AzureBlobStore(AbstractStore):
  raise NotImplementedError(error_message.format('IBM COS'))
  elif self.source.startswith('oci://'):
  raise NotImplementedError(error_message.format('OCI'))
+ elif self.source.startswith('nebius://'):
+ raise NotImplementedError(error_message.format('NEBIUS'))
  else:
  self.batch_az_blob_sync([self.source])
  except exceptions.StorageUploadError:
@@ -3202,6 +3256,15 @@ class R2Store(AbstractStore):
  assert self.name == data_utils.split_r2_path(self.source)[0], (
  'R2 Bucket is specified as path, the name should be '
  'the same as R2 bucket.')
+ elif self.source.startswith('nebius://'):
+ assert self.name == data_utils.split_nebius_path(
+ self.source)[0], (
+ 'Nebius Object Storage is specified as path, the name '
+ 'should be the same as Nebius Object Storage bucket.')
+ assert data_utils.verify_nebius_bucket(self.name), (
+ f'Source specified as {self.source}, a Nebius Object '
+ f'Storage bucket. Nebius Object Storage Bucket should '
+ f'exist.')
  elif self.source.startswith('cos://'):
  assert self.name == data_utils.split_cos_path(self.source)[0], (
  'IBM COS Bucket is specified as path, the name should be '
@@ -3266,6 +3329,8 @@ class R2Store(AbstractStore):
  pass
  elif self.source.startswith('oci://'):
  self._transfer_to_r2()
+ elif self.source.startswith('nebius://'):
+ self._transfer_to_r2()
  else:
  self.batch_aws_rsync([self.source])
  except exceptions.StorageUploadError:
@@ -3392,6 +3457,8 @@ class R2Store(AbstractStore):
  data_transfer.gcs_to_r2(self.name, self.name)
  elif self.source.startswith('s3://'):
  data_transfer.s3_to_r2(self.name, self.name)
+ elif self.source.startswith('nebius://'):
+ data_transfer.s3_to_r2(self.name, self.name)

  def _get_bucket(self) -> Tuple[StorageHandle, bool]:
  """Obtains the R2 bucket.
@@ -3651,6 +3718,15 @@ class IBMCosStore(AbstractStore):
  assert data_utils.verify_r2_bucket(self.name), (
  f'Source specified as {self.source}, a R2 bucket. ',
  'R2 Bucket should exist.')
+ elif self.source.startswith('nebius://'):
+ assert self.name == data_utils.split_nebius_path(
+ self.source)[0], (
+ 'Nebius Object Storage is specified as path, the name '
+ 'should be the same as Nebius Object Storage bucket.')
+ assert data_utils.verify_nebius_bucket(self.name), (
+ f'Source specified as {self.source}, a Nebius Object '
+ f'Storage bucket. Nebius Object Storage Bucket should '
+ f'exist.')
  elif self.source.startswith('cos://'):
  assert self.name == data_utils.split_cos_path(self.source)[0], (
  'COS Bucket is specified as path, the name should be '
@@ -3744,6 +3820,9 @@ class IBMCosStore(AbstractStore):
  elif self.source.startswith('s3://'):
  raise Exception('IBM COS currently not supporting'
  'data transfers between COS and S3')
+ elif self.source.startswith('nebius://'):
+ raise Exception('IBM COS currently not supporting'
+ 'data transfers between COS and Nebius')
  elif self.source.startswith('gs://'):
  raise Exception('IBM COS currently not supporting'
  'data transfers between COS and GS')
@@ -4510,3 +4589,450 @@ class OciStore(AbstractStore):
  raise exceptions.StorageBucketDeleteError(
  f'Failed to delete OCI bucket {bucket_name}.')
  return True
+
+
+ class NebiusStore(AbstractStore):
+ """NebiusStore inherits from Storage Object and represents the backend
+ for S3 buckets.
+ """
+
+ _ACCESS_DENIED_MESSAGE = 'Access Denied'
+ _TIMEOUT_TO_PROPAGATES = 20
+
+ def __init__(self,
+ name: str,
+ source: str,
+ region: Optional[str] = None,
+ is_sky_managed: Optional[bool] = None,
+ sync_on_reconstruction: bool = True,
+ _bucket_sub_path: Optional[str] = None):
+ self.client: 'boto3.client.Client'
+ self.bucket: 'StorageHandle'
+ self.region = region if region is not None else nebius.DEFAULT_REGION
+ super().__init__(name, source, region, is_sky_managed,
+ sync_on_reconstruction, _bucket_sub_path)
+
+ def _validate(self):
+ if self.source is not None and isinstance(self.source, str):
+ if self.source.startswith('s3://'):
+ assert self.name == data_utils.split_s3_path(self.source)[0], (
+ 'S3 Bucket is specified as path, the name should be the'
+ ' same as S3 bucket.')
+ elif self.source.startswith('gs://'):
+ assert self.name == data_utils.split_gcs_path(self.source)[0], (
+ 'GCS Bucket is specified as path, the name should be '
+ 'the same as GCS bucket.')
+ assert data_utils.verify_gcs_bucket(self.name), (
+ f'Source specified as {self.source}, a GCS bucket. ',
+ 'GCS Bucket should exist.')
+ elif data_utils.is_az_container_endpoint(self.source):
+ storage_account_name, container_name, _ = (
+ data_utils.split_az_path(self.source))
+ assert self.name == container_name, (
+ 'Azure bucket is specified as path, the name should be '
+ 'the same as Azure bucket.')
+ assert data_utils.verify_az_bucket(
+ storage_account_name, self.name), (
+ f'Source specified as {self.source}, an Azure bucket. '
+ 'Azure bucket should exist.')
+ elif self.source.startswith('r2://'):
+ assert self.name == data_utils.split_r2_path(self.source)[0], (
+ 'R2 Bucket is specified as path, the name should be '
+ 'the same as R2 bucket.')
+ assert data_utils.verify_r2_bucket(self.name), (
+ f'Source specified as {self.source}, a R2 bucket. ',
+ 'R2 Bucket should exist.')
+ elif self.source.startswith('nebius://'):
+ assert self.name == data_utils.split_nebius_path(
+ self.source)[0], (
+ 'Nebius Object Storage is specified as path, the name '
+ 'should be the same as Nebius Object Storage bucket.')
+ elif self.source.startswith('cos://'):
+ assert self.name == data_utils.split_cos_path(self.source)[0], (
+ 'COS Bucket is specified as path, the name should be '
+ 'the same as COS bucket.')
+ assert data_utils.verify_ibm_cos_bucket(self.name), (
+ f'Source specified as {self.source}, a COS bucket. ',
+ 'COS Bucket should exist.')
+ elif self.source.startswith('oci://'):
+ raise NotImplementedError(
+ 'Moving data from OCI to S3 is currently not supported.')
+ # Validate name
+ self.name = S3Store.validate_name(self.name)
+
+ # Check if the storage is enabled
+ if not _is_storage_cloud_enabled(str(clouds.Nebius())):
+ with ux_utils.print_exception_no_traceback():
+ raise exceptions.ResourcesUnavailableError((
+ 'Storage \'store: nebius\' specified, but '
+ 'Nebius access is disabled. To fix, enable '
+ 'Nebius by running `sky check`. More info: '
+ 'https://docs.skypilot.co/en/latest/getting-started/installation.html.' # pylint: disable=line-too-long
+ ))
+
+ def initialize(self):
+ """Initializes the Nebius Object Storage on the cloud.
+
+ Initialization involves fetching bucket if exists, or creating it if
+ it does not.
+
+ Raises:
+ StorageBucketCreateError: If bucket creation fails
+ StorageBucketGetError: If fetching existing bucket fails
+ StorageInitError: If general initialization fails.
+ """
+ self.client = data_utils.create_nebius_client(self.region)
+ self.bucket, is_new_bucket = self._get_bucket()
+ if self.is_sky_managed is None:
+ # If is_sky_managed is not specified, then this is a new storage
+ # object (i.e., did not exist in global_user_state) and we should
+ # set the is_sky_managed property.
+ # If is_sky_managed is specified, then we take no action.
+ self.is_sky_managed = is_new_bucket
+
+ def upload(self):
+ """Uploads source to store bucket.
+
+ Upload must be called by the Storage handler - it is not called on
+ Store initialization.
+
+ Raises:
+ StorageUploadError: if upload fails.
+ """
+ try:
+ if isinstance(self.source, list):
+ self.batch_aws_rsync(self.source, create_dirs=True)
+ elif self.source is not None:
+ if self.source.startswith('nebius://'):
+ pass
+ elif self.source.startswith('s3://'):
+ self._transfer_to_nebius()
+ elif self.source.startswith('gs://'):
+ self._transfer_to_nebius()
+ elif self.source.startswith('r2://'):
+ self._transfer_to_nebius()
+ elif self.source.startswith('oci://'):
+ self._transfer_to_nebius()
+ else:
+ self.batch_aws_rsync([self.source])
+ except exceptions.StorageUploadError:
+ raise
+ except Exception as e:
+ raise exceptions.StorageUploadError(
+ f'Upload failed for store {self.name}') from e
+
+ def delete(self) -> None:
+ if self._bucket_sub_path is not None and not self.is_sky_managed:
+ return self._delete_sub_path()
+
+ deleted_by_skypilot = self._delete_nebius_bucket(self.name)
+ if deleted_by_skypilot:
+ msg_str = f'Deleted Nebius bucket {self.name}.'
+ else:
+ msg_str = (f'Nebius bucket {self.name} may have been deleted '
+ f'externally. Removing from local state.')
+ logger.info(f'{colorama.Fore.GREEN}{msg_str}'
+ f'{colorama.Style.RESET_ALL}')
+
+ def _delete_sub_path(self) -> None:
+ assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
+ deleted_by_skypilot = self._delete_nebius_bucket_sub_path(
+ self.name, self._bucket_sub_path)
+ if deleted_by_skypilot:
+ msg_str = (f'Removed objects from S3 bucket '
+ f'{self.name}/{self._bucket_sub_path}.')
+ else:
+ msg_str = (f'Failed to remove objects from S3 bucket '
+ f'{self.name}/{self._bucket_sub_path}.')
+ logger.info(f'{colorama.Fore.GREEN}{msg_str}'
+ f'{colorama.Style.RESET_ALL}')
+
+ def get_handle(self) -> StorageHandle:
+ return nebius.resource('s3').Bucket(self.name)
+
+ def batch_aws_rsync(self,
+ source_path_list: List[Path],
+ create_dirs: bool = False) -> None:
+ """Invokes aws s3 sync to batch upload a list of local paths to S3
+
+ AWS Sync by default uses 10 threads to upload files to the bucket. To
+ increase parallelism, modify max_concurrent_requests in your aws config
+ file (Default path: ~/.aws/config).
+
+ Since aws s3 sync does not support batch operations, we construct
+ multiple commands to be run in parallel.
+
+ Args:
+ source_path_list: List of paths to local files or directories
+ create_dirs: If the local_path is a directory and this is set to
+ False, the contents of the directory are directly uploaded to
+ root of the bucket. If the local_path is a directory and this is
+ set to True, the directory is created in the bucket root and
+ contents are uploaded to it.
+ """
+ sub_path = (f'/{self._bucket_sub_path}'
+ if self._bucket_sub_path else '')
+
+ def get_file_sync_command(base_dir_path, file_names):
+ includes = ' '.join([
+ f'--include {shlex.quote(file_name)}'
+ for file_name in file_names
+ ])
+ endpoint_url = nebius.create_endpoint(self.region)
+ base_dir_path = shlex.quote(base_dir_path)
+ sync_command = ('aws s3 sync --no-follow-symlinks --exclude="*" '
+ f'{includes} {base_dir_path} '
+ f's3://{self.name}{sub_path} '
+ f'--endpoint={endpoint_url} '
+ f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+ return sync_command
+
+ def get_dir_sync_command(src_dir_path, dest_dir_name):
+ # we exclude .git directory from the sync
+ excluded_list = storage_utils.get_excluded_files(src_dir_path)
+ excluded_list.append('.git/*')
+ excludes = ' '.join([
+ f'--exclude {shlex.quote(file_name)}'
+ for file_name in excluded_list
+ ])
+ endpoint_url = nebius.create_endpoint(self.region)
+ src_dir_path = shlex.quote(src_dir_path)
+ sync_command = (f'aws s3 sync --no-follow-symlinks {excludes} '
+ f'{src_dir_path} '
+ f's3://{self.name}{sub_path}/{dest_dir_name} '
+ f'--endpoint={endpoint_url} '
+ f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+ return sync_command
+
+ # Generate message for upload
+ if len(source_path_list) > 1:
+ source_message = f'{len(source_path_list)} paths'
+ else:
+ source_message = source_path_list[0]
+
+ log_path = sky_logging.generate_tmp_logging_file_path(
+ _STORAGE_LOG_FILE_NAME)
+ sync_path = f'{source_message} -> nebius://{self.name}{sub_path}/'
+ with rich_utils.safe_status(
+ ux_utils.spinner_message(f'Syncing {sync_path}',
+ log_path=log_path)):
+ data_utils.parallel_upload(
+ source_path_list,
+ get_file_sync_command,
+ get_dir_sync_command,
+ log_path,
+ self.name,
+ self._ACCESS_DENIED_MESSAGE,
+ create_dirs=create_dirs,
+ max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS)
+ logger.info(
+ ux_utils.finishing_message(f'Storage synced: {sync_path}',
+ log_path))
+
+ def _transfer_to_nebius(self) -> None:
+ assert isinstance(self.source, str), self.source
+ if self.source.startswith('gs://'):
+ data_transfer.gcs_to_nebius(self.name, self.name)
+ elif self.source.startswith('r2://'):
+ data_transfer.r2_to_nebius(self.name, self.name)
+ elif self.source.startswith('s3://'):
+ data_transfer.s3_to_nebius(self.name, self.name)
+
+ def _get_bucket(self) -> Tuple[StorageHandle, bool]:
+ """Obtains the S3 bucket.
+
+ If the bucket exists, this method will return the bucket.
+ If the bucket does not exist, there are three cases:
+ 1) Raise an error if the bucket source starts with s3://
+ 2) Return None if bucket has been externally deleted and
+ sync_on_reconstruction is False
+ 3) Create and return a new bucket otherwise
+
+ Raises:
+ StorageSpecError: If externally created bucket is attempted to be
+ mounted without specifying storage source.
+ StorageBucketCreateError: If creating the bucket fails
+ StorageBucketGetError: If fetching a bucket fails
+ StorageExternalDeletionError: If externally deleted storage is
+ attempted to be fetched while reconstructing the storage for
+ 'sky storage delete' or 'sky start'
+ """
+ nebius_s = nebius.resource('s3')
+ bucket = nebius_s.Bucket(self.name)
+ endpoint_url = nebius.create_endpoint(self.region)
+ try:
+ # Try Public bucket case.
+ # This line does not error out if the bucket is an external public
+ # bucket or if it is a user's bucket that is publicly
+ # accessible.
+ self.client.head_bucket(Bucket=self.name)
+ self._validate_existing_bucket()
+ return bucket, False
+ except aws.botocore_exceptions().ClientError as e:
+ error_code = e.response['Error']['Code']
+ # AccessDenied error for buckets that are private and not owned by
+ # user.
+ if error_code == '403':
+ command = (f'aws s3 ls s3://{self.name} '
+ f'--endpoint={endpoint_url} '
+ f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+ with ux_utils.print_exception_no_traceback():
+ raise exceptions.StorageBucketGetError(
+ _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
+ f' To debug, consider running `{command}`.') from e
+
+ if isinstance(self.source, str) and self.source.startswith('nebius://'):
+ with ux_utils.print_exception_no_traceback():
+ raise exceptions.StorageBucketGetError(
+ 'Attempted to use a non-existent bucket as a source: '
+ f'{self.source}. Consider using `aws s3 ls '
+ f'{self.source} --endpoint={endpoint_url}`'
+ f'--profile={nebius.NEBIUS_PROFILE_NAME} to debug.')
+
+ # If bucket cannot be found in both private and public settings,
+ # the bucket is to be created by Sky. However, creation is skipped if
+ # Store object is being reconstructed for deletion or re-mount with
+ # sky start, and error is raised instead.
+ if self.sync_on_reconstruction:
+ bucket = self._create_nebius_bucket(self.name, self.region)
+ return bucket, True
+ else:
+ # Raised when Storage object is reconstructed for sky storage
+ # delete or to re-mount Storages with sky start but the storage
+ # is already removed externally.
+ raise exceptions.StorageExternalDeletionError(
+ 'Attempted to fetch a non-existent bucket: '
+ f'{self.name}')
+
+ def _download_file(self, remote_path: str, local_path: str) -> None:
+ """Downloads file from remote to local on s3 bucket
+ using the boto3 API
+
+ Args:
+ remote_path: str; Remote path on S3 bucket
+ local_path: str; Local path on user's device
+ """
+ self.bucket.download_file(remote_path, local_path)
+
+ def mount_command(self, mount_path: str) -> str:
+ """Returns the command to mount the bucket to the mount_path.
+
+ Uses goofys to mount the bucket.
+
+ Args:
+ mount_path: str; Path to mount the bucket to.
+ """
+ install_cmd = mounting_utils.get_s3_mount_install_cmd()
+ endpoint_url = nebius.create_endpoint(self.region)
+ nebius_profile_name = nebius.NEBIUS_PROFILE_NAME
+ mount_cmd = mounting_utils.get_nebius_mount_cmd(nebius_profile_name,
+ endpoint_url,
+ self.bucket.name,
+ mount_path,
+ self._bucket_sub_path)
+ return mounting_utils.get_mounting_command(mount_path, install_cmd,
+ mount_cmd)
+
+ def _create_nebius_bucket(self,
+ bucket_name: str,
+ region='auto') -> StorageHandle:
+ """Creates S3 bucket with specific name in specific region
+
+ Args:
+ bucket_name: str; Name of bucket
+ region: str; Region name, e.g. us-west-1, us-east-2
+ Raises:
+ StorageBucketCreateError: If bucket creation fails.
+ """
+ nebius_client = self.client
+ try:
+ if region is None:
+ nebius_client.create_bucket(Bucket=bucket_name)
+ else:
+ location = {'LocationConstraint': region}
+ nebius_client.create_bucket(Bucket=bucket_name,
+ CreateBucketConfiguration=location)
+ logger.info(f' {colorama.Style.DIM}Created Nebius bucket '
+ f'{bucket_name!r} in {region}'
+ f'{colorama.Style.RESET_ALL}')
+ except aws.botocore_exceptions().ClientError as e:
+ with ux_utils.print_exception_no_traceback():
+ raise exceptions.StorageBucketCreateError(
+ f'Attempted to create a bucket '
+ f'{self.name} but failed.') from e
+ return nebius.resource('s3').Bucket(bucket_name)
+
+ def _execute_nebius_remove_command(self, command: str, bucket_name: str,
+ hint_operating: str,
+ hint_failed: str) -> bool:
+ try:
+ with rich_utils.safe_status(
+ ux_utils.spinner_message(hint_operating)):
+ subprocess.check_output(command.split(' '),
+ stderr=subprocess.STDOUT)
+ except subprocess.CalledProcessError as e:
+ if 'NoSuchBucket' in e.output.decode('utf-8'):
+ logger.debug(
+ _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
+ bucket_name=bucket_name))
+ return False
+ else:
+ with ux_utils.print_exception_no_traceback():
+ raise exceptions.StorageBucketDeleteError(
+ f'{hint_failed}'
+ f'Detailed error: {e.output}')
+ return True
+
+ def _delete_nebius_bucket(self, bucket_name: str) -> bool:
+ """Deletes S3 bucket, including all objects in bucket
+
+ Args:
+ bucket_name: str; Name of bucket
+
+ Returns:
+ bool; True if bucket was deleted, False if it was deleted externally.
+
+ Raises:
+ StorageBucketDeleteError: If deleting the bucket fails.
+ """
+ # Deleting objects is very slow programatically
+ # (i.e. bucket.objects.all().delete() is slow).
+ # In addition, standard delete operations (i.e. via `aws s3 rm`)
+ # are slow, since AWS puts deletion markers.
+ # https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
+ # The fastest way to delete is to run `aws s3 rb --force`,
+ # which removes the bucket by force.
+ endpoint_url = nebius.create_endpoint(self.region)
+ remove_command = (f'aws s3 rb s3://{bucket_name} --force '
+ f'--endpoint {endpoint_url} '
+ f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+
+ success = self._execute_nebius_remove_command(
+ remove_command, bucket_name,
+ f'Deleting Nebius bucket {bucket_name}',
+ f'Failed to delete Nebius bucket {bucket_name}.')
+ if not success:
+ return False
+
+ # Wait until bucket deletion propagates on Nebius servers
+ start_time = time.time()
+ while data_utils.verify_nebius_bucket(bucket_name):
+ if time.time() - start_time > self._TIMEOUT_TO_PROPAGATES:
+ raise TimeoutError(
+ f'Timeout while verifying {bucket_name} Nebius bucket.')
+ time.sleep(0.1)
+ return True
+
+ def _delete_nebius_bucket_sub_path(self, bucket_name: str,
+ sub_path: str) -> bool:
+ """Deletes the sub path from the bucket."""
+ endpoint_url = nebius.create_endpoint(self.region)
+ remove_command = (
+ f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive '
+ f'--endpoint {endpoint_url} '
+ f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+ return self._execute_nebius_remove_command(
+ remove_command, bucket_name, f'Removing objects from '
+ f'Nebius bucket {bucket_name}/{sub_path}',
+ f'Failed to remove objects from '
+ f'Nebius bucket {bucket_name}/{sub_path}.')
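
Taken together, the changes above add Nebius Object Storage as a storage backend: a new StoreType.NEBIUS, recognition of the nebius:// URI scheme, and the NebiusStore class that syncs and mounts through the aws CLI against Nebius' S3-compatible endpoint. The snippet below is a minimal, illustrative sketch (not part of the diff) of how these code paths might be exercised via the Storage API defined in sky/data/storage.py; the bucket name is a placeholder, and it assumes Nebius credentials are configured and reported as enabled for storage by `sky check`.

```python
# Illustrative sketch only: uses the Storage / StoreType names defined in
# sky/data/storage.py. 'my-nebius-bucket' is a placeholder bucket name.
from sky.data import storage as storage_lib

# A nebius:// source is now accepted by the source validation added in this
# release and attaches a Nebius-backed store to the Storage object.
store = storage_lib.Storage(source='nebius://my-nebius-bucket')

# The store can also be attached explicitly; internally this selects
# NebiusStore as the store class.
store.add_store(storage_lib.StoreType.NEBIUS)
```

The same effect should be reachable from a task YAML by requesting `store: nebius` for a storage entry, which is the spelling referenced in the new "Nebius access is disabled" error message.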