skypilot-nightly 1.0.0.dev20250728-py3-none-any.whl → 1.0.0.dev20250730-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic.
- sky/__init__.py +2 -2
- sky/catalog/kubernetes_catalog.py +2 -2
- sky/client/cli/command.py +0 -7
- sky/client/common.py +12 -9
- sky/clouds/kubernetes.py +2 -1
- sky/clouds/nebius.py +1 -1
- sky/clouds/utils/gcp_utils.py +1 -1
- sky/clouds/vast.py +1 -2
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/_r2LwCFLjlWjZDUIJQG_V/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1043-928582d4860fef92.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-3f10a5a9f697c630.js +11 -0
- sky/dashboard/out/_next/static/chunks/1559-6c00e20454194859.js +30 -0
- sky/dashboard/out/_next/static/chunks/1664-22b00e32c9ff96a4.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-1df8b686a51f3e3a.js +6 -0
- sky/dashboard/out/_next/static/chunks/2003.f90b06bb1f914295.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2641.142718b6b78a6f9b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3698-7874720877646365.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.95524bc443db8260.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.42f21f250f91f65b.js +1 -0
- sky/dashboard/out/_next/static/chunks/4869.18e6a4361a380763.js +16 -0
- sky/dashboard/out/_next/static/chunks/4937.d6bf67771e353356.js +15 -0
- sky/dashboard/out/_next/static/chunks/5230-f3bb2663e442e86c.js +1 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6135-d0e285ac5f3f2485.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6601-234b1cf963c7280b.js +1 -0
- sky/dashboard/out/_next/static/chunks/691.6d99cbfba347cebf.js +55 -0
- sky/dashboard/out/_next/static/chunks/6989-983d3ae7a874de98.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +1 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/8969-9a8cca241b30db83.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.7937c16bc8623516.js +6 -0
- sky/dashboard/out/_next/static/chunks/938-40d15b6261ec8dc1.js +1 -0
- sky/dashboard/out/_next/static/chunks/9847.4c46c5e229c78704.js +30 -0
- sky/dashboard/out/_next/static/chunks/9984.78ee6d2c6fa4b0e8.js +1 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-a67ae198457b9886.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-fa63e8b1d203f298.js +11 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-665fa5d96dd41d67.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-956ad430075efee8.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-8620d099cbef8608.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-9cfd875eecb6eaf5.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-0fbdc9072f19fbe2.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-b25c109d6e41bcf4.js +11 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-6393a9edc7322b54.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-34d6bb10c3b3ee3d.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-225c8dae0634eb7f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-92f741084a89e27b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-4d41c9023287f59a.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-e4cb7e97d37e93ad.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-5adfc4d4b3db6f71.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/data_utils.py +25 -0
- sky/data/storage.py +1219 -1775
- sky/global_user_state.py +18 -8
- sky/jobs/server/core.py +4 -1
- sky/jobs/state.py +35 -7
- sky/jobs/utils.py +35 -17
- sky/logs/agent.py +0 -14
- sky/logs/aws.py +4 -30
- sky/provision/kubernetes/instance.py +4 -3
- sky/provision/kubernetes/utils.py +56 -31
- sky/provision/vast/instance.py +2 -1
- sky/provision/vast/utils.py +9 -6
- sky/resources.py +8 -2
- sky/serve/server/core.py +21 -2
- sky/serve/service.py +22 -2
- sky/server/server.py +7 -2
- sky/templates/sky-serve-controller.yaml.j2 +3 -0
- sky/utils/kubernetes/gpu_labeler.py +2 -2
- sky/utils/schemas.py +5 -1
- {skypilot_nightly-1.0.0.dev20250728.dist-info → skypilot_nightly-1.0.0.dev20250730.dist-info}/METADATA +1 -1
- {skypilot_nightly-1.0.0.dev20250728.dist-info → skypilot_nightly-1.0.0.dev20250730.dist-info}/RECORD +101 -100
- sky/dashboard/out/_next/static/chunks/1043-869d9c78bf5dd3df.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-e49a159c30a6c4a7.js +0 -11
- sky/dashboard/out/_next/static/chunks/1559-18717d96ef2fcbe9.js +0 -30
- sky/dashboard/out/_next/static/chunks/1664-d65361e92b85e786.js +0 -1
- sky/dashboard/out/_next/static/chunks/1871-ea0e7283886407ca.js +0 -6
- sky/dashboard/out/_next/static/chunks/2003.b82e6db40ec4c463.js +0 -1
- sky/dashboard/out/_next/static/chunks/2350.23778a2b19aabd33.js +0 -1
- sky/dashboard/out/_next/static/chunks/2369.2d6e4757f8dfc2b7.js +0 -15
- sky/dashboard/out/_next/static/chunks/2641.74c19c4d45a2c034.js +0 -1
- sky/dashboard/out/_next/static/chunks/3698-9fa11dafb5cad4a6.js +0 -1
- sky/dashboard/out/_next/static/chunks/3785.59705416215ff08b.js +0 -1
- sky/dashboard/out/_next/static/chunks/3937.d7f1c55d1916c7f2.js +0 -1
- sky/dashboard/out/_next/static/chunks/4725.66125dcd9832aa5d.js +0 -1
- sky/dashboard/out/_next/static/chunks/4869.da729a7db3a31f43.js +0 -16
- sky/dashboard/out/_next/static/chunks/4937.d75809403fc264ac.js +0 -15
- sky/dashboard/out/_next/static/chunks/5230-df791914b54d91d9.js +0 -1
- sky/dashboard/out/_next/static/chunks/5739-5ea3ffa10fc884f2.js +0 -8
- sky/dashboard/out/_next/static/chunks/6135-2abbd0352f8ee061.js +0 -1
- sky/dashboard/out/_next/static/chunks/616-162f3033ffcd3d31.js +0 -39
- sky/dashboard/out/_next/static/chunks/6601-d4a381403a8bae91.js +0 -1
- sky/dashboard/out/_next/static/chunks/691.488b4aef97c28727.js +0 -55
- sky/dashboard/out/_next/static/chunks/6989-eab0e9c16b64fd9f.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-f64e03df359e04f7.js +0 -1
- sky/dashboard/out/_next/static/chunks/7411-2cc31dc0fdf2a9ad.js +0 -41
- sky/dashboard/out/_next/static/chunks/8969-8e0b2055bf5dd499.js +0 -1
- sky/dashboard/out/_next/static/chunks/9025.4a9099bdf3ed4875.js +0 -6
- sky/dashboard/out/_next/static/chunks/938-7ee806653aef0609.js +0 -1
- sky/dashboard/out/_next/static/chunks/9847.387abf8a14d722db.js +0 -30
- sky/dashboard/out/_next/static/chunks/9984.0460de9d3adf5582.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-61f2257a9cd8b32b.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-efc06c2733009cd3.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-68c028b1bc5e1b72.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-c0a4f1ea606d48d2.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-da491665d4289aae.js +0 -34
- sky/dashboard/out/_next/static/chunks/pages/_error-c72a1f77a3c0be1b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-2186770cc2de1623.js +0 -11
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-95afb019ab85801c.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/clusters-3d4be4961e1c94eb.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/config-a2673b256b6d416f.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-89e7daf7b7df02e0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-a90b4fe4616dc501.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra-0d3d1f890c5d188a.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dc0299ffefebcdbe.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs-49f790d12a85027c.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-6790fcefd5487b13.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/volumes-61ea7ba7e56f8d06.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-5629d4e551dba1ee.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-6bcd4b20914d76c9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-5f7fe4b7d55b8612.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-a305898dc479711e.js +0 -1
- sky/dashboard/out/_next/static/ucBqsWPN0A5D2kXj8-FqQ/_buildManifest.js +0 -1
- /sky/dashboard/out/_next/static/{ucBqsWPN0A5D2kXj8-FqQ → _r2LwCFLjlWjZDUIJQG_V}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250728.dist-info → skypilot_nightly-1.0.0.dev20250730.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250728.dist-info → skypilot_nightly-1.0.0.dev20250730.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250728.dist-info → skypilot_nightly-1.0.0.dev20250730.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250728.dist-info → skypilot_nightly-1.0.0.dev20250730.dist-info}/top_level.txt +0 -0
sky/data/storage.py
CHANGED
@@ -1,4 +1,6 @@
 """Storage and Store Classes for Sky Data."""
+from abc import abstractmethod
+from dataclasses import dataclass
 import enum
 import hashlib
 import os
@@ -7,7 +9,7 @@ import shlex
 import subprocess
 import time
 import typing
-from typing import Any, Dict, List, Optional, Tuple, Type, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
 import urllib.parse
 
 import colorama
@@ -126,40 +128,68 @@ class StoreType(enum.Enum):
     NEBIUS = 'NEBIUS'
     VOLUME = 'VOLUME'
 
+    @classmethod
+    def _get_s3_compatible_store_by_cloud(cls,
+                                          cloud_name: str) -> Optional[str]:
+        """Get S3-compatible store type by cloud name."""
+        for store_type, store_class in _S3_COMPATIBLE_STORES.items():
+            config = store_class.get_config()
+            if config.cloud_name.lower() == cloud_name:
+                return store_type
+        return None
+
+    @classmethod
+    def _get_s3_compatible_config(
+            cls, store_type: str) -> Optional['S3CompatibleConfig']:
+        """Get S3-compatible store configuration by store type."""
+        store_class = _S3_COMPATIBLE_STORES.get(store_type)
+        if store_class:
+            return store_class.get_config()
+        return None
+
+    @classmethod
+    def find_s3_compatible_config_by_prefix(
+            cls, source: str) -> Optional['StoreType']:
+        """Get S3-compatible store type by URL prefix."""
+        for store_type, store_class in _S3_COMPATIBLE_STORES.items():
+            config = store_class.get_config()
+            if source.startswith(config.url_prefix):
+                return StoreType(store_type)
+        return None
+
     @classmethod
     def from_cloud(cls, cloud: str) -> 'StoreType':
-
-
-        elif cloud.lower() == str(clouds.GCP()).lower():
+        cloud_lower = cloud.lower()
+        if cloud_lower == str(clouds.GCP()).lower():
             return StoreType.GCS
-        elif
+        elif cloud_lower == str(clouds.IBM()).lower():
             return StoreType.IBM
-        elif
-            return StoreType.R2
-        elif cloud.lower() == str(clouds.Azure()).lower():
+        elif cloud_lower == str(clouds.Azure()).lower():
             return StoreType.AZURE
-        elif
+        elif cloud_lower == str(clouds.OCI()).lower():
             return StoreType.OCI
-        elif
-            return StoreType.NEBIUS
-        elif cloud.lower() == str(clouds.Lambda()).lower():
+        elif cloud_lower == str(clouds.Lambda()).lower():
             with ux_utils.print_exception_no_traceback():
                 raise ValueError('Lambda Cloud does not provide cloud storage.')
-        elif
+        elif cloud_lower == str(clouds.SCP()).lower():
             with ux_utils.print_exception_no_traceback():
                 raise ValueError('SCP does not provide cloud storage.')
+        else:
+            s3_store_type = cls._get_s3_compatible_store_by_cloud(cloud_lower)
+            if s3_store_type:
+                return cls(s3_store_type)
 
         raise ValueError(f'Unsupported cloud for StoreType: {cloud}')
 
     def to_cloud(self) -> str:
-
-
-
+        config = self._get_s3_compatible_config(self.value)
+        if config:
+            return config.cloud_name
+
+        if self == StoreType.GCS:
             return str(clouds.GCP())
         elif self == StoreType.AZURE:
             return str(clouds.Azure())
-        elif self == StoreType.R2:
-            return cloudflare.NAME
         elif self == StoreType.IBM:
             return str(clouds.IBM())
         elif self == StoreType.OCI:
@@ -169,41 +199,34 @@ class StoreType(enum.Enum):
 
     @classmethod
     def from_store(cls, store: 'AbstractStore') -> 'StoreType':
-        if isinstance(store,
-            return
-
+        if isinstance(store, S3CompatibleStore):
+            return cls(store.get_store_type())
+
+        if isinstance(store, GcsStore):
             return StoreType.GCS
         elif isinstance(store, AzureBlobStore):
             return StoreType.AZURE
-        elif isinstance(store, R2Store):
-            return StoreType.R2
         elif isinstance(store, IBMCosStore):
             return StoreType.IBM
         elif isinstance(store, OciStore):
             return StoreType.OCI
-        elif isinstance(store, NebiusStore):
-            return StoreType.NEBIUS
         else:
             with ux_utils.print_exception_no_traceback():
                 raise ValueError(f'Unknown store type: {store}')
 
     def store_prefix(self) -> str:
-
-
-
+        config = self._get_s3_compatible_config(self.value)
+        if config:
+            return config.url_prefix
+
+        if self == StoreType.GCS:
             return 'gs://'
         elif self == StoreType.AZURE:
             return 'https://'
-        # R2 storages use 's3://' as a prefix for various aws cli commands
-        elif self == StoreType.R2:
-            return 'r2://'
         elif self == StoreType.IBM:
             return 'cos://'
         elif self == StoreType.OCI:
             return 'oci://'
-        # Nebius storages use 's3://' as a prefix for various aws cli commands
-        elif self == StoreType.NEBIUS:
-            return 'nebius://'
         else:
             with ux_utils.print_exception_no_traceback():
                 raise ValueError(f'Unknown store type: {self}')
@@ -252,12 +275,20 @@ class StoreType(enum.Enum):
                 elif store_type == StoreType.IBM:
                     bucket_name, sub_path, region = data_utils.split_cos_path(
                         store_url)
-                elif store_type == StoreType.R2:
-                    bucket_name, sub_path = data_utils.split_r2_path(store_url)
                 elif store_type == StoreType.GCS:
                     bucket_name, sub_path = data_utils.split_gcs_path(store_url)
-
-
+                else:
+                    # Check compatible stores
+                    for compatible_store_type, store_class in \
+                            _S3_COMPATIBLE_STORES.items():
+                        if store_type.value == compatible_store_type:
+                            config = store_class.get_config()
+                            bucket_name, sub_path = config.split_path(store_url)
+                            break
+                    else:
+                        # If we get here, it's an unknown S3-compatible store
+                        raise ValueError(
+                            f'Unknown S3-compatible store type: {store_type}')
                 return store_type, bucket_name, \
                     sub_path, storage_account_name, region
         raise ValueError(f'Unknown store URL: {store_url}')
@@ -752,20 +783,19 @@ class Storage(object):
         # If source is a pre-existing bucket, connect to the bucket
         # If the bucket does not exist, this will error out
         if isinstance(self.source, str):
-            if self.source.startswith('
-                self.add_store(StoreType.S3)
-            elif self.source.startswith('gs://'):
+            if self.source.startswith('gs://'):
                 self.add_store(StoreType.GCS)
             elif data_utils.is_az_container_endpoint(self.source):
                 self.add_store(StoreType.AZURE)
-            elif self.source.startswith('r2://'):
-                self.add_store(StoreType.R2)
             elif self.source.startswith('cos://'):
                 self.add_store(StoreType.IBM)
             elif self.source.startswith('oci://'):
                 self.add_store(StoreType.OCI)
-
-
+
+            store_type = StoreType.find_s3_compatible_config_by_prefix(
+                self.source)
+            if store_type:
+                self.add_store(store_type)
 
     def get_bucket_sub_path_prefix(self, blob_path: str) -> str:
         """Adds the bucket sub path prefix to the blob path."""
@@ -981,12 +1011,25 @@ class Storage(object):
             # When initializing from global_user_state, we override the
             # source from the YAML
             try:
-                if s_type
+                if s_type.value in _S3_COMPATIBLE_STORES:
+                    store_class = _S3_COMPATIBLE_STORES[s_type.value]
+                    store = store_class.from_metadata(
+                        s_metadata,
+                        source=self.source,
+                        sync_on_reconstruction=self.sync_on_reconstruction,
+                        _bucket_sub_path=self._bucket_sub_path)
+                elif s_type == StoreType.S3:
                     store = S3Store.from_metadata(
                         s_metadata,
                         source=self.source,
                         sync_on_reconstruction=self.sync_on_reconstruction,
                         _bucket_sub_path=self._bucket_sub_path)
+                elif s_type == StoreType.R2:
+                    store = R2Store.from_metadata(
+                        s_metadata,
+                        source=self.source,
+                        sync_on_reconstruction=self.sync_on_reconstruction,
+                        _bucket_sub_path=self._bucket_sub_path)
                 elif s_type == StoreType.GCS:
                     store = GcsStore.from_metadata(
                         s_metadata,
@@ -1001,12 +1044,6 @@ class Storage(object):
                         source=self.source,
                         sync_on_reconstruction=self.sync_on_reconstruction,
                         _bucket_sub_path=self._bucket_sub_path)
-                elif s_type == StoreType.R2:
-                    store = R2Store.from_metadata(
-                        s_metadata,
-                        source=self.source,
-                        sync_on_reconstruction=self.sync_on_reconstruction,
-                        _bucket_sub_path=self._bucket_sub_path)
                 elif s_type == StoreType.IBM:
                     store = IBMCosStore.from_metadata(
                         s_metadata,
@@ -1107,20 +1144,17 @@ class Storage(object):
             return store
 
         store_cls: Type[AbstractStore]
-        if
-
+        # First check if it's a registered S3-compatible store
+        if store_type.value in _S3_COMPATIBLE_STORES:
+            store_cls = _S3_COMPATIBLE_STORES[store_type.value]
         elif store_type == StoreType.GCS:
            store_cls = GcsStore
         elif store_type == StoreType.AZURE:
             store_cls = AzureBlobStore
-        elif store_type == StoreType.R2:
-            store_cls = R2Store
         elif store_type == StoreType.IBM:
             store_cls = IBMCosStore
         elif store_type == StoreType.OCI:
             store_cls = OciStore
-        elif store_type == StoreType.NEBIUS:
-            store_cls = NebiusStore
         else:
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageSpecError(
@@ -1344,101 +1378,261 @@ class Storage(object):
         return config
 
 
-
-
-
+# Registry for S3-compatible stores
+_S3_COMPATIBLE_STORES = {}
+
+
+def register_s3_compatible_store(store_class):
+    """Decorator to automatically register S3-compatible stores."""
+    store_type = store_class.get_store_type()
+    _S3_COMPATIBLE_STORES[store_type] = store_class
+    return store_class
+
+
+@dataclass
+class S3CompatibleConfig:
+    """Configuration for S3-compatible storage providers."""
+    # Provider identification
+    store_type: str  # Store type identifier (e.g., "S3", "R2", "MINIO")
+    url_prefix: str  # URL prefix (e.g., "s3://", "r2://", "minio://")
+
+    # Client creation
+    client_factory: Callable[[Optional[str]], Any]
+    resource_factory: Callable[[str], StorageHandle]
+    split_path: Callable[[str], Tuple[str, str]]
+    verify_bucket: Callable[[str], bool]
+
+    # CLI configuration
+    aws_profile: Optional[str] = None
+    get_endpoint_url: Optional[Callable[[], str]] = None
+    credentials_file: Optional[str] = None
+    extra_cli_args: Optional[List[str]] = None
+
+    # Provider-specific settings
+    cloud_name: str = ''
+    default_region: Optional[str] = None
+    access_denied_message: str = 'Access Denied'
+
+    # Mounting
+    mount_cmd_factory: Optional[Callable] = None
+    mount_cached_cmd_factory: Optional[Callable] = None
+
+    def __post_init__(self):
+        if self.extra_cli_args is None:
+            self.extra_cli_args = []
+
+
+class S3CompatibleStore(AbstractStore):
+    """Base class for S3-compatible object storage providers.
+
+    This class provides a unified interface for all S3-compatible storage
+    providers (AWS S3, Cloudflare R2, Nebius, MinIO, etc.) by leveraging
+    a configuration-driven approach that eliminates code duplication.
+
+    ## Adding a New S3-Compatible Store
+
+    To add a new S3-compatible storage provider (e.g., MinIO),
+    follow these steps:
+
+    ### 1. Add Store Type to Enum
+    First, add your store type to the StoreType enum:
+    ```python
+    class StoreType(enum.Enum):
+        # ... existing entries ...
+        MINIO = 'MINIO'
+    ```
+
+    ### 2. Create Store Class
+    Create a new store class that inherits from S3CompatibleStore:
+    ```python
+    @register_s3_compatible_store
+    class MinIOStore(S3CompatibleStore):
+        '''MinIOStore for MinIO object storage.'''
+
+        @classmethod
+        def get_config(cls) -> S3CompatibleConfig:
+            '''Return the configuration for MinIO.'''
+            return S3CompatibleConfig(
+                store_type='MINIO',
+                url_prefix='minio://',
+                client_factory=lambda region:\
+                    data_utils.create_minio_client(region),
+                resource_factory=lambda name:\
+                    minio.resource('s3').Bucket(name),
+                split_path=data_utils.split_minio_path,
+                aws_profile='minio',
+                get_endpoint_url=lambda: minio.get_endpoint_url(),
+                cloud_name='minio',
+                default_region='us-east-1',
+                mount_cmd_factory=mounting_utils.get_minio_mount_cmd,
+            )
+    ```
+
+    ### 3. Implement Required Utilities
+    Create the necessary utility functions:
+
+    #### In `sky/data/data_utils.py`:
+    ```python
+    def create_minio_client(region: Optional[str] = None):
+        '''Create MinIO S3 client.'''
+        return boto3.client('s3',
+                            endpoint_url=minio.get_endpoint_url(),
+                            aws_access_key_id=minio.get_access_key(),
+                            aws_secret_access_key=minio.get_secret_key(),
+                            region_name=region or 'us-east-1')
+
+    def split_minio_path(minio_path: str) -> Tuple[str, str]:
+        '''Split minio://bucket/key into (bucket, key).'''
+        path_parts = minio_path.replace('minio://', '').split('/', 1)
+        bucket = path_parts[0]
+        key = path_parts[1] if len(path_parts) > 1 else ''
+        return bucket, key
+    ```
+
+    #### In `sky/utils/mounting_utils.py`:
+    ```python
+    def get_minio_mount_cmd(profile: str, bucket_name: str, endpoint_url: str,
+                            mount_path: str,
+                            bucket_sub_path: Optional[str]) -> str:
+        '''Generate MinIO mount command using s3fs.'''
+        # Implementation similar to other S3-compatible mount commands
+        pass
+    ```
+
+    ### 4. Create Adapter Module (if needed)
+    Create `sky/adaptors/minio.py` for MinIO-specific configuration:
+    ```python
+    '''MinIO adapter for SkyPilot.'''
+
+    MINIO_PROFILE_NAME = 'minio'
+
+    def get_endpoint_url() -> str:
+        '''Get MinIO endpoint URL from configuration.'''
+        # Read from ~/.minio/config or environment variables
+        pass
+
+    def resource(resource_name: str):
+        '''Get MinIO resource.'''
+        # Implementation for creating MinIO resources
+        pass
+    ```
+
     """
 
-    _DEFAULT_REGION = 'us-east-1'
     _ACCESS_DENIED_MESSAGE = 'Access Denied'
-    _CUSTOM_ENDPOINT_REGIONS = [
-        'ap-east-1', 'me-south-1', 'af-south-1', 'eu-south-1', 'eu-south-2',
-        'ap-south-2', 'ap-southeast-3', 'ap-southeast-4', 'me-central-1',
-        'il-central-1'
-    ]
 
     def __init__(self,
                  name: str,
                  source: str,
-                 region: Optional[str] =
+                 region: Optional[str] = None,
                  is_sky_managed: Optional[bool] = None,
                  sync_on_reconstruction: bool = True,
                  _bucket_sub_path: Optional[str] = None):
+        # Initialize configuration first to get defaults
+        self.config = self.__class__.get_config()
+
+        # Use provider's default region if not specified
+        if region is None:
+            region = self.config.default_region
+
+        # Initialize S3CompatibleStore specific attributes
         self.client: 'mypy_boto3_s3.Client'
         self.bucket: 'StorageHandle'
-
-        #
-        # We should eventually make all opt-in regions also work for S3 by
-        # passing the right endpoint flags.
-        if region in self._CUSTOM_ENDPOINT_REGIONS:
-            logger.warning('AWS opt-in regions are not supported for S3. '
-                           f'Falling back to default region '
-                           f'{self._DEFAULT_REGION} for bucket {name!r}.')
-            region = self._DEFAULT_REGION
+
+        # Call parent constructor
         super().__init__(name, source, region, is_sky_managed,
                          sync_on_reconstruction, _bucket_sub_path)
 
+    @classmethod
+    @abstractmethod
+    def get_config(cls) -> S3CompatibleConfig:
+        """Return the configuration for this S3-compatible provider."""
+        pass
+
+    @classmethod
+    def get_store_type(cls) -> str:
+        """Return the store type identifier from configuration."""
+        return cls.get_config().store_type
+
+    @property
+    def provider_prefixes(self) -> set:
+        """Dynamically get all provider prefixes from registered stores."""
+        prefixes = set()
+
+        # Get prefixes from all registered S3-compatible stores
+        for store_class in _S3_COMPATIBLE_STORES.values():
+            config = store_class.get_config()
+            prefixes.add(config.url_prefix)
+
+        # Add hardcoded prefixes for non-S3-compatible stores
+        prefixes.update({
+            'gs://',  # GCS
+            'https://',  # Azure
+            'cos://',  # IBM COS
+            'oci://',  # OCI
+        })
+
+        return prefixes
+
     def _validate(self):
         if self.source is not None and isinstance(self.source, str):
-            if self.source.startswith(
-
-
-                '
+            if self.source.startswith(self.config.url_prefix):
+                bucket_name, _ = self.config.split_path(self.source)
+                assert self.name == bucket_name, (
+                    f'{self.config.store_type} Bucket is specified as path, '
+                    f'the name should be the same as {self.config.store_type} '
+                    f'bucket.')
+                # Only verify if this is NOT the same store type as the source
+                if self.__class__.get_store_type() != self.config.store_type:
+                    assert self.config.verify_bucket(self.name), (
+                        f'Source specified as {self.source},'
+                        f'a {self.config.store_type} '
+                        f'bucket. {self.config.store_type} Bucket should exist.'
+                    )
             elif self.source.startswith('gs://'):
                 assert self.name == data_utils.split_gcs_path(self.source)[0], (
                     'GCS Bucket is specified as path, the name should be '
                     'the same as GCS bucket.')
-
-
-
+                if not isinstance(self, GcsStore):
+                    assert data_utils.verify_gcs_bucket(self.name), (
+                        f'Source specified as {self.source}, a GCS bucket. ',
+                        'GCS Bucket should exist.')
             elif data_utils.is_az_container_endpoint(self.source):
                 storage_account_name, container_name, _ = (
                     data_utils.split_az_path(self.source))
                 assert self.name == container_name, (
                     'Azure bucket is specified as path, the name should be '
                     'the same as Azure bucket.')
-
-
-
+                if not isinstance(self, AzureBlobStore):
+                    assert data_utils.verify_az_bucket(
+                        storage_account_name, self.name
+                    ), (f'Source specified as {self.source}, an Azure bucket. '
                         'Azure bucket should exist.')
-            elif self.source.startswith('r2://'):
-                assert self.name == data_utils.split_r2_path(self.source)[0], (
-                    'R2 Bucket is specified as path, the name should be '
-                    'the same as R2 bucket.')
-                assert data_utils.verify_r2_bucket(self.name), (
-                    f'Source specified as {self.source}, a R2 bucket. ',
-                    'R2 Bucket should exist.')
-            elif self.source.startswith('nebius://'):
-                assert self.name == data_utils.split_nebius_path(
-                    self.source)[0], (
-                        'Nebius Object Storage is specified as path, the name '
-                        'should be the same as Nebius Object Storage bucket.')
-                assert data_utils.verify_nebius_bucket(self.name), (
-                    f'Source specified as {self.source}, a Nebius Object '
-                    f'Storage bucket. Nebius Object Storage Bucket should'
-                    f' exist.')
             elif self.source.startswith('cos://'):
                 assert self.name == data_utils.split_cos_path(self.source)[0], (
                     'COS Bucket is specified as path, the name should be '
                     'the same as COS bucket.')
-
-
-
+                if not isinstance(self, IBMCosStore):
+                    assert data_utils.verify_ibm_cos_bucket(self.name), (
+                        f'Source specified as {self.source}, a COS bucket. ',
+                        'COS Bucket should exist.')
             elif self.source.startswith('oci://'):
                 raise NotImplementedError(
-                    'Moving data from OCI to
+                    f'Moving data from OCI to {self.source} is ',
+                    'currently not supported.')
+
         # Validate name
         self.name = self.validate_name(self.name)
 
         # Check if the storage is enabled
-        if not _is_storage_cloud_enabled(
+        if not _is_storage_cloud_enabled(self.config.cloud_name):
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.ResourcesUnavailableError(
-                    'Storage
-                    '
-                    '
-                    '
-                )
+                    f'Storage "store: {self.config.store_type.lower()}" '
+                    f'specified, but '
+                    f'{self.config.cloud_name} access is disabled. '
+                    'To fix, enable '
+                    f'{self.config.cloud_name} by running `sky check`.')
 
     @classmethod
     def validate_name(cls, name: str) -> str:
@@ -1510,7 +1704,7 @@ class S3Store(AbstractStore):
             StorageBucketGetError: If fetching existing bucket fails
             StorageInitError: If general initialization fails.
         """
-        self.client =
+        self.client = self.config.client_factory(self.region)
         self.bucket, is_new_bucket = self._get_bucket()
         if self.is_sky_managed is None:
             # If is_sky_managed is not specified, then this is a new storage
@@ -1532,16 +1726,10 @@ class S3Store(AbstractStore):
             if isinstance(self.source, list):
                 self.batch_aws_rsync(self.source, create_dirs=True)
             elif self.source is not None:
-                if self.
-                    pass
-                elif self.
-                    self.
-                elif self.source.startswith('r2://'):
-                    self._transfer_to_s3()
-                elif self.source.startswith('oci://'):
-                    self._transfer_to_s3()
-                elif self.source.startswith('nebius://'):
-                    self._transfer_to_s3()
+                if self._is_same_provider_source():
+                    pass  # No transfer needed
+                elif self._needs_cross_provider_transfer():
+                    self._transfer_from_other_provider()
                 else:
                     self.batch_aws_rsync([self.source])
         except exceptions.StorageUploadError:
@@ -1550,57 +1738,94 @@ class S3Store(AbstractStore):
             raise exceptions.StorageUploadError(
                 f'Upload failed for store {self.name}') from e
 
+    def _is_same_provider_source(self) -> bool:
+        """Check if source is from the same provider."""
+        return isinstance(self.source, str) and self.source.startswith(
+            self.config.url_prefix)
+
+    def _needs_cross_provider_transfer(self) -> bool:
+        """Check if source needs cross-provider transfer."""
+        if not isinstance(self.source, str):
+            return False
+        return any(
+            self.source.startswith(prefix) for prefix in self.provider_prefixes)
+
+    def _detect_source_type(self) -> str:
+        """Detect the source provider type from URL."""
+        if not isinstance(self.source, str):
+            return 'unknown'
+
+        for provider in self.provider_prefixes:
+            if self.source.startswith(provider):
+                return provider[:-len('://')]
+        return ''
+
+    def _transfer_from_other_provider(self):
+        """Transfer data from another cloud to this S3-compatible store."""
+        source_type = self._detect_source_type()
+        target_type = self.config.store_type.lower()
+
+        if hasattr(data_transfer, f'{source_type}_to_{target_type}'):
+            transfer_func = getattr(data_transfer,
+                                    f'{source_type}_to_{target_type}')
+            transfer_func(self.name, self.name)
+        else:
+            with ux_utils.print_exception_no_traceback():
+                raise NotImplementedError(
+                    f'Transfer from {source_type} to {target_type} '
+                    'is not yet supported.')
+
     def delete(self) -> None:
+        """Delete the bucket or sub-path."""
         if self._bucket_sub_path is not None and not self.is_sky_managed:
             return self._delete_sub_path()
 
-        deleted_by_skypilot = self.
+        deleted_by_skypilot = self._delete_bucket(self.name)
+        provider = self.config.store_type
         if deleted_by_skypilot:
-            msg_str = f'Deleted
+            msg_str = f'Deleted {provider} bucket {self.name}.'
         else:
-            msg_str = f'
+            msg_str = f'{provider} bucket {self.name} may have been deleted ' \
                       f'externally. Removing from local state.'
-        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
-                    f'{colorama.Style.RESET_ALL}')
-
-    def _delete_sub_path(self) -> None:
-        assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
-        deleted_by_skypilot = self._delete_s3_bucket_sub_path(
-            self.name, self._bucket_sub_path)
-        if deleted_by_skypilot:
-            msg_str = f'Removed objects from S3 bucket ' \
-                      f'{self.name}/{self._bucket_sub_path}.'
-        else:
-            msg_str = f'Failed to remove objects from S3 bucket ' \
-                      f'{self.name}/{self._bucket_sub_path}.'
-        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
-                    f'{colorama.Style.RESET_ALL}')
+        logger.info(f'{colorama.Fore.GREEN}{msg_str}{colorama.Style.RESET_ALL}')
 
     def get_handle(self) -> StorageHandle:
-
+        """Get storage handle using provider's resource factory."""
+        return self.config.resource_factory(self.name)
 
-    def
-
-
-
+    def _download_file(self, remote_path: str, local_path: str) -> None:
+        """Download file using S3 API."""
+        self.bucket.download_file(remote_path, local_path)
+
+    def mount_command(self, mount_path: str) -> str:
+        """Get mount command using provider's mount factory."""
+        if self.config.mount_cmd_factory is None:
+            raise exceptions.NotSupportedError(
+                f'Mounting not supported for {self.config.store_type}')
 
-
-
-
+        install_cmd = mounting_utils.get_s3_mount_install_cmd()
+        mount_cmd = self.config.mount_cmd_factory(self.bucket.name, mount_path,
+                                                  self._bucket_sub_path)
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cmd)
 
-
-
+    def mount_cached_command(self, mount_path: str) -> str:
+        """Get cached mount command. Can be overridden by subclasses."""
+        if self.config.mount_cached_cmd_factory is None:
+            raise exceptions.NotSupportedError(
+                f'Cached mounting not supported for {self.config.store_type}')
 
-
-
-
-
-
-
-
-
-
-
+        install_cmd = mounting_utils.get_rclone_install_cmd()
+        mount_cmd = self.config.mount_cached_cmd_factory(
+            self.bucket.name, mount_path, self._bucket_sub_path)
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cmd)
+
+    def batch_aws_rsync(self,
+                        source_path_list: List[Path],
+                        create_dirs: bool = False) -> None:
+        """Generic S3-compatible rsync using AWS CLI."""
+        sub_path = f'/{self._bucket_sub_path}' if self._bucket_sub_path else ''
 
         def get_file_sync_command(base_dir_path, file_names):
             includes = ' '.join([
@@ -1608,10 +1833,28 @@ class S3Store(AbstractStore):
                 for file_name in file_names
             ])
             base_dir_path = shlex.quote(base_dir_path)
-
-
-
-
+
+            # Build AWS CLI command with provider-specific configuration
+            cmd_parts = ['aws s3 sync --no-follow-symlinks --exclude="*"']
+            cmd_parts.append(f'{includes} {base_dir_path}')
+            cmd_parts.append(f's3://{self.name}{sub_path}')
+
+            # Add provider-specific arguments
+            if self.config.get_endpoint_url:
+                cmd_parts.append(
+                    f'--endpoint-url {self.config.get_endpoint_url()}')
+            if self.config.aws_profile:
+                cmd_parts.append(f'--profile={self.config.aws_profile}')
+            if self.config.extra_cli_args:
+                cmd_parts.extend(self.config.extra_cli_args)
+
+            # Handle credentials file via environment
+            cmd = ' '.join(cmd_parts)
+            if self.config.credentials_file:
+                cmd = 'AWS_SHARED_CREDENTIALS_FILE=' + \
+                    f'{self.config.credentials_file} {cmd}'
+
+            return cmd
 
         def get_dir_sync_command(src_dir_path, dest_dir_name):
             # we exclude .git directory from the sync
@@ -1619,11 +1862,11 @@ class S3Store(AbstractStore):
             excluded_list.append('.git/*')
 
             # Process exclusion patterns to make them work correctly with aws
-            # s3 sync
+            # s3 sync - this logic is from S3Store2 to ensure compatibility
             processed_excludes = []
             for excluded_path in excluded_list:
                 # Check if the path is a directory exclusion pattern
-                # For AWS S3 sync, directory patterns need to end with "
+                # For AWS S3 sync, directory patterns need to end with "/*" to
                 # exclude all contents
                 if (excluded_path.endswith('/') or os.path.isdir(
                         os.path.join(src_dir_path, excluded_path.rstrip('/')))):
@@ -1638,10 +1881,25 @@ class S3Store(AbstractStore):
                 for file_name in processed_excludes
             ])
             src_dir_path = shlex.quote(src_dir_path)
-
-
-
-
+
+            cmd_parts = ['aws s3 sync --no-follow-symlinks']
+            cmd_parts.append(f'{excludes} {src_dir_path}')
+            cmd_parts.append(f's3://{self.name}{sub_path}/{dest_dir_name}')
+
+            if self.config.get_endpoint_url:
+                cmd_parts.append(
+                    f'--endpoint-url {self.config.get_endpoint_url()}')
+            if self.config.aws_profile:
+                cmd_parts.append(f'--profile={self.config.aws_profile}')
+            if self.config.extra_cli_args:
+                cmd_parts.extend(self.config.extra_cli_args)
+
+            cmd = ' '.join(cmd_parts)
+            if self.config.credentials_file:
+                cmd = 'AWS_SHARED_CREDENTIALS_FILE=' + \
+                    f'{self.config.credentials_file} {cmd}'
+
+            return cmd
 
         # Generate message for upload
         if len(source_path_list) > 1:
@@ -1649,9 +1907,12 @@ class S3Store(AbstractStore):
         else:
             source_message = source_path_list[0]
 
+        provider_prefix = self.config.url_prefix
         log_path = sky_logging.generate_tmp_logging_file_path(
             _STORAGE_LOG_FILE_NAME)
-        sync_path = f'{source_message} ->
+        sync_path = (f'{source_message} -> '
+                     f'{provider_prefix}{self.name}{sub_path}/')
+
         with rich_utils.safe_status(
                 ux_utils.spinner_message(f'Syncing {sync_path}',
                                          log_path=log_path)):
@@ -1661,151 +1922,78 @@ class S3Store(AbstractStore):
                 get_dir_sync_command,
                 log_path,
                 self.name,
-                self.
+                self.config.access_denied_message,
                 create_dirs=create_dirs,
                 max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS)
+
         logger.info(
             ux_utils.finishing_message(f'Storage synced: {sync_path}',
                                        log_path))
 
-    def _transfer_to_s3(self) -> None:
-        assert isinstance(self.source, str), self.source
-        if self.source.startswith('gs://'):
-            data_transfer.gcs_to_s3(self.name, self.name)
-        elif self.source.startswith('r2://'):
-            data_transfer.r2_to_s3(self.name, self.name)
-
     def _get_bucket(self) -> Tuple[StorageHandle, bool]:
-        """
-
-        If the bucket exists, this method will return the bucket.
-        If the bucket does not exist, there are three cases:
-          1) Raise an error if the bucket source starts with s3://
-          2) Return None if bucket has been externally deleted and
-             sync_on_reconstruction is False
-          3) Create and return a new bucket otherwise
-
-        Raises:
-            StorageSpecError: If externally created bucket is attempted to be
-                mounted without specifying storage source.
-            StorageBucketCreateError: If creating the bucket fails
-            StorageBucketGetError: If fetching a bucket fails
-            StorageExternalDeletionError: If externally deleted storage is
-                attempted to be fetched while reconstructing the storage for
-                'sky storage delete' or 'sky start'
-        """
-        s3 = aws.resource('s3')
-        bucket = s3.Bucket(self.name)
+        """Get or create bucket using S3 API."""
+        bucket = self.config.resource_factory(self.name)
 
         try:
             # Try Public bucket case.
-            # This line does not error out if the bucket is an external public
-            # bucket or if it is a user's bucket that is publicly
-            # accessible.
             self.client.head_bucket(Bucket=self.name)
             self._validate_existing_bucket()
             return bucket, False
         except aws.botocore_exceptions().ClientError as e:
             error_code = e.response['Error']['Code']
-            # AccessDenied error for buckets that are private and not owned by
-            # user.
             if error_code == '403':
-                command = f'aws s3 ls {self.name}'
+                command = f'aws s3 ls s3://{self.name}'
+                if self.config.aws_profile:
+                    command += f' --profile={self.config.aws_profile}'
+                if self.config.get_endpoint_url:
+                    command += f' --endpoint-url '\
+                        f'{self.config.get_endpoint_url()}'
+                if self.config.credentials_file:
+                    command = (f'AWS_SHARED_CREDENTIALS_FILE='
+                               f'{self.config.credentials_file} {command}')
                 with ux_utils.print_exception_no_traceback():
                     raise exceptions.StorageBucketGetError(
                         _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
                         f' To debug, consider running `{command}`.') from e
 
-        if isinstance(self.source, str) and self.source.startswith(
+        if isinstance(self.source, str) and self.source.startswith(
+                self.config.url_prefix):
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageBucketGetError(
                     'Attempted to use a non-existent bucket as a source: '
-                    f'{self.source}.
-                    f'{self.source}` to debug.')
+                    f'{self.source}.')
 
-        # If bucket cannot be found
-        # the bucket is to be created by Sky. However, creation is skipped if
-        # Store object is being reconstructed for deletion or re-mount with
-        # sky start, and error is raised instead.
+        # If bucket cannot be found, create it if needed
         if self.sync_on_reconstruction:
-            bucket = self.
+            bucket = self._create_bucket(self.name)
             return bucket, True
         else:
-            # Raised when Storage object is reconstructed for sky storage
-            # delete or to re-mount Storages with sky start but the storage
-            # is already removed externally.
             raise exceptions.StorageExternalDeletionError(
                 'Attempted to fetch a non-existent bucket: '
                 f'{self.name}')
 
-    def
-        """
-        using the boto3 API
-
-        Args:
-            remote_path: str; Remote path on S3 bucket
-            local_path: str; Local path on user's device
-        """
-        self.bucket.download_file(remote_path, local_path)
-
-    def mount_command(self, mount_path: str) -> str:
-        """Returns the command to mount the bucket to the mount_path.
-
-        Uses goofys to mount the bucket.
-
-        Args:
-            mount_path: str; Path to mount the bucket to.
-        """
-        install_cmd = mounting_utils.get_s3_mount_install_cmd()
-        mount_cmd = mounting_utils.get_s3_mount_cmd(self.bucket.name,
-                                                    mount_path,
-                                                    self._bucket_sub_path)
-        return mounting_utils.get_mounting_command(mount_path, install_cmd,
-                                                   mount_cmd)
-
-    def mount_cached_command(self, mount_path: str) -> str:
-        install_cmd = mounting_utils.get_rclone_install_cmd()
-        rclone_profile_name = (
-            data_utils.Rclone.RcloneStores.S3.get_profile_name(self.name))
-        rclone_config = data_utils.Rclone.RcloneStores.S3.get_config(
-            rclone_profile_name=rclone_profile_name)
-        mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
-            rclone_config, rclone_profile_name, self.bucket.name, mount_path)
-        return mounting_utils.get_mounting_command(mount_path, install_cmd,
-                                                   mount_cached_cmd)
-
-    def _create_s3_bucket(self,
-                          bucket_name: str,
-                          region=_DEFAULT_REGION) -> StorageHandle:
-        """Creates S3 bucket with specific name in specific region
-
-        Args:
-          bucket_name: str; Name of bucket
-          region: str; Region name, e.g. us-west-1, us-east-2
-        Raises:
-            StorageBucketCreateError: If bucket creation fails.
-        """
-        s3_client = self.client
+    def _create_bucket(self, bucket_name: str) -> StorageHandle:
+        """Create bucket using S3 API."""
         try:
             create_bucket_config: Dict[str, Any] = {'Bucket': bucket_name}
-
-            # the LocationConstraint must not be specified.
-            # Reference: https://stackoverflow.com/a/51912090
-            if region is not None and region != 'us-east-1':
+            if self.region is not None and self.region != 'us-east-1':
                 create_bucket_config['CreateBucketConfiguration'] = {
-                    'LocationConstraint': region
+                    'LocationConstraint': self.region
                 }
-
+            self.client.create_bucket(**create_bucket_config)
             logger.info(
                 f'  {colorama.Style.DIM}Created S3 bucket {bucket_name!r} in '
-                f'{region or "us-east-1"}{colorama.Style.RESET_ALL}')
+                f'{self.region or "us-east-1"}{colorama.Style.RESET_ALL}')
 
             # Add AWS tags configured in config.yaml to the bucket.
             # This is useful for cost tracking and external cleanup.
             bucket_tags = skypilot_config.get_effective_region_config(
-                cloud=
+                cloud=self.config.cloud_name,
+                region=None,
+                keys=('labels',),
+                default_value={})
             if bucket_tags:
-
+                self.client.put_bucket_tagging(
                     Bucket=bucket_name,
                     Tagging={
                         'TagSet': [{
@@ -1813,17 +2001,38 @@ class S3Store(AbstractStore):
  'Value': v
  } for k, v in bucket_tags.items()]
  })
-
  except aws.botocore_exceptions().ClientError as e:
  with ux_utils.print_exception_no_traceback():
  raise exceptions.StorageBucketCreateError(
  f'Attempted to create a bucket {self.name} but failed.'
  ) from e
- return
+ return self.config.resource_factory(bucket_name)
+
+ def _delete_bucket(self, bucket_name: str) -> bool:
+ """Delete bucket using AWS CLI."""
+ cmd_parts = [f'aws s3 rb s3://{bucket_name} --force']
+
+ if self.config.aws_profile:
+ cmd_parts.append(f'--profile={self.config.aws_profile}')
+ if self.config.get_endpoint_url:
+ cmd_parts.append(f'--endpoint-url {self.config.get_endpoint_url()}')
+
+ remove_command = ' '.join(cmd_parts)
+
+ if self.config.credentials_file:
+ remove_command = (f'AWS_SHARED_CREDENTIALS_FILE='
+ f'{self.config.credentials_file} '
+ f'{remove_command}')

-
-
-
+ return self._execute_remove_command(
+ remove_command, bucket_name,
+ f'Deleting {self.config.store_type} bucket {bucket_name}',
+ (f'Failed to delete {self.config.store_type} bucket '
+ f'{bucket_name}.'))
+
+ def _execute_remove_command(self, command: str, bucket_name: str,
+ hint_operating: str, hint_failed: str) -> bool:
+ """Execute bucket removal command."""
  try:
  with rich_utils.safe_status(
  ux_utils.spinner_message(hint_operating)):
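The new `_delete_bucket` / `_execute_remove_command` pair above shells out to `aws s3 rb --force` and threads an optional profile, endpoint URL, and credentials file into the command. A minimal standalone sketch of that assembly follows; the parameter names are hypothetical, not attributes of the store config.

import subprocess
from typing import Optional

def build_bucket_rb_command(bucket_name: str,
                            aws_profile: Optional[str] = None,
                            endpoint_url: Optional[str] = None,
                            credentials_file: Optional[str] = None) -> str:
    """Sketch only: assemble the forced-removal CLI call shown in the hunk above."""
    parts = [f'aws s3 rb s3://{bucket_name} --force']
    if aws_profile:
        parts.append(f'--profile={aws_profile}')
    if endpoint_url:
        parts.append(f'--endpoint-url {endpoint_url}')
    command = ' '.join(parts)
    if credentials_file:
        # Point the AWS CLI at a non-default credentials file via the environment.
        command = f'AWS_SHARED_CREDENTIALS_FILE={credentials_file} {command}'
    return command

def run_remove(command: str) -> bool:
    """Returns False if the bucket was already gone, True on success."""
    try:
        subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True)
    except subprocess.CalledProcessError as e:
        if 'NoSuchBucket' in e.output.decode('utf-8'):
            return False
        raise
    return True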
@@ -1842,47 +2051,42 @@ class S3Store(AbstractStore):
  f'Detailed error: {e.output}')
  return True

- def
- """
+ def _delete_sub_path(self) -> None:
+ """Remove objects from the sub path in the bucket."""
+ assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
+ deleted_by_skypilot = self._delete_bucket_sub_path(
+ self.name, self._bucket_sub_path)
+ provider = self.config.store_type
+ if deleted_by_skypilot:
+ msg_str = (f'Removed objects from {provider} bucket '
+ f'{self.name}/{self._bucket_sub_path}.')
+ else:
+ msg_str = (f'Failed to remove objects from {provider} bucket '
+ f'{self.name}/{self._bucket_sub_path}.')
+ logger.info(f'{colorama.Fore.GREEN}{msg_str}{colorama.Style.RESET_ALL}')

-
-
+ def _delete_bucket_sub_path(self, bucket_name: str, sub_path: str) -> bool:
+ """Delete objects in the sub path from the bucket."""
+ cmd_parts = [f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive']

-
-
+ if self.config.aws_profile:
+ cmd_parts.append(f'--profile={self.config.aws_profile}')
+ if self.config.get_endpoint_url:
+ cmd_parts.append(f'--endpoint-url {self.config.get_endpoint_url()}')

-
- StorageBucketDeleteError: If deleting the bucket fails.
- """
- # Deleting objects is very slow programatically
- # (i.e. bucket.objects.all().delete() is slow).
- # In addition, standard delete operations (i.e. via `aws s3 rm`)
- # are slow, since AWS puts deletion markers.
- # https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
- # The fastest way to delete is to run `aws s3 rb --force`,
- # which removes the bucket by force.
- remove_command = f'aws s3 rb s3://{bucket_name} --force'
- success = self._execute_s3_remove_command(
- remove_command, bucket_name,
- f'Deleting S3 bucket [green]{bucket_name}[/]',
- f'Failed to delete S3 bucket {bucket_name}.')
- if not success:
- return False
+ remove_command = ' '.join(cmd_parts)

-
-
-
-
+ if self.config.credentials_file:
+ remove_command = (f'AWS_SHARED_CREDENTIALS_FILE='
+ f'{self.config.credentials_file} '
+ f'{remove_command}')

-
-
-
-
-
-
- f'[green]{bucket_name}/{sub_path}[/]',
- f'Failed to remove objects from S3 bucket {bucket_name}/{sub_path}.'
- )
+ return self._execute_remove_command(
+ remove_command, bucket_name,
+ (f'Removing objects from {self.config.store_type} bucket '
+ f'{bucket_name}/{sub_path}'),
+ (f'Failed to remove objects from {self.config.store_type} '
+ f'bucket {bucket_name}/{sub_path}.'))


  class GcsStore(AbstractStore):
@@ -3287,22 +3491,23 @@ class AzureBlobStore(AbstractStore):
  return True


- class
- """
- for
- """
-
+ class IBMCosStore(AbstractStore):
+ """IBMCosStore inherits from Storage Object and represents the backend
+ for COS buckets.
+ """
  _ACCESS_DENIED_MESSAGE = 'Access Denied'

  def __init__(self,
  name: str,
  source: str,
- region: Optional[str] = '
+ region: Optional[str] = 'us-east',
  is_sky_managed: Optional[bool] = None,
- sync_on_reconstruction:
+ sync_on_reconstruction: bool = True,
  _bucket_sub_path: Optional[str] = None):
- self.client: '
+ self.client: 'storage.Client'
  self.bucket: 'StorageHandle'
+ self.rclone_profile_name = (
+ data_utils.Rclone.RcloneStores.IBM.get_profile_name(self.name))
  super().__init__(name, source, region, is_sky_managed,
  sync_on_reconstruction, _bucket_sub_path)

@@ -3336,6 +3541,9 @@ class R2Store(AbstractStore):
  assert self.name == data_utils.split_r2_path(self.source)[0], (
  'R2 Bucket is specified as path, the name should be '
  'the same as R2 bucket.')
+ assert data_utils.verify_r2_bucket(self.name), (
+ f'Source specified as {self.source}, a R2 bucket. ',
+ 'R2 Bucket should exist.')
  elif self.source.startswith('nebius://'):
  assert self.name == data_utils.split_nebius_path(
  self.source)[0], (
@@ -3347,29 +3555,59 @@ class R2Store(AbstractStore):
  f'exist.')
  elif self.source.startswith('cos://'):
  assert self.name == data_utils.split_cos_path(self.source)[0], (
- '
+ 'COS Bucket is specified as path, the name should be '
  'the same as COS bucket.')
- assert data_utils.verify_ibm_cos_bucket(self.name), (
- f'Source specified as {self.source}, a COS bucket. ',
- 'COS Bucket should exist.')
- elif self.source.startswith('oci://'):
- raise NotImplementedError(
- 'Moving data from OCI to R2 is currently not supported.')
-
  # Validate name
- self.name =
-
-
+ self.name = IBMCosStore.validate_name(self.name)
+
+ @classmethod
+ def validate_name(cls, name: str) -> str:
+ """Validates the name of a COS bucket.
+
+ Rules source: https://ibm.github.io/ibm-cos-sdk-java/com/ibm/cloud/objectstorage/services/s3/model/Bucket.html # pylint: disable=line-too-long
+ """
+
+ def _raise_no_traceback_name_error(err_str):
  with ux_utils.print_exception_no_traceback():
- raise exceptions.
-
-
-
-
- )
+ raise exceptions.StorageNameError(err_str)
+
+ if name is not None and isinstance(name, str):
+ if not 3 <= len(name) <= 63:
+ _raise_no_traceback_name_error(
+ f'Invalid store name: {name} must be between 3 (min) '
+ 'and 63 (max) characters long.')
+
+ # Check for valid characters and start/end with a letter or number
+ pattern = r'^[a-z0-9][-a-z0-9.]*[a-z0-9]$'
+ if not re.match(pattern, name):
+ _raise_no_traceback_name_error(
+ f'Invalid store name: {name} can consist only of '
+ 'lowercase letters, numbers, dots (.), and dashes (-). '
+ 'It must begin and end with a letter or number.')
+
+ # Check for two adjacent periods or dashes
+ if any(substring in name for substring in ['..', '--']):
+ _raise_no_traceback_name_error(
+ f'Invalid store name: {name} must not contain '
+ 'two adjacent periods/dashes')
+
+ # Check for IP address format
+ ip_pattern = r'^(?:\d{1,3}\.){3}\d{1,3}$'
+ if re.match(ip_pattern, name):
+ _raise_no_traceback_name_error(
+ f'Invalid store name: {name} must not be formatted as '
+ 'an IP address (for example, 192.168.5.4).')
+
+ if any(substring in name for substring in ['.-', '-.']):
+ _raise_no_traceback_name_error(
+ f'Invalid store name: {name} must '
+ 'not allow substrings: ".-", "-." .')
+ else:
+ _raise_no_traceback_name_error('Store name must be specified.')
+ return name

  def initialize(self):
- """Initializes the
+ """Initializes the cos store object on the cloud.

  Initialization involves fetching bucket if exists, or creating it if
  it does not.
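The added `validate_name` enforces the COS naming rules (3-63 characters; lowercase letters, digits, dots, and dashes; no `..`, `--`, `.-`, or `-.`; not shaped like an IP address). A small predicate applying the same checks, shown only as a sketch of those rules rather than the package's validator:

import re

def is_valid_cos_bucket_name(name) -> bool:
    """Sketch of the COS naming rules enforced by validate_name above."""
    if not isinstance(name, str) or not 3 <= len(name) <= 63:
        return False
    if not re.match(r'^[a-z0-9][-a-z0-9.]*[a-z0-9]$', name):
        return False
    if any(s in name for s in ('..', '--', '.-', '-.')):
        return False
    if re.match(r'^(?:\d{1,3}\.){3}\d{1,3}$', name):
        return False
    return True

assert is_valid_cos_bucket_name('my-sky-data')
assert not is_valid_cos_bucket_name('192.168.5.4')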
@@ -3379,7 +3617,8 @@ class R2Store(AbstractStore):
  StorageBucketGetError: If fetching existing bucket fails
  StorageInitError: If general initialization fails.
  """
- self.client =
+ self.client = ibm.get_cos_client(self.region)
+ self.s3_resource = ibm.get_cos_resource(self.region)
  self.bucket, is_new_bucket = self._get_bucket()
  if self.is_sky_managed is None:
  # If is_sky_managed is not specified, then this is a new storage
@@ -3389,7 +3628,7 @@ class R2Store(AbstractStore):
  self.is_sky_managed = is_new_bucket

  def upload(self):
- """Uploads
+ """Uploads files from local machine to bucket.

  Upload must be called by the Storage handler - it is not called on
  Store initialization.
@@ -3399,22 +3638,26 @@ class R2Store(AbstractStore):
  """
  try:
  if isinstance(self.source, list):
- self.
+ self.batch_ibm_rsync(self.source, create_dirs=True)
  elif self.source is not None:
- if self.source.startswith('
-
- elif self.source.startswith('gs://'):
- self._transfer_to_r2()
- elif self.source.startswith('r2://'):
+ if self.source.startswith('cos://'):
+ # cos bucket used as a dest, can't be used as source.
  pass
- elif self.source.startswith('
-
+ elif self.source.startswith('s3://'):
+ raise Exception('IBM COS currently not supporting'
+ 'data transfers between COS and S3')
  elif self.source.startswith('nebius://'):
-
+ raise Exception('IBM COS currently not supporting'
+ 'data transfers between COS and Nebius')
+ elif self.source.startswith('gs://'):
+ raise Exception('IBM COS currently not supporting'
+ 'data transfers between COS and GS')
+ elif self.source.startswith('r2://'):
+ raise Exception('IBM COS currently not supporting'
+ 'data transfers between COS and r2')
  else:
- self.
-
- raise
+ self.batch_ibm_rsync([self.source])
+
  except Exception as e:
  raise exceptions.StorageUploadError(
  f'Upload failed for store {self.name}') from e
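The rewritten `upload` above routes purely on the URI scheme of `source`: a `cos://` source is a no-op (COS is only a destination here), other object-store schemes are rejected, and anything else is treated as a local path and handed to the rclone batch sync. A compact sketch of that dispatch, where `route_upload` is an illustrative helper and not anything in the package:

def route_upload(source: str) -> str:
    """Illustrative sketch of the scheme-based routing in upload() above."""
    unsupported = ('s3://', 'gs://', 'r2://', 'nebius://')
    if source.startswith('cos://'):
        return 'noop'   # a COS URI is only valid as a destination
    if source.startswith(unsupported):
        raise Exception('IBM COS currently not supporting '
                        'data transfers between COS and other object stores')
    return 'rsync'      # local path: fall back to the rclone-based batch sync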
@@ -3423,41 +3666,28 @@ class R2Store(AbstractStore):
  if self._bucket_sub_path is not None and not self.is_sky_managed:
  return self._delete_sub_path()

-
-
- msg_str = f'Deleted R2 bucket {self.name}.'
- else:
- msg_str = f'R2 bucket {self.name} may have been deleted ' \
- f'externally. Removing from local state.'
- logger.info(f'{colorama.Fore.GREEN}{msg_str}'
+ self._delete_cos_bucket()
+ logger.info(f'{colorama.Fore.GREEN}Deleted COS bucket {self.name}.'
  f'{colorama.Style.RESET_ALL}')

  def _delete_sub_path(self) -> None:
  assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
-
-
-
-
-
-
- msg_str = f'Failed to remove objects from R2 bucket ' \
- f'{self.name}/{self._bucket_sub_path}.'
- logger.info(f'{colorama.Fore.GREEN}{msg_str}'
- f'{colorama.Style.RESET_ALL}')
+ bucket = self.s3_resource.Bucket(self.name)
+ try:
+ self._delete_cos_bucket_objects(bucket, self._bucket_sub_path + '/')
+ except ibm.ibm_botocore.exceptions.ClientError as e:
+ if e.__class__.__name__ == 'NoSuchBucket':
+ logger.debug('bucket already removed')

  def get_handle(self) -> StorageHandle:
- return
+ return self.s3_resource.Bucket(self.name)

- def
+ def batch_ibm_rsync(self,
  source_path_list: List[Path],
  create_dirs: bool = False) -> None:
- """Invokes
-
- AWS Sync by default uses 10 threads to upload files to the bucket. To
- increase parallelism, modify max_concurrent_requests in your aws config
- file (Default path: ~/.aws/config).
+ """Invokes rclone copy to batch upload a list of local paths to cos

- Since
+ Since rclone does not support batch operations, we construct
  multiple commands to be run in parallel.

  Args:
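The `batch_ibm_rsync` docstring above notes that rclone has no batch mode, so one copy command is built per source path and the commands are run in parallel. A generic sketch of that pattern using only the standard library; the helper name and worker count are assumptions of this example, not the package's upload machinery:

import subprocess
from concurrent.futures import ThreadPoolExecutor

def run_sync_commands_in_parallel(commands, max_workers: int = 4) -> None:
    """Sketch: run several independent rclone invocations concurrently."""
    def _run(cmd: str) -> None:
        # Each command copies one source path; failures surface via check=True.
        subprocess.run(cmd, shell=True, check=True)

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        list(pool.map(_run, commands))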
@@ -3471,49 +3701,58 @@ class R2Store(AbstractStore):
  sub_path = (f'/{self._bucket_sub_path}'
  if self._bucket_sub_path else '')

- def
+ def get_dir_sync_command(src_dir_path, dest_dir_name) -> str:
+ """returns an rclone command that copies a complete folder
+ from 'src_dir_path' to bucket/'dest_dir_name'.
+
+ `rclone copy` copies files from source path to target.
+ files with identical names at won't be copied over, unless
+ their modification date is more recent.
+ works similarly to `aws sync` (without --delete).
+
+ Args:
+ src_dir_path (str): local source path from which to copy files.
+ dest_dir_name (str): remote target path files are copied to.
+
+ Returns:
+ str: bash command using rclone to sync files. Executed remotely.
+ """
+
+ # .git directory is excluded from the sync
+ # wrapping src_dir_path with "" to support path with spaces
+ src_dir_path = shlex.quote(src_dir_path)
+ sync_command = ('rclone copy --exclude ".git/*" '
+ f'{src_dir_path} '
+ f'{self.rclone_profile_name}:{self.name}{sub_path}'
+ f'/{dest_dir_name}')
+ return sync_command
+
+ def get_file_sync_command(base_dir_path, file_names) -> str:
+ """returns an rclone command that copies files: 'file_names'
+ from base directory: `base_dir_path` to bucket.
+
+ `rclone copy` copies files from source path to target.
+ files with identical names at won't be copied over, unless
+ their modification date is more recent.
+ works similarly to `aws sync` (without --delete).
+
+ Args:
+ base_dir_path (str): local path from which to copy files.
+ file_names (List): specific file names to copy.
+
+ Returns:
+ str: bash command using rclone to sync files
+ """
+
+ # wrapping file_name with "" to support spaces
  includes = ' '.join([
  f'--include {shlex.quote(file_name)}'
  for file_name in file_names
  ])
- endpoint_url = cloudflare.create_endpoint()
  base_dir_path = shlex.quote(base_dir_path)
- sync_command = (
-
-
- 'aws s3 sync --no-follow-symlinks --exclude="*" '
- f'{includes} {base_dir_path} '
- f's3://{self.name}{sub_path} '
- f'--endpoint {endpoint_url} '
- # R2 does not support CRC64-NVME
- # which is the default for aws s3 sync
- # https://community.cloudflare.com/t/an-error-occurred-internalerror-when-calling-the-putobject-operation/764905/13
- f'--checksum-algorithm CRC32 '
- f'--profile={cloudflare.R2_PROFILE_NAME}')
- return sync_command
-
- def get_dir_sync_command(src_dir_path, dest_dir_name):
- # we exclude .git directory from the sync
- excluded_list = storage_utils.get_excluded_files(src_dir_path)
- excluded_list.append('.git/*')
- excludes = ' '.join([
- f'--exclude {shlex.quote(file_name)}'
- for file_name in excluded_list
- ])
- endpoint_url = cloudflare.create_endpoint()
- src_dir_path = shlex.quote(src_dir_path)
- sync_command = (
- 'AWS_SHARED_CREDENTIALS_FILE='
- f'{cloudflare.R2_CREDENTIALS_PATH} '
- f'aws s3 sync --no-follow-symlinks {excludes} '
- f'{src_dir_path} '
- f's3://{self.name}{sub_path}/{dest_dir_name} '
- f'--endpoint {endpoint_url} '
- # R2 does not support CRC64-NVME
- # which is the default for aws s3 sync
- # https://community.cloudflare.com/t/an-error-occurred-internalerror-when-calling-the-putobject-operation/764905/13
- f'--checksum-algorithm CRC32 '
- f'--profile={cloudflare.R2_PROFILE_NAME}')
+ sync_command = ('rclone copy '
+ f'{includes} {base_dir_path} '
+ f'{self.rclone_profile_name}:{self.name}{sub_path}')
  return sync_command

  # Generate message for upload
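The two helpers above build the actual rclone invocations: a directory sync that excludes `.git/*`, and a per-file sync driven by `--include` filters. Outside the class they reduce to roughly the sketch below, where `remote` stands in for the rclone profile name and is an assumption of this example:

import shlex

def rclone_dir_copy_cmd(src_dir: str, remote: str, bucket: str, dest_dir: str) -> str:
    """Sketch of the directory-sync command built above ('remote' is illustrative)."""
    # .git contents are skipped, and the local path is quoted to survive spaces.
    return ('rclone copy --exclude ".git/*" '
            f'{shlex.quote(src_dir)} {remote}:{bucket}/{dest_dir}')

def rclone_file_copy_cmd(base_dir: str, file_names, remote: str, bucket: str) -> str:
    """Sketch of the per-file variant: only the named files are included."""
    includes = ' '.join(f'--include {shlex.quote(n)}' for n in file_names)
    return f'rclone copy {includes} {shlex.quote(base_dir)} {remote}:{bucket}'

# e.g. rclone_dir_copy_cmd('./data', 'sky-ibm-mybucket', 'mybucket', 'data')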
@@ -3524,7 +3763,8 @@ class R2Store(AbstractStore):

  log_path = sky_logging.generate_tmp_logging_file_path(
  _STORAGE_LOG_FILE_NAME)
- sync_path =
+ sync_path = (
+ f'{source_message} -> cos://{self.region}/{self.name}{sub_path}/')
  with rich_utils.safe_status(
  ux_utils.spinner_message(f'Syncing {sync_path}',
  log_path=log_path)):
@@ -3541,1236 +3781,306 @@ class R2Store(AbstractStore):
|
|
|
3541
3781
|
ux_utils.finishing_message(f'Storage synced: {sync_path}',
|
|
3542
3782
|
log_path))
|
|
3543
3783
|
|
|
3544
|
-
def _transfer_to_r2(self) -> None:
|
|
3545
|
-
assert isinstance(self.source, str), self.source
|
|
3546
|
-
if self.source.startswith('gs://'):
|
|
3547
|
-
data_transfer.gcs_to_r2(self.name, self.name)
|
|
3548
|
-
elif self.source.startswith('s3://'):
|
|
3549
|
-
data_transfer.s3_to_r2(self.name, self.name)
|
|
3550
|
-
elif self.source.startswith('nebius://'):
|
|
3551
|
-
data_transfer.s3_to_r2(self.name, self.name)
|
|
3552
|
-
|
|
3553
3784
|
def _get_bucket(self) -> Tuple[StorageHandle, bool]:
|
|
3554
|
-
"""
|
|
3785
|
+
"""returns IBM COS bucket object if exists, otherwise creates it.
|
|
3555
3786
|
|
|
3556
|
-
|
|
3557
|
-
|
|
3558
|
-
|
|
3559
|
-
2) Return None if bucket has been externally deleted and
|
|
3560
|
-
sync_on_reconstruction is False
|
|
3561
|
-
3) Create and return a new bucket otherwise
|
|
3787
|
+
Returns:
|
|
3788
|
+
StorageHandle(str): bucket name
|
|
3789
|
+
bool: indicates whether a new bucket was created.
|
|
3562
3790
|
|
|
3563
3791
|
Raises:
|
|
3564
3792
|
StorageSpecError: If externally created bucket is attempted to be
|
|
3565
3793
|
mounted without specifying storage source.
|
|
3566
|
-
StorageBucketCreateError: If
|
|
3794
|
+
StorageBucketCreateError: If bucket creation fails.
|
|
3567
3795
|
StorageBucketGetError: If fetching a bucket fails
|
|
3568
3796
|
StorageExternalDeletionError: If externally deleted storage is
|
|
3569
3797
|
attempted to be fetched while reconstructing the storage for
|
|
3570
3798
|
'sky storage delete' or 'sky start'
|
|
3571
3799
|
"""
|
|
3572
|
-
|
|
3573
|
-
|
|
3574
|
-
|
|
3800
|
+
|
|
3801
|
+
bucket_profile_name = (data_utils.Rclone.RcloneStores.IBM.value +
|
|
3802
|
+
self.name)
|
|
3575
3803
|
try:
|
|
3576
|
-
|
|
3577
|
-
|
|
3578
|
-
|
|
3579
|
-
|
|
3580
|
-
|
|
3581
|
-
|
|
3582
|
-
|
|
3583
|
-
|
|
3584
|
-
|
|
3585
|
-
|
|
3586
|
-
|
|
3587
|
-
|
|
3588
|
-
|
|
3589
|
-
|
|
3590
|
-
|
|
3591
|
-
|
|
3592
|
-
|
|
3593
|
-
|
|
3594
|
-
|
|
3595
|
-
|
|
3596
|
-
|
|
3804
|
+
bucket_region = data_utils.get_ibm_cos_bucket_region(self.name)
|
|
3805
|
+
except exceptions.StorageBucketGetError as e:
|
|
3806
|
+
with ux_utils.print_exception_no_traceback():
|
|
3807
|
+
command = f'rclone lsd {bucket_profile_name}: '
|
|
3808
|
+
raise exceptions.StorageBucketGetError(
|
|
3809
|
+
_BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
|
|
3810
|
+
f' To debug, consider running `{command}`.') from e
|
|
3811
|
+
|
|
3812
|
+
try:
|
|
3813
|
+
uri_region = data_utils.split_cos_path(
|
|
3814
|
+
self.source)[2] # type: ignore
|
|
3815
|
+
except ValueError:
|
|
3816
|
+
# source isn't a cos uri
|
|
3817
|
+
uri_region = ''
|
|
3818
|
+
|
|
3819
|
+
# bucket's region doesn't match specified region in URI
|
|
3820
|
+
if bucket_region and uri_region and uri_region != bucket_region\
|
|
3821
|
+
and self.sync_on_reconstruction:
|
|
3822
|
+
with ux_utils.print_exception_no_traceback():
|
|
3823
|
+
raise exceptions.StorageBucketGetError(
|
|
3824
|
+
f'Bucket {self.name} exists in '
|
|
3825
|
+
f'region {bucket_region}, '
|
|
3826
|
+
f'but URI specified region {uri_region}.')
|
|
3597
3827
|
|
|
3598
|
-
if
|
|
3828
|
+
if not bucket_region and uri_region:
|
|
3829
|
+
# bucket doesn't exist but source is a bucket URI
|
|
3599
3830
|
with ux_utils.print_exception_no_traceback():
|
|
3600
3831
|
raise exceptions.StorageBucketGetError(
|
|
3601
3832
|
'Attempted to use a non-existent bucket as a source: '
|
|
3602
|
-
f'{self.
|
|
3603
|
-
'`
|
|
3604
|
-
|
|
3605
|
-
|
|
3606
|
-
|
|
3607
|
-
|
|
3608
|
-
|
|
3609
|
-
|
|
3610
|
-
|
|
3611
|
-
|
|
3612
|
-
|
|
3613
|
-
|
|
3614
|
-
|
|
3615
|
-
|
|
3616
|
-
return bucket, True
|
|
3617
|
-
else:
|
|
3833
|
+
f'{self.name} by providing URI. Consider using '
|
|
3834
|
+
'`rclone lsd <remote>` on relevant remotes returned '
|
|
3835
|
+
'via `rclone listremotes` to debug.')
|
|
3836
|
+
|
|
3837
|
+
data_utils.Rclone.store_rclone_config(
|
|
3838
|
+
self.name,
|
|
3839
|
+
data_utils.Rclone.RcloneStores.IBM,
|
|
3840
|
+
self.region, # type: ignore
|
|
3841
|
+
)
|
|
3842
|
+
|
|
3843
|
+
if not bucket_region and self.sync_on_reconstruction:
|
|
3844
|
+
# bucket doesn't exist
|
|
3845
|
+
return self._create_cos_bucket(self.name, self.region), True
|
|
3846
|
+
elif not bucket_region and not self.sync_on_reconstruction:
|
|
3618
3847
|
# Raised when Storage object is reconstructed for sky storage
|
|
3619
3848
|
# delete or to re-mount Storages with sky start but the storage
|
|
3620
3849
|
# is already removed externally.
|
|
3621
3850
|
raise exceptions.StorageExternalDeletionError(
|
|
3622
3851
|
'Attempted to fetch a non-existent bucket: '
|
|
3623
3852
|
f'{self.name}')
|
|
3624
|
-
|
|
3853
|
+
else:
|
|
3854
|
+
# bucket exists
|
|
3855
|
+
bucket = self.s3_resource.Bucket(self.name)
|
|
3856
|
+
self._validate_existing_bucket()
|
|
3857
|
+
return bucket, False
|
|
3858
|
+
|
|
3625
3859
|
def _download_file(self, remote_path: str, local_path: str) -> None:
|
|
3626
|
-
"""Downloads file from remote to local on
|
|
3860
|
+
"""Downloads file from remote to local on s3 bucket
|
|
3627
3861
|
using the boto3 API
|
|
3628
3862
|
|
|
3629
3863
|
Args:
|
|
3630
|
-
remote_path: str; Remote path on
|
|
3864
|
+
remote_path: str; Remote path on S3 bucket
|
|
3631
3865
|
local_path: str; Local path on user's device
|
|
3632
3866
|
"""
|
|
3633
|
-
self.
|
|
3867
|
+
self.client.download_file(self.name, local_path, remote_path)
|
|
3634
3868
|
|
|
3635
3869
|
def mount_command(self, mount_path: str) -> str:
|
|
3636
3870
|
"""Returns the command to mount the bucket to the mount_path.
|
|
3637
3871
|
|
|
3638
|
-
Uses
|
|
3872
|
+
Uses rclone to mount the bucket.
|
|
3873
|
+
Source: https://github.com/rclone/rclone
|
|
3639
3874
|
|
|
3640
3875
|
Args:
|
|
3641
3876
|
mount_path: str; Path to mount the bucket to.
|
|
3642
3877
|
"""
|
|
3643
|
-
|
|
3644
|
-
endpoint_url = cloudflare.create_endpoint()
|
|
3645
|
-
r2_credential_path = cloudflare.R2_CREDENTIALS_PATH
|
|
3646
|
-
r2_profile_name = cloudflare.R2_PROFILE_NAME
|
|
3647
|
-
mount_cmd = mounting_utils.get_r2_mount_cmd(
|
|
3648
|
-
r2_credential_path, r2_profile_name, endpoint_url, self.bucket.name,
|
|
3649
|
-
mount_path, self._bucket_sub_path)
|
|
3650
|
-
return mounting_utils.get_mounting_command(mount_path, install_cmd,
|
|
3651
|
-
mount_cmd)
|
|
3652
|
-
|
|
3653
|
-
def mount_cached_command(self, mount_path: str) -> str:
|
|
3878
|
+
# install rclone if not installed.
|
|
3654
3879
|
install_cmd = mounting_utils.get_rclone_install_cmd()
|
|
3655
|
-
|
|
3656
|
-
|
|
3657
|
-
|
|
3658
|
-
|
|
3659
|
-
|
|
3660
|
-
|
|
3880
|
+
rclone_config = data_utils.Rclone.RcloneStores.IBM.get_config(
|
|
3881
|
+
rclone_profile_name=self.rclone_profile_name,
|
|
3882
|
+
region=self.region) # type: ignore
|
|
3883
|
+
mount_cmd = (
|
|
3884
|
+
mounting_utils.get_cos_mount_cmd(
|
|
3885
|
+
rclone_config,
|
|
3886
|
+
self.rclone_profile_name,
|
|
3887
|
+
self.bucket.name,
|
|
3888
|
+
mount_path,
|
|
3889
|
+
self._bucket_sub_path, # type: ignore
|
|
3890
|
+
))
|
|
3661
3891
|
return mounting_utils.get_mounting_command(mount_path, install_cmd,
|
|
3662
|
-
|
|
3892
|
+
mount_cmd)
|
|
3663
3893
|
|
|
3664
|
-
def
|
|
3665
|
-
|
|
3666
|
-
|
|
3667
|
-
"""Creates
|
|
3894
|
+
def _create_cos_bucket(self,
|
|
3895
|
+
bucket_name: str,
|
|
3896
|
+
region='us-east') -> StorageHandle:
|
|
3897
|
+
"""Creates IBM COS bucket with specific name in specific region
|
|
3668
3898
|
|
|
3669
3899
|
Args:
|
|
3670
3900
|
bucket_name: str; Name of bucket
|
|
3671
|
-
region: str; Region name,
|
|
3901
|
+
region: str; Region name, e.g. us-east, us-south
|
|
3672
3902
|
Raises:
|
|
3673
3903
|
StorageBucketCreateError: If bucket creation fails.
|
|
3674
3904
|
"""
|
|
3675
|
-
r2_client = self.client
|
|
3676
3905
|
try:
|
|
3677
|
-
|
|
3678
|
-
|
|
3679
|
-
|
|
3680
|
-
|
|
3681
|
-
|
|
3682
|
-
|
|
3683
|
-
|
|
3684
|
-
|
|
3685
|
-
|
|
3686
|
-
|
|
3906
|
+
self.client.create_bucket(
|
|
3907
|
+
Bucket=bucket_name,
|
|
3908
|
+
CreateBucketConfiguration={
|
|
3909
|
+
'LocationConstraint': f'{region}-smart'
|
|
3910
|
+
})
|
|
3911
|
+
logger.info(f' {colorama.Style.DIM}Created IBM COS bucket '
|
|
3912
|
+
f'{bucket_name!r} in {region} '
|
|
3913
|
+
'with storage class smart tier'
|
|
3914
|
+
f'{colorama.Style.RESET_ALL}')
|
|
3915
|
+
self.bucket = self.s3_resource.Bucket(bucket_name)
|
|
3916
|
+
|
|
3917
|
+
except ibm.ibm_botocore.exceptions.ClientError as e: # type: ignore[union-attr] # pylint: disable=line-too-long
|
|
3687
3918
|
with ux_utils.print_exception_no_traceback():
|
|
3688
3919
|
raise exceptions.StorageBucketCreateError(
|
|
3689
|
-
f'
|
|
3690
|
-
f'{
|
|
3691
|
-
return cloudflare.resource('s3').Bucket(bucket_name)
|
|
3692
|
-
|
|
3693
|
-
def _execute_r2_remove_command(self, command: str, bucket_name: str,
|
|
3694
|
-
hint_operating: str,
|
|
3695
|
-
hint_failed: str) -> bool:
|
|
3696
|
-
try:
|
|
3697
|
-
with rich_utils.safe_status(
|
|
3698
|
-
ux_utils.spinner_message(hint_operating)):
|
|
3699
|
-
subprocess.check_output(command.split(' '),
|
|
3700
|
-
stderr=subprocess.STDOUT,
|
|
3701
|
-
shell=True)
|
|
3702
|
-
except subprocess.CalledProcessError as e:
|
|
3703
|
-
if 'NoSuchBucket' in e.output.decode('utf-8'):
|
|
3704
|
-
logger.debug(
|
|
3705
|
-
_BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
|
|
3706
|
-
bucket_name=bucket_name))
|
|
3707
|
-
return False
|
|
3708
|
-
else:
|
|
3709
|
-
with ux_utils.print_exception_no_traceback():
|
|
3710
|
-
raise exceptions.StorageBucketDeleteError(
|
|
3711
|
-
f'{hint_failed}'
|
|
3712
|
-
f'Detailed error: {e.output}')
|
|
3713
|
-
return True
|
|
3714
|
-
|
|
3715
|
-
def _delete_r2_bucket_sub_path(self, bucket_name: str,
|
|
3716
|
-
sub_path: str) -> bool:
|
|
3717
|
-
"""Deletes the sub path from the bucket."""
|
|
3718
|
-
endpoint_url = cloudflare.create_endpoint()
|
|
3719
|
-
remove_command = (
|
|
3720
|
-
f'AWS_SHARED_CREDENTIALS_FILE={cloudflare.R2_CREDENTIALS_PATH} '
|
|
3721
|
-
f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive '
|
|
3722
|
-
f'--endpoint {endpoint_url} '
|
|
3723
|
-
f'--profile={cloudflare.R2_PROFILE_NAME}')
|
|
3724
|
-
return self._execute_r2_remove_command(
|
|
3725
|
-
remove_command, bucket_name,
|
|
3726
|
-
f'Removing objects from R2 bucket {bucket_name}/{sub_path}',
|
|
3727
|
-
f'Failed to remove objects from R2 bucket {bucket_name}/{sub_path}.'
|
|
3728
|
-
)
|
|
3729
|
-
|
|
3730
|
-
def _delete_r2_bucket(self, bucket_name: str) -> bool:
|
|
3731
|
-
"""Deletes R2 bucket, including all objects in bucket
|
|
3920
|
+
f'Failed to create bucket: '
|
|
3921
|
+
f'{bucket_name}') from e
|
|
3732
3922
|
|
|
3733
|
-
|
|
3734
|
-
|
|
3923
|
+
s3_bucket_exists_waiter = self.client.get_waiter('bucket_exists')
|
|
3924
|
+
s3_bucket_exists_waiter.wait(Bucket=bucket_name)
|
|
3735
3925
|
|
|
3736
|
-
|
|
3737
|
-
bool; True if bucket was deleted, False if it was deleted externally.
|
|
3926
|
+
return self.bucket
|
|
3738
3927
|
|
|
3739
|
-
|
|
3740
|
-
|
|
3741
|
-
|
|
3742
|
-
|
|
3743
|
-
|
|
3744
|
-
|
|
3745
|
-
|
|
3746
|
-
|
|
3747
|
-
|
|
3748
|
-
|
|
3749
|
-
|
|
3750
|
-
|
|
3751
|
-
|
|
3752
|
-
|
|
3753
|
-
|
|
3754
|
-
|
|
3755
|
-
|
|
3756
|
-
success = self._execute_r2_remove_command(
|
|
3757
|
-
remove_command, bucket_name, f'Deleting R2 bucket {bucket_name}',
|
|
3758
|
-
f'Failed to delete R2 bucket {bucket_name}.')
|
|
3759
|
-
if not success:
|
|
3760
|
-
return False
|
|
3928
|
+
def _delete_cos_bucket_objects(self,
|
|
3929
|
+
bucket: Any,
|
|
3930
|
+
prefix: Optional[str] = None) -> None:
|
|
3931
|
+
bucket_versioning = self.s3_resource.BucketVersioning(bucket.name)
|
|
3932
|
+
if bucket_versioning.status == 'Enabled':
|
|
3933
|
+
if prefix is not None:
|
|
3934
|
+
res = list(
|
|
3935
|
+
bucket.object_versions.filter(Prefix=prefix).delete())
|
|
3936
|
+
else:
|
|
3937
|
+
res = list(bucket.object_versions.delete())
|
|
3938
|
+
else:
|
|
3939
|
+
if prefix is not None:
|
|
3940
|
+
res = list(bucket.objects.filter(Prefix=prefix).delete())
|
|
3941
|
+
else:
|
|
3942
|
+
res = list(bucket.objects.delete())
|
|
3943
|
+
logger.debug(f'Deleted bucket\'s content:\n{res}, prefix: {prefix}')
|
|
3761
3944
|
|
|
3762
|
-
|
|
3763
|
-
|
|
3764
|
-
|
|
3765
|
-
|
|
3945
|
+
def _delete_cos_bucket(self) -> None:
|
|
3946
|
+
bucket = self.s3_resource.Bucket(self.name)
|
|
3947
|
+
try:
|
|
3948
|
+
self._delete_cos_bucket_objects(bucket)
|
|
3949
|
+
bucket.delete()
|
|
3950
|
+
bucket.wait_until_not_exists()
|
|
3951
|
+
except ibm.ibm_botocore.exceptions.ClientError as e:
|
|
3952
|
+
if e.__class__.__name__ == 'NoSuchBucket':
|
|
3953
|
+
logger.debug('bucket already removed')
|
|
3954
|
+
data_utils.Rclone.delete_rclone_bucket_profile(
|
|
3955
|
+
self.name, data_utils.Rclone.RcloneStores.IBM)
|
|
3766
3956
|
|
|
3767
3957
|
|
|
3768
|
-
class
|
|
3769
|
-
"""
|
|
3770
|
-
for
|
|
3958
|
+
class OciStore(AbstractStore):
|
|
3959
|
+
"""OciStore inherits from Storage Object and represents the backend
|
|
3960
|
+
for OCI buckets.
|
|
3771
3961
|
"""
|
|
3772
|
-
|
|
3962
|
+
|
|
3963
|
+
_ACCESS_DENIED_MESSAGE = 'AccessDeniedException'
|
|
3773
3964
|
|
|
3774
3965
|
def __init__(self,
|
|
3775
3966
|
name: str,
|
|
3776
|
-
source:
|
|
3777
|
-
region: Optional[str] =
|
|
3967
|
+
source: Optional[SourceType],
|
|
3968
|
+
region: Optional[str] = None,
|
|
3778
3969
|
is_sky_managed: Optional[bool] = None,
|
|
3779
|
-
sync_on_reconstruction: bool = True,
|
|
3970
|
+
sync_on_reconstruction: Optional[bool] = True,
|
|
3780
3971
|
_bucket_sub_path: Optional[str] = None):
|
|
3781
|
-
self.client:
|
|
3782
|
-
self.bucket:
|
|
3783
|
-
self.
|
|
3784
|
-
|
|
3785
|
-
|
|
3786
|
-
|
|
3787
|
-
|
|
3788
|
-
def _validate(self):
|
|
3789
|
-
if self.source is not None and isinstance(self.source, str):
|
|
3790
|
-
if self.source.startswith('s3://'):
|
|
3791
|
-
assert self.name == data_utils.split_s3_path(self.source)[0], (
|
|
3792
|
-
'S3 Bucket is specified as path, the name should be the'
|
|
3793
|
-
' same as S3 bucket.')
|
|
3794
|
-
assert data_utils.verify_s3_bucket(self.name), (
|
|
3795
|
-
f'Source specified as {self.source}, a S3 bucket. ',
|
|
3796
|
-
'S3 Bucket should exist.')
|
|
3797
|
-
elif self.source.startswith('gs://'):
|
|
3798
|
-
assert self.name == data_utils.split_gcs_path(self.source)[0], (
|
|
3799
|
-
'GCS Bucket is specified as path, the name should be '
|
|
3800
|
-
'the same as GCS bucket.')
|
|
3801
|
-
assert data_utils.verify_gcs_bucket(self.name), (
|
|
3802
|
-
f'Source specified as {self.source}, a GCS bucket. ',
|
|
3803
|
-
'GCS Bucket should exist.')
|
|
3804
|
-
elif data_utils.is_az_container_endpoint(self.source):
|
|
3805
|
-
storage_account_name, container_name, _ = (
|
|
3806
|
-
data_utils.split_az_path(self.source))
|
|
3807
|
-
assert self.name == container_name, (
|
|
3808
|
-
'Azure bucket is specified as path, the name should be '
|
|
3809
|
-
'the same as Azure bucket.')
|
|
3810
|
-
assert data_utils.verify_az_bucket(
|
|
3811
|
-
storage_account_name, self.name), (
|
|
3812
|
-
f'Source specified as {self.source}, an Azure bucket. '
|
|
3813
|
-
'Azure bucket should exist.')
|
|
3814
|
-
elif self.source.startswith('r2://'):
|
|
3815
|
-
assert self.name == data_utils.split_r2_path(self.source)[0], (
|
|
3816
|
-
'R2 Bucket is specified as path, the name should be '
|
|
3817
|
-
'the same as R2 bucket.')
|
|
3818
|
-
assert data_utils.verify_r2_bucket(self.name), (
|
|
3819
|
-
f'Source specified as {self.source}, a R2 bucket. ',
|
|
3820
|
-
'R2 Bucket should exist.')
|
|
3821
|
-
elif self.source.startswith('nebius://'):
|
|
3822
|
-
assert self.name == data_utils.split_nebius_path(
|
|
3823
|
-
self.source)[0], (
|
|
3824
|
-
'Nebius Object Storage is specified as path, the name '
|
|
3825
|
-
'should be the same as Nebius Object Storage bucket.')
|
|
3826
|
-
assert data_utils.verify_nebius_bucket(self.name), (
|
|
3827
|
-
f'Source specified as {self.source}, a Nebius Object '
|
|
3828
|
-
f'Storage bucket. Nebius Object Storage Bucket should '
|
|
3829
|
-
f'exist.')
|
|
3830
|
-
elif self.source.startswith('cos://'):
|
|
3831
|
-
assert self.name == data_utils.split_cos_path(self.source)[0], (
|
|
3832
|
-
'COS Bucket is specified as path, the name should be '
|
|
3833
|
-
'the same as COS bucket.')
|
|
3834
|
-
# Validate name
|
|
3835
|
-
self.name = IBMCosStore.validate_name(self.name)
|
|
3836
|
-
|
|
3837
|
-
@classmethod
|
|
3838
|
-
def validate_name(cls, name: str) -> str:
|
|
3839
|
-
"""Validates the name of a COS bucket.
|
|
3840
|
-
|
|
3841
|
-
Rules source: https://ibm.github.io/ibm-cos-sdk-java/com/ibm/cloud/objectstorage/services/s3/model/Bucket.html # pylint: disable=line-too-long
|
|
3842
|
-
"""
|
|
3843
|
-
|
|
3844
|
-
def _raise_no_traceback_name_error(err_str):
|
|
3845
|
-
with ux_utils.print_exception_no_traceback():
|
|
3846
|
-
raise exceptions.StorageNameError(err_str)
|
|
3847
|
-
|
|
3848
|
-
if name is not None and isinstance(name, str):
|
|
3849
|
-
if not 3 <= len(name) <= 63:
|
|
3850
|
-
_raise_no_traceback_name_error(
|
|
3851
|
-
f'Invalid store name: {name} must be between 3 (min) '
|
|
3852
|
-
'and 63 (max) characters long.')
|
|
3853
|
-
|
|
3854
|
-
# Check for valid characters and start/end with a letter or number
|
|
3855
|
-
pattern = r'^[a-z0-9][-a-z0-9.]*[a-z0-9]$'
|
|
3856
|
-
if not re.match(pattern, name):
|
|
3857
|
-
_raise_no_traceback_name_error(
|
|
3858
|
-
f'Invalid store name: {name} can consist only of '
|
|
3859
|
-
'lowercase letters, numbers, dots (.), and dashes (-). '
|
|
3860
|
-
'It must begin and end with a letter or number.')
|
|
3861
|
-
|
|
3862
|
-
# Check for two adjacent periods or dashes
|
|
3863
|
-
if any(substring in name for substring in ['..', '--']):
|
|
3864
|
-
_raise_no_traceback_name_error(
|
|
3865
|
-
f'Invalid store name: {name} must not contain '
|
|
3866
|
-
'two adjacent periods/dashes')
|
|
3867
|
-
|
|
3868
|
-
# Check for IP address format
|
|
3869
|
-
ip_pattern = r'^(?:\d{1,3}\.){3}\d{1,3}$'
|
|
3870
|
-
if re.match(ip_pattern, name):
|
|
3871
|
-
_raise_no_traceback_name_error(
|
|
3872
|
-
f'Invalid store name: {name} must not be formatted as '
|
|
3873
|
-
'an IP address (for example, 192.168.5.4).')
|
|
3874
|
-
|
|
3875
|
-
if any(substring in name for substring in ['.-', '-.']):
|
|
3876
|
-
_raise_no_traceback_name_error(
|
|
3877
|
-
f'Invalid store name: {name} must '
|
|
3878
|
-
'not allow substrings: ".-", "-." .')
|
|
3879
|
-
else:
|
|
3880
|
-
_raise_no_traceback_name_error('Store name must be specified.')
|
|
3881
|
-
return name
|
|
3882
|
-
|
|
3883
|
-
def initialize(self):
|
|
3884
|
-
"""Initializes the cos store object on the cloud.
|
|
3885
|
-
|
|
3886
|
-
Initialization involves fetching bucket if exists, or creating it if
|
|
3887
|
-
it does not.
|
|
3972
|
+
self.client: Any
|
|
3973
|
+
self.bucket: StorageHandle
|
|
3974
|
+
self.oci_config_file: str
|
|
3975
|
+
self.config_profile: str
|
|
3976
|
+
self.compartment: str
|
|
3977
|
+
self.namespace: str
|
|
3888
3978
|
|
|
3889
|
-
|
|
3890
|
-
|
|
3891
|
-
|
|
3892
|
-
|
|
3893
|
-
|
|
3894
|
-
|
|
3895
|
-
|
|
3896
|
-
|
|
3897
|
-
|
|
3898
|
-
# If is_sky_managed is not specified, then this is a new storage
|
|
3899
|
-
# object (i.e., did not exist in global_user_state) and we should
|
|
3900
|
-
# set the is_sky_managed property.
|
|
3901
|
-
# If is_sky_managed is specified, then we take no action.
|
|
3902
|
-
self.is_sky_managed = is_new_bucket
|
|
3903
|
-
|
|
3904
|
-
def upload(self):
|
|
3905
|
-
"""Uploads files from local machine to bucket.
|
|
3906
|
-
|
|
3907
|
-
Upload must be called by the Storage handler - it is not called on
|
|
3908
|
-
Store initialization.
|
|
3909
|
-
|
|
3910
|
-
Raises:
|
|
3911
|
-
StorageUploadError: if upload fails.
|
|
3912
|
-
"""
|
|
3913
|
-
try:
|
|
3914
|
-
if isinstance(self.source, list):
|
|
3915
|
-
self.batch_ibm_rsync(self.source, create_dirs=True)
|
|
3916
|
-
elif self.source is not None:
|
|
3917
|
-
if self.source.startswith('cos://'):
|
|
3918
|
-
# cos bucket used as a dest, can't be used as source.
|
|
3919
|
-
pass
|
|
3920
|
-
elif self.source.startswith('s3://'):
|
|
3921
|
-
raise Exception('IBM COS currently not supporting'
|
|
3922
|
-
'data transfers between COS and S3')
|
|
3923
|
-
elif self.source.startswith('nebius://'):
|
|
3924
|
-
raise Exception('IBM COS currently not supporting'
|
|
3925
|
-
'data transfers between COS and Nebius')
|
|
3926
|
-
elif self.source.startswith('gs://'):
|
|
3927
|
-
raise Exception('IBM COS currently not supporting'
|
|
3928
|
-
'data transfers between COS and GS')
|
|
3929
|
-
elif self.source.startswith('r2://'):
|
|
3930
|
-
raise Exception('IBM COS currently not supporting'
|
|
3931
|
-
'data transfers between COS and r2')
|
|
3932
|
-
else:
|
|
3933
|
-
self.batch_ibm_rsync([self.source])
|
|
3934
|
-
|
|
3935
|
-
except Exception as e:
|
|
3936
|
-
raise exceptions.StorageUploadError(
|
|
3937
|
-
f'Upload failed for store {self.name}') from e
|
|
3938
|
-
|
|
3939
|
-
def delete(self) -> None:
|
|
3940
|
-
if self._bucket_sub_path is not None and not self.is_sky_managed:
|
|
3941
|
-
return self._delete_sub_path()
|
|
3942
|
-
|
|
3943
|
-
self._delete_cos_bucket()
|
|
3944
|
-
logger.info(f'{colorama.Fore.GREEN}Deleted COS bucket {self.name}.'
|
|
3945
|
-
f'{colorama.Style.RESET_ALL}')
|
|
3946
|
-
|
|
3947
|
-
def _delete_sub_path(self) -> None:
|
|
3948
|
-
assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
|
|
3949
|
-
bucket = self.s3_resource.Bucket(self.name)
|
|
3950
|
-
try:
|
|
3951
|
-
self._delete_cos_bucket_objects(bucket, self._bucket_sub_path + '/')
|
|
3952
|
-
except ibm.ibm_botocore.exceptions.ClientError as e:
|
|
3953
|
-
if e.__class__.__name__ == 'NoSuchBucket':
|
|
3954
|
-
logger.debug('bucket already removed')
|
|
3955
|
-
|
|
3956
|
-
def get_handle(self) -> StorageHandle:
|
|
3957
|
-
return self.s3_resource.Bucket(self.name)
|
|
3958
|
-
|
|
3959
|
-
def batch_ibm_rsync(self,
|
|
3960
|
-
source_path_list: List[Path],
|
|
3961
|
-
create_dirs: bool = False) -> None:
|
|
3962
|
-
"""Invokes rclone copy to batch upload a list of local paths to cos
|
|
3963
|
-
|
|
3964
|
-
Since rclone does not support batch operations, we construct
|
|
3965
|
-
multiple commands to be run in parallel.
|
|
3966
|
-
|
|
3967
|
-
Args:
|
|
3968
|
-
source_path_list: List of paths to local files or directories
|
|
3969
|
-
create_dirs: If the local_path is a directory and this is set to
|
|
3970
|
-
False, the contents of the directory are directly uploaded to
|
|
3971
|
-
root of the bucket. If the local_path is a directory and this is
|
|
3972
|
-
set to True, the directory is created in the bucket root and
|
|
3973
|
-
contents are uploaded to it.
|
|
3974
|
-
"""
|
|
3975
|
-
sub_path = (f'/{self._bucket_sub_path}'
|
|
3976
|
-
if self._bucket_sub_path else '')
|
|
3977
|
-
|
|
3978
|
-
def get_dir_sync_command(src_dir_path, dest_dir_name) -> str:
|
|
3979
|
-
"""returns an rclone command that copies a complete folder
|
|
3980
|
-
from 'src_dir_path' to bucket/'dest_dir_name'.
|
|
3981
|
-
|
|
3982
|
-
`rclone copy` copies files from source path to target.
|
|
3983
|
-
files with identical names at won't be copied over, unless
|
|
3984
|
-
their modification date is more recent.
|
|
3985
|
-
works similarly to `aws sync` (without --delete).
|
|
3986
|
-
|
|
3987
|
-
Args:
|
|
3988
|
-
src_dir_path (str): local source path from which to copy files.
|
|
3989
|
-
dest_dir_name (str): remote target path files are copied to.
|
|
3990
|
-
|
|
3991
|
-
Returns:
|
|
3992
|
-
str: bash command using rclone to sync files. Executed remotely.
|
|
3993
|
-
"""
|
|
3994
|
-
|
|
3995
|
-
# .git directory is excluded from the sync
|
|
3996
|
-
# wrapping src_dir_path with "" to support path with spaces
|
|
3997
|
-
src_dir_path = shlex.quote(src_dir_path)
|
|
3998
|
-
sync_command = ('rclone copy --exclude ".git/*" '
|
|
3999
|
-
f'{src_dir_path} '
|
|
4000
|
-
f'{self.rclone_profile_name}:{self.name}{sub_path}'
|
|
4001
|
-
f'/{dest_dir_name}')
|
|
4002
|
-
return sync_command
|
|
4003
|
-
|
|
4004
|
-
def get_file_sync_command(base_dir_path, file_names) -> str:
|
|
4005
|
-
"""returns an rclone command that copies files: 'file_names'
|
|
4006
|
-
from base directory: `base_dir_path` to bucket.
|
|
4007
|
-
|
|
4008
|
-
`rclone copy` copies files from source path to target.
|
|
4009
|
-
files with identical names at won't be copied over, unless
|
|
4010
|
-
their modification date is more recent.
|
|
4011
|
-
works similarly to `aws sync` (without --delete).
|
|
4012
|
-
|
|
4013
|
-
Args:
|
|
4014
|
-
base_dir_path (str): local path from which to copy files.
|
|
4015
|
-
file_names (List): specific file names to copy.
|
|
4016
|
-
|
|
4017
|
-
Returns:
|
|
4018
|
-
str: bash command using rclone to sync files
|
|
4019
|
-
"""
|
|
4020
|
-
|
|
4021
|
-
# wrapping file_name with "" to support spaces
|
|
4022
|
-
includes = ' '.join([
|
|
4023
|
-
f'--include {shlex.quote(file_name)}'
|
|
4024
|
-
for file_name in file_names
|
|
4025
|
-
])
|
|
4026
|
-
base_dir_path = shlex.quote(base_dir_path)
|
|
4027
|
-
sync_command = ('rclone copy '
|
|
4028
|
-
f'{includes} {base_dir_path} '
|
|
4029
|
-
f'{self.rclone_profile_name}:{self.name}{sub_path}')
|
|
4030
|
-
return sync_command
|
|
4031
|
-
|
|
4032
|
-
# Generate message for upload
|
|
4033
|
-
if len(source_path_list) > 1:
|
|
4034
|
-
source_message = f'{len(source_path_list)} paths'
|
|
4035
|
-
else:
|
|
4036
|
-
source_message = source_path_list[0]
|
|
4037
|
-
|
|
4038
|
-
log_path = sky_logging.generate_tmp_logging_file_path(
|
|
4039
|
-
_STORAGE_LOG_FILE_NAME)
|
|
4040
|
-
sync_path = (
|
|
4041
|
-
f'{source_message} -> cos://{self.region}/{self.name}{sub_path}/')
|
|
4042
|
-
with rich_utils.safe_status(
|
|
4043
|
-
ux_utils.spinner_message(f'Syncing {sync_path}',
|
|
4044
|
-
log_path=log_path)):
|
|
4045
|
-
data_utils.parallel_upload(
|
|
4046
|
-
source_path_list,
|
|
4047
|
-
get_file_sync_command,
|
|
4048
|
-
get_dir_sync_command,
|
|
4049
|
-
log_path,
|
|
4050
|
-
self.name,
|
|
4051
|
-
self._ACCESS_DENIED_MESSAGE,
|
|
4052
|
-
create_dirs=create_dirs,
|
|
4053
|
-
max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS)
|
|
4054
|
-
logger.info(
|
|
4055
|
-
ux_utils.finishing_message(f'Storage synced: {sync_path}',
|
|
4056
|
-
log_path))
|
|
4057
|
-
|
|
4058
|
-
def _get_bucket(self) -> Tuple[StorageHandle, bool]:
|
|
4059
|
-
"""returns IBM COS bucket object if exists, otherwise creates it.
|
|
4060
|
-
|
|
4061
|
-
Returns:
|
|
4062
|
-
StorageHandle(str): bucket name
|
|
4063
|
-
bool: indicates whether a new bucket was created.
|
|
4064
|
-
|
|
4065
|
-
Raises:
|
|
4066
|
-
StorageSpecError: If externally created bucket is attempted to be
|
|
4067
|
-
mounted without specifying storage source.
|
|
4068
|
-
StorageBucketCreateError: If bucket creation fails.
|
|
4069
|
-
StorageBucketGetError: If fetching a bucket fails
|
|
4070
|
-
StorageExternalDeletionError: If externally deleted storage is
|
|
4071
|
-
attempted to be fetched while reconstructing the storage for
|
|
4072
|
-
'sky storage delete' or 'sky start'
|
|
4073
|
-
"""
|
|
4074
|
-
|
|
4075
|
-
bucket_profile_name = (data_utils.Rclone.RcloneStores.IBM.value +
|
|
4076
|
-
self.name)
|
|
4077
|
-
try:
|
|
4078
|
-
bucket_region = data_utils.get_ibm_cos_bucket_region(self.name)
|
|
4079
|
-
except exceptions.StorageBucketGetError as e:
|
|
4080
|
-
with ux_utils.print_exception_no_traceback():
|
|
4081
|
-
command = f'rclone lsd {bucket_profile_name}: '
|
|
4082
|
-
raise exceptions.StorageBucketGetError(
|
|
4083
|
-
_BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
|
|
4084
|
-
f' To debug, consider running `{command}`.') from e
|
|
4085
|
-
|
|
4086
|
-
try:
|
|
4087
|
-
uri_region = data_utils.split_cos_path(
|
|
4088
|
-
self.source)[2] # type: ignore
|
|
4089
|
-
except ValueError:
|
|
4090
|
-
# source isn't a cos uri
|
|
4091
|
-
uri_region = ''
|
|
4092
|
-
|
|
4093
|
-
# bucket's region doesn't match specified region in URI
|
|
4094
|
-
if bucket_region and uri_region and uri_region != bucket_region\
|
|
4095
|
-
and self.sync_on_reconstruction:
|
|
4096
|
-
with ux_utils.print_exception_no_traceback():
|
|
4097
|
-
raise exceptions.StorageBucketGetError(
|
|
4098
|
-
f'Bucket {self.name} exists in '
|
|
4099
|
-
f'region {bucket_region}, '
|
|
4100
|
-
f'but URI specified region {uri_region}.')
|
|
4101
|
-
|
|
4102
|
-
if not bucket_region and uri_region:
|
|
4103
|
-
# bucket doesn't exist but source is a bucket URI
|
|
4104
|
-
with ux_utils.print_exception_no_traceback():
|
|
4105
|
-
raise exceptions.StorageBucketGetError(
|
|
4106
|
-
'Attempted to use a non-existent bucket as a source: '
|
|
4107
|
-
f'{self.name} by providing URI. Consider using '
|
|
4108
|
-
'`rclone lsd <remote>` on relevant remotes returned '
|
|
4109
|
-
'via `rclone listremotes` to debug.')
|
|
4110
|
-
|
|
4111
|
-
data_utils.Rclone.store_rclone_config(
|
|
4112
|
-
self.name,
|
|
4113
|
-
data_utils.Rclone.RcloneStores.IBM,
|
|
4114
|
-
self.region, # type: ignore
|
|
4115
|
-
)
|
|
4116
|
-
|
|
4117
|
-
if not bucket_region and self.sync_on_reconstruction:
|
|
4118
|
-
# bucket doesn't exist
|
|
4119
|
-
return self._create_cos_bucket(self.name, self.region), True
|
|
4120
|
-
elif not bucket_region and not self.sync_on_reconstruction:
|
|
4121
|
-
# Raised when Storage object is reconstructed for sky storage
|
|
4122
|
-
# delete or to re-mount Storages with sky start but the storage
|
|
4123
|
-
# is already removed externally.
|
|
4124
|
-
raise exceptions.StorageExternalDeletionError(
|
|
4125
|
-
'Attempted to fetch a non-existent bucket: '
|
|
4126
|
-
f'{self.name}')
|
|
4127
|
-
else:
|
|
4128
|
-
# bucket exists
|
|
4129
|
-
bucket = self.s3_resource.Bucket(self.name)
|
|
4130
|
-
self._validate_existing_bucket()
|
|
4131
|
-
return bucket, False
|
|
4132
|
-
|
|
4133
|
-
def _download_file(self, remote_path: str, local_path: str) -> None:
|
|
4134
|
-
"""Downloads file from remote to local on s3 bucket
|
|
4135
|
-
using the boto3 API
|
|
4136
|
-
|
|
4137
|
-
Args:
|
|
4138
|
-
remote_path: str; Remote path on S3 bucket
|
|
4139
|
-
local_path: str; Local path on user's device
|
|
4140
|
-
"""
|
|
4141
|
-
self.client.download_file(self.name, local_path, remote_path)
|
|
4142
|
-
|
|
4143
|
-
def mount_command(self, mount_path: str) -> str:
|
|
4144
|
-
"""Returns the command to mount the bucket to the mount_path.
|
|
4145
|
-
|
|
4146
|
-
Uses rclone to mount the bucket.
|
|
4147
|
-
Source: https://github.com/rclone/rclone
|
|
4148
|
-
|
|
4149
|
-
Args:
|
|
4150
|
-
mount_path: str; Path to mount the bucket to.
|
|
4151
|
-
"""
|
|
4152
|
-
# install rclone if not installed.
|
|
4153
|
-
install_cmd = mounting_utils.get_rclone_install_cmd()
|
|
4154
|
-
rclone_config = data_utils.Rclone.RcloneStores.IBM.get_config(
|
|
4155
|
-
rclone_profile_name=self.rclone_profile_name,
|
|
4156
|
-
region=self.region) # type: ignore
|
|
4157
|
-
mount_cmd = (
|
|
4158
|
-
mounting_utils.get_cos_mount_cmd(
|
|
4159
|
-
rclone_config,
|
|
4160
|
-
self.rclone_profile_name,
|
|
4161
|
-
self.bucket.name,
|
|
4162
|
-
mount_path,
|
|
4163
|
-
self._bucket_sub_path, # type: ignore
|
|
4164
|
-
))
|
|
4165
|
-
return mounting_utils.get_mounting_command(mount_path, install_cmd,
|
|
4166
|
-
mount_cmd)
|
|
4167
|
-
|
|
4168
|
-
def _create_cos_bucket(self,
|
|
4169
|
-
bucket_name: str,
|
|
4170
|
-
region='us-east') -> StorageHandle:
|
|
4171
|
-
"""Creates IBM COS bucket with specific name in specific region
|
|
4172
|
-
|
|
4173
|
-
Args:
|
|
4174
|
-
bucket_name: str; Name of bucket
|
|
4175
|
-
region: str; Region name, e.g. us-east, us-south
|
|
4176
|
-
Raises:
|
|
4177
|
-
StorageBucketCreateError: If bucket creation fails.
|
|
4178
|
-
"""
|
|
4179
|
-
try:
|
|
4180
|
-
self.client.create_bucket(
|
|
4181
|
-
Bucket=bucket_name,
|
|
4182
|
-
CreateBucketConfiguration={
|
|
4183
|
-
'LocationConstraint': f'{region}-smart'
|
|
4184
|
-
})
|
|
4185
|
-
logger.info(f' {colorama.Style.DIM}Created IBM COS bucket '
|
|
4186
|
-
f'{bucket_name!r} in {region} '
|
|
4187
|
-
'with storage class smart tier'
|
|
4188
|
-
f'{colorama.Style.RESET_ALL}')
|
|
4189
|
-
self.bucket = self.s3_resource.Bucket(bucket_name)
|
|
4190
|
-
|
|
4191
|
-
except ibm.ibm_botocore.exceptions.ClientError as e: # type: ignore[union-attr] # pylint: disable=line-too-long
|
|
4192
|
-
with ux_utils.print_exception_no_traceback():
|
|
4193
|
-
raise exceptions.StorageBucketCreateError(
|
|
4194
|
-
f'Failed to create bucket: '
|
|
4195
|
-
f'{bucket_name}') from e
|
|
4196
|
-
|
|
4197
|
-
s3_bucket_exists_waiter = self.client.get_waiter('bucket_exists')
|
|
4198
|
-
s3_bucket_exists_waiter.wait(Bucket=bucket_name)
|
|
4199
|
-
|
|
4200
|
-
return self.bucket
|
|
4201
|
-
|
|
4202
|
-
def _delete_cos_bucket_objects(self,
|
|
4203
|
-
bucket: Any,
|
|
4204
|
-
prefix: Optional[str] = None) -> None:
|
|
4205
|
-
bucket_versioning = self.s3_resource.BucketVersioning(bucket.name)
|
|
4206
|
-
if bucket_versioning.status == 'Enabled':
|
|
4207
|
-
if prefix is not None:
|
|
4208
|
-
res = list(
|
|
4209
|
-
bucket.object_versions.filter(Prefix=prefix).delete())
|
|
4210
|
-
else:
|
|
4211
|
-
res = list(bucket.object_versions.delete())
|
|
4212
|
-
else:
|
|
4213
|
-
if prefix is not None:
|
|
4214
|
-
res = list(bucket.objects.filter(Prefix=prefix).delete())
|
|
4215
|
-
else:
|
|
4216
|
-
res = list(bucket.objects.delete())
|
|
4217
|
-
logger.debug(f'Deleted bucket\'s content:\n{res}, prefix: {prefix}')
|
|
4218
|
-
|
|
4219
|
-
def _delete_cos_bucket(self) -> None:
|
|
4220
|
-
bucket = self.s3_resource.Bucket(self.name)
|
|
4221
|
-
try:
|
|
4222
|
-
self._delete_cos_bucket_objects(bucket)
|
|
4223
|
-
bucket.delete()
|
|
4224
|
-
bucket.wait_until_not_exists()
|
|
4225
|
-
except ibm.ibm_botocore.exceptions.ClientError as e:
|
|
4226
|
-
if e.__class__.__name__ == 'NoSuchBucket':
|
|
4227
|
-
logger.debug('bucket already removed')
|
|
4228
|
-
data_utils.Rclone.delete_rclone_bucket_profile(
|
|
4229
|
-
self.name, data_utils.Rclone.RcloneStores.IBM)
|
|
4230
|
-
|
|
4231
|
-
|
|
4232
|
-
class OciStore(AbstractStore):
|
|
4233
|
-
"""OciStore inherits from Storage Object and represents the backend
|
|
4234
|
-
for OCI buckets.
|
|
4235
|
-
"""
|
|
4236
|
-
|
|
4237
|
-
-    _ACCESS_DENIED_MESSAGE = 'AccessDeniedException'
-
-    def __init__(self,
-                 name: str,
-                 source: Optional[SourceType],
-                 region: Optional[str] = None,
-                 is_sky_managed: Optional[bool] = None,
-                 sync_on_reconstruction: Optional[bool] = True,
-                 _bucket_sub_path: Optional[str] = None):
-        self.client: Any
-        self.bucket: StorageHandle
-        self.oci_config_file: str
-        self.config_profile: str
-        self.compartment: str
-        self.namespace: str
-
-        # Region is from the specified name in <bucket>@<region> format.
-        # Another case is name can also be set by the source, for example:
-        # /datasets-storage:
-        #   source: oci://RAGData@us-sanjose-1
-        # The name in above mount will be set to RAGData@us-sanjose-1
-        region_in_name = None
-        if name is not None and '@' in name:
-            self._validate_bucket_expr(name)
-            name, region_in_name = name.split('@')
-
-        # Region is from the specified source in oci://<bucket>@<region> format
-        region_in_source = None
-        if isinstance(source,
-                      str) and source.startswith('oci://') and '@' in source:
-            self._validate_bucket_expr(source)
-            source, region_in_source = source.split('@')
-
-        if region_in_name is not None and region_in_source is not None:
-            # This should never happen because name and source will never be
-            # the remote bucket at the same time.
-            assert region_in_name == region_in_source, (
-                f'Mismatch region specified. Region in name {region_in_name}, '
-                f'but region in source is {region_in_source}')
-
-        if region_in_name is not None:
-            region = region_in_name
-        elif region_in_source is not None:
-            region = region_in_source
-
-        # Default region set to what specified in oci config.
-        if region is None:
-            region = oci.get_oci_config()['region']
-
-        # So far from now on, the name and source are canonical, means there
-        # is no region (@<region> suffix) associated with them anymore.
-
-        super().__init__(name, source, region, is_sky_managed,
-                         sync_on_reconstruction, _bucket_sub_path)
-        # TODO(zpoint): add _bucket_sub_path to the sync/mount/delete commands
-
-    def _validate_bucket_expr(self, bucket_expr: str):
-        pattern = r'^(\w+://)?[A-Za-z0-9-._]+(@\w{2}-\w+-\d{1})$'
-        if not re.match(pattern, bucket_expr):
-            raise ValueError(
-                'The format for the bucket portion is <bucket>@<region> '
-                'when specify a region with a bucket.')
-
-    def _validate(self):
-        if self.source is not None and isinstance(self.source, str):
-            if self.source.startswith('oci://'):
-                assert self.name == data_utils.split_oci_path(self.source)[0], (
-                    'OCI Bucket is specified as path, the name should be '
-                    'the same as OCI bucket.')
-            elif not re.search(r'^\w+://', self.source):
-                # Treat it as local path.
-                pass
-            else:
-                raise NotImplementedError(
-                    f'Moving data from {self.source} to OCI is not supported.')
-
-        # Validate name
-        self.name = self.validate_name(self.name)
-        # Check if the storage is enabled
-        if not _is_storage_cloud_enabled(str(clouds.OCI())):
-            with ux_utils.print_exception_no_traceback():
-                raise exceptions.ResourcesUnavailableError(
-                    'Storage \'store: oci\' specified, but ' \
-                    'OCI access is disabled. To fix, enable '\
-                    'OCI by running `sky check`. '\
-                    'More info: https://skypilot.readthedocs.io/en/latest/getting-started/installation.html.' # pylint: disable=line-too-long
-                )
-
-    @classmethod
-    def validate_name(cls, name) -> str:
-        """Validates the name of the OCI store.
-
-        Source for rules: https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/managingbuckets.htm#Managing_Buckets # pylint: disable=line-too-long
-        """
-
-        def _raise_no_traceback_name_error(err_str):
-            with ux_utils.print_exception_no_traceback():
-                raise exceptions.StorageNameError(err_str)
-
-        if name is not None and isinstance(name, str):
-            # Check for overall length
-            if not 1 <= len(name) <= 256:
-                _raise_no_traceback_name_error(
-                    f'Invalid store name: name {name} must contain 1-256 '
-                    'characters.')
-
-            # Check for valid characters and start/end with a number or letter
-            pattern = r'^[A-Za-z0-9-._]+$'
-            if not re.match(pattern, name):
-                _raise_no_traceback_name_error(
-                    f'Invalid store name: name {name} can only contain '
-                    'upper or lower case letters, numeric characters, hyphens '
-                    '(-), underscores (_), and dots (.). Spaces are not '
-                    'allowed. Names must start and end with a number or '
-                    'letter.')
-        else:
-            _raise_no_traceback_name_error('Store name must be specified.')
-        return name
-
-    def initialize(self):
-        """Initializes the OCI store object on the cloud.
-
-        Initialization involves fetching bucket if exists, or creating it if
-        it does not.
-
-        Raises:
-            StorageBucketCreateError: If bucket creation fails
-            StorageBucketGetError: If fetching existing bucket fails
-            StorageInitError: If general initialization fails.
-        """
-        # pylint: disable=import-outside-toplevel
-        from sky.clouds.utils import oci_utils
-        from sky.provision.oci.query_utils import query_helper
-
-        self.oci_config_file = oci.get_config_file()
-        self.config_profile = oci_utils.oci_config.get_profile()
-
-        ## pylint: disable=line-too-long
-        # What's compartment? See thttps://docs.oracle.com/en/cloud/foundation/cloud_architecture/governance/compartments.html
-        self.compartment = query_helper.find_compartment(self.region)
-        self.client = oci.get_object_storage_client(region=self.region,
-                                                    profile=self.config_profile)
-        self.namespace = self.client.get_namespace(
-            compartment_id=oci.get_oci_config()['tenancy']).data
-
-        self.bucket, is_new_bucket = self._get_bucket()
-        if self.is_sky_managed is None:
-            # If is_sky_managed is not specified, then this is a new storage
-            # object (i.e., did not exist in global_user_state) and we should
-            # set the is_sky_managed property.
-            # If is_sky_managed is specified, then we take no action.
-            self.is_sky_managed = is_new_bucket
-
-    def upload(self):
-        """Uploads source to store bucket.
-
-        Upload must be called by the Storage handler - it is not called on
-        Store initialization.
-
-        Raises:
-            StorageUploadError: if upload fails.
-        """
-        try:
-            if isinstance(self.source, list):
-                self.batch_oci_rsync(self.source, create_dirs=True)
-            elif self.source is not None:
-                if self.source.startswith('oci://'):
-                    pass
-                else:
-                    self.batch_oci_rsync([self.source])
-        except exceptions.StorageUploadError:
-            raise
-        except Exception as e:
-            raise exceptions.StorageUploadError(
-                f'Upload failed for store {self.name}') from e
-
-    def delete(self) -> None:
-        deleted_by_skypilot = self._delete_oci_bucket(self.name)
-        if deleted_by_skypilot:
-            msg_str = f'Deleted OCI bucket {self.name}.'
-        else:
-            msg_str = (f'OCI bucket {self.name} may have been deleted '
-                       f'externally. Removing from local state.')
-        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
-                    f'{colorama.Style.RESET_ALL}')
-
-    def get_handle(self) -> StorageHandle:
-        return self.client.get_bucket(namespace_name=self.namespace,
-                                      bucket_name=self.name).data
-
-    def batch_oci_rsync(self,
-                        source_path_list: List[Path],
-                        create_dirs: bool = False) -> None:
-        """Invokes oci sync to batch upload a list of local paths to Bucket
-
-        Use OCI bulk operation to batch process the file upload
-
-        Args:
-            source_path_list: List of paths to local files or directories
-            create_dirs: If the local_path is a directory and this is set to
-                False, the contents of the directory are directly uploaded to
-                root of the bucket. If the local_path is a directory and this is
-                set to True, the directory is created in the bucket root and
-                contents are uploaded to it.
-        """
-        sub_path = (f'{self._bucket_sub_path}/'
-                    if self._bucket_sub_path else '')
-
-        @oci.with_oci_env
-        def get_file_sync_command(base_dir_path, file_names):
-            includes = ' '.join(
-                [f'--include "{file_name}"' for file_name in file_names])
-            prefix_arg = ''
-            if sub_path:
-                prefix_arg = f'--object-prefix "{sub_path.strip("/")}"'
-            sync_command = (
-                'oci os object bulk-upload --no-follow-symlinks --overwrite '
-                f'--bucket-name {self.name} --namespace-name {self.namespace} '
-                f'--region {self.region} --src-dir "{base_dir_path}" '
-                f'{prefix_arg} '
-                f'{includes}')
-
-            return sync_command
-
-        @oci.with_oci_env
-        def get_dir_sync_command(src_dir_path, dest_dir_name):
-            if dest_dir_name and not str(dest_dir_name).endswith('/'):
-                dest_dir_name = f'{dest_dir_name}/'
-
-            excluded_list = storage_utils.get_excluded_files(src_dir_path)
-            excluded_list.append('.git/*')
-            excludes = ' '.join([
-                f'--exclude {shlex.quote(file_name)}'
-                for file_name in excluded_list
-            ])
-
-            # we exclude .git directory from the sync
-            sync_command = (
-                'oci os object bulk-upload --no-follow-symlinks --overwrite '
-                f'--bucket-name {self.name} --namespace-name {self.namespace} '
-                f'--region {self.region} '
-                f'--object-prefix "{sub_path}{dest_dir_name}" '
-                f'--src-dir "{src_dir_path}" {excludes}')
-
-            return sync_command
-
-        # Generate message for upload
-        if len(source_path_list) > 1:
-            source_message = f'{len(source_path_list)} paths'
-        else:
-            source_message = source_path_list[0]
-
-        log_path = sky_logging.generate_tmp_logging_file_path(
-            _STORAGE_LOG_FILE_NAME)
-        sync_path = f'{source_message} -> oci://{self.name}/{sub_path}'
-        with rich_utils.safe_status(
-                ux_utils.spinner_message(f'Syncing {sync_path}',
-                                         log_path=log_path)):
-            data_utils.parallel_upload(
-                source_path_list=source_path_list,
-                filesync_command_generator=get_file_sync_command,
-                dirsync_command_generator=get_dir_sync_command,
-                log_path=log_path,
-                bucket_name=self.name,
-                access_denied_message=self._ACCESS_DENIED_MESSAGE,
-                create_dirs=create_dirs,
-                max_concurrent_uploads=1)
-
-        logger.info(
-            ux_utils.finishing_message(f'Storage synced: {sync_path}',
-                                       log_path))
-
-    def _get_bucket(self) -> Tuple[StorageHandle, bool]:
-        """Obtains the OCI bucket.
-        If the bucket exists, this method will connect to the bucket.
-
-        If the bucket does not exist, there are three cases:
-          1) Raise an error if the bucket source starts with oci://
-          2) Return None if bucket has been externally deleted and
-             sync_on_reconstruction is False
-          3) Create and return a new bucket otherwise
-
-        Return tuple (Bucket, Boolean): The first item is the bucket
-        json payload from the OCI API call, the second item indicates
-        if this is a new created bucket(True) or an existing bucket(False).
-
-        Raises:
-            StorageBucketCreateError: If creating the bucket fails
-            StorageBucketGetError: If fetching a bucket fails
-        """
-        try:
-            get_bucket_response = self.client.get_bucket(
-                namespace_name=self.namespace, bucket_name=self.name)
-            bucket = get_bucket_response.data
-            return bucket, False
-        except oci.service_exception() as e:
-            if e.status == 404:  # Not Found
-                if isinstance(self.source,
-                              str) and self.source.startswith('oci://'):
-                    with ux_utils.print_exception_no_traceback():
-                        raise exceptions.StorageBucketGetError(
-                            'Attempted to connect to a non-existent bucket: '
-                            f'{self.source}') from e
-                else:
-                    # If bucket cannot be found (i.e., does not exist), it is
-                    # to be created by Sky. However, creation is skipped if
-                    # Store object is being reconstructed for deletion.
-                    if self.sync_on_reconstruction:
-                        bucket = self._create_oci_bucket(self.name)
-                        return bucket, True
-                    else:
-                        return None, False
-            elif e.status == 401:  # Unauthorized
-                # AccessDenied error for buckets that are private and not
-                # owned by user.
-                command = (
-                    f'oci os object list --namespace-name {self.namespace} '
-                    f'--bucket-name {self.name}')
-                with ux_utils.print_exception_no_traceback():
-                    raise exceptions.StorageBucketGetError(
-                        _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
-                        f' To debug, consider running `{command}`.') from e
-            else:
-                # Unknown / unexpected error happened. This might happen when
-                # Object storage service itself functions not normal (e.g.
-                # maintainance event causes internal server error or request
-                # timeout, etc).
-                with ux_utils.print_exception_no_traceback():
-                    raise exceptions.StorageBucketGetError(
-                        f'Failed to connect to OCI bucket {self.name}') from e
-
-    def mount_command(self, mount_path: str) -> str:
-        """Returns the command to mount the bucket to the mount_path.
-
-        Uses Rclone to mount the bucket.
-
-        Args:
-            mount_path: str; Path to mount the bucket to.
-        """
-        install_cmd = mounting_utils.get_rclone_install_cmd()
-        mount_cmd = mounting_utils.get_oci_mount_cmd(
-            mount_path=mount_path,
-            store_name=self.name,
-            region=str(self.region),
-            namespace=self.namespace,
-            compartment=self.bucket.compartment_id,
-            config_file=self.oci_config_file,
-            config_profile=self.config_profile)
-        version_check_cmd = mounting_utils.get_rclone_version_check_cmd()
-
-        return mounting_utils.get_mounting_command(mount_path, install_cmd,
-                                                   mount_cmd, version_check_cmd)
-
-    def _download_file(self, remote_path: str, local_path: str) -> None:
-        """Downloads file from remote to local on OCI bucket
-
-        Args:
-            remote_path: str; Remote path on OCI bucket
-            local_path: str; Local path on user's device
-        """
-        if remote_path.startswith(f'/{self.name}'):
-            # If the remote path is /bucket_name, we need to
-            # remove the leading /
-            remote_path = remote_path.lstrip('/')
-
-        filename = os.path.basename(remote_path)
-        if not local_path.endswith(filename):
-            local_path = os.path.join(local_path, filename)
-
-        @oci.with_oci_env
-        def get_file_download_command(remote_path, local_path):
-            download_command = (f'oci os object get --bucket-name {self.name} '
-                                f'--namespace-name {self.namespace} '
-                                f'--region {self.region} --name {remote_path} '
-                                f'--file {local_path}')
-
-            return download_command
-
-        download_command = get_file_download_command(remote_path, local_path)
-
-        try:
-            with rich_utils.safe_status(
-                    f'[bold cyan]Downloading: {remote_path} -> {local_path}[/]'
-            ):
-                subprocess.check_output(download_command,
-                                        stderr=subprocess.STDOUT,
-                                        shell=True)
-        except subprocess.CalledProcessError as e:
-            logger.error(f'Download failed: {remote_path} -> {local_path}.\n'
-                         f'Detail errors: {e.output}')
-            with ux_utils.print_exception_no_traceback():
-                raise exceptions.StorageBucketDeleteError(
-                    f'Failed download file {self.name}:{remote_path}.') from e
-
-    def _create_oci_bucket(self, bucket_name: str) -> StorageHandle:
-        """Creates OCI bucket with specific name in specific region
-
-        Args:
-            bucket_name: str; Name of bucket
-            region: str; Region name, e.g. us-central1, us-west1
-        """
-        logger.debug(f'_create_oci_bucket: {bucket_name}')
-        try:
-            create_bucket_response = self.client.create_bucket(
-                namespace_name=self.namespace,
-                create_bucket_details=oci.oci.object_storage.models.
-                CreateBucketDetails(
-                    name=bucket_name,
-                    compartment_id=self.compartment,
-                ))
-            bucket = create_bucket_response.data
-            return bucket
-        except oci.service_exception() as e:
-            with ux_utils.print_exception_no_traceback():
-                raise exceptions.StorageBucketCreateError(
-                    f'Failed to create OCI bucket: {self.name}') from e
-
-    def _delete_oci_bucket(self, bucket_name: str) -> bool:
-        """Deletes OCI bucket, including all objects in bucket
-
-        Args:
-            bucket_name: str; Name of bucket
-
-        Returns:
-            bool; True if bucket was deleted, False if it was deleted externally.
-        """
-        logger.debug(f'_delete_oci_bucket: {bucket_name}')
-
-        @oci.with_oci_env
-        def get_bucket_delete_command(bucket_name):
-            remove_command = (f'oci os bucket delete --bucket-name '
-                              f'--region {self.region} '
-                              f'{bucket_name} --empty --force')
-
-            return remove_command
-
-        remove_command = get_bucket_delete_command(bucket_name)
-
-        try:
-            with rich_utils.safe_status(
-                    f'[bold cyan]Deleting OCI bucket {bucket_name}[/]'):
-                subprocess.check_output(remove_command.split(' '),
-                                        stderr=subprocess.STDOUT)
-        except subprocess.CalledProcessError as e:
-            if 'BucketNotFound' in e.output.decode('utf-8'):
-                logger.debug(
-                    _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
-                        bucket_name=bucket_name))
-                return False
-            else:
-                logger.error(e.output)
-                with ux_utils.print_exception_no_traceback():
-                    raise exceptions.StorageBucketDeleteError(
-                        f'Failed to delete OCI bucket {bucket_name}.')
-        return True
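
Note: the removed `OCIStore.__init__` above pulls an optional region out of a `<bucket>@<region>` expression before calling `super().__init__`. A minimal standalone sketch of that parsing behaviour, reusing the regex shown in the diff (the helper name `split_bucket_expr` is ours, not part of SkyPilot):

```python
import re
from typing import Optional, Tuple

# Same pattern as OCIStore._validate_bucket_expr in the diff above.
_BUCKET_EXPR_PATTERN = r'^(\w+://)?[A-Za-z0-9-._]+(@\w{2}-\w+-\d{1})$'


def split_bucket_expr(expr: str) -> Tuple[str, Optional[str]]:
    """Split '<bucket>@<region>' (optionally 'oci://'-prefixed) into
    (bucket, region); region is None when no '@<region>' suffix is given."""
    if '@' not in expr:
        return expr, None
    if not re.match(_BUCKET_EXPR_PATTERN, expr):
        raise ValueError('The format for the bucket portion is '
                         '<bucket>@<region> when specifying a region.')
    bucket, region = expr.split('@')
    return bucket, region


print(split_bucket_expr('oci://RAGData@us-sanjose-1'))  # ('oci://RAGData', 'us-sanjose-1')
print(split_bucket_expr('RAGData'))                     # ('RAGData', None)
```

As in the diff, a name or source without an `@` suffix is passed through unchanged, and a malformed region suffix is rejected before any split happens.
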
+        # Region is from the specified name in <bucket>@<region> format.
+        # Another case is name can also be set by the source, for example:
+        # /datasets-storage:
+        #   source: oci://RAGData@us-sanjose-1
+        # The name in above mount will be set to RAGData@us-sanjose-1
+        region_in_name = None
+        if name is not None and '@' in name:
+            self._validate_bucket_expr(name)
+            name, region_in_name = name.split('@')

+        # Region is from the specified source in oci://<bucket>@<region> format
+        region_in_source = None
+        if isinstance(source,
+                      str) and source.startswith('oci://') and '@' in source:
+            self._validate_bucket_expr(source)
+            source, region_in_source = source.split('@')

-
-
-
-
+        if region_in_name is not None and region_in_source is not None:
+            # This should never happen because name and source will never be
+            # the remote bucket at the same time.
+            assert region_in_name == region_in_source, (
+                f'Mismatch region specified. Region in name {region_in_name}, '
+                f'but region in source is {region_in_source}')

-
-
+        if region_in_name is not None:
+            region = region_in_name
+        elif region_in_source is not None:
+            region = region_in_source
+
+        # Default region set to what specified in oci config.
+        if region is None:
+            region = oci.get_oci_config()['region']
+
+        # So far from now on, the name and source are canonical, means there
+        # is no region (@<region> suffix) associated with them anymore.

-    def __init__(self,
-                 name: str,
-                 source: str,
-                 region: Optional[str] = None,
-                 is_sky_managed: Optional[bool] = None,
-                 sync_on_reconstruction: bool = True,
-                 _bucket_sub_path: Optional[str] = None):
-        self.client: 'mypy_boto3_s3.Client'
-        self.bucket: 'StorageHandle'
         super().__init__(name, source, region, is_sky_managed,
                          sync_on_reconstruction, _bucket_sub_path)
+        # TODO(zpoint): add _bucket_sub_path to the sync/mount/delete commands
+
+    def _validate_bucket_expr(self, bucket_expr: str):
+        pattern = r'^(\w+://)?[A-Za-z0-9-._]+(@\w{2}-\w+-\d{1})$'
+        if not re.match(pattern, bucket_expr):
+            raise ValueError(
+                'The format for the bucket portion is <bucket>@<region> '
+                'when specify a region with a bucket.')

     def _validate(self):
         if self.source is not None and isinstance(self.source, str):
-            if self.source.startswith('
-                assert self.name == data_utils.
-                    '
-                    ' same as
-            elif
-
-
-
-                assert data_utils.verify_gcs_bucket(self.name), (
-                    f'Source specified as {self.source}, a GCS bucket. ',
-                    'GCS Bucket should exist.')
-            elif data_utils.is_az_container_endpoint(self.source):
-                storage_account_name, container_name, _ = (
-                    data_utils.split_az_path(self.source))
-                assert self.name == container_name, (
-                    'Azure bucket is specified as path, the name should be '
-                    'the same as Azure bucket.')
-                assert data_utils.verify_az_bucket(
-                    storage_account_name, self.name), (
-                        f'Source specified as {self.source}, an Azure bucket. '
-                        'Azure bucket should exist.')
-            elif self.source.startswith('r2://'):
-                assert self.name == data_utils.split_r2_path(self.source)[0], (
-                    'R2 Bucket is specified as path, the name should be '
-                    'the same as R2 bucket.')
-                assert data_utils.verify_r2_bucket(self.name), (
-                    f'Source specified as {self.source}, a R2 bucket. ',
-                    'R2 Bucket should exist.')
-            elif self.source.startswith('nebius://'):
-                assert self.name == data_utils.split_nebius_path(
-                    self.source)[0], (
-                        'Nebius Object Storage is specified as path, the name '
-                        'should be the same as Nebius Object Storage bucket.')
-            elif self.source.startswith('cos://'):
-                assert self.name == data_utils.split_cos_path(self.source)[0], (
-                    'COS Bucket is specified as path, the name should be '
-                    'the same as COS bucket.')
-                assert data_utils.verify_ibm_cos_bucket(self.name), (
-                    f'Source specified as {self.source}, a COS bucket. ',
-                    'COS Bucket should exist.')
-            elif self.source.startswith('oci://'):
+            if self.source.startswith('oci://'):
+                assert self.name == data_utils.split_oci_path(self.source)[0], (
+                    'OCI Bucket is specified as path, the name should be '
+                    'the same as OCI bucket.')
+            elif not re.search(r'^\w+://', self.source):
+                # Treat it as local path.
+                pass
+            else:
                 raise NotImplementedError(
-                    'Moving data from
-        # Validate name
-        self.name = S3Store.validate_name(self.name)
+                    f'Moving data from {self.source} to OCI is not supported.')

+        # Validate name
+        self.name = self.validate_name(self.name)
         # Check if the storage is enabled
-        if not _is_storage_cloud_enabled(str(clouds.
+        if not _is_storage_cloud_enabled(str(clouds.OCI())):
             with ux_utils.print_exception_no_traceback():
-                raise exceptions.ResourcesUnavailableError(
-                    'Storage \'store:
-                    '
-                    '
-                    'https://
-
+                raise exceptions.ResourcesUnavailableError(
+                    'Storage \'store: oci\' specified, but ' \
+                    'OCI access is disabled. To fix, enable '\
+                    'OCI by running `sky check`. '\
+                    'More info: https://skypilot.readthedocs.io/en/latest/getting-started/installation.html.' # pylint: disable=line-too-long
+                )
+
+    @classmethod
+    def validate_name(cls, name) -> str:
+        """Validates the name of the OCI store.
+
+        Source for rules: https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/managingbuckets.htm#Managing_Buckets # pylint: disable=line-too-long
+        """
+
+        def _raise_no_traceback_name_error(err_str):
+            with ux_utils.print_exception_no_traceback():
+                raise exceptions.StorageNameError(err_str)
+
+        if name is not None and isinstance(name, str):
+            # Check for overall length
+            if not 1 <= len(name) <= 256:
+                _raise_no_traceback_name_error(
+                    f'Invalid store name: name {name} must contain 1-256 '
+                    'characters.')
+
+            # Check for valid characters and start/end with a number or letter
+            pattern = r'^[A-Za-z0-9-._]+$'
+            if not re.match(pattern, name):
+                _raise_no_traceback_name_error(
+                    f'Invalid store name: name {name} can only contain '
+                    'upper or lower case letters, numeric characters, hyphens '
+                    '(-), underscores (_), and dots (.). Spaces are not '
+                    'allowed. Names must start and end with a number or '
+                    'letter.')
+        else:
+            _raise_no_traceback_name_error('Store name must be specified.')
+        return name

     def initialize(self):
-        """Initializes the
+        """Initializes the OCI store object on the cloud.

         Initialization involves fetching bucket if exists, or creating it if
         it does not.
@@ -4780,7 +4090,21 @@ class NebiusStore(AbstractStore):
             StorageBucketGetError: If fetching existing bucket fails
             StorageInitError: If general initialization fails.
         """
-
+        # pylint: disable=import-outside-toplevel
+        from sky.clouds.utils import oci_utils
+        from sky.provision.oci.query_utils import query_helper
+
+        self.oci_config_file = oci.get_config_file()
+        self.config_profile = oci_utils.oci_config.get_profile()
+
+        ## pylint: disable=line-too-long
+        # What's compartment? See thttps://docs.oracle.com/en/cloud/foundation/cloud_architecture/governance/compartments.html
+        self.compartment = query_helper.find_compartment(self.region)
+        self.client = oci.get_object_storage_client(region=self.region,
+                                                    profile=self.config_profile)
+        self.namespace = self.client.get_namespace(
+            compartment_id=oci.get_oci_config()['tenancy']).data
+
         self.bucket, is_new_bucket = self._get_bucket()
         if self.is_sky_managed is None:
             # If is_sky_managed is not specified, then this is a new storage
@@ -4800,20 +4124,12 @@ class NebiusStore(AbstractStore):
         """
         try:
             if isinstance(self.source, list):
-                self.
+                self.batch_oci_rsync(self.source, create_dirs=True)
             elif self.source is not None:
-                if self.source.startswith('
+                if self.source.startswith('oci://'):
                     pass
-                elif self.source.startswith('s3://'):
-                    self._transfer_to_nebius()
-                elif self.source.startswith('gs://'):
-                    self._transfer_to_nebius()
-                elif self.source.startswith('r2://'):
-                    self._transfer_to_nebius()
-                elif self.source.startswith('oci://'):
-                    self._transfer_to_nebius()
                 else:
-                    self.
+                    self.batch_oci_rsync([self.source])
         except exceptions.StorageUploadError:
             raise
         except Exception as e:
@@ -4821,45 +4137,25 @@ class NebiusStore(AbstractStore):
                 f'Upload failed for store {self.name}') from e

     def delete(self) -> None:
-
-            return self._delete_sub_path()
-
-        deleted_by_skypilot = self._delete_nebius_bucket(self.name)
+        deleted_by_skypilot = self._delete_oci_bucket(self.name)
         if deleted_by_skypilot:
-            msg_str = f'Deleted
+            msg_str = f'Deleted OCI bucket {self.name}.'
         else:
-            msg_str = (f'
+            msg_str = (f'OCI bucket {self.name} may have been deleted '
                        f'externally. Removing from local state.')
         logger.info(f'{colorama.Fore.GREEN}{msg_str}'
                     f'{colorama.Style.RESET_ALL}')

-    def _delete_sub_path(self) -> None:
-        assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
-        deleted_by_skypilot = self._delete_nebius_bucket_sub_path(
-            self.name, self._bucket_sub_path)
-        if deleted_by_skypilot:
-            msg_str = (f'Removed objects from S3 bucket '
-                       f'{self.name}/{self._bucket_sub_path}.')
-        else:
-            msg_str = (f'Failed to remove objects from S3 bucket '
-                       f'{self.name}/{self._bucket_sub_path}.')
-        logger.info(f'{colorama.Fore.GREEN}{msg_str}'
-                    f'{colorama.Style.RESET_ALL}')
-
     def get_handle(self) -> StorageHandle:
-        return
+        return self.client.get_bucket(namespace_name=self.namespace,
+                                      bucket_name=self.name).data

-    def
+    def batch_oci_rsync(self,
                         source_path_list: List[Path],
                         create_dirs: bool = False) -> None:
-        """Invokes
-
-        AWS Sync by default uses 10 threads to upload files to the bucket. To
-        increase parallelism, modify max_concurrent_requests in your aws config
-        file (Default path: ~/.aws/config).
+        """Invokes oci sync to batch upload a list of local paths to Bucket

-
-        multiple commands to be run in parallel.
+        Use OCI bulk operation to batch process the file upload

         Args:
             source_path_list: List of paths to local files or directories
@@ -4869,34 +4165,45 @@ class NebiusStore(AbstractStore):
                 set to True, the directory is created in the bucket root and
                 contents are uploaded to it.
         """
-        sub_path = (f'
+        sub_path = (f'{self._bucket_sub_path}/'
                     if self._bucket_sub_path else '')

+        @oci.with_oci_env
         def get_file_sync_command(base_dir_path, file_names):
-            includes = ' '.join(
-                f'--include {
-
-
-
-            sync_command = (
-
-
-
+            includes = ' '.join(
+                [f'--include "{file_name}"' for file_name in file_names])
+            prefix_arg = ''
+            if sub_path:
+                prefix_arg = f'--object-prefix "{sub_path.strip("/")}"'
+            sync_command = (
+                'oci os object bulk-upload --no-follow-symlinks --overwrite '
+                f'--bucket-name {self.name} --namespace-name {self.namespace} '
+                f'--region {self.region} --src-dir "{base_dir_path}" '
+                f'{prefix_arg} '
+                f'{includes}')
+
             return sync_command

+        @oci.with_oci_env
         def get_dir_sync_command(src_dir_path, dest_dir_name):
-
+            if dest_dir_name and not str(dest_dir_name).endswith('/'):
+                dest_dir_name = f'{dest_dir_name}/'
+
             excluded_list = storage_utils.get_excluded_files(src_dir_path)
             excluded_list.append('.git/*')
             excludes = ' '.join([
                 f'--exclude {shlex.quote(file_name)}'
                 for file_name in excluded_list
             ])
-
-
-
-
-
+
+            # we exclude .git directory from the sync
+            sync_command = (
+                'oci os object bulk-upload --no-follow-symlinks --overwrite '
+                f'--bucket-name {self.name} --namespace-name {self.namespace} '
+                f'--region {self.region} '
+                f'--object-prefix "{sub_path}{dest_dir_name}" '
+                f'--src-dir "{src_dir_path}" {excludes}')
+
             return sync_command

         # Generate message for upload
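
Note: both generators above shell out to the OCI CLI's `oci os object bulk-upload` subcommand. A minimal sketch of the same command-string assembly, detached from the class (the function name and parameters are ours; the flags mirror the ones visible in the diff):

```python
import shlex
from typing import List, Optional


def build_oci_bulk_upload_cmd(bucket: str,
                              namespace: str,
                              region: str,
                              src_dir: str,
                              object_prefix: Optional[str] = None,
                              excludes: Optional[List[str]] = None) -> str:
    """Compose an `oci os object bulk-upload` command line similar to
    get_dir_sync_command() in the hunk above."""
    exclude_args = ' '.join(
        f'--exclude {shlex.quote(pattern)}' for pattern in (excludes or []))
    prefix_arg = f'--object-prefix "{object_prefix}" ' if object_prefix else ''
    return ('oci os object bulk-upload --no-follow-symlinks --overwrite '
            f'--bucket-name {bucket} --namespace-name {namespace} '
            f'--region {region} '
            f'{prefix_arg}'
            f'--src-dir "{src_dir}" {exclude_args}')


print(build_oci_bulk_upload_cmd('my-bucket', 'my-namespace', 'us-sanjose-1',
                                '/tmp/data', 'sub/path/', ['.git/*']))
```

The real methods additionally run under `@oci.with_oci_env`, so the command inherits the OCI config file and profile selected during store initialization.
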
@@ -4907,210 +4214,347 @@ class NebiusStore(AbstractStore):

         log_path = sky_logging.generate_tmp_logging_file_path(
             _STORAGE_LOG_FILE_NAME)
-        sync_path = f'{source_message} ->
+        sync_path = f'{source_message} -> oci://{self.name}/{sub_path}'
         with rich_utils.safe_status(
                 ux_utils.spinner_message(f'Syncing {sync_path}',
                                          log_path=log_path)):
             data_utils.parallel_upload(
-                source_path_list,
-                get_file_sync_command,
-                get_dir_sync_command,
-                log_path,
-                self.name,
-                self._ACCESS_DENIED_MESSAGE,
+                source_path_list=source_path_list,
+                filesync_command_generator=get_file_sync_command,
+                dirsync_command_generator=get_dir_sync_command,
+                log_path=log_path,
+                bucket_name=self.name,
+                access_denied_message=self._ACCESS_DENIED_MESSAGE,
                 create_dirs=create_dirs,
-                max_concurrent_uploads=
-        logger.info(
-            ux_utils.finishing_message(f'Storage synced: {sync_path}',
-                                       log_path))
+                max_concurrent_uploads=1)

-
-
-
-            data_transfer.gcs_to_nebius(self.name, self.name)
-        elif self.source.startswith('r2://'):
-            data_transfer.r2_to_nebius(self.name, self.name)
-        elif self.source.startswith('s3://'):
-            data_transfer.s3_to_nebius(self.name, self.name)
+        logger.info(
+            ux_utils.finishing_message(f'Storage synced: {sync_path}',
+                                       log_path))

     def _get_bucket(self) -> Tuple[StorageHandle, bool]:
-        """Obtains the
+        """Obtains the OCI bucket.
+        If the bucket exists, this method will connect to the bucket.

-        If the bucket exists, this method will return the bucket.
         If the bucket does not exist, there are three cases:
-          1) Raise an error if the bucket source starts with
+          1) Raise an error if the bucket source starts with oci://
           2) Return None if bucket has been externally deleted and
             sync_on_reconstruction is False
          3) Create and return a new bucket otherwise

+        Return tuple (Bucket, Boolean): The first item is the bucket
+        json payload from the OCI API call, the second item indicates
+        if this is a new created bucket(True) or an existing bucket(False).
+
         Raises:
-            StorageSpecError: If externally created bucket is attempted to be
-                mounted without specifying storage source.
             StorageBucketCreateError: If creating the bucket fails
             StorageBucketGetError: If fetching a bucket fails
-            StorageExternalDeletionError: If externally deleted storage is
-                attempted to be fetched while reconstructing the storage for
-                'sky storage delete' or 'sky start'
         """
-        nebius_s = nebius.resource('s3')
-        bucket = nebius_s.Bucket(self.name)
         try:
-
-
-
-            # accessible.
-            self.client.head_bucket(Bucket=self.name)
-            self._validate_existing_bucket()
+            get_bucket_response = self.client.get_bucket(
+                namespace_name=self.namespace, bucket_name=self.name)
+            bucket = get_bucket_response.data
             return bucket, False
-        except
-
-
-
-
-
+        except oci.service_exception() as e:
+            if e.status == 404:  # Not Found
+                if isinstance(self.source,
+                              str) and self.source.startswith('oci://'):
+                    with ux_utils.print_exception_no_traceback():
+                        raise exceptions.StorageBucketGetError(
+                            'Attempted to connect to a non-existent bucket: '
+                            f'{self.source}') from e
+                else:
+                    # If bucket cannot be found (i.e., does not exist), it is
+                    # to be created by Sky. However, creation is skipped if
+                    # Store object is being reconstructed for deletion.
+                    if self.sync_on_reconstruction:
+                        bucket = self._create_oci_bucket(self.name)
+                        return bucket, True
+                    else:
+                        return None, False
+            elif e.status == 401:  # Unauthorized
+                # AccessDenied error for buckets that are private and not
+                # owned by user.
+                command = (
+                    f'oci os object list --namespace-name {self.namespace} '
+                    f'--bucket-name {self.name}')
                 with ux_utils.print_exception_no_traceback():
                     raise exceptions.StorageBucketGetError(
                         _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
                         f' To debug, consider running `{command}`.') from e
+            else:
+                # Unknown / unexpected error happened. This might happen when
+                # Object storage service itself functions not normal (e.g.
+                # maintainance event causes internal server error or request
+                # timeout, etc).
+                with ux_utils.print_exception_no_traceback():
+                    raise exceptions.StorageBucketGetError(
+                        f'Failed to connect to OCI bucket {self.name}') from e

-
-
-            raise exceptions.StorageBucketGetError(
-                'Attempted to use a non-existent bucket as a source: '
-                f'{self.source}. Consider using `aws s3 ls '
-                f's3://{self.name} '
-                f'--profile={nebius.NEBIUS_PROFILE_NAME}` to debug.')
+    def mount_command(self, mount_path: str) -> str:
+        """Returns the command to mount the bucket to the mount_path.

-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        Uses Rclone to mount the bucket.
+
+        Args:
+            mount_path: str; Path to mount the bucket to.
+        """
+        install_cmd = mounting_utils.get_rclone_install_cmd()
+        mount_cmd = mounting_utils.get_oci_mount_cmd(
+            mount_path=mount_path,
+            store_name=self.name,
+            region=str(self.region),
+            namespace=self.namespace,
+            compartment=self.bucket.compartment_id,
+            config_file=self.oci_config_file,
+            config_profile=self.config_profile)
+        version_check_cmd = mounting_utils.get_rclone_version_check_cmd()
+
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cmd, version_check_cmd)

     def _download_file(self, remote_path: str, local_path: str) -> None:
-        """Downloads file from remote to local on
-        using the boto3 API
+        """Downloads file from remote to local on OCI bucket

         Args:
-            remote_path: str; Remote path on
+            remote_path: str; Remote path on OCI bucket
             local_path: str; Local path on user's device
         """
-        self.
+        if remote_path.startswith(f'/{self.name}'):
+            # If the remote path is /bucket_name, we need to
+            # remove the leading /
+            remote_path = remote_path.lstrip('/')

-
-
+        filename = os.path.basename(remote_path)
+        if not local_path.endswith(filename):
+            local_path = os.path.join(local_path, filename)
+
+        @oci.with_oci_env
+        def get_file_download_command(remote_path, local_path):
+            download_command = (f'oci os object get --bucket-name {self.name} '
+                                f'--namespace-name {self.namespace} '
+                                f'--region {self.region} --name {remote_path} '
+                                f'--file {local_path}')

-
+            return download_command

-
-
-
-
-
-
-
-
-
-
-
-
-
+        download_command = get_file_download_command(remote_path, local_path)
+
+        try:
+            with rich_utils.safe_status(
+                    f'[bold cyan]Downloading: {remote_path} -> {local_path}[/]'
+            ):
+                subprocess.check_output(download_command,
+                                        stderr=subprocess.STDOUT,
+                                        shell=True)
+        except subprocess.CalledProcessError as e:
+            logger.error(f'Download failed: {remote_path} -> {local_path}.\n'
+                         f'Detail errors: {e.output}')
+            with ux_utils.print_exception_no_traceback():
+                raise exceptions.StorageBucketDeleteError(
+                    f'Failed download file {self.name}:{remote_path}.') from e

-    def
-        """Creates
+    def _create_oci_bucket(self, bucket_name: str) -> StorageHandle:
+        """Creates OCI bucket with specific name in specific region

         Args:
             bucket_name: str; Name of bucket
-
-            StorageBucketCreateError: If bucket creation fails.
+            region: str; Region name, e.g. us-central1, us-west1
         """
-
+        logger.debug(f'_create_oci_bucket: {bucket_name}')
         try:
-
-
+            create_bucket_response = self.client.create_bucket(
+                namespace_name=self.namespace,
+                create_bucket_details=oci.oci.object_storage.models.
+                CreateBucketDetails(
+                    name=bucket_name,
+                    compartment_id=self.compartment,
+                ))
+            bucket = create_bucket_response.data
+            return bucket
+        except oci.service_exception() as e:
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageBucketCreateError(
-                    f'
-
-
+                    f'Failed to create OCI bucket: {self.name}') from e
+
+    def _delete_oci_bucket(self, bucket_name: str) -> bool:
+        """Deletes OCI bucket, including all objects in bucket
+
+        Args:
+            bucket_name: str; Name of bucket
+
+        Returns:
+            bool; True if bucket was deleted, False if it was deleted externally.
+        """
+        logger.debug(f'_delete_oci_bucket: {bucket_name}')
+
+        @oci.with_oci_env
+        def get_bucket_delete_command(bucket_name):
+            remove_command = (f'oci os bucket delete --bucket-name '
+                              f'--region {self.region} '
+                              f'{bucket_name} --empty --force')
+
+            return remove_command
+
+        remove_command = get_bucket_delete_command(bucket_name)

-    def _execute_nebius_remove_command(self, command: str, bucket_name: str,
-                                       hint_operating: str,
-                                       hint_failed: str) -> bool:
         try:
             with rich_utils.safe_status(
-
-                subprocess.check_output(
+                    f'[bold cyan]Deleting OCI bucket {bucket_name}[/]'):
+                subprocess.check_output(remove_command.split(' '),
                                         stderr=subprocess.STDOUT)
         except subprocess.CalledProcessError as e:
-            if '
+            if 'BucketNotFound' in e.output.decode('utf-8'):
                 logger.debug(
                     _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
                         bucket_name=bucket_name))
                 return False
             else:
+                logger.error(e.output)
                 with ux_utils.print_exception_no_traceback():
                     raise exceptions.StorageBucketDeleteError(
-                        f'{
-                        f'Detailed error: {e.output}')
+                        f'Failed to delete OCI bucket {bucket_name}.')
         return True

-    def _delete_nebius_bucket(self, bucket_name: str) -> bool:
-        """Deletes S3 bucket, including all objects in bucket

-
-
+@register_s3_compatible_store
+class S3Store(S3CompatibleStore):
+    """S3Store inherits from S3CompatibleStore and represents the backend
+    for S3 buckets.
+    """

-
-
+    _DEFAULT_REGION = 'us-east-1'
+    _CUSTOM_ENDPOINT_REGIONS = [
+        'ap-east-1', 'me-south-1', 'af-south-1', 'eu-south-1', 'eu-south-2',
+        'ap-south-2', 'ap-southeast-3', 'ap-southeast-4', 'me-central-1',
+        'il-central-1'
+    ]

-
-
-
-
-
-
-
-        #
-        #
-        #
-
-
-
-
-
-
-
-
-        return False
+    def __init__(self,
+                 name: str,
+                 source: str,
+                 region: Optional[str] = None,
+                 is_sky_managed: Optional[bool] = None,
+                 sync_on_reconstruction: bool = True,
+                 _bucket_sub_path: Optional[str] = None):
+        # TODO(romilb): This is purely a stopgap fix for
+        # https://github.com/skypilot-org/skypilot/issues/3405
+        # We should eventually make all opt-in regions also work for S3 by
+        # passing the right endpoint flags.
+        if region in self._CUSTOM_ENDPOINT_REGIONS:
+            logger.warning('AWS opt-in regions are not supported for S3. '
+                           f'Falling back to default region '
+                           f'{self._DEFAULT_REGION} for bucket {name!r}.')
+            region = self._DEFAULT_REGION
+        super().__init__(name, source, region, is_sky_managed,
+                         sync_on_reconstruction, _bucket_sub_path)

-
-
-
-
-
-
-
-
+    @classmethod
+    def get_config(cls) -> S3CompatibleConfig:
+        """Return the configuration for AWS S3."""
+        return S3CompatibleConfig(
+            store_type='S3',
+            url_prefix='s3://',
+            client_factory=data_utils.create_s3_client,
+            resource_factory=lambda name: aws.resource('s3').Bucket(name),
+            split_path=data_utils.split_s3_path,
+            verify_bucket=data_utils.verify_s3_bucket,
+            cloud_name=str(clouds.AWS()),
+            default_region=cls._DEFAULT_REGION,
+            mount_cmd_factory=mounting_utils.get_s3_mount_cmd,
+        )
+
+    def mount_cached_command(self, mount_path: str) -> str:
+        install_cmd = mounting_utils.get_rclone_install_cmd()
+        rclone_profile_name = (
+            data_utils.Rclone.RcloneStores.S3.get_profile_name(self.name))
+        rclone_config = data_utils.Rclone.RcloneStores.S3.get_config(
+            rclone_profile_name=rclone_profile_name)
+        mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
+            rclone_config, rclone_profile_name, self.bucket.name, mount_path)
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cached_cmd)
+
+
+@register_s3_compatible_store
+class R2Store(S3CompatibleStore):
+    """R2Store inherits from S3CompatibleStore and represents the backend
+    for R2 buckets.
+    """
+
+    def __init__(self,
+                 name: str,
+                 source: str,
+                 region: Optional[str] = 'auto',
+                 is_sky_managed: Optional[bool] = None,
+                 sync_on_reconstruction: bool = True,
+                 _bucket_sub_path: Optional[str] = None):
+        super().__init__(name, source, region, is_sky_managed,
+                         sync_on_reconstruction, _bucket_sub_path)
+
+    @classmethod
+    def get_config(cls) -> S3CompatibleConfig:
+        """Return the configuration for Cloudflare R2."""
+        return S3CompatibleConfig(
+            store_type='R2',
+            url_prefix='r2://',
+            client_factory=lambda region: data_utils.create_r2_client(region or
+                                                                      'auto'),
+            resource_factory=lambda name: cloudflare.resource('s3').Bucket(name
+                                                                          ),
+            split_path=data_utils.split_r2_path,
+            verify_bucket=data_utils.verify_r2_bucket,
+            credentials_file=cloudflare.R2_CREDENTIALS_PATH,
+            aws_profile=cloudflare.R2_PROFILE_NAME,
+            get_endpoint_url=lambda: cloudflare.create_endpoint(),  # pylint: disable=unnecessary-lambda
+            extra_cli_args=['--checksum-algorithm', 'CRC32'],  # R2 specific
+            cloud_name=cloudflare.NAME,
+            default_region='auto',
+            mount_cmd_factory=mounting_utils.get_r2_mount_cmd,
+        )
+
+    def mount_cached_command(self, mount_path: str) -> str:
+        """R2-specific cached mount implementation using rclone."""
+        install_cmd = mounting_utils.get_rclone_install_cmd()
+        rclone_profile_name = (
+            data_utils.Rclone.RcloneStores.R2.get_profile_name(self.name))
+        rclone_config = data_utils.Rclone.RcloneStores.R2.get_config(
+            rclone_profile_name=rclone_profile_name)
+        mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
+            rclone_config, rclone_profile_name, self.bucket.name, mount_path)
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cached_cmd)
+
+
+@register_s3_compatible_store
+class NebiusStore(S3CompatibleStore):
+    """NebiusStore inherits from S3CompatibleStore and represents the backend
+    for Nebius Object Storage buckets.
+    """

-
-
-        """
-
-
-
-
-
-
-
-
+    @classmethod
+    def get_config(cls) -> S3CompatibleConfig:
+        """Return the configuration for Nebius Object Storage."""
+        return S3CompatibleConfig(
+            store_type='NEBIUS',
+            url_prefix='nebius://',
+            client_factory=lambda region: data_utils.create_nebius_client(),
+            resource_factory=lambda name: nebius.resource('s3').Bucket(name),
+            split_path=data_utils.split_nebius_path,
+            verify_bucket=data_utils.verify_nebius_bucket,
+            aws_profile=nebius.NEBIUS_PROFILE_NAME,
+            cloud_name=str(clouds.Nebius()),
+            mount_cmd_factory=cls._get_nebius_mount_cmd,
+        )
+
+    @classmethod
+    def _get_nebius_mount_cmd(cls, bucket_name: str, mount_path: str,
+                              bucket_sub_path: Optional[str]) -> str:
+        """Factory method for Nebius mount command."""
+        # We need to get the endpoint URL, but since this is a static method,
+        # we'll need to create a client to get it
+        client = data_utils.create_nebius_client()
+        endpoint_url = client.meta.endpoint_url
+        return mounting_utils.get_nebius_mount_cmd(nebius.NEBIUS_PROFILE_NAME,
+                                                   bucket_name, endpoint_url,
+                                                   mount_path, bucket_sub_path)