skypilot-nightly 1.0.0.dev20250729__py3-none-any.whl → 1.0.0.dev20250731__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in the public registry.

Potentially problematic release: this version of skypilot-nightly might be problematic.

Files changed (186)
  1. sky/__init__.py +2 -2
  2. sky/backends/backend_utils.py +4 -1
  3. sky/backends/cloud_vm_ray_backend.py +4 -3
  4. sky/catalog/__init__.py +3 -3
  5. sky/catalog/aws_catalog.py +12 -0
  6. sky/catalog/common.py +2 -2
  7. sky/catalog/data_fetchers/fetch_aws.py +13 -1
  8. sky/client/cli/command.py +448 -60
  9. sky/client/common.py +12 -9
  10. sky/clouds/nebius.py +1 -1
  11. sky/clouds/utils/gcp_utils.py +1 -1
  12. sky/clouds/vast.py +1 -2
  13. sky/dashboard/out/404.html +1 -1
  14. sky/dashboard/out/_next/static/chunks/1043-928582d4860fef92.js +1 -0
  15. sky/dashboard/out/_next/static/chunks/1141-3f10a5a9f697c630.js +11 -0
  16. sky/dashboard/out/_next/static/chunks/1559-6c00e20454194859.js +30 -0
  17. sky/dashboard/out/_next/static/chunks/1664-22b00e32c9ff96a4.js +1 -0
  18. sky/dashboard/out/_next/static/chunks/1871-1df8b686a51f3e3a.js +6 -0
  19. sky/dashboard/out/_next/static/chunks/2003.f90b06bb1f914295.js +1 -0
  20. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  21. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  22. sky/dashboard/out/_next/static/chunks/2641.142718b6b78a6f9b.js +1 -0
  23. sky/dashboard/out/_next/static/chunks/3698-7874720877646365.js +1 -0
  24. sky/dashboard/out/_next/static/chunks/3785.95524bc443db8260.js +1 -0
  25. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  26. sky/dashboard/out/_next/static/chunks/4725.42f21f250f91f65b.js +1 -0
  27. sky/dashboard/out/_next/static/chunks/4869.18e6a4361a380763.js +16 -0
  28. sky/dashboard/out/_next/static/chunks/4937.d6bf67771e353356.js +15 -0
  29. sky/dashboard/out/_next/static/chunks/5230-f3bb2663e442e86c.js +1 -0
  30. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  31. sky/dashboard/out/_next/static/chunks/6135-d0e285ac5f3f2485.js +1 -0
  32. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  33. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  34. sky/dashboard/out/_next/static/chunks/6601-234b1cf963c7280b.js +1 -0
  35. sky/dashboard/out/_next/static/chunks/691.6d99cbfba347cebf.js +55 -0
  36. sky/dashboard/out/_next/static/chunks/6989-983d3ae7a874de98.js +1 -0
  37. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +1 -0
  38. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  39. sky/dashboard/out/_next/static/chunks/8969-9a8cca241b30db83.js +1 -0
  40. sky/dashboard/out/_next/static/chunks/9025.7937c16bc8623516.js +6 -0
  41. sky/dashboard/out/_next/static/chunks/938-40d15b6261ec8dc1.js +1 -0
  42. sky/dashboard/out/_next/static/chunks/9847.4c46c5e229c78704.js +30 -0
  43. sky/dashboard/out/_next/static/chunks/9984.78ee6d2c6fa4b0e8.js +1 -0
  44. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  45. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  46. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  47. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  48. sky/dashboard/out/_next/static/chunks/pages/_app-a67ae198457b9886.js +34 -0
  49. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  50. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-fa63e8b1d203f298.js +11 -0
  51. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-665fa5d96dd41d67.js +1 -0
  52. sky/dashboard/out/_next/static/chunks/pages/clusters-956ad430075efee8.js +1 -0
  53. sky/dashboard/out/_next/static/chunks/pages/config-8620d099cbef8608.js +1 -0
  54. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  55. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-9cfd875eecb6eaf5.js +1 -0
  56. sky/dashboard/out/_next/static/chunks/pages/infra-0fbdc9072f19fbe2.js +1 -0
  57. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-b25c109d6e41bcf4.js +11 -0
  58. sky/dashboard/out/_next/static/chunks/pages/jobs-6393a9edc7322b54.js +1 -0
  59. sky/dashboard/out/_next/static/chunks/pages/users-34d6bb10c3b3ee3d.js +1 -0
  60. sky/dashboard/out/_next/static/chunks/pages/volumes-225c8dae0634eb7f.js +1 -0
  61. sky/dashboard/out/_next/static/chunks/pages/workspace/new-92f741084a89e27b.js +1 -0
  62. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-4d41c9023287f59a.js +1 -0
  63. sky/dashboard/out/_next/static/chunks/pages/workspaces-e4cb7e97d37e93ad.js +1 -0
  64. sky/dashboard/out/_next/static/chunks/webpack-5adfc4d4b3db6f71.js +1 -0
  65. sky/dashboard/out/_next/static/oKqDxFQ88cquF4nQGE_0w/_buildManifest.js +1 -0
  66. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  67. sky/dashboard/out/clusters/[cluster].html +1 -1
  68. sky/dashboard/out/clusters.html +1 -1
  69. sky/dashboard/out/config.html +1 -1
  70. sky/dashboard/out/index.html +1 -1
  71. sky/dashboard/out/infra/[context].html +1 -1
  72. sky/dashboard/out/infra.html +1 -1
  73. sky/dashboard/out/jobs/[job].html +1 -1
  74. sky/dashboard/out/jobs.html +1 -1
  75. sky/dashboard/out/users.html +1 -1
  76. sky/dashboard/out/volumes.html +1 -1
  77. sky/dashboard/out/workspace/new.html +1 -1
  78. sky/dashboard/out/workspaces/[name].html +1 -1
  79. sky/dashboard/out/workspaces.html +1 -1
  80. sky/data/data_utils.py +25 -0
  81. sky/data/storage.py +1219 -1775
  82. sky/global_user_state.py +18 -8
  83. sky/jobs/__init__.py +3 -0
  84. sky/jobs/client/sdk.py +80 -3
  85. sky/jobs/controller.py +76 -25
  86. sky/jobs/recovery_strategy.py +80 -34
  87. sky/jobs/scheduler.py +68 -20
  88. sky/jobs/server/core.py +228 -136
  89. sky/jobs/server/server.py +40 -0
  90. sky/jobs/state.py +164 -31
  91. sky/jobs/utils.py +144 -68
  92. sky/logs/aws.py +4 -2
  93. sky/provision/kubernetes/utils.py +6 -4
  94. sky/provision/nebius/constants.py +3 -0
  95. sky/provision/vast/instance.py +2 -1
  96. sky/provision/vast/utils.py +9 -6
  97. sky/py.typed +0 -0
  98. sky/resources.py +24 -14
  99. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  100. sky/serve/autoscalers.py +8 -0
  101. sky/serve/client/impl.py +188 -0
  102. sky/serve/client/sdk.py +12 -82
  103. sky/serve/constants.py +5 -1
  104. sky/serve/controller.py +5 -0
  105. sky/serve/replica_managers.py +112 -37
  106. sky/serve/serve_state.py +16 -6
  107. sky/serve/serve_utils.py +274 -77
  108. sky/serve/server/core.py +8 -525
  109. sky/serve/server/impl.py +709 -0
  110. sky/serve/service.py +13 -9
  111. sky/serve/service_spec.py +74 -4
  112. sky/server/constants.py +1 -1
  113. sky/server/requests/payloads.py +33 -0
  114. sky/server/requests/requests.py +18 -1
  115. sky/server/requests/serializers/decoders.py +12 -3
  116. sky/server/requests/serializers/encoders.py +13 -2
  117. sky/server/server.py +6 -1
  118. sky/skylet/events.py +9 -0
  119. sky/skypilot_config.py +24 -21
  120. sky/task.py +41 -11
  121. sky/templates/jobs-controller.yaml.j2 +3 -0
  122. sky/templates/sky-serve-controller.yaml.j2 +18 -2
  123. sky/users/server.py +1 -1
  124. sky/utils/command_runner.py +4 -2
  125. sky/utils/controller_utils.py +14 -10
  126. sky/utils/dag_utils.py +4 -2
  127. sky/utils/db/migration_utils.py +2 -4
  128. sky/utils/schemas.py +24 -19
  129. {skypilot_nightly-1.0.0.dev20250729.dist-info → skypilot_nightly-1.0.0.dev20250731.dist-info}/METADATA +1 -1
  130. {skypilot_nightly-1.0.0.dev20250729.dist-info → skypilot_nightly-1.0.0.dev20250731.dist-info}/RECORD +135 -130
  131. sky/dashboard/out/_next/static/Q2sVXboB_t7cgvntL-6nD/_buildManifest.js +0 -1
  132. sky/dashboard/out/_next/static/chunks/1043-869d9c78bf5dd3df.js +0 -1
  133. sky/dashboard/out/_next/static/chunks/1141-e49a159c30a6c4a7.js +0 -11
  134. sky/dashboard/out/_next/static/chunks/1559-18717d96ef2fcbe9.js +0 -30
  135. sky/dashboard/out/_next/static/chunks/1664-d65361e92b85e786.js +0 -1
  136. sky/dashboard/out/_next/static/chunks/1871-ea0e7283886407ca.js +0 -6
  137. sky/dashboard/out/_next/static/chunks/2003.b82e6db40ec4c463.js +0 -1
  138. sky/dashboard/out/_next/static/chunks/2350.23778a2b19aabd33.js +0 -1
  139. sky/dashboard/out/_next/static/chunks/2369.2d6e4757f8dfc2b7.js +0 -15
  140. sky/dashboard/out/_next/static/chunks/2641.74c19c4d45a2c034.js +0 -1
  141. sky/dashboard/out/_next/static/chunks/3698-9fa11dafb5cad4a6.js +0 -1
  142. sky/dashboard/out/_next/static/chunks/3785.59705416215ff08b.js +0 -1
  143. sky/dashboard/out/_next/static/chunks/3937.d7f1c55d1916c7f2.js +0 -1
  144. sky/dashboard/out/_next/static/chunks/4725.66125dcd9832aa5d.js +0 -1
  145. sky/dashboard/out/_next/static/chunks/4869.da729a7db3a31f43.js +0 -16
  146. sky/dashboard/out/_next/static/chunks/4937.d75809403fc264ac.js +0 -15
  147. sky/dashboard/out/_next/static/chunks/5230-df791914b54d91d9.js +0 -1
  148. sky/dashboard/out/_next/static/chunks/5739-5ea3ffa10fc884f2.js +0 -8
  149. sky/dashboard/out/_next/static/chunks/6135-2abbd0352f8ee061.js +0 -1
  150. sky/dashboard/out/_next/static/chunks/616-162f3033ffcd3d31.js +0 -39
  151. sky/dashboard/out/_next/static/chunks/6601-d4a381403a8bae91.js +0 -1
  152. sky/dashboard/out/_next/static/chunks/691.488b4aef97c28727.js +0 -55
  153. sky/dashboard/out/_next/static/chunks/6989-eab0e9c16b64fd9f.js +0 -1
  154. sky/dashboard/out/_next/static/chunks/6990-f64e03df359e04f7.js +0 -1
  155. sky/dashboard/out/_next/static/chunks/7411-2cc31dc0fdf2a9ad.js +0 -41
  156. sky/dashboard/out/_next/static/chunks/8969-8e0b2055bf5dd499.js +0 -1
  157. sky/dashboard/out/_next/static/chunks/9025.4a9099bdf3ed4875.js +0 -6
  158. sky/dashboard/out/_next/static/chunks/938-7ee806653aef0609.js +0 -1
  159. sky/dashboard/out/_next/static/chunks/9847.387abf8a14d722db.js +0 -30
  160. sky/dashboard/out/_next/static/chunks/9984.0460de9d3adf5582.js +0 -1
  161. sky/dashboard/out/_next/static/chunks/fd9d1056-61f2257a9cd8b32b.js +0 -1
  162. sky/dashboard/out/_next/static/chunks/framework-efc06c2733009cd3.js +0 -33
  163. sky/dashboard/out/_next/static/chunks/main-app-68c028b1bc5e1b72.js +0 -1
  164. sky/dashboard/out/_next/static/chunks/main-c0a4f1ea606d48d2.js +0 -1
  165. sky/dashboard/out/_next/static/chunks/pages/_app-da491665d4289aae.js +0 -34
  166. sky/dashboard/out/_next/static/chunks/pages/_error-c72a1f77a3c0be1b.js +0 -1
  167. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-2186770cc2de1623.js +0 -11
  168. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-95afb019ab85801c.js +0 -6
  169. sky/dashboard/out/_next/static/chunks/pages/clusters-3d4be4961e1c94eb.js +0 -1
  170. sky/dashboard/out/_next/static/chunks/pages/config-a2673b256b6d416f.js +0 -1
  171. sky/dashboard/out/_next/static/chunks/pages/index-89e7daf7b7df02e0.js +0 -1
  172. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-a90b4fe4616dc501.js +0 -1
  173. sky/dashboard/out/_next/static/chunks/pages/infra-0d3d1f890c5d188a.js +0 -1
  174. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dc0299ffefebcdbe.js +0 -16
  175. sky/dashboard/out/_next/static/chunks/pages/jobs-49f790d12a85027c.js +0 -1
  176. sky/dashboard/out/_next/static/chunks/pages/users-6790fcefd5487b13.js +0 -1
  177. sky/dashboard/out/_next/static/chunks/pages/volumes-61ea7ba7e56f8d06.js +0 -1
  178. sky/dashboard/out/_next/static/chunks/pages/workspace/new-5629d4e551dba1ee.js +0 -1
  179. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-6bcd4b20914d76c9.js +0 -1
  180. sky/dashboard/out/_next/static/chunks/pages/workspaces-5f7fe4b7d55b8612.js +0 -1
  181. sky/dashboard/out/_next/static/chunks/webpack-a305898dc479711e.js +0 -1
  182. /sky/dashboard/out/_next/static/{Q2sVXboB_t7cgvntL-6nD → oKqDxFQ88cquF4nQGE_0w}/_ssgManifest.js +0 -0
  183. {skypilot_nightly-1.0.0.dev20250729.dist-info → skypilot_nightly-1.0.0.dev20250731.dist-info}/WHEEL +0 -0
  184. {skypilot_nightly-1.0.0.dev20250729.dist-info → skypilot_nightly-1.0.0.dev20250731.dist-info}/entry_points.txt +0 -0
  185. {skypilot_nightly-1.0.0.dev20250729.dist-info → skypilot_nightly-1.0.0.dev20250731.dist-info}/licenses/LICENSE +0 -0
  186. {skypilot_nightly-1.0.0.dev20250729.dist-info → skypilot_nightly-1.0.0.dev20250731.dist-info}/top_level.txt +0 -0
sky/data/storage.py CHANGED
@@ -1,4 +1,6 @@
  """Storage and Store Classes for Sky Data."""
+ from abc import abstractmethod
+ from dataclasses import dataclass
  import enum
  import hashlib
  import os
@@ -7,7 +9,7 @@ import shlex
  import subprocess
  import time
  import typing
- from typing import Any, Dict, List, Optional, Tuple, Type, Union
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
  import urllib.parse

  import colorama
@@ -126,40 +128,68 @@ class StoreType(enum.Enum):
      NEBIUS = 'NEBIUS'
      VOLUME = 'VOLUME'

+     @classmethod
+     def _get_s3_compatible_store_by_cloud(cls,
+                                           cloud_name: str) -> Optional[str]:
+         """Get S3-compatible store type by cloud name."""
+         for store_type, store_class in _S3_COMPATIBLE_STORES.items():
+             config = store_class.get_config()
+             if config.cloud_name.lower() == cloud_name:
+                 return store_type
+         return None
+
+     @classmethod
+     def _get_s3_compatible_config(
+             cls, store_type: str) -> Optional['S3CompatibleConfig']:
+         """Get S3-compatible store configuration by store type."""
+         store_class = _S3_COMPATIBLE_STORES.get(store_type)
+         if store_class:
+             return store_class.get_config()
+         return None
+
+     @classmethod
+     def find_s3_compatible_config_by_prefix(
+             cls, source: str) -> Optional['StoreType']:
+         """Get S3-compatible store type by URL prefix."""
+         for store_type, store_class in _S3_COMPATIBLE_STORES.items():
+             config = store_class.get_config()
+             if source.startswith(config.url_prefix):
+                 return StoreType(store_type)
+         return None
+
      @classmethod
      def from_cloud(cls, cloud: str) -> 'StoreType':
-         if cloud.lower() == str(clouds.AWS()).lower():
-             return StoreType.S3
-         elif cloud.lower() == str(clouds.GCP()).lower():
+         cloud_lower = cloud.lower()
+         if cloud_lower == str(clouds.GCP()).lower():
              return StoreType.GCS
-         elif cloud.lower() == str(clouds.IBM()).lower():
+         elif cloud_lower == str(clouds.IBM()).lower():
              return StoreType.IBM
-         elif cloud.lower() == cloudflare.NAME.lower():
-             return StoreType.R2
-         elif cloud.lower() == str(clouds.Azure()).lower():
+         elif cloud_lower == str(clouds.Azure()).lower():
              return StoreType.AZURE
-         elif cloud.lower() == str(clouds.OCI()).lower():
+         elif cloud_lower == str(clouds.OCI()).lower():
              return StoreType.OCI
-         elif cloud.lower() == str(clouds.Nebius()).lower():
-             return StoreType.NEBIUS
-         elif cloud.lower() == str(clouds.Lambda()).lower():
+         elif cloud_lower == str(clouds.Lambda()).lower():
              with ux_utils.print_exception_no_traceback():
                  raise ValueError('Lambda Cloud does not provide cloud storage.')
-         elif cloud.lower() == str(clouds.SCP()).lower():
+         elif cloud_lower == str(clouds.SCP()).lower():
              with ux_utils.print_exception_no_traceback():
                  raise ValueError('SCP does not provide cloud storage.')
+         else:
+             s3_store_type = cls._get_s3_compatible_store_by_cloud(cloud_lower)
+             if s3_store_type:
+                 return cls(s3_store_type)

          raise ValueError(f'Unsupported cloud for StoreType: {cloud}')

      def to_cloud(self) -> str:
-         if self == StoreType.S3:
-             return str(clouds.AWS())
-         elif self == StoreType.GCS:
+         config = self._get_s3_compatible_config(self.value)
+         if config:
+             return config.cloud_name
+
+         if self == StoreType.GCS:
              return str(clouds.GCP())
          elif self == StoreType.AZURE:
              return str(clouds.Azure())
-         elif self == StoreType.R2:
-             return cloudflare.NAME
          elif self == StoreType.IBM:
              return str(clouds.IBM())
          elif self == StoreType.OCI:
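The hunks above replace the hard-coded per-provider branches in `StoreType` with lookups against `_S3_COMPATIBLE_STORES`, a module-level registry that is populated later in this diff. The following standalone sketch (illustrative names only, not SkyPilot's actual classes) shows that dispatch pattern in isolation: a dict keyed by store-type identifier, each entry carrying a config with `cloud_name` and `url_prefix`, so the `from_cloud`/`store_prefix`-style lookups become data-driven.

```python
from dataclasses import dataclass
from typing import Dict, Optional


@dataclass
class ProviderConfig:
    # Plays the role of S3CompatibleConfig: identity plus URL prefix.
    store_type: str
    cloud_name: str
    url_prefix: str


# Plays the role of _S3_COMPATIBLE_STORES: store-type id -> provider config.
_REGISTRY: Dict[str, ProviderConfig] = {}


def register(config: ProviderConfig) -> ProviderConfig:
    """Register a provider so enum-style lookups can resolve it from data."""
    _REGISTRY[config.store_type] = config
    return config


# Hypothetical entries for illustration.
register(ProviderConfig('S3', 'aws', 's3://'))
register(ProviderConfig('R2', 'cloudflare', 'r2://'))


def from_cloud(cloud: str) -> Optional[str]:
    """Data-driven replacement for a hard-coded if/elif chain."""
    for store_type, cfg in _REGISTRY.items():
        if cfg.cloud_name.lower() == cloud.lower():
            return store_type
    return None


def store_prefix(store_type: str) -> Optional[str]:
    cfg = _REGISTRY.get(store_type)
    return cfg.url_prefix if cfg else None


assert from_cloud('AWS') == 'S3'
assert store_prefix('R2') == 'r2://'
```

Providers that are not S3-compatible (GCS, Azure, IBM COS, OCI) keep their explicit branches, so the registry only needs to cover the AWS-CLI-backed stores.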
@@ -169,41 +199,34 @@ class StoreType(enum.Enum):

      @classmethod
      def from_store(cls, store: 'AbstractStore') -> 'StoreType':
-         if isinstance(store, S3Store):
-             return StoreType.S3
-         elif isinstance(store, GcsStore):
+         if isinstance(store, S3CompatibleStore):
+             return cls(store.get_store_type())
+
+         if isinstance(store, GcsStore):
              return StoreType.GCS
          elif isinstance(store, AzureBlobStore):
              return StoreType.AZURE
-         elif isinstance(store, R2Store):
-             return StoreType.R2
          elif isinstance(store, IBMCosStore):
              return StoreType.IBM
          elif isinstance(store, OciStore):
              return StoreType.OCI
-         elif isinstance(store, NebiusStore):
-             return StoreType.NEBIUS
          else:
              with ux_utils.print_exception_no_traceback():
                  raise ValueError(f'Unknown store type: {store}')

      def store_prefix(self) -> str:
-         if self == StoreType.S3:
-             return 's3://'
-         elif self == StoreType.GCS:
+         config = self._get_s3_compatible_config(self.value)
+         if config:
+             return config.url_prefix
+
+         if self == StoreType.GCS:
              return 'gs://'
          elif self == StoreType.AZURE:
              return 'https://'
-         # R2 storages use 's3://' as a prefix for various aws cli commands
-         elif self == StoreType.R2:
-             return 'r2://'
          elif self == StoreType.IBM:
              return 'cos://'
          elif self == StoreType.OCI:
              return 'oci://'
-         # Nebius storages use 's3://' as a prefix for various aws cli commands
-         elif self == StoreType.NEBIUS:
-             return 'nebius://'
          else:
              with ux_utils.print_exception_no_traceback():
                  raise ValueError(f'Unknown store type: {self}')
@@ -252,12 +275,20 @@ class StoreType(enum.Enum):
            elif store_type == StoreType.IBM:
                bucket_name, sub_path, region = data_utils.split_cos_path(
                    store_url)
-           elif store_type == StoreType.R2:
-               bucket_name, sub_path = data_utils.split_r2_path(store_url)
            elif store_type == StoreType.GCS:
                bucket_name, sub_path = data_utils.split_gcs_path(store_url)
-           elif store_type == StoreType.S3:
-               bucket_name, sub_path = data_utils.split_s3_path(store_url)
+           else:
+               # Check compatible stores
+               for compatible_store_type, store_class in \
+                       _S3_COMPATIBLE_STORES.items():
+                   if store_type.value == compatible_store_type:
+                       config = store_class.get_config()
+                       bucket_name, sub_path = config.split_path(store_url)
+                       break
+               else:
+                   # If we get here, it's an unknown S3-compatible store
+                   raise ValueError(
+                       f'Unknown S3-compatible store type: {store_type}')
            return store_type, bucket_name, \
                sub_path, storage_account_name, region
      raise ValueError(f'Unknown store URL: {store_url}')
@@ -752,20 +783,19 @@ class Storage(object):
          # If source is a pre-existing bucket, connect to the bucket
          # If the bucket does not exist, this will error out
          if isinstance(self.source, str):
-             if self.source.startswith('s3://'):
-                 self.add_store(StoreType.S3)
-             elif self.source.startswith('gs://'):
+             if self.source.startswith('gs://'):
                  self.add_store(StoreType.GCS)
              elif data_utils.is_az_container_endpoint(self.source):
                  self.add_store(StoreType.AZURE)
-             elif self.source.startswith('r2://'):
-                 self.add_store(StoreType.R2)
              elif self.source.startswith('cos://'):
                  self.add_store(StoreType.IBM)
              elif self.source.startswith('oci://'):
                  self.add_store(StoreType.OCI)
-             elif self.source.startswith('nebius://'):
-                 self.add_store(StoreType.NEBIUS)
+
+             store_type = StoreType.find_s3_compatible_config_by_prefix(
+                 self.source)
+             if store_type:
+                 self.add_store(store_type)

      def get_bucket_sub_path_prefix(self, blob_path: str) -> str:
          """Adds the bucket sub path prefix to the blob path."""
@@ -981,12 +1011,25 @@ class Storage(object):
          # When initializing from global_user_state, we override the
          # source from the YAML
          try:
-             if s_type == StoreType.S3:
+             if s_type.value in _S3_COMPATIBLE_STORES:
+                 store_class = _S3_COMPATIBLE_STORES[s_type.value]
+                 store = store_class.from_metadata(
+                     s_metadata,
+                     source=self.source,
+                     sync_on_reconstruction=self.sync_on_reconstruction,
+                     _bucket_sub_path=self._bucket_sub_path)
+             elif s_type == StoreType.S3:
                  store = S3Store.from_metadata(
                      s_metadata,
                      source=self.source,
                      sync_on_reconstruction=self.sync_on_reconstruction,
                      _bucket_sub_path=self._bucket_sub_path)
+             elif s_type == StoreType.R2:
+                 store = R2Store.from_metadata(
+                     s_metadata,
+                     source=self.source,
+                     sync_on_reconstruction=self.sync_on_reconstruction,
+                     _bucket_sub_path=self._bucket_sub_path)
              elif s_type == StoreType.GCS:
                  store = GcsStore.from_metadata(
                      s_metadata,
@@ -1001,12 +1044,6 @@ class Storage(object):
                      source=self.source,
                      sync_on_reconstruction=self.sync_on_reconstruction,
                      _bucket_sub_path=self._bucket_sub_path)
-             elif s_type == StoreType.R2:
-                 store = R2Store.from_metadata(
-                     s_metadata,
-                     source=self.source,
-                     sync_on_reconstruction=self.sync_on_reconstruction,
-                     _bucket_sub_path=self._bucket_sub_path)
              elif s_type == StoreType.IBM:
                  store = IBMCosStore.from_metadata(
                      s_metadata,
@@ -1107,20 +1144,17 @@ class Storage(object):
              return store

          store_cls: Type[AbstractStore]
-         if store_type == StoreType.S3:
-             store_cls = S3Store
+         # First check if it's a registered S3-compatible store
+         if store_type.value in _S3_COMPATIBLE_STORES:
+             store_cls = _S3_COMPATIBLE_STORES[store_type.value]
          elif store_type == StoreType.GCS:
              store_cls = GcsStore
          elif store_type == StoreType.AZURE:
              store_cls = AzureBlobStore
-         elif store_type == StoreType.R2:
-             store_cls = R2Store
          elif store_type == StoreType.IBM:
              store_cls = IBMCosStore
          elif store_type == StoreType.OCI:
              store_cls = OciStore
-         elif store_type == StoreType.NEBIUS:
-             store_cls = NebiusStore
          else:
              with ux_utils.print_exception_no_traceback():
                  raise exceptions.StorageSpecError(
@@ -1344,101 +1378,261 @@ class Storage(object):
          return config


- class S3Store(AbstractStore):
-     """S3Store inherits from Storage Object and represents the backend
-     for S3 buckets.
+ # Registry for S3-compatible stores
+ _S3_COMPATIBLE_STORES = {}
+
+
+ def register_s3_compatible_store(store_class):
+     """Decorator to automatically register S3-compatible stores."""
+     store_type = store_class.get_store_type()
+     _S3_COMPATIBLE_STORES[store_type] = store_class
+     return store_class
+
+
+ @dataclass
+ class S3CompatibleConfig:
+     """Configuration for S3-compatible storage providers."""
+     # Provider identification
+     store_type: str  # Store type identifier (e.g., "S3", "R2", "MINIO")
+     url_prefix: str  # URL prefix (e.g., "s3://", "r2://", "minio://")
+
+     # Client creation
+     client_factory: Callable[[Optional[str]], Any]
+     resource_factory: Callable[[str], StorageHandle]
+     split_path: Callable[[str], Tuple[str, str]]
+     verify_bucket: Callable[[str], bool]
+
+     # CLI configuration
+     aws_profile: Optional[str] = None
+     get_endpoint_url: Optional[Callable[[], str]] = None
+     credentials_file: Optional[str] = None
+     extra_cli_args: Optional[List[str]] = None
+
+     # Provider-specific settings
+     cloud_name: str = ''
+     default_region: Optional[str] = None
+     access_denied_message: str = 'Access Denied'
+
+     # Mounting
+     mount_cmd_factory: Optional[Callable] = None
+     mount_cached_cmd_factory: Optional[Callable] = None
+
+     def __post_init__(self):
+         if self.extra_cli_args is None:
+             self.extra_cli_args = []
+
+
+ class S3CompatibleStore(AbstractStore):
+     """Base class for S3-compatible object storage providers.
+
+     This class provides a unified interface for all S3-compatible storage
+     providers (AWS S3, Cloudflare R2, Nebius, MinIO, etc.) by leveraging
+     a configuration-driven approach that eliminates code duplication.
+
+     ## Adding a New S3-Compatible Store
+
+     To add a new S3-compatible storage provider (e.g., MinIO),
+     follow these steps:
+
+     ### 1. Add Store Type to Enum
+     First, add your store type to the StoreType enum:
+     ```python
+     class StoreType(enum.Enum):
+         # ... existing entries ...
+         MINIO = 'MINIO'
+     ```
+
+     ### 2. Create Store Class
+     Create a new store class that inherits from S3CompatibleStore:
+     ```python
+     @register_s3_compatible_store
+     class MinIOStore(S3CompatibleStore):
+         '''MinIOStore for MinIO object storage.'''
+
+         @classmethod
+         def get_config(cls) -> S3CompatibleConfig:
+             '''Return the configuration for MinIO.'''
+             return S3CompatibleConfig(
+                 store_type='MINIO',
+                 url_prefix='minio://',
+                 client_factory=lambda region:\
+                     data_utils.create_minio_client(region),
+                 resource_factory=lambda name:\
+                     minio.resource('s3').Bucket(name),
+                 split_path=data_utils.split_minio_path,
+                 aws_profile='minio',
+                 get_endpoint_url=lambda: minio.get_endpoint_url(),
+                 cloud_name='minio',
+                 default_region='us-east-1',
+                 mount_cmd_factory=mounting_utils.get_minio_mount_cmd,
+             )
+     ```
+
+     ### 3. Implement Required Utilities
+     Create the necessary utility functions:
+
+     #### In `sky/data/data_utils.py`:
+     ```python
+     def create_minio_client(region: Optional[str] = None):
+         '''Create MinIO S3 client.'''
+         return boto3.client('s3',
+                             endpoint_url=minio.get_endpoint_url(),
+                             aws_access_key_id=minio.get_access_key(),
+                             aws_secret_access_key=minio.get_secret_key(),
+                             region_name=region or 'us-east-1')
+
+     def split_minio_path(minio_path: str) -> Tuple[str, str]:
+         '''Split minio://bucket/key into (bucket, key).'''
+         path_parts = minio_path.replace('minio://', '').split('/', 1)
+         bucket = path_parts[0]
+         key = path_parts[1] if len(path_parts) > 1 else ''
+         return bucket, key
+     ```
+
+     #### In `sky/utils/mounting_utils.py`:
+     ```python
+     def get_minio_mount_cmd(profile: str, bucket_name: str, endpoint_url: str,
+                             mount_path: str,
+                             bucket_sub_path: Optional[str]) -> str:
+         '''Generate MinIO mount command using s3fs.'''
+         # Implementation similar to other S3-compatible mount commands
+         pass
+     ```
+
+     ### 4. Create Adapter Module (if needed)
+     Create `sky/adaptors/minio.py` for MinIO-specific configuration:
+     ```python
+     '''MinIO adapter for SkyPilot.'''
+
+     MINIO_PROFILE_NAME = 'minio'
+
+     def get_endpoint_url() -> str:
+         '''Get MinIO endpoint URL from configuration.'''
+         # Read from ~/.minio/config or environment variables
+         pass
+
+     def resource(resource_name: str):
+         '''Get MinIO resource.'''
+         # Implementation for creating MinIO resources
+         pass
+     ```
+
      """

-     _DEFAULT_REGION = 'us-east-1'
      _ACCESS_DENIED_MESSAGE = 'Access Denied'
-     _CUSTOM_ENDPOINT_REGIONS = [
-         'ap-east-1', 'me-south-1', 'af-south-1', 'eu-south-1', 'eu-south-2',
-         'ap-south-2', 'ap-southeast-3', 'ap-southeast-4', 'me-central-1',
-         'il-central-1'
-     ]

      def __init__(self,
                   name: str,
                   source: str,
-                  region: Optional[str] = _DEFAULT_REGION,
+                  region: Optional[str] = None,
                   is_sky_managed: Optional[bool] = None,
                   sync_on_reconstruction: bool = True,
                   _bucket_sub_path: Optional[str] = None):
+         # Initialize configuration first to get defaults
+         self.config = self.__class__.get_config()
+
+         # Use provider's default region if not specified
+         if region is None:
+             region = self.config.default_region
+
+         # Initialize S3CompatibleStore specific attributes
          self.client: 'mypy_boto3_s3.Client'
          self.bucket: 'StorageHandle'
-         # TODO(romilb): This is purely a stopgap fix for
-         # https://github.com/skypilot-org/skypilot/issues/3405
-         # We should eventually make all opt-in regions also work for S3 by
-         # passing the right endpoint flags.
-         if region in self._CUSTOM_ENDPOINT_REGIONS:
-             logger.warning('AWS opt-in regions are not supported for S3. '
-                            f'Falling back to default region '
-                            f'{self._DEFAULT_REGION} for bucket {name!r}.')
-             region = self._DEFAULT_REGION
+
+         # Call parent constructor
          super().__init__(name, source, region, is_sky_managed,
                           sync_on_reconstruction, _bucket_sub_path)

+     @classmethod
+     @abstractmethod
+     def get_config(cls) -> S3CompatibleConfig:
+         """Return the configuration for this S3-compatible provider."""
+         pass
+
+     @classmethod
+     def get_store_type(cls) -> str:
+         """Return the store type identifier from configuration."""
+         return cls.get_config().store_type
+
+     @property
+     def provider_prefixes(self) -> set:
+         """Dynamically get all provider prefixes from registered stores."""
+         prefixes = set()
+
+         # Get prefixes from all registered S3-compatible stores
+         for store_class in _S3_COMPATIBLE_STORES.values():
+             config = store_class.get_config()
+             prefixes.add(config.url_prefix)
+
+         # Add hardcoded prefixes for non-S3-compatible stores
+         prefixes.update({
+             'gs://',  # GCS
+             'https://',  # Azure
+             'cos://',  # IBM COS
+             'oci://',  # OCI
+         })
+
+         return prefixes
+
      def _validate(self):
          if self.source is not None and isinstance(self.source, str):
-             if self.source.startswith('s3://'):
-                 assert self.name == data_utils.split_s3_path(self.source)[0], (
-                     'S3 Bucket is specified as path, the name should be the'
-                     ' same as S3 bucket.')
+             if self.source.startswith(self.config.url_prefix):
+                 bucket_name, _ = self.config.split_path(self.source)
+                 assert self.name == bucket_name, (
+                     f'{self.config.store_type} Bucket is specified as path, '
+                     f'the name should be the same as {self.config.store_type} '
+                     f'bucket.')
+                 # Only verify if this is NOT the same store type as the source
+                 if self.__class__.get_store_type() != self.config.store_type:
+                     assert self.config.verify_bucket(self.name), (
+                         f'Source specified as {self.source},'
+                         f'a {self.config.store_type} '
+                         f'bucket. {self.config.store_type} Bucket should exist.'
+                     )
              elif self.source.startswith('gs://'):
                  assert self.name == data_utils.split_gcs_path(self.source)[0], (
                      'GCS Bucket is specified as path, the name should be '
                      'the same as GCS bucket.')
-                 assert data_utils.verify_gcs_bucket(self.name), (
-                     f'Source specified as {self.source}, a GCS bucket. ',
-                     'GCS Bucket should exist.')
+                 if not isinstance(self, GcsStore):
+                     assert data_utils.verify_gcs_bucket(self.name), (
+                         f'Source specified as {self.source}, a GCS bucket. ',
+                         'GCS Bucket should exist.')
              elif data_utils.is_az_container_endpoint(self.source):
                  storage_account_name, container_name, _ = (
                      data_utils.split_az_path(self.source))
                  assert self.name == container_name, (
                      'Azure bucket is specified as path, the name should be '
                      'the same as Azure bucket.')
-                 assert data_utils.verify_az_bucket(
-                     storage_account_name, self.name), (
-                         f'Source specified as {self.source}, an Azure bucket. '
+                 if not isinstance(self, AzureBlobStore):
+                     assert data_utils.verify_az_bucket(
+                         storage_account_name, self.name
+                     ), (f'Source specified as {self.source}, an Azure bucket. '
                          'Azure bucket should exist.')
-             elif self.source.startswith('r2://'):
-                 assert self.name == data_utils.split_r2_path(self.source)[0], (
-                     'R2 Bucket is specified as path, the name should be '
-                     'the same as R2 bucket.')
-                 assert data_utils.verify_r2_bucket(self.name), (
-                     f'Source specified as {self.source}, a R2 bucket. ',
-                     'R2 Bucket should exist.')
-             elif self.source.startswith('nebius://'):
-                 assert self.name == data_utils.split_nebius_path(
-                     self.source)[0], (
-                         'Nebius Object Storage is specified as path, the name '
-                         'should be the same as Nebius Object Storage bucket.')
-                 assert data_utils.verify_nebius_bucket(self.name), (
-                     f'Source specified as {self.source}, a Nebius Object '
-                     f'Storage bucket. Nebius Object Storage Bucket should'
-                     f' exist.')
              elif self.source.startswith('cos://'):
                  assert self.name == data_utils.split_cos_path(self.source)[0], (
                      'COS Bucket is specified as path, the name should be '
                      'the same as COS bucket.')
-                 assert data_utils.verify_ibm_cos_bucket(self.name), (
-                     f'Source specified as {self.source}, a COS bucket. ',
-                     'COS Bucket should exist.')
+                 if not isinstance(self, IBMCosStore):
+                     assert data_utils.verify_ibm_cos_bucket(self.name), (
+                         f'Source specified as {self.source}, a COS bucket. ',
+                         'COS Bucket should exist.')
              elif self.source.startswith('oci://'):
                  raise NotImplementedError(
-                     'Moving data from OCI to S3 is currently not supported.')
+                     f'Moving data from OCI to {self.source} is ',
+                     'currently not supported.')
+
          # Validate name
          self.name = self.validate_name(self.name)

          # Check if the storage is enabled
-         if not _is_storage_cloud_enabled(str(clouds.AWS())):
+         if not _is_storage_cloud_enabled(self.config.cloud_name):
              with ux_utils.print_exception_no_traceback():
                  raise exceptions.ResourcesUnavailableError(
-                     'Storage \'store: s3\' specified, but ' \
-                     'AWS access is disabled. To fix, enable '\
-                     'AWS by running `sky check`. More info: '\
-                     'https://docs.skypilot.co/en/latest/getting-started/installation.html.' # pylint: disable=line-too-long
-                     )
+                     f'Storage "store: {self.config.store_type.lower()}" '
+                     f'specified, but '
+                     f'{self.config.cloud_name} access is disabled. '
+                     'To fix, enable '
+                     f'{self.config.cloud_name} by running `sky check`.')

      @classmethod
      def validate_name(cls, name: str) -> str:
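The decorator introduced in the hunk above keys the registry on each subclass's `get_store_type()`, so simply defining a provider class is enough for the `StoreType` lookups earlier in this diff to start resolving it. A compressed, self-contained sketch of that decorator mechanic follows (`DemoStore` and `register_store` are hypothetical stand-ins; the real subclasses supply a full `S3CompatibleConfig`, as in the MinIO walkthrough in the docstring):

```python
from typing import Dict

_STORES: Dict[str, type] = {}  # plays the role of _S3_COMPATIBLE_STORES


def register_store(store_class: type) -> type:
    """Class decorator: key the registry on the class's own store type."""
    _STORES[store_class.get_store_type()] = store_class
    return store_class


class BaseStore:
    # In the diff, get_store_type() is derived from get_config().store_type.
    @classmethod
    def get_store_type(cls) -> str:
        raise NotImplementedError


@register_store
class DemoStore(BaseStore):
    @classmethod
    def get_store_type(cls) -> str:
        return 'DEMO'


# Registration happens at class-definition time, before any instance exists.
assert _STORES['DEMO'] is DemoStore
```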
@@ -1510,7 +1704,7 @@ class S3Store(AbstractStore):
              StorageBucketGetError: If fetching existing bucket fails
              StorageInitError: If general initialization fails.
          """
-         self.client = data_utils.create_s3_client(self.region)
+         self.client = self.config.client_factory(self.region)
          self.bucket, is_new_bucket = self._get_bucket()
          if self.is_sky_managed is None:
              # If is_sky_managed is not specified, then this is a new storage
@@ -1532,16 +1726,10 @@ class S3Store(AbstractStore):
              if isinstance(self.source, list):
                  self.batch_aws_rsync(self.source, create_dirs=True)
              elif self.source is not None:
-                 if self.source.startswith('s3://'):
-                     pass
-                 elif self.source.startswith('gs://'):
-                     self._transfer_to_s3()
-                 elif self.source.startswith('r2://'):
-                     self._transfer_to_s3()
-                 elif self.source.startswith('oci://'):
-                     self._transfer_to_s3()
-                 elif self.source.startswith('nebius://'):
-                     self._transfer_to_s3()
+                 if self._is_same_provider_source():
+                     pass  # No transfer needed
+                 elif self._needs_cross_provider_transfer():
+                     self._transfer_from_other_provider()
                  else:
                      self.batch_aws_rsync([self.source])
          except exceptions.StorageUploadError:
@@ -1550,57 +1738,94 @@ class S3Store(AbstractStore):
              raise exceptions.StorageUploadError(
                  f'Upload failed for store {self.name}') from e

+     def _is_same_provider_source(self) -> bool:
+         """Check if source is from the same provider."""
+         return isinstance(self.source, str) and self.source.startswith(
+             self.config.url_prefix)
+
+     def _needs_cross_provider_transfer(self) -> bool:
+         """Check if source needs cross-provider transfer."""
+         if not isinstance(self.source, str):
+             return False
+         return any(
+             self.source.startswith(prefix) for prefix in self.provider_prefixes)
+
+     def _detect_source_type(self) -> str:
+         """Detect the source provider type from URL."""
+         if not isinstance(self.source, str):
+             return 'unknown'
+
+         for provider in self.provider_prefixes:
+             if self.source.startswith(provider):
+                 return provider[:-len('://')]
+         return ''
+
+     def _transfer_from_other_provider(self):
+         """Transfer data from another cloud to this S3-compatible store."""
+         source_type = self._detect_source_type()
+         target_type = self.config.store_type.lower()
+
+         if hasattr(data_transfer, f'{source_type}_to_{target_type}'):
+             transfer_func = getattr(data_transfer,
+                                     f'{source_type}_to_{target_type}')
+             transfer_func(self.name, self.name)
+         else:
+             with ux_utils.print_exception_no_traceback():
+                 raise NotImplementedError(
+                     f'Transfer from {source_type} to {target_type} '
+                     'is not yet supported.')
+
      def delete(self) -> None:
+         """Delete the bucket or sub-path."""
          if self._bucket_sub_path is not None and not self.is_sky_managed:
              return self._delete_sub_path()

-         deleted_by_skypilot = self._delete_s3_bucket(self.name)
+         deleted_by_skypilot = self._delete_bucket(self.name)
+         provider = self.config.store_type
          if deleted_by_skypilot:
-             msg_str = f'Deleted S3 bucket {self.name}.'
+             msg_str = f'Deleted {provider} bucket {self.name}.'
          else:
-             msg_str = f'S3 bucket {self.name} may have been deleted ' \
+             msg_str = f'{provider} bucket {self.name} may have been deleted ' \
                        f'externally. Removing from local state.'
-         logger.info(f'{colorama.Fore.GREEN}{msg_str}'
-                     f'{colorama.Style.RESET_ALL}')
-
-     def _delete_sub_path(self) -> None:
-         assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
-         deleted_by_skypilot = self._delete_s3_bucket_sub_path(
-             self.name, self._bucket_sub_path)
-         if deleted_by_skypilot:
-             msg_str = f'Removed objects from S3 bucket ' \
-                       f'{self.name}/{self._bucket_sub_path}.'
-         else:
-             msg_str = f'Failed to remove objects from S3 bucket ' \
-                       f'{self.name}/{self._bucket_sub_path}.'
-         logger.info(f'{colorama.Fore.GREEN}{msg_str}'
-                     f'{colorama.Style.RESET_ALL}')
+         logger.info(f'{colorama.Fore.GREEN}{msg_str}{colorama.Style.RESET_ALL}')

      def get_handle(self) -> StorageHandle:
-         return aws.resource('s3').Bucket(self.name)
+         """Get storage handle using provider's resource factory."""
+         return self.config.resource_factory(self.name)

-     def batch_aws_rsync(self,
-                         source_path_list: List[Path],
-                         create_dirs: bool = False) -> None:
-         """Invokes aws s3 sync to batch upload a list of local paths to S3
+     def _download_file(self, remote_path: str, local_path: str) -> None:
+         """Download file using S3 API."""
+         self.bucket.download_file(remote_path, local_path)
+
+     def mount_command(self, mount_path: str) -> str:
+         """Get mount command using provider's mount factory."""
+         if self.config.mount_cmd_factory is None:
+             raise exceptions.NotSupportedError(
+                 f'Mounting not supported for {self.config.store_type}')

-         AWS Sync by default uses 10 threads to upload files to the bucket. To
-         increase parallelism, modify max_concurrent_requests in your aws config
-         file (Default path: ~/.aws/config).
+         install_cmd = mounting_utils.get_s3_mount_install_cmd()
+         mount_cmd = self.config.mount_cmd_factory(self.bucket.name, mount_path,
+                                                   self._bucket_sub_path)
+         return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                    mount_cmd)

-         Since aws s3 sync does not support batch operations, we construct
-         multiple commands to be run in parallel.
+     def mount_cached_command(self, mount_path: str) -> str:
+         """Get cached mount command. Can be overridden by subclasses."""
+         if self.config.mount_cached_cmd_factory is None:
+             raise exceptions.NotSupportedError(
+                 f'Cached mounting not supported for {self.config.store_type}')

-         Args:
-             source_path_list: List of paths to local files or directories
-             create_dirs: If the local_path is a directory and this is set to
-                 False, the contents of the directory are directly uploaded to
-                 root of the bucket. If the local_path is a directory and this is
-                 set to True, the directory is created in the bucket root and
-                 contents are uploaded to it.
-         """
-         sub_path = (f'/{self._bucket_sub_path}'
-                     if self._bucket_sub_path else '')
+         install_cmd = mounting_utils.get_rclone_install_cmd()
+         mount_cmd = self.config.mount_cached_cmd_factory(
+             self.bucket.name, mount_path, self._bucket_sub_path)
+         return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                    mount_cmd)
+
+     def batch_aws_rsync(self,
+                         source_path_list: List[Path],
+                         create_dirs: bool = False) -> None:
+         """Generic S3-compatible rsync using AWS CLI."""
+         sub_path = f'/{self._bucket_sub_path}' if self._bucket_sub_path else ''

          def get_file_sync_command(base_dir_path, file_names):
              includes = ' '.join([
@@ -1608,10 +1833,28 @@ class S3Store(AbstractStore):
                  for file_name in file_names
              ])
              base_dir_path = shlex.quote(base_dir_path)
-             sync_command = ('aws s3 sync --no-follow-symlinks --exclude="*" '
-                             f'{includes} {base_dir_path} '
-                             f's3://{self.name}{sub_path}')
-             return sync_command
+
+             # Build AWS CLI command with provider-specific configuration
+             cmd_parts = ['aws s3 sync --no-follow-symlinks --exclude="*"']
+             cmd_parts.append(f'{includes} {base_dir_path}')
+             cmd_parts.append(f's3://{self.name}{sub_path}')
+
+             # Add provider-specific arguments
+             if self.config.get_endpoint_url:
+                 cmd_parts.append(
+                     f'--endpoint-url {self.config.get_endpoint_url()}')
+             if self.config.aws_profile:
+                 cmd_parts.append(f'--profile={self.config.aws_profile}')
+             if self.config.extra_cli_args:
+                 cmd_parts.extend(self.config.extra_cli_args)
+
+             # Handle credentials file via environment
+             cmd = ' '.join(cmd_parts)
+             if self.config.credentials_file:
+                 cmd = 'AWS_SHARED_CREDENTIALS_FILE=' + \
+                       f'{self.config.credentials_file} {cmd}'
+
+             return cmd

          def get_dir_sync_command(src_dir_path, dest_dir_name):
              # we exclude .git directory from the sync
@@ -1619,11 +1862,11 @@ class S3Store(AbstractStore):
              excluded_list.append('.git/*')

              # Process exclusion patterns to make them work correctly with aws
-             # s3 sync
+             # s3 sync - this logic is from S3Store2 to ensure compatibility
              processed_excludes = []
              for excluded_path in excluded_list:
                  # Check if the path is a directory exclusion pattern
-                 # For AWS S3 sync, directory patterns need to end with "/**" to
+                 # For AWS S3 sync, directory patterns need to end with "/*" to
                  # exclude all contents
                  if (excluded_path.endswith('/') or os.path.isdir(
                          os.path.join(src_dir_path, excluded_path.rstrip('/')))):
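The refactored sync helpers above assemble one `aws s3 sync` invocation per provider by appending `--endpoint-url`, `--profile`, and any extra CLI arguments from the store's config, and by prefixing the command with `AWS_SHARED_CREDENTIALS_FILE` when a dedicated credentials file is configured. A standalone sketch of that assembly, with hypothetical R2-style values (the real values come from each provider's `S3CompatibleConfig`):

```python
from typing import List, Optional


def build_sync_command(bucket: str,
                       src: str,
                       endpoint_url: Optional[str] = None,
                       profile: Optional[str] = None,
                       credentials_file: Optional[str] = None,
                       extra_args: Optional[List[str]] = None) -> str:
    """Assemble an `aws s3 sync` call in the same shape as the hunk above."""
    parts = ['aws s3 sync --no-follow-symlinks', src, f's3://{bucket}']
    if endpoint_url:
        parts.append(f'--endpoint-url {endpoint_url}')
    if profile:
        parts.append(f'--profile={profile}')
    if extra_args:
        parts.extend(extra_args)
    cmd = ' '.join(parts)
    if credentials_file:
        # Credentials are injected via the environment, not a CLI flag.
        cmd = f'AWS_SHARED_CREDENTIALS_FILE={credentials_file} {cmd}'
    return cmd


# Hypothetical endpoint, profile, and credentials path for illustration.
print(build_sync_command(
    'my-bucket', '/data',
    endpoint_url='https://<account-id>.r2.cloudflarestorage.com',
    profile='r2',
    credentials_file='~/.cloudflare/r2.credentials'))
```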
@@ -1638,10 +1881,25 @@ class S3Store(AbstractStore):
                  for file_name in processed_excludes
              ])
              src_dir_path = shlex.quote(src_dir_path)
-             sync_command = (f'aws s3 sync --no-follow-symlinks {excludes} '
-                             f'{src_dir_path} '
-                             f's3://{self.name}{sub_path}/{dest_dir_name}')
-             return sync_command
+
+             cmd_parts = ['aws s3 sync --no-follow-symlinks']
+             cmd_parts.append(f'{excludes} {src_dir_path}')
+             cmd_parts.append(f's3://{self.name}{sub_path}/{dest_dir_name}')
+
+             if self.config.get_endpoint_url:
+                 cmd_parts.append(
+                     f'--endpoint-url {self.config.get_endpoint_url()}')
+             if self.config.aws_profile:
+                 cmd_parts.append(f'--profile={self.config.aws_profile}')
+             if self.config.extra_cli_args:
+                 cmd_parts.extend(self.config.extra_cli_args)
+
+             cmd = ' '.join(cmd_parts)
+             if self.config.credentials_file:
+                 cmd = 'AWS_SHARED_CREDENTIALS_FILE=' + \
+                       f'{self.config.credentials_file} {cmd}'
+
+             return cmd

          # Generate message for upload
          if len(source_path_list) > 1:
@@ -1649,9 +1907,12 @@ class S3Store(AbstractStore):
          else:
              source_message = source_path_list[0]

+         provider_prefix = self.config.url_prefix
          log_path = sky_logging.generate_tmp_logging_file_path(
              _STORAGE_LOG_FILE_NAME)
-         sync_path = f'{source_message} -> s3://{self.name}{sub_path}/'
+         sync_path = (f'{source_message} -> '
+                      f'{provider_prefix}{self.name}{sub_path}/')
+
          with rich_utils.safe_status(
                  ux_utils.spinner_message(f'Syncing {sync_path}',
                                           log_path=log_path)):
@@ -1661,151 +1922,78 @@ class S3Store(AbstractStore):
                  get_dir_sync_command,
                  log_path,
                  self.name,
-                 self._ACCESS_DENIED_MESSAGE,
+                 self.config.access_denied_message,
                  create_dirs=create_dirs,
                  max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS)
+
          logger.info(
              ux_utils.finishing_message(f'Storage synced: {sync_path}',
                                         log_path))

-     def _transfer_to_s3(self) -> None:
-         assert isinstance(self.source, str), self.source
-         if self.source.startswith('gs://'):
-             data_transfer.gcs_to_s3(self.name, self.name)
-         elif self.source.startswith('r2://'):
-             data_transfer.r2_to_s3(self.name, self.name)
-
      def _get_bucket(self) -> Tuple[StorageHandle, bool]:
-         """Obtains the S3 bucket.
-
-         If the bucket exists, this method will return the bucket.
-         If the bucket does not exist, there are three cases:
-           1) Raise an error if the bucket source starts with s3://
-           2) Return None if bucket has been externally deleted and
-              sync_on_reconstruction is False
-           3) Create and return a new bucket otherwise
-
-         Raises:
-             StorageSpecError: If externally created bucket is attempted to be
-                 mounted without specifying storage source.
-             StorageBucketCreateError: If creating the bucket fails
-             StorageBucketGetError: If fetching a bucket fails
-             StorageExternalDeletionError: If externally deleted storage is
-                 attempted to be fetched while reconstructing the storage for
-                 'sky storage delete' or 'sky start'
-         """
-         s3 = aws.resource('s3')
-         bucket = s3.Bucket(self.name)
+         """Get or create bucket using S3 API."""
+         bucket = self.config.resource_factory(self.name)

          try:
              # Try Public bucket case.
-             # This line does not error out if the bucket is an external public
-             # bucket or if it is a user's bucket that is publicly
-             # accessible.
              self.client.head_bucket(Bucket=self.name)
              self._validate_existing_bucket()
              return bucket, False
          except aws.botocore_exceptions().ClientError as e:
              error_code = e.response['Error']['Code']
-             # AccessDenied error for buckets that are private and not owned by
-             # user.
              if error_code == '403':
-                 command = f'aws s3 ls {self.name}'
+                 command = f'aws s3 ls s3://{self.name}'
+                 if self.config.aws_profile:
+                     command += f' --profile={self.config.aws_profile}'
+                 if self.config.get_endpoint_url:
+                     command += f' --endpoint-url '\
+                                f'{self.config.get_endpoint_url()}'
+                 if self.config.credentials_file:
+                     command = (f'AWS_SHARED_CREDENTIALS_FILE='
+                                f'{self.config.credentials_file} {command}')
                  with ux_utils.print_exception_no_traceback():
                      raise exceptions.StorageBucketGetError(
                          _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
                          f' To debug, consider running `{command}`.') from e

-         if isinstance(self.source, str) and self.source.startswith('s3://'):
+         if isinstance(self.source, str) and self.source.startswith(
+                 self.config.url_prefix):
              with ux_utils.print_exception_no_traceback():
                  raise exceptions.StorageBucketGetError(
                      'Attempted to use a non-existent bucket as a source: '
-                     f'{self.source}. Consider using `aws s3 ls '
-                     f'{self.source}` to debug.')
+                     f'{self.source}.')

-         # If bucket cannot be found in both private and public settings,
-         # the bucket is to be created by Sky. However, creation is skipped if
-         # Store object is being reconstructed for deletion or re-mount with
-         # sky start, and error is raised instead.
+         # If bucket cannot be found, create it if needed
          if self.sync_on_reconstruction:
-             bucket = self._create_s3_bucket(self.name, self.region)
+             bucket = self._create_bucket(self.name)
              return bucket, True
          else:
-             # Raised when Storage object is reconstructed for sky storage
-             # delete or to re-mount Storages with sky start but the storage
-             # is already removed externally.
              raise exceptions.StorageExternalDeletionError(
                  'Attempted to fetch a non-existent bucket: '
                  f'{self.name}')

-     def _download_file(self, remote_path: str, local_path: str) -> None:
-         """Downloads file from remote to local on s3 bucket
-         using the boto3 API
-
-         Args:
-             remote_path: str; Remote path on S3 bucket
-             local_path: str; Local path on user's device
-         """
-         self.bucket.download_file(remote_path, local_path)
-
-     def mount_command(self, mount_path: str) -> str:
-         """Returns the command to mount the bucket to the mount_path.
-
-         Uses goofys to mount the bucket.
-
-         Args:
-             mount_path: str; Path to mount the bucket to.
-         """
-         install_cmd = mounting_utils.get_s3_mount_install_cmd()
-         mount_cmd = mounting_utils.get_s3_mount_cmd(self.bucket.name,
-                                                     mount_path,
-                                                     self._bucket_sub_path)
-         return mounting_utils.get_mounting_command(mount_path, install_cmd,
-                                                    mount_cmd)
-
-     def mount_cached_command(self, mount_path: str) -> str:
-         install_cmd = mounting_utils.get_rclone_install_cmd()
-         rclone_profile_name = (
-             data_utils.Rclone.RcloneStores.S3.get_profile_name(self.name))
-         rclone_config = data_utils.Rclone.RcloneStores.S3.get_config(
-             rclone_profile_name=rclone_profile_name)
-         mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
-             rclone_config, rclone_profile_name, self.bucket.name, mount_path)
-         return mounting_utils.get_mounting_command(mount_path, install_cmd,
-                                                    mount_cached_cmd)
-
-     def _create_s3_bucket(self,
-                           bucket_name: str,
-                           region=_DEFAULT_REGION) -> StorageHandle:
-         """Creates S3 bucket with specific name in specific region
-
-         Args:
-           bucket_name: str; Name of bucket
-           region: str; Region name, e.g. us-west-1, us-east-2
-         Raises:
-           StorageBucketCreateError: If bucket creation fails.
-         """
-         s3_client = self.client
+     def _create_bucket(self, bucket_name: str) -> StorageHandle:
+         """Create bucket using S3 API."""
          try:
              create_bucket_config: Dict[str, Any] = {'Bucket': bucket_name}
-             # If default us-east-1 region of create_bucket API is used,
-             # the LocationConstraint must not be specified.
-             # Reference: https://stackoverflow.com/a/51912090
-             if region is not None and region != 'us-east-1':
+             if self.region is not None and self.region != 'us-east-1':
                  create_bucket_config['CreateBucketConfiguration'] = {
-                     'LocationConstraint': region
+                     'LocationConstraint': self.region
                  }
-             s3_client.create_bucket(**create_bucket_config)
+             self.client.create_bucket(**create_bucket_config)
              logger.info(
                  f'  {colorama.Style.DIM}Created S3 bucket {bucket_name!r} in '
-                 f'{region or "us-east-1"}{colorama.Style.RESET_ALL}')
+                 f'{self.region or "us-east-1"}{colorama.Style.RESET_ALL}')

              # Add AWS tags configured in config.yaml to the bucket.
              # This is useful for cost tracking and external cleanup.
              bucket_tags = skypilot_config.get_effective_region_config(
-                 cloud='aws', region=None, keys=('labels',), default_value={})
+                 cloud=self.config.cloud_name,
+                 region=None,
+                 keys=('labels',),
+                 default_value={})
              if bucket_tags:
-                 s3_client.put_bucket_tagging(
+                 self.client.put_bucket_tagging(
                      Bucket=bucket_name,
                      Tagging={
                          'TagSet': [{
@@ -1813,17 +2001,38 @@ class S3Store(AbstractStore):
1813
2001
  'Value': v
1814
2002
  } for k, v in bucket_tags.items()]
1815
2003
  })
1816
-
1817
2004
  except aws.botocore_exceptions().ClientError as e:
1818
2005
  with ux_utils.print_exception_no_traceback():
1819
2006
  raise exceptions.StorageBucketCreateError(
1820
2007
  f'Attempted to create a bucket {self.name} but failed.'
1821
2008
  ) from e
1822
- return aws.resource('s3').Bucket(bucket_name)
2009
+ return self.config.resource_factory(bucket_name)
+
+ def _delete_bucket(self, bucket_name: str) -> bool:
+ """Delete bucket using AWS CLI."""
+ cmd_parts = [f'aws s3 rb s3://{bucket_name} --force']
+
+ if self.config.aws_profile:
+ cmd_parts.append(f'--profile={self.config.aws_profile}')
+ if self.config.get_endpoint_url:
+ cmd_parts.append(f'--endpoint-url {self.config.get_endpoint_url()}')
+
+ remove_command = ' '.join(cmd_parts)
+
+ if self.config.credentials_file:
+ remove_command = (f'AWS_SHARED_CREDENTIALS_FILE='
+ f'{self.config.credentials_file} '
+ f'{remove_command}')

- def _execute_s3_remove_command(self, command: str, bucket_name: str,
- hint_operating: str,
- hint_failed: str) -> bool:
+ return self._execute_remove_command(
+ remove_command, bucket_name,
+ f'Deleting {self.config.store_type} bucket {bucket_name}',
+ (f'Failed to delete {self.config.store_type} bucket '
+ f'{bucket_name}.'))
+
+ def _execute_remove_command(self, command: str, bucket_name: str,
+ hint_operating: str, hint_failed: str) -> bool:
+ """Execute bucket removal command."""
  try:
  with rich_utils.safe_status(
  ux_utils.spinner_message(hint_operating)):
@@ -1842,47 +2051,42 @@ class S3Store(AbstractStore):
  f'Detailed error: {e.output}')
  return True

- def _delete_s3_bucket(self, bucket_name: str) -> bool:
- """Deletes S3 bucket, including all objects in bucket
+ def _delete_sub_path(self) -> None:
+ """Remove objects from the sub path in the bucket."""
+ assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
+ deleted_by_skypilot = self._delete_bucket_sub_path(
+ self.name, self._bucket_sub_path)
+ provider = self.config.store_type
+ if deleted_by_skypilot:
+ msg_str = (f'Removed objects from {provider} bucket '
+ f'{self.name}/{self._bucket_sub_path}.')
+ else:
+ msg_str = (f'Failed to remove objects from {provider} bucket '
+ f'{self.name}/{self._bucket_sub_path}.')
+ logger.info(f'{colorama.Fore.GREEN}{msg_str}{colorama.Style.RESET_ALL}')

- Args:
- bucket_name: str; Name of bucket
+ def _delete_bucket_sub_path(self, bucket_name: str, sub_path: str) -> bool:
+ """Delete objects in the sub path from the bucket."""
+ cmd_parts = [f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive']

- Returns:
- bool; True if bucket was deleted, False if it was deleted externally.
+ if self.config.aws_profile:
+ cmd_parts.append(f'--profile={self.config.aws_profile}')
+ if self.config.get_endpoint_url:
+ cmd_parts.append(f'--endpoint-url {self.config.get_endpoint_url()}')

- Raises:
- StorageBucketDeleteError: If deleting the bucket fails.
- """
- # Deleting objects is very slow programatically
- # (i.e. bucket.objects.all().delete() is slow).
- # In addition, standard delete operations (i.e. via `aws s3 rm`)
- # are slow, since AWS puts deletion markers.
- # https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
- # The fastest way to delete is to run `aws s3 rb --force`,
- # which removes the bucket by force.
- remove_command = f'aws s3 rb s3://{bucket_name} --force'
- success = self._execute_s3_remove_command(
- remove_command, bucket_name,
- f'Deleting S3 bucket [green]{bucket_name}[/]',
- f'Failed to delete S3 bucket {bucket_name}.')
- if not success:
- return False
+ remove_command = ' '.join(cmd_parts)

- # Wait until bucket deletion propagates on AWS servers
- while data_utils.verify_s3_bucket(bucket_name):
- time.sleep(0.1)
- return True
+ if self.config.credentials_file:
+ remove_command = (f'AWS_SHARED_CREDENTIALS_FILE='
+ f'{self.config.credentials_file} '
+ f'{remove_command}')

- def _delete_s3_bucket_sub_path(self, bucket_name: str,
- sub_path: str) -> bool:
- """Deletes the sub path from the bucket."""
- remove_command = f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive'
- return self._execute_s3_remove_command(
- remove_command, bucket_name, f'Removing objects from S3 bucket '
- f'[green]{bucket_name}/{sub_path}[/]',
- f'Failed to remove objects from S3 bucket {bucket_name}/{sub_path}.'
- )
+ return self._execute_remove_command(
+ remove_command, bucket_name,
+ (f'Removing objects from {self.config.store_type} bucket '
+ f'{bucket_name}/{sub_path}'),
+ (f'Failed to remove objects from {self.config.store_type} '
+ f'bucket {bucket_name}/{sub_path}.'))

  class GcsStore(AbstractStore):
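The new generic `_delete_bucket` and `_delete_bucket_sub_path` helpers assemble the same `aws s3 rb --force` and `aws s3 rm --recursive` CLI calls the old S3-specific code used, but pull the profile, endpoint URL, and credentials file from the store config so one code path can serve any S3-compatible backend. A rough standalone sketch of that assembly (the `_StoreConfig` dataclass and the example values are hypothetical stand-ins for the config object referenced in the diff):

```python
from dataclasses import dataclass
from typing import Callable, Optional


@dataclass
class _StoreConfig:
    # Hypothetical stand-in for the per-store config used in the diff.
    aws_profile: Optional[str] = None
    get_endpoint_url: Optional[Callable[[], str]] = None
    credentials_file: Optional[str] = None


def build_delete_bucket_command(bucket_name: str, config: _StoreConfig) -> str:
    """Compose `aws s3 rb --force` with optional profile/endpoint/credentials."""
    cmd_parts = [f'aws s3 rb s3://{bucket_name} --force']
    if config.aws_profile:
        cmd_parts.append(f'--profile={config.aws_profile}')
    if config.get_endpoint_url:
        cmd_parts.append(f'--endpoint-url {config.get_endpoint_url()}')
    command = ' '.join(cmd_parts)
    if config.credentials_file:
        command = (f'AWS_SHARED_CREDENTIALS_FILE={config.credentials_file} '
                   f'{command}')
    return command


print(build_delete_bucket_command(
    'my-bucket',
    _StoreConfig(aws_profile='my-profile',
                 get_endpoint_url=lambda: 'https://example-endpoint',
                 credentials_file='/path/to/credentials')))
```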
@@ -3287,22 +3491,23 @@ class AzureBlobStore(AbstractStore):
  return True


- class R2Store(AbstractStore):
- """R2Store inherits from S3Store Object and represents the backend
- for R2 buckets.
- """
-
+ class IBMCosStore(AbstractStore):
+ """IBMCosStore inherits from Storage Object and represents the backend
+ for COS buckets.
+ """
  _ACCESS_DENIED_MESSAGE = 'Access Denied'

  def __init__(self,
  name: str,
  source: str,
- region: Optional[str] = 'auto',
+ region: Optional[str] = 'us-east',
  is_sky_managed: Optional[bool] = None,
- sync_on_reconstruction: Optional[bool] = True,
+ sync_on_reconstruction: bool = True,
  _bucket_sub_path: Optional[str] = None):
- self.client: 'mypy_boto3_s3.Client'
+ self.client: 'storage.Client'
  self.bucket: 'StorageHandle'
+ self.rclone_profile_name = (
+ data_utils.Rclone.RcloneStores.IBM.get_profile_name(self.name))
  super().__init__(name, source, region, is_sky_managed,
  sync_on_reconstruction, _bucket_sub_path)

@@ -3336,6 +3541,9 @@ class R2Store(AbstractStore):
3336
3541
  assert self.name == data_utils.split_r2_path(self.source)[0], (
3337
3542
  'R2 Bucket is specified as path, the name should be '
3338
3543
  'the same as R2 bucket.')
3544
+ assert data_utils.verify_r2_bucket(self.name), (
3545
+ f'Source specified as {self.source}, a R2 bucket. ',
3546
+ 'R2 Bucket should exist.')
3339
3547
  elif self.source.startswith('nebius://'):
3340
3548
  assert self.name == data_utils.split_nebius_path(
3341
3549
  self.source)[0], (
@@ -3347,29 +3555,59 @@ class R2Store(AbstractStore):
3347
3555
  f'exist.')
3348
3556
  elif self.source.startswith('cos://'):
3349
3557
  assert self.name == data_utils.split_cos_path(self.source)[0], (
3350
- 'IBM COS Bucket is specified as path, the name should be '
3558
+ 'COS Bucket is specified as path, the name should be '
3351
3559
  'the same as COS bucket.')
3352
- assert data_utils.verify_ibm_cos_bucket(self.name), (
3353
- f'Source specified as {self.source}, a COS bucket. ',
3354
- 'COS Bucket should exist.')
3355
- elif self.source.startswith('oci://'):
3356
- raise NotImplementedError(
3357
- 'Moving data from OCI to R2 is currently not supported.')
3358
-
3359
3560
  # Validate name
3360
- self.name = S3Store.validate_name(self.name)
3361
- # Check if the storage is enabled
3362
- if not _is_storage_cloud_enabled(cloudflare.NAME):
3561
+ self.name = IBMCosStore.validate_name(self.name)
3562
+
3563
+ @classmethod
3564
+ def validate_name(cls, name: str) -> str:
3565
+ """Validates the name of a COS bucket.
3566
+
3567
+ Rules source: https://ibm.github.io/ibm-cos-sdk-java/com/ibm/cloud/objectstorage/services/s3/model/Bucket.html # pylint: disable=line-too-long
3568
+ """
3569
+
3570
+ def _raise_no_traceback_name_error(err_str):
3363
3571
  with ux_utils.print_exception_no_traceback():
3364
- raise exceptions.ResourcesUnavailableError(
3365
- 'Storage \'store: r2\' specified, but ' \
3366
- 'Cloudflare R2 access is disabled. To fix, '\
3367
- 'enable Cloudflare R2 by running `sky check`. '\
3368
- 'More info: https://docs.skypilot.co/en/latest/getting-started/installation.html.' # pylint: disable=line-too-long
3369
- )
3572
+ raise exceptions.StorageNameError(err_str)
3573
+
3574
+ if name is not None and isinstance(name, str):
3575
+ if not 3 <= len(name) <= 63:
3576
+ _raise_no_traceback_name_error(
3577
+ f'Invalid store name: {name} must be between 3 (min) '
3578
+ 'and 63 (max) characters long.')
3579
+
3580
+ # Check for valid characters and start/end with a letter or number
3581
+ pattern = r'^[a-z0-9][-a-z0-9.]*[a-z0-9]$'
3582
+ if not re.match(pattern, name):
3583
+ _raise_no_traceback_name_error(
3584
+ f'Invalid store name: {name} can consist only of '
3585
+ 'lowercase letters, numbers, dots (.), and dashes (-). '
3586
+ 'It must begin and end with a letter or number.')
3587
+
3588
+ # Check for two adjacent periods or dashes
3589
+ if any(substring in name for substring in ['..', '--']):
3590
+ _raise_no_traceback_name_error(
3591
+ f'Invalid store name: {name} must not contain '
3592
+ 'two adjacent periods/dashes')
3593
+
3594
+ # Check for IP address format
3595
+ ip_pattern = r'^(?:\d{1,3}\.){3}\d{1,3}$'
3596
+ if re.match(ip_pattern, name):
3597
+ _raise_no_traceback_name_error(
3598
+ f'Invalid store name: {name} must not be formatted as '
3599
+ 'an IP address (for example, 192.168.5.4).')
3600
+
3601
+ if any(substring in name for substring in ['.-', '-.']):
3602
+ _raise_no_traceback_name_error(
3603
+ f'Invalid store name: {name} must '
3604
+ 'not allow substrings: ".-", "-." .')
3605
+ else:
3606
+ _raise_no_traceback_name_error('Store name must be specified.')
3607
+ return name
3370
3608
 
3371
3609
  def initialize(self):
3372
- """Initializes the R2 store object on the cloud.
3610
+ """Initializes the cos store object on the cloud.
3373
3611
 
3374
3612
  Initialization involves fetching bucket if exists, or creating it if
3375
3613
  it does not.
@@ -3379,7 +3617,8 @@ class R2Store(AbstractStore):
3379
3617
  StorageBucketGetError: If fetching existing bucket fails
3380
3618
  StorageInitError: If general initialization fails.
3381
3619
  """
3382
- self.client = data_utils.create_r2_client(self.region)
3620
+ self.client = ibm.get_cos_client(self.region)
3621
+ self.s3_resource = ibm.get_cos_resource(self.region)
3383
3622
  self.bucket, is_new_bucket = self._get_bucket()
3384
3623
  if self.is_sky_managed is None:
3385
3624
  # If is_sky_managed is not specified, then this is a new storage
@@ -3389,7 +3628,7 @@ class R2Store(AbstractStore):
3389
3628
  self.is_sky_managed = is_new_bucket
3390
3629
 
3391
3630
  def upload(self):
3392
- """Uploads source to store bucket.
3631
+ """Uploads files from local machine to bucket.
3393
3632
 
3394
3633
  Upload must be called by the Storage handler - it is not called on
3395
3634
  Store initialization.
@@ -3399,22 +3638,26 @@ class R2Store(AbstractStore):
3399
3638
  """
3400
3639
  try:
3401
3640
  if isinstance(self.source, list):
3402
- self.batch_aws_rsync(self.source, create_dirs=True)
3641
+ self.batch_ibm_rsync(self.source, create_dirs=True)
3403
3642
  elif self.source is not None:
3404
- if self.source.startswith('s3://'):
3405
- self._transfer_to_r2()
3406
- elif self.source.startswith('gs://'):
3407
- self._transfer_to_r2()
3408
- elif self.source.startswith('r2://'):
3643
+ if self.source.startswith('cos://'):
3644
+ # cos bucket used as a dest, can't be used as source.
3409
3645
  pass
3410
- elif self.source.startswith('oci://'):
3411
- self._transfer_to_r2()
3646
+ elif self.source.startswith('s3://'):
3647
+ raise Exception('IBM COS currently not supporting'
3648
+ 'data transfers between COS and S3')
3412
3649
  elif self.source.startswith('nebius://'):
3413
- self._transfer_to_r2()
3650
+ raise Exception('IBM COS currently not supporting'
3651
+ 'data transfers between COS and Nebius')
3652
+ elif self.source.startswith('gs://'):
3653
+ raise Exception('IBM COS currently not supporting'
3654
+ 'data transfers between COS and GS')
3655
+ elif self.source.startswith('r2://'):
3656
+ raise Exception('IBM COS currently not supporting'
3657
+ 'data transfers between COS and r2')
3414
3658
  else:
3415
- self.batch_aws_rsync([self.source])
3416
- except exceptions.StorageUploadError:
3417
- raise
3659
+ self.batch_ibm_rsync([self.source])
3660
+
3418
3661
  except Exception as e:
3419
3662
  raise exceptions.StorageUploadError(
3420
3663
  f'Upload failed for store {self.name}') from e
@@ -3423,41 +3666,28 @@ class R2Store(AbstractStore):
3423
3666
  if self._bucket_sub_path is not None and not self.is_sky_managed:
3424
3667
  return self._delete_sub_path()
3425
3668
 
3426
- deleted_by_skypilot = self._delete_r2_bucket(self.name)
3427
- if deleted_by_skypilot:
3428
- msg_str = f'Deleted R2 bucket {self.name}.'
3429
- else:
3430
- msg_str = f'R2 bucket {self.name} may have been deleted ' \
3431
- f'externally. Removing from local state.'
3432
- logger.info(f'{colorama.Fore.GREEN}{msg_str}'
3669
+ self._delete_cos_bucket()
3670
+ logger.info(f'{colorama.Fore.GREEN}Deleted COS bucket {self.name}.'
3433
3671
  f'{colorama.Style.RESET_ALL}')
3434
3672
 
3435
3673
  def _delete_sub_path(self) -> None:
3436
3674
  assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
3437
- deleted_by_skypilot = self._delete_r2_bucket_sub_path(
3438
- self.name, self._bucket_sub_path)
3439
- if deleted_by_skypilot:
3440
- msg_str = f'Removed objects from R2 bucket ' \
3441
- f'{self.name}/{self._bucket_sub_path}.'
3442
- else:
3443
- msg_str = f'Failed to remove objects from R2 bucket ' \
3444
- f'{self.name}/{self._bucket_sub_path}.'
3445
- logger.info(f'{colorama.Fore.GREEN}{msg_str}'
3446
- f'{colorama.Style.RESET_ALL}')
3675
+ bucket = self.s3_resource.Bucket(self.name)
3676
+ try:
3677
+ self._delete_cos_bucket_objects(bucket, self._bucket_sub_path + '/')
3678
+ except ibm.ibm_botocore.exceptions.ClientError as e:
3679
+ if e.__class__.__name__ == 'NoSuchBucket':
3680
+ logger.debug('bucket already removed')
3447
3681
 
3448
3682
  def get_handle(self) -> StorageHandle:
3449
- return cloudflare.resource('s3').Bucket(self.name)
3683
+ return self.s3_resource.Bucket(self.name)
3450
3684
 
3451
- def batch_aws_rsync(self,
3685
+ def batch_ibm_rsync(self,
3452
3686
  source_path_list: List[Path],
3453
3687
  create_dirs: bool = False) -> None:
3454
- """Invokes aws s3 sync to batch upload a list of local paths to R2
3455
-
3456
- AWS Sync by default uses 10 threads to upload files to the bucket. To
3457
- increase parallelism, modify max_concurrent_requests in your aws config
3458
- file (Default path: ~/.aws/config).
3688
+ """Invokes rclone copy to batch upload a list of local paths to cos
3459
3689
 
3460
- Since aws s3 sync does not support batch operations, we construct
3690
+ Since rclone does not support batch operations, we construct
3461
3691
  multiple commands to be run in parallel.
3462
3692
 
3463
3693
  Args:
@@ -3471,49 +3701,58 @@ class R2Store(AbstractStore):
3471
3701
  sub_path = (f'/{self._bucket_sub_path}'
3472
3702
  if self._bucket_sub_path else '')
3473
3703
 
3474
- def get_file_sync_command(base_dir_path, file_names):
3704
+ def get_dir_sync_command(src_dir_path, dest_dir_name) -> str:
3705
+ """returns an rclone command that copies a complete folder
3706
+ from 'src_dir_path' to bucket/'dest_dir_name'.
3707
+
3708
+ `rclone copy` copies files from source path to target.
3709
+ files with identical names at won't be copied over, unless
3710
+ their modification date is more recent.
3711
+ works similarly to `aws sync` (without --delete).
3712
+
3713
+ Args:
3714
+ src_dir_path (str): local source path from which to copy files.
3715
+ dest_dir_name (str): remote target path files are copied to.
3716
+
3717
+ Returns:
3718
+ str: bash command using rclone to sync files. Executed remotely.
3719
+ """
3720
+
3721
+ # .git directory is excluded from the sync
3722
+ # wrapping src_dir_path with "" to support path with spaces
3723
+ src_dir_path = shlex.quote(src_dir_path)
3724
+ sync_command = ('rclone copy --exclude ".git/*" '
3725
+ f'{src_dir_path} '
3726
+ f'{self.rclone_profile_name}:{self.name}{sub_path}'
3727
+ f'/{dest_dir_name}')
3728
+ return sync_command
3729
+
3730
+ def get_file_sync_command(base_dir_path, file_names) -> str:
3731
+ """returns an rclone command that copies files: 'file_names'
3732
+ from base directory: `base_dir_path` to bucket.
3733
+
3734
+ `rclone copy` copies files from source path to target.
3735
+ files with identical names at won't be copied over, unless
3736
+ their modification date is more recent.
3737
+ works similarly to `aws sync` (without --delete).
3738
+
3739
+ Args:
3740
+ base_dir_path (str): local path from which to copy files.
3741
+ file_names (List): specific file names to copy.
3742
+
3743
+ Returns:
3744
+ str: bash command using rclone to sync files
3745
+ """
3746
+
3747
+ # wrapping file_name with "" to support spaces
3475
3748
  includes = ' '.join([
3476
3749
  f'--include {shlex.quote(file_name)}'
3477
3750
  for file_name in file_names
3478
3751
  ])
3479
- endpoint_url = cloudflare.create_endpoint()
3480
3752
  base_dir_path = shlex.quote(base_dir_path)
3481
- sync_command = (
3482
- 'AWS_SHARED_CREDENTIALS_FILE='
3483
- f'{cloudflare.R2_CREDENTIALS_PATH} '
3484
- 'aws s3 sync --no-follow-symlinks --exclude="*" '
3485
- f'{includes} {base_dir_path} '
3486
- f's3://{self.name}{sub_path} '
3487
- f'--endpoint {endpoint_url} '
3488
- # R2 does not support CRC64-NVME
3489
- # which is the default for aws s3 sync
3490
- # https://community.cloudflare.com/t/an-error-occurred-internalerror-when-calling-the-putobject-operation/764905/13
3491
- f'--checksum-algorithm CRC32 '
3492
- f'--profile={cloudflare.R2_PROFILE_NAME}')
3493
- return sync_command
3494
-
3495
- def get_dir_sync_command(src_dir_path, dest_dir_name):
3496
- # we exclude .git directory from the sync
3497
- excluded_list = storage_utils.get_excluded_files(src_dir_path)
3498
- excluded_list.append('.git/*')
3499
- excludes = ' '.join([
3500
- f'--exclude {shlex.quote(file_name)}'
3501
- for file_name in excluded_list
3502
- ])
3503
- endpoint_url = cloudflare.create_endpoint()
3504
- src_dir_path = shlex.quote(src_dir_path)
3505
- sync_command = (
3506
- 'AWS_SHARED_CREDENTIALS_FILE='
3507
- f'{cloudflare.R2_CREDENTIALS_PATH} '
3508
- f'aws s3 sync --no-follow-symlinks {excludes} '
3509
- f'{src_dir_path} '
3510
- f's3://{self.name}{sub_path}/{dest_dir_name} '
3511
- f'--endpoint {endpoint_url} '
3512
- # R2 does not support CRC64-NVME
3513
- # which is the default for aws s3 sync
3514
- # https://community.cloudflare.com/t/an-error-occurred-internalerror-when-calling-the-putobject-operation/764905/13
3515
- f'--checksum-algorithm CRC32 '
3516
- f'--profile={cloudflare.R2_PROFILE_NAME}')
3753
+ sync_command = ('rclone copy '
3754
+ f'{includes} {base_dir_path} '
3755
+ f'{self.rclone_profile_name}:{self.name}{sub_path}')
3517
3756
  return sync_command
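For reference, the two helpers above reduce to plain `rclone copy` invocations against a pre-configured rclone remote. A hedged sketch of the resulting shell commands (the remote name `ibmcos-mybucket` is illustrative; in the package the profile name comes from `Rclone.RcloneStores.IBM.get_profile_name`):

```python
import shlex


def dir_sync_command(src_dir_path: str, remote: str, bucket: str,
                     dest_dir_name: str, sub_path: str = '') -> str:
    """Copy a whole directory, excluding .git, like get_dir_sync_command."""
    src_dir_path = shlex.quote(src_dir_path)
    return ('rclone copy --exclude ".git/*" '
            f'{src_dir_path} {remote}:{bucket}{sub_path}/{dest_dir_name}')


def file_sync_command(base_dir_path: str, file_names, remote: str,
                      bucket: str, sub_path: str = '') -> str:
    """Copy selected files via --include, like get_file_sync_command."""
    includes = ' '.join(
        f'--include {shlex.quote(name)}' for name in file_names)
    base_dir_path = shlex.quote(base_dir_path)
    return (f'rclone copy {includes} {base_dir_path} '
            f'{remote}:{bucket}{sub_path}')


print(dir_sync_command('~/my data', 'ibmcos-mybucket', 'mybucket', 'my_dir'))
print(file_sync_command('.', ['a.txt', 'b.txt'], 'ibmcos-mybucket', 'mybucket'))
```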
3518
3757
 
3519
3758
  # Generate message for upload
@@ -3524,7 +3763,8 @@ class R2Store(AbstractStore):
3524
3763
 
3525
3764
  log_path = sky_logging.generate_tmp_logging_file_path(
3526
3765
  _STORAGE_LOG_FILE_NAME)
3527
- sync_path = f'{source_message} -> r2://{self.name}{sub_path}/'
3766
+ sync_path = (
3767
+ f'{source_message} -> cos://{self.region}/{self.name}{sub_path}/')
3528
3768
  with rich_utils.safe_status(
3529
3769
  ux_utils.spinner_message(f'Syncing {sync_path}',
3530
3770
  log_path=log_path)):
@@ -3541,1236 +3781,306 @@ class R2Store(AbstractStore):
3541
3781
  ux_utils.finishing_message(f'Storage synced: {sync_path}',
3542
3782
  log_path))
3543
3783
 
3544
- def _transfer_to_r2(self) -> None:
3545
- assert isinstance(self.source, str), self.source
3546
- if self.source.startswith('gs://'):
3547
- data_transfer.gcs_to_r2(self.name, self.name)
3548
- elif self.source.startswith('s3://'):
3549
- data_transfer.s3_to_r2(self.name, self.name)
3550
- elif self.source.startswith('nebius://'):
3551
- data_transfer.s3_to_r2(self.name, self.name)
3552
-
3553
3784
  def _get_bucket(self) -> Tuple[StorageHandle, bool]:
3554
- """Obtains the R2 bucket.
3785
+ """returns IBM COS bucket object if exists, otherwise creates it.
3555
3786
 
3556
- If the bucket exists, this method will return the bucket.
3557
- If the bucket does not exist, there are three cases:
3558
- 1) Raise an error if the bucket source starts with s3://
3559
- 2) Return None if bucket has been externally deleted and
3560
- sync_on_reconstruction is False
3561
- 3) Create and return a new bucket otherwise
3787
+ Returns:
3788
+ StorageHandle(str): bucket name
3789
+ bool: indicates whether a new bucket was created.
3562
3790
 
3563
3791
  Raises:
3564
3792
  StorageSpecError: If externally created bucket is attempted to be
3565
3793
  mounted without specifying storage source.
3566
- StorageBucketCreateError: If creating the bucket fails
3794
+ StorageBucketCreateError: If bucket creation fails.
3567
3795
  StorageBucketGetError: If fetching a bucket fails
3568
3796
  StorageExternalDeletionError: If externally deleted storage is
3569
3797
  attempted to be fetched while reconstructing the storage for
3570
3798
  'sky storage delete' or 'sky start'
3571
3799
  """
3572
- r2 = cloudflare.resource('s3')
3573
- bucket = r2.Bucket(self.name)
3574
- endpoint_url = cloudflare.create_endpoint()
3800
+
3801
+ bucket_profile_name = (data_utils.Rclone.RcloneStores.IBM.value +
3802
+ self.name)
3575
3803
  try:
3576
- # Try Public bucket case.
3577
- # This line does not error out if the bucket is an external public
3578
- # bucket or if it is a user's bucket that is publicly
3579
- # accessible.
3580
- self.client.head_bucket(Bucket=self.name)
3581
- self._validate_existing_bucket()
3582
- return bucket, False
3583
- except aws.botocore_exceptions().ClientError as e:
3584
- error_code = e.response['Error']['Code']
3585
- # AccessDenied error for buckets that are private and not owned by
3586
- # user.
3587
- if error_code == '403':
3588
- command = ('AWS_SHARED_CREDENTIALS_FILE='
3589
- f'{cloudflare.R2_CREDENTIALS_PATH} '
3590
- f'aws s3 ls s3://{self.name} '
3591
- f'--endpoint {endpoint_url} '
3592
- f'--profile={cloudflare.R2_PROFILE_NAME}')
3593
- with ux_utils.print_exception_no_traceback():
3594
- raise exceptions.StorageBucketGetError(
3595
- _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
3596
- f' To debug, consider running `{command}`.') from e
3804
+ bucket_region = data_utils.get_ibm_cos_bucket_region(self.name)
3805
+ except exceptions.StorageBucketGetError as e:
3806
+ with ux_utils.print_exception_no_traceback():
3807
+ command = f'rclone lsd {bucket_profile_name}: '
3808
+ raise exceptions.StorageBucketGetError(
3809
+ _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
3810
+ f' To debug, consider running `{command}`.') from e
3811
+
3812
+ try:
3813
+ uri_region = data_utils.split_cos_path(
3814
+ self.source)[2] # type: ignore
3815
+ except ValueError:
3816
+ # source isn't a cos uri
3817
+ uri_region = ''
3818
+
3819
+ # bucket's region doesn't match specified region in URI
3820
+ if bucket_region and uri_region and uri_region != bucket_region\
3821
+ and self.sync_on_reconstruction:
3822
+ with ux_utils.print_exception_no_traceback():
3823
+ raise exceptions.StorageBucketGetError(
3824
+ f'Bucket {self.name} exists in '
3825
+ f'region {bucket_region}, '
3826
+ f'but URI specified region {uri_region}.')
3597
3827
 
3598
- if isinstance(self.source, str) and self.source.startswith('r2://'):
3828
+ if not bucket_region and uri_region:
3829
+ # bucket doesn't exist but source is a bucket URI
3599
3830
  with ux_utils.print_exception_no_traceback():
3600
3831
  raise exceptions.StorageBucketGetError(
3601
3832
  'Attempted to use a non-existent bucket as a source: '
3602
- f'{self.source}. Consider using '
3603
- '`AWS_SHARED_CREDENTIALS_FILE='
3604
- f'{cloudflare.R2_CREDENTIALS_PATH} aws s3 ls '
3605
- f's3://{self.name} '
3606
- f'--endpoint {endpoint_url} '
3607
- f'--profile={cloudflare.R2_PROFILE_NAME}\' '
3608
- 'to debug.')
3609
-
3610
- # If bucket cannot be found in both private and public settings,
3611
- # the bucket is to be created by Sky. However, creation is skipped if
3612
- # Store object is being reconstructed for deletion or re-mount with
3613
- # sky start, and error is raised instead.
3614
- if self.sync_on_reconstruction:
3615
- bucket = self._create_r2_bucket(self.name)
3616
- return bucket, True
3617
- else:
3833
+ f'{self.name} by providing URI. Consider using '
3834
+ '`rclone lsd <remote>` on relevant remotes returned '
3835
+ 'via `rclone listremotes` to debug.')
3836
+
3837
+ data_utils.Rclone.store_rclone_config(
3838
+ self.name,
3839
+ data_utils.Rclone.RcloneStores.IBM,
3840
+ self.region, # type: ignore
3841
+ )
3842
+
3843
+ if not bucket_region and self.sync_on_reconstruction:
3844
+ # bucket doesn't exist
3845
+ return self._create_cos_bucket(self.name, self.region), True
3846
+ elif not bucket_region and not self.sync_on_reconstruction:
3618
3847
  # Raised when Storage object is reconstructed for sky storage
3619
3848
  # delete or to re-mount Storages with sky start but the storage
3620
3849
  # is already removed externally.
3621
3850
  raise exceptions.StorageExternalDeletionError(
3622
3851
  'Attempted to fetch a non-existent bucket: '
3623
3852
  f'{self.name}')
3624
-
3853
+ else:
3854
+ # bucket exists
3855
+ bucket = self.s3_resource.Bucket(self.name)
3856
+ self._validate_existing_bucket()
3857
+ return bucket, False
3858
+
3625
3859
  def _download_file(self, remote_path: str, local_path: str) -> None:
3626
- """Downloads file from remote to local on r2 bucket
3860
+ """Downloads file from remote to local on s3 bucket
3627
3861
  using the boto3 API
3628
3862
 
3629
3863
  Args:
3630
- remote_path: str; Remote path on R2 bucket
3864
+ remote_path: str; Remote path on S3 bucket
3631
3865
  local_path: str; Local path on user's device
3632
3866
  """
3633
- self.bucket.download_file(remote_path, local_path)
3867
+ self.client.download_file(self.name, local_path, remote_path)
3634
3868
 
3635
3869
  def mount_command(self, mount_path: str) -> str:
3636
3870
  """Returns the command to mount the bucket to the mount_path.
3637
3871
 
3638
- Uses goofys to mount the bucket.
3872
+ Uses rclone to mount the bucket.
3873
+ Source: https://github.com/rclone/rclone
3639
3874
 
3640
3875
  Args:
3641
3876
  mount_path: str; Path to mount the bucket to.
3642
3877
  """
3643
- install_cmd = mounting_utils.get_s3_mount_install_cmd()
3644
- endpoint_url = cloudflare.create_endpoint()
3645
- r2_credential_path = cloudflare.R2_CREDENTIALS_PATH
3646
- r2_profile_name = cloudflare.R2_PROFILE_NAME
3647
- mount_cmd = mounting_utils.get_r2_mount_cmd(
3648
- r2_credential_path, r2_profile_name, endpoint_url, self.bucket.name,
3649
- mount_path, self._bucket_sub_path)
3650
- return mounting_utils.get_mounting_command(mount_path, install_cmd,
3651
- mount_cmd)
3652
-
3653
- def mount_cached_command(self, mount_path: str) -> str:
3878
+ # install rclone if not installed.
3654
3879
  install_cmd = mounting_utils.get_rclone_install_cmd()
3655
- rclone_profile_name = (
3656
- data_utils.Rclone.RcloneStores.R2.get_profile_name(self.name))
3657
- rclone_config = data_utils.Rclone.RcloneStores.R2.get_config(
3658
- rclone_profile_name=rclone_profile_name)
3659
- mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
3660
- rclone_config, rclone_profile_name, self.bucket.name, mount_path)
3880
+ rclone_config = data_utils.Rclone.RcloneStores.IBM.get_config(
3881
+ rclone_profile_name=self.rclone_profile_name,
3882
+ region=self.region) # type: ignore
3883
+ mount_cmd = (
3884
+ mounting_utils.get_cos_mount_cmd(
3885
+ rclone_config,
3886
+ self.rclone_profile_name,
3887
+ self.bucket.name,
3888
+ mount_path,
3889
+ self._bucket_sub_path, # type: ignore
3890
+ ))
3661
3891
  return mounting_utils.get_mounting_command(mount_path, install_cmd,
3662
- mount_cached_cmd)
3892
+ mount_cmd)
3663
3893
 
3664
- def _create_r2_bucket(self,
3665
- bucket_name: str,
3666
- region='auto') -> StorageHandle:
3667
- """Creates R2 bucket with specific name in specific region
3894
+ def _create_cos_bucket(self,
3895
+ bucket_name: str,
3896
+ region='us-east') -> StorageHandle:
3897
+ """Creates IBM COS bucket with specific name in specific region
3668
3898
 
3669
3899
  Args:
3670
3900
  bucket_name: str; Name of bucket
3671
- region: str; Region name, r2 automatically sets region
3901
+ region: str; Region name, e.g. us-east, us-south
3672
3902
  Raises:
3673
3903
  StorageBucketCreateError: If bucket creation fails.
3674
3904
  """
3675
- r2_client = self.client
3676
3905
  try:
3677
- if region is None:
3678
- r2_client.create_bucket(Bucket=bucket_name)
3679
- else:
3680
- location = {'LocationConstraint': region}
3681
- r2_client.create_bucket(Bucket=bucket_name,
3682
- CreateBucketConfiguration=location)
3683
- logger.info(f' {colorama.Style.DIM}Created R2 bucket '
3684
- f'{bucket_name!r} in {region}'
3685
- f'{colorama.Style.RESET_ALL}')
3686
- except aws.botocore_exceptions().ClientError as e:
3906
+ self.client.create_bucket(
3907
+ Bucket=bucket_name,
3908
+ CreateBucketConfiguration={
3909
+ 'LocationConstraint': f'{region}-smart'
3910
+ })
3911
+ logger.info(f' {colorama.Style.DIM}Created IBM COS bucket '
3912
+ f'{bucket_name!r} in {region} '
3913
+ 'with storage class smart tier'
3914
+ f'{colorama.Style.RESET_ALL}')
3915
+ self.bucket = self.s3_resource.Bucket(bucket_name)
3916
+
3917
+ except ibm.ibm_botocore.exceptions.ClientError as e: # type: ignore[union-attr] # pylint: disable=line-too-long
3687
3918
  with ux_utils.print_exception_no_traceback():
3688
3919
  raise exceptions.StorageBucketCreateError(
3689
- f'Attempted to create a bucket '
3690
- f'{self.name} but failed.') from e
3691
- return cloudflare.resource('s3').Bucket(bucket_name)
3692
-
3693
- def _execute_r2_remove_command(self, command: str, bucket_name: str,
3694
- hint_operating: str,
3695
- hint_failed: str) -> bool:
3696
- try:
3697
- with rich_utils.safe_status(
3698
- ux_utils.spinner_message(hint_operating)):
3699
- subprocess.check_output(command.split(' '),
3700
- stderr=subprocess.STDOUT,
3701
- shell=True)
3702
- except subprocess.CalledProcessError as e:
3703
- if 'NoSuchBucket' in e.output.decode('utf-8'):
3704
- logger.debug(
3705
- _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
3706
- bucket_name=bucket_name))
3707
- return False
3708
- else:
3709
- with ux_utils.print_exception_no_traceback():
3710
- raise exceptions.StorageBucketDeleteError(
3711
- f'{hint_failed}'
3712
- f'Detailed error: {e.output}')
3713
- return True
3714
-
3715
- def _delete_r2_bucket_sub_path(self, bucket_name: str,
3716
- sub_path: str) -> bool:
3717
- """Deletes the sub path from the bucket."""
3718
- endpoint_url = cloudflare.create_endpoint()
3719
- remove_command = (
3720
- f'AWS_SHARED_CREDENTIALS_FILE={cloudflare.R2_CREDENTIALS_PATH} '
3721
- f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive '
3722
- f'--endpoint {endpoint_url} '
3723
- f'--profile={cloudflare.R2_PROFILE_NAME}')
3724
- return self._execute_r2_remove_command(
3725
- remove_command, bucket_name,
3726
- f'Removing objects from R2 bucket {bucket_name}/{sub_path}',
3727
- f'Failed to remove objects from R2 bucket {bucket_name}/{sub_path}.'
3728
- )
3729
-
3730
- def _delete_r2_bucket(self, bucket_name: str) -> bool:
3731
- """Deletes R2 bucket, including all objects in bucket
3920
+ f'Failed to create bucket: '
3921
+ f'{bucket_name}') from e
3732
3922
 
3733
- Args:
3734
- bucket_name: str; Name of bucket
3923
+ s3_bucket_exists_waiter = self.client.get_waiter('bucket_exists')
3924
+ s3_bucket_exists_waiter.wait(Bucket=bucket_name)
3735
3925
 
3736
- Returns:
3737
- bool; True if bucket was deleted, False if it was deleted externally.
3926
+ return self.bucket
3738
3927
 
3739
- Raises:
3740
- StorageBucketDeleteError: If deleting the bucket fails.
3741
- """
3742
- # Deleting objects is very slow programmatically
3743
- # (i.e. bucket.objects.all().delete() is slow).
3744
- # In addition, standard delete operations (i.e. via `aws s3 rm`)
3745
- # are slow, since AWS puts deletion markers.
3746
- # https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
3747
- # The fastest way to delete is to run `aws s3 rb --force`,
3748
- # which removes the bucket by force.
3749
- endpoint_url = cloudflare.create_endpoint()
3750
- remove_command = (
3751
- f'AWS_SHARED_CREDENTIALS_FILE={cloudflare.R2_CREDENTIALS_PATH} '
3752
- f'aws s3 rb s3://{bucket_name} --force '
3753
- f'--endpoint {endpoint_url} '
3754
- f'--profile={cloudflare.R2_PROFILE_NAME}')
3755
-
3756
- success = self._execute_r2_remove_command(
3757
- remove_command, bucket_name, f'Deleting R2 bucket {bucket_name}',
3758
- f'Failed to delete R2 bucket {bucket_name}.')
3759
- if not success:
3760
- return False
3928
+ def _delete_cos_bucket_objects(self,
3929
+ bucket: Any,
3930
+ prefix: Optional[str] = None) -> None:
3931
+ bucket_versioning = self.s3_resource.BucketVersioning(bucket.name)
3932
+ if bucket_versioning.status == 'Enabled':
3933
+ if prefix is not None:
3934
+ res = list(
3935
+ bucket.object_versions.filter(Prefix=prefix).delete())
3936
+ else:
3937
+ res = list(bucket.object_versions.delete())
3938
+ else:
3939
+ if prefix is not None:
3940
+ res = list(bucket.objects.filter(Prefix=prefix).delete())
3941
+ else:
3942
+ res = list(bucket.objects.delete())
3943
+ logger.debug(f'Deleted bucket\'s content:\n{res}, prefix: {prefix}')
3761
3944
 
3762
- # Wait until bucket deletion propagates on AWS servers
3763
- while data_utils.verify_r2_bucket(bucket_name):
3764
- time.sleep(0.1)
3765
- return True
3945
+ def _delete_cos_bucket(self) -> None:
3946
+ bucket = self.s3_resource.Bucket(self.name)
3947
+ try:
3948
+ self._delete_cos_bucket_objects(bucket)
3949
+ bucket.delete()
3950
+ bucket.wait_until_not_exists()
3951
+ except ibm.ibm_botocore.exceptions.ClientError as e:
3952
+ if e.__class__.__name__ == 'NoSuchBucket':
3953
+ logger.debug('bucket already removed')
3954
+ data_utils.Rclone.delete_rclone_bucket_profile(
3955
+ self.name, data_utils.Rclone.RcloneStores.IBM)
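The new `_delete_cos_bucket_objects` has to branch on bucket versioning: when versioning is enabled, deleting only current objects would leave versions (and hence the bucket) behind, so it deletes object versions instead. A minimal sketch of that branching against a boto3-style resource such as the one returned by the IBM COS SDK; the helper name is illustrative:

```python
def delete_all_objects(s3_resource, bucket_name: str, prefix=None):
    """Delete objects, or object versions when versioning is enabled."""
    bucket = s3_resource.Bucket(bucket_name)
    versioning = s3_resource.BucketVersioning(bucket_name)
    if versioning.status == 'Enabled':
        if prefix is not None:
            return list(bucket.object_versions.filter(Prefix=prefix).delete())
        return list(bucket.object_versions.delete())
    if prefix is not None:
        return list(bucket.objects.filter(Prefix=prefix).delete())
    return list(bucket.objects.delete())
```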
3766
3956
 
3767
3957
 
3768
- class IBMCosStore(AbstractStore):
3769
- """IBMCosStore inherits from Storage Object and represents the backend
3770
- for COS buckets.
3958
+ class OciStore(AbstractStore):
3959
+ """OciStore inherits from Storage Object and represents the backend
3960
+ for OCI buckets.
3771
3961
  """
3772
- _ACCESS_DENIED_MESSAGE = 'Access Denied'
3962
+
3963
+ _ACCESS_DENIED_MESSAGE = 'AccessDeniedException'
3773
3964
 
3774
3965
  def __init__(self,
3775
3966
  name: str,
3776
- source: str,
3777
- region: Optional[str] = 'us-east',
3967
+ source: Optional[SourceType],
3968
+ region: Optional[str] = None,
3778
3969
  is_sky_managed: Optional[bool] = None,
3779
- sync_on_reconstruction: bool = True,
3970
+ sync_on_reconstruction: Optional[bool] = True,
3780
3971
  _bucket_sub_path: Optional[str] = None):
3781
- self.client: 'storage.Client'
3782
- self.bucket: 'StorageHandle'
3783
- self.rclone_profile_name = (
3784
- data_utils.Rclone.RcloneStores.IBM.get_profile_name(self.name))
3785
- super().__init__(name, source, region, is_sky_managed,
3786
- sync_on_reconstruction, _bucket_sub_path)
3787
-
3788
- def _validate(self):
3789
- if self.source is not None and isinstance(self.source, str):
3790
- if self.source.startswith('s3://'):
3791
- assert self.name == data_utils.split_s3_path(self.source)[0], (
3792
- 'S3 Bucket is specified as path, the name should be the'
3793
- ' same as S3 bucket.')
3794
- assert data_utils.verify_s3_bucket(self.name), (
3795
- f'Source specified as {self.source}, a S3 bucket. ',
3796
- 'S3 Bucket should exist.')
3797
- elif self.source.startswith('gs://'):
3798
- assert self.name == data_utils.split_gcs_path(self.source)[0], (
3799
- 'GCS Bucket is specified as path, the name should be '
3800
- 'the same as GCS bucket.')
3801
- assert data_utils.verify_gcs_bucket(self.name), (
3802
- f'Source specified as {self.source}, a GCS bucket. ',
3803
- 'GCS Bucket should exist.')
3804
- elif data_utils.is_az_container_endpoint(self.source):
3805
- storage_account_name, container_name, _ = (
3806
- data_utils.split_az_path(self.source))
3807
- assert self.name == container_name, (
3808
- 'Azure bucket is specified as path, the name should be '
3809
- 'the same as Azure bucket.')
3810
- assert data_utils.verify_az_bucket(
3811
- storage_account_name, self.name), (
3812
- f'Source specified as {self.source}, an Azure bucket. '
3813
- 'Azure bucket should exist.')
3814
- elif self.source.startswith('r2://'):
3815
- assert self.name == data_utils.split_r2_path(self.source)[0], (
3816
- 'R2 Bucket is specified as path, the name should be '
3817
- 'the same as R2 bucket.')
3818
- assert data_utils.verify_r2_bucket(self.name), (
3819
- f'Source specified as {self.source}, a R2 bucket. ',
3820
- 'R2 Bucket should exist.')
3821
- elif self.source.startswith('nebius://'):
3822
- assert self.name == data_utils.split_nebius_path(
3823
- self.source)[0], (
3824
- 'Nebius Object Storage is specified as path, the name '
3825
- 'should be the same as Nebius Object Storage bucket.')
3826
- assert data_utils.verify_nebius_bucket(self.name), (
3827
- f'Source specified as {self.source}, a Nebius Object '
3828
- f'Storage bucket. Nebius Object Storage Bucket should '
3829
- f'exist.')
3830
- elif self.source.startswith('cos://'):
3831
- assert self.name == data_utils.split_cos_path(self.source)[0], (
3832
- 'COS Bucket is specified as path, the name should be '
3833
- 'the same as COS bucket.')
3834
- # Validate name
3835
- self.name = IBMCosStore.validate_name(self.name)
3836
-
3837
- @classmethod
3838
- def validate_name(cls, name: str) -> str:
3839
- """Validates the name of a COS bucket.
3840
-
3841
- Rules source: https://ibm.github.io/ibm-cos-sdk-java/com/ibm/cloud/objectstorage/services/s3/model/Bucket.html # pylint: disable=line-too-long
3842
- """
3843
-
3844
- def _raise_no_traceback_name_error(err_str):
3845
- with ux_utils.print_exception_no_traceback():
3846
- raise exceptions.StorageNameError(err_str)
3847
-
3848
- if name is not None and isinstance(name, str):
3849
- if not 3 <= len(name) <= 63:
3850
- _raise_no_traceback_name_error(
3851
- f'Invalid store name: {name} must be between 3 (min) '
3852
- 'and 63 (max) characters long.')
3853
-
3854
- # Check for valid characters and start/end with a letter or number
3855
- pattern = r'^[a-z0-9][-a-z0-9.]*[a-z0-9]$'
3856
- if not re.match(pattern, name):
3857
- _raise_no_traceback_name_error(
3858
- f'Invalid store name: {name} can consist only of '
3859
- 'lowercase letters, numbers, dots (.), and dashes (-). '
3860
- 'It must begin and end with a letter or number.')
3861
-
3862
- # Check for two adjacent periods or dashes
3863
- if any(substring in name for substring in ['..', '--']):
3864
- _raise_no_traceback_name_error(
3865
- f'Invalid store name: {name} must not contain '
3866
- 'two adjacent periods/dashes')
3867
-
3868
- # Check for IP address format
3869
- ip_pattern = r'^(?:\d{1,3}\.){3}\d{1,3}$'
3870
- if re.match(ip_pattern, name):
3871
- _raise_no_traceback_name_error(
3872
- f'Invalid store name: {name} must not be formatted as '
3873
- 'an IP address (for example, 192.168.5.4).')
3874
-
3875
- if any(substring in name for substring in ['.-', '-.']):
3876
- _raise_no_traceback_name_error(
3877
- f'Invalid store name: {name} must '
3878
- 'not allow substrings: ".-", "-." .')
3879
- else:
3880
- _raise_no_traceback_name_error('Store name must be specified.')
3881
- return name
3882
-
3883
- def initialize(self):
3884
- """Initializes the cos store object on the cloud.
3885
-
3886
- Initialization involves fetching bucket if exists, or creating it if
3887
- it does not.
3972
+ self.client: Any
3973
+ self.bucket: StorageHandle
3974
+ self.oci_config_file: str
3975
+ self.config_profile: str
3976
+ self.compartment: str
3977
+ self.namespace: str
3888
3978
 
3889
- Raises:
3890
- StorageBucketCreateError: If bucket creation fails
3891
- StorageBucketGetError: If fetching existing bucket fails
3892
- StorageInitError: If general initialization fails.
3893
- """
3894
- self.client = ibm.get_cos_client(self.region)
3895
- self.s3_resource = ibm.get_cos_resource(self.region)
3896
- self.bucket, is_new_bucket = self._get_bucket()
3897
- if self.is_sky_managed is None:
3898
- # If is_sky_managed is not specified, then this is a new storage
3899
- # object (i.e., did not exist in global_user_state) and we should
3900
- # set the is_sky_managed property.
3901
- # If is_sky_managed is specified, then we take no action.
3902
- self.is_sky_managed = is_new_bucket
3903
-
3904
- def upload(self):
3905
- """Uploads files from local machine to bucket.
3906
-
3907
- Upload must be called by the Storage handler - it is not called on
3908
- Store initialization.
3909
-
3910
- Raises:
3911
- StorageUploadError: if upload fails.
3912
- """
3913
- try:
3914
- if isinstance(self.source, list):
3915
- self.batch_ibm_rsync(self.source, create_dirs=True)
3916
- elif self.source is not None:
3917
- if self.source.startswith('cos://'):
3918
- # cos bucket used as a dest, can't be used as source.
3919
- pass
3920
- elif self.source.startswith('s3://'):
3921
- raise Exception('IBM COS currently not supporting'
3922
- 'data transfers between COS and S3')
3923
- elif self.source.startswith('nebius://'):
3924
- raise Exception('IBM COS currently not supporting'
3925
- 'data transfers between COS and Nebius')
3926
- elif self.source.startswith('gs://'):
3927
- raise Exception('IBM COS currently not supporting'
3928
- 'data transfers between COS and GS')
3929
- elif self.source.startswith('r2://'):
3930
- raise Exception('IBM COS currently not supporting'
3931
- 'data transfers between COS and r2')
3932
- else:
3933
- self.batch_ibm_rsync([self.source])
3934
-
3935
- except Exception as e:
3936
- raise exceptions.StorageUploadError(
3937
- f'Upload failed for store {self.name}') from e
3938
-
3939
- def delete(self) -> None:
3940
- if self._bucket_sub_path is not None and not self.is_sky_managed:
3941
- return self._delete_sub_path()
3942
-
3943
- self._delete_cos_bucket()
3944
- logger.info(f'{colorama.Fore.GREEN}Deleted COS bucket {self.name}.'
3945
- f'{colorama.Style.RESET_ALL}')
3946
-
3947
- def _delete_sub_path(self) -> None:
3948
- assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
3949
- bucket = self.s3_resource.Bucket(self.name)
3950
- try:
3951
- self._delete_cos_bucket_objects(bucket, self._bucket_sub_path + '/')
3952
- except ibm.ibm_botocore.exceptions.ClientError as e:
3953
- if e.__class__.__name__ == 'NoSuchBucket':
3954
- logger.debug('bucket already removed')
3955
-
3956
- def get_handle(self) -> StorageHandle:
3957
- return self.s3_resource.Bucket(self.name)
3958
-
3959
- def batch_ibm_rsync(self,
3960
- source_path_list: List[Path],
3961
- create_dirs: bool = False) -> None:
3962
- """Invokes rclone copy to batch upload a list of local paths to cos
3963
-
3964
- Since rclone does not support batch operations, we construct
3965
- multiple commands to be run in parallel.
3966
-
3967
- Args:
3968
- source_path_list: List of paths to local files or directories
3969
- create_dirs: If the local_path is a directory and this is set to
3970
- False, the contents of the directory are directly uploaded to
3971
- root of the bucket. If the local_path is a directory and this is
3972
- set to True, the directory is created in the bucket root and
3973
- contents are uploaded to it.
3974
- """
3975
- sub_path = (f'/{self._bucket_sub_path}'
3976
- if self._bucket_sub_path else '')
3977
-
3978
- def get_dir_sync_command(src_dir_path, dest_dir_name) -> str:
3979
- """returns an rclone command that copies a complete folder
3980
- from 'src_dir_path' to bucket/'dest_dir_name'.
3981
-
3982
- `rclone copy` copies files from source path to target.
3983
- files with identical names at won't be copied over, unless
3984
- their modification date is more recent.
3985
- works similarly to `aws sync` (without --delete).
3986
-
3987
- Args:
3988
- src_dir_path (str): local source path from which to copy files.
3989
- dest_dir_name (str): remote target path files are copied to.
3990
-
3991
- Returns:
3992
- str: bash command using rclone to sync files. Executed remotely.
3993
- """
3994
-
3995
- # .git directory is excluded from the sync
3996
- # wrapping src_dir_path with "" to support path with spaces
3997
- src_dir_path = shlex.quote(src_dir_path)
3998
- sync_command = ('rclone copy --exclude ".git/*" '
3999
- f'{src_dir_path} '
4000
- f'{self.rclone_profile_name}:{self.name}{sub_path}'
4001
- f'/{dest_dir_name}')
4002
- return sync_command
4003
-
4004
- def get_file_sync_command(base_dir_path, file_names) -> str:
4005
- """returns an rclone command that copies files: 'file_names'
4006
- from base directory: `base_dir_path` to bucket.
4007
-
4008
- `rclone copy` copies files from source path to target.
4009
- files with identical names at won't be copied over, unless
4010
- their modification date is more recent.
4011
- works similarly to `aws sync` (without --delete).
4012
-
4013
- Args:
4014
- base_dir_path (str): local path from which to copy files.
4015
- file_names (List): specific file names to copy.
4016
-
4017
- Returns:
4018
- str: bash command using rclone to sync files
4019
- """
4020
-
4021
- # wrapping file_name with "" to support spaces
4022
- includes = ' '.join([
4023
- f'--include {shlex.quote(file_name)}'
4024
- for file_name in file_names
4025
- ])
4026
- base_dir_path = shlex.quote(base_dir_path)
4027
- sync_command = ('rclone copy '
4028
- f'{includes} {base_dir_path} '
4029
- f'{self.rclone_profile_name}:{self.name}{sub_path}')
4030
- return sync_command
4031
-
4032
- # Generate message for upload
4033
- if len(source_path_list) > 1:
4034
- source_message = f'{len(source_path_list)} paths'
4035
- else:
4036
- source_message = source_path_list[0]
4037
-
4038
- log_path = sky_logging.generate_tmp_logging_file_path(
4039
- _STORAGE_LOG_FILE_NAME)
4040
- sync_path = (
4041
- f'{source_message} -> cos://{self.region}/{self.name}{sub_path}/')
4042
- with rich_utils.safe_status(
4043
- ux_utils.spinner_message(f'Syncing {sync_path}',
4044
- log_path=log_path)):
4045
- data_utils.parallel_upload(
4046
- source_path_list,
4047
- get_file_sync_command,
4048
- get_dir_sync_command,
4049
- log_path,
4050
- self.name,
4051
- self._ACCESS_DENIED_MESSAGE,
4052
- create_dirs=create_dirs,
4053
- max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS)
4054
- logger.info(
4055
- ux_utils.finishing_message(f'Storage synced: {sync_path}',
4056
- log_path))
4057
-
4058
- def _get_bucket(self) -> Tuple[StorageHandle, bool]:
4059
- """returns IBM COS bucket object if exists, otherwise creates it.
4060
-
4061
- Returns:
4062
- StorageHandle(str): bucket name
4063
- bool: indicates whether a new bucket was created.
4064
-
4065
- Raises:
4066
- StorageSpecError: If externally created bucket is attempted to be
4067
- mounted without specifying storage source.
4068
- StorageBucketCreateError: If bucket creation fails.
4069
- StorageBucketGetError: If fetching a bucket fails
4070
- StorageExternalDeletionError: If externally deleted storage is
4071
- attempted to be fetched while reconstructing the storage for
4072
- 'sky storage delete' or 'sky start'
4073
- """
4074
-
4075
- bucket_profile_name = (data_utils.Rclone.RcloneStores.IBM.value +
4076
- self.name)
4077
- try:
4078
- bucket_region = data_utils.get_ibm_cos_bucket_region(self.name)
4079
- except exceptions.StorageBucketGetError as e:
4080
- with ux_utils.print_exception_no_traceback():
4081
- command = f'rclone lsd {bucket_profile_name}: '
4082
- raise exceptions.StorageBucketGetError(
4083
- _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
4084
- f' To debug, consider running `{command}`.') from e
4085
-
4086
- try:
4087
- uri_region = data_utils.split_cos_path(
4088
- self.source)[2] # type: ignore
4089
- except ValueError:
4090
- # source isn't a cos uri
4091
- uri_region = ''
4092
-
4093
- # bucket's region doesn't match specified region in URI
4094
- if bucket_region and uri_region and uri_region != bucket_region\
4095
- and self.sync_on_reconstruction:
4096
- with ux_utils.print_exception_no_traceback():
4097
- raise exceptions.StorageBucketGetError(
4098
- f'Bucket {self.name} exists in '
4099
- f'region {bucket_region}, '
4100
- f'but URI specified region {uri_region}.')
4101
-
4102
- if not bucket_region and uri_region:
4103
- # bucket doesn't exist but source is a bucket URI
4104
- with ux_utils.print_exception_no_traceback():
4105
- raise exceptions.StorageBucketGetError(
4106
- 'Attempted to use a non-existent bucket as a source: '
4107
- f'{self.name} by providing URI. Consider using '
4108
- '`rclone lsd <remote>` on relevant remotes returned '
4109
- 'via `rclone listremotes` to debug.')
4110
-
4111
- data_utils.Rclone.store_rclone_config(
4112
- self.name,
4113
- data_utils.Rclone.RcloneStores.IBM,
4114
- self.region, # type: ignore
4115
- )
4116
-
4117
- if not bucket_region and self.sync_on_reconstruction:
4118
- # bucket doesn't exist
4119
- return self._create_cos_bucket(self.name, self.region), True
4120
- elif not bucket_region and not self.sync_on_reconstruction:
4121
- # Raised when Storage object is reconstructed for sky storage
4122
- # delete or to re-mount Storages with sky start but the storage
4123
- # is already removed externally.
4124
- raise exceptions.StorageExternalDeletionError(
4125
- 'Attempted to fetch a non-existent bucket: '
4126
- f'{self.name}')
4127
- else:
4128
- # bucket exists
4129
- bucket = self.s3_resource.Bucket(self.name)
4130
- self._validate_existing_bucket()
4131
- return bucket, False
4132
-
4133
- def _download_file(self, remote_path: str, local_path: str) -> None:
4134
- """Downloads file from remote to local on s3 bucket
4135
- using the boto3 API
4136
-
4137
- Args:
4138
- remote_path: str; Remote path on S3 bucket
4139
- local_path: str; Local path on user's device
4140
- """
4141
- self.client.download_file(self.name, local_path, remote_path)
4142
-
4143
- def mount_command(self, mount_path: str) -> str:
4144
- """Returns the command to mount the bucket to the mount_path.
4145
-
4146
- Uses rclone to mount the bucket.
4147
- Source: https://github.com/rclone/rclone
4148
-
4149
- Args:
4150
- mount_path: str; Path to mount the bucket to.
4151
- """
4152
- # install rclone if not installed.
4153
- install_cmd = mounting_utils.get_rclone_install_cmd()
4154
- rclone_config = data_utils.Rclone.RcloneStores.IBM.get_config(
4155
- rclone_profile_name=self.rclone_profile_name,
4156
- region=self.region) # type: ignore
4157
- mount_cmd = (
4158
- mounting_utils.get_cos_mount_cmd(
4159
- rclone_config,
4160
- self.rclone_profile_name,
4161
- self.bucket.name,
4162
- mount_path,
4163
- self._bucket_sub_path, # type: ignore
4164
- ))
4165
- return mounting_utils.get_mounting_command(mount_path, install_cmd,
4166
- mount_cmd)
4167
-
4168
- def _create_cos_bucket(self,
4169
- bucket_name: str,
4170
- region='us-east') -> StorageHandle:
4171
- """Creates IBM COS bucket with specific name in specific region
4172
-
4173
- Args:
4174
- bucket_name: str; Name of bucket
4175
- region: str; Region name, e.g. us-east, us-south
4176
- Raises:
4177
- StorageBucketCreateError: If bucket creation fails.
4178
- """
4179
- try:
4180
- self.client.create_bucket(
4181
- Bucket=bucket_name,
4182
- CreateBucketConfiguration={
4183
- 'LocationConstraint': f'{region}-smart'
4184
- })
4185
- logger.info(f' {colorama.Style.DIM}Created IBM COS bucket '
4186
- f'{bucket_name!r} in {region} '
4187
- 'with storage class smart tier'
4188
- f'{colorama.Style.RESET_ALL}')
4189
- self.bucket = self.s3_resource.Bucket(bucket_name)
4190
-
4191
- except ibm.ibm_botocore.exceptions.ClientError as e: # type: ignore[union-attr] # pylint: disable=line-too-long
4192
- with ux_utils.print_exception_no_traceback():
4193
- raise exceptions.StorageBucketCreateError(
4194
- f'Failed to create bucket: '
4195
- f'{bucket_name}') from e
4196
-
4197
- s3_bucket_exists_waiter = self.client.get_waiter('bucket_exists')
4198
- s3_bucket_exists_waiter.wait(Bucket=bucket_name)
4199
-
4200
- return self.bucket
4201
-
4202
- def _delete_cos_bucket_objects(self,
4203
- bucket: Any,
4204
- prefix: Optional[str] = None) -> None:
4205
- bucket_versioning = self.s3_resource.BucketVersioning(bucket.name)
4206
- if bucket_versioning.status == 'Enabled':
4207
- if prefix is not None:
4208
- res = list(
4209
- bucket.object_versions.filter(Prefix=prefix).delete())
4210
- else:
4211
- res = list(bucket.object_versions.delete())
4212
- else:
4213
- if prefix is not None:
4214
- res = list(bucket.objects.filter(Prefix=prefix).delete())
4215
- else:
4216
- res = list(bucket.objects.delete())
4217
- logger.debug(f'Deleted bucket\'s content:\n{res}, prefix: {prefix}')
4218
-
4219
- def _delete_cos_bucket(self) -> None:
4220
- bucket = self.s3_resource.Bucket(self.name)
4221
- try:
4222
- self._delete_cos_bucket_objects(bucket)
4223
- bucket.delete()
4224
- bucket.wait_until_not_exists()
4225
- except ibm.ibm_botocore.exceptions.ClientError as e:
4226
- if e.__class__.__name__ == 'NoSuchBucket':
4227
- logger.debug('bucket already removed')
4228
- data_utils.Rclone.delete_rclone_bucket_profile(
4229
- self.name, data_utils.Rclone.RcloneStores.IBM)
4230
-
4231
-
4232
- class OciStore(AbstractStore):
4233
- """OciStore inherits from Storage Object and represents the backend
4234
- for OCI buckets.
4235
- """
4236
-
4237
- _ACCESS_DENIED_MESSAGE = 'AccessDeniedException'
4238
-
4239
- def __init__(self,
4240
- name: str,
4241
- source: Optional[SourceType],
4242
- region: Optional[str] = None,
4243
- is_sky_managed: Optional[bool] = None,
4244
- sync_on_reconstruction: Optional[bool] = True,
4245
- _bucket_sub_path: Optional[str] = None):
4246
- self.client: Any
4247
- self.bucket: StorageHandle
4248
- self.oci_config_file: str
4249
- self.config_profile: str
4250
- self.compartment: str
4251
- self.namespace: str
4252
-
4253
- # Region is from the specified name in <bucket>@<region> format.
4254
- # Another case is name can also be set by the source, for example:
4255
- # /datasets-storage:
4256
- # source: oci://RAGData@us-sanjose-1
4257
- # The name in above mount will be set to RAGData@us-sanjose-1
4258
- region_in_name = None
4259
- if name is not None and '@' in name:
4260
- self._validate_bucket_expr(name)
4261
- name, region_in_name = name.split('@')
4262
-
4263
- # Region is from the specified source in oci://<bucket>@<region> format
4264
- region_in_source = None
4265
- if isinstance(source,
4266
- str) and source.startswith('oci://') and '@' in source:
4267
- self._validate_bucket_expr(source)
4268
- source, region_in_source = source.split('@')
4269
-
4270
- if region_in_name is not None and region_in_source is not None:
4271
- # This should never happen because name and source will never be
4272
- # the remote bucket at the same time.
4273
- assert region_in_name == region_in_source, (
4274
- f'Mismatch region specified. Region in name {region_in_name}, '
4275
- f'but region in source is {region_in_source}')
4276
-
4277
- if region_in_name is not None:
4278
- region = region_in_name
4279
- elif region_in_source is not None:
4280
- region = region_in_source
4281
-
4282
- # Default region set to what specified in oci config.
4283
- if region is None:
4284
- region = oci.get_oci_config()['region']
4285
-
4286
- # So far from now on, the name and source are canonical, means there
4287
- # is no region (@<region> suffix) associated with them anymore.
4288
-
4289
- super().__init__(name, source, region, is_sky_managed,
4290
- sync_on_reconstruction, _bucket_sub_path)
4291
- # TODO(zpoint): add _bucket_sub_path to the sync/mount/delete commands
4292
-
4293
- def _validate_bucket_expr(self, bucket_expr: str):
4294
- pattern = r'^(\w+://)?[A-Za-z0-9-._]+(@\w{2}-\w+-\d{1})$'
4295
- if not re.match(pattern, bucket_expr):
4296
- raise ValueError(
4297
- 'The format for the bucket portion is <bucket>@<region> '
4298
- 'when specify a region with a bucket.')
4299
-
4300
- def _validate(self):
4301
- if self.source is not None and isinstance(self.source, str):
4302
- if self.source.startswith('oci://'):
4303
- assert self.name == data_utils.split_oci_path(self.source)[0], (
4304
- 'OCI Bucket is specified as path, the name should be '
4305
- 'the same as OCI bucket.')
4306
- elif not re.search(r'^\w+://', self.source):
4307
- # Treat it as local path.
4308
- pass
4309
- else:
4310
- raise NotImplementedError(
4311
- f'Moving data from {self.source} to OCI is not supported.')
4312
-
4313
- # Validate name
4314
- self.name = self.validate_name(self.name)
4315
- # Check if the storage is enabled
4316
- if not _is_storage_cloud_enabled(str(clouds.OCI())):
4317
- with ux_utils.print_exception_no_traceback():
4318
- raise exceptions.ResourcesUnavailableError(
4319
- 'Storage \'store: oci\' specified, but ' \
4320
- 'OCI access is disabled. To fix, enable '\
4321
- 'OCI by running `sky check`. '\
4322
- 'More info: https://skypilot.readthedocs.io/en/latest/getting-started/installation.html.' # pylint: disable=line-too-long
4323
- )
4324
-
4325
- @classmethod
4326
- def validate_name(cls, name) -> str:
4327
- """Validates the name of the OCI store.
4328
-
4329
- Source for rules: https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/managingbuckets.htm#Managing_Buckets # pylint: disable=line-too-long
4330
- """
4331
-
4332
- def _raise_no_traceback_name_error(err_str):
4333
- with ux_utils.print_exception_no_traceback():
4334
- raise exceptions.StorageNameError(err_str)
4335
-
4336
- if name is not None and isinstance(name, str):
4337
- # Check for overall length
4338
- if not 1 <= len(name) <= 256:
4339
- _raise_no_traceback_name_error(
4340
- f'Invalid store name: name {name} must contain 1-256 '
4341
- 'characters.')
4342
-
4343
- # Check for valid characters and start/end with a number or letter
4344
- pattern = r'^[A-Za-z0-9-._]+$'
4345
- if not re.match(pattern, name):
4346
- _raise_no_traceback_name_error(
4347
- f'Invalid store name: name {name} can only contain '
4348
- 'upper or lower case letters, numeric characters, hyphens '
4349
- '(-), underscores (_), and dots (.). Spaces are not '
4350
- 'allowed. Names must start and end with a number or '
4351
- 'letter.')
4352
- else:
4353
- _raise_no_traceback_name_error('Store name must be specified.')
4354
- return name
4355
-
4356
- def initialize(self):
4357
- """Initializes the OCI store object on the cloud.
4358
-
4359
- Initialization involves fetching bucket if exists, or creating it if
4360
- it does not.
4361
-
4362
- Raises:
4363
- StorageBucketCreateError: If bucket creation fails
4364
- StorageBucketGetError: If fetching existing bucket fails
4365
- StorageInitError: If general initialization fails.
4366
- """
4367
- # pylint: disable=import-outside-toplevel
4368
- from sky.clouds.utils import oci_utils
4369
- from sky.provision.oci.query_utils import query_helper
4370
-
4371
- self.oci_config_file = oci.get_config_file()
4372
- self.config_profile = oci_utils.oci_config.get_profile()
4373
-
4374
- ## pylint: disable=line-too-long
4375
- # What's a compartment? See https://docs.oracle.com/en/cloud/foundation/cloud_architecture/governance/compartments.html
4376
- self.compartment = query_helper.find_compartment(self.region)
4377
- self.client = oci.get_object_storage_client(region=self.region,
4378
- profile=self.config_profile)
4379
- self.namespace = self.client.get_namespace(
4380
- compartment_id=oci.get_oci_config()['tenancy']).data
4381
-
4382
- self.bucket, is_new_bucket = self._get_bucket()
4383
- if self.is_sky_managed is None:
4384
- # If is_sky_managed is not specified, then this is a new storage
4385
- # object (i.e., did not exist in global_user_state) and we should
4386
- # set the is_sky_managed property.
4387
- # If is_sky_managed is specified, then we take no action.
4388
- self.is_sky_managed = is_new_bucket
4389
-
4390
- def upload(self):
4391
- """Uploads source to store bucket.
4392
-
4393
- Upload must be called by the Storage handler - it is not called on
4394
- Store initialization.
4395
-
4396
- Raises:
4397
- StorageUploadError: if upload fails.
4398
- """
4399
- try:
4400
- if isinstance(self.source, list):
4401
- self.batch_oci_rsync(self.source, create_dirs=True)
4402
- elif self.source is not None:
4403
- if self.source.startswith('oci://'):
4404
- pass
4405
- else:
4406
- self.batch_oci_rsync([self.source])
4407
- except exceptions.StorageUploadError:
4408
- raise
4409
- except Exception as e:
4410
- raise exceptions.StorageUploadError(
4411
- f'Upload failed for store {self.name}') from e
4412
-
4413
- def delete(self) -> None:
4414
- deleted_by_skypilot = self._delete_oci_bucket(self.name)
4415
- if deleted_by_skypilot:
4416
- msg_str = f'Deleted OCI bucket {self.name}.'
4417
- else:
4418
- msg_str = (f'OCI bucket {self.name} may have been deleted '
4419
- f'externally. Removing from local state.')
4420
- logger.info(f'{colorama.Fore.GREEN}{msg_str}'
4421
- f'{colorama.Style.RESET_ALL}')
4422
-
4423
- def get_handle(self) -> StorageHandle:
4424
- return self.client.get_bucket(namespace_name=self.namespace,
4425
- bucket_name=self.name).data
4426
-
4427
- def batch_oci_rsync(self,
4428
- source_path_list: List[Path],
4429
- create_dirs: bool = False) -> None:
4430
- """Invokes oci sync to batch upload a list of local paths to Bucket
4431
-
4432
- Use OCI bulk operation to batch process the file upload
4433
-
4434
- Args:
4435
- source_path_list: List of paths to local files or directories
4436
- create_dirs: If the local_path is a directory and this is set to
4437
- False, the contents of the directory are directly uploaded to
4438
- root of the bucket. If the local_path is a directory and this is
4439
- set to True, the directory is created in the bucket root and
4440
- contents are uploaded to it.
4441
- """
4442
- sub_path = (f'{self._bucket_sub_path}/'
4443
- if self._bucket_sub_path else '')
4444
-
4445
- @oci.with_oci_env
4446
- def get_file_sync_command(base_dir_path, file_names):
4447
- includes = ' '.join(
4448
- [f'--include "{file_name}"' for file_name in file_names])
4449
- prefix_arg = ''
4450
- if sub_path:
4451
- prefix_arg = f'--object-prefix "{sub_path.strip("/")}"'
4452
- sync_command = (
4453
- 'oci os object bulk-upload --no-follow-symlinks --overwrite '
4454
- f'--bucket-name {self.name} --namespace-name {self.namespace} '
4455
- f'--region {self.region} --src-dir "{base_dir_path}" '
4456
- f'{prefix_arg} '
4457
- f'{includes}')
4458
-
4459
- return sync_command
4460
-
4461
- @oci.with_oci_env
4462
- def get_dir_sync_command(src_dir_path, dest_dir_name):
4463
- if dest_dir_name and not str(dest_dir_name).endswith('/'):
4464
- dest_dir_name = f'{dest_dir_name}/'
4465
-
4466
- excluded_list = storage_utils.get_excluded_files(src_dir_path)
4467
- excluded_list.append('.git/*')
4468
- excludes = ' '.join([
4469
- f'--exclude {shlex.quote(file_name)}'
4470
- for file_name in excluded_list
4471
- ])
4472
-
4473
- # we exclude .git directory from the sync
4474
- sync_command = (
4475
- 'oci os object bulk-upload --no-follow-symlinks --overwrite '
4476
- f'--bucket-name {self.name} --namespace-name {self.namespace} '
4477
- f'--region {self.region} '
4478
- f'--object-prefix "{sub_path}{dest_dir_name}" '
4479
- f'--src-dir "{src_dir_path}" {excludes}')
4480
-
4481
- return sync_command
4482
-
4483
- # Generate message for upload
4484
- if len(source_path_list) > 1:
4485
- source_message = f'{len(source_path_list)} paths'
4486
- else:
4487
- source_message = source_path_list[0]
4488
-
4489
- log_path = sky_logging.generate_tmp_logging_file_path(
4490
- _STORAGE_LOG_FILE_NAME)
4491
- sync_path = f'{source_message} -> oci://{self.name}/{sub_path}'
4492
- with rich_utils.safe_status(
4493
- ux_utils.spinner_message(f'Syncing {sync_path}',
4494
- log_path=log_path)):
4495
- data_utils.parallel_upload(
4496
- source_path_list=source_path_list,
4497
- filesync_command_generator=get_file_sync_command,
4498
- dirsync_command_generator=get_dir_sync_command,
4499
- log_path=log_path,
4500
- bucket_name=self.name,
4501
- access_denied_message=self._ACCESS_DENIED_MESSAGE,
4502
- create_dirs=create_dirs,
4503
- max_concurrent_uploads=1)
4504
-
4505
- logger.info(
4506
- ux_utils.finishing_message(f'Storage synced: {sync_path}',
4507
- log_path))
4508
-
4509
- def _get_bucket(self) -> Tuple[StorageHandle, bool]:
4510
- """Obtains the OCI bucket.
4511
- If the bucket exists, this method will connect to the bucket.
4512
-
4513
- If the bucket does not exist, there are three cases:
4514
- 1) Raise an error if the bucket source starts with oci://
4515
- 2) Return None if bucket has been externally deleted and
4516
- sync_on_reconstruction is False
4517
- 3) Create and return a new bucket otherwise
4518
-
4519
- Return tuple (Bucket, Boolean): The first item is the bucket
4520
- json payload from the OCI API call, the second item indicates
4521
- if this is a newly created bucket (True) or an existing bucket (False).
4522
-
4523
- Raises:
4524
- StorageBucketCreateError: If creating the bucket fails
4525
- StorageBucketGetError: If fetching a bucket fails
4526
- """
4527
- try:
4528
- get_bucket_response = self.client.get_bucket(
4529
- namespace_name=self.namespace, bucket_name=self.name)
4530
- bucket = get_bucket_response.data
4531
- return bucket, False
4532
- except oci.service_exception() as e:
4533
- if e.status == 404: # Not Found
4534
- if isinstance(self.source,
4535
- str) and self.source.startswith('oci://'):
4536
- with ux_utils.print_exception_no_traceback():
4537
- raise exceptions.StorageBucketGetError(
4538
- 'Attempted to connect to a non-existent bucket: '
4539
- f'{self.source}') from e
4540
- else:
4541
- # If bucket cannot be found (i.e., does not exist), it is
4542
- # to be created by Sky. However, creation is skipped if
4543
- # Store object is being reconstructed for deletion.
4544
- if self.sync_on_reconstruction:
4545
- bucket = self._create_oci_bucket(self.name)
4546
- return bucket, True
4547
- else:
4548
- return None, False
4549
- elif e.status == 401: # Unauthorized
4550
- # AccessDenied error for buckets that are private and not
4551
- # owned by user.
4552
- command = (
4553
- f'oci os object list --namespace-name {self.namespace} '
4554
- f'--bucket-name {self.name}')
4555
- with ux_utils.print_exception_no_traceback():
4556
- raise exceptions.StorageBucketGetError(
4557
- _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
4558
- f' To debug, consider running `{command}`.') from e
4559
- else:
4560
- # Unknown / unexpected error happened. This might happen when
4561
- # the Object Storage service itself is not functioning normally (e.g. a
4562
- # maintenance event causes an internal server error or request
4563
- # timeout, etc).
4564
- with ux_utils.print_exception_no_traceback():
4565
- raise exceptions.StorageBucketGetError(
4566
- f'Failed to connect to OCI bucket {self.name}') from e
4567
-
4568
- def mount_command(self, mount_path: str) -> str:
4569
- """Returns the command to mount the bucket to the mount_path.
4570
-
4571
- Uses Rclone to mount the bucket.
4572
-
4573
- Args:
4574
- mount_path: str; Path to mount the bucket to.
4575
- """
4576
- install_cmd = mounting_utils.get_rclone_install_cmd()
4577
- mount_cmd = mounting_utils.get_oci_mount_cmd(
4578
- mount_path=mount_path,
4579
- store_name=self.name,
4580
- region=str(self.region),
4581
- namespace=self.namespace,
4582
- compartment=self.bucket.compartment_id,
4583
- config_file=self.oci_config_file,
4584
- config_profile=self.config_profile)
4585
- version_check_cmd = mounting_utils.get_rclone_version_check_cmd()
4586
-
4587
- return mounting_utils.get_mounting_command(mount_path, install_cmd,
4588
- mount_cmd, version_check_cmd)
4589
-
4590
- def _download_file(self, remote_path: str, local_path: str) -> None:
4591
- """Downloads file from remote to local on OCI bucket
4592
-
4593
- Args:
4594
- remote_path: str; Remote path on OCI bucket
4595
- local_path: str; Local path on user's device
4596
- """
4597
- if remote_path.startswith(f'/{self.name}'):
4598
- # If the remote path is /bucket_name, we need to
4599
- # remove the leading /
4600
- remote_path = remote_path.lstrip('/')
4601
-
4602
- filename = os.path.basename(remote_path)
4603
- if not local_path.endswith(filename):
4604
- local_path = os.path.join(local_path, filename)
4605
-
4606
- @oci.with_oci_env
4607
- def get_file_download_command(remote_path, local_path):
4608
- download_command = (f'oci os object get --bucket-name {self.name} '
4609
- f'--namespace-name {self.namespace} '
4610
- f'--region {self.region} --name {remote_path} '
4611
- f'--file {local_path}')
4612
-
4613
- return download_command
4614
-
4615
- download_command = get_file_download_command(remote_path, local_path)
4616
-
4617
- try:
4618
- with rich_utils.safe_status(
4619
- f'[bold cyan]Downloading: {remote_path} -> {local_path}[/]'
4620
- ):
4621
- subprocess.check_output(download_command,
4622
- stderr=subprocess.STDOUT,
4623
- shell=True)
4624
- except subprocess.CalledProcessError as e:
4625
- logger.error(f'Download failed: {remote_path} -> {local_path}.\n'
4626
- f'Detail errors: {e.output}')
4627
- with ux_utils.print_exception_no_traceback():
4628
- raise exceptions.StorageBucketDeleteError(
4629
- f'Failed to download file {self.name}:{remote_path}.') from e
4630
-
4631
- def _create_oci_bucket(self, bucket_name: str) -> StorageHandle:
4632
- """Creates OCI bucket with specific name in specific region
4633
-
4634
- Args:
4635
- bucket_name: str; Name of bucket
4636
- region: str; Region name, e.g. us-central1, us-west1
4637
- """
4638
- logger.debug(f'_create_oci_bucket: {bucket_name}')
4639
- try:
4640
- create_bucket_response = self.client.create_bucket(
4641
- namespace_name=self.namespace,
4642
- create_bucket_details=oci.oci.object_storage.models.
4643
- CreateBucketDetails(
4644
- name=bucket_name,
4645
- compartment_id=self.compartment,
4646
- ))
4647
- bucket = create_bucket_response.data
4648
- return bucket
4649
- except oci.service_exception() as e:
4650
- with ux_utils.print_exception_no_traceback():
4651
- raise exceptions.StorageBucketCreateError(
4652
- f'Failed to create OCI bucket: {self.name}') from e
4653
-
4654
- def _delete_oci_bucket(self, bucket_name: str) -> bool:
4655
- """Deletes OCI bucket, including all objects in bucket
4656
-
4657
- Args:
4658
- bucket_name: str; Name of bucket
4659
-
4660
- Returns:
4661
- bool; True if bucket was deleted, False if it was deleted externally.
4662
- """
4663
- logger.debug(f'_delete_oci_bucket: {bucket_name}')
4664
-
4665
- @oci.with_oci_env
4666
- def get_bucket_delete_command(bucket_name):
4667
- remove_command = (f'oci os bucket delete --bucket-name '
4668
- f'--region {self.region} '
4669
- f'{bucket_name} --empty --force')
4670
-
4671
- return remove_command
4672
-
4673
- remove_command = get_bucket_delete_command(bucket_name)
4674
-
4675
- try:
4676
- with rich_utils.safe_status(
4677
- f'[bold cyan]Deleting OCI bucket {bucket_name}[/]'):
4678
- subprocess.check_output(remove_command.split(' '),
4679
- stderr=subprocess.STDOUT)
4680
- except subprocess.CalledProcessError as e:
4681
- if 'BucketNotFound' in e.output.decode('utf-8'):
4682
- logger.debug(
4683
- _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
4684
- bucket_name=bucket_name))
4685
- return False
4686
- else:
4687
- logger.error(e.output)
4688
- with ux_utils.print_exception_no_traceback():
4689
- raise exceptions.StorageBucketDeleteError(
4690
- f'Failed to delete OCI bucket {bucket_name}.')
4691
- return True
3979
+ # Region is from the specified name in <bucket>@<region> format.
3980
+ # Another case is name can also be set by the source, for example:
3981
+ # /datasets-storage:
3982
+ # source: oci://RAGData@us-sanjose-1
3983
+ # The name in above mount will be set to RAGData@us-sanjose-1
3984
+ region_in_name = None
3985
+ if name is not None and '@' in name:
3986
+ self._validate_bucket_expr(name)
3987
+ name, region_in_name = name.split('@')
4692
3988
 
3989
+ # Region is from the specified source in oci://<bucket>@<region> format
3990
+ region_in_source = None
3991
+ if isinstance(source,
3992
+ str) and source.startswith('oci://') and '@' in source:
3993
+ self._validate_bucket_expr(source)
3994
+ source, region_in_source = source.split('@')
4693
3995
 
4694
- class NebiusStore(AbstractStore):
4695
- """NebiusStore inherits from Storage Object and represents the backend
4696
- for S3 buckets.
4697
- """
3996
+ if region_in_name is not None and region_in_source is not None:
3997
+ # This should never happen because name and source will never be
3998
+ # the remote bucket at the same time.
3999
+ assert region_in_name == region_in_source, (
4000
+ f'Mismatched regions specified. Region in name is {region_in_name}, '
4001
+ f'but region in source is {region_in_source}')
4698
4002
 
4699
- _ACCESS_DENIED_MESSAGE = 'Access Denied'
4700
- _TIMEOUT_TO_PROPAGATES = 20
4003
+ if region_in_name is not None:
4004
+ region = region_in_name
4005
+ elif region_in_source is not None:
4006
+ region = region_in_source
4007
+
4008
+ # Default region is set to what is specified in the OCI config.
4009
+ if region is None:
4010
+ region = oci.get_oci_config()['region']
4011
+
4012
+ # From this point on, the name and source are canonical, meaning there
4013
+ # is no region (@<region> suffix) associated with them anymore.
4701
4014
 
4702
- def __init__(self,
4703
- name: str,
4704
- source: str,
4705
- region: Optional[str] = None,
4706
- is_sky_managed: Optional[bool] = None,
4707
- sync_on_reconstruction: bool = True,
4708
- _bucket_sub_path: Optional[str] = None):
4709
- self.client: 'mypy_boto3_s3.Client'
4710
- self.bucket: 'StorageHandle'
4711
4015
  super().__init__(name, source, region, is_sky_managed,
4712
4016
  sync_on_reconstruction, _bucket_sub_path)
4017
+ # TODO(zpoint): add _bucket_sub_path to the sync/mount/delete commands
4018
+
4019
+ def _validate_bucket_expr(self, bucket_expr: str):
4020
+ pattern = r'^(\w+://)?[A-Za-z0-9-._]+(@\w{2}-\w+-\d{1})$'
4021
+ if not re.match(pattern, bucket_expr):
4022
+ raise ValueError(
4023
+ 'The format for the bucket portion is <bucket>@<region> '
4024
+ 'when specifying a region with a bucket.')
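
As a standalone illustration of the <bucket>@<region> handling above (the helper name below is invented and the snippet is not part of this diff; only the validation pattern and the split are taken from the code), a region suffix such as @us-sanjose-1 is peeled off the bucket expression, and in the real constructor the resulting region takes precedence over one found in the source, falling back to the region in the local OCI config:

import re
from typing import Optional, Tuple

# Same pattern as _validate_bucket_expr above: an optional scheme, a bucket
# name, and an @<region> suffix such as @us-sanjose-1.
_BUCKET_EXPR_PATTERN = r'^(\w+://)?[A-Za-z0-9-._]+(@\w{2}-\w+-\d{1})$'


def split_bucket_expr(expr: str) -> Tuple[str, Optional[str]]:
    """Returns (canonical_expr, region) for e.g. 'oci://RAGData@us-sanjose-1'."""
    if '@' not in expr:
        return expr, None
    if not re.match(_BUCKET_EXPR_PATTERN, expr):
        raise ValueError('The format for the bucket portion is '
                         '<bucket>@<region> when specifying a region.')
    canonical, region = expr.split('@')
    return canonical, region


assert split_bucket_expr('oci://RAGData@us-sanjose-1') == ('oci://RAGData',
                                                           'us-sanjose-1')
assert split_bucket_expr('RAGData') == ('RAGData', None)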
4713
4025
 
4714
4026
  def _validate(self):
4715
4027
  if self.source is not None and isinstance(self.source, str):
4716
- if self.source.startswith('s3://'):
4717
- assert self.name == data_utils.split_s3_path(self.source)[0], (
4718
- 'S3 Bucket is specified as path, the name should be the'
4719
- ' same as S3 bucket.')
4720
- elif self.source.startswith('gs://'):
4721
- assert self.name == data_utils.split_gcs_path(self.source)[0], (
4722
- 'GCS Bucket is specified as path, the name should be '
4723
- 'the same as GCS bucket.')
4724
- assert data_utils.verify_gcs_bucket(self.name), (
4725
- f'Source specified as {self.source}, a GCS bucket. ',
4726
- 'GCS Bucket should exist.')
4727
- elif data_utils.is_az_container_endpoint(self.source):
4728
- storage_account_name, container_name, _ = (
4729
- data_utils.split_az_path(self.source))
4730
- assert self.name == container_name, (
4731
- 'Azure bucket is specified as path, the name should be '
4732
- 'the same as Azure bucket.')
4733
- assert data_utils.verify_az_bucket(
4734
- storage_account_name, self.name), (
4735
- f'Source specified as {self.source}, an Azure bucket. '
4736
- 'Azure bucket should exist.')
4737
- elif self.source.startswith('r2://'):
4738
- assert self.name == data_utils.split_r2_path(self.source)[0], (
4739
- 'R2 Bucket is specified as path, the name should be '
4740
- 'the same as R2 bucket.')
4741
- assert data_utils.verify_r2_bucket(self.name), (
4742
- f'Source specified as {self.source}, a R2 bucket. ',
4743
- 'R2 Bucket should exist.')
4744
- elif self.source.startswith('nebius://'):
4745
- assert self.name == data_utils.split_nebius_path(
4746
- self.source)[0], (
4747
- 'Nebius Object Storage is specified as path, the name '
4748
- 'should be the same as Nebius Object Storage bucket.')
4749
- elif self.source.startswith('cos://'):
4750
- assert self.name == data_utils.split_cos_path(self.source)[0], (
4751
- 'COS Bucket is specified as path, the name should be '
4752
- 'the same as COS bucket.')
4753
- assert data_utils.verify_ibm_cos_bucket(self.name), (
4754
- f'Source specified as {self.source}, a COS bucket. ',
4755
- 'COS Bucket should exist.')
4756
- elif self.source.startswith('oci://'):
4028
+ if self.source.startswith('oci://'):
4029
+ assert self.name == data_utils.split_oci_path(self.source)[0], (
4030
+ 'OCI Bucket is specified as path, the name should be '
4031
+ 'the same as OCI bucket.')
4032
+ elif not re.search(r'^\w+://', self.source):
4033
+ # Treat it as local path.
4034
+ pass
4035
+ else:
4757
4036
  raise NotImplementedError(
4758
- 'Moving data from OCI to S3 is currently not supported.')
4759
- # Validate name
4760
- self.name = S3Store.validate_name(self.name)
4037
+ f'Moving data from {self.source} to OCI is not supported.')
4761
4038
 
4039
+ # Validate name
4040
+ self.name = self.validate_name(self.name)
4762
4041
  # Check if the storage is enabled
4763
- if not _is_storage_cloud_enabled(str(clouds.Nebius())):
4042
+ if not _is_storage_cloud_enabled(str(clouds.OCI())):
4764
4043
  with ux_utils.print_exception_no_traceback():
4765
- raise exceptions.ResourcesUnavailableError((
4766
- 'Storage \'store: nebius\' specified, but '
4767
- 'Nebius access is disabled. To fix, enable '
4768
- 'Nebius by running `sky check`. More info: '
4769
- 'https://docs.skypilot.co/en/latest/getting-started/installation.html.' # pylint: disable=line-too-long
4770
- ))
4044
+ raise exceptions.ResourcesUnavailableError(
4045
+ 'Storage \'store: oci\' specified, but ' \
4046
+ 'OCI access is disabled. To fix, enable '\
4047
+ 'OCI by running `sky check`. '\
4048
+ 'More info: https://skypilot.readthedocs.io/en/latest/getting-started/installation.html.' # pylint: disable=line-too-long
4049
+ )
4050
+
4051
+ @classmethod
4052
+ def validate_name(cls, name) -> str:
4053
+ """Validates the name of the OCI store.
4054
+
4055
+ Source for rules: https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/managingbuckets.htm#Managing_Buckets # pylint: disable=line-too-long
4056
+ """
4057
+
4058
+ def _raise_no_traceback_name_error(err_str):
4059
+ with ux_utils.print_exception_no_traceback():
4060
+ raise exceptions.StorageNameError(err_str)
4061
+
4062
+ if name is not None and isinstance(name, str):
4063
+ # Check for overall length
4064
+ if not 1 <= len(name) <= 256:
4065
+ _raise_no_traceback_name_error(
4066
+ f'Invalid store name: name {name} must contain 1-256 '
4067
+ 'characters.')
4068
+
4069
+ # Check for valid characters and start/end with a number or letter
4070
+ pattern = r'^[A-Za-z0-9-._]+$'
4071
+ if not re.match(pattern, name):
4072
+ _raise_no_traceback_name_error(
4073
+ f'Invalid store name: name {name} can only contain '
4074
+ 'upper or lower case letters, numeric characters, hyphens '
4075
+ '(-), underscores (_), and dots (.). Spaces are not '
4076
+ 'allowed. Names must start and end with a number or '
4077
+ 'letter.')
4078
+ else:
4079
+ _raise_no_traceback_name_error('Store name must be specified.')
4080
+ return name
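
The naming rules above reduce to a length check plus a single character-class pattern; a minimal sketch of the same checks (the function name is invented for illustration):

import re


def is_valid_oci_bucket_name(name: str) -> bool:
    # Mirrors the length and character checks in validate_name above.
    return (isinstance(name, str) and 1 <= len(name) <= 256 and
            re.match(r'^[A-Za-z0-9-._]+$', name) is not None)


assert is_valid_oci_bucket_name('skypilot-datasets_v1.0')
assert not is_valid_oci_bucket_name('no spaces allowed')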
4771
4081
 
4772
4082
  def initialize(self):
4773
- """Initializes the Nebius Object Storage on the cloud.
4083
+ """Initializes the OCI store object on the cloud.
4774
4084
 
4775
4085
  Initialization involves fetching bucket if exists, or creating it if
4776
4086
  it does not.
@@ -4780,7 +4090,21 @@ class NebiusStore(AbstractStore):
4780
4090
  StorageBucketGetError: If fetching existing bucket fails
4781
4091
  StorageInitError: If general initialization fails.
4782
4092
  """
4783
- self.client = data_utils.create_nebius_client()
4093
+ # pylint: disable=import-outside-toplevel
4094
+ from sky.clouds.utils import oci_utils
4095
+ from sky.provision.oci.query_utils import query_helper
4096
+
4097
+ self.oci_config_file = oci.get_config_file()
4098
+ self.config_profile = oci_utils.oci_config.get_profile()
4099
+
4100
+ ## pylint: disable=line-too-long
4101
+ # What's a compartment? See https://docs.oracle.com/en/cloud/foundation/cloud_architecture/governance/compartments.html
4102
+ self.compartment = query_helper.find_compartment(self.region)
4103
+ self.client = oci.get_object_storage_client(region=self.region,
4104
+ profile=self.config_profile)
4105
+ self.namespace = self.client.get_namespace(
4106
+ compartment_id=oci.get_oci_config()['tenancy']).data
4107
+
4784
4108
  self.bucket, is_new_bucket = self._get_bucket()
4785
4109
  if self.is_sky_managed is None:
4786
4110
  # If is_sky_managed is not specified, then this is a new storage
@@ -4800,20 +4124,12 @@ class NebiusStore(AbstractStore):
4800
4124
  """
4801
4125
  try:
4802
4126
  if isinstance(self.source, list):
4803
- self.batch_aws_rsync(self.source, create_dirs=True)
4127
+ self.batch_oci_rsync(self.source, create_dirs=True)
4804
4128
  elif self.source is not None:
4805
- if self.source.startswith('nebius://'):
4129
+ if self.source.startswith('oci://'):
4806
4130
  pass
4807
- elif self.source.startswith('s3://'):
4808
- self._transfer_to_nebius()
4809
- elif self.source.startswith('gs://'):
4810
- self._transfer_to_nebius()
4811
- elif self.source.startswith('r2://'):
4812
- self._transfer_to_nebius()
4813
- elif self.source.startswith('oci://'):
4814
- self._transfer_to_nebius()
4815
4131
  else:
4816
- self.batch_aws_rsync([self.source])
4132
+ self.batch_oci_rsync([self.source])
4817
4133
  except exceptions.StorageUploadError:
4818
4134
  raise
4819
4135
  except Exception as e:
@@ -4821,45 +4137,25 @@ class NebiusStore(AbstractStore):
4821
4137
  f'Upload failed for store {self.name}') from e
4822
4138
 
4823
4139
  def delete(self) -> None:
4824
- if self._bucket_sub_path is not None and not self.is_sky_managed:
4825
- return self._delete_sub_path()
4826
-
4827
- deleted_by_skypilot = self._delete_nebius_bucket(self.name)
4140
+ deleted_by_skypilot = self._delete_oci_bucket(self.name)
4828
4141
  if deleted_by_skypilot:
4829
- msg_str = f'Deleted Nebius bucket {self.name}.'
4142
+ msg_str = f'Deleted OCI bucket {self.name}.'
4830
4143
  else:
4831
- msg_str = (f'Nebius bucket {self.name} may have been deleted '
4144
+ msg_str = (f'OCI bucket {self.name} may have been deleted '
4832
4145
  f'externally. Removing from local state.')
4833
4146
  logger.info(f'{colorama.Fore.GREEN}{msg_str}'
4834
4147
  f'{colorama.Style.RESET_ALL}')
4835
4148
 
4836
- def _delete_sub_path(self) -> None:
4837
- assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
4838
- deleted_by_skypilot = self._delete_nebius_bucket_sub_path(
4839
- self.name, self._bucket_sub_path)
4840
- if deleted_by_skypilot:
4841
- msg_str = (f'Removed objects from S3 bucket '
4842
- f'{self.name}/{self._bucket_sub_path}.')
4843
- else:
4844
- msg_str = (f'Failed to remove objects from S3 bucket '
4845
- f'{self.name}/{self._bucket_sub_path}.')
4846
- logger.info(f'{colorama.Fore.GREEN}{msg_str}'
4847
- f'{colorama.Style.RESET_ALL}')
4848
-
4849
4149
  def get_handle(self) -> StorageHandle:
4850
- return nebius.resource('s3').Bucket(self.name)
4150
+ return self.client.get_bucket(namespace_name=self.namespace,
4151
+ bucket_name=self.name).data
4851
4152
 
4852
- def batch_aws_rsync(self,
4153
+ def batch_oci_rsync(self,
4853
4154
  source_path_list: List[Path],
4854
4155
  create_dirs: bool = False) -> None:
4855
- """Invokes aws s3 sync to batch upload a list of local paths to S3
4856
-
4857
- AWS Sync by default uses 10 threads to upload files to the bucket. To
4858
- increase parallelism, modify max_concurrent_requests in your aws config
4859
- file (Default path: ~/.aws/config).
4156
+ """Invokes oci sync to batch upload a list of local paths to Bucket
4860
4157
 
4861
- Since aws s3 sync does not support batch operations, we construct
4862
- multiple commands to be run in parallel.
4158
+ Use OCI bulk operation to batch process the file upload
4863
4159
 
4864
4160
  Args:
4865
4161
  source_path_list: List of paths to local files or directories
@@ -4869,34 +4165,45 @@ class NebiusStore(AbstractStore):
4869
4165
  set to True, the directory is created in the bucket root and
4870
4166
  contents are uploaded to it.
4871
4167
  """
4872
- sub_path = (f'/{self._bucket_sub_path}'
4168
+ sub_path = (f'{self._bucket_sub_path}/'
4873
4169
  if self._bucket_sub_path else '')
4874
4170
 
4171
+ @oci.with_oci_env
4875
4172
  def get_file_sync_command(base_dir_path, file_names):
4876
- includes = ' '.join([
4877
- f'--include {shlex.quote(file_name)}'
4878
- for file_name in file_names
4879
- ])
4880
- base_dir_path = shlex.quote(base_dir_path)
4881
- sync_command = ('aws s3 sync --no-follow-symlinks --exclude="*" '
4882
- f'{includes} {base_dir_path} '
4883
- f's3://{self.name}{sub_path} '
4884
- f'--profile={nebius.NEBIUS_PROFILE_NAME}')
4173
+ includes = ' '.join(
4174
+ [f'--include "{file_name}"' for file_name in file_names])
4175
+ prefix_arg = ''
4176
+ if sub_path:
4177
+ prefix_arg = f'--object-prefix "{sub_path.strip("/")}"'
4178
+ sync_command = (
4179
+ 'oci os object bulk-upload --no-follow-symlinks --overwrite '
4180
+ f'--bucket-name {self.name} --namespace-name {self.namespace} '
4181
+ f'--region {self.region} --src-dir "{base_dir_path}" '
4182
+ f'{prefix_arg} '
4183
+ f'{includes}')
4184
+
4885
4185
  return sync_command
4886
4186
 
4187
+ @oci.with_oci_env
4887
4188
  def get_dir_sync_command(src_dir_path, dest_dir_name):
4888
- # we exclude .git directory from the sync
4189
+ if dest_dir_name and not str(dest_dir_name).endswith('/'):
4190
+ dest_dir_name = f'{dest_dir_name}/'
4191
+
4889
4192
  excluded_list = storage_utils.get_excluded_files(src_dir_path)
4890
4193
  excluded_list.append('.git/*')
4891
4194
  excludes = ' '.join([
4892
4195
  f'--exclude {shlex.quote(file_name)}'
4893
4196
  for file_name in excluded_list
4894
4197
  ])
4895
- src_dir_path = shlex.quote(src_dir_path)
4896
- sync_command = (f'aws s3 sync --no-follow-symlinks {excludes} '
4897
- f'{src_dir_path} '
4898
- f's3://{self.name}{sub_path}/{dest_dir_name} '
4899
- f'--profile={nebius.NEBIUS_PROFILE_NAME}')
4198
+
4199
+ # we exclude .git directory from the sync
4200
+ sync_command = (
4201
+ 'oci os object bulk-upload --no-follow-symlinks --overwrite '
4202
+ f'--bucket-name {self.name} --namespace-name {self.namespace} '
4203
+ f'--region {self.region} '
4204
+ f'--object-prefix "{sub_path}{dest_dir_name}" '
4205
+ f'--src-dir "{src_dir_path}" {excludes}')
4206
+
4900
4207
  return sync_command
4901
4208
 
4902
4209
  # Generate message for upload
@@ -4907,210 +4214,347 @@ class NebiusStore(AbstractStore):
4907
4214
 
4908
4215
  log_path = sky_logging.generate_tmp_logging_file_path(
4909
4216
  _STORAGE_LOG_FILE_NAME)
4910
- sync_path = f'{source_message} -> nebius://{self.name}{sub_path}/'
4217
+ sync_path = f'{source_message} -> oci://{self.name}/{sub_path}'
4911
4218
  with rich_utils.safe_status(
4912
4219
  ux_utils.spinner_message(f'Syncing {sync_path}',
4913
4220
  log_path=log_path)):
4914
4221
  data_utils.parallel_upload(
4915
- source_path_list,
4916
- get_file_sync_command,
4917
- get_dir_sync_command,
4918
- log_path,
4919
- self.name,
4920
- self._ACCESS_DENIED_MESSAGE,
4222
+ source_path_list=source_path_list,
4223
+ filesync_command_generator=get_file_sync_command,
4224
+ dirsync_command_generator=get_dir_sync_command,
4225
+ log_path=log_path,
4226
+ bucket_name=self.name,
4227
+ access_denied_message=self._ACCESS_DENIED_MESSAGE,
4921
4228
  create_dirs=create_dirs,
4922
- max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS)
4923
- logger.info(
4924
- ux_utils.finishing_message(f'Storage synced: {sync_path}',
4925
- log_path))
4229
+ max_concurrent_uploads=1)
4926
4230
 
4927
- def _transfer_to_nebius(self) -> None:
4928
- assert isinstance(self.source, str), self.source
4929
- if self.source.startswith('gs://'):
4930
- data_transfer.gcs_to_nebius(self.name, self.name)
4931
- elif self.source.startswith('r2://'):
4932
- data_transfer.r2_to_nebius(self.name, self.name)
4933
- elif self.source.startswith('s3://'):
4934
- data_transfer.s3_to_nebius(self.name, self.name)
4231
+ logger.info(
4232
+ ux_utils.finishing_message(f'Storage synced: {sync_path}',
4233
+ log_path))
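
For reference, the directory-sync helper above assembles an 'oci os object bulk-upload' invocation; the sketch below rebuilds the same command string for hypothetical values (bucket, namespace and paths are made up, only the .git exclude is shown, and the real helper additionally wraps the command with oci.with_oci_env):

import shlex

# Hypothetical values, for illustration only.
bucket_name = 'my-bucket'
namespace = 'mytenancynamespace'
region = 'us-sanjose-1'
sub_path = ''            # '<prefix>/' when _bucket_sub_path is set
dest_dir_name = 'data/'
src_dir_path = '/tmp/data'
excluded_list = ['.git/*']

excludes = ' '.join(
    f'--exclude {shlex.quote(file_name)}' for file_name in excluded_list)
sync_command = ('oci os object bulk-upload --no-follow-symlinks --overwrite '
                f'--bucket-name {bucket_name} --namespace-name {namespace} '
                f'--region {region} '
                f'--object-prefix "{sub_path}{dest_dir_name}" '
                f'--src-dir "{src_dir_path}" {excludes}')
print(sync_command)
# Prints (as a single line, shown wrapped here):
# oci os object bulk-upload --no-follow-symlinks --overwrite
#   --bucket-name my-bucket --namespace-name mytenancynamespace
#   --region us-sanjose-1 --object-prefix "data/"
#   --src-dir "/tmp/data" --exclude '.git/*'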
4935
4234
 
4936
4235
  def _get_bucket(self) -> Tuple[StorageHandle, bool]:
4937
- """Obtains the S3 bucket.
4236
+ """Obtains the OCI bucket.
4237
+ If the bucket exists, this method will connect to the bucket.
4938
4238
 
4939
- If the bucket exists, this method will return the bucket.
4940
4239
  If the bucket does not exist, there are three cases:
4941
- 1) Raise an error if the bucket source starts with s3://
4240
+ 1) Raise an error if the bucket source starts with oci://
4942
4241
  2) Return None if bucket has been externally deleted and
4943
4242
  sync_on_reconstruction is False
4944
4243
  3) Create and return a new bucket otherwise
4945
4244
 
4245
+ Return tuple (Bucket, Boolean): The first item is the bucket
4246
+ json payload from the OCI API call, the second item indicates
4247
+ if this is a newly created bucket (True) or an existing bucket (False).
4248
+
4946
4249
  Raises:
4947
- StorageSpecError: If externally created bucket is attempted to be
4948
- mounted without specifying storage source.
4949
4250
  StorageBucketCreateError: If creating the bucket fails
4950
4251
  StorageBucketGetError: If fetching a bucket fails
4951
- StorageExternalDeletionError: If externally deleted storage is
4952
- attempted to be fetched while reconstructing the storage for
4953
- 'sky storage delete' or 'sky start'
4954
4252
  """
4955
- nebius_s = nebius.resource('s3')
4956
- bucket = nebius_s.Bucket(self.name)
4957
4253
  try:
4958
- # Try Public bucket case.
4959
- # This line does not error out if the bucket is an external public
4960
- # bucket or if it is a user's bucket that is publicly
4961
- # accessible.
4962
- self.client.head_bucket(Bucket=self.name)
4963
- self._validate_existing_bucket()
4254
+ get_bucket_response = self.client.get_bucket(
4255
+ namespace_name=self.namespace, bucket_name=self.name)
4256
+ bucket = get_bucket_response.data
4964
4257
  return bucket, False
4965
- except aws.botocore_exceptions().ClientError as e:
4966
- error_code = e.response['Error']['Code']
4967
- # AccessDenied error for buckets that are private and not owned by
4968
- # user.
4969
- if error_code == '403':
4970
- command = (f'aws s3 ls s3://{self.name} '
4971
- f'--profile={nebius.NEBIUS_PROFILE_NAME}')
4258
+ except oci.service_exception() as e:
4259
+ if e.status == 404: # Not Found
4260
+ if isinstance(self.source,
4261
+ str) and self.source.startswith('oci://'):
4262
+ with ux_utils.print_exception_no_traceback():
4263
+ raise exceptions.StorageBucketGetError(
4264
+ 'Attempted to connect to a non-existent bucket: '
4265
+ f'{self.source}') from e
4266
+ else:
4267
+ # If bucket cannot be found (i.e., does not exist), it is
4268
+ # to be created by Sky. However, creation is skipped if
4269
+ # Store object is being reconstructed for deletion.
4270
+ if self.sync_on_reconstruction:
4271
+ bucket = self._create_oci_bucket(self.name)
4272
+ return bucket, True
4273
+ else:
4274
+ return None, False
4275
+ elif e.status == 401: # Unauthorized
4276
+ # AccessDenied error for buckets that are private and not
4277
+ # owned by user.
4278
+ command = (
4279
+ f'oci os object list --namespace-name {self.namespace} '
4280
+ f'--bucket-name {self.name}')
4972
4281
  with ux_utils.print_exception_no_traceback():
4973
4282
  raise exceptions.StorageBucketGetError(
4974
4283
  _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
4975
4284
  f' To debug, consider running `{command}`.') from e
4285
+ else:
4286
+ # Unknown / unexpected error happened. This might happen when
4287
+ # the Object Storage service itself is not functioning normally (e.g. a
4288
+ # maintenance event causes an internal server error or request
4289
+ # timeout, etc).
4290
+ with ux_utils.print_exception_no_traceback():
4291
+ raise exceptions.StorageBucketGetError(
4292
+ f'Failed to connect to OCI bucket {self.name}') from e
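
The 404 branch above encodes a three-way decision for a missing bucket; a small standalone sketch of that decision (the function name and return values are invented for illustration):

from typing import Optional, Union


def action_for_missing_bucket(source: Optional[Union[str, list]],
                              sync_on_reconstruction: bool) -> str:
    """Mirrors the 404 branch of _get_bucket above."""
    if isinstance(source, str) and source.startswith('oci://'):
        # An oci:// source must already exist; it is never created here.
        return 'raise StorageBucketGetError'
    if sync_on_reconstruction:
        return 'create the bucket'
    # The store is only being reconstructed (e.g. for deletion).
    return 'return None'


assert action_for_missing_bucket('oci://RAGData', True) == (
    'raise StorageBucketGetError')
assert action_for_missing_bucket('~/local/dir', False) == 'return None'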
4976
4293
 
4977
- if isinstance(self.source, str) and self.source.startswith('nebius://'):
4978
- with ux_utils.print_exception_no_traceback():
4979
- raise exceptions.StorageBucketGetError(
4980
- 'Attempted to use a non-existent bucket as a source: '
4981
- f'{self.source}. Consider using `aws s3 ls '
4982
- f's3://{self.name} '
4983
- f'--profile={nebius.NEBIUS_PROFILE_NAME}` to debug.')
4294
+ def mount_command(self, mount_path: str) -> str:
4295
+ """Returns the command to mount the bucket to the mount_path.
4984
4296
 
4985
- # If bucket cannot be found in both private and public settings,
4986
- # the bucket is to be created by Sky. However, creation is skipped if
4987
- # Store object is being reconstructed for deletion or re-mount with
4988
- # sky start, and error is raised instead.
4989
- if self.sync_on_reconstruction:
4990
- bucket = self._create_nebius_bucket(self.name)
4991
- return bucket, True
4992
- else:
4993
- # Raised when Storage object is reconstructed for sky storage
4994
- # delete or to re-mount Storages with sky start but the storage
4995
- # is already removed externally.
4996
- raise exceptions.StorageExternalDeletionError(
4997
- 'Attempted to fetch a non-existent bucket: '
4998
- f'{self.name}')
4297
+ Uses Rclone to mount the bucket.
4298
+
4299
+ Args:
4300
+ mount_path: str; Path to mount the bucket to.
4301
+ """
4302
+ install_cmd = mounting_utils.get_rclone_install_cmd()
4303
+ mount_cmd = mounting_utils.get_oci_mount_cmd(
4304
+ mount_path=mount_path,
4305
+ store_name=self.name,
4306
+ region=str(self.region),
4307
+ namespace=self.namespace,
4308
+ compartment=self.bucket.compartment_id,
4309
+ config_file=self.oci_config_file,
4310
+ config_profile=self.config_profile)
4311
+ version_check_cmd = mounting_utils.get_rclone_version_check_cmd()
4312
+
4313
+ return mounting_utils.get_mounting_command(mount_path, install_cmd,
4314
+ mount_cmd, version_check_cmd)
4999
4315
 
5000
4316
  def _download_file(self, remote_path: str, local_path: str) -> None:
5001
- """Downloads file from remote to local on s3 bucket
5002
- using the boto3 API
4317
+ """Downloads file from remote to local on OCI bucket
5003
4318
 
5004
4319
  Args:
5005
- remote_path: str; Remote path on S3 bucket
4320
+ remote_path: str; Remote path on OCI bucket
5006
4321
  local_path: str; Local path on user's device
5007
4322
  """
5008
- self.bucket.download_file(remote_path, local_path)
4323
+ if remote_path.startswith(f'/{self.name}'):
4324
+ # If the remote path is /bucket_name, we need to
4325
+ # remove the leading /
4326
+ remote_path = remote_path.lstrip('/')
5009
4327
 
5010
- def mount_command(self, mount_path: str) -> str:
5011
- """Returns the command to mount the bucket to the mount_path.
4328
+ filename = os.path.basename(remote_path)
4329
+ if not local_path.endswith(filename):
4330
+ local_path = os.path.join(local_path, filename)
4331
+
4332
+ @oci.with_oci_env
4333
+ def get_file_download_command(remote_path, local_path):
4334
+ download_command = (f'oci os object get --bucket-name {self.name} '
4335
+ f'--namespace-name {self.namespace} '
4336
+ f'--region {self.region} --name {remote_path} '
4337
+ f'--file {local_path}')
5012
4338
 
5013
- Uses goofys to mount the bucket.
4339
+ return download_command
5014
4340
 
5015
- Args:
5016
- mount_path: str; Path to mount the bucket to.
5017
- """
5018
- install_cmd = mounting_utils.get_s3_mount_install_cmd()
5019
- nebius_profile_name = nebius.NEBIUS_PROFILE_NAME
5020
- endpoint_url = self.client.meta.endpoint_url
5021
- mount_cmd = mounting_utils.get_nebius_mount_cmd(nebius_profile_name,
5022
- self.bucket.name,
5023
- endpoint_url,
5024
- mount_path,
5025
- self._bucket_sub_path)
5026
- return mounting_utils.get_mounting_command(mount_path, install_cmd,
5027
- mount_cmd)
4341
+ download_command = get_file_download_command(remote_path, local_path)
4342
+
4343
+ try:
4344
+ with rich_utils.safe_status(
4345
+ f'[bold cyan]Downloading: {remote_path} -> {local_path}[/]'
4346
+ ):
4347
+ subprocess.check_output(download_command,
4348
+ stderr=subprocess.STDOUT,
4349
+ shell=True)
4350
+ except subprocess.CalledProcessError as e:
4351
+ logger.error(f'Download failed: {remote_path} -> {local_path}.\n'
4352
+ f'Detail errors: {e.output}')
4353
+ with ux_utils.print_exception_no_traceback():
4354
+ raise exceptions.StorageBucketDeleteError(
4355
+ f'Failed to download file {self.name}:{remote_path}.') from e
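
Similarly to the upload path, the download helper above shells out to the OCI CLI; for hypothetical values (made up for illustration) the generated command looks like this:

bucket_name = 'my-bucket'
namespace = 'mytenancynamespace'
region = 'us-sanjose-1'
remote_path = 'data/train.csv'
local_path = '/tmp/train.csv'

download_command = (f'oci os object get --bucket-name {bucket_name} '
                    f'--namespace-name {namespace} '
                    f'--region {region} --name {remote_path} '
                    f'--file {local_path}')
print(download_command)
# Prints (single line, shown wrapped here):
# oci os object get --bucket-name my-bucket --namespace-name
#   mytenancynamespace --region us-sanjose-1 --name data/train.csv
#   --file /tmp/train.csv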
5028
4356
 
5029
- def _create_nebius_bucket(self, bucket_name: str) -> StorageHandle:
5030
- """Creates S3 bucket with specific name
4357
+ def _create_oci_bucket(self, bucket_name: str) -> StorageHandle:
4358
+ """Creates OCI bucket with specific name in specific region
5031
4359
 
5032
4360
  Args:
5033
4361
  bucket_name: str; Name of bucket
5034
- Raises:
5035
- StorageBucketCreateError: If bucket creation fails.
4362
+ region: str; Region name, e.g. us-central1, us-west1
5036
4363
  """
5037
- nebius_client = self.client
4364
+ logger.debug(f'_create_oci_bucket: {bucket_name}')
5038
4365
  try:
5039
- nebius_client.create_bucket(Bucket=bucket_name)
5040
- except aws.botocore_exceptions().ClientError as e:
4366
+ create_bucket_response = self.client.create_bucket(
4367
+ namespace_name=self.namespace,
4368
+ create_bucket_details=oci.oci.object_storage.models.
4369
+ CreateBucketDetails(
4370
+ name=bucket_name,
4371
+ compartment_id=self.compartment,
4372
+ ))
4373
+ bucket = create_bucket_response.data
4374
+ return bucket
4375
+ except oci.service_exception() as e:
5041
4376
  with ux_utils.print_exception_no_traceback():
5042
4377
  raise exceptions.StorageBucketCreateError(
5043
- f'Attempted to create a bucket '
5044
- f'{self.name} but failed.') from e
5045
- return nebius.resource('s3').Bucket(bucket_name)
4378
+ f'Failed to create OCI bucket: {self.name}') from e
4379
+
4380
+ def _delete_oci_bucket(self, bucket_name: str) -> bool:
4381
+ """Deletes OCI bucket, including all objects in bucket
4382
+
4383
+ Args:
4384
+ bucket_name: str; Name of bucket
4385
+
4386
+ Returns:
4387
+ bool; True if bucket was deleted, False if it was deleted externally.
4388
+ """
4389
+ logger.debug(f'_delete_oci_bucket: {bucket_name}')
4390
+
4391
+ @oci.with_oci_env
4392
+ def get_bucket_delete_command(bucket_name):
4393
+ remove_command = (f'oci os bucket delete --bucket-name '
4394
+ f'--region {self.region} '
4395
+ f'{bucket_name} --empty --force')
4396
+
4397
+ return remove_command
4398
+
4399
+ remove_command = get_bucket_delete_command(bucket_name)
5046
4400
 
5047
- def _execute_nebius_remove_command(self, command: str, bucket_name: str,
5048
- hint_operating: str,
5049
- hint_failed: str) -> bool:
5050
4401
  try:
5051
4402
  with rich_utils.safe_status(
5052
- ux_utils.spinner_message(hint_operating)):
5053
- subprocess.check_output(command.split(' '),
4403
+ f'[bold cyan]Deleting OCI bucket {bucket_name}[/]'):
4404
+ subprocess.check_output(remove_command.split(' '),
5054
4405
  stderr=subprocess.STDOUT)
5055
4406
  except subprocess.CalledProcessError as e:
5056
- if 'NoSuchBucket' in e.output.decode('utf-8'):
4407
+ if 'BucketNotFound' in e.output.decode('utf-8'):
5057
4408
  logger.debug(
5058
4409
  _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
5059
4410
  bucket_name=bucket_name))
5060
4411
  return False
5061
4412
  else:
4413
+ logger.error(e.output)
5062
4414
  with ux_utils.print_exception_no_traceback():
5063
4415
  raise exceptions.StorageBucketDeleteError(
5064
- f'{hint_failed}'
5065
- f'Detailed error: {e.output}')
4416
+ f'Failed to delete OCI bucket {bucket_name}.')
5066
4417
  return True
5067
4418
 
5068
- def _delete_nebius_bucket(self, bucket_name: str) -> bool:
5069
- """Deletes S3 bucket, including all objects in bucket
5070
4419
 
5071
- Args:
5072
- bucket_name: str; Name of bucket
4420
+ @register_s3_compatible_store
4421
+ class S3Store(S3CompatibleStore):
4422
+ """S3Store inherits from S3CompatibleStore and represents the backend
4423
+ for S3 buckets.
4424
+ """
5073
4425
 
5074
- Returns:
5075
- bool; True if bucket was deleted, False if it was deleted externally.
4426
+ _DEFAULT_REGION = 'us-east-1'
4427
+ _CUSTOM_ENDPOINT_REGIONS = [
4428
+ 'ap-east-1', 'me-south-1', 'af-south-1', 'eu-south-1', 'eu-south-2',
4429
+ 'ap-south-2', 'ap-southeast-3', 'ap-southeast-4', 'me-central-1',
4430
+ 'il-central-1'
4431
+ ]
5076
4432
 
5077
- Raises:
5078
- StorageBucketDeleteError: If deleting the bucket fails.
5079
- """
5080
- # Deleting objects is very slow programmatically
5081
- # (i.e. bucket.objects.all().delete() is slow).
5082
- # In addition, standard delete operations (i.e. via `aws s3 rm`)
5083
- # are slow, since AWS puts deletion markers.
5084
- # https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
5085
- # The fastest way to delete is to run `aws s3 rb --force`,
5086
- # which removes the bucket by force.
5087
- remove_command = (f'aws s3 rb s3://{bucket_name} --force '
5088
- f'--profile={nebius.NEBIUS_PROFILE_NAME}')
5089
-
5090
- success = self._execute_nebius_remove_command(
5091
- remove_command, bucket_name,
5092
- f'Deleting Nebius bucket {bucket_name}',
5093
- f'Failed to delete Nebius bucket {bucket_name}.')
5094
- if not success:
5095
- return False
4433
+ def __init__(self,
4434
+ name: str,
4435
+ source: str,
4436
+ region: Optional[str] = None,
4437
+ is_sky_managed: Optional[bool] = None,
4438
+ sync_on_reconstruction: bool = True,
4439
+ _bucket_sub_path: Optional[str] = None):
4440
+ # TODO(romilb): This is purely a stopgap fix for
4441
+ # https://github.com/skypilot-org/skypilot/issues/3405
4442
+ # We should eventually make all opt-in regions also work for S3 by
4443
+ # passing the right endpoint flags.
4444
+ if region in self._CUSTOM_ENDPOINT_REGIONS:
4445
+ logger.warning('AWS opt-in regions are not supported for S3. '
4446
+ f'Falling back to default region '
4447
+ f'{self._DEFAULT_REGION} for bucket {name!r}.')
4448
+ region = self._DEFAULT_REGION
4449
+ super().__init__(name, source, region, is_sky_managed,
4450
+ sync_on_reconstruction, _bucket_sub_path)
5096
4451
 
5097
- # Wait until bucket deletion propagates on Nebius servers
5098
- start_time = time.time()
5099
- while data_utils.verify_nebius_bucket(bucket_name):
5100
- if time.time() - start_time > self._TIMEOUT_TO_PROPAGATES:
5101
- raise TimeoutError(
5102
- f'Timeout while verifying {bucket_name} Nebius bucket.')
5103
- time.sleep(0.1)
5104
- return True
4452
+ @classmethod
4453
+ def get_config(cls) -> S3CompatibleConfig:
4454
+ """Return the configuration for AWS S3."""
4455
+ return S3CompatibleConfig(
4456
+ store_type='S3',
4457
+ url_prefix='s3://',
4458
+ client_factory=data_utils.create_s3_client,
4459
+ resource_factory=lambda name: aws.resource('s3').Bucket(name),
4460
+ split_path=data_utils.split_s3_path,
4461
+ verify_bucket=data_utils.verify_s3_bucket,
4462
+ cloud_name=str(clouds.AWS()),
4463
+ default_region=cls._DEFAULT_REGION,
4464
+ mount_cmd_factory=mounting_utils.get_s3_mount_cmd,
4465
+ )
4466
+
4467
+ def mount_cached_command(self, mount_path: str) -> str:
4468
+ install_cmd = mounting_utils.get_rclone_install_cmd()
4469
+ rclone_profile_name = (
4470
+ data_utils.Rclone.RcloneStores.S3.get_profile_name(self.name))
4471
+ rclone_config = data_utils.Rclone.RcloneStores.S3.get_config(
4472
+ rclone_profile_name=rclone_profile_name)
4473
+ mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
4474
+ rclone_config, rclone_profile_name, self.bucket.name, mount_path)
4475
+ return mounting_utils.get_mounting_command(mount_path, install_cmd,
4476
+ mount_cached_cmd)
4477
+
4478
+
4479
+ @register_s3_compatible_store
4480
+ class R2Store(S3CompatibleStore):
4481
+ """R2Store inherits from S3CompatibleStore and represents the backend
4482
+ for R2 buckets.
4483
+ """
4484
+
4485
+ def __init__(self,
4486
+ name: str,
4487
+ source: str,
4488
+ region: Optional[str] = 'auto',
4489
+ is_sky_managed: Optional[bool] = None,
4490
+ sync_on_reconstruction: bool = True,
4491
+ _bucket_sub_path: Optional[str] = None):
4492
+ super().__init__(name, source, region, is_sky_managed,
4493
+ sync_on_reconstruction, _bucket_sub_path)
4494
+
4495
+ @classmethod
4496
+ def get_config(cls) -> S3CompatibleConfig:
4497
+ """Return the configuration for Cloudflare R2."""
4498
+ return S3CompatibleConfig(
4499
+ store_type='R2',
4500
+ url_prefix='r2://',
4501
+ client_factory=lambda region: data_utils.create_r2_client(region or
4502
+ 'auto'),
4503
+ resource_factory=lambda name: cloudflare.resource('s3').Bucket(name
4504
+ ),
4505
+ split_path=data_utils.split_r2_path,
4506
+ verify_bucket=data_utils.verify_r2_bucket,
4507
+ credentials_file=cloudflare.R2_CREDENTIALS_PATH,
4508
+ aws_profile=cloudflare.R2_PROFILE_NAME,
4509
+ get_endpoint_url=lambda: cloudflare.create_endpoint(), # pylint: disable=unnecessary-lambda
4510
+ extra_cli_args=['--checksum-algorithm', 'CRC32'], # R2 specific
4511
+ cloud_name=cloudflare.NAME,
4512
+ default_region='auto',
4513
+ mount_cmd_factory=mounting_utils.get_r2_mount_cmd,
4514
+ )
4515
+
4516
+ def mount_cached_command(self, mount_path: str) -> str:
4517
+ """R2-specific cached mount implementation using rclone."""
4518
+ install_cmd = mounting_utils.get_rclone_install_cmd()
4519
+ rclone_profile_name = (
4520
+ data_utils.Rclone.RcloneStores.R2.get_profile_name(self.name))
4521
+ rclone_config = data_utils.Rclone.RcloneStores.R2.get_config(
4522
+ rclone_profile_name=rclone_profile_name)
4523
+ mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
4524
+ rclone_config, rclone_profile_name, self.bucket.name, mount_path)
4525
+ return mounting_utils.get_mounting_command(mount_path, install_cmd,
4526
+ mount_cached_cmd)
4527
+
4528
+
4529
+ @register_s3_compatible_store
4530
+ class NebiusStore(S3CompatibleStore):
4531
+ """NebiusStore inherits from S3CompatibleStore and represents the backend
4532
+ for Nebius Object Storage buckets.
4533
+ """
5105
4534
 
5106
- def _delete_nebius_bucket_sub_path(self, bucket_name: str,
5107
- sub_path: str) -> bool:
5108
- """Deletes the sub path from the bucket."""
5109
- remove_command = (
5110
- f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive '
5111
- f'--profile={nebius.NEBIUS_PROFILE_NAME}')
5112
- return self._execute_nebius_remove_command(
5113
- remove_command, bucket_name, f'Removing objects from '
5114
- f'Nebius bucket {bucket_name}/{sub_path}',
5115
- f'Failed to remove objects from '
5116
- f'Nebius bucket {bucket_name}/{sub_path}.')
4535
+ @classmethod
4536
+ def get_config(cls) -> S3CompatibleConfig:
4537
+ """Return the configuration for Nebius Object Storage."""
4538
+ return S3CompatibleConfig(
4539
+ store_type='NEBIUS',
4540
+ url_prefix='nebius://',
4541
+ client_factory=lambda region: data_utils.create_nebius_client(),
4542
+ resource_factory=lambda name: nebius.resource('s3').Bucket(name),
4543
+ split_path=data_utils.split_nebius_path,
4544
+ verify_bucket=data_utils.verify_nebius_bucket,
4545
+ aws_profile=nebius.NEBIUS_PROFILE_NAME,
4546
+ cloud_name=str(clouds.Nebius()),
4547
+ mount_cmd_factory=cls._get_nebius_mount_cmd,
4548
+ )
4549
+
4550
+ @classmethod
4551
+ def _get_nebius_mount_cmd(cls, bucket_name: str, mount_path: str,
4552
+ bucket_sub_path: Optional[str]) -> str:
4553
+ """Factory method for Nebius mount command."""
4554
+ # We need to get the endpoint URL, but since this is a static method,
4555
+ # we'll need to create a client to get it
4556
+ client = data_utils.create_nebius_client()
4557
+ endpoint_url = client.meta.endpoint_url
4558
+ return mounting_utils.get_nebius_mount_cmd(nebius.NEBIUS_PROFILE_NAME,
4559
+ bucket_name, endpoint_url,
4560
+ mount_path, bucket_sub_path)
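
The new S3Store, R2Store and NebiusStore classes above all follow the same shape: a register_s3_compatible_store decorator plus a get_config() classmethod returning an S3CompatibleConfig. The toy sketch below re-creates only that registration shape with invented names (ToyStore, ToyStoreConfig, store_for); the real config carries many more fields, such as client_factory, verify_bucket and mount_cmd_factory:

from dataclasses import dataclass
from typing import Callable, Dict, Tuple, Type

# Maps a URL prefix (e.g. 's3://') to the store class that handles it.
_URL_PREFIX_TO_STORE: Dict[str, Type['ToyStore']] = {}


@dataclass
class ToyStoreConfig:
    store_type: str
    url_prefix: str
    split_path: Callable[[str], Tuple[str, str]]


def register_toy_store(cls: Type['ToyStore']) -> Type['ToyStore']:
    # The decorator asks the class for its config and indexes it by prefix.
    _URL_PREFIX_TO_STORE[cls.get_config().url_prefix] = cls
    return cls


class ToyStore:

    @classmethod
    def get_config(cls) -> ToyStoreConfig:
        raise NotImplementedError


@register_toy_store
class ToyS3Store(ToyStore):

    @classmethod
    def get_config(cls) -> ToyStoreConfig:
        return ToyStoreConfig(
            store_type='S3',
            url_prefix='s3://',
            split_path=lambda p: tuple(p[len('s3://'):].split('/', 1)))


def store_for(source: str) -> Type[ToyStore]:
    # Dispatch a source URL to the registered store class by its prefix.
    for prefix, store_cls in _URL_PREFIX_TO_STORE.items():
        if source.startswith(prefix):
            return store_cls
    raise ValueError(f'No registered store for {source!r}')


assert store_for('s3://my-bucket/data') is ToyS3Store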