skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546)
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/hyperbolic.py +8 -0
  8. sky/adaptors/ibm.py +5 -2
  9. sky/adaptors/kubernetes.py +149 -18
  10. sky/adaptors/nebius.py +173 -30
  11. sky/adaptors/primeintellect.py +1 -0
  12. sky/adaptors/runpod.py +68 -0
  13. sky/adaptors/seeweb.py +183 -0
  14. sky/adaptors/shadeform.py +89 -0
  15. sky/admin_policy.py +187 -4
  16. sky/authentication.py +179 -225
  17. sky/backends/__init__.py +4 -2
  18. sky/backends/backend.py +22 -9
  19. sky/backends/backend_utils.py +1323 -397
  20. sky/backends/cloud_vm_ray_backend.py +1749 -1029
  21. sky/backends/docker_utils.py +1 -1
  22. sky/backends/local_docker_backend.py +11 -6
  23. sky/backends/task_codegen.py +633 -0
  24. sky/backends/wheel_utils.py +55 -9
  25. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  26. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  27. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  28. sky/{clouds/service_catalog → catalog}/common.py +90 -49
  29. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  31. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  33. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
  34. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  35. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  36. sky/catalog/data_fetchers/fetch_nebius.py +338 -0
  37. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  38. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  39. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  40. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  41. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  42. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  43. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  44. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  45. sky/catalog/hyperbolic_catalog.py +136 -0
  46. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  47. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  48. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  49. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  50. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  51. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  52. sky/catalog/primeintellect_catalog.py +95 -0
  53. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  54. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  55. sky/catalog/seeweb_catalog.py +184 -0
  56. sky/catalog/shadeform_catalog.py +165 -0
  57. sky/catalog/ssh_catalog.py +167 -0
  58. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  59. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  60. sky/check.py +533 -185
  61. sky/cli.py +5 -5975
  62. sky/client/{cli.py → cli/command.py} +2591 -1956
  63. sky/client/cli/deprecation_utils.py +99 -0
  64. sky/client/cli/flags.py +359 -0
  65. sky/client/cli/table_utils.py +322 -0
  66. sky/client/cli/utils.py +79 -0
  67. sky/client/common.py +78 -32
  68. sky/client/oauth.py +82 -0
  69. sky/client/sdk.py +1219 -319
  70. sky/client/sdk_async.py +827 -0
  71. sky/client/service_account_auth.py +47 -0
  72. sky/cloud_stores.py +82 -3
  73. sky/clouds/__init__.py +13 -0
  74. sky/clouds/aws.py +564 -164
  75. sky/clouds/azure.py +105 -83
  76. sky/clouds/cloud.py +140 -40
  77. sky/clouds/cudo.py +68 -50
  78. sky/clouds/do.py +66 -48
  79. sky/clouds/fluidstack.py +63 -44
  80. sky/clouds/gcp.py +339 -110
  81. sky/clouds/hyperbolic.py +293 -0
  82. sky/clouds/ibm.py +70 -49
  83. sky/clouds/kubernetes.py +570 -162
  84. sky/clouds/lambda_cloud.py +74 -54
  85. sky/clouds/nebius.py +210 -81
  86. sky/clouds/oci.py +88 -66
  87. sky/clouds/paperspace.py +61 -44
  88. sky/clouds/primeintellect.py +317 -0
  89. sky/clouds/runpod.py +164 -74
  90. sky/clouds/scp.py +89 -86
  91. sky/clouds/seeweb.py +477 -0
  92. sky/clouds/shadeform.py +400 -0
  93. sky/clouds/ssh.py +263 -0
  94. sky/clouds/utils/aws_utils.py +10 -4
  95. sky/clouds/utils/gcp_utils.py +87 -11
  96. sky/clouds/utils/oci_utils.py +38 -14
  97. sky/clouds/utils/scp_utils.py +231 -167
  98. sky/clouds/vast.py +99 -77
  99. sky/clouds/vsphere.py +51 -40
  100. sky/core.py +375 -173
  101. sky/dag.py +15 -0
  102. sky/dashboard/out/404.html +1 -1
  103. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  105. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  106. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  107. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  110. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  111. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  112. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  113. sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  115. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  116. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  118. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  119. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  121. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  122. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  124. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  126. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  127. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  128. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  129. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  131. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  133. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  134. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  136. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  139. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  141. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  143. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  144. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  148. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  149. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  151. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  152. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
  155. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  156. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  157. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  158. sky/dashboard/out/clusters/[cluster].html +1 -1
  159. sky/dashboard/out/clusters.html +1 -1
  160. sky/dashboard/out/config.html +1 -0
  161. sky/dashboard/out/index.html +1 -1
  162. sky/dashboard/out/infra/[context].html +1 -0
  163. sky/dashboard/out/infra.html +1 -0
  164. sky/dashboard/out/jobs/[job].html +1 -1
  165. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  166. sky/dashboard/out/jobs.html +1 -1
  167. sky/dashboard/out/users.html +1 -0
  168. sky/dashboard/out/volumes.html +1 -0
  169. sky/dashboard/out/workspace/new.html +1 -0
  170. sky/dashboard/out/workspaces/[name].html +1 -0
  171. sky/dashboard/out/workspaces.html +1 -0
  172. sky/data/data_utils.py +137 -1
  173. sky/data/mounting_utils.py +269 -84
  174. sky/data/storage.py +1460 -1807
  175. sky/data/storage_utils.py +43 -57
  176. sky/exceptions.py +126 -2
  177. sky/execution.py +216 -63
  178. sky/global_user_state.py +2390 -586
  179. sky/jobs/__init__.py +7 -0
  180. sky/jobs/client/sdk.py +300 -58
  181. sky/jobs/client/sdk_async.py +161 -0
  182. sky/jobs/constants.py +15 -8
  183. sky/jobs/controller.py +848 -275
  184. sky/jobs/file_content_utils.py +128 -0
  185. sky/jobs/log_gc.py +193 -0
  186. sky/jobs/recovery_strategy.py +402 -152
  187. sky/jobs/scheduler.py +314 -189
  188. sky/jobs/server/core.py +836 -255
  189. sky/jobs/server/server.py +156 -115
  190. sky/jobs/server/utils.py +136 -0
  191. sky/jobs/state.py +2109 -706
  192. sky/jobs/utils.py +1306 -215
  193. sky/logs/__init__.py +21 -0
  194. sky/logs/agent.py +108 -0
  195. sky/logs/aws.py +243 -0
  196. sky/logs/gcp.py +91 -0
  197. sky/metrics/__init__.py +0 -0
  198. sky/metrics/utils.py +453 -0
  199. sky/models.py +78 -1
  200. sky/optimizer.py +164 -70
  201. sky/provision/__init__.py +90 -4
  202. sky/provision/aws/config.py +147 -26
  203. sky/provision/aws/instance.py +136 -50
  204. sky/provision/azure/instance.py +11 -6
  205. sky/provision/common.py +13 -1
  206. sky/provision/cudo/cudo_machine_type.py +1 -1
  207. sky/provision/cudo/cudo_utils.py +14 -8
  208. sky/provision/cudo/cudo_wrapper.py +72 -71
  209. sky/provision/cudo/instance.py +10 -6
  210. sky/provision/do/instance.py +10 -6
  211. sky/provision/do/utils.py +4 -3
  212. sky/provision/docker_utils.py +140 -33
  213. sky/provision/fluidstack/instance.py +13 -8
  214. sky/provision/gcp/__init__.py +1 -0
  215. sky/provision/gcp/config.py +301 -19
  216. sky/provision/gcp/constants.py +218 -0
  217. sky/provision/gcp/instance.py +36 -8
  218. sky/provision/gcp/instance_utils.py +18 -4
  219. sky/provision/gcp/volume_utils.py +247 -0
  220. sky/provision/hyperbolic/__init__.py +12 -0
  221. sky/provision/hyperbolic/config.py +10 -0
  222. sky/provision/hyperbolic/instance.py +437 -0
  223. sky/provision/hyperbolic/utils.py +373 -0
  224. sky/provision/instance_setup.py +101 -20
  225. sky/provision/kubernetes/__init__.py +5 -0
  226. sky/provision/kubernetes/config.py +9 -52
  227. sky/provision/kubernetes/constants.py +17 -0
  228. sky/provision/kubernetes/instance.py +919 -280
  229. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  230. sky/provision/kubernetes/network.py +27 -17
  231. sky/provision/kubernetes/network_utils.py +44 -43
  232. sky/provision/kubernetes/utils.py +1221 -534
  233. sky/provision/kubernetes/volume.py +343 -0
  234. sky/provision/lambda_cloud/instance.py +22 -16
  235. sky/provision/nebius/constants.py +50 -0
  236. sky/provision/nebius/instance.py +19 -6
  237. sky/provision/nebius/utils.py +237 -137
  238. sky/provision/oci/instance.py +10 -5
  239. sky/provision/paperspace/instance.py +10 -7
  240. sky/provision/paperspace/utils.py +1 -1
  241. sky/provision/primeintellect/__init__.py +10 -0
  242. sky/provision/primeintellect/config.py +11 -0
  243. sky/provision/primeintellect/instance.py +454 -0
  244. sky/provision/primeintellect/utils.py +398 -0
  245. sky/provision/provisioner.py +117 -36
  246. sky/provision/runpod/__init__.py +5 -0
  247. sky/provision/runpod/instance.py +27 -6
  248. sky/provision/runpod/utils.py +51 -18
  249. sky/provision/runpod/volume.py +214 -0
  250. sky/provision/scp/__init__.py +15 -0
  251. sky/provision/scp/config.py +93 -0
  252. sky/provision/scp/instance.py +707 -0
  253. sky/provision/seeweb/__init__.py +11 -0
  254. sky/provision/seeweb/config.py +13 -0
  255. sky/provision/seeweb/instance.py +812 -0
  256. sky/provision/shadeform/__init__.py +11 -0
  257. sky/provision/shadeform/config.py +12 -0
  258. sky/provision/shadeform/instance.py +351 -0
  259. sky/provision/shadeform/shadeform_utils.py +83 -0
  260. sky/provision/ssh/__init__.py +18 -0
  261. sky/provision/vast/instance.py +13 -8
  262. sky/provision/vast/utils.py +10 -7
  263. sky/provision/volume.py +164 -0
  264. sky/provision/vsphere/common/ssl_helper.py +1 -1
  265. sky/provision/vsphere/common/vapiconnect.py +2 -1
  266. sky/provision/vsphere/common/vim_utils.py +4 -4
  267. sky/provision/vsphere/instance.py +15 -10
  268. sky/provision/vsphere/vsphere_utils.py +17 -20
  269. sky/py.typed +0 -0
  270. sky/resources.py +845 -119
  271. sky/schemas/__init__.py +0 -0
  272. sky/schemas/api/__init__.py +0 -0
  273. sky/schemas/api/responses.py +227 -0
  274. sky/schemas/db/README +4 -0
  275. sky/schemas/db/env.py +90 -0
  276. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  277. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  278. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  279. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  280. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  281. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  282. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  283. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  284. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  285. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  286. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  287. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  288. sky/schemas/db/script.py.mako +28 -0
  289. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  290. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  291. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  292. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  293. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  294. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  295. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  296. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  297. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  298. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  299. sky/schemas/generated/__init__.py +0 -0
  300. sky/schemas/generated/autostopv1_pb2.py +36 -0
  301. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  302. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  303. sky/schemas/generated/jobsv1_pb2.py +86 -0
  304. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  305. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  306. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  307. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  308. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  309. sky/schemas/generated/servev1_pb2.py +58 -0
  310. sky/schemas/generated/servev1_pb2.pyi +115 -0
  311. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  312. sky/serve/autoscalers.py +357 -5
  313. sky/serve/client/impl.py +310 -0
  314. sky/serve/client/sdk.py +47 -139
  315. sky/serve/client/sdk_async.py +130 -0
  316. sky/serve/constants.py +12 -9
  317. sky/serve/controller.py +68 -17
  318. sky/serve/load_balancer.py +106 -60
  319. sky/serve/load_balancing_policies.py +116 -2
  320. sky/serve/replica_managers.py +434 -249
  321. sky/serve/serve_rpc_utils.py +179 -0
  322. sky/serve/serve_state.py +569 -257
  323. sky/serve/serve_utils.py +775 -265
  324. sky/serve/server/core.py +66 -711
  325. sky/serve/server/impl.py +1093 -0
  326. sky/serve/server/server.py +21 -18
  327. sky/serve/service.py +192 -89
  328. sky/serve/service_spec.py +144 -20
  329. sky/serve/spot_placer.py +3 -0
  330. sky/server/auth/__init__.py +0 -0
  331. sky/server/auth/authn.py +50 -0
  332. sky/server/auth/loopback.py +38 -0
  333. sky/server/auth/oauth2_proxy.py +202 -0
  334. sky/server/common.py +478 -182
  335. sky/server/config.py +85 -23
  336. sky/server/constants.py +44 -6
  337. sky/server/daemons.py +295 -0
  338. sky/server/html/token_page.html +185 -0
  339. sky/server/metrics.py +160 -0
  340. sky/server/middleware_utils.py +166 -0
  341. sky/server/requests/executor.py +558 -138
  342. sky/server/requests/payloads.py +364 -24
  343. sky/server/requests/preconditions.py +21 -17
  344. sky/server/requests/process.py +112 -29
  345. sky/server/requests/request_names.py +121 -0
  346. sky/server/requests/requests.py +822 -226
  347. sky/server/requests/serializers/decoders.py +82 -31
  348. sky/server/requests/serializers/encoders.py +140 -22
  349. sky/server/requests/threads.py +117 -0
  350. sky/server/rest.py +455 -0
  351. sky/server/server.py +1309 -285
  352. sky/server/state.py +20 -0
  353. sky/server/stream_utils.py +327 -61
  354. sky/server/uvicorn.py +217 -3
  355. sky/server/versions.py +270 -0
  356. sky/setup_files/MANIFEST.in +11 -1
  357. sky/setup_files/alembic.ini +160 -0
  358. sky/setup_files/dependencies.py +139 -31
  359. sky/setup_files/setup.py +44 -42
  360. sky/sky_logging.py +114 -7
  361. sky/skylet/attempt_skylet.py +106 -24
  362. sky/skylet/autostop_lib.py +129 -8
  363. sky/skylet/configs.py +29 -20
  364. sky/skylet/constants.py +216 -25
  365. sky/skylet/events.py +101 -21
  366. sky/skylet/job_lib.py +345 -164
  367. sky/skylet/log_lib.py +297 -18
  368. sky/skylet/log_lib.pyi +44 -1
  369. sky/skylet/providers/ibm/node_provider.py +12 -8
  370. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  371. sky/skylet/ray_patches/__init__.py +17 -3
  372. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  373. sky/skylet/ray_patches/cli.py.diff +19 -0
  374. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  375. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  376. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  377. sky/skylet/ray_patches/updater.py.diff +18 -0
  378. sky/skylet/ray_patches/worker.py.diff +41 -0
  379. sky/skylet/runtime_utils.py +21 -0
  380. sky/skylet/services.py +568 -0
  381. sky/skylet/skylet.py +72 -4
  382. sky/skylet/subprocess_daemon.py +104 -29
  383. sky/skypilot_config.py +506 -99
  384. sky/ssh_node_pools/__init__.py +1 -0
  385. sky/ssh_node_pools/core.py +135 -0
  386. sky/ssh_node_pools/server.py +233 -0
  387. sky/task.py +685 -163
  388. sky/templates/aws-ray.yml.j2 +11 -3
  389. sky/templates/azure-ray.yml.j2 +2 -1
  390. sky/templates/cudo-ray.yml.j2 +1 -0
  391. sky/templates/do-ray.yml.j2 +2 -1
  392. sky/templates/fluidstack-ray.yml.j2 +1 -0
  393. sky/templates/gcp-ray.yml.j2 +62 -1
  394. sky/templates/hyperbolic-ray.yml.j2 +68 -0
  395. sky/templates/ibm-ray.yml.j2 +2 -1
  396. sky/templates/jobs-controller.yaml.j2 +27 -24
  397. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  398. sky/templates/kubernetes-ray.yml.j2 +611 -50
  399. sky/templates/lambda-ray.yml.j2 +2 -1
  400. sky/templates/nebius-ray.yml.j2 +34 -12
  401. sky/templates/oci-ray.yml.j2 +1 -0
  402. sky/templates/paperspace-ray.yml.j2 +2 -1
  403. sky/templates/primeintellect-ray.yml.j2 +72 -0
  404. sky/templates/runpod-ray.yml.j2 +10 -1
  405. sky/templates/scp-ray.yml.j2 +4 -50
  406. sky/templates/seeweb-ray.yml.j2 +171 -0
  407. sky/templates/shadeform-ray.yml.j2 +73 -0
  408. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  409. sky/templates/vast-ray.yml.j2 +1 -0
  410. sky/templates/vsphere-ray.yml.j2 +1 -0
  411. sky/templates/websocket_proxy.py +212 -37
  412. sky/usage/usage_lib.py +31 -15
  413. sky/users/__init__.py +0 -0
  414. sky/users/model.conf +15 -0
  415. sky/users/permission.py +397 -0
  416. sky/users/rbac.py +121 -0
  417. sky/users/server.py +720 -0
  418. sky/users/token_service.py +218 -0
  419. sky/utils/accelerator_registry.py +35 -5
  420. sky/utils/admin_policy_utils.py +84 -38
  421. sky/utils/annotations.py +38 -5
  422. sky/utils/asyncio_utils.py +78 -0
  423. sky/utils/atomic.py +1 -1
  424. sky/utils/auth_utils.py +153 -0
  425. sky/utils/benchmark_utils.py +60 -0
  426. sky/utils/cli_utils/status_utils.py +159 -86
  427. sky/utils/cluster_utils.py +31 -9
  428. sky/utils/command_runner.py +354 -68
  429. sky/utils/command_runner.pyi +93 -3
  430. sky/utils/common.py +35 -8
  431. sky/utils/common_utils.py +314 -91
  432. sky/utils/config_utils.py +74 -5
  433. sky/utils/context.py +403 -0
  434. sky/utils/context_utils.py +242 -0
  435. sky/utils/controller_utils.py +383 -89
  436. sky/utils/dag_utils.py +31 -12
  437. sky/utils/db/__init__.py +0 -0
  438. sky/utils/db/db_utils.py +485 -0
  439. sky/utils/db/kv_cache.py +149 -0
  440. sky/utils/db/migration_utils.py +137 -0
  441. sky/utils/directory_utils.py +12 -0
  442. sky/utils/env_options.py +13 -0
  443. sky/utils/git.py +567 -0
  444. sky/utils/git_clone.sh +460 -0
  445. sky/utils/infra_utils.py +195 -0
  446. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  447. sky/utils/kubernetes/config_map_utils.py +133 -0
  448. sky/utils/kubernetes/create_cluster.sh +15 -29
  449. sky/utils/kubernetes/delete_cluster.sh +10 -7
  450. sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
  451. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  452. sky/utils/kubernetes/generate_kind_config.py +6 -66
  453. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  454. sky/utils/kubernetes/gpu_labeler.py +18 -8
  455. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  456. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  457. sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
  458. sky/utils/kubernetes/rsync_helper.sh +11 -3
  459. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  460. sky/utils/kubernetes/ssh_utils.py +221 -0
  461. sky/utils/kubernetes_enums.py +8 -15
  462. sky/utils/lock_events.py +94 -0
  463. sky/utils/locks.py +416 -0
  464. sky/utils/log_utils.py +82 -107
  465. sky/utils/perf_utils.py +22 -0
  466. sky/utils/resource_checker.py +298 -0
  467. sky/utils/resources_utils.py +249 -32
  468. sky/utils/rich_utils.py +217 -39
  469. sky/utils/schemas.py +955 -160
  470. sky/utils/serialize_utils.py +16 -0
  471. sky/utils/status_lib.py +10 -0
  472. sky/utils/subprocess_utils.py +29 -15
  473. sky/utils/tempstore.py +70 -0
  474. sky/utils/thread_utils.py +91 -0
  475. sky/utils/timeline.py +26 -53
  476. sky/utils/ux_utils.py +84 -15
  477. sky/utils/validator.py +11 -1
  478. sky/utils/volume.py +165 -0
  479. sky/utils/yaml_utils.py +111 -0
  480. sky/volumes/__init__.py +13 -0
  481. sky/volumes/client/__init__.py +0 -0
  482. sky/volumes/client/sdk.py +150 -0
  483. sky/volumes/server/__init__.py +0 -0
  484. sky/volumes/server/core.py +270 -0
  485. sky/volumes/server/server.py +124 -0
  486. sky/volumes/volume.py +215 -0
  487. sky/workspaces/__init__.py +0 -0
  488. sky/workspaces/core.py +655 -0
  489. sky/workspaces/server.py +101 -0
  490. sky/workspaces/utils.py +56 -0
  491. sky_templates/README.md +3 -0
  492. sky_templates/__init__.py +3 -0
  493. sky_templates/ray/__init__.py +0 -0
  494. sky_templates/ray/start_cluster +183 -0
  495. sky_templates/ray/stop_cluster +75 -0
  496. skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
  497. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  498. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
  499. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  500. sky/benchmark/benchmark_state.py +0 -256
  501. sky/benchmark/benchmark_utils.py +0 -641
  502. sky/clouds/service_catalog/constants.py +0 -7
  503. sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
  504. sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
  505. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  506. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  507. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  508. sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
  509. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  510. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  511. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  512. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  513. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  514. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  515. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  516. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
  517. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  518. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  519. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  520. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
  521. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  522. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  523. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  524. sky/jobs/dashboard/dashboard.py +0 -223
  525. sky/jobs/dashboard/static/favicon.ico +0 -0
  526. sky/jobs/dashboard/templates/index.html +0 -831
  527. sky/jobs/server/dashboard_utils.py +0 -69
  528. sky/skylet/providers/scp/__init__.py +0 -2
  529. sky/skylet/providers/scp/config.py +0 -149
  530. sky/skylet/providers/scp/node_provider.py +0 -578
  531. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  532. sky/utils/db_utils.py +0 -100
  533. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  534. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  535. skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
  536. skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
  537. skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
  538. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  539. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  540. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  541. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  542. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  543. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  544. /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  545. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  546. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/data/storage.py CHANGED
@@ -1,4 +1,6 @@
 """Storage and Store Classes for Sky Data."""
+from abc import abstractmethod
+from dataclasses import dataclass
 import enum
 import hashlib
 import os
@@ -7,7 +9,7 @@ import shlex
 import subprocess
 import time
 import typing
-from typing import Any, Dict, List, Optional, Tuple, Type, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
 import urllib.parse
 
 import colorama
@@ -21,6 +23,7 @@ from sky import skypilot_config
 from sky.adaptors import aws
 from sky.adaptors import azure
 from sky.adaptors import cloudflare
+from sky.adaptors import coreweave
 from sky.adaptors import gcp
 from sky.adaptors import ibm
 from sky.adaptors import nebius
@@ -60,6 +63,7 @@ STORE_ENABLED_CLOUDS: List[str] = [
     str(clouds.OCI()),
     str(clouds.Nebius()),
     cloudflare.NAME,
+    coreweave.NAME,
 ]
 
 # Maximum number of concurrent rsync upload processes
@@ -91,6 +95,12 @@ def get_cached_enabled_storage_cloud_names_or_refresh(
     r2_is_enabled, _ = cloudflare.check_storage_credentials()
     if r2_is_enabled:
         enabled_clouds.append(cloudflare.NAME)
+
+    # Similarly, handle CoreWeave storage credentials
+    coreweave_is_enabled, _ = coreweave.check_storage_credentials()
+    if coreweave_is_enabled:
+        enabled_clouds.append(coreweave.NAME)
+
     if raise_if_no_cloud_access and not enabled_clouds:
         raise exceptions.NoCloudAccessError(
             'No cloud access available for storage. '
@@ -105,11 +115,11 @@ def _is_storage_cloud_enabled(cloud_name: str,
     if cloud_name in enabled_storage_cloud_names:
         return True
     if try_fix_with_sky_check:
-        # TODO(zhwu): Only check the specified cloud to speed up.
         sky_check.check_capability(
             sky_cloud.CloudCapability.STORAGE,
             quiet=True,
-        )
+            clouds=[cloud_name],
+            workspace=skypilot_config.get_active_workspace())
         return _is_storage_cloud_enabled(cloud_name,
                                          try_fix_with_sky_check=False)
     return False
@@ -124,41 +134,71 @@ class StoreType(enum.Enum):
     IBM = 'IBM'
     OCI = 'OCI'
     NEBIUS = 'NEBIUS'
+    COREWEAVE = 'COREWEAVE'
+    VOLUME = 'VOLUME'
+
+    @classmethod
+    def _get_s3_compatible_store_by_cloud(cls,
+                                          cloud_name: str) -> Optional[str]:
+        """Get S3-compatible store type by cloud name."""
+        for store_type, store_class in _S3_COMPATIBLE_STORES.items():
+            config = store_class.get_config()
+            if config.cloud_name.lower() == cloud_name:
+                return store_type
+        return None
+
+    @classmethod
+    def _get_s3_compatible_config(
+            cls, store_type: str) -> Optional['S3CompatibleConfig']:
+        """Get S3-compatible store configuration by store type."""
+        store_class = _S3_COMPATIBLE_STORES.get(store_type)
+        if store_class:
+            return store_class.get_config()
+        return None
+
+    @classmethod
+    def find_s3_compatible_config_by_prefix(
+            cls, source: str) -> Optional['StoreType']:
+        """Get S3-compatible store type by URL prefix."""
+        for store_type, store_class in _S3_COMPATIBLE_STORES.items():
+            config = store_class.get_config()
+            if source.startswith(config.url_prefix):
+                return StoreType(store_type)
+        return None
 
     @classmethod
     def from_cloud(cls, cloud: str) -> 'StoreType':
-        if cloud.lower() == str(clouds.AWS()).lower():
-            return StoreType.S3
-        elif cloud.lower() == str(clouds.GCP()).lower():
+        cloud_lower = cloud.lower()
+        if cloud_lower == str(clouds.GCP()).lower():
             return StoreType.GCS
-        elif cloud.lower() == str(clouds.IBM()).lower():
+        elif cloud_lower == str(clouds.IBM()).lower():
            return StoreType.IBM
-        elif cloud.lower() == cloudflare.NAME.lower():
-            return StoreType.R2
-        elif cloud.lower() == str(clouds.Azure()).lower():
+        elif cloud_lower == str(clouds.Azure()).lower():
            return StoreType.AZURE
-        elif cloud.lower() == str(clouds.OCI()).lower():
+        elif cloud_lower == str(clouds.OCI()).lower():
            return StoreType.OCI
-        elif cloud.lower() == str(clouds.Nebius()).lower():
-            return StoreType.NEBIUS
-        elif cloud.lower() == str(clouds.Lambda()).lower():
+        elif cloud_lower == str(clouds.Lambda()).lower():
             with ux_utils.print_exception_no_traceback():
                 raise ValueError('Lambda Cloud does not provide cloud storage.')
-        elif cloud.lower() == str(clouds.SCP()).lower():
+        elif cloud_lower == str(clouds.SCP()).lower():
             with ux_utils.print_exception_no_traceback():
                 raise ValueError('SCP does not provide cloud storage.')
+        else:
+            s3_store_type = cls._get_s3_compatible_store_by_cloud(cloud_lower)
+            if s3_store_type:
+                return cls(s3_store_type)
 
         raise ValueError(f'Unsupported cloud for StoreType: {cloud}')
 
     def to_cloud(self) -> str:
-        if self == StoreType.S3:
-            return str(clouds.AWS())
-        elif self == StoreType.GCS:
+        config = self._get_s3_compatible_config(self.value)
+        if config:
+            return config.cloud_name
+
+        if self == StoreType.GCS:
             return str(clouds.GCP())
         elif self == StoreType.AZURE:
             return str(clouds.Azure())
-        elif self == StoreType.R2:
-            return cloudflare.NAME
         elif self == StoreType.IBM:
             return str(clouds.IBM())
         elif self == StoreType.OCI:
@@ -168,41 +208,34 @@ class StoreType(enum.Enum):
 
     @classmethod
     def from_store(cls, store: 'AbstractStore') -> 'StoreType':
-        if isinstance(store, S3Store):
-            return StoreType.S3
-        elif isinstance(store, GcsStore):
+        if isinstance(store, S3CompatibleStore):
+            return cls(store.get_store_type())
+
+        if isinstance(store, GcsStore):
             return StoreType.GCS
         elif isinstance(store, AzureBlobStore):
             return StoreType.AZURE
-        elif isinstance(store, R2Store):
-            return StoreType.R2
         elif isinstance(store, IBMCosStore):
             return StoreType.IBM
         elif isinstance(store, OciStore):
             return StoreType.OCI
-        elif isinstance(store, NebiusStore):
-            return StoreType.NEBIUS
         else:
             with ux_utils.print_exception_no_traceback():
                 raise ValueError(f'Unknown store type: {store}')
 
     def store_prefix(self) -> str:
-        if self == StoreType.S3:
-            return 's3://'
-        elif self == StoreType.GCS:
+        config = self._get_s3_compatible_config(self.value)
+        if config:
+            return config.url_prefix
+
+        if self == StoreType.GCS:
             return 'gs://'
         elif self == StoreType.AZURE:
             return 'https://'
-        # R2 storages use 's3://' as a prefix for various aws cli commands
-        elif self == StoreType.R2:
-            return 'r2://'
         elif self == StoreType.IBM:
             return 'cos://'
         elif self == StoreType.OCI:
             return 'oci://'
-        # Nebius storages use 's3://' as a prefix for various aws cli commands
-        elif self == StoreType.NEBIUS:
-            return 'nebius://'
         else:
             with ux_utils.print_exception_no_traceback():
                 raise ValueError(f'Unknown store type: {self}')
@@ -251,12 +284,20 @@ class StoreType(enum.Enum):
                 elif store_type == StoreType.IBM:
                     bucket_name, sub_path, region = data_utils.split_cos_path(
                         store_url)
-                elif store_type == StoreType.R2:
-                    bucket_name, sub_path = data_utils.split_r2_path(store_url)
                 elif store_type == StoreType.GCS:
                     bucket_name, sub_path = data_utils.split_gcs_path(store_url)
-                elif store_type == StoreType.S3:
-                    bucket_name, sub_path = data_utils.split_s3_path(store_url)
+                else:
+                    # Check compatible stores
+                    for compatible_store_type, store_class in \
+                            _S3_COMPATIBLE_STORES.items():
+                        if store_type.value == compatible_store_type:
+                            config = store_class.get_config()
+                            bucket_name, sub_path = config.split_path(store_url)
+                            break
+                    else:
+                        # If we get here, it's an unknown S3-compatible store
+                        raise ValueError(
+                            f'Unknown S3-compatible store type: {store_type}')
                 return store_type, bucket_name, \
                     sub_path, storage_account_name, region
         raise ValueError(f'Unknown store URL: {store_url}')
@@ -714,6 +755,11 @@ class Storage(object):
                     previous_store_type = store_type
                 else:
                     new_store_type = store_type
+            if previous_store_type is None or new_store_type is None:
+                # This should not happen if the condition above is true,
+                # but add check for type safety
+                raise exceptions.StorageBucketCreateError(
+                    f'Bucket {self.name} has inconsistent store types.')
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageBucketCreateError(
                     f'Bucket {self.name} was previously created for '
@@ -744,27 +790,27 @@ class Storage(object):
                                source=self.source,
                                mode=self.mode)
 
-        for store in input_stores:
-            self.add_store(store)
+        for store_type in input_stores:
+            self.add_store(store_type)
 
         if self.source is not None:
             # If source is a pre-existing bucket, connect to the bucket
             # If the bucket does not exist, this will error out
             if isinstance(self.source, str):
-                if self.source.startswith('s3://'):
-                    self.add_store(StoreType.S3)
-                elif self.source.startswith('gs://'):
+                if self.source.startswith('gs://'):
                     self.add_store(StoreType.GCS)
                 elif data_utils.is_az_container_endpoint(self.source):
                     self.add_store(StoreType.AZURE)
-                elif self.source.startswith('r2://'):
-                    self.add_store(StoreType.R2)
                 elif self.source.startswith('cos://'):
                     self.add_store(StoreType.IBM)
                 elif self.source.startswith('oci://'):
                     self.add_store(StoreType.OCI)
-                elif self.source.startswith('nebius://'):
-                    self.add_store(StoreType.NEBIUS)
+
+                s3_compatible_store_type: Optional[StoreType] = (
+                    StoreType.find_s3_compatible_config_by_prefix(
+                        self.source))
+                if s3_compatible_store_type:
+                    self.add_store(s3_compatible_store_type)
 
     def get_bucket_sub_path_prefix(self, blob_path: str) -> str:
         """Adds the bucket sub path prefix to the blob path."""
@@ -852,7 +898,7 @@ class Storage(object):
                     f'{source} in the file_mounts section of your YAML')
             is_local_source = True
         elif split_path.scheme in [
-                's3', 'gs', 'https', 'r2', 'cos', 'oci', 'nebius'
+                's3', 'gs', 'https', 'r2', 'cos', 'oci', 'nebius', 'cw'
         ]:
             is_local_source = False
             # Storage mounting does not support mounting specific files from
@@ -877,7 +923,8 @@ class Storage(object):
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageSourceError(
                     f'Supported paths: local, s3://, gs://, https://, '
-                    f'r2://, cos://, oci://, nebius://. Got: {source}')
+                    f'r2://, cos://, oci://, nebius://, cw://. '
+                    f'Got: {source}')
         return source, is_local_source
 
     def _validate_storage_spec(self, name: Optional[str]) -> None:
@@ -892,7 +939,16 @@ class Storage(object):
            """
            prefix = name.split('://')[0]
            prefix = prefix.lower()
-           if prefix in ['s3', 'gs', 'https', 'r2', 'cos', 'oci', 'nebius']:
+           if prefix in [
+                   's3',
+                   'gs',
+                   'https',
+                   'r2',
+                   'cos',
+                   'oci',
+                   'nebius',
+                   'cw',
+           ]:
                with ux_utils.print_exception_no_traceback():
                    raise exceptions.StorageNameError(
                        'Prefix detected: `name` cannot start with '
@@ -980,12 +1036,25 @@ class Storage(object):
             # When initializing from global_user_state, we override the
             # source from the YAML
             try:
-                if s_type == StoreType.S3:
+                if s_type.value in _S3_COMPATIBLE_STORES:
+                    store_class = _S3_COMPATIBLE_STORES[s_type.value]
+                    store = store_class.from_metadata(
+                        s_metadata,
+                        source=self.source,
+                        sync_on_reconstruction=self.sync_on_reconstruction,
+                        _bucket_sub_path=self._bucket_sub_path)
+                elif s_type == StoreType.S3:
                     store = S3Store.from_metadata(
                         s_metadata,
                         source=self.source,
                         sync_on_reconstruction=self.sync_on_reconstruction,
                         _bucket_sub_path=self._bucket_sub_path)
+                elif s_type == StoreType.R2:
+                    store = R2Store.from_metadata(
+                        s_metadata,
+                        source=self.source,
+                        sync_on_reconstruction=self.sync_on_reconstruction,
+                        _bucket_sub_path=self._bucket_sub_path)
                 elif s_type == StoreType.GCS:
                     store = GcsStore.from_metadata(
                         s_metadata,
@@ -1000,12 +1069,6 @@ class Storage(object):
                         source=self.source,
                         sync_on_reconstruction=self.sync_on_reconstruction,
                         _bucket_sub_path=self._bucket_sub_path)
-                elif s_type == StoreType.R2:
-                    store = R2Store.from_metadata(
-                        s_metadata,
-                        source=self.source,
-                        sync_on_reconstruction=self.sync_on_reconstruction,
-                        _bucket_sub_path=self._bucket_sub_path)
                 elif s_type == StoreType.IBM:
                     store = IBMCosStore.from_metadata(
                         s_metadata,
@@ -1024,6 +1087,12 @@ class Storage(object):
                         source=self.source,
                         sync_on_reconstruction=self.sync_on_reconstruction,
                         _bucket_sub_path=self._bucket_sub_path)
+                elif s_type == StoreType.COREWEAVE:
+                    store = CoreWeaveStore.from_metadata(
+                        s_metadata,
+                        source=self.source,
+                        sync_on_reconstruction=self.sync_on_reconstruction,
+                        _bucket_sub_path=self._bucket_sub_path)
                 else:
                     with ux_utils.print_exception_no_traceback():
                         raise ValueError(f'Unknown store type: {s_type}')
@@ -1106,20 +1175,17 @@ class Storage(object):
             return store
 
         store_cls: Type[AbstractStore]
-        if store_type == StoreType.S3:
-            store_cls = S3Store
+        # First check if it's a registered S3-compatible store
+        if store_type.value in _S3_COMPATIBLE_STORES:
+            store_cls = _S3_COMPATIBLE_STORES[store_type.value]
         elif store_type == StoreType.GCS:
            store_cls = GcsStore
         elif store_type == StoreType.AZURE:
            store_cls = AzureBlobStore
-        elif store_type == StoreType.R2:
-            store_cls = R2Store
         elif store_type == StoreType.IBM:
            store_cls = IBMCosStore
         elif store_type == StoreType.OCI:
            store_cls = OciStore
-        elif store_type == StoreType.NEBIUS:
-            store_cls = NebiusStore
         else:
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageSpecError(
@@ -1266,6 +1332,17 @@ class Storage(object):
         if store.is_sky_managed:
             global_user_state.set_storage_status(self.name, StorageStatus.READY)
 
+    @classmethod
+    def from_handle(cls, handle: StorageHandle) -> 'Storage':
+        """Create Storage from StorageHandle object.
+        """
+        obj = cls(name=handle.storage_name,
+                  source=handle.source,
+                  sync_on_reconstruction=False)
+        obj.handle = handle
+        obj._add_store_from_metadata(handle.sky_stores)
+        return obj
+
     @classmethod
     def from_yaml_config(cls, config: Dict[str, Any]) -> 'Storage':
         common_utils.validate_schema(config, schemas.get_storage_schema(),
@@ -1343,101 +1420,262 @@ class Storage(object):
1343
1420
  return config
1344
1421
 
1345
1422
 
1346
- class S3Store(AbstractStore):
1347
- """S3Store inherits from Storage Object and represents the backend
1348
- for S3 buckets.
1423
+ # Registry for S3-compatible stores
1424
+ _S3_COMPATIBLE_STORES = {}
1425
+
1426
+
1427
+ def register_s3_compatible_store(store_class):
1428
+ """Decorator to automatically register S3-compatible stores."""
1429
+ store_type = store_class.get_store_type()
1430
+ _S3_COMPATIBLE_STORES[store_type] = store_class
1431
+ return store_class
1432
+
1433
+
1434
+ @dataclass
1435
+ class S3CompatibleConfig:
1436
+ """Configuration for S3-compatible storage providers."""
1437
+ # Provider identification
1438
+ store_type: str # Store type identifier (e.g., "S3", "R2", "MINIO")
1439
+ url_prefix: str # URL prefix (e.g., "s3://", "r2://", "minio://")
1440
+
1441
+ # Client creation
1442
+ client_factory: Callable[[Optional[str]], Any]
1443
+ resource_factory: Callable[[str], StorageHandle]
1444
+ split_path: Callable[[str], Tuple[str, str]]
1445
+ verify_bucket: Callable[[str], bool]
1446
+
1447
+ # CLI configuration
1448
+ aws_profile: Optional[str] = None
1449
+ get_endpoint_url: Optional[Callable[[], str]] = None
1450
+ credentials_file: Optional[str] = None
1451
+ config_file: Optional[str] = None
1452
+ extra_cli_args: Optional[List[str]] = None
1453
+
1454
+ # Provider-specific settings
1455
+ cloud_name: str = ''
1456
+ default_region: Optional[str] = None
1457
+ access_denied_message: str = 'Access Denied'
1458
+
1459
+ # Mounting
1460
+ mount_cmd_factory: Optional[Callable] = None
1461
+ mount_cached_cmd_factory: Optional[Callable] = None
1462
+
1463
+ def __post_init__(self):
1464
+ if self.extra_cli_args is None:
1465
+ self.extra_cli_args = []
1466
+
1467
+
1468
+ class S3CompatibleStore(AbstractStore):
1469
+ """Base class for S3-compatible object storage providers.
1470
+
1471
+ This class provides a unified interface for all S3-compatible storage
1472
+ providers (AWS S3, Cloudflare R2, Nebius, MinIO, CoreWeave, etc.) by
1473
+ leveraging a configuration-driven approach that eliminates code duplication
1474
+
1475
+ ## Adding a New S3-Compatible Store
1476
+
1477
+ To add a new S3-compatible storage provider (e.g., MinIO),
1478
+ follow these steps:
1479
+
1480
+ ### 1. Add Store Type to Enum
1481
+ First, add your store type to the StoreType enum:
1482
+ ```python
1483
+ class StoreType(enum.Enum):
1484
+ # ... existing entries ...
1485
+ MINIO = 'MINIO'
1486
+ ```
1487
+
1488
+ ### 2. Create Store Class
1489
+ Create a new store class that inherits from S3CompatibleStore:
1490
+ ```python
1491
+ @register_s3_compatible_store
1492
+ class MinIOStore(S3CompatibleStore):
1493
+ '''MinIOStore for MinIO object storage.'''
1494
+
1495
+ @classmethod
1496
+ def get_config(cls) -> S3CompatibleConfig:
1497
+ '''Return the configuration for MinIO.'''
1498
+ return S3CompatibleConfig(
1499
+ store_type='MINIO',
1500
+ url_prefix='minio://',
1501
+ client_factory=lambda region:\
1502
+ data_utils.create_minio_client(region),
1503
+ resource_factory=lambda name:\
1504
+ minio.resource('s3').Bucket(name),
1505
+ split_path=data_utils.split_minio_path,
1506
+ aws_profile='minio',
1507
+ get_endpoint_url=lambda: minio.get_endpoint_url(),
1508
+ cloud_name='minio',
1509
+ default_region='us-east-1',
1510
+ mount_cmd_factory=mounting_utils.get_minio_mount_cmd,
1511
+ )
1512
+ ```
1513
+
1514
+ ### 3. Implement Required Utilities
1515
+ Create the necessary utility functions:
1516
+
1517
+ #### In `sky/data/data_utils.py`:
1518
+ ```python
1519
+ def create_minio_client(region: Optional[str] = None):
1520
+ '''Create MinIO S3 client.'''
1521
+ return boto3.client('s3',
1522
+ endpoint_url=minio.get_endpoint_url(),
1523
+ aws_access_key_id=minio.get_access_key(),
1524
+ aws_secret_access_key=minio.get_secret_key(),
1525
+ region_name=region or 'us-east-1')
1526
+
1527
+ def split_minio_path(minio_path: str) -> Tuple[str, str]:
1528
+ '''Split minio://bucket/key into (bucket, key).'''
1529
+ path_parts = minio_path.replace('minio://', '').split('/', 1)
1530
+ bucket = path_parts[0]
1531
+ key = path_parts[1] if len(path_parts) > 1 else ''
1532
+ return bucket, key
1533
+ ```
1534
+
1535
+ #### In `sky/utils/mounting_utils.py`:
1536
+ ```python
1537
+ def get_minio_mount_cmd(profile: str, bucket_name: str, endpoint_url: str,
1538
+ mount_path: str,
1539
+ bucket_sub_path: Optional[str]) -> str:
1540
+ '''Generate MinIO mount command using s3fs.'''
1541
+ # Implementation similar to other S3-compatible mount commands
1542
+ pass
1543
+ ```
1544
+
1545
+ ### 4. Create Adapter Module (if needed)
1546
+ Create `sky/adaptors/minio.py` for MinIO-specific configuration:
1547
+ ```python
1548
+ '''MinIO adapter for SkyPilot.'''
1549
+
1550
+ MINIO_PROFILE_NAME = 'minio'
1551
+
1552
+ def get_endpoint_url() -> str:
1553
+ '''Get MinIO endpoint URL from configuration.'''
1554
+ # Read from ~/.minio/config or environment variables
1555
+ pass
1556
+
1557
+ def resource(resource_name: str):
1558
+ '''Get MinIO resource.'''
1559
+ # Implementation for creating MinIO resources
1560
+ pass
1561
+ ```
1562
+
1349
1563
  """
1350
1564
 
1351
- _DEFAULT_REGION = 'us-east-1'
1352
1565
  _ACCESS_DENIED_MESSAGE = 'Access Denied'
1353
- _CUSTOM_ENDPOINT_REGIONS = [
1354
- 'ap-east-1', 'me-south-1', 'af-south-1', 'eu-south-1', 'eu-south-2',
1355
- 'ap-south-2', 'ap-southeast-3', 'ap-southeast-4', 'me-central-1',
1356
- 'il-central-1'
1357
- ]
1358
1566
 
1359
1567
  def __init__(self,
1360
1568
  name: str,
1361
1569
  source: str,
1362
- region: Optional[str] = _DEFAULT_REGION,
1570
+ region: Optional[str] = None,
1363
1571
  is_sky_managed: Optional[bool] = None,
1364
1572
  sync_on_reconstruction: bool = True,
1365
1573
  _bucket_sub_path: Optional[str] = None):
1574
+ # Initialize configuration first to get defaults
1575
+ self.config = self.__class__.get_config()
1576
+
1577
+ # Use provider's default region if not specified
1578
+ if region is None:
1579
+ region = self.config.default_region
1580
+
1581
+ # Initialize S3CompatibleStore specific attributes
1366
1582
  self.client: 'mypy_boto3_s3.Client'
1367
1583
  self.bucket: 'StorageHandle'
1368
- # TODO(romilb): This is purely a stopgap fix for
1369
- # https://github.com/skypilot-org/skypilot/issues/3405
1370
- # We should eventually make all opt-in regions also work for S3 by
1371
- # passing the right endpoint flags.
1372
- if region in self._CUSTOM_ENDPOINT_REGIONS:
1373
- logger.warning('AWS opt-in regions are not supported for S3. '
1374
- f'Falling back to default region '
1375
- f'{self._DEFAULT_REGION} for bucket {name!r}.')
1376
- region = self._DEFAULT_REGION
1584
+
1585
+ # Call parent constructor
1377
1586
  super().__init__(name, source, region, is_sky_managed,
1378
1587
  sync_on_reconstruction, _bucket_sub_path)
1379
1588
 
1589
+ @classmethod
1590
+ @abstractmethod
1591
+ def get_config(cls) -> S3CompatibleConfig:
1592
+ """Return the configuration for this S3-compatible provider."""
1593
+ pass
1594
+
1595
+ @classmethod
1596
+ def get_store_type(cls) -> str:
1597
+ """Return the store type identifier from configuration."""
1598
+ return cls.get_config().store_type
1599
+
1600
+ @property
1601
+ def provider_prefixes(self) -> set:
1602
+ """Dynamically get all provider prefixes from registered stores."""
1603
+ prefixes = set()
1604
+
1605
+ # Get prefixes from all registered S3-compatible stores
1606
+ for store_class in _S3_COMPATIBLE_STORES.values():
1607
+ config = store_class.get_config()
1608
+ prefixes.add(config.url_prefix)
1609
+
1610
+ # Add hardcoded prefixes for non-S3-compatible stores
1611
+ prefixes.update({
1612
+ 'gs://', # GCS
1613
+ 'https://', # Azure
1614
+ 'cos://', # IBM COS
1615
+ 'oci://', # OCI
1616
+ })
1617
+
1618
+ return prefixes
1619
+
1380
1620
  def _validate(self):
1381
1621
  if self.source is not None and isinstance(self.source, str):
1382
- if self.source.startswith('s3://'):
1383
- assert self.name == data_utils.split_s3_path(self.source)[0], (
1384
- 'S3 Bucket is specified as path, the name should be the'
1385
- ' same as S3 bucket.')
1622
+ if self.source.startswith(self.config.url_prefix):
1623
+ bucket_name, _ = self.config.split_path(self.source)
1624
+ assert self.name == bucket_name, (
1625
+ f'{self.config.store_type} Bucket is specified as path, '
1626
+ f'the name should be the same as {self.config.store_type} '
1627
+ f'bucket.')
1628
+ # Only verify if this is NOT the same store type as the source
1629
+ if self.__class__.get_store_type() != self.config.store_type:
1630
+ assert self.config.verify_bucket(self.name), (
1631
+ f'Source specified as {self.source},'
1632
+ f'a {self.config.store_type} '
1633
+ f'bucket. {self.config.store_type} Bucket should exist.'
1634
+ )
1386
1635
  elif self.source.startswith('gs://'):
1387
1636
  assert self.name == data_utils.split_gcs_path(self.source)[0], (
1388
1637
  'GCS Bucket is specified as path, the name should be '
1389
1638
  'the same as GCS bucket.')
1390
- assert data_utils.verify_gcs_bucket(self.name), (
1391
- f'Source specified as {self.source}, a GCS bucket. ',
1392
- 'GCS Bucket should exist.')
1639
+ if not isinstance(self, GcsStore):
1640
+ assert data_utils.verify_gcs_bucket(self.name), (
1641
+ f'Source specified as {self.source}, a GCS bucket. ',
1642
+ 'GCS Bucket should exist.')
1393
1643
  elif data_utils.is_az_container_endpoint(self.source):
1394
1644
  storage_account_name, container_name, _ = (
1395
1645
  data_utils.split_az_path(self.source))
1396
1646
  assert self.name == container_name, (
1397
1647
  'Azure bucket is specified as path, the name should be '
1398
1648
  'the same as Azure bucket.')
1399
- assert data_utils.verify_az_bucket(
1400
- storage_account_name, self.name), (
1401
- f'Source specified as {self.source}, an Azure bucket. '
1649
+ if not isinstance(self, AzureBlobStore):
1650
+ assert data_utils.verify_az_bucket(
1651
+ storage_account_name, self.name
1652
+ ), (f'Source specified as {self.source}, an Azure bucket. '
1402
1653
  'Azure bucket should exist.')
1403
- elif self.source.startswith('r2://'):
1404
- assert self.name == data_utils.split_r2_path(self.source)[0], (
1405
- 'R2 Bucket is specified as path, the name should be '
1406
- 'the same as R2 bucket.')
1407
- assert data_utils.verify_r2_bucket(self.name), (
1408
- f'Source specified as {self.source}, a R2 bucket. ',
1409
- 'R2 Bucket should exist.')
1410
- elif self.source.startswith('nebius://'):
1411
- assert self.name == data_utils.split_nebius_path(
1412
- self.source)[0], (
1413
- 'Nebius Object Storage is specified as path, the name '
1414
- 'should be the same as Nebius Object Storage bucket.')
1415
- assert data_utils.verify_nebius_bucket(self.name), (
1416
- f'Source specified as {self.source}, a Nebius Object '
1417
- f'Storage bucket. Nebius Object Storage Bucket should'
1418
- f' exist.')
1419
1654
  elif self.source.startswith('cos://'):
1420
1655
  assert self.name == data_utils.split_cos_path(self.source)[0], (
1421
1656
  'COS Bucket is specified as path, the name should be '
1422
1657
  'the same as COS bucket.')
1423
- assert data_utils.verify_ibm_cos_bucket(self.name), (
1424
- f'Source specified as {self.source}, a COS bucket. ',
1425
- 'COS Bucket should exist.')
1658
+ if not isinstance(self, IBMCosStore):
1659
+ assert data_utils.verify_ibm_cos_bucket(self.name), (
1660
+ f'Source specified as {self.source}, a COS bucket. ',
1661
+ 'COS Bucket should exist.')
1426
1662
  elif self.source.startswith('oci://'):
1427
1663
  raise NotImplementedError(
1428
- 'Moving data from OCI to S3 is currently not supported.')
1664
+ f'Moving data from OCI to {self.source} is ',
1665
+ 'currently not supported.')
1666
+
1429
1667
  # Validate name
1430
1668
  self.name = self.validate_name(self.name)
1431
1669
 
1432
1670
  # Check if the storage is enabled
1433
- if not _is_storage_cloud_enabled(str(clouds.AWS())):
1671
+ if not _is_storage_cloud_enabled(self.config.cloud_name):
1434
1672
  with ux_utils.print_exception_no_traceback():
1435
1673
  raise exceptions.ResourcesUnavailableError(
1436
- 'Storage \'store: s3\' specified, but ' \
1437
- 'AWS access is disabled. To fix, enable '\
1438
- 'AWS by running `sky check`. More info: '\
1439
- 'https://docs.skypilot.co/en/latest/getting-started/installation.html.' # pylint: disable=line-too-long
1440
- )
1674
+ f'Storage "store: {self.config.store_type.lower()}" '
1675
+ f'specified, but '
1676
+ f'{self.config.cloud_name} access is disabled. '
1677
+ 'To fix, enable '
1678
+ f'{self.config.cloud_name} by running `sky check`.')
1441
1679
 
1442
1680
  @classmethod
1443
1681
  def validate_name(cls, name: str) -> str:
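The hunks above route every provider-specific detail through a `self.config` object whose definition is not shown in this section. Below is a minimal, hypothetical sketch of such a descriptor; every field name is inferred from the call sites in this diff (`split_path`, `verify_bucket`, `client_factory`, and the CLI knobs used further down), not taken from SkyPilot's actual API:

```python
# Hypothetical reconstruction of the provider descriptor implied by the
# call sites in this diff; field names are inferred, not SkyPilot's API.
import dataclasses
from typing import Any, Callable, List, Optional, Tuple


@dataclasses.dataclass
class S3CompatibleConfig:
    store_type: str                                 # e.g. 'S3', 'R2'
    cloud_name: str                                 # cloud checked by `sky check`
    url_prefix: str                                 # e.g. 's3://', 'r2://'
    split_path: Callable[[str], Tuple[str, str]]    # uri -> (bucket, key)
    verify_bucket: Callable[[str], bool]            # does the bucket exist?
    client_factory: Callable[[Optional[str]], Any]  # region -> client
    resource_factory: Callable[[str], Any]          # name -> Bucket handle
    mount_cmd_factory: Optional[Callable[..., str]] = None
    mount_cached_cmd_factory: Optional[Callable[..., str]] = None
    get_endpoint_url: Optional[Callable[[], str]] = None
    aws_profile: Optional[str] = None
    credentials_file: Optional[str] = None
    config_file: Optional[str] = None
    extra_cli_args: Optional[List[str]] = None
    access_denied_message: str = 'Access Denied'
```

Concentrating these knobs in one object is what lets a single generic code path serve S3 and the S3-compatible providers that previously had near-duplicate store classes.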
@@ -1509,7 +1747,7 @@ class S3Store(AbstractStore):
1509
1747
  StorageBucketGetError: If fetching existing bucket fails
1510
1748
  StorageInitError: If general initialization fails.
1511
1749
  """
1512
- self.client = data_utils.create_s3_client(self.region)
1750
+ self.client = self.config.client_factory(self.region)
1513
1751
  self.bucket, is_new_bucket = self._get_bucket()
1514
1752
  if self.is_sky_managed is None:
1515
1753
  # If is_sky_managed is not specified, then this is a new storage
@@ -1531,16 +1769,10 @@ class S3Store(AbstractStore):
1531
1769
  if isinstance(self.source, list):
1532
1770
  self.batch_aws_rsync(self.source, create_dirs=True)
1533
1771
  elif self.source is not None:
1534
- if self.source.startswith('s3://'):
1535
- pass
1536
- elif self.source.startswith('gs://'):
1537
- self._transfer_to_s3()
1538
- elif self.source.startswith('r2://'):
1539
- self._transfer_to_s3()
1540
- elif self.source.startswith('oci://'):
1541
- self._transfer_to_s3()
1542
- elif self.source.startswith('nebius://'):
1543
- self._transfer_to_s3()
1772
+ if self._is_same_provider_source():
1773
+ pass # No transfer needed
1774
+ elif self._needs_cross_provider_transfer():
1775
+ self._transfer_from_other_provider()
1544
1776
  else:
1545
1777
  self.batch_aws_rsync([self.source])
1546
1778
  except exceptions.StorageUploadError:
@@ -1549,57 +1781,94 @@ class S3Store(AbstractStore):
1549
1781
  raise exceptions.StorageUploadError(
1550
1782
  f'Upload failed for store {self.name}') from e
1551
1783
 
1784
+ def _is_same_provider_source(self) -> bool:
1785
+ """Check if source is from the same provider."""
1786
+ return isinstance(self.source, str) and self.source.startswith(
1787
+ self.config.url_prefix)
1788
+
1789
+ def _needs_cross_provider_transfer(self) -> bool:
1790
+ """Check if source needs cross-provider transfer."""
1791
+ if not isinstance(self.source, str):
1792
+ return False
1793
+ return any(
1794
+ self.source.startswith(prefix) for prefix in self.provider_prefixes)
1795
+
1796
+ def _detect_source_type(self) -> str:
1797
+ """Detect the source provider type from URL."""
1798
+ if not isinstance(self.source, str):
1799
+ return 'unknown'
1800
+
1801
+ for provider in self.provider_prefixes:
1802
+ if self.source.startswith(provider):
1803
+ return provider[:-len('://')]
1804
+ return ''
1805
+
1806
+ def _transfer_from_other_provider(self):
1807
+ """Transfer data from another cloud to this S3-compatible store."""
1808
+ source_type = self._detect_source_type()
1809
+ target_type = self.config.store_type.lower()
1810
+
1811
+ if hasattr(data_transfer, f'{source_type}_to_{target_type}'):
1812
+ transfer_func = getattr(data_transfer,
1813
+ f'{source_type}_to_{target_type}')
1814
+ transfer_func(self.name, self.name)
1815
+ else:
1816
+ with ux_utils.print_exception_no_traceback():
1817
+ raise NotImplementedError(
1818
+ f'Transfer from {source_type} to {target_type} '
1819
+ 'is not yet supported.')
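The helpers above replace a hard-coded `if/elif` chain with a convention-based lookup on the `data_transfer` module. Here is a runnable miniature of that dispatch, with `data_transfer` stubbed out (the helper name is an assumption, not a confirmed SkyPilot API). Note that the source token comes from stripping `://` off the URL prefix, so the transfer helpers must be named to match those derived tokens:

```python
import types

# Stub standing in for sky.data.data_transfer; the helper follows the
# f'{source}_to_{target}' naming convention relied on above.
data_transfer = types.SimpleNamespace(
    gs_to_s3=lambda src, dst: print(f'copy gs://{src} -> s3://{dst}'))

prefix = 'gs://'                    # one entry of provider_prefixes
source_type = prefix[:-len('://')]  # 'gs'
target_type = 's3'                  # self.config.store_type.lower()
func_name = f'{source_type}_to_{target_type}'
if hasattr(data_transfer, func_name):
    getattr(data_transfer, func_name)('my-bucket', 'my-bucket')
else:
    raise NotImplementedError(f'{func_name} is not yet supported')
```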
1820
+
1552
1821
  def delete(self) -> None:
1822
+ """Delete the bucket or sub-path."""
1553
1823
  if self._bucket_sub_path is not None and not self.is_sky_managed:
1554
1824
  return self._delete_sub_path()
1555
1825
 
1556
- deleted_by_skypilot = self._delete_s3_bucket(self.name)
1826
+ deleted_by_skypilot = self._delete_bucket(self.name)
1827
+ provider = self.config.store_type
1557
1828
  if deleted_by_skypilot:
1558
- msg_str = f'Deleted S3 bucket {self.name}.'
1829
+ msg_str = f'Deleted {provider} bucket {self.name}.'
1559
1830
  else:
1560
- msg_str = f'S3 bucket {self.name} may have been deleted ' \
1831
+ msg_str = f'{provider} bucket {self.name} may have been deleted ' \
1561
1832
  f'externally. Removing from local state.'
1562
- logger.info(f'{colorama.Fore.GREEN}{msg_str}'
1563
- f'{colorama.Style.RESET_ALL}')
1564
-
1565
- def _delete_sub_path(self) -> None:
1566
- assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
1567
- deleted_by_skypilot = self._delete_s3_bucket_sub_path(
1568
- self.name, self._bucket_sub_path)
1569
- if deleted_by_skypilot:
1570
- msg_str = f'Removed objects from S3 bucket ' \
1571
- f'{self.name}/{self._bucket_sub_path}.'
1572
- else:
1573
- msg_str = f'Failed to remove objects from S3 bucket ' \
1574
- f'{self.name}/{self._bucket_sub_path}.'
1575
- logger.info(f'{colorama.Fore.GREEN}{msg_str}'
1576
- f'{colorama.Style.RESET_ALL}')
1833
+ logger.info(f'{colorama.Fore.GREEN}{msg_str}{colorama.Style.RESET_ALL}')
1577
1834
 
1578
1835
  def get_handle(self) -> StorageHandle:
1579
- return aws.resource('s3').Bucket(self.name)
1836
+ """Get storage handle using provider's resource factory."""
1837
+ return self.config.resource_factory(self.name)
1580
1838
 
1581
- def batch_aws_rsync(self,
1582
- source_path_list: List[Path],
1583
- create_dirs: bool = False) -> None:
1584
- """Invokes aws s3 sync to batch upload a list of local paths to S3
1839
+ def _download_file(self, remote_path: str, local_path: str) -> None:
1840
+ """Download file using S3 API."""
1841
+ self.bucket.download_file(remote_path, local_path)
1585
1842
 
1586
- AWS Sync by default uses 10 threads to upload files to the bucket. To
1587
- increase parallelism, modify max_concurrent_requests in your aws config
1588
- file (Default path: ~/.aws/config).
1843
+ def mount_command(self, mount_path: str) -> str:
1844
+ """Get mount command using provider's mount factory."""
1845
+ if self.config.mount_cmd_factory is None:
1846
+ raise exceptions.NotSupportedError(
1847
+ f'Mounting not supported for {self.config.store_type}')
1589
1848
 
1590
- Since aws s3 sync does not support batch operations, we construct
1591
- multiple commands to be run in parallel.
1849
+ install_cmd = mounting_utils.get_s3_mount_install_cmd()
1850
+ mount_cmd = self.config.mount_cmd_factory(self.bucket.name, mount_path,
1851
+ self._bucket_sub_path)
1852
+ return mounting_utils.get_mounting_command(mount_path, install_cmd,
1853
+ mount_cmd)
1592
1854
 
1593
- Args:
1594
- source_path_list: List of paths to local files or directories
1595
- create_dirs: If the local_path is a directory and this is set to
1596
- False, the contents of the directory are directly uploaded to
1597
- root of the bucket. If the local_path is a directory and this is
1598
- set to True, the directory is created in the bucket root and
1599
- contents are uploaded to it.
1600
- """
1601
- sub_path = (f'/{self._bucket_sub_path}'
1602
- if self._bucket_sub_path else '')
1855
+ def mount_cached_command(self, mount_path: str) -> str:
1856
+ """Get cached mount command. Can be overridden by subclasses."""
1857
+ if self.config.mount_cached_cmd_factory is None:
1858
+ raise exceptions.NotSupportedError(
1859
+ f'Cached mounting not supported for {self.config.store_type}')
1860
+
1861
+ install_cmd = mounting_utils.get_rclone_install_cmd()
1862
+ mount_cmd = self.config.mount_cached_cmd_factory(
1863
+ self.bucket.name, mount_path, self._bucket_sub_path)
1864
+ return mounting_utils.get_mounting_command(mount_path, install_cmd,
1865
+ mount_cmd)
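Both mount paths are now factory-driven: a provider that cannot be mounted simply leaves the factory unset, and these methods raise `NotSupportedError` instead of each subclass overriding them. A hedged sketch of what a mount factory could look like (the goofys invocation is illustrative only, not taken from this diff):

```python
from typing import Optional


def example_s3_mount_cmd(bucket: str, mount_path: str,
                         sub_path: Optional[str]) -> str:
    # Illustrative only: mounts a bucket (optionally a prefix) via goofys.
    src = f'{bucket}:{sub_path.lstrip("/")}' if sub_path else bucket
    return f'goofys {src} {mount_path}'


# Wired into the descriptor, mount_command() then just composes
# install + mount through mounting_utils.get_mounting_command().
print(example_s3_mount_cmd('my-bucket', '/mnt/data', None))
```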
1866
+
1867
+ def batch_aws_rsync(self,
1868
+ source_path_list: List[Path],
1869
+ create_dirs: bool = False) -> None:
1870
+ """Generic S3-compatible rsync using AWS CLI."""
1871
+ sub_path = f'/{self._bucket_sub_path}' if self._bucket_sub_path else ''
1603
1872
 
1604
1873
  def get_file_sync_command(base_dir_path, file_names):
1605
1874
  includes = ' '.join([
@@ -1607,10 +1876,31 @@ class S3Store(AbstractStore):
1607
1876
  for file_name in file_names
1608
1877
  ])
1609
1878
  base_dir_path = shlex.quote(base_dir_path)
1610
- sync_command = ('aws s3 sync --no-follow-symlinks --exclude="*" '
1611
- f'{includes} {base_dir_path} '
1612
- f's3://{self.name}{sub_path}')
1613
- return sync_command
1879
+
1880
+ # Build AWS CLI command with provider-specific configuration
1881
+ cmd_parts = ['aws s3 sync --no-follow-symlinks --exclude="*"']
1882
+ cmd_parts.append(f'{includes} {base_dir_path}')
1883
+ cmd_parts.append(f's3://{self.name}{sub_path}')
1884
+
1885
+ # Add provider-specific arguments
1886
+ if self.config.get_endpoint_url:
1887
+ cmd_parts.append(
1888
+ f'--endpoint-url {self.config.get_endpoint_url()}')
1889
+ if self.config.aws_profile:
1890
+ cmd_parts.append(f'--profile={self.config.aws_profile}')
1891
+ if self.config.extra_cli_args:
1892
+ cmd_parts.extend(self.config.extra_cli_args)
1893
+
1894
+ # Handle credentials file via environment
1895
+ cmd = ' '.join(cmd_parts)
1896
+ if self.config.credentials_file:
1897
+ cmd = 'AWS_SHARED_CREDENTIALS_FILE=' + \
1898
+ f'{self.config.credentials_file} {cmd}'
1899
+ if self.config.config_file:
1900
+ cmd = 'AWS_CONFIG_FILE=' + \
1901
+ f'{self.config.config_file} {cmd}'
1902
+
1903
+ return cmd
1614
1904
 
1615
1905
  def get_dir_sync_command(src_dir_path, dest_dir_name):
1616
1906
  # we exclude .git directory from the sync
@@ -1618,11 +1908,11 @@ class S3Store(AbstractStore):
1618
1908
  excluded_list.append('.git/*')
1619
1909
 
1620
1910
  # Process exclusion patterns to make them work correctly with aws
1621
- # s3 sync
1911
+ # s3 sync - this logic is from S3Store2 to ensure compatibility
1622
1912
  processed_excludes = []
1623
1913
  for excluded_path in excluded_list:
1624
1914
  # Check if the path is a directory exclusion pattern
1625
- # For AWS S3 sync, directory patterns need to end with "/**" to
1915
+ # For AWS S3 sync, directory patterns need to end with "/*" to
1626
1916
  # exclude all contents
1627
1917
  if (excluded_path.endswith('/') or os.path.isdir(
1628
1918
  os.path.join(src_dir_path, excluded_path.rstrip('/')))):
@@ -1637,10 +1927,28 @@ class S3Store(AbstractStore):
1637
1927
  for file_name in processed_excludes
1638
1928
  ])
1639
1929
  src_dir_path = shlex.quote(src_dir_path)
1640
- sync_command = (f'aws s3 sync --no-follow-symlinks {excludes} '
1641
- f'{src_dir_path} '
1642
- f's3://{self.name}{sub_path}/{dest_dir_name}')
1643
- return sync_command
1930
+
1931
+ cmd_parts = ['aws s3 sync --no-follow-symlinks']
1932
+ cmd_parts.append(f'{excludes} {src_dir_path}')
1933
+ cmd_parts.append(f's3://{self.name}{sub_path}/{dest_dir_name}')
1934
+
1935
+ if self.config.get_endpoint_url:
1936
+ cmd_parts.append(
1937
+ f'--endpoint-url {self.config.get_endpoint_url()}')
1938
+ if self.config.aws_profile:
1939
+ cmd_parts.append(f'--profile={self.config.aws_profile}')
1940
+ if self.config.extra_cli_args:
1941
+ cmd_parts.extend(self.config.extra_cli_args)
1942
+
1943
+ cmd = ' '.join(cmd_parts)
1944
+ if self.config.credentials_file:
1945
+ cmd = 'AWS_SHARED_CREDENTIALS_FILE=' + \
1946
+ f'{self.config.credentials_file} {cmd}'
1947
+ if self.config.config_file:
1948
+ cmd = 'AWS_CONFIG_FILE=' + \
1949
+ f'{self.config.config_file} {cmd}'
1950
+
1951
+ return cmd
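For concreteness, here is roughly the command string these builders emit for a hypothetical S3-compatible provider with a custom endpoint (all values below are made up):

```python
# Illustrative composition only; endpoint, profile and paths are made up.
cmd_parts = ['aws s3 sync --no-follow-symlinks --exclude=".git/*"',
             "'/tmp/ckpt'",
             's3://my-bucket/artifacts/ckpt']
cmd_parts.append('--endpoint-url https://storage.example.com')
cmd_parts.append('--profile=myprovider')
cmd = ' '.join(cmd_parts)
cmd = f'AWS_SHARED_CREDENTIALS_FILE=~/.myprovider/credentials {cmd}'
print(cmd)
# AWS_SHARED_CREDENTIALS_FILE=~/.myprovider/credentials aws s3 sync
#   --no-follow-symlinks --exclude=".git/*" '/tmp/ckpt'
#   s3://my-bucket/artifacts/ckpt
#   --endpoint-url https://storage.example.com --profile=myprovider
```

Plain AWS S3 skips the endpoint/profile/credentials additions entirely, so the generic builder degrades to the original `aws s3 sync` invocation.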
1644
1952
 
1645
1953
  # Generate message for upload
1646
1954
  if len(source_path_list) > 1:
@@ -1648,9 +1956,12 @@ class S3Store(AbstractStore):
1648
1956
  else:
1649
1957
  source_message = source_path_list[0]
1650
1958
 
1959
+ provider_prefix = self.config.url_prefix
1651
1960
  log_path = sky_logging.generate_tmp_logging_file_path(
1652
1961
  _STORAGE_LOG_FILE_NAME)
1653
- sync_path = f'{source_message} -> s3://{self.name}{sub_path}/'
1962
+ sync_path = (f'{source_message} -> '
1963
+ f'{provider_prefix}{self.name}{sub_path}/')
1964
+
1654
1965
  with rich_utils.safe_status(
1655
1966
  ux_utils.spinner_message(f'Syncing {sync_path}',
1656
1967
  log_path=log_path)):
@@ -1660,150 +1971,81 @@ class S3Store(AbstractStore):
1660
1971
  get_dir_sync_command,
1661
1972
  log_path,
1662
1973
  self.name,
1663
- self._ACCESS_DENIED_MESSAGE,
1974
+ self.config.access_denied_message,
1664
1975
  create_dirs=create_dirs,
1665
1976
  max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS)
1977
+
1666
1978
  logger.info(
1667
1979
  ux_utils.finishing_message(f'Storage synced: {sync_path}',
1668
1980
  log_path))
1669
1981
 
1670
- def _transfer_to_s3(self) -> None:
1671
- assert isinstance(self.source, str), self.source
1672
- if self.source.startswith('gs://'):
1673
- data_transfer.gcs_to_s3(self.name, self.name)
1674
- elif self.source.startswith('r2://'):
1675
- data_transfer.r2_to_s3(self.name, self.name)
1676
-
1677
1982
  def _get_bucket(self) -> Tuple[StorageHandle, bool]:
1678
- """Obtains the S3 bucket.
1679
-
1680
- If the bucket exists, this method will return the bucket.
1681
- If the bucket does not exist, there are three cases:
1682
- 1) Raise an error if the bucket source starts with s3://
1683
- 2) Return None if bucket has been externally deleted and
1684
- sync_on_reconstruction is False
1685
- 3) Create and return a new bucket otherwise
1686
-
1687
- Raises:
1688
- StorageSpecError: If externally created bucket is attempted to be
1689
- mounted without specifying storage source.
1690
- StorageBucketCreateError: If creating the bucket fails
1691
- StorageBucketGetError: If fetching a bucket fails
1692
- StorageExternalDeletionError: If externally deleted storage is
1693
- attempted to be fetched while reconstructing the storage for
1694
- 'sky storage delete' or 'sky start'
1695
- """
1696
- s3 = aws.resource('s3')
1697
- bucket = s3.Bucket(self.name)
1983
+ """Get or create bucket using S3 API."""
1984
+ bucket = self.config.resource_factory(self.name)
1698
1985
 
1699
1986
  try:
1700
1987
  # Try Public bucket case.
1701
- # This line does not error out if the bucket is an external public
1702
- # bucket or if it is a user's bucket that is publicly
1703
- # accessible.
1704
1988
  self.client.head_bucket(Bucket=self.name)
1705
1989
  self._validate_existing_bucket()
1706
1990
  return bucket, False
1707
1991
  except aws.botocore_exceptions().ClientError as e:
1708
1992
  error_code = e.response['Error']['Code']
1709
- # AccessDenied error for buckets that are private and not owned by
1710
- # user.
1711
1993
  if error_code == '403':
1712
- command = f'aws s3 ls {self.name}'
1994
+ command = f'aws s3 ls s3://{self.name}'
1995
+ if self.config.aws_profile:
1996
+ command += f' --profile={self.config.aws_profile}'
1997
+ if self.config.get_endpoint_url:
1998
+ command += f' --endpoint-url '\
1999
+ f'{self.config.get_endpoint_url()}'
2000
+ if self.config.credentials_file:
2001
+ command = (f'AWS_SHARED_CREDENTIALS_FILE='
2002
+ f'{self.config.credentials_file} {command}')
2003
+ if self.config.config_file:
2004
+ command = 'AWS_CONFIG_FILE=' + \
2005
+ f'{self.config.config_file} {command}'
1713
2006
  with ux_utils.print_exception_no_traceback():
1714
2007
  raise exceptions.StorageBucketGetError(
1715
2008
  _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
1716
2009
  f' To debug, consider running `{command}`.') from e
1717
2010
 
1718
- if isinstance(self.source, str) and self.source.startswith('s3://'):
2011
+ if isinstance(self.source, str) and self.source.startswith(
2012
+ self.config.url_prefix):
1719
2013
  with ux_utils.print_exception_no_traceback():
1720
2014
  raise exceptions.StorageBucketGetError(
1721
2015
  'Attempted to use a non-existent bucket as a source: '
1722
- f'{self.source}. Consider using `aws s3 ls '
1723
- f'{self.source}` to debug.')
2016
+ f'{self.source}.')
1724
2017
 
1725
- # If bucket cannot be found in both private and public settings,
1726
- # the bucket is to be created by Sky. However, creation is skipped if
1727
- # Store object is being reconstructed for deletion or re-mount with
1728
- # sky start, and error is raised instead.
2018
+ # If bucket cannot be found, create it if needed
1729
2019
  if self.sync_on_reconstruction:
1730
- bucket = self._create_s3_bucket(self.name, self.region)
2020
+ bucket = self._create_bucket(self.name)
1731
2021
  return bucket, True
1732
2022
  else:
1733
- # Raised when Storage object is reconstructed for sky storage
1734
- # delete or to re-mount Storages with sky start but the storage
1735
- # is already removed externally.
1736
2023
  raise exceptions.StorageExternalDeletionError(
1737
2024
  'Attempted to fetch a non-existent bucket: '
1738
2025
  f'{self.name}')
1739
2026
 
1740
- def _download_file(self, remote_path: str, local_path: str) -> None:
1741
- """Downloads file from remote to local on s3 bucket
1742
- using the boto3 API
1743
-
1744
- Args:
1745
- remote_path: str; Remote path on S3 bucket
1746
- local_path: str; Local path on user's device
1747
- """
1748
- self.bucket.download_file(remote_path, local_path)
1749
-
1750
- def mount_command(self, mount_path: str) -> str:
1751
- """Returns the command to mount the bucket to the mount_path.
1752
-
1753
- Uses goofys to mount the bucket.
1754
-
1755
- Args:
1756
- mount_path: str; Path to mount the bucket to.
1757
- """
1758
- install_cmd = mounting_utils.get_s3_mount_install_cmd()
1759
- mount_cmd = mounting_utils.get_s3_mount_cmd(self.bucket.name,
1760
- mount_path,
1761
- self._bucket_sub_path)
1762
- return mounting_utils.get_mounting_command(mount_path, install_cmd,
1763
- mount_cmd)
1764
-
1765
- def mount_cached_command(self, mount_path: str) -> str:
1766
- install_cmd = mounting_utils.get_rclone_install_cmd()
1767
- rclone_profile_name = (
1768
- data_utils.Rclone.RcloneStores.S3.get_profile_name(self.name))
1769
- rclone_config = data_utils.Rclone.RcloneStores.S3.get_config(
1770
- rclone_profile_name=rclone_profile_name)
1771
- mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
1772
- rclone_config, rclone_profile_name, self.bucket.name, mount_path)
1773
- return mounting_utils.get_mounting_command(mount_path, install_cmd,
1774
- mount_cached_cmd)
1775
-
1776
- def _create_s3_bucket(self,
1777
- bucket_name: str,
1778
- region=_DEFAULT_REGION) -> StorageHandle:
1779
- """Creates S3 bucket with specific name in specific region
1780
-
1781
- Args:
1782
- bucket_name: str; Name of bucket
1783
- region: str; Region name, e.g. us-west-1, us-east-2
1784
- Raises:
1785
- StorageBucketCreateError: If bucket creation fails.
1786
- """
1787
- s3_client = self.client
2027
+ def _create_bucket(self, bucket_name: str) -> StorageHandle:
2028
+ """Create bucket using S3 API."""
1788
2029
  try:
1789
2030
  create_bucket_config: Dict[str, Any] = {'Bucket': bucket_name}
1790
- # If default us-east-1 region of create_bucket API is used,
1791
- # the LocationConstraint must not be specified.
1792
- # Reference: https://stackoverflow.com/a/51912090
1793
- if region is not None and region != 'us-east-1':
2031
+ if self.region is not None and self.region != 'us-east-1':
1794
2032
  create_bucket_config['CreateBucketConfiguration'] = {
1795
- 'LocationConstraint': region
2033
+ 'LocationConstraint': self.region
1796
2034
  }
1797
- s3_client.create_bucket(**create_bucket_config)
2035
+ self.client.create_bucket(**create_bucket_config)
1798
2036
  logger.info(
1799
2037
  f' {colorama.Style.DIM}Created S3 bucket {bucket_name!r} in '
1800
- f'{region or "us-east-1"}{colorama.Style.RESET_ALL}')
2038
+ f'{self.region or "us-east-1"}{colorama.Style.RESET_ALL}')
1801
2039
 
1802
2040
  # Add AWS tags configured in config.yaml to the bucket.
1803
2041
  # This is useful for cost tracking and external cleanup.
1804
- bucket_tags = skypilot_config.get_nested(('aws', 'labels'), {})
2042
+ bucket_tags = skypilot_config.get_effective_region_config(
2043
+ cloud=self.config.cloud_name,
2044
+ region=None,
2045
+ keys=('labels',),
2046
+ default_value={})
1805
2047
  if bucket_tags:
1806
- s3_client.put_bucket_tagging(
2048
+ self.client.put_bucket_tagging(
1807
2049
  Bucket=bucket_name,
1808
2050
  Tagging={
1809
2051
  'TagSet': [{
@@ -1811,22 +2053,46 @@ class S3Store(AbstractStore):
1811
2053
  'Value': v
1812
2054
  } for k, v in bucket_tags.items()]
1813
2055
  })
1814
-
1815
2056
  except aws.botocore_exceptions().ClientError as e:
1816
2057
  with ux_utils.print_exception_no_traceback():
1817
2058
  raise exceptions.StorageBucketCreateError(
1818
- f'Attempted to create a bucket {self.name} but failed.'
2059
+ f'Attempted to create S3 bucket {self.name} but failed.'
1819
2060
  ) from e
1820
- return aws.resource('s3').Bucket(bucket_name)
2061
+ return self.config.resource_factory(bucket_name)
2062
+
2063
+ def _delete_bucket(self, bucket_name: str) -> bool:
2064
+ """Delete bucket using AWS CLI."""
2065
+ cmd_parts = [f'aws s3 rb s3://{bucket_name} --force']
2066
+
2067
+ if self.config.aws_profile:
2068
+ cmd_parts.append(f'--profile={self.config.aws_profile}')
2069
+ if self.config.get_endpoint_url:
2070
+ cmd_parts.append(f'--endpoint-url {self.config.get_endpoint_url()}')
2071
+
2072
+ remove_command = ' '.join(cmd_parts)
2073
+
2074
+ if self.config.credentials_file:
2075
+ remove_command = (f'AWS_SHARED_CREDENTIALS_FILE='
2076
+ f'{self.config.credentials_file} '
2077
+ f'{remove_command}')
2078
+ if self.config.config_file:
2079
+ remove_command = 'AWS_CONFIG_FILE=' + \
2080
+ f'{self.config.config_file} {remove_command}'
2081
+ return self._execute_remove_command(
2082
+ remove_command, bucket_name,
2083
+ f'Deleting {self.config.store_type} bucket {bucket_name}',
2084
+ (f'Failed to delete {self.config.store_type} bucket '
2085
+ f'{bucket_name}.'))
1821
2086
 
1822
- def _execute_s3_remove_command(self, command: str, bucket_name: str,
1823
- hint_operating: str,
1824
- hint_failed: str) -> bool:
2087
+ def _execute_remove_command(self, command: str, bucket_name: str,
2088
+ hint_operating: str, hint_failed: str) -> bool:
2089
+ """Execute bucket removal command."""
1825
2090
  try:
1826
2091
  with rich_utils.safe_status(
1827
2092
  ux_utils.spinner_message(hint_operating)):
1828
- subprocess.check_output(command.split(' '),
1829
- stderr=subprocess.STDOUT)
2093
+ subprocess.check_output(command,
2094
+ stderr=subprocess.STDOUT,
2095
+ shell=True)
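The switch from `command.split(' ')` to `shell=True` is load-bearing: the commands now carry `AWS_CONFIG_FILE=...`/`AWS_SHARED_CREDENTIALS_FILE=...` prefixes, and environment assignments before a command are shell syntax, not argv tokens. A minimal illustration:

```python
import subprocess

# As an argv list, 'FOO=bar' would be treated as the program to run;
# with shell=True it becomes an environment assignment for the command.
out = subprocess.check_output("FOO=bar sh -c 'echo $FOO'",
                              stderr=subprocess.STDOUT,
                              shell=True)
print(out.decode().strip())  # -> bar
```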
1830
2096
  except subprocess.CalledProcessError as e:
1831
2097
  if 'NoSuchBucket' in e.output.decode('utf-8'):
1832
2098
  logger.debug(
@@ -1840,47 +2106,44 @@ class S3Store(AbstractStore):
1840
2106
  f'Detailed error: {e.output}')
1841
2107
  return True
1842
2108
 
1843
- def _delete_s3_bucket(self, bucket_name: str) -> bool:
1844
- """Deletes S3 bucket, including all objects in bucket
1845
-
1846
- Args:
1847
- bucket_name: str; Name of bucket
1848
-
1849
- Returns:
1850
- bool; True if bucket was deleted, False if it was deleted externally.
1851
-
1852
- Raises:
1853
- StorageBucketDeleteError: If deleting the bucket fails.
1854
- """
1855
- # Deleting objects is very slow programatically
1856
- # (i.e. bucket.objects.all().delete() is slow).
1857
- # In addition, standard delete operations (i.e. via `aws s3 rm`)
1858
- # are slow, since AWS puts deletion markers.
1859
- # https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
1860
- # The fastest way to delete is to run `aws s3 rb --force`,
1861
- # which removes the bucket by force.
1862
- remove_command = f'aws s3 rb s3://{bucket_name} --force'
1863
- success = self._execute_s3_remove_command(
1864
- remove_command, bucket_name,
1865
- f'Deleting S3 bucket [green]{bucket_name}[/]',
1866
- f'Failed to delete S3 bucket {bucket_name}.')
1867
- if not success:
1868
- return False
1869
-
1870
- # Wait until bucket deletion propagates on AWS servers
1871
- while data_utils.verify_s3_bucket(bucket_name):
1872
- time.sleep(0.1)
1873
- return True
1874
-
1875
- def _delete_s3_bucket_sub_path(self, bucket_name: str,
1876
- sub_path: str) -> bool:
1877
- """Deletes the sub path from the bucket."""
1878
- remove_command = f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive'
1879
- return self._execute_s3_remove_command(
1880
- remove_command, bucket_name, f'Removing objects from S3 bucket '
1881
- f'[green]{bucket_name}/{sub_path}[/]',
1882
- f'Failed to remove objects from S3 bucket {bucket_name}/{sub_path}.'
1883
- )
2109
+ def _delete_sub_path(self) -> None:
2110
+ """Remove objects from the sub path in the bucket."""
2111
+ assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
2112
+ deleted_by_skypilot = self._delete_bucket_sub_path(
2113
+ self.name, self._bucket_sub_path)
2114
+ provider = self.config.store_type
2115
+ if deleted_by_skypilot:
2116
+ msg_str = (f'Removed objects from {provider} bucket '
2117
+ f'{self.name}/{self._bucket_sub_path}.')
2118
+ else:
2119
+ msg_str = (f'Failed to remove objects from {provider} bucket '
2120
+ f'{self.name}/{self._bucket_sub_path}.')
2121
+ logger.info(f'{colorama.Fore.GREEN}{msg_str}{colorama.Style.RESET_ALL}')
2122
+
2123
+ def _delete_bucket_sub_path(self, bucket_name: str, sub_path: str) -> bool:
2124
+ """Delete objects in the sub path from the bucket."""
2125
+ cmd_parts = [f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive']
2126
+
2127
+ if self.config.aws_profile:
2128
+ cmd_parts.append(f'--profile={self.config.aws_profile}')
2129
+ if self.config.get_endpoint_url:
2130
+ cmd_parts.append(f'--endpoint-url {self.config.get_endpoint_url()}')
2131
+
2132
+ remove_command = ' '.join(cmd_parts)
2133
+
2134
+ if self.config.credentials_file:
2135
+ remove_command = (f'AWS_SHARED_CREDENTIALS_FILE='
2136
+ f'{self.config.credentials_file} '
2137
+ f'{remove_command}')
2138
+ if self.config.config_file:
2139
+ remove_command = 'AWS_CONFIG_FILE=' + \
2140
+ f'{self.config.config_file} {remove_command}'
2141
+ return self._execute_remove_command(
2142
+ remove_command, bucket_name,
2143
+ (f'Removing objects from {self.config.store_type} bucket '
2144
+ f'{bucket_name}/{sub_path}'),
2145
+ (f'Failed to remove objects from {self.config.store_type} '
2146
+ f'bucket {bucket_name}/{sub_path}.'))
1884
2147
 
1885
2148
 
1886
2149
  class GcsStore(AbstractStore):
@@ -1951,6 +2214,10 @@ class GcsStore(AbstractStore):
1951
2214
  elif self.source.startswith('oci://'):
1952
2215
  raise NotImplementedError(
1953
2216
  'Moving data from OCI to GCS is currently not supported.')
2217
+ elif self.source.startswith('cw://'):
2218
+ raise NotImplementedError(
2219
+ 'Moving data from CoreWeave Object Storage to GCS is'
2220
+ ' currently not supported.')
1954
2221
  # Validate name
1955
2222
  self.name = self.validate_name(self.name)
1956
2223
  # Check if the storage is enabled
@@ -2337,7 +2604,7 @@ class GcsStore(AbstractStore):
2337
2604
  except Exception as e: # pylint: disable=broad-except
2338
2605
  with ux_utils.print_exception_no_traceback():
2339
2606
  raise exceptions.StorageBucketCreateError(
2340
- f'Attempted to create a bucket {self.name} but failed.'
2607
+ f'Attempted to create GCS bucket {self.name} but failed.'
2341
2608
  ) from e
2342
2609
  logger.info(
2343
2610
  f' {colorama.Style.DIM}Created GCS bucket {new_bucket.name!r} in '
@@ -2494,7 +2761,11 @@ class AzureBlobStore(AbstractStore):
2494
2761
  name=override_args.get('name', metadata.name),
2495
2762
  storage_account_name=override_args.get(
2496
2763
  'storage_account', metadata.storage_account_name),
2497
- source=override_args.get('source', metadata.source),
2764
+ # TODO(cooperc): fix the types for mypy 1.16
2765
+ # Azure store expects a string path; metadata.source may be a Path
2766
+ # or List[Path].
2767
+ source=override_args.get('source',
2768
+ metadata.source), # type: ignore[arg-type]
2498
2769
  region=override_args.get('region', metadata.region),
2499
2770
  is_sky_managed=override_args.get('is_sky_managed',
2500
2771
  metadata.is_sky_managed),
@@ -2562,6 +2833,10 @@ class AzureBlobStore(AbstractStore):
2562
2833
  elif self.source.startswith('oci://'):
2563
2834
  raise NotImplementedError(
2564
2835
  'Moving data from OCI to AzureBlob is not supported.')
2836
+ elif self.source.startswith('cw://'):
2837
+ raise NotImplementedError(
2838
+ 'Moving data from CoreWeave Object Storage to AzureBlob is'
2839
+ ' currently not supported.')
2565
2840
  # Validate name
2566
2841
  self.name = self.validate_name(self.name)
2567
2842
 
@@ -2764,8 +3039,12 @@ class AzureBlobStore(AbstractStore):
2764
3039
  # Creates new resource group and storage account or use the
2765
3040
  # storage_account provided by the user through config.yaml
2766
3041
  else:
2767
- config_storage_account = skypilot_config.get_nested(
2768
- ('azure', 'storage_account'), None)
3042
+ config_storage_account = (
3043
+ skypilot_config.get_effective_region_config(
3044
+ cloud='azure',
3045
+ region=None,
3046
+ keys=('storage_account',),
3047
+ default_value=None))
2769
3048
  if config_storage_account is not None:
2770
3049
  # using user provided storage account from config.yaml
2771
3050
  storage_account_name = config_storage_account
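The lookup moved from a flat nested get to the region-aware accessor, presumably so per-region overrides in config.yaml are honored. The call-shape difference, side by side (`get_effective_region_config` is SkyPilot's function, shown with the same arguments as in the hunk above):

```python
# Before: flat nested lookup, no notion of region scoping.
# storage_account = skypilot_config.get_nested(
#     ('azure', 'storage_account'), None)

# After: region-aware lookup; region=None requests the cloud-wide value.
storage_account = skypilot_config.get_effective_region_config(
    cloud='azure',
    region=None,
    keys=('storage_account',),
    default_value=None)
```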
@@ -2929,6 +3208,8 @@ class AzureBlobStore(AbstractStore):
2929
3208
  raise NotImplementedError(error_message.format('OCI'))
2930
3209
  elif self.source.startswith('nebius://'):
2931
3210
  raise NotImplementedError(error_message.format('NEBIUS'))
3211
+ elif self.source.startswith('cw://'):
3212
+ raise NotImplementedError(error_message.format('CoreWeave'))
2932
3213
  else:
2933
3214
  self.batch_az_blob_sync([self.source])
2934
3215
  except exceptions.StorageUploadError:
@@ -3256,7 +3537,7 @@ class AzureBlobStore(AbstractStore):
3256
3537
  with rich_utils.safe_status(
3257
3538
  ux_utils.spinner_message(
3258
3539
  f'Deleting Azure container {container_name}')):
3259
- # Check for the existance of the container before deletion.
3540
+ # Check for the existence of the container before deletion.
3260
3541
  self.storage_client.blob_containers.get(
3261
3542
  self.resource_group_name,
3262
3543
  self.storage_account_name,
@@ -3281,22 +3562,23 @@ class AzureBlobStore(AbstractStore):
3281
3562
  return True
3282
3563
 
3283
3564
 
3284
- class R2Store(AbstractStore):
3285
- """R2Store inherits from S3Store Object and represents the backend
3286
- for R2 buckets.
3565
+ class IBMCosStore(AbstractStore):
3566
+ """IBMCosStore inherits from Storage Object and represents the backend
3567
+ for COS buckets.
3287
3568
  """
3288
-
3289
3569
  _ACCESS_DENIED_MESSAGE = 'Access Denied'
3290
3570
 
3291
3571
  def __init__(self,
3292
3572
  name: str,
3293
3573
  source: str,
3294
- region: Optional[str] = 'auto',
3574
+ region: Optional[str] = 'us-east',
3295
3575
  is_sky_managed: Optional[bool] = None,
3296
- sync_on_reconstruction: Optional[bool] = True,
3576
+ sync_on_reconstruction: bool = True,
3297
3577
  _bucket_sub_path: Optional[str] = None):
3298
- self.client: 'mypy_boto3_s3.Client'
3578
+ self.client: 'storage.Client'
3299
3579
  self.bucket: 'StorageHandle'
3580
+ self.rclone_profile_name = (
3581
+ data_utils.Rclone.RcloneStores.IBM.get_profile_name(self.name))
3300
3582
  super().__init__(name, source, region, is_sky_managed,
3301
3583
  sync_on_reconstruction, _bucket_sub_path)
3302
3584
 
@@ -3330,6 +3612,9 @@ class R2Store(AbstractStore):
3330
3612
  assert self.name == data_utils.split_r2_path(self.source)[0], (
3331
3613
  'R2 Bucket is specified as path, the name should be '
3332
3614
  'the same as R2 bucket.')
3615
+ assert data_utils.verify_r2_bucket(self.name), (
3616
+ f'Source specified as {self.source}, an R2 bucket. '
3617
+ 'R2 Bucket should exist.')
3333
3618
  elif self.source.startswith('nebius://'):
3334
3619
  assert self.name == data_utils.split_nebius_path(
3335
3620
  self.source)[0], (
@@ -3341,29 +3626,63 @@ class R2Store(AbstractStore):
3341
3626
  f'exist.')
3342
3627
  elif self.source.startswith('cos://'):
3343
3628
  assert self.name == data_utils.split_cos_path(self.source)[0], (
3344
- 'IBM COS Bucket is specified as path, the name should be '
3629
+ 'COS Bucket is specified as path, the name should be '
3345
3630
  'the same as COS bucket.')
3346
- assert data_utils.verify_ibm_cos_bucket(self.name), (
3347
- f'Source specified as {self.source}, a COS bucket. ',
3348
- 'COS Bucket should exist.')
3349
- elif self.source.startswith('oci://'):
3631
+ elif self.source.startswith('cw://'):
3350
3632
  raise NotImplementedError(
3351
- 'Moving data from OCI to R2 is currently not supported.')
3352
-
3633
+ 'Moving data from CoreWeave Object Storage to COS is '
3634
+ 'currently not supported.')
3353
3635
  # Validate name
3354
- self.name = S3Store.validate_name(self.name)
3355
- # Check if the storage is enabled
3356
- if not _is_storage_cloud_enabled(cloudflare.NAME):
3636
+ self.name = IBMCosStore.validate_name(self.name)
3637
+
3638
+ @classmethod
3639
+ def validate_name(cls, name: str) -> str:
3640
+ """Validates the name of a COS bucket.
3641
+
3642
+ Rules source: https://ibm.github.io/ibm-cos-sdk-java/com/ibm/cloud/objectstorage/services/s3/model/Bucket.html # pylint: disable=line-too-long
3643
+ """
3644
+
3645
+ def _raise_no_traceback_name_error(err_str):
3357
3646
  with ux_utils.print_exception_no_traceback():
3358
- raise exceptions.ResourcesUnavailableError(
3359
- 'Storage \'store: r2\' specified, but ' \
3360
- 'Cloudflare R2 access is disabled. To fix, '\
3361
- 'enable Cloudflare R2 by running `sky check`. '\
3362
- 'More info: https://docs.skypilot.co/en/latest/getting-started/installation.html.' # pylint: disable=line-too-long
3363
- )
3647
+ raise exceptions.StorageNameError(err_str)
3648
+
3649
+ if name is not None and isinstance(name, str):
3650
+ if not 3 <= len(name) <= 63:
3651
+ _raise_no_traceback_name_error(
3652
+ f'Invalid store name: {name} must be between 3 (min) '
3653
+ 'and 63 (max) characters long.')
3654
+
3655
+ # Check for valid characters and start/end with a letter or number
3656
+ pattern = r'^[a-z0-9][-a-z0-9.]*[a-z0-9]$'
3657
+ if not re.match(pattern, name):
3658
+ _raise_no_traceback_name_error(
3659
+ f'Invalid store name: {name} can consist only of '
3660
+ 'lowercase letters, numbers, dots (.), and dashes (-). '
3661
+ 'It must begin and end with a letter or number.')
3662
+
3663
+ # Check for two adjacent periods or dashes
3664
+ if any(substring in name for substring in ['..', '--']):
3665
+ _raise_no_traceback_name_error(
3666
+ f'Invalid store name: {name} must not contain '
3667
+ 'two adjacent periods/dashes')
3668
+
3669
+ # Check for IP address format
3670
+ ip_pattern = r'^(?:\d{1,3}\.){3}\d{1,3}$'
3671
+ if re.match(ip_pattern, name):
3672
+ _raise_no_traceback_name_error(
3673
+ f'Invalid store name: {name} must not be formatted as '
3674
+ 'an IP address (for example, 192.168.5.4).')
3675
+
3676
+ if any(substring in name for substring in ['.-', '-.']):
3677
+ _raise_no_traceback_name_error(
3678
+ f'Invalid store name: {name} must '
3679
+ 'not contain the substrings ".-" or "-.".')
3680
+ else:
3681
+ _raise_no_traceback_name_error('Store name must be specified.')
3682
+ return name
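A quick usage sketch of the validation rules above (bucket names are made up; the snippet assumes the module's own `exceptions` import, and `StorageNameError` propagates through the no-traceback helper):

```python
# Illustrative checks against the rules implemented above.
IBMCosStore.validate_name('my-training-data')  # ok: returned unchanged
for bad in ('ab', 'data..set', 'data.-set', '192.168.5.4'):
    try:
        IBMCosStore.validate_name(bad)
    except exceptions.StorageNameError as e:
        print(f'{bad!r} rejected: {e}')
```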
3364
3683
 
3365
3684
  def initialize(self):
3366
- """Initializes the R2 store object on the cloud.
3685
+ """Initializes the cos store object on the cloud.
3367
3686
 
3368
3687
  Initialization involves fetching bucket if exists, or creating it if
3369
3688
  it does not.
@@ -3373,7 +3692,11 @@ class R2Store(AbstractStore):
3373
3692
  StorageBucketGetError: If fetching existing bucket fails
3374
3693
  StorageInitError: If general initialization fails.
3375
3694
  """
3376
- self.client = data_utils.create_r2_client(self.region)
3695
+ if self.region is None:
3696
+ raise exceptions.StorageInitError(
3697
+ 'Region must be specified for IBM COS store.')
3698
+ self.client = ibm.get_cos_client(self.region)
3699
+ self.s3_resource = ibm.get_cos_resource(self.region)
3377
3700
  self.bucket, is_new_bucket = self._get_bucket()
3378
3701
  if self.is_sky_managed is None:
3379
3702
  # If is_sky_managed is not specified, then this is a new storage
@@ -3383,7 +3706,7 @@ class R2Store(AbstractStore):
3383
3706
  self.is_sky_managed = is_new_bucket
3384
3707
 
3385
3708
  def upload(self):
3386
- """Uploads source to store bucket.
3709
+ """Uploads files from local machine to bucket.
3387
3710
 
3388
3711
  Upload must be called by the Storage handler - it is not called on
3389
3712
  Store initialization.
@@ -3393,22 +3716,29 @@ class R2Store(AbstractStore):
3393
3716
  """
3394
3717
  try:
3395
3718
  if isinstance(self.source, list):
3396
- self.batch_aws_rsync(self.source, create_dirs=True)
3719
+ self.batch_ibm_rsync(self.source, create_dirs=True)
3397
3720
  elif self.source is not None:
3398
- if self.source.startswith('s3://'):
3399
- self._transfer_to_r2()
3400
- elif self.source.startswith('gs://'):
3401
- self._transfer_to_r2()
3402
- elif self.source.startswith('r2://'):
3721
+ if self.source.startswith('cos://'):
3722
+ # Source is already a COS bucket; no upload is needed.
3403
3723
  pass
3404
- elif self.source.startswith('oci://'):
3405
- self._transfer_to_r2()
3724
+ elif self.source.startswith('s3://'):
3725
+ raise Exception('IBM COS does not currently support '
3726
+ 'data transfers between COS and S3')
3406
3727
  elif self.source.startswith('nebius://'):
3407
- self._transfer_to_r2()
3728
+ raise Exception('IBM COS does not currently support '
3729
+ 'data transfers between COS and Nebius')
3730
+ elif self.source.startswith('gs://'):
3731
+ raise Exception('IBM COS does not currently support '
3732
+ 'data transfers between COS and GS')
3733
+ elif self.source.startswith('r2://'):
3734
+ raise Exception('IBM COS does not currently support '
3735
+ 'data transfers between COS and r2')
3736
+ elif self.source.startswith('cw://'):
3737
+ raise Exception('IBM COS does not currently support '
3738
+ 'data transfers between COS and CoreWeave')
3408
3739
  else:
3409
- self.batch_aws_rsync([self.source])
3410
- except exceptions.StorageUploadError:
3411
- raise
3740
+ self.batch_ibm_rsync([self.source])
3741
+
3412
3742
  except Exception as e:
3413
3743
  raise exceptions.StorageUploadError(
3414
3744
  f'Upload failed for store {self.name}') from e
@@ -3417,41 +3747,28 @@ class R2Store(AbstractStore):
3417
3747
  if self._bucket_sub_path is not None and not self.is_sky_managed:
3418
3748
  return self._delete_sub_path()
3419
3749
 
3420
- deleted_by_skypilot = self._delete_r2_bucket(self.name)
3421
- if deleted_by_skypilot:
3422
- msg_str = f'Deleted R2 bucket {self.name}.'
3423
- else:
3424
- msg_str = f'R2 bucket {self.name} may have been deleted ' \
3425
- f'externally. Removing from local state.'
3426
- logger.info(f'{colorama.Fore.GREEN}{msg_str}'
3750
+ self._delete_cos_bucket()
3751
+ logger.info(f'{colorama.Fore.GREEN}Deleted COS bucket {self.name}.'
3427
3752
  f'{colorama.Style.RESET_ALL}')
3428
3753
 
3429
3754
  def _delete_sub_path(self) -> None:
3430
3755
  assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
3431
- deleted_by_skypilot = self._delete_r2_bucket_sub_path(
3432
- self.name, self._bucket_sub_path)
3433
- if deleted_by_skypilot:
3434
- msg_str = f'Removed objects from R2 bucket ' \
3435
- f'{self.name}/{self._bucket_sub_path}.'
3436
- else:
3437
- msg_str = f'Failed to remove objects from R2 bucket ' \
3438
- f'{self.name}/{self._bucket_sub_path}.'
3439
- logger.info(f'{colorama.Fore.GREEN}{msg_str}'
3440
- f'{colorama.Style.RESET_ALL}')
3756
+ bucket = self.s3_resource.Bucket(self.name)
3757
+ try:
3758
+ self._delete_cos_bucket_objects(bucket, self._bucket_sub_path + '/')
3759
+ except ibm.ibm_botocore.exceptions.ClientError as e:
3760
+ if e.__class__.__name__ == 'NoSuchBucket':
3761
+ logger.debug('bucket already removed')
3441
3762
 
3442
3763
  def get_handle(self) -> StorageHandle:
3443
- return cloudflare.resource('s3').Bucket(self.name)
3764
+ return self.s3_resource.Bucket(self.name)
3444
3765
 
3445
- def batch_aws_rsync(self,
3766
+ def batch_ibm_rsync(self,
3446
3767
  source_path_list: List[Path],
3447
3768
  create_dirs: bool = False) -> None:
3448
- """Invokes aws s3 sync to batch upload a list of local paths to R2
3449
-
3450
- AWS Sync by default uses 10 threads to upload files to the bucket. To
3451
- increase parallelism, modify max_concurrent_requests in your aws config
3452
- file (Default path: ~/.aws/config).
3769
+ """Invokes rclone copy to batch upload a list of local paths to cos
3453
3770
 
3454
- Since aws s3 sync does not support batch operations, we construct
3771
+ Since rclone does not support batch operations, we construct
3455
3772
  multiple commands to be run in parallel.
3456
3773
 
3457
3774
  Args:
@@ -3465,49 +3782,58 @@ class R2Store(AbstractStore):
3465
3782
  sub_path = (f'/{self._bucket_sub_path}'
3466
3783
  if self._bucket_sub_path else '')
3467
3784
 
3468
- def get_file_sync_command(base_dir_path, file_names):
3785
+ def get_dir_sync_command(src_dir_path, dest_dir_name) -> str:
3786
+ """returns an rclone command that copies a complete folder
3787
+ from 'src_dir_path' to bucket/'dest_dir_name'.
3788
+
3789
+ `rclone copy` copies files from source path to target.
3790
+ files with identical names at the destination won't be copied over, unless
3791
+ their modification date is more recent.
3792
+ works similarly to `aws s3 sync` (without --delete).
3793
+
3794
+ Args:
3795
+ src_dir_path (str): local source path from which to copy files.
3796
+ dest_dir_name (str): remote target path files are copied to.
3797
+
3798
+ Returns:
3799
+ str: bash command using rclone to sync files. Executed remotely.
3800
+ """
3801
+
3802
+ # .git directory is excluded from the sync
3803
+ # wrapping src_dir_path with "" to support path with spaces
3804
+ src_dir_path = shlex.quote(src_dir_path)
3805
+ sync_command = ('rclone copy --exclude ".git/*" '
3806
+ f'{src_dir_path} '
3807
+ f'{self.rclone_profile_name}:{self.name}{sub_path}'
3808
+ f'/{dest_dir_name}')
3809
+ return sync_command
3810
+
3811
+ def get_file_sync_command(base_dir_path, file_names) -> str:
3812
+ """returns an rclone command that copies files: 'file_names'
3813
+ from base directory: `base_dir_path` to bucket.
3814
+
3815
+ `rclone copy` copies files from source path to target.
3816
+ files with identical names at the destination won't be copied over, unless
3817
+ their modification date is more recent.
3818
+ works similarly to `aws s3 sync` (without --delete).
3819
+
3820
+ Args:
3821
+ base_dir_path (str): local path from which to copy files.
3822
+ file_names (List): specific file names to copy.
3823
+
3824
+ Returns:
3825
+ str: bash command using rclone to sync files
3826
+ """
3827
+
3828
+ # wrapping file_name with "" to support spaces
3469
3829
  includes = ' '.join([
3470
3830
  f'--include {shlex.quote(file_name)}'
3471
3831
  for file_name in file_names
3472
3832
  ])
3473
- endpoint_url = cloudflare.create_endpoint()
3474
3833
  base_dir_path = shlex.quote(base_dir_path)
3475
- sync_command = (
3476
- 'AWS_SHARED_CREDENTIALS_FILE='
3477
- f'{cloudflare.R2_CREDENTIALS_PATH} '
3478
- 'aws s3 sync --no-follow-symlinks --exclude="*" '
3479
- f'{includes} {base_dir_path} '
3480
- f's3://{self.name}{sub_path} '
3481
- f'--endpoint {endpoint_url} '
3482
- # R2 does not support CRC64-NVME
3483
- # which is the default for aws s3 sync
3484
- # https://community.cloudflare.com/t/an-error-occurred-internalerror-when-calling-the-putobject-operation/764905/13
3485
- f'--checksum-algorithm CRC32 '
3486
- f'--profile={cloudflare.R2_PROFILE_NAME}')
3487
- return sync_command
3488
-
3489
- def get_dir_sync_command(src_dir_path, dest_dir_name):
3490
- # we exclude .git directory from the sync
3491
- excluded_list = storage_utils.get_excluded_files(src_dir_path)
3492
- excluded_list.append('.git/*')
3493
- excludes = ' '.join([
3494
- f'--exclude {shlex.quote(file_name)}'
3495
- for file_name in excluded_list
3496
- ])
3497
- endpoint_url = cloudflare.create_endpoint()
3498
- src_dir_path = shlex.quote(src_dir_path)
3499
- sync_command = (
3500
- 'AWS_SHARED_CREDENTIALS_FILE='
3501
- f'{cloudflare.R2_CREDENTIALS_PATH} '
3502
- f'aws s3 sync --no-follow-symlinks {excludes} '
3503
- f'{src_dir_path} '
3504
- f's3://{self.name}{sub_path}/{dest_dir_name} '
3505
- f'--endpoint {endpoint_url} '
3506
- # R2 does not support CRC64-NVME
3507
- # which is the default for aws s3 sync
3508
- # https://community.cloudflare.com/t/an-error-occurred-internalerror-when-calling-the-putobject-operation/764905/13
3509
- f'--checksum-algorithm CRC32 '
3510
- f'--profile={cloudflare.R2_PROFILE_NAME}')
3834
+ sync_command = ('rclone copy '
3835
+ f'{includes} {base_dir_path} '
3836
+ f'{self.rclone_profile_name}:{self.name}{sub_path}')
3511
3837
  return sync_command
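For concreteness, the directory variant above emits an rclone invocation of roughly this shape (profile and paths are illustrative):

```python
# Illustrative output of get_dir_sync_command for a hypothetical profile.
rclone_profile_name = 'ibmcos-my-bucket'  # assumed profile naming
name, sub_path, dest_dir_name = 'my-bucket', '', 'ckpt'
src_dir_path = "'/tmp/ckpt'"              # already shlex-quoted
sync_command = ('rclone copy --exclude ".git/*" '
                f'{src_dir_path} '
                f'{rclone_profile_name}:{name}{sub_path}'
                f'/{dest_dir_name}')
print(sync_command)
# rclone copy --exclude ".git/*" '/tmp/ckpt' ibmcos-my-bucket:my-bucket/ckpt
```

Because `rclone copy` skips files whose destination copy is newer, repeated syncs behave like `aws s3 sync` without `--delete`, as the docstrings note.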
3512
3838
 
3513
3839
  # Generate message for upload
@@ -3518,7 +3844,8 @@ class R2Store(AbstractStore):
3518
3844
 
3519
3845
  log_path = sky_logging.generate_tmp_logging_file_path(
3520
3846
  _STORAGE_LOG_FILE_NAME)
3521
- sync_path = f'{source_message} -> r2://{self.name}{sub_path}/'
3847
+ sync_path = (
3848
+ f'{source_message} -> cos://{self.region}/{self.name}{sub_path}/')
3522
3849
  with rich_utils.safe_status(
3523
3850
  ux_utils.spinner_message(f'Syncing {sync_path}',
3524
3851
  log_path=log_path)):
@@ -3535,1236 +3862,306 @@ class R2Store(AbstractStore):
3535
3862
  ux_utils.finishing_message(f'Storage synced: {sync_path}',
3536
3863
  log_path))
3537
3864
 
3538
- def _transfer_to_r2(self) -> None:
3539
- assert isinstance(self.source, str), self.source
3540
- if self.source.startswith('gs://'):
3541
- data_transfer.gcs_to_r2(self.name, self.name)
3542
- elif self.source.startswith('s3://'):
3543
- data_transfer.s3_to_r2(self.name, self.name)
3544
- elif self.source.startswith('nebius://'):
3545
- data_transfer.s3_to_r2(self.name, self.name)
3546
-
3547
3865
  def _get_bucket(self) -> Tuple[StorageHandle, bool]:
3548
- """Obtains the R2 bucket.
3866
+ """returns IBM COS bucket object if exists, otherwise creates it.
3549
3867
 
3550
- If the bucket exists, this method will return the bucket.
3551
- If the bucket does not exist, there are three cases:
3552
- 1) Raise an error if the bucket source starts with s3://
3553
- 2) Return None if bucket has been externally deleted and
3554
- sync_on_reconstruction is False
3555
- 3) Create and return a new bucket otherwise
3868
+ Returns:
3869
+ StorageHandle(str): bucket name
3870
+ bool: indicates whether a new bucket was created.
3556
3871
 
3557
3872
  Raises:
3558
3873
  StorageSpecError: If externally created bucket is attempted to be
3559
3874
  mounted without specifying storage source.
3560
- StorageBucketCreateError: If creating the bucket fails
3875
+ StorageBucketCreateError: If bucket creation fails.
3561
3876
  StorageBucketGetError: If fetching a bucket fails
3562
3877
  StorageExternalDeletionError: If externally deleted storage is
3563
3878
  attempted to be fetched while reconstructing the storage for
3564
3879
  'sky storage delete' or 'sky start'
3565
3880
  """
3566
- r2 = cloudflare.resource('s3')
3567
- bucket = r2.Bucket(self.name)
3568
- endpoint_url = cloudflare.create_endpoint()
3569
- try:
3570
- # Try Public bucket case.
3571
- # This line does not error out if the bucket is an external public
3572
- # bucket or if it is a user's bucket that is publicly
3573
- # accessible.
3574
- self.client.head_bucket(Bucket=self.name)
3575
- self._validate_existing_bucket()
3576
- return bucket, False
3577
- except aws.botocore_exceptions().ClientError as e:
3578
- error_code = e.response['Error']['Code']
3579
- # AccessDenied error for buckets that are private and not owned by
3580
- # user.
3581
- if error_code == '403':
3582
- command = ('AWS_SHARED_CREDENTIALS_FILE='
3583
- f'{cloudflare.R2_CREDENTIALS_PATH} '
3584
- f'aws s3 ls s3://{self.name} '
3585
- f'--endpoint {endpoint_url} '
3586
- f'--profile={cloudflare.R2_PROFILE_NAME}')
3587
- with ux_utils.print_exception_no_traceback():
3588
- raise exceptions.StorageBucketGetError(
3589
- _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
3590
- f' To debug, consider running `{command}`.') from e
3591
3881
 
3592
- if isinstance(self.source, str) and self.source.startswith('r2://'):
3882
+ bucket_profile_name = (data_utils.Rclone.RcloneStores.IBM.value +
3883
+ self.name)
3884
+ try:
3885
+ bucket_region = data_utils.get_ibm_cos_bucket_region(self.name)
3886
+ except exceptions.StorageBucketGetError as e:
3593
3887
  with ux_utils.print_exception_no_traceback():
3888
+ command = f'rclone lsd {bucket_profile_name}: '
3594
3889
  raise exceptions.StorageBucketGetError(
3595
- 'Attempted to use a non-existent bucket as a source: '
3596
- f'{self.source}. Consider using '
3597
- '`AWS_SHARED_CREDENTIALS_FILE='
3598
- f'{cloudflare.R2_CREDENTIALS_PATH} aws s3 ls '
3599
- f's3://{self.name} '
3600
- f'--endpoint {endpoint_url} '
3601
- f'--profile={cloudflare.R2_PROFILE_NAME}\' '
3602
- 'to debug.')
3603
-
3604
- # If bucket cannot be found in both private and public settings,
3605
- # the bucket is to be created by Sky. However, creation is skipped if
3606
- # Store object is being reconstructed for deletion or re-mount with
3607
- # sky start, and error is raised instead.
3608
- if self.sync_on_reconstruction:
3609
- bucket = self._create_r2_bucket(self.name)
3610
- return bucket, True
3611
- else:
3612
- # Raised when Storage object is reconstructed for sky storage
3613
- # delete or to re-mount Storages with sky start but the storage
3614
- # is already removed externally.
3615
- raise exceptions.StorageExternalDeletionError(
3616
- 'Attempted to fetch a non-existent bucket: '
3617
- f'{self.name}')
3618
-
3619
- def _download_file(self, remote_path: str, local_path: str) -> None:
3620
- """Downloads file from remote to local on r2 bucket
3621
- using the boto3 API
3890
+ _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
3891
+ f' To debug, consider running `{command}`.') from e
3622
3892
 
3623
- Args:
3624
- remote_path: str; Remote path on R2 bucket
3625
- local_path: str; Local path on user's device
3626
- """
3627
- self.bucket.download_file(remote_path, local_path)
3893
+ try:
3894
+ uri_region = data_utils.split_cos_path(
3895
+ self.source)[2] # type: ignore
3896
+ except ValueError:
3897
+ # source isn't a cos uri
3898
+ uri_region = ''
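The COS URI carries the region in the authority position, which is why `split_cos_path(...)[2]` can recover it and why a non-COS source raises `ValueError`. A hedged stand-in for the real `data_utils.split_cos_path`, assuming the `cos://<region>/<bucket>/<path>` shape used elsewhere in this file:

```python
from urllib.parse import urlsplit


def split_cos_path_sketch(uri: str):
    # Stand-in for data_utils.split_cos_path (assumed return order:
    # bucket, key, region -- matching the [0]/[2] indexing above).
    parts = urlsplit(uri)
    if parts.scheme != 'cos':
        raise ValueError(f'not a COS URI: {uri}')
    bucket, _, key = parts.path.lstrip('/').partition('/')
    return bucket, key, parts.netloc


print(split_cos_path_sketch('cos://us-east/my-bucket/data/train'))
# ('my-bucket', 'data/train', 'us-east')
```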
3628
3899
 
3629
- def mount_command(self, mount_path: str) -> str:
3630
- """Returns the command to mount the bucket to the mount_path.
3631
-
3632
- Uses goofys to mount the bucket.
3633
-
3634
- Args:
3635
- mount_path: str; Path to mount the bucket to.
3636
- """
3637
- install_cmd = mounting_utils.get_s3_mount_install_cmd()
3638
- endpoint_url = cloudflare.create_endpoint()
3639
- r2_credential_path = cloudflare.R2_CREDENTIALS_PATH
3640
- r2_profile_name = cloudflare.R2_PROFILE_NAME
3641
- mount_cmd = mounting_utils.get_r2_mount_cmd(
3642
- r2_credential_path, r2_profile_name, endpoint_url, self.bucket.name,
3643
- mount_path, self._bucket_sub_path)
3644
- return mounting_utils.get_mounting_command(mount_path, install_cmd,
3645
- mount_cmd)
3646
-
3647
- def mount_cached_command(self, mount_path: str) -> str:
3648
- install_cmd = mounting_utils.get_rclone_install_cmd()
3649
- rclone_profile_name = (
3650
- data_utils.Rclone.RcloneStores.R2.get_profile_name(self.name))
3651
- rclone_config = data_utils.Rclone.RcloneStores.R2.get_config(
3652
- rclone_profile_name=rclone_profile_name)
3653
- mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
3654
- rclone_config, rclone_profile_name, self.bucket.name, mount_path)
3655
- return mounting_utils.get_mounting_command(mount_path, install_cmd,
3656
- mount_cached_cmd)
3657
-
3658
- def _create_r2_bucket(self,
3659
- bucket_name: str,
3660
- region='auto') -> StorageHandle:
3661
- """Creates R2 bucket with specific name in specific region
3662
-
3663
- Args:
3664
- bucket_name: str; Name of bucket
3665
- region: str; Region name, r2 automatically sets region
3666
- Raises:
3667
- StorageBucketCreateError: If bucket creation fails.
3668
- """
3669
- r2_client = self.client
3670
- try:
3671
- if region is None:
3672
- r2_client.create_bucket(Bucket=bucket_name)
3673
- else:
3674
- location = {'LocationConstraint': region}
3675
- r2_client.create_bucket(Bucket=bucket_name,
3676
- CreateBucketConfiguration=location)
3677
- logger.info(f' {colorama.Style.DIM}Created R2 bucket '
3678
- f'{bucket_name!r} in {region}'
3679
- f'{colorama.Style.RESET_ALL}')
3680
- except aws.botocore_exceptions().ClientError as e:
3681
- with ux_utils.print_exception_no_traceback():
3682
- raise exceptions.StorageBucketCreateError(
3683
- f'Attempted to create a bucket '
3684
- f'{self.name} but failed.') from e
3685
- return cloudflare.resource('s3').Bucket(bucket_name)
3686
-
3687
- def _execute_r2_remove_command(self, command: str, bucket_name: str,
3688
- hint_operating: str,
3689
- hint_failed: str) -> bool:
3690
- try:
3691
- with rich_utils.safe_status(
3692
- ux_utils.spinner_message(hint_operating)):
3693
- subprocess.check_output(command.split(' '),
3694
- stderr=subprocess.STDOUT,
3695
- shell=True)
3696
- except subprocess.CalledProcessError as e:
3697
- if 'NoSuchBucket' in e.output.decode('utf-8'):
3698
- logger.debug(
3699
- _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
3700
- bucket_name=bucket_name))
3701
- return False
3702
- else:
3703
- with ux_utils.print_exception_no_traceback():
3704
- raise exceptions.StorageBucketDeleteError(
3705
- f'{hint_failed}'
3706
- f'Detailed error: {e.output}')
3707
- return True
3708
-
3709
- def _delete_r2_bucket_sub_path(self, bucket_name: str,
3710
- sub_path: str) -> bool:
3711
- """Deletes the sub path from the bucket."""
3712
- endpoint_url = cloudflare.create_endpoint()
3713
- remove_command = (
3714
- f'AWS_SHARED_CREDENTIALS_FILE={cloudflare.R2_CREDENTIALS_PATH} '
3715
- f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive '
3716
- f'--endpoint {endpoint_url} '
3717
- f'--profile={cloudflare.R2_PROFILE_NAME}')
3718
- return self._execute_r2_remove_command(
3719
- remove_command, bucket_name,
3720
- f'Removing objects from R2 bucket {bucket_name}/{sub_path}',
3721
- f'Failed to remove objects from R2 bucket {bucket_name}/{sub_path}.'
3722
- )
3723
-
3724
- def _delete_r2_bucket(self, bucket_name: str) -> bool:
3725
- """Deletes R2 bucket, including all objects in bucket
3726
-
3727
- Args:
3728
- bucket_name: str; Name of bucket
3729
-
3730
- Returns:
3731
- bool; True if bucket was deleted, False if it was deleted externally.
3732
-
3733
- Raises:
3734
- StorageBucketDeleteError: If deleting the bucket fails.
3735
- """
3736
- # Deleting objects is very slow programatically
3737
- # (i.e. bucket.objects.all().delete() is slow).
3738
- # In addition, standard delete operations (i.e. via `aws s3 rm`)
3739
- # are slow, since AWS puts deletion markers.
3740
- # https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
3741
- # The fastest way to delete is to run `aws s3 rb --force`,
3742
- # which removes the bucket by force.
3743
- endpoint_url = cloudflare.create_endpoint()
3744
- remove_command = (
3745
- f'AWS_SHARED_CREDENTIALS_FILE={cloudflare.R2_CREDENTIALS_PATH} '
3746
- f'aws s3 rb s3://{bucket_name} --force '
3747
- f'--endpoint {endpoint_url} '
3748
- f'--profile={cloudflare.R2_PROFILE_NAME}')
3749
-
3750
- success = self._execute_r2_remove_command(
3751
- remove_command, bucket_name, f'Deleting R2 bucket {bucket_name}',
3752
- f'Failed to delete R2 bucket {bucket_name}.')
3753
- if not success:
3754
- return False
3755
-
3756
- # Wait until bucket deletion propagates on AWS servers
3757
- while data_utils.verify_r2_bucket(bucket_name):
3758
- time.sleep(0.1)
3759
- return True
3760
-
3761
-
- class IBMCosStore(AbstractStore):
- """IBMCosStore inherits from Storage Object and represents the backend
- for COS buckets.
- """
- _ACCESS_DENIED_MESSAGE = 'Access Denied'
-
- def __init__(self,
- name: str,
- source: str,
- region: Optional[str] = 'us-east',
- is_sky_managed: Optional[bool] = None,
- sync_on_reconstruction: bool = True,
- _bucket_sub_path: Optional[str] = None):
- self.client: 'storage.Client'
- self.bucket: 'StorageHandle'
- self.rclone_profile_name = (
- data_utils.Rclone.RcloneStores.IBM.get_profile_name(self.name))
- super().__init__(name, source, region, is_sky_managed,
- sync_on_reconstruction, _bucket_sub_path)
-
- def _validate(self):
- if self.source is not None and isinstance(self.source, str):
- if self.source.startswith('s3://'):
- assert self.name == data_utils.split_s3_path(self.source)[0], (
- 'S3 Bucket is specified as path, the name should be the'
- ' same as S3 bucket.')
- assert data_utils.verify_s3_bucket(self.name), (
- f'Source specified as {self.source}, a S3 bucket. ',
- 'S3 Bucket should exist.')
- elif self.source.startswith('gs://'):
- assert self.name == data_utils.split_gcs_path(self.source)[0], (
- 'GCS Bucket is specified as path, the name should be '
- 'the same as GCS bucket.')
- assert data_utils.verify_gcs_bucket(self.name), (
- f'Source specified as {self.source}, a GCS bucket. ',
- 'GCS Bucket should exist.')
- elif data_utils.is_az_container_endpoint(self.source):
- storage_account_name, container_name, _ = (
- data_utils.split_az_path(self.source))
- assert self.name == container_name, (
- 'Azure bucket is specified as path, the name should be '
- 'the same as Azure bucket.')
- assert data_utils.verify_az_bucket(
- storage_account_name, self.name), (
- f'Source specified as {self.source}, an Azure bucket. '
- 'Azure bucket should exist.')
- elif self.source.startswith('r2://'):
- assert self.name == data_utils.split_r2_path(self.source)[0], (
- 'R2 Bucket is specified as path, the name should be '
- 'the same as R2 bucket.')
- assert data_utils.verify_r2_bucket(self.name), (
- f'Source specified as {self.source}, a R2 bucket. ',
- 'R2 Bucket should exist.')
- elif self.source.startswith('nebius://'):
- assert self.name == data_utils.split_nebius_path(
- self.source)[0], (
- 'Nebius Object Storage is specified as path, the name '
- 'should be the same as Nebius Object Storage bucket.')
- assert data_utils.verify_nebius_bucket(self.name), (
- f'Source specified as {self.source}, a Nebius Object '
- f'Storage bucket. Nebius Object Storage Bucket should '
- f'exist.')
- elif self.source.startswith('cos://'):
- assert self.name == data_utils.split_cos_path(self.source)[0], (
- 'COS Bucket is specified as path, the name should be '
- 'the same as COS bucket.')
- # Validate name
- self.name = IBMCosStore.validate_name(self.name)
-
- @classmethod
- def validate_name(cls, name: str) -> str:
- """Validates the name of a COS bucket.
-
- Rules source: https://ibm.github.io/ibm-cos-sdk-java/com/ibm/cloud/objectstorage/services/s3/model/Bucket.html # pylint: disable=line-too-long
- """
-
- def _raise_no_traceback_name_error(err_str):
- with ux_utils.print_exception_no_traceback():
- raise exceptions.StorageNameError(err_str)
-
- if name is not None and isinstance(name, str):
- if not 3 <= len(name) <= 63:
- _raise_no_traceback_name_error(
- f'Invalid store name: {name} must be between 3 (min) '
- 'and 63 (max) characters long.')
-
- # Check for valid characters and start/end with a letter or number
- pattern = r'^[a-z0-9][-a-z0-9.]*[a-z0-9]$'
- if not re.match(pattern, name):
- _raise_no_traceback_name_error(
- f'Invalid store name: {name} can consist only of '
- 'lowercase letters, numbers, dots (.), and dashes (-). '
- 'It must begin and end with a letter or number.')
-
- # Check for two adjacent periods or dashes
- if any(substring in name for substring in ['..', '--']):
- _raise_no_traceback_name_error(
- f'Invalid store name: {name} must not contain '
- 'two adjacent periods/dashes')
-
- # Check for IP address format
- ip_pattern = r'^(?:\d{1,3}\.){3}\d{1,3}$'
- if re.match(ip_pattern, name):
- _raise_no_traceback_name_error(
- f'Invalid store name: {name} must not be formatted as '
- 'an IP address (for example, 192.168.5.4).')
-
- if any(substring in name for substring in ['.-', '-.']):
- _raise_no_traceback_name_error(
- f'Invalid store name: {name} must '
- 'not allow substrings: ".-", "-." .')
- else:
- _raise_no_traceback_name_error('Store name must be specified.')
- return name
-
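A quick way to see these naming rules in action is a standalone checker that mirrors the logic above (the sample names are illustrative):

    import re

    def is_valid_cos_name(name: str) -> bool:
        # Mirrors validate_name: length, charset, adjacency and IP checks.
        return (3 <= len(name) <= 63 and
                re.match(r'^[a-z0-9][-a-z0-9.]*[a-z0-9]$', name) is not None and
                not any(s in name for s in ('..', '--', '.-', '-.')) and
                re.match(r'^(?:\d{1,3}\.){3}\d{1,3}$', name) is None)

    assert is_valid_cos_name('my-training-data')
    assert not is_valid_cos_name('My_Bucket')    # uppercase/underscore
    assert not is_valid_cos_name('192.168.5.4')  # IP-address form
    assert not is_valid_cos_name('a..b')         # adjacent periods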
- def initialize(self):
- """Initializes the cos store object on the cloud.
-
- Initialization involves fetching bucket if exists, or creating it if
- it does not.
-
- Raises:
- StorageBucketCreateError: If bucket creation fails
- StorageBucketGetError: If fetching existing bucket fails
- StorageInitError: If general initialization fails.
- """
- self.client = ibm.get_cos_client(self.region)
- self.s3_resource = ibm.get_cos_resource(self.region)
- self.bucket, is_new_bucket = self._get_bucket()
- if self.is_sky_managed is None:
- # If is_sky_managed is not specified, then this is a new storage
- # object (i.e., did not exist in global_user_state) and we should
- # set the is_sky_managed property.
- # If is_sky_managed is specified, then we take no action.
- self.is_sky_managed = is_new_bucket
-
- def upload(self):
- """Uploads files from local machine to bucket.
-
- Upload must be called by the Storage handler - it is not called on
- Store initialization.
-
- Raises:
- StorageUploadError: if upload fails.
- """
- try:
- if isinstance(self.source, list):
- self.batch_ibm_rsync(self.source, create_dirs=True)
- elif self.source is not None:
- if self.source.startswith('cos://'):
- # cos bucket used as a dest, can't be used as source.
- pass
- elif self.source.startswith('s3://'):
- raise Exception('IBM COS currently not supporting'
- 'data transfers between COS and S3')
- elif self.source.startswith('nebius://'):
- raise Exception('IBM COS currently not supporting'
- 'data transfers between COS and Nebius')
- elif self.source.startswith('gs://'):
- raise Exception('IBM COS currently not supporting'
- 'data transfers between COS and GS')
- elif self.source.startswith('r2://'):
- raise Exception('IBM COS currently not supporting'
- 'data transfers between COS and r2')
- else:
- self.batch_ibm_rsync([self.source])
-
- except Exception as e:
- raise exceptions.StorageUploadError(
- f'Upload failed for store {self.name}') from e
-
- def delete(self) -> None:
- if self._bucket_sub_path is not None and not self.is_sky_managed:
- return self._delete_sub_path()
-
- self._delete_cos_bucket()
- logger.info(f'{colorama.Fore.GREEN}Deleted COS bucket {self.name}.'
- f'{colorama.Style.RESET_ALL}')
-
- def _delete_sub_path(self) -> None:
- assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
- bucket = self.s3_resource.Bucket(self.name)
- try:
- self._delete_cos_bucket_objects(bucket, self._bucket_sub_path + '/')
- except ibm.ibm_botocore.exceptions.ClientError as e:
- if e.__class__.__name__ == 'NoSuchBucket':
- logger.debug('bucket already removed')
-
- def get_handle(self) -> StorageHandle:
- return self.s3_resource.Bucket(self.name)
-
- def batch_ibm_rsync(self,
- source_path_list: List[Path],
- create_dirs: bool = False) -> None:
- """Invokes rclone copy to batch upload a list of local paths to cos
-
- Since rclone does not support batch operations, we construct
- multiple commands to be run in parallel.
-
- Args:
- source_path_list: List of paths to local files or directories
- create_dirs: If the local_path is a directory and this is set to
- False, the contents of the directory are directly uploaded to
- root of the bucket. If the local_path is a directory and this is
- set to True, the directory is created in the bucket root and
- contents are uploaded to it.
- """
- sub_path = (f'/{self._bucket_sub_path}'
- if self._bucket_sub_path else '')
-
- def get_dir_sync_command(src_dir_path, dest_dir_name) -> str:
- """returns an rclone command that copies a complete folder
- from 'src_dir_path' to bucket/'dest_dir_name'.
-
- `rclone copy` copies files from source path to target.
- files with identical names won't be copied over, unless
- their modification date is more recent.
- works similarly to `aws sync` (without --delete).
-
- Args:
- src_dir_path (str): local source path from which to copy files.
- dest_dir_name (str): remote target path files are copied to.
-
- Returns:
- str: bash command using rclone to sync files. Executed remotely.
- """
-
- # .git directory is excluded from the sync
- # wrapping src_dir_path with "" to support path with spaces
- src_dir_path = shlex.quote(src_dir_path)
- sync_command = ('rclone copy --exclude ".git/*" '
- f'{src_dir_path} '
- f'{self.rclone_profile_name}:{self.name}{sub_path}'
- f'/{dest_dir_name}')
- return sync_command
-
- def get_file_sync_command(base_dir_path, file_names) -> str:
- """returns an rclone command that copies files: 'file_names'
- from base directory: `base_dir_path` to bucket.
-
- `rclone copy` copies files from source path to target.
- files with identical names won't be copied over, unless
- their modification date is more recent.
- works similarly to `aws sync` (without --delete).
-
- Args:
- base_dir_path (str): local path from which to copy files.
- file_names (List): specific file names to copy.
-
- Returns:
- str: bash command using rclone to sync files
- """
-
- # wrapping file_name with "" to support spaces
- includes = ' '.join([
- f'--include {shlex.quote(file_name)}'
- for file_name in file_names
- ])
- base_dir_path = shlex.quote(base_dir_path)
- sync_command = ('rclone copy '
- f'{includes} {base_dir_path} '
- f'{self.rclone_profile_name}:{self.name}{sub_path}')
- return sync_command
-
- # Generate message for upload
- if len(source_path_list) > 1:
- source_message = f'{len(source_path_list)} paths'
- else:
- source_message = source_path_list[0]
-
- log_path = sky_logging.generate_tmp_logging_file_path(
- _STORAGE_LOG_FILE_NAME)
- sync_path = (
- f'{source_message} -> cos://{self.region}/{self.name}{sub_path}/')
- with rich_utils.safe_status(
- ux_utils.spinner_message(f'Syncing {sync_path}',
- log_path=log_path)):
- data_utils.parallel_upload(
- source_path_list,
- get_file_sync_command,
- get_dir_sync_command,
- log_path,
- self.name,
- self._ACCESS_DENIED_MESSAGE,
- create_dirs=create_dirs,
- max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS)
- logger.info(
- ux_utils.finishing_message(f'Storage synced: {sync_path}',
- log_path))
-
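The two generators above boil down to command strings of the following shape; a minimal sketch with placeholder profile and bucket names (not values taken from this diff):

    import shlex

    PROFILE, BUCKET = 'sky-ibm-my-bucket', 'my-bucket'  # placeholders

    def dir_sync_cmd(src_dir: str, dest_dir: str) -> str:
        # Whole-directory copy; .git excluded, unchanged files skipped.
        return (f'rclone copy --exclude ".git/*" {shlex.quote(src_dir)} '
                f'{PROFILE}:{BUCKET}/{dest_dir}')

    def file_sync_cmd(base_dir: str, file_names: list) -> str:
        includes = ' '.join(f'--include {shlex.quote(f)}' for f in file_names)
        return f'rclone copy {includes} {shlex.quote(base_dir)} {PROFILE}:{BUCKET}'

    print(dir_sync_cmd('~/datasets', 'datasets'))
    print(file_sync_cmd('~/project', ['train.py', 'config.yaml']))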
- def _get_bucket(self) -> Tuple[StorageHandle, bool]:
- """returns IBM COS bucket object if exists, otherwise creates it.
-
- Returns:
- StorageHandle(str): bucket name
- bool: indicates whether a new bucket was created.
-
- Raises:
- StorageSpecError: If externally created bucket is attempted to be
- mounted without specifying storage source.
- StorageBucketCreateError: If bucket creation fails.
- StorageBucketGetError: If fetching a bucket fails
- StorageExternalDeletionError: If externally deleted storage is
- attempted to be fetched while reconstructing the storage for
- 'sky storage delete' or 'sky start'
- """
-
- bucket_profile_name = (data_utils.Rclone.RcloneStores.IBM.value +
- self.name)
- try:
- bucket_region = data_utils.get_ibm_cos_bucket_region(self.name)
- except exceptions.StorageBucketGetError as e:
- with ux_utils.print_exception_no_traceback():
- command = f'rclone lsd {bucket_profile_name}: '
- raise exceptions.StorageBucketGetError(
- _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
- f' To debug, consider running `{command}`.') from e
-
- try:
- uri_region = data_utils.split_cos_path(
- self.source)[2] # type: ignore
- except ValueError:
- # source isn't a cos uri
- uri_region = ''
-
- # bucket's region doesn't match specified region in URI
- if bucket_region and uri_region and uri_region != bucket_region\
- and self.sync_on_reconstruction:
- with ux_utils.print_exception_no_traceback():
- raise exceptions.StorageBucketGetError(
- f'Bucket {self.name} exists in '
- f'region {bucket_region}, '
- f'but URI specified region {uri_region}.')
-
- if not bucket_region and uri_region:
- # bucket doesn't exist but source is a bucket URI
- with ux_utils.print_exception_no_traceback():
- raise exceptions.StorageBucketGetError(
- 'Attempted to use a non-existent bucket as a source: '
- f'{self.name} by providing URI. Consider using '
- '`rclone lsd <remote>` on relevant remotes returned '
- 'via `rclone listremotes` to debug.')
-
- data_utils.Rclone.store_rclone_config(
- self.name,
- data_utils.Rclone.RcloneStores.IBM,
- self.region, # type: ignore
- )
-
- if not bucket_region and self.sync_on_reconstruction:
- # bucket doesn't exist
- return self._create_cos_bucket(self.name, self.region), True
- elif not bucket_region and not self.sync_on_reconstruction:
- # Raised when Storage object is reconstructed for sky storage
- # delete or to re-mount Storages with sky start but the storage
- # is already removed externally.
- raise exceptions.StorageExternalDeletionError(
- 'Attempted to fetch a non-existent bucket: '
- f'{self.name}')
- else:
- # bucket exists
- bucket = self.s3_resource.Bucket(self.name)
- self._validate_existing_bucket()
- return bucket, False
-
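The branching above reduces to a small decision table over whether the bucket exists and whether sync_on_reconstruction is set; a sketch (exception names follow the code above, everything else is illustrative):

    def resolve_bucket(bucket_exists: bool, sync_on_reconstruction: bool) -> str:
        if not bucket_exists and sync_on_reconstruction:
            return 'create'             # provision a fresh bucket
        if not bucket_exists and not sync_on_reconstruction:
            return 'external-deletion'  # StorageExternalDeletionError
        return 'reuse'                  # connect to the existing bucket

    assert resolve_bucket(False, True) == 'create'
    assert resolve_bucket(False, False) == 'external-deletion'
    assert resolve_bucket(True, False) == 'reuse'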
- def _download_file(self, remote_path: str, local_path: str) -> None:
- """Downloads file from remote to local on s3 bucket
- using the boto3 API
-
- Args:
- remote_path: str; Remote path on S3 bucket
- local_path: str; Local path on user's device
- """
- self.client.download_file(self.name, local_path, remote_path)
-
- def mount_command(self, mount_path: str) -> str:
- """Returns the command to mount the bucket to the mount_path.
-
- Uses rclone to mount the bucket.
- Source: https://github.com/rclone/rclone
-
- Args:
- mount_path: str; Path to mount the bucket to.
- """
- # install rclone if not installed.
- install_cmd = mounting_utils.get_rclone_install_cmd()
- rclone_config = data_utils.Rclone.RcloneStores.IBM.get_config(
- rclone_profile_name=self.rclone_profile_name,
- region=self.region) # type: ignore
- mount_cmd = (
- mounting_utils.get_cos_mount_cmd(
- rclone_config,
- self.rclone_profile_name,
- self.bucket.name,
- mount_path,
- self._bucket_sub_path, # type: ignore
- ))
- return mounting_utils.get_mounting_command(mount_path, install_cmd,
- mount_cmd)
-
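get_mounting_command chains an install step and a mount step into one shell command; a minimal sketch of that pattern, assuming a hypothetical combining helper (the rclone flags shown are common defaults, not necessarily the exact set used here):

    def mounting_command(mount_path: str, install_cmd: str,
                         mount_cmd: str) -> str:
        # Create the mount point, install the tool if missing, then mount.
        return f'mkdir -p {mount_path} && ({install_cmd}) && {mount_cmd}'

    print(mounting_command(
        '/data',
        'which rclone || (curl -fsSL https://rclone.org/install.sh | sudo bash)',
        'rclone mount cos-profile:my-bucket /data --daemon'))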
- def _create_cos_bucket(self,
- bucket_name: str,
- region='us-east') -> StorageHandle:
- """Creates IBM COS bucket with specific name in specific region
-
- Args:
- bucket_name: str; Name of bucket
- region: str; Region name, e.g. us-east, us-south
- Raises:
- StorageBucketCreateError: If bucket creation fails.
- """
- try:
- self.client.create_bucket(
- Bucket=bucket_name,
- CreateBucketConfiguration={
- 'LocationConstraint': f'{region}-smart'
- })
- logger.info(f' {colorama.Style.DIM}Created IBM COS bucket '
- f'{bucket_name!r} in {region} '
- 'with storage class smart tier'
- f'{colorama.Style.RESET_ALL}')
- self.bucket = self.s3_resource.Bucket(bucket_name)
-
- except ibm.ibm_botocore.exceptions.ClientError as e: # type: ignore[union-attr] # pylint: disable=line-too-long
- with ux_utils.print_exception_no_traceback():
- raise exceptions.StorageBucketCreateError(
- f'Failed to create bucket: '
- f'{bucket_name}') from e
-
- s3_bucket_exists_waiter = self.client.get_waiter('bucket_exists')
- s3_bucket_exists_waiter.wait(Bucket=bucket_name)
-
- return self.bucket
-
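Outside SkyPilot, the same bucket creation can be done directly with ibm_boto3; a sketch with placeholder credentials (the '-smart' LocationConstraint suffix selects the smart storage tier, as above):

    import ibm_boto3
    from ibm_botocore.client import Config

    client = ibm_boto3.client(
        's3',
        ibm_api_key_id='<API_KEY>',                # placeholder
        ibm_service_instance_id='<INSTANCE_CRN>',  # placeholder
        config=Config(signature_version='oauth'),
        endpoint_url='https://s3.us-east.cloud-object-storage.appdomain.cloud')

    client.create_bucket(
        Bucket='my-bucket',
        CreateBucketConfiguration={'LocationConstraint': 'us-east-smart'})
    # Block until the new bucket is actually visible.
    client.get_waiter('bucket_exists').wait(Bucket='my-bucket')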
- def _delete_cos_bucket_objects(self,
- bucket: Any,
- prefix: Optional[str] = None) -> None:
- bucket_versioning = self.s3_resource.BucketVersioning(bucket.name)
- if bucket_versioning.status == 'Enabled':
- if prefix is not None:
- res = list(
- bucket.object_versions.filter(Prefix=prefix).delete())
- else:
- res = list(bucket.object_versions.delete())
- else:
- if prefix is not None:
- res = list(bucket.objects.filter(Prefix=prefix).delete())
- else:
- res = list(bucket.objects.delete())
- logger.debug(f'Deleted bucket\'s content:\n{res}, prefix: {prefix}')
-
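The versioning check matters because deleting plain objects in a versioned bucket leaves old versions behind. A condensed sketch of the same logic for any boto3-style resource (function and argument names are illustrative):

    def empty_bucket(s3_resource, bucket_name: str, prefix: str = '') -> None:
        bucket = s3_resource.Bucket(bucket_name)
        versioned = (s3_resource.BucketVersioning(bucket_name).status ==
                     'Enabled')
        # object_versions removes every version; objects only the latest.
        collection = bucket.object_versions if versioned else bucket.objects
        if prefix:
            collection.filter(Prefix=prefix).delete()
        else:
            collection.all().delete()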
- def _delete_cos_bucket(self) -> None:
- bucket = self.s3_resource.Bucket(self.name)
- try:
- self._delete_cos_bucket_objects(bucket)
- bucket.delete()
- bucket.wait_until_not_exists()
- except ibm.ibm_botocore.exceptions.ClientError as e:
- if e.__class__.__name__ == 'NoSuchBucket':
- logger.debug('bucket already removed')
- data_utils.Rclone.delete_rclone_bucket_profile(
- self.name, data_utils.Rclone.RcloneStores.IBM)
-
-
- class OciStore(AbstractStore):
- """OciStore inherits from Storage Object and represents the backend
- for OCI buckets.
- """
-
- _ACCESS_DENIED_MESSAGE = 'AccessDeniedException'
-
- def __init__(self,
- name: str,
- source: Optional[SourceType],
- region: Optional[str] = None,
- is_sky_managed: Optional[bool] = None,
- sync_on_reconstruction: Optional[bool] = True,
- _bucket_sub_path: Optional[str] = None):
- self.client: Any
- self.bucket: StorageHandle
- self.oci_config_file: str
- self.config_profile: str
- self.compartment: str
- self.namespace: str
-
- # Region is from the specified name in <bucket>@<region> format.
- # Another case is name can also be set by the source, for example:
- # /datasets-storage:
- # source: oci://RAGData@us-sanjose-1
- # The name in above mount will be set to RAGData@us-sanjose-1
- region_in_name = None
- if name is not None and '@' in name:
- self._validate_bucket_expr(name)
- name, region_in_name = name.split('@')
-
- # Region is from the specified source in oci://<bucket>@<region> format
- region_in_source = None
- if isinstance(source,
- str) and source.startswith('oci://') and '@' in source:
- self._validate_bucket_expr(source)
- source, region_in_source = source.split('@')
-
- if region_in_name is not None and region_in_source is not None:
- # This should never happen because name and source will never be
- # the remote bucket at the same time.
- assert region_in_name == region_in_source, (
- f'Mismatch region specified. Region in name {region_in_name}, '
- f'but region in source is {region_in_source}')
-
- if region_in_name is not None:
- region = region_in_name
- elif region_in_source is not None:
- region = region_in_source
-
- # Default region set to what specified in oci config.
- if region is None:
- region = oci.get_oci_config()['region']
-
- # From now on, the name and source are canonical, meaning there
- # is no region (@<region> suffix) associated with them anymore.
-
- super().__init__(name, source, region, is_sky_managed,
- sync_on_reconstruction, _bucket_sub_path)
- # TODO(zpoint): add _bucket_sub_path to the sync/mount/delete commands
-
- def _validate_bucket_expr(self, bucket_expr: str):
- pattern = r'^(\w+://)?[A-Za-z0-9-._]+(@\w{2}-\w+-\d{1})$'
- if not re.match(pattern, bucket_expr):
- raise ValueError(
- 'The format for the bucket portion is <bucket>@<region> '
- 'when specify a region with a bucket.')
-
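For illustration, the <bucket>@<region> convention accepted here can be exercised standalone with the same pattern (helper name is hypothetical):

    import re

    PATTERN = r'^(\w+://)?[A-Za-z0-9-._]+(@\w{2}-\w+-\d{1})$'

    def split_bucket_expr(expr: str):
        if '@' not in expr:
            return expr, None
        if not re.match(PATTERN, expr):
            raise ValueError('Expected <bucket>@<region>.')
        bucket, region = expr.split('@')
        return bucket, region

    assert split_bucket_expr('oci://RAGData@us-sanjose-1') == (
        'oci://RAGData', 'us-sanjose-1')
    assert split_bucket_expr('RAGData') == ('RAGData', None)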
- def _validate(self):
- if self.source is not None and isinstance(self.source, str):
- if self.source.startswith('oci://'):
- assert self.name == data_utils.split_oci_path(self.source)[0], (
- 'OCI Bucket is specified as path, the name should be '
- 'the same as OCI bucket.')
- elif not re.search(r'^\w+://', self.source):
- # Treat it as local path.
- pass
- else:
- raise NotImplementedError(
- f'Moving data from {self.source} to OCI is not supported.')
-
- # Validate name
- self.name = self.validate_name(self.name)
- # Check if the storage is enabled
- if not _is_storage_cloud_enabled(str(clouds.OCI())):
- with ux_utils.print_exception_no_traceback():
- raise exceptions.ResourcesUnavailableError(
- 'Storage \'store: oci\' specified, but ' \
- 'OCI access is disabled. To fix, enable '\
- 'OCI by running `sky check`. '\
- 'More info: https://skypilot.readthedocs.io/en/latest/getting-started/installation.html.' # pylint: disable=line-too-long
- )
-
- @classmethod
- def validate_name(cls, name) -> str:
- """Validates the name of the OCI store.
-
- Source for rules: https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/managingbuckets.htm#Managing_Buckets # pylint: disable=line-too-long
- """
-
- def _raise_no_traceback_name_error(err_str):
- with ux_utils.print_exception_no_traceback():
- raise exceptions.StorageNameError(err_str)
-
- if name is not None and isinstance(name, str):
- # Check for overall length
- if not 1 <= len(name) <= 256:
- _raise_no_traceback_name_error(
- f'Invalid store name: name {name} must contain 1-256 '
- 'characters.')
-
- # Check for valid characters and start/end with a number or letter
- pattern = r'^[A-Za-z0-9-._]+$'
- if not re.match(pattern, name):
- _raise_no_traceback_name_error(
- f'Invalid store name: name {name} can only contain '
- 'upper or lower case letters, numeric characters, hyphens '
- '(-), underscores (_), and dots (.). Spaces are not '
- 'allowed. Names must start and end with a number or '
- 'letter.')
- else:
- _raise_no_traceback_name_error('Store name must be specified.')
- return name
-
- def initialize(self):
- """Initializes the OCI store object on the cloud.
-
- Initialization involves fetching bucket if exists, or creating it if
- it does not.
-
- Raises:
- StorageBucketCreateError: If bucket creation fails
- StorageBucketGetError: If fetching existing bucket fails
- StorageInitError: If general initialization fails.
- """
- # pylint: disable=import-outside-toplevel
- from sky.clouds.utils import oci_utils
- from sky.provision.oci.query_utils import query_helper
-
- self.oci_config_file = oci.get_config_file()
- self.config_profile = oci_utils.oci_config.get_profile()
-
- # pylint: disable=line-too-long
- # What's a compartment? See https://docs.oracle.com/en/cloud/foundation/cloud_architecture/governance/compartments.html
- self.compartment = query_helper.find_compartment(self.region)
- self.client = oci.get_object_storage_client(region=self.region,
- profile=self.config_profile)
- self.namespace = self.client.get_namespace(
- compartment_id=oci.get_oci_config()['tenancy']).data
-
- self.bucket, is_new_bucket = self._get_bucket()
- if self.is_sky_managed is None:
- # If is_sky_managed is not specified, then this is a new storage
- # object (i.e., did not exist in global_user_state) and we should
- # set the is_sky_managed property.
- # If is_sky_managed is specified, then we take no action.
- self.is_sky_managed = is_new_bucket
-
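For reference, the equivalent setup with the plain `oci` SDK, not the wrappers used above (assumes a default profile in ~/.oci/config):

    import oci

    config = oci.config.from_file()  # reads ~/.oci/config
    client = oci.object_storage.ObjectStorageClient(config)
    # Each tenancy has exactly one Object Storage namespace.
    namespace = client.get_namespace(compartment_id=config['tenancy']).data
    print(f'Object Storage namespace: {namespace}')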
- def upload(self):
- """Uploads source to store bucket.
-
- Upload must be called by the Storage handler - it is not called on
- Store initialization.
-
- Raises:
- StorageUploadError: if upload fails.
- """
- try:
- if isinstance(self.source, list):
- self.batch_oci_rsync(self.source, create_dirs=True)
- elif self.source is not None:
- if self.source.startswith('oci://'):
- pass
- else:
- self.batch_oci_rsync([self.source])
- except exceptions.StorageUploadError:
- raise
- except Exception as e:
- raise exceptions.StorageUploadError(
- f'Upload failed for store {self.name}') from e
-
- def delete(self) -> None:
- deleted_by_skypilot = self._delete_oci_bucket(self.name)
- if deleted_by_skypilot:
- msg_str = f'Deleted OCI bucket {self.name}.'
- else:
- msg_str = (f'OCI bucket {self.name} may have been deleted '
- f'externally. Removing from local state.')
- logger.info(f'{colorama.Fore.GREEN}{msg_str}'
- f'{colorama.Style.RESET_ALL}')
-
- def get_handle(self) -> StorageHandle:
- return self.client.get_bucket(namespace_name=self.namespace,
- bucket_name=self.name).data
-
- def batch_oci_rsync(self,
- source_path_list: List[Path],
- create_dirs: bool = False) -> None:
- """Invokes oci sync to batch upload a list of local paths to Bucket
-
- Use OCI bulk operation to batch process the file upload
-
- Args:
- source_path_list: List of paths to local files or directories
- create_dirs: If the local_path is a directory and this is set to
- False, the contents of the directory are directly uploaded to
- root of the bucket. If the local_path is a directory and this is
- set to True, the directory is created in the bucket root and
- contents are uploaded to it.
- """
- sub_path = (f'{self._bucket_sub_path}/'
- if self._bucket_sub_path else '')
-
- @oci.with_oci_env
- def get_file_sync_command(base_dir_path, file_names):
- includes = ' '.join(
- [f'--include "{file_name}"' for file_name in file_names])
- prefix_arg = ''
- if sub_path:
- prefix_arg = f'--object-prefix "{sub_path.strip("/")}"'
- sync_command = (
- 'oci os object bulk-upload --no-follow-symlinks --overwrite '
- f'--bucket-name {self.name} --namespace-name {self.namespace} '
- f'--region {self.region} --src-dir "{base_dir_path}" '
- f'{prefix_arg} '
- f'{includes}')
-
- return sync_command
-
- @oci.with_oci_env
- def get_dir_sync_command(src_dir_path, dest_dir_name):
- if dest_dir_name and not str(dest_dir_name).endswith('/'):
- dest_dir_name = f'{dest_dir_name}/'
-
- excluded_list = storage_utils.get_excluded_files(src_dir_path)
- excluded_list.append('.git/*')
- excludes = ' '.join([
- f'--exclude {shlex.quote(file_name)}'
- for file_name in excluded_list
- ])
-
- # we exclude .git directory from the sync
- sync_command = (
- 'oci os object bulk-upload --no-follow-symlinks --overwrite '
- f'--bucket-name {self.name} --namespace-name {self.namespace} '
- f'--region {self.region} '
- f'--object-prefix "{sub_path}{dest_dir_name}" '
- f'--src-dir "{src_dir_path}" {excludes}')
-
- return sync_command
-
- # Generate message for upload
- if len(source_path_list) > 1:
- source_message = f'{len(source_path_list)} paths'
- else:
- source_message = source_path_list[0]
-
- log_path = sky_logging.generate_tmp_logging_file_path(
- _STORAGE_LOG_FILE_NAME)
- sync_path = f'{source_message} -> oci://{self.name}/{sub_path}'
- with rich_utils.safe_status(
- ux_utils.spinner_message(f'Syncing {sync_path}',
- log_path=log_path)):
- data_utils.parallel_upload(
- source_path_list=source_path_list,
- filesync_command_generator=get_file_sync_command,
- dirsync_command_generator=get_dir_sync_command,
- log_path=log_path,
- bucket_name=self.name,
- access_denied_message=self._ACCESS_DENIED_MESSAGE,
- create_dirs=create_dirs,
- max_concurrent_uploads=1)
-
- logger.info(
- ux_utils.finishing_message(f'Storage synced: {sync_path}',
- log_path))
-
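Concretely, the generators above emit commands of this shape (bucket, namespace, and region are placeholder values):

    bucket, namespace, region = 'my-bucket', 'mynamespace', 'us-sanjose-1'
    cmd = ('oci os object bulk-upload --no-follow-symlinks --overwrite '
           f'--bucket-name {bucket} --namespace-name {namespace} '
           f'--region {region} --object-prefix "ckpts/" '
           '--src-dir "~/ckpts" --exclude \'.git/*\'')
    print(cmd)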
- def _get_bucket(self) -> Tuple[StorageHandle, bool]:
- """Obtains the OCI bucket.
- If the bucket exists, this method will connect to the bucket.
-
- If the bucket does not exist, there are three cases:
- 1) Raise an error if the bucket source starts with oci://
- 2) Return None if bucket has been externally deleted and
- sync_on_reconstruction is False
- 3) Create and return a new bucket otherwise
-
- Return tuple (Bucket, Boolean): The first item is the bucket
- json payload from the OCI API call, the second item indicates
- if this is a newly created bucket (True) or an existing bucket (False).
-
- Raises:
- StorageBucketCreateError: If creating the bucket fails
- StorageBucketGetError: If fetching a bucket fails
- """
- try:
- get_bucket_response = self.client.get_bucket(
- namespace_name=self.namespace, bucket_name=self.name)
- bucket = get_bucket_response.data
- return bucket, False
- except oci.service_exception() as e:
- if e.status == 404: # Not Found
- if isinstance(self.source,
- str) and self.source.startswith('oci://'):
- with ux_utils.print_exception_no_traceback():
- raise exceptions.StorageBucketGetError(
- 'Attempted to connect to a non-existent bucket: '
- f'{self.source}') from e
- else:
- # If bucket cannot be found (i.e., does not exist), it is
- # to be created by Sky. However, creation is skipped if
- # Store object is being reconstructed for deletion.
- if self.sync_on_reconstruction:
- bucket = self._create_oci_bucket(self.name)
- return bucket, True
- else:
- return None, False
- elif e.status == 401: # Unauthorized
- # AccessDenied error for buckets that are private and not
- # owned by user.
- command = (
- f'oci os object list --namespace-name {self.namespace} '
- f'--bucket-name {self.name}')
- with ux_utils.print_exception_no_traceback():
- raise exceptions.StorageBucketGetError(
- _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
- f' To debug, consider running `{command}`.') from e
- else:
- # Unknown / unexpected error happened. This might happen when
- # the Object Storage service itself is not functioning normally
- # (e.g. a maintenance event causes an internal server error or
- # a request timeout).
- with ux_utils.print_exception_no_traceback():
- raise exceptions.StorageBucketGetError(
- f'Failed to connect to OCI bucket {self.name}') from e
- def mount_command(self, mount_path: str) -> str:
- """Returns the command to mount the bucket to the mount_path.
+ # bucket's region doesn't match specified region in URI
+ if bucket_region and uri_region and uri_region != bucket_region\
+ and self.sync_on_reconstruction:
+ with ux_utils.print_exception_no_traceback():
+ raise exceptions.StorageBucketGetError(
+ f'Bucket {self.name} exists in '
+ f'region {bucket_region}, '
+ f'but URI specified region {uri_region}.')

- Uses Rclone to mount the bucket.
+ if not bucket_region and uri_region:
+ # bucket doesn't exist but source is a bucket URI
+ with ux_utils.print_exception_no_traceback():
+ raise exceptions.StorageBucketGetError(
+ 'Attempted to use a non-existent bucket as a source: '
+ f'{self.name} by providing URI. Consider using '
+ '`rclone lsd <remote>` on relevant remotes returned '
+ 'via `rclone listremotes` to debug.')

- Args:
- mount_path: str; Path to mount the bucket to.
- """
- install_cmd = mounting_utils.get_rclone_install_cmd()
- mount_cmd = mounting_utils.get_oci_mount_cmd(
- mount_path=mount_path,
- store_name=self.name,
- region=str(self.region),
- namespace=self.namespace,
- compartment=self.bucket.compartment_id,
- config_file=self.oci_config_file,
- config_profile=self.config_profile)
- version_check_cmd = mounting_utils.get_rclone_version_check_cmd()
+ data_utils.Rclone.store_rclone_config(
+ self.name,
+ data_utils.Rclone.RcloneStores.IBM,
+ self.region, # type: ignore
+ )

- return mounting_utils.get_mounting_command(mount_path, install_cmd,
- mount_cmd, version_check_cmd)
+ if not bucket_region and self.sync_on_reconstruction:
+ # bucket doesn't exist
+ return self._create_cos_bucket(self.name, self.region), True
+ elif not bucket_region and not self.sync_on_reconstruction:
+ # Raised when Storage object is reconstructed for sky storage
+ # delete or to re-mount Storages with sky start but the storage
+ # is already removed externally.
+ raise exceptions.StorageExternalDeletionError(
+ 'Attempted to fetch a non-existent bucket: '
+ f'{self.name}')
+ else:
+ # bucket exists
+ bucket = self.s3_resource.Bucket(self.name)
+ self._validate_existing_bucket()
+ return bucket, False

  def _download_file(self, remote_path: str, local_path: str) -> None:
- """Downloads file from remote to local on OCI bucket
+ """Downloads file from remote to local on s3 bucket
+ using the boto3 API

  Args:
- remote_path: str; Remote path on OCI bucket
+ remote_path: str; Remote path on S3 bucket
  local_path: str; Local path on user's device
  """
- if remote_path.startswith(f'/{self.name}'):
- # If the remote path is /bucket_name, we need to
- # remove the leading /
- remote_path = remote_path.lstrip('/')
-
- filename = os.path.basename(remote_path)
- if not local_path.endswith(filename):
- local_path = os.path.join(local_path, filename)
-
- @oci.with_oci_env
- def get_file_download_command(remote_path, local_path):
- download_command = (f'oci os object get --bucket-name {self.name} '
- f'--namespace-name {self.namespace} '
- f'--region {self.region} --name {remote_path} '
- f'--file {local_path}')
+ self.client.download_file(self.name, local_path, remote_path)

- return download_command
+ def mount_command(self, mount_path: str) -> str:
+ """Returns the command to mount the bucket to the mount_path.

- download_command = get_file_download_command(remote_path, local_path)
+ Uses rclone to mount the bucket.
+ Source: https://github.com/rclone/rclone

- try:
- with rich_utils.safe_status(
- f'[bold cyan]Downloading: {remote_path} -> {local_path}[/]'
- ):
- subprocess.check_output(download_command,
- stderr=subprocess.STDOUT,
- shell=True)
- except subprocess.CalledProcessError as e:
- logger.error(f'Download failed: {remote_path} -> {local_path}.\n'
- f'Detail errors: {e.output}')
- with ux_utils.print_exception_no_traceback():
- raise exceptions.StorageBucketDeleteError(
- f'Failed download file {self.name}:{remote_path}.') from e
+ Args:
+ mount_path: str; Path to mount the bucket to.
+ """
+ # install rclone if not installed.
+ install_cmd = mounting_utils.get_rclone_install_cmd()
+ rclone_config = data_utils.Rclone.RcloneStores.IBM.get_config(
+ rclone_profile_name=self.rclone_profile_name,
+ region=self.region) # type: ignore
+ mount_cmd = (
+ mounting_utils.get_cos_mount_cmd(
+ rclone_config,
+ self.rclone_profile_name,
+ self.bucket.name,
+ mount_path,
+ self._bucket_sub_path, # type: ignore
+ ))
+ return mounting_utils.get_mounting_command(mount_path, install_cmd,
+ mount_cmd)

- def _create_oci_bucket(self, bucket_name: str) -> StorageHandle:
- """Creates OCI bucket with specific name in specific region
+ def _create_cos_bucket(self,
+ bucket_name: str,
+ region='us-east') -> StorageHandle:
+ """Creates IBM COS bucket with specific name in specific region

  Args:
  bucket_name: str; Name of bucket
- region: str; Region name, e.g. us-central1, us-west1
+ region: str; Region name, e.g. us-east, us-south
+ Raises:
+ StorageBucketCreateError: If bucket creation fails.
  """
- logger.debug(f'_create_oci_bucket: {bucket_name}')
  try:
- create_bucket_response = self.client.create_bucket(
- namespace_name=self.namespace,
- create_bucket_details=oci.oci.object_storage.models.
- CreateBucketDetails(
- name=bucket_name,
- compartment_id=self.compartment,
- ))
- bucket = create_bucket_response.data
- return bucket
- except oci.service_exception() as e:
+ self.client.create_bucket(
+ Bucket=bucket_name,
+ CreateBucketConfiguration={
+ 'LocationConstraint': f'{region}-smart'
+ })
+ logger.info(f' {colorama.Style.DIM}Created IBM COS bucket '
+ f'{bucket_name!r} in {region} '
+ 'with storage class smart tier'
+ f'{colorama.Style.RESET_ALL}')
+ self.bucket = self.s3_resource.Bucket(bucket_name)
+
+ except ibm.ibm_botocore.exceptions.ClientError as e: # type: ignore[union-attr] # pylint: disable=line-too-long
  with ux_utils.print_exception_no_traceback():
  raise exceptions.StorageBucketCreateError(
- f'Failed to create OCI bucket: {self.name}') from e
-
- def _delete_oci_bucket(self, bucket_name: str) -> bool:
- """Deletes OCI bucket, including all objects in bucket
-
- Args:
- bucket_name: str; Name of bucket
-
- Returns:
- bool; True if bucket was deleted, False if it was deleted externally.
- """
- logger.debug(f'_delete_oci_bucket: {bucket_name}')
+ f'Failed to create bucket: '
+ f'{bucket_name}') from e

- @oci.with_oci_env
- def get_bucket_delete_command(bucket_name):
- remove_command = (f'oci os bucket delete --bucket-name '
- f'--region {self.region} '
- f'{bucket_name} --empty --force')
+ s3_bucket_exists_waiter = self.client.get_waiter('bucket_exists')
+ s3_bucket_exists_waiter.wait(Bucket=bucket_name)

- return remove_command
+ return self.bucket

- remove_command = get_bucket_delete_command(bucket_name)
+ def _delete_cos_bucket_objects(self,
+ bucket: Any,
+ prefix: Optional[str] = None) -> None:
+ bucket_versioning = self.s3_resource.BucketVersioning(bucket.name)
+ if bucket_versioning.status == 'Enabled':
+ if prefix is not None:
+ res = list(
+ bucket.object_versions.filter(Prefix=prefix).delete())
+ else:
+ res = list(bucket.object_versions.delete())
+ else:
+ if prefix is not None:
+ res = list(bucket.objects.filter(Prefix=prefix).delete())
+ else:
+ res = list(bucket.objects.delete())
+ logger.debug(f'Deleted bucket\'s content:\n{res}, prefix: {prefix}')

+ def _delete_cos_bucket(self) -> None:
+ bucket = self.s3_resource.Bucket(self.name)
  try:
- with rich_utils.safe_status(
- f'[bold cyan]Deleting OCI bucket {bucket_name}[/]'):
- subprocess.check_output(remove_command.split(' '),
- stderr=subprocess.STDOUT)
- except subprocess.CalledProcessError as e:
- if 'BucketNotFound' in e.output.decode('utf-8'):
- logger.debug(
- _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
- bucket_name=bucket_name))
- return False
- else:
- logger.error(e.output)
- with ux_utils.print_exception_no_traceback():
- raise exceptions.StorageBucketDeleteError(
- f'Failed to delete OCI bucket {bucket_name}.')
- return True
+ self._delete_cos_bucket_objects(bucket)
+ bucket.delete()
+ bucket.wait_until_not_exists()
+ except ibm.ibm_botocore.exceptions.ClientError as e:
+ if e.__class__.__name__ == 'NoSuchBucket':
+ logger.debug('bucket already removed')
+ data_utils.Rclone.delete_rclone_bucket_profile(
+ self.name, data_utils.Rclone.RcloneStores.IBM)


- class NebiusStore(AbstractStore):
- """NebiusStore inherits from Storage Object and represents the backend
- for S3 buckets.
+ class OciStore(AbstractStore):
+ """OciStore inherits from Storage Object and represents the backend
+ for OCI buckets.
  """

- _ACCESS_DENIED_MESSAGE = 'Access Denied'
- _TIMEOUT_TO_PROPAGATES = 20
+ _ACCESS_DENIED_MESSAGE = 'AccessDeniedException'

  def __init__(self,
  name: str,
- source: str,
+ source: Optional[SourceType],
  region: Optional[str] = None,
  is_sky_managed: Optional[bool] = None,
- sync_on_reconstruction: bool = True,
+ sync_on_reconstruction: Optional[bool] = True,
  _bucket_sub_path: Optional[str] = None):
- self.client: 'mypy_boto3_s3.Client'
- self.bucket: 'StorageHandle'
+ self.client: Any
+ self.bucket: StorageHandle
+ self.oci_config_file: str
+ self.config_profile: str
+ self.compartment: str
+ self.namespace: str
+
+ # Region is from the specified name in <bucket>@<region> format.
+ # Another case is name can also be set by the source, for example:
+ # /datasets-storage:
+ # source: oci://RAGData@us-sanjose-1
+ # The name in above mount will be set to RAGData@us-sanjose-1
+ region_in_name = None
+ if name is not None and '@' in name:
+ self._validate_bucket_expr(name)
+ name, region_in_name = name.split('@')
+
+ # Region is from the specified source in oci://<bucket>@<region> format
+ region_in_source = None
+ if isinstance(source,
+ str) and source.startswith('oci://') and '@' in source:
+ self._validate_bucket_expr(source)
+ source, region_in_source = source.split('@')
+
+ if region_in_name is not None and region_in_source is not None:
+ # This should never happen because name and source will never be
+ # the remote bucket at the same time.
+ assert region_in_name == region_in_source, (
+ f'Mismatch region specified. Region in name {region_in_name}, '
+ f'but region in source is {region_in_source}')
+
+ if region_in_name is not None:
+ region = region_in_name
+ elif region_in_source is not None:
+ region = region_in_source
+
+ # Default region set to what specified in oci config.
+ if region is None:
+ region = oci.get_oci_config()['region']
+
+ # From now on, the name and source are canonical, meaning there
+ # is no region (@<region> suffix) associated with them anymore.
+
  super().__init__(name, source, region, is_sky_managed,
  sync_on_reconstruction, _bucket_sub_path)
+ # TODO(zpoint): add _bucket_sub_path to the sync/mount/delete commands
+
+ def _validate_bucket_expr(self, bucket_expr: str):
+ pattern = r'^(\w+://)?[A-Za-z0-9-._]+(@\w{2}-\w+-\d{1})$'
+ if not re.match(pattern, bucket_expr):
+ raise ValueError(
+ 'The format for the bucket portion is <bucket>@<region> '
+ 'when specify a region with a bucket.')

  def _validate(self):
  if self.source is not None and isinstance(self.source, str):
- if self.source.startswith('s3://'):
- assert self.name == data_utils.split_s3_path(self.source)[0], (
- 'S3 Bucket is specified as path, the name should be the'
- ' same as S3 bucket.')
- elif self.source.startswith('gs://'):
- assert self.name == data_utils.split_gcs_path(self.source)[0], (
- 'GCS Bucket is specified as path, the name should be '
- 'the same as GCS bucket.')
- assert data_utils.verify_gcs_bucket(self.name), (
- f'Source specified as {self.source}, a GCS bucket. ',
- 'GCS Bucket should exist.')
- elif data_utils.is_az_container_endpoint(self.source):
- storage_account_name, container_name, _ = (
- data_utils.split_az_path(self.source))
- assert self.name == container_name, (
- 'Azure bucket is specified as path, the name should be '
- 'the same as Azure bucket.')
- assert data_utils.verify_az_bucket(
- storage_account_name, self.name), (
- f'Source specified as {self.source}, an Azure bucket. '
- 'Azure bucket should exist.')
- elif self.source.startswith('r2://'):
- assert self.name == data_utils.split_r2_path(self.source)[0], (
- 'R2 Bucket is specified as path, the name should be '
- 'the same as R2 bucket.')
- assert data_utils.verify_r2_bucket(self.name), (
- f'Source specified as {self.source}, a R2 bucket. ',
- 'R2 Bucket should exist.')
- elif self.source.startswith('nebius://'):
- assert self.name == data_utils.split_nebius_path(
- self.source)[0], (
- 'Nebius Object Storage is specified as path, the name '
- 'should be the same as Nebius Object Storage bucket.')
- elif self.source.startswith('cos://'):
- assert self.name == data_utils.split_cos_path(self.source)[0], (
- 'COS Bucket is specified as path, the name should be '
- 'the same as COS bucket.')
- assert data_utils.verify_ibm_cos_bucket(self.name), (
- f'Source specified as {self.source}, a COS bucket. ',
- 'COS Bucket should exist.')
- elif self.source.startswith('oci://'):
+ if self.source.startswith('oci://'):
+ assert self.name == data_utils.split_oci_path(self.source)[0], (
+ 'OCI Bucket is specified as path, the name should be '
+ 'the same as OCI bucket.')
+ elif not re.search(r'^\w+://', self.source):
+ # Treat it as local path.
+ pass
+ else:
  raise NotImplementedError(
- 'Moving data from OCI to S3 is currently not supported.')
- # Validate name
- self.name = S3Store.validate_name(self.name)
+ f'Moving data from {self.source} to OCI is not supported.')

+ # Validate name
+ self.name = self.validate_name(self.name)
  # Check if the storage is enabled
- if not _is_storage_cloud_enabled(str(clouds.Nebius())):
+ if not _is_storage_cloud_enabled(str(clouds.OCI())):
  with ux_utils.print_exception_no_traceback():
- raise exceptions.ResourcesUnavailableError((
- 'Storage \'store: nebius\' specified, but '
- 'Nebius access is disabled. To fix, enable '
- 'Nebius by running `sky check`. More info: '
- 'https://docs.skypilot.co/en/latest/getting-started/installation.html.' # pylint: disable=line-too-long
- ))
+ raise exceptions.ResourcesUnavailableError(
+ 'Storage \'store: oci\' specified, but ' \
+ 'OCI access is disabled. To fix, enable '\
+ 'OCI by running `sky check`. '\
+ 'More info: https://skypilot.readthedocs.io/en/latest/getting-started/installation.html.' # pylint: disable=line-too-long
+ )
+
+ @classmethod
+ def validate_name(cls, name) -> str:
+ """Validates the name of the OCI store.
+
+ Source for rules: https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/managingbuckets.htm#Managing_Buckets # pylint: disable=line-too-long
+ """
+
+ def _raise_no_traceback_name_error(err_str):
+ with ux_utils.print_exception_no_traceback():
+ raise exceptions.StorageNameError(err_str)
+
+ if name is not None and isinstance(name, str):
+ # Check for overall length
+ if not 1 <= len(name) <= 256:
+ _raise_no_traceback_name_error(
+ f'Invalid store name: name {name} must contain 1-256 '
+ 'characters.')
+
+ # Check for valid characters and start/end with a number or letter
+ pattern = r'^[A-Za-z0-9-._]+$'
+ if not re.match(pattern, name):
+ _raise_no_traceback_name_error(
+ f'Invalid store name: name {name} can only contain '
+ 'upper or lower case letters, numeric characters, hyphens '
+ '(-), underscores (_), and dots (.). Spaces are not '
+ 'allowed. Names must start and end with a number or '
+ 'letter.')
+ else:
+ _raise_no_traceback_name_error('Store name must be specified.')
+ return name

  def initialize(self):
- """Initializes the Nebius Object Storage on the cloud.
+ """Initializes the OCI store object on the cloud.

  Initialization involves fetching bucket if exists, or creating it if
  it does not.
@@ -4774,7 +4171,21 @@ class NebiusStore(AbstractStore):
  StorageBucketGetError: If fetching existing bucket fails
  StorageInitError: If general initialization fails.
  """
- self.client = data_utils.create_nebius_client()
+ # pylint: disable=import-outside-toplevel
+ from sky.clouds.utils import oci_utils
+ from sky.provision.oci.query_utils import query_helper
+
+ self.oci_config_file = oci.get_config_file()
+ self.config_profile = oci_utils.oci_config.get_profile()
+
+ # pylint: disable=line-too-long
+ # What's a compartment? See https://docs.oracle.com/en/cloud/foundation/cloud_architecture/governance/compartments.html
+ self.compartment = query_helper.find_compartment(self.region)
+ self.client = oci.get_object_storage_client(region=self.region,
+ profile=self.config_profile)
+ self.namespace = self.client.get_namespace(
+ compartment_id=oci.get_oci_config()['tenancy']).data
+
  self.bucket, is_new_bucket = self._get_bucket()
  if self.is_sky_managed is None:
  # If is_sky_managed is not specified, then this is a new storage
@@ -4794,20 +4205,12 @@ class NebiusStore(AbstractStore):
  """
  try:
  if isinstance(self.source, list):
- self.batch_aws_rsync(self.source, create_dirs=True)
+ self.batch_oci_rsync(self.source, create_dirs=True)
  elif self.source is not None:
- if self.source.startswith('nebius://'):
+ if self.source.startswith('oci://'):
  pass
- elif self.source.startswith('s3://'):
- self._transfer_to_nebius()
- elif self.source.startswith('gs://'):
- self._transfer_to_nebius()
- elif self.source.startswith('r2://'):
- self._transfer_to_nebius()
- elif self.source.startswith('oci://'):
- self._transfer_to_nebius()
  else:
- self.batch_aws_rsync([self.source])
+ self.batch_oci_rsync([self.source])
  except exceptions.StorageUploadError:
  raise
  except Exception as e:
@@ -4815,45 +4218,25 @@ class NebiusStore(AbstractStore):
  f'Upload failed for store {self.name}') from e

  def delete(self) -> None:
- if self._bucket_sub_path is not None and not self.is_sky_managed:
- return self._delete_sub_path()
-
- deleted_by_skypilot = self._delete_nebius_bucket(self.name)
+ deleted_by_skypilot = self._delete_oci_bucket(self.name)
  if deleted_by_skypilot:
- msg_str = f'Deleted Nebius bucket {self.name}.'
+ msg_str = f'Deleted OCI bucket {self.name}.'
  else:
- msg_str = (f'Nebius bucket {self.name} may have been deleted '
+ msg_str = (f'OCI bucket {self.name} may have been deleted '
  f'externally. Removing from local state.')
  logger.info(f'{colorama.Fore.GREEN}{msg_str}'
  f'{colorama.Style.RESET_ALL}')

- def _delete_sub_path(self) -> None:
- assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
- deleted_by_skypilot = self._delete_nebius_bucket_sub_path(
- self.name, self._bucket_sub_path)
- if deleted_by_skypilot:
- msg_str = (f'Removed objects from S3 bucket '
- f'{self.name}/{self._bucket_sub_path}.')
- else:
- msg_str = (f'Failed to remove objects from S3 bucket '
- f'{self.name}/{self._bucket_sub_path}.')
- logger.info(f'{colorama.Fore.GREEN}{msg_str}'
- f'{colorama.Style.RESET_ALL}')
-
  def get_handle(self) -> StorageHandle:
- return nebius.resource('s3').Bucket(self.name)
+ return self.client.get_bucket(namespace_name=self.namespace,
+ bucket_name=self.name).data

- def batch_aws_rsync(self,
+ def batch_oci_rsync(self,
  source_path_list: List[Path],
  create_dirs: bool = False) -> None:
- """Invokes aws s3 sync to batch upload a list of local paths to S3
-
- AWS Sync by default uses 10 threads to upload files to the bucket. To
- increase parallelism, modify max_concurrent_requests in your aws config
- file (Default path: ~/.aws/config).
+ """Invokes oci sync to batch upload a list of local paths to Bucket

- Since aws s3 sync does not support batch operations, we construct
- multiple commands to be run in parallel.
+ Use OCI bulk operation to batch process the file upload

  Args:
  source_path_list: List of paths to local files or directories
@@ -4863,34 +4246,45 @@ class NebiusStore(AbstractStore):
  set to True, the directory is created in the bucket root and
  contents are uploaded to it.
  """
- sub_path = (f'/{self._bucket_sub_path}'
+ sub_path = (f'{self._bucket_sub_path}/'
  if self._bucket_sub_path else '')

+ @oci.with_oci_env
  def get_file_sync_command(base_dir_path, file_names):
- includes = ' '.join([
- f'--include {shlex.quote(file_name)}'
- for file_name in file_names
- ])
- base_dir_path = shlex.quote(base_dir_path)
- sync_command = ('aws s3 sync --no-follow-symlinks --exclude="*" '
- f'{includes} {base_dir_path} '
- f's3://{self.name}{sub_path} '
- f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+ includes = ' '.join(
+ [f'--include "{file_name}"' for file_name in file_names])
+ prefix_arg = ''
+ if sub_path:
+ prefix_arg = f'--object-prefix "{sub_path.strip("/")}"'
+ sync_command = (
+ 'oci os object bulk-upload --no-follow-symlinks --overwrite '
+ f'--bucket-name {self.name} --namespace-name {self.namespace} '
+ f'--region {self.region} --src-dir "{base_dir_path}" '
+ f'{prefix_arg} '
+ f'{includes}')
+
  return sync_command

+ @oci.with_oci_env
  def get_dir_sync_command(src_dir_path, dest_dir_name):
- # we exclude .git directory from the sync
+ if dest_dir_name and not str(dest_dir_name).endswith('/'):
+ dest_dir_name = f'{dest_dir_name}/'
+
  excluded_list = storage_utils.get_excluded_files(src_dir_path)
  excluded_list.append('.git/*')
  excludes = ' '.join([
  f'--exclude {shlex.quote(file_name)}'
  for file_name in excluded_list
  ])
- src_dir_path = shlex.quote(src_dir_path)
- sync_command = (f'aws s3 sync --no-follow-symlinks {excludes} '
- f'{src_dir_path} '
- f's3://{self.name}{sub_path}/{dest_dir_name} '
- f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+
+ # we exclude .git directory from the sync
+ sync_command = (
+ 'oci os object bulk-upload --no-follow-symlinks --overwrite '
+ f'--bucket-name {self.name} --namespace-name {self.namespace} '
+ f'--region {self.region} '
+ f'--object-prefix "{sub_path}{dest_dir_name}" '
+ f'--src-dir "{src_dir_path}" {excludes}')
+
  return sync_command

  # Generate message for upload
@@ -4901,210 +4295,469 @@ class NebiusStore(AbstractStore):
 
         log_path = sky_logging.generate_tmp_logging_file_path(
             _STORAGE_LOG_FILE_NAME)
-        sync_path = f'{source_message} -> nebius://{self.name}{sub_path}/'
+        sync_path = f'{source_message} -> oci://{self.name}/{sub_path}'
         with rich_utils.safe_status(
                 ux_utils.spinner_message(f'Syncing {sync_path}',
                                          log_path=log_path)):
             data_utils.parallel_upload(
-                source_path_list,
-                get_file_sync_command,
-                get_dir_sync_command,
-                log_path,
-                self.name,
-                self._ACCESS_DENIED_MESSAGE,
+                source_path_list=source_path_list,
+                filesync_command_generator=get_file_sync_command,
+                dirsync_command_generator=get_dir_sync_command,
+                log_path=log_path,
+                bucket_name=self.name,
+                access_denied_message=self._ACCESS_DENIED_MESSAGE,
                 create_dirs=create_dirs,
-                max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS)
-        logger.info(
-            ux_utils.finishing_message(f'Storage synced: {sync_path}',
-                                       log_path))
+                max_concurrent_uploads=1)
 
-    def _transfer_to_nebius(self) -> None:
-        assert isinstance(self.source, str), self.source
-        if self.source.startswith('gs://'):
-            data_transfer.gcs_to_nebius(self.name, self.name)
-        elif self.source.startswith('r2://'):
-            data_transfer.r2_to_nebius(self.name, self.name)
-        elif self.source.startswith('s3://'):
-            data_transfer.s3_to_nebius(self.name, self.name)
+        logger.info(
+            ux_utils.finishing_message(f'Storage synced: {sync_path}',
+                                       log_path))
 
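For orientation, a minimal standalone sketch of the command string that get_dir_sync_command above assembles; the bucket, namespace, region, and paths are illustrative rather than values from the package:

    # Sketch of the bulk-upload command built by get_dir_sync_command.
    # All values are hypothetical.
    bucket_name = 'my-bucket'
    namespace = 'my-namespace'
    region = 'us-ashburn-1'
    sub_path = 'data/'        # bucket sub-path prefix; may be ''
    dest_dir_name = 'train/'
    src_dir_path = '/tmp/train'
    excludes = "--exclude '.git/*'"

    sync_command = (
        'oci os object bulk-upload --no-follow-symlinks --overwrite '
        f'--bucket-name {bucket_name} --namespace-name {namespace} '
        f'--region {region} '
        f'--object-prefix "{sub_path}{dest_dir_name}" '
        f'--src-dir "{src_dir_path}" {excludes}')
    print(sync_command)
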
     def _get_bucket(self) -> Tuple[StorageHandle, bool]:
-        """Obtains the S3 bucket.
+        """Obtains the OCI bucket.
+        If the bucket exists, this method will connect to the bucket.
 
-        If the bucket exists, this method will return the bucket.
         If the bucket does not exist, there are three cases:
-        1) Raise an error if the bucket source starts with s3://
+        1) Raise an error if the bucket source starts with oci://
         2) Return None if bucket has been externally deleted and
            sync_on_reconstruction is False
         3) Create and return a new bucket otherwise
 
+        Returns a tuple (Bucket, bool): the first item is the bucket
+        JSON payload from the OCI API call; the second item indicates
+        whether the bucket was newly created (True) or already existed
+        (False).
+
         Raises:
-            StorageSpecError: If externally created bucket is attempted to be
-                mounted without specifying storage source.
             StorageBucketCreateError: If creating the bucket fails
             StorageBucketGetError: If fetching a bucket fails
-            StorageExternalDeletionError: If externally deleted storage is
-                attempted to be fetched while reconstructing the storage for
-                'sky storage delete' or 'sky start'
         """
-        nebius_s = nebius.resource('s3')
-        bucket = nebius_s.Bucket(self.name)
         try:
-            # Try Public bucket case.
-            # This line does not error out if the bucket is an external public
-            # bucket or if it is a user's bucket that is publicly
-            # accessible.
-            self.client.head_bucket(Bucket=self.name)
-            self._validate_existing_bucket()
+            get_bucket_response = self.client.get_bucket(
+                namespace_name=self.namespace, bucket_name=self.name)
+            bucket = get_bucket_response.data
             return bucket, False
-        except aws.botocore_exceptions().ClientError as e:
-            error_code = e.response['Error']['Code']
-            # AccessDenied error for buckets that are private and not owned by
-            # user.
-            if error_code == '403':
-                command = (f'aws s3 ls s3://{self.name} '
-                           f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+        except oci.service_exception() as e:
+            if e.status == 404:  # Not Found
+                if isinstance(self.source,
+                              str) and self.source.startswith('oci://'):
+                    with ux_utils.print_exception_no_traceback():
+                        raise exceptions.StorageBucketGetError(
+                            'Attempted to connect to a non-existent bucket: '
+                            f'{self.source}') from e
+                else:
+                    # If the bucket cannot be found (i.e., does not exist), it
+                    # is to be created by Sky. However, creation is skipped if
+                    # the Store object is being reconstructed for deletion.
+                    if self.sync_on_reconstruction:
+                        bucket = self._create_oci_bucket(self.name)
+                        return bucket, True
+                    else:
+                        return None, False
+            elif e.status == 401:  # Unauthorized
+                # AccessDenied error for buckets that are private and not
+                # owned by the user.
+                command = (
+                    f'oci os object list --namespace-name {self.namespace} '
+                    f'--bucket-name {self.name}')
                 with ux_utils.print_exception_no_traceback():
                     raise exceptions.StorageBucketGetError(
                         _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
                         f' To debug, consider running `{command}`.') from e
+            else:
+                # Unknown / unexpected error. This can happen when the Object
+                # Storage service itself is not functioning normally (e.g. a
+                # maintenance event causes an internal server error or a
+                # request timeout).
+                with ux_utils.print_exception_no_traceback():
+                    raise exceptions.StorageBucketGetError(
+                        f'Failed to connect to OCI bucket {self.name}') from e
 
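The oci name in this hunk is SkyPilot's adaptor module. Against the official OCI Python SDK, the same status-code branching looks roughly like the sketch below; the namespace and bucket names are illustrative, and a valid ~/.oci/config is assumed:

    import oci  # the official 'oci' PyPI package

    config = oci.config.from_file()
    client = oci.object_storage.ObjectStorageClient(config)
    try:
        bucket = client.get_bucket(namespace_name='my-namespace',
                                   bucket_name='my-bucket').data
    except oci.exceptions.ServiceError as e:
        if e.status == 404:    # bucket missing: caller may create it
            bucket = None
        elif e.status == 401:  # not authorized: surface a debug hint
            raise
        else:                  # service-side failure: fail loudly
            raise
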
-        if isinstance(self.source, str) and self.source.startswith('nebius://'):
-            with ux_utils.print_exception_no_traceback():
-                raise exceptions.StorageBucketGetError(
-                    'Attempted to use a non-existent bucket as a source: '
-                    f'{self.source}. Consider using `aws s3 ls '
-                    f's3://{self.name} '
-                    f'--profile={nebius.NEBIUS_PROFILE_NAME}` to debug.')
+    def mount_command(self, mount_path: str) -> str:
+        """Returns the command to mount the bucket to the mount_path.
 
-        # If bucket cannot be found in both private and public settings,
-        # the bucket is to be created by Sky. However, creation is skipped if
-        # Store object is being reconstructed for deletion or re-mount with
-        # sky start, and error is raised instead.
-        if self.sync_on_reconstruction:
-            bucket = self._create_nebius_bucket(self.name)
-            return bucket, True
-        else:
-            # Raised when Storage object is reconstructed for sky storage
-            # delete or to re-mount Storages with sky start but the storage
-            # is already removed externally.
-            raise exceptions.StorageExternalDeletionError(
-                'Attempted to fetch a non-existent bucket: '
-                f'{self.name}')
+        Uses Rclone to mount the bucket.
+
+        Args:
+            mount_path: str; Path to mount the bucket to.
+        """
+        install_cmd = mounting_utils.get_rclone_install_cmd()
+        mount_cmd = mounting_utils.get_oci_mount_cmd(
+            mount_path=mount_path,
+            store_name=self.name,
+            region=str(self.region),
+            namespace=self.namespace,
+            compartment=self.bucket.compartment_id,
+            config_file=self.oci_config_file,
+            config_profile=self.config_profile)
+        version_check_cmd = mounting_utils.get_rclone_version_check_cmd()
+
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cmd,
+                                                   version_check_cmd)
 
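get_oci_mount_cmd is internal to SkyPilot, so the exact flags are not visible in this diff. One plausible shape for the resulting invocation, assuming rclone's oracleobjectstorage backend with an on-the-fly remote (all values illustrative), is:

    # Hypothetical rclone mount command for an OCI bucket; the real
    # flags come from mounting_utils and may differ.
    bucket, mount_path = 'my-bucket', '/mnt/data'
    remote = (':oracleobjectstorage,'
              'namespace=my-namespace,region=us-ashburn-1,'
              'compartment=ocid1.compartment.oc1..example:')
    mount_cmd = f'rclone mount {remote}{bucket} {mount_path} --daemon'
    print(mount_cmd)
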
     def _download_file(self, remote_path: str, local_path: str) -> None:
-        """Downloads file from remote to local on s3 bucket
-        using the boto3 API
+        """Downloads a file from remote to local on the OCI bucket
 
         Args:
-            remote_path: str; Remote path on S3 bucket
+            remote_path: str; Remote path on OCI bucket
             local_path: str; Local path on user's device
         """
-        self.bucket.download_file(remote_path, local_path)
+        if remote_path.startswith(f'/{self.name}'):
+            # If the remote path is /bucket_name, we need to
+            # remove the leading /
+            remote_path = remote_path.lstrip('/')
 
-    def mount_command(self, mount_path: str) -> str:
-        """Returns the command to mount the bucket to the mount_path.
+        filename = os.path.basename(remote_path)
+        if not local_path.endswith(filename):
+            local_path = os.path.join(local_path, filename)
+
+        @oci.with_oci_env
+        def get_file_download_command(remote_path, local_path):
+            download_command = (f'oci os object get --bucket-name {self.name} '
+                                f'--namespace-name {self.namespace} '
+                                f'--region {self.region} --name {remote_path} '
+                                f'--file {local_path}')
 
-        Uses goofys to mount the bucket.
+            return download_command
 
-        Args:
-            mount_path: str; Path to mount the bucket to.
-        """
-        install_cmd = mounting_utils.get_s3_mount_install_cmd()
-        nebius_profile_name = nebius.NEBIUS_PROFILE_NAME
-        endpoint_url = self.client.meta.endpoint_url
-        mount_cmd = mounting_utils.get_nebius_mount_cmd(nebius_profile_name,
-                                                        self.bucket.name,
-                                                        endpoint_url,
-                                                        mount_path,
-                                                        self._bucket_sub_path)
-        return mounting_utils.get_mounting_command(mount_path, install_cmd,
-                                                   mount_cmd)
+        download_command = get_file_download_command(remote_path, local_path)
+
+        try:
+            with rich_utils.safe_status(
+                    f'[bold cyan]Downloading: {remote_path} -> {local_path}[/]'
+            ):
+                subprocess.check_output(download_command,
+                                        stderr=subprocess.STDOUT,
+                                        shell=True)
+        except subprocess.CalledProcessError as e:
+            logger.error(f'Download failed: {remote_path} -> {local_path}.\n'
+                         f'Detailed errors: {e.output}')
+            with ux_utils.print_exception_no_traceback():
+                raise exceptions.StorageBucketDeleteError(
+                    f'Failed to download file {self.name}:{remote_path}.'
+                ) from e
 
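The generated oci os object get command is executed through the shell; a minimal sketch of that pattern, with illustrative values:

    import subprocess

    # Hypothetical object name and paths.
    download_command = ('oci os object get --bucket-name my-bucket '
                        '--namespace-name my-namespace '
                        '--region us-ashburn-1 '
                        '--name data/model.pt --file /tmp/model.pt')
    try:
        subprocess.check_output(download_command,
                                stderr=subprocess.STDOUT,
                                shell=True)
    except subprocess.CalledProcessError as e:
        print(f'Download failed with exit code {e.returncode}: {e.output}')
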
-    def _create_nebius_bucket(self, bucket_name: str) -> StorageHandle:
-        """Creates S3 bucket with specific name
+    def _create_oci_bucket(self, bucket_name: str) -> StorageHandle:
+        """Creates an OCI bucket with the given name in the configured region
 
         Args:
             bucket_name: str; Name of bucket
-        Raises:
-            StorageBucketCreateError: If bucket creation fails.
         """
-        nebius_client = self.client
+        logger.debug(f'_create_oci_bucket: {bucket_name}')
         try:
-            nebius_client.create_bucket(Bucket=bucket_name)
-        except aws.botocore_exceptions().ClientError as e:
+            create_bucket_response = self.client.create_bucket(
+                namespace_name=self.namespace,
+                create_bucket_details=oci.oci.object_storage.models.
+                CreateBucketDetails(
+                    name=bucket_name,
+                    compartment_id=self.compartment,
+                ))
+            bucket = create_bucket_response.data
+            return bucket
+        except oci.service_exception() as e:
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.StorageBucketCreateError(
-                    f'Attempted to create a bucket '
-                    f'{self.name} but failed.') from e
-        return nebius.resource('s3').Bucket(bucket_name)
+                    f'Failed to create OCI bucket: {self.name}') from e
+
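Against the raw OCI SDK, the create call above reduces to roughly the following; the namespace and compartment OCID are illustrative:

    import oci

    config = oci.config.from_file()
    client = oci.object_storage.ObjectStorageClient(config)
    details = oci.object_storage.models.CreateBucketDetails(
        name='my-bucket',
        compartment_id='ocid1.compartment.oc1..example')
    bucket = client.create_bucket(namespace_name='my-namespace',
                                  create_bucket_details=details).data
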
+    def _delete_oci_bucket(self, bucket_name: str) -> bool:
+        """Deletes OCI bucket, including all objects in bucket
+
+        Args:
+            bucket_name: str; Name of bucket
+
+        Returns:
+            bool; True if bucket was deleted, False if it was deleted
+            externally.
+        """
+        logger.debug(f'_delete_oci_bucket: {bucket_name}')
+
+        @oci.with_oci_env
+        def get_bucket_delete_command(bucket_name):
+            remove_command = (f'oci os bucket delete --bucket-name '
+                              f'{bucket_name} --region {self.region} '
+                              f'--empty --force')
+
+            return remove_command
+
+        remove_command = get_bucket_delete_command(bucket_name)
 
-    def _execute_nebius_remove_command(self, command: str, bucket_name: str,
-                                       hint_operating: str,
-                                       hint_failed: str) -> bool:
         try:
             with rich_utils.safe_status(
-                    ux_utils.spinner_message(hint_operating)):
-                subprocess.check_output(command.split(' '),
+                    f'[bold cyan]Deleting OCI bucket {bucket_name}[/]'):
+                subprocess.check_output(remove_command.split(' '),
                                         stderr=subprocess.STDOUT)
         except subprocess.CalledProcessError as e:
-            if 'NoSuchBucket' in e.output.decode('utf-8'):
+            if 'BucketNotFound' in e.output.decode('utf-8'):
                 logger.debug(
                     _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
                         bucket_name=bucket_name))
                 return False
             else:
+                logger.error(e.output)
                 with ux_utils.print_exception_no_traceback():
                     raise exceptions.StorageBucketDeleteError(
-                        f'{hint_failed}'
-                        f'Detailed error: {e.output}')
+                        f'Failed to delete OCI bucket {bucket_name}.')
         return True
 
-    def _delete_nebius_bucket(self, bucket_name: str) -> bool:
-        """Deletes S3 bucket, including all objects in bucket
 
-        Args:
-            bucket_name: str; Name of bucket
+@register_s3_compatible_store
+class S3Store(S3CompatibleStore):
+    """S3Store inherits from S3CompatibleStore and represents the backend
+    for S3 buckets.
+    """
 
-        Returns:
-            bool; True if bucket was deleted, False if it was deleted externally.
+    _DEFAULT_REGION = 'us-east-1'
+    _CUSTOM_ENDPOINT_REGIONS = [
+        'ap-east-1', 'me-south-1', 'af-south-1', 'eu-south-1', 'eu-south-2',
+        'ap-south-2', 'ap-southeast-3', 'ap-southeast-4', 'me-central-1',
+        'il-central-1'
+    ]
 
-        Raises:
-            StorageBucketDeleteError: If deleting the bucket fails.
-        """
-        # Deleting objects is very slow programatically
-        # (i.e. bucket.objects.all().delete() is slow).
-        # In addition, standard delete operations (i.e. via `aws s3 rm`)
-        # are slow, since AWS puts deletion markers.
-        # https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
-        # The fastest way to delete is to run `aws s3 rb --force`,
-        # which removes the bucket by force.
-        remove_command = (f'aws s3 rb s3://{bucket_name} --force '
-                          f'--profile={nebius.NEBIUS_PROFILE_NAME}')
-
-        success = self._execute_nebius_remove_command(
-            remove_command, bucket_name,
-            f'Deleting Nebius bucket {bucket_name}',
-            f'Failed to delete Nebius bucket {bucket_name}.')
-        if not success:
-            return False
+    def __init__(self,
+                 name: str,
+                 source: str,
+                 region: Optional[str] = None,
+                 is_sky_managed: Optional[bool] = None,
+                 sync_on_reconstruction: bool = True,
+                 _bucket_sub_path: Optional[str] = None):
+        # TODO(romilb): This is purely a stopgap fix for
+        # https://github.com/skypilot-org/skypilot/issues/3405
+        # We should eventually make all opt-in regions also work for S3 by
+        # passing the right endpoint flags.
+        if region in self._CUSTOM_ENDPOINT_REGIONS:
+            logger.warning('AWS opt-in regions are not supported for S3. '
+                           f'Falling back to default region '
+                           f'{self._DEFAULT_REGION} for bucket {name!r}.')
+            region = self._DEFAULT_REGION
+        super().__init__(name, source, region, is_sky_managed,
+                         sync_on_reconstruction, _bucket_sub_path)
 
-        # Wait until bucket deletion propagates on Nebius servers
-        start_time = time.time()
-        while data_utils.verify_nebius_bucket(bucket_name):
-            if time.time() - start_time > self._TIMEOUT_TO_PROPAGATES:
-                raise TimeoutError(
-                    f'Timeout while verifying {bucket_name} Nebius bucket.')
-            time.sleep(0.1)
-        return True
+    @classmethod
+    def get_config(cls) -> S3CompatibleConfig:
+        """Return the configuration for AWS S3."""
+        return S3CompatibleConfig(
+            store_type='S3',
+            url_prefix='s3://',
+            client_factory=data_utils.create_s3_client,
+            resource_factory=lambda name: aws.resource('s3').Bucket(name),
+            split_path=data_utils.split_s3_path,
+            verify_bucket=data_utils.verify_s3_bucket,
+            cloud_name=str(clouds.AWS()),
+            default_region=cls._DEFAULT_REGION,
+            mount_cmd_factory=mounting_utils.get_s3_mount_cmd,
+        )
+
+    def mount_cached_command(self, mount_path: str) -> str:
+        install_cmd = mounting_utils.get_rclone_install_cmd()
+        rclone_profile_name = (
+            data_utils.Rclone.RcloneStores.S3.get_profile_name(self.name))
+        rclone_config = data_utils.Rclone.RcloneStores.S3.get_config(
+            rclone_profile_name=rclone_profile_name)
+        mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
+            rclone_config, rclone_profile_name, self.bucket.name, mount_path)
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cached_cmd)
+
+
4561
+ class R2Store(S3CompatibleStore):
4562
+ """R2Store inherits from S3CompatibleStore and represents the backend
4563
+ for R2 buckets.
4564
+ """
4565
+
4566
+ def __init__(self,
4567
+ name: str,
4568
+ source: str,
4569
+ region: Optional[str] = 'auto',
4570
+ is_sky_managed: Optional[bool] = None,
4571
+ sync_on_reconstruction: bool = True,
4572
+ _bucket_sub_path: Optional[str] = None):
4573
+ super().__init__(name, source, region, is_sky_managed,
4574
+ sync_on_reconstruction, _bucket_sub_path)
4575
+
4576
+ @classmethod
4577
+ def get_config(cls) -> S3CompatibleConfig:
4578
+ """Return the configuration for Cloudflare R2."""
4579
+ return S3CompatibleConfig(
4580
+ store_type='R2',
4581
+ url_prefix='r2://',
4582
+ client_factory=lambda region: data_utils.create_r2_client(region or
4583
+ 'auto'),
4584
+ resource_factory=lambda name: cloudflare.resource('s3').Bucket(name
4585
+ ),
4586
+ split_path=data_utils.split_r2_path,
4587
+ verify_bucket=data_utils.verify_r2_bucket,
4588
+ credentials_file=cloudflare.R2_CREDENTIALS_PATH,
4589
+ aws_profile=cloudflare.R2_PROFILE_NAME,
4590
+ get_endpoint_url=lambda: cloudflare.create_endpoint(), # pylint: disable=unnecessary-lambda
4591
+ extra_cli_args=['--checksum-algorithm', 'CRC32'], # R2 specific
4592
+ cloud_name=cloudflare.NAME,
4593
+ default_region='auto',
4594
+ mount_cmd_factory=cls._get_r2_mount_cmd,
4595
+ )
4596
+
4597
+ @classmethod
4598
+ def _get_r2_mount_cmd(cls, bucket_name: str, mount_path: str,
4599
+ bucket_sub_path: Optional[str]) -> str:
4600
+ """Factory method for R2 mount command."""
4601
+ endpoint_url = cloudflare.create_endpoint()
4602
+ return mounting_utils.get_r2_mount_cmd(cloudflare.R2_CREDENTIALS_PATH,
4603
+ cloudflare.R2_PROFILE_NAME,
4604
+ endpoint_url, bucket_name,
4605
+ mount_path, bucket_sub_path)
4606
+
4607
+ def mount_cached_command(self, mount_path: str) -> str:
4608
+ """R2-specific cached mount implementation using rclone."""
4609
+ install_cmd = mounting_utils.get_rclone_install_cmd()
4610
+ rclone_profile_name = (
4611
+ data_utils.Rclone.RcloneStores.R2.get_profile_name(self.name))
4612
+ rclone_config = data_utils.Rclone.RcloneStores.R2.get_config(
4613
+ rclone_profile_name=rclone_profile_name)
4614
+ mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
4615
+ rclone_config, rclone_profile_name, self.bucket.name, mount_path)
4616
+ return mounting_utils.get_mounting_command(mount_path, install_cmd,
4617
+ mount_cached_cmd)
4618
+
4619
+
4620
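Like the other stores registered here, R2 is driven through the S3 API against a custom endpoint. In plain boto3 terms the client construction looks roughly like this; the account ID and profile name are placeholders:

    import boto3

    session = boto3.session.Session(profile_name='r2')
    client = session.client(
        's3',
        endpoint_url='https://<account-id>.r2.cloudflarestorage.com',
        region_name='auto')
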
+@register_s3_compatible_store
+class NebiusStore(S3CompatibleStore):
+    """NebiusStore inherits from S3CompatibleStore and represents the backend
+    for Nebius Object Storage buckets.
+    """
+
+    @classmethod
+    def get_config(cls) -> S3CompatibleConfig:
+        """Return the configuration for Nebius Object Storage."""
+        return S3CompatibleConfig(
+            store_type='NEBIUS',
+            url_prefix='nebius://',
+            client_factory=lambda region: data_utils.create_nebius_client(),
+            resource_factory=lambda name: nebius.resource('s3').Bucket(name),
+            split_path=data_utils.split_nebius_path,
+            verify_bucket=data_utils.verify_nebius_bucket,
+            aws_profile=nebius.NEBIUS_PROFILE_NAME,
+            cloud_name=str(clouds.Nebius()),
+            mount_cmd_factory=cls._get_nebius_mount_cmd,
+        )
+
+    @classmethod
+    def _get_nebius_mount_cmd(cls, bucket_name: str, mount_path: str,
+                              bucket_sub_path: Optional[str]) -> str:
+        """Factory method for Nebius mount command."""
+        # We need the endpoint URL, but since this is a classmethod we
+        # have to create a client to obtain it.
+        client = data_utils.create_nebius_client()
+        endpoint_url = client.meta.endpoint_url
+        return mounting_utils.get_nebius_mount_cmd(nebius.NEBIUS_PROFILE_NAME,
+                                                   bucket_name, endpoint_url,
+                                                   mount_path, bucket_sub_path)
+
+    def mount_cached_command(self, mount_path: str) -> str:
+        """Nebius-specific cached mount implementation using rclone."""
+        install_cmd = mounting_utils.get_rclone_install_cmd()
+        rclone_profile_name = (
+            data_utils.Rclone.RcloneStores.NEBIUS.get_profile_name(self.name))
+        rclone_config = data_utils.Rclone.RcloneStores.NEBIUS.get_config(
+            rclone_profile_name=rclone_profile_name)
+        mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
+            rclone_config, rclone_profile_name, self.bucket.name, mount_path)
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cached_cmd)
+
+
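The mount_cached_command implementations above all share one pattern: generate a per-bucket rclone profile, then mount with a VFS cache. A plausible final command, with an assumed profile name and flags (the real command comes from get_mount_cached_cmd):

    bucket, mount_path = 'my-bucket', '/mnt/data'
    rclone_profile = 'sky-nebius-my-bucket'  # assumed naming scheme
    mount_cmd = (f'rclone mount {rclone_profile}:{bucket} {mount_path} '
                 '--vfs-cache-mode full --daemon')
    print(mount_cmd)
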
+@register_s3_compatible_store
+class CoreWeaveStore(S3CompatibleStore):
+    """CoreWeaveStore inherits from S3CompatibleStore and represents the
+    backend for CoreWeave Object Storage buckets.
+    """
+
+    @classmethod
+    def get_config(cls) -> S3CompatibleConfig:
+        """Return the configuration for CoreWeave Object Storage."""
+        return S3CompatibleConfig(
+            store_type='COREWEAVE',
+            url_prefix='cw://',
+            client_factory=lambda region: data_utils.create_coreweave_client(),
+            resource_factory=lambda name: coreweave.resource('s3').Bucket(
+                name),
+            split_path=data_utils.split_coreweave_path,
+            verify_bucket=data_utils.verify_coreweave_bucket,
+            aws_profile=coreweave.COREWEAVE_PROFILE_NAME,
+            get_endpoint_url=coreweave.get_endpoint,
+            credentials_file=coreweave.COREWEAVE_CREDENTIALS_PATH,
+            config_file=coreweave.COREWEAVE_CONFIG_PATH,
+            cloud_name=coreweave.NAME,
+            default_region=coreweave.DEFAULT_REGION,
+            mount_cmd_factory=cls._get_coreweave_mount_cmd,
+        )
+
+    def _get_bucket(self) -> Tuple[StorageHandle, bool]:
+        """Get or create bucket using CoreWeave's S3 API"""
+        bucket = self.config.resource_factory(self.name)
+
+        # Use our custom bucket verification instead of head_bucket
+        if data_utils.verify_coreweave_bucket(self.name):
+            self._validate_existing_bucket()
+            return bucket, False
+
+        # TODO(hailong): Enable bucket creation for CoreWeave.
+        # Creation is disabled to avoid waiting too long, until the
+        # following issue is resolved:
+        # https://github.com/skypilot-org/skypilot/issues/7736
+        raise exceptions.StorageBucketGetError(
+            f'Bucket {self.name!r} does not exist. CoreWeave buckets can take'
+            ' a long time to become accessible after creation, so SkyPilot'
+            ' does not create them automatically. Please create the bucket'
+            ' manually in CoreWeave and wait for it to be accessible before'
+            ' using it.')
+
+        # # Check if this is a source with URL prefix (existing bucket case)
+        # if isinstance(self.source, str) and self.source.startswith(
+        #         self.config.url_prefix):
+        #     with ux_utils.print_exception_no_traceback():
+        #         raise exceptions.StorageBucketGetError(
+        #             'Attempted to use a non-existent bucket as a source: '
+        #             f'{self.source}.')
+
+        # # If bucket cannot be found, create it if needed
+        # if self.sync_on_reconstruction:
+        #     bucket = self._create_bucket(self.name)
+        #     return bucket, True
+        # else:
+        #     raise exceptions.StorageExternalDeletionError(
+        #         'Attempted to fetch a non-existent bucket: '
+        #         f'{self.name}')
+
+    @classmethod
+    def _get_coreweave_mount_cmd(cls, bucket_name: str, mount_path: str,
+                                 bucket_sub_path: Optional[str]) -> str:
+        """Factory method for CoreWeave mount command."""
+        endpoint_url = coreweave.get_endpoint()
+        return mounting_utils.get_coreweave_mount_cmd(
+            coreweave.COREWEAVE_CREDENTIALS_PATH,
+            coreweave.COREWEAVE_PROFILE_NAME, bucket_name, endpoint_url,
+            mount_path, bucket_sub_path)
+
+    def mount_cached_command(self, mount_path: str) -> str:
+        """CoreWeave-specific cached mount implementation using rclone."""
+        install_cmd = mounting_utils.get_rclone_install_cmd()
+        rclone_profile_name = (
+            data_utils.Rclone.RcloneStores.COREWEAVE.get_profile_name(
+                self.name))
+        rclone_config = data_utils.Rclone.RcloneStores.COREWEAVE.get_config(
+            rclone_profile_name=rclone_profile_name)
+        mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
+            rclone_config, rclone_profile_name, self.bucket.name, mount_path)
+        return mounting_utils.get_mounting_command(mount_path, install_cmd,
+                                                   mount_cached_cmd)
 
-    def _delete_nebius_bucket_sub_path(self, bucket_name: str,
-                                       sub_path: str) -> bool:
-        """Deletes the sub path from the bucket."""
-        remove_command = (
-            f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive '
-            f'--profile={nebius.NEBIUS_PROFILE_NAME}')
-        return self._execute_nebius_remove_command(
-            remove_command, bucket_name, f'Removing objects from '
-            f'Nebius bucket {bucket_name}/{sub_path}',
-            f'Failed to remove objects from '
-            f'Nebius bucket {bucket_name}/{sub_path}.')
+    def _create_bucket(self, bucket_name: str) -> StorageHandle:
+        """Create bucket using S3 API with timing handling for CoreWeave."""
+        result = super()._create_bucket(bucket_name)
+        # Ensure the bucket is created. A newly created bucket can take
+        # about 18 min to become accessible; we retry only 36 times
+        # (5s * 36 = 180s) to avoid waiting too long.
+        # TODO(hailong): Update the logic here when the following
+        # issue is resolved:
+        # https://github.com/skypilot-org/skypilot/issues/7736
+        data_utils.verify_coreweave_bucket(bucket_name, retry=36)
+
+        return result
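The retry logic in the comment (36 probes, 5 seconds apart, about 180 seconds total) amounts to polling the bucket until it answers. A generic sketch with boto3; CoreWeave would point the client at its custom endpoint:

    import time

    import boto3
    import botocore.exceptions

    def wait_for_bucket(name: str, retries: int = 36,
                        interval: float = 5.0) -> bool:
        client = boto3.client('s3')  # assumes default credentials/endpoint
        for _ in range(retries):
            try:
                client.head_bucket(Bucket=name)
                return True
            except botocore.exceptions.ClientError:
                time.sleep(interval)
        return False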