skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in those public registries.

Potentially problematic release.

Files changed (512)
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +25 -7
  3. sky/adaptors/common.py +24 -1
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/hyperbolic.py +8 -0
  7. sky/adaptors/kubernetes.py +149 -18
  8. sky/adaptors/nebius.py +170 -17
  9. sky/adaptors/primeintellect.py +1 -0
  10. sky/adaptors/runpod.py +68 -0
  11. sky/adaptors/seeweb.py +167 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +187 -4
  14. sky/authentication.py +179 -225
  15. sky/backends/__init__.py +4 -2
  16. sky/backends/backend.py +22 -9
  17. sky/backends/backend_utils.py +1299 -380
  18. sky/backends/cloud_vm_ray_backend.py +1715 -518
  19. sky/backends/docker_utils.py +1 -1
  20. sky/backends/local_docker_backend.py +11 -6
  21. sky/backends/wheel_utils.py +37 -9
  22. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  23. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  24. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  25. sky/{clouds/service_catalog → catalog}/common.py +89 -48
  26. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  27. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  28. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
  29. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
  31. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  33. sky/catalog/data_fetchers/fetch_nebius.py +335 -0
  34. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  35. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  36. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  37. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  38. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  39. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  40. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  41. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  42. sky/catalog/hyperbolic_catalog.py +136 -0
  43. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  44. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  45. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  46. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  47. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  48. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  49. sky/catalog/primeintellect_catalog.py +95 -0
  50. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  51. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  52. sky/catalog/seeweb_catalog.py +184 -0
  53. sky/catalog/shadeform_catalog.py +165 -0
  54. sky/catalog/ssh_catalog.py +167 -0
  55. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  56. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  57. sky/check.py +491 -203
  58. sky/cli.py +5 -6005
  59. sky/client/{cli.py → cli/command.py} +2477 -1885
  60. sky/client/cli/deprecation_utils.py +99 -0
  61. sky/client/cli/flags.py +359 -0
  62. sky/client/cli/table_utils.py +320 -0
  63. sky/client/common.py +70 -32
  64. sky/client/oauth.py +82 -0
  65. sky/client/sdk.py +1203 -297
  66. sky/client/sdk_async.py +833 -0
  67. sky/client/service_account_auth.py +47 -0
  68. sky/cloud_stores.py +73 -0
  69. sky/clouds/__init__.py +13 -0
  70. sky/clouds/aws.py +358 -93
  71. sky/clouds/azure.py +105 -83
  72. sky/clouds/cloud.py +127 -36
  73. sky/clouds/cudo.py +68 -50
  74. sky/clouds/do.py +66 -48
  75. sky/clouds/fluidstack.py +63 -44
  76. sky/clouds/gcp.py +339 -110
  77. sky/clouds/hyperbolic.py +293 -0
  78. sky/clouds/ibm.py +70 -49
  79. sky/clouds/kubernetes.py +563 -162
  80. sky/clouds/lambda_cloud.py +74 -54
  81. sky/clouds/nebius.py +206 -80
  82. sky/clouds/oci.py +88 -66
  83. sky/clouds/paperspace.py +61 -44
  84. sky/clouds/primeintellect.py +317 -0
  85. sky/clouds/runpod.py +164 -74
  86. sky/clouds/scp.py +89 -83
  87. sky/clouds/seeweb.py +466 -0
  88. sky/clouds/shadeform.py +400 -0
  89. sky/clouds/ssh.py +263 -0
  90. sky/clouds/utils/aws_utils.py +10 -4
  91. sky/clouds/utils/gcp_utils.py +87 -11
  92. sky/clouds/utils/oci_utils.py +38 -14
  93. sky/clouds/utils/scp_utils.py +177 -124
  94. sky/clouds/vast.py +99 -77
  95. sky/clouds/vsphere.py +51 -40
  96. sky/core.py +349 -139
  97. sky/dag.py +15 -0
  98. sky/dashboard/out/404.html +1 -1
  99. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  100. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
  102. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  105. sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
  106. sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
  111. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  112. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  113. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  115. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  116. sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
  118. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  119. sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  121. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  122. sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  124. sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  126. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  127. sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
  128. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  129. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  131. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  133. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  134. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
  136. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  139. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  141. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
  143. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
  144. sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  148. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
  149. sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
  151. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  152. sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
  153. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  154. sky/dashboard/out/clusters/[cluster].html +1 -1
  155. sky/dashboard/out/clusters.html +1 -1
  156. sky/dashboard/out/config.html +1 -0
  157. sky/dashboard/out/index.html +1 -1
  158. sky/dashboard/out/infra/[context].html +1 -0
  159. sky/dashboard/out/infra.html +1 -0
  160. sky/dashboard/out/jobs/[job].html +1 -1
  161. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  162. sky/dashboard/out/jobs.html +1 -1
  163. sky/dashboard/out/users.html +1 -0
  164. sky/dashboard/out/volumes.html +1 -0
  165. sky/dashboard/out/workspace/new.html +1 -0
  166. sky/dashboard/out/workspaces/[name].html +1 -0
  167. sky/dashboard/out/workspaces.html +1 -0
  168. sky/data/data_utils.py +137 -1
  169. sky/data/mounting_utils.py +269 -84
  170. sky/data/storage.py +1451 -1807
  171. sky/data/storage_utils.py +43 -57
  172. sky/exceptions.py +132 -2
  173. sky/execution.py +206 -63
  174. sky/global_user_state.py +2374 -586
  175. sky/jobs/__init__.py +5 -0
  176. sky/jobs/client/sdk.py +242 -65
  177. sky/jobs/client/sdk_async.py +143 -0
  178. sky/jobs/constants.py +9 -8
  179. sky/jobs/controller.py +839 -277
  180. sky/jobs/file_content_utils.py +80 -0
  181. sky/jobs/log_gc.py +201 -0
  182. sky/jobs/recovery_strategy.py +398 -152
  183. sky/jobs/scheduler.py +315 -189
  184. sky/jobs/server/core.py +829 -255
  185. sky/jobs/server/server.py +156 -115
  186. sky/jobs/server/utils.py +136 -0
  187. sky/jobs/state.py +2092 -701
  188. sky/jobs/utils.py +1242 -160
  189. sky/logs/__init__.py +21 -0
  190. sky/logs/agent.py +108 -0
  191. sky/logs/aws.py +243 -0
  192. sky/logs/gcp.py +91 -0
  193. sky/metrics/__init__.py +0 -0
  194. sky/metrics/utils.py +443 -0
  195. sky/models.py +78 -1
  196. sky/optimizer.py +164 -70
  197. sky/provision/__init__.py +90 -4
  198. sky/provision/aws/config.py +147 -26
  199. sky/provision/aws/instance.py +135 -50
  200. sky/provision/azure/instance.py +10 -5
  201. sky/provision/common.py +13 -1
  202. sky/provision/cudo/cudo_machine_type.py +1 -1
  203. sky/provision/cudo/cudo_utils.py +14 -8
  204. sky/provision/cudo/cudo_wrapper.py +72 -71
  205. sky/provision/cudo/instance.py +10 -6
  206. sky/provision/do/instance.py +10 -6
  207. sky/provision/do/utils.py +4 -3
  208. sky/provision/docker_utils.py +114 -23
  209. sky/provision/fluidstack/instance.py +13 -8
  210. sky/provision/gcp/__init__.py +1 -0
  211. sky/provision/gcp/config.py +301 -19
  212. sky/provision/gcp/constants.py +218 -0
  213. sky/provision/gcp/instance.py +36 -8
  214. sky/provision/gcp/instance_utils.py +18 -4
  215. sky/provision/gcp/volume_utils.py +247 -0
  216. sky/provision/hyperbolic/__init__.py +12 -0
  217. sky/provision/hyperbolic/config.py +10 -0
  218. sky/provision/hyperbolic/instance.py +437 -0
  219. sky/provision/hyperbolic/utils.py +373 -0
  220. sky/provision/instance_setup.py +93 -14
  221. sky/provision/kubernetes/__init__.py +5 -0
  222. sky/provision/kubernetes/config.py +9 -52
  223. sky/provision/kubernetes/constants.py +17 -0
  224. sky/provision/kubernetes/instance.py +789 -247
  225. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  226. sky/provision/kubernetes/network.py +27 -17
  227. sky/provision/kubernetes/network_utils.py +40 -43
  228. sky/provision/kubernetes/utils.py +1192 -531
  229. sky/provision/kubernetes/volume.py +282 -0
  230. sky/provision/lambda_cloud/instance.py +22 -16
  231. sky/provision/nebius/constants.py +50 -0
  232. sky/provision/nebius/instance.py +19 -6
  233. sky/provision/nebius/utils.py +196 -91
  234. sky/provision/oci/instance.py +10 -5
  235. sky/provision/paperspace/instance.py +10 -7
  236. sky/provision/paperspace/utils.py +1 -1
  237. sky/provision/primeintellect/__init__.py +10 -0
  238. sky/provision/primeintellect/config.py +11 -0
  239. sky/provision/primeintellect/instance.py +454 -0
  240. sky/provision/primeintellect/utils.py +398 -0
  241. sky/provision/provisioner.py +110 -36
  242. sky/provision/runpod/__init__.py +5 -0
  243. sky/provision/runpod/instance.py +27 -6
  244. sky/provision/runpod/utils.py +51 -18
  245. sky/provision/runpod/volume.py +180 -0
  246. sky/provision/scp/__init__.py +15 -0
  247. sky/provision/scp/config.py +93 -0
  248. sky/provision/scp/instance.py +531 -0
  249. sky/provision/seeweb/__init__.py +11 -0
  250. sky/provision/seeweb/config.py +13 -0
  251. sky/provision/seeweb/instance.py +807 -0
  252. sky/provision/shadeform/__init__.py +11 -0
  253. sky/provision/shadeform/config.py +12 -0
  254. sky/provision/shadeform/instance.py +351 -0
  255. sky/provision/shadeform/shadeform_utils.py +83 -0
  256. sky/provision/ssh/__init__.py +18 -0
  257. sky/provision/vast/instance.py +13 -8
  258. sky/provision/vast/utils.py +10 -7
  259. sky/provision/vsphere/common/vim_utils.py +1 -2
  260. sky/provision/vsphere/instance.py +15 -10
  261. sky/provision/vsphere/vsphere_utils.py +9 -19
  262. sky/py.typed +0 -0
  263. sky/resources.py +844 -118
  264. sky/schemas/__init__.py +0 -0
  265. sky/schemas/api/__init__.py +0 -0
  266. sky/schemas/api/responses.py +225 -0
  267. sky/schemas/db/README +4 -0
  268. sky/schemas/db/env.py +90 -0
  269. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  270. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  271. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  272. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  273. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  274. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  275. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  276. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  277. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  278. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  279. sky/schemas/db/script.py.mako +28 -0
  280. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  281. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  282. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  283. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  284. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  285. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  286. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  287. sky/schemas/generated/__init__.py +0 -0
  288. sky/schemas/generated/autostopv1_pb2.py +36 -0
  289. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  290. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  291. sky/schemas/generated/jobsv1_pb2.py +86 -0
  292. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  293. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  294. sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
  295. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  296. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  297. sky/schemas/generated/servev1_pb2.py +58 -0
  298. sky/schemas/generated/servev1_pb2.pyi +115 -0
  299. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  300. sky/serve/autoscalers.py +357 -5
  301. sky/serve/client/impl.py +310 -0
  302. sky/serve/client/sdk.py +47 -139
  303. sky/serve/client/sdk_async.py +130 -0
  304. sky/serve/constants.py +10 -8
  305. sky/serve/controller.py +64 -19
  306. sky/serve/load_balancer.py +106 -60
  307. sky/serve/load_balancing_policies.py +115 -1
  308. sky/serve/replica_managers.py +273 -162
  309. sky/serve/serve_rpc_utils.py +179 -0
  310. sky/serve/serve_state.py +554 -251
  311. sky/serve/serve_utils.py +733 -220
  312. sky/serve/server/core.py +66 -711
  313. sky/serve/server/impl.py +1093 -0
  314. sky/serve/server/server.py +21 -18
  315. sky/serve/service.py +133 -48
  316. sky/serve/service_spec.py +135 -16
  317. sky/serve/spot_placer.py +3 -0
  318. sky/server/auth/__init__.py +0 -0
  319. sky/server/auth/authn.py +50 -0
  320. sky/server/auth/loopback.py +38 -0
  321. sky/server/auth/oauth2_proxy.py +200 -0
  322. sky/server/common.py +475 -181
  323. sky/server/config.py +81 -23
  324. sky/server/constants.py +44 -6
  325. sky/server/daemons.py +229 -0
  326. sky/server/html/token_page.html +185 -0
  327. sky/server/metrics.py +160 -0
  328. sky/server/requests/executor.py +528 -138
  329. sky/server/requests/payloads.py +351 -17
  330. sky/server/requests/preconditions.py +21 -17
  331. sky/server/requests/process.py +112 -29
  332. sky/server/requests/request_names.py +120 -0
  333. sky/server/requests/requests.py +817 -224
  334. sky/server/requests/serializers/decoders.py +82 -31
  335. sky/server/requests/serializers/encoders.py +140 -22
  336. sky/server/requests/threads.py +106 -0
  337. sky/server/rest.py +417 -0
  338. sky/server/server.py +1290 -284
  339. sky/server/state.py +20 -0
  340. sky/server/stream_utils.py +345 -57
  341. sky/server/uvicorn.py +217 -3
  342. sky/server/versions.py +270 -0
  343. sky/setup_files/MANIFEST.in +5 -0
  344. sky/setup_files/alembic.ini +156 -0
  345. sky/setup_files/dependencies.py +136 -31
  346. sky/setup_files/setup.py +44 -42
  347. sky/sky_logging.py +102 -5
  348. sky/skylet/attempt_skylet.py +1 -0
  349. sky/skylet/autostop_lib.py +129 -8
  350. sky/skylet/configs.py +27 -20
  351. sky/skylet/constants.py +171 -19
  352. sky/skylet/events.py +105 -21
  353. sky/skylet/job_lib.py +335 -104
  354. sky/skylet/log_lib.py +297 -18
  355. sky/skylet/log_lib.pyi +44 -1
  356. sky/skylet/ray_patches/__init__.py +17 -3
  357. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  358. sky/skylet/ray_patches/cli.py.diff +19 -0
  359. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  360. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  361. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  362. sky/skylet/ray_patches/updater.py.diff +18 -0
  363. sky/skylet/ray_patches/worker.py.diff +41 -0
  364. sky/skylet/services.py +564 -0
  365. sky/skylet/skylet.py +63 -4
  366. sky/skylet/subprocess_daemon.py +103 -29
  367. sky/skypilot_config.py +506 -99
  368. sky/ssh_node_pools/__init__.py +1 -0
  369. sky/ssh_node_pools/core.py +135 -0
  370. sky/ssh_node_pools/server.py +233 -0
  371. sky/task.py +621 -137
  372. sky/templates/aws-ray.yml.j2 +10 -3
  373. sky/templates/azure-ray.yml.j2 +1 -1
  374. sky/templates/do-ray.yml.j2 +1 -1
  375. sky/templates/gcp-ray.yml.j2 +57 -0
  376. sky/templates/hyperbolic-ray.yml.j2 +67 -0
  377. sky/templates/jobs-controller.yaml.j2 +27 -24
  378. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  379. sky/templates/kubernetes-ray.yml.j2 +607 -51
  380. sky/templates/lambda-ray.yml.j2 +1 -1
  381. sky/templates/nebius-ray.yml.j2 +33 -12
  382. sky/templates/paperspace-ray.yml.j2 +1 -1
  383. sky/templates/primeintellect-ray.yml.j2 +71 -0
  384. sky/templates/runpod-ray.yml.j2 +9 -1
  385. sky/templates/scp-ray.yml.j2 +3 -50
  386. sky/templates/seeweb-ray.yml.j2 +108 -0
  387. sky/templates/shadeform-ray.yml.j2 +72 -0
  388. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  389. sky/templates/websocket_proxy.py +178 -18
  390. sky/usage/usage_lib.py +18 -11
  391. sky/users/__init__.py +0 -0
  392. sky/users/model.conf +15 -0
  393. sky/users/permission.py +387 -0
  394. sky/users/rbac.py +121 -0
  395. sky/users/server.py +720 -0
  396. sky/users/token_service.py +218 -0
  397. sky/utils/accelerator_registry.py +34 -5
  398. sky/utils/admin_policy_utils.py +84 -38
  399. sky/utils/annotations.py +16 -5
  400. sky/utils/asyncio_utils.py +78 -0
  401. sky/utils/auth_utils.py +153 -0
  402. sky/utils/benchmark_utils.py +60 -0
  403. sky/utils/cli_utils/status_utils.py +159 -86
  404. sky/utils/cluster_utils.py +31 -9
  405. sky/utils/command_runner.py +354 -68
  406. sky/utils/command_runner.pyi +93 -3
  407. sky/utils/common.py +35 -8
  408. sky/utils/common_utils.py +310 -87
  409. sky/utils/config_utils.py +87 -5
  410. sky/utils/context.py +402 -0
  411. sky/utils/context_utils.py +222 -0
  412. sky/utils/controller_utils.py +264 -89
  413. sky/utils/dag_utils.py +31 -12
  414. sky/utils/db/__init__.py +0 -0
  415. sky/utils/db/db_utils.py +470 -0
  416. sky/utils/db/migration_utils.py +133 -0
  417. sky/utils/directory_utils.py +12 -0
  418. sky/utils/env_options.py +13 -0
  419. sky/utils/git.py +567 -0
  420. sky/utils/git_clone.sh +460 -0
  421. sky/utils/infra_utils.py +195 -0
  422. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  423. sky/utils/kubernetes/config_map_utils.py +133 -0
  424. sky/utils/kubernetes/create_cluster.sh +13 -27
  425. sky/utils/kubernetes/delete_cluster.sh +10 -7
  426. sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
  427. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  428. sky/utils/kubernetes/generate_kind_config.py +6 -66
  429. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  430. sky/utils/kubernetes/gpu_labeler.py +5 -5
  431. sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
  432. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  433. sky/utils/kubernetes/ssh_utils.py +221 -0
  434. sky/utils/kubernetes_enums.py +8 -15
  435. sky/utils/lock_events.py +94 -0
  436. sky/utils/locks.py +368 -0
  437. sky/utils/log_utils.py +300 -6
  438. sky/utils/perf_utils.py +22 -0
  439. sky/utils/resource_checker.py +298 -0
  440. sky/utils/resources_utils.py +249 -32
  441. sky/utils/rich_utils.py +213 -37
  442. sky/utils/schemas.py +905 -147
  443. sky/utils/serialize_utils.py +16 -0
  444. sky/utils/status_lib.py +10 -0
  445. sky/utils/subprocess_utils.py +38 -15
  446. sky/utils/tempstore.py +70 -0
  447. sky/utils/timeline.py +24 -52
  448. sky/utils/ux_utils.py +84 -15
  449. sky/utils/validator.py +11 -1
  450. sky/utils/volume.py +86 -0
  451. sky/utils/yaml_utils.py +111 -0
  452. sky/volumes/__init__.py +13 -0
  453. sky/volumes/client/__init__.py +0 -0
  454. sky/volumes/client/sdk.py +149 -0
  455. sky/volumes/server/__init__.py +0 -0
  456. sky/volumes/server/core.py +258 -0
  457. sky/volumes/server/server.py +122 -0
  458. sky/volumes/volume.py +212 -0
  459. sky/workspaces/__init__.py +0 -0
  460. sky/workspaces/core.py +655 -0
  461. sky/workspaces/server.py +101 -0
  462. sky/workspaces/utils.py +56 -0
  463. skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
  464. skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
  465. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
  466. sky/benchmark/benchmark_state.py +0 -256
  467. sky/benchmark/benchmark_utils.py +0 -641
  468. sky/clouds/service_catalog/constants.py +0 -7
  469. sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
  470. sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
  471. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  472. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  473. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  474. sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
  475. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  476. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  477. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  478. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  479. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  480. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  481. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  482. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
  483. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  484. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  485. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  486. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
  487. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  488. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  489. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  490. sky/jobs/dashboard/dashboard.py +0 -223
  491. sky/jobs/dashboard/static/favicon.ico +0 -0
  492. sky/jobs/dashboard/templates/index.html +0 -831
  493. sky/jobs/server/dashboard_utils.py +0 -69
  494. sky/skylet/providers/scp/__init__.py +0 -2
  495. sky/skylet/providers/scp/config.py +0 -149
  496. sky/skylet/providers/scp/node_provider.py +0 -578
  497. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  498. sky/utils/db_utils.py +0 -100
  499. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  500. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  501. skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
  502. skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
  503. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  504. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  505. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  506. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  507. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  508. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  509. /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
  510. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
  511. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
  512. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
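The largest single change in this release is the rewrite of `sky/data/storage.py` (diffed below), which replaces the hand-written per-provider store classes (S3, R2, Nebius) with a registry of S3-compatible stores driven by a per-provider `S3CompatibleConfig`. The snippet below is a minimal standalone sketch of that registration-and-lookup pattern, condensed from the diff for orientation: `S3CompatibleConfig`, `register_s3_compatible_store`, and `S3CompatibleStore` mirror names in the diff, while `CoreWeaveStoreSketch` and `find_store_type_by_prefix` are simplified illustrations, not the package's actual implementation.

```python
from dataclasses import dataclass
from typing import Dict, Optional, Type


@dataclass
class S3CompatibleConfig:
    """Minimal stand-in for the per-provider config introduced in the diff."""
    store_type: str  # e.g. 'S3', 'R2', 'NEBIUS', 'COREWEAVE'
    url_prefix: str  # e.g. 's3://', 'r2://', 'nebius://', 'cw://'
    cloud_name: str


# Registry keyed by store type, populated by the decorator below.
_S3_COMPATIBLE_STORES: Dict[str, Type['S3CompatibleStore']] = {}


def register_s3_compatible_store(store_class):
    """Register a provider class under its store-type identifier."""
    _S3_COMPATIBLE_STORES[store_class.get_config().store_type] = store_class
    return store_class


class S3CompatibleStore:
    """Base class: each provider only has to supply its config."""

    @classmethod
    def get_config(cls) -> S3CompatibleConfig:
        raise NotImplementedError


@register_s3_compatible_store
class CoreWeaveStoreSketch(S3CompatibleStore):
    """Illustrative provider; the real CoreWeaveStore lives in the diff."""

    @classmethod
    def get_config(cls) -> S3CompatibleConfig:
        return S3CompatibleConfig(store_type='COREWEAVE',
                                  url_prefix='cw://',
                                  cloud_name='coreweave')


def find_store_type_by_prefix(source: str) -> Optional[str]:
    """Same idea as StoreType.find_s3_compatible_config_by_prefix below."""
    for store_type, store_class in _S3_COMPATIBLE_STORES.items():
        if source.startswith(store_class.get_config().url_prefix):
            return store_type
    return None


print(find_store_type_by_prefix('cw://my-bucket/data'))  # -> 'COREWEAVE'
```

With this shape, adding a provider means registering one subclass that supplies its config; the new CoreWeave support in the diff below takes exactly this path.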
sky/data/storage.py CHANGED
@@ -1,4 +1,6 @@
  """Storage and Store Classes for Sky Data."""
+ from abc import abstractmethod
+ from dataclasses import dataclass
  import enum
  import hashlib
  import os
@@ -7,7 +9,7 @@ import shlex
  import subprocess
  import time
  import typing
- from typing import Any, Dict, List, Optional, Tuple, Type, Union
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
  import urllib.parse

  import colorama
@@ -21,6 +23,7 @@ from sky import skypilot_config
  from sky.adaptors import aws
  from sky.adaptors import azure
  from sky.adaptors import cloudflare
+ from sky.adaptors import coreweave
  from sky.adaptors import gcp
  from sky.adaptors import ibm
  from sky.adaptors import nebius
@@ -60,6 +63,7 @@ STORE_ENABLED_CLOUDS: List[str] = [
      str(clouds.OCI()),
      str(clouds.Nebius()),
      cloudflare.NAME,
+     coreweave.NAME,
  ]

  # Maximum number of concurrent rsync upload processes
@@ -91,6 +95,12 @@ def get_cached_enabled_storage_cloud_names_or_refresh(
      r2_is_enabled, _ = cloudflare.check_storage_credentials()
      if r2_is_enabled:
          enabled_clouds.append(cloudflare.NAME)
+
+     # Similarly, handle CoreWeave storage credentials
+     coreweave_is_enabled, _ = coreweave.check_storage_credentials()
+     if coreweave_is_enabled:
+         enabled_clouds.append(coreweave.NAME)
+
      if raise_if_no_cloud_access and not enabled_clouds:
          raise exceptions.NoCloudAccessError(
              'No cloud access available for storage. '
@@ -109,7 +119,7 @@ def _is_storage_cloud_enabled(cloud_name: str,
          sky_check.check_capability(
              sky_cloud.CloudCapability.STORAGE,
              quiet=True,
-         )
+             workspace=skypilot_config.get_active_workspace())
          return _is_storage_cloud_enabled(cloud_name,
                                           try_fix_with_sky_check=False)
      return False
@@ -124,41 +134,71 @@ class StoreType(enum.Enum):
      IBM = 'IBM'
      OCI = 'OCI'
      NEBIUS = 'NEBIUS'
+     COREWEAVE = 'COREWEAVE'
+     VOLUME = 'VOLUME'
+
+     @classmethod
+     def _get_s3_compatible_store_by_cloud(cls,
+                                           cloud_name: str) -> Optional[str]:
+         """Get S3-compatible store type by cloud name."""
+         for store_type, store_class in _S3_COMPATIBLE_STORES.items():
+             config = store_class.get_config()
+             if config.cloud_name.lower() == cloud_name:
+                 return store_type
+         return None
+
+     @classmethod
+     def _get_s3_compatible_config(
+             cls, store_type: str) -> Optional['S3CompatibleConfig']:
+         """Get S3-compatible store configuration by store type."""
+         store_class = _S3_COMPATIBLE_STORES.get(store_type)
+         if store_class:
+             return store_class.get_config()
+         return None
+
+     @classmethod
+     def find_s3_compatible_config_by_prefix(
+             cls, source: str) -> Optional['StoreType']:
+         """Get S3-compatible store type by URL prefix."""
+         for store_type, store_class in _S3_COMPATIBLE_STORES.items():
+             config = store_class.get_config()
+             if source.startswith(config.url_prefix):
+                 return StoreType(store_type)
+         return None

      @classmethod
      def from_cloud(cls, cloud: str) -> 'StoreType':
-         if cloud.lower() == str(clouds.AWS()).lower():
-             return StoreType.S3
-         elif cloud.lower() == str(clouds.GCP()).lower():
+         cloud_lower = cloud.lower()
+         if cloud_lower == str(clouds.GCP()).lower():
              return StoreType.GCS
-         elif cloud.lower() == str(clouds.IBM()).lower():
+         elif cloud_lower == str(clouds.IBM()).lower():
              return StoreType.IBM
-         elif cloud.lower() == cloudflare.NAME.lower():
-             return StoreType.R2
-         elif cloud.lower() == str(clouds.Azure()).lower():
+         elif cloud_lower == str(clouds.Azure()).lower():
              return StoreType.AZURE
-         elif cloud.lower() == str(clouds.OCI()).lower():
+         elif cloud_lower == str(clouds.OCI()).lower():
              return StoreType.OCI
-         elif cloud.lower() == str(clouds.Nebius()).lower():
-             return StoreType.NEBIUS
-         elif cloud.lower() == str(clouds.Lambda()).lower():
+         elif cloud_lower == str(clouds.Lambda()).lower():
              with ux_utils.print_exception_no_traceback():
                  raise ValueError('Lambda Cloud does not provide cloud storage.')
-         elif cloud.lower() == str(clouds.SCP()).lower():
+         elif cloud_lower == str(clouds.SCP()).lower():
              with ux_utils.print_exception_no_traceback():
                  raise ValueError('SCP does not provide cloud storage.')
+         else:
+             s3_store_type = cls._get_s3_compatible_store_by_cloud(cloud_lower)
+             if s3_store_type:
+                 return cls(s3_store_type)

          raise ValueError(f'Unsupported cloud for StoreType: {cloud}')

      def to_cloud(self) -> str:
-         if self == StoreType.S3:
-             return str(clouds.AWS())
-         elif self == StoreType.GCS:
+         config = self._get_s3_compatible_config(self.value)
+         if config:
+             return config.cloud_name
+
+         if self == StoreType.GCS:
              return str(clouds.GCP())
          elif self == StoreType.AZURE:
              return str(clouds.Azure())
-         elif self == StoreType.R2:
-             return cloudflare.NAME
          elif self == StoreType.IBM:
              return str(clouds.IBM())
          elif self == StoreType.OCI:
@@ -168,41 +208,34 @@ class StoreType(enum.Enum):

      @classmethod
      def from_store(cls, store: 'AbstractStore') -> 'StoreType':
-         if isinstance(store, S3Store):
-             return StoreType.S3
-         elif isinstance(store, GcsStore):
+         if isinstance(store, S3CompatibleStore):
+             return cls(store.get_store_type())
+
+         if isinstance(store, GcsStore):
              return StoreType.GCS
          elif isinstance(store, AzureBlobStore):
              return StoreType.AZURE
-         elif isinstance(store, R2Store):
-             return StoreType.R2
          elif isinstance(store, IBMCosStore):
              return StoreType.IBM
          elif isinstance(store, OciStore):
              return StoreType.OCI
-         elif isinstance(store, NebiusStore):
-             return StoreType.NEBIUS
          else:
              with ux_utils.print_exception_no_traceback():
                  raise ValueError(f'Unknown store type: {store}')

      def store_prefix(self) -> str:
-         if self == StoreType.S3:
-             return 's3://'
-         elif self == StoreType.GCS:
+         config = self._get_s3_compatible_config(self.value)
+         if config:
+             return config.url_prefix
+
+         if self == StoreType.GCS:
              return 'gs://'
          elif self == StoreType.AZURE:
              return 'https://'
-         # R2 storages use 's3://' as a prefix for various aws cli commands
-         elif self == StoreType.R2:
-             return 'r2://'
          elif self == StoreType.IBM:
              return 'cos://'
          elif self == StoreType.OCI:
              return 'oci://'
-         # Nebius storages use 's3://' as a prefix for various aws cli commands
-         elif self == StoreType.NEBIUS:
-             return 'nebius://'
          else:
              with ux_utils.print_exception_no_traceback():
                  raise ValueError(f'Unknown store type: {self}')
@@ -251,12 +284,20 @@ class StoreType(enum.Enum):
                  elif store_type == StoreType.IBM:
                      bucket_name, sub_path, region = data_utils.split_cos_path(
                          store_url)
-                 elif store_type == StoreType.R2:
-                     bucket_name, sub_path = data_utils.split_r2_path(store_url)
                  elif store_type == StoreType.GCS:
                      bucket_name, sub_path = data_utils.split_gcs_path(store_url)
-                 elif store_type == StoreType.S3:
-                     bucket_name, sub_path = data_utils.split_s3_path(store_url)
+                 else:
+                     # Check compatible stores
+                     for compatible_store_type, store_class in \
+                             _S3_COMPATIBLE_STORES.items():
+                         if store_type.value == compatible_store_type:
+                             config = store_class.get_config()
+                             bucket_name, sub_path = config.split_path(store_url)
+                             break
+                     else:
+                         # If we get here, it's an unknown S3-compatible store
+                         raise ValueError(
+                             f'Unknown S3-compatible store type: {store_type}')
                  return store_type, bucket_name, \
                      sub_path, storage_account_name, region
          raise ValueError(f'Unknown store URL: {store_url}')
@@ -751,20 +792,19 @@ class Storage(object):
              # If source is a pre-existing bucket, connect to the bucket
              # If the bucket does not exist, this will error out
              if isinstance(self.source, str):
-                 if self.source.startswith('s3://'):
-                     self.add_store(StoreType.S3)
-                 elif self.source.startswith('gs://'):
+                 if self.source.startswith('gs://'):
                      self.add_store(StoreType.GCS)
                  elif data_utils.is_az_container_endpoint(self.source):
                      self.add_store(StoreType.AZURE)
-                 elif self.source.startswith('r2://'):
-                     self.add_store(StoreType.R2)
                  elif self.source.startswith('cos://'):
                      self.add_store(StoreType.IBM)
                  elif self.source.startswith('oci://'):
                      self.add_store(StoreType.OCI)
-                 elif self.source.startswith('nebius://'):
-                     self.add_store(StoreType.NEBIUS)
+
+                 store_type = StoreType.find_s3_compatible_config_by_prefix(
+                     self.source)
+                 if store_type:
+                     self.add_store(store_type)

      def get_bucket_sub_path_prefix(self, blob_path: str) -> str:
          """Adds the bucket sub path prefix to the blob path."""
@@ -852,7 +892,7 @@ class Storage(object):
                          f'{source} in the file_mounts section of your YAML')
                  is_local_source = True
              elif split_path.scheme in [
-                     's3', 'gs', 'https', 'r2', 'cos', 'oci', 'nebius'
+                     's3', 'gs', 'https', 'r2', 'cos', 'oci', 'nebius', 'cw'
              ]:
                  is_local_source = False
                  # Storage mounting does not support mounting specific files from
@@ -877,7 +917,8 @@ class Storage(object):
                  with ux_utils.print_exception_no_traceback():
                      raise exceptions.StorageSourceError(
                          f'Supported paths: local, s3://, gs://, https://, '
-                         f'r2://, cos://, oci://, nebius://. Got: {source}')
+                         f'r2://, cos://, oci://, nebius://, cw://. '
+                         f'Got: {source}')
          return source, is_local_source

      def _validate_storage_spec(self, name: Optional[str]) -> None:
@@ -892,7 +933,16 @@ class Storage(object):
              """
              prefix = name.split('://')[0]
              prefix = prefix.lower()
-             if prefix in ['s3', 'gs', 'https', 'r2', 'cos', 'oci', 'nebius']:
+             if prefix in [
+                     's3',
+                     'gs',
+                     'https',
+                     'r2',
+                     'cos',
+                     'oci',
+                     'nebius',
+                     'cw',
+             ]:
                  with ux_utils.print_exception_no_traceback():
                      raise exceptions.StorageNameError(
                          'Prefix detected: `name` cannot start with '
@@ -980,12 +1030,25 @@ class Storage(object):
              # When initializing from global_user_state, we override the
              # source from the YAML
              try:
-                 if s_type == StoreType.S3:
+                 if s_type.value in _S3_COMPATIBLE_STORES:
+                     store_class = _S3_COMPATIBLE_STORES[s_type.value]
+                     store = store_class.from_metadata(
+                         s_metadata,
+                         source=self.source,
+                         sync_on_reconstruction=self.sync_on_reconstruction,
+                         _bucket_sub_path=self._bucket_sub_path)
+                 elif s_type == StoreType.S3:
                      store = S3Store.from_metadata(
                          s_metadata,
                          source=self.source,
                          sync_on_reconstruction=self.sync_on_reconstruction,
                          _bucket_sub_path=self._bucket_sub_path)
+                 elif s_type == StoreType.R2:
+                     store = R2Store.from_metadata(
+                         s_metadata,
+                         source=self.source,
+                         sync_on_reconstruction=self.sync_on_reconstruction,
+                         _bucket_sub_path=self._bucket_sub_path)
                  elif s_type == StoreType.GCS:
                      store = GcsStore.from_metadata(
                          s_metadata,
@@ -1000,12 +1063,6 @@ class Storage(object):
                          source=self.source,
                          sync_on_reconstruction=self.sync_on_reconstruction,
                          _bucket_sub_path=self._bucket_sub_path)
-                 elif s_type == StoreType.R2:
-                     store = R2Store.from_metadata(
-                         s_metadata,
-                         source=self.source,
-                         sync_on_reconstruction=self.sync_on_reconstruction,
-                         _bucket_sub_path=self._bucket_sub_path)
                  elif s_type == StoreType.IBM:
                      store = IBMCosStore.from_metadata(
                          s_metadata,
@@ -1024,6 +1081,12 @@ class Storage(object):
                          source=self.source,
                          sync_on_reconstruction=self.sync_on_reconstruction,
                          _bucket_sub_path=self._bucket_sub_path)
+                 elif s_type == StoreType.COREWEAVE:
+                     store = CoreWeaveStore.from_metadata(
+                         s_metadata,
+                         source=self.source,
+                         sync_on_reconstruction=self.sync_on_reconstruction,
+                         _bucket_sub_path=self._bucket_sub_path)
                  else:
                      with ux_utils.print_exception_no_traceback():
                          raise ValueError(f'Unknown store type: {s_type}')
@@ -1106,20 +1169,17 @@ class Storage(object):
              return store

          store_cls: Type[AbstractStore]
-         if store_type == StoreType.S3:
-             store_cls = S3Store
+         # First check if it's a registered S3-compatible store
+         if store_type.value in _S3_COMPATIBLE_STORES:
+             store_cls = _S3_COMPATIBLE_STORES[store_type.value]
          elif store_type == StoreType.GCS:
              store_cls = GcsStore
          elif store_type == StoreType.AZURE:
              store_cls = AzureBlobStore
-         elif store_type == StoreType.R2:
-             store_cls = R2Store
          elif store_type == StoreType.IBM:
              store_cls = IBMCosStore
          elif store_type == StoreType.OCI:
              store_cls = OciStore
-         elif store_type == StoreType.NEBIUS:
-             store_cls = NebiusStore
          else:
              with ux_utils.print_exception_no_traceback():
                  raise exceptions.StorageSpecError(
@@ -1266,6 +1326,17 @@ class Storage(object):
          if store.is_sky_managed:
              global_user_state.set_storage_status(self.name, StorageStatus.READY)

+     @classmethod
+     def from_handle(cls, handle: StorageHandle) -> 'Storage':
+         """Create Storage from StorageHandle object.
+         """
+         obj = cls(name=handle.storage_name,
+                   source=handle.source,
+                   sync_on_reconstruction=False)
+         obj.handle = handle
+         obj._add_store_from_metadata(handle.sky_stores)
+         return obj
+
      @classmethod
      def from_yaml_config(cls, config: Dict[str, Any]) -> 'Storage':
          common_utils.validate_schema(config, schemas.get_storage_schema(),
@@ -1343,101 +1414,262 @@ class Storage(object):
          return config


- class S3Store(AbstractStore):
-     """S3Store inherits from Storage Object and represents the backend
-     for S3 buckets.
+ # Registry for S3-compatible stores
+ _S3_COMPATIBLE_STORES = {}
+
+
+ def register_s3_compatible_store(store_class):
+     """Decorator to automatically register S3-compatible stores."""
+     store_type = store_class.get_store_type()
+     _S3_COMPATIBLE_STORES[store_type] = store_class
+     return store_class
+
+
+ @dataclass
+ class S3CompatibleConfig:
+     """Configuration for S3-compatible storage providers."""
+     # Provider identification
+     store_type: str  # Store type identifier (e.g., "S3", "R2", "MINIO")
+     url_prefix: str  # URL prefix (e.g., "s3://", "r2://", "minio://")
+
+     # Client creation
+     client_factory: Callable[[Optional[str]], Any]
+     resource_factory: Callable[[str], StorageHandle]
+     split_path: Callable[[str], Tuple[str, str]]
+     verify_bucket: Callable[[str], bool]
+
+     # CLI configuration
+     aws_profile: Optional[str] = None
+     get_endpoint_url: Optional[Callable[[], str]] = None
+     credentials_file: Optional[str] = None
+     config_file: Optional[str] = None
+     extra_cli_args: Optional[List[str]] = None
+
+     # Provider-specific settings
+     cloud_name: str = ''
+     default_region: Optional[str] = None
+     access_denied_message: str = 'Access Denied'
+
+     # Mounting
+     mount_cmd_factory: Optional[Callable] = None
+     mount_cached_cmd_factory: Optional[Callable] = None
+
+     def __post_init__(self):
+         if self.extra_cli_args is None:
+             self.extra_cli_args = []
+
+
+ class S3CompatibleStore(AbstractStore):
+     """Base class for S3-compatible object storage providers.
+
+     This class provides a unified interface for all S3-compatible storage
+     providers (AWS S3, Cloudflare R2, Nebius, MinIO, CoreWeave, etc.) by
+     leveraging a configuration-driven approach that eliminates code duplication
+
+     ## Adding a New S3-Compatible Store
+
+     To add a new S3-compatible storage provider (e.g., MinIO),
+     follow these steps:
+
+     ### 1. Add Store Type to Enum
+     First, add your store type to the StoreType enum:
+     ```python
+     class StoreType(enum.Enum):
+         # ... existing entries ...
+         MINIO = 'MINIO'
+     ```
+
+     ### 2. Create Store Class
+     Create a new store class that inherits from S3CompatibleStore:
+     ```python
+     @register_s3_compatible_store
+     class MinIOStore(S3CompatibleStore):
+         '''MinIOStore for MinIO object storage.'''
+
+         @classmethod
+         def get_config(cls) -> S3CompatibleConfig:
+             '''Return the configuration for MinIO.'''
+             return S3CompatibleConfig(
+                 store_type='MINIO',
+                 url_prefix='minio://',
+                 client_factory=lambda region:\
+                     data_utils.create_minio_client(region),
+                 resource_factory=lambda name:\
+                     minio.resource('s3').Bucket(name),
+                 split_path=data_utils.split_minio_path,
+                 aws_profile='minio',
+                 get_endpoint_url=lambda: minio.get_endpoint_url(),
+                 cloud_name='minio',
+                 default_region='us-east-1',
+                 mount_cmd_factory=mounting_utils.get_minio_mount_cmd,
+             )
+     ```
+
+     ### 3. Implement Required Utilities
+     Create the necessary utility functions:
+
+     #### In `sky/data/data_utils.py`:
+     ```python
+     def create_minio_client(region: Optional[str] = None):
+         '''Create MinIO S3 client.'''
+         return boto3.client('s3',
+                             endpoint_url=minio.get_endpoint_url(),
+                             aws_access_key_id=minio.get_access_key(),
+                             aws_secret_access_key=minio.get_secret_key(),
+                             region_name=region or 'us-east-1')
+
+     def split_minio_path(minio_path: str) -> Tuple[str, str]:
+         '''Split minio://bucket/key into (bucket, key).'''
+         path_parts = minio_path.replace('minio://', '').split('/', 1)
+         bucket = path_parts[0]
+         key = path_parts[1] if len(path_parts) > 1 else ''
+         return bucket, key
+     ```
+
+     #### In `sky/utils/mounting_utils.py`:
+     ```python
+     def get_minio_mount_cmd(profile: str, bucket_name: str, endpoint_url: str,
+                             mount_path: str,
+                             bucket_sub_path: Optional[str]) -> str:
+         '''Generate MinIO mount command using s3fs.'''
+         # Implementation similar to other S3-compatible mount commands
+         pass
+     ```
+
+     ### 4. Create Adapter Module (if needed)
+     Create `sky/adaptors/minio.py` for MinIO-specific configuration:
+     ```python
+     '''MinIO adapter for SkyPilot.'''
+
+     MINIO_PROFILE_NAME = 'minio'
+
+     def get_endpoint_url() -> str:
+         '''Get MinIO endpoint URL from configuration.'''
+         # Read from ~/.minio/config or environment variables
+         pass
+
+     def resource(resource_name: str):
+         '''Get MinIO resource.'''
+         # Implementation for creating MinIO resources
+         pass
+     ```
+
      """

-     _DEFAULT_REGION = 'us-east-1'
      _ACCESS_DENIED_MESSAGE = 'Access Denied'
-     _CUSTOM_ENDPOINT_REGIONS = [
-         'ap-east-1', 'me-south-1', 'af-south-1', 'eu-south-1', 'eu-south-2',
-         'ap-south-2', 'ap-southeast-3', 'ap-southeast-4', 'me-central-1',
-         'il-central-1'
-     ]

      def __init__(self,
                    name: str,
                    source: str,
-                   region: Optional[str] = _DEFAULT_REGION,
+                   region: Optional[str] = None,
                    is_sky_managed: Optional[bool] = None,
                    sync_on_reconstruction: bool = True,
                    _bucket_sub_path: Optional[str] = None):
+         # Initialize configuration first to get defaults
+         self.config = self.__class__.get_config()
+
+         # Use provider's default region if not specified
+         if region is None:
+             region = self.config.default_region
+
+         # Initialize S3CompatibleStore specific attributes
          self.client: 'mypy_boto3_s3.Client'
          self.bucket: 'StorageHandle'
-         # TODO(romilb): This is purely a stopgap fix for
-         # https://github.com/skypilot-org/skypilot/issues/3405
-         # We should eventually make all opt-in regions also work for S3 by
-         # passing the right endpoint flags.
-         if region in self._CUSTOM_ENDPOINT_REGIONS:
-             logger.warning('AWS opt-in regions are not supported for S3. '
-                            f'Falling back to default region '
-                            f'{self._DEFAULT_REGION} for bucket {name!r}.')
-             region = self._DEFAULT_REGION
+
+         # Call parent constructor
          super().__init__(name, source, region, is_sky_managed,
                           sync_on_reconstruction, _bucket_sub_path)

+     @classmethod
+     @abstractmethod
+     def get_config(cls) -> S3CompatibleConfig:
+         """Return the configuration for this S3-compatible provider."""
+         pass
+
+     @classmethod
+     def get_store_type(cls) -> str:
+         """Return the store type identifier from configuration."""
+         return cls.get_config().store_type
+
+     @property
+     def provider_prefixes(self) -> set:
+         """Dynamically get all provider prefixes from registered stores."""
+         prefixes = set()
+
+         # Get prefixes from all registered S3-compatible stores
+         for store_class in _S3_COMPATIBLE_STORES.values():
+             config = store_class.get_config()
+             prefixes.add(config.url_prefix)
+
+         # Add hardcoded prefixes for non-S3-compatible stores
+         prefixes.update({
+             'gs://',  # GCS
+             'https://',  # Azure
+             'cos://',  # IBM COS
+             'oci://',  # OCI
+         })
+
+         return prefixes
+
      def _validate(self):
          if self.source is not None and isinstance(self.source, str):
-             if self.source.startswith('s3://'):
-                 assert self.name == data_utils.split_s3_path(self.source)[0], (
-                     'S3 Bucket is specified as path, the name should be the'
-                     ' same as S3 bucket.')
+             if self.source.startswith(self.config.url_prefix):
+                 bucket_name, _ = self.config.split_path(self.source)
+                 assert self.name == bucket_name, (
+                     f'{self.config.store_type} Bucket is specified as path, '
+                     f'the name should be the same as {self.config.store_type} '
+                     f'bucket.')
+                 # Only verify if this is NOT the same store type as the source
+                 if self.__class__.get_store_type() != self.config.store_type:
+                     assert self.config.verify_bucket(self.name), (
+                         f'Source specified as {self.source},'
+                         f'a {self.config.store_type} '
+                         f'bucket. {self.config.store_type} Bucket should exist.'
+                     )
              elif self.source.startswith('gs://'):
                  assert self.name == data_utils.split_gcs_path(self.source)[0], (
                      'GCS Bucket is specified as path, the name should be '
                      'the same as GCS bucket.')
-                 assert data_utils.verify_gcs_bucket(self.name), (
-                     f'Source specified as {self.source}, a GCS bucket. ',
-                     'GCS Bucket should exist.')
+                 if not isinstance(self, GcsStore):
+                     assert data_utils.verify_gcs_bucket(self.name), (
+                         f'Source specified as {self.source}, a GCS bucket. ',
+                         'GCS Bucket should exist.')
              elif data_utils.is_az_container_endpoint(self.source):
                  storage_account_name, container_name, _ = (
                      data_utils.split_az_path(self.source))
                  assert self.name == container_name, (
                      'Azure bucket is specified as path, the name should be '
                      'the same as Azure bucket.')
-                 assert data_utils.verify_az_bucket(
-                     storage_account_name, self.name), (
-                         f'Source specified as {self.source}, an Azure bucket. '
+                 if not isinstance(self, AzureBlobStore):
+                     assert data_utils.verify_az_bucket(
+                         storage_account_name, self.name
+                     ), (f'Source specified as {self.source}, an Azure bucket. '
                          'Azure bucket should exist.')
-             elif self.source.startswith('r2://'):
-                 assert self.name == data_utils.split_r2_path(self.source)[0], (
-                     'R2 Bucket is specified as path, the name should be '
-                     'the same as R2 bucket.')
-                 assert data_utils.verify_r2_bucket(self.name), (
-                     f'Source specified as {self.source}, a R2 bucket. ',
-                     'R2 Bucket should exist.')
-             elif self.source.startswith('nebius://'):
-                 assert self.name == data_utils.split_nebius_path(
-                     self.source)[0], (
-                         'Nebius Object Storage is specified as path, the name '
-                         'should be the same as Nebius Object Storage bucket.')
-                 assert data_utils.verify_nebius_bucket(self.name), (
-                     f'Source specified as {self.source}, a Nebius Object '
-                     f'Storage bucket. Nebius Object Storage Bucket should'
-                     f' exist.')
              elif self.source.startswith('cos://'):
                  assert self.name == data_utils.split_cos_path(self.source)[0], (
                      'COS Bucket is specified as path, the name should be '
                      'the same as COS bucket.')
-                 assert data_utils.verify_ibm_cos_bucket(self.name), (
-                     f'Source specified as {self.source}, a COS bucket. ',
-                     'COS Bucket should exist.')
+                 if not isinstance(self, IBMCosStore):
+                     assert data_utils.verify_ibm_cos_bucket(self.name), (
+                         f'Source specified as {self.source}, a COS bucket. ',
+                         'COS Bucket should exist.')
              elif self.source.startswith('oci://'):
                  raise NotImplementedError(
-                     'Moving data from OCI to S3 is currently not supported.')
+                     f'Moving data from OCI to {self.source} is ',
+                     'currently not supported.')
+
          # Validate name
          self.name = self.validate_name(self.name)

          # Check if the storage is enabled
-         if not _is_storage_cloud_enabled(str(clouds.AWS())):
+         if not _is_storage_cloud_enabled(self.config.cloud_name):
              with ux_utils.print_exception_no_traceback():
                  raise exceptions.ResourcesUnavailableError(
-                     'Storage \'store: s3\' specified, but ' \
-                     'AWS access is disabled. To fix, enable '\
-                     'AWS by running `sky check`. More info: '\
-                     'https://docs.skypilot.co/en/latest/getting-started/installation.html.' # pylint: disable=line-too-long
-                     )
+                     f'Storage "store: {self.config.store_type.lower()}" '
+                     f'specified, but '
+                     f'{self.config.cloud_name} access is disabled. '
+                     'To fix, enable '
+                     f'{self.config.cloud_name} by running `sky check`.')

      @classmethod
      def validate_name(cls, name: str) -> str:
@@ -1509,7 +1741,7 @@ class S3Store(AbstractStore):
              StorageBucketGetError: If fetching existing bucket fails
              StorageInitError: If general initialization fails.
          """
-         self.client = data_utils.create_s3_client(self.region)
+         self.client = self.config.client_factory(self.region)
          self.bucket, is_new_bucket = self._get_bucket()
          if self.is_sky_managed is None:
              # If is_sky_managed is not specified, then this is a new storage
@@ -1531,16 +1763,10 @@ class S3Store(AbstractStore):
              if isinstance(self.source, list):
                  self.batch_aws_rsync(self.source, create_dirs=True)
              elif self.source is not None:
-                 if self.source.startswith('s3://'):
-                     pass
-                 elif self.source.startswith('gs://'):
-                     self._transfer_to_s3()
-                 elif self.source.startswith('r2://'):
-                     self._transfer_to_s3()
-                 elif self.source.startswith('oci://'):
-                     self._transfer_to_s3()
-                 elif self.source.startswith('nebius://'):
-                     self._transfer_to_s3()
+                 if self._is_same_provider_source():
+                     pass  # No transfer needed
+                 elif self._needs_cross_provider_transfer():
+                     self._transfer_from_other_provider()
                  else:
                      self.batch_aws_rsync([self.source])
          except exceptions.StorageUploadError:
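The hunk above drops the hard-coded `_transfer_to_s3()` branches; the hunk that follows implements the replacement, `_transfer_from_other_provider()`, by looking up a helper on the `data_transfer` module named `{source}_to_{target}` via `getattr`. Below is a minimal standalone sketch of that dispatch-by-naming-convention pattern; the `data_transfer` stand-in and its `gcs_to_coreweave` helper are hypothetical and only illustrate the lookup, not SkyPilot's actual transfer functions.

```python
import types

# Hypothetical stand-in for a transfer module; only the lookup pattern matters.
data_transfer = types.SimpleNamespace(
    gcs_to_coreweave=lambda src, dst: print(f'copy gs://{src} -> cw://{dst}'))


def transfer_between_providers(source_type: str, target_type: str,
                               bucket: str) -> None:
    """Dispatch to a transfer helper chosen by naming convention."""
    func_name = f'{source_type}_to_{target_type}'
    transfer_func = getattr(data_transfer, func_name, None)
    if transfer_func is None:
        raise NotImplementedError(
            f'Transfer from {source_type} to {target_type} is not supported.')
    transfer_func(bucket, bucket)


transfer_between_providers('gcs', 'coreweave', 'my-bucket')
```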
@@ -1549,57 +1775,94 @@ class S3Store(AbstractStore):
  raise exceptions.StorageUploadError(
  f'Upload failed for store {self.name}') from e
 
+ def _is_same_provider_source(self) -> bool:
+ """Check if source is from the same provider."""
+ return isinstance(self.source, str) and self.source.startswith(
+ self.config.url_prefix)
+
+ def _needs_cross_provider_transfer(self) -> bool:
+ """Check if source needs cross-provider transfer."""
+ if not isinstance(self.source, str):
+ return False
+ return any(
+ self.source.startswith(prefix) for prefix in self.provider_prefixes)
+
+ def _detect_source_type(self) -> str:
+ """Detect the source provider type from URL."""
+ if not isinstance(self.source, str):
+ return 'unknown'
+
+ for provider in self.provider_prefixes:
+ if self.source.startswith(provider):
+ return provider[:-len('://')]
+ return ''
+
+ def _transfer_from_other_provider(self):
+ """Transfer data from another cloud to this S3-compatible store."""
+ source_type = self._detect_source_type()
+ target_type = self.config.store_type.lower()
+
+ if hasattr(data_transfer, f'{source_type}_to_{target_type}'):
+ transfer_func = getattr(data_transfer,
+ f'{source_type}_to_{target_type}')
+ transfer_func(self.name, self.name)
+ else:
+ with ux_utils.print_exception_no_traceback():
+ raise NotImplementedError(
+ f'Transfer from {source_type} to {target_type} '
+ 'is not yet supported.')
+
1552
1815
  def delete(self) -> None:
1816
+ """Delete the bucket or sub-path."""
1553
1817
  if self._bucket_sub_path is not None and not self.is_sky_managed:
1554
1818
  return self._delete_sub_path()
1555
1819
 
1556
- deleted_by_skypilot = self._delete_s3_bucket(self.name)
1820
+ deleted_by_skypilot = self._delete_bucket(self.name)
1821
+ provider = self.config.store_type
1557
1822
  if deleted_by_skypilot:
1558
- msg_str = f'Deleted S3 bucket {self.name}.'
1823
+ msg_str = f'Deleted {provider} bucket {self.name}.'
1559
1824
  else:
1560
- msg_str = f'S3 bucket {self.name} may have been deleted ' \
1825
+ msg_str = f'{provider} bucket {self.name} may have been deleted ' \
1561
1826
  f'externally. Removing from local state.'
1562
- logger.info(f'{colorama.Fore.GREEN}{msg_str}'
1563
- f'{colorama.Style.RESET_ALL}')
1564
-
1565
- def _delete_sub_path(self) -> None:
1566
- assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
1567
- deleted_by_skypilot = self._delete_s3_bucket_sub_path(
1568
- self.name, self._bucket_sub_path)
1569
- if deleted_by_skypilot:
1570
- msg_str = f'Removed objects from S3 bucket ' \
1571
- f'{self.name}/{self._bucket_sub_path}.'
1572
- else:
1573
- msg_str = f'Failed to remove objects from S3 bucket ' \
1574
- f'{self.name}/{self._bucket_sub_path}.'
1575
- logger.info(f'{colorama.Fore.GREEN}{msg_str}'
1576
- f'{colorama.Style.RESET_ALL}')
1827
+ logger.info(f'{colorama.Fore.GREEN}{msg_str}{colorama.Style.RESET_ALL}')
1577
1828
 
1578
1829
  def get_handle(self) -> StorageHandle:
1579
- return aws.resource('s3').Bucket(self.name)
1830
+ """Get storage handle using provider's resource factory."""
1831
+ return self.config.resource_factory(self.name)
1580
1832
 
1581
- def batch_aws_rsync(self,
1582
- source_path_list: List[Path],
1583
- create_dirs: bool = False) -> None:
1584
- """Invokes aws s3 sync to batch upload a list of local paths to S3
1833
+ def _download_file(self, remote_path: str, local_path: str) -> None:
1834
+ """Download file using S3 API."""
1835
+ self.bucket.download_file(remote_path, local_path)
1585
1836
 
1586
- AWS Sync by default uses 10 threads to upload files to the bucket. To
1587
- increase parallelism, modify max_concurrent_requests in your aws config
1588
- file (Default path: ~/.aws/config).
1837
+ def mount_command(self, mount_path: str) -> str:
1838
+ """Get mount command using provider's mount factory."""
1839
+ if self.config.mount_cmd_factory is None:
1840
+ raise exceptions.NotSupportedError(
1841
+ f'Mounting not supported for {self.config.store_type}')
1589
1842
 
1590
- Since aws s3 sync does not support batch operations, we construct
1591
- multiple commands to be run in parallel.
1843
+ install_cmd = mounting_utils.get_s3_mount_install_cmd()
1844
+ mount_cmd = self.config.mount_cmd_factory(self.bucket.name, mount_path,
1845
+ self._bucket_sub_path)
1846
+ return mounting_utils.get_mounting_command(mount_path, install_cmd,
1847
+ mount_cmd)
1592
1848
 
1593
- Args:
1594
- source_path_list: List of paths to local files or directories
1595
- create_dirs: If the local_path is a directory and this is set to
1596
- False, the contents of the directory are directly uploaded to
1597
- root of the bucket. If the local_path is a directory and this is
1598
- set to True, the directory is created in the bucket root and
1599
- contents are uploaded to it.
1600
- """
1601
- sub_path = (f'/{self._bucket_sub_path}'
1602
- if self._bucket_sub_path else '')
1849
+ def mount_cached_command(self, mount_path: str) -> str:
1850
+ """Get cached mount command. Can be overridden by subclasses."""
1851
+ if self.config.mount_cached_cmd_factory is None:
1852
+ raise exceptions.NotSupportedError(
1853
+ f'Cached mounting not supported for {self.config.store_type}')
1854
+
1855
+ install_cmd = mounting_utils.get_rclone_install_cmd()
1856
+ mount_cmd = self.config.mount_cached_cmd_factory(
1857
+ self.bucket.name, mount_path, self._bucket_sub_path)
1858
+ return mounting_utils.get_mounting_command(mount_path, install_cmd,
1859
+ mount_cmd)
1860
+
1861
+ def batch_aws_rsync(self,
1862
+ source_path_list: List[Path],
1863
+ create_dirs: bool = False) -> None:
1864
+ """Generic S3-compatible rsync using AWS CLI."""
1865
+ sub_path = f'/{self._bucket_sub_path}' if self._bucket_sub_path else ''
1603
1866
 
1604
1867
  def get_file_sync_command(base_dir_path, file_names):
1605
1868
  includes = ' '.join([
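The hunk above replaces the per-provider `_transfer_to_s3` branches with a name-based lookup on the `data_transfer` module. A minimal, self-contained sketch of that dispatch pattern, assuming the helper functions follow the `<source>_to_<target>(src_bucket, dst_bucket)` naming seen in this diff (`gcs_to_s3`, `r2_to_s3`, `s3_to_r2`); the `data_transfer` object below is a stand-in, not the real `sky.data.data_transfer` module:

    from types import SimpleNamespace

    def gcs_to_s3(src_bucket: str, dst_bucket: str) -> None:
        # Stand-in for a real transfer helper.
        print(f'would copy gs://{src_bucket} -> s3://{dst_bucket}')

    # Stand-in module exposing '<source>_to_<target>' helpers.
    data_transfer = SimpleNamespace(gcs_to_s3=gcs_to_s3)

    def transfer_from_other_provider(source_type: str, target_type: str,
                                     bucket: str) -> None:
        func_name = f'{source_type}_to_{target_type}'
        transfer_func = getattr(data_transfer, func_name, None)
        if transfer_func is None:
            raise NotImplementedError(
                f'Transfer from {source_type} to {target_type} '
                'is not yet supported.')
        transfer_func(bucket, bucket)

    transfer_from_other_provider('gcs', 's3', 'my-bucket')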
@@ -1607,10 +1870,31 @@ class S3Store(AbstractStore):
1607
1870
  for file_name in file_names
1608
1871
  ])
1609
1872
  base_dir_path = shlex.quote(base_dir_path)
1610
- sync_command = ('aws s3 sync --no-follow-symlinks --exclude="*" '
1611
- f'{includes} {base_dir_path} '
1612
- f's3://{self.name}{sub_path}')
1613
- return sync_command
1873
+
1874
+ # Build AWS CLI command with provider-specific configuration
1875
+ cmd_parts = ['aws s3 sync --no-follow-symlinks --exclude="*"']
1876
+ cmd_parts.append(f'{includes} {base_dir_path}')
1877
+ cmd_parts.append(f's3://{self.name}{sub_path}')
1878
+
1879
+ # Add provider-specific arguments
1880
+ if self.config.get_endpoint_url:
1881
+ cmd_parts.append(
1882
+ f'--endpoint-url {self.config.get_endpoint_url()}')
1883
+ if self.config.aws_profile:
1884
+ cmd_parts.append(f'--profile={self.config.aws_profile}')
1885
+ if self.config.extra_cli_args:
1886
+ cmd_parts.extend(self.config.extra_cli_args)
1887
+
1888
+ # Handle credentials file via environment
1889
+ cmd = ' '.join(cmd_parts)
1890
+ if self.config.credentials_file:
1891
+ cmd = 'AWS_SHARED_CREDENTIALS_FILE=' + \
1892
+ f'{self.config.credentials_file} {cmd}'
1893
+ if self.config.config_file:
1894
+ cmd = 'AWS_CONFIG_FILE=' + \
1895
+ f'{self.config.config_file} {cmd}'
1896
+
1897
+ return cmd
1614
1898
 
1615
1899
  def get_dir_sync_command(src_dir_path, dest_dir_name):
1616
1900
  # we exclude .git directory from the sync
@@ -1618,11 +1902,11 @@ class S3Store(AbstractStore):
1618
1902
  excluded_list.append('.git/*')
1619
1903
 
1620
1904
  # Process exclusion patterns to make them work correctly with aws
1621
- # s3 sync
1905
+ # s3 sync - this logic is from S3Store2 to ensure compatibility
1622
1906
  processed_excludes = []
1623
1907
  for excluded_path in excluded_list:
1624
1908
  # Check if the path is a directory exclusion pattern
1625
- # For AWS S3 sync, directory patterns need to end with "/**" to
1909
+ # For AWS S3 sync, directory patterns need to end with "/*" to
1626
1910
  # exclude all contents
1627
1911
  if (excluded_path.endswith('/') or os.path.isdir(
1628
1912
  os.path.join(src_dir_path, excluded_path.rstrip('/')))):
@@ -1637,10 +1921,28 @@ class S3Store(AbstractStore):
1637
1921
  for file_name in processed_excludes
1638
1922
  ])
1639
1923
  src_dir_path = shlex.quote(src_dir_path)
1640
- sync_command = (f'aws s3 sync --no-follow-symlinks {excludes} '
1641
- f'{src_dir_path} '
1642
- f's3://{self.name}{sub_path}/{dest_dir_name}')
1643
- return sync_command
1924
+
1925
+ cmd_parts = ['aws s3 sync --no-follow-symlinks']
1926
+ cmd_parts.append(f'{excludes} {src_dir_path}')
1927
+ cmd_parts.append(f's3://{self.name}{sub_path}/{dest_dir_name}')
1928
+
1929
+ if self.config.get_endpoint_url:
1930
+ cmd_parts.append(
1931
+ f'--endpoint-url {self.config.get_endpoint_url()}')
1932
+ if self.config.aws_profile:
1933
+ cmd_parts.append(f'--profile={self.config.aws_profile}')
1934
+ if self.config.extra_cli_args:
1935
+ cmd_parts.extend(self.config.extra_cli_args)
1936
+
1937
+ cmd = ' '.join(cmd_parts)
1938
+ if self.config.credentials_file:
1939
+ cmd = 'AWS_SHARED_CREDENTIALS_FILE=' + \
1940
+ f'{self.config.credentials_file} {cmd}'
1941
+ if self.config.config_file:
1942
+ cmd = 'AWS_CONFIG_FILE=' + \
1943
+ f'{self.config.config_file} {cmd}'
1944
+
1945
+ return cmd
1644
1946
 
1645
1947
  # Generate message for upload
1646
1948
  if len(source_path_list) > 1:
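The command builders above assemble a plain `aws s3 sync` invocation, append provider-specific pieces (endpoint URL, profile, extra CLI args), and prepend credential/config environment variables. A rough sketch of that assembly for a hypothetical S3-compatible provider; only the flag and variable names come from this diff, every value is illustrative:

    # Hypothetical provider settings (illustrative values only).
    endpoint_url = 'https://storage.example.com'
    aws_profile = 'example-profile'
    credentials_file = '/home/user/.example/credentials'

    bucket, sub_path, src_dir = 'my-bucket', '/task-outputs', '/tmp/outputs'

    cmd_parts = ['aws s3 sync --no-follow-symlinks --exclude=".git/*"']
    cmd_parts.append(f'{src_dir} s3://{bucket}{sub_path}/outputs')
    cmd_parts.append(f'--endpoint-url {endpoint_url}')
    cmd_parts.append(f'--profile={aws_profile}')

    cmd = ' '.join(cmd_parts)
    # Credentials are passed via the environment rather than CLI flags.
    cmd = f'AWS_SHARED_CREDENTIALS_FILE={credentials_file} {cmd}'
    print(cmd)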
@@ -1648,9 +1950,12 @@ class S3Store(AbstractStore):
1648
1950
  else:
1649
1951
  source_message = source_path_list[0]
1650
1952
 
1953
+ provider_prefix = self.config.url_prefix
1651
1954
  log_path = sky_logging.generate_tmp_logging_file_path(
1652
1955
  _STORAGE_LOG_FILE_NAME)
1653
- sync_path = f'{source_message} -> s3://{self.name}{sub_path}/'
1956
+ sync_path = (f'{source_message} -> '
1957
+ f'{provider_prefix}{self.name}{sub_path}/')
1958
+
1654
1959
  with rich_utils.safe_status(
1655
1960
  ux_utils.spinner_message(f'Syncing {sync_path}',
1656
1961
  log_path=log_path)):
@@ -1660,150 +1965,81 @@ class S3Store(AbstractStore):
1660
1965
  get_dir_sync_command,
1661
1966
  log_path,
1662
1967
  self.name,
1663
- self._ACCESS_DENIED_MESSAGE,
1968
+ self.config.access_denied_message,
1664
1969
  create_dirs=create_dirs,
1665
1970
  max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS)
1971
+
1666
1972
  logger.info(
1667
1973
  ux_utils.finishing_message(f'Storage synced: {sync_path}',
1668
1974
  log_path))
1669
1975
 
1670
- def _transfer_to_s3(self) -> None:
1671
- assert isinstance(self.source, str), self.source
1672
- if self.source.startswith('gs://'):
1673
- data_transfer.gcs_to_s3(self.name, self.name)
1674
- elif self.source.startswith('r2://'):
1675
- data_transfer.r2_to_s3(self.name, self.name)
1676
-
1677
1976
  def _get_bucket(self) -> Tuple[StorageHandle, bool]:
1678
- """Obtains the S3 bucket.
1679
-
1680
- If the bucket exists, this method will return the bucket.
1681
- If the bucket does not exist, there are three cases:
1682
- 1) Raise an error if the bucket source starts with s3://
1683
- 2) Return None if bucket has been externally deleted and
1684
- sync_on_reconstruction is False
1685
- 3) Create and return a new bucket otherwise
1686
-
1687
- Raises:
1688
- StorageSpecError: If externally created bucket is attempted to be
1689
- mounted without specifying storage source.
1690
- StorageBucketCreateError: If creating the bucket fails
1691
- StorageBucketGetError: If fetching a bucket fails
1692
- StorageExternalDeletionError: If externally deleted storage is
1693
- attempted to be fetched while reconstructing the storage for
1694
- 'sky storage delete' or 'sky start'
1695
- """
1696
- s3 = aws.resource('s3')
1697
- bucket = s3.Bucket(self.name)
1977
+ """Get or create bucket using S3 API."""
1978
+ bucket = self.config.resource_factory(self.name)
1698
1979
 
1699
1980
  try:
1700
1981
  # Try Public bucket case.
1701
- # This line does not error out if the bucket is an external public
1702
- # bucket or if it is a user's bucket that is publicly
1703
- # accessible.
1704
1982
  self.client.head_bucket(Bucket=self.name)
1705
1983
  self._validate_existing_bucket()
1706
1984
  return bucket, False
1707
1985
  except aws.botocore_exceptions().ClientError as e:
1708
1986
  error_code = e.response['Error']['Code']
1709
- # AccessDenied error for buckets that are private and not owned by
1710
- # user.
1711
1987
  if error_code == '403':
1712
- command = f'aws s3 ls {self.name}'
1988
+ command = f'aws s3 ls s3://{self.name}'
1989
+ if self.config.aws_profile:
1990
+ command += f' --profile={self.config.aws_profile}'
1991
+ if self.config.get_endpoint_url:
1992
+ command += f' --endpoint-url '\
1993
+ f'{self.config.get_endpoint_url()}'
1994
+ if self.config.credentials_file:
1995
+ command = (f'AWS_SHARED_CREDENTIALS_FILE='
1996
+ f'{self.config.credentials_file} {command}')
1997
+ if self.config.config_file:
1998
+ command = 'AWS_CONFIG_FILE=' + \
1999
+ f'{self.config.config_file} {command}'
1713
2000
  with ux_utils.print_exception_no_traceback():
1714
2001
  raise exceptions.StorageBucketGetError(
1715
2002
  _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
1716
2003
  f' To debug, consider running `{command}`.') from e
1717
2004
 
1718
- if isinstance(self.source, str) and self.source.startswith('s3://'):
2005
+ if isinstance(self.source, str) and self.source.startswith(
2006
+ self.config.url_prefix):
1719
2007
  with ux_utils.print_exception_no_traceback():
1720
2008
  raise exceptions.StorageBucketGetError(
1721
2009
  'Attempted to use a non-existent bucket as a source: '
1722
- f'{self.source}. Consider using `aws s3 ls '
1723
- f'{self.source}` to debug.')
2010
+ f'{self.source}.')
1724
2011
 
1725
- # If bucket cannot be found in both private and public settings,
1726
- # the bucket is to be created by Sky. However, creation is skipped if
1727
- # Store object is being reconstructed for deletion or re-mount with
1728
- # sky start, and error is raised instead.
2012
+ # If bucket cannot be found, create it if needed
1729
2013
  if self.sync_on_reconstruction:
1730
- bucket = self._create_s3_bucket(self.name, self.region)
2014
+ bucket = self._create_bucket(self.name)
1731
2015
  return bucket, True
1732
2016
  else:
1733
- # Raised when Storage object is reconstructed for sky storage
1734
- # delete or to re-mount Storages with sky start but the storage
1735
- # is already removed externally.
1736
2017
  raise exceptions.StorageExternalDeletionError(
1737
2018
  'Attempted to fetch a non-existent bucket: '
1738
2019
  f'{self.name}')
1739
2020
 
1740
- def _download_file(self, remote_path: str, local_path: str) -> None:
1741
- """Downloads file from remote to local on s3 bucket
1742
- using the boto3 API
1743
-
1744
- Args:
1745
- remote_path: str; Remote path on S3 bucket
1746
- local_path: str; Local path on user's device
1747
- """
1748
- self.bucket.download_file(remote_path, local_path)
1749
-
1750
- def mount_command(self, mount_path: str) -> str:
1751
- """Returns the command to mount the bucket to the mount_path.
1752
-
1753
- Uses goofys to mount the bucket.
1754
-
1755
- Args:
1756
- mount_path: str; Path to mount the bucket to.
1757
- """
1758
- install_cmd = mounting_utils.get_s3_mount_install_cmd()
1759
- mount_cmd = mounting_utils.get_s3_mount_cmd(self.bucket.name,
1760
- mount_path,
1761
- self._bucket_sub_path)
1762
- return mounting_utils.get_mounting_command(mount_path, install_cmd,
1763
- mount_cmd)
1764
-
1765
- def mount_cached_command(self, mount_path: str) -> str:
1766
- install_cmd = mounting_utils.get_rclone_install_cmd()
1767
- rclone_profile_name = (
1768
- data_utils.Rclone.RcloneStores.S3.get_profile_name(self.name))
1769
- rclone_config = data_utils.Rclone.RcloneStores.S3.get_config(
1770
- rclone_profile_name=rclone_profile_name)
1771
- mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
1772
- rclone_config, rclone_profile_name, self.bucket.name, mount_path)
1773
- return mounting_utils.get_mounting_command(mount_path, install_cmd,
1774
- mount_cached_cmd)
1775
-
1776
- def _create_s3_bucket(self,
1777
- bucket_name: str,
1778
- region=_DEFAULT_REGION) -> StorageHandle:
1779
- """Creates S3 bucket with specific name in specific region
1780
-
1781
- Args:
1782
- bucket_name: str; Name of bucket
1783
- region: str; Region name, e.g. us-west-1, us-east-2
1784
- Raises:
1785
- StorageBucketCreateError: If bucket creation fails.
1786
- """
1787
- s3_client = self.client
2021
+ def _create_bucket(self, bucket_name: str) -> StorageHandle:
2022
+ """Create bucket using S3 API."""
1788
2023
  try:
1789
2024
  create_bucket_config: Dict[str, Any] = {'Bucket': bucket_name}
1790
- # If default us-east-1 region of create_bucket API is used,
1791
- # the LocationConstraint must not be specified.
1792
- # Reference: https://stackoverflow.com/a/51912090
1793
- if region is not None and region != 'us-east-1':
2025
+ if self.region is not None and self.region != 'us-east-1':
1794
2026
  create_bucket_config['CreateBucketConfiguration'] = {
1795
- 'LocationConstraint': region
2027
+ 'LocationConstraint': self.region
1796
2028
  }
1797
- s3_client.create_bucket(**create_bucket_config)
2029
+ self.client.create_bucket(**create_bucket_config)
1798
2030
  logger.info(
1799
2031
  f' {colorama.Style.DIM}Created S3 bucket {bucket_name!r} in '
1800
- f'{region or "us-east-1"}{colorama.Style.RESET_ALL}')
2032
+ f'{self.region or "us-east-1"}{colorama.Style.RESET_ALL}')
1801
2033
 
1802
2034
  # Add AWS tags configured in config.yaml to the bucket.
1803
2035
  # This is useful for cost tracking and external cleanup.
1804
- bucket_tags = skypilot_config.get_nested(('aws', 'labels'), {})
2036
+ bucket_tags = skypilot_config.get_effective_region_config(
2037
+ cloud=self.config.cloud_name,
2038
+ region=None,
2039
+ keys=('labels',),
2040
+ default_value={})
1805
2041
  if bucket_tags:
1806
- s3_client.put_bucket_tagging(
2042
+ self.client.put_bucket_tagging(
1807
2043
  Bucket=bucket_name,
1808
2044
  Tagging={
1809
2045
  'TagSet': [{
@@ -1811,22 +2047,46 @@ class S3Store(AbstractStore):
1811
2047
  'Value': v
1812
2048
  } for k, v in bucket_tags.items()]
1813
2049
  })
1814
-
1815
2050
  except aws.botocore_exceptions().ClientError as e:
1816
2051
  with ux_utils.print_exception_no_traceback():
1817
2052
  raise exceptions.StorageBucketCreateError(
1818
- f'Attempted to create a bucket {self.name} but failed.'
2053
+ f'Attempted to create S3 bucket {self.name} but failed.'
1819
2054
  ) from e
1820
- return aws.resource('s3').Bucket(bucket_name)
2055
+ return self.config.resource_factory(bucket_name)
2056
+
2057
+ def _delete_bucket(self, bucket_name: str) -> bool:
2058
+ """Delete bucket using AWS CLI."""
2059
+ cmd_parts = [f'aws s3 rb s3://{bucket_name} --force']
2060
+
2061
+ if self.config.aws_profile:
2062
+ cmd_parts.append(f'--profile={self.config.aws_profile}')
2063
+ if self.config.get_endpoint_url:
2064
+ cmd_parts.append(f'--endpoint-url {self.config.get_endpoint_url()}')
2065
+
2066
+ remove_command = ' '.join(cmd_parts)
2067
+
2068
+ if self.config.credentials_file:
2069
+ remove_command = (f'AWS_SHARED_CREDENTIALS_FILE='
2070
+ f'{self.config.credentials_file} '
2071
+ f'{remove_command}')
2072
+ if self.config.config_file:
2073
+ remove_command = 'AWS_CONFIG_FILE=' + \
2074
+ f'{self.config.config_file} {remove_command}'
2075
+ return self._execute_remove_command(
2076
+ remove_command, bucket_name,
2077
+ f'Deleting {self.config.store_type} bucket {bucket_name}',
2078
+ (f'Failed to delete {self.config.store_type} bucket '
2079
+ f'{bucket_name}.'))
1821
2080
 
- def _execute_s3_remove_command(self, command: str, bucket_name: str,
- hint_operating: str,
- hint_failed: str) -> bool:
+ def _execute_remove_command(self, command: str, bucket_name: str,
+ hint_operating: str, hint_failed: str) -> bool:
+ """Execute bucket removal command."""
  try:
  with rich_utils.safe_status(
  ux_utils.spinner_message(hint_operating)):
- subprocess.check_output(command.split(' '),
- stderr=subprocess.STDOUT)
+ subprocess.check_output(command,
+ stderr=subprocess.STDOUT,
+ shell=True)
  except subprocess.CalledProcessError as e:
1831
2091
  if 'NoSuchBucket' in e.output.decode('utf-8'):
1832
2092
  logger.debug(
@@ -1840,53 +2100,50 @@ class S3Store(AbstractStore):
1840
2100
  f'Detailed error: {e.output}')
1841
2101
  return True
1842
2102
 
1843
- def _delete_s3_bucket(self, bucket_name: str) -> bool:
1844
- """Deletes S3 bucket, including all objects in bucket
2103
+ def _delete_sub_path(self) -> None:
2104
+ """Remove objects from the sub path in the bucket."""
2105
+ assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
2106
+ deleted_by_skypilot = self._delete_bucket_sub_path(
2107
+ self.name, self._bucket_sub_path)
2108
+ provider = self.config.store_type
2109
+ if deleted_by_skypilot:
2110
+ msg_str = (f'Removed objects from {provider} bucket '
2111
+ f'{self.name}/{self._bucket_sub_path}.')
2112
+ else:
2113
+ msg_str = (f'Failed to remove objects from {provider} bucket '
2114
+ f'{self.name}/{self._bucket_sub_path}.')
2115
+ logger.info(f'{colorama.Fore.GREEN}{msg_str}{colorama.Style.RESET_ALL}')
2116
+
2117
+ def _delete_bucket_sub_path(self, bucket_name: str, sub_path: str) -> bool:
2118
+ """Delete objects in the sub path from the bucket."""
2119
+ cmd_parts = [f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive']
2120
+
2121
+ if self.config.aws_profile:
2122
+ cmd_parts.append(f'--profile={self.config.aws_profile}')
2123
+ if self.config.get_endpoint_url:
2124
+ cmd_parts.append(f'--endpoint-url {self.config.get_endpoint_url()}')
2125
+
2126
+ remove_command = ' '.join(cmd_parts)
2127
+
2128
+ if self.config.credentials_file:
2129
+ remove_command = (f'AWS_SHARED_CREDENTIALS_FILE='
2130
+ f'{self.config.credentials_file} '
2131
+ f'{remove_command}')
2132
+ if self.config.config_file:
2133
+ remove_command = 'AWS_CONFIG_FILE=' + \
2134
+ f'{self.config.config_file} {remove_command}'
2135
+ return self._execute_remove_command(
2136
+ remove_command, bucket_name,
2137
+ (f'Removing objects from {self.config.store_type} bucket '
2138
+ f'{bucket_name}/{sub_path}'),
2139
+ (f'Failed to remove objects from {self.config.store_type} '
2140
+ f'bucket {bucket_name}/{sub_path}.'))
1845
2141
 
1846
- Args:
1847
- bucket_name: str; Name of bucket
1848
2142
 
1849
- Returns:
1850
- bool; True if bucket was deleted, False if it was deleted externally.
1851
-
1852
- Raises:
1853
- StorageBucketDeleteError: If deleting the bucket fails.
1854
- """
1855
- # Deleting objects is very slow programatically
1856
- # (i.e. bucket.objects.all().delete() is slow).
1857
- # In addition, standard delete operations (i.e. via `aws s3 rm`)
1858
- # are slow, since AWS puts deletion markers.
1859
- # https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
1860
- # The fastest way to delete is to run `aws s3 rb --force`,
1861
- # which removes the bucket by force.
1862
- remove_command = f'aws s3 rb s3://{bucket_name} --force'
1863
- success = self._execute_s3_remove_command(
1864
- remove_command, bucket_name,
1865
- f'Deleting S3 bucket [green]{bucket_name}[/]',
1866
- f'Failed to delete S3 bucket {bucket_name}.')
1867
- if not success:
1868
- return False
1869
-
1870
- # Wait until bucket deletion propagates on AWS servers
1871
- while data_utils.verify_s3_bucket(bucket_name):
1872
- time.sleep(0.1)
1873
- return True
1874
-
1875
- def _delete_s3_bucket_sub_path(self, bucket_name: str,
1876
- sub_path: str) -> bool:
1877
- """Deletes the sub path from the bucket."""
1878
- remove_command = f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive'
1879
- return self._execute_s3_remove_command(
1880
- remove_command, bucket_name, f'Removing objects from S3 bucket '
1881
- f'[green]{bucket_name}/{sub_path}[/]',
1882
- f'Failed to remove objects from S3 bucket {bucket_name}/{sub_path}.'
1883
- )
1884
-
1885
-
1886
- class GcsStore(AbstractStore):
1887
- """GcsStore inherits from Storage Object and represents the backend
1888
- for GCS buckets.
1889
- """
2143
+ class GcsStore(AbstractStore):
2144
+ """GcsStore inherits from Storage Object and represents the backend
2145
+ for GCS buckets.
2146
+ """
1890
2147
 
1891
2148
  _ACCESS_DENIED_MESSAGE = 'AccessDeniedException'
1892
2149
 
@@ -1951,6 +2208,10 @@ class GcsStore(AbstractStore):
1951
2208
  elif self.source.startswith('oci://'):
1952
2209
  raise NotImplementedError(
1953
2210
  'Moving data from OCI to GCS is currently not supported.')
2211
+ elif self.source.startswith('cw://'):
2212
+ raise NotImplementedError(
2213
+ 'Moving data from CoreWeave Object Storage to GCS is'
2214
+ ' currently not supported.')
1954
2215
  # Validate name
1955
2216
  self.name = self.validate_name(self.name)
1956
2217
  # Check if the storage is enabled
@@ -2337,7 +2598,7 @@ class GcsStore(AbstractStore):
2337
2598
  except Exception as e: # pylint: disable=broad-except
2338
2599
  with ux_utils.print_exception_no_traceback():
2339
2600
  raise exceptions.StorageBucketCreateError(
2340
- f'Attempted to create a bucket {self.name} but failed.'
2601
+ f'Attempted to create GCS bucket {self.name} but failed.'
2341
2602
  ) from e
2342
2603
  logger.info(
2343
2604
  f' {colorama.Style.DIM}Created GCS bucket {new_bucket.name!r} in '
@@ -2494,7 +2755,11 @@ class AzureBlobStore(AbstractStore):
2494
2755
  name=override_args.get('name', metadata.name),
2495
2756
  storage_account_name=override_args.get(
2496
2757
  'storage_account', metadata.storage_account_name),
2497
- source=override_args.get('source', metadata.source),
2758
+ # TODO(cooperc): fix the types for mypy 1.16
2759
+ # Azure store expects a string path; metadata.source may be a Path
2760
+ # or List[Path].
2761
+ source=override_args.get('source',
2762
+ metadata.source), # type: ignore[arg-type]
2498
2763
  region=override_args.get('region', metadata.region),
2499
2764
  is_sky_managed=override_args.get('is_sky_managed',
2500
2765
  metadata.is_sky_managed),
@@ -2562,6 +2827,10 @@ class AzureBlobStore(AbstractStore):
2562
2827
  elif self.source.startswith('oci://'):
2563
2828
  raise NotImplementedError(
2564
2829
  'Moving data from OCI to AzureBlob is not supported.')
2830
+ elif self.source.startswith('cw://'):
2831
+ raise NotImplementedError(
2832
+ 'Moving data from CoreWeave Object Storage to AzureBlob is'
2833
+ ' currently not supported.')
2565
2834
  # Validate name
2566
2835
  self.name = self.validate_name(self.name)
2567
2836
 
@@ -2764,8 +3033,12 @@ class AzureBlobStore(AbstractStore):
2764
3033
  # Creates new resource group and storage account or use the
2765
3034
  # storage_account provided by the user through config.yaml
2766
3035
  else:
2767
- config_storage_account = skypilot_config.get_nested(
2768
- ('azure', 'storage_account'), None)
3036
+ config_storage_account = (
3037
+ skypilot_config.get_effective_region_config(
3038
+ cloud='azure',
3039
+ region=None,
3040
+ keys=('storage_account',),
3041
+ default_value=None))
2769
3042
  if config_storage_account is not None:
2770
3043
  # using user provided storage account from config.yaml
2771
3044
  storage_account_name = config_storage_account
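The same `get_effective_region_config` pattern replaces `get_nested` lookups throughout this diff (see the AWS bucket-tag hunk earlier). A minimal sketch of the call shape, with the argument values taken from the hunk above and the import path assumed:

    from sky import skypilot_config  # assumed import path

    config_storage_account = skypilot_config.get_effective_region_config(
        cloud='azure',
        region=None,                # no region-specific override
        keys=('storage_account',),  # nested key under the 'azure' block
        default_value=None)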
@@ -2929,6 +3202,8 @@ class AzureBlobStore(AbstractStore):
2929
3202
  raise NotImplementedError(error_message.format('OCI'))
2930
3203
  elif self.source.startswith('nebius://'):
2931
3204
  raise NotImplementedError(error_message.format('NEBIUS'))
3205
+ elif self.source.startswith('cw://'):
3206
+ raise NotImplementedError(error_message.format('CoreWeave'))
2932
3207
  else:
2933
3208
  self.batch_az_blob_sync([self.source])
2934
3209
  except exceptions.StorageUploadError:
@@ -3256,7 +3531,7 @@ class AzureBlobStore(AbstractStore):
3256
3531
  with rich_utils.safe_status(
3257
3532
  ux_utils.spinner_message(
3258
3533
  f'Deleting Azure container {container_name}')):
3259
- # Check for the existance of the container before deletion.
3534
+ # Check for the existence of the container before deletion.
3260
3535
  self.storage_client.blob_containers.get(
3261
3536
  self.resource_group_name,
3262
3537
  self.storage_account_name,
@@ -3281,22 +3556,23 @@ class AzureBlobStore(AbstractStore):
3281
3556
  return True
3282
3557
 
3283
3558
 
3284
- class R2Store(AbstractStore):
3285
- """R2Store inherits from S3Store Object and represents the backend
3286
- for R2 buckets.
3559
+ class IBMCosStore(AbstractStore):
3560
+ """IBMCosStore inherits from Storage Object and represents the backend
3561
+ for COS buckets.
3287
3562
  """
3288
-
3289
3563
  _ACCESS_DENIED_MESSAGE = 'Access Denied'
3290
3564
 
3291
3565
  def __init__(self,
3292
3566
  name: str,
3293
3567
  source: str,
3294
- region: Optional[str] = 'auto',
3568
+ region: Optional[str] = 'us-east',
3295
3569
  is_sky_managed: Optional[bool] = None,
3296
- sync_on_reconstruction: Optional[bool] = True,
3570
+ sync_on_reconstruction: bool = True,
3297
3571
  _bucket_sub_path: Optional[str] = None):
3298
- self.client: 'mypy_boto3_s3.Client'
3572
+ self.client: 'storage.Client'
3299
3573
  self.bucket: 'StorageHandle'
3574
+ self.rclone_profile_name = (
3575
+ data_utils.Rclone.RcloneStores.IBM.get_profile_name(self.name))
3300
3576
  super().__init__(name, source, region, is_sky_managed,
3301
3577
  sync_on_reconstruction, _bucket_sub_path)
3302
3578
 
@@ -3330,6 +3606,9 @@ class R2Store(AbstractStore):
3330
3606
  assert self.name == data_utils.split_r2_path(self.source)[0], (
3331
3607
  'R2 Bucket is specified as path, the name should be '
3332
3608
  'the same as R2 bucket.')
3609
+ assert data_utils.verify_r2_bucket(self.name), (
3610
+ f'Source specified as {self.source}, a R2 bucket. ',
3611
+ 'R2 Bucket should exist.')
3333
3612
  elif self.source.startswith('nebius://'):
3334
3613
  assert self.name == data_utils.split_nebius_path(
3335
3614
  self.source)[0], (
@@ -3341,29 +3620,63 @@ class R2Store(AbstractStore):
3341
3620
  f'exist.')
3342
3621
  elif self.source.startswith('cos://'):
3343
3622
  assert self.name == data_utils.split_cos_path(self.source)[0], (
3344
- 'IBM COS Bucket is specified as path, the name should be '
3623
+ 'COS Bucket is specified as path, the name should be '
3345
3624
  'the same as COS bucket.')
3346
- assert data_utils.verify_ibm_cos_bucket(self.name), (
3347
- f'Source specified as {self.source}, a COS bucket. ',
3348
- 'COS Bucket should exist.')
3349
- elif self.source.startswith('oci://'):
3625
+ elif self.source.startswith('cw://'):
3350
3626
  raise NotImplementedError(
3351
- 'Moving data from OCI to R2 is currently not supported.')
3352
-
3627
+ 'Moving data from CoreWeave Object Storage to COS is '
3628
+ 'currently not supported.')
3353
3629
  # Validate name
3354
- self.name = S3Store.validate_name(self.name)
3355
- # Check if the storage is enabled
3356
- if not _is_storage_cloud_enabled(cloudflare.NAME):
3630
+ self.name = IBMCosStore.validate_name(self.name)
3631
+
3632
+ @classmethod
3633
+ def validate_name(cls, name: str) -> str:
3634
+ """Validates the name of a COS bucket.
3635
+
3636
+ Rules source: https://ibm.github.io/ibm-cos-sdk-java/com/ibm/cloud/objectstorage/services/s3/model/Bucket.html # pylint: disable=line-too-long
3637
+ """
3638
+
3639
+ def _raise_no_traceback_name_error(err_str):
3357
3640
  with ux_utils.print_exception_no_traceback():
3358
- raise exceptions.ResourcesUnavailableError(
3359
- 'Storage \'store: r2\' specified, but ' \
3360
- 'Cloudflare R2 access is disabled. To fix, '\
3361
- 'enable Cloudflare R2 by running `sky check`. '\
3362
- 'More info: https://docs.skypilot.co/en/latest/getting-started/installation.html.' # pylint: disable=line-too-long
3363
- )
3641
+ raise exceptions.StorageNameError(err_str)
3642
+
3643
+ if name is not None and isinstance(name, str):
3644
+ if not 3 <= len(name) <= 63:
3645
+ _raise_no_traceback_name_error(
3646
+ f'Invalid store name: {name} must be between 3 (min) '
3647
+ 'and 63 (max) characters long.')
3648
+
3649
+ # Check for valid characters and start/end with a letter or number
3650
+ pattern = r'^[a-z0-9][-a-z0-9.]*[a-z0-9]$'
3651
+ if not re.match(pattern, name):
3652
+ _raise_no_traceback_name_error(
3653
+ f'Invalid store name: {name} can consist only of '
3654
+ 'lowercase letters, numbers, dots (.), and dashes (-). '
3655
+ 'It must begin and end with a letter or number.')
3656
+
3657
+ # Check for two adjacent periods or dashes
3658
+ if any(substring in name for substring in ['..', '--']):
3659
+ _raise_no_traceback_name_error(
3660
+ f'Invalid store name: {name} must not contain '
3661
+ 'two adjacent periods/dashes')
3662
+
3663
+ # Check for IP address format
3664
+ ip_pattern = r'^(?:\d{1,3}\.){3}\d{1,3}$'
3665
+ if re.match(ip_pattern, name):
3666
+ _raise_no_traceback_name_error(
3667
+ f'Invalid store name: {name} must not be formatted as '
3668
+ 'an IP address (for example, 192.168.5.4).')
3669
+
3670
+ if any(substring in name for substring in ['.-', '-.']):
3671
+ _raise_no_traceback_name_error(
3672
+ f'Invalid store name: {name} must '
3673
+ 'not allow substrings: ".-", "-." .')
3674
+ else:
3675
+ _raise_no_traceback_name_error('Store name must be specified.')
3676
+ return name
3364
3677
 
3365
3678
  def initialize(self):
3366
- """Initializes the R2 store object on the cloud.
3679
+ """Initializes the cos store object on the cloud.
3367
3680
 
3368
3681
  Initialization involves fetching bucket if exists, or creating it if
3369
3682
  it does not.
@@ -3373,7 +3686,8 @@ class R2Store(AbstractStore):
3373
3686
  StorageBucketGetError: If fetching existing bucket fails
3374
3687
  StorageInitError: If general initialization fails.
3375
3688
  """
3376
- self.client = data_utils.create_r2_client(self.region)
3689
+ self.client = ibm.get_cos_client(self.region)
3690
+ self.s3_resource = ibm.get_cos_resource(self.region)
3377
3691
  self.bucket, is_new_bucket = self._get_bucket()
3378
3692
  if self.is_sky_managed is None:
3379
3693
  # If is_sky_managed is not specified, then this is a new storage
@@ -3383,7 +3697,7 @@ class R2Store(AbstractStore):
3383
3697
  self.is_sky_managed = is_new_bucket
3384
3698
 
3385
3699
  def upload(self):
3386
- """Uploads source to store bucket.
3700
+ """Uploads files from local machine to bucket.
3387
3701
 
3388
3702
  Upload must be called by the Storage handler - it is not called on
3389
3703
  Store initialization.
@@ -3393,22 +3707,29 @@ class R2Store(AbstractStore):
3393
3707
  """
3394
3708
  try:
3395
3709
  if isinstance(self.source, list):
3396
- self.batch_aws_rsync(self.source, create_dirs=True)
3710
+ self.batch_ibm_rsync(self.source, create_dirs=True)
3397
3711
  elif self.source is not None:
3398
- if self.source.startswith('s3://'):
3399
- self._transfer_to_r2()
3400
- elif self.source.startswith('gs://'):
3401
- self._transfer_to_r2()
3402
- elif self.source.startswith('r2://'):
3712
+ if self.source.startswith('cos://'):
3713
+ # cos bucket used as a dest, can't be used as source.
3403
3714
  pass
3404
- elif self.source.startswith('oci://'):
3405
- self._transfer_to_r2()
3715
+ elif self.source.startswith('s3://'):
+ raise Exception('IBM COS does not currently support '
+ 'data transfers between COS and S3')
  elif self.source.startswith('nebius://'):
- self._transfer_to_r2()
+ raise Exception('IBM COS does not currently support '
+ 'data transfers between COS and Nebius')
+ elif self.source.startswith('gs://'):
+ raise Exception('IBM COS does not currently support '
+ 'data transfers between COS and GS')
+ elif self.source.startswith('r2://'):
+ raise Exception('IBM COS does not currently support '
+ 'data transfers between COS and R2')
+ elif self.source.startswith('cw://'):
+ raise Exception('IBM COS does not currently support '
+ 'data transfers between COS and CoreWeave')
3408
3730
  else:
3409
- self.batch_aws_rsync([self.source])
3410
- except exceptions.StorageUploadError:
3411
- raise
3731
+ self.batch_ibm_rsync([self.source])
3732
+
3412
3733
  except Exception as e:
3413
3734
  raise exceptions.StorageUploadError(
3414
3735
  f'Upload failed for store {self.name}') from e
@@ -3417,41 +3738,28 @@ class R2Store(AbstractStore):
3417
3738
  if self._bucket_sub_path is not None and not self.is_sky_managed:
3418
3739
  return self._delete_sub_path()
3419
3740
 
3420
- deleted_by_skypilot = self._delete_r2_bucket(self.name)
3421
- if deleted_by_skypilot:
3422
- msg_str = f'Deleted R2 bucket {self.name}.'
3423
- else:
3424
- msg_str = f'R2 bucket {self.name} may have been deleted ' \
3425
- f'externally. Removing from local state.'
3426
- logger.info(f'{colorama.Fore.GREEN}{msg_str}'
3741
+ self._delete_cos_bucket()
3742
+ logger.info(f'{colorama.Fore.GREEN}Deleted COS bucket {self.name}.'
3427
3743
  f'{colorama.Style.RESET_ALL}')
3428
3744
 
3429
3745
  def _delete_sub_path(self) -> None:
3430
3746
  assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
3431
- deleted_by_skypilot = self._delete_r2_bucket_sub_path(
3432
- self.name, self._bucket_sub_path)
3433
- if deleted_by_skypilot:
3434
- msg_str = f'Removed objects from R2 bucket ' \
3435
- f'{self.name}/{self._bucket_sub_path}.'
3436
- else:
3437
- msg_str = f'Failed to remove objects from R2 bucket ' \
3438
- f'{self.name}/{self._bucket_sub_path}.'
3439
- logger.info(f'{colorama.Fore.GREEN}{msg_str}'
3440
- f'{colorama.Style.RESET_ALL}')
3747
+ bucket = self.s3_resource.Bucket(self.name)
3748
+ try:
3749
+ self._delete_cos_bucket_objects(bucket, self._bucket_sub_path + '/')
3750
+ except ibm.ibm_botocore.exceptions.ClientError as e:
3751
+ if e.__class__.__name__ == 'NoSuchBucket':
3752
+ logger.debug('bucket already removed')
3441
3753
 
3442
3754
  def get_handle(self) -> StorageHandle:
3443
- return cloudflare.resource('s3').Bucket(self.name)
3755
+ return self.s3_resource.Bucket(self.name)
3444
3756
 
3445
- def batch_aws_rsync(self,
3757
+ def batch_ibm_rsync(self,
3446
3758
  source_path_list: List[Path],
3447
3759
  create_dirs: bool = False) -> None:
3448
- """Invokes aws s3 sync to batch upload a list of local paths to R2
3449
-
3450
- AWS Sync by default uses 10 threads to upload files to the bucket. To
3451
- increase parallelism, modify max_concurrent_requests in your aws config
3452
- file (Default path: ~/.aws/config).
3760
+ """Invokes rclone copy to batch upload a list of local paths to cos
3453
3761
 
3454
- Since aws s3 sync does not support batch operations, we construct
3762
+ Since rclone does not support batch operations, we construct
3455
3763
  multiple commands to be run in parallel.
3456
3764
 
3457
3765
  Args:
@@ -3465,49 +3773,58 @@ class R2Store(AbstractStore):
3465
3773
  sub_path = (f'/{self._bucket_sub_path}'
3466
3774
  if self._bucket_sub_path else '')
3467
3775
 
3468
- def get_file_sync_command(base_dir_path, file_names):
3776
+ def get_dir_sync_command(src_dir_path, dest_dir_name) -> str:
3777
+ """returns an rclone command that copies a complete folder
3778
+ from 'src_dir_path' to bucket/'dest_dir_name'.
3779
+
3780
+ `rclone copy` copies files from the source path to the target.
+ Files with identical names at the destination won't be copied
+ over unless their modification date is more recent.
+ Works similarly to `aws s3 sync` (without --delete).
3784
+
3785
+ Args:
3786
+ src_dir_path (str): local source path from which to copy files.
3787
+ dest_dir_name (str): remote target path files are copied to.
3788
+
3789
+ Returns:
3790
+ str: bash command using rclone to sync files. Executed remotely.
3791
+ """
3792
+
3793
+ # .git directory is excluded from the sync
3794
+ # wrapping src_dir_path with "" to support path with spaces
3795
+ src_dir_path = shlex.quote(src_dir_path)
3796
+ sync_command = ('rclone copy --exclude ".git/*" '
3797
+ f'{src_dir_path} '
3798
+ f'{self.rclone_profile_name}:{self.name}{sub_path}'
3799
+ f'/{dest_dir_name}')
3800
+ return sync_command
3801
+
3802
+ def get_file_sync_command(base_dir_path, file_names) -> str:
3803
+ """returns an rclone command that copies files: 'file_names'
3804
+ from base directory: `base_dir_path` to bucket.
3805
+
3806
+ `rclone copy` copies files from the source path to the target.
+ Files with identical names at the destination won't be copied
+ over unless their modification date is more recent.
+ Works similarly to `aws s3 sync` (without --delete).
3810
+
3811
+ Args:
3812
+ base_dir_path (str): local path from which to copy files.
3813
+ file_names (List): specific file names to copy.
3814
+
3815
+ Returns:
3816
+ str: bash command using rclone to sync files
3817
+ """
3818
+
3819
+ # wrapping file_name with "" to support spaces
3469
3820
  includes = ' '.join([
3470
3821
  f'--include {shlex.quote(file_name)}'
3471
3822
  for file_name in file_names
3472
3823
  ])
3473
- endpoint_url = cloudflare.create_endpoint()
3474
3824
  base_dir_path = shlex.quote(base_dir_path)
3475
- sync_command = (
3476
- 'AWS_SHARED_CREDENTIALS_FILE='
3477
- f'{cloudflare.R2_CREDENTIALS_PATH} '
3478
- 'aws s3 sync --no-follow-symlinks --exclude="*" '
3479
- f'{includes} {base_dir_path} '
3480
- f's3://{self.name}{sub_path} '
3481
- f'--endpoint {endpoint_url} '
3482
- # R2 does not support CRC64-NVME
3483
- # which is the default for aws s3 sync
3484
- # https://community.cloudflare.com/t/an-error-occurred-internalerror-when-calling-the-putobject-operation/764905/13
3485
- f'--checksum-algorithm CRC32 '
3486
- f'--profile={cloudflare.R2_PROFILE_NAME}')
3487
- return sync_command
3488
-
3489
- def get_dir_sync_command(src_dir_path, dest_dir_name):
3490
- # we exclude .git directory from the sync
3491
- excluded_list = storage_utils.get_excluded_files(src_dir_path)
3492
- excluded_list.append('.git/*')
3493
- excludes = ' '.join([
3494
- f'--exclude {shlex.quote(file_name)}'
3495
- for file_name in excluded_list
3496
- ])
3497
- endpoint_url = cloudflare.create_endpoint()
3498
- src_dir_path = shlex.quote(src_dir_path)
3499
- sync_command = (
3500
- 'AWS_SHARED_CREDENTIALS_FILE='
3501
- f'{cloudflare.R2_CREDENTIALS_PATH} '
3502
- f'aws s3 sync --no-follow-symlinks {excludes} '
3503
- f'{src_dir_path} '
3504
- f's3://{self.name}{sub_path}/{dest_dir_name} '
3505
- f'--endpoint {endpoint_url} '
3506
- # R2 does not support CRC64-NVME
3507
- # which is the default for aws s3 sync
3508
- # https://community.cloudflare.com/t/an-error-occurred-internalerror-when-calling-the-putobject-operation/764905/13
3509
- f'--checksum-algorithm CRC32 '
3510
- f'--profile={cloudflare.R2_PROFILE_NAME}')
3825
+ sync_command = ('rclone copy '
3826
+ f'{includes} {base_dir_path} '
3827
+ f'{self.rclone_profile_name}:{self.name}{sub_path}')
3511
3828
  return sync_command
3512
3829
 
3513
3830
  # Generate message for upload
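For IBM COS the sync helpers above shell out to `rclone copy` against a named rclone profile rather than the AWS CLI. A small sketch of the command shape they produce; the profile name and paths are placeholders (the real profile comes from `data_utils.Rclone.RcloneStores.IBM.get_profile_name(...)`):

    import shlex

    rclone_profile = 'ibm-my-bucket'            # placeholder profile name
    bucket, sub_path = 'my-bucket', '/task-outputs'
    src_dir = shlex.quote('/tmp/my outputs')    # quoting handles spaces

    sync_command = ('rclone copy --exclude ".git/*" '
                    f'{src_dir} '
                    f'{rclone_profile}:{bucket}{sub_path}/outputs')
    print(sync_command)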
@@ -3518,7 +3835,8 @@ class R2Store(AbstractStore):
3518
3835
 
3519
3836
  log_path = sky_logging.generate_tmp_logging_file_path(
3520
3837
  _STORAGE_LOG_FILE_NAME)
3521
- sync_path = f'{source_message} -> r2://{self.name}{sub_path}/'
3838
+ sync_path = (
3839
+ f'{source_message} -> cos://{self.region}/{self.name}{sub_path}/')
3522
3840
  with rich_utils.safe_status(
3523
3841
  ux_utils.spinner_message(f'Syncing {sync_path}',
3524
3842
  log_path=log_path)):
@@ -3535,1236 +3853,306 @@ class R2Store(AbstractStore):
3535
3853
  ux_utils.finishing_message(f'Storage synced: {sync_path}',
3536
3854
  log_path))
3537
3855
 
3538
- def _transfer_to_r2(self) -> None:
3539
- assert isinstance(self.source, str), self.source
3540
- if self.source.startswith('gs://'):
3541
- data_transfer.gcs_to_r2(self.name, self.name)
3542
- elif self.source.startswith('s3://'):
3543
- data_transfer.s3_to_r2(self.name, self.name)
3544
- elif self.source.startswith('nebius://'):
3545
- data_transfer.s3_to_r2(self.name, self.name)
3546
-
3547
3856
  def _get_bucket(self) -> Tuple[StorageHandle, bool]:
3548
- """Obtains the R2 bucket.
3857
+ """returns IBM COS bucket object if exists, otherwise creates it.
3549
3858
 
3550
- If the bucket exists, this method will return the bucket.
3551
- If the bucket does not exist, there are three cases:
3552
- 1) Raise an error if the bucket source starts with s3://
3553
- 2) Return None if bucket has been externally deleted and
3554
- sync_on_reconstruction is False
3555
- 3) Create and return a new bucket otherwise
3859
+ Returns:
3860
+ StorageHandle(str): bucket name
3861
+ bool: indicates whether a new bucket was created.
3556
3862
 
3557
3863
  Raises:
3558
3864
  StorageSpecError: If externally created bucket is attempted to be
3559
3865
  mounted without specifying storage source.
3560
- StorageBucketCreateError: If creating the bucket fails
3866
+ StorageBucketCreateError: If bucket creation fails.
3561
3867
  StorageBucketGetError: If fetching a bucket fails
3562
3868
  StorageExternalDeletionError: If externally deleted storage is
3563
3869
  attempted to be fetched while reconstructing the storage for
3564
3870
  'sky storage delete' or 'sky start'
3565
3871
  """
3566
- r2 = cloudflare.resource('s3')
3567
- bucket = r2.Bucket(self.name)
3568
- endpoint_url = cloudflare.create_endpoint()
3569
- try:
3570
- # Try Public bucket case.
3571
- # This line does not error out if the bucket is an external public
3572
- # bucket or if it is a user's bucket that is publicly
3573
- # accessible.
3574
- self.client.head_bucket(Bucket=self.name)
3575
- self._validate_existing_bucket()
3576
- return bucket, False
3577
- except aws.botocore_exceptions().ClientError as e:
3578
- error_code = e.response['Error']['Code']
3579
- # AccessDenied error for buckets that are private and not owned by
3580
- # user.
3581
- if error_code == '403':
3582
- command = ('AWS_SHARED_CREDENTIALS_FILE='
3583
- f'{cloudflare.R2_CREDENTIALS_PATH} '
3584
- f'aws s3 ls s3://{self.name} '
3585
- f'--endpoint {endpoint_url} '
3586
- f'--profile={cloudflare.R2_PROFILE_NAME}')
3587
- with ux_utils.print_exception_no_traceback():
3588
- raise exceptions.StorageBucketGetError(
3589
- _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
3590
- f' To debug, consider running `{command}`.') from e
3591
3872
 
3592
- if isinstance(self.source, str) and self.source.startswith('r2://'):
3873
+ bucket_profile_name = (data_utils.Rclone.RcloneStores.IBM.value +
3874
+ self.name)
3875
+ try:
3876
+ bucket_region = data_utils.get_ibm_cos_bucket_region(self.name)
3877
+ except exceptions.StorageBucketGetError as e:
3593
3878
  with ux_utils.print_exception_no_traceback():
3879
+ command = f'rclone lsd {bucket_profile_name}: '
3594
3880
  raise exceptions.StorageBucketGetError(
3595
- 'Attempted to use a non-existent bucket as a source: '
3596
- f'{self.source}. Consider using '
3597
- '`AWS_SHARED_CREDENTIALS_FILE='
3598
- f'{cloudflare.R2_CREDENTIALS_PATH} aws s3 ls '
3599
- f's3://{self.name} '
3600
- f'--endpoint {endpoint_url} '
3601
- f'--profile={cloudflare.R2_PROFILE_NAME}\' '
3602
- 'to debug.')
3603
-
3604
- # If bucket cannot be found in both private and public settings,
3605
- # the bucket is to be created by Sky. However, creation is skipped if
3606
- # Store object is being reconstructed for deletion or re-mount with
3607
- # sky start, and error is raised instead.
3608
- if self.sync_on_reconstruction:
3609
- bucket = self._create_r2_bucket(self.name)
3610
- return bucket, True
3611
- else:
3612
- # Raised when Storage object is reconstructed for sky storage
3613
- # delete or to re-mount Storages with sky start but the storage
3614
- # is already removed externally.
3615
- raise exceptions.StorageExternalDeletionError(
3616
- 'Attempted to fetch a non-existent bucket: '
3617
- f'{self.name}')
3618
-
3619
- def _download_file(self, remote_path: str, local_path: str) -> None:
3620
- """Downloads file from remote to local on r2 bucket
3621
- using the boto3 API
3881
+ _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
3882
+ f' To debug, consider running `{command}`.') from e
3622
3883
 
3623
- Args:
3624
- remote_path: str; Remote path on R2 bucket
3625
- local_path: str; Local path on user's device
3626
- """
3627
- self.bucket.download_file(remote_path, local_path)
3628
-
3629
- def mount_command(self, mount_path: str) -> str:
3630
- """Returns the command to mount the bucket to the mount_path.
3631
-
3632
- Uses goofys to mount the bucket.
3633
-
3634
- Args:
3635
- mount_path: str; Path to mount the bucket to.
3636
- """
3637
- install_cmd = mounting_utils.get_s3_mount_install_cmd()
3638
- endpoint_url = cloudflare.create_endpoint()
3639
- r2_credential_path = cloudflare.R2_CREDENTIALS_PATH
3640
- r2_profile_name = cloudflare.R2_PROFILE_NAME
3641
- mount_cmd = mounting_utils.get_r2_mount_cmd(
3642
- r2_credential_path, r2_profile_name, endpoint_url, self.bucket.name,
3643
- mount_path, self._bucket_sub_path)
3644
- return mounting_utils.get_mounting_command(mount_path, install_cmd,
3645
- mount_cmd)
3646
-
3647
- def mount_cached_command(self, mount_path: str) -> str:
3648
- install_cmd = mounting_utils.get_rclone_install_cmd()
3649
- rclone_profile_name = (
3650
- data_utils.Rclone.RcloneStores.R2.get_profile_name(self.name))
3651
- rclone_config = data_utils.Rclone.RcloneStores.R2.get_config(
3652
- rclone_profile_name=rclone_profile_name)
3653
- mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
3654
- rclone_config, rclone_profile_name, self.bucket.name, mount_path)
3655
- return mounting_utils.get_mounting_command(mount_path, install_cmd,
3656
- mount_cached_cmd)
3657
-
3658
- def _create_r2_bucket(self,
3659
- bucket_name: str,
3660
- region='auto') -> StorageHandle:
3661
- """Creates R2 bucket with specific name in specific region
3662
-
3663
- Args:
3664
- bucket_name: str; Name of bucket
3665
- region: str; Region name, r2 automatically sets region
3666
- Raises:
3667
- StorageBucketCreateError: If bucket creation fails.
3668
- """
3669
- r2_client = self.client
3670
- try:
3671
- if region is None:
3672
- r2_client.create_bucket(Bucket=bucket_name)
3673
- else:
3674
- location = {'LocationConstraint': region}
3675
- r2_client.create_bucket(Bucket=bucket_name,
3676
- CreateBucketConfiguration=location)
3677
- logger.info(f' {colorama.Style.DIM}Created R2 bucket '
3678
- f'{bucket_name!r} in {region}'
3679
- f'{colorama.Style.RESET_ALL}')
3680
- except aws.botocore_exceptions().ClientError as e:
3681
- with ux_utils.print_exception_no_traceback():
3682
- raise exceptions.StorageBucketCreateError(
3683
- f'Attempted to create a bucket '
3684
- f'{self.name} but failed.') from e
3685
- return cloudflare.resource('s3').Bucket(bucket_name)
3686
-
3687
- def _execute_r2_remove_command(self, command: str, bucket_name: str,
3688
- hint_operating: str,
3689
- hint_failed: str) -> bool:
3690
- try:
3691
- with rich_utils.safe_status(
3692
- ux_utils.spinner_message(hint_operating)):
3693
- subprocess.check_output(command.split(' '),
3694
- stderr=subprocess.STDOUT,
3695
- shell=True)
3696
- except subprocess.CalledProcessError as e:
3697
- if 'NoSuchBucket' in e.output.decode('utf-8'):
3698
- logger.debug(
3699
- _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
3700
- bucket_name=bucket_name))
3701
- return False
3702
- else:
3703
- with ux_utils.print_exception_no_traceback():
3704
- raise exceptions.StorageBucketDeleteError(
3705
- f'{hint_failed}'
3706
- f'Detailed error: {e.output}')
3707
- return True
3708
-
3709
- def _delete_r2_bucket_sub_path(self, bucket_name: str,
3710
- sub_path: str) -> bool:
3711
- """Deletes the sub path from the bucket."""
3712
- endpoint_url = cloudflare.create_endpoint()
3713
- remove_command = (
3714
- f'AWS_SHARED_CREDENTIALS_FILE={cloudflare.R2_CREDENTIALS_PATH} '
3715
- f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive '
3716
- f'--endpoint {endpoint_url} '
3717
- f'--profile={cloudflare.R2_PROFILE_NAME}')
3718
- return self._execute_r2_remove_command(
3719
- remove_command, bucket_name,
3720
- f'Removing objects from R2 bucket {bucket_name}/{sub_path}',
3721
- f'Failed to remove objects from R2 bucket {bucket_name}/{sub_path}.'
3722
- )
3723
-
3724
- def _delete_r2_bucket(self, bucket_name: str) -> bool:
3725
- """Deletes R2 bucket, including all objects in bucket
3726
-
3727
- Args:
3728
- bucket_name: str; Name of bucket
3729
-
3730
- Returns:
3731
- bool; True if bucket was deleted, False if it was deleted externally.
3732
-
3733
- Raises:
3734
- StorageBucketDeleteError: If deleting the bucket fails.
3735
- """
3736
- # Deleting objects is very slow programatically
3737
- # (i.e. bucket.objects.all().delete() is slow).
3738
- # In addition, standard delete operations (i.e. via `aws s3 rm`)
3739
- # are slow, since AWS puts deletion markers.
3740
- # https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
3741
- # The fastest way to delete is to run `aws s3 rb --force`,
3742
- # which removes the bucket by force.
3743
- endpoint_url = cloudflare.create_endpoint()
3744
- remove_command = (
3745
- f'AWS_SHARED_CREDENTIALS_FILE={cloudflare.R2_CREDENTIALS_PATH} '
3746
- f'aws s3 rb s3://{bucket_name} --force '
3747
- f'--endpoint {endpoint_url} '
3748
- f'--profile={cloudflare.R2_PROFILE_NAME}')
3749
-
3750
- success = self._execute_r2_remove_command(
3751
- remove_command, bucket_name, f'Deleting R2 bucket {bucket_name}',
3752
- f'Failed to delete R2 bucket {bucket_name}.')
3753
- if not success:
3754
- return False
3755
-
3756
- # Wait until bucket deletion propagates on AWS servers
3757
- while data_utils.verify_r2_bucket(bucket_name):
3758
- time.sleep(0.1)
3759
- return True
3760
-
3761
-
3762
- class IBMCosStore(AbstractStore):
3763
- """IBMCosStore inherits from Storage Object and represents the backend
3764
- for COS buckets.
3765
- """
3766
- _ACCESS_DENIED_MESSAGE = 'Access Denied'
3767
-
3768
- def __init__(self,
3769
- name: str,
3770
- source: str,
3771
- region: Optional[str] = 'us-east',
3772
- is_sky_managed: Optional[bool] = None,
3773
- sync_on_reconstruction: bool = True,
3774
- _bucket_sub_path: Optional[str] = None):
3775
- self.client: 'storage.Client'
3776
- self.bucket: 'StorageHandle'
3777
- self.rclone_profile_name = (
3778
- data_utils.Rclone.RcloneStores.IBM.get_profile_name(self.name))
3779
- super().__init__(name, source, region, is_sky_managed,
3780
- sync_on_reconstruction, _bucket_sub_path)
3781
-
3782
- def _validate(self):
3783
- if self.source is not None and isinstance(self.source, str):
3784
- if self.source.startswith('s3://'):
3785
- assert self.name == data_utils.split_s3_path(self.source)[0], (
3786
- 'S3 Bucket is specified as path, the name should be the'
3787
- ' same as S3 bucket.')
3788
- assert data_utils.verify_s3_bucket(self.name), (
3789
- f'Source specified as {self.source}, a S3 bucket. ',
3790
- 'S3 Bucket should exist.')
3791
- elif self.source.startswith('gs://'):
3792
- assert self.name == data_utils.split_gcs_path(self.source)[0], (
3793
- 'GCS Bucket is specified as path, the name should be '
3794
- 'the same as GCS bucket.')
3795
- assert data_utils.verify_gcs_bucket(self.name), (
3796
- f'Source specified as {self.source}, a GCS bucket. ',
3797
- 'GCS Bucket should exist.')
3798
- elif data_utils.is_az_container_endpoint(self.source):
3799
- storage_account_name, container_name, _ = (
3800
- data_utils.split_az_path(self.source))
3801
- assert self.name == container_name, (
3802
- 'Azure bucket is specified as path, the name should be '
3803
- 'the same as Azure bucket.')
3804
- assert data_utils.verify_az_bucket(
3805
- storage_account_name, self.name), (
3806
- f'Source specified as {self.source}, an Azure bucket. '
3807
- 'Azure bucket should exist.')
3808
- elif self.source.startswith('r2://'):
3809
- assert self.name == data_utils.split_r2_path(self.source)[0], (
3810
- 'R2 Bucket is specified as path, the name should be '
3811
- 'the same as R2 bucket.')
3812
- assert data_utils.verify_r2_bucket(self.name), (
3813
- f'Source specified as {self.source}, a R2 bucket. ',
3814
- 'R2 Bucket should exist.')
3815
- elif self.source.startswith('nebius://'):
3816
- assert self.name == data_utils.split_nebius_path(
3817
- self.source)[0], (
3818
- 'Nebius Object Storage is specified as path, the name '
3819
- 'should be the same as Nebius Object Storage bucket.')
3820
- assert data_utils.verify_nebius_bucket(self.name), (
3821
- f'Source specified as {self.source}, a Nebius Object '
3822
- f'Storage bucket. Nebius Object Storage Bucket should '
3823
- f'exist.')
3824
- elif self.source.startswith('cos://'):
3825
- assert self.name == data_utils.split_cos_path(self.source)[0], (
3826
- 'COS Bucket is specified as path, the name should be '
3827
- 'the same as COS bucket.')
3828
- # Validate name
3829
- self.name = IBMCosStore.validate_name(self.name)
3830
-
3831
-    @classmethod
-    def validate_name(cls, name: str) -> str:
-        """Validates the name of a COS bucket.
-
-        Rules source: https://ibm.github.io/ibm-cos-sdk-java/com/ibm/cloud/objectstorage/services/s3/model/Bucket.html # pylint: disable=line-too-long
-        """
-
-        def _raise_no_traceback_name_error(err_str):
-            with ux_utils.print_exception_no_traceback():
-                raise exceptions.StorageNameError(err_str)
-
-        if name is not None and isinstance(name, str):
-            if not 3 <= len(name) <= 63:
-                _raise_no_traceback_name_error(
-                    f'Invalid store name: {name} must be between 3 (min) '
-                    'and 63 (max) characters long.')
-
-            # Check for valid characters and start/end with a letter or number
-            pattern = r'^[a-z0-9][-a-z0-9.]*[a-z0-9]$'
-            if not re.match(pattern, name):
-                _raise_no_traceback_name_error(
-                    f'Invalid store name: {name} can consist only of '
-                    'lowercase letters, numbers, dots (.), and dashes (-). '
-                    'It must begin and end with a letter or number.')
-
-            # Check for two adjacent periods or dashes
-            if any(substring in name for substring in ['..', '--']):
-                _raise_no_traceback_name_error(
-                    f'Invalid store name: {name} must not contain '
-                    'two adjacent periods/dashes')
-
-            # Check for IP address format
-            ip_pattern = r'^(?:\d{1,3}\.){3}\d{1,3}$'
-            if re.match(ip_pattern, name):
-                _raise_no_traceback_name_error(
-                    f'Invalid store name: {name} must not be formatted as '
-                    'an IP address (for example, 192.168.5.4).')
-
-            if any(substring in name for substring in ['.-', '-.']):
-                _raise_no_traceback_name_error(
-                    f'Invalid store name: {name} must '
-                    'not allow substrings: ".-", "-." .')
-        else:
-            _raise_no_traceback_name_error('Store name must be specified.')
-        return name
-
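To make the COS naming rules enforced by `validate_name` above easier to check at a glance, here is a minimal standalone sketch that applies the same length, character, adjacency, and IP-address checks; the helper function and the sample bucket names are illustrative only and are not part of the SkyPilot code:

    import re

    def looks_like_valid_cos_name(name: str) -> bool:
        # Mirrors the checks in IBMCosStore.validate_name: length 3-63,
        # lowercase alphanumerics/dots/dashes, starts and ends with a letter
        # or number, no '..', '--', '.-', '-.', and not an IP-address form.
        if not 3 <= len(name) <= 63:
            return False
        if not re.match(r'^[a-z0-9][-a-z0-9.]*[a-z0-9]$', name):
            return False
        if any(s in name for s in ('..', '--', '.-', '-.')):
            return False
        if re.match(r'^(?:\d{1,3}\.){3}\d{1,3}$', name):
            return False
        return True

    # Hypothetical names, only for illustration.
    assert looks_like_valid_cos_name('my-training-data')
    assert not looks_like_valid_cos_name('Bad_Name')     # uppercase/underscore
    assert not looks_like_valid_cos_name('192.168.5.4')  # IP-address form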
3877
- def initialize(self):
3878
- """Initializes the cos store object on the cloud.
3879
-
3880
- Initialization involves fetching bucket if exists, or creating it if
3881
- it does not.
3882
-
3883
- Raises:
3884
- StorageBucketCreateError: If bucket creation fails
3885
- StorageBucketGetError: If fetching existing bucket fails
3886
- StorageInitError: If general initialization fails.
3887
- """
3888
- self.client = ibm.get_cos_client(self.region)
3889
- self.s3_resource = ibm.get_cos_resource(self.region)
3890
- self.bucket, is_new_bucket = self._get_bucket()
3891
- if self.is_sky_managed is None:
3892
- # If is_sky_managed is not specified, then this is a new storage
3893
- # object (i.e., did not exist in global_user_state) and we should
3894
- # set the is_sky_managed property.
3895
- # If is_sky_managed is specified, then we take no action.
3896
- self.is_sky_managed = is_new_bucket
3897
-
3898
- def upload(self):
3899
- """Uploads files from local machine to bucket.
3900
-
3901
- Upload must be called by the Storage handler - it is not called on
3902
- Store initialization.
3903
-
3904
- Raises:
3905
- StorageUploadError: if upload fails.
3906
- """
3907
- try:
3908
- if isinstance(self.source, list):
3909
- self.batch_ibm_rsync(self.source, create_dirs=True)
3910
- elif self.source is not None:
3911
- if self.source.startswith('cos://'):
3912
- # cos bucket used as a dest, can't be used as source.
3913
- pass
3914
- elif self.source.startswith('s3://'):
3915
- raise Exception('IBM COS currently not supporting'
3916
- 'data transfers between COS and S3')
3917
- elif self.source.startswith('nebius://'):
3918
- raise Exception('IBM COS currently not supporting'
3919
- 'data transfers between COS and Nebius')
3920
- elif self.source.startswith('gs://'):
3921
- raise Exception('IBM COS currently not supporting'
3922
- 'data transfers between COS and GS')
3923
- elif self.source.startswith('r2://'):
3924
- raise Exception('IBM COS currently not supporting'
3925
- 'data transfers between COS and r2')
3926
- else:
3927
- self.batch_ibm_rsync([self.source])
3928
-
3929
- except Exception as e:
3930
- raise exceptions.StorageUploadError(
3931
- f'Upload failed for store {self.name}') from e
3932
-
3933
- def delete(self) -> None:
3934
- if self._bucket_sub_path is not None and not self.is_sky_managed:
3935
- return self._delete_sub_path()
3936
-
3937
- self._delete_cos_bucket()
3938
- logger.info(f'{colorama.Fore.GREEN}Deleted COS bucket {self.name}.'
3939
- f'{colorama.Style.RESET_ALL}')
3940
-
3941
- def _delete_sub_path(self) -> None:
3942
- assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
3943
- bucket = self.s3_resource.Bucket(self.name)
3944
- try:
3945
- self._delete_cos_bucket_objects(bucket, self._bucket_sub_path + '/')
3946
- except ibm.ibm_botocore.exceptions.ClientError as e:
3947
- if e.__class__.__name__ == 'NoSuchBucket':
3948
- logger.debug('bucket already removed')
3949
-
3950
- def get_handle(self) -> StorageHandle:
3951
- return self.s3_resource.Bucket(self.name)
3952
-
3953
- def batch_ibm_rsync(self,
3954
- source_path_list: List[Path],
3955
- create_dirs: bool = False) -> None:
3956
- """Invokes rclone copy to batch upload a list of local paths to cos
3957
-
3958
- Since rclone does not support batch operations, we construct
3959
- multiple commands to be run in parallel.
3960
-
3961
- Args:
3962
- source_path_list: List of paths to local files or directories
3963
- create_dirs: If the local_path is a directory and this is set to
3964
- False, the contents of the directory are directly uploaded to
3965
- root of the bucket. If the local_path is a directory and this is
3966
- set to True, the directory is created in the bucket root and
3967
- contents are uploaded to it.
3968
- """
3969
- sub_path = (f'/{self._bucket_sub_path}'
3970
- if self._bucket_sub_path else '')
3971
-
3972
- def get_dir_sync_command(src_dir_path, dest_dir_name) -> str:
3973
- """returns an rclone command that copies a complete folder
3974
- from 'src_dir_path' to bucket/'dest_dir_name'.
3975
-
3976
- `rclone copy` copies files from source path to target.
3977
- files with identical names at won't be copied over, unless
3978
- their modification date is more recent.
3979
- works similarly to `aws sync` (without --delete).
3980
-
3981
- Args:
3982
- src_dir_path (str): local source path from which to copy files.
3983
- dest_dir_name (str): remote target path files are copied to.
3984
-
3985
- Returns:
3986
- str: bash command using rclone to sync files. Executed remotely.
3987
- """
3988
-
3989
- # .git directory is excluded from the sync
3990
- # wrapping src_dir_path with "" to support path with spaces
3991
- src_dir_path = shlex.quote(src_dir_path)
3992
- sync_command = ('rclone copy --exclude ".git/*" '
3993
- f'{src_dir_path} '
3994
- f'{self.rclone_profile_name}:{self.name}{sub_path}'
3995
- f'/{dest_dir_name}')
3996
- return sync_command
3997
-
3998
- def get_file_sync_command(base_dir_path, file_names) -> str:
3999
- """returns an rclone command that copies files: 'file_names'
4000
- from base directory: `base_dir_path` to bucket.
4001
-
4002
- `rclone copy` copies files from source path to target.
4003
- files with identical names at won't be copied over, unless
4004
- their modification date is more recent.
4005
- works similarly to `aws sync` (without --delete).
4006
-
4007
- Args:
4008
- base_dir_path (str): local path from which to copy files.
4009
- file_names (List): specific file names to copy.
4010
-
4011
- Returns:
4012
- str: bash command using rclone to sync files
4013
- """
4014
-
4015
- # wrapping file_name with "" to support spaces
4016
- includes = ' '.join([
4017
- f'--include {shlex.quote(file_name)}'
4018
- for file_name in file_names
4019
- ])
4020
- base_dir_path = shlex.quote(base_dir_path)
4021
- sync_command = ('rclone copy '
4022
- f'{includes} {base_dir_path} '
4023
- f'{self.rclone_profile_name}:{self.name}{sub_path}')
4024
- return sync_command
4025
-
4026
- # Generate message for upload
4027
- if len(source_path_list) > 1:
4028
- source_message = f'{len(source_path_list)} paths'
4029
- else:
4030
- source_message = source_path_list[0]
4031
-
4032
- log_path = sky_logging.generate_tmp_logging_file_path(
4033
- _STORAGE_LOG_FILE_NAME)
4034
- sync_path = (
4035
- f'{source_message} -> cos://{self.region}/{self.name}{sub_path}/')
4036
- with rich_utils.safe_status(
4037
- ux_utils.spinner_message(f'Syncing {sync_path}',
4038
- log_path=log_path)):
4039
- data_utils.parallel_upload(
4040
- source_path_list,
4041
- get_file_sync_command,
4042
- get_dir_sync_command,
4043
- log_path,
4044
- self.name,
4045
- self._ACCESS_DENIED_MESSAGE,
4046
- create_dirs=create_dirs,
4047
- max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS)
4048
- logger.info(
4049
- ux_utils.finishing_message(f'Storage synced: {sync_path}',
4050
- log_path))
4051
-
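The commands that `batch_ibm_rsync` above hands to `data_utils.parallel_upload` are plain `rclone copy` invocations. A rough sketch of the strings they expand to, for hypothetical inputs (the profile name, bucket, paths, and file list below are made up; the real profile name comes from `RcloneStores.IBM.get_profile_name`):

    import shlex

    rclone_profile_name = 'sky-ibm-mybucket'  # assumed profile-name format
    bucket_name = 'mybucket'
    sub_path = ''  # would be '/<bucket_sub_path>' if a sub-path is set

    # Shape of get_dir_sync_command's output for a directory upload.
    src_dir_path = shlex.quote('./train data')
    dir_cmd = ('rclone copy --exclude ".git/*" '
               f'{src_dir_path} '
               f'{rclone_profile_name}:{bucket_name}{sub_path}/checkpoints')

    # Shape of get_file_sync_command's output for individual files.
    includes = ' '.join(
        f'--include {shlex.quote(f)}' for f in ['a.txt', 'b.txt'])
    file_cmd = ('rclone copy '
                f'{includes} {shlex.quote(".")} '
                f'{rclone_profile_name}:{bucket_name}{sub_path}')

    print(dir_cmd)
    print(file_cmd)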
4052
- def _get_bucket(self) -> Tuple[StorageHandle, bool]:
4053
- """returns IBM COS bucket object if exists, otherwise creates it.
4054
-
4055
- Returns:
4056
- StorageHandle(str): bucket name
4057
- bool: indicates whether a new bucket was created.
4058
-
4059
- Raises:
4060
- StorageSpecError: If externally created bucket is attempted to be
4061
- mounted without specifying storage source.
4062
- StorageBucketCreateError: If bucket creation fails.
4063
- StorageBucketGetError: If fetching a bucket fails
4064
- StorageExternalDeletionError: If externally deleted storage is
4065
- attempted to be fetched while reconstructing the storage for
4066
- 'sky storage delete' or 'sky start'
4067
- """
4068
-
4069
- bucket_profile_name = (data_utils.Rclone.RcloneStores.IBM.value +
4070
- self.name)
4071
- try:
4072
- bucket_region = data_utils.get_ibm_cos_bucket_region(self.name)
4073
- except exceptions.StorageBucketGetError as e:
4074
- with ux_utils.print_exception_no_traceback():
4075
- command = f'rclone lsd {bucket_profile_name}: '
4076
- raise exceptions.StorageBucketGetError(
4077
- _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
4078
- f' To debug, consider running `{command}`.') from e
4079
-
4080
- try:
4081
- uri_region = data_utils.split_cos_path(
4082
- self.source)[2] # type: ignore
4083
- except ValueError:
4084
- # source isn't a cos uri
4085
- uri_region = ''
4086
-
4087
- # bucket's region doesn't match specified region in URI
4088
- if bucket_region and uri_region and uri_region != bucket_region\
4089
- and self.sync_on_reconstruction:
4090
- with ux_utils.print_exception_no_traceback():
4091
- raise exceptions.StorageBucketGetError(
4092
- f'Bucket {self.name} exists in '
4093
- f'region {bucket_region}, '
4094
- f'but URI specified region {uri_region}.')
4095
-
4096
- if not bucket_region and uri_region:
4097
- # bucket doesn't exist but source is a bucket URI
4098
- with ux_utils.print_exception_no_traceback():
4099
- raise exceptions.StorageBucketGetError(
4100
- 'Attempted to use a non-existent bucket as a source: '
4101
- f'{self.name} by providing URI. Consider using '
4102
- '`rclone lsd <remote>` on relevant remotes returned '
4103
- 'via `rclone listremotes` to debug.')
4104
-
4105
- data_utils.Rclone.store_rclone_config(
4106
- self.name,
4107
- data_utils.Rclone.RcloneStores.IBM,
4108
- self.region, # type: ignore
4109
- )
4110
-
4111
- if not bucket_region and self.sync_on_reconstruction:
4112
- # bucket doesn't exist
4113
- return self._create_cos_bucket(self.name, self.region), True
4114
- elif not bucket_region and not self.sync_on_reconstruction:
4115
- # Raised when Storage object is reconstructed for sky storage
4116
- # delete or to re-mount Storages with sky start but the storage
4117
- # is already removed externally.
4118
- raise exceptions.StorageExternalDeletionError(
4119
- 'Attempted to fetch a non-existent bucket: '
4120
- f'{self.name}')
4121
- else:
4122
- # bucket exists
4123
- bucket = self.s3_resource.Bucket(self.name)
4124
- self._validate_existing_bucket()
4125
- return bucket, False
4126
-
4127
- def _download_file(self, remote_path: str, local_path: str) -> None:
4128
- """Downloads file from remote to local on s3 bucket
4129
- using the boto3 API
4130
-
4131
- Args:
4132
- remote_path: str; Remote path on S3 bucket
4133
- local_path: str; Local path on user's device
4134
- """
4135
- self.client.download_file(self.name, local_path, remote_path)
4136
-
4137
- def mount_command(self, mount_path: str) -> str:
4138
- """Returns the command to mount the bucket to the mount_path.
4139
-
4140
- Uses rclone to mount the bucket.
4141
- Source: https://github.com/rclone/rclone
4142
-
4143
- Args:
4144
- mount_path: str; Path to mount the bucket to.
4145
- """
4146
- # install rclone if not installed.
4147
- install_cmd = mounting_utils.get_rclone_install_cmd()
4148
- rclone_config = data_utils.Rclone.RcloneStores.IBM.get_config(
4149
- rclone_profile_name=self.rclone_profile_name,
4150
- region=self.region) # type: ignore
4151
- mount_cmd = (
4152
- mounting_utils.get_cos_mount_cmd(
4153
- rclone_config,
4154
- self.rclone_profile_name,
4155
- self.bucket.name,
4156
- mount_path,
4157
- self._bucket_sub_path, # type: ignore
4158
- ))
4159
- return mounting_utils.get_mounting_command(mount_path, install_cmd,
4160
- mount_cmd)
4161
-
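The actual mount command is assembled by `mounting_utils.get_cos_mount_cmd` and is not shown in this diff; conceptually it reduces to an `rclone mount` of the configured remote. The sketch below is a generic illustration only, not SkyPilot's exact output, and the profile, bucket, and mount path are assumptions:

    rclone_profile_name = 'sky-ibm-mybucket'  # assumed
    bucket_name = 'mybucket'
    mount_path = '/data'
    # A plain rclone mount of the remote; the real command adds the flags
    # chosen by get_cos_mount_cmd and is prefixed by the rclone install step.
    conceptual_mount_cmd = (
        f'rclone mount {rclone_profile_name}:{bucket_name} {mount_path} '
        '--daemon')
    print(conceptual_mount_cmd)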
4162
- def _create_cos_bucket(self,
4163
- bucket_name: str,
4164
- region='us-east') -> StorageHandle:
4165
- """Creates IBM COS bucket with specific name in specific region
4166
-
4167
- Args:
4168
- bucket_name: str; Name of bucket
4169
- region: str; Region name, e.g. us-east, us-south
4170
- Raises:
4171
- StorageBucketCreateError: If bucket creation fails.
4172
- """
4173
- try:
4174
- self.client.create_bucket(
4175
- Bucket=bucket_name,
4176
- CreateBucketConfiguration={
4177
- 'LocationConstraint': f'{region}-smart'
4178
- })
4179
- logger.info(f' {colorama.Style.DIM}Created IBM COS bucket '
4180
- f'{bucket_name!r} in {region} '
4181
- 'with storage class smart tier'
4182
- f'{colorama.Style.RESET_ALL}')
4183
- self.bucket = self.s3_resource.Bucket(bucket_name)
4184
-
4185
- except ibm.ibm_botocore.exceptions.ClientError as e: # type: ignore[union-attr] # pylint: disable=line-too-long
4186
- with ux_utils.print_exception_no_traceback():
4187
- raise exceptions.StorageBucketCreateError(
4188
- f'Failed to create bucket: '
4189
- f'{bucket_name}') from e
4190
-
4191
- s3_bucket_exists_waiter = self.client.get_waiter('bucket_exists')
4192
- s3_bucket_exists_waiter.wait(Bucket=bucket_name)
4193
-
4194
- return self.bucket
4195
-
4196
- def _delete_cos_bucket_objects(self,
4197
- bucket: Any,
4198
- prefix: Optional[str] = None) -> None:
4199
- bucket_versioning = self.s3_resource.BucketVersioning(bucket.name)
4200
- if bucket_versioning.status == 'Enabled':
4201
- if prefix is not None:
4202
- res = list(
4203
- bucket.object_versions.filter(Prefix=prefix).delete())
4204
- else:
4205
- res = list(bucket.object_versions.delete())
4206
- else:
4207
- if prefix is not None:
4208
- res = list(bucket.objects.filter(Prefix=prefix).delete())
4209
- else:
4210
- res = list(bucket.objects.delete())
4211
- logger.debug(f'Deleted bucket\'s content:\n{res}, prefix: {prefix}')
4212
-
4213
- def _delete_cos_bucket(self) -> None:
4214
- bucket = self.s3_resource.Bucket(self.name)
4215
- try:
4216
- self._delete_cos_bucket_objects(bucket)
4217
- bucket.delete()
4218
- bucket.wait_until_not_exists()
4219
- except ibm.ibm_botocore.exceptions.ClientError as e:
4220
- if e.__class__.__name__ == 'NoSuchBucket':
4221
- logger.debug('bucket already removed')
4222
- data_utils.Rclone.delete_rclone_bucket_profile(
4223
- self.name, data_utils.Rclone.RcloneStores.IBM)
4224
-
4225
-
4226
- class OciStore(AbstractStore):
4227
- """OciStore inherits from Storage Object and represents the backend
4228
- for OCI buckets.
4229
- """
4230
-
4231
- _ACCESS_DENIED_MESSAGE = 'AccessDeniedException'
4232
-
4233
- def __init__(self,
4234
- name: str,
4235
- source: Optional[SourceType],
4236
- region: Optional[str] = None,
4237
- is_sky_managed: Optional[bool] = None,
4238
- sync_on_reconstruction: Optional[bool] = True,
4239
- _bucket_sub_path: Optional[str] = None):
4240
- self.client: Any
4241
- self.bucket: StorageHandle
4242
- self.oci_config_file: str
4243
- self.config_profile: str
4244
- self.compartment: str
4245
- self.namespace: str
4246
-
4247
- # Region is from the specified name in <bucket>@<region> format.
4248
- # Another case is name can also be set by the source, for example:
4249
- # /datasets-storage:
4250
- # source: oci://RAGData@us-sanjose-1
4251
- # The name in above mount will be set to RAGData@us-sanjose-1
4252
- region_in_name = None
4253
- if name is not None and '@' in name:
4254
- self._validate_bucket_expr(name)
4255
- name, region_in_name = name.split('@')
4256
-
4257
- # Region is from the specified source in oci://<bucket>@<region> format
4258
- region_in_source = None
4259
- if isinstance(source,
4260
- str) and source.startswith('oci://') and '@' in source:
4261
- self._validate_bucket_expr(source)
4262
- source, region_in_source = source.split('@')
4263
-
4264
- if region_in_name is not None and region_in_source is not None:
4265
- # This should never happen because name and source will never be
4266
- # the remote bucket at the same time.
4267
- assert region_in_name == region_in_source, (
4268
- f'Mismatch region specified. Region in name {region_in_name}, '
4269
- f'but region in source is {region_in_source}')
4270
-
4271
- if region_in_name is not None:
4272
- region = region_in_name
4273
- elif region_in_source is not None:
4274
- region = region_in_source
4275
-
4276
- # Default region set to what specified in oci config.
4277
- if region is None:
4278
- region = oci.get_oci_config()['region']
4279
-
4280
- # So far from now on, the name and source are canonical, means there
4281
- # is no region (@<region> suffix) associated with them anymore.
4282
-
4283
- super().__init__(name, source, region, is_sky_managed,
4284
- sync_on_reconstruction, _bucket_sub_path)
4285
- # TODO(zpoint): add _bucket_sub_path to the sync/mount/delete commands
4286
-
4287
- def _validate_bucket_expr(self, bucket_expr: str):
4288
- pattern = r'^(\w+://)?[A-Za-z0-9-._]+(@\w{2}-\w+-\d{1})$'
4289
- if not re.match(pattern, bucket_expr):
4290
- raise ValueError(
4291
- 'The format for the bucket portion is <bucket>@<region> '
4292
- 'when specify a region with a bucket.')
4293
-
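The `<bucket>@<region>` convention parsed in `OciStore.__init__` and checked by `_validate_bucket_expr` above can be exercised in isolation; the bucket and region values below are the same ones used as examples in the code comments, and the pattern is copied from the method:

    import re

    _BUCKET_AT_REGION = r'^(\w+://)?[A-Za-z0-9-._]+(@\w{2}-\w+-\d{1})$'

    name = 'RAGData@us-sanjose-1'
    assert re.match(_BUCKET_AT_REGION, name)
    bucket, region_in_name = name.split('@')
    print(bucket, region_in_name)            # RAGData us-sanjose-1

    source = 'oci://RAGData@us-sanjose-1'
    assert re.match(_BUCKET_AT_REGION, source)
    source, region_in_source = source.split('@')
    print(source, region_in_source)          # oci://RAGData us-sanjose-1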
4294
- def _validate(self):
4295
- if self.source is not None and isinstance(self.source, str):
4296
- if self.source.startswith('oci://'):
4297
- assert self.name == data_utils.split_oci_path(self.source)[0], (
4298
- 'OCI Bucket is specified as path, the name should be '
4299
- 'the same as OCI bucket.')
4300
- elif not re.search(r'^\w+://', self.source):
4301
- # Treat it as local path.
4302
- pass
4303
- else:
4304
- raise NotImplementedError(
4305
- f'Moving data from {self.source} to OCI is not supported.')
4306
-
4307
- # Validate name
4308
- self.name = self.validate_name(self.name)
4309
- # Check if the storage is enabled
4310
- if not _is_storage_cloud_enabled(str(clouds.OCI())):
4311
- with ux_utils.print_exception_no_traceback():
4312
- raise exceptions.ResourcesUnavailableError(
4313
- 'Storage \'store: oci\' specified, but ' \
4314
- 'OCI access is disabled. To fix, enable '\
4315
- 'OCI by running `sky check`. '\
4316
- 'More info: https://skypilot.readthedocs.io/en/latest/getting-started/installation.html.' # pylint: disable=line-too-long
4317
- )
4318
-
4319
- @classmethod
4320
- def validate_name(cls, name) -> str:
4321
- """Validates the name of the OCI store.
4322
-
4323
- Source for rules: https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/managingbuckets.htm#Managing_Buckets # pylint: disable=line-too-long
4324
- """
4325
-
4326
- def _raise_no_traceback_name_error(err_str):
4327
- with ux_utils.print_exception_no_traceback():
4328
- raise exceptions.StorageNameError(err_str)
4329
-
4330
- if name is not None and isinstance(name, str):
4331
- # Check for overall length
4332
- if not 1 <= len(name) <= 256:
4333
- _raise_no_traceback_name_error(
4334
- f'Invalid store name: name {name} must contain 1-256 '
4335
- 'characters.')
4336
-
4337
- # Check for valid characters and start/end with a number or letter
4338
- pattern = r'^[A-Za-z0-9-._]+$'
4339
- if not re.match(pattern, name):
4340
- _raise_no_traceback_name_error(
4341
- f'Invalid store name: name {name} can only contain '
4342
- 'upper or lower case letters, numeric characters, hyphens '
4343
- '(-), underscores (_), and dots (.). Spaces are not '
4344
- 'allowed. Names must start and end with a number or '
4345
- 'letter.')
4346
- else:
4347
- _raise_no_traceback_name_error('Store name must be specified.')
4348
- return name
4349
-
4350
- def initialize(self):
4351
- """Initializes the OCI store object on the cloud.
4352
-
4353
- Initialization involves fetching bucket if exists, or creating it if
4354
- it does not.
4355
-
4356
- Raises:
4357
- StorageBucketCreateError: If bucket creation fails
4358
- StorageBucketGetError: If fetching existing bucket fails
4359
- StorageInitError: If general initialization fails.
4360
- """
4361
- # pylint: disable=import-outside-toplevel
4362
- from sky.clouds.utils import oci_utils
4363
- from sky.provision.oci.query_utils import query_helper
4364
-
4365
- self.oci_config_file = oci.get_config_file()
4366
- self.config_profile = oci_utils.oci_config.get_profile()
4367
-
4368
- ## pylint: disable=line-too-long
4369
- # What's compartment? See thttps://docs.oracle.com/en/cloud/foundation/cloud_architecture/governance/compartments.html
4370
- self.compartment = query_helper.find_compartment(self.region)
4371
- self.client = oci.get_object_storage_client(region=self.region,
4372
- profile=self.config_profile)
4373
- self.namespace = self.client.get_namespace(
4374
- compartment_id=oci.get_oci_config()['tenancy']).data
4375
-
4376
- self.bucket, is_new_bucket = self._get_bucket()
4377
- if self.is_sky_managed is None:
4378
- # If is_sky_managed is not specified, then this is a new storage
4379
- # object (i.e., did not exist in global_user_state) and we should
4380
- # set the is_sky_managed property.
4381
- # If is_sky_managed is specified, then we take no action.
4382
- self.is_sky_managed = is_new_bucket
4383
-
4384
- def upload(self):
4385
- """Uploads source to store bucket.
4386
-
4387
- Upload must be called by the Storage handler - it is not called on
4388
- Store initialization.
4389
-
4390
- Raises:
4391
- StorageUploadError: if upload fails.
4392
- """
4393
- try:
4394
- if isinstance(self.source, list):
4395
- self.batch_oci_rsync(self.source, create_dirs=True)
4396
- elif self.source is not None:
4397
- if self.source.startswith('oci://'):
4398
- pass
4399
- else:
4400
- self.batch_oci_rsync([self.source])
4401
- except exceptions.StorageUploadError:
4402
- raise
4403
- except Exception as e:
4404
- raise exceptions.StorageUploadError(
4405
- f'Upload failed for store {self.name}') from e
4406
-
4407
- def delete(self) -> None:
4408
- deleted_by_skypilot = self._delete_oci_bucket(self.name)
4409
- if deleted_by_skypilot:
4410
- msg_str = f'Deleted OCI bucket {self.name}.'
4411
- else:
4412
- msg_str = (f'OCI bucket {self.name} may have been deleted '
4413
- f'externally. Removing from local state.')
4414
- logger.info(f'{colorama.Fore.GREEN}{msg_str}'
4415
- f'{colorama.Style.RESET_ALL}')
4416
-
4417
- def get_handle(self) -> StorageHandle:
4418
- return self.client.get_bucket(namespace_name=self.namespace,
4419
- bucket_name=self.name).data
4420
-
4421
- def batch_oci_rsync(self,
4422
- source_path_list: List[Path],
4423
- create_dirs: bool = False) -> None:
4424
- """Invokes oci sync to batch upload a list of local paths to Bucket
4425
-
4426
- Use OCI bulk operation to batch process the file upload
4427
-
4428
- Args:
4429
- source_path_list: List of paths to local files or directories
4430
- create_dirs: If the local_path is a directory and this is set to
4431
- False, the contents of the directory are directly uploaded to
4432
- root of the bucket. If the local_path is a directory and this is
4433
- set to True, the directory is created in the bucket root and
4434
- contents are uploaded to it.
4435
- """
4436
- sub_path = (f'{self._bucket_sub_path}/'
4437
- if self._bucket_sub_path else '')
4438
-
4439
- @oci.with_oci_env
4440
- def get_file_sync_command(base_dir_path, file_names):
4441
- includes = ' '.join(
4442
- [f'--include "{file_name}"' for file_name in file_names])
4443
- prefix_arg = ''
4444
- if sub_path:
4445
- prefix_arg = f'--object-prefix "{sub_path.strip("/")}"'
4446
- sync_command = (
4447
- 'oci os object bulk-upload --no-follow-symlinks --overwrite '
4448
- f'--bucket-name {self.name} --namespace-name {self.namespace} '
4449
- f'--region {self.region} --src-dir "{base_dir_path}" '
4450
- f'{prefix_arg} '
4451
- f'{includes}')
4452
-
4453
- return sync_command
4454
-
4455
- @oci.with_oci_env
4456
- def get_dir_sync_command(src_dir_path, dest_dir_name):
4457
- if dest_dir_name and not str(dest_dir_name).endswith('/'):
4458
- dest_dir_name = f'{dest_dir_name}/'
4459
-
4460
- excluded_list = storage_utils.get_excluded_files(src_dir_path)
4461
- excluded_list.append('.git/*')
4462
- excludes = ' '.join([
4463
- f'--exclude {shlex.quote(file_name)}'
4464
- for file_name in excluded_list
4465
- ])
4466
-
4467
- # we exclude .git directory from the sync
4468
- sync_command = (
4469
- 'oci os object bulk-upload --no-follow-symlinks --overwrite '
4470
- f'--bucket-name {self.name} --namespace-name {self.namespace} '
4471
- f'--region {self.region} '
4472
- f'--object-prefix "{sub_path}{dest_dir_name}" '
4473
- f'--src-dir "{src_dir_path}" {excludes}')
4474
-
4475
- return sync_command
4476
-
4477
- # Generate message for upload
4478
- if len(source_path_list) > 1:
4479
- source_message = f'{len(source_path_list)} paths'
4480
- else:
4481
- source_message = source_path_list[0]
4482
-
4483
- log_path = sky_logging.generate_tmp_logging_file_path(
4484
- _STORAGE_LOG_FILE_NAME)
4485
- sync_path = f'{source_message} -> oci://{self.name}/{sub_path}'
4486
- with rich_utils.safe_status(
4487
- ux_utils.spinner_message(f'Syncing {sync_path}',
4488
- log_path=log_path)):
4489
- data_utils.parallel_upload(
4490
- source_path_list=source_path_list,
4491
- filesync_command_generator=get_file_sync_command,
4492
- dirsync_command_generator=get_dir_sync_command,
4493
- log_path=log_path,
4494
- bucket_name=self.name,
4495
- access_denied_message=self._ACCESS_DENIED_MESSAGE,
4496
- create_dirs=create_dirs,
4497
- max_concurrent_uploads=1)
4498
-
4499
- logger.info(
4500
- ux_utils.finishing_message(f'Storage synced: {sync_path}',
4501
- log_path))
4502
-
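The upload commands built by `batch_oci_rsync` above are `oci os object bulk-upload` invocations (in the real code the generators are additionally wrapped by `oci.with_oci_env`, which is omitted here). A sketch of the directory-sync command shape for hypothetical inputs:

    import shlex

    bucket_name = 'mybucket'          # assumed
    namespace = 'mynamespace'         # assumed
    region = 'us-sanjose-1'           # assumed
    sub_path = ''                     # would be '<bucket_sub_path>/' if set
    dest_dir_name = 'checkpoints/'
    src_dir_path = './train_data'

    excludes = ' '.join(
        f'--exclude {shlex.quote(pat)}' for pat in ['.git/*', '*.tmp'])
    dir_cmd = ('oci os object bulk-upload --no-follow-symlinks --overwrite '
               f'--bucket-name {bucket_name} --namespace-name {namespace} '
               f'--region {region} '
               f'--object-prefix "{sub_path}{dest_dir_name}" '
               f'--src-dir "{src_dir_path}" {excludes}')
    print(dir_cmd)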
4503
- def _get_bucket(self) -> Tuple[StorageHandle, bool]:
4504
- """Obtains the OCI bucket.
4505
- If the bucket exists, this method will connect to the bucket.
4506
-
4507
- If the bucket does not exist, there are three cases:
4508
- 1) Raise an error if the bucket source starts with oci://
4509
- 2) Return None if bucket has been externally deleted and
4510
- sync_on_reconstruction is False
4511
- 3) Create and return a new bucket otherwise
4512
-
4513
- Return tuple (Bucket, Boolean): The first item is the bucket
4514
- json payload from the OCI API call, the second item indicates
4515
- if this is a new created bucket(True) or an existing bucket(False).
4516
-
4517
- Raises:
4518
- StorageBucketCreateError: If creating the bucket fails
4519
- StorageBucketGetError: If fetching a bucket fails
4520
- """
4521
         try:
-            get_bucket_response = self.client.get_bucket(
-                namespace_name=self.namespace, bucket_name=self.name)
-            bucket = get_bucket_response.data
-            return bucket, False
-        except oci.service_exception() as e:
-            if e.status == 404:  # Not Found
-                if isinstance(self.source,
-                              str) and self.source.startswith('oci://'):
-                    with ux_utils.print_exception_no_traceback():
-                        raise exceptions.StorageBucketGetError(
-                            'Attempted to connect to a non-existent bucket: '
-                            f'{self.source}') from e
-                else:
-                    # If bucket cannot be found (i.e., does not exist), it is
-                    # to be created by Sky. However, creation is skipped if
-                    # Store object is being reconstructed for deletion.
-                    if self.sync_on_reconstruction:
-                        bucket = self._create_oci_bucket(self.name)
-                        return bucket, True
-                    else:
-                        return None, False
-            elif e.status == 401:  # Unauthorized
-                # AccessDenied error for buckets that are private and not
-                # owned by user.
-                command = (
-                    f'oci os object list --namespace-name {self.namespace} '
-                    f'--bucket-name {self.name}')
-                with ux_utils.print_exception_no_traceback():
-                    raise exceptions.StorageBucketGetError(
-                        _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
-                        f' To debug, consider running `{command}`.') from e
-            else:
-                # Unknown / unexpected error happened. This might happen when
-                # Object storage service itself functions not normal (e.g.
-                # maintainance event causes internal server error or request
-                # timeout, etc).
-                with ux_utils.print_exception_no_traceback():
-                    raise exceptions.StorageBucketGetError(
-                        f'Failed to connect to OCI bucket {self.name}') from e
+            uri_region = data_utils.split_cos_path(
+                self.source)[2]  # type: ignore
+        except ValueError:
+            # source isn't a cos uri
+            uri_region = ''

4562
- def mount_command(self, mount_path: str) -> str:
4563
- """Returns the command to mount the bucket to the mount_path.
3891
+ # bucket's region doesn't match specified region in URI
3892
+ if bucket_region and uri_region and uri_region != bucket_region\
3893
+ and self.sync_on_reconstruction:
3894
+ with ux_utils.print_exception_no_traceback():
3895
+ raise exceptions.StorageBucketGetError(
3896
+ f'Bucket {self.name} exists in '
3897
+ f'region {bucket_region}, '
3898
+ f'but URI specified region {uri_region}.')
4564
3899
 
4565
- Uses Rclone to mount the bucket.
3900
+ if not bucket_region and uri_region:
3901
+ # bucket doesn't exist but source is a bucket URI
3902
+ with ux_utils.print_exception_no_traceback():
3903
+ raise exceptions.StorageBucketGetError(
3904
+ 'Attempted to use a non-existent bucket as a source: '
3905
+ f'{self.name} by providing URI. Consider using '
3906
+ '`rclone lsd <remote>` on relevant remotes returned '
3907
+ 'via `rclone listremotes` to debug.')
4566
3908
 
4567
- Args:
4568
- mount_path: str; Path to mount the bucket to.
4569
- """
4570
- install_cmd = mounting_utils.get_rclone_install_cmd()
4571
- mount_cmd = mounting_utils.get_oci_mount_cmd(
4572
- mount_path=mount_path,
4573
- store_name=self.name,
4574
- region=str(self.region),
4575
- namespace=self.namespace,
4576
- compartment=self.bucket.compartment_id,
4577
- config_file=self.oci_config_file,
4578
- config_profile=self.config_profile)
4579
- version_check_cmd = mounting_utils.get_rclone_version_check_cmd()
3909
+ data_utils.Rclone.store_rclone_config(
3910
+ self.name,
3911
+ data_utils.Rclone.RcloneStores.IBM,
3912
+ self.region, # type: ignore
3913
+ )
4580
3914
 
4581
- return mounting_utils.get_mounting_command(mount_path, install_cmd,
4582
- mount_cmd, version_check_cmd)
3915
+ if not bucket_region and self.sync_on_reconstruction:
3916
+ # bucket doesn't exist
3917
+ return self._create_cos_bucket(self.name, self.region), True
3918
+ elif not bucket_region and not self.sync_on_reconstruction:
3919
+ # Raised when Storage object is reconstructed for sky storage
3920
+ # delete or to re-mount Storages with sky start but the storage
3921
+ # is already removed externally.
3922
+ raise exceptions.StorageExternalDeletionError(
3923
+ 'Attempted to fetch a non-existent bucket: '
3924
+ f'{self.name}')
3925
+ else:
3926
+ # bucket exists
3927
+ bucket = self.s3_resource.Bucket(self.name)
3928
+ self._validate_existing_bucket()
3929
+ return bucket, False
4583
3930
 
4584
3931
  def _download_file(self, remote_path: str, local_path: str) -> None:
4585
- """Downloads file from remote to local on OCI bucket
3932
+ """Downloads file from remote to local on s3 bucket
3933
+ using the boto3 API
4586
3934
 
4587
3935
  Args:
4588
- remote_path: str; Remote path on OCI bucket
3936
+ remote_path: str; Remote path on S3 bucket
4589
3937
  local_path: str; Local path on user's device
4590
3938
  """
4591
- if remote_path.startswith(f'/{self.name}'):
4592
- # If the remote path is /bucket_name, we need to
4593
- # remove the leading /
4594
- remote_path = remote_path.lstrip('/')
4595
-
4596
- filename = os.path.basename(remote_path)
4597
- if not local_path.endswith(filename):
4598
- local_path = os.path.join(local_path, filename)
4599
-
4600
- @oci.with_oci_env
4601
- def get_file_download_command(remote_path, local_path):
4602
- download_command = (f'oci os object get --bucket-name {self.name} '
4603
- f'--namespace-name {self.namespace} '
4604
- f'--region {self.region} --name {remote_path} '
4605
- f'--file {local_path}')
3939
+ self.client.download_file(self.name, local_path, remote_path)
4606
3940
 
4607
- return download_command
3941
+ def mount_command(self, mount_path: str) -> str:
3942
+ """Returns the command to mount the bucket to the mount_path.
4608
3943
 
4609
- download_command = get_file_download_command(remote_path, local_path)
3944
+ Uses rclone to mount the bucket.
3945
+ Source: https://github.com/rclone/rclone
4610
3946
 
4611
- try:
4612
- with rich_utils.safe_status(
4613
- f'[bold cyan]Downloading: {remote_path} -> {local_path}[/]'
4614
- ):
4615
- subprocess.check_output(download_command,
4616
- stderr=subprocess.STDOUT,
4617
- shell=True)
4618
- except subprocess.CalledProcessError as e:
4619
- logger.error(f'Download failed: {remote_path} -> {local_path}.\n'
4620
- f'Detail errors: {e.output}')
4621
- with ux_utils.print_exception_no_traceback():
4622
- raise exceptions.StorageBucketDeleteError(
4623
- f'Failed download file {self.name}:{remote_path}.') from e
3947
+ Args:
3948
+ mount_path: str; Path to mount the bucket to.
3949
+ """
3950
+ # install rclone if not installed.
3951
+ install_cmd = mounting_utils.get_rclone_install_cmd()
3952
+ rclone_config = data_utils.Rclone.RcloneStores.IBM.get_config(
3953
+ rclone_profile_name=self.rclone_profile_name,
3954
+ region=self.region) # type: ignore
3955
+ mount_cmd = (
3956
+ mounting_utils.get_cos_mount_cmd(
3957
+ rclone_config,
3958
+ self.rclone_profile_name,
3959
+ self.bucket.name,
3960
+ mount_path,
3961
+ self._bucket_sub_path, # type: ignore
3962
+ ))
3963
+ return mounting_utils.get_mounting_command(mount_path, install_cmd,
3964
+ mount_cmd)
4624
3965
 
4625
- def _create_oci_bucket(self, bucket_name: str) -> StorageHandle:
4626
- """Creates OCI bucket with specific name in specific region
3966
+ def _create_cos_bucket(self,
3967
+ bucket_name: str,
3968
+ region='us-east') -> StorageHandle:
3969
+ """Creates IBM COS bucket with specific name in specific region
4627
3970
 
4628
3971
  Args:
4629
3972
  bucket_name: str; Name of bucket
4630
- region: str; Region name, e.g. us-central1, us-west1
3973
+ region: str; Region name, e.g. us-east, us-south
3974
+ Raises:
3975
+ StorageBucketCreateError: If bucket creation fails.
4631
3976
  """
4632
- logger.debug(f'_create_oci_bucket: {bucket_name}')
4633
3977
  try:
4634
- create_bucket_response = self.client.create_bucket(
4635
- namespace_name=self.namespace,
4636
- create_bucket_details=oci.oci.object_storage.models.
4637
- CreateBucketDetails(
4638
- name=bucket_name,
4639
- compartment_id=self.compartment,
4640
- ))
4641
- bucket = create_bucket_response.data
4642
- return bucket
4643
- except oci.service_exception() as e:
3978
+ self.client.create_bucket(
3979
+ Bucket=bucket_name,
3980
+ CreateBucketConfiguration={
3981
+ 'LocationConstraint': f'{region}-smart'
3982
+ })
3983
+ logger.info(f' {colorama.Style.DIM}Created IBM COS bucket '
3984
+ f'{bucket_name!r} in {region} '
3985
+ 'with storage class smart tier'
3986
+ f'{colorama.Style.RESET_ALL}')
3987
+ self.bucket = self.s3_resource.Bucket(bucket_name)
3988
+
3989
+ except ibm.ibm_botocore.exceptions.ClientError as e: # type: ignore[union-attr] # pylint: disable=line-too-long
4644
3990
  with ux_utils.print_exception_no_traceback():
4645
3991
  raise exceptions.StorageBucketCreateError(
4646
- f'Failed to create OCI bucket: {self.name}') from e
4647
-
4648
- def _delete_oci_bucket(self, bucket_name: str) -> bool:
4649
- """Deletes OCI bucket, including all objects in bucket
4650
-
4651
- Args:
4652
- bucket_name: str; Name of bucket
4653
-
4654
- Returns:
4655
- bool; True if bucket was deleted, False if it was deleted externally.
4656
- """
4657
- logger.debug(f'_delete_oci_bucket: {bucket_name}')
3992
+ f'Failed to create bucket: '
3993
+ f'{bucket_name}') from e
4658
3994
 
4659
- @oci.with_oci_env
4660
- def get_bucket_delete_command(bucket_name):
4661
- remove_command = (f'oci os bucket delete --bucket-name '
4662
- f'--region {self.region} '
4663
- f'{bucket_name} --empty --force')
3995
+ s3_bucket_exists_waiter = self.client.get_waiter('bucket_exists')
3996
+ s3_bucket_exists_waiter.wait(Bucket=bucket_name)
4664
3997
 
4665
- return remove_command
3998
+ return self.bucket
4666
3999
 
4667
- remove_command = get_bucket_delete_command(bucket_name)
4000
+ def _delete_cos_bucket_objects(self,
4001
+ bucket: Any,
4002
+ prefix: Optional[str] = None) -> None:
4003
+ bucket_versioning = self.s3_resource.BucketVersioning(bucket.name)
4004
+ if bucket_versioning.status == 'Enabled':
4005
+ if prefix is not None:
4006
+ res = list(
4007
+ bucket.object_versions.filter(Prefix=prefix).delete())
4008
+ else:
4009
+ res = list(bucket.object_versions.delete())
4010
+ else:
4011
+ if prefix is not None:
4012
+ res = list(bucket.objects.filter(Prefix=prefix).delete())
4013
+ else:
4014
+ res = list(bucket.objects.delete())
4015
+ logger.debug(f'Deleted bucket\'s content:\n{res}, prefix: {prefix}')
4668
4016
 
4017
+ def _delete_cos_bucket(self) -> None:
4018
+ bucket = self.s3_resource.Bucket(self.name)
4669
4019
  try:
4670
- with rich_utils.safe_status(
4671
- f'[bold cyan]Deleting OCI bucket {bucket_name}[/]'):
4672
- subprocess.check_output(remove_command.split(' '),
4673
- stderr=subprocess.STDOUT)
4674
- except subprocess.CalledProcessError as e:
4675
- if 'BucketNotFound' in e.output.decode('utf-8'):
4676
- logger.debug(
4677
- _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
4678
- bucket_name=bucket_name))
4679
- return False
4680
- else:
4681
- logger.error(e.output)
4682
- with ux_utils.print_exception_no_traceback():
4683
- raise exceptions.StorageBucketDeleteError(
4684
- f'Failed to delete OCI bucket {bucket_name}.')
4685
- return True
4020
+ self._delete_cos_bucket_objects(bucket)
4021
+ bucket.delete()
4022
+ bucket.wait_until_not_exists()
4023
+ except ibm.ibm_botocore.exceptions.ClientError as e:
4024
+ if e.__class__.__name__ == 'NoSuchBucket':
4025
+ logger.debug('bucket already removed')
4026
+ data_utils.Rclone.delete_rclone_bucket_profile(
4027
+ self.name, data_utils.Rclone.RcloneStores.IBM)
4686
4028
 
4687
4029
 
4688
- class NebiusStore(AbstractStore):
4689
- """NebiusStore inherits from Storage Object and represents the backend
4690
- for S3 buckets.
4030
+ class OciStore(AbstractStore):
4031
+ """OciStore inherits from Storage Object and represents the backend
4032
+ for OCI buckets.
4691
4033
  """
4692
4034
 
4693
- _ACCESS_DENIED_MESSAGE = 'Access Denied'
4694
- _TIMEOUT_TO_PROPAGATES = 20
4035
+ _ACCESS_DENIED_MESSAGE = 'AccessDeniedException'
4695
4036
 
4696
4037
  def __init__(self,
4697
4038
  name: str,
4698
- source: str,
4039
+ source: Optional[SourceType],
4699
4040
  region: Optional[str] = None,
4700
4041
  is_sky_managed: Optional[bool] = None,
4701
- sync_on_reconstruction: bool = True,
4042
+ sync_on_reconstruction: Optional[bool] = True,
4702
4043
  _bucket_sub_path: Optional[str] = None):
4703
- self.client: 'mypy_boto3_s3.Client'
4704
- self.bucket: 'StorageHandle'
4044
+ self.client: Any
4045
+ self.bucket: StorageHandle
4046
+ self.oci_config_file: str
4047
+ self.config_profile: str
4048
+ self.compartment: str
4049
+ self.namespace: str
4050
+
4051
+ # Region is from the specified name in <bucket>@<region> format.
4052
+ # Another case is name can also be set by the source, for example:
4053
+ # /datasets-storage:
4054
+ # source: oci://RAGData@us-sanjose-1
4055
+ # The name in above mount will be set to RAGData@us-sanjose-1
4056
+ region_in_name = None
4057
+ if name is not None and '@' in name:
4058
+ self._validate_bucket_expr(name)
4059
+ name, region_in_name = name.split('@')
4060
+
4061
+ # Region is from the specified source in oci://<bucket>@<region> format
4062
+ region_in_source = None
4063
+ if isinstance(source,
4064
+ str) and source.startswith('oci://') and '@' in source:
4065
+ self._validate_bucket_expr(source)
4066
+ source, region_in_source = source.split('@')
4067
+
4068
+ if region_in_name is not None and region_in_source is not None:
4069
+ # This should never happen because name and source will never be
4070
+ # the remote bucket at the same time.
4071
+ assert region_in_name == region_in_source, (
4072
+ f'Mismatch region specified. Region in name {region_in_name}, '
4073
+ f'but region in source is {region_in_source}')
4074
+
4075
+ if region_in_name is not None:
4076
+ region = region_in_name
4077
+ elif region_in_source is not None:
4078
+ region = region_in_source
4079
+
4080
+ # Default region set to what specified in oci config.
4081
+ if region is None:
4082
+ region = oci.get_oci_config()['region']
4083
+
4084
+ # So far from now on, the name and source are canonical, means there
4085
+ # is no region (@<region> suffix) associated with them anymore.
4086
+
4705
4087
  super().__init__(name, source, region, is_sky_managed,
4706
4088
  sync_on_reconstruction, _bucket_sub_path)
4089
+ # TODO(zpoint): add _bucket_sub_path to the sync/mount/delete commands
4090
+
4091
+ def _validate_bucket_expr(self, bucket_expr: str):
4092
+ pattern = r'^(\w+://)?[A-Za-z0-9-._]+(@\w{2}-\w+-\d{1})$'
4093
+ if not re.match(pattern, bucket_expr):
4094
+ raise ValueError(
4095
+ 'The format for the bucket portion is <bucket>@<region> '
4096
+ 'when specify a region with a bucket.')
4707
4097
 
4708
4098
  def _validate(self):
4709
4099
  if self.source is not None and isinstance(self.source, str):
4710
- if self.source.startswith('s3://'):
4711
- assert self.name == data_utils.split_s3_path(self.source)[0], (
4712
- 'S3 Bucket is specified as path, the name should be the'
4713
- ' same as S3 bucket.')
4714
- elif self.source.startswith('gs://'):
4715
- assert self.name == data_utils.split_gcs_path(self.source)[0], (
4716
- 'GCS Bucket is specified as path, the name should be '
4717
- 'the same as GCS bucket.')
4718
- assert data_utils.verify_gcs_bucket(self.name), (
4719
- f'Source specified as {self.source}, a GCS bucket. ',
4720
- 'GCS Bucket should exist.')
4721
- elif data_utils.is_az_container_endpoint(self.source):
4722
- storage_account_name, container_name, _ = (
4723
- data_utils.split_az_path(self.source))
4724
- assert self.name == container_name, (
4725
- 'Azure bucket is specified as path, the name should be '
4726
- 'the same as Azure bucket.')
4727
- assert data_utils.verify_az_bucket(
4728
- storage_account_name, self.name), (
4729
- f'Source specified as {self.source}, an Azure bucket. '
4730
- 'Azure bucket should exist.')
4731
- elif self.source.startswith('r2://'):
4732
- assert self.name == data_utils.split_r2_path(self.source)[0], (
4733
- 'R2 Bucket is specified as path, the name should be '
4734
- 'the same as R2 bucket.')
4735
- assert data_utils.verify_r2_bucket(self.name), (
4736
- f'Source specified as {self.source}, a R2 bucket. ',
4737
- 'R2 Bucket should exist.')
4738
- elif self.source.startswith('nebius://'):
4739
- assert self.name == data_utils.split_nebius_path(
4740
- self.source)[0], (
4741
- 'Nebius Object Storage is specified as path, the name '
4742
- 'should be the same as Nebius Object Storage bucket.')
4743
- elif self.source.startswith('cos://'):
4744
- assert self.name == data_utils.split_cos_path(self.source)[0], (
4745
- 'COS Bucket is specified as path, the name should be '
4746
- 'the same as COS bucket.')
4747
- assert data_utils.verify_ibm_cos_bucket(self.name), (
4748
- f'Source specified as {self.source}, a COS bucket. ',
4749
- 'COS Bucket should exist.')
4750
- elif self.source.startswith('oci://'):
4100
+ if self.source.startswith('oci://'):
4101
+ assert self.name == data_utils.split_oci_path(self.source)[0], (
4102
+ 'OCI Bucket is specified as path, the name should be '
4103
+ 'the same as OCI bucket.')
4104
+ elif not re.search(r'^\w+://', self.source):
4105
+ # Treat it as local path.
4106
+ pass
4107
+ else:
4751
4108
  raise NotImplementedError(
4752
- 'Moving data from OCI to S3 is currently not supported.')
4753
- # Validate name
4754
- self.name = S3Store.validate_name(self.name)
4109
+ f'Moving data from {self.source} to OCI is not supported.')
4755
4110
 
4111
+ # Validate name
4112
+ self.name = self.validate_name(self.name)
4756
4113
  # Check if the storage is enabled
4757
- if not _is_storage_cloud_enabled(str(clouds.Nebius())):
4114
+ if not _is_storage_cloud_enabled(str(clouds.OCI())):
4758
4115
  with ux_utils.print_exception_no_traceback():
4759
- raise exceptions.ResourcesUnavailableError((
4760
- 'Storage \'store: nebius\' specified, but '
4761
- 'Nebius access is disabled. To fix, enable '
4762
- 'Nebius by running `sky check`. More info: '
4763
- 'https://docs.skypilot.co/en/latest/getting-started/installation.html.' # pylint: disable=line-too-long
4764
- ))
4116
+ raise exceptions.ResourcesUnavailableError(
4117
+ 'Storage \'store: oci\' specified, but ' \
4118
+ 'OCI access is disabled. To fix, enable '\
4119
+ 'OCI by running `sky check`. '\
4120
+ 'More info: https://skypilot.readthedocs.io/en/latest/getting-started/installation.html.' # pylint: disable=line-too-long
4121
+ )
4122
+
4123
+ @classmethod
4124
+ def validate_name(cls, name) -> str:
4125
+ """Validates the name of the OCI store.
4126
+
4127
+ Source for rules: https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/managingbuckets.htm#Managing_Buckets # pylint: disable=line-too-long
4128
+ """
4129
+
4130
+ def _raise_no_traceback_name_error(err_str):
4131
+ with ux_utils.print_exception_no_traceback():
4132
+ raise exceptions.StorageNameError(err_str)
4133
+
4134
+ if name is not None and isinstance(name, str):
4135
+ # Check for overall length
4136
+ if not 1 <= len(name) <= 256:
4137
+ _raise_no_traceback_name_error(
4138
+ f'Invalid store name: name {name} must contain 1-256 '
4139
+ 'characters.')
4140
+
4141
+ # Check for valid characters and start/end with a number or letter
4142
+ pattern = r'^[A-Za-z0-9-._]+$'
4143
+ if not re.match(pattern, name):
4144
+ _raise_no_traceback_name_error(
4145
+ f'Invalid store name: name {name} can only contain '
4146
+ 'upper or lower case letters, numeric characters, hyphens '
4147
+ '(-), underscores (_), and dots (.). Spaces are not '
4148
+ 'allowed. Names must start and end with a number or '
4149
+ 'letter.')
4150
+ else:
4151
+ _raise_no_traceback_name_error('Store name must be specified.')
4152
+ return name
4765
4153
 
4766
4154
  def initialize(self):
4767
- """Initializes the Nebius Object Storage on the cloud.
4155
+ """Initializes the OCI store object on the cloud.
4768
4156
 
4769
4157
  Initialization involves fetching bucket if exists, or creating it if
4770
4158
  it does not.
@@ -4774,7 +4162,21 @@ class NebiusStore(AbstractStore):
4774
4162
  StorageBucketGetError: If fetching existing bucket fails
4775
4163
  StorageInitError: If general initialization fails.
4776
4164
  """
4777
- self.client = data_utils.create_nebius_client()
4165
+ # pylint: disable=import-outside-toplevel
4166
+ from sky.clouds.utils import oci_utils
4167
+ from sky.provision.oci.query_utils import query_helper
4168
+
4169
+ self.oci_config_file = oci.get_config_file()
4170
+ self.config_profile = oci_utils.oci_config.get_profile()
4171
+
4172
+ ## pylint: disable=line-too-long
4173
+ # What's compartment? See thttps://docs.oracle.com/en/cloud/foundation/cloud_architecture/governance/compartments.html
4174
+ self.compartment = query_helper.find_compartment(self.region)
4175
+ self.client = oci.get_object_storage_client(region=self.region,
4176
+ profile=self.config_profile)
4177
+ self.namespace = self.client.get_namespace(
4178
+ compartment_id=oci.get_oci_config()['tenancy']).data
4179
+
4778
4180
  self.bucket, is_new_bucket = self._get_bucket()
4779
4181
  if self.is_sky_managed is None:
4780
4182
  # If is_sky_managed is not specified, then this is a new storage
@@ -4794,20 +4196,12 @@ class NebiusStore(AbstractStore):
4794
4196
  """
4795
4197
  try:
4796
4198
  if isinstance(self.source, list):
4797
- self.batch_aws_rsync(self.source, create_dirs=True)
4199
+ self.batch_oci_rsync(self.source, create_dirs=True)
4798
4200
  elif self.source is not None:
4799
- if self.source.startswith('nebius://'):
4201
+ if self.source.startswith('oci://'):
4800
4202
  pass
4801
- elif self.source.startswith('s3://'):
4802
- self._transfer_to_nebius()
4803
- elif self.source.startswith('gs://'):
4804
- self._transfer_to_nebius()
4805
- elif self.source.startswith('r2://'):
4806
- self._transfer_to_nebius()
4807
- elif self.source.startswith('oci://'):
4808
- self._transfer_to_nebius()
4809
4203
  else:
4810
- self.batch_aws_rsync([self.source])
4204
+ self.batch_oci_rsync([self.source])
4811
4205
  except exceptions.StorageUploadError:
4812
4206
  raise
4813
4207
  except Exception as e:
@@ -4815,45 +4209,25 @@ class NebiusStore(AbstractStore):
4815
4209
  f'Upload failed for store {self.name}') from e
4816
4210
 
4817
4211
  def delete(self) -> None:
4818
- if self._bucket_sub_path is not None and not self.is_sky_managed:
4819
- return self._delete_sub_path()
4820
-
4821
- deleted_by_skypilot = self._delete_nebius_bucket(self.name)
4212
+ deleted_by_skypilot = self._delete_oci_bucket(self.name)
4822
4213
  if deleted_by_skypilot:
4823
- msg_str = f'Deleted Nebius bucket {self.name}.'
4214
+ msg_str = f'Deleted OCI bucket {self.name}.'
4824
4215
  else:
4825
- msg_str = (f'Nebius bucket {self.name} may have been deleted '
4216
+ msg_str = (f'OCI bucket {self.name} may have been deleted '
4826
4217
  f'externally. Removing from local state.')
4827
4218
  logger.info(f'{colorama.Fore.GREEN}{msg_str}'
4828
4219
  f'{colorama.Style.RESET_ALL}')
4829
4220
 
4830
- def _delete_sub_path(self) -> None:
4831
- assert self._bucket_sub_path is not None, 'bucket_sub_path is not set'
4832
- deleted_by_skypilot = self._delete_nebius_bucket_sub_path(
4833
- self.name, self._bucket_sub_path)
4834
- if deleted_by_skypilot:
4835
- msg_str = (f'Removed objects from S3 bucket '
4836
- f'{self.name}/{self._bucket_sub_path}.')
4837
- else:
4838
- msg_str = (f'Failed to remove objects from S3 bucket '
4839
- f'{self.name}/{self._bucket_sub_path}.')
4840
- logger.info(f'{colorama.Fore.GREEN}{msg_str}'
4841
- f'{colorama.Style.RESET_ALL}')
4842
-
4843
4221
  def get_handle(self) -> StorageHandle:
4844
- return nebius.resource('s3').Bucket(self.name)
4222
+ return self.client.get_bucket(namespace_name=self.namespace,
4223
+ bucket_name=self.name).data
4845
4224
 
4846
- def batch_aws_rsync(self,
4225
+ def batch_oci_rsync(self,
4847
4226
  source_path_list: List[Path],
4848
4227
  create_dirs: bool = False) -> None:
4849
- """Invokes aws s3 sync to batch upload a list of local paths to S3
4850
-
4851
- AWS Sync by default uses 10 threads to upload files to the bucket. To
4852
- increase parallelism, modify max_concurrent_requests in your aws config
4853
- file (Default path: ~/.aws/config).
4228
+ """Invokes oci sync to batch upload a list of local paths to Bucket
4854
4229
 
4855
- Since aws s3 sync does not support batch operations, we construct
4856
- multiple commands to be run in parallel.
4230
+ Use OCI bulk operation to batch process the file upload
4857
4231
 
4858
4232
  Args:
4859
4233
  source_path_list: List of paths to local files or directories
@@ -4863,34 +4237,45 @@ class NebiusStore(AbstractStore):
4863
4237
  set to True, the directory is created in the bucket root and
4864
4238
  contents are uploaded to it.
4865
4239
  """
4866
- sub_path = (f'/{self._bucket_sub_path}'
4240
+ sub_path = (f'{self._bucket_sub_path}/'
4867
4241
  if self._bucket_sub_path else '')
4868
4242
 
4243
+ @oci.with_oci_env
4869
4244
  def get_file_sync_command(base_dir_path, file_names):
4870
- includes = ' '.join([
4871
- f'--include {shlex.quote(file_name)}'
4872
- for file_name in file_names
4873
- ])
4874
- base_dir_path = shlex.quote(base_dir_path)
4875
- sync_command = ('aws s3 sync --no-follow-symlinks --exclude="*" '
4876
- f'{includes} {base_dir_path} '
4877
- f's3://{self.name}{sub_path} '
4878
- f'--profile={nebius.NEBIUS_PROFILE_NAME}')
4245
+ includes = ' '.join(
4246
+ [f'--include "{file_name}"' for file_name in file_names])
4247
+ prefix_arg = ''
4248
+ if sub_path:
4249
+ prefix_arg = f'--object-prefix "{sub_path.strip("/")}"'
4250
+ sync_command = (
4251
+ 'oci os object bulk-upload --no-follow-symlinks --overwrite '
4252
+ f'--bucket-name {self.name} --namespace-name {self.namespace} '
4253
+ f'--region {self.region} --src-dir "{base_dir_path}" '
4254
+ f'{prefix_arg} '
4255
+ f'{includes}')
4256
+
4879
4257
  return sync_command
4880
4258
 
4259
+ @oci.with_oci_env
4881
4260
  def get_dir_sync_command(src_dir_path, dest_dir_name):
4882
- # we exclude .git directory from the sync
4261
+ if dest_dir_name and not str(dest_dir_name).endswith('/'):
4262
+ dest_dir_name = f'{dest_dir_name}/'
4263
+
4883
4264
  excluded_list = storage_utils.get_excluded_files(src_dir_path)
4884
4265
  excluded_list.append('.git/*')
4885
4266
  excludes = ' '.join([
4886
4267
  f'--exclude {shlex.quote(file_name)}'
4887
4268
  for file_name in excluded_list
4888
4269
  ])
4889
- src_dir_path = shlex.quote(src_dir_path)
4890
- sync_command = (f'aws s3 sync --no-follow-symlinks {excludes} '
4891
- f'{src_dir_path} '
4892
- f's3://{self.name}{sub_path}/{dest_dir_name} '
4893
- f'--profile={nebius.NEBIUS_PROFILE_NAME}')
4270
+
4271
+ # we exclude .git directory from the sync
4272
+ sync_command = (
4273
+ 'oci os object bulk-upload --no-follow-symlinks --overwrite '
4274
+ f'--bucket-name {self.name} --namespace-name {self.namespace} '
4275
+ f'--region {self.region} '
4276
+ f'--object-prefix "{sub_path}{dest_dir_name}" '
4277
+ f'--src-dir "{src_dir_path}" {excludes}')
4278
+
4894
4279
  return sync_command
4895
4280
 
4896
4281
  # Generate message for upload
@@ -4901,210 +4286,469 @@ class NebiusStore(AbstractStore):

  log_path = sky_logging.generate_tmp_logging_file_path(
  _STORAGE_LOG_FILE_NAME)
- sync_path = f'{source_message} -> nebius://{self.name}{sub_path}/'
+ sync_path = f'{source_message} -> oci://{self.name}/{sub_path}'
  with rich_utils.safe_status(
  ux_utils.spinner_message(f'Syncing {sync_path}',
  log_path=log_path)):
  data_utils.parallel_upload(
- source_path_list,
- get_file_sync_command,
- get_dir_sync_command,
- log_path,
- self.name,
- self._ACCESS_DENIED_MESSAGE,
+ source_path_list=source_path_list,
+ filesync_command_generator=get_file_sync_command,
+ dirsync_command_generator=get_dir_sync_command,
+ log_path=log_path,
+ bucket_name=self.name,
+ access_denied_message=self._ACCESS_DENIED_MESSAGE,
  create_dirs=create_dirs,
- max_concurrent_uploads=_MAX_CONCURRENT_UPLOADS)
- logger.info(
- ux_utils.finishing_message(f'Storage synced: {sync_path}',
- log_path))
+ max_concurrent_uploads=1)

- def _transfer_to_nebius(self) -> None:
- assert isinstance(self.source, str), self.source
- if self.source.startswith('gs://'):
- data_transfer.gcs_to_nebius(self.name, self.name)
- elif self.source.startswith('r2://'):
- data_transfer.r2_to_nebius(self.name, self.name)
- elif self.source.startswith('s3://'):
- data_transfer.s3_to_nebius(self.name, self.name)
+ logger.info(
+ ux_utils.finishing_message(f'Storage synced: {sync_path}',
+ log_path))

  def _get_bucket(self) -> Tuple[StorageHandle, bool]:
- """Obtains the S3 bucket.
+ """Obtains the OCI bucket.
+ If the bucket exists, this method will connect to the bucket.

- If the bucket exists, this method will return the bucket.
  If the bucket does not exist, there are three cases:
- 1) Raise an error if the bucket source starts with s3://
+ 1) Raise an error if the bucket source starts with oci://
  2) Return None if bucket has been externally deleted and
  sync_on_reconstruction is False
  3) Create and return a new bucket otherwise

+ Return tuple (Bucket, Boolean): The first item is the bucket
+ json payload from the OCI API call, the second item indicates
+ if this is a new created bucket(True) or an existing bucket(False).
+
  Raises:
- StorageSpecError: If externally created bucket is attempted to be
- mounted without specifying storage source.
  StorageBucketCreateError: If creating the bucket fails
  StorageBucketGetError: If fetching a bucket fails
- StorageExternalDeletionError: If externally deleted storage is
- attempted to be fetched while reconstructing the storage for
- 'sky storage delete' or 'sky start'
  """
- nebius_s = nebius.resource('s3')
- bucket = nebius_s.Bucket(self.name)
  try:
- # Try Public bucket case.
- # This line does not error out if the bucket is an external public
- # bucket or if it is a user's bucket that is publicly
- # accessible.
- self.client.head_bucket(Bucket=self.name)
- self._validate_existing_bucket()
+ get_bucket_response = self.client.get_bucket(
+ namespace_name=self.namespace, bucket_name=self.name)
+ bucket = get_bucket_response.data
  return bucket, False
- except aws.botocore_exceptions().ClientError as e:
- error_code = e.response['Error']['Code']
- # AccessDenied error for buckets that are private and not owned by
- # user.
- if error_code == '403':
- command = (f'aws s3 ls s3://{self.name} '
- f'--profile={nebius.NEBIUS_PROFILE_NAME}')
+ except oci.service_exception() as e:
+ if e.status == 404: # Not Found
+ if isinstance(self.source,
+ str) and self.source.startswith('oci://'):
+ with ux_utils.print_exception_no_traceback():
+ raise exceptions.StorageBucketGetError(
+ 'Attempted to connect to a non-existent bucket: '
+ f'{self.source}') from e
+ else:
+ # If bucket cannot be found (i.e., does not exist), it is
+ # to be created by Sky. However, creation is skipped if
+ # Store object is being reconstructed for deletion.
+ if self.sync_on_reconstruction:
+ bucket = self._create_oci_bucket(self.name)
+ return bucket, True
+ else:
+ return None, False
+ elif e.status == 401: # Unauthorized
+ # AccessDenied error for buckets that are private and not
+ # owned by user.
+ command = (
+ f'oci os object list --namespace-name {self.namespace} '
+ f'--bucket-name {self.name}')
  with ux_utils.print_exception_no_traceback():
  raise exceptions.StorageBucketGetError(
  _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
  f' To debug, consider running `{command}`.') from e
+ else:
+ # Unknown / unexpected error happened. This might happen when
+ # Object storage service itself functions not normal (e.g.
+ # maintainance event causes internal server error or request
+ # timeout, etc).
+ with ux_utils.print_exception_no_traceback():
+ raise exceptions.StorageBucketGetError(
+ f'Failed to connect to OCI bucket {self.name}') from e

- if isinstance(self.source, str) and self.source.startswith('nebius://'):
- with ux_utils.print_exception_no_traceback():
- raise exceptions.StorageBucketGetError(
- 'Attempted to use a non-existent bucket as a source: '
- f'{self.source}. Consider using `aws s3 ls '
- f's3://{self.name} '
- f'--profile={nebius.NEBIUS_PROFILE_NAME}` to debug.')
+ def mount_command(self, mount_path: str) -> str:
+ """Returns the command to mount the bucket to the mount_path.

- # If bucket cannot be found in both private and public settings,
- # the bucket is to be created by Sky. However, creation is skipped if
- # Store object is being reconstructed for deletion or re-mount with
- # sky start, and error is raised instead.
- if self.sync_on_reconstruction:
- bucket = self._create_nebius_bucket(self.name)
- return bucket, True
- else:
- # Raised when Storage object is reconstructed for sky storage
- # delete or to re-mount Storages with sky start but the storage
- # is already removed externally.
- raise exceptions.StorageExternalDeletionError(
- 'Attempted to fetch a non-existent bucket: '
- f'{self.name}')
+ Uses Rclone to mount the bucket.
+
+ Args:
+ mount_path: str; Path to mount the bucket to.
+ """
+ install_cmd = mounting_utils.get_rclone_install_cmd()
+ mount_cmd = mounting_utils.get_oci_mount_cmd(
+ mount_path=mount_path,
+ store_name=self.name,
+ region=str(self.region),
+ namespace=self.namespace,
+ compartment=self.bucket.compartment_id,
+ config_file=self.oci_config_file,
+ config_profile=self.config_profile)
+ version_check_cmd = mounting_utils.get_rclone_version_check_cmd()
+
+ return mounting_utils.get_mounting_command(mount_path, install_cmd,
+ mount_cmd, version_check_cmd)

  def _download_file(self, remote_path: str, local_path: str) -> None:
- """Downloads file from remote to local on s3 bucket
- using the boto3 API
+ """Downloads file from remote to local on OCI bucket

  Args:
- remote_path: str; Remote path on S3 bucket
+ remote_path: str; Remote path on OCI bucket
  local_path: str; Local path on user's device
  """
- self.bucket.download_file(remote_path, local_path)
+ if remote_path.startswith(f'/{self.name}'):
+ # If the remote path is /bucket_name, we need to
+ # remove the leading /
+ remote_path = remote_path.lstrip('/')

- def mount_command(self, mount_path: str) -> str:
- """Returns the command to mount the bucket to the mount_path.
+ filename = os.path.basename(remote_path)
+ if not local_path.endswith(filename):
+ local_path = os.path.join(local_path, filename)
+
+ @oci.with_oci_env
+ def get_file_download_command(remote_path, local_path):
+ download_command = (f'oci os object get --bucket-name {self.name} '
+ f'--namespace-name {self.namespace} '
+ f'--region {self.region} --name {remote_path} '
+ f'--file {local_path}')

- Uses goofys to mount the bucket.
+ return download_command

- Args:
- mount_path: str; Path to mount the bucket to.
- """
- install_cmd = mounting_utils.get_s3_mount_install_cmd()
- nebius_profile_name = nebius.NEBIUS_PROFILE_NAME
- endpoint_url = self.client.meta.endpoint_url
- mount_cmd = mounting_utils.get_nebius_mount_cmd(nebius_profile_name,
- self.bucket.name,
- endpoint_url,
- mount_path,
- self._bucket_sub_path)
- return mounting_utils.get_mounting_command(mount_path, install_cmd,
- mount_cmd)
+ download_command = get_file_download_command(remote_path, local_path)
+
+ try:
+ with rich_utils.safe_status(
+ f'[bold cyan]Downloading: {remote_path} -> {local_path}[/]'
+ ):
+ subprocess.check_output(download_command,
+ stderr=subprocess.STDOUT,
+ shell=True)
+ except subprocess.CalledProcessError as e:
+ logger.error(f'Download failed: {remote_path} -> {local_path}.\n'
+ f'Detail errors: {e.output}')
+ with ux_utils.print_exception_no_traceback():
+ raise exceptions.StorageBucketDeleteError(
+ f'Failed download file {self.name}:{remote_path}.') from e
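The added _download_file builds its CLI invocation inside a helper decorated with oci.with_oci_env. The decorator itself is not shown in this diff, so the sketch below only illustrates the general pattern of a decorator that post-processes a command-building function; the decorator name, the prefix string, and the builder are placeholders, not the actual OCI environment setup.

import functools
from typing import Callable

def with_env_prefix(prefix: str) -> Callable:
    """Hypothetical decorator: prepends a shell prefix to a built command."""
    def decorator(builder: Callable[..., str]) -> Callable[..., str]:
        @functools.wraps(builder)
        def wrapper(*args, **kwargs) -> str:
            # Prefix the command produced by the decorated builder.
            return f'{prefix} && {builder(*args, **kwargs)}'
        return wrapper
    return decorator

@with_env_prefix('export EXAMPLE_VAR=1')  # placeholder, not the real OCI env
def build_get_object_command(bucket: str, namespace: str, name: str,
                             dest: str) -> str:
    return (f'oci os object get --bucket-name {bucket} '
            f'--namespace-name {namespace} --name {name} --file {dest}')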

- def _create_nebius_bucket(self, bucket_name: str) -> StorageHandle:
- """Creates S3 bucket with specific name
+ def _create_oci_bucket(self, bucket_name: str) -> StorageHandle:
+ """Creates OCI bucket with specific name in specific region

  Args:
  bucket_name: str; Name of bucket
- Raises:
- StorageBucketCreateError: If bucket creation fails.
+ region: str; Region name, e.g. us-central1, us-west1
  """
- nebius_client = self.client
+ logger.debug(f'_create_oci_bucket: {bucket_name}')
  try:
- nebius_client.create_bucket(Bucket=bucket_name)
- except aws.botocore_exceptions().ClientError as e:
+ create_bucket_response = self.client.create_bucket(
+ namespace_name=self.namespace,
+ create_bucket_details=oci.oci.object_storage.models.
+ CreateBucketDetails(
+ name=bucket_name,
+ compartment_id=self.compartment,
+ ))
+ bucket = create_bucket_response.data
+ return bucket
+ except oci.service_exception() as e:
  with ux_utils.print_exception_no_traceback():
  raise exceptions.StorageBucketCreateError(
- f'Attempted to create a bucket '
- f'{self.name} but failed.') from e
- return nebius.resource('s3').Bucket(bucket_name)
+ f'Failed to create OCI bucket: {self.name}') from e
+
+ def _delete_oci_bucket(self, bucket_name: str) -> bool:
+ """Deletes OCI bucket, including all objects in bucket
+
+ Args:
+ bucket_name: str; Name of bucket
+
+ Returns:
+ bool; True if bucket was deleted, False if it was deleted externally.
+ """
+ logger.debug(f'_delete_oci_bucket: {bucket_name}')
+
+ @oci.with_oci_env
+ def get_bucket_delete_command(bucket_name):
+ remove_command = (f'oci os bucket delete --bucket-name '
+ f'--region {self.region} '
+ f'{bucket_name} --empty --force')
+
+ return remove_command
+
+ remove_command = get_bucket_delete_command(bucket_name)

- def _execute_nebius_remove_command(self, command: str, bucket_name: str,
- hint_operating: str,
- hint_failed: str) -> bool:
  try:
  with rich_utils.safe_status(
- ux_utils.spinner_message(hint_operating)):
- subprocess.check_output(command.split(' '),
+ f'[bold cyan]Deleting OCI bucket {bucket_name}[/]'):
+ subprocess.check_output(remove_command.split(' '),
  stderr=subprocess.STDOUT)
  except subprocess.CalledProcessError as e:
- if 'NoSuchBucket' in e.output.decode('utf-8'):
+ if 'BucketNotFound' in e.output.decode('utf-8'):
  logger.debug(
  _BUCKET_EXTERNALLY_DELETED_DEBUG_MESSAGE.format(
  bucket_name=bucket_name))
  return False
  else:
+ logger.error(e.output)
  with ux_utils.print_exception_no_traceback():
  raise exceptions.StorageBucketDeleteError(
- f'{hint_failed}'
- f'Detailed error: {e.output}')
+ f'Failed to delete OCI bucket {bucket_name}.')
  return True

- def _delete_nebius_bucket(self, bucket_name: str) -> bool:
- """Deletes S3 bucket, including all objects in bucket

- Args:
- bucket_name: str; Name of bucket
+ @register_s3_compatible_store
+ class S3Store(S3CompatibleStore):
+ """S3Store inherits from S3CompatibleStore and represents the backend
+ for S3 buckets.
+ """

- Returns:
- bool; True if bucket was deleted, False if it was deleted externally.
+ _DEFAULT_REGION = 'us-east-1'
+ _CUSTOM_ENDPOINT_REGIONS = [
+ 'ap-east-1', 'me-south-1', 'af-south-1', 'eu-south-1', 'eu-south-2',
+ 'ap-south-2', 'ap-southeast-3', 'ap-southeast-4', 'me-central-1',
+ 'il-central-1'
+ ]

- Raises:
- StorageBucketDeleteError: If deleting the bucket fails.
- """
- # Deleting objects is very slow programatically
- # (i.e. bucket.objects.all().delete() is slow).
- # In addition, standard delete operations (i.e. via `aws s3 rm`)
- # are slow, since AWS puts deletion markers.
- # https://stackoverflow.com/questions/49239351/why-is-it-so-much-slower-to-delete-objects-in-aws-s3-than-it-is-to-create-them
- # The fastest way to delete is to run `aws s3 rb --force`,
- # which removes the bucket by force.
- remove_command = (f'aws s3 rb s3://{bucket_name} --force '
- f'--profile={nebius.NEBIUS_PROFILE_NAME}')
-
- success = self._execute_nebius_remove_command(
- remove_command, bucket_name,
- f'Deleting Nebius bucket {bucket_name}',
- f'Failed to delete Nebius bucket {bucket_name}.')
- if not success:
- return False
+ def __init__(self,
+ name: str,
+ source: str,
+ region: Optional[str] = None,
+ is_sky_managed: Optional[bool] = None,
+ sync_on_reconstruction: bool = True,
+ _bucket_sub_path: Optional[str] = None):
+ # TODO(romilb): This is purely a stopgap fix for
+ # https://github.com/skypilot-org/skypilot/issues/3405
+ # We should eventually make all opt-in regions also work for S3 by
+ # passing the right endpoint flags.
+ if region in self._CUSTOM_ENDPOINT_REGIONS:
+ logger.warning('AWS opt-in regions are not supported for S3. '
+ f'Falling back to default region '
+ f'{self._DEFAULT_REGION} for bucket {name!r}.')
+ region = self._DEFAULT_REGION
+ super().__init__(name, source, region, is_sky_managed,
+ sync_on_reconstruction, _bucket_sub_path)

- # Wait until bucket deletion propagates on Nebius servers
- start_time = time.time()
- while data_utils.verify_nebius_bucket(bucket_name):
- if time.time() - start_time > self._TIMEOUT_TO_PROPAGATES:
- raise TimeoutError(
- f'Timeout while verifying {bucket_name} Nebius bucket.')
- time.sleep(0.1)
- return True
+ @classmethod
+ def get_config(cls) -> S3CompatibleConfig:
+ """Return the configuration for AWS S3."""
+ return S3CompatibleConfig(
+ store_type='S3',
+ url_prefix='s3://',
+ client_factory=data_utils.create_s3_client,
+ resource_factory=lambda name: aws.resource('s3').Bucket(name),
+ split_path=data_utils.split_s3_path,
+ verify_bucket=data_utils.verify_s3_bucket,
+ cloud_name=str(clouds.AWS()),
+ default_region=cls._DEFAULT_REGION,
+ mount_cmd_factory=mounting_utils.get_s3_mount_cmd,
+ )
+
+ def mount_cached_command(self, mount_path: str) -> str:
+ install_cmd = mounting_utils.get_rclone_install_cmd()
+ rclone_profile_name = (
+ data_utils.Rclone.RcloneStores.S3.get_profile_name(self.name))
+ rclone_config = data_utils.Rclone.RcloneStores.S3.get_config(
+ rclone_profile_name=rclone_profile_name)
+ mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
+ rclone_config, rclone_profile_name, self.bucket.name, mount_path)
+ return mounting_utils.get_mounting_command(mount_path, install_cmd,
+ mount_cached_cmd)
+
+
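The S3Store above and the R2, Nebius, and CoreWeave stores added below all share one shape: decorate the class with register_s3_compatible_store and return an S3CompatibleConfig from a get_config() classmethod. As a minimal sketch (not part of the package), a hypothetical backend would plug in the same way; the store type, URL prefix, helper functions, and default region below are placeholders, and only config fields that appear in this diff are used.

@register_s3_compatible_store
class ExampleStore(S3CompatibleStore):
    """Hypothetical backend illustrating the registration pattern."""

    @classmethod
    def get_config(cls) -> S3CompatibleConfig:
        return S3CompatibleConfig(
            store_type='EXAMPLE',        # placeholder store type
            url_prefix='example://',     # placeholder URL scheme
            client_factory=lambda region: make_example_client(region),  # placeholder helper
            resource_factory=lambda name: None,                         # placeholder handle
            split_path=split_example_path,                              # placeholder helper
            verify_bucket=verify_example_bucket,                        # placeholder helper
            cloud_name='example',
            default_region='us-example-1',
            mount_cmd_factory=mounting_utils.get_s3_mount_cmd,
        )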
+ @register_s3_compatible_store
+ class R2Store(S3CompatibleStore):
+ """R2Store inherits from S3CompatibleStore and represents the backend
+ for R2 buckets.
+ """
+
+ def __init__(self,
+ name: str,
+ source: str,
+ region: Optional[str] = 'auto',
+ is_sky_managed: Optional[bool] = None,
+ sync_on_reconstruction: bool = True,
+ _bucket_sub_path: Optional[str] = None):
+ super().__init__(name, source, region, is_sky_managed,
+ sync_on_reconstruction, _bucket_sub_path)
+
+ @classmethod
+ def get_config(cls) -> S3CompatibleConfig:
+ """Return the configuration for Cloudflare R2."""
+ return S3CompatibleConfig(
+ store_type='R2',
+ url_prefix='r2://',
+ client_factory=lambda region: data_utils.create_r2_client(region or
+ 'auto'),
+ resource_factory=lambda name: cloudflare.resource('s3').Bucket(name
+ ),
+ split_path=data_utils.split_r2_path,
+ verify_bucket=data_utils.verify_r2_bucket,
+ credentials_file=cloudflare.R2_CREDENTIALS_PATH,
+ aws_profile=cloudflare.R2_PROFILE_NAME,
+ get_endpoint_url=lambda: cloudflare.create_endpoint(), # pylint: disable=unnecessary-lambda
+ extra_cli_args=['--checksum-algorithm', 'CRC32'], # R2 specific
+ cloud_name=cloudflare.NAME,
+ default_region='auto',
+ mount_cmd_factory=cls._get_r2_mount_cmd,
+ )
+
+ @classmethod
+ def _get_r2_mount_cmd(cls, bucket_name: str, mount_path: str,
+ bucket_sub_path: Optional[str]) -> str:
+ """Factory method for R2 mount command."""
+ endpoint_url = cloudflare.create_endpoint()
+ return mounting_utils.get_r2_mount_cmd(cloudflare.R2_CREDENTIALS_PATH,
+ cloudflare.R2_PROFILE_NAME,
+ endpoint_url, bucket_name,
+ mount_path, bucket_sub_path)
+
+ def mount_cached_command(self, mount_path: str) -> str:
+ """R2-specific cached mount implementation using rclone."""
+ install_cmd = mounting_utils.get_rclone_install_cmd()
+ rclone_profile_name = (
+ data_utils.Rclone.RcloneStores.R2.get_profile_name(self.name))
+ rclone_config = data_utils.Rclone.RcloneStores.R2.get_config(
+ rclone_profile_name=rclone_profile_name)
+ mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
+ rclone_config, rclone_profile_name, self.bucket.name, mount_path)
+ return mounting_utils.get_mounting_command(mount_path, install_cmd,
+ mount_cached_cmd)
+
+
+ @register_s3_compatible_store
+ class NebiusStore(S3CompatibleStore):
+ """NebiusStore inherits from S3CompatibleStore and represents the backend
+ for Nebius Object Storage buckets.
+ """
+
+ @classmethod
+ def get_config(cls) -> S3CompatibleConfig:
+ """Return the configuration for Nebius Object Storage."""
+ return S3CompatibleConfig(
+ store_type='NEBIUS',
+ url_prefix='nebius://',
+ client_factory=lambda region: data_utils.create_nebius_client(),
+ resource_factory=lambda name: nebius.resource('s3').Bucket(name),
+ split_path=data_utils.split_nebius_path,
+ verify_bucket=data_utils.verify_nebius_bucket,
+ aws_profile=nebius.NEBIUS_PROFILE_NAME,
+ cloud_name=str(clouds.Nebius()),
+ mount_cmd_factory=cls._get_nebius_mount_cmd,
+ )
+
+ @classmethod
+ def _get_nebius_mount_cmd(cls, bucket_name: str, mount_path: str,
+ bucket_sub_path: Optional[str]) -> str:
+ """Factory method for Nebius mount command."""
+ # We need to get the endpoint URL, but since this is a static method,
+ # we'll need to create a client to get it
+ client = data_utils.create_nebius_client()
+ endpoint_url = client.meta.endpoint_url
+ return mounting_utils.get_nebius_mount_cmd(nebius.NEBIUS_PROFILE_NAME,
+ bucket_name, endpoint_url,
+ mount_path, bucket_sub_path)
+
+ def mount_cached_command(self, mount_path: str) -> str:
+ """Nebius-specific cached mount implementation using rclone."""
+ install_cmd = mounting_utils.get_rclone_install_cmd()
+ rclone_profile_name = (
+ data_utils.Rclone.RcloneStores.NEBIUS.get_profile_name(self.name))
+ rclone_config = data_utils.Rclone.RcloneStores.NEBIUS.get_config(
+ rclone_profile_name=rclone_profile_name)
+ mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
+ rclone_config, rclone_profile_name, self.bucket.name, mount_path)
+ return mounting_utils.get_mounting_command(mount_path, install_cmd,
+ mount_cached_cmd)
+
+
+ @register_s3_compatible_store
+ class CoreWeaveStore(S3CompatibleStore):
+ """CoreWeaveStore inherits from S3CompatibleStore and represents the backend
+ for CoreWeave Object Storage buckets.
+ """
+
+ @classmethod
+ def get_config(cls) -> S3CompatibleConfig:
+ """Return the configuration for CoreWeave Object Storage."""
+ return S3CompatibleConfig(
+ store_type='COREWEAVE',
+ url_prefix='cw://',
+ client_factory=lambda region: data_utils.create_coreweave_client(),
+ resource_factory=lambda name: coreweave.resource('s3').Bucket(name),
+ split_path=data_utils.split_coreweave_path,
+ verify_bucket=data_utils.verify_coreweave_bucket,
+ aws_profile=coreweave.COREWEAVE_PROFILE_NAME,
+ get_endpoint_url=coreweave.get_endpoint,
+ credentials_file=coreweave.COREWEAVE_CREDENTIALS_PATH,
+ config_file=coreweave.COREWEAVE_CONFIG_PATH,
+ cloud_name=coreweave.NAME,
+ default_region=coreweave.DEFAULT_REGION,
+ mount_cmd_factory=cls._get_coreweave_mount_cmd,
+ )
+
+ def _get_bucket(self) -> Tuple[StorageHandle, bool]:
+ """Get or create bucket using CoreWeave's S3 API"""
+ bucket = self.config.resource_factory(self.name)
+
+ # Use our custom bucket verification instead of head_bucket
+ if data_utils.verify_coreweave_bucket(self.name):
+ self._validate_existing_bucket()
+ return bucket, False
+
+ # TODO(hailong): Enable the bucket creation for CoreWeave
+ # Disable this to avoid waiting too long until the following
+ # issue is resolved:
+ # https://github.com/skypilot-org/skypilot/issues/7736
+ raise exceptions.StorageBucketGetError(
+ f'Bucket {self.name!r} does not exist. CoreWeave buckets can take'
+ ' a long time to become accessible after creation, so SkyPilot'
+ ' does not create them automatically. Please create the bucket'
+ ' manually in CoreWeave and wait for it to be accessible before'
+ ' using it.')
+
+ # # Check if this is a source with URL prefix (existing bucket case)
+ # if isinstance(self.source, str) and self.source.startswith(
+ # self.config.url_prefix):
+ # with ux_utils.print_exception_no_traceback():
+ # raise exceptions.StorageBucketGetError(
+ # 'Attempted to use a non-existent bucket as a source: '
+ # f'{self.source}.')
+
+ # # If bucket cannot be found, create it if needed
+ # if self.sync_on_reconstruction:
+ # bucket = self._create_bucket(self.name)
+ # return bucket, True
+ # else:
+ # raise exceptions.StorageExternalDeletionError(
+ # 'Attempted to fetch a non-existent bucket: '
+ # f'{self.name}')
+
+ @classmethod
+ def _get_coreweave_mount_cmd(cls, bucket_name: str, mount_path: str,
+ bucket_sub_path: Optional[str]) -> str:
+ """Factory method for CoreWeave mount command."""
+ endpoint_url = coreweave.get_endpoint()
+ return mounting_utils.get_coreweave_mount_cmd(
+ coreweave.COREWEAVE_CREDENTIALS_PATH,
+ coreweave.COREWEAVE_PROFILE_NAME, bucket_name, endpoint_url,
+ mount_path, bucket_sub_path)
+
+ def mount_cached_command(self, mount_path: str) -> str:
+ """CoreWeave-specific cached mount implementation using rclone."""
+ install_cmd = mounting_utils.get_rclone_install_cmd()
+ rclone_profile_name = (
+ data_utils.Rclone.RcloneStores.COREWEAVE.get_profile_name(
+ self.name))
+ rclone_config = data_utils.Rclone.RcloneStores.COREWEAVE.get_config(
+ rclone_profile_name=rclone_profile_name)
+ mount_cached_cmd = mounting_utils.get_mount_cached_cmd(
+ rclone_config, rclone_profile_name, self.bucket.name, mount_path)
+ return mounting_utils.get_mounting_command(mount_path, install_cmd,
+ mount_cached_cmd)

- def _delete_nebius_bucket_sub_path(self, bucket_name: str,
- sub_path: str) -> bool:
- """Deletes the sub path from the bucket."""
- remove_command = (
- f'aws s3 rm s3://{bucket_name}/{sub_path}/ --recursive '
- f'--profile={nebius.NEBIUS_PROFILE_NAME}')
- return self._execute_nebius_remove_command(
- remove_command, bucket_name, f'Removing objects from '
- f'Nebius bucket {bucket_name}/{sub_path}',
- f'Failed to remove objects from '
- f'Nebius bucket {bucket_name}/{sub_path}.')
+ def _create_bucket(self, bucket_name: str) -> StorageHandle:
+ """Create bucket using S3 API with timing handling for CoreWeave."""
+ result = super()._create_bucket(bucket_name)
+ # Ensure bucket is created
+ # The newly created bucket ever takes about 18min to be accessible,
+ # here we just retry for 36 times (5s * 36 = 180s) to avoid waiting
+ # too long
+ # TODO(hailong): Update the logic here when the following
+ # issue is resolved:
+ # https://github.com/skypilot-org/skypilot/issues/7736
+ data_utils.verify_coreweave_bucket(bucket_name, retry=36)
+
+ return result
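The CoreWeave _create_bucket above waits for a freshly created bucket to become visible by retrying data_utils.verify_coreweave_bucket. As a generic, illustrative sketch (not the package's implementation), the same poll-until-ready idea looks like this; the predicate, retry count, and interval are placeholders.

import time
from typing import Callable

def wait_until_ready(predicate: Callable[[], bool],
                     retries: int = 36,
                     interval_s: float = 5.0) -> bool:
    """Polls a readiness predicate until it succeeds or retries run out."""
    for _ in range(retries):
        if predicate():
            return True
        time.sleep(interval_s)
    return False

# Example (hypothetical): wait_until_ready(lambda: bucket_exists('my-bucket'))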