skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (429)
  1. sky/__init__.py +12 -2
  2. sky/adaptors/aws.py +27 -22
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/ibm.py +5 -2
  8. sky/adaptors/kubernetes.py +64 -0
  9. sky/adaptors/nebius.py +3 -1
  10. sky/adaptors/primeintellect.py +1 -0
  11. sky/adaptors/seeweb.py +183 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/adaptors/slurm.py +478 -0
  14. sky/admin_policy.py +20 -0
  15. sky/authentication.py +157 -263
  16. sky/backends/__init__.py +3 -2
  17. sky/backends/backend.py +11 -3
  18. sky/backends/backend_utils.py +630 -185
  19. sky/backends/cloud_vm_ray_backend.py +1111 -928
  20. sky/backends/local_docker_backend.py +9 -5
  21. sky/backends/task_codegen.py +971 -0
  22. sky/backends/wheel_utils.py +18 -0
  23. sky/catalog/__init__.py +8 -3
  24. sky/catalog/aws_catalog.py +4 -0
  25. sky/catalog/common.py +19 -1
  26. sky/catalog/data_fetchers/fetch_aws.py +102 -80
  27. sky/catalog/data_fetchers/fetch_gcp.py +30 -3
  28. sky/catalog/data_fetchers/fetch_nebius.py +9 -6
  29. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  30. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  31. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  32. sky/catalog/kubernetes_catalog.py +36 -32
  33. sky/catalog/primeintellect_catalog.py +95 -0
  34. sky/catalog/runpod_catalog.py +5 -1
  35. sky/catalog/seeweb_catalog.py +184 -0
  36. sky/catalog/shadeform_catalog.py +165 -0
  37. sky/catalog/slurm_catalog.py +243 -0
  38. sky/check.py +87 -46
  39. sky/client/cli/command.py +1004 -434
  40. sky/client/cli/flags.py +4 -2
  41. sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
  42. sky/client/cli/utils.py +79 -0
  43. sky/client/common.py +12 -2
  44. sky/client/sdk.py +188 -65
  45. sky/client/sdk_async.py +34 -33
  46. sky/cloud_stores.py +82 -3
  47. sky/clouds/__init__.py +8 -0
  48. sky/clouds/aws.py +337 -129
  49. sky/clouds/azure.py +24 -18
  50. sky/clouds/cloud.py +47 -13
  51. sky/clouds/cudo.py +16 -13
  52. sky/clouds/do.py +9 -7
  53. sky/clouds/fluidstack.py +12 -5
  54. sky/clouds/gcp.py +14 -7
  55. sky/clouds/hyperbolic.py +12 -5
  56. sky/clouds/ibm.py +12 -5
  57. sky/clouds/kubernetes.py +80 -45
  58. sky/clouds/lambda_cloud.py +12 -5
  59. sky/clouds/nebius.py +23 -9
  60. sky/clouds/oci.py +19 -12
  61. sky/clouds/paperspace.py +4 -1
  62. sky/clouds/primeintellect.py +317 -0
  63. sky/clouds/runpod.py +85 -24
  64. sky/clouds/scp.py +12 -8
  65. sky/clouds/seeweb.py +477 -0
  66. sky/clouds/shadeform.py +400 -0
  67. sky/clouds/slurm.py +578 -0
  68. sky/clouds/ssh.py +6 -3
  69. sky/clouds/utils/scp_utils.py +61 -50
  70. sky/clouds/vast.py +43 -27
  71. sky/clouds/vsphere.py +14 -16
  72. sky/core.py +296 -195
  73. sky/dashboard/out/404.html +1 -1
  74. sky/dashboard/out/_next/static/KYAhEFa3FTfq4JyKVgo-s/_buildManifest.js +1 -0
  75. sky/dashboard/out/_next/static/chunks/1141-9c810f01ff4f398a.js +11 -0
  76. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  77. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  78. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  79. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  80. sky/dashboard/out/_next/static/chunks/3294.ddda8c6c6f9f24dc.js +1 -0
  81. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  82. sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-b589397dc09c5b4e.js} +1 -1
  83. sky/dashboard/out/_next/static/chunks/3850-fd5696f3bbbaddae.js +1 -0
  84. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  85. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  86. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  87. sky/dashboard/out/_next/static/chunks/6856-da20c5fd999f319c.js +1 -0
  88. sky/dashboard/out/_next/static/chunks/6990-09cbf02d3cd518c3.js +1 -0
  89. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  90. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  91. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  92. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  93. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  94. sky/dashboard/out/_next/static/chunks/9353-8369df1cf105221c.js +1 -0
  95. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  96. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  97. sky/dashboard/out/_next/static/chunks/pages/_app-68b647e26f9d2793.js +34 -0
  98. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33f525539665fdfd.js +16 -0
  99. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a7565f586ef86467.js +1 -0
  100. sky/dashboard/out/_next/static/chunks/pages/clusters-9e5d47818b9bdadd.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/pages/{config-dfb9bf07b13045f4.js → config-718cdc365de82689.js} +1 -1
  102. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-12c559ec4d81fdbd.js} +1 -1
  103. sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-d187cd0413d72475.js} +1 -1
  104. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-895847b6cf200b04.js +16 -0
  105. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-8d0f4655400b4eb9.js +21 -0
  106. sky/dashboard/out/_next/static/chunks/pages/jobs-e5a98f17f8513a96.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/pages/plugins/[...slug]-4f46050ca065d8f8.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/pages/users-2f7646eb77785a2c.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/pages/volumes-ef19d49c6d0e8500.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-96e0f298308da7e2.js} +1 -1
  111. sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-cb4da3abe08ebf19.js} +1 -1
  112. sky/dashboard/out/_next/static/chunks/webpack-fba3de387ff6bb08.js +1 -0
  113. sky/dashboard/out/_next/static/css/c5a4cfd2600fc715.css +3 -0
  114. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  115. sky/dashboard/out/clusters/[cluster].html +1 -1
  116. sky/dashboard/out/clusters.html +1 -1
  117. sky/dashboard/out/config.html +1 -1
  118. sky/dashboard/out/index.html +1 -1
  119. sky/dashboard/out/infra/[context].html +1 -1
  120. sky/dashboard/out/infra.html +1 -1
  121. sky/dashboard/out/jobs/[job].html +1 -1
  122. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  123. sky/dashboard/out/jobs.html +1 -1
  124. sky/dashboard/out/plugins/[...slug].html +1 -0
  125. sky/dashboard/out/users.html +1 -1
  126. sky/dashboard/out/volumes.html +1 -1
  127. sky/dashboard/out/workspace/new.html +1 -1
  128. sky/dashboard/out/workspaces/[name].html +1 -1
  129. sky/dashboard/out/workspaces.html +1 -1
  130. sky/data/data_utils.py +92 -1
  131. sky/data/mounting_utils.py +177 -30
  132. sky/data/storage.py +200 -19
  133. sky/data/storage_utils.py +10 -45
  134. sky/exceptions.py +18 -7
  135. sky/execution.py +74 -31
  136. sky/global_user_state.py +605 -191
  137. sky/jobs/__init__.py +2 -0
  138. sky/jobs/client/sdk.py +101 -4
  139. sky/jobs/client/sdk_async.py +31 -5
  140. sky/jobs/constants.py +15 -8
  141. sky/jobs/controller.py +726 -284
  142. sky/jobs/file_content_utils.py +128 -0
  143. sky/jobs/log_gc.py +193 -0
  144. sky/jobs/recovery_strategy.py +250 -100
  145. sky/jobs/scheduler.py +271 -173
  146. sky/jobs/server/core.py +367 -114
  147. sky/jobs/server/server.py +81 -35
  148. sky/jobs/server/utils.py +89 -35
  149. sky/jobs/state.py +1498 -620
  150. sky/jobs/utils.py +771 -306
  151. sky/logs/agent.py +40 -5
  152. sky/logs/aws.py +9 -19
  153. sky/metrics/utils.py +282 -39
  154. sky/models.py +2 -0
  155. sky/optimizer.py +7 -6
  156. sky/provision/__init__.py +38 -1
  157. sky/provision/aws/config.py +34 -13
  158. sky/provision/aws/instance.py +5 -2
  159. sky/provision/azure/instance.py +5 -3
  160. sky/provision/common.py +22 -0
  161. sky/provision/cudo/instance.py +4 -3
  162. sky/provision/do/instance.py +4 -3
  163. sky/provision/docker_utils.py +112 -28
  164. sky/provision/fluidstack/instance.py +6 -5
  165. sky/provision/gcp/config.py +6 -1
  166. sky/provision/gcp/instance.py +4 -2
  167. sky/provision/hyperbolic/instance.py +4 -2
  168. sky/provision/instance_setup.py +66 -20
  169. sky/provision/kubernetes/__init__.py +2 -0
  170. sky/provision/kubernetes/config.py +7 -44
  171. sky/provision/kubernetes/constants.py +0 -1
  172. sky/provision/kubernetes/instance.py +609 -213
  173. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  174. sky/provision/kubernetes/network.py +12 -8
  175. sky/provision/kubernetes/network_utils.py +8 -25
  176. sky/provision/kubernetes/utils.py +422 -422
  177. sky/provision/kubernetes/volume.py +150 -18
  178. sky/provision/lambda_cloud/instance.py +16 -13
  179. sky/provision/nebius/instance.py +6 -2
  180. sky/provision/nebius/utils.py +103 -86
  181. sky/provision/oci/instance.py +4 -2
  182. sky/provision/paperspace/instance.py +4 -3
  183. sky/provision/primeintellect/__init__.py +10 -0
  184. sky/provision/primeintellect/config.py +11 -0
  185. sky/provision/primeintellect/instance.py +454 -0
  186. sky/provision/primeintellect/utils.py +398 -0
  187. sky/provision/provisioner.py +45 -15
  188. sky/provision/runpod/__init__.py +2 -0
  189. sky/provision/runpod/instance.py +4 -3
  190. sky/provision/runpod/volume.py +69 -13
  191. sky/provision/scp/instance.py +307 -130
  192. sky/provision/seeweb/__init__.py +11 -0
  193. sky/provision/seeweb/config.py +13 -0
  194. sky/provision/seeweb/instance.py +812 -0
  195. sky/provision/shadeform/__init__.py +11 -0
  196. sky/provision/shadeform/config.py +12 -0
  197. sky/provision/shadeform/instance.py +351 -0
  198. sky/provision/shadeform/shadeform_utils.py +83 -0
  199. sky/provision/slurm/__init__.py +12 -0
  200. sky/provision/slurm/config.py +13 -0
  201. sky/provision/slurm/instance.py +572 -0
  202. sky/provision/slurm/utils.py +583 -0
  203. sky/provision/vast/instance.py +9 -4
  204. sky/provision/vast/utils.py +10 -6
  205. sky/provision/volume.py +164 -0
  206. sky/provision/vsphere/common/ssl_helper.py +1 -1
  207. sky/provision/vsphere/common/vapiconnect.py +2 -1
  208. sky/provision/vsphere/common/vim_utils.py +3 -2
  209. sky/provision/vsphere/instance.py +8 -6
  210. sky/provision/vsphere/vsphere_utils.py +8 -1
  211. sky/resources.py +11 -3
  212. sky/schemas/api/responses.py +107 -6
  213. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  214. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  215. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  216. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  217. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  218. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  219. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  220. sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
  221. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  222. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  223. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  224. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  225. sky/schemas/generated/jobsv1_pb2.py +86 -0
  226. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  227. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  228. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  229. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  230. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  231. sky/schemas/generated/servev1_pb2.py +58 -0
  232. sky/schemas/generated/servev1_pb2.pyi +115 -0
  233. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  234. sky/serve/autoscalers.py +2 -0
  235. sky/serve/client/impl.py +55 -21
  236. sky/serve/constants.py +4 -3
  237. sky/serve/controller.py +17 -11
  238. sky/serve/load_balancing_policies.py +1 -1
  239. sky/serve/replica_managers.py +219 -142
  240. sky/serve/serve_rpc_utils.py +179 -0
  241. sky/serve/serve_state.py +63 -54
  242. sky/serve/serve_utils.py +145 -109
  243. sky/serve/server/core.py +46 -25
  244. sky/serve/server/impl.py +311 -162
  245. sky/serve/server/server.py +21 -19
  246. sky/serve/service.py +84 -68
  247. sky/serve/service_spec.py +45 -7
  248. sky/server/auth/loopback.py +38 -0
  249. sky/server/auth/oauth2_proxy.py +12 -7
  250. sky/server/common.py +47 -24
  251. sky/server/config.py +62 -28
  252. sky/server/constants.py +9 -1
  253. sky/server/daemons.py +109 -38
  254. sky/server/metrics.py +76 -96
  255. sky/server/middleware_utils.py +166 -0
  256. sky/server/plugins.py +222 -0
  257. sky/server/requests/executor.py +384 -145
  258. sky/server/requests/payloads.py +83 -19
  259. sky/server/requests/preconditions.py +15 -13
  260. sky/server/requests/request_names.py +123 -0
  261. sky/server/requests/requests.py +511 -157
  262. sky/server/requests/serializers/decoders.py +48 -17
  263. sky/server/requests/serializers/encoders.py +102 -20
  264. sky/server/requests/serializers/return_value_serializers.py +60 -0
  265. sky/server/requests/threads.py +117 -0
  266. sky/server/rest.py +116 -24
  267. sky/server/server.py +497 -179
  268. sky/server/server_utils.py +30 -0
  269. sky/server/stream_utils.py +219 -45
  270. sky/server/uvicorn.py +30 -19
  271. sky/setup_files/MANIFEST.in +6 -1
  272. sky/setup_files/alembic.ini +8 -0
  273. sky/setup_files/dependencies.py +64 -19
  274. sky/setup_files/setup.py +44 -44
  275. sky/sky_logging.py +13 -5
  276. sky/skylet/attempt_skylet.py +116 -24
  277. sky/skylet/configs.py +3 -1
  278. sky/skylet/constants.py +139 -29
  279. sky/skylet/events.py +74 -14
  280. sky/skylet/executor/__init__.py +1 -0
  281. sky/skylet/executor/slurm.py +189 -0
  282. sky/skylet/job_lib.py +143 -105
  283. sky/skylet/log_lib.py +252 -8
  284. sky/skylet/log_lib.pyi +47 -7
  285. sky/skylet/providers/ibm/node_provider.py +12 -8
  286. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  287. sky/skylet/runtime_utils.py +21 -0
  288. sky/skylet/services.py +524 -0
  289. sky/skylet/skylet.py +27 -2
  290. sky/skylet/subprocess_daemon.py +104 -28
  291. sky/skypilot_config.py +99 -79
  292. sky/ssh_node_pools/constants.py +12 -0
  293. sky/ssh_node_pools/core.py +40 -3
  294. sky/ssh_node_pools/deploy/__init__.py +4 -0
  295. sky/ssh_node_pools/deploy/deploy.py +952 -0
  296. sky/ssh_node_pools/deploy/tunnel_utils.py +199 -0
  297. sky/ssh_node_pools/deploy/utils.py +173 -0
  298. sky/ssh_node_pools/server.py +20 -21
  299. sky/{utils/kubernetes/ssh_utils.py → ssh_node_pools/utils.py} +9 -6
  300. sky/task.py +221 -104
  301. sky/templates/aws-ray.yml.j2 +1 -0
  302. sky/templates/azure-ray.yml.j2 +1 -0
  303. sky/templates/cudo-ray.yml.j2 +1 -0
  304. sky/templates/do-ray.yml.j2 +1 -0
  305. sky/templates/fluidstack-ray.yml.j2 +1 -0
  306. sky/templates/gcp-ray.yml.j2 +1 -0
  307. sky/templates/hyperbolic-ray.yml.j2 +1 -0
  308. sky/templates/ibm-ray.yml.j2 +2 -1
  309. sky/templates/jobs-controller.yaml.j2 +3 -0
  310. sky/templates/kubernetes-ray.yml.j2 +204 -55
  311. sky/templates/lambda-ray.yml.j2 +1 -0
  312. sky/templates/nebius-ray.yml.j2 +3 -0
  313. sky/templates/oci-ray.yml.j2 +1 -0
  314. sky/templates/paperspace-ray.yml.j2 +1 -0
  315. sky/templates/primeintellect-ray.yml.j2 +72 -0
  316. sky/templates/runpod-ray.yml.j2 +1 -0
  317. sky/templates/scp-ray.yml.j2 +1 -0
  318. sky/templates/seeweb-ray.yml.j2 +171 -0
  319. sky/templates/shadeform-ray.yml.j2 +73 -0
  320. sky/templates/slurm-ray.yml.j2 +85 -0
  321. sky/templates/vast-ray.yml.j2 +2 -0
  322. sky/templates/vsphere-ray.yml.j2 +1 -0
  323. sky/templates/websocket_proxy.py +188 -43
  324. sky/usage/usage_lib.py +16 -4
  325. sky/users/model.conf +1 -1
  326. sky/users/permission.py +84 -44
  327. sky/users/rbac.py +31 -3
  328. sky/utils/accelerator_registry.py +6 -3
  329. sky/utils/admin_policy_utils.py +18 -5
  330. sky/utils/annotations.py +128 -6
  331. sky/utils/asyncio_utils.py +78 -0
  332. sky/utils/atomic.py +1 -1
  333. sky/utils/auth_utils.py +153 -0
  334. sky/utils/cli_utils/status_utils.py +12 -7
  335. sky/utils/cluster_utils.py +28 -6
  336. sky/utils/command_runner.py +283 -30
  337. sky/utils/command_runner.pyi +63 -7
  338. sky/utils/common.py +3 -1
  339. sky/utils/common_utils.py +55 -7
  340. sky/utils/config_utils.py +1 -14
  341. sky/utils/context.py +127 -40
  342. sky/utils/context_utils.py +73 -18
  343. sky/utils/controller_utils.py +229 -70
  344. sky/utils/db/db_utils.py +95 -18
  345. sky/utils/db/kv_cache.py +149 -0
  346. sky/utils/db/migration_utils.py +24 -7
  347. sky/utils/env_options.py +4 -0
  348. sky/utils/git.py +559 -1
  349. sky/utils/kubernetes/create_cluster.sh +15 -30
  350. sky/utils/kubernetes/delete_cluster.sh +10 -7
  351. sky/utils/kubernetes/generate_kind_config.py +6 -66
  352. sky/utils/kubernetes/gpu_labeler.py +13 -3
  353. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  354. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  355. sky/utils/kubernetes/kubernetes_deploy_utils.py +187 -260
  356. sky/utils/kubernetes/rsync_helper.sh +11 -3
  357. sky/utils/kubernetes/ssh-tunnel.sh +7 -376
  358. sky/utils/kubernetes_enums.py +7 -15
  359. sky/utils/lock_events.py +4 -4
  360. sky/utils/locks.py +128 -31
  361. sky/utils/log_utils.py +0 -319
  362. sky/utils/resource_checker.py +13 -10
  363. sky/utils/resources_utils.py +53 -29
  364. sky/utils/rich_utils.py +8 -4
  365. sky/utils/schemas.py +138 -52
  366. sky/utils/subprocess_utils.py +17 -4
  367. sky/utils/thread_utils.py +91 -0
  368. sky/utils/timeline.py +2 -1
  369. sky/utils/ux_utils.py +35 -1
  370. sky/utils/volume.py +88 -4
  371. sky/utils/yaml_utils.py +9 -0
  372. sky/volumes/client/sdk.py +48 -10
  373. sky/volumes/server/core.py +59 -22
  374. sky/volumes/server/server.py +46 -17
  375. sky/volumes/volume.py +54 -42
  376. sky/workspaces/core.py +57 -21
  377. sky/workspaces/server.py +13 -12
  378. sky_templates/README.md +3 -0
  379. sky_templates/__init__.py +3 -0
  380. sky_templates/ray/__init__.py +0 -0
  381. sky_templates/ray/start_cluster +183 -0
  382. sky_templates/ray/stop_cluster +75 -0
  383. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/METADATA +343 -65
  384. skypilot_nightly-1.0.0.dev20251210.dist-info/RECORD +629 -0
  385. skypilot_nightly-1.0.0.dev20251210.dist-info/top_level.txt +2 -0
  386. sky/client/cli/git.py +0 -549
  387. sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
  388. sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
  389. sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
  390. sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
  391. sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
  392. sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
  393. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +0 -1
  394. sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
  395. sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
  396. sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
  397. sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
  398. sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
  399. sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
  400. sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
  401. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
  402. sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
  403. sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
  404. sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
  405. sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
  406. sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
  407. sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
  408. sky/dashboard/out/_next/static/chunks/pages/_app-ce361c6959bc2001.js +0 -34
  409. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
  410. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
  411. sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
  412. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
  413. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
  414. sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
  415. sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
  416. sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
  417. sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
  418. sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
  419. sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
  420. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  421. sky/utils/kubernetes/cleanup-tunnel.sh +0 -62
  422. sky/utils/kubernetes/deploy_remote_cluster.py +0 -1299
  423. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  424. skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
  425. skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
  426. /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → KYAhEFa3FTfq4JyKVgo-s}/_ssgManifest.js +0 -0
  427. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/WHEEL +0 -0
  428. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/entry_points.txt +0 -0
  429. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/licenses/LICENSE +0 -0
sky/data/storage.py CHANGED
@@ -23,6 +23,7 @@ from sky import skypilot_config
23
23
  from sky.adaptors import aws
24
24
  from sky.adaptors import azure
25
25
  from sky.adaptors import cloudflare
26
+ from sky.adaptors import coreweave
26
27
  from sky.adaptors import gcp
27
28
  from sky.adaptors import ibm
28
29
  from sky.adaptors import nebius
@@ -62,6 +63,7 @@ STORE_ENABLED_CLOUDS: List[str] = [
62
63
  str(clouds.OCI()),
63
64
  str(clouds.Nebius()),
64
65
  cloudflare.NAME,
66
+ coreweave.NAME,
65
67
  ]
66
68
 
67
69
  # Maximum number of concurrent rsync upload processes
@@ -93,6 +95,12 @@ def get_cached_enabled_storage_cloud_names_or_refresh(
93
95
  r2_is_enabled, _ = cloudflare.check_storage_credentials()
94
96
  if r2_is_enabled:
95
97
  enabled_clouds.append(cloudflare.NAME)
98
+
99
+ # Similarly, handle CoreWeave storage credentials
100
+ coreweave_is_enabled, _ = coreweave.check_storage_credentials()
101
+ if coreweave_is_enabled:
102
+ enabled_clouds.append(coreweave.NAME)
103
+
96
104
  if raise_if_no_cloud_access and not enabled_clouds:
97
105
  raise exceptions.NoCloudAccessError(
98
106
  'No cloud access available for storage. '
@@ -107,10 +115,10 @@ def _is_storage_cloud_enabled(cloud_name: str,
107
115
  if cloud_name in enabled_storage_cloud_names:
108
116
  return True
109
117
  if try_fix_with_sky_check:
110
- # TODO(zhwu): Only check the specified cloud to speed up.
111
118
  sky_check.check_capability(
112
119
  sky_cloud.CloudCapability.STORAGE,
113
120
  quiet=True,
121
+ clouds=[cloud_name],
114
122
  workspace=skypilot_config.get_active_workspace())
115
123
  return _is_storage_cloud_enabled(cloud_name,
116
124
  try_fix_with_sky_check=False)
@@ -126,6 +134,7 @@ class StoreType(enum.Enum):
126
134
  IBM = 'IBM'
127
135
  OCI = 'OCI'
128
136
  NEBIUS = 'NEBIUS'
137
+ COREWEAVE = 'COREWEAVE'
129
138
  VOLUME = 'VOLUME'
130
139
 
131
140
  @classmethod
@@ -746,6 +755,11 @@ class Storage(object):
746
755
  previous_store_type = store_type
747
756
  else:
748
757
  new_store_type = store_type
758
+ if previous_store_type is None or new_store_type is None:
759
+ # This should not happen if the condition above is true,
760
+ # but add check for type safety
761
+ raise exceptions.StorageBucketCreateError(
762
+ f'Bucket {self.name} has inconsistent store types.')
749
763
  with ux_utils.print_exception_no_traceback():
750
764
  raise exceptions.StorageBucketCreateError(
751
765
  f'Bucket {self.name} was previously created for '
@@ -776,8 +790,8 @@ class Storage(object):
776
790
  source=self.source,
777
791
  mode=self.mode)
778
792
 
779
- for store in input_stores:
780
- self.add_store(store)
793
+ for store_type in input_stores:
794
+ self.add_store(store_type)
781
795
 
782
796
  if self.source is not None:
783
797
  # If source is a pre-existing bucket, connect to the bucket
@@ -792,10 +806,11 @@ class Storage(object):
792
806
  elif self.source.startswith('oci://'):
793
807
  self.add_store(StoreType.OCI)
794
808
 
795
- store_type = StoreType.find_s3_compatible_config_by_prefix(
796
- self.source)
797
- if store_type:
798
- self.add_store(store_type)
809
+ s3_compatible_store_type: Optional[StoreType] = (
810
+ StoreType.find_s3_compatible_config_by_prefix(
811
+ self.source))
812
+ if s3_compatible_store_type:
813
+ self.add_store(s3_compatible_store_type)
799
814
 
800
815
  def get_bucket_sub_path_prefix(self, blob_path: str) -> str:
801
816
  """Adds the bucket sub path prefix to the blob path."""
@@ -883,7 +898,7 @@ class Storage(object):
883
898
  f'{source} in the file_mounts section of your YAML')
884
899
  is_local_source = True
885
900
  elif split_path.scheme in [
886
- 's3', 'gs', 'https', 'r2', 'cos', 'oci', 'nebius'
901
+ 's3', 'gs', 'https', 'r2', 'cos', 'oci', 'nebius', 'cw'
887
902
  ]:
888
903
  is_local_source = False
889
904
  # Storage mounting does not support mounting specific files from
@@ -908,7 +923,8 @@ class Storage(object):
908
923
  with ux_utils.print_exception_no_traceback():
909
924
  raise exceptions.StorageSourceError(
910
925
  f'Supported paths: local, s3://, gs://, https://, '
911
- f'r2://, cos://, oci://, nebius://. Got: {source}')
926
+ f'r2://, cos://, oci://, nebius://, cw://. '
927
+ f'Got: {source}')
912
928
  return source, is_local_source
913
929
 
914
930
  def _validate_storage_spec(self, name: Optional[str]) -> None:
@@ -923,7 +939,16 @@ class Storage(object):
923
939
  """
924
940
  prefix = name.split('://')[0]
925
941
  prefix = prefix.lower()
926
- if prefix in ['s3', 'gs', 'https', 'r2', 'cos', 'oci', 'nebius']:
942
+ if prefix in [
943
+ 's3',
944
+ 'gs',
945
+ 'https',
946
+ 'r2',
947
+ 'cos',
948
+ 'oci',
949
+ 'nebius',
950
+ 'cw',
951
+ ]:
927
952
  with ux_utils.print_exception_no_traceback():
928
953
  raise exceptions.StorageNameError(
929
954
  'Prefix detected: `name` cannot start with '
@@ -1062,6 +1087,12 @@ class Storage(object):
1062
1087
  source=self.source,
1063
1088
  sync_on_reconstruction=self.sync_on_reconstruction,
1064
1089
  _bucket_sub_path=self._bucket_sub_path)
1090
+ elif s_type == StoreType.COREWEAVE:
1091
+ store = CoreWeaveStore.from_metadata(
1092
+ s_metadata,
1093
+ source=self.source,
1094
+ sync_on_reconstruction=self.sync_on_reconstruction,
1095
+ _bucket_sub_path=self._bucket_sub_path)
1065
1096
  else:
1066
1097
  with ux_utils.print_exception_no_traceback():
1067
1098
  raise ValueError(f'Unknown store type: {s_type}')
@@ -1301,6 +1332,17 @@ class Storage(object):
1301
1332
  if store.is_sky_managed:
1302
1333
  global_user_state.set_storage_status(self.name, StorageStatus.READY)
1303
1334
 
1335
+ @classmethod
1336
+ def from_handle(cls, handle: StorageHandle) -> 'Storage':
1337
+ """Create Storage from StorageHandle object.
1338
+ """
1339
+ obj = cls(name=handle.storage_name,
1340
+ source=handle.source,
1341
+ sync_on_reconstruction=False)
1342
+ obj.handle = handle
1343
+ obj._add_store_from_metadata(handle.sky_stores)
1344
+ return obj
1345
+
1304
1346
  @classmethod
1305
1347
  def from_yaml_config(cls, config: Dict[str, Any]) -> 'Storage':
1306
1348
  common_utils.validate_schema(config, schemas.get_storage_schema(),
@@ -1406,6 +1448,7 @@ class S3CompatibleConfig:
1406
1448
  aws_profile: Optional[str] = None
1407
1449
  get_endpoint_url: Optional[Callable[[], str]] = None
1408
1450
  credentials_file: Optional[str] = None
1451
+ config_file: Optional[str] = None
1409
1452
  extra_cli_args: Optional[List[str]] = None
1410
1453
 
1411
1454
  # Provider-specific settings
@@ -1426,8 +1469,8 @@ class S3CompatibleStore(AbstractStore):
1426
1469
  """Base class for S3-compatible object storage providers.
1427
1470
 
1428
1471
  This class provides a unified interface for all S3-compatible storage
1429
- providers (AWS S3, Cloudflare R2, Nebius, MinIO, etc.) by leveraging
1430
- a configuration-driven approach that eliminates code duplication.
1472
+ providers (AWS S3, Cloudflare R2, Nebius, MinIO, CoreWeave, etc.) by
1473
+ leveraging a configuration-driven approach that eliminates code duplication
1431
1474
 
1432
1475
  ## Adding a New S3-Compatible Store
1433
1476
 
@@ -1853,6 +1896,9 @@ class S3CompatibleStore(AbstractStore):
1853
1896
  if self.config.credentials_file:
1854
1897
  cmd = 'AWS_SHARED_CREDENTIALS_FILE=' + \
1855
1898
  f'{self.config.credentials_file} {cmd}'
1899
+ if self.config.config_file:
1900
+ cmd = 'AWS_CONFIG_FILE=' + \
1901
+ f'{self.config.config_file} {cmd}'
1856
1902
 
1857
1903
  return cmd
1858
1904
 
@@ -1898,6 +1944,9 @@ class S3CompatibleStore(AbstractStore):
1898
1944
  if self.config.credentials_file:
1899
1945
  cmd = 'AWS_SHARED_CREDENTIALS_FILE=' + \
1900
1946
  f'{self.config.credentials_file} {cmd}'
1947
+ if self.config.config_file:
1948
+ cmd = 'AWS_CONFIG_FILE=' + \
1949
+ f'{self.config.config_file} {cmd}'
1901
1950
 
1902
1951
  return cmd
1903
1952
 
@@ -1951,6 +2000,9 @@ class S3CompatibleStore(AbstractStore):
1951
2000
  if self.config.credentials_file:
1952
2001
  command = (f'AWS_SHARED_CREDENTIALS_FILE='
1953
2002
  f'{self.config.credentials_file} {command}')
2003
+ if self.config.config_file:
2004
+ command = 'AWS_CONFIG_FILE=' + \
2005
+ f'{self.config.config_file} {command}'
1954
2006
  with ux_utils.print_exception_no_traceback():
1955
2007
  raise exceptions.StorageBucketGetError(
1956
2008
  _BUCKET_FAIL_TO_CONNECT_MESSAGE.format(name=self.name) +
@@ -2004,7 +2056,7 @@ class S3CompatibleStore(AbstractStore):
2004
2056
  except aws.botocore_exceptions().ClientError as e:
2005
2057
  with ux_utils.print_exception_no_traceback():
2006
2058
  raise exceptions.StorageBucketCreateError(
2007
- f'Attempted to create a bucket {self.name} but failed.'
2059
+ f'Attempted to create S3 bucket {self.name} but failed.'
2008
2060
  ) from e
2009
2061
  return self.config.resource_factory(bucket_name)
2010
2062
 
@@ -2023,7 +2075,9 @@ class S3CompatibleStore(AbstractStore):
2023
2075
  remove_command = (f'AWS_SHARED_CREDENTIALS_FILE='
2024
2076
  f'{self.config.credentials_file} '
2025
2077
  f'{remove_command}')
2026
-
2078
+ if self.config.config_file:
2079
+ remove_command = 'AWS_CONFIG_FILE=' + \
2080
+ f'{self.config.config_file} {remove_command}'
2027
2081
  return self._execute_remove_command(
2028
2082
  remove_command, bucket_name,
2029
2083
  f'Deleting {self.config.store_type} bucket {bucket_name}',
@@ -2036,8 +2090,9 @@ class S3CompatibleStore(AbstractStore):
2036
2090
  try:
2037
2091
  with rich_utils.safe_status(
2038
2092
  ux_utils.spinner_message(hint_operating)):
2039
- subprocess.check_output(command.split(' '),
2040
- stderr=subprocess.STDOUT)
2093
+ subprocess.check_output(command,
2094
+ stderr=subprocess.STDOUT,
2095
+ shell=True)
2041
2096
  except subprocess.CalledProcessError as e:
2042
2097
  if 'NoSuchBucket' in e.output.decode('utf-8'):
2043
2098
  logger.debug(
@@ -2080,7 +2135,9 @@ class S3CompatibleStore(AbstractStore):
2080
2135
  remove_command = (f'AWS_SHARED_CREDENTIALS_FILE='
2081
2136
  f'{self.config.credentials_file} '
2082
2137
  f'{remove_command}')
2083
-
2138
+ if self.config.config_file:
2139
+ remove_command = 'AWS_CONFIG_FILE=' + \
2140
+ f'{self.config.config_file} {remove_command}'
2084
2141
  return self._execute_remove_command(
2085
2142
  remove_command, bucket_name,
2086
2143
  (f'Removing objects from {self.config.store_type} bucket '
@@ -2157,6 +2214,10 @@ class GcsStore(AbstractStore):
2157
2214
  elif self.source.startswith('oci://'):
2158
2215
  raise NotImplementedError(
2159
2216
  'Moving data from OCI to GCS is currently not supported.')
2217
+ elif self.source.startswith('cw://'):
2218
+ raise NotImplementedError(
2219
+ 'Moving data from CoreWeave Object Storage to GCS is'
2220
+ ' currently not supported.')
2160
2221
  # Validate name
2161
2222
  self.name = self.validate_name(self.name)
2162
2223
  # Check if the storage is enabled
@@ -2543,7 +2604,7 @@ class GcsStore(AbstractStore):
2543
2604
  except Exception as e: # pylint: disable=broad-except
2544
2605
  with ux_utils.print_exception_no_traceback():
2545
2606
  raise exceptions.StorageBucketCreateError(
2546
- f'Attempted to create a bucket {self.name} but failed.'
2607
+ f'Attempted to create GCS bucket {self.name} but failed.'
2547
2608
  ) from e
2548
2609
  logger.info(
2549
2610
  f' {colorama.Style.DIM}Created GCS bucket {new_bucket.name!r} in '
@@ -2700,7 +2761,11 @@ class AzureBlobStore(AbstractStore):
2700
2761
  name=override_args.get('name', metadata.name),
2701
2762
  storage_account_name=override_args.get(
2702
2763
  'storage_account', metadata.storage_account_name),
2703
- source=override_args.get('source', metadata.source),
2764
+ # TODO(cooperc): fix the types for mypy 1.16
2765
+ # Azure store expects a string path; metadata.source may be a Path
2766
+ # or List[Path].
2767
+ source=override_args.get('source',
2768
+ metadata.source), # type: ignore[arg-type]
2704
2769
  region=override_args.get('region', metadata.region),
2705
2770
  is_sky_managed=override_args.get('is_sky_managed',
2706
2771
  metadata.is_sky_managed),
@@ -2768,6 +2833,10 @@ class AzureBlobStore(AbstractStore):
2768
2833
  elif self.source.startswith('oci://'):
2769
2834
  raise NotImplementedError(
2770
2835
  'Moving data from OCI to AZureBlob is not supported.')
2836
+ elif self.source.startswith('cw://'):
2837
+ raise NotImplementedError(
2838
+ 'Moving data from CoreWeave Object Storage to AzureBlob is'
2839
+ ' currently not supported.')
2771
2840
  # Validate name
2772
2841
  self.name = self.validate_name(self.name)
2773
2842
 
@@ -3139,6 +3208,8 @@ class AzureBlobStore(AbstractStore):
3139
3208
  raise NotImplementedError(error_message.format('OCI'))
3140
3209
  elif self.source.startswith('nebius://'):
3141
3210
  raise NotImplementedError(error_message.format('NEBIUS'))
3211
+ elif self.source.startswith('cw://'):
3212
+ raise NotImplementedError(error_message.format('CoreWeave'))
3142
3213
  else:
3143
3214
  self.batch_az_blob_sync([self.source])
3144
3215
  except exceptions.StorageUploadError:
@@ -3557,6 +3628,10 @@ class IBMCosStore(AbstractStore):
3557
3628
  assert self.name == data_utils.split_cos_path(self.source)[0], (
3558
3629
  'COS Bucket is specified as path, the name should be '
3559
3630
  'the same as COS bucket.')
3631
+ elif self.source.startswith('cw://'):
3632
+ raise NotImplementedError(
3633
+ 'Moving data from CoreWeave Object Storage to COS is '
3634
+ 'currently not supported.')
3560
3635
  # Validate name
3561
3636
  self.name = IBMCosStore.validate_name(self.name)
3562
3637
 
@@ -3617,6 +3692,9 @@ class IBMCosStore(AbstractStore):
3617
3692
  StorageBucketGetError: If fetching existing bucket fails
3618
3693
  StorageInitError: If general initialization fails.
3619
3694
  """
3695
+ if self.region is None:
3696
+ raise exceptions.StorageInitError(
3697
+ 'Region must be specified for IBM COS store.')
3620
3698
  self.client = ibm.get_cos_client(self.region)
3621
3699
  self.s3_resource = ibm.get_cos_resource(self.region)
3622
3700
  self.bucket, is_new_bucket = self._get_bucket()
@@ -3655,6 +3733,9 @@ class IBMCosStore(AbstractStore):
3655
3733
  elif self.source.startswith('r2://'):
3656
3734
  raise Exception('IBM COS currently not supporting'
3657
3735
  'data transfers between COS and r2')
3736
+ elif self.source.startswith('cw://'):
3737
+ raise Exception('IBM COS currently not supporting'
3738
+ 'data transfers between COS and CoreWeave')
3658
3739
  else:
3659
3740
  self.batch_ibm_rsync([self.source])
3660
3741
 
@@ -4580,3 +4661,103 @@ class NebiusStore(S3CompatibleStore):
4580
4661
  rclone_config, rclone_profile_name, self.bucket.name, mount_path)
4581
4662
  return mounting_utils.get_mounting_command(mount_path, install_cmd,
4582
4663
  mount_cached_cmd)
4664
+
4665
+
4666
@register_s3_compatible_store
class CoreWeaveStore(S3CompatibleStore):
    """S3-compatible store backed by CoreWeave Object Storage buckets.

    Generic bucket handling is inherited from S3CompatibleStore; this
    subclass supplies the CoreWeave configuration and overrides the bucket
    lookup, bucket creation and cached-mount steps.
    """

    @classmethod
    def get_config(cls) -> S3CompatibleConfig:
        """Build the S3-compatible configuration for CoreWeave."""
        coreweave_config = S3CompatibleConfig(
            store_type='COREWEAVE',
            url_prefix='cw://',
            # The region argument is accepted but not used by this factory.
            client_factory=lambda region: data_utils.create_coreweave_client(),
            resource_factory=lambda name: coreweave.resource('s3').Bucket(name),
            split_path=data_utils.split_coreweave_path,
            verify_bucket=data_utils.verify_coreweave_bucket,
            aws_profile=coreweave.COREWEAVE_PROFILE_NAME,
            get_endpoint_url=coreweave.get_endpoint,
            credentials_file=coreweave.COREWEAVE_CREDENTIALS_PATH,
            config_file=coreweave.COREWEAVE_CONFIG_PATH,
            cloud_name=coreweave.NAME,
            default_region=coreweave.DEFAULT_REGION,
            mount_cmd_factory=cls._get_coreweave_mount_cmd,
        )
        return coreweave_config

    def _get_bucket(self) -> Tuple[StorageHandle, bool]:
        """Look up an existing bucket through CoreWeave's S3 API.

        Returns:
            The bucket handle and False; a bucket is never created here.

        Raises:
            exceptions.StorageBucketGetError: If the bucket does not pass
                CoreWeave bucket verification.
        """
        handle = self.config.resource_factory(self.name)

        # CoreWeave uses its own verification helper instead of head_bucket.
        if not data_utils.verify_coreweave_bucket(self.name):
            # TODO(hailong): Enable bucket creation for CoreWeave once
            # https://github.com/skypilot-org/skypilot/issues/7736 is
            # resolved. Creation is disabled to avoid waiting too long.
            # The disabled flow was: reject a non-existent bucket given as a
            # `cw://` source with StorageBucketGetError; otherwise create
            # the bucket and return (bucket, True) when
            # self.sync_on_reconstruction is set, else raise
            # StorageExternalDeletionError.
            raise exceptions.StorageBucketGetError(
                f'Bucket {self.name!r} does not exist. CoreWeave buckets can take'
                ' a long time to become accessible after creation, so SkyPilot'
                ' does not create them automatically. Please create the bucket'
                ' manually in CoreWeave and wait for it to be accessible before'
                ' using it.')

        self._validate_existing_bucket()
        return handle, False

    @classmethod
    def _get_coreweave_mount_cmd(cls, bucket_name: str, mount_path: str,
                                 bucket_sub_path: Optional[str]) -> str:
        """Build the mount command for a CoreWeave bucket.

        Registered as the `mount_cmd_factory` in get_config().
        """
        endpoint = coreweave.get_endpoint()
        mount_cmd = mounting_utils.get_coreweave_mount_cmd(
            coreweave.COREWEAVE_CREDENTIALS_PATH,
            coreweave.COREWEAVE_PROFILE_NAME, bucket_name, endpoint,
            mount_path, bucket_sub_path)
        return mount_cmd

    def mount_cached_command(self, mount_path: str) -> str:
        """Return the rclone-based cached-mount command for this bucket."""
        rclone_install = mounting_utils.get_rclone_install_cmd()
        profile = data_utils.Rclone.RcloneStores.COREWEAVE.get_profile_name(
            self.name)
        rclone_conf = data_utils.Rclone.RcloneStores.COREWEAVE.get_config(
            rclone_profile_name=profile)
        cached_mount = mounting_utils.get_mount_cached_cmd(
            rclone_conf, profile, self.bucket.name, mount_path)
        return mounting_utils.get_mounting_command(mount_path, rclone_install,
                                                   cached_mount)

    def _create_bucket(self, bucket_name: str) -> StorageHandle:
        """Create the bucket via the parent class, then poll for it.

        The outcome of the verification call is not inspected; the handle
        returned by the parent implementation is passed through unchanged.
        """
        created = super()._create_bucket(bucket_name)
        # A newly created bucket has taken about 18min to become accessible.
        # Verification is capped at 36 retries (noted upstream as
        # 5s * 36 = 180s) to avoid waiting too long.
        # TODO(hailong): Update the logic here when the following issue is
        # resolved: https://github.com/skypilot-org/skypilot/issues/7736
        data_utils.verify_coreweave_bucket(bucket_name, retry=36)
        return created
sky/data/storage_utils.py CHANGED
@@ -5,7 +5,7 @@ import pathlib
5
5
  import shlex
6
6
  import stat
7
7
  import subprocess
8
- from typing import Any, Dict, List, Optional, Set, TextIO, Union
8
+ from typing import List, Optional, Set, TextIO, Union
9
9
  import warnings
10
10
  import zipfile
11
11
 
@@ -15,7 +15,6 @@ from sky import exceptions
15
15
  from sky import sky_logging
16
16
  from sky.skylet import constants
17
17
  from sky.utils import common_utils
18
- from sky.utils import log_utils
19
18
 
20
19
  logger = sky_logging.init_logger(__name__)
21
20
 
@@ -23,49 +22,6 @@ _USE_SKYIGNORE_HINT = (
23
22
  'To avoid using .gitignore, you can create a .skyignore file instead.')
24
23
 
25
24
 
26
- def format_storage_table(storages: List[Dict[str, Any]],
27
- show_all: bool = False) -> str:
28
- """Format the storage table for display.
29
-
30
- Args:
31
- storage_table (dict): The storage table.
32
-
33
- Returns:
34
- str: The formatted storage table.
35
- """
36
- storage_table = log_utils.create_table([
37
- 'NAME',
38
- 'UPDATED',
39
- 'STORE',
40
- 'COMMAND',
41
- 'STATUS',
42
- ])
43
-
44
- for row in storages:
45
- launched_at = row['launched_at']
46
- if show_all:
47
- command = row['last_use']
48
- else:
49
- command = common_utils.truncate_long_string(
50
- row['last_use'], constants.LAST_USE_TRUNC_LENGTH)
51
- storage_table.add_row([
52
- # NAME
53
- row['name'],
54
- # LAUNCHED
55
- log_utils.readable_time_duration(launched_at),
56
- # CLOUDS
57
- ', '.join([s.value for s in row['store']]),
58
- # COMMAND,
59
- command,
60
- # STATUS
61
- row['status'].value,
62
- ])
63
- if storages:
64
- return str(storage_table)
65
- else:
66
- return 'No existing storage.'
67
-
68
-
69
25
  def get_excluded_files_from_skyignore(src_dir_path: str) -> List[str]:
70
26
  """List files and patterns ignored by the .skyignore file
71
27
  in the given source directory.
@@ -295,6 +251,15 @@ def zip_files_and_folders(items: List[str],
295
251
  archive_name = _get_archive_name(item, item)
296
252
  zipf.write(item, archive_name)
297
253
  elif os.path.isdir(item):
254
+ # Include root dir
255
+ archive_name = _get_archive_name(item, item)
256
+ # If it's a symlink, store it as a symlink
257
+ if os.path.islink(item):
258
+ _store_symlink(zipf, item, archive_name, is_dir=True)
259
+ else:
260
+ zipf.write(item, archive_name)
261
+
262
+ # Include dir contents recursively
298
263
  excluded_files = set([
299
264
  os.path.join(item, f.rstrip('/'))
300
265
  for f in get_excluded_files(item)
sky/exceptions.py CHANGED
@@ -208,12 +208,6 @@ class InconsistentHighAvailabilityError(Exception):
208
208
  pass
209
209
 
210
210
 
211
- class InconsistentConsolidationModeError(Exception):
212
- """Raised when the consolidation mode property in the user config
213
- is inconsistent with the actual cluster."""
214
- pass
215
-
216
-
217
211
  class ProvisionPrechecksError(Exception):
218
212
  """Raised when a managed job fails prechecks before provision.
219
213
 
@@ -649,7 +643,14 @@ class VolumeTopologyConflictError(Exception):
649
643
 
650
644
  class ServerTemporarilyUnavailableError(Exception):
651
645
  """Raised when the server is temporarily unavailable."""
652
- pass
646
+
647
+ def __init__(self, message: str):
648
+ super().__init__(message)
649
+ self.message = message
650
+
651
+ def __str__(self):
652
+ return ('SkyPilot API server is temporarily unavailable: '
653
+ f'{self.message}. Please try again later.')
653
654
 
654
655
 
655
656
  class RestfulPolicyError(Exception):
@@ -675,9 +676,19 @@ class SkyletInternalError(Exception):
675
676
  pass
676
677
 
677
678
 
679
class SkyletMethodNotImplementedError(Exception):
    """Signals that the server does not implement a Skylet gRPC method."""
682
+
683
+
678
684
  class ClientError(Exception):
679
685
  """Raised when a there is a client error occurs.
680
686
 
681
687
  If a request encounters a ClientError, it will not be retried to the server.
682
688
  """
683
689
  pass
690
+
691
+
692
class ConcurrentWorkerExhaustedError(Exception):
    """Signals that the concurrent worker is exhausted."""
+ pass