skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (429)
  1. sky/__init__.py +12 -2
  2. sky/adaptors/aws.py +27 -22
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/ibm.py +5 -2
  8. sky/adaptors/kubernetes.py +64 -0
  9. sky/adaptors/nebius.py +3 -1
  10. sky/adaptors/primeintellect.py +1 -0
  11. sky/adaptors/seeweb.py +183 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/adaptors/slurm.py +478 -0
  14. sky/admin_policy.py +20 -0
  15. sky/authentication.py +157 -263
  16. sky/backends/__init__.py +3 -2
  17. sky/backends/backend.py +11 -3
  18. sky/backends/backend_utils.py +630 -185
  19. sky/backends/cloud_vm_ray_backend.py +1111 -928
  20. sky/backends/local_docker_backend.py +9 -5
  21. sky/backends/task_codegen.py +971 -0
  22. sky/backends/wheel_utils.py +18 -0
  23. sky/catalog/__init__.py +8 -3
  24. sky/catalog/aws_catalog.py +4 -0
  25. sky/catalog/common.py +19 -1
  26. sky/catalog/data_fetchers/fetch_aws.py +102 -80
  27. sky/catalog/data_fetchers/fetch_gcp.py +30 -3
  28. sky/catalog/data_fetchers/fetch_nebius.py +9 -6
  29. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  30. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  31. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  32. sky/catalog/kubernetes_catalog.py +36 -32
  33. sky/catalog/primeintellect_catalog.py +95 -0
  34. sky/catalog/runpod_catalog.py +5 -1
  35. sky/catalog/seeweb_catalog.py +184 -0
  36. sky/catalog/shadeform_catalog.py +165 -0
  37. sky/catalog/slurm_catalog.py +243 -0
  38. sky/check.py +87 -46
  39. sky/client/cli/command.py +1004 -434
  40. sky/client/cli/flags.py +4 -2
  41. sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
  42. sky/client/cli/utils.py +79 -0
  43. sky/client/common.py +12 -2
  44. sky/client/sdk.py +188 -65
  45. sky/client/sdk_async.py +34 -33
  46. sky/cloud_stores.py +82 -3
  47. sky/clouds/__init__.py +8 -0
  48. sky/clouds/aws.py +337 -129
  49. sky/clouds/azure.py +24 -18
  50. sky/clouds/cloud.py +47 -13
  51. sky/clouds/cudo.py +16 -13
  52. sky/clouds/do.py +9 -7
  53. sky/clouds/fluidstack.py +12 -5
  54. sky/clouds/gcp.py +14 -7
  55. sky/clouds/hyperbolic.py +12 -5
  56. sky/clouds/ibm.py +12 -5
  57. sky/clouds/kubernetes.py +80 -45
  58. sky/clouds/lambda_cloud.py +12 -5
  59. sky/clouds/nebius.py +23 -9
  60. sky/clouds/oci.py +19 -12
  61. sky/clouds/paperspace.py +4 -1
  62. sky/clouds/primeintellect.py +317 -0
  63. sky/clouds/runpod.py +85 -24
  64. sky/clouds/scp.py +12 -8
  65. sky/clouds/seeweb.py +477 -0
  66. sky/clouds/shadeform.py +400 -0
  67. sky/clouds/slurm.py +578 -0
  68. sky/clouds/ssh.py +6 -3
  69. sky/clouds/utils/scp_utils.py +61 -50
  70. sky/clouds/vast.py +43 -27
  71. sky/clouds/vsphere.py +14 -16
  72. sky/core.py +296 -195
  73. sky/dashboard/out/404.html +1 -1
  74. sky/dashboard/out/_next/static/KYAhEFa3FTfq4JyKVgo-s/_buildManifest.js +1 -0
  75. sky/dashboard/out/_next/static/chunks/1141-9c810f01ff4f398a.js +11 -0
  76. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  77. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  78. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  79. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  80. sky/dashboard/out/_next/static/chunks/3294.ddda8c6c6f9f24dc.js +1 -0
  81. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  82. sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-b589397dc09c5b4e.js} +1 -1
  83. sky/dashboard/out/_next/static/chunks/3850-fd5696f3bbbaddae.js +1 -0
  84. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  85. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  86. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  87. sky/dashboard/out/_next/static/chunks/6856-da20c5fd999f319c.js +1 -0
  88. sky/dashboard/out/_next/static/chunks/6990-09cbf02d3cd518c3.js +1 -0
  89. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  90. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  91. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  92. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  93. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  94. sky/dashboard/out/_next/static/chunks/9353-8369df1cf105221c.js +1 -0
  95. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  96. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  97. sky/dashboard/out/_next/static/chunks/pages/_app-68b647e26f9d2793.js +34 -0
  98. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33f525539665fdfd.js +16 -0
  99. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a7565f586ef86467.js +1 -0
  100. sky/dashboard/out/_next/static/chunks/pages/clusters-9e5d47818b9bdadd.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/pages/{config-dfb9bf07b13045f4.js → config-718cdc365de82689.js} +1 -1
  102. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-12c559ec4d81fdbd.js} +1 -1
  103. sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-d187cd0413d72475.js} +1 -1
  104. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-895847b6cf200b04.js +16 -0
  105. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-8d0f4655400b4eb9.js +21 -0
  106. sky/dashboard/out/_next/static/chunks/pages/jobs-e5a98f17f8513a96.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/pages/plugins/[...slug]-4f46050ca065d8f8.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/pages/users-2f7646eb77785a2c.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/pages/volumes-ef19d49c6d0e8500.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-96e0f298308da7e2.js} +1 -1
  111. sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-cb4da3abe08ebf19.js} +1 -1
  112. sky/dashboard/out/_next/static/chunks/webpack-fba3de387ff6bb08.js +1 -0
  113. sky/dashboard/out/_next/static/css/c5a4cfd2600fc715.css +3 -0
  114. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  115. sky/dashboard/out/clusters/[cluster].html +1 -1
  116. sky/dashboard/out/clusters.html +1 -1
  117. sky/dashboard/out/config.html +1 -1
  118. sky/dashboard/out/index.html +1 -1
  119. sky/dashboard/out/infra/[context].html +1 -1
  120. sky/dashboard/out/infra.html +1 -1
  121. sky/dashboard/out/jobs/[job].html +1 -1
  122. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  123. sky/dashboard/out/jobs.html +1 -1
  124. sky/dashboard/out/plugins/[...slug].html +1 -0
  125. sky/dashboard/out/users.html +1 -1
  126. sky/dashboard/out/volumes.html +1 -1
  127. sky/dashboard/out/workspace/new.html +1 -1
  128. sky/dashboard/out/workspaces/[name].html +1 -1
  129. sky/dashboard/out/workspaces.html +1 -1
  130. sky/data/data_utils.py +92 -1
  131. sky/data/mounting_utils.py +177 -30
  132. sky/data/storage.py +200 -19
  133. sky/data/storage_utils.py +10 -45
  134. sky/exceptions.py +18 -7
  135. sky/execution.py +74 -31
  136. sky/global_user_state.py +605 -191
  137. sky/jobs/__init__.py +2 -0
  138. sky/jobs/client/sdk.py +101 -4
  139. sky/jobs/client/sdk_async.py +31 -5
  140. sky/jobs/constants.py +15 -8
  141. sky/jobs/controller.py +726 -284
  142. sky/jobs/file_content_utils.py +128 -0
  143. sky/jobs/log_gc.py +193 -0
  144. sky/jobs/recovery_strategy.py +250 -100
  145. sky/jobs/scheduler.py +271 -173
  146. sky/jobs/server/core.py +367 -114
  147. sky/jobs/server/server.py +81 -35
  148. sky/jobs/server/utils.py +89 -35
  149. sky/jobs/state.py +1498 -620
  150. sky/jobs/utils.py +771 -306
  151. sky/logs/agent.py +40 -5
  152. sky/logs/aws.py +9 -19
  153. sky/metrics/utils.py +282 -39
  154. sky/models.py +2 -0
  155. sky/optimizer.py +7 -6
  156. sky/provision/__init__.py +38 -1
  157. sky/provision/aws/config.py +34 -13
  158. sky/provision/aws/instance.py +5 -2
  159. sky/provision/azure/instance.py +5 -3
  160. sky/provision/common.py +22 -0
  161. sky/provision/cudo/instance.py +4 -3
  162. sky/provision/do/instance.py +4 -3
  163. sky/provision/docker_utils.py +112 -28
  164. sky/provision/fluidstack/instance.py +6 -5
  165. sky/provision/gcp/config.py +6 -1
  166. sky/provision/gcp/instance.py +4 -2
  167. sky/provision/hyperbolic/instance.py +4 -2
  168. sky/provision/instance_setup.py +66 -20
  169. sky/provision/kubernetes/__init__.py +2 -0
  170. sky/provision/kubernetes/config.py +7 -44
  171. sky/provision/kubernetes/constants.py +0 -1
  172. sky/provision/kubernetes/instance.py +609 -213
  173. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  174. sky/provision/kubernetes/network.py +12 -8
  175. sky/provision/kubernetes/network_utils.py +8 -25
  176. sky/provision/kubernetes/utils.py +422 -422
  177. sky/provision/kubernetes/volume.py +150 -18
  178. sky/provision/lambda_cloud/instance.py +16 -13
  179. sky/provision/nebius/instance.py +6 -2
  180. sky/provision/nebius/utils.py +103 -86
  181. sky/provision/oci/instance.py +4 -2
  182. sky/provision/paperspace/instance.py +4 -3
  183. sky/provision/primeintellect/__init__.py +10 -0
  184. sky/provision/primeintellect/config.py +11 -0
  185. sky/provision/primeintellect/instance.py +454 -0
  186. sky/provision/primeintellect/utils.py +398 -0
  187. sky/provision/provisioner.py +45 -15
  188. sky/provision/runpod/__init__.py +2 -0
  189. sky/provision/runpod/instance.py +4 -3
  190. sky/provision/runpod/volume.py +69 -13
  191. sky/provision/scp/instance.py +307 -130
  192. sky/provision/seeweb/__init__.py +11 -0
  193. sky/provision/seeweb/config.py +13 -0
  194. sky/provision/seeweb/instance.py +812 -0
  195. sky/provision/shadeform/__init__.py +11 -0
  196. sky/provision/shadeform/config.py +12 -0
  197. sky/provision/shadeform/instance.py +351 -0
  198. sky/provision/shadeform/shadeform_utils.py +83 -0
  199. sky/provision/slurm/__init__.py +12 -0
  200. sky/provision/slurm/config.py +13 -0
  201. sky/provision/slurm/instance.py +572 -0
  202. sky/provision/slurm/utils.py +583 -0
  203. sky/provision/vast/instance.py +9 -4
  204. sky/provision/vast/utils.py +10 -6
  205. sky/provision/volume.py +164 -0
  206. sky/provision/vsphere/common/ssl_helper.py +1 -1
  207. sky/provision/vsphere/common/vapiconnect.py +2 -1
  208. sky/provision/vsphere/common/vim_utils.py +3 -2
  209. sky/provision/vsphere/instance.py +8 -6
  210. sky/provision/vsphere/vsphere_utils.py +8 -1
  211. sky/resources.py +11 -3
  212. sky/schemas/api/responses.py +107 -6
  213. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  214. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  215. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  216. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  217. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  218. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  219. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  220. sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
  221. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  222. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  223. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  224. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  225. sky/schemas/generated/jobsv1_pb2.py +86 -0
  226. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  227. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  228. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  229. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  230. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  231. sky/schemas/generated/servev1_pb2.py +58 -0
  232. sky/schemas/generated/servev1_pb2.pyi +115 -0
  233. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  234. sky/serve/autoscalers.py +2 -0
  235. sky/serve/client/impl.py +55 -21
  236. sky/serve/constants.py +4 -3
  237. sky/serve/controller.py +17 -11
  238. sky/serve/load_balancing_policies.py +1 -1
  239. sky/serve/replica_managers.py +219 -142
  240. sky/serve/serve_rpc_utils.py +179 -0
  241. sky/serve/serve_state.py +63 -54
  242. sky/serve/serve_utils.py +145 -109
  243. sky/serve/server/core.py +46 -25
  244. sky/serve/server/impl.py +311 -162
  245. sky/serve/server/server.py +21 -19
  246. sky/serve/service.py +84 -68
  247. sky/serve/service_spec.py +45 -7
  248. sky/server/auth/loopback.py +38 -0
  249. sky/server/auth/oauth2_proxy.py +12 -7
  250. sky/server/common.py +47 -24
  251. sky/server/config.py +62 -28
  252. sky/server/constants.py +9 -1
  253. sky/server/daemons.py +109 -38
  254. sky/server/metrics.py +76 -96
  255. sky/server/middleware_utils.py +166 -0
  256. sky/server/plugins.py +222 -0
  257. sky/server/requests/executor.py +384 -145
  258. sky/server/requests/payloads.py +83 -19
  259. sky/server/requests/preconditions.py +15 -13
  260. sky/server/requests/request_names.py +123 -0
  261. sky/server/requests/requests.py +511 -157
  262. sky/server/requests/serializers/decoders.py +48 -17
  263. sky/server/requests/serializers/encoders.py +102 -20
  264. sky/server/requests/serializers/return_value_serializers.py +60 -0
  265. sky/server/requests/threads.py +117 -0
  266. sky/server/rest.py +116 -24
  267. sky/server/server.py +497 -179
  268. sky/server/server_utils.py +30 -0
  269. sky/server/stream_utils.py +219 -45
  270. sky/server/uvicorn.py +30 -19
  271. sky/setup_files/MANIFEST.in +6 -1
  272. sky/setup_files/alembic.ini +8 -0
  273. sky/setup_files/dependencies.py +64 -19
  274. sky/setup_files/setup.py +44 -44
  275. sky/sky_logging.py +13 -5
  276. sky/skylet/attempt_skylet.py +116 -24
  277. sky/skylet/configs.py +3 -1
  278. sky/skylet/constants.py +139 -29
  279. sky/skylet/events.py +74 -14
  280. sky/skylet/executor/__init__.py +1 -0
  281. sky/skylet/executor/slurm.py +189 -0
  282. sky/skylet/job_lib.py +143 -105
  283. sky/skylet/log_lib.py +252 -8
  284. sky/skylet/log_lib.pyi +47 -7
  285. sky/skylet/providers/ibm/node_provider.py +12 -8
  286. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  287. sky/skylet/runtime_utils.py +21 -0
  288. sky/skylet/services.py +524 -0
  289. sky/skylet/skylet.py +27 -2
  290. sky/skylet/subprocess_daemon.py +104 -28
  291. sky/skypilot_config.py +99 -79
  292. sky/ssh_node_pools/constants.py +12 -0
  293. sky/ssh_node_pools/core.py +40 -3
  294. sky/ssh_node_pools/deploy/__init__.py +4 -0
  295. sky/ssh_node_pools/deploy/deploy.py +952 -0
  296. sky/ssh_node_pools/deploy/tunnel_utils.py +199 -0
  297. sky/ssh_node_pools/deploy/utils.py +173 -0
  298. sky/ssh_node_pools/server.py +20 -21
  299. sky/{utils/kubernetes/ssh_utils.py → ssh_node_pools/utils.py} +9 -6
  300. sky/task.py +221 -104
  301. sky/templates/aws-ray.yml.j2 +1 -0
  302. sky/templates/azure-ray.yml.j2 +1 -0
  303. sky/templates/cudo-ray.yml.j2 +1 -0
  304. sky/templates/do-ray.yml.j2 +1 -0
  305. sky/templates/fluidstack-ray.yml.j2 +1 -0
  306. sky/templates/gcp-ray.yml.j2 +1 -0
  307. sky/templates/hyperbolic-ray.yml.j2 +1 -0
  308. sky/templates/ibm-ray.yml.j2 +2 -1
  309. sky/templates/jobs-controller.yaml.j2 +3 -0
  310. sky/templates/kubernetes-ray.yml.j2 +204 -55
  311. sky/templates/lambda-ray.yml.j2 +1 -0
  312. sky/templates/nebius-ray.yml.j2 +3 -0
  313. sky/templates/oci-ray.yml.j2 +1 -0
  314. sky/templates/paperspace-ray.yml.j2 +1 -0
  315. sky/templates/primeintellect-ray.yml.j2 +72 -0
  316. sky/templates/runpod-ray.yml.j2 +1 -0
  317. sky/templates/scp-ray.yml.j2 +1 -0
  318. sky/templates/seeweb-ray.yml.j2 +171 -0
  319. sky/templates/shadeform-ray.yml.j2 +73 -0
  320. sky/templates/slurm-ray.yml.j2 +85 -0
  321. sky/templates/vast-ray.yml.j2 +2 -0
  322. sky/templates/vsphere-ray.yml.j2 +1 -0
  323. sky/templates/websocket_proxy.py +188 -43
  324. sky/usage/usage_lib.py +16 -4
  325. sky/users/model.conf +1 -1
  326. sky/users/permission.py +84 -44
  327. sky/users/rbac.py +31 -3
  328. sky/utils/accelerator_registry.py +6 -3
  329. sky/utils/admin_policy_utils.py +18 -5
  330. sky/utils/annotations.py +128 -6
  331. sky/utils/asyncio_utils.py +78 -0
  332. sky/utils/atomic.py +1 -1
  333. sky/utils/auth_utils.py +153 -0
  334. sky/utils/cli_utils/status_utils.py +12 -7
  335. sky/utils/cluster_utils.py +28 -6
  336. sky/utils/command_runner.py +283 -30
  337. sky/utils/command_runner.pyi +63 -7
  338. sky/utils/common.py +3 -1
  339. sky/utils/common_utils.py +55 -7
  340. sky/utils/config_utils.py +1 -14
  341. sky/utils/context.py +127 -40
  342. sky/utils/context_utils.py +73 -18
  343. sky/utils/controller_utils.py +229 -70
  344. sky/utils/db/db_utils.py +95 -18
  345. sky/utils/db/kv_cache.py +149 -0
  346. sky/utils/db/migration_utils.py +24 -7
  347. sky/utils/env_options.py +4 -0
  348. sky/utils/git.py +559 -1
  349. sky/utils/kubernetes/create_cluster.sh +15 -30
  350. sky/utils/kubernetes/delete_cluster.sh +10 -7
  351. sky/utils/kubernetes/generate_kind_config.py +6 -66
  352. sky/utils/kubernetes/gpu_labeler.py +13 -3
  353. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  354. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  355. sky/utils/kubernetes/kubernetes_deploy_utils.py +187 -260
  356. sky/utils/kubernetes/rsync_helper.sh +11 -3
  357. sky/utils/kubernetes/ssh-tunnel.sh +7 -376
  358. sky/utils/kubernetes_enums.py +7 -15
  359. sky/utils/lock_events.py +4 -4
  360. sky/utils/locks.py +128 -31
  361. sky/utils/log_utils.py +0 -319
  362. sky/utils/resource_checker.py +13 -10
  363. sky/utils/resources_utils.py +53 -29
  364. sky/utils/rich_utils.py +8 -4
  365. sky/utils/schemas.py +138 -52
  366. sky/utils/subprocess_utils.py +17 -4
  367. sky/utils/thread_utils.py +91 -0
  368. sky/utils/timeline.py +2 -1
  369. sky/utils/ux_utils.py +35 -1
  370. sky/utils/volume.py +88 -4
  371. sky/utils/yaml_utils.py +9 -0
  372. sky/volumes/client/sdk.py +48 -10
  373. sky/volumes/server/core.py +59 -22
  374. sky/volumes/server/server.py +46 -17
  375. sky/volumes/volume.py +54 -42
  376. sky/workspaces/core.py +57 -21
  377. sky/workspaces/server.py +13 -12
  378. sky_templates/README.md +3 -0
  379. sky_templates/__init__.py +3 -0
  380. sky_templates/ray/__init__.py +0 -0
  381. sky_templates/ray/start_cluster +183 -0
  382. sky_templates/ray/stop_cluster +75 -0
  383. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/METADATA +343 -65
  384. skypilot_nightly-1.0.0.dev20251210.dist-info/RECORD +629 -0
  385. skypilot_nightly-1.0.0.dev20251210.dist-info/top_level.txt +2 -0
  386. sky/client/cli/git.py +0 -549
  387. sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
  388. sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
  389. sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
  390. sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
  391. sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
  392. sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
  393. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +0 -1
  394. sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
  395. sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
  396. sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
  397. sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
  398. sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
  399. sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
  400. sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
  401. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
  402. sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
  403. sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
  404. sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
  405. sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
  406. sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
  407. sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
  408. sky/dashboard/out/_next/static/chunks/pages/_app-ce361c6959bc2001.js +0 -34
  409. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
  410. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
  411. sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
  412. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
  413. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
  414. sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
  415. sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
  416. sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
  417. sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
  418. sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
  419. sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
  420. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  421. sky/utils/kubernetes/cleanup-tunnel.sh +0 -62
  422. sky/utils/kubernetes/deploy_remote_cluster.py +0 -1299
  423. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  424. skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
  425. skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
  426. /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → KYAhEFa3FTfq4JyKVgo-s}/_ssgManifest.js +0 -0
  427. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/WHEEL +0 -0
  428. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/entry_points.txt +0 -0
  429. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/licenses/LICENSE +0 -0
sky/utils/ux_utils.py CHANGED
@@ -1,11 +1,12 @@
1
1
  """Utility functions for UX."""
2
2
  import contextlib
3
3
  import enum
4
+ import fnmatch
4
5
  import os
5
6
  import sys
6
7
  import traceback
7
8
  import typing
8
- from typing import Callable, Optional, Union
9
+ from typing import Callable, Iterable, List, Optional, Union
9
10
 
10
11
  import colorama
11
12
 
@@ -288,3 +289,36 @@ def command_hint_messages(hint_type: CommandHintType,
288
289
  f'{BOLD}sky jobs queue{RESET_BOLD}')
289
290
  else:
290
291
  raise ValueError(f'Invalid hint type: {hint_type}')
292
+
293
+
294
def is_glob_pattern(pattern: str) -> bool:
    """Checks if a string contains common glob pattern wildcards.

    Args:
        pattern: The string to inspect.

    Returns:
        True if ``pattern`` contains any of ``*``, ``?``, ``[`` or ``]``;
        False otherwise (including for the empty string).
    """
    glob_chars = {'*', '?', '[', ']'}
    # No separate check for the '**' globstar is needed: any string that
    # contains '**' also contains '*'.
    return any(char in glob_chars for char in pattern)


def get_non_matched_query(query_clusters: Iterable[str],
                          cluster_names: Iterable[str]) -> List[str]:
    """Gets the query clusters that match none of the cluster names.

    A query that contains glob wildcards (see ``is_glob_pattern``) is matched
    against the names with ``fnmatch``; any other query must equal a name
    exactly.

    Args:
        query_clusters: Cluster names or glob patterns that were queried.
        cluster_names: Names of the clusters to match against. May be any
            iterable, including a one-shot iterator or generator.

    Returns:
        The queries without a match: first the unmatched exact names, then
        the unmatched glob patterns, each group in query order.
    """
    # Materialize once. The names are scanned again for every query, so a
    # one-shot iterator would be exhausted by the first lookup and every
    # later query would be reported as unmatched.
    known_names = list(cluster_names)
    known_name_set = set(known_names)

    exact_misses: List[str] = []
    glob_misses: List[str] = []
    for query in query_clusters:
        if is_glob_pattern(query):
            if not fnmatch.filter(known_names, query):
                glob_misses.append(query)
        elif query not in known_name_set:
            exact_misses.append(query)
    return exact_misses + glob_misses
sky/utils/volume.py CHANGED
@@ -1,12 +1,14 @@
1
1
  """Volume utilities."""
2
+ from dataclasses import dataclass
2
3
  import enum
3
4
  import time
4
- from typing import Any, Dict
5
+ from typing import Any, Dict, Optional
5
6
 
6
7
  from sky import exceptions
7
8
  from sky import global_user_state
8
9
  from sky import models
9
10
  from sky.utils import common_utils
11
+ from sky.utils import resources_utils
10
12
  from sky.utils import schemas
11
13
  from sky.utils import status_lib
12
14
 
@@ -26,18 +28,42 @@ class VolumeType(enum.Enum):
26
28
  PVC = 'k8s-pvc'
27
29
  RUNPOD_NETWORK_VOLUME = 'runpod-network-volume'
28
30
 
31
+ @classmethod
32
+ def supported_types(cls) -> list:
33
+ """Return list of supported volume type values."""
34
+ return [vt.value for vt in cls]
35
+
36
+
37
+ EPHEMERAL_VOLUME_TYPES = [VolumeType.PVC.value]
38
+
39
+
40
+ @dataclass
41
+ class VolumeInfo:
42
+ """Represents volume info."""
43
+ name: str
44
+ path: str
45
+ volume_name_on_cloud: Optional[str] = None
46
+ volume_id_on_cloud: Optional[str] = None
47
+
29
48
 
30
49
  class VolumeMount:
31
50
  """Volume mount specification."""
32
51
 
33
- def __init__(self, path: str, volume_name: str,
34
- volume_config: models.VolumeConfig):
52
+ def __init__(self,
53
+ path: str,
54
+ volume_name: str,
55
+ volume_config: models.VolumeConfig,
56
+ is_ephemeral: bool = False):
35
57
  self.path: str = path
36
58
  self.volume_name: str = volume_name
37
59
  self.volume_config: models.VolumeConfig = volume_config
60
+ self.is_ephemeral: bool = is_ephemeral
38
61
 
39
62
  def pre_mount(self) -> None:
40
63
  """Update the volume status before actual mounting."""
64
+ # Skip pre_mount for ephemeral volumes as they don't exist yet
65
+ if self.is_ephemeral:
66
+ return
41
67
  # TODO(aylei): for ReadWriteOnce volume, we also need to queue the
42
68
  # mount request if the target volume is already mounted to another
43
69
  # cluster. For now, we only support ReadWriteMany volume.
@@ -63,19 +89,77 @@ class VolumeMount:
63
89
 
64
90
  path = config.pop('path', None)
65
91
  volume_name = config.pop('volume_name', None)
92
+ is_ephemeral = config.pop('is_ephemeral', False)
66
93
  volume_config: models.VolumeConfig = models.VolumeConfig.model_validate(
67
94
  config.pop('volume_config', None))
68
- return cls(path, volume_name, volume_config)
95
+ return cls(path, volume_name, volume_config, is_ephemeral)
96
+
97
@classmethod
def resolve_ephemeral_config(cls, path: str,
                             config: Dict[str, Any]) -> 'VolumeMount':
    """Create an ephemeral volume mount from inline config.

    Args:
        path: The mount path for the volume.
        config: The volume configuration dict with size, and
            optional type, labels, and config fields, etc.

    Returns:
        A VolumeMount instance for the ephemeral volume.

    Raises:
        ValueError: If the type is not a supported ephemeral volume type,
            or if the size is missing, cannot be parsed, or parses to '0'.
    """
    volume_type = config.get('type')
    if volume_type:
        # The type is matched case-insensitively, so store the lower-cased
        # spelling: that is the form listed in EPHEMERAL_VOLUME_TYPES.
        normalized_type = volume_type.lower()
        if normalized_type not in EPHEMERAL_VOLUME_TYPES:
            raise ValueError(f'Unsupported ephemeral volume type: '
                             f'{volume_type}. Supported types: '
                             f'{", ".join(EPHEMERAL_VOLUME_TYPES)}')
    else:
        # No (or empty) type given: keep whatever the config holds, falling
        # back to '' when the key is absent.
        normalized_type = config.get('type', '')

    size_config = config.get('size')
    if size_config is None:
        raise ValueError('Volume size must be specified for ephemeral '
                         'volumes.')
    try:
        size = resources_utils.parse_memory_resource(size_config,
                                                     'size',
                                                     allow_rounding=True)
        # Raised inside the try on purpose so that it is re-wrapped below
        # with the offending size value in the message.
        if size == '0':
            raise ValueError('Size must be no less than 1Gi')
    except ValueError as e:
        raise ValueError(
            f'Invalid size {size_config} for ephemeral volume: {e}') from e

    # Create VolumeConfig for ephemeral volume
    # Note: the empty fields will be populated during provisioning
    volume_config = models.VolumeConfig(
        name='',
        type=normalized_type,
        # Default to kubernetes cloud here for backward compatibility,
        # but this will be reset to the correct cloud during provisioning.
        cloud='kubernetes',
        region=None,
        zone=None,
        name_on_cloud='',
        size=size,
        config=config.get('config', {}),
        labels=config.get('labels'),
    )

    return cls(path, '', volume_config, is_ephemeral=True)
69
146
 
70
147
  def to_yaml_config(self) -> Dict[str, Any]:
71
148
  return {
72
149
  'path': self.path,
73
150
  'volume_name': self.volume_name,
74
151
  'volume_config': self.volume_config.model_dump(),
152
+ 'is_ephemeral': self.is_ephemeral,
75
153
  }
76
154
 
155
+ @property
156
+ def name(self) -> str:
157
+ """Return the volume name for use in provisioning."""
158
+ return self.volume_name
159
+
77
160
  def __repr__(self):
78
161
  return (f'VolumeMount('
79
162
  f'\n\tpath={self.path},'
80
163
  f'\n\tvolume_name={self.volume_name},'
164
+ f'\n\tis_ephemeral={self.is_ephemeral},'
81
165
  f'\n\tvolume_config={self.volume_config})')
sky/utils/yaml_utils.py CHANGED
@@ -44,6 +44,15 @@ def read_yaml(path: Optional[str]) -> Dict[str, Any]:
44
44
  return config
45
45
 
46
46
 
47
def read_yaml_str(yaml_str: str) -> Dict[str, Any]:
    """Parses a YAML string with ``safe_load``.

    Args:
        yaml_str: The YAML text to parse.

    Returns:
        The parsed content, or an empty dict when the parsed result is
        falsy.
    """
    loaded = safe_load(io.StringIO(yaml_str))
    return loaded if loaded else {}
54
+
55
+
47
56
  def read_yaml_all_str(yaml_str: str) -> List[Dict[str, Any]]:
48
57
  stream = io.StringIO(yaml_str)
49
58
  config = safe_load_all(stream)
sky/volumes/client/sdk.py CHANGED
@@ -1,15 +1,19 @@
1
1
  """SDK functions for managed jobs."""
2
2
  import json
3
3
  import typing
4
- from typing import Any, Dict, List
4
+ from typing import List
5
5
 
6
+ from sky import exceptions
6
7
  from sky import sky_logging
7
8
  from sky.adaptors import common as adaptors_common
9
+ from sky.schemas.api import responses
8
10
  from sky.server import common as server_common
11
+ from sky.server import versions
9
12
  from sky.server.requests import payloads
10
13
  from sky.usage import usage_lib
11
14
  from sky.utils import annotations
12
15
  from sky.utils import context
16
+ from sky.utils import ux_utils
13
17
  from sky.volumes import volume as volume_lib
14
18
 
15
19
  if typing.TYPE_CHECKING:
@@ -70,10 +74,10 @@ def apply(volume: volume_lib.Volume) -> server_common.RequestId[None]:
70
74
  size=volume.size,
71
75
  config=volume.config,
72
76
  labels=volume.labels,
77
+ use_existing=volume.use_existing,
73
78
  )
74
- response = requests.post(f'{server_common.get_server_url()}/volumes/apply',
75
- json=json.loads(body.model_dump_json()),
76
- cookies=server_common.get_api_cookie_jar())
79
+ response = server_common.make_authenticated_request(
80
+ 'POST', '/volumes/apply', json=json.loads(body.model_dump_json()))
77
81
  return server_common.get_request_id(response)
78
82
 
79
83
 
@@ -81,14 +85,49 @@ def apply(volume: volume_lib.Volume) -> server_common.RequestId[None]:
81
85
  @usage_lib.entrypoint
82
86
  @server_common.check_server_healthy_or_start
83
87
  @annotations.client_api
84
- def ls() -> server_common.RequestId[List[Dict[str, Any]]]:
88
+ @versions.minimal_api_version(20)
89
+ def validate(volume: volume_lib.Volume) -> None:
90
+ """Validates the volume.
91
+
92
+ All validation is done on the server side.
93
+
94
+ Args:
95
+ volume: The volume to validate.
96
+
97
+ Raises:
98
+ ValueError: If the volume is invalid.
99
+ """
100
+ body = payloads.VolumeValidateBody(
101
+ name=volume.name,
102
+ volume_type=volume.type,
103
+ infra=volume.infra,
104
+ size=volume.size,
105
+ config=volume.config,
106
+ labels=volume.labels,
107
+ use_existing=volume.use_existing,
108
+ )
109
+ response = server_common.make_authenticated_request(
110
+ 'POST', '/volumes/validate', json=json.loads(body.model_dump_json()))
111
+ if response.status_code == 400:
112
+ with ux_utils.print_exception_no_traceback():
113
+ raise exceptions.deserialize_exception(
114
+ response.json().get('detail'))
115
+
116
+
117
+ @context.contextual
118
+ @usage_lib.entrypoint
119
+ @server_common.check_server_healthy_or_start
120
+ @annotations.client_api
121
+ def ls() -> server_common.RequestId[List[responses.VolumeRecord]]:
85
122
  """Lists all volumes.
86
123
 
87
124
  Returns:
88
125
  The request ID of the list request.
89
126
  """
90
- response = requests.get(f'{server_common.get_server_url()}/volumes',
91
- cookies=server_common.get_api_cookie_jar())
127
+ response = server_common.make_authenticated_request(
128
+ 'GET',
129
+ '/volumes',
130
+ )
92
131
  return server_common.get_request_id(response)
93
132
 
94
133
 
@@ -106,7 +145,6 @@ def delete(names: List[str]) -> server_common.RequestId[None]:
106
145
  The request ID of the delete request.
107
146
  """
108
147
  body = payloads.VolumeDeleteBody(names=names)
109
- response = requests.post(f'{server_common.get_server_url()}/volumes/delete',
110
- json=json.loads(body.model_dump_json()),
111
- cookies=server_common.get_api_cookie_jar())
148
+ response = server_common.make_authenticated_request(
149
+ 'POST', '/volumes/delete', json=json.loads(body.model_dump_json()))
112
150
  return server_common.get_request_id(response)
@@ -11,6 +11,7 @@ from sky import global_user_state
11
11
  from sky import models
12
12
  from sky import provision
13
13
  from sky import sky_logging
14
+ from sky.schemas.api import responses
14
15
  from sky.utils import common_utils
15
16
  from sky.utils import registry
16
17
  from sky.utils import rich_utils
@@ -26,16 +27,10 @@ VOLUME_LOCK_TIMEOUT_SECONDS = 20
26
27
 
27
28
  def volume_refresh():
28
29
  """Refreshes the volume status."""
29
- volumes = global_user_state.get_volumes()
30
+ volumes = volume_list(is_ephemeral=False)
30
31
  for volume in volumes:
31
- volume_name = volume.get('name')
32
- config = volume.get('handle')
33
- if config is None:
34
- logger.warning(f'Volume {volume_name} has no handle.'
35
- 'Skipping status refresh...')
36
- continue
37
- cloud = config.cloud
38
- usedby_pods, _ = provision.get_volume_usedby(cloud, config)
32
+ volume_name = volume.name
33
+ usedby_pods = volume.usedby_pods
39
34
  with _volume_lock(volume_name):
40
35
  latest_volume = global_user_state.get_volume_by_name(volume_name)
41
36
  if latest_volume is None:
@@ -56,7 +51,8 @@ def volume_refresh():
56
51
  volume_name, status=status_lib.VolumeStatus.IN_USE)
57
52
 
58
53
 
59
- def volume_list() -> List[Dict[str, Any]]:
54
+ def volume_list(
55
+ is_ephemeral: Optional[bool] = None) -> List[responses.VolumeRecord]:
60
56
  """Gets the volumes.
61
57
 
62
58
  Returns:
@@ -78,11 +74,31 @@ def volume_list() -> List[Dict[str, Any]]:
78
74
  'status': sky.VolumeStatus,
79
75
  'usedby_pods': List[str],
80
76
  'usedby_clusters': List[str],
77
+ 'is_ephemeral': bool,
81
78
  }
82
79
  ]
83
80
  """
84
81
  with rich_utils.safe_status(ux_utils.spinner_message('Listing volumes')):
85
- volumes = global_user_state.get_volumes()
82
+ volumes = global_user_state.get_volumes(is_ephemeral=is_ephemeral)
83
+ cloud_to_configs: Dict[str, List[models.VolumeConfig]] = {}
84
+ for volume in volumes:
85
+ config = volume.get('handle')
86
+ if config is None:
87
+ volume_name = volume.get('name')
88
+ logger.warning(f'Volume {volume_name} has no handle.')
89
+ continue
90
+ cloud = config.cloud
91
+ if cloud not in cloud_to_configs:
92
+ cloud_to_configs[cloud] = []
93
+ cloud_to_configs[cloud].append(config)
94
+
95
+ cloud_to_used_by_pods, cloud_to_used_by_clusters = {}, {}
96
+ for cloud, configs in cloud_to_configs.items():
97
+ used_by_pods, used_by_clusters = provision.get_all_volumes_usedby(
98
+ cloud, configs)
99
+ cloud_to_used_by_pods[cloud] = used_by_pods
100
+ cloud_to_used_by_clusters[cloud] = used_by_clusters
101
+
86
102
  all_users = global_user_state.get_all_users()
87
103
  user_map = {user.id: user.name for user in all_users}
88
104
  records = []
@@ -98,6 +114,7 @@ def volume_list() -> List[Dict[str, Any]]:
98
114
  'last_use': volume.get('last_use'),
99
115
  'usedby_pods': [],
100
116
  'usedby_clusters': [],
117
+ 'is_ephemeral': volume.get('is_ephemeral', False),
101
118
  }
102
119
  status = volume.get('status')
103
120
  if status is not None:
@@ -109,8 +126,12 @@ def volume_list() -> List[Dict[str, Any]]:
109
126
  logger.warning(f'Volume {volume_name} has no handle.')
110
127
  continue
111
128
  cloud = config.cloud
112
- usedby_pods, usedby_clusters = provision.get_volume_usedby(
113
- cloud, config)
129
+ usedby_pods, usedby_clusters = provision.map_all_volumes_usedby(
130
+ cloud,
131
+ cloud_to_used_by_pods[cloud],
132
+ cloud_to_used_by_clusters[cloud],
133
+ config,
134
+ )
114
135
  record['type'] = config.type
115
136
  record['cloud'] = config.cloud
116
137
  record['region'] = config.region
@@ -120,15 +141,16 @@ def volume_list() -> List[Dict[str, Any]]:
120
141
  record['name_on_cloud'] = config.name_on_cloud
121
142
  record['usedby_pods'] = usedby_pods
122
143
  record['usedby_clusters'] = usedby_clusters
123
- records.append(record)
144
+ records.append(responses.VolumeRecord(**record))
124
145
  return records
125
146
 
126
147
 
127
- def volume_delete(names: List[str]) -> None:
148
+ def volume_delete(names: List[str], ignore_not_found: bool = False) -> None:
128
149
  """Deletes volumes.
129
150
 
130
151
  Args:
131
152
  names: List of volume names to delete.
153
+ ignore_not_found: If True, ignore volumes that are not found.
132
154
 
133
155
  Raises:
134
156
  ValueError: If the volume does not exist
@@ -138,6 +160,8 @@ def volume_delete(names: List[str]) -> None:
138
160
  for name in names:
139
161
  volume = global_user_state.get_volume_by_name(name)
140
162
  if volume is None:
163
+ if ignore_not_found:
164
+ continue
141
165
  raise ValueError(f'Volume {name} not found.')
142
166
  config = volume.get('handle')
143
167
  if config is None:
@@ -160,6 +184,7 @@ def volume_delete(names: List[str]) -> None:
160
184
  with _volume_lock(name):
161
185
  provision.delete_volume(cloud, config)
162
186
  global_user_state.delete_volume(name)
187
+ logger.info(f'Deleted volumes: {names}')
163
188
 
164
189
 
165
190
  def volume_apply(
@@ -171,6 +196,8 @@ def volume_apply(
171
196
  size: Optional[str],
172
197
  config: Dict[str, Any],
173
198
  labels: Optional[Dict[str, str]] = None,
199
+ use_existing: Optional[bool] = None,
200
+ is_ephemeral: bool = False,
174
201
  ) -> None:
175
202
  """Creates or registers a volume.
176
203
 
@@ -183,17 +210,22 @@ def volume_apply(
183
210
  size: The size of the volume.
184
211
  config: The configuration of the volume.
185
212
  labels: The labels of the volume.
186
-
213
+ use_existing: Whether to use an existing volume.
214
+ is_ephemeral: Whether the volume is ephemeral.
187
215
  """
188
216
  with rich_utils.safe_status(ux_utils.spinner_message('Creating volume')):
189
217
  # Reuse the method for cluster name on cloud to
190
218
  # generate the storage name on cloud.
191
219
  cloud_obj = registry.CLOUD_REGISTRY.from_str(cloud)
192
220
  assert cloud_obj is not None
193
- name_uuid = str(uuid.uuid4())[:6]
194
- name_on_cloud = common_utils.make_cluster_name_on_cloud(
195
- name, max_length=cloud_obj.max_cluster_name_length())
196
- name_on_cloud += '-' + name_uuid
221
+ region, zone = cloud_obj.validate_region_zone(region, zone)
222
+ if use_existing:
223
+ name_on_cloud = name
224
+ else:
225
+ name_uuid = str(uuid.uuid4())[:6]
226
+ name_on_cloud = common_utils.make_cluster_name_on_cloud(
227
+ name, max_length=cloud_obj.max_cluster_name_length())
228
+ name_on_cloud += '-' + name_uuid
197
229
  config = models.VolumeConfig(
198
230
  name=name,
199
231
  type=volume_type,
@@ -213,8 +245,13 @@ def volume_apply(
213
245
  logger.info(f'Volume {name} already exists.')
214
246
  return
215
247
  config = provision.apply_volume(cloud, config)
216
- global_user_state.add_volume(name, config,
217
- status_lib.VolumeStatus.READY)
248
+ global_user_state.add_volume(
249
+ name,
250
+ config,
251
+ status_lib.VolumeStatus.READY,
252
+ is_ephemeral,
253
+ )
254
+ logger.info(f'Created volume {name} on cloud {cloud}')
218
255
 
219
256
 
220
257
  @contextlib.contextmanager
@@ -3,12 +3,14 @@
3
3
  import fastapi
4
4
 
5
5
  from sky import clouds
6
+ from sky import exceptions
6
7
  from sky import sky_logging
7
8
  from sky.server.requests import executor
8
9
  from sky.server.requests import payloads
10
+ from sky.server.requests import request_names
9
11
  from sky.server.requests import requests as requests_lib
10
12
  from sky.utils import registry
11
- from sky.utils import volume
13
+ from sky.utils import volume as volume_utils
12
14
  from sky.volumes.server import core
13
15
 
14
16
  logger = sky_logging.init_logger(__name__)
@@ -23,11 +25,11 @@ async def volume_list(request: fastapi.Request) -> None:
23
25
  auth_user_env_vars_kwargs = {
24
26
  'env_vars': auth_user.to_env_vars()
25
27
  } if auth_user else {}
26
- volume_list_body = payloads.VolumeListBody(**auth_user_env_vars_kwargs)
27
- executor.schedule_request(
28
+ request_body = payloads.RequestBody(**auth_user_env_vars_kwargs)
29
+ await executor.schedule_request_async(
28
30
  request_id=request.state.request_id,
29
- request_name='volume_list',
30
- request_body=volume_list_body,
31
+ request_name=request_names.RequestName.VOLUME_LIST,
32
+ request_body=request_body,
31
33
  func=core.volume_list,
32
34
  schedule_type=requests_lib.ScheduleType.SHORT,
33
35
  )
@@ -37,15 +39,41 @@ async def volume_list(request: fastapi.Request) -> None:
37
39
  async def volume_delete(request: fastapi.Request,
38
40
  volume_delete_body: payloads.VolumeDeleteBody) -> None:
39
41
  """Deletes a volume."""
40
- executor.schedule_request(
42
+ await executor.schedule_request_async(
41
43
  request_id=request.state.request_id,
42
- request_name='volume_delete',
44
+ request_name=request_names.RequestName.VOLUME_DELETE,
43
45
  request_body=volume_delete_body,
44
46
  func=core.volume_delete,
45
47
  schedule_type=requests_lib.ScheduleType.LONG,
46
48
  )
47
49
 
48
50
 
51
+ @router.post('/validate')
52
+ async def volume_validate(
53
+ _: fastapi.Request,
54
+ volume_validate_body: payloads.VolumeValidateBody) -> None:
55
+ """Validates a volume."""
56
+ # pylint: disable=import-outside-toplevel
57
+ from sky.volumes import volume as volume_lib
58
+
59
+ try:
60
+ volume_config = {
61
+ 'name': volume_validate_body.name,
62
+ 'type': volume_validate_body.volume_type,
63
+ 'infra': volume_validate_body.infra,
64
+ 'size': volume_validate_body.size,
65
+ 'labels': volume_validate_body.labels,
66
+ 'config': volume_validate_body.config,
67
+ 'use_existing': volume_validate_body.use_existing,
68
+ }
69
+ volume = volume_lib.Volume.from_yaml_config(volume_config)
70
+ volume.validate()
71
+ except Exception as e:
72
+ requests_lib.set_exception_stacktrace(e)
73
+ raise fastapi.HTTPException(status_code=400,
74
+ detail=exceptions.serialize_exception(e))
75
+
76
+
49
77
  @router.post('/apply')
50
78
  async def volume_apply(request: fastapi.Request,
51
79
  volume_apply_body: payloads.VolumeApplyBody) -> None:
@@ -53,9 +81,12 @@ async def volume_apply(request: fastapi.Request,
53
81
  volume_cloud = volume_apply_body.cloud
54
82
  volume_type = volume_apply_body.volume_type
55
83
  volume_config = volume_apply_body.config
84
+ if volume_config is None:
85
+ volume_config = {}
86
+ volume_config['use_existing'] = volume_apply_body.use_existing
56
87
 
57
88
  supported_volume_types = [
58
- volume_type.value for volume_type in volume.VolumeType
89
+ volume_type.value for volume_type in volume_utils.VolumeType
59
90
  ]
60
91
  if volume_type not in supported_volume_types:
61
92
  raise fastapi.HTTPException(
@@ -64,31 +95,29 @@ async def volume_apply(request: fastapi.Request,
64
95
  if cloud is None:
65
96
  raise fastapi.HTTPException(status_code=400,
66
97
  detail=f'Invalid cloud: {volume_cloud}')
67
- if volume_type == volume.VolumeType.PVC.value:
98
+ if volume_type == volume_utils.VolumeType.PVC.value:
68
99
  if not cloud.is_same_cloud(clouds.Kubernetes()):
69
100
  raise fastapi.HTTPException(
70
101
  status_code=400,
71
102
  detail='PVC storage is only supported on Kubernetes')
72
103
  supported_access_modes = [
73
- access_mode.value for access_mode in volume.VolumeAccessMode
104
+ access_mode.value for access_mode in volume_utils.VolumeAccessMode
74
105
  ]
75
- if volume_config is None:
76
- volume_config = {}
77
106
  access_mode = volume_config.get('access_mode')
78
107
  if access_mode is None:
79
- volume_config[
80
- 'access_mode'] = volume.VolumeAccessMode.READ_WRITE_ONCE.value
108
+ volume_config['access_mode'] = (
109
+ volume_utils.VolumeAccessMode.READ_WRITE_ONCE.value)
81
110
  elif access_mode not in supported_access_modes:
82
111
  raise fastapi.HTTPException(
83
112
  status_code=400, detail=f'Invalid access mode: {access_mode}')
84
- elif volume_type == volume.VolumeType.RUNPOD_NETWORK_VOLUME.value:
113
+ elif volume_type == volume_utils.VolumeType.RUNPOD_NETWORK_VOLUME.value:
85
114
  if not cloud.is_same_cloud(clouds.RunPod()):
86
115
  raise fastapi.HTTPException(
87
116
  status_code=400,
88
117
  detail='Runpod network volume is only supported on Runpod')
89
- executor.schedule_request(
118
+ await executor.schedule_request_async(
90
119
  request_id=request.state.request_id,
91
- request_name='volume_apply',
120
+ request_name=request_names.RequestName.VOLUME_APPLY,
92
121
  request_body=volume_apply_body,
93
122
  func=core.volume_apply,
94
123
  schedule_type=requests_lib.ScheduleType.LONG,