skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (512) hide show
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +25 -7
  3. sky/adaptors/common.py +24 -1
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/hyperbolic.py +8 -0
  7. sky/adaptors/kubernetes.py +149 -18
  8. sky/adaptors/nebius.py +170 -17
  9. sky/adaptors/primeintellect.py +1 -0
  10. sky/adaptors/runpod.py +68 -0
  11. sky/adaptors/seeweb.py +167 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +187 -4
  14. sky/authentication.py +179 -225
  15. sky/backends/__init__.py +4 -2
  16. sky/backends/backend.py +22 -9
  17. sky/backends/backend_utils.py +1299 -380
  18. sky/backends/cloud_vm_ray_backend.py +1715 -518
  19. sky/backends/docker_utils.py +1 -1
  20. sky/backends/local_docker_backend.py +11 -6
  21. sky/backends/wheel_utils.py +37 -9
  22. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  23. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  24. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  25. sky/{clouds/service_catalog → catalog}/common.py +89 -48
  26. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  27. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  28. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
  29. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
  31. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  33. sky/catalog/data_fetchers/fetch_nebius.py +335 -0
  34. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  35. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  36. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  37. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  38. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  39. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  40. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  41. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  42. sky/catalog/hyperbolic_catalog.py +136 -0
  43. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  44. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  45. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  46. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  47. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  48. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  49. sky/catalog/primeintellect_catalog.py +95 -0
  50. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  51. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  52. sky/catalog/seeweb_catalog.py +184 -0
  53. sky/catalog/shadeform_catalog.py +165 -0
  54. sky/catalog/ssh_catalog.py +167 -0
  55. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  56. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  57. sky/check.py +491 -203
  58. sky/cli.py +5 -6005
  59. sky/client/{cli.py → cli/command.py} +2477 -1885
  60. sky/client/cli/deprecation_utils.py +99 -0
  61. sky/client/cli/flags.py +359 -0
  62. sky/client/cli/table_utils.py +320 -0
  63. sky/client/common.py +70 -32
  64. sky/client/oauth.py +82 -0
  65. sky/client/sdk.py +1203 -297
  66. sky/client/sdk_async.py +833 -0
  67. sky/client/service_account_auth.py +47 -0
  68. sky/cloud_stores.py +73 -0
  69. sky/clouds/__init__.py +13 -0
  70. sky/clouds/aws.py +358 -93
  71. sky/clouds/azure.py +105 -83
  72. sky/clouds/cloud.py +127 -36
  73. sky/clouds/cudo.py +68 -50
  74. sky/clouds/do.py +66 -48
  75. sky/clouds/fluidstack.py +63 -44
  76. sky/clouds/gcp.py +339 -110
  77. sky/clouds/hyperbolic.py +293 -0
  78. sky/clouds/ibm.py +70 -49
  79. sky/clouds/kubernetes.py +563 -162
  80. sky/clouds/lambda_cloud.py +74 -54
  81. sky/clouds/nebius.py +206 -80
  82. sky/clouds/oci.py +88 -66
  83. sky/clouds/paperspace.py +61 -44
  84. sky/clouds/primeintellect.py +317 -0
  85. sky/clouds/runpod.py +164 -74
  86. sky/clouds/scp.py +89 -83
  87. sky/clouds/seeweb.py +466 -0
  88. sky/clouds/shadeform.py +400 -0
  89. sky/clouds/ssh.py +263 -0
  90. sky/clouds/utils/aws_utils.py +10 -4
  91. sky/clouds/utils/gcp_utils.py +87 -11
  92. sky/clouds/utils/oci_utils.py +38 -14
  93. sky/clouds/utils/scp_utils.py +177 -124
  94. sky/clouds/vast.py +99 -77
  95. sky/clouds/vsphere.py +51 -40
  96. sky/core.py +349 -139
  97. sky/dag.py +15 -0
  98. sky/dashboard/out/404.html +1 -1
  99. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  100. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
  102. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  105. sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
  106. sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
  111. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  112. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  113. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  115. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  116. sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
  118. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  119. sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  121. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  122. sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  124. sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  126. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  127. sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
  128. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  129. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  131. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  133. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  134. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
  136. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  139. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  141. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
  143. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
  144. sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  148. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
  149. sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
  151. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  152. sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
  153. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  154. sky/dashboard/out/clusters/[cluster].html +1 -1
  155. sky/dashboard/out/clusters.html +1 -1
  156. sky/dashboard/out/config.html +1 -0
  157. sky/dashboard/out/index.html +1 -1
  158. sky/dashboard/out/infra/[context].html +1 -0
  159. sky/dashboard/out/infra.html +1 -0
  160. sky/dashboard/out/jobs/[job].html +1 -1
  161. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  162. sky/dashboard/out/jobs.html +1 -1
  163. sky/dashboard/out/users.html +1 -0
  164. sky/dashboard/out/volumes.html +1 -0
  165. sky/dashboard/out/workspace/new.html +1 -0
  166. sky/dashboard/out/workspaces/[name].html +1 -0
  167. sky/dashboard/out/workspaces.html +1 -0
  168. sky/data/data_utils.py +137 -1
  169. sky/data/mounting_utils.py +269 -84
  170. sky/data/storage.py +1451 -1807
  171. sky/data/storage_utils.py +43 -57
  172. sky/exceptions.py +132 -2
  173. sky/execution.py +206 -63
  174. sky/global_user_state.py +2374 -586
  175. sky/jobs/__init__.py +5 -0
  176. sky/jobs/client/sdk.py +242 -65
  177. sky/jobs/client/sdk_async.py +143 -0
  178. sky/jobs/constants.py +9 -8
  179. sky/jobs/controller.py +839 -277
  180. sky/jobs/file_content_utils.py +80 -0
  181. sky/jobs/log_gc.py +201 -0
  182. sky/jobs/recovery_strategy.py +398 -152
  183. sky/jobs/scheduler.py +315 -189
  184. sky/jobs/server/core.py +829 -255
  185. sky/jobs/server/server.py +156 -115
  186. sky/jobs/server/utils.py +136 -0
  187. sky/jobs/state.py +2092 -701
  188. sky/jobs/utils.py +1242 -160
  189. sky/logs/__init__.py +21 -0
  190. sky/logs/agent.py +108 -0
  191. sky/logs/aws.py +243 -0
  192. sky/logs/gcp.py +91 -0
  193. sky/metrics/__init__.py +0 -0
  194. sky/metrics/utils.py +443 -0
  195. sky/models.py +78 -1
  196. sky/optimizer.py +164 -70
  197. sky/provision/__init__.py +90 -4
  198. sky/provision/aws/config.py +147 -26
  199. sky/provision/aws/instance.py +135 -50
  200. sky/provision/azure/instance.py +10 -5
  201. sky/provision/common.py +13 -1
  202. sky/provision/cudo/cudo_machine_type.py +1 -1
  203. sky/provision/cudo/cudo_utils.py +14 -8
  204. sky/provision/cudo/cudo_wrapper.py +72 -71
  205. sky/provision/cudo/instance.py +10 -6
  206. sky/provision/do/instance.py +10 -6
  207. sky/provision/do/utils.py +4 -3
  208. sky/provision/docker_utils.py +114 -23
  209. sky/provision/fluidstack/instance.py +13 -8
  210. sky/provision/gcp/__init__.py +1 -0
  211. sky/provision/gcp/config.py +301 -19
  212. sky/provision/gcp/constants.py +218 -0
  213. sky/provision/gcp/instance.py +36 -8
  214. sky/provision/gcp/instance_utils.py +18 -4
  215. sky/provision/gcp/volume_utils.py +247 -0
  216. sky/provision/hyperbolic/__init__.py +12 -0
  217. sky/provision/hyperbolic/config.py +10 -0
  218. sky/provision/hyperbolic/instance.py +437 -0
  219. sky/provision/hyperbolic/utils.py +373 -0
  220. sky/provision/instance_setup.py +93 -14
  221. sky/provision/kubernetes/__init__.py +5 -0
  222. sky/provision/kubernetes/config.py +9 -52
  223. sky/provision/kubernetes/constants.py +17 -0
  224. sky/provision/kubernetes/instance.py +789 -247
  225. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  226. sky/provision/kubernetes/network.py +27 -17
  227. sky/provision/kubernetes/network_utils.py +40 -43
  228. sky/provision/kubernetes/utils.py +1192 -531
  229. sky/provision/kubernetes/volume.py +282 -0
  230. sky/provision/lambda_cloud/instance.py +22 -16
  231. sky/provision/nebius/constants.py +50 -0
  232. sky/provision/nebius/instance.py +19 -6
  233. sky/provision/nebius/utils.py +196 -91
  234. sky/provision/oci/instance.py +10 -5
  235. sky/provision/paperspace/instance.py +10 -7
  236. sky/provision/paperspace/utils.py +1 -1
  237. sky/provision/primeintellect/__init__.py +10 -0
  238. sky/provision/primeintellect/config.py +11 -0
  239. sky/provision/primeintellect/instance.py +454 -0
  240. sky/provision/primeintellect/utils.py +398 -0
  241. sky/provision/provisioner.py +110 -36
  242. sky/provision/runpod/__init__.py +5 -0
  243. sky/provision/runpod/instance.py +27 -6
  244. sky/provision/runpod/utils.py +51 -18
  245. sky/provision/runpod/volume.py +180 -0
  246. sky/provision/scp/__init__.py +15 -0
  247. sky/provision/scp/config.py +93 -0
  248. sky/provision/scp/instance.py +531 -0
  249. sky/provision/seeweb/__init__.py +11 -0
  250. sky/provision/seeweb/config.py +13 -0
  251. sky/provision/seeweb/instance.py +807 -0
  252. sky/provision/shadeform/__init__.py +11 -0
  253. sky/provision/shadeform/config.py +12 -0
  254. sky/provision/shadeform/instance.py +351 -0
  255. sky/provision/shadeform/shadeform_utils.py +83 -0
  256. sky/provision/ssh/__init__.py +18 -0
  257. sky/provision/vast/instance.py +13 -8
  258. sky/provision/vast/utils.py +10 -7
  259. sky/provision/vsphere/common/vim_utils.py +1 -2
  260. sky/provision/vsphere/instance.py +15 -10
  261. sky/provision/vsphere/vsphere_utils.py +9 -19
  262. sky/py.typed +0 -0
  263. sky/resources.py +844 -118
  264. sky/schemas/__init__.py +0 -0
  265. sky/schemas/api/__init__.py +0 -0
  266. sky/schemas/api/responses.py +225 -0
  267. sky/schemas/db/README +4 -0
  268. sky/schemas/db/env.py +90 -0
  269. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  270. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  271. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  272. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  273. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  274. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  275. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  276. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  277. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  278. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  279. sky/schemas/db/script.py.mako +28 -0
  280. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  281. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  282. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  283. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  284. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  285. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  286. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  287. sky/schemas/generated/__init__.py +0 -0
  288. sky/schemas/generated/autostopv1_pb2.py +36 -0
  289. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  290. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  291. sky/schemas/generated/jobsv1_pb2.py +86 -0
  292. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  293. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  294. sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
  295. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  296. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  297. sky/schemas/generated/servev1_pb2.py +58 -0
  298. sky/schemas/generated/servev1_pb2.pyi +115 -0
  299. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  300. sky/serve/autoscalers.py +357 -5
  301. sky/serve/client/impl.py +310 -0
  302. sky/serve/client/sdk.py +47 -139
  303. sky/serve/client/sdk_async.py +130 -0
  304. sky/serve/constants.py +10 -8
  305. sky/serve/controller.py +64 -19
  306. sky/serve/load_balancer.py +106 -60
  307. sky/serve/load_balancing_policies.py +115 -1
  308. sky/serve/replica_managers.py +273 -162
  309. sky/serve/serve_rpc_utils.py +179 -0
  310. sky/serve/serve_state.py +554 -251
  311. sky/serve/serve_utils.py +733 -220
  312. sky/serve/server/core.py +66 -711
  313. sky/serve/server/impl.py +1093 -0
  314. sky/serve/server/server.py +21 -18
  315. sky/serve/service.py +133 -48
  316. sky/serve/service_spec.py +135 -16
  317. sky/serve/spot_placer.py +3 -0
  318. sky/server/auth/__init__.py +0 -0
  319. sky/server/auth/authn.py +50 -0
  320. sky/server/auth/loopback.py +38 -0
  321. sky/server/auth/oauth2_proxy.py +200 -0
  322. sky/server/common.py +475 -181
  323. sky/server/config.py +81 -23
  324. sky/server/constants.py +44 -6
  325. sky/server/daemons.py +229 -0
  326. sky/server/html/token_page.html +185 -0
  327. sky/server/metrics.py +160 -0
  328. sky/server/requests/executor.py +528 -138
  329. sky/server/requests/payloads.py +351 -17
  330. sky/server/requests/preconditions.py +21 -17
  331. sky/server/requests/process.py +112 -29
  332. sky/server/requests/request_names.py +120 -0
  333. sky/server/requests/requests.py +817 -224
  334. sky/server/requests/serializers/decoders.py +82 -31
  335. sky/server/requests/serializers/encoders.py +140 -22
  336. sky/server/requests/threads.py +106 -0
  337. sky/server/rest.py +417 -0
  338. sky/server/server.py +1290 -284
  339. sky/server/state.py +20 -0
  340. sky/server/stream_utils.py +345 -57
  341. sky/server/uvicorn.py +217 -3
  342. sky/server/versions.py +270 -0
  343. sky/setup_files/MANIFEST.in +5 -0
  344. sky/setup_files/alembic.ini +156 -0
  345. sky/setup_files/dependencies.py +136 -31
  346. sky/setup_files/setup.py +44 -42
  347. sky/sky_logging.py +102 -5
  348. sky/skylet/attempt_skylet.py +1 -0
  349. sky/skylet/autostop_lib.py +129 -8
  350. sky/skylet/configs.py +27 -20
  351. sky/skylet/constants.py +171 -19
  352. sky/skylet/events.py +105 -21
  353. sky/skylet/job_lib.py +335 -104
  354. sky/skylet/log_lib.py +297 -18
  355. sky/skylet/log_lib.pyi +44 -1
  356. sky/skylet/ray_patches/__init__.py +17 -3
  357. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  358. sky/skylet/ray_patches/cli.py.diff +19 -0
  359. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  360. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  361. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  362. sky/skylet/ray_patches/updater.py.diff +18 -0
  363. sky/skylet/ray_patches/worker.py.diff +41 -0
  364. sky/skylet/services.py +564 -0
  365. sky/skylet/skylet.py +63 -4
  366. sky/skylet/subprocess_daemon.py +103 -29
  367. sky/skypilot_config.py +506 -99
  368. sky/ssh_node_pools/__init__.py +1 -0
  369. sky/ssh_node_pools/core.py +135 -0
  370. sky/ssh_node_pools/server.py +233 -0
  371. sky/task.py +621 -137
  372. sky/templates/aws-ray.yml.j2 +10 -3
  373. sky/templates/azure-ray.yml.j2 +1 -1
  374. sky/templates/do-ray.yml.j2 +1 -1
  375. sky/templates/gcp-ray.yml.j2 +57 -0
  376. sky/templates/hyperbolic-ray.yml.j2 +67 -0
  377. sky/templates/jobs-controller.yaml.j2 +27 -24
  378. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  379. sky/templates/kubernetes-ray.yml.j2 +607 -51
  380. sky/templates/lambda-ray.yml.j2 +1 -1
  381. sky/templates/nebius-ray.yml.j2 +33 -12
  382. sky/templates/paperspace-ray.yml.j2 +1 -1
  383. sky/templates/primeintellect-ray.yml.j2 +71 -0
  384. sky/templates/runpod-ray.yml.j2 +9 -1
  385. sky/templates/scp-ray.yml.j2 +3 -50
  386. sky/templates/seeweb-ray.yml.j2 +108 -0
  387. sky/templates/shadeform-ray.yml.j2 +72 -0
  388. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  389. sky/templates/websocket_proxy.py +178 -18
  390. sky/usage/usage_lib.py +18 -11
  391. sky/users/__init__.py +0 -0
  392. sky/users/model.conf +15 -0
  393. sky/users/permission.py +387 -0
  394. sky/users/rbac.py +121 -0
  395. sky/users/server.py +720 -0
  396. sky/users/token_service.py +218 -0
  397. sky/utils/accelerator_registry.py +34 -5
  398. sky/utils/admin_policy_utils.py +84 -38
  399. sky/utils/annotations.py +16 -5
  400. sky/utils/asyncio_utils.py +78 -0
  401. sky/utils/auth_utils.py +153 -0
  402. sky/utils/benchmark_utils.py +60 -0
  403. sky/utils/cli_utils/status_utils.py +159 -86
  404. sky/utils/cluster_utils.py +31 -9
  405. sky/utils/command_runner.py +354 -68
  406. sky/utils/command_runner.pyi +93 -3
  407. sky/utils/common.py +35 -8
  408. sky/utils/common_utils.py +310 -87
  409. sky/utils/config_utils.py +87 -5
  410. sky/utils/context.py +402 -0
  411. sky/utils/context_utils.py +222 -0
  412. sky/utils/controller_utils.py +264 -89
  413. sky/utils/dag_utils.py +31 -12
  414. sky/utils/db/__init__.py +0 -0
  415. sky/utils/db/db_utils.py +470 -0
  416. sky/utils/db/migration_utils.py +133 -0
  417. sky/utils/directory_utils.py +12 -0
  418. sky/utils/env_options.py +13 -0
  419. sky/utils/git.py +567 -0
  420. sky/utils/git_clone.sh +460 -0
  421. sky/utils/infra_utils.py +195 -0
  422. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  423. sky/utils/kubernetes/config_map_utils.py +133 -0
  424. sky/utils/kubernetes/create_cluster.sh +13 -27
  425. sky/utils/kubernetes/delete_cluster.sh +10 -7
  426. sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
  427. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  428. sky/utils/kubernetes/generate_kind_config.py +6 -66
  429. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  430. sky/utils/kubernetes/gpu_labeler.py +5 -5
  431. sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
  432. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  433. sky/utils/kubernetes/ssh_utils.py +221 -0
  434. sky/utils/kubernetes_enums.py +8 -15
  435. sky/utils/lock_events.py +94 -0
  436. sky/utils/locks.py +368 -0
  437. sky/utils/log_utils.py +300 -6
  438. sky/utils/perf_utils.py +22 -0
  439. sky/utils/resource_checker.py +298 -0
  440. sky/utils/resources_utils.py +249 -32
  441. sky/utils/rich_utils.py +213 -37
  442. sky/utils/schemas.py +905 -147
  443. sky/utils/serialize_utils.py +16 -0
  444. sky/utils/status_lib.py +10 -0
  445. sky/utils/subprocess_utils.py +38 -15
  446. sky/utils/tempstore.py +70 -0
  447. sky/utils/timeline.py +24 -52
  448. sky/utils/ux_utils.py +84 -15
  449. sky/utils/validator.py +11 -1
  450. sky/utils/volume.py +86 -0
  451. sky/utils/yaml_utils.py +111 -0
  452. sky/volumes/__init__.py +13 -0
  453. sky/volumes/client/__init__.py +0 -0
  454. sky/volumes/client/sdk.py +149 -0
  455. sky/volumes/server/__init__.py +0 -0
  456. sky/volumes/server/core.py +258 -0
  457. sky/volumes/server/server.py +122 -0
  458. sky/volumes/volume.py +212 -0
  459. sky/workspaces/__init__.py +0 -0
  460. sky/workspaces/core.py +655 -0
  461. sky/workspaces/server.py +101 -0
  462. sky/workspaces/utils.py +56 -0
  463. skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
  464. skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
  465. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
  466. sky/benchmark/benchmark_state.py +0 -256
  467. sky/benchmark/benchmark_utils.py +0 -641
  468. sky/clouds/service_catalog/constants.py +0 -7
  469. sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
  470. sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
  471. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  472. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  473. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  474. sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
  475. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  476. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  477. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  478. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  479. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  480. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  481. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  482. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
  483. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  484. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  485. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  486. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
  487. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  488. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  489. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  490. sky/jobs/dashboard/dashboard.py +0 -223
  491. sky/jobs/dashboard/static/favicon.ico +0 -0
  492. sky/jobs/dashboard/templates/index.html +0 -831
  493. sky/jobs/server/dashboard_utils.py +0 -69
  494. sky/skylet/providers/scp/__init__.py +0 -2
  495. sky/skylet/providers/scp/config.py +0 -149
  496. sky/skylet/providers/scp/node_provider.py +0 -578
  497. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  498. sky/utils/db_utils.py +0 -100
  499. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  500. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  501. skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
  502. skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
  503. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  504. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  505. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  506. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  507. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  508. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  509. /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
  510. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
  511. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
  512. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,111 @@
1
+ """YAML utilities."""
2
+ import io
3
+ from typing import Any, Dict, List, Optional, TYPE_CHECKING, Union
4
+
5
+ from sky.adaptors import common
6
+
7
+ if TYPE_CHECKING:
8
+ import yaml
9
+ else:
10
+ yaml = common.LazyImport('yaml')
11
+
12
+ _c_extension_unavailable = False
13
+
14
+
15
def safe_load(stream) -> Any:
    """Parse one YAML document from ``stream`` using a safe loader.

    ``yaml.CSafeLoader`` is attempted first. If that attempt raises
    ``AttributeError`` (presumably because PyYAML was built without the C
    extension), the module-level flag is set so that this and later calls
    use ``yaml.SafeLoader`` instead.

    Args:
        stream: the YAML input handed to ``yaml.load``.

    Returns:
        Whatever ``yaml.load`` produces for the document.
    """
    global _c_extension_unavailable
    if not _c_extension_unavailable:
        try:
            return yaml.load(stream, Loader=yaml.CSafeLoader)
        except AttributeError:
            # Remember the failure so the C loader is not retried next time.
            _c_extension_unavailable = True
            return yaml.load(stream, Loader=yaml.SafeLoader)

    return yaml.load(stream, Loader=yaml.SafeLoader)
25
+
26
+
27
def safe_load_all(stream) -> Any:
    """Parse every YAML document in ``stream`` using a safe loader.

    Mirrors ``safe_load``: ``yaml.CSafeLoader`` is attempted first, and an
    ``AttributeError`` flips the module-level flag so that
    ``yaml.SafeLoader`` is used from then on.

    Args:
        stream: the YAML input handed to ``yaml.load_all``.

    Returns:
        The result of ``yaml.load_all`` for the stream.
    """
    global _c_extension_unavailable
    if not _c_extension_unavailable:
        try:
            return yaml.load_all(stream, Loader=yaml.CSafeLoader)
        except AttributeError:
            # Remember the failure so the C loader is not retried next time.
            _c_extension_unavailable = True
            return yaml.load_all(stream, Loader=yaml.SafeLoader)

    return yaml.load_all(stream, Loader=yaml.SafeLoader)
37
+
38
+
39
def read_yaml(path: Optional[str]) -> Dict[str, Any]:
    """Load the YAML file at ``path`` with ``safe_load``.

    Args:
        path: location of the YAML file; must not be None.

    Returns:
        The parsed content as returned by ``safe_load``.

    Raises:
        ValueError: if ``path`` is None.
    """
    if path is None:
        raise ValueError('Attempted to read a None YAML.')

    with open(path, 'r', encoding='utf-8') as yaml_file:
        return safe_load(yaml_file)
45
+
46
+
47
def read_yaml_str(yaml_str: str) -> Dict[str, Any]:
    """Parse a YAML string with ``safe_load``.

    Args:
        yaml_str: the YAML text to parse.

    Returns:
        The parsed content, or an empty dict when the parsed result is
        falsy (for example, an empty document).
    """
    # A falsy parse result is normalised to an empty dict.
    return safe_load(io.StringIO(yaml_str)) or {}
54
+
55
+
56
def read_yaml_all_str(yaml_str: str) -> List[Dict[str, Any]]:
    """Parse all YAML documents contained in a string.

    Args:
        yaml_str: the YAML text, possibly holding several documents.

    Returns:
        A list with one entry per parsed document, or ``[{}]`` when the
        text yields no documents at all.
    """
    documents = list(safe_load_all(io.StringIO(yaml_str)))
    # Empty YAML input: hand back a single empty config.
    return documents if documents else [{}]
64
+
65
+
66
def read_yaml_all(path: str) -> List[Dict[str, Any]]:
    """Read the file at ``path`` and parse all YAML documents in it.

    Args:
        path: location of the YAML file.

    Returns:
        The result of ``read_yaml_all_str`` applied to the file's text.
    """
    with open(path, 'r', encoding='utf-8') as yaml_file:
        text = yaml_file.read()
    return read_yaml_all_str(text)
69
+
70
+
71
def dump_yaml(path: str,
              config: Union[List[Dict[str, Any]], Dict[str, Any]],
              blank: bool = False) -> None:
    """Dumps a YAML file.

    The YAML text is produced before the destination is opened, so a
    serialization error cannot leave an existing file truncated.

    Args:
        path: the path to the YAML file.
        config: the configuration to dump.
        blank: if True and ``config`` is an empty dict, write an empty file
            instead of the literal ``{}``.
    """
    if blank and isinstance(config, dict) and not config:
        # when dumping to yaml, an empty dict will go in as {}.
        contents = ''
    else:
        contents = dump_yaml_str(config)

    # Open only after serialization succeeded: mode 'w' truncates on open.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(contents)
86
+
87
+
88
def dump_yaml_str(config: Union[List[Dict[str, Any]], Dict[str, Any]]) -> str:
    """Serialize a configuration to YAML text.

    A list is written with ``yaml.dump_all`` and anything else with
    ``yaml.dump``. Keys keep their insertion order and block style is used.

    Args:
        config: the configuration to dump.

    Returns:
        The YAML string.
    """

    # Workaround taken from https://github.com/yaml/pyyaml/issues/127
    class LineBreakDumper(yaml.SafeDumper):

        def write_line_break(self, data=None):
            super().write_line_break(data)
            # Emit one extra line break whenever the indent stack holds
            # exactly one entry (presumably the top level of the document).
            if len(self.indents) == 1:
                super().write_line_break()

    serializer = (
        yaml.dump_all  # type: ignore
        if isinstance(config, list) else yaml.dump)  # type: ignore
    return serializer(config,
                      Dumper=LineBreakDumper,
                      sort_keys=False,
                      default_flow_style=False)
@@ -0,0 +1,13 @@
1
+ """Volumes."""
2
+
3
+ from sky.volumes.client.sdk import apply
4
+ from sky.volumes.client.sdk import delete
5
+ from sky.volumes.client.sdk import ls
6
+ from sky.volumes.volume import Volume
7
+
8
+ __all__ = [
9
+ 'apply',
10
+ 'delete',
11
+ 'ls',
12
+ 'Volume',
13
+ ]
File without changes
@@ -0,0 +1,149 @@
1
+ """SDK functions for volumes."""
2
+ import json
3
+ import typing
4
+ from typing import List
5
+
6
+ from sky import exceptions
7
+ from sky import sky_logging
8
+ from sky.adaptors import common as adaptors_common
9
+ from sky.schemas.api import responses
10
+ from sky.server import common as server_common
11
+ from sky.server import versions
12
+ from sky.server.requests import payloads
13
+ from sky.usage import usage_lib
14
+ from sky.utils import annotations
15
+ from sky.utils import context
16
+ from sky.utils import ux_utils
17
+ from sky.volumes import volume as volume_lib
18
+
19
+ if typing.TYPE_CHECKING:
20
+ import requests
21
+ else:
22
+ requests = adaptors_common.LazyImport('requests')
23
+
24
+ logger = sky_logging.init_logger(__name__)
25
+
26
+
27
@context.contextual
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
def apply(volume: volume_lib.Volume) -> server_common.RequestId[None]:
    """Creates or registers a volume.

    The volume's fields are packed into a `VolumeApplyBody` and POSTed to
    the `/volumes/apply` endpoint.

    Example:
        .. code-block:: python

            import sky.volumes
            cfg = {
                'name': 'pvc',
                'type': 'k8s-pvc',
                'size': '100GB',
                'labels': {
                    'key': 'value',
                },
            }
            vol = sky.volumes.Volume.from_yaml_config(cfg)
            request_id = sky.volumes.apply(vol)
            sky.get(request_id)

        or

            import sky.volumes
            vol = sky.volumes.Volume(
                name='vol',
                type='runpod-network-volume',
                infra='runpod/ca/CA-MTL-1',
                size='100GB',
            )
            request_id = sky.volumes.apply(vol)
            sky.get(request_id)

    Args:
        volume: The volume to apply.

    Returns:
        The request ID of the apply request.
    """
    payload = payloads.VolumeApplyBody(
        name=volume.name,
        volume_type=volume.type,
        cloud=volume.cloud,
        region=volume.region,
        zone=volume.zone,
        size=volume.size,
        config=volume.config,
        labels=volume.labels,
    )
    # Round-trip through the model's JSON dump to get a plain dict.
    request_json = json.loads(payload.model_dump_json())
    resp = server_common.make_authenticated_request('POST',
                                                    '/volumes/apply',
                                                    json=request_json)
    return server_common.get_request_id(resp)
81
+
82
+
83
@context.contextual
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
@versions.minimal_api_version(20)
def validate(volume: volume_lib.Volume) -> None:
    """Validates the volume.

    All validation is done on the server side: the volume's fields are
    POSTed to `/volumes/validate`.

    Args:
        volume: The volume to validate.

    Raises:
        The exception deserialized from the response's `detail` field when
        the server answers with HTTP 400.
    """
    payload = payloads.VolumeValidateBody(
        name=volume.name,
        volume_type=volume.type,
        infra=volume.infra,
        resource_name=volume.resource_name,
        size=volume.size,
        config=volume.config,
        labels=volume.labels,
    )
    request_json = json.loads(payload.model_dump_json())
    resp = server_common.make_authenticated_request('POST',
                                                    '/volumes/validate',
                                                    json=request_json)
    if resp.status_code != 400:
        return
    with ux_utils.print_exception_no_traceback():
        error_detail = resp.json().get('detail')
        raise exceptions.deserialize_exception(error_detail)
114
+
115
+
116
@context.contextual
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
def ls() -> server_common.RequestId[List[responses.VolumeRecord]]:
    """Lists all volumes.

    Issues a GET request to the `/volumes` endpoint.

    Returns:
        The request ID of the list request.
    """
    resp = server_common.make_authenticated_request('GET', '/volumes')
    return server_common.get_request_id(resp)
131
+
132
+
133
@context.contextual
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
def delete(names: List[str]) -> server_common.RequestId[None]:
    """Deletes volumes.

    The names are wrapped in a `VolumeDeleteBody` and POSTed to the
    `/volumes/delete` endpoint.

    Args:
        names: List of volume names to delete.

    Returns:
        The request ID of the delete request.
    """
    payload = payloads.VolumeDeleteBody(names=names)
    request_json = json.loads(payload.model_dump_json())
    resp = server_common.make_authenticated_request('POST',
                                                    '/volumes/delete',
                                                    json=request_json)
    return server_common.get_request_id(resp)
File without changes
@@ -0,0 +1,258 @@
1
+ """Volume management core."""
2
+
3
+ import contextlib
4
+ import os
5
+ from typing import Any, Dict, Generator, List, Optional
6
+ import uuid
7
+
8
+ import filelock
9
+
10
+ from sky import global_user_state
11
+ from sky import models
12
+ from sky import provision
13
+ from sky import sky_logging
14
+ from sky.schemas.api import responses
15
+ from sky.utils import common_utils
16
+ from sky.utils import registry
17
+ from sky.utils import rich_utils
18
+ from sky.utils import status_lib
19
+ from sky.utils import ux_utils
20
+
21
+ logger = sky_logging.init_logger(__name__)
22
+
23
+ # Per-volume file locks used for volume management.
24
+ VOLUME_LOCK_PATH = os.path.expanduser('~/.sky/.{volume_name}.lock')
25
+ VOLUME_LOCK_TIMEOUT_SECONDS = 20
26
+
27
+
28
def volume_refresh():
    """Refreshes the volume status.

    For every volume returned by `global_user_state.get_volumes()`, asks the
    provisioner which pods use it and, under the volume's lock, sets the
    stored status to READY when no pod uses it or IN_USE otherwise. The
    status is only written when it differs from the stored one. Volumes
    without a handle, or that are no longer found once the lock is held,
    are skipped with a warning.
    """
    volumes = global_user_state.get_volumes()
    for volume in volumes:
        volume_name = volume.get('name')
        config = volume.get('handle')
        if config is None:
            # The two literals are concatenated, so the separating space
            # must be part of one of them.
            logger.warning(f'Volume {volume_name} has no handle. '
                           'Skipping status refresh...')
            continue
        cloud = config.cloud
        usedby_pods, _ = provision.get_volume_usedby(cloud, config)
        with _volume_lock(volume_name):
            # Re-read the record while holding the lock instead of relying
            # on the snapshot fetched before the loop.
            latest_volume = global_user_state.get_volume_by_name(volume_name)
            if latest_volume is None:
                logger.warning(f'Volume {volume_name} not found.')
                continue
            status = latest_volume.get('status')
            if not usedby_pods:
                if status != status_lib.VolumeStatus.READY:
                    logger.info(f'Update volume {volume_name} '
                                f'status to READY')
                    global_user_state.update_volume_status(
                        volume_name, status=status_lib.VolumeStatus.READY)
            else:
                if status != status_lib.VolumeStatus.IN_USE:
                    logger.info(f'Update volume {volume_name} '
                                f'status to IN_USE, usedby: {usedby_pods}')
                    global_user_state.update_volume_status(
                        volume_name, status=status_lib.VolumeStatus.IN_USE)
58
+
59
+
60
def volume_list() -> List[responses.VolumeRecord]:
    """Gets the volumes.

    Usage information is fetched once per cloud and then mapped onto each
    volume. Volumes without a handle are logged and left out of the result.

    Returns:
        One `VolumeRecord` per volume, built from these fields:
        [
            {
                'name': str,
                'type': str,
                'launched_at': int timestamp of creation,
                'cloud': str,
                'region': str,
                'zone': str,
                'size': str,
                'config': Dict[str, Any],
                'name_on_cloud': str,
                'user_hash': str,
                'user_name': str,
                'workspace': str,
                'last_attached_at': int timestamp of last attachment,
                'last_use': last command,
                'status': sky.VolumeStatus,
                'usedby_pods': List[str],
                'usedby_clusters': List[str],
            }
        ]
    """
    with rich_utils.safe_status(ux_utils.spinner_message('Listing volumes')):
        volumes = global_user_state.get_volumes()

        # Group the volume handles by cloud so usage is queried per cloud.
        cloud_to_configs: Dict[str, List[models.VolumeConfig]] = {}
        for volume in volumes:
            handle = volume.get('handle')
            if handle is None:
                volume_name = volume.get('name')
                logger.warning(f'Volume {volume_name} has no handle.')
                continue
            cloud_to_configs.setdefault(handle.cloud, []).append(handle)

        cloud_to_used_by_pods, cloud_to_used_by_clusters = {}, {}
        for cloud, configs in cloud_to_configs.items():
            usage = provision.get_all_volumes_usedby(cloud, configs)
            used_by_pods, used_by_clusters = usage
            cloud_to_used_by_pods[cloud] = used_by_pods
            cloud_to_used_by_clusters[cloud] = used_by_clusters

        user_map = {
            user.id: user.name for user in global_user_state.get_all_users()
        }

        records = []
        for volume in volumes:
            volume_name = volume.get('name')
            record = {
                'name': volume_name,
                'launched_at': volume.get('launched_at'),
                'user_hash': volume.get('user_hash'),
                'user_name': user_map.get(volume.get('user_hash'), ''),
                'workspace': volume.get('workspace'),
                'last_attached_at': volume.get('last_attached_at'),
                'last_use': volume.get('last_use'),
                'usedby_pods': [],
                'usedby_clusters': [],
            }
            status = volume.get('status')
            record['status'] = '' if status is None else status.value
            handle = volume.get('handle')
            if handle is None:
                logger.warning(f'Volume {volume_name} has no handle.')
                continue
            cloud = handle.cloud
            usedby_pods, usedby_clusters = provision.map_all_volumes_usedby(
                cloud,
                cloud_to_used_by_pods[cloud],
                cloud_to_used_by_clusters[cloud],
                handle,
            )
            record.update({
                'type': handle.type,
                'cloud': handle.cloud,
                'region': handle.region,
                'zone': handle.zone,
                'size': handle.size,
                'config': handle.config,
                'name_on_cloud': handle.name_on_cloud,
                'usedby_pods': usedby_pods,
                'usedby_clusters': usedby_clusters,
            })
            records.append(responses.VolumeRecord(**record))
        return records
149
+
150
+
151
def volume_delete(names: List[str]) -> None:
    """Deletes volumes.

    Volumes are processed in the given order; the first failing one raises
    and the remaining names are not processed.

    Args:
        names: List of volume names to delete.

    Raises:
        ValueError: If the volume does not exist
            or is in use or has no handle.
    """
    with rich_utils.safe_status(ux_utils.spinner_message('Deleting volumes')):
        for name in names:
            record = global_user_state.get_volume_by_name(name)
            if record is None:
                raise ValueError(f'Volume {name} not found.')
            handle = record.get('handle')
            if handle is None:
                raise ValueError(f'Volume {name} has no handle.')
            cloud = handle.cloud
            usedby_pods, usedby_clusters = provision.get_volume_usedby(
                cloud, handle)
            # Cluster usage is reported in preference to pod usage.
            if usedby_clusters:
                noun = 'cluster' if len(usedby_clusters) <= 1 else 'clusters'
                joined = ', '.join(usedby_clusters)
                raise ValueError(f'Volume {name} is used by {noun} {joined}.')
            if usedby_pods:
                noun = 'pod' if len(usedby_pods) <= 1 else 'pods'
                joined = ', '.join(usedby_pods)
                raise ValueError(f'Volume {name} is used by {noun} {joined}.')
            logger.debug(f'Deleting volume {name} with config {handle}')
            with _volume_lock(name):
                provision.delete_volume(cloud, handle)
                global_user_state.delete_volume(name)
187
+
188
+
189
def volume_apply(
    name: str,
    volume_type: str,
    cloud: str,
    region: Optional[str],
    zone: Optional[str],
    size: Optional[str],
    config: Dict[str, Any],
    labels: Optional[Dict[str, str]] = None,
) -> None:
    """Creates or registers a volume.

    Nothing is provisioned when a volume with the same name is already
    recorded; otherwise the volume is applied through the provisioner and
    stored with status READY.

    Args:
        name: The name of the volume.
        volume_type: The type of the volume.
        cloud: The cloud of the volume.
        region: The region of the volume.
        zone: The zone of the volume.
        size: The size of the volume.
        config: The configuration of the volume.
        labels: The labels of the volume.

    """
    with rich_utils.safe_status(ux_utils.spinner_message('Creating volume')):
        cloud_obj = registry.CLOUD_REGISTRY.from_str(cloud)
        assert cloud_obj is not None
        region, zone = cloud_obj.validate_region_zone(region, zone)
        # Reuse the method for cluster name on cloud to
        # generate the storage name on cloud, then append a short
        # uuid-derived suffix.
        suffix = str(uuid.uuid4())[:6]
        base_name = common_utils.make_cluster_name_on_cloud(
            name, max_length=cloud_obj.max_cluster_name_length())
        name_on_cloud = base_name + '-' + suffix
        volume_config = models.VolumeConfig(
            name=name,
            type=volume_type,
            cloud=str(cloud_obj),
            region=region,
            zone=zone,
            size=size,
            config=config,
            name_on_cloud=name_on_cloud,
            labels=labels,
        )
        logger.debug(f'Creating volume {name} on cloud {cloud} '
                     f'with config {volume_config}')
        with _volume_lock(name):
            if global_user_state.get_volume_by_name(name) is not None:
                logger.info(f'Volume {name} already exists.')
                return
            # Store the config handed back by the provisioner.
            volume_config = provision.apply_volume(cloud, volume_config)
            global_user_state.add_volume(name, volume_config,
                                         status_lib.VolumeStatus.READY)
243
+
244
+
245
@contextlib.contextmanager
def _volume_lock(volume_name: str) -> Generator[None, None, None]:
    """Context manager holding the per-volume file lock.

    The lock file path is `VOLUME_LOCK_PATH` formatted with the volume name,
    and acquisition waits up to `VOLUME_LOCK_TIMEOUT_SECONDS`.

    Args:
        volume_name: Name of the volume whose lock is taken.

    Raises:
        RuntimeError: If a `filelock.Timeout` is raised while acquiring the
            lock or inside the managed block.
    """
    lock_path = VOLUME_LOCK_PATH.format(volume_name=volume_name)
    try:
        with filelock.FileLock(lock_path, VOLUME_LOCK_TIMEOUT_SECONDS):
            yield
    except filelock.Timeout as e:
        # The message names the volume; it previously said "user".
        raise RuntimeError(
            f'Failed to update volume {volume_name} due to a timeout '
            f'when trying to acquire the lock at '
            f'{lock_path}. '
            'Please try again or manually remove the lock '
            f'file if you believe it is stale.') from e
@@ -0,0 +1,122 @@
1
+ """REST API for volume management."""
2
+
3
+ import fastapi
4
+
5
+ from sky import clouds
6
+ from sky import exceptions
7
+ from sky import sky_logging
8
+ from sky.server.requests import executor
9
+ from sky.server.requests import payloads
10
+ from sky.server.requests import request_names
11
+ from sky.server.requests import requests as requests_lib
12
+ from sky.utils import registry
13
+ from sky.utils import volume as volume_utils
14
+ from sky.volumes.server import core
15
+
16
+ logger = sky_logging.init_logger(__name__)
17
+
18
+ router = fastapi.APIRouter()
19
+
20
+
21
@router.get('')
async def volume_list(request: fastapi.Request) -> None:
    """Gets the volumes.

    Schedules `core.volume_list` as a SHORT request; when the request has
    an authenticated user, that user's env vars are put in the request body.
    """
    body_kwargs = {}
    auth_user = request.state.auth_user
    if auth_user:
        body_kwargs['env_vars'] = auth_user.to_env_vars()
    await executor.schedule_request_async(
        request_id=request.state.request_id,
        request_name=request_names.RequestName.VOLUME_LIST,
        request_body=payloads.RequestBody(**body_kwargs),
        func=core.volume_list,
        schedule_type=requests_lib.ScheduleType.SHORT,
    )
36
+
37
+
38
@router.post('/delete')
async def volume_delete(request: fastapi.Request,
                        volume_delete_body: payloads.VolumeDeleteBody) -> None:
    """Deletes a volume.

    Schedules `core.volume_delete` as a LONG request with the received body.
    """
    schedule_kwargs = {
        'request_id': request.state.request_id,
        'request_name': request_names.RequestName.VOLUME_DELETE,
        'request_body': volume_delete_body,
        'func': core.volume_delete,
        'schedule_type': requests_lib.ScheduleType.LONG,
    }
    await executor.schedule_request_async(**schedule_kwargs)
49
+
50
+
51
@router.post('/validate')
async def volume_validate(
        _: fastapi.Request,
        volume_validate_body: payloads.VolumeValidateBody) -> None:
    """Validates a volume.

    Builds a `Volume` from the request body and calls its `validate()`.
    Any exception raised while doing so is serialized into the `detail` of
    an HTTP 400 response.
    """
    # pylint: disable=import-outside-toplevel
    from sky.volumes import volume as volume_lib

    body = volume_validate_body
    try:
        volume = volume_lib.Volume.from_yaml_config({
            'name': body.name,
            'type': body.volume_type,
            'infra': body.infra,
            'size': body.size,
            'labels': body.labels,
            'config': body.config,
            'resource_name': body.resource_name,
        })
        volume.validate()
    except Exception as e:
        serialized = exceptions.serialize_exception(e)
        raise fastapi.HTTPException(status_code=400, detail=serialized)
74
+
75
+
76
@router.post('/apply')
async def volume_apply(request: fastapi.Request,
                       volume_apply_body: payloads.VolumeApplyBody) -> None:
    """Creates or registers a volume.

    Checks the volume type, the cloud, and the type/cloud pairing before
    scheduling `core.volume_apply` as a LONG request. For PVC volumes a
    missing `access_mode` defaults to READ_WRITE_ONCE.

    Raises:
        fastapi.HTTPException: With status 400 when the volume type, cloud,
            type/cloud combination, or PVC access mode is invalid.
    """
    volume_cloud = volume_apply_body.cloud
    volume_type = volume_apply_body.volume_type
    volume_config = volume_apply_body.config

    # A distinct loop variable avoids shadowing `volume_type` above.
    supported_volume_types = [
        supported_type.value for supported_type in volume_utils.VolumeType
    ]
    if volume_type not in supported_volume_types:
        raise fastapi.HTTPException(
            status_code=400, detail=f'Invalid volume type: {volume_type}')
    cloud = registry.CLOUD_REGISTRY.from_str(volume_cloud)
    if cloud is None:
        raise fastapi.HTTPException(status_code=400,
                                    detail=f'Invalid cloud: {volume_cloud}')
    if volume_type == volume_utils.VolumeType.PVC.value:
        if not cloud.is_same_cloud(clouds.Kubernetes()):
            raise fastapi.HTTPException(
                status_code=400,
                detail='PVC storage is only supported on Kubernetes')
        supported_access_modes = [
            access_mode.value for access_mode in volume_utils.VolumeAccessMode
        ]
        if volume_config is None:
            volume_config = {}
            # Attach the new dict to the body; otherwise the default access
            # mode set below would be lost when the body is scheduled.
            volume_apply_body.config = volume_config
        access_mode = volume_config.get('access_mode')
        if access_mode is None:
            volume_config['access_mode'] = (
                volume_utils.VolumeAccessMode.READ_WRITE_ONCE.value)
        elif access_mode not in supported_access_modes:
            raise fastapi.HTTPException(
                status_code=400, detail=f'Invalid access mode: {access_mode}')
    elif volume_type == volume_utils.VolumeType.RUNPOD_NETWORK_VOLUME.value:
        if not cloud.is_same_cloud(clouds.RunPod()):
            raise fastapi.HTTPException(
                status_code=400,
                detail='Runpod network volume is only supported on Runpod')
    await executor.schedule_request_async(
        request_id=request.state.request_id,
        request_name=request_names.RequestName.VOLUME_APPLY,
        request_body=volume_apply_body,
        func=core.volume_apply,
        schedule_type=requests_lib.ScheduleType.LONG,
    )