skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546) hide show
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/hyperbolic.py +8 -0
  8. sky/adaptors/ibm.py +5 -2
  9. sky/adaptors/kubernetes.py +149 -18
  10. sky/adaptors/nebius.py +173 -30
  11. sky/adaptors/primeintellect.py +1 -0
  12. sky/adaptors/runpod.py +68 -0
  13. sky/adaptors/seeweb.py +183 -0
  14. sky/adaptors/shadeform.py +89 -0
  15. sky/admin_policy.py +187 -4
  16. sky/authentication.py +179 -225
  17. sky/backends/__init__.py +4 -2
  18. sky/backends/backend.py +22 -9
  19. sky/backends/backend_utils.py +1323 -397
  20. sky/backends/cloud_vm_ray_backend.py +1749 -1029
  21. sky/backends/docker_utils.py +1 -1
  22. sky/backends/local_docker_backend.py +11 -6
  23. sky/backends/task_codegen.py +633 -0
  24. sky/backends/wheel_utils.py +55 -9
  25. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  26. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  27. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  28. sky/{clouds/service_catalog → catalog}/common.py +90 -49
  29. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  31. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  33. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
  34. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  35. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  36. sky/catalog/data_fetchers/fetch_nebius.py +338 -0
  37. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  38. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  39. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  40. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  41. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  42. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  43. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  44. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  45. sky/catalog/hyperbolic_catalog.py +136 -0
  46. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  47. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  48. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  49. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  50. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  51. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  52. sky/catalog/primeintellect_catalog.py +95 -0
  53. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  54. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  55. sky/catalog/seeweb_catalog.py +184 -0
  56. sky/catalog/shadeform_catalog.py +165 -0
  57. sky/catalog/ssh_catalog.py +167 -0
  58. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  59. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  60. sky/check.py +533 -185
  61. sky/cli.py +5 -5975
  62. sky/client/{cli.py → cli/command.py} +2591 -1956
  63. sky/client/cli/deprecation_utils.py +99 -0
  64. sky/client/cli/flags.py +359 -0
  65. sky/client/cli/table_utils.py +322 -0
  66. sky/client/cli/utils.py +79 -0
  67. sky/client/common.py +78 -32
  68. sky/client/oauth.py +82 -0
  69. sky/client/sdk.py +1219 -319
  70. sky/client/sdk_async.py +827 -0
  71. sky/client/service_account_auth.py +47 -0
  72. sky/cloud_stores.py +82 -3
  73. sky/clouds/__init__.py +13 -0
  74. sky/clouds/aws.py +564 -164
  75. sky/clouds/azure.py +105 -83
  76. sky/clouds/cloud.py +140 -40
  77. sky/clouds/cudo.py +68 -50
  78. sky/clouds/do.py +66 -48
  79. sky/clouds/fluidstack.py +63 -44
  80. sky/clouds/gcp.py +339 -110
  81. sky/clouds/hyperbolic.py +293 -0
  82. sky/clouds/ibm.py +70 -49
  83. sky/clouds/kubernetes.py +570 -162
  84. sky/clouds/lambda_cloud.py +74 -54
  85. sky/clouds/nebius.py +210 -81
  86. sky/clouds/oci.py +88 -66
  87. sky/clouds/paperspace.py +61 -44
  88. sky/clouds/primeintellect.py +317 -0
  89. sky/clouds/runpod.py +164 -74
  90. sky/clouds/scp.py +89 -86
  91. sky/clouds/seeweb.py +477 -0
  92. sky/clouds/shadeform.py +400 -0
  93. sky/clouds/ssh.py +263 -0
  94. sky/clouds/utils/aws_utils.py +10 -4
  95. sky/clouds/utils/gcp_utils.py +87 -11
  96. sky/clouds/utils/oci_utils.py +38 -14
  97. sky/clouds/utils/scp_utils.py +231 -167
  98. sky/clouds/vast.py +99 -77
  99. sky/clouds/vsphere.py +51 -40
  100. sky/core.py +375 -173
  101. sky/dag.py +15 -0
  102. sky/dashboard/out/404.html +1 -1
  103. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  105. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  106. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  107. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  110. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  111. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  112. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  113. sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  115. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  116. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  118. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  119. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  121. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  122. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  124. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  126. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  127. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  128. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  129. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  131. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  133. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  134. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  136. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  139. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  141. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  143. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  144. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  148. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  149. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  151. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  152. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
  155. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  156. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  157. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  158. sky/dashboard/out/clusters/[cluster].html +1 -1
  159. sky/dashboard/out/clusters.html +1 -1
  160. sky/dashboard/out/config.html +1 -0
  161. sky/dashboard/out/index.html +1 -1
  162. sky/dashboard/out/infra/[context].html +1 -0
  163. sky/dashboard/out/infra.html +1 -0
  164. sky/dashboard/out/jobs/[job].html +1 -1
  165. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  166. sky/dashboard/out/jobs.html +1 -1
  167. sky/dashboard/out/users.html +1 -0
  168. sky/dashboard/out/volumes.html +1 -0
  169. sky/dashboard/out/workspace/new.html +1 -0
  170. sky/dashboard/out/workspaces/[name].html +1 -0
  171. sky/dashboard/out/workspaces.html +1 -0
  172. sky/data/data_utils.py +137 -1
  173. sky/data/mounting_utils.py +269 -84
  174. sky/data/storage.py +1460 -1807
  175. sky/data/storage_utils.py +43 -57
  176. sky/exceptions.py +126 -2
  177. sky/execution.py +216 -63
  178. sky/global_user_state.py +2390 -586
  179. sky/jobs/__init__.py +7 -0
  180. sky/jobs/client/sdk.py +300 -58
  181. sky/jobs/client/sdk_async.py +161 -0
  182. sky/jobs/constants.py +15 -8
  183. sky/jobs/controller.py +848 -275
  184. sky/jobs/file_content_utils.py +128 -0
  185. sky/jobs/log_gc.py +193 -0
  186. sky/jobs/recovery_strategy.py +402 -152
  187. sky/jobs/scheduler.py +314 -189
  188. sky/jobs/server/core.py +836 -255
  189. sky/jobs/server/server.py +156 -115
  190. sky/jobs/server/utils.py +136 -0
  191. sky/jobs/state.py +2109 -706
  192. sky/jobs/utils.py +1306 -215
  193. sky/logs/__init__.py +21 -0
  194. sky/logs/agent.py +108 -0
  195. sky/logs/aws.py +243 -0
  196. sky/logs/gcp.py +91 -0
  197. sky/metrics/__init__.py +0 -0
  198. sky/metrics/utils.py +453 -0
  199. sky/models.py +78 -1
  200. sky/optimizer.py +164 -70
  201. sky/provision/__init__.py +90 -4
  202. sky/provision/aws/config.py +147 -26
  203. sky/provision/aws/instance.py +136 -50
  204. sky/provision/azure/instance.py +11 -6
  205. sky/provision/common.py +13 -1
  206. sky/provision/cudo/cudo_machine_type.py +1 -1
  207. sky/provision/cudo/cudo_utils.py +14 -8
  208. sky/provision/cudo/cudo_wrapper.py +72 -71
  209. sky/provision/cudo/instance.py +10 -6
  210. sky/provision/do/instance.py +10 -6
  211. sky/provision/do/utils.py +4 -3
  212. sky/provision/docker_utils.py +140 -33
  213. sky/provision/fluidstack/instance.py +13 -8
  214. sky/provision/gcp/__init__.py +1 -0
  215. sky/provision/gcp/config.py +301 -19
  216. sky/provision/gcp/constants.py +218 -0
  217. sky/provision/gcp/instance.py +36 -8
  218. sky/provision/gcp/instance_utils.py +18 -4
  219. sky/provision/gcp/volume_utils.py +247 -0
  220. sky/provision/hyperbolic/__init__.py +12 -0
  221. sky/provision/hyperbolic/config.py +10 -0
  222. sky/provision/hyperbolic/instance.py +437 -0
  223. sky/provision/hyperbolic/utils.py +373 -0
  224. sky/provision/instance_setup.py +101 -20
  225. sky/provision/kubernetes/__init__.py +5 -0
  226. sky/provision/kubernetes/config.py +9 -52
  227. sky/provision/kubernetes/constants.py +17 -0
  228. sky/provision/kubernetes/instance.py +919 -280
  229. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  230. sky/provision/kubernetes/network.py +27 -17
  231. sky/provision/kubernetes/network_utils.py +44 -43
  232. sky/provision/kubernetes/utils.py +1221 -534
  233. sky/provision/kubernetes/volume.py +343 -0
  234. sky/provision/lambda_cloud/instance.py +22 -16
  235. sky/provision/nebius/constants.py +50 -0
  236. sky/provision/nebius/instance.py +19 -6
  237. sky/provision/nebius/utils.py +237 -137
  238. sky/provision/oci/instance.py +10 -5
  239. sky/provision/paperspace/instance.py +10 -7
  240. sky/provision/paperspace/utils.py +1 -1
  241. sky/provision/primeintellect/__init__.py +10 -0
  242. sky/provision/primeintellect/config.py +11 -0
  243. sky/provision/primeintellect/instance.py +454 -0
  244. sky/provision/primeintellect/utils.py +398 -0
  245. sky/provision/provisioner.py +117 -36
  246. sky/provision/runpod/__init__.py +5 -0
  247. sky/provision/runpod/instance.py +27 -6
  248. sky/provision/runpod/utils.py +51 -18
  249. sky/provision/runpod/volume.py +214 -0
  250. sky/provision/scp/__init__.py +15 -0
  251. sky/provision/scp/config.py +93 -0
  252. sky/provision/scp/instance.py +707 -0
  253. sky/provision/seeweb/__init__.py +11 -0
  254. sky/provision/seeweb/config.py +13 -0
  255. sky/provision/seeweb/instance.py +812 -0
  256. sky/provision/shadeform/__init__.py +11 -0
  257. sky/provision/shadeform/config.py +12 -0
  258. sky/provision/shadeform/instance.py +351 -0
  259. sky/provision/shadeform/shadeform_utils.py +83 -0
  260. sky/provision/ssh/__init__.py +18 -0
  261. sky/provision/vast/instance.py +13 -8
  262. sky/provision/vast/utils.py +10 -7
  263. sky/provision/volume.py +164 -0
  264. sky/provision/vsphere/common/ssl_helper.py +1 -1
  265. sky/provision/vsphere/common/vapiconnect.py +2 -1
  266. sky/provision/vsphere/common/vim_utils.py +4 -4
  267. sky/provision/vsphere/instance.py +15 -10
  268. sky/provision/vsphere/vsphere_utils.py +17 -20
  269. sky/py.typed +0 -0
  270. sky/resources.py +845 -119
  271. sky/schemas/__init__.py +0 -0
  272. sky/schemas/api/__init__.py +0 -0
  273. sky/schemas/api/responses.py +227 -0
  274. sky/schemas/db/README +4 -0
  275. sky/schemas/db/env.py +90 -0
  276. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  277. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  278. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  279. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  280. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  281. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  282. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  283. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  284. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  285. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  286. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  287. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  288. sky/schemas/db/script.py.mako +28 -0
  289. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  290. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  291. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  292. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  293. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  294. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  295. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  296. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  297. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  298. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  299. sky/schemas/generated/__init__.py +0 -0
  300. sky/schemas/generated/autostopv1_pb2.py +36 -0
  301. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  302. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  303. sky/schemas/generated/jobsv1_pb2.py +86 -0
  304. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  305. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  306. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  307. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  308. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  309. sky/schemas/generated/servev1_pb2.py +58 -0
  310. sky/schemas/generated/servev1_pb2.pyi +115 -0
  311. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  312. sky/serve/autoscalers.py +357 -5
  313. sky/serve/client/impl.py +310 -0
  314. sky/serve/client/sdk.py +47 -139
  315. sky/serve/client/sdk_async.py +130 -0
  316. sky/serve/constants.py +12 -9
  317. sky/serve/controller.py +68 -17
  318. sky/serve/load_balancer.py +106 -60
  319. sky/serve/load_balancing_policies.py +116 -2
  320. sky/serve/replica_managers.py +434 -249
  321. sky/serve/serve_rpc_utils.py +179 -0
  322. sky/serve/serve_state.py +569 -257
  323. sky/serve/serve_utils.py +775 -265
  324. sky/serve/server/core.py +66 -711
  325. sky/serve/server/impl.py +1093 -0
  326. sky/serve/server/server.py +21 -18
  327. sky/serve/service.py +192 -89
  328. sky/serve/service_spec.py +144 -20
  329. sky/serve/spot_placer.py +3 -0
  330. sky/server/auth/__init__.py +0 -0
  331. sky/server/auth/authn.py +50 -0
  332. sky/server/auth/loopback.py +38 -0
  333. sky/server/auth/oauth2_proxy.py +202 -0
  334. sky/server/common.py +478 -182
  335. sky/server/config.py +85 -23
  336. sky/server/constants.py +44 -6
  337. sky/server/daemons.py +295 -0
  338. sky/server/html/token_page.html +185 -0
  339. sky/server/metrics.py +160 -0
  340. sky/server/middleware_utils.py +166 -0
  341. sky/server/requests/executor.py +558 -138
  342. sky/server/requests/payloads.py +364 -24
  343. sky/server/requests/preconditions.py +21 -17
  344. sky/server/requests/process.py +112 -29
  345. sky/server/requests/request_names.py +121 -0
  346. sky/server/requests/requests.py +822 -226
  347. sky/server/requests/serializers/decoders.py +82 -31
  348. sky/server/requests/serializers/encoders.py +140 -22
  349. sky/server/requests/threads.py +117 -0
  350. sky/server/rest.py +455 -0
  351. sky/server/server.py +1309 -285
  352. sky/server/state.py +20 -0
  353. sky/server/stream_utils.py +327 -61
  354. sky/server/uvicorn.py +217 -3
  355. sky/server/versions.py +270 -0
  356. sky/setup_files/MANIFEST.in +11 -1
  357. sky/setup_files/alembic.ini +160 -0
  358. sky/setup_files/dependencies.py +139 -31
  359. sky/setup_files/setup.py +44 -42
  360. sky/sky_logging.py +114 -7
  361. sky/skylet/attempt_skylet.py +106 -24
  362. sky/skylet/autostop_lib.py +129 -8
  363. sky/skylet/configs.py +29 -20
  364. sky/skylet/constants.py +216 -25
  365. sky/skylet/events.py +101 -21
  366. sky/skylet/job_lib.py +345 -164
  367. sky/skylet/log_lib.py +297 -18
  368. sky/skylet/log_lib.pyi +44 -1
  369. sky/skylet/providers/ibm/node_provider.py +12 -8
  370. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  371. sky/skylet/ray_patches/__init__.py +17 -3
  372. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  373. sky/skylet/ray_patches/cli.py.diff +19 -0
  374. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  375. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  376. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  377. sky/skylet/ray_patches/updater.py.diff +18 -0
  378. sky/skylet/ray_patches/worker.py.diff +41 -0
  379. sky/skylet/runtime_utils.py +21 -0
  380. sky/skylet/services.py +568 -0
  381. sky/skylet/skylet.py +72 -4
  382. sky/skylet/subprocess_daemon.py +104 -29
  383. sky/skypilot_config.py +506 -99
  384. sky/ssh_node_pools/__init__.py +1 -0
  385. sky/ssh_node_pools/core.py +135 -0
  386. sky/ssh_node_pools/server.py +233 -0
  387. sky/task.py +685 -163
  388. sky/templates/aws-ray.yml.j2 +11 -3
  389. sky/templates/azure-ray.yml.j2 +2 -1
  390. sky/templates/cudo-ray.yml.j2 +1 -0
  391. sky/templates/do-ray.yml.j2 +2 -1
  392. sky/templates/fluidstack-ray.yml.j2 +1 -0
  393. sky/templates/gcp-ray.yml.j2 +62 -1
  394. sky/templates/hyperbolic-ray.yml.j2 +68 -0
  395. sky/templates/ibm-ray.yml.j2 +2 -1
  396. sky/templates/jobs-controller.yaml.j2 +27 -24
  397. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  398. sky/templates/kubernetes-ray.yml.j2 +611 -50
  399. sky/templates/lambda-ray.yml.j2 +2 -1
  400. sky/templates/nebius-ray.yml.j2 +34 -12
  401. sky/templates/oci-ray.yml.j2 +1 -0
  402. sky/templates/paperspace-ray.yml.j2 +2 -1
  403. sky/templates/primeintellect-ray.yml.j2 +72 -0
  404. sky/templates/runpod-ray.yml.j2 +10 -1
  405. sky/templates/scp-ray.yml.j2 +4 -50
  406. sky/templates/seeweb-ray.yml.j2 +171 -0
  407. sky/templates/shadeform-ray.yml.j2 +73 -0
  408. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  409. sky/templates/vast-ray.yml.j2 +1 -0
  410. sky/templates/vsphere-ray.yml.j2 +1 -0
  411. sky/templates/websocket_proxy.py +212 -37
  412. sky/usage/usage_lib.py +31 -15
  413. sky/users/__init__.py +0 -0
  414. sky/users/model.conf +15 -0
  415. sky/users/permission.py +397 -0
  416. sky/users/rbac.py +121 -0
  417. sky/users/server.py +720 -0
  418. sky/users/token_service.py +218 -0
  419. sky/utils/accelerator_registry.py +35 -5
  420. sky/utils/admin_policy_utils.py +84 -38
  421. sky/utils/annotations.py +38 -5
  422. sky/utils/asyncio_utils.py +78 -0
  423. sky/utils/atomic.py +1 -1
  424. sky/utils/auth_utils.py +153 -0
  425. sky/utils/benchmark_utils.py +60 -0
  426. sky/utils/cli_utils/status_utils.py +159 -86
  427. sky/utils/cluster_utils.py +31 -9
  428. sky/utils/command_runner.py +354 -68
  429. sky/utils/command_runner.pyi +93 -3
  430. sky/utils/common.py +35 -8
  431. sky/utils/common_utils.py +314 -91
  432. sky/utils/config_utils.py +74 -5
  433. sky/utils/context.py +403 -0
  434. sky/utils/context_utils.py +242 -0
  435. sky/utils/controller_utils.py +383 -89
  436. sky/utils/dag_utils.py +31 -12
  437. sky/utils/db/__init__.py +0 -0
  438. sky/utils/db/db_utils.py +485 -0
  439. sky/utils/db/kv_cache.py +149 -0
  440. sky/utils/db/migration_utils.py +137 -0
  441. sky/utils/directory_utils.py +12 -0
  442. sky/utils/env_options.py +13 -0
  443. sky/utils/git.py +567 -0
  444. sky/utils/git_clone.sh +460 -0
  445. sky/utils/infra_utils.py +195 -0
  446. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  447. sky/utils/kubernetes/config_map_utils.py +133 -0
  448. sky/utils/kubernetes/create_cluster.sh +15 -29
  449. sky/utils/kubernetes/delete_cluster.sh +10 -7
  450. sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
  451. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  452. sky/utils/kubernetes/generate_kind_config.py +6 -66
  453. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  454. sky/utils/kubernetes/gpu_labeler.py +18 -8
  455. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  456. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  457. sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
  458. sky/utils/kubernetes/rsync_helper.sh +11 -3
  459. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  460. sky/utils/kubernetes/ssh_utils.py +221 -0
  461. sky/utils/kubernetes_enums.py +8 -15
  462. sky/utils/lock_events.py +94 -0
  463. sky/utils/locks.py +416 -0
  464. sky/utils/log_utils.py +82 -107
  465. sky/utils/perf_utils.py +22 -0
  466. sky/utils/resource_checker.py +298 -0
  467. sky/utils/resources_utils.py +249 -32
  468. sky/utils/rich_utils.py +217 -39
  469. sky/utils/schemas.py +955 -160
  470. sky/utils/serialize_utils.py +16 -0
  471. sky/utils/status_lib.py +10 -0
  472. sky/utils/subprocess_utils.py +29 -15
  473. sky/utils/tempstore.py +70 -0
  474. sky/utils/thread_utils.py +91 -0
  475. sky/utils/timeline.py +26 -53
  476. sky/utils/ux_utils.py +84 -15
  477. sky/utils/validator.py +11 -1
  478. sky/utils/volume.py +165 -0
  479. sky/utils/yaml_utils.py +111 -0
  480. sky/volumes/__init__.py +13 -0
  481. sky/volumes/client/__init__.py +0 -0
  482. sky/volumes/client/sdk.py +150 -0
  483. sky/volumes/server/__init__.py +0 -0
  484. sky/volumes/server/core.py +270 -0
  485. sky/volumes/server/server.py +124 -0
  486. sky/volumes/volume.py +215 -0
  487. sky/workspaces/__init__.py +0 -0
  488. sky/workspaces/core.py +655 -0
  489. sky/workspaces/server.py +101 -0
  490. sky/workspaces/utils.py +56 -0
  491. sky_templates/README.md +3 -0
  492. sky_templates/__init__.py +3 -0
  493. sky_templates/ray/__init__.py +0 -0
  494. sky_templates/ray/start_cluster +183 -0
  495. sky_templates/ray/stop_cluster +75 -0
  496. skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
  497. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  498. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
  499. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  500. sky/benchmark/benchmark_state.py +0 -256
  501. sky/benchmark/benchmark_utils.py +0 -641
  502. sky/clouds/service_catalog/constants.py +0 -7
  503. sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
  504. sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
  505. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  506. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  507. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  508. sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
  509. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  510. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  511. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  512. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  513. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  514. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  515. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  516. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
  517. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  518. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  519. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  520. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
  521. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  522. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  523. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  524. sky/jobs/dashboard/dashboard.py +0 -223
  525. sky/jobs/dashboard/static/favicon.ico +0 -0
  526. sky/jobs/dashboard/templates/index.html +0 -831
  527. sky/jobs/server/dashboard_utils.py +0 -69
  528. sky/skylet/providers/scp/__init__.py +0 -2
  529. sky/skylet/providers/scp/config.py +0 -149
  530. sky/skylet/providers/scp/node_provider.py +0 -578
  531. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  532. sky/utils/db_utils.py +0 -100
  533. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  534. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  535. skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
  536. skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
  537. skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
  538. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  539. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  540. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  541. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  542. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  543. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  544. /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  545. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  546. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/utils/schemas.py CHANGED
@@ -6,7 +6,9 @@ https://json-schema.org/
6
6
  import enum
7
7
  from typing import Any, Dict, List, Tuple
8
8
 
9
+ from sky.skylet import autostop_lib
9
10
  from sky.skylet import constants
11
+ from sky.utils import kubernetes_enums
10
12
 
11
13
 
12
14
  def _check_not_both_fields_present(field1: str, field2: str):
@@ -33,11 +35,114 @@ def _check_not_both_fields_present(field1: str, field2: str):
33
35
  }
34
36
 
35
37
 
# JSON schema for the `autostop` field. Three alternative forms are accepted:
#   1. a boolean,
#   2. a shorthand time string or integer,
#   3. a mapping with optional `idle_minutes`, `down` and `wait_for` keys.
_AUTOSTOP_SCHEMA = {
    'anyOf': [
        {
            # Use boolean to disable autostop completely, e.g.
            #   autostop: false
            'type': 'boolean',
        },
        {
            # Shorthand to set idle_minutes by directly specifying, e.g.
            #   autostop: 5
            'anyOf': [{
                # `minimum` is deliberately not set on this branch: JSON
                # Schema applies it to numeric instances only, so it would
                # be a no-op on a string.
                'type': 'string',
                'pattern': constants.TIME_PATTERN,
            }, {
                # NOTE(review): unlike `idle_minutes` below, the integer
                # shorthand has no lower bound, so negative values pass
                # schema validation. Confirm whether that is intended
                # before adding 'minimum': 0 here.
                'type': 'integer',
            }]
        },
        {
            'type': 'object',
            # Every key of the mapping form is optional.
            'required': [],
            'additionalProperties': False,
            'properties': {
                # TODO(luca): update field to use time units as well.
                'idle_minutes': {
                    'type': 'integer',
                    'minimum': 0,
                },
                'down': {
                    'type': 'boolean',
                },
                'wait_for': {
                    'type': 'string',
                    'case_insensitive_enum':
                        autostop_lib.AutostopWaitFor.supported_modes(),
                }
            },
        },
    ],
}
78
+
79
+
# Note: This is similar to _get_infra_pattern()
# but without the wildcard patterns.
def _get_volume_infra_pattern(clouds=None) -> str:
    """Builds the infra regex variant that has no `*` cloud wildcard.

    Accepted formats:
      1. cloud[/region[/zone]] - the cloud name is matched case
         insensitively; region and zone are any non-empty run of
         characters other than '/'.
      2. "kubernetes" or "k8s" (case insensitive), optionally followed by
         "/" and a context name that may itself contain slashes.

    Args:
        clouds: Optional iterable of cloud names to accept. Defaults to
            constants.ALL_CLOUDS. A 'kubernetes' entry is ignored because
            Kubernetes is covered by its own alternative.

    Returns:
        The anchored (^...$) regex pattern as a string.
    """
    if clouds is None:
        clouds = constants.ALL_CLOUDS
    # Filter instead of list.remove() so that a cloud list without a
    # 'kubernetes' entry does not raise ValueError.
    cloud_names = [name for name in clouds if name != 'kubernetes']

    alternatives = []
    if cloud_names:
        # Match any cloud name (case insensitive).
        cloud_pattern = f'(?i:({"|".join(cloud_names)}))'
        # Optional /region followed by optional /zone.
        # /[^/]+ matches a slash followed by any characters except slash
        # (region or zone name). The outer (?:...)? makes the entire
        # region/zone part optional.
        region_zone_pattern = '(?:/[^/]+(?:/[^/]+)?)?'
        alternatives.append(f'{cloud_pattern}{region_zone_pattern}')
    # With no cloud names the cloud alternative is omitted entirely; an
    # empty group would otherwise match the empty string.

    # Kubernetes specific pattern - matches:
    # 1. Just the word "kubernetes" or "k8s" by itself
    # 2. "k8s/" or "kubernetes/" followed by any context name (which may
    #    contain slashes)
    kubernetes_pattern = '(?i:kubernetes|k8s)(?:/.+)?'
    alternatives.append(kubernetes_pattern)

    # Combine all patterns with alternation (|)
    # ^ marks start of string, $ marks end of string
    return f'^(?:{"|".join(alternatives)})$'
107
+
108
+
def _get_infra_pattern(clouds=None) -> str:
    """Builds the regex that validates an `infra` string.

    Accepted formats:
      1. cloud[/region[/zone]] - e.g. "aws", "aws/us-east-1",
         "aws/us-east-1/us-east-1a". The cloud name is matched case
         insensitively; region and zone are any non-empty run of
         characters other than '/', so "*" is accepted there too
         (e.g. "aws/*/us-east-1a").
      2. Wildcard cloud - e.g. "*", "*/us-east-1", "*/*/us-east-1a".
      3. "kubernetes" or "k8s" (case insensitive), optionally followed by
         "/" and a context name that may itself contain slashes, e.g.
         "k8s/aws:eks:us-east-1:123456789012:cluster/my-cluster".

    Args:
        clouds: Optional iterable of cloud names to accept. Defaults to
            constants.ALL_CLOUDS. A 'kubernetes' entry is ignored because
            Kubernetes is covered by its own alternative.

    Returns:
        The anchored (^...$) regex pattern as a string.
    """
    if clouds is None:
        clouds = constants.ALL_CLOUDS
    # Filter instead of list.remove() so that a cloud list without a
    # 'kubernetes' entry does not raise ValueError.
    cloud_names = [name for name in clouds if name != 'kubernetes']

    # Optional /region followed by optional /zone.
    # /[^/]+ matches a slash followed by any characters except slash (region
    # or zone name). The outer (?:...)? makes the entire region/zone part
    # optional. The same suffix follows both a named cloud and the wildcard.
    region_zone_pattern = '(?:/[^/]+(?:/[^/]+)?)?'

    alternatives = []
    if cloud_names:
        # Match any cloud name (case insensitive).
        cloud_pattern = f'(?i:({"|".join(cloud_names)}))'
        alternatives.append(f'{cloud_pattern}{region_zone_pattern}')
    # With no cloud names the cloud alternative is omitted entirely; an
    # empty group would otherwise match the empty string.

    # Wildcard patterns:
    # 1. * - any cloud
    # 2. */region - any cloud with specific region
    # 3. */*/zone - any cloud, any region, specific zone
    wildcard_cloud = '\\*'  # Wildcard for cloud
    alternatives.append(f'{wildcard_cloud}{region_zone_pattern}')

    # Kubernetes specific pattern - matches:
    # 1. Just the word "kubernetes" or "k8s" by itself
    # 2. "k8s/" or "kubernetes/" followed by any context name (which may
    #    contain slashes)
    kubernetes_pattern = '(?i:kubernetes|k8s)(?:/.+)?'
    alternatives.append(kubernetes_pattern)

    # Combine all patterns with alternation (|)
    # ^ marks start of string, $ marks end of string
    return f'^(?:{"|".join(alternatives)})$'
142
+
143
+
36
144
  def _get_single_resources_schema():
37
145
  """Schema for a single resource in a resources list."""
38
- # To avoid circular imports, only import when needed.
39
- # pylint: disable=import-outside-toplevel
40
- from sky.clouds import service_catalog
41
146
  return {
42
147
  '$schema': 'https://json-schema.org/draft/2020-12/schema',
43
148
  'type': 'object',
@@ -46,7 +151,7 @@ def _get_single_resources_schema():
46
151
  'properties': {
47
152
  'cloud': {
48
153
  'type': 'string',
49
- 'case_insensitive_enum': list(service_catalog.ALL_CLOUDS)
154
+ 'case_insensitive_enum': list(constants.ALL_CLOUDS)
50
155
  },
51
156
  'region': {
52
157
  'type': 'string',
@@ -54,6 +159,21 @@ def _get_single_resources_schema():
54
159
  'zone': {
55
160
  'type': 'string',
56
161
  },
162
+ 'infra': {
163
+ 'type': 'string',
164
+ 'description':
165
+ ('Infrastructure specification in format: '
166
+ 'cloud[/region[/zone]]. Use "*" as a wildcard.'),
167
+ # Pattern validates:
168
+ # 1. cloud[/region[/zone]] - e.g. "aws", "aws/us-east-1",
169
+ # "aws/us-east-1/us-east-1a"
170
+ # 2. Wildcard patterns - e.g. "*", "*/us-east-1",
171
+ # "*/*/us-east-1a", "aws/*/us-east-1a"
172
+ # 3. Kubernetes patterns - e.g. "kubernetes/my-context",
173
+ # "k8s/context-name",
174
+ # "k8s/aws:eks:us-east-1:123456789012:cluster/my-cluster"
175
+ 'pattern': _get_infra_pattern(),
176
+ },
57
177
  'cpus': {
58
178
  'anyOf': [{
59
179
  'type': 'string',
@@ -109,12 +229,54 @@ def _get_single_resources_schema():
109
229
  }
110
230
  }],
111
231
  },
232
+ 'volumes': {
233
+ 'type': 'array',
234
+ 'items': {
235
+ 'type': 'object',
236
+ 'properties': {
237
+ 'disk_size': {
238
+ 'anyOf': [{
239
+ 'type': 'string',
240
+ 'pattern': constants.MEMORY_SIZE_PATTERN,
241
+ }, {
242
+ 'type': 'integer',
243
+ }],
244
+ },
245
+ 'disk_tier': {
246
+ 'type': 'string',
247
+ },
248
+ 'path': {
249
+ 'type': 'string',
250
+ },
251
+ 'auto_delete': {
252
+ 'type': 'boolean',
253
+ },
254
+ 'storage_type': {
255
+ 'type': 'string',
256
+ },
257
+ 'name': {
258
+ 'type': 'string',
259
+ },
260
+ 'attach_mode': {
261
+ 'type': 'string',
262
+ },
263
+ },
264
+ },
265
+ },
112
266
  'disk_size': {
113
- 'type': 'integer',
267
+ 'anyOf': [{
268
+ 'type': 'string',
269
+ 'pattern': constants.MEMORY_SIZE_PATTERN,
270
+ }, {
271
+ 'type': 'integer',
272
+ }],
114
273
  },
115
274
  'disk_tier': {
116
275
  'type': 'string',
117
276
  },
277
+ 'network_tier': {
278
+ 'type': 'string',
279
+ },
118
280
  'ports': {
119
281
  'anyOf': [{
120
282
  'type': 'string',
@@ -155,6 +317,9 @@ def _get_single_resources_schema():
155
317
  }
156
318
  }
157
319
  },
320
+ '_no_missing_accel_warnings': {
321
+ 'type': 'boolean'
322
+ },
158
323
  'image_id': {
159
324
  'anyOf': [{
160
325
  'type': 'string',
@@ -165,6 +330,12 @@ def _get_single_resources_schema():
165
330
  'type': 'null',
166
331
  }]
167
332
  },
333
+ 'autostop': _AUTOSTOP_SCHEMA,
334
+ 'priority': {
335
+ 'type': 'integer',
336
+ 'minimum': constants.MIN_PRIORITY,
337
+ 'maximum': constants.MAX_PRIORITY,
338
+ },
168
339
  # The following fields are for internal use only. Should not be
169
340
  # specified in the task config.
170
341
  '_docker_login_config': {
@@ -254,9 +425,71 @@ def get_resources_schema():
254
425
  }
255
426
 
256
427
 
428
def get_volume_schema():
    """Return the JSON schema for a volume definition.

    A volume object must provide `name` and `type`; no properties other than
    the ones listed here (plus those contributed by `_LABELS_SCHEMA`) are
    allowed at the top level.

    Returns:
        A dict holding the draft 2020-12 JSON schema.
    """
    # pylint: disable=import-outside-toplevel
    from sky.utils import volume

    volume_types = [member.value for member in volume.VolumeType]
    access_modes = [member.value for member in volume.VolumeAccessMode]

    # Pattern validates:
    # 1. cloud[/region[/zone]] - e.g. "aws", "aws/us-east-1",
    #    "aws/us-east-1/us-east-1a"
    # 2. Kubernetes patterns - e.g. "kubernetes/my-context",
    #    "k8s/context-name",
    #    "k8s/aws:eks:us-east-1:123456789012:cluster/my-cluster"
    infra_schema = {
        'type': 'string',
        'description': ('Infrastructure specification in format: '
                        'cloud[/region[/zone]].'),
        'pattern': _get_volume_infra_pattern(),
    }

    # Nested `config` object; unlike the top level it does not forbid
    # additional properties.
    config_schema = {
        'type': 'object',
        'required': [],
        'properties': {
            'storage_class_name': {
                'type': 'string',
            },
            'access_mode': {
                'type': 'string',
                'case_sensitive_enum': access_modes,
            },
            'namespace': {
                'type': 'string',
            },
        },
    }

    return {
        '$schema': 'https://json-schema.org/draft/2020-12/schema',
        'type': 'object',
        'required': ['name', 'type'],
        'additionalProperties': False,
        'properties': {
            'name': {
                'type': 'string',
            },
            'type': {
                'type': 'string',
                'case_sensitive_enum': volume_types,
            },
            'infra': infra_schema,
            'size': {
                'type': 'string',
                'pattern': constants.MEMORY_SIZE_PATTERN,
            },
            'use_existing': {
                'type': 'boolean',
            },
            'config': config_schema,
            **_LABELS_SCHEMA,
        }
    }
487
+
488
+
257
489
  def get_storage_schema():
258
490
  # pylint: disable=import-outside-toplevel
259
491
  from sky.data import storage
492
+
260
493
  return {
261
494
  '$schema': 'https://json-schema.org/draft/2020-12/schema',
262
495
  'type': 'object',
@@ -292,6 +525,28 @@ def get_storage_schema():
292
525
  mode.value for mode in storage.StorageMode
293
526
  ]
294
527
  },
528
+ 'config': {
529
+ 'type': 'object',
530
+ 'properties': {
531
+ 'disk_size': {
532
+ 'anyOf': [{
533
+ 'type': 'string',
534
+ 'pattern': constants.MEMORY_SIZE_PATTERN,
535
+ }, {
536
+ 'type': 'integer',
537
+ }],
538
+ },
539
+ 'disk_tier': {
540
+ 'type': 'string',
541
+ },
542
+ 'storage_type': {
543
+ 'type': 'string',
544
+ },
545
+ 'attach_mode': {
546
+ 'type': 'string',
547
+ },
548
+ },
549
+ },
295
550
  '_is_sky_managed': {
296
551
  'type': 'boolean',
297
552
  },
@@ -305,6 +560,52 @@ def get_storage_schema():
305
560
  }
306
561
 
307
562
 
563
def get_volume_mount_schema():
    """Schema for volume mount object in task config (internal use only).

    No field is required. Unknown keys are rejected at the top level but
    accepted inside `volume_config`.

    Returns:
        A dict holding the draft 2020-12 JSON schema.
    """

    def _string_or_null():
        # Built fresh on every call so `region` and `zone` never share a
        # dict object.
        return {'anyOf': [{'type': 'string'}, {'type': 'null'}]}

    volume_config_schema = {
        'type': 'object',
        'required': [],
        'additionalProperties': True,
        'properties': {
            'cloud': {
                'type': 'string',
                'case_insensitive_enum': list(constants.ALL_CLOUDS)
            },
            'region': _string_or_null(),
            'zone': _string_or_null(),
        },
    }

    return {
        '$schema': 'https://json-schema.org/draft/2020-12/schema',
        'type': 'object',
        'required': [],
        'additionalProperties': False,
        'properties': {
            'path': {
                'type': 'string',
            },
            'volume_name': {
                'type': 'string',
            },
            'is_ephemeral': {
                'type': 'boolean',
            },
            'volume_config': volume_config_schema,
        }
    }
607
+
608
+
308
609
  def get_service_schema():
309
610
  """Schema for top-level `service:` field (for SkyServe)."""
310
611
  # To avoid circular imports, only import when needed.
@@ -314,7 +615,6 @@ def get_service_schema():
314
615
  return {
315
616
  '$schema': 'https://json-schema.org/draft/2020-12/schema',
316
617
  'type': 'object',
317
- 'required': ['readiness_probe'],
318
618
  'additionalProperties': False,
319
619
  'properties': {
320
620
  'readiness_probe': {
@@ -350,6 +650,9 @@ def get_service_schema():
350
650
  }
351
651
  }]
352
652
  },
653
+ 'pool': {
654
+ 'type': 'boolean',
655
+ },
353
656
  'replica_policy': {
354
657
  'type': 'object',
355
658
  'required': ['min_replicas'],
@@ -368,8 +671,24 @@ def get_service_schema():
368
671
  'minimum': 0,
369
672
  },
370
673
  'target_qps_per_replica': {
371
- 'type': 'number',
372
- 'minimum': 0,
674
+ 'anyOf': [
675
+ {
676
+ 'type': 'number',
677
+ 'minimum': 0,
678
+ },
679
+ {
680
+ 'type': 'object',
681
+ 'patternProperties': {
682
+ # Pattern for accelerator types like
683
+ # "H100:1", "A100:1", "H100", "A100"
684
+ '^[A-Z0-9]+(?::[0-9]+)?$': {
685
+ 'type': 'number',
686
+ 'minimum': 0,
687
+ }
688
+ },
689
+ 'additionalProperties': False,
690
+ }
691
+ ]
373
692
  },
374
693
  'dynamic_ondemand_fallback': {
375
694
  'type': 'boolean',
@@ -397,6 +716,9 @@ def get_service_schema():
397
716
  'replicas': {
398
717
  'type': 'integer',
399
718
  },
719
+ 'workers': {
720
+ 'type': 'integer',
721
+ },
400
722
  'load_balancing_policy': {
401
723
  'type': 'string',
402
724
  'case_insensitive_enum': list(
@@ -472,23 +794,6 @@ def _filter_schema(schema: dict, keys_to_keep: List[Tuple[str, ...]]) -> dict:
472
794
  return new_schema
473
795
 
474
796
 
475
- def _experimental_task_schema() -> dict:
476
- # TODO: experimental.config_overrides has been deprecated in favor of the
477
- # top-level `config` field. Remove in v0.11.0.
478
- config_override_schema = _filter_schema(
479
- get_config_schema(), constants.OVERRIDEABLE_CONFIG_KEYS_IN_TASK)
480
- return {
481
- 'experimental': {
482
- 'type': 'object',
483
- 'required': [],
484
- 'additionalProperties': False,
485
- 'properties': {
486
- 'config_overrides': config_override_schema,
487
- }
488
- }
489
- }
490
-
491
-
492
797
  def get_task_schema():
493
798
  return {
494
799
  '$schema': 'https://json-schema.org/draft/2020-12/schema',
@@ -500,7 +805,21 @@ def get_task_schema():
500
805
  'type': 'string',
501
806
  },
502
807
  'workdir': {
503
- 'type': 'string',
808
+ 'anyOf': [{
809
+ 'type': 'string',
810
+ }, {
811
+ 'type': 'object',
812
+ 'required': ['url'],
813
+ 'additionalProperties': False,
814
+ 'properties': {
815
+ 'url': {
816
+ 'type': 'string',
817
+ },
818
+ 'ref': {
819
+ 'type': 'string',
820
+ },
821
+ },
822
+ }],
504
823
  },
505
824
  'event_callback': {
506
825
  'type': 'string',
@@ -520,6 +839,9 @@ def get_task_schema():
520
839
  'service': {
521
840
  'type': 'object',
522
841
  },
842
+ 'pool': {
843
+ 'type': 'object',
844
+ },
523
845
  'setup': {
524
846
  'type': 'string',
525
847
  },
@@ -537,6 +859,17 @@ def get_task_schema():
537
859
  },
538
860
  'additionalProperties': False,
539
861
  },
862
+ 'secrets': {
863
+ 'type': 'object',
864
+ 'required': [],
865
+ 'patternProperties': {
866
+ # Checks secret keys are valid env var names.
867
+ '^[a-zA-Z_][a-zA-Z0-9_]*$': {
868
+ 'type': ['string', 'null']
869
+ }
870
+ },
871
+ 'additionalProperties': False,
872
+ },
540
873
  # inputs and outputs are experimental
541
874
  'inputs': {
542
875
  'type': 'object',
@@ -560,7 +893,17 @@ def get_task_schema():
560
893
  'config': _filter_schema(
561
894
  get_config_schema(),
562
895
  constants.OVERRIDEABLE_CONFIG_KEYS_IN_TASK),
563
- **_experimental_task_schema(),
896
+ # volumes config is validated separately using get_volume_schema
897
+ 'volumes': {
898
+ 'type': 'object',
899
+ },
900
+ 'volume_mounts': {
901
+ 'type': 'array',
902
+ 'items': get_volume_mount_schema(),
903
+ },
904
+ '_metadata': {
905
+ 'type': 'object',
906
+ },
564
907
  }
565
908
  }
566
909
 
@@ -644,7 +987,7 @@ _LABELS_SCHEMA = {
644
987
  }
645
988
  }
646
989
 
647
- _PRORPERTY_NAME_OR_CLUSTER_NAME_TO_PROPERTY = {
990
+ _PROPERTY_NAME_OR_CLUSTER_NAME_TO_PROPERTY = {
648
991
  'oneOf': [
649
992
  {
650
993
  'type': 'string'
@@ -685,11 +1028,21 @@ class RemoteIdentityOptions(enum.Enum):
685
1028
 
686
1029
def get_default_remote_identity(cloud: str) -> str:
    """Get the default remote identity for the specified cloud.

    Args:
        cloud: Cloud name; it is compared verbatim against 'kubernetes' and
            'ssh'.

    Returns:
        The value of `RemoteIdentityOptions.SERVICE_ACCOUNT` for
        'kubernetes' and 'ssh', and the value of
        `RemoteIdentityOptions.LOCAL_CREDENTIALS` for anything else.
    """
    default_option = (RemoteIdentityOptions.SERVICE_ACCOUNT
                      if cloud in ('kubernetes', 'ssh') else
                      RemoteIdentityOptions.LOCAL_CREDENTIALS)
    return default_option.value
691
1034
 
692
1035
 
1036
+ _CAPABILITIES_SCHEMA = {
1037
+ 'capabilities': {
1038
+ 'type': 'array',
1039
+ 'items': {
1040
+ 'type': 'string',
1041
+ 'case_insensitive_enum': ['compute', 'storage']
1042
+ },
1043
+ }
1044
+ }
1045
+
693
1046
  _REMOTE_IDENTITY_SCHEMA = {
694
1047
  'remote_identity': {
695
1048
  'type': 'string',
@@ -712,66 +1065,157 @@ _REMOTE_IDENTITY_SCHEMA_KUBERNETES = {
712
1065
  },
713
1066
  }
714
1067
 
1068
+ _CONTEXT_CONFIG_SCHEMA_MINIMAL = {
1069
+ 'pod_config': {
1070
+ 'type': 'object',
1071
+ 'required': [],
1072
+ # Allow arbitrary keys since validating pod spec is hard
1073
+ 'additionalProperties': True,
1074
+ },
1075
+ 'provision_timeout': {
1076
+ 'type': 'integer',
1077
+ },
1078
+ 'custom_metadata': {
1079
+ 'type': 'object',
1080
+ 'required': [],
1081
+ # Allow arbitrary keys since validating metadata is hard
1082
+ 'additionalProperties': True,
1083
+ # Disallow 'name' and 'namespace' keys in this dict
1084
+ 'not': {
1085
+ 'anyOf': [{
1086
+ 'required': ['name']
1087
+ }, {
1088
+ 'required': ['namespace']
1089
+ }]
1090
+ },
1091
+ },
1092
+ }
715
1093
 
716
- def get_config_schema():
717
- # pylint: disable=import-outside-toplevel
718
- from sky.clouds import service_catalog
719
- from sky.utils import kubernetes_enums
720
-
721
- resources_schema = {
722
- k: v
723
- for k, v in get_resources_schema().items()
724
- # Validation may fail if $schema is included.
725
- if k != '$schema'
726
- }
727
- resources_schema['properties'].pop('ports')
728
- autostop_schema = {
729
- 'anyOf': [
730
- {
731
- # Use boolean to disable autostop completely, e.g.
732
- # autostop: false
733
- 'type': 'boolean',
734
- },
735
- {
736
- 'type': 'object',
737
- 'required': [],
738
- 'additionalProperties': False,
739
- 'properties': {
740
- 'idle_minutes': {
741
- 'type': 'integer',
742
- 'minimum': 0,
743
- },
744
- 'down': {
745
- 'type': 'boolean',
746
- },
747
- },
748
- },
1094
+ _CONTEXT_CONFIG_SCHEMA_KUBERNETES = {
1095
+ # TODO(kevin): Remove 'networking' in v0.13.0.
1096
+ 'networking': {
1097
+ 'type': 'string',
1098
+ 'case_insensitive_enum': [
1099
+ type.value for type in kubernetes_enums.KubernetesNetworkingMode
749
1100
  ],
750
- }
751
- controller_resources_schema = {
1101
+ },
1102
+ 'ports': {
1103
+ 'type': 'string',
1104
+ 'case_insensitive_enum': [
1105
+ type.value for type in kubernetes_enums.KubernetesPortMode
1106
+ ],
1107
+ },
1108
+ **_CONTEXT_CONFIG_SCHEMA_MINIMAL,
1109
+ 'autoscaler': {
1110
+ 'type': 'string',
1111
+ 'case_insensitive_enum': [
1112
+ type.value for type in kubernetes_enums.KubernetesAutoscalerType
1113
+ ],
1114
+ },
1115
+ 'high_availability': {
752
1116
  'type': 'object',
753
1117
  'required': [],
754
1118
  'additionalProperties': False,
755
1119
  'properties': {
756
- 'controller': {
757
- 'type': 'object',
758
- 'required': [],
759
- 'additionalProperties': False,
760
- 'properties': {
761
- 'resources': resources_schema,
762
- 'high_availability': {
763
- 'type': 'boolean',
764
- },
765
- 'autostop': autostop_schema,
766
- }
767
- },
768
- 'bucket': {
1120
+ 'storage_class_name': {
769
1121
  'type': 'string',
770
- 'pattern': '^(https|s3|gs|r2|cos)://.+',
771
- 'required': [],
772
1122
  }
773
- }
1123
+ },
1124
+ },
1125
+ 'kueue': {
1126
+ 'type': 'object',
1127
+ 'required': [],
1128
+ 'additionalProperties': False,
1129
+ 'properties': {
1130
+ 'local_queue_name': {
1131
+ 'type': 'string',
1132
+ },
1133
+ },
1134
+ },
1135
+ 'dws': {
1136
+ 'type': 'object',
1137
+ 'required': [],
1138
+ 'additionalProperties': False,
1139
+ 'properties': {
1140
+ 'enabled': {
1141
+ 'type': 'boolean',
1142
+ },
1143
+ # Only used when Kueue is enabled.
1144
+ 'max_run_duration': {
1145
+ 'anyOf': [{
1146
+ 'type': 'string',
1147
+ 'pattern': constants.TIME_PATTERN,
1148
+ }, {
1149
+ 'type': 'integer',
1150
+ }]
1151
+ },
1152
+ },
1153
+ },
1154
+ 'remote_identity': {
1155
+ 'type': 'string',
1156
+ },
1157
+ 'post_provision_runcmd': {
1158
+ 'type': 'array',
1159
+ 'items': {
1160
+ 'type': 'string'
1161
+ },
1162
+ }
1163
+ }
1164
+
1165
+
1166
+ def get_config_schema():
1167
+ # pylint: disable=import-outside-toplevel
1168
+ from sky.server import daemons
1169
+
1170
+ resources_schema = {
1171
+ k: v
1172
+ for k, v in get_resources_schema().items()
1173
+ # Validation may fail if $schema is included.
1174
+ if k != '$schema'
774
1175
  }
1176
+ resources_schema['properties'].pop('ports')
1177
+
1178
+ def _get_controller_schema():
1179
+ return {
1180
+ 'type': 'object',
1181
+ 'required': [],
1182
+ 'additionalProperties': False,
1183
+ 'properties': {
1184
+ 'controller': {
1185
+ 'type': 'object',
1186
+ 'required': [],
1187
+ 'additionalProperties': False,
1188
+ 'properties': {
1189
+ 'resources': resources_schema,
1190
+ 'high_availability': {
1191
+ 'type': 'boolean',
1192
+ 'default': False,
1193
+ },
1194
+ 'autostop': _AUTOSTOP_SCHEMA,
1195
+ 'consolidation_mode': {
1196
+ 'type': 'boolean',
1197
+ 'default': False,
1198
+ },
1199
+ 'controller_logs_gc_retention_hours': {
1200
+ 'type': 'integer',
1201
+ },
1202
+ 'task_logs_gc_retention_hours': {
1203
+ 'type': 'integer',
1204
+ },
1205
+ },
1206
+ },
1207
+ 'bucket': {
1208
+ 'type': 'string',
1209
+ 'pattern': '^(https|s3|gs|r2|cos)://.+',
1210
+ 'required': [],
1211
+ },
1212
+ 'force_disable_cloud_bucket': {
1213
+ 'type': 'boolean',
1214
+ 'default': False,
1215
+ },
1216
+ }
1217
+ }
1218
+
775
1219
  cloud_configs = {
776
1220
  'aws': {
777
1221
  'type': 'object',
@@ -790,8 +1234,11 @@ def get_config_schema():
790
1234
  'disk_encrypted': {
791
1235
  'type': 'boolean',
792
1236
  },
1237
+ 'ssh_user': {
1238
+ 'type': 'string',
1239
+ },
793
1240
  'security_group_name':
794
- (_PRORPERTY_NAME_OR_CLUSTER_NAME_TO_PROPERTY),
1241
+ (_PROPERTY_NAME_OR_CLUSTER_NAME_TO_PROPERTY),
795
1242
  'vpc_name': {
796
1243
  'oneOf': [{
797
1244
  'type': 'string',
@@ -799,6 +1246,23 @@ def get_config_schema():
799
1246
  'type': 'null',
800
1247
  }],
801
1248
  },
1249
+ 'use_ssm': {
1250
+ 'type': 'boolean',
1251
+ },
1252
+ 'post_provision_runcmd': {
1253
+ 'type': 'array',
1254
+ 'items': {
1255
+ 'oneOf': [{
1256
+ 'type': 'string'
1257
+ }, {
1258
+ 'type': 'array',
1259
+ 'items': {
1260
+ 'type': 'string'
1261
+ }
1262
+ }]
1263
+ },
1264
+ },
1265
+ **_CAPABILITIES_SCHEMA,
802
1266
  **_LABELS_SCHEMA,
803
1267
  **_NETWORK_CONFIG_SCHEMA,
804
1268
  },
@@ -837,6 +1301,12 @@ def get_config_schema():
837
1301
  'enable_gvnic': {
838
1302
  'type': 'boolean'
839
1303
  },
1304
+ 'enable_gpu_direct': {
1305
+ 'type': 'boolean'
1306
+ },
1307
+ 'placement_policy': {
1308
+ 'type': 'string',
1309
+ },
840
1310
  'vpc_name': {
841
1311
  'oneOf': [
842
1312
  {
@@ -850,6 +1320,7 @@ def get_config_schema():
850
1320
  }
851
1321
  ],
852
1322
  },
1323
+ **_CAPABILITIES_SCHEMA,
853
1324
  **_LABELS_SCHEMA,
854
1325
  **_NETWORK_CONFIG_SCHEMA,
855
1326
  },
@@ -874,102 +1345,165 @@ def get_config_schema():
874
1345
  'additionalProperties': False,
875
1346
  'properties': {
876
1347
  'allowed_contexts': {
1348
+ 'oneOf': [{
1349
+ 'type': 'array',
1350
+ 'items': {
1351
+ 'type': 'string',
1352
+ },
1353
+ }, {
1354
+ 'type': 'string',
1355
+ 'pattern': '^all$'
1356
+ }]
1357
+ },
1358
+ 'context_configs': {
1359
+ 'type': 'object',
1360
+ 'required': [],
1361
+ 'properties': {},
1362
+ # Properties are kubernetes context names.
1363
+ 'additionalProperties': {
1364
+ 'type': 'object',
1365
+ 'required': [],
1366
+ 'additionalProperties': False,
1367
+ 'properties': {
1368
+ **_CONTEXT_CONFIG_SCHEMA_KUBERNETES,
1369
+ },
1370
+ },
1371
+ },
1372
+ **_CONTEXT_CONFIG_SCHEMA_KUBERNETES,
1373
+ }
1374
+ },
1375
+ 'ssh': {
1376
+ 'type': 'object',
1377
+ 'required': [],
1378
+ 'additionalProperties': False,
1379
+ 'properties': {
1380
+ 'allowed_node_pools': {
877
1381
  'type': 'array',
878
1382
  'items': {
879
1383
  'type': 'string',
880
1384
  },
881
1385
  },
882
- 'networking': {
883
- 'type': 'string',
884
- 'case_insensitive_enum': [
885
- type.value
886
- for type in kubernetes_enums.KubernetesNetworkingMode
887
- ]
888
- },
889
- 'ports': {
890
- 'type': 'string',
891
- 'case_insensitive_enum': [
892
- type.value
893
- for type in kubernetes_enums.KubernetesPortMode
894
- ]
895
- },
896
- 'pod_config': {
1386
+ 'context_configs': {
897
1387
  'type': 'object',
898
1388
  'required': [],
899
- # Allow arbitrary keys since validating pod spec is hard
900
- 'additionalProperties': True,
1389
+ 'properties': {},
1390
+ # Properties are ssh cluster names, which are the
1391
+ # kubernetes context names without `ssh-` prefix.
1392
+ 'additionalProperties': {
1393
+ 'type': 'object',
1394
+ 'required': [],
1395
+ 'additionalProperties': False,
1396
+ 'properties': {
1397
+ **_CONTEXT_CONFIG_SCHEMA_MINIMAL,
1398
+ },
1399
+ },
901
1400
  },
902
- 'custom_metadata': {
1401
+ **_CONTEXT_CONFIG_SCHEMA_MINIMAL,
1402
+ }
1403
+ },
1404
+ 'oci': {
1405
+ 'type': 'object',
1406
+ 'required': [],
1407
+ 'properties': {
1408
+ 'region_configs': {
903
1409
  'type': 'object',
904
1410
  'required': [],
905
- # Allow arbitrary keys since validating metadata is hard
906
- 'additionalProperties': True,
907
- # Disallow 'name' and 'namespace' keys in this dict
908
- 'not': {
909
- 'anyOf': [{
910
- 'required': ['name']
911
- }, {
912
- 'required': ['namespace']
913
- }]
914
- }
1411
+ 'properties': {},
1412
+ # Properties are either 'default' or a region name.
1413
+ 'additionalProperties': {
1414
+ 'type': 'object',
1415
+ 'required': [],
1416
+ 'additionalProperties': False,
1417
+ 'properties': {
1418
+ 'compartment_ocid': {
1419
+ 'type': 'string',
1420
+ },
1421
+ 'image_tag_general': {
1422
+ 'type': 'string',
1423
+ },
1424
+ 'image_tag_gpu': {
1425
+ 'type': 'string',
1426
+ },
1427
+ 'vcn_ocid': {
1428
+ 'type': 'string',
1429
+ },
1430
+ 'vcn_subnet': {
1431
+ 'type': 'string',
1432
+ },
1433
+ }
1434
+ },
1435
+ }
1436
+ },
1437
+ },
1438
+ 'nebius': {
1439
+ 'type': 'object',
1440
+ 'required': [],
1441
+ 'properties': {
1442
+ **_NETWORK_CONFIG_SCHEMA, 'use_static_ip_address': {
1443
+ 'type': 'boolean',
915
1444
  },
916
- 'provision_timeout': {
917
- 'type': 'integer',
1445
+ 'tenant_id': {
1446
+ 'type': 'string',
918
1447
  },
919
- 'autoscaler': {
1448
+ 'domain': {
920
1449
  'type': 'string',
921
- 'case_insensitive_enum': [
922
- type.value
923
- for type in kubernetes_enums.KubernetesAutoscalerType
924
- ]
925
1450
  },
926
- 'high_availability': {
1451
+ 'region_configs': {
927
1452
  'type': 'object',
928
1453
  'required': [],
929
- 'additionalProperties': False,
930
- 'properties': {
931
- 'storage_class_name': {
932
- 'type': 'string',
933
- }
1454
+ 'properties': {},
1455
+ 'additionalProperties': {
1456
+ 'type': 'object',
1457
+ 'required': [],
1458
+ 'additionalProperties': False,
1459
+ 'properties': {
1460
+ 'project_id': {
1461
+ 'type': 'string',
1462
+ },
1463
+ 'fabric': {
1464
+ 'type': 'string',
1465
+ },
1466
+ 'filesystems': {
1467
+ 'type': 'array',
1468
+ 'items': {
1469
+ 'type': 'object',
1470
+ 'additionalProperties': False,
1471
+ 'properties': {
1472
+ 'filesystem_id': {
1473
+ 'type': 'string',
1474
+ },
1475
+ 'attach_mode': {
1476
+ 'type': 'string',
1477
+ 'case_sensitive_enum': [
1478
+ 'READ_WRITE', 'READ_ONLY'
1479
+ ]
1480
+ },
1481
+ 'mount_path': {
1482
+ 'type': 'string',
1483
+ }
1484
+ }
1485
+ }
1486
+ },
1487
+ },
934
1488
  }
935
- },
936
- }
937
- },
938
- 'oci': {
939
- 'type': 'object',
940
- 'required': [],
941
- 'properties': {},
942
- # Properties are either 'default' or a region name.
943
- 'additionalProperties': {
944
- 'type': 'object',
945
- 'required': [],
946
- 'additionalProperties': False,
947
- 'properties': {
948
- 'compartment_ocid': {
949
- 'type': 'string',
950
- },
951
- 'image_tag_general': {
952
- 'type': 'string',
953
- },
954
- 'image_tag_gpu': {
955
- 'type': 'string',
956
- },
957
- 'vcn_ocid': {
958
- 'type': 'string',
959
- },
960
- 'vcn_subnet': {
961
- 'type': 'string',
962
- },
963
1489
  }
964
1490
  },
965
- },
1491
+ }
966
1492
  }
967
1493
 
968
1494
  admin_policy_schema = {
969
1495
  'type': 'string',
970
- # Check regex to be a valid python module path
971
- 'pattern': (r'^[a-zA-Z_][a-zA-Z0-9_]*'
972
- r'(\.[a-zA-Z_][a-zA-Z0-9_]*)+$'),
1496
+ 'anyOf': [
1497
+ {
1498
+ # Check regex to be a valid python module path
1499
+ 'pattern': (r'^[a-zA-Z_][a-zA-Z0-9_]*'
1500
+ r'(\.[a-zA-Z_][a-zA-Z0-9_]*)+$'),
1501
+ },
1502
+ {
1503
+ # Check for valid HTTP/HTTPS URL
1504
+ 'pattern': r'^https?://.*$',
1505
+ }
1506
+ ]
973
1507
  }
974
1508
 
975
1509
  allowed_clouds = {
@@ -978,7 +1512,7 @@ def get_config_schema():
978
1512
  'items': {
979
1513
  'type': 'string',
980
1514
  'case_insensitive_enum':
981
- (list(service_catalog.ALL_CLOUDS) + ['cloudflare'])
1515
+ (list(constants.ALL_CLOUDS) + ['cloudflare'])
982
1516
  }
983
1517
  }
984
1518
 
@@ -1010,6 +1544,27 @@ def get_config_schema():
1010
1544
  }
1011
1545
  }
1012
1546
 
1547
+ daemon_config = {
1548
+ 'type': 'object',
1549
+ 'required': [],
1550
+ 'properties': {
1551
+ 'log_level': {
1552
+ 'type': 'string',
1553
+ 'case_insensitive_enum': ['DEBUG', 'INFO', 'WARNING'],
1554
+ },
1555
+ }
1556
+ }
1557
+
1558
+ daemon_schema: Dict[str, Any] = {
1559
+ 'type': 'object',
1560
+ 'required': [],
1561
+ 'additionalProperties': False,
1562
+ 'properties': {}
1563
+ }
1564
+
1565
+ for daemon in daemons.INTERNAL_REQUEST_DAEMONS:
1566
+ daemon_schema['properties'][daemon.id] = daemon_config
1567
+
1013
1568
  api_server = {
1014
1569
  'type': 'object',
1015
1570
  'required': [],
@@ -1020,14 +1575,241 @@ def get_config_schema():
1020
1575
  # Apply validation for URL
1021
1576
  'pattern': r'^https?://.*$',
1022
1577
  },
1578
+ 'service_account_token': {
1579
+ 'anyOf': [
1580
+ {
1581
+ 'type': 'string',
1582
+ # Validate that token starts with sky_ prefix
1583
+ 'pattern': r'^sky_.+$',
1584
+ },
1585
+ {
1586
+ 'type': 'null',
1587
+ }
1588
+ ]
1589
+ },
1590
+ 'requests_retention_hours': {
1591
+ 'type': 'integer',
1592
+ },
1593
+ 'cluster_event_retention_hours': {
1594
+ 'type': 'number',
1595
+ },
1596
+ 'cluster_debug_event_retention_hours': {
1597
+ 'type': 'number',
1598
+ },
1023
1599
  }
1024
1600
  }
1025
1601
 
1602
+ rbac_schema = {
1603
+ 'type': 'object',
1604
+ 'required': [],
1605
+ 'additionalProperties': False,
1606
+ 'properties': {
1607
+ 'default_role': {
1608
+ 'type': 'string',
1609
+ 'case_insensitive_enum': ['admin', 'user']
1610
+ },
1611
+ },
1612
+ }
1613
+
1614
+ workspace_schema = {'type': 'string'}
1615
+
1616
+ allowed_workspace_cloud_names = list(constants.ALL_CLOUDS) + ['cloudflare']
1617
+ # Create pattern for not supported clouds, i.e.
1618
+ # all clouds except aws, gcp, kubernetes, ssh, nebius
1619
+ not_supported_clouds = [
1620
+ cloud for cloud in allowed_workspace_cloud_names
1621
+ if cloud.lower() not in ['aws', 'gcp', 'kubernetes', 'ssh', 'nebius']
1622
+ ]
1623
+ not_supported_cloud_regex = '|'.join(not_supported_clouds)
1624
+ workspaces_schema = {
1625
+ 'type': 'object',
1626
+ 'required': [],
1627
+ # each key is a workspace name
1628
+ 'additionalProperties': {
1629
+ 'type': 'object',
1630
+ 'additionalProperties': False,
1631
+ 'patternProperties': {
1632
+ # Pattern for clouds with no workspace-specific config -
1633
+ # only allow 'disabled' property.
1634
+ f'^({not_supported_cloud_regex})$': {
1635
+ 'type': 'object',
1636
+ 'additionalProperties': False,
1637
+ 'properties': {
1638
+ 'disabled': {
1639
+ 'type': 'boolean'
1640
+ }
1641
+ },
1642
+ },
1643
+ },
1644
+ 'properties': {
1645
+ # Explicit definition for GCP allows both project_id and
1646
+ # disabled
1647
+ 'private': {
1648
+ 'type': 'boolean',
1649
+ },
1650
+ 'allowed_users': {
1651
+ 'type': 'array',
1652
+ 'items': {
1653
+ 'type': 'string',
1654
+ },
1655
+ },
1656
+ 'gcp': {
1657
+ 'type': 'object',
1658
+ 'properties': {
1659
+ 'project_id': {
1660
+ 'type': 'string'
1661
+ },
1662
+ 'disabled': {
1663
+ 'type': 'boolean'
1664
+ },
1665
+ **_CAPABILITIES_SCHEMA,
1666
+ },
1667
+ 'additionalProperties': False,
1668
+ },
1669
+ 'aws': {
1670
+ 'type': 'object',
1671
+ 'properties': {
1672
+ 'profile': {
1673
+ 'type': 'string'
1674
+ },
1675
+ 'disabled': {
1676
+ 'type': 'boolean'
1677
+ },
1678
+ **_CAPABILITIES_SCHEMA,
1679
+ },
1680
+ 'additionalProperties': False,
1681
+ },
1682
+ 'ssh': {
1683
+ 'type': 'object',
1684
+ 'required': [],
1685
+ 'properties': {
1686
+ 'allowed_node_pools': {
1687
+ 'type': 'array',
1688
+ 'items': {
1689
+ 'type': 'string',
1690
+ },
1691
+ },
1692
+ 'disabled': {
1693
+ 'type': 'boolean'
1694
+ },
1695
+ },
1696
+ 'additionalProperties': False,
1697
+ },
1698
+ 'kubernetes': {
1699
+ 'type': 'object',
1700
+ 'required': [],
1701
+ 'properties': {
1702
+ 'allowed_contexts': {
1703
+ 'oneOf': [{
1704
+ 'type': 'array',
1705
+ 'items': {
1706
+ 'type': 'string',
1707
+ },
1708
+ }, {
1709
+ 'type': 'string',
1710
+ 'pattern': '^all$'
1711
+ }]
1712
+ },
1713
+ 'disabled': {
1714
+ 'type': 'boolean'
1715
+ },
1716
+ },
1717
+ 'additionalProperties': False,
1718
+ },
1719
+ 'nebius': {
1720
+ 'type': 'object',
1721
+ 'required': [],
1722
+ 'properties': {
1723
+ 'credentials_file_path': {
1724
+ 'type': 'string',
1725
+ },
1726
+ 'tenant_id': {
1727
+ 'type': 'string',
1728
+ },
1729
+ 'domain': {
1730
+ 'type': 'string',
1731
+ },
1732
+ 'disabled': {
1733
+ 'type': 'boolean'
1734
+ },
1735
+ },
1736
+ 'additionalProperties': False,
1737
+ },
1738
+ },
1739
+ },
1740
+ }
1741
+
1742
+ provision_configs = {
1743
+ 'type': 'object',
1744
+ 'required': [],
1745
+ 'additionalProperties': False,
1746
+ 'properties': {
1747
+ 'ssh_timeout': {
1748
+ 'type': 'integer',
1749
+ 'minimum': 1,
1750
+ },
1751
+ }
1752
+ }
1753
+
1754
+ logs_schema = {
1755
+ 'type': 'object',
1756
+ 'required': ['store'],
1757
+ 'additionalProperties': False,
1758
+ 'properties': {
1759
+ 'store': {
1760
+ 'type': 'string',
1761
+ 'case_insensitive_enum': ['gcp', 'aws'],
1762
+ },
1763
+ 'gcp': {
1764
+ 'type': 'object',
1765
+ 'properties': {
1766
+ 'project_id': {
1767
+ 'type': 'string',
1768
+ },
1769
+ 'credentials_file': {
1770
+ 'type': 'string',
1771
+ },
1772
+ 'additional_labels': {
1773
+ 'type': 'object',
1774
+ 'additionalProperties': {
1775
+ 'type': 'string',
1776
+ },
1777
+ },
1778
+ },
1779
+ },
1780
+ 'aws': {
1781
+ 'type': 'object',
1782
+ 'properties': {
1783
+ 'region': {
1784
+ 'type': 'string',
1785
+ },
1786
+ 'credentials_file': {
1787
+ 'type': 'string',
1788
+ },
1789
+ 'log_group_name': {
1790
+ 'type': 'string',
1791
+ },
1792
+ 'log_stream_prefix': {
1793
+ 'type': 'string',
1794
+ },
1795
+ 'auto_create_group': {
1796
+ 'type': 'boolean',
1797
+ },
1798
+ 'additional_tags': {
1799
+ 'type': 'object',
1800
+ 'additionalProperties': {
1801
+ 'type': 'string',
1802
+ },
1803
+ },
1804
+ },
1805
+ },
1806
+ },
1807
+ }
1808
+
1026
1809
  for cloud, config in cloud_configs.items():
1027
1810
  if cloud == 'aws':
1028
- config['properties'].update({
1029
- 'remote_identity': _PRORPERTY_NAME_OR_CLUSTER_NAME_TO_PROPERTY
1030
- })
1811
+ config['properties'].update(
1812
+ {'remote_identity': _PROPERTY_NAME_OR_CLUSTER_NAME_TO_PROPERTY})
1031
1813
  elif cloud == 'kubernetes':
1032
1814
  config['properties'].update(_REMOTE_IDENTITY_SCHEMA_KUBERNETES)
1033
1815
  else:
@@ -1038,13 +1820,26 @@ def get_config_schema():
1038
1820
  'required': [],
1039
1821
  'additionalProperties': False,
1040
1822
  'properties': {
1041
- 'jobs': controller_resources_schema,
1042
- 'serve': controller_resources_schema,
1823
+ # TODO Replace this with whatever syang cooks up
1824
+ 'workspace': {
1825
+ 'type': 'string',
1826
+ },
1827
+ 'db': {
1828
+ 'type': 'string',
1829
+ },
1830
+ 'jobs': _get_controller_schema(),
1831
+ 'serve': _get_controller_schema(),
1043
1832
  'allowed_clouds': allowed_clouds,
1044
1833
  'admin_policy': admin_policy_schema,
1045
1834
  'docker': docker_configs,
1046
1835
  'nvidia_gpus': gpu_configs,
1047
1836
  'api_server': api_server,
1837
+ 'active_workspace': workspace_schema,
1838
+ 'workspaces': workspaces_schema,
1839
+ 'provision': provision_configs,
1840
+ 'rbac': rbac_schema,
1841
+ 'logs': logs_schema,
1842
+ 'daemons': daemon_schema,
1048
1843
  **cloud_configs,
1049
1844
  },
1050
1845
  }