skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546) hide show
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/hyperbolic.py +8 -0
  8. sky/adaptors/ibm.py +5 -2
  9. sky/adaptors/kubernetes.py +149 -18
  10. sky/adaptors/nebius.py +173 -30
  11. sky/adaptors/primeintellect.py +1 -0
  12. sky/adaptors/runpod.py +68 -0
  13. sky/adaptors/seeweb.py +183 -0
  14. sky/adaptors/shadeform.py +89 -0
  15. sky/admin_policy.py +187 -4
  16. sky/authentication.py +179 -225
  17. sky/backends/__init__.py +4 -2
  18. sky/backends/backend.py +22 -9
  19. sky/backends/backend_utils.py +1323 -397
  20. sky/backends/cloud_vm_ray_backend.py +1749 -1029
  21. sky/backends/docker_utils.py +1 -1
  22. sky/backends/local_docker_backend.py +11 -6
  23. sky/backends/task_codegen.py +633 -0
  24. sky/backends/wheel_utils.py +55 -9
  25. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  26. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  27. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  28. sky/{clouds/service_catalog → catalog}/common.py +90 -49
  29. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  31. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  33. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
  34. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  35. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  36. sky/catalog/data_fetchers/fetch_nebius.py +338 -0
  37. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  38. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  39. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  40. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  41. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  42. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  43. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  44. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  45. sky/catalog/hyperbolic_catalog.py +136 -0
  46. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  47. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  48. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  49. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  50. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  51. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  52. sky/catalog/primeintellect_catalog.py +95 -0
  53. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  54. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  55. sky/catalog/seeweb_catalog.py +184 -0
  56. sky/catalog/shadeform_catalog.py +165 -0
  57. sky/catalog/ssh_catalog.py +167 -0
  58. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  59. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  60. sky/check.py +533 -185
  61. sky/cli.py +5 -5975
  62. sky/client/{cli.py → cli/command.py} +2591 -1956
  63. sky/client/cli/deprecation_utils.py +99 -0
  64. sky/client/cli/flags.py +359 -0
  65. sky/client/cli/table_utils.py +322 -0
  66. sky/client/cli/utils.py +79 -0
  67. sky/client/common.py +78 -32
  68. sky/client/oauth.py +82 -0
  69. sky/client/sdk.py +1219 -319
  70. sky/client/sdk_async.py +827 -0
  71. sky/client/service_account_auth.py +47 -0
  72. sky/cloud_stores.py +82 -3
  73. sky/clouds/__init__.py +13 -0
  74. sky/clouds/aws.py +564 -164
  75. sky/clouds/azure.py +105 -83
  76. sky/clouds/cloud.py +140 -40
  77. sky/clouds/cudo.py +68 -50
  78. sky/clouds/do.py +66 -48
  79. sky/clouds/fluidstack.py +63 -44
  80. sky/clouds/gcp.py +339 -110
  81. sky/clouds/hyperbolic.py +293 -0
  82. sky/clouds/ibm.py +70 -49
  83. sky/clouds/kubernetes.py +570 -162
  84. sky/clouds/lambda_cloud.py +74 -54
  85. sky/clouds/nebius.py +210 -81
  86. sky/clouds/oci.py +88 -66
  87. sky/clouds/paperspace.py +61 -44
  88. sky/clouds/primeintellect.py +317 -0
  89. sky/clouds/runpod.py +164 -74
  90. sky/clouds/scp.py +89 -86
  91. sky/clouds/seeweb.py +477 -0
  92. sky/clouds/shadeform.py +400 -0
  93. sky/clouds/ssh.py +263 -0
  94. sky/clouds/utils/aws_utils.py +10 -4
  95. sky/clouds/utils/gcp_utils.py +87 -11
  96. sky/clouds/utils/oci_utils.py +38 -14
  97. sky/clouds/utils/scp_utils.py +231 -167
  98. sky/clouds/vast.py +99 -77
  99. sky/clouds/vsphere.py +51 -40
  100. sky/core.py +375 -173
  101. sky/dag.py +15 -0
  102. sky/dashboard/out/404.html +1 -1
  103. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  105. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  106. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  107. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  110. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  111. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  112. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  113. sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  115. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  116. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  118. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  119. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  121. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  122. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  124. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  126. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  127. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  128. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  129. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  131. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  133. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  134. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  136. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  139. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  141. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  143. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  144. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  148. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  149. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  151. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  152. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
  155. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  156. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  157. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  158. sky/dashboard/out/clusters/[cluster].html +1 -1
  159. sky/dashboard/out/clusters.html +1 -1
  160. sky/dashboard/out/config.html +1 -0
  161. sky/dashboard/out/index.html +1 -1
  162. sky/dashboard/out/infra/[context].html +1 -0
  163. sky/dashboard/out/infra.html +1 -0
  164. sky/dashboard/out/jobs/[job].html +1 -1
  165. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  166. sky/dashboard/out/jobs.html +1 -1
  167. sky/dashboard/out/users.html +1 -0
  168. sky/dashboard/out/volumes.html +1 -0
  169. sky/dashboard/out/workspace/new.html +1 -0
  170. sky/dashboard/out/workspaces/[name].html +1 -0
  171. sky/dashboard/out/workspaces.html +1 -0
  172. sky/data/data_utils.py +137 -1
  173. sky/data/mounting_utils.py +269 -84
  174. sky/data/storage.py +1460 -1807
  175. sky/data/storage_utils.py +43 -57
  176. sky/exceptions.py +126 -2
  177. sky/execution.py +216 -63
  178. sky/global_user_state.py +2390 -586
  179. sky/jobs/__init__.py +7 -0
  180. sky/jobs/client/sdk.py +300 -58
  181. sky/jobs/client/sdk_async.py +161 -0
  182. sky/jobs/constants.py +15 -8
  183. sky/jobs/controller.py +848 -275
  184. sky/jobs/file_content_utils.py +128 -0
  185. sky/jobs/log_gc.py +193 -0
  186. sky/jobs/recovery_strategy.py +402 -152
  187. sky/jobs/scheduler.py +314 -189
  188. sky/jobs/server/core.py +836 -255
  189. sky/jobs/server/server.py +156 -115
  190. sky/jobs/server/utils.py +136 -0
  191. sky/jobs/state.py +2109 -706
  192. sky/jobs/utils.py +1306 -215
  193. sky/logs/__init__.py +21 -0
  194. sky/logs/agent.py +108 -0
  195. sky/logs/aws.py +243 -0
  196. sky/logs/gcp.py +91 -0
  197. sky/metrics/__init__.py +0 -0
  198. sky/metrics/utils.py +453 -0
  199. sky/models.py +78 -1
  200. sky/optimizer.py +164 -70
  201. sky/provision/__init__.py +90 -4
  202. sky/provision/aws/config.py +147 -26
  203. sky/provision/aws/instance.py +136 -50
  204. sky/provision/azure/instance.py +11 -6
  205. sky/provision/common.py +13 -1
  206. sky/provision/cudo/cudo_machine_type.py +1 -1
  207. sky/provision/cudo/cudo_utils.py +14 -8
  208. sky/provision/cudo/cudo_wrapper.py +72 -71
  209. sky/provision/cudo/instance.py +10 -6
  210. sky/provision/do/instance.py +10 -6
  211. sky/provision/do/utils.py +4 -3
  212. sky/provision/docker_utils.py +140 -33
  213. sky/provision/fluidstack/instance.py +13 -8
  214. sky/provision/gcp/__init__.py +1 -0
  215. sky/provision/gcp/config.py +301 -19
  216. sky/provision/gcp/constants.py +218 -0
  217. sky/provision/gcp/instance.py +36 -8
  218. sky/provision/gcp/instance_utils.py +18 -4
  219. sky/provision/gcp/volume_utils.py +247 -0
  220. sky/provision/hyperbolic/__init__.py +12 -0
  221. sky/provision/hyperbolic/config.py +10 -0
  222. sky/provision/hyperbolic/instance.py +437 -0
  223. sky/provision/hyperbolic/utils.py +373 -0
  224. sky/provision/instance_setup.py +101 -20
  225. sky/provision/kubernetes/__init__.py +5 -0
  226. sky/provision/kubernetes/config.py +9 -52
  227. sky/provision/kubernetes/constants.py +17 -0
  228. sky/provision/kubernetes/instance.py +919 -280
  229. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  230. sky/provision/kubernetes/network.py +27 -17
  231. sky/provision/kubernetes/network_utils.py +44 -43
  232. sky/provision/kubernetes/utils.py +1221 -534
  233. sky/provision/kubernetes/volume.py +343 -0
  234. sky/provision/lambda_cloud/instance.py +22 -16
  235. sky/provision/nebius/constants.py +50 -0
  236. sky/provision/nebius/instance.py +19 -6
  237. sky/provision/nebius/utils.py +237 -137
  238. sky/provision/oci/instance.py +10 -5
  239. sky/provision/paperspace/instance.py +10 -7
  240. sky/provision/paperspace/utils.py +1 -1
  241. sky/provision/primeintellect/__init__.py +10 -0
  242. sky/provision/primeintellect/config.py +11 -0
  243. sky/provision/primeintellect/instance.py +454 -0
  244. sky/provision/primeintellect/utils.py +398 -0
  245. sky/provision/provisioner.py +117 -36
  246. sky/provision/runpod/__init__.py +5 -0
  247. sky/provision/runpod/instance.py +27 -6
  248. sky/provision/runpod/utils.py +51 -18
  249. sky/provision/runpod/volume.py +214 -0
  250. sky/provision/scp/__init__.py +15 -0
  251. sky/provision/scp/config.py +93 -0
  252. sky/provision/scp/instance.py +707 -0
  253. sky/provision/seeweb/__init__.py +11 -0
  254. sky/provision/seeweb/config.py +13 -0
  255. sky/provision/seeweb/instance.py +812 -0
  256. sky/provision/shadeform/__init__.py +11 -0
  257. sky/provision/shadeform/config.py +12 -0
  258. sky/provision/shadeform/instance.py +351 -0
  259. sky/provision/shadeform/shadeform_utils.py +83 -0
  260. sky/provision/ssh/__init__.py +18 -0
  261. sky/provision/vast/instance.py +13 -8
  262. sky/provision/vast/utils.py +10 -7
  263. sky/provision/volume.py +164 -0
  264. sky/provision/vsphere/common/ssl_helper.py +1 -1
  265. sky/provision/vsphere/common/vapiconnect.py +2 -1
  266. sky/provision/vsphere/common/vim_utils.py +4 -4
  267. sky/provision/vsphere/instance.py +15 -10
  268. sky/provision/vsphere/vsphere_utils.py +17 -20
  269. sky/py.typed +0 -0
  270. sky/resources.py +845 -119
  271. sky/schemas/__init__.py +0 -0
  272. sky/schemas/api/__init__.py +0 -0
  273. sky/schemas/api/responses.py +227 -0
  274. sky/schemas/db/README +4 -0
  275. sky/schemas/db/env.py +90 -0
  276. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  277. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  278. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  279. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  280. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  281. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  282. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  283. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  284. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  285. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  286. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  287. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  288. sky/schemas/db/script.py.mako +28 -0
  289. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  290. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  291. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  292. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  293. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  294. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  295. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  296. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  297. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  298. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  299. sky/schemas/generated/__init__.py +0 -0
  300. sky/schemas/generated/autostopv1_pb2.py +36 -0
  301. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  302. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  303. sky/schemas/generated/jobsv1_pb2.py +86 -0
  304. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  305. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  306. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  307. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  308. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  309. sky/schemas/generated/servev1_pb2.py +58 -0
  310. sky/schemas/generated/servev1_pb2.pyi +115 -0
  311. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  312. sky/serve/autoscalers.py +357 -5
  313. sky/serve/client/impl.py +310 -0
  314. sky/serve/client/sdk.py +47 -139
  315. sky/serve/client/sdk_async.py +130 -0
  316. sky/serve/constants.py +12 -9
  317. sky/serve/controller.py +68 -17
  318. sky/serve/load_balancer.py +106 -60
  319. sky/serve/load_balancing_policies.py +116 -2
  320. sky/serve/replica_managers.py +434 -249
  321. sky/serve/serve_rpc_utils.py +179 -0
  322. sky/serve/serve_state.py +569 -257
  323. sky/serve/serve_utils.py +775 -265
  324. sky/serve/server/core.py +66 -711
  325. sky/serve/server/impl.py +1093 -0
  326. sky/serve/server/server.py +21 -18
  327. sky/serve/service.py +192 -89
  328. sky/serve/service_spec.py +144 -20
  329. sky/serve/spot_placer.py +3 -0
  330. sky/server/auth/__init__.py +0 -0
  331. sky/server/auth/authn.py +50 -0
  332. sky/server/auth/loopback.py +38 -0
  333. sky/server/auth/oauth2_proxy.py +202 -0
  334. sky/server/common.py +478 -182
  335. sky/server/config.py +85 -23
  336. sky/server/constants.py +44 -6
  337. sky/server/daemons.py +295 -0
  338. sky/server/html/token_page.html +185 -0
  339. sky/server/metrics.py +160 -0
  340. sky/server/middleware_utils.py +166 -0
  341. sky/server/requests/executor.py +558 -138
  342. sky/server/requests/payloads.py +364 -24
  343. sky/server/requests/preconditions.py +21 -17
  344. sky/server/requests/process.py +112 -29
  345. sky/server/requests/request_names.py +121 -0
  346. sky/server/requests/requests.py +822 -226
  347. sky/server/requests/serializers/decoders.py +82 -31
  348. sky/server/requests/serializers/encoders.py +140 -22
  349. sky/server/requests/threads.py +117 -0
  350. sky/server/rest.py +455 -0
  351. sky/server/server.py +1309 -285
  352. sky/server/state.py +20 -0
  353. sky/server/stream_utils.py +327 -61
  354. sky/server/uvicorn.py +217 -3
  355. sky/server/versions.py +270 -0
  356. sky/setup_files/MANIFEST.in +11 -1
  357. sky/setup_files/alembic.ini +160 -0
  358. sky/setup_files/dependencies.py +139 -31
  359. sky/setup_files/setup.py +44 -42
  360. sky/sky_logging.py +114 -7
  361. sky/skylet/attempt_skylet.py +106 -24
  362. sky/skylet/autostop_lib.py +129 -8
  363. sky/skylet/configs.py +29 -20
  364. sky/skylet/constants.py +216 -25
  365. sky/skylet/events.py +101 -21
  366. sky/skylet/job_lib.py +345 -164
  367. sky/skylet/log_lib.py +297 -18
  368. sky/skylet/log_lib.pyi +44 -1
  369. sky/skylet/providers/ibm/node_provider.py +12 -8
  370. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  371. sky/skylet/ray_patches/__init__.py +17 -3
  372. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  373. sky/skylet/ray_patches/cli.py.diff +19 -0
  374. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  375. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  376. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  377. sky/skylet/ray_patches/updater.py.diff +18 -0
  378. sky/skylet/ray_patches/worker.py.diff +41 -0
  379. sky/skylet/runtime_utils.py +21 -0
  380. sky/skylet/services.py +568 -0
  381. sky/skylet/skylet.py +72 -4
  382. sky/skylet/subprocess_daemon.py +104 -29
  383. sky/skypilot_config.py +506 -99
  384. sky/ssh_node_pools/__init__.py +1 -0
  385. sky/ssh_node_pools/core.py +135 -0
  386. sky/ssh_node_pools/server.py +233 -0
  387. sky/task.py +685 -163
  388. sky/templates/aws-ray.yml.j2 +11 -3
  389. sky/templates/azure-ray.yml.j2 +2 -1
  390. sky/templates/cudo-ray.yml.j2 +1 -0
  391. sky/templates/do-ray.yml.j2 +2 -1
  392. sky/templates/fluidstack-ray.yml.j2 +1 -0
  393. sky/templates/gcp-ray.yml.j2 +62 -1
  394. sky/templates/hyperbolic-ray.yml.j2 +68 -0
  395. sky/templates/ibm-ray.yml.j2 +2 -1
  396. sky/templates/jobs-controller.yaml.j2 +27 -24
  397. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  398. sky/templates/kubernetes-ray.yml.j2 +611 -50
  399. sky/templates/lambda-ray.yml.j2 +2 -1
  400. sky/templates/nebius-ray.yml.j2 +34 -12
  401. sky/templates/oci-ray.yml.j2 +1 -0
  402. sky/templates/paperspace-ray.yml.j2 +2 -1
  403. sky/templates/primeintellect-ray.yml.j2 +72 -0
  404. sky/templates/runpod-ray.yml.j2 +10 -1
  405. sky/templates/scp-ray.yml.j2 +4 -50
  406. sky/templates/seeweb-ray.yml.j2 +171 -0
  407. sky/templates/shadeform-ray.yml.j2 +73 -0
  408. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  409. sky/templates/vast-ray.yml.j2 +1 -0
  410. sky/templates/vsphere-ray.yml.j2 +1 -0
  411. sky/templates/websocket_proxy.py +212 -37
  412. sky/usage/usage_lib.py +31 -15
  413. sky/users/__init__.py +0 -0
  414. sky/users/model.conf +15 -0
  415. sky/users/permission.py +397 -0
  416. sky/users/rbac.py +121 -0
  417. sky/users/server.py +720 -0
  418. sky/users/token_service.py +218 -0
  419. sky/utils/accelerator_registry.py +35 -5
  420. sky/utils/admin_policy_utils.py +84 -38
  421. sky/utils/annotations.py +38 -5
  422. sky/utils/asyncio_utils.py +78 -0
  423. sky/utils/atomic.py +1 -1
  424. sky/utils/auth_utils.py +153 -0
  425. sky/utils/benchmark_utils.py +60 -0
  426. sky/utils/cli_utils/status_utils.py +159 -86
  427. sky/utils/cluster_utils.py +31 -9
  428. sky/utils/command_runner.py +354 -68
  429. sky/utils/command_runner.pyi +93 -3
  430. sky/utils/common.py +35 -8
  431. sky/utils/common_utils.py +314 -91
  432. sky/utils/config_utils.py +74 -5
  433. sky/utils/context.py +403 -0
  434. sky/utils/context_utils.py +242 -0
  435. sky/utils/controller_utils.py +383 -89
  436. sky/utils/dag_utils.py +31 -12
  437. sky/utils/db/__init__.py +0 -0
  438. sky/utils/db/db_utils.py +485 -0
  439. sky/utils/db/kv_cache.py +149 -0
  440. sky/utils/db/migration_utils.py +137 -0
  441. sky/utils/directory_utils.py +12 -0
  442. sky/utils/env_options.py +13 -0
  443. sky/utils/git.py +567 -0
  444. sky/utils/git_clone.sh +460 -0
  445. sky/utils/infra_utils.py +195 -0
  446. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  447. sky/utils/kubernetes/config_map_utils.py +133 -0
  448. sky/utils/kubernetes/create_cluster.sh +15 -29
  449. sky/utils/kubernetes/delete_cluster.sh +10 -7
  450. sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
  451. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  452. sky/utils/kubernetes/generate_kind_config.py +6 -66
  453. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  454. sky/utils/kubernetes/gpu_labeler.py +18 -8
  455. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  456. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  457. sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
  458. sky/utils/kubernetes/rsync_helper.sh +11 -3
  459. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  460. sky/utils/kubernetes/ssh_utils.py +221 -0
  461. sky/utils/kubernetes_enums.py +8 -15
  462. sky/utils/lock_events.py +94 -0
  463. sky/utils/locks.py +416 -0
  464. sky/utils/log_utils.py +82 -107
  465. sky/utils/perf_utils.py +22 -0
  466. sky/utils/resource_checker.py +298 -0
  467. sky/utils/resources_utils.py +249 -32
  468. sky/utils/rich_utils.py +217 -39
  469. sky/utils/schemas.py +955 -160
  470. sky/utils/serialize_utils.py +16 -0
  471. sky/utils/status_lib.py +10 -0
  472. sky/utils/subprocess_utils.py +29 -15
  473. sky/utils/tempstore.py +70 -0
  474. sky/utils/thread_utils.py +91 -0
  475. sky/utils/timeline.py +26 -53
  476. sky/utils/ux_utils.py +84 -15
  477. sky/utils/validator.py +11 -1
  478. sky/utils/volume.py +165 -0
  479. sky/utils/yaml_utils.py +111 -0
  480. sky/volumes/__init__.py +13 -0
  481. sky/volumes/client/__init__.py +0 -0
  482. sky/volumes/client/sdk.py +150 -0
  483. sky/volumes/server/__init__.py +0 -0
  484. sky/volumes/server/core.py +270 -0
  485. sky/volumes/server/server.py +124 -0
  486. sky/volumes/volume.py +215 -0
  487. sky/workspaces/__init__.py +0 -0
  488. sky/workspaces/core.py +655 -0
  489. sky/workspaces/server.py +101 -0
  490. sky/workspaces/utils.py +56 -0
  491. sky_templates/README.md +3 -0
  492. sky_templates/__init__.py +3 -0
  493. sky_templates/ray/__init__.py +0 -0
  494. sky_templates/ray/start_cluster +183 -0
  495. sky_templates/ray/stop_cluster +75 -0
  496. skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
  497. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  498. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
  499. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  500. sky/benchmark/benchmark_state.py +0 -256
  501. sky/benchmark/benchmark_utils.py +0 -641
  502. sky/clouds/service_catalog/constants.py +0 -7
  503. sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
  504. sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
  505. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  506. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  507. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  508. sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
  509. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  510. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  511. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  512. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  513. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  514. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  515. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  516. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
  517. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  518. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  519. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  520. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
  521. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  522. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  523. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  524. sky/jobs/dashboard/dashboard.py +0 -223
  525. sky/jobs/dashboard/static/favicon.ico +0 -0
  526. sky/jobs/dashboard/templates/index.html +0 -831
  527. sky/jobs/server/dashboard_utils.py +0 -69
  528. sky/skylet/providers/scp/__init__.py +0 -2
  529. sky/skylet/providers/scp/config.py +0 -149
  530. sky/skylet/providers/scp/node_provider.py +0 -578
  531. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  532. sky/utils/db_utils.py +0 -100
  533. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  534. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  535. skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
  536. skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
  537. skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
  538. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  539. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  540. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  541. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  542. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  543. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  544. /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  545. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  546. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/adaptors/seeweb.py ADDED
@@ -0,0 +1,183 @@
1
+ """ Seeweb Adaptor """
2
+ import configparser
3
+ import pathlib
4
+ from typing import Optional
5
+
6
+ import pydantic
7
+ import requests # type: ignore
8
+
9
+ from sky.adaptors import common
10
+ from sky.utils import annotations
11
+
12
+
13
+ class SeewebError(Exception):
14
+ """Base exception for Seeweb adaptor errors."""
15
+
16
+
17
+ class SeewebCredentialsFileNotFound(SeewebError):
18
+ """Raised when the Seeweb credentials file is missing."""
19
+
20
+
21
+ class SeewebApiKeyMissing(SeewebError):
22
+ """Raised when the Seeweb API key is missing or empty."""
23
+
24
+
25
+ class SeewebAuthenticationError(SeewebError):
26
+ """Raised when authenticating with Seeweb API fails."""
27
+
28
+
29
+ _IMPORT_ERROR_MESSAGE = ('Failed to import dependencies for Seeweb.'
30
+ 'Try pip install "skypilot[seeweb]"')
31
+
32
+ ecsapi = common.LazyImport(
33
+ 'ecsapi',
34
+ import_error_message=_IMPORT_ERROR_MESSAGE,
35
+ )
36
+ boto3 = common.LazyImport('boto3', import_error_message=_IMPORT_ERROR_MESSAGE)
37
+ botocore = common.LazyImport('botocore',
38
+ import_error_message=_IMPORT_ERROR_MESSAGE)
39
+
40
+ _LAZY_MODULES = (ecsapi, boto3, botocore)
41
+
42
+
43
+ @common.load_lazy_modules(_LAZY_MODULES)
44
+ def check_compute_credentials() -> bool:
45
+ """Checks if the user has access credentials to Seeweb's compute service.
46
+
47
+ Returns True if credentials are valid; otherwise raises a SeewebError.
48
+ """
49
+ # Read API key from standard Seeweb configuration file
50
+ key_path = pathlib.Path('~/.seeweb_cloud/seeweb_keys').expanduser()
51
+ if not key_path.exists():
52
+ raise SeewebCredentialsFileNotFound(
53
+ 'Missing Seeweb API key file ~/.seeweb_cloud/seeweb_keys')
54
+
55
+ parser = configparser.ConfigParser()
56
+ parser.read(key_path)
57
+ try:
58
+ api_key = parser['DEFAULT']['api_key'].strip()
59
+ except KeyError as e:
60
+ raise SeewebApiKeyMissing(
61
+ 'Missing api_key in ~/.seeweb_cloud/seeweb_keys') from e
62
+ if not api_key:
63
+ raise SeewebApiKeyMissing(
64
+ 'Empty api_key in ~/.seeweb_cloud/seeweb_keys')
65
+
66
+ # Test connection by fetching servers list to validate the key
67
+ try:
68
+ seeweb_client = ecsapi.Api(token=api_key)
69
+ try:
70
+ seeweb_client.fetch_servers()
71
+ except pydantic.ValidationError:
72
+ # Fallback: fetch raw JSON to validate authentication
73
+ # pylint: disable=protected-access
74
+ base_url = seeweb_client._Api__generate_base_url() # type: ignore
75
+ headers = seeweb_client._Api__generate_authentication_headers(
76
+ ) # type: ignore
77
+ url = f'{base_url}/servers'
78
+ resp = requests.get(url, headers=headers, timeout=15)
79
+ resp.raise_for_status()
80
+ # If we get here, authentication worked even if schema mismatches
81
+ except Exception as e: # pylint: disable=broad-except
82
+ raise SeewebAuthenticationError(
83
+ f'Unable to authenticate with Seeweb API: {e}') from e
84
+
85
+ return True
86
+
87
+
88
+ @common.load_lazy_modules(_LAZY_MODULES)
89
+ def check_storage_credentials() -> bool:
90
+ """Checks if the user has access credentials to Seeweb's storage service.
91
+
92
+ Mirrors compute credentials validation.
93
+ """
94
+ return check_compute_credentials()
95
+
96
+
97
+ @common.load_lazy_modules(_LAZY_MODULES)
98
+ @annotations.lru_cache(scope='global', maxsize=1)
99
+ def client():
100
+ """Returns an authenticated ecsapi.Api object."""
101
+ # Create authenticated client using the same credential pattern
102
+ key_path = pathlib.Path('~/.seeweb_cloud/seeweb_keys').expanduser()
103
+ if not key_path.exists():
104
+ raise SeewebCredentialsFileNotFound(
105
+ 'Missing Seeweb API key file ~/.seeweb_cloud/seeweb_keys')
106
+
107
+ parser = configparser.ConfigParser()
108
+ parser.read(key_path)
109
+ try:
110
+ api_key = parser['DEFAULT']['api_key'].strip()
111
+ except KeyError as e:
112
+ raise SeewebApiKeyMissing(
113
+ 'Missing api_key in ~/.seeweb_cloud/seeweb_keys') from e
114
+ if not api_key:
115
+ raise SeewebApiKeyMissing(
116
+ 'Empty api_key in ~/.seeweb_cloud/seeweb_keys')
117
+
118
+ api = ecsapi.Api(token=api_key)
119
+
120
+ # Monkey-patch fetch_servers to be tolerant to API schema mismatches.
121
+ orig_fetch_servers = api.fetch_servers
122
+ orig_delete_server = api.delete_server
123
+
124
+ def _tolerant_fetch_servers(
125
+ timeout: Optional[int] = None): # type: ignore[override]
126
+ try:
127
+ return orig_fetch_servers(timeout=timeout)
128
+ except pydantic.ValidationError:
129
+ # Fallback path: fetch raw JSON, drop snapshot fields, then validate
130
+ # pylint: disable=protected-access
131
+ base_url = api._Api__generate_base_url() # type: ignore
132
+ headers = api._Api__generate_authentication_headers(
133
+ ) # type: ignore
134
+ url = f'{base_url}/servers'
135
+ resp = requests.get(url, headers=headers, timeout=timeout or 15)
136
+ resp.raise_for_status()
137
+ data = resp.json()
138
+ try:
139
+ servers = data.get('server', [])
140
+ for s in servers:
141
+ s.pop('last_restored_snapshot', None)
142
+ group = s.get('group')
143
+ if isinstance(group, dict):
144
+ group_name = group.get('name')
145
+ s['group'] = group_name if isinstance(
146
+ group_name, str) else str(group_name)
147
+ except (KeyError, TypeError, ValueError):
148
+ pass
149
+ server_list_response_cls = ecsapi._server._ServerListResponse
150
+ servers_response = server_list_response_cls.model_validate(data)
151
+ return servers_response.server
152
+
153
+ api.fetch_servers = _tolerant_fetch_servers # type: ignore[assignment]
154
+
155
+ def _tolerant_delete_server(server_name: str,
156
+ timeout: Optional[int] = None):
157
+ try:
158
+ return orig_delete_server(server_name, timeout=timeout)
159
+ except pydantic.ValidationError:
160
+ # Fallback: perform raw DELETE and interpret not_found as success
161
+ # pylint: disable=protected-access
162
+ base_url = api._Api__generate_base_url() # type: ignore
163
+ headers = api._Api__generate_authentication_headers(
164
+ ) # type: ignore
165
+ url = f'{base_url}/servers/{server_name}'
166
+ resp = requests.delete(url, headers=headers, timeout=timeout or 15)
167
+ # Treat 404 as idempotent success
168
+ if resp.status_code == 404:
169
+ return None
170
+ # Some APIs return {status: 'not_found', message: ...}
171
+ try:
172
+ data = resp.json()
173
+ if isinstance(data, dict) and data.get('status') == 'not_found':
174
+ return None
175
+ except (ValueError, TypeError):
176
+ pass
177
+ # If not clearly not_found, re-raise original behavior
178
+ resp.raise_for_status()
179
+ # Best-effort: return None to indicate deletion requested
180
+ return None
181
+
182
+ api.delete_server = _tolerant_delete_server # type: ignore[assignment]
183
+ return api
@@ -0,0 +1,89 @@
1
+ """Shadeform cloud adaptor."""
2
+
3
+ import functools
4
+ import socket
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ import requests
8
+
9
+ from sky import sky_logging
10
+ from sky.provision.shadeform import shadeform_utils
11
+ from sky.utils import common_utils
12
+
13
+ logger = sky_logging.init_logger(__name__)
14
+
15
+ _shadeform_sdk = None
16
+
17
+
18
+ def import_package(func):
19
+
20
+ @functools.wraps(func)
21
+ def wrapper(*args, **kwargs):
22
+ global _shadeform_sdk
23
+ if _shadeform_sdk is None:
24
+ try:
25
+ import shadeform as _shadeform # pylint: disable=import-outside-toplevel
26
+ _shadeform_sdk = _shadeform
27
+ except ImportError:
28
+ raise ImportError(
29
+ 'Failed to import dependencies for Shadeform. '
30
+ 'Try pip install "skypilot[shadeform]"') from None
31
+ return func(*args, **kwargs)
32
+
33
+ return wrapper
34
+
35
+
36
+ @import_package
37
+ def shadeform():
38
+ """Return the shadeform package."""
39
+ return _shadeform_sdk
40
+
41
+
42
+ def list_ssh_keys() -> List[Dict[str, Any]]:
43
+ """List all SSH keys in Shadeform account."""
44
+ try:
45
+ response = shadeform_utils.get_ssh_keys()
46
+ return response.get('ssh_keys', [])
47
+ except (ValueError, KeyError, requests.exceptions.RequestException) as e:
48
+ logger.warning(f'Failed to list SSH keys from Shadeform: {e}')
49
+ return []
50
+
51
+
52
+ def add_ssh_key_to_shadeform(public_key: str) -> Optional[str]:
53
+ """Add SSH key to Shadeform if it doesn't already exist.
54
+
55
+ Args:
56
+ public_key: The SSH public key string.
57
+
58
+ Returns:
59
+ The name of the key if added successfully, None otherwise.
60
+ """
61
+ try:
62
+ # Check if key already exists
63
+ existing_keys = list_ssh_keys()
64
+ key_exists = False
65
+ key_id = None
66
+ for key in existing_keys:
67
+ if key.get('public_key') == public_key:
68
+ key_exists = True
69
+ key_id = key.get('id')
70
+ break
71
+
72
+ if key_exists:
73
+ logger.info('SSH key already exists in Shadeform account')
74
+ return key_id
75
+
76
+ # Generate a unique key name
77
+ hostname = socket.gethostname()
78
+ key_name = f'skypilot-{hostname}-{common_utils.get_user_hash()[:8]}'
79
+
80
+ # Add the key
81
+ response = shadeform_utils.add_ssh_key(name=key_name,
82
+ public_key=public_key)
83
+ key_id = response['id']
84
+ logger.info(f'Added SSH key to Shadeform: {key_name, key_id}')
85
+ return key_id
86
+
87
+ except (ValueError, KeyError, requests.exceptions.RequestException) as e:
88
+ logger.warning(f'Failed to add SSH key to Shadeform: {e}')
89
+ return None
sky/admin_policy.py CHANGED
@@ -4,12 +4,26 @@ import dataclasses
4
4
  import typing
5
5
  from typing import Optional
6
6
 
7
+ import colorama
8
+ import pydantic
9
+
10
+ import sky
11
+ from sky import exceptions
12
+ from sky import models
13
+ from sky.adaptors import common as adaptors_common
14
+ from sky.server.requests import request_names
15
+ from sky.utils import common_utils
16
+ from sky.utils import config_utils
17
+ from sky.utils import ux_utils
18
+ from sky.utils import yaml_utils
19
+
7
20
  if typing.TYPE_CHECKING:
8
- import sky
21
+ import requests
22
+ else:
23
+ requests = adaptors_common.LazyImport('requests')
9
24
 
10
25
 
11
- @dataclasses.dataclass
12
- class RequestOptions:
26
+ class RequestOptions(pydantic.BaseModel):
13
27
  """Request options for admin policy.
14
28
 
15
29
  Args:
@@ -21,11 +35,28 @@ class RequestOptions:
21
35
  dryrun: Is the request a dryrun?
22
36
  """
23
37
  cluster_name: Optional[str]
38
+ # Keep these two fields for backward compatibility. The values are copied
39
+ # from task.resources.autostop_config, so that legacy admin policy plugins
40
+ # can still read the correct autostop config from request options before
41
+ # we drop the compatibility.
42
+ # TODO(aylei): remove these fields after 0.12.0
24
43
  idle_minutes_to_autostop: Optional[int]
25
44
  down: bool
26
45
  dryrun: bool
27
46
 
28
47
 
48
+ class _UserRequestBody(pydantic.BaseModel):
49
+ """Auxiliary model to validate and serialize a user request."""
50
+ # We have to use serialized YAML string, instead of a dict, because dict
51
+ # will be converted to JSON string, which will lose the None key.
52
+ task: str
53
+ skypilot_config: str
54
+ request_name: str
55
+ request_options: Optional[RequestOptions] = None
56
+ at_client_side: bool = False
57
+ user: str
58
+
59
+
29
60
  @dataclasses.dataclass
30
61
  class UserRequest:
31
62
  """A user request.
@@ -45,20 +76,101 @@ class UserRequest:
45
76
  task: User specified task.
46
77
  skypilot_config: Global skypilot config to be used in this request.
47
78
  request_options: Request options. It is None for jobs and services.
79
+ at_client_side: Is the request intercepted by the policy at client-side?
80
+ user: User who made the request.
81
+ Only available on the server side.
82
+ This value is None if at_client_side is True.
48
83
  """
49
84
  task: 'sky.Task'
50
85
  skypilot_config: 'sky.Config'
86
+ request_name: request_names.AdminPolicyRequestName
51
87
  request_options: Optional['RequestOptions'] = None
88
+ at_client_side: bool = False
89
+ user: Optional['models.User'] = None
90
+
91
+ def encode(self) -> str:
92
+ return _UserRequestBody(
93
+ task=yaml_utils.dump_yaml_str(self.task.to_yaml_config()),
94
+ skypilot_config=yaml_utils.dump_yaml_str(dict(
95
+ self.skypilot_config)),
96
+ request_name=self.request_name.value,
97
+ request_options=self.request_options,
98
+ at_client_side=self.at_client_side,
99
+ user=(yaml_utils.dump_yaml_str(self.user.to_dict())
100
+ if self.user is not None else ''),
101
+ ).model_dump_json()
102
+
103
+ @classmethod
104
+ def decode(cls, body: str) -> 'UserRequest':
105
+ user_request_body = _UserRequestBody.model_validate_json(body)
106
+ user_dict = yaml_utils.read_yaml_str(
107
+ user_request_body.user) if user_request_body.user != '' else None
108
+ user = models.User(
109
+ id=user_dict['id'],
110
+ name=user_dict['name']) if user_dict is not None else None
111
+ return cls(
112
+ task=sky.Task.from_yaml_config(
113
+ yaml_utils.read_yaml_all_str(user_request_body.task)[0]),
114
+ skypilot_config=config_utils.Config.from_dict(
115
+ yaml_utils.read_yaml_all_str(
116
+ user_request_body.skypilot_config)[0]),
117
+ request_name=request_names.AdminPolicyRequestName(
118
+ user_request_body.request_name),
119
+ request_options=user_request_body.request_options,
120
+ at_client_side=user_request_body.at_client_side,
121
+ user=user,
122
+ )
123
+
124
+
125
+ class _MutatedUserRequestBody(pydantic.BaseModel):
126
+ """Auxiliary model to validate and serialize a user request."""
127
+ task: str
128
+ skypilot_config: str
52
129
 
53
130
 
54
131
  @dataclasses.dataclass
55
132
  class MutatedUserRequest:
133
+ """Mutated user request."""
134
+
56
135
  task: 'sky.Task'
57
136
  skypilot_config: 'sky.Config'
58
137
 
138
+ def encode(self) -> str:
139
+ return _MutatedUserRequestBody(
140
+ task=yaml_utils.dump_yaml_str(self.task.to_yaml_config()),
141
+ skypilot_config=yaml_utils.dump_yaml_str(dict(
142
+ self.skypilot_config),)).model_dump_json()
143
+
144
+ @classmethod
145
+ def decode(cls, mutated_user_request_body: str,
146
+ original_request: UserRequest) -> 'MutatedUserRequest':
147
+ mutated_user_request_body = _MutatedUserRequestBody.model_validate_json(
148
+ mutated_user_request_body)
149
+ task = sky.Task.from_yaml_config(
150
+ yaml_utils.read_yaml_all_str(mutated_user_request_body.task)[0])
151
+ # Some internal Task fields are not serialized. We need to manually
152
+ # restore them from the original request.
153
+ task.managed_job_dag = original_request.task.managed_job_dag
154
+ task.service_name = original_request.task.service_name
155
+ return cls(task=task,
156
+ skypilot_config=config_utils.Config.from_dict(
157
+ yaml_utils.read_yaml_all_str(
158
+ mutated_user_request_body.skypilot_config)[0],))
159
+
160
+
161
+ class PolicyInterface:
162
+ """Interface for admin-defined policy for user requests."""
163
+
164
+ @abc.abstractmethod
165
+ def apply(self, user_request: UserRequest) -> MutatedUserRequest:
166
+ """Apply the admin policy to the user request."""
167
+
168
+ def __str__(self):
169
+ return f'{self.__class__.__name__}'
170
+
59
171
 
60
172
  # pylint: disable=line-too-long
61
- class AdminPolicy:
173
+ class AdminPolicy(PolicyInterface):
62
174
  """Abstract interface of an admin-defined policy for all user requests.
63
175
 
64
176
  Admins can implement a subclass of AdminPolicy with the following signature:
@@ -99,3 +211,74 @@ class AdminPolicy:
99
211
  """
100
212
  raise NotImplementedError(
101
213
  'Your policy must implement validate_and_mutate')
214
+
215
+ def apply(self, user_request: UserRequest) -> MutatedUserRequest:
216
+ return self.validate_and_mutate(user_request)
217
+
218
+
219
+ class PolicyTemplate(PolicyInterface):
220
+ """Admin policy template that can be instantiated to create a policy."""
221
+
222
+ @abc.abstractmethod
223
+ def validate_and_mutate(self,
224
+ user_request: UserRequest) -> MutatedUserRequest:
225
+ """Validates and mutates the user request and returns mutated request.
226
+
227
+ Args:
228
+ user_request: The user request to validate and mutate.
229
+ UserRequest contains (sky.Task, sky.Config)
230
+
231
+ Returns:
232
+ MutatedUserRequest: The mutated user request.
233
+
234
+ Raises:
235
+ Exception to throw if the user request failed the validation.
236
+ """
237
+ raise NotImplementedError(
238
+ 'Your policy must implement validate_and_mutate')
239
+
240
+ def apply(self, user_request: UserRequest) -> MutatedUserRequest:
241
+ return self.validate_and_mutate(user_request)
242
+
243
+
244
+ class RestfulAdminPolicy(PolicyTemplate):
245
+ """Admin policy that calls a RESTful API for validation."""
246
+
247
+ def __init__(self, policy_url: str):
248
+ super().__init__()
249
+ self.policy_url = policy_url
250
+
251
+ def validate_and_mutate(self,
252
+ user_request: UserRequest) -> MutatedUserRequest:
253
+ try:
254
+ response = requests.post(
255
+ self.policy_url,
256
+ json=user_request.encode(),
257
+ headers={'Content-Type': 'application/json'},
258
+ # TODO(aylei): make this configurable
259
+ timeout=30)
260
+ if response.status_code == 400:
261
+ raise exceptions.UserRequestRejectedByPolicy(
262
+ f'{colorama.Fore.RED}User request is rejected by admin '
263
+ f'policy {self.policy_url}{colorama.Fore.RESET}: '
264
+ f'{response.text}')
265
+ response.raise_for_status()
266
+ except requests.exceptions.RequestException as e:
267
+ with ux_utils.print_exception_no_traceback():
268
+ raise exceptions.RestfulPolicyError(
269
+ f'Failed to call admin policy URL '
270
+ f'{self.policy_url}: {e}') from None
271
+
272
+ try:
273
+ mutated_user_request = MutatedUserRequest.decode(
274
+ response.json(), user_request)
275
+ except Exception as e: # pylint: disable=broad-except
276
+ with ux_utils.print_exception_no_traceback():
277
+ raise exceptions.RestfulPolicyError(
278
+ f'Failed to decode response from admin policy URL '
279
+ f'{self.policy_url}: {common_utils.format_exception(e, use_bracket=True)}'
280
+ ) from None
281
+ return mutated_user_request
282
+
283
+ def __repr__(self):
284
+ return f'RestfulAdminPolicy(policy_url={self.policy_url})'