skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546) hide show
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/hyperbolic.py +8 -0
  8. sky/adaptors/ibm.py +5 -2
  9. sky/adaptors/kubernetes.py +149 -18
  10. sky/adaptors/nebius.py +173 -30
  11. sky/adaptors/primeintellect.py +1 -0
  12. sky/adaptors/runpod.py +68 -0
  13. sky/adaptors/seeweb.py +183 -0
  14. sky/adaptors/shadeform.py +89 -0
  15. sky/admin_policy.py +187 -4
  16. sky/authentication.py +179 -225
  17. sky/backends/__init__.py +4 -2
  18. sky/backends/backend.py +22 -9
  19. sky/backends/backend_utils.py +1323 -397
  20. sky/backends/cloud_vm_ray_backend.py +1749 -1029
  21. sky/backends/docker_utils.py +1 -1
  22. sky/backends/local_docker_backend.py +11 -6
  23. sky/backends/task_codegen.py +633 -0
  24. sky/backends/wheel_utils.py +55 -9
  25. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  26. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  27. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  28. sky/{clouds/service_catalog → catalog}/common.py +90 -49
  29. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  31. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  33. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
  34. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  35. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  36. sky/catalog/data_fetchers/fetch_nebius.py +338 -0
  37. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  38. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  39. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  40. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  41. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  42. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  43. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  44. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  45. sky/catalog/hyperbolic_catalog.py +136 -0
  46. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  47. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  48. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  49. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  50. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  51. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  52. sky/catalog/primeintellect_catalog.py +95 -0
  53. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  54. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  55. sky/catalog/seeweb_catalog.py +184 -0
  56. sky/catalog/shadeform_catalog.py +165 -0
  57. sky/catalog/ssh_catalog.py +167 -0
  58. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  59. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  60. sky/check.py +533 -185
  61. sky/cli.py +5 -5975
  62. sky/client/{cli.py → cli/command.py} +2591 -1956
  63. sky/client/cli/deprecation_utils.py +99 -0
  64. sky/client/cli/flags.py +359 -0
  65. sky/client/cli/table_utils.py +322 -0
  66. sky/client/cli/utils.py +79 -0
  67. sky/client/common.py +78 -32
  68. sky/client/oauth.py +82 -0
  69. sky/client/sdk.py +1219 -319
  70. sky/client/sdk_async.py +827 -0
  71. sky/client/service_account_auth.py +47 -0
  72. sky/cloud_stores.py +82 -3
  73. sky/clouds/__init__.py +13 -0
  74. sky/clouds/aws.py +564 -164
  75. sky/clouds/azure.py +105 -83
  76. sky/clouds/cloud.py +140 -40
  77. sky/clouds/cudo.py +68 -50
  78. sky/clouds/do.py +66 -48
  79. sky/clouds/fluidstack.py +63 -44
  80. sky/clouds/gcp.py +339 -110
  81. sky/clouds/hyperbolic.py +293 -0
  82. sky/clouds/ibm.py +70 -49
  83. sky/clouds/kubernetes.py +570 -162
  84. sky/clouds/lambda_cloud.py +74 -54
  85. sky/clouds/nebius.py +210 -81
  86. sky/clouds/oci.py +88 -66
  87. sky/clouds/paperspace.py +61 -44
  88. sky/clouds/primeintellect.py +317 -0
  89. sky/clouds/runpod.py +164 -74
  90. sky/clouds/scp.py +89 -86
  91. sky/clouds/seeweb.py +477 -0
  92. sky/clouds/shadeform.py +400 -0
  93. sky/clouds/ssh.py +263 -0
  94. sky/clouds/utils/aws_utils.py +10 -4
  95. sky/clouds/utils/gcp_utils.py +87 -11
  96. sky/clouds/utils/oci_utils.py +38 -14
  97. sky/clouds/utils/scp_utils.py +231 -167
  98. sky/clouds/vast.py +99 -77
  99. sky/clouds/vsphere.py +51 -40
  100. sky/core.py +375 -173
  101. sky/dag.py +15 -0
  102. sky/dashboard/out/404.html +1 -1
  103. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  105. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  106. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  107. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  110. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  111. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  112. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  113. sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  115. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  116. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  118. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  119. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  121. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  122. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  124. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  126. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  127. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  128. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  129. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  131. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  133. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  134. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  136. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  139. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  141. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  143. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  144. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  148. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  149. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  151. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  152. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
  155. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  156. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  157. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  158. sky/dashboard/out/clusters/[cluster].html +1 -1
  159. sky/dashboard/out/clusters.html +1 -1
  160. sky/dashboard/out/config.html +1 -0
  161. sky/dashboard/out/index.html +1 -1
  162. sky/dashboard/out/infra/[context].html +1 -0
  163. sky/dashboard/out/infra.html +1 -0
  164. sky/dashboard/out/jobs/[job].html +1 -1
  165. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  166. sky/dashboard/out/jobs.html +1 -1
  167. sky/dashboard/out/users.html +1 -0
  168. sky/dashboard/out/volumes.html +1 -0
  169. sky/dashboard/out/workspace/new.html +1 -0
  170. sky/dashboard/out/workspaces/[name].html +1 -0
  171. sky/dashboard/out/workspaces.html +1 -0
  172. sky/data/data_utils.py +137 -1
  173. sky/data/mounting_utils.py +269 -84
  174. sky/data/storage.py +1460 -1807
  175. sky/data/storage_utils.py +43 -57
  176. sky/exceptions.py +126 -2
  177. sky/execution.py +216 -63
  178. sky/global_user_state.py +2390 -586
  179. sky/jobs/__init__.py +7 -0
  180. sky/jobs/client/sdk.py +300 -58
  181. sky/jobs/client/sdk_async.py +161 -0
  182. sky/jobs/constants.py +15 -8
  183. sky/jobs/controller.py +848 -275
  184. sky/jobs/file_content_utils.py +128 -0
  185. sky/jobs/log_gc.py +193 -0
  186. sky/jobs/recovery_strategy.py +402 -152
  187. sky/jobs/scheduler.py +314 -189
  188. sky/jobs/server/core.py +836 -255
  189. sky/jobs/server/server.py +156 -115
  190. sky/jobs/server/utils.py +136 -0
  191. sky/jobs/state.py +2109 -706
  192. sky/jobs/utils.py +1306 -215
  193. sky/logs/__init__.py +21 -0
  194. sky/logs/agent.py +108 -0
  195. sky/logs/aws.py +243 -0
  196. sky/logs/gcp.py +91 -0
  197. sky/metrics/__init__.py +0 -0
  198. sky/metrics/utils.py +453 -0
  199. sky/models.py +78 -1
  200. sky/optimizer.py +164 -70
  201. sky/provision/__init__.py +90 -4
  202. sky/provision/aws/config.py +147 -26
  203. sky/provision/aws/instance.py +136 -50
  204. sky/provision/azure/instance.py +11 -6
  205. sky/provision/common.py +13 -1
  206. sky/provision/cudo/cudo_machine_type.py +1 -1
  207. sky/provision/cudo/cudo_utils.py +14 -8
  208. sky/provision/cudo/cudo_wrapper.py +72 -71
  209. sky/provision/cudo/instance.py +10 -6
  210. sky/provision/do/instance.py +10 -6
  211. sky/provision/do/utils.py +4 -3
  212. sky/provision/docker_utils.py +140 -33
  213. sky/provision/fluidstack/instance.py +13 -8
  214. sky/provision/gcp/__init__.py +1 -0
  215. sky/provision/gcp/config.py +301 -19
  216. sky/provision/gcp/constants.py +218 -0
  217. sky/provision/gcp/instance.py +36 -8
  218. sky/provision/gcp/instance_utils.py +18 -4
  219. sky/provision/gcp/volume_utils.py +247 -0
  220. sky/provision/hyperbolic/__init__.py +12 -0
  221. sky/provision/hyperbolic/config.py +10 -0
  222. sky/provision/hyperbolic/instance.py +437 -0
  223. sky/provision/hyperbolic/utils.py +373 -0
  224. sky/provision/instance_setup.py +101 -20
  225. sky/provision/kubernetes/__init__.py +5 -0
  226. sky/provision/kubernetes/config.py +9 -52
  227. sky/provision/kubernetes/constants.py +17 -0
  228. sky/provision/kubernetes/instance.py +919 -280
  229. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  230. sky/provision/kubernetes/network.py +27 -17
  231. sky/provision/kubernetes/network_utils.py +44 -43
  232. sky/provision/kubernetes/utils.py +1221 -534
  233. sky/provision/kubernetes/volume.py +343 -0
  234. sky/provision/lambda_cloud/instance.py +22 -16
  235. sky/provision/nebius/constants.py +50 -0
  236. sky/provision/nebius/instance.py +19 -6
  237. sky/provision/nebius/utils.py +237 -137
  238. sky/provision/oci/instance.py +10 -5
  239. sky/provision/paperspace/instance.py +10 -7
  240. sky/provision/paperspace/utils.py +1 -1
  241. sky/provision/primeintellect/__init__.py +10 -0
  242. sky/provision/primeintellect/config.py +11 -0
  243. sky/provision/primeintellect/instance.py +454 -0
  244. sky/provision/primeintellect/utils.py +398 -0
  245. sky/provision/provisioner.py +117 -36
  246. sky/provision/runpod/__init__.py +5 -0
  247. sky/provision/runpod/instance.py +27 -6
  248. sky/provision/runpod/utils.py +51 -18
  249. sky/provision/runpod/volume.py +214 -0
  250. sky/provision/scp/__init__.py +15 -0
  251. sky/provision/scp/config.py +93 -0
  252. sky/provision/scp/instance.py +707 -0
  253. sky/provision/seeweb/__init__.py +11 -0
  254. sky/provision/seeweb/config.py +13 -0
  255. sky/provision/seeweb/instance.py +812 -0
  256. sky/provision/shadeform/__init__.py +11 -0
  257. sky/provision/shadeform/config.py +12 -0
  258. sky/provision/shadeform/instance.py +351 -0
  259. sky/provision/shadeform/shadeform_utils.py +83 -0
  260. sky/provision/ssh/__init__.py +18 -0
  261. sky/provision/vast/instance.py +13 -8
  262. sky/provision/vast/utils.py +10 -7
  263. sky/provision/volume.py +164 -0
  264. sky/provision/vsphere/common/ssl_helper.py +1 -1
  265. sky/provision/vsphere/common/vapiconnect.py +2 -1
  266. sky/provision/vsphere/common/vim_utils.py +4 -4
  267. sky/provision/vsphere/instance.py +15 -10
  268. sky/provision/vsphere/vsphere_utils.py +17 -20
  269. sky/py.typed +0 -0
  270. sky/resources.py +845 -119
  271. sky/schemas/__init__.py +0 -0
  272. sky/schemas/api/__init__.py +0 -0
  273. sky/schemas/api/responses.py +227 -0
  274. sky/schemas/db/README +4 -0
  275. sky/schemas/db/env.py +90 -0
  276. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  277. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  278. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  279. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  280. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  281. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  282. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  283. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  284. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  285. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  286. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  287. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  288. sky/schemas/db/script.py.mako +28 -0
  289. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  290. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  291. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  292. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  293. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  294. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  295. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  296. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  297. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  298. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  299. sky/schemas/generated/__init__.py +0 -0
  300. sky/schemas/generated/autostopv1_pb2.py +36 -0
  301. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  302. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  303. sky/schemas/generated/jobsv1_pb2.py +86 -0
  304. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  305. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  306. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  307. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  308. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  309. sky/schemas/generated/servev1_pb2.py +58 -0
  310. sky/schemas/generated/servev1_pb2.pyi +115 -0
  311. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  312. sky/serve/autoscalers.py +357 -5
  313. sky/serve/client/impl.py +310 -0
  314. sky/serve/client/sdk.py +47 -139
  315. sky/serve/client/sdk_async.py +130 -0
  316. sky/serve/constants.py +12 -9
  317. sky/serve/controller.py +68 -17
  318. sky/serve/load_balancer.py +106 -60
  319. sky/serve/load_balancing_policies.py +116 -2
  320. sky/serve/replica_managers.py +434 -249
  321. sky/serve/serve_rpc_utils.py +179 -0
  322. sky/serve/serve_state.py +569 -257
  323. sky/serve/serve_utils.py +775 -265
  324. sky/serve/server/core.py +66 -711
  325. sky/serve/server/impl.py +1093 -0
  326. sky/serve/server/server.py +21 -18
  327. sky/serve/service.py +192 -89
  328. sky/serve/service_spec.py +144 -20
  329. sky/serve/spot_placer.py +3 -0
  330. sky/server/auth/__init__.py +0 -0
  331. sky/server/auth/authn.py +50 -0
  332. sky/server/auth/loopback.py +38 -0
  333. sky/server/auth/oauth2_proxy.py +202 -0
  334. sky/server/common.py +478 -182
  335. sky/server/config.py +85 -23
  336. sky/server/constants.py +44 -6
  337. sky/server/daemons.py +295 -0
  338. sky/server/html/token_page.html +185 -0
  339. sky/server/metrics.py +160 -0
  340. sky/server/middleware_utils.py +166 -0
  341. sky/server/requests/executor.py +558 -138
  342. sky/server/requests/payloads.py +364 -24
  343. sky/server/requests/preconditions.py +21 -17
  344. sky/server/requests/process.py +112 -29
  345. sky/server/requests/request_names.py +121 -0
  346. sky/server/requests/requests.py +822 -226
  347. sky/server/requests/serializers/decoders.py +82 -31
  348. sky/server/requests/serializers/encoders.py +140 -22
  349. sky/server/requests/threads.py +117 -0
  350. sky/server/rest.py +455 -0
  351. sky/server/server.py +1309 -285
  352. sky/server/state.py +20 -0
  353. sky/server/stream_utils.py +327 -61
  354. sky/server/uvicorn.py +217 -3
  355. sky/server/versions.py +270 -0
  356. sky/setup_files/MANIFEST.in +11 -1
  357. sky/setup_files/alembic.ini +160 -0
  358. sky/setup_files/dependencies.py +139 -31
  359. sky/setup_files/setup.py +44 -42
  360. sky/sky_logging.py +114 -7
  361. sky/skylet/attempt_skylet.py +106 -24
  362. sky/skylet/autostop_lib.py +129 -8
  363. sky/skylet/configs.py +29 -20
  364. sky/skylet/constants.py +216 -25
  365. sky/skylet/events.py +101 -21
  366. sky/skylet/job_lib.py +345 -164
  367. sky/skylet/log_lib.py +297 -18
  368. sky/skylet/log_lib.pyi +44 -1
  369. sky/skylet/providers/ibm/node_provider.py +12 -8
  370. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  371. sky/skylet/ray_patches/__init__.py +17 -3
  372. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  373. sky/skylet/ray_patches/cli.py.diff +19 -0
  374. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  375. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  376. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  377. sky/skylet/ray_patches/updater.py.diff +18 -0
  378. sky/skylet/ray_patches/worker.py.diff +41 -0
  379. sky/skylet/runtime_utils.py +21 -0
  380. sky/skylet/services.py +568 -0
  381. sky/skylet/skylet.py +72 -4
  382. sky/skylet/subprocess_daemon.py +104 -29
  383. sky/skypilot_config.py +506 -99
  384. sky/ssh_node_pools/__init__.py +1 -0
  385. sky/ssh_node_pools/core.py +135 -0
  386. sky/ssh_node_pools/server.py +233 -0
  387. sky/task.py +685 -163
  388. sky/templates/aws-ray.yml.j2 +11 -3
  389. sky/templates/azure-ray.yml.j2 +2 -1
  390. sky/templates/cudo-ray.yml.j2 +1 -0
  391. sky/templates/do-ray.yml.j2 +2 -1
  392. sky/templates/fluidstack-ray.yml.j2 +1 -0
  393. sky/templates/gcp-ray.yml.j2 +62 -1
  394. sky/templates/hyperbolic-ray.yml.j2 +68 -0
  395. sky/templates/ibm-ray.yml.j2 +2 -1
  396. sky/templates/jobs-controller.yaml.j2 +27 -24
  397. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  398. sky/templates/kubernetes-ray.yml.j2 +611 -50
  399. sky/templates/lambda-ray.yml.j2 +2 -1
  400. sky/templates/nebius-ray.yml.j2 +34 -12
  401. sky/templates/oci-ray.yml.j2 +1 -0
  402. sky/templates/paperspace-ray.yml.j2 +2 -1
  403. sky/templates/primeintellect-ray.yml.j2 +72 -0
  404. sky/templates/runpod-ray.yml.j2 +10 -1
  405. sky/templates/scp-ray.yml.j2 +4 -50
  406. sky/templates/seeweb-ray.yml.j2 +171 -0
  407. sky/templates/shadeform-ray.yml.j2 +73 -0
  408. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  409. sky/templates/vast-ray.yml.j2 +1 -0
  410. sky/templates/vsphere-ray.yml.j2 +1 -0
  411. sky/templates/websocket_proxy.py +212 -37
  412. sky/usage/usage_lib.py +31 -15
  413. sky/users/__init__.py +0 -0
  414. sky/users/model.conf +15 -0
  415. sky/users/permission.py +397 -0
  416. sky/users/rbac.py +121 -0
  417. sky/users/server.py +720 -0
  418. sky/users/token_service.py +218 -0
  419. sky/utils/accelerator_registry.py +35 -5
  420. sky/utils/admin_policy_utils.py +84 -38
  421. sky/utils/annotations.py +38 -5
  422. sky/utils/asyncio_utils.py +78 -0
  423. sky/utils/atomic.py +1 -1
  424. sky/utils/auth_utils.py +153 -0
  425. sky/utils/benchmark_utils.py +60 -0
  426. sky/utils/cli_utils/status_utils.py +159 -86
  427. sky/utils/cluster_utils.py +31 -9
  428. sky/utils/command_runner.py +354 -68
  429. sky/utils/command_runner.pyi +93 -3
  430. sky/utils/common.py +35 -8
  431. sky/utils/common_utils.py +314 -91
  432. sky/utils/config_utils.py +74 -5
  433. sky/utils/context.py +403 -0
  434. sky/utils/context_utils.py +242 -0
  435. sky/utils/controller_utils.py +383 -89
  436. sky/utils/dag_utils.py +31 -12
  437. sky/utils/db/__init__.py +0 -0
  438. sky/utils/db/db_utils.py +485 -0
  439. sky/utils/db/kv_cache.py +149 -0
  440. sky/utils/db/migration_utils.py +137 -0
  441. sky/utils/directory_utils.py +12 -0
  442. sky/utils/env_options.py +13 -0
  443. sky/utils/git.py +567 -0
  444. sky/utils/git_clone.sh +460 -0
  445. sky/utils/infra_utils.py +195 -0
  446. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  447. sky/utils/kubernetes/config_map_utils.py +133 -0
  448. sky/utils/kubernetes/create_cluster.sh +15 -29
  449. sky/utils/kubernetes/delete_cluster.sh +10 -7
  450. sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
  451. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  452. sky/utils/kubernetes/generate_kind_config.py +6 -66
  453. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  454. sky/utils/kubernetes/gpu_labeler.py +18 -8
  455. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  456. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  457. sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
  458. sky/utils/kubernetes/rsync_helper.sh +11 -3
  459. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  460. sky/utils/kubernetes/ssh_utils.py +221 -0
  461. sky/utils/kubernetes_enums.py +8 -15
  462. sky/utils/lock_events.py +94 -0
  463. sky/utils/locks.py +416 -0
  464. sky/utils/log_utils.py +82 -107
  465. sky/utils/perf_utils.py +22 -0
  466. sky/utils/resource_checker.py +298 -0
  467. sky/utils/resources_utils.py +249 -32
  468. sky/utils/rich_utils.py +217 -39
  469. sky/utils/schemas.py +955 -160
  470. sky/utils/serialize_utils.py +16 -0
  471. sky/utils/status_lib.py +10 -0
  472. sky/utils/subprocess_utils.py +29 -15
  473. sky/utils/tempstore.py +70 -0
  474. sky/utils/thread_utils.py +91 -0
  475. sky/utils/timeline.py +26 -53
  476. sky/utils/ux_utils.py +84 -15
  477. sky/utils/validator.py +11 -1
  478. sky/utils/volume.py +165 -0
  479. sky/utils/yaml_utils.py +111 -0
  480. sky/volumes/__init__.py +13 -0
  481. sky/volumes/client/__init__.py +0 -0
  482. sky/volumes/client/sdk.py +150 -0
  483. sky/volumes/server/__init__.py +0 -0
  484. sky/volumes/server/core.py +270 -0
  485. sky/volumes/server/server.py +124 -0
  486. sky/volumes/volume.py +215 -0
  487. sky/workspaces/__init__.py +0 -0
  488. sky/workspaces/core.py +655 -0
  489. sky/workspaces/server.py +101 -0
  490. sky/workspaces/utils.py +56 -0
  491. sky_templates/README.md +3 -0
  492. sky_templates/__init__.py +3 -0
  493. sky_templates/ray/__init__.py +0 -0
  494. sky_templates/ray/start_cluster +183 -0
  495. sky_templates/ray/stop_cluster +75 -0
  496. skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
  497. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  498. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
  499. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  500. sky/benchmark/benchmark_state.py +0 -256
  501. sky/benchmark/benchmark_utils.py +0 -641
  502. sky/clouds/service_catalog/constants.py +0 -7
  503. sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
  504. sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
  505. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  506. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  507. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  508. sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
  509. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  510. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  511. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  512. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  513. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  514. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  515. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  516. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
  517. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  518. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  519. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  520. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
  521. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  522. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  523. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  524. sky/jobs/dashboard/dashboard.py +0 -223
  525. sky/jobs/dashboard/static/favicon.ico +0 -0
  526. sky/jobs/dashboard/templates/index.html +0 -831
  527. sky/jobs/server/dashboard_utils.py +0 -69
  528. sky/skylet/providers/scp/__init__.py +0 -2
  529. sky/skylet/providers/scp/config.py +0 -149
  530. sky/skylet/providers/scp/node_provider.py +0 -578
  531. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  532. sky/utils/db_utils.py +0 -100
  533. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  534. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  535. skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
  536. skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
  537. skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
  538. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  539. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  540. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  541. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  542. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  543. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  544. /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  545. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  546. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
@@ -8,11 +8,18 @@ This file is imported by setup.py, so:
8
8
  import sys
9
9
  from typing import Dict, List
10
10
 
11
+ clouds_with_ray = ['ibm', 'docker', 'scp']
12
+
11
13
  install_requires = [
12
14
  'wheel<0.46.0', # https://github.com/skypilot-org/skypilot/issues/5153
15
+ 'setuptools', # TODO: match version to pyproject.toml once #5153 is fixed
16
+ 'pip',
13
17
  'cachetools',
14
18
  # NOTE: ray requires click>=7.0.
15
- 'click >= 7.0',
19
+ # click 8.2.0 has a bug in parsing the command line arguments:
20
+ # https://github.com/pallets/click/issues/2894
21
+ # TODO(aylei): remove this once the bug is fixed in click.
22
+ 'click >= 7.0, < 8.2.0',
16
23
  'colorama',
17
24
  'cryptography',
18
25
  # Jinja has a bug in older versions because of the lack of pinning
@@ -32,7 +39,8 @@ install_requires = [
32
39
  # Light weight requirement, can be replaced with "typing" once
33
40
  # we deprecate Python 3.7 (this will take a while).
34
41
  'typing_extensions',
35
- 'filelock >= 3.6.0',
42
+ # filelock 3.15.0 or higher is required for async file locking.
43
+ 'filelock >= 3.15.0',
36
44
  'packaging',
37
45
  'psutil',
38
46
  'pulp',
@@ -40,9 +48,18 @@ install_requires = [
40
48
  # (https://github.com/yaml/pyyaml/issues/601)
41
49
  # <= 3.13 may encounter https://github.com/ultralytics/yolov5/issues/414
42
50
  'pyyaml > 3.13, != 5.4.*',
51
+ 'ijson',
52
+ 'orjson',
43
53
  'requests',
54
+ # SkyPilot inherits from uvicorn.Server to customize the behavior of
55
+ # uvicorn, so we need to pin uvicorn version to avoid potential break
56
+ # changes.
57
+ # Notes for current version check:
58
+ # - uvicorn 0.33.0 is the latest version that supports Python 3.8
59
+ # - uvicorn 0.36.0 removes setup_event_loop, which breaks SkyPilot's custom
60
+ # behavior.
61
+ 'uvicorn[standard] >=0.33.0, <0.36.0',
44
62
  'fastapi',
45
- 'uvicorn[standard]',
46
63
  # Some pydantic versions are not compatible with ray. Adopted from ray's
47
64
  # setup.py:
48
65
  # https://github.com/ray-project/ray/blob/ray-2.9.3/python/setup.py#L254
@@ -53,27 +70,68 @@ install_requires = [
53
70
  'aiofiles',
54
71
  'httpx',
55
72
  'setproctitle',
73
+ 'sqlalchemy',
74
+ 'psycopg2-binary',
75
+ 'aiosqlite',
76
+ 'asyncpg',
77
+ # TODO(hailong): These three dependencies should be removed after we make
78
+ # the client-side actually not importing them.
79
+ 'casbin',
80
+ 'sqlalchemy_adapter',
81
+ # Required for API server metrics
82
+ 'prometheus_client>=0.8.0',
83
+ 'passlib',
84
+ 'bcrypt==4.0.1',
85
+ 'pyjwt',
86
+ 'gitpython',
87
+ 'types-paramiko',
88
+ 'alembic',
89
+ 'aiohttp',
90
+ 'anyio',
91
+ ]
92
+
93
+ # See requirements-dev.txt for the version of grpc and protobuf
94
+ # used to generate the code during development.
95
+
96
+ # The grpc version at runtime has to be newer than the version
97
+ # used to generate the code.
98
+ GRPC = 'grpcio>=1.63.0'
99
+ # >= 5.26.1 because the runtime version can't be older than the version
100
+ # used to generate the code.
101
+ # < 7.0.0 because code generated for a major version V will be supported by
102
+ # protobuf runtimes of version V and V+1.
103
+ # https://protobuf.dev/support/cross-version-runtime-guarantee
104
+ PROTOBUF = 'protobuf>=5.26.1, < 7.0.0'
105
+
106
+ server_dependencies = [
107
+ # TODO: Some of these dependencies are also specified in install_requires,
108
+ # so they are redundant here. We should figure out if they are only needed
109
+ # on the server (should remove from install_requires), or if they are needed
110
+ # on the client (should remove from here).
111
+ 'casbin',
112
+ 'sqlalchemy_adapter',
113
+ 'passlib',
114
+ 'pyjwt',
115
+ 'aiohttp',
116
+ 'anyio',
117
+ GRPC,
118
+ PROTOBUF,
119
+ 'aiosqlite',
120
+ 'greenlet',
56
121
  ]
57
122
 
58
123
  local_ray = [
59
124
  # Lower version of ray will cause dependency conflict for
60
125
  # click/grpcio/protobuf.
61
- # Excluded 2.6.0 as it has a bug in the cluster launcher:
126
+ # Ray 2.6.1+ resolved cluster launcher bugs
127
+ # and grpcio issues on Apple Silicon.
62
128
  # https://github.com/ray-project/ray/releases/tag/ray-2.6.1
63
- 'ray[default] >= 2.2.0, != 2.6.0',
129
+ 'ray[default] >= 2.6.1',
64
130
  ]
65
131
 
66
132
  remote = [
67
- # Adopted from ray's setup.py:
68
- # https://github.com/ray-project/ray/blob/ray-2.9.3/python/setup.py#L251-L252
69
- # SkyPilot: != 1.48.0 is required to avoid the error where ray dashboard
70
- # fails to start when ray start is called (#2054).
71
- # Tracking issue: https://github.com/ray-project/ray/issues/30984
72
- 'grpcio >= 1.32.0, != 1.48.0; python_version < \'3.10\'',
73
- 'grpcio >= 1.42.0, != 1.48.0; python_version >= \'3.10\'',
74
- # Adopted from ray's setup.py:
75
- # https://github.com/ray-project/ray/blob/ray-2.9.3/python/setup.py#L343
76
- 'protobuf >= 3.15.3, != 3.19.5',
133
+ GRPC,
134
+ PROTOBUF,
77
135
  ]
78
136
 
79
137
  # NOTE: Change the templates/jobs-controller.yaml.j2 file if any of the
@@ -90,11 +148,19 @@ aws_dependencies = [
90
148
  'colorama < 0.4.5',
91
149
  ]
92
150
 
151
+ # Kubernetes 32.0.0 has an authentication bug:
152
+ # https://github.com/kubernetes-client/python/issues/2333
153
+ kubernetes_dependencies = [
154
+ 'kubernetes>=20.0.0,!=32.0.0',
155
+ 'websockets',
156
+ 'python-dateutil',
157
+ ]
158
+
93
159
  # azure-cli cannot be installed normally by uv, so we need to work around it in
94
160
  # a few places.
95
161
  AZURE_CLI = 'azure-cli>=2.65.0'
96
162
 
97
- extras_require: Dict[str, List[str]] = {
163
+ cloud_dependencies: Dict[str, List[str]] = {
98
164
  'aws': aws_dependencies,
99
165
  # TODO(zongheng): azure-cli is huge and takes a long time to install.
100
166
  # Tracked in: https://github.com/Azure/azure-cli/issues/7387
@@ -109,11 +175,18 @@ extras_require: Dict[str, List[str]] = {
109
175
  'azure-mgmt-compute>=33.0.0',
110
176
  'azure-storage-blob>=12.23.1',
111
177
  'msgraph-sdk',
112
- ] + local_ray,
178
+ 'msrestazure',
179
+ ],
113
180
  # We need google-api-python-client>=2.69.0 to enable 'discardLocalSsd'
114
181
  # parameter for stopping instances. Reference:
115
182
  # https://github.com/googleapis/google-api-python-client/commit/f6e9d3869ed605b06f7cbf2e8cf2db25108506e6
116
- 'gcp': ['google-api-python-client>=2.69.0', 'google-cloud-storage'],
183
+ 'gcp': [
184
+ 'google-api-python-client>=2.69.0',
185
+ 'google-cloud-storage',
186
+ # see https://github.com/conda/conda/issues/13619
187
+ # see https://github.com/googleapis/google-api-python-client/issues/2554
188
+ 'pyopenssl >= 23.2.0, <24.3.0',
189
+ ],
117
190
  'ibm': [
118
191
  'ibm-cloud-sdk-core',
119
192
  'ibm-vpc',
@@ -123,17 +196,23 @@ extras_require: Dict[str, List[str]] = {
123
196
  'docker': ['docker'] + local_ray,
124
197
  'lambda': [], # No dependencies needed for lambda
125
198
  'cloudflare': aws_dependencies,
199
+ 'coreweave': aws_dependencies + kubernetes_dependencies,
126
200
  'scp': local_ray,
127
- 'oci': ['oci'] + local_ray,
128
- # Kubernetes 32.0.0 has an authentication bug: https://github.com/kubernetes-client/python/issues/2333 # pylint: disable=line-too-long
129
- 'kubernetes': ['kubernetes>=20.0.0,!=32.0.0', 'websockets'],
130
- 'remote': remote,
201
+ 'oci': ['oci'],
202
+ 'kubernetes': kubernetes_dependencies,
203
+ 'ssh': kubernetes_dependencies,
131
204
  # For the container registry auth api. Reference:
132
205
  # https://github.com/runpod/runpod-python/releases/tag/1.6.1
133
- 'runpod': ['runpod>=1.6.1'],
206
+ # RunPod needs a TOML parser to read ~/.runpod/config.toml. On Python 3.11+
207
+ # stdlib provides tomllib; on lower versions we depend on tomli explicitly.
208
+ # Instead of installing tomli conditionally, we install it explicitly.
209
+ # This is because the conditional installation of tomli does not work
210
+ # with controller package installation code.
211
+ 'runpod': ['runpod>=1.6.1', 'tomli'],
134
212
  'fluidstack': [], # No dependencies needed for fluidstack
135
213
  'cudo': ['cudo-compute>=0.1.10'],
136
214
  'paperspace': [], # No dependencies needed for paperspace
215
+ 'primeintellect': [], # No dependencies needed for primeintellect
137
216
  'do': ['pydo>=0.3.0', 'azure-core>=1.24.0', 'azure-common'],
138
217
  'vast': ['vastai-sdk>=0.1.12'],
139
218
  'vsphere': [
@@ -146,14 +225,43 @@ extras_require: Dict[str, List[str]] = {
146
225
  # 'vsphere-automation-sdk @ git+https://github.com/vmware/vsphere-automation-sdk-python.git@v8.0.1.0' pylint: disable=line-too-long
147
226
  ],
148
227
  'nebius': [
149
- 'nebius>=0.2.0',
150
- ] + aws_dependencies
228
+ # Nebius requires grpcio and protobuf, so we need to include
229
+ # our constraints here.
230
+ 'nebius>=0.3.12',
231
+ GRPC,
232
+ PROTOBUF,
233
+ ] + aws_dependencies,
234
+ 'hyperbolic': [], # No dependencies needed for hyperbolic
235
+ 'seeweb': ['ecsapi==0.4.0'],
236
+ 'shadeform': [], # No dependencies needed for shadeform
151
237
  }
152
238
 
153
- # Nebius needs python3.10. If python 3.9 [all] will not install nebius
239
+ # Calculate which clouds should be included in the [all] installation.
240
+ clouds_for_all = set(cloud_dependencies)
241
+
154
242
  if sys.version_info < (3, 10):
155
- filtered_keys = [k for k in extras_require if k != 'nebius']
156
- extras_require['all'] = sum(
157
- [v for k, v in extras_require.items() if k != 'nebius'], [])
158
- else:
159
- extras_require['all'] = sum(extras_require.values(), [])
243
+ # Nebius needs python3.10. If python 3.9 [all] will not install nebius
244
+ clouds_for_all.remove('nebius')
245
+ clouds_for_all.remove('seeweb')
246
+
247
+ if sys.version_info >= (3, 12):
248
+ # The version of ray we use does not work with >= 3.12, so avoid clouds
249
+ # that require ray.
250
+ clouds_for_all -= set(clouds_with_ray)
251
+ # vast requires setuptools==51.1.1 which will not work with python >= 3.12
252
+ # TODO: Remove once https://github.com/vast-ai/vast-sdk/pull/6 is released
253
+ clouds_for_all.remove('vast')
254
+
255
+ cloud_extras = {
256
+ cloud: dependencies + server_dependencies
257
+ for cloud, dependencies in cloud_dependencies.items()
258
+ }
259
+
260
+ extras_require: Dict[str, List[str]] = {
261
+ # Include server_dependencies with each cloud.
262
+ **cloud_extras,
263
+ 'all': list(set().union(*[cloud_extras[cloud] for cloud in clouds_for_all])
264
+ ),
265
+ 'remote': remote,
266
+ 'server': server_dependencies,
267
+ }
sky/setup_files/setup.py CHANGED
@@ -148,45 +148,47 @@ if os.path.exists(readme_filepath):
148
148
  long_description = io.open(readme_filepath, 'r', encoding='utf-8').read()
149
149
  long_description = parse_readme(long_description)
150
150
 
151
- atexit.register(revert_commit_hash)
152
- replace_commit_hash()
153
-
154
- setuptools.setup(
155
- # NOTE: this affects the package.whl wheel name. When changing this (if
156
- # ever), you must grep for '.whl' and change all corresponding wheel paths
157
- # (templates/*.j2 and wheel_utils.py).
158
- name='skypilot-nightly',
159
- version=find_version(),
160
- packages=setuptools.find_packages(),
161
- author='SkyPilot Team',
162
- license='Apache 2.0',
163
- readme='README.md',
164
- description='SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.',
165
- long_description=long_description,
166
- long_description_content_type='text/markdown',
167
- setup_requires=['wheel'],
168
- requires_python='>=3.7',
169
- install_requires=dependencies['install_requires'],
170
- extras_require=dependencies['extras_require'],
171
- entry_points={
172
- 'console_scripts': ['sky = sky.cli:cli'],
173
- },
174
- include_package_data=True,
175
- classifiers=[
176
- 'Programming Language :: Python :: 3.7',
177
- 'Programming Language :: Python :: 3.8',
178
- 'Programming Language :: Python :: 3.9',
179
- 'Programming Language :: Python :: 3.10',
180
- 'Programming Language :: Python :: 3.11',
181
- 'License :: OSI Approved :: Apache Software License',
182
- 'Operating System :: OS Independent',
183
- 'Topic :: Software Development :: Libraries :: Python Modules',
184
- 'Topic :: System :: Distributed Computing',
185
- ],
186
- project_urls={
187
- 'Homepage': 'https://github.com/skypilot-org/skypilot',
188
- 'Issues': 'https://github.com/skypilot-org/skypilot/issues',
189
- 'Discussion': 'https://github.com/skypilot-org/skypilot/discussions',
190
- 'Documentation': 'https://docs.skypilot.co/',
191
- },
192
- )
151
+ if __name__ == '__main__':
152
+ atexit.register(revert_commit_hash)
153
+ replace_commit_hash()
154
+ setuptools.setup(
155
+ # NOTE: this affects the package.whl wheel name. When changing this (if
156
+ # ever), you must grep for '.whl' and change all corresponding wheel paths
157
+ # (templates/*.j2 and wheel_utils.py).
158
+ name='skypilot-nightly',
159
+ version=find_version(),
160
+ packages=setuptools.find_packages(),
161
+ author='SkyPilot Team',
162
+ license='Apache 2.0',
163
+ readme='README.md',
164
+ description='SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.',
165
+ long_description=long_description,
166
+ long_description_content_type='text/markdown',
167
+ setup_requires=['wheel'],
168
+ requires_python='>=3.7',
169
+ install_requires=dependencies['install_requires'],
170
+ extras_require=dependencies['extras_require'],
171
+ entry_points={
172
+ 'console_scripts': ['sky = sky.cli:cli'],
173
+ },
174
+ include_package_data=True,
175
+ classifiers=[
176
+ 'Programming Language :: Python :: 3.7',
177
+ 'Programming Language :: Python :: 3.8',
178
+ 'Programming Language :: Python :: 3.9',
179
+ 'Programming Language :: Python :: 3.10',
180
+ 'Programming Language :: Python :: 3.11',
181
+ 'Programming Language :: Python :: 3.12',
182
+ 'Programming Language :: Python :: 3.13',
183
+ 'License :: OSI Approved :: Apache Software License',
184
+ 'Operating System :: OS Independent',
185
+ 'Topic :: Software Development :: Libraries :: Python Modules',
186
+ 'Topic :: System :: Distributed Computing',
187
+ ],
188
+ project_urls={
189
+ 'Homepage': 'https://github.com/skypilot-org/skypilot',
190
+ 'Issues': 'https://github.com/skypilot-org/skypilot/issues',
191
+ 'Discussion': 'https://github.com/skypilot-org/skypilot/discussions',
192
+ 'Documentation': 'https://docs.skypilot.co/',
193
+ },
194
+ )
sky/sky_logging.py CHANGED
@@ -10,6 +10,7 @@ import threading
10
10
  import colorama
11
11
 
12
12
  from sky.skylet import constants
13
+ from sky.utils import context
13
14
  from sky.utils import env_options
14
15
  from sky.utils import rich_utils
15
16
 
@@ -18,6 +19,9 @@ _FORMAT = '%(levelname).1s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
18
19
  _DATE_FORMAT = '%m-%d %H:%M:%S'
19
20
  _SENSITIVE_LOGGER = ['sky.provisioner', 'sky.optimizer']
20
21
 
22
+ _DEBUG_LOG_DIR = os.path.expanduser(
23
+ os.path.join(constants.SKY_LOGS_DIRECTORY, 'request_debug'))
24
+
21
25
  DEBUG = logging.DEBUG
22
26
  INFO = logging.INFO
23
27
  WARNING = logging.WARNING
@@ -47,6 +51,43 @@ class NewLineFormatter(logging.Formatter):
47
51
  return msg
48
52
 
49
53
 
54
+ class EnvAwareHandler(rich_utils.RichSafeStreamHandler):
55
+ """A handler that is aware of environment variables.
56
+
57
+ This handler dynamically reflects the log level from environment variables.
58
+ """
59
+
60
+ def __init__(self, stream=None, level=logging.NOTSET, sensitive=False):
61
+ super().__init__(stream)
62
+ self.level = level
63
+ self._sensitive = sensitive
64
+
65
+ @property
66
+ def level(self):
67
+ # Only refresh log level if we are in a context, since the log level
68
+ # has already been reloaded eagerly in multi-processing. Refreshing again
69
+ # is a no-op and can be avoided.
70
+ # TODO(aylei): unify the mechanism for coroutine context and
71
+ # multi-processing.
72
+ if context.get() is not None:
73
+ if self._sensitive:
74
+ # For sensitive logger, suppress debug log despite the
75
+ # SKYPILOT_DEBUG env var if SUPPRESS_SENSITIVE_LOG is set
76
+ if env_options.Options.SUPPRESS_SENSITIVE_LOG.get():
77
+ return logging.INFO
78
+ if env_options.Options.SHOW_DEBUG_INFO.get():
79
+ return logging.DEBUG
80
+ else:
81
+ return self._level
82
+ else:
83
+ return self._level
84
+
85
+ @level.setter
86
+ def level(self, level):
87
+ # pylint: disable=protected-access
88
+ self._level = logging._checkLevel(level) # type: ignore[attr-defined]
89
+
90
+
50
91
  _root_logger = logging.getLogger('sky')
51
92
  _default_handler = None
52
93
  _logging_config = threading.local()
@@ -67,8 +108,7 @@ def _setup_logger():
67
108
  _root_logger.setLevel(logging.DEBUG)
68
109
  global _default_handler
69
110
  if _default_handler is None:
70
- _default_handler = rich_utils.RichSafeStreamHandler(sys.stdout)
71
- _default_handler.flush = sys.stdout.flush # type: ignore
111
+ _default_handler = EnvAwareHandler(sys.stdout)
72
112
  if env_options.Options.SHOW_DEBUG_INFO.get():
73
113
  _default_handler.setLevel(logging.DEBUG)
74
114
  else:
@@ -87,8 +127,7 @@ def _setup_logger():
87
127
  # for certain loggers.
88
128
  for logger_name in _SENSITIVE_LOGGER:
89
129
  logger = logging.getLogger(logger_name)
90
- handler_to_logger = rich_utils.RichSafeStreamHandler(sys.stdout)
91
- handler_to_logger.flush = sys.stdout.flush # type: ignore
130
+ handler_to_logger = EnvAwareHandler(sys.stdout, sensitive=True)
92
131
  logger.addHandler(handler_to_logger)
93
132
  logger.setLevel(logging.INFO)
94
133
  if _show_logging_prefix():
@@ -107,7 +146,8 @@ def reload_logger():
107
146
  such as SKYPILOT_DEBUG.
108
147
  """
109
148
  global _default_handler
110
- _root_logger.removeHandler(_default_handler)
149
+ if _default_handler is not None:
150
+ _root_logger.removeHandler(_default_handler)
111
151
  _default_handler = None
112
152
  _setup_logger()
113
153
 
@@ -133,17 +173,59 @@ def set_logging_level(logger: str, level: int):
133
173
  logger.setLevel(original_level)
134
174
 
135
175
 
176
+ @contextlib.contextmanager
177
+ def set_sky_logging_levels(level: int):
178
+ """Set the logging level for all loggers whose name starts with 'sky'."""
179
+ # Turn off logger
180
+ previous_levels = {}
181
+ for logger_name in logging.Logger.manager.loggerDict:
182
+ if logger_name.startswith('sky'):
183
+ logger = logging.getLogger(logger_name)
184
+ previous_levels[logger_name] = logger.level
185
+ logger.setLevel(level)
186
+ if level == logging.DEBUG:
187
+ previous_show_debug_info = env_options.Options.SHOW_DEBUG_INFO.get()
188
+ os.environ[env_options.Options.SHOW_DEBUG_INFO.env_key] = '1'
189
+ try:
190
+ yield
191
+ finally:
192
+ # Restore logger
193
+ for logger_name in logging.Logger.manager.loggerDict:
194
+ if logger_name.startswith('sky'):
195
+ logger = logging.getLogger(logger_name)
196
+ try:
197
+ logger.setLevel(previous_levels[logger_name])
198
+ except KeyError:
199
+ # New loggers may be initialized after the context manager is entered,
200
+ # no need to restore the level.
201
+ pass
202
+ if level == logging.DEBUG and not previous_show_debug_info:
203
+ os.environ.pop(env_options.Options.SHOW_DEBUG_INFO.env_key)
204
+
205
+
136
206
  def logging_enabled(logger: logging.Logger, level: int) -> bool:
137
- return logger.level <= level
207
+ # Note(cooperc): This may return true in a lot of cases where we won't
208
+ # actually log anything, since the log level is set on the handler in
209
+ # _setup_logger.
210
+ return logger.getEffectiveLevel() <= level
138
211
 
139
212
 
140
213
  @contextlib.contextmanager
141
- def silent():
214
+ def silent(should_silence: bool = True):
142
215
  """Make all sky_logging.print() and logger.{info, warning...} silent.
143
216
 
144
217
  We preserve the ERROR level logging, so that errors are
145
218
  still printed.
219
+
220
+ Args:
221
+ should_silence: Whether to actually suppress the logging. If False, this
222
+ is a no-op context manager. Provided for convenience when we want to
223
+ suppress logging conditionally.
146
224
  """
225
+ if not should_silence:
226
+ yield
227
+ return
228
+
147
229
  global print
148
230
  previous_level = _root_logger.level
149
231
  previous_is_silent = is_silent()
@@ -183,3 +265,28 @@ def generate_tmp_logging_file_path(file_name: str) -> str:
183
265
  log_path = os.path.expanduser(os.path.join(log_dir, file_name))
184
266
 
185
267
  return log_path
268
+
269
+
270
+ @contextlib.contextmanager
271
+ def add_debug_log_handler(request_id: str):
272
+ if os.getenv(constants.ENV_VAR_ENABLE_REQUEST_DEBUG_LOGGING) != 'true':
273
+ yield
274
+ return
275
+
276
+ os.makedirs(_DEBUG_LOG_DIR, exist_ok=True)
277
+ log_path = os.path.join(_DEBUG_LOG_DIR, f'{request_id}.log')
278
+ try:
279
+ debug_log_handler = logging.FileHandler(log_path)
280
+ debug_log_handler.setFormatter(FORMATTER)
281
+ debug_log_handler.setLevel(logging.DEBUG)
282
+ _root_logger.addHandler(debug_log_handler)
283
+ # sky.provision sets up its own logger/handler with propagate=False,
284
+ # so add it there too.
285
+ provision_logger = logging.getLogger('sky.provision')
286
+ provision_logger.addHandler(debug_log_handler)
287
+ provision_logger.setLevel(logging.DEBUG)
288
+ yield
289
+ finally:
290
+ _root_logger.removeHandler(debug_log_handler)
291
+ provision_logger.removeHandler(debug_log_handler)
292
+ debug_log_handler.close()