skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (512)
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +25 -7
  3. sky/adaptors/common.py +24 -1
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/hyperbolic.py +8 -0
  7. sky/adaptors/kubernetes.py +149 -18
  8. sky/adaptors/nebius.py +170 -17
  9. sky/adaptors/primeintellect.py +1 -0
  10. sky/adaptors/runpod.py +68 -0
  11. sky/adaptors/seeweb.py +167 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +187 -4
  14. sky/authentication.py +179 -225
  15. sky/backends/__init__.py +4 -2
  16. sky/backends/backend.py +22 -9
  17. sky/backends/backend_utils.py +1299 -380
  18. sky/backends/cloud_vm_ray_backend.py +1715 -518
  19. sky/backends/docker_utils.py +1 -1
  20. sky/backends/local_docker_backend.py +11 -6
  21. sky/backends/wheel_utils.py +37 -9
  22. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  23. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  24. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  25. sky/{clouds/service_catalog → catalog}/common.py +89 -48
  26. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  27. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  28. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
  29. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
  31. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  33. sky/catalog/data_fetchers/fetch_nebius.py +335 -0
  34. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  35. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  36. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  37. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  38. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  39. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  40. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  41. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  42. sky/catalog/hyperbolic_catalog.py +136 -0
  43. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  44. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  45. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  46. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  47. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  48. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  49. sky/catalog/primeintellect_catalog.py +95 -0
  50. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  51. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  52. sky/catalog/seeweb_catalog.py +184 -0
  53. sky/catalog/shadeform_catalog.py +165 -0
  54. sky/catalog/ssh_catalog.py +167 -0
  55. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  56. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  57. sky/check.py +491 -203
  58. sky/cli.py +5 -6005
  59. sky/client/{cli.py → cli/command.py} +2477 -1885
  60. sky/client/cli/deprecation_utils.py +99 -0
  61. sky/client/cli/flags.py +359 -0
  62. sky/client/cli/table_utils.py +320 -0
  63. sky/client/common.py +70 -32
  64. sky/client/oauth.py +82 -0
  65. sky/client/sdk.py +1203 -297
  66. sky/client/sdk_async.py +833 -0
  67. sky/client/service_account_auth.py +47 -0
  68. sky/cloud_stores.py +73 -0
  69. sky/clouds/__init__.py +13 -0
  70. sky/clouds/aws.py +358 -93
  71. sky/clouds/azure.py +105 -83
  72. sky/clouds/cloud.py +127 -36
  73. sky/clouds/cudo.py +68 -50
  74. sky/clouds/do.py +66 -48
  75. sky/clouds/fluidstack.py +63 -44
  76. sky/clouds/gcp.py +339 -110
  77. sky/clouds/hyperbolic.py +293 -0
  78. sky/clouds/ibm.py +70 -49
  79. sky/clouds/kubernetes.py +563 -162
  80. sky/clouds/lambda_cloud.py +74 -54
  81. sky/clouds/nebius.py +206 -80
  82. sky/clouds/oci.py +88 -66
  83. sky/clouds/paperspace.py +61 -44
  84. sky/clouds/primeintellect.py +317 -0
  85. sky/clouds/runpod.py +164 -74
  86. sky/clouds/scp.py +89 -83
  87. sky/clouds/seeweb.py +466 -0
  88. sky/clouds/shadeform.py +400 -0
  89. sky/clouds/ssh.py +263 -0
  90. sky/clouds/utils/aws_utils.py +10 -4
  91. sky/clouds/utils/gcp_utils.py +87 -11
  92. sky/clouds/utils/oci_utils.py +38 -14
  93. sky/clouds/utils/scp_utils.py +177 -124
  94. sky/clouds/vast.py +99 -77
  95. sky/clouds/vsphere.py +51 -40
  96. sky/core.py +349 -139
  97. sky/dag.py +15 -0
  98. sky/dashboard/out/404.html +1 -1
  99. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  100. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
  102. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  105. sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
  106. sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
  111. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  112. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  113. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  115. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  116. sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
  118. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  119. sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  121. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  122. sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  124. sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  126. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  127. sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
  128. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  129. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  131. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  133. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  134. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
  136. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  139. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  141. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
  143. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
  144. sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  148. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
  149. sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
  151. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  152. sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
  153. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  154. sky/dashboard/out/clusters/[cluster].html +1 -1
  155. sky/dashboard/out/clusters.html +1 -1
  156. sky/dashboard/out/config.html +1 -0
  157. sky/dashboard/out/index.html +1 -1
  158. sky/dashboard/out/infra/[context].html +1 -0
  159. sky/dashboard/out/infra.html +1 -0
  160. sky/dashboard/out/jobs/[job].html +1 -1
  161. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  162. sky/dashboard/out/jobs.html +1 -1
  163. sky/dashboard/out/users.html +1 -0
  164. sky/dashboard/out/volumes.html +1 -0
  165. sky/dashboard/out/workspace/new.html +1 -0
  166. sky/dashboard/out/workspaces/[name].html +1 -0
  167. sky/dashboard/out/workspaces.html +1 -0
  168. sky/data/data_utils.py +137 -1
  169. sky/data/mounting_utils.py +269 -84
  170. sky/data/storage.py +1451 -1807
  171. sky/data/storage_utils.py +43 -57
  172. sky/exceptions.py +132 -2
  173. sky/execution.py +206 -63
  174. sky/global_user_state.py +2374 -586
  175. sky/jobs/__init__.py +5 -0
  176. sky/jobs/client/sdk.py +242 -65
  177. sky/jobs/client/sdk_async.py +143 -0
  178. sky/jobs/constants.py +9 -8
  179. sky/jobs/controller.py +839 -277
  180. sky/jobs/file_content_utils.py +80 -0
  181. sky/jobs/log_gc.py +201 -0
  182. sky/jobs/recovery_strategy.py +398 -152
  183. sky/jobs/scheduler.py +315 -189
  184. sky/jobs/server/core.py +829 -255
  185. sky/jobs/server/server.py +156 -115
  186. sky/jobs/server/utils.py +136 -0
  187. sky/jobs/state.py +2092 -701
  188. sky/jobs/utils.py +1242 -160
  189. sky/logs/__init__.py +21 -0
  190. sky/logs/agent.py +108 -0
  191. sky/logs/aws.py +243 -0
  192. sky/logs/gcp.py +91 -0
  193. sky/metrics/__init__.py +0 -0
  194. sky/metrics/utils.py +443 -0
  195. sky/models.py +78 -1
  196. sky/optimizer.py +164 -70
  197. sky/provision/__init__.py +90 -4
  198. sky/provision/aws/config.py +147 -26
  199. sky/provision/aws/instance.py +135 -50
  200. sky/provision/azure/instance.py +10 -5
  201. sky/provision/common.py +13 -1
  202. sky/provision/cudo/cudo_machine_type.py +1 -1
  203. sky/provision/cudo/cudo_utils.py +14 -8
  204. sky/provision/cudo/cudo_wrapper.py +72 -71
  205. sky/provision/cudo/instance.py +10 -6
  206. sky/provision/do/instance.py +10 -6
  207. sky/provision/do/utils.py +4 -3
  208. sky/provision/docker_utils.py +114 -23
  209. sky/provision/fluidstack/instance.py +13 -8
  210. sky/provision/gcp/__init__.py +1 -0
  211. sky/provision/gcp/config.py +301 -19
  212. sky/provision/gcp/constants.py +218 -0
  213. sky/provision/gcp/instance.py +36 -8
  214. sky/provision/gcp/instance_utils.py +18 -4
  215. sky/provision/gcp/volume_utils.py +247 -0
  216. sky/provision/hyperbolic/__init__.py +12 -0
  217. sky/provision/hyperbolic/config.py +10 -0
  218. sky/provision/hyperbolic/instance.py +437 -0
  219. sky/provision/hyperbolic/utils.py +373 -0
  220. sky/provision/instance_setup.py +93 -14
  221. sky/provision/kubernetes/__init__.py +5 -0
  222. sky/provision/kubernetes/config.py +9 -52
  223. sky/provision/kubernetes/constants.py +17 -0
  224. sky/provision/kubernetes/instance.py +789 -247
  225. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  226. sky/provision/kubernetes/network.py +27 -17
  227. sky/provision/kubernetes/network_utils.py +40 -43
  228. sky/provision/kubernetes/utils.py +1192 -531
  229. sky/provision/kubernetes/volume.py +282 -0
  230. sky/provision/lambda_cloud/instance.py +22 -16
  231. sky/provision/nebius/constants.py +50 -0
  232. sky/provision/nebius/instance.py +19 -6
  233. sky/provision/nebius/utils.py +196 -91
  234. sky/provision/oci/instance.py +10 -5
  235. sky/provision/paperspace/instance.py +10 -7
  236. sky/provision/paperspace/utils.py +1 -1
  237. sky/provision/primeintellect/__init__.py +10 -0
  238. sky/provision/primeintellect/config.py +11 -0
  239. sky/provision/primeintellect/instance.py +454 -0
  240. sky/provision/primeintellect/utils.py +398 -0
  241. sky/provision/provisioner.py +110 -36
  242. sky/provision/runpod/__init__.py +5 -0
  243. sky/provision/runpod/instance.py +27 -6
  244. sky/provision/runpod/utils.py +51 -18
  245. sky/provision/runpod/volume.py +180 -0
  246. sky/provision/scp/__init__.py +15 -0
  247. sky/provision/scp/config.py +93 -0
  248. sky/provision/scp/instance.py +531 -0
  249. sky/provision/seeweb/__init__.py +11 -0
  250. sky/provision/seeweb/config.py +13 -0
  251. sky/provision/seeweb/instance.py +807 -0
  252. sky/provision/shadeform/__init__.py +11 -0
  253. sky/provision/shadeform/config.py +12 -0
  254. sky/provision/shadeform/instance.py +351 -0
  255. sky/provision/shadeform/shadeform_utils.py +83 -0
  256. sky/provision/ssh/__init__.py +18 -0
  257. sky/provision/vast/instance.py +13 -8
  258. sky/provision/vast/utils.py +10 -7
  259. sky/provision/vsphere/common/vim_utils.py +1 -2
  260. sky/provision/vsphere/instance.py +15 -10
  261. sky/provision/vsphere/vsphere_utils.py +9 -19
  262. sky/py.typed +0 -0
  263. sky/resources.py +844 -118
  264. sky/schemas/__init__.py +0 -0
  265. sky/schemas/api/__init__.py +0 -0
  266. sky/schemas/api/responses.py +225 -0
  267. sky/schemas/db/README +4 -0
  268. sky/schemas/db/env.py +90 -0
  269. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  270. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  271. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  272. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  273. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  274. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  275. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  276. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  277. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  278. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  279. sky/schemas/db/script.py.mako +28 -0
  280. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  281. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  282. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  283. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  284. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  285. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  286. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  287. sky/schemas/generated/__init__.py +0 -0
  288. sky/schemas/generated/autostopv1_pb2.py +36 -0
  289. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  290. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  291. sky/schemas/generated/jobsv1_pb2.py +86 -0
  292. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  293. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  294. sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
  295. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  296. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  297. sky/schemas/generated/servev1_pb2.py +58 -0
  298. sky/schemas/generated/servev1_pb2.pyi +115 -0
  299. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  300. sky/serve/autoscalers.py +357 -5
  301. sky/serve/client/impl.py +310 -0
  302. sky/serve/client/sdk.py +47 -139
  303. sky/serve/client/sdk_async.py +130 -0
  304. sky/serve/constants.py +10 -8
  305. sky/serve/controller.py +64 -19
  306. sky/serve/load_balancer.py +106 -60
  307. sky/serve/load_balancing_policies.py +115 -1
  308. sky/serve/replica_managers.py +273 -162
  309. sky/serve/serve_rpc_utils.py +179 -0
  310. sky/serve/serve_state.py +554 -251
  311. sky/serve/serve_utils.py +733 -220
  312. sky/serve/server/core.py +66 -711
  313. sky/serve/server/impl.py +1093 -0
  314. sky/serve/server/server.py +21 -18
  315. sky/serve/service.py +133 -48
  316. sky/serve/service_spec.py +135 -16
  317. sky/serve/spot_placer.py +3 -0
  318. sky/server/auth/__init__.py +0 -0
  319. sky/server/auth/authn.py +50 -0
  320. sky/server/auth/loopback.py +38 -0
  321. sky/server/auth/oauth2_proxy.py +200 -0
  322. sky/server/common.py +475 -181
  323. sky/server/config.py +81 -23
  324. sky/server/constants.py +44 -6
  325. sky/server/daemons.py +229 -0
  326. sky/server/html/token_page.html +185 -0
  327. sky/server/metrics.py +160 -0
  328. sky/server/requests/executor.py +528 -138
  329. sky/server/requests/payloads.py +351 -17
  330. sky/server/requests/preconditions.py +21 -17
  331. sky/server/requests/process.py +112 -29
  332. sky/server/requests/request_names.py +120 -0
  333. sky/server/requests/requests.py +817 -224
  334. sky/server/requests/serializers/decoders.py +82 -31
  335. sky/server/requests/serializers/encoders.py +140 -22
  336. sky/server/requests/threads.py +106 -0
  337. sky/server/rest.py +417 -0
  338. sky/server/server.py +1290 -284
  339. sky/server/state.py +20 -0
  340. sky/server/stream_utils.py +345 -57
  341. sky/server/uvicorn.py +217 -3
  342. sky/server/versions.py +270 -0
  343. sky/setup_files/MANIFEST.in +5 -0
  344. sky/setup_files/alembic.ini +156 -0
  345. sky/setup_files/dependencies.py +136 -31
  346. sky/setup_files/setup.py +44 -42
  347. sky/sky_logging.py +102 -5
  348. sky/skylet/attempt_skylet.py +1 -0
  349. sky/skylet/autostop_lib.py +129 -8
  350. sky/skylet/configs.py +27 -20
  351. sky/skylet/constants.py +171 -19
  352. sky/skylet/events.py +105 -21
  353. sky/skylet/job_lib.py +335 -104
  354. sky/skylet/log_lib.py +297 -18
  355. sky/skylet/log_lib.pyi +44 -1
  356. sky/skylet/ray_patches/__init__.py +17 -3
  357. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  358. sky/skylet/ray_patches/cli.py.diff +19 -0
  359. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  360. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  361. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  362. sky/skylet/ray_patches/updater.py.diff +18 -0
  363. sky/skylet/ray_patches/worker.py.diff +41 -0
  364. sky/skylet/services.py +564 -0
  365. sky/skylet/skylet.py +63 -4
  366. sky/skylet/subprocess_daemon.py +103 -29
  367. sky/skypilot_config.py +506 -99
  368. sky/ssh_node_pools/__init__.py +1 -0
  369. sky/ssh_node_pools/core.py +135 -0
  370. sky/ssh_node_pools/server.py +233 -0
  371. sky/task.py +621 -137
  372. sky/templates/aws-ray.yml.j2 +10 -3
  373. sky/templates/azure-ray.yml.j2 +1 -1
  374. sky/templates/do-ray.yml.j2 +1 -1
  375. sky/templates/gcp-ray.yml.j2 +57 -0
  376. sky/templates/hyperbolic-ray.yml.j2 +67 -0
  377. sky/templates/jobs-controller.yaml.j2 +27 -24
  378. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  379. sky/templates/kubernetes-ray.yml.j2 +607 -51
  380. sky/templates/lambda-ray.yml.j2 +1 -1
  381. sky/templates/nebius-ray.yml.j2 +33 -12
  382. sky/templates/paperspace-ray.yml.j2 +1 -1
  383. sky/templates/primeintellect-ray.yml.j2 +71 -0
  384. sky/templates/runpod-ray.yml.j2 +9 -1
  385. sky/templates/scp-ray.yml.j2 +3 -50
  386. sky/templates/seeweb-ray.yml.j2 +108 -0
  387. sky/templates/shadeform-ray.yml.j2 +72 -0
  388. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  389. sky/templates/websocket_proxy.py +178 -18
  390. sky/usage/usage_lib.py +18 -11
  391. sky/users/__init__.py +0 -0
  392. sky/users/model.conf +15 -0
  393. sky/users/permission.py +387 -0
  394. sky/users/rbac.py +121 -0
  395. sky/users/server.py +720 -0
  396. sky/users/token_service.py +218 -0
  397. sky/utils/accelerator_registry.py +34 -5
  398. sky/utils/admin_policy_utils.py +84 -38
  399. sky/utils/annotations.py +16 -5
  400. sky/utils/asyncio_utils.py +78 -0
  401. sky/utils/auth_utils.py +153 -0
  402. sky/utils/benchmark_utils.py +60 -0
  403. sky/utils/cli_utils/status_utils.py +159 -86
  404. sky/utils/cluster_utils.py +31 -9
  405. sky/utils/command_runner.py +354 -68
  406. sky/utils/command_runner.pyi +93 -3
  407. sky/utils/common.py +35 -8
  408. sky/utils/common_utils.py +310 -87
  409. sky/utils/config_utils.py +87 -5
  410. sky/utils/context.py +402 -0
  411. sky/utils/context_utils.py +222 -0
  412. sky/utils/controller_utils.py +264 -89
  413. sky/utils/dag_utils.py +31 -12
  414. sky/utils/db/__init__.py +0 -0
  415. sky/utils/db/db_utils.py +470 -0
  416. sky/utils/db/migration_utils.py +133 -0
  417. sky/utils/directory_utils.py +12 -0
  418. sky/utils/env_options.py +13 -0
  419. sky/utils/git.py +567 -0
  420. sky/utils/git_clone.sh +460 -0
  421. sky/utils/infra_utils.py +195 -0
  422. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  423. sky/utils/kubernetes/config_map_utils.py +133 -0
  424. sky/utils/kubernetes/create_cluster.sh +13 -27
  425. sky/utils/kubernetes/delete_cluster.sh +10 -7
  426. sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
  427. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  428. sky/utils/kubernetes/generate_kind_config.py +6 -66
  429. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  430. sky/utils/kubernetes/gpu_labeler.py +5 -5
  431. sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
  432. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  433. sky/utils/kubernetes/ssh_utils.py +221 -0
  434. sky/utils/kubernetes_enums.py +8 -15
  435. sky/utils/lock_events.py +94 -0
  436. sky/utils/locks.py +368 -0
  437. sky/utils/log_utils.py +300 -6
  438. sky/utils/perf_utils.py +22 -0
  439. sky/utils/resource_checker.py +298 -0
  440. sky/utils/resources_utils.py +249 -32
  441. sky/utils/rich_utils.py +213 -37
  442. sky/utils/schemas.py +905 -147
  443. sky/utils/serialize_utils.py +16 -0
  444. sky/utils/status_lib.py +10 -0
  445. sky/utils/subprocess_utils.py +38 -15
  446. sky/utils/tempstore.py +70 -0
  447. sky/utils/timeline.py +24 -52
  448. sky/utils/ux_utils.py +84 -15
  449. sky/utils/validator.py +11 -1
  450. sky/utils/volume.py +86 -0
  451. sky/utils/yaml_utils.py +111 -0
  452. sky/volumes/__init__.py +13 -0
  453. sky/volumes/client/__init__.py +0 -0
  454. sky/volumes/client/sdk.py +149 -0
  455. sky/volumes/server/__init__.py +0 -0
  456. sky/volumes/server/core.py +258 -0
  457. sky/volumes/server/server.py +122 -0
  458. sky/volumes/volume.py +212 -0
  459. sky/workspaces/__init__.py +0 -0
  460. sky/workspaces/core.py +655 -0
  461. sky/workspaces/server.py +101 -0
  462. sky/workspaces/utils.py +56 -0
  463. skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
  464. skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
  465. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
  466. sky/benchmark/benchmark_state.py +0 -256
  467. sky/benchmark/benchmark_utils.py +0 -641
  468. sky/clouds/service_catalog/constants.py +0 -7
  469. sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
  470. sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
  471. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  472. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  473. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  474. sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
  475. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  476. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  477. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  478. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  479. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  480. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  481. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  482. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
  483. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  484. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  485. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  486. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
  487. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  488. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  489. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  490. sky/jobs/dashboard/dashboard.py +0 -223
  491. sky/jobs/dashboard/static/favicon.ico +0 -0
  492. sky/jobs/dashboard/templates/index.html +0 -831
  493. sky/jobs/server/dashboard_utils.py +0 -69
  494. sky/skylet/providers/scp/__init__.py +0 -2
  495. sky/skylet/providers/scp/config.py +0 -149
  496. sky/skylet/providers/scp/node_provider.py +0 -578
  497. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  498. sky/utils/db_utils.py +0 -100
  499. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  500. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  501. skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
  502. skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
  503. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  504. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  505. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  506. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  507. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  508. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  509. /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
  510. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
  511. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
  512. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
@@ -8,11 +8,18 @@ This file is imported by setup.py, so:
8
8
  import sys
9
9
  from typing import Dict, List
10
10
 
11
+ clouds_with_ray = ['ibm', 'docker', 'scp']
12
+
11
13
  install_requires = [
12
14
  'wheel<0.46.0', # https://github.com/skypilot-org/skypilot/issues/5153
15
+ 'setuptools', # TODO: match version to pyproject.toml once #5153 is fixed
16
+ 'pip',
13
17
  'cachetools',
14
18
  # NOTE: ray requires click>=7.0.
15
- 'click >= 7.0',
19
+ # click 8.2.0 has a bug in parsing the command line arguments:
20
+ # https://github.com/pallets/click/issues/2894
21
+ # TODO(aylei): remove this once the bug is fixed in click.
22
+ 'click >= 7.0, < 8.2.0',
16
23
  'colorama',
17
24
  'cryptography',
18
25
  # Jinja has a bug in older versions because of the lack of pinning
@@ -32,7 +39,8 @@ install_requires = [
32
39
  # Light weight requirement, can be replaced with "typing" once
33
40
  # we deprecate Python 3.7 (this will take a while).
34
41
  'typing_extensions',
35
- 'filelock >= 3.6.0',
42
+ # filelock 3.15.0 or higher is required for async file locking.
43
+ 'filelock >= 3.15.0',
36
44
  'packaging',
37
45
  'psutil',
38
46
  'pulp',
@@ -40,9 +48,18 @@ install_requires = [
40
48
  # (https://github.com/yaml/pyyaml/issues/601)
41
49
  # <= 3.13 may encounter https://github.com/ultralytics/yolov5/issues/414
42
50
  'pyyaml > 3.13, != 5.4.*',
51
+ 'ijson',
52
+ 'orjson',
43
53
  'requests',
54
+ # SkyPilot inherits from uvicorn.Server to customize the behavior of
55
+ # uvicorn, so we need to pin uvicorn version to avoid potential break
56
+ # changes.
57
+ # Notes for current version check:
58
+ # - uvicorn 0.33.0 is the latest version that supports Python 3.8
59
+ # - uvicorn 0.36.0 removes setup_event_loop thus breaks SkyPilot's custom
60
+ # behavior.
61
+ 'uvicorn[standard] >=0.33.0, <0.36.0',
44
62
  'fastapi',
45
- 'uvicorn[standard]',
46
63
  # Some pydantic versions are not compatible with ray. Adopted from ray's
47
64
  # setup.py:
48
65
  # https://github.com/ray-project/ray/blob/ray-2.9.3/python/setup.py#L254
@@ -53,27 +70,68 @@ install_requires = [
53
70
  'aiofiles',
54
71
  'httpx',
55
72
  'setproctitle',
73
+ 'sqlalchemy',
74
+ 'psycopg2-binary',
75
+ 'aiosqlite',
76
+ 'asyncpg',
77
+ # TODO(hailong): These three dependencies should be removed after we make
78
+ # the client-side actually not importing them.
79
+ 'casbin',
80
+ 'sqlalchemy_adapter',
81
+ # Required for API server metrics
82
+ 'prometheus_client>=0.8.0',
83
+ 'passlib',
84
+ 'bcrypt==4.0.1',
85
+ 'pyjwt',
86
+ 'gitpython',
87
+ 'types-paramiko',
88
+ 'alembic',
89
+ 'aiohttp',
90
+ 'anyio',
91
+ ]
92
+
93
+ # See requirements-dev.txt for the version of grpc and protobuf
94
+ # used to generate the code during development.
95
+
96
+ # The grpc version at runtime has to be newer than the version
97
+ # used to generate the code.
98
+ GRPC = 'grpcio>=1.63.0'
99
+ # >= 5.26.1 because the runtime version can't be older than the version
100
+ # used to generate the code.
101
+ # < 7.0.0 because code generated for a major version V will be supported by
102
+ # protobuf runtimes of version V and V+1.
103
+ # https://protobuf.dev/support/cross-version-runtime-guarantee
104
+ PROTOBUF = 'protobuf>=5.26.1, < 7.0.0'
105
+
106
+ server_dependencies = [
107
+ # TODO: Some of these dependencies are also specified in install_requires,
108
+ # so they are redundant here. We should figure out if they are only needed
109
+ # on the server (should remove from install_requires), or if they are needed
110
+ # on the client (should remove from here).
111
+ 'casbin',
112
+ 'sqlalchemy_adapter',
113
+ 'passlib',
114
+ 'pyjwt',
115
+ 'aiohttp',
116
+ 'anyio',
117
+ GRPC,
118
+ PROTOBUF,
119
+ 'aiosqlite',
120
+ 'greenlet',
56
121
  ]
57
122
 
58
123
  local_ray = [
59
124
  # Lower version of ray will cause dependency conflict for
60
125
  # click/grpcio/protobuf.
61
- # Excluded 2.6.0 as it has a bug in the cluster launcher:
126
+ # Ray 2.6.1+ resolved cluster launcher bugs
127
+ # and grpcio issues on Apple Silicon.
62
128
  # https://github.com/ray-project/ray/releases/tag/ray-2.6.1
63
- 'ray[default] >= 2.2.0, != 2.6.0',
129
+ 'ray[default] >= 2.6.1',
64
130
  ]
65
131
 
66
132
  remote = [
67
- # Adopted from ray's setup.py:
68
- # https://github.com/ray-project/ray/blob/ray-2.9.3/python/setup.py#L251-L252
69
- # SkyPilot: != 1.48.0 is required to avoid the error where ray dashboard
70
- # fails to start when ray start is called (#2054).
71
- # Tracking issue: https://github.com/ray-project/ray/issues/30984
72
- 'grpcio >= 1.32.0, != 1.48.0; python_version < \'3.10\'',
73
- 'grpcio >= 1.42.0, != 1.48.0; python_version >= \'3.10\'',
74
- # Adopted from ray's setup.py:
75
- # https://github.com/ray-project/ray/blob/ray-2.9.3/python/setup.py#L343
76
- 'protobuf >= 3.15.3, != 3.19.5',
133
+ GRPC,
134
+ PROTOBUF,
77
135
  ]
78
136
 
79
137
  # NOTE: Change the templates/jobs-controller.yaml.j2 file if any of the
@@ -90,11 +148,19 @@ aws_dependencies = [
90
148
  'colorama < 0.4.5',
91
149
  ]
92
150
 
151
+ # Kubernetes 32.0.0 has an authentication bug:
152
+ # https://github.com/kubernetes-client/python/issues/2333
153
+ kubernetes_dependencies = [
154
+ 'kubernetes>=20.0.0,!=32.0.0',
155
+ 'websockets',
156
+ 'python-dateutil',
157
+ ]
158
+
93
159
  # azure-cli cannot be installed normally by uv, so we need to work around it in
94
160
  # a few places.
95
161
  AZURE_CLI = 'azure-cli>=2.65.0'
96
162
 
97
- extras_require: Dict[str, List[str]] = {
163
+ cloud_dependencies: Dict[str, List[str]] = {
98
164
  'aws': aws_dependencies,
99
165
  # TODO(zongheng): azure-cli is huge and takes a long time to install.
100
166
  # Tracked in: https://github.com/Azure/azure-cli/issues/7387
@@ -109,11 +175,18 @@ extras_require: Dict[str, List[str]] = {
109
175
  'azure-mgmt-compute>=33.0.0',
110
176
  'azure-storage-blob>=12.23.1',
111
177
  'msgraph-sdk',
112
- ] + local_ray,
178
+ 'msrestazure',
179
+ ],
113
180
  # We need google-api-python-client>=2.69.0 to enable 'discardLocalSsd'
114
181
  # parameter for stopping instances. Reference:
115
182
  # https://github.com/googleapis/google-api-python-client/commit/f6e9d3869ed605b06f7cbf2e8cf2db25108506e6
116
- 'gcp': ['google-api-python-client>=2.69.0', 'google-cloud-storage'],
183
+ 'gcp': [
184
+ 'google-api-python-client>=2.69.0',
185
+ 'google-cloud-storage',
186
+ # see https://github.com/conda/conda/issues/13619
187
+ # see https://github.com/googleapis/google-api-python-client/issues/2554
188
+ 'pyopenssl >= 23.2.0, <24.3.0',
189
+ ],
117
190
  'ibm': [
118
191
  'ibm-cloud-sdk-core',
119
192
  'ibm-vpc',
@@ -123,17 +196,20 @@ extras_require: Dict[str, List[str]] = {
123
196
  'docker': ['docker'] + local_ray,
124
197
  'lambda': [], # No dependencies needed for lambda
125
198
  'cloudflare': aws_dependencies,
199
+ 'coreweave': aws_dependencies + kubernetes_dependencies,
126
200
  'scp': local_ray,
127
- 'oci': ['oci'] + local_ray,
128
- # Kubernetes 32.0.0 has an authentication bug: https://github.com/kubernetes-client/python/issues/2333 # pylint: disable=line-too-long
129
- 'kubernetes': ['kubernetes>=20.0.0,!=32.0.0', 'websockets'],
130
- 'remote': remote,
201
+ 'oci': ['oci'],
202
+ 'kubernetes': kubernetes_dependencies,
203
+ 'ssh': kubernetes_dependencies,
131
204
  # For the container registry auth api. Reference:
132
205
  # https://github.com/runpod/runpod-python/releases/tag/1.6.1
133
- 'runpod': ['runpod>=1.6.1'],
206
+ # RunPod needs a TOML parser to read ~/.runpod/config.toml. On Python 3.11+
207
+ # stdlib provides tomllib; on lower versions we depend on tomli explicitly.
208
+ 'runpod': ['runpod>=1.6.1', 'tomli; python_version < "3.11"'],
134
209
  'fluidstack': [], # No dependencies needed for fluidstack
135
210
  'cudo': ['cudo-compute>=0.1.10'],
136
211
  'paperspace': [], # No dependencies needed for paperspace
212
+ 'primeintellect': [], # No dependencies needed for primeintellect
137
213
  'do': ['pydo>=0.3.0', 'azure-core>=1.24.0', 'azure-common'],
138
214
  'vast': ['vastai-sdk>=0.1.12'],
139
215
  'vsphere': [
@@ -146,14 +222,43 @@ extras_require: Dict[str, List[str]] = {
146
222
  # 'vsphere-automation-sdk @ git+https://github.com/vmware/vsphere-automation-sdk-python.git@v8.0.1.0' pylint: disable=line-too-long
147
223
  ],
148
224
  'nebius': [
149
- 'nebius>=0.2.0',
150
- ] + aws_dependencies
225
+ # Nebius requires grpcio and protobuf, so we need to include
226
+ # our constraints here.
227
+ 'nebius>=0.2.47',
228
+ GRPC,
229
+ PROTOBUF,
230
+ ] + aws_dependencies,
231
+ 'hyperbolic': [], # No dependencies needed for hyperbolic
232
+ 'seeweb': ['ecsapi>=0.2.0'],
233
+ 'shadeform': [], # No dependencies needed for shadeform
151
234
  }
152
235
 
153
- # Nebius needs python3.10. If python 3.9 [all] will not install nebius
236
+ # Calculate which clouds should be included in the [all] installation.
237
+ clouds_for_all = set(cloud_dependencies)
238
+
154
239
  if sys.version_info < (3, 10):
155
- filtered_keys = [k for k in extras_require if k != 'nebius']
156
- extras_require['all'] = sum(
157
- [v for k, v in extras_require.items() if k != 'nebius'], [])
158
- else:
159
- extras_require['all'] = sum(extras_require.values(), [])
240
+ # Nebius needs Python 3.10+. On Python 3.9 and below, [all] will not install nebius.
241
+ clouds_for_all.remove('nebius')
242
+ clouds_for_all.remove('seeweb')
243
+
244
+ if sys.version_info >= (3, 12):
245
+ # The version of ray we use does not work with >= 3.12, so avoid clouds
246
+ # that require ray.
247
+ clouds_for_all -= set(clouds_with_ray)
248
+ # vast requires setuptools==51.1.1 which will not work with python >= 3.12
249
+ # TODO: Remove once https://github.com/vast-ai/vast-sdk/pull/6 is released
250
+ clouds_for_all.remove('vast')
251
+
252
+ cloud_extras = {
253
+ cloud: dependencies + server_dependencies
254
+ for cloud, dependencies in cloud_dependencies.items()
255
+ }
256
+
257
+ extras_require: Dict[str, List[str]] = {
258
+ # Include server_dependencies with each cloud.
259
+ **cloud_extras,
260
+ 'all': list(set().union(*[cloud_extras[cloud] for cloud in clouds_for_all])
261
+ ),
262
+ 'remote': remote,
263
+ 'server': server_dependencies,
264
+ }
sky/setup_files/setup.py CHANGED
@@ -148,45 +148,47 @@ if os.path.exists(readme_filepath):
148
148
  long_description = io.open(readme_filepath, 'r', encoding='utf-8').read()
149
149
  long_description = parse_readme(long_description)
150
150
 
151
- atexit.register(revert_commit_hash)
152
- replace_commit_hash()
153
-
154
- setuptools.setup(
155
- # NOTE: this affects the package.whl wheel name. When changing this (if
156
- # ever), you must grep for '.whl' and change all corresponding wheel paths
157
- # (templates/*.j2 and wheel_utils.py).
158
- name='skypilot-nightly',
159
- version=find_version(),
160
- packages=setuptools.find_packages(),
161
- author='SkyPilot Team',
162
- license='Apache 2.0',
163
- readme='README.md',
164
- description='SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.',
165
- long_description=long_description,
166
- long_description_content_type='text/markdown',
167
- setup_requires=['wheel'],
168
- requires_python='>=3.7',
169
- install_requires=dependencies['install_requires'],
170
- extras_require=dependencies['extras_require'],
171
- entry_points={
172
- 'console_scripts': ['sky = sky.cli:cli'],
173
- },
174
- include_package_data=True,
175
- classifiers=[
176
- 'Programming Language :: Python :: 3.7',
177
- 'Programming Language :: Python :: 3.8',
178
- 'Programming Language :: Python :: 3.9',
179
- 'Programming Language :: Python :: 3.10',
180
- 'Programming Language :: Python :: 3.11',
181
- 'License :: OSI Approved :: Apache Software License',
182
- 'Operating System :: OS Independent',
183
- 'Topic :: Software Development :: Libraries :: Python Modules',
184
- 'Topic :: System :: Distributed Computing',
185
- ],
186
- project_urls={
187
- 'Homepage': 'https://github.com/skypilot-org/skypilot',
188
- 'Issues': 'https://github.com/skypilot-org/skypilot/issues',
189
- 'Discussion': 'https://github.com/skypilot-org/skypilot/discussions',
190
- 'Documentation': 'https://docs.skypilot.co/',
191
- },
192
- )
151
+ if __name__ == '__main__':
152
+ atexit.register(revert_commit_hash)
153
+ replace_commit_hash()
154
+ setuptools.setup(
155
+ # NOTE: this affects the package.whl wheel name. When changing this (if
156
+ # ever), you must grep for '.whl' and change all corresponding wheel paths
157
+ # (templates/*.j2 and wheel_utils.py).
158
+ name='skypilot-nightly',
159
+ version=find_version(),
160
+ packages=setuptools.find_packages(),
161
+ author='SkyPilot Team',
162
+ license='Apache 2.0',
163
+ readme='README.md',
164
+ description='SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.',
165
+ long_description=long_description,
166
+ long_description_content_type='text/markdown',
167
+ setup_requires=['wheel'],
168
+ requires_python='>=3.7',
169
+ install_requires=dependencies['install_requires'],
170
+ extras_require=dependencies['extras_require'],
171
+ entry_points={
172
+ 'console_scripts': ['sky = sky.cli:cli'],
173
+ },
174
+ include_package_data=True,
175
+ classifiers=[
176
+ 'Programming Language :: Python :: 3.7',
177
+ 'Programming Language :: Python :: 3.8',
178
+ 'Programming Language :: Python :: 3.9',
179
+ 'Programming Language :: Python :: 3.10',
180
+ 'Programming Language :: Python :: 3.11',
181
+ 'Programming Language :: Python :: 3.12',
182
+ 'Programming Language :: Python :: 3.13',
183
+ 'License :: OSI Approved :: Apache Software License',
184
+ 'Operating System :: OS Independent',
185
+ 'Topic :: Software Development :: Libraries :: Python Modules',
186
+ 'Topic :: System :: Distributed Computing',
187
+ ],
188
+ project_urls={
189
+ 'Homepage': 'https://github.com/skypilot-org/skypilot',
190
+ 'Issues': 'https://github.com/skypilot-org/skypilot/issues',
191
+ 'Discussion': 'https://github.com/skypilot-org/skypilot/discussions',
192
+ 'Documentation': 'https://docs.skypilot.co/',
193
+ },
194
+ )
sky/sky_logging.py CHANGED
@@ -10,6 +10,7 @@ import threading
10
10
  import colorama
11
11
 
12
12
  from sky.skylet import constants
13
+ from sky.utils import context
13
14
  from sky.utils import env_options
14
15
  from sky.utils import rich_utils
15
16
 
@@ -18,6 +19,9 @@ _FORMAT = '%(levelname).1s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
18
19
  _DATE_FORMAT = '%m-%d %H:%M:%S'
19
20
  _SENSITIVE_LOGGER = ['sky.provisioner', 'sky.optimizer']
20
21
 
22
+ _DEBUG_LOG_DIR = os.path.expanduser(
23
+ os.path.join(constants.SKY_LOGS_DIRECTORY, 'request_debug'))
24
+
21
25
  DEBUG = logging.DEBUG
22
26
  INFO = logging.INFO
23
27
  WARNING = logging.WARNING
@@ -47,6 +51,43 @@ class NewLineFormatter(logging.Formatter):
47
51
  return msg
48
52
 
49
53
 
54
+ class EnvAwareHandler(rich_utils.RichSafeStreamHandler):
55
+ """A handler that awares environment variables.
56
+
57
+ This handler dynamically reflects the log level from environment variables.
58
+ """
59
+
60
+ def __init__(self, stream=None, level=logging.NOTSET, sensitive=False):
61
+ super().__init__(stream)
62
+ self.level = level
63
+ self._sensitive = sensitive
64
+
65
+ @property
66
+ def level(self):
67
+ # Only refresh log level if we are in a context, since the log level
68
+ # has already been reloaded eagerly in multi-processing. Refresh again
69
+ # is a no-op and can be avoided.
70
+ # TODO(aylei): unify the mechanism for coroutine context and
71
+ # multi-processing.
72
+ if context.get() is not None:
73
+ if self._sensitive:
74
+ # For sensitive logger, suppress debug log despite the
75
+ # SKYPILOT_DEBUG env var if SUPPRESS_SENSITIVE_LOG is set
76
+ if env_options.Options.SUPPRESS_SENSITIVE_LOG.get():
77
+ return logging.INFO
78
+ if env_options.Options.SHOW_DEBUG_INFO.get():
79
+ return logging.DEBUG
80
+ else:
81
+ return self._level
82
+ else:
83
+ return self._level
84
+
85
+ @level.setter
86
+ def level(self, level):
87
+ # pylint: disable=protected-access
88
+ self._level = logging._checkLevel(level)
89
+
90
+
50
91
  _root_logger = logging.getLogger('sky')
51
92
  _default_handler = None
52
93
  _logging_config = threading.local()
@@ -67,8 +108,7 @@ def _setup_logger():
67
108
  _root_logger.setLevel(logging.DEBUG)
68
109
  global _default_handler
69
110
  if _default_handler is None:
70
- _default_handler = rich_utils.RichSafeStreamHandler(sys.stdout)
71
- _default_handler.flush = sys.stdout.flush # type: ignore
111
+ _default_handler = EnvAwareHandler(sys.stdout)
72
112
  if env_options.Options.SHOW_DEBUG_INFO.get():
73
113
  _default_handler.setLevel(logging.DEBUG)
74
114
  else:
@@ -87,8 +127,7 @@ def _setup_logger():
87
127
  # for certain loggers.
88
128
  for logger_name in _SENSITIVE_LOGGER:
89
129
  logger = logging.getLogger(logger_name)
90
- handler_to_logger = rich_utils.RichSafeStreamHandler(sys.stdout)
91
- handler_to_logger.flush = sys.stdout.flush # type: ignore
130
+ handler_to_logger = EnvAwareHandler(sys.stdout, sensitive=True)
92
131
  logger.addHandler(handler_to_logger)
93
132
  logger.setLevel(logging.INFO)
94
133
  if _show_logging_prefix():
@@ -133,8 +172,41 @@ def set_logging_level(logger: str, level: int):
133
172
  logger.setLevel(original_level)
134
173
 
135
174
 
175
+ @contextlib.contextmanager
176
+ def set_sky_logging_levels(level: int):
177
+ """Set the logging level for all loggers whose name starts with 'sky'."""
178
+ # Turn off logger
179
+ previous_levels = {}
180
+ for logger_name in logging.Logger.manager.loggerDict:
181
+ if logger_name.startswith('sky'):
182
+ logger = logging.getLogger(logger_name)
183
+ previous_levels[logger_name] = logger.level
184
+ logger.setLevel(level)
185
+ if level == logging.DEBUG:
186
+ previous_show_debug_info = env_options.Options.SHOW_DEBUG_INFO.get()
187
+ os.environ[env_options.Options.SHOW_DEBUG_INFO.env_key] = '1'
188
+ try:
189
+ yield
190
+ finally:
191
+ # Restore logger
192
+ for logger_name in logging.Logger.manager.loggerDict:
193
+ if logger_name.startswith('sky'):
194
+ logger = logging.getLogger(logger_name)
195
+ try:
196
+ logger.setLevel(previous_levels[logger_name])
197
+ except KeyError:
198
+ # New loggers may be initialized after the context manager,
199
+ # no need to restore the level.
200
+ pass
201
+ if level == logging.DEBUG and not previous_show_debug_info:
202
+ os.environ.pop(env_options.Options.SHOW_DEBUG_INFO.env_key)
203
+
204
+
136
205
  def logging_enabled(logger: logging.Logger, level: int) -> bool:
137
- return logger.level <= level
206
+ # Note(cooperc): This may return true in a lot of cases where we won't
207
+ # actually log anything, since the log level is set on the handler in
208
+ # _setup_logger.
209
+ return logger.getEffectiveLevel() <= level
138
210
 
139
211
 
140
212
  @contextlib.contextmanager
@@ -183,3 +255,28 @@ def generate_tmp_logging_file_path(file_name: str) -> str:
183
255
  log_path = os.path.expanduser(os.path.join(log_dir, file_name))
184
256
 
185
257
  return log_path
258
+
259
+
260
+ @contextlib.contextmanager
261
+ def add_debug_log_handler(request_id: str):
262
+ if os.getenv(constants.ENV_VAR_ENABLE_REQUEST_DEBUG_LOGGING) != 'true':
263
+ yield
264
+ return
265
+
266
+ os.makedirs(_DEBUG_LOG_DIR, exist_ok=True)
267
+ log_path = os.path.join(_DEBUG_LOG_DIR, f'{request_id}.log')
268
+ try:
269
+ debug_log_handler = logging.FileHandler(log_path)
270
+ debug_log_handler.setFormatter(FORMATTER)
271
+ debug_log_handler.setLevel(logging.DEBUG)
272
+ _root_logger.addHandler(debug_log_handler)
273
+ # sky.provision sets up its own logger/handler with propagate=False,
274
+ # so add it there too.
275
+ provision_logger = logging.getLogger('sky.provision')
276
+ provision_logger.addHandler(debug_log_handler)
277
+ provision_logger.setLevel(logging.DEBUG)
278
+ yield
279
+ finally:
280
+ _root_logger.removeHandler(debug_log_handler)
281
+ provision_logger.removeHandler(debug_log_handler)
282
+ debug_log_handler.close()
@@ -12,6 +12,7 @@ def restart_skylet():
12
12
  # Kills old skylet if it is running.
13
13
  # TODO(zhwu): make the killing graceful, e.g., use a signal to tell
14
14
  # skylet to exit, instead of directly killing it.
15
+
15
16
  subprocess.run(
16
17
  # We use -m to grep instead of {constants.SKY_PYTHON_CMD} -m to grep
17
18
  # because need to handle the backward compatibility of the old skylet