skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546)
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/hyperbolic.py +8 -0
  8. sky/adaptors/ibm.py +5 -2
  9. sky/adaptors/kubernetes.py +149 -18
  10. sky/adaptors/nebius.py +173 -30
  11. sky/adaptors/primeintellect.py +1 -0
  12. sky/adaptors/runpod.py +68 -0
  13. sky/adaptors/seeweb.py +183 -0
  14. sky/adaptors/shadeform.py +89 -0
  15. sky/admin_policy.py +187 -4
  16. sky/authentication.py +179 -225
  17. sky/backends/__init__.py +4 -2
  18. sky/backends/backend.py +22 -9
  19. sky/backends/backend_utils.py +1323 -397
  20. sky/backends/cloud_vm_ray_backend.py +1749 -1029
  21. sky/backends/docker_utils.py +1 -1
  22. sky/backends/local_docker_backend.py +11 -6
  23. sky/backends/task_codegen.py +633 -0
  24. sky/backends/wheel_utils.py +55 -9
  25. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  26. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  27. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  28. sky/{clouds/service_catalog → catalog}/common.py +90 -49
  29. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  31. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  33. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
  34. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  35. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  36. sky/catalog/data_fetchers/fetch_nebius.py +338 -0
  37. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  38. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  39. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  40. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  41. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  42. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  43. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  44. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  45. sky/catalog/hyperbolic_catalog.py +136 -0
  46. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  47. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  48. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  49. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  50. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  51. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  52. sky/catalog/primeintellect_catalog.py +95 -0
  53. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  54. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  55. sky/catalog/seeweb_catalog.py +184 -0
  56. sky/catalog/shadeform_catalog.py +165 -0
  57. sky/catalog/ssh_catalog.py +167 -0
  58. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  59. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  60. sky/check.py +533 -185
  61. sky/cli.py +5 -5975
  62. sky/client/{cli.py → cli/command.py} +2591 -1956
  63. sky/client/cli/deprecation_utils.py +99 -0
  64. sky/client/cli/flags.py +359 -0
  65. sky/client/cli/table_utils.py +322 -0
  66. sky/client/cli/utils.py +79 -0
  67. sky/client/common.py +78 -32
  68. sky/client/oauth.py +82 -0
  69. sky/client/sdk.py +1219 -319
  70. sky/client/sdk_async.py +827 -0
  71. sky/client/service_account_auth.py +47 -0
  72. sky/cloud_stores.py +82 -3
  73. sky/clouds/__init__.py +13 -0
  74. sky/clouds/aws.py +564 -164
  75. sky/clouds/azure.py +105 -83
  76. sky/clouds/cloud.py +140 -40
  77. sky/clouds/cudo.py +68 -50
  78. sky/clouds/do.py +66 -48
  79. sky/clouds/fluidstack.py +63 -44
  80. sky/clouds/gcp.py +339 -110
  81. sky/clouds/hyperbolic.py +293 -0
  82. sky/clouds/ibm.py +70 -49
  83. sky/clouds/kubernetes.py +570 -162
  84. sky/clouds/lambda_cloud.py +74 -54
  85. sky/clouds/nebius.py +210 -81
  86. sky/clouds/oci.py +88 -66
  87. sky/clouds/paperspace.py +61 -44
  88. sky/clouds/primeintellect.py +317 -0
  89. sky/clouds/runpod.py +164 -74
  90. sky/clouds/scp.py +89 -86
  91. sky/clouds/seeweb.py +477 -0
  92. sky/clouds/shadeform.py +400 -0
  93. sky/clouds/ssh.py +263 -0
  94. sky/clouds/utils/aws_utils.py +10 -4
  95. sky/clouds/utils/gcp_utils.py +87 -11
  96. sky/clouds/utils/oci_utils.py +38 -14
  97. sky/clouds/utils/scp_utils.py +231 -167
  98. sky/clouds/vast.py +99 -77
  99. sky/clouds/vsphere.py +51 -40
  100. sky/core.py +375 -173
  101. sky/dag.py +15 -0
  102. sky/dashboard/out/404.html +1 -1
  103. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  105. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  106. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  107. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  110. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  111. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  112. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  113. sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  115. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  116. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  118. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  119. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  121. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  122. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  124. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  126. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  127. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  128. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  129. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  131. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  133. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  134. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  136. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  139. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  141. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  143. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  144. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  148. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  149. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  151. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  152. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
  155. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  156. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  157. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  158. sky/dashboard/out/clusters/[cluster].html +1 -1
  159. sky/dashboard/out/clusters.html +1 -1
  160. sky/dashboard/out/config.html +1 -0
  161. sky/dashboard/out/index.html +1 -1
  162. sky/dashboard/out/infra/[context].html +1 -0
  163. sky/dashboard/out/infra.html +1 -0
  164. sky/dashboard/out/jobs/[job].html +1 -1
  165. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  166. sky/dashboard/out/jobs.html +1 -1
  167. sky/dashboard/out/users.html +1 -0
  168. sky/dashboard/out/volumes.html +1 -0
  169. sky/dashboard/out/workspace/new.html +1 -0
  170. sky/dashboard/out/workspaces/[name].html +1 -0
  171. sky/dashboard/out/workspaces.html +1 -0
  172. sky/data/data_utils.py +137 -1
  173. sky/data/mounting_utils.py +269 -84
  174. sky/data/storage.py +1460 -1807
  175. sky/data/storage_utils.py +43 -57
  176. sky/exceptions.py +126 -2
  177. sky/execution.py +216 -63
  178. sky/global_user_state.py +2390 -586
  179. sky/jobs/__init__.py +7 -0
  180. sky/jobs/client/sdk.py +300 -58
  181. sky/jobs/client/sdk_async.py +161 -0
  182. sky/jobs/constants.py +15 -8
  183. sky/jobs/controller.py +848 -275
  184. sky/jobs/file_content_utils.py +128 -0
  185. sky/jobs/log_gc.py +193 -0
  186. sky/jobs/recovery_strategy.py +402 -152
  187. sky/jobs/scheduler.py +314 -189
  188. sky/jobs/server/core.py +836 -255
  189. sky/jobs/server/server.py +156 -115
  190. sky/jobs/server/utils.py +136 -0
  191. sky/jobs/state.py +2109 -706
  192. sky/jobs/utils.py +1306 -215
  193. sky/logs/__init__.py +21 -0
  194. sky/logs/agent.py +108 -0
  195. sky/logs/aws.py +243 -0
  196. sky/logs/gcp.py +91 -0
  197. sky/metrics/__init__.py +0 -0
  198. sky/metrics/utils.py +453 -0
  199. sky/models.py +78 -1
  200. sky/optimizer.py +164 -70
  201. sky/provision/__init__.py +90 -4
  202. sky/provision/aws/config.py +147 -26
  203. sky/provision/aws/instance.py +136 -50
  204. sky/provision/azure/instance.py +11 -6
  205. sky/provision/common.py +13 -1
  206. sky/provision/cudo/cudo_machine_type.py +1 -1
  207. sky/provision/cudo/cudo_utils.py +14 -8
  208. sky/provision/cudo/cudo_wrapper.py +72 -71
  209. sky/provision/cudo/instance.py +10 -6
  210. sky/provision/do/instance.py +10 -6
  211. sky/provision/do/utils.py +4 -3
  212. sky/provision/docker_utils.py +140 -33
  213. sky/provision/fluidstack/instance.py +13 -8
  214. sky/provision/gcp/__init__.py +1 -0
  215. sky/provision/gcp/config.py +301 -19
  216. sky/provision/gcp/constants.py +218 -0
  217. sky/provision/gcp/instance.py +36 -8
  218. sky/provision/gcp/instance_utils.py +18 -4
  219. sky/provision/gcp/volume_utils.py +247 -0
  220. sky/provision/hyperbolic/__init__.py +12 -0
  221. sky/provision/hyperbolic/config.py +10 -0
  222. sky/provision/hyperbolic/instance.py +437 -0
  223. sky/provision/hyperbolic/utils.py +373 -0
  224. sky/provision/instance_setup.py +101 -20
  225. sky/provision/kubernetes/__init__.py +5 -0
  226. sky/provision/kubernetes/config.py +9 -52
  227. sky/provision/kubernetes/constants.py +17 -0
  228. sky/provision/kubernetes/instance.py +919 -280
  229. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  230. sky/provision/kubernetes/network.py +27 -17
  231. sky/provision/kubernetes/network_utils.py +44 -43
  232. sky/provision/kubernetes/utils.py +1221 -534
  233. sky/provision/kubernetes/volume.py +343 -0
  234. sky/provision/lambda_cloud/instance.py +22 -16
  235. sky/provision/nebius/constants.py +50 -0
  236. sky/provision/nebius/instance.py +19 -6
  237. sky/provision/nebius/utils.py +237 -137
  238. sky/provision/oci/instance.py +10 -5
  239. sky/provision/paperspace/instance.py +10 -7
  240. sky/provision/paperspace/utils.py +1 -1
  241. sky/provision/primeintellect/__init__.py +10 -0
  242. sky/provision/primeintellect/config.py +11 -0
  243. sky/provision/primeintellect/instance.py +454 -0
  244. sky/provision/primeintellect/utils.py +398 -0
  245. sky/provision/provisioner.py +117 -36
  246. sky/provision/runpod/__init__.py +5 -0
  247. sky/provision/runpod/instance.py +27 -6
  248. sky/provision/runpod/utils.py +51 -18
  249. sky/provision/runpod/volume.py +214 -0
  250. sky/provision/scp/__init__.py +15 -0
  251. sky/provision/scp/config.py +93 -0
  252. sky/provision/scp/instance.py +707 -0
  253. sky/provision/seeweb/__init__.py +11 -0
  254. sky/provision/seeweb/config.py +13 -0
  255. sky/provision/seeweb/instance.py +812 -0
  256. sky/provision/shadeform/__init__.py +11 -0
  257. sky/provision/shadeform/config.py +12 -0
  258. sky/provision/shadeform/instance.py +351 -0
  259. sky/provision/shadeform/shadeform_utils.py +83 -0
  260. sky/provision/ssh/__init__.py +18 -0
  261. sky/provision/vast/instance.py +13 -8
  262. sky/provision/vast/utils.py +10 -7
  263. sky/provision/volume.py +164 -0
  264. sky/provision/vsphere/common/ssl_helper.py +1 -1
  265. sky/provision/vsphere/common/vapiconnect.py +2 -1
  266. sky/provision/vsphere/common/vim_utils.py +4 -4
  267. sky/provision/vsphere/instance.py +15 -10
  268. sky/provision/vsphere/vsphere_utils.py +17 -20
  269. sky/py.typed +0 -0
  270. sky/resources.py +845 -119
  271. sky/schemas/__init__.py +0 -0
  272. sky/schemas/api/__init__.py +0 -0
  273. sky/schemas/api/responses.py +227 -0
  274. sky/schemas/db/README +4 -0
  275. sky/schemas/db/env.py +90 -0
  276. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  277. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  278. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  279. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  280. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  281. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  282. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  283. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  284. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  285. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  286. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  287. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  288. sky/schemas/db/script.py.mako +28 -0
  289. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  290. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  291. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  292. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  293. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  294. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  295. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  296. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  297. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  298. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  299. sky/schemas/generated/__init__.py +0 -0
  300. sky/schemas/generated/autostopv1_pb2.py +36 -0
  301. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  302. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  303. sky/schemas/generated/jobsv1_pb2.py +86 -0
  304. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  305. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  306. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  307. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  308. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  309. sky/schemas/generated/servev1_pb2.py +58 -0
  310. sky/schemas/generated/servev1_pb2.pyi +115 -0
  311. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  312. sky/serve/autoscalers.py +357 -5
  313. sky/serve/client/impl.py +310 -0
  314. sky/serve/client/sdk.py +47 -139
  315. sky/serve/client/sdk_async.py +130 -0
  316. sky/serve/constants.py +12 -9
  317. sky/serve/controller.py +68 -17
  318. sky/serve/load_balancer.py +106 -60
  319. sky/serve/load_balancing_policies.py +116 -2
  320. sky/serve/replica_managers.py +434 -249
  321. sky/serve/serve_rpc_utils.py +179 -0
  322. sky/serve/serve_state.py +569 -257
  323. sky/serve/serve_utils.py +775 -265
  324. sky/serve/server/core.py +66 -711
  325. sky/serve/server/impl.py +1093 -0
  326. sky/serve/server/server.py +21 -18
  327. sky/serve/service.py +192 -89
  328. sky/serve/service_spec.py +144 -20
  329. sky/serve/spot_placer.py +3 -0
  330. sky/server/auth/__init__.py +0 -0
  331. sky/server/auth/authn.py +50 -0
  332. sky/server/auth/loopback.py +38 -0
  333. sky/server/auth/oauth2_proxy.py +202 -0
  334. sky/server/common.py +478 -182
  335. sky/server/config.py +85 -23
  336. sky/server/constants.py +44 -6
  337. sky/server/daemons.py +295 -0
  338. sky/server/html/token_page.html +185 -0
  339. sky/server/metrics.py +160 -0
  340. sky/server/middleware_utils.py +166 -0
  341. sky/server/requests/executor.py +558 -138
  342. sky/server/requests/payloads.py +364 -24
  343. sky/server/requests/preconditions.py +21 -17
  344. sky/server/requests/process.py +112 -29
  345. sky/server/requests/request_names.py +121 -0
  346. sky/server/requests/requests.py +822 -226
  347. sky/server/requests/serializers/decoders.py +82 -31
  348. sky/server/requests/serializers/encoders.py +140 -22
  349. sky/server/requests/threads.py +117 -0
  350. sky/server/rest.py +455 -0
  351. sky/server/server.py +1309 -285
  352. sky/server/state.py +20 -0
  353. sky/server/stream_utils.py +327 -61
  354. sky/server/uvicorn.py +217 -3
  355. sky/server/versions.py +270 -0
  356. sky/setup_files/MANIFEST.in +11 -1
  357. sky/setup_files/alembic.ini +160 -0
  358. sky/setup_files/dependencies.py +139 -31
  359. sky/setup_files/setup.py +44 -42
  360. sky/sky_logging.py +114 -7
  361. sky/skylet/attempt_skylet.py +106 -24
  362. sky/skylet/autostop_lib.py +129 -8
  363. sky/skylet/configs.py +29 -20
  364. sky/skylet/constants.py +216 -25
  365. sky/skylet/events.py +101 -21
  366. sky/skylet/job_lib.py +345 -164
  367. sky/skylet/log_lib.py +297 -18
  368. sky/skylet/log_lib.pyi +44 -1
  369. sky/skylet/providers/ibm/node_provider.py +12 -8
  370. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  371. sky/skylet/ray_patches/__init__.py +17 -3
  372. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  373. sky/skylet/ray_patches/cli.py.diff +19 -0
  374. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  375. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  376. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  377. sky/skylet/ray_patches/updater.py.diff +18 -0
  378. sky/skylet/ray_patches/worker.py.diff +41 -0
  379. sky/skylet/runtime_utils.py +21 -0
  380. sky/skylet/services.py +568 -0
  381. sky/skylet/skylet.py +72 -4
  382. sky/skylet/subprocess_daemon.py +104 -29
  383. sky/skypilot_config.py +506 -99
  384. sky/ssh_node_pools/__init__.py +1 -0
  385. sky/ssh_node_pools/core.py +135 -0
  386. sky/ssh_node_pools/server.py +233 -0
  387. sky/task.py +685 -163
  388. sky/templates/aws-ray.yml.j2 +11 -3
  389. sky/templates/azure-ray.yml.j2 +2 -1
  390. sky/templates/cudo-ray.yml.j2 +1 -0
  391. sky/templates/do-ray.yml.j2 +2 -1
  392. sky/templates/fluidstack-ray.yml.j2 +1 -0
  393. sky/templates/gcp-ray.yml.j2 +62 -1
  394. sky/templates/hyperbolic-ray.yml.j2 +68 -0
  395. sky/templates/ibm-ray.yml.j2 +2 -1
  396. sky/templates/jobs-controller.yaml.j2 +27 -24
  397. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  398. sky/templates/kubernetes-ray.yml.j2 +611 -50
  399. sky/templates/lambda-ray.yml.j2 +2 -1
  400. sky/templates/nebius-ray.yml.j2 +34 -12
  401. sky/templates/oci-ray.yml.j2 +1 -0
  402. sky/templates/paperspace-ray.yml.j2 +2 -1
  403. sky/templates/primeintellect-ray.yml.j2 +72 -0
  404. sky/templates/runpod-ray.yml.j2 +10 -1
  405. sky/templates/scp-ray.yml.j2 +4 -50
  406. sky/templates/seeweb-ray.yml.j2 +171 -0
  407. sky/templates/shadeform-ray.yml.j2 +73 -0
  408. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  409. sky/templates/vast-ray.yml.j2 +1 -0
  410. sky/templates/vsphere-ray.yml.j2 +1 -0
  411. sky/templates/websocket_proxy.py +212 -37
  412. sky/usage/usage_lib.py +31 -15
  413. sky/users/__init__.py +0 -0
  414. sky/users/model.conf +15 -0
  415. sky/users/permission.py +397 -0
  416. sky/users/rbac.py +121 -0
  417. sky/users/server.py +720 -0
  418. sky/users/token_service.py +218 -0
  419. sky/utils/accelerator_registry.py +35 -5
  420. sky/utils/admin_policy_utils.py +84 -38
  421. sky/utils/annotations.py +38 -5
  422. sky/utils/asyncio_utils.py +78 -0
  423. sky/utils/atomic.py +1 -1
  424. sky/utils/auth_utils.py +153 -0
  425. sky/utils/benchmark_utils.py +60 -0
  426. sky/utils/cli_utils/status_utils.py +159 -86
  427. sky/utils/cluster_utils.py +31 -9
  428. sky/utils/command_runner.py +354 -68
  429. sky/utils/command_runner.pyi +93 -3
  430. sky/utils/common.py +35 -8
  431. sky/utils/common_utils.py +314 -91
  432. sky/utils/config_utils.py +74 -5
  433. sky/utils/context.py +403 -0
  434. sky/utils/context_utils.py +242 -0
  435. sky/utils/controller_utils.py +383 -89
  436. sky/utils/dag_utils.py +31 -12
  437. sky/utils/db/__init__.py +0 -0
  438. sky/utils/db/db_utils.py +485 -0
  439. sky/utils/db/kv_cache.py +149 -0
  440. sky/utils/db/migration_utils.py +137 -0
  441. sky/utils/directory_utils.py +12 -0
  442. sky/utils/env_options.py +13 -0
  443. sky/utils/git.py +567 -0
  444. sky/utils/git_clone.sh +460 -0
  445. sky/utils/infra_utils.py +195 -0
  446. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  447. sky/utils/kubernetes/config_map_utils.py +133 -0
  448. sky/utils/kubernetes/create_cluster.sh +15 -29
  449. sky/utils/kubernetes/delete_cluster.sh +10 -7
  450. sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
  451. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  452. sky/utils/kubernetes/generate_kind_config.py +6 -66
  453. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  454. sky/utils/kubernetes/gpu_labeler.py +18 -8
  455. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  456. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  457. sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
  458. sky/utils/kubernetes/rsync_helper.sh +11 -3
  459. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  460. sky/utils/kubernetes/ssh_utils.py +221 -0
  461. sky/utils/kubernetes_enums.py +8 -15
  462. sky/utils/lock_events.py +94 -0
  463. sky/utils/locks.py +416 -0
  464. sky/utils/log_utils.py +82 -107
  465. sky/utils/perf_utils.py +22 -0
  466. sky/utils/resource_checker.py +298 -0
  467. sky/utils/resources_utils.py +249 -32
  468. sky/utils/rich_utils.py +217 -39
  469. sky/utils/schemas.py +955 -160
  470. sky/utils/serialize_utils.py +16 -0
  471. sky/utils/status_lib.py +10 -0
  472. sky/utils/subprocess_utils.py +29 -15
  473. sky/utils/tempstore.py +70 -0
  474. sky/utils/thread_utils.py +91 -0
  475. sky/utils/timeline.py +26 -53
  476. sky/utils/ux_utils.py +84 -15
  477. sky/utils/validator.py +11 -1
  478. sky/utils/volume.py +165 -0
  479. sky/utils/yaml_utils.py +111 -0
  480. sky/volumes/__init__.py +13 -0
  481. sky/volumes/client/__init__.py +0 -0
  482. sky/volumes/client/sdk.py +150 -0
  483. sky/volumes/server/__init__.py +0 -0
  484. sky/volumes/server/core.py +270 -0
  485. sky/volumes/server/server.py +124 -0
  486. sky/volumes/volume.py +215 -0
  487. sky/workspaces/__init__.py +0 -0
  488. sky/workspaces/core.py +655 -0
  489. sky/workspaces/server.py +101 -0
  490. sky/workspaces/utils.py +56 -0
  491. sky_templates/README.md +3 -0
  492. sky_templates/__init__.py +3 -0
  493. sky_templates/ray/__init__.py +0 -0
  494. sky_templates/ray/start_cluster +183 -0
  495. sky_templates/ray/stop_cluster +75 -0
  496. skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
  497. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  498. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
  499. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  500. sky/benchmark/benchmark_state.py +0 -256
  501. sky/benchmark/benchmark_utils.py +0 -641
  502. sky/clouds/service_catalog/constants.py +0 -7
  503. sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
  504. sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
  505. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  506. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  507. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  508. sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
  509. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  510. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  511. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  512. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  513. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  514. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  515. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  516. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
  517. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  518. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  519. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  520. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
  521. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  522. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  523. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  524. sky/jobs/dashboard/dashboard.py +0 -223
  525. sky/jobs/dashboard/static/favicon.ico +0 -0
  526. sky/jobs/dashboard/templates/index.html +0 -831
  527. sky/jobs/server/dashboard_utils.py +0 -69
  528. sky/skylet/providers/scp/__init__.py +0 -2
  529. sky/skylet/providers/scp/config.py +0 -149
  530. sky/skylet/providers/scp/node_provider.py +0 -578
  531. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  532. sky/utils/db_utils.py +0 -100
  533. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  534. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  535. skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
  536. skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
  537. skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
  538. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  539. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  540. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  541. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  542. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  543. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  544. /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  545. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  546. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/utils/context.py ADDED
@@ -0,0 +1,403 @@
1
+ """SkyPilot context for threads and coroutines."""
2
+
3
+ import asyncio
4
+ from collections.abc import Mapping
5
+ import contextvars
6
+ import copy
7
+ import functools
8
+ import os
9
+ import pathlib
10
+ import subprocess
11
+ import sys
12
+ from typing import (Any, Callable, Coroutine, Dict, Iterator, MutableMapping,
13
+ Optional, TextIO, TYPE_CHECKING, TypeVar)
14
+
15
+ from typing_extensions import ParamSpec
16
+
17
+ if TYPE_CHECKING:
18
+ from sky.skypilot_config import ConfigContext
19
+
20
+
21
class SkyPilotContext(object):
    """State bundle that SkyPilot attaches to a thread or coroutine.

    An instance carries four pieces of state:
      * a cancellation flag (`cancel()` / `is_canceled()`),
      * an optional log file that output can be redirected to
        (`redirect_log()` / `output_stream()`),
      * environment variable overrides (`env_overrides`),
      * an optional config context (`config_context`).

    To add a new piece of contextual state, add an instance attribute in
    `__init__` and, if it needs protection, accessor methods.

    To propagate the context to a new thread/coroutine, use
    `contextvars.copy_context()`.

    Example:
        import asyncio
        import contextvars
        import time
        from sky.utils import context

        def sync_task():
            while True:
                if context.get().is_canceled():
                    break
                time.sleep(1)

        async def fastapi_handler():
            # context.initialize() has been called in lifespan
            # asyncio.to_thread copies current context implicitly
            task = asyncio.to_thread(sync_task)
            # Or explicitly:
            # loop = asyncio.get_running_loop()
            # ctx = contextvars.copy_context()
            # task = loop.run_in_executor(None, ctx.run, sync_task)
            await asyncio.sleep(1)
            context.get().cancel()
            await task
    """

    def __init__(self):
        # Cancellation flag; this class only uses set() and is_set() on it.
        self._canceled = asyncio.Event()
        # Path of the current redirect target and the handle opened on it.
        self._log_file = None
        self._log_file_handle = None
        # Environment overrides, keyed by variable name.
        self.env_overrides = {}
        self.config_context = None

    def cancel(self):
        """Mark this context as canceled."""
        self._canceled.set()

    def is_canceled(self):
        """Return True once `cancel()` has been called on this context."""
        return self._canceled.is_set()

    def redirect_log(
            self, log_file: Optional[pathlib.Path]) -> Optional[pathlib.Path]:
        """Point this context's output at `log_file`.

        Args:
            log_file: File to append output to. Passing None removes the
                redirection, so `output_stream()` returns its fallback
                again.

        Returns:
            The previously configured log file, or None if there was none.
        """
        previous_file = self._log_file
        previous_handle = self._log_file_handle
        # Open the new target first: if open() fails, no state has changed.
        if log_file is not None:
            self._log_file_handle = open(log_file, 'a', encoding='utf-8')
        else:
            self._log_file_handle = None
        self._log_file = log_file
        # Only now release the handle that was in use before.
        if previous_handle is not None:
            previous_handle.close()
        return previous_file

    def output_stream(self, fallback: TextIO) -> TextIO:
        """Return the redirect handle if one is open, else `fallback`."""
        handle = self._log_file_handle
        return fallback if handle is None else handle

    def override_envs(self, envs: Dict[str, str]):
        """Merge `envs` into this context's environment overrides."""
        self.env_overrides.update(envs)

    def cleanup(self):
        """Close the redirect handle, if any."""
        if self._log_file_handle is None:
            return
        self._log_file_handle.close()
        self._log_file_handle = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        del exc_type, exc_val, exc_tb
        self.cleanup()

    def copy(self) -> 'SkyPilotContext':
        """Return an independent copy of this context.

        The copy:
          * opens its own handle on the same log file (if any),
          * gets its own dict of env var overrides,
          * gets a deep copy of the config context,
          * starts un-canceled, regardless of this context's state.

        Later changes to this context do not affect the copy.
        """
        clone = SkyPilotContext()
        clone.redirect_log(self._log_file)
        clone.env_overrides = dict(self.env_overrides)
        clone.config_context = copy.deepcopy(self.config_context)
        return clone
137
+
138
+
139
# Holds the SkyPilotContext bound to the current contextvars Context; the
# default of None means "no SkyPilot context has been initialized here".
_CONTEXT: 'contextvars.ContextVar[Optional[SkyPilotContext]]' = (
    contextvars.ContextVar('sky_context', default=None))


def get() -> Optional[SkyPilotContext]:
    """Return the SkyPilot context bound to the caller, if any.

    Returns None when no context has been initialized. Sync code can use
    this to tell whether it is running in a cancellable context and skip
    polling the cancellation flag when it is not.
    """
    return _CONTEXT.get()
151
+
152
+
153
class ContextualEnviron(MutableMapping[str, str]):
    """Environment variables wrapper with contextual overrides.

    An instance of ContextualEnviron will typically be used to replace
    os.environ to make the environ access of the current process
    context aware.

    Lookup rules when a SkyPilot context is active (`get()` is not None):
    - A key present in `ctx.env_overrides` with a non-None value shadows the
      wrapped environ.
    - A key present in `ctx.env_overrides` with value None is treated as
      deleted, even if the wrapped environ still has it.
    - Any other key falls through to the wrapped environ.
    Without an active context, every operation goes to the wrapped environ.

    Behavior of spawning a subprocess:
    - The contextual overrides will not be applied to the subprocess by
      default.
    - When env=os.environ is passed to the subprocess explicitly, the
      subprocess inherits the contextual environment variables as of the
      time of the spawn; it will not see later updates. Also, os.environ of
      the subprocess will not be a ContextualEnviron unless the subprocess
      hijacks os.environ explicitly.
    - Optionally, context.Popen() can be used to automatically pass
      os.environ with overrides to the subprocess.

    Example:
    1. Parent process:
        # Hijack os.environ to be a ContextualEnviron
        os.environ = ContextualEnviron(os.environ)
        ctx = context.get()
        ctx.override_envs({'FOO': 'BAR1'})
        proc = subprocess.Popen(..., env=os.environ)
        # Or use context.Popen instead
        # proc = context.Popen(...)
        ctx.override_envs({'FOO': 'BAR2'})
    2. Subprocess:
        assert os.environ['FOO'] == 'BAR1'
        ctx = context.get()
        # Overriding the contextual env var in the subprocess does not take
        # effect since os.environ is not hijacked there.
        ctx.override_envs({'FOO': 'BAR3'})
        assert os.environ['FOO'] == 'BAR1'
    """

    def __init__(self, environ: 'os._Environ[str]') -> None:
        self._environ = environ

    def __getitem__(self, key: str) -> str:
        ctx = get()
        if ctx is not None and key in ctx.env_overrides:
            value = ctx.env_overrides[key]
            # None is used to indicate that the key is deleted in the
            # context.
            if value is None:
                raise KeyError(key)
            return value
        return self._environ[key]

    def _iter_with_overrides(self, ctx: 'SkyPilotContext') -> Iterator[str]:
        """Yield the visible keys: live overrides, then unshadowed environ."""
        overrides = ctx.env_overrides
        for key, value in overrides.items():
            # A None value marks the key as deleted in the context.
            if value is not None:
                yield key
        for key in self._environ:
            # Keys present in the overrides were either yielded above or are
            # marked as deleted; in both cases they must not be repeated.
            if key not in overrides:
                yield key

    def __iter__(self) -> Iterator[str]:
        ctx = get()
        if ctx is None:
            return iter(self._environ)
        return self._iter_with_overrides(ctx)

    def __len__(self) -> int:
        # Iteration already yields each visible key exactly once, so counting
        # is enough; no intermediate dict is needed.
        return sum(1 for _ in self)

    def __setitem__(self, key: str, value: str) -> None:
        ctx = get()
        if ctx is not None:
            ctx.env_overrides[key] = value
        else:
            self._environ[key] = value

    def __delitem__(self, key: str) -> None:
        ctx = get()
        if ctx is None:
            del self._environ[key]
            return
        if key in self._environ:
            # If the key is set in the environ of the process, we mark it as
            # deleted in the context by setting the value to None.
            # Note: we must do this even if it was also set in the context,
            # since it could be set in both, and deleting should delete it
            # from both.
            ctx.env_overrides[key] = None
        elif key in ctx.env_overrides:
            # If the key is set in the context, but not the original
            # environ, we can just delete the override.
            del ctx.env_overrides[key]
        else:
            # The key is not set in the context nor the process.
            raise KeyError(key)

    def __repr__(self) -> str:
        # Adapted from os._Environ.__repr__
        formatted_items = ', '.join(
            f'{key!r}: {value!r}' for key, value in self.items())
        return f'ctx_environ({{{formatted_items}}})'

    def copy(self) -> Dict[str, str]:
        """Return a plain-dict snapshot of the environ with overrides applied.

        Keys marked as deleted in the context are left out of the snapshot.
        """
        copied = self._environ.copy()
        ctx = get()
        if ctx is not None:
            for key, value in ctx.env_overrides.items():
                if value is None:
                    # The key may already be gone from the wrapped environ
                    # (e.g. it was removed there after being marked as
                    # deleted here), so tolerate its absence.
                    copied.pop(key, None)
                else:
                    copied[key] = value
        return copied

    def setdefault(self, key: str, default: str) -> str:
        # NOTE(review): this goes straight to the wrapped environ and ignores
        # any contextual override for `key`, unlike __getitem__/__setitem__.
        # Behavior is kept as-is; confirm whether that is intentional.
        return self._environ.setdefault(key, default)

    def __ior__(self, other):
        if not isinstance(other, Mapping):
            return NotImplemented
        self.update(other)
        return self

    def __or__(self, other):
        if not isinstance(other, Mapping):
            return NotImplemented
        new = dict(self)
        new.update(other)
        return new

    def __ror__(self, other):
        if not isinstance(other, Mapping):
            return NotImplemented
        new = dict(other)
        new.update(self)
        return new
296
+
297
+
298
class Popen(subprocess.Popen):
    """`subprocess.Popen` that defaults `env` to a copy of `os.environ`.

    When the caller does not pass `env` (or passes None), a copy of the
    current `os.environ` is taken at construction time and handed to the
    child process.
    """

    def __init__(self, *args, **kwargs):
        if kwargs.get('env') is None:
            # Pass a copy of current os.environ to avoid a race condition
            # when the context is updated after the Popen is created.
            kwargs['env'] = os.environ.copy()
        super().__init__(*args, **kwargs)  # type: ignore[call-overload]
308
+
309
+
310
P = ParamSpec('P')
T = TypeVar('T')


def contextual(func: Callable[P, T]) -> Callable[P, T]:
    """Decorator that runs `func` with a freshly initialized SkyPilot context.

    Each call runs inside a copy of the caller's contextvars Context, in
    which `initialize()` installs a new SkyPilotContext. If the caller
    already has a SkyPilotContext, the new one is created from it and so
    inherits its values.
    """

    @functools.wraps(func)
    def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:

        def call_with_fresh_context() -> T:
            # Executed inside the copied contextvars Context, so get() still
            # sees the caller's SkyPilotContext and initialize() only
            # rebinds the variable in the copy.
            with initialize(get()):
                return func(*args, **kwargs)

        # Run in a copy of the current contextvars Context so that setting
        # the SkyPilotContext does not affect the caller's context.
        return contextvars.copy_context().run(call_with_fresh_context)

    return wrapper
336
+
337
+
338
def contextual_async(
    func: Callable[P, Coroutine[Any, Any, T]]
) -> Callable[P, Coroutine[Any, Any, T]]:
    """Decorator to initialize a context before executing the coroutine.

    If a context is already initialized, this decorator will create a new
    context that inherits the values from the existing context. Once the
    coroutine finishes (normally or with an exception), the caller's
    previous SkyPilot context is put back.

    Args:
        func: The coroutine function to wrap.

    Returns:
        A coroutine function with the same signature as `func`.
    """

    @functools.wraps(func)
    async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
        # contextvars.Context.run() cannot isolate a coroutine: it only
        # covers the synchronous call that creates the coroutine object,
        # while the body runs in the contextvars Context of whoever awaits
        # it. So initialize() below rebinds _CONTEXT in the caller's
        # Context, and we must restore the previous value ourselves.
        original_ctx = get()
        try:
            with initialize(original_ctx):
                return await func(*args, **kwargs)
        finally:
            # Use set() rather than a token reset: the variable's default is
            # None, so re-setting the previous value is equivalent for get().
            _CONTEXT.set(original_ctx)

    return wrapper
362
+
363
+
364
def initialize(
        base_context: Optional[SkyPilotContext] = None) -> SkyPilotContext:
    """Create a SkyPilot context and bind it as the current one.

    Args:
        base_context: If given, the new context is `base_context.copy()`;
            otherwise a brand-new SkyPilotContext is created.

    Returns:
        The newly bound context.
    """
    if base_context is None:
        new_context = SkyPilotContext()
    else:
        new_context = base_context.copy()
    _CONTEXT.set(new_context)
    return new_context
371
+
372
+
373
class _ContextualStream:
    """Base class for context-aware text streams.

    Attribute lookups that are not resolved on the wrapper itself are
    forwarded, via __getattr__, to whichever stream is active for the
    caller: the stream chosen by the current SkyPilot context if there is
    one, otherwise the original stream.
    """
    _original_stream: TextIO

    def __init__(self, original_stream: TextIO):
        self._original_stream = original_stream

    def __getattr__(self, attr: str):
        target = self._active_stream()
        return getattr(target, attr)

    def _active_stream(self) -> TextIO:
        """Pick the stream that output should go to right now."""
        ctx = get()
        if ctx is not None:
            # The context falls back to the original stream itself when it
            # has no redirect configured.
            return ctx.output_stream(self._original_stream)
        return self._original_stream
392
+
393
+
394
class Stdout(_ContextualStream):
    """Context-aware stream wrapping the `sys.stdout` seen at construction."""

    def __init__(self):
        super().__init__(sys.stdout)


class Stderr(_ContextualStream):
    """Context-aware stream wrapping the `sys.stderr` seen at construction."""

    def __init__(self):
        super().__init__(sys.stderr)
@@ -0,0 +1,242 @@
1
+ """Utilities for SkyPilot context."""
2
+ import asyncio
3
+ import concurrent.futures
4
+ import contextvars
5
+ import functools
6
+ import multiprocessing
7
+ import os
8
+ import select
9
+ import subprocess
10
+ import sys
11
+ import time
12
+ import typing
13
+ from typing import Any, Callable, IO, Optional, Tuple, TypeVar
14
+
15
+ from typing_extensions import ParamSpec
16
+
17
+ from sky import sky_logging
18
+ from sky.utils import context
19
+ from sky.utils import subprocess_utils
20
+
21
# A stream handler is called with (input stream, output stream) and returns a
# string.
StreamHandler = Callable[[IO[Any], IO[Any]], str]
# Interval, in seconds, used by passthrough_stream_handler both as its poll
# timeout and as the minimum time between flushes of the output stream.
PASSTHROUGH_FLUSH_INTERVAL_SECONDS = 0.5

logger = sky_logging.init_logger(__name__)
25
+
26
+
27
# TODO(aylei): call hijack_sys_attrs() proactively in module init at
# server-side once we have context widely adopted.
def hijack_sys_attrs():
    """Hijack system attributes to be context aware.

    Replaces `sys.stdout`, `sys.stderr`, `os.environ` and `subprocess.Popen`
    with their context-aware counterparts from `sky.utils.context`, and
    reloads the SkyPilot logger so that it picks up the new streams.

    This function should be called at the very beginning of the processes
    that might use sky.utils.context. Calling it again is harmless:
    attributes that are already hijacked are left as they are instead of
    being wrapped a second time.
    """
    # Modify stdout and stderr of the uvicorn process to be contextually
    # aware, use setattr to bypass the TextIO type check.
    if not isinstance(sys.stdout, context.Stdout):
        setattr(sys, 'stdout', context.Stdout())
    if not isinstance(sys.stderr, context.Stderr):
        setattr(sys, 'stderr', context.Stderr())
    # Reload logger to apply latest stdout and stderr.
    sky_logging.reload_logger()
    # Hijack os.environ with ContextualEnviron to make env variables
    # contextually aware.
    if not isinstance(os.environ, context.ContextualEnviron):
        setattr(os, 'environ', context.ContextualEnviron(os.environ))
    # Hijack subprocess.Popen to pass the contextual environ to subprocess
    # by default.
    setattr(subprocess, 'Popen', context.Popen)
47
+
48
+
49
def passthrough_stream_handler(in_stream: IO[Any],
                               out_stream: IO[Any],
                               flush_interval: Optional[float] = None) -> str:
    """Pass through the stream from the process to the output stream.

    Reads raw bytes from `in_stream`'s file descriptor until EOF, decodes
    them as UTF-8 (invalid bytes are replaced) and writes the text to
    `out_stream`, flushing at most once per flush interval plus once at the
    end.

    Args:
        in_stream: Stream to read from; only its `fileno()` is used.
        out_stream: Text stream to write the decoded output to.
        flush_interval: Seconds between flushes, also used as the poll
            timeout. Defaults to PASSTHROUGH_FLUSH_INTERVAL_SECONDS.

    Returns:
        Always the empty string; the content is forwarded, not collected.
    """
    import codecs  # pylint: disable=import-outside-toplevel

    if flush_interval is None:
        flush_interval = PASSTHROUGH_FLUSH_INTERVAL_SECONDS

    # Chunks are cut at arbitrary byte offsets, so a multi-byte UTF-8
    # character can straddle two reads. An incremental decoder keeps the
    # incomplete tail until the rest arrives instead of emitting U+FFFD.
    decoder = codecs.getincrementaldecoder('utf-8')(errors='replace')

    # Monotonic clock: the flush schedule must not be affected by wall-clock
    # adjustments.
    last_flush_time = time.monotonic()
    has_unflushed_content = False

    # Use poll() with timeout instead of readline() to avoid blocking.
    # readline() blocks until a newline is available, which can take minutes
    # for tasks that emit logs infrequently (e.g. jupyter lab server).
    # While readline() is blocked, the timing code never executes, so buffered
    # logs never get flushed. poll() with timeout allows us to periodically
    # flush even when no new data is available, ensuring logs appear promptly.
    fd = in_stream.fileno()
    poller = select.poll()
    poller.register(fd, select.POLLIN)

    # Timeout in milliseconds for poll()
    poll_timeout_ms = int(flush_interval * 1000)

    while True:
        # Poll with timeout - returns when data available or timeout
        events = poller.poll(poll_timeout_ms)

        current_time = time.monotonic()

        if events:
            # Data is available, read a chunk
            chunk = os.read(fd, 4096)  # Read up to 4KB
            if not chunk:
                break  # EOF
            text = decoder.decode(chunk)
            if text:
                out_stream.write(text)
                has_unflushed_content = True

        # Flush only if we have unflushed content and timeout reached
        if (has_unflushed_content and
                current_time - last_flush_time >= flush_interval):
            out_stream.flush()
            last_flush_time = current_time
            has_unflushed_content = False

    poller.unregister(fd)

    # Emit whatever the decoder still holds (an incomplete trailing sequence
    # becomes a replacement character).
    tail = decoder.decode(b'', final=True)
    if tail:
        out_stream.write(tail)
        has_unflushed_content = True

    # Final flush to ensure all data is written
    if has_unflushed_content:
        out_stream.flush()

    return ''
94
+
95
+
96
def pipe_and_wait_process(
    ctx: context.SkyPilotContext,
    proc: subprocess.Popen,
    poll_interval: float = 0.5,
    cancel_callback: Optional[Callable[[], None]] = None,
    stdout_stream_handler: Optional[StreamHandler] = None,
    stderr_stream_handler: Optional[StreamHandler] = None
) -> Tuple[str, str]:
    """Wait for the process to finish or cancel it if the context is cancelled.

    While waiting, the process' stdout/stderr pipes are consumed by stream
    handlers running in helper threads.

    Args:
        ctx: The context whose cancellation is watched and whose output
            stream receives the process output.
        proc: The process to wait for.
        poll_interval: The interval to poll the process.
        cancel_callback: The callback to call if the context is cancelled.
        stdout_stream_handler: An optional handler to handle the stdout stream,
            if None, the stdout stream will be passed through.
        stderr_stream_handler: An optional handler to handle the stderr stream,
            if None, the stderr stream will be passed through.

    Returns:
        A (stdout, stderr) tuple with the return values of the two stream
        handlers; an entry is '' when the corresponding pipe is None.
    """
    # `import multiprocessing` alone does not load the `pool` submodule, so
    # import it explicitly instead of relying on another module having done
    # so already.
    from multiprocessing import pool as mp_pool  # pylint: disable=import-outside-toplevel

    if stdout_stream_handler is None:
        stdout_stream_handler = passthrough_stream_handler
    if stderr_stream_handler is None:
        stderr_stream_handler = passthrough_stream_handler

    with mp_pool.ThreadPool(processes=2) as pool:
        # Context will be lost in the new thread, capture current output stream
        # and pass it to the new thread directly.
        # A pipe that is None has nothing to read, so no handler is scheduled
        # for it.
        stdout_fut = None
        if proc.stdout is not None:
            stdout_fut = pool.apply_async(
                stdout_stream_handler,
                (proc.stdout, ctx.output_stream(sys.stdout)))
        stderr_fut = None
        if proc.stderr is not None:
            stderr_fut = pool.apply_async(
                stderr_stream_handler,
                (proc.stderr, ctx.output_stream(sys.stderr)))
        try:
            wait_process(ctx,
                         proc,
                         poll_interval=poll_interval,
                         cancel_callback=cancel_callback)
        finally:
            # Wait for the stream handler threads to exit when process is done
            # or cancelled
            for fut in (stdout_fut, stderr_fut):
                if fut is not None:
                    fut.wait()
        stdout = stdout_fut.get() if stdout_fut is not None else ''
        stderr = stderr_fut.get() if stderr_fut is not None else ''
        return stdout, stderr
148
+
149
+
150
def wait_process(ctx: context.SkyPilotContext,
                 proc: subprocess.Popen,
                 poll_interval: float = 0.5,
                 cancel_callback: Optional[Callable[[], None]] = None):
    """Block until `proc` exits, killing it if `ctx` gets cancelled.

    Args:
        ctx: The context whose cancellation is checked before every wait.
        proc: The process to wait for.
        poll_interval: Seconds to wait on the process between two
            cancellation checks.
        cancel_callback: Called first when cancellation is detected, before
            the process is killed.

    Raises:
        asyncio.CancelledError: If the context is cancelled before the
            process exits.
    """
    while not ctx.is_canceled():
        try:
            proc.wait(poll_interval)
        except subprocess.TimeoutExpired:
            # Still running; go back and re-check cancellation.
            continue
        # Process exited
        return

    # The context was cancelled while the process was still being waited on.
    if cancel_callback is not None:
        cancel_callback()
    # Kill the process regardless of what the caller's callback did; the
    # utility function gracefully handles the case where the process is
    # already terminated.
    # Bash scripts typically do not forward SIGTERM to children and thus
    # cannot be killed gracefully, so shorten the grace period for faster
    # termination.
    subprocess_utils.kill_process_with_grace_period(proc, grace_period=1)
    raise asyncio.CancelledError()
181
+
182
+
183
F = TypeVar('F', bound=Callable[..., Any])


def cancellation_guard(func: F) -> F:
    """Decorator that refuses to start `func` in a cancelled context.

    Before each call, the current SkyPilot context is looked up; if one
    exists and is already cancelled, asyncio.CancelledError is raised
    instead of calling `func`. The check happens only once, up front; a
    cancellation that arrives while `func` is running is not detected here.

    This basically mimics the behavior of asyncio, which checks coroutine
    cancellation in await calls.

    Args:
        func: The function to be decorated.

    Returns:
        The wrapped function that checks cancellation before execution.

    Raises:
        asyncio.CancelledError: If the context is cancelled before execution.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        current = context.get()
        already_canceled = current is not None and current.is_canceled()
        if already_canceled:
            raise asyncio.CancelledError(
                f'Function {func.__name__} cancelled before execution')
        return func(*args, **kwargs)

    return typing.cast(F, wrapper)
215
+
216
+
217
P = ParamSpec('P')
T = TypeVar('T')


# TODO(aylei): replace this with asyncio.to_thread once we drop support for
# python 3.8
def to_thread(func: Callable[P, T], /, *args: P.args,
              **kwargs: P.kwargs) -> 'asyncio.Future[T]':
    """Asynchronously run function *func* in a separate thread.

    Stand-in for asyncio.to_thread (added in python 3.9). Delegates to
    to_thread_with_executor() with no explicit executor and returns the
    future it produces.
    """
    return to_thread_with_executor(None, func, *args, **kwargs)
230
+
231
+
232
def to_thread_with_executor(executor: Optional[concurrent.futures.Executor],
                            func: Callable[P, T], /, *args: P.args,
                            **kwargs: P.kwargs) -> 'asyncio.Future[T]':
    """Asynchronously run function *func* in a separate thread with
    a custom executor.

    The caller's contextvars Context is copied and *func* is executed inside
    that copy, so context variables set by the caller are visible to it.

    Args:
        executor: Executor to submit the call to; passed as-is to
            `loop.run_in_executor`.
        func: The function to run.
        *args: Positional arguments for *func*.
        **kwargs: Keyword arguments for *func*.

    Returns:
        The future returned by `loop.run_in_executor`.
    """
    running_loop = asyncio.get_running_loop()
    # Snapshot the contextvars of the caller and run *func* inside it.
    snapshot = contextvars.copy_context()
    bound_call: Callable[..., T] = functools.partial(snapshot.run, func, *args,
                                                     **kwargs)
    return running_loop.run_in_executor(executor, bound_call)