skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (512) hide show
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +25 -7
  3. sky/adaptors/common.py +24 -1
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/hyperbolic.py +8 -0
  7. sky/adaptors/kubernetes.py +149 -18
  8. sky/adaptors/nebius.py +170 -17
  9. sky/adaptors/primeintellect.py +1 -0
  10. sky/adaptors/runpod.py +68 -0
  11. sky/adaptors/seeweb.py +167 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +187 -4
  14. sky/authentication.py +179 -225
  15. sky/backends/__init__.py +4 -2
  16. sky/backends/backend.py +22 -9
  17. sky/backends/backend_utils.py +1299 -380
  18. sky/backends/cloud_vm_ray_backend.py +1715 -518
  19. sky/backends/docker_utils.py +1 -1
  20. sky/backends/local_docker_backend.py +11 -6
  21. sky/backends/wheel_utils.py +37 -9
  22. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  23. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  24. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  25. sky/{clouds/service_catalog → catalog}/common.py +89 -48
  26. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  27. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  28. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
  29. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
  31. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  33. sky/catalog/data_fetchers/fetch_nebius.py +335 -0
  34. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  35. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  36. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  37. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  38. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  39. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  40. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  41. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  42. sky/catalog/hyperbolic_catalog.py +136 -0
  43. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  44. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  45. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  46. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  47. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  48. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  49. sky/catalog/primeintellect_catalog.py +95 -0
  50. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  51. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  52. sky/catalog/seeweb_catalog.py +184 -0
  53. sky/catalog/shadeform_catalog.py +165 -0
  54. sky/catalog/ssh_catalog.py +167 -0
  55. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  56. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  57. sky/check.py +491 -203
  58. sky/cli.py +5 -6005
  59. sky/client/{cli.py → cli/command.py} +2477 -1885
  60. sky/client/cli/deprecation_utils.py +99 -0
  61. sky/client/cli/flags.py +359 -0
  62. sky/client/cli/table_utils.py +320 -0
  63. sky/client/common.py +70 -32
  64. sky/client/oauth.py +82 -0
  65. sky/client/sdk.py +1203 -297
  66. sky/client/sdk_async.py +833 -0
  67. sky/client/service_account_auth.py +47 -0
  68. sky/cloud_stores.py +73 -0
  69. sky/clouds/__init__.py +13 -0
  70. sky/clouds/aws.py +358 -93
  71. sky/clouds/azure.py +105 -83
  72. sky/clouds/cloud.py +127 -36
  73. sky/clouds/cudo.py +68 -50
  74. sky/clouds/do.py +66 -48
  75. sky/clouds/fluidstack.py +63 -44
  76. sky/clouds/gcp.py +339 -110
  77. sky/clouds/hyperbolic.py +293 -0
  78. sky/clouds/ibm.py +70 -49
  79. sky/clouds/kubernetes.py +563 -162
  80. sky/clouds/lambda_cloud.py +74 -54
  81. sky/clouds/nebius.py +206 -80
  82. sky/clouds/oci.py +88 -66
  83. sky/clouds/paperspace.py +61 -44
  84. sky/clouds/primeintellect.py +317 -0
  85. sky/clouds/runpod.py +164 -74
  86. sky/clouds/scp.py +89 -83
  87. sky/clouds/seeweb.py +466 -0
  88. sky/clouds/shadeform.py +400 -0
  89. sky/clouds/ssh.py +263 -0
  90. sky/clouds/utils/aws_utils.py +10 -4
  91. sky/clouds/utils/gcp_utils.py +87 -11
  92. sky/clouds/utils/oci_utils.py +38 -14
  93. sky/clouds/utils/scp_utils.py +177 -124
  94. sky/clouds/vast.py +99 -77
  95. sky/clouds/vsphere.py +51 -40
  96. sky/core.py +349 -139
  97. sky/dag.py +15 -0
  98. sky/dashboard/out/404.html +1 -1
  99. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  100. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
  102. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  105. sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
  106. sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
  111. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  112. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  113. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  115. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  116. sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
  118. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  119. sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  121. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  122. sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  124. sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  126. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  127. sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
  128. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  129. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  131. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  133. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  134. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
  136. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  139. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  141. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
  143. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
  144. sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  148. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
  149. sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
  151. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  152. sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
  153. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  154. sky/dashboard/out/clusters/[cluster].html +1 -1
  155. sky/dashboard/out/clusters.html +1 -1
  156. sky/dashboard/out/config.html +1 -0
  157. sky/dashboard/out/index.html +1 -1
  158. sky/dashboard/out/infra/[context].html +1 -0
  159. sky/dashboard/out/infra.html +1 -0
  160. sky/dashboard/out/jobs/[job].html +1 -1
  161. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  162. sky/dashboard/out/jobs.html +1 -1
  163. sky/dashboard/out/users.html +1 -0
  164. sky/dashboard/out/volumes.html +1 -0
  165. sky/dashboard/out/workspace/new.html +1 -0
  166. sky/dashboard/out/workspaces/[name].html +1 -0
  167. sky/dashboard/out/workspaces.html +1 -0
  168. sky/data/data_utils.py +137 -1
  169. sky/data/mounting_utils.py +269 -84
  170. sky/data/storage.py +1451 -1807
  171. sky/data/storage_utils.py +43 -57
  172. sky/exceptions.py +132 -2
  173. sky/execution.py +206 -63
  174. sky/global_user_state.py +2374 -586
  175. sky/jobs/__init__.py +5 -0
  176. sky/jobs/client/sdk.py +242 -65
  177. sky/jobs/client/sdk_async.py +143 -0
  178. sky/jobs/constants.py +9 -8
  179. sky/jobs/controller.py +839 -277
  180. sky/jobs/file_content_utils.py +80 -0
  181. sky/jobs/log_gc.py +201 -0
  182. sky/jobs/recovery_strategy.py +398 -152
  183. sky/jobs/scheduler.py +315 -189
  184. sky/jobs/server/core.py +829 -255
  185. sky/jobs/server/server.py +156 -115
  186. sky/jobs/server/utils.py +136 -0
  187. sky/jobs/state.py +2092 -701
  188. sky/jobs/utils.py +1242 -160
  189. sky/logs/__init__.py +21 -0
  190. sky/logs/agent.py +108 -0
  191. sky/logs/aws.py +243 -0
  192. sky/logs/gcp.py +91 -0
  193. sky/metrics/__init__.py +0 -0
  194. sky/metrics/utils.py +443 -0
  195. sky/models.py +78 -1
  196. sky/optimizer.py +164 -70
  197. sky/provision/__init__.py +90 -4
  198. sky/provision/aws/config.py +147 -26
  199. sky/provision/aws/instance.py +135 -50
  200. sky/provision/azure/instance.py +10 -5
  201. sky/provision/common.py +13 -1
  202. sky/provision/cudo/cudo_machine_type.py +1 -1
  203. sky/provision/cudo/cudo_utils.py +14 -8
  204. sky/provision/cudo/cudo_wrapper.py +72 -71
  205. sky/provision/cudo/instance.py +10 -6
  206. sky/provision/do/instance.py +10 -6
  207. sky/provision/do/utils.py +4 -3
  208. sky/provision/docker_utils.py +114 -23
  209. sky/provision/fluidstack/instance.py +13 -8
  210. sky/provision/gcp/__init__.py +1 -0
  211. sky/provision/gcp/config.py +301 -19
  212. sky/provision/gcp/constants.py +218 -0
  213. sky/provision/gcp/instance.py +36 -8
  214. sky/provision/gcp/instance_utils.py +18 -4
  215. sky/provision/gcp/volume_utils.py +247 -0
  216. sky/provision/hyperbolic/__init__.py +12 -0
  217. sky/provision/hyperbolic/config.py +10 -0
  218. sky/provision/hyperbolic/instance.py +437 -0
  219. sky/provision/hyperbolic/utils.py +373 -0
  220. sky/provision/instance_setup.py +93 -14
  221. sky/provision/kubernetes/__init__.py +5 -0
  222. sky/provision/kubernetes/config.py +9 -52
  223. sky/provision/kubernetes/constants.py +17 -0
  224. sky/provision/kubernetes/instance.py +789 -247
  225. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  226. sky/provision/kubernetes/network.py +27 -17
  227. sky/provision/kubernetes/network_utils.py +40 -43
  228. sky/provision/kubernetes/utils.py +1192 -531
  229. sky/provision/kubernetes/volume.py +282 -0
  230. sky/provision/lambda_cloud/instance.py +22 -16
  231. sky/provision/nebius/constants.py +50 -0
  232. sky/provision/nebius/instance.py +19 -6
  233. sky/provision/nebius/utils.py +196 -91
  234. sky/provision/oci/instance.py +10 -5
  235. sky/provision/paperspace/instance.py +10 -7
  236. sky/provision/paperspace/utils.py +1 -1
  237. sky/provision/primeintellect/__init__.py +10 -0
  238. sky/provision/primeintellect/config.py +11 -0
  239. sky/provision/primeintellect/instance.py +454 -0
  240. sky/provision/primeintellect/utils.py +398 -0
  241. sky/provision/provisioner.py +110 -36
  242. sky/provision/runpod/__init__.py +5 -0
  243. sky/provision/runpod/instance.py +27 -6
  244. sky/provision/runpod/utils.py +51 -18
  245. sky/provision/runpod/volume.py +180 -0
  246. sky/provision/scp/__init__.py +15 -0
  247. sky/provision/scp/config.py +93 -0
  248. sky/provision/scp/instance.py +531 -0
  249. sky/provision/seeweb/__init__.py +11 -0
  250. sky/provision/seeweb/config.py +13 -0
  251. sky/provision/seeweb/instance.py +807 -0
  252. sky/provision/shadeform/__init__.py +11 -0
  253. sky/provision/shadeform/config.py +12 -0
  254. sky/provision/shadeform/instance.py +351 -0
  255. sky/provision/shadeform/shadeform_utils.py +83 -0
  256. sky/provision/ssh/__init__.py +18 -0
  257. sky/provision/vast/instance.py +13 -8
  258. sky/provision/vast/utils.py +10 -7
  259. sky/provision/vsphere/common/vim_utils.py +1 -2
  260. sky/provision/vsphere/instance.py +15 -10
  261. sky/provision/vsphere/vsphere_utils.py +9 -19
  262. sky/py.typed +0 -0
  263. sky/resources.py +844 -118
  264. sky/schemas/__init__.py +0 -0
  265. sky/schemas/api/__init__.py +0 -0
  266. sky/schemas/api/responses.py +225 -0
  267. sky/schemas/db/README +4 -0
  268. sky/schemas/db/env.py +90 -0
  269. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  270. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  271. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  272. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  273. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  274. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  275. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  276. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  277. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  278. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  279. sky/schemas/db/script.py.mako +28 -0
  280. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  281. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  282. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  283. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  284. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  285. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  286. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  287. sky/schemas/generated/__init__.py +0 -0
  288. sky/schemas/generated/autostopv1_pb2.py +36 -0
  289. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  290. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  291. sky/schemas/generated/jobsv1_pb2.py +86 -0
  292. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  293. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  294. sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
  295. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  296. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  297. sky/schemas/generated/servev1_pb2.py +58 -0
  298. sky/schemas/generated/servev1_pb2.pyi +115 -0
  299. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  300. sky/serve/autoscalers.py +357 -5
  301. sky/serve/client/impl.py +310 -0
  302. sky/serve/client/sdk.py +47 -139
  303. sky/serve/client/sdk_async.py +130 -0
  304. sky/serve/constants.py +10 -8
  305. sky/serve/controller.py +64 -19
  306. sky/serve/load_balancer.py +106 -60
  307. sky/serve/load_balancing_policies.py +115 -1
  308. sky/serve/replica_managers.py +273 -162
  309. sky/serve/serve_rpc_utils.py +179 -0
  310. sky/serve/serve_state.py +554 -251
  311. sky/serve/serve_utils.py +733 -220
  312. sky/serve/server/core.py +66 -711
  313. sky/serve/server/impl.py +1093 -0
  314. sky/serve/server/server.py +21 -18
  315. sky/serve/service.py +133 -48
  316. sky/serve/service_spec.py +135 -16
  317. sky/serve/spot_placer.py +3 -0
  318. sky/server/auth/__init__.py +0 -0
  319. sky/server/auth/authn.py +50 -0
  320. sky/server/auth/loopback.py +38 -0
  321. sky/server/auth/oauth2_proxy.py +200 -0
  322. sky/server/common.py +475 -181
  323. sky/server/config.py +81 -23
  324. sky/server/constants.py +44 -6
  325. sky/server/daemons.py +229 -0
  326. sky/server/html/token_page.html +185 -0
  327. sky/server/metrics.py +160 -0
  328. sky/server/requests/executor.py +528 -138
  329. sky/server/requests/payloads.py +351 -17
  330. sky/server/requests/preconditions.py +21 -17
  331. sky/server/requests/process.py +112 -29
  332. sky/server/requests/request_names.py +120 -0
  333. sky/server/requests/requests.py +817 -224
  334. sky/server/requests/serializers/decoders.py +82 -31
  335. sky/server/requests/serializers/encoders.py +140 -22
  336. sky/server/requests/threads.py +106 -0
  337. sky/server/rest.py +417 -0
  338. sky/server/server.py +1290 -284
  339. sky/server/state.py +20 -0
  340. sky/server/stream_utils.py +345 -57
  341. sky/server/uvicorn.py +217 -3
  342. sky/server/versions.py +270 -0
  343. sky/setup_files/MANIFEST.in +5 -0
  344. sky/setup_files/alembic.ini +156 -0
  345. sky/setup_files/dependencies.py +136 -31
  346. sky/setup_files/setup.py +44 -42
  347. sky/sky_logging.py +102 -5
  348. sky/skylet/attempt_skylet.py +1 -0
  349. sky/skylet/autostop_lib.py +129 -8
  350. sky/skylet/configs.py +27 -20
  351. sky/skylet/constants.py +171 -19
  352. sky/skylet/events.py +105 -21
  353. sky/skylet/job_lib.py +335 -104
  354. sky/skylet/log_lib.py +297 -18
  355. sky/skylet/log_lib.pyi +44 -1
  356. sky/skylet/ray_patches/__init__.py +17 -3
  357. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  358. sky/skylet/ray_patches/cli.py.diff +19 -0
  359. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  360. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  361. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  362. sky/skylet/ray_patches/updater.py.diff +18 -0
  363. sky/skylet/ray_patches/worker.py.diff +41 -0
  364. sky/skylet/services.py +564 -0
  365. sky/skylet/skylet.py +63 -4
  366. sky/skylet/subprocess_daemon.py +103 -29
  367. sky/skypilot_config.py +506 -99
  368. sky/ssh_node_pools/__init__.py +1 -0
  369. sky/ssh_node_pools/core.py +135 -0
  370. sky/ssh_node_pools/server.py +233 -0
  371. sky/task.py +621 -137
  372. sky/templates/aws-ray.yml.j2 +10 -3
  373. sky/templates/azure-ray.yml.j2 +1 -1
  374. sky/templates/do-ray.yml.j2 +1 -1
  375. sky/templates/gcp-ray.yml.j2 +57 -0
  376. sky/templates/hyperbolic-ray.yml.j2 +67 -0
  377. sky/templates/jobs-controller.yaml.j2 +27 -24
  378. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  379. sky/templates/kubernetes-ray.yml.j2 +607 -51
  380. sky/templates/lambda-ray.yml.j2 +1 -1
  381. sky/templates/nebius-ray.yml.j2 +33 -12
  382. sky/templates/paperspace-ray.yml.j2 +1 -1
  383. sky/templates/primeintellect-ray.yml.j2 +71 -0
  384. sky/templates/runpod-ray.yml.j2 +9 -1
  385. sky/templates/scp-ray.yml.j2 +3 -50
  386. sky/templates/seeweb-ray.yml.j2 +108 -0
  387. sky/templates/shadeform-ray.yml.j2 +72 -0
  388. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  389. sky/templates/websocket_proxy.py +178 -18
  390. sky/usage/usage_lib.py +18 -11
  391. sky/users/__init__.py +0 -0
  392. sky/users/model.conf +15 -0
  393. sky/users/permission.py +387 -0
  394. sky/users/rbac.py +121 -0
  395. sky/users/server.py +720 -0
  396. sky/users/token_service.py +218 -0
  397. sky/utils/accelerator_registry.py +34 -5
  398. sky/utils/admin_policy_utils.py +84 -38
  399. sky/utils/annotations.py +16 -5
  400. sky/utils/asyncio_utils.py +78 -0
  401. sky/utils/auth_utils.py +153 -0
  402. sky/utils/benchmark_utils.py +60 -0
  403. sky/utils/cli_utils/status_utils.py +159 -86
  404. sky/utils/cluster_utils.py +31 -9
  405. sky/utils/command_runner.py +354 -68
  406. sky/utils/command_runner.pyi +93 -3
  407. sky/utils/common.py +35 -8
  408. sky/utils/common_utils.py +310 -87
  409. sky/utils/config_utils.py +87 -5
  410. sky/utils/context.py +402 -0
  411. sky/utils/context_utils.py +222 -0
  412. sky/utils/controller_utils.py +264 -89
  413. sky/utils/dag_utils.py +31 -12
  414. sky/utils/db/__init__.py +0 -0
  415. sky/utils/db/db_utils.py +470 -0
  416. sky/utils/db/migration_utils.py +133 -0
  417. sky/utils/directory_utils.py +12 -0
  418. sky/utils/env_options.py +13 -0
  419. sky/utils/git.py +567 -0
  420. sky/utils/git_clone.sh +460 -0
  421. sky/utils/infra_utils.py +195 -0
  422. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  423. sky/utils/kubernetes/config_map_utils.py +133 -0
  424. sky/utils/kubernetes/create_cluster.sh +13 -27
  425. sky/utils/kubernetes/delete_cluster.sh +10 -7
  426. sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
  427. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  428. sky/utils/kubernetes/generate_kind_config.py +6 -66
  429. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  430. sky/utils/kubernetes/gpu_labeler.py +5 -5
  431. sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
  432. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  433. sky/utils/kubernetes/ssh_utils.py +221 -0
  434. sky/utils/kubernetes_enums.py +8 -15
  435. sky/utils/lock_events.py +94 -0
  436. sky/utils/locks.py +368 -0
  437. sky/utils/log_utils.py +300 -6
  438. sky/utils/perf_utils.py +22 -0
  439. sky/utils/resource_checker.py +298 -0
  440. sky/utils/resources_utils.py +249 -32
  441. sky/utils/rich_utils.py +213 -37
  442. sky/utils/schemas.py +905 -147
  443. sky/utils/serialize_utils.py +16 -0
  444. sky/utils/status_lib.py +10 -0
  445. sky/utils/subprocess_utils.py +38 -15
  446. sky/utils/tempstore.py +70 -0
  447. sky/utils/timeline.py +24 -52
  448. sky/utils/ux_utils.py +84 -15
  449. sky/utils/validator.py +11 -1
  450. sky/utils/volume.py +86 -0
  451. sky/utils/yaml_utils.py +111 -0
  452. sky/volumes/__init__.py +13 -0
  453. sky/volumes/client/__init__.py +0 -0
  454. sky/volumes/client/sdk.py +149 -0
  455. sky/volumes/server/__init__.py +0 -0
  456. sky/volumes/server/core.py +258 -0
  457. sky/volumes/server/server.py +122 -0
  458. sky/volumes/volume.py +212 -0
  459. sky/workspaces/__init__.py +0 -0
  460. sky/workspaces/core.py +655 -0
  461. sky/workspaces/server.py +101 -0
  462. sky/workspaces/utils.py +56 -0
  463. skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
  464. skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
  465. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
  466. sky/benchmark/benchmark_state.py +0 -256
  467. sky/benchmark/benchmark_utils.py +0 -641
  468. sky/clouds/service_catalog/constants.py +0 -7
  469. sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
  470. sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
  471. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  472. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  473. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  474. sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
  475. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  476. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  477. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  478. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  479. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  480. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  481. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  482. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
  483. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  484. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  485. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  486. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
  487. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  488. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  489. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  490. sky/jobs/dashboard/dashboard.py +0 -223
  491. sky/jobs/dashboard/static/favicon.ico +0 -0
  492. sky/jobs/dashboard/templates/index.html +0 -831
  493. sky/jobs/server/dashboard_utils.py +0 -69
  494. sky/skylet/providers/scp/__init__.py +0 -2
  495. sky/skylet/providers/scp/config.py +0 -149
  496. sky/skylet/providers/scp/node_provider.py +0 -578
  497. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  498. sky/utils/db_utils.py +0 -100
  499. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  500. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  501. skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
  502. skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
  503. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  504. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  505. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  506. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  507. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  508. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  509. /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
  510. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
  511. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
  512. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/utils/context.py ADDED
@@ -0,0 +1,402 @@
1
+ """SkyPilot context for threads and coroutines."""
2
+
3
+ import asyncio
4
+ from collections.abc import Mapping
5
+ import contextvars
6
+ import copy
7
+ import functools
8
+ import os
9
+ import pathlib
10
+ import subprocess
11
+ import sys
12
+ from typing import (Any, Callable, Coroutine, Dict, Iterator, MutableMapping,
13
+ Optional, TextIO, TYPE_CHECKING, TypeVar)
14
+
15
+ from typing_extensions import ParamSpec
16
+
17
+ if TYPE_CHECKING:
18
+ from sky.skypilot_config import ConfigContext
19
+
20
+
21
class SkyPilotContext:
    """SkyPilot typed context vars for threads and coroutines.

    This is a wrapper around `contextvars.ContextVar` that provides a typed
    interface for the SkyPilot specific context variables that can be accessed
    at any layer of the call stack. ContextVar is coroutine local: an empty
    Context will be initialized for each coroutine when it is created.

    Adding a new context variable for a new feature is as simple as:
    1. Add a new instance variable to the Context class.
    2. (Optional) Add new accessor methods if the variable should be protected.

    To propagate the context to a new thread/coroutine, use
    `contextvars.copy_context()`.

    Example:
        import asyncio
        import contextvars
        import time
        from sky.utils import context

        def sync_task():
            while True:
                if context.get().is_canceled():
                    break
                time.sleep(1)

        async def fastapi_handler():
            # context.initialize() has been called in lifespan.
            # asyncio.to_thread copies the current context implicitly; it
            # only returns a coroutine, so wrap it in a task to actually
            # start the worker before we cancel it.
            task = asyncio.create_task(asyncio.to_thread(sync_task))
            # Or explicitly:
            # loop = asyncio.get_running_loop()
            # ctx = contextvars.copy_context()
            # task = loop.run_in_executor(None, ctx.run, sync_task)
            await asyncio.sleep(1)
            context.get().cancel()
            await task
    """

    def __init__(self) -> None:
        # Cancellation flag; only set()/is_set() are used within this class.
        self._canceled = asyncio.Event()
        # Path of the file that output is currently redirected to, if any.
        self._log_file: Optional[pathlib.Path] = None
        # Open handle for `_log_file`; None means "use the fallback stream".
        self._log_file_handle: Optional[TextIO] = None
        # Environment variable overrides recorded for this context.
        self.env_overrides: Dict[str, str] = {}
        # Config context attached to this context, if any.
        self.config_context: Optional['ConfigContext'] = None

    def cancel(self) -> None:
        """Cancel the context."""
        self._canceled.set()

    def is_canceled(self) -> bool:
        """Check if the context is canceled."""
        return self._canceled.is_set()

    def redirect_log(
            self, log_file: Optional[pathlib.Path]) -> Optional[pathlib.Path]:
        """Redirect the stdout and stderr of current context to a file.

        Args:
            log_file: The log file to redirect to. If None, the stdout and
                stderr will be restored to the original streams.

        Returns:
            The old log file, or None if the stdout and stderr were not
            redirected.
        """
        original_log_file = self._log_file
        original_log_handle = self._log_file_handle
        if log_file is None:
            self._log_file_handle = None
        else:
            # Open the new file before touching the old handle, so a failing
            # open() leaves the current redirection intact.
            self._log_file_handle = open(log_file, 'a', encoding='utf-8')
        self._log_file = log_file
        if original_log_handle is not None:
            original_log_handle.close()
        return original_log_file

    def output_stream(self, fallback: TextIO) -> TextIO:
        """Return the stream that output of this context should go to.

        Args:
            fallback: The stream to use when no log redirection is active.

        Returns:
            The open log file handle if output is redirected, otherwise
            `fallback`.
        """
        if self._log_file_handle is None:
            return fallback
        return self._log_file_handle

    def override_envs(self, envs: Dict[str, str]) -> None:
        """Merge `envs` into the env var overrides of this context.

        Keys already present in the overrides are replaced.
        """
        self.env_overrides.update(envs)

    def cleanup(self) -> None:
        """Clean up the context."""
        if self._log_file_handle is not None:
            self._log_file_handle.close()
            self._log_file_handle = None

    def __enter__(self) -> 'SkyPilotContext':
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        del exc_type, exc_val, exc_tb
        self.cleanup()

    def copy(self) -> 'SkyPilotContext':
        """Create a copy of the context.

        Changes to the current context after this call will not affect the copy.
        The new context will get its own handle/fd for the log file.
        The new context will get an independent copy of the env var overrides.
        The new context will get an independent copy of the config context.
        Cancellation of the current context will not be propagated to the copy.
        """
        new_context = SkyPilotContext()
        # Copy the plain data first and open the log file last: if the deep
        # copy raises, no file has been opened yet, so nothing is leaked.
        new_context.env_overrides = self.env_overrides.copy()
        new_context.config_context = copy.deepcopy(self.config_context)
        new_context.redirect_log(self._log_file)
        return new_context
137
+
138
+
139
# Holds the SkyPilotContext of the current contextvars Context. The default,
# None, means that no SkyPilot context has been initialized.
_CONTEXT = contextvars.ContextVar[Optional[SkyPilotContext]]('sky_context',
                                                             default=None)
141
+
142
+
143
def get() -> Optional[SkyPilotContext]:
    """Return the SkyPilot context of the current execution context.

    None is returned when no context has been initialized. Sync code can use
    this to check whether it runs in a cancellable context, and skip polling
    the cancellation event when it does not.
    """
    current = _CONTEXT.get()
    return current
151
+
152
+
153
class ContextualEnviron(MutableMapping[str, str]):
    """Environment variables wrapper with contextual overrides.

    An instance of ContextualEnviron will typically be used to replace
    os.environ to make the environ access of the current process
    context-aware.

    Reads consult the env overrides of the current SkyPilot context first and
    fall back to the wrapped environ. Writes and deletes are recorded in the
    current context only; without a context they go straight to the wrapped
    environ. Deleting a key that exists in the wrapped environ is recorded as
    an override whose value is None.

    Behavior of spawning a subprocess:
    - The contextual overrides will not be applied to the subprocess by
      default.
    - When using env=os.environ to pass the environment variables to the
      subprocess explicitly. The subprocess will inherit the contextual
      environment variables at the time of the spawn, that is, it will not
      see the updates to the environment variables after the spawn. Also,
      os.environ of the subprocess will not be a ContextualEnviron unless
      the subprocess hijacks os.environ explicitly.
    - Optionally, context.Popen() can be used to automatically pass
      os.environ with overrides to subprocess.


    Example:
    1. Parent process:
        # Hijack os.environ to be a ContextualEnviron
        os.environ = ContextualEnviron(os.environ)
        ctx = context.get()
        ctx.override_envs({'FOO': 'BAR1'})
        proc = subprocess.Popen(..., env=os.environ)
        # Or use context.Popen instead
        # proc = context.Popen(...)
        ctx.override_envs({'FOO': 'BAR2'})
    2. Subprocess:
        assert os.environ['FOO'] == 'BAR1'
        ctx = context.get()
        # Override the contextual env var in the subprocess does not take
        # effect since the os.environ is not hijacked.
        ctx.override_envs({'FOO': 'BAR3'})
        assert os.environ['FOO'] == 'BAR1'
    """

    def __init__(self, environ: 'os._Environ[str]') -> None:
        self._environ = environ

    def __getitem__(self, key: str) -> str:
        ctx = get()
        if ctx is not None and key in ctx.env_overrides:
            value = ctx.env_overrides[key]
            # None is used to indicate that the key is deleted in the
            # context.
            if value is None:
                raise KeyError(key)
            return value
        return self._environ[key]

    def _iter_with_overrides(self, overrides) -> Iterator[str]:
        """Yield the visible keys given the overrides of a context."""
        for key, value in overrides.items():
            # Overrides with a None value mark deleted keys: hide them.
            if value is not None:
                yield key
        for key in self._environ:
            # Overridden keys were either yielded above or are deleted.
            if key not in overrides:
                yield key

    def __iter__(self) -> Iterator[str]:
        ctx = get()
        if ctx is None:
            return iter(self._environ)
        return self._iter_with_overrides(ctx.env_overrides)

    def __len__(self) -> int:
        # Count the visible keys without materializing the values.
        return sum(1 for _ in self)

    def __setitem__(self, key: str, value: str) -> None:
        ctx = get()
        if ctx is not None:
            ctx.env_overrides[key] = value
        else:
            self._environ[key] = value

    def __delitem__(self, key: str) -> None:
        ctx = get()
        if ctx is None:
            del self._environ[key]
            return
        overrides = ctx.env_overrides
        if key in overrides and overrides[key] is None:
            # Already deleted in this context: behave like a plain mapping
            # and report the key as missing.
            raise KeyError(key)
        if key in self._environ:
            # If the key is set in the environ of the process, we mark it as
            # deleted in the context by setting the value to None.
            # Note: we must do this even if it was also set in the context,
            # since it could be set in both, and deleting should delete it
            # from both.
            overrides[key] = None
        elif key in overrides:
            # If the key is set in the context, but not the original
            # environ, we can just delete the override.
            del overrides[key]
        else:
            # The key is not set in the context nor the process.
            raise KeyError(key)

    def __repr__(self) -> str:
        # Adapted from os._Environ.__repr__
        formatted_items = ', '.join(
            f'{key!r}: {value!r}' for key, value in self.items())
        return f'ctx_environ({{{formatted_items}}})'

    def copy(self) -> Dict[str, str]:
        """Return a snapshot dict of the environ with overrides applied."""
        copied = self._environ.copy()
        ctx = get()
        if ctx is not None:
            for key, value in ctx.env_overrides.items():
                if value is None:
                    # The key may be gone from the wrapped environ by now;
                    # a stale deletion marker must not fail the copy.
                    copied.pop(key, None)
                else:
                    copied[key] = value
        return copied

    def setdefault(self, key: str, default: str) -> str:
        # NOTE(review): this reads and writes the wrapped environ directly,
        # ignoring the overrides of the current context, unlike the other
        # accessors. Confirm whether a process-wide default is intended.
        return self._environ.setdefault(key, default)

    def __ior__(self, other):
        if not isinstance(other, Mapping):
            return NotImplemented
        self.update(other)
        return self

    def __or__(self, other):
        if not isinstance(other, Mapping):
            return NotImplemented
        new = dict(self)
        new.update(other)
        return new

    def __ror__(self, other):
        if not isinstance(other, Mapping):
            return NotImplemented
        new = dict(other)
        new.update(self)
        return new
296
+
297
+
298
class Popen(subprocess.Popen):
    """subprocess.Popen that defaults ``env`` to a copy of os.environ.

    When ``env`` is omitted or None, the result of ``os.environ.copy()`` at
    construction time is passed instead. An explicitly given ``env`` is
    forwarded unchanged.
    """

    def __init__(self, *args, **kwargs):
        if kwargs.get('env') is None:
            # Pass a copy of current context.environ to avoid race condition
            # when the context is updated after the Popen is created.
            kwargs['env'] = os.environ.copy()
        super().__init__(*args, **kwargs)
307
+
308
+
309
# Generic parameters used by the decorators below to preserve the signature
# (P) and the return type (T) of the decorated function.
P = ParamSpec('P')
T = TypeVar('T')
311
+
312
+
313
def contextual(func: Callable[P, T]) -> Callable[P, T]:
    """Decorator that runs the function inside its own SkyPilot context.

    Every call runs in a copy of the caller's contextvars Context, in which a
    new SkyPilotContext is initialized. If the caller already has a
    SkyPilotContext, the new one is created from it by initialize().
    """

    def _call_with_new_context(*args: P.args, **kwargs: P.kwargs) -> T:
        # Runs inside the copied contextvars Context: set up the
        # SkyPilotContext there, based on the caller's one (if any).
        with initialize(get()):
            return func(*args, **kwargs)

    @functools.wraps(func)
    def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
        # Run in a copy of the current contextvars Context so that setting
        # the SkyPilotContext does not affect the caller's context.
        isolated = contextvars.copy_context()
        return isolated.run(_call_with_new_context, *args, **kwargs)

    return wrapper
335
+
336
+
337
def contextual_async(
    func: Callable[P, Coroutine[Any, Any, T]]
) -> Callable[P, Coroutine[Any, Any, T]]:
    """Decorator to initialize a context before executing the coroutine.

    If a context is already initialized, this decorator will create a new
    context that inherits the values from the existing context. When the
    coroutine finishes (normally, with an exception, or by cancellation) the
    new context is cleaned up and the caller's previous SkyPilotContext is
    restored.
    """

    @functools.wraps(func)
    async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
        # The body of a coroutine runs in the contextvars Context of the task
        # that awaits it, not in the Context it was created in. Creating the
        # coroutine through `Context.run()` therefore does not isolate it:
        # the `_CONTEXT.set()` done by initialize() stays visible to the
        # caller after this function returns. Restore the caller's value
        # explicitly instead.
        original_ctx = get()
        try:
            with initialize(original_ctx):
                return await func(*args, **kwargs)
        finally:
            _CONTEXT.set(original_ctx)

    return wrapper
361
+
362
+
363
def initialize(
        base_context: Optional[SkyPilotContext] = None) -> SkyPilotContext:
    """Create a SkyPilot context and make it the current one.

    Args:
        base_context: If given, the new context is ``base_context.copy()``;
            otherwise a fresh SkyPilotContext is created.

    Returns:
        The new context, which has been stored in the context variable.
    """
    if base_context is None:
        new_context = SkyPilotContext()
    else:
        new_context = base_context.copy()
    _CONTEXT.set(new_context)
    return new_context
370
+
371
+
372
class _ContextualStream:
    """A base class for streams that are contextually aware.

    The TextIO interface is provided through __getattr__: every attribute
    that is not found on this object is looked up on the stream that is
    active at that moment, i.e. the output stream of the current SkyPilot
    context, or the original stream when there is no context.
    """
    _original_stream: TextIO

    def __init__(self, original_stream: TextIO):
        self._original_stream = original_stream

    def __getattr__(self, attr: str):
        target = self._active_stream()
        return getattr(target, attr)

    def _active_stream(self) -> TextIO:
        """Return the stream that attribute access is delegated to."""
        ctx = get()
        return (self._original_stream
                if ctx is None else ctx.output_stream(self._original_stream))
391
+
392
+
393
class Stdout(_ContextualStream):
    """Contextual stream that falls back to sys.stdout.

    The fallback is whatever sys.stdout refers to when the instance is
    created.
    """

    def __init__(self):
        super().__init__(original_stream=sys.stdout)
397
+
398
+
399
class Stderr(_ContextualStream):
    """Contextual stream that falls back to sys.stderr.

    The fallback is whatever sys.stderr refers to when the instance is
    created.
    """

    def __init__(self):
        super().__init__(original_stream=sys.stderr)
@@ -0,0 +1,222 @@
1
+ """Utilities for SkyPilot context."""
2
+ import asyncio
3
+ import concurrent.futures
4
+ import contextvars
5
+ import functools
6
+ import io
7
+ import multiprocessing
8
+ import os
9
+ import subprocess
10
+ import sys
11
+ import time
12
+ import typing
13
+ from typing import Any, Callable, IO, Optional, Tuple, TypeVar
14
+
15
+ from typing_extensions import ParamSpec
16
+
17
+ from sky import sky_logging
18
+ from sky.utils import context
19
+ from sky.utils import subprocess_utils
20
+
21
# Signature of a stream handler: it is called as
# handler(in_stream, out_stream) and returns a string.
StreamHandler = Callable[[IO[Any], IO[Any]], str]
# Minimum number of seconds between two flushes of the output stream in
# passthrough_stream_handler().
PASSTHROUGH_FLUSH_INTERVAL_SECONDS = 0.5

logger = sky_logging.init_logger(__name__)
25
+
26
+
27
# TODO(aylei): call hijack_sys_attrs() proactively in module init at
# server-side once we have context widely adopted.
def hijack_sys_attrs():
    """Hijack system attributes to be context aware.

    Replaces sys.stdout, sys.stderr, os.environ and subprocess.Popen with the
    wrappers from sky.utils.context, and reloads the SkyPilot logger after
    the streams have been replaced.

    This function should be called at the very beginning of the processes
    that might use sky.utils.context.

    NOTE(review): each call wraps whatever is installed at that moment, so
    calling this more than once would nest the wrappers. Presumably it is
    meant to be called once per process; confirm with the callers.
    """
    # Modify stdout and stderr of uvicorn process to be contextually aware,
    # use setattr to bypass the TextIO type check.
    setattr(sys, 'stdout', context.Stdout())
    setattr(sys, 'stderr', context.Stderr())
    # Reload logger to apply latest stdout and stderr.
    sky_logging.reload_logger()
    # Hijack os.environ with ContextualEnviron to make env variables
    # contextually aware.
    setattr(os, 'environ', context.ContextualEnviron(os.environ))
    # Hijack subprocess.Popen to pass the contextual environ to subprocess
    # by default.
    setattr(subprocess, 'Popen', context.Popen)
47
+
48
+
49
def passthrough_stream_handler(in_stream: IO[Any], out_stream: IO[Any]) -> str:
    """Pass the stream from the process through to the output stream.

    The input is decoded as UTF-8 (undecodable bytes are replaced) and
    forwarded line by line with the original line endings preserved. The
    output stream is flushed at most once per
    PASSTHROUGH_FLUSH_INTERVAL_SECONDS while lines arrive, and once more when
    the input reaches end-of-file.

    Args:
        in_stream: The binary stream to read from.
        out_stream: The text stream to write the decoded lines to.

    Returns:
        Always an empty string: the output is forwarded, not captured.
    """
    wrapped = io.TextIOWrapper(in_stream,
                               encoding='utf-8',
                               newline='',
                               errors='replace',
                               write_through=True)
    # Measure the flush interval with a monotonic clock so that adjustments
    # of the system clock can neither delay nor force a flush.
    last_flush_time = time.monotonic()
    # readline() returns '' only at end-of-file.
    for line in iter(wrapped.readline, ''):
        out_stream.write(line)

        # Flush based on timeout instead of on every line
        current_time = time.monotonic()
        if (current_time - last_flush_time >=
                PASSTHROUGH_FLUSH_INTERVAL_SECONDS):
            out_stream.flush()
            last_flush_time = current_time

    # Final flush to ensure all data is written
    out_stream.flush()
    return ''
74
+
75
+
76
def pipe_and_wait_process(
    ctx: context.SkyPilotContext,
    proc: subprocess.Popen,
    poll_interval: float = 0.5,
    cancel_callback: Optional[Callable[[], None]] = None,
    stdout_stream_handler: Optional[StreamHandler] = None,
    stderr_stream_handler: Optional[StreamHandler] = None
) -> Tuple[str, str]:
    """Wait for the process to finish or cancel it if the context is cancelled.

    While waiting, the stdout and stderr pipes of the process are consumed by
    the stream handlers in separate threads.

    Args:
        ctx: The SkyPilot context. It provides the output streams for the
            handlers and is passed to wait_process().
        proc: The process to wait for.
        poll_interval: The interval to poll the process.
        cancel_callback: The callback to call if the context is cancelled.
        stdout_stream_handler: An optional handler to handle the stdout stream,
            if None, the stdout stream will be passed through.
        stderr_stream_handler: An optional handler to handle the stderr stream,
            if None, the stderr stream will be passed through.

    Returns:
        A (stdout, stderr) tuple holding the return values of the stream
        handlers. An entry is '' when the corresponding pipe of ``proc`` is
        None, in which case its handler is not called.
    """
    # `import multiprocessing` alone does not load the `pool` submodule, so
    # `multiprocessing.pool` only resolves if some other module happened to
    # import it first. Import it explicitly.
    # pylint: disable=import-outside-toplevel
    from multiprocessing import pool as mp_pool

    if stdout_stream_handler is None:
        stdout_stream_handler = passthrough_stream_handler
    if stderr_stream_handler is None:
        stderr_stream_handler = passthrough_stream_handler

    with mp_pool.ThreadPool(processes=2) as thread_pool:
        # Context will be lost in the new thread, capture current output stream
        # and pass it to the new thread directly.
        stdout_fut = None
        if proc.stdout is not None:
            stdout_fut = thread_pool.apply_async(
                stdout_stream_handler,
                (proc.stdout, ctx.output_stream(sys.stdout)))
        stderr_fut = None
        if proc.stderr is not None:
            stderr_fut = thread_pool.apply_async(
                stderr_stream_handler,
                (proc.stderr, ctx.output_stream(sys.stderr)))
        try:
            wait_process(ctx,
                         proc,
                         poll_interval=poll_interval,
                         cancel_callback=cancel_callback)
        finally:
            # Wait for the stream handler threads to exit when process is done
            # or cancelled
            for fut in (stdout_fut, stderr_fut):
                if fut is not None:
                    fut.wait()
        # get() re-raises any exception raised inside a handler.
        stdout = stdout_fut.get() if stdout_fut is not None else ''
        stderr = stderr_fut.get() if stderr_fut is not None else ''
    return stdout, stderr
128
+
129
+
130
def wait_process(ctx: context.SkyPilotContext,
                 proc: subprocess.Popen,
                 poll_interval: float = 0.5,
                 cancel_callback: Optional[Callable[[], None]] = None):
    """Wait for the process to finish or cancel it if the context is cancelled.

    Args:
        ctx: The SkyPilot context whose cancellation state is polled.
        proc: The process to wait for.
        poll_interval: The interval to poll the process.
        cancel_callback: The callback to call if the context is cancelled.

    Raises:
        asyncio.CancelledError: If the context is cancelled before the
            process exits. The process is killed before raising.
    """
    while not ctx.is_canceled():
        try:
            proc.wait(poll_interval)
        except subprocess.TimeoutExpired:
            # Still running: check the cancellation state again.
            continue
        # Process exited
        return

    if cancel_callback is not None:
        cancel_callback()
    # Kill the process despite the caller's callback, the utility
    # function gracefully handles the case where the process is
    # already terminated.
    # Bash script typically does not forward SIGTERM to childs, thus
    # cannot be killed gracefully, shorten the grace period for faster
    # termination.
    subprocess_utils.kill_process_with_grace_period(proc, grace_period=1)
    raise asyncio.CancelledError()
161
+
162
+
163
# Type variable bound to any callable; lets cancellation_guard() declare that
# it returns the same type it receives.
F = TypeVar('F', bound=Callable[..., Any])
164
+
165
+
166
def cancellation_guard(func: F) -> F:
    """Decorator to make a synchronous function cancellable via context.

    Before every call, the current SkyPilot context (if any) is checked and
    asyncio.CancelledError is raised instead of calling the function when
    the context is already cancelled. Nothing is checked once the function
    has started running.

    This basically mimics the behavior of asyncio, which checks coroutine
    cancelled in await call.

    Args:
        func: The function to be decorated.

    Returns:
        The wrapped function that checks cancellation before execution.

    Raises:
        asyncio.CancelledError: If the context is cancelled before execution.
    """

    @functools.wraps(func)
    def guarded(*args, **kwargs):
        current = context.get()
        canceled = current is not None and current.is_canceled()
        if canceled:
            raise asyncio.CancelledError(
                f'Function {func.__name__} cancelled before execution')
        return func(*args, **kwargs)

    return typing.cast(F, guarded)
195
+
196
+
197
# Generic parameters used by to_thread() and to_thread_with_executor() to
# type the forwarded arguments (P) and the result (T) of the function.
P = ParamSpec('P')
T = TypeVar('T')
199
+
200
+
201
# TODO(aylei): replace this with asyncio.to_thread once we drop support for
# python 3.8
def to_thread(func: Callable[P, T], /, *args: P.args,
              **kwargs: P.kwargs) -> 'asyncio.Future[T]':
    """Asynchronously run function *func* in a separate thread.

    This is same as asyncio.to_thread added in python 3.9. The call is
    delegated to to_thread_with_executor() with no executor given.
    """
    no_executor = None
    return to_thread_with_executor(no_executor, func, *args, **kwargs)
210
+
211
+
212
def to_thread_with_executor(executor: Optional[concurrent.futures.Executor],
                            func: Callable[P, T], /, *args: P.args,
                            **kwargs: P.kwargs) -> 'asyncio.Future[T]':
    """Asynchronously run function *func* in a separate thread with
    a custom executor.

    The function runs inside a copy of the caller's contextvars Context, so
    it sees the context variables of the caller at the time of this call.

    Args:
        executor: The executor handed to the running loop's
            run_in_executor(); None is passed through unchanged.
        func: The function to run.
        *args: Positional arguments for *func*.
        **kwargs: Keyword arguments for *func*.

    Returns:
        The future returned by the running event loop's run_in_executor().
    """
    running_loop = asyncio.get_running_loop()
    caller_context = contextvars.copy_context()
    bound_call: Callable[..., T] = functools.partial(caller_context.run, func,
                                                     *args, **kwargs)
    return running_loop.run_in_executor(executor, bound_call)