skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546)
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/hyperbolic.py +8 -0
  8. sky/adaptors/ibm.py +5 -2
  9. sky/adaptors/kubernetes.py +149 -18
  10. sky/adaptors/nebius.py +173 -30
  11. sky/adaptors/primeintellect.py +1 -0
  12. sky/adaptors/runpod.py +68 -0
  13. sky/adaptors/seeweb.py +183 -0
  14. sky/adaptors/shadeform.py +89 -0
  15. sky/admin_policy.py +187 -4
  16. sky/authentication.py +179 -225
  17. sky/backends/__init__.py +4 -2
  18. sky/backends/backend.py +22 -9
  19. sky/backends/backend_utils.py +1323 -397
  20. sky/backends/cloud_vm_ray_backend.py +1749 -1029
  21. sky/backends/docker_utils.py +1 -1
  22. sky/backends/local_docker_backend.py +11 -6
  23. sky/backends/task_codegen.py +633 -0
  24. sky/backends/wheel_utils.py +55 -9
  25. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  26. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  27. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  28. sky/{clouds/service_catalog → catalog}/common.py +90 -49
  29. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  31. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  33. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
  34. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  35. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  36. sky/catalog/data_fetchers/fetch_nebius.py +338 -0
  37. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  38. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  39. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  40. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  41. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  42. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  43. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  44. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  45. sky/catalog/hyperbolic_catalog.py +136 -0
  46. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  47. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  48. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  49. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  50. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  51. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  52. sky/catalog/primeintellect_catalog.py +95 -0
  53. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  54. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  55. sky/catalog/seeweb_catalog.py +184 -0
  56. sky/catalog/shadeform_catalog.py +165 -0
  57. sky/catalog/ssh_catalog.py +167 -0
  58. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  59. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  60. sky/check.py +533 -185
  61. sky/cli.py +5 -5975
  62. sky/client/{cli.py → cli/command.py} +2591 -1956
  63. sky/client/cli/deprecation_utils.py +99 -0
  64. sky/client/cli/flags.py +359 -0
  65. sky/client/cli/table_utils.py +322 -0
  66. sky/client/cli/utils.py +79 -0
  67. sky/client/common.py +78 -32
  68. sky/client/oauth.py +82 -0
  69. sky/client/sdk.py +1219 -319
  70. sky/client/sdk_async.py +827 -0
  71. sky/client/service_account_auth.py +47 -0
  72. sky/cloud_stores.py +82 -3
  73. sky/clouds/__init__.py +13 -0
  74. sky/clouds/aws.py +564 -164
  75. sky/clouds/azure.py +105 -83
  76. sky/clouds/cloud.py +140 -40
  77. sky/clouds/cudo.py +68 -50
  78. sky/clouds/do.py +66 -48
  79. sky/clouds/fluidstack.py +63 -44
  80. sky/clouds/gcp.py +339 -110
  81. sky/clouds/hyperbolic.py +293 -0
  82. sky/clouds/ibm.py +70 -49
  83. sky/clouds/kubernetes.py +570 -162
  84. sky/clouds/lambda_cloud.py +74 -54
  85. sky/clouds/nebius.py +210 -81
  86. sky/clouds/oci.py +88 -66
  87. sky/clouds/paperspace.py +61 -44
  88. sky/clouds/primeintellect.py +317 -0
  89. sky/clouds/runpod.py +164 -74
  90. sky/clouds/scp.py +89 -86
  91. sky/clouds/seeweb.py +477 -0
  92. sky/clouds/shadeform.py +400 -0
  93. sky/clouds/ssh.py +263 -0
  94. sky/clouds/utils/aws_utils.py +10 -4
  95. sky/clouds/utils/gcp_utils.py +87 -11
  96. sky/clouds/utils/oci_utils.py +38 -14
  97. sky/clouds/utils/scp_utils.py +231 -167
  98. sky/clouds/vast.py +99 -77
  99. sky/clouds/vsphere.py +51 -40
  100. sky/core.py +375 -173
  101. sky/dag.py +15 -0
  102. sky/dashboard/out/404.html +1 -1
  103. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  105. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  106. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  107. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  110. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  111. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  112. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  113. sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  115. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  116. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  118. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  119. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  121. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  122. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  124. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  126. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  127. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  128. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  129. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  131. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  133. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  134. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  136. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  139. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  141. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  143. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  144. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  148. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  149. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  151. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  152. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
  155. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  156. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  157. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  158. sky/dashboard/out/clusters/[cluster].html +1 -1
  159. sky/dashboard/out/clusters.html +1 -1
  160. sky/dashboard/out/config.html +1 -0
  161. sky/dashboard/out/index.html +1 -1
  162. sky/dashboard/out/infra/[context].html +1 -0
  163. sky/dashboard/out/infra.html +1 -0
  164. sky/dashboard/out/jobs/[job].html +1 -1
  165. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  166. sky/dashboard/out/jobs.html +1 -1
  167. sky/dashboard/out/users.html +1 -0
  168. sky/dashboard/out/volumes.html +1 -0
  169. sky/dashboard/out/workspace/new.html +1 -0
  170. sky/dashboard/out/workspaces/[name].html +1 -0
  171. sky/dashboard/out/workspaces.html +1 -0
  172. sky/data/data_utils.py +137 -1
  173. sky/data/mounting_utils.py +269 -84
  174. sky/data/storage.py +1460 -1807
  175. sky/data/storage_utils.py +43 -57
  176. sky/exceptions.py +126 -2
  177. sky/execution.py +216 -63
  178. sky/global_user_state.py +2390 -586
  179. sky/jobs/__init__.py +7 -0
  180. sky/jobs/client/sdk.py +300 -58
  181. sky/jobs/client/sdk_async.py +161 -0
  182. sky/jobs/constants.py +15 -8
  183. sky/jobs/controller.py +848 -275
  184. sky/jobs/file_content_utils.py +128 -0
  185. sky/jobs/log_gc.py +193 -0
  186. sky/jobs/recovery_strategy.py +402 -152
  187. sky/jobs/scheduler.py +314 -189
  188. sky/jobs/server/core.py +836 -255
  189. sky/jobs/server/server.py +156 -115
  190. sky/jobs/server/utils.py +136 -0
  191. sky/jobs/state.py +2109 -706
  192. sky/jobs/utils.py +1306 -215
  193. sky/logs/__init__.py +21 -0
  194. sky/logs/agent.py +108 -0
  195. sky/logs/aws.py +243 -0
  196. sky/logs/gcp.py +91 -0
  197. sky/metrics/__init__.py +0 -0
  198. sky/metrics/utils.py +453 -0
  199. sky/models.py +78 -1
  200. sky/optimizer.py +164 -70
  201. sky/provision/__init__.py +90 -4
  202. sky/provision/aws/config.py +147 -26
  203. sky/provision/aws/instance.py +136 -50
  204. sky/provision/azure/instance.py +11 -6
  205. sky/provision/common.py +13 -1
  206. sky/provision/cudo/cudo_machine_type.py +1 -1
  207. sky/provision/cudo/cudo_utils.py +14 -8
  208. sky/provision/cudo/cudo_wrapper.py +72 -71
  209. sky/provision/cudo/instance.py +10 -6
  210. sky/provision/do/instance.py +10 -6
  211. sky/provision/do/utils.py +4 -3
  212. sky/provision/docker_utils.py +140 -33
  213. sky/provision/fluidstack/instance.py +13 -8
  214. sky/provision/gcp/__init__.py +1 -0
  215. sky/provision/gcp/config.py +301 -19
  216. sky/provision/gcp/constants.py +218 -0
  217. sky/provision/gcp/instance.py +36 -8
  218. sky/provision/gcp/instance_utils.py +18 -4
  219. sky/provision/gcp/volume_utils.py +247 -0
  220. sky/provision/hyperbolic/__init__.py +12 -0
  221. sky/provision/hyperbolic/config.py +10 -0
  222. sky/provision/hyperbolic/instance.py +437 -0
  223. sky/provision/hyperbolic/utils.py +373 -0
  224. sky/provision/instance_setup.py +101 -20
  225. sky/provision/kubernetes/__init__.py +5 -0
  226. sky/provision/kubernetes/config.py +9 -52
  227. sky/provision/kubernetes/constants.py +17 -0
  228. sky/provision/kubernetes/instance.py +919 -280
  229. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  230. sky/provision/kubernetes/network.py +27 -17
  231. sky/provision/kubernetes/network_utils.py +44 -43
  232. sky/provision/kubernetes/utils.py +1221 -534
  233. sky/provision/kubernetes/volume.py +343 -0
  234. sky/provision/lambda_cloud/instance.py +22 -16
  235. sky/provision/nebius/constants.py +50 -0
  236. sky/provision/nebius/instance.py +19 -6
  237. sky/provision/nebius/utils.py +237 -137
  238. sky/provision/oci/instance.py +10 -5
  239. sky/provision/paperspace/instance.py +10 -7
  240. sky/provision/paperspace/utils.py +1 -1
  241. sky/provision/primeintellect/__init__.py +10 -0
  242. sky/provision/primeintellect/config.py +11 -0
  243. sky/provision/primeintellect/instance.py +454 -0
  244. sky/provision/primeintellect/utils.py +398 -0
  245. sky/provision/provisioner.py +117 -36
  246. sky/provision/runpod/__init__.py +5 -0
  247. sky/provision/runpod/instance.py +27 -6
  248. sky/provision/runpod/utils.py +51 -18
  249. sky/provision/runpod/volume.py +214 -0
  250. sky/provision/scp/__init__.py +15 -0
  251. sky/provision/scp/config.py +93 -0
  252. sky/provision/scp/instance.py +707 -0
  253. sky/provision/seeweb/__init__.py +11 -0
  254. sky/provision/seeweb/config.py +13 -0
  255. sky/provision/seeweb/instance.py +812 -0
  256. sky/provision/shadeform/__init__.py +11 -0
  257. sky/provision/shadeform/config.py +12 -0
  258. sky/provision/shadeform/instance.py +351 -0
  259. sky/provision/shadeform/shadeform_utils.py +83 -0
  260. sky/provision/ssh/__init__.py +18 -0
  261. sky/provision/vast/instance.py +13 -8
  262. sky/provision/vast/utils.py +10 -7
  263. sky/provision/volume.py +164 -0
  264. sky/provision/vsphere/common/ssl_helper.py +1 -1
  265. sky/provision/vsphere/common/vapiconnect.py +2 -1
  266. sky/provision/vsphere/common/vim_utils.py +4 -4
  267. sky/provision/vsphere/instance.py +15 -10
  268. sky/provision/vsphere/vsphere_utils.py +17 -20
  269. sky/py.typed +0 -0
  270. sky/resources.py +845 -119
  271. sky/schemas/__init__.py +0 -0
  272. sky/schemas/api/__init__.py +0 -0
  273. sky/schemas/api/responses.py +227 -0
  274. sky/schemas/db/README +4 -0
  275. sky/schemas/db/env.py +90 -0
  276. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  277. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  278. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  279. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  280. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  281. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  282. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  283. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  284. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  285. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  286. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  287. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  288. sky/schemas/db/script.py.mako +28 -0
  289. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  290. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  291. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  292. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  293. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  294. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  295. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  296. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  297. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  298. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  299. sky/schemas/generated/__init__.py +0 -0
  300. sky/schemas/generated/autostopv1_pb2.py +36 -0
  301. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  302. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  303. sky/schemas/generated/jobsv1_pb2.py +86 -0
  304. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  305. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  306. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  307. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  308. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  309. sky/schemas/generated/servev1_pb2.py +58 -0
  310. sky/schemas/generated/servev1_pb2.pyi +115 -0
  311. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  312. sky/serve/autoscalers.py +357 -5
  313. sky/serve/client/impl.py +310 -0
  314. sky/serve/client/sdk.py +47 -139
  315. sky/serve/client/sdk_async.py +130 -0
  316. sky/serve/constants.py +12 -9
  317. sky/serve/controller.py +68 -17
  318. sky/serve/load_balancer.py +106 -60
  319. sky/serve/load_balancing_policies.py +116 -2
  320. sky/serve/replica_managers.py +434 -249
  321. sky/serve/serve_rpc_utils.py +179 -0
  322. sky/serve/serve_state.py +569 -257
  323. sky/serve/serve_utils.py +775 -265
  324. sky/serve/server/core.py +66 -711
  325. sky/serve/server/impl.py +1093 -0
  326. sky/serve/server/server.py +21 -18
  327. sky/serve/service.py +192 -89
  328. sky/serve/service_spec.py +144 -20
  329. sky/serve/spot_placer.py +3 -0
  330. sky/server/auth/__init__.py +0 -0
  331. sky/server/auth/authn.py +50 -0
  332. sky/server/auth/loopback.py +38 -0
  333. sky/server/auth/oauth2_proxy.py +202 -0
  334. sky/server/common.py +478 -182
  335. sky/server/config.py +85 -23
  336. sky/server/constants.py +44 -6
  337. sky/server/daemons.py +295 -0
  338. sky/server/html/token_page.html +185 -0
  339. sky/server/metrics.py +160 -0
  340. sky/server/middleware_utils.py +166 -0
  341. sky/server/requests/executor.py +558 -138
  342. sky/server/requests/payloads.py +364 -24
  343. sky/server/requests/preconditions.py +21 -17
  344. sky/server/requests/process.py +112 -29
  345. sky/server/requests/request_names.py +121 -0
  346. sky/server/requests/requests.py +822 -226
  347. sky/server/requests/serializers/decoders.py +82 -31
  348. sky/server/requests/serializers/encoders.py +140 -22
  349. sky/server/requests/threads.py +117 -0
  350. sky/server/rest.py +455 -0
  351. sky/server/server.py +1309 -285
  352. sky/server/state.py +20 -0
  353. sky/server/stream_utils.py +327 -61
  354. sky/server/uvicorn.py +217 -3
  355. sky/server/versions.py +270 -0
  356. sky/setup_files/MANIFEST.in +11 -1
  357. sky/setup_files/alembic.ini +160 -0
  358. sky/setup_files/dependencies.py +139 -31
  359. sky/setup_files/setup.py +44 -42
  360. sky/sky_logging.py +114 -7
  361. sky/skylet/attempt_skylet.py +106 -24
  362. sky/skylet/autostop_lib.py +129 -8
  363. sky/skylet/configs.py +29 -20
  364. sky/skylet/constants.py +216 -25
  365. sky/skylet/events.py +101 -21
  366. sky/skylet/job_lib.py +345 -164
  367. sky/skylet/log_lib.py +297 -18
  368. sky/skylet/log_lib.pyi +44 -1
  369. sky/skylet/providers/ibm/node_provider.py +12 -8
  370. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  371. sky/skylet/ray_patches/__init__.py +17 -3
  372. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  373. sky/skylet/ray_patches/cli.py.diff +19 -0
  374. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  375. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  376. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  377. sky/skylet/ray_patches/updater.py.diff +18 -0
  378. sky/skylet/ray_patches/worker.py.diff +41 -0
  379. sky/skylet/runtime_utils.py +21 -0
  380. sky/skylet/services.py +568 -0
  381. sky/skylet/skylet.py +72 -4
  382. sky/skylet/subprocess_daemon.py +104 -29
  383. sky/skypilot_config.py +506 -99
  384. sky/ssh_node_pools/__init__.py +1 -0
  385. sky/ssh_node_pools/core.py +135 -0
  386. sky/ssh_node_pools/server.py +233 -0
  387. sky/task.py +685 -163
  388. sky/templates/aws-ray.yml.j2 +11 -3
  389. sky/templates/azure-ray.yml.j2 +2 -1
  390. sky/templates/cudo-ray.yml.j2 +1 -0
  391. sky/templates/do-ray.yml.j2 +2 -1
  392. sky/templates/fluidstack-ray.yml.j2 +1 -0
  393. sky/templates/gcp-ray.yml.j2 +62 -1
  394. sky/templates/hyperbolic-ray.yml.j2 +68 -0
  395. sky/templates/ibm-ray.yml.j2 +2 -1
  396. sky/templates/jobs-controller.yaml.j2 +27 -24
  397. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  398. sky/templates/kubernetes-ray.yml.j2 +611 -50
  399. sky/templates/lambda-ray.yml.j2 +2 -1
  400. sky/templates/nebius-ray.yml.j2 +34 -12
  401. sky/templates/oci-ray.yml.j2 +1 -0
  402. sky/templates/paperspace-ray.yml.j2 +2 -1
  403. sky/templates/primeintellect-ray.yml.j2 +72 -0
  404. sky/templates/runpod-ray.yml.j2 +10 -1
  405. sky/templates/scp-ray.yml.j2 +4 -50
  406. sky/templates/seeweb-ray.yml.j2 +171 -0
  407. sky/templates/shadeform-ray.yml.j2 +73 -0
  408. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  409. sky/templates/vast-ray.yml.j2 +1 -0
  410. sky/templates/vsphere-ray.yml.j2 +1 -0
  411. sky/templates/websocket_proxy.py +212 -37
  412. sky/usage/usage_lib.py +31 -15
  413. sky/users/__init__.py +0 -0
  414. sky/users/model.conf +15 -0
  415. sky/users/permission.py +397 -0
  416. sky/users/rbac.py +121 -0
  417. sky/users/server.py +720 -0
  418. sky/users/token_service.py +218 -0
  419. sky/utils/accelerator_registry.py +35 -5
  420. sky/utils/admin_policy_utils.py +84 -38
  421. sky/utils/annotations.py +38 -5
  422. sky/utils/asyncio_utils.py +78 -0
  423. sky/utils/atomic.py +1 -1
  424. sky/utils/auth_utils.py +153 -0
  425. sky/utils/benchmark_utils.py +60 -0
  426. sky/utils/cli_utils/status_utils.py +159 -86
  427. sky/utils/cluster_utils.py +31 -9
  428. sky/utils/command_runner.py +354 -68
  429. sky/utils/command_runner.pyi +93 -3
  430. sky/utils/common.py +35 -8
  431. sky/utils/common_utils.py +314 -91
  432. sky/utils/config_utils.py +74 -5
  433. sky/utils/context.py +403 -0
  434. sky/utils/context_utils.py +242 -0
  435. sky/utils/controller_utils.py +383 -89
  436. sky/utils/dag_utils.py +31 -12
  437. sky/utils/db/__init__.py +0 -0
  438. sky/utils/db/db_utils.py +485 -0
  439. sky/utils/db/kv_cache.py +149 -0
  440. sky/utils/db/migration_utils.py +137 -0
  441. sky/utils/directory_utils.py +12 -0
  442. sky/utils/env_options.py +13 -0
  443. sky/utils/git.py +567 -0
  444. sky/utils/git_clone.sh +460 -0
  445. sky/utils/infra_utils.py +195 -0
  446. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  447. sky/utils/kubernetes/config_map_utils.py +133 -0
  448. sky/utils/kubernetes/create_cluster.sh +15 -29
  449. sky/utils/kubernetes/delete_cluster.sh +10 -7
  450. sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
  451. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  452. sky/utils/kubernetes/generate_kind_config.py +6 -66
  453. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  454. sky/utils/kubernetes/gpu_labeler.py +18 -8
  455. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  456. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  457. sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
  458. sky/utils/kubernetes/rsync_helper.sh +11 -3
  459. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  460. sky/utils/kubernetes/ssh_utils.py +221 -0
  461. sky/utils/kubernetes_enums.py +8 -15
  462. sky/utils/lock_events.py +94 -0
  463. sky/utils/locks.py +416 -0
  464. sky/utils/log_utils.py +82 -107
  465. sky/utils/perf_utils.py +22 -0
  466. sky/utils/resource_checker.py +298 -0
  467. sky/utils/resources_utils.py +249 -32
  468. sky/utils/rich_utils.py +217 -39
  469. sky/utils/schemas.py +955 -160
  470. sky/utils/serialize_utils.py +16 -0
  471. sky/utils/status_lib.py +10 -0
  472. sky/utils/subprocess_utils.py +29 -15
  473. sky/utils/tempstore.py +70 -0
  474. sky/utils/thread_utils.py +91 -0
  475. sky/utils/timeline.py +26 -53
  476. sky/utils/ux_utils.py +84 -15
  477. sky/utils/validator.py +11 -1
  478. sky/utils/volume.py +165 -0
  479. sky/utils/yaml_utils.py +111 -0
  480. sky/volumes/__init__.py +13 -0
  481. sky/volumes/client/__init__.py +0 -0
  482. sky/volumes/client/sdk.py +150 -0
  483. sky/volumes/server/__init__.py +0 -0
  484. sky/volumes/server/core.py +270 -0
  485. sky/volumes/server/server.py +124 -0
  486. sky/volumes/volume.py +215 -0
  487. sky/workspaces/__init__.py +0 -0
  488. sky/workspaces/core.py +655 -0
  489. sky/workspaces/server.py +101 -0
  490. sky/workspaces/utils.py +56 -0
  491. sky_templates/README.md +3 -0
  492. sky_templates/__init__.py +3 -0
  493. sky_templates/ray/__init__.py +0 -0
  494. sky_templates/ray/start_cluster +183 -0
  495. sky_templates/ray/stop_cluster +75 -0
  496. skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
  497. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  498. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
  499. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  500. sky/benchmark/benchmark_state.py +0 -256
  501. sky/benchmark/benchmark_utils.py +0 -641
  502. sky/clouds/service_catalog/constants.py +0 -7
  503. sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
  504. sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
  505. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  506. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  507. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  508. sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
  509. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  510. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  511. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  512. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  513. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  514. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  515. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  516. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
  517. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  518. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  519. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  520. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
  521. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  522. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  523. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  524. sky/jobs/dashboard/dashboard.py +0 -223
  525. sky/jobs/dashboard/static/favicon.ico +0 -0
  526. sky/jobs/dashboard/templates/index.html +0 -831
  527. sky/jobs/server/dashboard_utils.py +0 -69
  528. sky/skylet/providers/scp/__init__.py +0 -2
  529. sky/skylet/providers/scp/config.py +0 -149
  530. sky/skylet/providers/scp/node_provider.py +0 -578
  531. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  532. sky/utils/db_utils.py +0 -100
  533. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  534. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  535. skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
  536. skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
  537. skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
  538. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  539. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  540. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  541. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  542. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  543. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  544. /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  545. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  546. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
@@ -11,18 +11,43 @@ This script is useful for users who do not have local Kubernetes credentials.
11
11
  import asyncio
12
12
  from http.cookiejar import MozillaCookieJar
13
13
  import os
14
+ import struct
14
15
  import sys
15
- from typing import Dict
16
+ import time
17
+ from typing import Dict, Optional
16
18
  from urllib.request import Request
17
19
 
20
+ import requests
18
21
  import websockets
22
+ from websockets.asyncio.client import ClientConnection
19
23
  from websockets.asyncio.client import connect
20
24
 
25
+ from sky import exceptions
26
+ from sky.client import service_account_auth
27
+ from sky.server import constants
28
+ from sky.server.server import KubernetesSSHMessageType
29
+ from sky.skylet import constants as skylet_constants
30
+
31
+ BUFFER_SIZE = 2**16 # 64KB
32
+ HEARTBEAT_INTERVAL_SECONDS = 10
33
+
34
+ # Environment variable for a file path to the API cookie file.
35
+ # Keep in sync with server/constants.py
36
+ API_COOKIE_FILE_ENV_VAR = 'SKYPILOT_API_COOKIE_FILE'
37
+ # Default file if unset.
38
+ # Keep in sync with server/constants.py
39
+ API_COOKIE_FILE_DEFAULT_LOCATION = '~/.sky/cookies.txt'
40
+
41
+ MAX_UNANSWERED_PINGS = 100
42
+
21
43
 
22
44
  def _get_cookie_header(url: str) -> Dict[str, str]:
23
45
  """Extract Cookie header value from a cookie jar for a specific URL"""
24
- cookie_path = os.environ.get('SKYPILOT_API_COOKIE_FILE')
46
+ cookie_path = os.environ.get(API_COOKIE_FILE_ENV_VAR)
25
47
  if cookie_path is None:
48
+ cookie_path = API_COOKIE_FILE_DEFAULT_LOCATION
49
+ cookie_path = os.path.expanduser(cookie_path)
50
+ if not os.path.exists(cookie_path):
26
51
  return {}
27
52
 
28
53
  request = Request(url)
@@ -36,68 +61,218 @@ def _get_cookie_header(url: str) -> Dict[str, str]:
36
61
  return {'Cookie': cookie_header}
37
62
 
38
63
 
39
- async def main(url: str) -> None:
40
- cookie_header = _get_cookie_header(url)
41
- async with connect(url,
42
- ping_interval=None,
43
- additional_headers=cookie_header) as websocket:
44
- if os.isatty(sys.stdin.fileno()):
45
- # pylint: disable=import-outside-toplevel
46
- import termios
47
- import tty
48
- old_settings = termios.tcgetattr(sys.stdin.fileno())
49
- tty.setraw(sys.stdin.fileno())
64
+ async def main(url: str, timestamps_supported: bool, login_url: str) -> None:
65
+ headers = {}
66
+ headers.update(_get_cookie_header(url))
67
+ headers.update(service_account_auth.get_service_account_headers())
68
+ try:
69
+ async with connect(url, ping_interval=None,
70
+ additional_headers=headers) as websocket:
71
+ await run_websocket_proxy(websocket, timestamps_supported)
72
+ except websockets.exceptions.InvalidStatus as e:
73
+ if e.response.status_code == 403:
74
+ print(str(exceptions.ApiServerAuthenticationError(login_url)),
75
+ file=sys.stderr)
50
76
  else:
51
- old_settings = None
77
+ print(f'Error ssh into cluster: {e}', file=sys.stderr)
78
+ sys.exit(1)
79
+
80
+
81
+ async def run_websocket_proxy(websocket: ClientConnection,
82
+ timestamps_supported: bool) -> None:
83
+ if os.isatty(sys.stdin.fileno()):
84
+ # pylint: disable=import-outside-toplevel
85
+ import termios
86
+ import tty
87
+ old_settings = termios.tcgetattr(sys.stdin.fileno())
88
+ tty.setraw(sys.stdin.fileno())
89
+ else:
90
+ old_settings = None
91
+
92
+ try:
93
+ loop = asyncio.get_running_loop()
94
+ # Use asyncio.Stream primitives to wrap stdin and stdout, this is to
95
+ # avoid creating a new thread for each read/write operation
96
+ # excessively.
97
+ stdin_reader = asyncio.StreamReader()
98
+ protocol = asyncio.StreamReaderProtocol(stdin_reader)
99
+ await loop.connect_read_pipe(lambda: protocol, sys.stdin)
100
+ transport, protocol = await loop.connect_write_pipe(
101
+ asyncio.streams.FlowControlMixin, sys.stdout) # type: ignore
102
+ stdout_writer = asyncio.StreamWriter(transport, protocol, None, loop)
103
+ # Dictionary to store last ping time for latency measurement
104
+ last_ping_time_dict: Optional[Dict[int, float]] = None
105
+ if timestamps_supported:
106
+ last_ping_time_dict = {}
107
+
108
+ # Use an Event to signal when websocket is closed
109
+ websocket_closed_event = asyncio.Event()
110
+ websocket_lock = asyncio.Lock()
111
+
112
+ await asyncio.gather(
113
+ stdin_to_websocket(stdin_reader, websocket, timestamps_supported,
114
+ websocket_closed_event, websocket_lock),
115
+ websocket_to_stdout(websocket, stdout_writer, timestamps_supported,
116
+ last_ping_time_dict, websocket_closed_event,
117
+ websocket_lock),
118
+ latency_monitor(websocket, last_ping_time_dict,
119
+ websocket_closed_event, websocket_lock),
120
+ return_exceptions=True)
121
+ finally:
122
+ if old_settings:
123
+ termios.tcsetattr(sys.stdin.fileno(), termios.TCSADRAIN,
124
+ old_settings)
52
125
 
126
+
127
+ async def latency_monitor(websocket: ClientConnection,
128
+ last_ping_time_dict: Optional[dict],
129
+ websocket_closed_event: asyncio.Event,
130
+ websocket_lock: asyncio.Lock):
131
+ """Periodically send PING messages (type 1) to measure latency."""
132
+ if last_ping_time_dict is None:
133
+ return
134
+ next_id = 0
135
+ while not websocket_closed_event.is_set():
53
136
  try:
54
- await asyncio.gather(stdin_to_websocket(websocket),
55
- websocket_to_stdout(websocket))
56
- finally:
57
- if old_settings:
58
- termios.tcsetattr(sys.stdin.fileno(), termios.TCSADRAIN,
59
- old_settings)
137
+ await asyncio.sleep(HEARTBEAT_INTERVAL_SECONDS)
138
+ if len(last_ping_time_dict) >= MAX_UNANSWERED_PINGS:
139
+ # We are not getting responses, clear the dictionary so
140
+ # as not to grow unbounded.
141
+ last_ping_time_dict.clear()
142
+ ping_time = time.time()
143
+ next_id += 1
144
+ last_ping_time_dict[next_id] = ping_time
145
+ message_header_bytes = struct.pack(
146
+ '!BI', KubernetesSSHMessageType.PINGPONG.value, next_id)
147
+ try:
148
+ async with websocket_lock:
149
+ await websocket.send(message_header_bytes)
150
+ except websockets.exceptions.ConnectionClosed as e:
151
+ # Websocket is already closed.
152
+ print(f'Failed to send PING message: {e}', file=sys.stderr)
153
+ break
154
+ except Exception as e:
155
+ print(f'Error in latency_monitor: {e}', file=sys.stderr)
156
+ websocket_closed_event.set()
157
+ raise e
60
158
 
61
159
 
62
- async def stdin_to_websocket(websocket):
160
+ async def stdin_to_websocket(reader: asyncio.StreamReader,
161
+ websocket: ClientConnection,
162
+ timestamps_supported: bool,
163
+ websocket_closed_event: asyncio.Event,
164
+ websocket_lock: asyncio.Lock):
63
165
  try:
64
- while True:
65
- data = await asyncio.get_event_loop().run_in_executor(
66
- None, sys.stdin.buffer.read, 1)
166
+ while not websocket_closed_event.is_set():
167
+ # Read at most BUFFER_SIZE bytes, this not affect
168
+ # responsiveness since it will return as soon as
169
+ # there is at least one byte.
170
+ # The BUFFER_SIZE is chosen to be large enough to improve
171
+ # throughput.
172
+ data = await reader.read(BUFFER_SIZE)
173
+
67
174
  if not data:
68
175
  break
69
- await websocket.send(data)
176
+ if timestamps_supported:
177
+ # Send message with type 0 to indicate data.
178
+ message_type_bytes = struct.pack(
179
+ '!B', KubernetesSSHMessageType.REGULAR_DATA.value)
180
+ data = message_type_bytes + data
181
+ async with websocket_lock:
182
+ await websocket.send(data)
183
+
70
184
  except Exception as e: # pylint: disable=broad-except
71
185
  print(f'Error in stdin_to_websocket: {e}', file=sys.stderr)
72
186
  finally:
73
- await websocket.close()
187
+ async with websocket_lock:
188
+ await websocket.close()
189
+ websocket_closed_event.set()
74
190
 
75
191
 
76
- async def websocket_to_stdout(websocket):
192
+ async def websocket_to_stdout(websocket: ClientConnection,
193
+ writer: asyncio.StreamWriter,
194
+ timestamps_supported: bool,
195
+ last_ping_time_dict: Optional[dict],
196
+ websocket_closed_event: asyncio.Event,
197
+ websocket_lock: asyncio.Lock):
77
198
  try:
78
- while True:
199
+ while not websocket_closed_event.is_set():
79
200
  message = await websocket.recv()
80
- sys.stdout.buffer.write(message)
81
- await asyncio.get_event_loop().run_in_executor(
82
- None, sys.stdout.buffer.flush)
201
+ if (timestamps_supported and len(message) > 0 and
202
+ last_ping_time_dict is not None):
203
+ message_type = struct.unpack('!B', message[:1])[0]
204
+ if message_type == KubernetesSSHMessageType.REGULAR_DATA.value:
205
+ # Regular data - strip type byte and write to stdout
206
+ message = message[1:]
207
+ elif message_type == KubernetesSSHMessageType.PINGPONG.value:
208
+ # PONG response - calculate latency and send measurement
209
+ if not len(message) == struct.calcsize('!BI'):
210
+ raise ValueError(
211
+ f'Invalid PONG message length: {len(message)}')
212
+ pong_id = struct.unpack('!I', message[1:5])[0]
213
+ pong_time = time.time()
214
+
215
+ ping_time = last_ping_time_dict.pop(pong_id, None)
216
+
217
+ if ping_time is None:
218
+ continue
219
+
220
+ latency_seconds = pong_time - ping_time
221
+ latency_ms = int(latency_seconds * 1000)
222
+
223
+ # Send latency measurement (type 2)
224
+ message_type_bytes = struct.pack(
225
+ '!B',
226
+ KubernetesSSHMessageType.LATENCY_MEASUREMENT.value)
227
+ latency_bytes = struct.pack('!Q', latency_ms)
228
+ message = message_type_bytes + latency_bytes
229
+ # Send to server.
230
+ async with websocket_lock:
231
+ await websocket.send(message)
232
+ continue
233
+ # No timestamps support, write directly
234
+ writer.write(message)
235
+ await writer.drain()
83
236
  except websockets.exceptions.ConnectionClosed:
84
237
  print('WebSocket connection closed', file=sys.stderr)
85
238
  except Exception as e: # pylint: disable=broad-except
86
239
  print(f'Error in websocket_to_stdout: {e}', file=sys.stderr)
240
+ raise e
241
+ finally:
242
+ async with websocket_lock:
243
+ await websocket.close()
244
+ websocket_closed_event.set()
87
245
 
88
246
 
89
247
  if __name__ == '__main__':
90
248
  server_url = sys.argv[1].strip('/')
91
- if '://' not in server_url:
92
- # Keep backward compatibility for legacy server URLs without protocol
93
- # TODO(aylei): Remove this after 0.10.0
94
- server_url = f'http://{server_url}'
95
249
 
250
+ disable_latency_measurement = os.environ.get(
251
+ skylet_constants.SSH_DISABLE_LATENCY_MEASUREMENT_ENV_VAR, '0') == '1'
252
+ if disable_latency_measurement:
253
+ timestamps_are_supported = False
254
+ else:
255
+ # TODO(aylei): remove the separate /api/health call and use the header
256
+ # during websocket handshake to determine the server version.
257
+ health_url = f'{server_url}/api/health'
258
+ cookie_hdr = _get_cookie_header(health_url)
259
+ health_response = requests.get(health_url, headers=cookie_hdr)
260
+ health_data = health_response.json()
261
+ timestamps_are_supported = int(health_data.get('api_version', 0)) > 21
262
+
263
+ # Capture the original API server URL for login hint if authentication
264
+ # is required.
265
+ _login_url = server_url
96
266
  server_proto, server_fqdn = server_url.split('://')
97
267
  websocket_proto = 'ws'
98
268
  if server_proto == 'https':
99
269
  websocket_proto = 'wss'
100
270
  server_url = f'{websocket_proto}://{server_fqdn}'
271
+
272
+ client_version_str = (f'&client_version={constants.API_VERSION}'
273
+ if timestamps_are_supported else '')
274
+
101
275
  websocket_url = (f'{server_url}/kubernetes-pod-ssh-proxy'
102
- f'?cluster_name={sys.argv[2]}')
103
- asyncio.run(main(websocket_url))
276
+ f'?cluster_name={sys.argv[2]}'
277
+ f'{client_version_str}')
278
+ asyncio.run(main(websocket_url, timestamps_are_supported, _login_url))
sky/usage/usage_lib.py CHANGED
@@ -10,13 +10,17 @@ import traceback
10
10
  import typing
11
11
  from typing import Any, Callable, Dict, List, Optional, Union
12
12
 
13
+ from typing_extensions import ParamSpec
14
+
13
15
  import sky
14
16
  from sky import sky_logging
17
+ from sky import skypilot_config
15
18
  from sky.adaptors import common as adaptors_common
16
19
  from sky.usage import constants
17
20
  from sky.utils import common_utils
18
21
  from sky.utils import env_options
19
22
  from sky.utils import ux_utils
23
+ from sky.utils import yaml_utils
20
24
 
21
25
  if typing.TYPE_CHECKING:
22
26
  import inspect
@@ -164,6 +168,7 @@ class UsageMessageToReport(MessageToReport):
164
168
  self.runtimes: Dict[str, float] = {} # update_runtime
165
169
  self.exception: Optional[str] = None # entrypoint_context
166
170
  self.stacktrace: Optional[str] = None # entrypoint_context
171
+ self.skypilot_config: Optional[Dict[str, Any]] = None
167
172
 
168
173
  # Whether API server is deployed remotely.
169
174
  self.using_remote_api_server: bool = (
@@ -174,6 +179,7 @@ class UsageMessageToReport(MessageToReport):
174
179
  self.client_entrypoint = common_utils.get_current_client_entrypoint(
175
180
  msg)
176
181
  self.entrypoint = msg
182
+ self.skypilot_config = dict(skypilot_config.to_dict())
177
183
 
178
184
  def set_internal(self):
179
185
  self.internal = True
@@ -205,8 +211,8 @@ class UsageMessageToReport(MessageToReport):
205
211
  logger.debug('Multiple accelerators are not supported: '
206
212
  f'{resources.accelerators}.')
207
213
  self.task_accelerators = list(resources.accelerators.keys())[0]
208
- self.task_num_accelerators = resources.accelerators[
209
- self.task_accelerators]
214
+ self.task_num_accelerators = int(
215
+ resources.accelerators[self.task_accelerators])
210
216
  else:
211
217
  self.task_accelerators = None
212
218
  self.task_num_accelerators = None
@@ -245,7 +251,8 @@ class UsageMessageToReport(MessageToReport):
245
251
  logger.debug('Multiple accelerators are not supported: '
246
252
  f'{resources.accelerators}.')
247
253
  self.accelerators = list(resources.accelerators.keys())[0]
248
- self.num_accelerators = resources.accelerators[self.accelerators]
254
+ self.num_accelerators = int(
255
+ resources.accelerators[self.accelerators])
249
256
  else:
250
257
  self.accelerators = None
251
258
  self.num_accelerators = None
@@ -309,21 +316,30 @@ class MessageCollection:
309
316
  """A collection of messages."""
310
317
 
311
318
  def __init__(self):
312
- self._messages = {
319
+ self._messages: Dict[MessageType, MessageToReport] = {
313
320
  MessageType.USAGE: UsageMessageToReport(),
314
321
  MessageType.HEARTBEAT: HeartbeatMessageToReport()
315
322
  }
316
323
 
317
324
  @property
318
325
  def usage(self) -> UsageMessageToReport:
319
- return self._messages[MessageType.USAGE]
326
+ msg = self._messages[MessageType.USAGE]
327
+ assert isinstance(msg, UsageMessageToReport)
328
+ return msg
320
329
 
321
330
  @property
322
331
  def heartbeat(self) -> HeartbeatMessageToReport:
323
- return self._messages[MessageType.HEARTBEAT]
332
+ msg = self._messages[MessageType.HEARTBEAT]
333
+ assert isinstance(msg, HeartbeatMessageToReport)
334
+ return msg
324
335
 
325
336
  def reset(self, message_type: MessageType):
326
- self._messages[message_type] = self._messages[message_type].__class__()
337
+ if message_type == MessageType.USAGE:
338
+ self._messages[message_type] = UsageMessageToReport()
339
+ elif message_type == MessageType.HEARTBEAT:
340
+ self._messages[message_type] = HeartbeatMessageToReport()
341
+ else:
342
+ raise ValueError(f'Unknown message type: {message_type}')
327
343
 
328
344
  def __getitem__(self, key):
329
345
  return self._messages[key]
@@ -399,7 +415,7 @@ def _clean_yaml(yaml_info: Dict[str, Optional[str]]):
399
415
  contents = inspect.getsource(contents)
400
416
 
401
417
  if type(contents) in constants.USAGE_MESSAGE_REDACT_TYPES:
402
- lines = common_utils.dump_yaml_str({
418
+ lines = yaml_utils.dump_yaml_str({
403
419
  redact_type: contents
404
420
  }).strip().split('\n')
405
421
  message = (f'{len(lines)} lines {redact_type.upper()}'
@@ -428,7 +444,7 @@ def prepare_json_from_yaml_config(
428
444
  with open(yaml_config_or_path, 'r', encoding='utf-8') as f:
429
445
  lines = f.readlines()
430
446
  comment_lines = [line for line in lines if line.startswith('#')]
431
- yaml_info = common_utils.read_yaml_all(yaml_config_or_path)
447
+ yaml_info = yaml_utils.read_yaml_all(yaml_config_or_path)
432
448
 
433
449
  for i in range(len(yaml_info)):
434
450
  if yaml_info[i] is None:
@@ -516,26 +532,26 @@ def entrypoint_context(name: str, fallback: bool = False):
516
532
 
517
533
 
518
534
  T = typing.TypeVar('T')
535
+ P = ParamSpec('P')
519
536
 
520
537
 
521
538
  @typing.overload
522
539
  def entrypoint(
523
540
  name_or_fn: str,
524
- fallback: bool = False
525
- ) -> Callable[[Callable[..., T]], Callable[..., T]]:
541
+ fallback: bool = False) -> Callable[[Callable[P, T]], Callable[P, T]]:
526
542
  ...
527
543
 
528
544
 
529
545
  @typing.overload
530
- def entrypoint(name_or_fn: Callable[..., T],
531
- fallback: bool = False) -> Callable[..., T]:
546
+ def entrypoint(name_or_fn: Callable[P, T],
547
+ fallback: bool = False) -> Callable[P, T]:
532
548
  ...
533
549
 
534
550
 
535
551
  def entrypoint(
536
- name_or_fn: Union[str, Callable[..., T]],
552
+ name_or_fn: Union[str, Callable[P, T]],
537
553
  fallback: bool = False
538
- ) -> Union[Callable[..., T], Callable[[Callable[..., T]], Callable[..., T]]]:
554
+ ) -> Union[Callable[P, T], Callable[[Callable[P, T]], Callable[P, T]]]:
539
555
  return common_utils.make_decorator(entrypoint_context,
540
556
  name_or_fn,
541
557
  fallback=fallback)
sky/users/__init__.py ADDED
File without changes
sky/users/model.conf ADDED
@@ -0,0 +1,15 @@
1
+ # rbac_model.conf
2
+ [request_definition]
3
+ r = sub, obj, act
4
+
5
+ [policy_definition]
6
+ p = sub, obj, act
7
+
8
+ [role_definition]
9
+ g = _, _
10
+
11
+ [policy_effect]
12
+ e = some(where (p.eft == allow))
13
+
14
+ [matchers]
15
+ m = (g(r.sub, p.sub)|| p.sub == '*') && r.obj == p.obj && r.act == p.act