skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (397) hide show
  1. sky/__init__.py +10 -2
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/ibm.py +5 -2
  8. sky/adaptors/kubernetes.py +64 -0
  9. sky/adaptors/nebius.py +3 -1
  10. sky/adaptors/primeintellect.py +1 -0
  11. sky/adaptors/seeweb.py +183 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +20 -0
  14. sky/authentication.py +157 -263
  15. sky/backends/__init__.py +3 -2
  16. sky/backends/backend.py +11 -3
  17. sky/backends/backend_utils.py +588 -184
  18. sky/backends/cloud_vm_ray_backend.py +1088 -904
  19. sky/backends/local_docker_backend.py +9 -5
  20. sky/backends/task_codegen.py +633 -0
  21. sky/backends/wheel_utils.py +18 -0
  22. sky/catalog/__init__.py +8 -0
  23. sky/catalog/aws_catalog.py +4 -0
  24. sky/catalog/common.py +19 -1
  25. sky/catalog/data_fetchers/fetch_aws.py +102 -80
  26. sky/catalog/data_fetchers/fetch_gcp.py +30 -3
  27. sky/catalog/data_fetchers/fetch_nebius.py +9 -6
  28. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  29. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  30. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  31. sky/catalog/kubernetes_catalog.py +24 -28
  32. sky/catalog/primeintellect_catalog.py +95 -0
  33. sky/catalog/runpod_catalog.py +5 -1
  34. sky/catalog/seeweb_catalog.py +184 -0
  35. sky/catalog/shadeform_catalog.py +165 -0
  36. sky/check.py +73 -43
  37. sky/client/cli/command.py +675 -412
  38. sky/client/cli/flags.py +4 -2
  39. sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
  40. sky/client/cli/utils.py +79 -0
  41. sky/client/common.py +12 -2
  42. sky/client/sdk.py +132 -63
  43. sky/client/sdk_async.py +34 -33
  44. sky/cloud_stores.py +82 -3
  45. sky/clouds/__init__.py +6 -0
  46. sky/clouds/aws.py +337 -129
  47. sky/clouds/azure.py +24 -18
  48. sky/clouds/cloud.py +40 -13
  49. sky/clouds/cudo.py +16 -13
  50. sky/clouds/do.py +9 -7
  51. sky/clouds/fluidstack.py +12 -5
  52. sky/clouds/gcp.py +14 -7
  53. sky/clouds/hyperbolic.py +12 -5
  54. sky/clouds/ibm.py +12 -5
  55. sky/clouds/kubernetes.py +80 -45
  56. sky/clouds/lambda_cloud.py +12 -5
  57. sky/clouds/nebius.py +23 -9
  58. sky/clouds/oci.py +19 -12
  59. sky/clouds/paperspace.py +4 -1
  60. sky/clouds/primeintellect.py +317 -0
  61. sky/clouds/runpod.py +85 -24
  62. sky/clouds/scp.py +12 -8
  63. sky/clouds/seeweb.py +477 -0
  64. sky/clouds/shadeform.py +400 -0
  65. sky/clouds/ssh.py +4 -2
  66. sky/clouds/utils/scp_utils.py +61 -50
  67. sky/clouds/vast.py +33 -27
  68. sky/clouds/vsphere.py +14 -16
  69. sky/core.py +174 -165
  70. sky/dashboard/out/404.html +1 -1
  71. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  72. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  73. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  74. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  75. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  76. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  77. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  78. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  79. sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-7b45f9fbb6308557.js} +1 -1
  80. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  81. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  82. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  83. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  84. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  85. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  86. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  87. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  88. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  89. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  90. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  91. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  92. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  93. sky/dashboard/out/_next/static/chunks/pages/{_app-ce361c6959bc2001.js → _app-bde01e4a2beec258.js} +1 -1
  94. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  95. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  96. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  97. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-c0b5935149902e6f.js} +1 -1
  98. sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-aed0ea19df7cf961.js} +1 -1
  99. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  100. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  101. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  102. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-84a40f8c7c627fe4.js} +1 -1
  105. sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-531b2f8c4bf89f82.js} +1 -1
  106. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  107. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  108. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  109. sky/dashboard/out/clusters/[cluster].html +1 -1
  110. sky/dashboard/out/clusters.html +1 -1
  111. sky/dashboard/out/config.html +1 -1
  112. sky/dashboard/out/index.html +1 -1
  113. sky/dashboard/out/infra/[context].html +1 -1
  114. sky/dashboard/out/infra.html +1 -1
  115. sky/dashboard/out/jobs/[job].html +1 -1
  116. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  117. sky/dashboard/out/jobs.html +1 -1
  118. sky/dashboard/out/users.html +1 -1
  119. sky/dashboard/out/volumes.html +1 -1
  120. sky/dashboard/out/workspace/new.html +1 -1
  121. sky/dashboard/out/workspaces/[name].html +1 -1
  122. sky/dashboard/out/workspaces.html +1 -1
  123. sky/data/data_utils.py +92 -1
  124. sky/data/mounting_utils.py +162 -29
  125. sky/data/storage.py +200 -19
  126. sky/data/storage_utils.py +10 -45
  127. sky/exceptions.py +18 -7
  128. sky/execution.py +74 -31
  129. sky/global_user_state.py +605 -191
  130. sky/jobs/__init__.py +2 -0
  131. sky/jobs/client/sdk.py +101 -4
  132. sky/jobs/client/sdk_async.py +31 -5
  133. sky/jobs/constants.py +15 -8
  134. sky/jobs/controller.py +726 -284
  135. sky/jobs/file_content_utils.py +128 -0
  136. sky/jobs/log_gc.py +193 -0
  137. sky/jobs/recovery_strategy.py +250 -100
  138. sky/jobs/scheduler.py +271 -173
  139. sky/jobs/server/core.py +367 -114
  140. sky/jobs/server/server.py +81 -35
  141. sky/jobs/server/utils.py +89 -35
  142. sky/jobs/state.py +1498 -620
  143. sky/jobs/utils.py +771 -306
  144. sky/logs/agent.py +40 -5
  145. sky/logs/aws.py +9 -19
  146. sky/metrics/utils.py +282 -39
  147. sky/optimizer.py +1 -1
  148. sky/provision/__init__.py +37 -1
  149. sky/provision/aws/config.py +34 -13
  150. sky/provision/aws/instance.py +5 -2
  151. sky/provision/azure/instance.py +5 -3
  152. sky/provision/common.py +2 -0
  153. sky/provision/cudo/instance.py +4 -3
  154. sky/provision/do/instance.py +4 -3
  155. sky/provision/docker_utils.py +97 -26
  156. sky/provision/fluidstack/instance.py +6 -5
  157. sky/provision/gcp/config.py +6 -1
  158. sky/provision/gcp/instance.py +4 -2
  159. sky/provision/hyperbolic/instance.py +4 -2
  160. sky/provision/instance_setup.py +66 -20
  161. sky/provision/kubernetes/__init__.py +2 -0
  162. sky/provision/kubernetes/config.py +7 -44
  163. sky/provision/kubernetes/constants.py +0 -1
  164. sky/provision/kubernetes/instance.py +609 -213
  165. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  166. sky/provision/kubernetes/network.py +12 -8
  167. sky/provision/kubernetes/network_utils.py +8 -25
  168. sky/provision/kubernetes/utils.py +382 -418
  169. sky/provision/kubernetes/volume.py +150 -18
  170. sky/provision/lambda_cloud/instance.py +16 -13
  171. sky/provision/nebius/instance.py +6 -2
  172. sky/provision/nebius/utils.py +103 -86
  173. sky/provision/oci/instance.py +4 -2
  174. sky/provision/paperspace/instance.py +4 -3
  175. sky/provision/primeintellect/__init__.py +10 -0
  176. sky/provision/primeintellect/config.py +11 -0
  177. sky/provision/primeintellect/instance.py +454 -0
  178. sky/provision/primeintellect/utils.py +398 -0
  179. sky/provision/provisioner.py +30 -9
  180. sky/provision/runpod/__init__.py +2 -0
  181. sky/provision/runpod/instance.py +4 -3
  182. sky/provision/runpod/volume.py +69 -13
  183. sky/provision/scp/instance.py +307 -130
  184. sky/provision/seeweb/__init__.py +11 -0
  185. sky/provision/seeweb/config.py +13 -0
  186. sky/provision/seeweb/instance.py +812 -0
  187. sky/provision/shadeform/__init__.py +11 -0
  188. sky/provision/shadeform/config.py +12 -0
  189. sky/provision/shadeform/instance.py +351 -0
  190. sky/provision/shadeform/shadeform_utils.py +83 -0
  191. sky/provision/vast/instance.py +5 -3
  192. sky/provision/volume.py +164 -0
  193. sky/provision/vsphere/common/ssl_helper.py +1 -1
  194. sky/provision/vsphere/common/vapiconnect.py +2 -1
  195. sky/provision/vsphere/common/vim_utils.py +3 -2
  196. sky/provision/vsphere/instance.py +8 -6
  197. sky/provision/vsphere/vsphere_utils.py +8 -1
  198. sky/resources.py +11 -3
  199. sky/schemas/api/responses.py +107 -6
  200. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  201. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  202. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  203. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  204. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  205. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  206. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  207. sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
  208. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  209. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  210. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  211. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  212. sky/schemas/generated/jobsv1_pb2.py +86 -0
  213. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  214. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  215. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  216. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  217. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  218. sky/schemas/generated/servev1_pb2.py +58 -0
  219. sky/schemas/generated/servev1_pb2.pyi +115 -0
  220. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  221. sky/serve/autoscalers.py +2 -0
  222. sky/serve/client/impl.py +55 -21
  223. sky/serve/constants.py +4 -3
  224. sky/serve/controller.py +17 -11
  225. sky/serve/load_balancing_policies.py +1 -1
  226. sky/serve/replica_managers.py +219 -142
  227. sky/serve/serve_rpc_utils.py +179 -0
  228. sky/serve/serve_state.py +63 -54
  229. sky/serve/serve_utils.py +145 -109
  230. sky/serve/server/core.py +46 -25
  231. sky/serve/server/impl.py +311 -162
  232. sky/serve/server/server.py +21 -19
  233. sky/serve/service.py +84 -68
  234. sky/serve/service_spec.py +45 -7
  235. sky/server/auth/loopback.py +38 -0
  236. sky/server/auth/oauth2_proxy.py +12 -7
  237. sky/server/common.py +47 -24
  238. sky/server/config.py +62 -28
  239. sky/server/constants.py +9 -1
  240. sky/server/daemons.py +109 -38
  241. sky/server/metrics.py +76 -96
  242. sky/server/middleware_utils.py +166 -0
  243. sky/server/requests/executor.py +381 -145
  244. sky/server/requests/payloads.py +71 -18
  245. sky/server/requests/preconditions.py +15 -13
  246. sky/server/requests/request_names.py +121 -0
  247. sky/server/requests/requests.py +507 -157
  248. sky/server/requests/serializers/decoders.py +48 -17
  249. sky/server/requests/serializers/encoders.py +85 -20
  250. sky/server/requests/threads.py +117 -0
  251. sky/server/rest.py +116 -24
  252. sky/server/server.py +420 -172
  253. sky/server/stream_utils.py +219 -45
  254. sky/server/uvicorn.py +30 -19
  255. sky/setup_files/MANIFEST.in +6 -1
  256. sky/setup_files/alembic.ini +8 -0
  257. sky/setup_files/dependencies.py +62 -19
  258. sky/setup_files/setup.py +44 -44
  259. sky/sky_logging.py +13 -5
  260. sky/skylet/attempt_skylet.py +106 -24
  261. sky/skylet/configs.py +3 -1
  262. sky/skylet/constants.py +111 -26
  263. sky/skylet/events.py +64 -10
  264. sky/skylet/job_lib.py +141 -104
  265. sky/skylet/log_lib.py +233 -5
  266. sky/skylet/log_lib.pyi +40 -2
  267. sky/skylet/providers/ibm/node_provider.py +12 -8
  268. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  269. sky/skylet/runtime_utils.py +21 -0
  270. sky/skylet/services.py +524 -0
  271. sky/skylet/skylet.py +22 -1
  272. sky/skylet/subprocess_daemon.py +104 -29
  273. sky/skypilot_config.py +99 -79
  274. sky/ssh_node_pools/server.py +9 -8
  275. sky/task.py +221 -104
  276. sky/templates/aws-ray.yml.j2 +1 -0
  277. sky/templates/azure-ray.yml.j2 +1 -0
  278. sky/templates/cudo-ray.yml.j2 +1 -0
  279. sky/templates/do-ray.yml.j2 +1 -0
  280. sky/templates/fluidstack-ray.yml.j2 +1 -0
  281. sky/templates/gcp-ray.yml.j2 +1 -0
  282. sky/templates/hyperbolic-ray.yml.j2 +1 -0
  283. sky/templates/ibm-ray.yml.j2 +2 -1
  284. sky/templates/jobs-controller.yaml.j2 +3 -0
  285. sky/templates/kubernetes-ray.yml.j2 +196 -55
  286. sky/templates/lambda-ray.yml.j2 +1 -0
  287. sky/templates/nebius-ray.yml.j2 +3 -0
  288. sky/templates/oci-ray.yml.j2 +1 -0
  289. sky/templates/paperspace-ray.yml.j2 +1 -0
  290. sky/templates/primeintellect-ray.yml.j2 +72 -0
  291. sky/templates/runpod-ray.yml.j2 +1 -0
  292. sky/templates/scp-ray.yml.j2 +1 -0
  293. sky/templates/seeweb-ray.yml.j2 +171 -0
  294. sky/templates/shadeform-ray.yml.j2 +73 -0
  295. sky/templates/vast-ray.yml.j2 +1 -0
  296. sky/templates/vsphere-ray.yml.j2 +1 -0
  297. sky/templates/websocket_proxy.py +188 -43
  298. sky/usage/usage_lib.py +16 -4
  299. sky/users/permission.py +60 -43
  300. sky/utils/accelerator_registry.py +6 -3
  301. sky/utils/admin_policy_utils.py +18 -5
  302. sky/utils/annotations.py +22 -0
  303. sky/utils/asyncio_utils.py +78 -0
  304. sky/utils/atomic.py +1 -1
  305. sky/utils/auth_utils.py +153 -0
  306. sky/utils/cli_utils/status_utils.py +12 -7
  307. sky/utils/cluster_utils.py +28 -6
  308. sky/utils/command_runner.py +88 -27
  309. sky/utils/command_runner.pyi +36 -3
  310. sky/utils/common.py +3 -1
  311. sky/utils/common_utils.py +37 -4
  312. sky/utils/config_utils.py +1 -14
  313. sky/utils/context.py +127 -40
  314. sky/utils/context_utils.py +73 -18
  315. sky/utils/controller_utils.py +229 -70
  316. sky/utils/db/db_utils.py +95 -18
  317. sky/utils/db/kv_cache.py +149 -0
  318. sky/utils/db/migration_utils.py +24 -7
  319. sky/utils/env_options.py +4 -0
  320. sky/utils/git.py +559 -1
  321. sky/utils/kubernetes/create_cluster.sh +15 -30
  322. sky/utils/kubernetes/delete_cluster.sh +10 -7
  323. sky/utils/kubernetes/{deploy_remote_cluster.py → deploy_ssh_node_pools.py} +258 -380
  324. sky/utils/kubernetes/generate_kind_config.py +6 -66
  325. sky/utils/kubernetes/gpu_labeler.py +13 -3
  326. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  327. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  328. sky/utils/kubernetes/kubernetes_deploy_utils.py +213 -194
  329. sky/utils/kubernetes/rsync_helper.sh +11 -3
  330. sky/utils/kubernetes_enums.py +7 -15
  331. sky/utils/lock_events.py +4 -4
  332. sky/utils/locks.py +128 -31
  333. sky/utils/log_utils.py +0 -319
  334. sky/utils/resource_checker.py +13 -10
  335. sky/utils/resources_utils.py +53 -29
  336. sky/utils/rich_utils.py +8 -4
  337. sky/utils/schemas.py +107 -52
  338. sky/utils/subprocess_utils.py +17 -4
  339. sky/utils/thread_utils.py +91 -0
  340. sky/utils/timeline.py +2 -1
  341. sky/utils/ux_utils.py +35 -1
  342. sky/utils/volume.py +88 -4
  343. sky/utils/yaml_utils.py +9 -0
  344. sky/volumes/client/sdk.py +48 -10
  345. sky/volumes/server/core.py +59 -22
  346. sky/volumes/server/server.py +46 -17
  347. sky/volumes/volume.py +54 -42
  348. sky/workspaces/core.py +57 -21
  349. sky/workspaces/server.py +13 -12
  350. sky_templates/README.md +3 -0
  351. sky_templates/__init__.py +3 -0
  352. sky_templates/ray/__init__.py +0 -0
  353. sky_templates/ray/start_cluster +183 -0
  354. sky_templates/ray/stop_cluster +75 -0
  355. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/METADATA +331 -65
  356. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  357. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  358. sky/client/cli/git.py +0 -549
  359. sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
  360. sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
  361. sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
  362. sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
  363. sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
  364. sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
  365. sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
  366. sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
  367. sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
  368. sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
  369. sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
  370. sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
  371. sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
  372. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
  373. sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
  374. sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
  375. sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
  376. sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
  377. sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
  378. sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
  379. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
  380. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
  381. sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
  382. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
  383. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
  384. sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
  385. sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
  386. sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
  387. sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
  388. sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
  389. sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
  390. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  391. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  392. skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
  393. skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
  394. /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  395. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +0 -0
  396. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  397. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,179 @@
1
+ """Rpc Utilities for SkyServe"""
2
+
3
+ import typing
4
+ from typing import Any, Dict, List, Optional, Tuple
5
+
6
+ from sky import backends
7
+ from sky.adaptors import common as adaptors_common
8
+ from sky.backends import backend_utils
9
+ from sky.serve import serve_utils
10
+
11
+ if typing.TYPE_CHECKING:
12
+ from sky.schemas.generated import servev1_pb2
13
+ else:
14
+ servev1_pb2 = adaptors_common.LazyImport(
15
+ 'sky.schemas.generated.servev1_pb2')
16
+
17
+ # ======================= gRPC Converters for Sky Serve =======================
18
+
19
+
20
class GetServiceStatusRequestConverter:
    """Converter for GetServiceStatusRequest.

    Translates between the Python-level arguments ``(service_names, pool)``
    and the ``servev1_pb2.GetServiceStatusRequest`` message. ``None`` and an
    empty list of names are distinct inputs and are kept distinct on the
    wire via the presence of the ``service_names`` sub-message.
    """

    @classmethod
    def to_proto(cls, service_names: Optional[List[str]],
                 pool: bool) -> 'servev1_pb2.GetServiceStatusRequest':
        """Builds the request message.

        Args:
            service_names: Names to put in the request. ``None`` leaves the
                ``service_names`` sub-message unset; a list (even an empty
                one) sets it.
            pool: Copied verbatim into the ``pool`` field.

        Returns:
            The populated GetServiceStatusRequest.
        """
        request = servev1_pb2.GetServiceStatusRequest()
        request.pool = pool
        if service_names is not None:
            # Mark the sub-message as present explicitly. Extending the
            # repeated field with an empty list may not flip the presence
            # bit, in which case `[]` would be decoded as `None` by
            # from_proto().
            request.service_names.SetInParent()
            request.service_names.names.extend(service_names)
        return request

    @classmethod
    def from_proto(
        cls, proto: 'servev1_pb2.GetServiceStatusRequest'
    ) -> Tuple[Optional[List[str]], bool]:
        """Extracts ``(service_names, pool)`` from the request message.

        Args:
            proto: The request message to decode.

        Returns:
            A tuple ``(service_names, pool)``. ``service_names`` is ``None``
            when the ``service_names`` sub-message is not set, otherwise a
            list copy of its ``names`` field.
        """
        service_names: Optional[List[str]] = None
        if proto.HasField('service_names'):
            service_names = list(proto.service_names.names)
        return service_names, proto.pool
42
+
43
+
44
class GetServiceStatusResponseConverter:
    """Converter for GetServiceStatusResponse.

    Moves a list of per-service status dicts into and out of the
    ``statuses`` field of ``servev1_pb2.GetServiceStatusResponse``.
    """

    @classmethod
    def to_proto(
        cls, statuses: List[Dict[str, str]]
    ) -> 'servev1_pb2.GetServiceStatusResponse':
        """Builds a response with one ``statuses`` entry per input dict.

        Args:
            statuses: Status dicts; each is copied into the ``status``
                field of a freshly added ``statuses`` entry.

        Returns:
            The populated GetServiceStatusResponse.
        """
        proto = servev1_pb2.GetServiceStatusResponse()
        for entry in statuses:
            proto.statuses.add().status.update(entry)
        return proto

    @classmethod
    def from_proto(
        cls, proto: 'servev1_pb2.GetServiceStatusResponse'
    ) -> List[Dict[str, str]]:
        """Returns the ``status`` of every ``statuses`` entry as a dict.

        Args:
            proto: The response message to decode.

        Returns:
            A list of plain dicts, in the order the entries appear in
            ``proto.statuses``.
        """
        return [dict(item.status) for item in proto.statuses]
64
+
65
+
66
class TerminateServicesRequestConverter:
    """Converter for TerminateServicesRequest.

    Translates between the Python-level arguments
    ``(service_names, purge, pool)`` and the
    ``servev1_pb2.TerminateServicesRequest`` message. ``None`` and an empty
    list of names are distinct inputs and are kept distinct on the wire via
    the presence of the ``service_names`` sub-message.
    """

    @classmethod
    def to_proto(cls, service_names: Optional[List[str]], purge: bool,
                 pool: bool) -> 'servev1_pb2.TerminateServicesRequest':
        """Builds the request message.

        Args:
            service_names: Names to put in the request. ``None`` leaves the
                ``service_names`` sub-message unset; a list (even an empty
                one) sets it.
            purge: Copied verbatim into the ``purge`` field.
            pool: Copied verbatim into the ``pool`` field.

        Returns:
            The populated TerminateServicesRequest.
        """
        request = servev1_pb2.TerminateServicesRequest()
        request.purge = purge
        request.pool = pool
        if service_names is not None:
            # Mark the sub-message as present explicitly. Extending the
            # repeated field with an empty list may not flip the presence
            # bit, in which case an explicit "no services" (`[]`) would be
            # decoded as `None` by from_proto().
            request.service_names.SetInParent()
            request.service_names.names.extend(service_names)
        return request

    @classmethod
    def from_proto(
        cls, proto: 'servev1_pb2.TerminateServicesRequest'
    ) -> Tuple[Optional[List[str]], bool, bool]:
        """Extracts ``(service_names, purge, pool)`` from the request.

        Args:
            proto: The request message to decode.

        Returns:
            A tuple ``(service_names, purge, pool)``. ``service_names`` is
            ``None`` when the ``service_names`` sub-message is not set,
            otherwise a list copy of its ``names`` field.
        """
        service_names: Optional[List[str]] = None
        if proto.HasField('service_names'):
            service_names = list(proto.service_names.names)
        return service_names, proto.purge, proto.pool
90
+
91
+
92
+ # ========================= gRPC Runner for Sky Serve =========================
93
+
94
+
95
class RpcRunner:
    """gRPC Runner for Sky Serve

    Every method builds a request message, sends it through a
    `backends.SkyletClient` created on the handle's gRPC channel (wrapped in
    `backend_utils.invoke_skylet_with_retries`), and returns the relevant
    part of the response.

    The RPC runner does not catch errors, and assumes that backend handle has
    grpc enabled.

    Common exceptions raised:
        exceptions.FetchClusterInfoError
        exceptions.SkyletInternalError
        grpc.RpcError
        grpc.FutureTimeoutError
        AssertionError
    """

    @classmethod
    def get_service_status(cls, handle: backends.CloudVmRayResourceHandle,
                           service_names: Optional[List[str]],
                           pool: bool) -> List[Dict[str, Any]]:
        """Fetches service statuses and returns them unpickled.

        The response is decoded with GetServiceStatusResponseConverter and
        then passed through `serve_utils.unpickle_service_status`.
        """
        assert handle.is_grpc_enabled_with_flag
        request = GetServiceStatusRequestConverter.to_proto(service_names, pool)

        def _send():
            client = backends.SkyletClient(handle.get_grpc_channel())
            return client.get_service_status(request)

        response = backend_utils.invoke_skylet_with_retries(_send)
        return serve_utils.unpickle_service_status(
            GetServiceStatusResponseConverter.from_proto(response))

    @classmethod
    def add_version(cls, handle: backends.CloudVmRayResourceHandle,
                    service_name: str) -> int:
        """Sends an AddVersionRequest and returns the response's version."""
        assert handle.is_grpc_enabled_with_flag
        request = servev1_pb2.AddVersionRequest(service_name=service_name)

        def _send():
            client = backends.SkyletClient(handle.get_grpc_channel())
            return client.add_serve_version(request)

        return backend_utils.invoke_skylet_with_retries(_send).version

    @classmethod
    def terminate_services(cls, handle: backends.CloudVmRayResourceHandle,
                           service_names: Optional[List[str]], purge: bool,
                           pool: bool) -> str:
        """Sends a TerminateServicesRequest and returns the reply message."""
        assert handle.is_grpc_enabled_with_flag
        request = TerminateServicesRequestConverter.to_proto(
            service_names, purge, pool)

        def _send():
            client = backends.SkyletClient(handle.get_grpc_channel())
            return client.terminate_services(request)

        return backend_utils.invoke_skylet_with_retries(_send).message

    @classmethod
    def terminate_replica(cls, handle: backends.CloudVmRayResourceHandle,
                          service_name: str, replica_id: int,
                          purge: bool) -> str:
        """Sends a TerminateReplicaRequest and returns the reply message."""
        assert handle.is_grpc_enabled_with_flag
        request = servev1_pb2.TerminateReplicaRequest(
            service_name=service_name, replica_id=replica_id, purge=purge)

        def _send():
            client = backends.SkyletClient(handle.get_grpc_channel())
            return client.terminate_replica(request)

        return backend_utils.invoke_skylet_with_retries(_send).message

    @classmethod
    def wait_service_registration(cls,
                                  handle: backends.CloudVmRayResourceHandle,
                                  service_name: str, job_id: int,
                                  pool: bool) -> int:
        """Sends a WaitServiceRegistrationRequest; returns `lb_port`."""
        assert handle.is_grpc_enabled_with_flag
        request = servev1_pb2.WaitServiceRegistrationRequest(
            service_name=service_name, job_id=job_id, pool=pool)

        def _send():
            client = backends.SkyletClient(handle.get_grpc_channel())
            return client.wait_service_registration(request)

        return backend_utils.invoke_skylet_with_retries(_send).lb_port

    @classmethod
    def update_service(cls, handle: backends.CloudVmRayResourceHandle,
                       service_name: str, version: int,
                       mode: serve_utils.UpdateMode, pool: bool) -> None:
        """Sends an UpdateServiceRequest; the response is discarded.

        `mode` is transmitted as `mode.value`.
        """
        assert handle.is_grpc_enabled_with_flag
        request = servev1_pb2.UpdateServiceRequest(service_name=service_name,
                                                   version=version,
                                                   mode=mode.value,
                                                   pool=pool)

        def _send():
            client = backends.SkyletClient(handle.get_grpc_channel())
            return client.update_service(request)

        backend_utils.invoke_skylet_with_retries(_send)
sky/serve/serve_state.py CHANGED
@@ -86,6 +86,7 @@ version_specs_table = sqlalchemy.Table(
86
86
  sqlalchemy.Column('service_name', sqlalchemy.Text, primary_key=True),
87
87
  sqlalchemy.Column('version', sqlalchemy.Integer, primary_key=True),
88
88
  sqlalchemy.Column('spec', sqlalchemy.LargeBinary),
89
+ sqlalchemy.Column('yaml_content', sqlalchemy.Text, server_default=None),
89
90
  )
90
91
 
91
92
  serve_ha_recovery_script_table = sqlalchemy.Table(
@@ -472,6 +473,7 @@ def _get_service_from_row(r: 'row.RowMapping') -> Dict[str, Any]:
472
473
  'controller_pid': r['controller_pid'],
473
474
  'hash': r['hash'],
474
475
  'entrypoint': r['entrypoint'],
476
+ 'yaml_content': r.get('yaml_content'),
475
477
  }
476
478
  latest_spec = get_spec(r['name'], current_version)
477
479
  if latest_spec is not None:
@@ -480,21 +482,48 @@ def _get_service_from_row(r: 'row.RowMapping') -> Dict[str, Any]:
480
482
  return record
481
483
 
482
484
 
485
+ def _build_services_with_latest_version_query(
486
+ service_name: Optional[str] = None) -> sqlalchemy.sql.Select:
487
+ """Builds a query joining services with their latest version and yaml.
488
+
489
+ Args:
490
+ service_name: If provided, filter to this service only.
491
+
492
+ Returns:
493
+ A SQLAlchemy selectable for fetching rows, including columns:
494
+ - max_version (latest version per service)
495
+ - services_table.*
496
+ - yaml_content (from version_specs_table for latest version)
497
+ """
498
+ subquery = sqlalchemy.select(
499
+ version_specs_table.c.service_name,
500
+ sqlalchemy.func.max(version_specs_table.c.version).label('max_version'),
501
+ ).group_by(version_specs_table.c.service_name).alias('v')
502
+
503
+ query = sqlalchemy.select(
504
+ subquery.c.max_version,
505
+ services_table,
506
+ version_specs_table.c.yaml_content,
507
+ ).select_from(
508
+ services_table.join(
509
+ subquery, services_table.c.name == subquery.c.service_name).join(
510
+ version_specs_table,
511
+ sqlalchemy.and_(
512
+ version_specs_table.c.service_name == services_table.c.name,
513
+ version_specs_table.c.version == subquery.c.max_version,
514
+ ),
515
+ ))
516
+ if service_name is not None:
517
+ query = query.where(services_table.c.name == service_name)
518
+ return query
519
+
520
+
483
521
  @init_db
484
522
  def get_services() -> List[Dict[str, Any]]:
485
523
  """Get all existing service records."""
486
524
  assert _SQLALCHEMY_ENGINE is not None
487
525
  with orm.Session(_SQLALCHEMY_ENGINE) as session:
488
- subquery = sqlalchemy.select(
489
- version_specs_table.c.service_name,
490
- sqlalchemy.func.max(
491
- version_specs_table.c.version).label('max_version')).group_by(
492
- version_specs_table.c.service_name).alias('v')
493
-
494
- query = sqlalchemy.select(
495
- subquery.c.max_version, services_table).select_from(
496
- services_table.join(
497
- subquery, services_table.c.name == subquery.c.service_name))
526
+ query = _build_services_with_latest_version_query()
498
527
  rows = session.execute(query).fetchall()
499
528
  records = []
500
529
  for row in rows:
@@ -517,20 +546,7 @@ def get_service_from_name(service_name: str) -> Optional[Dict[str, Any]]:
517
546
  """Get all existing service records."""
518
547
  assert _SQLALCHEMY_ENGINE is not None
519
548
  with orm.Session(_SQLALCHEMY_ENGINE) as session:
520
- subquery = sqlalchemy.select(
521
- version_specs_table.c.service_name,
522
- sqlalchemy.func.max(
523
- version_specs_table.c.version).label('max_version')
524
- ).where(version_specs_table.c.service_name == service_name).group_by(
525
- version_specs_table.c.service_name).alias('v')
526
-
527
- query = sqlalchemy.select(
528
- subquery.c.max_version, services_table).select_from(
529
- services_table.join(
530
- subquery,
531
- services_table.c.name == subquery.c.service_name)).where(
532
- services_table.c.name == service_name)
533
-
549
+ query = _build_services_with_latest_version_query(service_name)
534
550
  rows = session.execute(query).fetchall()
535
551
  for row in rows:
536
552
  return _get_service_from_row(row._mapping) # pylint: disable=protected-access
@@ -686,22 +702,6 @@ def total_number_terminating_replicas() -> int:
686
702
  return terminating_count
687
703
 
688
704
 
689
- @init_db
690
- def total_number_scheduled_to_terminate_replicas() -> int:
691
- """Returns the total number of terminating replicas."""
692
- assert _SQLALCHEMY_ENGINE is not None
693
- with orm.Session(_SQLALCHEMY_ENGINE) as session:
694
- rows = session.execute(sqlalchemy.select(
695
- replicas_table.c.replica_info)).fetchall()
696
- terminating_count = 0
697
- for row in rows:
698
- replica_info: 'replica_managers.ReplicaInfo' = pickle.loads(row[0])
699
- if (replica_info.status_property.sky_down_status ==
700
- common_utils.ProcessStatus.SCHEDULED):
701
- terminating_count += 1
702
- return terminating_count
703
-
704
-
705
705
  def get_replicas_at_status(
706
706
  service_name: str,
707
707
  status: ReplicaStatus,
@@ -737,7 +737,8 @@ def add_version(service_name: str) -> int:
737
737
 
738
738
  @init_db
739
739
  def add_or_update_version(service_name: str, version: int,
740
- spec: 'service_spec.SkyServiceSpec') -> None:
740
+ spec: 'service_spec.SkyServiceSpec',
741
+ yaml_content: str) -> None:
741
742
  assert _SQLALCHEMY_ENGINE is not None
742
743
  with orm.Session(_SQLALCHEMY_ENGINE) as session:
743
744
  if (_SQLALCHEMY_ENGINE.dialect.name ==
@@ -750,27 +751,22 @@ def add_or_update_version(service_name: str, version: int,
750
751
  raise ValueError('Unsupported database dialect')
751
752
 
752
753
  insert_stmt = insert_func(version_specs_table).values(
753
- service_name=service_name, version=version, spec=pickle.dumps(spec))
754
+ service_name=service_name,
755
+ version=version,
756
+ spec=pickle.dumps(spec),
757
+ yaml_content=yaml_content)
754
758
 
755
759
  insert_stmt = insert_stmt.on_conflict_do_update(
756
760
  index_elements=['service_name', 'version'],
757
- set_={'spec': insert_stmt.excluded.spec})
761
+ set_={
762
+ 'spec': insert_stmt.excluded.spec,
763
+ 'yaml_content': insert_stmt.excluded.yaml_content
764
+ })
758
765
 
759
766
  session.execute(insert_stmt)
760
767
  session.commit()
761
768
 
762
769
 
763
- @init_db
764
- def remove_service_versions(service_name: str) -> None:
765
- """Removes a replica from the database."""
766
- assert _SQLALCHEMY_ENGINE is not None
767
- with orm.Session(_SQLALCHEMY_ENGINE) as session:
768
- session.execute(
769
- sqlalchemy.delete(version_specs_table).where(
770
- version_specs_table.c.service_name == service_name))
771
- session.commit()
772
-
773
-
774
770
  @init_db
775
771
  def get_spec(service_name: str,
776
772
  version: int) -> Optional['service_spec.SkyServiceSpec']:
@@ -785,6 +781,19 @@ def get_spec(service_name: str,
785
781
  return pickle.loads(result[0]) if result else None
786
782
 
787
783
 
784
@init_db
def get_yaml_content(service_name: str, version: int) -> Optional[str]:
    """Gets the yaml content of a version.

    Args:
        service_name: Value matched against `version_specs_table.service_name`.
        version: Value matched against `version_specs_table.version`.

    Returns:
        The `yaml_content` column of the first matching row, or None when
        no row matches.
    """
    assert _SQLALCHEMY_ENGINE is not None
    matches_version = sqlalchemy.and_(
        version_specs_table.c.service_name == service_name,
        version_specs_table.c.version == version)
    stmt = sqlalchemy.select(
        version_specs_table.c.yaml_content).where(matches_version)
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        row = session.execute(stmt).fetchone()
    if not row:
        return None
    return row[0]
795
+
796
+
788
797
  @init_db
789
798
  def delete_version(service_name: str, version: int) -> None:
790
799
  """Deletes a version from the database."""