skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (397)
  1. sky/__init__.py +10 -2
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/ibm.py +5 -2
  8. sky/adaptors/kubernetes.py +64 -0
  9. sky/adaptors/nebius.py +3 -1
  10. sky/adaptors/primeintellect.py +1 -0
  11. sky/adaptors/seeweb.py +183 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +20 -0
  14. sky/authentication.py +157 -263
  15. sky/backends/__init__.py +3 -2
  16. sky/backends/backend.py +11 -3
  17. sky/backends/backend_utils.py +588 -184
  18. sky/backends/cloud_vm_ray_backend.py +1088 -904
  19. sky/backends/local_docker_backend.py +9 -5
  20. sky/backends/task_codegen.py +633 -0
  21. sky/backends/wheel_utils.py +18 -0
  22. sky/catalog/__init__.py +8 -0
  23. sky/catalog/aws_catalog.py +4 -0
  24. sky/catalog/common.py +19 -1
  25. sky/catalog/data_fetchers/fetch_aws.py +102 -80
  26. sky/catalog/data_fetchers/fetch_gcp.py +30 -3
  27. sky/catalog/data_fetchers/fetch_nebius.py +9 -6
  28. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  29. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  30. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  31. sky/catalog/kubernetes_catalog.py +24 -28
  32. sky/catalog/primeintellect_catalog.py +95 -0
  33. sky/catalog/runpod_catalog.py +5 -1
  34. sky/catalog/seeweb_catalog.py +184 -0
  35. sky/catalog/shadeform_catalog.py +165 -0
  36. sky/check.py +73 -43
  37. sky/client/cli/command.py +675 -412
  38. sky/client/cli/flags.py +4 -2
  39. sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
  40. sky/client/cli/utils.py +79 -0
  41. sky/client/common.py +12 -2
  42. sky/client/sdk.py +132 -63
  43. sky/client/sdk_async.py +34 -33
  44. sky/cloud_stores.py +82 -3
  45. sky/clouds/__init__.py +6 -0
  46. sky/clouds/aws.py +337 -129
  47. sky/clouds/azure.py +24 -18
  48. sky/clouds/cloud.py +40 -13
  49. sky/clouds/cudo.py +16 -13
  50. sky/clouds/do.py +9 -7
  51. sky/clouds/fluidstack.py +12 -5
  52. sky/clouds/gcp.py +14 -7
  53. sky/clouds/hyperbolic.py +12 -5
  54. sky/clouds/ibm.py +12 -5
  55. sky/clouds/kubernetes.py +80 -45
  56. sky/clouds/lambda_cloud.py +12 -5
  57. sky/clouds/nebius.py +23 -9
  58. sky/clouds/oci.py +19 -12
  59. sky/clouds/paperspace.py +4 -1
  60. sky/clouds/primeintellect.py +317 -0
  61. sky/clouds/runpod.py +85 -24
  62. sky/clouds/scp.py +12 -8
  63. sky/clouds/seeweb.py +477 -0
  64. sky/clouds/shadeform.py +400 -0
  65. sky/clouds/ssh.py +4 -2
  66. sky/clouds/utils/scp_utils.py +61 -50
  67. sky/clouds/vast.py +33 -27
  68. sky/clouds/vsphere.py +14 -16
  69. sky/core.py +174 -165
  70. sky/dashboard/out/404.html +1 -1
  71. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  72. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  73. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  74. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  75. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  76. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  77. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  78. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  79. sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-7b45f9fbb6308557.js} +1 -1
  80. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  81. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  82. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  83. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  84. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  85. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  86. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  87. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  88. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  89. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  90. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  91. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  92. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  93. sky/dashboard/out/_next/static/chunks/pages/{_app-ce361c6959bc2001.js → _app-bde01e4a2beec258.js} +1 -1
  94. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  95. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  96. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  97. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-c0b5935149902e6f.js} +1 -1
  98. sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-aed0ea19df7cf961.js} +1 -1
  99. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  100. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  101. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  102. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-84a40f8c7c627fe4.js} +1 -1
  105. sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-531b2f8c4bf89f82.js} +1 -1
  106. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  107. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  108. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  109. sky/dashboard/out/clusters/[cluster].html +1 -1
  110. sky/dashboard/out/clusters.html +1 -1
  111. sky/dashboard/out/config.html +1 -1
  112. sky/dashboard/out/index.html +1 -1
  113. sky/dashboard/out/infra/[context].html +1 -1
  114. sky/dashboard/out/infra.html +1 -1
  115. sky/dashboard/out/jobs/[job].html +1 -1
  116. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  117. sky/dashboard/out/jobs.html +1 -1
  118. sky/dashboard/out/users.html +1 -1
  119. sky/dashboard/out/volumes.html +1 -1
  120. sky/dashboard/out/workspace/new.html +1 -1
  121. sky/dashboard/out/workspaces/[name].html +1 -1
  122. sky/dashboard/out/workspaces.html +1 -1
  123. sky/data/data_utils.py +92 -1
  124. sky/data/mounting_utils.py +162 -29
  125. sky/data/storage.py +200 -19
  126. sky/data/storage_utils.py +10 -45
  127. sky/exceptions.py +18 -7
  128. sky/execution.py +74 -31
  129. sky/global_user_state.py +605 -191
  130. sky/jobs/__init__.py +2 -0
  131. sky/jobs/client/sdk.py +101 -4
  132. sky/jobs/client/sdk_async.py +31 -5
  133. sky/jobs/constants.py +15 -8
  134. sky/jobs/controller.py +726 -284
  135. sky/jobs/file_content_utils.py +128 -0
  136. sky/jobs/log_gc.py +193 -0
  137. sky/jobs/recovery_strategy.py +250 -100
  138. sky/jobs/scheduler.py +271 -173
  139. sky/jobs/server/core.py +367 -114
  140. sky/jobs/server/server.py +81 -35
  141. sky/jobs/server/utils.py +89 -35
  142. sky/jobs/state.py +1498 -620
  143. sky/jobs/utils.py +771 -306
  144. sky/logs/agent.py +40 -5
  145. sky/logs/aws.py +9 -19
  146. sky/metrics/utils.py +282 -39
  147. sky/optimizer.py +1 -1
  148. sky/provision/__init__.py +37 -1
  149. sky/provision/aws/config.py +34 -13
  150. sky/provision/aws/instance.py +5 -2
  151. sky/provision/azure/instance.py +5 -3
  152. sky/provision/common.py +2 -0
  153. sky/provision/cudo/instance.py +4 -3
  154. sky/provision/do/instance.py +4 -3
  155. sky/provision/docker_utils.py +97 -26
  156. sky/provision/fluidstack/instance.py +6 -5
  157. sky/provision/gcp/config.py +6 -1
  158. sky/provision/gcp/instance.py +4 -2
  159. sky/provision/hyperbolic/instance.py +4 -2
  160. sky/provision/instance_setup.py +66 -20
  161. sky/provision/kubernetes/__init__.py +2 -0
  162. sky/provision/kubernetes/config.py +7 -44
  163. sky/provision/kubernetes/constants.py +0 -1
  164. sky/provision/kubernetes/instance.py +609 -213
  165. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  166. sky/provision/kubernetes/network.py +12 -8
  167. sky/provision/kubernetes/network_utils.py +8 -25
  168. sky/provision/kubernetes/utils.py +382 -418
  169. sky/provision/kubernetes/volume.py +150 -18
  170. sky/provision/lambda_cloud/instance.py +16 -13
  171. sky/provision/nebius/instance.py +6 -2
  172. sky/provision/nebius/utils.py +103 -86
  173. sky/provision/oci/instance.py +4 -2
  174. sky/provision/paperspace/instance.py +4 -3
  175. sky/provision/primeintellect/__init__.py +10 -0
  176. sky/provision/primeintellect/config.py +11 -0
  177. sky/provision/primeintellect/instance.py +454 -0
  178. sky/provision/primeintellect/utils.py +398 -0
  179. sky/provision/provisioner.py +30 -9
  180. sky/provision/runpod/__init__.py +2 -0
  181. sky/provision/runpod/instance.py +4 -3
  182. sky/provision/runpod/volume.py +69 -13
  183. sky/provision/scp/instance.py +307 -130
  184. sky/provision/seeweb/__init__.py +11 -0
  185. sky/provision/seeweb/config.py +13 -0
  186. sky/provision/seeweb/instance.py +812 -0
  187. sky/provision/shadeform/__init__.py +11 -0
  188. sky/provision/shadeform/config.py +12 -0
  189. sky/provision/shadeform/instance.py +351 -0
  190. sky/provision/shadeform/shadeform_utils.py +83 -0
  191. sky/provision/vast/instance.py +5 -3
  192. sky/provision/volume.py +164 -0
  193. sky/provision/vsphere/common/ssl_helper.py +1 -1
  194. sky/provision/vsphere/common/vapiconnect.py +2 -1
  195. sky/provision/vsphere/common/vim_utils.py +3 -2
  196. sky/provision/vsphere/instance.py +8 -6
  197. sky/provision/vsphere/vsphere_utils.py +8 -1
  198. sky/resources.py +11 -3
  199. sky/schemas/api/responses.py +107 -6
  200. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  201. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  202. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  203. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  204. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  205. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  206. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  207. sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
  208. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  209. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  210. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  211. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  212. sky/schemas/generated/jobsv1_pb2.py +86 -0
  213. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  214. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  215. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  216. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  217. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  218. sky/schemas/generated/servev1_pb2.py +58 -0
  219. sky/schemas/generated/servev1_pb2.pyi +115 -0
  220. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  221. sky/serve/autoscalers.py +2 -0
  222. sky/serve/client/impl.py +55 -21
  223. sky/serve/constants.py +4 -3
  224. sky/serve/controller.py +17 -11
  225. sky/serve/load_balancing_policies.py +1 -1
  226. sky/serve/replica_managers.py +219 -142
  227. sky/serve/serve_rpc_utils.py +179 -0
  228. sky/serve/serve_state.py +63 -54
  229. sky/serve/serve_utils.py +145 -109
  230. sky/serve/server/core.py +46 -25
  231. sky/serve/server/impl.py +311 -162
  232. sky/serve/server/server.py +21 -19
  233. sky/serve/service.py +84 -68
  234. sky/serve/service_spec.py +45 -7
  235. sky/server/auth/loopback.py +38 -0
  236. sky/server/auth/oauth2_proxy.py +12 -7
  237. sky/server/common.py +47 -24
  238. sky/server/config.py +62 -28
  239. sky/server/constants.py +9 -1
  240. sky/server/daemons.py +109 -38
  241. sky/server/metrics.py +76 -96
  242. sky/server/middleware_utils.py +166 -0
  243. sky/server/requests/executor.py +381 -145
  244. sky/server/requests/payloads.py +71 -18
  245. sky/server/requests/preconditions.py +15 -13
  246. sky/server/requests/request_names.py +121 -0
  247. sky/server/requests/requests.py +507 -157
  248. sky/server/requests/serializers/decoders.py +48 -17
  249. sky/server/requests/serializers/encoders.py +85 -20
  250. sky/server/requests/threads.py +117 -0
  251. sky/server/rest.py +116 -24
  252. sky/server/server.py +420 -172
  253. sky/server/stream_utils.py +219 -45
  254. sky/server/uvicorn.py +30 -19
  255. sky/setup_files/MANIFEST.in +6 -1
  256. sky/setup_files/alembic.ini +8 -0
  257. sky/setup_files/dependencies.py +62 -19
  258. sky/setup_files/setup.py +44 -44
  259. sky/sky_logging.py +13 -5
  260. sky/skylet/attempt_skylet.py +106 -24
  261. sky/skylet/configs.py +3 -1
  262. sky/skylet/constants.py +111 -26
  263. sky/skylet/events.py +64 -10
  264. sky/skylet/job_lib.py +141 -104
  265. sky/skylet/log_lib.py +233 -5
  266. sky/skylet/log_lib.pyi +40 -2
  267. sky/skylet/providers/ibm/node_provider.py +12 -8
  268. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  269. sky/skylet/runtime_utils.py +21 -0
  270. sky/skylet/services.py +524 -0
  271. sky/skylet/skylet.py +22 -1
  272. sky/skylet/subprocess_daemon.py +104 -29
  273. sky/skypilot_config.py +99 -79
  274. sky/ssh_node_pools/server.py +9 -8
  275. sky/task.py +221 -104
  276. sky/templates/aws-ray.yml.j2 +1 -0
  277. sky/templates/azure-ray.yml.j2 +1 -0
  278. sky/templates/cudo-ray.yml.j2 +1 -0
  279. sky/templates/do-ray.yml.j2 +1 -0
  280. sky/templates/fluidstack-ray.yml.j2 +1 -0
  281. sky/templates/gcp-ray.yml.j2 +1 -0
  282. sky/templates/hyperbolic-ray.yml.j2 +1 -0
  283. sky/templates/ibm-ray.yml.j2 +2 -1
  284. sky/templates/jobs-controller.yaml.j2 +3 -0
  285. sky/templates/kubernetes-ray.yml.j2 +196 -55
  286. sky/templates/lambda-ray.yml.j2 +1 -0
  287. sky/templates/nebius-ray.yml.j2 +3 -0
  288. sky/templates/oci-ray.yml.j2 +1 -0
  289. sky/templates/paperspace-ray.yml.j2 +1 -0
  290. sky/templates/primeintellect-ray.yml.j2 +72 -0
  291. sky/templates/runpod-ray.yml.j2 +1 -0
  292. sky/templates/scp-ray.yml.j2 +1 -0
  293. sky/templates/seeweb-ray.yml.j2 +171 -0
  294. sky/templates/shadeform-ray.yml.j2 +73 -0
  295. sky/templates/vast-ray.yml.j2 +1 -0
  296. sky/templates/vsphere-ray.yml.j2 +1 -0
  297. sky/templates/websocket_proxy.py +188 -43
  298. sky/usage/usage_lib.py +16 -4
  299. sky/users/permission.py +60 -43
  300. sky/utils/accelerator_registry.py +6 -3
  301. sky/utils/admin_policy_utils.py +18 -5
  302. sky/utils/annotations.py +22 -0
  303. sky/utils/asyncio_utils.py +78 -0
  304. sky/utils/atomic.py +1 -1
  305. sky/utils/auth_utils.py +153 -0
  306. sky/utils/cli_utils/status_utils.py +12 -7
  307. sky/utils/cluster_utils.py +28 -6
  308. sky/utils/command_runner.py +88 -27
  309. sky/utils/command_runner.pyi +36 -3
  310. sky/utils/common.py +3 -1
  311. sky/utils/common_utils.py +37 -4
  312. sky/utils/config_utils.py +1 -14
  313. sky/utils/context.py +127 -40
  314. sky/utils/context_utils.py +73 -18
  315. sky/utils/controller_utils.py +229 -70
  316. sky/utils/db/db_utils.py +95 -18
  317. sky/utils/db/kv_cache.py +149 -0
  318. sky/utils/db/migration_utils.py +24 -7
  319. sky/utils/env_options.py +4 -0
  320. sky/utils/git.py +559 -1
  321. sky/utils/kubernetes/create_cluster.sh +15 -30
  322. sky/utils/kubernetes/delete_cluster.sh +10 -7
  323. sky/utils/kubernetes/{deploy_remote_cluster.py → deploy_ssh_node_pools.py} +258 -380
  324. sky/utils/kubernetes/generate_kind_config.py +6 -66
  325. sky/utils/kubernetes/gpu_labeler.py +13 -3
  326. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  327. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  328. sky/utils/kubernetes/kubernetes_deploy_utils.py +213 -194
  329. sky/utils/kubernetes/rsync_helper.sh +11 -3
  330. sky/utils/kubernetes_enums.py +7 -15
  331. sky/utils/lock_events.py +4 -4
  332. sky/utils/locks.py +128 -31
  333. sky/utils/log_utils.py +0 -319
  334. sky/utils/resource_checker.py +13 -10
  335. sky/utils/resources_utils.py +53 -29
  336. sky/utils/rich_utils.py +8 -4
  337. sky/utils/schemas.py +107 -52
  338. sky/utils/subprocess_utils.py +17 -4
  339. sky/utils/thread_utils.py +91 -0
  340. sky/utils/timeline.py +2 -1
  341. sky/utils/ux_utils.py +35 -1
  342. sky/utils/volume.py +88 -4
  343. sky/utils/yaml_utils.py +9 -0
  344. sky/volumes/client/sdk.py +48 -10
  345. sky/volumes/server/core.py +59 -22
  346. sky/volumes/server/server.py +46 -17
  347. sky/volumes/volume.py +54 -42
  348. sky/workspaces/core.py +57 -21
  349. sky/workspaces/server.py +13 -12
  350. sky_templates/README.md +3 -0
  351. sky_templates/__init__.py +3 -0
  352. sky_templates/ray/__init__.py +0 -0
  353. sky_templates/ray/start_cluster +183 -0
  354. sky_templates/ray/stop_cluster +75 -0
  355. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/METADATA +331 -65
  356. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  357. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  358. sky/client/cli/git.py +0 -549
  359. sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
  360. sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
  361. sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
  362. sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
  363. sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
  364. sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
  365. sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
  366. sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
  367. sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
  368. sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
  369. sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
  370. sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
  371. sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
  372. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
  373. sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
  374. sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
  375. sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
  376. sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
  377. sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
  378. sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
  379. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
  380. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
  381. sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
  382. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
  383. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
  384. sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
  385. sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
  386. sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
  387. sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
  388. sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
  389. sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
  390. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  391. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  392. skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
  393. skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
  394. /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  395. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +0 -0
  396. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  397. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
@@ -60,6 +60,11 @@ EXTERNAL_LOCAL_ENV_VARS = [
60
60
  'AWS_ACCESS_KEY_ID',
61
61
  'AWS_SECRET_ACCESS_KEY',
62
62
  'AWS_SESSION_TOKEN',
63
+ # Allow overriding the Azure authentication.
64
+ 'AZURE_CLIENT_ID',
65
+ 'AZURE_CLIENT_SECRET',
66
+ 'AZURE_TENANT_ID',
67
+ 'AZURE_SUBSCRIPTION_ID',
63
68
  # Allow overriding the GCP authentication.
64
69
  'GOOGLE_APPLICATION_CREDENTIALS',
65
70
  # Allow overriding the kubeconfig.
@@ -67,7 +72,6 @@ EXTERNAL_LOCAL_ENV_VARS = [
67
72
  ]
68
73
 
69
74
 
70
- @annotations.lru_cache(scope='global')
71
75
  def request_body_env_vars() -> dict:
72
76
  env_vars = {}
73
77
  for env_var in os.environ:
@@ -81,11 +85,19 @@ def request_body_env_vars() -> dict:
81
85
  env_vars[constants.USER_ENV_VAR] = common_utils.get_current_user_name()
82
86
  env_vars[
83
87
  usage_constants.USAGE_RUN_ID_ENV_VAR] = usage_lib.messages.usage.run_id
88
+ if not common.is_api_server_local():
89
+ # Used in job controller, for local API server, keep the
90
+ # SKYPILOT_CONFIG env var to use the config for the managed job.
91
+ env_vars.pop(skypilot_config.ENV_VAR_SKYPILOT_CONFIG, None)
84
92
  # Remove the path to config file, as the config content is included in the
85
93
  # request body and will be merged with the config on the server side.
86
- env_vars.pop(skypilot_config.ENV_VAR_SKYPILOT_CONFIG, None)
87
94
  env_vars.pop(skypilot_config.ENV_VAR_GLOBAL_CONFIG, None)
88
95
  env_vars.pop(skypilot_config.ENV_VAR_PROJECT_CONFIG, None)
96
+ # Remove the config related env vars, as the client config override
97
+ # should be passed in the request body.
98
+ # Any new environment variables that are server-specific should
99
+ # use SKYPILOT_SERVER_ENV_VAR_PREFIX.
100
+ env_vars.pop(constants.ENV_VAR_DB_CONNECTION_URI, None)
89
101
  return env_vars
90
102
 
91
103
 
@@ -309,8 +321,13 @@ class StatusBody(RequestBody):
309
321
  cluster_names: Optional[List[str]] = None
310
322
  refresh: common_lib.StatusRefreshMode = common_lib.StatusRefreshMode.NONE
311
323
  all_users: bool = True
312
- # TODO (kyuds): default to False post 0.10.5
324
+ # TODO (kyuds): default to False post 0.12.0
313
325
  include_credentials: bool = True
326
+ # Only return fields that are needed for the
327
+ # dashboard / CLI summary response
328
+ summary_response: bool = False
329
+ # Include the cluster handle in the response
330
+ include_handle: bool = True
314
331
 
315
332
 
316
333
  class StartBody(RequestBody):
@@ -355,9 +372,10 @@ class CancelBody(RequestBody):
355
372
  return kwargs
356
373
 
357
374
 
358
- class ClusterNameBody(RequestBody):
375
+ class ProvisionLogsBody(RequestBody):
359
376
  """Cluster node."""
360
377
  cluster_name: str
378
+ worker: Optional[int] = None
361
379
 
362
380
 
363
381
  class ClusterJobBody(RequestBody):
@@ -458,6 +476,7 @@ class VolumeApplyBody(RequestBody):
458
476
  size: Optional[str] = None
459
477
  config: Optional[Dict[str, Any]] = None
460
478
  labels: Optional[Dict[str, str]] = None
479
+ use_existing: Optional[bool] = None
461
480
 
462
481
 
463
482
  class VolumeDeleteBody(RequestBody):
@@ -470,6 +489,17 @@ class VolumeListBody(RequestBody):
470
489
  pass
471
490
 
472
491
 
492
+ class VolumeValidateBody(RequestBody):
493
+ """The request body for the volume validate endpoint."""
494
+ name: Optional[str] = None
495
+ volume_type: Optional[str] = None
496
+ infra: Optional[str] = None
497
+ size: Optional[str] = None
498
+ labels: Optional[Dict[str, str]] = None
499
+ config: Optional[Dict[str, Any]] = None
500
+ use_existing: Optional[bool] = None
501
+
502
+
473
503
  class EndpointsBody(RequestBody):
474
504
  """The request body for the endpoint."""
475
505
  cluster: str
@@ -507,6 +537,14 @@ class JobsQueueBody(RequestBody):
507
537
  skip_finished: bool = False
508
538
  all_users: bool = False
509
539
  job_ids: Optional[List[int]] = None
540
+
541
+
542
+ class JobsQueueV2Body(RequestBody):
543
+ """The request body for the jobs queue endpoint."""
544
+ refresh: bool = False
545
+ skip_finished: bool = False
546
+ all_users: bool = False
547
+ job_ids: Optional[List[int]] = None
510
548
  user_match: Optional[str] = None
511
549
  workspace_match: Optional[str] = None
512
550
  name_match: Optional[str] = None
@@ -514,6 +552,9 @@ class JobsQueueBody(RequestBody):
514
552
  page: Optional[int] = None
515
553
  limit: Optional[int] = None
516
554
  statuses: Optional[List[str]] = None
555
+ # The fields to return in the response.
556
+ # Refer to the fields in the `class ManagedJobRecord` in `responses.py`
557
+ fields: Optional[List[str]] = None
517
558
 
518
559
 
519
560
  class JobsCancelBody(RequestBody):
@@ -546,6 +587,8 @@ class RequestStatusBody(pydantic.BaseModel):
546
587
  """The request body for the API request status endpoint."""
547
588
  request_ids: Optional[List[str]] = None
548
589
  all_status: bool = False
590
+ limit: Optional[int] = None
591
+ fields: Optional[List[str]] = None
549
592
 
550
593
 
551
594
  class ServeUpBody(RequestBody):
@@ -651,12 +694,13 @@ class ListAcceleratorCountsBody(RequestBody):
651
694
  class LocalUpBody(RequestBody):
652
695
  """The request body for the local up endpoint."""
653
696
  gpus: bool = True
654
- ips: Optional[List[str]] = None
655
- ssh_user: Optional[str] = None
656
- ssh_key: Optional[str] = None
657
- cleanup: bool = False
658
- context_name: Optional[str] = None
659
- password: Optional[str] = None
697
+ name: Optional[str] = None
698
+ port_start: Optional[int] = None
699
+
700
+
701
+ class LocalDownBody(RequestBody):
702
+ """The request body for the local down endpoint."""
703
+ name: Optional[str] = None
660
704
 
661
705
 
662
706
  class SSHUpBody(RequestBody):
@@ -696,19 +740,22 @@ class JobsDownloadLogsBody(RequestBody):
696
740
 
697
741
  class JobsPoolApplyBody(RequestBody):
698
742
  """The request body for the jobs pool apply endpoint."""
699
- task: str
743
+ task: Optional[str] = None
744
+ workers: Optional[int] = None
700
745
  pool_name: str
701
746
  mode: serve.UpdateMode
702
747
 
703
748
  def to_kwargs(self) -> Dict[str, Any]:
704
749
  kwargs = super().to_kwargs()
705
- dag = common.process_mounts_in_task_on_api_server(self.task,
706
- self.env_vars,
707
- workdir_only=False)
708
- assert len(
709
- dag.tasks) == 1, ('Must only specify one task in the DAG for '
710
- 'a pool.', dag)
711
- kwargs['task'] = dag.tasks[0]
750
+ if self.task is not None:
751
+ dag = common.process_mounts_in_task_on_api_server(
752
+ self.task, self.env_vars, workdir_only=False)
753
+ assert len(
754
+ dag.tasks) == 1, ('Must only specify one task in the DAG for '
755
+ 'a pool.', dag)
756
+ kwargs['task'] = dag.tasks[0]
757
+ else:
758
+ kwargs['task'] = None
712
759
  return kwargs
713
760
 
714
761
 
@@ -779,6 +826,12 @@ class GetConfigBody(RequestBody):
779
826
  class CostReportBody(RequestBody):
780
827
  """The request body for the cost report endpoint."""
781
828
  days: Optional[int] = 30
829
+ # we use hashes instead of names to avoid the case where
830
+ # the name is not unique
831
+ cluster_hashes: Optional[List[str]] = None
832
+ # Only return fields that are needed for the dashboard
833
+ # summary page
834
+ dashboard_summary_response: bool = False
782
835
 
783
836
 
784
837
  class RequestPayload(BasePayload):
@@ -90,7 +90,7 @@ class Precondition(abc.ABC):
90
90
  while True:
91
91
  if self.timeout > 0 and time.time() - start_time > self.timeout:
92
92
  # Cancel the request on timeout.
93
- api_requests.set_request_failed(
93
+ await api_requests.set_request_failed_async(
94
94
  self.request_id,
95
95
  exceptions.RequestCancelled(
96
96
  f'Request {self.request_id} precondition wait timed '
@@ -98,13 +98,15 @@ class Precondition(abc.ABC):
98
98
  return False
99
99
 
100
100
  # Check if the request has been cancelled
101
- request = await api_requests.get_request_async(self.request_id)
101
+ request = await api_requests.get_request_async(self.request_id,
102
+ fields=['status'])
102
103
  if request is None:
103
104
  logger.error(f'Request {self.request_id} not found')
104
105
  return False
105
106
  if request.status == api_requests.RequestStatus.CANCELLED:
106
107
  logger.debug(f'Request {self.request_id} cancelled')
107
108
  return False
109
+ del request
108
110
 
109
111
  try:
110
112
  met, status_msg = await self.check()
@@ -112,13 +114,11 @@ class Precondition(abc.ABC):
112
114
  return True
113
115
  if status_msg is not None and status_msg != last_status_msg:
114
116
  # Update the status message if it has changed.
115
- async with api_requests.update_request_async(
116
- self.request_id) as req:
117
- assert req is not None, self.request_id
118
- req.status_msg = status_msg
117
+ await api_requests.update_status_msg_async(
118
+ self.request_id, status_msg)
119
119
  last_status_msg = status_msg
120
120
  except (Exception, SystemExit, KeyboardInterrupt) as e: # pylint: disable=broad-except
121
- api_requests.set_request_failed(self.request_id, e)
121
+ await api_requests.set_request_failed_async(self.request_id, e)
122
122
  logger.info(f'Request {self.request_id} failed due to '
123
123
  f'{common_utils.format_exception(e)}')
124
124
  return False
@@ -146,10 +146,9 @@ class ClusterStartCompletePrecondition(Precondition):
146
146
  self.cluster_name = cluster_name
147
147
 
148
148
  async def check(self) -> Tuple[bool, Optional[str]]:
149
- cluster_record = global_user_state.get_cluster_from_name(
149
+ cluster_status = global_user_state.get_status_from_cluster_name(
150
150
  self.cluster_name)
151
- if (cluster_record and
152
- cluster_record['status'] is status_lib.ClusterStatus.UP):
151
+ if cluster_status is status_lib.ClusterStatus.UP:
153
152
  # Shortcut for started clusters, ignore cluster not found
154
153
  # since the cluster record might not yet be created by the
155
154
  # launch task.
@@ -165,11 +164,14 @@ class ClusterStartCompletePrecondition(Precondition):
165
164
  requests = await api_requests.get_request_tasks_async(
166
165
  req_filter=api_requests.RequestTaskFilter(
167
166
  status=[
168
- api_requests.RequestStatus.RUNNING,
169
- api_requests.RequestStatus.PENDING
167
+ api_requests.RequestStatus.PENDING,
168
+ api_requests.RequestStatus.RUNNING
170
169
  ],
171
170
  include_request_names=['sky.launch', 'sky.start'],
172
- cluster_names=[self.cluster_name]))
171
+ cluster_names=[self.cluster_name],
172
+ # Only get the request ID to avoid fetching the whole request.
173
+ # We're only interested in the count, not the whole request.
174
+ fields=['request_id']))
173
175
  if len(requests) == 0:
174
176
  # No running or pending tasks, the start process is done.
175
177
  return True, None
@@ -0,0 +1,121 @@
1
+ """Request names."""
2
+ import enum
3
+
4
+
5
+ class RequestName(str, enum.Enum):
6
+ """Enum of all the request names."""
7
+ # General requests
8
+ CHECK = 'check'
9
+ ENABLED_CLOUDS = 'enabled_clouds'
10
+ REALTIME_KUBERNETES_GPU_AVAILABILITY = (
11
+ 'realtime_kubernetes_gpu_availability')
12
+ KUBERNETES_NODE_INFO = 'kubernetes_node_info'
13
+ STATUS_KUBERNETES = 'status_kubernetes'
14
+ LIST_ACCELERATORS = 'list_accelerators'
15
+ LIST_ACCELERATOR_COUNTS = 'list_accelerator_counts'
16
+ OPTIMIZE = 'optimize'
17
+ # Cluster requests
18
+ CLUSTER_LAUNCH = 'launch'
19
+ CLUSTER_EXEC = 'exec'
20
+ CLUSTER_STOP = 'stop'
21
+ CLUSTER_STATUS = 'status'
22
+ CLUSTER_ENDPOINTS = 'endpoints'
23
+ CLUSTER_DOWN = 'down'
24
+ CLUSTER_START = 'start'
25
+ CLUSTER_AUTOSTOP = 'autostop'
26
+ CLUSTER_QUEUE = 'queue'
27
+ CLUSTER_JOB_STATUS = 'job_status'
28
+ CLUSTER_JOB_CANCEL = 'cancel'
29
+ CLUSTER_JOB_LOGS = 'logs'
30
+ CLUSTER_JOB_DOWNLOAD_LOGS = 'download_logs'
31
+ CLUSTER_COST_REPORT = 'cost_report'
32
+ # Storage requests
33
+ STORAGE_LS = 'storage_ls'
34
+ STORAGE_DELETE = 'storage_delete'
35
+ # Local requests
36
+ LOCAL_UP = 'local_up'
37
+ LOCAL_DOWN = 'local_down'
38
+ # API requests
39
+ API_CANCEL = 'api_cancel'
40
+ ALL_CONTEXTS = 'all_contexts'
41
+ # Managed jobs requests
42
+ JOBS_LAUNCH = 'jobs.launch'
43
+ JOBS_QUEUE = 'jobs.queue'
44
+ JOBS_QUEUE_V2 = 'jobs.queue_v2'
45
+ JOBS_CANCEL = 'jobs.cancel'
46
+ JOBS_LOGS = 'jobs.logs'
47
+ JOBS_DOWNLOAD_LOGS = 'jobs.download_logs'
48
+ JOBS_POOL_APPLY = 'jobs.pool_apply'
49
+ JOBS_POOL_DOWN = 'jobs.pool_down'
50
+ JOBS_POOL_STATUS = 'jobs.pool_status'
51
+ JOBS_POOL_LOGS = 'jobs.pool_logs'
52
+ JOBS_POOL_SYNC_DOWN_LOGS = 'jobs.pool_sync_down_logs'
53
+ # Serve requests
54
+ SERVE_UP = 'serve.up'
55
+ SERVE_UPDATE = 'serve.update'
56
+ SERVE_DOWN = 'serve.down'
57
+ SERVE_TERMINATE_REPLICA = 'serve.terminate_replica'
58
+ SERVE_STATUS = 'serve.status'
59
+ SERVE_LOGS = 'serve.logs'
60
+ SERVE_SYNC_DOWN_LOGS = 'serve.sync_down_logs'
61
+ # Volumes requests
62
+ VOLUME_LIST = 'volume_list'
63
+ VOLUME_DELETE = 'volume_delete'
64
+ VOLUME_APPLY = 'volume_apply'
65
+ # Workspaces requests
66
+ WORKSPACES_GET = 'workspaces.get'
67
+ WORKSPACES_UPDATE = 'workspaces.update'
68
+ WORKSPACES_CREATE = 'workspaces.create'
69
+ WORKSPACES_DELETE = 'workspaces.delete'
70
+ WORKSPACES_GET_CONFIG = 'workspaces.get_config'
71
+ WORKSPACES_UPDATE_CONFIG = 'workspaces.update_config'
72
+ # SSH node pools requests
73
+ SSH_NODE_POOLS_UP = 'ssh_node_pools.up'
74
+ SSH_NODE_POOLS_DOWN = 'ssh_node_pools.down'
75
+ # Internal request daemons
76
+ REQUEST_DAEMON_STATUS_REFRESH = 'status-refresh'
77
+ REQUEST_DAEMON_VOLUME_REFRESH = 'volume-refresh'
78
+ REQUEST_DAEMON_MANAGED_JOB_STATUS_REFRESH = 'managed-job-status-refresh'
79
+ REQUEST_DAEMON_SKY_SERVE_STATUS_REFRESH = 'sky-serve-status-refresh'
80
+ REQUEST_DAEMON_POOL_STATUS_REFRESH = 'pool-status-refresh'
81
+
82
+ def __repr__(self):
83
+ return self.value
84
+
85
+ def __str__(self):
86
+ return self.value
87
+
88
+
89
+ class AdminPolicyRequestName(str, enum.Enum):
90
+ """Enum of all the request names that are
91
+ used for admin policy application."""
92
+ # General requests
93
+ # validate call is not stored in the request db,
94
+ # but is defined here for admin policy application
95
+ VALIDATE = 'validate'
96
+ OPTIMIZE = RequestName.OPTIMIZE.value
97
+ # Cluster requests
98
+ CLUSTER_LAUNCH = RequestName.CLUSTER_LAUNCH.value
99
+ CLUSTER_EXEC = RequestName.CLUSTER_EXEC.value
100
+ # Jobs requests
101
+ JOBS_LAUNCH = RequestName.JOBS_LAUNCH.value
102
+ # jobs launch controller request is not stored in the request db,
103
+ # but is defined here for admin policy application
104
+ JOBS_LAUNCH_CONTROLLER = 'jobs.launch_controller'
105
+ JOBS_POOL_APPLY = RequestName.JOBS_POOL_APPLY.value
106
+ JOBS_LAUNCH_CLUSTER = 'jobs.launch_cluster'
107
+ # Serve requests
108
+ SERVE_UP = RequestName.SERVE_UP.value
109
+ # serve launch controller request is not stored in the request db,
110
+ # but is defined here for admin policy application
111
+ SERVE_LAUNCH_CONTROLLER = 'serve.launch_controller'
112
+ SERVE_UPDATE = RequestName.SERVE_UPDATE.value
113
+ # serve launch replica request is not stored in the request db,
114
+ # but is defined here for admin policy application
115
+ SERVE_LAUNCH_REPLICA = 'serve.launch_replica'
116
+
117
+ def __repr__(self):
118
+ return self.value
119
+
120
+ def __str__(self):
121
+ return self.value