skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (429)
  1. sky/__init__.py +12 -2
  2. sky/adaptors/aws.py +27 -22
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/ibm.py +5 -2
  8. sky/adaptors/kubernetes.py +64 -0
  9. sky/adaptors/nebius.py +3 -1
  10. sky/adaptors/primeintellect.py +1 -0
  11. sky/adaptors/seeweb.py +183 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/adaptors/slurm.py +478 -0
  14. sky/admin_policy.py +20 -0
  15. sky/authentication.py +157 -263
  16. sky/backends/__init__.py +3 -2
  17. sky/backends/backend.py +11 -3
  18. sky/backends/backend_utils.py +630 -185
  19. sky/backends/cloud_vm_ray_backend.py +1111 -928
  20. sky/backends/local_docker_backend.py +9 -5
  21. sky/backends/task_codegen.py +971 -0
  22. sky/backends/wheel_utils.py +18 -0
  23. sky/catalog/__init__.py +8 -3
  24. sky/catalog/aws_catalog.py +4 -0
  25. sky/catalog/common.py +19 -1
  26. sky/catalog/data_fetchers/fetch_aws.py +102 -80
  27. sky/catalog/data_fetchers/fetch_gcp.py +30 -3
  28. sky/catalog/data_fetchers/fetch_nebius.py +9 -6
  29. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  30. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  31. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  32. sky/catalog/kubernetes_catalog.py +36 -32
  33. sky/catalog/primeintellect_catalog.py +95 -0
  34. sky/catalog/runpod_catalog.py +5 -1
  35. sky/catalog/seeweb_catalog.py +184 -0
  36. sky/catalog/shadeform_catalog.py +165 -0
  37. sky/catalog/slurm_catalog.py +243 -0
  38. sky/check.py +87 -46
  39. sky/client/cli/command.py +1004 -434
  40. sky/client/cli/flags.py +4 -2
  41. sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
  42. sky/client/cli/utils.py +79 -0
  43. sky/client/common.py +12 -2
  44. sky/client/sdk.py +188 -65
  45. sky/client/sdk_async.py +34 -33
  46. sky/cloud_stores.py +82 -3
  47. sky/clouds/__init__.py +8 -0
  48. sky/clouds/aws.py +337 -129
  49. sky/clouds/azure.py +24 -18
  50. sky/clouds/cloud.py +47 -13
  51. sky/clouds/cudo.py +16 -13
  52. sky/clouds/do.py +9 -7
  53. sky/clouds/fluidstack.py +12 -5
  54. sky/clouds/gcp.py +14 -7
  55. sky/clouds/hyperbolic.py +12 -5
  56. sky/clouds/ibm.py +12 -5
  57. sky/clouds/kubernetes.py +80 -45
  58. sky/clouds/lambda_cloud.py +12 -5
  59. sky/clouds/nebius.py +23 -9
  60. sky/clouds/oci.py +19 -12
  61. sky/clouds/paperspace.py +4 -1
  62. sky/clouds/primeintellect.py +317 -0
  63. sky/clouds/runpod.py +85 -24
  64. sky/clouds/scp.py +12 -8
  65. sky/clouds/seeweb.py +477 -0
  66. sky/clouds/shadeform.py +400 -0
  67. sky/clouds/slurm.py +578 -0
  68. sky/clouds/ssh.py +6 -3
  69. sky/clouds/utils/scp_utils.py +61 -50
  70. sky/clouds/vast.py +43 -27
  71. sky/clouds/vsphere.py +14 -16
  72. sky/core.py +296 -195
  73. sky/dashboard/out/404.html +1 -1
  74. sky/dashboard/out/_next/static/KYAhEFa3FTfq4JyKVgo-s/_buildManifest.js +1 -0
  75. sky/dashboard/out/_next/static/chunks/1141-9c810f01ff4f398a.js +11 -0
  76. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  77. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  78. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  79. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  80. sky/dashboard/out/_next/static/chunks/3294.ddda8c6c6f9f24dc.js +1 -0
  81. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  82. sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-b589397dc09c5b4e.js} +1 -1
  83. sky/dashboard/out/_next/static/chunks/3850-fd5696f3bbbaddae.js +1 -0
  84. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  85. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  86. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  87. sky/dashboard/out/_next/static/chunks/6856-da20c5fd999f319c.js +1 -0
  88. sky/dashboard/out/_next/static/chunks/6990-09cbf02d3cd518c3.js +1 -0
  89. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  90. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  91. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  92. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  93. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  94. sky/dashboard/out/_next/static/chunks/9353-8369df1cf105221c.js +1 -0
  95. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  96. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  97. sky/dashboard/out/_next/static/chunks/pages/_app-68b647e26f9d2793.js +34 -0
  98. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33f525539665fdfd.js +16 -0
  99. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a7565f586ef86467.js +1 -0
  100. sky/dashboard/out/_next/static/chunks/pages/clusters-9e5d47818b9bdadd.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/pages/{config-dfb9bf07b13045f4.js → config-718cdc365de82689.js} +1 -1
  102. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-12c559ec4d81fdbd.js} +1 -1
  103. sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-d187cd0413d72475.js} +1 -1
  104. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-895847b6cf200b04.js +16 -0
  105. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-8d0f4655400b4eb9.js +21 -0
  106. sky/dashboard/out/_next/static/chunks/pages/jobs-e5a98f17f8513a96.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/pages/plugins/[...slug]-4f46050ca065d8f8.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/pages/users-2f7646eb77785a2c.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/pages/volumes-ef19d49c6d0e8500.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-96e0f298308da7e2.js} +1 -1
  111. sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-cb4da3abe08ebf19.js} +1 -1
  112. sky/dashboard/out/_next/static/chunks/webpack-fba3de387ff6bb08.js +1 -0
  113. sky/dashboard/out/_next/static/css/c5a4cfd2600fc715.css +3 -0
  114. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  115. sky/dashboard/out/clusters/[cluster].html +1 -1
  116. sky/dashboard/out/clusters.html +1 -1
  117. sky/dashboard/out/config.html +1 -1
  118. sky/dashboard/out/index.html +1 -1
  119. sky/dashboard/out/infra/[context].html +1 -1
  120. sky/dashboard/out/infra.html +1 -1
  121. sky/dashboard/out/jobs/[job].html +1 -1
  122. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  123. sky/dashboard/out/jobs.html +1 -1
  124. sky/dashboard/out/plugins/[...slug].html +1 -0
  125. sky/dashboard/out/users.html +1 -1
  126. sky/dashboard/out/volumes.html +1 -1
  127. sky/dashboard/out/workspace/new.html +1 -1
  128. sky/dashboard/out/workspaces/[name].html +1 -1
  129. sky/dashboard/out/workspaces.html +1 -1
  130. sky/data/data_utils.py +92 -1
  131. sky/data/mounting_utils.py +177 -30
  132. sky/data/storage.py +200 -19
  133. sky/data/storage_utils.py +10 -45
  134. sky/exceptions.py +18 -7
  135. sky/execution.py +74 -31
  136. sky/global_user_state.py +605 -191
  137. sky/jobs/__init__.py +2 -0
  138. sky/jobs/client/sdk.py +101 -4
  139. sky/jobs/client/sdk_async.py +31 -5
  140. sky/jobs/constants.py +15 -8
  141. sky/jobs/controller.py +726 -284
  142. sky/jobs/file_content_utils.py +128 -0
  143. sky/jobs/log_gc.py +193 -0
  144. sky/jobs/recovery_strategy.py +250 -100
  145. sky/jobs/scheduler.py +271 -173
  146. sky/jobs/server/core.py +367 -114
  147. sky/jobs/server/server.py +81 -35
  148. sky/jobs/server/utils.py +89 -35
  149. sky/jobs/state.py +1498 -620
  150. sky/jobs/utils.py +771 -306
  151. sky/logs/agent.py +40 -5
  152. sky/logs/aws.py +9 -19
  153. sky/metrics/utils.py +282 -39
  154. sky/models.py +2 -0
  155. sky/optimizer.py +7 -6
  156. sky/provision/__init__.py +38 -1
  157. sky/provision/aws/config.py +34 -13
  158. sky/provision/aws/instance.py +5 -2
  159. sky/provision/azure/instance.py +5 -3
  160. sky/provision/common.py +22 -0
  161. sky/provision/cudo/instance.py +4 -3
  162. sky/provision/do/instance.py +4 -3
  163. sky/provision/docker_utils.py +112 -28
  164. sky/provision/fluidstack/instance.py +6 -5
  165. sky/provision/gcp/config.py +6 -1
  166. sky/provision/gcp/instance.py +4 -2
  167. sky/provision/hyperbolic/instance.py +4 -2
  168. sky/provision/instance_setup.py +66 -20
  169. sky/provision/kubernetes/__init__.py +2 -0
  170. sky/provision/kubernetes/config.py +7 -44
  171. sky/provision/kubernetes/constants.py +0 -1
  172. sky/provision/kubernetes/instance.py +609 -213
  173. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  174. sky/provision/kubernetes/network.py +12 -8
  175. sky/provision/kubernetes/network_utils.py +8 -25
  176. sky/provision/kubernetes/utils.py +422 -422
  177. sky/provision/kubernetes/volume.py +150 -18
  178. sky/provision/lambda_cloud/instance.py +16 -13
  179. sky/provision/nebius/instance.py +6 -2
  180. sky/provision/nebius/utils.py +103 -86
  181. sky/provision/oci/instance.py +4 -2
  182. sky/provision/paperspace/instance.py +4 -3
  183. sky/provision/primeintellect/__init__.py +10 -0
  184. sky/provision/primeintellect/config.py +11 -0
  185. sky/provision/primeintellect/instance.py +454 -0
  186. sky/provision/primeintellect/utils.py +398 -0
  187. sky/provision/provisioner.py +45 -15
  188. sky/provision/runpod/__init__.py +2 -0
  189. sky/provision/runpod/instance.py +4 -3
  190. sky/provision/runpod/volume.py +69 -13
  191. sky/provision/scp/instance.py +307 -130
  192. sky/provision/seeweb/__init__.py +11 -0
  193. sky/provision/seeweb/config.py +13 -0
  194. sky/provision/seeweb/instance.py +812 -0
  195. sky/provision/shadeform/__init__.py +11 -0
  196. sky/provision/shadeform/config.py +12 -0
  197. sky/provision/shadeform/instance.py +351 -0
  198. sky/provision/shadeform/shadeform_utils.py +83 -0
  199. sky/provision/slurm/__init__.py +12 -0
  200. sky/provision/slurm/config.py +13 -0
  201. sky/provision/slurm/instance.py +572 -0
  202. sky/provision/slurm/utils.py +583 -0
  203. sky/provision/vast/instance.py +9 -4
  204. sky/provision/vast/utils.py +10 -6
  205. sky/provision/volume.py +164 -0
  206. sky/provision/vsphere/common/ssl_helper.py +1 -1
  207. sky/provision/vsphere/common/vapiconnect.py +2 -1
  208. sky/provision/vsphere/common/vim_utils.py +3 -2
  209. sky/provision/vsphere/instance.py +8 -6
  210. sky/provision/vsphere/vsphere_utils.py +8 -1
  211. sky/resources.py +11 -3
  212. sky/schemas/api/responses.py +107 -6
  213. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  214. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  215. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  216. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  217. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  218. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  219. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  220. sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
  221. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  222. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  223. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  224. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  225. sky/schemas/generated/jobsv1_pb2.py +86 -0
  226. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  227. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  228. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  229. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  230. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  231. sky/schemas/generated/servev1_pb2.py +58 -0
  232. sky/schemas/generated/servev1_pb2.pyi +115 -0
  233. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  234. sky/serve/autoscalers.py +2 -0
  235. sky/serve/client/impl.py +55 -21
  236. sky/serve/constants.py +4 -3
  237. sky/serve/controller.py +17 -11
  238. sky/serve/load_balancing_policies.py +1 -1
  239. sky/serve/replica_managers.py +219 -142
  240. sky/serve/serve_rpc_utils.py +179 -0
  241. sky/serve/serve_state.py +63 -54
  242. sky/serve/serve_utils.py +145 -109
  243. sky/serve/server/core.py +46 -25
  244. sky/serve/server/impl.py +311 -162
  245. sky/serve/server/server.py +21 -19
  246. sky/serve/service.py +84 -68
  247. sky/serve/service_spec.py +45 -7
  248. sky/server/auth/loopback.py +38 -0
  249. sky/server/auth/oauth2_proxy.py +12 -7
  250. sky/server/common.py +47 -24
  251. sky/server/config.py +62 -28
  252. sky/server/constants.py +9 -1
  253. sky/server/daemons.py +109 -38
  254. sky/server/metrics.py +76 -96
  255. sky/server/middleware_utils.py +166 -0
  256. sky/server/plugins.py +222 -0
  257. sky/server/requests/executor.py +384 -145
  258. sky/server/requests/payloads.py +83 -19
  259. sky/server/requests/preconditions.py +15 -13
  260. sky/server/requests/request_names.py +123 -0
  261. sky/server/requests/requests.py +511 -157
  262. sky/server/requests/serializers/decoders.py +48 -17
  263. sky/server/requests/serializers/encoders.py +102 -20
  264. sky/server/requests/serializers/return_value_serializers.py +60 -0
  265. sky/server/requests/threads.py +117 -0
  266. sky/server/rest.py +116 -24
  267. sky/server/server.py +497 -179
  268. sky/server/server_utils.py +30 -0
  269. sky/server/stream_utils.py +219 -45
  270. sky/server/uvicorn.py +30 -19
  271. sky/setup_files/MANIFEST.in +6 -1
  272. sky/setup_files/alembic.ini +8 -0
  273. sky/setup_files/dependencies.py +64 -19
  274. sky/setup_files/setup.py +44 -44
  275. sky/sky_logging.py +13 -5
  276. sky/skylet/attempt_skylet.py +116 -24
  277. sky/skylet/configs.py +3 -1
  278. sky/skylet/constants.py +139 -29
  279. sky/skylet/events.py +74 -14
  280. sky/skylet/executor/__init__.py +1 -0
  281. sky/skylet/executor/slurm.py +189 -0
  282. sky/skylet/job_lib.py +143 -105
  283. sky/skylet/log_lib.py +252 -8
  284. sky/skylet/log_lib.pyi +47 -7
  285. sky/skylet/providers/ibm/node_provider.py +12 -8
  286. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  287. sky/skylet/runtime_utils.py +21 -0
  288. sky/skylet/services.py +524 -0
  289. sky/skylet/skylet.py +27 -2
  290. sky/skylet/subprocess_daemon.py +104 -28
  291. sky/skypilot_config.py +99 -79
  292. sky/ssh_node_pools/constants.py +12 -0
  293. sky/ssh_node_pools/core.py +40 -3
  294. sky/ssh_node_pools/deploy/__init__.py +4 -0
  295. sky/ssh_node_pools/deploy/deploy.py +952 -0
  296. sky/ssh_node_pools/deploy/tunnel_utils.py +199 -0
  297. sky/ssh_node_pools/deploy/utils.py +173 -0
  298. sky/ssh_node_pools/server.py +20 -21
  299. sky/{utils/kubernetes/ssh_utils.py → ssh_node_pools/utils.py} +9 -6
  300. sky/task.py +221 -104
  301. sky/templates/aws-ray.yml.j2 +1 -0
  302. sky/templates/azure-ray.yml.j2 +1 -0
  303. sky/templates/cudo-ray.yml.j2 +1 -0
  304. sky/templates/do-ray.yml.j2 +1 -0
  305. sky/templates/fluidstack-ray.yml.j2 +1 -0
  306. sky/templates/gcp-ray.yml.j2 +1 -0
  307. sky/templates/hyperbolic-ray.yml.j2 +1 -0
  308. sky/templates/ibm-ray.yml.j2 +2 -1
  309. sky/templates/jobs-controller.yaml.j2 +3 -0
  310. sky/templates/kubernetes-ray.yml.j2 +204 -55
  311. sky/templates/lambda-ray.yml.j2 +1 -0
  312. sky/templates/nebius-ray.yml.j2 +3 -0
  313. sky/templates/oci-ray.yml.j2 +1 -0
  314. sky/templates/paperspace-ray.yml.j2 +1 -0
  315. sky/templates/primeintellect-ray.yml.j2 +72 -0
  316. sky/templates/runpod-ray.yml.j2 +1 -0
  317. sky/templates/scp-ray.yml.j2 +1 -0
  318. sky/templates/seeweb-ray.yml.j2 +171 -0
  319. sky/templates/shadeform-ray.yml.j2 +73 -0
  320. sky/templates/slurm-ray.yml.j2 +85 -0
  321. sky/templates/vast-ray.yml.j2 +2 -0
  322. sky/templates/vsphere-ray.yml.j2 +1 -0
  323. sky/templates/websocket_proxy.py +188 -43
  324. sky/usage/usage_lib.py +16 -4
  325. sky/users/model.conf +1 -1
  326. sky/users/permission.py +84 -44
  327. sky/users/rbac.py +31 -3
  328. sky/utils/accelerator_registry.py +6 -3
  329. sky/utils/admin_policy_utils.py +18 -5
  330. sky/utils/annotations.py +128 -6
  331. sky/utils/asyncio_utils.py +78 -0
  332. sky/utils/atomic.py +1 -1
  333. sky/utils/auth_utils.py +153 -0
  334. sky/utils/cli_utils/status_utils.py +12 -7
  335. sky/utils/cluster_utils.py +28 -6
  336. sky/utils/command_runner.py +283 -30
  337. sky/utils/command_runner.pyi +63 -7
  338. sky/utils/common.py +3 -1
  339. sky/utils/common_utils.py +55 -7
  340. sky/utils/config_utils.py +1 -14
  341. sky/utils/context.py +127 -40
  342. sky/utils/context_utils.py +73 -18
  343. sky/utils/controller_utils.py +229 -70
  344. sky/utils/db/db_utils.py +95 -18
  345. sky/utils/db/kv_cache.py +149 -0
  346. sky/utils/db/migration_utils.py +24 -7
  347. sky/utils/env_options.py +4 -0
  348. sky/utils/git.py +559 -1
  349. sky/utils/kubernetes/create_cluster.sh +15 -30
  350. sky/utils/kubernetes/delete_cluster.sh +10 -7
  351. sky/utils/kubernetes/generate_kind_config.py +6 -66
  352. sky/utils/kubernetes/gpu_labeler.py +13 -3
  353. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  354. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  355. sky/utils/kubernetes/kubernetes_deploy_utils.py +187 -260
  356. sky/utils/kubernetes/rsync_helper.sh +11 -3
  357. sky/utils/kubernetes/ssh-tunnel.sh +7 -376
  358. sky/utils/kubernetes_enums.py +7 -15
  359. sky/utils/lock_events.py +4 -4
  360. sky/utils/locks.py +128 -31
  361. sky/utils/log_utils.py +0 -319
  362. sky/utils/resource_checker.py +13 -10
  363. sky/utils/resources_utils.py +53 -29
  364. sky/utils/rich_utils.py +8 -4
  365. sky/utils/schemas.py +138 -52
  366. sky/utils/subprocess_utils.py +17 -4
  367. sky/utils/thread_utils.py +91 -0
  368. sky/utils/timeline.py +2 -1
  369. sky/utils/ux_utils.py +35 -1
  370. sky/utils/volume.py +88 -4
  371. sky/utils/yaml_utils.py +9 -0
  372. sky/volumes/client/sdk.py +48 -10
  373. sky/volumes/server/core.py +59 -22
  374. sky/volumes/server/server.py +46 -17
  375. sky/volumes/volume.py +54 -42
  376. sky/workspaces/core.py +57 -21
  377. sky/workspaces/server.py +13 -12
  378. sky_templates/README.md +3 -0
  379. sky_templates/__init__.py +3 -0
  380. sky_templates/ray/__init__.py +0 -0
  381. sky_templates/ray/start_cluster +183 -0
  382. sky_templates/ray/stop_cluster +75 -0
  383. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/METADATA +343 -65
  384. skypilot_nightly-1.0.0.dev20251210.dist-info/RECORD +629 -0
  385. skypilot_nightly-1.0.0.dev20251210.dist-info/top_level.txt +2 -0
  386. sky/client/cli/git.py +0 -549
  387. sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
  388. sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
  389. sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
  390. sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
  391. sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
  392. sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
  393. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +0 -1
  394. sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
  395. sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
  396. sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
  397. sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
  398. sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
  399. sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
  400. sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
  401. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
  402. sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
  403. sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
  404. sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
  405. sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
  406. sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
  407. sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
  408. sky/dashboard/out/_next/static/chunks/pages/_app-ce361c6959bc2001.js +0 -34
  409. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
  410. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
  411. sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
  412. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
  413. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
  414. sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
  415. sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
  416. sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
  417. sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
  418. sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
  419. sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
  420. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  421. sky/utils/kubernetes/cleanup-tunnel.sh +0 -62
  422. sky/utils/kubernetes/deploy_remote_cluster.py +0 -1299
  423. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  424. skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
  425. skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
  426. /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → KYAhEFa3FTfq4JyKVgo-s}/_ssgManifest.js +0 -0
  427. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/WHEEL +0 -0
  428. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/entry_points.txt +0 -0
  429. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/licenses/LICENSE +0 -0
sky/clouds/azure.py CHANGED
@@ -15,6 +15,7 @@ from sky import exceptions
15
15
  from sky import sky_logging
16
16
  from sky import skypilot_config
17
17
  from sky.adaptors import azure
18
+ from sky.adaptors import common as adaptors_common
18
19
  from sky.clouds.utils import azure_utils
19
20
  from sky.utils import annotations
20
21
  from sky.utils import common_utils
@@ -86,7 +87,9 @@ class Azure(clouds.Cloud):
86
87
 
87
88
  @classmethod
88
89
  def _unsupported_features_for_resources(
89
- cls, resources: 'resources.Resources'
90
+ cls,
91
+ resources: 'resources.Resources',
92
+ region: Optional[str] = None,
90
93
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
91
94
  features = {
92
95
  clouds.CloudImplementationFeatures.CLONE_DISK_FROM_CLUSTER:
@@ -263,10 +266,15 @@ class Azure(clouds.Cloud):
263
266
  return _DEFAULT_GPU_IMAGE_ID
264
267
 
265
268
  @classmethod
266
- def regions_with_offering(cls, instance_type: str,
267
- accelerators: Optional[Dict[str, int]],
268
- use_spot: bool, region: Optional[str],
269
- zone: Optional[str]) -> List[clouds.Region]:
269
+ def regions_with_offering(
270
+ cls,
271
+ instance_type: str,
272
+ accelerators: Optional[Dict[str, int]],
273
+ use_spot: bool,
274
+ region: Optional[str],
275
+ zone: Optional[str],
276
+ resources: Optional['resources.Resources'] = None,
277
+ ) -> List[clouds.Region]:
270
278
  del accelerators # unused
271
279
  assert zone is None, 'Azure does not support zones'
272
280
  regions = catalog.get_region_zones_for_instance_type(
@@ -546,6 +554,7 @@ class Azure(clouds.Cloud):
546
554
  @classmethod
547
555
  def _check_credentials(cls) -> Tuple[bool, Optional[str]]:
548
556
  """Checks if the user has access credentials to this cloud."""
557
+
549
558
  help_str = (
550
559
  ' Run the following commands:'
551
560
  f'\n{cls._INDENT_PREFIX} $ az login'
@@ -561,6 +570,16 @@ class Azure(clouds.Cloud):
561
570
  return (False,
562
571
  f'{azure_token_cache_file} does not exist.' + help_str)
563
572
 
573
+ dependency_installation_hints = (
574
+ 'Azure dependencies are not installed. '
575
+ 'Run the following commands:'
576
+ f'\n{cls._INDENT_PREFIX} $ pip install skypilot[azure]'
577
+ f'\n{cls._INDENT_PREFIX}Credentials may also need to be set.')
578
+ # Check if the azure blob storage dependencies are installed.
579
+ if not adaptors_common.can_import_modules(
580
+ ['azure.storage.blob', 'msgraph']):
581
+ return False, dependency_installation_hints
582
+
564
583
  try:
565
584
  _run_output('az --version')
566
585
  except subprocess.CalledProcessError as e:
@@ -580,19 +599,6 @@ class Azure(clouds.Cloud):
580
599
  return False, (f'Getting user\'s Azure identity failed.{help_str}\n'
581
600
  f'{cls._INDENT_PREFIX}Details: '
582
601
  f'{common_utils.format_exception(e)}')
583
-
584
- # Check if the azure blob storage dependencies are installed.
585
- try:
586
- # pylint: disable=redefined-outer-name, import-outside-toplevel, unused-import
587
- from azure.storage import blob
588
- import msgraph
589
- except ImportError as e:
590
- return False, (
591
- f'Azure blob storage depdencies are not installed. '
592
- 'Run the following commands:'
593
- f'\n{cls._INDENT_PREFIX} $ pip install skypilot[azure]'
594
- f'\n{cls._INDENT_PREFIX}Details: '
595
- f'{common_utils.format_exception(e)}')
596
602
  return True, None
597
603
 
598
604
  def get_credential_file_mounts(self) -> Dict[str, str]:
sky/clouds/cloud.py CHANGED
@@ -182,13 +182,25 @@ class Cloud:
182
182
  """
183
183
  return cls._SUPPORTS_SERVICE_ACCOUNT_ON_REMOTE
184
184
 
185
+ @classmethod
186
+ def uses_ray(cls) -> bool:
187
+ """Returns whether this cloud uses Ray as the distributed
188
+ execution framework.
189
+ """
190
+ return True
191
+
185
192
  #### Regions/Zones ####
186
193
 
187
194
  @classmethod
188
- def regions_with_offering(cls, instance_type: str,
189
- accelerators: Optional[Dict[str, int]],
190
- use_spot: bool, region: Optional[str],
191
- zone: Optional[str]) -> List[Region]:
195
+ def regions_with_offering(
196
+ cls,
197
+ instance_type: str,
198
+ accelerators: Optional[Dict[str, int]],
199
+ use_spot: bool,
200
+ region: Optional[str],
201
+ zone: Optional[str],
202
+ resources: Optional['resources_lib.Resources'] = None,
203
+ ) -> List[Region]:
192
204
  """Returns the regions that offer the specified resources.
193
205
 
194
206
  The order of the regions follow the order of the regions returned by
@@ -340,6 +352,14 @@ class Cloud:
340
352
  """Returns {acc: acc_count} held by 'instance_type', if any."""
341
353
  raise NotImplementedError
342
354
 
355
+ @classmethod
356
+ def get_arch_from_instance_type(
357
+ cls,
358
+ instance_type: str,
359
+ ) -> Optional[str]:
360
+ """Returns the arch of the instance type, if any."""
361
+ raise NotImplementedError
362
+
343
363
  @classmethod
344
364
  def get_default_instance_type(cls,
345
365
  cpus: Optional[str] = None,
@@ -666,8 +686,11 @@ class Cloud:
666
686
 
667
687
  @classmethod
668
688
  def check_features_are_supported(
669
- cls, resources: 'resources_lib.Resources',
670
- requested_features: Set[CloudImplementationFeatures]) -> None:
689
+ cls,
690
+ resources: 'resources_lib.Resources',
691
+ requested_features: Set[CloudImplementationFeatures],
692
+ region: Optional[str] = None,
693
+ ) -> None:
671
694
  """Errors out if the cloud does not support all requested features.
672
695
 
673
696
  For instance, Lambda Cloud does not support stop, so
@@ -685,7 +708,7 @@ class Cloud:
685
708
  requested features.
686
709
  """
687
710
  unsupported_features2reason = cls._unsupported_features_for_resources(
688
- resources)
711
+ resources, region)
689
712
 
690
713
  # Docker image is not compatible with ssh proxy command.
691
714
  if skypilot_config.get_effective_region_config(
@@ -715,7 +738,9 @@ class Cloud:
715
738
 
716
739
  @classmethod
717
740
  def _unsupported_features_for_resources(
718
- cls, resources: 'resources_lib.Resources'
741
+ cls,
742
+ resources: 'resources_lib.Resources',
743
+ region: Optional[str] = None,
719
744
  ) -> Dict[CloudImplementationFeatures, str]:
720
745
  """The features not supported based on the resources provided.
721
746
 
@@ -726,7 +751,7 @@ class Cloud:
726
751
  A dict of {feature: reason} for the features not supported by the
727
752
  cloud implementation.
728
753
  """
729
- del resources
754
+ del resources, region
730
755
  raise NotImplementedError
731
756
 
732
757
  @classmethod
@@ -800,12 +825,21 @@ class Cloud:
800
825
  if acc_from_instance_type is None:
801
826
  return False
802
827
 
803
- for acc in acc_requested:
804
- if acc not in acc_from_instance_type:
828
+ for requested_acc in acc_requested:
829
+ for instance_acc in acc_from_instance_type:
830
+ # The requested accelerator can be canonicalized based on
831
+ # the accelerator registry, which may not has the same case
832
+ # as the cloud's catalog, e.g., 'RTXPro6000' in Shadeform
833
+ # catalog, and 'RTXPRO6000' in RunPod catalog.
834
+ if requested_acc.lower() == instance_acc.lower():
835
+ # Found the requested accelerator in the instance type.
836
+ break
837
+ else:
838
+ # Requested accelerator not found in instance type.
805
839
  return False
806
840
  # Avoid float point precision issue.
807
- if not math.isclose(acc_requested[acc],
808
- acc_from_instance_type[acc]):
841
+ if not math.isclose(acc_requested[requested_acc],
842
+ acc_from_instance_type[instance_acc]):
809
843
  return False
810
844
  return True
811
845
 
sky/clouds/cudo.py CHANGED
@@ -5,6 +5,7 @@ from typing import Dict, Iterator, List, Optional, Tuple, Union
5
5
 
6
6
  from sky import catalog
7
7
  from sky import clouds
8
+ from sky.adaptors import common
8
9
  from sky.utils import common_utils
9
10
  from sky.utils import registry
10
11
  from sky.utils import resources_utils
@@ -86,7 +87,9 @@ class Cudo(clouds.Cloud):
86
87
 
87
88
  @classmethod
88
89
  def _unsupported_features_for_resources(
89
- cls, resources: 'resources_lib.Resources'
90
+ cls,
91
+ resources: 'resources_lib.Resources',
92
+ region: Optional[str] = None,
90
93
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
91
94
  """The features not supported based on the resources provided.
92
95
 
@@ -105,10 +108,15 @@ class Cudo(clouds.Cloud):
105
108
  return cls._MAX_CLUSTER_NAME_LEN_LIMIT
106
109
 
107
110
  @classmethod
108
- def regions_with_offering(cls, instance_type,
109
- accelerators: Optional[Dict[str, int]],
110
- use_spot: bool, region: Optional[str],
111
- zone: Optional[str]) -> List[clouds.Region]:
111
+ def regions_with_offering(
112
+ cls,
113
+ instance_type,
114
+ accelerators: Optional[Dict[str, int]],
115
+ use_spot: bool,
116
+ region: Optional[str],
117
+ zone: Optional[str],
118
+ resources: Optional['resources_lib.Resources'] = None,
119
+ ) -> List[clouds.Region]:
112
120
  assert zone is None, 'Cudo does not support zones.'
113
121
  del accelerators, zone # unused
114
122
  if use_spot:
@@ -287,14 +295,9 @@ class Cudo(clouds.Cloud):
287
295
  cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
288
296
  """Checks if the user has access credentials to
289
297
  Cudo's compute service."""
290
- try:
291
- # pylint: disable=import-outside-toplevel,unused-import
292
- from cudo_compute import cudo_api
293
- except (ImportError, subprocess.CalledProcessError) as e:
294
- return False, (
295
- f'{cls._DEPENDENCY_HINT}\n'
296
- f'{cls._INDENT_PREFIX}'
297
- f'{common_utils.format_exception(e, use_bracket=True)}')
298
+ if not common.can_import_modules(['cudo_compute']):
299
+ return False, (f'{cls._DEPENDENCY_HINT}\n'
300
+ f'{cls._INDENT_PREFIX}')
298
301
 
299
302
  try:
300
303
  _run_output('cudoctl --version')
sky/clouds/do.py CHANGED
@@ -57,7 +57,9 @@ class DO(clouds.Cloud):
57
57
 
58
58
  @classmethod
59
59
  def _unsupported_features_for_resources(
60
- cls, resources: 'resources_lib.Resources'
60
+ cls,
61
+ resources: 'resources_lib.Resources',
62
+ region: Optional[str] = None,
61
63
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
62
64
  """The features not supported based on the resources provided.
63
65
 
@@ -83,6 +85,7 @@ class DO(clouds.Cloud):
83
85
  use_spot: bool,
84
86
  region: Optional[str],
85
87
  zone: Optional[str],
88
+ resources: Optional['resources_lib.Resources'] = None,
86
89
  ) -> List[clouds.Region]:
87
90
  assert zone is None, 'DO does not support zones.'
88
91
  del accelerators, zone # unused
@@ -283,18 +286,17 @@ class DO(clouds.Cloud):
283
286
  """Verify that the user has valid credentials for
284
287
  DO's compute service."""
285
288
 
286
- try:
287
- do.exceptions()
288
- except ImportError as err:
289
- return False, str(err)
289
+ installed, err_msg = do.check_exceptions_dependencies_installed()
290
+ if not installed:
291
+ return False, err_msg
290
292
 
291
293
  try:
292
294
  # attempt to make a CURL request for listing instances
293
295
  do_utils.client().droplets.list()
294
- except do.exceptions().HttpResponseError as err:
295
- return False, str(err)
296
296
  except do_utils.DigitalOceanError as err:
297
297
  return False, str(err)
298
+ except do.exceptions().HttpResponseError as err:
299
+ return False, str(err)
298
300
 
299
301
  return True, None
300
302
 
sky/clouds/fluidstack.py CHANGED
@@ -73,7 +73,9 @@ class Fluidstack(clouds.Cloud):
73
73
 
74
74
  @classmethod
75
75
  def _unsupported_features_for_resources(
76
- cls, resources: 'resources_lib.Resources'
76
+ cls,
77
+ resources: 'resources_lib.Resources',
78
+ region: Optional[str] = None,
77
79
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
78
80
  """The features not supported based on the resources provided.
79
81
 
@@ -92,10 +94,15 @@ class Fluidstack(clouds.Cloud):
92
94
  return cls._MAX_CLUSTER_NAME_LEN_LIMIT
93
95
 
94
96
  @classmethod
95
- def regions_with_offering(cls, instance_type: str,
96
- accelerators: Optional[Dict[str, int]],
97
- use_spot: bool, region: Optional[str],
98
- zone: Optional[str]) -> List[clouds.Region]:
97
+ def regions_with_offering(
98
+ cls,
99
+ instance_type: str,
100
+ accelerators: Optional[Dict[str, int]],
101
+ use_spot: bool,
102
+ region: Optional[str],
103
+ zone: Optional[str],
104
+ resources: Optional['resources_lib.Resources'] = None,
105
+ ) -> List[clouds.Region]:
99
106
  assert zone is None, 'FluidStack does not support zones.'
100
107
  del accelerators, zone # unused
101
108
  if use_spot:
sky/clouds/gcp.py CHANGED
@@ -211,7 +211,9 @@ class GCP(clouds.Cloud):
211
211
 
212
212
  @classmethod
213
213
  def _unsupported_features_for_resources(
214
- cls, resources: 'resources.Resources'
214
+ cls,
215
+ resources: 'resources.Resources',
216
+ region: Optional[str] = None,
215
217
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
216
218
  unsupported = {}
217
219
  if gcp_utils.is_tpu_vm_pod(resources):
@@ -255,10 +257,15 @@ class GCP(clouds.Cloud):
255
257
 
256
258
  #### Regions/Zones ####
257
259
  @classmethod
258
- def regions_with_offering(cls, instance_type: str,
259
- accelerators: Optional[Dict[str, int]],
260
- use_spot: bool, region: Optional[str],
261
- zone: Optional[str]) -> List[clouds.Region]:
260
+ def regions_with_offering(
261
+ cls,
262
+ instance_type: str,
263
+ accelerators: Optional[Dict[str, int]],
264
+ use_spot: bool,
265
+ region: Optional[str],
266
+ zone: Optional[str],
267
+ resources: Optional['resources.Resources'] = None,
268
+ ) -> List[clouds.Region]:
262
269
  if accelerators is None:
263
270
  regions = catalog.get_region_zones_for_instance_type(instance_type,
264
271
  use_spot,
@@ -1179,8 +1186,8 @@ class GCP(clouds.Cloud):
1179
1186
  # These series don't support pd-standard, use pd-balanced for LOW.
1180
1187
  _propagate_disk_type(
1181
1188
  lowest=tier2name[resources_utils.DiskTier.MEDIUM])
1182
- if instance_type.startswith('a3-ultragpu') or series == 'n4':
1183
- # a3-ultragpu instances only support hyperdisk-balanced.
1189
+ if instance_type.startswith('a3-ultragpu') or series in ('n4', 'a4'):
1190
+ # a3-ultragpu, n4, and a4 instances only support hyperdisk-balanced.
1184
1191
  _propagate_disk_type(all='hyperdisk-balanced')
1185
1192
 
1186
1193
  # Series specific handling
sky/clouds/hyperbolic.py CHANGED
@@ -65,7 +65,9 @@ class Hyperbolic(clouds.Cloud):
65
65
 
66
66
  @classmethod
67
67
  def _unsupported_features_for_resources(
68
- cls, resources: 'resources_lib.Resources'
68
+ cls,
69
+ resources: 'resources_lib.Resources',
70
+ region: Optional[str] = None,
69
71
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
70
72
  del resources
71
73
  return cls._CLOUD_UNSUPPORTED_FEATURES
@@ -78,10 +80,15 @@ class Hyperbolic(clouds.Cloud):
78
80
  return catalog.instance_type_exists(instance_type, 'hyperbolic')
79
81
 
80
82
  @classmethod
81
- def regions_with_offering(cls, instance_type: str,
82
- accelerators: Optional[Dict[str, int]],
83
- use_spot: bool, region: Optional[str],
84
- zone: Optional[str]) -> List[clouds.Region]:
83
+ def regions_with_offering(
84
+ cls,
85
+ instance_type: str,
86
+ accelerators: Optional[Dict[str, int]],
87
+ use_spot: bool,
88
+ region: Optional[str],
89
+ zone: Optional[str],
90
+ resources: Optional['resources_lib.Resources'] = None,
91
+ ) -> List[clouds.Region]:
85
92
  assert zone is None, 'Hyperbolic does not support zones.'
86
93
  del accelerators, zone # unused
87
94
 
sky/clouds/ibm.py CHANGED
@@ -37,7 +37,9 @@ class IBM(clouds.Cloud):
37
37
 
38
38
  @classmethod
39
39
  def _unsupported_features_for_resources(
40
- cls, resources: 'resources_lib.Resources'
40
+ cls,
41
+ resources: 'resources_lib.Resources',
42
+ region: Optional[str] = None,
41
43
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
42
44
  features = {
43
45
  clouds.CloudImplementationFeatures.CLONE_DISK_FROM_CLUSTER:
@@ -68,10 +70,15 @@ class IBM(clouds.Cloud):
68
70
  return cls._MAX_CLUSTER_NAME_LEN_LIMIT
69
71
 
70
72
  @classmethod
71
- def regions_with_offering(cls, instance_type: str,
72
- accelerators: Optional[Dict[str, int]],
73
- use_spot: bool, region: Optional[str],
74
- zone: Optional[str]) -> List[clouds.Region]:
73
+ def regions_with_offering(
74
+ cls,
75
+ instance_type: str,
76
+ accelerators: Optional[Dict[str, int]],
77
+ use_spot: bool,
78
+ region: Optional[str],
79
+ zone: Optional[str],
80
+ resources: Optional['resources_lib.Resources'] = None,
81
+ ) -> List[clouds.Region]:
75
82
  del accelerators # unused
76
83
  if use_spot:
77
84
  return []