skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (429) hide show
  1. sky/__init__.py +12 -2
  2. sky/adaptors/aws.py +27 -22
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/ibm.py +5 -2
  8. sky/adaptors/kubernetes.py +64 -0
  9. sky/adaptors/nebius.py +3 -1
  10. sky/adaptors/primeintellect.py +1 -0
  11. sky/adaptors/seeweb.py +183 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/adaptors/slurm.py +478 -0
  14. sky/admin_policy.py +20 -0
  15. sky/authentication.py +157 -263
  16. sky/backends/__init__.py +3 -2
  17. sky/backends/backend.py +11 -3
  18. sky/backends/backend_utils.py +630 -185
  19. sky/backends/cloud_vm_ray_backend.py +1111 -928
  20. sky/backends/local_docker_backend.py +9 -5
  21. sky/backends/task_codegen.py +971 -0
  22. sky/backends/wheel_utils.py +18 -0
  23. sky/catalog/__init__.py +8 -3
  24. sky/catalog/aws_catalog.py +4 -0
  25. sky/catalog/common.py +19 -1
  26. sky/catalog/data_fetchers/fetch_aws.py +102 -80
  27. sky/catalog/data_fetchers/fetch_gcp.py +30 -3
  28. sky/catalog/data_fetchers/fetch_nebius.py +9 -6
  29. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  30. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  31. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  32. sky/catalog/kubernetes_catalog.py +36 -32
  33. sky/catalog/primeintellect_catalog.py +95 -0
  34. sky/catalog/runpod_catalog.py +5 -1
  35. sky/catalog/seeweb_catalog.py +184 -0
  36. sky/catalog/shadeform_catalog.py +165 -0
  37. sky/catalog/slurm_catalog.py +243 -0
  38. sky/check.py +87 -46
  39. sky/client/cli/command.py +1004 -434
  40. sky/client/cli/flags.py +4 -2
  41. sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
  42. sky/client/cli/utils.py +79 -0
  43. sky/client/common.py +12 -2
  44. sky/client/sdk.py +188 -65
  45. sky/client/sdk_async.py +34 -33
  46. sky/cloud_stores.py +82 -3
  47. sky/clouds/__init__.py +8 -0
  48. sky/clouds/aws.py +337 -129
  49. sky/clouds/azure.py +24 -18
  50. sky/clouds/cloud.py +47 -13
  51. sky/clouds/cudo.py +16 -13
  52. sky/clouds/do.py +9 -7
  53. sky/clouds/fluidstack.py +12 -5
  54. sky/clouds/gcp.py +14 -7
  55. sky/clouds/hyperbolic.py +12 -5
  56. sky/clouds/ibm.py +12 -5
  57. sky/clouds/kubernetes.py +80 -45
  58. sky/clouds/lambda_cloud.py +12 -5
  59. sky/clouds/nebius.py +23 -9
  60. sky/clouds/oci.py +19 -12
  61. sky/clouds/paperspace.py +4 -1
  62. sky/clouds/primeintellect.py +317 -0
  63. sky/clouds/runpod.py +85 -24
  64. sky/clouds/scp.py +12 -8
  65. sky/clouds/seeweb.py +477 -0
  66. sky/clouds/shadeform.py +400 -0
  67. sky/clouds/slurm.py +578 -0
  68. sky/clouds/ssh.py +6 -3
  69. sky/clouds/utils/scp_utils.py +61 -50
  70. sky/clouds/vast.py +43 -27
  71. sky/clouds/vsphere.py +14 -16
  72. sky/core.py +296 -195
  73. sky/dashboard/out/404.html +1 -1
  74. sky/dashboard/out/_next/static/KYAhEFa3FTfq4JyKVgo-s/_buildManifest.js +1 -0
  75. sky/dashboard/out/_next/static/chunks/1141-9c810f01ff4f398a.js +11 -0
  76. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  77. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  78. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  79. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  80. sky/dashboard/out/_next/static/chunks/3294.ddda8c6c6f9f24dc.js +1 -0
  81. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  82. sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-b589397dc09c5b4e.js} +1 -1
  83. sky/dashboard/out/_next/static/chunks/3850-fd5696f3bbbaddae.js +1 -0
  84. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  85. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  86. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  87. sky/dashboard/out/_next/static/chunks/6856-da20c5fd999f319c.js +1 -0
  88. sky/dashboard/out/_next/static/chunks/6990-09cbf02d3cd518c3.js +1 -0
  89. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  90. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  91. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  92. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  93. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  94. sky/dashboard/out/_next/static/chunks/9353-8369df1cf105221c.js +1 -0
  95. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  96. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  97. sky/dashboard/out/_next/static/chunks/pages/_app-68b647e26f9d2793.js +34 -0
  98. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33f525539665fdfd.js +16 -0
  99. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a7565f586ef86467.js +1 -0
  100. sky/dashboard/out/_next/static/chunks/pages/clusters-9e5d47818b9bdadd.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/pages/{config-dfb9bf07b13045f4.js → config-718cdc365de82689.js} +1 -1
  102. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-12c559ec4d81fdbd.js} +1 -1
  103. sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-d187cd0413d72475.js} +1 -1
  104. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-895847b6cf200b04.js +16 -0
  105. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-8d0f4655400b4eb9.js +21 -0
  106. sky/dashboard/out/_next/static/chunks/pages/jobs-e5a98f17f8513a96.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/pages/plugins/[...slug]-4f46050ca065d8f8.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/pages/users-2f7646eb77785a2c.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/pages/volumes-ef19d49c6d0e8500.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-96e0f298308da7e2.js} +1 -1
  111. sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-cb4da3abe08ebf19.js} +1 -1
  112. sky/dashboard/out/_next/static/chunks/webpack-fba3de387ff6bb08.js +1 -0
  113. sky/dashboard/out/_next/static/css/c5a4cfd2600fc715.css +3 -0
  114. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  115. sky/dashboard/out/clusters/[cluster].html +1 -1
  116. sky/dashboard/out/clusters.html +1 -1
  117. sky/dashboard/out/config.html +1 -1
  118. sky/dashboard/out/index.html +1 -1
  119. sky/dashboard/out/infra/[context].html +1 -1
  120. sky/dashboard/out/infra.html +1 -1
  121. sky/dashboard/out/jobs/[job].html +1 -1
  122. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  123. sky/dashboard/out/jobs.html +1 -1
  124. sky/dashboard/out/plugins/[...slug].html +1 -0
  125. sky/dashboard/out/users.html +1 -1
  126. sky/dashboard/out/volumes.html +1 -1
  127. sky/dashboard/out/workspace/new.html +1 -1
  128. sky/dashboard/out/workspaces/[name].html +1 -1
  129. sky/dashboard/out/workspaces.html +1 -1
  130. sky/data/data_utils.py +92 -1
  131. sky/data/mounting_utils.py +177 -30
  132. sky/data/storage.py +200 -19
  133. sky/data/storage_utils.py +10 -45
  134. sky/exceptions.py +18 -7
  135. sky/execution.py +74 -31
  136. sky/global_user_state.py +605 -191
  137. sky/jobs/__init__.py +2 -0
  138. sky/jobs/client/sdk.py +101 -4
  139. sky/jobs/client/sdk_async.py +31 -5
  140. sky/jobs/constants.py +15 -8
  141. sky/jobs/controller.py +726 -284
  142. sky/jobs/file_content_utils.py +128 -0
  143. sky/jobs/log_gc.py +193 -0
  144. sky/jobs/recovery_strategy.py +250 -100
  145. sky/jobs/scheduler.py +271 -173
  146. sky/jobs/server/core.py +367 -114
  147. sky/jobs/server/server.py +81 -35
  148. sky/jobs/server/utils.py +89 -35
  149. sky/jobs/state.py +1498 -620
  150. sky/jobs/utils.py +771 -306
  151. sky/logs/agent.py +40 -5
  152. sky/logs/aws.py +9 -19
  153. sky/metrics/utils.py +282 -39
  154. sky/models.py +2 -0
  155. sky/optimizer.py +7 -6
  156. sky/provision/__init__.py +38 -1
  157. sky/provision/aws/config.py +34 -13
  158. sky/provision/aws/instance.py +5 -2
  159. sky/provision/azure/instance.py +5 -3
  160. sky/provision/common.py +22 -0
  161. sky/provision/cudo/instance.py +4 -3
  162. sky/provision/do/instance.py +4 -3
  163. sky/provision/docker_utils.py +112 -28
  164. sky/provision/fluidstack/instance.py +6 -5
  165. sky/provision/gcp/config.py +6 -1
  166. sky/provision/gcp/instance.py +4 -2
  167. sky/provision/hyperbolic/instance.py +4 -2
  168. sky/provision/instance_setup.py +66 -20
  169. sky/provision/kubernetes/__init__.py +2 -0
  170. sky/provision/kubernetes/config.py +7 -44
  171. sky/provision/kubernetes/constants.py +0 -1
  172. sky/provision/kubernetes/instance.py +609 -213
  173. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  174. sky/provision/kubernetes/network.py +12 -8
  175. sky/provision/kubernetes/network_utils.py +8 -25
  176. sky/provision/kubernetes/utils.py +422 -422
  177. sky/provision/kubernetes/volume.py +150 -18
  178. sky/provision/lambda_cloud/instance.py +16 -13
  179. sky/provision/nebius/instance.py +6 -2
  180. sky/provision/nebius/utils.py +103 -86
  181. sky/provision/oci/instance.py +4 -2
  182. sky/provision/paperspace/instance.py +4 -3
  183. sky/provision/primeintellect/__init__.py +10 -0
  184. sky/provision/primeintellect/config.py +11 -0
  185. sky/provision/primeintellect/instance.py +454 -0
  186. sky/provision/primeintellect/utils.py +398 -0
  187. sky/provision/provisioner.py +45 -15
  188. sky/provision/runpod/__init__.py +2 -0
  189. sky/provision/runpod/instance.py +4 -3
  190. sky/provision/runpod/volume.py +69 -13
  191. sky/provision/scp/instance.py +307 -130
  192. sky/provision/seeweb/__init__.py +11 -0
  193. sky/provision/seeweb/config.py +13 -0
  194. sky/provision/seeweb/instance.py +812 -0
  195. sky/provision/shadeform/__init__.py +11 -0
  196. sky/provision/shadeform/config.py +12 -0
  197. sky/provision/shadeform/instance.py +351 -0
  198. sky/provision/shadeform/shadeform_utils.py +83 -0
  199. sky/provision/slurm/__init__.py +12 -0
  200. sky/provision/slurm/config.py +13 -0
  201. sky/provision/slurm/instance.py +572 -0
  202. sky/provision/slurm/utils.py +583 -0
  203. sky/provision/vast/instance.py +9 -4
  204. sky/provision/vast/utils.py +10 -6
  205. sky/provision/volume.py +164 -0
  206. sky/provision/vsphere/common/ssl_helper.py +1 -1
  207. sky/provision/vsphere/common/vapiconnect.py +2 -1
  208. sky/provision/vsphere/common/vim_utils.py +3 -2
  209. sky/provision/vsphere/instance.py +8 -6
  210. sky/provision/vsphere/vsphere_utils.py +8 -1
  211. sky/resources.py +11 -3
  212. sky/schemas/api/responses.py +107 -6
  213. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  214. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  215. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  216. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  217. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  218. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  219. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  220. sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
  221. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  222. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  223. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  224. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  225. sky/schemas/generated/jobsv1_pb2.py +86 -0
  226. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  227. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  228. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  229. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  230. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  231. sky/schemas/generated/servev1_pb2.py +58 -0
  232. sky/schemas/generated/servev1_pb2.pyi +115 -0
  233. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  234. sky/serve/autoscalers.py +2 -0
  235. sky/serve/client/impl.py +55 -21
  236. sky/serve/constants.py +4 -3
  237. sky/serve/controller.py +17 -11
  238. sky/serve/load_balancing_policies.py +1 -1
  239. sky/serve/replica_managers.py +219 -142
  240. sky/serve/serve_rpc_utils.py +179 -0
  241. sky/serve/serve_state.py +63 -54
  242. sky/serve/serve_utils.py +145 -109
  243. sky/serve/server/core.py +46 -25
  244. sky/serve/server/impl.py +311 -162
  245. sky/serve/server/server.py +21 -19
  246. sky/serve/service.py +84 -68
  247. sky/serve/service_spec.py +45 -7
  248. sky/server/auth/loopback.py +38 -0
  249. sky/server/auth/oauth2_proxy.py +12 -7
  250. sky/server/common.py +47 -24
  251. sky/server/config.py +62 -28
  252. sky/server/constants.py +9 -1
  253. sky/server/daemons.py +109 -38
  254. sky/server/metrics.py +76 -96
  255. sky/server/middleware_utils.py +166 -0
  256. sky/server/plugins.py +222 -0
  257. sky/server/requests/executor.py +384 -145
  258. sky/server/requests/payloads.py +83 -19
  259. sky/server/requests/preconditions.py +15 -13
  260. sky/server/requests/request_names.py +123 -0
  261. sky/server/requests/requests.py +511 -157
  262. sky/server/requests/serializers/decoders.py +48 -17
  263. sky/server/requests/serializers/encoders.py +102 -20
  264. sky/server/requests/serializers/return_value_serializers.py +60 -0
  265. sky/server/requests/threads.py +117 -0
  266. sky/server/rest.py +116 -24
  267. sky/server/server.py +497 -179
  268. sky/server/server_utils.py +30 -0
  269. sky/server/stream_utils.py +219 -45
  270. sky/server/uvicorn.py +30 -19
  271. sky/setup_files/MANIFEST.in +6 -1
  272. sky/setup_files/alembic.ini +8 -0
  273. sky/setup_files/dependencies.py +64 -19
  274. sky/setup_files/setup.py +44 -44
  275. sky/sky_logging.py +13 -5
  276. sky/skylet/attempt_skylet.py +116 -24
  277. sky/skylet/configs.py +3 -1
  278. sky/skylet/constants.py +139 -29
  279. sky/skylet/events.py +74 -14
  280. sky/skylet/executor/__init__.py +1 -0
  281. sky/skylet/executor/slurm.py +189 -0
  282. sky/skylet/job_lib.py +143 -105
  283. sky/skylet/log_lib.py +252 -8
  284. sky/skylet/log_lib.pyi +47 -7
  285. sky/skylet/providers/ibm/node_provider.py +12 -8
  286. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  287. sky/skylet/runtime_utils.py +21 -0
  288. sky/skylet/services.py +524 -0
  289. sky/skylet/skylet.py +27 -2
  290. sky/skylet/subprocess_daemon.py +104 -28
  291. sky/skypilot_config.py +99 -79
  292. sky/ssh_node_pools/constants.py +12 -0
  293. sky/ssh_node_pools/core.py +40 -3
  294. sky/ssh_node_pools/deploy/__init__.py +4 -0
  295. sky/ssh_node_pools/deploy/deploy.py +952 -0
  296. sky/ssh_node_pools/deploy/tunnel_utils.py +199 -0
  297. sky/ssh_node_pools/deploy/utils.py +173 -0
  298. sky/ssh_node_pools/server.py +20 -21
  299. sky/{utils/kubernetes/ssh_utils.py → ssh_node_pools/utils.py} +9 -6
  300. sky/task.py +221 -104
  301. sky/templates/aws-ray.yml.j2 +1 -0
  302. sky/templates/azure-ray.yml.j2 +1 -0
  303. sky/templates/cudo-ray.yml.j2 +1 -0
  304. sky/templates/do-ray.yml.j2 +1 -0
  305. sky/templates/fluidstack-ray.yml.j2 +1 -0
  306. sky/templates/gcp-ray.yml.j2 +1 -0
  307. sky/templates/hyperbolic-ray.yml.j2 +1 -0
  308. sky/templates/ibm-ray.yml.j2 +2 -1
  309. sky/templates/jobs-controller.yaml.j2 +3 -0
  310. sky/templates/kubernetes-ray.yml.j2 +204 -55
  311. sky/templates/lambda-ray.yml.j2 +1 -0
  312. sky/templates/nebius-ray.yml.j2 +3 -0
  313. sky/templates/oci-ray.yml.j2 +1 -0
  314. sky/templates/paperspace-ray.yml.j2 +1 -0
  315. sky/templates/primeintellect-ray.yml.j2 +72 -0
  316. sky/templates/runpod-ray.yml.j2 +1 -0
  317. sky/templates/scp-ray.yml.j2 +1 -0
  318. sky/templates/seeweb-ray.yml.j2 +171 -0
  319. sky/templates/shadeform-ray.yml.j2 +73 -0
  320. sky/templates/slurm-ray.yml.j2 +85 -0
  321. sky/templates/vast-ray.yml.j2 +2 -0
  322. sky/templates/vsphere-ray.yml.j2 +1 -0
  323. sky/templates/websocket_proxy.py +188 -43
  324. sky/usage/usage_lib.py +16 -4
  325. sky/users/model.conf +1 -1
  326. sky/users/permission.py +84 -44
  327. sky/users/rbac.py +31 -3
  328. sky/utils/accelerator_registry.py +6 -3
  329. sky/utils/admin_policy_utils.py +18 -5
  330. sky/utils/annotations.py +128 -6
  331. sky/utils/asyncio_utils.py +78 -0
  332. sky/utils/atomic.py +1 -1
  333. sky/utils/auth_utils.py +153 -0
  334. sky/utils/cli_utils/status_utils.py +12 -7
  335. sky/utils/cluster_utils.py +28 -6
  336. sky/utils/command_runner.py +283 -30
  337. sky/utils/command_runner.pyi +63 -7
  338. sky/utils/common.py +3 -1
  339. sky/utils/common_utils.py +55 -7
  340. sky/utils/config_utils.py +1 -14
  341. sky/utils/context.py +127 -40
  342. sky/utils/context_utils.py +73 -18
  343. sky/utils/controller_utils.py +229 -70
  344. sky/utils/db/db_utils.py +95 -18
  345. sky/utils/db/kv_cache.py +149 -0
  346. sky/utils/db/migration_utils.py +24 -7
  347. sky/utils/env_options.py +4 -0
  348. sky/utils/git.py +559 -1
  349. sky/utils/kubernetes/create_cluster.sh +15 -30
  350. sky/utils/kubernetes/delete_cluster.sh +10 -7
  351. sky/utils/kubernetes/generate_kind_config.py +6 -66
  352. sky/utils/kubernetes/gpu_labeler.py +13 -3
  353. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  354. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  355. sky/utils/kubernetes/kubernetes_deploy_utils.py +187 -260
  356. sky/utils/kubernetes/rsync_helper.sh +11 -3
  357. sky/utils/kubernetes/ssh-tunnel.sh +7 -376
  358. sky/utils/kubernetes_enums.py +7 -15
  359. sky/utils/lock_events.py +4 -4
  360. sky/utils/locks.py +128 -31
  361. sky/utils/log_utils.py +0 -319
  362. sky/utils/resource_checker.py +13 -10
  363. sky/utils/resources_utils.py +53 -29
  364. sky/utils/rich_utils.py +8 -4
  365. sky/utils/schemas.py +138 -52
  366. sky/utils/subprocess_utils.py +17 -4
  367. sky/utils/thread_utils.py +91 -0
  368. sky/utils/timeline.py +2 -1
  369. sky/utils/ux_utils.py +35 -1
  370. sky/utils/volume.py +88 -4
  371. sky/utils/yaml_utils.py +9 -0
  372. sky/volumes/client/sdk.py +48 -10
  373. sky/volumes/server/core.py +59 -22
  374. sky/volumes/server/server.py +46 -17
  375. sky/volumes/volume.py +54 -42
  376. sky/workspaces/core.py +57 -21
  377. sky/workspaces/server.py +13 -12
  378. sky_templates/README.md +3 -0
  379. sky_templates/__init__.py +3 -0
  380. sky_templates/ray/__init__.py +0 -0
  381. sky_templates/ray/start_cluster +183 -0
  382. sky_templates/ray/stop_cluster +75 -0
  383. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/METADATA +343 -65
  384. skypilot_nightly-1.0.0.dev20251210.dist-info/RECORD +629 -0
  385. skypilot_nightly-1.0.0.dev20251210.dist-info/top_level.txt +2 -0
  386. sky/client/cli/git.py +0 -549
  387. sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
  388. sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
  389. sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
  390. sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
  391. sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
  392. sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
  393. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +0 -1
  394. sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
  395. sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
  396. sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
  397. sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
  398. sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
  399. sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
  400. sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
  401. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
  402. sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
  403. sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
  404. sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
  405. sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
  406. sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
  407. sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
  408. sky/dashboard/out/_next/static/chunks/pages/_app-ce361c6959bc2001.js +0 -34
  409. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
  410. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
  411. sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
  412. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
  413. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
  414. sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
  415. sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
  416. sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
  417. sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
  418. sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
  419. sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
  420. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  421. sky/utils/kubernetes/cleanup-tunnel.sh +0 -62
  422. sky/utils/kubernetes/deploy_remote_cluster.py +0 -1299
  423. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  424. skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
  425. skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
  426. /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → KYAhEFa3FTfq4JyKVgo-s}/_ssgManifest.js +0 -0
  427. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/WHEEL +0 -0
  428. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/entry_points.txt +0 -0
  429. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,199 @@
1
+ """Utilities to setup SSH Tunnel"""
2
+ import os
3
+ import random
4
+ import re
5
+ import subprocess
6
+ import sys
7
+ from typing import Set
8
+
9
+ import colorama
10
+
11
+ from sky import sky_logging
12
+ from sky.ssh_node_pools import constants
13
+ from sky.ssh_node_pools.deploy import utils as deploy_utils
14
+
15
+ logger = sky_logging.init_logger(__name__)
16
+
17
+ # Get the directory of this script
18
+ SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
19
+
20
+
21
+ def _get_used_localhost_ports() -> Set[int]:
22
+ """Get SSH port forwardings already in use on localhost"""
23
+ used_ports = set()
24
+
25
+ # Get ports from netstat (works on macOS and Linux)
26
+ try:
27
+ if sys.platform == 'darwin':
28
+ # macOS
29
+ result = subprocess.run(['netstat', '-an', '-p', 'tcp'],
30
+ capture_output=True,
31
+ text=True,
32
+ check=False)
33
+ else:
34
+ # Linux and other Unix-like systems
35
+ result = subprocess.run(['netstat', '-tln'],
36
+ capture_output=True,
37
+ text=True,
38
+ check=False)
39
+
40
+ if result.returncode == 0:
41
+ # Look for lines with 'localhost:<port>' or '127.0.0.1:<port>'
42
+ for line in result.stdout.splitlines():
43
+ if '127.0.0.1:' in line or 'localhost:' in line:
44
+ match = re.search(r':(64\d\d)\s', line)
45
+ if match:
46
+ port = int(match.group(1))
47
+ if 6400 <= port <= 6500: # Only consider our range
48
+ used_ports.add(port)
49
+ except (subprocess.SubprocessError, FileNotFoundError):
50
+ # If netstat fails, try another approach
51
+ pass
52
+
53
+ # Also check ports from existing kubeconfig entries
54
+ try:
55
+ result = subprocess.run([
56
+ 'kubectl', 'config', 'view', '-o',
57
+ 'jsonpath=\'{.clusters[*].cluster.server}\''
58
+ ],
59
+ capture_output=True,
60
+ text=True,
61
+ check=False)
62
+
63
+ if result.returncode == 0:
64
+ # Look for localhost URLs with ports
65
+ for url in result.stdout.split():
66
+ if 'localhost:' in url or '127.0.0.1:' in url:
67
+ match = re.search(r':(\d+)', url)
68
+ if match:
69
+ port = int(match.group(1))
70
+ if 6400 <= port <= 6500: # Only consider our range
71
+ used_ports.add(port)
72
+ except subprocess.SubprocessError:
73
+ pass
74
+
75
+ return used_ports
76
+
77
+
78
def get_available_port(start: int = 6443, end: int = 6499) -> int:
    """Pick a localhost port in ``[start, end]`` not used by other tunnels.

    Args:
        start: First port of the candidate range (inclusive).
        end: Last port of the candidate range (inclusive).

    Returns:
        The lowest free port in the range. If every port in the range is
        reported as used, a random port from the range is returned instead.
    """
    taken = _get_used_localhost_ports()

    # The first cluster gets 6443 whenever the default start is still free.
    if start == 6443 and start not in taken:
        return start

    # Ascending order keeps the allocation deterministic.
    free = sorted(
        candidate for candidate in range(start, end + 1)
        if candidate not in taken)
    if free:
        return free[0]

    # Everything is taken: fall back to a random port in our range
    # (we'll terminate any existing connection in the setup).
    return random.randint(start, end)
97
+
98
+
99
def setup_kubectl_ssh_tunnel(head_node,
                             ssh_user,
                             ssh_key,
                             context_name,
                             use_ssh_config=False):
    """Set up kubeconfig exec credential plugin for SSH tunnel.

    Points the kubeconfig cluster named ``context_name`` at
    ``https://127.0.0.1:<port>`` and registers the bundled
    ``tunnel/ssh-tunnel.sh`` script as the exec credential command for the
    credentials entry of the same name.

    Args:
        head_node: Passed to the tunnel script as ``--host``.
        ssh_user: Passed as ``--user`` (only when ``use_ssh_config`` is
            false).
        ssh_key: Passed as ``--ssh-key`` (only when ``use_ssh_config`` is
            false).
        context_name: Name of the kubeconfig cluster/credentials entries;
            also the filename prefix of the client cert/key files looked up
            under ``constants.NODE_POOLS_INFO_DIR``.
        use_ssh_config: When true, pass ``--use-ssh-config`` instead of an
            explicit user and key.

    Returns:
        The local port chosen for the tunnel.
    """
    yellow = colorama.Fore.YELLOW
    green = colorama.Fore.GREEN
    reset = colorama.Style.RESET_ALL

    def _as_exec_args(*values):
        # Each value becomes one `--exec-arg=<value>` kubectl flag.
        return [f'--exec-arg={value}' for value in values]

    logger.info(f'{yellow}➜ Setting up SSH tunnel for '
                f'Kubernetes API access...{reset}')

    # Each cluster gets its own local port.
    port = get_available_port()

    # The tunnel script ships next to this module and must be executable.
    tunnel_script = os.path.join(SCRIPT_DIR, 'tunnel', 'ssh-tunnel.sh')
    os.chmod(tunnel_script, 0o755)

    # Client certificate material, if it was extracted for this context.
    info_dir = constants.NODE_POOLS_INFO_DIR
    client_cert_file = os.path.join(info_dir, f'{context_name}-cert.pem')
    client_key_file = os.path.join(info_dir, f'{context_name}-key.pem')

    # Point the kubeconfig cluster at localhost on the selected port.
    deploy_utils.run_command([
        'kubectl', 'config', 'set-cluster', context_name,
        f'--server=https://127.0.0.1:{port}', '--insecure-skip-tls-verify=true'
    ])

    # A short credential TTL forces frequent tunnel checks.
    ttl_seconds = 30

    if os.path.isfile(client_cert_file) and os.path.isfile(client_key_file):
        logger.info(f'{green}Client certificate data extracted '
                    'and will be used for authentication'
                    f'{reset}')

    # Arguments shared by both authentication methods.
    credential_cmd = [
        'kubectl', 'config', 'set-credentials', context_name,
        '--exec-command', tunnel_script, '--exec-api-version',
        'client.authentication.k8s.io/v1beta1'
    ]
    credential_cmd += _as_exec_args('--context', context_name, '--port', port,
                                    '--ttl', ttl_seconds)
    # Authentication-specific tail.
    if use_ssh_config:
        credential_cmd += _as_exec_args('--use-ssh-config', '--host',
                                        head_node)
    else:
        credential_cmd += _as_exec_args('--host', head_node, '--user',
                                        ssh_user, '--ssh-key', ssh_key)
    deploy_utils.run_command(credential_cmd)

    logger.info(f'{green}✔ SSH tunnel configured through '
                'kubectl credential plugin on port '
                f'{port}{reset}')
    logger.info('Your kubectl connection is now tunneled through SSH '
                f'(port {port}).')
    logger.info('This tunnel will be automatically established when needed.')
    logger.info(f'Credential TTL set to {ttl_seconds}s to ensure tunnel '
                'health is checked frequently.')
    return port
176
+
177
+
178
def cleanup_kubectl_ssh_tunnel(cluster_name, context_name):
    """Clean up the SSH tunnel for a specific context.

    Runs the bundled ``tunnel/cleanup-tunnel.sh`` script with
    ``context_name`` as its only argument. The script's output is discarded
    and its exit status is not checked.

    Args:
        cluster_name: Name used in the log messages.
        context_name: Context handed to the cleanup script.
    """
    yellow = colorama.Fore.YELLOW
    reset = colorama.Style.RESET_ALL

    logger.info(f'{yellow}➜ Cleaning up SSH tunnel for '
                f'`{cluster_name}`...{reset}')

    cleanup_script = os.path.join(SCRIPT_DIR, 'tunnel', 'cleanup-tunnel.sh')

    # Without the script there is nothing to run; report it and stop.
    if not os.path.exists(cleanup_script):
        logger.error(f'{yellow}Cleanup script not found: '
                     f'{cleanup_script}{reset}')
        return

    # Make sure the script is executable before invoking it.
    os.chmod(cleanup_script, 0o755)
    subprocess.run([cleanup_script, context_name],
                   stdout=subprocess.DEVNULL,
                   stderr=subprocess.DEVNULL,
                   check=False)
    logger.info(f'{colorama.Fore.GREEN}✔ SSH tunnel for `{cluster_name}` '
                f'cleaned up.{reset}')
@@ -0,0 +1,173 @@
1
+ """Utilities for SSH Node Pools Deployment"""
2
+ import os
3
+ import subprocess
4
+ from typing import List, Optional
5
+
6
+ import colorama
7
+
8
+ from sky import sky_logging
9
+ from sky.utils import ux_utils
10
+
11
+ logger = sky_logging.init_logger(__name__)
12
+
13
+
14
def check_ssh_cluster_dependencies(
        raise_error: bool = True) -> Optional[List[str]]:
    """Verify that the local tools needed for an SSH cluster are installed.

    Currently the only required binary is `jq`; it is probed by running
    `jq --version`.

    Args:
        raise_error: When True, a missing dependency raises RuntimeError.
            When False (used by `sky check`), the reason strings are
            returned so the caller can compile them at the end.

    Returns:
        None if every dependency is present; otherwise a list of
        human-readable reason / install-hint strings.

    Raises:
        RuntimeError: if a dependency is missing and `raise_error` is True.
    """
    try:
        subprocess.run(['jq', '--version'],
                       check=True,
                       stdout=subprocess.DEVNULL,
                       stderr=subprocess.DEVNULL)
    except (FileNotFoundError, subprocess.CalledProcessError):
        missing_binaries = ['jq']
    else:
        missing_binaries = []

    if not missing_binaries:
        return None

    install_targets = ' '.join(missing_binaries)
    report = [
        '`jq` is required to setup ssh cluster.',
        'On Debian/Ubuntu, install the missing dependenc(ies) with:',
        f' $ sudo apt install {install_targets}',
        'On MacOS, install with: ',
        f' $ brew install {install_targets}',
    ]
    if raise_error:
        with ux_utils.print_exception_no_traceback():
            raise RuntimeError('\n'.join(report))
    return report
54
+
55
+
56
def run_command(cmd, shell=False, silent=False):
    """Execute a local command and return its stripped stdout.

    Args:
        cmd: Command to run; passed to `subprocess.run` unchanged.
        shell: Forwarded to `subprocess.run` as `shell`.
        silent: When True, a failing command is not logged.

    Returns:
        The command's stdout with surrounding whitespace removed, or None
        if the command exited with a non-zero status.
    """
    completed = subprocess.run(cmd,
                               check=False,
                               text=True,
                               capture_output=True,
                               shell=shell)
    if completed.returncode == 0:
        return completed.stdout.strip()

    if not silent:
        logger.error(f'{colorama.Fore.RED}Error executing command: {cmd}\n'
                     f'{colorama.Style.RESET_ALL}STDOUT: {completed.stdout}\n'
                     f'STDERR: {completed.stderr}')
    return None
70
+
71
+
72
def get_effective_host_ip(hostname: str) -> str:
    """Resolve a host alias to the `hostname` value reported by `ssh -G`.

    Args:
        hostname: Host name or SSH config alias to look up.

    Returns:
        The value of the first `hostname ...` line printed by
        `ssh -G <hostname>`, or the input unchanged when the lookup fails,
        raises, or prints no such line.
    """
    prefix = 'hostname '
    try:
        completed = subprocess.run(['ssh', '-G', hostname],
                                   check=False,
                                   text=True,
                                   capture_output=True)
        if completed.returncode == 0:
            resolved = next((entry[len(prefix):].strip()
                             for entry in completed.stdout.splitlines()
                             if entry.startswith(prefix)), None)
            if resolved is not None:
                return resolved
    except Exception:  # pylint: disable=broad-except
        # Best effort: any failure falls through to the original hostname.
        pass
    return hostname
86
+
87
+
88
def run_remote(node,
               cmd,
               user='',
               ssh_key='',
               connect_timeout=30,
               use_ssh_config=False,
               print_output=False,
               use_shell=False,
               silent=False):
    """Execute a command on a remote machine over SSH.

    Args:
        node: Remote host (or SSH config alias) to connect to.
        cmd: Command string appended as the last ssh argument.
        user: Optional login user; when set the target is `user@node`.
        ssh_key: Optional path to a private key passed with `-i`.
        connect_timeout: Value for the ssh `ConnectTimeout` option.
        use_ssh_config: When True, run plain `ssh <node> <cmd>` and ignore
            `user`, `ssh_key` and `connect_timeout`.
        print_output: When True, log the raw stdout on success.
        use_shell: When True, the ssh arguments are joined with spaces and
            run through the local shell.
        silent: When True, a failing command is not logged.

    Returns:
        The stripped stdout on success, or None if ssh exited non-zero.

    Raises:
        ValueError: if `ssh_key` is given but is not an existing file.
    """
    argv: List[str]
    if use_ssh_config:
        # Connection parameters come from the user's SSH config.
        argv = ['ssh', node, cmd]
    else:
        # Connection parameters are spelled out explicitly.
        argv = ['ssh']
        for option in ('StrictHostKeyChecking=no', 'IdentitiesOnly=yes',
                       f'ConnectTimeout={connect_timeout}',
                       'ServerAliveInterval=10', 'ServerAliveCountMax=3'):
            argv += ['-o', option]

        if ssh_key:
            if not os.path.isfile(ssh_key):
                raise ValueError(f'SSH key not found: {ssh_key}')
            argv += ['-i', ssh_key]

        target = f'{user}@{node}' if user else node
        argv += [target, cmd]

    invocation = ' '.join(argv) if use_shell else argv
    completed = subprocess.run(invocation,
                               shell=use_shell,
                               check=False,
                               text=True,
                               capture_output=True)
    if completed.returncode == 0:
        if print_output:
            logger.info(completed.stdout)
        return completed.stdout.strip()

    if not silent:
        logger.error(f'{colorama.Fore.RED}Error executing command {cmd} on '
                     f'{node}:{colorama.Style.RESET_ALL} {completed.stderr}')
    return None
132
+
133
+
134
def ensure_directory_exists(path):
    """Create the parent directory of a file path if it is missing.

    Args:
        path: File path whose containing directory should exist. A path
            with no directory component is left alone.
    """
    parent = os.path.dirname(path)
    # Nothing to do for a bare file name or a parent that already exists.
    if not parent or os.path.exists(parent):
        return
    os.makedirs(parent, exist_ok=True)
139
+
140
+
141
def check_gpu(node, user, ssh_key, use_ssh_config=False, is_head=False):
    """Check if a node has a GPU.

    Runs `nvidia-smi` on the node through `run_remote` and inspects the GPU
    names it prints.

    Args:
        node: Remote host to probe.
        user: SSH user forwarded to `run_remote`.
        ssh_key: SSH key path forwarded to `run_remote`.
        use_ssh_config: Forwarded to `run_remote`.
        is_head: Only affects the log message ("head" vs "worker").

    Returns:
        True if exactly one GPU type was detected; False if the remote
        command failed or printed no GPU names.

    Raises:
        RuntimeError: if the node reports more than one distinct GPU type.
    """
    # Use the POSIX redirection `> /dev/null 2>&1` instead of the bash-only
    # `&>`. Under a POSIX shell such as dash, `&>` is parsed as "run in the
    # background" followed by a separate redirection, so the `command -v`
    # output (the nvidia-smi path) reaches stdout and would be counted below
    # as an extra GPU name.
    cmd = ('command -v nvidia-smi > /dev/null 2>&1 && '
           'nvidia-smi --query-gpu=gpu_name --format=csv,noheader')
    result = run_remote(node,
                        cmd,
                        user,
                        ssh_key,
                        use_ssh_config=use_ssh_config,
                        silent=True)
    if result is None:
        # The remote command failed (e.g. nvidia-smi is not installed).
        return False

    # Check that all GPUs have the same type.
    # Currently, SkyPilot does not support heterogeneous GPU node
    # (i.e. more than one GPU type on the same node).
    gpu_names = {line.strip() for line in result.splitlines() if line.strip()}
    if not gpu_names:
        # This can happen if nvidia-smi returns only whitespace.
        return False
    if len(gpu_names) > 1:
        # Sort for a deterministic error message.
        sorted_gpu_names = sorted(gpu_names)
        raise RuntimeError(
            f'Node {node} has more than one GPU types '
            f'({", ".join(sorted_gpu_names)}). '
            'SkyPilot does not support a node with multiple GPU types.')

    logger.info(f'{colorama.Fore.YELLOW}➜ GPU {next(iter(gpu_names))} '
                f'detected on {"head" if is_head else "worker"} '
                f'node ({node}).{colorama.Style.RESET_ALL}')
    return True
@@ -4,11 +4,11 @@ from typing import Any, Dict, List
4
4
 
5
5
  import fastapi
6
6
 
7
- from sky import core as sky_core
8
7
  from sky.server.requests import executor
9
8
  from sky.server.requests import payloads
9
+ from sky.server.requests import request_names
10
10
  from sky.server.requests import requests as requests_lib
11
- from sky.ssh_node_pools import core as ssh_node_pools_core
11
+ from sky.ssh_node_pools import core
12
12
  from sky.utils import common_utils
13
13
 
14
14
  router = fastapi.APIRouter()
@@ -18,7 +18,7 @@ router = fastapi.APIRouter()
18
18
  def get_ssh_node_pools() -> Dict[str, Any]:
19
19
  """Get all SSH Node Pool configurations."""
20
20
  try:
21
- return ssh_node_pools_core.get_all_pools()
21
+ return core.get_all_pools()
22
22
  except Exception as e:
23
23
  raise fastapi.HTTPException(
24
24
  status_code=500,
@@ -30,7 +30,7 @@ def get_ssh_node_pools() -> Dict[str, Any]:
30
30
  def update_ssh_node_pools(pools_config: Dict[str, Any]) -> Dict[str, str]:
31
31
  """Update SSH Node Pool configurations."""
32
32
  try:
33
- ssh_node_pools_core.update_pools(pools_config)
33
+ core.update_pools(pools_config)
34
34
  return {'status': 'success'}
35
35
  except Exception as e:
36
36
  raise fastapi.HTTPException(status_code=400,
@@ -42,7 +42,7 @@ def update_ssh_node_pools(pools_config: Dict[str, Any]) -> Dict[str, str]:
42
42
  def delete_ssh_node_pool(pool_name: str) -> Dict[str, str]:
43
43
  """Delete a SSH Node Pool configuration."""
44
44
  try:
45
- if ssh_node_pools_core.delete_pool(pool_name):
45
+ if core.delete_pool(pool_name):
46
46
  return {'status': 'success'}
47
47
  else:
48
48
  raise fastapi.HTTPException(
@@ -69,8 +69,7 @@ async def upload_ssh_key(request: fastapi.Request) -> Dict[str, str]:
69
69
  detail='Missing key_name or key_file')
70
70
 
71
71
  key_content = await key_file.read()
72
- key_path = ssh_node_pools_core.upload_ssh_key(key_name,
73
- key_content.decode())
72
+ key_path = core.upload_ssh_key(key_name, key_content.decode())
74
73
 
75
74
  return {'status': 'success', 'key_path': key_path}
76
75
  except fastapi.HTTPException:
@@ -86,7 +85,7 @@ async def upload_ssh_key(request: fastapi.Request) -> Dict[str, str]:
86
85
  def list_ssh_keys() -> List[str]:
87
86
  """List available SSH keys."""
88
87
  try:
89
- return ssh_node_pools_core.list_ssh_keys()
88
+ return core.list_ssh_keys()
90
89
  except Exception as e:
91
90
  exception_msg = common_utils.format_exception(e)
92
91
  raise fastapi.HTTPException(
@@ -99,11 +98,11 @@ async def deploy_ssh_node_pool(request: fastapi.Request,
99
98
  """Deploy SSH Node Pool using existing ssh_up functionality."""
100
99
  try:
101
100
  ssh_up_body = payloads.SSHUpBody(infra=pool_name, cleanup=False)
102
- executor.schedule_request(
101
+ await executor.schedule_request_async(
103
102
  request_id=request.state.request_id,
104
- request_name='ssh_up',
103
+ request_name=request_names.RequestName.SSH_NODE_POOLS_UP,
105
104
  request_body=ssh_up_body,
106
- func=sky_core.ssh_up,
105
+ func=core.ssh_up,
107
106
  schedule_type=requests_lib.ScheduleType.LONG,
108
107
  )
109
108
 
@@ -124,11 +123,11 @@ async def deploy_ssh_node_pool_general(
124
123
  ssh_up_body: payloads.SSHUpBody) -> Dict[str, str]:
125
124
  """Deploys all SSH Node Pools."""
126
125
  try:
127
- executor.schedule_request(
126
+ await executor.schedule_request_async(
128
127
  request_id=request.state.request_id,
129
- request_name='ssh_up',
128
+ request_name=request_names.RequestName.SSH_NODE_POOLS_UP,
130
129
  request_body=ssh_up_body,
131
- func=sky_core.ssh_up,
130
+ func=core.ssh_up,
132
131
  schedule_type=requests_lib.ScheduleType.LONG,
133
132
  )
134
133
 
@@ -150,11 +149,11 @@ async def down_ssh_node_pool(request: fastapi.Request,
150
149
  """Cleans up a SSH Node Pools."""
151
150
  try:
152
151
  ssh_up_body = payloads.SSHUpBody(infra=pool_name, cleanup=True)
153
- executor.schedule_request(
152
+ await executor.schedule_request_async(
154
153
  request_id=request.state.request_id,
155
- request_name='ssh_down',
154
+ request_name=request_names.RequestName.SSH_NODE_POOLS_DOWN,
156
155
  request_body=ssh_up_body,
157
- func=sky_core.ssh_up, # Reuse ssh_up function with cleanup=True
156
+ func=core.ssh_up, # Reuse ssh_up function with cleanup=True
158
157
  schedule_type=requests_lib.ScheduleType.LONG,
159
158
  )
160
159
 
@@ -178,11 +177,11 @@ async def down_ssh_node_pool_general(
178
177
  try:
179
178
  # Set cleanup=True for down operation
180
179
  ssh_up_body.cleanup = True
181
- executor.schedule_request(
180
+ await executor.schedule_request_async(
182
181
  request_id=request.state.request_id,
183
- request_name='ssh_down',
182
+ request_name=request_names.RequestName.SSH_NODE_POOLS_DOWN,
184
183
  request_body=ssh_up_body,
185
- func=sky_core.ssh_up, # Reuse ssh_up function with cleanup=True
184
+ func=core.ssh_up, # Reuse ssh_up function with cleanup=True
186
185
  schedule_type=requests_lib.ScheduleType.LONG,
187
186
  )
188
187
 
@@ -205,7 +204,7 @@ def get_ssh_node_pool_status(pool_name: str) -> Dict[str, str]:
205
204
  try:
206
205
  # Call ssh_status to check the context
207
206
  context_name = f'ssh-{pool_name}'
208
- is_ready, reason = sky_core.ssh_status(context_name)
207
+ is_ready, reason = core.ssh_status(context_name)
209
208
 
210
209
  # Strip ANSI escape codes from the reason text
211
210
  def strip_ansi_codes(text):
@@ -5,13 +5,14 @@ import subprocess
5
5
  from typing import Any, Callable, Dict, List, Optional
6
6
  import uuid
7
7
 
8
+ import colorama
8
9
  import yaml
9
10
 
11
+ from sky import sky_logging
12
+ from sky.ssh_node_pools import constants
10
13
  from sky.utils import ux_utils
11
14
 
12
- DEFAULT_SSH_NODE_POOLS_PATH = os.path.expanduser('~/.sky/ssh_node_pools.yaml')
13
- RED = '\033[0;31m'
14
- NC = '\033[0m' # No color
15
+ logger = sky_logging.init_logger(__name__)
15
16
 
16
17
 
17
18
  def check_host_in_ssh_config(hostname: str) -> bool:
@@ -92,7 +93,8 @@ def load_ssh_targets(file_path: str) -> Dict[str, Any]:
92
93
  def get_cluster_config(
93
94
  targets: Dict[str, Any],
94
95
  cluster_name: Optional[str] = None,
95
- file_path: str = DEFAULT_SSH_NODE_POOLS_PATH) -> Dict[str, Any]:
96
+ file_path: str = constants.DEFAULT_SSH_NODE_POOLS_PATH
97
+ ) -> Dict[str, Any]:
96
98
  """Get configuration for specific clusters or all clusters."""
97
99
  if not targets:
98
100
  with ux_utils.print_exception_no_traceback():
@@ -186,8 +188,9 @@ def prepare_hosts_info(
186
188
  else:
187
189
  # It's a dict with potential overrides
188
190
  if 'ip' not in host:
189
- print(f'{RED}Warning: Host missing \'ip\' field, '
190
- f'skipping: {host}{NC}')
191
+ logger.warning(f'{colorama.Fore.RED}Warning: Host missing'
192
+ f'\'ip\' field, skipping: {host}'
193
+ f'{colorama.Style.RESET_ALL}')
191
194
  continue
192
195
 
193
196
  # Check if this is an SSH config hostname