skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (429) hide show
  1. sky/__init__.py +12 -2
  2. sky/adaptors/aws.py +27 -22
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/ibm.py +5 -2
  8. sky/adaptors/kubernetes.py +64 -0
  9. sky/adaptors/nebius.py +3 -1
  10. sky/adaptors/primeintellect.py +1 -0
  11. sky/adaptors/seeweb.py +183 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/adaptors/slurm.py +478 -0
  14. sky/admin_policy.py +20 -0
  15. sky/authentication.py +157 -263
  16. sky/backends/__init__.py +3 -2
  17. sky/backends/backend.py +11 -3
  18. sky/backends/backend_utils.py +630 -185
  19. sky/backends/cloud_vm_ray_backend.py +1111 -928
  20. sky/backends/local_docker_backend.py +9 -5
  21. sky/backends/task_codegen.py +971 -0
  22. sky/backends/wheel_utils.py +18 -0
  23. sky/catalog/__init__.py +8 -3
  24. sky/catalog/aws_catalog.py +4 -0
  25. sky/catalog/common.py +19 -1
  26. sky/catalog/data_fetchers/fetch_aws.py +102 -80
  27. sky/catalog/data_fetchers/fetch_gcp.py +30 -3
  28. sky/catalog/data_fetchers/fetch_nebius.py +9 -6
  29. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  30. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  31. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  32. sky/catalog/kubernetes_catalog.py +36 -32
  33. sky/catalog/primeintellect_catalog.py +95 -0
  34. sky/catalog/runpod_catalog.py +5 -1
  35. sky/catalog/seeweb_catalog.py +184 -0
  36. sky/catalog/shadeform_catalog.py +165 -0
  37. sky/catalog/slurm_catalog.py +243 -0
  38. sky/check.py +87 -46
  39. sky/client/cli/command.py +1004 -434
  40. sky/client/cli/flags.py +4 -2
  41. sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
  42. sky/client/cli/utils.py +79 -0
  43. sky/client/common.py +12 -2
  44. sky/client/sdk.py +188 -65
  45. sky/client/sdk_async.py +34 -33
  46. sky/cloud_stores.py +82 -3
  47. sky/clouds/__init__.py +8 -0
  48. sky/clouds/aws.py +337 -129
  49. sky/clouds/azure.py +24 -18
  50. sky/clouds/cloud.py +47 -13
  51. sky/clouds/cudo.py +16 -13
  52. sky/clouds/do.py +9 -7
  53. sky/clouds/fluidstack.py +12 -5
  54. sky/clouds/gcp.py +14 -7
  55. sky/clouds/hyperbolic.py +12 -5
  56. sky/clouds/ibm.py +12 -5
  57. sky/clouds/kubernetes.py +80 -45
  58. sky/clouds/lambda_cloud.py +12 -5
  59. sky/clouds/nebius.py +23 -9
  60. sky/clouds/oci.py +19 -12
  61. sky/clouds/paperspace.py +4 -1
  62. sky/clouds/primeintellect.py +317 -0
  63. sky/clouds/runpod.py +85 -24
  64. sky/clouds/scp.py +12 -8
  65. sky/clouds/seeweb.py +477 -0
  66. sky/clouds/shadeform.py +400 -0
  67. sky/clouds/slurm.py +578 -0
  68. sky/clouds/ssh.py +6 -3
  69. sky/clouds/utils/scp_utils.py +61 -50
  70. sky/clouds/vast.py +43 -27
  71. sky/clouds/vsphere.py +14 -16
  72. sky/core.py +296 -195
  73. sky/dashboard/out/404.html +1 -1
  74. sky/dashboard/out/_next/static/KYAhEFa3FTfq4JyKVgo-s/_buildManifest.js +1 -0
  75. sky/dashboard/out/_next/static/chunks/1141-9c810f01ff4f398a.js +11 -0
  76. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  77. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  78. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  79. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  80. sky/dashboard/out/_next/static/chunks/3294.ddda8c6c6f9f24dc.js +1 -0
  81. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  82. sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-b589397dc09c5b4e.js} +1 -1
  83. sky/dashboard/out/_next/static/chunks/3850-fd5696f3bbbaddae.js +1 -0
  84. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  85. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  86. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  87. sky/dashboard/out/_next/static/chunks/6856-da20c5fd999f319c.js +1 -0
  88. sky/dashboard/out/_next/static/chunks/6990-09cbf02d3cd518c3.js +1 -0
  89. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  90. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  91. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  92. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  93. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  94. sky/dashboard/out/_next/static/chunks/9353-8369df1cf105221c.js +1 -0
  95. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  96. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  97. sky/dashboard/out/_next/static/chunks/pages/_app-68b647e26f9d2793.js +34 -0
  98. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33f525539665fdfd.js +16 -0
  99. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a7565f586ef86467.js +1 -0
  100. sky/dashboard/out/_next/static/chunks/pages/clusters-9e5d47818b9bdadd.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/pages/{config-dfb9bf07b13045f4.js → config-718cdc365de82689.js} +1 -1
  102. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-12c559ec4d81fdbd.js} +1 -1
  103. sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-d187cd0413d72475.js} +1 -1
  104. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-895847b6cf200b04.js +16 -0
  105. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-8d0f4655400b4eb9.js +21 -0
  106. sky/dashboard/out/_next/static/chunks/pages/jobs-e5a98f17f8513a96.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/pages/plugins/[...slug]-4f46050ca065d8f8.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/pages/users-2f7646eb77785a2c.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/pages/volumes-ef19d49c6d0e8500.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-96e0f298308da7e2.js} +1 -1
  111. sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-cb4da3abe08ebf19.js} +1 -1
  112. sky/dashboard/out/_next/static/chunks/webpack-fba3de387ff6bb08.js +1 -0
  113. sky/dashboard/out/_next/static/css/c5a4cfd2600fc715.css +3 -0
  114. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  115. sky/dashboard/out/clusters/[cluster].html +1 -1
  116. sky/dashboard/out/clusters.html +1 -1
  117. sky/dashboard/out/config.html +1 -1
  118. sky/dashboard/out/index.html +1 -1
  119. sky/dashboard/out/infra/[context].html +1 -1
  120. sky/dashboard/out/infra.html +1 -1
  121. sky/dashboard/out/jobs/[job].html +1 -1
  122. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  123. sky/dashboard/out/jobs.html +1 -1
  124. sky/dashboard/out/plugins/[...slug].html +1 -0
  125. sky/dashboard/out/users.html +1 -1
  126. sky/dashboard/out/volumes.html +1 -1
  127. sky/dashboard/out/workspace/new.html +1 -1
  128. sky/dashboard/out/workspaces/[name].html +1 -1
  129. sky/dashboard/out/workspaces.html +1 -1
  130. sky/data/data_utils.py +92 -1
  131. sky/data/mounting_utils.py +177 -30
  132. sky/data/storage.py +200 -19
  133. sky/data/storage_utils.py +10 -45
  134. sky/exceptions.py +18 -7
  135. sky/execution.py +74 -31
  136. sky/global_user_state.py +605 -191
  137. sky/jobs/__init__.py +2 -0
  138. sky/jobs/client/sdk.py +101 -4
  139. sky/jobs/client/sdk_async.py +31 -5
  140. sky/jobs/constants.py +15 -8
  141. sky/jobs/controller.py +726 -284
  142. sky/jobs/file_content_utils.py +128 -0
  143. sky/jobs/log_gc.py +193 -0
  144. sky/jobs/recovery_strategy.py +250 -100
  145. sky/jobs/scheduler.py +271 -173
  146. sky/jobs/server/core.py +367 -114
  147. sky/jobs/server/server.py +81 -35
  148. sky/jobs/server/utils.py +89 -35
  149. sky/jobs/state.py +1498 -620
  150. sky/jobs/utils.py +771 -306
  151. sky/logs/agent.py +40 -5
  152. sky/logs/aws.py +9 -19
  153. sky/metrics/utils.py +282 -39
  154. sky/models.py +2 -0
  155. sky/optimizer.py +7 -6
  156. sky/provision/__init__.py +38 -1
  157. sky/provision/aws/config.py +34 -13
  158. sky/provision/aws/instance.py +5 -2
  159. sky/provision/azure/instance.py +5 -3
  160. sky/provision/common.py +22 -0
  161. sky/provision/cudo/instance.py +4 -3
  162. sky/provision/do/instance.py +4 -3
  163. sky/provision/docker_utils.py +112 -28
  164. sky/provision/fluidstack/instance.py +6 -5
  165. sky/provision/gcp/config.py +6 -1
  166. sky/provision/gcp/instance.py +4 -2
  167. sky/provision/hyperbolic/instance.py +4 -2
  168. sky/provision/instance_setup.py +66 -20
  169. sky/provision/kubernetes/__init__.py +2 -0
  170. sky/provision/kubernetes/config.py +7 -44
  171. sky/provision/kubernetes/constants.py +0 -1
  172. sky/provision/kubernetes/instance.py +609 -213
  173. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  174. sky/provision/kubernetes/network.py +12 -8
  175. sky/provision/kubernetes/network_utils.py +8 -25
  176. sky/provision/kubernetes/utils.py +422 -422
  177. sky/provision/kubernetes/volume.py +150 -18
  178. sky/provision/lambda_cloud/instance.py +16 -13
  179. sky/provision/nebius/instance.py +6 -2
  180. sky/provision/nebius/utils.py +103 -86
  181. sky/provision/oci/instance.py +4 -2
  182. sky/provision/paperspace/instance.py +4 -3
  183. sky/provision/primeintellect/__init__.py +10 -0
  184. sky/provision/primeintellect/config.py +11 -0
  185. sky/provision/primeintellect/instance.py +454 -0
  186. sky/provision/primeintellect/utils.py +398 -0
  187. sky/provision/provisioner.py +45 -15
  188. sky/provision/runpod/__init__.py +2 -0
  189. sky/provision/runpod/instance.py +4 -3
  190. sky/provision/runpod/volume.py +69 -13
  191. sky/provision/scp/instance.py +307 -130
  192. sky/provision/seeweb/__init__.py +11 -0
  193. sky/provision/seeweb/config.py +13 -0
  194. sky/provision/seeweb/instance.py +812 -0
  195. sky/provision/shadeform/__init__.py +11 -0
  196. sky/provision/shadeform/config.py +12 -0
  197. sky/provision/shadeform/instance.py +351 -0
  198. sky/provision/shadeform/shadeform_utils.py +83 -0
  199. sky/provision/slurm/__init__.py +12 -0
  200. sky/provision/slurm/config.py +13 -0
  201. sky/provision/slurm/instance.py +572 -0
  202. sky/provision/slurm/utils.py +583 -0
  203. sky/provision/vast/instance.py +9 -4
  204. sky/provision/vast/utils.py +10 -6
  205. sky/provision/volume.py +164 -0
  206. sky/provision/vsphere/common/ssl_helper.py +1 -1
  207. sky/provision/vsphere/common/vapiconnect.py +2 -1
  208. sky/provision/vsphere/common/vim_utils.py +3 -2
  209. sky/provision/vsphere/instance.py +8 -6
  210. sky/provision/vsphere/vsphere_utils.py +8 -1
  211. sky/resources.py +11 -3
  212. sky/schemas/api/responses.py +107 -6
  213. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  214. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  215. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  216. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  217. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  218. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  219. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  220. sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
  221. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  222. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  223. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  224. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  225. sky/schemas/generated/jobsv1_pb2.py +86 -0
  226. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  227. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  228. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  229. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  230. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  231. sky/schemas/generated/servev1_pb2.py +58 -0
  232. sky/schemas/generated/servev1_pb2.pyi +115 -0
  233. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  234. sky/serve/autoscalers.py +2 -0
  235. sky/serve/client/impl.py +55 -21
  236. sky/serve/constants.py +4 -3
  237. sky/serve/controller.py +17 -11
  238. sky/serve/load_balancing_policies.py +1 -1
  239. sky/serve/replica_managers.py +219 -142
  240. sky/serve/serve_rpc_utils.py +179 -0
  241. sky/serve/serve_state.py +63 -54
  242. sky/serve/serve_utils.py +145 -109
  243. sky/serve/server/core.py +46 -25
  244. sky/serve/server/impl.py +311 -162
  245. sky/serve/server/server.py +21 -19
  246. sky/serve/service.py +84 -68
  247. sky/serve/service_spec.py +45 -7
  248. sky/server/auth/loopback.py +38 -0
  249. sky/server/auth/oauth2_proxy.py +12 -7
  250. sky/server/common.py +47 -24
  251. sky/server/config.py +62 -28
  252. sky/server/constants.py +9 -1
  253. sky/server/daemons.py +109 -38
  254. sky/server/metrics.py +76 -96
  255. sky/server/middleware_utils.py +166 -0
  256. sky/server/plugins.py +222 -0
  257. sky/server/requests/executor.py +384 -145
  258. sky/server/requests/payloads.py +83 -19
  259. sky/server/requests/preconditions.py +15 -13
  260. sky/server/requests/request_names.py +123 -0
  261. sky/server/requests/requests.py +511 -157
  262. sky/server/requests/serializers/decoders.py +48 -17
  263. sky/server/requests/serializers/encoders.py +102 -20
  264. sky/server/requests/serializers/return_value_serializers.py +60 -0
  265. sky/server/requests/threads.py +117 -0
  266. sky/server/rest.py +116 -24
  267. sky/server/server.py +497 -179
  268. sky/server/server_utils.py +30 -0
  269. sky/server/stream_utils.py +219 -45
  270. sky/server/uvicorn.py +30 -19
  271. sky/setup_files/MANIFEST.in +6 -1
  272. sky/setup_files/alembic.ini +8 -0
  273. sky/setup_files/dependencies.py +64 -19
  274. sky/setup_files/setup.py +44 -44
  275. sky/sky_logging.py +13 -5
  276. sky/skylet/attempt_skylet.py +116 -24
  277. sky/skylet/configs.py +3 -1
  278. sky/skylet/constants.py +139 -29
  279. sky/skylet/events.py +74 -14
  280. sky/skylet/executor/__init__.py +1 -0
  281. sky/skylet/executor/slurm.py +189 -0
  282. sky/skylet/job_lib.py +143 -105
  283. sky/skylet/log_lib.py +252 -8
  284. sky/skylet/log_lib.pyi +47 -7
  285. sky/skylet/providers/ibm/node_provider.py +12 -8
  286. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  287. sky/skylet/runtime_utils.py +21 -0
  288. sky/skylet/services.py +524 -0
  289. sky/skylet/skylet.py +27 -2
  290. sky/skylet/subprocess_daemon.py +104 -28
  291. sky/skypilot_config.py +99 -79
  292. sky/ssh_node_pools/constants.py +12 -0
  293. sky/ssh_node_pools/core.py +40 -3
  294. sky/ssh_node_pools/deploy/__init__.py +4 -0
  295. sky/ssh_node_pools/deploy/deploy.py +952 -0
  296. sky/ssh_node_pools/deploy/tunnel_utils.py +199 -0
  297. sky/ssh_node_pools/deploy/utils.py +173 -0
  298. sky/ssh_node_pools/server.py +20 -21
  299. sky/{utils/kubernetes/ssh_utils.py → ssh_node_pools/utils.py} +9 -6
  300. sky/task.py +221 -104
  301. sky/templates/aws-ray.yml.j2 +1 -0
  302. sky/templates/azure-ray.yml.j2 +1 -0
  303. sky/templates/cudo-ray.yml.j2 +1 -0
  304. sky/templates/do-ray.yml.j2 +1 -0
  305. sky/templates/fluidstack-ray.yml.j2 +1 -0
  306. sky/templates/gcp-ray.yml.j2 +1 -0
  307. sky/templates/hyperbolic-ray.yml.j2 +1 -0
  308. sky/templates/ibm-ray.yml.j2 +2 -1
  309. sky/templates/jobs-controller.yaml.j2 +3 -0
  310. sky/templates/kubernetes-ray.yml.j2 +204 -55
  311. sky/templates/lambda-ray.yml.j2 +1 -0
  312. sky/templates/nebius-ray.yml.j2 +3 -0
  313. sky/templates/oci-ray.yml.j2 +1 -0
  314. sky/templates/paperspace-ray.yml.j2 +1 -0
  315. sky/templates/primeintellect-ray.yml.j2 +72 -0
  316. sky/templates/runpod-ray.yml.j2 +1 -0
  317. sky/templates/scp-ray.yml.j2 +1 -0
  318. sky/templates/seeweb-ray.yml.j2 +171 -0
  319. sky/templates/shadeform-ray.yml.j2 +73 -0
  320. sky/templates/slurm-ray.yml.j2 +85 -0
  321. sky/templates/vast-ray.yml.j2 +2 -0
  322. sky/templates/vsphere-ray.yml.j2 +1 -0
  323. sky/templates/websocket_proxy.py +188 -43
  324. sky/usage/usage_lib.py +16 -4
  325. sky/users/model.conf +1 -1
  326. sky/users/permission.py +84 -44
  327. sky/users/rbac.py +31 -3
  328. sky/utils/accelerator_registry.py +6 -3
  329. sky/utils/admin_policy_utils.py +18 -5
  330. sky/utils/annotations.py +128 -6
  331. sky/utils/asyncio_utils.py +78 -0
  332. sky/utils/atomic.py +1 -1
  333. sky/utils/auth_utils.py +153 -0
  334. sky/utils/cli_utils/status_utils.py +12 -7
  335. sky/utils/cluster_utils.py +28 -6
  336. sky/utils/command_runner.py +283 -30
  337. sky/utils/command_runner.pyi +63 -7
  338. sky/utils/common.py +3 -1
  339. sky/utils/common_utils.py +55 -7
  340. sky/utils/config_utils.py +1 -14
  341. sky/utils/context.py +127 -40
  342. sky/utils/context_utils.py +73 -18
  343. sky/utils/controller_utils.py +229 -70
  344. sky/utils/db/db_utils.py +95 -18
  345. sky/utils/db/kv_cache.py +149 -0
  346. sky/utils/db/migration_utils.py +24 -7
  347. sky/utils/env_options.py +4 -0
  348. sky/utils/git.py +559 -1
  349. sky/utils/kubernetes/create_cluster.sh +15 -30
  350. sky/utils/kubernetes/delete_cluster.sh +10 -7
  351. sky/utils/kubernetes/generate_kind_config.py +6 -66
  352. sky/utils/kubernetes/gpu_labeler.py +13 -3
  353. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  354. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  355. sky/utils/kubernetes/kubernetes_deploy_utils.py +187 -260
  356. sky/utils/kubernetes/rsync_helper.sh +11 -3
  357. sky/utils/kubernetes/ssh-tunnel.sh +7 -376
  358. sky/utils/kubernetes_enums.py +7 -15
  359. sky/utils/lock_events.py +4 -4
  360. sky/utils/locks.py +128 -31
  361. sky/utils/log_utils.py +0 -319
  362. sky/utils/resource_checker.py +13 -10
  363. sky/utils/resources_utils.py +53 -29
  364. sky/utils/rich_utils.py +8 -4
  365. sky/utils/schemas.py +138 -52
  366. sky/utils/subprocess_utils.py +17 -4
  367. sky/utils/thread_utils.py +91 -0
  368. sky/utils/timeline.py +2 -1
  369. sky/utils/ux_utils.py +35 -1
  370. sky/utils/volume.py +88 -4
  371. sky/utils/yaml_utils.py +9 -0
  372. sky/volumes/client/sdk.py +48 -10
  373. sky/volumes/server/core.py +59 -22
  374. sky/volumes/server/server.py +46 -17
  375. sky/volumes/volume.py +54 -42
  376. sky/workspaces/core.py +57 -21
  377. sky/workspaces/server.py +13 -12
  378. sky_templates/README.md +3 -0
  379. sky_templates/__init__.py +3 -0
  380. sky_templates/ray/__init__.py +0 -0
  381. sky_templates/ray/start_cluster +183 -0
  382. sky_templates/ray/stop_cluster +75 -0
  383. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/METADATA +343 -65
  384. skypilot_nightly-1.0.0.dev20251210.dist-info/RECORD +629 -0
  385. skypilot_nightly-1.0.0.dev20251210.dist-info/top_level.txt +2 -0
  386. sky/client/cli/git.py +0 -549
  387. sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
  388. sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
  389. sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
  390. sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
  391. sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
  392. sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
  393. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +0 -1
  394. sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
  395. sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
  396. sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
  397. sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
  398. sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
  399. sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
  400. sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
  401. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
  402. sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
  403. sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
  404. sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
  405. sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
  406. sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
  407. sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
  408. sky/dashboard/out/_next/static/chunks/pages/_app-ce361c6959bc2001.js +0 -34
  409. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
  410. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
  411. sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
  412. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
  413. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
  414. sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
  415. sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
  416. sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
  417. sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
  418. sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
  419. sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
  420. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  421. sky/utils/kubernetes/cleanup-tunnel.sh +0 -62
  422. sky/utils/kubernetes/deploy_remote_cluster.py +0 -1299
  423. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  424. skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
  425. skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
  426. /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → KYAhEFa3FTfq4JyKVgo-s}/_ssgManifest.js +0 -0
  427. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/WHEEL +0 -0
  428. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/entry_points.txt +0 -0
  429. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/licenses/LICENSE +0 -0
sky/authentication.py CHANGED
@@ -19,34 +19,31 @@ controller. (Lambda cloud is an exception, due to the limitation of the cloud
19
19
  provider. See the comments in setup_lambda_authentication)
20
20
  """
21
21
  import copy
22
- import functools
23
22
  import os
24
23
  import re
25
24
  import socket
26
25
  import subprocess
27
26
  import sys
28
- from typing import Any, Dict, Optional, Tuple
27
+ from typing import Any, Dict
29
28
  import uuid
30
29
 
31
30
  import colorama
32
- import filelock
33
31
 
34
32
  from sky import clouds
35
33
  from sky import exceptions
36
- from sky import global_user_state
37
34
  from sky import sky_logging
38
- from sky import skypilot_config
39
35
  from sky.adaptors import gcp
40
36
  from sky.adaptors import ibm
41
- from sky.adaptors import kubernetes
42
37
  from sky.adaptors import runpod
38
+ from sky.adaptors import seeweb as seeweb_adaptor
39
+ from sky.adaptors import shadeform as shadeform_adaptor
43
40
  from sky.adaptors import vast
44
41
  from sky.provision.fluidstack import fluidstack_utils
45
42
  from sky.provision.kubernetes import utils as kubernetes_utils
46
43
  from sky.provision.lambda_cloud import lambda_utils
44
+ from sky.provision.primeintellect import utils as primeintellect_utils
45
+ from sky.utils import auth_utils
47
46
  from sky.utils import common_utils
48
- from sky.utils import config_utils
49
- from sky.utils import kubernetes_enums
50
47
  from sky.utils import subprocess_utils
51
48
  from sky.utils import ux_utils
52
49
  from sky.utils import yaml_utils
@@ -58,144 +55,9 @@ logger = sky_logging.init_logger(__name__)
58
55
  # using Cloud Client Libraries for Python, where possible, for new code
59
56
  # development.
60
57
 
61
- MAX_TRIALS = 64
62
- # TODO(zhwu): Support user specified key pair.
63
- # We intentionally not have the ssh key pair to be stored in
64
- # ~/.sky/api_server/clients, i.e. sky.server.common.API_SERVER_CLIENT_DIR,
65
- # because ssh key pair need to persist across API server restarts, while
66
- # the former dir is empheral.
67
- _SSH_KEY_PATH_PREFIX = '~/.sky/clients/{user_hash}/ssh'
68
-
69
-
70
- def get_ssh_key_and_lock_path(
71
- user_hash: Optional[str] = None) -> Tuple[str, str, str]:
72
- if user_hash is None:
73
- user_hash = common_utils.get_user_hash()
74
- user_ssh_key_prefix = _SSH_KEY_PATH_PREFIX.format(user_hash=user_hash)
75
-
76
- os.makedirs(os.path.expanduser(user_ssh_key_prefix),
77
- exist_ok=True,
78
- mode=0o700)
79
- private_key_path = os.path.join(user_ssh_key_prefix, 'sky-key')
80
- public_key_path = os.path.join(user_ssh_key_prefix, 'sky-key.pub')
81
- lock_path = os.path.join(user_ssh_key_prefix, '.__internal-sky-key.lock')
82
- return private_key_path, public_key_path, lock_path
83
-
84
-
85
- def _generate_rsa_key_pair() -> Tuple[str, str]:
86
- # Keep the import of the cryptography local to avoid expensive
87
- # third-party imports when not needed.
88
- # pylint: disable=import-outside-toplevel
89
- from cryptography.hazmat.backends import default_backend
90
- from cryptography.hazmat.primitives import serialization
91
- from cryptography.hazmat.primitives.asymmetric import rsa
92
-
93
- key = rsa.generate_private_key(backend=default_backend(),
94
- public_exponent=65537,
95
- key_size=2048)
96
-
97
- private_key = key.private_bytes(
98
- encoding=serialization.Encoding.PEM,
99
- format=serialization.PrivateFormat.TraditionalOpenSSL,
100
- encryption_algorithm=serialization.NoEncryption()).decode(
101
- 'utf-8').strip()
102
-
103
- public_key = key.public_key().public_bytes(
104
- serialization.Encoding.OpenSSH,
105
- serialization.PublicFormat.OpenSSH).decode('utf-8').strip()
106
-
107
- return public_key, private_key
108
-
109
-
110
- def _save_key_pair(private_key_path: str, public_key_path: str,
111
- private_key: str, public_key: str) -> None:
112
- key_dir = os.path.dirname(private_key_path)
113
- os.makedirs(key_dir, exist_ok=True, mode=0o700)
114
-
115
- with open(
116
- private_key_path,
117
- 'w',
118
- encoding='utf-8',
119
- opener=functools.partial(os.open, mode=0o600),
120
- ) as f:
121
- f.write(private_key)
122
-
123
- with open(public_key_path,
124
- 'w',
125
- encoding='utf-8',
126
- opener=functools.partial(os.open, mode=0o644)) as f:
127
- f.write(public_key)
128
-
129
- global_user_state.set_ssh_keys(common_utils.get_user_hash(), public_key,
130
- private_key)
131
-
132
-
133
- def get_or_generate_keys() -> Tuple[str, str]:
134
- """Returns the absolute private and public key paths."""
135
- private_key_path, public_key_path, lock_path = get_ssh_key_and_lock_path()
136
- private_key_path = os.path.expanduser(private_key_path)
137
- public_key_path = os.path.expanduser(public_key_path)
138
- lock_path = os.path.expanduser(lock_path)
139
-
140
- lock_dir = os.path.dirname(lock_path)
141
- # We should have the folder ~/.sky/generated/ssh to have 0o700 permission,
142
- # as the ssh configs will be written to this folder as well in
143
- # backend_utils.SSHConfigHelper
144
- os.makedirs(lock_dir, exist_ok=True, mode=0o700)
145
- with filelock.FileLock(lock_path, timeout=10):
146
- if not os.path.exists(private_key_path):
147
- ssh_public_key, ssh_private_key, exists = (
148
- global_user_state.get_ssh_keys(common_utils.get_user_hash()))
149
- if not exists:
150
- ssh_public_key, ssh_private_key = _generate_rsa_key_pair()
151
- _save_key_pair(private_key_path, public_key_path, ssh_private_key,
152
- ssh_public_key)
153
- assert os.path.exists(public_key_path), (
154
- 'Private key found, but associated public key '
155
- f'{public_key_path} does not exist.')
156
- return private_key_path, public_key_path
157
-
158
-
159
- def create_ssh_key_files_from_db(private_key_path: Optional[str] = None):
160
- if private_key_path is None:
161
- user_hash = common_utils.get_user_hash()
162
- else:
163
- # Assume private key path is in the format of
164
- # ~/.sky/clients/<user_hash>/ssh/sky-key
165
- separated_path = os.path.normpath(private_key_path).split(os.path.sep)
166
- assert separated_path[-1] == 'sky-key'
167
- assert separated_path[-2] == 'ssh'
168
- user_hash = separated_path[-3]
169
-
170
- private_key_path_generated, public_key_path, lock_path = (
171
- get_ssh_key_and_lock_path(user_hash))
172
- assert private_key_path == os.path.expanduser(private_key_path_generated), (
173
- f'Private key path {private_key_path} does not '
174
- f'match the generated path {private_key_path_generated}')
175
- private_key_path = os.path.expanduser(private_key_path)
176
- public_key_path = os.path.expanduser(public_key_path)
177
- lock_path = os.path.expanduser(lock_path)
178
-
179
- lock_dir = os.path.dirname(lock_path)
180
- # We should have the folder ~/.sky/generated/ssh to have 0o700 permission,
181
- # as the ssh configs will be written to this folder as well in
182
- # backend_utils.SSHConfigHelper
183
- os.makedirs(lock_dir, exist_ok=True, mode=0o700)
184
- with filelock.FileLock(lock_path, timeout=10):
185
- if not os.path.exists(private_key_path):
186
- ssh_public_key, ssh_private_key, exists = (
187
- global_user_state.get_ssh_keys(user_hash))
188
- if not exists:
189
- raise RuntimeError(f'SSH keys not found for user {user_hash}')
190
- _save_key_pair(private_key_path, public_key_path, ssh_private_key,
191
- ssh_public_key)
192
- assert os.path.exists(public_key_path), (
193
- 'Private key found, but associated public key '
194
- f'{public_key_path} does not exist.')
195
-
196
58
 
197
59
  def configure_ssh_info(config: Dict[str, Any]) -> Dict[str, Any]:
198
- _, public_key_path = get_or_generate_keys()
60
+ _, public_key_path = auth_utils.get_or_generate_keys()
199
61
  with open(public_key_path, 'r', encoding='utf-8') as f:
200
62
  public_key = f.read().strip()
201
63
  config_str = yaml_utils.dump_yaml_str(config)
@@ -207,6 +69,24 @@ def configure_ssh_info(config: Dict[str, Any]) -> Dict[str, Any]:
207
69
  return config
208
70
 
209
71
 
72
+ def parse_gcp_project_oslogin(project):
73
+ """Helper function to parse GCP project metadata."""
74
+ common_metadata = project.get('commonInstanceMetadata', {})
75
+ if not isinstance(common_metadata, dict):
76
+ common_metadata = {}
77
+
78
+ metadata_items = common_metadata.get('items', [])
79
+ if not isinstance(metadata_items, list):
80
+ metadata_items = []
81
+
82
+ project_oslogin = next(
83
+ (item for item in metadata_items
84
+ if isinstance(item, dict) and item.get('key') == 'enable-oslogin'),
85
+ {}).get('value', 'False')
86
+
87
+ return project_oslogin
88
+
89
+
210
90
  # Snippets of code inspired from
211
91
  # https://github.com/ray-project/ray/blob/master/python/ray/autoscaler/_private/gcp/config.py
212
92
  # Takes in config, a yaml dict and outputs a postprocessed dict
@@ -215,7 +95,7 @@ def configure_ssh_info(config: Dict[str, Any]) -> Dict[str, Any]:
215
95
  # Retry for the GCP as sometimes there will be connection reset by peer error.
216
96
  @common_utils.retry
217
97
  def setup_gcp_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
218
- _, public_key_path = get_or_generate_keys()
98
+ _, public_key_path = auth_utils.get_or_generate_keys()
219
99
  config = copy.deepcopy(config)
220
100
 
221
101
  project_id = config['provider']['project_id']
@@ -264,10 +144,7 @@ def setup_gcp_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
264
144
  'Please check your network connection.')
265
145
  raise
266
146
 
267
- project_oslogin: str = next( # type: ignore
268
- (item for item in project['commonInstanceMetadata'].get('items', [])
269
- if item['key'] == 'enable-oslogin'), {}).get('value', 'False')
270
-
147
+ project_oslogin = parse_gcp_project_oslogin(project)
271
148
  if project_oslogin.lower() == 'true':
272
149
  logger.info(
273
150
  f'OS Login is enabled for GCP project {project_id}. Running '
@@ -343,11 +220,11 @@ def setup_gcp_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
343
220
 
344
221
  def setup_lambda_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
345
222
 
346
- get_or_generate_keys()
223
+ auth_utils.get_or_generate_keys()
347
224
 
348
225
  # Ensure ssh key is registered with Lambda Cloud
349
226
  lambda_client = lambda_utils.LambdaCloudClient()
350
- _, public_key_path = get_or_generate_keys()
227
+ _, public_key_path = auth_utils.get_or_generate_keys()
351
228
  with open(public_key_path, 'r', encoding='utf-8') as f:
352
229
  public_key = f.read().strip()
353
230
  prefix = f'sky-key-{common_utils.get_user_hash()}'
@@ -364,7 +241,7 @@ def setup_ibm_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
364
241
  and updates config file.
365
242
  keys default location: '~/.ssh/sky-key' and '~/.ssh/sky-key.pub'
366
243
  """
367
- private_key_path, _ = get_or_generate_keys()
244
+ private_key_path, _ = auth_utils.get_or_generate_keys()
368
245
 
369
246
  def _get_unique_key_name():
370
247
  suffix_len = 10
@@ -373,7 +250,7 @@ def setup_ibm_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
373
250
  client = ibm.client(region=config['provider']['region'])
374
251
  resource_group_id = config['provider']['resource_group_id']
375
252
 
376
- _, public_key_path = get_or_generate_keys()
253
+ _, public_key_path = auth_utils.get_or_generate_keys()
377
254
  with open(os.path.abspath(os.path.expanduser(public_key_path)),
378
255
  'r',
379
256
  encoding='utf-8') as file:
@@ -414,116 +291,30 @@ def setup_ibm_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
414
291
 
415
292
  def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
416
293
  context = kubernetes_utils.get_context_from_config(config['provider'])
417
-
418
- # Default ssh session is established with kubectl port-forwarding with
419
- # ClusterIP service.
420
- nodeport_mode = kubernetes_enums.KubernetesNetworkingMode.NODEPORT
421
- port_forward_mode = kubernetes_enums.KubernetesNetworkingMode.PORTFORWARD
422
- network_mode_str = skypilot_config.get_effective_region_config(
423
- cloud='kubernetes',
424
- region=context,
425
- keys=('networking',),
426
- default_value=port_forward_mode.value)
427
- try:
428
- network_mode = kubernetes_enums.KubernetesNetworkingMode.from_str(
429
- network_mode_str)
430
- except ValueError as e:
431
- # Add message saying "Please check: ~/.sky/config.yaml" to the error
432
- # message.
433
- with ux_utils.print_exception_no_traceback():
434
- raise ValueError(str(e) +
435
- ' Please check: ~/.sky/config.yaml.') from None
436
- _, public_key_path = get_or_generate_keys()
437
-
438
- # Add the user's public key to the SkyPilot cluster.
439
- secret_name = clouds.Kubernetes.SKY_SSH_KEY_SECRET_NAME
440
- secret_field_name = clouds.Kubernetes().ssh_key_secret_field_name
441
294
  namespace = kubernetes_utils.get_namespace_from_config(config['provider'])
442
- k8s = kubernetes.kubernetes
443
- with open(public_key_path, 'r', encoding='utf-8') as f:
444
- public_key = f.read()
445
- if not public_key.endswith('\n'):
446
- public_key += '\n'
447
-
448
- # Generate metadata
449
- secret_metadata = {
450
- 'name': secret_name,
451
- 'labels': {
452
- 'parent': 'skypilot'
453
- }
454
- }
455
- custom_metadata = skypilot_config.get_effective_region_config(
456
- cloud='kubernetes',
457
- region=context,
458
- keys=('custom_metadata',),
459
- default_value={})
460
- config_utils.merge_k8s_configs(secret_metadata, custom_metadata)
461
-
462
- secret = k8s.client.V1Secret(
463
- metadata=k8s.client.V1ObjectMeta(**secret_metadata),
464
- string_data={secret_field_name: public_key})
465
- try:
466
- if kubernetes_utils.check_secret_exists(secret_name, namespace,
467
- context):
468
- logger.debug(f'Key {secret_name} exists in the cluster, '
469
- 'patching it...')
470
- kubernetes.core_api(context).patch_namespaced_secret(
471
- secret_name, namespace, secret)
472
- else:
473
- logger.debug(f'Key {secret_name} does not exist in the cluster, '
474
- 'creating it...')
475
- kubernetes.core_api(context).create_namespaced_secret(
476
- namespace, secret)
477
- except kubernetes.api_exception() as e:
478
- if e.status == 409 and e.reason == 'AlreadyExists':
479
- logger.debug(f'Key {secret_name} was created concurrently, '
480
- 'patching it...')
481
- kubernetes.core_api(context).patch_namespaced_secret(
482
- secret_name, namespace, secret)
483
- else:
484
- raise e
485
-
486
- private_key_path, _ = get_or_generate_keys()
487
- if network_mode == nodeport_mode:
488
- ssh_jump_name = clouds.Kubernetes.SKY_SSH_JUMP_NAME
489
- service_type = kubernetes_enums.KubernetesServiceType.NODEPORT
490
- # Setup service for SSH jump pod. We create the SSH jump service here
491
- # because we need to know the service IP address and port to set the
492
- # ssh_proxy_command in the autoscaler config.
493
- kubernetes_utils.setup_ssh_jump_svc(ssh_jump_name, namespace, context,
494
- service_type)
495
- ssh_proxy_cmd = kubernetes_utils.get_ssh_proxy_command(
496
- ssh_jump_name,
497
- nodeport_mode,
498
- private_key_path=private_key_path,
499
- context=context,
500
- namespace=namespace)
501
- elif network_mode == port_forward_mode:
502
- # Using `kubectl port-forward` creates a direct tunnel to the pod and
503
- # does not require a ssh jump pod.
504
- kubernetes_utils.check_port_forward_mode_dependencies()
505
- # TODO(romilb): This can be further optimized. Instead of using the
506
- # head node as a jump pod for worker nodes, we can also directly
507
- # set the ssh_target to the worker node. However, that requires
508
- # changes in the downstream code to return a mapping of node IPs to
509
- # pod names (to be used as ssh_target) and updating the upstream
510
- # SSHConfigHelper to use a different ProxyCommand for each pod.
511
- # This optimization can reduce SSH time from ~0.35s to ~0.25s, tested
512
- # on GKE.
513
- ssh_target = config['cluster_name'] + '-head'
514
- ssh_proxy_cmd = kubernetes_utils.get_ssh_proxy_command(
515
- ssh_target,
516
- port_forward_mode,
517
- private_key_path=private_key_path,
518
- context=context,
519
- namespace=namespace)
520
- else:
521
- # This should never happen because we check for this in from_str above.
522
- raise ValueError(f'Unsupported networking mode: {network_mode_str}')
295
+ private_key_path, _ = auth_utils.get_or_generate_keys()
296
+ # Using `kubectl port-forward` creates a direct tunnel to the pod and
297
+ # does not require a ssh jump pod.
298
+ kubernetes_utils.check_port_forward_mode_dependencies()
299
+ # TODO(romilb): This can be further optimized. Instead of using the
300
+ # head node as a jump pod for worker nodes, we can also directly
301
+ # set the ssh_target to the worker node. However, that requires
302
+ # changes in the downstream code to return a mapping of node IPs to
303
+ # pod names (to be used as ssh_target) and updating the upstream
304
+ # SSHConfigHelper to use a different ProxyCommand for each pod.
305
+ # This optimization can reduce SSH time from ~0.35s to ~0.25s, tested
306
+ # on GKE.
307
+ pod_name = config['cluster_name'] + '-head'
308
+ ssh_proxy_cmd = kubernetes_utils.get_ssh_proxy_command(
309
+ pod_name,
310
+ private_key_path=private_key_path,
311
+ context=context,
312
+ namespace=namespace)
523
313
  config['auth']['ssh_proxy_command'] = ssh_proxy_cmd
524
314
  config['auth']['ssh_private_key'] = private_key_path
525
315
 
526
- return config
316
+ # Add the user's public key to the SkyPilot cluster.
317
+ return configure_ssh_info(config)
527
318
 
528
319
 
529
320
  # ---------------------------------- RunPod ---------------------------------- #
@@ -532,7 +323,7 @@ def setup_runpod_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
532
323
  - Generates a new SSH key pair if one does not exist.
533
324
  - Adds the public SSH key to the user's RunPod account.
534
325
  """
535
- _, public_key_path = get_or_generate_keys()
326
+ _, public_key_path = auth_utils.get_or_generate_keys()
536
327
  with open(public_key_path, 'r', encoding='UTF-8') as pub_key_file:
537
328
  public_key = pub_key_file.read().strip()
538
329
  runpod.runpod.cli.groups.ssh.functions.add_ssh_key(public_key)
@@ -545,7 +336,7 @@ def setup_vast_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
545
336
  - Generates a new SSH key pair if one does not exist.
546
337
  - Adds the public SSH key to the user's Vast account.
547
338
  """
548
- _, public_key_path = get_or_generate_keys()
339
+ _, public_key_path = auth_utils.get_or_generate_keys()
549
340
  with open(public_key_path, 'r', encoding='UTF-8') as pub_key_file:
550
341
  public_key = pub_key_file.read().strip()
551
342
  current_key_list = vast.vast().show_ssh_keys() # pylint: disable=assignment-from-no-return
@@ -559,7 +350,7 @@ def setup_vast_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
559
350
 
560
351
  def setup_fluidstack_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
561
352
 
562
- _, public_key_path = get_or_generate_keys()
353
+ _, public_key_path = auth_utils.get_or_generate_keys()
563
354
 
564
355
  client = fluidstack_utils.FluidstackClient()
565
356
  public_key = None
@@ -572,7 +363,7 @@ def setup_fluidstack_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
572
363
 
573
364
  def setup_hyperbolic_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
574
365
  """Sets up SSH authentication for Hyperbolic."""
575
- _, public_key_path = get_or_generate_keys()
366
+ _, public_key_path = auth_utils.get_or_generate_keys()
576
367
  with open(public_key_path, 'r', encoding='utf-8') as f:
577
368
  public_key = f.read().strip()
578
369
 
@@ -586,3 +377,106 @@ def setup_hyperbolic_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
586
377
  config['auth']['ssh_public_key'] = public_key_path
587
378
 
588
379
  return configure_ssh_info(config)
380
+
381
+
382
def setup_shadeform_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
    """Sets up SSH authentication for Shadeform.

    - Generates a new SSH key pair if one does not exist.
    - Adds the public SSH key to the user's Shadeform account.

    Args:
        config: Cluster config dict. ``config['auth']`` is updated in place
            with ``ssh_public_key`` and ``ssh_key_id``.

    Returns:
        The result of ``configure_ssh_info(config)``.

    Raises:
        exceptions.CloudUserIdentityError: If the public key could not be
            registered with Shadeform (missing dependencies, any other
            failure of the registration call, or no key ID returned).
    """
    _, public_key_path = auth_utils.get_or_generate_keys()
    with open(public_key_path, 'r', encoding='utf-8') as f:
        public_key = f.read().strip()

    ssh_key_id = None
    try:
        # Add SSH key to Shadeform using our utility functions
        ssh_key_id = shadeform_adaptor.add_ssh_key_to_shadeform(public_key)
    except ImportError as e:
        # Required dependencies are missing: tell the user how to recover,
        # then fail with the same error type as every other failure path.
        logger.warning(
            'Failed to add Shadeform SSH key due to missing dependencies: '
            f'{e}. Manually configure SSH keys in your Shadeform account.')
        raise exceptions.CloudUserIdentityError(
            'Failed to add SSH key to Shadeform') from e
    except Exception as e:
        logger.warning(f'Failed to set up Shadeform authentication: {e}')
        raise exceptions.CloudUserIdentityError(
            'Failed to set up SSH authentication for Shadeform. '
            f'Please ensure your Shadeform credentials are configured: {e}'
        ) from e

    if ssh_key_id is None:
        # The call succeeded but did not yield a key ID to reference.
        raise exceptions.CloudUserIdentityError(
            'Failed to add SSH key to Shadeform')

    # Configure SSH info in the config
    config['auth']['ssh_public_key'] = public_key_path
    config['auth']['ssh_key_id'] = ssh_key_id

    return configure_ssh_info(config)
422
+
423
+
424
def setup_primeintellect_authentication(
        config: Dict[str, Any]) -> Dict[str, Any]:
    """Sets up SSH authentication for Prime Intellect.

    Steps:
      1. Obtain the local SSH key pair via
         ``auth_utils.get_or_generate_keys()`` and read the public key.
      2. Pass the public key to
         ``PrimeIntellectAPIClient.get_or_add_ssh_key``.
      3. Fill in ``config['auth']`` (created if absent) with the SSH user
         and the public key path.

    Returns:
        The result of ``configure_ssh_info(config)``.
    """
    public_key_path = auth_utils.get_or_generate_keys()[1]
    with open(public_key_path, 'r', encoding='utf-8') as key_file:
        public_key = key_file.read().strip()

    # Register the public key with Prime Intellect (no-op if already exists)
    primeintellect_utils.PrimeIntellectAPIClient().get_or_add_ssh_key(
        public_key)

    # Auth section consumed by the Ray template; 'ubuntu' is the default
    # username for Prime Intellect images.
    auth_section = config.setdefault('auth', {})
    auth_section['ssh_user'] = 'ubuntu'
    auth_section['ssh_public_key'] = public_key_path

    return configure_ssh_info(config)
446
+
447
+
448
def setup_seeweb_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
    """Registers the public key with Seeweb and notes the remote name.

    If a key with identical content is already registered, its label is
    reused. Otherwise the key is created under ``sky-key-<user hash>``, with
    a numeric suffix (``-2``, ``-3``, ...) appended until the label does not
    clash with an existing one.

    Args:
        config: Cluster config dict. ``config['auth']['remote_key_name']``
            is set to the label of the key registered with Seeweb.

    Returns:
        The same ``config`` dict, updated in place.
    """
    # Local key pair (generated on first use) and its public key content.
    _, public_key_path = auth_utils.get_or_generate_keys()
    with open(public_key_path, 'r', encoding='utf-8') as f:
        public_key = f.read().strip()

    client = seeweb_adaptor.client()
    prefix = f'sky-key-{common_utils.get_user_hash()}'

    # List the remote keys once and reuse the result for both the content
    # match and the label-collision check.
    remote_keys = list(client.fetch_ssh_keys())

    remote_name = None
    for remote_key in remote_keys:
        if remote_key.key.strip() == public_key:
            remote_name = remote_key.label  # already present
            break

    if remote_name is None:
        # Not registered yet: choose a unique label and create the key.
        existing_names = {remote_key.label for remote_key in remote_keys}
        suffix = 1
        remote_name = prefix
        while remote_name in existing_names:
            suffix += 1
            remote_name = f'{prefix}-{suffix}'
        client.create_ssh_key(label=remote_name, key=public_key)

    # Put the remote name in cluster-config (like for Lambda)
    config['auth']['remote_key_name'] = remote_name

    return config
sky/backends/__init__.py CHANGED
@@ -4,11 +4,12 @@ from sky.backends.backend import ResourceHandle
4
4
  from sky.backends.cloud_vm_ray_backend import CloudVmRayBackend
5
5
  from sky.backends.cloud_vm_ray_backend import CloudVmRayResourceHandle
6
6
  from sky.backends.cloud_vm_ray_backend import LocalResourcesHandle
7
+ from sky.backends.cloud_vm_ray_backend import SkyletClient
7
8
  from sky.backends.local_docker_backend import LocalDockerBackend
8
9
  from sky.backends.local_docker_backend import LocalDockerResourceHandle
9
10
 
10
11
  __all__ = [
11
12
  'Backend', 'ResourceHandle', 'CloudVmRayBackend',
12
- 'CloudVmRayResourceHandle', 'LocalResourcesHandle', 'LocalDockerBackend',
13
- 'LocalDockerResourceHandle'
13
+ 'CloudVmRayResourceHandle', 'SkyletClient', 'LocalResourcesHandle',
14
+ 'LocalDockerBackend', 'LocalDockerResourceHandle'
14
15
  ]
sky/backends/backend.py CHANGED
@@ -95,6 +95,12 @@ class Backend(Generic[_ResourceHandleType]):
95
95
  envs_and_secrets: Dict[str, str]) -> None:
96
96
  return self._sync_workdir(handle, workdir, envs_and_secrets)
97
97
 
98
    @timeline.event
    @usage_lib.messages.usage.update_runtime('download_file')
    def download_file(self, handle: _ResourceHandleType, local_file_path: str,
                      remote_file_path: str) -> None:
        """Public entry point for downloading a file; delegates to the backend.

        Forwards all arguments unchanged to ``self._download_file``, which
        concrete backends implement. The call is wrapped by
        ``timeline.event`` and recorded under the ``'download_file'`` usage
        runtime.

        NOTE(review): presumably copies ``remote_file_path`` on the cluster
        to ``local_file_path``; the exact semantics are defined by the
        backend implementation, so confirm there.
        """
        return self._download_file(handle, local_file_path, remote_file_path)
103
+
98
104
  @timeline.event
99
105
  @usage_lib.messages.usage.update_runtime('sync_file_mounts')
100
106
  def sync_file_mounts(
@@ -120,7 +126,6 @@ class Backend(Generic[_ResourceHandleType]):
120
126
  def execute(self,
121
127
  handle: _ResourceHandleType,
122
128
  task: 'task_lib.Task',
123
- detach_run: bool,
124
129
  dryrun: bool = False) -> Optional[int]:
125
130
  """Execute the task on the cluster.
126
131
 
@@ -131,7 +136,7 @@ class Backend(Generic[_ResourceHandleType]):
131
136
  handle.get_cluster_name())
132
137
  usage_lib.messages.usage.update_actual_task(task)
133
138
  with rich_utils.safe_status(ux_utils.spinner_message('Submitting job')):
134
- return self._execute(handle, task, detach_run, dryrun)
139
+ return self._execute(handle, task, dryrun)
135
140
 
136
141
  @timeline.event
137
142
  def post_execute(self, handle: _ResourceHandleType, down: bool) -> None:
@@ -172,6 +177,10 @@ class Backend(Generic[_ResourceHandleType]):
172
177
  envs_and_secrets: Dict[str, str]) -> None:
173
178
  raise NotImplementedError
174
179
 
180
    def _download_file(self, handle: _ResourceHandleType, local_file_path: str,
                       remote_file_path: str) -> None:
        """Backend-specific hook called by ``download_file``.

        The base implementation is not provided; subclasses must override it.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError
183
+
175
184
  def _sync_file_mounts(
176
185
  self,
177
186
  handle: _ResourceHandleType,
@@ -187,7 +196,6 @@ class Backend(Generic[_ResourceHandleType]):
187
196
  def _execute(self,
188
197
  handle: _ResourceHandleType,
189
198
  task: 'task_lib.Task',
190
- detach_run: bool,
191
199
  dryrun: bool = False) -> Optional[int]:
192
200
  raise NotImplementedError
193
201