skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (397) hide show
  1. sky/__init__.py +10 -2
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/ibm.py +5 -2
  8. sky/adaptors/kubernetes.py +64 -0
  9. sky/adaptors/nebius.py +3 -1
  10. sky/adaptors/primeintellect.py +1 -0
  11. sky/adaptors/seeweb.py +183 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +20 -0
  14. sky/authentication.py +157 -263
  15. sky/backends/__init__.py +3 -2
  16. sky/backends/backend.py +11 -3
  17. sky/backends/backend_utils.py +588 -184
  18. sky/backends/cloud_vm_ray_backend.py +1088 -904
  19. sky/backends/local_docker_backend.py +9 -5
  20. sky/backends/task_codegen.py +633 -0
  21. sky/backends/wheel_utils.py +18 -0
  22. sky/catalog/__init__.py +8 -0
  23. sky/catalog/aws_catalog.py +4 -0
  24. sky/catalog/common.py +19 -1
  25. sky/catalog/data_fetchers/fetch_aws.py +102 -80
  26. sky/catalog/data_fetchers/fetch_gcp.py +30 -3
  27. sky/catalog/data_fetchers/fetch_nebius.py +9 -6
  28. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  29. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  30. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  31. sky/catalog/kubernetes_catalog.py +24 -28
  32. sky/catalog/primeintellect_catalog.py +95 -0
  33. sky/catalog/runpod_catalog.py +5 -1
  34. sky/catalog/seeweb_catalog.py +184 -0
  35. sky/catalog/shadeform_catalog.py +165 -0
  36. sky/check.py +73 -43
  37. sky/client/cli/command.py +675 -412
  38. sky/client/cli/flags.py +4 -2
  39. sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
  40. sky/client/cli/utils.py +79 -0
  41. sky/client/common.py +12 -2
  42. sky/client/sdk.py +132 -63
  43. sky/client/sdk_async.py +34 -33
  44. sky/cloud_stores.py +82 -3
  45. sky/clouds/__init__.py +6 -0
  46. sky/clouds/aws.py +337 -129
  47. sky/clouds/azure.py +24 -18
  48. sky/clouds/cloud.py +40 -13
  49. sky/clouds/cudo.py +16 -13
  50. sky/clouds/do.py +9 -7
  51. sky/clouds/fluidstack.py +12 -5
  52. sky/clouds/gcp.py +14 -7
  53. sky/clouds/hyperbolic.py +12 -5
  54. sky/clouds/ibm.py +12 -5
  55. sky/clouds/kubernetes.py +80 -45
  56. sky/clouds/lambda_cloud.py +12 -5
  57. sky/clouds/nebius.py +23 -9
  58. sky/clouds/oci.py +19 -12
  59. sky/clouds/paperspace.py +4 -1
  60. sky/clouds/primeintellect.py +317 -0
  61. sky/clouds/runpod.py +85 -24
  62. sky/clouds/scp.py +12 -8
  63. sky/clouds/seeweb.py +477 -0
  64. sky/clouds/shadeform.py +400 -0
  65. sky/clouds/ssh.py +4 -2
  66. sky/clouds/utils/scp_utils.py +61 -50
  67. sky/clouds/vast.py +33 -27
  68. sky/clouds/vsphere.py +14 -16
  69. sky/core.py +174 -165
  70. sky/dashboard/out/404.html +1 -1
  71. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  72. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  73. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  74. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  75. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  76. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  77. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  78. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  79. sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-7b45f9fbb6308557.js} +1 -1
  80. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  81. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  82. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  83. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  84. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  85. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  86. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  87. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  88. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  89. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  90. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  91. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  92. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  93. sky/dashboard/out/_next/static/chunks/pages/{_app-ce361c6959bc2001.js → _app-bde01e4a2beec258.js} +1 -1
  94. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  95. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  96. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  97. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-c0b5935149902e6f.js} +1 -1
  98. sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-aed0ea19df7cf961.js} +1 -1
  99. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  100. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  101. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  102. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-84a40f8c7c627fe4.js} +1 -1
  105. sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-531b2f8c4bf89f82.js} +1 -1
  106. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  107. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  108. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  109. sky/dashboard/out/clusters/[cluster].html +1 -1
  110. sky/dashboard/out/clusters.html +1 -1
  111. sky/dashboard/out/config.html +1 -1
  112. sky/dashboard/out/index.html +1 -1
  113. sky/dashboard/out/infra/[context].html +1 -1
  114. sky/dashboard/out/infra.html +1 -1
  115. sky/dashboard/out/jobs/[job].html +1 -1
  116. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  117. sky/dashboard/out/jobs.html +1 -1
  118. sky/dashboard/out/users.html +1 -1
  119. sky/dashboard/out/volumes.html +1 -1
  120. sky/dashboard/out/workspace/new.html +1 -1
  121. sky/dashboard/out/workspaces/[name].html +1 -1
  122. sky/dashboard/out/workspaces.html +1 -1
  123. sky/data/data_utils.py +92 -1
  124. sky/data/mounting_utils.py +162 -29
  125. sky/data/storage.py +200 -19
  126. sky/data/storage_utils.py +10 -45
  127. sky/exceptions.py +18 -7
  128. sky/execution.py +74 -31
  129. sky/global_user_state.py +605 -191
  130. sky/jobs/__init__.py +2 -0
  131. sky/jobs/client/sdk.py +101 -4
  132. sky/jobs/client/sdk_async.py +31 -5
  133. sky/jobs/constants.py +15 -8
  134. sky/jobs/controller.py +726 -284
  135. sky/jobs/file_content_utils.py +128 -0
  136. sky/jobs/log_gc.py +193 -0
  137. sky/jobs/recovery_strategy.py +250 -100
  138. sky/jobs/scheduler.py +271 -173
  139. sky/jobs/server/core.py +367 -114
  140. sky/jobs/server/server.py +81 -35
  141. sky/jobs/server/utils.py +89 -35
  142. sky/jobs/state.py +1498 -620
  143. sky/jobs/utils.py +771 -306
  144. sky/logs/agent.py +40 -5
  145. sky/logs/aws.py +9 -19
  146. sky/metrics/utils.py +282 -39
  147. sky/optimizer.py +1 -1
  148. sky/provision/__init__.py +37 -1
  149. sky/provision/aws/config.py +34 -13
  150. sky/provision/aws/instance.py +5 -2
  151. sky/provision/azure/instance.py +5 -3
  152. sky/provision/common.py +2 -0
  153. sky/provision/cudo/instance.py +4 -3
  154. sky/provision/do/instance.py +4 -3
  155. sky/provision/docker_utils.py +97 -26
  156. sky/provision/fluidstack/instance.py +6 -5
  157. sky/provision/gcp/config.py +6 -1
  158. sky/provision/gcp/instance.py +4 -2
  159. sky/provision/hyperbolic/instance.py +4 -2
  160. sky/provision/instance_setup.py +66 -20
  161. sky/provision/kubernetes/__init__.py +2 -0
  162. sky/provision/kubernetes/config.py +7 -44
  163. sky/provision/kubernetes/constants.py +0 -1
  164. sky/provision/kubernetes/instance.py +609 -213
  165. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  166. sky/provision/kubernetes/network.py +12 -8
  167. sky/provision/kubernetes/network_utils.py +8 -25
  168. sky/provision/kubernetes/utils.py +382 -418
  169. sky/provision/kubernetes/volume.py +150 -18
  170. sky/provision/lambda_cloud/instance.py +16 -13
  171. sky/provision/nebius/instance.py +6 -2
  172. sky/provision/nebius/utils.py +103 -86
  173. sky/provision/oci/instance.py +4 -2
  174. sky/provision/paperspace/instance.py +4 -3
  175. sky/provision/primeintellect/__init__.py +10 -0
  176. sky/provision/primeintellect/config.py +11 -0
  177. sky/provision/primeintellect/instance.py +454 -0
  178. sky/provision/primeintellect/utils.py +398 -0
  179. sky/provision/provisioner.py +30 -9
  180. sky/provision/runpod/__init__.py +2 -0
  181. sky/provision/runpod/instance.py +4 -3
  182. sky/provision/runpod/volume.py +69 -13
  183. sky/provision/scp/instance.py +307 -130
  184. sky/provision/seeweb/__init__.py +11 -0
  185. sky/provision/seeweb/config.py +13 -0
  186. sky/provision/seeweb/instance.py +812 -0
  187. sky/provision/shadeform/__init__.py +11 -0
  188. sky/provision/shadeform/config.py +12 -0
  189. sky/provision/shadeform/instance.py +351 -0
  190. sky/provision/shadeform/shadeform_utils.py +83 -0
  191. sky/provision/vast/instance.py +5 -3
  192. sky/provision/volume.py +164 -0
  193. sky/provision/vsphere/common/ssl_helper.py +1 -1
  194. sky/provision/vsphere/common/vapiconnect.py +2 -1
  195. sky/provision/vsphere/common/vim_utils.py +3 -2
  196. sky/provision/vsphere/instance.py +8 -6
  197. sky/provision/vsphere/vsphere_utils.py +8 -1
  198. sky/resources.py +11 -3
  199. sky/schemas/api/responses.py +107 -6
  200. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  201. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  202. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  203. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  204. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  205. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  206. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  207. sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
  208. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  209. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  210. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  211. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  212. sky/schemas/generated/jobsv1_pb2.py +86 -0
  213. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  214. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  215. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  216. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  217. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  218. sky/schemas/generated/servev1_pb2.py +58 -0
  219. sky/schemas/generated/servev1_pb2.pyi +115 -0
  220. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  221. sky/serve/autoscalers.py +2 -0
  222. sky/serve/client/impl.py +55 -21
  223. sky/serve/constants.py +4 -3
  224. sky/serve/controller.py +17 -11
  225. sky/serve/load_balancing_policies.py +1 -1
  226. sky/serve/replica_managers.py +219 -142
  227. sky/serve/serve_rpc_utils.py +179 -0
  228. sky/serve/serve_state.py +63 -54
  229. sky/serve/serve_utils.py +145 -109
  230. sky/serve/server/core.py +46 -25
  231. sky/serve/server/impl.py +311 -162
  232. sky/serve/server/server.py +21 -19
  233. sky/serve/service.py +84 -68
  234. sky/serve/service_spec.py +45 -7
  235. sky/server/auth/loopback.py +38 -0
  236. sky/server/auth/oauth2_proxy.py +12 -7
  237. sky/server/common.py +47 -24
  238. sky/server/config.py +62 -28
  239. sky/server/constants.py +9 -1
  240. sky/server/daemons.py +109 -38
  241. sky/server/metrics.py +76 -96
  242. sky/server/middleware_utils.py +166 -0
  243. sky/server/requests/executor.py +381 -145
  244. sky/server/requests/payloads.py +71 -18
  245. sky/server/requests/preconditions.py +15 -13
  246. sky/server/requests/request_names.py +121 -0
  247. sky/server/requests/requests.py +507 -157
  248. sky/server/requests/serializers/decoders.py +48 -17
  249. sky/server/requests/serializers/encoders.py +85 -20
  250. sky/server/requests/threads.py +117 -0
  251. sky/server/rest.py +116 -24
  252. sky/server/server.py +420 -172
  253. sky/server/stream_utils.py +219 -45
  254. sky/server/uvicorn.py +30 -19
  255. sky/setup_files/MANIFEST.in +6 -1
  256. sky/setup_files/alembic.ini +8 -0
  257. sky/setup_files/dependencies.py +62 -19
  258. sky/setup_files/setup.py +44 -44
  259. sky/sky_logging.py +13 -5
  260. sky/skylet/attempt_skylet.py +106 -24
  261. sky/skylet/configs.py +3 -1
  262. sky/skylet/constants.py +111 -26
  263. sky/skylet/events.py +64 -10
  264. sky/skylet/job_lib.py +141 -104
  265. sky/skylet/log_lib.py +233 -5
  266. sky/skylet/log_lib.pyi +40 -2
  267. sky/skylet/providers/ibm/node_provider.py +12 -8
  268. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  269. sky/skylet/runtime_utils.py +21 -0
  270. sky/skylet/services.py +524 -0
  271. sky/skylet/skylet.py +22 -1
  272. sky/skylet/subprocess_daemon.py +104 -29
  273. sky/skypilot_config.py +99 -79
  274. sky/ssh_node_pools/server.py +9 -8
  275. sky/task.py +221 -104
  276. sky/templates/aws-ray.yml.j2 +1 -0
  277. sky/templates/azure-ray.yml.j2 +1 -0
  278. sky/templates/cudo-ray.yml.j2 +1 -0
  279. sky/templates/do-ray.yml.j2 +1 -0
  280. sky/templates/fluidstack-ray.yml.j2 +1 -0
  281. sky/templates/gcp-ray.yml.j2 +1 -0
  282. sky/templates/hyperbolic-ray.yml.j2 +1 -0
  283. sky/templates/ibm-ray.yml.j2 +2 -1
  284. sky/templates/jobs-controller.yaml.j2 +3 -0
  285. sky/templates/kubernetes-ray.yml.j2 +196 -55
  286. sky/templates/lambda-ray.yml.j2 +1 -0
  287. sky/templates/nebius-ray.yml.j2 +3 -0
  288. sky/templates/oci-ray.yml.j2 +1 -0
  289. sky/templates/paperspace-ray.yml.j2 +1 -0
  290. sky/templates/primeintellect-ray.yml.j2 +72 -0
  291. sky/templates/runpod-ray.yml.j2 +1 -0
  292. sky/templates/scp-ray.yml.j2 +1 -0
  293. sky/templates/seeweb-ray.yml.j2 +171 -0
  294. sky/templates/shadeform-ray.yml.j2 +73 -0
  295. sky/templates/vast-ray.yml.j2 +1 -0
  296. sky/templates/vsphere-ray.yml.j2 +1 -0
  297. sky/templates/websocket_proxy.py +188 -43
  298. sky/usage/usage_lib.py +16 -4
  299. sky/users/permission.py +60 -43
  300. sky/utils/accelerator_registry.py +6 -3
  301. sky/utils/admin_policy_utils.py +18 -5
  302. sky/utils/annotations.py +22 -0
  303. sky/utils/asyncio_utils.py +78 -0
  304. sky/utils/atomic.py +1 -1
  305. sky/utils/auth_utils.py +153 -0
  306. sky/utils/cli_utils/status_utils.py +12 -7
  307. sky/utils/cluster_utils.py +28 -6
  308. sky/utils/command_runner.py +88 -27
  309. sky/utils/command_runner.pyi +36 -3
  310. sky/utils/common.py +3 -1
  311. sky/utils/common_utils.py +37 -4
  312. sky/utils/config_utils.py +1 -14
  313. sky/utils/context.py +127 -40
  314. sky/utils/context_utils.py +73 -18
  315. sky/utils/controller_utils.py +229 -70
  316. sky/utils/db/db_utils.py +95 -18
  317. sky/utils/db/kv_cache.py +149 -0
  318. sky/utils/db/migration_utils.py +24 -7
  319. sky/utils/env_options.py +4 -0
  320. sky/utils/git.py +559 -1
  321. sky/utils/kubernetes/create_cluster.sh +15 -30
  322. sky/utils/kubernetes/delete_cluster.sh +10 -7
  323. sky/utils/kubernetes/{deploy_remote_cluster.py → deploy_ssh_node_pools.py} +258 -380
  324. sky/utils/kubernetes/generate_kind_config.py +6 -66
  325. sky/utils/kubernetes/gpu_labeler.py +13 -3
  326. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  327. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  328. sky/utils/kubernetes/kubernetes_deploy_utils.py +213 -194
  329. sky/utils/kubernetes/rsync_helper.sh +11 -3
  330. sky/utils/kubernetes_enums.py +7 -15
  331. sky/utils/lock_events.py +4 -4
  332. sky/utils/locks.py +128 -31
  333. sky/utils/log_utils.py +0 -319
  334. sky/utils/resource_checker.py +13 -10
  335. sky/utils/resources_utils.py +53 -29
  336. sky/utils/rich_utils.py +8 -4
  337. sky/utils/schemas.py +107 -52
  338. sky/utils/subprocess_utils.py +17 -4
  339. sky/utils/thread_utils.py +91 -0
  340. sky/utils/timeline.py +2 -1
  341. sky/utils/ux_utils.py +35 -1
  342. sky/utils/volume.py +88 -4
  343. sky/utils/yaml_utils.py +9 -0
  344. sky/volumes/client/sdk.py +48 -10
  345. sky/volumes/server/core.py +59 -22
  346. sky/volumes/server/server.py +46 -17
  347. sky/volumes/volume.py +54 -42
  348. sky/workspaces/core.py +57 -21
  349. sky/workspaces/server.py +13 -12
  350. sky_templates/README.md +3 -0
  351. sky_templates/__init__.py +3 -0
  352. sky_templates/ray/__init__.py +0 -0
  353. sky_templates/ray/start_cluster +183 -0
  354. sky_templates/ray/stop_cluster +75 -0
  355. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/METADATA +331 -65
  356. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  357. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  358. sky/client/cli/git.py +0 -549
  359. sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
  360. sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
  361. sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
  362. sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
  363. sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
  364. sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
  365. sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
  366. sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
  367. sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
  368. sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
  369. sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
  370. sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
  371. sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
  372. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
  373. sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
  374. sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
  375. sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
  376. sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
  377. sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
  378. sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
  379. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
  380. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
  381. sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
  382. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
  383. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
  384. sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
  385. sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
  386. sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
  387. sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
  388. sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
  389. sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
  390. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  391. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  392. skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
  393. skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
  394. /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  395. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +0 -0
  396. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  397. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/utils/git.py CHANGED
@@ -1,4 +1,23 @@
1
- """Git related constants."""
1
+ """Git utilities for SkyPilot."""
2
+
3
+ import enum
4
+ import os
5
+ import re
6
+ import typing
7
+ from typing import List, Optional, Union
8
+
9
+ import requests
10
+
11
+ from sky import exceptions
12
+ from sky import sky_logging
13
+ from sky.adaptors import common as adaptors_common
14
+
15
+ logger = sky_logging.init_logger(__name__)
16
+
17
+ if typing.TYPE_CHECKING:
18
+ import git
19
+ else:
20
+ git = adaptors_common.LazyImport('git')
2
21
 
3
22
  GIT_TOKEN_ENV_VAR = 'GIT_TOKEN'
4
23
  GIT_SSH_KEY_PATH_ENV_VAR = 'GIT_SSH_KEY_PATH'
@@ -7,3 +26,542 @@ GIT_URL_ENV_VAR = 'GIT_URL'
7
26
  GIT_COMMIT_HASH_ENV_VAR = 'GIT_COMMIT_HASH'
8
27
  GIT_BRANCH_ENV_VAR = 'GIT_BRANCH'
9
28
  GIT_TAG_ENV_VAR = 'GIT_TAG'
29
+
30
+
31
class GitRefType(enum.Enum):
    """Kinds of git reference a checkout can be pinned to."""

    # A branch name.
    BRANCH = 'branch'
    # A tag name.
    TAG = 'tag'
    # A commit hash.
    COMMIT = 'commit'
37
+
38
+
39
class GitUrlInfo:
    """Components of a parsed git repository URL.

    Attributes:
        host: Host name taken from the URL.
        path: Repository path, i.e. the part after the host
            (e.g., 'user/repo' or 'org/subgroup/repo').
        protocol: URL protocol, e.g. 'https' or 'ssh'.
        user: SSH username, if the URL carried one.
        port: Port number, if the URL carried one.
    """

    def __init__(self,
                 host: str,
                 path: str,
                 protocol: str,
                 user: Optional[str] = None,
                 port: Optional[int] = None):
        # Where the repository lives.
        self.host = host
        self.path = path
        # How it is reached.
        self.protocol = protocol
        self.port = port
        # Who connects (only meaningful for SSH).
        self.user = user
57
+
58
+
59
class GitCloneInfo:
    """Everything needed to clone a repository with a chosen access method.

    Attributes:
        url: URL to clone from.
        envs: Optional environment variables to set for the git command.
        token: Optional access token for token-based authentication.
        ssh_key: Optional SSH private key for SSH authentication.
    """

    def __init__(self,
                 url: str,
                 envs: Optional[dict] = None,
                 token: Optional[str] = None,
                 ssh_key: Optional[str] = None):
        self.url = url
        # Credentials; at most one of these is normally set.
        self.token = token
        self.ssh_key = ssh_key
        # Extra environment for the git invocation.
        self.envs = envs
71
+
72
+
73
+ class GitRepo:
74
+ """Git utilities for SkyPilot."""
75
+
76
+ def __init__(self,
77
+ repo_url: str,
78
+ ref: str = 'main',
79
+ git_token: Optional[str] = None,
80
+ git_ssh_key_path: Optional[str] = None):
81
+ """Initialize Git utility.
82
+
83
+ Args:
84
+ repo_url: Git repository URL.
85
+ ref: Git reference (branch, tag, or commit hash).
86
+ git_token: GitHub token for private repositories.
87
+ git_ssh_key_path: Path to SSH private key for authentication.
88
+ """
89
+ self.repo_url = repo_url
90
+ self.ref = ref
91
+ self.git_token = git_token
92
+ self.git_ssh_key_path = git_ssh_key_path
93
+
94
+ # Parse URL during initialization to catch format errors early
95
+ self._parsed_url = self._parse_git_url(self.repo_url)
96
+
97
def _parse_git_url(self, url: str) -> GitUrlInfo:
    """Parse git URL into components.

    Supports various git URL formats:
    - HTTPS: https://github.com/user/repo.git
    - SSH: git@github.com:user/repo.git (SCP-like)
    - SSH full: ssh://git@github.com/user/repo.git
    - SSH with port: ssh://git@github.com:2222/user/repo.git

    Args:
        url: Git repository URL in any supported format.

    Returns:
        GitUrlInfo with parsed components. The path never carries a
        trailing '/' or a trailing '.git'.

    Raises:
        exceptions.GitError: If URL format is not supported.
    """
    # Surrounding whitespace is never part of a URL; then drop any trailing
    # slashes and a single trailing '.git'.
    clean_url = url.strip().rstrip('/')
    if clean_url.endswith('.git'):
        clean_url = clean_url[:-4]

    # scheme://[user@]host[:port]/path for http, https and ssh.
    # The user part must not contain '/': otherwise an '@' that appears
    # later in the repository path would be mistaken for the user/host
    # separator and the host would be read from the middle of the path.
    scheme_pattern = (r'^(https?|ssh)://(?:([^@/]+)@)?([^:/]+)'
                      r'(?::(\d+))?/(.+)$')
    scheme_match = re.match(scheme_pattern, clean_url)
    if scheme_match:
        protocol, user, host, port_str, path = scheme_match.groups()

        # Validate that path is not empty
        if not path or path == '/':
            url_kind = 'SSH URL' if protocol == 'ssh' else 'URL'
            raise exceptions.GitError(
                f'Invalid repository path in {url_kind}: {url}')

        return GitUrlInfo(host=host,
                          path=path,
                          protocol=protocol,
                          user=user,
                          port=int(port_str) if port_str else None)

    # SCP-like format: [user@]host:path. Anything containing '://' is a
    # scheme URL that failed to match above, not an SCP-like address.
    scp_pattern = r'^(?:([^@:/]+)@)?([^:/]+):(.+)$'
    scp_match = re.match(scp_pattern, clean_url)
    if scp_match and '://' not in clean_url:
        user, host, path = scp_match.groups()

        # Validate that path is not empty
        if not path:
            raise exceptions.GitError(
                f'Invalid repository path in SSH URL: {url}')

        return GitUrlInfo(host=host,
                          path=path,
                          protocol='ssh',
                          user=user,
                          port=None)

    raise exceptions.GitError(
        f'Unsupported git URL format: {url}. '
        'Supported formats: https://host/owner/repo, '
        'ssh://user@host/owner/repo, user@host:owner/repo')
181
+
182
def get_https_url(self, with_token: bool = False) -> str:
    """Get HTTPS URL for the repository.

    The result always uses the ``https`` scheme and always ends in
    ``.git``. A port is included only when the original URL was itself
    HTTP(S): a port parsed from an ``ssh://`` URL is an SSH port and must
    not be reused for HTTPS.

    Args:
        with_token: If True and a token is configured, includes the token
            in the URL for authentication.

    Returns:
        HTTPS URL string.
    """
    parsed = self._parsed_url

    port_str = ''
    if parsed.port and parsed.protocol in ('http', 'https'):
        port_str = f':{parsed.port}'

    path = parsed.path
    # Remove .git suffix if present (but not individual characters)
    if path.endswith('.git'):
        path = path[:-4]

    credentials = ''
    if with_token and self.git_token:
        credentials = f'{self.git_token}@'
    return f'https://{credentials}{parsed.host}{port_str}/{path}.git'
201
+
202
def get_ssh_url(self) -> str:
    """Get SSH URL for the repository in full format.

    The result has the form ``ssh://user@host[:port]/path.git``. A port is
    included only when the original URL was itself an SSH URL: a port
    parsed from an HTTP(S) URL is a web port and must not be reused for
    SSH.

    Returns:
        SSH URL string in full format.
    """
    parsed = self._parsed_url

    # Use original user from URL, or default to 'git'
    ssh_user = parsed.user or 'git'

    port_str = ''
    if parsed.port and parsed.protocol == 'ssh':
        port_str = f':{parsed.port}'

    path = parsed.path
    # Remove .git suffix if present (but not individual characters)
    if path.endswith('.git'):
        path = path[:-4]
    return f'ssh://{ssh_user}@{parsed.host}{port_str}/{path}.git'
216
+
217
def get_repo_clone_info(self) -> GitCloneInfo:
    """Validate the repository access with comprehensive authentication
    and return the appropriate clone info.

    This method implements a sequential validation approach:
    1. Try public access (no authentication)
    2. If has token and URL is https, try token access
    3. If URL is ssh, try ssh access with user provided ssh key or
       default ssh credential

    Returns:
        GitCloneInfo instance with successful access method.

    Raises:
        exceptions.GitError: If the git URL format is invalid or
            the repository cannot be accessed.
    """
    import shlex  # pylint: disable=import-outside-toplevel

    logger.debug(f'Validating access to {self._parsed_url.host}'
                 f'/{self._parsed_url.path}')

    # Step 1: Try public access first (most common case)
    try:
        https_url = self.get_https_url()
        logger.debug(f'Trying public HTTPS access to {https_url}')

        # Use /info/refs endpoint to check public access.
        # This is more reliable than git ls-remote as it doesn't
        # use local git config.
        stripped_url = https_url.rstrip('/')
        info_refs_url = f'{stripped_url}/info/refs?service=git-upload-pack'

        # Make a simple HTTP request without any authentication.
        # NOTE(review): with auth=None, requests may still pick up
        # credentials from ~/.netrc unless trust_env is disabled on a
        # session - confirm whether that matters here.
        response = requests.get(info_refs_url,
                                timeout=10,
                                allow_redirects=True,
                                auth=None)

        if response.status_code == 200:
            logger.info(
                f'Successfully validated repository {https_url} access '
                'using public access')
            return GitCloneInfo(url=https_url)
    except Exception as e:  # pylint: disable=broad-except
        # Best effort only: fall through to the authenticated methods.
        logger.debug(f'Public access failed: {str(e)}')

    # Step 2: Try with token if provided
    if self.git_token and self._parsed_url.protocol == 'https':
        try:
            https_url = self.get_https_url()
            auth_url = self.get_https_url(with_token=True)
            logger.debug(f'Trying token authentication to {https_url}')
            git_cmd = git.cmd.Git()
            git_cmd.ls_remote(auth_url)
            logger.info(
                f'Successfully validated repository {https_url} access '
                'using token authentication')
            return GitCloneInfo(url=https_url, token=self.git_token)
        except Exception as e:
            # The failing git command line contains the token-bearing URL,
            # so the raw error text must not reach logs or the message
            # shown to the user. The original exception stays chained as
            # the cause for debugging.
            safe_error = str(e).replace(self.git_token, '***')
            logger.info(f'Token access failed: {safe_error}')
            raise exceptions.GitError(
                f'Failed to access repository {self.repo_url} using token '
                'authentication. Please verify your token and repository '
                f'access permissions. Original error: {safe_error}') from e

    # Step 3: Try SSH access with available keys
    if self._parsed_url.protocol == 'ssh':
        try:
            ssh_url = self.get_ssh_url()

            # Get SSH key info using the combined method
            ssh_key_info = self._get_ssh_key_info()
            if not ssh_key_info:
                raise exceptions.GitError(
                    f'No SSH keys found for {self.repo_url}.')

            key_path, key_content = ssh_key_info
            # GIT_SSH_COMMAND is interpreted by a shell, so the key path
            # must be quoted to survive spaces or shell metacharacters.
            # Ordinary paths are left unchanged by shlex.quote.
            quoted_key_path = shlex.quote(str(key_path))
            git_ssh_command = (f'ssh -F none -i {quoted_key_path} '
                               '-o StrictHostKeyChecking=no '
                               '-o UserKnownHostsFile=/dev/null '
                               '-o IdentitiesOnly=yes')
            ssh_env = {'GIT_SSH_COMMAND': git_ssh_command}

            logger.debug(f'Trying SSH authentication to {ssh_url} '
                         f'with {key_path}')
            git_cmd = git.cmd.Git()
            git_cmd.update_environment(**ssh_env)
            git_cmd.ls_remote(ssh_url)
            logger.info(
                f'Successfully validated repository {ssh_url} access '
                f'using SSH key: {key_path}')
            return GitCloneInfo(url=ssh_url,
                                ssh_key=key_content,
                                envs=ssh_env)
        except Exception as e:  # pylint: disable=broad-except
            raise exceptions.GitError(
                f'Failed to access repository {self.repo_url} using '
                'SSH key authentication. Please verify your SSH key and '
                'repository access permissions. '
                f'Original error: {str(e)}') from e

    # If we get here, no authentication methods are available
    raise exceptions.GitError(
        f'Failed to access repository {self.repo_url}. '
        'If this is a private repository, please provide authentication'
        ' using either: GIT_TOKEN for token-based access, or'
        ' GIT_SSH_KEY_PATH for SSH access.')
326
+
327
def _parse_ssh_config(self) -> Optional[str]:
    """Look up an IdentityFile for the repository host in ~/.ssh/config.

    Returns:
        Path of the first configured identity file that exists on disk,
        or None if the config file is missing, paramiko is unavailable,
        parsing fails, or no configured key exists.
    """
    config_file = os.path.expanduser('~/.ssh/config')
    if not os.path.exists(config_file):
        logger.debug('SSH config file ~/.ssh/config does not exist')
        return None

    target_host = self._parsed_url.host
    try:
        # paramiko is optional; its parser is used when available.
        import paramiko  # pylint: disable=import-outside-toplevel
        parser = paramiko.SSHConfig()
        with open(config_file, 'r', encoding='utf-8') as config_handle:
            parser.parse(config_handle)

        # Resolve the effective settings for the target host.
        host_settings = parser.lookup(target_host)

        # The value may be a single entry or a list; normalize to a list.
        identity_files: Union[str, List[str]] = host_settings.get(
            'identityfile', [])
        if not isinstance(identity_files, list):
            identity_files = [identity_files]

        # Pick the first configured key that actually exists.
        expanded = (os.path.expanduser(entry) for entry in identity_files)
        existing_key = next(
            (candidate for candidate in expanded
             if os.path.exists(candidate)), None)
        if existing_key is not None:
            logger.debug(f'Found SSH key in config for '
                         f'{target_host}: {existing_key}')
            return existing_key

        logger.debug(f'No valid SSH keys found in config for host: '
                     f'{target_host}')
        return None
    except ImportError:
        logger.debug('paramiko not available')
        return None
    except Exception as e:  # pylint: disable=broad-except
        logger.debug(f'Error parsing SSH config: {str(e)}')
        return None
373
+
374
def _get_ssh_key_info(self) -> Optional[tuple]:
    """Locate an SSH private key for the repository and read its content.

    Sources are tried in this order:
        1. The explicitly provided ``git_ssh_key_path`` (strict: any
           problem with it raises instead of falling through).
        2. The IdentityFile that ~/.ssh/config assigns to the repo host.
        3. Well-known default key files in ~/.ssh/.

    Returns:
        A ``(key_path, key_content)`` tuple for the first usable key, or
        None if no key could be found.

    Raises:
        exceptions.GitError: If ``git_ssh_key_path`` is set but the file
            is missing, unreadable, or does not look like a private key.
    """
    # --- Source 1: key path supplied by the user -------------------------
    if self.git_ssh_key_path:
        try:
            user_key = os.path.expanduser(self.git_ssh_key_path)

            if not os.path.exists(user_key):
                raise exceptions.GitError(
                    f'SSH key not found at path: {self.git_ssh_key_path}')

            # Any group/other permission bit set is only warned about.
            if os.stat(user_key).st_mode & 0o077:
                logger.warning(
                    f'SSH key {user_key} has too open permissions. '
                    f'Recommended: chmod 600 {user_key}')

            with open(user_key, 'r', encoding='utf-8') as key_fp:
                user_key_text = key_fp.read()

            # Cheap sanity check on the armor header of the key file.
            if (not user_key_text.startswith('-----BEGIN') or
                    'PRIVATE KEY' not in user_key_text):
                raise exceptions.GitError(
                    f'SSH key {user_key} is invalid.')

            logger.debug(f'Using provided SSH key: {user_key}')
            return (user_key, user_key_text)
        except Exception as e:  # pylint: disable=broad-except
            # Also re-wraps the GitErrors raised above, so every failure
            # of the provided key carries the same message prefix.
            raise exceptions.GitError(
                f'Validate provided SSH key error: {str(e)}') from e

    # --- Source 2: host-specific IdentityFile from ~/.ssh/config ---------
    config_key = self._parse_ssh_config()
    if config_key:
        try:
            with open(config_key, 'r', encoding='utf-8') as key_fp:
                config_key_text = key_fp.read()
        except Exception as e:  # pylint: disable=broad-except
            # Unreadable config key: fall through to the default keys.
            logger.debug(f'Could not read SSH key: {str(e)}')
        else:
            logger.debug(f'Using SSH key from config: {config_key}')
            return (config_key, config_key_text)

    # --- Source 3: default key files in ~/.ssh ----------------------------
    ssh_home = os.path.expanduser('~/.ssh')
    if not os.path.exists(ssh_home):
        logger.debug('SSH directory ~/.ssh does not exist')
        return None

    # Default private key file names, in order of preference.
    default_key_names = ('id_rsa', 'id_ed25519')

    for name in default_key_names:
        candidate = os.path.join(ssh_home, name)

        # Only the private key file itself is required to exist.
        if not os.path.exists(candidate):
            continue

        try:
            if os.stat(candidate).st_mode & 0o077:
                logger.warning(
                    f'SSH key {candidate} has too open permissions. '
                    f'Consider: chmod 600 {candidate}')

            with open(candidate, 'r', encoding='utf-8') as key_fp:
                candidate_text = key_fp.read()
        except Exception as e:  # pylint: disable=broad-except
            logger.debug(f'Error checking SSH key {candidate}: {str(e)}')
            continue

        looks_like_private_key = (candidate_text.startswith('-----BEGIN')
                                  and 'PRIVATE KEY' in candidate_text)
        if not looks_like_private_key:
            logger.debug(f'SSH key {candidate} is invalid.')
            continue

        logger.debug(f'Discovered default SSH key: {candidate}')
        return (candidate, candidate_text)

    logger.debug('No suitable SSH keys found')
    return None
472
+
473
def get_ref_type(self) -> GitRefType:
    """Classify ``self.ref`` as a branch, tag, or commit.

    The remote's refs are listed with ``git ls-remote``. A ref whose name
    equals a remote branch or tag name is classified as such (first match
    in ls-remote order wins). Otherwise, a hex-looking string of at least
    4 characters is treated as a (possibly abbreviated) commit hash.

    Commit hashes are compared case-insensitively: ls-remote prints
    lowercase hashes, so the user-supplied ref is lowercased before the
    exact/prefix comparison.

    Returns:
        GitRefType.COMMIT if it's a commit hash,
        GitRefType.BRANCH if it's a branch,
        GitRefType.TAG if it's a tag.

    Raises:
        exceptions.GitError: If the reference is not a branch, tag, or
            hex string, if an abbreviated hash matches several advertised
            commits, or if the ls-remote command itself fails.
    """
    clone_info = self.get_repo_clone_info()
    git_cmd = git.cmd.Git()
    if clone_info.envs:
        git_cmd.update_environment(**clone_info.envs)

    try:
        # Get all remote refs
        url = clone_info.url
        if clone_info.token:
            url = self.get_https_url(with_token=True)
        refs = git_cmd.ls_remote(url).split('\n')

        branch_ref = f'refs/heads/{self.ref}'
        tag_ref = f'refs/tags/{self.ref}'

        # Collect all advertised commit hashes for the commit check below.
        all_commit_hashes = set()

        for ref in refs:
            # Each line is expected to be "<hash>\t<refname>". Blank or
            # malformed lines are skipped instead of raising ValueError,
            # which would bypass the GitCommandError handling below.
            hash_val, tab, ref_name = ref.partition('\t')
            if not tab or not hash_val:
                continue

            all_commit_hashes.add(hash_val)

            if ref_name == branch_ref:
                return GitRefType.BRANCH
            if ref_name == tag_ref:
                return GitRefType.TAG

        # Not a branch or tag name: check whether it looks like a commit
        # hash (hex string). Normalize case once so the hex check and the
        # hash comparisons agree with each other.
        ref_hash = self.ref.lower()
        if len(ref_hash) >= 4 and all(
                c in '0123456789abcdef' for c in ref_hash):
            # Exact match with an advertised commit.
            if ref_hash in all_commit_hashes:
                logger.debug(f'Found exact commit hash match: {self.ref}')
                return GitRefType.COMMIT

            # Prefix match; sorted so the error message is deterministic.
            matching_commits = sorted(
                h for h in all_commit_hashes if h.startswith(ref_hash))
            if len(matching_commits) == 1:
                logger.debug(
                    f'Found commit hash prefix match: {self.ref} -> '
                    f'{matching_commits[0]}')
                return GitRefType.COMMIT
            elif len(matching_commits) > 1:
                # Multiple matches - ambiguous
                raise exceptions.GitError(
                    f'Ambiguous commit hash {self.ref!r}. '
                    f'Multiple commits match: '
                    f'{", ".join(matching_commits[:5])}...')

            # If no match found in ls-remote output, we can't verify
            # the commit exists. This could be a valid commit that's
            # not at the tip of any branch/tag. We'll assume it's valid
            # if it looks like a commit hash and let git handle validation
            # during clone.
            logger.debug(f'Commit hash not found in ls-remote output, '
                         f'assuming valid: {self.ref}')
            logger.warning(
                f'Cannot verify commit {self.ref} exists - it may be a '
                'commit in history not at any branch/tag tip')
            return GitRefType.COMMIT

        # If it's not a branch, tag, or hex string, it's invalid
        raise exceptions.GitError(
            f'Git reference {self.ref!r} not found. '
            'Please provide a valid branch, tag, or commit hash.')

    except git.exc.GitCommandError as e:
        # Without any credentials configured, the most likely cause is a
        # private repository, so point the user at the auth options.
        if not (self.git_token or self.git_ssh_key_path):
            raise exceptions.GitError(
                'Failed to check repository. If this is a private '
                'repository, please provide authentication using either '
                'GIT_TOKEN or GIT_SSH_KEY_PATH.') from e
        raise exceptions.GitError(
            f'Failed to check git reference: {str(e)}') from e
@@ -1,22 +1,19 @@
1
1
  #!/bin/bash
2
2
  # Creates a local Kubernetes cluster using kind with optional GPU support
3
- # Usage: ./create_cluster.sh [--gpus]
4
- # Invokes generate_kind_config.py to generate a kind-cluster.yaml with NodePort mappings
3
+ # Usage: ./create_cluster.sh [name] [yaml_path] [--gpus]
5
4
  set -e
6
5
 
7
6
  # Images
8
- IMAGE="us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest"
9
- IMAGE_GPU="us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot-gpu:latest"
7
+ IMAGE="us-docker.pkg.dev/sky-dev-465/skypilotk8s/skypilot:latest"
8
+ IMAGE_GPU="us-docker.pkg.dev/sky-dev-465/skypilotk8s/skypilot-gpu:latest"
10
9
 
11
- # Limit port range to speed up kind cluster creation
12
- PORT_RANGE_START=30000
13
- PORT_RANGE_END=30100
14
-
15
- USER_HASH=$1
10
+ # Arguments
11
+ NAME=$1
12
+ YAML_PATH=$2
16
13
 
17
14
  # Check for GPU flag
18
15
  ENABLE_GPUS=false
19
- if [[ "$2" == "--gpus" ]]; then
16
+ if [[ "$3" == "--gpus" ]]; then
20
17
  ENABLE_GPUS=true
21
18
  fi
22
19
 
@@ -82,28 +79,16 @@ fi
82
79
  # ====== End of dependency checks =======
83
80
 
84
81
  # Check if the local cluster already exists
85
- if kind get clusters | grep -q skypilot; then
86
- echo "Local cluster already exists. Exiting."
82
+ if kind get clusters | grep -q $NAME; then
83
+ echo "Local cluster $NAME already exists. Exiting."
87
84
  # Switch context to the local cluster
88
- kind export kubeconfig --name skypilot
89
- kubectl config use-context kind-skypilot
85
+ kind export kubeconfig --name $NAME
86
+ kubectl config use-context kind-$NAME
90
87
  exit 100
91
88
  fi
92
89
 
93
- # Generate cluster YAML
94
- YAML_PATH="/tmp/skypilot-kind-$USER_HASH.yaml"
95
- echo "Generating $YAML_PATH"
96
-
97
- # Add GPUs flag to the generate_kind_config.py command if GPUs are enabled
98
- if $ENABLE_GPUS; then
99
- python -m sky.utils.kubernetes.generate_kind_config --path $YAML_PATH --port-start ${PORT_RANGE_START} --port-end ${PORT_RANGE_END} --gpus
100
- else
101
- python -m sky.utils.kubernetes.generate_kind_config --path $YAML_PATH --port-start ${PORT_RANGE_START} --port-end ${PORT_RANGE_END}
102
- fi
103
-
104
- kind create cluster --config $YAML_PATH --name skypilot
105
-
106
- echo "Kind cluster created."
90
+ kind create cluster --config $YAML_PATH --name $NAME
91
+ echo "Kind cluster $NAME created."
107
92
 
108
93
  # Function to wait for GPU operator to be correctly installed
109
94
  wait_for_gpu_operator_installation() {
@@ -157,7 +142,7 @@ if $ENABLE_GPUS; then
157
142
  echo "Enabling GPU support..."
158
143
  # Run patch for missing ldconfig.real
159
144
  # https://github.com/NVIDIA/nvidia-docker/issues/614#issuecomment-423991632
160
- docker exec -ti skypilot-control-plane /bin/bash -c '[ ! -f /sbin/ldconfig.real ] && ln -s /sbin/ldconfig /sbin/ldconfig.real || echo "/sbin/ldconfig.real already exists"'
145
+ docker exec -ti $NAME-control-plane /bin/bash -c '[ ! -f /sbin/ldconfig.real ] && ln -s /sbin/ldconfig /sbin/ldconfig.real || echo "/sbin/ldconfig.real already exists"'
161
146
 
162
147
  echo "Installing NVIDIA GPU operator..."
163
148
  # Install the NVIDIA GPU operator
@@ -185,4 +170,4 @@ if $ENABLE_GPUS; then
185
170
  echo "GPU support is enabled. Run 'sky show-gpus --cloud kubernetes' to see the GPUs available on the cluster."
186
171
  fi
187
172
  fi
188
- echo "Number of CPUs available on the local cluster: $NUM_CPUS"
173
+ echo "Number of CPUs available on the local cluster $NAME: $NUM_CPUS"
@@ -1,9 +1,12 @@
1
1
  #!/bin/bash
2
- # Deletes the local kind cluster
3
- # Usage: ./delete_cluster.sh
4
- # Raises error code 100 if the local cluster does not exist
2
+ # Deletes the local kind cluster of [name]
3
+ # Usage: ./delete_cluster.sh [name]
4
+ # Raises error code 100 if the specified local cluster does not exist
5
5
 
6
6
  set -e
7
+
8
+ NAME="${1:-skypilot}"
9
+
7
10
  # Check if docker is running
8
11
  if ! docker info > /dev/null 2>&1; then
9
12
  >&2 echo "Docker is not running. Please start Docker and try again."
@@ -17,13 +20,13 @@ if ! kind version > /dev/null 2>&1; then
17
20
  fi
18
21
 
19
22
  # Check if the local cluster exists
20
- if ! kind get clusters | grep -q skypilot; then
21
- echo "Local cluster does not exist. Exiting."
23
+ if ! kind get clusters | grep -q $NAME; then
24
+ echo "Local cluster $NAME does not exist. Exiting."
22
25
  exit 100
23
26
  fi
24
27
 
25
- kind delete cluster --name skypilot
26
- echo "Local cluster deleted!"
28
+ kind delete cluster --name $NAME
29
+ echo "Local cluster $NAME deleted!"
27
30
 
28
31
  # Switch to the first available context
29
32
  AVAILABLE_CONTEXT=$(kubectl config get-contexts -o name | head -n 1)