skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (397) hide show
  1. sky/__init__.py +10 -2
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/ibm.py +5 -2
  8. sky/adaptors/kubernetes.py +64 -0
  9. sky/adaptors/nebius.py +3 -1
  10. sky/adaptors/primeintellect.py +1 -0
  11. sky/adaptors/seeweb.py +183 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +20 -0
  14. sky/authentication.py +157 -263
  15. sky/backends/__init__.py +3 -2
  16. sky/backends/backend.py +11 -3
  17. sky/backends/backend_utils.py +588 -184
  18. sky/backends/cloud_vm_ray_backend.py +1088 -904
  19. sky/backends/local_docker_backend.py +9 -5
  20. sky/backends/task_codegen.py +633 -0
  21. sky/backends/wheel_utils.py +18 -0
  22. sky/catalog/__init__.py +8 -0
  23. sky/catalog/aws_catalog.py +4 -0
  24. sky/catalog/common.py +19 -1
  25. sky/catalog/data_fetchers/fetch_aws.py +102 -80
  26. sky/catalog/data_fetchers/fetch_gcp.py +30 -3
  27. sky/catalog/data_fetchers/fetch_nebius.py +9 -6
  28. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  29. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  30. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  31. sky/catalog/kubernetes_catalog.py +24 -28
  32. sky/catalog/primeintellect_catalog.py +95 -0
  33. sky/catalog/runpod_catalog.py +5 -1
  34. sky/catalog/seeweb_catalog.py +184 -0
  35. sky/catalog/shadeform_catalog.py +165 -0
  36. sky/check.py +73 -43
  37. sky/client/cli/command.py +675 -412
  38. sky/client/cli/flags.py +4 -2
  39. sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
  40. sky/client/cli/utils.py +79 -0
  41. sky/client/common.py +12 -2
  42. sky/client/sdk.py +132 -63
  43. sky/client/sdk_async.py +34 -33
  44. sky/cloud_stores.py +82 -3
  45. sky/clouds/__init__.py +6 -0
  46. sky/clouds/aws.py +337 -129
  47. sky/clouds/azure.py +24 -18
  48. sky/clouds/cloud.py +40 -13
  49. sky/clouds/cudo.py +16 -13
  50. sky/clouds/do.py +9 -7
  51. sky/clouds/fluidstack.py +12 -5
  52. sky/clouds/gcp.py +14 -7
  53. sky/clouds/hyperbolic.py +12 -5
  54. sky/clouds/ibm.py +12 -5
  55. sky/clouds/kubernetes.py +80 -45
  56. sky/clouds/lambda_cloud.py +12 -5
  57. sky/clouds/nebius.py +23 -9
  58. sky/clouds/oci.py +19 -12
  59. sky/clouds/paperspace.py +4 -1
  60. sky/clouds/primeintellect.py +317 -0
  61. sky/clouds/runpod.py +85 -24
  62. sky/clouds/scp.py +12 -8
  63. sky/clouds/seeweb.py +477 -0
  64. sky/clouds/shadeform.py +400 -0
  65. sky/clouds/ssh.py +4 -2
  66. sky/clouds/utils/scp_utils.py +61 -50
  67. sky/clouds/vast.py +33 -27
  68. sky/clouds/vsphere.py +14 -16
  69. sky/core.py +174 -165
  70. sky/dashboard/out/404.html +1 -1
  71. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  72. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  73. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  74. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  75. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  76. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  77. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  78. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  79. sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-7b45f9fbb6308557.js} +1 -1
  80. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  81. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  82. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  83. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  84. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  85. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  86. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  87. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  88. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  89. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  90. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  91. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  92. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  93. sky/dashboard/out/_next/static/chunks/pages/{_app-ce361c6959bc2001.js → _app-bde01e4a2beec258.js} +1 -1
  94. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  95. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  96. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  97. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-c0b5935149902e6f.js} +1 -1
  98. sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-aed0ea19df7cf961.js} +1 -1
  99. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  100. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  101. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  102. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-84a40f8c7c627fe4.js} +1 -1
  105. sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-531b2f8c4bf89f82.js} +1 -1
  106. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  107. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  108. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  109. sky/dashboard/out/clusters/[cluster].html +1 -1
  110. sky/dashboard/out/clusters.html +1 -1
  111. sky/dashboard/out/config.html +1 -1
  112. sky/dashboard/out/index.html +1 -1
  113. sky/dashboard/out/infra/[context].html +1 -1
  114. sky/dashboard/out/infra.html +1 -1
  115. sky/dashboard/out/jobs/[job].html +1 -1
  116. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  117. sky/dashboard/out/jobs.html +1 -1
  118. sky/dashboard/out/users.html +1 -1
  119. sky/dashboard/out/volumes.html +1 -1
  120. sky/dashboard/out/workspace/new.html +1 -1
  121. sky/dashboard/out/workspaces/[name].html +1 -1
  122. sky/dashboard/out/workspaces.html +1 -1
  123. sky/data/data_utils.py +92 -1
  124. sky/data/mounting_utils.py +162 -29
  125. sky/data/storage.py +200 -19
  126. sky/data/storage_utils.py +10 -45
  127. sky/exceptions.py +18 -7
  128. sky/execution.py +74 -31
  129. sky/global_user_state.py +605 -191
  130. sky/jobs/__init__.py +2 -0
  131. sky/jobs/client/sdk.py +101 -4
  132. sky/jobs/client/sdk_async.py +31 -5
  133. sky/jobs/constants.py +15 -8
  134. sky/jobs/controller.py +726 -284
  135. sky/jobs/file_content_utils.py +128 -0
  136. sky/jobs/log_gc.py +193 -0
  137. sky/jobs/recovery_strategy.py +250 -100
  138. sky/jobs/scheduler.py +271 -173
  139. sky/jobs/server/core.py +367 -114
  140. sky/jobs/server/server.py +81 -35
  141. sky/jobs/server/utils.py +89 -35
  142. sky/jobs/state.py +1498 -620
  143. sky/jobs/utils.py +771 -306
  144. sky/logs/agent.py +40 -5
  145. sky/logs/aws.py +9 -19
  146. sky/metrics/utils.py +282 -39
  147. sky/optimizer.py +1 -1
  148. sky/provision/__init__.py +37 -1
  149. sky/provision/aws/config.py +34 -13
  150. sky/provision/aws/instance.py +5 -2
  151. sky/provision/azure/instance.py +5 -3
  152. sky/provision/common.py +2 -0
  153. sky/provision/cudo/instance.py +4 -3
  154. sky/provision/do/instance.py +4 -3
  155. sky/provision/docker_utils.py +97 -26
  156. sky/provision/fluidstack/instance.py +6 -5
  157. sky/provision/gcp/config.py +6 -1
  158. sky/provision/gcp/instance.py +4 -2
  159. sky/provision/hyperbolic/instance.py +4 -2
  160. sky/provision/instance_setup.py +66 -20
  161. sky/provision/kubernetes/__init__.py +2 -0
  162. sky/provision/kubernetes/config.py +7 -44
  163. sky/provision/kubernetes/constants.py +0 -1
  164. sky/provision/kubernetes/instance.py +609 -213
  165. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  166. sky/provision/kubernetes/network.py +12 -8
  167. sky/provision/kubernetes/network_utils.py +8 -25
  168. sky/provision/kubernetes/utils.py +382 -418
  169. sky/provision/kubernetes/volume.py +150 -18
  170. sky/provision/lambda_cloud/instance.py +16 -13
  171. sky/provision/nebius/instance.py +6 -2
  172. sky/provision/nebius/utils.py +103 -86
  173. sky/provision/oci/instance.py +4 -2
  174. sky/provision/paperspace/instance.py +4 -3
  175. sky/provision/primeintellect/__init__.py +10 -0
  176. sky/provision/primeintellect/config.py +11 -0
  177. sky/provision/primeintellect/instance.py +454 -0
  178. sky/provision/primeintellect/utils.py +398 -0
  179. sky/provision/provisioner.py +30 -9
  180. sky/provision/runpod/__init__.py +2 -0
  181. sky/provision/runpod/instance.py +4 -3
  182. sky/provision/runpod/volume.py +69 -13
  183. sky/provision/scp/instance.py +307 -130
  184. sky/provision/seeweb/__init__.py +11 -0
  185. sky/provision/seeweb/config.py +13 -0
  186. sky/provision/seeweb/instance.py +812 -0
  187. sky/provision/shadeform/__init__.py +11 -0
  188. sky/provision/shadeform/config.py +12 -0
  189. sky/provision/shadeform/instance.py +351 -0
  190. sky/provision/shadeform/shadeform_utils.py +83 -0
  191. sky/provision/vast/instance.py +5 -3
  192. sky/provision/volume.py +164 -0
  193. sky/provision/vsphere/common/ssl_helper.py +1 -1
  194. sky/provision/vsphere/common/vapiconnect.py +2 -1
  195. sky/provision/vsphere/common/vim_utils.py +3 -2
  196. sky/provision/vsphere/instance.py +8 -6
  197. sky/provision/vsphere/vsphere_utils.py +8 -1
  198. sky/resources.py +11 -3
  199. sky/schemas/api/responses.py +107 -6
  200. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  201. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  202. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  203. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  204. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  205. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  206. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  207. sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
  208. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  209. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  210. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  211. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  212. sky/schemas/generated/jobsv1_pb2.py +86 -0
  213. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  214. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  215. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  216. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  217. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  218. sky/schemas/generated/servev1_pb2.py +58 -0
  219. sky/schemas/generated/servev1_pb2.pyi +115 -0
  220. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  221. sky/serve/autoscalers.py +2 -0
  222. sky/serve/client/impl.py +55 -21
  223. sky/serve/constants.py +4 -3
  224. sky/serve/controller.py +17 -11
  225. sky/serve/load_balancing_policies.py +1 -1
  226. sky/serve/replica_managers.py +219 -142
  227. sky/serve/serve_rpc_utils.py +179 -0
  228. sky/serve/serve_state.py +63 -54
  229. sky/serve/serve_utils.py +145 -109
  230. sky/serve/server/core.py +46 -25
  231. sky/serve/server/impl.py +311 -162
  232. sky/serve/server/server.py +21 -19
  233. sky/serve/service.py +84 -68
  234. sky/serve/service_spec.py +45 -7
  235. sky/server/auth/loopback.py +38 -0
  236. sky/server/auth/oauth2_proxy.py +12 -7
  237. sky/server/common.py +47 -24
  238. sky/server/config.py +62 -28
  239. sky/server/constants.py +9 -1
  240. sky/server/daemons.py +109 -38
  241. sky/server/metrics.py +76 -96
  242. sky/server/middleware_utils.py +166 -0
  243. sky/server/requests/executor.py +381 -145
  244. sky/server/requests/payloads.py +71 -18
  245. sky/server/requests/preconditions.py +15 -13
  246. sky/server/requests/request_names.py +121 -0
  247. sky/server/requests/requests.py +507 -157
  248. sky/server/requests/serializers/decoders.py +48 -17
  249. sky/server/requests/serializers/encoders.py +85 -20
  250. sky/server/requests/threads.py +117 -0
  251. sky/server/rest.py +116 -24
  252. sky/server/server.py +420 -172
  253. sky/server/stream_utils.py +219 -45
  254. sky/server/uvicorn.py +30 -19
  255. sky/setup_files/MANIFEST.in +6 -1
  256. sky/setup_files/alembic.ini +8 -0
  257. sky/setup_files/dependencies.py +62 -19
  258. sky/setup_files/setup.py +44 -44
  259. sky/sky_logging.py +13 -5
  260. sky/skylet/attempt_skylet.py +106 -24
  261. sky/skylet/configs.py +3 -1
  262. sky/skylet/constants.py +111 -26
  263. sky/skylet/events.py +64 -10
  264. sky/skylet/job_lib.py +141 -104
  265. sky/skylet/log_lib.py +233 -5
  266. sky/skylet/log_lib.pyi +40 -2
  267. sky/skylet/providers/ibm/node_provider.py +12 -8
  268. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  269. sky/skylet/runtime_utils.py +21 -0
  270. sky/skylet/services.py +524 -0
  271. sky/skylet/skylet.py +22 -1
  272. sky/skylet/subprocess_daemon.py +104 -29
  273. sky/skypilot_config.py +99 -79
  274. sky/ssh_node_pools/server.py +9 -8
  275. sky/task.py +221 -104
  276. sky/templates/aws-ray.yml.j2 +1 -0
  277. sky/templates/azure-ray.yml.j2 +1 -0
  278. sky/templates/cudo-ray.yml.j2 +1 -0
  279. sky/templates/do-ray.yml.j2 +1 -0
  280. sky/templates/fluidstack-ray.yml.j2 +1 -0
  281. sky/templates/gcp-ray.yml.j2 +1 -0
  282. sky/templates/hyperbolic-ray.yml.j2 +1 -0
  283. sky/templates/ibm-ray.yml.j2 +2 -1
  284. sky/templates/jobs-controller.yaml.j2 +3 -0
  285. sky/templates/kubernetes-ray.yml.j2 +196 -55
  286. sky/templates/lambda-ray.yml.j2 +1 -0
  287. sky/templates/nebius-ray.yml.j2 +3 -0
  288. sky/templates/oci-ray.yml.j2 +1 -0
  289. sky/templates/paperspace-ray.yml.j2 +1 -0
  290. sky/templates/primeintellect-ray.yml.j2 +72 -0
  291. sky/templates/runpod-ray.yml.j2 +1 -0
  292. sky/templates/scp-ray.yml.j2 +1 -0
  293. sky/templates/seeweb-ray.yml.j2 +171 -0
  294. sky/templates/shadeform-ray.yml.j2 +73 -0
  295. sky/templates/vast-ray.yml.j2 +1 -0
  296. sky/templates/vsphere-ray.yml.j2 +1 -0
  297. sky/templates/websocket_proxy.py +188 -43
  298. sky/usage/usage_lib.py +16 -4
  299. sky/users/permission.py +60 -43
  300. sky/utils/accelerator_registry.py +6 -3
  301. sky/utils/admin_policy_utils.py +18 -5
  302. sky/utils/annotations.py +22 -0
  303. sky/utils/asyncio_utils.py +78 -0
  304. sky/utils/atomic.py +1 -1
  305. sky/utils/auth_utils.py +153 -0
  306. sky/utils/cli_utils/status_utils.py +12 -7
  307. sky/utils/cluster_utils.py +28 -6
  308. sky/utils/command_runner.py +88 -27
  309. sky/utils/command_runner.pyi +36 -3
  310. sky/utils/common.py +3 -1
  311. sky/utils/common_utils.py +37 -4
  312. sky/utils/config_utils.py +1 -14
  313. sky/utils/context.py +127 -40
  314. sky/utils/context_utils.py +73 -18
  315. sky/utils/controller_utils.py +229 -70
  316. sky/utils/db/db_utils.py +95 -18
  317. sky/utils/db/kv_cache.py +149 -0
  318. sky/utils/db/migration_utils.py +24 -7
  319. sky/utils/env_options.py +4 -0
  320. sky/utils/git.py +559 -1
  321. sky/utils/kubernetes/create_cluster.sh +15 -30
  322. sky/utils/kubernetes/delete_cluster.sh +10 -7
  323. sky/utils/kubernetes/{deploy_remote_cluster.py → deploy_ssh_node_pools.py} +258 -380
  324. sky/utils/kubernetes/generate_kind_config.py +6 -66
  325. sky/utils/kubernetes/gpu_labeler.py +13 -3
  326. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  327. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  328. sky/utils/kubernetes/kubernetes_deploy_utils.py +213 -194
  329. sky/utils/kubernetes/rsync_helper.sh +11 -3
  330. sky/utils/kubernetes_enums.py +7 -15
  331. sky/utils/lock_events.py +4 -4
  332. sky/utils/locks.py +128 -31
  333. sky/utils/log_utils.py +0 -319
  334. sky/utils/resource_checker.py +13 -10
  335. sky/utils/resources_utils.py +53 -29
  336. sky/utils/rich_utils.py +8 -4
  337. sky/utils/schemas.py +107 -52
  338. sky/utils/subprocess_utils.py +17 -4
  339. sky/utils/thread_utils.py +91 -0
  340. sky/utils/timeline.py +2 -1
  341. sky/utils/ux_utils.py +35 -1
  342. sky/utils/volume.py +88 -4
  343. sky/utils/yaml_utils.py +9 -0
  344. sky/volumes/client/sdk.py +48 -10
  345. sky/volumes/server/core.py +59 -22
  346. sky/volumes/server/server.py +46 -17
  347. sky/volumes/volume.py +54 -42
  348. sky/workspaces/core.py +57 -21
  349. sky/workspaces/server.py +13 -12
  350. sky_templates/README.md +3 -0
  351. sky_templates/__init__.py +3 -0
  352. sky_templates/ray/__init__.py +0 -0
  353. sky_templates/ray/start_cluster +183 -0
  354. sky_templates/ray/stop_cluster +75 -0
  355. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/METADATA +331 -65
  356. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  357. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  358. sky/client/cli/git.py +0 -549
  359. sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
  360. sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
  361. sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
  362. sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
  363. sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
  364. sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
  365. sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
  366. sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
  367. sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
  368. sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
  369. sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
  370. sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
  371. sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
  372. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
  373. sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
  374. sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
  375. sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
  376. sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
  377. sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
  378. sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
  379. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
  380. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
  381. sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
  382. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
  383. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
  384. sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
  385. sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
  386. sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
  387. sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
  388. sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
  389. sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
  390. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  391. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  392. skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
  393. skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
  394. /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  395. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +0 -0
  396. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  397. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,11 @@
1
+ """Shadeform provisioner."""
2
+
3
+ from sky.provision.shadeform.config import bootstrap_instances
4
+ from sky.provision.shadeform.instance import cleanup_ports
5
+ from sky.provision.shadeform.instance import get_cluster_info
6
+ from sky.provision.shadeform.instance import open_ports
7
+ from sky.provision.shadeform.instance import query_instances
8
+ from sky.provision.shadeform.instance import run_instances
9
+ from sky.provision.shadeform.instance import stop_instances
10
+ from sky.provision.shadeform.instance import terminate_instances
11
+ from sky.provision.shadeform.instance import wait_instances
@@ -0,0 +1,12 @@
1
+ """Shadeform configuration bootstrapping."""
2
+
3
+ from sky.provision import common
4
+
5
+
6
def bootstrap_instances(
        region: str, cluster_name: str,
        config: common.ProvisionConfig) -> common.ProvisionConfig:
    """Return the provision config unchanged.

    No bootstrapping work is performed; ``region`` and ``cluster_name`` are
    accepted but not used.
    """
    # Neither argument is consulted; the config is handed back as-is.
    del cluster_name, region

    return config
@@ -0,0 +1,351 @@
1
+ """Shadeform instance provisioning."""
2
+ import time
3
+ from typing import Any, Dict, List, Optional, Tuple
4
+
5
+ import requests
6
+
7
+ from sky import sky_logging
8
+ from sky.provision import common
9
+ from sky.provision.shadeform import shadeform_utils
10
+ from sky.utils import status_lib
11
+
12
# Seconds to sleep between two consecutive readiness polls.
POLL_INTERVAL = 10
# Default upper bound, in seconds, on how long to poll for readiness.
INSTANCE_READY_TIMEOUT = 3600

logger = sky_logging.init_logger(__name__)

# Status mapping from Shadeform to SkyPilot
SHADEFORM_STATUS_MAP = {
    'creating': status_lib.ClusterStatus.INIT,
    'pending_provider': status_lib.ClusterStatus.INIT,
    'pending': status_lib.ClusterStatus.INIT,
    'active': status_lib.ClusterStatus.UP,
    # Deleted instances are reported as STOPPED; query_instances() drops
    # them when called with non_terminated_only=True.
    'deleted': status_lib.ClusterStatus.STOPPED,
}
25
+
26
+
27
def _get_cluster_instances(cluster_name_on_cloud: str) -> Dict[str, Any]:
    """Collect the Shadeform instances that make up one cluster.

    An instance belongs to the cluster when its name is exactly
    ``<cluster_name_on_cloud>-head`` or ``<cluster_name_on_cloud>-worker``.

    Returns:
        Mapping of instance ID to the raw instance record. If listing fails
        with ValueError, KeyError or a requests RequestException, a warning
        is logged and an empty mapping is returned instead.
    """
    wanted_names = (f'{cluster_name_on_cloud}-head',
                    f'{cluster_name_on_cloud}-worker')
    try:
        listing = shadeform_utils.get_instances()
        return {
            record['id']: record
            for record in listing.get('instances', [])
            if record.get('name') in wanted_names
        }
    except (ValueError, KeyError, requests.exceptions.RequestException) as e:
        logger.warning(f'Failed to get instances: {e}')
        return {}
46
+
47
+
48
+ def _get_head_instance_id(instances: Dict[str, Any]) -> Optional[str]:
49
+ """Get the head instance ID from a list of instances."""
50
+ for instance_id, instance in instances.items():
51
+ if instance.get('name', '').endswith('-head'):
52
+ return instance_id
53
+ return None
54
+
55
+
56
def _wait_for_instances_ready(cluster_name_on_cloud: str,
                              expected_count: int,
                              timeout: int = INSTANCE_READY_TIMEOUT) -> bool:
    """Poll until enough cluster instances are ready, or time out.

    An instance counts as ready when its status is ``'active'`` and both
    its ``ip`` and ``ssh_port`` fields are set.

    Returns:
        True as soon as at least ``expected_count`` instances are ready;
        False if ``timeout`` seconds elapse first.
    """

    def _is_ready(record: Dict[str, Any]) -> bool:
        return (record.get('status') == 'active' and
                record.get('ip') is not None and
                record.get('ssh_port') is not None)

    began = time.time()
    while time.time() - began < timeout:
        records = _get_cluster_instances(cluster_name_on_cloud)
        ready_count = sum(1 for record in records.values() if _is_ready(record))

        logger.info(f'Waiting for instances to be ready: '
                    f'({ready_count}/{expected_count})')

        if ready_count >= expected_count:
            return True

        # Not enough yet: back off before asking the API again.
        time.sleep(POLL_INTERVAL)

    return False
81
+
82
+
83
def _parse_shadeform_instance_type(instance_type_full: Any) -> Tuple[str, str]:
    """Split a ``{cloud}_{instance_type}`` string into Shadeform API fields.

    Everything before the first underscore is the cloud provider
    (e.g. ``massedcompute``); the remainder is the instance type, which is
    then normalized: hyphens become underscores, a single trailing ``B`` is
    dropped (``...80GB`` -> ``...80G``) and ``GBx`` becomes ``Gx``.

    Returns:
        ``(cloud, shade_instance_type)``.

    Raises:
        AssertionError: If the value is not a string containing an
            underscore, or either resulting part is empty.
    """
    assert (isinstance(instance_type_full, str) and
            '_' in instance_type_full), \
        f'InstanceType must be in format cloud_instance_type, got: ' \
        f'{instance_type_full}'

    cloud, _, instance_type = instance_type_full.partition('_')

    # Shadeform uses underscores instead of hyphens
    instance_type = instance_type.replace('-', '_')

    if instance_type.endswith('B'):
        instance_type = instance_type[:-1]

    # Replace "GBx" with "Gx" (case sensitive); a no-op when absent.
    instance_type = instance_type.replace('GBx', 'Gx')

    assert cloud, 'Cloud provider cannot be empty'
    assert instance_type, 'Instance type cannot be empty'
    return cloud, instance_type


def run_instances(region: str, cluster_name: str, cluster_name_on_cloud: str,
                  config: common.ProvisionConfig) -> common.ProvisionRecord:
    """Run instances for the given cluster.

    Counts the cluster's instances that are already ``'active'`` and creates
    the difference up to ``config.count``. The first instance created when
    no head exists is named ``<cluster>-head``; all others are named
    ``<cluster>-worker``. After creation, blocks until ``config.count``
    instances are ready.

    Args:
        region: Region passed through to the Shadeform create call.
        cluster_name: Unused; ``cluster_name_on_cloud`` is used instead.
        cluster_name_on_cloud: Name prefix identifying the cluster's
            instances.
        config: Provision config; ``count``, ``node_config['InstanceType']``
            and ``authentication_config`` are read.

    Returns:
        A ProvisionRecord listing the head instance and the instance IDs
        created by this call.

    Raises:
        RuntimeError: If enough instances are active but none is a head, or
            if the instances do not become ready in time.
        AssertionError: If ``InstanceType`` is missing or malformed.
        Exception: Any error from instance creation is re-raised after a
            best-effort deletion of the instances created by this call.
    """
    del cluster_name  # unused - we use cluster_name_on_cloud
    logger.info(f'Running instances for cluster {cluster_name_on_cloud} '
                f'in region {region}')
    logger.debug(f'DEBUG: region type={type(region)}, value={region!r}')
    logger.debug(f'DEBUG: config node_config={config.node_config}')

    # Check existing instances
    existing_instances = _get_cluster_instances(cluster_name_on_cloud)
    head_instance_id = _get_head_instance_id(existing_instances)

    # Only instances already in the 'active' state count toward the target.
    active_instances = {
        iid: inst
        for iid, inst in existing_instances.items()
        if inst.get('status') == 'active'
    }

    current_count = len(active_instances)
    target_count = config.count

    logger.info(f'Current instances: {current_count}, target: {target_count}')

    if current_count >= target_count:
        if head_instance_id is None:
            raise RuntimeError(
                f'Cluster {cluster_name_on_cloud} has no head node')
        logger.info(f'Cluster already has {current_count} instances, '
                    f'no need to start more')
        return common.ProvisionRecord(
            provider_name='shadeform',
            cluster_name=cluster_name_on_cloud,
            region=region,
            zone=None,  # Shadeform doesn't use separate zones
            head_instance_id=head_instance_id,
            resumed_instance_ids=[],
            created_instance_ids=[])

    # The instance spec and SSH key are the same for every node, so resolve
    # them once up front rather than on each loop iteration. At least one
    # instance is created past this point, so validation errors surface
    # exactly as before: ahead of the first create call.
    node_config = config.node_config
    assert 'InstanceType' in node_config, \
        'InstanceType must be present in node_config'
    cloud, instance_type = _parse_shadeform_instance_type(
        node_config['InstanceType'])

    # Get SSH key ID for authentication - this is optional and may be None
    ssh_key_id = config.authentication_config.get('ssh_key_id')

    # Create new instances
    to_create = target_count - current_count
    created_instance_ids = []

    for _ in range(to_create):
        node_type = 'head' if head_instance_id is None else 'worker'
        instance_name = f'{cluster_name_on_cloud}-{node_type}'

        create_config = {
            'cloud': cloud,
            'region': region,
            'shade_instance_type': instance_type,
            'name': instance_name,
            'ssh_key_id': ssh_key_id
        }

        try:
            logger.info(f'Creating {node_type} instance: {instance_name}')
            response = shadeform_utils.create_instance(create_config)
            instance_id = response['id']
            created_instance_ids.append(instance_id)

            if head_instance_id is None:
                head_instance_id = instance_id

            logger.info(f'Created instance {instance_id} ({node_type})')

        except Exception as e:
            logger.error(f'Failed to create instance: {e}')
            # Clean up any created instances
            for iid in created_instance_ids:
                try:
                    shadeform_utils.delete_instance(iid)
                except requests.exceptions.RequestException as cleanup_e:
                    logger.warning(
                        f'Failed to cleanup instance {iid}: {cleanup_e}')
            raise

    # Wait for all instances to be ready
    logger.info('Waiting for instances to become ready...')
    if not _wait_for_instances_ready(cluster_name_on_cloud, target_count):
        raise RuntimeError('Timed out waiting for instances to be ready')

    assert head_instance_id is not None, 'head_instance_id should not be None'

    # NOTE(review): zone is set to the region here but to None in the
    # early return above - confirm which value callers expect.
    return common.ProvisionRecord(provider_name='shadeform',
                                  cluster_name=cluster_name_on_cloud,
                                  region=region,
                                  zone=region,
                                  head_instance_id=head_instance_id,
                                  resumed_instance_ids=[],
                                  created_instance_ids=created_instance_ids)
220
+
221
+
222
def wait_instances(region: str, cluster_name_on_cloud: str,
                   state: Optional[status_lib.ClusterStatus]) -> None:
    """Do nothing; all arguments are ignored.

    run_instances() already polls until the instances it creates are
    'active', so no further waiting happens here.
    """
    # Readiness is handled inside run_instances(); nothing is left to
    # wait for at this point.
    del state, cluster_name_on_cloud, region
228
+
229
+
230
def stop_instances(cluster_name_on_cloud: str,
                   provider_config: Optional[Dict[str, Any]] = None,
                   worker_only: bool = False) -> None:
    """Reject the request: stopping is not supported by Shadeform.

    Raises:
        NotImplementedError: Unconditionally.
    """
    del worker_only, provider_config, cluster_name_on_cloud  # all unused
    reason = 'Stopping instances is not supported by Shadeform'
    raise NotImplementedError(reason)
237
+
238
+
239
def terminate_instances(cluster_name_on_cloud: str,
                        provider_config: Optional[Dict[str, Any]] = None,
                        worker_only: bool = False) -> None:
    """Delete the cluster's instances through the Shadeform API.

    Args:
        cluster_name_on_cloud: Name prefix identifying the cluster's
            instances.
        provider_config: Unused.
        worker_only: When True, instances whose name ends with ``-head``
            are left alone.

    A requests RequestException raised while deleting one instance is
    logged as a warning and the remaining instances are still attempted.
    """
    del provider_config  # unused
    logger.info(f'Terminating instances for cluster {cluster_name_on_cloud}')

    found = _get_cluster_instances(cluster_name_on_cloud)
    if not found:
        logger.info(f'No instances found for cluster {cluster_name_on_cloud}')
        return

    if worker_only:
        # Keep the head node; select everything else.
        doomed = {}
        for iid, record in found.items():
            if record.get('name', '').endswith('-head'):
                continue
            doomed[iid] = record
    else:
        doomed = found

    for instance_id, record in doomed.items():
        try:
            logger.info(
                f'Terminating instance {instance_id} ({record.get("name")})')
            shadeform_utils.delete_instance(instance_id)
        except requests.exceptions.RequestException as e:
            logger.warning(f'Failed to terminate instance {instance_id}: {e}')
268
+
269
+
270
def get_cluster_info(
        region: str,
        cluster_name_on_cloud: str,
        provider_config: Optional[Dict[str, Any]] = None) -> common.ClusterInfo:
    """Build a ClusterInfo from the cluster's current Shadeform instances.

    ``region`` and ``provider_config`` are unused. Each instance's ``ip``
    field is used for both the internal and external IP (empty string when
    absent) and ``ssh_port`` defaults to 22. The SSH user is read from the
    head instance's ``ssh_user`` field, falling back to ``'shadeform'``.
    """
    del region, provider_config  # unused
    records = _get_cluster_instances(cluster_name_on_cloud)

    if not records:
        return common.ClusterInfo(instances={},
                                  head_instance_id=None,
                                  provider_name='shadeform')

    head_id = _get_head_instance_id(records)

    # ClusterInfo expects Dict[InstanceId, List[InstanceInfo]]
    per_instance = {
        iid: [
            common.InstanceInfo(
                instance_id=iid,
                internal_ip=record.get('ip', ''),
                external_ip=record.get('ip', ''),
                ssh_port=record.get('ssh_port', 22),
                tags={},
            )
        ] for iid, record in records.items()
    }

    if head_id is None:
        login_user = 'shadeform'  # default
    else:
        login_user = records.get(head_id, {}).get('ssh_user', 'shadeform')

    return common.ClusterInfo(instances=per_instance,
                              head_instance_id=head_id,
                              provider_name='shadeform',
                              ssh_user=login_user)
307
+
308
+
309
def query_instances(
    cluster_name: str,
    cluster_name_on_cloud: str,
    provider_config: Optional[Dict[str, Any]] = None,
    non_terminated_only: bool = True,
) -> Dict[str, Tuple[Optional['status_lib.ClusterStatus'], Optional[str]]]:
    """Report the SkyPilot status of each instance in the cluster.

    Shadeform statuses are translated through ``SHADEFORM_STATUS_MAP``;
    a status not present in the map (or a missing one) is treated as INIT.

    Args:
        cluster_name: Unused.
        cluster_name_on_cloud: Name prefix identifying the cluster's
            instances.
        provider_config: Unused.
        non_terminated_only: When True, instances that translate to
            STOPPED are omitted from the result.

    Returns:
        Mapping of instance ID to ``(status, None)``; empty when the
        cluster has no instances.
    """
    del cluster_name, provider_config  # unused

    fallback = status_lib.ClusterStatus.INIT
    hidden = status_lib.ClusterStatus.STOPPED

    result: Dict[str, Tuple[Optional['status_lib.ClusterStatus'],
                            Optional[str]]] = {}
    for iid, record in _get_cluster_instances(cluster_name_on_cloud).items():
        translated = SHADEFORM_STATUS_MAP.get(record.get('status', 'unknown'),
                                              fallback)
        if non_terminated_only and translated == hidden:
            continue
        result[iid] = (translated, None)

    return result
336
+
337
+
338
def open_ports(cluster_name_on_cloud: str,
               ports: List[str],
               provider_config: Optional[Dict[str, Any]] = None) -> None:
    """Open ports (not supported by Shadeform).

    All arguments are ignored.

    Raises:
        NotImplementedError: Always, with a message naming the unsupported
            operation (matching the style used by stop_instances).
    """
    del cluster_name_on_cloud, ports, provider_config  # unused
    raise NotImplementedError('Opening ports is not supported by Shadeform')
344
+
345
+
346
def cleanup_ports(cluster_name_on_cloud: str,
                  ports: List[str],
                  provider_config: Optional[Dict[str, Any]] = None) -> None:
    """Do nothing; all arguments are ignored.

    Dynamic port opening is not supported, so there is never anything to
    clean up.
    """
    # No ports are ever opened dynamically, hence nothing to undo.
    del provider_config, ports, cluster_name_on_cloud
@@ -0,0 +1,83 @@
1
+ """Shadeform API utilities."""
2
+
3
+ import os
4
+ from typing import Any, Dict
5
+
6
+ from sky.adaptors import common
7
+
8
+ # Lazy import to avoid dependency on external packages
9
+ requests = common.LazyImport('requests')
10
+
11
+ # Shadeform API configuration
12
+ SHADEFORM_API_BASE = 'https://api.shadeform.ai/v1'
13
+ SHADEFORM_API_KEY_PATH = '~/.shadeform/api_key'
14
+
15
+
16
def get_api_key() -> str:
    """Read the Shadeform API key from the configured key file.

    Returns:
        The file contents with surrounding whitespace removed.

    Raises:
        FileNotFoundError: If nothing exists at the expanded key path.
        ValueError: If the file is empty or contains only whitespace.
    """
    api_key_path = os.path.expanduser(SHADEFORM_API_KEY_PATH)
    if os.path.exists(api_key_path):
        with open(api_key_path, 'r', encoding='utf-8') as key_file:
            stripped_key = key_file.read().strip()
        if stripped_key:
            return stripped_key
        raise ValueError(f'Shadeform API key is empty in {api_key_path}')
    raise FileNotFoundError(
        f'Shadeform API key not found at {api_key_path}. '
        'Please save your API key to this file.')
31
+
32
+
33
# Applied when the caller does not pass ``timeout``; without any timeout
# ``requests`` waits indefinitely on an unresponsive server.
_DEFAULT_REQUEST_TIMEOUT_SECONDS = 60


def make_request(method: str, endpoint: str, **kwargs) -> Any:
    """Make an authenticated request to the Shadeform API.

    Args:
        method: HTTP method name, e.g. ``'GET'`` or ``'POST'``.
        endpoint: Path relative to ``SHADEFORM_API_BASE``; leading slashes
            are stripped before it is joined to the base URL.
        **kwargs: Forwarded to ``requests.request``. ``timeout`` defaults to
            ``_DEFAULT_REQUEST_TIMEOUT_SECONDS`` when omitted. ``headers``,
            when given, is merged over the default headers instead of
            clashing with them.

    Returns:
        The decoded JSON body, or an empty dict when the body is blank.

    Raises:
        FileNotFoundError, ValueError: Propagated from ``get_api_key``.
        requests.HTTPError: Raised by ``raise_for_status`` on an error status.
    """
    url = f'{SHADEFORM_API_BASE}/{endpoint.lstrip("/")}'
    headers = {
        'X-API-KEY': get_api_key(),
        'Content-Type': 'application/json',
    }
    # Let callers add or override headers; previously a ``headers`` kwarg
    # raised TypeError because the keyword was passed twice.
    headers.update(kwargs.pop('headers', None) or {})
    kwargs.setdefault('timeout', _DEFAULT_REQUEST_TIMEOUT_SECONDS)

    response = requests.request(method, url, headers=headers, **kwargs)
    response.raise_for_status()

    # Some APIs (like delete) return empty responses with just 200 status.
    if response.text.strip():
        return response.json()
    return {}
50
+
51
+
52
def get_instances() -> Dict[str, Any]:
    """Fetch all instances via ``GET /instances``.

    Returns:
        Whatever ``make_request`` returns for that call.
    """
    listing = make_request('GET', '/instances')
    return listing
55
+
56
+
57
def get_instance_info(instance_id: str) -> Dict[str, Any]:
    """Fetch information about one instance.

    Args:
        instance_id: Identifier interpolated into the request path.

    Returns:
        Whatever ``make_request`` returns for ``GET /instances/<id>/info``.
    """
    info_endpoint = f'/instances/{instance_id}/info'
    return make_request('GET', info_endpoint)
60
+
61
+
62
def create_instance(config: Dict[str, Any]) -> Dict[str, Any]:
    """Create a new instance via ``POST /instances/create``.

    Args:
        config: Payload sent as the JSON request body.

    Returns:
        Whatever ``make_request`` returns for that call.
    """
    creation_result = make_request('POST', '/instances/create', json=config)
    return creation_result
65
+
66
+
67
def delete_instance(instance_id: str) -> Dict[str, Any]:
    """Delete an instance via ``POST /instances/<id>/delete``.

    Note: Shadeform delete API returns empty response with 200 status, which
    ``make_request`` turns into an empty dict.

    Args:
        instance_id: Identifier interpolated into the request path.
    """
    delete_endpoint = f'/instances/{instance_id}/delete'
    return make_request('POST', delete_endpoint)
73
+
74
+
75
def get_ssh_keys() -> Dict[str, Any]:
    """Fetch all SSH keys via ``GET /sshkeys``.

    Returns:
        Whatever ``make_request`` returns for that call.
    """
    registered_keys = make_request('GET', '/sshkeys')
    return registered_keys
78
+
79
+
80
def add_ssh_key(name: str, public_key: str) -> Dict[str, Any]:
    """Add a new SSH key via ``POST /sshkeys/add``.

    Args:
        name: Value sent under the ``name`` field.
        public_key: Value sent under the ``public_key`` field.

    Returns:
        Whatever ``make_request`` returns for that call.
    """
    return make_request('POST',
                        '/sshkeys/add',
                        json={
                            'name': name,
                            'public_key': public_key
                        })
@@ -39,14 +39,15 @@ def _filter_instances(cluster_name_on_cloud: str,
39
39
 
40
40
  def _get_head_instance_id(instances: Dict[str, Any]) -> Optional[str]:
41
41
  for inst_id, inst in instances.items():
42
- if inst['name'].endswith('-head'):
42
+ if inst.get('name') and inst['name'].endswith('-head'):
43
43
  return inst_id
44
44
  return None
45
45
 
46
46
 
47
- def run_instances(region: str, cluster_name_on_cloud: str,
47
+ def run_instances(region: str, cluster_name: str, cluster_name_on_cloud: str,
48
48
  config: common.ProvisionConfig) -> common.ProvisionRecord:
49
49
  """Runs instances for the given cluster."""
50
+ del cluster_name # unused
50
51
  pending_status = ['CREATED', 'RESTARTING']
51
52
 
52
53
  created_instance_ids = []
@@ -220,9 +221,10 @@ def query_instances(
220
221
  cluster_name_on_cloud: str,
221
222
  provider_config: Optional[Dict[str, Any]] = None,
222
223
  non_terminated_only: bool = True,
224
+ retry_if_missing: bool = False,
223
225
  ) -> Dict[str, Tuple[Optional['status_lib.ClusterStatus'], Optional[str]]]:
224
226
  """See sky/provision/__init__.py"""
225
- del cluster_name # unused
227
+ del cluster_name, retry_if_missing # unused
226
228
  assert provider_config is not None, (cluster_name_on_cloud, provider_config)
227
229
  instances = _filter_instances(cluster_name_on_cloud, None)
228
230
  # "running", "frozen", "stopped", "unknown", "loading"
@@ -0,0 +1,164 @@
1
+ """Volume functions for provisioning and deleting ephemeral volumes."""
2
+
3
+ import copy
4
+ from typing import Any, Dict, Optional
5
+
6
+ from sky import clouds
7
+ from sky import global_user_state
8
+ from sky import models
9
+ from sky import sky_logging
10
+ from sky.provision import common as provision_common
11
+ from sky.provision import constants as provision_constants
12
+ from sky.provision.kubernetes import utils as kubernetes_utils
13
+ from sky.utils import volume as volume_utils
14
+ from sky.volumes import volume as volume_lib
15
+ from sky.volumes.server import core as volume_server_core
16
+
17
+ logger = sky_logging.init_logger(__name__)
18
+
19
+
20
def _resolve_volume_type(cloud: clouds.Cloud,
                         volume_type: Optional[str]) -> str:
    """Resolve the volume type to use on the given cloud.

    With an explicit ``volume_type``, it is lower-cased and checked against
    the values of ``volume_utils.VolumeType``. Without one, the first entry
    of ``volume_lib.CLOUD_TO_VOLUME_TYPE`` matching ``cloud`` supplies the
    default, which must contain exactly one type.

    Raises:
        ValueError: If the explicit type is not a known value, or if the
            cloud has no default entry or its entry does not hold exactly
            one type.
    """
    if volume_type:
        volume_type = volume_type.lower()
        known_values = [member.value for member in volume_utils.VolumeType]
        if volume_type in known_values:
            return volume_type
        raise ValueError(
            f'Invalid volume type: {volume_type} for cloud {cloud}')

    # No explicit type: use the first table entry that matches this cloud.
    volume_types = next(
        (candidates
         for cloud_key, candidates in volume_lib.CLOUD_TO_VOLUME_TYPE.items()
         if cloud.is_same_cloud(cloud_key)), None)
    if volume_types is None:
        raise ValueError(f'No default volume type found for cloud {cloud}')
    if len(volume_types) != 1:
        raise ValueError(
            f'Found multiple volume types for cloud {cloud}: {volume_types}'
        )
    return volume_types[0].value
43
+
44
+
45
def _resolve_pvc_volume_config(cloud: clouds.Cloud,
                               config: provision_common.ProvisionConfig,
                               volume_config: Dict[str, Any]) -> Dict[str, Any]:
    """Validate a PVC volume config and fill in its defaults.

    ``volume_config`` is modified in place: 'access_mode' is set to
    ReadWriteOnce when absent, and 'namespace' is set from the provider
    config. The same dict is returned.

    Raises:
        ValueError: If ``cloud`` is not Kubernetes, the access mode is not a
            ``volume_utils.VolumeAccessMode`` value, or ReadWriteOnce is
            combined with ``config.count > 1``.
    """
    if not cloud.is_same_cloud(clouds.Kubernetes()):
        raise ValueError(
            f'PVC volume type is only supported on Kubernetes not on {cloud}')

    read_write_once = volume_utils.VolumeAccessMode.READ_WRITE_ONCE.value
    access_mode = volume_config.get('access_mode')
    if access_mode is None:
        access_mode = read_write_once
        volume_config['access_mode'] = access_mode
    else:
        valid_modes = [mode.value for mode in volume_utils.VolumeAccessMode]
        if access_mode not in valid_modes:
            raise ValueError(f'Invalid access mode: {access_mode} for PVC')

    if access_mode == read_write_once and config.count > 1:
        raise ValueError(
            'Access mode ReadWriteOnce is not supported for multi-node'
            ' clusters.')

    volume_config['namespace'] = kubernetes_utils.get_namespace_from_config(
        config.provider_config)
    return volume_config
69
+
70
+
71
def _create_ephemeral_volume(
    cloud: clouds.Cloud, region: str, cluster_name_on_cloud: str,
    config: provision_common.ProvisionConfig,
    volume_mount: volume_utils.VolumeMount
) -> Optional[volume_utils.VolumeInfo]:
    """Create one ephemeral volume and describe where it is mounted.

    Only the PVC volume type is handled. Any other resolved type is logged
    as a warning and skipped.

    Returns:
        A ``VolumeInfo`` for the applied volume, or None when the volume
        type is skipped.

    Raises:
        ValueError: If a label fails ``cloud.is_label_valid`` or no record
            is found for the volume after it is applied. Errors from the
            type and PVC-config resolvers propagate unchanged.
    """
    mount_spec = volume_mount.volume_config
    volume_type = _resolve_volume_type(cloud, mount_spec.type)
    if volume_type != volume_utils.VolumeType.PVC.value:
        logger.warning(f'Skipping unsupported ephemeral volume type: '
                       f'{volume_type} for cloud {cloud}.')
        return None

    internal_volume_config = _resolve_pvc_volume_config(cloud, config,
                                                        mount_spec.config)

    # A non-empty labels dict from the spec is validated and then extended
    # in place; a missing or empty one is replaced by a fresh dict.
    labels = mount_spec.labels
    if not labels:
        labels = {}
    for key, value in labels.items():
        valid, err_msg = cloud.is_label_valid(key, value)
        if not valid:
            raise ValueError(f'{err_msg}')
    labels[provision_constants.TAG_SKYPILOT_CLUSTER_NAME] = (
        cluster_name_on_cloud)

    volume_name = mount_spec.name
    volume_server_core.volume_apply(
        name=volume_name,
        volume_type=volume_type,
        cloud=repr(cloud),
        region=region,
        zone=None,
        size=mount_spec.size,
        config=internal_volume_config,
        labels=labels,
        is_ephemeral=True,
    )

    # Read the stored record back to obtain the on-cloud name and id.
    record = global_user_state.get_volume_by_name(volume_name)
    if record is None:
        raise ValueError(f'Failed to get record for volume: {volume_name}')
    assert 'handle' in record, 'Volume handle is None.'
    handle: models.VolumeConfig = record['handle']
    return volume_utils.VolumeInfo(
        name=volume_name,
        path=volume_mount.path,
        volume_name_on_cloud=handle.name_on_cloud,
        volume_id_on_cloud=handle.id_on_cloud,
    )
122
+
123
+
124
def provision_ephemeral_volumes(
    cloud: clouds.Cloud,
    region: str,
    cluster_name_on_cloud: str,
    config: provision_common.ProvisionConfig,
) -> None:
    """Provision the ephemeral volumes listed in the provider config.

    Reads the 'ephemeral_volume_specs' entry of ``config.provider_config``
    and returns immediately when it is missing or empty. Otherwise each spec
    is created in turn, and the resulting volume infos (skipped volumes
    excluded) are stored under 'ephemeral_volume_infos'.

    Raises:
        Exception: Any failure is logged and then re-raised.
    """
    provider_config = config.provider_config
    mount_specs = provider_config.get('ephemeral_volume_specs')
    if not mount_specs:
        return
    try:
        created_infos = []
        for mount_spec in mount_specs:
            # Parse a deep copy rather than the stored spec itself.
            volume_mount = volume_utils.VolumeMount.from_yaml_config(
                copy.deepcopy(mount_spec))
            volume_info = _create_ephemeral_volume(cloud, region,
                                                   cluster_name_on_cloud,
                                                   config, volume_mount)
            if volume_info is not None:
                created_infos.append(volume_info)
        provider_config['ephemeral_volume_infos'] = created_infos
    except Exception as e:  # pylint: disable=broad-exception-caught
        logger.error(f'Failed to provision ephemeral volumes: {e}')
        raise e
150
+
151
+
152
def delete_ephemeral_volumes(provider_config: Dict[str, Any],) -> None:
    """Delete the ephemeral volumes listed in the provider config.

    Reads the 'ephemeral_volume_specs' entry and returns immediately when it
    is missing or empty. Otherwise the volume name of every spec is
    collected and all names are deleted in one call that ignores volumes
    that are not found.
    """
    mount_specs = provider_config.get('ephemeral_volume_specs')
    if not mount_specs:
        return
    # Parse a deep copy of each spec rather than the stored spec itself.
    names_to_delete = [
        volume_utils.VolumeMount.from_yaml_config(
            copy.deepcopy(mount_spec)).volume_config.name
        for mount_spec in mount_specs
    ]
    volume_server_core.volume_delete(names=names_to_delete,
                                     ignore_not_found=True)
@@ -30,5 +30,5 @@ def get_unverified_session():
30
30
  """
31
31
  session = requests.session()
32
32
  session.verify = False
33
- requests.packages.urllib3.disable_warnings()
33
+ requests.packages.urllib3.disable_warnings() # type: ignore[attr-defined]
34
34
  return session
@@ -89,5 +89,6 @@ def create_unverified_session(session, suppress_warning=True):
89
89
  session.verify = False
90
90
  if suppress_warning:
91
91
  # Suppress unverified https request warnings
92
- requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
92
+ requests.packages.urllib3.disable_warnings( # type: ignore
93
+ InsecureRequestWarning)
93
94
  return session