skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546) hide show
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/hyperbolic.py +8 -0
  8. sky/adaptors/ibm.py +5 -2
  9. sky/adaptors/kubernetes.py +149 -18
  10. sky/adaptors/nebius.py +173 -30
  11. sky/adaptors/primeintellect.py +1 -0
  12. sky/adaptors/runpod.py +68 -0
  13. sky/adaptors/seeweb.py +183 -0
  14. sky/adaptors/shadeform.py +89 -0
  15. sky/admin_policy.py +187 -4
  16. sky/authentication.py +179 -225
  17. sky/backends/__init__.py +4 -2
  18. sky/backends/backend.py +22 -9
  19. sky/backends/backend_utils.py +1323 -397
  20. sky/backends/cloud_vm_ray_backend.py +1749 -1029
  21. sky/backends/docker_utils.py +1 -1
  22. sky/backends/local_docker_backend.py +11 -6
  23. sky/backends/task_codegen.py +633 -0
  24. sky/backends/wheel_utils.py +55 -9
  25. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  26. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  27. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  28. sky/{clouds/service_catalog → catalog}/common.py +90 -49
  29. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  31. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  33. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
  34. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  35. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  36. sky/catalog/data_fetchers/fetch_nebius.py +338 -0
  37. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  38. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  39. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  40. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  41. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  42. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  43. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  44. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  45. sky/catalog/hyperbolic_catalog.py +136 -0
  46. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  47. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  48. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  49. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  50. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  51. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  52. sky/catalog/primeintellect_catalog.py +95 -0
  53. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  54. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  55. sky/catalog/seeweb_catalog.py +184 -0
  56. sky/catalog/shadeform_catalog.py +165 -0
  57. sky/catalog/ssh_catalog.py +167 -0
  58. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  59. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  60. sky/check.py +533 -185
  61. sky/cli.py +5 -5975
  62. sky/client/{cli.py → cli/command.py} +2591 -1956
  63. sky/client/cli/deprecation_utils.py +99 -0
  64. sky/client/cli/flags.py +359 -0
  65. sky/client/cli/table_utils.py +322 -0
  66. sky/client/cli/utils.py +79 -0
  67. sky/client/common.py +78 -32
  68. sky/client/oauth.py +82 -0
  69. sky/client/sdk.py +1219 -319
  70. sky/client/sdk_async.py +827 -0
  71. sky/client/service_account_auth.py +47 -0
  72. sky/cloud_stores.py +82 -3
  73. sky/clouds/__init__.py +13 -0
  74. sky/clouds/aws.py +564 -164
  75. sky/clouds/azure.py +105 -83
  76. sky/clouds/cloud.py +140 -40
  77. sky/clouds/cudo.py +68 -50
  78. sky/clouds/do.py +66 -48
  79. sky/clouds/fluidstack.py +63 -44
  80. sky/clouds/gcp.py +339 -110
  81. sky/clouds/hyperbolic.py +293 -0
  82. sky/clouds/ibm.py +70 -49
  83. sky/clouds/kubernetes.py +570 -162
  84. sky/clouds/lambda_cloud.py +74 -54
  85. sky/clouds/nebius.py +210 -81
  86. sky/clouds/oci.py +88 -66
  87. sky/clouds/paperspace.py +61 -44
  88. sky/clouds/primeintellect.py +317 -0
  89. sky/clouds/runpod.py +164 -74
  90. sky/clouds/scp.py +89 -86
  91. sky/clouds/seeweb.py +477 -0
  92. sky/clouds/shadeform.py +400 -0
  93. sky/clouds/ssh.py +263 -0
  94. sky/clouds/utils/aws_utils.py +10 -4
  95. sky/clouds/utils/gcp_utils.py +87 -11
  96. sky/clouds/utils/oci_utils.py +38 -14
  97. sky/clouds/utils/scp_utils.py +231 -167
  98. sky/clouds/vast.py +99 -77
  99. sky/clouds/vsphere.py +51 -40
  100. sky/core.py +375 -173
  101. sky/dag.py +15 -0
  102. sky/dashboard/out/404.html +1 -1
  103. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  105. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  106. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  107. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  110. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  111. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  112. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  113. sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  115. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  116. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  118. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  119. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  121. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  122. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  124. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  126. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  127. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  128. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  129. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  131. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  133. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  134. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  136. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  139. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  141. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  143. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  144. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  148. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  149. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  151. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  152. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
  155. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  156. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  157. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  158. sky/dashboard/out/clusters/[cluster].html +1 -1
  159. sky/dashboard/out/clusters.html +1 -1
  160. sky/dashboard/out/config.html +1 -0
  161. sky/dashboard/out/index.html +1 -1
  162. sky/dashboard/out/infra/[context].html +1 -0
  163. sky/dashboard/out/infra.html +1 -0
  164. sky/dashboard/out/jobs/[job].html +1 -1
  165. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  166. sky/dashboard/out/jobs.html +1 -1
  167. sky/dashboard/out/users.html +1 -0
  168. sky/dashboard/out/volumes.html +1 -0
  169. sky/dashboard/out/workspace/new.html +1 -0
  170. sky/dashboard/out/workspaces/[name].html +1 -0
  171. sky/dashboard/out/workspaces.html +1 -0
  172. sky/data/data_utils.py +137 -1
  173. sky/data/mounting_utils.py +269 -84
  174. sky/data/storage.py +1460 -1807
  175. sky/data/storage_utils.py +43 -57
  176. sky/exceptions.py +126 -2
  177. sky/execution.py +216 -63
  178. sky/global_user_state.py +2390 -586
  179. sky/jobs/__init__.py +7 -0
  180. sky/jobs/client/sdk.py +300 -58
  181. sky/jobs/client/sdk_async.py +161 -0
  182. sky/jobs/constants.py +15 -8
  183. sky/jobs/controller.py +848 -275
  184. sky/jobs/file_content_utils.py +128 -0
  185. sky/jobs/log_gc.py +193 -0
  186. sky/jobs/recovery_strategy.py +402 -152
  187. sky/jobs/scheduler.py +314 -189
  188. sky/jobs/server/core.py +836 -255
  189. sky/jobs/server/server.py +156 -115
  190. sky/jobs/server/utils.py +136 -0
  191. sky/jobs/state.py +2109 -706
  192. sky/jobs/utils.py +1306 -215
  193. sky/logs/__init__.py +21 -0
  194. sky/logs/agent.py +108 -0
  195. sky/logs/aws.py +243 -0
  196. sky/logs/gcp.py +91 -0
  197. sky/metrics/__init__.py +0 -0
  198. sky/metrics/utils.py +453 -0
  199. sky/models.py +78 -1
  200. sky/optimizer.py +164 -70
  201. sky/provision/__init__.py +90 -4
  202. sky/provision/aws/config.py +147 -26
  203. sky/provision/aws/instance.py +136 -50
  204. sky/provision/azure/instance.py +11 -6
  205. sky/provision/common.py +13 -1
  206. sky/provision/cudo/cudo_machine_type.py +1 -1
  207. sky/provision/cudo/cudo_utils.py +14 -8
  208. sky/provision/cudo/cudo_wrapper.py +72 -71
  209. sky/provision/cudo/instance.py +10 -6
  210. sky/provision/do/instance.py +10 -6
  211. sky/provision/do/utils.py +4 -3
  212. sky/provision/docker_utils.py +140 -33
  213. sky/provision/fluidstack/instance.py +13 -8
  214. sky/provision/gcp/__init__.py +1 -0
  215. sky/provision/gcp/config.py +301 -19
  216. sky/provision/gcp/constants.py +218 -0
  217. sky/provision/gcp/instance.py +36 -8
  218. sky/provision/gcp/instance_utils.py +18 -4
  219. sky/provision/gcp/volume_utils.py +247 -0
  220. sky/provision/hyperbolic/__init__.py +12 -0
  221. sky/provision/hyperbolic/config.py +10 -0
  222. sky/provision/hyperbolic/instance.py +437 -0
  223. sky/provision/hyperbolic/utils.py +373 -0
  224. sky/provision/instance_setup.py +101 -20
  225. sky/provision/kubernetes/__init__.py +5 -0
  226. sky/provision/kubernetes/config.py +9 -52
  227. sky/provision/kubernetes/constants.py +17 -0
  228. sky/provision/kubernetes/instance.py +919 -280
  229. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  230. sky/provision/kubernetes/network.py +27 -17
  231. sky/provision/kubernetes/network_utils.py +44 -43
  232. sky/provision/kubernetes/utils.py +1221 -534
  233. sky/provision/kubernetes/volume.py +343 -0
  234. sky/provision/lambda_cloud/instance.py +22 -16
  235. sky/provision/nebius/constants.py +50 -0
  236. sky/provision/nebius/instance.py +19 -6
  237. sky/provision/nebius/utils.py +237 -137
  238. sky/provision/oci/instance.py +10 -5
  239. sky/provision/paperspace/instance.py +10 -7
  240. sky/provision/paperspace/utils.py +1 -1
  241. sky/provision/primeintellect/__init__.py +10 -0
  242. sky/provision/primeintellect/config.py +11 -0
  243. sky/provision/primeintellect/instance.py +454 -0
  244. sky/provision/primeintellect/utils.py +398 -0
  245. sky/provision/provisioner.py +117 -36
  246. sky/provision/runpod/__init__.py +5 -0
  247. sky/provision/runpod/instance.py +27 -6
  248. sky/provision/runpod/utils.py +51 -18
  249. sky/provision/runpod/volume.py +214 -0
  250. sky/provision/scp/__init__.py +15 -0
  251. sky/provision/scp/config.py +93 -0
  252. sky/provision/scp/instance.py +707 -0
  253. sky/provision/seeweb/__init__.py +11 -0
  254. sky/provision/seeweb/config.py +13 -0
  255. sky/provision/seeweb/instance.py +812 -0
  256. sky/provision/shadeform/__init__.py +11 -0
  257. sky/provision/shadeform/config.py +12 -0
  258. sky/provision/shadeform/instance.py +351 -0
  259. sky/provision/shadeform/shadeform_utils.py +83 -0
  260. sky/provision/ssh/__init__.py +18 -0
  261. sky/provision/vast/instance.py +13 -8
  262. sky/provision/vast/utils.py +10 -7
  263. sky/provision/volume.py +164 -0
  264. sky/provision/vsphere/common/ssl_helper.py +1 -1
  265. sky/provision/vsphere/common/vapiconnect.py +2 -1
  266. sky/provision/vsphere/common/vim_utils.py +4 -4
  267. sky/provision/vsphere/instance.py +15 -10
  268. sky/provision/vsphere/vsphere_utils.py +17 -20
  269. sky/py.typed +0 -0
  270. sky/resources.py +845 -119
  271. sky/schemas/__init__.py +0 -0
  272. sky/schemas/api/__init__.py +0 -0
  273. sky/schemas/api/responses.py +227 -0
  274. sky/schemas/db/README +4 -0
  275. sky/schemas/db/env.py +90 -0
  276. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  277. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  278. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  279. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  280. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  281. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  282. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  283. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  284. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  285. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  286. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  287. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  288. sky/schemas/db/script.py.mako +28 -0
  289. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  290. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  291. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  292. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  293. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  294. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  295. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  296. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  297. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  298. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  299. sky/schemas/generated/__init__.py +0 -0
  300. sky/schemas/generated/autostopv1_pb2.py +36 -0
  301. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  302. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  303. sky/schemas/generated/jobsv1_pb2.py +86 -0
  304. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  305. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  306. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  307. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  308. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  309. sky/schemas/generated/servev1_pb2.py +58 -0
  310. sky/schemas/generated/servev1_pb2.pyi +115 -0
  311. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  312. sky/serve/autoscalers.py +357 -5
  313. sky/serve/client/impl.py +310 -0
  314. sky/serve/client/sdk.py +47 -139
  315. sky/serve/client/sdk_async.py +130 -0
  316. sky/serve/constants.py +12 -9
  317. sky/serve/controller.py +68 -17
  318. sky/serve/load_balancer.py +106 -60
  319. sky/serve/load_balancing_policies.py +116 -2
  320. sky/serve/replica_managers.py +434 -249
  321. sky/serve/serve_rpc_utils.py +179 -0
  322. sky/serve/serve_state.py +569 -257
  323. sky/serve/serve_utils.py +775 -265
  324. sky/serve/server/core.py +66 -711
  325. sky/serve/server/impl.py +1093 -0
  326. sky/serve/server/server.py +21 -18
  327. sky/serve/service.py +192 -89
  328. sky/serve/service_spec.py +144 -20
  329. sky/serve/spot_placer.py +3 -0
  330. sky/server/auth/__init__.py +0 -0
  331. sky/server/auth/authn.py +50 -0
  332. sky/server/auth/loopback.py +38 -0
  333. sky/server/auth/oauth2_proxy.py +202 -0
  334. sky/server/common.py +478 -182
  335. sky/server/config.py +85 -23
  336. sky/server/constants.py +44 -6
  337. sky/server/daemons.py +295 -0
  338. sky/server/html/token_page.html +185 -0
  339. sky/server/metrics.py +160 -0
  340. sky/server/middleware_utils.py +166 -0
  341. sky/server/requests/executor.py +558 -138
  342. sky/server/requests/payloads.py +364 -24
  343. sky/server/requests/preconditions.py +21 -17
  344. sky/server/requests/process.py +112 -29
  345. sky/server/requests/request_names.py +121 -0
  346. sky/server/requests/requests.py +822 -226
  347. sky/server/requests/serializers/decoders.py +82 -31
  348. sky/server/requests/serializers/encoders.py +140 -22
  349. sky/server/requests/threads.py +117 -0
  350. sky/server/rest.py +455 -0
  351. sky/server/server.py +1309 -285
  352. sky/server/state.py +20 -0
  353. sky/server/stream_utils.py +327 -61
  354. sky/server/uvicorn.py +217 -3
  355. sky/server/versions.py +270 -0
  356. sky/setup_files/MANIFEST.in +11 -1
  357. sky/setup_files/alembic.ini +160 -0
  358. sky/setup_files/dependencies.py +139 -31
  359. sky/setup_files/setup.py +44 -42
  360. sky/sky_logging.py +114 -7
  361. sky/skylet/attempt_skylet.py +106 -24
  362. sky/skylet/autostop_lib.py +129 -8
  363. sky/skylet/configs.py +29 -20
  364. sky/skylet/constants.py +216 -25
  365. sky/skylet/events.py +101 -21
  366. sky/skylet/job_lib.py +345 -164
  367. sky/skylet/log_lib.py +297 -18
  368. sky/skylet/log_lib.pyi +44 -1
  369. sky/skylet/providers/ibm/node_provider.py +12 -8
  370. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  371. sky/skylet/ray_patches/__init__.py +17 -3
  372. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  373. sky/skylet/ray_patches/cli.py.diff +19 -0
  374. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  375. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  376. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  377. sky/skylet/ray_patches/updater.py.diff +18 -0
  378. sky/skylet/ray_patches/worker.py.diff +41 -0
  379. sky/skylet/runtime_utils.py +21 -0
  380. sky/skylet/services.py +568 -0
  381. sky/skylet/skylet.py +72 -4
  382. sky/skylet/subprocess_daemon.py +104 -29
  383. sky/skypilot_config.py +506 -99
  384. sky/ssh_node_pools/__init__.py +1 -0
  385. sky/ssh_node_pools/core.py +135 -0
  386. sky/ssh_node_pools/server.py +233 -0
  387. sky/task.py +685 -163
  388. sky/templates/aws-ray.yml.j2 +11 -3
  389. sky/templates/azure-ray.yml.j2 +2 -1
  390. sky/templates/cudo-ray.yml.j2 +1 -0
  391. sky/templates/do-ray.yml.j2 +2 -1
  392. sky/templates/fluidstack-ray.yml.j2 +1 -0
  393. sky/templates/gcp-ray.yml.j2 +62 -1
  394. sky/templates/hyperbolic-ray.yml.j2 +68 -0
  395. sky/templates/ibm-ray.yml.j2 +2 -1
  396. sky/templates/jobs-controller.yaml.j2 +27 -24
  397. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  398. sky/templates/kubernetes-ray.yml.j2 +611 -50
  399. sky/templates/lambda-ray.yml.j2 +2 -1
  400. sky/templates/nebius-ray.yml.j2 +34 -12
  401. sky/templates/oci-ray.yml.j2 +1 -0
  402. sky/templates/paperspace-ray.yml.j2 +2 -1
  403. sky/templates/primeintellect-ray.yml.j2 +72 -0
  404. sky/templates/runpod-ray.yml.j2 +10 -1
  405. sky/templates/scp-ray.yml.j2 +4 -50
  406. sky/templates/seeweb-ray.yml.j2 +171 -0
  407. sky/templates/shadeform-ray.yml.j2 +73 -0
  408. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  409. sky/templates/vast-ray.yml.j2 +1 -0
  410. sky/templates/vsphere-ray.yml.j2 +1 -0
  411. sky/templates/websocket_proxy.py +212 -37
  412. sky/usage/usage_lib.py +31 -15
  413. sky/users/__init__.py +0 -0
  414. sky/users/model.conf +15 -0
  415. sky/users/permission.py +397 -0
  416. sky/users/rbac.py +121 -0
  417. sky/users/server.py +720 -0
  418. sky/users/token_service.py +218 -0
  419. sky/utils/accelerator_registry.py +35 -5
  420. sky/utils/admin_policy_utils.py +84 -38
  421. sky/utils/annotations.py +38 -5
  422. sky/utils/asyncio_utils.py +78 -0
  423. sky/utils/atomic.py +1 -1
  424. sky/utils/auth_utils.py +153 -0
  425. sky/utils/benchmark_utils.py +60 -0
  426. sky/utils/cli_utils/status_utils.py +159 -86
  427. sky/utils/cluster_utils.py +31 -9
  428. sky/utils/command_runner.py +354 -68
  429. sky/utils/command_runner.pyi +93 -3
  430. sky/utils/common.py +35 -8
  431. sky/utils/common_utils.py +314 -91
  432. sky/utils/config_utils.py +74 -5
  433. sky/utils/context.py +403 -0
  434. sky/utils/context_utils.py +242 -0
  435. sky/utils/controller_utils.py +383 -89
  436. sky/utils/dag_utils.py +31 -12
  437. sky/utils/db/__init__.py +0 -0
  438. sky/utils/db/db_utils.py +485 -0
  439. sky/utils/db/kv_cache.py +149 -0
  440. sky/utils/db/migration_utils.py +137 -0
  441. sky/utils/directory_utils.py +12 -0
  442. sky/utils/env_options.py +13 -0
  443. sky/utils/git.py +567 -0
  444. sky/utils/git_clone.sh +460 -0
  445. sky/utils/infra_utils.py +195 -0
  446. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  447. sky/utils/kubernetes/config_map_utils.py +133 -0
  448. sky/utils/kubernetes/create_cluster.sh +15 -29
  449. sky/utils/kubernetes/delete_cluster.sh +10 -7
  450. sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
  451. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  452. sky/utils/kubernetes/generate_kind_config.py +6 -66
  453. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  454. sky/utils/kubernetes/gpu_labeler.py +18 -8
  455. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  456. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  457. sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
  458. sky/utils/kubernetes/rsync_helper.sh +11 -3
  459. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  460. sky/utils/kubernetes/ssh_utils.py +221 -0
  461. sky/utils/kubernetes_enums.py +8 -15
  462. sky/utils/lock_events.py +94 -0
  463. sky/utils/locks.py +416 -0
  464. sky/utils/log_utils.py +82 -107
  465. sky/utils/perf_utils.py +22 -0
  466. sky/utils/resource_checker.py +298 -0
  467. sky/utils/resources_utils.py +249 -32
  468. sky/utils/rich_utils.py +217 -39
  469. sky/utils/schemas.py +955 -160
  470. sky/utils/serialize_utils.py +16 -0
  471. sky/utils/status_lib.py +10 -0
  472. sky/utils/subprocess_utils.py +29 -15
  473. sky/utils/tempstore.py +70 -0
  474. sky/utils/thread_utils.py +91 -0
  475. sky/utils/timeline.py +26 -53
  476. sky/utils/ux_utils.py +84 -15
  477. sky/utils/validator.py +11 -1
  478. sky/utils/volume.py +165 -0
  479. sky/utils/yaml_utils.py +111 -0
  480. sky/volumes/__init__.py +13 -0
  481. sky/volumes/client/__init__.py +0 -0
  482. sky/volumes/client/sdk.py +150 -0
  483. sky/volumes/server/__init__.py +0 -0
  484. sky/volumes/server/core.py +270 -0
  485. sky/volumes/server/server.py +124 -0
  486. sky/volumes/volume.py +215 -0
  487. sky/workspaces/__init__.py +0 -0
  488. sky/workspaces/core.py +655 -0
  489. sky/workspaces/server.py +101 -0
  490. sky/workspaces/utils.py +56 -0
  491. sky_templates/README.md +3 -0
  492. sky_templates/__init__.py +3 -0
  493. sky_templates/ray/__init__.py +0 -0
  494. sky_templates/ray/start_cluster +183 -0
  495. sky_templates/ray/stop_cluster +75 -0
  496. skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
  497. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  498. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
  499. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  500. sky/benchmark/benchmark_state.py +0 -256
  501. sky/benchmark/benchmark_utils.py +0 -641
  502. sky/clouds/service_catalog/constants.py +0 -7
  503. sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
  504. sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
  505. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  506. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  507. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  508. sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
  509. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  510. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  511. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  512. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  513. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  514. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  515. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  516. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
  517. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  518. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  519. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  520. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
  521. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  522. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  523. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  524. sky/jobs/dashboard/dashboard.py +0 -223
  525. sky/jobs/dashboard/static/favicon.ico +0 -0
  526. sky/jobs/dashboard/templates/index.html +0 -831
  527. sky/jobs/server/dashboard_utils.py +0 -69
  528. sky/skylet/providers/scp/__init__.py +0 -2
  529. sky/skylet/providers/scp/config.py +0 -149
  530. sky/skylet/providers/scp/node_provider.py +0 -578
  531. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  532. sky/utils/db_utils.py +0 -100
  533. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  534. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  535. skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
  536. skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
  537. skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
  538. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  539. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  540. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  541. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  542. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  543. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  544. /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  545. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  546. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/clouds/vast.py CHANGED
@@ -1,15 +1,20 @@
1
1
  """ Vast Cloud. """
2
2
 
3
+ import os
3
4
  import typing
4
5
  from typing import Dict, Iterator, List, Optional, Tuple, Union
5
6
 
7
+ from sky import catalog
6
8
  from sky import clouds
7
- from sky.clouds import service_catalog
9
+ from sky.adaptors import common
8
10
  from sky.utils import registry
9
11
  from sky.utils import resources_utils
10
12
 
11
13
  if typing.TYPE_CHECKING:
12
14
  from sky import resources as resources_lib
15
+ from sky.utils import volume as volume_lib
16
+
17
+ _CREDENTIAL_PATH = '~/.config/vastai/vast_api_key'
13
18
 
14
19
 
15
20
  @registry.CLOUD_REGISTRY.register
@@ -25,12 +30,16 @@ class Vast(clouds.Cloud):
25
30
  'are non-trivial on Vast.'),
26
31
  clouds.CloudImplementationFeatures.CUSTOM_DISK_TIER:
27
32
  ('Customizing disk tier is not supported yet on Vast.'),
28
- clouds.CloudImplementationFeatures.OPEN_PORTS:
29
- ('Opening ports is currently not supported on Vast.'),
33
+ clouds.CloudImplementationFeatures.CUSTOM_NETWORK_TIER:
34
+ ('Custom network tier is currently not supported in '
35
+ f'{_REPR}.'),
30
36
  clouds.CloudImplementationFeatures.STORAGE_MOUNTING:
31
37
  ('Mounting object stores is not supported on Vast.'),
32
38
  clouds.CloudImplementationFeatures.HIGH_AVAILABILITY_CONTROLLERS:
33
39
  ('High availability controllers are not supported on Vast.'),
40
+ clouds.CloudImplementationFeatures.CUSTOM_MULTI_NETWORK:
41
+ ('Customized multiple network interfaces are not supported on Vast.'
42
+ ),
34
43
  }
35
44
  #
36
45
  # Vast doesn't have a max cluster name limit. This number
@@ -42,10 +51,13 @@ class Vast(clouds.Cloud):
42
51
 
43
52
  PROVISIONER_VERSION = clouds.ProvisionerVersion.SKYPILOT
44
53
  STATUS_VERSION = clouds.StatusVersion.SKYPILOT
54
+ OPEN_PORTS_VERSION = clouds.OpenPortsVersion.LAUNCH_ONLY
45
55
 
46
56
  @classmethod
47
57
  def _unsupported_features_for_resources(
48
- cls, resources: 'resources_lib.Resources'
58
+ cls,
59
+ resources: 'resources_lib.Resources',
60
+ region: Optional[str] = None,
49
61
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
50
62
  """The features not supported based on the resources provided.
51
63
 
@@ -64,13 +76,18 @@ class Vast(clouds.Cloud):
64
76
  return cls._MAX_CLUSTER_NAME_LEN_LIMIT
65
77
 
66
78
  @classmethod
67
- def regions_with_offering(cls, instance_type: str,
68
- accelerators: Optional[Dict[str, int]],
69
- use_spot: bool, region: Optional[str],
70
- zone: Optional[str]) -> List[clouds.Region]:
79
+ def regions_with_offering(
80
+ cls,
81
+ instance_type: str,
82
+ accelerators: Optional[Dict[str, int]],
83
+ use_spot: bool,
84
+ region: Optional[str],
85
+ zone: Optional[str],
86
+ resources: Optional['resources_lib.Resources'] = None,
87
+ ) -> List[clouds.Region]:
71
88
  assert zone is None, 'Vast does not support zones.'
72
89
  del accelerators, zone # unused
73
- regions = service_catalog.get_region_zones_for_instance_type(
90
+ regions = catalog.get_region_zones_for_instance_type(
74
91
  instance_type, use_spot, 'vast')
75
92
 
76
93
  if region is not None:
@@ -82,8 +99,8 @@ class Vast(clouds.Cloud):
82
99
  cls,
83
100
  instance_type: str,
84
101
  ) -> Tuple[Optional[float], Optional[float]]:
85
- return service_catalog.get_vcpus_mem_from_instance_type(instance_type,
86
- clouds='vast')
102
+ return catalog.get_vcpus_mem_from_instance_type(instance_type,
103
+ clouds='vast')
87
104
 
88
105
  @classmethod
89
106
  def zones_provision_loop(
@@ -110,11 +127,11 @@ class Vast(clouds.Cloud):
110
127
  use_spot: bool,
111
128
  region: Optional[str] = None,
112
129
  zone: Optional[str] = None) -> float:
113
- return service_catalog.get_hourly_cost(instance_type,
114
- use_spot=use_spot,
115
- region=region,
116
- zone=zone,
117
- clouds='vast')
130
+ return catalog.get_hourly_cost(instance_type,
131
+ use_spot=use_spot,
132
+ region=region,
133
+ zone=zone,
134
+ clouds='vast')
118
135
 
119
136
  def accelerators_to_hourly_cost(self,
120
137
  accelerators: Dict[str, int],
@@ -129,49 +146,55 @@ class Vast(clouds.Cloud):
129
146
  return 0.0
130
147
 
131
148
  @classmethod
132
- def get_default_instance_type(
133
- cls,
134
- cpus: Optional[str] = None,
135
- memory: Optional[str] = None,
136
- disk_tier: Optional[resources_utils.DiskTier] = None
137
- ) -> Optional[str]:
149
+ def get_default_instance_type(cls,
150
+ cpus: Optional[str] = None,
151
+ memory: Optional[str] = None,
152
+ disk_tier: Optional[
153
+ resources_utils.DiskTier] = None,
154
+ region: Optional[str] = None,
155
+ zone: Optional[str] = None) -> Optional[str]:
138
156
  """Returns the default instance type for Vast."""
139
- return service_catalog.get_default_instance_type(cpus=cpus,
140
- memory=memory,
141
- disk_tier=disk_tier,
142
- clouds='vast')
157
+ return catalog.get_default_instance_type(cpus=cpus,
158
+ memory=memory,
159
+ disk_tier=disk_tier,
160
+ region=region,
161
+ zone=zone,
162
+ clouds='vast')
143
163
 
144
164
  @classmethod
145
165
  def get_accelerators_from_instance_type(
146
166
  cls, instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
147
- return service_catalog.get_accelerators_from_instance_type(
148
- instance_type, clouds='vast')
167
+ return catalog.get_accelerators_from_instance_type(instance_type,
168
+ clouds='vast')
149
169
 
150
170
  @classmethod
151
171
  def get_zone_shell_cmd(cls) -> Optional[str]:
152
172
  return None
153
173
 
154
174
  def make_deploy_resources_variables(
155
- self,
156
- resources: 'resources_lib.Resources',
157
- cluster_name: resources_utils.ClusterName,
158
- region: 'clouds.Region',
159
- zones: Optional[List['clouds.Zone']],
160
- num_nodes: int,
161
- dryrun: bool = False) -> Dict[str, Optional[str]]:
175
+ self,
176
+ resources: 'resources_lib.Resources',
177
+ cluster_name: resources_utils.ClusterName,
178
+ region: 'clouds.Region',
179
+ zones: Optional[List['clouds.Zone']],
180
+ num_nodes: int,
181
+ dryrun: bool = False,
182
+ volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
183
+ ) -> Dict[str, Optional[str]]:
162
184
  del zones, dryrun, cluster_name, num_nodes # unused
163
185
 
164
- r = resources
165
- acc_dict = self.get_accelerators_from_instance_type(r.instance_type)
186
+ resources = resources.assert_launchable()
187
+ acc_dict = self.get_accelerators_from_instance_type(
188
+ resources.instance_type)
166
189
  custom_resources = resources_utils.make_ray_custom_resources_str(
167
190
  acc_dict)
168
191
 
169
- if r.image_id is None:
170
- image_id = 'vastai/base:0.0.2'
171
- elif r.extract_docker_image() is not None:
172
- image_id = r.extract_docker_image()
192
+ if resources.image_id is None:
193
+ image_id: Optional[str] = 'vastai/base:0.0.2'
194
+ elif resources.extract_docker_image() is not None:
195
+ image_id = resources.extract_docker_image()
173
196
  else:
174
- image_id = r.image_id[r.region]
197
+ image_id = resources.image_id[resources.region]
175
198
 
176
199
  return {
177
200
  'instance_type': resources.instance_type,
@@ -208,7 +231,9 @@ class Vast(clouds.Cloud):
208
231
  default_instance_type = Vast.get_default_instance_type(
209
232
  cpus=resources.cpus,
210
233
  memory=resources.memory,
211
- disk_tier=resources.disk_tier)
234
+ disk_tier=resources.disk_tier,
235
+ region=resources.region,
236
+ zone=resources.zone)
212
237
  if default_instance_type is None:
213
238
  # TODO: Add hints to all return values in this method to help
214
239
  # users understand why the resources are not launchable.
@@ -219,16 +244,16 @@ class Vast(clouds.Cloud):
219
244
 
220
245
  assert len(accelerators) == 1, resources
221
246
  acc, acc_count = list(accelerators.items())[0]
222
- (instance_list, fuzzy_candidate_list
223
- ) = service_catalog.get_instance_type_for_accelerator(
224
- acc,
225
- acc_count,
226
- use_spot=resources.use_spot,
227
- cpus=resources.cpus,
228
- region=resources.region,
229
- zone=resources.zone,
230
- memory=resources.memory,
231
- clouds='vast')
247
+ (instance_list,
248
+ fuzzy_candidate_list) = catalog.get_instance_type_for_accelerator(
249
+ acc,
250
+ acc_count,
251
+ use_spot=resources.use_spot,
252
+ cpus=resources.cpus,
253
+ region=resources.region,
254
+ zone=resources.zone,
255
+ memory=resources.memory,
256
+ clouds='vast')
232
257
  if instance_list is None:
233
258
  return resources_utils.FeasibleResources([], fuzzy_candidate_list,
234
259
  None)
@@ -236,33 +261,30 @@ class Vast(clouds.Cloud):
236
261
  fuzzy_candidate_list, None)
237
262
 
238
263
  @classmethod
239
- def _check_compute_credentials(cls) -> Tuple[bool, Optional[str]]:
264
+ def _check_compute_credentials(
265
+ cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
240
266
  """Checks if the user has valid credentials for
241
- Vast's compute service. """
242
- try:
243
- import vastai_sdk as _vast # pylint: disable=import-outside-toplevel
244
- vast = _vast.VastAI()
245
-
246
- # We only support file pased credential passing
247
- if vast.creds_source != 'FILE':
248
- return False, (
249
- 'error \n' # First line is indented by 4 spaces
250
- ' Credentials can be set up by running: \n'
251
- ' $ pip install vastai\n'
252
- ' $ echo [key] > ~/.vast_api_key\n'
253
- ' For more information, see https://skypilot.readthedocs.io/en/latest/getting-started/installation.html#vast' # pylint: disable=line-too-long
254
- )
267
+ Vast's compute service."""
268
+
269
+ dependency_error_msg = ('Failed to import vast. '
270
+ 'To install, run: pip install skypilot[vast]')
271
+ if not common.can_import_modules(['vastai_sdk']):
272
+ return False, dependency_error_msg
255
273
 
256
- return True, None
274
+ if not os.path.exists(os.path.expanduser(_CREDENTIAL_PATH)):
275
+ return False, (
276
+ 'error \n' # First line is indented by 4 spaces
277
+ ' Credentials can be set up by running: \n'
278
+ ' $ pip install vastai\n'
279
+ ' $ mkdir -p ~/.config/vastai\n'
280
+ f' $ echo [key] > {_CREDENTIAL_PATH}\n'
281
+ ' For more information, see https://skypilot.readthedocs.io/en/latest/getting-started/installation.html#vast' # pylint: disable=line-too-long
282
+ )
257
283
 
258
- except ImportError:
259
- return False, ('Failed to import vast. '
260
- 'To install, run: pip install skypilot[vast]')
284
+ return True, None
261
285
 
262
286
  def get_credential_file_mounts(self) -> Dict[str, str]:
263
- return {
264
- '~/.config/vastai/vast_api_key': '~/.config/vastai/vast_api_key'
265
- }
287
+ return {f'{_CREDENTIAL_PATH}': f'{_CREDENTIAL_PATH}'}
266
288
 
267
289
  @classmethod
268
290
  def get_user_identities(cls) -> Optional[List[List[str]]]:
@@ -271,10 +293,10 @@ class Vast(clouds.Cloud):
271
293
  return None
272
294
 
273
295
  def instance_type_exists(self, instance_type: str) -> bool:
274
- return service_catalog.instance_type_exists(instance_type, 'vast')
296
+ return catalog.instance_type_exists(instance_type, 'vast')
275
297
 
276
298
  def validate_region_zone(self, region: Optional[str], zone: Optional[str]):
277
- return service_catalog.validate_region_zone(region, zone, clouds='vast')
299
+ return catalog.validate_region_zone(region, zone, clouds='vast')
278
300
 
279
301
  @classmethod
280
302
  def get_image_size(cls, image_id: str, region: Optional[str]) -> float:
sky/clouds/vsphere.py CHANGED
@@ -1,15 +1,13 @@
1
1
  """Vsphere cloud implementation."""
2
- import subprocess
3
2
  import typing
4
3
  from typing import Dict, Iterator, List, Optional, Tuple, Union
5
4
 
5
+ from sky import catalog
6
6
  from sky import clouds
7
7
  from sky.adaptors import common as adaptors_common
8
- from sky.clouds import service_catalog
9
8
  from sky.provision.vsphere import vsphere_utils
10
9
  from sky.provision.vsphere.vsphere_utils import get_vsphere_credentials
11
10
  from sky.provision.vsphere.vsphere_utils import initialize_vsphere_data
12
- from sky.utils import common_utils
13
11
  from sky.utils import registry
14
12
  from sky.utils import resources_utils
15
13
 
@@ -18,6 +16,7 @@ if typing.TYPE_CHECKING:
18
16
 
19
17
  # Renaming to avoid shadowing variables.
20
18
  from sky import resources as resources_lib
19
+ from sky.utils import volume as volume_lib
21
20
  else:
22
21
  requests = adaptors_common.LazyImport('requests')
23
22
 
@@ -52,10 +51,16 @@ class Vsphere(clouds.Cloud):
52
51
  (f'Spot instances are not supported in {_REPR}.'),
53
52
  clouds.CloudImplementationFeatures.CUSTOM_DISK_TIER:
54
53
  (f'Custom disk tiers are not supported in {_REPR}.'),
54
+ clouds.CloudImplementationFeatures.CUSTOM_NETWORK_TIER:
55
+ ('Custom network tier is currently not supported in '
56
+ f'{_REPR}.'),
55
57
  clouds.CloudImplementationFeatures.OPEN_PORTS:
56
58
  (f'Opening ports is currently not supported on {_REPR}.'),
57
59
  clouds.CloudImplementationFeatures.HIGH_AVAILABILITY_CONTROLLERS:
58
60
  (f'High availability controllers are not supported on {_REPR}.'),
61
+ clouds.CloudImplementationFeatures.CUSTOM_MULTI_NETWORK:
62
+ (f'Customized multiple network interfaces '
63
+ f'are not supported on {_REPR}.'),
59
64
  }
60
65
 
61
66
  _MAX_CLUSTER_NAME_LEN_LIMIT = 80 # The name can't exceeds 80 characters
@@ -68,7 +73,9 @@ class Vsphere(clouds.Cloud):
68
73
 
69
74
  @classmethod
70
75
  def _unsupported_features_for_resources(
71
- cls, resources: 'resources_lib.Resources'
76
+ cls,
77
+ resources: 'resources_lib.Resources',
78
+ region: Optional[str] = None,
72
79
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
73
80
  features = cls._CLOUD_UNSUPPORTED_FEATURES
74
81
  return features
@@ -85,9 +92,10 @@ class Vsphere(clouds.Cloud):
85
92
  use_spot: bool,
86
93
  region: Optional[str],
87
94
  zone: Optional[str],
95
+ resources: Optional['resources_lib.Resources'] = None,
88
96
  ) -> List[clouds.Region]:
89
97
  del accelerators, zone # unused
90
- regions = service_catalog.get_region_zones_for_instance_type(
98
+ regions = catalog.get_region_zones_for_instance_type(
91
99
  instance_type, use_spot, _CLOUD_VSPHERE)
92
100
 
93
101
  if region is not None:
@@ -142,23 +150,26 @@ class Vsphere(clouds.Cloud):
142
150
  return 'vSphere'
143
151
 
144
152
  @classmethod
145
- def get_default_instance_type(
146
- cls,
147
- cpus: Optional[str] = None,
148
- memory: Optional[str] = None,
149
- disk_tier: Optional[resources_utils.DiskTier] = None,
150
- ) -> Optional[str]:
151
- return service_catalog.get_default_instance_type(cpus=cpus,
152
- memory=memory,
153
- disk_tier=disk_tier,
154
- clouds=_CLOUD_VSPHERE)
153
+ def get_default_instance_type(cls,
154
+ cpus: Optional[str] = None,
155
+ memory: Optional[str] = None,
156
+ disk_tier: Optional[
157
+ resources_utils.DiskTier] = None,
158
+ region: Optional[str] = None,
159
+ zone: Optional[str] = None) -> Optional[str]:
160
+ return catalog.get_default_instance_type(cpus=cpus,
161
+ memory=memory,
162
+ disk_tier=disk_tier,
163
+ region=region,
164
+ zone=zone,
165
+ clouds=_CLOUD_VSPHERE)
155
166
 
156
167
  @classmethod
157
168
  def get_accelerators_from_instance_type(
158
169
  cls,
159
170
  instance_type: str,
160
171
  ) -> Optional[Dict[str, Union[int, float]]]:
161
- return service_catalog.get_accelerators_from_instance_type(
172
+ return catalog.get_accelerators_from_instance_type(
162
173
  instance_type, clouds=_CLOUD_VSPHERE)
163
174
 
164
175
  @classmethod
@@ -166,8 +177,8 @@ class Vsphere(clouds.Cloud):
166
177
  cls,
167
178
  instance_type: str,
168
179
  ) -> Tuple[Optional[float], Optional[float]]:
169
- return service_catalog.get_vcpus_mem_from_instance_type(
170
- instance_type, clouds=_CLOUD_VSPHERE)
180
+ return catalog.get_vcpus_mem_from_instance_type(instance_type,
181
+ clouds=_CLOUD_VSPHERE)
171
182
 
172
183
  @classmethod
173
184
  def get_zone_shell_cmd(cls) -> Optional[str]:
@@ -181,13 +192,16 @@ class Vsphere(clouds.Cloud):
181
192
  zones: Optional[List['clouds.Zone']],
182
193
  num_nodes: int,
183
194
  dryrun: bool = False,
195
+ volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
184
196
  ) -> Dict[str, Optional[str]]:
185
197
  # TODO get image id here.
186
198
  del cluster_name, dryrun # unused
187
199
  assert zones is not None, (region, zones)
188
200
  zone_names = [zone.name for zone in zones]
189
- r = resources
190
- acc_dict = self.get_accelerators_from_instance_type(r.instance_type)
201
+
202
+ resources = resources.assert_launchable()
203
+ acc_dict = self.get_accelerators_from_instance_type(
204
+ resources.instance_type)
191
205
  custom_resources = resources_utils.make_ray_custom_resources_str(
192
206
  acc_dict)
193
207
 
@@ -230,6 +244,8 @@ class Vsphere(clouds.Cloud):
230
244
  cpus=resources.cpus,
231
245
  memory=resources.memory,
232
246
  disk_tier=resources.disk_tier,
247
+ region=resources.region,
248
+ zone=resources.zone,
233
249
  )
234
250
  if default_instance_type is None:
235
251
  return resources_utils.FeasibleResources([], [], None)
@@ -242,7 +258,7 @@ class Vsphere(clouds.Cloud):
242
258
  (
243
259
  instance_list,
244
260
  fuzzy_candidate_list,
245
- ) = service_catalog.get_instance_type_for_accelerator(
261
+ ) = catalog.get_instance_type_for_accelerator(
246
262
  acc,
247
263
  acc_count,
248
264
  use_spot=resources.use_spot,
@@ -259,22 +275,20 @@ class Vsphere(clouds.Cloud):
259
275
  fuzzy_candidate_list, None)
260
276
 
261
277
  @classmethod
262
- def _check_compute_credentials(cls) -> Tuple[bool, Optional[str]]:
278
+ def _check_compute_credentials(
279
+ cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
263
280
  """Checks if the user has access credentials to
264
281
  vSphere's compute service."""
265
-
266
- try:
267
- # pylint: disable=import-outside-toplevel,unused-import
268
- # Check pyVmomi installation.
269
- import pyVmomi
270
- except (ImportError, subprocess.CalledProcessError) as e:
271
- return False, (
272
- 'vSphere dependencies are not installed. '
273
- 'Run the following commands:'
274
- f'\n{cls._INDENT_PREFIX} $ pip install skypilot[vSphere]'
275
- f'\n{cls._INDENT_PREFIX}Credentials may also need to be set. '
276
- 'For more details. See https://docs.skypilot.co/en/latest/getting-started/installation.html#vmware-vsphere' # pylint: disable=line-too-long
277
- f'{common_utils.format_exception(e, use_bracket=True)}')
282
+ dependency_error_msg = (
283
+ 'vSphere dependencies are not installed. '
284
+ 'Run the following commands:'
285
+ f'\n{cls._INDENT_PREFIX} $ pip install skypilot[vSphere]'
286
+ f'\n{cls._INDENT_PREFIX}Credentials may also need to be set. '
287
+ 'For more details. See https://docs.skypilot.co/en/latest/getting-started/installation.html#vmware-vsphere' # pylint: disable=line-too-long
288
+ )
289
+ # Check pyVmomi installation.
290
+ if not adaptors_common.can_import_modules(['pyVmomi']):
291
+ return False, dependency_error_msg
278
292
 
279
293
  required_keys = ['name', 'username', 'password', 'clusters']
280
294
  skip_key = 'skip_verification'
@@ -319,10 +333,7 @@ class Vsphere(clouds.Cloud):
319
333
  return None
320
334
 
321
335
  def instance_type_exists(self, instance_type: str) -> bool:
322
- return service_catalog.instance_type_exists(instance_type,
323
- _CLOUD_VSPHERE)
336
+ return catalog.instance_type_exists(instance_type, _CLOUD_VSPHERE)
324
337
 
325
338
  def validate_region_zone(self, region: Optional[str], zone: Optional[str]):
326
- return service_catalog.validate_region_zone(region,
327
- zone,
328
- clouds=_CLOUD_VSPHERE)
339
+ return catalog.validate_region_zone(region, zone, clouds=_CLOUD_VSPHERE)