skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546)
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/hyperbolic.py +8 -0
  8. sky/adaptors/ibm.py +5 -2
  9. sky/adaptors/kubernetes.py +149 -18
  10. sky/adaptors/nebius.py +173 -30
  11. sky/adaptors/primeintellect.py +1 -0
  12. sky/adaptors/runpod.py +68 -0
  13. sky/adaptors/seeweb.py +183 -0
  14. sky/adaptors/shadeform.py +89 -0
  15. sky/admin_policy.py +187 -4
  16. sky/authentication.py +179 -225
  17. sky/backends/__init__.py +4 -2
  18. sky/backends/backend.py +22 -9
  19. sky/backends/backend_utils.py +1323 -397
  20. sky/backends/cloud_vm_ray_backend.py +1749 -1029
  21. sky/backends/docker_utils.py +1 -1
  22. sky/backends/local_docker_backend.py +11 -6
  23. sky/backends/task_codegen.py +633 -0
  24. sky/backends/wheel_utils.py +55 -9
  25. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  26. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  27. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  28. sky/{clouds/service_catalog → catalog}/common.py +90 -49
  29. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  31. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  33. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
  34. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  35. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  36. sky/catalog/data_fetchers/fetch_nebius.py +338 -0
  37. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  38. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  39. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  40. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  41. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  42. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  43. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  44. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  45. sky/catalog/hyperbolic_catalog.py +136 -0
  46. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  47. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  48. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  49. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  50. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  51. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  52. sky/catalog/primeintellect_catalog.py +95 -0
  53. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  54. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  55. sky/catalog/seeweb_catalog.py +184 -0
  56. sky/catalog/shadeform_catalog.py +165 -0
  57. sky/catalog/ssh_catalog.py +167 -0
  58. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  59. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  60. sky/check.py +533 -185
  61. sky/cli.py +5 -5975
  62. sky/client/{cli.py → cli/command.py} +2591 -1956
  63. sky/client/cli/deprecation_utils.py +99 -0
  64. sky/client/cli/flags.py +359 -0
  65. sky/client/cli/table_utils.py +322 -0
  66. sky/client/cli/utils.py +79 -0
  67. sky/client/common.py +78 -32
  68. sky/client/oauth.py +82 -0
  69. sky/client/sdk.py +1219 -319
  70. sky/client/sdk_async.py +827 -0
  71. sky/client/service_account_auth.py +47 -0
  72. sky/cloud_stores.py +82 -3
  73. sky/clouds/__init__.py +13 -0
  74. sky/clouds/aws.py +564 -164
  75. sky/clouds/azure.py +105 -83
  76. sky/clouds/cloud.py +140 -40
  77. sky/clouds/cudo.py +68 -50
  78. sky/clouds/do.py +66 -48
  79. sky/clouds/fluidstack.py +63 -44
  80. sky/clouds/gcp.py +339 -110
  81. sky/clouds/hyperbolic.py +293 -0
  82. sky/clouds/ibm.py +70 -49
  83. sky/clouds/kubernetes.py +570 -162
  84. sky/clouds/lambda_cloud.py +74 -54
  85. sky/clouds/nebius.py +210 -81
  86. sky/clouds/oci.py +88 -66
  87. sky/clouds/paperspace.py +61 -44
  88. sky/clouds/primeintellect.py +317 -0
  89. sky/clouds/runpod.py +164 -74
  90. sky/clouds/scp.py +89 -86
  91. sky/clouds/seeweb.py +477 -0
  92. sky/clouds/shadeform.py +400 -0
  93. sky/clouds/ssh.py +263 -0
  94. sky/clouds/utils/aws_utils.py +10 -4
  95. sky/clouds/utils/gcp_utils.py +87 -11
  96. sky/clouds/utils/oci_utils.py +38 -14
  97. sky/clouds/utils/scp_utils.py +231 -167
  98. sky/clouds/vast.py +99 -77
  99. sky/clouds/vsphere.py +51 -40
  100. sky/core.py +375 -173
  101. sky/dag.py +15 -0
  102. sky/dashboard/out/404.html +1 -1
  103. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  105. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  106. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  107. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  110. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  111. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  112. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  113. sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  115. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  116. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  118. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  119. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  121. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  122. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  124. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  126. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  127. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  128. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  129. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  131. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  133. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  134. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  136. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  139. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  141. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  143. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  144. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  148. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  149. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  151. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  152. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
  155. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  156. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  157. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  158. sky/dashboard/out/clusters/[cluster].html +1 -1
  159. sky/dashboard/out/clusters.html +1 -1
  160. sky/dashboard/out/config.html +1 -0
  161. sky/dashboard/out/index.html +1 -1
  162. sky/dashboard/out/infra/[context].html +1 -0
  163. sky/dashboard/out/infra.html +1 -0
  164. sky/dashboard/out/jobs/[job].html +1 -1
  165. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  166. sky/dashboard/out/jobs.html +1 -1
  167. sky/dashboard/out/users.html +1 -0
  168. sky/dashboard/out/volumes.html +1 -0
  169. sky/dashboard/out/workspace/new.html +1 -0
  170. sky/dashboard/out/workspaces/[name].html +1 -0
  171. sky/dashboard/out/workspaces.html +1 -0
  172. sky/data/data_utils.py +137 -1
  173. sky/data/mounting_utils.py +269 -84
  174. sky/data/storage.py +1460 -1807
  175. sky/data/storage_utils.py +43 -57
  176. sky/exceptions.py +126 -2
  177. sky/execution.py +216 -63
  178. sky/global_user_state.py +2390 -586
  179. sky/jobs/__init__.py +7 -0
  180. sky/jobs/client/sdk.py +300 -58
  181. sky/jobs/client/sdk_async.py +161 -0
  182. sky/jobs/constants.py +15 -8
  183. sky/jobs/controller.py +848 -275
  184. sky/jobs/file_content_utils.py +128 -0
  185. sky/jobs/log_gc.py +193 -0
  186. sky/jobs/recovery_strategy.py +402 -152
  187. sky/jobs/scheduler.py +314 -189
  188. sky/jobs/server/core.py +836 -255
  189. sky/jobs/server/server.py +156 -115
  190. sky/jobs/server/utils.py +136 -0
  191. sky/jobs/state.py +2109 -706
  192. sky/jobs/utils.py +1306 -215
  193. sky/logs/__init__.py +21 -0
  194. sky/logs/agent.py +108 -0
  195. sky/logs/aws.py +243 -0
  196. sky/logs/gcp.py +91 -0
  197. sky/metrics/__init__.py +0 -0
  198. sky/metrics/utils.py +453 -0
  199. sky/models.py +78 -1
  200. sky/optimizer.py +164 -70
  201. sky/provision/__init__.py +90 -4
  202. sky/provision/aws/config.py +147 -26
  203. sky/provision/aws/instance.py +136 -50
  204. sky/provision/azure/instance.py +11 -6
  205. sky/provision/common.py +13 -1
  206. sky/provision/cudo/cudo_machine_type.py +1 -1
  207. sky/provision/cudo/cudo_utils.py +14 -8
  208. sky/provision/cudo/cudo_wrapper.py +72 -71
  209. sky/provision/cudo/instance.py +10 -6
  210. sky/provision/do/instance.py +10 -6
  211. sky/provision/do/utils.py +4 -3
  212. sky/provision/docker_utils.py +140 -33
  213. sky/provision/fluidstack/instance.py +13 -8
  214. sky/provision/gcp/__init__.py +1 -0
  215. sky/provision/gcp/config.py +301 -19
  216. sky/provision/gcp/constants.py +218 -0
  217. sky/provision/gcp/instance.py +36 -8
  218. sky/provision/gcp/instance_utils.py +18 -4
  219. sky/provision/gcp/volume_utils.py +247 -0
  220. sky/provision/hyperbolic/__init__.py +12 -0
  221. sky/provision/hyperbolic/config.py +10 -0
  222. sky/provision/hyperbolic/instance.py +437 -0
  223. sky/provision/hyperbolic/utils.py +373 -0
  224. sky/provision/instance_setup.py +101 -20
  225. sky/provision/kubernetes/__init__.py +5 -0
  226. sky/provision/kubernetes/config.py +9 -52
  227. sky/provision/kubernetes/constants.py +17 -0
  228. sky/provision/kubernetes/instance.py +919 -280
  229. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  230. sky/provision/kubernetes/network.py +27 -17
  231. sky/provision/kubernetes/network_utils.py +44 -43
  232. sky/provision/kubernetes/utils.py +1221 -534
  233. sky/provision/kubernetes/volume.py +343 -0
  234. sky/provision/lambda_cloud/instance.py +22 -16
  235. sky/provision/nebius/constants.py +50 -0
  236. sky/provision/nebius/instance.py +19 -6
  237. sky/provision/nebius/utils.py +237 -137
  238. sky/provision/oci/instance.py +10 -5
  239. sky/provision/paperspace/instance.py +10 -7
  240. sky/provision/paperspace/utils.py +1 -1
  241. sky/provision/primeintellect/__init__.py +10 -0
  242. sky/provision/primeintellect/config.py +11 -0
  243. sky/provision/primeintellect/instance.py +454 -0
  244. sky/provision/primeintellect/utils.py +398 -0
  245. sky/provision/provisioner.py +117 -36
  246. sky/provision/runpod/__init__.py +5 -0
  247. sky/provision/runpod/instance.py +27 -6
  248. sky/provision/runpod/utils.py +51 -18
  249. sky/provision/runpod/volume.py +214 -0
  250. sky/provision/scp/__init__.py +15 -0
  251. sky/provision/scp/config.py +93 -0
  252. sky/provision/scp/instance.py +707 -0
  253. sky/provision/seeweb/__init__.py +11 -0
  254. sky/provision/seeweb/config.py +13 -0
  255. sky/provision/seeweb/instance.py +812 -0
  256. sky/provision/shadeform/__init__.py +11 -0
  257. sky/provision/shadeform/config.py +12 -0
  258. sky/provision/shadeform/instance.py +351 -0
  259. sky/provision/shadeform/shadeform_utils.py +83 -0
  260. sky/provision/ssh/__init__.py +18 -0
  261. sky/provision/vast/instance.py +13 -8
  262. sky/provision/vast/utils.py +10 -7
  263. sky/provision/volume.py +164 -0
  264. sky/provision/vsphere/common/ssl_helper.py +1 -1
  265. sky/provision/vsphere/common/vapiconnect.py +2 -1
  266. sky/provision/vsphere/common/vim_utils.py +4 -4
  267. sky/provision/vsphere/instance.py +15 -10
  268. sky/provision/vsphere/vsphere_utils.py +17 -20
  269. sky/py.typed +0 -0
  270. sky/resources.py +845 -119
  271. sky/schemas/__init__.py +0 -0
  272. sky/schemas/api/__init__.py +0 -0
  273. sky/schemas/api/responses.py +227 -0
  274. sky/schemas/db/README +4 -0
  275. sky/schemas/db/env.py +90 -0
  276. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  277. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  278. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  279. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  280. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  281. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  282. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  283. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  284. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  285. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  286. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  287. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  288. sky/schemas/db/script.py.mako +28 -0
  289. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  290. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  291. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  292. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  293. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  294. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  295. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  296. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  297. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  298. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  299. sky/schemas/generated/__init__.py +0 -0
  300. sky/schemas/generated/autostopv1_pb2.py +36 -0
  301. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  302. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  303. sky/schemas/generated/jobsv1_pb2.py +86 -0
  304. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  305. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  306. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  307. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  308. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  309. sky/schemas/generated/servev1_pb2.py +58 -0
  310. sky/schemas/generated/servev1_pb2.pyi +115 -0
  311. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  312. sky/serve/autoscalers.py +357 -5
  313. sky/serve/client/impl.py +310 -0
  314. sky/serve/client/sdk.py +47 -139
  315. sky/serve/client/sdk_async.py +130 -0
  316. sky/serve/constants.py +12 -9
  317. sky/serve/controller.py +68 -17
  318. sky/serve/load_balancer.py +106 -60
  319. sky/serve/load_balancing_policies.py +116 -2
  320. sky/serve/replica_managers.py +434 -249
  321. sky/serve/serve_rpc_utils.py +179 -0
  322. sky/serve/serve_state.py +569 -257
  323. sky/serve/serve_utils.py +775 -265
  324. sky/serve/server/core.py +66 -711
  325. sky/serve/server/impl.py +1093 -0
  326. sky/serve/server/server.py +21 -18
  327. sky/serve/service.py +192 -89
  328. sky/serve/service_spec.py +144 -20
  329. sky/serve/spot_placer.py +3 -0
  330. sky/server/auth/__init__.py +0 -0
  331. sky/server/auth/authn.py +50 -0
  332. sky/server/auth/loopback.py +38 -0
  333. sky/server/auth/oauth2_proxy.py +202 -0
  334. sky/server/common.py +478 -182
  335. sky/server/config.py +85 -23
  336. sky/server/constants.py +44 -6
  337. sky/server/daemons.py +295 -0
  338. sky/server/html/token_page.html +185 -0
  339. sky/server/metrics.py +160 -0
  340. sky/server/middleware_utils.py +166 -0
  341. sky/server/requests/executor.py +558 -138
  342. sky/server/requests/payloads.py +364 -24
  343. sky/server/requests/preconditions.py +21 -17
  344. sky/server/requests/process.py +112 -29
  345. sky/server/requests/request_names.py +121 -0
  346. sky/server/requests/requests.py +822 -226
  347. sky/server/requests/serializers/decoders.py +82 -31
  348. sky/server/requests/serializers/encoders.py +140 -22
  349. sky/server/requests/threads.py +117 -0
  350. sky/server/rest.py +455 -0
  351. sky/server/server.py +1309 -285
  352. sky/server/state.py +20 -0
  353. sky/server/stream_utils.py +327 -61
  354. sky/server/uvicorn.py +217 -3
  355. sky/server/versions.py +270 -0
  356. sky/setup_files/MANIFEST.in +11 -1
  357. sky/setup_files/alembic.ini +160 -0
  358. sky/setup_files/dependencies.py +139 -31
  359. sky/setup_files/setup.py +44 -42
  360. sky/sky_logging.py +114 -7
  361. sky/skylet/attempt_skylet.py +106 -24
  362. sky/skylet/autostop_lib.py +129 -8
  363. sky/skylet/configs.py +29 -20
  364. sky/skylet/constants.py +216 -25
  365. sky/skylet/events.py +101 -21
  366. sky/skylet/job_lib.py +345 -164
  367. sky/skylet/log_lib.py +297 -18
  368. sky/skylet/log_lib.pyi +44 -1
  369. sky/skylet/providers/ibm/node_provider.py +12 -8
  370. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  371. sky/skylet/ray_patches/__init__.py +17 -3
  372. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  373. sky/skylet/ray_patches/cli.py.diff +19 -0
  374. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  375. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  376. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  377. sky/skylet/ray_patches/updater.py.diff +18 -0
  378. sky/skylet/ray_patches/worker.py.diff +41 -0
  379. sky/skylet/runtime_utils.py +21 -0
  380. sky/skylet/services.py +568 -0
  381. sky/skylet/skylet.py +72 -4
  382. sky/skylet/subprocess_daemon.py +104 -29
  383. sky/skypilot_config.py +506 -99
  384. sky/ssh_node_pools/__init__.py +1 -0
  385. sky/ssh_node_pools/core.py +135 -0
  386. sky/ssh_node_pools/server.py +233 -0
  387. sky/task.py +685 -163
  388. sky/templates/aws-ray.yml.j2 +11 -3
  389. sky/templates/azure-ray.yml.j2 +2 -1
  390. sky/templates/cudo-ray.yml.j2 +1 -0
  391. sky/templates/do-ray.yml.j2 +2 -1
  392. sky/templates/fluidstack-ray.yml.j2 +1 -0
  393. sky/templates/gcp-ray.yml.j2 +62 -1
  394. sky/templates/hyperbolic-ray.yml.j2 +68 -0
  395. sky/templates/ibm-ray.yml.j2 +2 -1
  396. sky/templates/jobs-controller.yaml.j2 +27 -24
  397. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  398. sky/templates/kubernetes-ray.yml.j2 +611 -50
  399. sky/templates/lambda-ray.yml.j2 +2 -1
  400. sky/templates/nebius-ray.yml.j2 +34 -12
  401. sky/templates/oci-ray.yml.j2 +1 -0
  402. sky/templates/paperspace-ray.yml.j2 +2 -1
  403. sky/templates/primeintellect-ray.yml.j2 +72 -0
  404. sky/templates/runpod-ray.yml.j2 +10 -1
  405. sky/templates/scp-ray.yml.j2 +4 -50
  406. sky/templates/seeweb-ray.yml.j2 +171 -0
  407. sky/templates/shadeform-ray.yml.j2 +73 -0
  408. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  409. sky/templates/vast-ray.yml.j2 +1 -0
  410. sky/templates/vsphere-ray.yml.j2 +1 -0
  411. sky/templates/websocket_proxy.py +212 -37
  412. sky/usage/usage_lib.py +31 -15
  413. sky/users/__init__.py +0 -0
  414. sky/users/model.conf +15 -0
  415. sky/users/permission.py +397 -0
  416. sky/users/rbac.py +121 -0
  417. sky/users/server.py +720 -0
  418. sky/users/token_service.py +218 -0
  419. sky/utils/accelerator_registry.py +35 -5
  420. sky/utils/admin_policy_utils.py +84 -38
  421. sky/utils/annotations.py +38 -5
  422. sky/utils/asyncio_utils.py +78 -0
  423. sky/utils/atomic.py +1 -1
  424. sky/utils/auth_utils.py +153 -0
  425. sky/utils/benchmark_utils.py +60 -0
  426. sky/utils/cli_utils/status_utils.py +159 -86
  427. sky/utils/cluster_utils.py +31 -9
  428. sky/utils/command_runner.py +354 -68
  429. sky/utils/command_runner.pyi +93 -3
  430. sky/utils/common.py +35 -8
  431. sky/utils/common_utils.py +314 -91
  432. sky/utils/config_utils.py +74 -5
  433. sky/utils/context.py +403 -0
  434. sky/utils/context_utils.py +242 -0
  435. sky/utils/controller_utils.py +383 -89
  436. sky/utils/dag_utils.py +31 -12
  437. sky/utils/db/__init__.py +0 -0
  438. sky/utils/db/db_utils.py +485 -0
  439. sky/utils/db/kv_cache.py +149 -0
  440. sky/utils/db/migration_utils.py +137 -0
  441. sky/utils/directory_utils.py +12 -0
  442. sky/utils/env_options.py +13 -0
  443. sky/utils/git.py +567 -0
  444. sky/utils/git_clone.sh +460 -0
  445. sky/utils/infra_utils.py +195 -0
  446. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  447. sky/utils/kubernetes/config_map_utils.py +133 -0
  448. sky/utils/kubernetes/create_cluster.sh +15 -29
  449. sky/utils/kubernetes/delete_cluster.sh +10 -7
  450. sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
  451. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  452. sky/utils/kubernetes/generate_kind_config.py +6 -66
  453. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  454. sky/utils/kubernetes/gpu_labeler.py +18 -8
  455. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  456. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  457. sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
  458. sky/utils/kubernetes/rsync_helper.sh +11 -3
  459. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  460. sky/utils/kubernetes/ssh_utils.py +221 -0
  461. sky/utils/kubernetes_enums.py +8 -15
  462. sky/utils/lock_events.py +94 -0
  463. sky/utils/locks.py +416 -0
  464. sky/utils/log_utils.py +82 -107
  465. sky/utils/perf_utils.py +22 -0
  466. sky/utils/resource_checker.py +298 -0
  467. sky/utils/resources_utils.py +249 -32
  468. sky/utils/rich_utils.py +217 -39
  469. sky/utils/schemas.py +955 -160
  470. sky/utils/serialize_utils.py +16 -0
  471. sky/utils/status_lib.py +10 -0
  472. sky/utils/subprocess_utils.py +29 -15
  473. sky/utils/tempstore.py +70 -0
  474. sky/utils/thread_utils.py +91 -0
  475. sky/utils/timeline.py +26 -53
  476. sky/utils/ux_utils.py +84 -15
  477. sky/utils/validator.py +11 -1
  478. sky/utils/volume.py +165 -0
  479. sky/utils/yaml_utils.py +111 -0
  480. sky/volumes/__init__.py +13 -0
  481. sky/volumes/client/__init__.py +0 -0
  482. sky/volumes/client/sdk.py +150 -0
  483. sky/volumes/server/__init__.py +0 -0
  484. sky/volumes/server/core.py +270 -0
  485. sky/volumes/server/server.py +124 -0
  486. sky/volumes/volume.py +215 -0
  487. sky/workspaces/__init__.py +0 -0
  488. sky/workspaces/core.py +655 -0
  489. sky/workspaces/server.py +101 -0
  490. sky/workspaces/utils.py +56 -0
  491. sky_templates/README.md +3 -0
  492. sky_templates/__init__.py +3 -0
  493. sky_templates/ray/__init__.py +0 -0
  494. sky_templates/ray/start_cluster +183 -0
  495. sky_templates/ray/stop_cluster +75 -0
  496. skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
  497. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  498. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
  499. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  500. sky/benchmark/benchmark_state.py +0 -256
  501. sky/benchmark/benchmark_utils.py +0 -641
  502. sky/clouds/service_catalog/constants.py +0 -7
  503. sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
  504. sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
  505. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  506. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  507. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  508. sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
  509. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  510. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  511. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  512. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  513. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  514. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  515. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  516. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
  517. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  518. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  519. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  520. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
  521. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  522. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  523. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  524. sky/jobs/dashboard/dashboard.py +0 -223
  525. sky/jobs/dashboard/static/favicon.ico +0 -0
  526. sky/jobs/dashboard/templates/index.html +0 -831
  527. sky/jobs/server/dashboard_utils.py +0 -69
  528. sky/skylet/providers/scp/__init__.py +0 -2
  529. sky/skylet/providers/scp/config.py +0 -149
  530. sky/skylet/providers/scp/node_provider.py +0 -578
  531. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  532. sky/utils/db_utils.py +0 -100
  533. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  534. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  535. skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
  536. skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
  537. skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
  538. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  539. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  540. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  541. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  542. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  543. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  544. /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  545. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  546. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,293 @@
1
+ """Hyperbolic Cloud provider implementation
2
+ for SkyPilot.
3
+ """
4
+ import os
5
+ import typing
6
+ from typing import Any, Dict, List, Optional, Tuple, Union
7
+
8
+ from sky import catalog
9
+ from sky import clouds
10
+ from sky.utils import registry
11
+ from sky.utils import resources_utils
12
+ from sky.utils.resources_utils import DiskTier
13
+
14
+ if typing.TYPE_CHECKING:
15
+ from sky import resources as resources_lib
16
+ from sky.utils import volume as volume_lib
17
+
18
+
19
+ @registry.CLOUD_REGISTRY.register
20
+ class Hyperbolic(clouds.Cloud):
21
+ """Hyperbolic Cloud Provider."""
22
+
23
+ _REPR = 'Hyperbolic'
24
+ name = 'hyperbolic'
25
+ _MAX_CLUSTER_NAME_LEN_LIMIT = 120
26
+ API_KEY_PATH = os.path.expanduser('~/.hyperbolic/api_key')
27
+
28
+ _CLOUD_UNSUPPORTED_FEATURES = {
29
+ clouds.CloudImplementationFeatures.STOP: ('Stopping not supported.'),
30
+ clouds.CloudImplementationFeatures.MULTI_NODE:
31
+ ('Multi-node not supported.'),
32
+ clouds.CloudImplementationFeatures.CUSTOM_DISK_TIER:
33
+ ('Custom disk tiers not supported.'),
34
+ clouds.CloudImplementationFeatures.STORAGE_MOUNTING:
35
+ ('Storage mounting not supported.'),
36
+ clouds.CloudImplementationFeatures.HIGH_AVAILABILITY_CONTROLLERS:
37
+ ('High availability controllers not supported.'),
38
+ clouds.CloudImplementationFeatures.SPOT_INSTANCE:
39
+ ('Spot instances not supported.'),
40
+ clouds.CloudImplementationFeatures.CLONE_DISK_FROM_CLUSTER:
41
+ ('Disk cloning not supported.'),
42
+ clouds.CloudImplementationFeatures.DOCKER_IMAGE:
43
+ ('Docker images not supported.'),
44
+ clouds.CloudImplementationFeatures.OPEN_PORTS:
45
+ ('Opening ports not supported.'),
46
+ clouds.CloudImplementationFeatures.IMAGE_ID:
47
+ ('Custom image IDs not supported.'),
48
+ clouds.CloudImplementationFeatures.CUSTOM_NETWORK_TIER:
49
+ ('Custom network tiers not supported.'),
50
+ clouds.CloudImplementationFeatures.HOST_CONTROLLERS:
51
+ ('Host controllers not supported.'),
52
+ clouds.CloudImplementationFeatures.AUTO_TERMINATE:
53
+ ('Auto-termination not supported.'),
54
+ clouds.CloudImplementationFeatures.AUTOSTOP:
55
+ ('Auto-stop not supported.'),
56
+ clouds.CloudImplementationFeatures.AUTODOWN:
57
+ ('Auto-down not supported.'),
58
+ clouds.CloudImplementationFeatures.CUSTOM_MULTI_NETWORK:
59
+ ('Customized multiple network interfaces not supported.'),
60
+ }
61
+
62
+ PROVISIONER_VERSION = clouds.ProvisionerVersion.SKYPILOT
63
+ STATUS_VERSION = clouds.StatusVersion.SKYPILOT
64
+ OPEN_PORTS_VERSION = clouds.OpenPortsVersion.LAUNCH_ONLY
65
+
66
+ @classmethod
67
+ def _unsupported_features_for_resources(
68
+ cls,
69
+ resources: 'resources_lib.Resources',
70
+ region: Optional[str] = None,
71
+ ) -> Dict[clouds.CloudImplementationFeatures, str]:
72
+ del resources
73
+ return cls._CLOUD_UNSUPPORTED_FEATURES
74
+
75
+ @classmethod
76
+ def _max_cluster_name_length(cls) -> Optional[int]:
77
+ return cls._MAX_CLUSTER_NAME_LEN_LIMIT
78
+
79
+ def instance_type_exists(self, instance_type: str) -> bool:
80
+ return catalog.instance_type_exists(instance_type, 'hyperbolic')
81
+
82
+ @classmethod
83
+ def regions_with_offering(
84
+ cls,
85
+ instance_type: str,
86
+ accelerators: Optional[Dict[str, int]],
87
+ use_spot: bool,
88
+ region: Optional[str],
89
+ zone: Optional[str],
90
+ resources: Optional['resources_lib.Resources'] = None,
91
+ ) -> List[clouds.Region]:
92
+ assert zone is None, 'Hyperbolic does not support zones.'
93
+ del accelerators, zone # unused
94
+
95
+ regions = catalog.get_region_zones_for_instance_type(
96
+ instance_type, use_spot, 'hyperbolic')
97
+ if region is not None:
98
+ regions = [r for r in regions if r.name == region]
99
+ return regions
100
+
101
+ @classmethod
102
+ def get_vcpus_mem_from_instance_type(
103
+ cls, instance_type: str) -> Tuple[Optional[float], Optional[float]]:
104
+ return catalog.get_vcpus_mem_from_instance_type(instance_type,
105
+ clouds='hyperbolic')
106
+
107
+ def instance_type_to_hourly_cost(self,
108
+ instance_type: str,
109
+ use_spot: bool,
110
+ region: Optional[str] = None,
111
+ zone: Optional[str] = None) -> float:
112
+ return catalog.get_hourly_cost(instance_type,
113
+ use_spot=use_spot,
114
+ region=region,
115
+ zone=zone,
116
+ clouds='hyperbolic')
117
+
118
+ @classmethod
119
+ def get_default_instance_type(cls,
120
+ cpus: Optional[str] = None,
121
+ memory: Optional[str] = None,
122
+ disk_tier: Optional[DiskTier] = None,
123
+ region: Optional[str] = None,
124
+ zone: Optional[str] = None) -> Optional[str]:
125
+ return catalog.get_default_instance_type(cpus=cpus,
126
+ memory=memory,
127
+ disk_tier=disk_tier,
128
+ region=region,
129
+ zone=zone,
130
+ clouds='hyperbolic')
131
+
132
+ @classmethod
133
+ def get_accelerators_from_instance_type(
134
+ cls, instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
135
+ return catalog.get_accelerators_from_instance_type(instance_type,
136
+ clouds='hyperbolic')
137
+
138
+ @classmethod
139
+ def _check_credentials(cls) -> Tuple[bool, Optional[str]]:
140
+ if os.path.exists(cls.API_KEY_PATH):
141
+ return True, None
142
+ return False, f'API key not found at {cls.API_KEY_PATH}'
143
+
144
+ @classmethod
145
+ def _check_compute_credentials(cls) -> Tuple[bool, Optional[str]]:
146
+ return cls._check_credentials()
147
+
148
+ @classmethod
149
+ def get_credential_file_mounts(cls) -> Dict[str, str]:
150
+ if os.path.exists(cls.API_KEY_PATH):
151
+ return {cls.API_KEY_PATH: '~/.hyperbolic/api_key'}
152
+ return {}
153
+
154
+ def __repr__(self):
155
+ return self._REPR
156
+
157
+ def _get_feasible_launchable_resources(
158
+ self, resources: 'resources_lib.Resources'
159
+ ) -> 'resources_utils.FeasibleResources':
160
+ # Check if the instance type exists in the catalog
161
+ if resources.instance_type is not None:
162
+ if catalog.instance_type_exists(resources.instance_type,
163
+ 'hyperbolic'):
164
+ # Remove accelerators for launchable resources
165
+ resources_launch = resources.copy(accelerators=None)
166
+ return resources_utils.FeasibleResources([resources_launch], [],
167
+ None)
168
+ else:
169
+ raise ValueError(
170
+ f'Invalid instance type: {resources.instance_type}')
171
+
172
+ # If accelerators are specified
173
+ accelerators = resources.accelerators
174
+ if accelerators is not None:
175
+ assert len(accelerators) == 1, resources
176
+ acc, acc_count = list(accelerators.items())[0]
177
+ (instance_list,
178
+ fuzzy_candidate_list) = catalog.get_instance_type_for_accelerator(
179
+ acc,
180
+ acc_count,
181
+ use_spot=resources.use_spot,
182
+ cpus=resources.cpus,
183
+ memory=resources.memory,
184
+ region=resources.region,
185
+ zone=resources.zone,
186
+ clouds='hyperbolic')
187
+ if instance_list is None:
188
+ return resources_utils.FeasibleResources([],
189
+ fuzzy_candidate_list,
190
+ None)
191
+
192
+ def _make(instance_list):
193
+ resource_list = []
194
+ for instance_type in instance_list:
195
+ r = resources.copy(
196
+ cloud=self,
197
+ instance_type=instance_type,
198
+ accelerators=None,
199
+ cpus=None,
200
+ memory=None,
201
+ )
202
+ resource_list.append(r)
203
+ return resource_list
204
+
205
+ return resources_utils.FeasibleResources(_make(instance_list),
206
+ fuzzy_candidate_list, None)
207
+
208
+ # If nothing is specified, return a default instance type
209
+ default_instance_type = self.get_default_instance_type(
210
+ cpus=resources.cpus,
211
+ memory=resources.memory,
212
+ disk_tier=resources.disk_tier,
213
+ region=resources.region,
214
+ zone=resources.zone)
215
+ if default_instance_type is None:
216
+ return resources_utils.FeasibleResources([], [], None)
217
+ else:
218
+ r = resources.copy(
219
+ cloud=self,
220
+ instance_type=default_instance_type,
221
+ accelerators=None,
222
+ cpus=None,
223
+ memory=None,
224
+ )
225
+ return resources_utils.FeasibleResources([r], [], None)
226
+
227
+ def validate_region_zone(
228
+ self, region: Optional[str],
229
+ zone: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
230
+ if zone is not None:
231
+ raise ValueError('Hyperbolic does not support zones.')
232
+ return catalog.validate_region_zone(region, zone, 'hyperbolic')
233
+
234
+ @classmethod
235
+ def regions(cls) -> List[clouds.Region]:
236
+ """Returns the list of regions in Hyperbolic's catalog."""
237
+ return catalog.regions('hyperbolic')
238
+
239
+ @classmethod
240
+ def zones_provision_loop(cls,
241
+ *,
242
+ region: str,
243
+ num_nodes: int,
244
+ instance_type: str,
245
+ accelerators: Optional[Dict[str, int]] = None,
246
+ use_spot: bool = False):
247
+ yield None
248
+
249
+ @classmethod
250
+ def get_zone_shell_cmd(cls) -> Optional[str]:
251
+ return None
252
+
253
+ def get_egress_cost(self, num_gigabytes: float):
254
+ return 0.0
255
+
256
+ def accelerators_to_hourly_cost(self, accelerators: Dict[str, int],
257
+ use_spot: bool, region: Optional[str],
258
+ zone: Optional[str]) -> float:
259
+ return 0.0
260
+
261
+ def make_deploy_resources_variables(
262
+ self,
263
+ resources: 'resources_lib.Resources',
264
+ cluster_name: resources_utils.ClusterName,
265
+ region: 'clouds.Region',
266
+ zones: Optional[List['clouds.Zone']],
267
+ num_nodes: int,
268
+ dryrun: bool = False,
269
+ volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
270
+ ) -> Dict[str, Any]:
271
+ """Returns a dict of variables for the deployment template."""
272
+ del dryrun, region, cluster_name # unused
273
+ assert zones is None, ('Hyperbolic does not support zones', zones)
274
+
275
+ resources = resources.assert_launchable()
276
+ # resources.accelerators is cleared but .instance_type encodes the info.
277
+ acc_dict = self.get_accelerators_from_instance_type(
278
+ resources.instance_type)
279
+ custom_resources = resources_utils.make_ray_custom_resources_str(
280
+ acc_dict)
281
+
282
+ return {
283
+ 'instance_type': resources.instance_type,
284
+ 'custom_resources': custom_resources,
285
+ 'num_nodes': 1, # Hyperbolic only supports single-node clusters
286
+ }
287
+
288
+ def cluster_name_in_hint(self, cluster_name_on_cloud: Optional[str],
289
+ cluster_name: str) -> bool:
290
+ """Check if a node's name matches the cluster name pattern."""
291
+ if cluster_name_on_cloud is None:
292
+ return False
293
+ return cluster_name_on_cloud.startswith(cluster_name)
sky/clouds/ibm.py CHANGED
@@ -5,11 +5,11 @@ from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
5
5
 
6
6
  import colorama
7
7
 
8
+ from sky import catalog
8
9
  from sky import clouds
9
10
  from sky import sky_logging
10
11
  from sky.adaptors import ibm
11
12
  from sky.adaptors.ibm import CREDENTIAL_FILE
12
- from sky.clouds import service_catalog
13
13
  from sky.utils import registry
14
14
  from sky.utils import resources_utils
15
15
  from sky.utils import status_lib
@@ -18,6 +18,7 @@ from sky.utils import ux_utils
18
18
  if typing.TYPE_CHECKING:
19
19
  # renaming to avoid shadowing variables
20
20
  from sky import resources as resources_lib
21
+ from sky.utils import volume as volume_lib
21
22
 
22
23
  logger = sky_logging.init_logger(__name__)
23
24
 
@@ -36,7 +37,9 @@ class IBM(clouds.Cloud):
36
37
 
37
38
  @classmethod
38
39
  def _unsupported_features_for_resources(
39
- cls, resources: 'resources_lib.Resources'
40
+ cls,
41
+ resources: 'resources_lib.Resources',
42
+ region: Optional[str] = None,
40
43
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
41
44
  features = {
42
45
  clouds.CloudImplementationFeatures.CLONE_DISK_FROM_CLUSTER:
@@ -52,6 +55,9 @@ class IBM(clouds.Cloud):
52
55
  (f'Opening ports is currently not supported on {cls._REPR}.'),
53
56
  clouds.CloudImplementationFeatures.HIGH_AVAILABILITY_CONTROLLERS:
54
57
  ('High availability controllers are not supported on IBM.'),
58
+ clouds.CloudImplementationFeatures.CUSTOM_MULTI_NETWORK:
59
+ ('Customized multiple network interfaces are not supported on '
60
+ f'{cls._REPR}.'),
55
61
  }
56
62
  if resources.use_spot:
57
63
  features[clouds.CloudImplementationFeatures.STOP] = (
@@ -64,14 +70,19 @@ class IBM(clouds.Cloud):
64
70
  return cls._MAX_CLUSTER_NAME_LEN_LIMIT
65
71
 
66
72
  @classmethod
67
- def regions_with_offering(cls, instance_type: str,
68
- accelerators: Optional[Dict[str, int]],
69
- use_spot: bool, region: Optional[str],
70
- zone: Optional[str]) -> List[clouds.Region]:
73
+ def regions_with_offering(
74
+ cls,
75
+ instance_type: str,
76
+ accelerators: Optional[Dict[str, int]],
77
+ use_spot: bool,
78
+ region: Optional[str],
79
+ zone: Optional[str],
80
+ resources: Optional['resources_lib.Resources'] = None,
81
+ ) -> List[clouds.Region]:
71
82
  del accelerators # unused
72
83
  if use_spot:
73
84
  return []
74
- regions = service_catalog.get_region_zones_for_instance_type(
85
+ regions = catalog.get_region_zones_for_instance_type(
75
86
  instance_type, use_spot, 'ibm')
76
87
 
77
88
  if region is not None:
@@ -131,11 +142,11 @@ class IBM(clouds.Cloud):
131
142
  zone: Optional[str] = None) -> float:
132
143
  # Currently doesn't support spot instances, hence use_spot set to False.
133
144
  del use_spot
134
- return service_catalog.get_hourly_cost(instance_type,
135
- use_spot=False,
136
- region=region,
137
- zone=zone,
138
- clouds='ibm')
145
+ return catalog.get_hourly_cost(instance_type,
146
+ use_spot=False,
147
+ region=region,
148
+ zone=zone,
149
+ clouds='ibm')
139
150
 
140
151
  def accelerators_to_hourly_cost(self,
141
152
  accelerators: Dict[str, int],
@@ -175,7 +186,8 @@ class IBM(clouds.Cloud):
175
186
  zones: Optional[List['clouds.Zone']],
176
187
  num_nodes: int,
177
188
  dryrun: bool = False,
178
- ) -> Dict[str, Optional[str]]:
189
+ volume_mounts: Optional[List['volume_lib.VolumeMount']] = None,
190
+ ) -> Dict[str, Any]:
179
191
  """Converts planned sky.Resources to cloud-specific resource variables.
180
192
 
181
193
  These variables are used to fill the node type section (instance type,
@@ -204,30 +216,32 @@ class IBM(clouds.Cloud):
204
216
  # clouds implementing 'zones_provision_loop()'
205
217
  zone_names = [zone.name for zone in zones] # type: ignore[union-attr]
206
218
 
207
- r = resources
208
- assert not r.use_spot, \
219
+ resources = resources.assert_launchable()
220
+ assert not resources.use_spot, \
209
221
  'IBM does not currently support spot instances in this framework'
210
222
 
211
- acc_dict = self.get_accelerators_from_instance_type(r.instance_type)
223
+ acc_dict = self.get_accelerators_from_instance_type(
224
+ resources.instance_type)
212
225
  custom_resources = resources_utils.make_ray_custom_resources_str(
213
226
  acc_dict)
214
227
 
215
- instance_resources = _get_profile_resources(r.instance_type)
228
+ instance_resources = _get_profile_resources(resources.instance_type)
216
229
 
217
230
  worker_instance_type = get_cred_file_field('worker_instance_type',
218
- r.instance_type)
231
+ resources.instance_type)
219
232
  worker_instance_resources = _get_profile_resources(worker_instance_type)
220
233
  # r.image_id: {clouds.Region:image_id} - property of Resources class
221
- image_id = r.image_id[
222
- region.name] if r.image_id else self.get_default_image(region_name)
234
+ image_id = resources.image_id[
235
+ region.name] if resources.image_id else self.get_default_image(
236
+ region_name)
223
237
 
224
238
  return {
225
- 'instance_type': r.instance_type,
239
+ 'instance_type': resources.instance_type,
226
240
  'instance_resources': instance_resources,
227
241
  'worker_instance_type': worker_instance_type,
228
242
  'worker_instance_resources': worker_instance_resources,
229
243
  'custom_resources': custom_resources,
230
- 'use_spot': r.use_spot,
244
+ 'use_spot': resources.use_spot,
231
245
  'region': region_name,
232
246
  'zones': ','.join(zone_names),
233
247
  'image_id': image_id,
@@ -241,8 +255,8 @@ class IBM(clouds.Cloud):
241
255
  cls,
242
256
  instance_type: str,
243
257
  ) -> Tuple[Optional[float], Optional[float]]:
244
- return service_catalog.get_vcpus_mem_from_instance_type(instance_type,
245
- clouds='ibm')
258
+ return catalog.get_vcpus_mem_from_instance_type(instance_type,
259
+ clouds='ibm')
246
260
 
247
261
  @classmethod
248
262
  def get_accelerators_from_instance_type(
@@ -250,20 +264,23 @@ class IBM(clouds.Cloud):
250
264
  instance_type: str,
251
265
  ) -> Optional[Dict[str, Union[int, float]]]:
252
266
  """Returns {acc: acc_count} held by 'instance_type', if any."""
253
- return service_catalog.get_accelerators_from_instance_type(
254
- instance_type, clouds='ibm')
267
+ return catalog.get_accelerators_from_instance_type(instance_type,
268
+ clouds='ibm')
255
269
 
256
270
  @classmethod
257
271
  def get_default_instance_type(
258
- cls,
259
- cpus: Optional[str] = None,
260
- memory: Optional[str] = None,
261
- disk_tier: Optional['resources_utils.DiskTier'] = None
262
- ) -> Optional[str]:
263
- return service_catalog.get_default_instance_type(cpus=cpus,
264
- memory=memory,
265
- disk_tier=disk_tier,
266
- clouds='ibm')
272
+ cls,
273
+ cpus: Optional[str] = None,
274
+ memory: Optional[str] = None,
275
+ disk_tier: Optional['resources_utils.DiskTier'] = None,
276
+ region: Optional[str] = None,
277
+ zone: Optional[str] = None) -> Optional[str]:
278
+ return catalog.get_default_instance_type(cpus=cpus,
279
+ memory=memory,
280
+ disk_tier=disk_tier,
281
+ region=region,
282
+ zone=zone,
283
+ clouds='ibm')
267
284
 
268
285
  def _get_feasible_launchable_resources(
269
286
  self, resources: 'resources_lib.Resources'
@@ -298,7 +315,9 @@ class IBM(clouds.Cloud):
298
315
  default_instance_type = IBM.get_default_instance_type(
299
316
  cpus=resources.cpus,
300
317
  memory=resources.memory,
301
- disk_tier=resources.disk_tier)
318
+ disk_tier=resources.disk_tier,
319
+ region=resources.region,
320
+ zone=resources.zone)
302
321
  if default_instance_type is None:
303
322
  return resources_utils.FeasibleResources([], [], None)
304
323
  else:
@@ -307,15 +326,15 @@ class IBM(clouds.Cloud):
307
326
 
308
327
  assert len(accelerators) == 1, resources
309
328
  acc, acc_count = list(accelerators.items())[0]
310
- (instance_list, fuzzy_candidate_list
311
- ) = service_catalog.get_instance_type_for_accelerator(
312
- acc,
313
- acc_count,
314
- cpus=resources.cpus,
315
- memory=resources.memory,
316
- region=resources.region,
317
- zone=resources.zone,
318
- clouds='ibm')
329
+ (instance_list,
330
+ fuzzy_candidate_list) = catalog.get_instance_type_for_accelerator(
331
+ acc,
332
+ acc_count,
333
+ cpus=resources.cpus,
334
+ memory=resources.memory,
335
+ region=resources.region,
336
+ zone=resources.zone,
337
+ clouds='ibm')
319
338
  if instance_list is None:
320
339
  return resources_utils.FeasibleResources([], fuzzy_candidate_list,
321
340
  None)
@@ -397,13 +416,15 @@ class IBM(clouds.Cloud):
397
416
  return image_size
398
417
 
399
418
  @classmethod
400
- def _check_compute_credentials(cls) -> Tuple[bool, Optional[str]]:
419
+ def _check_compute_credentials(
420
+ cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
401
421
  """Checks if the user has access credentials to
402
422
  IBM's compute service."""
403
423
  return cls._check_credentials()
404
424
 
405
425
  @classmethod
406
- def _check_storage_credentials(cls) -> Tuple[bool, Optional[str]]:
426
+ def _check_storage_credentials(
427
+ cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
407
428
  """Checks if the user has access credentials to
408
429
  IBM's storage service."""
409
430
  # TODO(seungjin): Implement separate check for
@@ -458,11 +479,11 @@ class IBM(clouds.Cloud):
458
479
 
459
480
  def instance_type_exists(self, instance_type):
460
481
  """Returns whether the instance type exists for this cloud."""
461
- return service_catalog.instance_type_exists(instance_type, clouds='ibm')
482
+ return catalog.instance_type_exists(instance_type, clouds='ibm')
462
483
 
463
484
  def validate_region_zone(self, region: Optional[str], zone: Optional[str]):
464
485
  """Validates the region and zone."""
465
- return service_catalog.validate_region_zone(region, zone, clouds='ibm')
486
+ return catalog.validate_region_zone(region, zone, clouds='ibm')
466
487
 
467
488
  @classmethod
468
489
  def query_status(cls, name: str, tag_filters: Dict[str, str],