skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546) hide show
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/hyperbolic.py +8 -0
  8. sky/adaptors/ibm.py +5 -2
  9. sky/adaptors/kubernetes.py +149 -18
  10. sky/adaptors/nebius.py +173 -30
  11. sky/adaptors/primeintellect.py +1 -0
  12. sky/adaptors/runpod.py +68 -0
  13. sky/adaptors/seeweb.py +183 -0
  14. sky/adaptors/shadeform.py +89 -0
  15. sky/admin_policy.py +187 -4
  16. sky/authentication.py +179 -225
  17. sky/backends/__init__.py +4 -2
  18. sky/backends/backend.py +22 -9
  19. sky/backends/backend_utils.py +1323 -397
  20. sky/backends/cloud_vm_ray_backend.py +1749 -1029
  21. sky/backends/docker_utils.py +1 -1
  22. sky/backends/local_docker_backend.py +11 -6
  23. sky/backends/task_codegen.py +633 -0
  24. sky/backends/wheel_utils.py +55 -9
  25. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  26. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  27. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  28. sky/{clouds/service_catalog → catalog}/common.py +90 -49
  29. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  31. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  33. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
  34. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  35. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  36. sky/catalog/data_fetchers/fetch_nebius.py +338 -0
  37. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  38. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  39. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  40. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  41. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  42. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  43. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  44. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  45. sky/catalog/hyperbolic_catalog.py +136 -0
  46. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  47. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  48. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  49. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  50. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  51. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  52. sky/catalog/primeintellect_catalog.py +95 -0
  53. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  54. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  55. sky/catalog/seeweb_catalog.py +184 -0
  56. sky/catalog/shadeform_catalog.py +165 -0
  57. sky/catalog/ssh_catalog.py +167 -0
  58. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  59. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  60. sky/check.py +533 -185
  61. sky/cli.py +5 -5975
  62. sky/client/{cli.py → cli/command.py} +2591 -1956
  63. sky/client/cli/deprecation_utils.py +99 -0
  64. sky/client/cli/flags.py +359 -0
  65. sky/client/cli/table_utils.py +322 -0
  66. sky/client/cli/utils.py +79 -0
  67. sky/client/common.py +78 -32
  68. sky/client/oauth.py +82 -0
  69. sky/client/sdk.py +1219 -319
  70. sky/client/sdk_async.py +827 -0
  71. sky/client/service_account_auth.py +47 -0
  72. sky/cloud_stores.py +82 -3
  73. sky/clouds/__init__.py +13 -0
  74. sky/clouds/aws.py +564 -164
  75. sky/clouds/azure.py +105 -83
  76. sky/clouds/cloud.py +140 -40
  77. sky/clouds/cudo.py +68 -50
  78. sky/clouds/do.py +66 -48
  79. sky/clouds/fluidstack.py +63 -44
  80. sky/clouds/gcp.py +339 -110
  81. sky/clouds/hyperbolic.py +293 -0
  82. sky/clouds/ibm.py +70 -49
  83. sky/clouds/kubernetes.py +570 -162
  84. sky/clouds/lambda_cloud.py +74 -54
  85. sky/clouds/nebius.py +210 -81
  86. sky/clouds/oci.py +88 -66
  87. sky/clouds/paperspace.py +61 -44
  88. sky/clouds/primeintellect.py +317 -0
  89. sky/clouds/runpod.py +164 -74
  90. sky/clouds/scp.py +89 -86
  91. sky/clouds/seeweb.py +477 -0
  92. sky/clouds/shadeform.py +400 -0
  93. sky/clouds/ssh.py +263 -0
  94. sky/clouds/utils/aws_utils.py +10 -4
  95. sky/clouds/utils/gcp_utils.py +87 -11
  96. sky/clouds/utils/oci_utils.py +38 -14
  97. sky/clouds/utils/scp_utils.py +231 -167
  98. sky/clouds/vast.py +99 -77
  99. sky/clouds/vsphere.py +51 -40
  100. sky/core.py +375 -173
  101. sky/dag.py +15 -0
  102. sky/dashboard/out/404.html +1 -1
  103. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  105. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  106. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  107. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  110. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  111. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  112. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  113. sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  115. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  116. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  118. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  119. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  121. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  122. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  124. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  126. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  127. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  128. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  129. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  131. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  133. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  134. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  136. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  139. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  141. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  143. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  144. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  148. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  149. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  151. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  152. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
  155. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  156. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  157. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  158. sky/dashboard/out/clusters/[cluster].html +1 -1
  159. sky/dashboard/out/clusters.html +1 -1
  160. sky/dashboard/out/config.html +1 -0
  161. sky/dashboard/out/index.html +1 -1
  162. sky/dashboard/out/infra/[context].html +1 -0
  163. sky/dashboard/out/infra.html +1 -0
  164. sky/dashboard/out/jobs/[job].html +1 -1
  165. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  166. sky/dashboard/out/jobs.html +1 -1
  167. sky/dashboard/out/users.html +1 -0
  168. sky/dashboard/out/volumes.html +1 -0
  169. sky/dashboard/out/workspace/new.html +1 -0
  170. sky/dashboard/out/workspaces/[name].html +1 -0
  171. sky/dashboard/out/workspaces.html +1 -0
  172. sky/data/data_utils.py +137 -1
  173. sky/data/mounting_utils.py +269 -84
  174. sky/data/storage.py +1460 -1807
  175. sky/data/storage_utils.py +43 -57
  176. sky/exceptions.py +126 -2
  177. sky/execution.py +216 -63
  178. sky/global_user_state.py +2390 -586
  179. sky/jobs/__init__.py +7 -0
  180. sky/jobs/client/sdk.py +300 -58
  181. sky/jobs/client/sdk_async.py +161 -0
  182. sky/jobs/constants.py +15 -8
  183. sky/jobs/controller.py +848 -275
  184. sky/jobs/file_content_utils.py +128 -0
  185. sky/jobs/log_gc.py +193 -0
  186. sky/jobs/recovery_strategy.py +402 -152
  187. sky/jobs/scheduler.py +314 -189
  188. sky/jobs/server/core.py +836 -255
  189. sky/jobs/server/server.py +156 -115
  190. sky/jobs/server/utils.py +136 -0
  191. sky/jobs/state.py +2109 -706
  192. sky/jobs/utils.py +1306 -215
  193. sky/logs/__init__.py +21 -0
  194. sky/logs/agent.py +108 -0
  195. sky/logs/aws.py +243 -0
  196. sky/logs/gcp.py +91 -0
  197. sky/metrics/__init__.py +0 -0
  198. sky/metrics/utils.py +453 -0
  199. sky/models.py +78 -1
  200. sky/optimizer.py +164 -70
  201. sky/provision/__init__.py +90 -4
  202. sky/provision/aws/config.py +147 -26
  203. sky/provision/aws/instance.py +136 -50
  204. sky/provision/azure/instance.py +11 -6
  205. sky/provision/common.py +13 -1
  206. sky/provision/cudo/cudo_machine_type.py +1 -1
  207. sky/provision/cudo/cudo_utils.py +14 -8
  208. sky/provision/cudo/cudo_wrapper.py +72 -71
  209. sky/provision/cudo/instance.py +10 -6
  210. sky/provision/do/instance.py +10 -6
  211. sky/provision/do/utils.py +4 -3
  212. sky/provision/docker_utils.py +140 -33
  213. sky/provision/fluidstack/instance.py +13 -8
  214. sky/provision/gcp/__init__.py +1 -0
  215. sky/provision/gcp/config.py +301 -19
  216. sky/provision/gcp/constants.py +218 -0
  217. sky/provision/gcp/instance.py +36 -8
  218. sky/provision/gcp/instance_utils.py +18 -4
  219. sky/provision/gcp/volume_utils.py +247 -0
  220. sky/provision/hyperbolic/__init__.py +12 -0
  221. sky/provision/hyperbolic/config.py +10 -0
  222. sky/provision/hyperbolic/instance.py +437 -0
  223. sky/provision/hyperbolic/utils.py +373 -0
  224. sky/provision/instance_setup.py +101 -20
  225. sky/provision/kubernetes/__init__.py +5 -0
  226. sky/provision/kubernetes/config.py +9 -52
  227. sky/provision/kubernetes/constants.py +17 -0
  228. sky/provision/kubernetes/instance.py +919 -280
  229. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  230. sky/provision/kubernetes/network.py +27 -17
  231. sky/provision/kubernetes/network_utils.py +44 -43
  232. sky/provision/kubernetes/utils.py +1221 -534
  233. sky/provision/kubernetes/volume.py +343 -0
  234. sky/provision/lambda_cloud/instance.py +22 -16
  235. sky/provision/nebius/constants.py +50 -0
  236. sky/provision/nebius/instance.py +19 -6
  237. sky/provision/nebius/utils.py +237 -137
  238. sky/provision/oci/instance.py +10 -5
  239. sky/provision/paperspace/instance.py +10 -7
  240. sky/provision/paperspace/utils.py +1 -1
  241. sky/provision/primeintellect/__init__.py +10 -0
  242. sky/provision/primeintellect/config.py +11 -0
  243. sky/provision/primeintellect/instance.py +454 -0
  244. sky/provision/primeintellect/utils.py +398 -0
  245. sky/provision/provisioner.py +117 -36
  246. sky/provision/runpod/__init__.py +5 -0
  247. sky/provision/runpod/instance.py +27 -6
  248. sky/provision/runpod/utils.py +51 -18
  249. sky/provision/runpod/volume.py +214 -0
  250. sky/provision/scp/__init__.py +15 -0
  251. sky/provision/scp/config.py +93 -0
  252. sky/provision/scp/instance.py +707 -0
  253. sky/provision/seeweb/__init__.py +11 -0
  254. sky/provision/seeweb/config.py +13 -0
  255. sky/provision/seeweb/instance.py +812 -0
  256. sky/provision/shadeform/__init__.py +11 -0
  257. sky/provision/shadeform/config.py +12 -0
  258. sky/provision/shadeform/instance.py +351 -0
  259. sky/provision/shadeform/shadeform_utils.py +83 -0
  260. sky/provision/ssh/__init__.py +18 -0
  261. sky/provision/vast/instance.py +13 -8
  262. sky/provision/vast/utils.py +10 -7
  263. sky/provision/volume.py +164 -0
  264. sky/provision/vsphere/common/ssl_helper.py +1 -1
  265. sky/provision/vsphere/common/vapiconnect.py +2 -1
  266. sky/provision/vsphere/common/vim_utils.py +4 -4
  267. sky/provision/vsphere/instance.py +15 -10
  268. sky/provision/vsphere/vsphere_utils.py +17 -20
  269. sky/py.typed +0 -0
  270. sky/resources.py +845 -119
  271. sky/schemas/__init__.py +0 -0
  272. sky/schemas/api/__init__.py +0 -0
  273. sky/schemas/api/responses.py +227 -0
  274. sky/schemas/db/README +4 -0
  275. sky/schemas/db/env.py +90 -0
  276. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  277. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  278. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  279. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  280. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  281. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  282. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  283. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  284. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  285. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  286. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  287. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  288. sky/schemas/db/script.py.mako +28 -0
  289. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  290. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  291. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  292. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  293. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  294. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  295. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  296. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  297. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  298. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  299. sky/schemas/generated/__init__.py +0 -0
  300. sky/schemas/generated/autostopv1_pb2.py +36 -0
  301. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  302. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  303. sky/schemas/generated/jobsv1_pb2.py +86 -0
  304. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  305. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  306. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  307. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  308. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  309. sky/schemas/generated/servev1_pb2.py +58 -0
  310. sky/schemas/generated/servev1_pb2.pyi +115 -0
  311. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  312. sky/serve/autoscalers.py +357 -5
  313. sky/serve/client/impl.py +310 -0
  314. sky/serve/client/sdk.py +47 -139
  315. sky/serve/client/sdk_async.py +130 -0
  316. sky/serve/constants.py +12 -9
  317. sky/serve/controller.py +68 -17
  318. sky/serve/load_balancer.py +106 -60
  319. sky/serve/load_balancing_policies.py +116 -2
  320. sky/serve/replica_managers.py +434 -249
  321. sky/serve/serve_rpc_utils.py +179 -0
  322. sky/serve/serve_state.py +569 -257
  323. sky/serve/serve_utils.py +775 -265
  324. sky/serve/server/core.py +66 -711
  325. sky/serve/server/impl.py +1093 -0
  326. sky/serve/server/server.py +21 -18
  327. sky/serve/service.py +192 -89
  328. sky/serve/service_spec.py +144 -20
  329. sky/serve/spot_placer.py +3 -0
  330. sky/server/auth/__init__.py +0 -0
  331. sky/server/auth/authn.py +50 -0
  332. sky/server/auth/loopback.py +38 -0
  333. sky/server/auth/oauth2_proxy.py +202 -0
  334. sky/server/common.py +478 -182
  335. sky/server/config.py +85 -23
  336. sky/server/constants.py +44 -6
  337. sky/server/daemons.py +295 -0
  338. sky/server/html/token_page.html +185 -0
  339. sky/server/metrics.py +160 -0
  340. sky/server/middleware_utils.py +166 -0
  341. sky/server/requests/executor.py +558 -138
  342. sky/server/requests/payloads.py +364 -24
  343. sky/server/requests/preconditions.py +21 -17
  344. sky/server/requests/process.py +112 -29
  345. sky/server/requests/request_names.py +121 -0
  346. sky/server/requests/requests.py +822 -226
  347. sky/server/requests/serializers/decoders.py +82 -31
  348. sky/server/requests/serializers/encoders.py +140 -22
  349. sky/server/requests/threads.py +117 -0
  350. sky/server/rest.py +455 -0
  351. sky/server/server.py +1309 -285
  352. sky/server/state.py +20 -0
  353. sky/server/stream_utils.py +327 -61
  354. sky/server/uvicorn.py +217 -3
  355. sky/server/versions.py +270 -0
  356. sky/setup_files/MANIFEST.in +11 -1
  357. sky/setup_files/alembic.ini +160 -0
  358. sky/setup_files/dependencies.py +139 -31
  359. sky/setup_files/setup.py +44 -42
  360. sky/sky_logging.py +114 -7
  361. sky/skylet/attempt_skylet.py +106 -24
  362. sky/skylet/autostop_lib.py +129 -8
  363. sky/skylet/configs.py +29 -20
  364. sky/skylet/constants.py +216 -25
  365. sky/skylet/events.py +101 -21
  366. sky/skylet/job_lib.py +345 -164
  367. sky/skylet/log_lib.py +297 -18
  368. sky/skylet/log_lib.pyi +44 -1
  369. sky/skylet/providers/ibm/node_provider.py +12 -8
  370. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  371. sky/skylet/ray_patches/__init__.py +17 -3
  372. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  373. sky/skylet/ray_patches/cli.py.diff +19 -0
  374. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  375. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  376. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  377. sky/skylet/ray_patches/updater.py.diff +18 -0
  378. sky/skylet/ray_patches/worker.py.diff +41 -0
  379. sky/skylet/runtime_utils.py +21 -0
  380. sky/skylet/services.py +568 -0
  381. sky/skylet/skylet.py +72 -4
  382. sky/skylet/subprocess_daemon.py +104 -29
  383. sky/skypilot_config.py +506 -99
  384. sky/ssh_node_pools/__init__.py +1 -0
  385. sky/ssh_node_pools/core.py +135 -0
  386. sky/ssh_node_pools/server.py +233 -0
  387. sky/task.py +685 -163
  388. sky/templates/aws-ray.yml.j2 +11 -3
  389. sky/templates/azure-ray.yml.j2 +2 -1
  390. sky/templates/cudo-ray.yml.j2 +1 -0
  391. sky/templates/do-ray.yml.j2 +2 -1
  392. sky/templates/fluidstack-ray.yml.j2 +1 -0
  393. sky/templates/gcp-ray.yml.j2 +62 -1
  394. sky/templates/hyperbolic-ray.yml.j2 +68 -0
  395. sky/templates/ibm-ray.yml.j2 +2 -1
  396. sky/templates/jobs-controller.yaml.j2 +27 -24
  397. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  398. sky/templates/kubernetes-ray.yml.j2 +611 -50
  399. sky/templates/lambda-ray.yml.j2 +2 -1
  400. sky/templates/nebius-ray.yml.j2 +34 -12
  401. sky/templates/oci-ray.yml.j2 +1 -0
  402. sky/templates/paperspace-ray.yml.j2 +2 -1
  403. sky/templates/primeintellect-ray.yml.j2 +72 -0
  404. sky/templates/runpod-ray.yml.j2 +10 -1
  405. sky/templates/scp-ray.yml.j2 +4 -50
  406. sky/templates/seeweb-ray.yml.j2 +171 -0
  407. sky/templates/shadeform-ray.yml.j2 +73 -0
  408. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  409. sky/templates/vast-ray.yml.j2 +1 -0
  410. sky/templates/vsphere-ray.yml.j2 +1 -0
  411. sky/templates/websocket_proxy.py +212 -37
  412. sky/usage/usage_lib.py +31 -15
  413. sky/users/__init__.py +0 -0
  414. sky/users/model.conf +15 -0
  415. sky/users/permission.py +397 -0
  416. sky/users/rbac.py +121 -0
  417. sky/users/server.py +720 -0
  418. sky/users/token_service.py +218 -0
  419. sky/utils/accelerator_registry.py +35 -5
  420. sky/utils/admin_policy_utils.py +84 -38
  421. sky/utils/annotations.py +38 -5
  422. sky/utils/asyncio_utils.py +78 -0
  423. sky/utils/atomic.py +1 -1
  424. sky/utils/auth_utils.py +153 -0
  425. sky/utils/benchmark_utils.py +60 -0
  426. sky/utils/cli_utils/status_utils.py +159 -86
  427. sky/utils/cluster_utils.py +31 -9
  428. sky/utils/command_runner.py +354 -68
  429. sky/utils/command_runner.pyi +93 -3
  430. sky/utils/common.py +35 -8
  431. sky/utils/common_utils.py +314 -91
  432. sky/utils/config_utils.py +74 -5
  433. sky/utils/context.py +403 -0
  434. sky/utils/context_utils.py +242 -0
  435. sky/utils/controller_utils.py +383 -89
  436. sky/utils/dag_utils.py +31 -12
  437. sky/utils/db/__init__.py +0 -0
  438. sky/utils/db/db_utils.py +485 -0
  439. sky/utils/db/kv_cache.py +149 -0
  440. sky/utils/db/migration_utils.py +137 -0
  441. sky/utils/directory_utils.py +12 -0
  442. sky/utils/env_options.py +13 -0
  443. sky/utils/git.py +567 -0
  444. sky/utils/git_clone.sh +460 -0
  445. sky/utils/infra_utils.py +195 -0
  446. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  447. sky/utils/kubernetes/config_map_utils.py +133 -0
  448. sky/utils/kubernetes/create_cluster.sh +15 -29
  449. sky/utils/kubernetes/delete_cluster.sh +10 -7
  450. sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
  451. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  452. sky/utils/kubernetes/generate_kind_config.py +6 -66
  453. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  454. sky/utils/kubernetes/gpu_labeler.py +18 -8
  455. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  456. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  457. sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
  458. sky/utils/kubernetes/rsync_helper.sh +11 -3
  459. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  460. sky/utils/kubernetes/ssh_utils.py +221 -0
  461. sky/utils/kubernetes_enums.py +8 -15
  462. sky/utils/lock_events.py +94 -0
  463. sky/utils/locks.py +416 -0
  464. sky/utils/log_utils.py +82 -107
  465. sky/utils/perf_utils.py +22 -0
  466. sky/utils/resource_checker.py +298 -0
  467. sky/utils/resources_utils.py +249 -32
  468. sky/utils/rich_utils.py +217 -39
  469. sky/utils/schemas.py +955 -160
  470. sky/utils/serialize_utils.py +16 -0
  471. sky/utils/status_lib.py +10 -0
  472. sky/utils/subprocess_utils.py +29 -15
  473. sky/utils/tempstore.py +70 -0
  474. sky/utils/thread_utils.py +91 -0
  475. sky/utils/timeline.py +26 -53
  476. sky/utils/ux_utils.py +84 -15
  477. sky/utils/validator.py +11 -1
  478. sky/utils/volume.py +165 -0
  479. sky/utils/yaml_utils.py +111 -0
  480. sky/volumes/__init__.py +13 -0
  481. sky/volumes/client/__init__.py +0 -0
  482. sky/volumes/client/sdk.py +150 -0
  483. sky/volumes/server/__init__.py +0 -0
  484. sky/volumes/server/core.py +270 -0
  485. sky/volumes/server/server.py +124 -0
  486. sky/volumes/volume.py +215 -0
  487. sky/workspaces/__init__.py +0 -0
  488. sky/workspaces/core.py +655 -0
  489. sky/workspaces/server.py +101 -0
  490. sky/workspaces/utils.py +56 -0
  491. sky_templates/README.md +3 -0
  492. sky_templates/__init__.py +3 -0
  493. sky_templates/ray/__init__.py +0 -0
  494. sky_templates/ray/start_cluster +183 -0
  495. sky_templates/ray/stop_cluster +75 -0
  496. skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
  497. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  498. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
  499. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  500. sky/benchmark/benchmark_state.py +0 -256
  501. sky/benchmark/benchmark_utils.py +0 -641
  502. sky/clouds/service_catalog/constants.py +0 -7
  503. sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
  504. sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
  505. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  506. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  507. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  508. sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
  509. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  510. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  511. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  512. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  513. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  514. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  515. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  516. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
  517. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  518. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  519. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  520. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
  521. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  522. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  523. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  524. sky/jobs/dashboard/dashboard.py +0 -223
  525. sky/jobs/dashboard/static/favicon.ico +0 -0
  526. sky/jobs/dashboard/templates/index.html +0 -831
  527. sky/jobs/server/dashboard_utils.py +0 -69
  528. sky/skylet/providers/scp/__init__.py +0 -2
  529. sky/skylet/providers/scp/config.py +0 -149
  530. sky/skylet/providers/scp/node_provider.py +0 -578
  531. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  532. sky/utils/db_utils.py +0 -100
  533. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  534. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  535. skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
  536. skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
  537. skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
  538. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  539. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  540. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  541. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  542. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  543. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  544. /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  545. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  546. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,184 @@
1
+ """Seeweb service catalog.
2
+
3
+ This module loads the service catalog file and can be used to
4
+ query instance types and pricing information for Seeweb.
5
+ """
6
+
7
+ import typing
8
+ from typing import Dict, List, Optional, Tuple
9
+
10
+ import pandas as pd
11
+
12
+ from sky.catalog import common
13
+ from sky.utils import resources_utils
14
+ from sky.utils import ux_utils
15
+
16
+ if typing.TYPE_CHECKING:
17
+ from sky.clouds import cloud
18
+
19
+ _PULL_FREQUENCY_HOURS = 8
20
+ _df = common.read_catalog('seeweb/vms.csv',
21
+ pull_frequency_hours=_PULL_FREQUENCY_HOURS)
22
+
23
+
24
+ def instance_type_exists(instance_type: str) -> bool:
25
+ result = common.instance_type_exists_impl(_df, instance_type)
26
+ return result
27
+
28
+
29
+ def validate_region_zone(
30
+ region: Optional[str],
31
+ zone: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
32
+ if zone is not None:
33
+ with ux_utils.print_exception_no_traceback():
34
+ raise ValueError('Seeweb does not support zones.')
35
+
36
+ result = common.validate_region_zone_impl('Seeweb', _df, region, zone)
37
+ return result
38
+
39
+
40
+ def get_hourly_cost(instance_type: str,
41
+ use_spot: bool = False,
42
+ region: Optional[str] = None,
43
+ zone: Optional[str] = None) -> float:
44
+ """Returns the cost, or the cheapest cost among all zones for spot."""
45
+ if zone is not None:
46
+ with ux_utils.print_exception_no_traceback():
47
+ raise ValueError('Seeweb does not support zones.')
48
+
49
+ result = common.get_hourly_cost_impl(_df, instance_type, use_spot, region,
50
+ zone)
51
+ return result
52
+
53
+
54
+ def get_vcpus_mem_from_instance_type(
55
+ instance_type: str) -> Tuple[Optional[float], Optional[float]]:
56
+ result = common.get_vcpus_mem_from_instance_type_impl(_df, instance_type)
57
+ return result
58
+
59
+
60
+ def get_default_instance_type(cpus: Optional[str] = None,
61
+ memory: Optional[str] = None,
62
+ disk_tier: Optional[
63
+ resources_utils.DiskTier] = None,
64
+ region: Optional[str] = None,
65
+ zone: Optional[str] = None) -> Optional[str]:
66
+ del disk_tier # unused
67
+ result = common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory,
68
+ region, zone)
69
+ return result
70
+
71
+
72
+ def get_accelerators_from_instance_type(
73
+ instance_type: str) -> Optional[Dict[str, int]]:
74
+ # Filter the dataframe for the specific instance type
75
+ df_filtered = _df[_df['InstanceType'] == instance_type]
76
+ if df_filtered.empty:
77
+ return None
78
+
79
+ # Get the first row (all rows for same instance
80
+ # type should have same accelerator info)
81
+ row = df_filtered.iloc[0]
82
+ acc_name = row['AcceleratorName']
83
+ acc_count = row['AcceleratorCount']
84
+
85
+ # Check if the instance has accelerators
86
+ if pd.isna(acc_name) or pd.isna(
87
+ acc_count) or acc_name == '' or acc_count == '':
88
+ return None
89
+
90
+ # Convert accelerator count to int/float
91
+ try:
92
+ if int(acc_count) == acc_count:
93
+ acc_count = int(acc_count)
94
+ else:
95
+ acc_count = float(acc_count)
96
+ except (ValueError, TypeError):
97
+ return None
98
+
99
+ result = {acc_name: acc_count}
100
+ return result
101
+
102
+
103
+ def get_instance_type_for_accelerator(
104
+ acc_name: str,
105
+ acc_count: int,
106
+ cpus: Optional[str] = None,
107
+ memory: Optional[str] = None,
108
+ use_spot: bool = False,
109
+ region: Optional[str] = None,
110
+ zone: Optional[str] = None) -> Tuple[Optional[List[str]], List[str]]:
111
+ """Returns a list of instance types satisfying
112
+ the required count of accelerators."""
113
+ if zone is not None:
114
+ with ux_utils.print_exception_no_traceback():
115
+ raise ValueError('Seeweb does not support zones.')
116
+
117
+ result = common.get_instance_type_for_accelerator_impl(df=_df,
118
+ acc_name=acc_name,
119
+ acc_count=acc_count,
120
+ cpus=cpus,
121
+ memory=memory,
122
+ use_spot=use_spot,
123
+ region=region,
124
+ zone=zone)
125
+ return result
126
+
127
+
128
+ def regions() -> List['cloud.Region']:
129
+ result = common.get_region_zones(_df, use_spot=False)
130
+ return result
131
+
132
+
133
+ def get_region_zones_for_instance_type(instance_type: str,
134
+ use_spot: bool = False
135
+ ) -> List['cloud.Region']:
136
+ """Returns a list of regions for a given instance type."""
137
+ # Filter the dataframe for the specific instance type
138
+ df_filtered = _df[_df['InstanceType'] == instance_type]
139
+ if df_filtered.empty:
140
+ return []
141
+
142
+ # Use common.get_region_zones() like all other providers
143
+ region_list = common.get_region_zones(df_filtered, use_spot)
144
+
145
+ # Default region: Frosinone (it-fr2)
146
+ # Other regions: Milano (it-mi2), Lugano (ch-lug1), Bulgaria (bg-sof1)
147
+ priority_regions = ['it-fr2']
148
+ prioritized_regions = []
149
+ other_regions = []
150
+
151
+ # First, add regions in priority order if they exist
152
+ for priority_region in priority_regions:
153
+ for region in region_list:
154
+ if region.name == priority_region:
155
+ prioritized_regions.append(region)
156
+ break
157
+
158
+ # Then, add any remaining regions that weren't in the priority list
159
+ for region in region_list:
160
+ if region.name not in priority_regions:
161
+ other_regions.append(region)
162
+
163
+ result = prioritized_regions + other_regions
164
+ return result
165
+
166
+
167
+ def list_accelerators(
168
+ gpus_only: bool,
169
+ name_filter: Optional[str],
170
+ region_filter: Optional[str],
171
+ quantity_filter: Optional[int],
172
+ case_sensitive: bool = True,
173
+ all_regions: bool = False,
174
+ require_price: bool = True) -> Dict[str, List[common.InstanceTypeInfo]]:
175
+ """Lists accelerators offered in Seeweb."""
176
+ # Filter out rows with empty or null regions (indicating unavailability)
177
+ df_filtered = _df.dropna(subset=['Region'])
178
+ df_filtered = df_filtered[df_filtered['Region'].str.strip() != '']
179
+
180
+ result = common.list_accelerators_impl('Seeweb', df_filtered, gpus_only,
181
+ name_filter, region_filter,
182
+ quantity_filter, case_sensitive,
183
+ all_regions, require_price)
184
+ return result
@@ -0,0 +1,165 @@
1
+ """ Shadeform | Catalog
2
+
3
+ This module loads pricing and instance information from the Shadeform API
4
+ and can be used to query instance types and pricing information for Shadeform.
5
+ """
6
+
7
+ import typing
8
+ from typing import Dict, List, Optional, Tuple, Union
9
+
10
+ import pandas as pd
11
+
12
+ from sky.catalog import common
13
+
14
+ if typing.TYPE_CHECKING:
15
+ from sky.clouds import cloud
16
+
17
+ # We'll use dynamic fetching, so no static CSV file to load
18
+ _df = None
19
+
20
+
21
+ def _get_df():
22
+ """Get the dataframe, fetching from API if needed."""
23
+ global _df
24
+ if _df is None:
25
+ # For now, we'll fall back to a minimal static catalog
26
+ # In a full implementation, this would call the Shadeform API
27
+ # to dynamically fetch the latest instance types and pricing
28
+ try:
29
+ df = common.read_catalog('shadeform/vms.csv')
30
+ except FileNotFoundError:
31
+ # If no static catalog exists, create an empty one
32
+ # This would be replaced with dynamic API fetching
33
+ _df = pd.DataFrame(columns=[
34
+ 'InstanceType', 'AcceleratorName', 'AcceleratorCount', 'vCPUs',
35
+ 'MemoryGiB', 'Price', 'Region', 'GpuInfo', 'SpotPrice'
36
+ ])
37
+ else:
38
+ df = df[df['InstanceType'].notna()]
39
+ if 'AcceleratorName' in df.columns:
40
+ df = df[df['AcceleratorName'].notna()]
41
+ df = df.assign(AcceleratorName=df['AcceleratorName'].astype(
42
+ str).str.strip())
43
+ _df = df.reset_index(drop=True)
44
+ return _df
45
+
46
+
47
+ def _is_not_found_error(err: ValueError) -> bool:
48
+ msg = str(err).lower()
49
+ return 'not found' in msg or 'not supported' in msg
50
+
51
+
52
+ def _call_or_default(func, default):
53
+ try:
54
+ return func()
55
+ except ValueError as err:
56
+ if _is_not_found_error(err):
57
+ return default
58
+ raise
59
+
60
+
61
+ def instance_type_exists(instance_type: str) -> bool:
62
+ """Check if an instance type exists."""
63
+ return common.instance_type_exists_impl(_get_df(), instance_type)
64
+
65
+
66
+ def validate_region_zone(
67
+ region: Optional[str],
68
+ zone: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
69
+ """Validate region and zone for Shadeform."""
70
+ return common.validate_region_zone_impl('shadeform', _get_df(), region,
71
+ zone)
72
+
73
+
74
+ def get_hourly_cost(instance_type: str,
75
+ use_spot: bool = False,
76
+ region: Optional[str] = None,
77
+ zone: Optional[str] = None) -> float:
78
+ """Returns the cost, or the cheapest cost among all zones for spot."""
79
+ # Shadeform doesn't support spot instances currently
80
+ if use_spot:
81
+ raise ValueError('Spot instances are not supported on Shadeform')
82
+
83
+ return common.get_hourly_cost_impl(_get_df(), instance_type, use_spot,
84
+ region, zone)
85
+
86
+
87
+ def get_vcpus_mem_from_instance_type(
88
+ instance_type: str) -> Tuple[Optional[float], Optional[float]]:
89
+ """Get vCPUs and memory from instance type."""
90
+ return _call_or_default(
91
+ lambda: common.get_vcpus_mem_from_instance_type_impl(
92
+ _get_df(), instance_type), (None, None))
93
+
94
+
95
+ def get_default_instance_type(cpus: Optional[str] = None,
96
+ memory: Optional[str] = None,
97
+ disk_tier: Optional[str] = None,
98
+ region: Optional[str] = None,
99
+ zone: Optional[str] = None) -> Optional[str]:
100
+ """Get default instance type based on requirements."""
101
+ del disk_tier # Shadeform doesn't support custom disk tiers yet
102
+ return _call_or_default(
103
+ lambda: common.get_instance_type_for_cpus_mem_impl(
104
+ _get_df(), cpus, memory, region, zone), None)
105
+
106
+
107
+ def get_accelerators_from_instance_type(
108
+ instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
109
+ """Get accelerator information from instance type."""
110
+ return _call_or_default(
111
+ lambda: common.get_accelerators_from_instance_type_impl(
112
+ _get_df(), instance_type), None)
113
+
114
+
115
+ def get_instance_type_for_accelerator(
116
+ acc_name: str,
117
+ acc_count: int,
118
+ cpus: Optional[str] = None,
119
+ memory: Optional[str] = None,
120
+ use_spot: bool = False,
121
+ region: Optional[str] = None,
122
+ zone: Optional[str] = None) -> Tuple[Optional[List[str]], List[str]]:
123
+ """Returns a list of instance types that have the given accelerator."""
124
+ if use_spot:
125
+ # Return empty lists since spot is not supported
126
+ return None, ['Spot instances are not supported on Shadeform']
127
+
128
+ return _call_or_default(
129
+ lambda: common.get_instance_type_for_accelerator_impl(
130
+ df=_get_df(),
131
+ acc_name=acc_name,
132
+ acc_count=acc_count,
133
+ cpus=cpus,
134
+ memory=memory,
135
+ use_spot=use_spot,
136
+ region=region,
137
+ zone=zone), (None, []))
138
+
139
+
140
+ def get_region_zones_for_instance_type(instance_type: str,
141
+ use_spot: bool) -> List['cloud.Region']:
142
+ """Get regions and zones for an instance type."""
143
+ if use_spot:
144
+ return [] # No spot support
145
+
146
+ df = _get_df()
147
+ df_filtered = df[df['InstanceType'] == instance_type]
148
+ return _call_or_default(
149
+ lambda: common.get_region_zones(df_filtered, use_spot), [])
150
+
151
+
152
+ def list_accelerators(
153
+ gpus_only: bool,
154
+ name_filter: Optional[str],
155
+ region_filter: Optional[str],
156
+ quantity_filter: Optional[int],
157
+ case_sensitive: bool = True,
158
+ all_regions: bool = False,
159
+ require_price: bool = True) -> Dict[str, List[common.InstanceTypeInfo]]:
160
+ """Returns all instance types in Shadeform offering GPUs."""
161
+ del require_price # Unused.
162
+ return common.list_accelerators_impl('Shadeform', _get_df(), gpus_only,
163
+ name_filter, region_filter,
164
+ quantity_filter, case_sensitive,
165
+ all_regions)
@@ -0,0 +1,167 @@
1
+ """SSH Catalog.
2
+
3
+ This catalog inherits from the Kubernetes catalog as SSH cloud is a wrapper
4
+ around Kubernetes that uses SSH-specific contexts.
5
+ """
6
+ import typing
7
+ from typing import Dict, List, Optional, Tuple
8
+
9
+ from sky import sky_logging
10
+ from sky.catalog import CloudFilter
11
+ from sky.catalog import common
12
+ from sky.catalog import kubernetes_catalog
13
+ from sky.clouds import ssh
14
+
15
+ logger = sky_logging.init_logger(__name__)
16
+
17
+ if typing.TYPE_CHECKING:
18
+ import pandas as pd
19
+ else:
20
+ from sky.adaptors import common as adaptors_common
21
+ pd = adaptors_common.LazyImport('pandas')
22
+
23
+ _PULL_FREQUENCY_HOURS = 7
24
+
25
+ # Reuse the Kubernetes images catalog for SSH cloud.
26
+ # We keep pull_frequency_hours so we can remotely update the default image paths
27
+ _image_df = common.read_catalog('kubernetes/images.csv',
28
+ pull_frequency_hours=_PULL_FREQUENCY_HOURS)
29
+
30
+
31
+ def get_image_id_from_tag(tag: str, region: Optional[str]) -> Optional[str]:
32
+ """Returns the image id from the tag.
33
+
34
+ Delegates to Kubernetes catalog implementation.
35
+ """
36
+ return kubernetes_catalog.get_image_id_from_tag(tag, region)
37
+
38
+
39
+ def is_image_tag_valid(tag: str, region: Optional[str]) -> bool:
40
+ """Returns whether the image tag is valid.
41
+
42
+ Delegates to Kubernetes catalog implementation.
43
+ """
44
+ return kubernetes_catalog.is_image_tag_valid(tag, region)
45
+
46
+
47
+ def list_accelerators(
48
+ gpus_only: bool,
49
+ name_filter: Optional[str],
50
+ region_filter: Optional[str],
51
+ quantity_filter: Optional[int],
52
+ case_sensitive: bool = True,
53
+ all_regions: bool = False,
54
+ require_price: bool = True) -> Dict[str, List[common.InstanceTypeInfo]]:
55
+ """List accelerators in SSH-based Kubernetes clusters.
56
+
57
+ Delegates to the Kubernetes _list_accelerators function but restricts to
58
+ SSH contexts.
59
+ """
60
+ return _list_accelerators(gpus_only,
61
+ name_filter,
62
+ region_filter,
63
+ quantity_filter,
64
+ case_sensitive,
65
+ all_regions,
66
+ require_price,
67
+ realtime=False)[0]
68
+
69
+
70
+ def list_accelerators_realtime(
71
+ gpus_only: bool,
72
+ name_filter: Optional[str],
73
+ region_filter: Optional[str],
74
+ quantity_filter: Optional[int],
75
+ case_sensitive: bool = True,
76
+ all_regions: bool = False,
77
+ require_price: bool = True
78
+ ) -> Tuple[Dict[str, List[common.InstanceTypeInfo]], Dict[str, int], Dict[str,
79
+ int]]:
80
+ """List accelerators in SSH Node Pools with real-time information.
81
+
82
+ Delegates to the Kubernetes _list_accelerators function but restricts to
83
+ SSH contexts.
84
+ """
85
+ return _list_accelerators(gpus_only,
86
+ name_filter,
87
+ region_filter,
88
+ quantity_filter,
89
+ case_sensitive,
90
+ all_regions,
91
+ require_price,
92
+ realtime=True)
93
+
94
+
95
+ def _list_accelerators(
96
+ gpus_only: bool,
97
+ name_filter: Optional[str],
98
+ region_filter: Optional[str],
99
+ quantity_filter: Optional[int],
100
+ case_sensitive: bool = True,
101
+ all_regions: bool = False,
102
+ require_price: bool = True,
103
+ realtime: bool = False
104
+ ) -> Tuple[Dict[str, List[common.InstanceTypeInfo]], Dict[str, int], Dict[str,
105
+ int]]:
106
+ """List accelerators in SSH-based Kubernetes clusters.
107
+
108
+ This is a wrapper around the Kubernetes _list_accelerators function that
109
+ restricts the contexts to SSH-specific contexts only.
110
+
111
+ If region_filter is specified and it's not an SSH context, no results will
112
+ be returned.
113
+ """
114
+ # If a specific region is requested, ensure it's an SSH context
115
+ if region_filter is not None and not region_filter.startswith('ssh-'):
116
+ return {}, {}, {}
117
+
118
+ # Get SSH contexts
119
+ ssh_contexts = ssh.SSH.existing_allowed_contexts()
120
+
121
+ # If no contexts found, return empty results
122
+ if not ssh_contexts:
123
+ return {}, {}, {}
124
+
125
+ # If a region filter is specified and it's not a SSH context return empty
126
+ # results
127
+ if region_filter is not None and region_filter not in ssh_contexts:
128
+ return {}, {}, {}
129
+
130
+ # If region_filter is None, use the first context if all_regions is False
131
+ if region_filter is None and not all_regions and ssh_contexts:
132
+ # Use the first SSH context if no specific region requested
133
+ region_filter = ssh_contexts[0]
134
+
135
+ # Call the Kubernetes _list_accelerators with the appropriate region filter
136
+ if realtime:
137
+ return kubernetes_catalog.list_accelerators_realtime(
138
+ gpus_only, name_filter, region_filter, quantity_filter,
139
+ case_sensitive, all_regions, require_price)
140
+ else:
141
+ result = kubernetes_catalog.list_accelerators(
142
+ gpus_only, name_filter, region_filter, quantity_filter,
143
+ case_sensitive, all_regions, require_price)
144
+ return result, {}, {}
145
+
146
+
147
+ def validate_region_zone(
148
+ region_name: Optional[str],
149
+ zone_name: Optional[str],
150
+ clouds: CloudFilter = None) -> Tuple[Optional[str], Optional[str]]:
151
+ """Validates the region and zone for SSH cloud.
152
+
153
+ Delegates to the Kubernetes catalog implementation but ensures
154
+ the region is a valid SSH context.
155
+ """
156
+ # Delegate to Kubernetes implementation
157
+ region, zone = kubernetes_catalog.validate_region_zone(
158
+ region_name, zone_name, clouds)
159
+
160
+ # Get SSH contexts
161
+ ssh_contexts = ssh.SSH.existing_allowed_contexts()
162
+
163
+ # If a region is specified, ensure it's in the list of SSH contexts
164
+ if region is not None and region not in ssh_contexts:
165
+ return None, None
166
+
167
+ return region, zone
@@ -7,7 +7,7 @@ query instance types and pricing information for Vast.ai.
7
7
  import typing
8
8
  from typing import Dict, List, Optional, Tuple, Union
9
9
 
10
- from sky.clouds.service_catalog import common
10
+ from sky.catalog import common
11
11
  from sky.utils import ux_utils
12
12
 
13
13
  if typing.TYPE_CHECKING:
@@ -48,11 +48,14 @@ def get_vcpus_mem_from_instance_type(
48
48
 
49
49
  def get_default_instance_type(cpus: Optional[str] = None,
50
50
  memory: Optional[str] = None,
51
- disk_tier: Optional[str] = None) -> Optional[str]:
51
+ disk_tier: Optional[str] = None,
52
+ region: Optional[str] = None,
53
+ zone: Optional[str] = None) -> Optional[str]:
52
54
  del disk_tier
53
55
  # NOTE: After expanding catalog to multiple entries, you may
54
56
  # want to specify a default instance type or family.
55
- return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory)
57
+ return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory, region,
58
+ zone)
56
59
 
57
60
 
58
61
  def get_accelerators_from_instance_type(
@@ -5,7 +5,7 @@ import typing
5
5
  from typing import Dict, List, Optional, Tuple, Union
6
6
 
7
7
  from sky.adaptors import common as adaptors_common
8
- from sky.clouds.service_catalog import common
8
+ from sky.catalog import common
9
9
 
10
10
  if typing.TYPE_CHECKING:
11
11
  import pandas as pd
@@ -72,6 +72,8 @@ def get_default_instance_type(
72
72
  cpus: Optional[str] = None,
73
73
  memory: Optional[str] = None,
74
74
  disk_tier: Optional[str] = None,
75
+ region: Optional[str] = None,
76
+ zone: Optional[str] = None,
75
77
  ) -> Optional[str]:
76
78
  del disk_tier # unused
77
79
  if cpus is None and memory is None:
@@ -81,7 +83,8 @@ def get_default_instance_type(
81
83
  else:
82
84
  memory_gb_or_ratio = memory
83
85
  return common.get_instance_type_for_cpus_mem_impl(_get_df(), cpus,
84
- memory_gb_or_ratio)
86
+ memory_gb_or_ratio,
87
+ region, zone)
85
88
 
86
89
 
87
90
  def get_accelerators_from_instance_type(