skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546) hide show
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/hyperbolic.py +8 -0
  8. sky/adaptors/ibm.py +5 -2
  9. sky/adaptors/kubernetes.py +149 -18
  10. sky/adaptors/nebius.py +173 -30
  11. sky/adaptors/primeintellect.py +1 -0
  12. sky/adaptors/runpod.py +68 -0
  13. sky/adaptors/seeweb.py +183 -0
  14. sky/adaptors/shadeform.py +89 -0
  15. sky/admin_policy.py +187 -4
  16. sky/authentication.py +179 -225
  17. sky/backends/__init__.py +4 -2
  18. sky/backends/backend.py +22 -9
  19. sky/backends/backend_utils.py +1323 -397
  20. sky/backends/cloud_vm_ray_backend.py +1749 -1029
  21. sky/backends/docker_utils.py +1 -1
  22. sky/backends/local_docker_backend.py +11 -6
  23. sky/backends/task_codegen.py +633 -0
  24. sky/backends/wheel_utils.py +55 -9
  25. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  26. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  27. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  28. sky/{clouds/service_catalog → catalog}/common.py +90 -49
  29. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  31. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  33. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
  34. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  35. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  36. sky/catalog/data_fetchers/fetch_nebius.py +338 -0
  37. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  38. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  39. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  40. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  41. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  42. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  43. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  44. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  45. sky/catalog/hyperbolic_catalog.py +136 -0
  46. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  47. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  48. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  49. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  50. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  51. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  52. sky/catalog/primeintellect_catalog.py +95 -0
  53. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  54. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  55. sky/catalog/seeweb_catalog.py +184 -0
  56. sky/catalog/shadeform_catalog.py +165 -0
  57. sky/catalog/ssh_catalog.py +167 -0
  58. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  59. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  60. sky/check.py +533 -185
  61. sky/cli.py +5 -5975
  62. sky/client/{cli.py → cli/command.py} +2591 -1956
  63. sky/client/cli/deprecation_utils.py +99 -0
  64. sky/client/cli/flags.py +359 -0
  65. sky/client/cli/table_utils.py +322 -0
  66. sky/client/cli/utils.py +79 -0
  67. sky/client/common.py +78 -32
  68. sky/client/oauth.py +82 -0
  69. sky/client/sdk.py +1219 -319
  70. sky/client/sdk_async.py +827 -0
  71. sky/client/service_account_auth.py +47 -0
  72. sky/cloud_stores.py +82 -3
  73. sky/clouds/__init__.py +13 -0
  74. sky/clouds/aws.py +564 -164
  75. sky/clouds/azure.py +105 -83
  76. sky/clouds/cloud.py +140 -40
  77. sky/clouds/cudo.py +68 -50
  78. sky/clouds/do.py +66 -48
  79. sky/clouds/fluidstack.py +63 -44
  80. sky/clouds/gcp.py +339 -110
  81. sky/clouds/hyperbolic.py +293 -0
  82. sky/clouds/ibm.py +70 -49
  83. sky/clouds/kubernetes.py +570 -162
  84. sky/clouds/lambda_cloud.py +74 -54
  85. sky/clouds/nebius.py +210 -81
  86. sky/clouds/oci.py +88 -66
  87. sky/clouds/paperspace.py +61 -44
  88. sky/clouds/primeintellect.py +317 -0
  89. sky/clouds/runpod.py +164 -74
  90. sky/clouds/scp.py +89 -86
  91. sky/clouds/seeweb.py +477 -0
  92. sky/clouds/shadeform.py +400 -0
  93. sky/clouds/ssh.py +263 -0
  94. sky/clouds/utils/aws_utils.py +10 -4
  95. sky/clouds/utils/gcp_utils.py +87 -11
  96. sky/clouds/utils/oci_utils.py +38 -14
  97. sky/clouds/utils/scp_utils.py +231 -167
  98. sky/clouds/vast.py +99 -77
  99. sky/clouds/vsphere.py +51 -40
  100. sky/core.py +375 -173
  101. sky/dag.py +15 -0
  102. sky/dashboard/out/404.html +1 -1
  103. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  105. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  106. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  107. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  110. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  111. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  112. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  113. sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  115. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  116. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  118. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  119. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  121. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  122. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  124. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  126. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  127. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  128. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  129. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  131. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  133. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  134. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  136. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  139. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  141. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  143. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  144. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  148. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  149. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  151. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  152. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
  155. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  156. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  157. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  158. sky/dashboard/out/clusters/[cluster].html +1 -1
  159. sky/dashboard/out/clusters.html +1 -1
  160. sky/dashboard/out/config.html +1 -0
  161. sky/dashboard/out/index.html +1 -1
  162. sky/dashboard/out/infra/[context].html +1 -0
  163. sky/dashboard/out/infra.html +1 -0
  164. sky/dashboard/out/jobs/[job].html +1 -1
  165. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  166. sky/dashboard/out/jobs.html +1 -1
  167. sky/dashboard/out/users.html +1 -0
  168. sky/dashboard/out/volumes.html +1 -0
  169. sky/dashboard/out/workspace/new.html +1 -0
  170. sky/dashboard/out/workspaces/[name].html +1 -0
  171. sky/dashboard/out/workspaces.html +1 -0
  172. sky/data/data_utils.py +137 -1
  173. sky/data/mounting_utils.py +269 -84
  174. sky/data/storage.py +1460 -1807
  175. sky/data/storage_utils.py +43 -57
  176. sky/exceptions.py +126 -2
  177. sky/execution.py +216 -63
  178. sky/global_user_state.py +2390 -586
  179. sky/jobs/__init__.py +7 -0
  180. sky/jobs/client/sdk.py +300 -58
  181. sky/jobs/client/sdk_async.py +161 -0
  182. sky/jobs/constants.py +15 -8
  183. sky/jobs/controller.py +848 -275
  184. sky/jobs/file_content_utils.py +128 -0
  185. sky/jobs/log_gc.py +193 -0
  186. sky/jobs/recovery_strategy.py +402 -152
  187. sky/jobs/scheduler.py +314 -189
  188. sky/jobs/server/core.py +836 -255
  189. sky/jobs/server/server.py +156 -115
  190. sky/jobs/server/utils.py +136 -0
  191. sky/jobs/state.py +2109 -706
  192. sky/jobs/utils.py +1306 -215
  193. sky/logs/__init__.py +21 -0
  194. sky/logs/agent.py +108 -0
  195. sky/logs/aws.py +243 -0
  196. sky/logs/gcp.py +91 -0
  197. sky/metrics/__init__.py +0 -0
  198. sky/metrics/utils.py +453 -0
  199. sky/models.py +78 -1
  200. sky/optimizer.py +164 -70
  201. sky/provision/__init__.py +90 -4
  202. sky/provision/aws/config.py +147 -26
  203. sky/provision/aws/instance.py +136 -50
  204. sky/provision/azure/instance.py +11 -6
  205. sky/provision/common.py +13 -1
  206. sky/provision/cudo/cudo_machine_type.py +1 -1
  207. sky/provision/cudo/cudo_utils.py +14 -8
  208. sky/provision/cudo/cudo_wrapper.py +72 -71
  209. sky/provision/cudo/instance.py +10 -6
  210. sky/provision/do/instance.py +10 -6
  211. sky/provision/do/utils.py +4 -3
  212. sky/provision/docker_utils.py +140 -33
  213. sky/provision/fluidstack/instance.py +13 -8
  214. sky/provision/gcp/__init__.py +1 -0
  215. sky/provision/gcp/config.py +301 -19
  216. sky/provision/gcp/constants.py +218 -0
  217. sky/provision/gcp/instance.py +36 -8
  218. sky/provision/gcp/instance_utils.py +18 -4
  219. sky/provision/gcp/volume_utils.py +247 -0
  220. sky/provision/hyperbolic/__init__.py +12 -0
  221. sky/provision/hyperbolic/config.py +10 -0
  222. sky/provision/hyperbolic/instance.py +437 -0
  223. sky/provision/hyperbolic/utils.py +373 -0
  224. sky/provision/instance_setup.py +101 -20
  225. sky/provision/kubernetes/__init__.py +5 -0
  226. sky/provision/kubernetes/config.py +9 -52
  227. sky/provision/kubernetes/constants.py +17 -0
  228. sky/provision/kubernetes/instance.py +919 -280
  229. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  230. sky/provision/kubernetes/network.py +27 -17
  231. sky/provision/kubernetes/network_utils.py +44 -43
  232. sky/provision/kubernetes/utils.py +1221 -534
  233. sky/provision/kubernetes/volume.py +343 -0
  234. sky/provision/lambda_cloud/instance.py +22 -16
  235. sky/provision/nebius/constants.py +50 -0
  236. sky/provision/nebius/instance.py +19 -6
  237. sky/provision/nebius/utils.py +237 -137
  238. sky/provision/oci/instance.py +10 -5
  239. sky/provision/paperspace/instance.py +10 -7
  240. sky/provision/paperspace/utils.py +1 -1
  241. sky/provision/primeintellect/__init__.py +10 -0
  242. sky/provision/primeintellect/config.py +11 -0
  243. sky/provision/primeintellect/instance.py +454 -0
  244. sky/provision/primeintellect/utils.py +398 -0
  245. sky/provision/provisioner.py +117 -36
  246. sky/provision/runpod/__init__.py +5 -0
  247. sky/provision/runpod/instance.py +27 -6
  248. sky/provision/runpod/utils.py +51 -18
  249. sky/provision/runpod/volume.py +214 -0
  250. sky/provision/scp/__init__.py +15 -0
  251. sky/provision/scp/config.py +93 -0
  252. sky/provision/scp/instance.py +707 -0
  253. sky/provision/seeweb/__init__.py +11 -0
  254. sky/provision/seeweb/config.py +13 -0
  255. sky/provision/seeweb/instance.py +812 -0
  256. sky/provision/shadeform/__init__.py +11 -0
  257. sky/provision/shadeform/config.py +12 -0
  258. sky/provision/shadeform/instance.py +351 -0
  259. sky/provision/shadeform/shadeform_utils.py +83 -0
  260. sky/provision/ssh/__init__.py +18 -0
  261. sky/provision/vast/instance.py +13 -8
  262. sky/provision/vast/utils.py +10 -7
  263. sky/provision/volume.py +164 -0
  264. sky/provision/vsphere/common/ssl_helper.py +1 -1
  265. sky/provision/vsphere/common/vapiconnect.py +2 -1
  266. sky/provision/vsphere/common/vim_utils.py +4 -4
  267. sky/provision/vsphere/instance.py +15 -10
  268. sky/provision/vsphere/vsphere_utils.py +17 -20
  269. sky/py.typed +0 -0
  270. sky/resources.py +845 -119
  271. sky/schemas/__init__.py +0 -0
  272. sky/schemas/api/__init__.py +0 -0
  273. sky/schemas/api/responses.py +227 -0
  274. sky/schemas/db/README +4 -0
  275. sky/schemas/db/env.py +90 -0
  276. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  277. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  278. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  279. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  280. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  281. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  282. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  283. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  284. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  285. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  286. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  287. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  288. sky/schemas/db/script.py.mako +28 -0
  289. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  290. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  291. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  292. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  293. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  294. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  295. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  296. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  297. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  298. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  299. sky/schemas/generated/__init__.py +0 -0
  300. sky/schemas/generated/autostopv1_pb2.py +36 -0
  301. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  302. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  303. sky/schemas/generated/jobsv1_pb2.py +86 -0
  304. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  305. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  306. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  307. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  308. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  309. sky/schemas/generated/servev1_pb2.py +58 -0
  310. sky/schemas/generated/servev1_pb2.pyi +115 -0
  311. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  312. sky/serve/autoscalers.py +357 -5
  313. sky/serve/client/impl.py +310 -0
  314. sky/serve/client/sdk.py +47 -139
  315. sky/serve/client/sdk_async.py +130 -0
  316. sky/serve/constants.py +12 -9
  317. sky/serve/controller.py +68 -17
  318. sky/serve/load_balancer.py +106 -60
  319. sky/serve/load_balancing_policies.py +116 -2
  320. sky/serve/replica_managers.py +434 -249
  321. sky/serve/serve_rpc_utils.py +179 -0
  322. sky/serve/serve_state.py +569 -257
  323. sky/serve/serve_utils.py +775 -265
  324. sky/serve/server/core.py +66 -711
  325. sky/serve/server/impl.py +1093 -0
  326. sky/serve/server/server.py +21 -18
  327. sky/serve/service.py +192 -89
  328. sky/serve/service_spec.py +144 -20
  329. sky/serve/spot_placer.py +3 -0
  330. sky/server/auth/__init__.py +0 -0
  331. sky/server/auth/authn.py +50 -0
  332. sky/server/auth/loopback.py +38 -0
  333. sky/server/auth/oauth2_proxy.py +202 -0
  334. sky/server/common.py +478 -182
  335. sky/server/config.py +85 -23
  336. sky/server/constants.py +44 -6
  337. sky/server/daemons.py +295 -0
  338. sky/server/html/token_page.html +185 -0
  339. sky/server/metrics.py +160 -0
  340. sky/server/middleware_utils.py +166 -0
  341. sky/server/requests/executor.py +558 -138
  342. sky/server/requests/payloads.py +364 -24
  343. sky/server/requests/preconditions.py +21 -17
  344. sky/server/requests/process.py +112 -29
  345. sky/server/requests/request_names.py +121 -0
  346. sky/server/requests/requests.py +822 -226
  347. sky/server/requests/serializers/decoders.py +82 -31
  348. sky/server/requests/serializers/encoders.py +140 -22
  349. sky/server/requests/threads.py +117 -0
  350. sky/server/rest.py +455 -0
  351. sky/server/server.py +1309 -285
  352. sky/server/state.py +20 -0
  353. sky/server/stream_utils.py +327 -61
  354. sky/server/uvicorn.py +217 -3
  355. sky/server/versions.py +270 -0
  356. sky/setup_files/MANIFEST.in +11 -1
  357. sky/setup_files/alembic.ini +160 -0
  358. sky/setup_files/dependencies.py +139 -31
  359. sky/setup_files/setup.py +44 -42
  360. sky/sky_logging.py +114 -7
  361. sky/skylet/attempt_skylet.py +106 -24
  362. sky/skylet/autostop_lib.py +129 -8
  363. sky/skylet/configs.py +29 -20
  364. sky/skylet/constants.py +216 -25
  365. sky/skylet/events.py +101 -21
  366. sky/skylet/job_lib.py +345 -164
  367. sky/skylet/log_lib.py +297 -18
  368. sky/skylet/log_lib.pyi +44 -1
  369. sky/skylet/providers/ibm/node_provider.py +12 -8
  370. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  371. sky/skylet/ray_patches/__init__.py +17 -3
  372. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  373. sky/skylet/ray_patches/cli.py.diff +19 -0
  374. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  375. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  376. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  377. sky/skylet/ray_patches/updater.py.diff +18 -0
  378. sky/skylet/ray_patches/worker.py.diff +41 -0
  379. sky/skylet/runtime_utils.py +21 -0
  380. sky/skylet/services.py +568 -0
  381. sky/skylet/skylet.py +72 -4
  382. sky/skylet/subprocess_daemon.py +104 -29
  383. sky/skypilot_config.py +506 -99
  384. sky/ssh_node_pools/__init__.py +1 -0
  385. sky/ssh_node_pools/core.py +135 -0
  386. sky/ssh_node_pools/server.py +233 -0
  387. sky/task.py +685 -163
  388. sky/templates/aws-ray.yml.j2 +11 -3
  389. sky/templates/azure-ray.yml.j2 +2 -1
  390. sky/templates/cudo-ray.yml.j2 +1 -0
  391. sky/templates/do-ray.yml.j2 +2 -1
  392. sky/templates/fluidstack-ray.yml.j2 +1 -0
  393. sky/templates/gcp-ray.yml.j2 +62 -1
  394. sky/templates/hyperbolic-ray.yml.j2 +68 -0
  395. sky/templates/ibm-ray.yml.j2 +2 -1
  396. sky/templates/jobs-controller.yaml.j2 +27 -24
  397. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  398. sky/templates/kubernetes-ray.yml.j2 +611 -50
  399. sky/templates/lambda-ray.yml.j2 +2 -1
  400. sky/templates/nebius-ray.yml.j2 +34 -12
  401. sky/templates/oci-ray.yml.j2 +1 -0
  402. sky/templates/paperspace-ray.yml.j2 +2 -1
  403. sky/templates/primeintellect-ray.yml.j2 +72 -0
  404. sky/templates/runpod-ray.yml.j2 +10 -1
  405. sky/templates/scp-ray.yml.j2 +4 -50
  406. sky/templates/seeweb-ray.yml.j2 +171 -0
  407. sky/templates/shadeform-ray.yml.j2 +73 -0
  408. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  409. sky/templates/vast-ray.yml.j2 +1 -0
  410. sky/templates/vsphere-ray.yml.j2 +1 -0
  411. sky/templates/websocket_proxy.py +212 -37
  412. sky/usage/usage_lib.py +31 -15
  413. sky/users/__init__.py +0 -0
  414. sky/users/model.conf +15 -0
  415. sky/users/permission.py +397 -0
  416. sky/users/rbac.py +121 -0
  417. sky/users/server.py +720 -0
  418. sky/users/token_service.py +218 -0
  419. sky/utils/accelerator_registry.py +35 -5
  420. sky/utils/admin_policy_utils.py +84 -38
  421. sky/utils/annotations.py +38 -5
  422. sky/utils/asyncio_utils.py +78 -0
  423. sky/utils/atomic.py +1 -1
  424. sky/utils/auth_utils.py +153 -0
  425. sky/utils/benchmark_utils.py +60 -0
  426. sky/utils/cli_utils/status_utils.py +159 -86
  427. sky/utils/cluster_utils.py +31 -9
  428. sky/utils/command_runner.py +354 -68
  429. sky/utils/command_runner.pyi +93 -3
  430. sky/utils/common.py +35 -8
  431. sky/utils/common_utils.py +314 -91
  432. sky/utils/config_utils.py +74 -5
  433. sky/utils/context.py +403 -0
  434. sky/utils/context_utils.py +242 -0
  435. sky/utils/controller_utils.py +383 -89
  436. sky/utils/dag_utils.py +31 -12
  437. sky/utils/db/__init__.py +0 -0
  438. sky/utils/db/db_utils.py +485 -0
  439. sky/utils/db/kv_cache.py +149 -0
  440. sky/utils/db/migration_utils.py +137 -0
  441. sky/utils/directory_utils.py +12 -0
  442. sky/utils/env_options.py +13 -0
  443. sky/utils/git.py +567 -0
  444. sky/utils/git_clone.sh +460 -0
  445. sky/utils/infra_utils.py +195 -0
  446. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  447. sky/utils/kubernetes/config_map_utils.py +133 -0
  448. sky/utils/kubernetes/create_cluster.sh +15 -29
  449. sky/utils/kubernetes/delete_cluster.sh +10 -7
  450. sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
  451. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  452. sky/utils/kubernetes/generate_kind_config.py +6 -66
  453. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  454. sky/utils/kubernetes/gpu_labeler.py +18 -8
  455. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  456. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  457. sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
  458. sky/utils/kubernetes/rsync_helper.sh +11 -3
  459. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  460. sky/utils/kubernetes/ssh_utils.py +221 -0
  461. sky/utils/kubernetes_enums.py +8 -15
  462. sky/utils/lock_events.py +94 -0
  463. sky/utils/locks.py +416 -0
  464. sky/utils/log_utils.py +82 -107
  465. sky/utils/perf_utils.py +22 -0
  466. sky/utils/resource_checker.py +298 -0
  467. sky/utils/resources_utils.py +249 -32
  468. sky/utils/rich_utils.py +217 -39
  469. sky/utils/schemas.py +955 -160
  470. sky/utils/serialize_utils.py +16 -0
  471. sky/utils/status_lib.py +10 -0
  472. sky/utils/subprocess_utils.py +29 -15
  473. sky/utils/tempstore.py +70 -0
  474. sky/utils/thread_utils.py +91 -0
  475. sky/utils/timeline.py +26 -53
  476. sky/utils/ux_utils.py +84 -15
  477. sky/utils/validator.py +11 -1
  478. sky/utils/volume.py +165 -0
  479. sky/utils/yaml_utils.py +111 -0
  480. sky/volumes/__init__.py +13 -0
  481. sky/volumes/client/__init__.py +0 -0
  482. sky/volumes/client/sdk.py +150 -0
  483. sky/volumes/server/__init__.py +0 -0
  484. sky/volumes/server/core.py +270 -0
  485. sky/volumes/server/server.py +124 -0
  486. sky/volumes/volume.py +215 -0
  487. sky/workspaces/__init__.py +0 -0
  488. sky/workspaces/core.py +655 -0
  489. sky/workspaces/server.py +101 -0
  490. sky/workspaces/utils.py +56 -0
  491. sky_templates/README.md +3 -0
  492. sky_templates/__init__.py +3 -0
  493. sky_templates/ray/__init__.py +0 -0
  494. sky_templates/ray/start_cluster +183 -0
  495. sky_templates/ray/stop_cluster +75 -0
  496. skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
  497. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  498. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
  499. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  500. sky/benchmark/benchmark_state.py +0 -256
  501. sky/benchmark/benchmark_utils.py +0 -641
  502. sky/clouds/service_catalog/constants.py +0 -7
  503. sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
  504. sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
  505. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  506. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  507. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  508. sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
  509. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  510. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  511. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  512. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  513. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  514. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  515. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  516. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
  517. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  518. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  519. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  520. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
  521. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  522. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  523. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  524. sky/jobs/dashboard/dashboard.py +0 -223
  525. sky/jobs/dashboard/static/favicon.ico +0 -0
  526. sky/jobs/dashboard/templates/index.html +0 -831
  527. sky/jobs/server/dashboard_utils.py +0 -69
  528. sky/skylet/providers/scp/__init__.py +0 -2
  529. sky/skylet/providers/scp/config.py +0 -149
  530. sky/skylet/providers/scp/node_provider.py +0 -578
  531. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  532. sky/utils/db_utils.py +0 -100
  533. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  534. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  535. skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
  536. skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
  537. skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
  538. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  539. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  540. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  541. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  542. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  543. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  544. /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  545. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  546. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,136 @@
1
+ """Script to fetch Hyperbolic instance data and generate catalog."""
2
+ import argparse
3
+ import csv
4
+ import json
5
+ import os
6
+ import sys
7
+ from typing import Any, Dict
8
+
9
+ import requests
10
+
11
# Hyperbolic API endpoint queried for the catalog of VM offerings.
ENDPOINT = 'https://api.hyperbolic.xyz/v2/skypilot/catalog'
# Key file consulted when neither the --api-key argument nor the
# HYPERBOLIC_API_KEY environment variable supplies a key.
API_KEY_PATH = os.path.expanduser('~/.hyperbolic/api_key')

# Keys every instance record must contain; also used, in this order, as the
# column names of the generated CSV.
REQUIRED_FIELDS = [
    'InstanceType',
    'AcceleratorName',
    'AcceleratorCount',
    'vCPUs',
    'MemoryGiB',
    'StorageGiB',
    'Price',
    'Region',
    'GpuInfo',
    'SpotPrice',
]
18
+
19
+
20
class HyperbolicCatalogError(Exception):
    """Raised for any failure while fetching or writing the Hyperbolic catalog.

    Carries no extra state beyond the message passed to ``Exception``.
    """
23
+
24
+
25
def get_api_key(api_key=None) -> str:
    """Resolve the Hyperbolic API key.

    Sources are consulted in order: the explicit ``api_key`` argument, the
    ``HYPERBOLIC_API_KEY`` environment variable, then the file at
    ``API_KEY_PATH``.

    Args:
        api_key: Key supplied by the caller (e.g. from ``--api-key``). Falsy
            values are ignored and the next source is tried.

    Returns:
        The first non-empty key found.

    Raises:
        HyperbolicCatalogError: If no source yields a key, including the case
            where the key file exists but contains only whitespace.
    """
    if api_key:
        return api_key
    if api_key := os.environ.get('HYPERBOLIC_API_KEY'):
        return api_key
    try:
        with open(API_KEY_PATH, 'r', encoding='utf-8') as f:
            file_key = f.read().strip()
    except FileNotFoundError as exc:
        raise HyperbolicCatalogError(
            'No API key found. Please either:\n'
            '1. Pass --api-key\n'
            '2. Set HYPERBOLIC_API_KEY environment variable\n'
            '3. Create ~/.hyperbolic/api_key file') from exc
    if not file_key:
        # Returning '' here would send a bare 'Bearer ' header and surface
        # later as an opaque HTTP failure; report the real cause instead.
        raise HyperbolicCatalogError(
            f'API key file {API_KEY_PATH} is empty. Please either:\n'
            '1. Pass --api-key\n'
            '2. Set HYPERBOLIC_API_KEY environment variable\n'
            '3. Write your API key to ~/.hyperbolic/api_key')
    return file_key
40
+
41
+
42
def get_output_path() -> str:
    """Return the path the catalog CSV should be written to.

    If the working directory is itself named ``hyperbolic``, the bare
    relative name ``vms.csv`` is returned. Otherwise a ``hyperbolic``
    subdirectory of the working directory is created (if it does not exist
    yet) and the path of ``vms.csv`` inside it is returned.
    """
    cwd = os.getcwd()
    if os.path.basename(cwd) != 'hyperbolic':
        target_dir = os.path.join(cwd, 'hyperbolic')
        os.makedirs(target_dir, exist_ok=True)
        return os.path.join(target_dir, 'vms.csv')
    return 'vms.csv'
50
+
51
+
52
def validate_instance_data(instance: Dict[str, Any]) -> None:
    """Check that an instance record contains every required field.

    Args:
        instance: One record from the API response.

    Raises:
        HyperbolicCatalogError: If ``instance`` is not a dict, or if any name
            in ``REQUIRED_FIELDS`` is absent from it.
    """
    if not isinstance(instance, dict):
        # Without this guard a None/int element raises TypeError on the
        # membership test below, and a str silently gets substring semantics.
        raise HyperbolicCatalogError(
            f'Expected instance data to be a dict, got {type(instance)}')
    missing_fields = [
        field for field in REQUIRED_FIELDS if field not in instance
    ]
    if missing_fields:
        raise HyperbolicCatalogError(
            f'Instance data missing required fields: {missing_fields}')
60
+
61
+
62
def create_catalog(api_key=None) -> None:
    """Fetch the Hyperbolic VM catalog and write it out as a CSV file.

    The response from ``ENDPOINT`` must be a JSON object whose ``vms`` entry
    is a non-empty list of instance records. Each record is validated with
    ``validate_instance_data`` and written as one CSV row with the columns in
    ``REQUIRED_FIELDS``; keys outside that list are dropped.

    Args:
        api_key: Optional API key, forwarded to ``get_api_key``.

    Raises:
        HyperbolicCatalogError: If the request fails, the response is not
            valid JSON or does not have the expected shape, an instance
            record is incomplete, or the CSV file cannot be written.
    """
    try:
        response = requests.get(
            ENDPOINT,
            headers={'Authorization': f'Bearer {get_api_key(api_key)}'},
            timeout=30)
        response.raise_for_status()

        try:
            data = response.json()
        except ValueError as e:
            # ValueError is the common base of json.JSONDecodeError and of the
            # simplejson-based decode error requests may raise instead, so
            # both end up reported as an invalid-JSON catalog error.
            raise HyperbolicCatalogError(
                f'Invalid JSON response from API: {response.text}') from e

        # A non-object payload (null, number, ...) cannot hold a "vms" key;
        # check the type first so the membership test cannot raise TypeError.
        if not isinstance(data, dict) or 'vms' not in data:
            raise HyperbolicCatalogError(
                f'Missing "vms" field in API response: {data}')

        instances = data['vms']
        if not isinstance(instances, list):
            raise HyperbolicCatalogError(
                f'Expected list of instances, got {type(instances)}')

        if not instances:
            raise HyperbolicCatalogError('No instances found in API response')

        # Validate each instance
        for instance in instances:
            validate_instance_data(instance)

    except requests.exceptions.RequestException as e:
        raise HyperbolicCatalogError(
            f'Failed to fetch instance data: {e}') from e

    output_path = get_output_path()
    try:
        with open(output_path, 'w', newline='', encoding='utf-8') as f:
            # extrasaction='ignore': by default DictWriter raises ValueError
            # for keys not listed in fieldnames, so an additional field in
            # the API response would abort the run halfway through the file.
            writer = csv.DictWriter(f,
                                    fieldnames=REQUIRED_FIELDS,
                                    extrasaction='ignore')
            writer.writeheader()

            for instance in instances:
                entry = instance.copy()
                # Convert GpuInfo to string format
                gpu_info = json.dumps(entry['GpuInfo'], ensure_ascii=False)
                entry['GpuInfo'] = gpu_info.replace(
                    '"', "'")  # pylint: disable=invalid-string-quote
                writer.writerow(entry)
    except (IOError, OSError) as e:
        raise HyperbolicCatalogError(
            f'Failed to write catalog file to {output_path}: {e}') from e
113
+
114
+
115
def main() -> int:
    """Command-line entry point: build the catalog and report the outcome.

    Accepts an optional ``--api-key`` argument which is handed to
    ``create_catalog``.

    Returns:
        0 when the catalog was written; 1 when a handled error occurred, in
        which case the error is printed to stderr.
    """
    arg_parser = argparse.ArgumentParser(
        description='Fetch Hyperbolic instance data')
    arg_parser.add_argument('--api-key', help='Hyperbolic API key')
    cli_args = arg_parser.parse_args()

    exit_status = 1
    try:
        create_catalog(cli_args.api_key)
        # Kept inside the try: get_output_path() may create a directory, and
        # an OSError from it is reported by the handlers below.
        print(f'Hyperbolic Service Catalog saved to {get_output_path()}')
        exit_status = 0
    except HyperbolicCatalogError as err:
        print(f'Error: {err}', file=sys.stderr)
    except (requests.exceptions.RequestException, json.JSONDecodeError,
            IOError, OSError) as err:
        print(f'Unexpected error: {err}', file=sys.stderr)
    return exit_status


if __name__ == '__main__':
    sys.exit(main())
@@ -49,6 +49,7 @@ GPU_TO_MEMORY = {
49
49
  'V100': 16384,
50
50
  'H100': 81920,
51
51
  'GH200': 98304,
52
+ 'B200': 184320, # 180 GB
52
53
  'GENERAL': None
53
54
  }
54
55
 
@@ -0,0 +1,338 @@
1
+ """A script that queries Nebius API to get instance types and pricing info.
2
+
3
+ This script takes about 1 minute to finish.
4
+ """
5
+ import csv
6
+ from dataclasses import dataclass
7
+ import decimal
8
+ import json
9
+ import logging
10
+ import os
11
+ import re
12
+ from typing import Any, Dict, List, Optional
13
+
14
+ from sky.adaptors import nebius
15
+ from sky.adaptors.nebius import billing
16
+ from sky.adaptors.nebius import compute
17
+ from sky.adaptors.nebius import iam
18
+ from sky.adaptors.nebius import nebius_common
19
+
20
logger = logging.getLogger(__name__)

# Value passed as `timeout=` to every Nebius SDK call in this script
# (presumably seconds -- TODO confirm against the SDK).
TIMEOUT = 10
# Template of the project ID used to list platforms and estimate prices;
# the `{}` placeholder is filled with a region code.
PARENT_ID_TEMPLATE = 'project-{}public-images'
# Manufacturer recorded for platforms whose name starts with 'gpu-'.
ACCELERATOR_MANUFACTURER = 'NVIDIA'
25
+
26
+
27
@dataclass
class PresetInfo:
    """Specification and pricing of a single compute preset in one region.

    Attributes:
        region (str): Name of the region the preset was queried for.
        fullname (str): Identifier combining the platform name and the
            preset name.
        name (str): Preset name.
        platform_name (str): Name of the platform that owns the preset.
        gpu (int): Number of GPUs in the preset (0 when it has none).
        vcpu (int): Number of virtual CPUs in the preset.
        gpu_memory_gibibytes (int): GPU memory size in GiB.
        memory_gib (int): Amount of memory in the preset, in GiB.
        accelerator_manufacturer (str | None): Accelerator vendor
            (e.g., "NVIDIA"); empty or None when there is no accelerator.
        accelerator_name (str | None): Accelerator model (e.g., "H100");
            empty or None when there is no accelerator.
        price_hourly (decimal.Decimal): Regular hourly price of the preset.
        spot_price (decimal.Decimal): Hourly price of the preset when run
            as a spot (preemptible) instance.
    """

    # Location and identity.
    region: str
    fullname: str
    name: str
    platform_name: str

    # Hardware resources.
    gpu: int
    vcpu: int
    gpu_memory_gibibytes: int
    memory_gib: int
    accelerator_manufacturer: Optional[str]
    accelerator_name: Optional[str]

    # Pricing.
    price_hourly: decimal.Decimal
    spot_price: decimal.Decimal
63
+
64
+
65
def _format_decimal(value: decimal.Decimal) -> str:
    """Formats a decimal value as a fixed-point string with at least two
    decimal places.

    Trailing zeros of the fractional part are removed first, and the
    fractional part is then padded back to a minimum of two digits, so
    ``1.500`` becomes ``'1.50'`` and ``2`` becomes ``'2.00'``. Significant
    fractional digits beyond the second are kept.

    Args:
        value (decimal.Decimal): The decimal value to format.

    Returns:
        str: The formatted string representation of the decimal.
    """
    # The 'f' format avoids scientific notation (e.g. 1E+2 -> '100').
    formatted_value = f'{value:f}'
    integer_part, _, decimal_part = formatted_value.partition('.')
    # Drop insignificant trailing zeros, then guarantee two digits.
    decimal_part = decimal_part.rstrip('0').ljust(2, '0')

    return f'{integer_part}.{decimal_part}'
82
+
83
+
84
def _estimate_platforms(platforms: List[Any], parent_id: str,
                        region: str) -> List[PresetInfo]:
    """Requests regular and spot price estimates for every preset of the
    given platforms and turns the answers into PresetInfo objects.

    All estimate requests are submitted first; their results are only
    awaited afterwards, in submission order.

    Args:
        platforms (List[Platform]): Compute platforms whose presets should
            be priced.
        parent_id (str): Parent ID placed in the resource metadata of each
            estimate request.
        region (str): Region recorded on every resulting PresetInfo.

    Returns:
        List[PresetInfo]: One entry per (platform, preset) pair, carrying
            its resources and estimated hourly prices.
    """

    def _price_request(platform_name: str, preset_name: str,
                       spot: bool) -> Any:
        """Builds a one-resource batch estimate request for a preset."""
        instance_kwargs: Dict[str, Any] = {
            'resources': compute().ResourcesSpec(
                platform=platform_name,
                preset=preset_name,
            ),
        }
        if spot:
            # Only the spot variant carries a preemptible specification.
            instance_kwargs['preemptible'] = compute().PreemptibleSpec(
                priority=1)
        resource_spec = billing().ResourceSpec(
            compute_instance_spec=compute().CreateInstanceRequest(
                metadata=nebius_common().ResourceMetadata(
                    parent_id=parent_id,),
                spec=compute().InstanceSpec(**instance_kwargs),
            ))
        return billing().EstimateBatchRequest(resource_specs=[resource_spec])

    calculator = billing().CalculatorServiceClient(nebius.sdk())
    pending = []

    for platform in platforms:
        platform_name = platform.metadata.name

        for preset in platform.spec.presets:
            regular_request = _price_request(platform_name,
                                             preset.name,
                                             spot=False)
            spot_request = _price_request(platform_name,
                                          preset.name,
                                          spot=True)

            # Submit both estimates now; results are collected later.
            regular_future = calculator.estimate_batch(regular_request,
                                                       timeout=TIMEOUT)
            spot_future = calculator.estimate_batch(spot_request,
                                                    timeout=TIMEOUT)
            pending.append((platform, preset, regular_future, spot_future))

    # Wait for every estimate and assemble the results.
    preset_infos = []
    for platform, preset, regular_future, spot_future in pending:
        platform_name = platform.metadata.name
        if platform_name.startswith('gpu-'):
            manufacturer = ACCELERATOR_MANUFACTURER
            # The second dash-separated token of the platform name is
            # used as the accelerator name.
            accelerator = platform_name.split('-')[1].upper()
        else:
            manufacturer = ''
            accelerator = ''

        info = PresetInfo(
            region=region,
            fullname=f'{platform_name}_{preset.name}',
            name=preset.name,
            platform_name=platform_name,
            gpu=preset.resources.gpu_count or 0,
            vcpu=preset.resources.vcpu_count,
            gpu_memory_gibibytes=platform.spec.gpu_memory_gibibytes,
            memory_gib=preset.resources.memory_gibibytes,
            accelerator_manufacturer=manufacturer,
            accelerator_name=accelerator,
            price_hourly=decimal.Decimal(
                regular_future.wait().hourly_cost.general.total.cost),
            spot_price=decimal.Decimal(
                spot_future.wait().hourly_cost.general.total.cost),
        )
        preset_infos.append(info)

    return preset_infos
174
+
175
+
176
def _write_preset_prices(presets: List[PresetInfo], output_file: str) -> None:
    """Writes the provided preset information to a CSV file.

    The parent directory of ``output_file`` is created when missing; an
    already existing directory (e.g. from a previous run) is accepted.
    Rows are sorted with CPU-only presets first, then by region, platform
    name and vCPU count.

    Args:
        presets (List[PresetInfo]): A list of PresetInfo objects to write.
        output_file (str): The path to the output CSV file.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    output_dir = os.path.dirname(output_file)
    # dirname is '' for a bare file name; makedirs('') would raise.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    # newline='' lets the csv module control line endings itself.
    with open(output_file, 'w', newline='', encoding='utf-8') as out:
        header = [
            'InstanceType',
            'AcceleratorName',
            'AcceleratorCount',
            'vCPUs',
            'MemoryGiB',
            'Price',
            'Region',
            'GpuInfo',
            'SpotPrice',
        ]
        writer = csv.DictWriter(out, fieldnames=header)
        writer.writeheader()
        for preset in sorted(presets,
                             key=lambda x:
                             (bool(x.gpu), x.region, x.platform_name, x.vcpu)):
            gpu_info = ''
            if preset.gpu > 0 and preset.accelerator_name:
                # GPU memory is stored in GiB; the catalog expects MiB.
                vram = preset.gpu_memory_gibibytes * 1024
                gpu_info_dict = {
                    'Gpus': [{
                        'Name': preset.accelerator_name,
                        'Manufacturer': preset.accelerator_manufacturer,
                        'Count': preset.gpu,
                        'MemoryInfo': {
                            'SizeInMiB': vram
                        },
                    }],
                    'TotalGpuMemoryInMiB': vram * preset.gpu,
                }
                # Stored with single quotes instead of JSON double quotes.
                gpu_info = json.dumps(gpu_info_dict).replace('"', '\'')

            writer.writerow({
                'InstanceType': preset.fullname,
                'AcceleratorName': preset.accelerator_name,
                'AcceleratorCount': preset.gpu,
                'vCPUs': preset.vcpu,
                'MemoryGiB': preset.memory_gib,
                'Price': _format_decimal(preset.price_hourly),
                'Region': preset.region,
                'GpuInfo': gpu_info,
                # A zero/empty spot price is written as an empty cell.
                'SpotPrice': _format_decimal(preset.spot_price)
                             if preset.spot_price else '',
            })
231
+
232
+
233
def _fetch_platforms_for_project(project_id: str) -> List[Any]:
    """Lists the compute platforms available under a project.

    A single list request with ``page_size=999`` is issued; no further
    pages are requested.

    Args:
        project_id (str): ID of the project whose platforms are listed.

    Returns:
        List[ComputePlatform]: The ``items`` of the list response.
    """
    client = compute().PlatformServiceClient(nebius.sdk())
    request = compute().ListPlatformsRequest(page_size=999,
                                             parent_id=project_id)
    pending = client.list(request, timeout=TIMEOUT)
    return pending.wait().items
251
+
252
+
253
def _get_regions_map() -> Dict[str, str]:
    """Builds a mapping from region code to region name by walking every
    project of every tenant.

    The region code is the three characters following the ``project-``
    prefix of a project ID. Projects whose ID does not match are logged
    as errors and skipped.

    Returns:
        dict[str, str]: Region codes (e.g., "e00") mapped to the region
            reported in the project's status (e.g., "eu-north1").
    """
    regions: Dict[str, str] = {}
    tenant_client = iam().TenantServiceClient(nebius.sdk())
    tenants = tenant_client.list(iam().ListTenantsRequest(),
                                 timeout=TIMEOUT).wait()

    for tenant in tenants.items:
        project_client = iam().ProjectServiceClient(nebius.sdk())
        project_request = iam().ListProjectsRequest(
            parent_id=tenant.metadata.id)
        projects = project_client.list(project_request,
                                       timeout=TIMEOUT).wait()

        for project in projects.items:
            parsed = re.match(r'^project-([a-z0-9]{3})', project.metadata.id)
            if parsed is not None:
                regions[parsed.group(1)] = project.status.region
            else:
                logger.error('Could not parse project id %s',
                             project.metadata.id)

    return regions
279
+
280
+
281
def _get_all_platform_prices() -> List[PresetInfo]:
    """Collects specifications and prices of all presets in all regions.

    Regions are processed in sorted order of their codes. For each one the
    project ID is derived from the region code, its platforms are listed,
    and prices are estimated for every preset of those platforms. Regions
    without platforms are logged and skipped.

    Returns:
        List[PresetInfo]: PresetInfo objects of all platforms and presets
            across all regions.
    """
    regions_map = _get_regions_map()
    collected: List[PresetInfo] = []

    # Region codes are unique keys, so sorting the items sorts by code.
    for region_code, region in sorted(regions_map.items()):
        project_id = PARENT_ID_TEMPLATE.format(region_code)
        logger.info('Processing region: %s (project: %s)...', region,
                    project_id)

        platforms = _fetch_platforms_for_project(project_id)
        if platforms:
            collected.extend(
                _estimate_platforms(platforms=platforms,
                                    parent_id=project_id,
                                    region=region))
        else:
            logger.warning('No platforms found in region %s', region)

    return collected
317
+
318
+
319
def main() -> None:
    """Fetches Nebius platform prices and stores them as a CSV catalog.

    All presets and their price estimates are collected first and then
    written to ``nebius/vms.csv``.
    """
    _write_preset_prices(_get_all_platform_prices(), 'nebius/vms.csv')
    logger.info('Done!')
335
+
336
+
337
# Only fetch and write the catalog when run as a script, not on import.
if __name__ == '__main__':
    main()