skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (397)
  1. sky/__init__.py +10 -2
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/ibm.py +5 -2
  8. sky/adaptors/kubernetes.py +64 -0
  9. sky/adaptors/nebius.py +3 -1
  10. sky/adaptors/primeintellect.py +1 -0
  11. sky/adaptors/seeweb.py +183 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +20 -0
  14. sky/authentication.py +157 -263
  15. sky/backends/__init__.py +3 -2
  16. sky/backends/backend.py +11 -3
  17. sky/backends/backend_utils.py +588 -184
  18. sky/backends/cloud_vm_ray_backend.py +1088 -904
  19. sky/backends/local_docker_backend.py +9 -5
  20. sky/backends/task_codegen.py +633 -0
  21. sky/backends/wheel_utils.py +18 -0
  22. sky/catalog/__init__.py +8 -0
  23. sky/catalog/aws_catalog.py +4 -0
  24. sky/catalog/common.py +19 -1
  25. sky/catalog/data_fetchers/fetch_aws.py +102 -80
  26. sky/catalog/data_fetchers/fetch_gcp.py +30 -3
  27. sky/catalog/data_fetchers/fetch_nebius.py +9 -6
  28. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  29. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  30. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  31. sky/catalog/kubernetes_catalog.py +24 -28
  32. sky/catalog/primeintellect_catalog.py +95 -0
  33. sky/catalog/runpod_catalog.py +5 -1
  34. sky/catalog/seeweb_catalog.py +184 -0
  35. sky/catalog/shadeform_catalog.py +165 -0
  36. sky/check.py +73 -43
  37. sky/client/cli/command.py +675 -412
  38. sky/client/cli/flags.py +4 -2
  39. sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
  40. sky/client/cli/utils.py +79 -0
  41. sky/client/common.py +12 -2
  42. sky/client/sdk.py +132 -63
  43. sky/client/sdk_async.py +34 -33
  44. sky/cloud_stores.py +82 -3
  45. sky/clouds/__init__.py +6 -0
  46. sky/clouds/aws.py +337 -129
  47. sky/clouds/azure.py +24 -18
  48. sky/clouds/cloud.py +40 -13
  49. sky/clouds/cudo.py +16 -13
  50. sky/clouds/do.py +9 -7
  51. sky/clouds/fluidstack.py +12 -5
  52. sky/clouds/gcp.py +14 -7
  53. sky/clouds/hyperbolic.py +12 -5
  54. sky/clouds/ibm.py +12 -5
  55. sky/clouds/kubernetes.py +80 -45
  56. sky/clouds/lambda_cloud.py +12 -5
  57. sky/clouds/nebius.py +23 -9
  58. sky/clouds/oci.py +19 -12
  59. sky/clouds/paperspace.py +4 -1
  60. sky/clouds/primeintellect.py +317 -0
  61. sky/clouds/runpod.py +85 -24
  62. sky/clouds/scp.py +12 -8
  63. sky/clouds/seeweb.py +477 -0
  64. sky/clouds/shadeform.py +400 -0
  65. sky/clouds/ssh.py +4 -2
  66. sky/clouds/utils/scp_utils.py +61 -50
  67. sky/clouds/vast.py +33 -27
  68. sky/clouds/vsphere.py +14 -16
  69. sky/core.py +174 -165
  70. sky/dashboard/out/404.html +1 -1
  71. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  72. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  73. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  74. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  75. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  76. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  77. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  78. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  79. sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-7b45f9fbb6308557.js} +1 -1
  80. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  81. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  82. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  83. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  84. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  85. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  86. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  87. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  88. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  89. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  90. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  91. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  92. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  93. sky/dashboard/out/_next/static/chunks/pages/{_app-ce361c6959bc2001.js → _app-bde01e4a2beec258.js} +1 -1
  94. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  95. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  96. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  97. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-c0b5935149902e6f.js} +1 -1
  98. sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-aed0ea19df7cf961.js} +1 -1
  99. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  100. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  101. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  102. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-84a40f8c7c627fe4.js} +1 -1
  105. sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-531b2f8c4bf89f82.js} +1 -1
  106. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  107. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  108. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  109. sky/dashboard/out/clusters/[cluster].html +1 -1
  110. sky/dashboard/out/clusters.html +1 -1
  111. sky/dashboard/out/config.html +1 -1
  112. sky/dashboard/out/index.html +1 -1
  113. sky/dashboard/out/infra/[context].html +1 -1
  114. sky/dashboard/out/infra.html +1 -1
  115. sky/dashboard/out/jobs/[job].html +1 -1
  116. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  117. sky/dashboard/out/jobs.html +1 -1
  118. sky/dashboard/out/users.html +1 -1
  119. sky/dashboard/out/volumes.html +1 -1
  120. sky/dashboard/out/workspace/new.html +1 -1
  121. sky/dashboard/out/workspaces/[name].html +1 -1
  122. sky/dashboard/out/workspaces.html +1 -1
  123. sky/data/data_utils.py +92 -1
  124. sky/data/mounting_utils.py +162 -29
  125. sky/data/storage.py +200 -19
  126. sky/data/storage_utils.py +10 -45
  127. sky/exceptions.py +18 -7
  128. sky/execution.py +74 -31
  129. sky/global_user_state.py +605 -191
  130. sky/jobs/__init__.py +2 -0
  131. sky/jobs/client/sdk.py +101 -4
  132. sky/jobs/client/sdk_async.py +31 -5
  133. sky/jobs/constants.py +15 -8
  134. sky/jobs/controller.py +726 -284
  135. sky/jobs/file_content_utils.py +128 -0
  136. sky/jobs/log_gc.py +193 -0
  137. sky/jobs/recovery_strategy.py +250 -100
  138. sky/jobs/scheduler.py +271 -173
  139. sky/jobs/server/core.py +367 -114
  140. sky/jobs/server/server.py +81 -35
  141. sky/jobs/server/utils.py +89 -35
  142. sky/jobs/state.py +1498 -620
  143. sky/jobs/utils.py +771 -306
  144. sky/logs/agent.py +40 -5
  145. sky/logs/aws.py +9 -19
  146. sky/metrics/utils.py +282 -39
  147. sky/optimizer.py +1 -1
  148. sky/provision/__init__.py +37 -1
  149. sky/provision/aws/config.py +34 -13
  150. sky/provision/aws/instance.py +5 -2
  151. sky/provision/azure/instance.py +5 -3
  152. sky/provision/common.py +2 -0
  153. sky/provision/cudo/instance.py +4 -3
  154. sky/provision/do/instance.py +4 -3
  155. sky/provision/docker_utils.py +97 -26
  156. sky/provision/fluidstack/instance.py +6 -5
  157. sky/provision/gcp/config.py +6 -1
  158. sky/provision/gcp/instance.py +4 -2
  159. sky/provision/hyperbolic/instance.py +4 -2
  160. sky/provision/instance_setup.py +66 -20
  161. sky/provision/kubernetes/__init__.py +2 -0
  162. sky/provision/kubernetes/config.py +7 -44
  163. sky/provision/kubernetes/constants.py +0 -1
  164. sky/provision/kubernetes/instance.py +609 -213
  165. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  166. sky/provision/kubernetes/network.py +12 -8
  167. sky/provision/kubernetes/network_utils.py +8 -25
  168. sky/provision/kubernetes/utils.py +382 -418
  169. sky/provision/kubernetes/volume.py +150 -18
  170. sky/provision/lambda_cloud/instance.py +16 -13
  171. sky/provision/nebius/instance.py +6 -2
  172. sky/provision/nebius/utils.py +103 -86
  173. sky/provision/oci/instance.py +4 -2
  174. sky/provision/paperspace/instance.py +4 -3
  175. sky/provision/primeintellect/__init__.py +10 -0
  176. sky/provision/primeintellect/config.py +11 -0
  177. sky/provision/primeintellect/instance.py +454 -0
  178. sky/provision/primeintellect/utils.py +398 -0
  179. sky/provision/provisioner.py +30 -9
  180. sky/provision/runpod/__init__.py +2 -0
  181. sky/provision/runpod/instance.py +4 -3
  182. sky/provision/runpod/volume.py +69 -13
  183. sky/provision/scp/instance.py +307 -130
  184. sky/provision/seeweb/__init__.py +11 -0
  185. sky/provision/seeweb/config.py +13 -0
  186. sky/provision/seeweb/instance.py +812 -0
  187. sky/provision/shadeform/__init__.py +11 -0
  188. sky/provision/shadeform/config.py +12 -0
  189. sky/provision/shadeform/instance.py +351 -0
  190. sky/provision/shadeform/shadeform_utils.py +83 -0
  191. sky/provision/vast/instance.py +5 -3
  192. sky/provision/volume.py +164 -0
  193. sky/provision/vsphere/common/ssl_helper.py +1 -1
  194. sky/provision/vsphere/common/vapiconnect.py +2 -1
  195. sky/provision/vsphere/common/vim_utils.py +3 -2
  196. sky/provision/vsphere/instance.py +8 -6
  197. sky/provision/vsphere/vsphere_utils.py +8 -1
  198. sky/resources.py +11 -3
  199. sky/schemas/api/responses.py +107 -6
  200. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  201. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  202. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  203. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  204. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  205. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  206. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  207. sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
  208. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  209. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  210. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  211. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  212. sky/schemas/generated/jobsv1_pb2.py +86 -0
  213. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  214. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  215. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  216. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  217. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  218. sky/schemas/generated/servev1_pb2.py +58 -0
  219. sky/schemas/generated/servev1_pb2.pyi +115 -0
  220. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  221. sky/serve/autoscalers.py +2 -0
  222. sky/serve/client/impl.py +55 -21
  223. sky/serve/constants.py +4 -3
  224. sky/serve/controller.py +17 -11
  225. sky/serve/load_balancing_policies.py +1 -1
  226. sky/serve/replica_managers.py +219 -142
  227. sky/serve/serve_rpc_utils.py +179 -0
  228. sky/serve/serve_state.py +63 -54
  229. sky/serve/serve_utils.py +145 -109
  230. sky/serve/server/core.py +46 -25
  231. sky/serve/server/impl.py +311 -162
  232. sky/serve/server/server.py +21 -19
  233. sky/serve/service.py +84 -68
  234. sky/serve/service_spec.py +45 -7
  235. sky/server/auth/loopback.py +38 -0
  236. sky/server/auth/oauth2_proxy.py +12 -7
  237. sky/server/common.py +47 -24
  238. sky/server/config.py +62 -28
  239. sky/server/constants.py +9 -1
  240. sky/server/daemons.py +109 -38
  241. sky/server/metrics.py +76 -96
  242. sky/server/middleware_utils.py +166 -0
  243. sky/server/requests/executor.py +381 -145
  244. sky/server/requests/payloads.py +71 -18
  245. sky/server/requests/preconditions.py +15 -13
  246. sky/server/requests/request_names.py +121 -0
  247. sky/server/requests/requests.py +507 -157
  248. sky/server/requests/serializers/decoders.py +48 -17
  249. sky/server/requests/serializers/encoders.py +85 -20
  250. sky/server/requests/threads.py +117 -0
  251. sky/server/rest.py +116 -24
  252. sky/server/server.py +420 -172
  253. sky/server/stream_utils.py +219 -45
  254. sky/server/uvicorn.py +30 -19
  255. sky/setup_files/MANIFEST.in +6 -1
  256. sky/setup_files/alembic.ini +8 -0
  257. sky/setup_files/dependencies.py +62 -19
  258. sky/setup_files/setup.py +44 -44
  259. sky/sky_logging.py +13 -5
  260. sky/skylet/attempt_skylet.py +106 -24
  261. sky/skylet/configs.py +3 -1
  262. sky/skylet/constants.py +111 -26
  263. sky/skylet/events.py +64 -10
  264. sky/skylet/job_lib.py +141 -104
  265. sky/skylet/log_lib.py +233 -5
  266. sky/skylet/log_lib.pyi +40 -2
  267. sky/skylet/providers/ibm/node_provider.py +12 -8
  268. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  269. sky/skylet/runtime_utils.py +21 -0
  270. sky/skylet/services.py +524 -0
  271. sky/skylet/skylet.py +22 -1
  272. sky/skylet/subprocess_daemon.py +104 -29
  273. sky/skypilot_config.py +99 -79
  274. sky/ssh_node_pools/server.py +9 -8
  275. sky/task.py +221 -104
  276. sky/templates/aws-ray.yml.j2 +1 -0
  277. sky/templates/azure-ray.yml.j2 +1 -0
  278. sky/templates/cudo-ray.yml.j2 +1 -0
  279. sky/templates/do-ray.yml.j2 +1 -0
  280. sky/templates/fluidstack-ray.yml.j2 +1 -0
  281. sky/templates/gcp-ray.yml.j2 +1 -0
  282. sky/templates/hyperbolic-ray.yml.j2 +1 -0
  283. sky/templates/ibm-ray.yml.j2 +2 -1
  284. sky/templates/jobs-controller.yaml.j2 +3 -0
  285. sky/templates/kubernetes-ray.yml.j2 +196 -55
  286. sky/templates/lambda-ray.yml.j2 +1 -0
  287. sky/templates/nebius-ray.yml.j2 +3 -0
  288. sky/templates/oci-ray.yml.j2 +1 -0
  289. sky/templates/paperspace-ray.yml.j2 +1 -0
  290. sky/templates/primeintellect-ray.yml.j2 +72 -0
  291. sky/templates/runpod-ray.yml.j2 +1 -0
  292. sky/templates/scp-ray.yml.j2 +1 -0
  293. sky/templates/seeweb-ray.yml.j2 +171 -0
  294. sky/templates/shadeform-ray.yml.j2 +73 -0
  295. sky/templates/vast-ray.yml.j2 +1 -0
  296. sky/templates/vsphere-ray.yml.j2 +1 -0
  297. sky/templates/websocket_proxy.py +188 -43
  298. sky/usage/usage_lib.py +16 -4
  299. sky/users/permission.py +60 -43
  300. sky/utils/accelerator_registry.py +6 -3
  301. sky/utils/admin_policy_utils.py +18 -5
  302. sky/utils/annotations.py +22 -0
  303. sky/utils/asyncio_utils.py +78 -0
  304. sky/utils/atomic.py +1 -1
  305. sky/utils/auth_utils.py +153 -0
  306. sky/utils/cli_utils/status_utils.py +12 -7
  307. sky/utils/cluster_utils.py +28 -6
  308. sky/utils/command_runner.py +88 -27
  309. sky/utils/command_runner.pyi +36 -3
  310. sky/utils/common.py +3 -1
  311. sky/utils/common_utils.py +37 -4
  312. sky/utils/config_utils.py +1 -14
  313. sky/utils/context.py +127 -40
  314. sky/utils/context_utils.py +73 -18
  315. sky/utils/controller_utils.py +229 -70
  316. sky/utils/db/db_utils.py +95 -18
  317. sky/utils/db/kv_cache.py +149 -0
  318. sky/utils/db/migration_utils.py +24 -7
  319. sky/utils/env_options.py +4 -0
  320. sky/utils/git.py +559 -1
  321. sky/utils/kubernetes/create_cluster.sh +15 -30
  322. sky/utils/kubernetes/delete_cluster.sh +10 -7
  323. sky/utils/kubernetes/{deploy_remote_cluster.py → deploy_ssh_node_pools.py} +258 -380
  324. sky/utils/kubernetes/generate_kind_config.py +6 -66
  325. sky/utils/kubernetes/gpu_labeler.py +13 -3
  326. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  327. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  328. sky/utils/kubernetes/kubernetes_deploy_utils.py +213 -194
  329. sky/utils/kubernetes/rsync_helper.sh +11 -3
  330. sky/utils/kubernetes_enums.py +7 -15
  331. sky/utils/lock_events.py +4 -4
  332. sky/utils/locks.py +128 -31
  333. sky/utils/log_utils.py +0 -319
  334. sky/utils/resource_checker.py +13 -10
  335. sky/utils/resources_utils.py +53 -29
  336. sky/utils/rich_utils.py +8 -4
  337. sky/utils/schemas.py +107 -52
  338. sky/utils/subprocess_utils.py +17 -4
  339. sky/utils/thread_utils.py +91 -0
  340. sky/utils/timeline.py +2 -1
  341. sky/utils/ux_utils.py +35 -1
  342. sky/utils/volume.py +88 -4
  343. sky/utils/yaml_utils.py +9 -0
  344. sky/volumes/client/sdk.py +48 -10
  345. sky/volumes/server/core.py +59 -22
  346. sky/volumes/server/server.py +46 -17
  347. sky/volumes/volume.py +54 -42
  348. sky/workspaces/core.py +57 -21
  349. sky/workspaces/server.py +13 -12
  350. sky_templates/README.md +3 -0
  351. sky_templates/__init__.py +3 -0
  352. sky_templates/ray/__init__.py +0 -0
  353. sky_templates/ray/start_cluster +183 -0
  354. sky_templates/ray/stop_cluster +75 -0
  355. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/METADATA +331 -65
  356. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  357. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  358. sky/client/cli/git.py +0 -549
  359. sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
  360. sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
  361. sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
  362. sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
  363. sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
  364. sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
  365. sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
  366. sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
  367. sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
  368. sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
  369. sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
  370. sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
  371. sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
  372. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
  373. sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
  374. sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
  375. sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
  376. sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
  377. sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
  378. sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
  379. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
  380. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
  381. sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
  382. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
  383. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
  384. sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
  385. sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
  386. sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
  387. sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
  388. sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
  389. sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
  390. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  391. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  392. skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
  393. skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
  394. /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  395. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +0 -0
  396. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  397. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
@@ -59,7 +59,9 @@ class Lambda(clouds.Cloud):
59
59
 
60
60
  @classmethod
61
61
  def _unsupported_features_for_resources(
62
- cls, resources: 'resources_lib.Resources'
62
+ cls,
63
+ resources: 'resources_lib.Resources',
64
+ region: Optional[str] = None,
63
65
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
64
66
  del resources # unused
65
67
  return cls._CLOUD_UNSUPPORTED_FEATURES
@@ -69,10 +71,15 @@ class Lambda(clouds.Cloud):
69
71
  return cls._MAX_CLUSTER_NAME_LEN_LIMIT
70
72
 
71
73
  @classmethod
72
- def regions_with_offering(cls, instance_type: str,
73
- accelerators: Optional[Dict[str, int]],
74
- use_spot: bool, region: Optional[str],
75
- zone: Optional[str]) -> List[clouds.Region]:
74
+ def regions_with_offering(
75
+ cls,
76
+ instance_type: str,
77
+ accelerators: Optional[Dict[str, int]],
78
+ use_spot: bool,
79
+ region: Optional[str],
80
+ zone: Optional[str],
81
+ resources: Optional['resources_lib.Resources'] = None,
82
+ ) -> List[clouds.Region]:
76
83
  assert zone is None, 'Lambda does not support zones.'
77
84
  del accelerators, zone # unused
78
85
  if use_spot:
sky/clouds/nebius.py CHANGED
@@ -78,7 +78,9 @@ class Nebius(clouds.Cloud):
78
78
 
79
79
  @classmethod
80
80
  def _unsupported_features_for_resources(
81
- cls, resources: 'resources_lib.Resources'
81
+ cls,
82
+ resources: 'resources_lib.Resources',
83
+ region: Optional[str] = None,
82
84
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
83
85
  unsupported = cls._CLOUD_UNSUPPORTED_FEATURES.copy()
84
86
 
@@ -101,10 +103,15 @@ class Nebius(clouds.Cloud):
101
103
  return cls._MAX_CLUSTER_NAME_LEN_LIMIT
102
104
 
103
105
  @classmethod
104
- def regions_with_offering(cls, instance_type: str,
105
- accelerators: Optional[Dict[str, int]],
106
- use_spot: bool, region: Optional[str],
107
- zone: Optional[str]) -> List[clouds.Region]:
106
+ def regions_with_offering(
107
+ cls,
108
+ instance_type: str,
109
+ accelerators: Optional[Dict[str, int]],
110
+ use_spot: bool,
111
+ region: Optional[str],
112
+ zone: Optional[str],
113
+ resources: Optional['resources_lib.Resources'] = None,
114
+ ) -> List[clouds.Region]:
108
115
  assert zone is None, 'Nebius does not support zones.'
109
116
  del accelerators, zone # unused
110
117
  regions = catalog.get_region_zones_for_instance_type(
@@ -245,9 +252,12 @@ class Nebius(clouds.Cloud):
245
252
  'filesystem_mount_tag': f'filesystem-skypilot-{i+1}'
246
253
  })
247
254
 
255
+ use_static_ip_address = skypilot_config.get_nested(
256
+ ('nebius', 'use_static_ip_address'), default_value=False)
248
257
  resources_vars: Dict[str, Any] = {
249
258
  'instance_type': resources.instance_type,
250
259
  'custom_resources': custom_resources,
260
+ 'use_static_ip_address': use_static_ip_address,
251
261
  'region': region.name,
252
262
  'image_id': image_family,
253
263
  # Nebius does not support specific zones.
@@ -364,10 +374,10 @@ class Nebius(clouds.Cloud):
364
374
  f'{_INDENT_PREFIX} $ nebius --format json iam whoami|jq -r \'.user_profile.tenants[0].tenant_id\' > {nebius.tenant_id_path()} \n') # pylint: disable=line-too-long
365
375
  if not nebius.is_token_or_cred_file_exist():
366
376
  return False, f'{token_cred_msg}'
367
- sdk = nebius.sdk()
368
377
  tenant_id = nebius.get_tenant_id()
369
378
  if tenant_id is None:
370
379
  return False, f'{tenant_msg}'
380
+ sdk = nebius.sdk()
371
381
  try:
372
382
  service = nebius.iam().ProjectServiceClient(sdk)
373
383
  service.list(
@@ -445,9 +455,13 @@ class Nebius(clouds.Cloud):
445
455
  del workspace_config # Unused
446
456
  sdk = nebius.sdk()
447
457
  profile_client = nebius.iam().ProfileServiceClient(sdk)
448
- profile = nebius.sync_call(
449
- profile_client.get(nebius.iam().GetProfileRequest(),
450
- timeout=nebius.READ_TIMEOUT))
458
+ try:
459
+ profile = nebius.sync_call(
460
+ profile_client.get(nebius.iam().GetProfileRequest(),
461
+ timeout=nebius.READ_TIMEOUT))
462
+ except Exception as e:
463
+ raise exceptions.CloudUserIdentityError(
464
+ f'Error getting Nebius profile: {e}')
451
465
  if profile.user_profile is not None:
452
466
  if profile.user_profile.attributes is None:
453
467
  raise exceptions.CloudUserIdentityError(
sky/clouds/oci.py CHANGED
@@ -28,6 +28,7 @@ from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
28
28
  from sky import catalog
29
29
  from sky import clouds
30
30
  from sky import exceptions
31
+ from sky.adaptors import common
31
32
  from sky.adaptors import oci as oci_adaptor
32
33
  from sky.clouds.utils import oci_utils
33
34
  from sky.provision.oci.query_utils import query_helper
@@ -68,7 +69,9 @@ class OCI(clouds.Cloud):
68
69
 
69
70
  @classmethod
70
71
  def _unsupported_features_for_resources(
71
- cls, resources: 'resources_lib.Resources'
72
+ cls,
73
+ resources: 'resources_lib.Resources',
74
+ region: Optional[str] = None,
72
75
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
73
76
  unsupported_features = {
74
77
  clouds.CloudImplementationFeatures.CLONE_DISK_FROM_CLUSTER:
@@ -95,10 +98,15 @@ class OCI(clouds.Cloud):
95
98
  return cls._MAX_CLUSTER_NAME_LEN_LIMIT
96
99
 
97
100
  @classmethod
98
- def regions_with_offering(cls, instance_type: str,
99
- accelerators: Optional[Dict[str, int]],
100
- use_spot: bool, region: Optional[str],
101
- zone: Optional[str]) -> List[clouds.Region]:
101
+ def regions_with_offering(
102
+ cls,
103
+ instance_type: str,
104
+ accelerators: Optional[Dict[str, int]],
105
+ use_spot: bool,
106
+ region: Optional[str],
107
+ zone: Optional[str],
108
+ resources: Optional['resources_lib.Resources'] = None,
109
+ ) -> List[clouds.Region]:
102
110
  del accelerators # unused
103
111
 
104
112
  regions = catalog.get_region_zones_for_instance_type(
@@ -454,13 +462,12 @@ class OCI(clouds.Cloud):
454
462
  f'{cls._INDENT_PREFIX} region=us-sanjose-1\n'
455
463
  f'{cls._INDENT_PREFIX} key_file=~/.oci/oci_api_key.pem')
456
464
 
457
- try:
458
- # pylint: disable=import-outside-toplevel,unused-import
459
- import oci
460
- except ImportError:
461
- return False, ('`oci` is not installed. Install it with: '
462
- 'pip install oci\n'
463
- f'{cls._INDENT_PREFIX}{short_credential_help_str}')
465
+ dependency_error_msg = (
466
+ '`oci` is not installed. Install it with: '
467
+ 'pip install oci\n'
468
+ f'{cls._INDENT_PREFIX}{short_credential_help_str}')
469
+ if not common.can_import_modules(['oci']):
470
+ return False, dependency_error_msg
464
471
 
465
472
  conf_file = oci_adaptor.get_config_file()
466
473
 
sky/clouds/paperspace.py CHANGED
@@ -60,7 +60,9 @@ class Paperspace(clouds.Cloud):
60
60
 
61
61
  @classmethod
62
62
  def _unsupported_features_for_resources(
63
- cls, resources: 'resources_lib.Resources'
63
+ cls,
64
+ resources: 'resources_lib.Resources',
65
+ region: Optional[str] = None,
64
66
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
65
67
  """The features not supported based on the resources provided.
66
68
 
@@ -86,6 +88,7 @@ class Paperspace(clouds.Cloud):
86
88
  use_spot: bool,
87
89
  region: Optional[str],
88
90
  zone: Optional[str],
91
+ resources: Optional['resources_lib.Resources'] = None,
89
92
  ) -> List[clouds.Region]:
90
93
  assert zone is None, 'Paperspace does not support zones.'
91
94
  del accelerators, zone # unused
@@ -0,0 +1,317 @@
1
+ """ Prime Intellect Cloud. """
2
+ import json
3
+ import os
4
+ import typing
5
+ from typing import Dict, Iterator, List, Optional, Tuple, Union
6
+
7
+ from sky import catalog
8
+ from sky import clouds
9
+ from sky.provision.primeintellect import utils
10
+ from sky.utils import registry
11
+ from sky.utils import resources_utils
12
+
13
+ if typing.TYPE_CHECKING:
14
+ from sky import resources as resources_lib
15
+ from sky.utils import volume as volume_lib
16
+
17
+ CredentialCheckResult = Tuple[bool, Optional[Union[str, Dict[str, str]]]]
18
+
19
+ _CREDENTIAL_FILES = [
20
+ 'config.json',
21
+ ]
22
+
23
+
24
@registry.CLOUD_REGISTRY.register
class PrimeIntellect(clouds.Cloud):
    """Prime Intellect GPU Cloud.

    Instance types, regions/zones and prices are looked up through the
    ``catalog`` module under the ``'primeintellect'`` cloud key. Credentials
    are read from ``~/.prime/config.json``.
    """
    _REPR = 'PrimeIntellect'
    # Feature -> reason string for every feature this implementation
    # declares as unsupported.
    _CLOUD_UNSUPPORTED_FEATURES = {
        clouds.CloudImplementationFeatures.AUTOSTOP: 'Stopping not supported.',
        clouds.CloudImplementationFeatures.AUTODOWN:
            ('Auto down not supported yet.'),
        clouds.CloudImplementationFeatures.STOP: 'Stopping not supported.',
        clouds.CloudImplementationFeatures.MULTI_NODE:
            ('Multi-node not supported yet.'),
        clouds.CloudImplementationFeatures.CUSTOM_DISK_TIER:
            ('Custom disk tier not supported yet.'),
        clouds.CloudImplementationFeatures.CUSTOM_NETWORK_TIER:
            ('Custom network tier not supported yet.'),
        clouds.CloudImplementationFeatures.CUSTOM_MULTI_NETWORK:
            ('Customized multiple network interfaces are not supported'),
        clouds.CloudImplementationFeatures.IMAGE_ID:
            ('Custom image not supported yet.'),
        clouds.CloudImplementationFeatures.DOCKER_IMAGE:
            ('Custom docker image not supported yet.'),
    }
    PROVISIONER_VERSION = clouds.ProvisionerVersion.SKYPILOT
    STATUS_VERSION = clouds.StatusVersion.SKYPILOT
    _MAX_CLUSTER_NAME_LEN_LIMIT = 120
    _regions: List[clouds.Region] = []

    @classmethod
    def _cloud_unsupported_features(
            cls) -> Dict[clouds.CloudImplementationFeatures, str]:
        """Returns the {feature: reason} map of unsupported features."""
        return cls._CLOUD_UNSUPPORTED_FEATURES

    @classmethod
    def _max_cluster_name_length(cls) -> Optional[int]:
        """Returns the maximum cluster name length allowed for this cloud."""
        return cls._MAX_CLUSTER_NAME_LEN_LIMIT

    @classmethod
    def regions_with_offering(
        cls,
        instance_type: str,
        accelerators: Optional[Dict[str, int]],
        use_spot: bool,
        region: Optional[str],
        zone: Optional[str],
        resources: Optional['resources_lib.Resources'] = None,
    ) -> List[clouds.Region]:
        """Returns the regions that offer the specified resources.

        Args:
            instance_type: Instance type to look up in the catalog.
            accelerators: Unused.
            use_spot: Whether to look up spot offerings.
            region: If given, only the region with this name is kept.
            zone: If given, each region's zones are narrowed to this zone and
                regions left without any zone are dropped.
            resources: Unused.

        Returns:
            The catalog regions matching the filters.
        """
        del accelerators, resources  # unused
        regions = catalog.get_region_zones_for_instance_type(
            instance_type, use_spot, 'primeintellect')

        if region is not None:
            regions = [r for r in regions if r.name == region]
        if zone is not None:
            for r in regions:
                assert r.zones is not None, r
                r.set_zones([z for z in r.zones if z.name == zone])
            regions = [r for r in regions if r.zones]
        return regions

    @classmethod
    def get_vcpus_mem_from_instance_type(
        cls,
        instance_type: str,
    ) -> Tuple[Optional[float], Optional[float]]:
        """Returns the #vCPUs and memory that the instance type offers."""
        return catalog.get_vcpus_mem_from_instance_type(instance_type,
                                                        clouds='primeintellect')

    @classmethod
    def zones_provision_loop(
        cls,
        *,
        region: str,
        num_nodes: int,
        instance_type: str,
        accelerators: Optional[Dict[str, int]] = None,
        use_spot: bool = False,
    ) -> Iterator[Optional[List['clouds.Zone']]]:
        """Returns an iterator over zones for provisioning.

        Yields the zone list of every region returned by
        `regions_with_offering()` for the given region name.
        """
        del num_nodes  # unused
        regions = cls.regions_with_offering(instance_type,
                                            accelerators,
                                            use_spot,
                                            region=region,
                                            zone=None)
        for r in regions:
            assert r.zones is not None, r
            yield r.zones

    def instance_type_to_hourly_cost(self,
                                     instance_type: str,
                                     use_spot: bool,
                                     region: Optional[str] = None,
                                     zone: Optional[str] = None) -> float:
        """Returns the cost, or the cheapest cost among all zones for spot."""
        return catalog.get_hourly_cost(instance_type,
                                       use_spot=use_spot,
                                       region=region,
                                       zone=zone,
                                       clouds='primeintellect')

    def accelerators_to_hourly_cost(self,
                                    accelerators: Dict[str, int],
                                    use_spot: bool,
                                    region: Optional[str] = None,
                                    zone: Optional[str] = None) -> float:
        """Returns 0.0: no separate accelerator cost is reported.

        NOTE(review): presumably accelerator pricing is already included in
        the instance type's hourly cost - confirm against the catalog.
        """
        del accelerators, use_spot, region, zone  # Unused.
        return 0.0

    def get_egress_cost(self, num_gigabytes: float) -> float:
        """Returns 0.0 regardless of the amount of data transferred."""
        return 0.0

    def is_same_cloud(self, other: clouds.Cloud) -> bool:
        """Returns whether `other` is also a PrimeIntellect cloud."""
        return isinstance(other, PrimeIntellect)

    @classmethod
    def get_default_instance_type(cls,
                                  cpus: Optional[str] = None,
                                  memory: Optional[str] = None,
                                  disk_tier: Optional[
                                      resources_utils.DiskTier] = None,
                                  region: Optional[str] = None,
                                  zone: Optional[str] = None) -> Optional[str]:
        """Returns the default instance type for Prime Intellect."""
        return catalog.get_default_instance_type(cpus=cpus,
                                                 memory=memory,
                                                 disk_tier=disk_tier,
                                                 region=region,
                                                 zone=zone,
                                                 clouds='primeintellect')

    @classmethod
    def get_accelerators_from_instance_type(
            cls, instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
        """Returns the accelerators of the instance type, per the catalog."""
        return catalog.get_accelerators_from_instance_type(
            instance_type, clouds='primeintellect')

    @classmethod
    def get_zone_shell_cmd(cls) -> Optional[str]:
        """Returns None: no zone shell command is provided."""
        return None

    def make_deploy_resources_variables(
        self,
        resources: 'resources_lib.Resources',
        cluster_name: resources_utils.ClusterName,
        region: 'clouds.Region',
        zones: Optional[List['clouds.Zone']],
        num_nodes: int,
        dryrun: bool = False,
        volume_mounts: Optional[List['volume_lib.VolumeMount']] = None
    ) -> Dict[str, Optional[str]]:
        """Builds the deployment variables for the given resources.

        Args:
            resources: Resources to deploy; must be launchable.
            cluster_name: Unused.
            region: Region to deploy in; its name is exported.
            zones: Candidate zones; must not be None. Only the first zone is
                used.
            num_nodes: Unused.
            dryrun: Unused.
            volume_mounts: Unused.

        Returns:
            A dict with the instance type, the accelerators of that instance
            type serialized as compact JSON (None if it has none), the region
            name, and the first zone's name under both 'zones' and
            'availability_zone'.
        """
        del dryrun, cluster_name, num_nodes, volume_mounts
        assert zones is not None, (region, zones)

        resources = resources.assert_launchable()
        acc_dict = self.get_accelerators_from_instance_type(
            resources.instance_type)
        custom_resources = None
        if acc_dict is not None:
            custom_resources = json.dumps(acc_dict, separators=(',', ':'))

        zone_name = zones[0].name
        return {
            'instance_type': resources.instance_type,
            'custom_resources': custom_resources,
            'region': region.name,
            'zones': zone_name,
            'availability_zone': zone_name,
        }

    def _get_feasible_launchable_resources(
        self, resources: 'resources_lib.Resources'
    ) -> 'resources_utils.FeasibleResources':
        """Returns a list of feasible resources for the given resources.

        If an instance type is already set, the resources are returned as-is
        (with accelerators cleared). Otherwise candidates come from the
        catalog: the default instance type when no accelerator is requested,
        or the instance types offering the requested accelerator.
        """
        if resources.instance_type is not None:
            assert resources.is_launchable(), resources
            resources = resources.copy(accelerators=None)
            return resources_utils.FeasibleResources([resources], [], None)

        def _make(instance_list):
            # Pin each candidate instance type onto a copy of the request;
            # accelerators and cpus are cleared on the copy.
            return [
                resources.copy(
                    cloud=PrimeIntellect(),
                    instance_type=instance_type,
                    accelerators=None,
                    cpus=None,
                ) for instance_type in instance_list
            ]

        # Currently, handle a filter on accelerators only.
        accelerators = resources.accelerators
        if accelerators is None:
            default_instance_type = PrimeIntellect.get_default_instance_type(
                cpus=resources.cpus,
                memory=resources.memory,
                disk_tier=resources.disk_tier)
            if default_instance_type is None:
                # TODO(pokgak): Add hints to all return values in this method
                # to help users understand why the resources are not
                # launchable.
                return resources_utils.FeasibleResources([], [], None)
            return resources_utils.FeasibleResources(
                _make([default_instance_type]), [], None)

        assert len(accelerators) == 1, resources
        acc, acc_count = list(accelerators.items())[0]
        (instance_list,
         fuzzy_candidate_list) = catalog.get_instance_type_for_accelerator(
             acc,
             acc_count,
             use_spot=resources.use_spot,
             cpus=resources.cpus,
             region=resources.region,
             zone=resources.zone,
             clouds='primeintellect')
        if instance_list is None:
            return resources_utils.FeasibleResources([], fuzzy_candidate_list,
                                                     None)
        return resources_utils.FeasibleResources(_make(instance_list),
                                                 fuzzy_candidate_list, None)

    @classmethod
    def _check_credentials(cls) -> Tuple[bool, Optional[str]]:
        """Verify that the user has valid credentials for Prime Intellect.

        Checks that ``~/.prime/config.json`` exists, is readable JSON and
        holds a non-empty ``api_key``, then issues a `list_instances()` call
        to confirm the API accepts the credentials.

        Returns:
            (True, None) if the check passes, otherwise (False, reason).
        """
        primeintellect_config_file = '~/.prime/config.json'
        config_path = os.path.expanduser(primeintellect_config_file)
        if not os.path.isfile(config_path):
            return (False, f'{primeintellect_config_file} does not exist.')

        try:
            with open(config_path, encoding='UTF-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            return False, (f'Failed to read {primeintellect_config_file}: '
                           f'{e}')

        api_key = data.get('api_key') if isinstance(data, dict) else None
        if not api_key:
            return False, (
                f'API key is missing or empty in '
                f'{primeintellect_config_file}. '
                'Run \'prime login\' to configure a new API key.')

        client = utils.PrimeIntellectAPIClient()
        try:
            client.list_instances()
        except utils.PrimeintellectAPIError as e:
            if e.status_code == 403:
                return False, (
                    'Please check that your API key has the correct '
                    'permissions, generate a new one at '
                    'https://app.primeintellect.ai/dashboard/tokens, '
                    'or run \'prime login\' to configure a new API key.')
            # Any other API failure also means the credentials could not be
            # verified; do not report them as valid.
            return False, ('Failed to verify Prime Intellect credentials: '
                           f'{e}')
        return True, None

    @classmethod
    def _check_compute_credentials(cls) -> CredentialCheckResult:
        """Checks if the user has access credentials to Prime Intellect's
        compute service."""
        return cls._check_credentials()

    def get_credential_file_mounts(self) -> Dict[str, str]:
        """Returns a dict of credential file paths to mount paths."""
        return {
            f'~/.prime/{filename}': f'~/.prime/{filename}'
            for filename in _CREDENTIAL_FILES
        }

    @classmethod
    def get_current_user_identity(cls) -> Optional[List[str]]:
        """Returns None: no user identity is reported for this cloud."""
        return None

    def instance_type_exists(self, instance_type: str) -> bool:
        """Returns whether the instance type exists in the catalog."""
        return catalog.instance_type_exists(instance_type, 'primeintellect')

    def validate_region_zone(self, region: Optional[str], zone: Optional[str]):
        """Validates the region/zone pair against the catalog."""
        return catalog.validate_region_zone(region,
                                            zone,
                                            clouds='primeintellect')

    @classmethod
    def _unsupported_features_for_resources(
        cls,
        resources: 'resources_lib.Resources',
        region: Optional[str] = None,
    ) -> Dict[clouds.CloudImplementationFeatures, str]:
        """The features not supported based on the resources provided.

        This method is used by check_features_are_supported() to check if the
        cloud implementation supports all the requested features.

        Returns:
            A dict of {feature: reason} for the features not supported by the
            cloud implementation.
        """
        del resources, region  # unused
        return cls._CLOUD_UNSUPPORTED_FEATURES
sky/clouds/runpod.py CHANGED
@@ -1,5 +1,7 @@
1
1
  """ RunPod Cloud. """
2
2
 
3
+ from importlib import util as import_lib_util
4
+ import os
3
5
  import typing
4
6
  from typing import Dict, Iterator, List, Optional, Tuple, Union
5
7
 
@@ -12,9 +14,7 @@ if typing.TYPE_CHECKING:
12
14
  from sky import resources as resources_lib
13
15
  from sky.utils import volume as volume_lib
14
16
 
15
- _CREDENTIAL_FILES = [
16
- 'config.toml',
17
- ]
17
+ _CREDENTIAL_FILE = 'config.toml'
18
18
 
19
19
 
20
20
  @registry.CLOUD_REGISTRY.register
@@ -53,7 +53,9 @@ class RunPod(clouds.Cloud):
53
53
 
54
54
  @classmethod
55
55
  def _unsupported_features_for_resources(
56
- cls, resources: 'resources_lib.Resources'
56
+ cls,
57
+ resources: 'resources_lib.Resources',
58
+ region: Optional[str] = None,
57
59
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
58
60
  """The features not supported based on the resources provided.
59
61
 
@@ -72,10 +74,15 @@ class RunPod(clouds.Cloud):
72
74
  return cls._MAX_CLUSTER_NAME_LEN_LIMIT
73
75
 
74
76
  @classmethod
75
- def regions_with_offering(cls, instance_type: str,
76
- accelerators: Optional[Dict[str, int]],
77
- use_spot: bool, region: Optional[str],
78
- zone: Optional[str]) -> List[clouds.Region]:
77
+ def regions_with_offering(
78
+ cls,
79
+ instance_type: str,
80
+ accelerators: Optional[Dict[str, int]],
81
+ use_spot: bool,
82
+ region: Optional[str],
83
+ zone: Optional[str],
84
+ resources: Optional['resources_lib.Resources'] = None,
85
+ ) -> List[clouds.Region]:
79
86
  del accelerators # unused
80
87
  regions = catalog.get_region_zones_for_instance_type(
81
88
  instance_type, use_spot, 'runpod')
@@ -193,7 +200,7 @@ class RunPod(clouds.Cloud):
193
200
  acc_dict)
194
201
 
195
202
  if resources.image_id is None:
196
- image_id: Optional[str] = 'runpod/base:0.0.2'
203
+ image_id: Optional[str] = 'runpod/base:1.0.2-ubuntu2204'
197
204
  elif resources.extract_docker_image() is not None:
198
205
  image_id = resources.extract_docker_image()
199
206
  else:
@@ -285,30 +292,84 @@ class RunPod(clouds.Cloud):
285
292
 
286
293
  @classmethod
287
294
  def _check_credentials(cls) -> Tuple[bool, Optional[str]]:
288
- """ Verify that the user has valid credentials for RunPod. """
295
+ """Verify that the user has valid credentials for RunPod. """
296
+ dependency_error_msg = ('Failed to import runpod or TOML parser. '
297
+ 'Install: pip install "skypilot[runpod]".')
298
+ try:
299
+ runpod_spec = import_lib_util.find_spec('runpod')
300
+ if runpod_spec is None:
301
+ return False, dependency_error_msg
302
+ # Prefer stdlib tomllib (Python 3.11+); fallback to tomli
303
+ tomllib_spec = import_lib_util.find_spec('tomllib')
304
+ tomli_spec = import_lib_util.find_spec('tomli')
305
+ if tomllib_spec is None and tomli_spec is None:
306
+ return False, dependency_error_msg
307
+ except ValueError:
308
+ # From the docstring of importlib.util.find_spec:
309
+ # First, sys.modules is checked to see if the module was already
310
+ # imported.
311
+ # If so, then sys.modules[name].__spec__ is returned.
312
+ # If that happens to be set to None, then ValueError is raised.
313
+ return False, dependency_error_msg
314
+
315
+ valid, error = cls._check_runpod_credentials()
316
+ if not valid:
317
+ return False, (
318
+ f'{error} \n' # First line is indented by 4 spaces
319
+ ' Credentials can be set up by running: \n'
320
+ f' $ pip install runpod \n'
321
+ f' $ runpod config\n'
322
+ ' For more information, see https://docs.skypilot.co/en/latest/getting-started/installation.html#runpod' # pylint: disable=line-too-long
323
+ )
324
+
325
+ return True, None
326
+
327
+ @classmethod
328
+ def _check_runpod_credentials(cls, profile: str = 'default'):
329
+ """Checks if the credentials file exists and is valid."""
330
+ credential_file = os.path.expanduser(f'~/.runpod/{_CREDENTIAL_FILE}')
331
+ if not os.path.exists(credential_file):
332
+ return False, '~/.runpod/config.toml does not exist.'
333
+
334
+ # We don't need to import TOML parser if config.toml does not exist.
335
+ # When needed, prefer stdlib tomllib (py>=3.11); otherwise use tomli.
336
+ # TODO(andy): remove this fallback after dropping Python 3.10 support.
289
337
  try:
290
- import runpod # pylint: disable=import-outside-toplevel
291
- valid, error = runpod.check_credentials()
338
+ try:
339
+ import tomllib as toml # pylint: disable=import-outside-toplevel
340
+ except ModuleNotFoundError: # py<3.11
341
+ import tomli as toml # pylint: disable=import-outside-toplevel
342
+ except ModuleNotFoundError:
343
+ # Should never happen. We already installed proper dependencies for
344
+ # different Python versions in setup_files/dependencies.py.
345
+ return False, (
346
+ '~/.runpod/config.toml exists but no TOML parser is available. '
347
+ 'Install tomli for Python < 3.11: pip install tomli.')
348
+
349
+ # Check for default api_key
350
+ try:
351
+ with open(credential_file, 'rb') as cred_file:
352
+ config = toml.load(cred_file)
292
353
 
293
- if not valid:
354
+ if profile not in config:
294
355
  return False, (
295
- f'{error} \n' # First line is indented by 4 spaces
296
- ' Credentials can be set up by running: \n'
297
- f' $ pip install runpod \n'
298
- f' $ runpod config\n'
299
- ' For more information, see https://docs.skypilot.co/en/latest/getting-started/installation.html#runpod' # pylint: disable=line-too-long
356
+ f'~/.runpod/config.toml is missing {profile} profile.')
357
+
358
+ if 'api_key' not in config[profile]:
359
+ return (
360
+ False,
361
+ '~/.runpod/config.toml is missing '
362
+ f'api_key for {profile} profile.',
300
363
  )
301
364
 
302
- return True, None
365
+ except (TypeError, ValueError):
366
+ return False, '~/.runpod/config.toml is not a valid TOML file.'
303
367
 
304
- except ImportError:
305
- return False, ('Failed to import runpod. '
306
- 'To install, run: pip install skypilot[runpod]')
368
+ return True, None
307
369
 
308
370
  def get_credential_file_mounts(self) -> Dict[str, str]:
309
371
  return {
310
- f'~/.runpod/{filename}': f'~/.runpod/{filename}'
311
- for filename in _CREDENTIAL_FILES
372
+ f'~/.runpod/{_CREDENTIAL_FILE}': f'~/.runpod/{_CREDENTIAL_FILE}'
312
373
  }
313
374
 
314
375
  @classmethod