skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546) hide show
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/hyperbolic.py +8 -0
  8. sky/adaptors/ibm.py +5 -2
  9. sky/adaptors/kubernetes.py +149 -18
  10. sky/adaptors/nebius.py +173 -30
  11. sky/adaptors/primeintellect.py +1 -0
  12. sky/adaptors/runpod.py +68 -0
  13. sky/adaptors/seeweb.py +183 -0
  14. sky/adaptors/shadeform.py +89 -0
  15. sky/admin_policy.py +187 -4
  16. sky/authentication.py +179 -225
  17. sky/backends/__init__.py +4 -2
  18. sky/backends/backend.py +22 -9
  19. sky/backends/backend_utils.py +1323 -397
  20. sky/backends/cloud_vm_ray_backend.py +1749 -1029
  21. sky/backends/docker_utils.py +1 -1
  22. sky/backends/local_docker_backend.py +11 -6
  23. sky/backends/task_codegen.py +633 -0
  24. sky/backends/wheel_utils.py +55 -9
  25. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  26. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  27. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  28. sky/{clouds/service_catalog → catalog}/common.py +90 -49
  29. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  31. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  33. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
  34. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  35. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  36. sky/catalog/data_fetchers/fetch_nebius.py +338 -0
  37. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  38. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  39. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  40. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  41. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  42. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  43. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  44. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  45. sky/catalog/hyperbolic_catalog.py +136 -0
  46. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  47. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  48. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  49. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  50. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  51. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  52. sky/catalog/primeintellect_catalog.py +95 -0
  53. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  54. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  55. sky/catalog/seeweb_catalog.py +184 -0
  56. sky/catalog/shadeform_catalog.py +165 -0
  57. sky/catalog/ssh_catalog.py +167 -0
  58. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  59. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  60. sky/check.py +533 -185
  61. sky/cli.py +5 -5975
  62. sky/client/{cli.py → cli/command.py} +2591 -1956
  63. sky/client/cli/deprecation_utils.py +99 -0
  64. sky/client/cli/flags.py +359 -0
  65. sky/client/cli/table_utils.py +322 -0
  66. sky/client/cli/utils.py +79 -0
  67. sky/client/common.py +78 -32
  68. sky/client/oauth.py +82 -0
  69. sky/client/sdk.py +1219 -319
  70. sky/client/sdk_async.py +827 -0
  71. sky/client/service_account_auth.py +47 -0
  72. sky/cloud_stores.py +82 -3
  73. sky/clouds/__init__.py +13 -0
  74. sky/clouds/aws.py +564 -164
  75. sky/clouds/azure.py +105 -83
  76. sky/clouds/cloud.py +140 -40
  77. sky/clouds/cudo.py +68 -50
  78. sky/clouds/do.py +66 -48
  79. sky/clouds/fluidstack.py +63 -44
  80. sky/clouds/gcp.py +339 -110
  81. sky/clouds/hyperbolic.py +293 -0
  82. sky/clouds/ibm.py +70 -49
  83. sky/clouds/kubernetes.py +570 -162
  84. sky/clouds/lambda_cloud.py +74 -54
  85. sky/clouds/nebius.py +210 -81
  86. sky/clouds/oci.py +88 -66
  87. sky/clouds/paperspace.py +61 -44
  88. sky/clouds/primeintellect.py +317 -0
  89. sky/clouds/runpod.py +164 -74
  90. sky/clouds/scp.py +89 -86
  91. sky/clouds/seeweb.py +477 -0
  92. sky/clouds/shadeform.py +400 -0
  93. sky/clouds/ssh.py +263 -0
  94. sky/clouds/utils/aws_utils.py +10 -4
  95. sky/clouds/utils/gcp_utils.py +87 -11
  96. sky/clouds/utils/oci_utils.py +38 -14
  97. sky/clouds/utils/scp_utils.py +231 -167
  98. sky/clouds/vast.py +99 -77
  99. sky/clouds/vsphere.py +51 -40
  100. sky/core.py +375 -173
  101. sky/dag.py +15 -0
  102. sky/dashboard/out/404.html +1 -1
  103. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  105. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  106. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  107. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  110. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  111. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  112. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  113. sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  115. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  116. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  118. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  119. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  121. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  122. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  124. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  126. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  127. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  128. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  129. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  131. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  133. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  134. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  136. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  139. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  141. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  143. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  144. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  148. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  149. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  151. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  152. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
  155. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  156. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  157. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  158. sky/dashboard/out/clusters/[cluster].html +1 -1
  159. sky/dashboard/out/clusters.html +1 -1
  160. sky/dashboard/out/config.html +1 -0
  161. sky/dashboard/out/index.html +1 -1
  162. sky/dashboard/out/infra/[context].html +1 -0
  163. sky/dashboard/out/infra.html +1 -0
  164. sky/dashboard/out/jobs/[job].html +1 -1
  165. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  166. sky/dashboard/out/jobs.html +1 -1
  167. sky/dashboard/out/users.html +1 -0
  168. sky/dashboard/out/volumes.html +1 -0
  169. sky/dashboard/out/workspace/new.html +1 -0
  170. sky/dashboard/out/workspaces/[name].html +1 -0
  171. sky/dashboard/out/workspaces.html +1 -0
  172. sky/data/data_utils.py +137 -1
  173. sky/data/mounting_utils.py +269 -84
  174. sky/data/storage.py +1460 -1807
  175. sky/data/storage_utils.py +43 -57
  176. sky/exceptions.py +126 -2
  177. sky/execution.py +216 -63
  178. sky/global_user_state.py +2390 -586
  179. sky/jobs/__init__.py +7 -0
  180. sky/jobs/client/sdk.py +300 -58
  181. sky/jobs/client/sdk_async.py +161 -0
  182. sky/jobs/constants.py +15 -8
  183. sky/jobs/controller.py +848 -275
  184. sky/jobs/file_content_utils.py +128 -0
  185. sky/jobs/log_gc.py +193 -0
  186. sky/jobs/recovery_strategy.py +402 -152
  187. sky/jobs/scheduler.py +314 -189
  188. sky/jobs/server/core.py +836 -255
  189. sky/jobs/server/server.py +156 -115
  190. sky/jobs/server/utils.py +136 -0
  191. sky/jobs/state.py +2109 -706
  192. sky/jobs/utils.py +1306 -215
  193. sky/logs/__init__.py +21 -0
  194. sky/logs/agent.py +108 -0
  195. sky/logs/aws.py +243 -0
  196. sky/logs/gcp.py +91 -0
  197. sky/metrics/__init__.py +0 -0
  198. sky/metrics/utils.py +453 -0
  199. sky/models.py +78 -1
  200. sky/optimizer.py +164 -70
  201. sky/provision/__init__.py +90 -4
  202. sky/provision/aws/config.py +147 -26
  203. sky/provision/aws/instance.py +136 -50
  204. sky/provision/azure/instance.py +11 -6
  205. sky/provision/common.py +13 -1
  206. sky/provision/cudo/cudo_machine_type.py +1 -1
  207. sky/provision/cudo/cudo_utils.py +14 -8
  208. sky/provision/cudo/cudo_wrapper.py +72 -71
  209. sky/provision/cudo/instance.py +10 -6
  210. sky/provision/do/instance.py +10 -6
  211. sky/provision/do/utils.py +4 -3
  212. sky/provision/docker_utils.py +140 -33
  213. sky/provision/fluidstack/instance.py +13 -8
  214. sky/provision/gcp/__init__.py +1 -0
  215. sky/provision/gcp/config.py +301 -19
  216. sky/provision/gcp/constants.py +218 -0
  217. sky/provision/gcp/instance.py +36 -8
  218. sky/provision/gcp/instance_utils.py +18 -4
  219. sky/provision/gcp/volume_utils.py +247 -0
  220. sky/provision/hyperbolic/__init__.py +12 -0
  221. sky/provision/hyperbolic/config.py +10 -0
  222. sky/provision/hyperbolic/instance.py +437 -0
  223. sky/provision/hyperbolic/utils.py +373 -0
  224. sky/provision/instance_setup.py +101 -20
  225. sky/provision/kubernetes/__init__.py +5 -0
  226. sky/provision/kubernetes/config.py +9 -52
  227. sky/provision/kubernetes/constants.py +17 -0
  228. sky/provision/kubernetes/instance.py +919 -280
  229. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  230. sky/provision/kubernetes/network.py +27 -17
  231. sky/provision/kubernetes/network_utils.py +44 -43
  232. sky/provision/kubernetes/utils.py +1221 -534
  233. sky/provision/kubernetes/volume.py +343 -0
  234. sky/provision/lambda_cloud/instance.py +22 -16
  235. sky/provision/nebius/constants.py +50 -0
  236. sky/provision/nebius/instance.py +19 -6
  237. sky/provision/nebius/utils.py +237 -137
  238. sky/provision/oci/instance.py +10 -5
  239. sky/provision/paperspace/instance.py +10 -7
  240. sky/provision/paperspace/utils.py +1 -1
  241. sky/provision/primeintellect/__init__.py +10 -0
  242. sky/provision/primeintellect/config.py +11 -0
  243. sky/provision/primeintellect/instance.py +454 -0
  244. sky/provision/primeintellect/utils.py +398 -0
  245. sky/provision/provisioner.py +117 -36
  246. sky/provision/runpod/__init__.py +5 -0
  247. sky/provision/runpod/instance.py +27 -6
  248. sky/provision/runpod/utils.py +51 -18
  249. sky/provision/runpod/volume.py +214 -0
  250. sky/provision/scp/__init__.py +15 -0
  251. sky/provision/scp/config.py +93 -0
  252. sky/provision/scp/instance.py +707 -0
  253. sky/provision/seeweb/__init__.py +11 -0
  254. sky/provision/seeweb/config.py +13 -0
  255. sky/provision/seeweb/instance.py +812 -0
  256. sky/provision/shadeform/__init__.py +11 -0
  257. sky/provision/shadeform/config.py +12 -0
  258. sky/provision/shadeform/instance.py +351 -0
  259. sky/provision/shadeform/shadeform_utils.py +83 -0
  260. sky/provision/ssh/__init__.py +18 -0
  261. sky/provision/vast/instance.py +13 -8
  262. sky/provision/vast/utils.py +10 -7
  263. sky/provision/volume.py +164 -0
  264. sky/provision/vsphere/common/ssl_helper.py +1 -1
  265. sky/provision/vsphere/common/vapiconnect.py +2 -1
  266. sky/provision/vsphere/common/vim_utils.py +4 -4
  267. sky/provision/vsphere/instance.py +15 -10
  268. sky/provision/vsphere/vsphere_utils.py +17 -20
  269. sky/py.typed +0 -0
  270. sky/resources.py +845 -119
  271. sky/schemas/__init__.py +0 -0
  272. sky/schemas/api/__init__.py +0 -0
  273. sky/schemas/api/responses.py +227 -0
  274. sky/schemas/db/README +4 -0
  275. sky/schemas/db/env.py +90 -0
  276. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  277. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  278. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  279. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  280. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  281. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  282. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  283. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  284. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  285. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  286. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  287. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  288. sky/schemas/db/script.py.mako +28 -0
  289. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  290. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  291. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  292. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  293. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  294. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  295. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  296. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  297. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  298. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  299. sky/schemas/generated/__init__.py +0 -0
  300. sky/schemas/generated/autostopv1_pb2.py +36 -0
  301. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  302. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  303. sky/schemas/generated/jobsv1_pb2.py +86 -0
  304. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  305. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  306. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  307. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  308. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  309. sky/schemas/generated/servev1_pb2.py +58 -0
  310. sky/schemas/generated/servev1_pb2.pyi +115 -0
  311. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  312. sky/serve/autoscalers.py +357 -5
  313. sky/serve/client/impl.py +310 -0
  314. sky/serve/client/sdk.py +47 -139
  315. sky/serve/client/sdk_async.py +130 -0
  316. sky/serve/constants.py +12 -9
  317. sky/serve/controller.py +68 -17
  318. sky/serve/load_balancer.py +106 -60
  319. sky/serve/load_balancing_policies.py +116 -2
  320. sky/serve/replica_managers.py +434 -249
  321. sky/serve/serve_rpc_utils.py +179 -0
  322. sky/serve/serve_state.py +569 -257
  323. sky/serve/serve_utils.py +775 -265
  324. sky/serve/server/core.py +66 -711
  325. sky/serve/server/impl.py +1093 -0
  326. sky/serve/server/server.py +21 -18
  327. sky/serve/service.py +192 -89
  328. sky/serve/service_spec.py +144 -20
  329. sky/serve/spot_placer.py +3 -0
  330. sky/server/auth/__init__.py +0 -0
  331. sky/server/auth/authn.py +50 -0
  332. sky/server/auth/loopback.py +38 -0
  333. sky/server/auth/oauth2_proxy.py +202 -0
  334. sky/server/common.py +478 -182
  335. sky/server/config.py +85 -23
  336. sky/server/constants.py +44 -6
  337. sky/server/daemons.py +295 -0
  338. sky/server/html/token_page.html +185 -0
  339. sky/server/metrics.py +160 -0
  340. sky/server/middleware_utils.py +166 -0
  341. sky/server/requests/executor.py +558 -138
  342. sky/server/requests/payloads.py +364 -24
  343. sky/server/requests/preconditions.py +21 -17
  344. sky/server/requests/process.py +112 -29
  345. sky/server/requests/request_names.py +121 -0
  346. sky/server/requests/requests.py +822 -226
  347. sky/server/requests/serializers/decoders.py +82 -31
  348. sky/server/requests/serializers/encoders.py +140 -22
  349. sky/server/requests/threads.py +117 -0
  350. sky/server/rest.py +455 -0
  351. sky/server/server.py +1309 -285
  352. sky/server/state.py +20 -0
  353. sky/server/stream_utils.py +327 -61
  354. sky/server/uvicorn.py +217 -3
  355. sky/server/versions.py +270 -0
  356. sky/setup_files/MANIFEST.in +11 -1
  357. sky/setup_files/alembic.ini +160 -0
  358. sky/setup_files/dependencies.py +139 -31
  359. sky/setup_files/setup.py +44 -42
  360. sky/sky_logging.py +114 -7
  361. sky/skylet/attempt_skylet.py +106 -24
  362. sky/skylet/autostop_lib.py +129 -8
  363. sky/skylet/configs.py +29 -20
  364. sky/skylet/constants.py +216 -25
  365. sky/skylet/events.py +101 -21
  366. sky/skylet/job_lib.py +345 -164
  367. sky/skylet/log_lib.py +297 -18
  368. sky/skylet/log_lib.pyi +44 -1
  369. sky/skylet/providers/ibm/node_provider.py +12 -8
  370. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  371. sky/skylet/ray_patches/__init__.py +17 -3
  372. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  373. sky/skylet/ray_patches/cli.py.diff +19 -0
  374. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  375. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  376. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  377. sky/skylet/ray_patches/updater.py.diff +18 -0
  378. sky/skylet/ray_patches/worker.py.diff +41 -0
  379. sky/skylet/runtime_utils.py +21 -0
  380. sky/skylet/services.py +568 -0
  381. sky/skylet/skylet.py +72 -4
  382. sky/skylet/subprocess_daemon.py +104 -29
  383. sky/skypilot_config.py +506 -99
  384. sky/ssh_node_pools/__init__.py +1 -0
  385. sky/ssh_node_pools/core.py +135 -0
  386. sky/ssh_node_pools/server.py +233 -0
  387. sky/task.py +685 -163
  388. sky/templates/aws-ray.yml.j2 +11 -3
  389. sky/templates/azure-ray.yml.j2 +2 -1
  390. sky/templates/cudo-ray.yml.j2 +1 -0
  391. sky/templates/do-ray.yml.j2 +2 -1
  392. sky/templates/fluidstack-ray.yml.j2 +1 -0
  393. sky/templates/gcp-ray.yml.j2 +62 -1
  394. sky/templates/hyperbolic-ray.yml.j2 +68 -0
  395. sky/templates/ibm-ray.yml.j2 +2 -1
  396. sky/templates/jobs-controller.yaml.j2 +27 -24
  397. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  398. sky/templates/kubernetes-ray.yml.j2 +611 -50
  399. sky/templates/lambda-ray.yml.j2 +2 -1
  400. sky/templates/nebius-ray.yml.j2 +34 -12
  401. sky/templates/oci-ray.yml.j2 +1 -0
  402. sky/templates/paperspace-ray.yml.j2 +2 -1
  403. sky/templates/primeintellect-ray.yml.j2 +72 -0
  404. sky/templates/runpod-ray.yml.j2 +10 -1
  405. sky/templates/scp-ray.yml.j2 +4 -50
  406. sky/templates/seeweb-ray.yml.j2 +171 -0
  407. sky/templates/shadeform-ray.yml.j2 +73 -0
  408. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  409. sky/templates/vast-ray.yml.j2 +1 -0
  410. sky/templates/vsphere-ray.yml.j2 +1 -0
  411. sky/templates/websocket_proxy.py +212 -37
  412. sky/usage/usage_lib.py +31 -15
  413. sky/users/__init__.py +0 -0
  414. sky/users/model.conf +15 -0
  415. sky/users/permission.py +397 -0
  416. sky/users/rbac.py +121 -0
  417. sky/users/server.py +720 -0
  418. sky/users/token_service.py +218 -0
  419. sky/utils/accelerator_registry.py +35 -5
  420. sky/utils/admin_policy_utils.py +84 -38
  421. sky/utils/annotations.py +38 -5
  422. sky/utils/asyncio_utils.py +78 -0
  423. sky/utils/atomic.py +1 -1
  424. sky/utils/auth_utils.py +153 -0
  425. sky/utils/benchmark_utils.py +60 -0
  426. sky/utils/cli_utils/status_utils.py +159 -86
  427. sky/utils/cluster_utils.py +31 -9
  428. sky/utils/command_runner.py +354 -68
  429. sky/utils/command_runner.pyi +93 -3
  430. sky/utils/common.py +35 -8
  431. sky/utils/common_utils.py +314 -91
  432. sky/utils/config_utils.py +74 -5
  433. sky/utils/context.py +403 -0
  434. sky/utils/context_utils.py +242 -0
  435. sky/utils/controller_utils.py +383 -89
  436. sky/utils/dag_utils.py +31 -12
  437. sky/utils/db/__init__.py +0 -0
  438. sky/utils/db/db_utils.py +485 -0
  439. sky/utils/db/kv_cache.py +149 -0
  440. sky/utils/db/migration_utils.py +137 -0
  441. sky/utils/directory_utils.py +12 -0
  442. sky/utils/env_options.py +13 -0
  443. sky/utils/git.py +567 -0
  444. sky/utils/git_clone.sh +460 -0
  445. sky/utils/infra_utils.py +195 -0
  446. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  447. sky/utils/kubernetes/config_map_utils.py +133 -0
  448. sky/utils/kubernetes/create_cluster.sh +15 -29
  449. sky/utils/kubernetes/delete_cluster.sh +10 -7
  450. sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
  451. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  452. sky/utils/kubernetes/generate_kind_config.py +6 -66
  453. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  454. sky/utils/kubernetes/gpu_labeler.py +18 -8
  455. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  456. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  457. sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
  458. sky/utils/kubernetes/rsync_helper.sh +11 -3
  459. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  460. sky/utils/kubernetes/ssh_utils.py +221 -0
  461. sky/utils/kubernetes_enums.py +8 -15
  462. sky/utils/lock_events.py +94 -0
  463. sky/utils/locks.py +416 -0
  464. sky/utils/log_utils.py +82 -107
  465. sky/utils/perf_utils.py +22 -0
  466. sky/utils/resource_checker.py +298 -0
  467. sky/utils/resources_utils.py +249 -32
  468. sky/utils/rich_utils.py +217 -39
  469. sky/utils/schemas.py +955 -160
  470. sky/utils/serialize_utils.py +16 -0
  471. sky/utils/status_lib.py +10 -0
  472. sky/utils/subprocess_utils.py +29 -15
  473. sky/utils/tempstore.py +70 -0
  474. sky/utils/thread_utils.py +91 -0
  475. sky/utils/timeline.py +26 -53
  476. sky/utils/ux_utils.py +84 -15
  477. sky/utils/validator.py +11 -1
  478. sky/utils/volume.py +165 -0
  479. sky/utils/yaml_utils.py +111 -0
  480. sky/volumes/__init__.py +13 -0
  481. sky/volumes/client/__init__.py +0 -0
  482. sky/volumes/client/sdk.py +150 -0
  483. sky/volumes/server/__init__.py +0 -0
  484. sky/volumes/server/core.py +270 -0
  485. sky/volumes/server/server.py +124 -0
  486. sky/volumes/volume.py +215 -0
  487. sky/workspaces/__init__.py +0 -0
  488. sky/workspaces/core.py +655 -0
  489. sky/workspaces/server.py +101 -0
  490. sky/workspaces/utils.py +56 -0
  491. sky_templates/README.md +3 -0
  492. sky_templates/__init__.py +3 -0
  493. sky_templates/ray/__init__.py +0 -0
  494. sky_templates/ray/start_cluster +183 -0
  495. sky_templates/ray/stop_cluster +75 -0
  496. skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
  497. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  498. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
  499. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  500. sky/benchmark/benchmark_state.py +0 -256
  501. sky/benchmark/benchmark_utils.py +0 -641
  502. sky/clouds/service_catalog/constants.py +0 -7
  503. sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
  504. sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
  505. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  506. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  507. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  508. sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
  509. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  510. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  511. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  512. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  513. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  514. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  515. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  516. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
  517. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  518. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  519. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  520. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
  521. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  522. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  523. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  524. sky/jobs/dashboard/dashboard.py +0 -223
  525. sky/jobs/dashboard/static/favicon.ico +0 -0
  526. sky/jobs/dashboard/templates/index.html +0 -831
  527. sky/jobs/server/dashboard_utils.py +0 -69
  528. sky/skylet/providers/scp/__init__.py +0 -2
  529. sky/skylet/providers/scp/config.py +0 -149
  530. sky/skylet/providers/scp/node_provider.py +0 -578
  531. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  532. sky/utils/db_utils.py +0 -100
  533. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  534. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  535. skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
  536. skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
  537. skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
  538. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  539. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  540. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  541. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  542. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  543. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  544. /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  545. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  546. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/__init__.py CHANGED
@@ -4,8 +4,10 @@ import subprocess
4
4
  from typing import Optional
5
5
  import urllib.request
6
6
 
7
+ from sky.utils import directory_utils
8
+
7
9
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = '85c4b6b96e1302c7d6886312beb9a34838c35b65'
10
+ _SKYPILOT_COMMIT_SHA = '3ff39aba6d4752d5c3b09e3fa7d778cefea39370'
9
11
 
10
12
 
11
13
  def _get_git_commit():
@@ -35,8 +37,8 @@ def _get_git_commit():
35
37
 
36
38
 
37
39
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20250502'
39
- __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
+ __version__ = '1.0.0.dev20251203'
41
+ __root_dir__ = directory_utils.get_sky_dir()
40
42
 
41
43
 
42
44
  # ---------------------- Proxy Configuration ---------------------- #
@@ -81,13 +83,14 @@ _set_http_proxy_env_vars()
81
83
  # Keep this order to avoid cyclic imports
82
84
  # pylint: disable=wrong-import-position
83
85
  from sky import backends
84
- from sky import benchmark
85
86
  from sky import clouds
86
87
  from sky.admin_policy import AdminPolicy
87
88
  from sky.admin_policy import MutatedUserRequest
88
89
  from sky.admin_policy import UserRequest
90
+ from sky.catalog import list_accelerators
89
91
  from sky.client.sdk import api_cancel
90
92
  from sky.client.sdk import api_info
93
+ from sky.client.sdk import api_login
91
94
  from sky.client.sdk import api_server_logs
92
95
  from sky.client.sdk import api_start
93
96
  from sky.client.sdk import api_status
@@ -97,12 +100,14 @@ from sky.client.sdk import cancel
97
100
  from sky.client.sdk import cost_report
98
101
  from sky.client.sdk import down
99
102
  from sky.client.sdk import download_logs
103
+ from sky.client.sdk import endpoints
100
104
  from sky.client.sdk import exec # pylint: disable=redefined-builtin
101
105
  from sky.client.sdk import get
102
106
  from sky.client.sdk import job_status
103
107
  from sky.client.sdk import launch
104
108
  from sky.client.sdk import optimize
105
109
  from sky.client.sdk import queue
110
+ from sky.client.sdk import reload_config
106
111
  from sky.client.sdk import start
107
112
  from sky.client.sdk import status
108
113
  from sky.client.sdk import stop
@@ -110,7 +115,6 @@ from sky.client.sdk import storage_delete
110
115
  from sky.client.sdk import storage_ls
111
116
  from sky.client.sdk import stream_and_get
112
117
  from sky.client.sdk import tail_logs
113
- from sky.clouds.service_catalog import list_accelerators
114
118
  from sky.dag import Dag
115
119
  from sky.data import Storage
116
120
  from sky.data import StorageMode
@@ -118,6 +122,7 @@ from sky.data import StoreType
118
122
  from sky.jobs import ManagedJobStatus
119
123
  from sky.optimizer import Optimizer
120
124
  from sky.resources import Resources
125
+ from sky.server.requests.request_names import AdminPolicyRequestName
121
126
  from sky.skylet.job_lib import JobStatus
122
127
  from sky.task import Task
123
128
  from sky.utils.common import OptimizeTarget
@@ -139,11 +144,15 @@ Kubernetes = clouds.Kubernetes
139
144
  K8s = Kubernetes
140
145
  OCI = clouds.OCI
141
146
  Paperspace = clouds.Paperspace
147
+ PrimeIntellect = clouds.PrimeIntellect
142
148
  RunPod = clouds.RunPod
143
149
  Vast = clouds.Vast
144
150
  Vsphere = clouds.Vsphere
145
151
  Fluidstack = clouds.Fluidstack
146
152
  Nebius = clouds.Nebius
153
+ Hyperbolic = clouds.Hyperbolic
154
+ Shadeform = clouds.Shadeform
155
+ Seeweb = clouds.Seeweb
147
156
 
148
157
  __all__ = [
149
158
  '__version__',
@@ -157,16 +166,19 @@ __all__ = [
157
166
  'Lambda',
158
167
  'OCI',
159
168
  'Paperspace',
169
+ 'PrimeIntellect',
160
170
  'RunPod',
161
171
  'Vast',
162
172
  'SCP',
163
173
  'Vsphere',
164
174
  'Fluidstack',
165
175
  'Nebius',
176
+ 'Hyperbolic',
177
+ 'Shadeform',
178
+ 'Seeweb',
166
179
  'Optimizer',
167
180
  'OptimizeTarget',
168
181
  'backends',
169
- 'benchmark',
170
182
  'list_accelerators',
171
183
  '__root_dir__',
172
184
  'Storage',
@@ -184,6 +196,7 @@ __all__ = [
184
196
  'optimize',
185
197
  'launch',
186
198
  'exec',
199
+ 'reload_config',
187
200
  # core APIs
188
201
  'status',
189
202
  'start',
@@ -191,6 +204,7 @@ __all__ = [
191
204
  'down',
192
205
  'autostop',
193
206
  'cost_report',
207
+ 'endpoints',
194
208
  # core APIs Job Management
195
209
  'queue',
196
210
  'cancel',
@@ -206,6 +220,7 @@ __all__ = [
206
220
  'api_status',
207
221
  'api_cancel',
208
222
  'api_info',
223
+ 'api_login',
209
224
  'api_start',
210
225
  'api_stop',
211
226
  'api_server_logs',
@@ -214,6 +229,7 @@ __all__ = [
214
229
  'MutatedUserRequest',
215
230
  'AdminPolicy',
216
231
  'Config',
232
+ 'AdminPolicyRequestName',
217
233
  # Registry
218
234
  'CLOUD_REGISTRY',
219
235
  'JOBS_RECOVERY_STRATEGY_REGISTRY',
sky/adaptors/aws.py CHANGED
@@ -28,12 +28,14 @@ This is informed by the following boto3 docs:
28
28
 
29
29
  # pylint: disable=import-outside-toplevel
30
30
 
31
+ import functools
31
32
  import logging
32
33
  import threading
33
34
  import time
34
35
  import typing
35
36
  from typing import Callable, Literal, Optional, TypeVar
36
37
 
38
+ from sky import skypilot_config
37
39
  from sky.adaptors import common
38
40
  from sky.utils import annotations
39
41
  from sky.utils import common_utils
@@ -67,17 +69,63 @@ version = 1
67
69
  _MAX_ATTEMPT_FOR_CREATION = 5
68
70
 
69
71
 
70
- class _ThreadLocalLRUCache(threading.local):
72
+ class _ThreadLocalTTLCache(threading.local):
73
+ """Thread-local storage for the _thread_local_ttl_cache decorator."""
71
74
 
72
- def __init__(self, maxsize=32):
75
+ def __init__(self, func, maxsize: int, ttl: int):
73
76
  super().__init__()
74
- self.cache = annotations.lru_cache(scope='request', maxsize=maxsize)
77
+ self.func = func
78
+ self.maxsize = maxsize
79
+ self.ttl = ttl
75
80
 
81
+ def get_cache(self):
82
+ if not hasattr(self, 'cache'):
83
+ self.cache = annotations.ttl_cache(scope='request',
84
+ maxsize=self.maxsize,
85
+ ttl=self.ttl,
86
+ timer=time.time)(self.func)
87
+ return self.cache
76
88
 
77
- def _thread_local_lru_cache(maxsize=32):
78
- # Create thread-local storage for the LRU cache
79
- local_cache = _ThreadLocalLRUCache(maxsize)
80
- return local_cache.cache
89
+
90
+ def _thread_local_ttl_cache(maxsize=32, ttl=60 * 55):
91
+ """Thread-local TTL cache decorator.
92
+
93
+ Args:
94
+ maxsize: Maximum size of the cache.
95
+ ttl: Time to live for the cache in seconds.
96
+ Default is 55 minutes, a bit less than 1 hour
97
+ default lifetime of an STS token.
98
+ """
99
+
100
+ def decorator(func):
101
+ # Create thread-local storage for the TTL cache
102
+ local_cache = _ThreadLocalTTLCache(func, maxsize, ttl)
103
+
104
+ # We can't apply the ttl_cache here, because this runs at import time
105
+ # so we will always have the main thread's cache.
106
+
107
+ @functools.wraps(func)
108
+ def wrapper(*args, **kwargs):
109
+ # We are within the actual function call, which may be on a thread,
110
+ # so local_cache.get_cache() will return the correct thread-local cache,
111
+ # which we can now apply and immediately call.
112
+ return local_cache.get_cache()(*args, **kwargs)
113
+
114
+ def cache_info():
115
+ # Note that this will only give the cache info for the current
116
+ # thread's cache.
117
+ return local_cache.get_cache().cache_info()
118
+
119
+ def cache_clear():
120
+ # Note that this will only clear the cache for the current thread.
121
+ local_cache.get_cache().cache_clear()
122
+
123
+ wrapper.cache_info = cache_info # type: ignore[attr-defined]
124
+ wrapper.cache_clear = cache_clear # type: ignore[attr-defined]
125
+
126
+ return wrapper
127
+
128
+ return decorator
81
129
 
82
130
 
83
131
  def _assert_kwargs_builtin_type(kwargs):
@@ -119,12 +167,27 @@ def _create_aws_object(creation_fn_or_cls: Callable[[], T],
119
167
  f'{common_utils.format_exception(e)}.')
120
168
 
121
169
 
122
- # The LRU cache needs to be thread-local to avoid multiple threads sharing the
170
+ def get_workspace_profile() -> Optional[str]:
171
+ """Get AWS profile name from workspace config."""
172
+ return skypilot_config.get_workspace_cloud('aws').get('profile', None)
173
+
174
+
175
+ # The TTL cache needs to be thread-local to avoid multiple threads sharing the
123
176
  # same session object, which is not guaranteed to be thread-safe.
124
- @_thread_local_lru_cache()
125
- def session(check_credentials: bool = True):
126
- """Create an AWS session."""
127
- s = _create_aws_object(boto3.session.Session, 'session')
177
+ @_thread_local_ttl_cache()
178
+ def session(check_credentials: bool = True, profile: Optional[str] = None):
179
+ """Create an AWS session.
180
+
181
+ Args:
182
+ check_credentials: Whether to check if credentials are available.
183
+ profile: AWS profile name to use. If None, uses default credentials.
184
+ """
185
+ if profile is not None:
186
+ logger.debug(f'Using AWS profile \'{profile}\'.')
187
+ s = _create_aws_object(
188
+ lambda: boto3.session.Session(profile_name=profile), 'session')
189
+ else:
190
+ s = _create_aws_object(boto3.session.Session, 'session')
128
191
  if check_credentials and s.get_credentials() is None:
129
192
  # s.get_credentials() can be None if there are actually no credentials,
130
193
  # or if we fail to get credentials from IMDS (e.g. due to throttling).
@@ -180,13 +243,14 @@ def resource(service_name: str, **kwargs):
180
243
  kwargs['config'] = config
181
244
 
182
245
  check_credentials = kwargs.pop('check_credentials', True)
246
+ profile = get_workspace_profile()
183
247
 
184
248
  # Need to use the client retrieved from the per-thread session to avoid
185
249
  # thread-safety issues (Directly creating the client with boto3.resource()
186
250
  # is not thread-safe). Reference: https://stackoverflow.com/a/59635814
187
251
  return _create_aws_object(
188
- lambda: session(check_credentials=check_credentials).resource(
189
- service_name, **kwargs), 'resource')
252
+ lambda: session(check_credentials=check_credentials, profile=profile).
253
+ resource(service_name, **kwargs), 'resource')
190
254
 
191
255
 
192
256
  # New typing overloads can be added as needed.
@@ -221,14 +285,15 @@ def client(service_name: str, **kwargs):
221
285
  _assert_kwargs_builtin_type(kwargs)
222
286
 
223
287
  check_credentials = kwargs.pop('check_credentials', True)
288
+ profile = get_workspace_profile()
224
289
 
225
290
  # Need to use the client retrieved from the per-thread session to avoid
226
291
  # thread-safety issues (Directly creating the client with boto3.client() is
227
292
  # not thread-safe). Reference: https://stackoverflow.com/a/59635814
228
293
 
229
294
  return _create_aws_object(
230
- lambda: session(check_credentials=check_credentials).client(
231
- service_name, **kwargs), 'client')
295
+ lambda: session(check_credentials=check_credentials, profile=profile).
296
+ client(service_name, **kwargs), 'client')
232
297
 
233
298
 
234
299
  @common.load_lazy_modules(modules=_LAZY_MODULES)
sky/adaptors/common.py CHANGED
@@ -1,9 +1,10 @@
1
1
  """Lazy import for modules to avoid import error when not used."""
2
+ from importlib import util as importlib_util
2
3
  import functools
3
4
  import importlib
4
5
  import threading
5
6
  import types
6
- from typing import Any, Callable, Optional, Tuple
7
+ from typing import Any, Callable, List, Optional, Tuple
7
8
 
8
9
 
9
10
  class LazyImport(types.ModuleType):
@@ -27,7 +28,7 @@ class LazyImport(types.ModuleType):
27
28
  import_error_message: Optional[str] = None,
28
29
  set_loggers: Optional[Callable] = None):
29
30
  self._module_name = module_name
30
- self._module = None
31
+ self._module: Optional[types.ModuleType] = None
31
32
  self._import_error_message = import_error_message
32
33
  self._set_loggers = set_loggers
33
34
  self._lock = threading.RLock()
@@ -78,3 +79,25 @@ def load_lazy_modules(modules: Tuple[LazyImport, ...]):
78
79
  return wrapper
79
80
 
80
81
  return decorator
82
+
83
+
84
+ def can_import_modules(module_names: List[str]) -> bool:
85
+ """Check whether modules are available without actually importing them, to
86
+ save memory footprint.
87
+
88
+ Args:
89
+ module_names: List[str], the names of the modules to check.
90
+
91
+ Returns:
92
+ True if all modules are available, False otherwise.
93
+ If a module exists in sys.modules, but is set to None,
94
+ then it is considered as not available.
95
+ """
96
+ try:
97
+ for module_name in module_names:
98
+ module_spec = importlib_util.find_spec(module_name)
99
+ if module_spec is None:
100
+ return False
101
+ return True
102
+ except ValueError:
103
+ return False
@@ -0,0 +1,278 @@
1
+ """CoreWeave cloud adaptor."""
2
+
3
+ import configparser
4
+ import contextlib
5
+ import os
6
+ import threading
7
+ from typing import Dict, Optional, Tuple
8
+
9
+ from sky import exceptions
10
+ from sky import sky_logging
11
+ from sky.adaptors import common
12
+ from sky.clouds import cloud
13
+ from sky.utils import annotations
14
+ from sky.utils import ux_utils
15
+
16
+ logger = sky_logging.init_logger(__name__)
17
+
18
+ COREWEAVE_PROFILE_NAME = 'cw'
19
+ COREWEAVE_CREDENTIALS_PATH = '~/.coreweave/cw.credentials'
20
+ COREWEAVE_CONFIG_PATH = '~/.coreweave/cw.config'
21
+ NAME = 'CoreWeave'
22
+ DEFAULT_REGION = 'US-EAST-01A'
23
+ _DEFAULT_ENDPOINT = 'https://cwobject.com'
24
+ _INDENT_PREFIX = ' '
25
+
26
+ _IMPORT_ERROR_MESSAGE = ('Failed to import dependencies for CoreWeave.'
27
+ 'Try pip install "skypilot[coreweave]"')
28
+
29
+ boto3 = common.LazyImport('boto3', import_error_message=_IMPORT_ERROR_MESSAGE)
30
+ botocore = common.LazyImport('botocore',
31
+ import_error_message=_IMPORT_ERROR_MESSAGE)
32
+
33
+ _LAZY_MODULES = (boto3, botocore)
34
+ _session_creation_lock = threading.RLock()
35
+
36
+
37
+ @contextlib.contextmanager
38
+ def _load_cw_credentials_env():
39
+ """Context manager to temporarily override the AWS credentials/config file paths."""
40
+ prev_credentials_path = os.environ.get('AWS_SHARED_CREDENTIALS_FILE')
41
+ prev_config_path = os.environ.get('AWS_CONFIG_FILE')
42
+ os.environ['AWS_SHARED_CREDENTIALS_FILE'] = COREWEAVE_CREDENTIALS_PATH
43
+ os.environ['AWS_CONFIG_FILE'] = COREWEAVE_CONFIG_PATH
44
+ try:
45
+ yield
46
+ finally:
47
+ if prev_credentials_path is None:
48
+ del os.environ['AWS_SHARED_CREDENTIALS_FILE']
49
+ else:
50
+ os.environ['AWS_SHARED_CREDENTIALS_FILE'] = prev_credentials_path
51
+ if prev_config_path is None:
52
+ del os.environ['AWS_CONFIG_FILE']
53
+ else:
54
+ os.environ['AWS_CONFIG_FILE'] = prev_config_path
55
+
56
+
57
+ def get_coreweave_credentials(boto3_session):
58
+ """Gets the CoreWeave credentials from the boto3 session object.
59
+
60
+ Args:
61
+ boto3_session: The boto3 session object.
62
+ Returns:
63
+ botocore.credentials.ReadOnlyCredentials object with the CoreWeave
64
+ credentials.
65
+ """
66
+ with _load_cw_credentials_env():
67
+ coreweave_credentials = boto3_session.get_credentials()
68
+ if coreweave_credentials is None:
69
+ with ux_utils.print_exception_no_traceback():
70
+ raise ValueError('CoreWeave credentials not found. Run '
71
+ '`sky check` to verify credentials are '
72
+ 'correctly set up.')
73
+ return coreweave_credentials.get_frozen_credentials()
74
+
75
+
76
+ @annotations.lru_cache(scope='global')
77
+ def session():
78
+ """Create an AWS session for CoreWeave."""
79
+ # Creating the session object is not thread-safe for boto3,
80
+ # so we add a reentrant lock to synchronize the session creation.
81
+ # Reference: https://github.com/boto/boto3/issues/1592
82
+ # However, the session object itself is thread-safe, so we are
83
+ # able to use lru_cache() to cache the session object.
84
+ with _session_creation_lock:
85
+ with _load_cw_credentials_env():
86
+ session_ = boto3.session.Session(
87
+ profile_name=COREWEAVE_PROFILE_NAME)
88
+ return session_
89
+
90
+
91
+ @annotations.lru_cache(scope='global')
92
+ def resource(resource_name: str, **kwargs):
93
+ """Create a CoreWeave resource.
94
+
95
+ Args:
96
+ resource_name: CoreWeave resource name (e.g., 's3').
97
+ kwargs: Other options.
98
+ """
99
+ # Need to use the resource retrieved from the shared, cached session
100
+ # to avoid thread-safety issues (Directly creating the client
101
+ # with boto3.resource() is not thread-safe).
102
+ # Reference: https://stackoverflow.com/a/59635814
103
+
104
+ session_ = session()
105
+ coreweave_credentials = get_coreweave_credentials(session_)
106
+ endpoint = get_endpoint()
107
+
108
+ return session_.resource(
109
+ resource_name,
110
+ endpoint_url=endpoint,
111
+ aws_access_key_id=coreweave_credentials.access_key,
112
+ aws_secret_access_key=coreweave_credentials.secret_key,
113
+ region_name='auto',
114
+ config=botocore.config.Config(s3={'addressing_style': 'virtual'}),
115
+ **kwargs)
116
+
117
+
118
+ @annotations.lru_cache(scope='global')
119
+ def client(service_name: str):
120
+ """Create CoreWeave client of a certain service.
121
+
122
+ Args:
123
+ service_name: CoreWeave service name (e.g., 's3').
124
+ """
125
+ # Need to use the client retrieved from the shared, cached session
126
+ # to avoid thread-safety issues (Directly creating the client
127
+ # with boto3.client() is not thread-safe).
128
+ # Reference: https://stackoverflow.com/a/59635814
129
+
130
+ session_ = session()
131
+ coreweave_credentials = get_coreweave_credentials(session_)
132
+ endpoint = get_endpoint()
133
+
134
+ return session_.client(
135
+ service_name,
136
+ endpoint_url=endpoint,
137
+ aws_access_key_id=coreweave_credentials.access_key,
138
+ aws_secret_access_key=coreweave_credentials.secret_key,
139
+ region_name='auto',
140
+ config=botocore.config.Config(s3={'addressing_style': 'virtual'}),
141
+ )
142
+
143
+
144
+ @common.load_lazy_modules(_LAZY_MODULES)
145
+ def botocore_exceptions():
146
+ """AWS botocore exception."""
147
+ # pylint: disable=import-outside-toplevel
148
+ from botocore import exceptions as boto_exceptions
149
+ return boto_exceptions
150
+
151
+
152
+ def get_endpoint():
153
+ """Parse the COREWEAVE_CONFIG_PATH to get the endpoint_url.
154
+
155
+ The config file is an AWS-style config file with format:
156
+ [profile cw]
157
+ endpoint_url = https://cwobject.com
158
+ s3 =
159
+ addressing_style = virtual
160
+
161
+ Returns:
162
+ str: The endpoint URL from the config file, or the default endpoint
163
+ if the file doesn't exist or doesn't contain the endpoint_url.
164
+ """
165
+ config_path = os.path.expanduser(COREWEAVE_CONFIG_PATH)
166
+ if not os.path.isfile(config_path):
167
+ return _DEFAULT_ENDPOINT
168
+
169
+ try:
170
+ config = configparser.ConfigParser()
171
+ config.read(config_path)
172
+
173
+ # Try to get endpoint_url from [profile cw] section
174
+ profile_section = f'profile {COREWEAVE_PROFILE_NAME}'
175
+ if config.has_section(profile_section):
176
+ if config.has_option(profile_section, 'endpoint_url'):
177
+ endpoint = config.get(profile_section, 'endpoint_url')
178
+ return endpoint.strip()
179
+ except (configparser.Error, OSError) as e:
180
+ logger.warning(f'Failed to parse CoreWeave config file: {e}. '
181
+ f'Using default endpoint: {_DEFAULT_ENDPOINT}')
182
+
183
+ return _DEFAULT_ENDPOINT
184
+
185
+
186
+ def check_credentials(
187
+ cloud_capability: cloud.CloudCapability) -> Tuple[bool, Optional[str]]:
188
+ if cloud_capability == cloud.CloudCapability.STORAGE:
189
+ return check_storage_credentials()
190
+ else:
191
+ raise exceptions.NotSupportedError(
192
+ f'{NAME} does not support {cloud_capability}.')
193
+
194
+
195
+ def check_storage_credentials() -> Tuple[bool, Optional[str]]:
196
+ """Checks if the user has access credentials to CoreWeave Object Storage.
197
+
198
+ Returns:
199
+ A tuple of a boolean value and a hint message where the bool
200
+ is True when both credentials needed for CoreWeave storage are set.
201
+ It is False when either of those is not set, which would hint with a
202
+ string on unset credential.
203
+ """
204
+ hints = None
205
+ profile_in_cred = coreweave_profile_in_cred()
206
+ profile_in_config = coreweave_profile_in_config()
207
+
208
+ if not profile_in_cred:
209
+ hints = (f'[{COREWEAVE_PROFILE_NAME}] profile is not set in '
210
+ f'{COREWEAVE_CREDENTIALS_PATH}.')
211
+ if not profile_in_config:
212
+ if hints:
213
+ hints += ' Additionally, '
214
+ else:
215
+ hints = ''
216
+ hints += (f'[{COREWEAVE_PROFILE_NAME}] profile is not set in '
217
+ f'{COREWEAVE_CONFIG_PATH}.')
218
+
219
+ if hints:
220
+ hints += ' Run the following commands:'
221
+ if not profile_in_cred:
222
+ hints += f'\n{_INDENT_PREFIX} $ pip install boto3'
223
+ hints += (f'\n{_INDENT_PREFIX} $ AWS_SHARED_CREDENTIALS_FILE='
224
+ f'{COREWEAVE_CREDENTIALS_PATH} aws configure --profile '
225
+ f'{COREWEAVE_PROFILE_NAME}')
226
+ if not profile_in_config:
227
+ hints += (f'\n{_INDENT_PREFIX} $ AWS_CONFIG_FILE='
228
+ f'{COREWEAVE_CONFIG_PATH} aws configure set endpoint_url'
229
+ f' <ENDPOINT_URL> --profile '
230
+ f'{COREWEAVE_PROFILE_NAME}')
231
+ hints += (f'\n{_INDENT_PREFIX} $ AWS_CONFIG_FILE='
232
+ f'{COREWEAVE_CONFIG_PATH} aws configure set '
233
+ f's3.addressing_style virtual --profile '
234
+ f'{COREWEAVE_PROFILE_NAME}')
235
+ hints += f'\n{_INDENT_PREFIX}For more info: '
236
+ hints += 'https://docs.coreweave.com/docs/products/storage/object-storage/get-started-caios' # pylint: disable=line-too-long
237
+
238
+ return (False, hints) if hints else (True, hints)
239
+
240
+
241
+ def coreweave_profile_in_config() -> bool:
242
+ """Checks if CoreWeave profile is set in config"""
243
+ conf_path = os.path.expanduser(COREWEAVE_CONFIG_PATH)
244
+ coreweave_profile_exists = False
245
+ if os.path.isfile(conf_path):
246
+ with open(conf_path, 'r', encoding='utf-8') as file:
247
+ for line in file:
248
+ if f'[profile {COREWEAVE_PROFILE_NAME}]' in line:
249
+ coreweave_profile_exists = True
250
+ break
251
+ return coreweave_profile_exists
252
+
253
+
254
+ def coreweave_profile_in_cred() -> bool:
255
+ """Checks if CoreWeave profile is set in credentials"""
256
+ cred_path = os.path.expanduser(COREWEAVE_CREDENTIALS_PATH)
257
+ coreweave_profile_exists = False
258
+ if os.path.isfile(cred_path):
259
+ with open(cred_path, 'r', encoding='utf-8') as file:
260
+ for line in file:
261
+ if f'[{COREWEAVE_PROFILE_NAME}]' in line:
262
+ coreweave_profile_exists = True
263
+ break
264
+ return coreweave_profile_exists
265
+
266
+
267
+ def get_credential_file_mounts() -> Dict[str, str]:
268
+ """Returns credential file mounts for CoreWeave.
269
+
270
+ Returns:
271
+ Dict[str, str]: A dictionary mapping source paths to destination paths
272
+ for credential files.
273
+ """
274
+ coreweave_credential_mounts = {
275
+ COREWEAVE_CREDENTIALS_PATH: COREWEAVE_CREDENTIALS_PATH,
276
+ COREWEAVE_CONFIG_PATH: COREWEAVE_CONFIG_PATH
277
+ }
278
+ return coreweave_credential_mounts
sky/adaptors/do.py CHANGED
@@ -1,7 +1,5 @@
1
1
  """Digital Ocean cloud adaptors"""
2
2
 
3
- # pylint: disable=import-outside-toplevel
4
-
5
3
  from sky.adaptors import common
6
4
 
7
5
  _IMPORT_ERROR_MESSAGE = ('Failed to import dependencies for DO. '
@@ -16,5 +14,13 @@ _LAZY_MODULES = (pydo, azure)
16
14
  @common.load_lazy_modules(modules=_LAZY_MODULES)
17
15
  def exceptions():
18
16
  """Azure exceptions."""
17
+ # pylint: disable=import-outside-toplevel
19
18
  from azure.core import exceptions as azure_exceptions
20
19
  return azure_exceptions
20
+
21
+
22
+ def check_exceptions_dependencies_installed():
23
+ """Check if the azure.core.exceptions module is installed."""
24
+ if not common.can_import_modules(['azure.core.exceptions']):
25
+ return False, _IMPORT_ERROR_MESSAGE
26
+ return True, None
sky/adaptors/gcp.py CHANGED
@@ -2,9 +2,20 @@
2
2
 
3
3
  # pylint: disable=import-outside-toplevel
4
4
  import json
5
+ import warnings
5
6
 
6
7
  from sky.adaptors import common
7
8
 
9
+ # Suppress FutureWarning from google.api_core about Python 3.10 support ending.
10
+ # This warning is informational and does not affect functionality.
11
+ # Reference: https://github.com/skypilot-org/skypilot/issues/7886
12
+ warnings.filterwarnings(
13
+ 'ignore',
14
+ category=FutureWarning,
15
+ message=
16
+ r'.*You are using a Python version.*which Google will stop supporting.*',
17
+ )
18
+
8
19
  _IMPORT_ERROR_MESSAGE = ('Failed to import dependencies for GCP. '
9
20
  'Try pip install "skypilot[gcp]"')
10
21
  googleapiclient = common.LazyImport('googleapiclient',
@@ -0,0 +1,8 @@
1
+ """Hyperbolic cloud adaptor."""
2
+
3
+ from sky.adaptors import common
4
+
5
+ hyperbolic = common.LazyImport(
6
+ 'hyperbolic',
7
+ import_error_message='Failed to import dependencies for Hyperbolic. '
8
+ 'Try running: pip install "skypilot[hyperbolic]"')
sky/adaptors/ibm.py CHANGED
@@ -28,6 +28,9 @@ requests = common.LazyImport('requests',
28
28
  import_error_message=_IMPORT_ERROR_MESSAGE)
29
29
  yaml = common.LazyImport('yaml', import_error_message=_IMPORT_ERROR_MESSAGE)
30
30
 
31
+ # Global process lock for thread-safe boto3 operations
32
+ global_process_lock = None
33
+
31
34
 
32
35
  def read_credential_file():
33
36
  try:
@@ -152,9 +155,9 @@ def _get_global_process_lock():
152
155
  already initialized.
153
156
  Necessary when process are spawned without a shared lock.
154
157
  """
155
- global global_process_lock # pylint: disable=global-variable-undefined
158
+ global global_process_lock
156
159
 
157
- if 'global_process_lock' not in globals():
160
+ if global_process_lock is None:
158
161
  global_process_lock = multiprocessing.Lock()
159
162
 
160
163
  return global_process_lock