skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546)
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/hyperbolic.py +8 -0
  8. sky/adaptors/ibm.py +5 -2
  9. sky/adaptors/kubernetes.py +149 -18
  10. sky/adaptors/nebius.py +173 -30
  11. sky/adaptors/primeintellect.py +1 -0
  12. sky/adaptors/runpod.py +68 -0
  13. sky/adaptors/seeweb.py +183 -0
  14. sky/adaptors/shadeform.py +89 -0
  15. sky/admin_policy.py +187 -4
  16. sky/authentication.py +179 -225
  17. sky/backends/__init__.py +4 -2
  18. sky/backends/backend.py +22 -9
  19. sky/backends/backend_utils.py +1323 -397
  20. sky/backends/cloud_vm_ray_backend.py +1749 -1029
  21. sky/backends/docker_utils.py +1 -1
  22. sky/backends/local_docker_backend.py +11 -6
  23. sky/backends/task_codegen.py +633 -0
  24. sky/backends/wheel_utils.py +55 -9
  25. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  26. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  27. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  28. sky/{clouds/service_catalog → catalog}/common.py +90 -49
  29. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  31. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  33. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
  34. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  35. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  36. sky/catalog/data_fetchers/fetch_nebius.py +338 -0
  37. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  38. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  39. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  40. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  41. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  42. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  43. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  44. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  45. sky/catalog/hyperbolic_catalog.py +136 -0
  46. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  47. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  48. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  49. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  50. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  51. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  52. sky/catalog/primeintellect_catalog.py +95 -0
  53. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  54. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  55. sky/catalog/seeweb_catalog.py +184 -0
  56. sky/catalog/shadeform_catalog.py +165 -0
  57. sky/catalog/ssh_catalog.py +167 -0
  58. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  59. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  60. sky/check.py +533 -185
  61. sky/cli.py +5 -5975
  62. sky/client/{cli.py → cli/command.py} +2591 -1956
  63. sky/client/cli/deprecation_utils.py +99 -0
  64. sky/client/cli/flags.py +359 -0
  65. sky/client/cli/table_utils.py +322 -0
  66. sky/client/cli/utils.py +79 -0
  67. sky/client/common.py +78 -32
  68. sky/client/oauth.py +82 -0
  69. sky/client/sdk.py +1219 -319
  70. sky/client/sdk_async.py +827 -0
  71. sky/client/service_account_auth.py +47 -0
  72. sky/cloud_stores.py +82 -3
  73. sky/clouds/__init__.py +13 -0
  74. sky/clouds/aws.py +564 -164
  75. sky/clouds/azure.py +105 -83
  76. sky/clouds/cloud.py +140 -40
  77. sky/clouds/cudo.py +68 -50
  78. sky/clouds/do.py +66 -48
  79. sky/clouds/fluidstack.py +63 -44
  80. sky/clouds/gcp.py +339 -110
  81. sky/clouds/hyperbolic.py +293 -0
  82. sky/clouds/ibm.py +70 -49
  83. sky/clouds/kubernetes.py +570 -162
  84. sky/clouds/lambda_cloud.py +74 -54
  85. sky/clouds/nebius.py +210 -81
  86. sky/clouds/oci.py +88 -66
  87. sky/clouds/paperspace.py +61 -44
  88. sky/clouds/primeintellect.py +317 -0
  89. sky/clouds/runpod.py +164 -74
  90. sky/clouds/scp.py +89 -86
  91. sky/clouds/seeweb.py +477 -0
  92. sky/clouds/shadeform.py +400 -0
  93. sky/clouds/ssh.py +263 -0
  94. sky/clouds/utils/aws_utils.py +10 -4
  95. sky/clouds/utils/gcp_utils.py +87 -11
  96. sky/clouds/utils/oci_utils.py +38 -14
  97. sky/clouds/utils/scp_utils.py +231 -167
  98. sky/clouds/vast.py +99 -77
  99. sky/clouds/vsphere.py +51 -40
  100. sky/core.py +375 -173
  101. sky/dag.py +15 -0
  102. sky/dashboard/out/404.html +1 -1
  103. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  105. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  106. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  107. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  110. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  111. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  112. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  113. sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  115. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  116. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  118. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  119. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  121. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  122. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  124. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  126. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  127. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  128. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  129. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  131. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  133. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  134. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  136. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  139. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  141. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  143. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  144. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  148. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  149. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  151. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  152. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
  155. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  156. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  157. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  158. sky/dashboard/out/clusters/[cluster].html +1 -1
  159. sky/dashboard/out/clusters.html +1 -1
  160. sky/dashboard/out/config.html +1 -0
  161. sky/dashboard/out/index.html +1 -1
  162. sky/dashboard/out/infra/[context].html +1 -0
  163. sky/dashboard/out/infra.html +1 -0
  164. sky/dashboard/out/jobs/[job].html +1 -1
  165. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  166. sky/dashboard/out/jobs.html +1 -1
  167. sky/dashboard/out/users.html +1 -0
  168. sky/dashboard/out/volumes.html +1 -0
  169. sky/dashboard/out/workspace/new.html +1 -0
  170. sky/dashboard/out/workspaces/[name].html +1 -0
  171. sky/dashboard/out/workspaces.html +1 -0
  172. sky/data/data_utils.py +137 -1
  173. sky/data/mounting_utils.py +269 -84
  174. sky/data/storage.py +1460 -1807
  175. sky/data/storage_utils.py +43 -57
  176. sky/exceptions.py +126 -2
  177. sky/execution.py +216 -63
  178. sky/global_user_state.py +2390 -586
  179. sky/jobs/__init__.py +7 -0
  180. sky/jobs/client/sdk.py +300 -58
  181. sky/jobs/client/sdk_async.py +161 -0
  182. sky/jobs/constants.py +15 -8
  183. sky/jobs/controller.py +848 -275
  184. sky/jobs/file_content_utils.py +128 -0
  185. sky/jobs/log_gc.py +193 -0
  186. sky/jobs/recovery_strategy.py +402 -152
  187. sky/jobs/scheduler.py +314 -189
  188. sky/jobs/server/core.py +836 -255
  189. sky/jobs/server/server.py +156 -115
  190. sky/jobs/server/utils.py +136 -0
  191. sky/jobs/state.py +2109 -706
  192. sky/jobs/utils.py +1306 -215
  193. sky/logs/__init__.py +21 -0
  194. sky/logs/agent.py +108 -0
  195. sky/logs/aws.py +243 -0
  196. sky/logs/gcp.py +91 -0
  197. sky/metrics/__init__.py +0 -0
  198. sky/metrics/utils.py +453 -0
  199. sky/models.py +78 -1
  200. sky/optimizer.py +164 -70
  201. sky/provision/__init__.py +90 -4
  202. sky/provision/aws/config.py +147 -26
  203. sky/provision/aws/instance.py +136 -50
  204. sky/provision/azure/instance.py +11 -6
  205. sky/provision/common.py +13 -1
  206. sky/provision/cudo/cudo_machine_type.py +1 -1
  207. sky/provision/cudo/cudo_utils.py +14 -8
  208. sky/provision/cudo/cudo_wrapper.py +72 -71
  209. sky/provision/cudo/instance.py +10 -6
  210. sky/provision/do/instance.py +10 -6
  211. sky/provision/do/utils.py +4 -3
  212. sky/provision/docker_utils.py +140 -33
  213. sky/provision/fluidstack/instance.py +13 -8
  214. sky/provision/gcp/__init__.py +1 -0
  215. sky/provision/gcp/config.py +301 -19
  216. sky/provision/gcp/constants.py +218 -0
  217. sky/provision/gcp/instance.py +36 -8
  218. sky/provision/gcp/instance_utils.py +18 -4
  219. sky/provision/gcp/volume_utils.py +247 -0
  220. sky/provision/hyperbolic/__init__.py +12 -0
  221. sky/provision/hyperbolic/config.py +10 -0
  222. sky/provision/hyperbolic/instance.py +437 -0
  223. sky/provision/hyperbolic/utils.py +373 -0
  224. sky/provision/instance_setup.py +101 -20
  225. sky/provision/kubernetes/__init__.py +5 -0
  226. sky/provision/kubernetes/config.py +9 -52
  227. sky/provision/kubernetes/constants.py +17 -0
  228. sky/provision/kubernetes/instance.py +919 -280
  229. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  230. sky/provision/kubernetes/network.py +27 -17
  231. sky/provision/kubernetes/network_utils.py +44 -43
  232. sky/provision/kubernetes/utils.py +1221 -534
  233. sky/provision/kubernetes/volume.py +343 -0
  234. sky/provision/lambda_cloud/instance.py +22 -16
  235. sky/provision/nebius/constants.py +50 -0
  236. sky/provision/nebius/instance.py +19 -6
  237. sky/provision/nebius/utils.py +237 -137
  238. sky/provision/oci/instance.py +10 -5
  239. sky/provision/paperspace/instance.py +10 -7
  240. sky/provision/paperspace/utils.py +1 -1
  241. sky/provision/primeintellect/__init__.py +10 -0
  242. sky/provision/primeintellect/config.py +11 -0
  243. sky/provision/primeintellect/instance.py +454 -0
  244. sky/provision/primeintellect/utils.py +398 -0
  245. sky/provision/provisioner.py +117 -36
  246. sky/provision/runpod/__init__.py +5 -0
  247. sky/provision/runpod/instance.py +27 -6
  248. sky/provision/runpod/utils.py +51 -18
  249. sky/provision/runpod/volume.py +214 -0
  250. sky/provision/scp/__init__.py +15 -0
  251. sky/provision/scp/config.py +93 -0
  252. sky/provision/scp/instance.py +707 -0
  253. sky/provision/seeweb/__init__.py +11 -0
  254. sky/provision/seeweb/config.py +13 -0
  255. sky/provision/seeweb/instance.py +812 -0
  256. sky/provision/shadeform/__init__.py +11 -0
  257. sky/provision/shadeform/config.py +12 -0
  258. sky/provision/shadeform/instance.py +351 -0
  259. sky/provision/shadeform/shadeform_utils.py +83 -0
  260. sky/provision/ssh/__init__.py +18 -0
  261. sky/provision/vast/instance.py +13 -8
  262. sky/provision/vast/utils.py +10 -7
  263. sky/provision/volume.py +164 -0
  264. sky/provision/vsphere/common/ssl_helper.py +1 -1
  265. sky/provision/vsphere/common/vapiconnect.py +2 -1
  266. sky/provision/vsphere/common/vim_utils.py +4 -4
  267. sky/provision/vsphere/instance.py +15 -10
  268. sky/provision/vsphere/vsphere_utils.py +17 -20
  269. sky/py.typed +0 -0
  270. sky/resources.py +845 -119
  271. sky/schemas/__init__.py +0 -0
  272. sky/schemas/api/__init__.py +0 -0
  273. sky/schemas/api/responses.py +227 -0
  274. sky/schemas/db/README +4 -0
  275. sky/schemas/db/env.py +90 -0
  276. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  277. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  278. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  279. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  280. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  281. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  282. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  283. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  284. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  285. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  286. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  287. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  288. sky/schemas/db/script.py.mako +28 -0
  289. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  290. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  291. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  292. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  293. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  294. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  295. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  296. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  297. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  298. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  299. sky/schemas/generated/__init__.py +0 -0
  300. sky/schemas/generated/autostopv1_pb2.py +36 -0
  301. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  302. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  303. sky/schemas/generated/jobsv1_pb2.py +86 -0
  304. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  305. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  306. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  307. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  308. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  309. sky/schemas/generated/servev1_pb2.py +58 -0
  310. sky/schemas/generated/servev1_pb2.pyi +115 -0
  311. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  312. sky/serve/autoscalers.py +357 -5
  313. sky/serve/client/impl.py +310 -0
  314. sky/serve/client/sdk.py +47 -139
  315. sky/serve/client/sdk_async.py +130 -0
  316. sky/serve/constants.py +12 -9
  317. sky/serve/controller.py +68 -17
  318. sky/serve/load_balancer.py +106 -60
  319. sky/serve/load_balancing_policies.py +116 -2
  320. sky/serve/replica_managers.py +434 -249
  321. sky/serve/serve_rpc_utils.py +179 -0
  322. sky/serve/serve_state.py +569 -257
  323. sky/serve/serve_utils.py +775 -265
  324. sky/serve/server/core.py +66 -711
  325. sky/serve/server/impl.py +1093 -0
  326. sky/serve/server/server.py +21 -18
  327. sky/serve/service.py +192 -89
  328. sky/serve/service_spec.py +144 -20
  329. sky/serve/spot_placer.py +3 -0
  330. sky/server/auth/__init__.py +0 -0
  331. sky/server/auth/authn.py +50 -0
  332. sky/server/auth/loopback.py +38 -0
  333. sky/server/auth/oauth2_proxy.py +202 -0
  334. sky/server/common.py +478 -182
  335. sky/server/config.py +85 -23
  336. sky/server/constants.py +44 -6
  337. sky/server/daemons.py +295 -0
  338. sky/server/html/token_page.html +185 -0
  339. sky/server/metrics.py +160 -0
  340. sky/server/middleware_utils.py +166 -0
  341. sky/server/requests/executor.py +558 -138
  342. sky/server/requests/payloads.py +364 -24
  343. sky/server/requests/preconditions.py +21 -17
  344. sky/server/requests/process.py +112 -29
  345. sky/server/requests/request_names.py +121 -0
  346. sky/server/requests/requests.py +822 -226
  347. sky/server/requests/serializers/decoders.py +82 -31
  348. sky/server/requests/serializers/encoders.py +140 -22
  349. sky/server/requests/threads.py +117 -0
  350. sky/server/rest.py +455 -0
  351. sky/server/server.py +1309 -285
  352. sky/server/state.py +20 -0
  353. sky/server/stream_utils.py +327 -61
  354. sky/server/uvicorn.py +217 -3
  355. sky/server/versions.py +270 -0
  356. sky/setup_files/MANIFEST.in +11 -1
  357. sky/setup_files/alembic.ini +160 -0
  358. sky/setup_files/dependencies.py +139 -31
  359. sky/setup_files/setup.py +44 -42
  360. sky/sky_logging.py +114 -7
  361. sky/skylet/attempt_skylet.py +106 -24
  362. sky/skylet/autostop_lib.py +129 -8
  363. sky/skylet/configs.py +29 -20
  364. sky/skylet/constants.py +216 -25
  365. sky/skylet/events.py +101 -21
  366. sky/skylet/job_lib.py +345 -164
  367. sky/skylet/log_lib.py +297 -18
  368. sky/skylet/log_lib.pyi +44 -1
  369. sky/skylet/providers/ibm/node_provider.py +12 -8
  370. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  371. sky/skylet/ray_patches/__init__.py +17 -3
  372. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  373. sky/skylet/ray_patches/cli.py.diff +19 -0
  374. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  375. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  376. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  377. sky/skylet/ray_patches/updater.py.diff +18 -0
  378. sky/skylet/ray_patches/worker.py.diff +41 -0
  379. sky/skylet/runtime_utils.py +21 -0
  380. sky/skylet/services.py +568 -0
  381. sky/skylet/skylet.py +72 -4
  382. sky/skylet/subprocess_daemon.py +104 -29
  383. sky/skypilot_config.py +506 -99
  384. sky/ssh_node_pools/__init__.py +1 -0
  385. sky/ssh_node_pools/core.py +135 -0
  386. sky/ssh_node_pools/server.py +233 -0
  387. sky/task.py +685 -163
  388. sky/templates/aws-ray.yml.j2 +11 -3
  389. sky/templates/azure-ray.yml.j2 +2 -1
  390. sky/templates/cudo-ray.yml.j2 +1 -0
  391. sky/templates/do-ray.yml.j2 +2 -1
  392. sky/templates/fluidstack-ray.yml.j2 +1 -0
  393. sky/templates/gcp-ray.yml.j2 +62 -1
  394. sky/templates/hyperbolic-ray.yml.j2 +68 -0
  395. sky/templates/ibm-ray.yml.j2 +2 -1
  396. sky/templates/jobs-controller.yaml.j2 +27 -24
  397. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  398. sky/templates/kubernetes-ray.yml.j2 +611 -50
  399. sky/templates/lambda-ray.yml.j2 +2 -1
  400. sky/templates/nebius-ray.yml.j2 +34 -12
  401. sky/templates/oci-ray.yml.j2 +1 -0
  402. sky/templates/paperspace-ray.yml.j2 +2 -1
  403. sky/templates/primeintellect-ray.yml.j2 +72 -0
  404. sky/templates/runpod-ray.yml.j2 +10 -1
  405. sky/templates/scp-ray.yml.j2 +4 -50
  406. sky/templates/seeweb-ray.yml.j2 +171 -0
  407. sky/templates/shadeform-ray.yml.j2 +73 -0
  408. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  409. sky/templates/vast-ray.yml.j2 +1 -0
  410. sky/templates/vsphere-ray.yml.j2 +1 -0
  411. sky/templates/websocket_proxy.py +212 -37
  412. sky/usage/usage_lib.py +31 -15
  413. sky/users/__init__.py +0 -0
  414. sky/users/model.conf +15 -0
  415. sky/users/permission.py +397 -0
  416. sky/users/rbac.py +121 -0
  417. sky/users/server.py +720 -0
  418. sky/users/token_service.py +218 -0
  419. sky/utils/accelerator_registry.py +35 -5
  420. sky/utils/admin_policy_utils.py +84 -38
  421. sky/utils/annotations.py +38 -5
  422. sky/utils/asyncio_utils.py +78 -0
  423. sky/utils/atomic.py +1 -1
  424. sky/utils/auth_utils.py +153 -0
  425. sky/utils/benchmark_utils.py +60 -0
  426. sky/utils/cli_utils/status_utils.py +159 -86
  427. sky/utils/cluster_utils.py +31 -9
  428. sky/utils/command_runner.py +354 -68
  429. sky/utils/command_runner.pyi +93 -3
  430. sky/utils/common.py +35 -8
  431. sky/utils/common_utils.py +314 -91
  432. sky/utils/config_utils.py +74 -5
  433. sky/utils/context.py +403 -0
  434. sky/utils/context_utils.py +242 -0
  435. sky/utils/controller_utils.py +383 -89
  436. sky/utils/dag_utils.py +31 -12
  437. sky/utils/db/__init__.py +0 -0
  438. sky/utils/db/db_utils.py +485 -0
  439. sky/utils/db/kv_cache.py +149 -0
  440. sky/utils/db/migration_utils.py +137 -0
  441. sky/utils/directory_utils.py +12 -0
  442. sky/utils/env_options.py +13 -0
  443. sky/utils/git.py +567 -0
  444. sky/utils/git_clone.sh +460 -0
  445. sky/utils/infra_utils.py +195 -0
  446. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  447. sky/utils/kubernetes/config_map_utils.py +133 -0
  448. sky/utils/kubernetes/create_cluster.sh +15 -29
  449. sky/utils/kubernetes/delete_cluster.sh +10 -7
  450. sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
  451. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  452. sky/utils/kubernetes/generate_kind_config.py +6 -66
  453. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  454. sky/utils/kubernetes/gpu_labeler.py +18 -8
  455. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  456. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  457. sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
  458. sky/utils/kubernetes/rsync_helper.sh +11 -3
  459. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  460. sky/utils/kubernetes/ssh_utils.py +221 -0
  461. sky/utils/kubernetes_enums.py +8 -15
  462. sky/utils/lock_events.py +94 -0
  463. sky/utils/locks.py +416 -0
  464. sky/utils/log_utils.py +82 -107
  465. sky/utils/perf_utils.py +22 -0
  466. sky/utils/resource_checker.py +298 -0
  467. sky/utils/resources_utils.py +249 -32
  468. sky/utils/rich_utils.py +217 -39
  469. sky/utils/schemas.py +955 -160
  470. sky/utils/serialize_utils.py +16 -0
  471. sky/utils/status_lib.py +10 -0
  472. sky/utils/subprocess_utils.py +29 -15
  473. sky/utils/tempstore.py +70 -0
  474. sky/utils/thread_utils.py +91 -0
  475. sky/utils/timeline.py +26 -53
  476. sky/utils/ux_utils.py +84 -15
  477. sky/utils/validator.py +11 -1
  478. sky/utils/volume.py +165 -0
  479. sky/utils/yaml_utils.py +111 -0
  480. sky/volumes/__init__.py +13 -0
  481. sky/volumes/client/__init__.py +0 -0
  482. sky/volumes/client/sdk.py +150 -0
  483. sky/volumes/server/__init__.py +0 -0
  484. sky/volumes/server/core.py +270 -0
  485. sky/volumes/server/server.py +124 -0
  486. sky/volumes/volume.py +215 -0
  487. sky/workspaces/__init__.py +0 -0
  488. sky/workspaces/core.py +655 -0
  489. sky/workspaces/server.py +101 -0
  490. sky/workspaces/utils.py +56 -0
  491. sky_templates/README.md +3 -0
  492. sky_templates/__init__.py +3 -0
  493. sky_templates/ray/__init__.py +0 -0
  494. sky_templates/ray/start_cluster +183 -0
  495. sky_templates/ray/stop_cluster +75 -0
  496. skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
  497. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  498. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
  499. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  500. sky/benchmark/benchmark_state.py +0 -256
  501. sky/benchmark/benchmark_utils.py +0 -641
  502. sky/clouds/service_catalog/constants.py +0 -7
  503. sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
  504. sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
  505. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  506. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  507. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  508. sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
  509. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  510. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  511. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  512. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  513. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  514. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  515. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  516. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
  517. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  518. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  519. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  520. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
  521. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  522. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  523. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  524. sky/jobs/dashboard/dashboard.py +0 -223
  525. sky/jobs/dashboard/static/favicon.ico +0 -0
  526. sky/jobs/dashboard/templates/index.html +0 -831
  527. sky/jobs/server/dashboard_utils.py +0 -69
  528. sky/skylet/providers/scp/__init__.py +0 -2
  529. sky/skylet/providers/scp/config.py +0 -149
  530. sky/skylet/providers/scp/node_provider.py +0 -578
  531. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  532. sky/utils/db_utils.py +0 -100
  533. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  534. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  535. skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
  536. skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
  537. skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
  538. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  539. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  540. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  541. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  542. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  543. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  544. /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  545. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  546. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/utils/dag_utils.py CHANGED
@@ -1,14 +1,14 @@
1
1
  """Utilities for loading and dumping DAGs from/to YAML files."""
2
2
  import copy
3
- from typing import Any, Dict, List, Optional, Tuple
3
+ from typing import Any, Dict, List, Optional, Tuple, Union
4
4
 
5
5
  from sky import dag as dag_lib
6
6
  from sky import sky_logging
7
7
  from sky import task as task_lib
8
8
  from sky.utils import cluster_utils
9
- from sky.utils import common_utils
10
9
  from sky.utils import registry
11
10
  from sky.utils import ux_utils
11
+ from sky.utils import yaml_utils
12
12
 
13
13
  logger = sky_logging.init_logger(__name__)
14
14
 
@@ -66,7 +66,9 @@ def convert_entrypoint_to_dag(entrypoint: Any) -> 'dag_lib.Dag':
66
66
 
67
67
  def _load_chain_dag(
68
68
  configs: List[Dict[str, Any]],
69
- env_overrides: Optional[List[Tuple[str, str]]] = None) -> dag_lib.Dag:
69
+ env_overrides: Optional[List[Tuple[str, str]]] = None,
70
+ secrets_overrides: Optional[List[Tuple[str,
71
+ str]]] = None) -> dag_lib.Dag:
70
72
  """Loads a chain DAG from a list of YAML configs."""
71
73
  dag_name = None
72
74
  if set(configs[0].keys()) == {'name'}:
@@ -84,7 +86,8 @@ def _load_chain_dag(
84
86
  for task_config in configs:
85
87
  if task_config is None:
86
88
  continue
87
- task = task_lib.Task.from_yaml_config(task_config, env_overrides)
89
+ task = task_lib.Task.from_yaml_config(task_config, env_overrides,
90
+ secrets_overrides)
88
91
  if current_task is not None:
89
92
  current_task >> task # pylint: disable=pointless-statement
90
93
  current_task = task
@@ -95,6 +98,7 @@ def _load_chain_dag(
95
98
  def load_chain_dag_from_yaml(
96
99
  path: str,
97
100
  env_overrides: Optional[List[Tuple[str, str]]] = None,
101
+ secret_overrides: Optional[List[Tuple[str, str]]] = None,
98
102
  ) -> dag_lib.Dag:
99
103
  """Loads a chain DAG from a YAML file.
100
104
 
@@ -105,17 +109,22 @@ def load_chain_dag_from_yaml(
105
109
  the task's 'envs' section. If it is a chain dag, the envs will be updated
106
110
  for all tasks in the chain.
107
111
 
112
+ 'secret_overrides' is a list of (key, value) pairs that will be used to
113
+ update the task's 'secrets' section. If it is a chain dag, the secrets will
114
+ be updated for all tasks in the chain.
115
+
108
116
  Returns:
109
117
  A chain Dag with 1 or more tasks (an empty entrypoint would create a
110
118
  trivial task).
111
119
  """
112
- configs = common_utils.read_yaml_all(path)
113
- return _load_chain_dag(configs, env_overrides)
120
+ configs = yaml_utils.read_yaml_all(path)
121
+ return _load_chain_dag(configs, env_overrides, secret_overrides)
114
122
 
115
123
 
116
124
  def load_chain_dag_from_yaml_str(
117
125
  yaml_str: str,
118
126
  env_overrides: Optional[List[Tuple[str, str]]] = None,
127
+ secrets_overrides: Optional[List[Tuple[str, str]]] = None,
119
128
  ) -> dag_lib.Dag:
120
129
  """Loads a chain DAG from a YAML string.
121
130
 
@@ -126,19 +135,25 @@ def load_chain_dag_from_yaml_str(
126
135
  the task's 'envs' section. If it is a chain dag, the envs will be updated
127
136
  for all tasks in the chain.
128
137
 
138
+ 'secrets_overrides' is a list of (key, value) pairs that will be used to
139
+ update the task's 'secrets' section. If it is a chain dag, the secrets will
140
+ be updated for all tasks in the chain.
141
+
129
142
  Returns:
130
143
  A chain Dag with 1 or more tasks (an empty entrypoint would create a
131
144
  trivial task).
132
145
  """
133
- configs = common_utils.read_yaml_all_str(yaml_str)
134
- return _load_chain_dag(configs, env_overrides)
146
+ configs = yaml_utils.read_yaml_all_str(yaml_str)
147
+ return _load_chain_dag(configs, env_overrides, secrets_overrides)
135
148
 
136
149
 
137
- def dump_chain_dag_to_yaml_str(dag: dag_lib.Dag) -> str:
150
+ def dump_chain_dag_to_yaml_str(dag: dag_lib.Dag,
151
+ use_user_specified_yaml: bool = False) -> str:
138
152
  """Dumps a chain DAG to a YAML string.
139
153
 
140
154
  Args:
141
155
  dag: the DAG to dump.
156
+ use_user_specified_yaml: forwarded to each task's to_yaml_config().
142
157
 
143
158
  Returns:
144
159
  The YAML string.
@@ -146,8 +161,10 @@ def dump_chain_dag_to_yaml_str(dag: dag_lib.Dag) -> str:
146
161
  assert dag.is_chain(), dag
147
162
  configs = [{'name': dag.name}]
148
163
  for task in dag.tasks:
149
- configs.append(task.to_yaml_config())
150
- return common_utils.dump_yaml_str(configs)
164
+ configs.append(
165
+ task.to_yaml_config(
166
+ use_user_specified_yaml=use_user_specified_yaml))
167
+ return yaml_utils.dump_yaml_str(configs)
151
168
 
152
169
 
153
170
  def dump_chain_dag_to_yaml(dag: dag_lib.Dag, path: str) -> None:
@@ -195,7 +212,9 @@ def fill_default_config_in_dag_for_job_launch(dag: dag_lib.Dag) -> None:
195
212
  assert default_strategy is not None
196
213
  for resources in list(task_.resources):
197
214
  original_job_recovery = resources.job_recovery
198
- job_recovery = {'strategy': default_strategy}
215
+ job_recovery: Dict[str, Optional[Union[str, int]]] = {
216
+ 'strategy': default_strategy
217
+ }
199
218
  if isinstance(original_job_recovery, str):
200
219
  job_recovery['strategy'] = original_job_recovery
201
220
  elif isinstance(original_job_recovery, dict):
File without changes
@@ -0,0 +1,485 @@
1
+ """Utils for sky databases."""
2
+ import asyncio
3
+ import contextlib
4
+ import enum
5
+ import os
6
+ import pathlib
7
+ import sqlite3
8
+ import threading
9
+ import typing
10
+ from typing import Any, Callable, Dict, Iterable, Literal, Optional, Union
11
+
12
+ import aiosqlite
13
+ import aiosqlite.context
14
+ import sqlalchemy
15
+ from sqlalchemy import exc as sqlalchemy_exc
16
+ from sqlalchemy.ext import asyncio as sqlalchemy_async
17
+
18
+ from sky import sky_logging
19
+ from sky.skylet import constants
20
+ from sky.skylet import runtime_utils
21
+
22
+ logger = sky_logging.init_logger(__name__)
23
+ if typing.TYPE_CHECKING:
24
+ from sqlalchemy.orm import Session
25
+
26
+ # This parameter (passed to sqlite3.connect) controls how long we will wait to
27
+ # obtain a database lock (not necessarily during connection, but whenever it is
28
+ # needed). It is not a connection timeout.
29
+ # Even in WAL mode, only a single writer is allowed at a time. Other writers
30
+ # will block until the write lock can be obtained. This behavior is described in
31
+ # the SQLite documentation for WAL: https://www.sqlite.org/wal.html
32
+ # Python's default timeout is 5s. In normal usage, lock contention is very low,
33
+ # and this is more than sufficient. However, in some highly concurrent cases,
34
+ # such as a jobs controller suddenly recovering thousands of jobs at once, we
35
+ # can see a small number of processes that take much longer to obtain the lock.
36
+ # In contrived highly contentious cases, around 0.1% of transactions will take
37
+ # >30s to take the lock. We have not seen cases that take >60s. For cases up to
38
+ # 1000x parallelism, this is thus thought to be a conservative setting.
39
+ # For more info, see the PR description for #4552.
40
+ _DB_TIMEOUT_S = 60
41
+
42
+
43
class UniqueConstraintViolationError(Exception):
    """Signals that an operation violated a unique constraint.

    Attributes:
        value: The input value that caused the violation.
        message: Explanation of the error; also passed to `Exception` as the
            sole positional argument.
    """

    def __init__(self, value, message='Unique constraint violation'):
        super().__init__(message)
        self.value = value
        self.message = message

    def __str__(self):
        # Always render with the fixed class-name prefix and the offending
        # value, regardless of what was passed to Exception.__init__.
        return ('UniqueConstraintViolationError: '
                f'{self.message} (Value: {self.value})')
58
+
59
+
60
class SQLAlchemyDialect(enum.Enum):
    """SQL dialect names, with the lowercase dialect string as the value.

    NOTE(review): presumably compared against SQLAlchemy's
    `engine.dialect.name` by callers; verify at the call sites.
    """
    SQLITE = 'sqlite'
    POSTGRESQL = 'postgresql'
63
+
64
+
65
@contextlib.contextmanager
def safe_cursor(db_path: str):
    """Yield a cursor on a new connection; commit and close on exit.

    A fresh connection is opened for every use, with the module-wide lock
    timeout. On exit the cursor is closed and the connection is committed,
    even when the managed block raised, so statements executed before the
    error are persisted.

    The connection is always closed, including when creating the cursor,
    closing it, or committing fails, so a failed commit cannot leak the
    connection.

    Args:
        db_path: Path of the SQLite database file.

    Yields:
        A `sqlite3.Cursor` bound to the new connection.
    """
    with contextlib.closing(
            sqlite3.connect(db_path, timeout=_DB_TIMEOUT_S)) as conn:
        cursor = conn.cursor()
        try:
            yield cursor
        finally:
            cursor.close()
            conn.commit()
76
+
77
+
78
def add_column_to_table(
    cursor: 'sqlite3.Cursor',
    conn: 'sqlite3.Connection',
    table_name: str,
    column_name: str,
    column_type: str,
    copy_from: Optional[str] = None,
    value_to_replace_existing_entries: Optional[Any] = None,
):
    """Add a column to a SQLite table if it is not already present.

    Args:
        cursor: Cursor used to inspect and alter the table.
        conn: Connection that is committed before returning.
        table_name: Table to alter. Interpolated into the SQL text.
        column_name: Column to add. Interpolated into the SQL text.
        column_type: SQL type (and any trailing clause) for the new column.
        copy_from: If given, an expression (typically another column name)
            whose value is copied into the new column for every row.
        value_to_replace_existing_entries: If given, written into the new
            column for every row where it is still NULL.
    """
    # Field 1 of each `PRAGMA table_info` row is the column name.
    already_present = any(
        info[1] == column_name
        for info in cursor.execute(f'PRAGMA table_info({table_name})'))
    if not already_present:
        try:
            cursor.execute(f'ALTER TABLE {table_name} '
                           f'ADD COLUMN {column_name} {column_type}')
            if copy_from is not None:
                cursor.execute(f'UPDATE {table_name} '
                               f'SET {column_name} = {copy_from}')
            if value_to_replace_existing_entries is not None:
                backfill_sql = (f'UPDATE {table_name} '
                                f'SET {column_name} = (?) '
                                f'WHERE {column_name} IS NULL')
                cursor.execute(backfill_sql,
                               (value_to_replace_existing_entries,))
        except sqlite3.OperationalError as e:
            # We may be trying to add the same column twice, when running
            # multiple threads. That is fine; anything else is re-raised.
            if 'duplicate column name' not in str(e):
                raise
    conn.commit()
113
+
114
+
115
def add_all_tables_to_db_sqlalchemy(
    metadata: sqlalchemy.MetaData,
    engine: sqlalchemy.Engine,
):
    """Create every table registered on `metadata` in the database.

    Args:
        metadata: Metadata whose tables should exist in the database.
        engine: Engine the tables are created through.
    """
    tolerated_errors = (sqlalchemy_exc.OperationalError,
                        sqlalchemy_exc.ProgrammingError)
    for table in metadata.tables.values():
        try:
            table.create(bind=engine, checkfirst=True)
        except tolerated_errors as e:
            # Despite checkfirst, creation can still report that the table
            # already exists (presumably a concurrent creator); only that
            # case is ignored.
            if 'already exists' not in str(e):
                raise
129
+
130
+
131
def add_table_to_db_sqlalchemy(
    metadata: sqlalchemy.MetaData,
    engine: sqlalchemy.Engine,
    table_name: str,
):
    """Create one table registered on `metadata` in the database.

    Args:
        metadata: Metadata holding the table definition.
        engine: Engine the table is created through.
        table_name: Key of the table in `metadata.tables`.

    Raises:
        KeyError: If `table_name` is not registered on `metadata`.
    """
    # A missing table simply propagates as KeyError; no handler is needed to
    # re-raise it.
    table = metadata.tables[table_name]

    try:
        table.create(bind=engine, checkfirst=True)
    except (sqlalchemy_exc.OperationalError,
            sqlalchemy_exc.ProgrammingError) as e:
        # Only an "already exists" failure is ignored; everything else is
        # re-raised.
        if 'already exists' not in str(e):
            raise
150
+
151
+
152
def add_column_to_table_sqlalchemy(
    session: 'Session',
    table_name: str,
    column_name: str,
    column_type: sqlalchemy.types.TypeEngine,
    default_statement: Optional[str] = None,
    copy_from: Optional[str] = None,
    value_to_replace_existing_entries: Optional[Any] = None,
):
    """Add a column to a table through a SQLAlchemy session.

    Args:
        session: Session used to run the statements; committed on return.
        table_name: Table to alter. Interpolated into the SQL text.
        column_name: Column to add. Interpolated into the SQL text.
        column_type: SQLAlchemy type, compiled for the session's dialect.
        default_statement: Optional SQL fragment appended after the type.
        copy_from: If given, an expression (typically another column name)
            whose value is copied into the new column for every row.
        value_to_replace_existing_entries: If given, written into the new
            column for every row where it is still NULL.
    """
    # The rendered type differs per dialect: e.g. LargeBinary is BLOB on
    # sqlite but BYTEA on postgres.
    rendered_type = column_type.compile(dialect=session.bind.dialect)
    default_clause = ('' if default_statement is None else
                      f' {default_statement}')
    add_column_sql = (f'ALTER TABLE {table_name} '
                      f'ADD COLUMN {column_name} {rendered_type}'
                      f'{default_clause}')
    try:
        session.execute(sqlalchemy.text(add_column_sql))
        if copy_from is not None:
            copy_sql = (f'UPDATE {table_name} '
                        f'SET {column_name} = {copy_from}')
            session.execute(sqlalchemy.text(copy_sql))
        if value_to_replace_existing_entries is not None:
            backfill_sql = (f'UPDATE {table_name} '
                            f'SET {column_name} = :replacement_value '
                            f'WHERE {column_name} IS NULL')
            session.execute(
                sqlalchemy.text(backfill_sql),
                {'replacement_value': value_to_replace_existing_entries})
    except sqlalchemy_exc.OperationalError as e:
        # sqlite reports an existing column this way.
        if 'duplicate column name' not in str(e):
            raise
    except sqlalchemy_exc.ProgrammingError as e:
        # postgresql reports an existing column this way.
        if 'already exists' not in str(e):
            raise
    session.commit()
196
+
197
+
198
def add_column_to_table_alembic(
    table_name: str,
    column_name: str,
    column_type: sqlalchemy.types.TypeEngine,
    server_default: Optional[str] = None,
    copy_from: Optional[str] = None,
    value_to_replace_existing_entries: Optional[Any] = None,
    index: Optional[bool] = None,
):
    """Add a column to a table from within an Alembic migration.

    Mirrors add_column_to_table_sqlalchemy, but goes through Alembic's
    operations object so it runs on the migration's connection.

    Args:
        table_name: Name of the table to add the column to.
        column_name: Name of the new column.
        column_type: SQLAlchemy column type.
        server_default: Server-side default value for the column.
        copy_from: Column name to copy values from (for existing rows).
        value_to_replace_existing_entries: Value written into rows where the
            new column is still NULL.
        index: If True, create an index on this column. If None, no index
            is created.
    """
    from alembic import op  # pylint: disable=import-outside-toplevel

    try:
        new_column = sqlalchemy.Column(column_name,
                                       column_type,
                                       server_default=server_default,
                                       index=index)
        op.add_column(table_name, new_column)

        # Data migration for rows that existed before the column did.
        if copy_from is not None:
            copy_sql = f'UPDATE {table_name} SET {column_name} = {copy_from}'
            op.execute(sqlalchemy.text(copy_sql))

        if value_to_replace_existing_entries is not None:
            # The replacement value is bound as a parameter rather than
            # interpolated into the SQL text.
            backfill = sqlalchemy.text(
                f'UPDATE {table_name} '
                f'SET {column_name} = :replacement_value '
                f'WHERE {column_name} IS NULL')
            op.get_bind().execute(
                backfill,
                {'replacement_value': value_to_replace_existing_entries})
    except sqlalchemy_exc.ProgrammingError as e:
        # Column already exists, that's fine; re-raise anything else.
        if 'already exists' not in str(e).lower():
            raise
    except sqlalchemy_exc.OperationalError as e:
        # Column already exists, that's fine; re-raise anything else.
        if 'duplicate column name' not in str(e).lower():
            raise
256
+
257
+
258
def drop_column_from_table_alembic(
    table_name: str,
    column_name: str,
):
    """Drop a column from a table from within an Alembic migration.

    Does nothing when the column is not present.

    Args:
        table_name: Name of the table to drop the column from.
        column_name: Name of the column to drop.
    """
    from alembic import op  # pylint: disable=import-outside-toplevel

    # Look the column up first so that a missing column is a silent no-op.
    inspector = sqlalchemy.inspect(op.get_bind())
    existing_names = [
        col['name'] for col in inspector.get_columns(table_name)
    ]
    if column_name in existing_names:
        try:
            op.drop_column(table_name, column_name)
        except (sqlalchemy_exc.ProgrammingError,
                sqlalchemy_exc.OperationalError) as e:
            # The column may have been dropped between the check and the
            # drop; only that case is ignored.
            if 'does not exist' not in str(e).lower():
                raise
287
+
288
+
289
class SQLiteConn(threading.local):
    """Thread-local connection to the sqlite3 database.

    Because this subclasses `threading.local`, `__init__` is re-run with the
    same arguments the first time the object is used from a new thread. Each
    thread therefore gets its own sync connection and cursor (and
    `create_table` is invoked once per thread), plus its own lazily created
    aiosqlite connection.
    """

    def __init__(self, db_path: str, create_table: Callable):
        """Open the sync connection and run `create_table` on it.

        Args:
            db_path: Path of the SQLite database file.
            create_table: Called as `create_table(cursor, conn)` right after
                the connection is opened.
        """
        super().__init__()
        self.db_path = db_path
        self.conn = sqlite3.connect(db_path, timeout=_DB_TIMEOUT_S)
        self.cursor = self.conn.cursor()
        create_table(self.cursor, self.conn)
        # Both are created lazily by _get_async_conn.
        self._async_conn: Optional[aiosqlite.Connection] = None
        self._async_conn_lock: Optional[asyncio.Lock] = None

    async def _get_async_conn(self) -> aiosqlite.Connection:
        """Get the shared aiosqlite connection for current thread.

        Typically, external callers should not get the connection directly;
        the SQLiteConn.{operation}_async methods should be used instead. This
        is to avoid txn interleaving on the shared aiosqlite connection.
        E.g.
        coroutine 1:
            A: await write(row1)
            B: cursor = await conn.execute(read_row1)
            C: await cursor.fetchall()
        coroutine 2:
            D: await write(row2)
            E: cursor = await conn.execute(read_row2)
            F: await cursor.fetchall()
        The A -> B -> D -> E -> C time sequence will cause B and D to read at
        the same snapshot point as when B started, so coroutine 2 loses
        read-after-write consistency. When you are adding new async
        operations to SQLiteConn, make sure the txn pattern does not cause
        this issue.
        """
        # Python 3.8 binds current event loop to asyncio.Lock(), which requires
        # a loop available in current thread. Lazy-init the lock to avoid this
        # dependency. The correctness is guaranteed since SQLiteConn is
        # thread-local so there is no race condition between check and init.
        if self._async_conn_lock is None:
            self._async_conn_lock = asyncio.Lock()
        if self._async_conn is None:
            async with self._async_conn_lock:
                # Re-check under the lock: another coroutine on this thread
                # may have connected while we were waiting.
                if self._async_conn is None:
                    # Init logic like requests.init_db_within_lock will handle
                    # initialization like setting the WAL mode, so we do not
                    # duplicate that logic here.
                    self._async_conn = await aiosqlite.connect(self.db_path)
        return self._async_conn

    async def execute_and_commit_async(self,
                                       sql: str,
                                       parameters: Optional[
                                           Iterable[Any]] = None) -> None:
        """Execute the sql and commit the transaction in a sync block.

        The execute and the commit are submitted to the aiosqlite connection
        as a single callable, so they run back to back on the underlying
        sqlite3 connection.

        Args:
            sql: Statement to execute.
            parameters: Bound parameters; None means no parameters.
        """
        conn = await self._get_async_conn()

        if parameters is None:
            parameters = []

        def exec_and_commit(sql: str, parameters: Iterable[Any]):
            # pylint: disable=protected-access
            conn._conn.execute(sql, parameters)
            conn._conn.commit()

        # pylint: disable=protected-access
        await conn._execute(exec_and_commit, sql, parameters)

    @aiosqlite.context.contextmanager
    async def execute_fetchall_async(self,
                                     sql: str,
                                     parameters: Optional[Iterable[Any]] = None
                                    ) -> Iterable[sqlite3.Row]:
        """Execute the sql and return all result rows.

        NOTE(review): wrapped with aiosqlite's context-manager helper;
        callers presumably await the result -- confirm against call sites.

        Args:
            sql: Statement to execute.
            parameters: Bound parameters, passed through unchanged.
        """
        conn = await self._get_async_conn()
        return await conn.execute_fetchall(sql, parameters)

    async def execute_get_returning_value_async(
            self,
            sql: str,
            parameters: Optional[Iterable[Any]] = None
    ) -> Optional[sqlite3.Row]:
        """Execute the sql, fetch its first row, then commit.

        As in execute_and_commit_async, the execute, fetch and commit are
        submitted to the aiosqlite connection as a single callable.

        Args:
            sql: Statement to execute (e.g. one with a RETURNING clause).
            parameters: Bound parameters; None means no parameters.

        Returns:
            The first result row, or None if the statement produced no rows.
        """
        conn = await self._get_async_conn()

        if parameters is None:
            parameters = []

        def exec_and_get_returning_value(sql: str,
                                         parameters: Iterable[Any]):
            # pylint: disable=protected-access
            row = conn._conn.execute(sql, parameters).fetchone()
            conn._conn.commit()
            return row

        # pylint: disable=protected-access
        return await conn._execute(exec_and_get_returning_value, sql,
                                   parameters)

    async def close(self):
        """Close the async connection (if one was opened) and the sync one.

        The sync connection is closed even if closing the async connection
        raises, so a failure there cannot leak the sqlite3 connection.
        """
        try:
            if self._async_conn is not None:
                await self._async_conn.close()
        finally:
            self.conn.close()
387
+
388
+
389
# Connection limit used by get_engine when it creates a Postgres engine:
# 0 selects NullPool, any other value is used as the pool size.
_max_connections = 0
# Postgres engines keyed by connection string. NOTE: get_engine also stores
# async engines here (under the rewritten 'postgresql+asyncpg://' string),
# despite the Engine value annotation.
_postgres_engine_cache: Dict[str, sqlalchemy.engine.Engine] = {}
# Synchronous SQLite engines keyed by database file path.
_sqlite_engine_cache: Dict[str, sqlalchemy.engine.Engine] = {}

# Serializes engine creation and caching in get_engine.
_db_creation_lock = threading.Lock()
394
+
395
+
396
def set_max_connections(max_connections: int) -> None:
    """Set the module-wide connection limit read by get_engine.

    get_engine consults this value only when it creates a new Postgres
    engine: 0 selects NullPool, any other value becomes the pool size.
    Engines that are already cached are not rebuilt, so set this before the
    first get_engine call for it to take effect.

    Args:
        max_connections: The new limit.
    """
    global _max_connections
    _max_connections = max_connections
399
+
400
+
401
def get_max_connections() -> int:
    """Return the module-wide connection limit (0 until it is set)."""
    return _max_connections
403
+
404
+
405
@typing.overload
def get_engine(
        db_name: Optional[str],
        async_engine: Literal[False] = False) -> sqlalchemy.engine.Engine:
    ...


@typing.overload
def get_engine(db_name: Optional[str],
               async_engine: Literal[True]) -> sqlalchemy_async.AsyncEngine:
    ...


def get_engine(
    db_name: Optional[str],
    async_engine: bool = False
) -> Union[sqlalchemy.engine.Engine, sqlalchemy_async.AsyncEngine]:
    """Get the engine for the given database name.

    A Postgres engine is used when the process is marked as the SkyPilot
    server (constants.ENV_VAR_IS_SKYPILOT_SERVER is set) and
    constants.ENV_VAR_DB_CONNECTION_URI holds a non-empty connection string;
    otherwise a SQLite engine for `.sky/{db_name}.db` under the runtime
    directory is used.

    Postgres engines (sync and async) and sync SQLite engines are created
    once and cached. Async SQLite engines are NOT cached: every call creates
    a new one.

    Args:
        db_name: The name of the database. ONLY used for SQLite. On Postgres,
            we use a single database, which we get from the connection string.
        async_engine: Whether to return an async engine.

    Returns:
        A sync `Engine`, or an `AsyncEngine` when `async_engine` is True.
    """
    conn_string = None
    if os.environ.get(constants.ENV_VAR_IS_SKYPILOT_SERVER) is not None:
        conn_string = os.environ.get(constants.ENV_VAR_DB_CONNECTION_URI)

    if conn_string:
        if async_engine:
            # Only the leading scheme should be rewritten, so limit the
            # replacement to the first occurrence.
            conn_string = conn_string.replace('postgresql://',
                                              'postgresql+asyncpg://', 1)
        with _db_creation_lock:
            # We use the same cache for both sync and async engines
            # because we change the conn_string in the async case,
            # so they would not overlap.
            if conn_string not in _postgres_engine_cache:
                engine_type = 'sync' if not async_engine else 'async'
                logger.debug(
                    f'Creating a new postgres {engine_type} engine with '
                    f'maximum {_max_connections} connections')
                kw_args: Dict[str, Any]
                if _max_connections == 0:
                    # No pooling: a connection is opened per use.
                    kw_args = {'poolclass': sqlalchemy.NullPool}
                else:
                    kw_args = {
                        'pool_size': _max_connections,
                        # Allow overflow only up to 5 connections in total.
                        'max_overflow': max(0, 5 - _max_connections),
                        'pool_pre_ping': True,
                        'pool_recycle': 1800,
                        'poolclass':
                            (sqlalchemy.pool.AsyncAdaptedQueuePool
                             if async_engine else sqlalchemy.pool.QueuePool),
                    }
                create_fn = (sqlalchemy_async.create_async_engine
                             if async_engine else sqlalchemy.create_engine)
                _postgres_engine_cache[conn_string] = create_fn(
                    conn_string, **kw_args)
            return _postgres_engine_cache[conn_string]

    assert db_name is not None, 'db_name must be provided for SQLite'
    db_path = runtime_utils.get_runtime_dir_path(f'.sky/{db_name}.db')
    pathlib.Path(db_path).parents[0].mkdir(parents=True, exist_ok=True)
    if async_engine:
        # This is an AsyncEngine, instead of a (normal, synchronous) Engine,
        # so we should not put it in the cache. Instead, just return.
        return sqlalchemy_async.create_async_engine(
            'sqlite+aiosqlite:///' + db_path, connect_args={'timeout': 30})
    # Guard the check-and-insert like the Postgres path above, so concurrent
    # first calls from different threads share a single engine.
    with _db_creation_lock:
        if db_path not in _sqlite_engine_cache:
            _sqlite_engine_cache[db_path] = sqlalchemy.create_engine(
                'sqlite:///' + db_path)
        return _sqlite_engine_cache[db_path]