skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (512) hide show
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +25 -7
  3. sky/adaptors/common.py +24 -1
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/hyperbolic.py +8 -0
  7. sky/adaptors/kubernetes.py +149 -18
  8. sky/adaptors/nebius.py +170 -17
  9. sky/adaptors/primeintellect.py +1 -0
  10. sky/adaptors/runpod.py +68 -0
  11. sky/adaptors/seeweb.py +167 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +187 -4
  14. sky/authentication.py +179 -225
  15. sky/backends/__init__.py +4 -2
  16. sky/backends/backend.py +22 -9
  17. sky/backends/backend_utils.py +1299 -380
  18. sky/backends/cloud_vm_ray_backend.py +1715 -518
  19. sky/backends/docker_utils.py +1 -1
  20. sky/backends/local_docker_backend.py +11 -6
  21. sky/backends/wheel_utils.py +37 -9
  22. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  23. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  24. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  25. sky/{clouds/service_catalog → catalog}/common.py +89 -48
  26. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  27. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  28. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
  29. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
  31. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  33. sky/catalog/data_fetchers/fetch_nebius.py +335 -0
  34. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  35. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  36. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  37. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  38. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  39. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  40. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  41. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  42. sky/catalog/hyperbolic_catalog.py +136 -0
  43. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  44. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  45. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  46. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  47. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  48. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  49. sky/catalog/primeintellect_catalog.py +95 -0
  50. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  51. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  52. sky/catalog/seeweb_catalog.py +184 -0
  53. sky/catalog/shadeform_catalog.py +165 -0
  54. sky/catalog/ssh_catalog.py +167 -0
  55. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  56. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  57. sky/check.py +491 -203
  58. sky/cli.py +5 -6005
  59. sky/client/{cli.py → cli/command.py} +2477 -1885
  60. sky/client/cli/deprecation_utils.py +99 -0
  61. sky/client/cli/flags.py +359 -0
  62. sky/client/cli/table_utils.py +320 -0
  63. sky/client/common.py +70 -32
  64. sky/client/oauth.py +82 -0
  65. sky/client/sdk.py +1203 -297
  66. sky/client/sdk_async.py +833 -0
  67. sky/client/service_account_auth.py +47 -0
  68. sky/cloud_stores.py +73 -0
  69. sky/clouds/__init__.py +13 -0
  70. sky/clouds/aws.py +358 -93
  71. sky/clouds/azure.py +105 -83
  72. sky/clouds/cloud.py +127 -36
  73. sky/clouds/cudo.py +68 -50
  74. sky/clouds/do.py +66 -48
  75. sky/clouds/fluidstack.py +63 -44
  76. sky/clouds/gcp.py +339 -110
  77. sky/clouds/hyperbolic.py +293 -0
  78. sky/clouds/ibm.py +70 -49
  79. sky/clouds/kubernetes.py +563 -162
  80. sky/clouds/lambda_cloud.py +74 -54
  81. sky/clouds/nebius.py +206 -80
  82. sky/clouds/oci.py +88 -66
  83. sky/clouds/paperspace.py +61 -44
  84. sky/clouds/primeintellect.py +317 -0
  85. sky/clouds/runpod.py +164 -74
  86. sky/clouds/scp.py +89 -83
  87. sky/clouds/seeweb.py +466 -0
  88. sky/clouds/shadeform.py +400 -0
  89. sky/clouds/ssh.py +263 -0
  90. sky/clouds/utils/aws_utils.py +10 -4
  91. sky/clouds/utils/gcp_utils.py +87 -11
  92. sky/clouds/utils/oci_utils.py +38 -14
  93. sky/clouds/utils/scp_utils.py +177 -124
  94. sky/clouds/vast.py +99 -77
  95. sky/clouds/vsphere.py +51 -40
  96. sky/core.py +349 -139
  97. sky/dag.py +15 -0
  98. sky/dashboard/out/404.html +1 -1
  99. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  100. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
  102. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  105. sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
  106. sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
  111. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  112. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  113. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  115. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  116. sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
  118. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  119. sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  121. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  122. sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  124. sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  126. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  127. sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
  128. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  129. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  131. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  133. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  134. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
  136. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  139. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  141. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
  143. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
  144. sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  148. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
  149. sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
  151. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  152. sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
  153. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  154. sky/dashboard/out/clusters/[cluster].html +1 -1
  155. sky/dashboard/out/clusters.html +1 -1
  156. sky/dashboard/out/config.html +1 -0
  157. sky/dashboard/out/index.html +1 -1
  158. sky/dashboard/out/infra/[context].html +1 -0
  159. sky/dashboard/out/infra.html +1 -0
  160. sky/dashboard/out/jobs/[job].html +1 -1
  161. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  162. sky/dashboard/out/jobs.html +1 -1
  163. sky/dashboard/out/users.html +1 -0
  164. sky/dashboard/out/volumes.html +1 -0
  165. sky/dashboard/out/workspace/new.html +1 -0
  166. sky/dashboard/out/workspaces/[name].html +1 -0
  167. sky/dashboard/out/workspaces.html +1 -0
  168. sky/data/data_utils.py +137 -1
  169. sky/data/mounting_utils.py +269 -84
  170. sky/data/storage.py +1451 -1807
  171. sky/data/storage_utils.py +43 -57
  172. sky/exceptions.py +132 -2
  173. sky/execution.py +206 -63
  174. sky/global_user_state.py +2374 -586
  175. sky/jobs/__init__.py +5 -0
  176. sky/jobs/client/sdk.py +242 -65
  177. sky/jobs/client/sdk_async.py +143 -0
  178. sky/jobs/constants.py +9 -8
  179. sky/jobs/controller.py +839 -277
  180. sky/jobs/file_content_utils.py +80 -0
  181. sky/jobs/log_gc.py +201 -0
  182. sky/jobs/recovery_strategy.py +398 -152
  183. sky/jobs/scheduler.py +315 -189
  184. sky/jobs/server/core.py +829 -255
  185. sky/jobs/server/server.py +156 -115
  186. sky/jobs/server/utils.py +136 -0
  187. sky/jobs/state.py +2092 -701
  188. sky/jobs/utils.py +1242 -160
  189. sky/logs/__init__.py +21 -0
  190. sky/logs/agent.py +108 -0
  191. sky/logs/aws.py +243 -0
  192. sky/logs/gcp.py +91 -0
  193. sky/metrics/__init__.py +0 -0
  194. sky/metrics/utils.py +443 -0
  195. sky/models.py +78 -1
  196. sky/optimizer.py +164 -70
  197. sky/provision/__init__.py +90 -4
  198. sky/provision/aws/config.py +147 -26
  199. sky/provision/aws/instance.py +135 -50
  200. sky/provision/azure/instance.py +10 -5
  201. sky/provision/common.py +13 -1
  202. sky/provision/cudo/cudo_machine_type.py +1 -1
  203. sky/provision/cudo/cudo_utils.py +14 -8
  204. sky/provision/cudo/cudo_wrapper.py +72 -71
  205. sky/provision/cudo/instance.py +10 -6
  206. sky/provision/do/instance.py +10 -6
  207. sky/provision/do/utils.py +4 -3
  208. sky/provision/docker_utils.py +114 -23
  209. sky/provision/fluidstack/instance.py +13 -8
  210. sky/provision/gcp/__init__.py +1 -0
  211. sky/provision/gcp/config.py +301 -19
  212. sky/provision/gcp/constants.py +218 -0
  213. sky/provision/gcp/instance.py +36 -8
  214. sky/provision/gcp/instance_utils.py +18 -4
  215. sky/provision/gcp/volume_utils.py +247 -0
  216. sky/provision/hyperbolic/__init__.py +12 -0
  217. sky/provision/hyperbolic/config.py +10 -0
  218. sky/provision/hyperbolic/instance.py +437 -0
  219. sky/provision/hyperbolic/utils.py +373 -0
  220. sky/provision/instance_setup.py +93 -14
  221. sky/provision/kubernetes/__init__.py +5 -0
  222. sky/provision/kubernetes/config.py +9 -52
  223. sky/provision/kubernetes/constants.py +17 -0
  224. sky/provision/kubernetes/instance.py +789 -247
  225. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  226. sky/provision/kubernetes/network.py +27 -17
  227. sky/provision/kubernetes/network_utils.py +40 -43
  228. sky/provision/kubernetes/utils.py +1192 -531
  229. sky/provision/kubernetes/volume.py +282 -0
  230. sky/provision/lambda_cloud/instance.py +22 -16
  231. sky/provision/nebius/constants.py +50 -0
  232. sky/provision/nebius/instance.py +19 -6
  233. sky/provision/nebius/utils.py +196 -91
  234. sky/provision/oci/instance.py +10 -5
  235. sky/provision/paperspace/instance.py +10 -7
  236. sky/provision/paperspace/utils.py +1 -1
  237. sky/provision/primeintellect/__init__.py +10 -0
  238. sky/provision/primeintellect/config.py +11 -0
  239. sky/provision/primeintellect/instance.py +454 -0
  240. sky/provision/primeintellect/utils.py +398 -0
  241. sky/provision/provisioner.py +110 -36
  242. sky/provision/runpod/__init__.py +5 -0
  243. sky/provision/runpod/instance.py +27 -6
  244. sky/provision/runpod/utils.py +51 -18
  245. sky/provision/runpod/volume.py +180 -0
  246. sky/provision/scp/__init__.py +15 -0
  247. sky/provision/scp/config.py +93 -0
  248. sky/provision/scp/instance.py +531 -0
  249. sky/provision/seeweb/__init__.py +11 -0
  250. sky/provision/seeweb/config.py +13 -0
  251. sky/provision/seeweb/instance.py +807 -0
  252. sky/provision/shadeform/__init__.py +11 -0
  253. sky/provision/shadeform/config.py +12 -0
  254. sky/provision/shadeform/instance.py +351 -0
  255. sky/provision/shadeform/shadeform_utils.py +83 -0
  256. sky/provision/ssh/__init__.py +18 -0
  257. sky/provision/vast/instance.py +13 -8
  258. sky/provision/vast/utils.py +10 -7
  259. sky/provision/vsphere/common/vim_utils.py +1 -2
  260. sky/provision/vsphere/instance.py +15 -10
  261. sky/provision/vsphere/vsphere_utils.py +9 -19
  262. sky/py.typed +0 -0
  263. sky/resources.py +844 -118
  264. sky/schemas/__init__.py +0 -0
  265. sky/schemas/api/__init__.py +0 -0
  266. sky/schemas/api/responses.py +225 -0
  267. sky/schemas/db/README +4 -0
  268. sky/schemas/db/env.py +90 -0
  269. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  270. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  271. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  272. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  273. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  274. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  275. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  276. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  277. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  278. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  279. sky/schemas/db/script.py.mako +28 -0
  280. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  281. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  282. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  283. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  284. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  285. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  286. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  287. sky/schemas/generated/__init__.py +0 -0
  288. sky/schemas/generated/autostopv1_pb2.py +36 -0
  289. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  290. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  291. sky/schemas/generated/jobsv1_pb2.py +86 -0
  292. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  293. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  294. sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
  295. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  296. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  297. sky/schemas/generated/servev1_pb2.py +58 -0
  298. sky/schemas/generated/servev1_pb2.pyi +115 -0
  299. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  300. sky/serve/autoscalers.py +357 -5
  301. sky/serve/client/impl.py +310 -0
  302. sky/serve/client/sdk.py +47 -139
  303. sky/serve/client/sdk_async.py +130 -0
  304. sky/serve/constants.py +10 -8
  305. sky/serve/controller.py +64 -19
  306. sky/serve/load_balancer.py +106 -60
  307. sky/serve/load_balancing_policies.py +115 -1
  308. sky/serve/replica_managers.py +273 -162
  309. sky/serve/serve_rpc_utils.py +179 -0
  310. sky/serve/serve_state.py +554 -251
  311. sky/serve/serve_utils.py +733 -220
  312. sky/serve/server/core.py +66 -711
  313. sky/serve/server/impl.py +1093 -0
  314. sky/serve/server/server.py +21 -18
  315. sky/serve/service.py +133 -48
  316. sky/serve/service_spec.py +135 -16
  317. sky/serve/spot_placer.py +3 -0
  318. sky/server/auth/__init__.py +0 -0
  319. sky/server/auth/authn.py +50 -0
  320. sky/server/auth/loopback.py +38 -0
  321. sky/server/auth/oauth2_proxy.py +200 -0
  322. sky/server/common.py +475 -181
  323. sky/server/config.py +81 -23
  324. sky/server/constants.py +44 -6
  325. sky/server/daemons.py +229 -0
  326. sky/server/html/token_page.html +185 -0
  327. sky/server/metrics.py +160 -0
  328. sky/server/requests/executor.py +528 -138
  329. sky/server/requests/payloads.py +351 -17
  330. sky/server/requests/preconditions.py +21 -17
  331. sky/server/requests/process.py +112 -29
  332. sky/server/requests/request_names.py +120 -0
  333. sky/server/requests/requests.py +817 -224
  334. sky/server/requests/serializers/decoders.py +82 -31
  335. sky/server/requests/serializers/encoders.py +140 -22
  336. sky/server/requests/threads.py +106 -0
  337. sky/server/rest.py +417 -0
  338. sky/server/server.py +1290 -284
  339. sky/server/state.py +20 -0
  340. sky/server/stream_utils.py +345 -57
  341. sky/server/uvicorn.py +217 -3
  342. sky/server/versions.py +270 -0
  343. sky/setup_files/MANIFEST.in +5 -0
  344. sky/setup_files/alembic.ini +156 -0
  345. sky/setup_files/dependencies.py +136 -31
  346. sky/setup_files/setup.py +44 -42
  347. sky/sky_logging.py +102 -5
  348. sky/skylet/attempt_skylet.py +1 -0
  349. sky/skylet/autostop_lib.py +129 -8
  350. sky/skylet/configs.py +27 -20
  351. sky/skylet/constants.py +171 -19
  352. sky/skylet/events.py +105 -21
  353. sky/skylet/job_lib.py +335 -104
  354. sky/skylet/log_lib.py +297 -18
  355. sky/skylet/log_lib.pyi +44 -1
  356. sky/skylet/ray_patches/__init__.py +17 -3
  357. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  358. sky/skylet/ray_patches/cli.py.diff +19 -0
  359. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  360. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  361. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  362. sky/skylet/ray_patches/updater.py.diff +18 -0
  363. sky/skylet/ray_patches/worker.py.diff +41 -0
  364. sky/skylet/services.py +564 -0
  365. sky/skylet/skylet.py +63 -4
  366. sky/skylet/subprocess_daemon.py +103 -29
  367. sky/skypilot_config.py +506 -99
  368. sky/ssh_node_pools/__init__.py +1 -0
  369. sky/ssh_node_pools/core.py +135 -0
  370. sky/ssh_node_pools/server.py +233 -0
  371. sky/task.py +621 -137
  372. sky/templates/aws-ray.yml.j2 +10 -3
  373. sky/templates/azure-ray.yml.j2 +1 -1
  374. sky/templates/do-ray.yml.j2 +1 -1
  375. sky/templates/gcp-ray.yml.j2 +57 -0
  376. sky/templates/hyperbolic-ray.yml.j2 +67 -0
  377. sky/templates/jobs-controller.yaml.j2 +27 -24
  378. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  379. sky/templates/kubernetes-ray.yml.j2 +607 -51
  380. sky/templates/lambda-ray.yml.j2 +1 -1
  381. sky/templates/nebius-ray.yml.j2 +33 -12
  382. sky/templates/paperspace-ray.yml.j2 +1 -1
  383. sky/templates/primeintellect-ray.yml.j2 +71 -0
  384. sky/templates/runpod-ray.yml.j2 +9 -1
  385. sky/templates/scp-ray.yml.j2 +3 -50
  386. sky/templates/seeweb-ray.yml.j2 +108 -0
  387. sky/templates/shadeform-ray.yml.j2 +72 -0
  388. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  389. sky/templates/websocket_proxy.py +178 -18
  390. sky/usage/usage_lib.py +18 -11
  391. sky/users/__init__.py +0 -0
  392. sky/users/model.conf +15 -0
  393. sky/users/permission.py +387 -0
  394. sky/users/rbac.py +121 -0
  395. sky/users/server.py +720 -0
  396. sky/users/token_service.py +218 -0
  397. sky/utils/accelerator_registry.py +34 -5
  398. sky/utils/admin_policy_utils.py +84 -38
  399. sky/utils/annotations.py +16 -5
  400. sky/utils/asyncio_utils.py +78 -0
  401. sky/utils/auth_utils.py +153 -0
  402. sky/utils/benchmark_utils.py +60 -0
  403. sky/utils/cli_utils/status_utils.py +159 -86
  404. sky/utils/cluster_utils.py +31 -9
  405. sky/utils/command_runner.py +354 -68
  406. sky/utils/command_runner.pyi +93 -3
  407. sky/utils/common.py +35 -8
  408. sky/utils/common_utils.py +310 -87
  409. sky/utils/config_utils.py +87 -5
  410. sky/utils/context.py +402 -0
  411. sky/utils/context_utils.py +222 -0
  412. sky/utils/controller_utils.py +264 -89
  413. sky/utils/dag_utils.py +31 -12
  414. sky/utils/db/__init__.py +0 -0
  415. sky/utils/db/db_utils.py +470 -0
  416. sky/utils/db/migration_utils.py +133 -0
  417. sky/utils/directory_utils.py +12 -0
  418. sky/utils/env_options.py +13 -0
  419. sky/utils/git.py +567 -0
  420. sky/utils/git_clone.sh +460 -0
  421. sky/utils/infra_utils.py +195 -0
  422. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  423. sky/utils/kubernetes/config_map_utils.py +133 -0
  424. sky/utils/kubernetes/create_cluster.sh +13 -27
  425. sky/utils/kubernetes/delete_cluster.sh +10 -7
  426. sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
  427. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  428. sky/utils/kubernetes/generate_kind_config.py +6 -66
  429. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  430. sky/utils/kubernetes/gpu_labeler.py +5 -5
  431. sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
  432. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  433. sky/utils/kubernetes/ssh_utils.py +221 -0
  434. sky/utils/kubernetes_enums.py +8 -15
  435. sky/utils/lock_events.py +94 -0
  436. sky/utils/locks.py +368 -0
  437. sky/utils/log_utils.py +300 -6
  438. sky/utils/perf_utils.py +22 -0
  439. sky/utils/resource_checker.py +298 -0
  440. sky/utils/resources_utils.py +249 -32
  441. sky/utils/rich_utils.py +213 -37
  442. sky/utils/schemas.py +905 -147
  443. sky/utils/serialize_utils.py +16 -0
  444. sky/utils/status_lib.py +10 -0
  445. sky/utils/subprocess_utils.py +38 -15
  446. sky/utils/tempstore.py +70 -0
  447. sky/utils/timeline.py +24 -52
  448. sky/utils/ux_utils.py +84 -15
  449. sky/utils/validator.py +11 -1
  450. sky/utils/volume.py +86 -0
  451. sky/utils/yaml_utils.py +111 -0
  452. sky/volumes/__init__.py +13 -0
  453. sky/volumes/client/__init__.py +0 -0
  454. sky/volumes/client/sdk.py +149 -0
  455. sky/volumes/server/__init__.py +0 -0
  456. sky/volumes/server/core.py +258 -0
  457. sky/volumes/server/server.py +122 -0
  458. sky/volumes/volume.py +212 -0
  459. sky/workspaces/__init__.py +0 -0
  460. sky/workspaces/core.py +655 -0
  461. sky/workspaces/server.py +101 -0
  462. sky/workspaces/utils.py +56 -0
  463. skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
  464. skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
  465. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
  466. sky/benchmark/benchmark_state.py +0 -256
  467. sky/benchmark/benchmark_utils.py +0 -641
  468. sky/clouds/service_catalog/constants.py +0 -7
  469. sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
  470. sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
  471. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  472. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  473. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  474. sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
  475. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  476. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  477. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  478. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  479. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  480. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  481. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  482. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
  483. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  484. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  485. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  486. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
  487. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  488. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  489. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  490. sky/jobs/dashboard/dashboard.py +0 -223
  491. sky/jobs/dashboard/static/favicon.ico +0 -0
  492. sky/jobs/dashboard/templates/index.html +0 -831
  493. sky/jobs/server/dashboard_utils.py +0 -69
  494. sky/skylet/providers/scp/__init__.py +0 -2
  495. sky/skylet/providers/scp/config.py +0 -149
  496. sky/skylet/providers/scp/node_provider.py +0 -578
  497. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  498. sky/utils/db_utils.py +0 -100
  499. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  500. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  501. skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
  502. skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
  503. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  504. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  505. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  506. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  507. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  508. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  509. /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
  510. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
  511. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
  512. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/utils/dag_utils.py CHANGED
@@ -1,14 +1,14 @@
1
1
  """Utilities for loading and dumping DAGs from/to YAML files."""
2
2
  import copy
3
- from typing import Any, Dict, List, Optional, Tuple
3
+ from typing import Any, Dict, List, Optional, Tuple, Union
4
4
 
5
5
  from sky import dag as dag_lib
6
6
  from sky import sky_logging
7
7
  from sky import task as task_lib
8
8
  from sky.utils import cluster_utils
9
- from sky.utils import common_utils
10
9
  from sky.utils import registry
11
10
  from sky.utils import ux_utils
11
+ from sky.utils import yaml_utils
12
12
 
13
13
  logger = sky_logging.init_logger(__name__)
14
14
 
@@ -66,7 +66,9 @@ def convert_entrypoint_to_dag(entrypoint: Any) -> 'dag_lib.Dag':
66
66
 
67
67
  def _load_chain_dag(
68
68
  configs: List[Dict[str, Any]],
69
- env_overrides: Optional[List[Tuple[str, str]]] = None) -> dag_lib.Dag:
69
+ env_overrides: Optional[List[Tuple[str, str]]] = None,
70
+ secrets_overrides: Optional[List[Tuple[str,
71
+ str]]] = None) -> dag_lib.Dag:
70
72
  """Loads a chain DAG from a list of YAML configs."""
71
73
  dag_name = None
72
74
  if set(configs[0].keys()) == {'name'}:
@@ -84,7 +86,8 @@ def _load_chain_dag(
84
86
  for task_config in configs:
85
87
  if task_config is None:
86
88
  continue
87
- task = task_lib.Task.from_yaml_config(task_config, env_overrides)
89
+ task = task_lib.Task.from_yaml_config(task_config, env_overrides,
90
+ secrets_overrides)
88
91
  if current_task is not None:
89
92
  current_task >> task # pylint: disable=pointless-statement
90
93
  current_task = task
@@ -95,6 +98,7 @@ def _load_chain_dag(
95
98
  def load_chain_dag_from_yaml(
96
99
  path: str,
97
100
  env_overrides: Optional[List[Tuple[str, str]]] = None,
101
+ secret_overrides: Optional[List[Tuple[str, str]]] = None,
98
102
  ) -> dag_lib.Dag:
99
103
  """Loads a chain DAG from a YAML file.
100
104
 
@@ -105,17 +109,22 @@ def load_chain_dag_from_yaml(
105
109
  the task's 'envs' section. If it is a chain dag, the envs will be updated
106
110
  for all tasks in the chain.
107
111
 
112
+ 'secret_overrides' is a list of (key, value) pairs that will be used to
113
+ update the task's 'secrets' section. If it is a chain dag, the secrets will
114
+ be updated for all tasks in the chain.
115
+
108
116
  Returns:
109
117
  A chain Dag with 1 or more tasks (an empty entrypoint would create a
110
118
  trivial task).
111
119
  """
112
- configs = common_utils.read_yaml_all(path)
113
- return _load_chain_dag(configs, env_overrides)
120
+ configs = yaml_utils.read_yaml_all(path)
121
+ return _load_chain_dag(configs, env_overrides, secret_overrides)
114
122
 
115
123
 
116
124
  def load_chain_dag_from_yaml_str(
117
125
  yaml_str: str,
118
126
  env_overrides: Optional[List[Tuple[str, str]]] = None,
127
+ secrets_overrides: Optional[List[Tuple[str, str]]] = None,
119
128
  ) -> dag_lib.Dag:
120
129
  """Loads a chain DAG from a YAML string.
121
130
 
@@ -126,19 +135,25 @@ def load_chain_dag_from_yaml_str(
126
135
  the task's 'envs' section. If it is a chain dag, the envs will be updated
127
136
  for all tasks in the chain.
128
137
 
138
+ 'secrets_overrides' is a list of (key, value) pairs that will be used to
139
+ update the task's 'secrets' section. If it is a chain dag, the secrets will
140
+ be updated for all tasks in the chain.
141
+
129
142
  Returns:
130
143
  A chain Dag with 1 or more tasks (an empty entrypoint would create a
131
144
  trivial task).
132
145
  """
133
- configs = common_utils.read_yaml_all_str(yaml_str)
134
- return _load_chain_dag(configs, env_overrides)
146
+ configs = yaml_utils.read_yaml_all_str(yaml_str)
147
+ return _load_chain_dag(configs, env_overrides, secrets_overrides)
135
148
 
136
149
 
137
- def dump_chain_dag_to_yaml_str(dag: dag_lib.Dag) -> str:
150
+ def dump_chain_dag_to_yaml_str(dag: dag_lib.Dag,
151
+ use_user_specified_yaml: bool = False) -> str:
138
152
  """Dumps a chain DAG to a YAML string.
139
153
 
140
154
  Args:
141
155
  dag: the DAG to dump.
156
+ use_user_specified_yaml: passed through to each task's to_yaml_config().
142
157
 
143
158
  Returns:
144
159
  The YAML string.
@@ -146,8 +161,10 @@ def dump_chain_dag_to_yaml_str(dag: dag_lib.Dag) -> str:
146
161
  assert dag.is_chain(), dag
147
162
  configs = [{'name': dag.name}]
148
163
  for task in dag.tasks:
149
- configs.append(task.to_yaml_config())
150
- return common_utils.dump_yaml_str(configs)
164
+ configs.append(
165
+ task.to_yaml_config(
166
+ use_user_specified_yaml=use_user_specified_yaml))
167
+ return yaml_utils.dump_yaml_str(configs)
151
168
 
152
169
 
153
170
  def dump_chain_dag_to_yaml(dag: dag_lib.Dag, path: str) -> None:
@@ -195,7 +212,9 @@ def fill_default_config_in_dag_for_job_launch(dag: dag_lib.Dag) -> None:
195
212
  assert default_strategy is not None
196
213
  for resources in list(task_.resources):
197
214
  original_job_recovery = resources.job_recovery
198
- job_recovery = {'strategy': default_strategy}
215
+ job_recovery: Dict[str, Optional[Union[str, int]]] = {
216
+ 'strategy': default_strategy
217
+ }
199
218
  if isinstance(original_job_recovery, str):
200
219
  job_recovery['strategy'] = original_job_recovery
201
220
  elif isinstance(original_job_recovery, dict):
File without changes
@@ -0,0 +1,470 @@
1
+ """Utils for sky databases."""
2
+ import asyncio
3
+ import contextlib
4
+ import enum
5
+ import os
6
+ import pathlib
7
+ import sqlite3
8
+ import threading
9
+ import typing
10
+ from typing import Any, Callable, Dict, Iterable, Literal, Optional, Union
11
+
12
+ import aiosqlite
13
+ import aiosqlite.context
14
+ import sqlalchemy
15
+ from sqlalchemy import exc as sqlalchemy_exc
16
+ from sqlalchemy.ext import asyncio as sqlalchemy_async
17
+
18
+ from sky import sky_logging
19
+ from sky.skylet import constants
20
+
21
+ logger = sky_logging.init_logger(__name__)
22
+ if typing.TYPE_CHECKING:
23
+ from sqlalchemy.orm import Session
24
+
25
# This parameter (passed to sqlite3.connect) controls how long we will wait to
# obtain a database lock (not necessarily during connection, but whenever it is
# needed). It is not a connection timeout.
# Even in WAL mode, only a single writer is allowed at a time. Other writers
# will block until the write lock can be obtained. This behavior is described in
# the SQLite documentation for WAL: https://www.sqlite.org/wal.html
# Python's default timeout is 5s. In normal usage, lock contention is very low,
# and this is more than sufficient. However, in some highly concurrent cases,
# such as a jobs controller suddenly recovering thousands of jobs at once, we
# can see a small number of processes that take much longer to obtain the lock.
# In contrived highly contentious cases, around 0.1% of transactions will take
# >30s to take the lock. We have not seen cases that take >60s. For cases up to
# 1000x parallelism, this is thus thought to be a conservative setting.
# For more info, see the PR description for #4552.
_DB_TIMEOUT_S = 60
40
+
41
+
42
class UniqueConstraintViolationError(Exception):
    """Raised when a unique constraint is violated.

    Attributes:
        value: The input value that caused the error.
        message: Explanation of the error.
    """

    def __init__(self, value, message='Unique constraint violation'):
        # Only the message is forwarded to Exception, so ``args`` holds the
        # message alone; the offending value lives on ``self.value``.
        super().__init__(message)
        self.value = value
        self.message = message

    def __str__(self):
        details = f'{self.message} (Value: {self.value})'
        return f'UniqueConstraintViolationError: {details}'
57
+
58
+
59
class SQLAlchemyDialect(enum.Enum):
    """SQLAlchemy dialect names; each member's value is the dialect string."""
    SQLITE = 'sqlite'
    POSTGRESQL = 'postgresql'
62
+
63
+
64
@contextlib.contextmanager
def safe_cursor(db_path: str):
    """A newly created, auto-committing, auto-closing cursor.

    Opens a fresh sqlite3 connection to ``db_path`` (waiting up to
    ``_DB_TIMEOUT_S`` seconds for database locks) and yields a cursor on it.
    When the ``with`` body exits, normally or via an exception, the cursor
    is closed and pending changes are committed. The connection is always
    closed, even if closing the cursor or committing fails.

    Args:
        db_path: Path of the SQLite database file.

    Yields:
        A ``sqlite3.Cursor`` bound to the new connection.
    """
    conn = sqlite3.connect(db_path, timeout=_DB_TIMEOUT_S)
    try:
        cursor = conn.cursor()
        try:
            yield cursor
        finally:
            cursor.close()
            # Commit even when the body raised: callers rely on the
            # auto-committing behavior.
            conn.commit()
    finally:
        # Outer finally so a failing close()/commit() cannot leak the
        # connection.
        conn.close()
75
+
76
+
77
def add_column_to_table(
    cursor: 'sqlite3.Cursor',
    conn: 'sqlite3.Connection',
    table_name: str,
    column_name: str,
    column_type: str,
    copy_from: Optional[str] = None,
    value_to_replace_existing_entries: Optional[Any] = None,
):
    """Add a column to a SQLite table if it is not already there.

    Args:
        cursor: Cursor used to run the statements.
        conn: Connection that is committed before returning.
        table_name: Table to alter. Interpolated into the SQL text.
        column_name: Column to add. Interpolated into the SQL text.
        column_type: SQL type (and any modifiers) of the new column.
            Interpolated into the SQL text.
        copy_from: If given, an SQL expression (e.g. another column's name)
            whose value is assigned to the new column for every row.
        value_to_replace_existing_entries: If given, written (as a bound
            parameter) into every row where the new column is still NULL.
    """
    table_info = cursor.execute(f'PRAGMA table_info({table_name})')
    # Field 1 of each table_info row is compared against the column name.
    already_present = any(info[1] == column_name for info in table_info)
    if not already_present:
        try:
            cursor.execute(f'ALTER TABLE {table_name} '
                           f'ADD COLUMN {column_name} {column_type}')
            if copy_from is not None:
                cursor.execute(f'UPDATE {table_name} '
                               f'SET {column_name} = {copy_from}')
            if value_to_replace_existing_entries is not None:
                fill_nulls_cmd = (f'UPDATE {table_name} '
                                  f'SET {column_name} = (?) '
                                  f'WHERE {column_name} IS NULL')
                cursor.execute(fill_nulls_cmd,
                               (value_to_replace_existing_entries,))
        except sqlite3.OperationalError as e:
            # We may be trying to add the same column twice, when running
            # multiple threads. This is fine; anything else is re-raised.
            if 'duplicate column name' not in str(e):
                raise
    conn.commit()
112
+
113
+
114
def add_all_tables_to_db_sqlalchemy(
    metadata: sqlalchemy.MetaData,
    engine: sqlalchemy.Engine,
):
    """Create every table registered on ``metadata`` in the database.

    Args:
        metadata: Metadata whose ``tables`` are created one by one.
        engine: Engine the tables are created on.

    Raises:
        sqlalchemy.exc.OperationalError, sqlalchemy.exc.ProgrammingError:
            If creation fails for a reason other than the table already
            existing.
    """
    for table in metadata.tables.values():
        try:
            table.create(bind=engine, checkfirst=True)
        except (sqlalchemy_exc.OperationalError,
                sqlalchemy_exc.ProgrammingError) as e:
            # An 'already exists' error is tolerated even though checkfirst
            # is set; any other failure propagates.
            if 'already exists' not in str(e):
                raise
128
+
129
+
130
def add_table_to_db_sqlalchemy(
    metadata: sqlalchemy.MetaData,
    engine: sqlalchemy.Engine,
    table_name: str,
):
    """Add a specific table to the database.

    Args:
        metadata: Metadata in which ``table_name`` is looked up.
        engine: Engine the table is created on.
        table_name: Key of the table in ``metadata.tables``.

    Raises:
        KeyError: If ``table_name`` is not registered on ``metadata``.
        sqlalchemy.exc.OperationalError, sqlalchemy.exc.ProgrammingError:
            If creation fails for a reason other than the table already
            existing.
    """
    # A missing table surfaces as KeyError directly; no wrapper is needed.
    table = metadata.tables[table_name]

    try:
        table.create(bind=engine, checkfirst=True)
    except (sqlalchemy_exc.OperationalError,
            sqlalchemy_exc.ProgrammingError) as e:
        # An 'already exists' error is tolerated even though checkfirst is
        # set; any other failure propagates.
        if 'already exists' not in str(e):
            raise
149
+
150
+
151
def add_column_to_table_sqlalchemy(
    session: 'Session',
    table_name: str,
    column_name: str,
    column_type: sqlalchemy.types.TypeEngine,
    default_statement: Optional[str] = None,
    copy_from: Optional[str] = None,
    value_to_replace_existing_entries: Optional[Any] = None,
):
    """Add a column to a table through a SQLAlchemy session.

    Args:
        session: Session used to run the statements; committed before
            returning.
        table_name: Table to alter. Interpolated into the SQL text.
        column_name: Column to add. Interpolated into the SQL text.
        column_type: SQLAlchemy type, compiled for the session's dialect.
        default_statement: Optional SQL fragment appended after the column
            type in the ALTER TABLE statement.
        copy_from: If given, an SQL expression (e.g. another column's name)
            whose value is assigned to the new column for every row.
        value_to_replace_existing_entries: If given, written (as a bound
            parameter) into every row where the new column is still NULL.
    """
    # column type may be different for different dialects.
    # for example, sqlite uses BLOB for LargeBinary
    # while postgres uses BYTEA.
    column_type_str = column_type.compile(dialect=session.bind.dialect)
    if default_statement is None:
        default_statement_str = ''
    else:
        default_statement_str = f' {default_statement}'
    try:
        add_column_stmt = sqlalchemy.text(
            f'ALTER TABLE {table_name} '
            f'ADD COLUMN {column_name} {column_type_str}'
            f'{default_statement_str}')
        session.execute(add_column_stmt)
        if copy_from is not None:
            copy_stmt = sqlalchemy.text(f'UPDATE {table_name} '
                                        f'SET {column_name} = {copy_from}')
            session.execute(copy_stmt)
        if value_to_replace_existing_entries is not None:
            fill_nulls_stmt = sqlalchemy.text(
                f'UPDATE {table_name} '
                f'SET {column_name} = :replacement_value '
                f'WHERE {column_name} IS NULL')
            session.execute(
                fill_nulls_stmt,
                {'replacement_value': value_to_replace_existing_entries})
    except sqlalchemy_exc.OperationalError as e:
        # sqlite: the column is already there.
        if 'duplicate column name' not in str(e):
            raise
    except sqlalchemy_exc.ProgrammingError as e:
        # postgresql: the column is already there.
        if 'already exists' not in str(e):
            raise
    session.commit()
195
+
196
+
197
def add_column_to_table_alembic(
    table_name: str,
    column_name: str,
    column_type: sqlalchemy.types.TypeEngine,
    server_default: Optional[str] = None,
    copy_from: Optional[str] = None,
    value_to_replace_existing_entries: Optional[Any] = None,
    index: Optional[bool] = None,
):
    """Add a column to a table using Alembic operations.

    Mirrors add_column_to_table_sqlalchemy, but runs through Alembic's
    ``op`` so it can be used inside a migration.

    Args:
        table_name: Name of the table to add the column to.
        column_name: Name of the new column.
        column_type: SQLAlchemy column type.
        server_default: Server-side default value for the column.
        copy_from: Column name to copy values from (for existing rows).
        value_to_replace_existing_entries: Value written into rows where
            the new column is still NULL.
        index: If True, create an index on this column. If None, no index
            is created.
    """
    from alembic import op  # pylint: disable=import-outside-toplevel

    try:
        # Build the column (with server_default if provided) and add it.
        new_column = sqlalchemy.Column(column_name,
                                       column_type,
                                       server_default=server_default,
                                       index=index)
        op.add_column(table_name, new_column)

        # Data migration for existing rows.
        if copy_from is not None:
            copy_sql = f'UPDATE {table_name} SET {column_name} = {copy_from}'
            op.execute(sqlalchemy.text(copy_sql))

        if value_to_replace_existing_entries is not None:
            # The replacement value is passed as a bound parameter.
            fill_nulls_stmt = sqlalchemy.text(
                f'UPDATE {table_name} '
                f'SET {column_name} = :replacement_value '
                f'WHERE {column_name} IS NULL')
            op.get_bind().execute(
                fill_nulls_stmt,
                {'replacement_value': value_to_replace_existing_entries})
    except sqlalchemy_exc.ProgrammingError as e:
        # Column already exists, that's fine; re-raise anything else.
        if 'already exists' not in str(e).lower():
            raise
    except sqlalchemy_exc.OperationalError as e:
        # Column already exists, that's fine; re-raise anything else.
        if 'duplicate column name' not in str(e).lower():
            raise
255
+
256
+
257
def drop_column_from_table_alembic(
    table_name: str,
    column_name: str,
):
    """Drop a column from a table using Alembic operations.

    Does nothing when the column is not present.

    Args:
        table_name: Name of the table to drop the column from.
        column_name: Name of the column to drop.
    """
    from alembic import op  # pylint: disable=import-outside-toplevel

    # Look the column up first so a missing column is a no-op.
    inspector = sqlalchemy.inspect(op.get_bind())
    existing_columns = {
        col['name'] for col in inspector.get_columns(table_name)
    }
    if column_name in existing_columns:
        try:
            op.drop_column(table_name, column_name)
        except (sqlalchemy_exc.ProgrammingError,
                sqlalchemy_exc.OperationalError) as e:
            # 'does not exist' means it was already dropped; anything else
            # is a real failure.
            if 'does not exist' not in str(e).lower():
                raise
286
+
287
+
288
class SQLiteConn(threading.local):
    """Thread-local connection to the sqlite3 database.

    Besides the synchronous ``conn``/``cursor`` pair opened in ``__init__``,
    the object lazily opens one aiosqlite connection that the ``*_async``
    methods share.
    """

    def __init__(self, db_path: str, create_table: Callable):
        super().__init__()
        self.db_path = db_path
        self.conn = sqlite3.connect(db_path, timeout=_DB_TIMEOUT_S)
        self.cursor = self.conn.cursor()
        # The callback receives the fresh cursor and connection.
        create_table(self.cursor, self.conn)
        # Both are created on first use by _get_async_conn().
        self._async_conn: Optional[aiosqlite.Connection] = None
        self._async_conn_lock: Optional[asyncio.Lock] = None

    async def _get_async_conn(self) -> aiosqlite.Connection:
        """Get the shared aiosqlite connection for current thread.

        External callers should normally not take the connection directly
        and should use the SQLiteConn.{operation}_async methods instead, to
        avoid txn interleaving on the shared aiosqlite connection.
        E.g.
        coroutine 1:
            A: await write(row1)
            B: cursor = await conn.execute(read_row1)
            C: await cursor.fetchall()
        coroutine 2:
            D: await write(row2)
            E: cursor = await conn.execute(read_row2)
            F: await cursor.fetchall()
        The A -> B -> D -> E -> C time sequence will cause B and D read at
        the same snapshot point when B started, thus cause coroutine2 lost
        the read-after-write consistency. When you are adding new async
        operations to SQLiteConn, make sure the txn pattern does not cause
        this issue.
        """
        # Python 3.8 binds current event loop to asyncio.Lock(), which
        # requires a loop available in current thread. Lazy-init the lock to
        # avoid this dependency. The correctness is guaranteed since
        # SQLiteConn is thread-local so there is no race condition between
        # check and init.
        if self._async_conn_lock is None:
            self._async_conn_lock = asyncio.Lock()
        if self._async_conn is not None:
            return self._async_conn
        async with self._async_conn_lock:
            # Re-check: another coroutine may have connected while this one
            # waited for the lock.
            if self._async_conn is None:
                # Init logic like requests.init_db_within_lock will handle
                # initialization like setting the WAL mode, so we do not
                # duplicate that logic here.
                self._async_conn = await aiosqlite.connect(self.db_path)
        return self._async_conn

    async def execute_and_commit_async(self,
                                       sql: str,
                                       parameters: Optional[
                                           Iterable[Any]] = None) -> None:
        """Execute the sql and commit the transaction in a sync block."""
        conn = await self._get_async_conn()
        bound_params = [] if parameters is None else parameters

        def _run_and_commit(statement: str,
                            args: Optional[Iterable[Any]]):
            # pylint: disable=protected-access
            conn._conn.execute(statement, args)
            conn._conn.commit()

        # Execute and commit are submitted as a single callable.
        # pylint: disable=protected-access
        await conn._execute(_run_and_commit, sql, bound_params)

    @aiosqlite.context.contextmanager
    async def execute_fetchall_async(self,
                                     sql: str,
                                     parameters: Optional[Iterable[Any]] = None
                                    ) -> Iterable[sqlite3.Row]:
        """Run ``sql`` on the shared async connection and fetch all rows."""
        conn = await self._get_async_conn()
        return await conn.execute_fetchall(sql, parameters)

    async def execute_get_returning_value_async(
            self,
            sql: str,
            parameters: Optional[Iterable[Any]] = None
    ) -> Optional[sqlite3.Row]:
        """Execute the sql, commit, and return the first result row.

        Returns:
            The row from ``fetchone()``, or None when there is no row.
        """
        conn = await self._get_async_conn()
        bound_params = [] if parameters is None else parameters

        def _run_fetchone_and_commit(statement: str,
                                     args: Optional[Iterable[Any]]):
            # pylint: disable=protected-access
            first_row = conn._conn.execute(statement, args).fetchone()
            conn._conn.commit()
            return first_row

        # pylint: disable=protected-access
        return await conn._execute(_run_fetchone_and_commit, sql,
                                   bound_params)

    async def close(self):
        """Close the async connection (if one was opened) and the sync one."""
        if self._async_conn is not None:
            await self._async_conn.close()
        self.conn.close()
386
+
387
+
388
# Pool size used by get_engine() when it creates a Postgres engine. While it
# is 0 (the initial value), get_engine() uses a NullPool instead of a
# QueuePool. Changed through set_max_connections().
_max_connections = 0
# Synchronous engines created by get_engine(), keyed by connection string
# (Postgres) or by database file path (SQLite).
_postgres_engine_cache: Dict[str, sqlalchemy.engine.Engine] = {}
_sqlite_engine_cache: Dict[str, sqlalchemy.engine.Engine] = {}

# Guards engine creation in get_engine().
_db_creation_lock = threading.Lock()
393
+
394
+
395
def set_max_connections(max_connections: int) -> None:
    """Set the module-wide maximum connection count.

    get_engine() reads this value when it creates a new Postgres engine.

    Args:
        max_connections: New value of the module-level setting.
    """
    global _max_connections
    _max_connections = max_connections


def get_max_connections() -> int:
    """Return the module-wide maximum connection count."""
    return _max_connections
402
+
403
+
404
@typing.overload
def get_engine(
    db_name: Optional[str],
    async_engine: Literal[False] = False) -> sqlalchemy.engine.Engine:
    ...


@typing.overload
def get_engine(db_name: Optional[str],
               async_engine: Literal[True]) -> sqlalchemy_async.AsyncEngine:
    ...


def get_engine(
    db_name: Optional[str],
    async_engine: bool = False
) -> Union[sqlalchemy.engine.Engine, sqlalchemy_async.AsyncEngine]:
    """Get the engine for the given database name.

    A Postgres engine is used when the process is marked as a SkyPilot
    server (constants.ENV_VAR_IS_SKYPILOT_SERVER is set) and
    constants.ENV_VAR_DB_CONNECTION_URI holds a non-empty connection
    string. Otherwise a SQLite engine on ``~/.sky/{db_name}.db`` is used,
    and the parent directory is created if missing.

    Synchronous engines are cached per connection string / database path;
    async engines are created fresh on every call.

    Args:
        db_name: The name of the database. ONLY used for SQLite. On Postgres,
            we use a single database, which we get from the connection string.
        async_engine: Whether to return an async engine.

    Returns:
        A synchronous Engine, or an AsyncEngine when ``async_engine`` is True.

    Raises:
        AssertionError: If SQLite is selected and ``db_name`` is None.
    """
    conn_string = None
    if os.environ.get(constants.ENV_VAR_IS_SKYPILOT_SERVER) is not None:
        conn_string = os.environ.get(constants.ENV_VAR_DB_CONNECTION_URI)
    if conn_string:
        if async_engine:
            conn_string = conn_string.replace('postgresql://',
                                              'postgresql+asyncpg://')
            # This is an AsyncEngine, instead of a (normal, synchronous) Engine,
            # so we should not put it in the cache. Instead, just return.
            return sqlalchemy_async.create_async_engine(
                conn_string, poolclass=sqlalchemy.NullPool)
        with _db_creation_lock:
            if conn_string not in _postgres_engine_cache:
                logger.debug('Creating a new postgres engine with '
                             f'maximum {_max_connections} connections')
                if _max_connections == 0:
                    # No connection limit configured: do not pool.
                    _postgres_engine_cache[conn_string] = (
                        sqlalchemy.create_engine(
                            conn_string, poolclass=sqlalchemy.pool.NullPool))
                else:
                    _postgres_engine_cache[conn_string] = (
                        sqlalchemy.create_engine(
                            conn_string,
                            poolclass=sqlalchemy.pool.QueuePool,
                            pool_size=_max_connections,
                            max_overflow=max(0, 5 - _max_connections),
                            pool_pre_ping=True,
                            pool_recycle=1800))
            engine = _postgres_engine_cache[conn_string]
    else:
        assert db_name is not None, 'db_name must be provided for SQLite'
        db_path = os.path.expanduser(f'~/.sky/{db_name}.db')
        pathlib.Path(db_path).parents[0].mkdir(parents=True, exist_ok=True)
        if async_engine:
            # This is an AsyncEngine, instead of a (normal, synchronous) Engine,
            # so we should not put it in the cache. Instead, just return.
            return sqlalchemy_async.create_async_engine(
                'sqlite+aiosqlite:///' + db_path, connect_args={'timeout': 30})
        # Same lock as the Postgres cache, so two threads cannot both see a
        # miss and create separate engines for the same file.
        with _db_creation_lock:
            if db_path not in _sqlite_engine_cache:
                _sqlite_engine_cache[db_path] = sqlalchemy.create_engine(
                    'sqlite:///' + db_path)
            engine = _sqlite_engine_cache[db_path]
    return engine