skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546)
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/hyperbolic.py +8 -0
  8. sky/adaptors/ibm.py +5 -2
  9. sky/adaptors/kubernetes.py +149 -18
  10. sky/adaptors/nebius.py +173 -30
  11. sky/adaptors/primeintellect.py +1 -0
  12. sky/adaptors/runpod.py +68 -0
  13. sky/adaptors/seeweb.py +183 -0
  14. sky/adaptors/shadeform.py +89 -0
  15. sky/admin_policy.py +187 -4
  16. sky/authentication.py +179 -225
  17. sky/backends/__init__.py +4 -2
  18. sky/backends/backend.py +22 -9
  19. sky/backends/backend_utils.py +1323 -397
  20. sky/backends/cloud_vm_ray_backend.py +1749 -1029
  21. sky/backends/docker_utils.py +1 -1
  22. sky/backends/local_docker_backend.py +11 -6
  23. sky/backends/task_codegen.py +633 -0
  24. sky/backends/wheel_utils.py +55 -9
  25. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  26. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  27. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  28. sky/{clouds/service_catalog → catalog}/common.py +90 -49
  29. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  31. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  33. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
  34. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  35. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  36. sky/catalog/data_fetchers/fetch_nebius.py +338 -0
  37. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  38. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  39. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  40. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  41. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  42. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  43. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  44. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  45. sky/catalog/hyperbolic_catalog.py +136 -0
  46. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  47. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  48. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  49. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  50. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  51. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  52. sky/catalog/primeintellect_catalog.py +95 -0
  53. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  54. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  55. sky/catalog/seeweb_catalog.py +184 -0
  56. sky/catalog/shadeform_catalog.py +165 -0
  57. sky/catalog/ssh_catalog.py +167 -0
  58. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  59. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  60. sky/check.py +533 -185
  61. sky/cli.py +5 -5975
  62. sky/client/{cli.py → cli/command.py} +2591 -1956
  63. sky/client/cli/deprecation_utils.py +99 -0
  64. sky/client/cli/flags.py +359 -0
  65. sky/client/cli/table_utils.py +322 -0
  66. sky/client/cli/utils.py +79 -0
  67. sky/client/common.py +78 -32
  68. sky/client/oauth.py +82 -0
  69. sky/client/sdk.py +1219 -319
  70. sky/client/sdk_async.py +827 -0
  71. sky/client/service_account_auth.py +47 -0
  72. sky/cloud_stores.py +82 -3
  73. sky/clouds/__init__.py +13 -0
  74. sky/clouds/aws.py +564 -164
  75. sky/clouds/azure.py +105 -83
  76. sky/clouds/cloud.py +140 -40
  77. sky/clouds/cudo.py +68 -50
  78. sky/clouds/do.py +66 -48
  79. sky/clouds/fluidstack.py +63 -44
  80. sky/clouds/gcp.py +339 -110
  81. sky/clouds/hyperbolic.py +293 -0
  82. sky/clouds/ibm.py +70 -49
  83. sky/clouds/kubernetes.py +570 -162
  84. sky/clouds/lambda_cloud.py +74 -54
  85. sky/clouds/nebius.py +210 -81
  86. sky/clouds/oci.py +88 -66
  87. sky/clouds/paperspace.py +61 -44
  88. sky/clouds/primeintellect.py +317 -0
  89. sky/clouds/runpod.py +164 -74
  90. sky/clouds/scp.py +89 -86
  91. sky/clouds/seeweb.py +477 -0
  92. sky/clouds/shadeform.py +400 -0
  93. sky/clouds/ssh.py +263 -0
  94. sky/clouds/utils/aws_utils.py +10 -4
  95. sky/clouds/utils/gcp_utils.py +87 -11
  96. sky/clouds/utils/oci_utils.py +38 -14
  97. sky/clouds/utils/scp_utils.py +231 -167
  98. sky/clouds/vast.py +99 -77
  99. sky/clouds/vsphere.py +51 -40
  100. sky/core.py +375 -173
  101. sky/dag.py +15 -0
  102. sky/dashboard/out/404.html +1 -1
  103. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  105. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  106. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  107. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  110. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  111. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  112. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  113. sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  115. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  116. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  118. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  119. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  121. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  122. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  124. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  126. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  127. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  128. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  129. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  131. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  133. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  134. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  136. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  139. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  141. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  143. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  144. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  148. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  149. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  151. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  152. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
  155. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  156. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  157. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  158. sky/dashboard/out/clusters/[cluster].html +1 -1
  159. sky/dashboard/out/clusters.html +1 -1
  160. sky/dashboard/out/config.html +1 -0
  161. sky/dashboard/out/index.html +1 -1
  162. sky/dashboard/out/infra/[context].html +1 -0
  163. sky/dashboard/out/infra.html +1 -0
  164. sky/dashboard/out/jobs/[job].html +1 -1
  165. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  166. sky/dashboard/out/jobs.html +1 -1
  167. sky/dashboard/out/users.html +1 -0
  168. sky/dashboard/out/volumes.html +1 -0
  169. sky/dashboard/out/workspace/new.html +1 -0
  170. sky/dashboard/out/workspaces/[name].html +1 -0
  171. sky/dashboard/out/workspaces.html +1 -0
  172. sky/data/data_utils.py +137 -1
  173. sky/data/mounting_utils.py +269 -84
  174. sky/data/storage.py +1460 -1807
  175. sky/data/storage_utils.py +43 -57
  176. sky/exceptions.py +126 -2
  177. sky/execution.py +216 -63
  178. sky/global_user_state.py +2390 -586
  179. sky/jobs/__init__.py +7 -0
  180. sky/jobs/client/sdk.py +300 -58
  181. sky/jobs/client/sdk_async.py +161 -0
  182. sky/jobs/constants.py +15 -8
  183. sky/jobs/controller.py +848 -275
  184. sky/jobs/file_content_utils.py +128 -0
  185. sky/jobs/log_gc.py +193 -0
  186. sky/jobs/recovery_strategy.py +402 -152
  187. sky/jobs/scheduler.py +314 -189
  188. sky/jobs/server/core.py +836 -255
  189. sky/jobs/server/server.py +156 -115
  190. sky/jobs/server/utils.py +136 -0
  191. sky/jobs/state.py +2109 -706
  192. sky/jobs/utils.py +1306 -215
  193. sky/logs/__init__.py +21 -0
  194. sky/logs/agent.py +108 -0
  195. sky/logs/aws.py +243 -0
  196. sky/logs/gcp.py +91 -0
  197. sky/metrics/__init__.py +0 -0
  198. sky/metrics/utils.py +453 -0
  199. sky/models.py +78 -1
  200. sky/optimizer.py +164 -70
  201. sky/provision/__init__.py +90 -4
  202. sky/provision/aws/config.py +147 -26
  203. sky/provision/aws/instance.py +136 -50
  204. sky/provision/azure/instance.py +11 -6
  205. sky/provision/common.py +13 -1
  206. sky/provision/cudo/cudo_machine_type.py +1 -1
  207. sky/provision/cudo/cudo_utils.py +14 -8
  208. sky/provision/cudo/cudo_wrapper.py +72 -71
  209. sky/provision/cudo/instance.py +10 -6
  210. sky/provision/do/instance.py +10 -6
  211. sky/provision/do/utils.py +4 -3
  212. sky/provision/docker_utils.py +140 -33
  213. sky/provision/fluidstack/instance.py +13 -8
  214. sky/provision/gcp/__init__.py +1 -0
  215. sky/provision/gcp/config.py +301 -19
  216. sky/provision/gcp/constants.py +218 -0
  217. sky/provision/gcp/instance.py +36 -8
  218. sky/provision/gcp/instance_utils.py +18 -4
  219. sky/provision/gcp/volume_utils.py +247 -0
  220. sky/provision/hyperbolic/__init__.py +12 -0
  221. sky/provision/hyperbolic/config.py +10 -0
  222. sky/provision/hyperbolic/instance.py +437 -0
  223. sky/provision/hyperbolic/utils.py +373 -0
  224. sky/provision/instance_setup.py +101 -20
  225. sky/provision/kubernetes/__init__.py +5 -0
  226. sky/provision/kubernetes/config.py +9 -52
  227. sky/provision/kubernetes/constants.py +17 -0
  228. sky/provision/kubernetes/instance.py +919 -280
  229. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  230. sky/provision/kubernetes/network.py +27 -17
  231. sky/provision/kubernetes/network_utils.py +44 -43
  232. sky/provision/kubernetes/utils.py +1221 -534
  233. sky/provision/kubernetes/volume.py +343 -0
  234. sky/provision/lambda_cloud/instance.py +22 -16
  235. sky/provision/nebius/constants.py +50 -0
  236. sky/provision/nebius/instance.py +19 -6
  237. sky/provision/nebius/utils.py +237 -137
  238. sky/provision/oci/instance.py +10 -5
  239. sky/provision/paperspace/instance.py +10 -7
  240. sky/provision/paperspace/utils.py +1 -1
  241. sky/provision/primeintellect/__init__.py +10 -0
  242. sky/provision/primeintellect/config.py +11 -0
  243. sky/provision/primeintellect/instance.py +454 -0
  244. sky/provision/primeintellect/utils.py +398 -0
  245. sky/provision/provisioner.py +117 -36
  246. sky/provision/runpod/__init__.py +5 -0
  247. sky/provision/runpod/instance.py +27 -6
  248. sky/provision/runpod/utils.py +51 -18
  249. sky/provision/runpod/volume.py +214 -0
  250. sky/provision/scp/__init__.py +15 -0
  251. sky/provision/scp/config.py +93 -0
  252. sky/provision/scp/instance.py +707 -0
  253. sky/provision/seeweb/__init__.py +11 -0
  254. sky/provision/seeweb/config.py +13 -0
  255. sky/provision/seeweb/instance.py +812 -0
  256. sky/provision/shadeform/__init__.py +11 -0
  257. sky/provision/shadeform/config.py +12 -0
  258. sky/provision/shadeform/instance.py +351 -0
  259. sky/provision/shadeform/shadeform_utils.py +83 -0
  260. sky/provision/ssh/__init__.py +18 -0
  261. sky/provision/vast/instance.py +13 -8
  262. sky/provision/vast/utils.py +10 -7
  263. sky/provision/volume.py +164 -0
  264. sky/provision/vsphere/common/ssl_helper.py +1 -1
  265. sky/provision/vsphere/common/vapiconnect.py +2 -1
  266. sky/provision/vsphere/common/vim_utils.py +4 -4
  267. sky/provision/vsphere/instance.py +15 -10
  268. sky/provision/vsphere/vsphere_utils.py +17 -20
  269. sky/py.typed +0 -0
  270. sky/resources.py +845 -119
  271. sky/schemas/__init__.py +0 -0
  272. sky/schemas/api/__init__.py +0 -0
  273. sky/schemas/api/responses.py +227 -0
  274. sky/schemas/db/README +4 -0
  275. sky/schemas/db/env.py +90 -0
  276. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  277. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  278. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  279. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  280. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  281. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  282. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  283. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  284. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  285. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  286. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  287. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  288. sky/schemas/db/script.py.mako +28 -0
  289. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  290. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  291. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  292. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  293. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  294. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  295. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  296. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  297. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  298. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  299. sky/schemas/generated/__init__.py +0 -0
  300. sky/schemas/generated/autostopv1_pb2.py +36 -0
  301. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  302. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  303. sky/schemas/generated/jobsv1_pb2.py +86 -0
  304. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  305. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  306. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  307. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  308. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  309. sky/schemas/generated/servev1_pb2.py +58 -0
  310. sky/schemas/generated/servev1_pb2.pyi +115 -0
  311. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  312. sky/serve/autoscalers.py +357 -5
  313. sky/serve/client/impl.py +310 -0
  314. sky/serve/client/sdk.py +47 -139
  315. sky/serve/client/sdk_async.py +130 -0
  316. sky/serve/constants.py +12 -9
  317. sky/serve/controller.py +68 -17
  318. sky/serve/load_balancer.py +106 -60
  319. sky/serve/load_balancing_policies.py +116 -2
  320. sky/serve/replica_managers.py +434 -249
  321. sky/serve/serve_rpc_utils.py +179 -0
  322. sky/serve/serve_state.py +569 -257
  323. sky/serve/serve_utils.py +775 -265
  324. sky/serve/server/core.py +66 -711
  325. sky/serve/server/impl.py +1093 -0
  326. sky/serve/server/server.py +21 -18
  327. sky/serve/service.py +192 -89
  328. sky/serve/service_spec.py +144 -20
  329. sky/serve/spot_placer.py +3 -0
  330. sky/server/auth/__init__.py +0 -0
  331. sky/server/auth/authn.py +50 -0
  332. sky/server/auth/loopback.py +38 -0
  333. sky/server/auth/oauth2_proxy.py +202 -0
  334. sky/server/common.py +478 -182
  335. sky/server/config.py +85 -23
  336. sky/server/constants.py +44 -6
  337. sky/server/daemons.py +295 -0
  338. sky/server/html/token_page.html +185 -0
  339. sky/server/metrics.py +160 -0
  340. sky/server/middleware_utils.py +166 -0
  341. sky/server/requests/executor.py +558 -138
  342. sky/server/requests/payloads.py +364 -24
  343. sky/server/requests/preconditions.py +21 -17
  344. sky/server/requests/process.py +112 -29
  345. sky/server/requests/request_names.py +121 -0
  346. sky/server/requests/requests.py +822 -226
  347. sky/server/requests/serializers/decoders.py +82 -31
  348. sky/server/requests/serializers/encoders.py +140 -22
  349. sky/server/requests/threads.py +117 -0
  350. sky/server/rest.py +455 -0
  351. sky/server/server.py +1309 -285
  352. sky/server/state.py +20 -0
  353. sky/server/stream_utils.py +327 -61
  354. sky/server/uvicorn.py +217 -3
  355. sky/server/versions.py +270 -0
  356. sky/setup_files/MANIFEST.in +11 -1
  357. sky/setup_files/alembic.ini +160 -0
  358. sky/setup_files/dependencies.py +139 -31
  359. sky/setup_files/setup.py +44 -42
  360. sky/sky_logging.py +114 -7
  361. sky/skylet/attempt_skylet.py +106 -24
  362. sky/skylet/autostop_lib.py +129 -8
  363. sky/skylet/configs.py +29 -20
  364. sky/skylet/constants.py +216 -25
  365. sky/skylet/events.py +101 -21
  366. sky/skylet/job_lib.py +345 -164
  367. sky/skylet/log_lib.py +297 -18
  368. sky/skylet/log_lib.pyi +44 -1
  369. sky/skylet/providers/ibm/node_provider.py +12 -8
  370. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  371. sky/skylet/ray_patches/__init__.py +17 -3
  372. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  373. sky/skylet/ray_patches/cli.py.diff +19 -0
  374. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  375. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  376. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  377. sky/skylet/ray_patches/updater.py.diff +18 -0
  378. sky/skylet/ray_patches/worker.py.diff +41 -0
  379. sky/skylet/runtime_utils.py +21 -0
  380. sky/skylet/services.py +568 -0
  381. sky/skylet/skylet.py +72 -4
  382. sky/skylet/subprocess_daemon.py +104 -29
  383. sky/skypilot_config.py +506 -99
  384. sky/ssh_node_pools/__init__.py +1 -0
  385. sky/ssh_node_pools/core.py +135 -0
  386. sky/ssh_node_pools/server.py +233 -0
  387. sky/task.py +685 -163
  388. sky/templates/aws-ray.yml.j2 +11 -3
  389. sky/templates/azure-ray.yml.j2 +2 -1
  390. sky/templates/cudo-ray.yml.j2 +1 -0
  391. sky/templates/do-ray.yml.j2 +2 -1
  392. sky/templates/fluidstack-ray.yml.j2 +1 -0
  393. sky/templates/gcp-ray.yml.j2 +62 -1
  394. sky/templates/hyperbolic-ray.yml.j2 +68 -0
  395. sky/templates/ibm-ray.yml.j2 +2 -1
  396. sky/templates/jobs-controller.yaml.j2 +27 -24
  397. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  398. sky/templates/kubernetes-ray.yml.j2 +611 -50
  399. sky/templates/lambda-ray.yml.j2 +2 -1
  400. sky/templates/nebius-ray.yml.j2 +34 -12
  401. sky/templates/oci-ray.yml.j2 +1 -0
  402. sky/templates/paperspace-ray.yml.j2 +2 -1
  403. sky/templates/primeintellect-ray.yml.j2 +72 -0
  404. sky/templates/runpod-ray.yml.j2 +10 -1
  405. sky/templates/scp-ray.yml.j2 +4 -50
  406. sky/templates/seeweb-ray.yml.j2 +171 -0
  407. sky/templates/shadeform-ray.yml.j2 +73 -0
  408. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  409. sky/templates/vast-ray.yml.j2 +1 -0
  410. sky/templates/vsphere-ray.yml.j2 +1 -0
  411. sky/templates/websocket_proxy.py +212 -37
  412. sky/usage/usage_lib.py +31 -15
  413. sky/users/__init__.py +0 -0
  414. sky/users/model.conf +15 -0
  415. sky/users/permission.py +397 -0
  416. sky/users/rbac.py +121 -0
  417. sky/users/server.py +720 -0
  418. sky/users/token_service.py +218 -0
  419. sky/utils/accelerator_registry.py +35 -5
  420. sky/utils/admin_policy_utils.py +84 -38
  421. sky/utils/annotations.py +38 -5
  422. sky/utils/asyncio_utils.py +78 -0
  423. sky/utils/atomic.py +1 -1
  424. sky/utils/auth_utils.py +153 -0
  425. sky/utils/benchmark_utils.py +60 -0
  426. sky/utils/cli_utils/status_utils.py +159 -86
  427. sky/utils/cluster_utils.py +31 -9
  428. sky/utils/command_runner.py +354 -68
  429. sky/utils/command_runner.pyi +93 -3
  430. sky/utils/common.py +35 -8
  431. sky/utils/common_utils.py +314 -91
  432. sky/utils/config_utils.py +74 -5
  433. sky/utils/context.py +403 -0
  434. sky/utils/context_utils.py +242 -0
  435. sky/utils/controller_utils.py +383 -89
  436. sky/utils/dag_utils.py +31 -12
  437. sky/utils/db/__init__.py +0 -0
  438. sky/utils/db/db_utils.py +485 -0
  439. sky/utils/db/kv_cache.py +149 -0
  440. sky/utils/db/migration_utils.py +137 -0
  441. sky/utils/directory_utils.py +12 -0
  442. sky/utils/env_options.py +13 -0
  443. sky/utils/git.py +567 -0
  444. sky/utils/git_clone.sh +460 -0
  445. sky/utils/infra_utils.py +195 -0
  446. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  447. sky/utils/kubernetes/config_map_utils.py +133 -0
  448. sky/utils/kubernetes/create_cluster.sh +15 -29
  449. sky/utils/kubernetes/delete_cluster.sh +10 -7
  450. sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
  451. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  452. sky/utils/kubernetes/generate_kind_config.py +6 -66
  453. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  454. sky/utils/kubernetes/gpu_labeler.py +18 -8
  455. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  456. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  457. sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
  458. sky/utils/kubernetes/rsync_helper.sh +11 -3
  459. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  460. sky/utils/kubernetes/ssh_utils.py +221 -0
  461. sky/utils/kubernetes_enums.py +8 -15
  462. sky/utils/lock_events.py +94 -0
  463. sky/utils/locks.py +416 -0
  464. sky/utils/log_utils.py +82 -107
  465. sky/utils/perf_utils.py +22 -0
  466. sky/utils/resource_checker.py +298 -0
  467. sky/utils/resources_utils.py +249 -32
  468. sky/utils/rich_utils.py +217 -39
  469. sky/utils/schemas.py +955 -160
  470. sky/utils/serialize_utils.py +16 -0
  471. sky/utils/status_lib.py +10 -0
  472. sky/utils/subprocess_utils.py +29 -15
  473. sky/utils/tempstore.py +70 -0
  474. sky/utils/thread_utils.py +91 -0
  475. sky/utils/timeline.py +26 -53
  476. sky/utils/ux_utils.py +84 -15
  477. sky/utils/validator.py +11 -1
  478. sky/utils/volume.py +165 -0
  479. sky/utils/yaml_utils.py +111 -0
  480. sky/volumes/__init__.py +13 -0
  481. sky/volumes/client/__init__.py +0 -0
  482. sky/volumes/client/sdk.py +150 -0
  483. sky/volumes/server/__init__.py +0 -0
  484. sky/volumes/server/core.py +270 -0
  485. sky/volumes/server/server.py +124 -0
  486. sky/volumes/volume.py +215 -0
  487. sky/workspaces/__init__.py +0 -0
  488. sky/workspaces/core.py +655 -0
  489. sky/workspaces/server.py +101 -0
  490. sky/workspaces/utils.py +56 -0
  491. sky_templates/README.md +3 -0
  492. sky_templates/__init__.py +3 -0
  493. sky_templates/ray/__init__.py +0 -0
  494. sky_templates/ray/start_cluster +183 -0
  495. sky_templates/ray/stop_cluster +75 -0
  496. skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
  497. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  498. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
  499. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  500. sky/benchmark/benchmark_state.py +0 -256
  501. sky/benchmark/benchmark_utils.py +0 -641
  502. sky/clouds/service_catalog/constants.py +0 -7
  503. sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
  504. sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
  505. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  506. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  507. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  508. sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
  509. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  510. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  511. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  512. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  513. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  514. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  515. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  516. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
  517. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  518. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  519. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  520. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
  521. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  522. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  523. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  524. sky/jobs/dashboard/dashboard.py +0 -223
  525. sky/jobs/dashboard/static/favicon.ico +0 -0
  526. sky/jobs/dashboard/templates/index.html +0 -831
  527. sky/jobs/server/dashboard_utils.py +0 -69
  528. sky/skylet/providers/scp/__init__.py +0 -2
  529. sky/skylet/providers/scp/config.py +0 -149
  530. sky/skylet/providers/scp/node_provider.py +0 -578
  531. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  532. sky/utils/db_utils.py +0 -100
  533. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  534. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  535. skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
  536. skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
  537. skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
  538. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  539. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  540. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  541. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  542. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  543. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  544. /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  545. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  546. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/serve/serve_state.py CHANGED
@@ -1,89 +1,163 @@
1
1
  """The database for services information."""
2
2
  import collections
3
3
  import enum
4
+ import functools
4
5
  import json
5
- import pathlib
6
6
  import pickle
7
- import sqlite3
7
+ import threading
8
8
  import typing
9
- from typing import Any, Dict, List, Optional, Tuple
9
+ from typing import Any, Dict, List, Optional
10
+ import uuid
10
11
 
11
12
  import colorama
13
+ import sqlalchemy
14
+ from sqlalchemy import exc as sqlalchemy_exc
15
+ from sqlalchemy import orm
16
+ from sqlalchemy.dialects import postgresql
17
+ from sqlalchemy.dialects import sqlite
18
+ from sqlalchemy.ext import declarative
12
19
 
13
20
  from sky.serve import constants
14
- from sky.utils import db_utils
21
+ from sky.utils import common_utils
22
+ from sky.utils.db import db_utils
23
+ from sky.utils.db import migration_utils
15
24
 
16
25
  if typing.TYPE_CHECKING:
26
+ from sqlalchemy.engine import row
27
+
17
28
  from sky.serve import replica_managers
18
29
  from sky.serve import service_spec
19
30
 
31
+ _SQLALCHEMY_ENGINE: Optional[sqlalchemy.engine.Engine] = None
32
+ _SQLALCHEMY_ENGINE_LOCK = threading.Lock()
33
+
34
+ Base = declarative.declarative_base()
35
+
36
+ # === Database schema ===
37
+ services_table = sqlalchemy.Table(
38
+ 'services',
39
+ Base.metadata,
40
+ sqlalchemy.Column('name', sqlalchemy.Text, primary_key=True),
41
+ sqlalchemy.Column('controller_job_id',
42
+ sqlalchemy.Integer,
43
+ server_default=None),
44
+ sqlalchemy.Column('controller_port',
45
+ sqlalchemy.Integer,
46
+ server_default=None),
47
+ sqlalchemy.Column('load_balancer_port',
48
+ sqlalchemy.Integer,
49
+ server_default=None),
50
+ sqlalchemy.Column('status', sqlalchemy.Text),
51
+ sqlalchemy.Column('uptime', sqlalchemy.Integer, server_default=None),
52
+ sqlalchemy.Column('policy', sqlalchemy.Text, server_default=None),
53
+ sqlalchemy.Column('auto_restart', sqlalchemy.Integer, server_default=None),
54
+ sqlalchemy.Column('requested_resources',
55
+ sqlalchemy.LargeBinary,
56
+ server_default=None),
57
+ sqlalchemy.Column('requested_resources_str', sqlalchemy.Text),
58
+ sqlalchemy.Column('current_version',
59
+ sqlalchemy.Integer,
60
+ server_default=str(constants.INITIAL_VERSION)),
61
+ sqlalchemy.Column('active_versions',
62
+ sqlalchemy.Text,
63
+ server_default=json.dumps([])),
64
+ sqlalchemy.Column('load_balancing_policy',
65
+ sqlalchemy.Text,
66
+ server_default=None),
67
+ sqlalchemy.Column('tls_encrypted', sqlalchemy.Integer, server_default='0'),
68
+ sqlalchemy.Column('pool', sqlalchemy.Integer, server_default='0'),
69
+ sqlalchemy.Column('controller_pid', sqlalchemy.Integer,
70
+ server_default=None),
71
+ sqlalchemy.Column('hash', sqlalchemy.Text, server_default=None),
72
+ sqlalchemy.Column('entrypoint', sqlalchemy.Text, server_default=None),
73
+ )
74
+
75
+ replicas_table = sqlalchemy.Table(
76
+ 'replicas',
77
+ Base.metadata,
78
+ sqlalchemy.Column('service_name', sqlalchemy.Text, primary_key=True),
79
+ sqlalchemy.Column('replica_id', sqlalchemy.Integer, primary_key=True),
80
+ sqlalchemy.Column('replica_info', sqlalchemy.LargeBinary),
81
+ )
82
+
83
+ version_specs_table = sqlalchemy.Table(
84
+ 'version_specs',
85
+ Base.metadata,
86
+ sqlalchemy.Column('service_name', sqlalchemy.Text, primary_key=True),
87
+ sqlalchemy.Column('version', sqlalchemy.Integer, primary_key=True),
88
+ sqlalchemy.Column('spec', sqlalchemy.LargeBinary),
89
+ sqlalchemy.Column('yaml_content', sqlalchemy.Text, server_default=None),
90
+ )
91
+
92
+ serve_ha_recovery_script_table = sqlalchemy.Table(
93
+ 'serve_ha_recovery_script',
94
+ Base.metadata,
95
+ sqlalchemy.Column('service_name', sqlalchemy.Text, primary_key=True),
96
+ sqlalchemy.Column('script', sqlalchemy.Text),
97
+ )
98
+
99
+
100
+ def create_table(engine: sqlalchemy.engine.Engine):
101
+ """Creates the service and replica tables if they do not exist."""
20
102
 
21
- def _get_db_path() -> str:
22
- """Workaround to collapse multi-step Path ops for type checker.
23
- Ensures _DB_PATH is str, avoiding Union[Path, str] inference.
24
- """
25
- path = pathlib.Path(constants.SKYSERVE_METADATA_DIR) / 'services.db'
26
- path = path.expanduser().absolute()
27
- path.parents[0].mkdir(parents=True, exist_ok=True)
28
- return str(path)
103
+ # Enable WAL mode to avoid locking issues.
104
+ # See: issue #3863, #1441 and PR #1509
105
+ # https://github.com/microsoft/WSL/issues/2395
106
+ # TODO(romilb): We do not enable WAL for WSL because of known issue in WSL.
107
+ # This may cause the database locked problem from WSL issue #1441.
108
+ if (engine.dialect.name == db_utils.SQLAlchemyDialect.SQLITE.value and
109
+ not common_utils.is_wsl()):
110
+ try:
111
+ with orm.Session(engine) as session:
112
+ session.execute(sqlalchemy.text('PRAGMA journal_mode=WAL'))
113
+ session.commit()
114
+ except sqlalchemy_exc.OperationalError as e:
115
+ if 'database is locked' not in str(e):
116
+ raise
117
+ # If the database is locked, it is OK to continue, as the WAL mode
118
+ # is not critical and is likely to be enabled by other processes.
29
119
 
120
+ migration_utils.safe_alembic_upgrade(engine, migration_utils.SERVE_DB_NAME,
121
+ migration_utils.SERVE_VERSION)
30
122
 
31
- _DB_PATH: str = _get_db_path()
32
123
 
124
+ def initialize_and_get_db() -> sqlalchemy.engine.Engine:
125
+ global _SQLALCHEMY_ENGINE
33
126
 
34
- def create_table(cursor: 'sqlite3.Cursor', conn: 'sqlite3.Connection') -> None:
35
- """Creates the service and replica tables if they do not exist."""
127
+ if _SQLALCHEMY_ENGINE is not None:
128
+ return _SQLALCHEMY_ENGINE
129
+
130
+ with _SQLALCHEMY_ENGINE_LOCK:
131
+ if _SQLALCHEMY_ENGINE is not None:
132
+ return _SQLALCHEMY_ENGINE
133
+ # get an engine to the db
134
+ engine = db_utils.get_engine('serve/services')
135
+
136
+ # run migrations if needed
137
+ create_table(engine)
138
+
139
+ # return engine
140
+ _SQLALCHEMY_ENGINE = engine
141
+ return _SQLALCHEMY_ENGINE
36
142
 
37
- # auto_restart and requested_resources column is deprecated.
38
- cursor.execute("""\
39
- CREATE TABLE IF NOT EXISTS services (
40
- name TEXT PRIMARY KEY,
41
- controller_job_id INTEGER DEFAULT NULL,
42
- controller_port INTEGER DEFAULT NULL,
43
- load_balancer_port INTEGER DEFAULT NULL,
44
- status TEXT,
45
- uptime INTEGER DEFAULT NULL,
46
- policy TEXT DEFAULT NULL,
47
- auto_restart INTEGER DEFAULT NULL,
48
- requested_resources BLOB DEFAULT NULL)""")
49
- cursor.execute("""\
50
- CREATE TABLE IF NOT EXISTS replicas (
51
- service_name TEXT,
52
- replica_id INTEGER,
53
- replica_info BLOB,
54
- PRIMARY KEY (service_name, replica_id))""")
55
- cursor.execute("""\
56
- CREATE TABLE IF NOT EXISTS version_specs (
57
- version INTEGER,
58
- service_name TEXT,
59
- spec BLOB,
60
- PRIMARY KEY (service_name, version))""")
61
- conn.commit()
62
-
63
- # Backward compatibility.
64
- db_utils.add_column_to_table(cursor, conn, 'services',
65
- 'requested_resources_str', 'TEXT')
66
- # Deprecated: switched to `active_versions` below for the version
67
- # considered active by the load balancer. The
68
- # authscaler/replica_manager version can be found in the
69
- # version_specs table.
70
- db_utils.add_column_to_table(
71
- cursor, conn, 'services', 'current_version',
72
- f'INTEGER DEFAULT {constants.INITIAL_VERSION}')
73
- # The versions that is activated for the service. This is a list
74
- # of integers in json format.
75
- db_utils.add_column_to_table(cursor, conn, 'services', 'active_versions',
76
- f'TEXT DEFAULT {json.dumps([])!r}')
77
- db_utils.add_column_to_table(cursor, conn, 'services',
78
- 'load_balancing_policy', 'TEXT DEFAULT NULL')
79
- # Whether the service's load balancer is encrypted with TLS.
80
- db_utils.add_column_to_table(cursor, conn, 'services', 'tls_encrypted',
81
- 'INTEGER DEFAULT 0')
82
- conn.commit()
83
-
84
-
85
- db_utils.SQLiteConn(_DB_PATH, create_table)
86
- _UNIQUE_CONSTRAINT_FAILED_ERROR_MSG = 'UNIQUE constraint failed: services.name'
143
+
144
+ def init_db(func):
145
+ """Initialize the database."""
146
+
147
+ @functools.wraps(func)
148
+ def wrapper(*args, **kwargs):
149
+ initialize_and_get_db()
150
+ return func(*args, **kwargs)
151
+
152
+ return wrapper
153
+
154
+
155
+ _UNIQUE_CONSTRAINT_FAILED_ERROR_MSGS = [
156
+ # sqlite
157
+ 'UNIQUE constraint failed: services.name',
158
+ # postgres
159
+ 'duplicate key value violates unique constraint "services_pkey"',
160
+ ]
87
161
 
88
162
 
89
163
  # === Statuses ===
@@ -247,153 +321,261 @@ _SERVICE_STATUS_TO_COLOR = {
247
321
  }
248
322
 
249
323
 
324
+ @init_db
250
325
  def add_service(name: str, controller_job_id: int, policy: str,
251
326
  requested_resources_str: str, load_balancing_policy: str,
252
- status: ServiceStatus, tls_encrypted: bool) -> bool:
327
+ status: ServiceStatus, tls_encrypted: bool, pool: bool,
328
+ controller_pid: int, entrypoint: str) -> bool:
253
329
  """Add a service in the database.
254
330
 
255
331
  Returns:
256
332
  True if the service is added successfully, False if the service already
257
333
  exists.
258
334
  """
335
+ assert _SQLALCHEMY_ENGINE is not None
259
336
  try:
260
- with db_utils.safe_cursor(_DB_PATH) as cursor:
261
- cursor.execute(
262
- """\
263
- INSERT INTO services
264
- (name, controller_job_id, status, policy,
265
- requested_resources_str, load_balancing_policy, tls_encrypted)
266
- VALUES (?, ?, ?, ?, ?, ?, ?)""",
267
- (name, controller_job_id, status.value, policy,
268
- requested_resources_str, load_balancing_policy,
269
- int(tls_encrypted)))
270
-
271
- except sqlite3.IntegrityError as e:
272
- if str(e) != _UNIQUE_CONSTRAINT_FAILED_ERROR_MSG:
273
- raise RuntimeError('Unexpected database error') from e
274
- return False
337
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
338
+ if (_SQLALCHEMY_ENGINE.dialect.name ==
339
+ db_utils.SQLAlchemyDialect.SQLITE.value):
340
+ insert_func = sqlite.insert
341
+ elif (_SQLALCHEMY_ENGINE.dialect.name ==
342
+ db_utils.SQLAlchemyDialect.POSTGRESQL.value):
343
+ insert_func = postgresql.insert
344
+ else:
345
+ raise ValueError('Unsupported database dialect')
346
+
347
+ insert_stmt = insert_func(services_table).values(
348
+ name=name,
349
+ controller_job_id=controller_job_id,
350
+ status=status.value,
351
+ policy=policy,
352
+ requested_resources_str=requested_resources_str,
353
+ load_balancing_policy=load_balancing_policy,
354
+ tls_encrypted=int(tls_encrypted),
355
+ pool=int(pool),
356
+ controller_pid=controller_pid,
357
+ hash=str(uuid.uuid4()),
358
+ entrypoint=entrypoint)
359
+ session.execute(insert_stmt)
360
+ session.commit()
361
+
362
+ except sqlalchemy_exc.IntegrityError as e:
363
+ for msg in _UNIQUE_CONSTRAINT_FAILED_ERROR_MSGS:
364
+ if msg in str(e):
365
+ return False
366
+ raise RuntimeError('Unexpected database error') from e
275
367
  return True
276
368
 
277
369
 
370
+ @init_db
371
+ def update_service_controller_pid(service_name: str,
372
+ controller_pid: int) -> None:
373
+ """Updates the controller pid of a service.
374
+
375
+ This is used to update the controller pid of a service on ha recovery.
376
+ """
377
+ assert _SQLALCHEMY_ENGINE is not None
378
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
379
+ session.query(services_table).filter(
380
+ services_table.c.name == service_name).update(
381
+ {services_table.c.controller_pid: controller_pid})
382
+ session.commit()
383
+
384
+
385
+ @init_db
278
386
  def remove_service(service_name: str) -> None:
279
387
  """Removes a service from the database."""
280
- with db_utils.safe_cursor(_DB_PATH) as cursor:
281
- cursor.execute("""\
282
- DELETE FROM services WHERE name=(?)""", (service_name,))
388
+ assert _SQLALCHEMY_ENGINE is not None
389
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
390
+ session.execute(
391
+ sqlalchemy.delete(services_table).where(
392
+ services_table.c.name == service_name))
393
+ session.commit()
283
394
 
284
395
 
396
+ @init_db
285
397
  def set_service_uptime(service_name: str, uptime: int) -> None:
286
398
  """Sets the uptime of a service."""
287
- with db_utils.safe_cursor(_DB_PATH) as cursor:
288
- cursor.execute(
289
- """\
290
- UPDATE services SET
291
- uptime=(?) WHERE name=(?)""", (uptime, service_name))
399
+ assert _SQLALCHEMY_ENGINE is not None
400
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
401
+ session.query(services_table).filter(
402
+ services_table.c.name == service_name).update(
403
+ {services_table.c.uptime: uptime})
404
+ session.commit()
292
405
 
293
406
 
407
+ @init_db
294
408
  def set_service_status_and_active_versions(
295
409
  service_name: str,
296
410
  status: ServiceStatus,
297
411
  active_versions: Optional[List[int]] = None) -> None:
298
412
  """Sets the service status."""
299
- vars_to_set = 'status=(?)'
300
- values: Tuple[str, ...] = (status.value, service_name)
413
+ assert _SQLALCHEMY_ENGINE is not None
414
+ update_dict = {services_table.c.status: status.value}
301
415
  if active_versions is not None:
302
- vars_to_set = 'status=(?), active_versions=(?)'
303
- values = (status.value, json.dumps(active_versions), service_name)
304
- with db_utils.safe_cursor(_DB_PATH) as cursor:
305
- cursor.execute(
306
- f"""\
307
- UPDATE services SET
308
- {vars_to_set} WHERE name=(?)""", values)
416
+ update_dict[services_table.c.active_versions] = json.dumps(
417
+ active_versions)
309
418
 
419
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
420
+ session.query(services_table).filter(
421
+ services_table.c.name == service_name).update(update_dict)
422
+ session.commit()
310
423
 
424
+
425
+ @init_db
311
426
  def set_service_controller_port(service_name: str,
312
427
  controller_port: int) -> None:
313
428
  """Sets the controller port of a service."""
314
- with db_utils.safe_cursor(_DB_PATH) as cursor:
315
- cursor.execute(
316
- """\
317
- UPDATE services SET
318
- controller_port=(?) WHERE name=(?)""",
319
- (controller_port, service_name))
429
+ assert _SQLALCHEMY_ENGINE is not None
430
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
431
+ session.query(services_table).filter(
432
+ services_table.c.name == service_name).update(
433
+ {services_table.c.controller_port: controller_port})
434
+ session.commit()
320
435
 
321
436
 
437
+ @init_db
322
438
  def set_service_load_balancer_port(service_name: str,
323
439
  load_balancer_port: int) -> None:
324
440
  """Sets the load balancer port of a service."""
325
- with db_utils.safe_cursor(_DB_PATH) as cursor:
326
- cursor.execute(
327
- """\
328
- UPDATE services SET
329
- load_balancer_port=(?) WHERE name=(?)""",
330
- (load_balancer_port, service_name))
331
-
332
-
333
- def _get_service_from_row(row) -> Dict[str, Any]:
334
- (current_version, name, controller_job_id, controller_port,
335
- load_balancer_port, status, uptime, policy, _, _, requested_resources_str,
336
- _, active_versions, load_balancing_policy, tls_encrypted) = row[:15]
337
- return {
338
- 'name': name,
339
- 'controller_job_id': controller_job_id,
340
- 'controller_port': controller_port,
341
- 'load_balancer_port': load_balancer_port,
342
- 'status': ServiceStatus[status],
343
- 'uptime': uptime,
344
- 'policy': policy,
441
+ assert _SQLALCHEMY_ENGINE is not None
442
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
443
+ session.query(services_table).filter(
444
+ services_table.c.name == service_name).update(
445
+ {services_table.c.load_balancer_port: load_balancer_port})
446
+ session.commit()
447
+
448
+
449
+ def _get_service_from_row(r: 'row.RowMapping') -> Dict[str, Any]:
450
+ # Get the max_version from the first column (from the subquery)
451
+ current_version = r['max_version']
452
+
453
+ record = {
454
+ 'name': r['name'],
455
+ 'controller_job_id': r['controller_job_id'],
456
+ 'controller_port': r['controller_port'],
457
+ 'load_balancer_port': r['load_balancer_port'],
458
+ 'status': ServiceStatus[r['status']],
459
+ 'uptime': r['uptime'],
460
+ 'policy': r['policy'],
345
461
  # The version of the autoscaler/replica manager are on. It can be larger
346
462
  # than the active versions as the load balancer may not consider the
347
463
  # latest version to be active for serving traffic.
348
464
  'version': current_version,
349
465
  # The versions that is active for the load balancer. This is a list of
350
466
  # integers in json format. This is mainly for display purpose.
351
- 'active_versions': json.loads(active_versions),
352
- 'requested_resources_str': requested_resources_str,
353
- 'load_balancing_policy': load_balancing_policy,
354
- 'tls_encrypted': bool(tls_encrypted),
467
+ 'active_versions': json.loads(r['active_versions'])
468
+ if r['active_versions'] else [],
469
+ 'requested_resources_str': r['requested_resources_str'],
470
+ 'load_balancing_policy': r['load_balancing_policy'],
471
+ 'tls_encrypted': bool(r['tls_encrypted']),
472
+ 'pool': bool(r['pool']),
473
+ 'controller_pid': r['controller_pid'],
474
+ 'hash': r['hash'],
475
+ 'entrypoint': r['entrypoint'],
476
+ 'yaml_content': r.get('yaml_content'),
355
477
  }
478
+ latest_spec = get_spec(r['name'], current_version)
479
+ if latest_spec is not None:
480
+ record['policy'] = latest_spec.autoscaling_policy_str()
481
+ record['load_balancing_policy'] = latest_spec.load_balancing_policy
482
+ return record
483
+
356
484
 
485
+ def _build_services_with_latest_version_query(
486
+ service_name: Optional[str] = None) -> sqlalchemy.sql.Select:
487
+ """Builds a query joining services with their latest version and yaml.
357
488
 
489
+ Args:
490
+ service_name: If provided, filter to this service only.
491
+
492
+ Returns:
493
+ A SQLAlchemy selectable for fetching rows, including columns:
494
+ - max_version (latest version per service)
495
+ - services_table.*
496
+ - yaml_content (from version_specs_table for latest version)
497
+ """
498
+ subquery = sqlalchemy.select(
499
+ version_specs_table.c.service_name,
500
+ sqlalchemy.func.max(version_specs_table.c.version).label('max_version'),
501
+ ).group_by(version_specs_table.c.service_name).alias('v')
502
+
503
+ query = sqlalchemy.select(
504
+ subquery.c.max_version,
505
+ services_table,
506
+ version_specs_table.c.yaml_content,
507
+ ).select_from(
508
+ services_table.join(
509
+ subquery, services_table.c.name == subquery.c.service_name).join(
510
+ version_specs_table,
511
+ sqlalchemy.and_(
512
+ version_specs_table.c.service_name == services_table.c.name,
513
+ version_specs_table.c.version == subquery.c.max_version,
514
+ ),
515
+ ))
516
+ if service_name is not None:
517
+ query = query.where(services_table.c.name == service_name)
518
+ return query
519
+
520
+
521
+ @init_db
358
522
  def get_services() -> List[Dict[str, Any]]:
359
523
  """Get all existing service records."""
360
- with db_utils.safe_cursor(_DB_PATH) as cursor:
361
- rows = cursor.execute('SELECT v.max_version, s.* FROM services s '
362
- 'JOIN ('
363
- 'SELECT service_name, MAX(version) as max_version'
364
- ' FROM version_specs GROUP BY service_name) v '
365
- 'ON s.name=v.service_name').fetchall()
524
+ assert _SQLALCHEMY_ENGINE is not None
525
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
526
+ query = _build_services_with_latest_version_query()
527
+ rows = session.execute(query).fetchall()
366
528
  records = []
367
529
  for row in rows:
368
- records.append(_get_service_from_row(row))
530
+ records.append(_get_service_from_row(row._mapping)) # pylint: disable=protected-access
369
531
  return records
370
532
 
371
533
 
534
+ @init_db
535
+ def get_num_services() -> int:
536
+ """Get the number of services."""
537
+ assert _SQLALCHEMY_ENGINE is not None
538
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
539
+ return session.execute(
540
+ sqlalchemy.select(sqlalchemy.func.count() # pylint: disable=not-callable
541
+ ).select_from(services_table)).fetchone()[0]
542
+
543
+
544
+ @init_db
372
545
  def get_service_from_name(service_name: str) -> Optional[Dict[str, Any]]:
373
546
  """Get all existing service records."""
374
- with db_utils.safe_cursor(_DB_PATH) as cursor:
375
- rows = cursor.execute(
376
- 'SELECT v.max_version, s.* FROM services s '
377
- 'JOIN ('
378
- 'SELECT service_name, MAX(version) as max_version '
379
- 'FROM version_specs WHERE service_name=(?)) v '
380
- 'ON s.name=v.service_name WHERE name=(?)',
381
- (service_name, service_name)).fetchall()
547
+ assert _SQLALCHEMY_ENGINE is not None
548
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
549
+ query = _build_services_with_latest_version_query(service_name)
550
+ rows = session.execute(query).fetchall()
382
551
  for row in rows:
383
- return _get_service_from_row(row)
552
+ return _get_service_from_row(row._mapping) # pylint: disable=protected-access
384
553
  return None
385
554
 
386
555
 
556
+ @init_db
557
+ def get_service_hash(service_name: str) -> Optional[str]:
558
+ """Get the hash of a service."""
559
+ assert _SQLALCHEMY_ENGINE is not None
560
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
561
+ result = session.execute(
562
+ sqlalchemy.select(services_table.c.hash).where(
563
+ services_table.c.name == service_name)).fetchone()
564
+ return result[0] if result else None
565
+
566
+
567
+ @init_db
387
568
  def get_service_versions(service_name: str) -> List[int]:
388
569
  """Gets all versions of a service."""
389
- with db_utils.safe_cursor(_DB_PATH) as cursor:
390
- rows = cursor.execute(
391
- """\
392
- SELECT DISTINCT version FROM version_specs
393
- WHERE service_name=(?)""", (service_name,)).fetchall()
570
+ assert _SQLALCHEMY_ENGINE is not None
571
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
572
+ rows = session.execute(
573
+ sqlalchemy.select(version_specs_table.c.version.distinct()).where(
574
+ version_specs_table.c.service_name == service_name)).fetchall()
394
575
  return [row[0] for row in rows]
395
576
 
396
577
 
578
+ @init_db
397
579
  def get_glob_service_names(
398
580
  service_names: Optional[List[str]] = None) -> List[str]:
399
581
  """Get service names matching the glob patterns.
@@ -405,72 +587,97 @@ def get_glob_service_names(
405
587
  Returns:
406
588
  A list of non-duplicated service names.
407
589
  """
408
- with db_utils.safe_cursor(_DB_PATH) as cursor:
590
+ assert _SQLALCHEMY_ENGINE is not None
591
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
409
592
  if service_names is None:
410
- rows = cursor.execute('SELECT name FROM services').fetchall()
593
+ rows = session.execute(sqlalchemy.select(
594
+ services_table.c.name)).fetchall()
411
595
  else:
412
596
  rows = []
413
597
  for service_name in service_names:
414
- rows.extend(
415
- cursor.execute(
416
- 'SELECT name FROM services WHERE name GLOB (?)',
417
- (service_name,)).fetchall())
598
+ pattern_rows = session.execute(
599
+ sqlalchemy.select(services_table.c.name).where(
600
+ services_table.c.name.like(
601
+ service_name.replace('*', '%')))).fetchall()
602
+ rows.extend(pattern_rows)
418
603
  return list({row[0] for row in rows})
419
604
 
420
605
 
421
606
  # === Replica functions ===
607
+ @init_db
422
608
  def add_or_update_replica(service_name: str, replica_id: int,
423
609
  replica_info: 'replica_managers.ReplicaInfo') -> None:
424
610
  """Adds a replica to the database."""
425
- with db_utils.safe_cursor(_DB_PATH) as cursor:
426
- cursor.execute(
427
- """\
428
- INSERT OR REPLACE INTO replicas
429
- (service_name, replica_id, replica_info)
430
- VALUES (?, ?, ?)""",
431
- (service_name, replica_id, pickle.dumps(replica_info)))
611
+ assert _SQLALCHEMY_ENGINE is not None
612
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
613
+ if (_SQLALCHEMY_ENGINE.dialect.name ==
614
+ db_utils.SQLAlchemyDialect.SQLITE.value):
615
+ insert_func = sqlite.insert
616
+ elif (_SQLALCHEMY_ENGINE.dialect.name ==
617
+ db_utils.SQLAlchemyDialect.POSTGRESQL.value):
618
+ insert_func = postgresql.insert
619
+ else:
620
+ raise ValueError('Unsupported database dialect')
432
621
 
622
+ insert_stmt = insert_func(replicas_table).values(
623
+ service_name=service_name,
624
+ replica_id=replica_id,
625
+ replica_info=pickle.dumps(replica_info))
433
626
 
627
+ insert_stmt = insert_stmt.on_conflict_do_update(
628
+ index_elements=['service_name', 'replica_id'],
629
+ set_={'replica_info': insert_stmt.excluded.replica_info})
630
+
631
+ session.execute(insert_stmt)
632
+ session.commit()
633
+
634
+
635
+ @init_db
434
636
  def remove_replica(service_name: str, replica_id: int) -> None:
435
637
  """Removes a replica from the database."""
436
- with db_utils.safe_cursor(_DB_PATH) as cursor:
437
- cursor.execute(
438
- """\
439
- DELETE FROM replicas
440
- WHERE service_name=(?)
441
- AND replica_id=(?)""", (service_name, replica_id))
638
+ assert _SQLALCHEMY_ENGINE is not None
639
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
640
+ session.execute(
641
+ sqlalchemy.delete(replicas_table).where(
642
+ sqlalchemy.and_(replicas_table.c.service_name == service_name,
643
+ replicas_table.c.replica_id == replica_id)))
644
+ session.commit()
442
645
 
443
646
 
647
+ @init_db
444
648
  def get_replica_info_from_id(
445
649
  service_name: str,
446
650
  replica_id: int) -> Optional['replica_managers.ReplicaInfo']:
447
651
  """Gets a replica info from the database."""
448
- with db_utils.safe_cursor(_DB_PATH) as cursor:
449
- rows = cursor.execute(
450
- """\
451
- SELECT replica_info FROM replicas
452
- WHERE service_name=(?)
453
- AND replica_id=(?)""", (service_name, replica_id)).fetchall()
454
- for row in rows:
455
- return pickle.loads(row[0])
456
- return None
652
+ assert _SQLALCHEMY_ENGINE is not None
653
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
654
+ result = session.execute(
655
+ sqlalchemy.select(replicas_table.c.replica_info).where(
656
+ sqlalchemy.and_(
657
+ replicas_table.c.service_name == service_name,
658
+ replicas_table.c.replica_id == replica_id))).fetchone()
659
+ return pickle.loads(result[0]) if result else None
457
660
 
458
661
 
662
+ @init_db
459
663
  def get_replica_infos(
460
664
  service_name: str) -> List['replica_managers.ReplicaInfo']:
461
665
  """Gets all replica infos of a service."""
462
- with db_utils.safe_cursor(_DB_PATH) as cursor:
463
- rows = cursor.execute(
464
- """\
465
- SELECT replica_info FROM replicas
466
- WHERE service_name=(?)""", (service_name,)).fetchall()
666
+ assert _SQLALCHEMY_ENGINE is not None
667
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
668
+ rows = session.execute(
669
+ sqlalchemy.select(replicas_table.c.replica_info).where(
670
+ replicas_table.c.service_name == service_name)).fetchall()
467
671
  return [pickle.loads(row[0]) for row in rows]
468
672
 
469
673
 
674
+ @init_db
470
675
  def total_number_provisioning_replicas() -> int:
471
676
  """Returns the total number of provisioning replicas."""
472
- with db_utils.safe_cursor(_DB_PATH) as cursor:
473
- rows = cursor.execute('SELECT replica_info FROM replicas').fetchall()
677
+ assert _SQLALCHEMY_ENGINE is not None
678
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
679
+ rows = session.execute(sqlalchemy.select(
680
+ replicas_table.c.replica_info)).fetchall()
474
681
  provisioning_count = 0
475
682
  for row in rows:
476
683
  replica_info: 'replica_managers.ReplicaInfo' = pickle.loads(row[0])
@@ -479,6 +686,22 @@ def total_number_provisioning_replicas() -> int:
479
686
  return provisioning_count
480
687
 
481
688
 
689
+ @init_db
690
+ def total_number_terminating_replicas() -> int:
691
+ """Returns the total number of terminating replicas."""
692
+ assert _SQLALCHEMY_ENGINE is not None
693
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
694
+ rows = session.execute(sqlalchemy.select(
695
+ replicas_table.c.replica_info)).fetchall()
696
+ terminating_count = 0
697
+ for row in rows:
698
+ replica_info: 'replica_managers.ReplicaInfo' = pickle.loads(row[0])
699
+ if (replica_info.status_property.sky_down_status ==
700
+ common_utils.ProcessStatus.RUNNING):
701
+ terminating_count += 1
702
+ return terminating_count
703
+
704
+
482
705
  def get_replicas_at_status(
483
706
  service_name: str,
484
707
  status: ReplicaStatus,
@@ -488,105 +711,194 @@ def get_replicas_at_status(
488
711
 
489
712
 
490
713
  # === Version functions ===
714
+ @init_db
491
715
  def add_version(service_name: str) -> int:
492
716
  """Adds a version to the database."""
493
-
494
- with db_utils.safe_cursor(_DB_PATH) as cursor:
495
- cursor.execute(
496
- """\
497
- INSERT INTO version_specs
498
- (version, service_name, spec)
499
- VALUES (
500
- (SELECT COALESCE(MAX(version), 0) + 1 FROM
501
- version_specs WHERE service_name = ?), ?, ?)
502
- RETURNING version""",
503
- (service_name, service_name, pickle.dumps(None)))
504
-
505
- inserted_version = cursor.fetchone()[0]
506
-
507
- return inserted_version
508
-
509
-
717
+ assert _SQLALCHEMY_ENGINE is not None
718
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
719
+ # Insert new version with MAX(version) + 1 in a single atomic operation
720
+ max_version_subquery = sqlalchemy.select(
721
+ sqlalchemy.func.coalesce(
722
+ sqlalchemy.func.max(version_specs_table.c.version), 0) +
723
+ 1).where(version_specs_table.c.service_name ==
724
+ service_name).scalar_subquery()
725
+
726
+ # Use INSERT with subquery and RETURNING
727
+ insert_stmt = sqlalchemy.insert(version_specs_table).values(
728
+ service_name=service_name,
729
+ version=max_version_subquery,
730
+ spec=pickle.dumps(None)).returning(version_specs_table.c.version)
731
+
732
+ result = session.execute(insert_stmt)
733
+ new_version = result.scalar()
734
+ session.commit()
735
+ return new_version
736
+
737
+
738
+ @init_db
510
739
  def add_or_update_version(service_name: str, version: int,
511
- spec: 'service_spec.SkyServiceSpec') -> None:
512
- with db_utils.safe_cursor(_DB_PATH) as cursor:
513
- cursor.execute(
514
- """\
515
- INSERT or REPLACE INTO version_specs
516
- (service_name, version, spec)
517
- VALUES (?, ?, ?)""", (service_name, version, pickle.dumps(spec)))
740
+ spec: 'service_spec.SkyServiceSpec',
741
+ yaml_content: str) -> None:
742
+ assert _SQLALCHEMY_ENGINE is not None
743
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
744
+ if (_SQLALCHEMY_ENGINE.dialect.name ==
745
+ db_utils.SQLAlchemyDialect.SQLITE.value):
746
+ insert_func = sqlite.insert
747
+ elif (_SQLALCHEMY_ENGINE.dialect.name ==
748
+ db_utils.SQLAlchemyDialect.POSTGRESQL.value):
749
+ insert_func = postgresql.insert
750
+ else:
751
+ raise ValueError('Unsupported database dialect')
518
752
 
753
+ insert_stmt = insert_func(version_specs_table).values(
754
+ service_name=service_name,
755
+ version=version,
756
+ spec=pickle.dumps(spec),
757
+ yaml_content=yaml_content)
519
758
 
520
- def remove_service_versions(service_name: str) -> None:
521
- """Removes a replica from the database."""
522
- with db_utils.safe_cursor(_DB_PATH) as cursor:
523
- cursor.execute(
524
- """\
525
- DELETE FROM version_specs
526
- WHERE service_name=(?)""", (service_name,))
759
+ insert_stmt = insert_stmt.on_conflict_do_update(
760
+ index_elements=['service_name', 'version'],
761
+ set_={
762
+ 'spec': insert_stmt.excluded.spec,
763
+ 'yaml_content': insert_stmt.excluded.yaml_content
764
+ })
527
765
 
766
+ session.execute(insert_stmt)
767
+ session.commit()
528
768
 
769
+
770
+ @init_db
529
771
  def get_spec(service_name: str,
530
772
  version: int) -> Optional['service_spec.SkyServiceSpec']:
531
773
  """Gets spec from the database."""
532
- with db_utils.safe_cursor(_DB_PATH) as cursor:
533
- rows = cursor.execute(
534
- """\
535
- SELECT spec FROM version_specs
536
- WHERE service_name=(?)
537
- AND version=(?)""", (service_name, version)).fetchall()
538
- for row in rows:
539
- return pickle.loads(row[0])
540
- return None
541
-
542
-
774
+ assert _SQLALCHEMY_ENGINE is not None
775
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
776
+ result = session.execute(
777
+ sqlalchemy.select(version_specs_table.c.spec).where(
778
+ sqlalchemy.and_(
779
+ version_specs_table.c.service_name == service_name,
780
+ version_specs_table.c.version == version))).fetchone()
781
+ return pickle.loads(result[0]) if result else None
782
+
783
+
784
+ @init_db
785
+ def get_yaml_content(service_name: str, version: int) -> Optional[str]:
786
+ """Gets the yaml content of a version."""
787
+ assert _SQLALCHEMY_ENGINE is not None
788
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
789
+ result = session.execute(
790
+ sqlalchemy.select(version_specs_table.c.yaml_content).where(
791
+ sqlalchemy.and_(
792
+ version_specs_table.c.service_name == service_name,
793
+ version_specs_table.c.version == version))).fetchone()
794
+ return result[0] if result else None
795
+
796
+
797
+ @init_db
543
798
  def delete_version(service_name: str, version: int) -> None:
544
799
  """Deletes a version from the database."""
545
- with db_utils.safe_cursor(_DB_PATH) as cursor:
546
- cursor.execute(
547
- """\
548
- DELETE FROM version_specs
549
- WHERE service_name=(?)
550
- AND version=(?)""", (service_name, version))
800
+ assert _SQLALCHEMY_ENGINE is not None
801
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
802
+ session.execute(
803
+ sqlalchemy.delete(version_specs_table).where(
804
+ sqlalchemy.and_(
805
+ version_specs_table.c.service_name == service_name,
806
+ version_specs_table.c.version == version)))
807
+ session.commit()
551
808
 
552
809
 
810
+ @init_db
553
811
  def delete_all_versions(service_name: str) -> None:
554
812
  """Deletes all versions from the database."""
555
- with db_utils.safe_cursor(_DB_PATH) as cursor:
556
- cursor.execute(
557
- """\
558
- DELETE FROM version_specs
559
- WHERE service_name=(?)""", (service_name,))
813
+ assert _SQLALCHEMY_ENGINE is not None
814
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
815
+ session.execute(
816
+ sqlalchemy.delete(version_specs_table).where(
817
+ version_specs_table.c.service_name == service_name))
818
+ session.commit()
560
819
 
561
820
 
821
+ @init_db
562
822
  def get_latest_version(service_name: str) -> Optional[int]:
563
- with db_utils.safe_cursor(_DB_PATH) as cursor:
564
- rows = cursor.execute(
565
- """\
566
- SELECT MAX(version) FROM version_specs
567
- WHERE service_name=(?)""", (service_name,)).fetchall()
568
- if not rows or rows[0][0] is None:
569
- return None
570
- return rows[0][0]
823
+ assert _SQLALCHEMY_ENGINE is not None
824
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
825
+ result = session.execute(
826
+ sqlalchemy.select(sqlalchemy.func.max(
827
+ version_specs_table.c.version)).where(
828
+ version_specs_table.c.service_name ==
829
+ service_name)).fetchone()
830
+ return result[0] if result else None
571
831
 
572
832
 
833
+ @init_db
573
834
  def get_service_controller_port(service_name: str) -> int:
574
835
  """Gets the controller port of a service."""
575
- with db_utils.safe_cursor(_DB_PATH) as cursor:
576
- cursor.execute('SELECT controller_port FROM services WHERE name = ?',
577
- (service_name,))
578
- row = cursor.fetchone()
579
- if row is None:
836
+ assert _SQLALCHEMY_ENGINE is not None
837
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
838
+ result = session.execute(
839
+ sqlalchemy.select(services_table.c.controller_port).where(
840
+ services_table.c.name == service_name)).fetchone()
841
+ if result is None:
580
842
  raise ValueError(f'Service {service_name} does not exist.')
581
- return row[0]
843
+ return result[0]
582
844
 
583
845
 
846
+ @init_db
584
847
  def get_service_load_balancer_port(service_name: str) -> int:
585
848
  """Gets the load balancer port of a service."""
586
- with db_utils.safe_cursor(_DB_PATH) as cursor:
587
- cursor.execute('SELECT load_balancer_port FROM services WHERE name = ?',
588
- (service_name,))
589
- row = cursor.fetchone()
590
- if row is None:
849
+ assert _SQLALCHEMY_ENGINE is not None
850
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
851
+ result = session.execute(
852
+ sqlalchemy.select(services_table.c.load_balancer_port).where(
853
+ services_table.c.name == service_name)).fetchone()
854
+ if result is None:
591
855
  raise ValueError(f'Service {service_name} does not exist.')
592
- return row[0]
856
+ return result[0]
857
+
858
+
859
+ @init_db
860
+ def get_ha_recovery_script(service_name: str) -> Optional[str]:
861
+ """Gets the HA recovery script for a service."""
862
+ assert _SQLALCHEMY_ENGINE is not None
863
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
864
+ result = session.execute(
865
+ sqlalchemy.select(serve_ha_recovery_script_table.c.script).where(
866
+ serve_ha_recovery_script_table.c.service_name ==
867
+ service_name)).fetchone()
868
+ return result[0] if result else None
869
+
870
+
871
+ @init_db
872
+ def set_ha_recovery_script(service_name: str, script: str) -> None:
873
+ """Sets the HA recovery script for a service."""
874
+ assert _SQLALCHEMY_ENGINE is not None
875
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
876
+ if (_SQLALCHEMY_ENGINE.dialect.name ==
877
+ db_utils.SQLAlchemyDialect.SQLITE.value):
878
+ insert_func = sqlite.insert
879
+ elif (_SQLALCHEMY_ENGINE.dialect.name ==
880
+ db_utils.SQLAlchemyDialect.POSTGRESQL.value):
881
+ insert_func = postgresql.insert
882
+ else:
883
+ raise ValueError('Unsupported database dialect')
884
+
885
+ insert_stmt = insert_func(serve_ha_recovery_script_table).values(
886
+ service_name=service_name, script=script)
887
+
888
+ insert_stmt = insert_stmt.on_conflict_do_update(
889
+ index_elements=['service_name'],
890
+ set_={'script': insert_stmt.excluded.script})
891
+
892
+ session.execute(insert_stmt)
893
+ session.commit()
894
+
895
+
896
+ @init_db
897
+ def remove_ha_recovery_script(service_name: str) -> None:
898
+ """Removes the HA recovery script for a service."""
899
+ assert _SQLALCHEMY_ENGINE is not None
900
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
901
+ session.execute(
902
+ sqlalchemy.delete(serve_ha_recovery_script_table).where(
903
+ serve_ha_recovery_script_table.c.service_name == service_name))
904
+ session.commit()