skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (512)
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +25 -7
  3. sky/adaptors/common.py +24 -1
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/hyperbolic.py +8 -0
  7. sky/adaptors/kubernetes.py +149 -18
  8. sky/adaptors/nebius.py +170 -17
  9. sky/adaptors/primeintellect.py +1 -0
  10. sky/adaptors/runpod.py +68 -0
  11. sky/adaptors/seeweb.py +167 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +187 -4
  14. sky/authentication.py +179 -225
  15. sky/backends/__init__.py +4 -2
  16. sky/backends/backend.py +22 -9
  17. sky/backends/backend_utils.py +1299 -380
  18. sky/backends/cloud_vm_ray_backend.py +1715 -518
  19. sky/backends/docker_utils.py +1 -1
  20. sky/backends/local_docker_backend.py +11 -6
  21. sky/backends/wheel_utils.py +37 -9
  22. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  23. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  24. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  25. sky/{clouds/service_catalog → catalog}/common.py +89 -48
  26. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  27. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  28. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
  29. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
  31. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  33. sky/catalog/data_fetchers/fetch_nebius.py +335 -0
  34. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  35. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  36. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  37. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  38. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  39. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  40. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  41. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  42. sky/catalog/hyperbolic_catalog.py +136 -0
  43. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  44. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  45. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  46. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  47. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  48. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  49. sky/catalog/primeintellect_catalog.py +95 -0
  50. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  51. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  52. sky/catalog/seeweb_catalog.py +184 -0
  53. sky/catalog/shadeform_catalog.py +165 -0
  54. sky/catalog/ssh_catalog.py +167 -0
  55. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  56. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  57. sky/check.py +491 -203
  58. sky/cli.py +5 -6005
  59. sky/client/{cli.py → cli/command.py} +2477 -1885
  60. sky/client/cli/deprecation_utils.py +99 -0
  61. sky/client/cli/flags.py +359 -0
  62. sky/client/cli/table_utils.py +320 -0
  63. sky/client/common.py +70 -32
  64. sky/client/oauth.py +82 -0
  65. sky/client/sdk.py +1203 -297
  66. sky/client/sdk_async.py +833 -0
  67. sky/client/service_account_auth.py +47 -0
  68. sky/cloud_stores.py +73 -0
  69. sky/clouds/__init__.py +13 -0
  70. sky/clouds/aws.py +358 -93
  71. sky/clouds/azure.py +105 -83
  72. sky/clouds/cloud.py +127 -36
  73. sky/clouds/cudo.py +68 -50
  74. sky/clouds/do.py +66 -48
  75. sky/clouds/fluidstack.py +63 -44
  76. sky/clouds/gcp.py +339 -110
  77. sky/clouds/hyperbolic.py +293 -0
  78. sky/clouds/ibm.py +70 -49
  79. sky/clouds/kubernetes.py +563 -162
  80. sky/clouds/lambda_cloud.py +74 -54
  81. sky/clouds/nebius.py +206 -80
  82. sky/clouds/oci.py +88 -66
  83. sky/clouds/paperspace.py +61 -44
  84. sky/clouds/primeintellect.py +317 -0
  85. sky/clouds/runpod.py +164 -74
  86. sky/clouds/scp.py +89 -83
  87. sky/clouds/seeweb.py +466 -0
  88. sky/clouds/shadeform.py +400 -0
  89. sky/clouds/ssh.py +263 -0
  90. sky/clouds/utils/aws_utils.py +10 -4
  91. sky/clouds/utils/gcp_utils.py +87 -11
  92. sky/clouds/utils/oci_utils.py +38 -14
  93. sky/clouds/utils/scp_utils.py +177 -124
  94. sky/clouds/vast.py +99 -77
  95. sky/clouds/vsphere.py +51 -40
  96. sky/core.py +349 -139
  97. sky/dag.py +15 -0
  98. sky/dashboard/out/404.html +1 -1
  99. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  100. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
  102. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  105. sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
  106. sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
  111. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  112. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  113. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  115. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  116. sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
  118. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  119. sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  121. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  122. sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  124. sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  126. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  127. sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
  128. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  129. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  131. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  133. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  134. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
  136. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  139. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  141. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
  143. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
  144. sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  148. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
  149. sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
  151. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  152. sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
  153. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  154. sky/dashboard/out/clusters/[cluster].html +1 -1
  155. sky/dashboard/out/clusters.html +1 -1
  156. sky/dashboard/out/config.html +1 -0
  157. sky/dashboard/out/index.html +1 -1
  158. sky/dashboard/out/infra/[context].html +1 -0
  159. sky/dashboard/out/infra.html +1 -0
  160. sky/dashboard/out/jobs/[job].html +1 -1
  161. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  162. sky/dashboard/out/jobs.html +1 -1
  163. sky/dashboard/out/users.html +1 -0
  164. sky/dashboard/out/volumes.html +1 -0
  165. sky/dashboard/out/workspace/new.html +1 -0
  166. sky/dashboard/out/workspaces/[name].html +1 -0
  167. sky/dashboard/out/workspaces.html +1 -0
  168. sky/data/data_utils.py +137 -1
  169. sky/data/mounting_utils.py +269 -84
  170. sky/data/storage.py +1451 -1807
  171. sky/data/storage_utils.py +43 -57
  172. sky/exceptions.py +132 -2
  173. sky/execution.py +206 -63
  174. sky/global_user_state.py +2374 -586
  175. sky/jobs/__init__.py +5 -0
  176. sky/jobs/client/sdk.py +242 -65
  177. sky/jobs/client/sdk_async.py +143 -0
  178. sky/jobs/constants.py +9 -8
  179. sky/jobs/controller.py +839 -277
  180. sky/jobs/file_content_utils.py +80 -0
  181. sky/jobs/log_gc.py +201 -0
  182. sky/jobs/recovery_strategy.py +398 -152
  183. sky/jobs/scheduler.py +315 -189
  184. sky/jobs/server/core.py +829 -255
  185. sky/jobs/server/server.py +156 -115
  186. sky/jobs/server/utils.py +136 -0
  187. sky/jobs/state.py +2092 -701
  188. sky/jobs/utils.py +1242 -160
  189. sky/logs/__init__.py +21 -0
  190. sky/logs/agent.py +108 -0
  191. sky/logs/aws.py +243 -0
  192. sky/logs/gcp.py +91 -0
  193. sky/metrics/__init__.py +0 -0
  194. sky/metrics/utils.py +443 -0
  195. sky/models.py +78 -1
  196. sky/optimizer.py +164 -70
  197. sky/provision/__init__.py +90 -4
  198. sky/provision/aws/config.py +147 -26
  199. sky/provision/aws/instance.py +135 -50
  200. sky/provision/azure/instance.py +10 -5
  201. sky/provision/common.py +13 -1
  202. sky/provision/cudo/cudo_machine_type.py +1 -1
  203. sky/provision/cudo/cudo_utils.py +14 -8
  204. sky/provision/cudo/cudo_wrapper.py +72 -71
  205. sky/provision/cudo/instance.py +10 -6
  206. sky/provision/do/instance.py +10 -6
  207. sky/provision/do/utils.py +4 -3
  208. sky/provision/docker_utils.py +114 -23
  209. sky/provision/fluidstack/instance.py +13 -8
  210. sky/provision/gcp/__init__.py +1 -0
  211. sky/provision/gcp/config.py +301 -19
  212. sky/provision/gcp/constants.py +218 -0
  213. sky/provision/gcp/instance.py +36 -8
  214. sky/provision/gcp/instance_utils.py +18 -4
  215. sky/provision/gcp/volume_utils.py +247 -0
  216. sky/provision/hyperbolic/__init__.py +12 -0
  217. sky/provision/hyperbolic/config.py +10 -0
  218. sky/provision/hyperbolic/instance.py +437 -0
  219. sky/provision/hyperbolic/utils.py +373 -0
  220. sky/provision/instance_setup.py +93 -14
  221. sky/provision/kubernetes/__init__.py +5 -0
  222. sky/provision/kubernetes/config.py +9 -52
  223. sky/provision/kubernetes/constants.py +17 -0
  224. sky/provision/kubernetes/instance.py +789 -247
  225. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  226. sky/provision/kubernetes/network.py +27 -17
  227. sky/provision/kubernetes/network_utils.py +40 -43
  228. sky/provision/kubernetes/utils.py +1192 -531
  229. sky/provision/kubernetes/volume.py +282 -0
  230. sky/provision/lambda_cloud/instance.py +22 -16
  231. sky/provision/nebius/constants.py +50 -0
  232. sky/provision/nebius/instance.py +19 -6
  233. sky/provision/nebius/utils.py +196 -91
  234. sky/provision/oci/instance.py +10 -5
  235. sky/provision/paperspace/instance.py +10 -7
  236. sky/provision/paperspace/utils.py +1 -1
  237. sky/provision/primeintellect/__init__.py +10 -0
  238. sky/provision/primeintellect/config.py +11 -0
  239. sky/provision/primeintellect/instance.py +454 -0
  240. sky/provision/primeintellect/utils.py +398 -0
  241. sky/provision/provisioner.py +110 -36
  242. sky/provision/runpod/__init__.py +5 -0
  243. sky/provision/runpod/instance.py +27 -6
  244. sky/provision/runpod/utils.py +51 -18
  245. sky/provision/runpod/volume.py +180 -0
  246. sky/provision/scp/__init__.py +15 -0
  247. sky/provision/scp/config.py +93 -0
  248. sky/provision/scp/instance.py +531 -0
  249. sky/provision/seeweb/__init__.py +11 -0
  250. sky/provision/seeweb/config.py +13 -0
  251. sky/provision/seeweb/instance.py +807 -0
  252. sky/provision/shadeform/__init__.py +11 -0
  253. sky/provision/shadeform/config.py +12 -0
  254. sky/provision/shadeform/instance.py +351 -0
  255. sky/provision/shadeform/shadeform_utils.py +83 -0
  256. sky/provision/ssh/__init__.py +18 -0
  257. sky/provision/vast/instance.py +13 -8
  258. sky/provision/vast/utils.py +10 -7
  259. sky/provision/vsphere/common/vim_utils.py +1 -2
  260. sky/provision/vsphere/instance.py +15 -10
  261. sky/provision/vsphere/vsphere_utils.py +9 -19
  262. sky/py.typed +0 -0
  263. sky/resources.py +844 -118
  264. sky/schemas/__init__.py +0 -0
  265. sky/schemas/api/__init__.py +0 -0
  266. sky/schemas/api/responses.py +225 -0
  267. sky/schemas/db/README +4 -0
  268. sky/schemas/db/env.py +90 -0
  269. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  270. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  271. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  272. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  273. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  274. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  275. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  276. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  277. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  278. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  279. sky/schemas/db/script.py.mako +28 -0
  280. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  281. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  282. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  283. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  284. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  285. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  286. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  287. sky/schemas/generated/__init__.py +0 -0
  288. sky/schemas/generated/autostopv1_pb2.py +36 -0
  289. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  290. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  291. sky/schemas/generated/jobsv1_pb2.py +86 -0
  292. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  293. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  294. sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
  295. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  296. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  297. sky/schemas/generated/servev1_pb2.py +58 -0
  298. sky/schemas/generated/servev1_pb2.pyi +115 -0
  299. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  300. sky/serve/autoscalers.py +357 -5
  301. sky/serve/client/impl.py +310 -0
  302. sky/serve/client/sdk.py +47 -139
  303. sky/serve/client/sdk_async.py +130 -0
  304. sky/serve/constants.py +10 -8
  305. sky/serve/controller.py +64 -19
  306. sky/serve/load_balancer.py +106 -60
  307. sky/serve/load_balancing_policies.py +115 -1
  308. sky/serve/replica_managers.py +273 -162
  309. sky/serve/serve_rpc_utils.py +179 -0
  310. sky/serve/serve_state.py +554 -251
  311. sky/serve/serve_utils.py +733 -220
  312. sky/serve/server/core.py +66 -711
  313. sky/serve/server/impl.py +1093 -0
  314. sky/serve/server/server.py +21 -18
  315. sky/serve/service.py +133 -48
  316. sky/serve/service_spec.py +135 -16
  317. sky/serve/spot_placer.py +3 -0
  318. sky/server/auth/__init__.py +0 -0
  319. sky/server/auth/authn.py +50 -0
  320. sky/server/auth/loopback.py +38 -0
  321. sky/server/auth/oauth2_proxy.py +200 -0
  322. sky/server/common.py +475 -181
  323. sky/server/config.py +81 -23
  324. sky/server/constants.py +44 -6
  325. sky/server/daemons.py +229 -0
  326. sky/server/html/token_page.html +185 -0
  327. sky/server/metrics.py +160 -0
  328. sky/server/requests/executor.py +528 -138
  329. sky/server/requests/payloads.py +351 -17
  330. sky/server/requests/preconditions.py +21 -17
  331. sky/server/requests/process.py +112 -29
  332. sky/server/requests/request_names.py +120 -0
  333. sky/server/requests/requests.py +817 -224
  334. sky/server/requests/serializers/decoders.py +82 -31
  335. sky/server/requests/serializers/encoders.py +140 -22
  336. sky/server/requests/threads.py +106 -0
  337. sky/server/rest.py +417 -0
  338. sky/server/server.py +1290 -284
  339. sky/server/state.py +20 -0
  340. sky/server/stream_utils.py +345 -57
  341. sky/server/uvicorn.py +217 -3
  342. sky/server/versions.py +270 -0
  343. sky/setup_files/MANIFEST.in +5 -0
  344. sky/setup_files/alembic.ini +156 -0
  345. sky/setup_files/dependencies.py +136 -31
  346. sky/setup_files/setup.py +44 -42
  347. sky/sky_logging.py +102 -5
  348. sky/skylet/attempt_skylet.py +1 -0
  349. sky/skylet/autostop_lib.py +129 -8
  350. sky/skylet/configs.py +27 -20
  351. sky/skylet/constants.py +171 -19
  352. sky/skylet/events.py +105 -21
  353. sky/skylet/job_lib.py +335 -104
  354. sky/skylet/log_lib.py +297 -18
  355. sky/skylet/log_lib.pyi +44 -1
  356. sky/skylet/ray_patches/__init__.py +17 -3
  357. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  358. sky/skylet/ray_patches/cli.py.diff +19 -0
  359. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  360. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  361. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  362. sky/skylet/ray_patches/updater.py.diff +18 -0
  363. sky/skylet/ray_patches/worker.py.diff +41 -0
  364. sky/skylet/services.py +564 -0
  365. sky/skylet/skylet.py +63 -4
  366. sky/skylet/subprocess_daemon.py +103 -29
  367. sky/skypilot_config.py +506 -99
  368. sky/ssh_node_pools/__init__.py +1 -0
  369. sky/ssh_node_pools/core.py +135 -0
  370. sky/ssh_node_pools/server.py +233 -0
  371. sky/task.py +621 -137
  372. sky/templates/aws-ray.yml.j2 +10 -3
  373. sky/templates/azure-ray.yml.j2 +1 -1
  374. sky/templates/do-ray.yml.j2 +1 -1
  375. sky/templates/gcp-ray.yml.j2 +57 -0
  376. sky/templates/hyperbolic-ray.yml.j2 +67 -0
  377. sky/templates/jobs-controller.yaml.j2 +27 -24
  378. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  379. sky/templates/kubernetes-ray.yml.j2 +607 -51
  380. sky/templates/lambda-ray.yml.j2 +1 -1
  381. sky/templates/nebius-ray.yml.j2 +33 -12
  382. sky/templates/paperspace-ray.yml.j2 +1 -1
  383. sky/templates/primeintellect-ray.yml.j2 +71 -0
  384. sky/templates/runpod-ray.yml.j2 +9 -1
  385. sky/templates/scp-ray.yml.j2 +3 -50
  386. sky/templates/seeweb-ray.yml.j2 +108 -0
  387. sky/templates/shadeform-ray.yml.j2 +72 -0
  388. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  389. sky/templates/websocket_proxy.py +178 -18
  390. sky/usage/usage_lib.py +18 -11
  391. sky/users/__init__.py +0 -0
  392. sky/users/model.conf +15 -0
  393. sky/users/permission.py +387 -0
  394. sky/users/rbac.py +121 -0
  395. sky/users/server.py +720 -0
  396. sky/users/token_service.py +218 -0
  397. sky/utils/accelerator_registry.py +34 -5
  398. sky/utils/admin_policy_utils.py +84 -38
  399. sky/utils/annotations.py +16 -5
  400. sky/utils/asyncio_utils.py +78 -0
  401. sky/utils/auth_utils.py +153 -0
  402. sky/utils/benchmark_utils.py +60 -0
  403. sky/utils/cli_utils/status_utils.py +159 -86
  404. sky/utils/cluster_utils.py +31 -9
  405. sky/utils/command_runner.py +354 -68
  406. sky/utils/command_runner.pyi +93 -3
  407. sky/utils/common.py +35 -8
  408. sky/utils/common_utils.py +310 -87
  409. sky/utils/config_utils.py +87 -5
  410. sky/utils/context.py +402 -0
  411. sky/utils/context_utils.py +222 -0
  412. sky/utils/controller_utils.py +264 -89
  413. sky/utils/dag_utils.py +31 -12
  414. sky/utils/db/__init__.py +0 -0
  415. sky/utils/db/db_utils.py +470 -0
  416. sky/utils/db/migration_utils.py +133 -0
  417. sky/utils/directory_utils.py +12 -0
  418. sky/utils/env_options.py +13 -0
  419. sky/utils/git.py +567 -0
  420. sky/utils/git_clone.sh +460 -0
  421. sky/utils/infra_utils.py +195 -0
  422. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  423. sky/utils/kubernetes/config_map_utils.py +133 -0
  424. sky/utils/kubernetes/create_cluster.sh +13 -27
  425. sky/utils/kubernetes/delete_cluster.sh +10 -7
  426. sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
  427. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  428. sky/utils/kubernetes/generate_kind_config.py +6 -66
  429. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  430. sky/utils/kubernetes/gpu_labeler.py +5 -5
  431. sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
  432. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  433. sky/utils/kubernetes/ssh_utils.py +221 -0
  434. sky/utils/kubernetes_enums.py +8 -15
  435. sky/utils/lock_events.py +94 -0
  436. sky/utils/locks.py +368 -0
  437. sky/utils/log_utils.py +300 -6
  438. sky/utils/perf_utils.py +22 -0
  439. sky/utils/resource_checker.py +298 -0
  440. sky/utils/resources_utils.py +249 -32
  441. sky/utils/rich_utils.py +213 -37
  442. sky/utils/schemas.py +905 -147
  443. sky/utils/serialize_utils.py +16 -0
  444. sky/utils/status_lib.py +10 -0
  445. sky/utils/subprocess_utils.py +38 -15
  446. sky/utils/tempstore.py +70 -0
  447. sky/utils/timeline.py +24 -52
  448. sky/utils/ux_utils.py +84 -15
  449. sky/utils/validator.py +11 -1
  450. sky/utils/volume.py +86 -0
  451. sky/utils/yaml_utils.py +111 -0
  452. sky/volumes/__init__.py +13 -0
  453. sky/volumes/client/__init__.py +0 -0
  454. sky/volumes/client/sdk.py +149 -0
  455. sky/volumes/server/__init__.py +0 -0
  456. sky/volumes/server/core.py +258 -0
  457. sky/volumes/server/server.py +122 -0
  458. sky/volumes/volume.py +212 -0
  459. sky/workspaces/__init__.py +0 -0
  460. sky/workspaces/core.py +655 -0
  461. sky/workspaces/server.py +101 -0
  462. sky/workspaces/utils.py +56 -0
  463. skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
  464. skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
  465. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
  466. sky/benchmark/benchmark_state.py +0 -256
  467. sky/benchmark/benchmark_utils.py +0 -641
  468. sky/clouds/service_catalog/constants.py +0 -7
  469. sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
  470. sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
  471. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  472. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  473. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  474. sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
  475. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  476. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  477. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  478. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  479. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  480. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  481. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  482. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
  483. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  484. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  485. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  486. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
  487. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  488. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  489. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  490. sky/jobs/dashboard/dashboard.py +0 -223
  491. sky/jobs/dashboard/static/favicon.ico +0 -0
  492. sky/jobs/dashboard/templates/index.html +0 -831
  493. sky/jobs/server/dashboard_utils.py +0 -69
  494. sky/skylet/providers/scp/__init__.py +0 -2
  495. sky/skylet/providers/scp/config.py +0 -149
  496. sky/skylet/providers/scp/node_provider.py +0 -578
  497. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  498. sky/utils/db_utils.py +0 -100
  499. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  500. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  501. skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
  502. skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
  503. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  504. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  505. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  506. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  507. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  508. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  509. /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
  510. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
  511. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
  512. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/serve/serve_state.py CHANGED
@@ -1,89 +1,162 @@
1
1
  """The database for services information."""
2
2
  import collections
3
3
  import enum
4
+ import functools
4
5
  import json
5
- import pathlib
6
6
  import pickle
7
- import sqlite3
7
+ import threading
8
8
  import typing
9
- from typing import Any, Dict, List, Optional, Tuple
9
+ from typing import Any, Dict, List, Optional
10
+ import uuid
10
11
 
11
12
  import colorama
13
+ import sqlalchemy
14
+ from sqlalchemy import exc as sqlalchemy_exc
15
+ from sqlalchemy import orm
16
+ from sqlalchemy.dialects import postgresql
17
+ from sqlalchemy.dialects import sqlite
18
+ from sqlalchemy.ext import declarative
12
19
 
13
20
  from sky.serve import constants
14
- from sky.utils import db_utils
21
+ from sky.utils import common_utils
22
+ from sky.utils.db import db_utils
23
+ from sky.utils.db import migration_utils
15
24
 
16
25
  if typing.TYPE_CHECKING:
26
+ from sqlalchemy.engine import row
27
+
17
28
  from sky.serve import replica_managers
18
29
  from sky.serve import service_spec
19
30
 
31
+ _SQLALCHEMY_ENGINE: Optional[sqlalchemy.engine.Engine] = None
32
+ _SQLALCHEMY_ENGINE_LOCK = threading.Lock()
33
+
34
+ Base = declarative.declarative_base()
35
+
36
+ # === Database schema ===
37
+ services_table = sqlalchemy.Table(
38
+ 'services',
39
+ Base.metadata,
40
+ sqlalchemy.Column('name', sqlalchemy.Text, primary_key=True),
41
+ sqlalchemy.Column('controller_job_id',
42
+ sqlalchemy.Integer,
43
+ server_default=None),
44
+ sqlalchemy.Column('controller_port',
45
+ sqlalchemy.Integer,
46
+ server_default=None),
47
+ sqlalchemy.Column('load_balancer_port',
48
+ sqlalchemy.Integer,
49
+ server_default=None),
50
+ sqlalchemy.Column('status', sqlalchemy.Text),
51
+ sqlalchemy.Column('uptime', sqlalchemy.Integer, server_default=None),
52
+ sqlalchemy.Column('policy', sqlalchemy.Text, server_default=None),
53
+ sqlalchemy.Column('auto_restart', sqlalchemy.Integer, server_default=None),
54
+ sqlalchemy.Column('requested_resources',
55
+ sqlalchemy.LargeBinary,
56
+ server_default=None),
57
+ sqlalchemy.Column('requested_resources_str', sqlalchemy.Text),
58
+ sqlalchemy.Column('current_version',
59
+ sqlalchemy.Integer,
60
+ server_default=str(constants.INITIAL_VERSION)),
61
+ sqlalchemy.Column('active_versions',
62
+ sqlalchemy.Text,
63
+ server_default=json.dumps([])),
64
+ sqlalchemy.Column('load_balancing_policy',
65
+ sqlalchemy.Text,
66
+ server_default=None),
67
+ sqlalchemy.Column('tls_encrypted', sqlalchemy.Integer, server_default='0'),
68
+ sqlalchemy.Column('pool', sqlalchemy.Integer, server_default='0'),
69
+ sqlalchemy.Column('controller_pid', sqlalchemy.Integer,
70
+ server_default=None),
71
+ sqlalchemy.Column('hash', sqlalchemy.Text, server_default=None),
72
+ sqlalchemy.Column('entrypoint', sqlalchemy.Text, server_default=None),
73
+ )
74
+
75
+ replicas_table = sqlalchemy.Table(
76
+ 'replicas',
77
+ Base.metadata,
78
+ sqlalchemy.Column('service_name', sqlalchemy.Text, primary_key=True),
79
+ sqlalchemy.Column('replica_id', sqlalchemy.Integer, primary_key=True),
80
+ sqlalchemy.Column('replica_info', sqlalchemy.LargeBinary),
81
+ )
82
+
83
+ version_specs_table = sqlalchemy.Table(
84
+ 'version_specs',
85
+ Base.metadata,
86
+ sqlalchemy.Column('service_name', sqlalchemy.Text, primary_key=True),
87
+ sqlalchemy.Column('version', sqlalchemy.Integer, primary_key=True),
88
+ sqlalchemy.Column('spec', sqlalchemy.LargeBinary),
89
+ )
90
+
91
+ serve_ha_recovery_script_table = sqlalchemy.Table(
92
+ 'serve_ha_recovery_script',
93
+ Base.metadata,
94
+ sqlalchemy.Column('service_name', sqlalchemy.Text, primary_key=True),
95
+ sqlalchemy.Column('script', sqlalchemy.Text),
96
+ )
97
+
98
+
99
+ def create_table(engine: sqlalchemy.engine.Engine):
100
+ """Creates the service and replica tables if they do not exist."""
20
101
 
21
- def _get_db_path() -> str:
22
- """Workaround to collapse multi-step Path ops for type checker.
23
- Ensures _DB_PATH is str, avoiding Union[Path, str] inference.
24
- """
25
- path = pathlib.Path(constants.SKYSERVE_METADATA_DIR) / 'services.db'
26
- path = path.expanduser().absolute()
27
- path.parents[0].mkdir(parents=True, exist_ok=True)
28
- return str(path)
102
+ # Enable WAL mode to avoid locking issues.
103
+ # See: issue #3863, #1441 and PR #1509
104
+ # https://github.com/microsoft/WSL/issues/2395
105
+ # TODO(romilb): We do not enable WAL for WSL because of known issue in WSL.
106
+ # This may cause the database locked problem from WSL issue #1441.
107
+ if (engine.dialect.name == db_utils.SQLAlchemyDialect.SQLITE.value and
108
+ not common_utils.is_wsl()):
109
+ try:
110
+ with orm.Session(engine) as session:
111
+ session.execute(sqlalchemy.text('PRAGMA journal_mode=WAL'))
112
+ session.commit()
113
+ except sqlalchemy_exc.OperationalError as e:
114
+ if 'database is locked' not in str(e):
115
+ raise
116
+ # If the database is locked, it is OK to continue, as the WAL mode
117
+ # is not critical and is likely to be enabled by other processes.
29
118
 
119
+ migration_utils.safe_alembic_upgrade(engine, migration_utils.SERVE_DB_NAME,
120
+ migration_utils.SERVE_VERSION)
30
121
 
31
- _DB_PATH: str = _get_db_path()
32
122
 
123
+ def initialize_and_get_db() -> sqlalchemy.engine.Engine:
124
+ global _SQLALCHEMY_ENGINE
125
+
126
+ if _SQLALCHEMY_ENGINE is not None:
127
+ return _SQLALCHEMY_ENGINE
128
+
129
+ with _SQLALCHEMY_ENGINE_LOCK:
130
+ if _SQLALCHEMY_ENGINE is not None:
131
+ return _SQLALCHEMY_ENGINE
132
+ # get an engine to the db
133
+ engine = db_utils.get_engine('serve/services')
134
+
135
+ # run migrations if needed
136
+ create_table(engine)
137
+
138
+ # return engine
139
+ _SQLALCHEMY_ENGINE = engine
140
+ return _SQLALCHEMY_ENGINE
141
+
142
+
143
+ def init_db(func):
144
+ """Decorator that initializes the database before calling `func`."""
145
+
146
+ @functools.wraps(func)
147
+ def wrapper(*args, **kwargs):
148
+ initialize_and_get_db()
149
+ return func(*args, **kwargs)
150
+
151
+ return wrapper
33
152
 
34
- def create_table(cursor: 'sqlite3.Cursor', conn: 'sqlite3.Connection') -> None:
35
- """Creates the service and replica tables if they do not exist."""
36
153
 
37
- # auto_restart and requested_resources column is deprecated.
38
- cursor.execute("""\
39
- CREATE TABLE IF NOT EXISTS services (
40
- name TEXT PRIMARY KEY,
41
- controller_job_id INTEGER DEFAULT NULL,
42
- controller_port INTEGER DEFAULT NULL,
43
- load_balancer_port INTEGER DEFAULT NULL,
44
- status TEXT,
45
- uptime INTEGER DEFAULT NULL,
46
- policy TEXT DEFAULT NULL,
47
- auto_restart INTEGER DEFAULT NULL,
48
- requested_resources BLOB DEFAULT NULL)""")
49
- cursor.execute("""\
50
- CREATE TABLE IF NOT EXISTS replicas (
51
- service_name TEXT,
52
- replica_id INTEGER,
53
- replica_info BLOB,
54
- PRIMARY KEY (service_name, replica_id))""")
55
- cursor.execute("""\
56
- CREATE TABLE IF NOT EXISTS version_specs (
57
- version INTEGER,
58
- service_name TEXT,
59
- spec BLOB,
60
- PRIMARY KEY (service_name, version))""")
61
- conn.commit()
62
-
63
- # Backward compatibility.
64
- db_utils.add_column_to_table(cursor, conn, 'services',
65
- 'requested_resources_str', 'TEXT')
66
- # Deprecated: switched to `active_versions` below for the version
67
- # considered active by the load balancer. The
68
- # authscaler/replica_manager version can be found in the
69
- # version_specs table.
70
- db_utils.add_column_to_table(
71
- cursor, conn, 'services', 'current_version',
72
- f'INTEGER DEFAULT {constants.INITIAL_VERSION}')
73
- # The versions that is activated for the service. This is a list
74
- # of integers in json format.
75
- db_utils.add_column_to_table(cursor, conn, 'services', 'active_versions',
76
- f'TEXT DEFAULT {json.dumps([])!r}')
77
- db_utils.add_column_to_table(cursor, conn, 'services',
78
- 'load_balancing_policy', 'TEXT DEFAULT NULL')
79
- # Whether the service's load balancer is encrypted with TLS.
80
- db_utils.add_column_to_table(cursor, conn, 'services', 'tls_encrypted',
81
- 'INTEGER DEFAULT 0')
82
- conn.commit()
83
-
84
-
85
- db_utils.SQLiteConn(_DB_PATH, create_table)
86
- _UNIQUE_CONSTRAINT_FAILED_ERROR_MSG = 'UNIQUE constraint failed: services.name'
154
+ _UNIQUE_CONSTRAINT_FAILED_ERROR_MSGS = [
155
+ # sqlite
156
+ 'UNIQUE constraint failed: services.name',
157
+ # postgres
158
+ 'duplicate key value violates unique constraint "services_pkey"',
159
+ ]
87
160
 
88
161
 
89
162
  # === Statuses ===
@@ -247,153 +320,246 @@ _SERVICE_STATUS_TO_COLOR = {
247
320
  }
248
321
 
249
322
 
323
+ @init_db
250
324
  def add_service(name: str, controller_job_id: int, policy: str,
251
325
  requested_resources_str: str, load_balancing_policy: str,
252
- status: ServiceStatus, tls_encrypted: bool) -> bool:
326
+ status: ServiceStatus, tls_encrypted: bool, pool: bool,
327
+ controller_pid: int, entrypoint: str) -> bool:
253
328
  """Add a service in the database.
254
329
 
255
330
  Returns:
256
331
  True if the service is added successfully, False if the service already
257
332
  exists.
258
333
  """
334
+ assert _SQLALCHEMY_ENGINE is not None
259
335
  try:
260
- with db_utils.safe_cursor(_DB_PATH) as cursor:
261
- cursor.execute(
262
- """\
263
- INSERT INTO services
264
- (name, controller_job_id, status, policy,
265
- requested_resources_str, load_balancing_policy, tls_encrypted)
266
- VALUES (?, ?, ?, ?, ?, ?, ?)""",
267
- (name, controller_job_id, status.value, policy,
268
- requested_resources_str, load_balancing_policy,
269
- int(tls_encrypted)))
270
-
271
- except sqlite3.IntegrityError as e:
272
- if str(e) != _UNIQUE_CONSTRAINT_FAILED_ERROR_MSG:
273
- raise RuntimeError('Unexpected database error') from e
274
- return False
336
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
337
+ if (_SQLALCHEMY_ENGINE.dialect.name ==
338
+ db_utils.SQLAlchemyDialect.SQLITE.value):
339
+ insert_func = sqlite.insert
340
+ elif (_SQLALCHEMY_ENGINE.dialect.name ==
341
+ db_utils.SQLAlchemyDialect.POSTGRESQL.value):
342
+ insert_func = postgresql.insert
343
+ else:
344
+ raise ValueError('Unsupported database dialect')
345
+
346
+ insert_stmt = insert_func(services_table).values(
347
+ name=name,
348
+ controller_job_id=controller_job_id,
349
+ status=status.value,
350
+ policy=policy,
351
+ requested_resources_str=requested_resources_str,
352
+ load_balancing_policy=load_balancing_policy,
353
+ tls_encrypted=int(tls_encrypted),
354
+ pool=int(pool),
355
+ controller_pid=controller_pid,
356
+ hash=str(uuid.uuid4()),
357
+ entrypoint=entrypoint)
358
+ session.execute(insert_stmt)
359
+ session.commit()
360
+
361
+ except sqlalchemy_exc.IntegrityError as e:
362
+ for msg in _UNIQUE_CONSTRAINT_FAILED_ERROR_MSGS:
363
+ if msg in str(e):
364
+ return False
365
+ raise RuntimeError('Unexpected database error') from e
275
366
  return True
276
367
 
277
368
 
369
+ @init_db
370
+ def update_service_controller_pid(service_name: str,
371
+ controller_pid: int) -> None:
372
+ """Updates the controller pid of a service.
373
+
374
+ This is used to update the controller pid of a service on ha recovery.
375
+ """
376
+ assert _SQLALCHEMY_ENGINE is not None
377
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
378
+ session.query(services_table).filter(
379
+ services_table.c.name == service_name).update(
380
+ {services_table.c.controller_pid: controller_pid})
381
+ session.commit()
382
+
383
+
384
+ @init_db
278
385
  def remove_service(service_name: str) -> None:
279
386
  """Removes a service from the database."""
280
- with db_utils.safe_cursor(_DB_PATH) as cursor:
281
- cursor.execute("""\
282
- DELETE FROM services WHERE name=(?)""", (service_name,))
387
+ assert _SQLALCHEMY_ENGINE is not None
388
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
389
+ session.execute(
390
+ sqlalchemy.delete(services_table).where(
391
+ services_table.c.name == service_name))
392
+ session.commit()
283
393
 
284
394
 
395
+ @init_db
285
396
  def set_service_uptime(service_name: str, uptime: int) -> None:
286
397
  """Sets the uptime of a service."""
287
- with db_utils.safe_cursor(_DB_PATH) as cursor:
288
- cursor.execute(
289
- """\
290
- UPDATE services SET
291
- uptime=(?) WHERE name=(?)""", (uptime, service_name))
398
+ assert _SQLALCHEMY_ENGINE is not None
399
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
400
+ session.query(services_table).filter(
401
+ services_table.c.name == service_name).update(
402
+ {services_table.c.uptime: uptime})
403
+ session.commit()
292
404
 
293
405
 
406
+ @init_db
294
407
  def set_service_status_and_active_versions(
295
408
  service_name: str,
296
409
  status: ServiceStatus,
297
410
  active_versions: Optional[List[int]] = None) -> None:
298
411
  """Sets the service status."""
299
- vars_to_set = 'status=(?)'
300
- values: Tuple[str, ...] = (status.value, service_name)
412
+ assert _SQLALCHEMY_ENGINE is not None
413
+ update_dict = {services_table.c.status: status.value}
301
414
  if active_versions is not None:
302
- vars_to_set = 'status=(?), active_versions=(?)'
303
- values = (status.value, json.dumps(active_versions), service_name)
304
- with db_utils.safe_cursor(_DB_PATH) as cursor:
305
- cursor.execute(
306
- f"""\
307
- UPDATE services SET
308
- {vars_to_set} WHERE name=(?)""", values)
415
+ update_dict[services_table.c.active_versions] = json.dumps(
416
+ active_versions)
417
+
418
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
419
+ session.query(services_table).filter(
420
+ services_table.c.name == service_name).update(update_dict)
421
+ session.commit()
309
422
 
310
423
 
424
+ @init_db
311
425
  def set_service_controller_port(service_name: str,
312
426
  controller_port: int) -> None:
313
427
  """Sets the controller port of a service."""
314
- with db_utils.safe_cursor(_DB_PATH) as cursor:
315
- cursor.execute(
316
- """\
317
- UPDATE services SET
318
- controller_port=(?) WHERE name=(?)""",
319
- (controller_port, service_name))
428
+ assert _SQLALCHEMY_ENGINE is not None
429
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
430
+ session.query(services_table).filter(
431
+ services_table.c.name == service_name).update(
432
+ {services_table.c.controller_port: controller_port})
433
+ session.commit()
320
434
 
321
435
 
436
+ @init_db
322
437
  def set_service_load_balancer_port(service_name: str,
323
438
  load_balancer_port: int) -> None:
324
439
  """Sets the load balancer port of a service."""
325
- with db_utils.safe_cursor(_DB_PATH) as cursor:
326
- cursor.execute(
327
- """\
328
- UPDATE services SET
329
- load_balancer_port=(?) WHERE name=(?)""",
330
- (load_balancer_port, service_name))
331
-
332
-
333
- def _get_service_from_row(row) -> Dict[str, Any]:
334
- (current_version, name, controller_job_id, controller_port,
335
- load_balancer_port, status, uptime, policy, _, _, requested_resources_str,
336
- _, active_versions, load_balancing_policy, tls_encrypted) = row[:15]
337
- return {
338
- 'name': name,
339
- 'controller_job_id': controller_job_id,
340
- 'controller_port': controller_port,
341
- 'load_balancer_port': load_balancer_port,
342
- 'status': ServiceStatus[status],
343
- 'uptime': uptime,
344
- 'policy': policy,
440
+ assert _SQLALCHEMY_ENGINE is not None
441
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
442
+ session.query(services_table).filter(
443
+ services_table.c.name == service_name).update(
444
+ {services_table.c.load_balancer_port: load_balancer_port})
445
+ session.commit()
446
+
447
+
448
+ def _get_service_from_row(r: 'row.RowMapping') -> Dict[str, Any]:
449
+ # Get max_version by key; this column comes from the version_specs subquery.
450
+ current_version = r['max_version']
451
+
452
+ record = {
453
+ 'name': r['name'],
454
+ 'controller_job_id': r['controller_job_id'],
455
+ 'controller_port': r['controller_port'],
456
+ 'load_balancer_port': r['load_balancer_port'],
457
+ 'status': ServiceStatus[r['status']],
458
+ 'uptime': r['uptime'],
459
+ 'policy': r['policy'],
345
460
  # The version that the autoscaler/replica manager are on. It can be larger
346
461
  # than the active versions as the load balancer may not consider the
347
462
  # latest version to be active for serving traffic.
348
463
  'version': current_version,
349
464
  # The versions that are active for the load balancer. This is a list of
350
465
  # integers in json format. This is mainly for display purpose.
351
- 'active_versions': json.loads(active_versions),
352
- 'requested_resources_str': requested_resources_str,
353
- 'load_balancing_policy': load_balancing_policy,
354
- 'tls_encrypted': bool(tls_encrypted),
466
+ 'active_versions': json.loads(r['active_versions'])
467
+ if r['active_versions'] else [],
468
+ 'requested_resources_str': r['requested_resources_str'],
469
+ 'load_balancing_policy': r['load_balancing_policy'],
470
+ 'tls_encrypted': bool(r['tls_encrypted']),
471
+ 'pool': bool(r['pool']),
472
+ 'controller_pid': r['controller_pid'],
473
+ 'hash': r['hash'],
474
+ 'entrypoint': r['entrypoint'],
355
475
  }
476
+ latest_spec = get_spec(r['name'], current_version)
477
+ if latest_spec is not None:
478
+ record['policy'] = latest_spec.autoscaling_policy_str()
479
+ record['load_balancing_policy'] = latest_spec.load_balancing_policy
480
+ return record
356
481
 
357
482
 
483
+ @init_db
358
484
  def get_services() -> List[Dict[str, Any]]:
359
485
  """Get all existing service records."""
360
- with db_utils.safe_cursor(_DB_PATH) as cursor:
361
- rows = cursor.execute('SELECT v.max_version, s.* FROM services s '
362
- 'JOIN ('
363
- 'SELECT service_name, MAX(version) as max_version'
364
- ' FROM version_specs GROUP BY service_name) v '
365
- 'ON s.name=v.service_name').fetchall()
486
+ assert _SQLALCHEMY_ENGINE is not None
487
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
488
+ subquery = sqlalchemy.select(
489
+ version_specs_table.c.service_name,
490
+ sqlalchemy.func.max(
491
+ version_specs_table.c.version).label('max_version')).group_by(
492
+ version_specs_table.c.service_name).alias('v')
493
+
494
+ query = sqlalchemy.select(
495
+ subquery.c.max_version, services_table).select_from(
496
+ services_table.join(
497
+ subquery, services_table.c.name == subquery.c.service_name))
498
+ rows = session.execute(query).fetchall()
366
499
  records = []
367
500
  for row in rows:
368
- records.append(_get_service_from_row(row))
501
+ records.append(_get_service_from_row(row._mapping)) # pylint: disable=protected-access
369
502
  return records
370
503
 
371
504
 
505
+ @init_db
506
+ def get_num_services() -> int:
507
+ """Get the number of services."""
508
+ assert _SQLALCHEMY_ENGINE is not None
509
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
510
+ return session.execute(
511
+ sqlalchemy.select(sqlalchemy.func.count() # pylint: disable=not-callable
512
+ ).select_from(services_table)).fetchone()[0]
513
+
514
+
515
+ @init_db
372
516
  def get_service_from_name(service_name: str) -> Optional[Dict[str, Any]]:
373
517
  """Get the record of the service with the given name, or None."""
374
- with db_utils.safe_cursor(_DB_PATH) as cursor:
375
- rows = cursor.execute(
376
- 'SELECT v.max_version, s.* FROM services s '
377
- 'JOIN ('
378
- 'SELECT service_name, MAX(version) as max_version '
379
- 'FROM version_specs WHERE service_name=(?)) v '
380
- 'ON s.name=v.service_name WHERE name=(?)',
381
- (service_name, service_name)).fetchall()
518
+ assert _SQLALCHEMY_ENGINE is not None
519
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
520
+ subquery = sqlalchemy.select(
521
+ version_specs_table.c.service_name,
522
+ sqlalchemy.func.max(
523
+ version_specs_table.c.version).label('max_version')
524
+ ).where(version_specs_table.c.service_name == service_name).group_by(
525
+ version_specs_table.c.service_name).alias('v')
526
+
527
+ query = sqlalchemy.select(
528
+ subquery.c.max_version, services_table).select_from(
529
+ services_table.join(
530
+ subquery,
531
+ services_table.c.name == subquery.c.service_name)).where(
532
+ services_table.c.name == service_name)
533
+
534
+ rows = session.execute(query).fetchall()
382
535
  for row in rows:
383
- return _get_service_from_row(row)
536
+ return _get_service_from_row(row._mapping) # pylint: disable=protected-access
384
537
  return None
385
538
 
386
539
 
540
+ @init_db
541
+ def get_service_hash(service_name: str) -> Optional[str]:
542
+ """Get the hash of a service."""
543
+ assert _SQLALCHEMY_ENGINE is not None
544
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
545
+ result = session.execute(
546
+ sqlalchemy.select(services_table.c.hash).where(
547
+ services_table.c.name == service_name)).fetchone()
548
+ return result[0] if result else None
549
+
550
+
551
+ @init_db
387
552
  def get_service_versions(service_name: str) -> List[int]:
388
553
  """Gets all versions of a service."""
389
- with db_utils.safe_cursor(_DB_PATH) as cursor:
390
- rows = cursor.execute(
391
- """\
392
- SELECT DISTINCT version FROM version_specs
393
- WHERE service_name=(?)""", (service_name,)).fetchall()
554
+ assert _SQLALCHEMY_ENGINE is not None
555
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
556
+ rows = session.execute(
557
+ sqlalchemy.select(version_specs_table.c.version.distinct()).where(
558
+ version_specs_table.c.service_name == service_name)).fetchall()
394
559
  return [row[0] for row in rows]
395
560
 
396
561
 
562
+ @init_db
397
563
  def get_glob_service_names(
398
564
  service_names: Optional[List[str]] = None) -> List[str]:
399
565
  """Get service names matching the glob patterns.
@@ -405,72 +571,97 @@ def get_glob_service_names(
405
571
  Returns:
406
572
  A list of non-duplicated service names.
407
573
  """
408
- with db_utils.safe_cursor(_DB_PATH) as cursor:
574
+ assert _SQLALCHEMY_ENGINE is not None
575
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
409
576
  if service_names is None:
410
- rows = cursor.execute('SELECT name FROM services').fetchall()
577
+ rows = session.execute(sqlalchemy.select(
578
+ services_table.c.name)).fetchall()
411
579
  else:
412
580
  rows = []
413
581
  for service_name in service_names:
414
- rows.extend(
415
- cursor.execute(
416
- 'SELECT name FROM services WHERE name GLOB (?)',
417
- (service_name,)).fetchall())
582
+ pattern_rows = session.execute(
583
+ sqlalchemy.select(services_table.c.name).where(
584
+ services_table.c.name.like(
585
+ service_name.replace('*', '%')))).fetchall()
586
+ rows.extend(pattern_rows)
418
587
  return list({row[0] for row in rows})
419
588
 
420
589
 
421
590
  # === Replica functions ===
591
+ @init_db
422
592
  def add_or_update_replica(service_name: str, replica_id: int,
423
593
  replica_info: 'replica_managers.ReplicaInfo') -> None:
424
594
  """Adds a replica to the database, or updates it if it already exists."""
425
- with db_utils.safe_cursor(_DB_PATH) as cursor:
426
- cursor.execute(
427
- """\
428
- INSERT OR REPLACE INTO replicas
429
- (service_name, replica_id, replica_info)
430
- VALUES (?, ?, ?)""",
431
- (service_name, replica_id, pickle.dumps(replica_info)))
595
+ assert _SQLALCHEMY_ENGINE is not None
596
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
597
+ if (_SQLALCHEMY_ENGINE.dialect.name ==
598
+ db_utils.SQLAlchemyDialect.SQLITE.value):
599
+ insert_func = sqlite.insert
600
+ elif (_SQLALCHEMY_ENGINE.dialect.name ==
601
+ db_utils.SQLAlchemyDialect.POSTGRESQL.value):
602
+ insert_func = postgresql.insert
603
+ else:
604
+ raise ValueError('Unsupported database dialect')
605
+
606
+ insert_stmt = insert_func(replicas_table).values(
607
+ service_name=service_name,
608
+ replica_id=replica_id,
609
+ replica_info=pickle.dumps(replica_info))
610
+
611
+ insert_stmt = insert_stmt.on_conflict_do_update(
612
+ index_elements=['service_name', 'replica_id'],
613
+ set_={'replica_info': insert_stmt.excluded.replica_info})
432
614
 
615
+ session.execute(insert_stmt)
616
+ session.commit()
433
617
 
618
+
619
+ @init_db
434
620
  def remove_replica(service_name: str, replica_id: int) -> None:
435
621
  """Removes a replica from the database."""
436
- with db_utils.safe_cursor(_DB_PATH) as cursor:
437
- cursor.execute(
438
- """\
439
- DELETE FROM replicas
440
- WHERE service_name=(?)
441
- AND replica_id=(?)""", (service_name, replica_id))
622
+ assert _SQLALCHEMY_ENGINE is not None
623
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
624
+ session.execute(
625
+ sqlalchemy.delete(replicas_table).where(
626
+ sqlalchemy.and_(replicas_table.c.service_name == service_name,
627
+ replicas_table.c.replica_id == replica_id)))
628
+ session.commit()
442
629
 
443
630
 
631
+ @init_db
444
632
  def get_replica_info_from_id(
445
633
  service_name: str,
446
634
  replica_id: int) -> Optional['replica_managers.ReplicaInfo']:
447
635
  """Gets a replica info from the database."""
448
- with db_utils.safe_cursor(_DB_PATH) as cursor:
449
- rows = cursor.execute(
450
- """\
451
- SELECT replica_info FROM replicas
452
- WHERE service_name=(?)
453
- AND replica_id=(?)""", (service_name, replica_id)).fetchall()
454
- for row in rows:
455
- return pickle.loads(row[0])
456
- return None
636
+ assert _SQLALCHEMY_ENGINE is not None
637
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
638
+ result = session.execute(
639
+ sqlalchemy.select(replicas_table.c.replica_info).where(
640
+ sqlalchemy.and_(
641
+ replicas_table.c.service_name == service_name,
642
+ replicas_table.c.replica_id == replica_id))).fetchone()
643
+ return pickle.loads(result[0]) if result else None
457
644
 
458
645
 
646
+ @init_db
459
647
  def get_replica_infos(
460
648
  service_name: str) -> List['replica_managers.ReplicaInfo']:
461
649
  """Gets all replica infos of a service."""
462
- with db_utils.safe_cursor(_DB_PATH) as cursor:
463
- rows = cursor.execute(
464
- """\
465
- SELECT replica_info FROM replicas
466
- WHERE service_name=(?)""", (service_name,)).fetchall()
650
+ assert _SQLALCHEMY_ENGINE is not None
651
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
652
+ rows = session.execute(
653
+ sqlalchemy.select(replicas_table.c.replica_info).where(
654
+ replicas_table.c.service_name == service_name)).fetchall()
467
655
  return [pickle.loads(row[0]) for row in rows]
468
656
 
469
657
 
658
+ @init_db
470
659
  def total_number_provisioning_replicas() -> int:
471
660
  """Returns the total number of provisioning replicas."""
472
- with db_utils.safe_cursor(_DB_PATH) as cursor:
473
- rows = cursor.execute('SELECT replica_info FROM replicas').fetchall()
661
+ assert _SQLALCHEMY_ENGINE is not None
662
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
663
+ rows = session.execute(sqlalchemy.select(
664
+ replicas_table.c.replica_info)).fetchall()
474
665
  provisioning_count = 0
475
666
  for row in rows:
476
667
  replica_info: 'replica_managers.ReplicaInfo' = pickle.loads(row[0])
@@ -479,6 +670,38 @@ def total_number_provisioning_replicas() -> int:
479
670
  return provisioning_count
480
671
 
481
672
 
673
+ @init_db
674
+ def total_number_terminating_replicas() -> int:
675
+ """Returns the total number of terminating replicas."""
676
+ assert _SQLALCHEMY_ENGINE is not None
677
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
678
+ rows = session.execute(sqlalchemy.select(
679
+ replicas_table.c.replica_info)).fetchall()
680
+ terminating_count = 0
681
+ for row in rows:
682
+ replica_info: 'replica_managers.ReplicaInfo' = pickle.loads(row[0])
683
+ if (replica_info.status_property.sky_down_status ==
684
+ common_utils.ProcessStatus.RUNNING):
685
+ terminating_count += 1
686
+ return terminating_count
687
+
688
+
689
+ @init_db
690
+ def total_number_scheduled_to_terminate_replicas() -> int:
691
+ """Returns the total number of replicas scheduled to terminate."""
692
+ assert _SQLALCHEMY_ENGINE is not None
693
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
694
+ rows = session.execute(sqlalchemy.select(
695
+ replicas_table.c.replica_info)).fetchall()
696
+ terminating_count = 0
697
+ for row in rows:
698
+ replica_info: 'replica_managers.ReplicaInfo' = pickle.loads(row[0])
699
+ if (replica_info.status_property.sky_down_status ==
700
+ common_utils.ProcessStatus.SCHEDULED):
701
+ terminating_count += 1
702
+ return terminating_count
703
+
704
+
482
705
  def get_replicas_at_status(
483
706
  service_name: str,
484
707
  status: ReplicaStatus,
@@ -488,105 +711,185 @@ def get_replicas_at_status(
488
711
 
489
712
 
490
713
  # === Version functions ===
714
+ @init_db
491
715
  def add_version(service_name: str) -> int:
492
716
  """Adds a version to the database."""
717
+ assert _SQLALCHEMY_ENGINE is not None
718
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
719
+ # Insert new version with MAX(version) + 1 in a single atomic operation
720
+ max_version_subquery = sqlalchemy.select(
721
+ sqlalchemy.func.coalesce(
722
+ sqlalchemy.func.max(version_specs_table.c.version), 0) +
723
+ 1).where(version_specs_table.c.service_name ==
724
+ service_name).scalar_subquery()
725
+
726
+ # Use INSERT with subquery and RETURNING
727
+ insert_stmt = sqlalchemy.insert(version_specs_table).values(
728
+ service_name=service_name,
729
+ version=max_version_subquery,
730
+ spec=pickle.dumps(None)).returning(version_specs_table.c.version)
731
+
732
+ result = session.execute(insert_stmt)
733
+ new_version = result.scalar()
734
+ session.commit()
735
+ return new_version
736
+
737
+
738
+ @init_db
739
+ def add_or_update_version(service_name: str, version: int,
740
+ spec: 'service_spec.SkyServiceSpec') -> None:
741
+ assert _SQLALCHEMY_ENGINE is not None
742
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
743
+ if (_SQLALCHEMY_ENGINE.dialect.name ==
744
+ db_utils.SQLAlchemyDialect.SQLITE.value):
745
+ insert_func = sqlite.insert
746
+ elif (_SQLALCHEMY_ENGINE.dialect.name ==
747
+ db_utils.SQLAlchemyDialect.POSTGRESQL.value):
748
+ insert_func = postgresql.insert
749
+ else:
750
+ raise ValueError('Unsupported database dialect')
493
751
 
494
- with db_utils.safe_cursor(_DB_PATH) as cursor:
495
- cursor.execute(
496
- """\
497
- INSERT INTO version_specs
498
- (version, service_name, spec)
499
- VALUES (
500
- (SELECT COALESCE(MAX(version), 0) + 1 FROM
501
- version_specs WHERE service_name = ?), ?, ?)
502
- RETURNING version""",
503
- (service_name, service_name, pickle.dumps(None)))
504
-
505
- inserted_version = cursor.fetchone()[0]
506
-
507
- return inserted_version
752
+ insert_stmt = insert_func(version_specs_table).values(
753
+ service_name=service_name, version=version, spec=pickle.dumps(spec))
508
754
 
755
+ insert_stmt = insert_stmt.on_conflict_do_update(
756
+ index_elements=['service_name', 'version'],
757
+ set_={'spec': insert_stmt.excluded.spec})
509
758
 
510
- def add_or_update_version(service_name: str, version: int,
511
- spec: 'service_spec.SkyServiceSpec') -> None:
512
- with db_utils.safe_cursor(_DB_PATH) as cursor:
513
- cursor.execute(
514
- """\
515
- INSERT or REPLACE INTO version_specs
516
- (service_name, version, spec)
517
- VALUES (?, ?, ?)""", (service_name, version, pickle.dumps(spec)))
759
+ session.execute(insert_stmt)
760
+ session.commit()
518
761
 
519
762
 
763
+ @init_db
520
764
  def remove_service_versions(service_name: str) -> None:
521
765
  """Removes all versions of a service from the database."""
522
- with db_utils.safe_cursor(_DB_PATH) as cursor:
523
- cursor.execute(
524
- """\
525
- DELETE FROM version_specs
526
- WHERE service_name=(?)""", (service_name,))
766
+ assert _SQLALCHEMY_ENGINE is not None
767
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
768
+ session.execute(
769
+ sqlalchemy.delete(version_specs_table).where(
770
+ version_specs_table.c.service_name == service_name))
771
+ session.commit()
527
772
 
528
773
 
774
+ @init_db
529
775
  def get_spec(service_name: str,
530
776
  version: int) -> Optional['service_spec.SkyServiceSpec']:
531
777
  """Gets spec from the database."""
532
- with db_utils.safe_cursor(_DB_PATH) as cursor:
533
- rows = cursor.execute(
534
- """\
535
- SELECT spec FROM version_specs
536
- WHERE service_name=(?)
537
- AND version=(?)""", (service_name, version)).fetchall()
538
- for row in rows:
539
- return pickle.loads(row[0])
540
- return None
778
+ assert _SQLALCHEMY_ENGINE is not None
779
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
780
+ result = session.execute(
781
+ sqlalchemy.select(version_specs_table.c.spec).where(
782
+ sqlalchemy.and_(
783
+ version_specs_table.c.service_name == service_name,
784
+ version_specs_table.c.version == version))).fetchone()
785
+ return pickle.loads(result[0]) if result else None
541
786
 
542
787
 
788
+ @init_db
543
789
  def delete_version(service_name: str, version: int) -> None:
544
790
  """Deletes a version from the database."""
545
- with db_utils.safe_cursor(_DB_PATH) as cursor:
546
- cursor.execute(
547
- """\
548
- DELETE FROM version_specs
549
- WHERE service_name=(?)
550
- AND version=(?)""", (service_name, version))
791
+ assert _SQLALCHEMY_ENGINE is not None
792
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
793
+ session.execute(
794
+ sqlalchemy.delete(version_specs_table).where(
795
+ sqlalchemy.and_(
796
+ version_specs_table.c.service_name == service_name,
797
+ version_specs_table.c.version == version)))
798
+ session.commit()
551
799
 
552
800
 
801
+ @init_db
553
802
  def delete_all_versions(service_name: str) -> None:
554
803
  """Deletes all versions from the database."""
555
- with db_utils.safe_cursor(_DB_PATH) as cursor:
556
- cursor.execute(
557
- """\
558
- DELETE FROM version_specs
559
- WHERE service_name=(?)""", (service_name,))
804
+ assert _SQLALCHEMY_ENGINE is not None
805
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
806
+ session.execute(
807
+ sqlalchemy.delete(version_specs_table).where(
808
+ version_specs_table.c.service_name == service_name))
809
+ session.commit()
560
810
 
561
811
 
812
+ @init_db
562
813
  def get_latest_version(service_name: str) -> Optional[int]:
563
- with db_utils.safe_cursor(_DB_PATH) as cursor:
564
- rows = cursor.execute(
565
- """\
566
- SELECT MAX(version) FROM version_specs
567
- WHERE service_name=(?)""", (service_name,)).fetchall()
568
- if not rows or rows[0][0] is None:
569
- return None
570
- return rows[0][0]
814
+ assert _SQLALCHEMY_ENGINE is not None
815
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
816
+ result = session.execute(
817
+ sqlalchemy.select(sqlalchemy.func.max(
818
+ version_specs_table.c.version)).where(
819
+ version_specs_table.c.service_name ==
820
+ service_name)).fetchone()
821
+ return result[0] if result else None
571
822
 
572
823
 
824
+ @init_db
573
825
  def get_service_controller_port(service_name: str) -> int:
574
826
  """Gets the controller port of a service."""
575
- with db_utils.safe_cursor(_DB_PATH) as cursor:
576
- cursor.execute('SELECT controller_port FROM services WHERE name = ?',
577
- (service_name,))
578
- row = cursor.fetchone()
579
- if row is None:
827
+ assert _SQLALCHEMY_ENGINE is not None
828
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
829
+ result = session.execute(
830
+ sqlalchemy.select(services_table.c.controller_port).where(
831
+ services_table.c.name == service_name)).fetchone()
832
+ if result is None:
580
833
  raise ValueError(f'Service {service_name} does not exist.')
581
- return row[0]
834
+ return result[0]
582
835
 
583
836
 
837
+ @init_db
584
838
  def get_service_load_balancer_port(service_name: str) -> int:
585
839
  """Gets the load balancer port of a service."""
586
- with db_utils.safe_cursor(_DB_PATH) as cursor:
587
- cursor.execute('SELECT load_balancer_port FROM services WHERE name = ?',
588
- (service_name,))
589
- row = cursor.fetchone()
590
- if row is None:
840
+ assert _SQLALCHEMY_ENGINE is not None
841
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
842
+ result = session.execute(
843
+ sqlalchemy.select(services_table.c.load_balancer_port).where(
844
+ services_table.c.name == service_name)).fetchone()
845
+ if result is None:
591
846
  raise ValueError(f'Service {service_name} does not exist.')
592
- return row[0]
847
+ return result[0]
848
+
849
+
850
+ @init_db
851
+ def get_ha_recovery_script(service_name: str) -> Optional[str]:
852
+ """Gets the HA recovery script for a service."""
853
+ assert _SQLALCHEMY_ENGINE is not None
854
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
855
+ result = session.execute(
856
+ sqlalchemy.select(serve_ha_recovery_script_table.c.script).where(
857
+ serve_ha_recovery_script_table.c.service_name ==
858
+ service_name)).fetchone()
859
+ return result[0] if result else None
860
+
861
+
862
+ @init_db
863
+ def set_ha_recovery_script(service_name: str, script: str) -> None:
864
+ """Sets the HA recovery script for a service."""
865
+ assert _SQLALCHEMY_ENGINE is not None
866
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
867
+ if (_SQLALCHEMY_ENGINE.dialect.name ==
868
+ db_utils.SQLAlchemyDialect.SQLITE.value):
869
+ insert_func = sqlite.insert
870
+ elif (_SQLALCHEMY_ENGINE.dialect.name ==
871
+ db_utils.SQLAlchemyDialect.POSTGRESQL.value):
872
+ insert_func = postgresql.insert
873
+ else:
874
+ raise ValueError('Unsupported database dialect')
875
+
876
+ insert_stmt = insert_func(serve_ha_recovery_script_table).values(
877
+ service_name=service_name, script=script)
878
+
879
+ insert_stmt = insert_stmt.on_conflict_do_update(
880
+ index_elements=['service_name'],
881
+ set_={'script': insert_stmt.excluded.script})
882
+
883
+ session.execute(insert_stmt)
884
+ session.commit()
885
+
886
+
887
+ @init_db
888
+ def remove_ha_recovery_script(service_name: str) -> None:
889
+ """Removes the HA recovery script for a service."""
890
+ assert _SQLALCHEMY_ENGINE is not None
891
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
892
+ session.execute(
893
+ sqlalchemy.delete(serve_ha_recovery_script_table).where(
894
+ serve_ha_recovery_script_table.c.service_name == service_name))
895
+ session.commit()