skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +81 -16
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +173 -30
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1323 -397
- sky/backends/cloud_vm_ray_backend.py +1749 -1029
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/task_codegen.py +633 -0
- sky/backends/wheel_utils.py +55 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +90 -49
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +338 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +533 -185
- sky/cli.py +5 -5975
- sky/client/{cli.py → cli/command.py} +2591 -1956
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +322 -0
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +78 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1219 -319
- sky/client/sdk_async.py +827 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +564 -164
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +140 -40
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +570 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +210 -81
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -86
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +231 -167
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +375 -173
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1460 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +126 -2
- sky/execution.py +216 -63
- sky/global_user_state.py +2390 -586
- sky/jobs/__init__.py +7 -0
- sky/jobs/client/sdk.py +300 -58
- sky/jobs/client/sdk_async.py +161 -0
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +848 -275
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +402 -152
- sky/jobs/scheduler.py +314 -189
- sky/jobs/server/core.py +836 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2109 -706
- sky/jobs/utils.py +1306 -215
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +453 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +136 -50
- sky/provision/azure/instance.py +11 -6
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +140 -33
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +101 -20
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +919 -280
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +44 -43
- sky/provision/kubernetes/utils.py +1221 -534
- sky/provision/kubernetes/volume.py +343 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +237 -137
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +117 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +214 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +707 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +4 -4
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +17 -20
- sky/py.typed +0 -0
- sky/resources.py +845 -119
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +227 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +12 -9
- sky/serve/controller.py +68 -17
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +116 -2
- sky/serve/replica_managers.py +434 -249
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +569 -257
- sky/serve/serve_utils.py +775 -265
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +192 -89
- sky/serve/service_spec.py +144 -20
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +202 -0
- sky/server/common.py +478 -182
- sky/server/config.py +85 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +295 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/middleware_utils.py +166 -0
- sky/server/requests/executor.py +558 -138
- sky/server/requests/payloads.py +364 -24
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +121 -0
- sky/server/requests/requests.py +822 -226
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +455 -0
- sky/server/server.py +1309 -285
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +327 -61
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +11 -1
- sky/setup_files/alembic.ini +160 -0
- sky/setup_files/dependencies.py +139 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +114 -7
- sky/skylet/attempt_skylet.py +106 -24
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +29 -20
- sky/skylet/constants.py +216 -25
- sky/skylet/events.py +101 -21
- sky/skylet/job_lib.py +345 -164
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +568 -0
- sky/skylet/skylet.py +72 -4
- sky/skylet/subprocess_daemon.py +104 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +685 -163
- sky/templates/aws-ray.yml.j2 +11 -3
- sky/templates/azure-ray.yml.j2 +2 -1
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +2 -1
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +62 -1
- sky/templates/hyperbolic-ray.yml.j2 +68 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +611 -50
- sky/templates/lambda-ray.yml.j2 +2 -1
- sky/templates/nebius-ray.yml.j2 +34 -12
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +2 -1
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +10 -1
- sky/templates/scp-ray.yml.j2 +4 -50
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +212 -37
- sky/usage/usage_lib.py +31 -15
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +397 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +35 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +38 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +314 -91
- sky/utils/config_utils.py +74 -5
- sky/utils/context.py +403 -0
- sky/utils/context_utils.py +242 -0
- sky/utils/controller_utils.py +383 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +485 -0
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +137 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +15 -29
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +18 -8
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +416 -0
- sky/utils/log_utils.py +82 -107
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +217 -39
- sky/utils/schemas.py +955 -160
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +29 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +26 -53
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +165 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +150 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +270 -0
- sky/volumes/server/server.py +124 -0
- sky/volumes/volume.py +215 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
- skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
- skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/serve/serve_state.py
CHANGED
|
@@ -1,89 +1,163 @@
|
|
|
1
1
|
"""The database for services information."""
|
|
2
2
|
import collections
|
|
3
3
|
import enum
|
|
4
|
+
import functools
|
|
4
5
|
import json
|
|
5
|
-
import pathlib
|
|
6
6
|
import pickle
|
|
7
|
-
import
|
|
7
|
+
import threading
|
|
8
8
|
import typing
|
|
9
|
-
from typing import Any, Dict, List, Optional
|
|
9
|
+
from typing import Any, Dict, List, Optional
|
|
10
|
+
import uuid
|
|
10
11
|
|
|
11
12
|
import colorama
|
|
13
|
+
import sqlalchemy
|
|
14
|
+
from sqlalchemy import exc as sqlalchemy_exc
|
|
15
|
+
from sqlalchemy import orm
|
|
16
|
+
from sqlalchemy.dialects import postgresql
|
|
17
|
+
from sqlalchemy.dialects import sqlite
|
|
18
|
+
from sqlalchemy.ext import declarative
|
|
12
19
|
|
|
13
20
|
from sky.serve import constants
|
|
14
|
-
from sky.utils import
|
|
21
|
+
from sky.utils import common_utils
|
|
22
|
+
from sky.utils.db import db_utils
|
|
23
|
+
from sky.utils.db import migration_utils
|
|
15
24
|
|
|
16
25
|
if typing.TYPE_CHECKING:
|
|
26
|
+
from sqlalchemy.engine import row
|
|
27
|
+
|
|
17
28
|
from sky.serve import replica_managers
|
|
18
29
|
from sky.serve import service_spec
|
|
19
30
|
|
|
31
|
+
_SQLALCHEMY_ENGINE: Optional[sqlalchemy.engine.Engine] = None
|
|
32
|
+
_SQLALCHEMY_ENGINE_LOCK = threading.Lock()
|
|
33
|
+
|
|
34
|
+
Base = declarative.declarative_base()
|
|
35
|
+
|
|
36
|
+
# === Database schema ===
|
|
37
|
+
services_table = sqlalchemy.Table(
|
|
38
|
+
'services',
|
|
39
|
+
Base.metadata,
|
|
40
|
+
sqlalchemy.Column('name', sqlalchemy.Text, primary_key=True),
|
|
41
|
+
sqlalchemy.Column('controller_job_id',
|
|
42
|
+
sqlalchemy.Integer,
|
|
43
|
+
server_default=None),
|
|
44
|
+
sqlalchemy.Column('controller_port',
|
|
45
|
+
sqlalchemy.Integer,
|
|
46
|
+
server_default=None),
|
|
47
|
+
sqlalchemy.Column('load_balancer_port',
|
|
48
|
+
sqlalchemy.Integer,
|
|
49
|
+
server_default=None),
|
|
50
|
+
sqlalchemy.Column('status', sqlalchemy.Text),
|
|
51
|
+
sqlalchemy.Column('uptime', sqlalchemy.Integer, server_default=None),
|
|
52
|
+
sqlalchemy.Column('policy', sqlalchemy.Text, server_default=None),
|
|
53
|
+
sqlalchemy.Column('auto_restart', sqlalchemy.Integer, server_default=None),
|
|
54
|
+
sqlalchemy.Column('requested_resources',
|
|
55
|
+
sqlalchemy.LargeBinary,
|
|
56
|
+
server_default=None),
|
|
57
|
+
sqlalchemy.Column('requested_resources_str', sqlalchemy.Text),
|
|
58
|
+
sqlalchemy.Column('current_version',
|
|
59
|
+
sqlalchemy.Integer,
|
|
60
|
+
server_default=str(constants.INITIAL_VERSION)),
|
|
61
|
+
sqlalchemy.Column('active_versions',
|
|
62
|
+
sqlalchemy.Text,
|
|
63
|
+
server_default=json.dumps([])),
|
|
64
|
+
sqlalchemy.Column('load_balancing_policy',
|
|
65
|
+
sqlalchemy.Text,
|
|
66
|
+
server_default=None),
|
|
67
|
+
sqlalchemy.Column('tls_encrypted', sqlalchemy.Integer, server_default='0'),
|
|
68
|
+
sqlalchemy.Column('pool', sqlalchemy.Integer, server_default='0'),
|
|
69
|
+
sqlalchemy.Column('controller_pid', sqlalchemy.Integer,
|
|
70
|
+
server_default=None),
|
|
71
|
+
sqlalchemy.Column('hash', sqlalchemy.Text, server_default=None),
|
|
72
|
+
sqlalchemy.Column('entrypoint', sqlalchemy.Text, server_default=None),
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
replicas_table = sqlalchemy.Table(
|
|
76
|
+
'replicas',
|
|
77
|
+
Base.metadata,
|
|
78
|
+
sqlalchemy.Column('service_name', sqlalchemy.Text, primary_key=True),
|
|
79
|
+
sqlalchemy.Column('replica_id', sqlalchemy.Integer, primary_key=True),
|
|
80
|
+
sqlalchemy.Column('replica_info', sqlalchemy.LargeBinary),
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
version_specs_table = sqlalchemy.Table(
|
|
84
|
+
'version_specs',
|
|
85
|
+
Base.metadata,
|
|
86
|
+
sqlalchemy.Column('service_name', sqlalchemy.Text, primary_key=True),
|
|
87
|
+
sqlalchemy.Column('version', sqlalchemy.Integer, primary_key=True),
|
|
88
|
+
sqlalchemy.Column('spec', sqlalchemy.LargeBinary),
|
|
89
|
+
sqlalchemy.Column('yaml_content', sqlalchemy.Text, server_default=None),
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
serve_ha_recovery_script_table = sqlalchemy.Table(
|
|
93
|
+
'serve_ha_recovery_script',
|
|
94
|
+
Base.metadata,
|
|
95
|
+
sqlalchemy.Column('service_name', sqlalchemy.Text, primary_key=True),
|
|
96
|
+
sqlalchemy.Column('script', sqlalchemy.Text),
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def create_table(engine: sqlalchemy.engine.Engine):
|
|
101
|
+
"""Creates the service and replica tables if they do not exist."""
|
|
20
102
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
103
|
+
# Enable WAL mode to avoid locking issues.
|
|
104
|
+
# See: issue #3863, #1441 and PR #1509
|
|
105
|
+
# https://github.com/microsoft/WSL/issues/2395
|
|
106
|
+
# TODO(romilb): We do not enable WAL for WSL because of known issue in WSL.
|
|
107
|
+
# This may cause the database locked problem from WSL issue #1441.
|
|
108
|
+
if (engine.dialect.name == db_utils.SQLAlchemyDialect.SQLITE.value and
|
|
109
|
+
not common_utils.is_wsl()):
|
|
110
|
+
try:
|
|
111
|
+
with orm.Session(engine) as session:
|
|
112
|
+
session.execute(sqlalchemy.text('PRAGMA journal_mode=WAL'))
|
|
113
|
+
session.commit()
|
|
114
|
+
except sqlalchemy_exc.OperationalError as e:
|
|
115
|
+
if 'database is locked' not in str(e):
|
|
116
|
+
raise
|
|
117
|
+
# If the database is locked, it is OK to continue, as the WAL mode
|
|
118
|
+
# is not critical and is likely to be enabled by other processes.
|
|
29
119
|
|
|
120
|
+
migration_utils.safe_alembic_upgrade(engine, migration_utils.SERVE_DB_NAME,
|
|
121
|
+
migration_utils.SERVE_VERSION)
|
|
30
122
|
|
|
31
|
-
_DB_PATH: str = _get_db_path()
|
|
32
123
|
|
|
124
|
+
def initialize_and_get_db() -> sqlalchemy.engine.Engine:
|
|
125
|
+
global _SQLALCHEMY_ENGINE
|
|
33
126
|
|
|
34
|
-
|
|
35
|
-
|
|
127
|
+
if _SQLALCHEMY_ENGINE is not None:
|
|
128
|
+
return _SQLALCHEMY_ENGINE
|
|
129
|
+
|
|
130
|
+
with _SQLALCHEMY_ENGINE_LOCK:
|
|
131
|
+
if _SQLALCHEMY_ENGINE is not None:
|
|
132
|
+
return _SQLALCHEMY_ENGINE
|
|
133
|
+
# get an engine to the db
|
|
134
|
+
engine = db_utils.get_engine('serve/services')
|
|
135
|
+
|
|
136
|
+
# run migrations if needed
|
|
137
|
+
create_table(engine)
|
|
138
|
+
|
|
139
|
+
# return engine
|
|
140
|
+
_SQLALCHEMY_ENGINE = engine
|
|
141
|
+
return _SQLALCHEMY_ENGINE
|
|
36
142
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
cursor.execute("""\
|
|
56
|
-
CREATE TABLE IF NOT EXISTS version_specs (
|
|
57
|
-
version INTEGER,
|
|
58
|
-
service_name TEXT,
|
|
59
|
-
spec BLOB,
|
|
60
|
-
PRIMARY KEY (service_name, version))""")
|
|
61
|
-
conn.commit()
|
|
62
|
-
|
|
63
|
-
# Backward compatibility.
|
|
64
|
-
db_utils.add_column_to_table(cursor, conn, 'services',
|
|
65
|
-
'requested_resources_str', 'TEXT')
|
|
66
|
-
# Deprecated: switched to `active_versions` below for the version
|
|
67
|
-
# considered active by the load balancer. The
|
|
68
|
-
# authscaler/replica_manager version can be found in the
|
|
69
|
-
# version_specs table.
|
|
70
|
-
db_utils.add_column_to_table(
|
|
71
|
-
cursor, conn, 'services', 'current_version',
|
|
72
|
-
f'INTEGER DEFAULT {constants.INITIAL_VERSION}')
|
|
73
|
-
# The versions that is activated for the service. This is a list
|
|
74
|
-
# of integers in json format.
|
|
75
|
-
db_utils.add_column_to_table(cursor, conn, 'services', 'active_versions',
|
|
76
|
-
f'TEXT DEFAULT {json.dumps([])!r}')
|
|
77
|
-
db_utils.add_column_to_table(cursor, conn, 'services',
|
|
78
|
-
'load_balancing_policy', 'TEXT DEFAULT NULL')
|
|
79
|
-
# Whether the service's load balancer is encrypted with TLS.
|
|
80
|
-
db_utils.add_column_to_table(cursor, conn, 'services', 'tls_encrypted',
|
|
81
|
-
'INTEGER DEFAULT 0')
|
|
82
|
-
conn.commit()
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
db_utils.SQLiteConn(_DB_PATH, create_table)
|
|
86
|
-
_UNIQUE_CONSTRAINT_FAILED_ERROR_MSG = 'UNIQUE constraint failed: services.name'
|
|
143
|
+
|
|
144
|
+
def init_db(func):
|
|
145
|
+
"""Initialize the database."""
|
|
146
|
+
|
|
147
|
+
@functools.wraps(func)
|
|
148
|
+
def wrapper(*args, **kwargs):
|
|
149
|
+
initialize_and_get_db()
|
|
150
|
+
return func(*args, **kwargs)
|
|
151
|
+
|
|
152
|
+
return wrapper
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
_UNIQUE_CONSTRAINT_FAILED_ERROR_MSGS = [
|
|
156
|
+
# sqlite
|
|
157
|
+
'UNIQUE constraint failed: services.name',
|
|
158
|
+
# postgres
|
|
159
|
+
'duplicate key value violates unique constraint "services_pkey"',
|
|
160
|
+
]
|
|
87
161
|
|
|
88
162
|
|
|
89
163
|
# === Statuses ===
|
|
@@ -247,153 +321,261 @@ _SERVICE_STATUS_TO_COLOR = {
|
|
|
247
321
|
}
|
|
248
322
|
|
|
249
323
|
|
|
324
|
+
@init_db
|
|
250
325
|
def add_service(name: str, controller_job_id: int, policy: str,
|
|
251
326
|
requested_resources_str: str, load_balancing_policy: str,
|
|
252
|
-
status: ServiceStatus, tls_encrypted: bool
|
|
327
|
+
status: ServiceStatus, tls_encrypted: bool, pool: bool,
|
|
328
|
+
controller_pid: int, entrypoint: str) -> bool:
|
|
253
329
|
"""Add a service in the database.
|
|
254
330
|
|
|
255
331
|
Returns:
|
|
256
332
|
True if the service is added successfully, False if the service already
|
|
257
333
|
exists.
|
|
258
334
|
"""
|
|
335
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
259
336
|
try:
|
|
260
|
-
with
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
337
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
338
|
+
if (_SQLALCHEMY_ENGINE.dialect.name ==
|
|
339
|
+
db_utils.SQLAlchemyDialect.SQLITE.value):
|
|
340
|
+
insert_func = sqlite.insert
|
|
341
|
+
elif (_SQLALCHEMY_ENGINE.dialect.name ==
|
|
342
|
+
db_utils.SQLAlchemyDialect.POSTGRESQL.value):
|
|
343
|
+
insert_func = postgresql.insert
|
|
344
|
+
else:
|
|
345
|
+
raise ValueError('Unsupported database dialect')
|
|
346
|
+
|
|
347
|
+
insert_stmt = insert_func(services_table).values(
|
|
348
|
+
name=name,
|
|
349
|
+
controller_job_id=controller_job_id,
|
|
350
|
+
status=status.value,
|
|
351
|
+
policy=policy,
|
|
352
|
+
requested_resources_str=requested_resources_str,
|
|
353
|
+
load_balancing_policy=load_balancing_policy,
|
|
354
|
+
tls_encrypted=int(tls_encrypted),
|
|
355
|
+
pool=int(pool),
|
|
356
|
+
controller_pid=controller_pid,
|
|
357
|
+
hash=str(uuid.uuid4()),
|
|
358
|
+
entrypoint=entrypoint)
|
|
359
|
+
session.execute(insert_stmt)
|
|
360
|
+
session.commit()
|
|
361
|
+
|
|
362
|
+
except sqlalchemy_exc.IntegrityError as e:
|
|
363
|
+
for msg in _UNIQUE_CONSTRAINT_FAILED_ERROR_MSGS:
|
|
364
|
+
if msg in str(e):
|
|
365
|
+
return False
|
|
366
|
+
raise RuntimeError('Unexpected database error') from e
|
|
275
367
|
return True
|
|
276
368
|
|
|
277
369
|
|
|
370
|
+
@init_db
|
|
371
|
+
def update_service_controller_pid(service_name: str,
|
|
372
|
+
controller_pid: int) -> None:
|
|
373
|
+
"""Updates the controller pid of a service.
|
|
374
|
+
|
|
375
|
+
This is used to update the controller pid of a service on ha recovery.
|
|
376
|
+
"""
|
|
377
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
378
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
379
|
+
session.query(services_table).filter(
|
|
380
|
+
services_table.c.name == service_name).update(
|
|
381
|
+
{services_table.c.controller_pid: controller_pid})
|
|
382
|
+
session.commit()
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
@init_db
|
|
278
386
|
def remove_service(service_name: str) -> None:
|
|
279
387
|
"""Removes a service from the database."""
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
388
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
389
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
390
|
+
session.execute(
|
|
391
|
+
sqlalchemy.delete(services_table).where(
|
|
392
|
+
services_table.c.name == service_name))
|
|
393
|
+
session.commit()
|
|
283
394
|
|
|
284
395
|
|
|
396
|
+
@init_db
|
|
285
397
|
def set_service_uptime(service_name: str, uptime: int) -> None:
|
|
286
398
|
"""Sets the uptime of a service."""
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
399
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
400
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
401
|
+
session.query(services_table).filter(
|
|
402
|
+
services_table.c.name == service_name).update(
|
|
403
|
+
{services_table.c.uptime: uptime})
|
|
404
|
+
session.commit()
|
|
292
405
|
|
|
293
406
|
|
|
407
|
+
@init_db
|
|
294
408
|
def set_service_status_and_active_versions(
|
|
295
409
|
service_name: str,
|
|
296
410
|
status: ServiceStatus,
|
|
297
411
|
active_versions: Optional[List[int]] = None) -> None:
|
|
298
412
|
"""Sets the service status."""
|
|
299
|
-
|
|
300
|
-
|
|
413
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
414
|
+
update_dict = {services_table.c.status: status.value}
|
|
301
415
|
if active_versions is not None:
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
with db_utils.safe_cursor(_DB_PATH) as cursor:
|
|
305
|
-
cursor.execute(
|
|
306
|
-
f"""\
|
|
307
|
-
UPDATE services SET
|
|
308
|
-
{vars_to_set} WHERE name=(?)""", values)
|
|
416
|
+
update_dict[services_table.c.active_versions] = json.dumps(
|
|
417
|
+
active_versions)
|
|
309
418
|
|
|
419
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
420
|
+
session.query(services_table).filter(
|
|
421
|
+
services_table.c.name == service_name).update(update_dict)
|
|
422
|
+
session.commit()
|
|
310
423
|
|
|
424
|
+
|
|
425
|
+
@init_db
|
|
311
426
|
def set_service_controller_port(service_name: str,
|
|
312
427
|
controller_port: int) -> None:
|
|
313
428
|
"""Sets the controller port of a service."""
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
429
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
430
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
431
|
+
session.query(services_table).filter(
|
|
432
|
+
services_table.c.name == service_name).update(
|
|
433
|
+
{services_table.c.controller_port: controller_port})
|
|
434
|
+
session.commit()
|
|
320
435
|
|
|
321
436
|
|
|
437
|
+
@init_db
|
|
322
438
|
def set_service_load_balancer_port(service_name: str,
|
|
323
439
|
load_balancer_port: int) -> None:
|
|
324
440
|
"""Sets the load balancer port of a service."""
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
def _get_service_from_row(row) -> Dict[str, Any]:
|
|
334
|
-
(
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
'name': name,
|
|
339
|
-
'controller_job_id': controller_job_id,
|
|
340
|
-
'controller_port': controller_port,
|
|
341
|
-
'load_balancer_port': load_balancer_port,
|
|
342
|
-
'status': ServiceStatus[status],
|
|
343
|
-
'uptime': uptime,
|
|
344
|
-
'policy': policy,
|
|
441
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
442
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
443
|
+
session.query(services_table).filter(
|
|
444
|
+
services_table.c.name == service_name).update(
|
|
445
|
+
{services_table.c.load_balancer_port: load_balancer_port})
|
|
446
|
+
session.commit()
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
def _get_service_from_row(r: 'row.RowMapping') -> Dict[str, Any]:
|
|
450
|
+
# Get the max_version from the first column (from the subquery)
|
|
451
|
+
current_version = r['max_version']
|
|
452
|
+
|
|
453
|
+
record = {
|
|
454
|
+
'name': r['name'],
|
|
455
|
+
'controller_job_id': r['controller_job_id'],
|
|
456
|
+
'controller_port': r['controller_port'],
|
|
457
|
+
'load_balancer_port': r['load_balancer_port'],
|
|
458
|
+
'status': ServiceStatus[r['status']],
|
|
459
|
+
'uptime': r['uptime'],
|
|
460
|
+
'policy': r['policy'],
|
|
345
461
|
# The version of the autoscaler/replica manager are on. It can be larger
|
|
346
462
|
# than the active versions as the load balancer may not consider the
|
|
347
463
|
# latest version to be active for serving traffic.
|
|
348
464
|
'version': current_version,
|
|
349
465
|
# The versions that is active for the load balancer. This is a list of
|
|
350
466
|
# integers in json format. This is mainly for display purpose.
|
|
351
|
-
'active_versions': json.loads(active_versions)
|
|
352
|
-
|
|
353
|
-
'
|
|
354
|
-
'
|
|
467
|
+
'active_versions': json.loads(r['active_versions'])
|
|
468
|
+
if r['active_versions'] else [],
|
|
469
|
+
'requested_resources_str': r['requested_resources_str'],
|
|
470
|
+
'load_balancing_policy': r['load_balancing_policy'],
|
|
471
|
+
'tls_encrypted': bool(r['tls_encrypted']),
|
|
472
|
+
'pool': bool(r['pool']),
|
|
473
|
+
'controller_pid': r['controller_pid'],
|
|
474
|
+
'hash': r['hash'],
|
|
475
|
+
'entrypoint': r['entrypoint'],
|
|
476
|
+
'yaml_content': r.get('yaml_content'),
|
|
355
477
|
}
|
|
478
|
+
latest_spec = get_spec(r['name'], current_version)
|
|
479
|
+
if latest_spec is not None:
|
|
480
|
+
record['policy'] = latest_spec.autoscaling_policy_str()
|
|
481
|
+
record['load_balancing_policy'] = latest_spec.load_balancing_policy
|
|
482
|
+
return record
|
|
483
|
+
|
|
356
484
|
|
|
485
|
+
def _build_services_with_latest_version_query(
|
|
486
|
+
service_name: Optional[str] = None) -> sqlalchemy.sql.Select:
|
|
487
|
+
"""Builds a query joining services with their latest version and yaml.
|
|
357
488
|
|
|
489
|
+
Args:
|
|
490
|
+
service_name: If provided, filter to this service only.
|
|
491
|
+
|
|
492
|
+
Returns:
|
|
493
|
+
A SQLAlchemy selectable for fetching rows, including columns:
|
|
494
|
+
- max_version (latest version per service)
|
|
495
|
+
- services_table.*
|
|
496
|
+
- yaml_content (from version_specs_table for latest version)
|
|
497
|
+
"""
|
|
498
|
+
subquery = sqlalchemy.select(
|
|
499
|
+
version_specs_table.c.service_name,
|
|
500
|
+
sqlalchemy.func.max(version_specs_table.c.version).label('max_version'),
|
|
501
|
+
).group_by(version_specs_table.c.service_name).alias('v')
|
|
502
|
+
|
|
503
|
+
query = sqlalchemy.select(
|
|
504
|
+
subquery.c.max_version,
|
|
505
|
+
services_table,
|
|
506
|
+
version_specs_table.c.yaml_content,
|
|
507
|
+
).select_from(
|
|
508
|
+
services_table.join(
|
|
509
|
+
subquery, services_table.c.name == subquery.c.service_name).join(
|
|
510
|
+
version_specs_table,
|
|
511
|
+
sqlalchemy.and_(
|
|
512
|
+
version_specs_table.c.service_name == services_table.c.name,
|
|
513
|
+
version_specs_table.c.version == subquery.c.max_version,
|
|
514
|
+
),
|
|
515
|
+
))
|
|
516
|
+
if service_name is not None:
|
|
517
|
+
query = query.where(services_table.c.name == service_name)
|
|
518
|
+
return query
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
@init_db
|
|
358
522
|
def get_services() -> List[Dict[str, Any]]:
|
|
359
523
|
"""Get all existing service records."""
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
' FROM version_specs GROUP BY service_name) v '
|
|
365
|
-
'ON s.name=v.service_name').fetchall()
|
|
524
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
525
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
526
|
+
query = _build_services_with_latest_version_query()
|
|
527
|
+
rows = session.execute(query).fetchall()
|
|
366
528
|
records = []
|
|
367
529
|
for row in rows:
|
|
368
|
-
records.append(_get_service_from_row(row))
|
|
530
|
+
records.append(_get_service_from_row(row._mapping)) # pylint: disable=protected-access
|
|
369
531
|
return records
|
|
370
532
|
|
|
371
533
|
|
|
534
|
+
@init_db
|
|
535
|
+
def get_num_services() -> int:
|
|
536
|
+
"""Get the number of services."""
|
|
537
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
538
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
539
|
+
return session.execute(
|
|
540
|
+
sqlalchemy.select(sqlalchemy.func.count() # pylint: disable=not-callable
|
|
541
|
+
).select_from(services_table)).fetchone()[0]
|
|
542
|
+
|
|
543
|
+
|
|
544
|
+
@init_db
|
|
372
545
|
def get_service_from_name(service_name: str) -> Optional[Dict[str, Any]]:
|
|
373
546
|
"""Get all existing service records."""
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
'SELECT service_name, MAX(version) as max_version '
|
|
379
|
-
'FROM version_specs WHERE service_name=(?)) v '
|
|
380
|
-
'ON s.name=v.service_name WHERE name=(?)',
|
|
381
|
-
(service_name, service_name)).fetchall()
|
|
547
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
548
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
549
|
+
query = _build_services_with_latest_version_query(service_name)
|
|
550
|
+
rows = session.execute(query).fetchall()
|
|
382
551
|
for row in rows:
|
|
383
|
-
return _get_service_from_row(row)
|
|
552
|
+
return _get_service_from_row(row._mapping) # pylint: disable=protected-access
|
|
384
553
|
return None
|
|
385
554
|
|
|
386
555
|
|
|
556
|
+
@init_db
|
|
557
|
+
def get_service_hash(service_name: str) -> Optional[str]:
|
|
558
|
+
"""Get the hash of a service."""
|
|
559
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
560
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
561
|
+
result = session.execute(
|
|
562
|
+
sqlalchemy.select(services_table.c.hash).where(
|
|
563
|
+
services_table.c.name == service_name)).fetchone()
|
|
564
|
+
return result[0] if result else None
|
|
565
|
+
|
|
566
|
+
|
|
567
|
+
@init_db
|
|
387
568
|
def get_service_versions(service_name: str) -> List[int]:
|
|
388
569
|
"""Gets all versions of a service."""
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
570
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
571
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
572
|
+
rows = session.execute(
|
|
573
|
+
sqlalchemy.select(version_specs_table.c.version.distinct()).where(
|
|
574
|
+
version_specs_table.c.service_name == service_name)).fetchall()
|
|
394
575
|
return [row[0] for row in rows]
|
|
395
576
|
|
|
396
577
|
|
|
578
|
+
@init_db
|
|
397
579
|
def get_glob_service_names(
|
|
398
580
|
service_names: Optional[List[str]] = None) -> List[str]:
|
|
399
581
|
"""Get service names matching the glob patterns.
|
|
@@ -405,72 +587,97 @@ def get_glob_service_names(
|
|
|
405
587
|
Returns:
|
|
406
588
|
A list of non-duplicated service names.
|
|
407
589
|
"""
|
|
408
|
-
|
|
590
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
591
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
409
592
|
if service_names is None:
|
|
410
|
-
rows =
|
|
593
|
+
rows = session.execute(sqlalchemy.select(
|
|
594
|
+
services_table.c.name)).fetchall()
|
|
411
595
|
else:
|
|
412
596
|
rows = []
|
|
413
597
|
for service_name in service_names:
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
598
|
+
pattern_rows = session.execute(
|
|
599
|
+
sqlalchemy.select(services_table.c.name).where(
|
|
600
|
+
services_table.c.name.like(
|
|
601
|
+
service_name.replace('*', '%')))).fetchall()
|
|
602
|
+
rows.extend(pattern_rows)
|
|
418
603
|
return list({row[0] for row in rows})
|
|
419
604
|
|
|
420
605
|
|
|
421
606
|
# === Replica functions ===
|
|
607
|
+
@init_db
|
|
422
608
|
def add_or_update_replica(service_name: str, replica_id: int,
|
|
423
609
|
replica_info: 'replica_managers.ReplicaInfo') -> None:
|
|
424
610
|
"""Adds a replica to the database."""
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
611
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
612
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
613
|
+
if (_SQLALCHEMY_ENGINE.dialect.name ==
|
|
614
|
+
db_utils.SQLAlchemyDialect.SQLITE.value):
|
|
615
|
+
insert_func = sqlite.insert
|
|
616
|
+
elif (_SQLALCHEMY_ENGINE.dialect.name ==
|
|
617
|
+
db_utils.SQLAlchemyDialect.POSTGRESQL.value):
|
|
618
|
+
insert_func = postgresql.insert
|
|
619
|
+
else:
|
|
620
|
+
raise ValueError('Unsupported database dialect')
|
|
432
621
|
|
|
622
|
+
insert_stmt = insert_func(replicas_table).values(
|
|
623
|
+
service_name=service_name,
|
|
624
|
+
replica_id=replica_id,
|
|
625
|
+
replica_info=pickle.dumps(replica_info))
|
|
433
626
|
|
|
627
|
+
insert_stmt = insert_stmt.on_conflict_do_update(
|
|
628
|
+
index_elements=['service_name', 'replica_id'],
|
|
629
|
+
set_={'replica_info': insert_stmt.excluded.replica_info})
|
|
630
|
+
|
|
631
|
+
session.execute(insert_stmt)
|
|
632
|
+
session.commit()
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
@init_db
|
|
434
636
|
def remove_replica(service_name: str, replica_id: int) -> None:
|
|
435
637
|
"""Removes a replica from the database."""
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
638
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
639
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
640
|
+
session.execute(
|
|
641
|
+
sqlalchemy.delete(replicas_table).where(
|
|
642
|
+
sqlalchemy.and_(replicas_table.c.service_name == service_name,
|
|
643
|
+
replicas_table.c.replica_id == replica_id)))
|
|
644
|
+
session.commit()
|
|
442
645
|
|
|
443
646
|
|
|
647
|
+
@init_db
|
|
444
648
|
def get_replica_info_from_id(
|
|
445
649
|
service_name: str,
|
|
446
650
|
replica_id: int) -> Optional['replica_managers.ReplicaInfo']:
|
|
447
651
|
"""Gets a replica info from the database."""
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
return None
|
|
652
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
653
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
654
|
+
result = session.execute(
|
|
655
|
+
sqlalchemy.select(replicas_table.c.replica_info).where(
|
|
656
|
+
sqlalchemy.and_(
|
|
657
|
+
replicas_table.c.service_name == service_name,
|
|
658
|
+
replicas_table.c.replica_id == replica_id))).fetchone()
|
|
659
|
+
return pickle.loads(result[0]) if result else None
|
|
457
660
|
|
|
458
661
|
|
|
662
|
+
@init_db
|
|
459
663
|
def get_replica_infos(
|
|
460
664
|
service_name: str) -> List['replica_managers.ReplicaInfo']:
|
|
461
665
|
"""Gets all replica infos of a service."""
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
666
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
667
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
668
|
+
rows = session.execute(
|
|
669
|
+
sqlalchemy.select(replicas_table.c.replica_info).where(
|
|
670
|
+
replicas_table.c.service_name == service_name)).fetchall()
|
|
467
671
|
return [pickle.loads(row[0]) for row in rows]
|
|
468
672
|
|
|
469
673
|
|
|
674
|
+
@init_db
|
|
470
675
|
def total_number_provisioning_replicas() -> int:
|
|
471
676
|
"""Returns the total number of provisioning replicas."""
|
|
472
|
-
|
|
473
|
-
|
|
677
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
678
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
679
|
+
rows = session.execute(sqlalchemy.select(
|
|
680
|
+
replicas_table.c.replica_info)).fetchall()
|
|
474
681
|
provisioning_count = 0
|
|
475
682
|
for row in rows:
|
|
476
683
|
replica_info: 'replica_managers.ReplicaInfo' = pickle.loads(row[0])
|
|
@@ -479,6 +686,22 @@ def total_number_provisioning_replicas() -> int:
|
|
|
479
686
|
return provisioning_count
|
|
480
687
|
|
|
481
688
|
|
|
689
|
+
@init_db
|
|
690
|
+
def total_number_terminating_replicas() -> int:
|
|
691
|
+
"""Returns the total number of terminating replicas."""
|
|
692
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
693
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
694
|
+
rows = session.execute(sqlalchemy.select(
|
|
695
|
+
replicas_table.c.replica_info)).fetchall()
|
|
696
|
+
terminating_count = 0
|
|
697
|
+
for row in rows:
|
|
698
|
+
replica_info: 'replica_managers.ReplicaInfo' = pickle.loads(row[0])
|
|
699
|
+
if (replica_info.status_property.sky_down_status ==
|
|
700
|
+
common_utils.ProcessStatus.RUNNING):
|
|
701
|
+
terminating_count += 1
|
|
702
|
+
return terminating_count
|
|
703
|
+
|
|
704
|
+
|
|
482
705
|
def get_replicas_at_status(
|
|
483
706
|
service_name: str,
|
|
484
707
|
status: ReplicaStatus,
|
|
@@ -488,105 +711,194 @@ def get_replicas_at_status(
|
|
|
488
711
|
|
|
489
712
|
|
|
490
713
|
# === Version functions ===
|
|
714
|
+
@init_db
|
|
491
715
|
def add_version(service_name: str) -> int:
|
|
492
716
|
"""Adds a version to the database."""
|
|
493
|
-
|
|
494
|
-
with
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
717
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
718
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
719
|
+
# Insert new version with MAX(version) + 1 in a single atomic operation
|
|
720
|
+
max_version_subquery = sqlalchemy.select(
|
|
721
|
+
sqlalchemy.func.coalesce(
|
|
722
|
+
sqlalchemy.func.max(version_specs_table.c.version), 0) +
|
|
723
|
+
1).where(version_specs_table.c.service_name ==
|
|
724
|
+
service_name).scalar_subquery()
|
|
725
|
+
|
|
726
|
+
# Use INSERT with subquery and RETURNING
|
|
727
|
+
insert_stmt = sqlalchemy.insert(version_specs_table).values(
|
|
728
|
+
service_name=service_name,
|
|
729
|
+
version=max_version_subquery,
|
|
730
|
+
spec=pickle.dumps(None)).returning(version_specs_table.c.version)
|
|
731
|
+
|
|
732
|
+
result = session.execute(insert_stmt)
|
|
733
|
+
new_version = result.scalar()
|
|
734
|
+
session.commit()
|
|
735
|
+
return new_version
|
|
736
|
+
|
|
737
|
+
|
|
738
|
+
@init_db
|
|
510
739
|
def add_or_update_version(service_name: str, version: int,
|
|
511
|
-
spec: 'service_spec.SkyServiceSpec'
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
740
|
+
spec: 'service_spec.SkyServiceSpec',
|
|
741
|
+
yaml_content: str) -> None:
|
|
742
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
743
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
744
|
+
if (_SQLALCHEMY_ENGINE.dialect.name ==
|
|
745
|
+
db_utils.SQLAlchemyDialect.SQLITE.value):
|
|
746
|
+
insert_func = sqlite.insert
|
|
747
|
+
elif (_SQLALCHEMY_ENGINE.dialect.name ==
|
|
748
|
+
db_utils.SQLAlchemyDialect.POSTGRESQL.value):
|
|
749
|
+
insert_func = postgresql.insert
|
|
750
|
+
else:
|
|
751
|
+
raise ValueError('Unsupported database dialect')
|
|
518
752
|
|
|
753
|
+
insert_stmt = insert_func(version_specs_table).values(
|
|
754
|
+
service_name=service_name,
|
|
755
|
+
version=version,
|
|
756
|
+
spec=pickle.dumps(spec),
|
|
757
|
+
yaml_content=yaml_content)
|
|
519
758
|
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
WHERE service_name=(?)""", (service_name,))
|
|
759
|
+
insert_stmt = insert_stmt.on_conflict_do_update(
|
|
760
|
+
index_elements=['service_name', 'version'],
|
|
761
|
+
set_={
|
|
762
|
+
'spec': insert_stmt.excluded.spec,
|
|
763
|
+
'yaml_content': insert_stmt.excluded.yaml_content
|
|
764
|
+
})
|
|
527
765
|
|
|
766
|
+
session.execute(insert_stmt)
|
|
767
|
+
session.commit()
|
|
528
768
|
|
|
769
|
+
|
|
770
|
+
@init_db
|
|
529
771
|
def get_spec(service_name: str,
|
|
530
772
|
version: int) -> Optional['service_spec.SkyServiceSpec']:
|
|
531
773
|
"""Gets spec from the database."""
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
774
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
775
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
776
|
+
result = session.execute(
|
|
777
|
+
sqlalchemy.select(version_specs_table.c.spec).where(
|
|
778
|
+
sqlalchemy.and_(
|
|
779
|
+
version_specs_table.c.service_name == service_name,
|
|
780
|
+
version_specs_table.c.version == version))).fetchone()
|
|
781
|
+
return pickle.loads(result[0]) if result else None
|
|
782
|
+
|
|
783
|
+
|
|
784
|
+
@init_db
|
|
785
|
+
def get_yaml_content(service_name: str, version: int) -> Optional[str]:
|
|
786
|
+
"""Gets the yaml content of a version."""
|
|
787
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
788
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
789
|
+
result = session.execute(
|
|
790
|
+
sqlalchemy.select(version_specs_table.c.yaml_content).where(
|
|
791
|
+
sqlalchemy.and_(
|
|
792
|
+
version_specs_table.c.service_name == service_name,
|
|
793
|
+
version_specs_table.c.version == version))).fetchone()
|
|
794
|
+
return result[0] if result else None
|
|
795
|
+
|
|
796
|
+
|
|
797
|
+
@init_db
|
|
543
798
|
def delete_version(service_name: str, version: int) -> None:
|
|
544
799
|
"""Deletes a version from the database."""
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
800
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
801
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
802
|
+
session.execute(
|
|
803
|
+
sqlalchemy.delete(version_specs_table).where(
|
|
804
|
+
sqlalchemy.and_(
|
|
805
|
+
version_specs_table.c.service_name == service_name,
|
|
806
|
+
version_specs_table.c.version == version)))
|
|
807
|
+
session.commit()
|
|
551
808
|
|
|
552
809
|
|
|
810
|
+
@init_db
|
|
553
811
|
def delete_all_versions(service_name: str) -> None:
|
|
554
812
|
"""Deletes all versions from the database."""
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
813
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
814
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
815
|
+
session.execute(
|
|
816
|
+
sqlalchemy.delete(version_specs_table).where(
|
|
817
|
+
version_specs_table.c.service_name == service_name))
|
|
818
|
+
session.commit()
|
|
560
819
|
|
|
561
820
|
|
|
821
|
+
@init_db
|
|
562
822
|
def get_latest_version(service_name: str) -> Optional[int]:
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
return
|
|
823
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
824
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
825
|
+
result = session.execute(
|
|
826
|
+
sqlalchemy.select(sqlalchemy.func.max(
|
|
827
|
+
version_specs_table.c.version)).where(
|
|
828
|
+
version_specs_table.c.service_name ==
|
|
829
|
+
service_name)).fetchone()
|
|
830
|
+
return result[0] if result else None
|
|
571
831
|
|
|
572
832
|
|
|
833
|
+
@init_db
|
|
573
834
|
def get_service_controller_port(service_name: str) -> int:
|
|
574
835
|
"""Gets the controller port of a service."""
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
836
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
837
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
838
|
+
result = session.execute(
|
|
839
|
+
sqlalchemy.select(services_table.c.controller_port).where(
|
|
840
|
+
services_table.c.name == service_name)).fetchone()
|
|
841
|
+
if result is None:
|
|
580
842
|
raise ValueError(f'Service {service_name} does not exist.')
|
|
581
|
-
return
|
|
843
|
+
return result[0]
|
|
582
844
|
|
|
583
845
|
|
|
846
|
+
@init_db
|
|
584
847
|
def get_service_load_balancer_port(service_name: str) -> int:
|
|
585
848
|
"""Gets the load balancer port of a service."""
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
849
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
850
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
851
|
+
result = session.execute(
|
|
852
|
+
sqlalchemy.select(services_table.c.load_balancer_port).where(
|
|
853
|
+
services_table.c.name == service_name)).fetchone()
|
|
854
|
+
if result is None:
|
|
591
855
|
raise ValueError(f'Service {service_name} does not exist.')
|
|
592
|
-
return
|
|
856
|
+
return result[0]
|
|
857
|
+
|
|
858
|
+
|
|
859
|
+
@init_db
|
|
860
|
+
def get_ha_recovery_script(service_name: str) -> Optional[str]:
|
|
861
|
+
"""Gets the HA recovery script for a service."""
|
|
862
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
863
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
864
|
+
result = session.execute(
|
|
865
|
+
sqlalchemy.select(serve_ha_recovery_script_table.c.script).where(
|
|
866
|
+
serve_ha_recovery_script_table.c.service_name ==
|
|
867
|
+
service_name)).fetchone()
|
|
868
|
+
return result[0] if result else None
|
|
869
|
+
|
|
870
|
+
|
|
871
|
+
@init_db
|
|
872
|
+
def set_ha_recovery_script(service_name: str, script: str) -> None:
|
|
873
|
+
"""Sets the HA recovery script for a service."""
|
|
874
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
875
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
876
|
+
if (_SQLALCHEMY_ENGINE.dialect.name ==
|
|
877
|
+
db_utils.SQLAlchemyDialect.SQLITE.value):
|
|
878
|
+
insert_func = sqlite.insert
|
|
879
|
+
elif (_SQLALCHEMY_ENGINE.dialect.name ==
|
|
880
|
+
db_utils.SQLAlchemyDialect.POSTGRESQL.value):
|
|
881
|
+
insert_func = postgresql.insert
|
|
882
|
+
else:
|
|
883
|
+
raise ValueError('Unsupported database dialect')
|
|
884
|
+
|
|
885
|
+
insert_stmt = insert_func(serve_ha_recovery_script_table).values(
|
|
886
|
+
service_name=service_name, script=script)
|
|
887
|
+
|
|
888
|
+
insert_stmt = insert_stmt.on_conflict_do_update(
|
|
889
|
+
index_elements=['service_name'],
|
|
890
|
+
set_={'script': insert_stmt.excluded.script})
|
|
891
|
+
|
|
892
|
+
session.execute(insert_stmt)
|
|
893
|
+
session.commit()
|
|
894
|
+
|
|
895
|
+
|
|
896
|
+
@init_db
|
|
897
|
+
def remove_ha_recovery_script(service_name: str) -> None:
|
|
898
|
+
"""Removes the HA recovery script for a service."""
|
|
899
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
900
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
901
|
+
session.execute(
|
|
902
|
+
sqlalchemy.delete(serve_ha_recovery_script_table).where(
|
|
903
|
+
serve_ha_recovery_script_table.c.service_name == service_name))
|
|
904
|
+
session.commit()
|