skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +25 -7
- sky/adaptors/common.py +24 -1
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +170 -17
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +167 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1299 -380
- sky/backends/cloud_vm_ray_backend.py +1715 -518
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/wheel_utils.py +37 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +89 -48
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +335 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +491 -203
- sky/cli.py +5 -6005
- sky/client/{cli.py → cli/command.py} +2477 -1885
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +320 -0
- sky/client/common.py +70 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1203 -297
- sky/client/sdk_async.py +833 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +73 -0
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +358 -93
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +127 -36
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +563 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +206 -80
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -83
- sky/clouds/seeweb.py +466 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +177 -124
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +349 -139
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
- sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1451 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +132 -2
- sky/execution.py +206 -63
- sky/global_user_state.py +2374 -586
- sky/jobs/__init__.py +5 -0
- sky/jobs/client/sdk.py +242 -65
- sky/jobs/client/sdk_async.py +143 -0
- sky/jobs/constants.py +9 -8
- sky/jobs/controller.py +839 -277
- sky/jobs/file_content_utils.py +80 -0
- sky/jobs/log_gc.py +201 -0
- sky/jobs/recovery_strategy.py +398 -152
- sky/jobs/scheduler.py +315 -189
- sky/jobs/server/core.py +829 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2092 -701
- sky/jobs/utils.py +1242 -160
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +443 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +135 -50
- sky/provision/azure/instance.py +10 -5
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +114 -23
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +93 -14
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +789 -247
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +40 -43
- sky/provision/kubernetes/utils.py +1192 -531
- sky/provision/kubernetes/volume.py +282 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +196 -91
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +110 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +180 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +531 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +807 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/vsphere/common/vim_utils.py +1 -2
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +9 -19
- sky/py.typed +0 -0
- sky/resources.py +844 -118
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +225 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +10 -8
- sky/serve/controller.py +64 -19
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +115 -1
- sky/serve/replica_managers.py +273 -162
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +554 -251
- sky/serve/serve_utils.py +733 -220
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +133 -48
- sky/serve/service_spec.py +135 -16
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +200 -0
- sky/server/common.py +475 -181
- sky/server/config.py +81 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +229 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/requests/executor.py +528 -138
- sky/server/requests/payloads.py +351 -17
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +120 -0
- sky/server/requests/requests.py +817 -224
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +106 -0
- sky/server/rest.py +417 -0
- sky/server/server.py +1290 -284
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +345 -57
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +5 -0
- sky/setup_files/alembic.ini +156 -0
- sky/setup_files/dependencies.py +136 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +102 -5
- sky/skylet/attempt_skylet.py +1 -0
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +27 -20
- sky/skylet/constants.py +171 -19
- sky/skylet/events.py +105 -21
- sky/skylet/job_lib.py +335 -104
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/services.py +564 -0
- sky/skylet/skylet.py +63 -4
- sky/skylet/subprocess_daemon.py +103 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +621 -137
- sky/templates/aws-ray.yml.j2 +10 -3
- sky/templates/azure-ray.yml.j2 +1 -1
- sky/templates/do-ray.yml.j2 +1 -1
- sky/templates/gcp-ray.yml.j2 +57 -0
- sky/templates/hyperbolic-ray.yml.j2 +67 -0
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +607 -51
- sky/templates/lambda-ray.yml.j2 +1 -1
- sky/templates/nebius-ray.yml.j2 +33 -12
- sky/templates/paperspace-ray.yml.j2 +1 -1
- sky/templates/primeintellect-ray.yml.j2 +71 -0
- sky/templates/runpod-ray.yml.j2 +9 -1
- sky/templates/scp-ray.yml.j2 +3 -50
- sky/templates/seeweb-ray.yml.j2 +108 -0
- sky/templates/shadeform-ray.yml.j2 +72 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/websocket_proxy.py +178 -18
- sky/usage/usage_lib.py +18 -11
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +387 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +34 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +16 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +310 -87
- sky/utils/config_utils.py +87 -5
- sky/utils/context.py +402 -0
- sky/utils/context_utils.py +222 -0
- sky/utils/controller_utils.py +264 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +470 -0
- sky/utils/db/migration_utils.py +133 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +13 -27
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +5 -5
- sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +368 -0
- sky/utils/log_utils.py +300 -6
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +213 -37
- sky/utils/schemas.py +905 -147
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +38 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/timeline.py +24 -52
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +86 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +149 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +258 -0
- sky/volumes/server/server.py +122 -0
- sky/volumes/volume.py +212 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
- skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/serve/serve_state.py
CHANGED
|
@@ -1,89 +1,162 @@
|
|
|
1
1
|
"""The database for services information."""
|
|
2
2
|
import collections
|
|
3
3
|
import enum
|
|
4
|
+
import functools
|
|
4
5
|
import json
|
|
5
|
-
import pathlib
|
|
6
6
|
import pickle
|
|
7
|
-
import
|
|
7
|
+
import threading
|
|
8
8
|
import typing
|
|
9
|
-
from typing import Any, Dict, List, Optional
|
|
9
|
+
from typing import Any, Dict, List, Optional
|
|
10
|
+
import uuid
|
|
10
11
|
|
|
11
12
|
import colorama
|
|
13
|
+
import sqlalchemy
|
|
14
|
+
from sqlalchemy import exc as sqlalchemy_exc
|
|
15
|
+
from sqlalchemy import orm
|
|
16
|
+
from sqlalchemy.dialects import postgresql
|
|
17
|
+
from sqlalchemy.dialects import sqlite
|
|
18
|
+
from sqlalchemy.ext import declarative
|
|
12
19
|
|
|
13
20
|
from sky.serve import constants
|
|
14
|
-
from sky.utils import
|
|
21
|
+
from sky.utils import common_utils
|
|
22
|
+
from sky.utils.db import db_utils
|
|
23
|
+
from sky.utils.db import migration_utils
|
|
15
24
|
|
|
16
25
|
if typing.TYPE_CHECKING:
|
|
26
|
+
from sqlalchemy.engine import row
|
|
27
|
+
|
|
17
28
|
from sky.serve import replica_managers
|
|
18
29
|
from sky.serve import service_spec
|
|
19
30
|
|
|
31
|
+
_SQLALCHEMY_ENGINE: Optional[sqlalchemy.engine.Engine] = None
|
|
32
|
+
_SQLALCHEMY_ENGINE_LOCK = threading.Lock()
|
|
33
|
+
|
|
34
|
+
Base = declarative.declarative_base()
|
|
35
|
+
|
|
36
|
+
# === Database schema ===
|
|
37
|
+
services_table = sqlalchemy.Table(
|
|
38
|
+
'services',
|
|
39
|
+
Base.metadata,
|
|
40
|
+
sqlalchemy.Column('name', sqlalchemy.Text, primary_key=True),
|
|
41
|
+
sqlalchemy.Column('controller_job_id',
|
|
42
|
+
sqlalchemy.Integer,
|
|
43
|
+
server_default=None),
|
|
44
|
+
sqlalchemy.Column('controller_port',
|
|
45
|
+
sqlalchemy.Integer,
|
|
46
|
+
server_default=None),
|
|
47
|
+
sqlalchemy.Column('load_balancer_port',
|
|
48
|
+
sqlalchemy.Integer,
|
|
49
|
+
server_default=None),
|
|
50
|
+
sqlalchemy.Column('status', sqlalchemy.Text),
|
|
51
|
+
sqlalchemy.Column('uptime', sqlalchemy.Integer, server_default=None),
|
|
52
|
+
sqlalchemy.Column('policy', sqlalchemy.Text, server_default=None),
|
|
53
|
+
sqlalchemy.Column('auto_restart', sqlalchemy.Integer, server_default=None),
|
|
54
|
+
sqlalchemy.Column('requested_resources',
|
|
55
|
+
sqlalchemy.LargeBinary,
|
|
56
|
+
server_default=None),
|
|
57
|
+
sqlalchemy.Column('requested_resources_str', sqlalchemy.Text),
|
|
58
|
+
sqlalchemy.Column('current_version',
|
|
59
|
+
sqlalchemy.Integer,
|
|
60
|
+
server_default=str(constants.INITIAL_VERSION)),
|
|
61
|
+
sqlalchemy.Column('active_versions',
|
|
62
|
+
sqlalchemy.Text,
|
|
63
|
+
server_default=json.dumps([])),
|
|
64
|
+
sqlalchemy.Column('load_balancing_policy',
|
|
65
|
+
sqlalchemy.Text,
|
|
66
|
+
server_default=None),
|
|
67
|
+
sqlalchemy.Column('tls_encrypted', sqlalchemy.Integer, server_default='0'),
|
|
68
|
+
sqlalchemy.Column('pool', sqlalchemy.Integer, server_default='0'),
|
|
69
|
+
sqlalchemy.Column('controller_pid', sqlalchemy.Integer,
|
|
70
|
+
server_default=None),
|
|
71
|
+
sqlalchemy.Column('hash', sqlalchemy.Text, server_default=None),
|
|
72
|
+
sqlalchemy.Column('entrypoint', sqlalchemy.Text, server_default=None),
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
replicas_table = sqlalchemy.Table(
|
|
76
|
+
'replicas',
|
|
77
|
+
Base.metadata,
|
|
78
|
+
sqlalchemy.Column('service_name', sqlalchemy.Text, primary_key=True),
|
|
79
|
+
sqlalchemy.Column('replica_id', sqlalchemy.Integer, primary_key=True),
|
|
80
|
+
sqlalchemy.Column('replica_info', sqlalchemy.LargeBinary),
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
version_specs_table = sqlalchemy.Table(
|
|
84
|
+
'version_specs',
|
|
85
|
+
Base.metadata,
|
|
86
|
+
sqlalchemy.Column('service_name', sqlalchemy.Text, primary_key=True),
|
|
87
|
+
sqlalchemy.Column('version', sqlalchemy.Integer, primary_key=True),
|
|
88
|
+
sqlalchemy.Column('spec', sqlalchemy.LargeBinary),
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
serve_ha_recovery_script_table = sqlalchemy.Table(
|
|
92
|
+
'serve_ha_recovery_script',
|
|
93
|
+
Base.metadata,
|
|
94
|
+
sqlalchemy.Column('service_name', sqlalchemy.Text, primary_key=True),
|
|
95
|
+
sqlalchemy.Column('script', sqlalchemy.Text),
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def create_table(engine: sqlalchemy.engine.Engine):
|
|
100
|
+
"""Creates the service and replica tables if they do not exist."""
|
|
20
101
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
102
|
+
# Enable WAL mode to avoid locking issues.
|
|
103
|
+
# See: issue #3863, #1441 and PR #1509
|
|
104
|
+
# https://github.com/microsoft/WSL/issues/2395
|
|
105
|
+
# TODO(romilb): We do not enable WAL for WSL because of known issue in WSL.
|
|
106
|
+
# This may cause the database locked problem from WSL issue #1441.
|
|
107
|
+
if (engine.dialect.name == db_utils.SQLAlchemyDialect.SQLITE.value and
|
|
108
|
+
not common_utils.is_wsl()):
|
|
109
|
+
try:
|
|
110
|
+
with orm.Session(engine) as session:
|
|
111
|
+
session.execute(sqlalchemy.text('PRAGMA journal_mode=WAL'))
|
|
112
|
+
session.commit()
|
|
113
|
+
except sqlalchemy_exc.OperationalError as e:
|
|
114
|
+
if 'database is locked' not in str(e):
|
|
115
|
+
raise
|
|
116
|
+
# If the database is locked, it is OK to continue, as the WAL mode
|
|
117
|
+
# is not critical and is likely to be enabled by other processes.
|
|
29
118
|
|
|
119
|
+
migration_utils.safe_alembic_upgrade(engine, migration_utils.SERVE_DB_NAME,
|
|
120
|
+
migration_utils.SERVE_VERSION)
|
|
30
121
|
|
|
31
|
-
_DB_PATH: str = _get_db_path()
|
|
32
122
|
|
|
123
|
+
def initialize_and_get_db() -> sqlalchemy.engine.Engine:
|
|
124
|
+
global _SQLALCHEMY_ENGINE
|
|
125
|
+
|
|
126
|
+
if _SQLALCHEMY_ENGINE is not None:
|
|
127
|
+
return _SQLALCHEMY_ENGINE
|
|
128
|
+
|
|
129
|
+
with _SQLALCHEMY_ENGINE_LOCK:
|
|
130
|
+
if _SQLALCHEMY_ENGINE is not None:
|
|
131
|
+
return _SQLALCHEMY_ENGINE
|
|
132
|
+
# get an engine to the db
|
|
133
|
+
engine = db_utils.get_engine('serve/services')
|
|
134
|
+
|
|
135
|
+
# run migrations if needed
|
|
136
|
+
create_table(engine)
|
|
137
|
+
|
|
138
|
+
# return engine
|
|
139
|
+
_SQLALCHEMY_ENGINE = engine
|
|
140
|
+
return _SQLALCHEMY_ENGINE
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def init_db(func):
|
|
144
|
+
"""Initialize the database."""
|
|
145
|
+
|
|
146
|
+
@functools.wraps(func)
|
|
147
|
+
def wrapper(*args, **kwargs):
|
|
148
|
+
initialize_and_get_db()
|
|
149
|
+
return func(*args, **kwargs)
|
|
150
|
+
|
|
151
|
+
return wrapper
|
|
33
152
|
|
|
34
|
-
def create_table(cursor: 'sqlite3.Cursor', conn: 'sqlite3.Connection') -> None:
|
|
35
|
-
"""Creates the service and replica tables if they do not exist."""
|
|
36
153
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
load_balancer_port INTEGER DEFAULT NULL,
|
|
44
|
-
status TEXT,
|
|
45
|
-
uptime INTEGER DEFAULT NULL,
|
|
46
|
-
policy TEXT DEFAULT NULL,
|
|
47
|
-
auto_restart INTEGER DEFAULT NULL,
|
|
48
|
-
requested_resources BLOB DEFAULT NULL)""")
|
|
49
|
-
cursor.execute("""\
|
|
50
|
-
CREATE TABLE IF NOT EXISTS replicas (
|
|
51
|
-
service_name TEXT,
|
|
52
|
-
replica_id INTEGER,
|
|
53
|
-
replica_info BLOB,
|
|
54
|
-
PRIMARY KEY (service_name, replica_id))""")
|
|
55
|
-
cursor.execute("""\
|
|
56
|
-
CREATE TABLE IF NOT EXISTS version_specs (
|
|
57
|
-
version INTEGER,
|
|
58
|
-
service_name TEXT,
|
|
59
|
-
spec BLOB,
|
|
60
|
-
PRIMARY KEY (service_name, version))""")
|
|
61
|
-
conn.commit()
|
|
62
|
-
|
|
63
|
-
# Backward compatibility.
|
|
64
|
-
db_utils.add_column_to_table(cursor, conn, 'services',
|
|
65
|
-
'requested_resources_str', 'TEXT')
|
|
66
|
-
# Deprecated: switched to `active_versions` below for the version
|
|
67
|
-
# considered active by the load balancer. The
|
|
68
|
-
# authscaler/replica_manager version can be found in the
|
|
69
|
-
# version_specs table.
|
|
70
|
-
db_utils.add_column_to_table(
|
|
71
|
-
cursor, conn, 'services', 'current_version',
|
|
72
|
-
f'INTEGER DEFAULT {constants.INITIAL_VERSION}')
|
|
73
|
-
# The versions that is activated for the service. This is a list
|
|
74
|
-
# of integers in json format.
|
|
75
|
-
db_utils.add_column_to_table(cursor, conn, 'services', 'active_versions',
|
|
76
|
-
f'TEXT DEFAULT {json.dumps([])!r}')
|
|
77
|
-
db_utils.add_column_to_table(cursor, conn, 'services',
|
|
78
|
-
'load_balancing_policy', 'TEXT DEFAULT NULL')
|
|
79
|
-
# Whether the service's load balancer is encrypted with TLS.
|
|
80
|
-
db_utils.add_column_to_table(cursor, conn, 'services', 'tls_encrypted',
|
|
81
|
-
'INTEGER DEFAULT 0')
|
|
82
|
-
conn.commit()
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
db_utils.SQLiteConn(_DB_PATH, create_table)
|
|
86
|
-
_UNIQUE_CONSTRAINT_FAILED_ERROR_MSG = 'UNIQUE constraint failed: services.name'
|
|
154
|
+
_UNIQUE_CONSTRAINT_FAILED_ERROR_MSGS = [
|
|
155
|
+
# sqlite
|
|
156
|
+
'UNIQUE constraint failed: services.name',
|
|
157
|
+
# postgres
|
|
158
|
+
'duplicate key value violates unique constraint "services_pkey"',
|
|
159
|
+
]
|
|
87
160
|
|
|
88
161
|
|
|
89
162
|
# === Statuses ===
|
|
@@ -247,153 +320,246 @@ _SERVICE_STATUS_TO_COLOR = {
|
|
|
247
320
|
}
|
|
248
321
|
|
|
249
322
|
|
|
323
|
+
@init_db
def add_service(name: str, controller_job_id: int, policy: str,
                requested_resources_str: str, load_balancing_policy: str,
                status: ServiceStatus, tls_encrypted: bool, pool: bool,
                controller_pid: int, entrypoint: str) -> bool:
    """Insert a new row into the services table.

    `tls_encrypted` and `pool` are stored as integers, and a freshly generated
    UUID4 string is stored in the `hash` column.

    Returns:
        True if the row was inserted, False if the insert failed with one of
        the known unique-constraint messages, i.e. the service already exists.

    Raises:
        ValueError: If the engine dialect is neither SQLite nor PostgreSQL.
        RuntimeError: If the insert raised any other integrity error.
    """
    assert _SQLALCHEMY_ENGINE is not None
    try:
        with orm.Session(_SQLALCHEMY_ENGINE) as session:
            dialect_name = _SQLALCHEMY_ENGINE.dialect.name
            if dialect_name == db_utils.SQLAlchemyDialect.SQLITE.value:
                insert_func = sqlite.insert
            elif dialect_name == db_utils.SQLAlchemyDialect.POSTGRESQL.value:
                insert_func = postgresql.insert
            else:
                raise ValueError('Unsupported database dialect')

            new_row = {
                'name': name,
                'controller_job_id': controller_job_id,
                'status': status.value,
                'policy': policy,
                'requested_resources_str': requested_resources_str,
                'load_balancing_policy': load_balancing_policy,
                'tls_encrypted': int(tls_encrypted),
                'pool': int(pool),
                'controller_pid': controller_pid,
                'hash': str(uuid.uuid4()),
                'entrypoint': entrypoint,
            }
            session.execute(insert_func(services_table).values(**new_row))
            session.commit()
    except sqlalchemy_exc.IntegrityError as e:
        error_text = str(e)
        already_exists = any(
            msg in error_text for msg in _UNIQUE_CONSTRAINT_FAILED_ERROR_MSGS)
        if not already_exists:
            raise RuntimeError('Unexpected database error') from e
        return False
    return True
|
|
276
367
|
|
|
277
368
|
|
|
369
|
+
@init_db
def update_service_controller_pid(service_name: str,
                                  controller_pid: int) -> None:
    """Store a new controller pid on the row of `service_name`.

    This is used to update the controller pid of a service on ha recovery.
    """
    assert _SQLALCHEMY_ENGINE is not None
    new_values = {services_table.c.controller_pid: controller_pid}
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        matching_services = session.query(services_table).filter(
            services_table.c.name == service_name)
        matching_services.update(new_values)
        session.commit()
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
@init_db
def remove_service(service_name: str) -> None:
    """Delete the services-table row whose name is `service_name`."""
    assert _SQLALCHEMY_ENGINE is not None
    delete_stmt = sqlalchemy.delete(services_table).where(
        services_table.c.name == service_name)
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        session.execute(delete_stmt)
        session.commit()
|
|
283
393
|
|
|
284
394
|
|
|
395
|
+
@init_db
def set_service_uptime(service_name: str, uptime: int) -> None:
    """Write `uptime` into the uptime column of the named service."""
    assert _SQLALCHEMY_ENGINE is not None
    new_values = {services_table.c.uptime: uptime}
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        matching_services = session.query(services_table).filter(
            services_table.c.name == service_name)
        matching_services.update(new_values)
        session.commit()
|
|
292
404
|
|
|
293
405
|
|
|
406
|
+
@init_db
def set_service_status_and_active_versions(
        service_name: str,
        status: ServiceStatus,
        active_versions: Optional[List[int]] = None) -> None:
    """Update a service's status and, optionally, its active versions.

    Args:
        service_name: Name of the service row to update.
        status: New status; its `.value` is what gets stored.
        active_versions: If not None, stored as a JSON-encoded list. If None,
            the stored active versions are left untouched.
    """
    assert _SQLALCHEMY_ENGINE is not None
    new_values = {services_table.c.status: status.value}
    if active_versions is not None:
        encoded_versions = json.dumps(active_versions)
        new_values[services_table.c.active_versions] = encoded_versions

    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        matching_services = session.query(services_table).filter(
            services_table.c.name == service_name)
        matching_services.update(new_values)
        session.commit()
|
|
309
422
|
|
|
310
423
|
|
|
424
|
+
@init_db
def set_service_controller_port(service_name: str,
                                controller_port: int) -> None:
    """Write `controller_port` into the row of the named service."""
    assert _SQLALCHEMY_ENGINE is not None
    new_values = {services_table.c.controller_port: controller_port}
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        matching_services = session.query(services_table).filter(
            services_table.c.name == service_name)
        matching_services.update(new_values)
        session.commit()
|
|
320
434
|
|
|
321
435
|
|
|
436
|
+
@init_db
def set_service_load_balancer_port(service_name: str,
                                   load_balancer_port: int) -> None:
    """Write `load_balancer_port` into the row of the named service."""
    assert _SQLALCHEMY_ENGINE is not None
    new_values = {services_table.c.load_balancer_port: load_balancer_port}
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        matching_services = session.query(services_table).filter(
            services_table.c.name == service_name)
        matching_services.update(new_values)
        session.commit()
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
def _get_service_from_row(r: 'row.RowMapping') -> Dict[str, Any]:
    """Build a service record dict from one row mapping.

    `r` must provide the services-table columns plus a `max_version` entry
    (supplied by the version subquery of the calling query). If a spec is
    stored for that version, its autoscaling policy string and load balancing
    policy replace the values read from the row.
    """
    service_name = r['name']
    current_version = r['max_version']

    raw_active_versions = r['active_versions']
    if raw_active_versions:
        active_versions = json.loads(raw_active_versions)
    else:
        active_versions = []

    record = {
        'name': service_name,
        'controller_job_id': r['controller_job_id'],
        'controller_port': r['controller_port'],
        'load_balancer_port': r['load_balancer_port'],
        'status': ServiceStatus[r['status']],
        'uptime': r['uptime'],
        'policy': r['policy'],
        # The version of the autoscaler/replica manager are on. It can be larger
        # than the active versions as the load balancer may not consider the
        # latest version to be active for serving traffic.
        'version': current_version,
        # The versions that is active for the load balancer. This is a list of
        # integers in json format. This is mainly for display purpose.
        'active_versions': active_versions,
        'requested_resources_str': r['requested_resources_str'],
        'load_balancing_policy': r['load_balancing_policy'],
        'tls_encrypted': bool(r['tls_encrypted']),
        'pool': bool(r['pool']),
        'controller_pid': r['controller_pid'],
        'hash': r['hash'],
        'entrypoint': r['entrypoint'],
    }

    latest_spec = get_spec(service_name, current_version)
    if latest_spec is None:
        return record
    record['policy'] = latest_spec.autoscaling_policy_str()
    record['load_balancing_policy'] = latest_spec.load_balancing_policy
    return record
|
|
356
481
|
|
|
357
482
|
|
|
483
|
+
@init_db
def get_services() -> List[Dict[str, Any]]:
    """Return a record for each service joined with its highest version.

    The services table is joined against a per-service MAX(version) subquery
    over the version specs table, so a service only appears if that subquery
    has a row for it.
    """
    assert _SQLALCHEMY_ENGINE is not None
    latest_versions = sqlalchemy.select(
        version_specs_table.c.service_name,
        sqlalchemy.func.max(
            version_specs_table.c.version).label('max_version'),
    ).group_by(version_specs_table.c.service_name).alias('v')
    joined = services_table.join(
        latest_versions,
        services_table.c.name == latest_versions.c.service_name)
    query = sqlalchemy.select(latest_versions.c.max_version,
                              services_table).select_from(joined)

    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        rows = session.execute(query).fetchall()
    return [
        _get_service_from_row(row._mapping)  # pylint: disable=protected-access
        for row in rows
    ]
|
|
370
503
|
|
|
371
504
|
|
|
505
|
+
@init_db
def get_num_services() -> int:
    """Return the row count of the services table."""
    assert _SQLALCHEMY_ENGINE is not None
    count_stmt = sqlalchemy.select(
        sqlalchemy.func.count()  # pylint: disable=not-callable
    ).select_from(services_table)
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        count_row = session.execute(count_stmt).fetchone()
        return count_row[0]
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
@init_db
def get_service_from_name(service_name: str) -> Optional[Dict[str, Any]]:
    """Get the record of a single service by name.

    The services table is joined against the MAX(version) of the service's
    rows in the version specs table, both filtered by `service_name`.

    Args:
        service_name: Name of the service to look up.

    Returns:
        The service record built by `_get_service_from_row`, or None if the
        joined query yields no row for this name.
    """
    assert _SQLALCHEMY_ENGINE is not None
    latest_version = sqlalchemy.select(
        version_specs_table.c.service_name,
        sqlalchemy.func.max(
            version_specs_table.c.version).label('max_version'),
    ).where(version_specs_table.c.service_name == service_name).group_by(
        version_specs_table.c.service_name).alias('v')
    joined = services_table.join(
        latest_version,
        services_table.c.name == latest_version.c.service_name)
    query = sqlalchemy.select(
        latest_version.c.max_version,
        services_table).select_from(joined).where(
            services_table.c.name == service_name)

    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        # Only the first row is ever used, so fetch just that one.
        row = session.execute(query).fetchone()
    if row is None:
        return None
    return _get_service_from_row(row._mapping)  # pylint: disable=protected-access
|
|
385
538
|
|
|
386
539
|
|
|
540
|
+
@init_db
def get_service_hash(service_name: str) -> Optional[str]:
    """Return the stored `hash` of a service, or None if no row matches."""
    assert _SQLALCHEMY_ENGINE is not None
    hash_stmt = sqlalchemy.select(services_table.c.hash).where(
        services_table.c.name == service_name)
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        hash_row = session.execute(hash_stmt).fetchone()
    if hash_row:
        return hash_row[0]
    return None
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
@init_db
def get_service_versions(service_name: str) -> List[int]:
    """Return the distinct version numbers stored for `service_name`."""
    assert _SQLALCHEMY_ENGINE is not None
    versions_stmt = sqlalchemy.select(
        version_specs_table.c.version.distinct()).where(
            version_specs_table.c.service_name == service_name)
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        version_rows = session.execute(versions_stmt).fetchall()
    versions = []
    for version_row in version_rows:
        versions.append(version_row[0])
    return versions
|
|
395
560
|
|
|
396
561
|
|
|
562
|
+
@init_db
|
|
397
563
|
def get_glob_service_names(
|
|
398
564
|
service_names: Optional[List[str]] = None) -> List[str]:
|
|
399
565
|
"""Get service names matching the glob patterns.
|
|
@@ -405,72 +571,97 @@ def get_glob_service_names(
|
|
|
405
571
|
Returns:
|
|
406
572
|
A list of non-duplicated service names.
|
|
407
573
|
"""
|
|
408
|
-
|
|
574
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
575
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
409
576
|
if service_names is None:
|
|
410
|
-
rows =
|
|
577
|
+
rows = session.execute(sqlalchemy.select(
|
|
578
|
+
services_table.c.name)).fetchall()
|
|
411
579
|
else:
|
|
412
580
|
rows = []
|
|
413
581
|
for service_name in service_names:
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
582
|
+
pattern_rows = session.execute(
|
|
583
|
+
sqlalchemy.select(services_table.c.name).where(
|
|
584
|
+
services_table.c.name.like(
|
|
585
|
+
service_name.replace('*', '%')))).fetchall()
|
|
586
|
+
rows.extend(pattern_rows)
|
|
418
587
|
return list({row[0] for row in rows})
|
|
419
588
|
|
|
420
589
|
|
|
421
590
|
# === Replica functions ===
|
|
591
|
+
@init_db
def add_or_update_replica(service_name: str, replica_id: int,
                          replica_info: 'replica_managers.ReplicaInfo') -> None:
    """Insert a replica row, or overwrite its info if the row already exists.

    `replica_info` is stored pickled. On a conflict on
    (service_name, replica_id), only the `replica_info` column is replaced.

    Raises:
        ValueError: If the engine dialect is neither SQLite nor PostgreSQL.
    """
    assert _SQLALCHEMY_ENGINE is not None
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        dialect_name = _SQLALCHEMY_ENGINE.dialect.name
        if dialect_name == db_utils.SQLAlchemyDialect.SQLITE.value:
            insert_func = sqlite.insert
        elif dialect_name == db_utils.SQLAlchemyDialect.POSTGRESQL.value:
            insert_func = postgresql.insert
        else:
            raise ValueError('Unsupported database dialect')

        plain_insert = insert_func(replicas_table).values(
            service_name=service_name,
            replica_id=replica_id,
            replica_info=pickle.dumps(replica_info))
        upsert = plain_insert.on_conflict_do_update(
            index_elements=['service_name', 'replica_id'],
            set_={'replica_info': plain_insert.excluded.replica_info})

        session.execute(upsert)
        session.commit()
|
|
433
617
|
|
|
618
|
+
|
|
619
|
+
@init_db
def remove_replica(service_name: str, replica_id: int) -> None:
    """Delete the replica row matching both `service_name` and `replica_id`."""
    assert _SQLALCHEMY_ENGINE is not None
    is_target_replica = sqlalchemy.and_(
        replicas_table.c.service_name == service_name,
        replicas_table.c.replica_id == replica_id)
    delete_stmt = sqlalchemy.delete(replicas_table).where(is_target_replica)
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        session.execute(delete_stmt)
        session.commit()
|
|
442
629
|
|
|
443
630
|
|
|
631
|
+
@init_db
def get_replica_info_from_id(
        service_name: str,
        replica_id: int) -> Optional['replica_managers.ReplicaInfo']:
    """Load the unpickled info of one replica.

    Returns:
        The stored replica info, or None if no row matches the given service
        name and replica id.
    """
    assert _SQLALCHEMY_ENGINE is not None
    is_target_replica = sqlalchemy.and_(
        replicas_table.c.service_name == service_name,
        replicas_table.c.replica_id == replica_id)
    info_stmt = sqlalchemy.select(
        replicas_table.c.replica_info).where(is_target_replica)
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        info_row = session.execute(info_stmt).fetchone()
    if not info_row:
        return None
    return pickle.loads(info_row[0])
|
|
457
644
|
|
|
458
645
|
|
|
646
|
+
@init_db
def get_replica_infos(
        service_name: str) -> List['replica_managers.ReplicaInfo']:
    """Return the unpickled info of every replica row of `service_name`."""
    assert _SQLALCHEMY_ENGINE is not None
    infos_stmt = sqlalchemy.select(replicas_table.c.replica_info).where(
        replicas_table.c.service_name == service_name)
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        info_rows = session.execute(infos_stmt).fetchall()
    replica_infos = []
    for info_row in info_rows:
        replica_infos.append(pickle.loads(info_row[0]))
    return replica_infos
|
|
468
656
|
|
|
469
657
|
|
|
658
|
+
@init_db
|
|
470
659
|
def total_number_provisioning_replicas() -> int:
|
|
471
660
|
"""Returns the total number of provisioning replicas."""
|
|
472
|
-
|
|
473
|
-
|
|
661
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
662
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
663
|
+
rows = session.execute(sqlalchemy.select(
|
|
664
|
+
replicas_table.c.replica_info)).fetchall()
|
|
474
665
|
provisioning_count = 0
|
|
475
666
|
for row in rows:
|
|
476
667
|
replica_info: 'replica_managers.ReplicaInfo' = pickle.loads(row[0])
|
|
@@ -479,6 +670,38 @@ def total_number_provisioning_replicas() -> int:
|
|
|
479
670
|
return provisioning_count
|
|
480
671
|
|
|
481
672
|
|
|
673
|
+
@init_db
def total_number_terminating_replicas() -> int:
    """Count replicas, across all services, that are being terminated.

    A replica is counted when its `status_property.sky_down_status` equals
    `ProcessStatus.RUNNING`.
    """
    assert _SQLALCHEMY_ENGINE is not None
    all_infos_stmt = sqlalchemy.select(replicas_table.c.replica_info)
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        info_rows = session.execute(all_infos_stmt).fetchall()

    def _is_terminating(pickled_info) -> bool:
        info: 'replica_managers.ReplicaInfo' = pickle.loads(pickled_info)
        return (info.status_property.sky_down_status ==
                common_utils.ProcessStatus.RUNNING)

    return sum(1 for info_row in info_rows if _is_terminating(info_row[0]))
|
|
687
|
+
|
|
688
|
+
|
|
689
|
+
@init_db
def total_number_scheduled_to_terminate_replicas() -> int:
    """Returns the total number of replicas scheduled to terminate.

    A replica is counted when its `status_property.sky_down_status` equals
    `ProcessStatus.SCHEDULED`. Replicas whose status is `RUNNING` are counted
    by `total_number_terminating_replicas` instead.
    """
    assert _SQLALCHEMY_ENGINE is not None
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        rows = session.execute(sqlalchemy.select(
            replicas_table.c.replica_info)).fetchall()
    scheduled_count = 0
    for row in rows:
        replica_info: 'replica_managers.ReplicaInfo' = pickle.loads(row[0])
        if (replica_info.status_property.sky_down_status ==
                common_utils.ProcessStatus.SCHEDULED):
            scheduled_count += 1
    return scheduled_count
|
|
703
|
+
|
|
704
|
+
|
|
482
705
|
def get_replicas_at_status(
|
|
483
706
|
service_name: str,
|
|
484
707
|
status: ReplicaStatus,
|
|
@@ -488,105 +711,185 @@ def get_replicas_at_status(
|
|
|
488
711
|
|
|
489
712
|
|
|
490
713
|
# === Version functions ===
|
|
714
|
+
@init_db
def add_version(service_name: str) -> int:
    """Reserve the next version number of a service and return it.

    A row with a pickled `None` spec is inserted; its version is computed in
    the same statement as COALESCE(MAX(version), 0) + 1 over the existing rows
    of `service_name`.
    """
    assert _SQLALCHEMY_ENGINE is not None
    # Next version as a scalar subquery, so that the computation and the
    # insert happen in a single statement.
    next_version = sqlalchemy.select(
        sqlalchemy.func.coalesce(
            sqlalchemy.func.max(version_specs_table.c.version), 0) + 1
    ).where(
        version_specs_table.c.service_name == service_name).scalar_subquery()

    # INSERT ... RETURNING version hands back the number that was assigned.
    insert_stmt = sqlalchemy.insert(version_specs_table).values(
        service_name=service_name,
        version=next_version,
        spec=pickle.dumps(None),
    ).returning(version_specs_table.c.version)

    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        new_version = session.execute(insert_stmt).scalar()
        session.commit()
        return new_version
|
|
736
|
+
|
|
737
|
+
|
|
738
|
+
@init_db
def add_or_update_version(service_name: str, version: int,
                          spec: 'service_spec.SkyServiceSpec') -> None:
    """Store the pickled `spec` for a (service_name, version) pair.

    A new row is inserted; on a conflict on (service_name, version) only the
    `spec` column of the existing row is replaced.

    Raises:
        ValueError: If the engine dialect is neither SQLite nor PostgreSQL.
    """
    assert _SQLALCHEMY_ENGINE is not None
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        dialect_name = _SQLALCHEMY_ENGINE.dialect.name
        if dialect_name == db_utils.SQLAlchemyDialect.SQLITE.value:
            insert_func = sqlite.insert
        elif dialect_name == db_utils.SQLAlchemyDialect.POSTGRESQL.value:
            insert_func = postgresql.insert
        else:
            raise ValueError('Unsupported database dialect')

        plain_insert = insert_func(version_specs_table).values(
            service_name=service_name,
            version=version,
            spec=pickle.dumps(spec))
        upsert = plain_insert.on_conflict_do_update(
            index_elements=['service_name', 'version'],
            set_={'spec': plain_insert.excluded.spec})

        session.execute(upsert)
        session.commit()
|
|
518
761
|
|
|
519
762
|
|
|
763
|
+
@init_db
def remove_service_versions(service_name: str) -> None:
    """Removes all version specs of a service from the database.

    Every row of the version specs table whose service name equals
    `service_name` is deleted.
    """
    assert _SQLALCHEMY_ENGINE is not None
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        session.execute(
            sqlalchemy.delete(version_specs_table).where(
                version_specs_table.c.service_name == service_name))
        session.commit()
|
|
527
772
|
|
|
528
773
|
|
|
774
|
+
@init_db
def get_spec(service_name: str,
             version: int) -> Optional['service_spec.SkyServiceSpec']:
    """Load the unpickled spec stored for (service_name, version).

    Returns:
        The unpickled spec column, or None if no such row exists.
    """
    assert _SQLALCHEMY_ENGINE is not None
    is_target_version = sqlalchemy.and_(
        version_specs_table.c.service_name == service_name,
        version_specs_table.c.version == version)
    spec_stmt = sqlalchemy.select(
        version_specs_table.c.spec).where(is_target_version)
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        spec_row = session.execute(spec_stmt).fetchone()
    if not spec_row:
        return None
    return pickle.loads(spec_row[0])
|
|
541
786
|
|
|
542
787
|
|
|
788
|
+
@init_db
def delete_version(service_name: str, version: int) -> None:
    """Delete the version-spec row matching `service_name` and `version`."""
    assert _SQLALCHEMY_ENGINE is not None
    is_target_version = sqlalchemy.and_(
        version_specs_table.c.service_name == service_name,
        version_specs_table.c.version == version)
    delete_stmt = sqlalchemy.delete(version_specs_table).where(
        is_target_version)
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        session.execute(delete_stmt)
        session.commit()
|
|
551
799
|
|
|
552
800
|
|
|
801
|
+
@init_db
def delete_all_versions(service_name: str) -> None:
    """Delete every version-spec row that belongs to `service_name`."""
    assert _SQLALCHEMY_ENGINE is not None
    delete_stmt = sqlalchemy.delete(version_specs_table).where(
        version_specs_table.c.service_name == service_name)
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        session.execute(delete_stmt)
        session.commit()
|
|
560
810
|
|
|
561
811
|
|
|
812
|
+
@init_db
def get_latest_version(service_name: str) -> Optional[int]:
    """Return MAX(version) over the version specs of `service_name`.

    Returns:
        The first column of the fetched row, or None if no row was fetched.
        NOTE(review): when the service has no versions the MAX aggregate is
        presumably NULL, which would also come back as None - confirm.
    """
    assert _SQLALCHEMY_ENGINE is not None
    max_version_stmt = sqlalchemy.select(
        sqlalchemy.func.max(version_specs_table.c.version)).where(
            version_specs_table.c.service_name == service_name)
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        max_version_row = session.execute(max_version_stmt).fetchone()
    if max_version_row:
        return max_version_row[0]
    return None
|
|
571
822
|
|
|
572
823
|
|
|
824
|
+
@init_db
def get_service_controller_port(service_name: str) -> int:
    """Return the stored controller port of a service.

    Raises:
        ValueError: If no service row has the given name.
    """
    assert _SQLALCHEMY_ENGINE is not None
    port_stmt = sqlalchemy.select(services_table.c.controller_port).where(
        services_table.c.name == service_name)
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        port_row = session.execute(port_stmt).fetchone()
    if port_row is not None:
        return port_row[0]
    raise ValueError(f'Service {service_name} does not exist.')
|
|
582
835
|
|
|
583
836
|
|
|
837
|
+
@init_db
def get_service_load_balancer_port(service_name: str) -> int:
    """Return the stored load balancer port of a service.

    Raises:
        ValueError: If no service row has the given name.
    """
    assert _SQLALCHEMY_ENGINE is not None
    port_stmt = sqlalchemy.select(services_table.c.load_balancer_port).where(
        services_table.c.name == service_name)
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        port_row = session.execute(port_stmt).fetchone()
    if port_row is not None:
        return port_row[0]
    raise ValueError(f'Service {service_name} does not exist.')
|
|
848
|
+
|
|
849
|
+
|
|
850
|
+
@init_db
def get_ha_recovery_script(service_name: str) -> Optional[str]:
    """Return the HA recovery script stored for a service, or None if absent."""
    assert _SQLALCHEMY_ENGINE is not None
    script_stmt = sqlalchemy.select(
        serve_ha_recovery_script_table.c.script).where(
            serve_ha_recovery_script_table.c.service_name == service_name)
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        script_row = session.execute(script_stmt).fetchone()
    if script_row:
        return script_row[0]
    return None
|
|
860
|
+
|
|
861
|
+
|
|
862
|
+
@init_db
def set_ha_recovery_script(service_name: str, script: str) -> None:
    """Store the HA recovery script of a service, replacing any existing one.

    A row is inserted; on a conflict on `service_name` only the `script`
    column of the existing row is overwritten.

    Raises:
        ValueError: If the engine dialect is neither SQLite nor PostgreSQL.
    """
    assert _SQLALCHEMY_ENGINE is not None
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        dialect_name = _SQLALCHEMY_ENGINE.dialect.name
        if dialect_name == db_utils.SQLAlchemyDialect.SQLITE.value:
            insert_func = sqlite.insert
        elif dialect_name == db_utils.SQLAlchemyDialect.POSTGRESQL.value:
            insert_func = postgresql.insert
        else:
            raise ValueError('Unsupported database dialect')

        plain_insert = insert_func(serve_ha_recovery_script_table).values(
            service_name=service_name, script=script)
        upsert = plain_insert.on_conflict_do_update(
            index_elements=['service_name'],
            set_={'script': plain_insert.excluded.script})

        session.execute(upsert)
        session.commit()
|
|
885
|
+
|
|
886
|
+
|
|
887
|
+
@init_db
def remove_ha_recovery_script(service_name: str) -> None:
    """Delete the HA recovery script row stored for `service_name`."""
    assert _SQLALCHEMY_ENGINE is not None
    delete_stmt = sqlalchemy.delete(serve_ha_recovery_script_table).where(
        serve_ha_recovery_script_table.c.service_name == service_name)
    with orm.Session(_SQLALCHEMY_ENGINE) as session:
        session.execute(delete_stmt)
        session.commit()
|