skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +25 -7
- sky/adaptors/common.py +24 -1
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +170 -17
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +167 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1299 -380
- sky/backends/cloud_vm_ray_backend.py +1715 -518
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/wheel_utils.py +37 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +89 -48
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +335 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +491 -203
- sky/cli.py +5 -6005
- sky/client/{cli.py → cli/command.py} +2477 -1885
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +320 -0
- sky/client/common.py +70 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1203 -297
- sky/client/sdk_async.py +833 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +73 -0
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +358 -93
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +127 -36
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +563 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +206 -80
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -83
- sky/clouds/seeweb.py +466 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +177 -124
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +349 -139
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
- sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1451 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +132 -2
- sky/execution.py +206 -63
- sky/global_user_state.py +2374 -586
- sky/jobs/__init__.py +5 -0
- sky/jobs/client/sdk.py +242 -65
- sky/jobs/client/sdk_async.py +143 -0
- sky/jobs/constants.py +9 -8
- sky/jobs/controller.py +839 -277
- sky/jobs/file_content_utils.py +80 -0
- sky/jobs/log_gc.py +201 -0
- sky/jobs/recovery_strategy.py +398 -152
- sky/jobs/scheduler.py +315 -189
- sky/jobs/server/core.py +829 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2092 -701
- sky/jobs/utils.py +1242 -160
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +443 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +135 -50
- sky/provision/azure/instance.py +10 -5
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +114 -23
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +93 -14
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +789 -247
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +40 -43
- sky/provision/kubernetes/utils.py +1192 -531
- sky/provision/kubernetes/volume.py +282 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +196 -91
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +110 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +180 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +531 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +807 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/vsphere/common/vim_utils.py +1 -2
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +9 -19
- sky/py.typed +0 -0
- sky/resources.py +844 -118
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +225 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +10 -8
- sky/serve/controller.py +64 -19
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +115 -1
- sky/serve/replica_managers.py +273 -162
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +554 -251
- sky/serve/serve_utils.py +733 -220
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +133 -48
- sky/serve/service_spec.py +135 -16
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +200 -0
- sky/server/common.py +475 -181
- sky/server/config.py +81 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +229 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/requests/executor.py +528 -138
- sky/server/requests/payloads.py +351 -17
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +120 -0
- sky/server/requests/requests.py +817 -224
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +106 -0
- sky/server/rest.py +417 -0
- sky/server/server.py +1290 -284
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +345 -57
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +5 -0
- sky/setup_files/alembic.ini +156 -0
- sky/setup_files/dependencies.py +136 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +102 -5
- sky/skylet/attempt_skylet.py +1 -0
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +27 -20
- sky/skylet/constants.py +171 -19
- sky/skylet/events.py +105 -21
- sky/skylet/job_lib.py +335 -104
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/services.py +564 -0
- sky/skylet/skylet.py +63 -4
- sky/skylet/subprocess_daemon.py +103 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +621 -137
- sky/templates/aws-ray.yml.j2 +10 -3
- sky/templates/azure-ray.yml.j2 +1 -1
- sky/templates/do-ray.yml.j2 +1 -1
- sky/templates/gcp-ray.yml.j2 +57 -0
- sky/templates/hyperbolic-ray.yml.j2 +67 -0
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +607 -51
- sky/templates/lambda-ray.yml.j2 +1 -1
- sky/templates/nebius-ray.yml.j2 +33 -12
- sky/templates/paperspace-ray.yml.j2 +1 -1
- sky/templates/primeintellect-ray.yml.j2 +71 -0
- sky/templates/runpod-ray.yml.j2 +9 -1
- sky/templates/scp-ray.yml.j2 +3 -50
- sky/templates/seeweb-ray.yml.j2 +108 -0
- sky/templates/shadeform-ray.yml.j2 +72 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/websocket_proxy.py +178 -18
- sky/usage/usage_lib.py +18 -11
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +387 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +34 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +16 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +310 -87
- sky/utils/config_utils.py +87 -5
- sky/utils/context.py +402 -0
- sky/utils/context_utils.py +222 -0
- sky/utils/controller_utils.py +264 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +470 -0
- sky/utils/db/migration_utils.py +133 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +13 -27
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +5 -5
- sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +368 -0
- sky/utils/log_utils.py +300 -6
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +213 -37
- sky/utils/schemas.py +905 -147
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +38 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/timeline.py +24 -52
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +86 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +149 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +258 -0
- sky/volumes/server/server.py +122 -0
- sky/volumes/volume.py +212 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
- skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/serve/autoscalers.py
CHANGED
|
@@ -6,7 +6,7 @@ import enum
|
|
|
6
6
|
import math
|
|
7
7
|
import time
|
|
8
8
|
import typing
|
|
9
|
-
from typing import Any, Dict, Iterable, List, Optional, Union
|
|
9
|
+
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
|
|
10
10
|
|
|
11
11
|
from sky import sky_logging
|
|
12
12
|
from sky.serve import constants
|
|
@@ -175,6 +175,14 @@ class Autoscaler:
|
|
|
175
175
|
"""Collect request information from aggregator for autoscaling."""
|
|
176
176
|
raise NotImplementedError
|
|
177
177
|
|
|
178
|
+
def info(self) -> Dict[str, Any]:
|
|
179
|
+
"""Get information about the autoscaler."""
|
|
180
|
+
return {
|
|
181
|
+
'target_num_replicas': self.target_num_replicas,
|
|
182
|
+
'min_replicas': self.min_replicas,
|
|
183
|
+
'max_replicas': self.max_replicas,
|
|
184
|
+
}
|
|
185
|
+
|
|
178
186
|
def _generate_scaling_decisions(
|
|
179
187
|
self,
|
|
180
188
|
replica_infos: List['replica_managers.ReplicaInfo'],
|
|
@@ -205,6 +213,10 @@ class Autoscaler:
|
|
|
205
213
|
# TODO(MaoZiming): use NAME to get the class.
|
|
206
214
|
if spec.use_ondemand_fallback:
|
|
207
215
|
return FallbackRequestRateAutoscaler(service_name, spec)
|
|
216
|
+
elif isinstance(spec.target_qps_per_replica, dict):
|
|
217
|
+
# Use instance-aware autoscaler
|
|
218
|
+
# when target_qps_per_replica is a dict
|
|
219
|
+
return InstanceAwareRequestRateAutoscaler(service_name, spec)
|
|
208
220
|
else:
|
|
209
221
|
return RequestRateAutoscaler(service_name, spec)
|
|
210
222
|
|
|
@@ -399,6 +411,8 @@ class _AutoscalerWithHysteresis(Autoscaler):
|
|
|
399
411
|
# `_set_target_num_replicas_with_hysteresis` to have the replicas
|
|
400
412
|
# quickly scale after each update.
|
|
401
413
|
self.target_num_replicas = self._calculate_target_num_replicas()
|
|
414
|
+
logger.debug(f'Target number of replicas: {self.target_num_replicas}'
|
|
415
|
+
'after update_version.')
|
|
402
416
|
# Cleanup hysteresis counters.
|
|
403
417
|
self.upscale_counter = 0
|
|
404
418
|
self.downscale_counter = 0
|
|
@@ -456,20 +470,28 @@ class RequestRateAutoscaler(_AutoscalerWithHysteresis):
|
|
|
456
470
|
request_timestamps: All request timestamps within the window.
|
|
457
471
|
"""
|
|
458
472
|
super().__init__(service_name, spec)
|
|
459
|
-
self.target_qps_per_replica: Optional[
|
|
460
|
-
float] = spec.target_qps_per_replica
|
|
473
|
+
self.target_qps_per_replica: Optional[Union[float, Dict[
|
|
474
|
+
str, float]]] = spec.target_qps_per_replica
|
|
461
475
|
self.qps_window_size: int = constants.AUTOSCALER_QPS_WINDOW_SIZE_SECONDS
|
|
462
476
|
self.request_timestamps: List[float] = []
|
|
463
477
|
|
|
464
478
|
def _calculate_target_num_replicas(self) -> int:
|
|
465
479
|
if self.target_qps_per_replica is None:
|
|
466
480
|
return self.min_replicas
|
|
481
|
+
|
|
482
|
+
# RequestRateAutoscaler should only handle float values
|
|
483
|
+
if isinstance(self.target_qps_per_replica, dict):
|
|
484
|
+
raise ValueError('RequestRateAutoscaler does not support dict '
|
|
485
|
+
'target_qps_per_replica. Should use '
|
|
486
|
+
'InstanceAwareRequestRateAutoscaler instead.')
|
|
487
|
+
|
|
467
488
|
num_requests_per_second = len(
|
|
468
489
|
self.request_timestamps) / self.qps_window_size
|
|
469
|
-
target_num_replicas =
|
|
470
|
-
|
|
490
|
+
target_num_replicas = \
|
|
491
|
+
math.ceil(num_requests_per_second / self.target_qps_per_replica)
|
|
471
492
|
logger.info(f'Requests per second: {num_requests_per_second}. '
|
|
472
493
|
f'Target number of replicas: {target_num_replicas}.')
|
|
494
|
+
|
|
473
495
|
return self._clip_target_num_replicas(target_num_replicas)
|
|
474
496
|
|
|
475
497
|
def update_version(self, version: int, spec: 'service_spec.SkyServiceSpec',
|
|
@@ -502,6 +524,7 @@ class RequestRateAutoscaler(_AutoscalerWithHysteresis):
|
|
|
502
524
|
) -> List[AutoscalerDecision]:
|
|
503
525
|
"""Generate Autoscaling decisions based on request rate."""
|
|
504
526
|
|
|
527
|
+
# Use standard hysteresis-based logic (non-instance-aware)
|
|
505
528
|
self._set_target_num_replicas_with_hysteresis()
|
|
506
529
|
|
|
507
530
|
latest_nonterminal_replicas: List['replica_managers.ReplicaInfo'] = []
|
|
@@ -530,6 +553,7 @@ class RequestRateAutoscaler(_AutoscalerWithHysteresis):
|
|
|
530
553
|
if len(latest_nonterminal_replicas) > target_num_replicas:
|
|
531
554
|
num_replicas_to_scale_down = (len(latest_nonterminal_replicas) -
|
|
532
555
|
target_num_replicas)
|
|
556
|
+
# Use standard downscaling logic
|
|
533
557
|
replicas_to_scale_down = (
|
|
534
558
|
_select_nonterminal_replicas_to_scale_down(
|
|
535
559
|
num_replicas_to_scale_down, latest_nonterminal_replicas))
|
|
@@ -554,6 +578,334 @@ class RequestRateAutoscaler(_AutoscalerWithHysteresis):
|
|
|
554
578
|
logger.info(f'Remaining dynamic states: {dynamic_states}')
|
|
555
579
|
|
|
556
580
|
|
|
581
|
+
class InstanceAwareRequestRateAutoscaler(RequestRateAutoscaler):
|
|
582
|
+
"""Instance-aware RequestRateAutoscaler:
|
|
583
|
+
Autoscale based on each replica's GPU-specific QPS.
|
|
584
|
+
|
|
585
|
+
This autoscaler considers different QPS targets for different GPU types
|
|
586
|
+
when target_qps_per_replica is provided as a dictionary mapping GPU types
|
|
587
|
+
to their respective QPS targets.
|
|
588
|
+
"""
|
|
589
|
+
|
|
590
|
+
def __init__(self, service_name: str,
|
|
591
|
+
spec: 'service_spec.SkyServiceSpec') -> None:
|
|
592
|
+
super().__init__(service_name, spec)
|
|
593
|
+
# Ensure target_qps_per_replica is a dict for instance-aware logic
|
|
594
|
+
assert isinstance(spec.target_qps_per_replica, dict), \
|
|
595
|
+
'InstanceAware Autoscaler requires dict type target_qps_per_replica'
|
|
596
|
+
# Re-assign with correct type using setattr to avoid typing issues
|
|
597
|
+
self.target_qps_per_replica = spec.target_qps_per_replica
|
|
598
|
+
|
|
599
|
+
def _generate_scaling_decisions(
    self,
    replica_infos: List['replica_managers.ReplicaInfo'],
) -> List[AutoscalerDecision]:
    """Produce scale-up / scale-down decisions for the latest version.

    The target replica count is refreshed with the instance-aware logic
    first; the difference between that target and the number of
    non-terminal replicas on ``self.latest_version`` is then turned into
    decisions.

    Args:
        replica_infos: All replicas currently known to the caller.

    Returns:
        One ``SCALE_UP`` decision (with ``target=None``) per missing replica,
        or one ``SCALE_DOWN`` decision per replica id chosen by
        ``_select_replicas_to_scale_down_by_qps``. Empty when the count
        already matches the target.
    """
    # target_qps_per_replica is asserted to be a dict at construction, so the
    # instance-aware path is always the one used.
    self._set_target_num_replicas_with_instance_aware_logic(replica_infos)

    latest_nonterminal_replicas: List['replica_managers.ReplicaInfo'] = [
        info for info in replica_infos
        if not info.is_terminal and info.version == self.latest_version
    ]

    target_num_replicas = self.get_final_target_num_replicas()
    current_num_replicas = len(latest_nonterminal_replicas)
    # Positive: replicas are missing. Negative: there are too many.
    replica_gap = target_num_replicas - current_num_replicas

    scaling_decisions: List[AutoscalerDecision] = []
    if replica_gap > 0:
        # Scale-ups carry no resources override, hence target=None.
        scaling_decisions.extend(
            AutoscalerDecision(AutoscalerDecisionOperator.SCALE_UP,
                               target=None) for _ in range(replica_gap))
    elif replica_gap < 0:
        victim_ids = self._select_replicas_to_scale_down_by_qps(
            -replica_gap, latest_nonterminal_replicas)
        scaling_decisions.extend(
            AutoscalerDecision(AutoscalerDecisionOperator.SCALE_DOWN,
                               target=replica_id)
            for replica_id in victim_ids)

    # Replicas of outdated versions are not touched here.
    num_scale_up = sum(1 for decision in scaling_decisions if
                       decision.operator == AutoscalerDecisionOperator.SCALE_UP)
    num_scale_down = sum(
        1 for decision in scaling_decisions
        if decision.operator == AutoscalerDecisionOperator.SCALE_DOWN)
    logger.info(f'Scaling decisions: '
                f'{num_scale_up} scale up, '
                f'{num_scale_down} scale down '
                f'(latest nonterminal: {current_num_replicas}, '
                f'target: {target_num_replicas})')

    return scaling_decisions
|
|
656
|
+
|
|
657
|
+
def _set_target_num_replicas_with_instance_aware_logic(
|
|
658
|
+
self, replica_infos: List['replica_managers.ReplicaInfo']) -> None:
|
|
659
|
+
"""Set target_num_replicas using instance-aware logic."""
|
|
660
|
+
assert isinstance(self.target_qps_per_replica,
|
|
661
|
+
dict), 'Expected dict for instance-aware logic'
|
|
662
|
+
target_qps_dict = self.target_qps_per_replica
|
|
663
|
+
|
|
664
|
+
num_requests_per_second = len(
|
|
665
|
+
self.request_timestamps) / self.qps_window_size
|
|
666
|
+
|
|
667
|
+
total_qps = self._calculate_total_qps_from_replicas(replica_infos)
|
|
668
|
+
if total_qps > 0:
|
|
669
|
+
if num_requests_per_second >= total_qps:
|
|
670
|
+
# for upscaling, max_target_qps is the standard qps
|
|
671
|
+
max_target_qps = max(target_qps_dict.values())
|
|
672
|
+
over_request_num = num_requests_per_second - total_qps
|
|
673
|
+
current_num_replicas = len(replica_infos)
|
|
674
|
+
raw_target_num = current_num_replicas + math.ceil(
|
|
675
|
+
over_request_num / max_target_qps)
|
|
676
|
+
target_num_replicas = self._clip_target_num_replicas(
|
|
677
|
+
raw_target_num)
|
|
678
|
+
logger.info(
|
|
679
|
+
f'Instance-aware autoscaling: total QPS {total_qps}, '
|
|
680
|
+
f'num_requests_per_second: {num_requests_per_second}, '
|
|
681
|
+
f'upscaling, using maximum QPS {max_target_qps} '
|
|
682
|
+
f'from {target_qps_dict}, '
|
|
683
|
+
f'target replicas: {target_num_replicas}')
|
|
684
|
+
else:
|
|
685
|
+
# for downscaling, use qps for every ready_target_qps_list
|
|
686
|
+
# to calculate target_num_replicas
|
|
687
|
+
ready_target_qps_list = \
|
|
688
|
+
self._extract_target_qps_list_from_ready_replicas(
|
|
689
|
+
replica_infos)
|
|
690
|
+
ready_target_qps_list = sorted(ready_target_qps_list,
|
|
691
|
+
reverse=True)
|
|
692
|
+
if not ready_target_qps_list:
|
|
693
|
+
# Fallback to maximum QPS from config if no ready replicas
|
|
694
|
+
ready_target_qps_list = [max(target_qps_dict.values())]
|
|
695
|
+
|
|
696
|
+
raw_target_num = 0
|
|
697
|
+
qps_sum = 0.0
|
|
698
|
+
for qps in ready_target_qps_list:
|
|
699
|
+
raw_target_num += 1
|
|
700
|
+
qps_sum += qps
|
|
701
|
+
if qps_sum > num_requests_per_second:
|
|
702
|
+
break
|
|
703
|
+
|
|
704
|
+
target_num_replicas = self._clip_target_num_replicas(
|
|
705
|
+
raw_target_num)
|
|
706
|
+
logger.info(
|
|
707
|
+
f'Instance-aware autoscaling: total QPS {total_qps}, '
|
|
708
|
+
f'num_requests_per_second: {num_requests_per_second}, '
|
|
709
|
+
f'downscaling, using ready QPS list '
|
|
710
|
+
f'{ready_target_qps_list}, '
|
|
711
|
+
f'target replicas: {target_num_replicas}')
|
|
712
|
+
else:
|
|
713
|
+
# no replica is ready; use the normal min_replicas
|
|
714
|
+
target_num_replicas = self._clip_target_num_replicas(
|
|
715
|
+
self.min_replicas)
|
|
716
|
+
logger.info(f'Instance-aware autoscaling: no replica QPS available,'
|
|
717
|
+
f' target replicas: {target_num_replicas}')
|
|
718
|
+
|
|
719
|
+
# Apply hysteresis logic
|
|
720
|
+
old_target_num_replicas = self.target_num_replicas
|
|
721
|
+
|
|
722
|
+
# Faster scale up when there is no replica.
|
|
723
|
+
if self.target_num_replicas == 0:
|
|
724
|
+
self.target_num_replicas = target_num_replicas
|
|
725
|
+
elif target_num_replicas > self.target_num_replicas:
|
|
726
|
+
self.upscale_counter += 1
|
|
727
|
+
self.downscale_counter = 0
|
|
728
|
+
if self.upscale_counter >= self.scale_up_threshold:
|
|
729
|
+
self.upscale_counter = 0
|
|
730
|
+
self.target_num_replicas = target_num_replicas
|
|
731
|
+
elif target_num_replicas < self.target_num_replicas:
|
|
732
|
+
self.downscale_counter += 1
|
|
733
|
+
self.upscale_counter = 0
|
|
734
|
+
if self.downscale_counter >= self.scale_down_threshold:
|
|
735
|
+
self.downscale_counter = 0
|
|
736
|
+
self.target_num_replicas = target_num_replicas
|
|
737
|
+
else:
|
|
738
|
+
self.upscale_counter = self.downscale_counter = 0
|
|
739
|
+
|
|
740
|
+
logger.info(
|
|
741
|
+
f'Instance-aware: Old target number of replicas: '
|
|
742
|
+
f'{old_target_num_replicas}. '
|
|
743
|
+
f'Current target number of replicas: {target_num_replicas}. '
|
|
744
|
+
f'Final target number of replicas: {self.target_num_replicas}. '
|
|
745
|
+
f'Num overprovision: {self.num_overprovision}. '
|
|
746
|
+
f'Upscale counter: {self.upscale_counter}/'
|
|
747
|
+
f'{self.scale_up_threshold}. '
|
|
748
|
+
f'Downscale counter: {self.downscale_counter}/'
|
|
749
|
+
f'{self.scale_down_threshold}. ')
|
|
750
|
+
|
|
751
|
+
def _calculate_total_qps_from_replicas(
        self, replica_infos: List['replica_managers.ReplicaInfo']) -> float:
    """Sum the target QPS of every replica that counts toward capacity.

    Only replicas whose status is READY, STARTING or PROVISIONING
    contribute; each contributes the target QPS configured for its GPU
    type. Replicas in any other status are logged and skipped.

    Args:
        replica_infos: Replicas to inspect.

    Returns:
        The accumulated target QPS, ``0.0`` if no replica qualifies.
    """
    logger.info(f'Calculating total QPS from {len(replica_infos)} replicas')

    # The set of statuses is the same for every replica, so build it once.
    counted_statuses = (
        serve_state.ReplicaStatus.READY,
        serve_state.ReplicaStatus.STARTING,
        serve_state.ReplicaStatus.PROVISIONING,
    )

    capacity = 0.0
    for replica in replica_infos:
        if replica.status in counted_statuses:
            gpu = self._get_gpu_type_from_replica_info(replica)
            logger.info(f'Processing replica {replica.replica_id} '
                        f'with GPU type: {gpu}')

            # Lookup tolerates config keys with a ':<count>' suffix.
            gpu_qps = self._get_target_qps_for_gpu_type(gpu)
            capacity += gpu_qps
            logger.info(f'GPU type {gpu} -> {gpu_qps} QPS')
        else:
            logger.info(f'Skipping replica {replica.replica_id} '
                        f'with status: {replica.status}')

    logger.info(f'Calculated total QPS: {capacity}')
    return capacity
|
|
780
|
+
|
|
781
|
+
def _get_target_qps_for_gpu_type(self, gpu_type: str) -> float:
|
|
782
|
+
"""Get target QPS for a specific GPU type with flexible matching."""
|
|
783
|
+
assert isinstance(self.target_qps_per_replica,
|
|
784
|
+
dict), 'Expected dict for instance-aware logic'
|
|
785
|
+
target_qps_dict = self.target_qps_per_replica
|
|
786
|
+
|
|
787
|
+
# Direct match first
|
|
788
|
+
if gpu_type in target_qps_dict:
|
|
789
|
+
return target_qps_dict[gpu_type]
|
|
790
|
+
|
|
791
|
+
# Try matching by base name (e.g., 'A100' matches 'A100:1')
|
|
792
|
+
for config_key in target_qps_dict.keys():
|
|
793
|
+
# Remove count suffix (e.g., 'A100:1' -> 'A100')
|
|
794
|
+
base_name = config_key.split(':')[0]
|
|
795
|
+
if gpu_type == base_name:
|
|
796
|
+
return target_qps_dict[config_key]
|
|
797
|
+
|
|
798
|
+
# Fallback to minimum QPS
|
|
799
|
+
logger.warning(f'No matching QPS found for GPU type: {gpu_type}. '
|
|
800
|
+
f'Available types: {list(target_qps_dict.keys())}. '
|
|
801
|
+
f'Using minimum QPS as fallback.')
|
|
802
|
+
return min(target_qps_dict.values())
|
|
803
|
+
|
|
804
|
+
def _get_gpu_type_from_replica_info(
|
|
805
|
+
self, replica_info: 'replica_managers.ReplicaInfo') -> str:
|
|
806
|
+
"""Extract GPU type from ReplicaInfo object."""
|
|
807
|
+
gpu_type = 'unknown'
|
|
808
|
+
handle = replica_info.handle()
|
|
809
|
+
if handle is not None:
|
|
810
|
+
accelerators = handle.launched_resources.accelerators
|
|
811
|
+
if accelerators and len(accelerators) > 0:
|
|
812
|
+
# Get the first accelerator type
|
|
813
|
+
gpu_type = list(accelerators.keys())[0]
|
|
814
|
+
return gpu_type
|
|
815
|
+
|
|
816
|
+
def _extract_target_qps_list_from_ready_replicas(
        self,
        replica_infos: List['replica_managers.ReplicaInfo']) -> List[float]:
    """Collect the target QPS of each READY replica, in input order.

    Args:
        replica_infos: Replicas to inspect; those whose status is not READY
            are logged and ignored.

    Returns:
        One target QPS per READY replica (resolved from its GPU type), or
        an empty list when none is READY.
    """
    qps_of_ready = []

    for replica in replica_infos:
        if replica.status == serve_state.ReplicaStatus.READY:
            gpu = self._get_gpu_type_from_replica_info(replica)

            # Lookup tolerates config keys with a ':<count>' suffix.
            gpu_qps = self._get_target_qps_for_gpu_type(gpu)
            qps_of_ready.append(gpu_qps)
            logger.info(f'Ready replica {replica.replica_id} '
                        f'with GPU {gpu}: {gpu_qps} QPS')
        else:
            logger.info(f'Replica {replica.replica_id} '
                        f'not ready (status: {replica.status}), skipping')

    if not qps_of_ready:
        return []

    logger.info(f'Target QPS list from ready replicas: {qps_of_ready}')
    return qps_of_ready
|
|
844
|
+
|
|
845
|
+
def _select_replicas_to_scale_down_by_qps(
|
|
846
|
+
self, num_replicas_to_scale_down: int,
|
|
847
|
+
replica_infos: List['replica_managers.ReplicaInfo']) -> List[int]:
|
|
848
|
+
"""Select replicas to scale down (lowest QPS first)."""
|
|
849
|
+
# Create a list of (replica_info, target_qps) tuples
|
|
850
|
+
replica_qps_pairs: List[Tuple['replica_managers.ReplicaInfo',
|
|
851
|
+
float]] = []
|
|
852
|
+
|
|
853
|
+
for info in replica_infos:
|
|
854
|
+
# Include old-version replicas as well so they also get a target_qps
|
|
855
|
+
# assigned. Skip terminal replicas only.
|
|
856
|
+
if info.is_terminal:
|
|
857
|
+
continue
|
|
858
|
+
|
|
859
|
+
# Get GPU type directly from replica info
|
|
860
|
+
gpu_type = self._get_gpu_type_from_replica_info(info)
|
|
861
|
+
|
|
862
|
+
# Use flexible matching logic
|
|
863
|
+
target_qps = self._get_target_qps_for_gpu_type(gpu_type)
|
|
864
|
+
|
|
865
|
+
replica_qps_pairs.append((info, float(target_qps)))
|
|
866
|
+
logger.info(f'Replica {info.replica_id} '
|
|
867
|
+
f'with GPU {gpu_type}: {target_qps} QPS')
|
|
868
|
+
|
|
869
|
+
# Create a mapping from replica_id to target_qps for sorting
|
|
870
|
+
replica_qps_map = {
|
|
871
|
+
info.replica_id: target_qps
|
|
872
|
+
for info, target_qps in replica_qps_pairs
|
|
873
|
+
}
|
|
874
|
+
|
|
875
|
+
# Sort replicas by: 1. status order, 2. target_qps (asc),
|
|
876
|
+
# 3. version (asc), 4. replica_id (desc)
|
|
877
|
+
sorted_replicas = sorted(
|
|
878
|
+
replica_infos,
|
|
879
|
+
key=lambda info: (
|
|
880
|
+
info.status.scale_down_decision_order(),
|
|
881
|
+
replica_qps_map.get(info.replica_id, float('inf')),
|
|
882
|
+
info.version,
|
|
883
|
+
-info.replica_id,
|
|
884
|
+
))
|
|
885
|
+
|
|
886
|
+
selected_replica_ids = []
|
|
887
|
+
for info in sorted_replicas:
|
|
888
|
+
if info.is_terminal:
|
|
889
|
+
continue
|
|
890
|
+
selected_replica_ids.append(info.replica_id)
|
|
891
|
+
if len(selected_replica_ids) >= num_replicas_to_scale_down:
|
|
892
|
+
break
|
|
893
|
+
|
|
894
|
+
logger.info(
|
|
895
|
+
f'Selected {len(selected_replica_ids)} replicas to scale down: '
|
|
896
|
+
f'{selected_replica_ids}')
|
|
897
|
+
return selected_replica_ids
|
|
898
|
+
|
|
899
|
+
def update_version(self, version: int, spec: 'service_spec.SkyServiceSpec',
                   update_mode: serve_utils.UpdateMode) -> None:
    """Switch to a new service version and refresh the per-GPU QPS targets.

    Args:
        version: New version, forwarded to the parent implementation.
        spec: New service spec; its ``target_qps_per_replica`` must be a
            dict.
        update_mode: Forwarded to the parent implementation.
    """
    # The two-argument super starts the method lookup *after*
    # RequestRateAutoscaler in the MRO, so RequestRateAutoscaler's own
    # update_version is bypassed.
    # NOTE(review): presumably because that implementation expects a scalar
    # target_qps_per_replica; confirm against RequestRateAutoscaler.
    super(RequestRateAutoscaler, self).update_version(version, spec,
                                                      update_mode)
    qps_targets = spec.target_qps_per_replica
    # The instance-aware logic needs one QPS target per GPU type.
    assert isinstance(qps_targets, dict), (
        'InstanceAware Autoscaler requires dict type target_qps_per_replica')
    self.target_qps_per_replica = qps_targets
|
|
907
|
+
|
|
908
|
+
|
|
557
909
|
class FallbackRequestRateAutoscaler(RequestRateAutoscaler):
|
|
558
910
|
"""FallbackRequestRateAutoscaler
|
|
559
911
|
|