skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +64 -32
- sky/adaptors/aws.py +23 -6
- sky/adaptors/azure.py +432 -15
- sky/adaptors/cloudflare.py +5 -5
- sky/adaptors/common.py +19 -9
- sky/adaptors/do.py +20 -0
- sky/adaptors/gcp.py +3 -2
- sky/adaptors/kubernetes.py +122 -88
- sky/adaptors/nebius.py +100 -0
- sky/adaptors/oci.py +39 -1
- sky/adaptors/vast.py +29 -0
- sky/admin_policy.py +101 -0
- sky/authentication.py +117 -98
- sky/backends/backend.py +52 -20
- sky/backends/backend_utils.py +669 -557
- sky/backends/cloud_vm_ray_backend.py +1099 -808
- sky/backends/local_docker_backend.py +14 -8
- sky/backends/wheel_utils.py +38 -20
- sky/benchmark/benchmark_utils.py +22 -23
- sky/check.py +76 -27
- sky/cli.py +1586 -1139
- sky/client/__init__.py +1 -0
- sky/client/cli.py +5683 -0
- sky/client/common.py +345 -0
- sky/client/sdk.py +1765 -0
- sky/cloud_stores.py +283 -19
- sky/clouds/__init__.py +7 -2
- sky/clouds/aws.py +303 -112
- sky/clouds/azure.py +185 -179
- sky/clouds/cloud.py +115 -37
- sky/clouds/cudo.py +29 -22
- sky/clouds/do.py +313 -0
- sky/clouds/fluidstack.py +44 -54
- sky/clouds/gcp.py +206 -65
- sky/clouds/ibm.py +26 -21
- sky/clouds/kubernetes.py +345 -91
- sky/clouds/lambda_cloud.py +40 -29
- sky/clouds/nebius.py +297 -0
- sky/clouds/oci.py +129 -90
- sky/clouds/paperspace.py +22 -18
- sky/clouds/runpod.py +53 -34
- sky/clouds/scp.py +28 -24
- sky/clouds/service_catalog/__init__.py +19 -13
- sky/clouds/service_catalog/aws_catalog.py +29 -12
- sky/clouds/service_catalog/azure_catalog.py +33 -6
- sky/clouds/service_catalog/common.py +95 -75
- sky/clouds/service_catalog/constants.py +3 -3
- sky/clouds/service_catalog/cudo_catalog.py +13 -3
- sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
- sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
- sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
- sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
- sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
- sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
- sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
- sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
- sky/clouds/service_catalog/do_catalog.py +111 -0
- sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
- sky/clouds/service_catalog/gcp_catalog.py +16 -2
- sky/clouds/service_catalog/ibm_catalog.py +2 -2
- sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
- sky/clouds/service_catalog/lambda_catalog.py +8 -3
- sky/clouds/service_catalog/nebius_catalog.py +116 -0
- sky/clouds/service_catalog/oci_catalog.py +31 -4
- sky/clouds/service_catalog/paperspace_catalog.py +2 -2
- sky/clouds/service_catalog/runpod_catalog.py +2 -2
- sky/clouds/service_catalog/scp_catalog.py +2 -2
- sky/clouds/service_catalog/vast_catalog.py +104 -0
- sky/clouds/service_catalog/vsphere_catalog.py +2 -2
- sky/clouds/utils/aws_utils.py +65 -0
- sky/clouds/utils/azure_utils.py +91 -0
- sky/clouds/utils/gcp_utils.py +5 -9
- sky/clouds/utils/oci_utils.py +47 -5
- sky/clouds/utils/scp_utils.py +4 -3
- sky/clouds/vast.py +280 -0
- sky/clouds/vsphere.py +22 -18
- sky/core.py +361 -107
- sky/dag.py +41 -28
- sky/data/data_transfer.py +37 -0
- sky/data/data_utils.py +211 -32
- sky/data/mounting_utils.py +182 -30
- sky/data/storage.py +2118 -270
- sky/data/storage_utils.py +126 -5
- sky/exceptions.py +179 -8
- sky/execution.py +158 -85
- sky/global_user_state.py +150 -34
- sky/jobs/__init__.py +12 -10
- sky/jobs/client/__init__.py +0 -0
- sky/jobs/client/sdk.py +302 -0
- sky/jobs/constants.py +49 -11
- sky/jobs/controller.py +161 -99
- sky/jobs/dashboard/dashboard.py +171 -25
- sky/jobs/dashboard/templates/index.html +572 -60
- sky/jobs/recovery_strategy.py +157 -156
- sky/jobs/scheduler.py +307 -0
- sky/jobs/server/__init__.py +1 -0
- sky/jobs/server/core.py +598 -0
- sky/jobs/server/dashboard_utils.py +69 -0
- sky/jobs/server/server.py +190 -0
- sky/jobs/state.py +627 -122
- sky/jobs/utils.py +615 -206
- sky/models.py +27 -0
- sky/optimizer.py +142 -83
- sky/provision/__init__.py +20 -5
- sky/provision/aws/config.py +124 -42
- sky/provision/aws/instance.py +130 -53
- sky/provision/azure/__init__.py +7 -0
- sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
- sky/provision/azure/config.py +220 -0
- sky/provision/azure/instance.py +1012 -37
- sky/provision/common.py +31 -3
- sky/provision/constants.py +25 -0
- sky/provision/cudo/__init__.py +2 -1
- sky/provision/cudo/cudo_utils.py +112 -0
- sky/provision/cudo/cudo_wrapper.py +37 -16
- sky/provision/cudo/instance.py +28 -12
- sky/provision/do/__init__.py +11 -0
- sky/provision/do/config.py +14 -0
- sky/provision/do/constants.py +10 -0
- sky/provision/do/instance.py +287 -0
- sky/provision/do/utils.py +301 -0
- sky/provision/docker_utils.py +82 -46
- sky/provision/fluidstack/fluidstack_utils.py +57 -125
- sky/provision/fluidstack/instance.py +15 -43
- sky/provision/gcp/config.py +19 -9
- sky/provision/gcp/constants.py +7 -1
- sky/provision/gcp/instance.py +55 -34
- sky/provision/gcp/instance_utils.py +339 -80
- sky/provision/gcp/mig_utils.py +210 -0
- sky/provision/instance_setup.py +172 -133
- sky/provision/kubernetes/__init__.py +1 -0
- sky/provision/kubernetes/config.py +104 -90
- sky/provision/kubernetes/constants.py +8 -0
- sky/provision/kubernetes/instance.py +680 -325
- sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
- sky/provision/kubernetes/network.py +54 -20
- sky/provision/kubernetes/network_utils.py +70 -21
- sky/provision/kubernetes/utils.py +1370 -251
- sky/provision/lambda_cloud/__init__.py +11 -0
- sky/provision/lambda_cloud/config.py +10 -0
- sky/provision/lambda_cloud/instance.py +265 -0
- sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
- sky/provision/logging.py +1 -1
- sky/provision/nebius/__init__.py +11 -0
- sky/provision/nebius/config.py +11 -0
- sky/provision/nebius/instance.py +285 -0
- sky/provision/nebius/utils.py +318 -0
- sky/provision/oci/__init__.py +15 -0
- sky/provision/oci/config.py +51 -0
- sky/provision/oci/instance.py +436 -0
- sky/provision/oci/query_utils.py +681 -0
- sky/provision/paperspace/constants.py +6 -0
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/paperspace/utils.py +2 -0
- sky/provision/provisioner.py +207 -130
- sky/provision/runpod/__init__.py +1 -0
- sky/provision/runpod/api/__init__.py +3 -0
- sky/provision/runpod/api/commands.py +119 -0
- sky/provision/runpod/api/pods.py +142 -0
- sky/provision/runpod/instance.py +64 -8
- sky/provision/runpod/utils.py +239 -23
- sky/provision/vast/__init__.py +10 -0
- sky/provision/vast/config.py +11 -0
- sky/provision/vast/instance.py +247 -0
- sky/provision/vast/utils.py +162 -0
- sky/provision/vsphere/common/vim_utils.py +1 -1
- sky/provision/vsphere/instance.py +8 -18
- sky/provision/vsphere/vsphere_utils.py +1 -1
- sky/resources.py +247 -102
- sky/serve/__init__.py +9 -9
- sky/serve/autoscalers.py +361 -299
- sky/serve/client/__init__.py +0 -0
- sky/serve/client/sdk.py +366 -0
- sky/serve/constants.py +12 -3
- sky/serve/controller.py +106 -36
- sky/serve/load_balancer.py +63 -12
- sky/serve/load_balancing_policies.py +84 -2
- sky/serve/replica_managers.py +42 -34
- sky/serve/serve_state.py +62 -32
- sky/serve/serve_utils.py +271 -160
- sky/serve/server/__init__.py +0 -0
- sky/serve/{core.py → server/core.py} +271 -90
- sky/serve/server/server.py +112 -0
- sky/serve/service.py +52 -16
- sky/serve/service_spec.py +95 -32
- sky/server/__init__.py +1 -0
- sky/server/common.py +430 -0
- sky/server/constants.py +21 -0
- sky/server/html/log.html +174 -0
- sky/server/requests/__init__.py +0 -0
- sky/server/requests/executor.py +472 -0
- sky/server/requests/payloads.py +487 -0
- sky/server/requests/queues/__init__.py +0 -0
- sky/server/requests/queues/mp_queue.py +76 -0
- sky/server/requests/requests.py +567 -0
- sky/server/requests/serializers/__init__.py +0 -0
- sky/server/requests/serializers/decoders.py +192 -0
- sky/server/requests/serializers/encoders.py +166 -0
- sky/server/server.py +1106 -0
- sky/server/stream_utils.py +141 -0
- sky/setup_files/MANIFEST.in +2 -5
- sky/setup_files/dependencies.py +159 -0
- sky/setup_files/setup.py +14 -125
- sky/sky_logging.py +59 -14
- sky/skylet/autostop_lib.py +2 -2
- sky/skylet/constants.py +183 -50
- sky/skylet/events.py +22 -10
- sky/skylet/job_lib.py +403 -258
- sky/skylet/log_lib.py +111 -71
- sky/skylet/log_lib.pyi +6 -0
- sky/skylet/providers/command_runner.py +6 -8
- sky/skylet/providers/ibm/node_provider.py +2 -2
- sky/skylet/providers/scp/config.py +11 -3
- sky/skylet/providers/scp/node_provider.py +8 -8
- sky/skylet/skylet.py +3 -1
- sky/skylet/subprocess_daemon.py +69 -17
- sky/skypilot_config.py +119 -57
- sky/task.py +205 -64
- sky/templates/aws-ray.yml.j2 +37 -7
- sky/templates/azure-ray.yml.j2 +27 -82
- sky/templates/cudo-ray.yml.j2 +7 -3
- sky/templates/do-ray.yml.j2 +98 -0
- sky/templates/fluidstack-ray.yml.j2 +7 -4
- sky/templates/gcp-ray.yml.j2 +26 -6
- sky/templates/ibm-ray.yml.j2 +3 -2
- sky/templates/jobs-controller.yaml.j2 +46 -11
- sky/templates/kubernetes-ingress.yml.j2 +7 -0
- sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
- sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
- sky/templates/kubernetes-ray.yml.j2 +292 -25
- sky/templates/lambda-ray.yml.j2 +30 -40
- sky/templates/nebius-ray.yml.j2 +79 -0
- sky/templates/oci-ray.yml.j2 +18 -57
- sky/templates/paperspace-ray.yml.j2 +10 -6
- sky/templates/runpod-ray.yml.j2 +26 -4
- sky/templates/scp-ray.yml.j2 +3 -2
- sky/templates/sky-serve-controller.yaml.j2 +12 -1
- sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
- sky/templates/vast-ray.yml.j2 +70 -0
- sky/templates/vsphere-ray.yml.j2 +8 -3
- sky/templates/websocket_proxy.py +64 -0
- sky/usage/constants.py +10 -1
- sky/usage/usage_lib.py +130 -37
- sky/utils/accelerator_registry.py +35 -51
- sky/utils/admin_policy_utils.py +147 -0
- sky/utils/annotations.py +51 -0
- sky/utils/cli_utils/status_utils.py +81 -23
- sky/utils/cluster_utils.py +356 -0
- sky/utils/command_runner.py +452 -89
- sky/utils/command_runner.pyi +77 -3
- sky/utils/common.py +54 -0
- sky/utils/common_utils.py +319 -108
- sky/utils/config_utils.py +204 -0
- sky/utils/control_master_utils.py +48 -0
- sky/utils/controller_utils.py +548 -266
- sky/utils/dag_utils.py +93 -32
- sky/utils/db_utils.py +18 -4
- sky/utils/env_options.py +29 -7
- sky/utils/kubernetes/create_cluster.sh +8 -60
- sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
- sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
- sky/utils/kubernetes/gpu_labeler.py +4 -4
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
- sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
- sky/utils/kubernetes/rsync_helper.sh +24 -0
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
- sky/utils/log_utils.py +240 -33
- sky/utils/message_utils.py +81 -0
- sky/utils/registry.py +127 -0
- sky/utils/resources_utils.py +94 -22
- sky/utils/rich_utils.py +247 -18
- sky/utils/schemas.py +284 -64
- sky/{status_lib.py → utils/status_lib.py} +12 -7
- sky/utils/subprocess_utils.py +212 -46
- sky/utils/timeline.py +12 -7
- sky/utils/ux_utils.py +168 -15
- skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
- skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
- sky/clouds/cloud_registry.py +0 -31
- sky/jobs/core.py +0 -330
- sky/skylet/providers/azure/__init__.py +0 -2
- sky/skylet/providers/azure/azure-vm-template.json +0 -301
- sky/skylet/providers/azure/config.py +0 -170
- sky/skylet/providers/azure/node_provider.py +0 -466
- sky/skylet/providers/lambda_cloud/__init__.py +0 -2
- sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
- sky/skylet/providers/oci/__init__.py +0 -2
- sky/skylet/providers/oci/node_provider.py +0 -488
- sky/skylet/providers/oci/query_helper.py +0 -383
- sky/skylet/providers/oci/utils.py +0 -21
- sky/utils/cluster_yaml_utils.py +0 -24
- sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
- skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
- skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
sky/serve/autoscalers.py
CHANGED
@@ -42,11 +42,10 @@ class AutoscalerDecision:
     # TODO(MaoZiming): Add a doc to elaborate on autoscaling policies.
     def __init__(self, operator: AutoscalerDecisionOperator,
                  target: Union[Optional[Dict[str, Any]], int]):
-
-        assert (operator == AutoscalerDecisionOperator.SCALE_UP and
-                (target is None or isinstance(target, dict))) or (
-                    operator == AutoscalerDecisionOperator.SCALE_DOWN and
-                    isinstance(target, int))
+        if operator == AutoscalerDecisionOperator.SCALE_UP:
+            assert (target is None or isinstance(target, dict))
+        else:
+            assert isinstance(target, int)
         self.operator = operator
         self.target = target
 
@@ -54,9 +53,70 @@ class AutoscalerDecision:
         return f'AutoscalerDecision({self.operator}, {self.target})'
 
 
+def _generate_scale_up_decisions(
+        num: int, target: Optional[Dict[str, Any]]) -> List[AutoscalerDecision]:
+    return [
+        AutoscalerDecision(AutoscalerDecisionOperator.SCALE_UP, target)
+        for _ in range(num)
+    ]
+
+
+def _generate_scale_down_decisions(
+        replica_ids: List[int]) -> List[AutoscalerDecision]:
+    return [
+        AutoscalerDecision(AutoscalerDecisionOperator.SCALE_DOWN, replica_id)
+        for replica_id in replica_ids
+    ]
+
+
+def _select_nonterminal_replicas_to_scale_down(
+    num_replica_to_scale_down: int,
+    replica_infos: Iterable['replica_managers.ReplicaInfo'],
+) -> List[int]:
+    """Select nonterminal replicas to scale down.
+
+    We sort the replicas based on the following order:
+    1. Based on the `scale_down_decision_order` of the status. We terminate
+        the replicas that is in earlier stage first, as the replicas in
+        later stage may become ready soon.
+    2. Based on the version in ascending order, so we scale down the older
+        versions first.
+    3. Based on the replica_id in descending order, which is also the order
+        of the replicas being launched. We scale down the replicas that are
+        launched later first, as the replicas that are launched earlier may
+        become ready soon.
+
+    Args:
+        num_replica_to_scale_down: The number of replicas to scale down.
+        replica_infos: The list of replica informations to select from.
+
+    Returns:
+        The list of replica ids to scale down.
+    """
+    replicas = list(replica_infos)
+    status_order = serve_state.ReplicaStatus.scale_down_decision_order()
+    assert all(info.status in status_order for info in replicas), (
+        'All replicas to scale down should be in provisioning or launched '
+        'status.', replicas)
+    replicas = sorted(
+        replicas,
+        key=lambda info: (
+            status_order.index(info.status),
+            # version in ascending order
+            info.version,
+            # replica_id in descending order, i.e. launched order
+            -info.replica_id))
+    assert len(replicas) >= num_replica_to_scale_down, (
+        'Not enough replicas to scale down. Available replicas: ',
+        f'{replicas}, num_replica_to_scale_down: {num_replica_to_scale_down}.')
+    return [info.replica_id for info in replicas][:num_replica_to_scale_down]
+
+
 class Autoscaler:
     """Abstract class for autoscalers."""
 
+    # --------------- APIs to implement for custom autoscaler ---------------
+
     def __init__(self, service_name: str,
                  spec: 'service_spec.SkyServiceSpec') -> None:
         """Initialize the autoscaler.
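The hunk above moves replica selection out of RequestRateAutoscaler into a module-level helper. To illustrate the ordering it implements, here is a minimal, self-contained sketch; FakeReplica and status_rank are stand-ins invented for this example, not SkyPilot types:

import dataclasses
from typing import List

@dataclasses.dataclass
class FakeReplica:
    replica_id: int
    version: int
    status_rank: int  # lower rank = earlier lifecycle stage

def select_to_scale_down(num: int, replicas: List[FakeReplica]) -> List[int]:
    # Same sort key as the diff: earlier lifecycle stage first, older
    # version first, then larger replica_id (launched later) first.
    ordered = sorted(
        replicas,
        key=lambda r: (r.status_rank, r.version, -r.replica_id))
    return [r.replica_id for r in ordered][:num]

replicas = [
    FakeReplica(replica_id=1, version=1, status_rank=1),  # ready, old version
    FakeReplica(replica_id=2, version=2, status_rank=0),  # provisioning
    FakeReplica(replica_id=3, version=2, status_rank=0),  # provisioning, newest
]
print(select_to_scale_down(2, replicas))  # [3, 2]: newest provisioning first

Ready replicas on the latest version are only touched after everything still provisioning, which is why the sort leads with the lifecycle stage.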
@@ -67,6 +127,8 @@ class Autoscaler:
                 number of replicas, i.e. min_replicas == max_replicas.
             target_num_replicas: Target number of replicas output by autoscaler.
             latest_version: latest version of the service.
+            latest_version_ever_ready: The latest version that is ever ready.
+            update_mode: Update mode for the service.
         """
         self._service_name: str = service_name
         self.min_replicas: int = spec.min_replicas
@@ -81,6 +143,10 @@ class Autoscaler:
         self.latest_version_ever_ready: int = self.latest_version - 1
         self.update_mode = serve_utils.DEFAULT_UPDATE_MODE
 
+    def _calculate_target_num_replicas(self) -> int:
+        """Calculate target number of replicas."""
+        raise NotImplementedError
+
     def update_version(self, version: int, spec: 'service_spec.SkyServiceSpec',
                        update_mode: serve_utils.UpdateMode) -> None:
         if version <= self.latest_version:
@@ -91,9 +157,9 @@ class Autoscaler:
         self.min_replicas = spec.min_replicas
         self.max_replicas = (spec.max_replicas if spec.max_replicas is not None
                              else spec.min_replicas)
-        # Reclip self.target_num_replicas with new min and max replicas.
-        self.target_num_replicas = max(
-            self.min_replicas, min(self.max_replicas, self.target_num_replicas))
+        # Re-clip self.target_num_replicas with new min and max replicas.
+        self.target_num_replicas = self._clip_target_num_replicas(
+            self.target_num_replicas)
         self.update_mode = update_mode
 
     def collect_request_information(
@@ -101,223 +167,61 @@ class Autoscaler:
         """Collect request information from aggregator for autoscaling."""
         raise NotImplementedError
 
-    def evaluate_scaling(
+    def _generate_scaling_decisions(
         self,
         replica_infos: List['replica_managers.ReplicaInfo'],
     ) -> List[AutoscalerDecision]:
-        """Evaluate autoscale options based on replica information."""
+        """Generate Autoscaling decisions based on replica information."""
         raise NotImplementedError
 
-    @classmethod
-    def from_spec(cls, service_name: str,
-                  spec: 'service_spec.SkyServiceSpec') -> 'Autoscaler':
-        # TODO(MaoZiming): use NAME to get the class.
-        if spec.use_ondemand_fallback:
-            return FallbackRequestRateAutoscaler(service_name, spec)
-        else:
-            return RequestRateAutoscaler(service_name, spec)
-
     def _dump_dynamic_states(self) -> Dict[str, Any]:
         """Dump dynamic states from autoscaler."""
         raise NotImplementedError
 
-    def dump_dynamic_states(self) -> Dict[str, Any]:
-        """Dump dynamic states from autoscaler."""
-        states = {'latest_version_ever_ready': self.latest_version_ever_ready}
-        states.update(self._dump_dynamic_states())
-        return states
-
     def _load_dynamic_states(self, dynamic_states: Dict[str, Any]) -> None:
         """Load dynamic states to autoscaler."""
         raise NotImplementedError
 
-    def load_dynamic_states(self, dynamic_states: Dict[str, Any]) -> None:
-        """Load dynamic states to autoscaler."""
-        self.latest_version_ever_ready = dynamic_states.pop(
-            'latest_version_ever_ready', constants.INITIAL_VERSION)
-        self._load_dynamic_states(dynamic_states)
-
-
-class RequestRateAutoscaler(Autoscaler):
-    """RequestRateAutoscaler: Autoscale according to request rate.
-
-    Scales when the number of requests per replica in the given interval
-    is above or below the target qps per replica. The instance can be
-    either spot or on-demand, but not both.
-    """
-
-    def __init__(self, service_name: str,
-                 spec: 'service_spec.SkyServiceSpec') -> None:
-        """Initialize the request rate autoscaler.
+    # --------------- Utility Functions ---------------
 
-        Variables:
-            target_qps_per_replica: Target qps per replica for autoscaling.
-            qps_window_size: Window size for qps calculating.
-            request_timestamps: All request timestamps within the window.
-            upscale_counter: counter for upscale number of replicas.
-            downscale_counter: counter for downscale number of replicas.
-            scale_up_consecutive_periods: period for scaling up.
-            scale_down_consecutive_periods: period for scaling down.
+    def _clip_target_num_replicas(self, target_num_replicas: int) -> int:
+        """Clip target number of replicas with current minimal and maximum
+        number of replicas.
         """
-        super().__init__(service_name, spec)
-        self.target_qps_per_replica: Optional[
-            float] = spec.target_qps_per_replica
-        self.qps_window_size: int = constants.AUTOSCALER_QPS_WINDOW_SIZE_SECONDS
-        self.request_timestamps: List[float] = []
-        self.upscale_counter: int = 0
-        self.downscale_counter: int = 0
-        upscale_delay_seconds = (
-            spec.upscale_delay_seconds if spec.upscale_delay_seconds is not None
-            else constants.AUTOSCALER_DEFAULT_UPSCALE_DELAY_SECONDS)
-        self.scale_up_consecutive_periods: int = int(
-            upscale_delay_seconds /
-            constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)
-        downscale_delay_seconds = (
-            spec.downscale_delay_seconds
-            if spec.downscale_delay_seconds is not None else
-            constants.AUTOSCALER_DEFAULT_DOWNSCALE_DELAY_SECONDS)
-        self.scale_down_consecutive_periods: int = int(
-            downscale_delay_seconds /
-            constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)
-
-    def _cal_target_num_replicas_based_on_qps(self) -> int:
-        # Recalculate target_num_replicas based on QPS.
-        # Reclip self.target_num_replicas with new min and max replicas.
-        if self.target_qps_per_replica is None:
-            return self.min_replicas
-        target_num_replicas = math.ceil(
-            len(self.request_timestamps) / self.qps_window_size /
-            self.target_qps_per_replica)
         return max(self.min_replicas, min(self.max_replicas,
                                           target_num_replicas))
 
-    def update_version(self, version: int, spec: 'service_spec.SkyServiceSpec',
-                       update_mode: serve_utils.UpdateMode) -> None:
-        super().update_version(version, spec, update_mode)
-        self.target_qps_per_replica = spec.target_qps_per_replica
-        upscale_delay_seconds = (
-            spec.upscale_delay_seconds if spec.upscale_delay_seconds is not None
-            else constants.AUTOSCALER_DEFAULT_UPSCALE_DELAY_SECONDS)
-        self.scale_up_consecutive_periods = int(
-            upscale_delay_seconds /
-            constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)
-        downscale_delay_seconds = (
-            spec.downscale_delay_seconds
-            if spec.downscale_delay_seconds is not None else
-            constants.AUTOSCALER_DEFAULT_DOWNSCALE_DELAY_SECONDS)
-        self.scale_down_consecutive_periods = int(
-            downscale_delay_seconds /
-            constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)
-
-        # We directly set the target_num_replicas here instead of
-        # calling `_set_target_num_replica_with_hysteresis` to have the replicas
-        # quickly scale after each update.
-        self.target_num_replicas = self._cal_target_num_replicas_based_on_qps()
-        # Cleanup hysteretic counters.
-        self.upscale_counter = 0
-        self.downscale_counter = 0
-
-    def collect_request_information(
-            self, request_aggregator_info: Dict[str, Any]) -> None:
-        """Collect request information from aggregator for autoscaling.
-
-        request_aggregator_info should be a dict with the following format:
-
-        {
-            'timestamps': [timestamp1 (float), timestamp2 (float), ...]
-        }
-        """
-        self.request_timestamps.extend(
-            request_aggregator_info.get('timestamps', []))
-        current_time = time.time()
-        index = bisect.bisect_left(self.request_timestamps,
-                                   current_time - self.qps_window_size)
-        self.request_timestamps = self.request_timestamps[index:]
-        logger.info(f'Num of requests in the last {self.qps_window_size} '
-                    f'seconds: {len(self.request_timestamps)}')
-
-    def _set_target_num_replica_with_hysteresis(self) -> None:
-        """Set target_num_replicas based on request rate with hysteresis."""
-        # Keep self.target_num_replicas unchange when autoscaling
-        # is not enabled, i.e. self.target_qps_per_replica is None.
-        # In this case, self.target_num_replicas will be min_replicas.
-        if self.target_qps_per_replica is None:
-            return
-
-        # Convert to requests per second.
-        target_num_replicas = self._cal_target_num_replicas_based_on_qps()
-        old_target_num_replicas = self.target_num_replicas
-
-        # Faster scale up when there is no replica.
-        if self.target_num_replicas == 0:
-            self.target_num_replicas = target_num_replicas
-        elif target_num_replicas > self.target_num_replicas:
-            self.upscale_counter += 1
-            self.downscale_counter = 0
-            if self.upscale_counter >= self.scale_up_consecutive_periods:
-                self.upscale_counter = 0
-                self.target_num_replicas = target_num_replicas
-        elif target_num_replicas < self.target_num_replicas:
-            self.downscale_counter += 1
-            self.upscale_counter = 0
-            if self.downscale_counter >= self.scale_down_consecutive_periods:
-                self.downscale_counter = 0
-                self.target_num_replicas = target_num_replicas
-        else:
-            self.upscale_counter = self.downscale_counter = 0
-
-        num_requests_per_second = len(
-            self.request_timestamps) / self.qps_window_size
-        logger.info(
-            f'Requests per second: {num_requests_per_second}. '
-            f'Current target number of replicas: {old_target_num_replicas}. '
-            f'Final target number of replicas: {self.target_num_replicas}. '
-            f'Upscale counter: {self.upscale_counter}/'
-            f'{self.scale_up_consecutive_periods}. '
-            f'Downscale counter: {self.downscale_counter}/'
-            f'{self.scale_down_consecutive_periods}')
-
     @classmethod
-    def _select_nonterminal_replicas_to_scale_down(
-            cls, num_limit: int,
-            replica_infos: Iterable['replica_managers.ReplicaInfo']) -> List[int]:
-        """Select nonterminal replicas to scale down."""
-        replicas = list(replica_infos)
-        status_order = serve_state.ReplicaStatus.scale_down_decision_order()
-        assert all(info.status in status_order for info in replicas), (
-            'All replicas to scale down should be in provisioning or launched '
-            'status.', replicas)
-        replicas = sorted(
-            replicas,
-            key=lambda info: (
-                status_order.index(info.status),
-                # Sort by version in ascending order, so we scale down the older
-                # versions first.
-                info.version,
-                # Sort `info.replica_id` in descending order so that the
-                # replicas in the same version starts to provisioning later are
-                # scaled down first.
-                -info.replica_id))
-        assert len(replicas) >= num_limit, (
-            'Not enough replicas to scale down.', replicas, num_limit)
-        return [info.replica_id for info in replicas][:num_limit]
+    def from_spec(cls, service_name: str,
+                  spec: 'service_spec.SkyServiceSpec') -> 'Autoscaler':
+        # TODO(MaoZiming): use NAME to get the class.
+        if spec.use_ondemand_fallback:
+            return FallbackRequestRateAutoscaler(service_name, spec)
+        else:
+            return RequestRateAutoscaler(service_name, spec)
 
     def get_decision_interval(self) -> int:
-        # Reduce autoscaler interval when target_num_replicas = 0.
-        # This will happen when min_replicas = 0 and no traffic.
+        """Get the decision interval for the autoscaler.
+
+        We reduce the decision interval when the desired number of replicas is
+        0, to make the service scale faster when the service is not running.
+        This will happen when min_replicas = 0 and no traffic.
+        """
         if self.target_num_replicas == 0:
             return constants.AUTOSCALER_NO_REPLICA_DECISION_INTERVAL_SECONDS
         else:
             return constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS
 
-    def select_outdated_replicas_to_scale_down(
-            self, replica_infos: List['replica_managers.ReplicaInfo']
-    ) -> List[int]:
+    def _select_outdated_replicas_to_scale_down(
+        self,
+        replica_infos: List['replica_managers.ReplicaInfo'],
+        active_versions: List[int],
+    ) -> List[int]:
         """Select outdated replicas to scale down."""
 
         if self.update_mode == serve_utils.UpdateMode.ROLLING:
-            latest_ready_replicas = []
-            old_nonterminal_replicas = []
+            latest_ready_replicas: List['replica_managers.ReplicaInfo'] = []
+            old_nonterminal_replicas: List['replica_managers.ReplicaInfo'] = []
             for info in replica_infos:
                 if info.version == self.latest_version:
                     if info.is_ready:
@@ -346,19 +250,12 @@ class RequestRateAutoscaler(Autoscaler):
             # `_select_replicas_to_scale_down` will make sure we scale the
             # replicas in initializing statuses first before scaling down the
             # READY old replicas.
-            return self._select_nonterminal_replicas_to_scale_down(
+            return _select_nonterminal_replicas_to_scale_down(
                 max(0,
                     len(old_nonterminal_replicas) - num_old_replicas_to_keep),
                 old_nonterminal_replicas,
             )
 
-        # Use the active versions set by replica manager to make sure we only
-        # scale down the outdated replicas that are not used by the load
-        # balancer.
-        record = serve_state.get_service_from_name(self._service_name)
-        assert record is not None, (f'No service record found for '
-                                    f'{self._service_name}')
-        active_versions = record['active_versions']
         if not active_versions:
             # active_versions can be empty when none of the replicas are ready
             # when the load balancer sync with the controller.
@@ -372,36 +269,35 @@ class RequestRateAutoscaler(Autoscaler):
         # number of ready new replicas is greater than or equal to the min
         # replicas instead of the target, to ensure the service being updated
         # to the latest version faster.
-        all_replica_ids_to_scale_down: List[int] = []
-        for info in replica_infos:
-            if info.version < latest_version_with_min_replicas:
-                all_replica_ids_to_scale_down.append(info.replica_id)
-
-        return all_replica_ids_to_scale_down
+        return [
+            info.replica_id
+            for info in replica_infos
+            if info.version < latest_version_with_min_replicas
+        ]
 
-    def evaluate_scaling(
+    def generate_scaling_decisions(
         self,
         replica_infos: List['replica_managers.ReplicaInfo'],
+        active_versions: List[int],
     ) -> List[AutoscalerDecision]:
-        """
-        If the number of launched replicas is less than the target,
-        trigger a scale up. Else, trigger a scale down.
+        """Generate Autoscaling decisions based on replica information.
+        If the number of launched replicas is less than the target, trigger a
+        scale up. Else, trigger a scale down. This function also handles the
+        version control of the replicas.
 
         For future compatibility, we return a list of AutoscalerDecision.
         Scale-up could include both spot and on-demand, each with a resource
         override dict. Active migration could require returning both SCALE_UP
         and SCALE_DOWN.
         """
-        latest_replicas: List['replica_managers.ReplicaInfo'] = []
-        latest_nonterminal_replicas: List['replica_managers.ReplicaInfo'] = []
 
+        # Handle latest version unrecoverable failure first.
+        latest_replicas: List['replica_managers.ReplicaInfo'] = []
         for info in replica_infos:
             if info.version == self.latest_version:
                 latest_replicas.append(info)
-                if not info.is_terminal:
-                    latest_nonterminal_replicas.append(info)
-                    if info.is_ready:
-                        self.latest_version_ever_ready = self.latest_version
+                if info.is_ready:
+                    self.latest_version_ever_ready = self.latest_version
         if self.latest_version_ever_ready < self.latest_version:
             for info in latest_replicas:
                 if info.status_property.unrecoverable_failure():
@@ -411,55 +307,229 @@ class RequestRateAutoscaler(Autoscaler):
                     # and restart.
                     return []
 
-        self._set_target_num_replica_with_hysteresis()
+        scaling_decisions = []
 
-        scaling_options: List[AutoscalerDecision] = []
-        all_replica_ids_to_scale_down: List[int] = []
+        # If rolling update is in progress, we scale down old replicas based on
+        # the number of ready new replicas and the traffic is directed to both
+        # old and new replicas. Or, for blue_green update, once there is
+        # min_replicas number of ready new replicas, we will direct all traffic
+        # to them, we can scale down all old replicas.
+        # TODO(MaoZiming,zhwu): corner case: We should make sure the fallback
+        # replicas are ready before scaling down the old replicas to avoid the
+        # situation that all the ready new replicas are preempted together.
+        scaling_decisions.extend(
+            _generate_scale_down_decisions(
+                self._select_outdated_replicas_to_scale_down(
+                    replica_infos, active_versions)))
+
+        # If the latest version is ever ready, we can proceed to generate
+        # decisions from the implementations in subclasses.
+        scaling_decisions.extend(
+            self._generate_scaling_decisions(replica_infos))
 
-        # If rolling update is in progress, we scale down old replicas based
-        # on the number of ready new replicas and the traffic is directed
-        # to both old and new replicas.
-        # Or, for blue_green update, once there is min_replicas number of ready
-        # new replicas, we will direct all traffic to them, we can scale down
-        # all old replicas.
-        all_replica_ids_to_scale_down.extend(
-            self.select_outdated_replicas_to_scale_down(replica_infos))
+        if not scaling_decisions:
+            logger.info('No scaling needed.')
 
-        # Case 1. when latest_nonterminal_replicas is less
+        return scaling_decisions
+
+    def dump_dynamic_states(self) -> Dict[str, Any]:
+        """Dump dynamic states from autoscaler."""
+        states = {'latest_version_ever_ready': self.latest_version_ever_ready}
+        states.update(self._dump_dynamic_states())
+        return states
+
+    def load_dynamic_states(self, dynamic_states: Dict[str, Any]) -> None:
+        """Load dynamic states to autoscaler."""
+        self.latest_version_ever_ready = dynamic_states.pop(
+            'latest_version_ever_ready', constants.INITIAL_VERSION)
+        self._load_dynamic_states(dynamic_states)
+
+
+class _AutoscalerWithHysteresis(Autoscaler):
+    """_AutoscalerWithHysteresis: Autoscale with hysteresis.
+
+    This is an internal class for developing autoscalers with hysteresis. It
+    only scales when the number of replicas is above or below the target number
+    of replicas for a certain number of consecutive periods.
+    """
+
+    def _setup_thresholds(self, spec: 'service_spec.SkyServiceSpec') -> None:
+        upscale_delay_seconds = (
+            spec.upscale_delay_seconds if spec.upscale_delay_seconds is not None
+            else constants.AUTOSCALER_DEFAULT_UPSCALE_DELAY_SECONDS)
+        self.scale_up_threshold: int = int(
+            upscale_delay_seconds /
+            constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)
+        downscale_delay_seconds = (
+            spec.downscale_delay_seconds
+            if spec.downscale_delay_seconds is not None else
+            constants.AUTOSCALER_DEFAULT_DOWNSCALE_DELAY_SECONDS)
+        self.scale_down_threshold: int = int(
+            downscale_delay_seconds /
+            constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)
+
+    def __init__(self, service_name: str,
+                 spec: 'service_spec.SkyServiceSpec') -> None:
+        """Initialize the hysteresis autoscaler.
+
+        Variables:
+            upscale_counter: Counter for upscale decisions of replicas.
+            downscale_counter: Counter for downscale decisions of replicas.
+            scale_up_threshold: The threshold to trigger a scale up.
+            scale_down_threshold: The threshold to trigger a scale down.
+        """
+        super().__init__(service_name, spec)
+        self.upscale_counter: int = 0
+        self.downscale_counter: int = 0
+        self._setup_thresholds(spec)
+
+    def update_version(self, version: int, spec: 'service_spec.SkyServiceSpec',
+                       update_mode: serve_utils.UpdateMode) -> None:
+        super().update_version(version, spec, update_mode)
+        # We directly set the target_num_replicas here instead of calling
+        # `_set_target_num_replicas_with_hysteresis` to have the replicas
+        # quickly scale after each update.
+        self.target_num_replicas = self._calculate_target_num_replicas()
+        # Cleanup hysteresis counters.
+        self.upscale_counter = 0
+        self.downscale_counter = 0
+        self._setup_thresholds(spec)
+
+    def _set_target_num_replicas_with_hysteresis(self) -> None:
+        """Set target_num_replicas based on request rate with hysteresis."""
+        target_num_replicas = self._calculate_target_num_replicas()
+        old_target_num_replicas = self.target_num_replicas
+
+        # Faster scale up when there is no replica.
+        if self.target_num_replicas == 0:
+            self.target_num_replicas = target_num_replicas
+        elif target_num_replicas > self.target_num_replicas:
+            self.upscale_counter += 1
+            self.downscale_counter = 0
+            if self.upscale_counter >= self.scale_up_threshold:
+                self.upscale_counter = 0
+                self.target_num_replicas = target_num_replicas
+        elif target_num_replicas < self.target_num_replicas:
+            self.downscale_counter += 1
+            self.upscale_counter = 0
+            if self.downscale_counter >= self.scale_down_threshold:
+                self.downscale_counter = 0
+                self.target_num_replicas = target_num_replicas
+        else:
+            self.upscale_counter = self.downscale_counter = 0
+
+        logger.info(
+            f'Old target number of replicas: {old_target_num_replicas}. '
+            f'Current target number of replicas: {target_num_replicas}. '
+            f'Final target number of replicas: {self.target_num_replicas}. '
+            f'Upscale counter: {self.upscale_counter}/'
+            f'{self.scale_up_threshold}. '
+            f'Downscale counter: {self.downscale_counter}/'
+            f'{self.scale_down_threshold}. ')
+
+
+class RequestRateAutoscaler(_AutoscalerWithHysteresis):
+    """RequestRateAutoscaler: Autoscale according to request rate.
+
+    Scales when the number of requests per replica in the given interval
+    is above or below the target qps per replica. The instance can be
+    either spot or on-demand, but not both.
+    """
+
+    def __init__(self, service_name: str,
+                 spec: 'service_spec.SkyServiceSpec') -> None:
+        """Initialize the request rate autoscaler.
+
+        Variables:
+            target_qps_per_replica: Target qps per replica for autoscaling.
+            qps_window_size: Window size for qps calculating.
+            request_timestamps: All request timestamps within the window.
+        """
+        super().__init__(service_name, spec)
+        self.target_qps_per_replica: Optional[
+            float] = spec.target_qps_per_replica
+        self.qps_window_size: int = constants.AUTOSCALER_QPS_WINDOW_SIZE_SECONDS
+        self.request_timestamps: List[float] = []
+
+    def _calculate_target_num_replicas(self) -> int:
+        if self.target_qps_per_replica is None:
+            return self.min_replicas
+        num_requests_per_second = len(
+            self.request_timestamps) / self.qps_window_size
+        target_num_replicas = math.ceil(num_requests_per_second /
+                                        self.target_qps_per_replica)
+        logger.info(f'Requests per second: {num_requests_per_second}. '
+                    f'Target number of replicas: {target_num_replicas}.')
+        return self._clip_target_num_replicas(target_num_replicas)
+
+    def update_version(self, version: int, spec: 'service_spec.SkyServiceSpec',
+                       update_mode: serve_utils.UpdateMode) -> None:
+        super().update_version(version, spec, update_mode)
+        self.target_qps_per_replica = spec.target_qps_per_replica
+
+    def collect_request_information(
+            self, request_aggregator_info: Dict[str, Any]) -> None:
+        """Collect request information from aggregator for autoscaling.
+
+        request_aggregator_info should be a dict with the following format:
+
+        {
+            'timestamps': [timestamp1 (float), timestamp2 (float), ...]
+        }
+        """
+        self.request_timestamps.extend(
+            request_aggregator_info.get('timestamps', []))
+        current_time = time.time()
+        index = bisect.bisect_left(self.request_timestamps,
+                                   current_time - self.qps_window_size)
+        self.request_timestamps = self.request_timestamps[index:]
+        logger.info(f'Num of requests in the last {self.qps_window_size} '
+                    f'seconds: {len(self.request_timestamps)}')
+
+    def _generate_scaling_decisions(
+        self,
+        replica_infos: List['replica_managers.ReplicaInfo'],
+    ) -> List[AutoscalerDecision]:
+        """Generate Autoscaling decisions based on request rate."""
+
+        self._set_target_num_replicas_with_hysteresis()
+
+        latest_nonterminal_replicas: List['replica_managers.ReplicaInfo'] = []
+
+        for info in replica_infos:
+            if info.version == self.latest_version:
+                if not info.is_terminal:
+                    latest_nonterminal_replicas.append(info)
+
+        scaling_decisions: List[AutoscalerDecision] = []
+
+        # Case 1. when latest_nonterminal_replicas is less
         # than num_to_provision, we always scale up new replicas.
         if len(latest_nonterminal_replicas) < self.target_num_replicas:
             num_replicas_to_scale_up = (self.target_num_replicas -
                                         len(latest_nonterminal_replicas))
             logger.info('Number of replicas to scale up: '
                         f'{num_replicas_to_scale_up}')
-            for _ in range(num_replicas_to_scale_up):
-                scaling_options.append(
-                    AutoscalerDecision(AutoscalerDecisionOperator.SCALE_UP,
-                                       target=None))
+            scaling_decisions.extend(
+                _generate_scale_up_decisions(num_replicas_to_scale_up, None))
 
-        # Case 2. when latest_nonterminal_replicas is more
+        # Case 2: when latest_nonterminal_replicas is more
         # than self.target_num_replicas, we scale down new replicas.
+        replicas_to_scale_down = []
         if len(latest_nonterminal_replicas) > self.target_num_replicas:
             num_replicas_to_scale_down = (len(latest_nonterminal_replicas) -
                                           self.target_num_replicas)
             replicas_to_scale_down = (
-                RequestRateAutoscaler.
                 _select_nonterminal_replicas_to_scale_down(
-                    num_limit=num_replicas_to_scale_down,
-                    replica_infos=latest_nonterminal_replicas))
+                    num_replicas_to_scale_down, latest_nonterminal_replicas))
             logger.info(
                 'Number of replicas to scale down: '
                 f'{num_replicas_to_scale_down} {replicas_to_scale_down}')
-            all_replica_ids_to_scale_down.extend(replicas_to_scale_down)
 
-        for replica_id in all_replica_ids_to_scale_down:
-            scaling_options.append(
-                AutoscalerDecision(AutoscalerDecisionOperator.SCALE_DOWN,
-                                   target=replica_id))
+        scaling_decisions.extend(
+            _generate_scale_down_decisions(replicas_to_scale_down))
 
-        if not scaling_options:
-            logger.info('No scaling needed.')
-        return scaling_options
+        return scaling_decisions
 
     def _dump_dynamic_states(self) -> Dict[str, Any]:
         return {
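The `_AutoscalerWithHysteresis` base introduced above only commits to a new replica target after the desired value persists for several consecutive decision periods, and `RequestRateAutoscaler._calculate_target_num_replicas` derives that desired value as ceil(observed QPS / target_qps_per_replica), clipped to [min_replicas, max_replicas]. A standalone sketch of both pieces follows; the class, thresholds, and numbers are illustrative, not SkyPilot defaults:

import math

class ToyHysteresis:
    def __init__(self, scale_up_threshold=3, scale_down_threshold=5):
        self.target = 1
        self.up = 0
        self.down = 0
        self.up_thr = scale_up_threshold
        self.down_thr = scale_down_threshold

    def step(self, desired: int) -> int:
        if self.target == 0:          # fast path: scale up from zero at once
            self.target = desired
        elif desired > self.target:
            self.up += 1
            self.down = 0
            if self.up >= self.up_thr:
                self.up = 0
                self.target = desired
        elif desired < self.target:
            self.down += 1
            self.up = 0
            if self.down >= self.down_thr:
                self.down = 0
                self.target = desired
        else:                         # desired == target: reset both counters
            self.up = self.down = 0
        return self.target

def qps_target(num_requests: int, window_s: int, target_qps: float) -> int:
    # Mirrors the unclipped part of _calculate_target_num_replicas:
    # ceil(observed QPS / target QPS per replica).
    return math.ceil(num_requests / window_s / target_qps)

h = ToyHysteresis()
# A desired target of 3 must persist for 3 consecutive periods to take effect.
print([h.step(3) for _ in range(4)])  # [1, 1, 3, 3]
print(qps_target(num_requests=120, window_s=60, target_qps=0.5))  # 4

Hysteresis is what keeps a brief traffic spike or lull from thrashing replicas up and down every decision interval.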
@@ -481,16 +551,19 @@ class FallbackRequestRateAutoscaler(RequestRateAutoscaler):
 
     When spec.base_ondemand_fallback_replicas is set, we make sure
     there are at least spec.base_ondemand_fallback_replicas on-demands
-    to be always there to provide basic guarantee.
+    to be always there to provide basic guarantee for the availability.
 
     When spec.dynamic_ondemand_fallback is set, on-demand instances
     will be scheduled to provision for any preempted spot instance, i.e.,
     on-demand instance are used as dynamic fallback of spot.
     """
 
-    def __init__(self, service_name: str,
-                 spec: 'service_spec.SkyServiceSpec') -> None:
-        super().__init__(service_name, spec)
+    # job_recovery field is checked earlier in core
+    SPOT_OVERRIDE = {'use_spot': True}
+    ONDEMAND_OVERRIDE = {'use_spot': False}
+
+    def _setup_fallback_options(self,
+                                spec: 'service_spec.SkyServiceSpec') -> None:
         self.base_ondemand_fallback_replicas: int = (
             spec.base_ondemand_fallback_replicas
             if spec.base_ondemand_fallback_replicas is not None else 0)
@@ -501,37 +574,42 @@ class FallbackRequestRateAutoscaler(RequestRateAutoscaler):
             spec.dynamic_ondemand_fallback
             if spec.dynamic_ondemand_fallback is not None else False)
 
+    def __init__(self, service_name: str,
+                 spec: 'service_spec.SkyServiceSpec') -> None:
+        """Initialize the fallback request rate autoscaler.
+
+        Variables:
+            base_ondemand_fallback_replicas: Minimum number of on-demand
+                replicas to be always there.
+            dynamic_ondemand_fallback: Whether to dynamically provision
+                on-demand instances for preempted spot instances.
+        """
+        super().__init__(service_name, spec)
+        self._setup_fallback_options(spec)
+
     def update_version(self, version: int, spec: 'service_spec.SkyServiceSpec',
                        update_mode: serve_utils.UpdateMode) -> None:
         super().update_version(version, spec, update_mode=update_mode)
-        self.base_ondemand_fallback_replicas = (
-            spec.base_ondemand_fallback_replicas
-            if spec.base_ondemand_fallback_replicas is not None else 0)
-        # Assert: Either dynamic_ondemand_fallback is set
-        # or base_ondemand_fallback_replicas is greater than 0.
-        assert spec.use_ondemand_fallback
-        self.dynamic_ondemand_fallback = (spec.dynamic_ondemand_fallback
-                                          if spec.dynamic_ondemand_fallback
-                                          is not None else False)
+        self._setup_fallback_options(spec)
 
-
-    def _get_spot_resources_override_dict(self) -> Dict[str, Any]:
-        return {'use_spot': True}
-
-    def _get_ondemand_resources_override_dict(self) -> Dict[str, Any]:
-        return {'use_spot': False}
-
-    def evaluate_scaling(
+    def _generate_scaling_decisions(
         self,
         replica_infos: List['replica_managers.ReplicaInfo'],
     ) -> List[AutoscalerDecision]:
+        """Generate Autoscaling decisions based on request rate, with on-demand
+        fallback.
+
+        The autoscaler will make sure there are at least
+        `base_ondemand_fallback_replicas` on-demand replicas to be always there,
+        so the service can provide basic guarantee for the availability.
+        """
+
+        self._set_target_num_replicas_with_hysteresis()
 
         latest_nonterminal_replicas = list(
             filter(
                 lambda info: not info.is_terminal and info.version == self.
                 latest_version, replica_infos))
-
-        self._set_target_num_replica_with_hysteresis()
         num_nonterminal_spot, num_ready_spot = 0, 0
         num_nonterminal_ondemand, num_ready_ondemand = 0, 0
 
@@ -546,22 +624,14 @@ class FallbackRequestRateAutoscaler(RequestRateAutoscaler):
                     num_nonterminal_ondemand += 1
 
         logger.info(
-            'Number of alive spot instances: '
-            f'{num_nonterminal_spot}, '
+            f'Number of alive spot instances: {num_nonterminal_spot}, '
             f'Number of ready spot instances: {num_ready_spot}, '
-            'Number of alive on-demand instances: '
-            f' {num_nonterminal_ondemand}, '
+            f'Number of alive on-demand instances: {num_nonterminal_ondemand}, '
             f'Number of ready on-demand instances: {num_ready_ondemand}')
 
-        scaling_options: List[AutoscalerDecision] = []
+        scaling_decisions: List[AutoscalerDecision] = []
         all_replica_ids_to_scale_down: List[int] = []
 
-        # TODO(MaoZiming,zhwu): coner case: We should make sure the fallback
-        # replicas are ready before scaling down the old replicas to avoid the
-        # situation that all the ready new replicas are preempted together.
-        all_replica_ids_to_scale_down.extend(
-            self.select_outdated_replicas_to_scale_down(replica_infos))
-
         # Decide how many spot instances to launch.
         num_spot_to_provision = (self.target_num_replicas -
                                  self.base_ondemand_fallback_replicas)
@@ -571,18 +641,15 @@ class FallbackRequestRateAutoscaler(RequestRateAutoscaler):
                                        num_nonterminal_spot)
             logger.info('Number of spot instances to scale up: '
                         f'{num_spot_to_scale_up}')
-            for _ in range(num_spot_to_scale_up):
-                scaling_options.append(
-                    AutoscalerDecision(
-                        AutoscalerDecisionOperator.SCALE_UP,
-                        target=self._get_spot_resources_override_dict()))
+            scaling_decisions.extend(
+                _generate_scale_up_decisions(num_spot_to_scale_up,
+                                             self.SPOT_OVERRIDE))
         elif num_nonterminal_spot > num_spot_to_provision:
             # Too many spot instances, scale down.
             # Get the replica to scale down with _select_replicas_to_scale_down
             num_spot_to_scale_down = (num_nonterminal_spot -
                                       num_spot_to_provision)
             replicas_to_scale_down = (
-                RequestRateAutoscaler.
                 _select_nonterminal_replicas_to_scale_down(
                     num_spot_to_scale_down,
                     filter(lambda info: info.is_spot,
@@ -606,16 +673,13 @@ class FallbackRequestRateAutoscaler(RequestRateAutoscaler):
                                            num_nonterminal_ondemand)
             logger.info('Number of on-demand instances to scale up: '
                         f'{num_ondemand_to_scale_up}')
-            for _ in range(num_ondemand_to_scale_up):
-                scaling_options.append(
-                    AutoscalerDecision(
-                        AutoscalerDecisionOperator.SCALE_UP,
-                        target=self._get_ondemand_resources_override_dict()))
+            scaling_decisions.extend(
+                _generate_scale_up_decisions(num_ondemand_to_scale_up,
+                                             self.ONDEMAND_OVERRIDE))
         else:
             num_ondemand_to_scale_down = (num_nonterminal_ondemand -
                                           num_ondemand_to_provision)
             replicas_to_scale_down = (
-                RequestRateAutoscaler.
                 _select_nonterminal_replicas_to_scale_down(
                     num_ondemand_to_scale_down,
                     filter(lambda info: not info.is_spot,
@@ -626,9 +690,7 @@ class FallbackRequestRateAutoscaler(RequestRateAutoscaler):
 
                 all_replica_ids_to_scale_down.extend(replicas_to_scale_down)
 
-        for replica_id in all_replica_ids_to_scale_down:
-            scaling_options.append(
-                AutoscalerDecision(AutoscalerDecisionOperator.SCALE_DOWN,
-                                   target=replica_id))
+        scaling_decisions.extend(
+            _generate_scale_down_decisions(all_replica_ids_to_scale_down))
 
-        return scaling_options
+        return scaling_decisions