skypilot-nightly 1.0.0.dev20250114__py3-none-any.whl → 1.0.0.dev20250124__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/backends/cloud_vm_ray_backend.py +50 -67
- sky/check.py +31 -1
- sky/cli.py +11 -34
- sky/clouds/kubernetes.py +3 -3
- sky/clouds/service_catalog/kubernetes_catalog.py +14 -0
- sky/core.py +8 -5
- sky/data/storage.py +66 -14
- sky/global_user_state.py +1 -1
- sky/jobs/constants.py +8 -7
- sky/jobs/controller.py +19 -22
- sky/jobs/core.py +0 -2
- sky/jobs/recovery_strategy.py +114 -143
- sky/jobs/scheduler.py +283 -0
- sky/jobs/state.py +263 -21
- sky/jobs/utils.py +338 -96
- sky/provision/aws/config.py +48 -26
- sky/provision/gcp/instance_utils.py +15 -9
- sky/provision/kubernetes/instance.py +1 -1
- sky/provision/kubernetes/utils.py +76 -18
- sky/resources.py +1 -1
- sky/serve/autoscalers.py +359 -301
- sky/serve/controller.py +10 -8
- sky/serve/core.py +84 -7
- sky/serve/load_balancer.py +27 -10
- sky/serve/replica_managers.py +1 -3
- sky/serve/serve_state.py +10 -5
- sky/serve/serve_utils.py +28 -1
- sky/serve/service.py +4 -3
- sky/serve/service_spec.py +31 -0
- sky/skylet/constants.py +1 -1
- sky/skylet/events.py +7 -3
- sky/skylet/job_lib.py +10 -30
- sky/skylet/log_lib.py +8 -8
- sky/skylet/log_lib.pyi +3 -0
- sky/skylet/skylet.py +1 -1
- sky/templates/jobs-controller.yaml.j2 +7 -3
- sky/templates/sky-serve-controller.yaml.j2 +4 -0
- sky/utils/db_utils.py +18 -4
- sky/utils/kubernetes/deploy_remote_cluster.sh +5 -5
- sky/utils/resources_utils.py +25 -21
- sky/utils/schemas.py +13 -0
- sky/utils/subprocess_utils.py +48 -9
- {skypilot_nightly-1.0.0.dev20250114.dist-info → skypilot_nightly-1.0.0.dev20250124.dist-info}/METADATA +4 -1
- {skypilot_nightly-1.0.0.dev20250114.dist-info → skypilot_nightly-1.0.0.dev20250124.dist-info}/RECORD +49 -48
- {skypilot_nightly-1.0.0.dev20250114.dist-info → skypilot_nightly-1.0.0.dev20250124.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250114.dist-info → skypilot_nightly-1.0.0.dev20250124.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250114.dist-info → skypilot_nightly-1.0.0.dev20250124.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250114.dist-info → skypilot_nightly-1.0.0.dev20250124.dist-info}/top_level.txt +0 -0
sky/serve/autoscalers.py
CHANGED
@@ -42,11 +42,10 @@ class AutoscalerDecision:
     # TODO(MaoZiming): Add a doc to elaborate on autoscaling policies.
     def __init__(self, operator: AutoscalerDecisionOperator,
                  target: Union[Optional[Dict[str, Any]], int]):
-
-
-
-
-                    isinstance(target, int))
+        if operator == AutoscalerDecisionOperator.SCALE_UP:
+            assert (target is None or isinstance(target, dict))
+        else:
+            assert isinstance(target, int)
         self.operator = operator
         self.target = target
@@ -54,9 +53,70 @@ class AutoscalerDecision:
         return f'AutoscalerDecision({self.operator}, {self.target})'
 
 
+def _generate_scale_up_decisions(
+        num: int, target: Optional[Dict[str, Any]]) -> List[AutoscalerDecision]:
+    return [
+        AutoscalerDecision(AutoscalerDecisionOperator.SCALE_UP, target)
+        for _ in range(num)
+    ]
+
+
+def _generate_scale_down_decisions(
+        replica_ids: List[int]) -> List[AutoscalerDecision]:
+    return [
+        AutoscalerDecision(AutoscalerDecisionOperator.SCALE_DOWN, replica_id)
+        for replica_id in replica_ids
+    ]
+
+
+def _select_nonterminal_replicas_to_scale_down(
+    num_replica_to_scale_down: int,
+    replica_infos: Iterable['replica_managers.ReplicaInfo'],
+) -> List[int]:
+    """Select nonterminal replicas to scale down.
+
+    We sort the replicas based on the following order:
+    1. Based on the `scale_down_decision_order` of the status. We terminate
+        the replicas that is in earlier stage first, as the replicas in
+        later stage may become ready soon.
+    2. Based on the version in ascending order, so we scale down the older
+        versions first.
+    3. Based on the replica_id in descending order, which is also the order
+        of the replicas being launched. We scale down the replicas that are
+        launched earlier first, as the replicas that are launched later may
+        become ready soon.
+
+    Args:
+        num_replica_to_scale_down: The number of replicas to scale down.
+        replica_infos: The list of replica informations to select from.
+
+    Returns:
+        The list of replica ids to scale down.
+    """
+    replicas = list(replica_infos)
+    status_order = serve_state.ReplicaStatus.scale_down_decision_order()
+    assert all(info.status in status_order for info in replicas), (
+        'All replicas to scale down should be in provisioning or launched '
+        'status.', replicas)
+    replicas = sorted(
+        replicas,
+        key=lambda info: (
+            status_order.index(info.status),
+            # version in ascending order
+            info.version,
+            # replica_id in descending order, i.e. launched order
+            -info.replica_id))
+    assert len(replicas) >= num_replica_to_scale_down, (
+        'Not enough replicas to scale down. Available replicas: ',
+        f'{replicas}, num_replica_to_scale_down: {num_replica_to_scale_down}.')
+    return [info.replica_id for info in replicas][:num_replica_to_scale_down]
+
+
 class Autoscaler:
     """Abstract class for autoscalers."""
 
+    # --------------- APIs to implement for custom autoscaler ---------------
+
     def __init__(self, service_name: str,
                  spec: 'service_spec.SkyServiceSpec') -> None:
         """Initialize the autoscaler.
@@ -67,6 +127,8 @@ class Autoscaler:
                 number of replicas, i.e. min_replicas == max_replicas.
             target_num_replicas: Target number of replicas output by autoscaler.
             latest_version: latest version of the service.
+            latest_version_ever_ready: The latest version that is ever ready.
+            update_mode: Update mode for the service.
         """
         self._service_name: str = service_name
         self.min_replicas: int = spec.min_replicas
@@ -81,6 +143,10 @@ class Autoscaler:
         self.latest_version_ever_ready: int = self.latest_version - 1
         self.update_mode = serve_utils.DEFAULT_UPDATE_MODE
 
+    def _calculate_target_num_replicas(self) -> int:
+        """Calculate target number of replicas."""
+        raise NotImplementedError
+
     def update_version(self, version: int, spec: 'service_spec.SkyServiceSpec',
                        update_mode: serve_utils.UpdateMode) -> None:
         if version <= self.latest_version:
@@ -91,9 +157,9 @@ class Autoscaler:
         self.min_replicas = spec.min_replicas
         self.max_replicas = (spec.max_replicas if spec.max_replicas is not None
                              else spec.min_replicas)
-        #
-        self.target_num_replicas =
-            self.
+        # Re-clip self.target_num_replicas with new min and max replicas.
+        self.target_num_replicas = self._clip_target_num_replicas(
+            self.target_num_replicas)
         self.update_mode = update_mode
 
     def collect_request_information(
@@ -101,222 +167,56 @@ class Autoscaler:
         """Collect request information from aggregator for autoscaling."""
         raise NotImplementedError
 
-    def
+    def _generate_scaling_decisions(
         self,
         replica_infos: List['replica_managers.ReplicaInfo'],
     ) -> List[AutoscalerDecision]:
-        """
+        """Generate Autoscaling decisions based on replica information."""
         raise NotImplementedError
 
-    @classmethod
-    def from_spec(cls, service_name: str,
-                  spec: 'service_spec.SkyServiceSpec') -> 'Autoscaler':
-        # TODO(MaoZiming): use NAME to get the class.
-        if spec.use_ondemand_fallback:
-            return FallbackRequestRateAutoscaler(service_name, spec)
-        else:
-            return RequestRateAutoscaler(service_name, spec)
-
     def _dump_dynamic_states(self) -> Dict[str, Any]:
         """Dump dynamic states from autoscaler."""
         raise NotImplementedError
 
-    def dump_dynamic_states(self) -> Dict[str, Any]:
-        """Dump dynamic states from autoscaler."""
-        states = {'latest_version_ever_ready': self.latest_version_ever_ready}
-        states.update(self._dump_dynamic_states())
-        return states
-
     def _load_dynamic_states(self, dynamic_states: Dict[str, Any]) -> None:
         """Load dynamic states to autoscaler."""
         raise NotImplementedError
 
-
-        """Get the decision interval for the autoscaler."""
-        raise NotImplementedError
-
-    def load_dynamic_states(self, dynamic_states: Dict[str, Any]) -> None:
-        """Load dynamic states to autoscaler."""
-        self.latest_version_ever_ready = dynamic_states.pop(
-            'latest_version_ever_ready', constants.INITIAL_VERSION)
-        self._load_dynamic_states(dynamic_states)
-
-
-class RequestRateAutoscaler(Autoscaler):
-    """RequestRateAutoscaler: Autoscale according to request rate.
-
-    Scales when the number of requests per replica in the given interval
-    is above or below the target qps per replica. The instance can be
-    either spot or on-demand, but not both.
-    """
-
-    def __init__(self, service_name: str,
-                 spec: 'service_spec.SkyServiceSpec') -> None:
-        """Initialize the request rate autoscaler.
+    # --------------- Utility Functions ---------------
 
-
-
-
-            request_timestamps: All request timestamps within the window.
-            upscale_counter: counter for upscale number of replicas.
-            downscale_counter: counter for downscale number of replicas.
-            scale_up_consecutive_periods: period for scaling up.
-            scale_down_consecutive_periods: period for scaling down.
+    def _clip_target_num_replicas(self, target_num_replicas: int) -> int:
+        """Clip target number of replicas with current minimal and maximum
+        number of replicas.
         """
-        super().__init__(service_name, spec)
-        self.target_qps_per_replica: Optional[
-            float] = spec.target_qps_per_replica
-        self.qps_window_size: int = constants.AUTOSCALER_QPS_WINDOW_SIZE_SECONDS
-        self.request_timestamps: List[float] = []
-        self.upscale_counter: int = 0
-        self.downscale_counter: int = 0
-        upscale_delay_seconds = (
-            spec.upscale_delay_seconds if spec.upscale_delay_seconds is not None
-            else constants.AUTOSCALER_DEFAULT_UPSCALE_DELAY_SECONDS)
-        self.scale_up_consecutive_periods: int = int(
-            upscale_delay_seconds /
-            constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)
-        downscale_delay_seconds = (
-            spec.downscale_delay_seconds
-            if spec.downscale_delay_seconds is not None else
-            constants.AUTOSCALER_DEFAULT_DOWNSCALE_DELAY_SECONDS)
-        self.scale_down_consecutive_periods: int = int(
-            downscale_delay_seconds /
-            constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)
-
-    def _cal_target_num_replicas_based_on_qps(self) -> int:
-        # Recalculate target_num_replicas based on QPS.
-        # Reclip self.target_num_replicas with new min and max replicas.
-        if self.target_qps_per_replica is None:
-            return self.min_replicas
-        target_num_replicas = math.ceil(
-            len(self.request_timestamps) / self.qps_window_size /
-            self.target_qps_per_replica)
         return max(self.min_replicas, min(self.max_replicas,
                                           target_num_replicas))
 
-    def update_version(self, version: int, spec: 'service_spec.SkyServiceSpec',
-                       update_mode: serve_utils.UpdateMode) -> None:
-        super().update_version(version, spec, update_mode)
-        self.target_qps_per_replica = spec.target_qps_per_replica
-        upscale_delay_seconds = (
-            spec.upscale_delay_seconds if spec.upscale_delay_seconds is not None
-            else constants.AUTOSCALER_DEFAULT_UPSCALE_DELAY_SECONDS)
-        self.scale_up_consecutive_periods = int(
-            upscale_delay_seconds /
-            constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)
-        downscale_delay_seconds = (
-            spec.downscale_delay_seconds
-            if spec.downscale_delay_seconds is not None else
-            constants.AUTOSCALER_DEFAULT_DOWNSCALE_DELAY_SECONDS)
-        self.scale_down_consecutive_periods = int(
-            downscale_delay_seconds /
-            constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)
-
-        # We directly set the target_num_replicas here instead of
-        # calling `_set_target_num_replica_with_hysteresis` to have the replicas
-        # quickly scale after each update.
-        self.target_num_replicas = self._cal_target_num_replicas_based_on_qps()
-        # Cleanup hysteretic counters.
-        self.upscale_counter = 0
-        self.downscale_counter = 0
-
-    def collect_request_information(
-            self, request_aggregator_info: Dict[str, Any]) -> None:
-        """Collect request information from aggregator for autoscaling.
-
-        request_aggregator_info should be a dict with the following format:
-
-        {
-            'timestamps': [timestamp1 (float), timestamp2 (float), ...]
-        }
-        """
-        self.request_timestamps.extend(
-            request_aggregator_info.get('timestamps', []))
-        current_time = time.time()
-        index = bisect.bisect_left(self.request_timestamps,
-                                   current_time - self.qps_window_size)
-        self.request_timestamps = self.request_timestamps[index:]
-        logger.info(f'Num of requests in the last {self.qps_window_size} '
-                    f'seconds: {len(self.request_timestamps)}')
-
-    def _set_target_num_replica_with_hysteresis(self) -> None:
-        """Set target_num_replicas based on request rate with hysteresis."""
-        # Keep self.target_num_replicas unchange when autoscaling
-        # is not enabled, i.e. self.target_qps_per_replica is None.
-        # In this case, self.target_num_replicas will be min_replicas.
-        if self.target_qps_per_replica is None:
-            return
-
-        # Convert to requests per second.
-        target_num_replicas = self._cal_target_num_replicas_based_on_qps()
-        old_target_num_replicas = self.target_num_replicas
-
-        # Faster scale up when there is no replica.
-        if self.target_num_replicas == 0:
-            self.target_num_replicas = target_num_replicas
-        elif target_num_replicas > self.target_num_replicas:
-            self.upscale_counter += 1
-            self.downscale_counter = 0
-            if self.upscale_counter >= self.scale_up_consecutive_periods:
-                self.upscale_counter = 0
-                self.target_num_replicas = target_num_replicas
-        elif target_num_replicas < self.target_num_replicas:
-            self.downscale_counter += 1
-            self.upscale_counter = 0
-            if self.downscale_counter >= self.scale_down_consecutive_periods:
-                self.downscale_counter = 0
-                self.target_num_replicas = target_num_replicas
-        else:
-            self.upscale_counter = self.downscale_counter = 0
-
-        num_requests_per_second = len(
-            self.request_timestamps) / self.qps_window_size
-        logger.info(
-            f'Requests per second: {num_requests_per_second}. '
-            f'Current target number of replicas: {old_target_num_replicas}. '
-            f'Final target number of replicas: {self.target_num_replicas}. '
-            f'Upscale counter: {self.upscale_counter}/'
-            f'{self.scale_up_consecutive_periods}. '
-            f'Downscale counter: {self.downscale_counter}/'
-            f'{self.scale_down_consecutive_periods}')
-
     @classmethod
-    def
-
-
-
-
-
-
-            'All replicas to scale down should be in provisioning or launched '
-            'status.', replicas)
-        replicas = sorted(
-            replicas,
-            key=lambda info: (
-                status_order.index(info.status),
-                # Sort by version in ascending order, so we scale down the older
-                # versions first.
-                info.version,
-                # Sort `info.replica_id` in descending order so that the
-                # replicas in the same version starts to provisioning later are
-                # scaled down first.
-                -info.replica_id))
-        assert len(replicas) >= num_limit, (
-            'Not enough replicas to scale down.', replicas, num_limit)
-        return [info.replica_id for info in replicas][:num_limit]
+    def from_spec(cls, service_name: str,
+                  spec: 'service_spec.SkyServiceSpec') -> 'Autoscaler':
+        # TODO(MaoZiming): use NAME to get the class.
+        if spec.use_ondemand_fallback:
+            return FallbackRequestRateAutoscaler(service_name, spec)
+        else:
+            return RequestRateAutoscaler(service_name, spec)
 
     def get_decision_interval(self) -> int:
-
-
+        """Get the decision interval for the autoscaler.
+
+        We reduce the decision interval when the desired number of replicas is
+        0, to make the service scale faster when the service is not running.
+        This will happen when min_replicas = 0 and no traffic.
+        """
         if self.target_num_replicas == 0:
             return constants.AUTOSCALER_NO_REPLICA_DECISION_INTERVAL_SECONDS
         else:
             return constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS
 
-    def
-
-
+    def _select_outdated_replicas_to_scale_down(
+        self,
+        replica_infos: List['replica_managers.ReplicaInfo'],
+        active_versions: List[int],
+    ) -> List[int]:
         """Select outdated replicas to scale down."""
 
         if self.update_mode == serve_utils.UpdateMode.ROLLING:
@@ -350,19 +250,12 @@ class RequestRateAutoscaler(Autoscaler):
             # `_select_replicas_to_scale_down` will make sure we scale the
             # replicas in initializing statuses first before scaling down the
             # READY old replicas.
-            return
+            return _select_nonterminal_replicas_to_scale_down(
                 max(0,
                     len(old_nonterminal_replicas) - num_old_replicas_to_keep),
                 old_nonterminal_replicas,
             )
 
-        # Use the active versions set by replica manager to make sure we only
-        # scale down the outdated replicas that are not used by the load
-        # balancer.
-        record = serve_state.get_service_from_name(self._service_name)
-        assert record is not None, (f'No service record found for '
-                                    f'{self._service_name}')
-        active_versions = record['active_versions']
         if not active_versions:
             # active_versions can be empty when none of the replicas are ready
             # when the load balancer sync with the controller.
@@ -376,36 +269,35 @@ class RequestRateAutoscaler(Autoscaler):
         # number of ready new replicas is greater than or equal to the min
         # replicas instead of the target, to ensure the service being updated
         # to the latest version faster.
-
-
-
-
+        return [
+            info.replica_id
+            for info in replica_infos
+            if info.version < latest_version_with_min_replicas
+        ]
 
-
-
-    def evaluate_scaling(
+    def generate_scaling_decisions(
         self,
         replica_infos: List['replica_managers.ReplicaInfo'],
+        active_versions: List[int],
     ) -> List[AutoscalerDecision]:
-        """
-        If the number of launched replicas is less than the target,
-
+        """Generate Autoscaling decisions based on replica information.
+        If the number of launched replicas is less than the target, trigger a
+        scale up. Else, trigger a scale down. This function also handles the
+        version control of the replicas.
 
         For future compatibility, we return a list of AutoscalerDecision.
         Scale-up could include both spot and on-demand, each with a resource
         override dict. Active migration could require returning both SCALE_UP
         and SCALE_DOWN.
         """
-        latest_replicas: List['replica_managers.ReplicaInfo'] = []
-        latest_nonterminal_replicas: List['replica_managers.ReplicaInfo'] = []
 
+        # Handle latest version unrecoverable failure first.
+        latest_replicas: List['replica_managers.ReplicaInfo'] = []
         for info in replica_infos:
             if info.version == self.latest_version:
                 latest_replicas.append(info)
-                if
-
-                if info.is_ready:
-                    self.latest_version_ever_ready = self.latest_version
+                if info.is_ready:
+                    self.latest_version_ever_ready = self.latest_version
         if self.latest_version_ever_ready < self.latest_version:
             for info in latest_replicas:
                 if info.status_property.unrecoverable_failure():
@@ -415,55 +307,229 @@ class RequestRateAutoscaler(Autoscaler):
                     # and restart.
                     return []
 
-
+        scaling_decisions = []
 
-
-
+        # If rolling update is in progress, we scale down old replicas based on
+        # the number of ready new replicas and the traffic is directed to both
+        # old and new replicas. Or, for blue_green update, once there is
+        # min_replicas number of ready new replicas, we will direct all traffic
+        # to them, we can scale down all old replicas.
+        # TODO(MaoZiming,zhwu): corner case: We should make sure the fallback
+        # replicas are ready before scaling down the old replicas to avoid the
+        # situation that all the ready new replicas are preempted together.
+        scaling_decisions.extend(
+            _generate_scale_down_decisions(
+                self._select_outdated_replicas_to_scale_down(
+                    replica_infos, active_versions)))
+
+        # If the latest version is ever ready, we can proceed to generate
+        # decisions from the implementations in subclasses.
+        scaling_decisions.extend(
+            self._generate_scaling_decisions(replica_infos))
+
+        if not scaling_decisions:
+            logger.info('No scaling needed.')
+
+        return scaling_decisions
+
+    def dump_dynamic_states(self) -> Dict[str, Any]:
+        """Dump dynamic states from autoscaler."""
+        states = {'latest_version_ever_ready': self.latest_version_ever_ready}
+        states.update(self._dump_dynamic_states())
+        return states
+
+    def load_dynamic_states(self, dynamic_states: Dict[str, Any]) -> None:
+        """Load dynamic states to autoscaler."""
+        self.latest_version_ever_ready = dynamic_states.pop(
+            'latest_version_ever_ready', constants.INITIAL_VERSION)
+        self._load_dynamic_states(dynamic_states)
+
+
+class _AutoscalerWithHysteresis(Autoscaler):
+    """_AutoscalerWithHysteresis: Autoscale with hysteresis.
+
+    This is an internal class for developing autoscalers with hysteresis. It
+    only scales when the number of replicas is above or below the target number
+    of replicas for a certain number of consecutive periods.
+    """
+
+    def _setup_thresholds(self, spec: 'service_spec.SkyServiceSpec') -> None:
+        upscale_delay_seconds = (
+            spec.upscale_delay_seconds if spec.upscale_delay_seconds is not None
+            else constants.AUTOSCALER_DEFAULT_UPSCALE_DELAY_SECONDS)
+        self.scale_up_threshold: int = int(
+            upscale_delay_seconds /
+            constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)
+        downscale_delay_seconds = (
+            spec.downscale_delay_seconds
+            if spec.downscale_delay_seconds is not None else
+            constants.AUTOSCALER_DEFAULT_DOWNSCALE_DELAY_SECONDS)
+        self.scale_down_threshold: int = int(
+            downscale_delay_seconds /
+            constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)
+
+    def __init__(self, service_name: str,
+                 spec: 'service_spec.SkyServiceSpec') -> None:
+        """Initialize the hysteresis autoscaler.
+
+        Variables:
+            upscale_counter: Counter for upscale decisions of replicas.
+            downscale_counter: Counter for downscale decisions of replicas.
+            scale_up_threshold: The threshold to trigger a scale up.
+            scale_down_threshold: The threshold to trigger a scale down.
+        """
+        super().__init__(service_name, spec)
+        self.upscale_counter: int = 0
+        self.downscale_counter: int = 0
+        self._setup_thresholds(spec)
+
+    def update_version(self, version: int, spec: 'service_spec.SkyServiceSpec',
+                       update_mode: serve_utils.UpdateMode) -> None:
+        super().update_version(version, spec, update_mode)
+        # We directly set the target_num_replicas here instead of calling
+        # `_set_target_num_replicas_with_hysteresis` to have the replicas
+        # quickly scale after each update.
+        self.target_num_replicas = self._calculate_target_num_replicas()
+        # Cleanup hysteresis counters.
+        self.upscale_counter = 0
+        self.downscale_counter = 0
+        self._setup_thresholds(spec)
 
-
-
-
-
-        # new replicas, we will direct all traffic to them, we can scale down
-        # all old replicas.
-        all_replica_ids_to_scale_down.extend(
-            self.select_outdated_replicas_to_scale_down(replica_infos))
+    def _set_target_num_replicas_with_hysteresis(self) -> None:
+        """Set target_num_replicas based on request rate with hysteresis."""
+        target_num_replicas = self._calculate_target_num_replicas()
+        old_target_num_replicas = self.target_num_replicas
 
-        #
+        # Faster scale up when there is no replica.
+        if self.target_num_replicas == 0:
+            self.target_num_replicas = target_num_replicas
+        elif target_num_replicas > self.target_num_replicas:
+            self.upscale_counter += 1
+            self.downscale_counter = 0
+            if self.upscale_counter >= self.scale_up_threshold:
+                self.upscale_counter = 0
+                self.target_num_replicas = target_num_replicas
+        elif target_num_replicas < self.target_num_replicas:
+            self.downscale_counter += 1
+            self.upscale_counter = 0
+            if self.downscale_counter >= self.scale_down_threshold:
+                self.downscale_counter = 0
+                self.target_num_replicas = target_num_replicas
+        else:
+            self.upscale_counter = self.downscale_counter = 0
+
+        logger.info(
+            f'Old target number of replicas: {old_target_num_replicas}. '
+            f'Current target number of replicas: {target_num_replicas}. '
+            f'Final target number of replicas: {self.target_num_replicas}. '
+            f'Upscale counter: {self.upscale_counter}/'
+            f'{self.scale_up_threshold}. '
+            f'Downscale counter: {self.downscale_counter}/'
+            f'{self.scale_down_threshold}. ')
+
+
+class RequestRateAutoscaler(_AutoscalerWithHysteresis):
+    """RequestRateAutoscaler: Autoscale according to request rate.
+
+    Scales when the number of requests per replica in the given interval
+    is above or below the target qps per replica. The instance can be
+    either spot or on-demand, but not both.
+    """
+
+    def __init__(self, service_name: str,
+                 spec: 'service_spec.SkyServiceSpec') -> None:
+        """Initialize the request rate autoscaler.
+
+        Variables:
+            target_qps_per_replica: Target qps per replica for autoscaling.
+            qps_window_size: Window size for qps calculating.
+            request_timestamps: All request timestamps within the window.
+        """
+        super().__init__(service_name, spec)
+        self.target_qps_per_replica: Optional[
+            float] = spec.target_qps_per_replica
+        self.qps_window_size: int = constants.AUTOSCALER_QPS_WINDOW_SIZE_SECONDS
+        self.request_timestamps: List[float] = []
+
+    def _calculate_target_num_replicas(self) -> int:
+        if self.target_qps_per_replica is None:
+            return self.min_replicas
+        num_requests_per_second = len(
+            self.request_timestamps) / self.qps_window_size
+        target_num_replicas = math.ceil(num_requests_per_second /
+                                        self.target_qps_per_replica)
+        logger.info(f'Requests per second: {num_requests_per_second}. '
+                    f'Target number of replicas: {target_num_replicas}.')
+        return self._clip_target_num_replicas(target_num_replicas)
+
+    def update_version(self, version: int, spec: 'service_spec.SkyServiceSpec',
+                       update_mode: serve_utils.UpdateMode) -> None:
+        super().update_version(version, spec, update_mode)
+        self.target_qps_per_replica = spec.target_qps_per_replica
+
+    def collect_request_information(
+            self, request_aggregator_info: Dict[str, Any]) -> None:
+        """Collect request information from aggregator for autoscaling.
+
+        request_aggregator_info should be a dict with the following format:
+
+        {
+            'timestamps': [timestamp1 (float), timestamp2 (float), ...]
+        }
+        """
+        self.request_timestamps.extend(
+            request_aggregator_info.get('timestamps', []))
+        current_time = time.time()
+        index = bisect.bisect_left(self.request_timestamps,
+                                   current_time - self.qps_window_size)
+        self.request_timestamps = self.request_timestamps[index:]
+        logger.info(f'Num of requests in the last {self.qps_window_size} '
+                    f'seconds: {len(self.request_timestamps)}')
+
+    def _generate_scaling_decisions(
+        self,
+        replica_infos: List['replica_managers.ReplicaInfo'],
+    ) -> List[AutoscalerDecision]:
+        """Generate Autoscaling decisions based on request rate."""
+
+        self._set_target_num_replicas_with_hysteresis()
+
+        latest_nonterminal_replicas: List['replica_managers.ReplicaInfo'] = []
+
+        for info in replica_infos:
+            if info.version == self.latest_version:
+                if not info.is_terminal:
+                    latest_nonterminal_replicas.append(info)
+
+        scaling_decisions: List[AutoscalerDecision] = []
+
+        # Case 1. when latest_nonterminal_replicas is less
         # than num_to_provision, we always scale up new replicas.
         if len(latest_nonterminal_replicas) < self.target_num_replicas:
             num_replicas_to_scale_up = (self.target_num_replicas -
                                         len(latest_nonterminal_replicas))
             logger.info('Number of replicas to scale up: '
                         f'{num_replicas_to_scale_up}')
-
-
-                AutoscalerDecision(AutoscalerDecisionOperator.SCALE_UP,
-                                   target=None))
+            scaling_decisions.extend(
+                _generate_scale_up_decisions(num_replicas_to_scale_up, None))
 
-        # Case
+        # Case 2: when latest_nonterminal_replicas is more
         # than self.target_num_replicas, we scale down new replicas.
+        replicas_to_scale_down = []
         if len(latest_nonterminal_replicas) > self.target_num_replicas:
             num_replicas_to_scale_down = (len(latest_nonterminal_replicas) -
                                           self.target_num_replicas)
             replicas_to_scale_down = (
-                RequestRateAutoscaler.
                 _select_nonterminal_replicas_to_scale_down(
-
-                    replica_infos=latest_nonterminal_replicas))
+                    num_replicas_to_scale_down, latest_nonterminal_replicas))
             logger.info(
                 'Number of replicas to scale down: '
                 f'{num_replicas_to_scale_down} {replicas_to_scale_down}')
-            all_replica_ids_to_scale_down.extend(replicas_to_scale_down)
 
-
-
-                AutoscalerDecision(AutoscalerDecisionOperator.SCALE_DOWN,
-                                   target=replica_id))
+            scaling_decisions.extend(
+                _generate_scale_down_decisions(replicas_to_scale_down))
 
-
-            logger.info('No scaling needed.')
-        return scaling_options
+        return scaling_decisions
 
     def _dump_dynamic_states(self) -> Dict[str, Any]:
         return {
@@ -485,16 +551,19 @@ class FallbackRequestRateAutoscaler(RequestRateAutoscaler):
 
     When spec.base_ondemand_fallback_replicas is set, we make sure
     there are at least spec.base_ondemand_fallback_replicas on-demands
-    to be always there to provide basic
+    to be always there to provide basic guarantee for the availability.
 
    When spec.dynamic_ondemand_fallback is set, on-demand instances
    will be scheduled to provision for any preempted spot instance, i.e.,
    on-demand instance are used as dynamic fallback of spot.
    """
 
-
-
-
+    # job_recovery field is checked earlier in core
+    SPOT_OVERRIDE = {'use_spot': True}
+    ONDEMAND_OVERRIDE = {'use_spot': False}
+
+    def _setup_fallback_options(self,
+                                spec: 'service_spec.SkyServiceSpec') -> None:
        self.base_ondemand_fallback_replicas: int = (
            spec.base_ondemand_fallback_replicas
            if spec.base_ondemand_fallback_replicas is not None else 0)
@@ -505,37 +574,42 @@ class FallbackRequestRateAutoscaler(RequestRateAutoscaler):
            spec.dynamic_ondemand_fallback
            if spec.dynamic_ondemand_fallback is not None else False)
 
+    def __init__(self, service_name: str,
+                 spec: 'service_spec.SkyServiceSpec') -> None:
+        """Initialize the fallback request rate autoscaler.
+
+        Variables:
+            base_ondemand_fallback_replicas: Minimum number of on-demand
+                replicas to be always there.
+            dynamic_ondemand_fallback: Whether to dynamically provision
+                on-demand instances for preempted spot instances.
+        """
+        super().__init__(service_name, spec)
+        self._setup_fallback_options(spec)
+
    def update_version(self, version: int, spec: 'service_spec.SkyServiceSpec',
                       update_mode: serve_utils.UpdateMode) -> None:
        super().update_version(version, spec, update_mode=update_mode)
-        self.
-            spec.base_ondemand_fallback_replicas
-            if spec.base_ondemand_fallback_replicas is not None else 0)
-        # Assert: Either dynamic_ondemand_fallback is set
-        # or base_ondemand_fallback_replicas is greater than 0.
-        assert spec.use_ondemand_fallback
-        self.dynamic_ondemand_fallback = (spec.dynamic_ondemand_fallback
-                                          if spec.dynamic_ondemand_fallback
-                                          is not None else False)
+        self._setup_fallback_options(spec)
 
-
-    def _get_spot_resources_override_dict(self) -> Dict[str, Any]:
-        return {'use_spot': True}
-
-    def _get_ondemand_resources_override_dict(self) -> Dict[str, Any]:
-        return {'use_spot': False}
-
-    def evaluate_scaling(
+    def _generate_scaling_decisions(
        self,
        replica_infos: List['replica_managers.ReplicaInfo'],
    ) -> List[AutoscalerDecision]:
+        """Generate Autoscaling decisions based on request rate, with on-demand
+        fallback.
+
+        The autoscaler will make sure there are at least
+        `base_ondemand_fallback_replicas` on-demand replicas to be always there,
+        so the service can provide basic guarantee for the availability.
+        """
+
+        self._set_target_num_replicas_with_hysteresis()
 
        latest_nonterminal_replicas = list(
            filter(
                lambda info: not info.is_terminal and info.version == self.
                latest_version, replica_infos))
-
-        self._set_target_num_replica_with_hysteresis()
        num_nonterminal_spot, num_ready_spot = 0, 0
        num_nonterminal_ondemand, num_ready_ondemand = 0, 0
 
@@ -550,22 +624,14 @@ class FallbackRequestRateAutoscaler(RequestRateAutoscaler):
                    num_nonterminal_ondemand += 1
 
        logger.info(
-            'Number of alive spot instances: '
-            f'{num_nonterminal_spot}, '
+            f'Number of alive spot instances: {num_nonterminal_spot}, '
            f'Number of ready spot instances: {num_ready_spot}, '
-            'Number of alive on-demand instances: '
-            f' {num_nonterminal_ondemand}, '
+            f'Number of alive on-demand instances: {num_nonterminal_ondemand}, '
            f'Number of ready on-demand instances: {num_ready_ondemand}')
 
-
+        scaling_decisions: List[AutoscalerDecision] = []
        all_replica_ids_to_scale_down: List[int] = []
 
-        # TODO(MaoZiming,zhwu): coner case: We should make sure the fallback
-        # replicas are ready before scaling down the old replicas to avoid the
-        # situation that all the ready new replicas are preempted together.
-        all_replica_ids_to_scale_down.extend(
-            self.select_outdated_replicas_to_scale_down(replica_infos))
-
        # Decide how many spot instances to launch.
        num_spot_to_provision = (self.target_num_replicas -
                                 self.base_ondemand_fallback_replicas)
@@ -575,18 +641,15 @@ class FallbackRequestRateAutoscaler(RequestRateAutoscaler):
                                     num_nonterminal_spot)
            logger.info('Number of spot instances to scale up: '
                        f'{num_spot_to_scale_up}')
-
-
-
-                    AutoscalerDecisionOperator.SCALE_UP,
-                    target=self._get_spot_resources_override_dict()))
+            scaling_decisions.extend(
+                _generate_scale_up_decisions(num_spot_to_scale_up,
+                                             self.SPOT_OVERRIDE))
        elif num_nonterminal_spot > num_spot_to_provision:
            # Too many spot instances, scale down.
            # Get the replica to scale down with _select_replicas_to_scale_down
            num_spot_to_scale_down = (num_nonterminal_spot -
                                      num_spot_to_provision)
            replicas_to_scale_down = (
-                RequestRateAutoscaler.
                _select_nonterminal_replicas_to_scale_down(
                    num_spot_to_scale_down,
                    filter(lambda info: info.is_spot,
@@ -610,16 +673,13 @@ class FallbackRequestRateAutoscaler(RequestRateAutoscaler):
                                        num_nonterminal_ondemand)
            logger.info('Number of on-demand instances to scale up: '
                        f'{num_ondemand_to_scale_up}')
-
-
-
-                    AutoscalerDecisionOperator.SCALE_UP,
-                    target=self._get_ondemand_resources_override_dict()))
+            scaling_decisions.extend(
+                _generate_scale_up_decisions(num_ondemand_to_scale_up,
+                                             self.ONDEMAND_OVERRIDE))
        else:
            num_ondemand_to_scale_down = (num_nonterminal_ondemand -
                                          num_ondemand_to_provision)
            replicas_to_scale_down = (
-                RequestRateAutoscaler.
                _select_nonterminal_replicas_to_scale_down(
                    num_ondemand_to_scale_down,
                    filter(lambda info: not info.is_spot,
@@ -630,9 +690,7 @@ class FallbackRequestRateAutoscaler(RequestRateAutoscaler):
 
            all_replica_ids_to_scale_down.extend(replicas_to_scale_down)
 
-
-
-            AutoscalerDecision(AutoscalerDecisionOperator.SCALE_DOWN,
-                               target=replica_id))
+        scaling_decisions.extend(
+            _generate_scale_down_decisions(all_replica_ids_to_scale_down))
 
-        return
+        return scaling_decisions