modal 1.0.6.dev38__py3-none-any.whl → 1.0.6.dev41__py3-none-any.whl

modal/client.pyi CHANGED
@@ -33,7 +33,7 @@ class _Client:
         server_url: str,
         client_type: int,
         credentials: typing.Optional[tuple[str, str]],
-        version: str = "1.0.6.dev38",
+        version: str = "1.0.6.dev41",
     ):
         """mdmd:hidden
         The Modal client object is not intended to be instantiated directly by users.
@@ -163,7 +163,7 @@ class Client:
         server_url: str,
         client_type: int,
         credentials: typing.Optional[tuple[str, str]],
-        version: str = "1.0.6.dev38",
+        version: str = "1.0.6.dev41",
     ):
         """mdmd:hidden
         The Modal client object is not intended to be instantiated directly by users.
@@ -1,7 +1,5 @@
 # Copyright Modal Labs 2025
-import asyncio
 import os
-import sys
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Literal, Optional, Union
@@ -13,17 +11,16 @@ from .._functions import _Function
 from .._object import _get_environment_name
 from .._partial_function import _clustered
 from .._runtime.container_io_manager import _ContainerIOManager
-from .._tunnel import _forward as _forward_tunnel
 from .._utils.async_utils import synchronize_api, synchronizer
 from .._utils.deprecation import deprecation_warning
 from .._utils.grpc_utils import retry_transient_errors
 from ..app import _App
 from ..client import _Client
 from ..cls import _Cls, _Obj
-from ..config import logger
 from ..exception import InvalidError
 from ..image import DockerfileSpec, ImageBuilderVersion, _Image, _ImageRegistryConfig
 from ..secret import _Secret
+from .flash import flash_forward, flash_prometheus_autoscaler  # noqa: F401
 
 
 def stop_fetching_inputs():
@@ -259,89 +256,3 @@ async def update_autoscaler(
 
     request = api_pb2.FunctionUpdateSchedulingParamsRequest(function_id=f.object_id, settings=settings)
     await retry_transient_errors(client.stub.FunctionUpdateSchedulingParams, request)
-
-
-class _FlashManager:
-    def __init__(self, client: _Client, port: int):
-        self.client = client
-        self.port = port
-        self.tunnel_manager = _forward_tunnel(port, client=client)
-        self.stopped = False
-
-    async def _start(self):
-        self.tunnel = await self.tunnel_manager.__aenter__()
-
-        hostname = self.tunnel.url.split("://")[1]
-        if ":" in hostname:
-            host, port = hostname.split(":")
-        else:
-            host = hostname
-            port = "443"
-
-        self.heartbeat_task = asyncio.create_task(self._run_heartbeat(host, int(port)))
-
-    async def _run_heartbeat(self, host: str, port: int):
-        first_registration = True
-        while True:
-            try:
-                resp = await self.client.stub.FlashContainerRegister(
-                    api_pb2.FlashContainerRegisterRequest(
-                        priority=10,
-                        weight=5,
-                        host=host,
-                        port=port,
-                    ),
-                    timeout=10,
-                )
-                if first_registration:
-                    logger.warning(f"[Modal Flash] Listening at {resp.url}")
-                    first_registration = False
-            except asyncio.CancelledError:
-                logger.warning("[Modal Flash] Shutting down...")
-                break
-            except Exception as e:
-                logger.error(f"[Modal Flash] Heartbeat failed: {e}")
-
-            try:
-                await asyncio.sleep(1)
-            except asyncio.CancelledError:
-                logger.warning("[Modal Flash] Shutting down...")
-                break
-
-    async def stop(self):
-        self.heartbeat_task.cancel()
-        await retry_transient_errors(
-            self.client.stub.FlashContainerDeregister,
-            api_pb2.FlashContainerDeregisterRequest(),
-        )
-
-        self.stopped = True
-        logger.warning(f"[Modal Flash] No longer accepting new requests on {self.tunnel.url}.")
-
-        # NOTE(gongy): We skip calling TunnelStop to avoid interrupting in-flight requests.
-        # It is up to the user to wait after calling .stop() to drain in-flight requests.
-
-    async def close(self):
-        if not self.stopped:
-            await self.stop()
-
-        logger.warning(f"[Modal Flash] Closing tunnel on {self.tunnel.url}.")
-        await self.tunnel_manager.__aexit__(*sys.exc_info())
-
-
-FlashManager = synchronize_api(_FlashManager)
-
-
-@synchronizer.create_blocking
-async def flash_forward(port: int) -> _FlashManager:
-    """
-    Forward a port to the Modal Flash service, exposing that port as a stable web endpoint.
-
-    This is a highly experimental method that can break or be removed at any time without warning.
-    Do not use this method unless explicitly instructed to do so by Modal support.
-    """
-    client = await _Client.from_env()
-
-    manager = _FlashManager(client, port)
-    await manager._start()
-    return manager
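
The two hunks above move the Flash implementation out of this module into a dedicated `flash` submodule, leaving behind a re-export (`from .flash import flash_forward, flash_prometheus_autoscaler`) so existing imports keep resolving. A minimal sketch of what that means for callers, assuming this module is importable as `modal.experimental` (an assumption consistent with the `stop_fetching_inputs` and `update_autoscaler` functions shown above):

```python
# Hypothetical caller; the modal.experimental import path is an assumption.
# The re-export means this import works the same before and after the move.
from modal.experimental import flash_forward

manager = flash_forward(8000)  # expose local port 8000 via Modal Flash
# ... serve traffic ...
manager.stop()   # deregister: stop accepting new requests, let in-flight ones drain
manager.close()  # then tear down the tunnel
```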
@@ -0,0 +1,449 @@
+# Copyright Modal Labs 2025
+import asyncio
+import math
+import sys
+import time
+import traceback
+from collections import defaultdict
+from typing import Any, Optional
+from urllib.parse import urlparse
+
+from modal.cls import _Cls
+from modal.dict import _Dict
+from modal_proto import api_pb2
+
+from .._tunnel import _forward as _forward_tunnel
+from .._utils.async_utils import synchronize_api, synchronizer
+from .._utils.grpc_utils import retry_transient_errors
+from ..client import _Client
+from ..config import logger
+from ..exception import InvalidError
+
+
+class _FlashManager:
+    def __init__(self, client: _Client, port: int, health_check_url: Optional[str] = None):
+        self.client = client
+        self.port = port
+        self.health_check_url = health_check_url
+        self.tunnel_manager = _forward_tunnel(port, client=client)
+        self.stopped = False
+
+    async def _start(self):
+        self.tunnel = await self.tunnel_manager.__aenter__()
+
+        parsed_url = urlparse(self.tunnel.url)
+        host = parsed_url.hostname
+        port = parsed_url.port or 443
+
+        self.heartbeat_task = asyncio.create_task(self._run_heartbeat(host, port))
+
+    async def _run_heartbeat(self, host: str, port: int):
+        first_registration = True
+        while True:
+            try:
+                resp = await self.client.stub.FlashContainerRegister(
+                    api_pb2.FlashContainerRegisterRequest(
+                        priority=10,
+                        weight=5,
+                        host=host,
+                        port=port,
+                    ),
+                    timeout=10,
+                )
+                if first_registration:
+                    logger.warning(f"[Modal Flash] Listening at {resp.url}")
+                    first_registration = False
+            except asyncio.CancelledError:
+                logger.warning("[Modal Flash] Shutting down...")
+                break
+            except Exception as e:
+                logger.error(f"[Modal Flash] Heartbeat failed: {e}")
+
+            try:
+                await asyncio.sleep(1)
+            except asyncio.CancelledError:
+                logger.warning("[Modal Flash] Shutting down...")
+                break
+
+    def get_container_url(self):
+        # WARNING: Try not to use this method; we aren't sure if we will keep it.
+        return self.tunnel.url
+
+    async def stop(self):
+        self.heartbeat_task.cancel()
+        await retry_transient_errors(
+            self.client.stub.FlashContainerDeregister,
+            api_pb2.FlashContainerDeregisterRequest(),
+        )
+
+        self.stopped = True
+        logger.warning(f"[Modal Flash] No longer accepting new requests on {self.tunnel.url}.")
+
+        # NOTE(gongy): We skip calling TunnelStop to avoid interrupting in-flight requests.
+        # It is up to the user to wait after calling .stop() to drain in-flight requests.
+
+    async def close(self):
+        if not self.stopped:
+            await self.stop()
+
+        logger.warning(f"[Modal Flash] Closing tunnel on {self.tunnel.url}.")
+        await self.tunnel_manager.__aexit__(*sys.exc_info())
+
+
+FlashManager = synchronize_api(_FlashManager)
+
+
+@synchronizer.create_blocking
+async def flash_forward(port: int, health_check_url: Optional[str] = None) -> _FlashManager:
+    """
+    Forward a port to the Modal Flash service, exposing that port as a stable web endpoint.
+
+    This is a highly experimental method that can break or be removed at any time without warning.
+    Do not use this method unless explicitly instructed to do so by Modal support.
+    """
+    client = await _Client.from_env()
+
+    manager = _FlashManager(client, port, health_check_url)
+    await manager._start()
+    return manager
+
+
+class _FlashPrometheusAutoscaler:
+    _max_window_seconds = 60 * 60
+
+    def __init__(
+        self,
+        client: _Client,
+        app_name: str,
+        cls_name: str,
+        metrics_endpoint: str,
+        target_metric: str,
+        target_metric_value: float,
+        min_containers: Optional[int],
+        max_containers: Optional[int],
+        scale_up_tolerance: float,
+        scale_down_tolerance: float,
+        scale_up_stabilization_window_seconds: int,
+        scale_down_stabilization_window_seconds: int,
+        autoscaling_interval_seconds: int,
+    ):
+        if scale_up_stabilization_window_seconds > self._max_window_seconds:
+            raise InvalidError(
+                f"scale_up_stabilization_window_seconds must be less than or equal to {self._max_window_seconds}"
+            )
+        if scale_down_stabilization_window_seconds > self._max_window_seconds:
+            raise InvalidError(
+                f"scale_down_stabilization_window_seconds must be less than or equal to {self._max_window_seconds}"
+            )
+        if target_metric_value <= 0:
+            raise InvalidError("target_metric_value must be greater than 0")
+
+        import aiohttp
+
+        self.client = client
+        self.app_name = app_name
+        self.cls_name = cls_name
+        self.metrics_endpoint = metrics_endpoint
+        self.target_metric = target_metric
+        self.target_metric_value = target_metric_value
+        self.min_containers = min_containers
+        self.max_containers = max_containers
+        self.scale_up_tolerance = scale_up_tolerance
+        self.scale_down_tolerance = scale_down_tolerance
+        self.scale_up_stabilization_window_seconds = scale_up_stabilization_window_seconds
+        self.scale_down_stabilization_window_seconds = scale_down_stabilization_window_seconds
+        self.autoscaling_interval_seconds = autoscaling_interval_seconds
+
+        FlashClass = _Cls.from_name(app_name, cls_name)
+        self.fn = FlashClass._class_service_function
+        self.cls = FlashClass()
+
+        self.http_client = aiohttp.ClientSession()
+        self.autoscaling_decisions_dict = _Dict.from_name(
+            f"{app_name}-{cls_name}-autoscaling-decisions",
+            create_if_missing=True,
+        )
+
+        self.autoscaler_thread = None
+
+    async def start(self):
+        await self.fn.hydrate(client=self.client)
+        self.autoscaler_thread = asyncio.create_task(self._run_autoscaler_loop())
+
+    async def _run_autoscaler_loop(self):
+        while True:
+            try:
+                autoscaling_time = time.time()
+
+                current_replicas = await self.autoscaling_decisions_dict.get("current_replicas", 0)
+                autoscaling_decisions = await self.autoscaling_decisions_dict.get("autoscaling_decisions", [])
+                if not isinstance(current_replicas, int):
+                    logger.warning(f"[Modal Flash] Invalid item in autoscaling decisions: {current_replicas}")
+                    current_replicas = 0
+                if not isinstance(autoscaling_decisions, list):
+                    logger.warning(f"[Modal Flash] Invalid item in autoscaling decisions: {autoscaling_decisions}")
+                    autoscaling_decisions = []
+                for item in autoscaling_decisions:
+                    if (
+                        not isinstance(item, tuple)
+                        or len(item) != 2
+                        or not isinstance(item[0], float)
+                        or not isinstance(item[1], int)
+                    ):
+                        logger.warning(f"[Modal Flash] Invalid item in autoscaling decisions: {item}")
+                        autoscaling_decisions = []
+                        break
+
+                autoscaling_decisions = [
+                    (timestamp, decision)
+                    for timestamp, decision in autoscaling_decisions
+                    if timestamp >= autoscaling_time - self._max_window_seconds
+                ]
+
+                current_target_containers = await self._compute_target_containers(current_replicas)
+                autoscaling_decisions.append((autoscaling_time, current_target_containers))
+
+                actual_target_containers = self._make_scaling_decision(
+                    current_replicas,
+                    autoscaling_decisions,
+                    scale_up_stabilization_window_seconds=self.scale_up_stabilization_window_seconds,
+                    scale_down_stabilization_window_seconds=self.scale_down_stabilization_window_seconds,
+                    min_containers=self.min_containers,
+                    max_containers=self.max_containers,
+                )
+
+                logger.warning(
+                    f"[Modal Flash] Scaling to {actual_target_containers} containers. Autoscaling decision "
+                    f"made in {time.time() - autoscaling_time} seconds."
+                )
+
+                await self.autoscaling_decisions_dict.put(
+                    "autoscaling_decisions",
+                    autoscaling_decisions,
+                )
+                await self.autoscaling_decisions_dict.put("current_replicas", actual_target_containers)
+
+                await self.cls.update_autoscaler(
+                    min_containers=actual_target_containers,
+                    max_containers=actual_target_containers,
+                )
+
+                if time.time() - autoscaling_time < self.autoscaling_interval_seconds:
+                    await asyncio.sleep(self.autoscaling_interval_seconds - (time.time() - autoscaling_time))
+            except asyncio.CancelledError:
+                logger.warning("[Modal Flash] Shutting down autoscaler...")
+                await self.http_client.close()
+                break
+            except Exception as e:
+                logger.error(f"[Modal Flash] Error in autoscaler: {e}")
+                logger.error(traceback.format_exc())
+                await asyncio.sleep(self.autoscaling_interval_seconds)
+
+    async def _compute_target_containers(self, current_replicas: int) -> int:
+        containers = await self._get_all_containers()
+        if len(containers) > current_replicas:
+            logger.info(
+                f"[Modal Flash] Current replicas {current_replicas} is less than the number of containers "
+                f"{len(containers)}. Setting current_replicas = num_containers."
+            )
+            current_replicas = len(containers)
+
+        if current_replicas == 0:
+            return 1
+
+        target_metric = self.target_metric
+        target_metric_value = float(self.target_metric_value)
+
+        sum_metric = 0
+        containers_with_metrics = 0
+        container_metrics_list = await asyncio.gather(
+            *[
+                self._get_metrics(f"https://{container.host}:{container.port}/{self.metrics_endpoint}")
+                for container in containers
+            ]
+        )
+        for container_metrics in container_metrics_list:
+            if (
+                container_metrics is None
+                or target_metric not in container_metrics
+                or len(container_metrics[target_metric]) == 0
+            ):
+                continue
+            sum_metric += container_metrics[target_metric][0].value
+            containers_with_metrics += 1
+
+        n_containers_missing_metric = current_replicas - containers_with_metrics
+
+        # Scale up / down conservatively: Any container that is missing the metric is assumed to be at the minimum
+        # value of the metric when scaling up and the maximum value of the metric when scaling down.
+        scale_up_target_metric_value = sum_metric / current_replicas
+        scale_down_target_metric_value = (
+            sum_metric + n_containers_missing_metric * target_metric_value
+        ) / current_replicas
+
+        scale_up_ratio = scale_up_target_metric_value / target_metric_value
+        scale_down_ratio = scale_down_target_metric_value / target_metric_value
+
+        desired_replicas = current_replicas
+        if scale_up_ratio > 1 + self.scale_up_tolerance:
+            desired_replicas = math.ceil(current_replicas * scale_up_ratio)
+        elif scale_down_ratio < 1 - self.scale_down_tolerance:
+            desired_replicas = math.ceil(current_replicas * scale_down_ratio)
+
+        logger.warning(
+            f"[Modal Flash] Current replicas: {current_replicas}, target metric value: {target_metric_value}, "
+            f"current sum of metric values: {sum_metric}, number of containers missing metric: "
+            f"{n_containers_missing_metric}, scale up ratio: {scale_up_ratio}, scale down ratio: {scale_down_ratio}, "
+            f"desired replicas: {desired_replicas}"
+        )
+
+        return desired_replicas
+
+    async def _get_metrics(self, url: str) -> Optional[dict[str, list[Any]]]:  # technically any should be Sample
+        from prometheus_client.parser import Sample, text_string_to_metric_families
+
+        # Fetch the metrics from the endpoint
+        try:
+            response = await self.http_client.get(url)
+            response.raise_for_status()
+        except Exception as e:
+            logger.warning(f"[Modal Flash] Error getting metrics from {url}: {e}")
+            return None
+
+        # Parse the text-based Prometheus metrics format
+        metrics: dict[str, list[Sample]] = defaultdict(list)
+        for family in text_string_to_metric_families(await response.text()):
+            for sample in family.samples:
+                metrics[sample.name] += [sample]
+
+        return metrics
+
+    async def _get_all_containers(self):
+        req = api_pb2.FlashContainerListRequest(function_id=self.fn.object_id)
+        resp = await retry_transient_errors(self.client.stub.FlashContainerList, req)
+        return resp.containers
+
+    def _make_scaling_decision(
+        self,
+        current_replicas: int,
+        autoscaling_decisions: list[tuple[float, int]],
+        scale_up_stabilization_window_seconds: int = 0,
+        scale_down_stabilization_window_seconds: int = 60 * 5,
+        min_containers: Optional[int] = None,
+        max_containers: Optional[int] = None,
+    ) -> int:
+        """
+        Return the target number of containers following (simplified) Kubernetes HPA
+        stabilization-window semantics.
+
+        Args:
+            current_replicas: Current number of running Pods/containers.
+            autoscaling_decisions: List of (timestamp, desired_replicas) pairs, where
+                timestamp is a UNIX epoch float (seconds).
+                The list *must* contain at least one entry and should
+                already include the most-recent measurement.
+            scale_up_stabilization_window_seconds: 0 disables the up-window.
+            scale_down_stabilization_window_seconds: 0 disables the down-window.
+            min_containers / max_containers: Clamp the final decision to this range.
+
+        Returns:
+            The target number of containers.
+        """
+        if not autoscaling_decisions:
+            # Without data we can’t make a new decision – stay where we are.
+            return current_replicas
+
+        # Sort just once in case the caller didn’t: newest record is last.
+        autoscaling_decisions.sort(key=lambda rec: rec[0])
+        now_ts, latest_desired = autoscaling_decisions[-1]
+
+        if latest_desired > current_replicas:
+            # ---- SCALE-UP path ----
+            window_start = now_ts - scale_up_stabilization_window_seconds
+            # Consider only records *inside* the window.
+            desired_candidates = [desired for ts, desired in autoscaling_decisions if ts >= window_start]
+            # Use the *minimum* so that any temporary dip blocks the scale-up.
+            candidate = min(desired_candidates) if desired_candidates else latest_desired
+            new_replicas = max(current_replicas, candidate)  # never scale *down* here
+        elif latest_desired < current_replicas:
+            # ---- SCALE-DOWN path ----
+            window_start = now_ts - scale_down_stabilization_window_seconds
+            desired_candidates = [desired for ts, desired in autoscaling_decisions if ts >= window_start]
+            # Use the *maximum* so that any temporary spike blocks the scale-down.
+            candidate = max(desired_candidates) if desired_candidates else latest_desired
+            new_replicas = min(current_replicas, candidate)  # never scale *up* here
+        else:
+            # No change requested.
+            new_replicas = current_replicas
+
+        # Clamp to [min_containers, max_containers].
+        if min_containers is not None:
+            new_replicas = max(min_containers, new_replicas)
+        if max_containers is not None:
+            new_replicas = min(max_containers, new_replicas)
+        return new_replicas
+
+    async def stop(self):
+        self.autoscaler_thread.cancel()
+        await self.autoscaler_thread
+
+
+FlashPrometheusAutoscaler = synchronize_api(_FlashPrometheusAutoscaler)
+
+
+@synchronizer.create_blocking
+async def flash_prometheus_autoscaler(
+    app_name: str,
+    cls_name: str,
+    # Endpoint to fetch metrics from. Must be in Prometheus format. Example: "/metrics"
+    metrics_endpoint: str,
+    # Target metric to autoscale on. Example: "vllm:num_requests_running"
+    target_metric: str,
+    # Target metric value. Example: 25
+    target_metric_value: float,
+    min_containers: Optional[int] = None,
+    max_containers: Optional[int] = None,
+    # Corresponds to https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#tolerance
+    scale_up_tolerance: float = 0.1,
+    # Corresponds to https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#tolerance
+    scale_down_tolerance: float = 0.1,
+    # Corresponds to https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#stabilization-window
+    scale_up_stabilization_window_seconds: int = 0,
+    # Corresponds to https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#stabilization-window
+    scale_down_stabilization_window_seconds: int = 300,
+    # How often to make autoscaling decisions.
+    # Corresponds to --horizontal-pod-autoscaler-sync-period in Kubernetes.
+    autoscaling_interval_seconds: int = 15,
+) -> _FlashPrometheusAutoscaler:
+    """
+    Autoscale a Flash service based on containers' Prometheus metrics.
+
+    The package `prometheus_client` is required to use this method.
+
+    This is a highly experimental method that can break or be removed at any time without warning.
+    Do not use this method unless explicitly instructed to do so by Modal support.
+    """
+
+    try:
+        import prometheus_client  # noqa: F401
+    except ImportError:
+        raise ImportError("The package `prometheus_client` is required to use this method.")
+
+    client = await _Client.from_env()
+    autoscaler = _FlashPrometheusAutoscaler(
+        client,
+        app_name,
+        cls_name,
+        metrics_endpoint,
+        target_metric,
+        target_metric_value,
+        min_containers,
+        max_containers,
+        scale_up_tolerance,
+        scale_down_tolerance,
+        scale_up_stabilization_window_seconds,
+        scale_down_stabilization_window_seconds,
+        autoscaling_interval_seconds,
+    )
+    await autoscaler.start()
+    return autoscaler
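
The new file defines the two experimental entry points re-exported above: `flash_forward`, which registers the current container with the Flash router over a tunnel, and `flash_prometheus_autoscaler`, which polls each container's Prometheus metrics and resizes the deployment using Kubernetes-HPA-style tolerances and stabilization windows. A hedged usage sketch, again assuming the `modal.experimental` import path; the app name, class name, and metric are placeholders:

```python
from modal.experimental import flash_prometheus_autoscaler

# Scale the class "MyServer" in app "my-app" (both placeholders) so the mean of
# the Prometheus metric "vllm:num_requests_running" across containers stays near
# 25. Metrics are fetched from https://<host>:<port>/<metrics_endpoint>, per
# _compute_target_containers above.
autoscaler = flash_prometheus_autoscaler(
    app_name="my-app",
    cls_name="MyServer",
    metrics_endpoint="metrics",
    target_metric="vllm:num_requests_running",
    target_metric_value=25,
    min_containers=1,
    max_containers=10,
)
# ... later, stop making autoscaling decisions:
autoscaler.stop()
```

The stabilization windows work as in `_make_scaling_decision`: each interval records a desired replica count, and a scale-up only goes through if the *minimum* desired count inside the up-window still exceeds the current count (symmetrically, the *maximum* gates scale-downs). A standalone illustration with made-up numbers:

```python
# Mirrors the scale-up branch of _make_scaling_decision; values are invented.
t = 1_000_000.0
decisions = [(t - 10, 4), (t - 5, 2), (t, 6)]  # (timestamp, desired_replicas)

current = 3
window = 30  # scale-up stabilization window, seconds
candidates = [d for ts, d in decisions if ts >= t - window]  # [4, 2, 6]
print(max(current, min(candidates)))  # 3 -> the dip to 2 blocks the scale-up
```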