modal 1.1.4.dev27__py3-none-any.whl → 1.1.4.dev28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- modal/client.pyi +2 -2
- modal/experimental/flash.py +65 -2
- modal/experimental/flash.pyi +32 -3
- {modal-1.1.4.dev27.dist-info → modal-1.1.4.dev28.dist-info}/METADATA +1 -1
- {modal-1.1.4.dev27.dist-info → modal-1.1.4.dev28.dist-info}/RECORD +10 -10
- modal_version/__init__.py +1 -1
- {modal-1.1.4.dev27.dist-info → modal-1.1.4.dev28.dist-info}/WHEEL +0 -0
- {modal-1.1.4.dev27.dist-info → modal-1.1.4.dev28.dist-info}/entry_points.txt +0 -0
- {modal-1.1.4.dev27.dist-info → modal-1.1.4.dev28.dist-info}/licenses/LICENSE +0 -0
- {modal-1.1.4.dev27.dist-info → modal-1.1.4.dev28.dist-info}/top_level.txt +0 -0
modal/client.pyi
CHANGED
@@ -33,7 +33,7 @@ class _Client:
|
|
33
33
|
server_url: str,
|
34
34
|
client_type: int,
|
35
35
|
credentials: typing.Optional[tuple[str, str]],
|
36
|
-
version: str = "1.1.4.dev27",
|
36
|
+
version: str = "1.1.4.dev28",
|
37
37
|
):
|
38
38
|
"""mdmd:hidden
|
39
39
|
The Modal client object is not intended to be instantiated directly by users.
|
@@ -164,7 +164,7 @@ class Client:
|
|
164
164
|
server_url: str,
|
165
165
|
client_type: int,
|
166
166
|
credentials: typing.Optional[tuple[str, str]],
|
167
|
-
version: str = "1.1.4.dev27",
|
167
|
+
version: str = "1.1.4.dev28",
|
168
168
|
):
|
169
169
|
"""mdmd:hidden
|
170
170
|
The Modal client object is not intended to be instantiated directly by users.
|
modal/experimental/flash.py
CHANGED
@@ -200,7 +200,10 @@ class _FlashPrometheusAutoscaler:
|
|
200
200
|
if timestamp >= autoscaling_time - self._max_window_seconds
|
201
201
|
]
|
202
202
|
|
203
|
-
|
203
|
+
if self.metrics_endpoint == "internal":
|
204
|
+
current_target_containers = await self._compute_target_containers_internal(current_replicas)
|
205
|
+
else:
|
206
|
+
current_target_containers = await self._compute_target_containers_prometheus(current_replicas)
|
204
207
|
autoscaling_decisions.append((autoscaling_time, current_target_containers))
|
205
208
|
|
206
209
|
actual_target_containers = self._make_scaling_decision(
|
@@ -236,7 +239,53 @@ class _FlashPrometheusAutoscaler:
|
|
236
239
|
logger.error(traceback.format_exc())
|
237
240
|
await asyncio.sleep(self.autoscaling_interval_seconds)
|
238
241
|
|
239
|
-
async def _compute_target_containers(self, current_replicas: int) -> int:
|
242
|
+
async def _compute_target_containers_internal(self, current_replicas: int) -> int:
|
243
|
+
"""
|
244
|
+
Gets internal metrics from container to autoscale up or down.
|
245
|
+
"""
|
246
|
+
containers = await self._get_all_containers()
|
247
|
+
if len(containers) > current_replicas:
|
248
|
+
logger.info(
|
249
|
+
f"[Modal Flash] Current replicas {current_replicas} is less than the number of containers "
|
250
|
+
f"{len(containers)}. Setting current_replicas = num_containers."
|
251
|
+
)
|
252
|
+
current_replicas = len(containers)
|
253
|
+
|
254
|
+
if current_replicas == 0:
|
255
|
+
return 1
|
256
|
+
|
257
|
+
internal_metrics_list = []
|
258
|
+
for container in containers:
|
259
|
+
internal_metric = await self._get_container_metrics(container.id)
|
260
|
+
if internal_metric is None:
|
261
|
+
continue
|
262
|
+
internal_metrics_list.append(getattr(internal_metric.metrics, self.target_metric))
|
263
|
+
|
264
|
+
if not internal_metrics_list:
|
265
|
+
return current_replicas
|
266
|
+
|
267
|
+
avg_internal_metric = sum(internal_metrics_list) / len(internal_metrics_list)
|
268
|
+
|
269
|
+
scale_factor = avg_internal_metric / self.target_metric_value
|
270
|
+
|
271
|
+
desired_replicas = current_replicas
|
272
|
+
if scale_factor > 1 + self.scale_up_tolerance:
|
273
|
+
desired_replicas = math.ceil(current_replicas * scale_factor)
|
274
|
+
elif scale_factor < 1 - self.scale_down_tolerance:
|
275
|
+
desired_replicas = math.ceil(current_replicas * scale_factor)
|
276
|
+
|
277
|
+
logger.warning(
|
278
|
+
f"[Modal Flash] Current replicas: {current_replicas}, "
|
279
|
+
f"avg internal metric `{self.target_metric}`: {avg_internal_metric}, "
|
280
|
+
f"target internal metric value: {self.target_metric_value}, "
|
281
|
+
f"scale factor: {scale_factor}, "
|
282
|
+
f"desired replicas: {desired_replicas}"
|
283
|
+
)
|
284
|
+
|
285
|
+
desired_replicas = max(1, desired_replicas)
|
286
|
+
return desired_replicas
|
287
|
+
|
288
|
+
async def _compute_target_containers_prometheus(self, current_replicas: int) -> int:
|
240
289
|
# current_replicas is the number of live containers + cold starting containers (not yet live)
|
241
290
|
# containers is the number of live containers that are registered in flash dns
|
242
291
|
containers = await self._get_all_containers()
|
@@ -253,6 +302,7 @@ class _FlashPrometheusAutoscaler:
|
|
253
302
|
target_metric = self.target_metric
|
254
303
|
target_metric_value = float(self.target_metric_value)
|
255
304
|
|
305
|
+
# Gets metrics from prometheus
|
256
306
|
sum_metric = 0
|
257
307
|
containers_with_metrics = 0
|
258
308
|
container_metrics_list = await asyncio.gather(
|
@@ -341,6 +391,15 @@ class _FlashPrometheusAutoscaler:
|
|
341
391
|
|
342
392
|
return metrics
|
343
393
|
|
394
|
+
async def _get_container_metrics(self, container_id: str) -> Optional[api_pb2.TaskGetAutoscalingMetricsResponse]:
|
395
|
+
req = api_pb2.TaskGetAutoscalingMetricsRequest(task_id=container_id)
|
396
|
+
try:
|
397
|
+
resp = await retry_transient_errors(self.client.stub.TaskGetAutoscalingMetrics, req)
|
398
|
+
return resp
|
399
|
+
except Exception as e:
|
400
|
+
logger.warning(f"[Modal Flash] Error getting metrics for container {container_id}: {e}")
|
401
|
+
return None
|
402
|
+
|
344
403
|
async def _get_all_containers(self):
|
345
404
|
req = api_pb2.FlashContainerListRequest(function_id=self.fn.object_id)
|
346
405
|
resp = await retry_transient_errors(self.client.stub.FlashContainerList, req)
|
@@ -419,10 +478,14 @@ async def flash_prometheus_autoscaler(
|
|
419
478
|
app_name: str,
|
420
479
|
cls_name: str,
|
421
480
|
# Endpoint to fetch metrics from. Must be in Prometheus format. Example: "/metrics"
|
481
|
+
# If metrics_endpoint is "internal", we will use containers' internal metrics to autoscale instead.
|
422
482
|
metrics_endpoint: str,
|
423
483
|
# Target metric to autoscale on. Example: "vllm:num_requests_running"
|
484
|
+
# If metrics_endpoint is "internal", target_metric options are: [cpu_usage_percent, memory_usage_percent]
|
424
485
|
target_metric: str,
|
425
486
|
# Target metric value. Example: 25
|
487
|
+
# If metrics_endpoint is "internal", target_metric_value is a percentage value between 0.1 and 1.0 (inclusive),
|
488
|
+
# indicating container's usage of that metric.
|
426
489
|
target_metric_value: float,
|
427
490
|
min_containers: Optional[int] = None,
|
428
491
|
max_containers: Optional[int] = None,
|
modal/experimental/flash.pyi
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
import modal.client
|
2
|
+
import modal_proto.api_pb2
|
2
3
|
import typing
|
3
4
|
import typing_extensions
|
4
5
|
|
@@ -85,8 +86,15 @@ class _FlashPrometheusAutoscaler:
|
|
85
86
|
|
86
87
|
async def start(self): ...
|
87
88
|
async def _run_autoscaler_loop(self): ...
|
88
|
-
async def _compute_target_containers(self, current_replicas: int) -> int: ...
|
89
|
+
async def _compute_target_containers_internal(self, current_replicas: int) -> int:
|
90
|
+
"""Gets internal metrics from container to autoscale up or down."""
|
91
|
+
...
|
92
|
+
|
93
|
+
async def _compute_target_containers_prometheus(self, current_replicas: int) -> int: ...
|
89
94
|
async def _get_metrics(self, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
|
95
|
+
async def _get_container_metrics(
|
96
|
+
self, container_id: str
|
97
|
+
) -> typing.Optional[modal_proto.api_pb2.TaskGetAutoscalingMetricsResponse]: ...
|
90
98
|
async def _get_all_containers(self): ...
|
91
99
|
def _make_scaling_decision(
|
92
100
|
self,
|
@@ -147,11 +155,22 @@ class FlashPrometheusAutoscaler:
|
|
147
155
|
|
148
156
|
_run_autoscaler_loop: ___run_autoscaler_loop_spec[typing_extensions.Self]
|
149
157
|
|
150
|
-
class ___compute_target_containers_spec(typing_extensions.Protocol[SUPERSELF]):
|
158
|
+
class ___compute_target_containers_internal_spec(typing_extensions.Protocol[SUPERSELF]):
|
159
|
+
def __call__(self, /, current_replicas: int) -> int:
|
160
|
+
"""Gets internal metrics from container to autoscale up or down."""
|
161
|
+
...
|
162
|
+
|
163
|
+
async def aio(self, /, current_replicas: int) -> int:
|
164
|
+
"""Gets internal metrics from container to autoscale up or down."""
|
165
|
+
...
|
166
|
+
|
167
|
+
_compute_target_containers_internal: ___compute_target_containers_internal_spec[typing_extensions.Self]
|
168
|
+
|
169
|
+
class ___compute_target_containers_prometheus_spec(typing_extensions.Protocol[SUPERSELF]):
|
151
170
|
def __call__(self, /, current_replicas: int) -> int: ...
|
152
171
|
async def aio(self, /, current_replicas: int) -> int: ...
|
153
172
|
|
154
|
-
|
173
|
+
_compute_target_containers_prometheus: ___compute_target_containers_prometheus_spec[typing_extensions.Self]
|
155
174
|
|
156
175
|
class ___get_metrics_spec(typing_extensions.Protocol[SUPERSELF]):
|
157
176
|
def __call__(self, /, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
|
@@ -159,6 +178,16 @@ class FlashPrometheusAutoscaler:
|
|
159
178
|
|
160
179
|
_get_metrics: ___get_metrics_spec[typing_extensions.Self]
|
161
180
|
|
181
|
+
class ___get_container_metrics_spec(typing_extensions.Protocol[SUPERSELF]):
|
182
|
+
def __call__(
|
183
|
+
self, /, container_id: str
|
184
|
+
) -> typing.Optional[modal_proto.api_pb2.TaskGetAutoscalingMetricsResponse]: ...
|
185
|
+
async def aio(
|
186
|
+
self, /, container_id: str
|
187
|
+
) -> typing.Optional[modal_proto.api_pb2.TaskGetAutoscalingMetricsResponse]: ...
|
188
|
+
|
189
|
+
_get_container_metrics: ___get_container_metrics_spec[typing_extensions.Self]
|
190
|
+
|
162
191
|
class ___get_all_containers_spec(typing_extensions.Protocol[SUPERSELF]):
|
163
192
|
def __call__(self, /): ...
|
164
193
|
async def aio(self, /): ...
|
@@ -22,7 +22,7 @@ modal/app.py,sha256=F4baVULljFq0CwC_7U-EKNRNx7CYeWBKudjjYUuWc4U,48416
|
|
22
22
|
modal/app.pyi,sha256=AbXJCBkyt2rI_-M3VbTBYb32at0P6iRZuoC87xY_JrQ,43591
|
23
23
|
modal/call_graph.py,sha256=1g2DGcMIJvRy-xKicuf63IVE98gJSnQsr8R_NVMptNc,2581
|
24
24
|
modal/client.py,sha256=kyAIVB3Ay-XKJizQ_1ufUFB__EagV0MLmHJpyYyJ7J0,18636
|
25
|
-
modal/client.pyi,sha256=
|
25
|
+
modal/client.pyi,sha256=uDYc9D-M2scJv1Ur0rIAugWT8G0EVI6vbcL3XwlY5g8,15831
|
26
26
|
modal/cloud_bucket_mount.py,sha256=YOe9nnvSr4ZbeCn587d7_VhE9IioZYRvF9VYQTQux08,5914
|
27
27
|
modal/cloud_bucket_mount.pyi,sha256=-qSfYAQvIoO_l2wsCCGTG5ZUwQieNKXdAO00yP1-LYU,7394
|
28
28
|
modal/cls.py,sha256=pTEO7pHjlO7taMbIqs4oI9ZZgKDJpVKyGkO5ZT0w6tQ,40934
|
@@ -150,10 +150,10 @@ modal/cli/programs/run_jupyter.py,sha256=44Lpvqk2l3hH-uOkmAOzw60NEsfB5uaRDWDKVsh
|
|
150
150
|
modal/cli/programs/run_marimo.py,sha256=HyZ2za0NYqg31-mGxFQxUIAJ3Q-jRaMocEwWwI5-cdw,2887
|
151
151
|
modal/cli/programs/vscode.py,sha256=KbTAaIXyQBVCDXxXjmBHmKpgXkUw0q4R4KkJvUjCYgk,3380
|
152
152
|
modal/experimental/__init__.py,sha256=aWDb1VO9s4D-5Ktw_kYQSqoCp1W8S7lhns-5l7S1l-8,15102
|
153
|
-
modal/experimental/flash.py,sha256=
|
154
|
-
modal/experimental/flash.pyi,sha256=
|
153
|
+
modal/experimental/flash.py,sha256=Sxzx6WXOJSYAwKn3ZMyPWSroudQTPeHVpCEWbSPKD3A,24124
|
154
|
+
modal/experimental/flash.pyi,sha256=bCH9OgSzwJPo_s5-ObqlNBQUziZ7p5m5_zh0aQTA7sg,12216
|
155
155
|
modal/experimental/ipython.py,sha256=TrCfmol9LGsRZMeDoeMPx3Hv3BFqQhYnmD_iH0pqdhk,2904
|
156
|
-
modal-1.1.4.
|
156
|
+
modal-1.1.4.dev28.dist-info/licenses/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
|
157
157
|
modal_docs/__init__.py,sha256=svYKtV8HDwDCN86zbdWqyq5T8sMdGDj0PVlzc2tIxDM,28
|
158
158
|
modal_docs/gen_cli_docs.py,sha256=c1yfBS_x--gL5bs0N4ihMwqwX8l3IBWSkBAKNNIi6bQ,3801
|
159
159
|
modal_docs/gen_reference_docs.py,sha256=d_CQUGQ0rfw28u75I2mov9AlS773z9rG40-yq5o7g2U,6359
|
@@ -176,10 +176,10 @@ modal_proto/options_pb2.pyi,sha256=l7DBrbLO7q3Ir-XDkWsajm0d0TQqqrfuX54i4BMpdQg,1
|
|
176
176
|
modal_proto/options_pb2_grpc.py,sha256=1oboBPFxaTEXt9Aw7EAj8gXHDCNMhZD2VXqocC9l_gk,159
|
177
177
|
modal_proto/options_pb2_grpc.pyi,sha256=CImmhxHsYnF09iENPoe8S4J-n93jtgUYD2JPAc0yJSI,247
|
178
178
|
modal_proto/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
179
|
-
modal_version/__init__.py,sha256=
|
179
|
+
modal_version/__init__.py,sha256=U3zYvussyhm4LpBPEvSAIV24lwpJWrSVmQVc7rel01g,121
|
180
180
|
modal_version/__main__.py,sha256=2FO0yYQQwDTh6udt1h-cBnGd1c4ZyHnHSI4BksxzVac,105
|
181
|
-
modal-1.1.4.
|
182
|
-
modal-1.1.4.
|
183
|
-
modal-1.1.4.
|
184
|
-
modal-1.1.4.
|
185
|
-
modal-1.1.4.
|
181
|
+
modal-1.1.4.dev28.dist-info/METADATA,sha256=l4cMEFeTe6dLilwhAxwXXJArJwHRgcLudcpc6Ck8-n0,2460
|
182
|
+
modal-1.1.4.dev28.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
|
183
|
+
modal-1.1.4.dev28.dist-info/entry_points.txt,sha256=An-wYgeEUnm6xzrAP9_NTSTSciYvvEWsMZILtYrvpAI,46
|
184
|
+
modal-1.1.4.dev28.dist-info/top_level.txt,sha256=4BWzoKYREKUZ5iyPzZpjqx4G8uB5TWxXPDwibLcVa7k,43
|
185
|
+
modal-1.1.4.dev28.dist-info/RECORD,,
|
modal_version/__init__.py
CHANGED
File without changes
|
File without changes
|
File without changes
|
File without changes
|