modal 1.1.4.dev27__py3-none-any.whl → 1.1.4.dev29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- modal/client.pyi +2 -2
- modal/experimental/flash.py +65 -2
- modal/experimental/flash.pyi +32 -3
- modal/functions.pyi +6 -6
- {modal-1.1.4.dev27.dist-info → modal-1.1.4.dev29.dist-info}/METADATA +1 -1
- {modal-1.1.4.dev27.dist-info → modal-1.1.4.dev29.dist-info}/RECORD +11 -11
- modal_version/__init__.py +1 -1
- {modal-1.1.4.dev27.dist-info → modal-1.1.4.dev29.dist-info}/WHEEL +0 -0
- {modal-1.1.4.dev27.dist-info → modal-1.1.4.dev29.dist-info}/entry_points.txt +0 -0
- {modal-1.1.4.dev27.dist-info → modal-1.1.4.dev29.dist-info}/licenses/LICENSE +0 -0
- {modal-1.1.4.dev27.dist-info → modal-1.1.4.dev29.dist-info}/top_level.txt +0 -0
modal/client.pyi
CHANGED
@@ -33,7 +33,7 @@ class _Client:
|
|
33
33
|
server_url: str,
|
34
34
|
client_type: int,
|
35
35
|
credentials: typing.Optional[tuple[str, str]],
|
36
|
-
version: str = "1.1.4.
|
36
|
+
version: str = "1.1.4.dev29",
|
37
37
|
):
|
38
38
|
"""mdmd:hidden
|
39
39
|
The Modal client object is not intended to be instantiated directly by users.
|
@@ -164,7 +164,7 @@ class Client:
|
|
164
164
|
server_url: str,
|
165
165
|
client_type: int,
|
166
166
|
credentials: typing.Optional[tuple[str, str]],
|
167
|
-
version: str = "1.1.4.
|
167
|
+
version: str = "1.1.4.dev29",
|
168
168
|
):
|
169
169
|
"""mdmd:hidden
|
170
170
|
The Modal client object is not intended to be instantiated directly by users.
|
modal/experimental/flash.py
CHANGED
@@ -200,7 +200,10 @@ class _FlashPrometheusAutoscaler:
|
|
200
200
|
if timestamp >= autoscaling_time - self._max_window_seconds
|
201
201
|
]
|
202
202
|
|
203
|
-
|
203
|
+
if self.metrics_endpoint == "internal":
|
204
|
+
current_target_containers = await self._compute_target_containers_internal(current_replicas)
|
205
|
+
else:
|
206
|
+
current_target_containers = await self._compute_target_containers_prometheus(current_replicas)
|
204
207
|
autoscaling_decisions.append((autoscaling_time, current_target_containers))
|
205
208
|
|
206
209
|
actual_target_containers = self._make_scaling_decision(
|
@@ -236,7 +239,53 @@ class _FlashPrometheusAutoscaler:
|
|
236
239
|
logger.error(traceback.format_exc())
|
237
240
|
await asyncio.sleep(self.autoscaling_interval_seconds)
|
238
241
|
|
239
|
-
async def
|
242
|
+
async def _compute_target_containers_internal(self, current_replicas: int) -> int:
|
243
|
+
"""
|
244
|
+
Gets internal metrics from container to autoscale up or down.
|
245
|
+
"""
|
246
|
+
containers = await self._get_all_containers()
|
247
|
+
if len(containers) > current_replicas:
|
248
|
+
logger.info(
|
249
|
+
f"[Modal Flash] Current replicas {current_replicas} is less than the number of containers "
|
250
|
+
f"{len(containers)}. Setting current_replicas = num_containers."
|
251
|
+
)
|
252
|
+
current_replicas = len(containers)
|
253
|
+
|
254
|
+
if current_replicas == 0:
|
255
|
+
return 1
|
256
|
+
|
257
|
+
internal_metrics_list = []
|
258
|
+
for container in containers:
|
259
|
+
internal_metric = await self._get_container_metrics(container.task_id)
|
260
|
+
if internal_metric is None:
|
261
|
+
continue
|
262
|
+
internal_metrics_list.append(getattr(internal_metric.metrics, self.target_metric))
|
263
|
+
|
264
|
+
if not internal_metrics_list:
|
265
|
+
return current_replicas
|
266
|
+
|
267
|
+
avg_internal_metric = sum(internal_metrics_list) / len(internal_metrics_list)
|
268
|
+
|
269
|
+
scale_factor = avg_internal_metric / self.target_metric_value
|
270
|
+
|
271
|
+
desired_replicas = current_replicas
|
272
|
+
if scale_factor > 1 + self.scale_up_tolerance:
|
273
|
+
desired_replicas = math.ceil(current_replicas * scale_factor)
|
274
|
+
elif scale_factor < 1 - self.scale_down_tolerance:
|
275
|
+
desired_replicas = math.ceil(current_replicas * scale_factor)
|
276
|
+
|
277
|
+
logger.warning(
|
278
|
+
f"[Modal Flash] Current replicas: {current_replicas}, "
|
279
|
+
f"avg internal metric `{self.target_metric}`: {avg_internal_metric}, "
|
280
|
+
f"target internal metric value: {self.target_metric_value}, "
|
281
|
+
f"scale factor: {scale_factor}, "
|
282
|
+
f"desired replicas: {desired_replicas}"
|
283
|
+
)
|
284
|
+
|
285
|
+
desired_replicas = max(1, min(desired_replicas, self.max_containers or 1000))
|
286
|
+
return desired_replicas
|
287
|
+
|
288
|
+
async def _compute_target_containers_prometheus(self, current_replicas: int) -> int:
|
240
289
|
# current_replicas is the number of live containers + cold starting containers (not yet live)
|
241
290
|
# containers is the number of live containers that are registered in flash dns
|
242
291
|
containers = await self._get_all_containers()
|
@@ -253,6 +302,7 @@ class _FlashPrometheusAutoscaler:
|
|
253
302
|
target_metric = self.target_metric
|
254
303
|
target_metric_value = float(self.target_metric_value)
|
255
304
|
|
305
|
+
# Gets metrics from prometheus
|
256
306
|
sum_metric = 0
|
257
307
|
containers_with_metrics = 0
|
258
308
|
container_metrics_list = await asyncio.gather(
|
@@ -341,6 +391,15 @@ class _FlashPrometheusAutoscaler:
|
|
341
391
|
|
342
392
|
return metrics
|
343
393
|
|
394
|
+
async def _get_container_metrics(self, container_id: str) -> Optional[api_pb2.TaskGetAutoscalingMetricsResponse]:
|
395
|
+
req = api_pb2.TaskGetAutoscalingMetricsRequest(task_id=container_id)
|
396
|
+
try:
|
397
|
+
resp = await retry_transient_errors(self.client.stub.TaskGetAutoscalingMetrics, req)
|
398
|
+
return resp
|
399
|
+
except Exception as e:
|
400
|
+
logger.warning(f"[Modal Flash] Error getting metrics for container {container_id}: {e}")
|
401
|
+
return None
|
402
|
+
|
344
403
|
async def _get_all_containers(self):
|
345
404
|
req = api_pb2.FlashContainerListRequest(function_id=self.fn.object_id)
|
346
405
|
resp = await retry_transient_errors(self.client.stub.FlashContainerList, req)
|
@@ -419,10 +478,14 @@ async def flash_prometheus_autoscaler(
|
|
419
478
|
app_name: str,
|
420
479
|
cls_name: str,
|
421
480
|
# Endpoint to fetch metrics from. Must be in Prometheus format. Example: "/metrics"
|
481
|
+
# If metrics_endpoint is "internal", we will use containers' internal metrics to autoscale instead.
|
422
482
|
metrics_endpoint: str,
|
423
483
|
# Target metric to autoscale on. Example: "vllm:num_requests_running"
|
484
|
+
# If metrics_endpoint is "internal", target_metric options are: [cpu_usage_percent, memory_usage_percent]
|
424
485
|
target_metric: str,
|
425
486
|
# Target metric value. Example: 25
|
487
|
+
# If metrics_endpoint is "internal", target_metric_value is a percentage value between 0.1 and 1.0 (inclusive),
|
488
|
+
# indicating container's usage of that metric.
|
426
489
|
target_metric_value: float,
|
427
490
|
min_containers: Optional[int] = None,
|
428
491
|
max_containers: Optional[int] = None,
|
modal/experimental/flash.pyi
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
import modal.client
|
2
|
+
import modal_proto.api_pb2
|
2
3
|
import typing
|
3
4
|
import typing_extensions
|
4
5
|
|
@@ -85,8 +86,15 @@ class _FlashPrometheusAutoscaler:
|
|
85
86
|
|
86
87
|
async def start(self): ...
|
87
88
|
async def _run_autoscaler_loop(self): ...
|
88
|
-
async def
|
89
|
+
async def _compute_target_containers_internal(self, current_replicas: int) -> int:
|
90
|
+
"""Gets internal metrics from container to autoscale up or down."""
|
91
|
+
...
|
92
|
+
|
93
|
+
async def _compute_target_containers_prometheus(self, current_replicas: int) -> int: ...
|
89
94
|
async def _get_metrics(self, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
|
95
|
+
async def _get_container_metrics(
|
96
|
+
self, container_id: str
|
97
|
+
) -> typing.Optional[modal_proto.api_pb2.TaskGetAutoscalingMetricsResponse]: ...
|
90
98
|
async def _get_all_containers(self): ...
|
91
99
|
def _make_scaling_decision(
|
92
100
|
self,
|
@@ -147,11 +155,22 @@ class FlashPrometheusAutoscaler:
|
|
147
155
|
|
148
156
|
_run_autoscaler_loop: ___run_autoscaler_loop_spec[typing_extensions.Self]
|
149
157
|
|
150
|
-
class
|
158
|
+
class ___compute_target_containers_internal_spec(typing_extensions.Protocol[SUPERSELF]):
|
159
|
+
def __call__(self, /, current_replicas: int) -> int:
|
160
|
+
"""Gets internal metrics from container to autoscale up or down."""
|
161
|
+
...
|
162
|
+
|
163
|
+
async def aio(self, /, current_replicas: int) -> int:
|
164
|
+
"""Gets internal metrics from container to autoscale up or down."""
|
165
|
+
...
|
166
|
+
|
167
|
+
_compute_target_containers_internal: ___compute_target_containers_internal_spec[typing_extensions.Self]
|
168
|
+
|
169
|
+
class ___compute_target_containers_prometheus_spec(typing_extensions.Protocol[SUPERSELF]):
|
151
170
|
def __call__(self, /, current_replicas: int) -> int: ...
|
152
171
|
async def aio(self, /, current_replicas: int) -> int: ...
|
153
172
|
|
154
|
-
|
173
|
+
_compute_target_containers_prometheus: ___compute_target_containers_prometheus_spec[typing_extensions.Self]
|
155
174
|
|
156
175
|
class ___get_metrics_spec(typing_extensions.Protocol[SUPERSELF]):
|
157
176
|
def __call__(self, /, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
|
@@ -159,6 +178,16 @@ class FlashPrometheusAutoscaler:
|
|
159
178
|
|
160
179
|
_get_metrics: ___get_metrics_spec[typing_extensions.Self]
|
161
180
|
|
181
|
+
class ___get_container_metrics_spec(typing_extensions.Protocol[SUPERSELF]):
|
182
|
+
def __call__(
|
183
|
+
self, /, container_id: str
|
184
|
+
) -> typing.Optional[modal_proto.api_pb2.TaskGetAutoscalingMetricsResponse]: ...
|
185
|
+
async def aio(
|
186
|
+
self, /, container_id: str
|
187
|
+
) -> typing.Optional[modal_proto.api_pb2.TaskGetAutoscalingMetricsResponse]: ...
|
188
|
+
|
189
|
+
_get_container_metrics: ___get_container_metrics_spec[typing_extensions.Self]
|
190
|
+
|
162
191
|
class ___get_all_containers_spec(typing_extensions.Protocol[SUPERSELF]):
|
163
192
|
def __call__(self, /): ...
|
164
193
|
async def aio(self, /): ...
|
modal/functions.pyi
CHANGED
@@ -445,7 +445,7 @@ class Function(
|
|
445
445
|
|
446
446
|
_call_generator: ___call_generator_spec[typing_extensions.Self]
|
447
447
|
|
448
|
-
class __remote_spec(typing_extensions.Protocol[
|
448
|
+
class __remote_spec(typing_extensions.Protocol[P_INNER, ReturnType_INNER, SUPERSELF]):
|
449
449
|
def __call__(self, /, *args: P_INNER.args, **kwargs: P_INNER.kwargs) -> ReturnType_INNER:
|
450
450
|
"""Calls the function remotely, executing it with the given arguments and returning the execution's result."""
|
451
451
|
...
|
@@ -454,7 +454,7 @@ class Function(
|
|
454
454
|
"""Calls the function remotely, executing it with the given arguments and returning the execution's result."""
|
455
455
|
...
|
456
456
|
|
457
|
-
remote: __remote_spec[modal._functions.
|
457
|
+
remote: __remote_spec[modal._functions.P, modal._functions.ReturnType, typing_extensions.Self]
|
458
458
|
|
459
459
|
class __remote_gen_spec(typing_extensions.Protocol[SUPERSELF]):
|
460
460
|
def __call__(self, /, *args, **kwargs) -> typing.Generator[typing.Any, None, None]:
|
@@ -481,7 +481,7 @@ class Function(
|
|
481
481
|
"""
|
482
482
|
...
|
483
483
|
|
484
|
-
class ___experimental_spawn_spec(typing_extensions.Protocol[
|
484
|
+
class ___experimental_spawn_spec(typing_extensions.Protocol[P_INNER, ReturnType_INNER, SUPERSELF]):
|
485
485
|
def __call__(self, /, *args: P_INNER.args, **kwargs: P_INNER.kwargs) -> FunctionCall[ReturnType_INNER]:
|
486
486
|
"""[Experimental] Calls the function with the given arguments, without waiting for the results.
|
487
487
|
|
@@ -505,7 +505,7 @@ class Function(
|
|
505
505
|
...
|
506
506
|
|
507
507
|
_experimental_spawn: ___experimental_spawn_spec[
|
508
|
-
modal._functions.
|
508
|
+
modal._functions.P, modal._functions.ReturnType, typing_extensions.Self
|
509
509
|
]
|
510
510
|
|
511
511
|
class ___spawn_map_inner_spec(typing_extensions.Protocol[P_INNER, SUPERSELF]):
|
@@ -514,7 +514,7 @@ class Function(
|
|
514
514
|
|
515
515
|
_spawn_map_inner: ___spawn_map_inner_spec[modal._functions.P, typing_extensions.Self]
|
516
516
|
|
517
|
-
class __spawn_spec(typing_extensions.Protocol[
|
517
|
+
class __spawn_spec(typing_extensions.Protocol[P_INNER, ReturnType_INNER, SUPERSELF]):
|
518
518
|
def __call__(self, /, *args: P_INNER.args, **kwargs: P_INNER.kwargs) -> FunctionCall[ReturnType_INNER]:
|
519
519
|
"""Calls the function with the given arguments, without waiting for the results.
|
520
520
|
|
@@ -535,7 +535,7 @@ class Function(
|
|
535
535
|
"""
|
536
536
|
...
|
537
537
|
|
538
|
-
spawn: __spawn_spec[modal._functions.
|
538
|
+
spawn: __spawn_spec[modal._functions.P, modal._functions.ReturnType, typing_extensions.Self]
|
539
539
|
|
540
540
|
def get_raw_f(self) -> collections.abc.Callable[..., typing.Any]:
|
541
541
|
"""Return the inner Python object wrapped by this Modal Function."""
|
@@ -22,7 +22,7 @@ modal/app.py,sha256=F4baVULljFq0CwC_7U-EKNRNx7CYeWBKudjjYUuWc4U,48416
|
|
22
22
|
modal/app.pyi,sha256=AbXJCBkyt2rI_-M3VbTBYb32at0P6iRZuoC87xY_JrQ,43591
|
23
23
|
modal/call_graph.py,sha256=1g2DGcMIJvRy-xKicuf63IVE98gJSnQsr8R_NVMptNc,2581
|
24
24
|
modal/client.py,sha256=kyAIVB3Ay-XKJizQ_1ufUFB__EagV0MLmHJpyYyJ7J0,18636
|
25
|
-
modal/client.pyi,sha256=
|
25
|
+
modal/client.pyi,sha256=SRlBIsiqUovZcyjv4yo8u61GSDr3_yfG05Wi4kdKR94,15831
|
26
26
|
modal/cloud_bucket_mount.py,sha256=YOe9nnvSr4ZbeCn587d7_VhE9IioZYRvF9VYQTQux08,5914
|
27
27
|
modal/cloud_bucket_mount.pyi,sha256=-qSfYAQvIoO_l2wsCCGTG5ZUwQieNKXdAO00yP1-LYU,7394
|
28
28
|
modal/cls.py,sha256=pTEO7pHjlO7taMbIqs4oI9ZZgKDJpVKyGkO5ZT0w6tQ,40934
|
@@ -39,7 +39,7 @@ modal/file_io.py,sha256=OSKr77TujcXGJW1iikzYiHckLSmv07QBgBHcxxYEkoI,21456
|
|
39
39
|
modal/file_io.pyi,sha256=xtO6Glf_BFwDE7QiQQo24QqcMf_Vv-iz7WojcGVlLBU,15932
|
40
40
|
modal/file_pattern_matcher.py,sha256=A_Kdkej6q7YQyhM_2-BvpFmPqJ0oHb54B6yf9VqvPVE,8116
|
41
41
|
modal/functions.py,sha256=kcNHvqeGBxPI7Cgd57NIBBghkfbeFJzXO44WW0jSmao,325
|
42
|
-
modal/functions.pyi,sha256=
|
42
|
+
modal/functions.pyi,sha256=F3Dll-9c3d8c8NuJuEYICUEB-sa67uWU75L111npwbA,39404
|
43
43
|
modal/gpu.py,sha256=Fe5ORvVPDIstSq1xjmM6OoNgLYFWvogP9r5BgmD3hYg,6769
|
44
44
|
modal/image.py,sha256=nXN9k_6gApHFy8-Bk_XT2Zu3jsDsGVrC3QcuiDC4yRY,103543
|
45
45
|
modal/image.pyi,sha256=vKdb5PpYM8wcpq9PQegeVMjrPLzAipuV4q994NZiL84,69325
|
@@ -150,10 +150,10 @@ modal/cli/programs/run_jupyter.py,sha256=44Lpvqk2l3hH-uOkmAOzw60NEsfB5uaRDWDKVsh
|
|
150
150
|
modal/cli/programs/run_marimo.py,sha256=HyZ2za0NYqg31-mGxFQxUIAJ3Q-jRaMocEwWwI5-cdw,2887
|
151
151
|
modal/cli/programs/vscode.py,sha256=KbTAaIXyQBVCDXxXjmBHmKpgXkUw0q4R4KkJvUjCYgk,3380
|
152
152
|
modal/experimental/__init__.py,sha256=aWDb1VO9s4D-5Ktw_kYQSqoCp1W8S7lhns-5l7S1l-8,15102
|
153
|
-
modal/experimental/flash.py,sha256=
|
154
|
-
modal/experimental/flash.pyi,sha256=
|
153
|
+
modal/experimental/flash.py,sha256=das7DK9m3PVyCEF9WhxXYDpqyB8x5yp6Z17TX-4b36M,24163
|
154
|
+
modal/experimental/flash.pyi,sha256=bCH9OgSzwJPo_s5-ObqlNBQUziZ7p5m5_zh0aQTA7sg,12216
|
155
155
|
modal/experimental/ipython.py,sha256=TrCfmol9LGsRZMeDoeMPx3Hv3BFqQhYnmD_iH0pqdhk,2904
|
156
|
-
modal-1.1.4.
|
156
|
+
modal-1.1.4.dev29.dist-info/licenses/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
|
157
157
|
modal_docs/__init__.py,sha256=svYKtV8HDwDCN86zbdWqyq5T8sMdGDj0PVlzc2tIxDM,28
|
158
158
|
modal_docs/gen_cli_docs.py,sha256=c1yfBS_x--gL5bs0N4ihMwqwX8l3IBWSkBAKNNIi6bQ,3801
|
159
159
|
modal_docs/gen_reference_docs.py,sha256=d_CQUGQ0rfw28u75I2mov9AlS773z9rG40-yq5o7g2U,6359
|
@@ -176,10 +176,10 @@ modal_proto/options_pb2.pyi,sha256=l7DBrbLO7q3Ir-XDkWsajm0d0TQqqrfuX54i4BMpdQg,1
|
|
176
176
|
modal_proto/options_pb2_grpc.py,sha256=1oboBPFxaTEXt9Aw7EAj8gXHDCNMhZD2VXqocC9l_gk,159
|
177
177
|
modal_proto/options_pb2_grpc.pyi,sha256=CImmhxHsYnF09iENPoe8S4J-n93jtgUYD2JPAc0yJSI,247
|
178
178
|
modal_proto/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
179
|
-
modal_version/__init__.py,sha256=
|
179
|
+
modal_version/__init__.py,sha256=LAxYkzjk8ZOPClrbopC-Fs8NzznaPQvV8djTy18TxBg,121
|
180
180
|
modal_version/__main__.py,sha256=2FO0yYQQwDTh6udt1h-cBnGd1c4ZyHnHSI4BksxzVac,105
|
181
|
-
modal-1.1.4.
|
182
|
-
modal-1.1.4.
|
183
|
-
modal-1.1.4.
|
184
|
-
modal-1.1.4.
|
185
|
-
modal-1.1.4.
|
181
|
+
modal-1.1.4.dev29.dist-info/METADATA,sha256=IWkqJotciWA6MzyGs4gWFCOth9rm2l8ISjF1nK6xjTE,2460
|
182
|
+
modal-1.1.4.dev29.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
|
183
|
+
modal-1.1.4.dev29.dist-info/entry_points.txt,sha256=An-wYgeEUnm6xzrAP9_NTSTSciYvvEWsMZILtYrvpAI,46
|
184
|
+
modal-1.1.4.dev29.dist-info/top_level.txt,sha256=4BWzoKYREKUZ5iyPzZpjqx4G8uB5TWxXPDwibLcVa7k,43
|
185
|
+
modal-1.1.4.dev29.dist-info/RECORD,,
|
modal_version/__init__.py
CHANGED
File without changes
|
File without changes
|
File without changes
|
File without changes
|