modal 1.1.4.dev27__py3-none-any.whl → 1.1.4.dev29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
modal/client.pyi CHANGED
@@ -33,7 +33,7 @@ class _Client:
33
33
  server_url: str,
34
34
  client_type: int,
35
35
  credentials: typing.Optional[tuple[str, str]],
36
- version: str = "1.1.4.dev27",
36
+ version: str = "1.1.4.dev29",
37
37
  ):
38
38
  """mdmd:hidden
39
39
  The Modal client object is not intended to be instantiated directly by users.
@@ -164,7 +164,7 @@ class Client:
164
164
  server_url: str,
165
165
  client_type: int,
166
166
  credentials: typing.Optional[tuple[str, str]],
167
- version: str = "1.1.4.dev27",
167
+ version: str = "1.1.4.dev29",
168
168
  ):
169
169
  """mdmd:hidden
170
170
  The Modal client object is not intended to be instantiated directly by users.
@@ -200,7 +200,10 @@ class _FlashPrometheusAutoscaler:
200
200
  if timestamp >= autoscaling_time - self._max_window_seconds
201
201
  ]
202
202
 
203
- current_target_containers = await self._compute_target_containers(current_replicas)
203
+ if self.metrics_endpoint == "internal":
204
+ current_target_containers = await self._compute_target_containers_internal(current_replicas)
205
+ else:
206
+ current_target_containers = await self._compute_target_containers_prometheus(current_replicas)
204
207
  autoscaling_decisions.append((autoscaling_time, current_target_containers))
205
208
 
206
209
  actual_target_containers = self._make_scaling_decision(
@@ -236,7 +239,53 @@ class _FlashPrometheusAutoscaler:
236
239
  logger.error(traceback.format_exc())
237
240
  await asyncio.sleep(self.autoscaling_interval_seconds)
238
241
 
239
- async def _compute_target_containers(self, current_replicas: int) -> int:
242
+ async def _compute_target_containers_internal(self, current_replicas: int) -> int:
243
+ """
244
+ Gets internal metrics from container to autoscale up or down.
245
+ """
246
+ containers = await self._get_all_containers()
247
+ if len(containers) > current_replicas:
248
+ logger.info(
249
+ f"[Modal Flash] Current replicas {current_replicas} is less than the number of containers "
250
+ f"{len(containers)}. Setting current_replicas = num_containers."
251
+ )
252
+ current_replicas = len(containers)
253
+
254
+ if current_replicas == 0:
255
+ return 1
256
+
257
+ internal_metrics_list = []
258
+ for container in containers:
259
+ internal_metric = await self._get_container_metrics(container.task_id)
260
+ if internal_metric is None:
261
+ continue
262
+ internal_metrics_list.append(getattr(internal_metric.metrics, self.target_metric))
263
+
264
+ if not internal_metrics_list:
265
+ return current_replicas
266
+
267
+ avg_internal_metric = sum(internal_metrics_list) / len(internal_metrics_list)
268
+
269
+ scale_factor = avg_internal_metric / self.target_metric_value
270
+
271
+ desired_replicas = current_replicas
272
+ if scale_factor > 1 + self.scale_up_tolerance:
273
+ desired_replicas = math.ceil(current_replicas * scale_factor)
274
+ elif scale_factor < 1 - self.scale_down_tolerance:
275
+ desired_replicas = math.ceil(current_replicas * scale_factor)
276
+
277
+ logger.warning(
278
+ f"[Modal Flash] Current replicas: {current_replicas}, "
279
+ f"avg internal metric `{self.target_metric}`: {avg_internal_metric}, "
280
+ f"target internal metric value: {self.target_metric_value}, "
281
+ f"scale factor: {scale_factor}, "
282
+ f"desired replicas: {desired_replicas}"
283
+ )
284
+
285
+ desired_replicas = max(1, min(desired_replicas, self.max_containers or 1000))
286
+ return desired_replicas
287
+
288
+ async def _compute_target_containers_prometheus(self, current_replicas: int) -> int:
240
289
  # current_replicas is the number of live containers + cold starting containers (not yet live)
241
290
  # containers is the number of live containers that are registered in flash dns
242
291
  containers = await self._get_all_containers()
@@ -253,6 +302,7 @@ class _FlashPrometheusAutoscaler:
253
302
  target_metric = self.target_metric
254
303
  target_metric_value = float(self.target_metric_value)
255
304
 
305
+ # Gets metrics from prometheus
256
306
  sum_metric = 0
257
307
  containers_with_metrics = 0
258
308
  container_metrics_list = await asyncio.gather(
@@ -341,6 +391,15 @@ class _FlashPrometheusAutoscaler:
341
391
 
342
392
  return metrics
343
393
 
394
+ async def _get_container_metrics(self, container_id: str) -> Optional[api_pb2.TaskGetAutoscalingMetricsResponse]:
395
+ req = api_pb2.TaskGetAutoscalingMetricsRequest(task_id=container_id)
396
+ try:
397
+ resp = await retry_transient_errors(self.client.stub.TaskGetAutoscalingMetrics, req)
398
+ return resp
399
+ except Exception as e:
400
+ logger.warning(f"[Modal Flash] Error getting metrics for container {container_id}: {e}")
401
+ return None
402
+
344
403
  async def _get_all_containers(self):
345
404
  req = api_pb2.FlashContainerListRequest(function_id=self.fn.object_id)
346
405
  resp = await retry_transient_errors(self.client.stub.FlashContainerList, req)
@@ -419,10 +478,14 @@ async def flash_prometheus_autoscaler(
419
478
  app_name: str,
420
479
  cls_name: str,
421
480
  # Endpoint to fetch metrics from. Must be in Prometheus format. Example: "/metrics"
481
+ # If metrics_endpoint is "internal", we will use containers' internal metrics to autoscale instead.
422
482
  metrics_endpoint: str,
423
483
  # Target metric to autoscale on. Example: "vllm:num_requests_running"
484
+ # If metrics_endpoint is "internal", target_metric options are: [cpu_usage_percent, memory_usage_percent]
424
485
  target_metric: str,
425
486
  # Target metric value. Example: 25
487
+ # If metrics_endpoint is "internal", target_metric_value is a percentage value between 0.1 and 1.0 (inclusive),
488
+ # indicating container's usage of that metric.
426
489
  target_metric_value: float,
427
490
  min_containers: Optional[int] = None,
428
491
  max_containers: Optional[int] = None,
@@ -1,4 +1,5 @@
1
1
  import modal.client
2
+ import modal_proto.api_pb2
2
3
  import typing
3
4
  import typing_extensions
4
5
 
@@ -85,8 +86,15 @@ class _FlashPrometheusAutoscaler:
85
86
 
86
87
  async def start(self): ...
87
88
  async def _run_autoscaler_loop(self): ...
88
- async def _compute_target_containers(self, current_replicas: int) -> int: ...
89
+ async def _compute_target_containers_internal(self, current_replicas: int) -> int:
90
+ """Gets internal metrics from container to autoscale up or down."""
91
+ ...
92
+
93
+ async def _compute_target_containers_prometheus(self, current_replicas: int) -> int: ...
89
94
  async def _get_metrics(self, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
95
+ async def _get_container_metrics(
96
+ self, container_id: str
97
+ ) -> typing.Optional[modal_proto.api_pb2.TaskGetAutoscalingMetricsResponse]: ...
90
98
  async def _get_all_containers(self): ...
91
99
  def _make_scaling_decision(
92
100
  self,
@@ -147,11 +155,22 @@ class FlashPrometheusAutoscaler:
147
155
 
148
156
  _run_autoscaler_loop: ___run_autoscaler_loop_spec[typing_extensions.Self]
149
157
 
150
- class ___compute_target_containers_spec(typing_extensions.Protocol[SUPERSELF]):
158
+ class ___compute_target_containers_internal_spec(typing_extensions.Protocol[SUPERSELF]):
159
+ def __call__(self, /, current_replicas: int) -> int:
160
+ """Gets internal metrics from container to autoscale up or down."""
161
+ ...
162
+
163
+ async def aio(self, /, current_replicas: int) -> int:
164
+ """Gets internal metrics from container to autoscale up or down."""
165
+ ...
166
+
167
+ _compute_target_containers_internal: ___compute_target_containers_internal_spec[typing_extensions.Self]
168
+
169
+ class ___compute_target_containers_prometheus_spec(typing_extensions.Protocol[SUPERSELF]):
151
170
  def __call__(self, /, current_replicas: int) -> int: ...
152
171
  async def aio(self, /, current_replicas: int) -> int: ...
153
172
 
154
- _compute_target_containers: ___compute_target_containers_spec[typing_extensions.Self]
173
+ _compute_target_containers_prometheus: ___compute_target_containers_prometheus_spec[typing_extensions.Self]
155
174
 
156
175
  class ___get_metrics_spec(typing_extensions.Protocol[SUPERSELF]):
157
176
  def __call__(self, /, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
@@ -159,6 +178,16 @@ class FlashPrometheusAutoscaler:
159
178
 
160
179
  _get_metrics: ___get_metrics_spec[typing_extensions.Self]
161
180
 
181
+ class ___get_container_metrics_spec(typing_extensions.Protocol[SUPERSELF]):
182
+ def __call__(
183
+ self, /, container_id: str
184
+ ) -> typing.Optional[modal_proto.api_pb2.TaskGetAutoscalingMetricsResponse]: ...
185
+ async def aio(
186
+ self, /, container_id: str
187
+ ) -> typing.Optional[modal_proto.api_pb2.TaskGetAutoscalingMetricsResponse]: ...
188
+
189
+ _get_container_metrics: ___get_container_metrics_spec[typing_extensions.Self]
190
+
162
191
  class ___get_all_containers_spec(typing_extensions.Protocol[SUPERSELF]):
163
192
  def __call__(self, /): ...
164
193
  async def aio(self, /): ...
modal/functions.pyi CHANGED
@@ -445,7 +445,7 @@ class Function(
445
445
 
446
446
  _call_generator: ___call_generator_spec[typing_extensions.Self]
447
447
 
448
- class __remote_spec(typing_extensions.Protocol[ReturnType_INNER, P_INNER, SUPERSELF]):
448
+ class __remote_spec(typing_extensions.Protocol[P_INNER, ReturnType_INNER, SUPERSELF]):
449
449
  def __call__(self, /, *args: P_INNER.args, **kwargs: P_INNER.kwargs) -> ReturnType_INNER:
450
450
  """Calls the function remotely, executing it with the given arguments and returning the execution's result."""
451
451
  ...
@@ -454,7 +454,7 @@ class Function(
454
454
  """Calls the function remotely, executing it with the given arguments and returning the execution's result."""
455
455
  ...
456
456
 
457
- remote: __remote_spec[modal._functions.ReturnType, modal._functions.P, typing_extensions.Self]
457
+ remote: __remote_spec[modal._functions.P, modal._functions.ReturnType, typing_extensions.Self]
458
458
 
459
459
  class __remote_gen_spec(typing_extensions.Protocol[SUPERSELF]):
460
460
  def __call__(self, /, *args, **kwargs) -> typing.Generator[typing.Any, None, None]:
@@ -481,7 +481,7 @@ class Function(
481
481
  """
482
482
  ...
483
483
 
484
- class ___experimental_spawn_spec(typing_extensions.Protocol[ReturnType_INNER, P_INNER, SUPERSELF]):
484
+ class ___experimental_spawn_spec(typing_extensions.Protocol[P_INNER, ReturnType_INNER, SUPERSELF]):
485
485
  def __call__(self, /, *args: P_INNER.args, **kwargs: P_INNER.kwargs) -> FunctionCall[ReturnType_INNER]:
486
486
  """[Experimental] Calls the function with the given arguments, without waiting for the results.
487
487
 
@@ -505,7 +505,7 @@ class Function(
505
505
  ...
506
506
 
507
507
  _experimental_spawn: ___experimental_spawn_spec[
508
- modal._functions.ReturnType, modal._functions.P, typing_extensions.Self
508
+ modal._functions.P, modal._functions.ReturnType, typing_extensions.Self
509
509
  ]
510
510
 
511
511
  class ___spawn_map_inner_spec(typing_extensions.Protocol[P_INNER, SUPERSELF]):
@@ -514,7 +514,7 @@ class Function(
514
514
 
515
515
  _spawn_map_inner: ___spawn_map_inner_spec[modal._functions.P, typing_extensions.Self]
516
516
 
517
- class __spawn_spec(typing_extensions.Protocol[ReturnType_INNER, P_INNER, SUPERSELF]):
517
+ class __spawn_spec(typing_extensions.Protocol[P_INNER, ReturnType_INNER, SUPERSELF]):
518
518
  def __call__(self, /, *args: P_INNER.args, **kwargs: P_INNER.kwargs) -> FunctionCall[ReturnType_INNER]:
519
519
  """Calls the function with the given arguments, without waiting for the results.
520
520
 
@@ -535,7 +535,7 @@ class Function(
535
535
  """
536
536
  ...
537
537
 
538
- spawn: __spawn_spec[modal._functions.ReturnType, modal._functions.P, typing_extensions.Self]
538
+ spawn: __spawn_spec[modal._functions.P, modal._functions.ReturnType, typing_extensions.Self]
539
539
 
540
540
  def get_raw_f(self) -> collections.abc.Callable[..., typing.Any]:
541
541
  """Return the inner Python object wrapped by this Modal Function."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: modal
3
- Version: 1.1.4.dev27
3
+ Version: 1.1.4.dev29
4
4
  Summary: Python client library for Modal
5
5
  Author-email: Modal Labs <support@modal.com>
6
6
  License: Apache-2.0
@@ -22,7 +22,7 @@ modal/app.py,sha256=F4baVULljFq0CwC_7U-EKNRNx7CYeWBKudjjYUuWc4U,48416
22
22
  modal/app.pyi,sha256=AbXJCBkyt2rI_-M3VbTBYb32at0P6iRZuoC87xY_JrQ,43591
23
23
  modal/call_graph.py,sha256=1g2DGcMIJvRy-xKicuf63IVE98gJSnQsr8R_NVMptNc,2581
24
24
  modal/client.py,sha256=kyAIVB3Ay-XKJizQ_1ufUFB__EagV0MLmHJpyYyJ7J0,18636
25
- modal/client.pyi,sha256=qbmpuyFE5cuDiOMXHpObxTB0qjs0RjH0jOyD-iuOB9I,15831
25
+ modal/client.pyi,sha256=SRlBIsiqUovZcyjv4yo8u61GSDr3_yfG05Wi4kdKR94,15831
26
26
  modal/cloud_bucket_mount.py,sha256=YOe9nnvSr4ZbeCn587d7_VhE9IioZYRvF9VYQTQux08,5914
27
27
  modal/cloud_bucket_mount.pyi,sha256=-qSfYAQvIoO_l2wsCCGTG5ZUwQieNKXdAO00yP1-LYU,7394
28
28
  modal/cls.py,sha256=pTEO7pHjlO7taMbIqs4oI9ZZgKDJpVKyGkO5ZT0w6tQ,40934
@@ -39,7 +39,7 @@ modal/file_io.py,sha256=OSKr77TujcXGJW1iikzYiHckLSmv07QBgBHcxxYEkoI,21456
39
39
  modal/file_io.pyi,sha256=xtO6Glf_BFwDE7QiQQo24QqcMf_Vv-iz7WojcGVlLBU,15932
40
40
  modal/file_pattern_matcher.py,sha256=A_Kdkej6q7YQyhM_2-BvpFmPqJ0oHb54B6yf9VqvPVE,8116
41
41
  modal/functions.py,sha256=kcNHvqeGBxPI7Cgd57NIBBghkfbeFJzXO44WW0jSmao,325
42
- modal/functions.pyi,sha256=cDqhpIM5caoCR18_8krpAmPOd4QvEbm1ypYUZ6Ze9Wo,39404
42
+ modal/functions.pyi,sha256=F3Dll-9c3d8c8NuJuEYICUEB-sa67uWU75L111npwbA,39404
43
43
  modal/gpu.py,sha256=Fe5ORvVPDIstSq1xjmM6OoNgLYFWvogP9r5BgmD3hYg,6769
44
44
  modal/image.py,sha256=nXN9k_6gApHFy8-Bk_XT2Zu3jsDsGVrC3QcuiDC4yRY,103543
45
45
  modal/image.pyi,sha256=vKdb5PpYM8wcpq9PQegeVMjrPLzAipuV4q994NZiL84,69325
@@ -150,10 +150,10 @@ modal/cli/programs/run_jupyter.py,sha256=44Lpvqk2l3hH-uOkmAOzw60NEsfB5uaRDWDKVsh
150
150
  modal/cli/programs/run_marimo.py,sha256=HyZ2za0NYqg31-mGxFQxUIAJ3Q-jRaMocEwWwI5-cdw,2887
151
151
  modal/cli/programs/vscode.py,sha256=KbTAaIXyQBVCDXxXjmBHmKpgXkUw0q4R4KkJvUjCYgk,3380
152
152
  modal/experimental/__init__.py,sha256=aWDb1VO9s4D-5Ktw_kYQSqoCp1W8S7lhns-5l7S1l-8,15102
153
- modal/experimental/flash.py,sha256=gydlThBSPgS8HA53Lwi0gUAFVtpdXu20IYAQXJSoTP0,21092
154
- modal/experimental/flash.pyi,sha256=A8_qJGtGoXEzKDdHbvhmCw7oqfneFEvJQK3ZdTOvUdU,10830
153
+ modal/experimental/flash.py,sha256=das7DK9m3PVyCEF9WhxXYDpqyB8x5yp6Z17TX-4b36M,24163
154
+ modal/experimental/flash.pyi,sha256=bCH9OgSzwJPo_s5-ObqlNBQUziZ7p5m5_zh0aQTA7sg,12216
155
155
  modal/experimental/ipython.py,sha256=TrCfmol9LGsRZMeDoeMPx3Hv3BFqQhYnmD_iH0pqdhk,2904
156
- modal-1.1.4.dev27.dist-info/licenses/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
156
+ modal-1.1.4.dev29.dist-info/licenses/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
157
157
  modal_docs/__init__.py,sha256=svYKtV8HDwDCN86zbdWqyq5T8sMdGDj0PVlzc2tIxDM,28
158
158
  modal_docs/gen_cli_docs.py,sha256=c1yfBS_x--gL5bs0N4ihMwqwX8l3IBWSkBAKNNIi6bQ,3801
159
159
  modal_docs/gen_reference_docs.py,sha256=d_CQUGQ0rfw28u75I2mov9AlS773z9rG40-yq5o7g2U,6359
@@ -176,10 +176,10 @@ modal_proto/options_pb2.pyi,sha256=l7DBrbLO7q3Ir-XDkWsajm0d0TQqqrfuX54i4BMpdQg,1
176
176
  modal_proto/options_pb2_grpc.py,sha256=1oboBPFxaTEXt9Aw7EAj8gXHDCNMhZD2VXqocC9l_gk,159
177
177
  modal_proto/options_pb2_grpc.pyi,sha256=CImmhxHsYnF09iENPoe8S4J-n93jtgUYD2JPAc0yJSI,247
178
178
  modal_proto/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
179
- modal_version/__init__.py,sha256=M5SRe08tZk1Qjv_4ZfmVLi4h8aFR7ppq4Q0D9Kx-O6g,121
179
+ modal_version/__init__.py,sha256=LAxYkzjk8ZOPClrbopC-Fs8NzznaPQvV8djTy18TxBg,121
180
180
  modal_version/__main__.py,sha256=2FO0yYQQwDTh6udt1h-cBnGd1c4ZyHnHSI4BksxzVac,105
181
- modal-1.1.4.dev27.dist-info/METADATA,sha256=rfvm0aAYUjUIoGtycMEvgCniQ98n2G_HjSyuHmVdHZw,2460
182
- modal-1.1.4.dev27.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
183
- modal-1.1.4.dev27.dist-info/entry_points.txt,sha256=An-wYgeEUnm6xzrAP9_NTSTSciYvvEWsMZILtYrvpAI,46
184
- modal-1.1.4.dev27.dist-info/top_level.txt,sha256=4BWzoKYREKUZ5iyPzZpjqx4G8uB5TWxXPDwibLcVa7k,43
185
- modal-1.1.4.dev27.dist-info/RECORD,,
181
+ modal-1.1.4.dev29.dist-info/METADATA,sha256=IWkqJotciWA6MzyGs4gWFCOth9rm2l8ISjF1nK6xjTE,2460
182
+ modal-1.1.4.dev29.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
183
+ modal-1.1.4.dev29.dist-info/entry_points.txt,sha256=An-wYgeEUnm6xzrAP9_NTSTSciYvvEWsMZILtYrvpAI,46
184
+ modal-1.1.4.dev29.dist-info/top_level.txt,sha256=4BWzoKYREKUZ5iyPzZpjqx4G8uB5TWxXPDwibLcVa7k,43
185
+ modal-1.1.4.dev29.dist-info/RECORD,,
modal_version/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # Copyright Modal Labs 2025
2
2
  """Supplies the current version of the modal client library."""
3
3
 
4
- __version__ = "1.1.4.dev27"
4
+ __version__ = "1.1.4.dev29"