PyPI - modal - Versions diffs - 1.1.4.dev27__py3-none-any.whl → 1.1.4.dev28__py3-none-any.whl - Mend

modal 1.1.4.dev27__py3-none-any.whl → 1.1.4.dev28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

modal/client.pyi CHANGED Viewed

@@ -33,7 +33,7 @@ class _Client:
         server_url: str,
         client_type: int,
         credentials: typing.Optional[tuple[str, str]],
-        version: str = "1.1.4.dev27",
+        version: str = "1.1.4.dev28",
     ):
         """mdmd:hidden
         The Modal client object is not intended to be instantiated directly by users.
@@ -164,7 +164,7 @@ class Client:
         server_url: str,
         client_type: int,
         credentials: typing.Optional[tuple[str, str]],
-        version: str = "1.1.4.dev27",
+        version: str = "1.1.4.dev28",
     ):
         """mdmd:hidden
         The Modal client object is not intended to be instantiated directly by users.

modal/experimental/flash.py CHANGED Viewed

@@ -200,7 +200,10 @@ class _FlashPrometheusAutoscaler:
                     if timestamp >= autoscaling_time - self._max_window_seconds
                 ]
-                current_target_containers = await self._compute_target_containers(current_replicas)
+                if self.metrics_endpoint == "internal":
+                    current_target_containers = await self._compute_target_containers_internal(current_replicas)
+                else:
+                    current_target_containers = await self._compute_target_containers_prometheus(current_replicas)
                 autoscaling_decisions.append((autoscaling_time, current_target_containers))
                 actual_target_containers = self._make_scaling_decision(
@@ -236,7 +239,53 @@ class _FlashPrometheusAutoscaler:
                 logger.error(traceback.format_exc())
                 await asyncio.sleep(self.autoscaling_interval_seconds)
-    async def _compute_target_containers(self, current_replicas: int) -> int:
+    async def _compute_target_containers_internal(self, current_replicas: int) -> int:
+        """
+        Gets internal metrics from container to autoscale up or down.
+        """
+        containers = await self._get_all_containers()
+        if len(containers) > current_replicas:
+            logger.info(
+                f"[Modal Flash] Current replicas {current_replicas} is less than the number of containers "
+                f"{len(containers)}. Setting current_replicas = num_containers."
+            )
+            current_replicas = len(containers)
+        if current_replicas == 0:
+            return 1
+        internal_metrics_list = []
+        for container in containers:
+            internal_metric = await self._get_container_metrics(container.id)
+            if internal_metric is None:
+                continue
+            internal_metrics_list.append(getattr(internal_metric.metrics, self.target_metric))
+        if not internal_metrics_list:
+            return current_replicas
+        avg_internal_metric = sum(internal_metrics_list) / len(internal_metrics_list)
+        scale_factor = avg_internal_metric / self.target_metric_value
+        desired_replicas = current_replicas
+        if scale_factor > 1 + self.scale_up_tolerance:
+            desired_replicas = math.ceil(current_replicas * scale_factor)
+        elif scale_factor < 1 - self.scale_down_tolerance:
+            desired_replicas = math.ceil(current_replicas * scale_factor)
+        logger.warning(
+            f"[Modal Flash] Current replicas: {current_replicas}, "
+            f"avg internal metric `{self.target_metric}`: {avg_internal_metric}, "
+            f"target internal metric value: {self.target_metric_value}, "
+            f"scale factor: {scale_factor}, "
+            f"desired replicas: {desired_replicas}"
+        )
+        desired_replicas = max(1, desired_replicas)
+        return desired_replicas
+    async def _compute_target_containers_prometheus(self, current_replicas: int) -> int:
         # current_replicas is the number of live containers + cold starting containers (not yet live)
         # containers is the number of live containers that are registered in flash dns
         containers = await self._get_all_containers()
@@ -253,6 +302,7 @@ class _FlashPrometheusAutoscaler:
         target_metric = self.target_metric
         target_metric_value = float(self.target_metric_value)
+        # Gets metrics from prometheus
         sum_metric = 0
         containers_with_metrics = 0
         container_metrics_list = await asyncio.gather(
@@ -341,6 +391,15 @@ class _FlashPrometheusAutoscaler:
         return metrics
+    async def _get_container_metrics(self, container_id: str) -> Optional[api_pb2.TaskGetAutoscalingMetricsResponse]:
+        req = api_pb2.TaskGetAutoscalingMetricsRequest(task_id=container_id)
+        try:
+            resp = await retry_transient_errors(self.client.stub.TaskGetAutoscalingMetrics, req)
+            return resp
+        except Exception as e:
+            logger.warning(f"[Modal Flash] Error getting metrics for container {container_id}: {e}")
+            return None
     async def _get_all_containers(self):
         req = api_pb2.FlashContainerListRequest(function_id=self.fn.object_id)
         resp = await retry_transient_errors(self.client.stub.FlashContainerList, req)
@@ -419,10 +478,14 @@ async def flash_prometheus_autoscaler(
     app_name: str,
     cls_name: str,
     # Endpoint to fetch metrics from. Must be in Prometheus format. Example: "/metrics"
+    # If metrics_endpoint is "internal", we will use containers' internal metrics to autoscale instead.
     metrics_endpoint: str,
     # Target metric to autoscale on. Example: "vllm:num_requests_running"
+    # If metrics_endpoint is "internal", target_metrics options are: [cpu_usage_percent, memory_usage_percent]
     target_metric: str,
     # Target metric value. Example: 25
+    # If metrics_endpoint is "internal", target_metric_value is a percentage value between 0.1 and 1.0 (inclusive),
+    # indicating container's usage of that metric.
     target_metric_value: float,
     min_containers: Optional[int] = None,
     max_containers: Optional[int] = None,

modal/experimental/flash.pyi CHANGED Viewed

@@ -1,4 +1,5 @@
 import modal.client
+import modal_proto.api_pb2
 import typing
 import typing_extensions
@@ -85,8 +86,15 @@ class _FlashPrometheusAutoscaler:
     async def start(self): ...
     async def _run_autoscaler_loop(self): ...
-    async def _compute_target_containers(self, current_replicas: int) -> int: ...
+    async def _compute_target_containers_internal(self, current_replicas: int) -> int:
+        """Gets internal metrics from container to autoscale up or down."""
+        ...
+    async def _compute_target_containers_prometheus(self, current_replicas: int) -> int: ...
     async def _get_metrics(self, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
+    async def _get_container_metrics(
+        self, container_id: str
+    ) -> typing.Optional[modal_proto.api_pb2.TaskGetAutoscalingMetricsResponse]: ...
     async def _get_all_containers(self): ...
     def _make_scaling_decision(
         self,
@@ -147,11 +155,22 @@ class FlashPrometheusAutoscaler:
     _run_autoscaler_loop: ___run_autoscaler_loop_spec[typing_extensions.Self]
-    class ___compute_target_containers_spec(typing_extensions.Protocol[SUPERSELF]):
+    class ___compute_target_containers_internal_spec(typing_extensions.Protocol[SUPERSELF]):
+        def __call__(self, /, current_replicas: int) -> int:
+            """Gets internal metrics from container to autoscale up or down."""
+            ...
+        async def aio(self, /, current_replicas: int) -> int:
+            """Gets internal metrics from container to autoscale up or down."""
+            ...
+    _compute_target_containers_internal: ___compute_target_containers_internal_spec[typing_extensions.Self]
+    class ___compute_target_containers_prometheus_spec(typing_extensions.Protocol[SUPERSELF]):
         def __call__(self, /, current_replicas: int) -> int: ...
         async def aio(self, /, current_replicas: int) -> int: ...
-    _compute_target_containers: ___compute_target_containers_spec[typing_extensions.Self]
+    _compute_target_containers_prometheus: ___compute_target_containers_prometheus_spec[typing_extensions.Self]
     class ___get_metrics_spec(typing_extensions.Protocol[SUPERSELF]):
         def __call__(self, /, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
@@ -159,6 +178,16 @@ class FlashPrometheusAutoscaler:
     _get_metrics: ___get_metrics_spec[typing_extensions.Self]
+    class ___get_container_metrics_spec(typing_extensions.Protocol[SUPERSELF]):
+        def __call__(
+            self, /, container_id: str
+        ) -> typing.Optional[modal_proto.api_pb2.TaskGetAutoscalingMetricsResponse]: ...
+        async def aio(
+            self, /, container_id: str
+        ) -> typing.Optional[modal_proto.api_pb2.TaskGetAutoscalingMetricsResponse]: ...
+    _get_container_metrics: ___get_container_metrics_spec[typing_extensions.Self]
     class ___get_all_containers_spec(typing_extensions.Protocol[SUPERSELF]):
         def __call__(self, /): ...
         async def aio(self, /): ...

{modal-1.1.4.dev27.dist-info → modal-1.1.4.dev28.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: modal
-Version: 1.1.4.dev27
+Version: 1.1.4.dev28
 Summary: Python client library for Modal
 Author-email: Modal Labs <support@modal.com>
 License: Apache-2.0

{modal-1.1.4.dev27.dist-info → modal-1.1.4.dev28.dist-info}/RECORD RENAMED Viewed

@@ -22,7 +22,7 @@ modal/app.py,sha256=F4baVULljFq0CwC_7U-EKNRNx7CYeWBKudjjYUuWc4U,48416
 modal/app.pyi,sha256=AbXJCBkyt2rI_-M3VbTBYb32at0P6iRZuoC87xY_JrQ,43591
 modal/call_graph.py,sha256=1g2DGcMIJvRy-xKicuf63IVE98gJSnQsr8R_NVMptNc,2581
 modal/client.py,sha256=kyAIVB3Ay-XKJizQ_1ufUFB__EagV0MLmHJpyYyJ7J0,18636
-modal/client.pyi,sha256=qbmpuyFE5cuDiOMXHpObxTB0qjs0RjH0jOyD-iuOB9I,15831
+modal/client.pyi,sha256=uDYc9D-M2scJv1Ur0rIAugWT8G0EVI6vbcL3XwlY5g8,15831
 modal/cloud_bucket_mount.py,sha256=YOe9nnvSr4ZbeCn587d7_VhE9IioZYRvF9VYQTQux08,5914
 modal/cloud_bucket_mount.pyi,sha256=-qSfYAQvIoO_l2wsCCGTG5ZUwQieNKXdAO00yP1-LYU,7394
 modal/cls.py,sha256=pTEO7pHjlO7taMbIqs4oI9ZZgKDJpVKyGkO5ZT0w6tQ,40934
@@ -150,10 +150,10 @@ modal/cli/programs/run_jupyter.py,sha256=44Lpvqk2l3hH-uOkmAOzw60NEsfB5uaRDWDKVsh
 modal/cli/programs/run_marimo.py,sha256=HyZ2za0NYqg31-mGxFQxUIAJ3Q-jRaMocEwWwI5-cdw,2887
 modal/cli/programs/vscode.py,sha256=KbTAaIXyQBVCDXxXjmBHmKpgXkUw0q4R4KkJvUjCYgk,3380
 modal/experimental/__init__.py,sha256=aWDb1VO9s4D-5Ktw_kYQSqoCp1W8S7lhns-5l7S1l-8,15102
-modal/experimental/flash.py,sha256=gydlThBSPgS8HA53Lwi0gUAFVtpdXu20IYAQXJSoTP0,21092
-modal/experimental/flash.pyi,sha256=A8_qJGtGoXEzKDdHbvhmCw7oqfneFEvJQK3ZdTOvUdU,10830
+modal/experimental/flash.py,sha256=Sxzx6WXOJSYAwKn3ZMyPWSroudQTPeHVpCEWbSPKD3A,24124
+modal/experimental/flash.pyi,sha256=bCH9OgSzwJPo_s5-ObqlNBQUziZ7p5m5_zh0aQTA7sg,12216
 modal/experimental/ipython.py,sha256=TrCfmol9LGsRZMeDoeMPx3Hv3BFqQhYnmD_iH0pqdhk,2904
-modal-1.1.4.dev27.dist-info/licenses/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
+modal-1.1.4.dev28.dist-info/licenses/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
 modal_docs/__init__.py,sha256=svYKtV8HDwDCN86zbdWqyq5T8sMdGDj0PVlzc2tIxDM,28
 modal_docs/gen_cli_docs.py,sha256=c1yfBS_x--gL5bs0N4ihMwqwX8l3IBWSkBAKNNIi6bQ,3801
 modal_docs/gen_reference_docs.py,sha256=d_CQUGQ0rfw28u75I2mov9AlS773z9rG40-yq5o7g2U,6359
@@ -176,10 +176,10 @@ modal_proto/options_pb2.pyi,sha256=l7DBrbLO7q3Ir-XDkWsajm0d0TQqqrfuX54i4BMpdQg,1
 modal_proto/options_pb2_grpc.py,sha256=1oboBPFxaTEXt9Aw7EAj8gXHDCNMhZD2VXqocC9l_gk,159
 modal_proto/options_pb2_grpc.pyi,sha256=CImmhxHsYnF09iENPoe8S4J-n93jtgUYD2JPAc0yJSI,247
 modal_proto/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-modal_version/__init__.py,sha256=M5SRe08tZk1Qjv_4ZfmVLi4h8aFR7ppq4Q0D9Kx-O6g,121
+modal_version/__init__.py,sha256=U3zYvussyhm4LpBPEvSAIV24lwpJWrSVmQVc7rel01g,121
 modal_version/__main__.py,sha256=2FO0yYQQwDTh6udt1h-cBnGd1c4ZyHnHSI4BksxzVac,105
-modal-1.1.4.dev27.dist-info/METADATA,sha256=rfvm0aAYUjUIoGtycMEvgCniQ98n2G_HjSyuHmVdHZw,2460
-modal-1.1.4.dev27.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
-modal-1.1.4.dev27.dist-info/entry_points.txt,sha256=An-wYgeEUnm6xzrAP9_NTSTSciYvvEWsMZILtYrvpAI,46
-modal-1.1.4.dev27.dist-info/top_level.txt,sha256=4BWzoKYREKUZ5iyPzZpjqx4G8uB5TWxXPDwibLcVa7k,43
-modal-1.1.4.dev27.dist-info/RECORD,,
+modal-1.1.4.dev28.dist-info/METADATA,sha256=l4cMEFeTe6dLilwhAxwXXJArJwHRgcLudcpc6Ck8-n0,2460
+modal-1.1.4.dev28.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
+modal-1.1.4.dev28.dist-info/entry_points.txt,sha256=An-wYgeEUnm6xzrAP9_NTSTSciYvvEWsMZILtYrvpAI,46
+modal-1.1.4.dev28.dist-info/top_level.txt,sha256=4BWzoKYREKUZ5iyPzZpjqx4G8uB5TWxXPDwibLcVa7k,43
+modal-1.1.4.dev28.dist-info/RECORD,,

modal_version/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
 # Copyright Modal Labs 2025
 """Supplies the current version of the modal client library."""
-__version__ = "1.1.4.dev27"
+__version__ = "1.1.4.dev28"

{modal-1.1.4.dev27.dist-info → modal-1.1.4.dev28.dist-info}/WHEEL RENAMED Viewed

File without changes

{modal-1.1.4.dev27.dist-info → modal-1.1.4.dev28.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{modal-1.1.4.dev27.dist-info → modal-1.1.4.dev28.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{modal-1.1.4.dev27.dist-info → modal-1.1.4.dev28.dist-info}/top_level.txt RENAMED Viewed

File without changes

modal 1.1.4.dev27__py3-none-any.whl → 1.1.4.dev28__py3-none-any.whl

modal 1.1.4.dev27__py3-none-any.whl → 1.1.4.dev28__py3-none-any.whl