PyPI - modal - Versions diffs - 1.1.5.dev7__py3-none-any.whl → 1.1.5.dev9__py3-none-any.whl - Mend

modal 1.1.5.dev7__py3-none-any.whl → 1.1.5.dev9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

modal/client.pyi +2 -2
modal/experimental/flash.py +65 -89
modal/experimental/flash.pyi +38 -8
{modal-1.1.5.dev7.dist-info → modal-1.1.5.dev9.dist-info}/METADATA +1 -1
{modal-1.1.5.dev7.dist-info → modal-1.1.5.dev9.dist-info}/RECORD +13 -13
modal_proto/api.proto +2 -0
modal_proto/api_pb2.py +862 -862
modal_proto/api_pb2.pyi +8 -2
modal_version/__init__.py +1 -1
{modal-1.1.5.dev7.dist-info → modal-1.1.5.dev9.dist-info}/WHEEL +0 -0
{modal-1.1.5.dev7.dist-info → modal-1.1.5.dev9.dist-info}/entry_points.txt +0 -0
{modal-1.1.5.dev7.dist-info → modal-1.1.5.dev9.dist-info}/licenses/LICENSE +0 -0
{modal-1.1.5.dev7.dist-info → modal-1.1.5.dev9.dist-info}/top_level.txt +0 -0

modal/client.pyi CHANGED Viewed

@@ -33,7 +33,7 @@ class _Client:
         server_url: str,
         client_type: int,
         credentials: typing.Optional[tuple[str, str]],
-        version: str = "1.1.5.dev7",
+        version: str = "1.1.5.dev9",
     ):
         """mdmd:hidden
         The Modal client object is not intended to be instantiated directly by users.
@@ -164,7 +164,7 @@ class Client:
         server_url: str,
         client_type: int,
         credentials: typing.Optional[tuple[str, str]],
-        version: str = "1.1.5.dev7",
+        version: str = "1.1.5.dev9",
     ):
         """mdmd:hidden
         The Modal client object is not intended to be instantiated directly by users.

modal/experimental/flash.py CHANGED Viewed

@@ -282,10 +282,7 @@ class _FlashPrometheusAutoscaler:
                     if timestamp >= autoscaling_time - self._max_window_seconds
                 ]
-                if self.metrics_endpoint == "internal":
-                    current_target_containers = await self._compute_target_containers_internal(current_replicas)
-                else:
-                    current_target_containers = await self._compute_target_containers_prometheus(current_replicas)
+                current_target_containers = await self._compute_target_containers(current_replicas=current_replicas)
                 autoscaling_decisions.append((autoscaling_time, current_target_containers))
                 actual_target_containers = self._make_scaling_decision(
@@ -322,7 +319,7 @@ class _FlashPrometheusAutoscaler:
                 logger.error(traceback.format_exc())
                 await asyncio.sleep(self.autoscaling_interval_seconds)
-    async def _compute_target_containers_internal(self, current_replicas: int) -> int:
+    async def _compute_target_containers(self, current_replicas: int) -> int:
         """
         Gets internal metrics from container to autoscale up or down.
         """
@@ -337,100 +334,41 @@ class _FlashPrometheusAutoscaler:
         if current_replicas == 0:
             return 1
-        internal_metrics_list = []
-        for container in containers:
-            internal_metric = await self._get_container_metrics(container.task_id)
-            if internal_metric is None:
-                continue
-            internal_metrics_list.append(getattr(internal_metric.metrics, self.target_metric))
-        if not internal_metrics_list:
-            return current_replicas
-        sum_metric = sum(internal_metrics_list)
-        containers_with_metrics = len(internal_metrics_list)
-        # n_containers_missing_metric is the number of unhealthy containers + number of cold starting containers
-        n_containers_missing_metric = current_replicas - containers_with_metrics
-        # n_containers_unhealthy is the number of live containers that are not emitting metrics i.e. unhealthy
-        n_containers_unhealthy = len(containers) - containers_with_metrics
-        # Scale up assuming that every unhealthy container is at 2x the target metric value.
-        scale_up_target_metric_value = (sum_metric + n_containers_unhealthy * self.target_metric_value) / (
-            (containers_with_metrics + n_containers_unhealthy) or 1
-        )
-        # Scale down assuming that every container (including cold starting containers) are at the target metric value.
-        scale_down_target_metric_value = (sum_metric + n_containers_missing_metric * self.target_metric_value) / (
-            current_replicas or 1
-        )
-        scale_up_ratio = scale_up_target_metric_value / self.target_metric_value
-        scale_down_ratio = scale_down_target_metric_value / self.target_metric_value
-        desired_replicas = current_replicas
-        if scale_up_ratio > 1 + self.scale_up_tolerance:
-            desired_replicas = math.ceil(current_replicas * scale_up_ratio)
-        elif scale_down_ratio < 1 - self.scale_down_tolerance:
-            desired_replicas = math.ceil(current_replicas * scale_down_ratio)
+        # Get metrics based on autoscaler type (prometheus or internal)
+        sum_metric, n_containers_with_metrics = await self._get_scaling_info(containers)
-        logger.warning(
-            f"[Modal Flash] Current replicas: {current_replicas}, "
-            f"sum internal metric `{self.target_metric}`: {sum_metric}, "
-            f"target internal metric value: {self.target_metric_value}, "
-            f"scale up ratio: {scale_up_ratio}, "
-            f"scale down ratio: {scale_down_ratio}, "
-            f"desired replicas: {desired_replicas}"
+        desired_replicas = self._calculate_desired_replicas(
+            n_current_replicas=current_replicas,
+            sum_metric=sum_metric,
+            n_containers_with_metrics=n_containers_with_metrics,
+            n_total_containers=len(containers),
+            target_metric_value=self.target_metric_value,
         )
-        desired_replicas = max(1, min(desired_replicas, self.max_containers or 5000))
-        return desired_replicas
+        return max(1, desired_replicas)
-    async def _compute_target_containers_prometheus(self, current_replicas: int) -> int:
-        # current_replicas is the number of live containers + cold starting containers (not yet live)
-        # containers is the number of live containers that are registered in flash dns
-        containers = await self._get_all_containers()
-        if len(containers) > current_replicas:
-            logger.info(
-                f"[Modal Flash] Current replicas {current_replicas} is less than the number of containers "
-                f"{len(containers)}. Setting current_replicas = num_containers."
-            )
-            current_replicas = len(containers)
-        if current_replicas == 0:
-            return 1
-        target_metric = self.target_metric
-        target_metric_value = float(self.target_metric_value)
-        # Gets metrics from prometheus
-        sum_metric = 0
-        containers_with_metrics = 0
+    def _calculate_desired_replicas(
+        self,
+        n_current_replicas: int,
+        sum_metric: float,
+        n_containers_with_metrics: int,
+        n_total_containers: int,
+        target_metric_value: float,
+    ) -> int:
+        """
+        Calculate the desired number of replicas to autoscale to.
+        """
         buffer_containers = self.buffer_containers or 0
-        container_metrics_list = await asyncio.gather(
-            *[
-                self._get_metrics(f"https://{container.host}:{container.port}/{self.metrics_endpoint}")
-                for container in containers
-            ]
-        )
-        for container_metrics in container_metrics_list:
-            if (
-                container_metrics is None
-                or target_metric not in container_metrics
-                or len(container_metrics[target_metric]) == 0
-            ):
-                continue
-            sum_metric += container_metrics[target_metric][0].value
-            containers_with_metrics += 1
         # n_containers_missing = number of unhealthy containers + number of containers not registered in flash dns
-        n_containers_missing_metric = current_replicas - containers_with_metrics
+        n_containers_missing_metric = n_current_replicas - n_containers_with_metrics
         # n_containers_unhealthy = number of dns registered containers that are not emitting metrics
-        n_containers_unhealthy = len(containers) - containers_with_metrics
+        n_containers_unhealthy = n_total_containers - n_containers_with_metrics
         # number of total containers - buffer containers
         # This is used in 1) scale ratio denominators 2) provisioning base.
         # Max is used to handle case when buffer_containers are first initialized.
-        num_provisioned_containers = max(current_replicas - buffer_containers, 1)
+        num_provisioned_containers = max(n_current_replicas - buffer_containers, 1)
         # Scale up assuming that every unhealthy container is at (1 + scale_up_tolerance)x the target metric value.
         # This way if all containers are unhealthy, we will increase our number of containers.
@@ -454,10 +392,11 @@ class _FlashPrometheusAutoscaler:
             desired_replicas = math.ceil(desired_replicas * scale_down_ratio)
         logger.warning(
-            f"[Modal Flash] Current replicas: {current_replicas}, "
+            f"[Modal Flash] Current replicas: {n_current_replicas}, "
+            f"target metric: {self.target_metric}"
             f"target metric value: {target_metric_value}, "
             f"current sum of metric values: {sum_metric}, "
-            f"number of containers with metrics: {containers_with_metrics}, "
+            f"number of containers with metrics: {n_containers_with_metrics}, "
             f"number of containers unhealthy: {n_containers_unhealthy}, "
             f"number of containers missing metric (includes unhealthy): {n_containers_missing_metric}, "
             f"number of provisioned containers: {num_provisioned_containers}, "
@@ -468,6 +407,43 @@ class _FlashPrometheusAutoscaler:
         return desired_replicas
+    async def _get_scaling_info(self, containers) -> tuple[float, int]:
+        """Get metrics using either internal container metrics API or prometheus HTTP endpoints."""
+        if self.metrics_endpoint == "internal":
+            container_metrics_results = await asyncio.gather(
+                *[self._get_container_metrics(container.task_id) for container in containers]
+            )
+            container_metrics_list = []
+            for container_metric in container_metrics_results:
+                if container_metric is None:
+                    continue
+                container_metrics_list.append(getattr(container_metric.metrics, self.target_metric))
+            sum_metric = sum(container_metrics_list)
+            n_containers_with_metrics = len(container_metrics_list)
+        else:
+            sum_metric = 0
+            n_containers_with_metrics = 0
+            container_metrics_list = await asyncio.gather(
+                *[
+                    self._get_metrics(f"https://{container.host}:{container.port}/{self.metrics_endpoint}")
+                    for container in containers
+                ]
+            )
+            for container_metrics in container_metrics_list:
+                if (
+                    container_metrics is None
+                    or self.target_metric not in container_metrics
+                    or len(container_metrics[self.target_metric]) == 0
+                ):
+                    continue
+                sum_metric += container_metrics[self.target_metric][0].value
+                n_containers_with_metrics += 1
+        return sum_metric, n_containers_with_metrics
     async def _get_metrics(self, url: str) -> Optional[dict[str, list[Any]]]:  # technically any should be Sample
         from prometheus_client.parser import Sample, text_string_to_metric_families

modal/experimental/flash.pyi CHANGED Viewed

@@ -138,11 +138,25 @@ class _FlashPrometheusAutoscaler:
     async def start(self): ...
     async def _run_autoscaler_loop(self): ...
-    async def _compute_target_containers_internal(self, current_replicas: int) -> int:
+    async def _compute_target_containers(self, current_replicas: int) -> int:
         """Gets internal metrics from container to autoscale up or down."""
         ...
-    async def _compute_target_containers_prometheus(self, current_replicas: int) -> int: ...
+    def _calculate_desired_replicas(
+        self,
+        n_current_replicas: int,
+        sum_metric: float,
+        n_containers_with_metrics: int,
+        n_total_containers: int,
+        target_metric_value: float,
+    ) -> int:
+        """Calculate the desired number of replicas to autoscale to."""
+        ...
+    async def _get_scaling_info(self, containers) -> tuple[float, int]:
+        """Get metrics using either internal container metrics API or prometheus HTTP endpoints."""
+        ...
     async def _get_metrics(self, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
     async def _get_container_metrics(
         self, container_id: str
@@ -209,7 +223,7 @@ class FlashPrometheusAutoscaler:
     _run_autoscaler_loop: ___run_autoscaler_loop_spec[typing_extensions.Self]
-    class ___compute_target_containers_internal_spec(typing_extensions.Protocol[SUPERSELF]):
+    class ___compute_target_containers_spec(typing_extensions.Protocol[SUPERSELF]):
         def __call__(self, /, current_replicas: int) -> int:
             """Gets internal metrics from container to autoscale up or down."""
             ...
@@ -218,13 +232,29 @@ class FlashPrometheusAutoscaler:
             """Gets internal metrics from container to autoscale up or down."""
             ...
-    _compute_target_containers_internal: ___compute_target_containers_internal_spec[typing_extensions.Self]
+    _compute_target_containers: ___compute_target_containers_spec[typing_extensions.Self]
-    class ___compute_target_containers_prometheus_spec(typing_extensions.Protocol[SUPERSELF]):
-        def __call__(self, /, current_replicas: int) -> int: ...
-        async def aio(self, /, current_replicas: int) -> int: ...
+    def _calculate_desired_replicas(
+        self,
+        n_current_replicas: int,
+        sum_metric: float,
+        n_containers_with_metrics: int,
+        n_total_containers: int,
+        target_metric_value: float,
+    ) -> int:
+        """Calculate the desired number of replicas to autoscale to."""
+        ...
+    class ___get_scaling_info_spec(typing_extensions.Protocol[SUPERSELF]):
+        def __call__(self, /, containers) -> tuple[float, int]:
+            """Get metrics using either internal container metrics API or prometheus HTTP endpoints."""
+            ...
+        async def aio(self, /, containers) -> tuple[float, int]:
+            """Get metrics using either internal container metrics API or prometheus HTTP endpoints."""
+            ...
-    _compute_target_containers_prometheus: ___compute_target_containers_prometheus_spec[typing_extensions.Self]
+    _get_scaling_info: ___get_scaling_info_spec[typing_extensions.Self]
     class ___get_metrics_spec(typing_extensions.Protocol[SUPERSELF]):
         def __call__(self, /, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...

{modal-1.1.5.dev7.dist-info → modal-1.1.5.dev9.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: modal
-Version: 1.1.5.dev7
+Version: 1.1.5.dev9
 Summary: Python client library for Modal
 Author-email: Modal Labs <support@modal.com>
 License: Apache-2.0

{modal-1.1.5.dev7.dist-info → modal-1.1.5.dev9.dist-info}/RECORD RENAMED Viewed

@@ -22,7 +22,7 @@ modal/app.py,sha256=F4baVULljFq0CwC_7U-EKNRNx7CYeWBKudjjYUuWc4U,48416
 modal/app.pyi,sha256=AbXJCBkyt2rI_-M3VbTBYb32at0P6iRZuoC87xY_JrQ,43591
 modal/call_graph.py,sha256=1g2DGcMIJvRy-xKicuf63IVE98gJSnQsr8R_NVMptNc,2581
 modal/client.py,sha256=kyAIVB3Ay-XKJizQ_1ufUFB__EagV0MLmHJpyYyJ7J0,18636
-modal/client.pyi,sha256=zPcN6BqCMJQz4jsZ7qWnE3DFzTLShZIxGHWgP-iTj3M,15829
+modal/client.pyi,sha256=axk1bGHT1cC_rghFoVhClKrxzrPr_OHy8Gb7BGTfXo8,15829
 modal/cloud_bucket_mount.py,sha256=YOe9nnvSr4ZbeCn587d7_VhE9IioZYRvF9VYQTQux08,5914
 modal/cloud_bucket_mount.pyi,sha256=-qSfYAQvIoO_l2wsCCGTG5ZUwQieNKXdAO00yP1-LYU,7394
 modal/cls.py,sha256=pTEO7pHjlO7taMbIqs4oI9ZZgKDJpVKyGkO5ZT0w6tQ,40934
@@ -150,10 +150,10 @@ modal/cli/programs/run_jupyter.py,sha256=44Lpvqk2l3hH-uOkmAOzw60NEsfB5uaRDWDKVsh
 modal/cli/programs/run_marimo.py,sha256=HyZ2za0NYqg31-mGxFQxUIAJ3Q-jRaMocEwWwI5-cdw,2887
 modal/cli/programs/vscode.py,sha256=KbTAaIXyQBVCDXxXjmBHmKpgXkUw0q4R4KkJvUjCYgk,3380
 modal/experimental/__init__.py,sha256=fCqzo_f3vcY750vHtd7CtLs5dvdM_C0ZLLGb3zXuK9w,14913
-modal/experimental/flash.py,sha256=DhX0HG-K55FSK_ym0MjXC9ObVflmamAXfFY-TnuN5SM,30056
-modal/experimental/flash.pyi,sha256=EZiceVufvf7jsqKSQG-gd1hwqxzbEZxMkB1NtrK3AnE,14270
+modal/experimental/flash.py,sha256=8HOHZ0XLSN8Znzsi6hGggS46CC6t_7IgGWyNoeSXS9o,28417
+modal/experimental/flash.pyi,sha256=R9VV0UDotiY9BRUjacB-xI4qhR3yBymAvEZFRFHztLs,15143
 modal/experimental/ipython.py,sha256=TrCfmol9LGsRZMeDoeMPx3Hv3BFqQhYnmD_iH0pqdhk,2904
-modal-1.1.5.dev7.dist-info/licenses/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
+modal-1.1.5.dev9.dist-info/licenses/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
 modal_docs/__init__.py,sha256=svYKtV8HDwDCN86zbdWqyq5T8sMdGDj0PVlzc2tIxDM,28
 modal_docs/gen_cli_docs.py,sha256=c1yfBS_x--gL5bs0N4ihMwqwX8l3IBWSkBAKNNIi6bQ,3801
 modal_docs/gen_reference_docs.py,sha256=d_CQUGQ0rfw28u75I2mov9AlS773z9rG40-yq5o7g2U,6359
@@ -161,10 +161,10 @@ modal_docs/mdmd/__init__.py,sha256=svYKtV8HDwDCN86zbdWqyq5T8sMdGDj0PVlzc2tIxDM,2
 modal_docs/mdmd/mdmd.py,sha256=tUTImNd4UMFk1opkaw8J672gX8AkBO5gbY2S_NMxsxs,7140
 modal_docs/mdmd/signatures.py,sha256=XJaZrK7Mdepk5fdX51A8uENiLFNil85Ud0d4MH8H5f0,3218
 modal_proto/__init__.py,sha256=MIEP8jhXUeGq_eCjYFcqN5b1bxBM4fdk0VESpjWR0fc,28
-modal_proto/api.proto,sha256=9Vh3r3SjlJh2a9Eb6GwFYh2R6aCKV2-pEQXmVeOYkAE,105611
+modal_proto/api.proto,sha256=4m9Y592ADWWjxaf80HemhUk8hgGU6qN8RCe4dhSaE5o,105717
 modal_proto/api_grpc.py,sha256=r1k5m9Z2brIWtkY59mPwAGSavhssStzmsSq_PEwkZWA,129564
-modal_proto/api_pb2.py,sha256=M0DAllkbXBuQGnXEYiOaqzZoo5TQy0IS9DTHpzHNliU,370000
-modal_proto/api_pb2.pyi,sha256=29iTBpyQozfgPuLuQmY_05PIx4PBJE7yQ-wpgmUrWz8,510035
+modal_proto/api_pb2.py,sha256=P9BfjTZdEY5dGTrILAG3Z1QnYPQdrr1Styx1eISsdOk,370095
+modal_proto/api_pb2.pyi,sha256=UkNiuRnke0P2oNEOHIJ0WnQ91NsAuALBv4oI8jaByIA,510613
 modal_proto/api_pb2_grpc.py,sha256=ovaCh4weYl5r0XfX_2F2M2pn6XGH7Hl-BmVaIB62ssY,279448
 modal_proto/api_pb2_grpc.pyi,sha256=q_S2ScjUjS2bonUnEBqwCXVRZUwEuO5jOD2eL9nw0Wc,65439
 modal_proto/modal_api_grpc.py,sha256=E-5WMq82AVvtGRsmb7AqYzOz-cfqTOokp-QjaFaOhpo,19547
@@ -176,10 +176,10 @@ modal_proto/options_pb2.pyi,sha256=l7DBrbLO7q3Ir-XDkWsajm0d0TQqqrfuX54i4BMpdQg,1
 modal_proto/options_pb2_grpc.py,sha256=1oboBPFxaTEXt9Aw7EAj8gXHDCNMhZD2VXqocC9l_gk,159
 modal_proto/options_pb2_grpc.pyi,sha256=CImmhxHsYnF09iENPoe8S4J-n93jtgUYD2JPAc0yJSI,247
 modal_proto/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-modal_version/__init__.py,sha256=HPnRO9vpz9RoHYzNERnfbmzHSkXVz8ASVK3ldmoP_mw,120
+modal_version/__init__.py,sha256=31WlaQ95-Yb_-QRoYMW2iJofXkQIL15n6Jk0rpH5Zbk,120
 modal_version/__main__.py,sha256=2FO0yYQQwDTh6udt1h-cBnGd1c4ZyHnHSI4BksxzVac,105
-modal-1.1.5.dev7.dist-info/METADATA,sha256=MlzcRmSlouWM_AhEfxMl17M4EbNZ5J1YPJT0nCZcyHk,2459
-modal-1.1.5.dev7.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
-modal-1.1.5.dev7.dist-info/entry_points.txt,sha256=An-wYgeEUnm6xzrAP9_NTSTSciYvvEWsMZILtYrvpAI,46
-modal-1.1.5.dev7.dist-info/top_level.txt,sha256=4BWzoKYREKUZ5iyPzZpjqx4G8uB5TWxXPDwibLcVa7k,43
-modal-1.1.5.dev7.dist-info/RECORD,,
+modal-1.1.5.dev9.dist-info/METADATA,sha256=lc-vSW0z_Ay0U1z-aCyG3fpF1stE5xFO1-UPwkuZV9k,2459
+modal-1.1.5.dev9.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
+modal-1.1.5.dev9.dist-info/entry_points.txt,sha256=An-wYgeEUnm6xzrAP9_NTSTSciYvvEWsMZILtYrvpAI,46
+modal-1.1.5.dev9.dist-info/top_level.txt,sha256=4BWzoKYREKUZ5iyPzZpjqx4G8uB5TWxXPDwibLcVa7k,43
+modal-1.1.5.dev9.dist-info/RECORD,,

modal_proto/api.proto CHANGED Viewed

@@ -642,6 +642,8 @@ message AutoscalerSettings {
   optional uint32 scaleup_window = 4;
   // Maximum amount of time a container can be idle before being scaled down, in seconds; pka "container_idle_timeout"
   optional uint32 scaledown_window = 5;
+  // Target number of containers autoscaler will try to maintain
+  optional uint32 target_containers = 6;
 }
 // Used for flash autoscaling

modal 1.1.5.dev7__py3-none-any.whl → 1.1.5.dev9__py3-none-any.whl

modal 1.1.5.dev7__py3-none-any.whl → 1.1.5.dev9__py3-none-any.whl