PyPI - modal - Versions diffs - 1.1.5.dev83__py3-none-any.whl → 1.3.1.dev8__py3-none-any.whl - Mend

modal 1.1.5.dev83__py3-none-any.whl → 1.3.1.dev8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of modal might be problematic. Click here for more details.

Files changed (139)

modal/__init__.py +4 -4
modal/__main__.py +4 -29
modal/_billing.py +84 -0
modal/_clustered_functions.py +1 -3
modal/_container_entrypoint.py +33 -208
modal/_functions.py +146 -121
modal/_grpc_client.py +191 -0
modal/_ipython.py +16 -6
modal/_load_context.py +106 -0
modal/_object.py +72 -21
modal/_output.py +12 -14
modal/_partial_function.py +31 -4
modal/_resolver.py +44 -57
modal/_runtime/container_io_manager.py +26 -28
modal/_runtime/container_io_manager.pyi +42 -44
modal/_runtime/gpu_memory_snapshot.py +9 -7
modal/_runtime/user_code_event_loop.py +80 -0
modal/_runtime/user_code_imports.py +236 -10
modal/_serialization.py +2 -1
modal/_traceback.py +4 -13
modal/_tunnel.py +16 -11
modal/_tunnel.pyi +25 -3
modal/_utils/async_utils.py +337 -10
modal/_utils/auth_token_manager.py +1 -4
modal/_utils/blob_utils.py +29 -22
modal/_utils/function_utils.py +20 -21
modal/_utils/grpc_testing.py +6 -3
modal/_utils/grpc_utils.py +223 -64
modal/_utils/mount_utils.py +26 -1
modal/_utils/package_utils.py +0 -1
modal/_utils/rand_pb_testing.py +8 -1
modal/_utils/task_command_router_client.py +524 -0
modal/_vendor/cloudpickle.py +144 -48
modal/app.py +215 -96
modal/app.pyi +78 -37
modal/billing.py +5 -0
modal/builder/2025.06.txt +6 -3
modal/builder/PREVIEW.txt +2 -1
modal/builder/base-images.json +4 -2
modal/cli/_download.py +19 -3
modal/cli/cluster.py +4 -2
modal/cli/config.py +3 -1
modal/cli/container.py +5 -4
modal/cli/dict.py +5 -2
modal/cli/entry_point.py +26 -2
modal/cli/environment.py +2 -16
modal/cli/launch.py +1 -76
modal/cli/network_file_system.py +5 -20
modal/cli/queues.py +5 -4
modal/cli/run.py +24 -204
modal/cli/secret.py +1 -2
modal/cli/shell.py +375 -0
modal/cli/utils.py +1 -13
modal/cli/volume.py +11 -17
modal/client.py +16 -125
modal/client.pyi +94 -144
modal/cloud_bucket_mount.py +3 -1
modal/cloud_bucket_mount.pyi +4 -0
modal/cls.py +101 -64
modal/cls.pyi +9 -8
modal/config.py +21 -1
modal/container_process.py +288 -12
modal/container_process.pyi +99 -38
modal/dict.py +72 -33
modal/dict.pyi +88 -57
modal/environments.py +16 -8
modal/environments.pyi +6 -2
modal/exception.py +154 -16
modal/experimental/__init__.py +23 -5
modal/experimental/flash.py +161 -74
modal/experimental/flash.pyi +97 -49
modal/file_io.py +50 -92
modal/file_io.pyi +117 -89
modal/functions.pyi +70 -87
modal/image.py +73 -47
modal/image.pyi +33 -30
modal/io_streams.py +500 -149
modal/io_streams.pyi +279 -189
modal/mount.py +60 -45
modal/mount.pyi +41 -17
modal/network_file_system.py +19 -11
modal/network_file_system.pyi +72 -39
modal/object.pyi +114 -22
modal/parallel_map.py +42 -44
modal/parallel_map.pyi +9 -17
modal/partial_function.pyi +4 -2
modal/proxy.py +14 -6
modal/proxy.pyi +10 -2
modal/queue.py +45 -38
modal/queue.pyi +88 -52
modal/runner.py +96 -96
modal/runner.pyi +44 -27
modal/sandbox.py +225 -108
modal/sandbox.pyi +226 -63
modal/secret.py +58 -56
modal/secret.pyi +28 -13
modal/serving.py +7 -11
modal/serving.pyi +7 -8
modal/snapshot.py +29 -15
modal/snapshot.pyi +18 -10
modal/token_flow.py +1 -1
modal/token_flow.pyi +4 -6
modal/volume.py +102 -55
modal/volume.pyi +125 -66
{modal-1.1.5.dev83.dist-info → modal-1.3.1.dev8.dist-info}/METADATA +10 -9
modal-1.3.1.dev8.dist-info/RECORD +189 -0
modal_proto/api.proto +86 -30
modal_proto/api_grpc.py +10 -25
modal_proto/api_pb2.py +1080 -1047
modal_proto/api_pb2.pyi +253 -79
modal_proto/api_pb2_grpc.py +14 -48
modal_proto/api_pb2_grpc.pyi +6 -18
modal_proto/modal_api_grpc.py +175 -176
modal_proto/{sandbox_router.proto → task_command_router.proto} +62 -45
modal_proto/task_command_router_grpc.py +138 -0
modal_proto/task_command_router_pb2.py +180 -0
modal_proto/{sandbox_router_pb2.pyi → task_command_router_pb2.pyi} +110 -63
modal_proto/task_command_router_pb2_grpc.py +272 -0
modal_proto/task_command_router_pb2_grpc.pyi +100 -0
modal_version/__init__.py +1 -1
modal_version/__main__.py +1 -1
modal/cli/programs/launch_instance_ssh.py +0 -94
modal/cli/programs/run_marimo.py +0 -95
modal-1.1.5.dev83.dist-info/RECORD +0 -191
modal_proto/modal_options_grpc.py +0 -3
modal_proto/options.proto +0 -19
modal_proto/options_grpc.py +0 -3
modal_proto/options_pb2.py +0 -35
modal_proto/options_pb2.pyi +0 -20
modal_proto/options_pb2_grpc.py +0 -4
modal_proto/options_pb2_grpc.pyi +0 -7
modal_proto/sandbox_router_grpc.py +0 -105
modal_proto/sandbox_router_pb2.py +0 -148
modal_proto/sandbox_router_pb2_grpc.py +0 -203
modal_proto/sandbox_router_pb2_grpc.pyi +0 -75
{modal-1.1.5.dev83.dist-info → modal-1.3.1.dev8.dist-info}/WHEEL +0 -0
{modal-1.1.5.dev83.dist-info → modal-1.3.1.dev8.dist-info}/entry_points.txt +0 -0
{modal-1.1.5.dev83.dist-info → modal-1.3.1.dev8.dist-info}/licenses/LICENSE +0 -0
{modal-1.1.5.dev83.dist-info → modal-1.3.1.dev8.dist-info}/top_level.txt +0 -0

modal/exception.py CHANGED Viewed

@@ -1,7 +1,45 @@
 # Copyright Modal Labs 2022
+"""
+Modal-specific exception types.
+## Notes on `grpclib.GRPCError` migration
+Historically, the Modal SDK could propagate `grpclib.GRPCError` exceptions out
+to user code.  As of v1.3, we are in the process of gracefully migrating to
+always raising a Modal exception type in these cases. To avoid breaking user
+code that relies on catching `grpclib.GRPCError`, a subset of Modal exception
+types temporarily inherit from `grpclib.GRPCError`.
+We encourage users to migrate any code that currently catches `grpclib.GRPCError`
+to instead catch the appropriate Modal exception type. The following mapping
+between GRPCError status codes and Modal exception types is currently in use:
+```
+CANCELLED -> ServiceError
+UNKNOWN -> ServiceError
+INVALID_ARGUMENT -> InvalidError
+DEADLINE_EXCEEDED -> ServiceError
+NOT_FOUND -> NotFoundError
+ALREADY_EXISTS -> AlreadyExistsError
+PERMISSION_DENIED -> PermissionDeniedError
+RESOURCE_EXHAUSTED -> ResourceExhaustedError
+FAILED_PRECONDITION -> ConflictError
+ABORTED -> ConflictError
+OUT_OF_RANGE -> InvalidError
+UNIMPLEMENTED -> UnimplementedError
+INTERNAL -> InternalError
+UNAVAILABLE -> ServiceError
+DATA_LOSS -> DataLossError
+UNAUTHENTICATED -> AuthError
+```
+"""
 import random
 import signal
+from typing import Any, Optional
+import grpclib
 import synchronicity.exceptions
 UserCodeException = synchronicity.exceptions.UserCodeException  # Deprecated type used for return_exception wrapping
@@ -26,10 +64,116 @@ class Error(Exception):
     """
-class AlreadyExistsError(Error):
+class _GRPCErrorWrapper(grpclib.GRPCError):
+    """This transitional class helps us migrate away from propagating `grpclib.GRPCError` to users.
+    It serves two purposes:
+    - It avoids abruptly breaking user code that catches `grpclib.GRPCError`
+    - It actively warns when users access attributes defined by `grpclib.GRPCError`
+    This won't catch all cases (users might react indiscriminately to GRPCError without checking the status).
+    The mapping between GRPCError status codes and our error types is defined in `modal._grpc_client`.
+    """
+    # These will be set on the instance in our error handling middleware
+    _grpc_message: str
+    _grpc_status: grpclib.Status
+    _grpc_details: Any
+    def __init__(self, message: Optional[str] = None):
+        # Override GRPCError's init and repr to behave more like a regular Exception
+        # (We don't customize these anywhere in our custom error types currently).
+        self._message = message or ""
+    def __repr__(self) -> str:
+        return f"{type(self).__name__}({self._message!r})"
+    def _warn_on_grpc_error_attribute_access(self) -> None:
+        from ._utils.deprecation import deprecation_warning  # Avoid circular import
+        exc_type = type(self).__name__
+        deprecation_warning(
+            (2025, 12, 9),
+            "Modal will stop propagating the `grpclib.GRPCError` type in the future. "
+            f"Update your code so that it catches `modal.exception.{exc_type}` directly "
+            "to avoid changes to error handling behavior in the future.",
+            pending=True,
+        )
+    @property
+    def message(self) -> str:
+        self._warn_on_grpc_error_attribute_access()
+        return self._grpc_message
+    @message.setter
+    def message(self, value: str) -> None:
+        self._grpc_message = value
+    @property
+    def status(self) -> grpclib.Status:
+        self._warn_on_grpc_error_attribute_access()
+        return self._grpc_status
+    @status.setter
+    def status(self, value: grpclib.Status) -> None:
+        self._grpc_status = value
+    @property
+    def details(self) -> Any:
+        self._warn_on_grpc_error_attribute_access()
+        return self._grpc_details
+    @details.setter
+    def details(self, value: Any) -> None:
+        self._grpc_details = value
+class AlreadyExistsError(Error, _GRPCErrorWrapper):
     """Raised when a resource creation conflicts with an existing resource."""
+class AuthError(Error, _GRPCErrorWrapper):
+    """Raised when a client has missing or invalid authentication."""
+class InternalError(Error, _GRPCErrorWrapper):
+    """Raised when an internal error occurs in the Modal system."""
+class InvalidError(Error, _GRPCErrorWrapper):
+    """Raised when user does something invalid."""
+class ConflictError(InvalidError, _GRPCErrorWrapper):
+    """Raised when a resource conflict occurs between the request and current system state."""
+class DataLossError(Error, _GRPCErrorWrapper):
+    """Raised when data is lost or corrupted."""
+class NotFoundError(Error, _GRPCErrorWrapper):
+    """Raised when a requested resource was not found."""
+class PermissionDeniedError(Error, _GRPCErrorWrapper):
+    """Raised when a user does not have permission to perform the requested operation."""
+class ResourceExhaustedError(Error, _GRPCErrorWrapper):
+    """Raised when a server-side resource has been exhausted, e.g. a quota or rate limit."""
+class ServiceError(Error, _GRPCErrorWrapper):
+    """Raised when an error occurs in basic client/server communication."""
+class UnimplementedError(Error, _GRPCErrorWrapper):
+    """Raised when a requested operation is not implemented or not supported."""
 class RemoteError(Error):
     """Raised when an error occurs on the Modal server."""
@@ -42,6 +186,10 @@ class SandboxTimeoutError(TimeoutError):
     """Raised when a Sandbox exceeds its execution duration limit and times out."""
+class ExecTimeoutError(TimeoutError):
+    """Raised when a container process exceeds its execution duration limit and times out."""
 class SandboxTerminatedError(Error):
     """Raised when a Sandbox is terminated for an internal reason."""
@@ -66,26 +214,14 @@ class OutputExpiredError(TimeoutError):
     """Raised when the Output exceeds expiration and times out."""
-class AuthError(Error):
-    """Raised when a client has missing or invalid authentication."""
 class ConnectionError(Error):
     """Raised when an issue occurs while connecting to the Modal servers."""
-class InvalidError(Error):
-    """Raised when user does something invalid."""
 class VersionError(Error):
     """Raised when the current client version of Modal is unsupported."""
-class NotFoundError(Error):
-    """Raised when a requested resource was not found."""
 class ExecutionError(Error):
     """Raised when something unexpected happened during runtime."""
@@ -116,10 +252,12 @@ class ServerWarning(UserWarning):
     """Warning originating from the Modal server and re-issued in client code."""
+class AsyncUsageWarning(UserWarning):
+    """Warning emitted when a blocking Modal interface is used in an async context."""
 class InternalFailure(Error):
-    """
-    Retriable internal error.
-    """
+    """Retriable internal error."""
 class _CliUserExecutionError(Exception):

modal/experimental/__init__.py CHANGED Viewed

@@ -13,14 +13,18 @@ from .._object import _get_environment_name
 from .._partial_function import _clustered
 from .._runtime.container_io_manager import _ContainerIOManager
 from .._utils.async_utils import synchronize_api, synchronizer
-from .._utils.grpc_utils import retry_transient_errors
 from ..app import _App
 from ..client import _Client
 from ..cls import _Cls
 from ..exception import InvalidError
 from ..image import DockerfileSpec, ImageBuilderVersion, _Image, _ImageRegistryConfig
 from ..secret import _Secret
-from .flash import flash_forward, flash_get_containers, flash_prometheus_autoscaler  # noqa: F401
+from .flash import (  # noqa: F401
+    flash_forward,
+    flash_get_containers,
+    flash_prometheus_autoscaler,
+    http_server,
+)
 def stop_fetching_inputs():
@@ -86,6 +90,19 @@ async def list_deployed_apps(environment_name: str = "", client: Optional[_Clien
     return app_infos
+@synchronizer.create_blocking
+async def stop_app(name: str, *, environment_name: Optional[str] = None, client: Optional[_Client] = None) -> None:
+    """Stop a deployed App.
+    This interface is experimental and may change in the future,
+    although the functionality will continue to be supported.
+    """
+    client_ = client or await _Client.from_env()
+    app = await _App.lookup(name, environment_name=environment_name, client=client_)
+    req = api_pb2.AppStopRequest(app_id=app.app_id, source=api_pb2.APP_STOP_SOURCE_PYTHON_CLIENT)
+    await client_.stub.AppStop(req)
 @synchronizer.create_blocking
 async def get_app_objects(
     app_name: str, *, environment_name: Optional[str] = None, client: Optional[_Client] = None
@@ -116,7 +133,7 @@ async def get_app_objects(
     app = await _App.lookup(app_name, environment_name=environment_name, client=client)
     req = api_pb2.AppGetLayoutRequest(app_id=app.app_id)
-    app_layout_resp = await retry_transient_errors(client.stub.AppGetLayout, req)
+    app_layout_resp = await client.stub.AppGetLayout(req)
     app_objects: dict[str, Union[_Function, _Cls]] = {}
@@ -347,7 +364,8 @@ async def image_delete(
 ) -> None:
     """Delete an Image by its ID.
-    Deletion is irreversible and will prevent Apps from using the Image.
+    Deletion is irreversible and will prevent Functions/Sandboxes from using
+    the Image.
     This is an experimental interface for a feature that we will be adding to
     the main Image class. The stable form of this interface may look different.
@@ -361,4 +379,4 @@ async def image_delete(
         client = await _Client.from_env()
     req = api_pb2.ImageDeleteRequest(image_id=image_id)
-    await retry_transient_errors(client.stub.ImageDelete, req)
+    await client.stub.ImageDelete(req)

modal/experimental/flash.py CHANGED Viewed

@@ -7,16 +7,16 @@ import sys
 import time
 import traceback
 from collections import defaultdict
-from typing import Any, Optional
+from typing import Any, Callable, Optional, Union
 from urllib.parse import urlparse
+from modal._partial_function import _PartialFunctionFlags
 from modal.cls import _Cls
 from modal.dict import _Dict
 from modal_proto import api_pb2
 from .._tunnel import _forward as _forward_tunnel
 from .._utils.async_utils import synchronize_api, synchronizer
-from .._utils.grpc_utils import retry_transient_errors
 from ..client import _Client
 from ..config import logger
 from ..exception import InvalidError
@@ -29,15 +29,20 @@ class _FlashManager:
         self,
         client: _Client,
         port: int,
-        process: Optional[subprocess.Popen] = None,
+        process: Optional[subprocess.Popen] = None,  # to be deprecated
         health_check_url: Optional[str] = None,
+        startup_timeout: int = 30,
+        exit_grace_period: int = 0,
+        h2_enabled: bool = False,
     ):
         self.client = client
         self.port = port
+        self.process = process
         # Health check is not currently being used
         self.health_check_url = health_check_url
-        self.process = process
-        self.tunnel_manager = _forward_tunnel(port, client=client)
+        self.startup_timeout = startup_timeout
+        self.exit_grace_period = exit_grace_period
+        self.tunnel_manager = _forward_tunnel(port, h2_enabled=h2_enabled, client=client)
         self.stopped = False
         self.num_failures = 0
         self.task_id = os.environ["MODAL_TASK_ID"]
@@ -49,10 +54,15 @@ class _FlashManager:
         start_time = time.monotonic()
+        def check_process_is_running() -> Optional[Exception]:
+            if process is not None and process.poll() is not None:
+                return Exception(f"Process {process.pid} exited with code {process.returncode}")
+            return None
         while time.monotonic() - start_time < timeout:
             try:
-                if process is not None and process.poll() is not None:
-                    return False, Exception(f"Process {process.pid} exited with code {process.returncode}")
+                if error := check_process_is_running():
+                    return False, error
                 with socket.create_connection(("localhost", self.port), timeout=0.5):
                     return True, None
             except (ConnectionRefusedError, OSError):
@@ -101,6 +111,7 @@ class _FlashManager:
     async def _run_heartbeat(self, host: str, port: int):
         first_registration = True
+        start_time = time.monotonic()
         while True:
             try:
                 port_check_resp, port_check_error = await self.is_port_connection_healthy(process=self.process)
@@ -113,6 +124,7 @@ class _FlashManager:
                             port=port,
                         ),
                         timeout=10,
+                        retry=None,
                     )
                     self.num_failures = 0
                     if first_registration:
@@ -121,15 +133,16 @@ class _FlashManager:
                         )
                         first_registration = False
                 else:
-                    logger.error(
-                        f"[Modal Flash] Deregistering container {self.task_id} on {self.tunnel.url} "
-                        f"due to error: {port_check_error}, num_failures: {self.num_failures}"
-                    )
-                    self.num_failures += 1
-                    await retry_transient_errors(
-                        self.client.stub.FlashContainerDeregister,
-                        api_pb2.FlashContainerDeregisterRequest(),
-                    )
+                    if first_registration and (time.monotonic() - start_time < self.startup_timeout):
+                        continue
+                    else:
+                        logger.error(
+                            f"[Modal Flash] Deregistering container {self.task_id} on {self.tunnel.url} "
+                            f"due to error: {port_check_error}, num_failures: {self.num_failures}"
+                        )
+                        self.num_failures += 1
+                        await self.client.stub.FlashContainerDeregister(api_pb2.FlashContainerDeregisterRequest())
             except asyncio.CancelledError:
                 logger.warning("[Modal Flash] Shutting down...")
                 break
@@ -147,12 +160,12 @@ class _FlashManager:
         return self.tunnel.url
     async def stop(self):
-        self.heartbeat_task.cancel()
-        await retry_transient_errors(
-            self.client.stub.FlashContainerDeregister,
-            api_pb2.FlashContainerDeregisterRequest(),
-        )
+        try:
+            self.heartbeat_task.cancel()
+        except Exception as e:
+            logger.error(f"[Modal Flash] Error stopping: {e}")
+        await self.client.stub.FlashContainerDeregister(api_pb2.FlashContainerDeregisterRequest())
         self.stopped = True
         logger.warning(f"[Modal Flash] No longer accepting new requests on {self.tunnel.url}.")
@@ -163,18 +176,23 @@ class _FlashManager:
         if not self.stopped:
             await self.stop()
+        await asyncio.sleep(self.exit_grace_period)
         logger.warning(f"[Modal Flash] Closing tunnel on {self.tunnel.url}.")
         await self.tunnel_manager.__aexit__(*sys.exc_info())
-FlashManager = synchronize_api(_FlashManager)
+FlashManager = synchronize_api(_FlashManager, target_module=__name__)
 @synchronizer.create_blocking
 async def flash_forward(
     port: int,
-    process: Optional[subprocess.Popen] = None,
+    process: Optional[subprocess.Popen] = None,  # to be deprecated
     health_check_url: Optional[str] = None,
+    startup_timeout: int = 30,
+    exit_grace_period: int = 0,
+    h2_enabled: bool = False,
 ) -> _FlashManager:
     """
     Forward a port to the Modal Flash service, exposing that port as a stable web endpoint.
@@ -183,7 +201,15 @@ async def flash_forward(
     """
     client = await _Client.from_env()
-    manager = _FlashManager(client, port, process=process, health_check_url=health_check_url)
+    manager = _FlashManager(
+        client,
+        port,
+        process=process,
+        health_check_url=health_check_url,
+        startup_timeout=startup_timeout,
+        exit_grace_period=exit_grace_period,
+        h2_enabled=h2_enabled,
+    )
     await manager._start()
     return manager
@@ -321,7 +347,7 @@ class _FlashPrometheusAutoscaler:
     async def _compute_target_containers(self, current_replicas: int) -> int:
         """
-        Gets internal metrics from container to autoscale up or down.
+        Gets metrics from container to autoscale up or down.
         """
         containers = await self._get_all_containers()
         if len(containers) > current_replicas:
@@ -334,7 +360,7 @@ class _FlashPrometheusAutoscaler:
         if current_replicas == 0:
             return 1
-        # Get metrics based on autoscaler type (prometheus or internal)
+        # Get metrics based on autoscaler type
         sum_metric, n_containers_with_metrics = await self._get_scaling_info(containers)
         desired_replicas = self._calculate_desired_replicas(
@@ -406,39 +432,26 @@ class _FlashPrometheusAutoscaler:
         return desired_replicas
     async def _get_scaling_info(self, containers) -> tuple[float, int]:
-        """Get metrics using either internal container metrics API or prometheus HTTP endpoints."""
-        if self.metrics_endpoint == "internal":
-            container_metrics_results = await asyncio.gather(
-                *[self._get_container_metrics(container.task_id) for container in containers]
-            )
-            container_metrics_list = []
-            for container_metric in container_metrics_results:
-                if container_metric is None:
-                    continue
-                container_metrics_list.append(getattr(container_metric.metrics, self.target_metric))
-            sum_metric = sum(container_metrics_list)
-            n_containers_with_metrics = len(container_metrics_list)
-        else:
-            sum_metric = 0
-            n_containers_with_metrics = 0
-            container_metrics_list = await asyncio.gather(
-                *[
-                    self._get_metrics(f"https://{container.host}:{container.port}/{self.metrics_endpoint}")
-                    for container in containers
-                ]
-            )
+        """Get metrics using container exposed metrics endpoints."""
+        sum_metric = 0
+        n_containers_with_metrics = 0
+        container_metrics_list = await asyncio.gather(
+            *[
+                self._get_metrics(f"https://{container.host}:{container.port}/{self.metrics_endpoint}")
+                for container in containers
+            ]
+        )
-            for container_metrics in container_metrics_list:
-                if (
-                    container_metrics is None
-                    or self.target_metric not in container_metrics
-                    or len(container_metrics[self.target_metric]) == 0
-                ):
-                    continue
-                sum_metric += container_metrics[self.target_metric][0].value
-                n_containers_with_metrics += 1
+        for container_metrics in container_metrics_list:
+            if (
+                container_metrics is None
+                or self.target_metric not in container_metrics
+                or len(container_metrics[self.target_metric]) == 0
+            ):
+                continue
+            sum_metric += container_metrics[self.target_metric][0].value
+            n_containers_with_metrics += 1
         return sum_metric, n_containers_with_metrics
@@ -474,23 +487,14 @@ class _FlashPrometheusAutoscaler:
         return metrics
-    async def _get_container_metrics(self, container_id: str) -> Optional[api_pb2.TaskGetAutoscalingMetricsResponse]:
-        req = api_pb2.TaskGetAutoscalingMetricsRequest(task_id=container_id)
-        try:
-            resp = await retry_transient_errors(self.client.stub.TaskGetAutoscalingMetrics, req)
-            return resp
-        except Exception as e:
-            logger.warning(f"[Modal Flash] Error getting metrics for container {container_id}: {e}")
-            return None
     async def _get_all_containers(self):
         req = api_pb2.FlashContainerListRequest(function_id=self.fn.object_id)
-        resp = await retry_transient_errors(self.client.stub.FlashContainerList, req)
+        resp = await self.client.stub.FlashContainerList(req)
         return resp.containers
     async def _set_target_slots(self, target_slots: int):
         req = api_pb2.FlashSetTargetSlotsMetricsRequest(function_id=self.fn.object_id, target_slots=target_slots)
-        await retry_transient_errors(self.client.stub.FlashSetTargetSlotsMetrics, req)
+        await self.client.stub.FlashSetTargetSlotsMetrics(req)
         return
     def _make_scaling_decision(
@@ -572,14 +576,10 @@ async def flash_prometheus_autoscaler(
     app_name: str,
     cls_name: str,
     # Endpoint to fetch metrics from. Must be in Prometheus format. Example: "/metrics"
-    # If metrics_endpoint is "internal", we will use containers' internal metrics to autoscale instead.
     metrics_endpoint: str,
     # Target metric to autoscale on. Example: "vllm:num_requests_running"
-    # If metrics_endpoint is "internal", target_metrics options are: [cpu_usage_percent, memory_usage_percent]
     target_metric: str,
     # Target metric value. Example: 25
-    # If metrics_endpoint is "internal", target_metric_value is a percentage value between 0.1 and 1.0 (inclusive),
-    # indicating container's usage of that metric.
     target_metric_value: float,
     min_containers: Optional[int] = None,
     max_containers: Optional[int] = None,
@@ -645,5 +645,92 @@ async def flash_get_containers(app_name: str, cls_name: str) -> list[dict[str, A
     assert fn is not None
     await fn.hydrate(client=client)
     req = api_pb2.FlashContainerListRequest(function_id=fn.object_id)
-    resp = await retry_transient_errors(client.stub.FlashContainerList, req)
+    resp = await client.stub.FlashContainerList(req)
     return resp.containers
+def _http_server(
+    port: Optional[int] = None,
+    *,
+    proxy_regions: list[str] = [],  # The regions to proxy the HTTP server to.
+    startup_timeout: int = 30,  # Maximum number of seconds to wait for the HTTP server to start.
+    exit_grace_period: Optional[int] = None,  # The time to wait for the HTTP server to exit gracefully.
+    h2_enabled: bool = False,  # Whether to enable HTTP/2 support.
+):
+    """Decorator for Flash-enabled HTTP servers on Modal classes.
+    Args:
+        port: The local port to forward to the HTTP server.
+        proxy_regions: The regions to proxy the HTTP server to.
+        startup_timeout: The maximum time to wait for the HTTP server to start.
+        exit_grace_period: The time to wait for the HTTP server to exit gracefully.
+    """
+    if port is None:
+        raise InvalidError(
+            "Positional arguments are not allowed. Did you forget parentheses? Suggestion: `@modal.http_server()`."
+        )
+    if not isinstance(port, int) or port < 1 or port > 65535:
+        raise InvalidError("First argument of `@http_server` must be a local port, such as `@http_server(8000)`.")
+    if startup_timeout <= 0:
+        raise InvalidError("The `startup_timeout` argument of `@http_server` must be positive.")
+    if exit_grace_period is not None and exit_grace_period < 0:
+        raise InvalidError("The `exit_grace_period` argument of `@http_server` must be non-negative.")
+    from modal._partial_function import _PartialFunction, _PartialFunctionParams
+    params = _PartialFunctionParams(
+        http_config=api_pb2.HTTPConfig(
+            port=port,
+            proxy_regions=proxy_regions,
+            startup_timeout=startup_timeout or 0,
+            exit_grace_period=exit_grace_period or 0,
+            h2_enabled=h2_enabled,
+        )
+    )
+    def wrapper(obj: Union[Callable[..., Any], _PartialFunction]) -> _PartialFunction:
+        flags = _PartialFunctionFlags.HTTP_WEB_INTERFACE
+        if isinstance(obj, _PartialFunction):
+            pf = obj.stack(flags, params)
+        else:
+            pf = _PartialFunction(obj, flags, params)
+        pf.validate_obj_compatibility("`http_server`")
+        return pf
+    return wrapper
+http_server = synchronize_api(_http_server, target_module=__name__)
+class _FlashContainerEntry:
+    """
+    A class that manages the lifecycle of Flash manager for Flash containers.
+    It is intentional that stop() runs before exit handlers and close().
+    This ensures the container is deregistered first, preventing new requests from being routed to it
+    while exit handlers execute and the exit grace period elapses, before finally closing the tunnel.
+    """
+    def __init__(self, http_config: api_pb2.HTTPConfig):
+        self.http_config: api_pb2.HTTPConfig = http_config
+        self.flash_manager: Optional[FlashManager] = None  # type: ignore
+    def enter(self):
+        if self.http_config != api_pb2.HTTPConfig():
+            self.flash_manager = flash_forward(
+                self.http_config.port,
+                startup_timeout=self.http_config.startup_timeout,
+                exit_grace_period=self.http_config.exit_grace_period,
+                h2_enabled=self.http_config.h2_enabled,
+            )
+    def stop(self):
+        if self.flash_manager:
+            self.flash_manager.stop()
+    def close(self):
+        if self.flash_manager:
+            self.flash_manager.close()

modal 1.1.5.dev83__py3-none-any.whl → 1.3.1.dev8__py3-none-any.whl

Potentially problematic release.

modal 1.1.5.dev83__py3-none-any.whl → 1.3.1.dev8__py3-none-any.whl