PyPI - modal - Versions diffs - 1.1.5.dev66__py3-none-any.whl → 1.3.1.dev8__py3-none-any.whl - Mend

modal 1.1.5.dev66__py3-none-any.whl → 1.3.1.dev8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of modal might be problematic. Click here for more details.

Files changed (143) hide show

modal/__init__.py +4 -4
modal/__main__.py +4 -29
modal/_billing.py +84 -0
modal/_clustered_functions.py +1 -3
modal/_container_entrypoint.py +33 -208
modal/_functions.py +171 -138
modal/_grpc_client.py +191 -0
modal/_ipython.py +16 -6
modal/_load_context.py +106 -0
modal/_object.py +72 -21
modal/_output.py +12 -14
modal/_partial_function.py +31 -4
modal/_resolver.py +44 -57
modal/_runtime/container_io_manager.py +30 -28
modal/_runtime/container_io_manager.pyi +42 -44
modal/_runtime/gpu_memory_snapshot.py +9 -7
modal/_runtime/user_code_event_loop.py +80 -0
modal/_runtime/user_code_imports.py +236 -10
modal/_serialization.py +2 -1
modal/_traceback.py +4 -13
modal/_tunnel.py +16 -11
modal/_tunnel.pyi +25 -3
modal/_utils/async_utils.py +337 -10
modal/_utils/auth_token_manager.py +1 -4
modal/_utils/blob_utils.py +29 -22
modal/_utils/function_utils.py +20 -21
modal/_utils/grpc_testing.py +6 -3
modal/_utils/grpc_utils.py +223 -64
modal/_utils/mount_utils.py +26 -1
modal/_utils/name_utils.py +2 -3
modal/_utils/package_utils.py +0 -1
modal/_utils/rand_pb_testing.py +8 -1
modal/_utils/task_command_router_client.py +524 -0
modal/_vendor/cloudpickle.py +144 -48
modal/app.py +285 -105
modal/app.pyi +216 -53
modal/billing.py +5 -0
modal/builder/2025.06.txt +6 -3
modal/builder/PREVIEW.txt +2 -1
modal/builder/base-images.json +4 -2
modal/cli/_download.py +19 -3
modal/cli/cluster.py +4 -2
modal/cli/config.py +3 -1
modal/cli/container.py +5 -4
modal/cli/dict.py +5 -2
modal/cli/entry_point.py +26 -2
modal/cli/environment.py +2 -16
modal/cli/launch.py +1 -76
modal/cli/network_file_system.py +5 -20
modal/cli/programs/run_jupyter.py +1 -1
modal/cli/programs/vscode.py +1 -1
modal/cli/queues.py +5 -4
modal/cli/run.py +24 -204
modal/cli/secret.py +1 -2
modal/cli/shell.py +375 -0
modal/cli/utils.py +1 -13
modal/cli/volume.py +11 -17
modal/client.py +16 -125
modal/client.pyi +94 -144
modal/cloud_bucket_mount.py +3 -1
modal/cloud_bucket_mount.pyi +4 -0
modal/cls.py +101 -64
modal/cls.pyi +9 -8
modal/config.py +21 -1
modal/container_process.py +288 -12
modal/container_process.pyi +99 -38
modal/dict.py +72 -33
modal/dict.pyi +88 -57
modal/environments.py +16 -8
modal/environments.pyi +6 -2
modal/exception.py +154 -16
modal/experimental/__init__.py +24 -53
modal/experimental/flash.py +161 -74
modal/experimental/flash.pyi +97 -49
modal/file_io.py +50 -92
modal/file_io.pyi +117 -89
modal/functions.pyi +70 -87
modal/image.py +82 -47
modal/image.pyi +51 -30
modal/io_streams.py +500 -149
modal/io_streams.pyi +279 -189
modal/mount.py +60 -46
modal/mount.pyi +41 -17
modal/network_file_system.py +19 -11
modal/network_file_system.pyi +72 -39
modal/object.pyi +114 -22
modal/parallel_map.py +42 -44
modal/parallel_map.pyi +9 -17
modal/partial_function.pyi +4 -2
modal/proxy.py +14 -6
modal/proxy.pyi +10 -2
modal/queue.py +45 -38
modal/queue.pyi +88 -52
modal/runner.py +96 -96
modal/runner.pyi +44 -27
modal/sandbox.py +225 -107
modal/sandbox.pyi +226 -60
modal/secret.py +58 -56
modal/secret.pyi +28 -13
modal/serving.py +7 -11
modal/serving.pyi +7 -8
modal/snapshot.py +29 -15
modal/snapshot.pyi +18 -10
modal/token_flow.py +1 -1
modal/token_flow.pyi +4 -6
modal/volume.py +102 -55
modal/volume.pyi +125 -66
{modal-1.1.5.dev66.dist-info → modal-1.3.1.dev8.dist-info}/METADATA +10 -9
modal-1.3.1.dev8.dist-info/RECORD +189 -0
modal_proto/api.proto +141 -70
modal_proto/api_grpc.py +42 -26
modal_proto/api_pb2.py +1123 -1103
modal_proto/api_pb2.pyi +331 -83
modal_proto/api_pb2_grpc.py +80 -48
modal_proto/api_pb2_grpc.pyi +26 -18
modal_proto/modal_api_grpc.py +175 -174
modal_proto/task_command_router.proto +164 -0
modal_proto/task_command_router_grpc.py +138 -0
modal_proto/task_command_router_pb2.py +180 -0
modal_proto/{sandbox_router_pb2.pyi → task_command_router_pb2.pyi} +148 -57
modal_proto/task_command_router_pb2_grpc.py +272 -0
modal_proto/task_command_router_pb2_grpc.pyi +100 -0
modal_version/__init__.py +1 -1
modal_version/__main__.py +1 -1
modal/cli/programs/launch_instance_ssh.py +0 -94
modal/cli/programs/run_marimo.py +0 -95
modal-1.1.5.dev66.dist-info/RECORD +0 -191
modal_proto/modal_options_grpc.py +0 -3
modal_proto/options.proto +0 -19
modal_proto/options_grpc.py +0 -3
modal_proto/options_pb2.py +0 -35
modal_proto/options_pb2.pyi +0 -20
modal_proto/options_pb2_grpc.py +0 -4
modal_proto/options_pb2_grpc.pyi +0 -7
modal_proto/sandbox_router.proto +0 -125
modal_proto/sandbox_router_grpc.py +0 -89
modal_proto/sandbox_router_pb2.py +0 -128
modal_proto/sandbox_router_pb2_grpc.py +0 -169
modal_proto/sandbox_router_pb2_grpc.pyi +0 -63
{modal-1.1.5.dev66.dist-info → modal-1.3.1.dev8.dist-info}/WHEEL +0 -0
{modal-1.1.5.dev66.dist-info → modal-1.3.1.dev8.dist-info}/entry_points.txt +0 -0
{modal-1.1.5.dev66.dist-info → modal-1.3.1.dev8.dist-info}/licenses/LICENSE +0 -0
{modal-1.1.5.dev66.dist-info → modal-1.3.1.dev8.dist-info}/top_level.txt +0 -0

modal/experimental/flash.py CHANGED Viewed

@@ -7,16 +7,16 @@ import sys
 import time
 import traceback
 from collections import defaultdict
-from typing import Any, Optional
+from typing import Any, Callable, Optional, Union
 from urllib.parse import urlparse
+from modal._partial_function import _PartialFunctionFlags
 from modal.cls import _Cls
 from modal.dict import _Dict
 from modal_proto import api_pb2
 from .._tunnel import _forward as _forward_tunnel
 from .._utils.async_utils import synchronize_api, synchronizer
-from .._utils.grpc_utils import retry_transient_errors
 from ..client import _Client
 from ..config import logger
 from ..exception import InvalidError
@@ -29,15 +29,20 @@ class _FlashManager:
         self,
         client: _Client,
         port: int,
-        process: Optional[subprocess.Popen] = None,
+        process: Optional[subprocess.Popen] = None,  # to be deprecated
         health_check_url: Optional[str] = None,
+        startup_timeout: int = 30,
+        exit_grace_period: int = 0,
+        h2_enabled: bool = False,
     ):
         self.client = client
         self.port = port
+        self.process = process
         # Health check is not currently being used
         self.health_check_url = health_check_url
-        self.process = process
-        self.tunnel_manager = _forward_tunnel(port, client=client)
+        self.startup_timeout = startup_timeout
+        self.exit_grace_period = exit_grace_period
+        self.tunnel_manager = _forward_tunnel(port, h2_enabled=h2_enabled, client=client)
         self.stopped = False
         self.num_failures = 0
         self.task_id = os.environ["MODAL_TASK_ID"]
@@ -49,10 +54,15 @@ class _FlashManager:
         start_time = time.monotonic()
+        def check_process_is_running() -> Optional[Exception]:
+            if process is not None and process.poll() is not None:
+                return Exception(f"Process {process.pid} exited with code {process.returncode}")
+            return None
         while time.monotonic() - start_time < timeout:
             try:
-                if process is not None and process.poll() is not None:
-                    return False, Exception(f"Process {process.pid} exited with code {process.returncode}")
+                if error := check_process_is_running():
+                    return False, error
                 with socket.create_connection(("localhost", self.port), timeout=0.5):
                     return True, None
             except (ConnectionRefusedError, OSError):
@@ -101,6 +111,7 @@ class _FlashManager:
     async def _run_heartbeat(self, host: str, port: int):
         first_registration = True
+        start_time = time.monotonic()
         while True:
             try:
                 port_check_resp, port_check_error = await self.is_port_connection_healthy(process=self.process)
@@ -113,6 +124,7 @@ class _FlashManager:
                             port=port,
                         ),
                         timeout=10,
+                        retry=None,
                     )
                     self.num_failures = 0
                     if first_registration:
@@ -121,15 +133,16 @@ class _FlashManager:
                         )
                         first_registration = False
                 else:
-                    logger.error(
-                        f"[Modal Flash] Deregistering container {self.task_id} on {self.tunnel.url} "
-                        f"due to error: {port_check_error}, num_failures: {self.num_failures}"
-                    )
-                    self.num_failures += 1
-                    await retry_transient_errors(
-                        self.client.stub.FlashContainerDeregister,
-                        api_pb2.FlashContainerDeregisterRequest(),
-                    )
+                    if first_registration and (time.monotonic() - start_time < self.startup_timeout):
+                        continue
+                    else:
+                        logger.error(
+                            f"[Modal Flash] Deregistering container {self.task_id} on {self.tunnel.url} "
+                            f"due to error: {port_check_error}, num_failures: {self.num_failures}"
+                        )
+                        self.num_failures += 1
+                        await self.client.stub.FlashContainerDeregister(api_pb2.FlashContainerDeregisterRequest())
             except asyncio.CancelledError:
                 logger.warning("[Modal Flash] Shutting down...")
                 break
@@ -147,12 +160,12 @@ class _FlashManager:
         return self.tunnel.url
     async def stop(self):
-        self.heartbeat_task.cancel()
-        await retry_transient_errors(
-            self.client.stub.FlashContainerDeregister,
-            api_pb2.FlashContainerDeregisterRequest(),
-        )
+        try:
+            self.heartbeat_task.cancel()
+        except Exception as e:
+            logger.error(f"[Modal Flash] Error stopping: {e}")
+        await self.client.stub.FlashContainerDeregister(api_pb2.FlashContainerDeregisterRequest())
         self.stopped = True
         logger.warning(f"[Modal Flash] No longer accepting new requests on {self.tunnel.url}.")
@@ -163,18 +176,23 @@ class _FlashManager:
         if not self.stopped:
             await self.stop()
+        await asyncio.sleep(self.exit_grace_period)
         logger.warning(f"[Modal Flash] Closing tunnel on {self.tunnel.url}.")
         await self.tunnel_manager.__aexit__(*sys.exc_info())
-FlashManager = synchronize_api(_FlashManager)
+FlashManager = synchronize_api(_FlashManager, target_module=__name__)
 @synchronizer.create_blocking
 async def flash_forward(
     port: int,
-    process: Optional[subprocess.Popen] = None,
+    process: Optional[subprocess.Popen] = None,  # to be deprecated
     health_check_url: Optional[str] = None,
+    startup_timeout: int = 30,
+    exit_grace_period: int = 0,
+    h2_enabled: bool = False,
 ) -> _FlashManager:
     """
     Forward a port to the Modal Flash service, exposing that port as a stable web endpoint.
@@ -183,7 +201,15 @@ async def flash_forward(
     """
     client = await _Client.from_env()
-    manager = _FlashManager(client, port, process=process, health_check_url=health_check_url)
+    manager = _FlashManager(
+        client,
+        port,
+        process=process,
+        health_check_url=health_check_url,
+        startup_timeout=startup_timeout,
+        exit_grace_period=exit_grace_period,
+        h2_enabled=h2_enabled,
+    )
     await manager._start()
     return manager
@@ -321,7 +347,7 @@ class _FlashPrometheusAutoscaler:
     async def _compute_target_containers(self, current_replicas: int) -> int:
         """
-        Gets internal metrics from container to autoscale up or down.
+        Gets metrics from container to autoscale up or down.
         """
         containers = await self._get_all_containers()
         if len(containers) > current_replicas:
@@ -334,7 +360,7 @@ class _FlashPrometheusAutoscaler:
         if current_replicas == 0:
             return 1
-        # Get metrics based on autoscaler type (prometheus or internal)
+        # Get metrics based on autoscaler type
         sum_metric, n_containers_with_metrics = await self._get_scaling_info(containers)
         desired_replicas = self._calculate_desired_replicas(
@@ -406,39 +432,26 @@ class _FlashPrometheusAutoscaler:
         return desired_replicas
     async def _get_scaling_info(self, containers) -> tuple[float, int]:
-        """Get metrics using either internal container metrics API or prometheus HTTP endpoints."""
-        if self.metrics_endpoint == "internal":
-            container_metrics_results = await asyncio.gather(
-                *[self._get_container_metrics(container.task_id) for container in containers]
-            )
-            container_metrics_list = []
-            for container_metric in container_metrics_results:
-                if container_metric is None:
-                    continue
-                container_metrics_list.append(getattr(container_metric.metrics, self.target_metric))
-            sum_metric = sum(container_metrics_list)
-            n_containers_with_metrics = len(container_metrics_list)
-        else:
-            sum_metric = 0
-            n_containers_with_metrics = 0
-            container_metrics_list = await asyncio.gather(
-                *[
-                    self._get_metrics(f"https://{container.host}:{container.port}/{self.metrics_endpoint}")
-                    for container in containers
-                ]
-            )
+        """Get metrics using container exposed metrics endpoints."""
+        sum_metric = 0
+        n_containers_with_metrics = 0
+        container_metrics_list = await asyncio.gather(
+            *[
+                self._get_metrics(f"https://{container.host}:{container.port}/{self.metrics_endpoint}")
+                for container in containers
+            ]
+        )
-            for container_metrics in container_metrics_list:
-                if (
-                    container_metrics is None
-                    or self.target_metric not in container_metrics
-                    or len(container_metrics[self.target_metric]) == 0
-                ):
-                    continue
-                sum_metric += container_metrics[self.target_metric][0].value
-                n_containers_with_metrics += 1
+        for container_metrics in container_metrics_list:
+            if (
+                container_metrics is None
+                or self.target_metric not in container_metrics
+                or len(container_metrics[self.target_metric]) == 0
+            ):
+                continue
+            sum_metric += container_metrics[self.target_metric][0].value
+            n_containers_with_metrics += 1
         return sum_metric, n_containers_with_metrics
@@ -474,23 +487,14 @@ class _FlashPrometheusAutoscaler:
         return metrics
-    async def _get_container_metrics(self, container_id: str) -> Optional[api_pb2.TaskGetAutoscalingMetricsResponse]:
-        req = api_pb2.TaskGetAutoscalingMetricsRequest(task_id=container_id)
-        try:
-            resp = await retry_transient_errors(self.client.stub.TaskGetAutoscalingMetrics, req)
-            return resp
-        except Exception as e:
-            logger.warning(f"[Modal Flash] Error getting metrics for container {container_id}: {e}")
-            return None
     async def _get_all_containers(self):
         req = api_pb2.FlashContainerListRequest(function_id=self.fn.object_id)
-        resp = await retry_transient_errors(self.client.stub.FlashContainerList, req)
+        resp = await self.client.stub.FlashContainerList(req)
         return resp.containers
     async def _set_target_slots(self, target_slots: int):
         req = api_pb2.FlashSetTargetSlotsMetricsRequest(function_id=self.fn.object_id, target_slots=target_slots)
-        await retry_transient_errors(self.client.stub.FlashSetTargetSlotsMetrics, req)
+        await self.client.stub.FlashSetTargetSlotsMetrics(req)
         return
     def _make_scaling_decision(
@@ -572,14 +576,10 @@ async def flash_prometheus_autoscaler(
     app_name: str,
     cls_name: str,
     # Endpoint to fetch metrics from. Must be in Prometheus format. Example: "/metrics"
-    # If metrics_endpoint is "internal", we will use containers' internal metrics to autoscale instead.
     metrics_endpoint: str,
     # Target metric to autoscale on. Example: "vllm:num_requests_running"
-    # If metrics_endpoint is "internal", target_metrics options are: [cpu_usage_percent, memory_usage_percent]
     target_metric: str,
     # Target metric value. Example: 25
-    # If metrics_endpoint is "internal", target_metric_value is a percentage value between 0.1 and 1.0 (inclusive),
-    # indicating container's usage of that metric.
     target_metric_value: float,
     min_containers: Optional[int] = None,
     max_containers: Optional[int] = None,
@@ -645,5 +645,92 @@ async def flash_get_containers(app_name: str, cls_name: str) -> list[dict[str, A
     assert fn is not None
     await fn.hydrate(client=client)
     req = api_pb2.FlashContainerListRequest(function_id=fn.object_id)
-    resp = await retry_transient_errors(client.stub.FlashContainerList, req)
+    resp = await client.stub.FlashContainerList(req)
     return resp.containers
+def _http_server(
+    port: Optional[int] = None,
+    *,
+    proxy_regions: list[str] = [],  # The regions to proxy the HTTP server to.
+    startup_timeout: int = 30,  # Maximum number of seconds to wait for the HTTP server to start.
+    exit_grace_period: Optional[int] = None,  # The time to wait for the HTTP server to exit gracefully.
+    h2_enabled: bool = False,  # Whether to enable HTTP/2 support.
+):
+    """Decorator for Flash-enabled HTTP servers on Modal classes.
+    Args:
+        port: The local port to forward to the HTTP server.
+        proxy_regions: The regions to proxy the HTTP server to.
+        startup_timeout: The maximum time to wait for the HTTP server to start.
+        exit_grace_period: The time to wait for the HTTP server to exit gracefully.
+    """
+    if port is None:
+        raise InvalidError(
+            "Positional arguments are not allowed. Did you forget parentheses? Suggestion: `@modal.http_server()`."
+        )
+    if not isinstance(port, int) or port < 1 or port > 65535:
+        raise InvalidError("First argument of `@http_server` must be a local port, such as `@http_server(8000)`.")
+    if startup_timeout <= 0:
+        raise InvalidError("The `startup_timeout` argument of `@http_server` must be positive.")
+    if exit_grace_period is not None and exit_grace_period < 0:
+        raise InvalidError("The `exit_grace_period` argument of `@http_server` must be non-negative.")
+    from modal._partial_function import _PartialFunction, _PartialFunctionParams
+    params = _PartialFunctionParams(
+        http_config=api_pb2.HTTPConfig(
+            port=port,
+            proxy_regions=proxy_regions,
+            startup_timeout=startup_timeout or 0,
+            exit_grace_period=exit_grace_period or 0,
+            h2_enabled=h2_enabled,
+        )
+    )
+    def wrapper(obj: Union[Callable[..., Any], _PartialFunction]) -> _PartialFunction:
+        flags = _PartialFunctionFlags.HTTP_WEB_INTERFACE
+        if isinstance(obj, _PartialFunction):
+            pf = obj.stack(flags, params)
+        else:
+            pf = _PartialFunction(obj, flags, params)
+        pf.validate_obj_compatibility("`http_server`")
+        return pf
+    return wrapper
+http_server = synchronize_api(_http_server, target_module=__name__)
+class _FlashContainerEntry:
+    """
+    A class that manages the lifecycle of Flash manager for Flash containers.
+    It is intentional that stop() runs before exit handlers and close().
+    This ensures the container is deregistered first, preventing new requests from being routed to it
+    while exit handlers execute and the exit grace period elapses, before finally closing the tunnel.
+    """
+    def __init__(self, http_config: api_pb2.HTTPConfig):
+        self.http_config: api_pb2.HTTPConfig = http_config
+        self.flash_manager: Optional[FlashManager] = None  # type: ignore
+    def enter(self):
+        if self.http_config != api_pb2.HTTPConfig():
+            self.flash_manager = flash_forward(
+                self.http_config.port,
+                startup_timeout=self.http_config.startup_timeout,
+                exit_grace_period=self.http_config.exit_grace_period,
+                h2_enabled=self.http_config.h2_enabled,
+            )
+    def stop(self):
+        if self.flash_manager:
+            self.flash_manager.stop()
+    def close(self):
+        if self.flash_manager:
+            self.flash_manager.close()

modal/experimental/flash.pyi CHANGED Viewed

@@ -11,6 +11,9 @@ class _FlashManager:
         port: int,
         process: typing.Optional[subprocess.Popen] = None,
         health_check_url: typing.Optional[str] = None,
+        startup_timeout: int = 30,
+        exit_grace_period: int = 0,
+        h2_enabled: bool = False,
     ):
         """Initialize self.  See help(type(self)) for accurate signature."""
         ...
@@ -28,8 +31,6 @@ class _FlashManager:
     async def stop(self): ...
     async def close(self): ...
-SUPERSELF = typing.TypeVar("SUPERSELF", covariant=True)
 class FlashManager:
     def __init__(
         self,
@@ -37,9 +38,12 @@ class FlashManager:
         port: int,
         process: typing.Optional[subprocess.Popen] = None,
         health_check_url: typing.Optional[str] = None,
+        startup_timeout: int = 30,
+        exit_grace_period: int = 0,
+        h2_enabled: bool = False,
     ): ...
-    class __is_port_connection_healthy_spec(typing_extensions.Protocol[SUPERSELF]):
+    class __is_port_connection_healthy_spec(typing_extensions.Protocol):
         def __call__(
             self, /, process: typing.Optional[subprocess.Popen], timeout: float = 0.5
         ) -> tuple[bool, typing.Optional[Exception]]: ...
@@ -47,15 +51,15 @@ class FlashManager:
             self, /, process: typing.Optional[subprocess.Popen], timeout: float = 0.5
         ) -> tuple[bool, typing.Optional[Exception]]: ...
-    is_port_connection_healthy: __is_port_connection_healthy_spec[typing_extensions.Self]
+    is_port_connection_healthy: __is_port_connection_healthy_spec
-    class ___start_spec(typing_extensions.Protocol[SUPERSELF]):
+    class ___start_spec(typing_extensions.Protocol):
         def __call__(self, /): ...
         async def aio(self, /): ...
-    _start: ___start_spec[typing_extensions.Self]
+    _start: ___start_spec
-    class ___drain_container_spec(typing_extensions.Protocol[SUPERSELF]):
+    class ___drain_container_spec(typing_extensions.Protocol):
         def __call__(self, /):
             """Background task that checks if we've encountered too many failures and drains the container if so."""
             ...
@@ -64,27 +68,27 @@ class FlashManager:
             """Background task that checks if we've encountered too many failures and drains the container if so."""
             ...
-    _drain_container: ___drain_container_spec[typing_extensions.Self]
+    _drain_container: ___drain_container_spec
-    class ___run_heartbeat_spec(typing_extensions.Protocol[SUPERSELF]):
+    class ___run_heartbeat_spec(typing_extensions.Protocol):
         def __call__(self, /, host: str, port: int): ...
         async def aio(self, /, host: str, port: int): ...
-    _run_heartbeat: ___run_heartbeat_spec[typing_extensions.Self]
+    _run_heartbeat: ___run_heartbeat_spec
     def get_container_url(self): ...
-    class __stop_spec(typing_extensions.Protocol[SUPERSELF]):
+    class __stop_spec(typing_extensions.Protocol):
         def __call__(self, /): ...
         async def aio(self, /): ...
-    stop: __stop_spec[typing_extensions.Self]
+    stop: __stop_spec
-    class __close_spec(typing_extensions.Protocol[SUPERSELF]):
+    class __close_spec(typing_extensions.Protocol):
         def __call__(self, /): ...
         async def aio(self, /): ...
-    close: __close_spec[typing_extensions.Self]
+    close: __close_spec
 class __flash_forward_spec(typing_extensions.Protocol):
     def __call__(
@@ -93,6 +97,9 @@ class __flash_forward_spec(typing_extensions.Protocol):
         port: int,
         process: typing.Optional[subprocess.Popen] = None,
         health_check_url: typing.Optional[str] = None,
+        startup_timeout: int = 30,
+        exit_grace_period: int = 0,
+        h2_enabled: bool = False,
     ) -> FlashManager:
         """Forward a port to the Modal Flash service, exposing that port as a stable web endpoint.
         This is a highly experimental method that can break or be removed at any time without warning.
@@ -106,6 +113,9 @@ class __flash_forward_spec(typing_extensions.Protocol):
         port: int,
         process: typing.Optional[subprocess.Popen] = None,
         health_check_url: typing.Optional[str] = None,
+        startup_timeout: int = 30,
+        exit_grace_period: int = 0,
+        h2_enabled: bool = False,
     ) -> FlashManager:
         """Forward a port to the Modal Flash service, exposing that port as a stable web endpoint.
         This is a highly experimental method that can break or be removed at any time without warning.
@@ -139,7 +149,7 @@ class _FlashPrometheusAutoscaler:
     async def start(self): ...
     async def _run_autoscaler_loop(self): ...
     async def _compute_target_containers(self, current_replicas: int) -> int:
-        """Gets internal metrics from container to autoscale up or down."""
+        """Gets metrics from container to autoscale up or down."""
         ...
     def _calculate_desired_replicas(
@@ -154,13 +164,10 @@ class _FlashPrometheusAutoscaler:
         ...
     async def _get_scaling_info(self, containers) -> tuple[float, int]:
-        """Get metrics using either internal container metrics API or prometheus HTTP endpoints."""
+        """Get metrics using container exposed metrics endpoints."""
         ...
     async def _get_metrics(self, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
-    async def _get_container_metrics(
-        self, container_id: str
-    ) -> typing.Optional[modal_proto.api_pb2.TaskGetAutoscalingMetricsResponse]: ...
     async def _get_all_containers(self): ...
     async def _set_target_slots(self, target_slots: int): ...
     def _make_scaling_decision(
@@ -212,28 +219,28 @@ class FlashPrometheusAutoscaler:
         autoscaling_interval_seconds: int,
     ): ...
-    class __start_spec(typing_extensions.Protocol[SUPERSELF]):
+    class __start_spec(typing_extensions.Protocol):
         def __call__(self, /): ...
         async def aio(self, /): ...
-    start: __start_spec[typing_extensions.Self]
+    start: __start_spec
-    class ___run_autoscaler_loop_spec(typing_extensions.Protocol[SUPERSELF]):
+    class ___run_autoscaler_loop_spec(typing_extensions.Protocol):
         def __call__(self, /): ...
         async def aio(self, /): ...
-    _run_autoscaler_loop: ___run_autoscaler_loop_spec[typing_extensions.Self]
+    _run_autoscaler_loop: ___run_autoscaler_loop_spec
-    class ___compute_target_containers_spec(typing_extensions.Protocol[SUPERSELF]):
+    class ___compute_target_containers_spec(typing_extensions.Protocol):
         def __call__(self, /, current_replicas: int) -> int:
-            """Gets internal metrics from container to autoscale up or down."""
+            """Gets metrics from container to autoscale up or down."""
             ...
         async def aio(self, /, current_replicas: int) -> int:
-            """Gets internal metrics from container to autoscale up or down."""
+            """Gets metrics from container to autoscale up or down."""
             ...
-    _compute_target_containers: ___compute_target_containers_spec[typing_extensions.Self]
+    _compute_target_containers: ___compute_target_containers_spec
     def _calculate_desired_replicas(
         self,
@@ -246,44 +253,34 @@ class FlashPrometheusAutoscaler:
         """Calculate the desired number of replicas to autoscale to."""
         ...
-    class ___get_scaling_info_spec(typing_extensions.Protocol[SUPERSELF]):
+    class ___get_scaling_info_spec(typing_extensions.Protocol):
         def __call__(self, /, containers) -> tuple[float, int]:
-            """Get metrics using either internal container metrics API or prometheus HTTP endpoints."""
+            """Get metrics using container exposed metrics endpoints."""
             ...
         async def aio(self, /, containers) -> tuple[float, int]:
-            """Get metrics using either internal container metrics API or prometheus HTTP endpoints."""
+            """Get metrics using container exposed metrics endpoints."""
             ...
-    _get_scaling_info: ___get_scaling_info_spec[typing_extensions.Self]
+    _get_scaling_info: ___get_scaling_info_spec
-    class ___get_metrics_spec(typing_extensions.Protocol[SUPERSELF]):
+    class ___get_metrics_spec(typing_extensions.Protocol):
         def __call__(self, /, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
         async def aio(self, /, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
-    _get_metrics: ___get_metrics_spec[typing_extensions.Self]
-    class ___get_container_metrics_spec(typing_extensions.Protocol[SUPERSELF]):
-        def __call__(
-            self, /, container_id: str
-        ) -> typing.Optional[modal_proto.api_pb2.TaskGetAutoscalingMetricsResponse]: ...
-        async def aio(
-            self, /, container_id: str
-        ) -> typing.Optional[modal_proto.api_pb2.TaskGetAutoscalingMetricsResponse]: ...
-    _get_container_metrics: ___get_container_metrics_spec[typing_extensions.Self]
+    _get_metrics: ___get_metrics_spec
-    class ___get_all_containers_spec(typing_extensions.Protocol[SUPERSELF]):
+    class ___get_all_containers_spec(typing_extensions.Protocol):
         def __call__(self, /): ...
         async def aio(self, /): ...
-    _get_all_containers: ___get_all_containers_spec[typing_extensions.Self]
+    _get_all_containers: ___get_all_containers_spec
-    class ___set_target_slots_spec(typing_extensions.Protocol[SUPERSELF]):
+    class ___set_target_slots_spec(typing_extensions.Protocol):
         def __call__(self, /, target_slots: int): ...
         async def aio(self, /, target_slots: int): ...
-    _set_target_slots: ___set_target_slots_spec[typing_extensions.Self]
+    _set_target_slots: ___set_target_slots_spec
     def _make_scaling_decision(
         self,
@@ -313,11 +310,11 @@ class FlashPrometheusAutoscaler:
         """
         ...
-    class __stop_spec(typing_extensions.Protocol[SUPERSELF]):
+    class __stop_spec(typing_extensions.Protocol):
         def __call__(self, /): ...
         async def aio(self, /): ...
-    stop: __stop_spec[typing_extensions.Self]
+    stop: __stop_spec
 class __flash_prometheus_autoscaler_spec(typing_extensions.Protocol):
     def __call__(
@@ -392,3 +389,54 @@ class __flash_get_containers_spec(typing_extensions.Protocol):
         ...
 flash_get_containers: __flash_get_containers_spec
+def _http_server(
+    port: typing.Optional[int] = None,
+    *,
+    proxy_regions: list[str] = [],
+    startup_timeout: int = 30,
+    exit_grace_period: typing.Optional[int] = None,
+    h2_enabled: bool = False,
+):
+    """Decorator for Flash-enabled HTTP servers on Modal classes.
+    Args:
+        port: The local port to forward to the HTTP server.
+        proxy_regions: The regions to proxy the HTTP server to.
+        startup_timeout: The maximum time to wait for the HTTP server to start.
+        exit_grace_period: The time to wait for the HTTP server to exit gracefully.
+    """
+    ...
+def http_server(
+    port: typing.Optional[int] = None,
+    *,
+    proxy_regions: list[str] = [],
+    startup_timeout: int = 30,
+    exit_grace_period: typing.Optional[int] = None,
+    h2_enabled: bool = False,
+):
+    """Decorator for Flash-enabled HTTP servers on Modal classes.
+    Args:
+        port: The local port to forward to the HTTP server.
+        proxy_regions: The regions to proxy the HTTP server to.
+        startup_timeout: The maximum time to wait for the HTTP server to start.
+        exit_grace_period: The time to wait for the HTTP server to exit gracefully.
+    """
+    ...
+class _FlashContainerEntry:
+    """A class that manages the lifecycle of Flash manager for Flash containers.
+    It is intentional that stop() runs before exit handlers and close().
+    This ensures the container is deregistered first, preventing new requests from being routed to it
+    while exit handlers execute and the exit grace period elapses, before finally closing the tunnel.
+    """
+    def __init__(self, http_config: modal_proto.api_pb2.HTTPConfig):
+        """Initialize self.  See help(type(self)) for accurate signature."""
+        ...
+    def enter(self): ...
+    def stop(self): ...
+    def close(self): ...

modal 1.1.5.dev66__py3-none-any.whl → 1.3.1.dev8__py3-none-any.whl

Potentially problematic release.

modal 1.1.5.dev66__py3-none-any.whl → 1.3.1.dev8__py3-none-any.whl