modal 0.62.16__py3-none-any.whl → 0.72.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. modal/__init__.py +17 -13
  2. modal/__main__.py +41 -3
  3. modal/_clustered_functions.py +80 -0
  4. modal/_clustered_functions.pyi +22 -0
  5. modal/_container_entrypoint.py +420 -937
  6. modal/_ipython.py +3 -13
  7. modal/_location.py +17 -10
  8. modal/_output.py +243 -99
  9. modal/_pty.py +2 -2
  10. modal/_resolver.py +55 -59
  11. modal/_resources.py +51 -0
  12. modal/_runtime/__init__.py +1 -0
  13. modal/_runtime/asgi.py +519 -0
  14. modal/_runtime/container_io_manager.py +1036 -0
  15. modal/_runtime/execution_context.py +89 -0
  16. modal/_runtime/telemetry.py +169 -0
  17. modal/_runtime/user_code_imports.py +356 -0
  18. modal/_serialization.py +134 -9
  19. modal/_traceback.py +47 -187
  20. modal/_tunnel.py +52 -16
  21. modal/_tunnel.pyi +19 -36
  22. modal/_utils/app_utils.py +3 -17
  23. modal/_utils/async_utils.py +479 -100
  24. modal/_utils/blob_utils.py +157 -186
  25. modal/_utils/bytes_io_segment_payload.py +97 -0
  26. modal/_utils/deprecation.py +89 -0
  27. modal/_utils/docker_utils.py +98 -0
  28. modal/_utils/function_utils.py +460 -171
  29. modal/_utils/grpc_testing.py +47 -31
  30. modal/_utils/grpc_utils.py +62 -109
  31. modal/_utils/hash_utils.py +61 -19
  32. modal/_utils/http_utils.py +39 -9
  33. modal/_utils/logger.py +2 -1
  34. modal/_utils/mount_utils.py +34 -16
  35. modal/_utils/name_utils.py +58 -0
  36. modal/_utils/package_utils.py +14 -1
  37. modal/_utils/pattern_utils.py +205 -0
  38. modal/_utils/rand_pb_testing.py +5 -7
  39. modal/_utils/shell_utils.py +15 -49
  40. modal/_vendor/a2wsgi_wsgi.py +62 -72
  41. modal/_vendor/cloudpickle.py +1 -1
  42. modal/_watcher.py +14 -12
  43. modal/app.py +1003 -314
  44. modal/app.pyi +540 -264
  45. modal/call_graph.py +7 -6
  46. modal/cli/_download.py +63 -53
  47. modal/cli/_traceback.py +200 -0
  48. modal/cli/app.py +205 -45
  49. modal/cli/config.py +12 -5
  50. modal/cli/container.py +62 -14
  51. modal/cli/dict.py +128 -0
  52. modal/cli/entry_point.py +26 -13
  53. modal/cli/environment.py +40 -9
  54. modal/cli/import_refs.py +64 -58
  55. modal/cli/launch.py +32 -18
  56. modal/cli/network_file_system.py +64 -83
  57. modal/cli/profile.py +1 -1
  58. modal/cli/programs/run_jupyter.py +35 -10
  59. modal/cli/programs/vscode.py +60 -10
  60. modal/cli/queues.py +131 -0
  61. modal/cli/run.py +234 -131
  62. modal/cli/secret.py +8 -7
  63. modal/cli/token.py +7 -2
  64. modal/cli/utils.py +79 -10
  65. modal/cli/volume.py +110 -109
  66. modal/client.py +250 -144
  67. modal/client.pyi +157 -118
  68. modal/cloud_bucket_mount.py +108 -34
  69. modal/cloud_bucket_mount.pyi +32 -38
  70. modal/cls.py +535 -148
  71. modal/cls.pyi +190 -146
  72. modal/config.py +41 -19
  73. modal/container_process.py +177 -0
  74. modal/container_process.pyi +82 -0
  75. modal/dict.py +111 -65
  76. modal/dict.pyi +136 -131
  77. modal/environments.py +106 -5
  78. modal/environments.pyi +77 -25
  79. modal/exception.py +34 -43
  80. modal/experimental.py +61 -2
  81. modal/extensions/ipython.py +5 -5
  82. modal/file_io.py +537 -0
  83. modal/file_io.pyi +235 -0
  84. modal/file_pattern_matcher.py +197 -0
  85. modal/functions.py +906 -911
  86. modal/functions.pyi +466 -430
  87. modal/gpu.py +57 -44
  88. modal/image.py +1089 -479
  89. modal/image.pyi +584 -228
  90. modal/io_streams.py +434 -0
  91. modal/io_streams.pyi +122 -0
  92. modal/mount.py +314 -101
  93. modal/mount.pyi +241 -235
  94. modal/network_file_system.py +92 -92
  95. modal/network_file_system.pyi +152 -110
  96. modal/object.py +67 -36
  97. modal/object.pyi +166 -143
  98. modal/output.py +63 -0
  99. modal/parallel_map.py +434 -0
  100. modal/parallel_map.pyi +75 -0
  101. modal/partial_function.py +282 -117
  102. modal/partial_function.pyi +222 -129
  103. modal/proxy.py +15 -12
  104. modal/proxy.pyi +3 -8
  105. modal/queue.py +182 -65
  106. modal/queue.pyi +218 -118
  107. modal/requirements/2024.04.txt +29 -0
  108. modal/requirements/2024.10.txt +16 -0
  109. modal/requirements/README.md +21 -0
  110. modal/requirements/base-images.json +22 -0
  111. modal/retries.py +48 -7
  112. modal/runner.py +459 -156
  113. modal/runner.pyi +135 -71
  114. modal/running_app.py +38 -0
  115. modal/sandbox.py +514 -236
  116. modal/sandbox.pyi +397 -169
  117. modal/schedule.py +4 -4
  118. modal/scheduler_placement.py +20 -3
  119. modal/secret.py +56 -31
  120. modal/secret.pyi +62 -42
  121. modal/serving.py +51 -56
  122. modal/serving.pyi +44 -36
  123. modal/stream_type.py +15 -0
  124. modal/token_flow.py +5 -3
  125. modal/token_flow.pyi +37 -32
  126. modal/volume.py +285 -157
  127. modal/volume.pyi +249 -184
  128. {modal-0.62.16.dist-info → modal-0.72.11.dist-info}/METADATA +7 -7
  129. modal-0.72.11.dist-info/RECORD +174 -0
  130. {modal-0.62.16.dist-info → modal-0.72.11.dist-info}/top_level.txt +0 -1
  131. modal_docs/gen_reference_docs.py +3 -1
  132. modal_docs/mdmd/mdmd.py +0 -1
  133. modal_docs/mdmd/signatures.py +5 -2
  134. modal_global_objects/images/base_images.py +28 -0
  135. modal_global_objects/mounts/python_standalone.py +2 -2
  136. modal_proto/__init__.py +1 -1
  137. modal_proto/api.proto +1288 -533
  138. modal_proto/api_grpc.py +856 -456
  139. modal_proto/api_pb2.py +2165 -1157
  140. modal_proto/api_pb2.pyi +8859 -0
  141. modal_proto/api_pb2_grpc.py +1674 -855
  142. modal_proto/api_pb2_grpc.pyi +1416 -0
  143. modal_proto/modal_api_grpc.py +149 -0
  144. modal_proto/modal_options_grpc.py +3 -0
  145. modal_proto/options_pb2.pyi +20 -0
  146. modal_proto/options_pb2_grpc.pyi +7 -0
  147. modal_proto/py.typed +0 -0
  148. modal_version/__init__.py +1 -1
  149. modal_version/_version_generated.py +2 -2
  150. modal/_asgi.py +0 -370
  151. modal/_container_entrypoint.pyi +0 -378
  152. modal/_container_exec.py +0 -128
  153. modal/_sandbox_shell.py +0 -49
  154. modal/shared_volume.py +0 -23
  155. modal/shared_volume.pyi +0 -24
  156. modal/stub.py +0 -783
  157. modal/stub.pyi +0 -332
  158. modal-0.62.16.dist-info/RECORD +0 -198
  159. modal_global_objects/images/conda.py +0 -15
  160. modal_global_objects/images/debian_slim.py +0 -15
  161. modal_global_objects/images/micromamba.py +0 -15
  162. test/__init__.py +0 -1
  163. test/aio_test.py +0 -12
  164. test/async_utils_test.py +0 -262
  165. test/blob_test.py +0 -67
  166. test/cli_imports_test.py +0 -149
  167. test/cli_test.py +0 -659
  168. test/client_test.py +0 -194
  169. test/cls_test.py +0 -630
  170. test/config_test.py +0 -137
  171. test/conftest.py +0 -1420
  172. test/container_app_test.py +0 -32
  173. test/container_test.py +0 -1389
  174. test/cpu_test.py +0 -23
  175. test/decorator_test.py +0 -85
  176. test/deprecation_test.py +0 -34
  177. test/dict_test.py +0 -33
  178. test/e2e_test.py +0 -68
  179. test/error_test.py +0 -7
  180. test/function_serialization_test.py +0 -32
  181. test/function_test.py +0 -653
  182. test/function_utils_test.py +0 -101
  183. test/gpu_test.py +0 -159
  184. test/grpc_utils_test.py +0 -141
  185. test/helpers.py +0 -42
  186. test/image_test.py +0 -669
  187. test/live_reload_test.py +0 -80
  188. test/lookup_test.py +0 -70
  189. test/mdmd_test.py +0 -329
  190. test/mount_test.py +0 -162
  191. test/mounted_files_test.py +0 -329
  192. test/network_file_system_test.py +0 -181
  193. test/notebook_test.py +0 -66
  194. test/object_test.py +0 -41
  195. test/package_utils_test.py +0 -25
  196. test/queue_test.py +0 -97
  197. test/resolver_test.py +0 -58
  198. test/retries_test.py +0 -67
  199. test/runner_test.py +0 -85
  200. test/sandbox_test.py +0 -191
  201. test/schedule_test.py +0 -15
  202. test/scheduler_placement_test.py +0 -29
  203. test/secret_test.py +0 -78
  204. test/serialization_test.py +0 -42
  205. test/stub_composition_test.py +0 -10
  206. test/stub_test.py +0 -360
  207. test/test_asgi_wrapper.py +0 -234
  208. test/token_flow_test.py +0 -18
  209. test/traceback_test.py +0 -135
  210. test/tunnel_test.py +0 -29
  211. test/utils_test.py +0 -88
  212. test/version_test.py +0 -14
  213. test/volume_test.py +0 -341
  214. test/watcher_test.py +0 -30
  215. test/webhook_test.py +0 -146
  216. /modal/{requirements.312.txt → requirements/2023.12.312.txt} +0 -0
  217. /modal/{requirements.txt → requirements/2023.12.txt} +0 -0
  218. {modal-0.62.16.dist-info → modal-0.72.11.dist-info}/LICENSE +0 -0
  219. {modal-0.62.16.dist-info → modal-0.72.11.dist-info}/WHEEL +0 -0
  220. {modal-0.62.16.dist-info → modal-0.72.11.dist-info}/entry_points.txt +0 -0
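The diff below appears to be modal/_container_entrypoint.py (entry 5 above, +420 -937), which was largely rewritten around the new modal._runtime package. The file list also reflects the library-wide rename of modal.Stub to modal.App (modal/stub.py and modal/stub.pyi removed; modal/app.py and modal/app.pyi greatly expanded). As a rough, illustrative sketch of what that rename means for user code (not part of the diff; the app name and function are placeholders):

    # modal 0.62.x style
    import modal
    stub = modal.Stub("example-app")

    @stub.function()
    def f():
        return "hello"

    # modal 0.72.x style: App replaces Stub
    import modal
    app = modal.App("example-app")

    @app.function()
    def f():
        return "hello"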
@@ -1,63 +1,103 @@
  # Copyright Modal Labs 2022
- from __future__ import annotations
+ # ruff: noqa: E402
+ import os
+
+ from modal._runtime.user_code_imports import Service, import_class_service, import_single_function_service
+
+ telemetry_socket = os.environ.get("MODAL_TELEMETRY_SOCKET")
+ if telemetry_socket:
+     from ._runtime.telemetry import instrument_imports
+
+     instrument_imports(telemetry_socket)

  import asyncio
- import base64
- import contextlib
- import importlib
+ import concurrent.futures
  import inspect
- import json
- import math
- import os
+ import queue
  import signal
  import sys
  import threading
  import time
- import traceback
- from collections.abc import Iterable
- from dataclasses import dataclass
- from pathlib import Path
- from typing import TYPE_CHECKING, Any, AsyncGenerator, AsyncIterator, Callable, List, Optional, Set, Tuple, Type
+ from collections.abc import Sequence
+ from typing import TYPE_CHECKING, Any, Callable, Optional

- from grpclib import Status
+ from google.protobuf.message import Message

+ from modal._clustered_functions import initialize_clustered_function
+ from modal._proxy_tunnel import proxy_tunnel
+ from modal._serialization import deserialize, deserialize_proto_params
+ from modal._utils.async_utils import TaskContext, synchronizer
+ from modal._utils.function_utils import (
+     callable_has_non_self_params,
+ )
+ from modal.app import App, _App
+ from modal.client import Client, _Client
+ from modal.config import logger
+ from modal.exception import ExecutionError, InputCancellation, InvalidError
+ from modal.partial_function import (
+     _find_callables_for_obj,
+     _PartialFunctionFlags,
+ )
+ from modal.running_app import RunningApp
  from modal_proto import api_pb2

- from ._asgi import (
-     asgi_app_wrapper,
-     get_ip_address,
-     wait_for_web_server,
-     web_server_proxy,
-     webhook_asgi_app,
-     wsgi_app_wrapper,
+ from ._runtime.container_io_manager import (
+     ContainerIOManager,
+     IOContext,
+     UserException,
+     _ContainerIOManager,
  )
- from ._proxy_tunnel import proxy_tunnel
- from ._serialization import deserialize, deserialize_data_format, serialize, serialize_data_format
- from ._traceback import extract_traceback
- from ._utils.async_utils import TaskContext, asyncify, synchronize_api, synchronizer
- from ._utils.blob_utils import MAX_OBJECT_SIZE_BYTES, blob_download, blob_upload
- from ._utils.function_utils import LocalFunctionError, is_async as get_is_async, is_global_function, method_has_params
- from ._utils.grpc_utils import retry_transient_errors
- from .app import ContainerApp, _container_app, _ContainerApp, interact
- from .client import HEARTBEAT_INTERVAL, HEARTBEAT_TIMEOUT, Client, _Client
- from .cls import Cls
- from .config import config, logger
- from .exception import InputCancellation, InvalidError
- from .functions import Function, _Function, _set_current_context_ids, _stream_function_call_data
- from .partial_function import _find_callables_for_obj, _PartialFunctionFlags
- from .stub import _Stub
+ from ._runtime.execution_context import _set_current_context_ids

  if TYPE_CHECKING:
-     from types import ModuleType
+     import modal._runtime.container_io_manager
+     import modal.object
+
+
+ class DaemonizedThreadPool:
+     # Used instead of ThreadPoolExecutor, since the latter won't allow
+     # the interpreter to shut down before the currently running tasks
+     # have finished
+     def __init__(self, max_threads: int):
+         self.max_threads = max_threads
+
+     def __enter__(self):
+         self.spawned_workers = 0
+         self.inputs: queue.Queue[Any] = queue.Queue()
+         self.finished = threading.Event()
+         return self
+
+     def __exit__(self, exc_type, exc_value, traceback):
+         self.finished.set()

- MAX_OUTPUT_BATCH_SIZE: int = 49
+         if exc_type is None:
+             self.inputs.join()
+         else:
+             # special case - allows us to exit the
+             if self.inputs.unfinished_tasks:
+                 logger.info(
+                     f"Exiting DaemonizedThreadPool with {self.inputs.unfinished_tasks} active "
+                     f"inputs due to exception: {repr(exc_type)}"
+                 )

- RTT_S: float = 0.5 # conservative estimate of RTT in seconds.
+     def submit(self, func, *args):
+         def worker_thread():
+             while not self.finished.is_set():
+                 try:
+                     _func, _args = self.inputs.get(timeout=1)
+                 except queue.Empty:
+                     continue
+                 try:
+                     _func(*_args)
+                 except BaseException:
+                     logger.exception(f"Exception raised by {_func} in DaemonizedThreadPool worker!")
+                 self.inputs.task_done()

+         if self.spawned_workers < self.max_threads:
+             threading.Thread(target=worker_thread, daemon=True).start()
+             self.spawned_workers += 1

- class UserException(Exception):
-     # Used to shut down the task gracefully
-     pass
+         self.inputs.put((func, args))


  class UserCodeEventLoop:
@@ -76,14 +116,25 @@ class UserCodeEventLoop:

      def __enter__(self):
          self.loop = asyncio.new_event_loop()
+         self.tasks = set()
          return self

      def __exit__(self, exc_type, exc_value, traceback):
          self.loop.run_until_complete(self.loop.shutdown_asyncgens())
          if sys.version_info[:2] >= (3, 9):
              self.loop.run_until_complete(self.loop.shutdown_default_executor()) # Introduced in Python 3.9
+
+         for task in self.tasks:
+             task.cancel()
+
          self.loop.close()

+     def create_task(self, coro):
+         task = self.loop.create_task(coro)
+         self.tasks.add(task)
+         task.add_done_callback(self.tasks.discard)
+         return task
+
      def run(self, coro):
          task = asyncio.ensure_future(coro, loop=self.loop)
          self._sigints = 0
@@ -99,7 +150,9 @@ class UserCodeEventLoop:
                  # first sigint is graceful
                  task.cancel()
                  return
-             raise KeyboardInterrupt() # this should normally not happen, but the second sigint would "hard kill" the event loop!
+
+             # this should normally not happen, but the second sigint would "hard kill" the event loop!
+             raise KeyboardInterrupt()

          ignore_sigint = signal.getsignal(signal.SIGINT) == signal.SIG_IGN
          if not ignore_sigint:
@@ -122,972 +175,381 @@ class UserCodeEventLoop:
122
175
  self.loop.remove_signal_handler(signal.SIGINT)
123
176
 
124
177
 
125
- class _FunctionIOManager:
126
- """Synchronizes all RPC calls and network operations for a running container.
127
-
128
- TODO: maybe we shouldn't synchronize the whole class.
129
- Then we could potentially move a bunch of the global functions onto it.
130
- """
131
-
132
- _GENERATOR_STOP_SENTINEL = object()
133
-
134
- def __init__(self, container_args: api_pb2.ContainerArguments, client: _Client):
135
- self.cancelled_input_ids: Set[str] = set()
136
- self.task_id = container_args.task_id
137
- self.function_id = container_args.function_id
138
- self.app_id = container_args.app_id
139
- self.function_def = container_args.function_def
140
- self.checkpoint_id = container_args.checkpoint_id
141
-
142
- self.calls_completed = 0
143
- self.total_user_time: float = 0.0
144
- self.current_input_id: Optional[str] = None
145
- self.current_input_started_at: Optional[float] = None
146
-
147
- self._input_concurrency: Optional[int] = None
148
-
149
- self._semaphore: Optional[asyncio.Semaphore] = None
150
- self._environment_name = container_args.environment_name
151
- self._waiting_for_checkpoint = False
152
- self._heartbeat_loop = None
153
-
154
- self._client = client
155
- assert isinstance(self._client, _Client)
156
-
157
- async def initialize_app(self) -> _ContainerApp:
158
- await _container_app.init(self._client, self.app_id, self._environment_name, self.function_def)
159
- return _container_app
160
-
161
- async def _run_heartbeat_loop(self):
162
- while 1:
163
- t0 = time.monotonic()
164
- try:
165
- if await self._heartbeat_handle_cancellations():
166
- # got a cancellation event, fine to start another heartbeat immediately
167
- # since the cancellation queue should be empty on the worker server
168
- # however, we wait at least 1s to prevent short-circuiting the heartbeat loop
169
- # in case there is ever a bug. This means it will take at least 1s between
170
- # two subsequent cancellations on the same task at the moment
171
- await asyncio.sleep(1.0)
172
- continue
173
- except Exception as exc:
174
- # don't stop heartbeat loop if there are transient exceptions!
175
- time_elapsed = time.monotonic() - t0
176
- error = exc
177
- logger.warning(f"Heartbeat attempt failed ({time_elapsed=}, {error=})")
178
-
179
- heartbeat_duration = time.monotonic() - t0
180
- time_until_next_hearbeat = max(0.0, HEARTBEAT_INTERVAL - heartbeat_duration)
181
- await asyncio.sleep(time_until_next_hearbeat)
182
-
183
- async def _heartbeat_handle_cancellations(self) -> bool:
184
- # Return True if a cancellation event was received, in that case we shouldn't wait too long for another heartbeat
185
-
186
- # Don't send heartbeats for tasks waiting to be checkpointed.
187
- # Calling gRPC methods open new connections which block the
188
- # checkpointing process.
189
- if self._waiting_for_checkpoint:
190
- return False
191
-
192
- request = api_pb2.ContainerHeartbeatRequest(supports_graceful_input_cancellation=True)
193
- if self.current_input_id is not None:
194
- request.current_input_id = self.current_input_id
195
- if self.current_input_started_at is not None:
196
- request.current_input_started_at = self.current_input_started_at
197
-
198
- # TODO(erikbern): capture exceptions?
199
- response = await retry_transient_errors(
200
- self._client.stub.ContainerHeartbeat, request, attempt_timeout=HEARTBEAT_TIMEOUT
201
- )
178
+ def call_function(
179
+ user_code_event_loop: UserCodeEventLoop,
180
+ container_io_manager: "modal._runtime.container_io_manager.ContainerIOManager",
181
+ finalized_functions: dict[str, "modal._runtime.user_code_imports.FinalizedFunction"],
182
+ batch_max_size: int,
183
+ batch_wait_ms: int,
184
+ ):
185
+ async def run_input_async(io_context: IOContext) -> None:
186
+ started_at = time.time()
187
+ input_ids, function_call_ids = io_context.input_ids, io_context.function_call_ids
188
+ reset_context = _set_current_context_ids(input_ids, function_call_ids)
189
+ async with container_io_manager.handle_input_exception.aio(io_context, started_at):
190
+ res = io_context.call_finalized_function()
191
+ # TODO(erikbern): any exception below shouldn't be considered a user exception
192
+ if io_context.finalized_function.is_generator:
193
+ if not inspect.isasyncgen(res):
194
+ raise InvalidError(f"Async generator function returned value of type {type(res)}")
202
195
 
203
- if response.HasField("cancel_input_event"):
204
- # Pause processing of the current input by signaling self a SIGUSR1.
205
- input_ids_to_cancel = response.cancel_input_event.input_ids
206
- if input_ids_to_cancel:
207
- if self._input_concurrency > 1:
208
- logger.info(
209
- "Shutting down task to stop some subset of inputs (concurrent functions don't support fine-grained cancellation)"
196
+ # Send up to this many outputs at a time.
197
+ generator_queue: asyncio.Queue[Any] = await container_io_manager._queue_create.aio(1024)
198
+ generator_output_task = asyncio.create_task(
199
+ container_io_manager.generator_output_task.aio(
200
+ function_call_ids[0],
201
+ io_context.finalized_function.data_format,
202
+ generator_queue,
210
203
  )
211
- # This is equivalent to a task cancellation or preemption from worker code,
212
- # except we do not send a SIGKILL to forcefully exit after 30 seconds.
213
- #
214
- # SIGINT always interrupts the main thread, but not any auxiliary threads. On a
215
- # sync function without concurrent inputs, this raises a KeyboardInterrupt. When
216
- # there are concurrent inputs, we cannot interrupt the thread pool, but the
217
- # interpreter stops waiting for daemon threads and exits. On async functions,
218
- # this signal lands outside the event loop, stopping `run_until_complete()`.
219
- os.kill(os.getpid(), signal.SIGINT)
220
-
221
- elif self.current_input_id in input_ids_to_cancel:
222
- # This goes to a registered signal handler for sync Modal functions, or to the
223
- # `SignalHandlingEventLoop` for async functions.
224
- #
225
- # We only send this signal on functions that do not have concurrent inputs enabled.
226
- # This allows us to do fine-grained input cancellation. On sync functions, the
227
- # SIGUSR1 signal should interrupt the main thread where user code is running,
228
- # raising an InputCancellation() exception. On async functions, the signal should
229
- # reach a handler in SignalHandlingEventLoop, which cancels the task.
230
- os.kill(os.getpid(), signal.SIGUSR1)
231
- return True
232
- return False
233
-
234
- @contextlib.asynccontextmanager
235
- async def heartbeats(self):
236
- async with TaskContext() as tc:
237
- self._heartbeat_loop = t = tc.create_task(self._run_heartbeat_loop())
238
- t.set_name("heartbeat loop")
239
- try:
240
- yield
241
- finally:
242
- t.cancel()
243
-
244
- def stop_heartbeat(self):
245
- if self._heartbeat_loop:
246
- self._heartbeat_loop.cancel()
247
-
248
- async def get_serialized_function(self) -> Tuple[Optional[Any], Callable]:
249
- # Fetch the serialized function definition
250
- request = api_pb2.FunctionGetSerializedRequest(function_id=self.function_id)
251
- response = await self._client.stub.FunctionGetSerialized(request)
252
- fun = self.deserialize(response.function_serialized)
253
-
254
- if response.class_serialized:
255
- cls = self.deserialize(response.class_serialized)
256
- else:
257
- cls = None
258
-
259
- return cls, fun
260
-
261
- def serialize(self, obj: Any) -> bytes:
262
- return serialize(obj)
263
-
264
- def deserialize(self, data: bytes) -> Any:
265
- return deserialize(data, self._client)
266
-
267
- @synchronizer.no_io_translation
268
- def serialize_data_format(self, obj: Any, data_format: int) -> bytes:
269
- return serialize_data_format(obj, data_format)
270
-
271
- def deserialize_data_format(self, data: bytes, data_format: int) -> Any:
272
- return deserialize_data_format(data, data_format, self._client)
273
-
274
- async def get_data_in(self, function_call_id: str) -> AsyncIterator[Any]:
275
- """Read from the `data_in` stream of a function call."""
276
- async for data in _stream_function_call_data(self._client, function_call_id, "data_in"):
277
- yield data
278
-
279
- async def put_data_out(
280
- self,
281
- function_call_id: str,
282
- start_index: int,
283
- data_format: int,
284
- messages_bytes: List[Any],
285
- ) -> None:
286
- """Put data onto the `data_out` stream of a function call.
287
-
288
- This is used for generator outputs, which includes web endpoint responses. Note that this
289
- was introduced as a performance optimization in client version 0.57, so older clients will
290
- still use the previous Postgres-backed system based on `FunctionPutOutputs()`.
291
- """
292
- data_chunks: List[api_pb2.DataChunk] = []
293
- for i, message_bytes in enumerate(messages_bytes):
294
- chunk = api_pb2.DataChunk(data_format=data_format, index=start_index + i) # type: ignore
295
- if len(message_bytes) > MAX_OBJECT_SIZE_BYTES:
296
- chunk.data_blob_id = await blob_upload(message_bytes, self._client.stub)
297
- else:
298
- chunk.data = message_bytes
299
- data_chunks.append(chunk)
300
-
301
- req = api_pb2.FunctionCallPutDataRequest(function_call_id=function_call_id, data_chunks=data_chunks)
302
- await retry_transient_errors(self._client.stub.FunctionCallPutDataOut, req)
303
-
304
- async def generator_output_task(self, function_call_id: str, data_format: int, message_rx: asyncio.Queue) -> None:
305
- """Task that feeds generator outputs into a function call's `data_out` stream."""
306
- index = 1
307
- received_sentinel = False
308
- while not received_sentinel:
309
- message = await message_rx.get()
310
- if message is self._GENERATOR_STOP_SENTINEL:
311
- break
312
- # ASGI 'http.response.start' and 'http.response.body' msgs are observed to be separated by 1ms.
313
- # If we don't sleep here for 1ms we end up with an extra call to .put_data_out().
314
- if index == 1:
315
- await asyncio.sleep(0.001)
316
- messages_bytes = [serialize_data_format(message, data_format)]
317
- total_size = len(messages_bytes[0]) + 512
318
- while total_size < 16 * 1024 * 1024: # 16 MiB, maximum size in a single message
319
- try:
320
- message = message_rx.get_nowait()
321
- except asyncio.QueueEmpty:
322
- break
323
- if message is self._GENERATOR_STOP_SENTINEL:
324
- received_sentinel = True
325
- break
326
- else:
327
- messages_bytes.append(serialize_data_format(message, data_format))
328
- total_size += len(messages_bytes[-1]) + 512 # 512 bytes for estimated framing overhead
329
- await self.put_data_out(function_call_id, index, data_format, messages_bytes)
330
- index += len(messages_bytes)
331
-
332
- async def _queue_create(self, size: int) -> asyncio.Queue:
333
- """Create a queue, on the synchronicity event loop (needed on Python 3.8 and 3.9)."""
334
- return asyncio.Queue(size)
335
-
336
- async def _queue_put(self, queue: asyncio.Queue, value: Any) -> None:
337
- """Put a value onto a queue, using the synchronicity event loop."""
338
- await queue.put(value)
339
-
340
- async def populate_input_blobs(self, item: api_pb2.FunctionInput):
341
- args = await blob_download(item.args_blob_id, self._client.stub)
342
-
343
- # Mutating
344
- item.ClearField("args_blob_id")
345
- item.args = args
346
- return item
347
-
348
- def get_average_call_time(self) -> float:
349
- if self.calls_completed == 0:
350
- return 0
351
-
352
- return self.total_user_time / self.calls_completed
353
-
354
- def get_max_inputs_to_fetch(self):
355
- if self.calls_completed == 0:
356
- return 1
357
-
358
- return math.ceil(RTT_S / max(self.get_average_call_time(), 1e-6))
359
-
360
- @synchronizer.no_io_translation
361
- async def _generate_inputs(self) -> AsyncIterator[Tuple[str, str, api_pb2.FunctionInput]]:
362
- request = api_pb2.FunctionGetInputsRequest(function_id=self.function_id)
363
- eof_received = False
364
- iteration = 0
365
- while not eof_received and _container_app.fetching_inputs:
366
- request.average_call_time = self.get_average_call_time()
367
- request.max_values = self.get_max_inputs_to_fetch() # Deprecated; remove.
368
- request.input_concurrency = self._input_concurrency
369
-
370
- await self._semaphore.acquire()
371
- yielded = False
372
- try:
373
- # If number of active inputs is at max queue size, this will block.
374
- iteration += 1
375
- response: api_pb2.FunctionGetInputsResponse = await retry_transient_errors(
376
- self._client.stub.FunctionGetInputs, request
377
204
  )
378
205
 
379
- if response.rate_limit_sleep_duration:
380
- logger.info(
381
- "Task exceeded rate limit, sleeping for %.2fs before trying again."
382
- % response.rate_limit_sleep_duration
383
- )
384
- await asyncio.sleep(response.rate_limit_sleep_duration)
385
- elif response.inputs:
386
- # for input cancellations and concurrency logic we currently assume
387
- # that there is no input buffering in the container
388
- assert len(response.inputs) == 1
389
-
390
- for item in response.inputs:
391
- if item.kill_switch:
392
- logger.debug(f"Task {self.task_id} input kill signal input.")
393
- eof_received = True
394
- break
395
- if item.input_id in self.cancelled_input_ids:
396
- continue
397
-
398
- # If we got a pointer to a blob, download it from S3.
399
- if item.input.WhichOneof("args_oneof") == "args_blob_id":
400
- input_pb = await self.populate_input_blobs(item.input)
401
- else:
402
- input_pb = item.input
403
-
404
- # If yielded, allow semaphore to be released via complete_call
405
- yield (item.input_id, item.function_call_id, input_pb)
406
- yielded = True
407
-
408
- # We only support max_inputs = 1 at the moment
409
- if item.input.final_input or self.function_def.max_inputs == 1:
410
- eof_received = True
411
- break
412
- finally:
413
- if not yielded:
414
- self._semaphore.release()
415
-
416
- @synchronizer.no_io_translation
417
- async def run_inputs_outputs(self, input_concurrency: int = 1) -> AsyncIterator[Tuple[str, str, Any, Any]]:
418
- # Ensure we do not fetch new inputs when container is too busy.
419
- # Before trying to fetch an input, acquire the semaphore:
420
- # - if no input is fetched, release the semaphore.
421
- # - or, when the output for the fetched input is sent, release the semaphore.
422
- self._input_concurrency = input_concurrency
423
- self._semaphore = asyncio.Semaphore(input_concurrency)
424
-
425
- try:
426
- async for input_id, function_call_id, input_pb in self._generate_inputs():
427
- args, kwargs = self.deserialize(input_pb.args) if input_pb.args else ((), {})
428
- self.current_input_id, self.current_input_started_at = (input_id, time.time())
429
- yield input_id, function_call_id, args, kwargs
430
- self.current_input_id, self.current_input_started_at = (None, None)
431
- finally:
432
- # collect all active input slots, meaning all inputs have wrapped up.
433
- for _ in range(input_concurrency):
434
- await self._semaphore.acquire()
435
-
436
- async def _push_output(self, input_id, started_at: float, data_format=api_pb2.DATA_FORMAT_UNSPECIFIED, **kwargs):
437
- # upload data to S3 if too big.
438
- if "data" in kwargs and kwargs["data"] and len(kwargs["data"]) > MAX_OBJECT_SIZE_BYTES:
439
- data_blob_id = await blob_upload(kwargs["data"], self._client.stub)
440
- # mutating kwargs.
441
- del kwargs["data"]
442
- kwargs["data_blob_id"] = data_blob_id
443
-
444
- output = api_pb2.FunctionPutOutputsItem(
445
- input_id=input_id,
446
- input_started_at=started_at,
447
- output_created_at=time.time(),
448
- result=api_pb2.GenericResult(**kwargs),
449
- data_format=data_format,
450
- )
451
-
452
- await retry_transient_errors(
453
- self._client.stub.FunctionPutOutputs,
454
- api_pb2.FunctionPutOutputsRequest(outputs=[output]),
455
- additional_status_codes=[Status.RESOURCE_EXHAUSTED],
456
- max_retries=None, # Retry indefinitely, trying every 1s.
457
- )
458
-
459
- def serialize_exception(self, exc: BaseException) -> Optional[bytes]:
460
- try:
461
- return self.serialize(exc)
462
- except Exception as serialization_exc:
463
- logger.info(f"Failed to serialize exception {exc}: {serialization_exc}")
464
- # We can't always serialize exceptions.
465
- return None
466
-
467
- def serialize_traceback(self, exc: BaseException) -> Tuple[Optional[bytes], Optional[bytes]]:
468
- serialized_tb, tb_line_cache = None, None
469
-
470
- try:
471
- tb_dict, line_cache = extract_traceback(exc, self.task_id)
472
- serialized_tb = self.serialize(tb_dict)
473
- tb_line_cache = self.serialize(line_cache)
474
- except Exception:
475
- logger.info("Failed to serialize exception traceback.")
476
-
477
- return serialized_tb, tb_line_cache
478
-
479
- @contextlib.asynccontextmanager
480
- async def handle_user_exception(self) -> AsyncGenerator[None, None]:
481
- """Sets the task as failed in a way where it's not retried.
482
-
483
- Used for handling exceptions from container lifecycle methods at the moment, which should
484
- trigger a task failure state.
485
- """
486
- try:
487
- yield
488
- except KeyboardInterrupt:
489
- # Send no task result in case we get sigint:ed by the runner
490
- # The status of the input should have been handled externally already in that case
491
- raise
492
- except BaseException as exc:
493
- # Since this is on a different thread, sys.exc_info() can't find the exception in the stack.
494
- traceback.print_exception(type(exc), exc, exc.__traceback__)
495
-
496
- serialized_tb, tb_line_cache = self.serialize_traceback(exc)
497
-
498
- result = api_pb2.GenericResult(
499
- status=api_pb2.GenericResult.GENERIC_STATUS_FAILURE,
500
- data=self.serialize_exception(exc),
501
- exception=repr(exc),
502
- traceback="".join(traceback.format_exception(type(exc), exc, exc.__traceback__)),
503
- serialized_tb=serialized_tb,
504
- tb_line_cache=tb_line_cache,
505
- )
506
-
507
- req = api_pb2.TaskResultRequest(result=result)
508
- await retry_transient_errors(self._client.stub.TaskResult, req)
509
-
510
- # Shut down the task gracefully
511
- raise UserException()
512
-
513
- @contextlib.asynccontextmanager
514
- async def handle_input_exception(self, input_id, started_at: float) -> AsyncGenerator[None, None]:
515
- """Handle an exception while processing a function input."""
516
- try:
517
- yield
518
- except KeyboardInterrupt:
519
- raise
520
- except (InputCancellation, asyncio.CancelledError):
521
- # just skip creating any output for this input and keep going with the next instead
522
- # it should have been marked as cancelled already in the backend at this point so it
523
- # won't be retried
524
- logger.warning(f"The current input ({input_id=}) was cancelled by a user request")
525
- await self.complete_call(started_at)
526
- return
527
- except BaseException as exc:
528
- # print exception so it's logged
529
- traceback.print_exc()
530
- serialized_tb, tb_line_cache = self.serialize_traceback(exc)
531
-
532
- # Note: we're not serializing the traceback since it contains
533
- # local references that means we can't unpickle it. We *are*
534
- # serializing the exception, which may have some issues (there
535
- # was an earlier note about it that it might not be possible
536
- # to unpickle it in some cases). Let's watch out for issues.
537
- await self._push_output(
538
- input_id,
539
- started_at=started_at,
540
- data_format=api_pb2.DATA_FORMAT_PICKLE,
541
- status=api_pb2.GenericResult.GENERIC_STATUS_FAILURE,
542
- data=self.serialize_exception(exc),
543
- exception=repr(exc),
544
- traceback=traceback.format_exc(),
545
- serialized_tb=serialized_tb,
546
- tb_line_cache=tb_line_cache,
547
- )
548
- await self.complete_call(started_at)
549
-
550
- async def complete_call(self, started_at):
551
- self.total_user_time += time.time() - started_at
552
- self.calls_completed += 1
553
- self._semaphore.release()
554
-
555
- @synchronizer.no_io_translation
556
- async def push_output(self, input_id, started_at: float, data: Any, data_format: int) -> None:
557
- await self._push_output(
558
- input_id,
559
- started_at=started_at,
560
- data_format=data_format,
561
- status=api_pb2.GenericResult.GENERIC_STATUS_SUCCESS,
562
- data=self.serialize_data_format(data, data_format),
563
- )
564
- await self.complete_call(started_at)
565
-
566
- async def restore(self) -> None:
567
- # Busy-wait for restore. `/__modal/restore-state.json` is created
568
- # by the worker process with updates to the container config.
569
- restored_path = Path(config.get("restore_state_path"))
570
- start = time.perf_counter()
571
- while not restored_path.exists():
572
- logger.debug(f"Waiting for restore (elapsed={time.perf_counter() - start:.3f}s)")
573
- await asyncio.sleep(0.01)
574
- continue
575
-
576
- logger.debug("Container: restored")
577
-
578
- # Look for state file and create new client with updated credentials.
579
- # State data is serialized with key-value pairs, example: {"task_id": "tk-000"}
580
- with restored_path.open("r") as file:
581
- restored_state = json.load(file)
582
-
583
- # Local FunctionIOManager state.
584
- for key in ["task_id", "function_id"]:
585
- if value := restored_state.get(key):
586
- logger.debug(f"Updating FunctionIOManager.{key} = {value}")
587
- setattr(self, key, restored_state[key])
588
-
589
- # Env vars and global state.
590
- for key, value in restored_state.items():
591
- # Empty string indicates that value does not need to be updated.
592
- if value != "":
593
- config.override_locally(key, value)
594
-
595
- # Restore input to default state.
596
- self.current_input_id = None
597
- self.current_input_started_at = None
598
-
599
- self._client = await _Client.from_env()
600
- self._waiting_for_checkpoint = False
601
-
602
- async def checkpoint(self) -> None:
603
- """Message server indicating that function is ready to be checkpointed."""
604
- if self.checkpoint_id:
605
- logger.debug(f"Checkpoint ID: {self.checkpoint_id}")
606
-
607
- await self._client.stub.ContainerCheckpoint(
608
- api_pb2.ContainerCheckpointRequest(checkpoint_id=self.checkpoint_id)
609
- )
206
+ item_count = 0
207
+ async for value in res:
208
+ await container_io_manager._queue_put.aio(generator_queue, value)
209
+ item_count += 1
610
210
 
611
- self._waiting_for_checkpoint = True
612
- await self._client._close()
613
-
614
- logger.debug("Checkpointing request sent. Connection closed.")
615
- await self.restore()
616
-
617
- async def volume_commit(self, volume_ids: List[str]) -> None:
618
- """
619
- Perform volume commit for given `volume_ids`.
620
- Only used on container exit to persist uncommitted changes on behalf of user.
621
- """
622
- if not volume_ids:
623
- return
624
- await asyncify(os.sync)()
625
- results = await asyncio.gather(
626
- *[
627
- retry_transient_errors(
628
- self._client.stub.VolumeCommit,
629
- api_pb2.VolumeCommitRequest(volume_id=v_id),
630
- max_retries=9,
631
- base_delay=0.25,
632
- max_delay=256,
633
- delay_factor=2,
211
+ await container_io_manager._queue_put.aio(generator_queue, _ContainerIOManager._GENERATOR_STOP_SENTINEL)
212
+ await generator_output_task # Wait to finish sending generator outputs.
213
+ message = api_pb2.GeneratorDone(items_total=item_count)
214
+ await container_io_manager.push_outputs.aio(
215
+ io_context,
216
+ started_at,
217
+ message,
218
+ api_pb2.DATA_FORMAT_GENERATOR_DONE,
634
219
  )
635
- for v_id in volume_ids
636
- ],
637
- return_exceptions=True,
638
- )
639
- for volume_id, res in zip(volume_ids, results):
640
- if isinstance(res, Exception):
641
- logger.error(f"modal.Volume background commit failed for {volume_id}. Exception: {res}")
642
220
  else:
643
- logger.debug(f"modal.Volume background commit success for {volume_id}.")
644
-
645
-
646
- FunctionIOManager = synchronize_api(_FunctionIOManager)
647
-
221
+ if not inspect.iscoroutine(res) or inspect.isgenerator(res) or inspect.isasyncgen(res):
222
+ raise InvalidError(
223
+ f"Async (non-generator) function returned value of type {type(res)}"
224
+ " You might need to use @app.function(..., is_generator=True)."
225
+ )
226
+ value = await res
227
+ await container_io_manager.push_outputs.aio(
228
+ io_context,
229
+ started_at,
230
+ value,
231
+ io_context.finalized_function.data_format,
232
+ )
233
+ reset_context()
648
234
 
649
- def call_function_sync(
650
- function_io_manager, #: FunctionIOManager, TODO: this type is generated at runtime
651
- imp_fun: ImportedFunction,
652
- ):
653
- def run_input(input_id: str, function_call_id: str, args: Any, kwargs: Any) -> None:
235
+ def run_input_sync(io_context: IOContext) -> None:
654
236
  started_at = time.time()
655
- reset_context = _set_current_context_ids(input_id, function_call_id)
656
- with function_io_manager.handle_input_exception(input_id, started_at):
657
- logger.debug(f"Starting input {input_id} (sync)")
658
- res = imp_fun.fun(*args, **kwargs)
659
- logger.debug(f"Finished input {input_id} (sync)")
237
+ input_ids, function_call_ids = io_context.input_ids, io_context.function_call_ids
238
+ reset_context = _set_current_context_ids(input_ids, function_call_ids)
239
+ with container_io_manager.handle_input_exception(io_context, started_at):
240
+ res = io_context.call_finalized_function()
660
241
 
661
242
  # TODO(erikbern): any exception below shouldn't be considered a user exception
662
- if imp_fun.is_generator:
243
+ if io_context.finalized_function.is_generator:
663
244
  if not inspect.isgenerator(res):
664
245
  raise InvalidError(f"Generator function returned value of type {type(res)}")
665
246
 
666
247
  # Send up to this many outputs at a time.
667
- generator_queue: asyncio.Queue[Any] = function_io_manager._queue_create(1024)
668
- generator_output_task = function_io_manager.generator_output_task(
669
- function_call_id,
670
- imp_fun.data_format,
248
+ generator_queue: asyncio.Queue[Any] = container_io_manager._queue_create(1024)
249
+ generator_output_task: concurrent.futures.Future = container_io_manager.generator_output_task( # type: ignore
250
+ function_call_ids[0],
251
+ io_context.finalized_function.data_format,
671
252
  generator_queue,
672
- _future=True, # Synchronicity magic to return a future.
253
+ _future=True, # type: ignore # Synchronicity magic to return a future.
673
254
  )
674
255
 
675
256
  item_count = 0
676
257
  for value in res:
677
- function_io_manager._queue_put(generator_queue, value)
258
+ container_io_manager._queue_put(generator_queue, value)
678
259
  item_count += 1
679
260
 
680
- function_io_manager._queue_put(generator_queue, _FunctionIOManager._GENERATOR_STOP_SENTINEL)
261
+ container_io_manager._queue_put(generator_queue, _ContainerIOManager._GENERATOR_STOP_SENTINEL)
681
262
  generator_output_task.result() # Wait to finish sending generator outputs.
682
263
  message = api_pb2.GeneratorDone(items_total=item_count)
683
- function_io_manager.push_output(input_id, started_at, message, api_pb2.DATA_FORMAT_GENERATOR_DONE)
264
+ container_io_manager.push_outputs(io_context, started_at, message, api_pb2.DATA_FORMAT_GENERATOR_DONE)
684
265
  else:
685
266
  if inspect.iscoroutine(res) or inspect.isgenerator(res) or inspect.isasyncgen(res):
686
267
  raise InvalidError(
687
268
  f"Sync (non-generator) function return value of type {type(res)}."
688
- " You might need to use @stub.function(..., is_generator=True)."
269
+ " You might need to use @app.function(..., is_generator=True)."
689
270
  )
690
- function_io_manager.push_output(input_id, started_at, res, imp_fun.data_format)
271
+ container_io_manager.push_outputs(
272
+ io_context, started_at, res, io_context.finalized_function.data_format
273
+ )
691
274
  reset_context()
692
275
 
693
- if imp_fun.input_concurrency > 1:
694
- # We can't use `concurrent.futures.ThreadPoolExecutor` here because in Python 3.11+, this
695
- # class has no workaround that allows us to exit the Python interpreter process without
696
- # waiting for the worker threads to finish. We need this behavior on SIGINT.
697
-
698
- import queue
699
- import threading
700
-
701
- spawned_workers = 0
702
- inputs: queue.Queue[Any] = queue.Queue()
703
- finished = threading.Event()
704
-
705
- def worker_thread():
706
- while not finished.is_set():
707
- try:
708
- args = inputs.get(timeout=1)
709
- except queue.Empty:
710
- continue
711
- try:
712
- run_input(*args)
713
- except BaseException:
714
- # This should basically never happen, since only KeyboardInterrupt is the only error that can
715
- # bubble out of from handle_input_exception and those wouldn't be raised outside the main thread
716
- pass
717
- inputs.task_done()
718
-
719
- for input_id, function_call_id, args, kwargs in function_io_manager.run_inputs_outputs(
720
- imp_fun.input_concurrency
721
- ):
722
- if spawned_workers < imp_fun.input_concurrency:
723
- threading.Thread(target=worker_thread, daemon=True).start()
724
- spawned_workers += 1
725
- inputs.put((input_id, function_call_id, args, kwargs))
726
-
727
- finished.set()
728
- inputs.join()
729
-
730
- else:
731
- for input_id, function_call_id, args, kwargs in function_io_manager.run_inputs_outputs(
732
- imp_fun.input_concurrency
733
- ):
734
- try:
735
- run_input(input_id, function_call_id, args, kwargs)
736
- except:
737
- raise
738
-
739
-
740
- async def call_function_async(
741
- function_io_manager, #: FunctionIOManager, TODO: this type is generated at runtime
742
- imp_fun: ImportedFunction,
743
- ):
744
- async def run_input(input_id: str, function_call_id: str, args: Any, kwargs: Any) -> None:
745
- started_at = time.time()
746
- reset_context = _set_current_context_ids(input_id, function_call_id)
747
- async with function_io_manager.handle_input_exception.aio(input_id, started_at):
748
- logger.debug(f"Starting input {input_id} (async)")
749
- res = imp_fun.fun(*args, **kwargs)
750
- logger.debug(f"Finished input {input_id} (async)")
751
-
752
- # TODO(erikbern): any exception below shouldn't be considered a user exception
753
- if imp_fun.is_generator:
754
- if not inspect.isasyncgen(res):
755
- raise InvalidError(f"Async generator function returned value of type {type(res)}")
756
-
757
- # Send up to this many outputs at a time.
758
- generator_queue: asyncio.Queue[Any] = await function_io_manager._queue_create.aio(1024)
759
- generator_output_task = asyncio.create_task(
760
- function_io_manager.generator_output_task.aio(
761
- function_call_id,
762
- imp_fun.data_format,
763
- generator_queue,
276
+ if container_io_manager.target_concurrency > 1:
277
+ with DaemonizedThreadPool(max_threads=container_io_manager.max_concurrency) as thread_pool:
278
+
279
+ def make_async_cancel_callback(task):
280
+ def f():
281
+ user_code_event_loop.loop.call_soon_threadsafe(task.cancel)
282
+
283
+ return f
284
+
285
+ did_sigint = False
286
+
287
+ def cancel_callback_sync():
288
+ nonlocal did_sigint
289
+ # We only want one sigint even if multiple inputs are cancelled
290
+ # A second sigint would forcibly shut down the event loop and spew
291
+ # out a bunch of tracebacks, which we only want to happen in case
292
+ # the worker kills this process after a failed self-termination
293
+ if not did_sigint:
294
+ did_sigint = True
295
+ logger.warning(
296
+ "User cancelling input of non-async functions with allow_concurrent_inputs > 1.\n"
297
+ "This shuts down the container, causing concurrently running inputs to be "
298
+ "rescheduled in other containers."
764
299
  )
765
- )
766
-
767
- item_count = 0
768
- async for value in res:
769
- await function_io_manager._queue_put.aio(generator_queue, value)
770
- item_count += 1
300
+ os.kill(os.getpid(), signal.SIGINT)
771
301
 
772
- await function_io_manager._queue_put.aio(generator_queue, _FunctionIOManager._GENERATOR_STOP_SENTINEL)
773
- await generator_output_task # Wait to finish sending generator outputs.
774
- message = api_pb2.GeneratorDone(items_total=item_count)
775
- await function_io_manager.push_output.aio(
776
- input_id, started_at, message, api_pb2.DATA_FORMAT_GENERATOR_DONE
777
- )
778
- else:
779
- if not inspect.iscoroutine(res) or inspect.isgenerator(res) or inspect.isasyncgen(res):
780
- raise InvalidError(
781
- f"Async (non-generator) function returned value of type {type(res)}"
782
- " You might need to use @stub.function(..., is_generator=True)."
783
- )
784
- value = await res
785
- await function_io_manager.push_output.aio(input_id, started_at, value, imp_fun.data_format)
786
- reset_context()
302
+ async def run_concurrent_inputs():
303
+ # all run_input coroutines will have completed by the time we leave the execution context
304
+ # but the wrapping *tasks* may not yet have been resolved, so we add a 0.01s
305
+ # for them to resolve gracefully:
306
+ async with TaskContext(0.01) as task_context:
307
+ async for io_context in container_io_manager.run_inputs_outputs.aio(
308
+ finalized_functions, batch_max_size, batch_wait_ms
309
+ ):
310
+ # Note that run_inputs_outputs will not return until all the input slots are released
311
+ # so that they can be acquired by the run_inputs_outputs finalizer
312
+ # This prevents leaving the task_context before outputs have been created
313
+ # TODO: refactor to make this a bit more easy to follow?
314
+ if io_context.finalized_function.is_async:
315
+ input_task = task_context.create_task(run_input_async(io_context))
316
+ io_context.set_cancel_callback(make_async_cancel_callback(input_task))
317
+ else:
318
+ # run sync input in thread
319
+ thread_pool.submit(run_input_sync, io_context)
320
+ io_context.set_cancel_callback(cancel_callback_sync)
787
321
 
788
- if imp_fun.input_concurrency > 1:
789
- # all run_input coroutines will have completed by the time we leave the execution context
790
- # but the wrapping *tasks* may not yet have been resolved, so we add a 0.01s
791
- # for them to resolve gracefully:
792
- async with TaskContext(0.01) as execution_context:
793
- async for input_id, function_call_id, args, kwargs in function_io_manager.run_inputs_outputs.aio(
794
- imp_fun.input_concurrency
795
- ):
796
- # Note that run_inputs_outputs will not return until the concurrency semaphore has
797
- # released all its slots so that they can be acquired by the run_inputs_outputs finalizer
798
- # This prevents leaving the execution_context before outputs have been created
799
- # TODO: refactor to make this a bit more easy to follow?
800
- execution_context.create_task(run_input(input_id, function_call_id, args, kwargs))
801
- else:
802
- async for input_id, function_call_id, args, kwargs in function_io_manager.run_inputs_outputs.aio(
803
- imp_fun.input_concurrency
804
- ):
805
- await run_input(input_id, function_call_id, args, kwargs)
806
-
807
-
808
- @dataclass
809
- class ImportedFunction:
810
- obj: Any
811
- fun: Callable
812
- stub: Optional[_Stub]
813
- is_async: bool
814
- is_generator: bool
815
- data_format: int # api_pb2.DataFormat
816
- input_concurrency: int
817
- is_auto_snapshot: bool
818
- function: _Function
819
-
820
-
821
- def import_function(
822
- function_def: api_pb2.Function,
823
- ser_cls,
824
- ser_fun,
825
- ser_params: Optional[bytes],
826
- function_io_manager,
827
- client: Client,
828
- ) -> ImportedFunction:
829
- """Imports a function dynamically, and locates the stub.
830
-
831
- This is somewhat complex because we're dealing with 3 quite different type of functions:
832
- 1. Functions defined in global scope and decorated in global scope (Function objects)
833
- 2. Functions defined in global scope but decorated elsewhere (these will be raw callables)
834
- 3. Serialized functions
835
-
836
- In addition, we also need to handle
837
- * Normal functions
838
- * Methods on classes (in which case we need to instantiate the object)
839
-
840
- This helper also handles web endpoints, ASGI/WSGI servers, and HTTP servers.
841
-
842
- In order to locate the stub, we try two things:
843
- * If the function is a Function, we can get the stub directly from it
844
- * Otherwise, use the stub name and look it up from a global list of stubs: this
845
- typically only happens in case 2 above, or in sometimes for case 3
846
-
847
- Note that `import_function` is *not* synchronized, becase we need it to run on the main
848
- thread. This is so that any user code running in global scope (which executes as a part of
849
- the import) runs on the right thread.
850
- """
851
- module: Optional[ModuleType] = None
852
- cls: Optional[Type] = None
853
- fun: Callable
854
- function: Optional[_Function] = None
855
- active_stub: Optional[_Stub] = None
856
- pty_info: api_pb2.PTYInfo = function_def.pty_info
857
-
858
- if ser_fun is not None:
859
- # This is a serialized function we already fetched from the server
860
- cls, fun = ser_cls, ser_fun
322
+ user_code_event_loop.run(run_concurrent_inputs())
861
323
  else:
862
- # Load the module dynamically
863
- module = importlib.import_module(function_def.module_name)
864
- qual_name: str = function_def.function_name
865
-
866
- if not is_global_function(qual_name):
867
- raise LocalFunctionError("Attempted to load a function defined in a function scope")
868
-
869
- parts = qual_name.split(".")
870
- if len(parts) == 1:
871
- # This is a function
872
- cls = None
873
- f = getattr(module, qual_name)
874
- if isinstance(f, Function):
875
- function = synchronizer._translate_in(f)
876
- fun = function.get_raw_f()
877
- active_stub = function._stub
878
- else:
879
- fun = f
880
- elif len(parts) == 2:
881
- # This is a method on a class
882
- cls_name, fun_name = parts
883
- cls = getattr(module, cls_name)
884
- if isinstance(cls, Cls):
885
- # The cls decorator is in global scope
886
- _cls = synchronizer._translate_in(cls)
887
- fun = _cls._callables[fun_name]
888
- function = _cls._functions.get(fun_name)
889
- active_stub = _cls._stub
890
- else:
891
- # This is a raw class
892
- fun = getattr(cls, fun_name)
893
- else:
894
- raise InvalidError(f"Invalid function qualname {qual_name}")
895
-
896
- # If the cls/function decorator was applied in local scope, but the stub is global, we can look it up
897
- if active_stub is None:
898
- # This branch is reached in the special case that the imported function is 1) not serialized, and 2) isn't a FunctionHandle - i.e, not decorated at definition time
899
- # Look at all instantiated stubs - if there is only one with the indicated name, use that one
900
- stub_name: Optional[str] = function_def.stub_name or None # coalesce protobuf field to None
901
- matching_stubs = _Stub._all_stubs.get(stub_name, [])
902
- if len(matching_stubs) > 1:
903
- if stub_name is not None:
904
- warning_sub_message = f"stub with the same name ('{stub_name}')"
324
+ for io_context in container_io_manager.run_inputs_outputs(finalized_functions, batch_max_size, batch_wait_ms):
325
+ if io_context.finalized_function.is_async:
326
+ user_code_event_loop.run(run_input_async(io_context))
905
327
  else:
906
- warning_sub_message = "unnamed stub"
907
- logger.warning(
908
- f"You have more than one {warning_sub_message}. It's recommended to name all your Stubs uniquely when using multiple stubs"
909
- )
910
- elif len(matching_stubs) == 1:
911
- (active_stub,) = matching_stubs
912
- # there could also technically be zero found stubs, but that should probably never be an issue since that would mean user won't use is_inside or other function handles anyway
913
-
914
- # Check this property before we turn it into a method (overriden by webhooks)
915
- is_async = get_is_async(fun)
916
-
917
- # Use the function definition for whether this is a generator (overriden by webhooks)
918
- is_generator = function_def.function_type == api_pb2.Function.FUNCTION_TYPE_GENERATOR
919
-
920
- # What data format is used for function inputs and outputs
921
- data_format = api_pb2.DATA_FORMAT_PICKLE
922
-
923
- # Container can fetch multiple inputs simultaneously
924
- if pty_info.pty_type == api_pb2.PTYInfo.PTY_TYPE_SHELL:
925
- # Concurrency doesn't apply for `modal shell`.
926
- input_concurrency = 1
927
- else:
928
- input_concurrency = function_def.allow_concurrent_inputs or 1
328
+ # Set up a custom signal handler for `SIGUSR1`, which gets translated to an InputCancellation
329
+ # during function execution. This is sent to cancel inputs from the user
330
+ def _cancel_input_signal_handler(signum, stackframe):
331
+ raise InputCancellation("Input was cancelled by user")
929
332
 
930
- # Instantiate the class if it's defined
931
- if cls:
932
- if ser_params:
933
- _client: _Client = synchronizer._translate_in(client)
934
- args, kwargs = deserialize(ser_params, _client)
333
+ usr1_handler = signal.signal(signal.SIGUSR1, _cancel_input_signal_handler)
334
+ # run this sync code in the main thread, blocking the "userland" event loop
335
+ # this lets us cancel it using a signal handler that raises an exception
336
+ try:
337
+ run_input_sync(io_context)
338
+ finally:
339
+ signal.signal(signal.SIGUSR1, usr1_handler) # reset signal handler
340
+
341
+
342
+ def get_active_app_fallback(function_def: api_pb2.Function) -> _App:
343
+ # This branch is reached in the special case that the imported function/class is:
344
+ # 1) not serialized, and
345
+ # 2) isn't a FunctionHandle - i.e, not decorated at definition time
346
+ # Look at all instantiated apps - if there is only one with the indicated name, use that one
347
+ app_name: Optional[str] = function_def.app_name or None # coalesce protobuf field to None
348
+ matching_apps = _App._all_apps.get(app_name, [])
349
+ if len(matching_apps) == 1:
350
+ active_app: _App = matching_apps[0]
351
+ return active_app
352
+
353
+ if len(matching_apps) > 1:
354
+ if app_name is not None:
355
+ warning_sub_message = f"app with the same name ('{app_name}')"
935
356
  else:
936
- args, kwargs = (), {}
937
- obj = cls(*args, **kwargs)
938
- if isinstance(cls, Cls):
939
- obj = obj.get_obj()
940
- # Bind the function to the instance (using the descriptor protocol!)
941
- fun = fun.__get__(obj)
942
- else:
943
- obj = None
944
-
945
- if function_def.webhook_config.type:
946
- is_async = True
947
- is_generator = True
948
- data_format = api_pb2.DATA_FORMAT_ASGI
949
-
950
- if function_def.webhook_config.type == api_pb2.WEBHOOK_TYPE_ASGI_APP:
951
- # Function returns an asgi_app, which we can use as a callable.
952
- fun = asgi_app_wrapper(fun(), function_io_manager)
953
-
954
- elif function_def.webhook_config.type == api_pb2.WEBHOOK_TYPE_WSGI_APP:
955
- # Function returns an wsgi_app, which we can use as a callable.
956
- fun = wsgi_app_wrapper(fun(), function_io_manager)
957
-
958
- elif function_def.webhook_config.type == api_pb2.WEBHOOK_TYPE_FUNCTION:
959
- # Function is a webhook without an ASGI app. Create one for it.
960
- fun = asgi_app_wrapper(
961
- webhook_asgi_app(fun, function_def.webhook_config.method),
962
- function_io_manager,
963
- )
357
+ warning_sub_message = "unnamed app"
358
+ logger.warning(
359
+ f"You have more than one {warning_sub_message}. "
360
+ "It's recommended to name all your Apps uniquely when using multiple apps"
361
+ )
964
362
 
965
- elif function_def.webhook_config.type == api_pb2.WEBHOOK_TYPE_WEB_SERVER:
966
- # Function spawns an HTTP web server listening at a port.
967
- fun()
968
-
969
- # We intentionally try to connect to the external interface instead of the loopback
970
- # interface here so users are forced to expose the server. This allows us to potentially
971
- # change the implementation to use an external bridge in the future.
972
- host = get_ip_address(b"eth0")
973
- port = function_def.webhook_config.web_server_port
974
- startup_timeout = function_def.webhook_config.web_server_startup_timeout
975
- wait_for_web_server(host, port, timeout=startup_timeout)
976
- fun = asgi_app_wrapper(web_server_proxy(host, port), function_io_manager)
977
-
978
- else:
979
- raise InvalidError(f"Unrecognized web endpoint type {function_def.webhook_config.type}")
980
-
981
- return ImportedFunction(
982
- obj,
983
- fun,
984
- active_stub,
985
- is_async,
986
- is_generator,
987
- data_format,
988
- input_concurrency,
989
- function_def.is_auto_snapshot,
990
- function,
991
- )
363
+ # If we don't have an active app, create one on the fly
364
+ # The app object is used to carry the app layout etc
365
+ return _App()
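For context, the fallback above amounts to a name-keyed registry lookup: reuse the single matching app, warn on ambiguity, otherwise create a fresh one. A simplified, hypothetical version of the same logic (not modal's actual classes):

    import logging
    from typing import Optional

    logger = logging.getLogger(__name__)

    class App:
        _all_apps: dict[Optional[str], list["App"]] = {}

        def __init__(self, name: Optional[str] = None):
            self.name = name
            App._all_apps.setdefault(name, []).append(self)

    def get_active_app_fallback(app_name: Optional[str]) -> App:
        matching_apps = App._all_apps.get(app_name, [])
        if len(matching_apps) == 1:
            return matching_apps[0]  # unambiguous: reuse the single instantiated app
        if len(matching_apps) > 1:
            logger.warning("More than one app named %r; name your apps uniquely.", app_name)
        return App()  # otherwise create one on the fly to carry the app layout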
992
366
 
993
367
 
994
368
  def call_lifecycle_functions(
995
369
  event_loop: UserCodeEventLoop,
996
- function_io_manager, #: FunctionIOManager, TODO: this type is generated at runtime
997
- funcs: Iterable[Callable],
370
+ container_io_manager, #: ContainerIOManager, TODO: this type is generated at runtime
371
+ funcs: Sequence[Callable[..., Any]],
998
372
  ) -> None:
999
373
  """Call function(s), can be sync or async, but any return values are ignored."""
1000
- with function_io_manager.handle_user_exception():
374
+ with container_io_manager.handle_user_exception():
1001
375
  for func in funcs:
1002
376
  # We are deprecating parameterized exit methods but want to gracefully handle old code.
1003
377
  # We can remove this once the deprecation in the actual @exit decorator is enforced.
1004
- args = (None, None, None) if method_has_params(func) else ()
1005
- res = func(
1006
- *args
1007
- ) # in case func is non-async, it's executed here and sigint will by default interrupt it using a KeyboardInterrupt exception
378
+ args = (None, None, None) if callable_has_non_self_params(func) else ()
379
+ # in case func is non-async, it's executed here and sigint will by default
380
+ # interrupt it using a KeyboardInterrupt exception
381
+ res = func(*args)
1008
382
  if inspect.iscoroutine(res):
1009
383
  # if however func is async, we have to jump through some hoops
1010
384
  event_loop.run(res)
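The pattern here — call the hook, and only fall back to the event loop if it returned a coroutine — can be shown in isolation. A hedged sketch with assumed helper names, using a plain asyncio loop in place of the container's UserCodeEventLoop:

    import asyncio
    import inspect
    from typing import Any, Callable, Sequence

    def callable_has_non_self_params(func: Callable[..., Any]) -> bool:
        return any(name != "self" for name in inspect.signature(func).parameters)

    def call_lifecycle_functions(loop: asyncio.AbstractEventLoop, funcs: Sequence[Callable[..., Any]]) -> None:
        for func in funcs:
            # legacy @exit hooks took (exc_type, exc_value, traceback); pass Nones to keep them working
            args = (None, None, None) if callable_has_non_self_params(func) else ()
            res = func(*args)
            if inspect.iscoroutine(res):
                loop.run_until_complete(res)  # async hooks are driven to completion on the given loop

    loop = asyncio.new_event_loop()
    async def async_enter(): print("async enter")
    call_lifecycle_functions(loop, [lambda: print("sync enter"), async_enter])
    loop.close()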
1011
385
 
1012
386
 
387
+ def deserialize_params(serialized_params: bytes, function_def: api_pb2.Function, _client: "modal.client._Client"):
388
+ if function_def.class_parameter_info.format in (
389
+ api_pb2.ClassParameterInfo.PARAM_SERIALIZATION_FORMAT_UNSPECIFIED,
390
+ api_pb2.ClassParameterInfo.PARAM_SERIALIZATION_FORMAT_PICKLE,
391
+ ):
392
+ # legacy serialization format - pickle of `(args, kwargs)` w/ support for modal object arguments
393
+ param_args, param_kwargs = deserialize(serialized_params, _client)
394
+ elif function_def.class_parameter_info.format == api_pb2.ClassParameterInfo.PARAM_SERIALIZATION_FORMAT_PROTO:
395
+ param_args = ()
396
+ param_kwargs = deserialize_proto_params(serialized_params, list(function_def.class_parameter_info.schema))
397
+ else:
398
+ raise ExecutionError(
399
+ f"Unknown class parameter serialization format: {function_def.class_parameter_info.format}"
400
+ )
401
+
402
+ return param_args, param_kwargs
403
+
404
+
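The format dispatch above reduces to a small sketch; the enum values and the proto decoder below are stand-ins for illustration, not modal's actual API:

    import pickle
    from enum import IntEnum

    class ParamFormat(IntEnum):
        UNSPECIFIED = 0
        PICKLE = 1
        PROTO = 2

    def decode_proto_kwargs(payload: bytes) -> dict:
        raise NotImplementedError("placeholder for a schema-aware protobuf decoder")

    def deserialize_params(serialized_params: bytes, fmt: ParamFormat):
        if fmt in (ParamFormat.UNSPECIFIED, ParamFormat.PICKLE):
            # legacy format: a pickled (args, kwargs) tuple
            param_args, param_kwargs = pickle.loads(serialized_params)
        elif fmt == ParamFormat.PROTO:
            # schema-driven format: no positional args, only named parameters decoded against a schema
            param_args, param_kwargs = (), decode_proto_kwargs(serialized_params)
        else:
            raise RuntimeError(f"Unknown class parameter serialization format: {fmt}")
        return param_args, param_kwargs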
1013
405
  def main(container_args: api_pb2.ContainerArguments, client: Client):
1014
- # This is a bit weird but we need both the blocking and async versions of FunctionIOManager.
406
+ # This is a bit weird but we need both the blocking and async versions of ContainerIOManager.
1015
407
  # At some point, we should fix that by having built-in support for running "user code"
1016
- function_io_manager = FunctionIOManager(container_args, client)
408
+ container_io_manager = ContainerIOManager(container_args, client)
409
+ active_app: _App
410
+ service: Service
411
+ function_def = container_args.function_def
412
+ is_auto_snapshot: bool = function_def.is_auto_snapshot
413
+ # The worker sets this flag to "1" for snapshot and restore tasks. Otherwise, this flag is unset,
414
+ # in which case snapshots should be disabled.
415
+ is_snapshotting_function = (
416
+ function_def.is_checkpointing_function and os.environ.get("MODAL_ENABLE_SNAP_RESTORE", "0") == "1"
417
+ )
418
+
419
+ _client: _Client = synchronizer._translate_in(client) # TODO(erikbern): ugly
1017
420
 
1018
- # Define a global app (need to do this before imports).
1019
- container_app: ContainerApp = function_io_manager.initialize_app()
421
+ # Call ContainerHello - currently a noop but might be used later for things
422
+ container_io_manager.hello()
1020
423
 
1021
- with function_io_manager.heartbeats(), UserCodeEventLoop() as event_loop:
424
+ with container_io_manager.heartbeats(is_snapshotting_function), UserCodeEventLoop() as event_loop:
1022
425
  # If this is a serialized function, fetch the definition from the server
1023
- if container_args.function_def.definition_type == api_pb2.Function.DEFINITION_TYPE_SERIALIZED:
1024
- ser_cls, ser_fun = function_io_manager.get_serialized_function()
426
+ if function_def.definition_type == api_pb2.Function.DEFINITION_TYPE_SERIALIZED:
427
+ ser_cls, ser_fun = container_io_manager.get_serialized_function()
1025
428
  else:
1026
429
  ser_cls, ser_fun = None, None
1027
430
 
1028
431
  # Initialize the function, importing user code.
1029
- with function_io_manager.handle_user_exception():
1030
- imp_fun = import_function(
1031
- container_args.function_def,
1032
- ser_cls,
1033
- ser_fun,
1034
- container_args.serialized_params,
1035
- function_io_manager,
1036
- client,
1037
- )
432
+ with container_io_manager.handle_user_exception():
433
+ if container_args.serialized_params:
434
+ param_args, param_kwargs = deserialize_params(container_args.serialized_params, function_def, _client)
435
+ else:
436
+ param_args = ()
437
+ param_kwargs = {}
438
+
439
+ if function_def.is_class:
440
+ service = import_class_service(
441
+ function_def,
442
+ ser_cls,
443
+ param_args,
444
+ param_kwargs,
445
+ )
446
+ else:
447
+ service = import_single_function_service(
448
+ function_def,
449
+ ser_cls,
450
+ ser_fun,
451
+ param_args,
452
+ param_kwargs,
453
+ )
454
+
455
+ # If the cls/function decorator was applied in local scope, but the app is global, we can look it up
456
+ if service.app is not None:
457
+ active_app = service.app
458
+ else:
459
+ # if the app can't be inferred by the imported function, use name-based fallback
460
+ active_app = get_active_app_fallback(function_def)
461
+
462
+ if function_def.pty_info.pty_type == api_pb2.PTYInfo.PTY_TYPE_SHELL:
463
+ # Concurrency and batching doesn't apply for `modal shell`.
464
+ batch_max_size = 0
465
+ batch_wait_ms = 0
466
+ else:
467
+ batch_max_size = function_def.batch_max_size or 0
468
+ batch_wait_ms = function_def.batch_linger_ms or 0
1038
469
 
1039
- # Initialize objects on the stub.
1040
- if imp_fun.stub is not None:
1041
- container_app.associate_stub_container(imp_fun.stub)
470
+ # Get ids and metadata for objects (primarily functions and classes) on the app
471
+ container_app: RunningApp = container_io_manager.get_app_objects(container_args.app_layout)
472
+
473
+ # Initialize objects on the app.
474
+ # This is basically only functions and classes - anything else is deprecated and will be unsupported soon
475
+ app: App = synchronizer._translate_out(active_app)
476
+ app._init_container(client, container_app)
1042
477
 
1043
478
  # Hydrate all function dependencies.
1044
479
  # TODO(erikbern): we can remove this once we
1045
480
  # 1. Enable lazy hydration for all objects
1046
481
  # 2. Fully deprecate .new() objects
1047
- if imp_fun.function:
1048
- dep_object_ids: List[str] = [dep.object_id for dep in container_args.function_def.object_dependencies]
1049
- container_app.hydrate_function_deps(imp_fun.function, dep_object_ids)
482
+ if service.code_deps is not None: # this is not set for serialized or non-global scope functions
483
+ dep_object_ids: list[str] = [dep.object_id for dep in function_def.object_dependencies]
484
+ if len(service.code_deps) != len(dep_object_ids):
485
+ raise ExecutionError(
486
+ f"Function has {len(service.code_deps)} dependencies"
487
+ f" but container got {len(dep_object_ids)} object ids.\n"
488
+ f"Code deps: {service.code_deps}\n"
489
+ f"Object ids: {dep_object_ids}"
490
+ )
491
+ for object_id, obj in zip(dep_object_ids, service.code_deps):
492
+ metadata: Message = container_app.object_handle_metadata[object_id]
493
+ obj._hydrate(object_id, _client, metadata)
494
+
495
+ # Initialize clustered functions.
496
+ if function_def._experimental_group_size > 0:
497
+ initialize_clustered_function(
498
+ client,
499
+ container_args.task_id,
500
+ function_def._experimental_group_size,
501
+ )
1050
502
 
1051
- # Identify all "enter" methods that need to run before we checkpoint.
1052
- if imp_fun.obj is not None and not imp_fun.is_auto_snapshot:
1053
- pre_checkpoint_methods = _find_callables_for_obj(imp_fun.obj, _PartialFunctionFlags.ENTER_PRE_CHECKPOINT)
1054
- call_lifecycle_functions(event_loop, function_io_manager, pre_checkpoint_methods.values())
503
+ # Identify all "enter" methods that need to run before we snapshot.
504
+ if service.user_cls_instance is not None and not is_auto_snapshot:
505
+ pre_snapshot_methods = _find_callables_for_obj(
506
+ service.user_cls_instance, _PartialFunctionFlags.ENTER_PRE_SNAPSHOT
507
+ )
508
+ call_lifecycle_functions(event_loop, container_io_manager, list(pre_snapshot_methods.values()))
1055
509
 
1056
510
  # If this container is being used to create a checkpoint, checkpoint the container after
1057
- # global imports and innitialization. Checkpointed containers run from this point onwards.
1058
- if container_args.function_def.is_checkpointing_function:
1059
- function_io_manager.checkpoint()
511
+ # global imports and initialization. Checkpointed containers run from this point onwards.
512
+ if is_snapshotting_function:
513
+ container_io_manager.memory_snapshot()
1060
514
 
1061
515
  # Install hooks for interactive functions.
1062
- if container_args.function_def.pty_info.pty_type != api_pb2.PTYInfo.PTY_TYPE_UNSPECIFIED:
516
+ def breakpoint_wrapper():
517
+ # note: it would be nice to not have breakpoint_wrapper() included in the backtrace
518
+ container_io_manager.interact(from_breakpoint=True)
519
+ import pdb
1063
520
 
1064
- def breakpoint_wrapper():
1065
- # note: it would be nice to not have breakpoint_wrapper() included in the backtrace
1066
- interact()
1067
- import pdb
521
+ frame = inspect.currentframe().f_back
1068
522
 
1069
- pdb.set_trace()
523
+ pdb.Pdb().set_trace(frame)
1070
524
 
1071
- sys.breakpointhook = breakpoint_wrapper
525
+ sys.breakpointhook = breakpoint_wrapper
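The hook installed here replaces Python's default so that `breakpoint()` in user code can first switch the container into interactive mode and then start pdb at the caller's frame. A standalone sketch of just the hook mechanics (the modal-specific interact step is omitted):

    import inspect
    import pdb
    import sys

    def breakpoint_wrapper():
        # any setup needed before debugging (e.g. attaching an interactive terminal) would go here
        frame = inspect.currentframe().f_back  # the frame that called breakpoint()
        pdb.Pdb().set_trace(frame)

    sys.breakpointhook = breakpoint_wrapper

    def user_code():
        x = 41
        breakpoint()  # drops into pdb on this line, with `x` in scope
        return x + 1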
1072
526
 
1073
- # Identify the "enter" methods to run after resuming from a checkpoint.
1074
- if imp_fun.obj is not None and not imp_fun.is_auto_snapshot:
1075
- post_checkpoint_methods = _find_callables_for_obj(imp_fun.obj, _PartialFunctionFlags.ENTER_POST_CHECKPOINT)
1076
- call_lifecycle_functions(event_loop, function_io_manager, post_checkpoint_methods.values())
527
+ # Identify the "enter" methods to run after resuming from a snapshot.
528
+ if service.user_cls_instance is not None and not is_auto_snapshot:
529
+ post_snapshot_methods = _find_callables_for_obj(
530
+ service.user_cls_instance, _PartialFunctionFlags.ENTER_POST_SNAPSHOT
531
+ )
532
+ call_lifecycle_functions(event_loop, container_io_manager, list(post_snapshot_methods.values()))
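Pulling the snapshot-related steps together: pre-snapshot hooks run once before the memory snapshot (taken only when the worker enables it), and post-snapshot hooks run after the snapshot point, i.e. on every restore. An illustrative ordering with stand-in names, grounded on the MODAL_ENABLE_SNAP_RESTORE flag used above:

    import os

    def take_memory_snapshot() -> None:
        print("checkpointing container...")  # stand-in for the runtime's snapshot call

    def start_service(pre_snapshot_hooks, post_snapshot_hooks, is_checkpointing_function: bool) -> None:
        # the worker opts a task into snapshot/restore by setting this flag to "1"
        snapshots_enabled = is_checkpointing_function and os.environ.get("MODAL_ENABLE_SNAP_RESTORE", "0") == "1"
        for hook in pre_snapshot_hooks:
            hook()  # e.g. load model weights so they are captured in the snapshot
        if snapshots_enabled:
            take_memory_snapshot()  # restored containers resume execution from this point
        for hook in post_snapshot_hooks:
            hook()  # e.g. re-establish connections that cannot survive a snapshot

    start_service([lambda: print("pre-snapshot enter")], [lambda: print("post-snapshot enter")], False)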
1077
533
 
534
+ with container_io_manager.handle_user_exception():
535
+ finalized_functions = service.get_finalized_functions(function_def, container_io_manager)
1078
536
  # Execute the function.
537
+ lifespan_background_tasks = []
1079
538
  try:
1080
- if imp_fun.is_async:
1081
- event_loop.run(call_function_async(function_io_manager, imp_fun))
1082
- else:
1083
- # Set up a signal handler for `SIGUSR1`, which gets translated to an InputCancellation
1084
- # during function execution. This is sent to cancel inputs from the user.
1085
- def _cancel_input_signal_handler(signum, stackframe):
1086
- raise InputCancellation("Input was cancelled by user")
1087
-
1088
- signal.signal(signal.SIGUSR1, _cancel_input_signal_handler)
1089
-
1090
- call_function_sync(function_io_manager, imp_fun)
539
+ for finalized_function in finalized_functions.values():
540
+ if finalized_function.lifespan_manager:
541
+ lifespan_background_tasks.append(
542
+ event_loop.create_task(finalized_function.lifespan_manager.background_task())
543
+ )
544
+ with container_io_manager.handle_user_exception():
545
+ event_loop.run(finalized_function.lifespan_manager.lifespan_startup())
546
+ call_function(
547
+ event_loop,
548
+ container_io_manager,
549
+ finalized_functions,
550
+ batch_max_size,
551
+ batch_wait_ms,
552
+ )
1091
553
  finally:
1092
554
  # Run exit handlers. From this point onward, ignore all SIGINT signals that come from
1093
555
  # graceful shutdowns originating on the worker, as well as stray SIGUSR1 signals that
@@ -1096,15 +558,27 @@ def main(container_args: api_pb2.ContainerArguments, client: Client):
1096
558
  usr1_handler = signal.signal(signal.SIGUSR1, signal.SIG_IGN)
1097
559
 
1098
560
  try:
1099
- # Identify "exit" methods and run them.
1100
- if imp_fun.obj is not None and not imp_fun.is_auto_snapshot:
1101
- exit_methods = _find_callables_for_obj(imp_fun.obj, _PartialFunctionFlags.EXIT)
1102
- call_lifecycle_functions(event_loop, function_io_manager, exit_methods.values())
561
+ try:
562
+ # run lifespan shutdown for asgi apps
563
+ for finalized_function in finalized_functions.values():
564
+ if finalized_function.lifespan_manager:
565
+ with container_io_manager.handle_user_exception():
566
+ event_loop.run(finalized_function.lifespan_manager.lifespan_shutdown())
567
+ finally:
568
+ # no need to keep the lifespan asgi call around - we send it no more messages
569
+ for lifespan_background_task in lifespan_background_tasks:
570
+ lifespan_background_task.cancel() # prevent dangling tasks
571
+
572
+ # Identify "exit" methods and run them.
573
+ # want to make sure this is called even if the lifespan manager fails
574
+ if service.user_cls_instance is not None and not is_auto_snapshot:
575
+ exit_methods = _find_callables_for_obj(service.user_cls_instance, _PartialFunctionFlags.EXIT)
576
+ call_lifecycle_functions(event_loop, container_io_manager, list(exit_methods.values()))
1103
577
 
1104
578
  # Finally, commit on exit to catch uncommitted volume changes and surface background
1105
579
  # commit errors.
1106
- function_io_manager.volume_commit(
1107
- [v.volume_id for v in container_args.function_def.volume_mounts if v.allow_background_commits]
580
+ container_io_manager.volume_commit(
581
+ [v.volume_id for v in function_def.volume_mounts if v.allow_background_commits]
1108
582
  )
1109
583
  finally:
1110
584
  # Restore the original signal handler, needed for container_test hygiene since the
@@ -1117,7 +591,15 @@ if __name__ == "__main__":
1117
591
  logger.debug("Container: starting")
1118
592
 
1119
593
  container_args = api_pb2.ContainerArguments()
1120
- container_args.ParseFromString(base64.b64decode(sys.argv[1]))
594
+
595
+ container_arguments_path: Optional[str] = os.environ.get("MODAL_CONTAINER_ARGUMENTS_PATH")
596
+ if container_arguments_path is None:
597
+ # TODO(erikbern): this fallback is for old workers and we can remove it very soon (days)
598
+ import base64
599
+
600
+ container_args.ParseFromString(base64.b64decode(sys.argv[1]))
601
+ else:
602
+ container_args.ParseFromString(open(container_arguments_path, "rb").read())
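The two delivery paths for the serialized container arguments — a file path in MODAL_CONTAINER_ARGUMENTS_PATH on newer workers, base64 on the command line for older ones — reduce to a small helper. A sketch of just the byte-loading part (helper name assumed):

    import base64
    import os
    import sys

    def read_container_arguments_bytes() -> bytes:
        container_arguments_path = os.environ.get("MODAL_CONTAINER_ARGUMENTS_PATH")
        if container_arguments_path is not None:
            with open(container_arguments_path, "rb") as f:
                return f.read()  # newer workers write the serialized proto to a file
        # legacy fallback: the proto is passed base64-encoded as the first CLI argument
        return base64.b64decode(sys.argv[1])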
1121
603
 
1122
604
  # Note that we're creating the client in a synchronous context, but it will be running in a separate thread.
1123
605
  # This is good because if the function is long running then the client can still send heartbeats
@@ -1137,7 +619,7 @@ if __name__ == "__main__":
1137
619
  # from shutting down. The sleep(0) here is needed for finished ThreadPoolExecutor resources to
1138
620
  # shut down without triggering this warning (e.g., `@wsgi_app()`).
1139
621
  time.sleep(0)
1140
- lingering_threads: List[threading.Thread] = []
622
+ lingering_threads: list[threading.Thread] = []
1141
623
  for thread in threading.enumerate():
1142
624
  current_thread = threading.get_ident()
1143
625
  if thread.ident is not None and thread.ident != current_thread and not thread.daemon and thread.is_alive():
@@ -1145,7 +627,8 @@ if __name__ == "__main__":
1145
627
  if lingering_threads:
1146
628
  thread_names = ", ".join(t.name for t in lingering_threads)
1147
629
  logger.warning(
1148
- f"Detected {len(lingering_threads)} background thread(s) [{thread_names}] still running after container exit. This will prevent runner shutdown for up to 30 seconds."
630
+ f"Detected {len(lingering_threads)} background thread(s) [{thread_names}] still running "
631
+ "after container exit. This will prevent runner shutdown for up to 30 seconds."
1149
632
  )
1150
633
 
1151
634
  logger.debug("Container: done")
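The lingering-thread warning above boils down to enumerating live, non-daemon threads other than the current one. A minimal standalone version of that check:

    import threading

    def find_lingering_threads() -> list[threading.Thread]:
        current_thread = threading.get_ident()
        return [
            t for t in threading.enumerate()
            if t.ident is not None and t.ident != current_thread and not t.daemon and t.is_alive()
        ]

    print(find_lingering_threads())  # [] in a plain interpreter with no extra threads running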