modal 1.0.3.dev10__py3-none-any.whl → 1.2.3.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (160)
  1. modal/__init__.py +0 -2
  2. modal/__main__.py +3 -4
  3. modal/_billing.py +80 -0
  4. modal/_clustered_functions.py +7 -3
  5. modal/_clustered_functions.pyi +15 -3
  6. modal/_container_entrypoint.py +51 -69
  7. modal/_functions.py +508 -240
  8. modal/_grpc_client.py +171 -0
  9. modal/_load_context.py +105 -0
  10. modal/_object.py +81 -21
  11. modal/_output.py +58 -45
  12. modal/_partial_function.py +48 -73
  13. modal/_pty.py +7 -3
  14. modal/_resolver.py +26 -46
  15. modal/_runtime/asgi.py +4 -3
  16. modal/_runtime/container_io_manager.py +358 -220
  17. modal/_runtime/container_io_manager.pyi +296 -101
  18. modal/_runtime/execution_context.py +18 -2
  19. modal/_runtime/execution_context.pyi +64 -7
  20. modal/_runtime/gpu_memory_snapshot.py +262 -57
  21. modal/_runtime/user_code_imports.py +28 -58
  22. modal/_serialization.py +90 -6
  23. modal/_traceback.py +42 -1
  24. modal/_tunnel.pyi +380 -12
  25. modal/_utils/async_utils.py +84 -29
  26. modal/_utils/auth_token_manager.py +111 -0
  27. modal/_utils/blob_utils.py +181 -58
  28. modal/_utils/deprecation.py +19 -0
  29. modal/_utils/function_utils.py +91 -47
  30. modal/_utils/grpc_utils.py +89 -66
  31. modal/_utils/mount_utils.py +26 -1
  32. modal/_utils/name_utils.py +17 -3
  33. modal/_utils/task_command_router_client.py +536 -0
  34. modal/_utils/time_utils.py +34 -6
  35. modal/app.py +256 -88
  36. modal/app.pyi +909 -92
  37. modal/billing.py +5 -0
  38. modal/builder/2025.06.txt +18 -0
  39. modal/builder/PREVIEW.txt +18 -0
  40. modal/builder/base-images.json +58 -0
  41. modal/cli/_download.py +19 -3
  42. modal/cli/_traceback.py +3 -2
  43. modal/cli/app.py +4 -4
  44. modal/cli/cluster.py +15 -7
  45. modal/cli/config.py +5 -3
  46. modal/cli/container.py +7 -6
  47. modal/cli/dict.py +22 -16
  48. modal/cli/entry_point.py +12 -5
  49. modal/cli/environment.py +5 -4
  50. modal/cli/import_refs.py +3 -3
  51. modal/cli/launch.py +102 -5
  52. modal/cli/network_file_system.py +11 -12
  53. modal/cli/profile.py +3 -2
  54. modal/cli/programs/launch_instance_ssh.py +94 -0
  55. modal/cli/programs/run_jupyter.py +1 -1
  56. modal/cli/programs/run_marimo.py +95 -0
  57. modal/cli/programs/vscode.py +1 -1
  58. modal/cli/queues.py +57 -26
  59. modal/cli/run.py +91 -23
  60. modal/cli/secret.py +48 -22
  61. modal/cli/token.py +7 -8
  62. modal/cli/utils.py +4 -7
  63. modal/cli/volume.py +31 -25
  64. modal/client.py +15 -85
  65. modal/client.pyi +183 -62
  66. modal/cloud_bucket_mount.py +5 -3
  67. modal/cloud_bucket_mount.pyi +197 -5
  68. modal/cls.py +200 -126
  69. modal/cls.pyi +446 -68
  70. modal/config.py +29 -11
  71. modal/container_process.py +319 -19
  72. modal/container_process.pyi +190 -20
  73. modal/dict.py +290 -71
  74. modal/dict.pyi +835 -83
  75. modal/environments.py +15 -27
  76. modal/environments.pyi +46 -24
  77. modal/exception.py +14 -2
  78. modal/experimental/__init__.py +194 -40
  79. modal/experimental/flash.py +618 -0
  80. modal/experimental/flash.pyi +380 -0
  81. modal/experimental/ipython.py +11 -7
  82. modal/file_io.py +29 -36
  83. modal/file_io.pyi +251 -53
  84. modal/file_pattern_matcher.py +56 -16
  85. modal/functions.pyi +673 -92
  86. modal/gpu.py +1 -1
  87. modal/image.py +528 -176
  88. modal/image.pyi +1572 -145
  89. modal/io_streams.py +458 -128
  90. modal/io_streams.pyi +433 -52
  91. modal/mount.py +216 -151
  92. modal/mount.pyi +225 -78
  93. modal/network_file_system.py +45 -62
  94. modal/network_file_system.pyi +277 -56
  95. modal/object.pyi +93 -17
  96. modal/parallel_map.py +942 -129
  97. modal/parallel_map.pyi +294 -15
  98. modal/partial_function.py +0 -2
  99. modal/partial_function.pyi +234 -19
  100. modal/proxy.py +17 -8
  101. modal/proxy.pyi +36 -3
  102. modal/queue.py +270 -65
  103. modal/queue.pyi +817 -57
  104. modal/runner.py +115 -101
  105. modal/runner.pyi +205 -49
  106. modal/sandbox.py +512 -136
  107. modal/sandbox.pyi +845 -111
  108. modal/schedule.py +1 -1
  109. modal/secret.py +300 -70
  110. modal/secret.pyi +589 -34
  111. modal/serving.py +7 -11
  112. modal/serving.pyi +7 -8
  113. modal/snapshot.py +11 -8
  114. modal/snapshot.pyi +25 -4
  115. modal/token_flow.py +4 -4
  116. modal/token_flow.pyi +28 -8
  117. modal/volume.py +416 -158
  118. modal/volume.pyi +1117 -121
  119. {modal-1.0.3.dev10.dist-info → modal-1.2.3.dev7.dist-info}/METADATA +10 -9
  120. modal-1.2.3.dev7.dist-info/RECORD +195 -0
  121. modal_docs/mdmd/mdmd.py +17 -4
  122. modal_proto/api.proto +534 -79
  123. modal_proto/api_grpc.py +337 -1
  124. modal_proto/api_pb2.py +1522 -968
  125. modal_proto/api_pb2.pyi +1619 -134
  126. modal_proto/api_pb2_grpc.py +699 -4
  127. modal_proto/api_pb2_grpc.pyi +226 -14
  128. modal_proto/modal_api_grpc.py +175 -154
  129. modal_proto/sandbox_router.proto +145 -0
  130. modal_proto/sandbox_router_grpc.py +105 -0
  131. modal_proto/sandbox_router_pb2.py +149 -0
  132. modal_proto/sandbox_router_pb2.pyi +333 -0
  133. modal_proto/sandbox_router_pb2_grpc.py +203 -0
  134. modal_proto/sandbox_router_pb2_grpc.pyi +75 -0
  135. modal_proto/task_command_router.proto +144 -0
  136. modal_proto/task_command_router_grpc.py +105 -0
  137. modal_proto/task_command_router_pb2.py +149 -0
  138. modal_proto/task_command_router_pb2.pyi +333 -0
  139. modal_proto/task_command_router_pb2_grpc.py +203 -0
  140. modal_proto/task_command_router_pb2_grpc.pyi +75 -0
  141. modal_version/__init__.py +1 -1
  142. modal/requirements/PREVIEW.txt +0 -16
  143. modal/requirements/base-images.json +0 -26
  144. modal-1.0.3.dev10.dist-info/RECORD +0 -179
  145. modal_proto/modal_options_grpc.py +0 -3
  146. modal_proto/options.proto +0 -19
  147. modal_proto/options_grpc.py +0 -3
  148. modal_proto/options_pb2.py +0 -35
  149. modal_proto/options_pb2.pyi +0 -20
  150. modal_proto/options_pb2_grpc.py +0 -4
  151. modal_proto/options_pb2_grpc.pyi +0 -7
  152. /modal/{requirements → builder}/2023.12.312.txt +0 -0
  153. /modal/{requirements → builder}/2023.12.txt +0 -0
  154. /modal/{requirements → builder}/2024.04.txt +0 -0
  155. /modal/{requirements → builder}/2024.10.txt +0 -0
  156. /modal/{requirements → builder}/README.md +0 -0
  157. {modal-1.0.3.dev10.dist-info → modal-1.2.3.dev7.dist-info}/WHEEL +0 -0
  158. {modal-1.0.3.dev10.dist-info → modal-1.2.3.dev7.dist-info}/entry_points.txt +0 -0
  159. {modal-1.0.3.dev10.dist-info → modal-1.2.3.dev7.dist-info}/licenses/LICENSE +0 -0
  160. {modal-1.0.3.dev10.dist-info → modal-1.2.3.dev7.dist-info}/top_level.txt +0 -0
modal/parallel_map.py CHANGED
@@ -1,13 +1,16 @@
 # Copyright Modal Labs 2024
 import asyncio
 import enum
+import inspect
 import time
 import typing
+from asyncio import FIRST_COMPLETED
 from dataclasses import dataclass
-from typing import Any, Callable, Optional
+from typing import Any, Callable, Optional, Union
 
 from grpclib import Status
 
+import modal.exception
 from modal._runtime.execution_context import current_input_id
 from modal._utils.async_utils import (
     AsyncOrSyncIterable,
@@ -25,13 +28,14 @@ from modal._utils.async_utils import (
     warn_if_generator_is_not_consumed,
 )
 from modal._utils.blob_utils import BLOB_MAX_PARALLELISM
+from modal._utils.deprecation import deprecation_warning
 from modal._utils.function_utils import (
     ATTEMPT_TIMEOUT_GRACE_PERIOD,
     OUTPUTS_TIMEOUT,
     _create_input,
     _process_result,
 )
-from modal._utils.grpc_utils import RETRYABLE_GRPC_STATUS_CODES, RetryWarningMessage, retry_transient_errors
+from modal._utils.grpc_utils import RETRYABLE_GRPC_STATUS_CODES, Retry, RetryWarningMessage
 from modal._utils.jwt_utils import DecodedJwt
 from modal.config import logger
 from modal.retries import RetryManager
@@ -75,19 +79,293 @@ class _OutputValue:
 
 MAX_INPUTS_OUTSTANDING_DEFAULT = 1000
 
-# maximum number of inputs to send to the server in a single request
+# Maximum number of inputs to send to the server per FunctionPutInputs request
 MAP_INVOCATION_CHUNK_SIZE = 49
+SPAWN_MAP_INVOCATION_CHUNK_SIZE = 512
+
 
 if typing.TYPE_CHECKING:
     import modal.functions
 
 
+class InputPreprocessor:
+    """
+    Constructs FunctionPutInputsItem objects from the raw-input queue, and puts them in the processed-input queue.
+    """
+
+    def __init__(
+        self,
+        client: "modal.client._Client",
+        *,
+        raw_input_queue: _SynchronizedQueue,
+        processed_input_queue: asyncio.Queue,
+        function: "modal.functions._Function",
+        created_callback: Callable[[int], None],
+        done_callback: Callable[[], None],
+    ):
+        self.client = client
+        self.function = function
+        self.inputs_created = 0
+        self.raw_input_queue = raw_input_queue
+        self.processed_input_queue = processed_input_queue
+        self.created_callback = created_callback
+        self.done_callback = done_callback
+
+    async def input_iter(self):
+        while 1:
+            raw_input = await self.raw_input_queue.get()
+            if raw_input is None:  # end of input sentinel
+                break
+            yield raw_input  # args, kwargs
+
+    def create_input_factory(self):
+        async def create_input(argskwargs):
+            idx = self.inputs_created
+            self.inputs_created += 1
+            self.created_callback(self.inputs_created)
+            (args, kwargs) = argskwargs
+            return await _create_input(
+                args,
+                kwargs,
+                self.client.stub,
+                idx=idx,
+                function=self.function,
+            )
+
+        return create_input
+
+    async def drain_input_generator(self):
+        # Parallelize uploading blobs
+        async with aclosing(
+            async_map_ordered(self.input_iter(), self.create_input_factory(), concurrency=BLOB_MAX_PARALLELISM)
+        ) as streamer:
+            async for item in streamer:
+                await self.processed_input_queue.put(item)
+
+        # close queue iterator
+        await self.processed_input_queue.put(None)
+        self.done_callback()
+        yield
+
+
+class InputPumper:
+    """
+    Reads inputs from a queue of FunctionPutInputsItems, and sends them to the server.
+    """
+
+    def __init__(
+        self,
+        client: "modal.client._Client",
+        *,
+        input_queue: asyncio.Queue,
+        function: "modal.functions._Function",
+        function_call_id: str,
+        max_batch_size: int,
+        map_items_manager: Optional["_MapItemsManager"] = None,
+    ):
+        self.client = client
+        self.function = function
+        self.map_items_manager = map_items_manager
+        self.input_queue = input_queue
+        self.inputs_sent = 0
+        self.function_call_id = function_call_id
+        self.max_batch_size = max_batch_size
+
+    async def pump_inputs(self):
+        assert self.client.stub
+        async for items in queue_batch_iterator(self.input_queue, max_batch_size=self.max_batch_size):
+            # Add items to the manager. Their state will be SENDING.
+            if self.map_items_manager is not None:
+                await self.map_items_manager.add_items(items)
+            request = api_pb2.FunctionPutInputsRequest(
+                function_id=self.function.object_id,
+                inputs=items,
+                function_call_id=self.function_call_id,
+            )
+            logger.debug(
+                f"Pushing {len(items)} inputs to server. Num queued inputs awaiting"
+                f" push is {self.input_queue.qsize()}. "
+            )
+
+            resp = await self.client.stub.FunctionPutInputs(request, retry=self._function_inputs_retry)
+            self.inputs_sent += len(items)
+            # Change item state to WAITING_FOR_OUTPUT, and set the input_id and input_jwt which are in the response.
+            if self.map_items_manager is not None:
+                self.map_items_manager.handle_put_inputs_response(resp.inputs)
+            logger.debug(
+                f"Successfully pushed {len(items)} inputs to server. "
+                f"Num queued inputs awaiting push is {self.input_queue.qsize()}."
+            )
+            yield
+
+    @property
+    def _function_inputs_retry(self) -> Retry:
+        # with 8 retries we log the warning below about every 30 seconds which isn't too spammy.
+        retry_warning_message = RetryWarningMessage(
+            message=f"Warning: map progress for function {self.function._function_name} is limited."
+            " Common bottlenecks include slow iteration over results, or function backlogs.",
+            warning_interval=8,
+            errors_to_warn_for=[Status.RESOURCE_EXHAUSTED],
+        )
+        return Retry(
+            max_retries=None,
+            max_delay=PUMP_INPUTS_MAX_RETRY_DELAY,
+            additional_status_codes=[Status.RESOURCE_EXHAUSTED],
+            warning_message=retry_warning_message,
+        )
+
+
+class SyncInputPumper(InputPumper):
+    def __init__(
+        self,
+        client: "modal.client._Client",
+        *,
+        input_queue: asyncio.Queue,
+        retry_queue: TimestampPriorityQueue,
+        function: "modal.functions._Function",
+        function_call_jwt: str,
+        function_call_id: str,
+        map_items_manager: "_MapItemsManager",
+    ):
+        super().__init__(
+            client,
+            input_queue=input_queue,
+            function=function,
+            function_call_id=function_call_id,
+            max_batch_size=MAP_INVOCATION_CHUNK_SIZE,
+            map_items_manager=map_items_manager,
+        )
+        self.retry_queue = retry_queue
+        self.inputs_retried = 0
+        self.function_call_jwt = function_call_jwt
+
+    async def retry_inputs(self):
+        async for retriable_idxs in queue_batch_iterator(self.retry_queue, max_batch_size=self.max_batch_size):
+            # For each index, use the context in the manager to create a FunctionRetryInputsItem.
+            # This will also update the context state to RETRYING.
+            inputs: list[api_pb2.FunctionRetryInputsItem] = await self.map_items_manager.prepare_items_for_retry(
+                retriable_idxs
+            )
+            request = api_pb2.FunctionRetryInputsRequest(
+                function_call_jwt=self.function_call_jwt,
+                inputs=inputs,
+            )
+            resp = await self.client.stub.FunctionRetryInputs(request, retry=self._function_inputs_retry)
+            # Update the state to WAITING_FOR_OUTPUT, and update the input_jwt in the context
+            # to the new value in the response.
+            self.map_items_manager.handle_retry_response(resp.input_jwts)
+            logger.debug(f"Successfully pushed retry for {len(inputs)} to server.")
+            self.inputs_retried += len(inputs)
+            yield
+
+
+class AsyncInputPumper(InputPumper):
+    def __init__(
+        self,
+        client: "modal.client._Client",
+        *,
+        input_queue: asyncio.Queue,
+        function: "modal.functions._Function",
+        function_call_id: str,
+    ):
+        super().__init__(
+            client,
+            input_queue=input_queue,
+            function=function,
+            function_call_id=function_call_id,
+            max_batch_size=SPAWN_MAP_INVOCATION_CHUNK_SIZE,
+        )
+
+    async def pump_inputs(self):
+        async for _ in super().pump_inputs():
+            pass
+        request = api_pb2.FunctionFinishInputsRequest(
+            function_id=self.function.object_id,
+            function_call_id=self.function_call_id,
+            num_inputs=self.inputs_sent,
+        )
+        await self.client.stub.FunctionFinishInputs(request, retry=Retry(max_retries=None))
+        yield
+
+
+async def _spawn_map_invocation(
+    function: "modal.functions._Function", raw_input_queue: _SynchronizedQueue, client: "modal.client._Client"
+) -> tuple[str, int]:
+    assert client.stub
+    request = api_pb2.FunctionMapRequest(
+        function_id=function.object_id,
+        parent_input_id=current_input_id() or "",
+        function_call_type=api_pb2.FUNCTION_CALL_TYPE_MAP,
+        function_call_invocation_type=api_pb2.FUNCTION_CALL_INVOCATION_TYPE_ASYNC,
+    )
+    response: api_pb2.FunctionMapResponse = await client.stub.FunctionMap(request)
+    function_call_id = response.function_call_id
+
+    have_all_inputs = False
+    inputs_created = 0
+
+    def set_inputs_created(set_inputs_created):
+        nonlocal inputs_created
+        assert set_inputs_created is None or set_inputs_created > inputs_created
+        inputs_created = set_inputs_created
+
+    def set_have_all_inputs():
+        nonlocal have_all_inputs
+        have_all_inputs = True
+
+    input_queue: asyncio.Queue[api_pb2.FunctionPutInputsItem | None] = asyncio.Queue()
+    input_preprocessor = InputPreprocessor(
+        client=client,
+        raw_input_queue=raw_input_queue,
+        processed_input_queue=input_queue,
+        function=function,
+        created_callback=set_inputs_created,
+        done_callback=set_have_all_inputs,
+    )
+
+    input_pumper = AsyncInputPumper(
+        client=client,
+        input_queue=input_queue,
+        function=function,
+        function_call_id=function_call_id,
+    )
+
+    def log_stats():
+        logger.debug(
+            f"have_all_inputs={have_all_inputs} inputs_created={inputs_created} inputs_sent={input_pumper.inputs_sent} "
+        )
+
+    async def log_task():
+        while True:
+            log_stats()
+            try:
+                await asyncio.sleep(10)
+            except asyncio.CancelledError:
+                # Log final stats before exiting
+                log_stats()
+                break
+
+    async def consume_generator(gen):
+        async for _ in gen:
+            pass
+
+    log_debug_stats_task = asyncio.create_task(log_task())
+    await asyncio.gather(
+        consume_generator(input_preprocessor.drain_input_generator()),
+        consume_generator(input_pumper.pump_inputs()),
+    )
+    log_debug_stats_task.cancel()
+    await log_debug_stats_task
+    return function_call_id, inputs_created
+
+
 async def _map_invocation(
     function: "modal.functions._Function",
     raw_input_queue: _SynchronizedQueue,
     client: "modal.client._Client",
     order_outputs: bool,
     return_exceptions: bool,
+    wrap_returned_exceptions: bool,
     count_update_callback: Optional[Callable[[int, int], None]],
     function_call_invocation_type: "api_pb2.FunctionCallInvocationType.ValueType",
 ):
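
For orientation: the `InputPreprocessor` and `InputPumper` classes added above factor what used to be nested coroutines inside `_map_invocation` into a producer/consumer pipeline joined by an `asyncio.Queue`, with `None` as the end-of-input sentinel. A minimal, stdlib-only sketch of that shape (illustrative names; no Modal internals):

```python
import asyncio

async def preprocess(raw: list, queue: asyncio.Queue) -> None:
    # Producer: stage each item, then signal completion with a None sentinel,
    # as InputPreprocessor.drain_input_generator does.
    for item in raw:
        await queue.put(item)
    await queue.put(None)

async def pump(queue: asyncio.Queue) -> None:
    # Consumer: drain until the sentinel; the real InputPumper batches sends.
    while (item := await queue.get()) is not None:
        print(f"would send input {item!r} to the server")

async def main() -> None:
    queue: asyncio.Queue = asyncio.Queue()
    # Run both sides concurrently, like the drain/pump generators above.
    await asyncio.gather(preprocess([1, 2, 3], queue), pump(queue))

asyncio.run(main())
```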
@@ -99,7 +377,7 @@ async def _map_invocation(
         return_exceptions=return_exceptions,
         function_call_invocation_type=function_call_invocation_type,
     )
-    response: api_pb2.FunctionMapResponse = await retry_transient_errors(client.stub.FunctionMap, request)
+    response: api_pb2.FunctionMapResponse = await client.stub.FunctionMap(request)
 
     function_call_id = response.function_call_id
     function_call_jwt = response.function_call_jwt
@@ -110,9 +388,8 @@ async def _map_invocation(
     max_inputs_outstanding = response.max_inputs_outstanding or MAX_INPUTS_OUTSTANDING_DEFAULT
 
     have_all_inputs = False
+    map_done_event = asyncio.Event()
     inputs_created = 0
-    inputs_sent = 0
-    inputs_retried = 0
     outputs_completed = 0
     outputs_received = 0
     retried_outputs = 0
@@ -122,10 +399,6 @@ async def _map_invocation(
     stale_retry_duplicates = 0
     no_context_duplicates = 0
 
-    def count_update():
-        if count_update_callback is not None:
-            count_update_callback(outputs_completed, inputs_created)
-
     retry_queue = TimestampPriorityQueue()
     completed_outputs: set[str] = set()  # Set of input_ids whose outputs are complete (expecting no more values)
     input_queue: asyncio.Queue[api_pb2.FunctionPutInputsItem | None] = asyncio.Queue()
@@ -133,109 +406,50 @@ async def _map_invocation(
         retry_policy, function_call_invocation_type, retry_queue, sync_client_retries_enabled, max_inputs_outstanding
     )
 
-    async def create_input(argskwargs):
-        nonlocal inputs_created
-        idx = inputs_created
-        inputs_created += 1
-        (args, kwargs) = argskwargs
-        return await _create_input(args, kwargs, client.stub, idx=idx, method_name=function._use_method_name)
-
-    async def input_iter():
-        while 1:
-            raw_input = await raw_input_queue.get()
-            if raw_input is None:  # end of input sentinel
-                break
-            yield raw_input  # args, kwargs
-
-    async def drain_input_generator():
-        nonlocal have_all_inputs
-
-        # Parallelize uploading blobs
-        async with aclosing(
-            async_map_ordered(input_iter(), create_input, concurrency=BLOB_MAX_PARALLELISM)
-        ) as streamer:
-            async for item in streamer:
-                await input_queue.put(item)
-
-        # close queue iterator
-        await input_queue.put(None)
-        have_all_inputs = True
-        yield
+    input_preprocessor = InputPreprocessor(
+        client=client,
+        raw_input_queue=raw_input_queue,
+        processed_input_queue=input_queue,
+        function=function,
+        created_callback=lambda x: update_state(set_inputs_created=x),
+        done_callback=lambda: update_state(set_have_all_inputs=True),
+    )
 
-    async def pump_inputs():
-        assert client.stub
-        nonlocal inputs_created, inputs_sent
-        async for items in queue_batch_iterator(input_queue, max_batch_size=MAP_INVOCATION_CHUNK_SIZE):
-            # Add items to the manager. Their state will be SENDING.
-            await map_items_manager.add_items(items)
-            request = api_pb2.FunctionPutInputsRequest(
-                function_id=function.object_id,
-                inputs=items,
-                function_call_id=function_call_id,
-            )
-            logger.debug(
-                f"Pushing {len(items)} inputs to server. Num queued inputs awaiting push is {input_queue.qsize()}."
-            )
+    input_pumper = SyncInputPumper(
+        client=client,
+        input_queue=input_queue,
+        retry_queue=retry_queue,
+        function=function,
+        map_items_manager=map_items_manager,
+        function_call_jwt=function_call_jwt,
+        function_call_id=function_call_id,
+    )
 
-            resp = await send_inputs(client.stub.FunctionPutInputs, request)
-            count_update()
-            inputs_sent += len(items)
-            # Change item state to WAITING_FOR_OUTPUT, and set the input_id and input_jwt which are in the response.
-            map_items_manager.handle_put_inputs_response(resp.inputs)
-            logger.debug(
-                f"Successfully pushed {len(items)} inputs to server. "
-                f"Num queued inputs awaiting push is {input_queue.qsize()}."
-            )
-            yield
+    def update_state(set_have_all_inputs=None, set_inputs_created=None, set_outputs_completed=None):
+        # This should be the only method that needs nonlocal of the following vars
+        nonlocal have_all_inputs, inputs_created, outputs_completed
+        assert set_have_all_inputs is not False  # not allowed
+        assert set_inputs_created is None or set_inputs_created > inputs_created
+        assert set_outputs_completed is None or set_outputs_completed > outputs_completed
+        if set_have_all_inputs is not None:
+            have_all_inputs = set_have_all_inputs
+        if set_inputs_created is not None:
+            inputs_created = set_inputs_created
+        if set_outputs_completed is not None:
+            outputs_completed = set_outputs_completed
 
-    async def retry_inputs():
-        nonlocal inputs_retried
-        async for retriable_idxs in queue_batch_iterator(retry_queue, max_batch_size=MAP_INVOCATION_CHUNK_SIZE):
-            # For each index, use the context in the manager to create a FunctionRetryInputsItem.
-            # This will also update the context state to RETRYING.
-            inputs: list[api_pb2.FunctionRetryInputsItem] = await map_items_manager.prepare_items_for_retry(
-                retriable_idxs
-            )
-            request = api_pb2.FunctionRetryInputsRequest(
-                function_call_jwt=function_call_jwt,
-                inputs=inputs,
-            )
-            resp = await send_inputs(client.stub.FunctionRetryInputs, request)
-            # Update the state to WAITING_FOR_OUTPUT, and update the input_jwt in the context
-            # to the new value in the response.
-            map_items_manager.handle_retry_response(resp.input_jwts)
-            logger.debug(f"Successfully pushed retry for {len(inputs)} to server.")
-            inputs_retried += len(inputs)
-            yield
+        if count_update_callback is not None:
+            count_update_callback(outputs_completed, inputs_created)
 
-    async def send_inputs(
-        fn: "modal.client.UnaryUnaryWrapper",
-        request: typing.Union[api_pb2.FunctionPutInputsRequest, api_pb2.FunctionRetryInputsRequest],
-    ) -> typing.Union[api_pb2.FunctionPutInputsResponse, api_pb2.FunctionRetryInputsResponse]:
-        # with 8 retries we log the warning below about every 30 seconds which isn't too spammy.
-        retry_warning_message = RetryWarningMessage(
-            message=f"Warning: map progress for function {function._function_name} is limited."
-            " Common bottlenecks include slow iteration over results, or function backlogs.",
-            warning_interval=8,
-            errors_to_warn_for=[Status.RESOURCE_EXHAUSTED],
-        )
-        return await retry_transient_errors(
-            fn,
-            request,
-            max_retries=None,
-            max_delay=PUMP_INPUTS_MAX_RETRY_DELAY,
-            additional_status_codes=[Status.RESOURCE_EXHAUSTED],
-            retry_warning_message=retry_warning_message,
-        )
+        if have_all_inputs and outputs_completed >= inputs_created:
+            # map is done
+            map_done_event.set()
 
     async def get_all_outputs():
         assert client.stub
         nonlocal \
-            inputs_created, \
             successful_completions, \
             failed_completions, \
-            outputs_completed, \
-            have_all_inputs, \
            outputs_received, \
             already_complete_duplicates, \
             no_context_duplicates, \
@@ -244,7 +458,7 @@ async def _map_invocation(
 
         last_entry_id = "0-0"
 
-        while not have_all_inputs or outputs_completed < inputs_created:
+        while not map_done_event.is_set():
            logger.debug(f"Requesting outputs. Have {outputs_completed} outputs, {inputs_created} inputs.")
             # Get input_jwts of all items in the WAITING_FOR_OUTPUT state.
             # The server uses these to track for lost inputs.
@@ -258,12 +472,29 @@ async def _map_invocation(
                 requested_at=time.time(),
                 input_jwts=input_jwts,
             )
-            response = await retry_transient_errors(
-                client.stub.FunctionGetOutputs,
-                request,
-                max_retries=20,
-                attempt_timeout=OUTPUTS_TIMEOUT + ATTEMPT_TIMEOUT_GRACE_PERIOD,
+            get_response_task = asyncio.create_task(
+                client.stub.FunctionGetOutputs(
+                    request,
+                    retry=Retry(
+                        max_retries=20,
+                        attempt_timeout=OUTPUTS_TIMEOUT + ATTEMPT_TIMEOUT_GRACE_PERIOD,
+                    ),
+                )
             )
+            map_done_task = asyncio.create_task(map_done_event.wait())
+            try:
+                done, pending = await asyncio.wait([get_response_task, map_done_task], return_when=FIRST_COMPLETED)
+                if get_response_task in done:
+                    map_done_task.cancel()
+                    response = get_response_task.result()
+                else:
+                    assert map_done_event.is_set()
+                    # map is done - no more outputs, so return early
+                    return
+            finally:
+                # clean up tasks, in case of cancellations etc.
+                get_response_task.cancel()
+                map_done_task.cancel()
 
             last_entry_id = response.last_entry_id
             now_seconds = int(time.time())
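
This hunk replaces the blanket `retry_transient_errors` wrapper with an explicit race: the in-flight `FunctionGetOutputs` task runs against a `map_done_event` waiter under `asyncio.wait(..., return_when=FIRST_COMPLETED)`, so polling stops as soon as all outputs are accounted for. The same shape in isolation (stdlib only; `rpc` is a stand-in for the real call):

```python
import asyncio

async def poll_once(rpc_coro, done_event: asyncio.Event):
    """Return the RPC result, or None if done_event fires first."""
    rpc_task = asyncio.create_task(rpc_coro)
    done_task = asyncio.create_task(done_event.wait())
    try:
        done, _ = await asyncio.wait({rpc_task, done_task}, return_when=asyncio.FIRST_COMPLETED)
        if rpc_task in done:
            return rpc_task.result()
        return None  # the event won the race: no more outputs expected
    finally:
        # Mirror the finally block above: always clean up both tasks.
        rpc_task.cancel()
        done_task.cancel()

async def demo():
    async def rpc():  # stand-in for the FunctionGetOutputs call
        await asyncio.sleep(0.1)
        return "outputs"

    print(await poll_once(rpc(), asyncio.Event()))  # -> outputs

asyncio.run(demo())
```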
@@ -288,7 +519,7 @@ async def _map_invocation(
 
                 if output_type == _OutputType.SUCCESSFUL_COMPLETION or output_type == _OutputType.FAILED_COMPLETION:
                     completed_outputs.add(item.input_id)
-                    outputs_completed += 1
+                    update_state(set_outputs_completed=outputs_completed + 1)
                     yield item
 
     async def get_all_outputs_and_clean_up():
@@ -306,7 +537,7 @@ async def _map_invocation(
                 clear_on_success=True,
                 requested_at=time.time(),
             )
-            await retry_transient_errors(client.stub.FunctionGetOutputs, request)
+            await client.stub.FunctionGetOutputs(request)
             await retry_queue.close()
 
     async def fetch_output(item: api_pb2.FunctionGetOutputsItem) -> tuple[int, Any]:
@@ -314,7 +545,13 @@ async def _map_invocation(
             output = await _process_result(item.result, item.data_format, client.stub, client)
         except Exception as e:
             if return_exceptions:
-                output = e
+                if wrap_returned_exceptions:
+                    # Prior to client 1.0.4 there was a bug where return_exceptions would wrap
+                    # any returned exceptions in a synchronicity.UserCodeException. This adds
+                    # deprecated non-breaking compatibility bandaid for migrating away from that:
+                    output = modal.exception.UserCodeException(e)
+                else:
+                    output = e
             else:
                 raise e
         return (item.idx, output)
@@ -328,7 +565,6 @@ async def _map_invocation(
            async_map_ordered(get_all_outputs_and_clean_up(), fetch_output, concurrency=BLOB_MAX_PARALLELISM)
         ) as streamer:
             async for idx, output in streamer:
-                count_update()
                 if not order_outputs:
                     yield _OutputValue(output)
                 else:
@@ -352,8 +588,11 @@ async def _map_invocation(
     def log_stats():
         logger.debug(
             f"Map stats: sync_client_retries_enabled={sync_client_retries_enabled} "
-            f"have_all_inputs={have_all_inputs} inputs_created={inputs_created} input_sent={inputs_sent} "
-            f"inputs_retried={inputs_retried} outputs_received={outputs_received} "
+            f"have_all_inputs={have_all_inputs} "
+            f"inputs_created={inputs_created} "
+            f"input_sent={input_pumper.inputs_sent} "
+            f"inputs_retried={input_pumper.inputs_retried} "
+            f"outputs_received={outputs_received} "
             f"successful_completions={successful_completions} failed_completions={failed_completions} "
             f"no_context_duplicates={no_context_duplicates} old_retry_duplicates={stale_retry_duplicates} "
             f"already_complete_duplicates={already_complete_duplicates} "
@@ -372,21 +611,388 @@ async def _map_invocation(
 
     log_debug_stats_task = asyncio.create_task(log_debug_stats())
     async with aclosing(
-        async_merge(drain_input_generator(), pump_inputs(), poll_outputs(), retry_inputs())
+        async_merge(
+            input_preprocessor.drain_input_generator(),
+            input_pumper.pump_inputs(),
+            input_pumper.retry_inputs(),
+            poll_outputs(),
+        )
     ) as streamer:
         async for response in streamer:
-            if response is not None:
+            if response is not None:  # type: ignore[unreachable]
                 yield response.value
     log_debug_stats_task.cancel()
     await log_debug_stats_task
 
 
+async def _map_invocation_inputplane(
+    function: "modal.functions._Function",
+    raw_input_queue: _SynchronizedQueue,
+    client: "modal.client._Client",
+    order_outputs: bool,
+    return_exceptions: bool,
+    wrap_returned_exceptions: bool,
+    count_update_callback: Optional[Callable[[int, int], None]],
+) -> typing.AsyncGenerator[Any, None]:
+    """Input-plane implementation of a function map invocation.
+
+    This is analogous to `_map_invocation`, but instead of the control-plane
+    `FunctionMap` / `FunctionPutInputs` / `FunctionGetOutputs` RPCs it speaks
+    the input-plane protocol consisting of `MapStartOrContinue`, `MapAwait`, and `MapCheckInputs`.
+    """
+
+    assert function._input_plane_url, "_map_invocation_inputplane should only be used for input-plane backed functions"
+
+    input_plane_stub = await client.get_stub(function._input_plane_url)
+
+    # Required for _create_input.
+    assert client.stub, "Client must be hydrated with a stub for _map_invocation_inputplane"
+
+    # ------------------------------------------------------------
+    # Invocation-wide state
+    # ------------------------------------------------------------
+
+    have_all_inputs = False
+    map_done_event = asyncio.Event()
+
+    inputs_created = 0
+    outputs_completed = 0
+    successful_completions = 0
+    failed_completions = 0
+    no_context_duplicates = 0
+    stale_retry_duplicates = 0
+    already_complete_duplicates = 0
+    retried_outputs = 0
+    input_queue_size = 0
+    last_entry_id = ""
+
+    # The input-plane server returns this after the first request.
+    map_token = None
+    map_token_received = asyncio.Event()
+
+    # Single priority queue that holds *both* fresh inputs (timestamp == now)
+    # and future retries (timestamp > now).
+    queue: TimestampPriorityQueue[api_pb2.MapStartOrContinueItem] = TimestampPriorityQueue()
+
+    # Maximum number of inputs that may be in-flight (the server sends this in
+    # the first response – fall back to the default if we never receive it for
+    # any reason).
+    max_inputs_outstanding = MAX_INPUTS_OUTSTANDING_DEFAULT
+
+    # Set a default retry policy to construct an instance of _MapItemsManager.
+    # We'll update the retry policy with the actual user-specified retry policy
+    # from the server in the first MapStartOrContinue response.
+    retry_policy = api_pb2.FunctionRetryPolicy(
+        retries=0,
+        initial_delay_ms=1000,
+        max_delay_ms=1000,
+        backoff_coefficient=1.0,
+    )
+    map_items_manager = _MapItemsManager(
+        retry_policy=retry_policy,
+        function_call_invocation_type=api_pb2.FUNCTION_CALL_INVOCATION_TYPE_SYNC,
+        retry_queue=queue,
+        sync_client_retries_enabled=True,
+        max_inputs_outstanding=MAX_INPUTS_OUTSTANDING_DEFAULT,
+        is_input_plane_instance=True,
+    )
+
+    def update_counters(
+        created_delta: int = 0, completed_delta: int = 0, set_have_all_inputs: Union[bool, None] = None
+    ):
+        nonlocal inputs_created, outputs_completed, have_all_inputs
+
+        if created_delta:
+            inputs_created += created_delta
+        if completed_delta:
+            outputs_completed += completed_delta
+        if set_have_all_inputs is not None:
+            have_all_inputs = set_have_all_inputs
+
+        if count_update_callback is not None:
+            count_update_callback(outputs_completed, inputs_created)
+
+        if have_all_inputs and outputs_completed >= inputs_created:
+            map_done_event.set()
+
+    async def create_input(argskwargs):
+        idx = inputs_created + 1  # 1-indexed map call idx
+        update_counters(created_delta=1)
+        (args, kwargs) = argskwargs
+        put_item: api_pb2.FunctionPutInputsItem = await _create_input(
+            args,
+            kwargs,
+            client.stub,
+            idx=idx,
+            function=function,
+        )
+        return api_pb2.MapStartOrContinueItem(input=put_item)
+
+    async def input_iter():
+        while True:
+            raw_input = await raw_input_queue.get()
+            if raw_input is None:  # end of input sentinel
+                break
+            yield raw_input  # args, kwargs
+
+    async def drain_input_generator():
+        async with aclosing(
+            async_map_ordered(input_iter(), create_input, concurrency=BLOB_MAX_PARALLELISM)
+        ) as streamer:
+            async for q_item in streamer:
+                await queue.put(time.time(), q_item)
+
+        # All inputs have been read.
+        update_counters(set_have_all_inputs=True)
+        yield
+
+    async def pump_inputs():
+        nonlocal map_token, max_inputs_outstanding
+        async for batch in queue_batch_iterator(queue, max_batch_size=MAP_INVOCATION_CHUNK_SIZE):
+            # Convert the queued items into the proto format expected by the RPC.
+            request_items: list[api_pb2.MapStartOrContinueItem] = [
+                api_pb2.MapStartOrContinueItem(input=qi.input, attempt_token=qi.attempt_token) for qi in batch
+            ]
+
+            await map_items_manager.add_items_inputplane(request_items)
+
+            # Build request
+            request = api_pb2.MapStartOrContinueRequest(
+                function_id=function.object_id,
+                map_token=map_token,
+                parent_input_id=current_input_id() or "",
+                items=request_items,
+            )
+
+            metadata = await client.get_input_plane_metadata(function._input_plane_region)
+
+            response: api_pb2.MapStartOrContinueResponse = await input_plane_stub.MapStartOrContinue(
+                request,
+                retry=Retry(
+                    additional_status_codes=[Status.RESOURCE_EXHAUSTED],
+                    max_delay=PUMP_INPUTS_MAX_RETRY_DELAY,
+                    max_retries=None,
+                ),
+                metadata=metadata,
+            )
+
+            # match response items to the corresponding request item index
+            response_items_idx_tuple = [
+                (request_items[idx].input.idx, attempt_token)
+                for idx, attempt_token in enumerate(response.attempt_tokens)
+            ]
+
+            map_items_manager.handle_put_continue_response(response_items_idx_tuple)
+
+            # Set the function call id and actual retry policy with the data from the first response.
+            # This conditional is skipped for subsequent iterations of this for-loop.
+            if map_token is None:
+                map_token = response.map_token
+                map_token_received.set()
+                max_inputs_outstanding = response.max_inputs_outstanding or MAX_INPUTS_OUTSTANDING_DEFAULT
+                map_items_manager.set_retry_policy(response.retry_policy)
+                # Update the retry policy for the first batch of inputs.
+                # Subsequent batches will have the correct user-specified retry policy
+                # set by the updated _MapItemsManager.
+                map_items_manager.update_items_retry_policy(response.retry_policy)
+            yield
+
+    async def check_lost_inputs():
+        nonlocal last_entry_id  # shared with get_all_outputs
+        try:
+            while not map_done_event.is_set():
+                if map_token is None:
+                    await map_token_received.wait()
+                    continue
+
+                sleep_task = asyncio.create_task(asyncio.sleep(1))
+                map_done_task = asyncio.create_task(map_done_event.wait())
+                done, _ = await asyncio.wait([sleep_task, map_done_task], return_when=FIRST_COMPLETED)
+                if map_done_task in done:
+                    break
+
+                # check_inputs = [(idx, attempt_token), ...]
+                check_inputs = map_items_manager.get_input_idxs_waiting_for_output()
+                attempt_tokens = [attempt_token for _, attempt_token in check_inputs]
+                request = api_pb2.MapCheckInputsRequest(
+                    last_entry_id=last_entry_id,
+                    timeout=0,  # Non-blocking read
+                    attempt_tokens=attempt_tokens,
+                )
+
+                metadata = await client.get_input_plane_metadata(function._input_plane_region)
+                response: api_pb2.MapCheckInputsResponse = await input_plane_stub.MapCheckInputs(
+                    request, metadata=metadata
+                )
+                check_inputs_response = [
+                    (check_inputs[resp_idx][0], response.lost[resp_idx]) for resp_idx, _ in enumerate(response.lost)
+                ]
+                # check_inputs_response = [(idx, lost: bool), ...]
+                await map_items_manager.handle_check_inputs_response(check_inputs_response)
+                yield
+        except asyncio.CancelledError:
+            pass
+
+    async def get_all_outputs():
+        nonlocal \
+            successful_completions, \
+            failed_completions, \
+            no_context_duplicates, \
+            stale_retry_duplicates, \
+            already_complete_duplicates, \
+            retried_outputs, \
+            last_entry_id
+
+        while not map_done_event.is_set():
+            if map_token is None:
+                await map_token_received.wait()
+                continue
+
+            request = api_pb2.MapAwaitRequest(
+                map_token=map_token,
+                last_entry_id=last_entry_id,
+                requested_at=time.time(),
+                timeout=OUTPUTS_TIMEOUT,
+            )
+            metadata = await client.get_input_plane_metadata(function._input_plane_region)
+            get_response_task = asyncio.create_task(
+                input_plane_stub.MapAwait(
+                    request,
+                    retry=Retry(
+                        max_retries=20,
+                        attempt_timeout=OUTPUTS_TIMEOUT + ATTEMPT_TIMEOUT_GRACE_PERIOD,
+                    ),
+                    metadata=metadata,
+                )
+            )
+            map_done_task = asyncio.create_task(map_done_event.wait())
+            try:
+                done, pending = await asyncio.wait([get_response_task, map_done_task], return_when=FIRST_COMPLETED)
+                if get_response_task in done:
+                    map_done_task.cancel()
+                    response = get_response_task.result()
+                else:
+                    assert map_done_event.is_set()
+                    # map is done - no more outputs, so return early
+                    return
+            finally:
+                # clean up tasks, in case of cancellations etc.
+                get_response_task.cancel()
+                map_done_task.cancel()
+            last_entry_id = response.last_entry_id
+
+            for output_item in response.outputs:
+                output_type = await map_items_manager.handle_get_outputs_response(output_item, int(time.time()))
+                if output_type == _OutputType.SUCCESSFUL_COMPLETION:
+                    successful_completions += 1
+                elif output_type == _OutputType.FAILED_COMPLETION:
+                    failed_completions += 1
+                elif output_type == _OutputType.RETRYING:
+                    retried_outputs += 1
+                elif output_type == _OutputType.NO_CONTEXT_DUPLICATE:
+                    no_context_duplicates += 1
+                elif output_type == _OutputType.STALE_RETRY_DUPLICATE:
+                    stale_retry_duplicates += 1
+                elif output_type == _OutputType.ALREADY_COMPLETE_DUPLICATE:
+                    already_complete_duplicates += 1
+                else:
+                    raise Exception(f"Unknown output type: {output_type}")
+
+                if output_type == _OutputType.SUCCESSFUL_COMPLETION or output_type == _OutputType.FAILED_COMPLETION:
+                    update_counters(completed_delta=1)
+                    yield output_item
+
+    async def get_all_outputs_and_clean_up():
+        try:
+            async with aclosing(get_all_outputs()) as stream:
+                async for item in stream:
+                    yield item
+        finally:
+            await queue.close()
+            pass
+
+    async def fetch_output(item: api_pb2.FunctionGetOutputsItem) -> tuple[int, Any]:
+        try:
+            output = await _process_result(item.result, item.data_format, input_plane_stub, client)
+        except Exception as e:
+            if return_exceptions:
+                if wrap_returned_exceptions:
+                    # Prior to client 1.0.4 there was a bug where return_exceptions would wrap
+                    # any returned exceptions in a synchronicity.UserCodeException. This adds
+                    # deprecated non-breaking compatibility bandaid for migrating away from that:
+                    output = modal.exception.UserCodeException(e)
+                else:
+                    output = e
+            else:
+                raise e
+        return (item.idx, output)
+
+    async def poll_outputs():
+        # map to store out-of-order outputs received
+        received_outputs = {}
+        output_idx = 1  # 1-indexed map call idx
+
+        async with aclosing(
+            async_map_ordered(get_all_outputs_and_clean_up(), fetch_output, concurrency=BLOB_MAX_PARALLELISM)
+        ) as streamer:
+            async for idx, output in streamer:
+                if not order_outputs:
+                    yield _OutputValue(output)
+                else:
+                    # hold on to outputs for function maps, so we can reorder them correctly.
+                    received_outputs[idx] = output
+
+                    while True:
+                        if output_idx not in received_outputs:
+                            # we haven't received the output for the current index yet.
+                            # stop returning outputs to the caller and instead wait for
+                            # the next output to arrive from the server.
+                            break
+
+                        output = received_outputs.pop(output_idx)
+                        yield _OutputValue(output)
+                        output_idx += 1
+
+        assert len(received_outputs) == 0
+
+    async def log_debug_stats():
+        def log_stats():
+            logger.debug(
+                f"Map stats:\nsuccessful_completions={successful_completions} failed_completions={failed_completions} "
+                f"no_context_duplicates={no_context_duplicates} stale_retry_duplicates={stale_retry_duplicates} "
+                f"already_complete_duplicates={already_complete_duplicates} retried_outputs={retried_outputs} "
+                f"map_token={map_token} max_inputs_outstanding={max_inputs_outstanding} "
+                f"map_items_manager_size={len(map_items_manager)} input_queue_size={input_queue_size}"
+            )
+
+        while True:
+            log_stats()
+            try:
+                await asyncio.sleep(10)
+            except asyncio.CancelledError:
+                # Log final stats before exiting
+                log_stats()
+                break
+
+    log_task = asyncio.create_task(log_debug_stats())
+
+    async with aclosing(
+        async_merge(drain_input_generator(), pump_inputs(), poll_outputs(), check_lost_inputs())
+    ) as merged:
+        async for maybe_output in merged:
+            if maybe_output is not None:  # ignore None sentinels
+                yield maybe_output.value
+
+    log_task.cancel()
+
+
 async def _map_helper(
     self: "modal.functions.Function",
     async_input_gen: typing.AsyncGenerator[Any, None],
     kwargs={},  # any extra keyword arguments for the function
     order_outputs: bool = True,  # return outputs in order
     return_exceptions: bool = False,  # propagate exceptions (False) or aggregate them in the results list (True)
+    wrap_returned_exceptions: bool = True,
 ) -> typing.AsyncGenerator[Any, None]:
     """Core implementation that supports `_map_async()`, `_starmap_async()` and `_for_each_async()`.
 
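
A comment in the new `_map_invocation_inputplane` notes that a single `TimestampPriorityQueue` holds both fresh inputs (stamped with the current time) and scheduled retries (stamped in the future). A rough sketch of that scheduling idea, assuming nothing about Modal's actual `TimestampPriorityQueue` beyond what the comment states:

```python
import heapq
import time

class TimestampedQueue:
    """Min-heap keyed on timestamp; pop_due returns everything already eligible."""

    def __init__(self) -> None:
        self._heap: list[tuple[float, int, object]] = []
        self._seq = 0  # tie-breaker so payloads are never compared directly

    def put(self, timestamp: float, item: object) -> None:
        heapq.heappush(self._heap, (timestamp, self._seq, item))
        self._seq += 1

    def pop_due(self) -> list[object]:
        now, due = time.time(), []
        while self._heap and self._heap[0][0] <= now:
            due.append(heapq.heappop(self._heap)[2])
        return due

q = TimestampedQueue()
q.put(time.time(), "fresh input")            # eligible immediately
q.put(time.time() + 1.5, "retry of idx 3")   # eligible after its backoff delay
print(q.pop_due())  # -> ['fresh input']
```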
@@ -399,9 +1005,8 @@ async def _map_helper(
     We could make this explicit as an improvement or even let users decide what they
     prefer: throughput (prioritize queueing inputs) or latency (prioritize yielding results)
     """
-
     raw_input_queue: Any = SynchronizedQueue()  # type: ignore
-    raw_input_queue.init()
+    await raw_input_queue.init.aio()
 
     async def feed_queue():
         async with aclosing(async_input_gen) as streamer:
@@ -417,12 +1022,41 @@ async def _map_helper(
     # synchronicity-wrapped, since they accept executable code in the form of iterators that we don't want to run inside
     # the synchronicity thread. Instead, we delegate to `._map()` with a safer Queue as input.
     async with aclosing(
-        async_merge(self._map.aio(raw_input_queue, order_outputs, return_exceptions), feed_queue())
+        async_merge(
+            self._map.aio(raw_input_queue, order_outputs, return_exceptions, wrap_returned_exceptions), feed_queue()
+        )
     ) as map_output_stream:
         async for output in map_output_stream:
             yield output
 
 
+def _maybe_warn_about_exceptions(func_name: str, return_exceptions: bool, wrap_returned_exceptions: bool):
+    if return_exceptions and wrap_returned_exceptions:
+        deprecation_warning(
+            (2025, 6, 27),
+            (
+                f"Function.{func_name} currently leaks an internal exception wrapping type "
+                "(modal.exceptions.UserCodeException) when `return_exceptions=True` is set. "
+                "In the future, this will change, and the underlying exception will be returned directly.\n"
+                "To opt into the future behavior and silence this warning, add `wrap_returned_exceptions=False`:\n\n"
+                f" f.{func_name}(..., return_exceptions=True, wrap_returned_exceptions=False)"
+            ),
+        )
+
+
+def _invoked_from_sync_wrapper() -> bool:
+    """Check whether the calling function was called from a sync wrapper."""
+    # This is temporary: we only need it to avoind double-firing the wrap_returned_exceptions warning.
+    # (We don't want to push the warning lower in the stack beacuse then we can't attribute to the user's code.)
+    try:
+        frame = inspect.currentframe()
+        caller_function_name = frame.f_back.f_back.f_code.co_name
+        # Embeds some assumptions about how the current calling stack works, but this is just temporary.
+        return caller_function_name == "asend"
+    except Exception:
+        return False
+
+
 @warn_if_generator_is_not_consumed(function_name="Function.map.aio")
 async def _map_async(
     self: "modal.functions.Function",
@@ -432,10 +1066,18 @@ async def _map_async(
     kwargs={},  # any extra keyword arguments for the function
     order_outputs: bool = True,  # return outputs in order
     return_exceptions: bool = False,  # propagate exceptions (False) or aggregate them in the results list (True)
+    wrap_returned_exceptions: bool = True,  # wrap returned exceptions in modal.exception.UserCodeException
 ) -> typing.AsyncGenerator[Any, None]:
+    if not _invoked_from_sync_wrapper():
+        _maybe_warn_about_exceptions("map.aio", return_exceptions, wrap_returned_exceptions)
     async_input_gen = async_zip(*[sync_or_async_iter(it) for it in input_iterators])
     async for output in _map_helper(
-        self, async_input_gen, kwargs=kwargs, order_outputs=order_outputs, return_exceptions=return_exceptions
+        self,
+        async_input_gen,
+        kwargs=kwargs,
+        order_outputs=order_outputs,
+        return_exceptions=return_exceptions,
+        wrap_returned_exceptions=wrap_returned_exceptions,
     ):
         yield output
 
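
Taken together with the deprecation warning added above, the migration path for callers that pass `return_exceptions=True` looks like this (`my_func` is the mapped function from the docstring examples below):

```python
# Current default: returned exceptions arrive wrapped in
# modal.exception.UserCodeException, and a DeprecationWarning fires.
results = list(my_func.map(range(3), return_exceptions=True))

# Opting into the future behavior silences the warning and yields the
# underlying exceptions directly:
results = list(my_func.map(range(3), return_exceptions=True, wrap_returned_exceptions=False))
```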
@@ -448,13 +1090,17 @@ async def _starmap_async(
     kwargs={},
     order_outputs: bool = True,
     return_exceptions: bool = False,
+    wrap_returned_exceptions: bool = True,
 ) -> typing.AsyncIterable[Any]:
+    if not _invoked_from_sync_wrapper():
+        _maybe_warn_about_exceptions("starmap.aio", return_exceptions, wrap_returned_exceptions)
     async for output in _map_helper(
         self,
         sync_or_async_iter(input_iterator),
         kwargs=kwargs,
         order_outputs=order_outputs,
         return_exceptions=return_exceptions,
+        wrap_returned_exceptions=wrap_returned_exceptions,
     ):
         yield output
 
@@ -464,7 +1110,12 @@ async def _for_each_async(self, *input_iterators, kwargs={}, ignore_exceptions:
     # rather than iterating over the result
     async_input_gen = async_zip(*[sync_or_async_iter(it) for it in input_iterators])
     async for _ in _map_helper(
-        self, async_input_gen, kwargs=kwargs, order_outputs=False, return_exceptions=ignore_exceptions
+        self,
+        async_input_gen,
+        kwargs=kwargs,
+        order_outputs=False,
+        return_exceptions=ignore_exceptions,
+        wrap_returned_exceptions=False,
     ):
         pass
 
@@ -476,6 +1127,7 @@ def _map_sync(
     kwargs={},  # any extra keyword arguments for the function
     order_outputs: bool = True,  # return outputs in order
     return_exceptions: bool = False,  # propagate exceptions (False) or aggregate them in the results list (True)
+    wrap_returned_exceptions: bool = True,
 ) -> AsyncOrSyncIterable:
     """Parallel map over a set of inputs.
 
@@ -513,10 +1165,16 @@ def _map_sync(
     print(list(my_func.map(range(3), return_exceptions=True)))
     ```
     """
+    _maybe_warn_about_exceptions("map", return_exceptions, wrap_returned_exceptions)
 
     return AsyncOrSyncIterable(
         _map_async(
-            self, *input_iterators, kwargs=kwargs, order_outputs=order_outputs, return_exceptions=return_exceptions
+            self,
+            *input_iterators,
+            kwargs=kwargs,
+            order_outputs=order_outputs,
+            return_exceptions=return_exceptions,
+            wrap_returned_exceptions=wrap_returned_exceptions,
         ),
         nested_async_message=(
             "You can't iter(Function.map()) from an async function. Use async for ... in Function.map.aio() instead."
@@ -524,6 +1182,56 @@ def _map_sync(
     )
 
 
+async def _experimental_spawn_map_async(self, *input_iterators, kwargs={}) -> "modal.functions._FunctionCall":
+    async_input_gen = async_zip(*[sync_or_async_iter(it) for it in input_iterators])
+    return await _spawn_map_helper(self, async_input_gen, kwargs)
+
+
+async def _spawn_map_helper(
+    self: "modal.functions.Function", async_input_gen, kwargs={}
+) -> "modal.functions._FunctionCall":
+    raw_input_queue: Any = SynchronizedQueue()  # type: ignore
+    await raw_input_queue.init.aio()
+
+    async def feed_queue():
+        async with aclosing(async_input_gen) as streamer:
+            async for args in streamer:
+                await raw_input_queue.put.aio((args, kwargs))
+        await raw_input_queue.put.aio(None)  # end-of-input sentinel
+
+    fc, _ = await asyncio.gather(self._spawn_map.aio(raw_input_queue), feed_queue())
+    return fc
+
+
+def _experimental_spawn_map_sync(self, *input_iterators, kwargs={}) -> "modal.functions._FunctionCall":
+    """mdmd:hidden
+    Spawn parallel execution over a set of inputs, returning as soon as the inputs are created.
+
+    Unlike `modal.Function.map`, this method does not block on completion of the remote execution but
+    returns a `modal.FunctionCall` object that can be used to poll status and retrieve results later.
+
+    Takes one iterator argument per argument in the function being mapped over.
+
+    Example:
+    ```python
+    @app.function()
+    def my_func(a, b):
+        return a ** b
+
+
+    @app.local_entrypoint()
+    def main():
+        fc = my_func.spawn_map([1, 2], [3, 4])
+    ```
+
+    """
+
+    return run_coroutine_in_temporary_event_loop(
+        _experimental_spawn_map_async(self, *input_iterators, kwargs=kwargs),
+        "You can't run Function.spawn_map() from an async function. Use Function.spawn_map.aio() instead.",
+    )
+
+
 async def _spawn_map_async(self, *input_iterators, kwargs={}) -> None:
     """This runs in an event loop on the main thread. It consumes inputs from the input iterators and creates async
     function calls for each.
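
As the `_experimental_spawn_map_sync` docstring above notes, the call returns once the inputs are created instead of blocking on results. A hedged sketch of detaching and re-attaching later, continuing the docstring's `app`/`my_func` example; it assumes `modal.FunctionCall.from_id`, the usual way to rehydrate a call handle from its ID, also applies to spawned maps:

```python
import modal

@app.local_entrypoint()
def main():
    fc = my_func.spawn_map([1, 2], [3, 4])  # returns as soon as inputs are enqueued
    print(fc.object_id)  # persist this ID to find the map call again later

    # Later, possibly from another process (assumption: map-type calls can be
    # rehydrated like regular spawned calls):
    fc2 = modal.FunctionCall.from_id(fc.object_id)
```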
@@ -569,7 +1277,7 @@ def _spawn_map_sync(self, *input_iterators, kwargs={}) -> None:
569
1277
 
570
1278
  return run_coroutine_in_temporary_event_loop(
571
1279
  _spawn_map_async(self, *input_iterators, kwargs=kwargs),
572
- "You can't run Function.spawn_map() from an async function. Use Function.map.aio() instead.",
1280
+ "You can't run Function.spawn_map() from an async function. Use Function.spawn_map.aio() instead.",
573
1281
  )
574
1282
 
575
1283
 
@@ -596,6 +1304,7 @@ def _starmap_sync(
596
1304
  kwargs={},
597
1305
  order_outputs: bool = True,
598
1306
  return_exceptions: bool = False,
1307
+ wrap_returned_exceptions: bool = True,
599
1308
  ) -> AsyncOrSyncIterable:
600
1309
  """Like `map`, but spreads arguments over multiple function arguments.
601
1310
 
@@ -613,9 +1322,15 @@ def _starmap_sync(
613
1322
  assert list(my_func.starmap([(1, 2), (3, 4)])) == [3, 7]
614
1323
  ```
615
1324
  """
1325
+ _maybe_warn_about_exceptions("starmap", return_exceptions, wrap_returned_exceptions)
616
1326
  return AsyncOrSyncIterable(
617
1327
  _starmap_async(
618
- self, input_iterator, kwargs=kwargs, order_outputs=order_outputs, return_exceptions=return_exceptions
1328
+ self,
1329
+ input_iterator,
1330
+ kwargs=kwargs,
1331
+ order_outputs=order_outputs,
1332
+ return_exceptions=return_exceptions,
1333
+ wrap_returned_exceptions=wrap_returned_exceptions,
619
1334
  ),
620
1335
  nested_async_message=(
621
1336
  "You can't `iter(Function.starmap())` from an async function. "
@@ -653,12 +1368,19 @@ class _MapItemContext:
653
1368
  sync_client_retries_enabled: bool
654
1369
  # Both these futures are strings. Omitting generic type because
655
1370
  # it causes an error when running `inv protoc type-stubs`.
1371
+ # Unused. But important, input_id is not set for inputplane invocations.
656
1372
  input_id: asyncio.Future
657
1373
  input_jwt: asyncio.Future
658
1374
  previous_input_jwt: Optional[str]
659
1375
  _event_loop: asyncio.AbstractEventLoop
660
1376
 
661
- def __init__(self, input: api_pb2.FunctionInput, retry_manager: RetryManager, sync_client_retries_enabled: bool):
1377
+ def __init__(
1378
+ self,
1379
+ input: api_pb2.FunctionInput,
1380
+ retry_manager: RetryManager,
1381
+ sync_client_retries_enabled: bool,
1382
+ is_input_plane_instance: bool = False,
1383
+ ):
662
1384
  self.state = _MapItemState.SENDING
663
1385
  self.input = input
664
1386
  self.retry_manager = retry_manager
@@ -669,7 +1391,22 @@ class _MapItemContext:
         # a race condition where we could receive outputs before we have
         # recorded the input ID and JWT in `pending_outputs`.
         self.input_jwt = self._event_loop.create_future()
+        # Unused, but important: this is not set for input-plane invocations.
         self.input_id = self._event_loop.create_future()
+        self._is_input_plane_instance = is_input_plane_instance
+
+    def handle_map_start_or_continue_response(self, attempt_token: str):
+        if not self.input_jwt.done():
+            self.input_jwt.set_result(attempt_token)
+        else:
+            # A done future can't be set again, so create a fresh one for the next value.
+            self.input_jwt = asyncio.Future()
+            self.input_jwt.set_result(attempt_token)
+
+        # Move to WAITING_FOR_OUTPUT only if the current state is SENDING. If the state is
+        # RETRYING, WAITING_TO_RETRY, or COMPLETE, we already got the output.
+        if self.state == _MapItemState.SENDING:
+            self.state = _MapItemState.WAITING_FOR_OUTPUT
 
     def handle_put_inputs_response(self, item: api_pb2.FunctionPutInputsResponseItem):
         self.input_jwt.set_result(item.input_jwt)
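The replace-the-future move in `handle_map_start_or_continue_response` is worth calling out: a resolved `asyncio.Future` cannot be set twice, so delivering a new attempt token for the same input means swapping in a fresh future. A standalone sketch of the pattern, with no Modal types involved:

```python
import asyncio

class TokenSlot:
    """Delivers successive values through an awaitable slot.

    Mirrors the trick above: a done Future can't be set again,
    so each new value gets a fresh Future.
    """

    def __init__(self) -> None:
        self._future: asyncio.Future = asyncio.get_running_loop().create_future()

    def publish(self, value: str) -> None:
        if not self._future.done():
            self._future.set_result(value)
        else:
            # Already resolved: swap in a new future for the next value.
            self._future = asyncio.Future()
            self._future.set_result(value)

    async def latest(self) -> str:
        return await self._future

async def demo() -> None:
    slot = TokenSlot()
    slot.publish("attempt-token-1")
    print(await slot.latest())  # attempt-token-1
    slot.publish("attempt-token-2")  # swaps in a fresh future
    print(await slot.latest())  # attempt-token-2

asyncio.run(demo())
```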
@@ -692,10 +1429,11 @@ class _MapItemContext:
         Return the _OutputType describing how this output was handled; COMPLETE transitions
         are reflected in `self.state`.
         """
         # If the item is already complete, this is a duplicate output and can be ignored.
+
         if self.state == _MapItemState.COMPLETE:
             logger.debug(
                 f"Received output for input marked as complete. Must be duplicate, so ignoring. "
-                f"idx={item.idx} input_id={item.input_id}, retry_count={item.retry_count}"
+                f"idx={item.idx} input_id={item.input_id} retry_count={item.retry_count}"
             )
             return _OutputType.ALREADY_COMPLETE_DUPLICATE
         # If the item's retry count doesn't match our retry count, this is probably a duplicate of an old output.
@@ -737,12 +1475,17 @@ class _MapItemContext:
             delay_ms = 0
 
         # None means the maximum number of retries has been reached, so output the error
-        if delay_ms is None:
+        if delay_ms is None or item.result.status == api_pb2.GenericResult.GENERIC_STATUS_TERMINATED:
             self.state = _MapItemState.COMPLETE
             return _OutputType.FAILED_COMPLETION
 
         self.state = _MapItemState.WAITING_TO_RETRY
-        await retry_queue.put(now_seconds + (delay_ms / 1000), item.idx)
+
+        if self._is_input_plane_instance:
+            retry_item = await self.create_map_start_or_continue_item(item.idx)
+            await retry_queue.put(now_seconds + delay_ms / 1_000, retry_item)
+        else:
+            await retry_queue.put(now_seconds + delay_ms / 1_000, item.idx)
 
         return _OutputType.RETRYING
 
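Both retry branches above schedule work for a future wall-clock time: `now_seconds + delay_ms / 1_000`, paired with either a bare index (control plane) or a prebuilt `MapStartOrContinueItem` (input plane). A simplified stand-in for the timestamp-ordered queue this relies on; the real `TimestampPriorityQueue` lives in modal's async utils, and this sketch only assumes its put/get shape:

```python
import asyncio
import heapq
import time

class SimpleTimestampQueue:
    """Pops items only once their scheduled timestamp has passed.

    A simplified stand-in for the TimestampPriorityQueue used above.
    """

    def __init__(self) -> None:
        self._heap: list[tuple[float, int, object]] = []
        self._counter = 0  # tie-breaker so heapq never compares payloads

    async def put(self, timestamp: float, item: object) -> None:
        heapq.heappush(self._heap, (timestamp, self._counter, item))
        self._counter += 1

    async def get(self) -> object:
        while True:
            if self._heap and self._heap[0][0] <= time.time():
                return heapq.heappop(self._heap)[2]
            await asyncio.sleep(0.01)  # poll until the earliest deadline arrives

async def demo() -> None:
    q = SimpleTimestampQueue()
    await q.put(time.time() + 0.05, "retry idx=3")
    print(await q.get())  # yields after ~50 ms: retry idx=3

asyncio.run(demo())
```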
@@ -757,10 +1500,23 @@ class _MapItemContext:
             retry_count=self.retry_manager.retry_count,
         )
 
+    def set_retry_policy(self, retry_policy: api_pb2.FunctionRetryPolicy):
+        self.retry_manager = RetryManager(retry_policy)
+
     def handle_retry_response(self, input_jwt: str):
         self.input_jwt.set_result(input_jwt)
         self.state = _MapItemState.WAITING_FOR_OUTPUT
 
+    async def create_map_start_or_continue_item(self, idx: int) -> api_pb2.MapStartOrContinueItem:
+        attempt_token = await self.input_jwt
+        return api_pb2.MapStartOrContinueItem(
+            input=api_pb2.FunctionPutInputsItem(
+                input=self.input,
+                idx=idx,
+            ),
+            attempt_token=attempt_token,
+        )
+
 
 class _MapItemsManager:
     def __init__(
@@ -770,6 +1526,7 @@ class _MapItemsManager:
         retry_queue: TimestampPriorityQueue,
         sync_client_retries_enabled: bool,
         max_inputs_outstanding: int,
+        is_input_plane_instance: bool = False,
     ):
         self._retry_policy = retry_policy
         self.function_call_invocation_type = function_call_invocation_type
@@ -780,6 +1537,10 @@ class _MapItemsManager:
         self._inputs_outstanding = asyncio.BoundedSemaphore(max_inputs_outstanding)
         self._item_context: dict[int, _MapItemContext] = {}
         self._sync_client_retries_enabled = sync_client_retries_enabled
+        self._is_input_plane_instance = is_input_plane_instance
+
+    def set_retry_policy(self, retry_policy: api_pb2.FunctionRetryPolicy):
+        self._retry_policy = retry_policy
 
     async def add_items(self, items: list[api_pb2.FunctionPutInputsItem]):
         for item in items:
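Both `add_items` and the new `add_items_inputplane` (next hunk) gate admission on `self._inputs_outstanding`, the bounded semaphore created in `__init__`; `_remove_item` releases a permit when an input completes, which is what un-blocks the next pending input. A self-contained sketch of that backpressure pattern (the limit here is hypothetical):

```python
import asyncio

MAX_OUTSTANDING = 2  # hypothetical limit; the real value comes from the server

async def demo() -> None:
    outstanding = asyncio.BoundedSemaphore(MAX_OUTSTANDING)

    async def submit(idx: int) -> None:
        # Blocks once MAX_OUTSTANDING inputs are in flight.
        await outstanding.acquire()
        try:
            await asyncio.sleep(0.01)  # stand-in for sending + awaiting an output
        finally:
            outstanding.release()  # mirrors _remove_item() releasing the permit

    await asyncio.gather(*(submit(i) for i in range(5)))
    print("all inputs processed with at most", MAX_OUTSTANDING, "in flight")

asyncio.run(demo())
```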
@@ -792,9 +1553,28 @@ class _MapItemsManager:
                 sync_client_retries_enabled=self._sync_client_retries_enabled,
             )
 
+    async def add_items_inputplane(self, items: list[api_pb2.MapStartOrContinueItem]):
+        for item in items:
+            if item.attempt_token != "":  # a retry item: its context already exists
+                self._item_context[item.input.idx].state = _MapItemState.SENDING
+                continue
+            # Acquire the semaphore to limit the number of inputs in progress
+            # (either queued to be sent, waiting for completion, or retrying).
+            await self._inputs_outstanding.acquire()
+            self._item_context[item.input.idx] = _MapItemContext(
+                input=item.input.input,
+                retry_manager=RetryManager(self._retry_policy),
+                sync_client_retries_enabled=self._sync_client_retries_enabled,
+                is_input_plane_instance=self._is_input_plane_instance,
+            )
+
     async def prepare_items_for_retry(self, retriable_idxs: list[int]) -> list[api_pb2.FunctionRetryInputsItem]:
         return [await self._item_context[idx].prepare_item_for_retry() for idx in retriable_idxs]
 
+    def update_items_retry_policy(self, retry_policy: api_pb2.FunctionRetryPolicy):
+        for ctx in self._item_context.values():
+            ctx.set_retry_policy(retry_policy)
+
     def get_input_jwts_waiting_for_output(self) -> list[str]:
         """
         Returns a list of input_jwts for inputs that are waiting for output.
@@ -806,6 +1586,17 @@ class _MapItemsManager:
             if ctx.state == _MapItemState.WAITING_FOR_OUTPUT and ctx.input_jwt.done()
         ]
 
+    def get_input_idxs_waiting_for_output(self) -> list[tuple[int, str]]:
+        """
+        Returns (idx, attempt_token) pairs for inputs that are waiting for output.
+        """
+        # The idx doesn't need a future because it is set by the client, not the server.
+        return [
+            (idx, ctx.input_jwt.result())
+            for idx, ctx in self._item_context.items()
+            if ctx.state == _MapItemState.WAITING_FOR_OUTPUT and ctx.input_jwt.done()
+        ]
+
     def _remove_item(self, item_idx: int):
         del self._item_context[item_idx]
         self._inputs_outstanding.release()
@@ -813,6 +1604,18 @@ class _MapItemsManager:
     def get_item_context(self, item_idx: int) -> _MapItemContext:
         return self._item_context.get(item_idx)
 
+    def handle_put_continue_response(
+        self,
+        items: list[tuple[int, str]],  # (idx, attempt_token)
+    ):
+        for idx, attempt_token in items:
+            ctx = self._item_context.get(idx, None)
+            # If the context is None, then get_all_outputs() has already received a successful
+            # output and deleted the context. This happens if FunctionGetOutputs completes
+            # before the MapStartOrContinueResponse is received.
+            if ctx is not None:
+                ctx.handle_map_start_or_continue_response(attempt_token)
+
     def handle_put_inputs_response(self, items: list[api_pb2.FunctionPutInputsResponseItem]):
         for item in items:
             ctx = self._item_context.get(item.idx, None)
@@ -832,6 +1635,16 @@ class _MapItemsManager:
             if ctx is not None:
                 ctx.handle_retry_response(input_jwt)
 
+    async def handle_check_inputs_response(self, response: list[tuple[int, bool]]):
+        for idx, lost in response:
+            ctx = self._item_context.get(idx, None)
+            if ctx is not None and lost:
+                ctx.state = _MapItemState.WAITING_TO_RETRY
+                retry_item = await ctx.create_map_start_or_continue_item(idx)
+                # Increment the retry count, but schedule lost inputs for immediate retry.
+                _ = ctx.retry_manager.get_delay_ms()
+                await self._retry_queue.put(time.time(), retry_item)
+
     async def handle_get_outputs_response(self, item: api_pb2.FunctionGetOutputsItem, now_seconds: int) -> _OutputType:
         ctx = self._item_context.get(item.idx, None)
         if ctx is None: