modal 1.0.3.dev10__py3-none-any.whl → 1.2.3.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of modal might be problematic.

Files changed (160)
  1. modal/__init__.py +0 -2
  2. modal/__main__.py +3 -4
  3. modal/_billing.py +80 -0
  4. modal/_clustered_functions.py +7 -3
  5. modal/_clustered_functions.pyi +15 -3
  6. modal/_container_entrypoint.py +51 -69
  7. modal/_functions.py +508 -240
  8. modal/_grpc_client.py +171 -0
  9. modal/_load_context.py +105 -0
  10. modal/_object.py +81 -21
  11. modal/_output.py +58 -45
  12. modal/_partial_function.py +48 -73
  13. modal/_pty.py +7 -3
  14. modal/_resolver.py +26 -46
  15. modal/_runtime/asgi.py +4 -3
  16. modal/_runtime/container_io_manager.py +358 -220
  17. modal/_runtime/container_io_manager.pyi +296 -101
  18. modal/_runtime/execution_context.py +18 -2
  19. modal/_runtime/execution_context.pyi +64 -7
  20. modal/_runtime/gpu_memory_snapshot.py +262 -57
  21. modal/_runtime/user_code_imports.py +28 -58
  22. modal/_serialization.py +90 -6
  23. modal/_traceback.py +42 -1
  24. modal/_tunnel.pyi +380 -12
  25. modal/_utils/async_utils.py +84 -29
  26. modal/_utils/auth_token_manager.py +111 -0
  27. modal/_utils/blob_utils.py +181 -58
  28. modal/_utils/deprecation.py +19 -0
  29. modal/_utils/function_utils.py +91 -47
  30. modal/_utils/grpc_utils.py +89 -66
  31. modal/_utils/mount_utils.py +26 -1
  32. modal/_utils/name_utils.py +17 -3
  33. modal/_utils/task_command_router_client.py +536 -0
  34. modal/_utils/time_utils.py +34 -6
  35. modal/app.py +256 -88
  36. modal/app.pyi +909 -92
  37. modal/billing.py +5 -0
  38. modal/builder/2025.06.txt +18 -0
  39. modal/builder/PREVIEW.txt +18 -0
  40. modal/builder/base-images.json +58 -0
  41. modal/cli/_download.py +19 -3
  42. modal/cli/_traceback.py +3 -2
  43. modal/cli/app.py +4 -4
  44. modal/cli/cluster.py +15 -7
  45. modal/cli/config.py +5 -3
  46. modal/cli/container.py +7 -6
  47. modal/cli/dict.py +22 -16
  48. modal/cli/entry_point.py +12 -5
  49. modal/cli/environment.py +5 -4
  50. modal/cli/import_refs.py +3 -3
  51. modal/cli/launch.py +102 -5
  52. modal/cli/network_file_system.py +11 -12
  53. modal/cli/profile.py +3 -2
  54. modal/cli/programs/launch_instance_ssh.py +94 -0
  55. modal/cli/programs/run_jupyter.py +1 -1
  56. modal/cli/programs/run_marimo.py +95 -0
  57. modal/cli/programs/vscode.py +1 -1
  58. modal/cli/queues.py +57 -26
  59. modal/cli/run.py +91 -23
  60. modal/cli/secret.py +48 -22
  61. modal/cli/token.py +7 -8
  62. modal/cli/utils.py +4 -7
  63. modal/cli/volume.py +31 -25
  64. modal/client.py +15 -85
  65. modal/client.pyi +183 -62
  66. modal/cloud_bucket_mount.py +5 -3
  67. modal/cloud_bucket_mount.pyi +197 -5
  68. modal/cls.py +200 -126
  69. modal/cls.pyi +446 -68
  70. modal/config.py +29 -11
  71. modal/container_process.py +319 -19
  72. modal/container_process.pyi +190 -20
  73. modal/dict.py +290 -71
  74. modal/dict.pyi +835 -83
  75. modal/environments.py +15 -27
  76. modal/environments.pyi +46 -24
  77. modal/exception.py +14 -2
  78. modal/experimental/__init__.py +194 -40
  79. modal/experimental/flash.py +618 -0
  80. modal/experimental/flash.pyi +380 -0
  81. modal/experimental/ipython.py +11 -7
  82. modal/file_io.py +29 -36
  83. modal/file_io.pyi +251 -53
  84. modal/file_pattern_matcher.py +56 -16
  85. modal/functions.pyi +673 -92
  86. modal/gpu.py +1 -1
  87. modal/image.py +528 -176
  88. modal/image.pyi +1572 -145
  89. modal/io_streams.py +458 -128
  90. modal/io_streams.pyi +433 -52
  91. modal/mount.py +216 -151
  92. modal/mount.pyi +225 -78
  93. modal/network_file_system.py +45 -62
  94. modal/network_file_system.pyi +277 -56
  95. modal/object.pyi +93 -17
  96. modal/parallel_map.py +942 -129
  97. modal/parallel_map.pyi +294 -15
  98. modal/partial_function.py +0 -2
  99. modal/partial_function.pyi +234 -19
  100. modal/proxy.py +17 -8
  101. modal/proxy.pyi +36 -3
  102. modal/queue.py +270 -65
  103. modal/queue.pyi +817 -57
  104. modal/runner.py +115 -101
  105. modal/runner.pyi +205 -49
  106. modal/sandbox.py +512 -136
  107. modal/sandbox.pyi +845 -111
  108. modal/schedule.py +1 -1
  109. modal/secret.py +300 -70
  110. modal/secret.pyi +589 -34
  111. modal/serving.py +7 -11
  112. modal/serving.pyi +7 -8
  113. modal/snapshot.py +11 -8
  114. modal/snapshot.pyi +25 -4
  115. modal/token_flow.py +4 -4
  116. modal/token_flow.pyi +28 -8
  117. modal/volume.py +416 -158
  118. modal/volume.pyi +1117 -121
  119. {modal-1.0.3.dev10.dist-info → modal-1.2.3.dev7.dist-info}/METADATA +10 -9
  120. modal-1.2.3.dev7.dist-info/RECORD +195 -0
  121. modal_docs/mdmd/mdmd.py +17 -4
  122. modal_proto/api.proto +534 -79
  123. modal_proto/api_grpc.py +337 -1
  124. modal_proto/api_pb2.py +1522 -968
  125. modal_proto/api_pb2.pyi +1619 -134
  126. modal_proto/api_pb2_grpc.py +699 -4
  127. modal_proto/api_pb2_grpc.pyi +226 -14
  128. modal_proto/modal_api_grpc.py +175 -154
  129. modal_proto/sandbox_router.proto +145 -0
  130. modal_proto/sandbox_router_grpc.py +105 -0
  131. modal_proto/sandbox_router_pb2.py +149 -0
  132. modal_proto/sandbox_router_pb2.pyi +333 -0
  133. modal_proto/sandbox_router_pb2_grpc.py +203 -0
  134. modal_proto/sandbox_router_pb2_grpc.pyi +75 -0
  135. modal_proto/task_command_router.proto +144 -0
  136. modal_proto/task_command_router_grpc.py +105 -0
  137. modal_proto/task_command_router_pb2.py +149 -0
  138. modal_proto/task_command_router_pb2.pyi +333 -0
  139. modal_proto/task_command_router_pb2_grpc.py +203 -0
  140. modal_proto/task_command_router_pb2_grpc.pyi +75 -0
  141. modal_version/__init__.py +1 -1
  142. modal/requirements/PREVIEW.txt +0 -16
  143. modal/requirements/base-images.json +0 -26
  144. modal-1.0.3.dev10.dist-info/RECORD +0 -179
  145. modal_proto/modal_options_grpc.py +0 -3
  146. modal_proto/options.proto +0 -19
  147. modal_proto/options_grpc.py +0 -3
  148. modal_proto/options_pb2.py +0 -35
  149. modal_proto/options_pb2.pyi +0 -20
  150. modal_proto/options_pb2_grpc.py +0 -4
  151. modal_proto/options_pb2_grpc.pyi +0 -7
  152. /modal/{requirements → builder}/2023.12.312.txt +0 -0
  153. /modal/{requirements → builder}/2023.12.txt +0 -0
  154. /modal/{requirements → builder}/2024.04.txt +0 -0
  155. /modal/{requirements → builder}/2024.10.txt +0 -0
  156. /modal/{requirements → builder}/README.md +0 -0
  157. {modal-1.0.3.dev10.dist-info → modal-1.2.3.dev7.dist-info}/WHEEL +0 -0
  158. {modal-1.0.3.dev10.dist-info → modal-1.2.3.dev7.dist-info}/entry_points.txt +0 -0
  159. {modal-1.0.3.dev10.dist-info → modal-1.2.3.dev7.dist-info}/licenses/LICENSE +0 -0
  160. {modal-1.0.3.dev10.dist-info → modal-1.2.3.dev7.dist-info}/top_level.txt +0 -0
@@ -16,6 +16,7 @@ from typing import (
     Any,
     Callable,
     ClassVar,
+    Generator,
     Optional,
     cast,
 )
@@ -24,22 +25,25 @@ from google.protobuf.empty_pb2 import Empty
 from grpclib import Status
 from synchronicity.async_wrap import asynccontextmanager
 
-import modal_proto.api_pb2
 from modal._runtime import gpu_memory_snapshot
-from modal._serialization import deserialize, serialize, serialize_data_format
-from modal._traceback import extract_traceback, print_exception
-from modal._utils.async_utils import TaskContext, asyncify, synchronize_api, synchronizer
-from modal._utils.blob_utils import MAX_OBJECT_SIZE_BYTES, blob_download, blob_upload
+from modal._serialization import (
+    deserialize_data_format,
+    pickle_exception,
+    pickle_traceback,
+    serialize_data_format,
+)
+from modal._traceback import print_exception
+from modal._utils.async_utils import TaskContext, aclosing, asyncify, synchronize_api, synchronizer
+from modal._utils.blob_utils import MAX_OBJECT_SIZE_BYTES, blob_download, blob_upload, format_blob_data
 from modal._utils.function_utils import _stream_function_call_data
-from modal._utils.grpc_utils import retry_transient_errors
+from modal._utils.grpc_utils import Retry
 from modal._utils.package_utils import parse_major_minor_version
 from modal.client import HEARTBEAT_INTERVAL, HEARTBEAT_TIMEOUT, _Client
 from modal.config import config, logger
-from modal.exception import ClientClosed, InputCancellation, InvalidError, SerializationError
+from modal.exception import ClientClosed, InputCancellation, InvalidError
 from modal_proto import api_pb2
 
 if TYPE_CHECKING:
-    import modal._runtime.asgi
     import modal._runtime.user_code_imports
 
 
@@ -66,6 +70,7 @@ class IOContext:
     input_ids: list[str]
     retry_counts: list[int]
     function_call_ids: list[str]
+    attempt_tokens: list[str]
     function_inputs: list[api_pb2.FunctionInput]
     finalized_function: "modal._runtime.user_code_imports.FinalizedFunction"
 
@@ -77,6 +82,7 @@ class IOContext:
         input_ids: list[str],
         retry_counts: list[int],
         function_call_ids: list[str],
+        attempt_tokens: list[str],
         finalized_function: "modal._runtime.user_code_imports.FinalizedFunction",
         function_inputs: list[api_pb2.FunctionInput],
         is_batched: bool,
@@ -85,6 +91,7 @@ class IOContext:
         self.input_ids = input_ids
         self.retry_counts = retry_counts
         self.function_call_ids = function_call_ids
+        self.attempt_tokens = attempt_tokens
         self.finalized_function = finalized_function
         self.function_inputs = function_inputs
         self._is_batched = is_batched
@@ -95,11 +102,11 @@ class IOContext:
         cls,
         client: _Client,
         finalized_functions: dict[str, "modal._runtime.user_code_imports.FinalizedFunction"],
-        inputs: list[tuple[str, int, str, api_pb2.FunctionInput]],
+        inputs: list[tuple[str, int, str, str, api_pb2.FunctionInput]],
         is_batched: bool,
     ) -> "IOContext":
         assert len(inputs) >= 1 if is_batched else len(inputs) == 1
-        input_ids, retry_counts, function_call_ids, function_inputs = zip(*inputs)
+        input_ids, retry_counts, function_call_ids, attempt_tokens, function_inputs = zip(*inputs)
 
         async def _populate_input_blobs(client: _Client, input: api_pb2.FunctionInput) -> api_pb2.FunctionInput:
             # If we got a pointer to a blob, download it from S3.
@@ -121,6 +128,7 @@ class IOContext:
             cast(list[str], input_ids),
             cast(list[int], retry_counts),
             cast(list[str], function_call_ids),
+            cast(list[str], attempt_tokens),
             finalized_function,
             cast(list[api_pb2.FunctionInput], function_inputs),
             is_batched,
@@ -148,9 +156,13 @@ class IOContext:
         # deserializing here instead of the constructor
         # to make sure we handle user exceptions properly
         # and don't retry
-        deserialized_args = [
-            deserialize(input.args, self._client) if input.args else ((), {}) for input in self.function_inputs
-        ]
+        deserialized_args = []
+        for input in self.function_inputs:
+            if input.args:
+                data_format = input.data_format
+                deserialized_args.append(deserialize_data_format(input.args, data_format, self._client))
+            else:
+                deserialized_args.append(((), {}))
         if not self._is_batched:
             return deserialized_args[0]
 
@@ -188,25 +200,229 @@ class IOContext:
         }
         return (), formatted_kwargs
 
-    def call_finalized_function(self) -> Any:
+    def _generator_output_format(self) -> "api_pb2.DataFormat.ValueType":
+        return self._determine_output_format(self.function_inputs[0].data_format)
+
+    def _prepare_batch_output(self, data: Any) -> list[Any]:
+        # validate that output is valid for batch
+        if self._is_batched:
+            # assert data is list etc.
+            function_name = self.finalized_function.callable.__name__
+
+            if not isinstance(data, list):
+                raise InvalidError(f"Output of batched function {function_name} must be a list.")
+            if len(data) != len(self.input_ids):
+                raise InvalidError(
+                    f"Output of batched function {function_name} must be a list of equal length as its inputs."
+                )
+            return data
+        else:
+            return [data]
+
+    def call_function_sync(self) -> list[Any]:
         logger.debug(f"Starting input {self.input_ids}")
         args, kwargs = self._args_and_kwargs()
-        res = self.finalized_function.callable(*args, **kwargs)
+        expected_value_or_values = self.finalized_function.callable(*args, **kwargs)
+        if (
+            inspect.iscoroutine(expected_value_or_values)
+            or inspect.isgenerator(expected_value_or_values)
+            or inspect.isasyncgen(expected_value_or_values)
+        ):
+            raise InvalidError(
+                f"Sync (non-generator) function return value of type {type(expected_value_or_values)}."
+                " You might need to use @app.function(..., is_generator=True)."
+            )
         logger.debug(f"Finished input {self.input_ids}")
-        return res
+        return self._prepare_batch_output(expected_value_or_values)
 
-    def validate_output_data(self, data: Any) -> list[Any]:
-        if not self._is_batched:
-            return [data]
-
-        function_name = self.finalized_function.callable.__name__
-        if not isinstance(data, list):
-            raise InvalidError(f"Output of batched function {function_name} must be a list.")
-        if len(data) != len(self.input_ids):
+    async def call_function_async(self) -> list[Any]:
+        logger.debug(f"Starting input {self.input_ids}")
+        args, kwargs = self._args_and_kwargs()
+        expected_coro = self.finalized_function.callable(*args, **kwargs)
+        if (
+            not inspect.iscoroutine(expected_coro)
+            or inspect.isgenerator(expected_coro)
+            or inspect.isasyncgen(expected_coro)
+        ):
             raise InvalidError(
-                f"Output of batched function {function_name} must be a list of equal length as its inputs."
+                f"Async (non-generator) function returned value of type {type(expected_coro)}"
+                " You might need to use @app.function(..., is_generator=True)."
             )
-        return data
+        value = await expected_coro
+        logger.debug(f"Finished input {self.input_ids}")
+        return self._prepare_batch_output(value)
+
+    def call_generator_sync(self) -> Generator[Any, None, None]:
+        assert not self._is_batched
+        logger.debug(f"Starting generator input {self.input_ids}")
+        args, kwargs = self._args_and_kwargs()
+        expected_gen = self.finalized_function.callable(*args, **kwargs)
+        if not inspect.isgenerator(expected_gen):
+            raise InvalidError(f"Generator function returned value of type {type(expected_gen)}")
+
+        for result in expected_gen:
+            yield result
+        logger.debug(f"Finished generator input {self.input_ids}")
+
+    async def call_generator_async(self) -> AsyncGenerator[Any, None]:
+        assert not self._is_batched
+        logger.debug(f"Starting generator input {self.input_ids}")
+        args, kwargs = self._args_and_kwargs()
+        expected_async_gen = self.finalized_function.callable(*args, **kwargs)
+        if not inspect.isasyncgen(expected_async_gen):
+            raise InvalidError(f"Async generator function returned value of type {type(expected_async_gen)}")
+
+        async with aclosing(expected_async_gen) as gen:
+            async for result in gen:
+                yield result
+        logger.debug(f"Finished generator input {self.input_ids}")
+
+    async def output_items_cancellation(self, started_at: float):
+        output_created_at = time.time()
+        # Create terminated outputs for these inputs to signal that the cancellations have been completed.
+        return [
+            api_pb2.FunctionPutOutputsItem(
+                input_id=input_id,
+                input_started_at=started_at,
+                output_created_at=output_created_at,
+                result=api_pb2.GenericResult(status=api_pb2.GenericResult.GENERIC_STATUS_TERMINATED),
+                retry_count=retry_count,
+            )
+            for input_id, retry_count in zip(self.input_ids, self.retry_counts)
+        ]
+
+    def _determine_output_format(self, input_format: "api_pb2.DataFormat.ValueType") -> "api_pb2.DataFormat.ValueType":
+        if input_format in self.finalized_function.supported_output_formats:
+            return input_format
+        elif self.finalized_function.supported_output_formats:
+            # This branch would normally be hit when calling a restricted_output function with Pickle input
+            # but we enforce cbor output at function definition level. In the future we might send the intended
+            # output format along with the input to make this disitinction in the calling client instead
+            logger.debug(
+                f"Got an input with format {input_format}, but can only produce output"
+                f" using formats {self.finalized_function.supported_output_formats}"
+            )
+            return self.finalized_function.supported_output_formats[0]
+        else:
+            # This should never happen since self.finalized_function.supported_output_formats should be
+            # populated with defaults in case it's empty, log a warning
+            logger.warning(f"Got an input with format {input_format}, but the function has no defined output formats")
+            return api_pb2.DATA_FORMAT_PICKLE
+
+    async def output_items_exception(
+        self, started_at: float, task_id: str, exc: BaseException
+    ) -> list[api_pb2.FunctionPutOutputsItem]:
+        # Note: we're not pickling the traceback since it contains
+        # local references that means we can't unpickle it. We *are*
+        # pickling the exception, which may have some issues (there
+        # was an earlier note about it that it might not be possible
+        # to unpickle it in some cases). Let's watch out for issues.
+        repr_exc = repr(exc)
+        if len(repr_exc) >= MAX_OBJECT_SIZE_BYTES:
+            # We prevent large exception messages to avoid
+            # unhandled exceptions causing inf loops
+            # and just send backa trimmed version
+            trimmed_bytes = len(repr_exc) - MAX_OBJECT_SIZE_BYTES - 1000
+            repr_exc = repr_exc[: MAX_OBJECT_SIZE_BYTES - 1000]
+            repr_exc = f"{repr_exc}...\nTrimmed {trimmed_bytes} bytes from original exception"
+
+        data: bytes = pickle_exception(exc)
+        data_result_part = await format_blob_data(data, self._client.stub)
+        serialized_tb, tb_line_cache = pickle_traceback(exc, task_id)
+
+        # Failure outputs for when input exceptions occur
+        def data_format_specific_output(input_format: "api_pb2.DataFormat.ValueType") -> dict:
+            output_format = self._determine_output_format(input_format)
+            if output_format == api_pb2.DATA_FORMAT_PICKLE:
+                return {
+                    "data_format": output_format,
+                    "result": api_pb2.GenericResult(
+                        status=api_pb2.GenericResult.GENERIC_STATUS_FAILURE,
+                        exception=repr_exc,
+                        traceback=traceback.format_exc(),
+                        serialized_tb=serialized_tb,
+                        tb_line_cache=tb_line_cache,
+                        **data_result_part,
+                    ),
+                }
+            else:
+                return {
+                    "data_format": output_format,
+                    "result": api_pb2.GenericResult(
+                        status=api_pb2.GenericResult.GENERIC_STATUS_FAILURE,
+                        exception=repr_exc,
+                        traceback=traceback.format_exc(),
+                    ),
+                }
+
+        # all inputs in the batch get the same failure:
+        output_created_at = time.time()
+        return [
+            api_pb2.FunctionPutOutputsItem(
+                input_id=input_id,
+                input_started_at=started_at,
+                output_created_at=output_created_at,
+                retry_count=retry_count,
+                **data_format_specific_output(function_input.data_format),
+            )
+            for input_id, retry_count, function_input in zip(self.input_ids, self.retry_counts, self.function_inputs)
+        ]
+
+    def output_items_generator_done(self, started_at: float, items_total: int) -> list[api_pb2.FunctionPutOutputsItem]:
+        assert not self._is_batched, "generators are not supported with batched inputs"
+        assert len(self.function_inputs) == 1, "generators are expected to have 1 input"
+        # Serialize and format the data
+        serialized_bytes = serialize_data_format(
+            api_pb2.GeneratorDone(items_total=items_total), data_format=api_pb2.DATA_FORMAT_GENERATOR_DONE
+        )
+        return [
+            api_pb2.FunctionPutOutputsItem(
+                input_id=self.input_ids[0],
+                input_started_at=started_at,
+                output_created_at=time.time(),
+                result=api_pb2.GenericResult(
+                    status=api_pb2.GenericResult.GENERIC_STATUS_SUCCESS,
+                    data=serialized_bytes,
+                ),
+                data_format=api_pb2.DATA_FORMAT_GENERATOR_DONE,
+                retry_count=self.retry_counts[0],
+            )
+        ]
+
+    async def output_items(self, started_at: float, data: list[Any]) -> list[api_pb2.FunctionPutOutputsItem]:
+        output_created_at = time.time()
+
+        # Process all items concurrently and create output items directly
+        async def package_output(
+            item: Any, input_id: str, retry_count: int, input_format: "api_pb2.DataFormat.ValueType"
+        ) -> api_pb2.FunctionPutOutputsItem:
+            output_format = self._determine_output_format(input_format)
+
+            serialized_bytes = serialize_data_format(item, data_format=output_format)
+            formatted = await format_blob_data(serialized_bytes, self._client.stub)
+            # Create the result
+            result = api_pb2.GenericResult(
+                status=api_pb2.GenericResult.GENERIC_STATUS_SUCCESS,
+                **formatted,
+            )
+            return api_pb2.FunctionPutOutputsItem(
+                input_id=input_id,
+                input_started_at=started_at,
+                output_created_at=output_created_at,
+                result=result,
+                data_format=output_format,
+                retry_count=retry_count,
+            )
+
+        # Process all items concurrently
+        return await asyncio.gather(
+            *[
+                package_output(item, input_id, retry_count, function_input.data_format)
+                for item, input_id, retry_count, function_input in zip(
+                    data, self.input_ids, self.retry_counts, self.function_inputs
+                )
+            ]
+        )
 
 
 class InputSlots:
@@ -267,6 +483,7 @@ class _ContainerIOManager:
     app_id: str
     function_def: api_pb2.Function
     checkpoint_id: Optional[str]
+    input_plane_server_url: Optional[str]
 
     calls_completed: int
     total_user_time: float
@@ -290,7 +507,6 @@ class _ContainerIOManager:
 
     _client: _Client
 
-    _GENERATOR_STOP_SENTINEL: ClassVar[Sentinel] = Sentinel()
     _singleton: ClassVar[Optional["_ContainerIOManager"]] = None
 
     def _init(self, container_args: api_pb2.ContainerArguments, client: _Client):
@@ -300,6 +516,8 @@ class _ContainerIOManager:
         self.function_def = container_args.function_def
         self.checkpoint_id = container_args.checkpoint_id or None
 
+        self.input_plane_server_url = container_args.input_plane_server_url
+
         self.calls_completed = 0
         self.total_user_time = 0.0
         self.current_input_id = None
@@ -323,6 +541,7 @@ class _ContainerIOManager:
         self._heartbeat_loop = None
         self._heartbeat_condition = None
         self._waiting_for_memory_snapshot = False
+        self._cuda_checkpoint_session = None
 
         self._is_interactivity_enabled = False
         self._fetching_inputs = True
@@ -404,8 +623,8 @@ class _ContainerIOManager:
                 await self.heartbeat_condition.wait()
 
         request = api_pb2.ContainerHeartbeatRequest(canceled_inputs_return_outputs_v2=True)
-        response = await retry_transient_errors(
-            self._client.stub.ContainerHeartbeat, request, attempt_timeout=HEARTBEAT_TIMEOUT
+        response = await self._client.stub.ContainerHeartbeat(
+            request, retry=Retry(attempt_timeout=HEARTBEAT_TIMEOUT)
         )
 
         if response.HasField("cancel_input_event"):
@@ -452,10 +671,9 @@ class _ContainerIOManager:
                     target_concurrency=self._target_concurrency,
                     max_concurrency=self._max_concurrency,
                 )
-                resp = await retry_transient_errors(
-                    self._client.stub.FunctionGetDynamicConcurrency,
+                resp = await self._client.stub.FunctionGetDynamicConcurrency(
                     request,
-                    attempt_timeout=DYNAMIC_CONCURRENCY_TIMEOUT_SECS,
+                    retry=Retry(attempt_timeout=DYNAMIC_CONCURRENCY_TIMEOUT_SECS),
                 )
                 if resp.concurrency != self._input_slots.value and not self._stop_concurrency_loop:
                     logger.debug(f"Dynamic concurrency set from {self._input_slots.value} to {resp.concurrency}")
@@ -466,27 +684,23 @@ class _ContainerIOManager:
 
             await asyncio.sleep(DYNAMIC_CONCURRENCY_INTERVAL_SECS)
 
-    @synchronizer.no_io_translation
-    def serialize_data_format(self, obj: Any, data_format: int) -> bytes:
-        return serialize_data_format(obj, data_format)
-
-    async def format_blob_data(self, data: bytes) -> dict[str, Any]:
-        return (
-            {"data_blob_id": await blob_upload(data, self._client.stub)}
-            if len(data) > MAX_OBJECT_SIZE_BYTES
-            else {"data": data}
-        )
-
-    async def get_data_in(self, function_call_id: str) -> AsyncIterator[Any]:
+    async def get_data_in(self, function_call_id: str, attempt_token: Optional[str]) -> AsyncIterator[Any]:
         """Read from the `data_in` stream of a function call."""
-        async for data in _stream_function_call_data(self._client, function_call_id, "data_in"):
+        stub = self._client.stub
+        if self.input_plane_server_url:
+            stub = await self._client.get_stub(self.input_plane_server_url)
+
+        async for data in _stream_function_call_data(
+            self._client, stub, function_call_id, variant="data_in", attempt_token=attempt_token
+        ):
             yield data
 
     async def put_data_out(
         self,
         function_call_id: str,
+        attempt_token: str,
         start_index: int,
-        data_format: int,
+        data_format: "api_pb2.DataFormat.ValueType",
         serialized_messages: list[Any],
     ) -> None:
         """Put data onto the `data_out` stream of a function call.
@@ -505,35 +719,60 @@ class _ContainerIOManager:
             data_chunks.append(chunk)
 
         req = api_pb2.FunctionCallPutDataRequest(function_call_id=function_call_id, data_chunks=data_chunks)
-        await retry_transient_errors(self._client.stub.FunctionCallPutDataOut, req)
-
-    async def generator_output_task(self, function_call_id: str, data_format: int, message_rx: asyncio.Queue) -> None:
-        """Task that feeds generator outputs into a function call's `data_out` stream."""
-        index = 1
-        received_sentinel = False
-        while not received_sentinel:
-            message = await message_rx.get()
-            if message is self._GENERATOR_STOP_SENTINEL:
-                break
-            # ASGI 'http.response.start' and 'http.response.body' msgs are observed to be separated by 1ms.
-            # If we don't sleep here for 1ms we end up with an extra call to .put_data_out().
-            if index == 1:
-                await asyncio.sleep(0.001)
-            serialized_messages = [serialize_data_format(message, data_format)]
-            total_size = len(serialized_messages[0]) + 512
-            while total_size < 16 * 1024 * 1024:  # 16 MiB, maximum size in a single message
-                try:
-                    message = message_rx.get_nowait()
-                except asyncio.QueueEmpty:
-                    break
-                if message is self._GENERATOR_STOP_SENTINEL:
-                    received_sentinel = True
+        if attempt_token:
+            req.attempt_token = attempt_token  # oneof clears function_call_id.
+
+        if self.input_plane_server_url:
+            stub = await self._client.get_stub(self.input_plane_server_url)
+            await stub.FunctionCallPutDataOut(req)
+        else:
+            await self._client.stub.FunctionCallPutDataOut(req)
+
+    @asynccontextmanager
+    async def generator_output_sender(
+        self,
+        function_call_id: str,
+        attempt_token: str,
+        data_format: "api_pb2.DataFormat.ValueType",
+        message_rx: asyncio.Queue,
+    ) -> AsyncGenerator[None, None]:
+        """Runs background task that feeds generator outputs into a function call's `data_out` stream."""
+        GENERATOR_STOP_SENTINEL = Sentinel()
+
+        async def generator_output_task():
+            index = 1
+            received_sentinel = False
+            while not received_sentinel:
+                message = await message_rx.get()
+                if message is GENERATOR_STOP_SENTINEL:
                     break
-                else:
-                    serialized_messages.append(serialize_data_format(message, data_format))
-                    total_size += len(serialized_messages[-1]) + 512  # 512 bytes for estimated framing overhead
-            await self.put_data_out(function_call_id, index, data_format, serialized_messages)
-            index += len(serialized_messages)
+                # ASGI 'http.response.start' and 'http.response.body' msgs are observed to be separated by 1ms.
+                # If we don't sleep here for 1ms we end up with an extra call to .put_data_out().
+                if index == 1:
+                    await asyncio.sleep(0.001)
+                serialized_messages = [serialize_data_format(message, data_format)]
+                total_size = len(serialized_messages[0]) + 512
+                while total_size < 16 * 1024 * 1024:  # 16 MiB, maximum size in a single message
+                    try:
+                        message = message_rx.get_nowait()
+                    except asyncio.QueueEmpty:
+                        break
+                    if message is GENERATOR_STOP_SENTINEL:
+                        received_sentinel = True
+                        break
+                    else:
+                        serialized_messages.append(serialize_data_format(message, data_format))
+                        total_size += len(serialized_messages[-1]) + 512  # 512 bytes for estimated framing overhead
+                await self.put_data_out(function_call_id, attempt_token, index, data_format, serialized_messages)
+                index += len(serialized_messages)
+
+        task = asyncio.create_task(generator_output_task())
+        try:
+            yield
+        finally:
+            # gracefully stop the task after all current inputs have been sent
+            await message_rx.put(GENERATOR_STOP_SENTINEL)
+            await task
 
     async def _queue_create(self, size: int) -> asyncio.Queue:
         """Create a queue, on the synchronicity event loop (needed on Python 3.8 and 3.9)."""
@@ -560,7 +799,7 @@ class _ContainerIOManager:
         self,
         batch_max_size: int,
         batch_wait_ms: int,
-    ) -> AsyncIterator[list[tuple[str, int, str, api_pb2.FunctionInput]]]:
+    ) -> AsyncIterator[list[tuple[str, int, str, str, api_pb2.FunctionInput]]]:
         request = api_pb2.FunctionGetInputsRequest(function_id=self.function_id)
         iteration = 0
         while self._fetching_inputs:
@@ -575,9 +814,7 @@ class _ContainerIOManager:
             try:
                 # If number of active inputs is at max queue size, this will block.
                 iteration += 1
-                response: api_pb2.FunctionGetInputsResponse = await retry_transient_errors(
-                    self._client.stub.FunctionGetInputs, request
-                )
+                response: api_pb2.FunctionGetInputsResponse = await self._client.stub.FunctionGetInputs(request)
 
                 if response.rate_limit_sleep_duration:
                     logger.info(
@@ -595,7 +832,9 @@ class _ContainerIOManager:
                     if item.kill_switch:
                         logger.debug(f"Task {self.task_id} input kill signal input.")
                         return
-                    inputs.append((item.input_id, item.retry_count, item.function_call_id, item.input))
+                    inputs.append(
+                        (item.input_id, item.retry_count, item.function_call_id, item.attempt_token, item.input)
+                    )
                     if item.input.final_input:
                         if request.batch_max_size > 0:
                             logger.debug(f"Task {self.task_id} Final input not expected in batch input stream")
@@ -636,62 +875,24 @@ class _ContainerIOManager:
             self.current_input_id, self.current_input_started_at = io_context.input_ids[0], time.time()
             yield io_context
             self.current_input_id, self.current_input_started_at = (None, None)
-
         # collect all active input slots, meaning all inputs have wrapped up.
         await self._input_slots.close()
 
-    @synchronizer.no_io_translation
-    async def _push_outputs(
-        self,
-        io_context: IOContext,
-        started_at: float,
-        data_format: "modal_proto.api_pb2.DataFormat.ValueType",
-        results: list[api_pb2.GenericResult],
-    ) -> None:
-        output_created_at = time.time()
-        outputs = [
-            api_pb2.FunctionPutOutputsItem(
-                input_id=input_id,
-                input_started_at=started_at,
-                output_created_at=output_created_at,
-                result=result,
-                data_format=data_format,
-                retry_count=retry_count,
-            )
-            for input_id, retry_count, result in zip(io_context.input_ids, io_context.retry_counts, results)
-        ]
-
+    async def _send_outputs(self, started_at: float, outputs: list[api_pb2.FunctionPutOutputsItem]) -> None:
+        """Send pre-built output items with retry and chunking."""
         # There are multiple outputs for a single IOContext in the case of @modal.batched.
         # Limit the batch size to 20 to stay within message size limits and buffer size limits.
         output_batch_size = 20
         for i in range(0, len(outputs), output_batch_size):
-            await retry_transient_errors(
-                self._client.stub.FunctionPutOutputs,
+            await self._client.stub.FunctionPutOutputs(
                 api_pb2.FunctionPutOutputsRequest(outputs=outputs[i : i + output_batch_size]),
-                additional_status_codes=[Status.RESOURCE_EXHAUSTED],
-                max_retries=None,  # Retry indefinitely, trying every 1s.
+                retry=Retry(
+                    additional_status_codes=[Status.RESOURCE_EXHAUSTED],
+                    max_retries=None,  # Retry indefinitely, trying every 1s.
+                ),
             )
-
-    def serialize_exception(self, exc: BaseException) -> bytes:
-        try:
-            return serialize(exc)
-        except Exception as serialization_exc:
-            # We can't always serialize exceptions.
-            err = f"Failed to serialize exception {exc} of type {type(exc)}: {serialization_exc}"
-            logger.info(err)
-            return serialize(SerializationError(err))
-
-    def serialize_traceback(self, exc: BaseException) -> tuple[Optional[bytes], Optional[bytes]]:
-        serialized_tb, tb_line_cache = None, None
-
-        try:
-            tb_dict, line_cache = extract_traceback(exc, self.task_id)
-            serialized_tb = serialize(tb_dict)
-            tb_line_cache = serialize(line_cache)
-        except Exception:
-            logger.info("Failed to serialize exception traceback.")
-
-        return serialized_tb, tb_line_cache
+        input_ids = [output.input_id for output in outputs]
+        self.exit_context(started_at, input_ids)
 
     @asynccontextmanager
@@ -714,11 +915,14 @@ class _ContainerIOManager:
             # Since this is on a different thread, sys.exc_info() can't find the exception in the stack.
             print_exception(type(exc), exc, exc.__traceback__)
 
-            serialized_tb, tb_line_cache = self.serialize_traceback(exc)
+            serialized_tb, tb_line_cache = pickle_traceback(exc, self.task_id)
 
+            data_or_blob = await format_blob_data(pickle_exception(exc), self._client.stub)
             result = api_pb2.GenericResult(
                 status=api_pb2.GenericResult.GENERIC_STATUS_FAILURE,
-                data=self.serialize_exception(exc),
+                **data_or_blob,
+                # TODO: there is no way to communicate the data format here
+                # since it usually goes on the envelope outside of GenericResult
                 exception=repr(exc),
                 traceback="".join(traceback.format_exception(type(exc), exc, exc.__traceback__)),
                 serialized_tb=serialized_tb or b"",
@@ -726,7 +930,7 @@ class _ContainerIOManager:
             )
 
             req = api_pb2.TaskResultRequest(result=result)
-            await retry_transient_errors(self._client.stub.TaskResult, req)
+            await self._client.stub.TaskResult(req)
 
             # Shut down the task gracefully
             raise UserException()
@@ -748,18 +952,8 @@ class _ContainerIOManager:
             # for the yield. Typically on event loop shutdown
             raise
         except (InputCancellation, asyncio.CancelledError):
-            # Create terminated outputs for these inputs to signal that the cancellations have been completed.
-            results = [
-                api_pb2.GenericResult(status=api_pb2.GenericResult.GENERIC_STATUS_TERMINATED)
-                for _ in io_context.input_ids
-            ]
-            await self._push_outputs(
-                io_context=io_context,
-                started_at=started_at,
-                data_format=api_pb2.DATA_FORMAT_PICKLE,
-                results=results,
-            )
-            self.exit_context(started_at, io_context.input_ids)
+            outputs = await io_context.output_items_cancellation(started_at)
+            await self._send_outputs(started_at, outputs)
             logger.warning(f"Successfully canceled input {io_context.input_ids}")
             return
         except BaseException as exc:
@@ -769,44 +963,8 @@ class _ContainerIOManager:
 
             # print exception so it's logged
             print_exception(*sys.exc_info())
-
-            serialized_tb, tb_line_cache = self.serialize_traceback(exc)
-
-            # Note: we're not serializing the traceback since it contains
-            # local references that means we can't unpickle it. We *are*
-            # serializing the exception, which may have some issues (there
-            # was an earlier note about it that it might not be possible
-            # to unpickle it in some cases). Let's watch out for issues.
-
-            repr_exc = repr(exc)
-            if len(repr_exc) >= MAX_OBJECT_SIZE_BYTES:
-                # We prevent large exception messages to avoid
-                # unhandled exceptions causing inf loops
-                # and just send backa trimmed version
-                trimmed_bytes = len(repr_exc) - MAX_OBJECT_SIZE_BYTES - 1000
-                repr_exc = repr_exc[: MAX_OBJECT_SIZE_BYTES - 1000]
-                repr_exc = f"{repr_exc}...\nTrimmed {trimmed_bytes} bytes from original exception"
-
-            data: bytes = self.serialize_exception(exc) or b""
-            data_result_part = await self.format_blob_data(data)
-            results = [
-                api_pb2.GenericResult(
-                    status=api_pb2.GenericResult.GENERIC_STATUS_FAILURE,
-                    exception=repr_exc,
-                    traceback=traceback.format_exc(),
-                    serialized_tb=serialized_tb or b"",
-                    tb_line_cache=tb_line_cache or b"",
-                    **data_result_part,
-                )
-                for _ in io_context.input_ids
-            ]
-            await self._push_outputs(
-                io_context=io_context,
-                started_at=started_at,
-                data_format=api_pb2.DATA_FORMAT_PICKLE,
-                results=results,
-            )
-            self.exit_context(started_at, io_context.input_ids)
+            outputs = await io_context.output_items_exception(started_at, self.task_id, exc)
+            await self._send_outputs(started_at, outputs)
 
     def exit_context(self, started_at, input_ids: list[str]):
         self.total_user_time += time.time() - started_at
@@ -817,32 +975,17 @@ class _ContainerIOManager:
 
         self._input_slots.release()
 
+    # skip inspection of user-generated output_data for synchronicity input translation
     @synchronizer.no_io_translation
     async def push_outputs(
         self,
         io_context: IOContext,
         started_at: float,
-        data: Any,
-        data_format: "modal_proto.api_pb2.DataFormat.ValueType",
+        output_data: list[Any],  # one per output
     ) -> None:
-        data = io_context.validate_output_data(data)
-        formatted_data = await asyncio.gather(
-            *[self.format_blob_data(self.serialize_data_format(d, data_format)) for d in data]
-        )
-        results = [
-            api_pb2.GenericResult(
-                status=api_pb2.GenericResult.GENERIC_STATUS_SUCCESS,
-                **d,
-            )
-            for d in formatted_data
-        ]
-        await self._push_outputs(
-            io_context=io_context,
-            started_at=started_at,
-            data_format=data_format,
-            results=results,
-        )
-        self.exit_context(started_at, io_context.input_ids)
+        # The standard output encoding+sending method for successful function outputs
+        outputs = await io_context.output_items(started_at, output_data)
+        await self._send_outputs(started_at, outputs)
 
     async def memory_restore(self) -> None:
         # Busy-wait for restore. `/__modal/restore-state.json` is created
@@ -881,13 +1024,11 @@ class _ContainerIOManager:
         # Restore GPU memory.
         if self.function_def._experimental_enable_gpu_snapshot and self.function_def.resources.gpu_config.gpu_type:
             logger.debug("GPU memory snapshot enabled. Attempting to restore GPU memory.")
-            gpu_process_state = gpu_memory_snapshot.get_state()
-            if gpu_process_state != gpu_memory_snapshot.CudaCheckpointState.CHECKPOINTED:
-                raise ValueError(
-                    "Cannot restore GPU state if GPU isn't in a 'checkpointed' state. "
-                    f"Current GPU state: {gpu_process_state}"
-                )
-            gpu_memory_snapshot.toggle()
+
+            assert self._cuda_checkpoint_session, (
+                "CudaCheckpointSession not found when attempting to restore GPU memory"
+            )
+            self._cuda_checkpoint_session.restore()
 
         # Restore input to default state.
         self.current_input_id = None
@@ -907,14 +1048,9 @@ class _ContainerIOManager:
         # Snapshot GPU memory.
         if self.function_def._experimental_enable_gpu_snapshot and self.function_def.resources.gpu_config.gpu_type:
             logger.debug("GPU memory snapshot enabled. Attempting to snapshot GPU memory.")
-            gpu_process_state = gpu_memory_snapshot.get_state()
-            if gpu_process_state != gpu_memory_snapshot.CudaCheckpointState.RUNNING:
-                raise ValueError(
-                    f"Cannot snapshot GPU state if it isn't running. Current GPU state: {gpu_process_state}"
-                )
 
-            gpu_memory_snapshot.toggle()
-            gpu_memory_snapshot.wait_for_state(gpu_memory_snapshot.CudaCheckpointState.CHECKPOINTED)
+            self._cuda_checkpoint_session = gpu_memory_snapshot.CudaCheckpointSession()
+            self._cuda_checkpoint_session.checkpoint()
 
         # Notify the heartbeat loop that the snapshot phase has begun in order to
         # prevent it from sending heartbeat RPCs
@@ -944,13 +1080,14 @@ class _ContainerIOManager:
         await asyncify(os.sync)()
         results = await asyncio.gather(
             *[
-                retry_transient_errors(
-                    self._client.stub.VolumeCommit,
+                self._client.stub.VolumeCommit(
                     api_pb2.VolumeCommitRequest(volume_id=v_id),
-                    max_retries=9,
-                    base_delay=0.25,
-                    max_delay=256,
-                    delay_factor=2,
+                    retry=Retry(
+                        max_retries=9,
+                        base_delay=0.25,
+                        max_delay=256,
+                        delay_factor=2,
+                    ),
                 )
                 for v_id in volume_ids
             ],
@@ -1019,7 +1156,8 @@ class _ContainerIOManager:
 
     @classmethod
     def stop_fetching_inputs(cls):
-        assert cls._singleton
+        if not cls._singleton:
+            raise RuntimeError("Must be called from within a Modal container.")
         cls._singleton._fetching_inputs = False
 
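A pattern repeated throughout this file (ContainerHeartbeat, FunctionGetDynamicConcurrency, FunctionGetInputs, FunctionPutOutputs, TaskResult, VolumeCommit) is the removal of the retry_transient_errors(stub.Method, request, ...) wrapper in favor of passing a Retry(...) policy directly to the stub call, e.g. stub.VolumeCommit(request, retry=Retry(max_retries=9, base_delay=0.25, ...)). The sketch below illustrates only that calling-convention shift with generic stand-ins; the Retry dataclass and call_with_retry helper here are assumptions for illustration and do not reproduce modal's grpc_utils implementation.

import asyncio
import random
from dataclasses import dataclass
from typing import Awaitable, Callable, Optional, TypeVar

T = TypeVar("T")


@dataclass
class Retry:
    # Illustrative policy fields mirroring the parameters visible in the diff.
    max_retries: Optional[int] = 3
    base_delay: float = 0.25
    max_delay: float = 256.0
    delay_factor: float = 2.0
    attempt_timeout: Optional[float] = None


async def call_with_retry(fn: Callable[[], Awaitable[T]], retry: Retry) -> T:
    # Run fn, retrying transient failures according to the given policy.
    delay = retry.base_delay
    attempt = 0
    while True:
        try:
            return await asyncio.wait_for(fn(), timeout=retry.attempt_timeout)
        except (asyncio.TimeoutError, ConnectionError):
            attempt += 1
            if retry.max_retries is not None and attempt > retry.max_retries:
                raise
            await asyncio.sleep(delay)
            delay = min(delay * retry.delay_factor, retry.max_delay)


async def demo() -> None:
    async def flaky_rpc() -> str:
        # Stand-in for a unary gRPC call that sometimes fails transiently.
        if random.random() < 0.5:
            raise ConnectionError("transient failure")
        return "ok"

    # Old style (roughly): await retry_transient_errors(stub.VolumeCommit, request, max_retries=9, ...)
    # New style (roughly): await stub.VolumeCommit(request, retry=Retry(max_retries=9, base_delay=0.25, ...))
    print(await call_with_retry(flaky_rpc, Retry(max_retries=9, base_delay=0.25, max_delay=256, delay_factor=2)))


if __name__ == "__main__":
    asyncio.run(demo())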