modal 1.0.6.dev8__py3-none-any.whl → 1.0.6.dev15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of modal might be problematic; see the package's registry advisory page for more details.

@@ -15,7 +15,6 @@ if telemetry_socket:
15
15
  instrument_imports(telemetry_socket)
16
16
 
17
17
  import asyncio
18
- import concurrent.futures
19
18
  import inspect
20
19
  import queue
21
20
  import signal
@@ -49,7 +48,6 @@ from ._runtime.container_io_manager import (
49
48
  ContainerIOManager,
50
49
  IOContext,
51
50
  UserException,
52
- _ContainerIOManager,
53
51
  )
54
52
 
55
53
  if TYPE_CHECKING:
@@ -198,21 +196,16 @@ def call_function(
198
196
 
199
197
  # Send up to this many outputs at a time.
200
198
  generator_queue: asyncio.Queue[Any] = await container_io_manager._queue_create.aio(1024)
201
- generator_output_task = asyncio.create_task(
202
- container_io_manager.generator_output_task.aio(
203
- function_call_ids[0],
204
- io_context.finalized_function.data_format,
205
- generator_queue,
206
- )
207
- )
208
-
209
- item_count = 0
210
- async for value in res:
211
- await container_io_manager._queue_put.aio(generator_queue, value)
212
- item_count += 1
199
+ async with container_io_manager.generator_output_sender(
200
+ function_call_ids[0],
201
+ io_context.finalized_function.data_format,
202
+ generator_queue,
203
+ ):
204
+ item_count = 0
205
+ async for value in res:
206
+ await container_io_manager._queue_put.aio(generator_queue, value)
207
+ item_count += 1
213
208
 
214
- await container_io_manager._queue_put.aio(generator_queue, _ContainerIOManager._GENERATOR_STOP_SENTINEL)
215
- await generator_output_task # Wait to finish sending generator outputs.
216
209
  message = api_pb2.GeneratorDone(items_total=item_count)
217
210
  await container_io_manager.push_outputs.aio(
218
211
  io_context,
@@ -249,20 +242,17 @@ def call_function(
249
242
 
250
243
  # Send up to this many outputs at a time.
251
244
  generator_queue: asyncio.Queue[Any] = container_io_manager._queue_create(1024)
252
- generator_output_task: concurrent.futures.Future = container_io_manager.generator_output_task( # type: ignore
245
+
246
+ with container_io_manager.generator_output_sender(
253
247
  function_call_ids[0],
254
248
  io_context.finalized_function.data_format,
255
249
  generator_queue,
256
- _future=True, # type: ignore # Synchronicity magic to return a future.
257
- )
258
-
259
- item_count = 0
260
- for value in res:
261
- container_io_manager._queue_put(generator_queue, value)
262
- item_count += 1
250
+ ):
251
+ item_count = 0
252
+ for value in res:
253
+ container_io_manager._queue_put(generator_queue, value)
254
+ item_count += 1
263
255
 
264
- container_io_manager._queue_put(generator_queue, _ContainerIOManager._GENERATOR_STOP_SENTINEL)
265
- generator_output_task.result() # Wait to finish sending generator outputs.
266
256
  message = api_pb2.GeneratorDone(items_total=item_count)
267
257
  container_io_manager.push_outputs(io_context, started_at, message, api_pb2.DATA_FORMAT_GENERATOR_DONE)
268
258
  else:
modal/_functions.py CHANGED
@@ -40,6 +40,7 @@ from ._utils.async_utils import (
40
40
  synchronizer,
41
41
  warn_if_generator_is_not_consumed,
42
42
  )
43
+ from ._utils.blob_utils import MAX_OBJECT_SIZE_BYTES
43
44
  from ._utils.deprecation import deprecation_warning, warn_if_passing_namespace
44
45
  from ._utils.function_utils import (
45
46
  ATTEMPT_TIMEOUT_GRACE_PERIOD,
@@ -145,6 +146,7 @@ class _Invocation:
145
146
  args,
146
147
  kwargs,
147
148
  stub,
149
+ max_object_size_bytes=function._max_object_size_bytes,
148
150
  method_name=function._use_method_name,
149
151
  function_call_invocation_type=function_call_invocation_type,
150
152
  )
@@ -386,7 +388,13 @@ class _InputPlaneInvocation:
386
388
  function_id = function.object_id
387
389
  control_plane_stub = client.stub
388
390
  # Note: Blob upload is done on the control plane stub, not the input plane stub!
389
- input_item = await _create_input(args, kwargs, control_plane_stub, method_name=function._use_method_name)
391
+ input_item = await _create_input(
392
+ args,
393
+ kwargs,
394
+ control_plane_stub,
395
+ max_object_size_bytes=function._max_object_size_bytes,
396
+ method_name=function._use_method_name,
397
+ )
390
398
 
391
399
  request = api_pb2.AttemptStartRequest(
392
400
  function_id=function_id,
@@ -443,8 +451,10 @@ class _InputPlaneInvocation:
443
451
  self.attempt_token = retry_response.attempt_token
444
452
  continue
445
453
 
454
+ control_plane_stub = self.client.stub
455
+ # Note: Blob download is done on the control plane stub, not the input plane stub!
446
456
  return await _process_result(
447
- await_response.output.result, await_response.output.data_format, self.stub, self.client
457
+ await_response.output.result, await_response.output.data_format, control_plane_stub, self.client
448
458
  )
449
459
 
450
460
 
@@ -1414,6 +1424,15 @@ class _Function(typing.Generic[P, ReturnType, OriginalReturnType], _Object, type
1414
1424
  self._definition_id = metadata.definition_id
1415
1425
  self._input_plane_url = metadata.input_plane_url
1416
1426
  self._input_plane_region = metadata.input_plane_region
1427
+ # The server may pass back a larger max object size for some input plane users. This applies to input plane
1428
+ # users only - anyone using the control plane will get the standard limit.
1429
+ # There are some cases like FunctionPrecreate where this value is not set at all. We expect that this field
1430
+ # will eventually be hydrated with the correct value, but just to be defensive, if the field is not set we use
1431
+ # MAX_OBJECT_SIZE_BYTES, otherwise it would get set to 0. Accidentally using 0 would cause us to blob upload
1432
+ # everything, so let's avoid that.
1433
+ self._max_object_size_bytes = (
1434
+ metadata.max_object_size_bytes if metadata.HasField("max_object_size_bytes") else MAX_OBJECT_SIZE_BYTES
1435
+ )
1417
1436
 
1418
1437
  def _get_metadata(self):
1419
1438
  # Overridden concrete implementation of base class method
@@ -1430,6 +1449,7 @@ class _Function(typing.Generic[P, ReturnType, OriginalReturnType], _Object, type
1430
1449
  function_schema=self._metadata.function_schema if self._metadata else None,
1431
1450
  input_plane_url=self._input_plane_url,
1432
1451
  input_plane_region=self._input_plane_region,
1452
+ max_object_size_bytes=self._max_object_size_bytes,
1433
1453
  )
1434
1454
 
1435
1455
  def _check_no_web_url(self, fn_name: str):
@@ -290,7 +290,6 @@ class _ContainerIOManager:
290
290
 
291
291
  _client: _Client
292
292
 
293
- _GENERATOR_STOP_SENTINEL: ClassVar[Sentinel] = Sentinel()
294
293
  _singleton: ClassVar[Optional["_ContainerIOManager"]] = None
295
294
 
296
295
  def _init(self, container_args: api_pb2.ContainerArguments, client: _Client):
@@ -508,33 +507,47 @@ class _ContainerIOManager:
508
507
  req = api_pb2.FunctionCallPutDataRequest(function_call_id=function_call_id, data_chunks=data_chunks)
509
508
  await retry_transient_errors(self._client.stub.FunctionCallPutDataOut, req)
510
509
 
511
- async def generator_output_task(self, function_call_id: str, data_format: int, message_rx: asyncio.Queue) -> None:
512
- """Task that feeds generator outputs into a function call's `data_out` stream."""
513
- index = 1
514
- received_sentinel = False
515
- while not received_sentinel:
516
- message = await message_rx.get()
517
- if message is self._GENERATOR_STOP_SENTINEL:
518
- break
519
- # ASGI 'http.response.start' and 'http.response.body' msgs are observed to be separated by 1ms.
520
- # If we don't sleep here for 1ms we end up with an extra call to .put_data_out().
521
- if index == 1:
522
- await asyncio.sleep(0.001)
523
- serialized_messages = [serialize_data_format(message, data_format)]
524
- total_size = len(serialized_messages[0]) + 512
525
- while total_size < 16 * 1024 * 1024: # 16 MiB, maximum size in a single message
526
- try:
527
- message = message_rx.get_nowait()
528
- except asyncio.QueueEmpty:
529
- break
530
- if message is self._GENERATOR_STOP_SENTINEL:
531
- received_sentinel = True
510
+ @asynccontextmanager
511
+ async def generator_output_sender(
512
+ self, function_call_id: str, data_format: int, message_rx: asyncio.Queue
513
+ ) -> AsyncGenerator[None, None]:
514
+ """Runs background task that feeds generator outputs into a function call's `data_out` stream."""
515
+ GENERATOR_STOP_SENTINEL = Sentinel()
516
+
517
+ async def generator_output_task():
518
+ index = 1
519
+ received_sentinel = False
520
+ while not received_sentinel:
521
+ message = await message_rx.get()
522
+ if message is GENERATOR_STOP_SENTINEL:
532
523
  break
533
- else:
534
- serialized_messages.append(serialize_data_format(message, data_format))
535
- total_size += len(serialized_messages[-1]) + 512 # 512 bytes for estimated framing overhead
536
- await self.put_data_out(function_call_id, index, data_format, serialized_messages)
537
- index += len(serialized_messages)
524
+ # ASGI 'http.response.start' and 'http.response.body' msgs are observed to be separated by 1ms.
525
+ # If we don't sleep here for 1ms we end up with an extra call to .put_data_out().
526
+ if index == 1:
527
+ await asyncio.sleep(0.001)
528
+ serialized_messages = [serialize_data_format(message, data_format)]
529
+ total_size = len(serialized_messages[0]) + 512
530
+ while total_size < 16 * 1024 * 1024: # 16 MiB, maximum size in a single message
531
+ try:
532
+ message = message_rx.get_nowait()
533
+ except asyncio.QueueEmpty:
534
+ break
535
+ if message is GENERATOR_STOP_SENTINEL:
536
+ received_sentinel = True
537
+ break
538
+ else:
539
+ serialized_messages.append(serialize_data_format(message, data_format))
540
+ total_size += len(serialized_messages[-1]) + 512 # 512 bytes for estimated framing overhead
541
+ await self.put_data_out(function_call_id, index, data_format, serialized_messages)
542
+ index += len(serialized_messages)
543
+
544
+ task = asyncio.create_task(generator_output_task())
545
+ try:
546
+ yield
547
+ finally:
548
+ # gracefully stop the task after all current inputs have been sent
549
+ await message_rx.put(GENERATOR_STOP_SENTINEL)
550
+ await task
538
551
 
539
552
  async def _queue_create(self, size: int) -> asyncio.Queue:
540
553
  """Create a queue, on the synchronicity event loop (needed on Python 3.8 and 3.9)."""
@@ -106,7 +106,6 @@ class _ContainerIOManager:
106
106
  _is_interactivity_enabled: bool
107
107
  _fetching_inputs: bool
108
108
  _client: modal.client._Client
109
- _GENERATOR_STOP_SENTINEL: typing.ClassVar[Sentinel]
110
109
  _singleton: typing.ClassVar[typing.Optional[_ContainerIOManager]]
111
110
 
112
111
  def _init(self, container_args: modal_proto.api_pb2.ContainerArguments, client: modal.client._Client): ...
@@ -148,10 +147,10 @@ class _ContainerIOManager:
148
147
  """
149
148
  ...
150
149
 
151
- async def generator_output_task(
150
+ def generator_output_sender(
152
151
  self, function_call_id: str, data_format: int, message_rx: asyncio.queues.Queue
153
- ) -> None:
154
- """Task that feeds generator outputs into a function call's `data_out` stream."""
152
+ ) -> typing.AsyncContextManager[None]:
153
+ """Runs background task that feeds generator outputs into a function call's `data_out` stream."""
155
154
  ...
156
155
 
157
156
  async def _queue_create(self, size: int) -> asyncio.queues.Queue:
@@ -268,7 +267,6 @@ class ContainerIOManager:
268
267
  _is_interactivity_enabled: bool
269
268
  _fetching_inputs: bool
270
269
  _client: modal.client.Client
271
- _GENERATOR_STOP_SENTINEL: typing.ClassVar[Sentinel]
272
270
  _singleton: typing.ClassVar[typing.Optional[ContainerIOManager]]
273
271
 
274
272
  def __init__(self, /, *args, **kwargs):
@@ -367,16 +365,20 @@ class ContainerIOManager:
367
365
 
368
366
  put_data_out: __put_data_out_spec[typing_extensions.Self]
369
367
 
370
- class __generator_output_task_spec(typing_extensions.Protocol[SUPERSELF]):
371
- def __call__(self, /, function_call_id: str, data_format: int, message_rx: asyncio.queues.Queue) -> None:
372
- """Task that feeds generator outputs into a function call's `data_out` stream."""
368
+ class __generator_output_sender_spec(typing_extensions.Protocol[SUPERSELF]):
369
+ def __call__(
370
+ self, /, function_call_id: str, data_format: int, message_rx: asyncio.queues.Queue
371
+ ) -> synchronicity.combined_types.AsyncAndBlockingContextManager[None]:
372
+ """Runs background task that feeds generator outputs into a function call's `data_out` stream."""
373
373
  ...
374
374
 
375
- async def aio(self, /, function_call_id: str, data_format: int, message_rx: asyncio.queues.Queue) -> None:
376
- """Task that feeds generator outputs into a function call's `data_out` stream."""
375
+ def aio(
376
+ self, /, function_call_id: str, data_format: int, message_rx: asyncio.queues.Queue
377
+ ) -> typing.AsyncContextManager[None]:
378
+ """Runs background task that feeds generator outputs into a function call's `data_out` stream."""
377
379
  ...
378
380
 
379
- generator_output_task: __generator_output_task_spec[typing_extensions.Self]
381
+ generator_output_sender: __generator_output_sender_spec[typing_extensions.Self]
380
382
 
381
383
  class ___queue_create_spec(typing_extensions.Protocol[SUPERSELF]):
382
384
  def __call__(self, /, size: int) -> asyncio.queues.Queue:
@@ -188,16 +188,23 @@ def get_content_length(data: BinaryIO) -> int:
188
188
  return content_length - pos
189
189
 
190
190
 
191
- async def _blob_upload_with_fallback(items, blob_ids, callback):
191
+ async def _blob_upload_with_fallback(items, blob_ids: list[str], callback) -> tuple[str, bool, int]:
192
+ r2_latency_ms = 0
193
+ r2_failed = False
192
194
  for idx, (item, blob_id) in enumerate(zip(items, blob_ids)):
193
195
  # We want to default to R2 95% of the time and S3 5% of the time.
194
196
  # To ensure the failure path is continuously exercised.
195
197
  if idx == 0 and len(items) > 1 and random.random() > HEALTHY_R2_UPLOAD_PERCENTAGE:
196
198
  continue
197
199
  try:
200
+ init_time = time.monotonic_ns()
198
201
  await callback(item)
199
- return blob_id
202
+ if blob_id.endswith(":r2"):
203
+ r2_latency_ms = (time.monotonic_ns() - init_time) // 1_000_000
204
+ return blob_id, r2_failed, r2_latency_ms
200
205
  except Exception as _:
206
+ if blob_id.endswith(":r2"):
207
+ r2_failed = True
201
208
  # Ignore all errors except the last one, since we're out of fallback options.
202
209
  if idx == len(items) - 1:
203
210
  raise
@@ -206,7 +213,7 @@ async def _blob_upload_with_fallback(items, blob_ids, callback):
206
213
 
207
214
  async def _blob_upload(
208
215
  upload_hashes: UploadHashes, data: Union[bytes, BinaryIO], stub, progress_report_cb: Optional[Callable] = None
209
- ) -> str:
216
+ ) -> tuple[str, bool, int]:
210
217
  if isinstance(data, bytes):
211
218
  data = BytesIO(data)
212
219
 
@@ -232,7 +239,7 @@ async def _blob_upload(
232
239
  progress_report_cb=progress_report_cb,
233
240
  )
234
241
 
235
- blob_id = await _blob_upload_with_fallback(
242
+ blob_id, r2_failed, r2_latency_ms = await _blob_upload_with_fallback(
236
243
  resp.multiparts.items,
237
244
  resp.blob_ids,
238
245
  upload_multipart_upload,
@@ -252,7 +259,7 @@ async def _blob_upload(
252
259
  content_md5_b64=upload_hashes.md5_base64,
253
260
  )
254
261
 
255
- blob_id = await _blob_upload_with_fallback(
262
+ blob_id, r2_failed, r2_latency_ms = await _blob_upload_with_fallback(
256
263
  resp.upload_urls.items,
257
264
  resp.blob_ids,
258
265
  upload_to_s3_url,
@@ -261,10 +268,10 @@ async def _blob_upload(
261
268
  if progress_report_cb:
262
269
  progress_report_cb(complete=True)
263
270
 
264
- return blob_id
271
+ return blob_id, r2_failed, r2_latency_ms
265
272
 
266
273
 
267
- async def blob_upload(payload: bytes, stub: ModalClientModal) -> str:
274
+ async def blob_upload_with_r2_failure_info(payload: bytes, stub: ModalClientModal) -> tuple[str, bool, int]:
268
275
  size_mib = len(payload) / 1024 / 1024
269
276
  logger.debug(f"Uploading large blob of size {size_mib:.2f} MiB")
270
277
  t0 = time.time()
@@ -272,12 +279,17 @@ async def blob_upload(payload: bytes, stub: ModalClientModal) -> str:
272
279
  logger.warning("Blob uploading string, not bytes - auto-encoding as utf8")
273
280
  payload = payload.encode("utf8")
274
281
  upload_hashes = get_upload_hashes(payload)
275
- blob_id = await _blob_upload(upload_hashes, payload, stub)
282
+ blob_id, r2_failed, r2_latency_ms = await _blob_upload(upload_hashes, payload, stub)
276
283
  dur_s = max(time.time() - t0, 0.001) # avoid division by zero
277
284
  throughput_mib_s = (size_mib) / dur_s
278
285
  logger.debug(
279
286
  f"Uploaded large blob of size {size_mib:.2f} MiB ({throughput_mib_s:.2f} MiB/s, total {dur_s:.2f}s). {blob_id}"
280
287
  )
288
+ return blob_id, r2_failed, r2_latency_ms
289
+
290
+
291
+ async def blob_upload(payload: bytes, stub: ModalClientModal) -> str:
292
+ blob_id, _, _ = await blob_upload_with_r2_failure_info(payload, stub)
281
293
  return blob_id
282
294
 
283
295
 
@@ -289,7 +301,8 @@ async def blob_upload_file(
289
301
  md5_hex: Optional[str] = None,
290
302
  ) -> str:
291
303
  upload_hashes = get_upload_hashes(file_obj, sha256_hex=sha256_hex, md5_hex=md5_hex)
292
- return await _blob_upload(upload_hashes, file_obj, stub, progress_report_cb)
304
+ blob_id, _, _ = await _blob_upload(upload_hashes, file_obj, stub, progress_report_cb)
305
+ return blob_id
293
306
 
294
307
 
295
308
  @retry(n_attempts=5, base_delay=0.1, timeout=None)
@@ -32,7 +32,11 @@ from ..exception import (
32
32
  RemoteError,
33
33
  )
34
34
  from ..mount import ROOT_DIR, _is_modal_path, _Mount
35
- from .blob_utils import MAX_ASYNC_OBJECT_SIZE_BYTES, MAX_OBJECT_SIZE_BYTES, blob_download, blob_upload
35
+ from .blob_utils import (
36
+ MAX_ASYNC_OBJECT_SIZE_BYTES,
37
+ blob_download,
38
+ blob_upload_with_r2_failure_info,
39
+ )
36
40
  from .grpc_utils import RETRYABLE_GRPC_STATUS_CODES
37
41
 
38
42
 
@@ -513,12 +517,13 @@ async def _process_result(result: api_pb2.GenericResult, data_format: int, stub,
513
517
 
514
518
  def should_upload(
515
519
  num_bytes: int,
520
+ max_object_size_bytes: int,
516
521
  function_call_invocation_type: Optional["api_pb2.FunctionCallInvocationType.ValueType"],
517
522
  ) -> bool:
518
523
  """
519
524
  Determine if the input should be uploaded to blob storage.
520
525
  """
521
- return num_bytes > MAX_OBJECT_SIZE_BYTES or (
526
+ return num_bytes > max_object_size_bytes or (
522
527
  function_call_invocation_type == api_pb2.FUNCTION_CALL_INVOCATION_TYPE_ASYNC
523
528
  and num_bytes > MAX_ASYNC_OBJECT_SIZE_BYTES
524
529
  )
@@ -529,6 +534,7 @@ async def _create_input(
529
534
  kwargs,
530
535
  stub: ModalClientModal,
531
536
  *,
537
+ max_object_size_bytes: int,
532
538
  idx: Optional[int] = None,
533
539
  method_name: Optional[str] = None,
534
540
  function_call_invocation_type: Optional["api_pb2.FunctionCallInvocationType.ValueType"] = None,
@@ -543,8 +549,8 @@ async def _create_input(
543
549
 
544
550
  args_serialized = serialize((args, kwargs))
545
551
 
546
- if should_upload(len(args_serialized), function_call_invocation_type):
547
- args_blob_id = await blob_upload(args_serialized, stub)
552
+ if should_upload(len(args_serialized), max_object_size_bytes, function_call_invocation_type):
553
+ args_blob_id, r2_failed, r2_latency_ms = await blob_upload_with_r2_failure_info(args_serialized, stub)
548
554
  return api_pb2.FunctionPutInputsItem(
549
555
  input=api_pb2.FunctionInput(
550
556
  args_blob_id=args_blob_id,
@@ -552,6 +558,8 @@ async def _create_input(
552
558
  method_name=method_name,
553
559
  ),
554
560
  idx=idx,
561
+ r2_failed=r2_failed,
562
+ r2_latency_ms=r2_latency_ms,
555
563
  )
556
564
  else:
557
565
  return api_pb2.FunctionPutInputsItem(
modal/client.pyi CHANGED
@@ -31,7 +31,7 @@ class _Client:
31
31
  server_url: str,
32
32
  client_type: int,
33
33
  credentials: typing.Optional[tuple[str, str]],
34
- version: str = "1.0.6.dev8",
34
+ version: str = "1.0.6.dev15",
35
35
  ):
36
36
  """mdmd:hidden
37
37
  The Modal client object is not intended to be instantiated directly by users.
@@ -160,7 +160,7 @@ class Client:
160
160
  server_url: str,
161
161
  client_type: int,
162
162
  credentials: typing.Optional[tuple[str, str]],
163
- version: str = "1.0.6.dev8",
163
+ version: str = "1.0.6.dev15",
164
164
  ):
165
165
  """mdmd:hidden
166
166
  The Modal client object is not intended to be instantiated directly by users.