PyPI - modal - Versions diffs - 0.73.131__tar.gz → 0.73.133__tar.gz - Mend

modal 0.73.131.tar.gz → 0.73.133.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (186) hide show

{modal-0.73.131 → modal-0.73.133}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: modal
-Version: 0.73.131
+Version: 0.73.133
 Summary: Python client library for Modal
 Author-email: Modal Labs <support@modal.com>
 License: Apache-2.0

{modal-0.73.131 → modal-0.73.133}/modal/__init__.py RENAMED Viewed

@@ -27,6 +27,7 @@ try:
         asgi_app,
         batched,
         build,
+        concurrent,
         enter,
         exit,
         fastapi_endpoint,
@@ -82,6 +83,7 @@ __all__ = [
     "asgi_app",
     "batched",
     "build",
+    "concurrent",
     "current_function_call_id",
     "current_input_id",
     "enable_output",

{modal-0.73.131 → modal-0.73.133}/modal/_container_entrypoint.py RENAMED Viewed

@@ -273,7 +273,7 @@ def call_function(
                 )
         reset_context()
-    if container_io_manager.target_concurrency > 1:
+    if container_io_manager.input_concurrency_enabled:
         with DaemonizedThreadPool(max_threads=container_io_manager.max_concurrency) as thread_pool:
             def make_async_cancel_callback(task):
@@ -293,7 +293,7 @@ def call_function(
                 if not did_sigint:
                     did_sigint = True
                     logger.warning(
-                        "User cancelling input of non-async functions with allow_concurrent_inputs > 1.\n"
+                        "User cancelling input of non-async functions with input concurrency enabled.\n"
                         "This shuts down the container, causing concurrently running inputs to be "
                         "rescheduled in other containers."
                     )

{modal-0.73.131 → modal-0.73.133}/modal/_functions.py RENAMED Viewed

@@ -440,7 +440,8 @@ class _Function(typing.Generic[P, ReturnType, OriginalReturnType], _Object, type
         max_containers: Optional[int] = None,
         buffer_containers: Optional[int] = None,
         scaledown_window: Optional[int] = None,
-        allow_concurrent_inputs: Optional[int] = None,
+        max_concurrent_inputs: Optional[int] = None,
+        target_concurrent_inputs: Optional[int] = None,
         batch_max_size: Optional[int] = None,
         batch_wait_ms: Optional[int] = None,
         cloud: Optional[str] = None,
@@ -791,7 +792,8 @@ class _Function(typing.Generic[P, ReturnType, OriginalReturnType], _Object, type
                     runtime_perf_record=config.get("runtime_perf_record"),
                     app_name=app_name,
                     is_builder_function=is_builder_function,
-                    target_concurrent_inputs=allow_concurrent_inputs or 0,
+                    max_concurrent_inputs=max_concurrent_inputs or 0,
+                    target_concurrent_inputs=target_concurrent_inputs or 0,
                     batch_max_size=batch_max_size or 0,
                     batch_linger_ms=batch_wait_ms or 0,
                     worker_id=config.get("worker_id"),

{modal-0.73.131 → modal-0.73.133}/modal/_partial_function.py RENAMED Viewed

@@ -59,6 +59,8 @@ class _PartialFunction(typing.Generic[P, ReturnType, OriginalReturnType]):
     force_build: bool
     cluster_size: Optional[int]  # Experimental: Clustered functions
     build_timeout: Optional[int]
+    max_concurrent_inputs: Optional[int]
+    target_concurrent_inputs: Optional[int]
     def __init__(
         self,
@@ -72,6 +74,8 @@ class _PartialFunction(typing.Generic[P, ReturnType, OriginalReturnType]):
         cluster_size: Optional[int] = None,  # Experimental: Clustered functions
         force_build: bool = False,
         build_timeout: Optional[int] = None,
+        max_concurrent_inputs: Optional[int] = None,
+        target_concurrent_inputs: Optional[int] = None,
     ):
         self.raw_f = raw_f
         self.flags = flags
@@ -89,6 +93,8 @@ class _PartialFunction(typing.Generic[P, ReturnType, OriginalReturnType]):
         self.cluster_size = cluster_size  # Experimental: Clustered functions
         self.force_build = force_build
         self.build_timeout = build_timeout
+        self.max_concurrent_inputs = max_concurrent_inputs
+        self.target_concurrent_inputs = target_concurrent_inputs
     def _get_raw_f(self) -> Callable[P, ReturnType]:
         return self.raw_f
@@ -143,6 +149,8 @@ class _PartialFunction(typing.Generic[P, ReturnType, OriginalReturnType]):
             batch_wait_ms=self.batch_wait_ms,
             force_build=self.force_build,
             build_timeout=self.build_timeout,
+            max_concurrent_inputs=self.max_concurrent_inputs,
+            target_concurrent_inputs=self.target_concurrent_inputs,
         )
@@ -722,3 +730,73 @@ def _batched(
         )
     return wrapper
+def _concurrent(
+    _warn_parentheses_missing=None,
+    *,
+    max_inputs: int,  # Hard limit on each container's input concurrency
+    target_inputs: Optional[int] = None,  # Input concurrency that Modal's autoscaler should target
+) -> Callable[[Union[Callable[..., Any], _PartialFunction]], _PartialFunction]:
+    """Decorator that allows individual containers to handle multiple inputs concurrently.
+    The concurrency mechanism depends on whether the function is async or not:
+    - Async functions will run inputs on a single thread as asyncio tasks.
+    - Synchronous functions will use multi-threading. The code must be thread-safe.
+    Input concurrency will be most useful for workflows that are IO-bound
+    (e.g., making network requests) or when running an inference server that supports
+    dynamic batching.
+    When `target_inputs` is set, Modal's autoscaler will try to provision resources
+    such that each container is running that many inputs concurrently, rather than
+    autoscaling based on `max_inputs`. Containers may burst up to `max_inputs`
+    if resources are insufficient to remain at the target concurrency, e.g. when the
+    arrival rate of inputs increases. This can trade-off a small increase in average
+    latency to avoid larger tail latencies from input queuing.
+    **Examples:**
+    ```python
+    # Stack the decorator under `@app.function()` to enable input concurrency
+    @app.function()
+    @modal.concurrent(max_inputs=100)
+    async def f(data):
+        # Async function; will be scheduled as asyncio task
+        ...
+    # With `@app.cls()`, apply the decorator at the class level, not on individual methods
+    @app.cls()
+    @modal.concurrent(max_inputs=100, target_inputs=80)
+    class C:
+        @modal.method()
+        def f(self, data):
+            # Sync function; must be thread-safe
+            ...
+    ```
+    """
+    if _warn_parentheses_missing is not None:
+        raise InvalidError(
+            "Positional arguments are not allowed. Did you forget parentheses? Suggestion: `@modal.concurrent()`."
+        )
+    if target_inputs and target_inputs > max_inputs:
+        raise InvalidError("`target_inputs` parameter cannot be greater than `max_inputs`.")
+    def wrapper(obj: Union[Callable[..., Any], _PartialFunction]) -> _PartialFunction:
+        if isinstance(obj, _PartialFunction):
+            # Risky that we need to mutate the parameters here; should make this safer
+            obj.max_concurrent_inputs = max_inputs
+            obj.target_concurrent_inputs = target_inputs
+            obj.add_flags(_PartialFunctionFlags.FUNCTION)
+            return obj
+        return _PartialFunction(
+            obj,
+            _PartialFunctionFlags.FUNCTION,
+            max_concurrent_inputs=max_inputs,
+            target_concurrent_inputs=target_inputs,
+        )
+    return wrapper

{modal-0.73.131 → modal-0.73.133}/modal/_runtime/container_io_manager.py RENAMED Viewed

@@ -264,6 +264,7 @@ class _ContainerIOManager:
     current_inputs: dict[str, IOContext]  # input_id -> IOContext
     current_input_started_at: Optional[float]
+    _input_concurrency_enabled: bool
     _target_concurrency: int
     _max_concurrency: int
     _concurrency_loop: Optional[asyncio.Task]
@@ -296,14 +297,14 @@ class _ContainerIOManager:
         self.current_input_started_at = None
         if container_args.function_def.pty_info.pty_type == api_pb2.PTYInfo.PTY_TYPE_SHELL:
-            target_concurrency = 1
             max_concurrency = 1
+            target_concurrency = 1
         else:
-            target_concurrency = container_args.function_def.target_concurrent_inputs or 1
-            max_concurrency = container_args.function_def.max_concurrent_inputs or target_concurrency
+            max_concurrency = container_args.function_def.max_concurrent_inputs or 1
+            target_concurrency = container_args.function_def.target_concurrent_inputs or max_concurrency
-        self._target_concurrency = target_concurrency
         self._max_concurrency = max_concurrency
+        self._target_concurrency = target_concurrency
         self._concurrency_loop = None
         self._stop_concurrency_loop = False
         self._input_slots = InputSlots(target_concurrency)
@@ -976,6 +977,10 @@ class _ContainerIOManager:
     def max_concurrency(self) -> int:
         return self._max_concurrency
+    @property
+    def input_concurrency_enabled(self) -> int:
+        return max(self._max_concurrency, self._target_concurrency) > 1
     @classmethod
     def get_input_concurrency(cls) -> int:
         """

{modal-0.73.131 → modal-0.73.133}/modal/_runtime/container_io_manager.pyi RENAMED Viewed

@@ -69,6 +69,7 @@ class _ContainerIOManager:
     current_input_id: typing.Optional[str]
     current_inputs: dict[str, IOContext]
     current_input_started_at: typing.Optional[float]
+    _input_concurrency_enabled: bool
     _target_concurrency: int
     _max_concurrency: int
     _concurrency_loop: typing.Optional[asyncio.Task]
@@ -149,6 +150,8 @@ class _ContainerIOManager:
     def target_concurrency(self) -> int: ...
     @property
     def max_concurrency(self) -> int: ...
+    @property
+    def input_concurrency_enabled(self) -> int: ...
     @classmethod
     def get_input_concurrency(cls) -> int: ...
     @classmethod
@@ -169,6 +172,7 @@ class ContainerIOManager:
     current_input_id: typing.Optional[str]
     current_inputs: dict[str, IOContext]
     current_input_started_at: typing.Optional[float]
+    _input_concurrency_enabled: bool
     _target_concurrency: int
     _max_concurrency: int
     _concurrency_loop: typing.Optional[asyncio.Task]
@@ -384,6 +388,8 @@ class ContainerIOManager:
     def target_concurrency(self) -> int: ...
     @property
     def max_concurrency(self) -> int: ...
+    @property
+    def input_concurrency_enabled(self) -> int: ...
     @classmethod
     def get_input_concurrency(cls) -> int: ...
     @classmethod

{modal-0.73.131 → modal-0.73.133}/modal/app.py RENAMED Viewed

@@ -678,6 +678,12 @@ class _App:
                 is_generator = f.is_generator
                 batch_max_size = f.batch_max_size
                 batch_wait_ms = f.batch_wait_ms
+                if f.max_concurrent_inputs:  # Using @modal.concurrent()
+                    max_concurrent_inputs = f.max_concurrent_inputs
+                    target_concurrent_inputs = f.target_concurrent_inputs
+                else:
+                    max_concurrent_inputs = allow_concurrent_inputs
+                    target_concurrent_inputs = None
             else:
                 if not is_global_object(f.__qualname__) and not serialized:
                     raise InvalidError(
@@ -709,10 +715,12 @@ class _App:
                     )
                 info = FunctionInfo(f, serialized=serialized, name_override=name)
+                raw_f = f
                 webhook_config = None
                 batch_max_size = None
                 batch_wait_ms = None
-                raw_f = f
+                max_concurrent_inputs = allow_concurrent_inputs
+                target_concurrent_inputs = None
                 cluster_size = None  # Experimental: Clustered functions
                 i6pn_enabled = i6pn
@@ -753,7 +761,8 @@ class _App:
                 max_containers=max_containers,
                 buffer_containers=buffer_containers,
                 scaledown_window=scaledown_window,
-                allow_concurrent_inputs=allow_concurrent_inputs,
+                max_concurrent_inputs=max_concurrent_inputs,
+                target_concurrent_inputs=target_concurrent_inputs,
                 batch_max_size=batch_max_size,
                 batch_wait_ms=batch_wait_ms,
                 timeout=timeout,
@@ -832,7 +841,7 @@ class _App:
         concurrency_limit: Optional[int] = None,  # Replaced with `max_containers`
         container_idle_timeout: Optional[int] = None,  # Replaced with `scaledown_window`
         _experimental_buffer_containers: Optional[int] = None,  # Now stable API with `buffer_containers`
-    ) -> Callable[[CLS_T], CLS_T]:
+    ) -> Callable[[Union[CLS_T, _PartialFunction]], CLS_T]:
         """
         Decorator to register a new Modal [Cls](/docs/reference/modal.Cls) with this App.
         """
@@ -845,8 +854,21 @@ class _App:
                 raise InvalidError("`region` and `_experimental_scheduler_placement` cannot be used together")
             scheduler_placement = SchedulerPlacement(region=region)
-        def wrapper(user_cls: CLS_T) -> CLS_T:
+        def wrapper(wrapped_cls: Union[CLS_T, _PartialFunction]) -> CLS_T:
             # Check if the decorated object is a class
+            if isinstance(wrapped_cls, _PartialFunction):
+                wrapped_cls.wrapped = True
+                user_cls = wrapped_cls.raw_f
+                if wrapped_cls.max_concurrent_inputs:  # Using @modal.concurrent()
+                    max_concurrent_inputs = wrapped_cls.max_concurrent_inputs
+                    target_concurrent_inputs = wrapped_cls.target_concurrent_inputs
+                else:
+                    max_concurrent_inputs = allow_concurrent_inputs
+                    target_concurrent_inputs = None
+            else:
+                user_cls = wrapped_cls
+                max_concurrent_inputs = allow_concurrent_inputs
+                target_concurrent_inputs = None
             if not inspect.isclass(user_cls):
                 raise TypeError("The @app.cls decorator must be used on a class.")
@@ -871,6 +893,12 @@ class _App:
             ):
                 raise InvalidError("A class must have `enable_memory_snapshot=True` to use `snap=True` on its methods.")
+            for method in _find_partial_methods_for_user_cls(user_cls, _PartialFunctionFlags.FUNCTION).values():
+                if method.max_concurrent_inputs:
+                    raise InvalidError(
+                        "The `@modal.concurrent` decorator cannot be used on methods; decorate the class instead."
+                    )
             info = FunctionInfo(None, serialized=serialized, user_cls=user_cls)
             cls_func = _Function.from_local(
@@ -892,7 +920,8 @@ class _App:
                 scaledown_window=scaledown_window,
                 proxy=proxy,
                 retries=retries,
-                allow_concurrent_inputs=allow_concurrent_inputs,
+                max_concurrent_inputs=max_concurrent_inputs,
+                target_concurrent_inputs=target_concurrent_inputs,
                 batch_max_size=batch_max_size,
                 batch_wait_ms=batch_wait_ms,
                 timeout=timeout,

{modal-0.73.131 → modal-0.73.133}/modal/app.pyi RENAMED Viewed

@@ -1,6 +1,7 @@
 import collections.abc
 import modal._functions
 import modal._object
+import modal._partial_function
 import modal._utils.function_utils
 import modal.client
 import modal.cloud_bucket_mount
@@ -247,7 +248,7 @@ class _App:
         concurrency_limit: typing.Optional[int] = None,
         container_idle_timeout: typing.Optional[int] = None,
         _experimental_buffer_containers: typing.Optional[int] = None,
-    ) -> collections.abc.Callable[[CLS_T], CLS_T]: ...
+    ) -> collections.abc.Callable[[typing.Union[CLS_T, modal._partial_function._PartialFunction]], CLS_T]: ...
     async def spawn_sandbox(
         self,
         *entrypoint_args: str,
@@ -487,7 +488,7 @@ class App:
         concurrency_limit: typing.Optional[int] = None,
         container_idle_timeout: typing.Optional[int] = None,
         _experimental_buffer_containers: typing.Optional[int] = None,
-    ) -> collections.abc.Callable[[CLS_T], CLS_T]: ...
+    ) -> collections.abc.Callable[[typing.Union[CLS_T, modal.partial_function.PartialFunction]], CLS_T]: ...
     class __spawn_sandbox_spec(typing_extensions.Protocol[SUPERSELF]):
         def __call__(

{modal-0.73.131 → modal-0.73.133}/modal/client.pyi RENAMED Viewed

@@ -31,7 +31,7 @@ class _Client:
         server_url: str,
         client_type: int,
         credentials: typing.Optional[tuple[str, str]],
-        version: str = "0.73.131",
+        version: str = "0.73.133",
     ): ...
     def is_closed(self) -> bool: ...
     @property
@@ -93,7 +93,7 @@ class Client:
         server_url: str,
         client_type: int,
         credentials: typing.Optional[tuple[str, str]],
-        version: str = "0.73.131",
+        version: str = "0.73.133",
     ): ...
     def is_closed(self) -> bool: ...
     @property

{modal-0.73.131 → modal-0.73.133}/modal/functions.pyi RENAMED Viewed

@@ -82,7 +82,8 @@ class Function(
         max_containers: typing.Optional[int] = None,
         buffer_containers: typing.Optional[int] = None,
         scaledown_window: typing.Optional[int] = None,
-        allow_concurrent_inputs: typing.Optional[int] = None,
+        max_concurrent_inputs: typing.Optional[int] = None,
+        target_concurrent_inputs: typing.Optional[int] = None,
         batch_max_size: typing.Optional[int] = None,
         batch_wait_ms: typing.Optional[int] = None,
         cloud: typing.Optional[str] = None,
@@ -198,11 +199,11 @@ class Function(
     _call_generator_nowait: ___call_generator_nowait_spec[typing_extensions.Self]
-    class __remote_spec(typing_extensions.Protocol[ReturnType_INNER, P_INNER, SUPERSELF]):
+    class __remote_spec(typing_extensions.Protocol[P_INNER, ReturnType_INNER, SUPERSELF]):
         def __call__(self, *args: P_INNER.args, **kwargs: P_INNER.kwargs) -> ReturnType_INNER: ...
         async def aio(self, *args: P_INNER.args, **kwargs: P_INNER.kwargs) -> ReturnType_INNER: ...
-    remote: __remote_spec[modal._functions.ReturnType, modal._functions.P, typing_extensions.Self]
+    remote: __remote_spec[modal._functions.P, modal._functions.ReturnType, typing_extensions.Self]
     class __remote_gen_spec(typing_extensions.Protocol[SUPERSELF]):
         def __call__(self, *args, **kwargs) -> typing.Generator[typing.Any, None, None]: ...
@@ -217,19 +218,19 @@ class Function(
         self, *args: modal._functions.P.args, **kwargs: modal._functions.P.kwargs
     ) -> modal._functions.OriginalReturnType: ...
-    class ___experimental_spawn_spec(typing_extensions.Protocol[ReturnType_INNER, P_INNER, SUPERSELF]):
+    class ___experimental_spawn_spec(typing_extensions.Protocol[P_INNER, ReturnType_INNER, SUPERSELF]):
         def __call__(self, *args: P_INNER.args, **kwargs: P_INNER.kwargs) -> FunctionCall[ReturnType_INNER]: ...
         async def aio(self, *args: P_INNER.args, **kwargs: P_INNER.kwargs) -> FunctionCall[ReturnType_INNER]: ...
     _experimental_spawn: ___experimental_spawn_spec[
-        modal._functions.ReturnType, modal._functions.P, typing_extensions.Self
+        modal._functions.P, modal._functions.ReturnType, typing_extensions.Self
     ]
-    class __spawn_spec(typing_extensions.Protocol[ReturnType_INNER, P_INNER, SUPERSELF]):
+    class __spawn_spec(typing_extensions.Protocol[P_INNER, ReturnType_INNER, SUPERSELF]):
         def __call__(self, *args: P_INNER.args, **kwargs: P_INNER.kwargs) -> FunctionCall[ReturnType_INNER]: ...
         async def aio(self, *args: P_INNER.args, **kwargs: P_INNER.kwargs) -> FunctionCall[ReturnType_INNER]: ...
-    spawn: __spawn_spec[modal._functions.ReturnType, modal._functions.P, typing_extensions.Self]
+    spawn: __spawn_spec[modal._functions.P, modal._functions.ReturnType, typing_extensions.Self]
     def get_raw_f(self) -> collections.abc.Callable[..., typing.Any]: ...

{modal-0.73.131 → modal-0.73.133}/modal/partial_function.py RENAMED Viewed

@@ -5,6 +5,7 @@ from ._partial_function import (
     _asgi_app,
     _batched,
     _build,
+    _concurrent,
     _enter,
     _exit,
     _fastapi_endpoint,
@@ -28,3 +29,4 @@ build = synchronize_api(_build, target_module=__name__)
 enter = synchronize_api(_enter, target_module=__name__)
 exit = synchronize_api(_exit, target_module=__name__)
 batched = synchronize_api(_batched, target_module=__name__)
+concurrent = synchronize_api(_concurrent, target_module=__name__)

{modal-0.73.131 → modal-0.73.133}/modal/partial_function.pyi RENAMED Viewed

@@ -18,6 +18,8 @@ class PartialFunction(
     force_build: bool
     cluster_size: typing.Optional[int]
     build_timeout: typing.Optional[int]
+    max_concurrent_inputs: typing.Optional[int]
+    target_concurrent_inputs: typing.Optional[int]
     def __init__(
         self,
@@ -31,6 +33,8 @@ class PartialFunction(
         cluster_size: typing.Optional[int] = None,
         force_build: bool = False,
         build_timeout: typing.Optional[int] = None,
+        max_concurrent_inputs: typing.Optional[int] = None,
+        target_concurrent_inputs: typing.Optional[int] = None,
     ): ...
     def _get_raw_f(self) -> collections.abc.Callable[modal._partial_function.P, modal._partial_function.ReturnType]: ...
     def _is_web_endpoint(self) -> bool: ...
@@ -118,3 +122,8 @@ def exit(
 def batched(
     _warn_parentheses_missing=None, *, max_batch_size: int, wait_ms: int
 ) -> collections.abc.Callable[[collections.abc.Callable[..., typing.Any]], PartialFunction]: ...
+def concurrent(
+    _warn_parentheses_missing=None, *, max_inputs: int, target_inputs: typing.Optional[int] = None
+) -> collections.abc.Callable[
+    [typing.Union[collections.abc.Callable[..., typing.Any], PartialFunction]], PartialFunction
+]: ...

{modal-0.73.131 → modal-0.73.133}/modal.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: modal
-Version: 0.73.131
+Version: 0.73.133
 Summary: Python client library for Modal
 Author-email: Modal Labs <support@modal.com>
 License: Apache-2.0

{modal-0.73.131 → modal-0.73.133}/modal_proto/api.proto RENAMED Viewed

@@ -1222,7 +1222,7 @@ message Function {
   repeated VolumeMount volume_mounts = 33;
-  uint32 target_concurrent_inputs = 34;
+  uint32 max_concurrent_inputs = 34;
   repeated CustomDomainInfo custom_domain_info = 35;
@@ -1272,7 +1272,7 @@ message Function {
   uint64 batch_linger_ms = 61; // Milliseconds to block before a response is needed
   bool i6pn_enabled = 62;
   bool _experimental_concurrent_cancellations = 63;
-  uint32 max_concurrent_inputs = 64;
+  uint32 target_concurrent_inputs = 64;
   // TODO(irfansharif): Remove, once https://github.com/modal-labs/modal/pull/15645 lands.
   bool _experimental_task_templates_enabled = 65;  // forces going through the new gpu-fallbacks integration path, even if no fallback options are specified

modal 0.73.131__tar.gz → 0.73.133__tar.gz

modal 0.73.131.tar.gz → 0.73.133.tar.gz