PyPI - modal - Versions diffs - 0.73.130__tar.gz → 0.73.132__tar.gz - Mend

modal 0.73.130__tar.gz → 0.73.132__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (186) hide show

{modal-0.73.130 → modal-0.73.132}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: modal
-Version: 0.73.130
+Version: 0.73.132
 Summary: Python client library for Modal
 Author-email: Modal Labs <support@modal.com>
 License: Apache-2.0

{modal-0.73.130 → modal-0.73.132}/modal/__init__.py RENAMED Viewed

@@ -27,6 +27,7 @@ try:
         asgi_app,
         batched,
         build,
+        concurrent,
         enter,
         exit,
         fastapi_endpoint,
@@ -82,6 +83,7 @@ __all__ = [
     "asgi_app",
     "batched",
     "build",
+    "concurrent",
     "current_function_call_id",
     "current_input_id",
     "enable_output",

{modal-0.73.130 → modal-0.73.132}/modal/_container_entrypoint.py RENAMED Viewed

@@ -273,7 +273,7 @@ def call_function(
                 )
         reset_context()
-    if container_io_manager.target_concurrency > 1:
+    if container_io_manager.input_concurrency_enabled:
         with DaemonizedThreadPool(max_threads=container_io_manager.max_concurrency) as thread_pool:
             def make_async_cancel_callback(task):
@@ -293,7 +293,7 @@ def call_function(
                 if not did_sigint:
                     did_sigint = True
                     logger.warning(
-                        "User cancelling input of non-async functions with allow_concurrent_inputs > 1.\n"
+                        "User cancelling input of non-async functions with input concurrency enabled.\n"
                         "This shuts down the container, causing concurrently running inputs to be "
                         "rescheduled in other containers."
                     )

{modal-0.73.130 → modal-0.73.132}/modal/_functions.py RENAMED Viewed

@@ -25,7 +25,12 @@ from ._pty import get_pty_info
 from ._resolver import Resolver
 from ._resources import convert_fn_config_to_resources_config
 from ._runtime.execution_context import current_input_id, is_local
-from ._serialization import apply_defaults, serialize, serialize_proto_params, validate_params
+from ._serialization import (
+    apply_defaults,
+    serialize,
+    serialize_proto_params,
+    validate_parameter_values,
+)
 from ._traceback import print_server_warnings
 from ._utils.async_utils import (
     TaskContext,
@@ -435,7 +440,8 @@ class _Function(typing.Generic[P, ReturnType, OriginalReturnType], _Object, type
         max_containers: Optional[int] = None,
         buffer_containers: Optional[int] = None,
         scaledown_window: Optional[int] = None,
-        allow_concurrent_inputs: Optional[int] = None,
+        max_concurrent_inputs: Optional[int] = None,
+        target_concurrent_inputs: Optional[int] = None,
         batch_max_size: Optional[int] = None,
         batch_wait_ms: Optional[int] = None,
         cloud: Optional[str] = None,
@@ -786,7 +792,8 @@ class _Function(typing.Generic[P, ReturnType, OriginalReturnType], _Object, type
                     runtime_perf_record=config.get("runtime_perf_record"),
                     app_name=app_name,
                     is_builder_function=is_builder_function,
-                    target_concurrent_inputs=allow_concurrent_inputs or 0,
+                    max_concurrent_inputs=max_concurrent_inputs or 0,
+                    target_concurrent_inputs=target_concurrent_inputs or 0,
                     batch_max_size=batch_max_size or 0,
                     batch_linger_ms=batch_wait_ms or 0,
                     worker_id=config.get("worker_id"),
@@ -975,7 +982,7 @@ class _Function(typing.Generic[P, ReturnType, OriginalReturnType], _Object, type
                     )
                 schema = parent._class_parameter_info.schema
                 kwargs_with_defaults = apply_defaults(kwargs, schema)
-                validate_params(kwargs_with_defaults, schema)
+                validate_parameter_values(kwargs_with_defaults, schema)
                 serialized_params = serialize_proto_params(kwargs_with_defaults)
                 can_use_parent = len(parent._class_parameter_info.schema) == 0  # no parameters
             else:
@@ -1312,7 +1319,7 @@ class _Function(typing.Generic[P, ReturnType, OriginalReturnType], _Object, type
                 order_outputs,
                 return_exceptions,
                 count_update_callback,
-                api_pb2.FUNCTION_CALL_INVOCATION_TYPE_SYNC
+                api_pb2.FUNCTION_CALL_INVOCATION_TYPE_SYNC,
             )
         ) as stream:
             async for item in stream:

{modal-0.73.130 → modal-0.73.132}/modal/_partial_function.py RENAMED Viewed

@@ -59,6 +59,8 @@ class _PartialFunction(typing.Generic[P, ReturnType, OriginalReturnType]):
     force_build: bool
     cluster_size: Optional[int]  # Experimental: Clustered functions
     build_timeout: Optional[int]
+    max_concurrent_inputs: Optional[int]
+    target_concurrent_inputs: Optional[int]
     def __init__(
         self,
@@ -72,6 +74,8 @@ class _PartialFunction(typing.Generic[P, ReturnType, OriginalReturnType]):
         cluster_size: Optional[int] = None,  # Experimental: Clustered functions
         force_build: bool = False,
         build_timeout: Optional[int] = None,
+        max_concurrent_inputs: Optional[int] = None,
+        target_concurrent_inputs: Optional[int] = None,
     ):
         self.raw_f = raw_f
         self.flags = flags
@@ -89,6 +93,8 @@ class _PartialFunction(typing.Generic[P, ReturnType, OriginalReturnType]):
         self.cluster_size = cluster_size  # Experimental: Clustered functions
         self.force_build = force_build
         self.build_timeout = build_timeout
+        self.max_concurrent_inputs = max_concurrent_inputs
+        self.target_concurrent_inputs = target_concurrent_inputs
     def _get_raw_f(self) -> Callable[P, ReturnType]:
         return self.raw_f
@@ -143,6 +149,8 @@ class _PartialFunction(typing.Generic[P, ReturnType, OriginalReturnType]):
             batch_wait_ms=self.batch_wait_ms,
             force_build=self.force_build,
             build_timeout=self.build_timeout,
+            max_concurrent_inputs=self.max_concurrent_inputs,
+            target_concurrent_inputs=self.target_concurrent_inputs,
         )
@@ -722,3 +730,73 @@ def _batched(
         )
     return wrapper
+def _concurrent(
+    _warn_parentheses_missing=None,
+    *,
+    max_inputs: int,  # Hard limit on each container's input concurrency
+    target_inputs: Optional[int] = None,  # Input concurrency that Modal's autoscaler should target
+) -> Callable[[Union[Callable[..., Any], _PartialFunction]], _PartialFunction]:
+    """Decorator that allows individual containers to handle multiple inputs concurrently.
+    The concurrency mechanism depends on whether the function is async or not:
+    - Async functions will run inputs on a single thread as asyncio tasks.
+    - Synchronous functions will use multi-threading. The code must be thread-safe.
+    Input concurrency will be most useful for workflows that are IO-bound
+    (e.g., making network requests) or when running an inference server that supports
+    dynamic batching.
+    When `target_inputs` is set, Modal's autoscaler will try to provision resources
+    such that each container is running that many inputs concurrently, rather than
+    autoscaling based on `max_inputs`. Containers may burst up to `max_inputs`
+    if resources are insufficient to remain at the target concurrency, e.g. when the
+    arrival rate of inputs increases. This can trade-off a small increase in average
+    latency to avoid larger tail latencies from input queuing.
+    **Examples:**
+    ```python
+    # Stack the decorator under `@app.function()` to enable input concurrency
+    @app.function()
+    @modal.concurrent(max_inputs=100)
+    async def f(data):
+        # Async function; will be scheduled as asyncio task
+        ...
+    # With `@app.cls()`, apply the decorator at the class level, not on individual methods
+    @app.cls()
+    @modal.concurrent(max_inputs=100, target_inputs=80)
+    class C:
+        @modal.method()
+        def f(self, data):
+            # Sync function; must be thread-safe
+            ...
+    ```
+    """
+    if _warn_parentheses_missing is not None:
+        raise InvalidError(
+            "Positional arguments are not allowed. Did you forget parentheses? Suggestion: `@modal.concurrent()`."
+        )
+    if target_inputs and target_inputs > max_inputs:
+        raise InvalidError("`target_inputs` parameter cannot be greater than `max_inputs`.")
+    def wrapper(obj: Union[Callable[..., Any], _PartialFunction]) -> _PartialFunction:
+        if isinstance(obj, _PartialFunction):
+            # Risky that we need to mutate the parameters here; should make this safer
+            obj.max_concurrent_inputs = max_inputs
+            obj.target_concurrent_inputs = target_inputs
+            obj.add_flags(_PartialFunctionFlags.FUNCTION)
+            return obj
+        return _PartialFunction(
+            obj,
+            _PartialFunctionFlags.FUNCTION,
+            max_concurrent_inputs=max_inputs,
+            target_concurrent_inputs=target_inputs,
+        )
+    return wrapper

{modal-0.73.130 → modal-0.73.132}/modal/_resolver.py RENAMED Viewed

@@ -1,6 +1,7 @@
 # Copyright Modal Labs 2023
 import asyncio
 import contextlib
+import traceback
 import typing
 from asyncio import Future
 from collections.abc import Hashable
@@ -153,7 +154,11 @@ class Resolver:
                 self._deduplication_cache[deduplication_key] = cached_future
         # TODO(elias): print original exception/trace rather than the Resolver-internal trace
-        return await cached_future
+        try:
+            return await cached_future
+        except Exception:
+            traceback.print_exc()
+            raise
     def objects(self) -> list["modal._object._Object"]:
         unique_objects: dict[str, "modal._object._Object"] = {}

{modal-0.73.130 → modal-0.73.132}/modal/_runtime/container_io_manager.py RENAMED Viewed

@@ -264,6 +264,7 @@ class _ContainerIOManager:
     current_inputs: dict[str, IOContext]  # input_id -> IOContext
     current_input_started_at: Optional[float]
+    _input_concurrency_enabled: bool
     _target_concurrency: int
     _max_concurrency: int
     _concurrency_loop: Optional[asyncio.Task]
@@ -296,14 +297,14 @@ class _ContainerIOManager:
         self.current_input_started_at = None
         if container_args.function_def.pty_info.pty_type == api_pb2.PTYInfo.PTY_TYPE_SHELL:
-            target_concurrency = 1
             max_concurrency = 1
+            target_concurrency = 1
         else:
-            target_concurrency = container_args.function_def.target_concurrent_inputs or 1
-            max_concurrency = container_args.function_def.max_concurrent_inputs or target_concurrency
+            max_concurrency = container_args.function_def.max_concurrent_inputs or 1
+            target_concurrency = container_args.function_def.target_concurrent_inputs or max_concurrency
-        self._target_concurrency = target_concurrency
         self._max_concurrency = max_concurrency
+        self._target_concurrency = target_concurrency
         self._concurrency_loop = None
         self._stop_concurrency_loop = False
         self._input_slots = InputSlots(target_concurrency)
@@ -976,6 +977,10 @@ class _ContainerIOManager:
     def max_concurrency(self) -> int:
         return self._max_concurrency
+    @property
+    def input_concurrency_enabled(self) -> int:
+        return max(self._max_concurrency, self._target_concurrency) > 1
     @classmethod
     def get_input_concurrency(cls) -> int:
         """

{modal-0.73.130 → modal-0.73.132}/modal/_runtime/container_io_manager.pyi RENAMED Viewed

@@ -69,6 +69,7 @@ class _ContainerIOManager:
     current_input_id: typing.Optional[str]
     current_inputs: dict[str, IOContext]
     current_input_started_at: typing.Optional[float]
+    _input_concurrency_enabled: bool
     _target_concurrency: int
     _max_concurrency: int
     _concurrency_loop: typing.Optional[asyncio.Task]
@@ -149,6 +150,8 @@ class _ContainerIOManager:
     def target_concurrency(self) -> int: ...
     @property
     def max_concurrency(self) -> int: ...
+    @property
+    def input_concurrency_enabled(self) -> int: ...
     @classmethod
     def get_input_concurrency(cls) -> int: ...
     @classmethod
@@ -169,6 +172,7 @@ class ContainerIOManager:
     current_input_id: typing.Optional[str]
     current_inputs: dict[str, IOContext]
     current_input_started_at: typing.Optional[float]
+    _input_concurrency_enabled: bool
     _target_concurrency: int
     _max_concurrency: int
     _concurrency_loop: typing.Optional[asyncio.Task]
@@ -384,6 +388,8 @@ class ContainerIOManager:
     def target_concurrency(self) -> int: ...
     @property
     def max_concurrency(self) -> int: ...
+    @property
+    def input_concurrency_enabled(self) -> int: ...
     @classmethod
     def get_input_concurrency(cls) -> int: ...
     @classmethod

{modal-0.73.130 → modal-0.73.132}/modal/_serialization.py RENAMED Viewed

@@ -1,14 +1,16 @@
 # Copyright Modal Labs 2022
+import inspect
 import io
 import pickle
 import typing
-from dataclasses import dataclass
+from inspect import Parameter
 from typing import Any
 from modal._utils.async_utils import synchronizer
 from modal_proto import api_pb2
 from ._object import _Object
+from ._type_manager import parameter_serde_registry, schema_registry
 from ._vendor import cloudpickle
 from .config import logger
 from .exception import DeserializationError, ExecutionError, InvalidError
@@ -389,50 +391,6 @@ def check_valid_cls_constructor_arg(key, obj):
         )
-def assert_bytes(obj: Any):
-    if not isinstance(obj, bytes):
-        raise TypeError(f"Expected bytes, got {type(obj)}")
-    return obj
-@dataclass
-class ParamTypeInfo:
-    default_field: str
-    proto_field: str
-    converter: typing.Callable[[str], typing.Any]
-    type: type
-PYTHON_TO_PROTO_TYPE: dict[type, "api_pb2.ParameterType.ValueType"] = {
-    # python type -> protobuf type enum
-    str: api_pb2.PARAM_TYPE_STRING,
-    int: api_pb2.PARAM_TYPE_INT,
-    bytes: api_pb2.PARAM_TYPE_BYTES,
-}
-PROTO_TYPE_INFO = {
-    # Protobuf type enum -> encode/decode helper metadata
-    api_pb2.PARAM_TYPE_STRING: ParamTypeInfo(
-        default_field="string_default",
-        proto_field="string_value",
-        converter=str,
-        type=str,
-    ),
-    api_pb2.PARAM_TYPE_INT: ParamTypeInfo(
-        default_field="int_default",
-        proto_field="int_value",
-        converter=int,
-        type=int,
-    ),
-    api_pb2.PARAM_TYPE_BYTES: ParamTypeInfo(
-        default_field="bytes_default",
-        proto_field="bytes_value",
-        converter=assert_bytes,
-        type=bytes,
-    ),
-}
 def apply_defaults(
     python_params: typing.Mapping[str, Any], schema: typing.Sequence[api_pb2.ClassParameterSpec]
 ) -> dict[str, Any]:
@@ -453,68 +411,56 @@ def apply_defaults(
     return result
+def encode_parameter_value(name: str, python_value: Any) -> api_pb2.ClassParameterValue:
+    """Map to proto parameter representation using python runtime type information"""
+    struct = parameter_serde_registry.encode(python_value)
+    struct.name = name
+    return struct
 def serialize_proto_params(python_params: dict[str, Any]) -> bytes:
     proto_params: list[api_pb2.ClassParameterValue] = []
     for param_name, python_value in python_params.items():
-        python_type = type(python_value)
-        protobuf_type = get_proto_parameter_type(python_type)
-        type_info = PROTO_TYPE_INFO.get(protobuf_type)
-        proto_param = api_pb2.ClassParameterValue(
-            name=param_name,
-            type=protobuf_type,
-        )
-        try:
-            converted_value = type_info.converter(python_value)
-        except ValueError as exc:
-            raise ValueError(f"Invalid type for parameter {param_name}: {exc}")
-        setattr(proto_param, type_info.proto_field, converted_value)
-        proto_params.append(proto_param)
+        proto_params.append(encode_parameter_value(param_name, python_value))
     proto_bytes = api_pb2.ClassParameterSet(parameters=proto_params).SerializeToString(deterministic=True)
     return proto_bytes
 def deserialize_proto_params(serialized_params: bytes) -> dict[str, Any]:
-    proto_struct = api_pb2.ClassParameterSet()
-    proto_struct.ParseFromString(serialized_params)
+    proto_struct = api_pb2.ClassParameterSet.FromString(serialized_params)
     python_params = {}
     for param in proto_struct.parameters:
-        python_value: Any
-        if param.type == api_pb2.PARAM_TYPE_STRING:
-            python_value = param.string_value
-        elif param.type == api_pb2.PARAM_TYPE_INT:
-            python_value = param.int_value
-        elif param.type == api_pb2.PARAM_TYPE_BYTES:
-            python_value = param.bytes_value
-        else:
-            raise NotImplementedError(f"Unimplemented parameter type: {param.type}.")
-        python_params[param.name] = python_value
+        python_params[param.name] = parameter_serde_registry.decode(param)
     return python_params
-def validate_params(params: dict[str, Any], schema: typing.Sequence[api_pb2.ClassParameterSpec]):
-    # first check that all declared values are provided
-    for schema_param in schema:
-        if schema_param.name not in params:
-            # we expect all values to be present - even defaulted ones (defaults are applied on payload construction)
-            raise InvalidError(f"Missing required parameter: {schema_param.name}")
-        python_value = params[schema_param.name]
-        python_type = type(python_value)
-        param_protobuf_type = get_proto_parameter_type(python_type)
-        if schema_param.type != param_protobuf_type:
-            expected_python_type = PROTO_TYPE_INFO[schema_param.type].type
-            raise TypeError(
-                f"Parameter '{schema_param.name}' type error: expected {expected_python_type.__name__}, "
-                f"got {python_type.__name__}"
-            )
+def validate_parameter_values(payload: dict[str, Any], schema: typing.Sequence[api_pb2.ClassParameterSpec]):
+    """Ensure parameter payload conforms to the schema of a class
+    Checks that:
+    * All fields are specified (defaults are expected to already be applied on the payload)
+    * No extra fields are specified
+    * The type of each field is correct
+    """
+    for param_spec in schema:
+        if param_spec.name not in payload:
+            raise InvalidError(f"Missing required parameter: {param_spec.name}")
+        python_value = payload[param_spec.name]
+        if param_spec.HasField("full_type") and param_spec.full_type.base_type:
+            type_enum_value = param_spec.full_type.base_type
+        else:
+            type_enum_value = param_spec.type  # backwards compatibility pre-full_type
+        parameter_serde_registry.validate_value_for_enum_type(type_enum_value, python_value)
     schema_fields = {p.name for p in schema}
     # then check that no extra values are provided
-    non_declared_fields = params.keys() - schema_fields
+    non_declared_fields = payload.keys() - schema_fields
     if non_declared_fields:
         raise InvalidError(
-            f"The following parameter names were provided but are not present in the schema: {non_declared_fields}"
+            f"The following parameter names were provided but are not defined class modal.parameters for the class: "
+            f"{', '.join(non_declared_fields)}"
         )
@@ -528,8 +474,6 @@ def deserialize_params(serialized_params: bytes, function_def: api_pb2.Function,
     elif function_def.class_parameter_info.format == api_pb2.ClassParameterInfo.PARAM_SERIALIZATION_FORMAT_PROTO:
         param_args = ()  # we use kwargs only for our implicit constructors
         param_kwargs = deserialize_proto_params(serialized_params)
-        # TODO: We can probably remove the validation below since we do validation in the caller?
-        validate_params(param_kwargs, list(function_def.class_parameter_info.schema))
     else:
         raise ExecutionError(
             f"Unknown class parameter serialization format: {function_def.class_parameter_info.format}"
@@ -538,9 +482,47 @@ def deserialize_params(serialized_params: bytes, function_def: api_pb2.Function,
     return param_args, param_kwargs
-def get_proto_parameter_type(parameter_type: type) -> "api_pb2.ParameterType.ValueType":
-    if parameter_type not in PYTHON_TO_PROTO_TYPE:
-        type_name = getattr(parameter_type, "__name__", repr(parameter_type))
-        supported = ", ".join(parameter_type.__name__ for parameter_type in PYTHON_TO_PROTO_TYPE.keys())
-        raise InvalidError(f"{type_name} is not a supported parameter type. Use one of: {supported}")
-    return PYTHON_TO_PROTO_TYPE[parameter_type]
+def _signature_parameter_to_spec(
+    python_signature_parameter: inspect.Parameter, include_legacy_parameter_fields: bool = False
+) -> api_pb2.ClassParameterSpec:
+    """Returns proto representation of Parameter as returned by inspect.signature()
+    Setting include_legacy_parameter_fields makes the output backwards compatible with
+    pre v0.74 clients looking at class parameter specifications, and should not be used
+    when registering *function* schemas.
+    """
+    declared_type = python_signature_parameter.annotation
+    full_proto_type = schema_registry.get_proto_generic_type(declared_type)
+    has_default = python_signature_parameter.default is not Parameter.empty
+    field_spec = api_pb2.ClassParameterSpec(
+        name=python_signature_parameter.name,
+        full_type=full_proto_type,
+        has_default=has_default,
+    )
+    if include_legacy_parameter_fields:
+        # add the .{type}_default and `.type` values as required by legacy clients
+        # looking at class parameter specs
+        if full_proto_type.base_type == api_pb2.PARAM_TYPE_INT:
+            if has_default:
+                field_spec.int_default = python_signature_parameter.default
+            field_spec.type = api_pb2.PARAM_TYPE_INT
+        elif full_proto_type.base_type == api_pb2.PARAM_TYPE_STRING:
+            if has_default:
+                field_spec.string_default = python_signature_parameter.default
+            field_spec.type = api_pb2.PARAM_TYPE_STRING
+        elif full_proto_type.base_type == api_pb2.PARAM_TYPE_BYTES:
+            if has_default:
+                field_spec.bytes_default = python_signature_parameter.default
+            field_spec.type = api_pb2.PARAM_TYPE_BYTES
+    return field_spec
+def signature_to_parameter_specs(signature: inspect.Signature) -> list[api_pb2.ClassParameterSpec]:
+    # only used for modal.parameter() specs, uses backwards compatible fields and types
+    modal_parameters: list[api_pb2.ClassParameterSpec] = []
+    for param in signature.parameters.values():
+        field_spec = _signature_parameter_to_spec(param, include_legacy_parameter_fields=True)
+        modal_parameters.append(field_spec)
+    return modal_parameters

modal 0.73.130__tar.gz → 0.73.132__tar.gz

modal 0.73.130__tar.gz → 0.73.132__tar.gz