modal 0.62.16__py3-none-any.whl → 0.72.11__py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (220)
  1. modal/__init__.py +17 -13
  2. modal/__main__.py +41 -3
  3. modal/_clustered_functions.py +80 -0
  4. modal/_clustered_functions.pyi +22 -0
  5. modal/_container_entrypoint.py +420 -937
  6. modal/_ipython.py +3 -13
  7. modal/_location.py +17 -10
  8. modal/_output.py +243 -99
  9. modal/_pty.py +2 -2
  10. modal/_resolver.py +55 -59
  11. modal/_resources.py +51 -0
  12. modal/_runtime/__init__.py +1 -0
  13. modal/_runtime/asgi.py +519 -0
  14. modal/_runtime/container_io_manager.py +1036 -0
  15. modal/_runtime/execution_context.py +89 -0
  16. modal/_runtime/telemetry.py +169 -0
  17. modal/_runtime/user_code_imports.py +356 -0
  18. modal/_serialization.py +134 -9
  19. modal/_traceback.py +47 -187
  20. modal/_tunnel.py +52 -16
  21. modal/_tunnel.pyi +19 -36
  22. modal/_utils/app_utils.py +3 -17
  23. modal/_utils/async_utils.py +479 -100
  24. modal/_utils/blob_utils.py +157 -186
  25. modal/_utils/bytes_io_segment_payload.py +97 -0
  26. modal/_utils/deprecation.py +89 -0
  27. modal/_utils/docker_utils.py +98 -0
  28. modal/_utils/function_utils.py +460 -171
  29. modal/_utils/grpc_testing.py +47 -31
  30. modal/_utils/grpc_utils.py +62 -109
  31. modal/_utils/hash_utils.py +61 -19
  32. modal/_utils/http_utils.py +39 -9
  33. modal/_utils/logger.py +2 -1
  34. modal/_utils/mount_utils.py +34 -16
  35. modal/_utils/name_utils.py +58 -0
  36. modal/_utils/package_utils.py +14 -1
  37. modal/_utils/pattern_utils.py +205 -0
  38. modal/_utils/rand_pb_testing.py +5 -7
  39. modal/_utils/shell_utils.py +15 -49
  40. modal/_vendor/a2wsgi_wsgi.py +62 -72
  41. modal/_vendor/cloudpickle.py +1 -1
  42. modal/_watcher.py +14 -12
  43. modal/app.py +1003 -314
  44. modal/app.pyi +540 -264
  45. modal/call_graph.py +7 -6
  46. modal/cli/_download.py +63 -53
  47. modal/cli/_traceback.py +200 -0
  48. modal/cli/app.py +205 -45
  49. modal/cli/config.py +12 -5
  50. modal/cli/container.py +62 -14
  51. modal/cli/dict.py +128 -0
  52. modal/cli/entry_point.py +26 -13
  53. modal/cli/environment.py +40 -9
  54. modal/cli/import_refs.py +64 -58
  55. modal/cli/launch.py +32 -18
  56. modal/cli/network_file_system.py +64 -83
  57. modal/cli/profile.py +1 -1
  58. modal/cli/programs/run_jupyter.py +35 -10
  59. modal/cli/programs/vscode.py +60 -10
  60. modal/cli/queues.py +131 -0
  61. modal/cli/run.py +234 -131
  62. modal/cli/secret.py +8 -7
  63. modal/cli/token.py +7 -2
  64. modal/cli/utils.py +79 -10
  65. modal/cli/volume.py +110 -109
  66. modal/client.py +250 -144
  67. modal/client.pyi +157 -118
  68. modal/cloud_bucket_mount.py +108 -34
  69. modal/cloud_bucket_mount.pyi +32 -38
  70. modal/cls.py +535 -148
  71. modal/cls.pyi +190 -146
  72. modal/config.py +41 -19
  73. modal/container_process.py +177 -0
  74. modal/container_process.pyi +82 -0
  75. modal/dict.py +111 -65
  76. modal/dict.pyi +136 -131
  77. modal/environments.py +106 -5
  78. modal/environments.pyi +77 -25
  79. modal/exception.py +34 -43
  80. modal/experimental.py +61 -2
  81. modal/extensions/ipython.py +5 -5
  82. modal/file_io.py +537 -0
  83. modal/file_io.pyi +235 -0
  84. modal/file_pattern_matcher.py +197 -0
  85. modal/functions.py +906 -911
  86. modal/functions.pyi +466 -430
  87. modal/gpu.py +57 -44
  88. modal/image.py +1089 -479
  89. modal/image.pyi +584 -228
  90. modal/io_streams.py +434 -0
  91. modal/io_streams.pyi +122 -0
  92. modal/mount.py +314 -101
  93. modal/mount.pyi +241 -235
  94. modal/network_file_system.py +92 -92
  95. modal/network_file_system.pyi +152 -110
  96. modal/object.py +67 -36
  97. modal/object.pyi +166 -143
  98. modal/output.py +63 -0
  99. modal/parallel_map.py +434 -0
  100. modal/parallel_map.pyi +75 -0
  101. modal/partial_function.py +282 -117
  102. modal/partial_function.pyi +222 -129
  103. modal/proxy.py +15 -12
  104. modal/proxy.pyi +3 -8
  105. modal/queue.py +182 -65
  106. modal/queue.pyi +218 -118
  107. modal/requirements/2024.04.txt +29 -0
  108. modal/requirements/2024.10.txt +16 -0
  109. modal/requirements/README.md +21 -0
  110. modal/requirements/base-images.json +22 -0
  111. modal/retries.py +48 -7
  112. modal/runner.py +459 -156
  113. modal/runner.pyi +135 -71
  114. modal/running_app.py +38 -0
  115. modal/sandbox.py +514 -236
  116. modal/sandbox.pyi +397 -169
  117. modal/schedule.py +4 -4
  118. modal/scheduler_placement.py +20 -3
  119. modal/secret.py +56 -31
  120. modal/secret.pyi +62 -42
  121. modal/serving.py +51 -56
  122. modal/serving.pyi +44 -36
  123. modal/stream_type.py +15 -0
  124. modal/token_flow.py +5 -3
  125. modal/token_flow.pyi +37 -32
  126. modal/volume.py +285 -157
  127. modal/volume.pyi +249 -184
  128. {modal-0.62.16.dist-info → modal-0.72.11.dist-info}/METADATA +7 -7
  129. modal-0.72.11.dist-info/RECORD +174 -0
  130. {modal-0.62.16.dist-info → modal-0.72.11.dist-info}/top_level.txt +0 -1
  131. modal_docs/gen_reference_docs.py +3 -1
  132. modal_docs/mdmd/mdmd.py +0 -1
  133. modal_docs/mdmd/signatures.py +5 -2
  134. modal_global_objects/images/base_images.py +28 -0
  135. modal_global_objects/mounts/python_standalone.py +2 -2
  136. modal_proto/__init__.py +1 -1
  137. modal_proto/api.proto +1288 -533
  138. modal_proto/api_grpc.py +856 -456
  139. modal_proto/api_pb2.py +2165 -1157
  140. modal_proto/api_pb2.pyi +8859 -0
  141. modal_proto/api_pb2_grpc.py +1674 -855
  142. modal_proto/api_pb2_grpc.pyi +1416 -0
  143. modal_proto/modal_api_grpc.py +149 -0
  144. modal_proto/modal_options_grpc.py +3 -0
  145. modal_proto/options_pb2.pyi +20 -0
  146. modal_proto/options_pb2_grpc.pyi +7 -0
  147. modal_proto/py.typed +0 -0
  148. modal_version/__init__.py +1 -1
  149. modal_version/_version_generated.py +2 -2
  150. modal/_asgi.py +0 -370
  151. modal/_container_entrypoint.pyi +0 -378
  152. modal/_container_exec.py +0 -128
  153. modal/_sandbox_shell.py +0 -49
  154. modal/shared_volume.py +0 -23
  155. modal/shared_volume.pyi +0 -24
  156. modal/stub.py +0 -783
  157. modal/stub.pyi +0 -332
  158. modal-0.62.16.dist-info/RECORD +0 -198
  159. modal_global_objects/images/conda.py +0 -15
  160. modal_global_objects/images/debian_slim.py +0 -15
  161. modal_global_objects/images/micromamba.py +0 -15
  162. test/__init__.py +0 -1
  163. test/aio_test.py +0 -12
  164. test/async_utils_test.py +0 -262
  165. test/blob_test.py +0 -67
  166. test/cli_imports_test.py +0 -149
  167. test/cli_test.py +0 -659
  168. test/client_test.py +0 -194
  169. test/cls_test.py +0 -630
  170. test/config_test.py +0 -137
  171. test/conftest.py +0 -1420
  172. test/container_app_test.py +0 -32
  173. test/container_test.py +0 -1389
  174. test/cpu_test.py +0 -23
  175. test/decorator_test.py +0 -85
  176. test/deprecation_test.py +0 -34
  177. test/dict_test.py +0 -33
  178. test/e2e_test.py +0 -68
  179. test/error_test.py +0 -7
  180. test/function_serialization_test.py +0 -32
  181. test/function_test.py +0 -653
  182. test/function_utils_test.py +0 -101
  183. test/gpu_test.py +0 -159
  184. test/grpc_utils_test.py +0 -141
  185. test/helpers.py +0 -42
  186. test/image_test.py +0 -669
  187. test/live_reload_test.py +0 -80
  188. test/lookup_test.py +0 -70
  189. test/mdmd_test.py +0 -329
  190. test/mount_test.py +0 -162
  191. test/mounted_files_test.py +0 -329
  192. test/network_file_system_test.py +0 -181
  193. test/notebook_test.py +0 -66
  194. test/object_test.py +0 -41
  195. test/package_utils_test.py +0 -25
  196. test/queue_test.py +0 -97
  197. test/resolver_test.py +0 -58
  198. test/retries_test.py +0 -67
  199. test/runner_test.py +0 -85
  200. test/sandbox_test.py +0 -191
  201. test/schedule_test.py +0 -15
  202. test/scheduler_placement_test.py +0 -29
  203. test/secret_test.py +0 -78
  204. test/serialization_test.py +0 -42
  205. test/stub_composition_test.py +0 -10
  206. test/stub_test.py +0 -360
  207. test/test_asgi_wrapper.py +0 -234
  208. test/token_flow_test.py +0 -18
  209. test/traceback_test.py +0 -135
  210. test/tunnel_test.py +0 -29
  211. test/utils_test.py +0 -88
  212. test/version_test.py +0 -14
  213. test/volume_test.py +0 -341
  214. test/watcher_test.py +0 -30
  215. test/webhook_test.py +0 -146
  216. /modal/{requirements.312.txt → requirements/2023.12.312.txt} +0 -0
  217. /modal/{requirements.txt → requirements/2023.12.txt} +0 -0
  218. {modal-0.62.16.dist-info → modal-0.72.11.dist-info}/LICENSE +0 -0
  219. {modal-0.62.16.dist-info → modal-0.72.11.dist-info}/WHEEL +0 -0
  220. {modal-0.62.16.dist-info → modal-0.72.11.dist-info}/entry_points.txt +0 -0
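
For readers who want to reproduce or explore a comparison like this locally, here is a minimal sketch (not part of the published diff). It assumes `pip` is on PATH with network access; the version specifiers come from the header above, and the helper names are illustrative. It downloads both wheels and prints a unified diff of `modal/functions.py`, the file detailed below.

# Minimal sketch: reproduce a wheel-to-wheel diff locally.
# Assumes `pip` is available and the registry is reachable; the version
# specifiers come from the header above, everything else is illustrative.
import difflib
import subprocess
import tempfile
import zipfile
from pathlib import Path

def fetch_wheel(spec: str, dest: Path) -> Path:
    """Download just the named wheel (no dependencies) into `dest`."""
    dest.mkdir(parents=True, exist_ok=True)
    subprocess.run(
        ["pip", "download", "--no-deps", "--only-binary", ":all:", spec, "-d", str(dest)],
        check=True,
    )
    return next(dest.glob("*.whl"))

def read_module(whl: Path, name: str) -> list[str]:
    """Read one module's source out of the wheel archive."""
    with zipfile.ZipFile(whl) as zf:
        return zf.read(name).decode().splitlines()

with tempfile.TemporaryDirectory() as tmp:
    old = fetch_wheel("modal==0.62.16", Path(tmp, "old"))
    new = fetch_wheel("modal==0.72.11", Path(tmp, "new"))
    diff = difflib.unified_diff(
        read_module(old, "modal/functions.py"),
        read_module(new, "modal/functions.py"),
        fromfile="0.62.16/modal/functions.py",
        tofile="0.72.11/modal/functions.py",
        lineterm="",
    )
    print("\n".join(diff))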
modal/functions.py CHANGED
@@ -1,253 +1,163 @@
  # Copyright Modal Labs 2023
- import asyncio
+ import dataclasses
  import inspect
+ import textwrap
  import time
+ import typing
  import warnings
- from contextvars import ContextVar
+ from collections.abc import AsyncGenerator, Collection, Sequence, Sized
  from dataclasses import dataclass
  from pathlib import PurePosixPath
  from typing import (
  TYPE_CHECKING,
  Any,
- AsyncGenerator,
- AsyncIterable,
- AsyncIterator,
  Callable,
- Collection,
- Dict,
- List,
- Literal,
  Optional,
- Sequence,
- Set,
- Sized,
- Tuple,
- Type,
  Union,
  )

- from aiostream import pipe, stream
+ import typing_extensions
  from google.protobuf.message import Message
  from grpclib import GRPCError, Status
- from grpclib.exceptions import StreamTerminatedError
+ from synchronicity.combined_types import MethodWithAio
  from synchronicity.exceptions import UserCodeException

- from modal import _pty, is_local
- from modal_proto import api_grpc, api_pb2
+ from modal_proto import api_pb2
+ from modal_proto.modal_api_grpc import ModalClientModal

  from ._location import parse_cloud_provider
- from ._output import OutputManager
+ from ._pty import get_pty_info
  from ._resolver import Resolver
- from ._serialization import deserialize, deserialize_data_format, serialize
- from ._traceback import append_modal_tb
+ from ._resources import convert_fn_config_to_resources_config
+ from ._runtime.execution_context import current_input_id, is_local
+ from ._serialization import serialize, serialize_proto_params
+ from ._traceback import print_server_warnings
  from ._utils.async_utils import (
- queue_batch_iterator,
+ TaskContext,
+ aclosing,
+ async_merge,
+ callable_to_agen,
  synchronize_api,
  synchronizer,
  warn_if_generator_is_not_consumed,
  )
- from ._utils.blob_utils import (
- BLOB_MAX_PARALLELISM,
- MAX_OBJECT_SIZE_BYTES,
- blob_download,
- blob_upload,
+ from ._utils.deprecation import deprecation_warning, renamed_parameter
+ from ._utils.function_utils import (
+ ATTEMPT_TIMEOUT_GRACE_PERIOD,
+ OUTPUTS_TIMEOUT,
+ FunctionCreationStatus,
+ FunctionInfo,
+ _create_input,
+ _process_result,
+ _stream_function_call_data,
+ get_function_type,
+ is_async,
  )
- from ._utils.function_utils import FunctionInfo, get_referred_objects, is_async
- from ._utils.grpc_utils import RETRYABLE_GRPC_STATUS_CODES, retry_transient_errors, unary_stream
- from ._utils.mount_utils import validate_mount_points, validate_volumes
+ from ._utils.grpc_utils import retry_transient_errors
+ from ._utils.mount_utils import validate_network_file_systems, validate_volumes
  from .call_graph import InputInfo, _reconstruct_call_graph
  from .client import _Client
  from .cloud_bucket_mount import _CloudBucketMount, cloud_bucket_mounts_to_proto
- from .config import config, logger
+ from .config import config
  from .exception import (
  ExecutionError,
  FunctionTimeoutError,
+ InternalFailure,
  InvalidError,
  NotFoundError,
- RemoteError,
- deprecation_warning,
+ OutputExpiredError,
  )
  from .gpu import GPU_T, parse_gpu_config
  from .image import _Image
- from .mount import _get_client_mount, _Mount
+ from .mount import _get_client_mount, _Mount, get_auto_mounts
  from .network_file_system import _NetworkFileSystem, network_file_system_mount_protos
- from .object import Object, _get_environment_name, _Object, live_method, live_method_gen
+ from .object import _get_environment_name, _Object, live_method, live_method_gen
+ from .output import _get_output_manager
+ from .parallel_map import (
+ _for_each_async,
+ _for_each_sync,
+ _map_async,
+ _map_invocation,
+ _map_sync,
+ _starmap_async,
+ _starmap_sync,
+ _SynchronizedQueue,
+ )
  from .proxy import _Proxy
- from .retries import Retries
+ from .retries import Retries, RetryManager
  from .schedule import Schedule
  from .scheduler_placement import SchedulerPlacement
  from .secret import _Secret
  from .volume import _Volume

- OUTPUTS_TIMEOUT = 55.0 # seconds
- ATTEMPT_TIMEOUT_GRACE_PERIOD = 5 # seconds
-
-
  if TYPE_CHECKING:
- import modal.stub
-
-
- def exc_with_hints(exc: BaseException):
- """mdmd:hidden"""
- if isinstance(exc, ImportError) and exc.msg == "attempted relative import with no known parent package":
- exc.msg += """\n
- HINT: For relative imports to work, you might need to run your modal app as a module. Try:
- - `python -m my_pkg.my_app` instead of `python my_pkg/my_app.py`
- - `modal deploy my_pkg.my_app` instead of `modal deploy my_pkg/my_app.py`
- """
- elif isinstance(
- exc, RuntimeError
- ) and "CUDA error: no kernel image is available for execution on the device" in str(exc):
- msg = (
- exc.args[0]
- + """\n
- HINT: This error usually indicates an outdated CUDA version. Older versions of torch (<=1.12)
- come with CUDA 10.2 by default. If pinning to an older torch version, you can specify a CUDA version
- manually, for example:
- - image.pip_install("torch==1.12.1+cu116", find_links="https://download.pytorch.org/whl/torch_stable.html")
- """
- )
- exc.args = (msg,)
-
- return exc
-
-
- async def _process_result(result: api_pb2.GenericResult, data_format: int, stub, client=None):
- if result.WhichOneof("data_oneof") == "data_blob_id":
- data = await blob_download(result.data_blob_id, stub)
- else:
- data = result.data
-
- if result.status == api_pb2.GenericResult.GENERIC_STATUS_TIMEOUT:
- raise FunctionTimeoutError(result.exception)
- elif result.status != api_pb2.GenericResult.GENERIC_STATUS_SUCCESS:
- if data:
- try:
- exc = deserialize(data, client)
- except Exception as deser_exc:
- raise ExecutionError(
- "Could not deserialize remote exception due to local error:\n"
- + f"{deser_exc}\n"
- + "This can happen if your local environment does not have the remote exception definitions.\n"
- + "Here is the remote traceback:\n"
- + f"{result.traceback}"
- )
- if not isinstance(exc, BaseException):
- raise ExecutionError(f"Got remote exception of incorrect type {type(exc)}")
-
- if result.serialized_tb:
- try:
- tb_dict = deserialize(result.serialized_tb, client)
- line_cache = deserialize(result.tb_line_cache, client)
- append_modal_tb(exc, tb_dict, line_cache)
- except Exception:
- pass
- uc_exc = UserCodeException(exc_with_hints(exc))
- raise uc_exc
- raise RemoteError(result.exception)
+ import modal.app
+ import modal.cls
+ import modal.partial_function

- try:
- return deserialize_data_format(data, data_format, client)
- except ModuleNotFoundError as deser_exc:
- raise ExecutionError(
- "Could not deserialize result due to error:\n"
- + f"{deser_exc}\n"
- + "This can happen if your local environment does not have a module that was used to construct the result. \n"
- )

-
- async def _create_input(args, kwargs, client, idx: Optional[int] = None) -> api_pb2.FunctionPutInputsItem:
- """Serialize function arguments and create a FunctionInput protobuf,
- uploading to blob storage if needed.
- """
- if idx is None:
- idx = 0
-
- args_serialized = serialize((args, kwargs))
-
- if len(args_serialized) > MAX_OBJECT_SIZE_BYTES:
- args_blob_id = await blob_upload(args_serialized, client.stub)
-
- return api_pb2.FunctionPutInputsItem(
- input=api_pb2.FunctionInput(args_blob_id=args_blob_id, data_format=api_pb2.DATA_FORMAT_PICKLE),
- idx=idx,
- )
- else:
- return api_pb2.FunctionPutInputsItem(
- input=api_pb2.FunctionInput(args=args_serialized, data_format=api_pb2.DATA_FORMAT_PICKLE),
- idx=idx,
- )
-
-
- async def _stream_function_call_data(
- client, function_call_id: str, variant: Literal["data_in", "data_out"]
- ) -> AsyncIterator[Any]:
- """Read from the `data_in` or `data_out` stream of a function call."""
- last_index = 0
- retries_remaining = 10
-
- if variant == "data_in":
- stub_fn = client.stub.FunctionCallGetDataIn
- elif variant == "data_out":
- stub_fn = client.stub.FunctionCallGetDataOut
- else:
- raise ValueError(f"Invalid variant {variant}")
-
- while True:
- req = api_pb2.FunctionCallGetDataRequest(function_call_id=function_call_id, last_index=last_index)
- try:
- async for chunk in unary_stream(stub_fn, req):
- if chunk.index <= last_index:
- continue
- last_index = chunk.index
- if chunk.data_blob_id:
- message_bytes = await blob_download(chunk.data_blob_id, client.stub)
- else:
- message_bytes = chunk.data
- message = deserialize_data_format(message_bytes, chunk.data_format, client)
- yield message
- except (GRPCError, StreamTerminatedError) as exc:
- if retries_remaining > 0:
- retries_remaining -= 1
- if isinstance(exc, GRPCError):
- if exc.status in RETRYABLE_GRPC_STATUS_CODES:
- await asyncio.sleep(1.0)
- continue
- elif isinstance(exc, StreamTerminatedError):
- continue
- raise
-
-
- @dataclass
- class _OutputValue:
- # box class for distinguishing None results from non-existing/None markers
- value: Any
+ @dataclasses.dataclass
+ class _RetryContext:
+ function_call_invocation_type: "api_pb2.FunctionCallInvocationType.ValueType"
+ retry_policy: api_pb2.FunctionRetryPolicy
+ function_call_jwt: str
+ input_jwt: str
+ input_id: str
+ item: api_pb2.FunctionPutInputsItem


  class _Invocation:
  """Internal client representation of a single-input call to a Modal Function or Generator"""

- def __init__(self, stub: api_grpc.ModalClientStub, function_call_id: str, client: _Client):
+ stub: ModalClientModal
+
+ def __init__(
+ self,
+ stub: ModalClientModal,
+ function_call_id: str,
+ client: _Client,
+ retry_context: Optional[_RetryContext] = None,
+ ):
  self.stub = stub
  self.client = client # Used by the deserializer.
  self.function_call_id = function_call_id # TODO: remove and use only input_id
+ self._retry_context = retry_context

  @staticmethod
- async def create(function_id: str, args, kwargs, client: _Client) -> "_Invocation":
+ async def create(
+ function: "_Function",
+ args,
+ kwargs,
+ *,
+ client: _Client,
+ function_call_invocation_type: "api_pb2.FunctionCallInvocationType.ValueType",
+ ) -> "_Invocation":
  assert client.stub
- item = await _create_input(args, kwargs, client)
+ function_id = function.object_id
+ item = await _create_input(args, kwargs, client, method_name=function._use_method_name)

  request = api_pb2.FunctionMapRequest(
  function_id=function_id,
  parent_input_id=current_input_id() or "",
  function_call_type=api_pb2.FUNCTION_CALL_TYPE_UNARY,
  pipelined_inputs=[item],
+ function_call_invocation_type=function_call_invocation_type,
  )
  response = await retry_transient_errors(client.stub.FunctionMap, request)
  function_call_id = response.function_call_id

  if response.pipelined_inputs:
- return _Invocation(client.stub, function_call_id, client)
+ assert len(response.pipelined_inputs) == 1
+ input = response.pipelined_inputs[0]
+ retry_context = _RetryContext(
+ function_call_invocation_type=function_call_invocation_type,
+ retry_policy=response.retry_policy,
+ function_call_jwt=response.function_call_jwt,
+ input_jwt=input.input_jwt,
+ input_id=input.input_id,
+ item=item,
+ )
+ return _Invocation(client.stub, function_call_id, client, retry_context)

  request_put = api_pb2.FunctionPutInputsRequest(
  function_id=function_id, inputs=[item], function_call_id=function_call_id
@@ -259,11 +169,20 @@ class _Invocation:
  processed_inputs = inputs_response.inputs
  if not processed_inputs:
  raise Exception("Could not create function call - the input queue seems to be full")
- return _Invocation(client.stub, function_call_id, client)
+ input = inputs_response.inputs[0]
+ retry_context = _RetryContext(
+ function_call_invocation_type=function_call_invocation_type,
+ retry_policy=response.retry_policy,
+ function_call_jwt=response.function_call_jwt,
+ input_jwt=input.input_jwt,
+ input_id=input.input_id,
+ item=item,
+ )
+ return _Invocation(client.stub, function_call_id, client, retry_context)

  async def pop_function_call_outputs(
- self, timeout: Optional[float], clear_on_success: bool
- ) -> AsyncIterator[api_pb2.FunctionGetOutputsItem]:
+ self, timeout: Optional[float], clear_on_success: bool, input_jwts: Optional[list[str]] = None
+ ) -> api_pb2.FunctionGetOutputsResponse:
  t0 = time.time()
  if timeout is None:
  backend_timeout = OUTPUTS_TIMEOUT
@@ -277,53 +196,100 @@ class _Invocation:
  timeout=backend_timeout,
  last_entry_id="0-0",
  clear_on_success=clear_on_success,
+ requested_at=time.time(),
+ input_jwts=input_jwts,
  )
  response: api_pb2.FunctionGetOutputsResponse = await retry_transient_errors(
  self.stub.FunctionGetOutputs,
  request,
  attempt_timeout=backend_timeout + ATTEMPT_TIMEOUT_GRACE_PERIOD,
  )
+
  if len(response.outputs) > 0:
- for item in response.outputs:
- yield item
- return
+ return response

  if timeout is not None:
  # update timeout in retry loop
  backend_timeout = min(OUTPUTS_TIMEOUT, t0 + timeout - time.time())
  if backend_timeout < 0:
- break
+ # return the last response to check for state of num_unfinished_inputs
+ return response
+
+ async def _retry_input(self) -> None:
+ ctx = self._retry_context
+ if not ctx:
+ raise ValueError("Cannot retry input when _retry_context is empty.")
+
+ item = api_pb2.FunctionRetryInputsItem(input_jwt=ctx.input_jwt, input=ctx.item.input)
+ request = api_pb2.FunctionRetryInputsRequest(function_call_jwt=ctx.function_call_jwt, inputs=[item])
+ await retry_transient_errors(
+ self.client.stub.FunctionRetryInputs,
+ request,
+ )

- async def run_function(self) -> Any:
+ async def _get_single_output(self, expected_jwt: Optional[str] = None) -> Any:
  # waits indefinitely for a single result for the function, and clear the outputs buffer after
  item: api_pb2.FunctionGetOutputsItem = (
- await stream.list(self.pop_function_call_outputs(timeout=None, clear_on_success=True))
- )[0]
- assert not item.result.gen_status
+ await self.pop_function_call_outputs(
+ timeout=None,
+ clear_on_success=True,
+ input_jwts=[expected_jwt] if expected_jwt else None,
+ )
+ ).outputs[0]
  return await _process_result(item.result, item.data_format, self.stub, self.client)

+ async def run_function(self) -> Any:
+ # Use retry logic only if retry policy is specified and
+ ctx = self._retry_context
+ if (
+ not ctx
+ or not ctx.retry_policy
+ or ctx.retry_policy.retries == 0
+ or ctx.function_call_invocation_type != api_pb2.FUNCTION_CALL_INVOCATION_TYPE_SYNC
+ ):
+ return await self._get_single_output()
+
+ # User errors including timeouts are managed by the user specified retry policy.
+ user_retry_manager = RetryManager(ctx.retry_policy)
+
+ while True:
+ try:
+ return await self._get_single_output(ctx.input_jwt)
+ except (UserCodeException, FunctionTimeoutError) as exc:
+ await user_retry_manager.raise_or_sleep(exc)
+ except InternalFailure:
+ # For system failures on the server, we retry immediately.
+ pass
+ await self._retry_input()
+
  async def poll_function(self, timeout: Optional[float] = None):
  """Waits up to timeout for a result from a function.

  If timeout is `None`, waits indefinitely. This function is not
  cancellation-safe.
  """
- items: List[api_pb2.FunctionGetOutputsItem] = await stream.list(
- self.pop_function_call_outputs(timeout=timeout, clear_on_success=False)
+ response: api_pb2.FunctionGetOutputsResponse = await self.pop_function_call_outputs(
+ timeout=timeout, clear_on_success=False
  )
-
- if len(items) == 0:
+ if len(response.outputs) == 0 and response.num_unfinished_inputs == 0:
+ # if no unfinished inputs and no outputs, then function expired
+ raise OutputExpiredError()
+ elif len(response.outputs) == 0:
  raise TimeoutError()

- return await _process_result(items[0].result, items[0].data_format, self.stub, self.client)
+ return await _process_result(
+ response.outputs[0].result, response.outputs[0].data_format, self.stub, self.client
+ )

  async def run_generator(self):
- data_stream = _stream_function_call_data(self.client, self.function_call_id, variant="data_out")
- combined_stream = stream.merge(data_stream, stream.call(self.run_function)) # type: ignore
-
  items_received = 0
  items_total: Union[int, None] = None # populated when self.run_function() completes
- async with combined_stream.stream() as streamer:
+ async with aclosing(
+ async_merge(
+ _stream_function_call_data(self.client, self.function_call_id, variant="data_out"),
+ callable_to_agen(self.run_function),
+ )
+ ) as streamer:
  async for item in streamer:
  if isinstance(item, api_pb2.GeneratorDone):
  items_total = item.items_total
@@ -336,187 +302,29 @@ class _Invocation:
  break


- MAP_INVOCATION_CHUNK_SIZE = 49
-
-
- async def _map_invocation(
- function_id: str,
- input_stream: AsyncIterable[Any],
- kwargs: Dict[str, Any],
- client: _Client,
- order_outputs: bool,
- return_exceptions: bool,
- count_update_callback: Optional[Callable[[int, int], None]],
- ):
- assert client.stub
- request = api_pb2.FunctionMapRequest(
- function_id=function_id,
- parent_input_id=current_input_id() or "",
- function_call_type=api_pb2.FUNCTION_CALL_TYPE_MAP,
- return_exceptions=return_exceptions,
- )
- response = await retry_transient_errors(client.stub.FunctionMap, request)
-
- function_call_id = response.function_call_id
-
- have_all_inputs = False
- num_inputs = 0
- num_outputs = 0
- pending_outputs: Dict[str, int] = {} # Map input_id -> next expected gen_index value
- completed_outputs: Set[str] = set() # Set of input_ids whose outputs are complete (expecting no more values)
-
- input_queue: asyncio.Queue = asyncio.Queue()
-
- async def create_input(arg: Any) -> api_pb2.FunctionPutInputsItem:
- nonlocal num_inputs
- idx = num_inputs
- num_inputs += 1
- item = await _create_input(arg, kwargs, client, idx=idx)
- return item
-
- async def drain_input_generator():
- # Parallelize uploading blobs
- proto_input_stream = stream.iterate(input_stream) | pipe.map(
- create_input, # type: ignore[reportArgumentType]
- ordered=True,
- task_limit=BLOB_MAX_PARALLELISM,
- )
- async with proto_input_stream.stream() as streamer:
- async for item in streamer:
- await input_queue.put(item)
-
- # close queue iterator
- await input_queue.put(None)
- yield
-
- async def pump_inputs():
- assert client.stub
- nonlocal have_all_inputs
- async for items in queue_batch_iterator(input_queue, MAP_INVOCATION_CHUNK_SIZE):
- request = api_pb2.FunctionPutInputsRequest(
- function_id=function_id, inputs=items, function_call_id=function_call_id
- )
- logger.debug(
- f"Pushing {len(items)} inputs to server. Num queued inputs awaiting push is {input_queue.qsize()}."
- )
- resp = await retry_transient_errors(
- client.stub.FunctionPutInputs,
- request,
- max_retries=None,
- max_delay=10,
- additional_status_codes=[Status.RESOURCE_EXHAUSTED],
- )
- for item in resp.inputs:
- pending_outputs.setdefault(item.input_id, 0)
- logger.debug(
- f"Successfully pushed {len(items)} inputs to server. Num queued inputs awaiting push is {input_queue.qsize()}."
- )
-
- have_all_inputs = True
- yield
-
- async def get_all_outputs():
- assert client.stub
- nonlocal num_inputs, num_outputs, have_all_inputs
- last_entry_id = "0-0"
- while not have_all_inputs or len(pending_outputs) > len(completed_outputs):
- request = api_pb2.FunctionGetOutputsRequest(
- function_call_id=function_call_id,
- timeout=OUTPUTS_TIMEOUT,
- last_entry_id=last_entry_id,
- clear_on_success=False,
- )
- response = await retry_transient_errors(
- client.stub.FunctionGetOutputs,
- request,
- max_retries=20,
- attempt_timeout=OUTPUTS_TIMEOUT + ATTEMPT_TIMEOUT_GRACE_PERIOD,
- )
-
- if len(response.outputs) == 0:
- continue
-
- last_entry_id = response.last_entry_id
- for item in response.outputs:
- pending_outputs.setdefault(item.input_id, 0)
- if item.input_id in completed_outputs:
- # If this input is already completed, it means the output has already been
- # processed and was received again due to a duplicate.
- continue
- completed_outputs.add(item.input_id)
- num_outputs += 1
- yield item
-
- async def get_all_outputs_and_clean_up():
- assert client.stub
- try:
- async for item in get_all_outputs():
- yield item
- finally:
- # "ack" that we have all outputs we are interested in and let backend clear results
- request = api_pb2.FunctionGetOutputsRequest(
- function_call_id=function_call_id,
- timeout=0,
- last_entry_id="0-0",
- clear_on_success=True,
- )
- await retry_transient_errors(client.stub.FunctionGetOutputs, request)
-
- async def fetch_output(item: api_pb2.FunctionGetOutputsItem) -> Tuple[int, Any]:
- try:
- output = await _process_result(item.result, item.data_format, client.stub, client)
- except Exception as e:
- if return_exceptions:
- output = e
- else:
- raise e
- return (item.idx, output)
-
- async def poll_outputs():
- outputs = stream.iterate(get_all_outputs_and_clean_up())
- outputs_fetched = outputs | pipe.map(fetch_output, ordered=True, task_limit=BLOB_MAX_PARALLELISM) # type: ignore
-
- # map to store out-of-order outputs received
- received_outputs = {}
- output_idx = 0
-
- async with outputs_fetched.stream() as streamer:
- async for idx, output in streamer:
- if count_update_callback is not None:
- count_update_callback(num_outputs, num_inputs)
- if not order_outputs:
- yield _OutputValue(output)
- else:
- # hold on to outputs for function maps, so we can reorder them correctly.
- received_outputs[idx] = output
- while output_idx in received_outputs:
- output = received_outputs.pop(output_idx)
- yield _OutputValue(output)
- output_idx += 1
-
- assert len(received_outputs) == 0
-
- response_gen = stream.merge(drain_input_generator(), pump_inputs(), poll_outputs())
-
- async with response_gen.stream() as streamer:
- async for response in streamer:
- if response is not None:
- yield response.value
-
-
  # Wrapper type for api_pb2.FunctionStats
  @dataclass(frozen=True)
  class FunctionStats:
  """Simple data structure storing stats for a running function."""

  backlog: int
- num_active_runners: int
  num_total_runners: int

+ def __getattr__(self, name):
+ if name == "num_active_runners":
+ msg = (
+ "'FunctionStats.num_active_runners' is deprecated."
+ " It currently always has a value of 0,"
+ " but it will be removed in a future release."
+ )
+ deprecation_warning((2024, 6, 14), msg)
+ return 0
+ raise AttributeError(f"'FunctionStats' object has no attribute '{name}'")
+

  def _parse_retries(
  retries: Optional[Union[int, Retries]],
- raw_f: Optional[Callable] = None,
+ source: str = "",
  ) -> Optional[api_pb2.FunctionRetryPolicy]:
  if isinstance(retries, int):
  return Retries(
@@ -529,118 +337,168 @@ def _parse_retries(
  elif retries is None:
  return None
  else:
- err_object = f"Function {raw_f}" if raw_f else "Function"
- raise InvalidError(
- f"{err_object} retries must be an integer or instance of modal.Retries. Found: {type(retries)}"
- )
+ extra = f" on {source}" if source else ""
+ msg = f"Retries parameter must be an integer or instance of modal.Retries. Found: {type(retries)}{extra}."
+ raise InvalidError(msg)


  @dataclass
- class FunctionEnv:
+ class _FunctionSpec:
  """
- Stores information about the function environment. This is used for `modal shell` to support
- running shells in the same environment as a user-defined function.
+ Stores information about a Function specification.
+ This is used for `modal shell` to support running shells with
+ the same configuration as a user-defined Function.
  """

  image: Optional[_Image]
  mounts: Sequence[_Mount]
  secrets: Sequence[_Secret]
- network_file_systems: Dict[Union[str, PurePosixPath], _NetworkFileSystem]
- volumes: Dict[Union[str, PurePosixPath], Union[_Volume, _CloudBucketMount]]
- gpu: GPU_T
+ network_file_systems: dict[Union[str, PurePosixPath], _NetworkFileSystem]
+ volumes: dict[Union[str, PurePosixPath], Union[_Volume, _CloudBucketMount]]
+ gpus: Union[GPU_T, list[GPU_T]] # TODO(irfansharif): Somehow assert that it's the first kind, in sandboxes
  cloud: Optional[str]
- cpu: Optional[float]
- memory: Optional[int]
+ cpu: Optional[Union[float, tuple[float, float]]]
+ memory: Optional[Union[int, tuple[int, int]]]
+ ephemeral_disk: Optional[int]
+ scheduler_placement: Optional[SchedulerPlacement]
+ proxy: Optional[_Proxy]
+
+
+ P = typing_extensions.ParamSpec("P")
+ ReturnType = typing.TypeVar("ReturnType", covariant=True)
+ OriginalReturnType = typing.TypeVar(
+ "OriginalReturnType", covariant=True
+ ) # differs from return type if ReturnType is coroutine


- class _Function(_Object, type_prefix="fu"):
+ class _Function(typing.Generic[P, ReturnType, OriginalReturnType], _Object, type_prefix="fu"):
  """Functions are the basic units of serverless execution on Modal.

  Generally, you will not construct a `Function` directly. Instead, use the
- `@stub.function()` decorator on the `Stub` object for your application.
+ `App.function()` decorator to register your Python functions with your App.
  """

  # TODO: more type annotations
  _info: Optional[FunctionInfo]
- _all_mounts: Collection[_Mount]
- _stub: "modal.stub._Stub"
- _obj: Any
+ _serve_mounts: frozenset[_Mount] # set at load time, only by loader
+ _app: Optional["modal.app._App"] = None
+ _obj: Optional["modal.cls._Obj"] = None # only set for InstanceServiceFunctions and bound instance methods
  _web_url: Optional[str]
- _is_remote_cls_method: bool = False # TODO(erikbern): deprecated
  _function_name: Optional[str]
  _is_method: bool
- _env: FunctionEnv
+ _spec: Optional[_FunctionSpec] = None
  _tag: str
  _raw_f: Callable[..., Any]
  _build_args: dict
- _parent: "_Function"
+
+ _is_generator: Optional[bool] = None
+ _cluster_size: Optional[int] = None
+
+ # when this is the method of a class/object function, invocation of this function
+ # should supply the method name in the FunctionInput:
+ _use_method_name: str = ""
+
+ _class_parameter_info: Optional["api_pb2.ClassParameterInfo"] = None
+ _method_handle_metadata: Optional[dict[str, "api_pb2.FunctionHandleMetadata"]] = None
+
+ def _bind_method(
+ self,
+ user_cls,
+ method_name: str,
+ partial_function: "modal.partial_function._PartialFunction",
+ ):
+ """mdmd:hidden
+
+ Creates a _Function that is bound to a specific class method name. This _Function is not uniquely tied
+ to any backend function -- its object_id is the function ID of the class service function.
+
+ """
+ class_service_function = self
+ assert class_service_function._info # has to be a local function to be able to "bind" it
+ assert not class_service_function._is_method # should not be used on an already bound method placeholder
+ assert not class_service_function._obj # should only be used on base function / class service function
+ full_name = f"{user_cls.__name__}.{method_name}"
+
+ rep = f"Method({full_name})"
+ fun = _Object.__new__(_Function)
+ fun._init(rep)
+ fun._tag = full_name
+ fun._raw_f = partial_function.raw_f
+ fun._info = FunctionInfo(
+ partial_function.raw_f, user_cls=user_cls, serialized=class_service_function.info.is_serialized()
+ ) # needed for .local()
+ fun._use_method_name = method_name
+ fun._app = class_service_function._app
+ fun._is_generator = partial_function.is_generator
+ fun._cluster_size = partial_function.cluster_size
+ fun._spec = class_service_function._spec
+ fun._is_method = True
+ return fun

  @staticmethod
  def from_args(
  info: FunctionInfo,
- stub,
+ app,
  image: _Image,
- secret: Optional[_Secret] = None,
  secrets: Sequence[_Secret] = (),
  schedule: Optional[Schedule] = None,
- is_generator=False,
- gpu: GPU_T = None,
+ is_generator: bool = False,
+ gpu: Union[GPU_T, list[GPU_T]] = None,
  # TODO: maybe break this out into a separate decorator for notebooks.
  mounts: Collection[_Mount] = (),
- network_file_systems: Dict[Union[str, PurePosixPath], _NetworkFileSystem] = {},
+ network_file_systems: dict[Union[str, PurePosixPath], _NetworkFileSystem] = {},
  allow_cross_region_volumes: bool = False,
- volumes: Dict[Union[str, PurePosixPath], Union[_Volume, _CloudBucketMount]] = {},
+ volumes: dict[Union[str, PurePosixPath], Union[_Volume, _CloudBucketMount]] = {},
  webhook_config: Optional[api_pb2.WebhookConfig] = None,
- memory: Optional[int] = None,
+ memory: Optional[Union[int, tuple[int, int]]] = None,
  proxy: Optional[_Proxy] = None,
  retries: Optional[Union[int, Retries]] = None,
  timeout: Optional[int] = None,
  concurrency_limit: Optional[int] = None,
  allow_concurrent_inputs: Optional[int] = None,
+ batch_max_size: Optional[int] = None,
+ batch_wait_ms: Optional[int] = None,
  container_idle_timeout: Optional[int] = None,
- cpu: Optional[float] = None,
+ cpu: Optional[Union[float, tuple[float, float]]] = None,
  keep_warm: Optional[int] = None, # keep_warm=True is equivalent to keep_warm=1
  cloud: Optional[str] = None,
- _experimental_boost: bool = False,
- _experimental_scheduler: bool = False,
- _experimental_scheduler_placement: Optional[SchedulerPlacement] = None,
+ scheduler_placement: Optional[SchedulerPlacement] = None,
  is_builder_function: bool = False,
  is_auto_snapshot: bool = False,
  enable_memory_snapshot: bool = False,
- checkpointing_enabled: Optional[bool] = None,
- allow_background_volume_commits: bool = False,
  block_network: bool = False,
+ i6pn_enabled: bool = False,
+ cluster_size: Optional[int] = None, # Experimental: Clustered functions
  max_inputs: Optional[int] = None,
+ ephemeral_disk: Optional[int] = None,
+ _experimental_buffer_containers: Optional[int] = None,
+ _experimental_proxy_ip: Optional[str] = None,
+ _experimental_custom_scaling_factor: Optional[float] = None,
  ) -> None:
  """mdmd:hidden"""
+ # Needed to avoid circular imports
+ from .partial_function import _find_partial_methods_for_user_cls, _PartialFunctionFlags
+
  tag = info.get_tag()

- raw_f = info.raw_f
- assert callable(raw_f)
- if schedule is not None:
- if not info.is_nullary():
+ if info.raw_f:
+ raw_f = info.raw_f
+ assert callable(raw_f)
+ if schedule is not None and not info.is_nullary():
  raise InvalidError(
  f"Function {raw_f} has a schedule, so it needs to support being called with no arguments"
  )
-
- if secret is not None:
- deprecation_warning(
- (2024, 1, 31),
- "The singular `secret` parameter is deprecated. Pass a list to `secrets` instead.",
- )
- secrets = [secret, *secrets]
-
- if checkpointing_enabled is not None:
- deprecation_warning(
- (2024, 3, 4),
- "The argument `checkpointing_enabled` is now deprecated. Use `enable_memory_snapshot` instead.",
- )
- enable_memory_snapshot = checkpointing_enabled
+ else:
+ # must be a "class service function"
+ assert info.user_cls
+ assert not webhook_config
+ assert not schedule

  explicit_mounts = mounts

  if is_local():
  entrypoint_mounts = info.get_entrypoint_mount()
+
  all_mounts = [
  _get_client_mount(),
  *explicit_mounts,
@@ -648,45 +506,57 @@ class _Function(_Object, type_prefix="fu"):
  ]

  if config.get("automount"):
- automounts = info.get_auto_mounts()
- all_mounts += automounts
+ all_mounts += get_auto_mounts()
  else:
  # skip any mount introspection/logic inside containers, since the function
  # should already be hydrated
  # TODO: maybe the entire constructor should be exited early if not local?
  all_mounts = []

- retry_policy = _parse_retries(retries, raw_f)
+ retry_policy = _parse_retries(
+ retries, f"Function '{info.get_tag()}'" if info.raw_f else f"Class '{info.get_tag()}'"
+ )

- gpu_config = parse_gpu_config(gpu)
+ if webhook_config is not None and retry_policy is not None:
+ raise InvalidError(
+ "Web endpoints do not support retries.",
+ )
+
+ if is_generator and retry_policy is not None:
+ deprecation_warning(
+ (2024, 6, 25),
+ "Retries for generator functions are deprecated and will soon be removed.",
+ )

  if proxy:
  # HACK: remove this once we stop using ssh tunnels for this.
  if image:
+ # TODO(elias): this will cause an error if users use prior `.add_local_*` commands without copy=True
  image = image.apt_install("autossh")

- function_env = FunctionEnv(
+ function_spec = _FunctionSpec(
  mounts=all_mounts,
  secrets=secrets,
- gpu=gpu,
+ gpus=gpu,
  network_file_systems=network_file_systems,
  volumes=volumes,
  image=image,
  cloud=cloud,
  cpu=cpu,
  memory=memory,
+ ephemeral_disk=ephemeral_disk,
+ scheduler_placement=scheduler_placement,
+ proxy=proxy,
  )

- if info.cls and not is_auto_snapshot:
- # Needed to avoid circular imports
- from .partial_function import _find_callables_for_cls, _PartialFunctionFlags
-
- build_functions = list(_find_callables_for_cls(info.cls, _PartialFunctionFlags.BUILD).values())
- for build_function in build_functions:
- snapshot_info = FunctionInfo(build_function, cls=info.cls)
+ if info.user_cls and not is_auto_snapshot:
+ build_functions = _find_partial_methods_for_user_cls(info.user_cls, _PartialFunctionFlags.BUILD).items()
+ for k, pf in build_functions:
+ build_function = pf.raw_f
+ snapshot_info = FunctionInfo(build_function, user_cls=info.user_cls)
  snapshot_function = _Function.from_args(
  snapshot_info,
- stub=None,
+ app=None,
  image=image,
  secrets=secrets,
  gpu=gpu,
@@ -694,16 +564,17 @@ class _Function(_Object, type_prefix="fu"):
  network_file_systems=network_file_systems,
  volumes=volumes,
  memory=memory,
- timeout=86400, # TODO: make this an argument to `@build()`
+ timeout=pf.build_timeout,
  cpu=cpu,
+ ephemeral_disk=ephemeral_disk,
  is_builder_function=True,
  is_auto_snapshot=True,
- _experimental_scheduler_placement=_experimental_scheduler_placement,
+ scheduler_placement=scheduler_placement,
  )
  image = _Image._from_args(
  base_images={"base": image},
  build_function=snapshot_function,
- force_build=image.force_build,
+ force_build=image.force_build or pf.force_build,
  )

  if keep_warm is not None and not isinstance(keep_warm, int):
@@ -711,9 +582,15 @@ class _Function(_Object, type_prefix="fu"):

  if (keep_warm is not None) and (concurrency_limit is not None) and concurrency_limit < keep_warm:
  raise InvalidError(
- f"Function `{info.function_name}` has `{concurrency_limit=}`, strictly less than its `{keep_warm=}` parameter."
+ f"Function `{info.function_name}` has `{concurrency_limit=}`, "
+ f"strictly less than its `{keep_warm=}` parameter."
  )

+ if _experimental_custom_scaling_factor is not None and (
+ _experimental_custom_scaling_factor < 0 or _experimental_custom_scaling_factor > 1
+ ):
+ raise InvalidError("`_experimental_custom_scaling_factor` must be between 0.0 and 1.0 inclusive.")
+
  if not cloud and not is_builder_function:
  cloud = config.get("default_cloud")
  if cloud:
@@ -730,22 +607,56 @@ class _Function(_Object, type_prefix="fu"):
  else:
  raise InvalidError("Webhooks cannot be generators")

+ if info.raw_f and batch_max_size:
+ func_name = info.raw_f.__name__
+ if is_generator:
+ raise InvalidError(f"Modal batched function {func_name} cannot return generators")
+ for arg in inspect.signature(info.raw_f).parameters.values():
+ if arg.default is not inspect.Parameter.empty:
+ raise InvalidError(f"Modal batched function {func_name} does not accept default arguments.")
+
+ if container_idle_timeout is not None and container_idle_timeout <= 0:
+ raise InvalidError("`container_idle_timeout` must be > 0")
+
+ if max_inputs is not None:
+ if not isinstance(max_inputs, int):
+ raise InvalidError(f"`max_inputs` must be an int, not {type(max_inputs).__name__}")
+ if max_inputs <= 0:
+ raise InvalidError("`max_inputs` must be positive")
+ if max_inputs > 1:
+ raise InvalidError("Only `max_inputs=1` is currently supported")
+
  # Validate volumes
  validated_volumes = validate_volumes(volumes)
  cloud_bucket_mounts = [(k, v) for k, v in validated_volumes if isinstance(v, _CloudBucketMount)]
  validated_volumes = [(k, v) for k, v in validated_volumes if isinstance(v, _Volume)]

  # Validate NFS
- if not isinstance(network_file_systems, dict):
- raise InvalidError("network_file_systems must be a dict[str, NetworkFileSystem] where the keys are paths")
- validated_network_file_systems = validate_mount_points("Network file system", network_file_systems)
+ validated_network_file_systems = validate_network_file_systems(network_file_systems)

  # Validate image
  if image is not None and not isinstance(image, _Image):
  raise InvalidError(f"Expected modal.Image object. Got {type(image)}.")

- def _deps(only_explicit_mounts=False) -> List[_Object]:
- deps: List[_Object] = list(secrets)
+ method_definitions: Optional[dict[str, api_pb2.MethodDefinition]] = None
+
+ if info.user_cls:
+ method_definitions = {}
+ partial_functions = _find_partial_methods_for_user_cls(info.user_cls, _PartialFunctionFlags.FUNCTION)
+ for method_name, partial_function in partial_functions.items():
+ function_type = get_function_type(partial_function.is_generator)
+ function_name = f"{info.user_cls.__name__}.{method_name}"
+ method_definition = api_pb2.MethodDefinition(
+ webhook_config=partial_function.webhook_config,
+ function_type=function_type,
+ function_name=function_name,
+ )
+ method_definitions[method_name] = method_definition
+
+ function_type = get_function_type(is_generator)
+
+ def _deps(only_explicit_mounts=False) -> list[_Object]:
+ deps: list[_Object] = list(secrets)
  if only_explicit_mounts:
  # TODO: this is a bit hacky, but all_mounts may differ in the container vs locally
  # We don't want the function dependencies to change, so we have this way to force it to
@@ -769,271 +680,358 @@ class _Function(_Object, type_prefix="fu"):
  if cloud_bucket_mount.secret:
  deps.append(cloud_bucket_mount.secret)

- # Add implicit dependencies from the function's code
- objs: list[Object] = get_referred_objects(info.raw_f)
- _objs: list[_Object] = synchronizer._translate_in(objs) # type: ignore
- deps += _objs
  return deps

  async def _preload(self: _Function, resolver: Resolver, existing_object_id: Optional[str]):
  assert resolver.client and resolver.client.stub
- if is_generator:
- function_type = api_pb2.Function.FUNCTION_TYPE_GENERATOR
- else:
- function_type = api_pb2.Function.FUNCTION_TYPE_FUNCTION

+ assert resolver.app_id
  req = api_pb2.FunctionPrecreateRequest(
  app_id=resolver.app_id,
  function_name=info.function_name,
  function_type=function_type,
- webhook_config=webhook_config,
  existing_function_id=existing_object_id or "",
  )
+ if method_definitions:
+ for method_name, method_definition in method_definitions.items():
+ req.method_definitions[method_name].CopyFrom(method_definition)
+ elif webhook_config:
+ req.webhook_config.CopyFrom(webhook_config)
  response = await retry_transient_errors(resolver.client.stub.FunctionPrecreate, req)
  self._hydrate(response.function_id, resolver.client, response.handle_metadata)

  async def _load(self: _Function, resolver: Resolver, existing_object_id: Optional[str]):
  assert resolver.client and resolver.client.stub
- status_row = resolver.add_status_row()
- status_row.message(f"Creating {tag}...")
-
- if is_generator:
- function_type = api_pb2.Function.FUNCTION_TYPE_GENERATOR
- else:
- function_type = api_pb2.Function.FUNCTION_TYPE_FUNCTION
-
- if cpu is not None and cpu < 0.25:
- raise InvalidError(f"Invalid fractional CPU value {cpu}. Cannot have less than 0.25 CPU resources.")
- milli_cpu = int(1000 * cpu) if cpu is not None else 0
-
- timeout_secs = timeout
+ with FunctionCreationStatus(resolver, tag) as function_creation_status:
+ timeout_secs = timeout

- if stub and stub.is_interactive and not is_builder_function:
- pty_info = _pty.get_pty_info(shell=False)
- else:
- pty_info = None
-
- if info.is_serialized():
- # Use cloudpickle. Used when working w/ Jupyter notebooks.
- # serialize at _load time, not function decoration time
- # otherwise we can't capture a surrounding class for lifetime methods etc.
- function_serialized = info.serialized_function()
- class_serialized = serialize(info.cls) if info.cls is not None else None
-
- # Ensure that large data in global variables does not blow up the gRPC payload,
- # which has maximum size 100 MiB. We set the limit lower for performance reasons.
- if len(function_serialized) > 16 << 20: # 16 MiB
- raise InvalidError(
- f"Function {info.raw_f} has size {len(function_serialized)} bytes when packaged. "
- "This is larger than the maximum limit of 16 MiB. "
- "Try reducing the size of the closure by using parameters or mounts, not large global variables."
- )
- elif len(function_serialized) > 256 << 10: # 256 KiB
- warnings.warn(
- f"Function {info.raw_f} has size {len(function_serialized)} bytes when packaged. "
- "This is larger than the recommended limit of 256 KiB. "
- "Try reducing the size of the closure by using parameters or mounts, not large global variables."
+ if app and app.is_interactive and not is_builder_function:
+ pty_info = get_pty_info(shell=False)
+ else:
+ pty_info = None
+
+ if info.is_serialized():
+ # Use cloudpickle. Used when working w/ Jupyter notebooks.
+ # serialize at _load time, not function decoration time
+ # otherwise we can't capture a surrounding class for lifetime methods etc.
+ function_serialized = info.serialized_function()
+ class_serialized = serialize(info.user_cls) if info.user_cls is not None else None
+ # Ensure that large data in global variables does not blow up the gRPC payload,
+ # which has maximum size 100 MiB. We set the limit lower for performance reasons.
+ if len(function_serialized) > 16 << 20: # 16 MiB
+ raise InvalidError(
+ f"Function {info.raw_f} has size {len(function_serialized)} bytes when packaged. "
+ "This is larger than the maximum limit of 16 MiB. "
+ "Try reducing the size of the closure by using parameters or mounts, "
+ "not large global variables."
+ )
+ elif len(function_serialized) > 256 << 10: # 256 KiB
+ warnings.warn(
+ f"Function {info.raw_f} has size {len(function_serialized)} bytes when packaged. "
+ "This is larger than the recommended limit of 256 KiB. "
+ "Try reducing the size of the closure by using parameters or mounts, "
+ "not large global variables."
+ )
+ else:
+ function_serialized = None
+ class_serialized = None
+
+ app_name = ""
+ if app and app.name:
+ app_name = app.name
+
+ # Relies on dicts being ordered (true as of Python 3.6).
+ volume_mounts = [
+ api_pb2.VolumeMount(
+ mount_path=path,
+ volume_id=volume.object_id,
+ allow_background_commits=True,
  )
- else:
- function_serialized = None
- class_serialized = None
-
- stub_name = ""
- if stub and stub.name:
- stub_name = stub.name
-
- # Relies on dicts being ordered (true as of Python 3.6).
- volume_mounts = [
- api_pb2.VolumeMount(
- mount_path=path,
- volume_id=volume.object_id,
- allow_background_commits=allow_background_volume_commits,
+ for path, volume in validated_volumes
+ ]
+ loaded_mount_ids = {m.object_id for m in all_mounts} | {m.object_id for m in image._mount_layers}
+
+ # Get object dependencies
+ object_dependencies = []
+ for dep in _deps(only_explicit_mounts=True):
+ if not dep.object_id:
+ raise Exception(f"Dependency {dep} isn't hydrated")
+ object_dependencies.append(api_pb2.ObjectDependency(object_id=dep.object_id))
+
+ function_data: Optional[api_pb2.FunctionData] = None
+ function_definition: Optional[api_pb2.Function] = None
+
+ # Create function remotely
+ function_definition = api_pb2.Function(
+ module_name=info.module_name or "",
+ function_name=info.function_name,
+ mount_ids=loaded_mount_ids,
+ secret_ids=[secret.object_id for secret in secrets],
+ image_id=(image.object_id if image else ""),
+ definition_type=info.get_definition_type(),
+ function_serialized=function_serialized or b"",
+ class_serialized=class_serialized or b"",
+ function_type=function_type,
+ webhook_config=webhook_config,
+ method_definitions=method_definitions,
+ method_definitions_set=True,
+ shared_volume_mounts=network_file_system_mount_protos(
+ validated_network_file_systems, allow_cross_region_volumes
+ ),
+ volume_mounts=volume_mounts,
+ proxy_id=(proxy.object_id if proxy else None),
+ retry_policy=retry_policy,
+ timeout_secs=timeout_secs or 0,
+ task_idle_timeout_secs=container_idle_timeout or 0,
+ concurrency_limit=concurrency_limit or 0,
+ pty_info=pty_info,
+ cloud_provider=cloud_provider,
+ warm_pool_size=keep_warm or 0,
+ runtime=config.get("function_runtime"),
+ runtime_debug=config.get("function_runtime_debug"),
+ runtime_perf_record=config.get("runtime_perf_record"),
+ app_name=app_name,
+ is_builder_function=is_builder_function,
+ target_concurrent_inputs=allow_concurrent_inputs or 0,
+ batch_max_size=batch_max_size or 0,
+ batch_linger_ms=batch_wait_ms or 0,
+ worker_id=config.get("worker_id"),
+ is_auto_snapshot=is_auto_snapshot,
+ is_method=bool(info.user_cls) and not info.is_service_class(),
+ checkpointing_enabled=enable_memory_snapshot,
+ object_dependencies=object_dependencies,
+ block_network=block_network,
+ max_inputs=max_inputs or 0,
+ cloud_bucket_mounts=cloud_bucket_mounts_to_proto(cloud_bucket_mounts),
+ scheduler_placement=scheduler_placement.proto if scheduler_placement else None,
+ is_class=info.is_service_class(),
+ class_parameter_info=info.class_parameter_info(),
+ i6pn_enabled=i6pn_enabled,
+ schedule=schedule.proto_message if schedule is not None else None,
+ snapshot_debug=config.get("snapshot_debug"),
+ _experimental_group_size=cluster_size or 0, # Experimental: Clustered functions
+ _experimental_concurrent_cancellations=True,
+ _experimental_buffer_containers=_experimental_buffer_containers or 0,
+ _experimental_proxy_ip=_experimental_proxy_ip,
+ _experimental_custom_scaling=_experimental_custom_scaling_factor is not None,
  )
- for path, volume in validated_volumes
- ]
- loaded_mount_ids = {m.object_id for m in all_mounts}
-
- # Get object dependencies
- object_dependencies = []
- for dep in _deps(only_explicit_mounts=True):
859
- if not dep.object_id:
860
- raise Exception(f"Dependency {dep} isn't hydrated")
861
- object_dependencies.append(api_pb2.ObjectDependency(object_id=dep.object_id))
862
-
863
- # Create function remotely
864
- function_definition = api_pb2.Function(
865
- module_name=info.module_name or "",
866
- function_name=info.function_name,
867
- mount_ids=loaded_mount_ids,
868
- secret_ids=[secret.object_id for secret in secrets],
869
- image_id=(image.object_id if image else ""),
870
- definition_type=info.definition_type,
871
- function_serialized=function_serialized or b"",
872
- class_serialized=class_serialized or b"",
873
- function_type=function_type,
874
- resources=api_pb2.Resources(milli_cpu=milli_cpu, gpu_config=gpu_config, memory_mb=memory or 0),
875
- webhook_config=webhook_config,
876
- shared_volume_mounts=network_file_system_mount_protos(
877
- validated_network_file_systems, allow_cross_region_volumes
878
- ),
879
- volume_mounts=volume_mounts,
880
- proxy_id=(proxy.object_id if proxy else None),
881
- retry_policy=retry_policy,
882
- timeout_secs=timeout_secs or 0,
883
- task_idle_timeout_secs=container_idle_timeout or 0,
884
- concurrency_limit=concurrency_limit or 0,
885
- pty_info=pty_info,
886
- cloud_provider=cloud_provider,
887
- warm_pool_size=keep_warm or 0,
888
- runtime=config.get("function_runtime"),
889
- runtime_debug=config.get("function_runtime_debug"),
890
- stub_name=stub_name,
891
- is_builder_function=is_builder_function,
892
- allow_concurrent_inputs=allow_concurrent_inputs or 0,
893
- worker_id=config.get("worker_id"),
894
- is_auto_snapshot=is_auto_snapshot,
895
- is_method=bool(info.cls),
896
- checkpointing_enabled=enable_memory_snapshot,
897
- is_checkpointing_function=False,
898
- object_dependencies=object_dependencies,
899
- block_network=block_network,
900
- max_inputs=max_inputs or 0,
901
- cloud_bucket_mounts=cloud_bucket_mounts_to_proto(cloud_bucket_mounts),
902
- _experimental_boost=_experimental_boost,
903
- _experimental_scheduler=_experimental_scheduler,
904
- _experimental_scheduler_placement=_experimental_scheduler_placement.proto
905
- if _experimental_scheduler_placement
906
- else None,
907
- )
908
- request = api_pb2.FunctionCreateRequest(
909
- app_id=resolver.app_id,
910
- function=function_definition,
911
- schedule=schedule.proto_message if schedule is not None else None,
912
- existing_function_id=existing_object_id or "",
913
- )
914
- try:
915
- response: api_pb2.FunctionCreateResponse = await retry_transient_errors(
916
- resolver.client.stub.FunctionCreate, request
917
- )
918
- except GRPCError as exc:
919
- if exc.status == Status.INVALID_ARGUMENT:
920
- raise InvalidError(exc.message)
921
- if exc.status == Status.FAILED_PRECONDITION:
922
- raise InvalidError(exc.message)
923
- if exc.message and "Received :status = '413'" in exc.message:
924
- raise InvalidError(f"Function {raw_f} is too large to deploy.")
925
- raise
926
-
927
- if response.function.web_url:
928
- # Ensure terms used here match terms used in modal.com/docs/guide/webhook-urls doc.
929
- if response.function.web_url_info.truncated:
930
- suffix = " [grey70](label truncated)[/grey70]"
931
- elif response.function.web_url_info.has_unique_hash:
932
- suffix = " [grey70](label includes conflict-avoidance hash)[/grey70]"
933
- elif response.function.web_url_info.label_stolen:
934
- suffix = " [grey70](label stolen)[/grey70]"
935
- else:
936
- suffix = ""
937
- # TODO: this is only printed when we're showing progress. Maybe move this somewhere else.
938
- status_row.finish(f"Created {tag} => [magenta underline]{response.web_url}[/magenta underline]{suffix}")
939
-
940
- # Print custom domain in terminal
941
- for custom_domain in response.function.custom_domain_info:
942
- custom_domain_status_row = resolver.add_status_row()
943
- custom_domain_status_row.finish(
944
- f"Custom domain for {tag} => [magenta underline]{custom_domain.url}[/magenta underline]{suffix}"
818
+
819
+ if isinstance(gpu, list):
820
+ function_data = api_pb2.FunctionData(
821
+ module_name=function_definition.module_name,
822
+ function_name=function_definition.function_name,
823
+ function_type=function_definition.function_type,
824
+ warm_pool_size=function_definition.warm_pool_size,
825
+ concurrency_limit=function_definition.concurrency_limit,
826
+ task_idle_timeout_secs=function_definition.task_idle_timeout_secs,
827
+ worker_id=function_definition.worker_id,
828
+ timeout_secs=function_definition.timeout_secs,
829
+ web_url=function_definition.web_url,
830
+ web_url_info=function_definition.web_url_info,
831
+ webhook_config=function_definition.webhook_config,
832
+ custom_domain_info=function_definition.custom_domain_info,
833
+ schedule=schedule.proto_message if schedule is not None else None,
834
+ is_class=function_definition.is_class,
835
+ class_parameter_info=function_definition.class_parameter_info,
836
+ is_method=function_definition.is_method,
837
+ use_function_id=function_definition.use_function_id,
838
+ use_method_name=function_definition.use_method_name,
839
+ method_definitions=function_definition.method_definitions,
840
+ method_definitions_set=function_definition.method_definitions_set,
841
+ _experimental_group_size=function_definition._experimental_group_size,
842
+ _experimental_buffer_containers=function_definition._experimental_buffer_containers,
843
+ _experimental_custom_scaling=function_definition._experimental_custom_scaling,
844
+ _experimental_proxy_ip=function_definition._experimental_proxy_ip,
845
+ snapshot_debug=function_definition.snapshot_debug,
846
+ runtime_perf_record=function_definition.runtime_perf_record,
945
847
  )
946
848
 
947
- else:
948
- status_row.finish(f"Created {tag}.")
849
+ ranked_functions = []
850
+ for rank, _gpu in enumerate(gpu):
851
+ function_definition_copy = api_pb2.Function()
852
+ function_definition_copy.CopyFrom(function_definition)
853
+
854
+ function_definition_copy.resources.CopyFrom(
855
+ convert_fn_config_to_resources_config(
856
+ cpu=cpu, memory=memory, gpu=_gpu, ephemeral_disk=ephemeral_disk
857
+ ),
858
+ )
859
+ ranked_function = api_pb2.FunctionData.RankedFunction(
860
+ rank=rank,
861
+ function=function_definition_copy,
862
+ )
863
+ ranked_functions.append(ranked_function)
864
+ function_data.ranked_functions.extend(ranked_functions)
865
+ function_definition = None # function_definition is not used in this case
866
+ else:
867
+ # TODO(irfansharif): Assert on this specific type once we get rid of python 3.9.
868
+ # assert isinstance(gpu, GPU_T) # includes the case where gpu==None case
869
+ function_definition.resources.CopyFrom(
870
+ convert_fn_config_to_resources_config(
871
+ cpu=cpu, memory=memory, gpu=gpu, ephemeral_disk=ephemeral_disk
872
+ ), # type: ignore
873
+ )
949
874
 
875
+ assert resolver.app_id
876
+ assert (function_definition is None) != (function_data is None) # xor
877
+ request = api_pb2.FunctionCreateRequest(
878
+ app_id=resolver.app_id,
879
+ function=function_definition,
880
+ function_data=function_data,
881
+ existing_function_id=existing_object_id or "",
882
+ defer_updates=True,
883
+ )
884
+ try:
885
+ response: api_pb2.FunctionCreateResponse = await retry_transient_errors(
886
+ resolver.client.stub.FunctionCreate, request
887
+ )
888
+ except GRPCError as exc:
889
+ if exc.status == Status.INVALID_ARGUMENT:
890
+ raise InvalidError(exc.message)
891
+ if exc.status == Status.FAILED_PRECONDITION:
892
+ raise InvalidError(exc.message)
893
+ if exc.message and "Received :status = '413'" in exc.message:
894
+ raise InvalidError(f"Function {info.function_name} is too large to deploy.")
895
+ raise
896
+ function_creation_status.set_response(response)
897
+ serve_mounts = {m for m in all_mounts if m.is_local()} # needed for modal.serve file watching
898
+ serve_mounts |= image._serve_mounts
899
+ obj._serve_mounts = frozenset(serve_mounts)
950
900
  self._hydrate(response.function_id, resolver.client, response.handle_metadata)
951
901
 
952
902
  rep = f"Function({tag})"
953
903
  obj = _Function._from_loader(_load, rep, preload=_preload, deps=_deps)
954
904
 
955
- obj._raw_f = raw_f
905
+ obj._raw_f = info.raw_f
956
906
  obj._info = info
957
907
  obj._tag = tag
958
- obj._all_mounts = all_mounts # needed for modal.serve file watching
959
- obj._stub = stub # needed for CLI right now
908
+ obj._app = app # needed for CLI right now
960
909
  obj._obj = None
961
910
  obj._is_generator = is_generator
962
- obj._is_method = bool(info.cls)
963
- obj._env = function_env # needed for modal shell
911
+ obj._cluster_size = cluster_size
912
+ obj._is_method = False
913
+ obj._spec = function_spec # needed for modal shell
964
914
 
965
- # Used to check whether we should rebuild an image using run_function
966
- # Plaintext source and arg definition for the function, so it's part of the image
967
- # hash. We can't use the cloudpickle hash because it's not very stable.
915
+ # Used to check whether we should rebuild a modal.Image which uses `run_function`.
916
+ gpus: list[GPU_T] = gpu if isinstance(gpu, list) else [gpu]
968
917
  obj._build_args = dict( # See get_build_def
969
918
  secrets=repr(secrets),
970
- gpu_config=repr(gpu_config),
919
+ gpu_config=repr([parse_gpu_config(_gpu) for _gpu in gpus]),
971
920
  mounts=repr(mounts),
972
921
  network_file_systems=repr(network_file_systems),
973
922
  )
923
+ # these key are excluded if empty to avoid rebuilds on client upgrade
924
+ if volumes:
925
+ obj._build_args["volumes"] = repr(volumes)
926
+ if cloud or scheduler_placement:
927
+ obj._build_args["cloud"] = repr(cloud)
928
+ obj._build_args["scheduler_placement"] = repr(scheduler_placement)
974
929
 
975
930
  return obj
976
931
 
977
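The new `FunctionData`/`RankedFunction` path above kicks in when a list of GPU types is supplied: each entry is converted with `convert_fn_config_to_resources_config` and registered in rank order, so the scheduler can fall back from the preferred GPU to the next one. A minimal sketch of how this surfaces in user code, assuming a list is passed to the `gpu=` parameter of `@app.function` (the app name and function body here are hypothetical):

```python
import modal

app = modal.App("gpu-fallback-demo")  # hypothetical app name

# Rank 0 ("h100") is preferred; rank 1 ("a100") is the fallback the
# scheduler uses when the preferred GPU type is unavailable.
@app.function(gpu=["h100", "a100"])
def train(batch: list) -> float:
    return sum(batch)
```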
-    def from_parametrized(
+    def _bind_parameters(
        self,
-        obj,
-        from_other_workspace: bool,
+        obj: "modal.cls._Obj",
        options: Optional[api_pb2.FunctionOptions],
        args: Sized,
-        kwargs: Dict[str, Any],
+        kwargs: dict[str, Any],
    ) -> "_Function":
-        """mdmd:hidden"""
+        """mdmd:hidden
 
-        async def _load(self: _Function, resolver: Resolver, existing_object_id: Optional[str]):
-            if not self._parent.is_hydrated:
+        Binds a class-function to a specific instance of (init params, options) or a new workspace
+        """
+
+        # In some cases, reuse the base function, i.e. not create new clones of each method or the "service function"
+        can_use_parent = len(args) + len(kwargs) == 0 and options is None
+        parent = self
+
+        async def _load(param_bound_func: _Function, resolver: Resolver, existing_object_id: Optional[str]):
+            if parent is None:
+                raise ExecutionError("Can't find the parent class' service function")
+            try:
+                identity = f"{parent.info.function_name} class service function"
+            except Exception:
+                # Can't always look up the function name that way, so fall back to generic message
+                identity = "class service function for a parameterized class"
+            if not parent.is_hydrated:
+                if parent.app._running_app is None:
+                    reason = ", because the App it is defined on is not running"
+                else:
+                    reason = ""
                raise ExecutionError(
-                    "Base function in class has not been hydrated. This might happen if an object is"
-                    " defined on a different stub, or if it's on the same stub but it didn't get"
-                    " created because it wasn't defined in global scope."
+                    f"The {identity} has not been hydrated with the metadata it needs to run on Modal{reason}."
                )
-            assert self._parent._client.stub
-            serialized_params = serialize((args, kwargs))
+
+            assert parent._client.stub
+
+            if can_use_parent:
+                # We can end up here if parent wasn't hydrated when class was instantiated, but has been since.
+                param_bound_func._hydrate_from_other(parent)
+                return
+
+            if (
+                parent._class_parameter_info
+                and parent._class_parameter_info.format == api_pb2.ClassParameterInfo.PARAM_SERIALIZATION_FORMAT_PROTO
+            ):
+                if args:
+                    # TODO(elias) - We could potentially support positional args as well, if we want to?
+                    raise InvalidError(
+                        "Can't use positional arguments with modal.parameter-based synthetic constructors.\n"
+                        "Use (<parameter_name>=value) keyword arguments when constructing classes instead."
+                    )
+                serialized_params = serialize_proto_params(kwargs, parent._class_parameter_info.schema)
+            else:
+                serialized_params = serialize((args, kwargs))
            environment_name = _get_environment_name(None, resolver)
+            assert parent is not None
            req = api_pb2.FunctionBindParamsRequest(
-                function_id=self._parent._object_id,
+                function_id=parent._object_id,
                serialized_params=serialized_params,
                function_options=options,
                environment_name=environment_name
                or "",  # TODO: investigate shouldn't environment name always be specified here?
            )
-            response = await retry_transient_errors(self._parent._client.stub.FunctionBindParams, req)
-            self._hydrate(response.bound_function_id, self._parent._client, response.handle_metadata)
-
-        fun = _Function._from_loader(_load, "Function(parametrized)", hydrate_lazily=True)
-        if len(args) + len(kwargs) == 0 and not from_other_workspace and options is None and self.is_hydrated:
-            # Edge case that lets us hydrate all objects right away
-            fun._hydrate_from_other(self)
-        fun._is_remote_cls_method = True  # TODO(erikbern): deprecated
+
+            response = await retry_transient_errors(parent._client.stub.FunctionBindParams, req)
+            param_bound_func._hydrate(response.bound_function_id, parent._client, response.handle_metadata)
+
+        fun: _Function = _Function._from_loader(_load, "Function(parametrized)", hydrate_lazily=True)
+
+        if can_use_parent and parent.is_hydrated:
+            # skip the resolver altogether:
+            fun._hydrate_from_other(parent)
+
        fun._info = self._info
        fun._obj = obj
-        fun._is_generator = self._is_generator
-        fun._is_method = True
-        fun._parent = self
-
        return fun
 
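`_bind_parameters` is what runs under the hood when a deployed class is instantiated: the (init params, options) pair is bound to the base "service function" via `FunctionBindParams`, and proto-format class parameters only accept keyword arguments. A hedged usage sketch (the app and class names are hypothetical):

```python
import modal

# Instantiating a deployed class binds its constructor arguments to the
# class service function on the server side.
Model = modal.Cls.lookup("my-app", "Model")  # hypothetical deployment

# modal.parameter-based classes require keyword arguments; positional
# arguments raise InvalidError, as enforced in _load above.
m = Model(model_name="base")
```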
    @live_method
    async def keep_warm(self, warm_pool_size: int) -> None:
-        """Set the warm pool size for the function (including parametrized functions).
+        """Set the warm pool size for the function.
 
-        Please exercise care when using this advanced feature! Setting and forgetting a warm pool on functions can lead to increased costs.
+        Please exercise care when using this advanced feature!
+        Setting and forgetting a warm pool on functions can lead to increased costs.
 
-        ```python
+        ```python notest
        # Usage on a regular function.
        f = modal.Function.lookup("my-app", "function")
        f.keep_warm(2)
 
        # Usage on a parametrized function.
        Model = modal.Cls.lookup("my-app", "Model")
-        Model("fine-tuned-model").inference.keep_warm(2)
+        Model("fine-tuned-model").keep_warm(2)
        ```
        """
+        if self._is_method:
+            raise InvalidError(
+                textwrap.dedent(
+                    """
+                    The `.keep_warm()` method cannot be used on Modal class *methods* deployed using Modal >v0.63.
 
+                    Call `.keep_warm()` on the class *instance* instead.
+                    """
+                )
+            )
        assert self._client and self._client.stub
        request = api_pb2.FunctionUpdateSchedulingParamsRequest(
            function_id=self._object_id, warm_pool_size_override=warm_pool_size
@@ -1041,17 +1039,22 @@ class _Function(_Object, type_prefix="fu"):
        await retry_transient_errors(self._client.stub.FunctionUpdateSchedulingParams, request)
 
    @classmethod
+    @renamed_parameter((2024, 12, 18), "tag", "name")
    def from_name(
-        cls: Type["_Function"],
+        cls: type["_Function"],
        app_name: str,
-        tag: Optional[str] = None,
+        name: str,
        namespace=api_pb2.DEPLOYMENT_NAMESPACE_WORKSPACE,
        environment_name: Optional[str] = None,
    ) -> "_Function":
-        """Retrieve a function with a given name and tag.
+        """Reference a Function from a deployed App by its name.
+
+        In contrast to `modal.Function.lookup`, this is a lazy method
+        that defers hydrating the local object with metadata from
+        Modal servers until the first time it is actually used.
 
        ```python
-        other_function = modal.Function.from_name("other-app", "function")
+        f = modal.Function.from_name("other-app", "function")
        ```
        """
 
@@ -1059,7 +1062,7 @@ class _Function(_Object, type_prefix="fu"):
        assert resolver.client and resolver.client.stub
        request = api_pb2.FunctionGetRequest(
            app_name=app_name,
-            object_tag=tag or "",
+            object_tag=name,
            namespace=namespace,
            environment_name=_get_environment_name(environment_name, resolver) or "",
        )
@@ -1071,26 +1074,32 @@ class _Function(_Object, type_prefix="fu"):
        else:
            raise
 
+        print_server_warnings(response.server_warnings)
+
        self._hydrate(response.function_id, resolver.client, response.handle_metadata)
 
        rep = f"Ref({app_name})"
-        return cls._from_loader(_load_remote, rep, is_another_app=True)
+        return cls._from_loader(_load_remote, rep, is_another_app=True, hydrate_lazily=True)
 
    @staticmethod
+    @renamed_parameter((2024, 12, 18), "tag", "name")
    async def lookup(
        app_name: str,
-        tag: Optional[str] = None,
+        name: str,
        namespace=api_pb2.DEPLOYMENT_NAMESPACE_WORKSPACE,
        client: Optional[_Client] = None,
        environment_name: Optional[str] = None,
    ) -> "_Function":
-        """Lookup a function with a given name and tag.
+        """Lookup a Function from a deployed App by its name.
 
-        ```python
-        other_function = modal.Function.lookup("other-app", "function")
+        In contrast to `modal.Function.from_name`, this is an eager method
+        that will hydrate the local object with metadata from Modal servers.
+
+        ```python notest
+        f = modal.Function.lookup("other-app", "function")
        ```
        """
-        obj = _Function.from_name(app_name, tag, namespace=namespace, environment_name=environment_name)
+        obj = _Function.from_name(app_name, name, namespace=namespace, environment_name=environment_name)
        if client is None:
            client = await _Client.from_env()
        resolver = Resolver(client=client)
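The two accessors now differ only in when hydration happens: `from_name` is lazy and defers the server round-trip, while `lookup` hydrates eagerly. A short sketch of the distinction (app and function names hypothetical):

```python
import modal

# Lazy: no server round-trip yet; metadata is fetched on first use.
f_lazy = modal.Function.from_name("other-app", "function")

# Eager: hydrates immediately, so a missing deployment fails right here.
f_eager = modal.Function.lookup("other-app", "function")

result = f_lazy.remote(42)  # first use triggers hydration, then runs remotely
```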
@@ -1104,9 +1113,18 @@ class _Function(_Object, type_prefix="fu"):
        return self._tag
 
    @property
-    def stub(self) -> "modal.stub._Stub":
+    def app(self) -> "modal.app._App":
        """mdmd:hidden"""
-        return self._stub
+        if self._app is None:
+            raise ExecutionError("The app has not been assigned on the function at this point")
+
+        return self._app
+
+    @property
+    def stub(self) -> "modal.app._App":
+        """mdmd:hidden"""
+        # Deprecated soon, only for backwards compatibility
+        return self.app
 
    @property
    def info(self) -> FunctionInfo:
@@ -1115,12 +1133,15 @@ class _Function(_Object, type_prefix="fu"):
        return self._info
 
    @property
-    def env(self) -> FunctionEnv:
+    def spec(self) -> _FunctionSpec:
        """mdmd:hidden"""
-        return self._env
+        assert self._spec
+        return self._spec
 
    def get_build_def(self) -> str:
        """mdmd:hidden"""
+        # Plaintext source and arg definition for the function, so it's part of the image
+        # hash. We can't use the cloudpickle hash because it's not very stable.
        assert hasattr(self, "_raw_f") and hasattr(self, "_build_args")
        return f"{inspect.getsource(self._raw_f)}\n{repr(self._build_args)}"
 
@@ -1130,208 +1151,170 @@ class _Function(_Object, type_prefix="fu"):
        # Overridden concrete implementation of base class method
        self._progress = None
        self._is_generator = None
+        self._cluster_size = None
        self._web_url = None
-        self._output_mgr: Optional[OutputManager] = None
-        self._mute_cancellation = (
-            False  # set when a user terminates the app intentionally, to prevent useless traceback spam
-        )
        self._function_name = None
        self._info = None
+        self._serve_mounts = frozenset()
 
    def _hydrate_metadata(self, metadata: Optional[Message]):
        # Overridden concrete implementation of base class method
-        assert metadata and isinstance(metadata, (api_pb2.Function, api_pb2.FunctionHandleMetadata))
+        assert metadata and isinstance(metadata, api_pb2.FunctionHandleMetadata)
        self._is_generator = metadata.function_type == api_pb2.Function.FUNCTION_TYPE_GENERATOR
        self._web_url = metadata.web_url
        self._function_name = metadata.function_name
        self._is_method = metadata.is_method
+        self._use_method_name = metadata.use_method_name
+        self._class_parameter_info = metadata.class_parameter_info
+        self._method_handle_metadata = dict(metadata.method_handle_metadata)
+        self._definition_id = metadata.definition_id
 
    def _get_metadata(self):
        # Overridden concrete implementation of base class method
-        assert self._function_name
+        assert self._function_name, f"Function name must be set before metadata can be retrieved for {self}"
        return api_pb2.FunctionHandleMetadata(
            function_name=self._function_name,
-            function_type=(
-                api_pb2.Function.FUNCTION_TYPE_GENERATOR
-                if self._is_generator
-                else api_pb2.Function.FUNCTION_TYPE_FUNCTION
-            ),
+            function_type=get_function_type(self._is_generator),
            web_url=self._web_url or "",
+            use_method_name=self._use_method_name,
+            is_method=self._is_method,
+            class_parameter_info=self._class_parameter_info,
+            definition_id=self._definition_id,
+            method_handle_metadata=self._method_handle_metadata,
        )
 
-    def _set_mute_cancellation(self, value: bool = True):
-        self._mute_cancellation = value
-
-    def _set_output_mgr(self, output_mgr: OutputManager):
-        self._output_mgr = output_mgr
+    def _check_no_web_url(self, fn_name: str):
+        if self._web_url:
+            raise InvalidError(
+                f"A webhook function cannot be invoked for remote execution with `.{fn_name}`. "
+                f"Invoke this function via its web url '{self._web_url}' "
+                + f"or call it locally: {self._function_name}.local()"
+            )
 
+    # TODO (live_method on properties is not great, since it could be blocking the event loop from async contexts)
    @property
-    def web_url(self) -> str:
+    @live_method
+    async def web_url(self) -> str:
        """URL of a Function running as a web endpoint."""
        if not self._web_url:
            raise ValueError(
-                f"No web_url can be found for function {self._function_name}. web_url can only be referenced from a running app context"
+                f"No web_url can be found for function {self._function_name}. web_url "
+                "can only be referenced from a running app context"
            )
        return self._web_url
 
    @property
-    def is_generator(self) -> bool:
+    async def is_generator(self) -> bool:
        """mdmd:hidden"""
-        assert self._is_generator is not None
+        # hacky: kind of like @live_method, but not hydrating if we have the value already from local source
+        if self._is_generator is not None:
+            # this is set if the function or class is local
+            return self._is_generator
+
+        # not set - this is a from_name lookup - hydrate
+        await self.resolve()
+        assert self._is_generator is not None  # should be set now
        return self._is_generator
 
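The new `_check_no_web_url` helper centralizes a guard that previously lived inline in each calling method: functions exposed as web endpoints must be invoked over HTTP (or with `.local()`), never with `.remote()`-style invocation. A hedged sketch of the resulting behavior (the endpoint name is hypothetical):

```python
import modal

f = modal.Function.lookup("my-app", "my_endpoint")  # hypothetical web endpoint

try:
    f.remote()  # raises InvalidError via _check_no_web_url("remote")
except modal.exception.InvalidError as exc:
    print(exc)  # message points at the web url and at .local()
```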
-    async def _map(self, input_stream: AsyncIterable[Any], order_outputs: bool, return_exceptions: bool, kwargs={}):
-        if self._web_url:
-            raise InvalidError(
-                "A web endpoint function cannot be directly invoked for parallel remote execution. "
-                f"Invoke this function via its web url '{self._web_url}' or call it locally: {self._function_name}()."
-            )
+    @property
+    def cluster_size(self) -> int:
+        """mdmd:hidden"""
+        return self._cluster_size or 1
+
+    @live_method_gen
+    async def _map(
+        self, input_queue: _SynchronizedQueue, order_outputs: bool, return_exceptions: bool
+    ) -> AsyncGenerator[Any, None]:
+        """mdmd:hidden
+
+        Synchronicity-wrapped map implementation. To be safe against invocations of user code in
+        the synchronicity thread it doesn't accept an [async]iterator, and instead takes a
+        _SynchronizedQueue instance that is fed by higher level functions like .map()
+
+        _SynchronizedQueue is used instead of asyncio.Queue so that the main thread can put
+        items in the queue safely.
+        """
+        self._check_no_web_url("map")
        if self._is_generator:
            raise InvalidError("A generator function cannot be called with `.map(...)`.")
 
        assert self._function_name
-        count_update_callback = (
-            self._output_mgr.function_progress_callback(self._function_name, total=None) if self._output_mgr else None
-        )
+        if output_mgr := _get_output_manager():
+            count_update_callback = output_mgr.function_progress_callback(self._function_name, total=None)
+        else:
+            count_update_callback = None
+
+        async with aclosing(
+            _map_invocation(
+                self,  # type: ignore
+                input_queue,
+                self._client,
+                order_outputs,
+                return_exceptions,
+                count_update_callback,
+            )
+        ) as stream:
+            async for item in stream:
+                yield item
 
-        async for item in _map_invocation(
-            self.object_id,
-            input_stream,
+    async def _call_function(self, args, kwargs) -> ReturnType:
+        if config.get("client_retries"):
+            function_call_invocation_type = api_pb2.FUNCTION_CALL_INVOCATION_TYPE_SYNC
+        else:
+            function_call_invocation_type = api_pb2.FUNCTION_CALL_INVOCATION_TYPE_SYNC_LEGACY
+        invocation = await _Invocation.create(
+            self,
+            args,
            kwargs,
-            self._client,
-            order_outputs,
-            return_exceptions,
-            count_update_callback,
-        ):
-            yield item
+            client=self._client,
+            function_call_invocation_type=function_call_invocation_type,
+        )
 
-    async def _call_function(self, args, kwargs):
-        invocation = await _Invocation.create(self.object_id, args, kwargs, self._client)
-        try:
-            return await invocation.run_function()
-        except asyncio.CancelledError:
-            # this can happen if the user terminates a program, triggering a cancellation cascade
-            if not self._mute_cancellation:
-                raise
+        return await invocation.run_function()
 
-    async def _call_function_nowait(self, args, kwargs) -> _Invocation:
-        return await _Invocation.create(self.object_id, args, kwargs, self._client)
+    async def _call_function_nowait(
+        self, args, kwargs, function_call_invocation_type: "api_pb2.FunctionCallInvocationType.ValueType"
+    ) -> _Invocation:
+        return await _Invocation.create(
+            self, args, kwargs, client=self._client, function_call_invocation_type=function_call_invocation_type
+        )
 
-    @warn_if_generator_is_not_consumed
+    @warn_if_generator_is_not_consumed()
    @live_method_gen
    @synchronizer.no_input_translation
    async def _call_generator(self, args, kwargs):
-        invocation = await _Invocation.create(self.object_id, args, kwargs, self._client)
+        invocation = await _Invocation.create(
+            self,
+            args,
+            kwargs,
+            client=self._client,
+            function_call_invocation_type=api_pb2.FUNCTION_CALL_INVOCATION_TYPE_SYNC_LEGACY,
+        )
        async for res in invocation.run_generator():
            yield res
 
    @synchronizer.no_io_translation
    async def _call_generator_nowait(self, args, kwargs):
-        return await _Invocation.create(self.object_id, args, kwargs, self._client)
-
-    @warn_if_generator_is_not_consumed
-    @live_method_gen
-    @synchronizer.no_input_translation
-    async def map(
-        self,
-        *input_iterators,  # one input iterator per argument in the mapped-over function/generator
-        kwargs={},  # any extra keyword arguments for the function
-        order_outputs: bool = True,  # return outputs in order
-        return_exceptions: bool = False,  # propagate exceptions (False) or aggregate them in the results list (True)
-    ) -> AsyncGenerator[Any, None]:
-        """Parallel map over a set of inputs.
-
-        Takes one iterator argument per argument in the function being mapped over.
-
-        Example:
-        ```python
-        @stub.function()
-        def my_func(a):
-            return a ** 2
-
-
-        @stub.local_entrypoint()
-        def main():
-            assert list(my_func.map([1, 2, 3, 4])) == [1, 4, 9, 16]
-        ```
-
-        If applied to a `stub.function`, `map()` returns one result per input and the output order
-        is guaranteed to be the same as the input order. Set `order_outputs=False` to return results
-        in the order that they are completed instead.
-
-        `return_exceptions` can be used to treat exceptions as successful results:
-
-        ```python
-        @stub.function()
-        def my_func(a):
-            if a == 2:
-                raise Exception("ohno")
-            return a ** 2
-
-
-        @stub.local_entrypoint()
-        def main():
-            # [0, 1, UserCodeException(Exception('ohno'))]
-            print(list(my_func.map(range(3), return_exceptions=True)))
-        ```
-        """
-
-        input_stream = stream.zip(*(stream.iterate(it) for it in input_iterators))
-        async for item in self._map(input_stream, order_outputs, return_exceptions, kwargs):
-            yield item
-
-    @synchronizer.no_input_translation
-    async def for_each(self, *input_iterators, kwargs={}, ignore_exceptions: bool = False):
-        """Execute function for all inputs, ignoring outputs.
-
-        Convenient alias for `.map()` in cases where the function just needs to be called,
-        as the caller doesn't have to consume the generator to process the inputs.
-        """
-        # TODO(erikbern): it would be better if this is more like a map_spawn that immediately exits
-        # rather than iterating over the result
-        async for _ in self.map(
-            *input_iterators, kwargs=kwargs, order_outputs=False, return_exceptions=ignore_exceptions
-        ):
-            pass
-
-    @warn_if_generator_is_not_consumed
-    @live_method_gen
-    @synchronizer.no_input_translation
-    async def starmap(
-        self, input_iterator, kwargs={}, order_outputs: bool = True, return_exceptions: bool = False
-    ) -> AsyncGenerator[Any, None]:
-        """Like `map`, but spreads arguments over multiple function arguments.
-
-        Assumes every input is a sequence (e.g. a tuple).
-
-        Example:
-        ```python
-        @stub.function()
-        def my_func(a, b):
-            return a + b
-
-
-        @stub.local_entrypoint()
-        def main():
-            assert list(my_func.starmap([(1, 2), (3, 4)])) == [3, 7]
-        ```
-        """
-        input_stream = stream.iterate(input_iterator)
-        async for item in self._map(input_stream, order_outputs, return_exceptions, kwargs):
-            yield item
+        deprecation_warning(
+            (2024, 12, 11),
+            "Calling spawn on a generator function is deprecated and will soon raise an exception.",
+        )
+        return await _Invocation.create(
+            self,
+            args,
+            kwargs,
+            client=self._client,
+            function_call_invocation_type=api_pb2.FUNCTION_CALL_INVOCATION_TYPE_ASYNC_LEGACY,
+        )
 
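The inline `map`/`for_each`/`starmap` implementations (and their docstring examples) are removed in this hunk; they are reattached further down via `MethodWithAio` wrappers that feed the `_SynchronizedQueue`-based `_map` above. Their call-side usage is unchanged; a brief sketch under that assumption (app name and function body hypothetical):

```python
import modal

app = modal.App("map-demo")  # hypothetical app name

@app.function()
def square(a: int) -> int:
    return a**2

@app.local_entrypoint()
def main():
    assert list(square.map([1, 2, 3, 4])) == [1, 4, 9, 16]
    assert list(square.starmap([(1,), (2,)])) == [1, 4]  # spreads each tuple over the args
    square.for_each(range(10))  # run for side effects, ignoring outputs
```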
    @synchronizer.no_io_translation
    @live_method
-    async def remote(self, *args, **kwargs) -> Any:
+    async def remote(self, *args: P.args, **kwargs: P.kwargs) -> ReturnType:
        """
        Calls the function remotely, executing it with the given arguments and returning the execution's result.
        """
        # TODO: Generics/TypeVars
-        if self._web_url:
-            raise InvalidError(
-                "A web endpoint function cannot be invoked for remote execution with `.remote`. "
-                f"Invoke this function via its web url '{self._web_url}' or call it locally: {self._function_name}()."
-            )
+        self._check_no_web_url("remote")
        if self._is_generator:
            raise InvalidError(
                "A generator function cannot be called with `.remote(...)`. Use `.remote_gen(...)` instead."
@@ -1346,11 +1329,7 @@ class _Function(_Object, type_prefix="fu"):
        Calls the generator remotely, executing it with the given arguments and returning the execution's result.
        """
        # TODO: Generics/TypeVars
-        if self._web_url:
-            raise InvalidError(
-                "A web endpoint function cannot be invoked for remote execution with `.remote`. "
-                f"Invoke this function via its web url '{self._web_url}' or call it locally: {self._function_name}()."
-            )
+        self._check_no_web_url("remote_gen")
 
        if not self._is_generator:
            raise InvalidError(
@@ -1359,22 +1338,15 @@ class _Function(_Object, type_prefix="fu"):
        async for item in self._call_generator(args, kwargs):  # type: ignore
            yield item
 
-    @synchronizer.no_io_translation
-    @live_method
-    async def shell(self, *args, **kwargs) -> None:
-        if self._is_generator:
-            async for item in self._call_generator(args, kwargs):
-                pass
-        else:
-            await self._call_function(args, kwargs)
+    def _is_local(self):
+        return self._info is not None
 
-    def _get_is_remote_cls_method(self):
-        return self._is_remote_cls_method
-
-    def _get_info(self):
+    def _get_info(self) -> FunctionInfo:
+        if not self._info:
+            raise ExecutionError("Can't get info for a function that isn't locally defined")
        return self._info
 
-    def _get_obj(self):
+    def _get_obj(self) -> Optional["modal.cls._Obj"]:
        if not self._is_method:
            return None
        elif not self._obj:
@@ -1383,83 +1355,129 @@ class _Function(_Object, type_prefix="fu"):
        return self._obj
 
    @synchronizer.nowrap
-    def local(self, *args, **kwargs) -> Any:
+    def local(self, *args: P.args, **kwargs: P.kwargs) -> OriginalReturnType:
        """
        Calls the function locally, executing it with the given arguments and returning the execution's result.
-        This method allows a caller to execute the standard Python function wrapped by Modal.
+
+        The function will execute in the same environment as the caller, just like calling the underlying function
+        directly in Python. In particular, only secrets available in the caller environment will be available
+        through environment variables.
        """
        # TODO(erikbern): it would be nice to remove the nowrap thing, but right now that would cause
        # "user code" to run on the synchronicity thread, which seems bad
-        info = self._get_info()
-        if not info:
+        if not self._is_local():
            msg = (
-                "The definition for this function is missing so it is not possible to invoke it locally. "
+                "The definition for this function is missing here so it is not possible to invoke it locally. "
                "If this function was retrieved via `Function.lookup` you need to use `.remote()`."
            )
            raise ExecutionError(msg)
 
-        obj = self._get_obj()
+        info = self._get_info()
+        if not info.raw_f:
+            # Here if calling .local on a service function itself which should never happen
+            # TODO: check if we end up here in a container for a serialized function?
+            raise ExecutionError("Can't call .local on service function")
+
+        if is_local() and (self.spec.volumes or self.spec.network_file_systems):
+            warnings.warn(
+                f"The {info.function_name} function is executing locally "
+                + "and will not have access to the mounted Volume or NetworkFileSystem data"
+            )
+
+        obj: Optional["modal.cls._Obj"] = self._get_obj()
 
        if not obj:
            fun = info.raw_f
            return fun(*args, **kwargs)
        else:
            # This is a method on a class, so bind the self to the function
-            local_obj = obj.get_local_obj()
-            fun = info.raw_f.__get__(local_obj)
+            user_cls_instance = obj._cached_user_cls_instance()
+            fun = info.raw_f.__get__(user_cls_instance)
 
+            # TODO: replace implicit local enter/exit with a context manager
            if is_async(info.raw_f):
                # We want to run __aenter__ and fun in the same coroutine
                async def coro():
-                    await obj.aenter()
+                    await obj._aenter()
                    return await fun(*args, **kwargs)
 
-                return coro()
+                return coro()  # type: ignore
            else:
-                obj.enter()
+                obj._enter()
                return fun(*args, **kwargs)
 
    @synchronizer.no_input_translation
    @live_method
-    async def spawn(self, *args, **kwargs) -> Optional["_FunctionCall"]:
-        """Calls the function with the given arguments, without waiting for the results.
+    async def _experimental_spawn(self, *args: P.args, **kwargs: P.kwargs) -> "_FunctionCall[ReturnType]":
+        """[Experimental] Calls the function with the given arguments, without waiting for the results.
 
-        Returns a `modal.functions.FunctionCall` object, that can later be polled or waited for using `.get(timeout=...)`.
+        This experimental version of the spawn method allows up to 1 million inputs to be spawned.
+
+        Returns a `modal.functions.FunctionCall` object, that can later be polled or
+        waited for using `.get(timeout=...)`.
        Conceptually similar to `multiprocessing.pool.apply_async`, or a Future/Promise in other contexts.
+        """
+        self._check_no_web_url("_experimental_spawn")
+        if self._is_generator:
+            invocation = await self._call_generator_nowait(args, kwargs)
+        else:
+            invocation = await self._call_function_nowait(
+                args, kwargs, function_call_invocation_type=api_pb2.FUNCTION_CALL_INVOCATION_TYPE_ASYNC
+            )
 
-        *Note:* `.spawn()` on a modal generator function does call and execute the generator, but does not currently
-        return a function handle for polling the result.
+        fc = _FunctionCall._new_hydrated(invocation.function_call_id, invocation.client, None)
+        fc._is_generator = self._is_generator if self._is_generator else False
+        return fc
+
+    @synchronizer.no_input_translation
+    @live_method
+    async def spawn(self, *args: P.args, **kwargs: P.kwargs) -> "_FunctionCall[ReturnType]":
+        """Calls the function with the given arguments, without waiting for the results.
+
+        Returns a `modal.functions.FunctionCall` object, that can later be polled or
+        waited for using `.get(timeout=...)`.
+        Conceptually similar to `multiprocessing.pool.apply_async`, or a Future/Promise in other contexts.
        """
+        self._check_no_web_url("spawn")
        if self._is_generator:
-            await self._call_generator_nowait(args, kwargs)
-            return None
+            invocation = await self._call_generator_nowait(args, kwargs)
+        else:
+            invocation = await self._call_function_nowait(
+                args, kwargs, api_pb2.FUNCTION_CALL_INVOCATION_TYPE_ASYNC_LEGACY
+            )
 
-        invocation = await self._call_function_nowait(args, kwargs)
-        return _FunctionCall._new_hydrated(invocation.function_call_id, invocation.client, None)
+        fc = _FunctionCall._new_hydrated(invocation.function_call_id, invocation.client, None)
+        fc._is_generator = self._is_generator if self._is_generator else False
+        return fc
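Both `spawn` variants now return a hydrated `_FunctionCall` carrying an `_is_generator` flag, so the handle can later be polled with `.get()` (or iterated with `.get_gen()` for generators). A usage sketch (app and function names hypothetical):

```python
import modal

f = modal.Function.lookup("my-app", "process")  # hypothetical deployed function

fc = f.spawn(42)             # returns immediately with a FunctionCall handle
print(fc.object_id)          # "fc-..." id, reusable via FunctionCall.from_id
result = fc.get(timeout=60)  # block up to 60s for the result
```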
 
    def get_raw_f(self) -> Callable[..., Any]:
        """Return the inner Python object wrapped by this Modal Function."""
-        if not self._info:
-            raise AttributeError("_info has not been set on this FunctionHandle and not available in this context")
-
-        return self._info.raw_f
+        return self._raw_f
 
    @live_method
    async def get_current_stats(self) -> FunctionStats:
        """Return a `FunctionStats` object describing the current function's queue and runner counts."""
        assert self._client.stub
-        resp = await self._client.stub.FunctionGetCurrentStats(
-            api_pb2.FunctionGetCurrentStatsRequest(function_id=self.object_id)
-        )
-        return FunctionStats(
-            backlog=resp.backlog, num_active_runners=resp.num_active_tasks, num_total_runners=resp.num_total_tasks
+        resp = await retry_transient_errors(
+            self._client.stub.FunctionGetCurrentStats,
+            api_pb2.FunctionGetCurrentStatsRequest(function_id=self.object_id),
+            total_timeout=10.0,
        )
+        return FunctionStats(backlog=resp.backlog, num_total_runners=resp.num_total_tasks)
+
+    # A bit hacky - but the map-style functions need to not be synchronicity-wrapped
+    # in order to not execute their input iterators on the synchronicity event loop.
+    # We still need to wrap them using MethodWithAio to maintain a synchronicity-like
+    # api with `.aio` and get working type-stubs and reference docs generation:
+    map = MethodWithAio(_map_sync, _map_async, synchronizer)
+    starmap = MethodWithAio(_starmap_sync, _starmap_async, synchronizer)
+    for_each = MethodWithAio(_for_each_sync, _for_each_async, synchronizer)
 
 
 Function = synchronize_api(_Function)
 
 
-class _FunctionCall(_Object, type_prefix="fc"):
+class _FunctionCall(typing.Generic[ReturnType], _Object, type_prefix="fc"):
    """A reference to an executed function call.
 
    Constructed using `.spawn(...)` on a Modal function with the same
@@ -1470,11 +1488,13 @@ class _FunctionCall(_Object, type_prefix="fc"):
    Conceptually similar to a Future/Promise/AsyncResult in other contexts and languages.
    """
 
+    _is_generator: bool = False
+
    def _invocation(self):
        assert self._client.stub
        return _Invocation(self._client.stub, self.object_id, self._client)
 
-    async def get(self, timeout: Optional[float] = None):
+    async def get(self, timeout: Optional[float] = None) -> ReturnType:
        """Get the result of the function call.
 
        This function waits indefinitely by default. It takes an optional
@@ -1483,9 +1503,23 @@ class _FunctionCall(_Object, type_prefix="fc"):
        The returned coroutine is not cancellation-safe.
        """
+
+        if self._is_generator:
+            raise Exception("Cannot get the result of a generator function call. Use `get_gen` instead.")
+
        return await self._invocation().poll_function(timeout=timeout)
 
-    async def get_call_graph(self) -> List[InputInfo]:
+    async def get_gen(self) -> AsyncGenerator[Any, None]:
+        """
+        Calls the generator remotely, executing it with the given arguments and returning the execution's result.
+        """
+        if not self._is_generator:
+            raise Exception("Cannot iterate over a non-generator function call. Use `get` instead.")
+
+        async for res in self._invocation().run_generator():
+            yield res
+
+    async def get_call_graph(self) -> list[InputInfo]:
        """Returns a structure representing the call graph from a given root
        call ID, along with the status of execution for each node.
 
@@ -1497,24 +1531,38 @@ class _FunctionCall(_Object, type_prefix="fc"):
        response = await retry_transient_errors(self._client.stub.FunctionGetCallGraph, request)
        return _reconstruct_call_graph(response)
 
-    async def cancel(self):
-        """Cancels the function call, which will stop its execution and mark its inputs as [`TERMINATED`](/docs/reference/modal.call_graph#modalcall_graphinputstatus)."""
-        request = api_pb2.FunctionCallCancelRequest(function_call_id=self.object_id)
+    async def cancel(
+        self,
+        terminate_containers: bool = False,  # if true, containers running the inputs are forcibly terminated
+    ):
+        """Cancels the function call, which will stop its execution and mark its inputs as
+        [`TERMINATED`](/docs/reference/modal.call_graph#modalcall_graphinputstatus).
+
+        If `terminate_containers=True` - the containers running the cancelled inputs are all terminated
+        causing any non-cancelled inputs on those containers to be rescheduled in new containers.
+        """
+        request = api_pb2.FunctionCallCancelRequest(
+            function_call_id=self.object_id, terminate_containers=terminate_containers
+        )
        assert self._client and self._client.stub
        await retry_transient_errors(self._client.stub.FunctionCallCancel, request)
 
    @staticmethod
-    async def from_id(function_call_id: str, client: Optional[_Client] = None) -> "_FunctionCall":
+    async def from_id(
+        function_call_id: str, client: Optional[_Client] = None, is_generator: bool = False
+    ) -> "_FunctionCall":
        if client is None:
            client = await _Client.from_env()
 
-        return _FunctionCall._new_hydrated(function_call_id, client, None)
+        fc = _FunctionCall._new_hydrated(function_call_id, client, None)
+        fc._is_generator = is_generator
+        return fc
 
 
 FunctionCall = synchronize_api(_FunctionCall)
 
 
-async def _gather(*function_calls: _FunctionCall):
+async def _gather(*function_calls: _FunctionCall[ReturnType]) -> typing.Sequence[ReturnType]:
    """Wait until all Modal function calls have results before returning
 
    Accepts a variable number of FunctionCall objects as returned by `Function.spawn()`.
@@ -1532,63 +1580,10 @@ async def _gather(*function_calls: _FunctionCall):
    ```
    """
    try:
-        return await asyncio.gather(*[fc.get() for fc in function_calls])
+        return await TaskContext.gather(*[fc.get() for fc in function_calls])
    except Exception as exc:
        # TODO: kill all running function calls
        raise exc
 
 
 gather = synchronize_api(_gather)
-
-
-_current_input_id: ContextVar = ContextVar("_current_input_id")
-_current_function_call_id: ContextVar = ContextVar("_current_function_call_id")
-
-
-def current_input_id() -> Optional[str]:
-    """Returns the input ID for the current input.
-
-    Can only be called from Modal function (i.e. in a container context).
-
-    ```python
-    from modal import current_input_id
-
-    @stub.function()
-    def process_stuff():
-        print(f"Starting to process {current_input_id()}")
-    ```
-    """
-    try:
-        return _current_input_id.get()
-    except LookupError:
-        return None
-
-
-def current_function_call_id() -> Optional[str]:
-    """Returns the function call ID for the current input.
-
-    Can only be called from Modal function (i.e. in a container context).
-
-    ```python
-    from modal import current_function_call_id
-
-    @stub.function()
-    def process_stuff():
-        print(f"Starting to process input from {current_function_call_id()}")
-    ```
-    """
-    try:
-        return _current_function_call_id.get()
-    except LookupError:
-        return None
-
-
-def _set_current_context_ids(input_id: str, function_call_id: str) -> Callable[[], None]:
-    input_token = _current_input_id.set(input_id)
-    function_call_token = _current_function_call_id.set(function_call_id)
-
-    def _reset_current_context_ids():
-        _current_input_id.reset(input_token)
-        _current_function_call_id.reset(function_call_token)
-
-    return _reset_current_context_ids