modal 1.0.6.dev61__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- modal/__main__.py +2 -2
- modal/_clustered_functions.py +3 -0
- modal/_clustered_functions.pyi +3 -2
- modal/_functions.py +78 -26
- modal/_object.py +9 -1
- modal/_output.py +14 -25
- modal/_runtime/gpu_memory_snapshot.py +158 -54
- modal/_utils/async_utils.py +6 -4
- modal/_utils/auth_token_manager.py +1 -1
- modal/_utils/blob_utils.py +16 -21
- modal/_utils/function_utils.py +16 -4
- modal/_utils/time_utils.py +8 -4
- modal/app.py +0 -4
- modal/app.pyi +0 -4
- modal/cli/_traceback.py +3 -2
- modal/cli/app.py +4 -4
- modal/cli/cluster.py +4 -4
- modal/cli/config.py +2 -2
- modal/cli/container.py +2 -2
- modal/cli/dict.py +4 -4
- modal/cli/entry_point.py +2 -2
- modal/cli/import_refs.py +3 -3
- modal/cli/network_file_system.py +8 -9
- modal/cli/profile.py +2 -2
- modal/cli/queues.py +5 -5
- modal/cli/secret.py +5 -5
- modal/cli/utils.py +3 -4
- modal/cli/volume.py +8 -9
- modal/client.py +8 -1
- modal/client.pyi +9 -10
- modal/container_process.py +2 -2
- modal/dict.py +47 -3
- modal/dict.pyi +55 -0
- modal/exception.py +4 -0
- modal/experimental/__init__.py +1 -1
- modal/experimental/flash.py +18 -2
- modal/experimental/flash.pyi +19 -0
- modal/functions.pyi +6 -7
- modal/image.py +26 -10
- modal/image.pyi +12 -4
- modal/mount.py +1 -1
- modal/object.pyi +4 -0
- modal/parallel_map.py +432 -4
- modal/parallel_map.pyi +28 -0
- modal/queue.py +46 -3
- modal/queue.pyi +53 -0
- modal/sandbox.py +105 -25
- modal/sandbox.pyi +108 -18
- modal/secret.py +48 -5
- modal/secret.pyi +55 -0
- modal/token_flow.py +3 -3
- modal/volume.py +49 -18
- modal/volume.pyi +50 -8
- {modal-1.0.6.dev61.dist-info → modal-1.1.1.dist-info}/METADATA +2 -2
- {modal-1.0.6.dev61.dist-info → modal-1.1.1.dist-info}/RECORD +75 -75
- modal_proto/api.proto +140 -14
- modal_proto/api_grpc.py +80 -0
- modal_proto/api_pb2.py +927 -756
- modal_proto/api_pb2.pyi +488 -34
- modal_proto/api_pb2_grpc.py +166 -0
- modal_proto/api_pb2_grpc.pyi +52 -0
- modal_proto/modal_api_grpc.py +5 -0
- modal_version/__init__.py +1 -1
- /modal/{requirements → builder}/2023.12.312.txt +0 -0
- /modal/{requirements → builder}/2023.12.txt +0 -0
- /modal/{requirements → builder}/2024.04.txt +0 -0
- /modal/{requirements → builder}/2024.10.txt +0 -0
- /modal/{requirements → builder}/2025.06.txt +0 -0
- /modal/{requirements → builder}/PREVIEW.txt +0 -0
- /modal/{requirements → builder}/README.md +0 -0
- /modal/{requirements → builder}/base-images.json +0 -0
- {modal-1.0.6.dev61.dist-info → modal-1.1.1.dist-info}/WHEEL +0 -0
- {modal-1.0.6.dev61.dist-info → modal-1.1.1.dist-info}/entry_points.txt +0 -0
- {modal-1.0.6.dev61.dist-info → modal-1.1.1.dist-info}/licenses/LICENSE +0 -0
- {modal-1.0.6.dev61.dist-info → modal-1.1.1.dist-info}/top_level.txt +0 -0
modal/_runtime/gpu_memory_snapshot.py
CHANGED

@@ -1,17 +1,18 @@
 # Copyright Modal Labs 2022
 #
 # This module provides a simple interface for creating GPU memory snapshots,
-#
+# providing a convenient interface to `cuda-checkpoint` [1]. This is intended
 # to be used in conjunction with memory snapshots.
 #
 # [1] https://github.com/NVIDIA/cuda-checkpoint
 
 import subprocess
 import time
-from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import dataclass
 from enum import Enum
 from pathlib import Path
+from typing import List, Optional
 
 from modal.config import config, logger
 
@@ -19,7 +20,9 @@ CUDA_CHECKPOINT_PATH: str = config.get("cuda_checkpoint_path")
 
 
 class CudaCheckpointState(Enum):
-    """State representation from the CUDA API
+    """State representation from the CUDA API [1].
+
+    [1] https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TYPES.html"""
 
     RUNNING = "running"
     LOCKED = "locked"
@@ -28,6 +31,8 @@ class CudaCheckpointState(Enum):
 
 
 class CudaCheckpointException(Exception):
+    """Exception raised for CUDA checkpoint operations."""
+
     pass
 
 
@@ -39,16 +44,31 @@ class CudaCheckpointProcess:
     pid: int
     state: CudaCheckpointState
 
-    def toggle(self, target_state: CudaCheckpointState, timeout_secs: float = 5 * 60.0):
+    def toggle(self, target_state: CudaCheckpointState, timeout_secs: float = 5 * 60.0) -> None:
         """Toggle CUDA checkpoint state for current process, moving GPU memory to the
-        CPU and back depending on the current process state when called.
+        CPU and back depending on the current process state when called.
+        """
         logger.debug(f"PID: {self.pid} Toggling CUDA checkpoint state to {target_state.value}")
 
         start_time = time.monotonic()
+        retry_count = 0
+        max_retries = 3
 
         while self._should_continue_toggle(target_state, start_time, timeout_secs):
-
-
+            try:
+                self._execute_toggle_command()
+                # Use exponential backoff for retries
+                sleep_time = min(0.1 * (2**retry_count), 1.0)
+                time.sleep(sleep_time)
+                retry_count = 0
+            except CudaCheckpointException as e:
+                retry_count += 1
+                if retry_count >= max_retries:
+                    raise CudaCheckpointException(
+                        f"PID: {self.pid} Failed to toggle state after {max_retries} retries: {e}"
+                    )
+                logger.debug(f"PID: {self.pid} Retry {retry_count}/{max_retries} after error: {e}")
+                time.sleep(0.5 * retry_count)
 
         logger.debug(f"PID: {self.pid} Target state {target_state.value} reached")
 
@@ -73,19 +93,25 @@ class CudaCheckpointProcess:
 
         return True
 
-    def _execute_toggle_command(self):
+    def _execute_toggle_command(self) -> None:
         """Execute the cuda-checkpoint toggle command."""
         try:
-            subprocess.run(
+            _ = subprocess.run(
                 [CUDA_CHECKPOINT_PATH, "--toggle", "--pid", str(self.pid)],
                 check=True,
                 capture_output=True,
                 text=True,
+                timeout=30,
             )
             logger.debug(f"PID: {self.pid} Successfully toggled CUDA checkpoint state")
         except subprocess.CalledProcessError as e:
-
-
+            error_msg = f"PID: {self.pid} Failed to toggle CUDA checkpoint state: {e.stderr}"
+            logger.debug(error_msg)
+            raise CudaCheckpointException(error_msg)
+        except subprocess.TimeoutExpired:
+            error_msg = f"PID: {self.pid} Toggle command timed out"
+            logger.debug(error_msg)
+            raise CudaCheckpointException(error_msg)
 
     def refresh_state(self) -> None:
         """Refreshes the current CUDA checkpoint state for this process."""
@@ -95,15 +121,20 @@ class CudaCheckpointProcess:
                 check=True,
                 capture_output=True,
                 text=True,
-                timeout=
+                timeout=10,
             )
 
             state_str = result.stdout.strip().lower()
             self.state = CudaCheckpointState(state_str)
 
         except subprocess.CalledProcessError as e:
-
-
+            error_msg = f"PID: {self.pid} Failed to get CUDA checkpoint state: {e.stderr}"
+            logger.debug(error_msg)
+            raise CudaCheckpointException(error_msg)
+        except subprocess.TimeoutExpired:
+            error_msg = f"PID: {self.pid} Get state command timed out"
+            logger.debug(error_msg)
+            raise CudaCheckpointException(error_msg)
 
 
 class CudaCheckpointSession:
@@ -111,12 +142,17 @@ class CudaCheckpointSession:
 
     def __init__(self):
         self.cuda_processes = self._get_cuda_pids()
-
+        if self.cuda_processes:
+            logger.debug(
+                f"Found {len(self.cuda_processes)} PID(s) with CUDA sessions: {[c.pid for c in self.cuda_processes]}"
+            )
+        else:
+            logger.debug("No CUDA sessions found.")
 
-    def _get_cuda_pids(self) ->
+    def _get_cuda_pids(self) -> List[CudaCheckpointProcess]:
         """Iterates over all PIDs and identifies the ones that have running
         CUDA sessions."""
-        cuda_pids:
+        cuda_pids: List[CudaCheckpointProcess] = []
 
         # Get all active process IDs from /proc directory
         proc_dir = Path("/proc")
@@ -125,75 +161,143 @@ class CudaCheckpointSession:
                 "OS does not have /proc path rendering it incompatible with GPU memory snapshots."
             )
 
-
-
-                continue
-
-            pid = int(entry.name)
-            try:
-                # Call cuda-checkpoint to check if this PID has a CUDA session
-                result = subprocess.run(
-                    [CUDA_CHECKPOINT_PATH, "--get-state", "--pid", str(pid)],
-                    capture_output=True,
-                    text=True,
-                    timeout=10,
-                )
-
-                # If the command succeeds (return code 0), this PID has a CUDA session
-                if result.returncode == 0:
-                    state_str = result.stdout.strip().lower()
-                    state = CudaCheckpointState(state_str)
-
-                    cuda_checkpoint_process = CudaCheckpointProcess(pid=pid, state=state)
-                    cuda_pids.append(cuda_checkpoint_process)
+        # Get all numeric directories (PIDs) from /proc
+        pid_dirs = [entry for entry in proc_dir.iterdir() if entry.name.isdigit()]
 
-
-
-
+        # Use ThreadPoolExecutor to check PIDs in parallel for better performance
+        with ThreadPoolExecutor(max_workers=min(50, len(pid_dirs))) as executor:
+            future_to_pid = {
+                executor.submit(self._check_cuda_session, int(entry.name)): int(entry.name) for entry in pid_dirs
+            }
 
-
-
-
-
-
+            for future in as_completed(future_to_pid):
+                pid = future_to_pid[future]
+                try:
+                    cuda_process = future.result()
+                    if cuda_process:
+                        cuda_pids.append(cuda_process)
+                except Exception as e:
+                    logger.debug(f"Error checking PID {pid}: {e}")
 
         # Sort PIDs for ordered checkpointing
         cuda_pids.sort(key=lambda x: x.pid)
         return cuda_pids
 
+    def _check_cuda_session(self, pid: int) -> Optional[CudaCheckpointProcess]:
+        """Check if a specific PID has a CUDA session."""
+        try:
+            result = subprocess.run(
+                [CUDA_CHECKPOINT_PATH, "--get-state", "--pid", str(pid)],
+                capture_output=True,
+                text=True,
+                timeout=5,
+            )
+
+            # If the command succeeds (return code 0), this PID has a CUDA session
+            if result.returncode == 0:
+                state_str = result.stdout.strip().lower()
+                state = CudaCheckpointState(state_str)
+                return CudaCheckpointProcess(pid=pid, state=state)
+
+        except subprocess.CalledProcessError:
+            # Command failed, which is expected for PIDs without CUDA sessions
+            pass
+        except subprocess.TimeoutExpired:
+            logger.debug(f"Timeout checking CUDA state for PID {pid}")
+        except Exception as e:
+            logger.debug(f"Error checking PID {pid}: {e}")
+
+        return None
+
     def checkpoint(self) -> None:
+        """Checkpoint all CUDA processes, moving GPU memory to CPU."""
+        if not self.cuda_processes:
+            logger.debug("No CUDA processes to checkpoint.")
+            return
+
         # Validate all states first
         for proc in self.cuda_processes:
+            proc.refresh_state()  # Refresh state before validation
            if proc.state != CudaCheckpointState.RUNNING:
-                raise CudaCheckpointException(
+                raise CudaCheckpointException(
+                    f"PID {proc.pid}: CUDA session not in {CudaCheckpointState.RUNNING.value} state. "
+                    f"Current state: {proc.state.value}"
+                )
 
         # Moving state from GPU to CPU can take several seconds per CUDA session.
         # Make a parallel call per CUDA session.
         start = time.perf_counter()
 
-        def checkpoint_impl(proc: CudaCheckpointProcess):
+        def checkpoint_impl(proc: CudaCheckpointProcess) -> None:
             proc.toggle(CudaCheckpointState.CHECKPOINTED)
 
         with ThreadPoolExecutor() as executor:
-
+            futures = [executor.submit(checkpoint_impl, proc) for proc in self.cuda_processes]
+
+            # Wait for all futures and collect any exceptions
+            exceptions = []
+            for future in as_completed(futures):
+                try:
+                    future.result()
+                except Exception as e:
+                    exceptions.append(e)
+
+            if exceptions:
+                raise CudaCheckpointException(
+                    f"Failed to checkpoint {len(exceptions)} processes: {'; '.join(str(e) for e in exceptions)}"
+                )
 
         elapsed = time.perf_counter() - start
-        logger.debug(f"Checkpointing CUDA sessions took => {elapsed:.3f}s")
+        logger.debug(f"Checkpointing {len(self.cuda_processes)} CUDA sessions took => {elapsed:.3f}s")
 
     def restore(self) -> None:
+        """Restore all CUDA processes, moving memory back from CPU to GPU."""
+        if not self.cuda_processes:
+            logger.debug("No CUDA sessions to restore.")
+            return
+
         # Validate all states first
         for proc in self.cuda_processes:
+            proc.refresh_state()  # Refresh state before validation
            if proc.state != CudaCheckpointState.CHECKPOINTED:
-                raise CudaCheckpointException(
+                raise CudaCheckpointException(
+                    f"PID {proc.pid}: CUDA session not in {CudaCheckpointState.CHECKPOINTED.value} state. "
+                    f"Current state: {proc.state.value}"
+                )
 
         # See checkpoint() for rationale about parallelism.
         start = time.perf_counter()
 
-        def restore_process(proc: CudaCheckpointProcess):
+        def restore_process(proc: CudaCheckpointProcess) -> None:
            proc.toggle(CudaCheckpointState.RUNNING)
 
         with ThreadPoolExecutor() as executor:
-
+            futures = [executor.submit(restore_process, proc) for proc in self.cuda_processes]
+
+            # Wait for all futures and collect any exceptions
+            exceptions = []
+            for future in as_completed(futures):
+                try:
+                    future.result()
+                except Exception as e:
+                    exceptions.append(e)
+
+            if exceptions:
+                raise CudaCheckpointException(
+                    f"Failed to restore {len(exceptions)} processes: {'; '.join(str(e) for e in exceptions)}"
+                )
 
         elapsed = time.perf_counter() - start
-        logger.debug(f"Restoring CUDA
+        logger.debug(f"Restoring {len(self.cuda_processes)} CUDA session(s) took => {elapsed:.3f}s")
+
+    def get_process_count(self) -> int:
+        """Get the number of CUDA processes managed by this session."""
+        return len(self.cuda_processes)
+
+    def get_process_states(self) -> List[tuple[int, CudaCheckpointState]]:
+        """Get current states of all managed processes."""
+        states = []
+        for proc in self.cuda_processes:
+            proc.refresh_state()
+            states.append((proc.pid, proc.state))
+        return states
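Note: the reworked toggle() loop above is the core behavioral change in this file. It polls until the target state is reached, pauses on a capped exponential schedule after each successful attempt, and gives up after three consecutive command failures with linear backoff in between. A minimal sketch of that pattern, with hypothetical check_done/do_work callables standing in for the state check and the cuda-checkpoint invocation:

import time

def poll_with_backoff(check_done, do_work, max_retries: int = 3) -> None:
    # Poll until done, mirroring the new toggle loop's two backoff schedules.
    retry_count = 0
    while not check_done():
        try:
            do_work()
            time.sleep(min(0.1 * (2**retry_count), 1.0))  # capped exponential pause
            retry_count = 0  # a success resets the consecutive-failure counter
        except RuntimeError as e:
            retry_count += 1
            if retry_count >= max_retries:
                raise RuntimeError(f"failed after {max_retries} retries: {e}")
            time.sleep(0.5 * retry_count)  # linear backoff between retries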
modal/_utils/async_utils.py
CHANGED

@@ -279,7 +279,9 @@ class TimestampPriorityQueue(Generic[T]):
 
     def __init__(self, maxsize: int = 0):
         self.condition = asyncio.Condition()
-        self._queue: asyncio.PriorityQueue[tuple[float, Union[T, None]]] = asyncio.PriorityQueue(maxsize=maxsize)
+        self._queue: asyncio.PriorityQueue[tuple[float, int, Union[T, None]]] = asyncio.PriorityQueue(maxsize=maxsize)
+        # Used to tiebreak items with the same timestamp that are not comparable. (eg. protos)
+        self._counter = itertools.count()
 
     async def close(self):
         await self.put(self._MAX_PRIORITY, None)
@@ -288,7 +290,7 @@ class TimestampPriorityQueue(Generic[T]):
         """
         Add an item to the queue to be processed at a specific timestamp.
         """
-        await self._queue.put((timestamp, item))
+        await self._queue.put((timestamp, next(self._counter), item))
         async with self.condition:
             self.condition.notify_all()  # notify any waiting coroutines
 
@@ -301,7 +303,7 @@ class TimestampPriorityQueue(Generic[T]):
             while self.empty():
                 await self.condition.wait()
             # peek at the next item
-            timestamp, item = await self._queue.get()
+            timestamp, counter, item = await self._queue.get()
             now = time.time()
             if timestamp < now:
                 return item
@@ -309,7 +311,7 @@ class TimestampPriorityQueue(Generic[T]):
                 return None
             # not ready yet, calculate sleep time
             sleep_time = timestamp - now
-            self._queue.put_nowait((timestamp, item))  # put it back
+            self._queue.put_nowait((timestamp, counter, item))  # put it back
             # wait until either the timeout or a new item is added
             try:
                 await asyncio.wait_for(self.condition.wait(), timeout=sleep_time)
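Note: the new itertools.count() field fixes a real pitfall. asyncio.PriorityQueue is heap-based, and when two entries share a timestamp the comparison falls through to the next tuple element, which for protobuf messages raises TypeError because they define no ordering. A small repro of the failure mode and the fix, using plain heapq (Item is a hypothetical stand-in for a proto):

import heapq
import itertools

class Item:  # stand-in for a protobuf message: defines no __lt__
    pass

heap: list = []
counter = itertools.count()

# Without the middle counter element, two equal timestamps would force an
# Item < Item comparison and raise TypeError; the unique counter breaks the tie.
heapq.heappush(heap, (1.0, next(counter), Item()))
heapq.heappush(heap, (1.0, next(counter), Item()))
timestamp, _, item = heapq.heappop(heap)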
modal/_utils/auth_token_manager.py
CHANGED

@@ -27,7 +27,7 @@ class _AuthTokenManager:
         self._expiry = 0.0
         self._lock: typing.Union[asyncio.Lock, None] = None
 
-    async def get_token(self):
+    async def get_token(self) -> str:
         """
         When called, the AuthTokenManager can be in one of three states:
         1. Has a valid cached token. It is returned to the caller.
modal/_utils/blob_utils.py
CHANGED

@@ -188,16 +188,10 @@ def get_content_length(data: BinaryIO) -> int:
     return content_length - pos
 
 
-async def
-
-
-
-    latency_ms = (time.monotonic_ns() - t0) // 1_000_000
-    return latency_ms
-
-
-async def _blob_upload_with_fallback(items, blob_ids: list[str], callback) -> tuple[str, bool, int]:
-    r2_latency_ms = 0
+async def _blob_upload_with_fallback(
+    items, blob_ids: list[str], callback, content_length: int
+) -> tuple[str, bool, int]:
+    r2_throughput_bytes_s = 0
     r2_failed = False
     for idx, (item, blob_id) in enumerate(zip(items, blob_ids)):
         # We want to default to R2 95% of the time and S3 5% of the time.
@@ -206,14 +200,13 @@ async def _blob_upload_with_fallback(items, blob_ids: list[str], callback) -> tuple[str, bool, int]:
             continue
         try:
             if blob_id.endswith(":r2"):
-
-
-
-
-                )
+                t0 = time.monotonic_ns()
+                await callback(item)
+                dt_ns = time.monotonic_ns() - t0
+                r2_throughput_bytes_s = (content_length * 1_000_000_000) // max(dt_ns, 1)
             else:
                 await callback(item)
-            return blob_id, r2_failed,
+            return blob_id, r2_failed, r2_throughput_bytes_s
         except Exception as _:
             if blob_id.endswith(":r2"):
                 r2_failed = True
@@ -251,10 +244,11 @@ async def _blob_upload(
            progress_report_cb=progress_report_cb,
        )
 
-        blob_id, r2_failed,
+        blob_id, r2_failed, r2_throughput_bytes_s = await _blob_upload_with_fallback(
            resp.multiparts.items,
            resp.blob_ids,
            upload_multipart_upload,
+            content_length=content_length,
        )
    else:
        from .bytes_io_segment_payload import BytesIOSegmentPayload
@@ -271,16 +265,17 @@ async def _blob_upload(
            content_md5_b64=upload_hashes.md5_base64,
        )
 
-        blob_id, r2_failed,
+        blob_id, r2_failed, r2_throughput_bytes_s = await _blob_upload_with_fallback(
            resp.upload_urls.items,
            resp.blob_ids,
            upload_to_s3_url,
+            content_length=content_length,
        )
 
    if progress_report_cb:
        progress_report_cb(complete=True)
 
-    return blob_id, r2_failed,
+    return blob_id, r2_failed, r2_throughput_bytes_s
 
 
 async def blob_upload_with_r2_failure_info(payload: bytes, stub: ModalClientModal) -> tuple[str, bool, int]:
@@ -291,13 +286,13 @@ async def blob_upload_with_r2_failure_info(payload: bytes, stub: ModalClientModal) -> tuple[str, bool, int]:
        logger.warning("Blob uploading string, not bytes - auto-encoding as utf8")
        payload = payload.encode("utf8")
    upload_hashes = get_upload_hashes(payload)
-    blob_id, r2_failed,
+    blob_id, r2_failed, r2_throughput_bytes_s = await _blob_upload(upload_hashes, payload, stub)
    dur_s = max(time.time() - t0, 0.001)  # avoid division by zero
    throughput_mib_s = (size_mib) / dur_s
    logger.debug(
        f"Uploaded large blob of size {size_mib:.2f} MiB ({throughput_mib_s:.2f} MiB/s, total {dur_s:.2f}s). {blob_id}"
    )
-    return blob_id, r2_failed,
+    return blob_id, r2_failed, r2_throughput_bytes_s
 
 
 async def blob_upload(payload: bytes, stub: ModalClientModal) -> str:
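Note: the R2 upload path now reports throughput in bytes per second instead of latency in milliseconds, computed in integer nanosecond arithmetic with the denominator clamped so a very fast upload cannot divide by zero. A sketch of the measurement, under the assumption that callback is any awaitable upload function:

import asyncio
import time

async def timed_upload(callback, item, content_length: int) -> int:
    # Returns throughput in bytes/s; max(dt_ns, 1) guards a zero-duration upload.
    t0 = time.monotonic_ns()
    await callback(item)
    dt_ns = time.monotonic_ns() - t0
    return (content_length * 1_000_000_000) // max(dt_ns, 1)

async def main() -> None:
    async def fake_upload(item: bytes) -> None:
        await asyncio.sleep(0.01)  # simulated network transfer

    print(await timed_upload(fake_upload, b"payload", content_length=7))

asyncio.run(main())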
modal/_utils/function_utils.py
CHANGED

@@ -385,9 +385,16 @@ def callable_has_non_self_non_default_params(f: Callable[..., Any]) -> bool:
 
 
 async def _stream_function_call_data(
-    client,
+    client,
+    stub,
+    function_call_id: Optional[str],
+    variant: Literal["data_in", "data_out"],
+    attempt_token: Optional[str] = None,
 ) -> AsyncGenerator[Any, None]:
     """Read from the `data_in` or `data_out` stream of a function call."""
+    if function_call_id is None and attempt_token is None:
+        raise ValueError("function_call_id or attempt_token is required for data_out stream")
+
     if stub is None:
         stub = client.stub
 
@@ -405,7 +412,11 @@ async def _stream_function_call_data(
         raise ValueError(f"Invalid variant {variant}")
 
     while True:
-        req = api_pb2.FunctionCallGetDataRequest(
+        req = api_pb2.FunctionCallGetDataRequest(
+            function_call_id=function_call_id,
+            last_index=last_index,
+            attempt_token=attempt_token,
+        )
         try:
             async for chunk in stub_fn.unary_stream(req):
                 if chunk.index <= last_index:
@@ -531,6 +542,7 @@ def should_upload(
     )
 
 
+# This must be called against the client stub, not the input-plane stub.
 async def _create_input(
     args,
     kwargs,
@@ -552,7 +564,7 @@ async def _create_input(
     args_serialized = serialize((args, kwargs))
 
     if should_upload(len(args_serialized), max_object_size_bytes, function_call_invocation_type):
-        args_blob_id, r2_failed,
+        args_blob_id, r2_failed, r2_throughput_bytes_s = await blob_upload_with_r2_failure_info(args_serialized, stub)
         return api_pb2.FunctionPutInputsItem(
             input=api_pb2.FunctionInput(
                 args_blob_id=args_blob_id,
@@ -561,7 +573,7 @@ async def _create_input(
             ),
             idx=idx,
             r2_failed=r2_failed,
-
+            r2_throughput_bytes_s=r2_throughput_bytes_s,
         )
     else:
         return api_pb2.FunctionPutInputsItem(
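Note: the `chunk.index <= last_index` check in the context lines above is what makes the stream safe to retry; after a reconnect, chunks already seen are skipped by index. A self-contained sketch of that dedupe logic over hypothetical (index, data) pairs:

from typing import Iterable, Iterator

def dedupe_stream(chunks: Iterable[tuple[int, bytes]], last_index: int = 0) -> Iterator[bytes]:
    # Chunks at or below the last seen index are replays from a reconnect.
    for index, data in chunks:
        if index <= last_index:
            continue
        last_index = index
        yield data

print(list(dedupe_stream([(1, b"a"), (1, b"a"), (2, b"b")])))  # [b'a', b'b']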
modal/_utils/time_utils.py
CHANGED

@@ -3,13 +3,17 @@ from datetime import datetime
 from typing import Optional
 
 
-def
+def timestamp_to_localized_dt(ts: float) -> datetime:
+    locale_tz = datetime.now().astimezone().tzinfo
+    return datetime.fromtimestamp(ts, tz=locale_tz)
+
+
+def timestamp_to_localized_str(ts: float, isotz: bool = True) -> Optional[str]:
     if ts > 0:
-
-        dt = datetime.fromtimestamp(ts, tz=locale_tz)
+        dt = timestamp_to_localized_dt(ts)
         if isotz:
             return dt.isoformat(sep=" ", timespec="seconds")
         else:
-            return f"{
+            return f"{dt:%Y-%m-%d %H:%M %Z}"
     else:
         return None
modal/app.py
CHANGED

@@ -665,7 +665,6 @@ class _App:
         ] = None,  # Experimental controls over fine-grained scheduling (alpha).
         _experimental_proxy_ip: Optional[str] = None,  # IP address of proxy
         _experimental_custom_scaling_factor: Optional[float] = None,  # Custom scaling factor
-        _experimental_enable_gpu_snapshot: bool = False,  # Experimentally enable GPU memory snapshots.
         # Parameters below here are deprecated. Please update your code as suggested
         keep_warm: Optional[int] = None,  # Replaced with `min_containers`
         concurrency_limit: Optional[int] = None,  # Replaced with `max_containers`
@@ -830,7 +829,6 @@ class _App:
             include_source=include_source if include_source is not None else self._include_source_default,
             experimental_options={k: str(v) for k, v in (experimental_options or {}).items()},
             _experimental_proxy_ip=_experimental_proxy_ip,
-            _experimental_enable_gpu_snapshot=_experimental_enable_gpu_snapshot,
         )
 
         self._add_function(function, webhook_config is not None)
@@ -889,7 +887,6 @@ class _App:
         ] = None,  # Experimental controls over fine-grained scheduling (alpha).
         _experimental_proxy_ip: Optional[str] = None,  # IP address of proxy
         _experimental_custom_scaling_factor: Optional[float] = None,  # Custom scaling factor
-        _experimental_enable_gpu_snapshot: bool = False,  # Experimentally enable GPU memory snapshots.
         # Parameters below here are deprecated. Please update your code as suggested
         keep_warm: Optional[int] = None,  # Replaced with `min_containers`
         concurrency_limit: Optional[int] = None,  # Replaced with `max_containers`
@@ -1014,7 +1011,6 @@ class _App:
             experimental_options={k: str(v) for k, v in (experimental_options or {}).items()},
             _experimental_proxy_ip=_experimental_proxy_ip,
             _experimental_custom_scaling_factor=_experimental_custom_scaling_factor,
-            _experimental_enable_gpu_snapshot=_experimental_enable_gpu_snapshot,
         )
 
         self._add_function(cls_func, is_web_endpoint=False)
modal/app.pyi
CHANGED

@@ -425,7 +425,6 @@ class _App:
         _experimental_scheduler_placement: typing.Optional[modal.scheduler_placement.SchedulerPlacement] = None,
         _experimental_proxy_ip: typing.Optional[str] = None,
         _experimental_custom_scaling_factor: typing.Optional[float] = None,
-        _experimental_enable_gpu_snapshot: bool = False,
         keep_warm: typing.Optional[int] = None,
         concurrency_limit: typing.Optional[int] = None,
         container_idle_timeout: typing.Optional[int] = None,
@@ -477,7 +476,6 @@ class _App:
         _experimental_scheduler_placement: typing.Optional[modal.scheduler_placement.SchedulerPlacement] = None,
         _experimental_proxy_ip: typing.Optional[str] = None,
         _experimental_custom_scaling_factor: typing.Optional[float] = None,
-        _experimental_enable_gpu_snapshot: bool = False,
         keep_warm: typing.Optional[int] = None,
         concurrency_limit: typing.Optional[int] = None,
         container_idle_timeout: typing.Optional[int] = None,
@@ -1030,7 +1028,6 @@ class App:
         _experimental_scheduler_placement: typing.Optional[modal.scheduler_placement.SchedulerPlacement] = None,
         _experimental_proxy_ip: typing.Optional[str] = None,
         _experimental_custom_scaling_factor: typing.Optional[float] = None,
-        _experimental_enable_gpu_snapshot: bool = False,
         keep_warm: typing.Optional[int] = None,
         concurrency_limit: typing.Optional[int] = None,
         container_idle_timeout: typing.Optional[int] = None,
@@ -1082,7 +1079,6 @@ class App:
         _experimental_scheduler_placement: typing.Optional[modal.scheduler_placement.SchedulerPlacement] = None,
         _experimental_proxy_ip: typing.Optional[str] = None,
         _experimental_custom_scaling_factor: typing.Optional[float] = None,
-        _experimental_enable_gpu_snapshot: bool = False,
         keep_warm: typing.Optional[int] = None,
         concurrency_limit: typing.Optional[int] = None,
         container_idle_timeout: typing.Optional[int] = None,
modal/cli/_traceback.py
CHANGED

@@ -6,12 +6,13 @@ import re
 import warnings
 from typing import Optional
 
-from rich.console import
+from rich.console import RenderResult, group
 from rich.panel import Panel
 from rich.syntax import Syntax
 from rich.text import Text
 from rich.traceback import PathHighlighter, Stack, Traceback, install
 
+from .._output import make_console
 from ..exception import DeprecationError, PendingDeprecationError, ServerWarning
 
 
@@ -193,7 +194,7 @@ def highlight_modal_warnings() -> None:
             title=title,
             title_align="left",
         )
-
+        make_console().print(panel)
     else:
         base_showwarning(warning, category, filename, lineno, file=None, line=None)
 
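Note: the warning hook now prints its panel via modal's make_console() helper. A standalone approximation using rich's public API, with Console() standing in for the configured console that make_console() presumably returns:

from rich.console import Console
from rich.panel import Panel

# Hypothetical message; the real hook builds the panel from the warning text.
panel = Panel("DeprecationError: example message", title="Modal Deprecation Warning", title_align="left")
Console().print(panel)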
modal/cli/app.py
CHANGED

@@ -15,7 +15,7 @@ from modal.client import _Client
 from modal.environments import ensure_env
 from modal_proto import api_pb2
 
-from .._utils.time_utils import
+from .._utils.time_utils import timestamp_to_localized_str
 from .utils import ENV_OPTION, display_table, get_app_id_from_name, stream_app_logs
 
 APP_IDENTIFIER = Argument("", help="App name or ID")
@@ -71,8 +71,8 @@ async def list_(env: Optional[str] = ENV_OPTION, json: bool = False):
                 app_stats.description,
                 state,
                 str(app_stats.n_running_tasks),
-
-
+                timestamp_to_localized_str(app_stats.created_at, json),
+                timestamp_to_localized_str(app_stats.stopped_at, json),
             ]
         )
 
@@ -217,7 +217,7 @@ async def history(
 
         row = [
             Text(f"v{app_stats.version}", style=style),
-            Text(
+            Text(timestamp_to_localized_str(app_stats.deployed_at, json), style=style),
             Text(app_stats.client_version, style=style),
             Text(app_stats.deployed_by, style=style),
         ]