modal 0.67.43__py3-none-any.whl → 0.68.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- modal/__init__.py +2 -0
- modal/_container_entrypoint.py +4 -1
- modal/_ipython.py +3 -13
- modal/_runtime/asgi.py +4 -0
- modal/_runtime/container_io_manager.py +3 -0
- modal/_runtime/user_code_imports.py +17 -20
- modal/_traceback.py +16 -2
- modal/_utils/blob_utils.py +27 -92
- modal/_utils/bytes_io_segment_payload.py +97 -0
- modal/_utils/function_utils.py +5 -1
- modal/_utils/grpc_testing.py +6 -2
- modal/_utils/hash_utils.py +51 -10
- modal/_utils/http_utils.py +19 -10
- modal/_utils/{pattern_matcher.py → pattern_utils.py} +1 -70
- modal/_utils/shell_utils.py +11 -5
- modal/cli/_traceback.py +11 -4
- modal/cli/run.py +25 -12
- modal/client.py +6 -37
- modal/client.pyi +2 -6
- modal/cls.py +132 -62
- modal/cls.pyi +13 -7
- modal/exception.py +20 -0
- modal/file_io.py +380 -0
- modal/file_io.pyi +185 -0
- modal/file_pattern_matcher.py +121 -0
- modal/functions.py +33 -11
- modal/functions.pyi +11 -9
- modal/image.py +88 -8
- modal/image.pyi +20 -4
- modal/mount.py +49 -9
- modal/mount.pyi +19 -4
- modal/network_file_system.py +4 -1
- modal/object.py +4 -2
- modal/partial_function.py +22 -10
- modal/partial_function.pyi +10 -2
- modal/runner.py +5 -4
- modal/runner.pyi +2 -1
- modal/sandbox.py +40 -0
- modal/sandbox.pyi +18 -0
- modal/volume.py +5 -1
- {modal-0.67.43.dist-info → modal-0.68.24.dist-info}/METADATA +2 -2
- {modal-0.67.43.dist-info → modal-0.68.24.dist-info}/RECORD +52 -48
- modal_docs/gen_reference_docs.py +1 -0
- modal_proto/api.proto +33 -1
- modal_proto/api_pb2.py +813 -737
- modal_proto/api_pb2.pyi +160 -13
- modal_version/__init__.py +1 -1
- modal_version/_version_generated.py +1 -1
- {modal-0.67.43.dist-info → modal-0.68.24.dist-info}/LICENSE +0 -0
- {modal-0.67.43.dist-info → modal-0.68.24.dist-info}/WHEEL +0 -0
- {modal-0.67.43.dist-info → modal-0.68.24.dist-info}/entry_points.txt +0 -0
- {modal-0.67.43.dist-info → modal-0.68.24.dist-info}/top_level.txt +0 -0
modal/__init__.py
CHANGED
@@ -17,6 +17,7 @@ try:
     from .cls import Cls, parameter
     from .dict import Dict
     from .exception import Error
+    from .file_pattern_matcher import FilePatternMatcher
    from .functions import Function
     from .image import Image
     from .mount import Mount
@@ -48,6 +49,7 @@ __all__ = [
     "Cron",
     "Dict",
     "Error",
+    "FilePatternMatcher",
     "Function",
     "Image",
     "Mount",
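The new top-level export gives dockerignore-style file filtering. A minimal sketch of constructing one (the variadic pattern-string constructor is an assumption based on the new modal/file_pattern_matcher.py module added in this release):

    import modal

    # Hypothetical usage; pattern syntax assumed to follow dockerignore-style rules.
    ignore = modal.FilePatternMatcher("**/*.pyc", "**/__pycache__")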
modal/_container_entrypoint.py
CHANGED
@@ -6,7 +6,7 @@ from modal._runtime.user_code_imports import Service, import_class_service, impo
 
 telemetry_socket = os.environ.get("MODAL_TELEMETRY_SOCKET")
 if telemetry_socket:
-    from modal._runtime.telemetry import instrument_imports
+    from ._runtime.telemetry import instrument_imports
 
     instrument_imports(telemetry_socket)
 
@@ -415,6 +415,9 @@ def main(container_args: api_pb2.ContainerArguments, client: Client):
 
     _client: _Client = synchronizer._translate_in(client)  # TODO(erikbern): ugly
 
+    # Call ContainerHello - currently a noop but might be used later for things
+    container_io_manager.hello()
+
     with container_io_manager.heartbeats(is_snapshotting_function), UserCodeEventLoop() as event_loop:
         # If this is a serialized function, fetch the definition from the server
         if function_def.definition_type == api_pb2.Function.DEFINITION_TYPE_SERIALIZED:
modal/_ipython.py
CHANGED
@@ -1,21 +1,11 @@
 # Copyright Modal Labs 2022
 import sys
-import warnings
-
-ipy_outstream = None
-try:
-    with warnings.catch_warnings():
-        warnings.simplefilter("ignore")
-        import ipykernel.iostream
-
-        ipy_outstream = ipykernel.iostream.OutStream
-except ImportError:
-    pass
 
 
 def is_notebook(stdout=None):
-    if ipy_outstream is None:
+    ipykernel_iostream = sys.modules.get("ipykernel.iostream")
+    if ipykernel_iostream is None:
         return False
     if stdout is None:
         stdout = sys.stdout
-    return isinstance(stdout, ipy_outstream)
+    return isinstance(stdout, ipykernel_iostream.OutStream)
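The rewrite swaps an eager, warning-suppressed ipykernel import for a sys.modules lookup: the notebook check now only consults ipykernel.iostream if something else has already imported it, so Modal never pays for (or triggers) the import itself. A minimal sketch of the pattern with hypothetical names:

    import sys

    def stream_is_ipykernel(stream) -> bool:
        # Look the module up instead of importing it: if ipykernel was never
        # imported by the host process, we can't be running in a notebook.
        mod = sys.modules.get("ipykernel.iostream")
        return mod is not None and isinstance(stream, mod.OutStream)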
modal/_runtime/asgi.py
CHANGED
@@ -1,4 +1,8 @@
 # Copyright Modal Labs 2022
+
+# Note: this module isn't imported unless it's needed.
+# This is because aiohttp is a pretty big dependency that adds significant latency when imported
+
 import asyncio
 from collections.abc import AsyncGenerator
 from typing import Any, Callable, NoReturn, Optional, cast
modal/_runtime/container_io_manager.py
CHANGED
@@ -335,6 +335,9 @@ class _ContainerIOManager:
         """Only used for tests."""
         cls._singleton = None
 
+    async def hello(self):
+        await self._client.stub.ContainerHello(Empty())
+
     async def _run_heartbeat_loop(self):
         while 1:
             t0 = time.monotonic()
modal/_runtime/user_code_imports.py
CHANGED
@@ -9,15 +9,6 @@ import modal._runtime.container_io_manager
 import modal.cls
 import modal.object
 from modal import Function
-from modal._runtime.asgi import (
-    LifespanManager,
-    asgi_app_wrapper,
-    get_ip_address,
-    wait_for_web_server,
-    web_server_proxy,
-    webhook_asgi_app,
-    wsgi_app_wrapper,
-)
 from modal._utils.async_utils import synchronizer
 from modal._utils.function_utils import LocalFunctionError, is_async as get_is_async, is_global_object
 from modal.exception import ExecutionError, InvalidError
@@ -28,6 +19,7 @@ from modal_proto import api_pb2
 if typing.TYPE_CHECKING:
     import modal.app
     import modal.partial_function
+    from modal._runtime.asgi import LifespanManager
 
 
 @dataclass
@@ -36,7 +28,7 @@ class FinalizedFunction:
     is_async: bool
     is_generator: bool
     data_format: int  # api_pb2.DataFormat
-    lifespan_manager: Optional[LifespanManager] = None
+    lifespan_manager: Optional["LifespanManager"] = None
 
 
 class Service(metaclass=ABCMeta):
@@ -63,19 +55,22 @@ def construct_webhook_callable(
     webhook_config: api_pb2.WebhookConfig,
     container_io_manager: "modal._runtime.container_io_manager.ContainerIOManager",
 ):
+    # Note: aiohttp is a significant dependency of the `asgi` module, so we import it locally
+    from modal._runtime import asgi
+
     # For webhooks, the user function is used to construct an asgi app:
     if webhook_config.type == api_pb2.WEBHOOK_TYPE_ASGI_APP:
         # Function returns an asgi_app, which we can use as a callable.
-        return asgi_app_wrapper(user_defined_callable(), container_io_manager)
+        return asgi.asgi_app_wrapper(user_defined_callable(), container_io_manager)
 
     elif webhook_config.type == api_pb2.WEBHOOK_TYPE_WSGI_APP:
-        # Function returns an wsgi_app, which we can use as a callable
-        return wsgi_app_wrapper(user_defined_callable(), container_io_manager)
+        # Function returns an wsgi_app, which we can use as a callable
+        return asgi.wsgi_app_wrapper(user_defined_callable(), container_io_manager)
 
     elif webhook_config.type == api_pb2.WEBHOOK_TYPE_FUNCTION:
         # Function is a webhook without an ASGI app. Create one for it.
-        return asgi_app_wrapper(
-            webhook_asgi_app(user_defined_callable, webhook_config.method, webhook_config.web_endpoint_docs),
+        return asgi.asgi_app_wrapper(
+            asgi.webhook_asgi_app(user_defined_callable, webhook_config.method, webhook_config.web_endpoint_docs),
             container_io_manager,
         )
 
@@ -86,11 +81,11 @@ def construct_webhook_callable(
         # We intentionally try to connect to the external interface instead of the loopback
         # interface here so users are forced to expose the server. This allows us to potentially
         # change the implementation to use an external bridge in the future.
-        host = get_ip_address(b"eth0")
+        host = asgi.get_ip_address(b"eth0")
         port = webhook_config.web_server_port
         startup_timeout = webhook_config.web_server_startup_timeout
-        wait_for_web_server(host, port, timeout=startup_timeout)
-        return asgi_app_wrapper(web_server_proxy(host, port), container_io_manager)
+        asgi.wait_for_web_server(host, port, timeout=startup_timeout)
+        return asgi.asgi_app_wrapper(asgi.web_server_proxy(host, port), container_io_manager)
     else:
         raise InvalidError(f"Unrecognized web endpoint type {webhook_config.type}")
 
@@ -269,10 +264,12 @@ def import_single_function_service(
             # The cls decorator is in global scope
             _cls = synchronizer._translate_in(cls)
            user_defined_callable = _cls._callables[fun_name]
-            function = _cls._method_functions.get(fun_name)
+            function = _cls._method_functions.get(
+                fun_name
+            )  # bound to the class service function - there is no instance
             active_app = _cls._app
         else:
-            # This is
+            # This is non-decorated class
             user_defined_callable = getattr(cls, fun_name)
     else:
         raise InvalidError(f"Invalid function qualname {qual_name}")
modal/_traceback.py
CHANGED
@@ -1,16 +1,21 @@
 # Copyright Modal Labs 2022
-"""Helper functions related to operating on traceback objects.
+"""Helper functions related to operating on exceptions, warnings, and traceback objects.
 
 Functions related to *displaying* tracebacks should go in `modal/cli/_traceback.py`
 so that Rich is not a dependency of the container Client.
 """
+
 import re
 import sys
 import traceback
+import warnings
 from types import TracebackType
-from typing import Any, Optional
+from typing import Any, Iterable, Optional
+
+from modal_proto import api_pb2
 
 from ._vendor.tblib import Traceback as TBLibTraceback
+from .exception import ServerWarning
 
 TBDictType = dict[str, Any]
 LineCacheType = dict[tuple[str, str], str]
@@ -109,3 +114,12 @@ def print_exception(exc: Optional[type[BaseException]], value: Optional[BaseExce
     if sys.version_info < (3, 11) and value is not None:
         notes = getattr(value, "__notes__", [])
         print(*notes, sep="\n", file=sys.stderr)
+
+
+def print_server_warnings(server_warnings: Iterable[api_pb2.Warning]):
+    """Issue a warning originating from the server with empty metadata about local origin.
+
+    When using the Modal CLI, these warnings should get caught and coerced into Rich panels.
+    """
+    for warning in server_warnings:
+        warnings.warn_explicit(warning.message, ServerWarning, "<modal-server>", 0)
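The new helper attributes server-sent warnings to a synthetic <modal-server> location rather than whatever local frame happened to call it. A runnable sketch of that mechanism, with a stand-in for the ServerWarning class this release adds to modal/exception.py:

    import warnings

    class ServerWarning(UserWarning):
        """Stand-in for modal.exception.ServerWarning."""

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # warn_explicit pins the reported filename/lineno to the server,
        # not to this local call site.
        warnings.warn_explicit("deprecated image builder", ServerWarning, "<modal-server>", 0)

    assert caught[0].category is ServerWarning
    assert caught[0].filename == "<modal-server>"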
modal/_utils/blob_utils.py
CHANGED
@@ -9,22 +9,22 @@ import time
 from collections.abc import AsyncIterator
 from contextlib import AbstractContextManager, contextmanager
 from pathlib import Path, PurePosixPath
-from typing import Any, BinaryIO, Callable, Optional, Union
+from typing import TYPE_CHECKING, Any, BinaryIO, Callable, Optional, Union
 from urllib.parse import urlparse
 
-from aiohttp import BytesIOPayload
-from aiohttp.abc import AbstractStreamWriter
-
 from modal_proto import api_pb2
 from modal_proto.modal_api_grpc import ModalClientModal
 
 from ..exception import ExecutionError
 from .async_utils import TaskContext, retry
 from .grpc_utils import retry_transient_errors
-from .hash_utils import UploadHashes, get_sha256_hex, get_upload_hashes
+from .hash_utils import UploadHashes, get_upload_hashes
 from .http_utils import ClientSessionRegistry
 from .logger import logger
 
+if TYPE_CHECKING:
+    from .bytes_io_segment_payload import BytesIOSegmentPayload
+
 # Max size for function inputs and outputs.
 MAX_OBJECT_SIZE_BYTES = 2 * 1024 * 1024  # 2 MiB
 
@@ -38,93 +38,16 @@ BLOB_MAX_PARALLELISM = 10
 # read ~16MiB chunks by default
 DEFAULT_SEGMENT_CHUNK_SIZE = 2**24
 
-
-class BytesIOSegmentPayload(BytesIOPayload):
-    """Modified bytes payload for concurrent sends of chunks from the same file.
-
-    Adds:
-    * read limit using remaining_bytes, in order to split files across streams
-    * larger read chunk (to prevent excessive read contention between parts)
-    * calculates an md5 for the segment
-
-    Feels like this should be in some standard lib...
-    """
-
-    def __init__(
-        self,
-        bytes_io: BinaryIO,  # should *not* be shared as IO position modification is not locked
-        segment_start: int,
-        segment_length: int,
-        chunk_size: int = DEFAULT_SEGMENT_CHUNK_SIZE,
-        progress_report_cb: Optional[Callable] = None,
-    ):
-        # not thread safe constructor!
-        super().__init__(bytes_io)
-        self.initial_seek_pos = bytes_io.tell()
-        self.segment_start = segment_start
-        self.segment_length = segment_length
-        # seek to start of file segment we are interested in, in order to make .size() evaluate correctly
-        self._value.seek(self.initial_seek_pos + segment_start)
-        assert self.segment_length <= super().size
-        self.chunk_size = chunk_size
-        self.progress_report_cb = progress_report_cb or (lambda *_, **__: None)
-        self.reset_state()
-
-    def reset_state(self):
-        self._md5_checksum = hashlib.md5()
-        self.num_bytes_read = 0
-        self._value.seek(self.initial_seek_pos)
-
-    @contextmanager
-    def reset_on_error(self):
-        try:
-            yield
-        except Exception as exc:
-            try:
-                self.progress_report_cb(reset=True)
-            except Exception as cb_exc:
-                raise cb_exc from exc
-            raise exc
-        finally:
-            self.reset_state()
-
-    @property
-    def size(self) -> int:
-        return self.segment_length
-
-    def md5_checksum(self):
-        return self._md5_checksum
-
-    async def write(self, writer: AbstractStreamWriter):
-        loop = asyncio.get_event_loop()
-
-        async def safe_read():
-            read_start = self.initial_seek_pos + self.segment_start + self.num_bytes_read
-            self._value.seek(read_start)
-            num_bytes = min(self.chunk_size, self.remaining_bytes())
-            chunk = await loop.run_in_executor(None, self._value.read, num_bytes)
-
-            await loop.run_in_executor(None, self._md5_checksum.update, chunk)
-            self.num_bytes_read += len(chunk)
-            return chunk
-
-        chunk = await safe_read()
-        while chunk and self.remaining_bytes() > 0:
-            await writer.write(chunk)
-            self.progress_report_cb(advance=len(chunk))
-            chunk = await safe_read()
-        if chunk:
-            await writer.write(chunk)
-            self.progress_report_cb(advance=len(chunk))
-
-    def remaining_bytes(self):
-        return self.segment_length - self.num_bytes_read
+# Files larger than this will be multipart uploaded. The server might request multipart upload for smaller files as
+# well, but the limit will never be raised.
+# TODO(dano): remove this once we stop requiring md5 for blobs
+MULTIPART_UPLOAD_THRESHOLD = 1024**3
 
 
 @retry(n_attempts=5, base_delay=0.5, timeout=None)
 async def _upload_to_s3_url(
     upload_url,
-    payload: BytesIOSegmentPayload,
+    payload: "BytesIOSegmentPayload",
     content_md5_b64: Optional[str] = None,
     content_type: Optional[str] = "application/octet-stream",  # set to None to force omission of ContentType header
 ) -> str:
@@ -180,6 +103,8 @@ async def perform_multipart_upload(
     upload_chunk_size: int = DEFAULT_SEGMENT_CHUNK_SIZE,
     progress_report_cb: Optional[Callable] = None,
 ) -> None:
+    from .bytes_io_segment_payload import BytesIOSegmentPayload
+
     upload_coros = []
     file_offset = 0
     num_bytes_left = content_length
@@ -273,6 +198,8 @@ async def _blob_upload(
             progress_report_cb=progress_report_cb,
         )
     else:
+        from .bytes_io_segment_payload import BytesIOSegmentPayload
+
        payload = BytesIOSegmentPayload(
             data, segment_start=0, segment_length=content_length, progress_report_cb=progress_report_cb
         )
@@ -305,9 +232,13 @@ async def blob_upload(payload: bytes, stub: ModalClientModal) -> str:
 
 
 async def blob_upload_file(
-    file_obj: BinaryIO,
+    file_obj: BinaryIO,
+    stub: ModalClientModal,
+    progress_report_cb: Optional[Callable] = None,
+    sha256_hex: Optional[str] = None,
+    md5_hex: Optional[str] = None,
 ) -> str:
-    upload_hashes = get_upload_hashes(file_obj)
+    upload_hashes = get_upload_hashes(file_obj, sha256_hex=sha256_hex, md5_hex=md5_hex)
     return await _blob_upload(upload_hashes, file_obj, stub, progress_report_cb)
 
 
@@ -366,6 +297,7 @@ class FileUploadSpec:
     use_blob: bool
     content: Optional[bytes]  # typically None if using blob, required otherwise
     sha256_hex: str
+    md5_hex: str
     mode: int  # file permission bits (last 12 bits of st_mode)
     size: int
 
@@ -383,13 +315,15 @@ def _get_file_upload_spec(
     fp.seek(0)
 
     if size >= LARGE_FILE_LIMIT:
+        # TODO(dano): remove the placeholder md5 once we stop requiring md5 for blobs
+        md5_hex = "baadbaadbaadbaadbaadbaadbaadbaad" if size > MULTIPART_UPLOAD_THRESHOLD else None
         use_blob = True
         content = None
-        sha256_hex = get_sha256_hex(fp)
+        hashes = get_upload_hashes(fp, md5_hex=md5_hex)
     else:
         use_blob = False
         content = fp.read()
-        sha256_hex = get_sha256_hex(content)
+        hashes = get_upload_hashes(content)
 
     return FileUploadSpec(
         source=source,
@@ -397,7 +331,8 @@
         mount_filename=mount_filename.as_posix(),
         use_blob=use_blob,
         content=content,
-        sha256_hex=sha256_hex,
+        sha256_hex=hashes.sha256_hex(),
+        md5_hex=hashes.md5_hex(),
         mode=mode & 0o7777,
         size=size,
     )
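The new MULTIPART_UPLOAD_THRESHOLD (1024**3 = 1 GiB) controls when a real md5 is skipped: above it, the placeholder digest is passed in so get_upload_hashes only computes sha256, roughly halving hashing work on huge files. A small sketch of that decision (the LARGE_FILE_LIMIT value here is an assumption for illustration):

    LARGE_FILE_LIMIT = 4 * 1024 * 1024    # assumed value, for illustration only
    MULTIPART_UPLOAD_THRESHOLD = 1024**3  # 1 GiB, from the diff above

    def needs_placeholder_md5(size: int) -> bool:
        # Mirrors the branch in _get_file_upload_spec: blob files above the
        # multipart threshold get a fixed placeholder md5 instead of a real one.
        return size >= LARGE_FILE_LIMIT and size > MULTIPART_UPLOAD_THRESHOLD

    assert not needs_placeholder_md5(10 * 1024 * 1024)  # 10 MiB: real hashes
    assert needs_placeholder_md5(2 * 1024**3)           # 2 GiB: placeholder md5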
modal/_utils/bytes_io_segment_payload.py
ADDED
@@ -0,0 +1,97 @@
+# Copyright Modal Labs 2024
+
+import asyncio
+import hashlib
+from contextlib import contextmanager
+from typing import BinaryIO, Callable, Optional
+
+# Note: this module needs to import aiohttp in global scope
+# This takes about 50ms and isn't needed in many cases for Modal execution
+# To avoid this, we import it in local scope when needed (blob_utils.py)
+from aiohttp import BytesIOPayload
+from aiohttp.abc import AbstractStreamWriter
+
+# read ~16MiB chunks by default
+DEFAULT_SEGMENT_CHUNK_SIZE = 2**24
+
+
+class BytesIOSegmentPayload(BytesIOPayload):
+    """Modified bytes payload for concurrent sends of chunks from the same file.
+
+    Adds:
+    * read limit using remaining_bytes, in order to split files across streams
+    * larger read chunk (to prevent excessive read contention between parts)
+    * calculates an md5 for the segment
+
+    Feels like this should be in some standard lib...
+    """
+
+    def __init__(
+        self,
+        bytes_io: BinaryIO,  # should *not* be shared as IO position modification is not locked
+        segment_start: int,
+        segment_length: int,
+        chunk_size: int = DEFAULT_SEGMENT_CHUNK_SIZE,
+        progress_report_cb: Optional[Callable] = None,
+    ):
+        # not thread safe constructor!
+        super().__init__(bytes_io)
+        self.initial_seek_pos = bytes_io.tell()
+        self.segment_start = segment_start
+        self.segment_length = segment_length
+        # seek to start of file segment we are interested in, in order to make .size() evaluate correctly
+        self._value.seek(self.initial_seek_pos + segment_start)
+        assert self.segment_length <= super().size
+        self.chunk_size = chunk_size
+        self.progress_report_cb = progress_report_cb or (lambda *_, **__: None)
+        self.reset_state()
+
+    def reset_state(self):
+        self._md5_checksum = hashlib.md5()
+        self.num_bytes_read = 0
+        self._value.seek(self.initial_seek_pos)
+
+    @contextmanager
+    def reset_on_error(self):
+        try:
+            yield
+        except Exception as exc:
+            try:
+                self.progress_report_cb(reset=True)
+            except Exception as cb_exc:
+                raise cb_exc from exc
+            raise exc
+        finally:
+            self.reset_state()
+
+    @property
+    def size(self) -> int:
+        return self.segment_length
+
+    def md5_checksum(self):
+        return self._md5_checksum
+
+    async def write(self, writer: "AbstractStreamWriter"):
+        loop = asyncio.get_event_loop()
+
+        async def safe_read():
+            read_start = self.initial_seek_pos + self.segment_start + self.num_bytes_read
+            self._value.seek(read_start)
+            num_bytes = min(self.chunk_size, self.remaining_bytes())
+            chunk = await loop.run_in_executor(None, self._value.read, num_bytes)
+
+            await loop.run_in_executor(None, self._md5_checksum.update, chunk)
+            self.num_bytes_read += len(chunk)
+            return chunk
+
+        chunk = await safe_read()
+        while chunk and self.remaining_bytes() > 0:
+            await writer.write(chunk)
+            self.progress_report_cb(advance=len(chunk))
+            chunk = await safe_read()
+        if chunk:
+            await writer.write(chunk)
+            self.progress_report_cb(advance=len(chunk))
+
+    def remaining_bytes(self):
+        return self.segment_length - self.num_bytes_read
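perform_multipart_upload (in blob_utils.py above) slices a file into (segment_start, segment_length) pairs and gives each one its own BytesIOSegmentPayload. A small illustrative helper (not Modal's API) showing how the file_offset/num_bytes_left bookkeeping from that function partitions a file:

    def split_segments(content_length: int, chunk_size: int) -> list[tuple[int, int]]:
        # Each tuple is a (segment_start, segment_length) pair for one payload.
        segments = []
        file_offset = 0
        num_bytes_left = content_length
        while num_bytes_left > 0:
            segment_length = min(chunk_size, num_bytes_left)
            segments.append((file_offset, segment_length))
            file_offset += segment_length
            num_bytes_left -= segment_length
        return segments

    assert split_segments(5, 2) == [(0, 2), (2, 2), (4, 1)]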
modal/_utils/function_utils.py
CHANGED
@@ -99,7 +99,11 @@ def get_function_type(is_generator: Optional[bool]) -> "api_pb2.Function.Functio
 
 
 class FunctionInfo:
-    """Class that helps us extract a bunch of information about a function."""
+    """Class that helps us extract a bunch of information about a locally defined function.
+
+    Used for populating the definition of a remote function, and for making .local() calls
+    on a host with the local definition available.
+    """
 
     raw_f: Optional[Callable[..., Any]]  # if None - this is a "class service function"
     function_name: str
modal/_utils/grpc_testing.py
CHANGED
@@ -50,7 +50,7 @@ def patch_mock_servicer(cls):
 
     @contextlib.contextmanager
     def intercept(servicer):
-        ctx = InterceptionContext()
+        ctx = InterceptionContext(servicer)
         servicer.interception_context = ctx
         yield ctx
         ctx._assert_responses_consumed()
@@ -101,7 +101,8 @@ class ResponseNotConsumed(Exception):
 
 
 class InterceptionContext:
-    def __init__(self):
+    def __init__(self, servicer):
+        self._servicer = servicer
         self.calls: list[tuple[str, Any]] = []  # List[Tuple[method_name, message]]
         self.custom_responses: dict[str, list[tuple[Callable[[Any], bool], list[Any]]]] = defaultdict(list)
         self.custom_defaults: dict[str, Callable[["MockClientServicer", grpclib.server.Stream], Awaitable[None]]] = {}
@@ -149,6 +150,9 @@ class InterceptionContext:
         raise KeyError(f"No message of that type in call list: {self.calls}")
 
     def get_requests(self, method_name: str) -> list[Any]:
+        if not hasattr(self._servicer, method_name):
+            # we check this to prevent things like `assert ctx.get_requests("ASdfFunctionCreate") == 0` passing
+            raise ValueError(f"{method_name} not in MockServicer - did you spell it right?")
         return [msg for _method_name, msg in self.calls if _method_name == method_name]
 
     def _add_recv(self, method_name: str, msg):
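Passing the servicer into InterceptionContext exists to catch misspelled method names in tests: previously get_requests("ASdfFunctionCreate") returned an empty list, so assertions against [] passed vacuously. A trimmed-down, runnable sketch of the guard:

    class FakeServicer:
        def FunctionCreate(self, stream):  # stand-in gRPC handler
            ...

    class InterceptionContext:
        """Reduced sketch of the test helper above."""
        def __init__(self, servicer):
            self._servicer = servicer
            self.calls = []  # (method_name, message) pairs

        def get_requests(self, method_name):
            if not hasattr(self._servicer, method_name):
                # A typo'd name now fails loudly instead of silently returning [].
                raise ValueError(f"{method_name} not in servicer - did you spell it right?")
            return [msg for name, msg in self.calls if name == method_name]

    ctx = InterceptionContext(FakeServicer())
    assert ctx.get_requests("FunctionCreate") == []  # valid name, no calls recorded
    try:
        ctx.get_requests("FunctionCreat")  # misspelled: raises
    except ValueError:
        pass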
modal/_utils/hash_utils.py
CHANGED
@@ -2,12 +2,15 @@
 import base64
 import dataclasses
 import hashlib
-from typing import BinaryIO, Callable, Union
+import time
+from typing import BinaryIO, Callable, Optional, Sequence, Union
 
-
+from modal.config import logger
 
+HASH_CHUNK_SIZE = 65536
 
-def _update(hashers: list[Callable[[bytes], None]], data: Union[bytes, BinaryIO]) -> None:
+
+def _update(hashers: Sequence[Callable[[bytes], None]], data: Union[bytes, BinaryIO]) -> None:
     if isinstance(data, bytes):
         for hasher in hashers:
             hasher(data)
@@ -26,20 +29,26 @@ def _update(hashers: list[Callable[[bytes], None]], data: Union[bytes, BinaryIO]
 
 
 def get_sha256_hex(data: Union[bytes, BinaryIO]) -> str:
+    t0 = time.monotonic()
     hasher = hashlib.sha256()
     _update([hasher.update], data)
+    logger.debug("get_sha256_hex took %.3fs", time.monotonic() - t0)
     return hasher.hexdigest()
 
 
 def get_sha256_base64(data: Union[bytes, BinaryIO]) -> str:
+    t0 = time.monotonic()
     hasher = hashlib.sha256()
     _update([hasher.update], data)
+    logger.debug("get_sha256_base64 took %.3fs", time.monotonic() - t0)
     return base64.b64encode(hasher.digest()).decode("ascii")
 
 
 def get_md5_base64(data: Union[bytes, BinaryIO]) -> str:
+    t0 = time.monotonic()
     hasher = hashlib.md5()
     _update([hasher.update], data)
+    logger.debug("get_md5_base64 took %.3fs", time.monotonic() - t0)
     return base64.b64encode(hasher.digest()).decode("utf-8")
 
 
@@ -48,12 +57,44 @@ class UploadHashes:
     md5_base64: str
     sha256_base64: str
 
+    def md5_hex(self) -> str:
+        return base64.b64decode(self.md5_base64).hex()
+
+    def sha256_hex(self) -> str:
+        return base64.b64decode(self.sha256_base64).hex()
+
+
+def get_upload_hashes(
+    data: Union[bytes, BinaryIO], sha256_hex: Optional[str] = None, md5_hex: Optional[str] = None
+) -> UploadHashes:
+    t0 = time.monotonic()
+    hashers = {}
+
+    if not sha256_hex:
+        sha256 = hashlib.sha256()
+        hashers["sha256"] = sha256
+    if not md5_hex:
+        md5 = hashlib.md5()
+        hashers["md5"] = md5
+
+    if hashers:
+        updaters = [h.update for h in hashers.values()]
+        _update(updaters, data)
 
-def get_upload_hashes(data: Union[bytes, BinaryIO]) -> UploadHashes:
-    sha256 = hashlib.sha256()
-    md5 = hashlib.md5()
-    _update([sha256.update, md5.update], data)
-    return UploadHashes(
-        md5_base64=base64.b64encode(md5.digest()).decode("ascii"),
-        sha256_base64=base64.b64encode(sha256.digest()).decode("ascii"),
+    if sha256_hex:
+        sha256_base64 = base64.b64encode(bytes.fromhex(sha256_hex)).decode("ascii")
+    else:
+        sha256_base64 = base64.b64encode(hashers["sha256"].digest()).decode("ascii")
+
+    if md5_hex:
+        md5_base64 = base64.b64encode(bytes.fromhex(md5_hex)).decode("ascii")
+    else:
+        md5_base64 = base64.b64encode(hashers["md5"].digest()).decode("ascii")
+
+    hashes = UploadHashes(
+        md5_base64=md5_base64,
+        sha256_base64=sha256_base64,
     )
+
+    logger.debug("get_upload_hashes took %.3fs (%s)", time.monotonic() - t0, hashers.keys())
+    return hashes