modal 0.68.11__py3-none-any.whl → 0.68.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. modal/__init__.py +2 -0
  2. modal/_ipython.py +3 -13
  3. modal/_runtime/asgi.py +4 -0
  4. modal/_runtime/user_code_imports.py +13 -18
  5. modal/_utils/blob_utils.py +27 -92
  6. modal/_utils/bytes_io_segment_payload.py +97 -0
  7. modal/_utils/deprecation.py +44 -0
  8. modal/_utils/hash_utils.py +38 -9
  9. modal/_utils/http_utils.py +19 -10
  10. modal/_utils/{pattern_matcher.py → pattern_utils.py} +1 -70
  11. modal/_utils/shell_utils.py +11 -5
  12. modal/app.py +11 -31
  13. modal/app.pyi +3 -4
  14. modal/cli/app.py +1 -1
  15. modal/cli/run.py +25 -5
  16. modal/client.py +1 -1
  17. modal/client.pyi +2 -2
  18. modal/config.py +2 -1
  19. modal/container_process.py +2 -1
  20. modal/dict.py +2 -1
  21. modal/exception.py +0 -54
  22. modal/file_io.py +54 -7
  23. modal/file_io.pyi +18 -8
  24. modal/file_pattern_matcher.py +154 -0
  25. modal/functions.py +2 -8
  26. modal/functions.pyi +5 -1
  27. modal/image.py +106 -10
  28. modal/image.pyi +36 -6
  29. modal/mount.py +49 -9
  30. modal/mount.pyi +19 -4
  31. modal/network_file_system.py +6 -2
  32. modal/partial_function.py +10 -1
  33. modal/partial_function.pyi +8 -0
  34. modal/queue.py +2 -1
  35. modal/runner.py +2 -7
  36. modal/sandbox.py +23 -13
  37. modal/sandbox.pyi +21 -0
  38. modal/serving.py +1 -1
  39. modal/volume.py +7 -2
  40. {modal-0.68.11.dist-info → modal-0.68.31.dist-info}/METADATA +1 -1
  41. {modal-0.68.11.dist-info → modal-0.68.31.dist-info}/RECORD +49 -46
  42. modal_proto/api.proto +8 -0
  43. modal_proto/api_pb2.py +781 -745
  44. modal_proto/api_pb2.pyi +65 -3
  45. modal_version/_version_generated.py +1 -1
  46. {modal-0.68.11.dist-info → modal-0.68.31.dist-info}/LICENSE +0 -0
  47. {modal-0.68.11.dist-info → modal-0.68.31.dist-info}/WHEEL +0 -0
  48. {modal-0.68.11.dist-info → modal-0.68.31.dist-info}/entry_points.txt +0 -0
  49. {modal-0.68.11.dist-info → modal-0.68.31.dist-info}/top_level.txt +0 -0
modal/__init__.py CHANGED
@@ -17,6 +17,7 @@ try:
  from .cls import Cls, parameter
  from .dict import Dict
  from .exception import Error
+ from .file_pattern_matcher import FilePatternMatcher
  from .functions import Function
  from .image import Image
  from .mount import Mount
@@ -48,6 +49,7 @@ __all__ = [
  "Cron",
  "Dict",
  "Error",
+ "FilePatternMatcher",
  "Function",
  "Image",
  "Mount",
modal/_ipython.py CHANGED
@@ -1,21 +1,11 @@
  # Copyright Modal Labs 2022
  import sys
- import warnings
-
- ipy_outstream = None
- try:
-     with warnings.catch_warnings():
-         warnings.simplefilter("ignore")
-         import ipykernel.iostream
-
-         ipy_outstream = ipykernel.iostream.OutStream
- except ImportError:
-     pass


  def is_notebook(stdout=None):
-     if ipy_outstream is None:
+     ipykernel_iostream = sys.modules.get("ipykernel.iostream")
+     if ipykernel_iostream is None:
          return False
      if stdout is None:
          stdout = sys.stdout
-     return isinstance(stdout, ipy_outstream)
+     return isinstance(stdout, ipykernel_iostream.OutStream)
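
The rewrite replaces an eager (and warning-suppressed) `ipykernel` import with a `sys.modules` lookup: if nothing in the process has imported `ipykernel`, we cannot be running in a notebook, so there is nothing to check. A sketch of the same technique with a hypothetical module name:

```python
import sys

def is_feature_active() -> bool:
    # Only inspect the module if something else already imported it;
    # .get() never triggers an import, so startup stays cheap.
    mod = sys.modules.get("some_heavy_module")  # hypothetical name
    return mod is not None and getattr(mod, "enabled", False)
```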
modal/_runtime/asgi.py CHANGED
@@ -1,4 +1,8 @@
  # Copyright Modal Labs 2022
+
+ # Note: this module isn't imported unless it's needed.
+ # This is because aiohttp is a pretty big dependency that adds significant latency when imported
+
  import asyncio
  from collections.abc import AsyncGenerator
  from typing import Any, Callable, NoReturn, Optional, cast
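
The latency claim in the new comment is easy to verify locally; a quick measurement (numbers vary by machine and are not from this diff):

```python
import time

t0 = time.perf_counter()
import aiohttp  # noqa: F401  # the dependency this module now defers

print(f"importing aiohttp took {time.perf_counter() - t0:.3f}s")
```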
modal/_runtime/user_code_imports.py CHANGED
@@ -9,15 +9,6 @@ import modal._runtime.container_io_manager
  import modal.cls
  import modal.object
  from modal import Function
- from modal._runtime.asgi import (
-     LifespanManager,
-     asgi_app_wrapper,
-     get_ip_address,
-     wait_for_web_server,
-     web_server_proxy,
-     webhook_asgi_app,
-     wsgi_app_wrapper,
- )
  from modal._utils.async_utils import synchronizer
  from modal._utils.function_utils import LocalFunctionError, is_async as get_is_async, is_global_object
  from modal.exception import ExecutionError, InvalidError
@@ -28,6 +19,7 @@ from modal_proto import api_pb2
  if typing.TYPE_CHECKING:
      import modal.app
      import modal.partial_function
+     from modal._runtime.asgi import LifespanManager


  @dataclass
@@ -36,7 +28,7 @@ class FinalizedFunction:
      is_async: bool
      is_generator: bool
      data_format: int  # api_pb2.DataFormat
-     lifespan_manager: Optional[LifespanManager] = None
+     lifespan_manager: Optional["LifespanManager"] = None


  class Service(metaclass=ABCMeta):
@@ -63,19 +55,22 @@ def construct_webhook_callable(
      webhook_config: api_pb2.WebhookConfig,
      container_io_manager: "modal._runtime.container_io_manager.ContainerIOManager",
  ):
+     # Note: aiohttp is a significant dependency of the `asgi` module, so we import it locally
+     from modal._runtime import asgi
+
      # For webhooks, the user function is used to construct an asgi app:
      if webhook_config.type == api_pb2.WEBHOOK_TYPE_ASGI_APP:
          # Function returns an asgi_app, which we can use as a callable.
-         return asgi_app_wrapper(user_defined_callable(), container_io_manager)
+         return asgi.asgi_app_wrapper(user_defined_callable(), container_io_manager)

      elif webhook_config.type == api_pb2.WEBHOOK_TYPE_WSGI_APP:
-         # Function returns an wsgi_app, which we can use as a callable.
-         return wsgi_app_wrapper(user_defined_callable(), container_io_manager)
+         # Function returns an wsgi_app, which we can use as a callable
+         return asgi.wsgi_app_wrapper(user_defined_callable(), container_io_manager)

      elif webhook_config.type == api_pb2.WEBHOOK_TYPE_FUNCTION:
          # Function is a webhook without an ASGI app. Create one for it.
-         return asgi_app_wrapper(
-             webhook_asgi_app(user_defined_callable, webhook_config.method, webhook_config.web_endpoint_docs),
+         return asgi.asgi_app_wrapper(
+             asgi.webhook_asgi_app(user_defined_callable, webhook_config.method, webhook_config.web_endpoint_docs),
              container_io_manager,
          )

@@ -86,11 +81,11 @@ def construct_webhook_callable(
          # We intentionally try to connect to the external interface instead of the loopback
          # interface here so users are forced to expose the server. This allows us to potentially
          # change the implementation to use an external bridge in the future.
-         host = get_ip_address(b"eth0")
+         host = asgi.get_ip_address(b"eth0")
          port = webhook_config.web_server_port
          startup_timeout = webhook_config.web_server_startup_timeout
-         wait_for_web_server(host, port, timeout=startup_timeout)
-         return asgi_app_wrapper(web_server_proxy(host, port), container_io_manager)
+         asgi.wait_for_web_server(host, port, timeout=startup_timeout)
+         return asgi.asgi_app_wrapper(asgi.web_server_proxy(host, port), container_io_manager)
      else:
          raise InvalidError(f"Unrecognized web endpoint type {webhook_config.type}")

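This file shows the pattern used throughout the release: move the heavy import into the function that needs it, and keep a `TYPE_CHECKING` import plus quoted annotations so type checkers still see the real types. A generic sketch (module names are illustrative):

```python
from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # Seen only by type checkers; costs nothing at runtime.
    from heavy_dependency import HeavyThing  # hypothetical module

def handle(thing: Optional["HeavyThing"] = None):
    # Deferred import: the cost is paid on first call, not at process start.
    import heavy_dependency

    return heavy_dependency.process(thing)
```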
modal/_utils/blob_utils.py CHANGED
@@ -9,22 +9,22 @@ import time
  from collections.abc import AsyncIterator
  from contextlib import AbstractContextManager, contextmanager
  from pathlib import Path, PurePosixPath
- from typing import Any, BinaryIO, Callable, Optional, Union
+ from typing import TYPE_CHECKING, Any, BinaryIO, Callable, Optional, Union
  from urllib.parse import urlparse

- from aiohttp import BytesIOPayload
- from aiohttp.abc import AbstractStreamWriter
-
  from modal_proto import api_pb2
  from modal_proto.modal_api_grpc import ModalClientModal

  from ..exception import ExecutionError
  from .async_utils import TaskContext, retry
  from .grpc_utils import retry_transient_errors
- from .hash_utils import UploadHashes, get_sha256_hex, get_upload_hashes
+ from .hash_utils import UploadHashes, get_upload_hashes
  from .http_utils import ClientSessionRegistry
  from .logger import logger

+ if TYPE_CHECKING:
+     from .bytes_io_segment_payload import BytesIOSegmentPayload
+
  # Max size for function inputs and outputs.
  MAX_OBJECT_SIZE_BYTES = 2 * 1024 * 1024  # 2 MiB

@@ -38,93 +38,16 @@ BLOB_MAX_PARALLELISM = 10
  # read ~16MiB chunks by default
  DEFAULT_SEGMENT_CHUNK_SIZE = 2**24

-
- class BytesIOSegmentPayload(BytesIOPayload):
-     """Modified bytes payload for concurrent sends of chunks from the same file.
-
-     Adds:
-     * read limit using remaining_bytes, in order to split files across streams
-     * larger read chunk (to prevent excessive read contention between parts)
-     * calculates an md5 for the segment
-
-     Feels like this should be in some standard lib...
-     """
-
-     def __init__(
-         self,
-         bytes_io: BinaryIO,  # should *not* be shared as IO position modification is not locked
-         segment_start: int,
-         segment_length: int,
-         chunk_size: int = DEFAULT_SEGMENT_CHUNK_SIZE,
-         progress_report_cb: Optional[Callable] = None,
-     ):
-         # not thread safe constructor!
-         super().__init__(bytes_io)
-         self.initial_seek_pos = bytes_io.tell()
-         self.segment_start = segment_start
-         self.segment_length = segment_length
-         # seek to start of file segment we are interested in, in order to make .size() evaluate correctly
-         self._value.seek(self.initial_seek_pos + segment_start)
-         assert self.segment_length <= super().size
-         self.chunk_size = chunk_size
-         self.progress_report_cb = progress_report_cb or (lambda *_, **__: None)
-         self.reset_state()
-
-     def reset_state(self):
-         self._md5_checksum = hashlib.md5()
-         self.num_bytes_read = 0
-         self._value.seek(self.initial_seek_pos)
-
-     @contextmanager
-     def reset_on_error(self):
-         try:
-             yield
-         except Exception as exc:
-             try:
-                 self.progress_report_cb(reset=True)
-             except Exception as cb_exc:
-                 raise cb_exc from exc
-             raise exc
-         finally:
-             self.reset_state()
-
-     @property
-     def size(self) -> int:
-         return self.segment_length
-
-     def md5_checksum(self):
-         return self._md5_checksum
-
-     async def write(self, writer: AbstractStreamWriter):
-         loop = asyncio.get_event_loop()
-
-         async def safe_read():
-             read_start = self.initial_seek_pos + self.segment_start + self.num_bytes_read
-             self._value.seek(read_start)
-             num_bytes = min(self.chunk_size, self.remaining_bytes())
-             chunk = await loop.run_in_executor(None, self._value.read, num_bytes)
-
-             await loop.run_in_executor(None, self._md5_checksum.update, chunk)
-             self.num_bytes_read += len(chunk)
-             return chunk
-
-         chunk = await safe_read()
-         while chunk and self.remaining_bytes() > 0:
-             await writer.write(chunk)
-             self.progress_report_cb(advance=len(chunk))
-             chunk = await safe_read()
-         if chunk:
-             await writer.write(chunk)
-             self.progress_report_cb(advance=len(chunk))
-
-     def remaining_bytes(self):
-         return self.segment_length - self.num_bytes_read
+ # Files larger than this will be multipart uploaded. The server might request multipart upload for smaller files as
+ # well, but the limit will never be raised.
+ # TODO(dano): remove this once we stop requiring md5 for blobs
+ MULTIPART_UPLOAD_THRESHOLD = 1024**3


  @retry(n_attempts=5, base_delay=0.5, timeout=None)
  async def _upload_to_s3_url(
      upload_url,
-     payload: BytesIOSegmentPayload,
+     payload: "BytesIOSegmentPayload",
      content_md5_b64: Optional[str] = None,
      content_type: Optional[str] = "application/octet-stream",  # set to None to force omission of ContentType header
  ) -> str:
@@ -180,6 +103,8 @@ async def perform_multipart_upload(
      upload_chunk_size: int = DEFAULT_SEGMENT_CHUNK_SIZE,
      progress_report_cb: Optional[Callable] = None,
  ) -> None:
+     from .bytes_io_segment_payload import BytesIOSegmentPayload
+
      upload_coros = []
      file_offset = 0
      num_bytes_left = content_length
@@ -273,6 +198,8 @@ async def _blob_upload(
              progress_report_cb=progress_report_cb,
          )
      else:
+         from .bytes_io_segment_payload import BytesIOSegmentPayload
+
          payload = BytesIOSegmentPayload(
              data, segment_start=0, segment_length=content_length, progress_report_cb=progress_report_cb
          )
@@ -305,9 +232,13 @@ async def blob_upload(payload: bytes, stub: ModalClientModal) -> str:


  async def blob_upload_file(
-     file_obj: BinaryIO, stub: ModalClientModal, progress_report_cb: Optional[Callable] = None
+     file_obj: BinaryIO,
+     stub: ModalClientModal,
+     progress_report_cb: Optional[Callable] = None,
+     sha256_hex: Optional[str] = None,
+     md5_hex: Optional[str] = None,
  ) -> str:
-     upload_hashes = get_upload_hashes(file_obj)
+     upload_hashes = get_upload_hashes(file_obj, sha256_hex=sha256_hex, md5_hex=md5_hex)
      return await _blob_upload(upload_hashes, file_obj, stub, progress_report_cb)


@@ -366,6 +297,7 @@ class FileUploadSpec:
      use_blob: bool
      content: Optional[bytes]  # typically None if using blob, required otherwise
      sha256_hex: str
+     md5_hex: str
      mode: int  # file permission bits (last 12 bits of st_mode)
      size: int

@@ -383,13 +315,15 @@ def _get_file_upload_spec(
      fp.seek(0)

      if size >= LARGE_FILE_LIMIT:
+         # TODO(dano): remove the placeholder md5 once we stop requiring md5 for blobs
+         md5_hex = "baadbaadbaadbaadbaadbaadbaadbaad" if size > MULTIPART_UPLOAD_THRESHOLD else None
          use_blob = True
          content = None
-         sha256_hex = get_sha256_hex(fp)
+         hashes = get_upload_hashes(fp, md5_hex=md5_hex)
      else:
          use_blob = False
          content = fp.read()
-         sha256_hex = get_sha256_hex(content)
+         hashes = get_upload_hashes(content)

      return FileUploadSpec(
          source=source,
@@ -397,7 +331,8 @@ def _get_file_upload_spec(
          mount_filename=mount_filename.as_posix(),
          use_blob=use_blob,
          content=content,
-         sha256_hex=sha256_hex,
+         sha256_hex=hashes.sha256_hex(),
+         md5_hex=hashes.md5_hex(),
          mode=mode & 0o7777,
          size=size,
      )
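
With the new `MULTIPART_UPLOAD_THRESHOLD`, files above 1 GiB go through `perform_multipart_upload`, which slices the upload into fixed-size parts. A sketch of the offset arithmetic such a loop implies (illustrative, not Modal's exact code):

```python
DEFAULT_SEGMENT_CHUNK_SIZE = 2**24  # ~16 MiB, as defined above

def plan_segments(content_length: int, part_size: int = DEFAULT_SEGMENT_CHUNK_SIZE):
    """Yield (offset, length) pairs covering the whole upload."""
    offset = 0
    while offset < content_length:
        length = min(part_size, content_length - offset)
        yield offset, length
        offset += length

# A 40 MiB file becomes three parts: two full 16 MiB chunks plus a remainder.
assert len(list(plan_segments(40 * 2**20))) == 3
```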
modal/_utils/bytes_io_segment_payload.py ADDED
@@ -0,0 +1,97 @@
+ # Copyright Modal Labs 2024
+
+ import asyncio
+ import hashlib
+ from contextlib import contextmanager
+ from typing import BinaryIO, Callable, Optional
+
+ # Note: this module needs to import aiohttp in global scope
+ # This takes about 50ms and isn't needed in many cases for Modal execution
+ # To avoid this, we import it in local scope when needed (blob_utils.py)
+ from aiohttp import BytesIOPayload
+ from aiohttp.abc import AbstractStreamWriter
+
+ # read ~16MiB chunks by default
+ DEFAULT_SEGMENT_CHUNK_SIZE = 2**24
+
+
+ class BytesIOSegmentPayload(BytesIOPayload):
+     """Modified bytes payload for concurrent sends of chunks from the same file.
+
+     Adds:
+     * read limit using remaining_bytes, in order to split files across streams
+     * larger read chunk (to prevent excessive read contention between parts)
+     * calculates an md5 for the segment
+
+     Feels like this should be in some standard lib...
+     """
+
+     def __init__(
+         self,
+         bytes_io: BinaryIO,  # should *not* be shared as IO position modification is not locked
+         segment_start: int,
+         segment_length: int,
+         chunk_size: int = DEFAULT_SEGMENT_CHUNK_SIZE,
+         progress_report_cb: Optional[Callable] = None,
+     ):
+         # not thread safe constructor!
+         super().__init__(bytes_io)
+         self.initial_seek_pos = bytes_io.tell()
+         self.segment_start = segment_start
+         self.segment_length = segment_length
+         # seek to start of file segment we are interested in, in order to make .size() evaluate correctly
+         self._value.seek(self.initial_seek_pos + segment_start)
+         assert self.segment_length <= super().size
+         self.chunk_size = chunk_size
+         self.progress_report_cb = progress_report_cb or (lambda *_, **__: None)
+         self.reset_state()
+
+     def reset_state(self):
+         self._md5_checksum = hashlib.md5()
+         self.num_bytes_read = 0
+         self._value.seek(self.initial_seek_pos)
+
+     @contextmanager
+     def reset_on_error(self):
+         try:
+             yield
+         except Exception as exc:
+             try:
+                 self.progress_report_cb(reset=True)
+             except Exception as cb_exc:
+                 raise cb_exc from exc
+             raise exc
+         finally:
+             self.reset_state()
+
+     @property
+     def size(self) -> int:
+         return self.segment_length
+
+     def md5_checksum(self):
+         return self._md5_checksum
+
+     async def write(self, writer: "AbstractStreamWriter"):
+         loop = asyncio.get_event_loop()
+
+         async def safe_read():
+             read_start = self.initial_seek_pos + self.segment_start + self.num_bytes_read
+             self._value.seek(read_start)
+             num_bytes = min(self.chunk_size, self.remaining_bytes())
+             chunk = await loop.run_in_executor(None, self._value.read, num_bytes)
+
+             await loop.run_in_executor(None, self._md5_checksum.update, chunk)
+             self.num_bytes_read += len(chunk)
+             return chunk
+
+         chunk = await safe_read()
+         while chunk and self.remaining_bytes() > 0:
+             await writer.write(chunk)
+             self.progress_report_cb(advance=len(chunk))
+             chunk = await safe_read()
+         if chunk:
+             await writer.write(chunk)
+             self.progress_report_cb(advance=len(chunk))
+
+     def remaining_bytes(self):
+         return self.segment_length - self.num_bytes_read
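
A minimal usage sketch of the relocated class (assumes `modal` and `aiohttp` are installed; per the constructor comment, each concurrent segment must wrap its own file object):

```python
import io

from modal._utils.bytes_io_segment_payload import BytesIOSegmentPayload

data = b"x" * 100
# One BytesIO per segment, so concurrent parts don't fight over seek position.
payload = BytesIOSegmentPayload(io.BytesIO(data), segment_start=50, segment_length=50)
assert payload.size == 50  # size reflects the segment, not the whole buffer
```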
modal/_utils/deprecation.py ADDED
@@ -0,0 +1,44 @@
+ # Copyright Modal Labs 2024
+ import sys
+ import warnings
+ from datetime import date
+
+ from ..exception import DeprecationError, PendingDeprecationError
+
+ _INTERNAL_MODULES = ["modal", "synchronicity"]
+
+
+ def _is_internal_frame(frame):
+     module = frame.f_globals["__name__"].split(".")[0]
+     return module in _INTERNAL_MODULES
+
+
+ def deprecation_error(deprecated_on: tuple[int, int, int], msg: str):
+     raise DeprecationError(f"Deprecated on {date(*deprecated_on)}: {msg}")
+
+
+ def deprecation_warning(
+     deprecated_on: tuple[int, int, int], msg: str, *, pending: bool = False, show_source: bool = True
+ ) -> None:
+     """Issue a Modal deprecation warning with source optionally attributed to user code.
+
+     See the implementation of the built-in [warnings.warn](https://docs.python.org/3/library/warnings.html#available-functions).
+     """
+     filename, lineno = "<unknown>", 0
+     if show_source:
+         # Find the last non-Modal line that triggered the warning
+         try:
+             frame = sys._getframe()
+             while frame is not None and _is_internal_frame(frame):
+                 frame = frame.f_back
+             if frame is not None:
+                 filename = frame.f_code.co_filename
+                 lineno = frame.f_lineno
+         except ValueError:
+             # Use the defaults from above
+             pass
+
+     warning_cls = PendingDeprecationError if pending else DeprecationError
+
+     # This is a lower-level function that warnings.warn uses
+     warnings.warn_explicit(f"{date(*deprecated_on)}: {msg}", warning_cls, filename, lineno)
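
A hedged sketch of a call site (the date and message are made up for illustration). Because internal `modal`/`synchronicity` frames are skipped, the warning is attributed to the user's file and line:

```python
from modal._utils.deprecation import deprecation_warning

def old_helper():
    # Hypothetical deprecation: pending=True selects PendingDeprecationError.
    deprecation_warning((2024, 12, 1), "`old_helper` is deprecated.", pending=True)
```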
modal/_utils/hash_utils.py CHANGED
@@ -3,14 +3,14 @@ import base64
  import dataclasses
  import hashlib
  import time
- from typing import BinaryIO, Callable, Union
+ from typing import BinaryIO, Callable, Optional, Sequence, Union

  from modal.config import logger

  HASH_CHUNK_SIZE = 65536


- def _update(hashers: list[Callable[[bytes], None]], data: Union[bytes, BinaryIO]) -> None:
+ def _update(hashers: Sequence[Callable[[bytes], None]], data: Union[bytes, BinaryIO]) -> None:
      if isinstance(data, bytes):
          for hasher in hashers:
              hasher(data)
@@ -57,15 +57,44 @@ class UploadHashes:
      md5_base64: str
      sha256_base64: str

+     def md5_hex(self) -> str:
+         return base64.b64decode(self.md5_base64).hex()

- def get_upload_hashes(data: Union[bytes, BinaryIO]) -> UploadHashes:
+     def sha256_hex(self) -> str:
+         return base64.b64decode(self.sha256_base64).hex()
+
+
+ def get_upload_hashes(
+     data: Union[bytes, BinaryIO], sha256_hex: Optional[str] = None, md5_hex: Optional[str] = None
+ ) -> UploadHashes:
      t0 = time.monotonic()
-     md5 = hashlib.md5()
-     sha256 = hashlib.sha256()
-     _update([md5.update, sha256.update], data)
+     hashers = {}
+
+     if not sha256_hex:
+         sha256 = hashlib.sha256()
+         hashers["sha256"] = sha256
+     if not md5_hex:
+         md5 = hashlib.md5()
+         hashers["md5"] = md5
+
+     if hashers:
+         updaters = [h.update for h in hashers.values()]
+         _update(updaters, data)
+
+     if sha256_hex:
+         sha256_base64 = base64.b64encode(bytes.fromhex(sha256_hex)).decode("ascii")
+     else:
+         sha256_base64 = base64.b64encode(hashers["sha256"].digest()).decode("ascii")
+
+     if md5_hex:
+         md5_base64 = base64.b64encode(bytes.fromhex(md5_hex)).decode("ascii")
+     else:
+         md5_base64 = base64.b64encode(hashers["md5"].digest()).decode("ascii")
+
      hashes = UploadHashes(
-         md5_base64=base64.b64encode(md5.digest()).decode("ascii"),
-         sha256_base64=base64.b64encode(sha256.digest()).decode("ascii"),
+         md5_base64=md5_base64,
+         sha256_base64=sha256_base64,
      )
-     logger.debug("get_upload_hashes took %.3fs", time.monotonic() - t0)
+
+     logger.debug("get_upload_hashes took %.3fs (%s)", time.monotonic() - t0, hashers.keys())
      return hashes
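
The new keyword arguments let callers skip recomputing a digest they already know, e.g. the placeholder md5 used for large blobs in `blob_utils.py` above. A quick round-trip check:

```python
import hashlib

from modal._utils.hash_utils import get_upload_hashes

data = b"example payload"
precomputed_md5 = hashlib.md5(data).hexdigest()

# Only sha256 is actually hashed here; the md5 is re-encoded from hex.
hashes = get_upload_hashes(data, md5_hex=precomputed_md5)
assert hashes.md5_hex() == precomputed_md5
```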
modal/_utils/http_utils.py CHANGED
@@ -1,18 +1,18 @@
  # Copyright Modal Labs 2022
  import contextlib
- import socket
- import ssl
- from typing import Optional
+ from typing import TYPE_CHECKING, Optional

- import certifi
- from aiohttp import ClientSession, ClientTimeout, TCPConnector
- from aiohttp.web import Application
- from aiohttp.web_runner import AppRunner, SockSite
+ # Note: importing aiohttp seems to take about 100ms, and it's not really necessarily,
+ # unless we need to work with blobs. So that's why we import it lazily instead.
+
+ if TYPE_CHECKING:
+     from aiohttp import ClientSession
+     from aiohttp.web import Application

  from .async_utils import on_shutdown


- def _http_client_with_tls(timeout: Optional[float]) -> ClientSession:
+ def _http_client_with_tls(timeout: Optional[float]) -> "ClientSession":
      """Create a new HTTP client session with standard, bundled TLS certificates.

      This is necessary to prevent client issues on some system where Python does
@@ -22,13 +22,18 @@ def _http_client_with_tls(timeout: Optional[float]) -> ClientSession:
      Specifically: the error "unable to get local issuer certificate" when making
      an aiohttp request.
      """
+     import ssl
+
+     import certifi
+     from aiohttp import ClientSession, ClientTimeout, TCPConnector
+
      ssl_context = ssl.create_default_context(cafile=certifi.where())
      connector = TCPConnector(ssl=ssl_context)
      return ClientSession(connector=connector, timeout=ClientTimeout(total=timeout))


  class ClientSessionRegistry:
-     _client_session: ClientSession
+     _client_session: "ClientSession"
      _client_session_active: bool = False

      @staticmethod
@@ -47,9 +52,13 @@ class ClientSessionRegistry:


  @contextlib.asynccontextmanager
- async def run_temporary_http_server(app: Application):
+ async def run_temporary_http_server(app: "Application"):
      # Allocates a random port, runs a server in a context manager
      # This is used in various tests
+     import socket
+
+     from aiohttp.web_runner import AppRunner, SockSite
+
      sock = socket.socket()
      sock.bind(("", 0))
      port = sock.getsockname()[1]
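
A hedged smoke test for the helper above (assumes aiohttp is installed; what the context manager yields is not shown in this diff, so the sketch doesn't rely on it):

```python
import asyncio

from aiohttp import web

from modal._utils.http_utils import run_temporary_http_server

async def main():
    app = web.Application()  # trivial app with no routes
    async with run_temporary_http_server(app):
        await asyncio.sleep(0.1)  # server is live on a random local port

asyncio.run(main())
```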
modal/_utils/{pattern_matcher.py → pattern_utils.py} RENAMED
@@ -5,7 +5,7 @@ This is the same pattern-matching logic used by Docker, except it is written in
  Python rather than Go. Also, the original Go library has a couple deprecated
  functions that we don't implement in this port.

- The main way to use this library is by constructing a `PatternMatcher` object,
+ The main way to use this library is by constructing a `FilePatternMatcher` object,
  then asking it whether file paths match any of its patterns.
  """

@@ -148,75 +148,6 @@ class Pattern:
          return False


- class PatternMatcher:
-     """Allows checking paths against a list of patterns."""
-
-     def __init__(self, patterns: list[str]) -> None:
-         """Initialize a new PatternMatcher instance.
-
-         Args:
-             patterns (list): A list of pattern strings.
-
-         Raises:
-             ValueError: If an illegal exclusion pattern is provided.
-         """
-         self.patterns: list[Pattern] = []
-         self.exclusions = False
-         for pattern in patterns:
-             pattern = pattern.strip()
-             if not pattern:
-                 continue
-             pattern = os.path.normpath(pattern)
-             new_pattern = Pattern()
-             if pattern[0] == "!":
-                 if len(pattern) == 1:
-                     raise ValueError('Illegal exclusion pattern: "!"')
-                 new_pattern.exclusion = True
-                 pattern = pattern[1:]
-                 self.exclusions = True
-             # In Python, we can proceed without explicit syntax checking
-             new_pattern.cleaned_pattern = pattern
-             new_pattern.dirs = pattern.split(os.path.sep)
-             self.patterns.append(new_pattern)
-
-     def matches(self, file_path: str) -> bool:
-         """Check if the file path or any of its parent directories match the patterns.
-
-         This is equivalent to `MatchesOrParentMatches()` in the original Go
-         library. The reason is that `Matches()` in the original library is
-         deprecated due to buggy behavior.
-         """
-         matched = False
-         file_path = os.path.normpath(file_path)
-         if file_path == ".":
-             # Don't let them exclude everything; kind of silly.
-             return False
-         parent_path = os.path.dirname(file_path)
-         if parent_path == "":
-             parent_path = "."
-         parent_path_dirs = parent_path.split(os.path.sep)
-
-         for pattern in self.patterns:
-             # Skip evaluation based on current match status and pattern exclusion
-             if pattern.exclusion != matched:
-                 continue
-
-             match = pattern.match(file_path)
-
-             if not match and parent_path != ".":
-                 # Check if the pattern matches any of the parent directories
-                 for i in range(len(parent_path_dirs)):
-                     dir_path = os.path.sep.join(parent_path_dirs[: i + 1])
-                     if pattern.match(dir_path):
-                         match = True
-                         break
-
-             if match:
-                 matched = not pattern.exclusion
-
-         return matched
-
-
  def read_ignorefile(reader: TextIO) -> list[str]:
      """Read an ignore file from a reader and return the list of file patterns to
      ignore, applying the following rules: