modal 1.0.6.dev8__py3-none-any.whl → 1.0.6.dev15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of modal might be problematic; see the package's registry advisory page for more details.

@@ -15,7 +15,6 @@ if telemetry_socket:
15
15
  instrument_imports(telemetry_socket)
16
16
 
17
17
  import asyncio
18
- import concurrent.futures
19
18
  import inspect
20
19
  import queue
21
20
  import signal
@@ -49,7 +48,6 @@ from ._runtime.container_io_manager import (
49
48
  ContainerIOManager,
50
49
  IOContext,
51
50
  UserException,
52
- _ContainerIOManager,
53
51
  )
54
52
 
55
53
  if TYPE_CHECKING:
@@ -198,21 +196,16 @@ def call_function(
198
196
 
199
197
  # Send up to this many outputs at a time.
200
198
  generator_queue: asyncio.Queue[Any] = await container_io_manager._queue_create.aio(1024)
201
- generator_output_task = asyncio.create_task(
202
- container_io_manager.generator_output_task.aio(
203
- function_call_ids[0],
204
- io_context.finalized_function.data_format,
205
- generator_queue,
206
- )
207
- )
208
-
209
- item_count = 0
210
- async for value in res:
211
- await container_io_manager._queue_put.aio(generator_queue, value)
212
- item_count += 1
199
+ async with container_io_manager.generator_output_sender(
200
+ function_call_ids[0],
201
+ io_context.finalized_function.data_format,
202
+ generator_queue,
203
+ ):
204
+ item_count = 0
205
+ async for value in res:
206
+ await container_io_manager._queue_put.aio(generator_queue, value)
207
+ item_count += 1
213
208
 
214
- await container_io_manager._queue_put.aio(generator_queue, _ContainerIOManager._GENERATOR_STOP_SENTINEL)
215
- await generator_output_task # Wait to finish sending generator outputs.
216
209
  message = api_pb2.GeneratorDone(items_total=item_count)
217
210
  await container_io_manager.push_outputs.aio(
218
211
  io_context,
@@ -249,20 +242,17 @@ def call_function(
249
242
 
250
243
  # Send up to this many outputs at a time.
251
244
  generator_queue: asyncio.Queue[Any] = container_io_manager._queue_create(1024)
252
- generator_output_task: concurrent.futures.Future = container_io_manager.generator_output_task( # type: ignore
245
+
246
+ with container_io_manager.generator_output_sender(
253
247
  function_call_ids[0],
254
248
  io_context.finalized_function.data_format,
255
249
  generator_queue,
256
- _future=True, # type: ignore # Synchronicity magic to return a future.
257
- )
258
-
259
- item_count = 0
260
- for value in res:
261
- container_io_manager._queue_put(generator_queue, value)
262
- item_count += 1
250
+ ):
251
+ item_count = 0
252
+ for value in res:
253
+ container_io_manager._queue_put(generator_queue, value)
254
+ item_count += 1
263
255
 
264
- container_io_manager._queue_put(generator_queue, _ContainerIOManager._GENERATOR_STOP_SENTINEL)
265
- generator_output_task.result() # Wait to finish sending generator outputs.
266
256
  message = api_pb2.GeneratorDone(items_total=item_count)
267
257
  container_io_manager.push_outputs(io_context, started_at, message, api_pb2.DATA_FORMAT_GENERATOR_DONE)
268
258
  else:
modal/_functions.py CHANGED
@@ -40,6 +40,7 @@ from ._utils.async_utils import (
40
40
  synchronizer,
41
41
  warn_if_generator_is_not_consumed,
42
42
  )
43
+ from ._utils.blob_utils import MAX_OBJECT_SIZE_BYTES
43
44
  from ._utils.deprecation import deprecation_warning, warn_if_passing_namespace
44
45
  from ._utils.function_utils import (
45
46
  ATTEMPT_TIMEOUT_GRACE_PERIOD,
@@ -145,6 +146,7 @@ class _Invocation:
145
146
  args,
146
147
  kwargs,
147
148
  stub,
149
+ max_object_size_bytes=function._max_object_size_bytes,
148
150
  method_name=function._use_method_name,
149
151
  function_call_invocation_type=function_call_invocation_type,
150
152
  )
@@ -386,7 +388,13 @@ class _InputPlaneInvocation:
386
388
  function_id = function.object_id
387
389
  control_plane_stub = client.stub
388
390
  # Note: Blob upload is done on the control plane stub, not the input plane stub!
389
- input_item = await _create_input(args, kwargs, control_plane_stub, method_name=function._use_method_name)
391
+ input_item = await _create_input(
392
+ args,
393
+ kwargs,
394
+ control_plane_stub,
395
+ max_object_size_bytes=function._max_object_size_bytes,
396
+ method_name=function._use_method_name,
397
+ )
390
398
 
391
399
  request = api_pb2.AttemptStartRequest(
392
400
  function_id=function_id,
@@ -443,8 +451,10 @@ class _InputPlaneInvocation:
443
451
  self.attempt_token = retry_response.attempt_token
444
452
  continue
445
453
 
454
+ control_plane_stub = self.client.stub
455
+ # Note: Blob download is done on the control plane stub, not the input plane stub!
446
456
  return await _process_result(
447
- await_response.output.result, await_response.output.data_format, self.stub, self.client
457
+ await_response.output.result, await_response.output.data_format, control_plane_stub, self.client
448
458
  )
449
459
 
450
460
 
@@ -1414,6 +1424,15 @@ class _Function(typing.Generic[P, ReturnType, OriginalReturnType], _Object, type
1414
1424
  self._definition_id = metadata.definition_id
1415
1425
  self._input_plane_url = metadata.input_plane_url
1416
1426
  self._input_plane_region = metadata.input_plane_region
1427
+ # The server may pass back a larger max object size for some input plane users. This applies to input plane
1428
+ # users only - anyone using the control plane will get the standard limit.
1429
+ # There are some cases like FunctionPrecreate where this value is not set at all. We expect that this field
1430
+ # will eventually be hydrated with the correct value, but just to be defensive, if the field is not set we use
1431
+ # MAX_OBJECT_SIZE_BYTES, otherwise it would get set to 0. Accidentally using 0 would cause us to blob upload
1432
+ # everything, so let's avoid that.
1433
+ self._max_object_size_bytes = (
1434
+ metadata.max_object_size_bytes if metadata.HasField("max_object_size_bytes") else MAX_OBJECT_SIZE_BYTES
1435
+ )
1417
1436
 
1418
1437
  def _get_metadata(self):
1419
1438
  # Overridden concrete implementation of base class method
@@ -1430,6 +1449,7 @@ class _Function(typing.Generic[P, ReturnType, OriginalReturnType], _Object, type
1430
1449
  function_schema=self._metadata.function_schema if self._metadata else None,
1431
1450
  input_plane_url=self._input_plane_url,
1432
1451
  input_plane_region=self._input_plane_region,
1452
+ max_object_size_bytes=self._max_object_size_bytes,
1433
1453
  )
1434
1454
 
1435
1455
  def _check_no_web_url(self, fn_name: str):
@@ -290,7 +290,6 @@ class _ContainerIOManager:
290
290
 
291
291
  _client: _Client
292
292
 
293
- _GENERATOR_STOP_SENTINEL: ClassVar[Sentinel] = Sentinel()
294
293
  _singleton: ClassVar[Optional["_ContainerIOManager"]] = None
295
294
 
296
295
  def _init(self, container_args: api_pb2.ContainerArguments, client: _Client):
@@ -508,33 +507,47 @@ class _ContainerIOManager:
508
507
  req = api_pb2.FunctionCallPutDataRequest(function_call_id=function_call_id, data_chunks=data_chunks)
509
508
  await retry_transient_errors(self._client.stub.FunctionCallPutDataOut, req)
510
509
 
511
- async def generator_output_task(self, function_call_id: str, data_format: int, message_rx: asyncio.Queue) -> None:
512
- """Task that feeds generator outputs into a function call's `data_out` stream."""
513
- index = 1
514
- received_sentinel = False
515
- while not received_sentinel:
516
- message = await message_rx.get()
517
- if message is self._GENERATOR_STOP_SENTINEL:
518
- break
519
- # ASGI 'http.response.start' and 'http.response.body' msgs are observed to be separated by 1ms.
520
- # If we don't sleep here for 1ms we end up with an extra call to .put_data_out().
521
- if index == 1:
522
- await asyncio.sleep(0.001)
523
- serialized_messages = [serialize_data_format(message, data_format)]
524
- total_size = len(serialized_messages[0]) + 512
525
- while total_size < 16 * 1024 * 1024: # 16 MiB, maximum size in a single message
526
- try:
527
- message = message_rx.get_nowait()
528
- except asyncio.QueueEmpty:
529
- break
530
- if message is self._GENERATOR_STOP_SENTINEL:
531
- received_sentinel = True
510
+ @asynccontextmanager
511
+ async def generator_output_sender(
512
+ self, function_call_id: str, data_format: int, message_rx: asyncio.Queue
513
+ ) -> AsyncGenerator[None, None]:
514
+ """Runs background task that feeds generator outputs into a function call's `data_out` stream."""
515
+ GENERATOR_STOP_SENTINEL = Sentinel()
516
+
517
+ async def generator_output_task():
518
+ index = 1
519
+ received_sentinel = False
520
+ while not received_sentinel:
521
+ message = await message_rx.get()
522
+ if message is GENERATOR_STOP_SENTINEL:
532
523
  break
533
- else:
534
- serialized_messages.append(serialize_data_format(message, data_format))
535
- total_size += len(serialized_messages[-1]) + 512 # 512 bytes for estimated framing overhead
536
- await self.put_data_out(function_call_id, index, data_format, serialized_messages)
537
- index += len(serialized_messages)
524
+ # ASGI 'http.response.start' and 'http.response.body' msgs are observed to be separated by 1ms.
525
+ # If we don't sleep here for 1ms we end up with an extra call to .put_data_out().
526
+ if index == 1:
527
+ await asyncio.sleep(0.001)
528
+ serialized_messages = [serialize_data_format(message, data_format)]
529
+ total_size = len(serialized_messages[0]) + 512
530
+ while total_size < 16 * 1024 * 1024: # 16 MiB, maximum size in a single message
531
+ try:
532
+ message = message_rx.get_nowait()
533
+ except asyncio.QueueEmpty:
534
+ break
535
+ if message is GENERATOR_STOP_SENTINEL:
536
+ received_sentinel = True
537
+ break
538
+ else:
539
+ serialized_messages.append(serialize_data_format(message, data_format))
540
+ total_size += len(serialized_messages[-1]) + 512 # 512 bytes for estimated framing overhead
541
+ await self.put_data_out(function_call_id, index, data_format, serialized_messages)
542
+ index += len(serialized_messages)
543
+
544
+ task = asyncio.create_task(generator_output_task())
545
+ try:
546
+ yield
547
+ finally:
548
+ # gracefully stop the task after all current inputs have been sent
549
+ await message_rx.put(GENERATOR_STOP_SENTINEL)
550
+ await task
538
551
 
539
552
  async def _queue_create(self, size: int) -> asyncio.Queue:
540
553
  """Create a queue, on the synchronicity event loop (needed on Python 3.8 and 3.9)."""
@@ -106,7 +106,6 @@ class _ContainerIOManager:
106
106
  _is_interactivity_enabled: bool
107
107
  _fetching_inputs: bool
108
108
  _client: modal.client._Client
109
- _GENERATOR_STOP_SENTINEL: typing.ClassVar[Sentinel]
110
109
  _singleton: typing.ClassVar[typing.Optional[_ContainerIOManager]]
111
110
 
112
111
  def _init(self, container_args: modal_proto.api_pb2.ContainerArguments, client: modal.client._Client): ...
@@ -148,10 +147,10 @@ class _ContainerIOManager:
148
147
  """
149
148
  ...
150
149
 
151
- async def generator_output_task(
150
+ def generator_output_sender(
152
151
  self, function_call_id: str, data_format: int, message_rx: asyncio.queues.Queue
153
- ) -> None:
154
- """Task that feeds generator outputs into a function call's `data_out` stream."""
152
+ ) -> typing.AsyncContextManager[None]:
153
+ """Runs background task that feeds generator outputs into a function call's `data_out` stream."""
155
154
  ...
156
155
 
157
156
  async def _queue_create(self, size: int) -> asyncio.queues.Queue:
@@ -268,7 +267,6 @@ class ContainerIOManager:
268
267
  _is_interactivity_enabled: bool
269
268
  _fetching_inputs: bool
270
269
  _client: modal.client.Client
271
- _GENERATOR_STOP_SENTINEL: typing.ClassVar[Sentinel]
272
270
  _singleton: typing.ClassVar[typing.Optional[ContainerIOManager]]
273
271
 
274
272
  def __init__(self, /, *args, **kwargs):
@@ -367,16 +365,20 @@ class ContainerIOManager:
367
365
 
368
366
  put_data_out: __put_data_out_spec[typing_extensions.Self]
369
367
 
370
- class __generator_output_task_spec(typing_extensions.Protocol[SUPERSELF]):
371
- def __call__(self, /, function_call_id: str, data_format: int, message_rx: asyncio.queues.Queue) -> None:
372
- """Task that feeds generator outputs into a function call's `data_out` stream."""
368
+ class __generator_output_sender_spec(typing_extensions.Protocol[SUPERSELF]):
369
+ def __call__(
370
+ self, /, function_call_id: str, data_format: int, message_rx: asyncio.queues.Queue
371
+ ) -> synchronicity.combined_types.AsyncAndBlockingContextManager[None]:
372
+ """Runs background task that feeds generator outputs into a function call's `data_out` stream."""
373
373
  ...
374
374
 
375
- async def aio(self, /, function_call_id: str, data_format: int, message_rx: asyncio.queues.Queue) -> None:
376
- """Task that feeds generator outputs into a function call's `data_out` stream."""
375
+ def aio(
376
+ self, /, function_call_id: str, data_format: int, message_rx: asyncio.queues.Queue
377
+ ) -> typing.AsyncContextManager[None]:
378
+ """Runs background task that feeds generator outputs into a function call's `data_out` stream."""
377
379
  ...
378
380
 
379
- generator_output_task: __generator_output_task_spec[typing_extensions.Self]
381
+ generator_output_sender: __generator_output_sender_spec[typing_extensions.Self]
380
382
 
381
383
  class ___queue_create_spec(typing_extensions.Protocol[SUPERSELF]):
382
384
  def __call__(self, /, size: int) -> asyncio.queues.Queue:
@@ -188,16 +188,23 @@ def get_content_length(data: BinaryIO) -> int:
188
188
  return content_length - pos
189
189
 
190
190
 
191
- async def _blob_upload_with_fallback(items, blob_ids, callback):
191
+ async def _blob_upload_with_fallback(items, blob_ids: list[str], callback) -> tuple[str, bool, int]:
192
+ r2_latency_ms = 0
193
+ r2_failed = False
192
194
  for idx, (item, blob_id) in enumerate(zip(items, blob_ids)):
193
195
  # We want to default to R2 95% of the time and S3 5% of the time.
194
196
  # To ensure the failure path is continuously exercised.
195
197
  if idx == 0 and len(items) > 1 and random.random() > HEALTHY_R2_UPLOAD_PERCENTAGE:
196
198
  continue
197
199
  try:
200
+ init_time = time.monotonic_ns()
198
201
  await callback(item)
199
- return blob_id
202
+ if blob_id.endswith(":r2"):
203
+ r2_latency_ms = (time.monotonic_ns() - init_time) // 1_000_000
204
+ return blob_id, r2_failed, r2_latency_ms
200
205
  except Exception as _:
206
+ if blob_id.endswith(":r2"):
207
+ r2_failed = True
201
208
  # Ignore all errors except the last one, since we're out of fallback options.
202
209
  if idx == len(items) - 1:
203
210
  raise
@@ -206,7 +213,7 @@ async def _blob_upload_with_fallback(items, blob_ids, callback):
206
213
 
207
214
  async def _blob_upload(
208
215
  upload_hashes: UploadHashes, data: Union[bytes, BinaryIO], stub, progress_report_cb: Optional[Callable] = None
209
- ) -> str:
216
+ ) -> tuple[str, bool, int]:
210
217
  if isinstance(data, bytes):
211
218
  data = BytesIO(data)
212
219
 
@@ -232,7 +239,7 @@ async def _blob_upload(
232
239
  progress_report_cb=progress_report_cb,
233
240
  )
234
241
 
235
- blob_id = await _blob_upload_with_fallback(
242
+ blob_id, r2_failed, r2_latency_ms = await _blob_upload_with_fallback(
236
243
  resp.multiparts.items,
237
244
  resp.blob_ids,
238
245
  upload_multipart_upload,
@@ -252,7 +259,7 @@ async def _blob_upload(
252
259
  content_md5_b64=upload_hashes.md5_base64,
253
260
  )
254
261
 
255
- blob_id = await _blob_upload_with_fallback(
262
+ blob_id, r2_failed, r2_latency_ms = await _blob_upload_with_fallback(
256
263
  resp.upload_urls.items,
257
264
  resp.blob_ids,
258
265
  upload_to_s3_url,
@@ -261,10 +268,10 @@ async def _blob_upload(
261
268
  if progress_report_cb:
262
269
  progress_report_cb(complete=True)
263
270
 
264
- return blob_id
271
+ return blob_id, r2_failed, r2_latency_ms
265
272
 
266
273
 
267
- async def blob_upload(payload: bytes, stub: ModalClientModal) -> str:
274
+ async def blob_upload_with_r2_failure_info(payload: bytes, stub: ModalClientModal) -> tuple[str, bool, int]:
268
275
  size_mib = len(payload) / 1024 / 1024
269
276
  logger.debug(f"Uploading large blob of size {size_mib:.2f} MiB")
270
277
  t0 = time.time()
@@ -272,12 +279,17 @@ async def blob_upload(payload: bytes, stub: ModalClientModal) -> str:
272
279
  logger.warning("Blob uploading string, not bytes - auto-encoding as utf8")
273
280
  payload = payload.encode("utf8")
274
281
  upload_hashes = get_upload_hashes(payload)
275
- blob_id = await _blob_upload(upload_hashes, payload, stub)
282
+ blob_id, r2_failed, r2_latency_ms = await _blob_upload(upload_hashes, payload, stub)
276
283
  dur_s = max(time.time() - t0, 0.001) # avoid division by zero
277
284
  throughput_mib_s = (size_mib) / dur_s
278
285
  logger.debug(
279
286
  f"Uploaded large blob of size {size_mib:.2f} MiB ({throughput_mib_s:.2f} MiB/s, total {dur_s:.2f}s). {blob_id}"
280
287
  )
288
+ return blob_id, r2_failed, r2_latency_ms
289
+
290
+
291
+ async def blob_upload(payload: bytes, stub: ModalClientModal) -> str:
292
+ blob_id, _, _ = await blob_upload_with_r2_failure_info(payload, stub)
281
293
  return blob_id
282
294
 
283
295
 
@@ -289,7 +301,8 @@ async def blob_upload_file(
289
301
  md5_hex: Optional[str] = None,
290
302
  ) -> str:
291
303
  upload_hashes = get_upload_hashes(file_obj, sha256_hex=sha256_hex, md5_hex=md5_hex)
292
- return await _blob_upload(upload_hashes, file_obj, stub, progress_report_cb)
304
+ blob_id, _, _ = await _blob_upload(upload_hashes, file_obj, stub, progress_report_cb)
305
+ return blob_id
293
306
 
294
307
 
295
308
  @retry(n_attempts=5, base_delay=0.1, timeout=None)
@@ -32,7 +32,11 @@ from ..exception import (
32
32
  RemoteError,
33
33
  )
34
34
  from ..mount import ROOT_DIR, _is_modal_path, _Mount
35
- from .blob_utils import MAX_ASYNC_OBJECT_SIZE_BYTES, MAX_OBJECT_SIZE_BYTES, blob_download, blob_upload
35
+ from .blob_utils import (
36
+ MAX_ASYNC_OBJECT_SIZE_BYTES,
37
+ blob_download,
38
+ blob_upload_with_r2_failure_info,
39
+ )
36
40
  from .grpc_utils import RETRYABLE_GRPC_STATUS_CODES
37
41
 
38
42
 
@@ -513,12 +517,13 @@ async def _process_result(result: api_pb2.GenericResult, data_format: int, stub,
513
517
 
514
518
  def should_upload(
515
519
  num_bytes: int,
520
+ max_object_size_bytes: int,
516
521
  function_call_invocation_type: Optional["api_pb2.FunctionCallInvocationType.ValueType"],
517
522
  ) -> bool:
518
523
  """
519
524
  Determine if the input should be uploaded to blob storage.
520
525
  """
521
- return num_bytes > MAX_OBJECT_SIZE_BYTES or (
526
+ return num_bytes > max_object_size_bytes or (
522
527
  function_call_invocation_type == api_pb2.FUNCTION_CALL_INVOCATION_TYPE_ASYNC
523
528
  and num_bytes > MAX_ASYNC_OBJECT_SIZE_BYTES
524
529
  )
@@ -529,6 +534,7 @@ async def _create_input(
529
534
  kwargs,
530
535
  stub: ModalClientModal,
531
536
  *,
537
+ max_object_size_bytes: int,
532
538
  idx: Optional[int] = None,
533
539
  method_name: Optional[str] = None,
534
540
  function_call_invocation_type: Optional["api_pb2.FunctionCallInvocationType.ValueType"] = None,
@@ -543,8 +549,8 @@ async def _create_input(
543
549
 
544
550
  args_serialized = serialize((args, kwargs))
545
551
 
546
- if should_upload(len(args_serialized), function_call_invocation_type):
547
- args_blob_id = await blob_upload(args_serialized, stub)
552
+ if should_upload(len(args_serialized), max_object_size_bytes, function_call_invocation_type):
553
+ args_blob_id, r2_failed, r2_latency_ms = await blob_upload_with_r2_failure_info(args_serialized, stub)
548
554
  return api_pb2.FunctionPutInputsItem(
549
555
  input=api_pb2.FunctionInput(
550
556
  args_blob_id=args_blob_id,
@@ -552,6 +558,8 @@ async def _create_input(
552
558
  method_name=method_name,
553
559
  ),
554
560
  idx=idx,
561
+ r2_failed=r2_failed,
562
+ r2_latency_ms=r2_latency_ms,
555
563
  )
556
564
  else:
557
565
  return api_pb2.FunctionPutInputsItem(
modal/client.pyi CHANGED
@@ -31,7 +31,7 @@ class _Client:
31
31
  server_url: str,
32
32
  client_type: int,
33
33
  credentials: typing.Optional[tuple[str, str]],
34
- version: str = "1.0.6.dev8",
34
+ version: str = "1.0.6.dev15",
35
35
  ):
36
36
  """mdmd:hidden
37
37
  The Modal client object is not intended to be instantiated directly by users.
@@ -160,7 +160,7 @@ class Client:
160
160
  server_url: str,
161
161
  client_type: int,
162
162
  credentials: typing.Optional[tuple[str, str]],
163
- version: str = "1.0.6.dev8",
163
+ version: str = "1.0.6.dev15",
164
164
  ):
165
165
  """mdmd:hidden
166
166
  The Modal client object is not intended to be instantiated directly by users.