modal 1.1.5.dev66__py3-none-any.whl → 1.3.1.dev8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of modal might be problematic. Click here for more details.

Files changed (143)
  1. modal/__init__.py +4 -4
  2. modal/__main__.py +4 -29
  3. modal/_billing.py +84 -0
  4. modal/_clustered_functions.py +1 -3
  5. modal/_container_entrypoint.py +33 -208
  6. modal/_functions.py +171 -138
  7. modal/_grpc_client.py +191 -0
  8. modal/_ipython.py +16 -6
  9. modal/_load_context.py +106 -0
  10. modal/_object.py +72 -21
  11. modal/_output.py +12 -14
  12. modal/_partial_function.py +31 -4
  13. modal/_resolver.py +44 -57
  14. modal/_runtime/container_io_manager.py +30 -28
  15. modal/_runtime/container_io_manager.pyi +42 -44
  16. modal/_runtime/gpu_memory_snapshot.py +9 -7
  17. modal/_runtime/user_code_event_loop.py +80 -0
  18. modal/_runtime/user_code_imports.py +236 -10
  19. modal/_serialization.py +2 -1
  20. modal/_traceback.py +4 -13
  21. modal/_tunnel.py +16 -11
  22. modal/_tunnel.pyi +25 -3
  23. modal/_utils/async_utils.py +337 -10
  24. modal/_utils/auth_token_manager.py +1 -4
  25. modal/_utils/blob_utils.py +29 -22
  26. modal/_utils/function_utils.py +20 -21
  27. modal/_utils/grpc_testing.py +6 -3
  28. modal/_utils/grpc_utils.py +223 -64
  29. modal/_utils/mount_utils.py +26 -1
  30. modal/_utils/name_utils.py +2 -3
  31. modal/_utils/package_utils.py +0 -1
  32. modal/_utils/rand_pb_testing.py +8 -1
  33. modal/_utils/task_command_router_client.py +524 -0
  34. modal/_vendor/cloudpickle.py +144 -48
  35. modal/app.py +285 -105
  36. modal/app.pyi +216 -53
  37. modal/billing.py +5 -0
  38. modal/builder/2025.06.txt +6 -3
  39. modal/builder/PREVIEW.txt +2 -1
  40. modal/builder/base-images.json +4 -2
  41. modal/cli/_download.py +19 -3
  42. modal/cli/cluster.py +4 -2
  43. modal/cli/config.py +3 -1
  44. modal/cli/container.py +5 -4
  45. modal/cli/dict.py +5 -2
  46. modal/cli/entry_point.py +26 -2
  47. modal/cli/environment.py +2 -16
  48. modal/cli/launch.py +1 -76
  49. modal/cli/network_file_system.py +5 -20
  50. modal/cli/programs/run_jupyter.py +1 -1
  51. modal/cli/programs/vscode.py +1 -1
  52. modal/cli/queues.py +5 -4
  53. modal/cli/run.py +24 -204
  54. modal/cli/secret.py +1 -2
  55. modal/cli/shell.py +375 -0
  56. modal/cli/utils.py +1 -13
  57. modal/cli/volume.py +11 -17
  58. modal/client.py +16 -125
  59. modal/client.pyi +94 -144
  60. modal/cloud_bucket_mount.py +3 -1
  61. modal/cloud_bucket_mount.pyi +4 -0
  62. modal/cls.py +101 -64
  63. modal/cls.pyi +9 -8
  64. modal/config.py +21 -1
  65. modal/container_process.py +288 -12
  66. modal/container_process.pyi +99 -38
  67. modal/dict.py +72 -33
  68. modal/dict.pyi +88 -57
  69. modal/environments.py +16 -8
  70. modal/environments.pyi +6 -2
  71. modal/exception.py +154 -16
  72. modal/experimental/__init__.py +24 -53
  73. modal/experimental/flash.py +161 -74
  74. modal/experimental/flash.pyi +97 -49
  75. modal/file_io.py +50 -92
  76. modal/file_io.pyi +117 -89
  77. modal/functions.pyi +70 -87
  78. modal/image.py +82 -47
  79. modal/image.pyi +51 -30
  80. modal/io_streams.py +500 -149
  81. modal/io_streams.pyi +279 -189
  82. modal/mount.py +60 -46
  83. modal/mount.pyi +41 -17
  84. modal/network_file_system.py +19 -11
  85. modal/network_file_system.pyi +72 -39
  86. modal/object.pyi +114 -22
  87. modal/parallel_map.py +42 -44
  88. modal/parallel_map.pyi +9 -17
  89. modal/partial_function.pyi +4 -2
  90. modal/proxy.py +14 -6
  91. modal/proxy.pyi +10 -2
  92. modal/queue.py +45 -38
  93. modal/queue.pyi +88 -52
  94. modal/runner.py +96 -96
  95. modal/runner.pyi +44 -27
  96. modal/sandbox.py +225 -107
  97. modal/sandbox.pyi +226 -60
  98. modal/secret.py +58 -56
  99. modal/secret.pyi +28 -13
  100. modal/serving.py +7 -11
  101. modal/serving.pyi +7 -8
  102. modal/snapshot.py +29 -15
  103. modal/snapshot.pyi +18 -10
  104. modal/token_flow.py +1 -1
  105. modal/token_flow.pyi +4 -6
  106. modal/volume.py +102 -55
  107. modal/volume.pyi +125 -66
  108. {modal-1.1.5.dev66.dist-info → modal-1.3.1.dev8.dist-info}/METADATA +10 -9
  109. modal-1.3.1.dev8.dist-info/RECORD +189 -0
  110. modal_proto/api.proto +141 -70
  111. modal_proto/api_grpc.py +42 -26
  112. modal_proto/api_pb2.py +1123 -1103
  113. modal_proto/api_pb2.pyi +331 -83
  114. modal_proto/api_pb2_grpc.py +80 -48
  115. modal_proto/api_pb2_grpc.pyi +26 -18
  116. modal_proto/modal_api_grpc.py +175 -174
  117. modal_proto/task_command_router.proto +164 -0
  118. modal_proto/task_command_router_grpc.py +138 -0
  119. modal_proto/task_command_router_pb2.py +180 -0
  120. modal_proto/{sandbox_router_pb2.pyi → task_command_router_pb2.pyi} +148 -57
  121. modal_proto/task_command_router_pb2_grpc.py +272 -0
  122. modal_proto/task_command_router_pb2_grpc.pyi +100 -0
  123. modal_version/__init__.py +1 -1
  124. modal_version/__main__.py +1 -1
  125. modal/cli/programs/launch_instance_ssh.py +0 -94
  126. modal/cli/programs/run_marimo.py +0 -95
  127. modal-1.1.5.dev66.dist-info/RECORD +0 -191
  128. modal_proto/modal_options_grpc.py +0 -3
  129. modal_proto/options.proto +0 -19
  130. modal_proto/options_grpc.py +0 -3
  131. modal_proto/options_pb2.py +0 -35
  132. modal_proto/options_pb2.pyi +0 -20
  133. modal_proto/options_pb2_grpc.py +0 -4
  134. modal_proto/options_pb2_grpc.pyi +0 -7
  135. modal_proto/sandbox_router.proto +0 -125
  136. modal_proto/sandbox_router_grpc.py +0 -89
  137. modal_proto/sandbox_router_pb2.py +0 -128
  138. modal_proto/sandbox_router_pb2_grpc.py +0 -169
  139. modal_proto/sandbox_router_pb2_grpc.pyi +0 -63
  140. {modal-1.1.5.dev66.dist-info → modal-1.3.1.dev8.dist-info}/WHEEL +0 -0
  141. {modal-1.1.5.dev66.dist-info → modal-1.3.1.dev8.dist-info}/entry_points.txt +0 -0
  142. {modal-1.1.5.dev66.dist-info → modal-1.3.1.dev8.dist-info}/licenses/LICENSE +0 -0
  143. {modal-1.1.5.dev66.dist-info → modal-1.3.1.dev8.dist-info}/top_level.txt +0 -0
@@ -7,16 +7,16 @@ import sys
7
7
  import time
8
8
  import traceback
9
9
  from collections import defaultdict
10
- from typing import Any, Optional
10
+ from typing import Any, Callable, Optional, Union
11
11
  from urllib.parse import urlparse
12
12
 
13
+ from modal._partial_function import _PartialFunctionFlags
13
14
  from modal.cls import _Cls
14
15
  from modal.dict import _Dict
15
16
  from modal_proto import api_pb2
16
17
 
17
18
  from .._tunnel import _forward as _forward_tunnel
18
19
  from .._utils.async_utils import synchronize_api, synchronizer
19
- from .._utils.grpc_utils import retry_transient_errors
20
20
  from ..client import _Client
21
21
  from ..config import logger
22
22
  from ..exception import InvalidError
@@ -29,15 +29,20 @@ class _FlashManager:
29
29
  self,
30
30
  client: _Client,
31
31
  port: int,
32
- process: Optional[subprocess.Popen] = None,
32
+ process: Optional[subprocess.Popen] = None, # to be deprecated
33
33
  health_check_url: Optional[str] = None,
34
+ startup_timeout: int = 30,
35
+ exit_grace_period: int = 0,
36
+ h2_enabled: bool = False,
34
37
  ):
35
38
  self.client = client
36
39
  self.port = port
40
+ self.process = process
37
41
  # Health check is not currently being used
38
42
  self.health_check_url = health_check_url
39
- self.process = process
40
- self.tunnel_manager = _forward_tunnel(port, client=client)
43
+ self.startup_timeout = startup_timeout
44
+ self.exit_grace_period = exit_grace_period
45
+ self.tunnel_manager = _forward_tunnel(port, h2_enabled=h2_enabled, client=client)
41
46
  self.stopped = False
42
47
  self.num_failures = 0
43
48
  self.task_id = os.environ["MODAL_TASK_ID"]
@@ -49,10 +54,15 @@ class _FlashManager:
49
54
 
50
55
  start_time = time.monotonic()
51
56
 
57
+ def check_process_is_running() -> Optional[Exception]:
58
+ if process is not None and process.poll() is not None:
59
+ return Exception(f"Process {process.pid} exited with code {process.returncode}")
60
+ return None
61
+
52
62
  while time.monotonic() - start_time < timeout:
53
63
  try:
54
- if process is not None and process.poll() is not None:
55
- return False, Exception(f"Process {process.pid} exited with code {process.returncode}")
64
+ if error := check_process_is_running():
65
+ return False, error
56
66
  with socket.create_connection(("localhost", self.port), timeout=0.5):
57
67
  return True, None
58
68
  except (ConnectionRefusedError, OSError):
@@ -101,6 +111,7 @@ class _FlashManager:
101
111
 
102
112
  async def _run_heartbeat(self, host: str, port: int):
103
113
  first_registration = True
114
+ start_time = time.monotonic()
104
115
  while True:
105
116
  try:
106
117
  port_check_resp, port_check_error = await self.is_port_connection_healthy(process=self.process)
@@ -113,6 +124,7 @@ class _FlashManager:
113
124
  port=port,
114
125
  ),
115
126
  timeout=10,
127
+ retry=None,
116
128
  )
117
129
  self.num_failures = 0
118
130
  if first_registration:
@@ -121,15 +133,16 @@ class _FlashManager:
121
133
  )
122
134
  first_registration = False
123
135
  else:
124
- logger.error(
125
- f"[Modal Flash] Deregistering container {self.task_id} on {self.tunnel.url} "
126
- f"due to error: {port_check_error}, num_failures: {self.num_failures}"
127
- )
128
- self.num_failures += 1
129
- await retry_transient_errors(
130
- self.client.stub.FlashContainerDeregister,
131
- api_pb2.FlashContainerDeregisterRequest(),
132
- )
136
+ if first_registration and (time.monotonic() - start_time < self.startup_timeout):
137
+ continue
138
+ else:
139
+ logger.error(
140
+ f"[Modal Flash] Deregistering container {self.task_id} on {self.tunnel.url} "
141
+ f"due to error: {port_check_error}, num_failures: {self.num_failures}"
142
+ )
143
+ self.num_failures += 1
144
+ await self.client.stub.FlashContainerDeregister(api_pb2.FlashContainerDeregisterRequest())
145
+
133
146
  except asyncio.CancelledError:
134
147
  logger.warning("[Modal Flash] Shutting down...")
135
148
  break
@@ -147,12 +160,12 @@ class _FlashManager:
147
160
  return self.tunnel.url
148
161
 
149
162
  async def stop(self):
150
- self.heartbeat_task.cancel()
151
- await retry_transient_errors(
152
- self.client.stub.FlashContainerDeregister,
153
- api_pb2.FlashContainerDeregisterRequest(),
154
- )
163
+ try:
164
+ self.heartbeat_task.cancel()
165
+ except Exception as e:
166
+ logger.error(f"[Modal Flash] Error stopping: {e}")
155
167
 
168
+ await self.client.stub.FlashContainerDeregister(api_pb2.FlashContainerDeregisterRequest())
156
169
  self.stopped = True
157
170
  logger.warning(f"[Modal Flash] No longer accepting new requests on {self.tunnel.url}.")
158
171
 
@@ -163,18 +176,23 @@ class _FlashManager:
163
176
  if not self.stopped:
164
177
  await self.stop()
165
178
 
179
+ await asyncio.sleep(self.exit_grace_period)
180
+
166
181
  logger.warning(f"[Modal Flash] Closing tunnel on {self.tunnel.url}.")
167
182
  await self.tunnel_manager.__aexit__(*sys.exc_info())
168
183
 
169
184
 
170
- FlashManager = synchronize_api(_FlashManager)
185
+ FlashManager = synchronize_api(_FlashManager, target_module=__name__)
171
186
 
172
187
 
173
188
  @synchronizer.create_blocking
174
189
  async def flash_forward(
175
190
  port: int,
176
- process: Optional[subprocess.Popen] = None,
191
+ process: Optional[subprocess.Popen] = None, # to be deprecated
177
192
  health_check_url: Optional[str] = None,
193
+ startup_timeout: int = 30,
194
+ exit_grace_period: int = 0,
195
+ h2_enabled: bool = False,
178
196
  ) -> _FlashManager:
179
197
  """
180
198
  Forward a port to the Modal Flash service, exposing that port as a stable web endpoint.
@@ -183,7 +201,15 @@ async def flash_forward(
183
201
  """
184
202
  client = await _Client.from_env()
185
203
 
186
- manager = _FlashManager(client, port, process=process, health_check_url=health_check_url)
204
+ manager = _FlashManager(
205
+ client,
206
+ port,
207
+ process=process,
208
+ health_check_url=health_check_url,
209
+ startup_timeout=startup_timeout,
210
+ exit_grace_period=exit_grace_period,
211
+ h2_enabled=h2_enabled,
212
+ )
187
213
  await manager._start()
188
214
  return manager
189
215
 
@@ -321,7 +347,7 @@ class _FlashPrometheusAutoscaler:
321
347
 
322
348
  async def _compute_target_containers(self, current_replicas: int) -> int:
323
349
  """
324
- Gets internal metrics from container to autoscale up or down.
350
+ Gets metrics from container to autoscale up or down.
325
351
  """
326
352
  containers = await self._get_all_containers()
327
353
  if len(containers) > current_replicas:
@@ -334,7 +360,7 @@ class _FlashPrometheusAutoscaler:
334
360
  if current_replicas == 0:
335
361
  return 1
336
362
 
337
- # Get metrics based on autoscaler type (prometheus or internal)
363
+ # Get metrics based on autoscaler type
338
364
  sum_metric, n_containers_with_metrics = await self._get_scaling_info(containers)
339
365
 
340
366
  desired_replicas = self._calculate_desired_replicas(
@@ -406,39 +432,26 @@ class _FlashPrometheusAutoscaler:
406
432
  return desired_replicas
407
433
 
408
434
  async def _get_scaling_info(self, containers) -> tuple[float, int]:
409
- """Get metrics using either internal container metrics API or prometheus HTTP endpoints."""
410
- if self.metrics_endpoint == "internal":
411
- container_metrics_results = await asyncio.gather(
412
- *[self._get_container_metrics(container.task_id) for container in containers]
413
- )
414
- container_metrics_list = []
415
- for container_metric in container_metrics_results:
416
- if container_metric is None:
417
- continue
418
- container_metrics_list.append(getattr(container_metric.metrics, self.target_metric))
419
-
420
- sum_metric = sum(container_metrics_list)
421
- n_containers_with_metrics = len(container_metrics_list)
422
- else:
423
- sum_metric = 0
424
- n_containers_with_metrics = 0
425
-
426
- container_metrics_list = await asyncio.gather(
427
- *[
428
- self._get_metrics(f"https://{container.host}:{container.port}/{self.metrics_endpoint}")
429
- for container in containers
430
- ]
431
- )
435
+ """Get metrics using container exposed metrics endpoints."""
436
+ sum_metric = 0
437
+ n_containers_with_metrics = 0
438
+
439
+ container_metrics_list = await asyncio.gather(
440
+ *[
441
+ self._get_metrics(f"https://{container.host}:{container.port}/{self.metrics_endpoint}")
442
+ for container in containers
443
+ ]
444
+ )
432
445
 
433
- for container_metrics in container_metrics_list:
434
- if (
435
- container_metrics is None
436
- or self.target_metric not in container_metrics
437
- or len(container_metrics[self.target_metric]) == 0
438
- ):
439
- continue
440
- sum_metric += container_metrics[self.target_metric][0].value
441
- n_containers_with_metrics += 1
446
+ for container_metrics in container_metrics_list:
447
+ if (
448
+ container_metrics is None
449
+ or self.target_metric not in container_metrics
450
+ or len(container_metrics[self.target_metric]) == 0
451
+ ):
452
+ continue
453
+ sum_metric += container_metrics[self.target_metric][0].value
454
+ n_containers_with_metrics += 1
442
455
 
443
456
  return sum_metric, n_containers_with_metrics
444
457
 
@@ -474,23 +487,14 @@ class _FlashPrometheusAutoscaler:
474
487
 
475
488
  return metrics
476
489
 
477
- async def _get_container_metrics(self, container_id: str) -> Optional[api_pb2.TaskGetAutoscalingMetricsResponse]:
478
- req = api_pb2.TaskGetAutoscalingMetricsRequest(task_id=container_id)
479
- try:
480
- resp = await retry_transient_errors(self.client.stub.TaskGetAutoscalingMetrics, req)
481
- return resp
482
- except Exception as e:
483
- logger.warning(f"[Modal Flash] Error getting metrics for container {container_id}: {e}")
484
- return None
485
-
486
490
  async def _get_all_containers(self):
487
491
  req = api_pb2.FlashContainerListRequest(function_id=self.fn.object_id)
488
- resp = await retry_transient_errors(self.client.stub.FlashContainerList, req)
492
+ resp = await self.client.stub.FlashContainerList(req)
489
493
  return resp.containers
490
494
 
491
495
  async def _set_target_slots(self, target_slots: int):
492
496
  req = api_pb2.FlashSetTargetSlotsMetricsRequest(function_id=self.fn.object_id, target_slots=target_slots)
493
- await retry_transient_errors(self.client.stub.FlashSetTargetSlotsMetrics, req)
497
+ await self.client.stub.FlashSetTargetSlotsMetrics(req)
494
498
  return
495
499
 
496
500
  def _make_scaling_decision(
@@ -572,14 +576,10 @@ async def flash_prometheus_autoscaler(
572
576
  app_name: str,
573
577
  cls_name: str,
574
578
  # Endpoint to fetch metrics from. Must be in Prometheus format. Example: "/metrics"
575
- # If metrics_endpoint is "internal", we will use containers' internal metrics to autoscale instead.
576
579
  metrics_endpoint: str,
577
580
  # Target metric to autoscale on. Example: "vllm:num_requests_running"
578
- # If metrics_endpoint is "internal", target_metrics options are: [cpu_usage_percent, memory_usage_percent]
579
581
  target_metric: str,
580
582
  # Target metric value. Example: 25
581
- # If metrics_endpoint is "internal", target_metric_value is a percentage value between 0.1 and 1.0 (inclusive),
582
- # indicating container's usage of that metric.
583
583
  target_metric_value: float,
584
584
  min_containers: Optional[int] = None,
585
585
  max_containers: Optional[int] = None,
@@ -645,5 +645,92 @@ async def flash_get_containers(app_name: str, cls_name: str) -> list[dict[str, A
645
645
  assert fn is not None
646
646
  await fn.hydrate(client=client)
647
647
  req = api_pb2.FlashContainerListRequest(function_id=fn.object_id)
648
- resp = await retry_transient_errors(client.stub.FlashContainerList, req)
648
+ resp = await client.stub.FlashContainerList(req)
649
649
  return resp.containers
650
+
651
+
652
+ def _http_server(
653
+ port: Optional[int] = None,
654
+ *,
655
+ proxy_regions: list[str] = [], # The regions to proxy the HTTP server to.
656
+ startup_timeout: int = 30, # Maximum number of seconds to wait for the HTTP server to start.
657
+ exit_grace_period: Optional[int] = None, # The time to wait for the HTTP server to exit gracefully.
658
+ h2_enabled: bool = False, # Whether to enable HTTP/2 support.
659
+ ):
660
+ """Decorator for Flash-enabled HTTP servers on Modal classes.
661
+
662
+ Args:
663
+ port: The local port to forward to the HTTP server.
664
+ proxy_regions: The regions to proxy the HTTP server to.
665
+ startup_timeout: The maximum time to wait for the HTTP server to start.
666
+ exit_grace_period: The time to wait for the HTTP server to exit gracefully.
667
+
668
+ """
669
+ if port is None:
670
+ raise InvalidError(
671
+ "Positional arguments are not allowed. Did you forget parentheses? Suggestion: `@modal.http_server()`."
672
+ )
673
+ if not isinstance(port, int) or port < 1 or port > 65535:
674
+ raise InvalidError("First argument of `@http_server` must be a local port, such as `@http_server(8000)`.")
675
+ if startup_timeout <= 0:
676
+ raise InvalidError("The `startup_timeout` argument of `@http_server` must be positive.")
677
+ if exit_grace_period is not None and exit_grace_period < 0:
678
+ raise InvalidError("The `exit_grace_period` argument of `@http_server` must be non-negative.")
679
+
680
+ from modal._partial_function import _PartialFunction, _PartialFunctionParams
681
+
682
+ params = _PartialFunctionParams(
683
+ http_config=api_pb2.HTTPConfig(
684
+ port=port,
685
+ proxy_regions=proxy_regions,
686
+ startup_timeout=startup_timeout or 0,
687
+ exit_grace_period=exit_grace_period or 0,
688
+ h2_enabled=h2_enabled,
689
+ )
690
+ )
691
+
692
+ def wrapper(obj: Union[Callable[..., Any], _PartialFunction]) -> _PartialFunction:
693
+ flags = _PartialFunctionFlags.HTTP_WEB_INTERFACE
694
+
695
+ if isinstance(obj, _PartialFunction):
696
+ pf = obj.stack(flags, params)
697
+ else:
698
+ pf = _PartialFunction(obj, flags, params)
699
+ pf.validate_obj_compatibility("`http_server`")
700
+ return pf
701
+
702
+ return wrapper
703
+
704
+
705
+ http_server = synchronize_api(_http_server, target_module=__name__)
706
+
707
+
708
+ class _FlashContainerEntry:
709
+ """
710
+ A class that manages the lifecycle of Flash manager for Flash containers.
711
+
712
+ It is intentional that stop() runs before exit handlers and close().
713
+ This ensures the container is deregistered first, preventing new requests from being routed to it
714
+ while exit handlers execute and the exit grace period elapses, before finally closing the tunnel.
715
+ """
716
+
717
+ def __init__(self, http_config: api_pb2.HTTPConfig):
718
+ self.http_config: api_pb2.HTTPConfig = http_config
719
+ self.flash_manager: Optional[FlashManager] = None # type: ignore
720
+
721
+ def enter(self):
722
+ if self.http_config != api_pb2.HTTPConfig():
723
+ self.flash_manager = flash_forward(
724
+ self.http_config.port,
725
+ startup_timeout=self.http_config.startup_timeout,
726
+ exit_grace_period=self.http_config.exit_grace_period,
727
+ h2_enabled=self.http_config.h2_enabled,
728
+ )
729
+
730
+ def stop(self):
731
+ if self.flash_manager:
732
+ self.flash_manager.stop()
733
+
734
+ def close(self):
735
+ if self.flash_manager:
736
+ self.flash_manager.close()
@@ -11,6 +11,9 @@ class _FlashManager:
11
11
  port: int,
12
12
  process: typing.Optional[subprocess.Popen] = None,
13
13
  health_check_url: typing.Optional[str] = None,
14
+ startup_timeout: int = 30,
15
+ exit_grace_period: int = 0,
16
+ h2_enabled: bool = False,
14
17
  ):
15
18
  """Initialize self. See help(type(self)) for accurate signature."""
16
19
  ...
@@ -28,8 +31,6 @@ class _FlashManager:
28
31
  async def stop(self): ...
29
32
  async def close(self): ...
30
33
 
31
- SUPERSELF = typing.TypeVar("SUPERSELF", covariant=True)
32
-
33
34
  class FlashManager:
34
35
  def __init__(
35
36
  self,
@@ -37,9 +38,12 @@ class FlashManager:
37
38
  port: int,
38
39
  process: typing.Optional[subprocess.Popen] = None,
39
40
  health_check_url: typing.Optional[str] = None,
41
+ startup_timeout: int = 30,
42
+ exit_grace_period: int = 0,
43
+ h2_enabled: bool = False,
40
44
  ): ...
41
45
 
42
- class __is_port_connection_healthy_spec(typing_extensions.Protocol[SUPERSELF]):
46
+ class __is_port_connection_healthy_spec(typing_extensions.Protocol):
43
47
  def __call__(
44
48
  self, /, process: typing.Optional[subprocess.Popen], timeout: float = 0.5
45
49
  ) -> tuple[bool, typing.Optional[Exception]]: ...
@@ -47,15 +51,15 @@ class FlashManager:
47
51
  self, /, process: typing.Optional[subprocess.Popen], timeout: float = 0.5
48
52
  ) -> tuple[bool, typing.Optional[Exception]]: ...
49
53
 
50
- is_port_connection_healthy: __is_port_connection_healthy_spec[typing_extensions.Self]
54
+ is_port_connection_healthy: __is_port_connection_healthy_spec
51
55
 
52
- class ___start_spec(typing_extensions.Protocol[SUPERSELF]):
56
+ class ___start_spec(typing_extensions.Protocol):
53
57
  def __call__(self, /): ...
54
58
  async def aio(self, /): ...
55
59
 
56
- _start: ___start_spec[typing_extensions.Self]
60
+ _start: ___start_spec
57
61
 
58
- class ___drain_container_spec(typing_extensions.Protocol[SUPERSELF]):
62
+ class ___drain_container_spec(typing_extensions.Protocol):
59
63
  def __call__(self, /):
60
64
  """Background task that checks if we've encountered too many failures and drains the container if so."""
61
65
  ...
@@ -64,27 +68,27 @@ class FlashManager:
64
68
  """Background task that checks if we've encountered too many failures and drains the container if so."""
65
69
  ...
66
70
 
67
- _drain_container: ___drain_container_spec[typing_extensions.Self]
71
+ _drain_container: ___drain_container_spec
68
72
 
69
- class ___run_heartbeat_spec(typing_extensions.Protocol[SUPERSELF]):
73
+ class ___run_heartbeat_spec(typing_extensions.Protocol):
70
74
  def __call__(self, /, host: str, port: int): ...
71
75
  async def aio(self, /, host: str, port: int): ...
72
76
 
73
- _run_heartbeat: ___run_heartbeat_spec[typing_extensions.Self]
77
+ _run_heartbeat: ___run_heartbeat_spec
74
78
 
75
79
  def get_container_url(self): ...
76
80
 
77
- class __stop_spec(typing_extensions.Protocol[SUPERSELF]):
81
+ class __stop_spec(typing_extensions.Protocol):
78
82
  def __call__(self, /): ...
79
83
  async def aio(self, /): ...
80
84
 
81
- stop: __stop_spec[typing_extensions.Self]
85
+ stop: __stop_spec
82
86
 
83
- class __close_spec(typing_extensions.Protocol[SUPERSELF]):
87
+ class __close_spec(typing_extensions.Protocol):
84
88
  def __call__(self, /): ...
85
89
  async def aio(self, /): ...
86
90
 
87
- close: __close_spec[typing_extensions.Self]
91
+ close: __close_spec
88
92
 
89
93
  class __flash_forward_spec(typing_extensions.Protocol):
90
94
  def __call__(
@@ -93,6 +97,9 @@ class __flash_forward_spec(typing_extensions.Protocol):
93
97
  port: int,
94
98
  process: typing.Optional[subprocess.Popen] = None,
95
99
  health_check_url: typing.Optional[str] = None,
100
+ startup_timeout: int = 30,
101
+ exit_grace_period: int = 0,
102
+ h2_enabled: bool = False,
96
103
  ) -> FlashManager:
97
104
  """Forward a port to the Modal Flash service, exposing that port as a stable web endpoint.
98
105
  This is a highly experimental method that can break or be removed at any time without warning.
@@ -106,6 +113,9 @@ class __flash_forward_spec(typing_extensions.Protocol):
106
113
  port: int,
107
114
  process: typing.Optional[subprocess.Popen] = None,
108
115
  health_check_url: typing.Optional[str] = None,
116
+ startup_timeout: int = 30,
117
+ exit_grace_period: int = 0,
118
+ h2_enabled: bool = False,
109
119
  ) -> FlashManager:
110
120
  """Forward a port to the Modal Flash service, exposing that port as a stable web endpoint.
111
121
  This is a highly experimental method that can break or be removed at any time without warning.
@@ -139,7 +149,7 @@ class _FlashPrometheusAutoscaler:
139
149
  async def start(self): ...
140
150
  async def _run_autoscaler_loop(self): ...
141
151
  async def _compute_target_containers(self, current_replicas: int) -> int:
142
- """Gets internal metrics from container to autoscale up or down."""
152
+ """Gets metrics from container to autoscale up or down."""
143
153
  ...
144
154
 
145
155
  def _calculate_desired_replicas(
@@ -154,13 +164,10 @@ class _FlashPrometheusAutoscaler:
154
164
  ...
155
165
 
156
166
  async def _get_scaling_info(self, containers) -> tuple[float, int]:
157
- """Get metrics using either internal container metrics API or prometheus HTTP endpoints."""
167
+ """Get metrics using container exposed metrics endpoints."""
158
168
  ...
159
169
 
160
170
  async def _get_metrics(self, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
161
- async def _get_container_metrics(
162
- self, container_id: str
163
- ) -> typing.Optional[modal_proto.api_pb2.TaskGetAutoscalingMetricsResponse]: ...
164
171
  async def _get_all_containers(self): ...
165
172
  async def _set_target_slots(self, target_slots: int): ...
166
173
  def _make_scaling_decision(
@@ -212,28 +219,28 @@ class FlashPrometheusAutoscaler:
212
219
  autoscaling_interval_seconds: int,
213
220
  ): ...
214
221
 
215
- class __start_spec(typing_extensions.Protocol[SUPERSELF]):
222
+ class __start_spec(typing_extensions.Protocol):
216
223
  def __call__(self, /): ...
217
224
  async def aio(self, /): ...
218
225
 
219
- start: __start_spec[typing_extensions.Self]
226
+ start: __start_spec
220
227
 
221
- class ___run_autoscaler_loop_spec(typing_extensions.Protocol[SUPERSELF]):
228
+ class ___run_autoscaler_loop_spec(typing_extensions.Protocol):
222
229
  def __call__(self, /): ...
223
230
  async def aio(self, /): ...
224
231
 
225
- _run_autoscaler_loop: ___run_autoscaler_loop_spec[typing_extensions.Self]
232
+ _run_autoscaler_loop: ___run_autoscaler_loop_spec
226
233
 
227
- class ___compute_target_containers_spec(typing_extensions.Protocol[SUPERSELF]):
234
+ class ___compute_target_containers_spec(typing_extensions.Protocol):
228
235
  def __call__(self, /, current_replicas: int) -> int:
229
- """Gets internal metrics from container to autoscale up or down."""
236
+ """Gets metrics from container to autoscale up or down."""
230
237
  ...
231
238
 
232
239
  async def aio(self, /, current_replicas: int) -> int:
233
- """Gets internal metrics from container to autoscale up or down."""
240
+ """Gets metrics from container to autoscale up or down."""
234
241
  ...
235
242
 
236
- _compute_target_containers: ___compute_target_containers_spec[typing_extensions.Self]
243
+ _compute_target_containers: ___compute_target_containers_spec
237
244
 
238
245
  def _calculate_desired_replicas(
239
246
  self,
@@ -246,44 +253,34 @@ class FlashPrometheusAutoscaler:
246
253
  """Calculate the desired number of replicas to autoscale to."""
247
254
  ...
248
255
 
249
- class ___get_scaling_info_spec(typing_extensions.Protocol[SUPERSELF]):
256
+ class ___get_scaling_info_spec(typing_extensions.Protocol):
250
257
  def __call__(self, /, containers) -> tuple[float, int]:
251
- """Get metrics using either internal container metrics API or prometheus HTTP endpoints."""
258
+ """Get metrics using container exposed metrics endpoints."""
252
259
  ...
253
260
 
254
261
  async def aio(self, /, containers) -> tuple[float, int]:
255
- """Get metrics using either internal container metrics API or prometheus HTTP endpoints."""
262
+ """Get metrics using container exposed metrics endpoints."""
256
263
  ...
257
264
 
258
- _get_scaling_info: ___get_scaling_info_spec[typing_extensions.Self]
265
+ _get_scaling_info: ___get_scaling_info_spec
259
266
 
260
- class ___get_metrics_spec(typing_extensions.Protocol[SUPERSELF]):
267
+ class ___get_metrics_spec(typing_extensions.Protocol):
261
268
  def __call__(self, /, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
262
269
  async def aio(self, /, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
263
270
 
264
- _get_metrics: ___get_metrics_spec[typing_extensions.Self]
265
-
266
- class ___get_container_metrics_spec(typing_extensions.Protocol[SUPERSELF]):
267
- def __call__(
268
- self, /, container_id: str
269
- ) -> typing.Optional[modal_proto.api_pb2.TaskGetAutoscalingMetricsResponse]: ...
270
- async def aio(
271
- self, /, container_id: str
272
- ) -> typing.Optional[modal_proto.api_pb2.TaskGetAutoscalingMetricsResponse]: ...
273
-
274
- _get_container_metrics: ___get_container_metrics_spec[typing_extensions.Self]
271
+ _get_metrics: ___get_metrics_spec
275
272
 
276
- class ___get_all_containers_spec(typing_extensions.Protocol[SUPERSELF]):
273
+ class ___get_all_containers_spec(typing_extensions.Protocol):
277
274
  def __call__(self, /): ...
278
275
  async def aio(self, /): ...
279
276
 
280
- _get_all_containers: ___get_all_containers_spec[typing_extensions.Self]
277
+ _get_all_containers: ___get_all_containers_spec
281
278
 
282
- class ___set_target_slots_spec(typing_extensions.Protocol[SUPERSELF]):
279
+ class ___set_target_slots_spec(typing_extensions.Protocol):
283
280
  def __call__(self, /, target_slots: int): ...
284
281
  async def aio(self, /, target_slots: int): ...
285
282
 
286
- _set_target_slots: ___set_target_slots_spec[typing_extensions.Self]
283
+ _set_target_slots: ___set_target_slots_spec
287
284
 
288
285
  def _make_scaling_decision(
289
286
  self,
@@ -313,11 +310,11 @@ class FlashPrometheusAutoscaler:
313
310
  """
314
311
  ...
315
312
 
316
- class __stop_spec(typing_extensions.Protocol[SUPERSELF]):
313
+ class __stop_spec(typing_extensions.Protocol):
317
314
  def __call__(self, /): ...
318
315
  async def aio(self, /): ...
319
316
 
320
- stop: __stop_spec[typing_extensions.Self]
317
+ stop: __stop_spec
321
318
 
322
319
  class __flash_prometheus_autoscaler_spec(typing_extensions.Protocol):
323
320
  def __call__(
@@ -392,3 +389,54 @@ class __flash_get_containers_spec(typing_extensions.Protocol):
392
389
  ...
393
390
 
394
391
  flash_get_containers: __flash_get_containers_spec
392
+
393
+ def _http_server(
394
+ port: typing.Optional[int] = None,
395
+ *,
396
+ proxy_regions: list[str] = [],
397
+ startup_timeout: int = 30,
398
+ exit_grace_period: typing.Optional[int] = None,
399
+ h2_enabled: bool = False,
400
+ ):
401
+ """Decorator for Flash-enabled HTTP servers on Modal classes.
402
+
403
+ Args:
404
+ port: The local port to forward to the HTTP server.
405
+ proxy_regions: The regions to proxy the HTTP server to.
406
+ startup_timeout: The maximum time to wait for the HTTP server to start.
407
+ exit_grace_period: The time to wait for the HTTP server to exit gracefully.
408
+ """
409
+ ...
410
+
411
+ def http_server(
412
+ port: typing.Optional[int] = None,
413
+ *,
414
+ proxy_regions: list[str] = [],
415
+ startup_timeout: int = 30,
416
+ exit_grace_period: typing.Optional[int] = None,
417
+ h2_enabled: bool = False,
418
+ ):
419
+ """Decorator for Flash-enabled HTTP servers on Modal classes.
420
+
421
+ Args:
422
+ port: The local port to forward to the HTTP server.
423
+ proxy_regions: The regions to proxy the HTTP server to.
424
+ startup_timeout: The maximum time to wait for the HTTP server to start.
425
+ exit_grace_period: The time to wait for the HTTP server to exit gracefully.
426
+ """
427
+ ...
428
+
429
+ class _FlashContainerEntry:
430
+ """A class that manages the lifecycle of Flash manager for Flash containers.
431
+
432
+ It is intentional that stop() runs before exit handlers and close().
433
+ This ensures the container is deregistered first, preventing new requests from being routed to it
434
+ while exit handlers execute and the exit grace period elapses, before finally closing the tunnel.
435
+ """
436
+ def __init__(self, http_config: modal_proto.api_pb2.HTTPConfig):
437
+ """Initialize self. See help(type(self)) for accurate signature."""
438
+ ...
439
+
440
+ def enter(self): ...
441
+ def stop(self): ...
442
+ def close(self): ...