modal 1.1.5.dev66__py3-none-any.whl → 1.3.1.dev8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of modal might be problematic. Click here for more details.
- modal/__init__.py +4 -4
- modal/__main__.py +4 -29
- modal/_billing.py +84 -0
- modal/_clustered_functions.py +1 -3
- modal/_container_entrypoint.py +33 -208
- modal/_functions.py +171 -138
- modal/_grpc_client.py +191 -0
- modal/_ipython.py +16 -6
- modal/_load_context.py +106 -0
- modal/_object.py +72 -21
- modal/_output.py +12 -14
- modal/_partial_function.py +31 -4
- modal/_resolver.py +44 -57
- modal/_runtime/container_io_manager.py +30 -28
- modal/_runtime/container_io_manager.pyi +42 -44
- modal/_runtime/gpu_memory_snapshot.py +9 -7
- modal/_runtime/user_code_event_loop.py +80 -0
- modal/_runtime/user_code_imports.py +236 -10
- modal/_serialization.py +2 -1
- modal/_traceback.py +4 -13
- modal/_tunnel.py +16 -11
- modal/_tunnel.pyi +25 -3
- modal/_utils/async_utils.py +337 -10
- modal/_utils/auth_token_manager.py +1 -4
- modal/_utils/blob_utils.py +29 -22
- modal/_utils/function_utils.py +20 -21
- modal/_utils/grpc_testing.py +6 -3
- modal/_utils/grpc_utils.py +223 -64
- modal/_utils/mount_utils.py +26 -1
- modal/_utils/name_utils.py +2 -3
- modal/_utils/package_utils.py +0 -1
- modal/_utils/rand_pb_testing.py +8 -1
- modal/_utils/task_command_router_client.py +524 -0
- modal/_vendor/cloudpickle.py +144 -48
- modal/app.py +285 -105
- modal/app.pyi +216 -53
- modal/billing.py +5 -0
- modal/builder/2025.06.txt +6 -3
- modal/builder/PREVIEW.txt +2 -1
- modal/builder/base-images.json +4 -2
- modal/cli/_download.py +19 -3
- modal/cli/cluster.py +4 -2
- modal/cli/config.py +3 -1
- modal/cli/container.py +5 -4
- modal/cli/dict.py +5 -2
- modal/cli/entry_point.py +26 -2
- modal/cli/environment.py +2 -16
- modal/cli/launch.py +1 -76
- modal/cli/network_file_system.py +5 -20
- modal/cli/programs/run_jupyter.py +1 -1
- modal/cli/programs/vscode.py +1 -1
- modal/cli/queues.py +5 -4
- modal/cli/run.py +24 -204
- modal/cli/secret.py +1 -2
- modal/cli/shell.py +375 -0
- modal/cli/utils.py +1 -13
- modal/cli/volume.py +11 -17
- modal/client.py +16 -125
- modal/client.pyi +94 -144
- modal/cloud_bucket_mount.py +3 -1
- modal/cloud_bucket_mount.pyi +4 -0
- modal/cls.py +101 -64
- modal/cls.pyi +9 -8
- modal/config.py +21 -1
- modal/container_process.py +288 -12
- modal/container_process.pyi +99 -38
- modal/dict.py +72 -33
- modal/dict.pyi +88 -57
- modal/environments.py +16 -8
- modal/environments.pyi +6 -2
- modal/exception.py +154 -16
- modal/experimental/__init__.py +24 -53
- modal/experimental/flash.py +161 -74
- modal/experimental/flash.pyi +97 -49
- modal/file_io.py +50 -92
- modal/file_io.pyi +117 -89
- modal/functions.pyi +70 -87
- modal/image.py +82 -47
- modal/image.pyi +51 -30
- modal/io_streams.py +500 -149
- modal/io_streams.pyi +279 -189
- modal/mount.py +60 -46
- modal/mount.pyi +41 -17
- modal/network_file_system.py +19 -11
- modal/network_file_system.pyi +72 -39
- modal/object.pyi +114 -22
- modal/parallel_map.py +42 -44
- modal/parallel_map.pyi +9 -17
- modal/partial_function.pyi +4 -2
- modal/proxy.py +14 -6
- modal/proxy.pyi +10 -2
- modal/queue.py +45 -38
- modal/queue.pyi +88 -52
- modal/runner.py +96 -96
- modal/runner.pyi +44 -27
- modal/sandbox.py +225 -107
- modal/sandbox.pyi +226 -60
- modal/secret.py +58 -56
- modal/secret.pyi +28 -13
- modal/serving.py +7 -11
- modal/serving.pyi +7 -8
- modal/snapshot.py +29 -15
- modal/snapshot.pyi +18 -10
- modal/token_flow.py +1 -1
- modal/token_flow.pyi +4 -6
- modal/volume.py +102 -55
- modal/volume.pyi +125 -66
- {modal-1.1.5.dev66.dist-info → modal-1.3.1.dev8.dist-info}/METADATA +10 -9
- modal-1.3.1.dev8.dist-info/RECORD +189 -0
- modal_proto/api.proto +141 -70
- modal_proto/api_grpc.py +42 -26
- modal_proto/api_pb2.py +1123 -1103
- modal_proto/api_pb2.pyi +331 -83
- modal_proto/api_pb2_grpc.py +80 -48
- modal_proto/api_pb2_grpc.pyi +26 -18
- modal_proto/modal_api_grpc.py +175 -174
- modal_proto/task_command_router.proto +164 -0
- modal_proto/task_command_router_grpc.py +138 -0
- modal_proto/task_command_router_pb2.py +180 -0
- modal_proto/{sandbox_router_pb2.pyi → task_command_router_pb2.pyi} +148 -57
- modal_proto/task_command_router_pb2_grpc.py +272 -0
- modal_proto/task_command_router_pb2_grpc.pyi +100 -0
- modal_version/__init__.py +1 -1
- modal_version/__main__.py +1 -1
- modal/cli/programs/launch_instance_ssh.py +0 -94
- modal/cli/programs/run_marimo.py +0 -95
- modal-1.1.5.dev66.dist-info/RECORD +0 -191
- modal_proto/modal_options_grpc.py +0 -3
- modal_proto/options.proto +0 -19
- modal_proto/options_grpc.py +0 -3
- modal_proto/options_pb2.py +0 -35
- modal_proto/options_pb2.pyi +0 -20
- modal_proto/options_pb2_grpc.py +0 -4
- modal_proto/options_pb2_grpc.pyi +0 -7
- modal_proto/sandbox_router.proto +0 -125
- modal_proto/sandbox_router_grpc.py +0 -89
- modal_proto/sandbox_router_pb2.py +0 -128
- modal_proto/sandbox_router_pb2_grpc.py +0 -169
- modal_proto/sandbox_router_pb2_grpc.pyi +0 -63
- {modal-1.1.5.dev66.dist-info → modal-1.3.1.dev8.dist-info}/WHEEL +0 -0
- {modal-1.1.5.dev66.dist-info → modal-1.3.1.dev8.dist-info}/entry_points.txt +0 -0
- {modal-1.1.5.dev66.dist-info → modal-1.3.1.dev8.dist-info}/licenses/LICENSE +0 -0
- {modal-1.1.5.dev66.dist-info → modal-1.3.1.dev8.dist-info}/top_level.txt +0 -0
modal/experimental/flash.py
CHANGED
|
@@ -7,16 +7,16 @@ import sys
|
|
|
7
7
|
import time
|
|
8
8
|
import traceback
|
|
9
9
|
from collections import defaultdict
|
|
10
|
-
from typing import Any, Optional
|
|
10
|
+
from typing import Any, Callable, Optional, Union
|
|
11
11
|
from urllib.parse import urlparse
|
|
12
12
|
|
|
13
|
+
from modal._partial_function import _PartialFunctionFlags
|
|
13
14
|
from modal.cls import _Cls
|
|
14
15
|
from modal.dict import _Dict
|
|
15
16
|
from modal_proto import api_pb2
|
|
16
17
|
|
|
17
18
|
from .._tunnel import _forward as _forward_tunnel
|
|
18
19
|
from .._utils.async_utils import synchronize_api, synchronizer
|
|
19
|
-
from .._utils.grpc_utils import retry_transient_errors
|
|
20
20
|
from ..client import _Client
|
|
21
21
|
from ..config import logger
|
|
22
22
|
from ..exception import InvalidError
|
|
@@ -29,15 +29,20 @@ class _FlashManager:
|
|
|
29
29
|
self,
|
|
30
30
|
client: _Client,
|
|
31
31
|
port: int,
|
|
32
|
-
process: Optional[subprocess.Popen] = None,
|
|
32
|
+
process: Optional[subprocess.Popen] = None, # to be deprecated
|
|
33
33
|
health_check_url: Optional[str] = None,
|
|
34
|
+
startup_timeout: int = 30,
|
|
35
|
+
exit_grace_period: int = 0,
|
|
36
|
+
h2_enabled: bool = False,
|
|
34
37
|
):
|
|
35
38
|
self.client = client
|
|
36
39
|
self.port = port
|
|
40
|
+
self.process = process
|
|
37
41
|
# Health check is not currently being used
|
|
38
42
|
self.health_check_url = health_check_url
|
|
39
|
-
self.
|
|
40
|
-
self.
|
|
43
|
+
self.startup_timeout = startup_timeout
|
|
44
|
+
self.exit_grace_period = exit_grace_period
|
|
45
|
+
self.tunnel_manager = _forward_tunnel(port, h2_enabled=h2_enabled, client=client)
|
|
41
46
|
self.stopped = False
|
|
42
47
|
self.num_failures = 0
|
|
43
48
|
self.task_id = os.environ["MODAL_TASK_ID"]
|
|
@@ -49,10 +54,15 @@ class _FlashManager:
|
|
|
49
54
|
|
|
50
55
|
start_time = time.monotonic()
|
|
51
56
|
|
|
57
|
+
def check_process_is_running() -> Optional[Exception]:
|
|
58
|
+
if process is not None and process.poll() is not None:
|
|
59
|
+
return Exception(f"Process {process.pid} exited with code {process.returncode}")
|
|
60
|
+
return None
|
|
61
|
+
|
|
52
62
|
while time.monotonic() - start_time < timeout:
|
|
53
63
|
try:
|
|
54
|
-
if
|
|
55
|
-
return False,
|
|
64
|
+
if error := check_process_is_running():
|
|
65
|
+
return False, error
|
|
56
66
|
with socket.create_connection(("localhost", self.port), timeout=0.5):
|
|
57
67
|
return True, None
|
|
58
68
|
except (ConnectionRefusedError, OSError):
|
|
@@ -101,6 +111,7 @@ class _FlashManager:
|
|
|
101
111
|
|
|
102
112
|
async def _run_heartbeat(self, host: str, port: int):
|
|
103
113
|
first_registration = True
|
|
114
|
+
start_time = time.monotonic()
|
|
104
115
|
while True:
|
|
105
116
|
try:
|
|
106
117
|
port_check_resp, port_check_error = await self.is_port_connection_healthy(process=self.process)
|
|
@@ -113,6 +124,7 @@ class _FlashManager:
|
|
|
113
124
|
port=port,
|
|
114
125
|
),
|
|
115
126
|
timeout=10,
|
|
127
|
+
retry=None,
|
|
116
128
|
)
|
|
117
129
|
self.num_failures = 0
|
|
118
130
|
if first_registration:
|
|
@@ -121,15 +133,16 @@ class _FlashManager:
|
|
|
121
133
|
)
|
|
122
134
|
first_registration = False
|
|
123
135
|
else:
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
136
|
+
if first_registration and (time.monotonic() - start_time < self.startup_timeout):
|
|
137
|
+
continue
|
|
138
|
+
else:
|
|
139
|
+
logger.error(
|
|
140
|
+
f"[Modal Flash] Deregistering container {self.task_id} on {self.tunnel.url} "
|
|
141
|
+
f"due to error: {port_check_error}, num_failures: {self.num_failures}"
|
|
142
|
+
)
|
|
143
|
+
self.num_failures += 1
|
|
144
|
+
await self.client.stub.FlashContainerDeregister(api_pb2.FlashContainerDeregisterRequest())
|
|
145
|
+
|
|
133
146
|
except asyncio.CancelledError:
|
|
134
147
|
logger.warning("[Modal Flash] Shutting down...")
|
|
135
148
|
break
|
|
@@ -147,12 +160,12 @@ class _FlashManager:
|
|
|
147
160
|
return self.tunnel.url
|
|
148
161
|
|
|
149
162
|
async def stop(self):
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
)
|
|
163
|
+
try:
|
|
164
|
+
self.heartbeat_task.cancel()
|
|
165
|
+
except Exception as e:
|
|
166
|
+
logger.error(f"[Modal Flash] Error stopping: {e}")
|
|
155
167
|
|
|
168
|
+
await self.client.stub.FlashContainerDeregister(api_pb2.FlashContainerDeregisterRequest())
|
|
156
169
|
self.stopped = True
|
|
157
170
|
logger.warning(f"[Modal Flash] No longer accepting new requests on {self.tunnel.url}.")
|
|
158
171
|
|
|
@@ -163,18 +176,23 @@ class _FlashManager:
|
|
|
163
176
|
if not self.stopped:
|
|
164
177
|
await self.stop()
|
|
165
178
|
|
|
179
|
+
await asyncio.sleep(self.exit_grace_period)
|
|
180
|
+
|
|
166
181
|
logger.warning(f"[Modal Flash] Closing tunnel on {self.tunnel.url}.")
|
|
167
182
|
await self.tunnel_manager.__aexit__(*sys.exc_info())
|
|
168
183
|
|
|
169
184
|
|
|
170
|
-
FlashManager = synchronize_api(_FlashManager)
|
|
185
|
+
FlashManager = synchronize_api(_FlashManager, target_module=__name__)
|
|
171
186
|
|
|
172
187
|
|
|
173
188
|
@synchronizer.create_blocking
|
|
174
189
|
async def flash_forward(
|
|
175
190
|
port: int,
|
|
176
|
-
process: Optional[subprocess.Popen] = None,
|
|
191
|
+
process: Optional[subprocess.Popen] = None, # to be deprecated
|
|
177
192
|
health_check_url: Optional[str] = None,
|
|
193
|
+
startup_timeout: int = 30,
|
|
194
|
+
exit_grace_period: int = 0,
|
|
195
|
+
h2_enabled: bool = False,
|
|
178
196
|
) -> _FlashManager:
|
|
179
197
|
"""
|
|
180
198
|
Forward a port to the Modal Flash service, exposing that port as a stable web endpoint.
|
|
@@ -183,7 +201,15 @@ async def flash_forward(
|
|
|
183
201
|
"""
|
|
184
202
|
client = await _Client.from_env()
|
|
185
203
|
|
|
186
|
-
manager = _FlashManager(
|
|
204
|
+
manager = _FlashManager(
|
|
205
|
+
client,
|
|
206
|
+
port,
|
|
207
|
+
process=process,
|
|
208
|
+
health_check_url=health_check_url,
|
|
209
|
+
startup_timeout=startup_timeout,
|
|
210
|
+
exit_grace_period=exit_grace_period,
|
|
211
|
+
h2_enabled=h2_enabled,
|
|
212
|
+
)
|
|
187
213
|
await manager._start()
|
|
188
214
|
return manager
|
|
189
215
|
|
|
@@ -321,7 +347,7 @@ class _FlashPrometheusAutoscaler:
|
|
|
321
347
|
|
|
322
348
|
async def _compute_target_containers(self, current_replicas: int) -> int:
|
|
323
349
|
"""
|
|
324
|
-
Gets
|
|
350
|
+
Gets metrics from container to autoscale up or down.
|
|
325
351
|
"""
|
|
326
352
|
containers = await self._get_all_containers()
|
|
327
353
|
if len(containers) > current_replicas:
|
|
@@ -334,7 +360,7 @@ class _FlashPrometheusAutoscaler:
|
|
|
334
360
|
if current_replicas == 0:
|
|
335
361
|
return 1
|
|
336
362
|
|
|
337
|
-
# Get metrics based on autoscaler type
|
|
363
|
+
# Get metrics based on autoscaler type
|
|
338
364
|
sum_metric, n_containers_with_metrics = await self._get_scaling_info(containers)
|
|
339
365
|
|
|
340
366
|
desired_replicas = self._calculate_desired_replicas(
|
|
@@ -406,39 +432,26 @@ class _FlashPrometheusAutoscaler:
|
|
|
406
432
|
return desired_replicas
|
|
407
433
|
|
|
408
434
|
async def _get_scaling_info(self, containers) -> tuple[float, int]:
|
|
409
|
-
"""Get metrics using
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
sum_metric = sum(container_metrics_list)
|
|
421
|
-
n_containers_with_metrics = len(container_metrics_list)
|
|
422
|
-
else:
|
|
423
|
-
sum_metric = 0
|
|
424
|
-
n_containers_with_metrics = 0
|
|
425
|
-
|
|
426
|
-
container_metrics_list = await asyncio.gather(
|
|
427
|
-
*[
|
|
428
|
-
self._get_metrics(f"https://{container.host}:{container.port}/{self.metrics_endpoint}")
|
|
429
|
-
for container in containers
|
|
430
|
-
]
|
|
431
|
-
)
|
|
435
|
+
"""Get metrics using container exposed metrics endpoints."""
|
|
436
|
+
sum_metric = 0
|
|
437
|
+
n_containers_with_metrics = 0
|
|
438
|
+
|
|
439
|
+
container_metrics_list = await asyncio.gather(
|
|
440
|
+
*[
|
|
441
|
+
self._get_metrics(f"https://{container.host}:{container.port}/{self.metrics_endpoint}")
|
|
442
|
+
for container in containers
|
|
443
|
+
]
|
|
444
|
+
)
|
|
432
445
|
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
446
|
+
for container_metrics in container_metrics_list:
|
|
447
|
+
if (
|
|
448
|
+
container_metrics is None
|
|
449
|
+
or self.target_metric not in container_metrics
|
|
450
|
+
or len(container_metrics[self.target_metric]) == 0
|
|
451
|
+
):
|
|
452
|
+
continue
|
|
453
|
+
sum_metric += container_metrics[self.target_metric][0].value
|
|
454
|
+
n_containers_with_metrics += 1
|
|
442
455
|
|
|
443
456
|
return sum_metric, n_containers_with_metrics
|
|
444
457
|
|
|
@@ -474,23 +487,14 @@ class _FlashPrometheusAutoscaler:
|
|
|
474
487
|
|
|
475
488
|
return metrics
|
|
476
489
|
|
|
477
|
-
async def _get_container_metrics(self, container_id: str) -> Optional[api_pb2.TaskGetAutoscalingMetricsResponse]:
|
|
478
|
-
req = api_pb2.TaskGetAutoscalingMetricsRequest(task_id=container_id)
|
|
479
|
-
try:
|
|
480
|
-
resp = await retry_transient_errors(self.client.stub.TaskGetAutoscalingMetrics, req)
|
|
481
|
-
return resp
|
|
482
|
-
except Exception as e:
|
|
483
|
-
logger.warning(f"[Modal Flash] Error getting metrics for container {container_id}: {e}")
|
|
484
|
-
return None
|
|
485
|
-
|
|
486
490
|
async def _get_all_containers(self):
|
|
487
491
|
req = api_pb2.FlashContainerListRequest(function_id=self.fn.object_id)
|
|
488
|
-
resp = await
|
|
492
|
+
resp = await self.client.stub.FlashContainerList(req)
|
|
489
493
|
return resp.containers
|
|
490
494
|
|
|
491
495
|
async def _set_target_slots(self, target_slots: int):
|
|
492
496
|
req = api_pb2.FlashSetTargetSlotsMetricsRequest(function_id=self.fn.object_id, target_slots=target_slots)
|
|
493
|
-
await
|
|
497
|
+
await self.client.stub.FlashSetTargetSlotsMetrics(req)
|
|
494
498
|
return
|
|
495
499
|
|
|
496
500
|
def _make_scaling_decision(
|
|
@@ -572,14 +576,10 @@ async def flash_prometheus_autoscaler(
|
|
|
572
576
|
app_name: str,
|
|
573
577
|
cls_name: str,
|
|
574
578
|
# Endpoint to fetch metrics from. Must be in Prometheus format. Example: "/metrics"
|
|
575
|
-
# If metrics_endpoint is "internal", we will use containers' internal metrics to autoscale instead.
|
|
576
579
|
metrics_endpoint: str,
|
|
577
580
|
# Target metric to autoscale on. Example: "vllm:num_requests_running"
|
|
578
|
-
# If metrics_endpoint is "internal", target_metrics options are: [cpu_usage_percent, memory_usage_percent]
|
|
579
581
|
target_metric: str,
|
|
580
582
|
# Target metric value. Example: 25
|
|
581
|
-
# If metrics_endpoint is "internal", target_metric_value is a percentage value between 0.1 and 1.0 (inclusive),
|
|
582
|
-
# indicating container's usage of that metric.
|
|
583
583
|
target_metric_value: float,
|
|
584
584
|
min_containers: Optional[int] = None,
|
|
585
585
|
max_containers: Optional[int] = None,
|
|
@@ -645,5 +645,92 @@ async def flash_get_containers(app_name: str, cls_name: str) -> list[dict[str, A
|
|
|
645
645
|
assert fn is not None
|
|
646
646
|
await fn.hydrate(client=client)
|
|
647
647
|
req = api_pb2.FlashContainerListRequest(function_id=fn.object_id)
|
|
648
|
-
resp = await
|
|
648
|
+
resp = await client.stub.FlashContainerList(req)
|
|
649
649
|
return resp.containers
|
|
650
|
+
|
|
651
|
+
|
|
652
|
+
def _http_server(
|
|
653
|
+
port: Optional[int] = None,
|
|
654
|
+
*,
|
|
655
|
+
proxy_regions: list[str] = [], # The regions to proxy the HTTP server to.
|
|
656
|
+
startup_timeout: int = 30, # Maximum number of seconds to wait for the HTTP server to start.
|
|
657
|
+
exit_grace_period: Optional[int] = None, # The time to wait for the HTTP server to exit gracefully.
|
|
658
|
+
h2_enabled: bool = False, # Whether to enable HTTP/2 support.
|
|
659
|
+
):
|
|
660
|
+
"""Decorator for Flash-enabled HTTP servers on Modal classes.
|
|
661
|
+
|
|
662
|
+
Args:
|
|
663
|
+
port: The local port to forward to the HTTP server.
|
|
664
|
+
proxy_regions: The regions to proxy the HTTP server to.
|
|
665
|
+
startup_timeout: The maximum time to wait for the HTTP server to start.
|
|
666
|
+
exit_grace_period: The time to wait for the HTTP server to exit gracefully.
|
|
667
|
+
|
|
668
|
+
"""
|
|
669
|
+
if port is None:
|
|
670
|
+
raise InvalidError(
|
|
671
|
+
"Positional arguments are not allowed. Did you forget parentheses? Suggestion: `@modal.http_server()`."
|
|
672
|
+
)
|
|
673
|
+
if not isinstance(port, int) or port < 1 or port > 65535:
|
|
674
|
+
raise InvalidError("First argument of `@http_server` must be a local port, such as `@http_server(8000)`.")
|
|
675
|
+
if startup_timeout <= 0:
|
|
676
|
+
raise InvalidError("The `startup_timeout` argument of `@http_server` must be positive.")
|
|
677
|
+
if exit_grace_period is not None and exit_grace_period < 0:
|
|
678
|
+
raise InvalidError("The `exit_grace_period` argument of `@http_server` must be non-negative.")
|
|
679
|
+
|
|
680
|
+
from modal._partial_function import _PartialFunction, _PartialFunctionParams
|
|
681
|
+
|
|
682
|
+
params = _PartialFunctionParams(
|
|
683
|
+
http_config=api_pb2.HTTPConfig(
|
|
684
|
+
port=port,
|
|
685
|
+
proxy_regions=proxy_regions,
|
|
686
|
+
startup_timeout=startup_timeout or 0,
|
|
687
|
+
exit_grace_period=exit_grace_period or 0,
|
|
688
|
+
h2_enabled=h2_enabled,
|
|
689
|
+
)
|
|
690
|
+
)
|
|
691
|
+
|
|
692
|
+
def wrapper(obj: Union[Callable[..., Any], _PartialFunction]) -> _PartialFunction:
|
|
693
|
+
flags = _PartialFunctionFlags.HTTP_WEB_INTERFACE
|
|
694
|
+
|
|
695
|
+
if isinstance(obj, _PartialFunction):
|
|
696
|
+
pf = obj.stack(flags, params)
|
|
697
|
+
else:
|
|
698
|
+
pf = _PartialFunction(obj, flags, params)
|
|
699
|
+
pf.validate_obj_compatibility("`http_server`")
|
|
700
|
+
return pf
|
|
701
|
+
|
|
702
|
+
return wrapper
|
|
703
|
+
|
|
704
|
+
|
|
705
|
+
http_server = synchronize_api(_http_server, target_module=__name__)
|
|
706
|
+
|
|
707
|
+
|
|
708
|
+
class _FlashContainerEntry:
|
|
709
|
+
"""
|
|
710
|
+
A class that manages the lifecycle of Flash manager for Flash containers.
|
|
711
|
+
|
|
712
|
+
It is intentional that stop() runs before exit handlers and close().
|
|
713
|
+
This ensures the container is deregistered first, preventing new requests from being routed to it
|
|
714
|
+
while exit handlers execute and the exit grace period elapses, before finally closing the tunnel.
|
|
715
|
+
"""
|
|
716
|
+
|
|
717
|
+
def __init__(self, http_config: api_pb2.HTTPConfig):
|
|
718
|
+
self.http_config: api_pb2.HTTPConfig = http_config
|
|
719
|
+
self.flash_manager: Optional[FlashManager] = None # type: ignore
|
|
720
|
+
|
|
721
|
+
def enter(self):
|
|
722
|
+
if self.http_config != api_pb2.HTTPConfig():
|
|
723
|
+
self.flash_manager = flash_forward(
|
|
724
|
+
self.http_config.port,
|
|
725
|
+
startup_timeout=self.http_config.startup_timeout,
|
|
726
|
+
exit_grace_period=self.http_config.exit_grace_period,
|
|
727
|
+
h2_enabled=self.http_config.h2_enabled,
|
|
728
|
+
)
|
|
729
|
+
|
|
730
|
+
def stop(self):
|
|
731
|
+
if self.flash_manager:
|
|
732
|
+
self.flash_manager.stop()
|
|
733
|
+
|
|
734
|
+
def close(self):
|
|
735
|
+
if self.flash_manager:
|
|
736
|
+
self.flash_manager.close()
|
modal/experimental/flash.pyi
CHANGED
|
@@ -11,6 +11,9 @@ class _FlashManager:
|
|
|
11
11
|
port: int,
|
|
12
12
|
process: typing.Optional[subprocess.Popen] = None,
|
|
13
13
|
health_check_url: typing.Optional[str] = None,
|
|
14
|
+
startup_timeout: int = 30,
|
|
15
|
+
exit_grace_period: int = 0,
|
|
16
|
+
h2_enabled: bool = False,
|
|
14
17
|
):
|
|
15
18
|
"""Initialize self. See help(type(self)) for accurate signature."""
|
|
16
19
|
...
|
|
@@ -28,8 +31,6 @@ class _FlashManager:
|
|
|
28
31
|
async def stop(self): ...
|
|
29
32
|
async def close(self): ...
|
|
30
33
|
|
|
31
|
-
SUPERSELF = typing.TypeVar("SUPERSELF", covariant=True)
|
|
32
|
-
|
|
33
34
|
class FlashManager:
|
|
34
35
|
def __init__(
|
|
35
36
|
self,
|
|
@@ -37,9 +38,12 @@ class FlashManager:
|
|
|
37
38
|
port: int,
|
|
38
39
|
process: typing.Optional[subprocess.Popen] = None,
|
|
39
40
|
health_check_url: typing.Optional[str] = None,
|
|
41
|
+
startup_timeout: int = 30,
|
|
42
|
+
exit_grace_period: int = 0,
|
|
43
|
+
h2_enabled: bool = False,
|
|
40
44
|
): ...
|
|
41
45
|
|
|
42
|
-
class __is_port_connection_healthy_spec(typing_extensions.Protocol
|
|
46
|
+
class __is_port_connection_healthy_spec(typing_extensions.Protocol):
|
|
43
47
|
def __call__(
|
|
44
48
|
self, /, process: typing.Optional[subprocess.Popen], timeout: float = 0.5
|
|
45
49
|
) -> tuple[bool, typing.Optional[Exception]]: ...
|
|
@@ -47,15 +51,15 @@ class FlashManager:
|
|
|
47
51
|
self, /, process: typing.Optional[subprocess.Popen], timeout: float = 0.5
|
|
48
52
|
) -> tuple[bool, typing.Optional[Exception]]: ...
|
|
49
53
|
|
|
50
|
-
is_port_connection_healthy: __is_port_connection_healthy_spec
|
|
54
|
+
is_port_connection_healthy: __is_port_connection_healthy_spec
|
|
51
55
|
|
|
52
|
-
class ___start_spec(typing_extensions.Protocol
|
|
56
|
+
class ___start_spec(typing_extensions.Protocol):
|
|
53
57
|
def __call__(self, /): ...
|
|
54
58
|
async def aio(self, /): ...
|
|
55
59
|
|
|
56
|
-
_start: ___start_spec
|
|
60
|
+
_start: ___start_spec
|
|
57
61
|
|
|
58
|
-
class ___drain_container_spec(typing_extensions.Protocol
|
|
62
|
+
class ___drain_container_spec(typing_extensions.Protocol):
|
|
59
63
|
def __call__(self, /):
|
|
60
64
|
"""Background task that checks if we've encountered too many failures and drains the container if so."""
|
|
61
65
|
...
|
|
@@ -64,27 +68,27 @@ class FlashManager:
|
|
|
64
68
|
"""Background task that checks if we've encountered too many failures and drains the container if so."""
|
|
65
69
|
...
|
|
66
70
|
|
|
67
|
-
_drain_container: ___drain_container_spec
|
|
71
|
+
_drain_container: ___drain_container_spec
|
|
68
72
|
|
|
69
|
-
class ___run_heartbeat_spec(typing_extensions.Protocol
|
|
73
|
+
class ___run_heartbeat_spec(typing_extensions.Protocol):
|
|
70
74
|
def __call__(self, /, host: str, port: int): ...
|
|
71
75
|
async def aio(self, /, host: str, port: int): ...
|
|
72
76
|
|
|
73
|
-
_run_heartbeat: ___run_heartbeat_spec
|
|
77
|
+
_run_heartbeat: ___run_heartbeat_spec
|
|
74
78
|
|
|
75
79
|
def get_container_url(self): ...
|
|
76
80
|
|
|
77
|
-
class __stop_spec(typing_extensions.Protocol
|
|
81
|
+
class __stop_spec(typing_extensions.Protocol):
|
|
78
82
|
def __call__(self, /): ...
|
|
79
83
|
async def aio(self, /): ...
|
|
80
84
|
|
|
81
|
-
stop: __stop_spec
|
|
85
|
+
stop: __stop_spec
|
|
82
86
|
|
|
83
|
-
class __close_spec(typing_extensions.Protocol
|
|
87
|
+
class __close_spec(typing_extensions.Protocol):
|
|
84
88
|
def __call__(self, /): ...
|
|
85
89
|
async def aio(self, /): ...
|
|
86
90
|
|
|
87
|
-
close: __close_spec
|
|
91
|
+
close: __close_spec
|
|
88
92
|
|
|
89
93
|
class __flash_forward_spec(typing_extensions.Protocol):
|
|
90
94
|
def __call__(
|
|
@@ -93,6 +97,9 @@ class __flash_forward_spec(typing_extensions.Protocol):
|
|
|
93
97
|
port: int,
|
|
94
98
|
process: typing.Optional[subprocess.Popen] = None,
|
|
95
99
|
health_check_url: typing.Optional[str] = None,
|
|
100
|
+
startup_timeout: int = 30,
|
|
101
|
+
exit_grace_period: int = 0,
|
|
102
|
+
h2_enabled: bool = False,
|
|
96
103
|
) -> FlashManager:
|
|
97
104
|
"""Forward a port to the Modal Flash service, exposing that port as a stable web endpoint.
|
|
98
105
|
This is a highly experimental method that can break or be removed at any time without warning.
|
|
@@ -106,6 +113,9 @@ class __flash_forward_spec(typing_extensions.Protocol):
|
|
|
106
113
|
port: int,
|
|
107
114
|
process: typing.Optional[subprocess.Popen] = None,
|
|
108
115
|
health_check_url: typing.Optional[str] = None,
|
|
116
|
+
startup_timeout: int = 30,
|
|
117
|
+
exit_grace_period: int = 0,
|
|
118
|
+
h2_enabled: bool = False,
|
|
109
119
|
) -> FlashManager:
|
|
110
120
|
"""Forward a port to the Modal Flash service, exposing that port as a stable web endpoint.
|
|
111
121
|
This is a highly experimental method that can break or be removed at any time without warning.
|
|
@@ -139,7 +149,7 @@ class _FlashPrometheusAutoscaler:
|
|
|
139
149
|
async def start(self): ...
|
|
140
150
|
async def _run_autoscaler_loop(self): ...
|
|
141
151
|
async def _compute_target_containers(self, current_replicas: int) -> int:
|
|
142
|
-
"""Gets
|
|
152
|
+
"""Gets metrics from container to autoscale up or down."""
|
|
143
153
|
...
|
|
144
154
|
|
|
145
155
|
def _calculate_desired_replicas(
|
|
@@ -154,13 +164,10 @@ class _FlashPrometheusAutoscaler:
|
|
|
154
164
|
...
|
|
155
165
|
|
|
156
166
|
async def _get_scaling_info(self, containers) -> tuple[float, int]:
|
|
157
|
-
"""Get metrics using
|
|
167
|
+
"""Get metrics using container exposed metrics endpoints."""
|
|
158
168
|
...
|
|
159
169
|
|
|
160
170
|
async def _get_metrics(self, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
|
|
161
|
-
async def _get_container_metrics(
|
|
162
|
-
self, container_id: str
|
|
163
|
-
) -> typing.Optional[modal_proto.api_pb2.TaskGetAutoscalingMetricsResponse]: ...
|
|
164
171
|
async def _get_all_containers(self): ...
|
|
165
172
|
async def _set_target_slots(self, target_slots: int): ...
|
|
166
173
|
def _make_scaling_decision(
|
|
@@ -212,28 +219,28 @@ class FlashPrometheusAutoscaler:
|
|
|
212
219
|
autoscaling_interval_seconds: int,
|
|
213
220
|
): ...
|
|
214
221
|
|
|
215
|
-
class __start_spec(typing_extensions.Protocol
|
|
222
|
+
class __start_spec(typing_extensions.Protocol):
|
|
216
223
|
def __call__(self, /): ...
|
|
217
224
|
async def aio(self, /): ...
|
|
218
225
|
|
|
219
|
-
start: __start_spec
|
|
226
|
+
start: __start_spec
|
|
220
227
|
|
|
221
|
-
class ___run_autoscaler_loop_spec(typing_extensions.Protocol
|
|
228
|
+
class ___run_autoscaler_loop_spec(typing_extensions.Protocol):
|
|
222
229
|
def __call__(self, /): ...
|
|
223
230
|
async def aio(self, /): ...
|
|
224
231
|
|
|
225
|
-
_run_autoscaler_loop: ___run_autoscaler_loop_spec
|
|
232
|
+
_run_autoscaler_loop: ___run_autoscaler_loop_spec
|
|
226
233
|
|
|
227
|
-
class ___compute_target_containers_spec(typing_extensions.Protocol
|
|
234
|
+
class ___compute_target_containers_spec(typing_extensions.Protocol):
|
|
228
235
|
def __call__(self, /, current_replicas: int) -> int:
|
|
229
|
-
"""Gets
|
|
236
|
+
"""Gets metrics from container to autoscale up or down."""
|
|
230
237
|
...
|
|
231
238
|
|
|
232
239
|
async def aio(self, /, current_replicas: int) -> int:
|
|
233
|
-
"""Gets
|
|
240
|
+
"""Gets metrics from container to autoscale up or down."""
|
|
234
241
|
...
|
|
235
242
|
|
|
236
|
-
_compute_target_containers: ___compute_target_containers_spec
|
|
243
|
+
_compute_target_containers: ___compute_target_containers_spec
|
|
237
244
|
|
|
238
245
|
def _calculate_desired_replicas(
|
|
239
246
|
self,
|
|
@@ -246,44 +253,34 @@ class FlashPrometheusAutoscaler:
|
|
|
246
253
|
"""Calculate the desired number of replicas to autoscale to."""
|
|
247
254
|
...
|
|
248
255
|
|
|
249
|
-
class ___get_scaling_info_spec(typing_extensions.Protocol
|
|
256
|
+
class ___get_scaling_info_spec(typing_extensions.Protocol):
|
|
250
257
|
def __call__(self, /, containers) -> tuple[float, int]:
|
|
251
|
-
"""Get metrics using
|
|
258
|
+
"""Get metrics using container exposed metrics endpoints."""
|
|
252
259
|
...
|
|
253
260
|
|
|
254
261
|
async def aio(self, /, containers) -> tuple[float, int]:
|
|
255
|
-
"""Get metrics using
|
|
262
|
+
"""Get metrics using container exposed metrics endpoints."""
|
|
256
263
|
...
|
|
257
264
|
|
|
258
|
-
_get_scaling_info: ___get_scaling_info_spec
|
|
265
|
+
_get_scaling_info: ___get_scaling_info_spec
|
|
259
266
|
|
|
260
|
-
class ___get_metrics_spec(typing_extensions.Protocol
|
|
267
|
+
class ___get_metrics_spec(typing_extensions.Protocol):
|
|
261
268
|
def __call__(self, /, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
|
|
262
269
|
async def aio(self, /, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
|
|
263
270
|
|
|
264
|
-
_get_metrics: ___get_metrics_spec
|
|
265
|
-
|
|
266
|
-
class ___get_container_metrics_spec(typing_extensions.Protocol[SUPERSELF]):
|
|
267
|
-
def __call__(
|
|
268
|
-
self, /, container_id: str
|
|
269
|
-
) -> typing.Optional[modal_proto.api_pb2.TaskGetAutoscalingMetricsResponse]: ...
|
|
270
|
-
async def aio(
|
|
271
|
-
self, /, container_id: str
|
|
272
|
-
) -> typing.Optional[modal_proto.api_pb2.TaskGetAutoscalingMetricsResponse]: ...
|
|
273
|
-
|
|
274
|
-
_get_container_metrics: ___get_container_metrics_spec[typing_extensions.Self]
|
|
271
|
+
_get_metrics: ___get_metrics_spec
|
|
275
272
|
|
|
276
|
-
class ___get_all_containers_spec(typing_extensions.Protocol
|
|
273
|
+
class ___get_all_containers_spec(typing_extensions.Protocol):
|
|
277
274
|
def __call__(self, /): ...
|
|
278
275
|
async def aio(self, /): ...
|
|
279
276
|
|
|
280
|
-
_get_all_containers: ___get_all_containers_spec
|
|
277
|
+
_get_all_containers: ___get_all_containers_spec
|
|
281
278
|
|
|
282
|
-
class ___set_target_slots_spec(typing_extensions.Protocol
|
|
279
|
+
class ___set_target_slots_spec(typing_extensions.Protocol):
|
|
283
280
|
def __call__(self, /, target_slots: int): ...
|
|
284
281
|
async def aio(self, /, target_slots: int): ...
|
|
285
282
|
|
|
286
|
-
_set_target_slots: ___set_target_slots_spec
|
|
283
|
+
_set_target_slots: ___set_target_slots_spec
|
|
287
284
|
|
|
288
285
|
def _make_scaling_decision(
|
|
289
286
|
self,
|
|
@@ -313,11 +310,11 @@ class FlashPrometheusAutoscaler:
|
|
|
313
310
|
"""
|
|
314
311
|
...
|
|
315
312
|
|
|
316
|
-
class __stop_spec(typing_extensions.Protocol
|
|
313
|
+
class __stop_spec(typing_extensions.Protocol):
|
|
317
314
|
def __call__(self, /): ...
|
|
318
315
|
async def aio(self, /): ...
|
|
319
316
|
|
|
320
|
-
stop: __stop_spec
|
|
317
|
+
stop: __stop_spec
|
|
321
318
|
|
|
322
319
|
class __flash_prometheus_autoscaler_spec(typing_extensions.Protocol):
|
|
323
320
|
def __call__(
|
|
@@ -392,3 +389,54 @@ class __flash_get_containers_spec(typing_extensions.Protocol):
|
|
|
392
389
|
...
|
|
393
390
|
|
|
394
391
|
flash_get_containers: __flash_get_containers_spec
|
|
392
|
+
|
|
393
|
+
def _http_server(
|
|
394
|
+
port: typing.Optional[int] = None,
|
|
395
|
+
*,
|
|
396
|
+
proxy_regions: list[str] = [],
|
|
397
|
+
startup_timeout: int = 30,
|
|
398
|
+
exit_grace_period: typing.Optional[int] = None,
|
|
399
|
+
h2_enabled: bool = False,
|
|
400
|
+
):
|
|
401
|
+
"""Decorator for Flash-enabled HTTP servers on Modal classes.
|
|
402
|
+
|
|
403
|
+
Args:
|
|
404
|
+
port: The local port to forward to the HTTP server.
|
|
405
|
+
proxy_regions: The regions to proxy the HTTP server to.
|
|
406
|
+
startup_timeout: The maximum time to wait for the HTTP server to start.
|
|
407
|
+
exit_grace_period: The time to wait for the HTTP server to exit gracefully.
|
|
408
|
+
"""
|
|
409
|
+
...
|
|
410
|
+
|
|
411
|
+
def http_server(
|
|
412
|
+
port: typing.Optional[int] = None,
|
|
413
|
+
*,
|
|
414
|
+
proxy_regions: list[str] = [],
|
|
415
|
+
startup_timeout: int = 30,
|
|
416
|
+
exit_grace_period: typing.Optional[int] = None,
|
|
417
|
+
h2_enabled: bool = False,
|
|
418
|
+
):
|
|
419
|
+
"""Decorator for Flash-enabled HTTP servers on Modal classes.
|
|
420
|
+
|
|
421
|
+
Args:
|
|
422
|
+
port: The local port to forward to the HTTP server.
|
|
423
|
+
proxy_regions: The regions to proxy the HTTP server to.
|
|
424
|
+
startup_timeout: The maximum time to wait for the HTTP server to start.
|
|
425
|
+
exit_grace_period: The time to wait for the HTTP server to exit gracefully.
|
|
426
|
+
"""
|
|
427
|
+
...
|
|
428
|
+
|
|
429
|
+
class _FlashContainerEntry:
|
|
430
|
+
"""A class that manages the lifecycle of Flash manager for Flash containers.
|
|
431
|
+
|
|
432
|
+
It is intentional that stop() runs before exit handlers and close().
|
|
433
|
+
This ensures the container is deregistered first, preventing new requests from being routed to it
|
|
434
|
+
while exit handlers execute and the exit grace period elapses, before finally closing the tunnel.
|
|
435
|
+
"""
|
|
436
|
+
def __init__(self, http_config: modal_proto.api_pb2.HTTPConfig):
|
|
437
|
+
"""Initialize self. See help(type(self)) for accurate signature."""
|
|
438
|
+
...
|
|
439
|
+
|
|
440
|
+
def enter(self): ...
|
|
441
|
+
def stop(self): ...
|
|
442
|
+
def close(self): ...
|