modal 1.0.6.dev39__py3-none-any.whl → 1.0.6.dev42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of modal might be problematic. Click here for more details.
- modal/client.pyi +2 -2
- modal/experimental/__init__.py +1 -90
- modal/experimental/flash.py +449 -0
- modal/experimental/flash.pyi +252 -0
- modal/functions.pyi +6 -6
- modal/image.py +12 -1
- modal/image.pyi +15 -0
- {modal-1.0.6.dev39.dist-info → modal-1.0.6.dev42.dist-info}/METADATA +1 -1
- {modal-1.0.6.dev39.dist-info → modal-1.0.6.dev42.dist-info}/RECORD +14 -12
- modal_version/__init__.py +1 -1
- {modal-1.0.6.dev39.dist-info → modal-1.0.6.dev42.dist-info}/WHEEL +0 -0
- {modal-1.0.6.dev39.dist-info → modal-1.0.6.dev42.dist-info}/entry_points.txt +0 -0
- {modal-1.0.6.dev39.dist-info → modal-1.0.6.dev42.dist-info}/licenses/LICENSE +0 -0
- {modal-1.0.6.dev39.dist-info → modal-1.0.6.dev42.dist-info}/top_level.txt +0 -0
modal/client.pyi
CHANGED
|
@@ -33,7 +33,7 @@ class _Client:
|
|
|
33
33
|
server_url: str,
|
|
34
34
|
client_type: int,
|
|
35
35
|
credentials: typing.Optional[tuple[str, str]],
|
|
36
|
-
version: str = "1.0.6.dev39",
|
|
36
|
+
version: str = "1.0.6.dev42",
|
|
37
37
|
):
|
|
38
38
|
"""mdmd:hidden
|
|
39
39
|
The Modal client object is not intended to be instantiated directly by users.
|
|
@@ -163,7 +163,7 @@ class Client:
|
|
|
163
163
|
server_url: str,
|
|
164
164
|
client_type: int,
|
|
165
165
|
credentials: typing.Optional[tuple[str, str]],
|
|
166
|
-
version: str = "1.0.6.dev39",
|
|
166
|
+
version: str = "1.0.6.dev42",
|
|
167
167
|
):
|
|
168
168
|
"""mdmd:hidden
|
|
169
169
|
The Modal client object is not intended to be instantiated directly by users.
|
modal/experimental/__init__.py
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
# Copyright Modal Labs 2025
|
|
2
|
-
import asyncio
|
|
3
2
|
import os
|
|
4
|
-
import sys
|
|
5
3
|
from dataclasses import dataclass
|
|
6
4
|
from pathlib import Path
|
|
7
5
|
from typing import Literal, Optional, Union
|
|
@@ -13,17 +11,16 @@ from .._functions import _Function
|
|
|
13
11
|
from .._object import _get_environment_name
|
|
14
12
|
from .._partial_function import _clustered
|
|
15
13
|
from .._runtime.container_io_manager import _ContainerIOManager
|
|
16
|
-
from .._tunnel import _forward as _forward_tunnel
|
|
17
14
|
from .._utils.async_utils import synchronize_api, synchronizer
|
|
18
15
|
from .._utils.deprecation import deprecation_warning
|
|
19
16
|
from .._utils.grpc_utils import retry_transient_errors
|
|
20
17
|
from ..app import _App
|
|
21
18
|
from ..client import _Client
|
|
22
19
|
from ..cls import _Cls, _Obj
|
|
23
|
-
from ..config import logger
|
|
24
20
|
from ..exception import InvalidError
|
|
25
21
|
from ..image import DockerfileSpec, ImageBuilderVersion, _Image, _ImageRegistryConfig
|
|
26
22
|
from ..secret import _Secret
|
|
23
|
+
from .flash import flash_forward, flash_prometheus_autoscaler # noqa: F401
|
|
27
24
|
|
|
28
25
|
|
|
29
26
|
def stop_fetching_inputs():
|
|
@@ -259,89 +256,3 @@ async def update_autoscaler(
|
|
|
259
256
|
|
|
260
257
|
request = api_pb2.FunctionUpdateSchedulingParamsRequest(function_id=f.object_id, settings=settings)
|
|
261
258
|
await retry_transient_errors(client.stub.FunctionUpdateSchedulingParams, request)
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
class _FlashManager:
|
|
265
|
-
def __init__(self, client: _Client, port: int):
|
|
266
|
-
self.client = client
|
|
267
|
-
self.port = port
|
|
268
|
-
self.tunnel_manager = _forward_tunnel(port, client=client)
|
|
269
|
-
self.stopped = False
|
|
270
|
-
|
|
271
|
-
async def _start(self):
|
|
272
|
-
self.tunnel = await self.tunnel_manager.__aenter__()
|
|
273
|
-
|
|
274
|
-
hostname = self.tunnel.url.split("://")[1]
|
|
275
|
-
if ":" in hostname:
|
|
276
|
-
host, port = hostname.split(":")
|
|
277
|
-
else:
|
|
278
|
-
host = hostname
|
|
279
|
-
port = "443"
|
|
280
|
-
|
|
281
|
-
self.heartbeat_task = asyncio.create_task(self._run_heartbeat(host, int(port)))
|
|
282
|
-
|
|
283
|
-
async def _run_heartbeat(self, host: str, port: int):
|
|
284
|
-
first_registration = True
|
|
285
|
-
while True:
|
|
286
|
-
try:
|
|
287
|
-
resp = await self.client.stub.FlashContainerRegister(
|
|
288
|
-
api_pb2.FlashContainerRegisterRequest(
|
|
289
|
-
priority=10,
|
|
290
|
-
weight=5,
|
|
291
|
-
host=host,
|
|
292
|
-
port=port,
|
|
293
|
-
),
|
|
294
|
-
timeout=10,
|
|
295
|
-
)
|
|
296
|
-
if first_registration:
|
|
297
|
-
logger.warning(f"[Modal Flash] Listening at {resp.url}")
|
|
298
|
-
first_registration = False
|
|
299
|
-
except asyncio.CancelledError:
|
|
300
|
-
logger.warning("[Modal Flash] Shutting down...")
|
|
301
|
-
break
|
|
302
|
-
except Exception as e:
|
|
303
|
-
logger.error(f"[Modal Flash] Heartbeat failed: {e}")
|
|
304
|
-
|
|
305
|
-
try:
|
|
306
|
-
await asyncio.sleep(1)
|
|
307
|
-
except asyncio.CancelledError:
|
|
308
|
-
logger.warning("[Modal Flash] Shutting down...")
|
|
309
|
-
break
|
|
310
|
-
|
|
311
|
-
async def stop(self):
|
|
312
|
-
self.heartbeat_task.cancel()
|
|
313
|
-
await retry_transient_errors(
|
|
314
|
-
self.client.stub.FlashContainerDeregister,
|
|
315
|
-
api_pb2.FlashContainerDeregisterRequest(),
|
|
316
|
-
)
|
|
317
|
-
|
|
318
|
-
self.stopped = True
|
|
319
|
-
logger.warning(f"[Modal Flash] No longer accepting new requests on {self.tunnel.url}.")
|
|
320
|
-
|
|
321
|
-
# NOTE(gongy): We skip calling TunnelStop to avoid interrupting in-flight requests.
|
|
322
|
-
# It is up to the user to wait after calling .stop() to drain in-flight requests.
|
|
323
|
-
|
|
324
|
-
async def close(self):
|
|
325
|
-
if not self.stopped:
|
|
326
|
-
await self.stop()
|
|
327
|
-
|
|
328
|
-
logger.warning(f"[Modal Flash] Closing tunnel on {self.tunnel.url}.")
|
|
329
|
-
await self.tunnel_manager.__aexit__(*sys.exc_info())
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
FlashManager = synchronize_api(_FlashManager)
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
@synchronizer.create_blocking
|
|
336
|
-
async def flash_forward(port: int) -> _FlashManager:
|
|
337
|
-
"""
|
|
338
|
-
Forward a port to the Modal Flash service, exposing that port as a stable web endpoint.
|
|
339
|
-
|
|
340
|
-
This is a highly experimental method that can break or be removed at any time without warning.
|
|
341
|
-
Do not use this method unless explicitly instructed to do so by Modal support.
|
|
342
|
-
"""
|
|
343
|
-
client = await _Client.from_env()
|
|
344
|
-
|
|
345
|
-
manager = _FlashManager(client, port)
|
|
346
|
-
await manager._start()
|
|
347
|
-
return manager
|
|
@@ -0,0 +1,449 @@
|
|
|
1
|
+
# Copyright Modal Labs 2025
|
|
2
|
+
import asyncio
|
|
3
|
+
import math
|
|
4
|
+
import sys
|
|
5
|
+
import time
|
|
6
|
+
import traceback
|
|
7
|
+
from collections import defaultdict
|
|
8
|
+
from typing import Any, Optional
|
|
9
|
+
from urllib.parse import urlparse
|
|
10
|
+
|
|
11
|
+
from modal.cls import _Cls
|
|
12
|
+
from modal.dict import _Dict
|
|
13
|
+
from modal_proto import api_pb2
|
|
14
|
+
|
|
15
|
+
from .._tunnel import _forward as _forward_tunnel
|
|
16
|
+
from .._utils.async_utils import synchronize_api, synchronizer
|
|
17
|
+
from .._utils.grpc_utils import retry_transient_errors
|
|
18
|
+
from ..client import _Client
|
|
19
|
+
from ..config import logger
|
|
20
|
+
from ..exception import InvalidError
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class _FlashManager:
|
|
24
|
+
def __init__(self, client: _Client, port: int, health_check_url: Optional[str] = None):
|
|
25
|
+
self.client = client
|
|
26
|
+
self.port = port
|
|
27
|
+
self.health_check_url = health_check_url
|
|
28
|
+
self.tunnel_manager = _forward_tunnel(port, client=client)
|
|
29
|
+
self.stopped = False
|
|
30
|
+
|
|
31
|
+
async def _start(self):
|
|
32
|
+
self.tunnel = await self.tunnel_manager.__aenter__()
|
|
33
|
+
|
|
34
|
+
parsed_url = urlparse(self.tunnel.url)
|
|
35
|
+
host = parsed_url.hostname
|
|
36
|
+
port = parsed_url.port or 443
|
|
37
|
+
|
|
38
|
+
self.heartbeat_task = asyncio.create_task(self._run_heartbeat(host, port))
|
|
39
|
+
|
|
40
|
+
async def _run_heartbeat(self, host: str, port: int):
|
|
41
|
+
first_registration = True
|
|
42
|
+
while True:
|
|
43
|
+
try:
|
|
44
|
+
resp = await self.client.stub.FlashContainerRegister(
|
|
45
|
+
api_pb2.FlashContainerRegisterRequest(
|
|
46
|
+
priority=10,
|
|
47
|
+
weight=5,
|
|
48
|
+
host=host,
|
|
49
|
+
port=port,
|
|
50
|
+
),
|
|
51
|
+
timeout=10,
|
|
52
|
+
)
|
|
53
|
+
if first_registration:
|
|
54
|
+
logger.warning(f"[Modal Flash] Listening at {resp.url}")
|
|
55
|
+
first_registration = False
|
|
56
|
+
except asyncio.CancelledError:
|
|
57
|
+
logger.warning("[Modal Flash] Shutting down...")
|
|
58
|
+
break
|
|
59
|
+
except Exception as e:
|
|
60
|
+
logger.error(f"[Modal Flash] Heartbeat failed: {e}")
|
|
61
|
+
|
|
62
|
+
try:
|
|
63
|
+
await asyncio.sleep(1)
|
|
64
|
+
except asyncio.CancelledError:
|
|
65
|
+
logger.warning("[Modal Flash] Shutting down...")
|
|
66
|
+
break
|
|
67
|
+
|
|
68
|
+
def get_container_url(self):
|
|
69
|
+
# WARNING: Try not to use this method; we aren't sure if we will keep it.
|
|
70
|
+
return self.tunnel.url
|
|
71
|
+
|
|
72
|
+
async def stop(self):
|
|
73
|
+
self.heartbeat_task.cancel()
|
|
74
|
+
await retry_transient_errors(
|
|
75
|
+
self.client.stub.FlashContainerDeregister,
|
|
76
|
+
api_pb2.FlashContainerDeregisterRequest(),
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
self.stopped = True
|
|
80
|
+
logger.warning(f"[Modal Flash] No longer accepting new requests on {self.tunnel.url}.")
|
|
81
|
+
|
|
82
|
+
# NOTE(gongy): We skip calling TunnelStop to avoid interrupting in-flight requests.
|
|
83
|
+
# It is up to the user to wait after calling .stop() to drain in-flight requests.
|
|
84
|
+
|
|
85
|
+
async def close(self):
|
|
86
|
+
if not self.stopped:
|
|
87
|
+
await self.stop()
|
|
88
|
+
|
|
89
|
+
logger.warning(f"[Modal Flash] Closing tunnel on {self.tunnel.url}.")
|
|
90
|
+
await self.tunnel_manager.__aexit__(*sys.exc_info())
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
FlashManager = synchronize_api(_FlashManager)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@synchronizer.create_blocking
|
|
97
|
+
async def flash_forward(port: int, health_check_url: Optional[str] = None) -> _FlashManager:
|
|
98
|
+
"""
|
|
99
|
+
Forward a port to the Modal Flash service, exposing that port as a stable web endpoint.
|
|
100
|
+
|
|
101
|
+
This is a highly experimental method that can break or be removed at any time without warning.
|
|
102
|
+
Do not use this method unless explicitly instructed to do so by Modal support.
|
|
103
|
+
"""
|
|
104
|
+
client = await _Client.from_env()
|
|
105
|
+
|
|
106
|
+
manager = _FlashManager(client, port, health_check_url)
|
|
107
|
+
await manager._start()
|
|
108
|
+
return manager
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class _FlashPrometheusAutoscaler:
|
|
112
|
+
_max_window_seconds = 60 * 60
|
|
113
|
+
|
|
114
|
+
def __init__(
|
|
115
|
+
self,
|
|
116
|
+
client: _Client,
|
|
117
|
+
app_name: str,
|
|
118
|
+
cls_name: str,
|
|
119
|
+
metrics_endpoint: str,
|
|
120
|
+
target_metric: str,
|
|
121
|
+
target_metric_value: float,
|
|
122
|
+
min_containers: Optional[int],
|
|
123
|
+
max_containers: Optional[int],
|
|
124
|
+
scale_up_tolerance: float,
|
|
125
|
+
scale_down_tolerance: float,
|
|
126
|
+
scale_up_stabilization_window_seconds: int,
|
|
127
|
+
scale_down_stabilization_window_seconds: int,
|
|
128
|
+
autoscaling_interval_seconds: int,
|
|
129
|
+
):
|
|
130
|
+
if scale_up_stabilization_window_seconds > self._max_window_seconds:
|
|
131
|
+
raise InvalidError(
|
|
132
|
+
f"scale_up_stabilization_window_seconds must be less than or equal to {self._max_window_seconds}"
|
|
133
|
+
)
|
|
134
|
+
if scale_down_stabilization_window_seconds > self._max_window_seconds:
|
|
135
|
+
raise InvalidError(
|
|
136
|
+
f"scale_down_stabilization_window_seconds must be less than or equal to {self._max_window_seconds}"
|
|
137
|
+
)
|
|
138
|
+
if target_metric_value <= 0:
|
|
139
|
+
raise InvalidError("target_metric_value must be greater than 0")
|
|
140
|
+
|
|
141
|
+
import aiohttp
|
|
142
|
+
|
|
143
|
+
self.client = client
|
|
144
|
+
self.app_name = app_name
|
|
145
|
+
self.cls_name = cls_name
|
|
146
|
+
self.metrics_endpoint = metrics_endpoint
|
|
147
|
+
self.target_metric = target_metric
|
|
148
|
+
self.target_metric_value = target_metric_value
|
|
149
|
+
self.min_containers = min_containers
|
|
150
|
+
self.max_containers = max_containers
|
|
151
|
+
self.scale_up_tolerance = scale_up_tolerance
|
|
152
|
+
self.scale_down_tolerance = scale_down_tolerance
|
|
153
|
+
self.scale_up_stabilization_window_seconds = scale_up_stabilization_window_seconds
|
|
154
|
+
self.scale_down_stabilization_window_seconds = scale_down_stabilization_window_seconds
|
|
155
|
+
self.autoscaling_interval_seconds = autoscaling_interval_seconds
|
|
156
|
+
|
|
157
|
+
FlashClass = _Cls.from_name(app_name, cls_name)
|
|
158
|
+
self.fn = FlashClass._class_service_function
|
|
159
|
+
self.cls = FlashClass()
|
|
160
|
+
|
|
161
|
+
self.http_client = aiohttp.ClientSession()
|
|
162
|
+
self.autoscaling_decisions_dict = _Dict.from_name(
|
|
163
|
+
f"{app_name}-{cls_name}-autoscaling-decisions",
|
|
164
|
+
create_if_missing=True,
|
|
165
|
+
)
|
|
166
|
+
|
|
167
|
+
self.autoscaler_thread = None
|
|
168
|
+
|
|
169
|
+
async def start(self):
|
|
170
|
+
await self.fn.hydrate(client=self.client)
|
|
171
|
+
self.autoscaler_thread = asyncio.create_task(self._run_autoscaler_loop())
|
|
172
|
+
|
|
173
|
+
async def _run_autoscaler_loop(self):
|
|
174
|
+
while True:
|
|
175
|
+
try:
|
|
176
|
+
autoscaling_time = time.time()
|
|
177
|
+
|
|
178
|
+
current_replicas = await self.autoscaling_decisions_dict.get("current_replicas", 0)
|
|
179
|
+
autoscaling_decisions = await self.autoscaling_decisions_dict.get("autoscaling_decisions", [])
|
|
180
|
+
if not isinstance(current_replicas, int):
|
|
181
|
+
logger.warning(f"[Modal Flash] Invalid item in autoscaling decisions: {current_replicas}")
|
|
182
|
+
current_replicas = 0
|
|
183
|
+
if not isinstance(autoscaling_decisions, list):
|
|
184
|
+
logger.warning(f"[Modal Flash] Invalid item in autoscaling decisions: {autoscaling_decisions}")
|
|
185
|
+
autoscaling_decisions = []
|
|
186
|
+
for item in autoscaling_decisions:
|
|
187
|
+
if (
|
|
188
|
+
not isinstance(item, tuple)
|
|
189
|
+
or len(item) != 2
|
|
190
|
+
or not isinstance(item[0], float)
|
|
191
|
+
or not isinstance(item[1], int)
|
|
192
|
+
):
|
|
193
|
+
logger.warning(f"[Modal Flash] Invalid item in autoscaling decisions: {item}")
|
|
194
|
+
autoscaling_decisions = []
|
|
195
|
+
break
|
|
196
|
+
|
|
197
|
+
autoscaling_decisions = [
|
|
198
|
+
(timestamp, decision)
|
|
199
|
+
for timestamp, decision in autoscaling_decisions
|
|
200
|
+
if timestamp >= autoscaling_time - self._max_window_seconds
|
|
201
|
+
]
|
|
202
|
+
|
|
203
|
+
current_target_containers = await self._compute_target_containers(current_replicas)
|
|
204
|
+
autoscaling_decisions.append((autoscaling_time, current_target_containers))
|
|
205
|
+
|
|
206
|
+
actual_target_containers = self._make_scaling_decision(
|
|
207
|
+
current_replicas,
|
|
208
|
+
autoscaling_decisions,
|
|
209
|
+
scale_up_stabilization_window_seconds=self.scale_up_stabilization_window_seconds,
|
|
210
|
+
scale_down_stabilization_window_seconds=self.scale_down_stabilization_window_seconds,
|
|
211
|
+
min_containers=self.min_containers,
|
|
212
|
+
max_containers=self.max_containers,
|
|
213
|
+
)
|
|
214
|
+
|
|
215
|
+
logger.warning(
|
|
216
|
+
f"[Modal Flash] Scaling to {actual_target_containers} containers. Autoscaling decision "
|
|
217
|
+
f"made in {time.time() - autoscaling_time} seconds."
|
|
218
|
+
)
|
|
219
|
+
|
|
220
|
+
await self.autoscaling_decisions_dict.put(
|
|
221
|
+
"autoscaling_decisions",
|
|
222
|
+
autoscaling_decisions,
|
|
223
|
+
)
|
|
224
|
+
await self.autoscaling_decisions_dict.put("current_replicas", actual_target_containers)
|
|
225
|
+
|
|
226
|
+
await self.cls.update_autoscaler(
|
|
227
|
+
min_containers=actual_target_containers,
|
|
228
|
+
max_containers=actual_target_containers,
|
|
229
|
+
)
|
|
230
|
+
|
|
231
|
+
if time.time() - autoscaling_time < self.autoscaling_interval_seconds:
|
|
232
|
+
await asyncio.sleep(self.autoscaling_interval_seconds - (time.time() - autoscaling_time))
|
|
233
|
+
except asyncio.CancelledError:
|
|
234
|
+
logger.warning("[Modal Flash] Shutting down autoscaler...")
|
|
235
|
+
await self.http_client.close()
|
|
236
|
+
break
|
|
237
|
+
except Exception as e:
|
|
238
|
+
logger.error(f"[Modal Flash] Error in autoscaler: {e}")
|
|
239
|
+
logger.error(traceback.format_exc())
|
|
240
|
+
await asyncio.sleep(self.autoscaling_interval_seconds)
|
|
241
|
+
|
|
242
|
+
async def _compute_target_containers(self, current_replicas: int) -> int:
|
|
243
|
+
containers = await self._get_all_containers()
|
|
244
|
+
if len(containers) > current_replicas:
|
|
245
|
+
logger.info(
|
|
246
|
+
f"[Modal Flash] Current replicas {current_replicas} is less than the number of containers "
|
|
247
|
+
f"{len(containers)}. Setting current_replicas = num_containers."
|
|
248
|
+
)
|
|
249
|
+
current_replicas = len(containers)
|
|
250
|
+
|
|
251
|
+
if current_replicas == 0:
|
|
252
|
+
return 1
|
|
253
|
+
|
|
254
|
+
target_metric = self.target_metric
|
|
255
|
+
target_metric_value = float(self.target_metric_value)
|
|
256
|
+
|
|
257
|
+
sum_metric = 0
|
|
258
|
+
containers_with_metrics = 0
|
|
259
|
+
container_metrics_list = await asyncio.gather(
|
|
260
|
+
*[
|
|
261
|
+
self._get_metrics(f"https://{container.host}:{container.port}/{self.metrics_endpoint}")
|
|
262
|
+
for container in containers
|
|
263
|
+
]
|
|
264
|
+
)
|
|
265
|
+
for container_metrics in container_metrics_list:
|
|
266
|
+
if (
|
|
267
|
+
container_metrics is None
|
|
268
|
+
or target_metric not in container_metrics
|
|
269
|
+
or len(container_metrics[target_metric]) == 0
|
|
270
|
+
):
|
|
271
|
+
continue
|
|
272
|
+
sum_metric += container_metrics[target_metric][0].value
|
|
273
|
+
containers_with_metrics += 1
|
|
274
|
+
|
|
275
|
+
n_containers_missing_metric = current_replicas - containers_with_metrics
|
|
276
|
+
|
|
277
|
+
# Scale up / down conservatively: Any container that is missing the metric is assumed to be at the minimum
|
|
278
|
+
# value of the metric when scaling up and the maximum value of the metric when scaling down.
|
|
279
|
+
scale_up_target_metric_value = sum_metric / current_replicas
|
|
280
|
+
scale_down_target_metric_value = (
|
|
281
|
+
sum_metric + n_containers_missing_metric * target_metric_value
|
|
282
|
+
) / current_replicas
|
|
283
|
+
|
|
284
|
+
scale_up_ratio = scale_up_target_metric_value / target_metric_value
|
|
285
|
+
scale_down_ratio = scale_down_target_metric_value / target_metric_value
|
|
286
|
+
|
|
287
|
+
desired_replicas = current_replicas
|
|
288
|
+
if scale_up_ratio > 1 + self.scale_up_tolerance:
|
|
289
|
+
desired_replicas = math.ceil(current_replicas * scale_up_ratio)
|
|
290
|
+
elif scale_down_ratio < 1 - self.scale_down_tolerance:
|
|
291
|
+
desired_replicas = math.ceil(current_replicas * scale_down_ratio)
|
|
292
|
+
|
|
293
|
+
logger.warning(
|
|
294
|
+
f"[Modal Flash] Current replicas: {current_replicas}, target metric value: {target_metric_value}, "
|
|
295
|
+
f"current sum of metric values: {sum_metric}, number of containers missing metric: "
|
|
296
|
+
f"{n_containers_missing_metric}, scale up ratio: {scale_up_ratio}, scale down ratio: {scale_down_ratio}, "
|
|
297
|
+
f"desired replicas: {desired_replicas}"
|
|
298
|
+
)
|
|
299
|
+
|
|
300
|
+
return desired_replicas
|
|
301
|
+
|
|
302
|
+
async def _get_metrics(self, url: str) -> Optional[dict[str, list[Any]]]: # technically any should be Sample
|
|
303
|
+
from prometheus_client.parser import Sample, text_string_to_metric_families
|
|
304
|
+
|
|
305
|
+
# Fetch the metrics from the endpoint
|
|
306
|
+
try:
|
|
307
|
+
response = await self.http_client.get(url)
|
|
308
|
+
response.raise_for_status()
|
|
309
|
+
except Exception as e:
|
|
310
|
+
logger.warning(f"[Modal Flash] Error getting metrics from {url}: {e}")
|
|
311
|
+
return None
|
|
312
|
+
|
|
313
|
+
# Parse the text-based Prometheus metrics format
|
|
314
|
+
metrics: dict[str, list[Sample]] = defaultdict(list)
|
|
315
|
+
for family in text_string_to_metric_families(await response.text()):
|
|
316
|
+
for sample in family.samples:
|
|
317
|
+
metrics[sample.name] += [sample]
|
|
318
|
+
|
|
319
|
+
return metrics
|
|
320
|
+
|
|
321
|
+
async def _get_all_containers(self):
|
|
322
|
+
req = api_pb2.FlashContainerListRequest(function_id=self.fn.object_id)
|
|
323
|
+
resp = await retry_transient_errors(self.client.stub.FlashContainerList, req)
|
|
324
|
+
return resp.containers
|
|
325
|
+
|
|
326
|
+
def _make_scaling_decision(
|
|
327
|
+
self,
|
|
328
|
+
current_replicas: int,
|
|
329
|
+
autoscaling_decisions: list[tuple[float, int]],
|
|
330
|
+
scale_up_stabilization_window_seconds: int = 0,
|
|
331
|
+
scale_down_stabilization_window_seconds: int = 60 * 5,
|
|
332
|
+
min_containers: Optional[int] = None,
|
|
333
|
+
max_containers: Optional[int] = None,
|
|
334
|
+
) -> int:
|
|
335
|
+
"""
|
|
336
|
+
Return the target number of containers following (simplified) Kubernetes HPA
|
|
337
|
+
stabilization-window semantics.
|
|
338
|
+
|
|
339
|
+
Args:
|
|
340
|
+
current_replicas: Current number of running Pods/containers.
|
|
341
|
+
autoscaling_decisions: List of (timestamp, desired_replicas) pairs, where
|
|
342
|
+
timestamp is a UNIX epoch float (seconds).
|
|
343
|
+
The list *must* contain at least one entry and should
|
|
344
|
+
already include the most-recent measurement.
|
|
345
|
+
scale_up_stabilization_window_seconds: 0 disables the up-window.
|
|
346
|
+
scale_down_stabilization_window_seconds: 0 disables the down-window.
|
|
347
|
+
min_containers / max_containers: Clamp the final decision to this range.
|
|
348
|
+
|
|
349
|
+
Returns:
|
|
350
|
+
The target number of containers.
|
|
351
|
+
"""
|
|
352
|
+
if not autoscaling_decisions:
|
|
353
|
+
# Without data we can’t make a new decision – stay where we are.
|
|
354
|
+
return current_replicas
|
|
355
|
+
|
|
356
|
+
# Sort just once in case the caller didn’t: newest record is last.
|
|
357
|
+
autoscaling_decisions.sort(key=lambda rec: rec[0])
|
|
358
|
+
now_ts, latest_desired = autoscaling_decisions[-1]
|
|
359
|
+
|
|
360
|
+
if latest_desired > current_replicas:
|
|
361
|
+
# ---- SCALE-UP path ----
|
|
362
|
+
window_start = now_ts - scale_up_stabilization_window_seconds
|
|
363
|
+
# Consider only records *inside* the window.
|
|
364
|
+
desired_candidates = [desired for ts, desired in autoscaling_decisions if ts >= window_start]
|
|
365
|
+
# Use the *minimum* so that any temporary dip blocks the scale-up.
|
|
366
|
+
candidate = min(desired_candidates) if desired_candidates else latest_desired
|
|
367
|
+
new_replicas = max(current_replicas, candidate) # never scale *down* here
|
|
368
|
+
elif latest_desired < current_replicas:
|
|
369
|
+
# ---- SCALE-DOWN path ----
|
|
370
|
+
window_start = now_ts - scale_down_stabilization_window_seconds
|
|
371
|
+
desired_candidates = [desired for ts, desired in autoscaling_decisions if ts >= window_start]
|
|
372
|
+
# Use the *maximum* so that any temporary spike blocks the scale-down.
|
|
373
|
+
candidate = max(desired_candidates) if desired_candidates else latest_desired
|
|
374
|
+
new_replicas = min(current_replicas, candidate) # never scale *up* here
|
|
375
|
+
else:
|
|
376
|
+
# No change requested.
|
|
377
|
+
new_replicas = current_replicas
|
|
378
|
+
|
|
379
|
+
# Clamp to [min_containers, max_containers].
|
|
380
|
+
if min_containers is not None:
|
|
381
|
+
new_replicas = max(min_containers, new_replicas)
|
|
382
|
+
if max_containers is not None:
|
|
383
|
+
new_replicas = min(max_containers, new_replicas)
|
|
384
|
+
return new_replicas
|
|
385
|
+
|
|
386
|
+
async def stop(self):
|
|
387
|
+
self.autoscaler_thread.cancel()
|
|
388
|
+
await self.autoscaler_thread
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
FlashPrometheusAutoscaler = synchronize_api(_FlashPrometheusAutoscaler)
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
@synchronizer.create_blocking
|
|
395
|
+
async def flash_prometheus_autoscaler(
|
|
396
|
+
app_name: str,
|
|
397
|
+
cls_name: str,
|
|
398
|
+
# Endpoint to fetch metrics from. Must be in Prometheus format. Example: "/metrics"
|
|
399
|
+
metrics_endpoint: str,
|
|
400
|
+
# Target metric to autoscale on. Example: "vllm:num_requests_running"
|
|
401
|
+
target_metric: str,
|
|
402
|
+
# Target metric value. Example: 25
|
|
403
|
+
target_metric_value: float,
|
|
404
|
+
min_containers: Optional[int] = None,
|
|
405
|
+
max_containers: Optional[int] = None,
|
|
406
|
+
# Corresponds to https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#tolerance
|
|
407
|
+
scale_up_tolerance: float = 0.1,
|
|
408
|
+
# Corresponds to https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#tolerance
|
|
409
|
+
scale_down_tolerance: float = 0.1,
|
|
410
|
+
# Corresponds to https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#stabilization-window
|
|
411
|
+
scale_up_stabilization_window_seconds: int = 0,
|
|
412
|
+
# Corresponds to https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#stabilization-window
|
|
413
|
+
scale_down_stabilization_window_seconds: int = 300,
|
|
414
|
+
# How often to make autoscaling decisions.
|
|
415
|
+
# Corresponds to --horizontal-pod-autoscaler-sync-period in Kubernetes.
|
|
416
|
+
autoscaling_interval_seconds: int = 15,
|
|
417
|
+
) -> _FlashPrometheusAutoscaler:
|
|
418
|
+
"""
|
|
419
|
+
Autoscale a Flash service based on containers' Prometheus metrics.
|
|
420
|
+
|
|
421
|
+
The package `prometheus_client` is required to use this method.
|
|
422
|
+
|
|
423
|
+
This is a highly experimental method that can break or be removed at any time without warning.
|
|
424
|
+
Do not use this method unless explicitly instructed to do so by Modal support.
|
|
425
|
+
"""
|
|
426
|
+
|
|
427
|
+
try:
|
|
428
|
+
import prometheus_client # noqa: F401
|
|
429
|
+
except ImportError:
|
|
430
|
+
raise ImportError("The package `prometheus_client` is required to use this method.")
|
|
431
|
+
|
|
432
|
+
client = await _Client.from_env()
|
|
433
|
+
autoscaler = _FlashPrometheusAutoscaler(
|
|
434
|
+
client,
|
|
435
|
+
app_name,
|
|
436
|
+
cls_name,
|
|
437
|
+
metrics_endpoint,
|
|
438
|
+
target_metric,
|
|
439
|
+
target_metric_value,
|
|
440
|
+
min_containers,
|
|
441
|
+
max_containers,
|
|
442
|
+
scale_up_tolerance,
|
|
443
|
+
scale_down_tolerance,
|
|
444
|
+
scale_up_stabilization_window_seconds,
|
|
445
|
+
scale_down_stabilization_window_seconds,
|
|
446
|
+
autoscaling_interval_seconds,
|
|
447
|
+
)
|
|
448
|
+
await autoscaler.start()
|
|
449
|
+
return autoscaler
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
import modal.client
|
|
2
|
+
import typing
|
|
3
|
+
import typing_extensions
|
|
4
|
+
|
|
5
|
+
class _FlashManager:
|
|
6
|
+
def __init__(self, client: modal.client._Client, port: int, health_check_url: typing.Optional[str] = None):
|
|
7
|
+
"""Initialize self. See help(type(self)) for accurate signature."""
|
|
8
|
+
...
|
|
9
|
+
|
|
10
|
+
async def _start(self): ...
|
|
11
|
+
async def _run_heartbeat(self, host: str, port: int): ...
|
|
12
|
+
def get_container_url(self): ...
|
|
13
|
+
async def stop(self): ...
|
|
14
|
+
async def close(self): ...
|
|
15
|
+
|
|
16
|
+
SUPERSELF = typing.TypeVar("SUPERSELF", covariant=True)
|
|
17
|
+
|
|
18
|
+
class FlashManager:
|
|
19
|
+
def __init__(self, client: modal.client.Client, port: int, health_check_url: typing.Optional[str] = None): ...
|
|
20
|
+
|
|
21
|
+
class ___start_spec(typing_extensions.Protocol[SUPERSELF]):
|
|
22
|
+
def __call__(self, /): ...
|
|
23
|
+
async def aio(self, /): ...
|
|
24
|
+
|
|
25
|
+
_start: ___start_spec[typing_extensions.Self]
|
|
26
|
+
|
|
27
|
+
class ___run_heartbeat_spec(typing_extensions.Protocol[SUPERSELF]):
|
|
28
|
+
def __call__(self, /, host: str, port: int): ...
|
|
29
|
+
async def aio(self, /, host: str, port: int): ...
|
|
30
|
+
|
|
31
|
+
_run_heartbeat: ___run_heartbeat_spec[typing_extensions.Self]
|
|
32
|
+
|
|
33
|
+
def get_container_url(self): ...
|
|
34
|
+
|
|
35
|
+
class __stop_spec(typing_extensions.Protocol[SUPERSELF]):
|
|
36
|
+
def __call__(self, /): ...
|
|
37
|
+
async def aio(self, /): ...
|
|
38
|
+
|
|
39
|
+
stop: __stop_spec[typing_extensions.Self]
|
|
40
|
+
|
|
41
|
+
class __close_spec(typing_extensions.Protocol[SUPERSELF]):
|
|
42
|
+
def __call__(self, /): ...
|
|
43
|
+
async def aio(self, /): ...
|
|
44
|
+
|
|
45
|
+
close: __close_spec[typing_extensions.Self]
|
|
46
|
+
|
|
47
|
+
class __flash_forward_spec(typing_extensions.Protocol):
|
|
48
|
+
def __call__(self, /, port: int, health_check_url: typing.Optional[str] = None) -> FlashManager:
|
|
49
|
+
"""Forward a port to the Modal Flash service, exposing that port as a stable web endpoint.
|
|
50
|
+
|
|
51
|
+
This is a highly experimental method that can break or be removed at any time without warning.
|
|
52
|
+
Do not use this method unless explicitly instructed to do so by Modal support.
|
|
53
|
+
"""
|
|
54
|
+
...
|
|
55
|
+
|
|
56
|
+
async def aio(self, /, port: int, health_check_url: typing.Optional[str] = None) -> FlashManager:
|
|
57
|
+
"""Forward a port to the Modal Flash service, exposing that port as a stable web endpoint.
|
|
58
|
+
|
|
59
|
+
This is a highly experimental method that can break or be removed at any time without warning.
|
|
60
|
+
Do not use this method unless explicitly instructed to do so by Modal support.
|
|
61
|
+
"""
|
|
62
|
+
...
|
|
63
|
+
|
|
64
|
+
flash_forward: __flash_forward_spec
|
|
65
|
+
|
|
66
|
+
class _FlashPrometheusAutoscaler:
|
|
67
|
+
def __init__(
|
|
68
|
+
self,
|
|
69
|
+
client: modal.client._Client,
|
|
70
|
+
app_name: str,
|
|
71
|
+
cls_name: str,
|
|
72
|
+
metrics_endpoint: str,
|
|
73
|
+
target_metric: str,
|
|
74
|
+
target_metric_value: float,
|
|
75
|
+
min_containers: typing.Optional[int],
|
|
76
|
+
max_containers: typing.Optional[int],
|
|
77
|
+
scale_up_tolerance: float,
|
|
78
|
+
scale_down_tolerance: float,
|
|
79
|
+
scale_up_stabilization_window_seconds: int,
|
|
80
|
+
scale_down_stabilization_window_seconds: int,
|
|
81
|
+
autoscaling_interval_seconds: int,
|
|
82
|
+
):
|
|
83
|
+
"""Initialize self. See help(type(self)) for accurate signature."""
|
|
84
|
+
...
|
|
85
|
+
|
|
86
|
+
async def start(self): ...
|
|
87
|
+
async def _run_autoscaler_loop(self): ...
|
|
88
|
+
async def _compute_target_containers(self, current_replicas: int) -> int: ...
|
|
89
|
+
async def _get_metrics(self, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
|
|
90
|
+
async def _get_all_containers(self): ...
|
|
91
|
+
def _make_scaling_decision(
|
|
92
|
+
self,
|
|
93
|
+
current_replicas: int,
|
|
94
|
+
autoscaling_decisions: list[tuple[float, int]],
|
|
95
|
+
scale_up_stabilization_window_seconds: int = 0,
|
|
96
|
+
scale_down_stabilization_window_seconds: int = 300,
|
|
97
|
+
min_containers: typing.Optional[int] = None,
|
|
98
|
+
max_containers: typing.Optional[int] = None,
|
|
99
|
+
) -> int:
|
|
100
|
+
"""Return the target number of containers following (simplified) Kubernetes HPA
|
|
101
|
+
stabilization-window semantics.
|
|
102
|
+
|
|
103
|
+
Args:
|
|
104
|
+
current_replicas: Current number of running Pods/containers.
|
|
105
|
+
autoscaling_decisions: List of (timestamp, desired_replicas) pairs, where
|
|
106
|
+
timestamp is a UNIX epoch float (seconds).
|
|
107
|
+
The list *must* contain at least one entry and should
|
|
108
|
+
already include the most-recent measurement.
|
|
109
|
+
scale_up_stabilization_window_seconds: 0 disables the up-window.
|
|
110
|
+
scale_down_stabilization_window_seconds: 0 disables the down-window.
|
|
111
|
+
min_containers / max_containers: Clamp the final decision to this range.
|
|
112
|
+
|
|
113
|
+
Returns:
|
|
114
|
+
The target number of containers.
|
|
115
|
+
"""
|
|
116
|
+
...
|
|
117
|
+
|
|
118
|
+
async def stop(self): ...
|
|
119
|
+
|
|
120
|
+
class FlashPrometheusAutoscaler:
|
|
121
|
+
def __init__(
|
|
122
|
+
self,
|
|
123
|
+
client: modal.client.Client,
|
|
124
|
+
app_name: str,
|
|
125
|
+
cls_name: str,
|
|
126
|
+
metrics_endpoint: str,
|
|
127
|
+
target_metric: str,
|
|
128
|
+
target_metric_value: float,
|
|
129
|
+
min_containers: typing.Optional[int],
|
|
130
|
+
max_containers: typing.Optional[int],
|
|
131
|
+
scale_up_tolerance: float,
|
|
132
|
+
scale_down_tolerance: float,
|
|
133
|
+
scale_up_stabilization_window_seconds: int,
|
|
134
|
+
scale_down_stabilization_window_seconds: int,
|
|
135
|
+
autoscaling_interval_seconds: int,
|
|
136
|
+
): ...
|
|
137
|
+
|
|
138
|
+
class __start_spec(typing_extensions.Protocol[SUPERSELF]):
|
|
139
|
+
def __call__(self, /): ...
|
|
140
|
+
async def aio(self, /): ...
|
|
141
|
+
|
|
142
|
+
start: __start_spec[typing_extensions.Self]
|
|
143
|
+
|
|
144
|
+
class ___run_autoscaler_loop_spec(typing_extensions.Protocol[SUPERSELF]):
|
|
145
|
+
def __call__(self, /): ...
|
|
146
|
+
async def aio(self, /): ...
|
|
147
|
+
|
|
148
|
+
_run_autoscaler_loop: ___run_autoscaler_loop_spec[typing_extensions.Self]
|
|
149
|
+
|
|
150
|
+
class ___compute_target_containers_spec(typing_extensions.Protocol[SUPERSELF]):
|
|
151
|
+
def __call__(self, /, current_replicas: int) -> int: ...
|
|
152
|
+
async def aio(self, /, current_replicas: int) -> int: ...
|
|
153
|
+
|
|
154
|
+
_compute_target_containers: ___compute_target_containers_spec[typing_extensions.Self]
|
|
155
|
+
|
|
156
|
+
class ___get_metrics_spec(typing_extensions.Protocol[SUPERSELF]):
|
|
157
|
+
def __call__(self, /, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
|
|
158
|
+
async def aio(self, /, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
|
|
159
|
+
|
|
160
|
+
_get_metrics: ___get_metrics_spec[typing_extensions.Self]
|
|
161
|
+
|
|
162
|
+
class ___get_all_containers_spec(typing_extensions.Protocol[SUPERSELF]):
|
|
163
|
+
def __call__(self, /): ...
|
|
164
|
+
async def aio(self, /): ...
|
|
165
|
+
|
|
166
|
+
_get_all_containers: ___get_all_containers_spec[typing_extensions.Self]
|
|
167
|
+
|
|
168
|
+
def _make_scaling_decision(
|
|
169
|
+
self,
|
|
170
|
+
current_replicas: int,
|
|
171
|
+
autoscaling_decisions: list[tuple[float, int]],
|
|
172
|
+
scale_up_stabilization_window_seconds: int = 0,
|
|
173
|
+
scale_down_stabilization_window_seconds: int = 300,
|
|
174
|
+
min_containers: typing.Optional[int] = None,
|
|
175
|
+
max_containers: typing.Optional[int] = None,
|
|
176
|
+
) -> int:
|
|
177
|
+
"""Return the target number of containers following (simplified) Kubernetes HPA
|
|
178
|
+
stabilization-window semantics.
|
|
179
|
+
|
|
180
|
+
Args:
|
|
181
|
+
current_replicas: Current number of running Pods/containers.
|
|
182
|
+
autoscaling_decisions: List of (timestamp, desired_replicas) pairs, where
|
|
183
|
+
timestamp is a UNIX epoch float (seconds).
|
|
184
|
+
The list *must* contain at least one entry and should
|
|
185
|
+
already include the most-recent measurement.
|
|
186
|
+
scale_up_stabilization_window_seconds: 0 disables the up-window.
|
|
187
|
+
scale_down_stabilization_window_seconds: 0 disables the down-window.
|
|
188
|
+
min_containers / max_containers: Clamp the final decision to this range.
|
|
189
|
+
|
|
190
|
+
Returns:
|
|
191
|
+
The target number of containers.
|
|
192
|
+
"""
|
|
193
|
+
...
|
|
194
|
+
|
|
195
|
+
class __stop_spec(typing_extensions.Protocol[SUPERSELF]):
|
|
196
|
+
def __call__(self, /): ...
|
|
197
|
+
async def aio(self, /): ...
|
|
198
|
+
|
|
199
|
+
stop: __stop_spec[typing_extensions.Self]
|
|
200
|
+
|
|
201
|
+
class __flash_prometheus_autoscaler_spec(typing_extensions.Protocol):
|
|
202
|
+
def __call__(
|
|
203
|
+
self,
|
|
204
|
+
/,
|
|
205
|
+
app_name: str,
|
|
206
|
+
cls_name: str,
|
|
207
|
+
metrics_endpoint: str,
|
|
208
|
+
target_metric: str,
|
|
209
|
+
target_metric_value: float,
|
|
210
|
+
min_containers: typing.Optional[int] = None,
|
|
211
|
+
max_containers: typing.Optional[int] = None,
|
|
212
|
+
scale_up_tolerance: float = 0.1,
|
|
213
|
+
scale_down_tolerance: float = 0.1,
|
|
214
|
+
scale_up_stabilization_window_seconds: int = 0,
|
|
215
|
+
scale_down_stabilization_window_seconds: int = 300,
|
|
216
|
+
autoscaling_interval_seconds: int = 15,
|
|
217
|
+
) -> FlashPrometheusAutoscaler:
|
|
218
|
+
"""Autoscale a Flash service based on containers' Prometheus metrics.
|
|
219
|
+
|
|
220
|
+
The package `prometheus_client` is required to use this method.
|
|
221
|
+
|
|
222
|
+
This is a highly experimental method that can break or be removed at any time without warning.
|
|
223
|
+
Do not use this method unless explicitly instructed to do so by Modal support.
|
|
224
|
+
"""
|
|
225
|
+
...
|
|
226
|
+
|
|
227
|
+
async def aio(
|
|
228
|
+
self,
|
|
229
|
+
/,
|
|
230
|
+
app_name: str,
|
|
231
|
+
cls_name: str,
|
|
232
|
+
metrics_endpoint: str,
|
|
233
|
+
target_metric: str,
|
|
234
|
+
target_metric_value: float,
|
|
235
|
+
min_containers: typing.Optional[int] = None,
|
|
236
|
+
max_containers: typing.Optional[int] = None,
|
|
237
|
+
scale_up_tolerance: float = 0.1,
|
|
238
|
+
scale_down_tolerance: float = 0.1,
|
|
239
|
+
scale_up_stabilization_window_seconds: int = 0,
|
|
240
|
+
scale_down_stabilization_window_seconds: int = 300,
|
|
241
|
+
autoscaling_interval_seconds: int = 15,
|
|
242
|
+
) -> FlashPrometheusAutoscaler:
|
|
243
|
+
"""Autoscale a Flash service based on containers' Prometheus metrics.
|
|
244
|
+
|
|
245
|
+
The package `prometheus_client` is required to use this method.
|
|
246
|
+
|
|
247
|
+
This is a highly experimental method that can break or be removed at any time without warning.
|
|
248
|
+
Do not use this method unless explicitly instructed to do so by Modal support.
|
|
249
|
+
"""
|
|
250
|
+
...
|
|
251
|
+
|
|
252
|
+
flash_prometheus_autoscaler: __flash_prometheus_autoscaler_spec
|
modal/functions.pyi
CHANGED
|
@@ -428,7 +428,7 @@ class Function(
|
|
|
428
428
|
|
|
429
429
|
_call_generator: ___call_generator_spec[typing_extensions.Self]
|
|
430
430
|
|
|
431
|
-
class __remote_spec(typing_extensions.Protocol[
|
|
431
|
+
class __remote_spec(typing_extensions.Protocol[P_INNER, ReturnType_INNER, SUPERSELF]):
|
|
432
432
|
def __call__(self, /, *args: P_INNER.args, **kwargs: P_INNER.kwargs) -> ReturnType_INNER:
|
|
433
433
|
"""Calls the function remotely, executing it with the given arguments and returning the execution's result."""
|
|
434
434
|
...
|
|
@@ -437,7 +437,7 @@ class Function(
|
|
|
437
437
|
"""Calls the function remotely, executing it with the given arguments and returning the execution's result."""
|
|
438
438
|
...
|
|
439
439
|
|
|
440
|
-
remote: __remote_spec[modal._functions.
|
|
440
|
+
remote: __remote_spec[modal._functions.P, modal._functions.ReturnType, typing_extensions.Self]
|
|
441
441
|
|
|
442
442
|
class __remote_gen_spec(typing_extensions.Protocol[SUPERSELF]):
|
|
443
443
|
def __call__(self, /, *args, **kwargs) -> typing.Generator[typing.Any, None, None]:
|
|
@@ -464,7 +464,7 @@ class Function(
|
|
|
464
464
|
"""
|
|
465
465
|
...
|
|
466
466
|
|
|
467
|
-
class ___experimental_spawn_spec(typing_extensions.Protocol[
|
|
467
|
+
class ___experimental_spawn_spec(typing_extensions.Protocol[P_INNER, ReturnType_INNER, SUPERSELF]):
|
|
468
468
|
def __call__(self, /, *args: P_INNER.args, **kwargs: P_INNER.kwargs) -> FunctionCall[ReturnType_INNER]:
|
|
469
469
|
"""[Experimental] Calls the function with the given arguments, without waiting for the results.
|
|
470
470
|
|
|
@@ -488,7 +488,7 @@ class Function(
|
|
|
488
488
|
...
|
|
489
489
|
|
|
490
490
|
_experimental_spawn: ___experimental_spawn_spec[
|
|
491
|
-
modal._functions.
|
|
491
|
+
modal._functions.P, modal._functions.ReturnType, typing_extensions.Self
|
|
492
492
|
]
|
|
493
493
|
|
|
494
494
|
class ___spawn_map_inner_spec(typing_extensions.Protocol[P_INNER, SUPERSELF]):
|
|
@@ -497,7 +497,7 @@ class Function(
|
|
|
497
497
|
|
|
498
498
|
_spawn_map_inner: ___spawn_map_inner_spec[modal._functions.P, typing_extensions.Self]
|
|
499
499
|
|
|
500
|
-
class __spawn_spec(typing_extensions.Protocol[
|
|
500
|
+
class __spawn_spec(typing_extensions.Protocol[P_INNER, ReturnType_INNER, SUPERSELF]):
|
|
501
501
|
def __call__(self, /, *args: P_INNER.args, **kwargs: P_INNER.kwargs) -> FunctionCall[ReturnType_INNER]:
|
|
502
502
|
"""Calls the function with the given arguments, without waiting for the results.
|
|
503
503
|
|
|
@@ -518,7 +518,7 @@ class Function(
|
|
|
518
518
|
"""
|
|
519
519
|
...
|
|
520
520
|
|
|
521
|
-
spawn: __spawn_spec[modal._functions.
|
|
521
|
+
spawn: __spawn_spec[modal._functions.P, modal._functions.ReturnType, typing_extensions.Self]
|
|
522
522
|
|
|
523
523
|
def get_raw_f(self) -> collections.abc.Callable[..., typing.Any]:
|
|
524
524
|
"""Return the inner Python object wrapped by this Modal Function."""
|
modal/image.py
CHANGED
|
@@ -23,6 +23,7 @@ from typing import (
|
|
|
23
23
|
|
|
24
24
|
from google.protobuf.message import Message
|
|
25
25
|
from grpclib.exceptions import GRPCError, StreamTerminatedError
|
|
26
|
+
from typing_extensions import Self
|
|
26
27
|
|
|
27
28
|
from modal_proto import api_pb2
|
|
28
29
|
|
|
@@ -42,7 +43,7 @@ from .client import _Client
|
|
|
42
43
|
from .cloud_bucket_mount import _CloudBucketMount
|
|
43
44
|
from .config import config, logger, user_config_path
|
|
44
45
|
from .environments import _get_environment_cached
|
|
45
|
-
from .exception import InvalidError, NotFoundError, RemoteError, VersionError
|
|
46
|
+
from .exception import ExecutionError, InvalidError, NotFoundError, RemoteError, VersionError
|
|
46
47
|
from .file_pattern_matcher import NON_PYTHON_FILES, FilePatternMatcher, _ignore_fn
|
|
47
48
|
from .gpu import GPU_T, parse_gpu_config
|
|
48
49
|
from .mount import _Mount, python_standalone_mount_name
|
|
@@ -2295,5 +2296,15 @@ class _Image(_Object, type_prefix="im"):
|
|
|
2295
2296
|
if task_log.data:
|
|
2296
2297
|
yield task_log.data
|
|
2297
2298
|
|
|
2299
|
+
async def hydrate(self, client: Optional[_Client] = None) -> Self:
|
|
2300
|
+
"""mdmd:hidden"""
|
|
2301
|
+
# Image inherits hydrate() from Object but can't be hydrated on demand
|
|
2302
|
+
# Overriding the method lets us hide it from the docs and raise a better error message
|
|
2303
|
+
if not self.is_hydrated:
|
|
2304
|
+
raise ExecutionError(
|
|
2305
|
+
"Images cannot currently be hydrated on demand; you can build an Image by running an App that uses it."
|
|
2306
|
+
)
|
|
2307
|
+
return self
|
|
2308
|
+
|
|
2298
2309
|
|
|
2299
2310
|
Image = synchronize_api(_Image)
|
modal/image.pyi
CHANGED
|
@@ -940,6 +940,10 @@ class _Image(modal._object._Object):
|
|
|
940
940
|
"""
|
|
941
941
|
...
|
|
942
942
|
|
|
943
|
+
async def hydrate(self, client: typing.Optional[modal.client._Client] = None) -> typing_extensions.Self:
|
|
944
|
+
"""mdmd:hidden"""
|
|
945
|
+
...
|
|
946
|
+
|
|
943
947
|
SUPERSELF = typing.TypeVar("SUPERSELF", covariant=True)
|
|
944
948
|
|
|
945
949
|
class Image(modal.object.Object):
|
|
@@ -1781,4 +1785,15 @@ class Image(modal.object.Object):
|
|
|
1781
1785
|
|
|
1782
1786
|
_logs: ___logs_spec[typing_extensions.Self]
|
|
1783
1787
|
|
|
1788
|
+
class __hydrate_spec(typing_extensions.Protocol[SUPERSELF]):
|
|
1789
|
+
def __call__(self, /, client: typing.Optional[modal.client.Client] = None) -> SUPERSELF:
|
|
1790
|
+
"""mdmd:hidden"""
|
|
1791
|
+
...
|
|
1792
|
+
|
|
1793
|
+
async def aio(self, /, client: typing.Optional[modal.client.Client] = None) -> SUPERSELF:
|
|
1794
|
+
"""mdmd:hidden"""
|
|
1795
|
+
...
|
|
1796
|
+
|
|
1797
|
+
hydrate: __hydrate_spec[typing_extensions.Self]
|
|
1798
|
+
|
|
1784
1799
|
SUPPORTED_PYTHON_SERIES: dict[typing.Literal["2023.12", "2024.04", "2024.10", "2025.06", "PREVIEW"], list[str]]
|
|
@@ -22,7 +22,7 @@ modal/app.py,sha256=U0sPiHpphcRHLnoLYh2IrU2RSpRFX9BE5uHb7h42STs,47478
|
|
|
22
22
|
modal/app.pyi,sha256=cXiSTu2bwu6csAUdkOlh7mr9tPvtaS2qWSEhlC1UxAg,43787
|
|
23
23
|
modal/call_graph.py,sha256=1g2DGcMIJvRy-xKicuf63IVE98gJSnQsr8R_NVMptNc,2581
|
|
24
24
|
modal/client.py,sha256=5QyM7VJjsFbHf6E91ar3A2KY9mx03wdtGlNJvfTKUVs,17087
|
|
25
|
-
modal/client.pyi,sha256=
|
|
25
|
+
modal/client.pyi,sha256=1npkSvHzSBChsHg2sIUks5RbJ4fAQg3Uw4ZnScGzjDg,15270
|
|
26
26
|
modal/cloud_bucket_mount.py,sha256=YOe9nnvSr4ZbeCn587d7_VhE9IioZYRvF9VYQTQux08,5914
|
|
27
27
|
modal/cloud_bucket_mount.pyi,sha256=-qSfYAQvIoO_l2wsCCGTG5ZUwQieNKXdAO00yP1-LYU,7394
|
|
28
28
|
modal/cls.py,sha256=B5EtzpBXemH718YvgXaYjuTKvairvqfXJ7IwLZ_6vVA,40034
|
|
@@ -39,10 +39,10 @@ modal/file_io.py,sha256=BVqAJ0sgPUfN8QsYztWiGB4j56he60TncM02KsylnCw,21449
|
|
|
39
39
|
modal/file_io.pyi,sha256=cPT_hsplE5iLCXhYOLn1Sp9eDdk7DxdFmicQHanJZyg,15918
|
|
40
40
|
modal/file_pattern_matcher.py,sha256=urAue8es8jxqX94k9EYoZxxhtfgOlsEES8lbFHOorzc,7734
|
|
41
41
|
modal/functions.py,sha256=kcNHvqeGBxPI7Cgd57NIBBghkfbeFJzXO44WW0jSmao,325
|
|
42
|
-
modal/functions.pyi,sha256=
|
|
42
|
+
modal/functions.pyi,sha256=ffW_kkU8AxMuV77ltmjK3nslXW_2iwEjKsT-Cgd4Trs,34840
|
|
43
43
|
modal/gpu.py,sha256=Fe5ORvVPDIstSq1xjmM6OoNgLYFWvogP9r5BgmD3hYg,6769
|
|
44
|
-
modal/image.py,sha256=
|
|
45
|
-
modal/image.pyi,sha256=
|
|
44
|
+
modal/image.py,sha256=qTJ6pTcLfYRh112wId7CCNWWmm077w6JoIqxE8BiCoo,102261
|
|
45
|
+
modal/image.pyi,sha256=TVy-rnSAP2WgQ5zf_sQLFzb-99Qg9LiQNGXR9psFA_o,68107
|
|
46
46
|
modal/io_streams.py,sha256=FUDpBsVK8isqwyC7DtAcQZhaHlMFSaNZGhYJOg-SFW0,15590
|
|
47
47
|
modal/io_streams.pyi,sha256=5b3b93ztZeR8IpJtNIGffX24QLPgocE4-gAps8y7CKU,13824
|
|
48
48
|
modal/mount.py,sha256=q-pPeVxAmte-G_LDpbFwaNs2Rb2MIpscfnCXzkhxrOI,36734
|
|
@@ -139,7 +139,9 @@ modal/cli/volume.py,sha256=KJ4WKQYjRGsTERkwHE1HcRia9rWzLIDDnlc89QmTLvE,10960
|
|
|
139
139
|
modal/cli/programs/__init__.py,sha256=svYKtV8HDwDCN86zbdWqyq5T8sMdGDj0PVlzc2tIxDM,28
|
|
140
140
|
modal/cli/programs/run_jupyter.py,sha256=44Lpvqk2l3hH-uOkmAOzw60NEsfB5uaRDWDKVshvQhs,2682
|
|
141
141
|
modal/cli/programs/vscode.py,sha256=KbTAaIXyQBVCDXxXjmBHmKpgXkUw0q4R4KkJvUjCYgk,3380
|
|
142
|
-
modal/experimental/__init__.py,sha256=
|
|
142
|
+
modal/experimental/__init__.py,sha256=XhRr0QQds4fEAoILQFa0CQWUtQ76Gxioo88CupOxDvI,10847
|
|
143
|
+
modal/experimental/flash.py,sha256=xqfJLpdqaa7mgW8OgAFBfZV95PKzuTzevh7SOSVesnA,19055
|
|
144
|
+
modal/experimental/flash.pyi,sha256=1Nd31nYD8Eqi0BI63XuK6owXPGA0s9CgU_WOAzCQSQs,9957
|
|
143
145
|
modal/experimental/ipython.py,sha256=TrCfmol9LGsRZMeDoeMPx3Hv3BFqQhYnmD_iH0pqdhk,2904
|
|
144
146
|
modal/requirements/2023.12.312.txt,sha256=zWWUVgVQ92GXBKNYYr2-5vn9rlnXcmkqlwlX5u1eTYw,400
|
|
145
147
|
modal/requirements/2023.12.txt,sha256=OjsbXFkCSdkzzryZP82Q73osr5wxQ6EUzmGcK7twfkA,502
|
|
@@ -149,7 +151,7 @@ modal/requirements/2025.06.txt,sha256=KxDaVTOwatHvboDo4lorlgJ7-n-MfAwbPwxJ0zcJqr
|
|
|
149
151
|
modal/requirements/PREVIEW.txt,sha256=KxDaVTOwatHvboDo4lorlgJ7-n-MfAwbPwxJ0zcJqrs,312
|
|
150
152
|
modal/requirements/README.md,sha256=9tK76KP0Uph7O0M5oUgsSwEZDj5y-dcUPsnpR0Sc-Ik,854
|
|
151
153
|
modal/requirements/base-images.json,sha256=JYSDAgHTl-WrV_TZW5icY-IJEnbe2eQ4CZ_KN6EOZKU,1304
|
|
152
|
-
modal-1.0.6.
|
|
154
|
+
modal-1.0.6.dev42.dist-info/licenses/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
|
|
153
155
|
modal_docs/__init__.py,sha256=svYKtV8HDwDCN86zbdWqyq5T8sMdGDj0PVlzc2tIxDM,28
|
|
154
156
|
modal_docs/gen_cli_docs.py,sha256=c1yfBS_x--gL5bs0N4ihMwqwX8l3IBWSkBAKNNIi6bQ,3801
|
|
155
157
|
modal_docs/gen_reference_docs.py,sha256=d_CQUGQ0rfw28u75I2mov9AlS773z9rG40-yq5o7g2U,6359
|
|
@@ -172,10 +174,10 @@ modal_proto/options_pb2.pyi,sha256=l7DBrbLO7q3Ir-XDkWsajm0d0TQqqrfuX54i4BMpdQg,1
|
|
|
172
174
|
modal_proto/options_pb2_grpc.py,sha256=1oboBPFxaTEXt9Aw7EAj8gXHDCNMhZD2VXqocC9l_gk,159
|
|
173
175
|
modal_proto/options_pb2_grpc.pyi,sha256=CImmhxHsYnF09iENPoe8S4J-n93jtgUYD2JPAc0yJSI,247
|
|
174
176
|
modal_proto/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
175
|
-
modal_version/__init__.py,sha256=
|
|
177
|
+
modal_version/__init__.py,sha256=6aQs8eQpP-g3lZcjz4g62zHviQ50MB--advXf8eOryg,121
|
|
176
178
|
modal_version/__main__.py,sha256=2FO0yYQQwDTh6udt1h-cBnGd1c4ZyHnHSI4BksxzVac,105
|
|
177
|
-
modal-1.0.6.
|
|
178
|
-
modal-1.0.6.
|
|
179
|
-
modal-1.0.6.
|
|
180
|
-
modal-1.0.6.
|
|
181
|
-
modal-1.0.6.
|
|
179
|
+
modal-1.0.6.dev42.dist-info/METADATA,sha256=FNTsJdFH_KZwx-SLKVTHs2M992nVzuII_iYxH4RXj6E,2462
|
|
180
|
+
modal-1.0.6.dev42.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
|
|
181
|
+
modal-1.0.6.dev42.dist-info/entry_points.txt,sha256=An-wYgeEUnm6xzrAP9_NTSTSciYvvEWsMZILtYrvpAI,46
|
|
182
|
+
modal-1.0.6.dev42.dist-info/top_level.txt,sha256=4BWzoKYREKUZ5iyPzZpjqx4G8uB5TWxXPDwibLcVa7k,43
|
|
183
|
+
modal-1.0.6.dev42.dist-info/RECORD,,
|
modal_version/__init__.py
CHANGED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|