modal 1.1.5.dev3__py3-none-any.whl → 1.1.5.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- modal/client.pyi +2 -2
- modal/experimental/flash.py +43 -23
- modal/experimental/flash.pyi +6 -0
- {modal-1.1.5.dev3.dist-info → modal-1.1.5.dev4.dist-info}/METADATA +1 -1
- {modal-1.1.5.dev3.dist-info → modal-1.1.5.dev4.dist-info}/RECORD +10 -10
- modal_version/__init__.py +1 -1
- {modal-1.1.5.dev3.dist-info → modal-1.1.5.dev4.dist-info}/WHEEL +0 -0
- {modal-1.1.5.dev3.dist-info → modal-1.1.5.dev4.dist-info}/entry_points.txt +0 -0
- {modal-1.1.5.dev3.dist-info → modal-1.1.5.dev4.dist-info}/licenses/LICENSE +0 -0
- {modal-1.1.5.dev3.dist-info → modal-1.1.5.dev4.dist-info}/top_level.txt +0 -0
modal/client.pyi
CHANGED
@@ -33,7 +33,7 @@ class _Client:
|
|
33
33
|
server_url: str,
|
34
34
|
client_type: int,
|
35
35
|
credentials: typing.Optional[tuple[str, str]],
|
36
|
-
version: str = "1.1.5.dev3",
|
36
|
+
version: str = "1.1.5.dev4",
|
37
37
|
):
|
38
38
|
"""mdmd:hidden
|
39
39
|
The Modal client object is not intended to be instantiated directly by users.
|
@@ -164,7 +164,7 @@ class Client:
|
|
164
164
|
server_url: str,
|
165
165
|
client_type: int,
|
166
166
|
credentials: typing.Optional[tuple[str, str]],
|
167
|
-
version: str = "1.1.5.dev3",
|
167
|
+
version: str = "1.1.5.dev4",
|
168
168
|
):
|
169
169
|
"""mdmd:hidden
|
170
170
|
The Modal client object is not intended to be instantiated directly by users.
|
modal/experimental/flash.py
CHANGED
@@ -201,6 +201,7 @@ class _FlashPrometheusAutoscaler:
|
|
201
201
|
target_metric_value: float,
|
202
202
|
min_containers: Optional[int],
|
203
203
|
max_containers: Optional[int],
|
204
|
+
buffer_containers: Optional[int],
|
204
205
|
scale_up_tolerance: float,
|
205
206
|
scale_down_tolerance: float,
|
206
207
|
scale_up_stabilization_window_seconds: int,
|
@@ -228,6 +229,7 @@ class _FlashPrometheusAutoscaler:
|
|
228
229
|
self.target_metric_value = target_metric_value
|
229
230
|
self.min_containers = min_containers
|
230
231
|
self.max_containers = max_containers
|
232
|
+
self.buffer_containers = buffer_containers
|
231
233
|
self.scale_up_tolerance = scale_up_tolerance
|
232
234
|
self.scale_down_tolerance = scale_down_tolerance
|
233
235
|
self.scale_up_stabilization_window_seconds = scale_up_stabilization_window_seconds
|
@@ -293,6 +295,7 @@ class _FlashPrometheusAutoscaler:
|
|
293
295
|
scale_down_stabilization_window_seconds=self.scale_down_stabilization_window_seconds,
|
294
296
|
min_containers=self.min_containers,
|
295
297
|
max_containers=self.max_containers,
|
298
|
+
buffer_containers=self.buffer_containers,
|
296
299
|
)
|
297
300
|
|
298
301
|
logger.warning(
|
@@ -402,6 +405,7 @@ class _FlashPrometheusAutoscaler:
|
|
402
405
|
# Gets metrics from prometheus
|
403
406
|
sum_metric = 0
|
404
407
|
containers_with_metrics = 0
|
408
|
+
buffer_containers = self.buffer_containers or 0
|
405
409
|
container_metrics_list = await asyncio.gather(
|
406
410
|
*[
|
407
411
|
self._get_metrics(f"https://{container.host}:{container.port}/{self.metrics_endpoint}")
|
@@ -418,29 +422,36 @@ class _FlashPrometheusAutoscaler:
|
|
418
422
|
sum_metric += container_metrics[target_metric][0].value
|
419
423
|
containers_with_metrics += 1
|
420
424
|
|
421
|
-
#
|
425
|
+
# n_containers_missing = number of unhealthy containers + number of containers not registered in flash dns
|
422
426
|
n_containers_missing_metric = current_replicas - containers_with_metrics
|
423
|
-
# n_containers_unhealthy
|
427
|
+
# n_containers_unhealthy = number of dns registered containers that are not emitting metrics
|
424
428
|
n_containers_unhealthy = len(containers) - containers_with_metrics
|
425
429
|
|
426
|
-
#
|
427
|
-
|
428
|
-
|
429
|
-
)
|
430
|
+
# number of total containers - buffer containers
|
431
|
+
# This is used in 1) scale ratio denominators 2) provisioning base.
|
432
|
+
# Max is used to handle case when buffer_containers are first initialized.
|
433
|
+
num_provisioned_containers = max(current_replicas - buffer_containers, 1)
|
434
|
+
|
435
|
+
# Scale up assuming that every unhealthy container is at (1 + scale_up_tolerance)x the target metric value.
|
436
|
+
# This way if all containers are unhealthy, we will increase our number of containers.
|
437
|
+
scale_up_target_metric_value = (
|
438
|
+
sum_metric + (1 + self.scale_up_tolerance) * n_containers_unhealthy * target_metric_value
|
439
|
+
) / (num_provisioned_containers)
|
430
440
|
|
431
441
|
# Scale down assuming that every container (including cold starting containers) is at the target metric value.
|
442
|
+
# The denominator is just num_provisioned_containers because we don't want to account for the buffer containers.
|
432
443
|
scale_down_target_metric_value = (sum_metric + n_containers_missing_metric * target_metric_value) / (
|
433
|
-
|
444
|
+
num_provisioned_containers
|
434
445
|
)
|
435
446
|
|
436
447
|
scale_up_ratio = scale_up_target_metric_value / target_metric_value
|
437
448
|
scale_down_ratio = scale_down_target_metric_value / target_metric_value
|
438
449
|
|
439
|
-
desired_replicas =
|
450
|
+
desired_replicas = num_provisioned_containers
|
440
451
|
if scale_up_ratio > 1 + self.scale_up_tolerance:
|
441
|
-
desired_replicas = math.ceil(
|
452
|
+
desired_replicas = math.ceil(desired_replicas * scale_up_ratio)
|
442
453
|
elif scale_down_ratio < 1 - self.scale_down_tolerance:
|
443
|
-
desired_replicas = math.ceil(
|
454
|
+
desired_replicas = math.ceil(desired_replicas * scale_down_ratio)
|
444
455
|
|
445
456
|
logger.warning(
|
446
457
|
f"[Modal Flash] Current replicas: {current_replicas}, "
|
@@ -449,6 +460,7 @@ class _FlashPrometheusAutoscaler:
|
|
449
460
|
f"number of containers with metrics: {containers_with_metrics}, "
|
450
461
|
f"number of containers unhealthy: {n_containers_unhealthy}, "
|
451
462
|
f"number of containers missing metric (includes unhealthy): {n_containers_missing_metric}, "
|
463
|
+
f"number of provisioned containers: {num_provisioned_containers}, "
|
452
464
|
f"scale up ratio: {scale_up_ratio}, "
|
453
465
|
f"scale down ratio: {scale_down_ratio}, "
|
454
466
|
f"desired replicas: {desired_replicas}"
|
@@ -510,6 +522,7 @@ class _FlashPrometheusAutoscaler:
|
|
510
522
|
scale_down_stabilization_window_seconds: int = 60 * 5,
|
511
523
|
min_containers: Optional[int] = None,
|
512
524
|
max_containers: Optional[int] = None,
|
525
|
+
buffer_containers: Optional[int] = None,
|
513
526
|
) -> int:
|
514
527
|
"""
|
515
528
|
Return the target number of containers following (simplified) Kubernetes HPA
|
@@ -560,6 +573,10 @@ class _FlashPrometheusAutoscaler:
|
|
560
573
|
new_replicas = max(min_containers, new_replicas)
|
561
574
|
if max_containers is not None:
|
562
575
|
new_replicas = min(max_containers, new_replicas)
|
576
|
+
|
577
|
+
if buffer_containers is not None:
|
578
|
+
new_replicas += buffer_containers
|
579
|
+
|
563
580
|
return new_replicas
|
564
581
|
|
565
582
|
async def stop(self):
|
@@ -597,6 +614,8 @@ async def flash_prometheus_autoscaler(
|
|
597
614
|
# How often to make autoscaling decisions.
|
598
615
|
# Corresponds to --horizontal-pod-autoscaler-sync-period in Kubernetes.
|
599
616
|
autoscaling_interval_seconds: int = 15,
|
617
|
+
# Number of extra (buffer) containers to keep provisioned on top of the autoscaled target; excluded from the scale ratio calculation.
|
618
|
+
buffer_containers: Optional[int] = None,
|
600
619
|
) -> _FlashPrometheusAutoscaler:
|
601
620
|
"""
|
602
621
|
Autoscale a Flash service based on containers' Prometheus metrics.
|
@@ -614,19 +633,20 @@ async def flash_prometheus_autoscaler(
|
|
614
633
|
|
615
634
|
client = await _Client.from_env()
|
616
635
|
autoscaler = _FlashPrometheusAutoscaler(
|
617
|
-
client,
|
618
|
-
app_name,
|
619
|
-
cls_name,
|
620
|
-
metrics_endpoint,
|
621
|
-
target_metric,
|
622
|
-
target_metric_value,
|
623
|
-
min_containers,
|
624
|
-
max_containers,
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
636
|
+
client=client,
|
637
|
+
app_name=app_name,
|
638
|
+
cls_name=cls_name,
|
639
|
+
metrics_endpoint=metrics_endpoint,
|
640
|
+
target_metric=target_metric,
|
641
|
+
target_metric_value=target_metric_value,
|
642
|
+
min_containers=min_containers,
|
643
|
+
max_containers=max_containers,
|
644
|
+
buffer_containers=buffer_containers,
|
645
|
+
scale_up_tolerance=scale_up_tolerance,
|
646
|
+
scale_down_tolerance=scale_down_tolerance,
|
647
|
+
scale_up_stabilization_window_seconds=scale_up_stabilization_window_seconds,
|
648
|
+
scale_down_stabilization_window_seconds=scale_down_stabilization_window_seconds,
|
649
|
+
autoscaling_interval_seconds=autoscaling_interval_seconds,
|
630
650
|
)
|
631
651
|
await autoscaler.start()
|
632
652
|
return autoscaler
|
modal/experimental/flash.pyi
CHANGED
@@ -126,6 +126,7 @@ class _FlashPrometheusAutoscaler:
|
|
126
126
|
target_metric_value: float,
|
127
127
|
min_containers: typing.Optional[int],
|
128
128
|
max_containers: typing.Optional[int],
|
129
|
+
buffer_containers: typing.Optional[int],
|
129
130
|
scale_up_tolerance: float,
|
130
131
|
scale_down_tolerance: float,
|
131
132
|
scale_up_stabilization_window_seconds: int,
|
@@ -155,6 +156,7 @@ class _FlashPrometheusAutoscaler:
|
|
155
156
|
scale_down_stabilization_window_seconds: int = 300,
|
156
157
|
min_containers: typing.Optional[int] = None,
|
157
158
|
max_containers: typing.Optional[int] = None,
|
159
|
+
buffer_containers: typing.Optional[int] = None,
|
158
160
|
) -> int:
|
159
161
|
"""Return the target number of containers following (simplified) Kubernetes HPA
|
160
162
|
stabilization-window semantics.
|
@@ -187,6 +189,7 @@ class FlashPrometheusAutoscaler:
|
|
187
189
|
target_metric_value: float,
|
188
190
|
min_containers: typing.Optional[int],
|
189
191
|
max_containers: typing.Optional[int],
|
192
|
+
buffer_containers: typing.Optional[int],
|
190
193
|
scale_up_tolerance: float,
|
191
194
|
scale_down_tolerance: float,
|
192
195
|
scale_up_stabilization_window_seconds: int,
|
@@ -253,6 +256,7 @@ class FlashPrometheusAutoscaler:
|
|
253
256
|
scale_down_stabilization_window_seconds: int = 300,
|
254
257
|
min_containers: typing.Optional[int] = None,
|
255
258
|
max_containers: typing.Optional[int] = None,
|
259
|
+
buffer_containers: typing.Optional[int] = None,
|
256
260
|
) -> int:
|
257
261
|
"""Return the target number of containers following (simplified) Kubernetes HPA
|
258
262
|
stabilization-window semantics.
|
@@ -294,6 +298,7 @@ class __flash_prometheus_autoscaler_spec(typing_extensions.Protocol):
|
|
294
298
|
scale_up_stabilization_window_seconds: int = 0,
|
295
299
|
scale_down_stabilization_window_seconds: int = 300,
|
296
300
|
autoscaling_interval_seconds: int = 15,
|
301
|
+
buffer_containers: typing.Optional[int] = None,
|
297
302
|
) -> FlashPrometheusAutoscaler:
|
298
303
|
"""Autoscale a Flash service based on containers' Prometheus metrics.
|
299
304
|
|
@@ -319,6 +324,7 @@ class __flash_prometheus_autoscaler_spec(typing_extensions.Protocol):
|
|
319
324
|
scale_up_stabilization_window_seconds: int = 0,
|
320
325
|
scale_down_stabilization_window_seconds: int = 300,
|
321
326
|
autoscaling_interval_seconds: int = 15,
|
327
|
+
buffer_containers: typing.Optional[int] = None,
|
322
328
|
) -> FlashPrometheusAutoscaler:
|
323
329
|
"""Autoscale a Flash service based on containers' Prometheus metrics.
|
324
330
|
|
@@ -22,7 +22,7 @@ modal/app.py,sha256=F4baVULljFq0CwC_7U-EKNRNx7CYeWBKudjjYUuWc4U,48416
|
|
22
22
|
modal/app.pyi,sha256=AbXJCBkyt2rI_-M3VbTBYb32at0P6iRZuoC87xY_JrQ,43591
|
23
23
|
modal/call_graph.py,sha256=1g2DGcMIJvRy-xKicuf63IVE98gJSnQsr8R_NVMptNc,2581
|
24
24
|
modal/client.py,sha256=kyAIVB3Ay-XKJizQ_1ufUFB__EagV0MLmHJpyYyJ7J0,18636
|
25
|
-
modal/client.pyi,sha256=
|
25
|
+
modal/client.pyi,sha256=F3vfE-1XWCjduhEYS5_QaJVWHuzu35nj5ukX64BVoeU,15829
|
26
26
|
modal/cloud_bucket_mount.py,sha256=YOe9nnvSr4ZbeCn587d7_VhE9IioZYRvF9VYQTQux08,5914
|
27
27
|
modal/cloud_bucket_mount.pyi,sha256=-qSfYAQvIoO_l2wsCCGTG5ZUwQieNKXdAO00yP1-LYU,7394
|
28
28
|
modal/cls.py,sha256=pTEO7pHjlO7taMbIqs4oI9ZZgKDJpVKyGkO5ZT0w6tQ,40934
|
@@ -150,10 +150,10 @@ modal/cli/programs/run_jupyter.py,sha256=44Lpvqk2l3hH-uOkmAOzw60NEsfB5uaRDWDKVsh
|
|
150
150
|
modal/cli/programs/run_marimo.py,sha256=HyZ2za0NYqg31-mGxFQxUIAJ3Q-jRaMocEwWwI5-cdw,2887
|
151
151
|
modal/cli/programs/vscode.py,sha256=KbTAaIXyQBVCDXxXjmBHmKpgXkUw0q4R4KkJvUjCYgk,3380
|
152
152
|
modal/experimental/__init__.py,sha256=fCqzo_f3vcY750vHtd7CtLs5dvdM_C0ZLLGb3zXuK9w,14913
|
153
|
-
modal/experimental/flash.py,sha256=
|
154
|
-
modal/experimental/flash.pyi,sha256=
|
153
|
+
modal/experimental/flash.py,sha256=6KOdHVr3zjfgbe6nOou_mQ_0TKFgeG5INWitxyj6tn8,30049
|
154
|
+
modal/experimental/flash.pyi,sha256=u_ZCXeZPBo778rThEdx9SJR6gkDdNqx9an8hwboQCEk,14258
|
155
155
|
modal/experimental/ipython.py,sha256=TrCfmol9LGsRZMeDoeMPx3Hv3BFqQhYnmD_iH0pqdhk,2904
|
156
|
-
modal-1.1.5.dev3.dist-info/licenses/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
|
156
|
+
modal-1.1.5.dev4.dist-info/licenses/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
|
157
157
|
modal_docs/__init__.py,sha256=svYKtV8HDwDCN86zbdWqyq5T8sMdGDj0PVlzc2tIxDM,28
|
158
158
|
modal_docs/gen_cli_docs.py,sha256=c1yfBS_x--gL5bs0N4ihMwqwX8l3IBWSkBAKNNIi6bQ,3801
|
159
159
|
modal_docs/gen_reference_docs.py,sha256=d_CQUGQ0rfw28u75I2mov9AlS773z9rG40-yq5o7g2U,6359
|
@@ -176,10 +176,10 @@ modal_proto/options_pb2.pyi,sha256=l7DBrbLO7q3Ir-XDkWsajm0d0TQqqrfuX54i4BMpdQg,1
|
|
176
176
|
modal_proto/options_pb2_grpc.py,sha256=1oboBPFxaTEXt9Aw7EAj8gXHDCNMhZD2VXqocC9l_gk,159
|
177
177
|
modal_proto/options_pb2_grpc.pyi,sha256=CImmhxHsYnF09iENPoe8S4J-n93jtgUYD2JPAc0yJSI,247
|
178
178
|
modal_proto/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
179
|
-
modal_version/__init__.py,sha256=
|
179
|
+
modal_version/__init__.py,sha256=rp7MGaJS8eOVHpGNr__NnfXYwj-KIbn7g2X0mI6KEGE,120
|
180
180
|
modal_version/__main__.py,sha256=2FO0yYQQwDTh6udt1h-cBnGd1c4ZyHnHSI4BksxzVac,105
|
181
|
-
modal-1.1.5.
|
182
|
-
modal-1.1.5.dev3.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
|
183
|
-
modal-1.1.5.dev3.dist-info/entry_points.txt,sha256=An-wYgeEUnm6xzrAP9_NTSTSciYvvEWsMZILtYrvpAI,46
|
184
|
-
modal-1.1.5.dev3.dist-info/top_level.txt,sha256=4BWzoKYREKUZ5iyPzZpjqx4G8uB5TWxXPDwibLcVa7k,43
|
185
|
-
modal-1.1.5.dev3.dist-info/RECORD,,
|
181
|
+
modal-1.1.5.dev4.dist-info/METADATA,sha256=UivNw5cnN9znRf66NlHULHZPLhpTCfewLQmN_Vxn7Yw,2459
|
182
|
+
modal-1.1.5.dev4.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
|
183
|
+
modal-1.1.5.dev4.dist-info/entry_points.txt,sha256=An-wYgeEUnm6xzrAP9_NTSTSciYvvEWsMZILtYrvpAI,46
|
184
|
+
modal-1.1.5.dev4.dist-info/top_level.txt,sha256=4BWzoKYREKUZ5iyPzZpjqx4G8uB5TWxXPDwibLcVa7k,43
|
185
|
+
modal-1.1.5.dev4.dist-info/RECORD,,
|
modal_version/__init__.py
CHANGED
File without changes
|
File without changes
|
File without changes
|
File without changes
|