modal 1.1.5.dev3__py3-none-any.whl → 1.1.5.dev4__py3-none-any.whl

This diff shows the content changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
modal/client.pyi CHANGED
@@ -33,7 +33,7 @@ class _Client:
33
33
  server_url: str,
34
34
  client_type: int,
35
35
  credentials: typing.Optional[tuple[str, str]],
36
- version: str = "1.1.5.dev3",
36
+ version: str = "1.1.5.dev4",
37
37
  ):
38
38
  """mdmd:hidden
39
39
  The Modal client object is not intended to be instantiated directly by users.
@@ -164,7 +164,7 @@ class Client:
164
164
  server_url: str,
165
165
  client_type: int,
166
166
  credentials: typing.Optional[tuple[str, str]],
167
- version: str = "1.1.5.dev3",
167
+ version: str = "1.1.5.dev4",
168
168
  ):
169
169
  """mdmd:hidden
170
170
  The Modal client object is not intended to be instantiated directly by users.
@@ -201,6 +201,7 @@ class _FlashPrometheusAutoscaler:
201
201
  target_metric_value: float,
202
202
  min_containers: Optional[int],
203
203
  max_containers: Optional[int],
204
+ buffer_containers: Optional[int],
204
205
  scale_up_tolerance: float,
205
206
  scale_down_tolerance: float,
206
207
  scale_up_stabilization_window_seconds: int,
@@ -228,6 +229,7 @@ class _FlashPrometheusAutoscaler:
228
229
  self.target_metric_value = target_metric_value
229
230
  self.min_containers = min_containers
230
231
  self.max_containers = max_containers
232
+ self.buffer_containers = buffer_containers
231
233
  self.scale_up_tolerance = scale_up_tolerance
232
234
  self.scale_down_tolerance = scale_down_tolerance
233
235
  self.scale_up_stabilization_window_seconds = scale_up_stabilization_window_seconds
@@ -293,6 +295,7 @@ class _FlashPrometheusAutoscaler:
293
295
  scale_down_stabilization_window_seconds=self.scale_down_stabilization_window_seconds,
294
296
  min_containers=self.min_containers,
295
297
  max_containers=self.max_containers,
298
+ buffer_containers=self.buffer_containers,
296
299
  )
297
300
 
298
301
  logger.warning(
@@ -402,6 +405,7 @@ class _FlashPrometheusAutoscaler:
402
405
  # Gets metrics from prometheus
403
406
  sum_metric = 0
404
407
  containers_with_metrics = 0
408
+ buffer_containers = self.buffer_containers or 0
405
409
  container_metrics_list = await asyncio.gather(
406
410
  *[
407
411
  self._get_metrics(f"https://{container.host}:{container.port}/{self.metrics_endpoint}")
@@ -418,29 +422,36 @@ class _FlashPrometheusAutoscaler:
418
422
  sum_metric += container_metrics[target_metric][0].value
419
423
  containers_with_metrics += 1
420
424
 
421
- # n_containers_missing_metric is the number of unhealthy containers + number of cold starting containers
425
+ # n_containers_missing_metric = number of unhealthy containers + number of containers not registered in flash dns
422
426
  n_containers_missing_metric = current_replicas - containers_with_metrics
423
- # n_containers_unhealthy is the number of live containers that are not emitting metrics i.e. unhealthy
427
+ # n_containers_unhealthy = number of dns registered containers that are not emitting metrics
424
428
  n_containers_unhealthy = len(containers) - containers_with_metrics
425
429
 
426
- # Scale up assuming that every unhealthy container is at 2x the target metric value.
427
- scale_up_target_metric_value = (sum_metric + n_containers_unhealthy * target_metric_value) / (
428
- (containers_with_metrics + n_containers_unhealthy) or 1
429
- )
430
+ # number of total containers - buffer containers
431
+ # This is used in 1) scale ratio denominators 2) provisioning base.
432
+ # Max is used to handle case when buffer_containers are first initialized.
433
+ num_provisioned_containers = max(current_replicas - buffer_containers, 1)
434
+
435
+ # Scale up assuming that every unhealthy container is at (1 + scale_up_tolerance)x the target metric value.
436
+ # This way if all containers are unhealthy, we will increase our number of containers.
437
+ scale_up_target_metric_value = (
438
+ sum_metric + (1 + self.scale_up_tolerance) * n_containers_unhealthy * target_metric_value
439
+ ) / (num_provisioned_containers)
430
440
 
431
441
  # Scale down assuming that every container (including cold starting containers) is at the target metric value.
442
+ # The denominator is just num_provisioned_containers because we don't want to account for the buffer containers.
432
443
  scale_down_target_metric_value = (sum_metric + n_containers_missing_metric * target_metric_value) / (
433
- current_replicas or 1
444
+ num_provisioned_containers
434
445
  )
435
446
 
436
447
  scale_up_ratio = scale_up_target_metric_value / target_metric_value
437
448
  scale_down_ratio = scale_down_target_metric_value / target_metric_value
438
449
 
439
- desired_replicas = current_replicas
450
+ desired_replicas = num_provisioned_containers
440
451
  if scale_up_ratio > 1 + self.scale_up_tolerance:
441
- desired_replicas = math.ceil(current_replicas * scale_up_ratio)
452
+ desired_replicas = math.ceil(desired_replicas * scale_up_ratio)
442
453
  elif scale_down_ratio < 1 - self.scale_down_tolerance:
443
- desired_replicas = math.ceil(current_replicas * scale_down_ratio)
454
+ desired_replicas = math.ceil(desired_replicas * scale_down_ratio)
444
455
 
445
456
  logger.warning(
446
457
  f"[Modal Flash] Current replicas: {current_replicas}, "
@@ -449,6 +460,7 @@ class _FlashPrometheusAutoscaler:
449
460
  f"number of containers with metrics: {containers_with_metrics}, "
450
461
  f"number of containers unhealthy: {n_containers_unhealthy}, "
451
462
  f"number of containers missing metric (includes unhealthy): {n_containers_missing_metric}, "
463
+ f"number of provisioned containers: {num_provisioned_containers}, "
452
464
  f"scale up ratio: {scale_up_ratio}, "
453
465
  f"scale down ratio: {scale_down_ratio}, "
454
466
  f"desired replicas: {desired_replicas}"
@@ -510,6 +522,7 @@ class _FlashPrometheusAutoscaler:
510
522
  scale_down_stabilization_window_seconds: int = 60 * 5,
511
523
  min_containers: Optional[int] = None,
512
524
  max_containers: Optional[int] = None,
525
+ buffer_containers: Optional[int] = None,
513
526
  ) -> int:
514
527
  """
515
528
  Return the target number of containers following (simplified) Kubernetes HPA
@@ -560,6 +573,10 @@ class _FlashPrometheusAutoscaler:
560
573
  new_replicas = max(min_containers, new_replicas)
561
574
  if max_containers is not None:
562
575
  new_replicas = min(max_containers, new_replicas)
576
+
577
+ if buffer_containers is not None:
578
+ new_replicas += buffer_containers
579
+
563
580
  return new_replicas
564
581
 
565
582
  async def stop(self):
@@ -597,6 +614,8 @@ async def flash_prometheus_autoscaler(
597
614
  # How often to make autoscaling decisions.
598
615
  # Corresponds to --horizontal-pod-autoscaler-sync-period in Kubernetes.
599
616
  autoscaling_interval_seconds: int = 15,
617
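+ # Number of extra (overprovisioned) containers to keep on top of the computed replica count.
+ # They are excluded from the scale up/down ratio calculation and added back to the final target.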
618
+ buffer_containers: Optional[int] = None,
600
619
  ) -> _FlashPrometheusAutoscaler:
601
620
  """
602
621
  Autoscale a Flash service based on containers' Prometheus metrics.
@@ -614,19 +633,20 @@ async def flash_prometheus_autoscaler(
614
633
 
615
634
  client = await _Client.from_env()
616
635
  autoscaler = _FlashPrometheusAutoscaler(
617
- client,
618
- app_name,
619
- cls_name,
620
- metrics_endpoint,
621
- target_metric,
622
- target_metric_value,
623
- min_containers,
624
- max_containers,
625
- scale_up_tolerance,
626
- scale_down_tolerance,
627
- scale_up_stabilization_window_seconds,
628
- scale_down_stabilization_window_seconds,
629
- autoscaling_interval_seconds,
636
+ client=client,
637
+ app_name=app_name,
638
+ cls_name=cls_name,
639
+ metrics_endpoint=metrics_endpoint,
640
+ target_metric=target_metric,
641
+ target_metric_value=target_metric_value,
642
+ min_containers=min_containers,
643
+ max_containers=max_containers,
644
+ buffer_containers=buffer_containers,
645
+ scale_up_tolerance=scale_up_tolerance,
646
+ scale_down_tolerance=scale_down_tolerance,
647
+ scale_up_stabilization_window_seconds=scale_up_stabilization_window_seconds,
648
+ scale_down_stabilization_window_seconds=scale_down_stabilization_window_seconds,
649
+ autoscaling_interval_seconds=autoscaling_interval_seconds,
630
650
  )
631
651
  await autoscaler.start()
632
652
  return autoscaler
@@ -126,6 +126,7 @@ class _FlashPrometheusAutoscaler:
126
126
  target_metric_value: float,
127
127
  min_containers: typing.Optional[int],
128
128
  max_containers: typing.Optional[int],
129
+ buffer_containers: typing.Optional[int],
129
130
  scale_up_tolerance: float,
130
131
  scale_down_tolerance: float,
131
132
  scale_up_stabilization_window_seconds: int,
@@ -155,6 +156,7 @@ class _FlashPrometheusAutoscaler:
155
156
  scale_down_stabilization_window_seconds: int = 300,
156
157
  min_containers: typing.Optional[int] = None,
157
158
  max_containers: typing.Optional[int] = None,
159
+ buffer_containers: typing.Optional[int] = None,
158
160
  ) -> int:
159
161
  """Return the target number of containers following (simplified) Kubernetes HPA
160
162
  stabilization-window semantics.
@@ -187,6 +189,7 @@ class FlashPrometheusAutoscaler:
187
189
  target_metric_value: float,
188
190
  min_containers: typing.Optional[int],
189
191
  max_containers: typing.Optional[int],
192
+ buffer_containers: typing.Optional[int],
190
193
  scale_up_tolerance: float,
191
194
  scale_down_tolerance: float,
192
195
  scale_up_stabilization_window_seconds: int,
@@ -253,6 +256,7 @@ class FlashPrometheusAutoscaler:
253
256
  scale_down_stabilization_window_seconds: int = 300,
254
257
  min_containers: typing.Optional[int] = None,
255
258
  max_containers: typing.Optional[int] = None,
259
+ buffer_containers: typing.Optional[int] = None,
256
260
  ) -> int:
257
261
  """Return the target number of containers following (simplified) Kubernetes HPA
258
262
  stabilization-window semantics.
@@ -294,6 +298,7 @@ class __flash_prometheus_autoscaler_spec(typing_extensions.Protocol):
294
298
  scale_up_stabilization_window_seconds: int = 0,
295
299
  scale_down_stabilization_window_seconds: int = 300,
296
300
  autoscaling_interval_seconds: int = 15,
301
+ buffer_containers: typing.Optional[int] = None,
297
302
  ) -> FlashPrometheusAutoscaler:
298
303
  """Autoscale a Flash service based on containers' Prometheus metrics.
299
304
 
@@ -319,6 +324,7 @@ class __flash_prometheus_autoscaler_spec(typing_extensions.Protocol):
319
324
  scale_up_stabilization_window_seconds: int = 0,
320
325
  scale_down_stabilization_window_seconds: int = 300,
321
326
  autoscaling_interval_seconds: int = 15,
327
+ buffer_containers: typing.Optional[int] = None,
322
328
  ) -> FlashPrometheusAutoscaler:
323
329
  """Autoscale a Flash service based on containers' Prometheus metrics.
324
330
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: modal
3
- Version: 1.1.5.dev3
3
+ Version: 1.1.5.dev4
4
4
  Summary: Python client library for Modal
5
5
  Author-email: Modal Labs <support@modal.com>
6
6
  License: Apache-2.0
@@ -22,7 +22,7 @@ modal/app.py,sha256=F4baVULljFq0CwC_7U-EKNRNx7CYeWBKudjjYUuWc4U,48416
22
22
  modal/app.pyi,sha256=AbXJCBkyt2rI_-M3VbTBYb32at0P6iRZuoC87xY_JrQ,43591
23
23
  modal/call_graph.py,sha256=1g2DGcMIJvRy-xKicuf63IVE98gJSnQsr8R_NVMptNc,2581
24
24
  modal/client.py,sha256=kyAIVB3Ay-XKJizQ_1ufUFB__EagV0MLmHJpyYyJ7J0,18636
25
- modal/client.pyi,sha256=c9p8XWAlM_rGXLwTQmPudc2C_CW7Melsbw20mhAj8jE,15829
25
+ modal/client.pyi,sha256=F3vfE-1XWCjduhEYS5_QaJVWHuzu35nj5ukX64BVoeU,15829
26
26
  modal/cloud_bucket_mount.py,sha256=YOe9nnvSr4ZbeCn587d7_VhE9IioZYRvF9VYQTQux08,5914
27
27
  modal/cloud_bucket_mount.pyi,sha256=-qSfYAQvIoO_l2wsCCGTG5ZUwQieNKXdAO00yP1-LYU,7394
28
28
  modal/cls.py,sha256=pTEO7pHjlO7taMbIqs4oI9ZZgKDJpVKyGkO5ZT0w6tQ,40934
@@ -150,10 +150,10 @@ modal/cli/programs/run_jupyter.py,sha256=44Lpvqk2l3hH-uOkmAOzw60NEsfB5uaRDWDKVsh
150
150
  modal/cli/programs/run_marimo.py,sha256=HyZ2za0NYqg31-mGxFQxUIAJ3Q-jRaMocEwWwI5-cdw,2887
151
151
  modal/cli/programs/vscode.py,sha256=KbTAaIXyQBVCDXxXjmBHmKpgXkUw0q4R4KkJvUjCYgk,3380
152
152
  modal/experimental/__init__.py,sha256=fCqzo_f3vcY750vHtd7CtLs5dvdM_C0ZLLGb3zXuK9w,14913
153
- modal/experimental/flash.py,sha256=amsEPtzD2OX5w4YcTPKj9MAUhANEgQni1VHnYjLshrc,28647
154
- modal/experimental/flash.pyi,sha256=Tu9n25ZnW4dO1YjNRHIQpZb4VWSfNW5IENrY0HJW-ME,13936
153
+ modal/experimental/flash.py,sha256=6KOdHVr3zjfgbe6nOou_mQ_0TKFgeG5INWitxyj6tn8,30049
154
+ modal/experimental/flash.pyi,sha256=u_ZCXeZPBo778rThEdx9SJR6gkDdNqx9an8hwboQCEk,14258
155
155
  modal/experimental/ipython.py,sha256=TrCfmol9LGsRZMeDoeMPx3Hv3BFqQhYnmD_iH0pqdhk,2904
156
- modal-1.1.5.dev3.dist-info/licenses/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
156
+ modal-1.1.5.dev4.dist-info/licenses/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
157
157
  modal_docs/__init__.py,sha256=svYKtV8HDwDCN86zbdWqyq5T8sMdGDj0PVlzc2tIxDM,28
158
158
  modal_docs/gen_cli_docs.py,sha256=c1yfBS_x--gL5bs0N4ihMwqwX8l3IBWSkBAKNNIi6bQ,3801
159
159
  modal_docs/gen_reference_docs.py,sha256=d_CQUGQ0rfw28u75I2mov9AlS773z9rG40-yq5o7g2U,6359
@@ -176,10 +176,10 @@ modal_proto/options_pb2.pyi,sha256=l7DBrbLO7q3Ir-XDkWsajm0d0TQqqrfuX54i4BMpdQg,1
176
176
  modal_proto/options_pb2_grpc.py,sha256=1oboBPFxaTEXt9Aw7EAj8gXHDCNMhZD2VXqocC9l_gk,159
177
177
  modal_proto/options_pb2_grpc.pyi,sha256=CImmhxHsYnF09iENPoe8S4J-n93jtgUYD2JPAc0yJSI,247
178
178
  modal_proto/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
179
- modal_version/__init__.py,sha256=0Y1tDsgw11T_vf_1T67OFGKn1Gtt4JMwDn3IKJeIETY,120
179
+ modal_version/__init__.py,sha256=rp7MGaJS8eOVHpGNr__NnfXYwj-KIbn7g2X0mI6KEGE,120
180
180
  modal_version/__main__.py,sha256=2FO0yYQQwDTh6udt1h-cBnGd1c4ZyHnHSI4BksxzVac,105
181
- modal-1.1.5.dev3.dist-info/METADATA,sha256=mactKwIxNbCbeYMuYXywsEKRAnJgV6GEnURnLtmTI5o,2459
182
- modal-1.1.5.dev3.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
183
- modal-1.1.5.dev3.dist-info/entry_points.txt,sha256=An-wYgeEUnm6xzrAP9_NTSTSciYvvEWsMZILtYrvpAI,46
184
- modal-1.1.5.dev3.dist-info/top_level.txt,sha256=4BWzoKYREKUZ5iyPzZpjqx4G8uB5TWxXPDwibLcVa7k,43
185
- modal-1.1.5.dev3.dist-info/RECORD,,
181
+ modal-1.1.5.dev4.dist-info/METADATA,sha256=UivNw5cnN9znRf66NlHULHZPLhpTCfewLQmN_Vxn7Yw,2459
182
+ modal-1.1.5.dev4.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
183
+ modal-1.1.5.dev4.dist-info/entry_points.txt,sha256=An-wYgeEUnm6xzrAP9_NTSTSciYvvEWsMZILtYrvpAI,46
184
+ modal-1.1.5.dev4.dist-info/top_level.txt,sha256=4BWzoKYREKUZ5iyPzZpjqx4G8uB5TWxXPDwibLcVa7k,43
185
+ modal-1.1.5.dev4.dist-info/RECORD,,
modal_version/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # Copyright Modal Labs 2025
2
2
  """Supplies the current version of the modal client library."""
3
3
 
4
- __version__ = "1.1.5.dev3"
4
+ __version__ = "1.1.5.dev4"