modal 1.1.5.dev7__py3-none-any.whl → 1.1.5.dev9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
modal/client.pyi CHANGED
@@ -33,7 +33,7 @@ class _Client:
33
33
  server_url: str,
34
34
  client_type: int,
35
35
  credentials: typing.Optional[tuple[str, str]],
36
- version: str = "1.1.5.dev7",
36
+ version: str = "1.1.5.dev9",
37
37
  ):
38
38
  """mdmd:hidden
39
39
  The Modal client object is not intended to be instantiated directly by users.
@@ -164,7 +164,7 @@ class Client:
164
164
  server_url: str,
165
165
  client_type: int,
166
166
  credentials: typing.Optional[tuple[str, str]],
167
- version: str = "1.1.5.dev7",
167
+ version: str = "1.1.5.dev9",
168
168
  ):
169
169
  """mdmd:hidden
170
170
  The Modal client object is not intended to be instantiated directly by users.
@@ -282,10 +282,7 @@ class _FlashPrometheusAutoscaler:
282
282
  if timestamp >= autoscaling_time - self._max_window_seconds
283
283
  ]
284
284
 
285
- if self.metrics_endpoint == "internal":
286
- current_target_containers = await self._compute_target_containers_internal(current_replicas)
287
- else:
288
- current_target_containers = await self._compute_target_containers_prometheus(current_replicas)
285
+ current_target_containers = await self._compute_target_containers(current_replicas=current_replicas)
289
286
  autoscaling_decisions.append((autoscaling_time, current_target_containers))
290
287
 
291
288
  actual_target_containers = self._make_scaling_decision(
@@ -322,7 +319,7 @@ class _FlashPrometheusAutoscaler:
322
319
  logger.error(traceback.format_exc())
323
320
  await asyncio.sleep(self.autoscaling_interval_seconds)
324
321
 
325
- async def _compute_target_containers_internal(self, current_replicas: int) -> int:
322
+ async def _compute_target_containers(self, current_replicas: int) -> int:
326
323
  """
327
324
  Gets internal metrics from container to autoscale up or down.
328
325
  """
@@ -337,100 +334,41 @@ class _FlashPrometheusAutoscaler:
337
334
  if current_replicas == 0:
338
335
  return 1
339
336
 
340
- internal_metrics_list = []
341
- for container in containers:
342
- internal_metric = await self._get_container_metrics(container.task_id)
343
- if internal_metric is None:
344
- continue
345
- internal_metrics_list.append(getattr(internal_metric.metrics, self.target_metric))
346
-
347
- if not internal_metrics_list:
348
- return current_replicas
349
-
350
- sum_metric = sum(internal_metrics_list)
351
- containers_with_metrics = len(internal_metrics_list)
352
- # n_containers_missing_metric is the number of unhealthy containers + number of cold starting containers
353
- n_containers_missing_metric = current_replicas - containers_with_metrics
354
- # n_containers_unhealthy is the number of live containers that are not emitting metrics i.e. unhealthy
355
- n_containers_unhealthy = len(containers) - containers_with_metrics
356
-
357
- # Scale up assuming that every unhealthy container is at 2x the target metric value.
358
- scale_up_target_metric_value = (sum_metric + n_containers_unhealthy * self.target_metric_value) / (
359
- (containers_with_metrics + n_containers_unhealthy) or 1
360
- )
361
-
362
- # Scale down assuming that every container (including cold starting containers) are at the target metric value.
363
- scale_down_target_metric_value = (sum_metric + n_containers_missing_metric * self.target_metric_value) / (
364
- current_replicas or 1
365
- )
366
-
367
- scale_up_ratio = scale_up_target_metric_value / self.target_metric_value
368
- scale_down_ratio = scale_down_target_metric_value / self.target_metric_value
369
-
370
- desired_replicas = current_replicas
371
- if scale_up_ratio > 1 + self.scale_up_tolerance:
372
- desired_replicas = math.ceil(current_replicas * scale_up_ratio)
373
- elif scale_down_ratio < 1 - self.scale_down_tolerance:
374
- desired_replicas = math.ceil(current_replicas * scale_down_ratio)
337
+ # Get metrics based on autoscaler type (prometheus or internal)
338
+ sum_metric, n_containers_with_metrics = await self._get_scaling_info(containers)
375
339
 
376
- logger.warning(
377
- f"[Modal Flash] Current replicas: {current_replicas}, "
378
- f"sum internal metric `{self.target_metric}`: {sum_metric}, "
379
- f"target internal metric value: {self.target_metric_value}, "
380
- f"scale up ratio: {scale_up_ratio}, "
381
- f"scale down ratio: {scale_down_ratio}, "
382
- f"desired replicas: {desired_replicas}"
340
+ desired_replicas = self._calculate_desired_replicas(
341
+ n_current_replicas=current_replicas,
342
+ sum_metric=sum_metric,
343
+ n_containers_with_metrics=n_containers_with_metrics,
344
+ n_total_containers=len(containers),
345
+ target_metric_value=self.target_metric_value,
383
346
  )
384
347
 
385
- desired_replicas = max(1, min(desired_replicas, self.max_containers or 5000))
386
- return desired_replicas
348
+ return max(1, desired_replicas)
387
349
 
388
- async def _compute_target_containers_prometheus(self, current_replicas: int) -> int:
389
- # current_replicas is the number of live containers + cold starting containers (not yet live)
390
- # containers is the number of live containers that are registered in flash dns
391
- containers = await self._get_all_containers()
392
- if len(containers) > current_replicas:
393
- logger.info(
394
- f"[Modal Flash] Current replicas {current_replicas} is less than the number of containers "
395
- f"{len(containers)}. Setting current_replicas = num_containers."
396
- )
397
- current_replicas = len(containers)
398
-
399
- if current_replicas == 0:
400
- return 1
401
-
402
- target_metric = self.target_metric
403
- target_metric_value = float(self.target_metric_value)
404
-
405
- # Gets metrics from prometheus
406
- sum_metric = 0
407
- containers_with_metrics = 0
350
+ def _calculate_desired_replicas(
351
+ self,
352
+ n_current_replicas: int,
353
+ sum_metric: float,
354
+ n_containers_with_metrics: int,
355
+ n_total_containers: int,
356
+ target_metric_value: float,
357
+ ) -> int:
358
+ """
359
+ Calculate the desired number of replicas to autoscale to.
360
+ """
408
361
  buffer_containers = self.buffer_containers or 0
409
- container_metrics_list = await asyncio.gather(
410
- *[
411
- self._get_metrics(f"https://{container.host}:{container.port}/{self.metrics_endpoint}")
412
- for container in containers
413
- ]
414
- )
415
- for container_metrics in container_metrics_list:
416
- if (
417
- container_metrics is None
418
- or target_metric not in container_metrics
419
- or len(container_metrics[target_metric]) == 0
420
- ):
421
- continue
422
- sum_metric += container_metrics[target_metric][0].value
423
- containers_with_metrics += 1
424
362
 
425
363
  # n_containers_missing = number of unhealthy containers + number of containers not registered in flash dns
426
- n_containers_missing_metric = current_replicas - containers_with_metrics
364
+ n_containers_missing_metric = n_current_replicas - n_containers_with_metrics
427
365
  # n_containers_unhealthy = number of dns registered containers that are not emitting metrics
428
- n_containers_unhealthy = len(containers) - containers_with_metrics
366
+ n_containers_unhealthy = n_total_containers - n_containers_with_metrics
429
367
 
430
368
  # number of total containers - buffer containers
431
369
  # This is used in 1) scale ratio denominators 2) provisioning base.
432
370
  # Max is used to handle case when buffer_containers are first initialized.
433
- num_provisioned_containers = max(current_replicas - buffer_containers, 1)
371
+ num_provisioned_containers = max(n_current_replicas - buffer_containers, 1)
434
372
 
435
373
  # Scale up assuming that every unhealthy container is at (1 + scale_up_tolerance)x the target metric value.
436
374
  # This way if all containers are unhealthy, we will increase our number of containers.
@@ -454,10 +392,11 @@ class _FlashPrometheusAutoscaler:
454
392
  desired_replicas = math.ceil(desired_replicas * scale_down_ratio)
455
393
 
456
394
  logger.warning(
457
- f"[Modal Flash] Current replicas: {current_replicas}, "
395
+ f"[Modal Flash] Current replicas: {n_current_replicas}, "
396
+ f"target metric: {self.target_metric}"
458
397
  f"target metric value: {target_metric_value}, "
459
398
  f"current sum of metric values: {sum_metric}, "
460
- f"number of containers with metrics: {containers_with_metrics}, "
399
+ f"number of containers with metrics: {n_containers_with_metrics}, "
461
400
  f"number of containers unhealthy: {n_containers_unhealthy}, "
462
401
  f"number of containers missing metric (includes unhealthy): {n_containers_missing_metric}, "
463
402
  f"number of provisioned containers: {num_provisioned_containers}, "
@@ -468,6 +407,43 @@ class _FlashPrometheusAutoscaler:
468
407
 
469
408
  return desired_replicas
470
409
 
410
+ async def _get_scaling_info(self, containers) -> tuple[float, int]:
411
+ """Get metrics using either internal container metrics API or prometheus HTTP endpoints."""
412
+ if self.metrics_endpoint == "internal":
413
+ container_metrics_results = await asyncio.gather(
414
+ *[self._get_container_metrics(container.task_id) for container in containers]
415
+ )
416
+ container_metrics_list = []
417
+ for container_metric in container_metrics_results:
418
+ if container_metric is None:
419
+ continue
420
+ container_metrics_list.append(getattr(container_metric.metrics, self.target_metric))
421
+
422
+ sum_metric = sum(container_metrics_list)
423
+ n_containers_with_metrics = len(container_metrics_list)
424
+ else:
425
+ sum_metric = 0
426
+ n_containers_with_metrics = 0
427
+
428
+ container_metrics_list = await asyncio.gather(
429
+ *[
430
+ self._get_metrics(f"https://{container.host}:{container.port}/{self.metrics_endpoint}")
431
+ for container in containers
432
+ ]
433
+ )
434
+
435
+ for container_metrics in container_metrics_list:
436
+ if (
437
+ container_metrics is None
438
+ or self.target_metric not in container_metrics
439
+ or len(container_metrics[self.target_metric]) == 0
440
+ ):
441
+ continue
442
+ sum_metric += container_metrics[self.target_metric][0].value
443
+ n_containers_with_metrics += 1
444
+
445
+ return sum_metric, n_containers_with_metrics
446
+
471
447
  async def _get_metrics(self, url: str) -> Optional[dict[str, list[Any]]]: # technically any should be Sample
472
448
  from prometheus_client.parser import Sample, text_string_to_metric_families
473
449
 
@@ -138,11 +138,25 @@ class _FlashPrometheusAutoscaler:
138
138
 
139
139
  async def start(self): ...
140
140
  async def _run_autoscaler_loop(self): ...
141
- async def _compute_target_containers_internal(self, current_replicas: int) -> int:
141
+ async def _compute_target_containers(self, current_replicas: int) -> int:
142
142
  """Gets internal metrics from container to autoscale up or down."""
143
143
  ...
144
144
 
145
- async def _compute_target_containers_prometheus(self, current_replicas: int) -> int: ...
145
+ def _calculate_desired_replicas(
146
+ self,
147
+ n_current_replicas: int,
148
+ sum_metric: float,
149
+ n_containers_with_metrics: int,
150
+ n_total_containers: int,
151
+ target_metric_value: float,
152
+ ) -> int:
153
+ """Calculate the desired number of replicas to autoscale to."""
154
+ ...
155
+
156
+ async def _get_scaling_info(self, containers) -> tuple[float, int]:
157
+ """Get metrics using either internal container metrics API or prometheus HTTP endpoints."""
158
+ ...
159
+
146
160
  async def _get_metrics(self, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
147
161
  async def _get_container_metrics(
148
162
  self, container_id: str
@@ -209,7 +223,7 @@ class FlashPrometheusAutoscaler:
209
223
 
210
224
  _run_autoscaler_loop: ___run_autoscaler_loop_spec[typing_extensions.Self]
211
225
 
212
- class ___compute_target_containers_internal_spec(typing_extensions.Protocol[SUPERSELF]):
226
+ class ___compute_target_containers_spec(typing_extensions.Protocol[SUPERSELF]):
213
227
  def __call__(self, /, current_replicas: int) -> int:
214
228
  """Gets internal metrics from container to autoscale up or down."""
215
229
  ...
@@ -218,13 +232,29 @@ class FlashPrometheusAutoscaler:
218
232
  """Gets internal metrics from container to autoscale up or down."""
219
233
  ...
220
234
 
221
- _compute_target_containers_internal: ___compute_target_containers_internal_spec[typing_extensions.Self]
235
+ _compute_target_containers: ___compute_target_containers_spec[typing_extensions.Self]
222
236
 
223
- class ___compute_target_containers_prometheus_spec(typing_extensions.Protocol[SUPERSELF]):
224
- def __call__(self, /, current_replicas: int) -> int: ...
225
- async def aio(self, /, current_replicas: int) -> int: ...
237
+ def _calculate_desired_replicas(
238
+ self,
239
+ n_current_replicas: int,
240
+ sum_metric: float,
241
+ n_containers_with_metrics: int,
242
+ n_total_containers: int,
243
+ target_metric_value: float,
244
+ ) -> int:
245
+ """Calculate the desired number of replicas to autoscale to."""
246
+ ...
247
+
248
+ class ___get_scaling_info_spec(typing_extensions.Protocol[SUPERSELF]):
249
+ def __call__(self, /, containers) -> tuple[float, int]:
250
+ """Get metrics using either internal container metrics API or prometheus HTTP endpoints."""
251
+ ...
252
+
253
+ async def aio(self, /, containers) -> tuple[float, int]:
254
+ """Get metrics using either internal container metrics API or prometheus HTTP endpoints."""
255
+ ...
226
256
 
227
- _compute_target_containers_prometheus: ___compute_target_containers_prometheus_spec[typing_extensions.Self]
257
+ _get_scaling_info: ___get_scaling_info_spec[typing_extensions.Self]
228
258
 
229
259
  class ___get_metrics_spec(typing_extensions.Protocol[SUPERSELF]):
230
260
  def __call__(self, /, url: str) -> typing.Optional[dict[str, list[typing.Any]]]: ...
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: modal
3
- Version: 1.1.5.dev7
3
+ Version: 1.1.5.dev9
4
4
  Summary: Python client library for Modal
5
5
  Author-email: Modal Labs <support@modal.com>
6
6
  License: Apache-2.0
@@ -22,7 +22,7 @@ modal/app.py,sha256=F4baVULljFq0CwC_7U-EKNRNx7CYeWBKudjjYUuWc4U,48416
22
22
  modal/app.pyi,sha256=AbXJCBkyt2rI_-M3VbTBYb32at0P6iRZuoC87xY_JrQ,43591
23
23
  modal/call_graph.py,sha256=1g2DGcMIJvRy-xKicuf63IVE98gJSnQsr8R_NVMptNc,2581
24
24
  modal/client.py,sha256=kyAIVB3Ay-XKJizQ_1ufUFB__EagV0MLmHJpyYyJ7J0,18636
25
- modal/client.pyi,sha256=zPcN6BqCMJQz4jsZ7qWnE3DFzTLShZIxGHWgP-iTj3M,15829
25
+ modal/client.pyi,sha256=axk1bGHT1cC_rghFoVhClKrxzrPr_OHy8Gb7BGTfXo8,15829
26
26
  modal/cloud_bucket_mount.py,sha256=YOe9nnvSr4ZbeCn587d7_VhE9IioZYRvF9VYQTQux08,5914
27
27
  modal/cloud_bucket_mount.pyi,sha256=-qSfYAQvIoO_l2wsCCGTG5ZUwQieNKXdAO00yP1-LYU,7394
28
28
  modal/cls.py,sha256=pTEO7pHjlO7taMbIqs4oI9ZZgKDJpVKyGkO5ZT0w6tQ,40934
@@ -150,10 +150,10 @@ modal/cli/programs/run_jupyter.py,sha256=44Lpvqk2l3hH-uOkmAOzw60NEsfB5uaRDWDKVsh
150
150
  modal/cli/programs/run_marimo.py,sha256=HyZ2za0NYqg31-mGxFQxUIAJ3Q-jRaMocEwWwI5-cdw,2887
151
151
  modal/cli/programs/vscode.py,sha256=KbTAaIXyQBVCDXxXjmBHmKpgXkUw0q4R4KkJvUjCYgk,3380
152
152
  modal/experimental/__init__.py,sha256=fCqzo_f3vcY750vHtd7CtLs5dvdM_C0ZLLGb3zXuK9w,14913
153
- modal/experimental/flash.py,sha256=DhX0HG-K55FSK_ym0MjXC9ObVflmamAXfFY-TnuN5SM,30056
154
- modal/experimental/flash.pyi,sha256=EZiceVufvf7jsqKSQG-gd1hwqxzbEZxMkB1NtrK3AnE,14270
153
+ modal/experimental/flash.py,sha256=8HOHZ0XLSN8Znzsi6hGggS46CC6t_7IgGWyNoeSXS9o,28417
154
+ modal/experimental/flash.pyi,sha256=R9VV0UDotiY9BRUjacB-xI4qhR3yBymAvEZFRFHztLs,15143
155
155
  modal/experimental/ipython.py,sha256=TrCfmol9LGsRZMeDoeMPx3Hv3BFqQhYnmD_iH0pqdhk,2904
156
- modal-1.1.5.dev7.dist-info/licenses/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
156
+ modal-1.1.5.dev9.dist-info/licenses/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
157
157
  modal_docs/__init__.py,sha256=svYKtV8HDwDCN86zbdWqyq5T8sMdGDj0PVlzc2tIxDM,28
158
158
  modal_docs/gen_cli_docs.py,sha256=c1yfBS_x--gL5bs0N4ihMwqwX8l3IBWSkBAKNNIi6bQ,3801
159
159
  modal_docs/gen_reference_docs.py,sha256=d_CQUGQ0rfw28u75I2mov9AlS773z9rG40-yq5o7g2U,6359
@@ -161,10 +161,10 @@ modal_docs/mdmd/__init__.py,sha256=svYKtV8HDwDCN86zbdWqyq5T8sMdGDj0PVlzc2tIxDM,2
161
161
  modal_docs/mdmd/mdmd.py,sha256=tUTImNd4UMFk1opkaw8J672gX8AkBO5gbY2S_NMxsxs,7140
162
162
  modal_docs/mdmd/signatures.py,sha256=XJaZrK7Mdepk5fdX51A8uENiLFNil85Ud0d4MH8H5f0,3218
163
163
  modal_proto/__init__.py,sha256=MIEP8jhXUeGq_eCjYFcqN5b1bxBM4fdk0VESpjWR0fc,28
164
- modal_proto/api.proto,sha256=9Vh3r3SjlJh2a9Eb6GwFYh2R6aCKV2-pEQXmVeOYkAE,105611
164
+ modal_proto/api.proto,sha256=4m9Y592ADWWjxaf80HemhUk8hgGU6qN8RCe4dhSaE5o,105717
165
165
  modal_proto/api_grpc.py,sha256=r1k5m9Z2brIWtkY59mPwAGSavhssStzmsSq_PEwkZWA,129564
166
- modal_proto/api_pb2.py,sha256=M0DAllkbXBuQGnXEYiOaqzZoo5TQy0IS9DTHpzHNliU,370000
167
- modal_proto/api_pb2.pyi,sha256=29iTBpyQozfgPuLuQmY_05PIx4PBJE7yQ-wpgmUrWz8,510035
166
+ modal_proto/api_pb2.py,sha256=P9BfjTZdEY5dGTrILAG3Z1QnYPQdrr1Styx1eISsdOk,370095
167
+ modal_proto/api_pb2.pyi,sha256=UkNiuRnke0P2oNEOHIJ0WnQ91NsAuALBv4oI8jaByIA,510613
168
168
  modal_proto/api_pb2_grpc.py,sha256=ovaCh4weYl5r0XfX_2F2M2pn6XGH7Hl-BmVaIB62ssY,279448
169
169
  modal_proto/api_pb2_grpc.pyi,sha256=q_S2ScjUjS2bonUnEBqwCXVRZUwEuO5jOD2eL9nw0Wc,65439
170
170
  modal_proto/modal_api_grpc.py,sha256=E-5WMq82AVvtGRsmb7AqYzOz-cfqTOokp-QjaFaOhpo,19547
@@ -176,10 +176,10 @@ modal_proto/options_pb2.pyi,sha256=l7DBrbLO7q3Ir-XDkWsajm0d0TQqqrfuX54i4BMpdQg,1
176
176
  modal_proto/options_pb2_grpc.py,sha256=1oboBPFxaTEXt9Aw7EAj8gXHDCNMhZD2VXqocC9l_gk,159
177
177
  modal_proto/options_pb2_grpc.pyi,sha256=CImmhxHsYnF09iENPoe8S4J-n93jtgUYD2JPAc0yJSI,247
178
178
  modal_proto/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
179
- modal_version/__init__.py,sha256=HPnRO9vpz9RoHYzNERnfbmzHSkXVz8ASVK3ldmoP_mw,120
179
+ modal_version/__init__.py,sha256=31WlaQ95-Yb_-QRoYMW2iJofXkQIL15n6Jk0rpH5Zbk,120
180
180
  modal_version/__main__.py,sha256=2FO0yYQQwDTh6udt1h-cBnGd1c4ZyHnHSI4BksxzVac,105
181
- modal-1.1.5.dev7.dist-info/METADATA,sha256=MlzcRmSlouWM_AhEfxMl17M4EbNZ5J1YPJT0nCZcyHk,2459
182
- modal-1.1.5.dev7.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
183
- modal-1.1.5.dev7.dist-info/entry_points.txt,sha256=An-wYgeEUnm6xzrAP9_NTSTSciYvvEWsMZILtYrvpAI,46
184
- modal-1.1.5.dev7.dist-info/top_level.txt,sha256=4BWzoKYREKUZ5iyPzZpjqx4G8uB5TWxXPDwibLcVa7k,43
185
- modal-1.1.5.dev7.dist-info/RECORD,,
181
+ modal-1.1.5.dev9.dist-info/METADATA,sha256=lc-vSW0z_Ay0U1z-aCyG3fpF1stE5xFO1-UPwkuZV9k,2459
182
+ modal-1.1.5.dev9.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
183
+ modal-1.1.5.dev9.dist-info/entry_points.txt,sha256=An-wYgeEUnm6xzrAP9_NTSTSciYvvEWsMZILtYrvpAI,46
184
+ modal-1.1.5.dev9.dist-info/top_level.txt,sha256=4BWzoKYREKUZ5iyPzZpjqx4G8uB5TWxXPDwibLcVa7k,43
185
+ modal-1.1.5.dev9.dist-info/RECORD,,
modal_proto/api.proto CHANGED
@@ -642,6 +642,8 @@ message AutoscalerSettings {
642
642
  optional uint32 scaleup_window = 4;
643
643
  // Maximum amount of time a container can be idle before being scaled down, in seconds; pka "container_idle_timeout"
644
644
  optional uint32 scaledown_window = 5;
645
+ // Target number of containers autoscaler will try to maintain
646
+ optional uint32 target_containers = 6;
645
647
  }
646
648
 
647
649
  // Used for flash autoscaling