ob-metaflow-extensions 1.3.1__py2.py3-none-any.whl → 1.3.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow-extensions might be problematic. Click here for more details.

@@ -180,6 +180,7 @@ class WorkerInfoDict(TypedDict):
180
180
  pending: Dict[str, List[WorkerStatus]]
181
181
  running: Dict[str, List[WorkerStatus]]
182
182
  crashlooping: Dict[str, List[WorkerStatus]]
183
+ failed: Dict[str, List[WorkerStatus]]
183
184
 
184
185
 
185
186
  class CurrentWorkerInfo(TypedDict):
@@ -442,9 +443,11 @@ def _capsule_worker_semantic_status(
442
443
  xx[worker_version].append(w)
443
444
  return xx
444
445
 
446
+ # phases can be Pending, Running, Succeeded, Failed, Unknown, CrashLoopBackOff
445
447
  pending_workers = _make_version_dict(workers, "Pending")
446
448
  running_workers = _make_version_dict(workers, "Running")
447
449
  crashlooping_workers = _make_version_dict(workers, "CrashLoopBackOff")
450
+ failed_workers = _make_version_dict(workers, "Failed")
448
451
 
449
452
  # current_status (formulated basis):
450
453
  # - at least one pod is pending for `_end_state_capsule_version`
@@ -464,7 +467,8 @@ def _capsule_worker_semantic_status(
464
467
  "at_least_one_running": (
465
468
  count_for_version(running_workers) >= min(min_replicas, 1)
466
469
  ),
467
- "at_least_one_crashlooping": count_for_version(crashlooping_workers) > 0,
470
+ "at_least_one_crashlooping": count_for_version(crashlooping_workers) > 0
471
+ or count_for_version(failed_workers) > 0,
468
472
  "none_present": (
469
473
  count_for_version(running_workers) == 0
470
474
  and count_for_version(pending_workers) == 0
@@ -484,6 +488,7 @@ def _capsule_worker_semantic_status(
484
488
  "pending": count_for_version(pending_workers),
485
489
  "running": count_for_version(running_workers),
486
490
  "crashlooping": count_for_version(crashlooping_workers),
491
+ "failed": count_for_version(failed_workers),
487
492
  },
488
493
  }
489
494
 
@@ -491,6 +496,7 @@ def _capsule_worker_semantic_status(
491
496
  "pending": pending_workers,
492
497
  "running": running_workers,
493
498
  "crashlooping": crashlooping_workers,
499
+ "failed": failed_workers,
494
500
  }
495
501
 
496
502
  return {
@@ -255,6 +255,12 @@ class CapsuleInput:
255
255
  replicas.get("min"),
256
256
  replicas.get("max"),
257
257
  )
258
+ rpm = replicas.get("scaling_policy", {}).get("rpm", None)
259
+ autoscaling_config = {}
260
+ if rpm:
261
+ autoscaling_config = {
262
+ "requestRateBasedAutoscalingConfig": {"targetRequestsPerMinute": rpm}
263
+ }
258
264
  if fixed is not None:
259
265
  _min, _max = fixed, fixed
260
266
  gpu_resource = app_config.get_state("resources").get("gpu")
@@ -296,6 +302,7 @@ class CapsuleInput:
296
302
  "autoscalingConfig": {
297
303
  "minReplicas": _min,
298
304
  "maxReplicas": _max,
305
+ **autoscaling_config,
299
306
  },
300
307
  **_scheduling_config,
301
308
  "containerStartupConfig": {
@@ -713,7 +720,7 @@ class CapsuleDeployer:
713
720
  workers_status: List[WorkerStatus],
714
721
  ):
715
722
  for worker in workers_status:
716
- if worker["phase"] == "CrashLoopBackOff":
723
+ if worker["phase"] == "CrashLoopBackOff" or worker["phase"] == "Failed":
717
724
  return worker["workerId"]
718
725
  return None
719
726
 
@@ -851,7 +858,7 @@ class CapsuleDeployer:
851
858
  workers_state_machine.save_debug_info(self._debug_dir)
852
859
  if i % 3 == 0: # Every 3 seconds report the status
853
860
  logger(
854
- f"[debug] 💊 {self.capsule_type} {self.identifier} deployment status: {state_machine.current_status} | worker states: {workers_state_machine.current_status}"
861
+ f"[debug] 💊 {self.capsule_type} {self.identifier} deployment status: {state_machine.current_status} | worker states: {workers_state_machine.current_status} | capsule_ready : {capsule_ready} | further_check_worker_readiness {further_check_worker_readiness}"
855
862
  )
856
863
 
857
864
  # We will only check ready_to_serve_traffic under the following conditions:
@@ -51,6 +51,11 @@ class ReplicaConfigDict(TypedDict, total=False):
51
51
  fixed: Optional[int]
52
52
  min: Optional[int]
53
53
  max: Optional[int]
54
+ scaling_policy: Optional["ScalingPolicyConfigDict"]
55
+
56
+
57
+ class ScalingPolicyConfigDict(TypedDict, total=False):
58
+ rpm: Optional[int]
54
59
 
55
60
 
56
61
  class DependencyConfigDict(TypedDict, total=False):
@@ -301,6 +301,29 @@ class AuthConfig(metaclass=ConfigMeta):
301
301
  )
302
302
 
303
303
 
304
+ class ScalingPolicyConfig(metaclass=ConfigMeta):
305
+ """
306
+ Policies for autoscaling replicas. Available policies:
307
+ - Request based Autoscaling (rpm)
308
+ """
309
+
310
+ # TODO Change the defaulting if we have more autoscaling policies.
311
+ rpm = ConfigField(
312
+ field_type=int,
313
+ # TODO: Add a little more to the docstring where we explain the behavior.
314
+ cli_meta=CLIOption(
315
+ name="scaling_rpm",
316
+ cli_option_str="--scaling-rpm",
317
+ help=(
318
+ "Scale up replicas when the requests per minute crosses this threshold. "
319
+ "If nothing is provided and the replicas.max and replicas.min is set then "
320
+ "the default rpm would be 60."
321
+ ),
322
+ ),
323
+ default=60,
324
+ )
325
+
326
+
304
327
  class ReplicaConfig(metaclass=ConfigMeta):
305
328
  """Replica configuration."""
306
329
 
@@ -333,6 +356,16 @@ class ReplicaConfig(metaclass=ConfigMeta):
333
356
  example=10,
334
357
  )
335
358
 
359
+ scaling_policy = ConfigField(
360
+ cli_meta=None,
361
+ field_type=ScalingPolicyConfig,
362
+ help=(
363
+ "Scaling policy defines the metric based on which the replicas will horizontally scale. "
364
+ "If min and max replicas are set and are not the same, then a scaling policy will be applied. "
365
+ "Default scaling policies can be 60 rpm (ie 1 rps). "
366
+ ),
367
+ )
368
+
336
369
  @staticmethod
337
370
  def defaults(replica_config: "ReplicaConfig"):
338
371
  if all(
@@ -346,6 +379,7 @@ class ReplicaConfig(metaclass=ConfigMeta):
346
379
  replica_config.fixed = 1
347
380
  elif replica_config.min is not None and replica_config.max is None:
348
381
  replica_config.max = replica_config.min
382
+
349
383
  return
350
384
 
351
385
  @staticmethod
@@ -161,6 +161,21 @@ properties:
161
161
  type: integer
162
162
  example: 10
163
163
  mutation_behavior: union
164
+ scaling_policy:
165
+ title: ScalingPolicyConfig
166
+ description: |-
167
+ Policies for autoscaling replicas. Available policies:
168
+ - Request based Autoscaling (rpm)
169
+ type: object
170
+ required: []
171
+ properties:
172
+ rpm:
173
+ description: |-
174
+ Scale up replicas when the requests per minute crosses this threshold. If nothing is provided and the replicas.max and replicas.min is set then the default rpm would be 60.
175
+ type: integer
176
+ default: 60
177
+ mutation_behavior: union
178
+ mutation_behavior: union
164
179
  mutation_behavior: union
165
180
  dependencies:
166
181
  title: DependencyConfig
@@ -151,7 +151,6 @@ class AppDeployer(TypedCoreConfig):
151
151
  final_status["id"],
152
152
  final_status["auth_type"],
153
153
  final_status["public_url"],
154
- final_status["available_replicas"],
155
154
  final_status["name"],
156
155
  final_status["deployed_version"],
157
156
  final_status["deployed_at"],
@@ -164,7 +163,6 @@ class DeployedApp:
164
163
  _id: str,
165
164
  capsule_type: str,
166
165
  public_url: str,
167
- available_replicas: int,
168
166
  name: str,
169
167
  deployed_version: str,
170
168
  deployed_at: str,
@@ -172,7 +170,6 @@ class DeployedApp:
172
170
  self._id = _id
173
171
  self._capsule_type = capsule_type
174
172
  self._public_url = public_url
175
- self._available_replicas = available_replicas
176
173
  self._name = name
177
174
  self._deployed_version = deployed_version
178
175
  self._deployed_at = deployed_at
@@ -208,6 +205,10 @@ class DeployedApp:
208
205
  capsule = capsule_api.get(self._id)
209
206
  return capsule
210
207
 
208
+ def replicas(self):
209
+ capsule_api = self._get_capsule_api()
210
+ return capsule_api.get_workers(self._id)
211
+
211
212
  def scale_to_zero(self):
212
213
  """
213
214
  Scales the DeployedApp to 0 replicas.
@@ -243,10 +244,6 @@ class DeployedApp:
243
244
  def public_url(self) -> str:
244
245
  return self._public_url
245
246
 
246
- @property
247
- def available_replicas(self) -> int:
248
- return self._available_replicas
249
-
250
247
  @property
251
248
  def name(self) -> str:
252
249
  return self._name
@@ -260,7 +257,6 @@ class DeployedApp:
260
257
  "id": self._id,
261
258
  "auth_style": self.auth_style, # TODO : Fix naming here.
262
259
  "public_url": self._public_url,
263
- "available_replicas": self._available_replicas,
264
260
  "name": self._name,
265
261
  "deployed_version": self._deployed_version,
266
262
  "deployed_at": self._deployed_at,
@@ -272,7 +268,6 @@ class DeployedApp:
272
268
  _id=data["id"],
273
269
  capsule_type=data["capsule_type"],
274
270
  public_url=data["public_url"],
275
- available_replicas=data["available_replicas"],
276
271
  name=data["name"],
277
272
  deployed_version=data["deployed_version"],
278
273
  deployed_at=data["deployed_at"],
@@ -287,7 +282,6 @@ class DeployedApp:
287
282
  f"DeployedApp(id='{self._id}', "
288
283
  f"name='{self._name}', "
289
284
  f"public_url='{self._public_url}', "
290
- f"available_replicas={self._available_replicas}, "
291
285
  f"deployed_version='{self._deployed_version}')"
292
286
  )
293
287
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ob-metaflow-extensions
3
- Version: 1.3.1
3
+ Version: 1.3.2
4
4
  Summary: Outerbounds Platform Extensions for Metaflow
5
5
  Author: Outerbounds, Inc.
6
6
  License: Commercial
@@ -12,15 +12,15 @@ metaflow_extensions/outerbounds/plugins/apps/consts.py,sha256=iHsyqbUg9k-rgswCs1
12
12
  metaflow_extensions/outerbounds/plugins/apps/deploy_decorator.py,sha256=VkmiMdNYHhNdt-Qm9AVv7aE2LWFsIFEc16YcOYjwF6Q,8568
13
13
  metaflow_extensions/outerbounds/plugins/apps/supervisord_utils.py,sha256=GQoN2gyPClcpR9cLldJmbCfqXnoAHxp8xUnY7vzaYtY,9026
14
14
  metaflow_extensions/outerbounds/plugins/apps/core/__init__.py,sha256=c6uCgKlgEkTmM9BVdAO-m3vZvUpK2KW_AZZ2236now4,237
15
- metaflow_extensions/outerbounds/plugins/apps/core/_state_machine.py,sha256=q2in_xmHsFJ1KwgQhVbg_DuJM5I2XdQHhbMGKlJpdcs,19957
15
+ metaflow_extensions/outerbounds/plugins/apps/core/_state_machine.py,sha256=al907t2C86BPf4V1V03PLTJRJMOc8gdl1CxLLbklnDU,20281
16
16
  metaflow_extensions/outerbounds/plugins/apps/core/app_cli.py,sha256=V0Ki_VwjVIyIa2sgXPC7miOPLYWLrsHvzMpTfQypU2U,42169
17
17
  metaflow_extensions/outerbounds/plugins/apps/core/app_config.py,sha256=PHt-HdNfTHIuhY-eB5vkRMp1RKQNWJ4DKdgZWyYgUuc,4167
18
18
  metaflow_extensions/outerbounds/plugins/apps/core/artifacts.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- metaflow_extensions/outerbounds/plugins/apps/core/capsule.py,sha256=tswwFa-c3P0CQtomQV6d--eP4hDQI4SjbbUccvwkmGo,34314
19
+ metaflow_extensions/outerbounds/plugins/apps/core/capsule.py,sha256=VpCmq8R13GNex6aTJnOCswkLnc8acgsQQ9Da6KBh2sQ,34732
20
20
  metaflow_extensions/outerbounds/plugins/apps/core/click_importer.py,sha256=kgoPQmK_-8PSSTc3QMSaynCLQ5VWTkKFOC69FPURyXA,998
21
- metaflow_extensions/outerbounds/plugins/apps/core/config_schema.yaml,sha256=iTThO2vNQrFWe9nYfjiOcMf6FOQ6vU_1ZhXhUAr0L24,8142
21
+ metaflow_extensions/outerbounds/plugins/apps/core/config_schema.yaml,sha256=LfA72d_bqsAuRzFZ9q-DfbiUy1mLimuFQfGwIEhoKNo,8745
22
22
  metaflow_extensions/outerbounds/plugins/apps/core/dependencies.py,sha256=JlWT9f27yzZeJPlqTQk134WDfQgOdyxC5iaw3pLlhqY,4006
23
- metaflow_extensions/outerbounds/plugins/apps/core/deployer.py,sha256=VkYe8mK_VOr-bAiR2RohhKeLf8Z3gHZw7RoRBSCu2FA,9765
23
+ metaflow_extensions/outerbounds/plugins/apps/core/deployer.py,sha256=dNKlDu6n8SufEd5NKmsErl1RYhQXuEe_DgtA0mk7awg,9472
24
24
  metaflow_extensions/outerbounds/plugins/apps/core/perimeters.py,sha256=jeFGAUnFQkBFiOMp_Ls7Ofb80Qogh509suam5sMucYU,3030
25
25
  metaflow_extensions/outerbounds/plugins/apps/core/secrets.py,sha256=sgDiAmpSC8Y5xjlaOEp79F6m0S3x4RONf_vJ5PUAfu8,6127
26
26
  metaflow_extensions/outerbounds/plugins/apps/core/utils.py,sha256=2M2zU8DhbAlJee8P0xKXINAku81PcUylS3sVCSb0TUs,7896
@@ -35,9 +35,9 @@ metaflow_extensions/outerbounds/plugins/apps/core/config/__init__.py,sha256=ZgC9
35
35
  metaflow_extensions/outerbounds/plugins/apps/core/config/cli_generator.py,sha256=0R0-wy7RxAMR9doVRvuluRYxAYgyjZXlTIkOeYGyz7M,5350
36
36
  metaflow_extensions/outerbounds/plugins/apps/core/config/config_utils.py,sha256=bozzUR8rbfOnb5M532RZxB5QNvVgEC1gnVjfCvQ82Yk,34053
37
37
  metaflow_extensions/outerbounds/plugins/apps/core/config/schema_export.py,sha256=tigPtb0we-urwbmctG1GbaQ9NKRKZn4KBbJKmaEntCg,9501
38
- metaflow_extensions/outerbounds/plugins/apps/core/config/typed_configs.py,sha256=bAC2lV1xWtcw0r2LPlqDrggeXPLOyrtZha2KDpm_Vx0,4454
38
+ metaflow_extensions/outerbounds/plugins/apps/core/config/typed_configs.py,sha256=euoS1Ap4yvHC20Aaj5YQWMgxixkxujVeiJ7C4DcAFhQ,4590
39
39
  metaflow_extensions/outerbounds/plugins/apps/core/config/typed_init_generator.py,sha256=KiJ1eiwtBR5eWdBzWqvO6KlqJ2qzjJvl3w4c1uJ3g0Y,13419
40
- metaflow_extensions/outerbounds/plugins/apps/core/config/unified_config.py,sha256=f_Qxy-pJoyGq_tUYr_gHmS7INEivLqv4QbOJljIwECA,35837
40
+ metaflow_extensions/outerbounds/plugins/apps/core/config/unified_config.py,sha256=bO-g_6mv7xciVcDf4Jn-qioPUUvg9Y3fMM5fcraN2Sk,37018
41
41
  metaflow_extensions/outerbounds/plugins/apps/core/experimental/__init__.py,sha256=rd4qGTkHndKYfJmoAKZWiY0KK4j5BK6RBrtle-it1Mg,2746
42
42
  metaflow_extensions/outerbounds/plugins/aws/__init__.py,sha256=VBGdjNKeFLXGZuqh4jVk8cFtO1AWof73a6k_cnbAOYA,145
43
43
  metaflow_extensions/outerbounds/plugins/aws/assume_role.py,sha256=mBewNlnSYsR2rFXFkX-DUH6ku01h2yOcMcLHoCL7eyI,161
@@ -124,7 +124,7 @@ metaflow_extensions/outerbounds/toplevel/plugins/ollama/__init__.py,sha256=GRSz2
124
124
  metaflow_extensions/outerbounds/toplevel/plugins/snowflake/__init__.py,sha256=LptpH-ziXHrednMYUjIaosS1SXD3sOtF_9_eRqd8SJw,50
125
125
  metaflow_extensions/outerbounds/toplevel/plugins/torchtune/__init__.py,sha256=uTVkdSk3xZ7hEKYfdlyVteWj5KeDwaM1hU9WT-_YKfI,50
126
126
  metaflow_extensions/outerbounds/toplevel/plugins/vllm/__init__.py,sha256=ekcgD3KVydf-a0xMI60P4uy6ePkSEoFHiGnDq1JM940,45
127
- ob_metaflow_extensions-1.3.1.dist-info/METADATA,sha256=HnIBZEkQl7rStKXqEPoFTDNrG5O6a_n8iYmSdnQT_q4,518
128
- ob_metaflow_extensions-1.3.1.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
129
- ob_metaflow_extensions-1.3.1.dist-info/top_level.txt,sha256=NwG0ukwjygtanDETyp_BUdtYtqIA_lOjzFFh1TsnxvI,20
130
- ob_metaflow_extensions-1.3.1.dist-info/RECORD,,
127
+ ob_metaflow_extensions-1.3.2.dist-info/METADATA,sha256=6Q5Etz6OSCEQL-RBWyUWuJqcrHj8J92vfslA-ldZs4M,518
128
+ ob_metaflow_extensions-1.3.2.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
129
+ ob_metaflow_extensions-1.3.2.dist-info/top_level.txt,sha256=NwG0ukwjygtanDETyp_BUdtYtqIA_lOjzFFh1TsnxvI,20
130
+ ob_metaflow_extensions-1.3.2.dist-info/RECORD,,