ob-metaflow-extensions 1.3.0__py2.py3-none-any.whl → 1.3.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ob-metaflow-extensions might be problematic.

Files changed (23):
  1. metaflow_extensions/outerbounds/plugins/__init__.py +3 -0
  2. metaflow_extensions/outerbounds/plugins/apps/core/_state_machine.py +16 -10
  3. metaflow_extensions/outerbounds/plugins/apps/core/app_cli.py +8 -8
  4. metaflow_extensions/outerbounds/plugins/apps/core/capsule.py +19 -14
  5. metaflow_extensions/outerbounds/plugins/apps/core/config/typed_configs.py +5 -0
  6. metaflow_extensions/outerbounds/plugins/apps/core/config/unified_config.py +34 -0
  7. metaflow_extensions/outerbounds/plugins/apps/core/config_schema.yaml +15 -0
  8. metaflow_extensions/outerbounds/plugins/apps/core/dependencies.py +1 -1
  9. metaflow_extensions/outerbounds/plugins/apps/core/deployer.py +4 -10
  10. metaflow_extensions/outerbounds/plugins/apps/core/secrets.py +1 -1
  11. metaflow_extensions/outerbounds/plugins/apps/core/utils.py +2 -2
  12. metaflow_extensions/outerbounds/plugins/s3_proxy/__init__.py +7 -0
  13. metaflow_extensions/outerbounds/plugins/s3_proxy/constants.py +8 -0
  14. metaflow_extensions/outerbounds/plugins/s3_proxy/exceptions.py +13 -0
  15. metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_api.py +93 -0
  16. metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_decorator.py +106 -0
  17. metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_manager.py +222 -0
  18. metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +33 -1
  19. metaflow_extensions/outerbounds/toplevel/s3_proxy.py +88 -0
  20. {ob_metaflow_extensions-1.3.0.dist-info → ob_metaflow_extensions-1.3.2.dist-info}/METADATA +1 -1
  21. {ob_metaflow_extensions-1.3.0.dist-info → ob_metaflow_extensions-1.3.2.dist-info}/RECORD +23 -16
  22. {ob_metaflow_extensions-1.3.0.dist-info → ob_metaflow_extensions-1.3.2.dist-info}/WHEEL +0 -0
  23. {ob_metaflow_extensions-1.3.0.dist-info → ob_metaflow_extensions-1.3.2.dist-info}/top_level.txt +0 -0
metaflow_extensions/outerbounds/plugins/__init__.py

@@ -340,6 +340,9 @@ STEP_DECORATORS_DESC = [
  ("ollama", ".ollama.OllamaDecorator"),
  ("vllm", ".vllm.VLLMDecorator"),
  ("app_deploy", ".apps.app_deploy_decorator.AppDeployDecorator"),
+ ("s3_proxy", ".s3_proxy.s3_proxy_decorator.S3ProxyDecorator"),
+ ("nebius_s3_proxy", ".s3_proxy.s3_proxy_decorator.NebiusS3ProxyDecorator"),
+ ("coreweave_s3_proxy", ".s3_proxy.s3_proxy_decorator.CoreWeaveS3ProxyDecorator"),
  ]

  TOGGLE_STEP_DECORATOR = [
metaflow_extensions/outerbounds/plugins/apps/core/_state_machine.py

@@ -180,6 +180,7 @@ class WorkerInfoDict(TypedDict):
  pending: Dict[str, List[WorkerStatus]]
  running: Dict[str, List[WorkerStatus]]
  crashlooping: Dict[str, List[WorkerStatus]]
+ failed: Dict[str, List[WorkerStatus]]


  class CurrentWorkerInfo(TypedDict):
@@ -199,29 +200,29 @@ class DEPLOYMENT_READY_CONDITIONS:
  This allows users or platform designers to configure the criteria for deployment readiness.

  Why do we need deployment readiness conditions?
- - Deployments might be taking place from a CI/CD esq environment, In these setups, the downstream build triggers might be depending on a specific criteria for deployment completion. Having readiness conditions allows the CI/CD systems to get a signal of when the deployment is ready.
+ - Deployments might be taking place from a CI/CD-esque environment, In these setups, the downstream build triggers might be depending on a specific criteria for deployment completion. Having readiness conditions allows the CI/CD systems to get a signal of when the deployment is ready.
  - Users might be calling the deployment API under different conditions:
  - Some users might want a cluster of workers ready before serving traffic while others might want just one worker ready to start serving traffic.

  Some readiness conditions include:
- 1) [at_least_one_running] Atleast min(min_replicas, 1) workers of the current deployment instance's version have started running.
+ 1) [at_least_one_running] At least min(min_replicas, 1) workers of the current deployment instance's version have started running.
  - Usecase: Some endpoints may be deployed ephemerally and are considered ready when at least one instance is running; additional instances are for load management.
- 2) [all_running] Atleast min_replicas number of workers are running for the deployment to be considered ready.
+ 2) [all_running] At least min_replicas number of workers are running for the deployment to be considered ready.
  - Usecase: Operators may require that all replicas are available before traffic is routed. Needed when inference endpoints maybe under some SLA or require a larger load
- 3) [fully_finished] Atleast min_replicas number of workers are running for the deployment and there are no pending or crashlooping workers from previous versions lying around.
+ 3) [fully_finished] At least min_replicas number of workers are running for the deployment and there are no pending or crashlooping workers from previous versions lying around.
  - Usecase: Ensuring endpoint is fully available and no other versions are running or endpoint has been fully scaled down.
  4) [async] The deployment will be assumed ready as soon as the server responds with a 200.
  - Usecase: Operators may only care that the URL is minted for the deployment or the deployment eventually scales down to 0.
  """

- # `ATLEAST_ONE_RUNNING` implies that atleast one worker of the current deployment instance's version has started running.
+ # `ATLEAST_ONE_RUNNING` implies that at least one worker of the current deployment instance's version has started running.
  ATLEAST_ONE_RUNNING = "at_least_one_running"

  # `ALL_RUNNING` implies that all workers of the current deployment instance's version have started running (i.e. all workers aligning to the minimum number of replicas).
  # It doesn't imply that all the workers relating to other deployments have been torn down.
  ALL_RUNNING = "all_running"

- # `FULLY_FINISHED` implies Atleast min_replicas number of workers are running for the deployment and there are no pending or crashlooping workers from previous versions lying around.
+ # `FULLY_FINISHED` implies at least min_replicas number of workers are running for the deployment and there are no pending or crashlooping workers from previous versions lying around.
  FULLY_FINISHED = "fully_finished"

  # `ASYNC` implies that the deployment will be assumed ready after the URL is minted and the worker statuses are not checked.
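
A minimal sketch (not the shipped implementation) of how these four conditions can be read as predicates over worker counts; the function name is illustrative:

def deployment_ready(condition, running, pending, crashlooping, min_replicas):
    if condition == "at_least_one_running":
        return running >= min(min_replicas, 1)
    if condition == "all_running":
        return running >= min_replicas
    if condition == "fully_finished":
        return running >= min_replicas and pending == 0 and crashlooping == 0
    if condition == "async":
        return True  # ready as soon as the deployment URL is minted
    raise ValueError("unknown readiness condition: %s" % condition)
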
@@ -442,14 +443,16 @@ def _capsule_worker_semantic_status(
  xx[worker_version].append(w)
  return xx

+ # phases can be Pending, Running, Succeeded, Failed, Unknown, CrashLoopBackOff
  pending_workers = _make_version_dict(workers, "Pending")
  running_workers = _make_version_dict(workers, "Running")
  crashlooping_workers = _make_version_dict(workers, "CrashLoopBackOff")
+ failed_workers = _make_version_dict(workers, "Failed")

  # current_status (formulated basis):
- # - atleast one pods are pending for `_end_state_capsule_version`
- # - atleast one pod is in Running state for `_end_state_capsule_version` (maybe terminal) [Might require heath-check thing here]
- # - alteast one pod is crashlooping for `_end_state_capsule_version` (maybe terminal)
+ # - at least one pods are pending for `_end_state_capsule_version`
+ # - at least one pod is in Running state for `_end_state_capsule_version` (maybe terminal) [Might require health-check thing here]
+ # - at least one pod is crashlooping for `_end_state_capsule_version` (maybe terminal)
  # - all pods are running for `_end_state_capsule_version` that match the minimum number of replicas
  # - all pods are running for `_end_state_capsule_version` that match the maximum number of replicas and no other pods of older versions are running
  # - no pods relating to `_end_state_capsule_version` are pending/running/crashlooping
@@ -464,7 +467,8 @@ def _capsule_worker_semantic_status(
  "at_least_one_running": (
  count_for_version(running_workers) >= min(min_replicas, 1)
  ),
- "at_least_one_crashlooping": count_for_version(crashlooping_workers) > 0,
+ "at_least_one_crashlooping": count_for_version(crashlooping_workers) > 0
+ or count_for_version(failed_workers) > 0,
  "none_present": (
  count_for_version(running_workers) == 0
  and count_for_version(pending_workers) == 0
@@ -484,6 +488,7 @@ def _capsule_worker_semantic_status(
  "pending": count_for_version(pending_workers),
  "running": count_for_version(running_workers),
  "crashlooping": count_for_version(crashlooping_workers),
+ "failed": count_for_version(failed_workers),
  },
  }

@@ -491,6 +496,7 @@ def _capsule_worker_semantic_status(
  "pending": pending_workers,
  "running": running_workers,
  "crashlooping": crashlooping_workers,
+ "failed": failed_workers,
  }

  return {
metaflow_extensions/outerbounds/plugins/apps/core/app_cli.py

@@ -239,7 +239,7 @@ def _bake_image(app_config: AppConfig, cache_dir: str, logger):
  baking_status.resolved_image,
  )
  app_config.set_state("python_path", baking_status.python_path)
- logger("🐳 Using The Docker Image : %s" % app_config.get_state("image"))
+ logger("🐳 Using the docker image : %s" % app_config.get_state("image"))


  def print_table(data, headers):
@@ -374,7 +374,7 @@ def _package_necessary_things(app_config: AppConfig, logger):
  # or is it relative to where the caller command is sitting. Ideally it should work
  # like Kustomizations where its relative to where the yaml file sits for simplicity
  # of understanding relationships between config files. Ideally users can pass the src_path
- # from the command line and that will aliviate any need to package any other directories for
+ # from the command line and that will alleviate any need to package any other directories for
  #

  package_dir = app_config.get_state("packaging_directory")
@@ -395,7 +395,7 @@ def _package_necessary_things(app_config: AppConfig, logger):
  )
  app_config.set_state("code_package_url", package_url)
  app_config.set_state("code_package_key", package_key)
- logger("💾 Code Package Saved to : %s" % app_config.get_state("code_package_url"))
+ logger("💾 Code package saved to : %s" % app_config.get_state("code_package_url"))


  @app.command(help="Deploy an app to the Outerbounds Platform.")
@@ -465,7 +465,7 @@ def deploy(

  app_config.set_state("packaging_directory", packaging_directory)
  logger(
- "📦 Packaging Directory : %s" % app_config.get_state("packaging_directory"),
+ "📦 Packaging directory : %s" % app_config.get_state("packaging_directory"),
  )

  if app_config.get("no_deps", False):
@@ -611,7 +611,7 @@ def deploy(
  )
  raise AppConfigError(message)
  capsule_logger(
- f"🚀 {'' if not force_upgrade else 'Force'} Upgrading {capsule.capsule_type.lower()} `{capsule.name}`....",
+ f"🚀 {'Upgrading' if not force_upgrade else 'Force upgrading'} {capsule.capsule_type.lower()} `{capsule.name}`....",
  color=ColorTheme.INFO_COLOR,
  system_msg=True,
  )
@@ -632,7 +632,7 @@ def deploy(
  capsule_spinner.stop()

  logger(
- f"💊 {capsule.capsule_type} {app_config.config['name']} ({capsule.identifier}) deployed! {capsule.capsule_type} exposed on the URL: {capsule.url}",
+ f"💊 {capsule.capsule_type} {app_config.config['name']} ({capsule.identifier}) deployed! {capsule.capsule_type} available on the URL: {capsule.url}",
  color=ColorTheme.INFO_COLOR,
  system_msg=True,
  )
@@ -761,7 +761,7 @@ def list(ctx, project, branch, name, tags, format, auth_type):
  def delete(ctx, name, cap_id, project, branch, tags, auto_approve):

  """Delete an app/apps from the Outerbounds Platform."""
- # Atleast one of the args need to be provided
+ # At least one of the args need to be provided
  if not any(
  [
  name is not None,
@@ -772,7 +772,7 @@ def delete(ctx, name, cap_id, project, branch, tags, auto_approve):
  ]
  ):
  raise AppConfigError(
- "Atleast one of the options need to be provided. You can use --name, --id, --project, --branch, --tag"
+ "At least one of the options need to be provided. You can use --name, --id, --project, --branch, --tag"
  )

  capsule_api = CapsuleApi(ctx.obj.api_url, ctx.obj.perimeter)
metaflow_extensions/outerbounds/plugins/apps/core/capsule.py

@@ -44,24 +44,24 @@ class CapsuleStateMachine:
  - Happy Path:
  - First time Create :
  - wait for status.updateInProgress to be set to False
- - (interleved) Poll the worker endpoints to check their status
+ - (interleaved) Poll the worker endpoints to check their status
  - showcase how many workers are coming up if things are on the cli side.
  - If the user has set some flag like `--dont-wait-to-fully-finish` then we check the `status.currentlyServedVersion` to see if even one replica is ready to
  serve traffic.
  - once the status.updateInProgress is set to False, it means that the replicas are ready
  - Upgrade:
  - wait for status.updateInProgress to be set to False
- - (interleved) Poll the worker endpoints to check their status and signal the user the number replicas coming up
+ - (interleaved) Poll the worker endpoints to check their status and signal the user the number replicas coming up
  - If the user has set some flag like `--dont-wait-to-fully-finish` then we check the `status.currentlyServedVersion` to see if even one replica is ready to
  serve traffic.
  - Unhappy Path:
  - First time Create :
  - wait for status.updateInProgress to be set to False,
- - (interleved) Poll the workers to check their status.
+ - (interleaved) Poll the workers to check their status.
  - If the worker pertaining the current deployment instance version is crashlooping then crash the deployment process with the error messages and logs.
  - Upgrade:
  - wait for status.updateInProgress to be set to False,
- - (interleved) Poll the workers to check their status.
+ - (interleaved) Poll the workers to check their status.
  - If the worker pertaining the current deployment instance version is crashlooping then crash the deployment process with the error messages and logs.

  """
@@ -210,9 +210,9 @@ class CapsuleInput:
  def construct_exec_command(cls, commands: List[str]):
  commands = ["set -eEuo pipefail"] + commands
  command_string = "\n".join(commands)
- # First constuct a base64 encoded string of the quoted command
+ # First construct a base64 encoded string of the quoted command
  # One of the reasons we don't directly pass the command string to the backend with a `\n` join
- # is because the backend controller doesnt play nice when the command can be a multi-line string.
+ # is because the backend controller doesn't play nice when the command can be a multi-line string.
  # So we encode it to a base64 string and then decode it back to a command string at runtime to provide to
  # `bash -c`. The ideal thing to have done is to run "bash -c {shlex.quote(command_string)}" and call it a day
  # but the backend controller yields the following error:
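
A rough sketch of the encode/decode round trip those comments describe (the second command and the exact runtime invocation are assumptions, not the controller's literal format):

import base64

commands = ["set -eEuo pipefail", "python serve.py"]  # second command illustrative
encoded = base64.b64encode("\n".join(commands).encode()).decode()
# Decoded back at runtime and handed to `bash -c`:
exec_command = 'bash -c "$(echo %s | base64 -d)"' % encoded
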
@@ -255,6 +255,12 @@ class CapsuleInput:
  replicas.get("min"),
  replicas.get("max"),
  )
+ rpm = replicas.get("scaling_policy", {}).get("rpm", None)
+ autoscaling_config = {}
+ if rpm:
+ autoscaling_config = {
+ "requestRateBasedAutoscalingConfig": {"targetRequestsPerMinute": rpm}
+ }
  if fixed is not None:
  _min, _max = fixed, fixed
  gpu_resource = app_config.get_state("resources").get("gpu")
@@ -296,6 +302,7 @@ class CapsuleInput:
  "autoscalingConfig": {
  "minReplicas": _min,
  "maxReplicas": _max,
+ **autoscaling_config,
  },
  **_scheduling_config,
  "containerStartupConfig": {
@@ -682,7 +689,7 @@ class CapsuleDeployer:
  """
  - `capsule_response.version` contains the version of the object present in the database
  - `current_deployment_instance_version` contains the version of the object that was deployed by this instance of the deployer.
- In the situtation that the versions of the objects become a mismatch then it means that current deployment process is not giving the user the
+ In the situation that the versions of the objects become a mismatch then it means that current deployment process is not giving the user the
  output that they desire.
  """
  if capsule_response.get("version", None) != current_deployment_instance_version:
@@ -713,7 +720,7 @@ class CapsuleDeployer:
  workers_status: List[WorkerStatus],
  ):
  for worker in workers_status:
- if worker["phase"] == "CrashLoopBackOff":
+ if worker["phase"] == "CrashLoopBackOff" or worker["phase"] == "Failed":
  return worker["workerId"]
  return None

@@ -783,24 +790,22 @@ class CapsuleDeployer:
  )
  if capsule_ready or failure_condition_satisfied:
  logger(
- "💊 %s deployment status: %s | worker states: [success :%s | failure :%s ] "
+ "💊 %s deployment status: %s "
  % (
  self.capsule_type.title(),
  "in progress"
  if state_machine.update_in_progress
  else "completed",
- capsule_ready,
- failure_condition_satisfied,
  )
  )
  _further_readiness_check_failed = False
  if further_check_worker_readiness:
  # HACK : monitor the workers for N seconds to make sure they are healthy
- # this is a hack. Ideally we should implment a healtcheck as a first class citizen
+ # this is a hack. Ideally we should implement a healthcheck as a first class citizen
  # but it will take some time to do that so in the meanwhile a timeout set on the cli
  # side will be really helpful.
  logger(
- "💊 running last minute readiness check for %s..."
+ "💊 Running last minute readiness check for %s..."
  % self.identifier
  )
  _further_readiness_check_failed = self._monitor_worker_readiness(
@@ -853,7 +858,7 @@ class CapsuleDeployer:
  workers_state_machine.save_debug_info(self._debug_dir)
  if i % 3 == 0: # Every 3 seconds report the status
  logger(
- f"[debug] 💊 {self.capsule_type} {self.identifier} deployment status: {state_machine.current_status} | worker states: {workers_state_machine.current_status}"
+ f"[debug] 💊 {self.capsule_type} {self.identifier} deployment status: {state_machine.current_status} | worker states: {workers_state_machine.current_status} | capsule_ready : {capsule_ready} | further_check_worker_readiness {further_check_worker_readiness}"
  )

  # We will only check ready_to_serve_traffic under the following conditions:
metaflow_extensions/outerbounds/plugins/apps/core/config/typed_configs.py

@@ -51,6 +51,11 @@ class ReplicaConfigDict(TypedDict, total=False):
  fixed: Optional[int]
  min: Optional[int]
  max: Optional[int]
+ scaling_policy: Optional["ScalingPolicyConfigDict"]
+
+
+ class ScalingPolicyConfigDict(TypedDict, total=False):
+ rpm: Optional[int]


  class DependencyConfigDict(TypedDict, total=False):
metaflow_extensions/outerbounds/plugins/apps/core/config/unified_config.py

@@ -301,6 +301,29 @@ class AuthConfig(metaclass=ConfigMeta):
  )


+ class ScalingPolicyConfig(metaclass=ConfigMeta):
+ """
+ Policies for autoscaling replicas. Available policies:
+ - Request based Autoscaling (rpm)
+ """
+
+ # TODO Change the defaulting if we have more autoscaling policies.
+ rpm = ConfigField(
+ field_type=int,
+ # TODO: Add a little more to the docstring where we explain the behavior.
+ cli_meta=CLIOption(
+ name="scaling_rpm",
+ cli_option_str="--scaling-rpm",
+ help=(
+ "Scale up replicas when the requests per minute crosses this threshold. "
+ "If nothing is provided and the replicas.max and replicas.min is set then "
+ "the default rpm would be 60."
+ ),
+ ),
+ default=60,
+ )
+
+
  class ReplicaConfig(metaclass=ConfigMeta):
  """Replica configuration."""

@@ -333,6 +356,16 @@ class ReplicaConfig(metaclass=ConfigMeta):
  example=10,
  )

+ scaling_policy = ConfigField(
+ cli_meta=None,
+ field_type=ScalingPolicyConfig,
+ help=(
+ "Scaling policy defines the the metric based on which the replicas will horizontally scale. "
+ "If min and max replicas are set and are not the same, then a scaling policy will be applied. "
+ "Default scaling policies can be 60 rpm (ie 1 rps). "
+ ),
+ )
+
  @staticmethod
  def defaults(replica_config: "ReplicaConfig"):
  if all(
@@ -346,6 +379,7 @@ class ReplicaConfig(metaclass=ConfigMeta):
  replica_config.fixed = 1
  elif replica_config.min is not None and replica_config.max is None:
  replica_config.max = replica_config.min
+
  return

  @staticmethod
metaflow_extensions/outerbounds/plugins/apps/core/config_schema.yaml

@@ -161,6 +161,21 @@ properties:
  type: integer
  example: 10
  mutation_behavior: union
+ scaling_policy:
+ title: ScalingPolicyConfig
+ description: |-
+ Policies for autoscaling replicas. Available policies:
+ - Request based Autoscaling (rpm)
+ type: object
+ required: []
+ properties:
+ rpm:
+ description: |-
+ Scale up replicas when the requests per minute crosses this threshold. If nothing is provided and the replicas.max and replicas.min is set then the default rpm would be 60.
+ type: integer
+ default: 60
+ mutation_behavior: union
+ mutation_behavior: union
  mutation_behavior: union
  dependencies:
  title: DependencyConfig
metaflow_extensions/outerbounds/plugins/apps/core/dependencies.py

@@ -95,7 +95,7 @@ def bake_deployment_image(
  pinned_conda_libs = get_pinned_conda_libs(python_version, DEFAULT_DATASTORE)
  pypi_packages.update(pinned_conda_libs)
  _reference = app_config.get("name", "default")
- # `image` cannot be None. If by change it is none, FB will fart.
+ # `image` cannot be None. If by chance it is none, FB will fart.
  fb_response = bake_image(
  cache_file_path=cache_file_path,
  pypi_packages=pypi_packages,
metaflow_extensions/outerbounds/plugins/apps/core/deployer.py

@@ -151,7 +151,6 @@ class AppDeployer(TypedCoreConfig):
  final_status["id"],
  final_status["auth_type"],
  final_status["public_url"],
- final_status["available_replicas"],
  final_status["name"],
  final_status["deployed_version"],
  final_status["deployed_at"],
@@ -164,7 +163,6 @@ class DeployedApp:
  _id: str,
  capsule_type: str,
  public_url: str,
- available_replicas: int,
  name: str,
  deployed_version: str,
  deployed_at: str,
@@ -172,7 +170,6 @@ class DeployedApp:
  self._id = _id
  self._capsule_type = capsule_type
  self._public_url = public_url
- self._available_replicas = available_replicas
  self._name = name
  self._deployed_version = deployed_version
  self._deployed_at = deployed_at
@@ -208,6 +205,10 @@ class DeployedApp:
  capsule = capsule_api.get(self._id)
  return capsule

+ def replicas(self):
+ capsule_api = self._get_capsule_api()
+ return capsule_api.get_workers(self._id)
+
  def scale_to_zero(self):
  """
  Scales the DeployedApp to 0 replicas.
@@ -243,10 +244,6 @@ class DeployedApp:
  def public_url(self) -> str:
  return self._public_url

- @property
- def available_replicas(self) -> int:
- return self._available_replicas
-
  @property
  def name(self) -> str:
  return self._name
@@ -260,7 +257,6 @@ class DeployedApp:
  "id": self._id,
  "auth_style": self.auth_style, # TODO : Fix naming here.
  "public_url": self._public_url,
- "available_replicas": self._available_replicas,
  "name": self._name,
  "deployed_version": self._deployed_version,
  "deployed_at": self._deployed_at,
@@ -272,7 +268,6 @@ class DeployedApp:
  _id=data["id"],
  capsule_type=data["capsule_type"],
  public_url=data["public_url"],
- available_replicas=data["available_replicas"],
  name=data["name"],
  deployed_version=data["deployed_version"],
  deployed_at=data["deployed_at"],
@@ -287,7 +282,6 @@ class DeployedApp:
  f"DeployedApp(id='{self._id}', "
  f"name='{self._name}', "
  f"public_url='{self._public_url}', "
- f"available_replicas={self._available_replicas}, "
  f"deployed_version='{self._deployed_version}')"
  )
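
With the `available_replicas` snapshot gone, replica information is now fetched live through the new `replicas()` method shown above. A usage sketch, assuming `deployed_app` is a `DeployedApp` instance:

workers = deployed_app.replicas()  # current workers via capsule_api.get_workers(id)
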
metaflow_extensions/outerbounds/plugins/apps/core/secrets.py

@@ -116,7 +116,7 @@ class SecretRetriever:

  if not perimeter:
  raise OuterboundsSecretsException(
- "No perimeter set. Please make sure to run `outerbounds configure <...>` command which can be found on the Ourebounds UI or reach out to your Outerbounds support team."
+ "No perimeter set. Please make sure to run `outerbounds configure <...>` command which can be found on the Outerbounds UI or reach out to your Outerbounds support team."
  )

  if not integrations_url:
metaflow_extensions/outerbounds/plugins/apps/core/utils.py

@@ -185,8 +185,8 @@ def safe_requests_wrapper(
  - How to handle retries for this case will be application specific.
  2. Errors when the API server may not be reachable (DNS resolution / network issues)
  - In this scenario, we know that something external to the API server is going wrong causing the issue.
- - Failing pre-maturely in the case might not be the best course of action since critical user jobs might crash on intermittent issues.
- - So in this case, we can just planely retry the request.
+ - Failing prematurely in the case might not be the best course of action since critical user jobs might crash on intermittent issues.
+ - So in this case, we can just plainly retry the request.

  This function handles the second case. It's a simple wrapper to handle the retry logic for connection errors.
  If this function is provided a `conn_error_retries` of 5, then the last retry will have waited 32 seconds.
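
A quick check of that claim, assuming the wait doubles on each connection-error retry (2**n seconds):

conn_error_retries = 5
waits = [2 ** n for n in range(1, conn_error_retries + 1)]
print(waits)  # [2, 4, 8, 16, 32] -> the last retry fires after a 32 second wait
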
metaflow_extensions/outerbounds/plugins/s3_proxy/__init__.py

@@ -0,0 +1,7 @@
+ from .s3_proxy_decorator import (
+ S3ProxyDecorator,
+ NebiusS3ProxyDecorator,
+ CoreWeaveS3ProxyDecorator,
+ )
+
+ __all__ = ["S3ProxyDecorator", "NebiusS3ProxyDecorator", "CoreWeaveS3ProxyDecorator"]
metaflow_extensions/outerbounds/plugins/s3_proxy/constants.py

@@ -0,0 +1,8 @@
+ S3_PROXY_BINARY_URLS = {
+ "aarch64": "https://fast-s3-proxy.outerbounds.sh/linux-arm64/s3-proxy-0.1.1.gz",
+ "x86_64": "https://fast-s3-proxy.outerbounds.sh/linux-amd64/s3-proxy-0.1.1.gz",
+ }
+
+ DEFAULT_PROXY_PORT = 8081
+ DEFAULT_PROXY_HOST = "localhost"
+ S3_PROXY_WRITE_MODES = ["origin-and-cache", "origin", "cache"]
metaflow_extensions/outerbounds/plugins/s3_proxy/exceptions.py

@@ -0,0 +1,13 @@
+ from metaflow.exception import MetaflowException
+
+
+ class S3ProxyException(MetaflowException):
+ headline = "S3 Proxy Error"
+
+
+ class S3ProxyConfigException(S3ProxyException):
+ headline = "S3 Proxy Configuration Error"
+
+
+ class S3ProxyApiException(S3ProxyException):
+ headline = "S3 Proxy API Error"
metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_api.py

@@ -0,0 +1,93 @@
+ import json
+ import time
+ from typing import Dict, Optional
+
+ from .exceptions import S3ProxyConfigException, S3ProxyApiException
+
+
+ class S3ProxyConfigResponse:
+ def __init__(self, data: Dict):
+ self.bucket_name = data.get("bucket_name")
+ self.endpoint_url = data.get("endpoint_url")
+ self.access_key_id = data.get("access_key_id")
+ self.secret_access_key = data.get("secret_access_key")
+ self.region = data.get("region")
+
+
+ class S3ProxyApiClient:
+ def __init__(self):
+ self.perimeter, self.integrations_url = self._get_api_configs()
+
+ def _get_api_configs(self):
+ from metaflow_extensions.outerbounds.remote_config import init_config
+ from os import environ
+
+ conf = init_config()
+ perimeter = conf.get("OBP_PERIMETER") or environ.get("OBP_PERIMETER", "")
+ integrations_url = conf.get("OBP_INTEGRATIONS_URL") or environ.get(
+ "OBP_INTEGRATIONS_URL", ""
+ )
+
+ if not perimeter:
+ raise S3ProxyConfigException(
+ "No perimeter set. Please run `outerbounds configure` command."
+ )
+
+ if not integrations_url:
+ raise S3ProxyConfigException(
+ "No integrations URL set. Please contact your Outerbounds support team."
+ )
+
+ return perimeter, integrations_url
+
+ def fetch_s3_proxy_config(
+ self, integration_name: Optional[str] = None
+ ) -> S3ProxyConfigResponse:
+ url = f"{self.integrations_url}/s3proxy"
+
+ payload = {"perimeter_name": self.perimeter}
+ if integration_name:
+ payload["integration_name"] = integration_name
+
+ headers = {"Content-Type": "application/json"}
+
+ try:
+ from metaflow.metaflow_config import SERVICE_HEADERS
+
+ headers.update(SERVICE_HEADERS or {})
+ except ImportError:
+ pass
+
+ response = self._make_request(url, headers, payload)
+ return S3ProxyConfigResponse(response)
+
+ def _make_request(self, url: str, headers: Dict, payload: Dict) -> Dict:
+ from metaflow_extensions.outerbounds.plugins.secrets.secrets import (
+ _api_server_get,
+ )
+
+ retryable_status_codes = [409]
+ json_payload = json.dumps(payload)
+
+ for attempt in range(3):
+ response = _api_server_get(
+ url, data=json_payload, headers=headers, conn_error_retries=5
+ )
+
+ if response.status_code not in retryable_status_codes:
+ break
+
+ if attempt < 2:
+ time.sleep(0.5 * (attempt + 1))
+
+ if response.status_code != 200:
+ error_msg = f"API request failed with status {response.status_code}"
+ try:
+ error_data = response.json()
+ if "message" in error_data:
+ error_msg = error_data["message"]
+ except:
+ pass
+ raise S3ProxyApiException(error_msg)
+
+ return response.json()
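
A usage sketch for the client above; the integration name is illustrative, and the call assumes it runs where the Outerbounds perimeter and integrations URL are configured:

client = S3ProxyApiClient()
cfg = client.fetch_s3_proxy_config(integration_name="my-s3-proxy")
print(cfg.bucket_name, cfg.endpoint_url, cfg.region)
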
metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_decorator.py

@@ -0,0 +1,106 @@
+ import functools
+ from typing import Optional
+
+ from metaflow import current
+ from metaflow.decorators import StepDecorator
+
+ from .s3_proxy_manager import S3ProxyManager
+ from .exceptions import S3ProxyException
+ from .constants import S3_PROXY_WRITE_MODES
+
+
+ class S3ProxyDecorator(StepDecorator):
+ """
+ S3 Proxy decorator for routing S3 requests through a local proxy service.
+
+ Parameters
+ ----------
+ integration_name : str, optional
+ Name of the S3 proxy integration. If not specified, will use the only
+ available S3 proxy integration in the namespace (fails if multiple exist).
+ write_mode : str, optional
+ The desired behavior during write operations to target (origin) S3 bucket.
+ allowed options are:
+ "origin-and-cache" -> write to both the target S3 bucket and local object
+ storage
+ "origin" -> only write to the target S3 bucket
+ "cache" -> only write to the object storage service used for caching
+ debug : bool, optional
+ Enable debug logging for proxy operations.
+ """
+
+ name = "s3_proxy"
+ defaults = {
+ "integration_name": None,
+ "write_mode": None,
+ "debug": False,
+ }
+
+ def step_init(self, flow, graph, step, decos, environment, flow_datastore, logger):
+ write_mode = self.attributes["write_mode"]
+ if write_mode and write_mode not in S3_PROXY_WRITE_MODES:
+ raise S3ProxyException(
+ f"unexpected write_mode specified: {write_mode}. Allowed values are: {','.join(S3_PROXY_WRITE_MODES)}."
+ )
+
+ self.manager = S3ProxyManager(
+ integration_name=self.attributes["integration_name"],
+ write_mode=self.attributes["write_mode"],
+ debug=self.attributes["debug"],
+ )
+
+ current._update_env({"s3_proxy": self.manager})
+
+ def task_pre_step(
+ self,
+ step_name,
+ task_datastore,
+ metadata,
+ run_id,
+ task_id,
+ flow,
+ graph,
+ retry_count,
+ max_user_code_retries,
+ ubf_context,
+ inputs,
+ ):
+ """Setup S3 proxy before step execution"""
+ self.manager.setup_proxy()
+
+ def task_finished(
+ self, step_name, flow, graph, is_task_ok, retry_count, max_retries
+ ):
+ """Cleanup S3 proxy after step execution"""
+ if self.manager:
+ self.manager.cleanup()
+
+
+ class NebiusS3ProxyDecorator(S3ProxyDecorator):
+ """
+ Nebius-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
+ It exists to make it easier for users to know that this decorator should only be used with
+ a Neo Cloud like Nebius.
+ """
+
+ name = "nebius_s3_proxy"
+ defaults = {
+ "integration_name": None,
+ "write_mode": None,
+ "debug": False,
+ }
+
+
+ class CoreWeaveS3ProxyDecorator(S3ProxyDecorator):
+ """
+ CoreWeave-specific S3 Proxy decorator for routing S3 requests through a local proxy service.
+ It exists to make it easier for users to know that this decorator should only be used with
+ a Neo Cloud like CoreWeave.
+ """
+
+ name = "coreweave_s3_proxy"
+ defaults = {
+ "integration_name": None,
+ "write_mode": None,
+ "debug": False,
+ }
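
Given the registrations at the top of this diff, a flow might opt a step into the proxy roughly as follows. This assumes the decorators are re-exported from the `metaflow` top level, as registered extension step decorators usually are; the integration name is illustrative:

from metaflow import FlowSpec, step, s3_proxy

class ProxiedFlow(FlowSpec):
    @s3_proxy(integration_name="my-s3-proxy", write_mode="origin-and-cache")
    @step
    def start(self):
        # S3 reads/writes inside this task go through the local proxy.
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    ProxiedFlow()
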
metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_manager.py

@@ -0,0 +1,222 @@
+ import os
+ import json
+ import gzip
+ import time
+ import threading
+ import subprocess
+ from pathlib import Path
+ from typing import Optional
+
+ import requests
+
+ from .constants import (
+ S3_PROXY_BINARY_URLS,
+ DEFAULT_PROXY_PORT,
+ DEFAULT_PROXY_HOST,
+ )
+ from metaflow.metaflow_config import AWS_SECRETS_MANAGER_DEFAULT_REGION
+ from .s3_proxy_api import S3ProxyApiClient
+ from .exceptions import S3ProxyException
+
+
+ class S3ProxyManager:
+ def __init__(
+ self,
+ integration_name: Optional[str] = None,
+ write_mode: Optional[str] = None,
+ debug: bool = False,
+ ):
+ self.integration_name = integration_name
+ self.write_mode = write_mode
+ self.debug = debug
+ self.process = None
+ self.binary_path = None
+ self.config_path = None
+ self.api_client = S3ProxyApiClient()
+ self.proxy_config = None
+
+ def setup_proxy(self) -> bool:
+ try:
+ if self._is_running_in_kubernetes():
+ config_data = self.api_client.fetch_s3_proxy_config(
+ self.integration_name
+ )
+ self.binary_path = self._download_binary()
+ self.config_path = self._write_config_file(config_data)
+ self.process = self._start_proxy_process()
+ self._setup_proxy_config(config_data)
+ return True
+
+ print(
+ "[@s3_proxy] skipping s3-proxy set up because metaflow has not detected a Kubernetes environment"
+ )
+ return False
+ except Exception as e:
+ if self.debug:
+ print(f"[@s3_proxy] Setup failed: {e}")
+ self.cleanup()
+ raise
+
+ def _is_running_in_kubernetes(self) -> bool:
+ """Check if running inside a Kubernetes pod by checking for Kubernetes service account token."""
+ return (
+ os.path.exists("/var/run/secrets/kubernetes.io/serviceaccount/token")
+ and os.environ.get("KUBERNETES_SERVICE_HOST") is not None
+ )
+
+ def _download_binary(self) -> str:
+ binary_path = Path("/tmp/s3-proxy")
+ if binary_path.exists():
+ if self.debug:
+ print("[@s3_proxy] Binary already exists, skipping download")
+ return str(binary_path.absolute())
+
+ try:
+ if self.debug:
+ print("[@s3_proxy] Downloading binary...")
+
+ from platform import machine
+
+ arch = machine()
+ if arch not in S3_PROXY_BINARY_URLS:
+ raise S3ProxyException(
+ f"unsupported platform architecture: {arch}. Please reach out to your Outerbounds Support team for more help."
+ )
+
+ response = requests.get(S3_PROXY_BINARY_URLS[arch], stream=True, timeout=60)
+ response.raise_for_status()
+
+ with open(binary_path, "wb") as f:
+ with gzip.GzipFile(fileobj=response.raw) as gz:
+ f.write(gz.read())
+
+ binary_path.chmod(0o755)
+
+ if self.debug:
+ print("[@s3_proxy] Binary downloaded successfully")
+
+ return str(binary_path.absolute())
+
+ except Exception as e:
+ if self.debug:
+ print(f"[@s3_proxy] Binary download failed: {e}")
+ raise S3ProxyException(f"Failed to download S3 proxy binary: {e}")
+
+ def _write_config_file(self, config_data) -> str:
+ config_path = Path("/tmp/s3-proxy-config.json")
+
+ proxy_config = {
+ "bucketName": config_data.bucket_name,
+ "endpointUrl": config_data.endpoint_url,
+ "accessKeyId": config_data.access_key_id,
+ "accessKeySecret": config_data.secret_access_key,
+ "region": config_data.region,
+ }
+
+ config_path.write_text(json.dumps(proxy_config, indent=2))
+
+ if self.debug:
+ print(f"[@s3_proxy] Config written to {config_path}")
+
+ return str(config_path.absolute())
+
+ def _start_proxy_process(self) -> subprocess.Popen:
+ cmd = [self.binary_path, "--bucket-config", self.config_path, "serve"]
+
+ if self.debug:
+ print(f"[@s3_proxy] Starting proxy: {' '.join(cmd)}")
+
+ process = subprocess.Popen(
+ cmd,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT, # Redirect stderr to stdout
+ text=True,
+ start_new_session=True,
+ )
+
+ self._setup_log_streaming(process)
+
+ time.sleep(3)
+
+ if process.poll() is None:
+ if self.debug:
+ print(f"[@s3_proxy] Proxy started successfully (pid: {process.pid})")
+
+ return process
+ else:
+ stdout_data, stderr_data = process.communicate()
+ if self.debug:
+ print(f"[@s3_proxy] Proxy failed to start - output: {stdout_data}")
+ raise S3ProxyException(f"S3 proxy failed to start: {stdout_data}")
+
+ def _setup_log_streaming(self, process: subprocess.Popen):
+ def stream_logs():
+ try:
+ # Read stdout line by line (stderr is redirected to stdout)
+ while True:
+ line = process.stdout.readline()
+ if not line:
+ # Process has ended
+ break
+ line = line.strip()
+ if line and self.debug:
+ print(f"[@s3_proxy] {line}")
+
+ except Exception as e:
+ if self.debug:
+ print(f"[@s3_proxy] Log streaming error: {e}")
+
+ log_thread = threading.Thread(target=stream_logs, daemon=True)
+ log_thread.start()
+
+ def _setup_proxy_config(self, config_data):
+ from metaflow_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import (
+ set_s3_proxy_config,
+ )
+ from metaflow.metaflow_config import AWS_SECRETS_MANAGER_DEFAULT_REGION
+
+ region = os.environ.get(
+ "METAFLOW_AWS_SECRETS_MANAGER_DEFAULT_REGION",
+ AWS_SECRETS_MANAGER_DEFAULT_REGION,
+ )
+
+ proxy_config = {
+ "endpoint_url": f"http://{DEFAULT_PROXY_HOST}:{DEFAULT_PROXY_PORT}",
+ "region": region,
+ "bucket_name": config_data.bucket_name,
+ "active": True,
+ }
+
+ if self.write_mode:
+ proxy_config["write_mode"] = self.write_mode
+
+ set_s3_proxy_config(proxy_config)
+ self.proxy_config = proxy_config
+
+ if self.debug:
+ print("[@s3_proxy] Global S3 proxy configuration activated")
+
+ def cleanup(self):
+ try:
+ from metaflow_extensions.outerbounds.toplevel.global_aliases_for_metaflow_package import (
+ clear_s3_proxy_config,
+ )
+
+ clear_s3_proxy_config()
+
+ if self.process and self.process.poll() is None:
+ self.process.terminate()
+ self.process.wait(timeout=5)
+ if self.debug:
+ print("[@s3_proxy] Proxy process stopped")
+
+ from os import remove
+
+ remove(self.config_path)
+ remove(self.binary_path)
+
+ except Exception as e:
+ if self.debug:
+ print(f"[@s3_proxy] Cleanup error: {e}")
+ finally:
+ self.proxy_config = None
metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py

@@ -5,6 +5,27 @@
  __version__ = "v1"
  __mf_extensions__ = "ob"

+ from metaflow_extensions.outerbounds.toplevel.s3_proxy import (
+ get_aws_client_with_s3_proxy,
+ get_S3_with_s3_proxy,
+ )
+
+ _S3_PROXY_CONFIG = None
+
+
+ def set_s3_proxy_config(config):
+ global _S3_PROXY_CONFIG
+ _S3_PROXY_CONFIG = config
+
+
+ def clear_s3_proxy_config():
+ global _S3_PROXY_CONFIG
+ _S3_PROXY_CONFIG = None
+
+
+ def get_s3_proxy_config():
+ return _S3_PROXY_CONFIG
+

  # Must match the signature of metaflow.plugins.aws.aws_client.get_aws_client
  # This function is called by the "userland" code inside tasks. Metaflow internals
@@ -34,7 +55,12 @@ def get_aws_client(
  if decorator_role_arn:
  role_arn = decorator_role_arn

- return metaflow.plugins.aws.aws_client.get_aws_client(
+ if module == "s3" and _S3_PROXY_CONFIG is not None:
+ return get_aws_client_with_s3_proxy(
+ module, with_error, role_arn, session_vars, client_params, _S3_PROXY_CONFIG
+ )
+
+ client = metaflow.plugins.aws.aws_client.get_aws_client(
  module,
  with_error=with_error,
  role_arn=role_arn or USE_CSPR_ROLE_ARN_IF_SET,
@@ -42,6 +68,8 @@ def get_aws_client(
  client_params=client_params,
  )

+ return client
+

  # This should match the signature of metaflow.plugins.datatools.s3.S3.
  #
@@ -68,6 +96,10 @@ def S3(*args, **kwargs):
  else:
  kwargs["role"] = USE_CSPR_ROLE_ARN_IF_SET

+ # Check if S3 proxy is active using module variable (like CSPR)
+ if _S3_PROXY_CONFIG is not None:
+ return get_S3_with_s3_proxy(_S3_PROXY_CONFIG, *args, **kwargs)
+
  return metaflow.plugins.datatools.s3.S3(*args, **kwargs)
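
Net effect for user code, sketched from a task's point of view. This assumes these aliases are what `metaflow` surfaces at import time, per the module's comments; the bucket path is illustrative. While a proxy config is active the same call is transparently redirected to the local proxy endpoint, otherwise it behaves exactly as stock Metaflow:

from metaflow import S3  # resolves to the aliased S3 above

with S3(s3root="s3://my-bucket/prefix") as s3:
    s3.put("key", "value")  # routed through the local proxy when a proxy config is active
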
metaflow_extensions/outerbounds/toplevel/s3_proxy.py

@@ -0,0 +1,88 @@
+ from metaflow_extensions.outerbounds.plugins import USE_CSPR_ROLE_ARN_IF_SET
+ from metaflow.metaflow_config import AWS_SECRETS_MANAGER_DEFAULT_REGION
+ from metaflow_extensions.outerbounds.plugins.s3_proxy.constants import (
+ DEFAULT_PROXY_HOST,
+ DEFAULT_PROXY_PORT,
+ )
+
+
+ def get_aws_client_with_s3_proxy(
+ module,
+ with_error=False,
+ role_arn=None,
+ session_vars=None,
+ client_params=None,
+ s3_config=None,
+ ):
+ if not client_params:
+ client_params = {}
+
+ client_params["region_name"] = client_params.get(
+ "region_name", s3_config.get("region")
+ )
+ client_params["endpoint_url"] = s3_config.get(
+ "endpoint_url", f"http://{DEFAULT_PROXY_HOST}:{DEFAULT_PROXY_PORT}"
+ )
+
+ import metaflow.plugins.aws.aws_client
+
+ client = metaflow.plugins.aws.aws_client.get_aws_client(
+ module,
+ with_error=with_error,
+ role_arn=role_arn or USE_CSPR_ROLE_ARN_IF_SET,
+ session_vars=session_vars,
+ client_params=client_params,
+ )
+
+ def override_s3_proxy_host_header(request, **kwargs):
+ region = kwargs["region_name"]
+ request.headers["Host"] = f"s3.{region}.amazonaws.com"
+ if "x-ob-write-to" not in request.headers and "write_mode" in s3_config:
+ request.headers["x-ob-write-to"] = s3_config.get("write_mode")
+
+ client.meta.events.register("before-sign", override_s3_proxy_host_header)
+
+ return client
+
+
+ def get_S3_with_s3_proxy(s3_config, *args, **kwargs):
+ if "region_name" not in kwargs:
+ kwargs["region_name"] = s3_config.get(
+ "region", AWS_SECRETS_MANAGER_DEFAULT_REGION
+ )
+
+ kwargs["endpoint_url"] = s3_config.get(
+ "endpoint_url", f"http://{DEFAULT_PROXY_HOST}:{DEFAULT_PROXY_PORT}"
+ )
+
+ import metaflow.plugins.datatools.s3
+
+ mf_s3 = metaflow.plugins.datatools.s3.S3(*args, **kwargs)
+
+ # Override reset_client to ensure proxy endpoint is preserved
+ original_reset_client = mf_s3._s3_client.reset_client
+
+ def proxy_reset_client():
+ original_reset_client()
+ import boto3
+
+ proxy_client = boto3.client(
+ "s3",
+ region_name=kwargs.get("region_name", s3_config.get("region")),
+ endpoint_url=s3_config.get("endpoint_url"),
+ )
+ mf_s3._s3_client._s3_client = proxy_client
+
+ mf_s3._s3_client.reset_client = proxy_reset_client
+ mf_s3._s3_client.reset_client()
+
+ def override_s3_proxy_host_header(request, **kwargs):
+ region = kwargs["region_name"]
+ request.headers["Host"] = f"s3.{region}.amazonaws.com"
+ if "x-ob-write-to" not in request.headers and "write_mode" in s3_config:
+ request.headers["x-ob-write-to"] = s3_config.get("write_mode")
+
+ mf_s3._s3_client._s3_client.meta.events.register(
+ "before-sign", override_s3_proxy_host_header
+ )
+ return mf_s3
{ob_metaflow_extensions-1.3.0.dist-info → ob_metaflow_extensions-1.3.2.dist-info}/METADATA

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: ob-metaflow-extensions
- Version: 1.3.0
+ Version: 1.3.2
  Summary: Outerbounds Platform Extensions for Metaflow
  Author: Outerbounds, Inc.
  License: Commercial
{ob_metaflow_extensions-1.3.0.dist-info → ob_metaflow_extensions-1.3.2.dist-info}/RECORD

@@ -1,7 +1,7 @@
  metaflow_extensions/outerbounds/__init__.py,sha256=Gb8u06s9ClQsA_vzxmkCzuMnigPy7kKcDnLfb7eB-64,514
  metaflow_extensions/outerbounds/remote_config.py,sha256=pEFJuKDYs98eoB_-ryPjVi9b_c4gpHMdBHE14ltoxIU,4672
  metaflow_extensions/outerbounds/config/__init__.py,sha256=JsQGRuGFz28fQWjUvxUgR8EKBLGRdLUIk_buPLJplJY,1225
- metaflow_extensions/outerbounds/plugins/__init__.py,sha256=c4zZnULrBOU2bmleU_D7Xr5H3kTuTgDhtZIKUAJiNqw,13795
+ metaflow_extensions/outerbounds/plugins/__init__.py,sha256=ISFPm4cn8IvfZnM6Q4LqDalycr02v6gMsCPo9mRNSwk,14028
  metaflow_extensions/outerbounds/plugins/auth_server.py,sha256=_Q9_2EL0Xy77bCRphkwT1aSu8gQXRDOH-Z-RxTUO8N4,2202
  metaflow_extensions/outerbounds/plugins/perimeters.py,sha256=QXh3SFP7GQbS-RAIxUOPbhPzQ7KDFVxZkTdKqFKgXjI,2697
  metaflow_extensions/outerbounds/plugins/apps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -12,18 +12,18 @@ metaflow_extensions/outerbounds/plugins/apps/consts.py,sha256=iHsyqbUg9k-rgswCs1
  metaflow_extensions/outerbounds/plugins/apps/deploy_decorator.py,sha256=VkmiMdNYHhNdt-Qm9AVv7aE2LWFsIFEc16YcOYjwF6Q,8568
  metaflow_extensions/outerbounds/plugins/apps/supervisord_utils.py,sha256=GQoN2gyPClcpR9cLldJmbCfqXnoAHxp8xUnY7vzaYtY,9026
  metaflow_extensions/outerbounds/plugins/apps/core/__init__.py,sha256=c6uCgKlgEkTmM9BVdAO-m3vZvUpK2KW_AZZ2236now4,237
- metaflow_extensions/outerbounds/plugins/apps/core/_state_machine.py,sha256=8z4MnPeat3Vm8ekLIMJj8vVsvUAQ__i8daf-9UItUIQ,19926
- metaflow_extensions/outerbounds/plugins/apps/core/app_cli.py,sha256=9YyvOQzPNlpxA2K9AZ4jYpfDWpLSp66u_NotGGE5DHg,42155
+ metaflow_extensions/outerbounds/plugins/apps/core/_state_machine.py,sha256=al907t2C86BPf4V1V03PLTJRJMOc8gdl1CxLLbklnDU,20281
+ metaflow_extensions/outerbounds/plugins/apps/core/app_cli.py,sha256=V0Ki_VwjVIyIa2sgXPC7miOPLYWLrsHvzMpTfQypU2U,42169
  metaflow_extensions/outerbounds/plugins/apps/core/app_config.py,sha256=PHt-HdNfTHIuhY-eB5vkRMp1RKQNWJ4DKdgZWyYgUuc,4167
  metaflow_extensions/outerbounds/plugins/apps/core/artifacts.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- metaflow_extensions/outerbounds/plugins/apps/core/capsule.py,sha256=9_LrpZcDg3w0FZvs9h_6Mmiy5YTYLCLnEu8wnNau24E,34445
+ metaflow_extensions/outerbounds/plugins/apps/core/capsule.py,sha256=VpCmq8R13GNex6aTJnOCswkLnc8acgsQQ9Da6KBh2sQ,34732
  metaflow_extensions/outerbounds/plugins/apps/core/click_importer.py,sha256=kgoPQmK_-8PSSTc3QMSaynCLQ5VWTkKFOC69FPURyXA,998
- metaflow_extensions/outerbounds/plugins/apps/core/config_schema.yaml,sha256=iTThO2vNQrFWe9nYfjiOcMf6FOQ6vU_1ZhXhUAr0L24,8142
- metaflow_extensions/outerbounds/plugins/apps/core/dependencies.py,sha256=HDPj7rDARcsKeex5GwH0IP8rOXMH6YdOufgXDknP1S8,4006
- metaflow_extensions/outerbounds/plugins/apps/core/deployer.py,sha256=VkYe8mK_VOr-bAiR2RohhKeLf8Z3gHZw7RoRBSCu2FA,9765
+ metaflow_extensions/outerbounds/plugins/apps/core/config_schema.yaml,sha256=LfA72d_bqsAuRzFZ9q-DfbiUy1mLimuFQfGwIEhoKNo,8745
+ metaflow_extensions/outerbounds/plugins/apps/core/dependencies.py,sha256=JlWT9f27yzZeJPlqTQk134WDfQgOdyxC5iaw3pLlhqY,4006
+ metaflow_extensions/outerbounds/plugins/apps/core/deployer.py,sha256=dNKlDu6n8SufEd5NKmsErl1RYhQXuEe_DgtA0mk7awg,9472
  metaflow_extensions/outerbounds/plugins/apps/core/perimeters.py,sha256=jeFGAUnFQkBFiOMp_Ls7Ofb80Qogh509suam5sMucYU,3030
- metaflow_extensions/outerbounds/plugins/apps/core/secrets.py,sha256=aWzcAayQEJghQgFP_qp9w6jyvan_hoL4_ceqZ0ZjLd4,6126
- metaflow_extensions/outerbounds/plugins/apps/core/utils.py,sha256=RLO6p25Fzq4olLFtQmfSl9LT0NPDfUosxPrsjO9sczo,7897
+ metaflow_extensions/outerbounds/plugins/apps/core/secrets.py,sha256=sgDiAmpSC8Y5xjlaOEp79F6m0S3x4RONf_vJ5PUAfu8,6127
+ metaflow_extensions/outerbounds/plugins/apps/core/utils.py,sha256=2M2zU8DhbAlJee8P0xKXINAku81PcUylS3sVCSb0TUs,7896
  metaflow_extensions/outerbounds/plugins/apps/core/validations.py,sha256=Inr9AJDe-L3PMMMxcJPH1zulh9_SynqITb2BzGseLh4,471
  metaflow_extensions/outerbounds/plugins/apps/core/_vendor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.py,sha256=gTm0NdQGTRxmghye1CYkhRtodji0MezsqAWs9OrLLRc,102
@@ -35,9 +35,9 @@ metaflow_extensions/outerbounds/plugins/apps/core/config/__init__.py,sha256=ZgC9
  metaflow_extensions/outerbounds/plugins/apps/core/config/cli_generator.py,sha256=0R0-wy7RxAMR9doVRvuluRYxAYgyjZXlTIkOeYGyz7M,5350
  metaflow_extensions/outerbounds/plugins/apps/core/config/config_utils.py,sha256=bozzUR8rbfOnb5M532RZxB5QNvVgEC1gnVjfCvQ82Yk,34053
  metaflow_extensions/outerbounds/plugins/apps/core/config/schema_export.py,sha256=tigPtb0we-urwbmctG1GbaQ9NKRKZn4KBbJKmaEntCg,9501
- metaflow_extensions/outerbounds/plugins/apps/core/config/typed_configs.py,sha256=bAC2lV1xWtcw0r2LPlqDrggeXPLOyrtZha2KDpm_Vx0,4454
+ metaflow_extensions/outerbounds/plugins/apps/core/config/typed_configs.py,sha256=euoS1Ap4yvHC20Aaj5YQWMgxixkxujVeiJ7C4DcAFhQ,4590
  metaflow_extensions/outerbounds/plugins/apps/core/config/typed_init_generator.py,sha256=KiJ1eiwtBR5eWdBzWqvO6KlqJ2qzjJvl3w4c1uJ3g0Y,13419
- metaflow_extensions/outerbounds/plugins/apps/core/config/unified_config.py,sha256=f_Qxy-pJoyGq_tUYr_gHmS7INEivLqv4QbOJljIwECA,35837
+ metaflow_extensions/outerbounds/plugins/apps/core/config/unified_config.py,sha256=bO-g_6mv7xciVcDf4Jn-qioPUUvg9Y3fMM5fcraN2Sk,37018
  metaflow_extensions/outerbounds/plugins/apps/core/experimental/__init__.py,sha256=rd4qGTkHndKYfJmoAKZWiY0KK4j5BK6RBrtle-it1Mg,2746
  metaflow_extensions/outerbounds/plugins/aws/__init__.py,sha256=VBGdjNKeFLXGZuqh4jVk8cFtO1AWof73a6k_cnbAOYA,145
  metaflow_extensions/outerbounds/plugins/aws/assume_role.py,sha256=mBewNlnSYsR2rFXFkX-DUH6ku01h2yOcMcLHoCL7eyI,161
@@ -86,6 +86,12 @@ metaflow_extensions/outerbounds/plugins/ollama/status_card.py,sha256=F5e4McDl28l
  metaflow_extensions/outerbounds/plugins/profilers/deco_injector.py,sha256=oI_C3c64XBm7n88FILqHwn-Nnc5DeT_68I67lM9rXaI,2434
  metaflow_extensions/outerbounds/plugins/profilers/gpu_profile_decorator.py,sha256=gDHQ2sMIp4NuZSzUspbSd8RGdFAoO5mgZAyFcZ2a51Y,2619
  metaflow_extensions/outerbounds/plugins/profilers/simple_card_decorator.py,sha256=4W9tLGCmkFx-4XYLa1xF6qMiaWOBYYFx_RclZDKej30,3259
+ metaflow_extensions/outerbounds/plugins/s3_proxy/__init__.py,sha256=9Kw86B331pQJAzkfBMPIDoPrJsW0LVRHXBYikbcc2xk,204
+ metaflow_extensions/outerbounds/plugins/s3_proxy/constants.py,sha256=Jjsd3cuo3IMi8rcKsUJx2PK188hMhFNyPTNKCFKfAQI,319
+ metaflow_extensions/outerbounds/plugins/s3_proxy/exceptions.py,sha256=IkPqDvSeYQukNeu0aIVCmfQWTvUHsTs-qv7nvry2KjM,305
+ metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_api.py,sha256=WjpprW0tCICLOihFywEtgJbCnx-OFmwuT_hR27ACl2A,3007
+ metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_decorator.py,sha256=yqQryVjaUQ6Aq_SMI8IRHXwzPokkznHncLDpLSEcQeM,3285
+ metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_manager.py,sha256=qmSypJXY-l7P2sI4mO6y-Rut5vGL2m1TjvGIXHUi6vs,7379
  metaflow_extensions/outerbounds/plugins/secrets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  metaflow_extensions/outerbounds/plugins/secrets/secrets.py,sha256=3s98hO_twKkM22tKyDdcUjGQNfYpSXW_jLKISV9ju_U,8433
  metaflow_extensions/outerbounds/plugins/snowflake/__init__.py,sha256=RG4ixt3jwqcK1_tt0QxLcUbNmf7wWAMnZhBx-ZMGgLk,114
@@ -108,8 +114,9 @@ metaflow_extensions/outerbounds/plugins/vllm/vllm_manager.py,sha256=sp_TX2SrImJG
  metaflow_extensions/outerbounds/profilers/__init__.py,sha256=wa_jhnCBr82TBxoS0e8b6_6sLyZX0fdHicuGJZNTqKw,29
  metaflow_extensions/outerbounds/profilers/gpu.py,sha256=3Er8uKQzfm_082uadg4yn_D4Y-iSCgzUfFmguYxZsz4,27485
  metaflow_extensions/outerbounds/toplevel/__init__.py,sha256=qWUJSv_r5hXJ7jV_On4nEasKIfUCm6_UjkjXWA_A1Ts,90
- metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py,sha256=xVCIvKeEzMwu2vfsWYqnq4aetMGmPBjpzOXrZfBr5iI,3036
+ metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py,sha256=StTRMBHjuxfxe-wQs8ikoAZc4xnhlceY0R4avaJ1Ps8,3823
  metaflow_extensions/outerbounds/toplevel/ob_internal.py,sha256=DXCaAtLzlE-bFIiVWEv-iV2JKIWsoSGaUeH4jIQZ9gs,193
+ metaflow_extensions/outerbounds/toplevel/s3_proxy.py,sha256=zdqG7Z12cGuoYYCi2P4kqC3WsgL3xfdJGIb7ejecHH4,2862
  metaflow_extensions/outerbounds/toplevel/plugins/azure/__init__.py,sha256=WUuhz2YQfI4fz7nIcipwwWq781eaoHEk7n4GAn1npDg,63
  metaflow_extensions/outerbounds/toplevel/plugins/gcp/__init__.py,sha256=BbZiaH3uILlEZ6ntBLKeNyqn3If8nIXZFq_Apd7Dhco,70
  metaflow_extensions/outerbounds/toplevel/plugins/kubernetes/__init__.py,sha256=5zG8gShSj8m7rgF4xgWBZFuY3GDP5n1T0ktjRpGJLHA,69
@@ -117,7 +124,7 @@ metaflow_extensions/outerbounds/toplevel/plugins/ollama/__init__.py,sha256=GRSz2
  metaflow_extensions/outerbounds/toplevel/plugins/snowflake/__init__.py,sha256=LptpH-ziXHrednMYUjIaosS1SXD3sOtF_9_eRqd8SJw,50
  metaflow_extensions/outerbounds/toplevel/plugins/torchtune/__init__.py,sha256=uTVkdSk3xZ7hEKYfdlyVteWj5KeDwaM1hU9WT-_YKfI,50
  metaflow_extensions/outerbounds/toplevel/plugins/vllm/__init__.py,sha256=ekcgD3KVydf-a0xMI60P4uy6ePkSEoFHiGnDq1JM940,45
- ob_metaflow_extensions-1.3.0.dist-info/METADATA,sha256=AqSHKfzK42NBfzMZAHKk3eqN2WWVkxbQQNp_EQA_EnA,518
- ob_metaflow_extensions-1.3.0.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
- ob_metaflow_extensions-1.3.0.dist-info/top_level.txt,sha256=NwG0ukwjygtanDETyp_BUdtYtqIA_lOjzFFh1TsnxvI,20
- ob_metaflow_extensions-1.3.0.dist-info/RECORD,,
+ ob_metaflow_extensions-1.3.2.dist-info/METADATA,sha256=6Q5Etz6OSCEQL-RBWyUWuJqcrHj8J92vfslA-ldZs4M,518
+ ob_metaflow_extensions-1.3.2.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
+ ob_metaflow_extensions-1.3.2.dist-info/top_level.txt,sha256=NwG0ukwjygtanDETyp_BUdtYtqIA_lOjzFFh1TsnxvI,20
+ ob_metaflow_extensions-1.3.2.dist-info/RECORD,,