ob-metaflow-extensions 1.1.151__py2.py3-none-any.whl → 1.4.33__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. metaflow_extensions/outerbounds/__init__.py +1 -1
  2. metaflow_extensions/outerbounds/plugins/__init__.py +17 -3
  3. metaflow_extensions/outerbounds/plugins/apps/app_cli.py +0 -0
  4. metaflow_extensions/outerbounds/plugins/apps/app_deploy_decorator.py +146 -0
  5. metaflow_extensions/outerbounds/plugins/apps/core/__init__.py +10 -0
  6. metaflow_extensions/outerbounds/plugins/apps/core/_state_machine.py +506 -0
  7. metaflow_extensions/outerbounds/plugins/apps/core/_vendor/__init__.py +0 -0
  8. metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.py +4 -0
  9. metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.py +478 -0
  10. metaflow_extensions/outerbounds/plugins/apps/core/app_cli.py +1200 -0
  11. metaflow_extensions/outerbounds/plugins/apps/core/app_config.py +146 -0
  12. metaflow_extensions/outerbounds/plugins/apps/core/artifacts.py +0 -0
  13. metaflow_extensions/outerbounds/plugins/apps/core/capsule.py +958 -0
  14. metaflow_extensions/outerbounds/plugins/apps/core/click_importer.py +24 -0
  15. metaflow_extensions/outerbounds/plugins/apps/core/code_package/__init__.py +3 -0
  16. metaflow_extensions/outerbounds/plugins/apps/core/code_package/code_packager.py +618 -0
  17. metaflow_extensions/outerbounds/plugins/apps/core/code_package/examples.py +125 -0
  18. metaflow_extensions/outerbounds/plugins/apps/core/config/__init__.py +12 -0
  19. metaflow_extensions/outerbounds/plugins/apps/core/config/cli_generator.py +161 -0
  20. metaflow_extensions/outerbounds/plugins/apps/core/config/config_utils.py +868 -0
  21. metaflow_extensions/outerbounds/plugins/apps/core/config/schema_export.py +288 -0
  22. metaflow_extensions/outerbounds/plugins/apps/core/config/typed_configs.py +139 -0
  23. metaflow_extensions/outerbounds/plugins/apps/core/config/typed_init_generator.py +398 -0
  24. metaflow_extensions/outerbounds/plugins/apps/core/config/unified_config.py +1088 -0
  25. metaflow_extensions/outerbounds/plugins/apps/core/config_schema.yaml +337 -0
  26. metaflow_extensions/outerbounds/plugins/apps/core/dependencies.py +115 -0
  27. metaflow_extensions/outerbounds/plugins/apps/core/deployer.py +303 -0
  28. metaflow_extensions/outerbounds/plugins/apps/core/experimental/__init__.py +89 -0
  29. metaflow_extensions/outerbounds/plugins/apps/core/perimeters.py +87 -0
  30. metaflow_extensions/outerbounds/plugins/apps/core/secrets.py +164 -0
  31. metaflow_extensions/outerbounds/plugins/apps/core/utils.py +233 -0
  32. metaflow_extensions/outerbounds/plugins/apps/core/validations.py +17 -0
  33. metaflow_extensions/outerbounds/plugins/aws/__init__.py +4 -0
  34. metaflow_extensions/outerbounds/plugins/aws/assume_role.py +3 -0
  35. metaflow_extensions/outerbounds/plugins/aws/assume_role_decorator.py +78 -0
  36. metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py +9 -77
  37. metaflow_extensions/outerbounds/plugins/checkpoint_datastores/external_chckpt.py +85 -0
  38. metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py +7 -78
  39. metaflow_extensions/outerbounds/plugins/fast_bakery/baker.py +110 -0
  40. metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +17 -3
  41. metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +1 -0
  42. metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +18 -44
  43. metaflow_extensions/outerbounds/plugins/kubernetes/pod_killer.py +374 -0
  44. metaflow_extensions/outerbounds/plugins/nim/card.py +1 -6
  45. metaflow_extensions/outerbounds/plugins/nim/{__init__.py → nim_decorator.py} +13 -49
  46. metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +294 -233
  47. metaflow_extensions/outerbounds/plugins/nim/utils.py +36 -0
  48. metaflow_extensions/outerbounds/plugins/nvcf/constants.py +2 -2
  49. metaflow_extensions/outerbounds/plugins/nvct/nvct_decorator.py +32 -8
  50. metaflow_extensions/outerbounds/plugins/nvct/nvct_runner.py +1 -1
  51. metaflow_extensions/outerbounds/plugins/ollama/__init__.py +171 -16
  52. metaflow_extensions/outerbounds/plugins/ollama/constants.py +1 -0
  53. metaflow_extensions/outerbounds/plugins/ollama/exceptions.py +22 -0
  54. metaflow_extensions/outerbounds/plugins/ollama/ollama.py +1710 -114
  55. metaflow_extensions/outerbounds/plugins/ollama/status_card.py +292 -0
  56. metaflow_extensions/outerbounds/plugins/optuna/__init__.py +48 -0
  57. metaflow_extensions/outerbounds/plugins/profilers/simple_card_decorator.py +96 -0
  58. metaflow_extensions/outerbounds/plugins/s3_proxy/__init__.py +7 -0
  59. metaflow_extensions/outerbounds/plugins/s3_proxy/binary_caller.py +132 -0
  60. metaflow_extensions/outerbounds/plugins/s3_proxy/constants.py +11 -0
  61. metaflow_extensions/outerbounds/plugins/s3_proxy/exceptions.py +13 -0
  62. metaflow_extensions/outerbounds/plugins/s3_proxy/proxy_bootstrap.py +59 -0
  63. metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_api.py +93 -0
  64. metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_decorator.py +250 -0
  65. metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_manager.py +225 -0
  66. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +6 -3
  67. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +13 -7
  68. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +8 -2
  69. metaflow_extensions/outerbounds/plugins/torchtune/__init__.py +163 -0
  70. metaflow_extensions/outerbounds/plugins/vllm/__init__.py +255 -0
  71. metaflow_extensions/outerbounds/plugins/vllm/constants.py +1 -0
  72. metaflow_extensions/outerbounds/plugins/vllm/exceptions.py +1 -0
  73. metaflow_extensions/outerbounds/plugins/vllm/status_card.py +352 -0
  74. metaflow_extensions/outerbounds/plugins/vllm/vllm_manager.py +621 -0
  75. metaflow_extensions/outerbounds/remote_config.py +27 -3
  76. metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +86 -2
  77. metaflow_extensions/outerbounds/toplevel/ob_internal.py +4 -0
  78. metaflow_extensions/outerbounds/toplevel/plugins/optuna/__init__.py +1 -0
  79. metaflow_extensions/outerbounds/toplevel/plugins/torchtune/__init__.py +1 -0
  80. metaflow_extensions/outerbounds/toplevel/plugins/vllm/__init__.py +1 -0
  81. metaflow_extensions/outerbounds/toplevel/s3_proxy.py +88 -0
  82. {ob_metaflow_extensions-1.1.151.dist-info → ob_metaflow_extensions-1.4.33.dist-info}/METADATA +2 -2
  83. ob_metaflow_extensions-1.4.33.dist-info/RECORD +134 -0
  84. metaflow_extensions/outerbounds/plugins/nim/utilities.py +0 -5
  85. ob_metaflow_extensions-1.1.151.dist-info/RECORD +0 -74
  86. {ob_metaflow_extensions-1.1.151.dist-info → ob_metaflow_extensions-1.4.33.dist-info}/WHEEL +0 -0
  87. {ob_metaflow_extensions-1.1.151.dist-info → ob_metaflow_extensions-1.4.33.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  import metaflow.metaflow_config_funcs
2
2
 
3
- from metaflow_extensions.outerbounds.remote_config import init_config
3
+ from metaflow_extensions.outerbounds.remote_config import init_config, reload_config
4
4
 
5
5
  # we want to override OSS Metaflow's initialization behavior with our own to support remote configs
6
6
  # we're reassigning the METAFLOW_CONFIG variable because all downstream settings rely on it and
@@ -41,6 +41,9 @@ def get_boto3_session(role_arn=None, session_vars=None):
41
41
  import boto3
42
42
  import botocore
43
43
  from metaflow_extensions.outerbounds.plugins.auth_server import get_token
44
+ from metaflow_extensions.outerbounds.plugins.aws.assume_role import (
45
+ OBP_ASSUME_ROLE_ARN_ENV_VAR,
46
+ )
44
47
 
45
48
  from hashlib import sha256
46
49
  from metaflow.util import get_username
@@ -69,6 +72,12 @@ def get_boto3_session(role_arn=None, session_vars=None):
69
72
  if token_info.get("cspr_role_arn"):
70
73
  cspr_role = token_info["cspr_role_arn"]
71
74
 
75
+ # Check if the assume_role decorator has set a CSPR ARN via environment variable
76
+ # This takes precedence over CSPR role that comes from the token_info response
77
+ decorator_role_arn = os.environ.get(OBP_ASSUME_ROLE_ARN_ENV_VAR)
78
+ if decorator_role_arn:
79
+ cspr_role = decorator_role_arn
80
+
72
81
  if cspr_role:
73
82
  # If CSPR role is set, we set it as the default role to assume
74
83
  # for the AWS SDK. We do this by writing an AWS config file
@@ -326,9 +335,14 @@ STEP_DECORATORS_DESC = [
326
335
  ("snowpark", ".snowpark.snowpark_decorator.SnowparkDecorator"),
327
336
  ("tensorboard", ".tensorboard.TensorboardDecorator"),
328
337
  ("gpu_profile", ".profilers.gpu_profile_decorator.GPUProfileDecorator"),
329
- ("nim", ".nim.NimDecorator"),
338
+ ("test_append_card", ".profilers.simple_card_decorator.DynamicCardAppendDecorator"),
339
+ ("nim", ".nim.nim_decorator.NimDecorator"),
330
340
  ("ollama", ".ollama.OllamaDecorator"),
331
- ("app_deploy", ".apps.deploy_decorator.WorkstationAppDeployDecorator"),
341
+ ("vllm", ".vllm.VLLMDecorator"),
342
+ ("app_deploy", ".apps.app_deploy_decorator.AppDeployDecorator"),
343
+ ("s3_proxy", ".s3_proxy.s3_proxy_decorator.S3ProxyDecorator"),
344
+ ("nebius_s3_proxy", ".s3_proxy.s3_proxy_decorator.NebiusS3ProxyDecorator"),
345
+ ("coreweave_s3_proxy", ".s3_proxy.s3_proxy_decorator.CoreWeaveS3ProxyDecorator"),
332
346
  ]
333
347
 
334
348
  TOGGLE_STEP_DECORATOR = [
@@ -347,4 +361,4 @@ SECRETS_PROVIDERS_DESC = [
347
361
  ("outerbounds", ".secrets.secrets.OuterboundsSecretsProvider"),
348
362
  ]
349
363
  # Adding an override here so the library can be imported at the metaflow.plugins level
350
- __mf_promote_submodules__ = ["snowflake", "ollama"]
364
+ __mf_promote_submodules__ = ["snowflake", "ollama", "torchtune", "optuna"]
@@ -0,0 +1,146 @@
1
+ from metaflow.exception import MetaflowException
2
+ from metaflow.decorators import StepDecorator
3
+ from metaflow import current
4
+ from .core import AppDeployer, apps
5
+ from .core.perimeters import PerimeterExtractor
6
+ import os
7
+ import hashlib
8
+
9
+
10
+ class AppDeployDecorator(StepDecorator):
11
+
12
+ """
13
+ MF Add To Current
14
+ -----------------
15
+ apps -> metaflow_extensions.outerbounds.plugins.apps.core.apps
16
+
17
+ @@ Returns
18
+ ----------
19
+ apps
20
+ The object carrying the Deployer class to deploy apps.
21
+ """
22
+
23
+ name = "app_deploy"
24
+ defaults = {}
25
+
26
+ package_url = None
27
+ package_sha = None
28
+
29
+ MAX_ENTROPY = 6
30
+ MAX_NAME_LENGTH = 15 - MAX_ENTROPY - 1 # -1 for the hyphen
31
+
32
+ def step_init(self, flow, graph, step, decos, environment, flow_datastore, logger):
33
+ self.logger = logger
34
+ self.environment = environment
35
+ self.step = step
36
+ self.flow_datastore = flow_datastore
37
+
38
+ def _resolve_package_url_and_sha(self):
39
+ return os.environ.get("METAFLOW_CODE_URL", self.package_url), os.environ.get(
40
+ "METAFLOW_CODE_SHA", self.package_sha
41
+ )
42
+
43
+ def _extract_project_info(self):
44
+ project = current.get("project_name")
45
+ branch = current.get("branch_name")
46
+ is_production = current.get("is_production")
47
+ return project, branch, is_production
48
+
49
+ def _resolve_default_image(self, flow):
50
+ # TODO : Resolve the default image over here.
51
+ pass
52
+
53
+ def _resolve_default_name_prefix(self, flow, step_name):
54
+ # TODO: Only tweak MAX_NAME_LENGTH once backend support allows longer names.
55
+ base_prefix = (flow.name + "-" + step_name).lower()
56
+ if len(base_prefix) > self.MAX_NAME_LENGTH:
57
+ base_prefix = "mf-app"
58
+ return base_prefix
59
+
60
+ def task_pre_step(
61
+ self,
62
+ step_name,
63
+ task_datastore,
64
+ metadata,
65
+ run_id,
66
+ task_id,
67
+ flow,
68
+ graph,
69
+ retry_count,
70
+ max_user_code_retries,
71
+ ubf_context,
72
+ inputs,
73
+ ):
74
+ perimeter, api_server = PerimeterExtractor.during_metaflow_execution()
75
+ package_url, package_sha = self._resolve_package_url_and_sha()
76
+ if package_url is None or package_sha is None:
77
+ raise MetaflowException(
78
+ "METAFLOW_CODE_URL or METAFLOW_CODE_SHA is not set. "
79
+ "Please set METAFLOW_CODE_URL and METAFLOW_CODE_SHA in your environment."
80
+ )
81
+ image = os.environ.get("FASTBAKERY_IMAGE", None)
82
+
83
+ # TODO [Apps] - This is temporary. Backend will support longer names in the future.
84
+ default_name = self._resolve_default_name_prefix(flow, step_name)
85
+ project, branch, is_production = self._extract_project_info()
86
+ project_info = {}
87
+ if project is not None:
88
+ project_info["metaflow/project"] = project
89
+ project_info["metaflow/branch"] = branch
90
+ project_info["metaflow/is_production"] = is_production
91
+
92
+ default_tags = {
93
+ "metaflow/flow_name": flow.name,
94
+ "metaflow/step_name": step_name,
95
+ "metaflow/run_id": run_id,
96
+ "metaflow/task_id": task_id,
97
+ "metaflow/retry_count": retry_count,
98
+ "metaflow/pathspec": current.pathspec,
99
+ **project_info,
100
+ }
101
+
102
+ AppDeployer._set_state(
103
+ perimeter,
104
+ api_server,
105
+ code_package_url=package_url,
106
+ code_package_key=package_sha,
107
+ name_prefix=default_name,
108
+ image=image,
109
+ max_entropy=self.MAX_ENTROPY,
110
+ default_tags=[{k: str(v)} for k, v in default_tags.items()],
111
+ )
112
+ current._update_env(
113
+ {
114
+ "apps": apps(),
115
+ }
116
+ )
117
+
118
+ def task_post_step(
119
+ self, step_name, flow, graph, retry_count, max_user_code_retries
120
+ ):
121
+ pass
122
+
123
+ def runtime_init(self, flow, graph, package, run_id):
124
+ # Set some more internal state.
125
+ self.flow = flow
126
+ self.graph = graph
127
+ self.package = package
128
+ self.run_id = run_id
129
+
130
+ def runtime_task_created(
131
+ self, task_datastore, task_id, split_index, input_paths, is_cloned, ubf_context
132
+ ):
133
+ # To execute the Kubernetes job, the job container needs to have
134
+ # access to the code package. We store the package in the datastore
135
+ # which the pod is able to download as part of its entrypoint.
136
+ if not is_cloned:
137
+ self._save_package_once(self.flow_datastore, self.package)
138
+
139
+ @classmethod
140
+ def _save_package_once(cls, flow_datastore, package):
141
+ if cls.package_url is None:
142
+ cls.package_url, cls.package_sha = flow_datastore.save_data(
143
+ [package.blob], len_hint=1
144
+ )[0]
145
+ os.environ["METAFLOW_CODE_URL"] = cls.package_url
146
+ os.environ["METAFLOW_CODE_SHA"] = cls.package_sha
@@ -0,0 +1,10 @@
1
+ from . import app_cli
2
+ from . import config
3
+ from .deployer import AppDeployer, apps
4
+ from .config.typed_configs import (
5
+ ReplicaConfigDict,
6
+ ResourceConfigDict,
7
+ AuthConfigDict,
8
+ DependencyConfigDict,
9
+ PackageConfigDict,
10
+ )