ob-metaflow-extensions 1.1.142__py2.py3-none-any.whl → 1.4.33__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. metaflow_extensions/outerbounds/__init__.py +1 -1
  2. metaflow_extensions/outerbounds/plugins/__init__.py +26 -5
  3. metaflow_extensions/outerbounds/plugins/apps/app_cli.py +0 -0
  4. metaflow_extensions/outerbounds/plugins/apps/app_deploy_decorator.py +146 -0
  5. metaflow_extensions/outerbounds/plugins/apps/core/__init__.py +10 -0
  6. metaflow_extensions/outerbounds/plugins/apps/core/_state_machine.py +506 -0
  7. metaflow_extensions/outerbounds/plugins/apps/core/_vendor/__init__.py +0 -0
  8. metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.py +4 -0
  9. metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.py +478 -0
  10. metaflow_extensions/outerbounds/plugins/apps/core/app_cli.py +1200 -0
  11. metaflow_extensions/outerbounds/plugins/apps/core/app_config.py +146 -0
  12. metaflow_extensions/outerbounds/plugins/apps/core/artifacts.py +0 -0
  13. metaflow_extensions/outerbounds/plugins/apps/core/capsule.py +958 -0
  14. metaflow_extensions/outerbounds/plugins/apps/core/click_importer.py +24 -0
  15. metaflow_extensions/outerbounds/plugins/apps/core/code_package/__init__.py +3 -0
  16. metaflow_extensions/outerbounds/plugins/apps/core/code_package/code_packager.py +618 -0
  17. metaflow_extensions/outerbounds/plugins/apps/core/code_package/examples.py +125 -0
  18. metaflow_extensions/outerbounds/plugins/apps/core/config/__init__.py +12 -0
  19. metaflow_extensions/outerbounds/plugins/apps/core/config/cli_generator.py +161 -0
  20. metaflow_extensions/outerbounds/plugins/apps/core/config/config_utils.py +868 -0
  21. metaflow_extensions/outerbounds/plugins/apps/core/config/schema_export.py +288 -0
  22. metaflow_extensions/outerbounds/plugins/apps/core/config/typed_configs.py +139 -0
  23. metaflow_extensions/outerbounds/plugins/apps/core/config/typed_init_generator.py +398 -0
  24. metaflow_extensions/outerbounds/plugins/apps/core/config/unified_config.py +1088 -0
  25. metaflow_extensions/outerbounds/plugins/apps/core/config_schema.yaml +337 -0
  26. metaflow_extensions/outerbounds/plugins/apps/core/dependencies.py +115 -0
  27. metaflow_extensions/outerbounds/plugins/apps/core/deployer.py +303 -0
  28. metaflow_extensions/outerbounds/plugins/apps/core/experimental/__init__.py +89 -0
  29. metaflow_extensions/outerbounds/plugins/apps/core/perimeters.py +87 -0
  30. metaflow_extensions/outerbounds/plugins/apps/core/secrets.py +164 -0
  31. metaflow_extensions/outerbounds/plugins/apps/core/utils.py +233 -0
  32. metaflow_extensions/outerbounds/plugins/apps/core/validations.py +17 -0
  33. metaflow_extensions/outerbounds/plugins/aws/__init__.py +4 -0
  34. metaflow_extensions/outerbounds/plugins/aws/assume_role.py +3 -0
  35. metaflow_extensions/outerbounds/plugins/aws/assume_role_decorator.py +78 -0
  36. metaflow_extensions/outerbounds/plugins/checkpoint_datastores/__init__.py +2 -0
  37. metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py +71 -0
  38. metaflow_extensions/outerbounds/plugins/checkpoint_datastores/external_chckpt.py +85 -0
  39. metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py +73 -0
  40. metaflow_extensions/outerbounds/plugins/fast_bakery/baker.py +110 -0
  41. metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +17 -3
  42. metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +1 -0
  43. metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +18 -44
  44. metaflow_extensions/outerbounds/plugins/kubernetes/pod_killer.py +374 -0
  45. metaflow_extensions/outerbounds/plugins/nim/card.py +1 -6
  46. metaflow_extensions/outerbounds/plugins/nim/{__init__.py → nim_decorator.py} +13 -49
  47. metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +294 -233
  48. metaflow_extensions/outerbounds/plugins/nim/utils.py +36 -0
  49. metaflow_extensions/outerbounds/plugins/nvcf/constants.py +2 -2
  50. metaflow_extensions/outerbounds/plugins/nvcf/nvcf.py +100 -19
  51. metaflow_extensions/outerbounds/plugins/nvcf/nvcf_decorator.py +6 -1
  52. metaflow_extensions/outerbounds/plugins/nvct/__init__.py +0 -0
  53. metaflow_extensions/outerbounds/plugins/nvct/exceptions.py +71 -0
  54. metaflow_extensions/outerbounds/plugins/nvct/nvct.py +131 -0
  55. metaflow_extensions/outerbounds/plugins/nvct/nvct_cli.py +289 -0
  56. metaflow_extensions/outerbounds/plugins/nvct/nvct_decorator.py +286 -0
  57. metaflow_extensions/outerbounds/plugins/nvct/nvct_runner.py +218 -0
  58. metaflow_extensions/outerbounds/plugins/nvct/utils.py +29 -0
  59. metaflow_extensions/outerbounds/plugins/ollama/__init__.py +171 -16
  60. metaflow_extensions/outerbounds/plugins/ollama/constants.py +1 -0
  61. metaflow_extensions/outerbounds/plugins/ollama/exceptions.py +22 -0
  62. metaflow_extensions/outerbounds/plugins/ollama/ollama.py +1710 -114
  63. metaflow_extensions/outerbounds/plugins/ollama/status_card.py +292 -0
  64. metaflow_extensions/outerbounds/plugins/optuna/__init__.py +48 -0
  65. metaflow_extensions/outerbounds/plugins/profilers/simple_card_decorator.py +96 -0
  66. metaflow_extensions/outerbounds/plugins/s3_proxy/__init__.py +7 -0
  67. metaflow_extensions/outerbounds/plugins/s3_proxy/binary_caller.py +132 -0
  68. metaflow_extensions/outerbounds/plugins/s3_proxy/constants.py +11 -0
  69. metaflow_extensions/outerbounds/plugins/s3_proxy/exceptions.py +13 -0
  70. metaflow_extensions/outerbounds/plugins/s3_proxy/proxy_bootstrap.py +59 -0
  71. metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_api.py +93 -0
  72. metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_decorator.py +250 -0
  73. metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_manager.py +225 -0
  74. metaflow_extensions/outerbounds/plugins/secrets/secrets.py +38 -2
  75. metaflow_extensions/outerbounds/plugins/snowflake/snowflake.py +44 -4
  76. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +6 -3
  77. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +13 -7
  78. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +8 -2
  79. metaflow_extensions/outerbounds/plugins/torchtune/__init__.py +163 -0
  80. metaflow_extensions/outerbounds/plugins/vllm/__init__.py +255 -0
  81. metaflow_extensions/outerbounds/plugins/vllm/constants.py +1 -0
  82. metaflow_extensions/outerbounds/plugins/vllm/exceptions.py +1 -0
  83. metaflow_extensions/outerbounds/plugins/vllm/status_card.py +352 -0
  84. metaflow_extensions/outerbounds/plugins/vllm/vllm_manager.py +621 -0
  85. metaflow_extensions/outerbounds/remote_config.py +27 -3
  86. metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +87 -2
  87. metaflow_extensions/outerbounds/toplevel/ob_internal.py +4 -0
  88. metaflow_extensions/outerbounds/toplevel/plugins/optuna/__init__.py +1 -0
  89. metaflow_extensions/outerbounds/toplevel/plugins/torchtune/__init__.py +1 -0
  90. metaflow_extensions/outerbounds/toplevel/plugins/vllm/__init__.py +1 -0
  91. metaflow_extensions/outerbounds/toplevel/s3_proxy.py +88 -0
  92. {ob_metaflow_extensions-1.1.142.dist-info → ob_metaflow_extensions-1.4.33.dist-info}/METADATA +2 -2
  93. ob_metaflow_extensions-1.4.33.dist-info/RECORD +134 -0
  94. metaflow_extensions/outerbounds/plugins/nim/utilities.py +0 -5
  95. ob_metaflow_extensions-1.1.142.dist-info/RECORD +0 -64
  96. {ob_metaflow_extensions-1.1.142.dist-info → ob_metaflow_extensions-1.4.33.dist-info}/WHEEL +0 -0
  97. {ob_metaflow_extensions-1.1.142.dist-info → ob_metaflow_extensions-1.4.33.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  import metaflow.metaflow_config_funcs
2
2
 
3
- from metaflow_extensions.outerbounds.remote_config import init_config
3
+ from metaflow_extensions.outerbounds.remote_config import init_config, reload_config
4
4
 
5
5
  # we want to override OSS Metaflow's initialization behavior with our own to support remote configs
6
6
  # we're reassigning the METAFLOW_CONFIG variable because all downstream settings rely on it and
@@ -41,6 +41,9 @@ def get_boto3_session(role_arn=None, session_vars=None):
41
41
  import boto3
42
42
  import botocore
43
43
  from metaflow_extensions.outerbounds.plugins.auth_server import get_token
44
+ from metaflow_extensions.outerbounds.plugins.aws.assume_role import (
45
+ OBP_ASSUME_ROLE_ARN_ENV_VAR,
46
+ )
44
47
 
45
48
  from hashlib import sha256
46
49
  from metaflow.util import get_username
@@ -69,6 +72,12 @@ def get_boto3_session(role_arn=None, session_vars=None):
69
72
  if token_info.get("cspr_role_arn"):
70
73
  cspr_role = token_info["cspr_role_arn"]
71
74
 
75
+ # Check if the assume_role decorator has set a CSPR ARN via environment variable
76
+ # This takes precedence over CSPR role that comes from the token_info response
77
+ decorator_role_arn = os.environ.get(OBP_ASSUME_ROLE_ARN_ENV_VAR)
78
+ if decorator_role_arn:
79
+ cspr_role = decorator_role_arn
80
+
72
81
  if cspr_role:
73
82
  # If CSPR role is set, we set it as the default role to assume
74
83
  # for the AWS SDK. We do this by writing an AWS config file
@@ -162,13 +171,18 @@ class ObpAuthProvider(object):
162
171
  client_params = {}
163
172
 
164
173
  from botocore.exceptions import ClientError
174
+ from botocore.config import Config
165
175
 
166
176
  with hide_access_keys():
167
177
  session = get_boto3_session(role_arn, session_vars)
178
+ _client_params = client_params.copy()
179
+ if _client_params.get("config") and type(_client_params["config"]) == dict:
180
+ _client_params["config"] = Config(**_client_params["config"])
181
+
168
182
  if with_error:
169
- return session.client(module, **client_params), ClientError
183
+ return session.client(module, **_client_params), ClientError
170
184
  else:
171
- return session.client(module, **client_params)
185
+ return session.client(module, **_client_params)
172
186
 
173
187
 
174
188
  AWS_CLIENT_PROVIDERS_DESC = [("obp", ".ObpAuthProvider")]
@@ -307,11 +321,13 @@ class ObpGcpAuthProvider(object):
307
321
  GCP_CLIENT_PROVIDERS_DESC = [("obp", ".ObpGcpAuthProvider")]
308
322
  CLIS_DESC = [
309
323
  ("nvidia", ".nvcf.nvcf_cli.cli"),
324
+ ("nvct", ".nvct.nvct_cli.cli"),
310
325
  ("fast-bakery", ".fast_bakery.fast_bakery_cli.cli"),
311
326
  ("snowpark", ".snowpark.snowpark_cli.cli"),
312
327
  ]
313
328
  STEP_DECORATORS_DESC = [
314
329
  ("nvidia", ".nvcf.nvcf_decorator.NvcfDecorator"),
330
+ ("nvct", ".nvct.nvct_decorator.NvctDecorator"),
315
331
  (
316
332
  "fast_bakery_internal",
317
333
  ".fast_bakery.fast_bakery_decorator.InternalFastBakeryDecorator",
@@ -319,9 +335,14 @@ STEP_DECORATORS_DESC = [
319
335
  ("snowpark", ".snowpark.snowpark_decorator.SnowparkDecorator"),
320
336
  ("tensorboard", ".tensorboard.TensorboardDecorator"),
321
337
  ("gpu_profile", ".profilers.gpu_profile_decorator.GPUProfileDecorator"),
322
- ("nim", ".nim.NimDecorator"),
338
+ ("test_append_card", ".profilers.simple_card_decorator.DynamicCardAppendDecorator"),
339
+ ("nim", ".nim.nim_decorator.NimDecorator"),
323
340
  ("ollama", ".ollama.OllamaDecorator"),
324
- ("app_deploy", ".apps.deploy_decorator.WorkstationAppDeployDecorator"),
341
+ ("vllm", ".vllm.VLLMDecorator"),
342
+ ("app_deploy", ".apps.app_deploy_decorator.AppDeployDecorator"),
343
+ ("s3_proxy", ".s3_proxy.s3_proxy_decorator.S3ProxyDecorator"),
344
+ ("nebius_s3_proxy", ".s3_proxy.s3_proxy_decorator.NebiusS3ProxyDecorator"),
345
+ ("coreweave_s3_proxy", ".s3_proxy.s3_proxy_decorator.CoreWeaveS3ProxyDecorator"),
325
346
  ]
326
347
 
327
348
  TOGGLE_STEP_DECORATOR = [
@@ -340,4 +361,4 @@ SECRETS_PROVIDERS_DESC = [
340
361
  ("outerbounds", ".secrets.secrets.OuterboundsSecretsProvider"),
341
362
  ]
342
363
  # Adding an override here so the library can be imported at the metaflow.plugins level
343
- __mf_promote_submodules__ = ["snowflake", "ollama"]
364
+ __mf_promote_submodules__ = ["snowflake", "ollama", "torchtune", "optuna"]
@@ -0,0 +1,146 @@
1
+ from metaflow.exception import MetaflowException
2
+ from metaflow.decorators import StepDecorator
3
+ from metaflow import current
4
+ from .core import AppDeployer, apps
5
+ from .core.perimeters import PerimeterExtractor
6
+ import os
7
+ import hashlib
8
+
9
+
10
+ class AppDeployDecorator(StepDecorator):
11
+
12
+ """
13
+ MF Add To Current
14
+ -----------------
15
+ apps -> metaflow_extensions.outerbounds.plugins.apps.core.apps
16
+
17
+ @@ Returns
18
+ ----------
19
+ apps
20
+ The object carrying the Deployer class to deploy apps.
21
+ """
22
+
23
+ name = "app_deploy"
24
+ defaults = {}
25
+
26
+ package_url = None
27
+ package_sha = None
28
+
29
+ MAX_ENTROPY = 6
30
+ MAX_NAME_LENGTH = 15 - MAX_ENTROPY - 1 # -1 for the hyphen
31
+
32
+ def step_init(self, flow, graph, step, decos, environment, flow_datastore, logger):
33
+ self.logger = logger
34
+ self.environment = environment
35
+ self.step = step
36
+ self.flow_datastore = flow_datastore
37
+
38
+ def _resolve_package_url_and_sha(self):
39
+ return os.environ.get("METAFLOW_CODE_URL", self.package_url), os.environ.get(
40
+ "METAFLOW_CODE_SHA", self.package_sha
41
+ )
42
+
43
+ def _extract_project_info(self):
44
+ project = current.get("project_name")
45
+ branch = current.get("branch_name")
46
+ is_production = current.get("is_production")
47
+ return project, branch, is_production
48
+
49
+ def _resolve_default_image(self, flow):
50
+ # TODO : Resolve the default image over here.
51
+ pass
52
+
53
+ def _resolve_default_name_prefix(self, flow, step_name):
54
+ # TODO: Only tweak MAX_NAME_LENGTH once backend support allows longer names.
55
+ base_prefix = (flow.name + "-" + step_name).lower()
56
+ if len(base_prefix) > self.MAX_NAME_LENGTH:
57
+ base_prefix = "mf-app"
58
+ return base_prefix
59
+
60
+ def task_pre_step(
61
+ self,
62
+ step_name,
63
+ task_datastore,
64
+ metadata,
65
+ run_id,
66
+ task_id,
67
+ flow,
68
+ graph,
69
+ retry_count,
70
+ max_user_code_retries,
71
+ ubf_context,
72
+ inputs,
73
+ ):
74
+ perimeter, api_server = PerimeterExtractor.during_metaflow_execution()
75
+ package_url, package_sha = self._resolve_package_url_and_sha()
76
+ if package_url is None or package_sha is None:
77
+ raise MetaflowException(
78
+ "METAFLOW_CODE_URL or METAFLOW_CODE_SHA is not set. "
79
+ "Please set METAFLOW_CODE_URL and METAFLOW_CODE_SHA in your environment."
80
+ )
81
+ image = os.environ.get("FASTBAKERY_IMAGE", None)
82
+
83
+ # TODO [Apps] - This is temporary. Backend will support longer names in the future.
84
+ default_name = self._resolve_default_name_prefix(flow, step_name)
85
+ project, branch, is_production = self._extract_project_info()
86
+ project_info = {}
87
+ if project is not None:
88
+ project_info["metaflow/project"] = project
89
+ project_info["metaflow/branch"] = branch
90
+ project_info["metaflow/is_production"] = is_production
91
+
92
+ default_tags = {
93
+ "metaflow/flow_name": flow.name,
94
+ "metaflow/step_name": step_name,
95
+ "metaflow/run_id": run_id,
96
+ "metaflow/task_id": task_id,
97
+ "metaflow/retry_count": retry_count,
98
+ "metaflow/pathspec": current.pathspec,
99
+ **project_info,
100
+ }
101
+
102
+ AppDeployer._set_state(
103
+ perimeter,
104
+ api_server,
105
+ code_package_url=package_url,
106
+ code_package_key=package_sha,
107
+ name_prefix=default_name,
108
+ image=image,
109
+ max_entropy=self.MAX_ENTROPY,
110
+ default_tags=[{k: str(v)} for k, v in default_tags.items()],
111
+ )
112
+ current._update_env(
113
+ {
114
+ "apps": apps(),
115
+ }
116
+ )
117
+
118
+ def task_post_step(
119
+ self, step_name, flow, graph, retry_count, max_user_code_retries
120
+ ):
121
+ pass
122
+
123
+ def runtime_init(self, flow, graph, package, run_id):
124
+ # Set some more internal state.
125
+ self.flow = flow
126
+ self.graph = graph
127
+ self.package = package
128
+ self.run_id = run_id
129
+
130
+ def runtime_task_created(
131
+ self, task_datastore, task_id, split_index, input_paths, is_cloned, ubf_context
132
+ ):
133
+ # To execute the Kubernetes job, the job container needs to have
134
+ # access to the code package. We store the package in the datastore
135
+ # which the pod is able to download as part of its entrypoint.
136
+ if not is_cloned:
137
+ self._save_package_once(self.flow_datastore, self.package)
138
+
139
+ @classmethod
140
+ def _save_package_once(cls, flow_datastore, package):
141
+ if cls.package_url is None:
142
+ cls.package_url, cls.package_sha = flow_datastore.save_data(
143
+ [package.blob], len_hint=1
144
+ )[0]
145
+ os.environ["METAFLOW_CODE_URL"] = cls.package_url
146
+ os.environ["METAFLOW_CODE_SHA"] = cls.package_sha
@@ -0,0 +1,10 @@
1
+ from . import app_cli
2
+ from . import config
3
+ from .deployer import AppDeployer, apps
4
+ from .config.typed_configs import (
5
+ ReplicaConfigDict,
6
+ ResourceConfigDict,
7
+ AuthConfigDict,
8
+ DependencyConfigDict,
9
+ PackageConfigDict,
10
+ )