ob-metaflow-extensions 1.1.151__py2.py3-none-any.whl → 1.4.33__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow_extensions/outerbounds/__init__.py +1 -1
- metaflow_extensions/outerbounds/plugins/__init__.py +17 -3
- metaflow_extensions/outerbounds/plugins/apps/app_cli.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/app_deploy_decorator.py +146 -0
- metaflow_extensions/outerbounds/plugins/apps/core/__init__.py +10 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_state_machine.py +506 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.py +4 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.py +478 -0
- metaflow_extensions/outerbounds/plugins/apps/core/app_cli.py +1200 -0
- metaflow_extensions/outerbounds/plugins/apps/core/app_config.py +146 -0
- metaflow_extensions/outerbounds/plugins/apps/core/artifacts.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/core/capsule.py +958 -0
- metaflow_extensions/outerbounds/plugins/apps/core/click_importer.py +24 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/__init__.py +3 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/code_packager.py +618 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/examples.py +125 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/__init__.py +12 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/cli_generator.py +161 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/config_utils.py +868 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/schema_export.py +288 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/typed_configs.py +139 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/typed_init_generator.py +398 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/unified_config.py +1088 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config_schema.yaml +337 -0
- metaflow_extensions/outerbounds/plugins/apps/core/dependencies.py +115 -0
- metaflow_extensions/outerbounds/plugins/apps/core/deployer.py +303 -0
- metaflow_extensions/outerbounds/plugins/apps/core/experimental/__init__.py +89 -0
- metaflow_extensions/outerbounds/plugins/apps/core/perimeters.py +87 -0
- metaflow_extensions/outerbounds/plugins/apps/core/secrets.py +164 -0
- metaflow_extensions/outerbounds/plugins/apps/core/utils.py +233 -0
- metaflow_extensions/outerbounds/plugins/apps/core/validations.py +17 -0
- metaflow_extensions/outerbounds/plugins/aws/__init__.py +4 -0
- metaflow_extensions/outerbounds/plugins/aws/assume_role.py +3 -0
- metaflow_extensions/outerbounds/plugins/aws/assume_role_decorator.py +78 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py +9 -77
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/external_chckpt.py +85 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py +7 -78
- metaflow_extensions/outerbounds/plugins/fast_bakery/baker.py +110 -0
- metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +17 -3
- metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +1 -0
- metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +18 -44
- metaflow_extensions/outerbounds/plugins/kubernetes/pod_killer.py +374 -0
- metaflow_extensions/outerbounds/plugins/nim/card.py +1 -6
- metaflow_extensions/outerbounds/plugins/nim/{__init__.py → nim_decorator.py} +13 -49
- metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +294 -233
- metaflow_extensions/outerbounds/plugins/nim/utils.py +36 -0
- metaflow_extensions/outerbounds/plugins/nvcf/constants.py +2 -2
- metaflow_extensions/outerbounds/plugins/nvct/nvct_decorator.py +32 -8
- metaflow_extensions/outerbounds/plugins/nvct/nvct_runner.py +1 -1
- metaflow_extensions/outerbounds/plugins/ollama/__init__.py +171 -16
- metaflow_extensions/outerbounds/plugins/ollama/constants.py +1 -0
- metaflow_extensions/outerbounds/plugins/ollama/exceptions.py +22 -0
- metaflow_extensions/outerbounds/plugins/ollama/ollama.py +1710 -114
- metaflow_extensions/outerbounds/plugins/ollama/status_card.py +292 -0
- metaflow_extensions/outerbounds/plugins/optuna/__init__.py +48 -0
- metaflow_extensions/outerbounds/plugins/profilers/simple_card_decorator.py +96 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/__init__.py +7 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/binary_caller.py +132 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/constants.py +11 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/exceptions.py +13 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/proxy_bootstrap.py +59 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_api.py +93 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_decorator.py +250 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_manager.py +225 -0
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +6 -3
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +13 -7
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +8 -2
- metaflow_extensions/outerbounds/plugins/torchtune/__init__.py +163 -0
- metaflow_extensions/outerbounds/plugins/vllm/__init__.py +255 -0
- metaflow_extensions/outerbounds/plugins/vllm/constants.py +1 -0
- metaflow_extensions/outerbounds/plugins/vllm/exceptions.py +1 -0
- metaflow_extensions/outerbounds/plugins/vllm/status_card.py +352 -0
- metaflow_extensions/outerbounds/plugins/vllm/vllm_manager.py +621 -0
- metaflow_extensions/outerbounds/remote_config.py +27 -3
- metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +86 -2
- metaflow_extensions/outerbounds/toplevel/ob_internal.py +4 -0
- metaflow_extensions/outerbounds/toplevel/plugins/optuna/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/plugins/torchtune/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/plugins/vllm/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/s3_proxy.py +88 -0
- {ob_metaflow_extensions-1.1.151.dist-info → ob_metaflow_extensions-1.4.33.dist-info}/METADATA +2 -2
- ob_metaflow_extensions-1.4.33.dist-info/RECORD +134 -0
- metaflow_extensions/outerbounds/plugins/nim/utilities.py +0 -5
- ob_metaflow_extensions-1.1.151.dist-info/RECORD +0 -74
- {ob_metaflow_extensions-1.1.151.dist-info → ob_metaflow_extensions-1.4.33.dist-info}/WHEEL +0 -0
- {ob_metaflow_extensions-1.1.151.dist-info → ob_metaflow_extensions-1.4.33.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import metaflow.metaflow_config_funcs
|
|
2
2
|
|
|
3
|
-
from metaflow_extensions.outerbounds.remote_config import init_config
|
|
3
|
+
from metaflow_extensions.outerbounds.remote_config import init_config, reload_config
|
|
4
4
|
|
|
5
5
|
# we want to override OSS Metaflow's initialization behavior with our own to support remote configs
|
|
6
6
|
# we're reassigning the METAFLOW_CONFIG variable because all downstream settings rely on it and
|
|
@@ -41,6 +41,9 @@ def get_boto3_session(role_arn=None, session_vars=None):
|
|
|
41
41
|
import boto3
|
|
42
42
|
import botocore
|
|
43
43
|
from metaflow_extensions.outerbounds.plugins.auth_server import get_token
|
|
44
|
+
from metaflow_extensions.outerbounds.plugins.aws.assume_role import (
|
|
45
|
+
OBP_ASSUME_ROLE_ARN_ENV_VAR,
|
|
46
|
+
)
|
|
44
47
|
|
|
45
48
|
from hashlib import sha256
|
|
46
49
|
from metaflow.util import get_username
|
|
@@ -69,6 +72,12 @@ def get_boto3_session(role_arn=None, session_vars=None):
|
|
|
69
72
|
if token_info.get("cspr_role_arn"):
|
|
70
73
|
cspr_role = token_info["cspr_role_arn"]
|
|
71
74
|
|
|
75
|
+
# Check if the assume_role decorator has set a CSPR ARN via environment variable
|
|
76
|
+
# This takes precedence over CSPR role that comes from the token_info response
|
|
77
|
+
decorator_role_arn = os.environ.get(OBP_ASSUME_ROLE_ARN_ENV_VAR)
|
|
78
|
+
if decorator_role_arn:
|
|
79
|
+
cspr_role = decorator_role_arn
|
|
80
|
+
|
|
72
81
|
if cspr_role:
|
|
73
82
|
# If CSPR role is set, we set it as the default role to assume
|
|
74
83
|
# for the AWS SDK. We do this by writing an AWS config file
|
|
@@ -326,9 +335,14 @@ STEP_DECORATORS_DESC = [
|
|
|
326
335
|
("snowpark", ".snowpark.snowpark_decorator.SnowparkDecorator"),
|
|
327
336
|
("tensorboard", ".tensorboard.TensorboardDecorator"),
|
|
328
337
|
("gpu_profile", ".profilers.gpu_profile_decorator.GPUProfileDecorator"),
|
|
329
|
-
("
|
|
338
|
+
("test_append_card", ".profilers.simple_card_decorator.DynamicCardAppendDecorator"),
|
|
339
|
+
("nim", ".nim.nim_decorator.NimDecorator"),
|
|
330
340
|
("ollama", ".ollama.OllamaDecorator"),
|
|
331
|
-
("
|
|
341
|
+
("vllm", ".vllm.VLLMDecorator"),
|
|
342
|
+
("app_deploy", ".apps.app_deploy_decorator.AppDeployDecorator"),
|
|
343
|
+
("s3_proxy", ".s3_proxy.s3_proxy_decorator.S3ProxyDecorator"),
|
|
344
|
+
("nebius_s3_proxy", ".s3_proxy.s3_proxy_decorator.NebiusS3ProxyDecorator"),
|
|
345
|
+
("coreweave_s3_proxy", ".s3_proxy.s3_proxy_decorator.CoreWeaveS3ProxyDecorator"),
|
|
332
346
|
]
|
|
333
347
|
|
|
334
348
|
TOGGLE_STEP_DECORATOR = [
|
|
@@ -347,4 +361,4 @@ SECRETS_PROVIDERS_DESC = [
|
|
|
347
361
|
("outerbounds", ".secrets.secrets.OuterboundsSecretsProvider"),
|
|
348
362
|
]
|
|
349
363
|
# Adding an override here so the library can be imported at the metaflow.plugins level
|
|
350
|
-
__mf_promote_submodules__ = ["snowflake", "ollama"]
|
|
364
|
+
__mf_promote_submodules__ = ["snowflake", "ollama", "torchtune", "optuna"]
|
|
File without changes
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
from metaflow.exception import MetaflowException
|
|
2
|
+
from metaflow.decorators import StepDecorator
|
|
3
|
+
from metaflow import current
|
|
4
|
+
from .core import AppDeployer, apps
|
|
5
|
+
from .core.perimeters import PerimeterExtractor
|
|
6
|
+
import os
|
|
7
|
+
import hashlib
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class AppDeployDecorator(StepDecorator):
    """
    Step decorator that publishes an app-deployment handle as `current.apps`.

    MF Add To Current
    -----------------
    apps -> metaflow_extensions.outerbounds.plugins.apps.core.apps

        @@ Returns
        ----------
        apps
            The object carrying the Deployer class to deploy apps.
    """

    name = "app_deploy"
    defaults = {}

    # Location of the saved code package. Kept on the class so that
    # `_save_package_once` only writes it to the datastore a single time.
    package_url = None
    package_sha = None

    MAX_ENTROPY = 6
    MAX_NAME_LENGTH = 15 - MAX_ENTROPY - 1  # -1 for the hyphen

    def step_init(self, flow, graph, step, decos, environment, flow_datastore, logger):
        """Keep the handles that later lifecycle hooks read from `self`."""
        self.logger = logger
        self.environment = environment
        self.step = step
        self.flow_datastore = flow_datastore

    def _resolve_package_url_and_sha(self):
        """
        Return the `(url, sha)` pair of the code package.

        The METAFLOW_CODE_URL / METAFLOW_CODE_SHA environment variables win;
        the class-level `package_url` / `package_sha` are the fallback.
        """
        url = os.environ.get("METAFLOW_CODE_URL", self.package_url)
        sha = os.environ.get("METAFLOW_CODE_SHA", self.package_sha)
        return url, sha

    def _extract_project_info(self):
        """Return `(project_name, branch_name, is_production)` read from `current`."""
        return (
            current.get("project_name"),
            current.get("branch_name"),
            current.get("is_production"),
        )

    def _resolve_default_image(self, flow):
        """Placeholder; currently resolves nothing and returns None."""
        # TODO: resolve the default image here.
        return None

    def _resolve_default_name_prefix(self, flow, step_name):
        """
        Build the default app-name prefix, "<flow name>-<step name>" lower-cased.

        Falls back to the fixed prefix "mf-app" when the combined name is
        longer than MAX_NAME_LENGTH.
        """
        # TODO: only tweak MAX_NAME_LENGTH once the backend supports longer names.
        candidate = "-".join((flow.name, step_name)).lower()
        if len(candidate) <= self.MAX_NAME_LENGTH:
            return candidate
        return "mf-app"

    def task_pre_step(
        self,
        step_name,
        task_datastore,
        metadata,
        run_id,
        task_id,
        flow,
        graph,
        retry_count,
        max_user_code_retries,
        ubf_context,
        inputs,
    ):
        """
        Configure `AppDeployer` for this task and expose `current.apps`.

        Raises
        ------
        MetaflowException
            If either the code package URL or its SHA cannot be resolved.
        """
        perimeter, api_server = PerimeterExtractor.during_metaflow_execution()

        code_url, code_sha = self._resolve_package_url_and_sha()
        if code_url is None or code_sha is None:
            raise MetaflowException(
                "METAFLOW_CODE_URL or METAFLOW_CODE_SHA is not set. "
                "Please set METAFLOW_CODE_URL and METAFLOW_CODE_SHA in your environment."
            )

        image = os.environ.get("FASTBAKERY_IMAGE", None)

        # TODO [Apps]: temporary, until the backend supports longer names.
        name_prefix = self._resolve_default_name_prefix(flow, step_name)

        project, branch, is_production = self._extract_project_info()

        tags = {
            "metaflow/flow_name": flow.name,
            "metaflow/step_name": step_name,
            "metaflow/run_id": run_id,
            "metaflow/task_id": task_id,
            "metaflow/retry_count": retry_count,
            "metaflow/pathspec": current.pathspec,
        }
        # Project tags are only attached when a project name is available.
        if project is not None:
            tags["metaflow/project"] = project
            tags["metaflow/branch"] = branch
            tags["metaflow/is_production"] = is_production

        AppDeployer._set_state(
            perimeter,
            api_server,
            code_package_url=code_url,
            code_package_key=code_sha,
            name_prefix=name_prefix,
            image=image,
            max_entropy=self.MAX_ENTROPY,
            # One single-entry dict per tag, with every value stringified.
            default_tags=[{key: str(value)} for key, value in tags.items()],
        )
        current._update_env({"apps": apps()})

    def task_post_step(
        self, step_name, flow, graph, retry_count, max_user_code_retries
    ):
        """No-op; nothing is done after the step."""

    def runtime_init(self, flow, graph, package, run_id):
        """Remember the runtime objects that `runtime_task_created` needs."""
        self.flow = flow
        self.graph = graph
        self.package = package
        self.run_id = run_id

    def runtime_task_created(
        self, task_datastore, task_id, split_index, input_paths, is_cloned, ubf_context
    ):
        """Save the code package to the datastore unless the task is a clone."""
        if is_cloned:
            return
        # To execute the Kubernetes job, the job container needs access to
        # the code package. We store the package in the datastore, which the
        # pod is able to download as part of its entrypoint.
        self._save_package_once(self.flow_datastore, self.package)

    @classmethod
    def _save_package_once(cls, flow_datastore, package):
        """
        Save the package blob once and publish its location.

        On the first call the resulting URL and SHA are stored on the class
        and exported as METAFLOW_CODE_URL / METAFLOW_CODE_SHA; later calls
        return without doing anything.
        """
        if cls.package_url is not None:
            return
        saved = flow_datastore.save_data([package.blob], len_hint=1)
        cls.package_url, cls.package_sha = saved[0]
        os.environ["METAFLOW_CODE_URL"] = cls.package_url
        os.environ["METAFLOW_CODE_SHA"] = cls.package_sha
|