ob-metaflow-extensions 1.1.142__py2.py3-none-any.whl → 1.4.33__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow_extensions/outerbounds/__init__.py +1 -1
- metaflow_extensions/outerbounds/plugins/__init__.py +26 -5
- metaflow_extensions/outerbounds/plugins/apps/app_cli.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/app_deploy_decorator.py +146 -0
- metaflow_extensions/outerbounds/plugins/apps/core/__init__.py +10 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_state_machine.py +506 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.py +4 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.py +478 -0
- metaflow_extensions/outerbounds/plugins/apps/core/app_cli.py +1200 -0
- metaflow_extensions/outerbounds/plugins/apps/core/app_config.py +146 -0
- metaflow_extensions/outerbounds/plugins/apps/core/artifacts.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/core/capsule.py +958 -0
- metaflow_extensions/outerbounds/plugins/apps/core/click_importer.py +24 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/__init__.py +3 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/code_packager.py +618 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/examples.py +125 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/__init__.py +12 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/cli_generator.py +161 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/config_utils.py +868 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/schema_export.py +288 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/typed_configs.py +139 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/typed_init_generator.py +398 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/unified_config.py +1088 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config_schema.yaml +337 -0
- metaflow_extensions/outerbounds/plugins/apps/core/dependencies.py +115 -0
- metaflow_extensions/outerbounds/plugins/apps/core/deployer.py +303 -0
- metaflow_extensions/outerbounds/plugins/apps/core/experimental/__init__.py +89 -0
- metaflow_extensions/outerbounds/plugins/apps/core/perimeters.py +87 -0
- metaflow_extensions/outerbounds/plugins/apps/core/secrets.py +164 -0
- metaflow_extensions/outerbounds/plugins/apps/core/utils.py +233 -0
- metaflow_extensions/outerbounds/plugins/apps/core/validations.py +17 -0
- metaflow_extensions/outerbounds/plugins/aws/__init__.py +4 -0
- metaflow_extensions/outerbounds/plugins/aws/assume_role.py +3 -0
- metaflow_extensions/outerbounds/plugins/aws/assume_role_decorator.py +78 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/__init__.py +2 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py +71 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/external_chckpt.py +85 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py +73 -0
- metaflow_extensions/outerbounds/plugins/fast_bakery/baker.py +110 -0
- metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +17 -3
- metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +1 -0
- metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +18 -44
- metaflow_extensions/outerbounds/plugins/kubernetes/pod_killer.py +374 -0
- metaflow_extensions/outerbounds/plugins/nim/card.py +1 -6
- metaflow_extensions/outerbounds/plugins/nim/{__init__.py → nim_decorator.py} +13 -49
- metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +294 -233
- metaflow_extensions/outerbounds/plugins/nim/utils.py +36 -0
- metaflow_extensions/outerbounds/plugins/nvcf/constants.py +2 -2
- metaflow_extensions/outerbounds/plugins/nvcf/nvcf.py +100 -19
- metaflow_extensions/outerbounds/plugins/nvcf/nvcf_decorator.py +6 -1
- metaflow_extensions/outerbounds/plugins/nvct/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/nvct/exceptions.py +71 -0
- metaflow_extensions/outerbounds/plugins/nvct/nvct.py +131 -0
- metaflow_extensions/outerbounds/plugins/nvct/nvct_cli.py +289 -0
- metaflow_extensions/outerbounds/plugins/nvct/nvct_decorator.py +286 -0
- metaflow_extensions/outerbounds/plugins/nvct/nvct_runner.py +218 -0
- metaflow_extensions/outerbounds/plugins/nvct/utils.py +29 -0
- metaflow_extensions/outerbounds/plugins/ollama/__init__.py +171 -16
- metaflow_extensions/outerbounds/plugins/ollama/constants.py +1 -0
- metaflow_extensions/outerbounds/plugins/ollama/exceptions.py +22 -0
- metaflow_extensions/outerbounds/plugins/ollama/ollama.py +1710 -114
- metaflow_extensions/outerbounds/plugins/ollama/status_card.py +292 -0
- metaflow_extensions/outerbounds/plugins/optuna/__init__.py +48 -0
- metaflow_extensions/outerbounds/plugins/profilers/simple_card_decorator.py +96 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/__init__.py +7 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/binary_caller.py +132 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/constants.py +11 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/exceptions.py +13 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/proxy_bootstrap.py +59 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_api.py +93 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_decorator.py +250 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_manager.py +225 -0
- metaflow_extensions/outerbounds/plugins/secrets/secrets.py +38 -2
- metaflow_extensions/outerbounds/plugins/snowflake/snowflake.py +44 -4
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +6 -3
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +13 -7
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +8 -2
- metaflow_extensions/outerbounds/plugins/torchtune/__init__.py +163 -0
- metaflow_extensions/outerbounds/plugins/vllm/__init__.py +255 -0
- metaflow_extensions/outerbounds/plugins/vllm/constants.py +1 -0
- metaflow_extensions/outerbounds/plugins/vllm/exceptions.py +1 -0
- metaflow_extensions/outerbounds/plugins/vllm/status_card.py +352 -0
- metaflow_extensions/outerbounds/plugins/vllm/vllm_manager.py +621 -0
- metaflow_extensions/outerbounds/remote_config.py +27 -3
- metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +87 -2
- metaflow_extensions/outerbounds/toplevel/ob_internal.py +4 -0
- metaflow_extensions/outerbounds/toplevel/plugins/optuna/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/plugins/torchtune/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/plugins/vllm/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/s3_proxy.py +88 -0
- {ob_metaflow_extensions-1.1.142.dist-info → ob_metaflow_extensions-1.4.33.dist-info}/METADATA +2 -2
- ob_metaflow_extensions-1.4.33.dist-info/RECORD +134 -0
- metaflow_extensions/outerbounds/plugins/nim/utilities.py +0 -5
- ob_metaflow_extensions-1.1.142.dist-info/RECORD +0 -64
- {ob_metaflow_extensions-1.1.142.dist-info → ob_metaflow_extensions-1.4.33.dist-info}/WHEEL +0 -0
- {ob_metaflow_extensions-1.1.142.dist-info → ob_metaflow_extensions-1.4.33.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import metaflow.metaflow_config_funcs
|
|
2
2
|
|
|
3
|
-
from metaflow_extensions.outerbounds.remote_config import init_config
|
|
3
|
+
from metaflow_extensions.outerbounds.remote_config import init_config, reload_config
|
|
4
4
|
|
|
5
5
|
# we want to override OSS Metaflow's initialization behavior with our own to support remote configs
|
|
6
6
|
# we're reassigning the METAFLOW_CONFIG variable because all downstream settings rely on it and
|
|
@@ -41,6 +41,9 @@ def get_boto3_session(role_arn=None, session_vars=None):
|
|
|
41
41
|
import boto3
|
|
42
42
|
import botocore
|
|
43
43
|
from metaflow_extensions.outerbounds.plugins.auth_server import get_token
|
|
44
|
+
from metaflow_extensions.outerbounds.plugins.aws.assume_role import (
|
|
45
|
+
OBP_ASSUME_ROLE_ARN_ENV_VAR,
|
|
46
|
+
)
|
|
44
47
|
|
|
45
48
|
from hashlib import sha256
|
|
46
49
|
from metaflow.util import get_username
|
|
@@ -69,6 +72,12 @@ def get_boto3_session(role_arn=None, session_vars=None):
|
|
|
69
72
|
if token_info.get("cspr_role_arn"):
|
|
70
73
|
cspr_role = token_info["cspr_role_arn"]
|
|
71
74
|
|
|
75
|
+
# Check if the assume_role decorator has set a CSPR ARN via environment variable
|
|
76
|
+
# This takes precedence over CSPR role that comes from the token_info response
|
|
77
|
+
decorator_role_arn = os.environ.get(OBP_ASSUME_ROLE_ARN_ENV_VAR)
|
|
78
|
+
if decorator_role_arn:
|
|
79
|
+
cspr_role = decorator_role_arn
|
|
80
|
+
|
|
72
81
|
if cspr_role:
|
|
73
82
|
# If CSPR role is set, we set it as the default role to assume
|
|
74
83
|
# for the AWS SDK. We do this by writing an AWS config file
|
|
@@ -162,13 +171,18 @@ class ObpAuthProvider(object):
|
|
|
162
171
|
client_params = {}
|
|
163
172
|
|
|
164
173
|
from botocore.exceptions import ClientError
|
|
174
|
+
from botocore.config import Config
|
|
165
175
|
|
|
166
176
|
with hide_access_keys():
|
|
167
177
|
session = get_boto3_session(role_arn, session_vars)
|
|
178
|
+
_client_params = client_params.copy()
|
|
179
|
+
if _client_params.get("config") and type(_client_params["config"]) == dict:
|
|
180
|
+
_client_params["config"] = Config(**_client_params["config"])
|
|
181
|
+
|
|
168
182
|
if with_error:
|
|
169
|
-
return session.client(module, **
|
|
183
|
+
return session.client(module, **_client_params), ClientError
|
|
170
184
|
else:
|
|
171
|
-
return session.client(module, **
|
|
185
|
+
return session.client(module, **_client_params)
|
|
172
186
|
|
|
173
187
|
|
|
174
188
|
AWS_CLIENT_PROVIDERS_DESC = [("obp", ".ObpAuthProvider")]
|
|
@@ -307,11 +321,13 @@ class ObpGcpAuthProvider(object):
|
|
|
307
321
|
GCP_CLIENT_PROVIDERS_DESC = [("obp", ".ObpGcpAuthProvider")]
|
|
308
322
|
CLIS_DESC = [
|
|
309
323
|
("nvidia", ".nvcf.nvcf_cli.cli"),
|
|
324
|
+
("nvct", ".nvct.nvct_cli.cli"),
|
|
310
325
|
("fast-bakery", ".fast_bakery.fast_bakery_cli.cli"),
|
|
311
326
|
("snowpark", ".snowpark.snowpark_cli.cli"),
|
|
312
327
|
]
|
|
313
328
|
STEP_DECORATORS_DESC = [
|
|
314
329
|
("nvidia", ".nvcf.nvcf_decorator.NvcfDecorator"),
|
|
330
|
+
("nvct", ".nvct.nvct_decorator.NvctDecorator"),
|
|
315
331
|
(
|
|
316
332
|
"fast_bakery_internal",
|
|
317
333
|
".fast_bakery.fast_bakery_decorator.InternalFastBakeryDecorator",
|
|
@@ -319,9 +335,14 @@ STEP_DECORATORS_DESC = [
|
|
|
319
335
|
("snowpark", ".snowpark.snowpark_decorator.SnowparkDecorator"),
|
|
320
336
|
("tensorboard", ".tensorboard.TensorboardDecorator"),
|
|
321
337
|
("gpu_profile", ".profilers.gpu_profile_decorator.GPUProfileDecorator"),
|
|
322
|
-
("
|
|
338
|
+
("test_append_card", ".profilers.simple_card_decorator.DynamicCardAppendDecorator"),
|
|
339
|
+
("nim", ".nim.nim_decorator.NimDecorator"),
|
|
323
340
|
("ollama", ".ollama.OllamaDecorator"),
|
|
324
|
-
("
|
|
341
|
+
("vllm", ".vllm.VLLMDecorator"),
|
|
342
|
+
("app_deploy", ".apps.app_deploy_decorator.AppDeployDecorator"),
|
|
343
|
+
("s3_proxy", ".s3_proxy.s3_proxy_decorator.S3ProxyDecorator"),
|
|
344
|
+
("nebius_s3_proxy", ".s3_proxy.s3_proxy_decorator.NebiusS3ProxyDecorator"),
|
|
345
|
+
("coreweave_s3_proxy", ".s3_proxy.s3_proxy_decorator.CoreWeaveS3ProxyDecorator"),
|
|
325
346
|
]
|
|
326
347
|
|
|
327
348
|
TOGGLE_STEP_DECORATOR = [
|
|
@@ -340,4 +361,4 @@ SECRETS_PROVIDERS_DESC = [
|
|
|
340
361
|
("outerbounds", ".secrets.secrets.OuterboundsSecretsProvider"),
|
|
341
362
|
]
|
|
342
363
|
# Adding an override here so the library can be imported at the metaflow.plugins level
|
|
343
|
-
__mf_promote_submodules__ = ["snowflake", "ollama"]
|
|
364
|
+
__mf_promote_submodules__ = ["snowflake", "ollama", "torchtune", "optuna"]
|
|
File without changes
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
from metaflow.exception import MetaflowException
|
|
2
|
+
from metaflow.decorators import StepDecorator
|
|
3
|
+
from metaflow import current
|
|
4
|
+
from .core import AppDeployer, apps
|
|
5
|
+
from .core.perimeters import PerimeterExtractor
|
|
6
|
+
import os
|
|
7
|
+
import hashlib
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class AppDeployDecorator(StepDecorator):

    """
    MF Add To Current
    -----------------
    apps -> metaflow_extensions.outerbounds.plugins.apps.core.apps

        @@ Returns
        ----------
        apps
            The object carrying the Deployer class to deploy apps.
    """

    # Step decorator that, before the user's step code runs, configures
    # `AppDeployer` with the code package location, perimeter/API server and a
    # set of default tags, and then publishes `apps()` as `current.apps`.

    name = "app_deploy"
    defaults = {}

    # Location and hash of the uploaded code package. They live on the class so
    # that the upload in `_save_package_once` happens a single time.
    package_url = None
    package_sha = None

    # Passed to `AppDeployer._set_state` as `max_entropy`.
    MAX_ENTROPY = 6
    # Longest allowed name prefix; the -1 accounts for the hyphen
    # (presumably the separator between the prefix and the entropy suffix).
    MAX_NAME_LENGTH = 15 - MAX_ENTROPY - 1  # -1 for the hyphen

    def step_init(self, flow, graph, step, decos, environment, flow_datastore, logger):
        # Keep hold of the collaborators that the later hooks rely on.
        self.flow_datastore = flow_datastore
        self.step = step
        self.environment = environment
        self.logger = logger

    def _resolve_package_url_and_sha(self):
        # The environment variables win; the class-level values are only the
        # fallback when a variable is not set.
        url = os.environ.get("METAFLOW_CODE_URL", self.package_url)
        sha = os.environ.get("METAFLOW_CODE_SHA", self.package_sha)
        return url, sha

    def _extract_project_info(self):
        # Returns (project, branch, is_production) as looked up on `current`.
        return (
            current.get("project_name"),
            current.get("branch_name"),
            current.get("is_production"),
        )

    def _resolve_default_image(self, flow):
        # TODO : Resolve the default image over here.
        pass

    def _resolve_default_name_prefix(self, flow, step_name):
        # TODO: Only tweak MAX_NAME_LENGTH as backend support allows longer names.
        candidate = (flow.name + "-" + step_name).lower()
        # Names that would be too long are replaced by a fixed generic prefix.
        return candidate if len(candidate) <= self.MAX_NAME_LENGTH else "mf-app"

    def task_pre_step(
        self,
        step_name,
        task_datastore,
        metadata,
        run_id,
        task_id,
        flow,
        graph,
        retry_count,
        max_user_code_retries,
        ubf_context,
        inputs,
    ):
        # Configure `AppDeployer` for this task and expose `current.apps`.
        #
        # Raises MetaflowException when the code package URL or SHA cannot be
        # resolved.
        perimeter, api_server = PerimeterExtractor.during_metaflow_execution()

        code_url, code_sha = self._resolve_package_url_and_sha()
        if code_url is None or code_sha is None:
            raise MetaflowException(
                "METAFLOW_CODE_URL or METAFLOW_CODE_SHA is not set. "
                "Please set METAFLOW_CODE_URL and METAFLOW_CODE_SHA in your environment."
            )

        image = os.environ.get("FASTBAKERY_IMAGE")

        # TODO [Apps] - This is temporary. Backend will support longer names in the future.
        name_prefix = self._resolve_default_name_prefix(flow, step_name)

        project, branch, is_production = self._extract_project_info()

        # Ordered (key, value) pairs; each pair becomes its own single-entry
        # dict with a stringified value further down.
        tag_pairs = [
            ("metaflow/flow_name", flow.name),
            ("metaflow/step_name", step_name),
            ("metaflow/run_id", run_id),
            ("metaflow/task_id", task_id),
            ("metaflow/retry_count", retry_count),
            ("metaflow/pathspec", current.pathspec),
        ]
        # Project tags are only attached when a project name is available.
        if project is not None:
            tag_pairs.extend(
                [
                    ("metaflow/project", project),
                    ("metaflow/branch", branch),
                    ("metaflow/is_production", is_production),
                ]
            )

        AppDeployer._set_state(
            perimeter,
            api_server,
            code_package_url=code_url,
            code_package_key=code_sha,
            name_prefix=name_prefix,
            image=image,
            max_entropy=self.MAX_ENTROPY,
            default_tags=[{key: str(value)} for key, value in tag_pairs],
        )

        current._update_env({"apps": apps()})

    def task_post_step(
        self, step_name, flow, graph, retry_count, max_user_code_retries
    ):
        # Nothing to do once the step has finished.
        pass

    def runtime_init(self, flow, graph, package, run_id):
        # Set some more internal state.
        self.run_id = run_id
        self.package = package
        self.graph = graph
        self.flow = flow

    def runtime_task_created(
        self, task_datastore, task_id, split_index, input_paths, is_cloned, ubf_context
    ):
        # Cloned tasks are skipped; for every other task make sure the code
        # package has been stored in the datastore so its URL and SHA are
        # available to `task_pre_step`.
        if is_cloned:
            return
        self._save_package_once(self.flow_datastore, self.package)

    @classmethod
    def _save_package_once(cls, flow_datastore, package):
        # Upload the package blob only if no earlier call has already done so,
        # then publish the resulting URL and SHA through the environment.
        if cls.package_url is not None:
            return
        saved_url, saved_sha = flow_datastore.save_data([package.blob], len_hint=1)[0]
        cls.package_url = saved_url
        cls.package_sha = saved_sha
        os.environ["METAFLOW_CODE_URL"] = cls.package_url
        os.environ["METAFLOW_CODE_SHA"] = cls.package_sha
|