ob-metaflow-extensions 1.1.45rc3__py2.py3-none-any.whl → 1.5.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ob-metaflow-extensions might be problematic. Click here for more details.
- metaflow_extensions/outerbounds/__init__.py +1 -7
- metaflow_extensions/outerbounds/config/__init__.py +35 -0
- metaflow_extensions/outerbounds/plugins/__init__.py +186 -57
- metaflow_extensions/outerbounds/plugins/apps/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/app_cli.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/app_utils.py +187 -0
- metaflow_extensions/outerbounds/plugins/apps/consts.py +3 -0
- metaflow_extensions/outerbounds/plugins/apps/core/__init__.py +15 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_state_machine.py +506 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.py +4 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.py +478 -0
- metaflow_extensions/outerbounds/plugins/apps/core/app_config.py +128 -0
- metaflow_extensions/outerbounds/plugins/apps/core/app_deploy_decorator.py +330 -0
- metaflow_extensions/outerbounds/plugins/apps/core/artifacts.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/core/capsule.py +958 -0
- metaflow_extensions/outerbounds/plugins/apps/core/click_importer.py +24 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/__init__.py +3 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/code_packager.py +618 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/examples.py +125 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/__init__.py +15 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/cli_generator.py +165 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/config_utils.py +966 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/schema_export.py +299 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/typed_configs.py +233 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/typed_init_generator.py +537 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/unified_config.py +1125 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config_schema.yaml +337 -0
- metaflow_extensions/outerbounds/plugins/apps/core/dependencies.py +115 -0
- metaflow_extensions/outerbounds/plugins/apps/core/deployer.py +959 -0
- metaflow_extensions/outerbounds/plugins/apps/core/experimental/__init__.py +89 -0
- metaflow_extensions/outerbounds/plugins/apps/core/perimeters.py +87 -0
- metaflow_extensions/outerbounds/plugins/apps/core/secrets.py +164 -0
- metaflow_extensions/outerbounds/plugins/apps/core/utils.py +233 -0
- metaflow_extensions/outerbounds/plugins/apps/core/validations.py +17 -0
- metaflow_extensions/outerbounds/plugins/apps/deploy_decorator.py +201 -0
- metaflow_extensions/outerbounds/plugins/apps/supervisord_utils.py +243 -0
- metaflow_extensions/outerbounds/plugins/auth_server.py +28 -8
- metaflow_extensions/outerbounds/plugins/aws/__init__.py +4 -0
- metaflow_extensions/outerbounds/plugins/aws/assume_role.py +3 -0
- metaflow_extensions/outerbounds/plugins/aws/assume_role_decorator.py +118 -0
- metaflow_extensions/outerbounds/plugins/card_utilities/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/card_utilities/async_cards.py +142 -0
- metaflow_extensions/outerbounds/plugins/card_utilities/extra_components.py +545 -0
- metaflow_extensions/outerbounds/plugins/card_utilities/injector.py +70 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/__init__.py +2 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py +71 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/external_chckpt.py +85 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py +73 -0
- metaflow_extensions/outerbounds/plugins/fast_bakery/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/fast_bakery/baker.py +110 -0
- metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +391 -0
- metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +188 -0
- metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_cli.py +54 -0
- metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_decorator.py +50 -0
- metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +79 -0
- metaflow_extensions/outerbounds/plugins/kubernetes/pod_killer.py +374 -0
- metaflow_extensions/outerbounds/plugins/nim/card.py +140 -0
- metaflow_extensions/outerbounds/plugins/nim/nim_decorator.py +101 -0
- metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +379 -0
- metaflow_extensions/outerbounds/plugins/nim/utils.py +36 -0
- metaflow_extensions/outerbounds/plugins/nvcf/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/nvcf/constants.py +3 -0
- metaflow_extensions/outerbounds/plugins/nvcf/exceptions.py +94 -0
- metaflow_extensions/outerbounds/plugins/nvcf/heartbeat_store.py +178 -0
- metaflow_extensions/outerbounds/plugins/nvcf/nvcf.py +417 -0
- metaflow_extensions/outerbounds/plugins/nvcf/nvcf_cli.py +280 -0
- metaflow_extensions/outerbounds/plugins/nvcf/nvcf_decorator.py +242 -0
- metaflow_extensions/outerbounds/plugins/nvcf/utils.py +6 -0
- metaflow_extensions/outerbounds/plugins/nvct/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/nvct/exceptions.py +71 -0
- metaflow_extensions/outerbounds/plugins/nvct/nvct.py +131 -0
- metaflow_extensions/outerbounds/plugins/nvct/nvct_cli.py +289 -0
- metaflow_extensions/outerbounds/plugins/nvct/nvct_decorator.py +286 -0
- metaflow_extensions/outerbounds/plugins/nvct/nvct_runner.py +218 -0
- metaflow_extensions/outerbounds/plugins/nvct/utils.py +29 -0
- metaflow_extensions/outerbounds/plugins/ollama/__init__.py +225 -0
- metaflow_extensions/outerbounds/plugins/ollama/constants.py +1 -0
- metaflow_extensions/outerbounds/plugins/ollama/exceptions.py +22 -0
- metaflow_extensions/outerbounds/plugins/ollama/ollama.py +1924 -0
- metaflow_extensions/outerbounds/plugins/ollama/status_card.py +292 -0
- metaflow_extensions/outerbounds/plugins/optuna/__init__.py +48 -0
- metaflow_extensions/outerbounds/plugins/perimeters.py +19 -5
- metaflow_extensions/outerbounds/plugins/profilers/deco_injector.py +70 -0
- metaflow_extensions/outerbounds/plugins/profilers/gpu_profile_decorator.py +88 -0
- metaflow_extensions/outerbounds/plugins/profilers/simple_card_decorator.py +96 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/__init__.py +7 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/binary_caller.py +132 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/constants.py +11 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/exceptions.py +13 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/proxy_bootstrap.py +59 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_api.py +93 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_decorator.py +250 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_manager.py +225 -0
- metaflow_extensions/outerbounds/plugins/secrets/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/secrets/secrets.py +204 -0
- metaflow_extensions/outerbounds/plugins/snowflake/__init__.py +3 -0
- metaflow_extensions/outerbounds/plugins/snowflake/snowflake.py +378 -0
- metaflow_extensions/outerbounds/plugins/snowpark/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark.py +309 -0
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_cli.py +277 -0
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +150 -0
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +273 -0
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_exceptions.py +13 -0
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +241 -0
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_service_spec.py +259 -0
- metaflow_extensions/outerbounds/plugins/tensorboard/__init__.py +50 -0
- metaflow_extensions/outerbounds/plugins/torchtune/__init__.py +163 -0
- metaflow_extensions/outerbounds/plugins/vllm/__init__.py +255 -0
- metaflow_extensions/outerbounds/plugins/vllm/constants.py +1 -0
- metaflow_extensions/outerbounds/plugins/vllm/exceptions.py +1 -0
- metaflow_extensions/outerbounds/plugins/vllm/status_card.py +352 -0
- metaflow_extensions/outerbounds/plugins/vllm/vllm_manager.py +621 -0
- metaflow_extensions/outerbounds/profilers/gpu.py +131 -47
- metaflow_extensions/outerbounds/remote_config.py +53 -16
- metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +138 -2
- metaflow_extensions/outerbounds/toplevel/ob_internal.py +4 -0
- metaflow_extensions/outerbounds/toplevel/plugins/ollama/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/plugins/optuna/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/plugins/snowflake/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/plugins/torchtune/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/plugins/vllm/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/s3_proxy.py +88 -0
- {ob_metaflow_extensions-1.1.45rc3.dist-info → ob_metaflow_extensions-1.5.1.dist-info}/METADATA +2 -2
- ob_metaflow_extensions-1.5.1.dist-info/RECORD +133 -0
- ob_metaflow_extensions-1.1.45rc3.dist-info/RECORD +0 -19
- {ob_metaflow_extensions-1.1.45rc3.dist-info → ob_metaflow_extensions-1.5.1.dist-info}/WHEEL +0 -0
- {ob_metaflow_extensions-1.1.45rc3.dist-info → ob_metaflow_extensions-1.5.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,391 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import threading
|
|
5
|
+
import time
|
|
6
|
+
import uuid
|
|
7
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
8
|
+
from typing import Dict
|
|
9
|
+
|
|
10
|
+
from metaflow.exception import MetaflowException
|
|
11
|
+
from metaflow.metaflow_config import FAST_BAKERY_URL, get_pinned_conda_libs
|
|
12
|
+
from metaflow.metaflow_environment import MetaflowEnvironment
|
|
13
|
+
from metaflow.plugins.aws.batch.batch_decorator import BatchDecorator
|
|
14
|
+
from metaflow.plugins.kubernetes.kubernetes_decorator import KubernetesDecorator
|
|
15
|
+
from metaflow.plugins.pypi.conda_decorator import CondaStepDecorator
|
|
16
|
+
from metaflow.plugins.pypi.conda_environment import CondaEnvironment
|
|
17
|
+
from metaflow.plugins.pypi.pypi_decorator import PyPIStepDecorator
|
|
18
|
+
from metaflow import decorators
|
|
19
|
+
|
|
20
|
+
from .fast_bakery import FastBakery, FastBakeryApiResponse, FastBakeryException
|
|
21
|
+
|
|
22
|
+
BAKERY_METAFILE = ".imagebakery-cache"
|
|
23
|
+
|
|
24
|
+
import fcntl
|
|
25
|
+
import json
|
|
26
|
+
import os
|
|
27
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
28
|
+
from functools import wraps
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def cache_request(cache_file):
    """Build a decorator that memoizes a bake call in a JSON file on disk.

    The cache key is the MD5 of the JSON-serialized call arguments (with
    ``self`` and ``ref`` removed) plus the configured ``FAST_BAKERY_URL``.
    On a hit, the stored payload is wrapped in a new ``FastBakeryApiResponse``
    and the wrapped function is not called. On a miss, the function is called
    and ``result.response`` is merged into the cache file under an exclusive
    ``flock``.

    Args:
        cache_file: Path of the JSON cache file. It (and its parent
            directory) is created on first write.

    Returns:
        A decorator for functions that return an object exposing ``.response``.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            call_args = kwargs.copy()
            call_args.update(zip(func.__code__.co_varnames, args))
            call_args.pop("self", None)
            call_args.pop("ref", None)
            # invalidate cache when moving from one deployment to another
            call_args.update({"fast_bakery_url": FAST_BAKERY_URL})
            cache_key = hashlib.md5(
                json.dumps(call_args, sort_keys=True).encode("utf-8")
            ).hexdigest()

            try:
                with open(cache_file, "r") as f:
                    cache = json.load(f)
                # A file holding anything but a JSON object is treated as a miss.
                if isinstance(cache, dict) and cache_key in cache:
                    return FastBakeryApiResponse(cache[cache_key])
            except (FileNotFoundError, json.JSONDecodeError):
                pass

            result = func(*args, **kwargs)

            # path to cachefile might not exist.
            cache_dir = os.path.dirname(cache_file)
            if cache_dir:
                os.makedirs(cache_dir, exist_ok=True)

            # Create the file if needed WITHOUT truncating it: truncation must
            # only happen while the exclusive lock is held, otherwise two
            # concurrent first-time writers can wipe each other's entries.
            fd = os.open(cache_file, os.O_RDWR | os.O_CREAT, 0o666)
            with os.fdopen(fd, "r+") as f:
                # The lock is released when the file is closed.
                fcntl.flock(f.fileno(), fcntl.LOCK_EX)
                try:
                    cache = json.load(f)
                except json.JSONDecodeError:
                    # Empty (just created) or corrupted file: start over.
                    cache = {}
                if not isinstance(cache, dict):
                    cache = {}

                cache[cache_key] = result.response

                f.seek(0)
                f.truncate()
                json.dump(cache, f)

            return result

        return wrapper

    return decorator
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class DockerEnvironmentException(MetaflowException):
    """Metaflow exception reported under an image-baking error headline."""

    headline = "Ran into an error while baking image"

    def __init__(self, msg):
        # Forward the message unchanged to the MetaflowException initializer.
        super().__init__(msg)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class DockerEnvironment(MetaflowEnvironment):
    """Metaflow environment that bakes container images for remote steps.

    Steps carrying a Batch or Kubernetes decorator get an image baked through
    ``FastBakery``; the resulting image and python path are written onto those
    decorators. Every other step is collected in ``skipped_steps`` and handed
    to a ``CondaEnvironment`` delegate.
    """

    TYPE = "fast-bakery"
    _filecache = None
    _force_rebuild = False

    def __init__(self, flow):
        """Remember the flow and start with no skipped steps and no results."""
        self.skipped_steps = set()
        self.flow = flow

        self.results = {}
        self.images_baked = 0

    def set_local_root(self, local_root):
        """Store the local root under which the bakery cache file lives."""
        self.local_root = local_root

    def decospecs(self):
        """Return the parent's decospecs prefixed with ``fast_bakery_internal``."""
        # Due to conflicts with the CondaEnvironment fallback and bakery,
        # we can not simply attach 'conda' or 'pypi' to all steps here.
        # Instead we do this on a per-step basis in init_environment
        return ("fast_bakery_internal",) + super().decospecs()

    def validate_environment(self, logger, datastore_type):
        """Record the logger and datastore type and resolve the datastore class.

        Raises:
            IndexError: if no entry of ``DATASTORES`` has a matching ``TYPE``.
        """
        self.datastore_type = datastore_type
        self.logger = logger

        # Avoiding circular imports.
        from metaflow.plugins import DATASTORES

        self.datastore = [d for d in DATASTORES if d.TYPE == self.datastore_type][0]

    def init_environment(self, echo):
        """Attach environment decorators, bake images and set up the fallback.

        Raises:
            MetaflowException: if a step carries more than one of
                ``@pypi`` / ``@conda``.
        """
        self.skipped_steps = {
            step.name for step in self.flow if not _step_executes_remotely(step)
        }
        # Attach environment decorator as needed. This is done on a step-by-step basis
        # as we require a conda decorator for fallback steps, but prefer pypi for the baked ones.
        for step in self.flow:
            # Mixing @pypi/@conda in a single step is not supported yet.
            # We validate this before attaching any new ones as the OSS Conda environment requires an implicit conda decorator for pypi environments which would fail the validation.
            if sum(1 for deco in step.decorators if _is_env_deco(deco)) > 1:
                raise MetaflowException(
                    "Mixing and matching PyPI packages and Conda packages within a\n"
                    "step is not yet supported. Use one of @pypi or @conda only for the *%s* step."
                    % step.name
                )
            if step.name in self.skipped_steps:
                # Conda fallback requires a conda decorator as the default for a step
                decorators._attach_decorators_to_step(step, ["conda"])
            else:
                if not _step_has_environment_deco(step):
                    # We default to PyPI for steps that are going to be baked.
                    decorators._attach_decorators_to_step(step, ["pypi"])
            # Initialize the decorator we attached.
            # This is crucial for the conda decorator to work properly in the fallback environment
            # NOTE(review): nesting reconstructed from a whitespace-stripped
            # source; confirm this is meant to run once per step.
            decorators._init(self.flow)
            for deco in step.decorators:
                if _is_env_deco(deco):
                    deco.step_init(
                        self.flow,
                        None,  # not passing graph as it is not available, and not required by conda/pypi decorators
                        step.name,
                        step.decorators,
                        self,
                        self.datastore,
                        echo,
                    )

        steps_to_bake = [
            step
            for step in self.flow
            if step.name not in self.skipped_steps and not self.is_disabled(step)
        ]
        if steps_to_bake:
            self.logger("🚀 Baking container image(s) ...")
            start_time = time.time()
            self.results = self._bake(steps_to_bake)
            for step in steps_to_bake:
                baked = self.results[step.name]
                for d in step.decorators:
                    if _is_remote_deco(d):
                        d.attributes["image"] = baked.container_image
                        d.attributes["executable"] = baked.python_path
            # images_baked only counts bakes that were not served from the
            # local cache file, so a fully cached run reports no duration.
            if self.images_baked > 0:
                bake_time = time.time() - start_time
                self.logger(
                    f"🎉 All container image(s) baked in {bake_time:.2f} seconds!"
                )
            else:
                self.logger("🎉 All container image(s) baked!")

        if self.skipped_steps:
            self.delegate = CondaEnvironment(self.flow)
            self.delegate._force_rebuild = self._force_rebuild
            self.delegate.set_local_root(self.local_root)
            self.delegate.validate_environment(echo, self.datastore_type)
            self.delegate.init_environment(echo, self.skipped_steps)

    def _bake(self, steps) -> Dict[str, FastBakeryApiResponse]:
        """Bake one image per distinct environment and map step names to results.

        Steps whose requested python version, packages and base image are
        identical share a single bake request.

        Raises:
            DockerEnvironmentException: if a bake request fails.
        """
        metafile_path = get_fastbakery_metafile_path(self.local_root, self.flow.name)
        if self._force_rebuild:
            # clear the metafile if force rebuilding, effectively skipping the cache.
            try:
                os.remove(metafile_path)
            except OSError:
                # Best effort: a missing or unremovable cache file must not
                # block the rebuild.
                pass

        logger_lock = threading.Lock()

        @cache_request(metafile_path)
        def _cached_bake(
            ref=None,
            python=None,
            pypi_packages=None,
            conda_packages=None,
            base_image=None,
        ):
            try:
                bakery = FastBakery(url=FAST_BAKERY_URL)
                bakery._reset_payload()
                bakery.python_version(python)
                bakery.pypi_packages(pypi_packages)
                bakery.conda_packages(conda_packages)
                bakery.base_image(base_image)
                if self._force_rebuild:
                    bakery.ignore_cache()

                # Hold the lock for the whole summary so concurrent bakes do
                # not interleave their log lines.
                with logger_lock:
                    self.logger(f"🍳 Baking [{ref}] ...")
                    self.logger(f" 🐍 Python: {python}")

                    if pypi_packages:
                        self.logger(" 📦 PyPI packages:")
                        for package, version in pypi_packages.items():
                            self.logger(f" 🔧 {package}: {version}")

                    if conda_packages:
                        self.logger(" 📦 Conda packages:")
                        for package, version in conda_packages.items():
                            self.logger(f" 🔧 {package}: {version}")

                    self.logger(f" 🏗️ Base image: {base_image}")

                start_time = time.time()
                res = bakery.bake()
                # TODO: Get actual bake time from bakery
                bake_time = time.time() - start_time

                with logger_lock:
                    self.logger(f"🏁 Baked [{ref}] in {bake_time:.2f} seconds!")
                    self.images_baked += 1
                return res
            except FastBakeryException as ex:
                raise DockerEnvironmentException(
                    f"Bake [{ref}] failed: {str(ex)}"
                ) from ex

        def prepare_step(step):
            base_image = next(
                (
                    d.attributes.get("image")
                    for d in step.decorators
                    if isinstance(d, (KubernetesDecorator))
                ),
                None,
            )
            dependencies = next(
                (d for d in step.decorators if _is_env_deco(d)),
                None,
            )
            python = next(
                (
                    d.attributes["python"]
                    for d in step.decorators
                    if isinstance(d, CondaStepDecorator)
                ),
                None,
            )
            pypi_deco = next(
                (d for d in step.decorators if isinstance(d, PyPIStepDecorator)), None
            )
            # if pypi decorator is set and user has specified a python version, we must create a new environment.
            # otherwise rely on the base environment
            if pypi_deco is not None:
                python = (
                    pypi_deco.attributes["python"]
                    if pypi_deco.is_attribute_user_defined("python")
                    else None
                )

            packages = get_pinned_conda_libs(python, self.datastore_type)
            packages.update(dependencies.attributes["packages"] if dependencies else {})

            requested = {
                "python": python,
                "pypi_packages": (
                    packages if isinstance(dependencies, PyPIStepDecorator) else None
                ),
                "conda_packages": (
                    packages if isinstance(dependencies, CondaStepDecorator) else None
                ),
                "base_image": base_image,
            }
            # sort_keys makes the key independent of the order in which
            # packages were declared, so equal environments really dedupe.
            dedup_key = hashlib.sha256(
                json.dumps(requested, sort_keys=True).encode("utf-8")
            ).hexdigest()

            return step.name, dedup_key, requested

        with ThreadPoolExecutor() as executor:
            prepared_args = list(executor.map(prepare_step, steps))
            # Deduplicate the requests for baking images of steps.
            # We do not want to bake the same image twice.
            dedup_requests = {}
            for step_name, key, args in prepared_args:
                if key not in dedup_requests:
                    dedup_requests[key] = {"step_names": set(), "args": args}
                dedup_requests[key]["step_names"].add(step_name)

            # unique futures
            futures = []
            for i, value in enumerate(dedup_requests.values(), 1):
                future = executor.submit(
                    _cached_bake, **{**value["args"], "ref": f"#{i:02d}"}
                )
                futures.append({"step_names": value["step_names"], "future": future})

            results = {}
            for item in futures:
                for step_name in item["step_names"]:
                    results[step_name] = item["future"].result()

        return results

    def executable(self, step_name, default=None):
        """Return the python executable to use for ``step_name``."""
        if step_name in self.skipped_steps:
            return self.delegate.executable(step_name, default)
        # default is set to the right executable
        if default is not None:
            return default
        if step_name in self.results:
            # try to read pythonpath from results. This can happen immediately after baking.
            return self.results[step_name].python_path
        # we lack a default and baking results. fallback to parent executable.
        return super().executable(step_name, default)

    def interpreter(self, step_name):
        """Return the delegate's interpreter for skipped steps, else ``None``."""
        if step_name in self.skipped_steps:
            return self.delegate.interpreter(step_name)
        return None

    def is_disabled(self, step):
        """Return True when the step's first @conda/@pypi decorator is disabled."""
        for decorator in step.decorators:
            # Only the first conda/pypi decorator found decides the outcome.
            if decorator.name in ["conda", "pypi"]:
                # handle @conda/@pypi(disabled=True)
                disabled = decorator.attributes["disabled"]
                return str(disabled).lower() == "true"
        return False

    def pylint_config(self):
        """Return the parent's pylint config with import-error disabled."""
        config = super().pylint_config()
        # Disable (import-error) in pylint
        config.append("--disable=F0401")
        return config

    def get_package_commands(
        self, codepackage_url, datastore_type, code_package_metadata=None
    ):
        """Prefix the parent's package commands with the skip-install export."""
        # we must set the skip install flag at this stage in order to skip package downloads,
        # doing so in bootstrap_commands is too late in the lifecycle.
        return [
            "export METAFLOW_SKIP_INSTALL_DEPENDENCIES=$FASTBAKERY_IMAGE",
        ] + super().get_package_commands(
            codepackage_url, datastore_type, code_package_metadata=code_package_metadata
        )

    def bootstrap_commands(self, step_name, datastore_type):
        """Return bootstrap commands from the delegate (skipped steps) or parent."""
        if step_name in self.skipped_steps:
            return self.delegate.bootstrap_commands(step_name, datastore_type)
        return super().bootstrap_commands(step_name, datastore_type)
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def get_fastbakery_metafile_path(local_root, flow_name):
    """Return the bakery cache file path for ``flow_name`` under ``local_root``."""
    flow_dir = os.path.join(local_root, flow_name)
    return os.path.join(flow_dir, BAKERY_METAFILE)
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def _is_remote_deco(deco):
    "Check if a decorator is a Batch or Kubernetes decorator"
    remote_types = (BatchDecorator, KubernetesDecorator)
    return isinstance(deco, remote_types)
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def _step_executes_remotely(step):
    "Tell whether any decorator on the step is a remote (Batch/Kubernetes) one"
    for candidate in step.decorators:
        if _is_remote_deco(candidate):
            return True
    return False
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def _is_env_deco(deco):
    "Tell whether a decorator is one of the environment decorators (pypi/conda)"
    env_types = (PyPIStepDecorator, CondaStepDecorator)
    return isinstance(deco, env_types)
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def _step_has_environment_deco(step):
    "Tell whether the step carries at least one pypi/conda decorator"
    for candidate in step.decorators:
        if _is_env_deco(candidate):
            return True
    return False
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
from typing import Dict, Optional
|
|
2
|
+
import requests
|
|
3
|
+
import time
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class FastBakeryException(Exception):
    """Error type raised by the Fast Bakery client in this module."""
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SolverStats:
    """Thin accessor over a solver-statistics mapping."""

    def __init__(self, stats) -> None:
        # Keep the mapping as given; the properties index into it on access.
        self.stats = stats

    @property
    def duration_ms(self):
        """Value stored under the ``durationMs`` key."""
        raw = self.stats
        return raw["durationMs"]

    @property
    def packages_in_solved_environment(self):
        """Value stored under the ``packagesInSolvedEnvironment`` key."""
        raw = self.stats
        return raw["packagesInSolvedEnvironment"]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class BakingStats:
    """Thin accessor over a baking-statistics mapping."""

    def __init__(self, stats) -> None:
        # Keep the mapping as given; it is only inspected on property access.
        self.stats = stats

    @property
    def solver_stats(self) -> Optional[SolverStats]:
        """``SolverStats`` for the ``solverStats`` entry, or None if absent."""
        if "solverStats" in self.stats:
            return SolverStats(self.stats["solverStats"])
        return None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class FastBakeryApiResponse:
    """Accessor over the decoded JSON body of a bake response."""

    def __init__(self, response) -> None:
        # The raw mapping stays reachable as ``.response`` (it is what the
        # on-disk cache stores).
        self.response = response

    @property
    def python_path(self) -> Optional[str]:
        """``success.pythonPath`` when the response is a success, else None."""
        return self.response["success"]["pythonPath"] if self.success else None

    @property
    def container_image(self) -> Optional[str]:
        """``success.containerImage`` when the response is a success, else None."""
        return self.response["success"]["containerImage"] if self.success else None

    @property
    def success(self) -> bool:
        """True when the body has a ``success`` key."""
        return "success" in self.response

    @property
    def baking_stats(self) -> Optional[BakingStats]:
        """``BakingStats`` for a present, non-null ``success.bakingStats``."""
        if not self.success:
            return None

        succeeded = self.response["success"]
        if "bakingStats" not in succeeded or succeeded["bakingStats"] is None:
            return None

        return BakingStats(succeeded["bakingStats"])

    @property
    def failure(self) -> bool:
        """True when the body has a ``failure`` key."""
        return "failure" in self.response
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class FastBakery:
|
|
75
|
+
def __init__(self, url: str):
|
|
76
|
+
if not url:
|
|
77
|
+
raise FastBakeryException("Specifying a url is required.")
|
|
78
|
+
self.url = url
|
|
79
|
+
self.headers = {"Content-Type": "application/json", "Connection": "keep-alive"}
|
|
80
|
+
self._reset_payload()
|
|
81
|
+
|
|
82
|
+
def _reset_payload(self):
|
|
83
|
+
self._payload = {}
|
|
84
|
+
from metaflow_extensions.outerbounds.remote_config import init_config
|
|
85
|
+
from os import environ
|
|
86
|
+
|
|
87
|
+
conf = init_config()
|
|
88
|
+
if "OBP_PERIMETER" in conf:
|
|
89
|
+
perimeter = conf["OBP_PERIMETER"]
|
|
90
|
+
else:
|
|
91
|
+
# if the perimeter is not in metaflow config, try to get it from the environment
|
|
92
|
+
perimeter = environ.get("OBP_PERIMETER", "")
|
|
93
|
+
|
|
94
|
+
self._payload["perimeterName"] = perimeter
|
|
95
|
+
|
|
96
|
+
def python_version(self, version: str):
|
|
97
|
+
self._payload["pythonVersion"] = version
|
|
98
|
+
return self
|
|
99
|
+
|
|
100
|
+
def pypi_packages(self, packages: Dict[str, str]):
|
|
101
|
+
self._payload.setdefault("pipRequirements", []).extend(
|
|
102
|
+
self._format_packages(packages)
|
|
103
|
+
)
|
|
104
|
+
return self
|
|
105
|
+
|
|
106
|
+
def conda_packages(self, packages: Dict[str, str]):
|
|
107
|
+
self._payload.setdefault("condaMatchspecs", []).extend(
|
|
108
|
+
self._format_packages(packages)
|
|
109
|
+
)
|
|
110
|
+
return self
|
|
111
|
+
|
|
112
|
+
def base_image(self, image: str):
|
|
113
|
+
self._payload["baseImage"] = {"imageReference": image}
|
|
114
|
+
return self
|
|
115
|
+
|
|
116
|
+
def image_kind(self, kind: str):
|
|
117
|
+
self._payload["imageKind"] = kind
|
|
118
|
+
return self
|
|
119
|
+
|
|
120
|
+
def ignore_cache(self):
|
|
121
|
+
self._payload["cacheBehavior"] = {
|
|
122
|
+
"responseMaxAgeSeconds": 0,
|
|
123
|
+
"layerMaxAgeSeconds": 0,
|
|
124
|
+
"baseImageMaxAgeSeconds": 0,
|
|
125
|
+
"overwriteExistingLayers": True, # Used primarily to rewrite possibly corrupted layers.
|
|
126
|
+
}
|
|
127
|
+
return self
|
|
128
|
+
|
|
129
|
+
@staticmethod
|
|
130
|
+
def _format_packages(packages: Dict[str, str]) -> list:
|
|
131
|
+
if not packages:
|
|
132
|
+
return []
|
|
133
|
+
|
|
134
|
+
def format_package(pkg: str, ver: str) -> str:
|
|
135
|
+
return (
|
|
136
|
+
f"{pkg}{ver}"
|
|
137
|
+
if any(ver.startswith(c) for c in [">", "<", "~", "@", "="])
|
|
138
|
+
else (f"{pkg}=={ver}" if ver != "" else f"{pkg}")
|
|
139
|
+
)
|
|
140
|
+
|
|
141
|
+
return [format_package(pkg, ver) for pkg, ver in packages.items()]
|
|
142
|
+
|
|
143
|
+
def bake(self) -> FastBakeryApiResponse:
|
|
144
|
+
if "imageKind" not in self._payload:
|
|
145
|
+
self._payload["imageKind"] = "oci-zstd" # Set default if not specified
|
|
146
|
+
|
|
147
|
+
res = self._make_request(self._payload)
|
|
148
|
+
self._reset_payload()
|
|
149
|
+
return res
|
|
150
|
+
|
|
151
|
+
def _make_request(self, payload: Dict) -> FastBakeryApiResponse:
|
|
152
|
+
try:
|
|
153
|
+
from metaflow.metaflow_config import SERVICE_HEADERS
|
|
154
|
+
|
|
155
|
+
headers = {**self.headers, **(SERVICE_HEADERS or {})}
|
|
156
|
+
except ImportError:
|
|
157
|
+
headers = self.headers
|
|
158
|
+
|
|
159
|
+
retryable_status_codes = [409]
|
|
160
|
+
|
|
161
|
+
for attempt in range(2): # 0 = initial attempt, 1-2 = retries
|
|
162
|
+
response = requests.post(self.url, json=payload, headers=headers)
|
|
163
|
+
|
|
164
|
+
if response.status_code not in retryable_status_codes:
|
|
165
|
+
break
|
|
166
|
+
|
|
167
|
+
if attempt < 2: # Don't sleep after the last attempt
|
|
168
|
+
sleep_time = 0.5 * (attempt + 1)
|
|
169
|
+
time.sleep(sleep_time)
|
|
170
|
+
|
|
171
|
+
response = requests.post(self.url, json=payload, headers=headers)
|
|
172
|
+
self._handle_error_response(response)
|
|
173
|
+
return FastBakeryApiResponse(response.json())
|
|
174
|
+
|
|
175
|
+
@staticmethod
def _handle_error_response(response: requests.Response):
    """
    Raise ``FastBakeryException`` if ``response`` represents a failure.

    - HTTP status >= 500: raised with the raw response text.
    - Otherwise the JSON body is inspected; the effective status is
      ``body["error"]["statusCode"]`` when present, else the HTTP status.
      If that is >= 400 the exception message is built from
      ``body["error"]["details"]`` (``kind`` and ``message``), falling back
      to an "Unexpected error" message when those fields are absent.

    A 4xx response whose body is not valid JSON is also reported as a
    ``FastBakeryException`` rather than leaking a JSON decoding error.

    Returns None when the response is not an error.
    """
    if response.status_code >= 500:
        raise FastBakeryException(f"Server error: {response.text}")

    try:
        body = response.json()
    except ValueError:
        # JSON decoding errors raised by `Response.json()` derive from ValueError.
        if response.status_code >= 400:
            raise FastBakeryException(f"Unexpected error: {response.text}")
        # Non-error response with an undecodable body: keep the original failure.
        raise

    # Tolerate bodies where "error" is missing, null, or not an object.
    error = body.get("error") if isinstance(body, dict) else None
    if not isinstance(error, dict):
        error = {}

    status_code = error.get("statusCode", response.status_code)
    if status_code >= 400:
        try:
            details = error["details"]
            message = f"*{details['kind']}*\n{details['message']}"
        except (KeyError, TypeError):
            # Error payload does not follow the expected schema.
            raise FastBakeryException(f"Unexpected error: {body}")
        raise FastBakeryException(message)
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
from metaflow._vendor import click
|
|
4
|
+
from metaflow.cli import echo_always as echo
|
|
5
|
+
from metaflow.plugins.datastores.local_storage import LocalStorage
|
|
6
|
+
|
|
7
|
+
from .docker_environment import get_fastbakery_metafile_path
|
|
8
|
+
from .fast_bakery import FastBakeryApiResponse
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@click.group()
def cli():
    # Root click group of this module. It has no behavior of its own and only
    # serves as the attachment point for the sub-groups declared with
    # `@cli.group(...)`.
    # NOTE: documented with comments rather than a docstring, because click
    # uses a callback's docstring as help text when no `help=` is given.
    pass
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@cli.group(help="Commands related to Fast Bakery support.")
@click.pass_context
def fast_bakery(ctx):
    """
    Group callback: compute the Fast Bakery metafile path for the current flow
    and store it on ``ctx.obj.metafile_path`` for the sub-commands to use.
    """
    # The local datastore root is looked up without creating it when absent.
    datastore_root = LocalStorage.get_datastore_root_from_config(
        echo, create_on_absent=False
    )
    flow_name = ctx.obj.flow.name
    ctx.obj.metafile_path = get_fastbakery_metafile_path(datastore_root, flow_name)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@fast_bakery.command(help="Purge local Fast Bakery cache.")
@click.pass_obj
def purge(obj):
    """
    Delete the metafile at ``obj.metafile_path`` and report the outcome.

    A missing file is not an error; it is reported as "no cache found".
    """
    try:
        os.remove(obj.metafile_path)
    except FileNotFoundError:
        echo("No local Fast Bakery cache found.")
    else:
        echo("Local Fast Bakery cache purged.")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@fast_bakery.command(help="List the cached images")
@click.pass_obj
def images(obj):
    """
    Print the container image of every entry stored in the local metafile.

    The metafile at ``obj.metafile_path`` is read as JSON; each value is
    wrapped in ``FastBakeryApiResponse`` and its ``container_image`` is echoed.
    A missing or empty metafile is reported as having no cached images.
    """
    try:
        with open(obj.metafile_path, "r") as cache_file:
            cached_entries = json.load(cache_file)
    except FileNotFoundError:
        cached_entries = None

    # Guard clause: nothing on disk, or an empty cache.
    if not cached_entries:
        echo("No locally cached images.")
        return

    echo("List of locally cached image tags:\n")

    for entry in cached_entries.values():
        echo(FastBakeryApiResponse(entry).container_image)

    echo(
        "In order to clear the cached images, you can use the command\n *fast-bakery purge*"
    )
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from metaflow.decorators import StepDecorator
|
|
3
|
+
from metaflow.metadata_provider.metadata import MetaDatum
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class InternalFastBakeryDecorator(StepDecorator):
    """
    Internal decorator to support Fast bakery

    Before a step runs, it records the image named by the FASTBAKERY_IMAGE
    environment variable as task metadata.
    """

    name = "fast_bakery_internal"

    def task_pre_step(
        self,
        step_name,
        task_datastore,
        metadata,
        run_id,
        task_id,
        flow,
        graph,
        retry_count,
        max_retries,
        ubf_context,
        inputs,
    ):
        """
        Register the Fast Bakery image name as metadata for this task attempt.

        Nothing is registered unless both METAFLOW_KUBERNETES_WORKLOAD and a
        non-empty FASTBAKERY_IMAGE are present in the environment.
        """
        # task_pre_step may run locally if fallback is activated for the
        # @catch decorator; in that case no metadata is collected. The
        # presence of METAFLOW_KUBERNETES_WORKLOAD is used as a rudimentary
        # signal for non-local execution.
        if "METAFLOW_KUBERNETES_WORKLOAD" not in os.environ:
            return

        image_name = os.environ.get("FASTBAKERY_IMAGE")
        if not image_name:
            return

        collected = {"fast-bakery-image-name": image_name}
        attempt_tag = "attempt_id:{0}".format(retry_count)
        entries = []
        for key, value in collected.items():
            if value is None:
                continue
            entries.append(
                MetaDatum(field=key, value=value, type=key, tags=[attempt_tag])
            )

        # Register book-keeping metadata for debugging.
        metadata.register_metadata(run_id, step_name, task_id, entries)
|