ob-metaflow-extensions 1.1.45rc3__py2.py3-none-any.whl → 1.5.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow-extensions might be problematic. Click here for more details.

Files changed (128) hide show
  1. metaflow_extensions/outerbounds/__init__.py +1 -7
  2. metaflow_extensions/outerbounds/config/__init__.py +35 -0
  3. metaflow_extensions/outerbounds/plugins/__init__.py +186 -57
  4. metaflow_extensions/outerbounds/plugins/apps/__init__.py +0 -0
  5. metaflow_extensions/outerbounds/plugins/apps/app_cli.py +0 -0
  6. metaflow_extensions/outerbounds/plugins/apps/app_utils.py +187 -0
  7. metaflow_extensions/outerbounds/plugins/apps/consts.py +3 -0
  8. metaflow_extensions/outerbounds/plugins/apps/core/__init__.py +15 -0
  9. metaflow_extensions/outerbounds/plugins/apps/core/_state_machine.py +506 -0
  10. metaflow_extensions/outerbounds/plugins/apps/core/_vendor/__init__.py +0 -0
  11. metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.py +4 -0
  12. metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.py +478 -0
  13. metaflow_extensions/outerbounds/plugins/apps/core/app_config.py +128 -0
  14. metaflow_extensions/outerbounds/plugins/apps/core/app_deploy_decorator.py +330 -0
  15. metaflow_extensions/outerbounds/plugins/apps/core/artifacts.py +0 -0
  16. metaflow_extensions/outerbounds/plugins/apps/core/capsule.py +958 -0
  17. metaflow_extensions/outerbounds/plugins/apps/core/click_importer.py +24 -0
  18. metaflow_extensions/outerbounds/plugins/apps/core/code_package/__init__.py +3 -0
  19. metaflow_extensions/outerbounds/plugins/apps/core/code_package/code_packager.py +618 -0
  20. metaflow_extensions/outerbounds/plugins/apps/core/code_package/examples.py +125 -0
  21. metaflow_extensions/outerbounds/plugins/apps/core/config/__init__.py +15 -0
  22. metaflow_extensions/outerbounds/plugins/apps/core/config/cli_generator.py +165 -0
  23. metaflow_extensions/outerbounds/plugins/apps/core/config/config_utils.py +966 -0
  24. metaflow_extensions/outerbounds/plugins/apps/core/config/schema_export.py +299 -0
  25. metaflow_extensions/outerbounds/plugins/apps/core/config/typed_configs.py +233 -0
  26. metaflow_extensions/outerbounds/plugins/apps/core/config/typed_init_generator.py +537 -0
  27. metaflow_extensions/outerbounds/plugins/apps/core/config/unified_config.py +1125 -0
  28. metaflow_extensions/outerbounds/plugins/apps/core/config_schema.yaml +337 -0
  29. metaflow_extensions/outerbounds/plugins/apps/core/dependencies.py +115 -0
  30. metaflow_extensions/outerbounds/plugins/apps/core/deployer.py +959 -0
  31. metaflow_extensions/outerbounds/plugins/apps/core/experimental/__init__.py +89 -0
  32. metaflow_extensions/outerbounds/plugins/apps/core/perimeters.py +87 -0
  33. metaflow_extensions/outerbounds/plugins/apps/core/secrets.py +164 -0
  34. metaflow_extensions/outerbounds/plugins/apps/core/utils.py +233 -0
  35. metaflow_extensions/outerbounds/plugins/apps/core/validations.py +17 -0
  36. metaflow_extensions/outerbounds/plugins/apps/deploy_decorator.py +201 -0
  37. metaflow_extensions/outerbounds/plugins/apps/supervisord_utils.py +243 -0
  38. metaflow_extensions/outerbounds/plugins/auth_server.py +28 -8
  39. metaflow_extensions/outerbounds/plugins/aws/__init__.py +4 -0
  40. metaflow_extensions/outerbounds/plugins/aws/assume_role.py +3 -0
  41. metaflow_extensions/outerbounds/plugins/aws/assume_role_decorator.py +118 -0
  42. metaflow_extensions/outerbounds/plugins/card_utilities/__init__.py +0 -0
  43. metaflow_extensions/outerbounds/plugins/card_utilities/async_cards.py +142 -0
  44. metaflow_extensions/outerbounds/plugins/card_utilities/extra_components.py +545 -0
  45. metaflow_extensions/outerbounds/plugins/card_utilities/injector.py +70 -0
  46. metaflow_extensions/outerbounds/plugins/checkpoint_datastores/__init__.py +2 -0
  47. metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py +71 -0
  48. metaflow_extensions/outerbounds/plugins/checkpoint_datastores/external_chckpt.py +85 -0
  49. metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py +73 -0
  50. metaflow_extensions/outerbounds/plugins/fast_bakery/__init__.py +0 -0
  51. metaflow_extensions/outerbounds/plugins/fast_bakery/baker.py +110 -0
  52. metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +391 -0
  53. metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +188 -0
  54. metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_cli.py +54 -0
  55. metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_decorator.py +50 -0
  56. metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +79 -0
  57. metaflow_extensions/outerbounds/plugins/kubernetes/pod_killer.py +374 -0
  58. metaflow_extensions/outerbounds/plugins/nim/card.py +140 -0
  59. metaflow_extensions/outerbounds/plugins/nim/nim_decorator.py +101 -0
  60. metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +379 -0
  61. metaflow_extensions/outerbounds/plugins/nim/utils.py +36 -0
  62. metaflow_extensions/outerbounds/plugins/nvcf/__init__.py +0 -0
  63. metaflow_extensions/outerbounds/plugins/nvcf/constants.py +3 -0
  64. metaflow_extensions/outerbounds/plugins/nvcf/exceptions.py +94 -0
  65. metaflow_extensions/outerbounds/plugins/nvcf/heartbeat_store.py +178 -0
  66. metaflow_extensions/outerbounds/plugins/nvcf/nvcf.py +417 -0
  67. metaflow_extensions/outerbounds/plugins/nvcf/nvcf_cli.py +280 -0
  68. metaflow_extensions/outerbounds/plugins/nvcf/nvcf_decorator.py +242 -0
  69. metaflow_extensions/outerbounds/plugins/nvcf/utils.py +6 -0
  70. metaflow_extensions/outerbounds/plugins/nvct/__init__.py +0 -0
  71. metaflow_extensions/outerbounds/plugins/nvct/exceptions.py +71 -0
  72. metaflow_extensions/outerbounds/plugins/nvct/nvct.py +131 -0
  73. metaflow_extensions/outerbounds/plugins/nvct/nvct_cli.py +289 -0
  74. metaflow_extensions/outerbounds/plugins/nvct/nvct_decorator.py +286 -0
  75. metaflow_extensions/outerbounds/plugins/nvct/nvct_runner.py +218 -0
  76. metaflow_extensions/outerbounds/plugins/nvct/utils.py +29 -0
  77. metaflow_extensions/outerbounds/plugins/ollama/__init__.py +225 -0
  78. metaflow_extensions/outerbounds/plugins/ollama/constants.py +1 -0
  79. metaflow_extensions/outerbounds/plugins/ollama/exceptions.py +22 -0
  80. metaflow_extensions/outerbounds/plugins/ollama/ollama.py +1924 -0
  81. metaflow_extensions/outerbounds/plugins/ollama/status_card.py +292 -0
  82. metaflow_extensions/outerbounds/plugins/optuna/__init__.py +48 -0
  83. metaflow_extensions/outerbounds/plugins/perimeters.py +19 -5
  84. metaflow_extensions/outerbounds/plugins/profilers/deco_injector.py +70 -0
  85. metaflow_extensions/outerbounds/plugins/profilers/gpu_profile_decorator.py +88 -0
  86. metaflow_extensions/outerbounds/plugins/profilers/simple_card_decorator.py +96 -0
  87. metaflow_extensions/outerbounds/plugins/s3_proxy/__init__.py +7 -0
  88. metaflow_extensions/outerbounds/plugins/s3_proxy/binary_caller.py +132 -0
  89. metaflow_extensions/outerbounds/plugins/s3_proxy/constants.py +11 -0
  90. metaflow_extensions/outerbounds/plugins/s3_proxy/exceptions.py +13 -0
  91. metaflow_extensions/outerbounds/plugins/s3_proxy/proxy_bootstrap.py +59 -0
  92. metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_api.py +93 -0
  93. metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_decorator.py +250 -0
  94. metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_manager.py +225 -0
  95. metaflow_extensions/outerbounds/plugins/secrets/__init__.py +0 -0
  96. metaflow_extensions/outerbounds/plugins/secrets/secrets.py +204 -0
  97. metaflow_extensions/outerbounds/plugins/snowflake/__init__.py +3 -0
  98. metaflow_extensions/outerbounds/plugins/snowflake/snowflake.py +378 -0
  99. metaflow_extensions/outerbounds/plugins/snowpark/__init__.py +0 -0
  100. metaflow_extensions/outerbounds/plugins/snowpark/snowpark.py +309 -0
  101. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_cli.py +277 -0
  102. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +150 -0
  103. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +273 -0
  104. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_exceptions.py +13 -0
  105. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +241 -0
  106. metaflow_extensions/outerbounds/plugins/snowpark/snowpark_service_spec.py +259 -0
  107. metaflow_extensions/outerbounds/plugins/tensorboard/__init__.py +50 -0
  108. metaflow_extensions/outerbounds/plugins/torchtune/__init__.py +163 -0
  109. metaflow_extensions/outerbounds/plugins/vllm/__init__.py +255 -0
  110. metaflow_extensions/outerbounds/plugins/vllm/constants.py +1 -0
  111. metaflow_extensions/outerbounds/plugins/vllm/exceptions.py +1 -0
  112. metaflow_extensions/outerbounds/plugins/vllm/status_card.py +352 -0
  113. metaflow_extensions/outerbounds/plugins/vllm/vllm_manager.py +621 -0
  114. metaflow_extensions/outerbounds/profilers/gpu.py +131 -47
  115. metaflow_extensions/outerbounds/remote_config.py +53 -16
  116. metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +138 -2
  117. metaflow_extensions/outerbounds/toplevel/ob_internal.py +4 -0
  118. metaflow_extensions/outerbounds/toplevel/plugins/ollama/__init__.py +1 -0
  119. metaflow_extensions/outerbounds/toplevel/plugins/optuna/__init__.py +1 -0
  120. metaflow_extensions/outerbounds/toplevel/plugins/snowflake/__init__.py +1 -0
  121. metaflow_extensions/outerbounds/toplevel/plugins/torchtune/__init__.py +1 -0
  122. metaflow_extensions/outerbounds/toplevel/plugins/vllm/__init__.py +1 -0
  123. metaflow_extensions/outerbounds/toplevel/s3_proxy.py +88 -0
  124. {ob_metaflow_extensions-1.1.45rc3.dist-info → ob_metaflow_extensions-1.5.1.dist-info}/METADATA +2 -2
  125. ob_metaflow_extensions-1.5.1.dist-info/RECORD +133 -0
  126. ob_metaflow_extensions-1.1.45rc3.dist-info/RECORD +0 -19
  127. {ob_metaflow_extensions-1.1.45rc3.dist-info → ob_metaflow_extensions-1.5.1.dist-info}/WHEEL +0 -0
  128. {ob_metaflow_extensions-1.1.45rc3.dist-info → ob_metaflow_extensions-1.5.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,391 @@
1
+ import hashlib
2
+ import json
3
+ import os
4
+ import threading
5
+ import time
6
+ import uuid
7
+ from concurrent.futures import ThreadPoolExecutor
8
+ from typing import Dict
9
+
10
+ from metaflow.exception import MetaflowException
11
+ from metaflow.metaflow_config import FAST_BAKERY_URL, get_pinned_conda_libs
12
+ from metaflow.metaflow_environment import MetaflowEnvironment
13
+ from metaflow.plugins.aws.batch.batch_decorator import BatchDecorator
14
+ from metaflow.plugins.kubernetes.kubernetes_decorator import KubernetesDecorator
15
+ from metaflow.plugins.pypi.conda_decorator import CondaStepDecorator
16
+ from metaflow.plugins.pypi.conda_environment import CondaEnvironment
17
+ from metaflow.plugins.pypi.pypi_decorator import PyPIStepDecorator
18
+ from metaflow import decorators
19
+
20
+ from .fast_bakery import FastBakery, FastBakeryApiResponse, FastBakeryException
21
+
22
+ BAKERY_METAFILE = ".imagebakery-cache"
23
+
24
+ import fcntl
25
+ import json
26
+ import os
27
+ from concurrent.futures import ThreadPoolExecutor
28
+ from functools import wraps
29
+
30
+
31
def cache_request(cache_file):
    """Build a decorator that memoises bake responses in a JSON file.

    The cache key is the MD5 hex digest of the wrapped call's arguments
    (positional arguments are mapped onto the function's parameter names),
    excluding ``self`` and ``ref`` and including the configured
    ``FAST_BAKERY_URL``. On a hit, the stored payload is returned wrapped in
    a ``FastBakeryApiResponse`` and the wrapped function is not called. On a
    miss, the function is called and ``result.response`` is written back to
    the file under an exclusive ``flock``.

    Args:
        cache_file: Path of the JSON cache file. The file (and its parent
            directory, if the path has one) is created on first write.

    Returns:
        A decorator for functions whose result exposes a ``response``
        attribute that is JSON-serialisable.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            call_args = kwargs.copy()
            call_args.update(zip(func.__code__.co_varnames, args))
            call_args.pop("self", None)
            call_args.pop("ref", None)
            # invalidate cache when moving from one deployment to another
            call_args.update({"fast_bakery_url": FAST_BAKERY_URL})
            cache_key = hashlib.md5(
                json.dumps(call_args, sort_keys=True).encode("utf-8")
            ).hexdigest()

            # Optimistic, lock-free lookup. A missing or corrupt file is
            # simply treated as a cache miss.
            try:
                with open(cache_file, "r") as f:
                    cache = json.load(f)
                # Guard against a file holding valid JSON that is not an object.
                if isinstance(cache, dict) and cache_key in cache:
                    return FastBakeryApiResponse(cache[cache_key])
            except (FileNotFoundError, json.JSONDecodeError):
                pass

            result = func(*args, **kwargs)

            # os.makedirs("") raises, so only create the parent directory
            # when the cache path actually has one.
            cache_dir = os.path.dirname(cache_file)
            if cache_dir:
                os.makedirs(cache_dir, exist_ok=True)

            # "a+" creates the file when absent WITHOUT truncating it, so the
            # existing content is only discarded once the exclusive lock is
            # held. The lock is released when the file is closed.
            with open(cache_file, "a+") as f:
                fcntl.flock(f.fileno(), fcntl.LOCK_EX)
                f.seek(0)
                try:
                    cache = json.load(f)
                except json.JSONDecodeError:
                    # Empty (freshly created) or corrupt file: start over.
                    cache = {}
                if not isinstance(cache, dict):
                    cache = {}

                cache[cache_key] = result.response

                f.seek(0)
                f.truncate()
                json.dump(cache, f)

            return result

        return wrapper

    return decorator
81
+
82
+
83
class DockerEnvironmentException(MetaflowException):
    """Error raised when baking a container image fails."""

    headline = "Ran into an error while baking image"

    def __init__(self, msg):
        # Nothing extra to record; hand the message to the base exception.
        super().__init__(msg)
88
+
89
+
90
class DockerEnvironment(MetaflowEnvironment):
    """Metaflow environment that bakes container images for remote steps.

    Steps that carry a Batch or Kubernetes decorator get their dependency
    specification sent to Fast Bakery; the returned image and python path are
    written onto the remote decorator. Every other step is recorded in
    ``skipped_steps`` and handled by a ``CondaEnvironment`` delegate.
    """

    TYPE = "fast-bakery"
    _filecache = None
    _force_rebuild = False

    def __init__(self, flow):
        """Store the flow and initialise empty bake bookkeeping."""
        self.skipped_steps = set()
        self.flow = flow

        # step name -> FastBakeryApiResponse, filled by init_environment.
        self.results = {}
        # Number of bakes actually requested (cache hits are not counted).
        self.images_baked = 0

    def set_local_root(self, local_root):
        """Record the local root under which the bake cache file is kept."""
        self.local_root = local_root

    def decospecs(self):
        """Return the decorator specs to attach to every step."""
        # Due to conflicts with the CondaEnvironment fallback and bakery,
        # we can not simply attach 'conda' or 'pypi' to all steps here.
        # Instead we do this on a per-step basis in init_environment
        return ("fast_bakery_internal",) + super().decospecs()

    def validate_environment(self, logger, datastore_type):
        """Remember the logger and resolve the datastore class by its TYPE.

        Raises:
            IndexError: if no registered datastore has ``datastore_type``.
        """
        self.datastore_type = datastore_type
        self.logger = logger

        # Avoiding circular imports.
        from metaflow.plugins import DATASTORES

        self.datastore = [d for d in DATASTORES if d.TYPE == self.datastore_type][0]

    def init_environment(self, echo):
        """Attach environment decorators, bake images, and set up the fallback.

        Raises:
            MetaflowException: if a step mixes more than one @pypi/@conda
                decorator.
            DockerEnvironmentException: if a bake request fails.
        """
        self.skipped_steps = {
            step.name for step in self.flow if not _step_executes_remotely(step)
        }
        # Attach environment decorator as needed. This is done on a step-by-step basis
        # as we require a conda decorator for fallback steps, but prefer pypi for the baked ones.
        for step in self.flow:
            # Mixing @pypi/@conda in a single step is not supported yet.
            # We validate this before attaching any new ones as the OSS Conda environment requires an implicit conda decorator for pypi environments which would fail the validation.
            if sum(1 for deco in step.decorators if _is_env_deco(deco)) > 1:
                raise MetaflowException(
                    "Mixing and matching PyPI packages and Conda packages within a\n"
                    "step is not yet supported. Use one of @pypi or @conda only for the *%s* step."
                    % step.name
                )
            if step.name in self.skipped_steps:
                # Conda fallback requires a conda decorator as the default for a step
                decorators._attach_decorators_to_step(step, ["conda"])
            elif not _step_has_environment_deco(step):
                # We default to PyPI for steps that are going to be baked.
                decorators._attach_decorators_to_step(step, ["pypi"])
            # Initialize the decorator we attached.
            # This is crucial for the conda decorator to work properly in the fallback environment
            # NOTE(review): placed at loop-body level (runs for every step),
            # as the comment above implies - confirm against upstream.
            decorators._init(self.flow)
            for deco in step.decorators:
                if _is_env_deco(deco):
                    deco.step_init(
                        self.flow,
                        None,  # not passing graph as it is not available, and not required by conda/pypi decorators
                        step.name,
                        step.decorators,
                        self,
                        self.datastore,
                        echo,
                    )

        steps_to_bake = [
            step
            for step in self.flow
            if step.name not in self.skipped_steps and not self.is_disabled(step)
        ]
        if steps_to_bake:
            self.logger("🚀 Baking container image(s) ...")
            start_time = time.time()
            self.results = self._bake(steps_to_bake)
            # Point every remote decorator at the freshly baked image.
            for step in steps_to_bake:
                baked = self.results[step.name]
                for d in step.decorators:
                    if _is_remote_deco(d):
                        d.attributes["image"] = baked.container_image
                        d.attributes["executable"] = baked.python_path
            if self.images_baked > 0:
                bake_time = time.time() - start_time
                self.logger(
                    f"🎉 All container image(s) baked in {bake_time:.2f} seconds!"
                )
            else:
                # Everything came from the local cache; no timing to report.
                self.logger("🎉 All container image(s) baked!")

        if self.skipped_steps:
            self.delegate = CondaEnvironment(self.flow)
            self.delegate._force_rebuild = self._force_rebuild
            self.delegate.set_local_root(self.local_root)
            self.delegate.validate_environment(echo, self.datastore_type)
            self.delegate.init_environment(echo, self.skipped_steps)

    def _bake(self, steps) -> Dict[str, FastBakeryApiResponse]:
        """Bake one image per distinct dependency set and map it to its steps.

        Args:
            steps: Steps whose environment should be baked.

        Returns:
            Mapping of step name to the bake response for that step.

        Raises:
            DockerEnvironmentException: if Fast Bakery reports a failure.
        """
        metafile_path = get_fastbakery_metafile_path(self.local_root, self.flow.name)
        if self._force_rebuild:
            # clear the metafile if force rebuilding, effectively skipping the cache.
            try:
                os.remove(metafile_path)
            except OSError:
                # Best effort: a missing or unremovable cache file must not
                # stop the rebuild.
                pass

        # Serialises log output and the images_baked counter across workers.
        logger_lock = threading.Lock()

        @cache_request(metafile_path)
        def _cached_bake(
            ref=None,
            python=None,
            pypi_packages=None,
            conda_packages=None,
            base_image=None,
        ):
            try:
                bakery = FastBakery(url=FAST_BAKERY_URL)
                bakery._reset_payload()
                bakery.python_version(python)
                bakery.pypi_packages(pypi_packages)
                bakery.conda_packages(conda_packages)
                bakery.base_image(base_image)
                if self._force_rebuild:
                    bakery.ignore_cache()

                with logger_lock:
                    self.logger(f"🍳 Baking [{ref}] ...")
                    self.logger(f" 🐍 Python: {python}")

                    if pypi_packages:
                        self.logger(" 📦 PyPI packages:")
                        for package, version in pypi_packages.items():
                            self.logger(f" 🔧 {package}: {version}")

                    if conda_packages:
                        self.logger(" 📦 Conda packages:")
                        for package, version in conda_packages.items():
                            self.logger(f" 🔧 {package}: {version}")

                    self.logger(f" 🏗️ Base image: {base_image}")

                start_time = time.time()
                res = bakery.bake()
                # TODO: Get actual bake time from bakery
                bake_time = time.time() - start_time

                with logger_lock:
                    self.logger(f"🏁 Baked [{ref}] in {bake_time:.2f} seconds!")
                    self.images_baked += 1
                return res
            except FastBakeryException as ex:
                # Chain explicitly so the original bakery error stays visible.
                raise DockerEnvironmentException(
                    f"Bake [{ref}] failed: {str(ex)}"
                ) from ex

        def prepare_step(step):
            """Return (step name, dedup key, bake kwargs) for one step."""
            # Only the Kubernetes decorator's image is used as base image.
            base_image = next(
                (
                    d.attributes.get("image")
                    for d in step.decorators
                    if isinstance(d, KubernetesDecorator)
                ),
                None,
            )
            dependencies = next(
                (d for d in step.decorators if _is_env_deco(d)),
                None,
            )
            python = next(
                (
                    d.attributes["python"]
                    for d in step.decorators
                    if isinstance(d, CondaStepDecorator)
                ),
                None,
            )
            pypi_deco = next(
                (d for d in step.decorators if isinstance(d, PyPIStepDecorator)), None
            )
            # if pypi decorator is set and user has specified a python version, we must create a new environment.
            # otherwise rely on the base environment
            if pypi_deco is not None:
                python = (
                    pypi_deco.attributes["python"]
                    if pypi_deco.is_attribute_user_defined("python")
                    else None
                )

            packages = get_pinned_conda_libs(python, self.datastore_type)
            packages.update(dependencies.attributes["packages"] if dependencies else {})

            requested = {
                "python": python,
                "pypi_packages": (
                    packages if isinstance(dependencies, PyPIStepDecorator) else None
                ),
                "conda_packages": (
                    packages if isinstance(dependencies, CondaStepDecorator) else None
                ),
                "base_image": base_image,
            }
            # sort_keys makes the key independent of the order in which the
            # packages were declared, so equal environments deduplicate.
            dedup_key = hashlib.sha256(
                json.dumps(requested, sort_keys=True).encode("utf-8")
            ).hexdigest()

            return step.name, dedup_key, requested

        with ThreadPoolExecutor() as executor:
            prepared_args = list(executor.map(prepare_step, steps))
            # Deduplicate the requests for baking images of steps.
            # We do not want to bake the same image twice.
            dedup_requests = {}
            for step_name, key, args in prepared_args:
                if key not in dedup_requests:
                    dedup_requests[key] = {"step_names": set(), "args": args}
                dedup_requests[key]["step_names"].add(step_name)

            # unique futures
            futures = []
            for i, request in enumerate(dedup_requests.values(), 1):
                future = executor.submit(
                    _cached_bake, **{**request["args"], "ref": f"#{i:02d}"}
                )
                futures.append({"step_names": request["step_names"], "future": future})

            results = {}
            for item in futures:
                for step_name in item["step_names"]:
                    results[step_name] = item["future"].result()

        return results

    def executable(self, step_name, default=None):
        """Return the python executable to use for ``step_name``."""
        if step_name in self.skipped_steps:
            return self.delegate.executable(step_name, default)
        # default is set to the right executable
        if default is not None:
            return default
        if step_name in self.results:
            # try to read pythonpath from results. This can happen immediately after baking.
            return self.results[step_name].python_path
        # we lack a default and baking results. fallback to parent executable.
        return super().executable(step_name, default)

    def interpreter(self, step_name):
        """Return the delegate's interpreter for skipped steps, else None."""
        if step_name in self.skipped_steps:
            return self.delegate.interpreter(step_name)
        return None

    def is_disabled(self, step):
        """Return True if the step's first @conda/@pypi decorator is disabled."""
        for decorator in step.decorators:
            # @conda decorator is guaranteed to exist thanks to self.decospecs
            if decorator.name in ["conda", "pypi"]:
                # handle @conda/@pypi(disabled=True)
                disabled = decorator.attributes["disabled"]
                return str(disabled).lower() == "true"
        return False

    def pylint_config(self):
        """Return the parent pylint config with import-error disabled."""
        config = super().pylint_config()
        # Disable (import-error) in pylint
        config.append("--disable=F0401")
        return config

    def get_package_commands(
        self, codepackage_url, datastore_type, code_package_metadata=None
    ):
        """Return the parent's package commands prefixed with the skip-install export."""
        # we must set the skip install flag at this stage in order to skip package downloads,
        # doing so in bootstrap_commands is too late in the lifecycle.
        return [
            "export METAFLOW_SKIP_INSTALL_DEPENDENCIES=$FASTBAKERY_IMAGE",
        ] + super().get_package_commands(
            codepackage_url, datastore_type, code_package_metadata=code_package_metadata
        )

    def bootstrap_commands(self, step_name, datastore_type):
        """Return bootstrap commands, from the delegate for skipped steps."""
        if step_name in self.skipped_steps:
            return self.delegate.bootstrap_commands(step_name, datastore_type)
        return super().bootstrap_commands(step_name, datastore_type)
369
+
370
+
371
def get_fastbakery_metafile_path(local_root, flow_name):
    """Return the bake cache file path: local_root / flow_name / BAKERY_METAFILE."""
    flow_dir = os.path.join(local_root, flow_name)
    return os.path.join(flow_dir, BAKERY_METAFILE)
373
+
374
+
375
def _is_remote_deco(deco):
    """Tell whether ``deco`` is a Batch or Kubernetes decorator."""
    remote_decorator_types = (BatchDecorator, KubernetesDecorator)
    return isinstance(deco, remote_decorator_types)
377
+
378
+
379
def _step_executes_remotely(step):
    """Return True when at least one of the step's decorators is a remote one."""
    for deco in step.decorators:
        if _is_remote_deco(deco):
            return True
    return False
382
+
383
+
384
def _is_env_deco(deco):
    """Tell whether ``deco`` is a @pypi or @conda step decorator."""
    environment_decorator_types = (PyPIStepDecorator, CondaStepDecorator)
    return isinstance(deco, environment_decorator_types)
387
+
388
+
389
def _step_has_environment_deco(step):
    """Return True when the step carries at least one @pypi/@conda decorator."""
    for deco in step.decorators:
        if _is_env_deco(deco):
            return True
    return False
@@ -0,0 +1,188 @@
1
+ from typing import Dict, Optional
2
+ import requests
3
+ import time
4
+
5
+
6
class FastBakeryException(Exception):
    """Error raised by the Fast Bakery client."""
8
+
9
+
10
class SolverStats:
    """Thin accessor over the solver statistics mapping of a bake response."""

    def __init__(self, stats) -> None:
        # Raw mapping; values are looked up lazily by the properties below.
        self.stats = stats

    @property
    def duration_ms(self):
        """Value stored under the ``durationMs`` key."""
        raw = self.stats
        return raw["durationMs"]

    @property
    def packages_in_solved_environment(self):
        """Value stored under the ``packagesInSolvedEnvironment`` key."""
        raw = self.stats
        return raw["packagesInSolvedEnvironment"]
21
+
22
+
23
class BakingStats:
    """Thin accessor over the baking statistics mapping of a bake response."""

    def __init__(self, stats) -> None:
        # Raw mapping as received; not validated here.
        self.stats = stats

    @property
    def solver_stats(self) -> Optional[SolverStats]:
        """Solver statistics wrapped in ``SolverStats``, or None if absent."""
        if "solverStats" in self.stats:
            return SolverStats(self.stats["solverStats"])
        return None
32
+
33
+
34
class FastBakeryApiResponse:
    """Read-only view over a decoded Fast Bakery response body.

    The body counts as successful when it has a ``success`` key and as failed
    when it has a ``failure`` key; the two checks are independent.
    """

    def __init__(self, response) -> None:
        # Decoded response mapping, kept as-is so it can be cached verbatim.
        self.response = response

    @property
    def python_path(self) -> Optional[str]:
        """``success.pythonPath`` of the response, or None if not successful."""
        return self.response["success"]["pythonPath"] if self.success else None

    @property
    def container_image(self) -> Optional[str]:
        """``success.containerImage`` of the response, or None if not successful."""
        return self.response["success"]["containerImage"] if self.success else None

    @property
    def success(self) -> bool:
        """True when the response carries a ``success`` key."""
        return "success" in self.response

    @property
    def baking_stats(self) -> Optional[BakingStats]:
        """``success.bakingStats`` wrapped in ``BakingStats``.

        None when the response is not successful, or the entry is missing or
        null.
        """
        if self.success:
            succeeded = self.response["success"]
            if "bakingStats" in succeeded and succeeded["bakingStats"] is not None:
                return BakingStats(succeeded["bakingStats"])
        return None

    @property
    def failure(self) -> bool:
        """True when the response carries a ``failure`` key."""
        return "failure" in self.response
72
+
73
+
74
class FastBakery:
    """Client that builds a bake request and POSTs it to a Fast Bakery URL.

    The request payload is assembled through the chainable setter methods
    and sent by ``bake()``, which resets the payload afterwards so the
    instance can be reused.
    """

    def __init__(self, url: str):
        """Store the endpoint and start with a fresh payload.

        Raises:
            FastBakeryException: if ``url`` is empty.
        """
        if not url:
            raise FastBakeryException("Specifying a url is required.")
        self.url = url
        self.headers = {"Content-Type": "application/json", "Connection": "keep-alive"}
        self._reset_payload()

    def _reset_payload(self):
        """Start a new payload that only carries the perimeter name."""
        self._payload = {}
        from metaflow_extensions.outerbounds.remote_config import init_config
        from os import environ

        conf = init_config()
        if "OBP_PERIMETER" in conf:
            perimeter = conf["OBP_PERIMETER"]
        else:
            # if the perimeter is not in metaflow config, try to get it from the environment
            perimeter = environ.get("OBP_PERIMETER", "")

        self._payload["perimeterName"] = perimeter

    def python_version(self, version: str):
        """Set ``pythonVersion`` in the payload; returns self for chaining."""
        self._payload["pythonVersion"] = version
        return self

    def pypi_packages(self, packages: Dict[str, str]):
        """Append formatted requirements to ``pipRequirements``; returns self."""
        self._payload.setdefault("pipRequirements", []).extend(
            self._format_packages(packages)
        )
        return self

    def conda_packages(self, packages: Dict[str, str]):
        """Append formatted match specs to ``condaMatchspecs``; returns self."""
        self._payload.setdefault("condaMatchspecs", []).extend(
            self._format_packages(packages)
        )
        return self

    def base_image(self, image: str):
        """Set the base image reference in the payload; returns self."""
        self._payload["baseImage"] = {"imageReference": image}
        return self

    def image_kind(self, kind: str):
        """Set ``imageKind`` in the payload; returns self."""
        self._payload["imageKind"] = kind
        return self

    def ignore_cache(self):
        """Request a bake that bypasses every server-side cache; returns self."""
        self._payload["cacheBehavior"] = {
            "responseMaxAgeSeconds": 0,
            "layerMaxAgeSeconds": 0,
            "baseImageMaxAgeSeconds": 0,
            "overwriteExistingLayers": True,  # Used primarily to rewrite possibly corrupted layers.
        }
        return self

    @staticmethod
    def _format_packages(packages: Dict[str, str]) -> list:
        """Turn a ``{name: version}`` mapping into requirement strings.

        A version that already starts with a specifier character is appended
        verbatim, a bare version is pinned with ``==`` and an empty version
        yields just the package name. A falsy ``packages`` yields ``[]``.
        """
        if not packages:
            return []

        specifier_prefixes = (">", "<", "~", "@", "=")

        def format_package(pkg: str, ver: str) -> str:
            if ver.startswith(specifier_prefixes):
                return f"{pkg}{ver}"
            return f"{pkg}=={ver}" if ver != "" else f"{pkg}"

        return [format_package(pkg, ver) for pkg, ver in packages.items()]

    def bake(self) -> "FastBakeryApiResponse":
        """Send the current payload and reset it.

        Raises:
            FastBakeryException: if the service reports an error.
        """
        if "imageKind" not in self._payload:
            self._payload["imageKind"] = "oci-zstd"  # Set default if not specified

        res = self._make_request(self._payload)
        self._reset_payload()
        return res

    def _make_request(self, payload: Dict) -> "FastBakeryApiResponse":
        """POST ``payload``, retrying on HTTP 409, and wrap the JSON body.

        Up to three attempts are made in total, sleeping 0.5s and 1.0s
        between them. Any other status ends the loop immediately, so a
        successful request is sent exactly once.

        Raises:
            FastBakeryException: if the final response is an error.
        """
        try:
            from metaflow.metaflow_config import SERVICE_HEADERS

            headers = {**self.headers, **(SERVICE_HEADERS or {})}
        except ImportError:
            headers = self.headers

        retryable_status_codes = [409]
        max_attempts = 3  # initial attempt + 2 retries

        for attempt in range(max_attempts):
            response = requests.post(self.url, json=payload, headers=headers)

            if response.status_code not in retryable_status_codes:
                break

            if attempt < max_attempts - 1:  # Don't sleep after the last attempt
                time.sleep(0.5 * (attempt + 1))

        self._handle_error_response(response)
        return FastBakeryApiResponse(response.json())

    @staticmethod
    def _handle_error_response(response: "requests.Response"):
        """Raise ``FastBakeryException`` if ``response`` describes an error.

        An error is a 5xx status, or a status code of 400 or above taken from
        the body's ``error.statusCode`` (falling back to the HTTP status).
        """
        if response.status_code >= 500:
            raise FastBakeryException(f"Server error: {response.text}")

        try:
            body = response.json()
        except ValueError:
            # Decode errors from response.json() are ValueError subclasses.
            # Surface a non-JSON error body as a bakery error; a non-JSON
            # success body keeps propagating as before.
            if response.status_code >= 400:
                raise FastBakeryException(
                    f"Unexpected error: {response.text}"
                ) from None
            raise

        status_code = body.get("error", {}).get("statusCode", response.status_code)
        if status_code >= 400:
            try:
                details = body["error"]["details"]
                message = f"*{details['kind']}*\n{details['message']}"
            except KeyError:
                message = f"Unexpected error: {body}"
            raise FastBakeryException(message)
@@ -0,0 +1,54 @@
1
+ import json
2
+ import os
3
+ from metaflow._vendor import click
4
+ from metaflow.cli import echo_always as echo
5
+ from metaflow.plugins.datastores.local_storage import LocalStorage
6
+
7
+ from .docker_environment import get_fastbakery_metafile_path
8
+ from .fast_bakery import FastBakeryApiResponse
9
+
10
+
11
@click.group()
def cli():
    # Root command group; it does nothing itself and only serves as the
    # parent the fast_bakery group is attached to.
    pass
14
+
15
+
16
@cli.group(help="Commands related to Fast Bakery support.")
@click.pass_context
def fast_bakery(ctx):
    # Work out where this flow's bake cache file lives and stash the path on
    # the shared context object, where the subcommands read it.
    datastore_root = LocalStorage.get_datastore_root_from_config(
        echo, create_on_absent=False
    )
    flow_name = ctx.obj.flow.name
    ctx.obj.metafile_path = get_fastbakery_metafile_path(datastore_root, flow_name)
21
+
22
+
23
@fast_bakery.command(help="Purge local Fast Bakery cache.")
@click.pass_obj
def purge(obj):
    # Delete the cache file and report whether there was anything to delete.
    try:
        os.remove(obj.metafile_path)
    except FileNotFoundError:
        echo("No local Fast Bakery cache found.")
    else:
        echo("Local Fast Bakery cache purged.")
31
+
32
+
33
@fast_bakery.command(help="List the cached images")
@click.pass_obj
def images(obj):
    # Print the container image of every entry in the local bake cache.
    # A missing, corrupt or non-object cache file is reported as "no cached
    # images" instead of crashing the command.
    try:
        with open(obj.metafile_path, "r") as f:
            current_cache = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        current_cache = None

    if not isinstance(current_cache, dict) or not current_cache:
        echo("No locally cached images.")
        return

    echo("List of locally cached image tags:\n")

    for val in current_cache.values():
        response = FastBakeryApiResponse(val)
        echo(response.container_image)

    echo(
        "In order to clear the cached images, you can use the command\n *fast-bakery purge*"
    )
@@ -0,0 +1,50 @@
1
+ import os
2
+ from metaflow.decorators import StepDecorator
3
+ from metaflow.metadata_provider.metadata import MetaDatum
4
+
5
+
6
class InternalFastBakeryDecorator(StepDecorator):
    """
    Internal decorator to support Fast bakery
    """

    name = "fast_bakery_internal"

    def task_pre_step(
        self,
        step_name,
        task_datastore,
        metadata,
        run_id,
        task_id,
        flow,
        graph,
        retry_count,
        max_retries,
        ubf_context,
        inputs,
    ):
        # task_pre_step may run locally if fallback is activated for @catch
        # decorator. In that scenario, we skip collecting Kubernetes execution
        # metadata. A rudimentary way to detect non-local execution is to
        # check for the existence of METAFLOW_KUBERNETES_WORKLOAD environment
        # variable.
        if "METAFLOW_KUBERNETES_WORKLOAD" not in os.environ:
            return

        image_name = os.environ.get("FASTBAKERY_IMAGE")
        if not image_name:
            # Nothing to record when the image variable is unset or empty.
            return

        metadata_key = "fast-bakery-image-name"
        entries = [
            MetaDatum(
                field=metadata_key,
                value=image_name,
                type=metadata_key,
                tags=["attempt_id:{0}".format(retry_count)],
            )
        ]
        # Register book-keeping metadata for debugging.
        metadata.register_metadata(run_id, step_name, task_id, entries)