ob-metaflow-extensions 1.1.45rc3__py2.py3-none-any.whl → 1.5.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ob-metaflow-extensions might be problematic. Click here for more details.
- metaflow_extensions/outerbounds/__init__.py +1 -7
- metaflow_extensions/outerbounds/config/__init__.py +35 -0
- metaflow_extensions/outerbounds/plugins/__init__.py +186 -57
- metaflow_extensions/outerbounds/plugins/apps/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/app_cli.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/app_utils.py +187 -0
- metaflow_extensions/outerbounds/plugins/apps/consts.py +3 -0
- metaflow_extensions/outerbounds/plugins/apps/core/__init__.py +15 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_state_machine.py +506 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/__init__.py +4 -0
- metaflow_extensions/outerbounds/plugins/apps/core/_vendor/spinner/spinners.py +478 -0
- metaflow_extensions/outerbounds/plugins/apps/core/app_config.py +128 -0
- metaflow_extensions/outerbounds/plugins/apps/core/app_deploy_decorator.py +330 -0
- metaflow_extensions/outerbounds/plugins/apps/core/artifacts.py +0 -0
- metaflow_extensions/outerbounds/plugins/apps/core/capsule.py +958 -0
- metaflow_extensions/outerbounds/plugins/apps/core/click_importer.py +24 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/__init__.py +3 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/code_packager.py +618 -0
- metaflow_extensions/outerbounds/plugins/apps/core/code_package/examples.py +125 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/__init__.py +15 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/cli_generator.py +165 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/config_utils.py +966 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/schema_export.py +299 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/typed_configs.py +233 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/typed_init_generator.py +537 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config/unified_config.py +1125 -0
- metaflow_extensions/outerbounds/plugins/apps/core/config_schema.yaml +337 -0
- metaflow_extensions/outerbounds/plugins/apps/core/dependencies.py +115 -0
- metaflow_extensions/outerbounds/plugins/apps/core/deployer.py +959 -0
- metaflow_extensions/outerbounds/plugins/apps/core/experimental/__init__.py +89 -0
- metaflow_extensions/outerbounds/plugins/apps/core/perimeters.py +87 -0
- metaflow_extensions/outerbounds/plugins/apps/core/secrets.py +164 -0
- metaflow_extensions/outerbounds/plugins/apps/core/utils.py +233 -0
- metaflow_extensions/outerbounds/plugins/apps/core/validations.py +17 -0
- metaflow_extensions/outerbounds/plugins/apps/deploy_decorator.py +201 -0
- metaflow_extensions/outerbounds/plugins/apps/supervisord_utils.py +243 -0
- metaflow_extensions/outerbounds/plugins/auth_server.py +28 -8
- metaflow_extensions/outerbounds/plugins/aws/__init__.py +4 -0
- metaflow_extensions/outerbounds/plugins/aws/assume_role.py +3 -0
- metaflow_extensions/outerbounds/plugins/aws/assume_role_decorator.py +118 -0
- metaflow_extensions/outerbounds/plugins/card_utilities/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/card_utilities/async_cards.py +142 -0
- metaflow_extensions/outerbounds/plugins/card_utilities/extra_components.py +545 -0
- metaflow_extensions/outerbounds/plugins/card_utilities/injector.py +70 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/__init__.py +2 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py +71 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/external_chckpt.py +85 -0
- metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py +73 -0
- metaflow_extensions/outerbounds/plugins/fast_bakery/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/fast_bakery/baker.py +110 -0
- metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +391 -0
- metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +188 -0
- metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_cli.py +54 -0
- metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_decorator.py +50 -0
- metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +79 -0
- metaflow_extensions/outerbounds/plugins/kubernetes/pod_killer.py +374 -0
- metaflow_extensions/outerbounds/plugins/nim/card.py +140 -0
- metaflow_extensions/outerbounds/plugins/nim/nim_decorator.py +101 -0
- metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +379 -0
- metaflow_extensions/outerbounds/plugins/nim/utils.py +36 -0
- metaflow_extensions/outerbounds/plugins/nvcf/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/nvcf/constants.py +3 -0
- metaflow_extensions/outerbounds/plugins/nvcf/exceptions.py +94 -0
- metaflow_extensions/outerbounds/plugins/nvcf/heartbeat_store.py +178 -0
- metaflow_extensions/outerbounds/plugins/nvcf/nvcf.py +417 -0
- metaflow_extensions/outerbounds/plugins/nvcf/nvcf_cli.py +280 -0
- metaflow_extensions/outerbounds/plugins/nvcf/nvcf_decorator.py +242 -0
- metaflow_extensions/outerbounds/plugins/nvcf/utils.py +6 -0
- metaflow_extensions/outerbounds/plugins/nvct/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/nvct/exceptions.py +71 -0
- metaflow_extensions/outerbounds/plugins/nvct/nvct.py +131 -0
- metaflow_extensions/outerbounds/plugins/nvct/nvct_cli.py +289 -0
- metaflow_extensions/outerbounds/plugins/nvct/nvct_decorator.py +286 -0
- metaflow_extensions/outerbounds/plugins/nvct/nvct_runner.py +218 -0
- metaflow_extensions/outerbounds/plugins/nvct/utils.py +29 -0
- metaflow_extensions/outerbounds/plugins/ollama/__init__.py +225 -0
- metaflow_extensions/outerbounds/plugins/ollama/constants.py +1 -0
- metaflow_extensions/outerbounds/plugins/ollama/exceptions.py +22 -0
- metaflow_extensions/outerbounds/plugins/ollama/ollama.py +1924 -0
- metaflow_extensions/outerbounds/plugins/ollama/status_card.py +292 -0
- metaflow_extensions/outerbounds/plugins/optuna/__init__.py +48 -0
- metaflow_extensions/outerbounds/plugins/perimeters.py +19 -5
- metaflow_extensions/outerbounds/plugins/profilers/deco_injector.py +70 -0
- metaflow_extensions/outerbounds/plugins/profilers/gpu_profile_decorator.py +88 -0
- metaflow_extensions/outerbounds/plugins/profilers/simple_card_decorator.py +96 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/__init__.py +7 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/binary_caller.py +132 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/constants.py +11 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/exceptions.py +13 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/proxy_bootstrap.py +59 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_api.py +93 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_decorator.py +250 -0
- metaflow_extensions/outerbounds/plugins/s3_proxy/s3_proxy_manager.py +225 -0
- metaflow_extensions/outerbounds/plugins/secrets/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/secrets/secrets.py +204 -0
- metaflow_extensions/outerbounds/plugins/snowflake/__init__.py +3 -0
- metaflow_extensions/outerbounds/plugins/snowflake/snowflake.py +378 -0
- metaflow_extensions/outerbounds/plugins/snowpark/__init__.py +0 -0
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark.py +309 -0
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_cli.py +277 -0
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +150 -0
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +273 -0
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_exceptions.py +13 -0
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +241 -0
- metaflow_extensions/outerbounds/plugins/snowpark/snowpark_service_spec.py +259 -0
- metaflow_extensions/outerbounds/plugins/tensorboard/__init__.py +50 -0
- metaflow_extensions/outerbounds/plugins/torchtune/__init__.py +163 -0
- metaflow_extensions/outerbounds/plugins/vllm/__init__.py +255 -0
- metaflow_extensions/outerbounds/plugins/vllm/constants.py +1 -0
- metaflow_extensions/outerbounds/plugins/vllm/exceptions.py +1 -0
- metaflow_extensions/outerbounds/plugins/vllm/status_card.py +352 -0
- metaflow_extensions/outerbounds/plugins/vllm/vllm_manager.py +621 -0
- metaflow_extensions/outerbounds/profilers/gpu.py +131 -47
- metaflow_extensions/outerbounds/remote_config.py +53 -16
- metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +138 -2
- metaflow_extensions/outerbounds/toplevel/ob_internal.py +4 -0
- metaflow_extensions/outerbounds/toplevel/plugins/ollama/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/plugins/optuna/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/plugins/snowflake/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/plugins/torchtune/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/plugins/vllm/__init__.py +1 -0
- metaflow_extensions/outerbounds/toplevel/s3_proxy.py +88 -0
- {ob_metaflow_extensions-1.1.45rc3.dist-info → ob_metaflow_extensions-1.5.1.dist-info}/METADATA +2 -2
- ob_metaflow_extensions-1.5.1.dist-info/RECORD +133 -0
- ob_metaflow_extensions-1.1.45rc3.dist-info/RECORD +0 -19
- {ob_metaflow_extensions-1.1.45rc3.dist-info → ob_metaflow_extensions-1.5.1.dist-info}/WHEEL +0 -0
- {ob_metaflow_extensions-1.1.45rc3.dist-info → ob_metaflow_extensions-1.5.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from metaflow.exception import MetaflowException
|
|
2
|
+
from collections import defaultdict
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class CardDecoratorInjector:
    """
    Mixin for injecting `@card` decorators from other first-class Metaflow decorators.

    The host decorator is expected to call `attach_card_decorator` (the
    original notes place this in the host's `step_init`). The outcome of the
    first attachment attempt for each (step name, card id) pair is remembered
    in a class-level cache, so later calls for the same pair do nothing.
    """

    # step name -> {card id -> bool}. True means this mixin attached the card,
    # False means a card with that id was already present on the step.
    # Defined on this class, so it is shared by every subclass that does not
    # override it.
    _first_time_init = defaultdict(dict)

    @classmethod
    def _get_first_time_init_cached_value(cls, step_name, card_id):
        """Return the cached outcome for (step_name, card_id), or None if unseen."""
        # Plain `.get` lookups so that reading never creates an entry in the
        # defaultdict.
        return cls._first_time_init.get(step_name, {}).get(card_id, None)

    @classmethod
    def _set_first_time_init_cached_value(cls, step_name, card_id, value):
        """Record the outcome of the first attachment attempt for (step_name, card_id)."""
        cls._first_time_init[step_name][card_id] = value

    def _card_deco_already_attached(self, step, card_id):
        """Return True if `step` already carries a `card` decorator whose id equals `card_id`."""
        return any(
            deco.name == "card"
            and bool(deco.attributes["id"])
            and card_id == deco.attributes["id"]
            for deco in step.decorators
        )

    def _get_step(self, flow, step_name):
        """Return the first step of `flow` named `step_name`, or None if there is none."""
        return next((step for step in flow if step.name == step_name), None)

    def _first_time_init_check(self, step_dag_node, card_id):
        """Return True when no card with `card_id` is attached to `step_dag_node` yet."""
        return not self._card_deco_already_attached(step_dag_node, card_id)

    def attach_card_decorator(
        self,
        flow,
        step_name,
        card_id,
        card_type,
        refresh_interval=5,
    ):
        """
        Attach a `@card` decorator to the step named `step_name`, at most once.

        Parameters
        ----------
        flow
            Iterable of step objects exposing `name` and `decorators`.
        step_name : str
            Name of the step that should receive the card.
        card_id : str
            Id of the card; also used to detect an already attached card.
        card_type : str
            Card type placed in the decorator spec.
        refresh_interval : int, default 5
            Value placed in the decorator spec as `refresh_interval`.

        Raises
        ------
        MetaflowException
            If `card_id` or `card_type` is falsy, or if no step named
            `step_name` exists in `flow`.
        """
        from metaflow import decorators as _decorators

        if not all([card_id, card_type]):
            raise MetaflowException(
                "`INJECTED_CARD_ID` and `INJECTED_CARD_TYPE` must be set in the `CardDecoratorInjector` Mixin"
            )

        # The class-level cache is consulted first: once an outcome is recorded
        # for this (step, card id) pair there is nothing left to do.
        if self._get_first_time_init_cached_value(step_name, card_id) is not None:
            return

        step_dag_node = self._get_step(flow, step_name)
        if step_dag_node is None:
            # Without this guard the lookup below fails with an opaque
            # AttributeError on `None.decorators`.
            raise MetaflowException(
                "Cannot attach card `%s`: step `%s` was not found in the flow."
                % (card_id, step_name)
            )

        if not self._first_time_init_check(step_dag_node, card_id):
            # A card with this id is already present on the step; remember
            # that so later calls return early.
            self._set_first_time_init_cached_value(step_name, card_id, False)
            return

        self._set_first_time_init_cached_value(step_name, card_id, True)
        _decorators._attach_decorators_to_step(
            step_dag_node,
            [
                "card:type=%s,id=%s,refresh_interval=%s"
                % (card_type, card_id, str(refresh_interval))
            ],
        )
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
from metaflow.user_decorators.user_flow_decorator import FlowMutator
|
|
2
|
+
from metaflow.user_decorators.mutable_flow import MutableFlow
|
|
3
|
+
from metaflow.user_decorators.mutable_step import MutableStep
|
|
4
|
+
from .external_chckpt import _ExternalCheckpointFlowDeco
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class coreweave_checkpoints(_ExternalCheckpointFlowDeco):

    """

    Flow-level decorator that makes the CoreWeave object store the artifact store for
    checkpoints/models created by the flow.

    Parameters
    ----------
    secrets: list
        Secrets to add to every step. They should include any globally required secrets
        plus the secret for the CoreWeave object store, which should provide the keys:
            - COREWEAVE_ACCESS_KEY
            - COREWEAVE_SECRET_KEY

    bucket_path: str
        Path of the bucket in which checkpoints/models are stored. It is required and
        must start with `s3://`.

    Usage
    -----
    ```python
    from metaflow import checkpoint, current, step, FlowSpec, coreweave_checkpoints

    @coreweave_checkpoints(secrets=["my-coreweave-secret"], bucket_path="s3://my-bucket/checkpoints")
    class MyFlow(FlowSpec):
        @checkpoint
        @step
        def start(self):
            # Saves the checkpoint in the coreweave object store
            current.checkpoint.save("./foo.txt")

        @step
        def end(self):
            pass
    ```
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def init(self, *args, **kwargs):
        """Run the shared argument validation, then pin the CoreWeave endpoint URL."""
        super().init(*args, **kwargs)
        self.coreweave_endpoint_url = "https://cwobject.com"

    def pre_mutate(self, mutable_flow: MutableFlow) -> None:
        """Add `with_artifact_store` (type `coreweave`) to the flow and wire in the secrets."""
        from metaflow import with_artifact_store

        def _coreweave_config():
            # Passed as a callable, so the environment variables are read
            # when the config is invoked rather than when the flow is mutated.
            client_params = {
                "aws_access_key_id": os.environ.get("COREWEAVE_ACCESS_KEY"),
                "aws_secret_access_key": os.environ.get("COREWEAVE_SECRET_KEY"),
                "endpoint_url": self.coreweave_endpoint_url,
                "config": {"s3": {"addressing_style": "virtual"}},
            }
            return {"root": self.bucket_path, "client_params": client_params}

        store_kwargs = {"type": "coreweave", "config": _coreweave_config}
        mutable_flow.add_decorator(with_artifact_store, deco_kwargs=store_kwargs)
        self._swap_secrets(mutable_flow)
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
from metaflow.user_decorators.user_flow_decorator import FlowMutator
|
|
2
|
+
from metaflow.user_decorators.mutable_flow import MutableFlow
|
|
3
|
+
from metaflow.user_decorators.mutable_step import MutableStep
|
|
4
|
+
import os
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class _ExternalCheckpointFlowDeco(FlowMutator):
    """
    Shared base for flow-level decorators that point checkpoints/models at an
    external object store.

    It validates the `bucket_path` / `secrets` keyword arguments and provides
    `_swap_secrets`, which ensures every step carries a `@secrets` decorator
    containing the configured sources.
    """

    def init(self, *args, **kwargs):
        """
        Read and validate the decorator's keyword arguments.

        Parameters
        ----------
        bucket_path : str
            Required; must start with `s3://`.
        secrets : list, default []
            Secret sources to add to every step; must not be None.

        Raises
        ------
        ValueError
            If `bucket_path` is missing or does not start with `s3://`, or if
            `secrets` is explicitly None.
        """
        self.bucket_path = kwargs.get("bucket_path", None)
        self.secrets = kwargs.get("secrets", [])

        # This base class is shared by several decorators, so the messages
        # name the concrete one instead of always mentioning coreweave.
        deco_name = type(self).__name__
        if self.bucket_path is None:
            raise ValueError(
                f"`bucket_path` keyword argument is required for `{deco_name}`"
            )
        if not self.bucket_path.startswith("s3://"):
            raise ValueError(
                f"`bucket_path` must start with `s3://` for `{deco_name}`"
            )
        if self.secrets is None:
            raise ValueError(
                f"`secrets` keyword argument is required for `{deco_name}`"
            )

    @staticmethod
    def _dedupe_sources(sources):
        """Return `sources` without duplicates, keeping first-seen order.

        Compares by equality instead of hashing so that dict-style secret
        sources (which are unhashable) are supported.
        """
        unique = []
        for source in sources:
            if source not in unique:
                unique.append(source)
        return unique

    def _swap_secrets(self, mutable_flow: MutableFlow) -> None:
        """
        Give every step a `@secrets` decorator holding this decorator's sources.

        Sources of an existing `@secrets` decorator on a step are merged in.
        Any `huggingface_hub`, `model` or `checkpoint` decorators on the step
        are removed and re-added after `@secrets`, with their original keyword
        arguments.
        """
        from metaflow import (
            checkpoint,
            model,
            huggingface_hub,
            secrets,
            with_artifact_store,
        )

        swapping_decos = {
            "huggingface_hub": huggingface_hub,
            "model": model,
            "checkpoint": checkpoint,
        }

        def _add_secrets(step: MutableStep) -> None:
            decos_to_add = []
            already_has_secrets = False
            # Work on a copy: extending `self.secrets` in place would leak one
            # step's secret sources into every subsequent step (and into the
            # list object the caller passed to the decorator).
            secrets_to_add = list(self.secrets)

            for name, _, _, deco_kwargs in step.decorator_specs:
                if name in swapping_decos:
                    decos_to_add.append((name, deco_kwargs))
                elif name == "secrets":
                    # If the step already has secrets, fold its sources into
                    # the ones configured on this decorator.
                    already_has_secrets = True
                    secrets_to_add.extend(deco_kwargs.get("sources") or [])
                    # NOTE(review): other kwargs of the existing @secrets
                    # (anything besides `sources`) are not carried over, as in
                    # the original implementation.

            secrets_to_add = self._dedupe_sources(secrets_to_add)

            # Drop the existing @secrets in every case so that the merged
            # decorator added below replaces it. The original only did this
            # when the step had no swappable decorators.
            if already_has_secrets:
                step.remove_decorator("secrets")

            for name, _ in decos_to_add:
                step.remove_decorator(name)

            step.add_decorator(
                secrets,
                deco_kwargs=dict(
                    sources=secrets_to_add,
                ),
            )
            # Re-add the swapped decorators after @secrets, preserving their
            # original keyword arguments.
            for name, attrs in decos_to_add:
                step.add_decorator(swapping_decos[name], deco_kwargs=attrs)

        for step_name, step in mutable_flow.steps:
            _add_secrets(step)
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from metaflow.user_decorators.mutable_flow import MutableFlow
|
|
2
|
+
from .external_chckpt import _ExternalCheckpointFlowDeco
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
NEBIUS_ENDPOINT_URL = "https://storage.eu-north1.nebius.cloud:443"
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class nebius_checkpoints(_ExternalCheckpointFlowDeco):

    """

    Flow-level decorator that makes Nebius' S3-compatible object store the artifact store for
    checkpoints/models created by the flow.

    Parameters
    ----------
    secrets: list
        Secrets to add to every step. They should include any globally required secrets
        plus the secret for the nebius object store, which should provide the keys:
            - NEBIUS_ACCESS_KEY
            - NEBIUS_SECRET_KEY

    bucket_path: str
        Path of the bucket in which checkpoints/models are stored. It is required and
        must start with `s3://`.

    endpoint_url: str
        The endpoint url for the nebius object store. Defaults to `https://storage.eu-north1.nebius.cloud:443`

    Usage
    -----
    ```python
    from metaflow import checkpoint, current, step, FlowSpec, nebius_checkpoints

    @nebius_checkpoints(secrets=["my-nebius-secret"], bucket_path="s3://my-bucket/checkpoints")
    class MyFlow(FlowSpec):
        @checkpoint
        @step
        def start(self):
            # Saves the checkpoint in the nebius object store
            current.checkpoint.save("./foo.txt")

        @step
        def end(self):
            pass
    ```
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def init(self, *args, **kwargs):
        """Run the shared argument validation, then resolve the endpoint URL."""
        super().init(*args, **kwargs)
        # An explicit `endpoint_url` keyword wins over the module default.
        self.nebius_endpoint_url = kwargs.get("endpoint_url", NEBIUS_ENDPOINT_URL)

    def pre_mutate(self, mutable_flow: MutableFlow) -> None:
        """Add `with_artifact_store` (type `s3`) to the flow and wire in the secrets."""
        from metaflow import with_artifact_store

        def _nebius_config():
            # Passed as a callable, so the environment variables are read
            # when the config is invoked rather than when the flow is mutated.
            client_params = {
                "aws_access_key_id": os.environ.get("NEBIUS_ACCESS_KEY"),
                "aws_secret_access_key": os.environ.get("NEBIUS_SECRET_KEY"),
                "endpoint_url": self.nebius_endpoint_url,
            }
            return {"root": self.bucket_path, "client_params": client_params}

        store_kwargs = {"type": "s3", "config": _nebius_config}
        mutable_flow.add_decorator(with_artifact_store, deco_kwargs=store_kwargs)
        self._swap_secrets(mutable_flow)
|
|
File without changes
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import threading
|
|
2
|
+
import time
|
|
3
|
+
import sys
|
|
4
|
+
from typing import Dict, Optional, Any, Callable
|
|
5
|
+
from functools import partial
|
|
6
|
+
from metaflow.exception import MetaflowException
|
|
7
|
+
from metaflow.metaflow_config import FAST_BAKERY_URL
|
|
8
|
+
|
|
9
|
+
from .fast_bakery import FastBakery, FastBakeryApiResponse, FastBakeryException
|
|
10
|
+
from .docker_environment import cache_request
|
|
11
|
+
|
|
12
|
+
BAKERY_METAFILE = ".imagebakery-cache"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class BakerException(MetaflowException):
    """Error raised when baking an image fails."""

    headline = "Ran into an error while baking image"

    def __init__(self, msg):
        # Forward the message unchanged to MetaflowException.
        super().__init__(msg)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def bake_image(
    cache_file_path: str,
    ref: Optional[str] = None,
    python: Optional[str] = None,
    pypi_packages: Optional[Dict[str, str]] = None,
    conda_packages: Optional[Dict[str, str]] = None,
    base_image: Optional[str] = None,
    logger: Optional[Callable[[str], Any]] = None,
) -> FastBakeryApiResponse:
    """
    Bakes a Docker image with the specified dependencies.

    The bake request is issued through a helper wrapped with
    `cache_request(cache_file_path)`; presumably this lets repeated requests
    be served from the cache file, but that decorator's implementation is
    not visible here.

    Args:
        cache_file_path: Path to the cache file
        ref: Reference identifier for this bake (for logging purposes)
        python: Python version to use
        pypi_packages: Dictionary of PyPI packages and versions
        conda_packages: Dictionary of Conda packages and versions
        base_image: Base Docker image to use
        logger: Optional logger function to output progress; defaults to
            printing on stderr

    Returns:
        FastBakeryApiResponse: The response from the bakery service

    Raises:
        BakerException: If the baking process fails. The underlying
            FastBakeryException is attached as `__cause__`.
    """
    # Default logger if none provided
    if logger is None:
        logger = partial(print, file=sys.stderr)

    # Keeps the multi-line progress report for one bake contiguous if the
    # logger is ever called from several threads.
    logger_lock = threading.Lock()

    @cache_request(cache_file_path)
    def _cached_bake(
        ref=None,
        python=None,
        pypi_packages=None,
        conda_packages=None,
        base_image=None,
    ):
        try:
            bakery = FastBakery(url=FAST_BAKERY_URL)
            bakery._reset_payload()
            bakery.python_version(python)
            bakery.pypi_packages(pypi_packages)
            bakery.conda_packages(conda_packages)
            bakery.base_image(base_image)

            with logger_lock:
                logger(f"🍳 Baking [{ref}] ...")
                logger(f" 🐍 Python: {python}")

                if pypi_packages:
                    logger(" 📦 PyPI packages:")
                    for package, version in pypi_packages.items():
                        logger(f" 🔧 {package}: {version}")

                if conda_packages:
                    logger(" 📦 Conda packages:")
                    for package, version in conda_packages.items():
                        logger(f" 🔧 {package}: {version}")

                logger(f" 🏗️ Base image: {base_image}")

            start_time = time.time()
            res = bakery.bake()
            # TODO: Get actual bake time from bakery
            bake_time = time.time() - start_time

            with logger_lock:
                logger(f"🏁 Baked [{ref}] in {bake_time:.2f} seconds!")
            return res
        except FastBakeryException as ex:
            # Chain explicitly so the original bakery error stays attached.
            raise BakerException(f"Bake [{ref}] failed: {str(ex)}") from ex

    # Call the cached bake function with the provided parameters
    return _cached_bake(
        ref=ref,
        python=python,
        pypi_packages=pypi_packages,
        conda_packages=conda_packages,
        base_image=base_image,
    )
|