ob-metaflow-extensions 1.1.146__tar.gz → 1.1.148__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ob-metaflow-extensions might be problematic. Click here for more details.
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/PKG-INFO +1 -1
- ob-metaflow-extensions-1.1.148/metaflow_extensions/outerbounds/plugins/checkpoint_datastores/__init__.py +2 -0
- ob-metaflow-extensions-1.1.148/metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py +139 -0
- ob-metaflow-extensions-1.1.148/metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py +144 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_decorator.py +6 -1
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/snowflake/snowflake.py +44 -4
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +1 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/ob_metaflow_extensions.egg-info/PKG-INFO +1 -1
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/ob_metaflow_extensions.egg-info/SOURCES.txt +3 -0
- ob-metaflow-extensions-1.1.148/ob_metaflow_extensions.egg-info/requires.txt +3 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/setup.py +2 -2
- ob-metaflow-extensions-1.1.146/ob_metaflow_extensions.egg-info/requires.txt +0 -3
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/README.md +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/config/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/apps/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/apps/app_utils.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/apps/consts.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/apps/deploy_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/apps/supervisord_utils.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/auth_server.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/card_utilities/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/card_utilities/async_cards.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/card_utilities/extra_components.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/card_utilities/injector.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/fast_bakery/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_cli.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/kubernetes/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nim/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nim/card.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nim/utilities.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nvcf/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nvcf/constants.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nvcf/exceptions.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nvcf/heartbeat_store.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_cli.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nvcf/utils.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/ollama/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/ollama/ollama.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/perimeters.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/profilers/deco_injector.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/profilers/gpu_profile_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/secrets/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/secrets/secrets.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/snowflake/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/snowpark/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_cli.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_exceptions.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_service_spec.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/tensorboard/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/profilers/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/profilers/gpu.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/remote_config.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/toplevel/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/toplevel/plugins/azure/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/toplevel/plugins/gcp/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/toplevel/plugins/kubernetes/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/toplevel/plugins/ollama/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/toplevel/plugins/snowflake/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/ob_metaflow_extensions.egg-info/dependency_links.txt +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/ob_metaflow_extensions.egg-info/top_level.txt +0 -0
- {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/setup.cfg +0 -0
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
from metaflow.user_configs.config_decorators import (
|
|
2
|
+
MutableFlow,
|
|
3
|
+
MutableStep,
|
|
4
|
+
CustomFlowDecorator,
|
|
5
|
+
)
|
|
6
|
+
import os
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class coreweave_checkpoints(CustomFlowDecorator):
    """
    Flow-level decorator that sets the CoreWeave object store as the artifact
    store for checkpoints/models created by the flow.

    Parameters
    ----------
    secrets: list
        A list of secrets to be added to every step. These secrets should contain any secrets that are
        required globally and the secret for the coreweave object store. The secret should contain the
        following keys:
        - COREWEAVE_ACCESS_KEY
        - COREWEAVE_SECRET_KEY

    bucket_path: str
        The path to the bucket to store the checkpoints/models. Required; must start with `s3://`.

    Raises
    ------
    ValueError
        If `bucket_path` is missing or does not start with `s3://`, or if `secrets` is explicitly `None`.

    Usage
    -----
    ```python
    from metaflow import checkpoint, current, step, FlowSpec, coreweave_checkpoints

    @coreweave_checkpoints(secrets=["my-coreweave-secret"], bucket_path="s3://my-bucket/checkpoints")
    class MyFlow(FlowSpec):
        @checkpoint
        @step
        def start(self):
            # Saves the checkpoint in the coreweave object store
            current.checkpoint.save("./foo.txt")
            self.next(self.end)

        @step
        def end(self):
            pass
    ```
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def init(self, *args, **kwargs):
        """Validate and store the `bucket_path` and `secrets` keyword arguments."""
        self.bucket_path = kwargs.get("bucket_path", None)

        self.secrets = kwargs.get("secrets", [])
        if self.bucket_path is None:
            raise ValueError(
                "`bucket_path` keyword argument is required for the coreweave_datastore"
            )
        if not self.bucket_path.startswith("s3://"):
            raise ValueError(
                "`bucket_path` must start with `s3://` for the coreweave_datastore"
            )

        self.coreweave_endpoint_url = "https://cwobject.com"
        if self.secrets is None:
            raise ValueError(
                "`secrets` keyword argument is required for the coreweave_datastore"
            )

    def evaluate(self, mutable_flow: MutableFlow) -> None:
        """
        Attach `with_artifact_store` (type "coreweave") to the flow and make sure
        every step carries a `secrets` decorator with the configured sources.
        """
        from metaflow import (
            checkpoint,
            model,
            huggingface_hub,
            secrets,
            with_artifact_store,
        )

        # Decorators that are removed and re-added AFTER `secrets`.
        # Presumably this is so the credentials are already in the environment
        # when these decorators run -- TODO confirm against decorator ordering.
        swapping_decos = {
            "huggingface_hub": huggingface_hub,
            "model": model,
            "checkpoint": checkpoint,
        }

        def _merge_sources(*groups):
            # Order-preserving de-duplication. Uses `in` on a list instead of
            # a set so that unhashable sources (e.g. dicts) do not raise.
            merged = []
            for group in groups:
                for source in group:
                    if source not in merged:
                        merged.append(source)
            return merged

        def _add_secrets(step: MutableStep) -> None:
            decos_to_add = []
            already_has_secrets = False
            secrets_present_in_deco = []
            for d in step.decorators:
                if d.name in swapping_decos:
                    decos_to_add.append((d.name, d.attributes))
                elif d.name == "secrets":
                    already_has_secrets = True
                    secrets_present_in_deco.extend(d.attributes["sources"])

            # Build a NEW list for this step. The previous implementation
            # extended `self.secrets` in place, so sources declared on one
            # step leaked into every step processed after it.
            secrets_to_add = _merge_sources(self.secrets, secrets_present_in_deco)

            # Always drop the step's own `secrets` decorator before adding the
            # merged one; its sources are already part of `secrets_to_add`.
            # NOTE(review): the original only did this when no swapped
            # decorator was present, which would add a second `secrets`
            # decorator otherwise -- confirm how `add_decorator` treats that.
            if already_has_secrets:
                step.remove_decorator("secrets")

            for deco_name, _ in decos_to_add:
                step.remove_decorator(deco_name)

            step.add_decorator(
                secrets,
                sources=secrets_to_add,
            )
            for deco_name, attrs in decos_to_add:
                step.add_decorator(swapping_decos[deco_name], **attrs)

        def _coreweave_config():
            # Evaluated lazily so the credentials are read from the
            # environment at call time rather than at decoration time.
            return {
                "root": self.bucket_path,
                "client_params": {
                    "aws_access_key_id": os.environ.get("COREWEAVE_ACCESS_KEY"),
                    "aws_secret_access_key": os.environ.get("COREWEAVE_SECRET_KEY"),
                    "endpoint_url": self.coreweave_endpoint_url,
                    "config": dict(s3={"addressing_style": "virtual"}),
                },
            }

        mutable_flow.add_decorator(
            with_artifact_store,
            type="coreweave",
            config=_coreweave_config,
        )

        for _step_name, step in mutable_flow.steps:
            _add_secrets(step)
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
from metaflow.user_configs.config_decorators import (
|
|
2
|
+
MutableFlow,
|
|
3
|
+
MutableStep,
|
|
4
|
+
CustomFlowDecorator,
|
|
5
|
+
)
|
|
6
|
+
import os
|
|
7
|
+
|
|
8
|
+
NEBIUS_ENDPOINT_URL = "https://storage.eu-north1.nebius.cloud:443"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class nebius_checkpoints(CustomFlowDecorator):
    """
    Flow-level decorator that sets Nebius's S3-compatible object store as the
    artifact store for checkpoints/models created by the flow.

    Parameters
    ----------
    secrets: list
        A list of secrets to be added to every step. These secrets should contain any secrets that are
        required globally and the secret for the nebius object store. The secret should contain the
        following keys:
        - NEBIUS_ACCESS_KEY
        - NEBIUS_SECRET_KEY

    bucket_path: str
        The path to the bucket to store the checkpoints/models. Required; must start with `s3://`.

    endpoint_url: str
        The endpoint url for the nebius object store. Defaults to `https://storage.eu-north1.nebius.cloud:443`

    Raises
    ------
    ValueError
        If `bucket_path` is missing or does not start with `s3://`, or if `secrets` is explicitly `None`.

    Usage
    -----
    ```python
    from metaflow import checkpoint, current, step, FlowSpec, nebius_checkpoints

    @nebius_checkpoints(secrets=["my-nebius-secret"], bucket_path="s3://my-bucket/checkpoints")
    class MyFlow(FlowSpec):
        @checkpoint
        @step
        def start(self):
            # Saves the checkpoint in the nebius object store
            current.checkpoint.save("./foo.txt")
            self.next(self.end)

        @step
        def end(self):
            pass
    ```
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def init(self, *args, **kwargs):
        """Validate and store `bucket_path`, `secrets` and `endpoint_url`."""
        self.bucket_path = kwargs.get("bucket_path", None)

        self.secrets = kwargs.get("secrets", [])
        # The messages below previously named `coreweave_datastore`
        # (copy-paste from the CoreWeave decorator), which pointed users at
        # the wrong decorator.
        if self.bucket_path is None:
            raise ValueError(
                "`bucket_path` keyword argument is required for the nebius_datastore"
            )
        if not self.bucket_path.startswith("s3://"):
            raise ValueError(
                "`bucket_path` must start with `s3://` for the nebius_datastore"
            )

        self.nebius_endpoint_url = kwargs.get("endpoint_url", NEBIUS_ENDPOINT_URL)
        if self.secrets is None:
            raise ValueError(
                "`secrets` keyword argument is required for the nebius_datastore"
            )

    def evaluate(self, mutable_flow: MutableFlow) -> None:
        """
        Attach `with_artifact_store` (type "s3", pointed at the Nebius endpoint)
        to the flow and make sure every step carries a `secrets` decorator
        with the configured sources.
        """
        from metaflow import (
            checkpoint,
            model,
            huggingface_hub,
            secrets,
            with_artifact_store,
        )

        # Decorators that are removed and re-added AFTER `secrets`.
        # Presumably this is so the credentials are already in the environment
        # when these decorators run -- TODO confirm against decorator ordering.
        swapping_decos = {
            "huggingface_hub": huggingface_hub,
            "model": model,
            "checkpoint": checkpoint,
        }

        def _merge_sources(*groups):
            # Order-preserving de-duplication. Uses `in` on a list instead of
            # a set so that unhashable sources (e.g. dicts) do not raise.
            merged = []
            for group in groups:
                for source in group:
                    if source not in merged:
                        merged.append(source)
            return merged

        def _add_secrets(step: MutableStep) -> None:
            decos_to_add = []
            already_has_secrets = False
            secrets_present_in_deco = []
            for d in step.decorators:
                if d.name in swapping_decos:
                    decos_to_add.append((d.name, d.attributes))
                elif d.name == "secrets":
                    already_has_secrets = True
                    secrets_present_in_deco.extend(d.attributes["sources"])

            # Build a NEW list for this step. The previous implementation
            # extended `self.secrets` in place, so sources declared on one
            # step leaked into every step processed after it.
            secrets_to_add = _merge_sources(self.secrets, secrets_present_in_deco)

            # Always drop the step's own `secrets` decorator before adding the
            # merged one; its sources are already part of `secrets_to_add`.
            # NOTE(review): the original only did this when no swapped
            # decorator was present, which would add a second `secrets`
            # decorator otherwise -- confirm how `add_decorator` treats that.
            if already_has_secrets:
                step.remove_decorator("secrets")

            for deco_name, _ in decos_to_add:
                step.remove_decorator(deco_name)

            step.add_decorator(
                secrets,
                sources=secrets_to_add,
            )
            for deco_name, attrs in decos_to_add:
                step.add_decorator(swapping_decos[deco_name], **attrs)

        def _nebius_config():
            # Evaluated lazily so the credentials are read from the
            # environment at call time rather than at decoration time.
            return {
                "root": self.bucket_path,
                "client_params": {
                    "aws_access_key_id": os.environ.get("NEBIUS_ACCESS_KEY"),
                    "aws_secret_access_key": os.environ.get("NEBIUS_SECRET_KEY"),
                    "endpoint_url": self.nebius_endpoint_url,
                },
            }

        mutable_flow.add_decorator(
            with_artifact_store,
            type="s3",
            config=_nebius_config,
        )

        for _step_name, step in mutable_flow.steps:
            _add_secrets(step)
|
|
@@ -198,7 +198,12 @@ class NvcfDecorator(StepDecorator):
|
|
|
198
198
|
meta["nvcf-nspectid"] = os.environ.get("NVCF_NSPECTID")
|
|
199
199
|
|
|
200
200
|
entries = [
|
|
201
|
-
MetaDatum(
|
|
201
|
+
MetaDatum(
|
|
202
|
+
field=k,
|
|
203
|
+
value=v,
|
|
204
|
+
type=k,
|
|
205
|
+
tags=["attempt_id:{0}".format(retry_count)],
|
|
206
|
+
)
|
|
202
207
|
for k, v in meta.items()
|
|
203
208
|
if v is not None
|
|
204
209
|
]
|
|
@@ -7,9 +7,10 @@ from metaflow.metaflow_config import SERVICE_URL
|
|
|
7
7
|
from metaflow.metaflow_config_funcs import init_config
|
|
8
8
|
from typing import Dict
|
|
9
9
|
from os import environ
|
|
10
|
-
|
|
10
|
+
import sys
|
|
11
11
|
import json
|
|
12
12
|
import requests
|
|
13
|
+
import random
|
|
13
14
|
import time
|
|
14
15
|
|
|
15
16
|
|
|
@@ -75,7 +76,9 @@ def get_snowflake_token(user: str = "", role: str = "", integration: str = "") -
|
|
|
75
76
|
}
|
|
76
77
|
json_payload = json.dumps(payload)
|
|
77
78
|
headers = provisioner.get_service_auth_header()
|
|
78
|
-
response =
|
|
79
|
+
response = _api_server_get(
|
|
80
|
+
snowflake_token_url, data=json_payload, headers=headers, conn_error_retries=5
|
|
81
|
+
)
|
|
79
82
|
response.raise_for_status()
|
|
80
83
|
return response.json()["token"]
|
|
81
84
|
|
|
@@ -150,6 +153,39 @@ def connect(user: str = "", role: str = "", integration: str = "", **kwargs):
|
|
|
150
153
|
)
|
|
151
154
|
|
|
152
155
|
|
|
156
|
+
def _api_server_get(*args, conn_error_retries=2, **kwargs):
    """
    `requests.get` wrapper that retries on connection errors with exponential backoff.

    There are two categories of errors that we need to handle when dealing with any API server.
    1. HTTP errors. These are errors that are returned from the API server.
        - How to handle retries for this case will be application specific.
    2. Errors when the API server may not be reachable (DNS resolution / network issues)
        - In this scenario, we know that something external to the API server is going wrong causing the issue.
        - Failing prematurely in this case might not be the best course of action since critical user jobs might crash on intermittent issues.
        - So in this case, we can just plainly retry the request.

    This function handles the second case. It's a simple wrapper to handle the retry logic for connection errors.
    If this function is provided a `conn_error_retries` of 5, then the last retry will have waited 32 seconds.
    Generally this is a safe enough number of retries after which we can assume that something is really broken. Until then,
    there can be intermittent issues that would resolve themselves if we retry gracefully.

    Parameters
    ----------
    *args, **kwargs
        Forwarded unchanged to `requests.get`.
    conn_error_retries : int
        Number of retries after the initial attempt. Values below 0 are treated as 0.

    Returns
    -------
    The response returned by `requests.get`. HTTP status codes are NOT inspected here.

    Raises
    ------
    requests.exceptions.ConnectionError
        Re-raised once the initial attempt and all retries have failed.
    """
    max_retries = max(conn_error_retries, 0)
    for attempt in range(max_retries + 1):
        try:
            return requests.get(*args, **kwargs)
        except requests.exceptions.ConnectionError:
            # The previous loop could never reach its `raise` branch: after
            # the last failure it slept once more and fell out of the loop,
            # returning None to callers that then call methods on it.
            if attempt >= max_retries:
                print(
                    "[@snowflake] Failed to connect to the API server. ",
                    file=sys.stderr,
                )
                raise
            # Exponential backoff of 2^(attempt+1) seconds, with fresh jitter
            # per retry. The smallest delay is 2s, so -0.5s cannot go negative.
            time.sleep((2 ** (attempt + 1)) + random.uniform(-0.5, 0.5))
|
|
187
|
+
|
|
188
|
+
|
|
153
189
|
class Snowflake:
|
|
154
190
|
def __init__(
|
|
155
191
|
self, user: str = "", role: str = "", integration: str = "", **kwargs
|
|
@@ -273,7 +309,9 @@ class SnowflakeIntegrationProvisioner:
|
|
|
273
309
|
retryable_status_codes = [409]
|
|
274
310
|
json_payload = json.dumps(payload)
|
|
275
311
|
for attempt in range(2): # 0 = initial attempt, 1-2 = retries
|
|
276
|
-
response =
|
|
312
|
+
response = _api_server_get(
|
|
313
|
+
url, data=json_payload, headers=request_headers, conn_error_retries=5
|
|
314
|
+
)
|
|
277
315
|
if response.status_code not in retryable_status_codes:
|
|
278
316
|
break
|
|
279
317
|
|
|
@@ -281,7 +319,9 @@ class SnowflakeIntegrationProvisioner:
|
|
|
281
319
|
sleep_time = 0.5 * (attempt + 1)
|
|
282
320
|
time.sleep(sleep_time)
|
|
283
321
|
|
|
284
|
-
response =
|
|
322
|
+
response = _api_server_get(
|
|
323
|
+
url, data=json_payload, headers=request_headers, conn_error_retries=5
|
|
324
|
+
)
|
|
285
325
|
self._handle_error_response(response)
|
|
286
326
|
return response.json()
|
|
287
327
|
|
|
@@ -15,6 +15,9 @@ metaflow_extensions/outerbounds/plugins/card_utilities/__init__.py
|
|
|
15
15
|
metaflow_extensions/outerbounds/plugins/card_utilities/async_cards.py
|
|
16
16
|
metaflow_extensions/outerbounds/plugins/card_utilities/extra_components.py
|
|
17
17
|
metaflow_extensions/outerbounds/plugins/card_utilities/injector.py
|
|
18
|
+
metaflow_extensions/outerbounds/plugins/checkpoint_datastores/__init__.py
|
|
19
|
+
metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py
|
|
20
|
+
metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py
|
|
18
21
|
metaflow_extensions/outerbounds/plugins/fast_bakery/__init__.py
|
|
19
22
|
metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py
|
|
20
23
|
metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py
|
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_namespace_packages
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
version = "1.1.
|
|
5
|
+
version = "1.1.148"
|
|
6
6
|
this_directory = Path(__file__).parent
|
|
7
7
|
long_description = (this_directory / "README.md").read_text()
|
|
8
8
|
|
|
@@ -18,6 +18,6 @@ setup(
|
|
|
18
18
|
install_requires=[
|
|
19
19
|
"boto3",
|
|
20
20
|
"kubernetes",
|
|
21
|
-
"ob-metaflow == 2.15.
|
|
21
|
+
"ob-metaflow == 2.15.10.1",
|
|
22
22
|
],
|
|
23
23
|
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|