ob-metaflow-extensions 1.1.145__tar.gz → 1.1.147__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ob-metaflow-extensions might be problematic. Click here for more details.
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/PKG-INFO +1 -1
- ob-metaflow-extensions-1.1.147/metaflow_extensions/outerbounds/plugins/checkpoint_datastores/__init__.py +2 -0
- ob-metaflow-extensions-1.1.147/metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py +139 -0
- ob-metaflow-extensions-1.1.147/metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py +144 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf.py +8 -15
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_decorator.py +6 -1
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +1 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/ob_metaflow_extensions.egg-info/PKG-INFO +1 -1
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/ob_metaflow_extensions.egg-info/SOURCES.txt +3 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/setup.py +1 -1
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/README.md +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/config/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/apps/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/apps/app_utils.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/apps/consts.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/apps/deploy_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/apps/supervisord_utils.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/auth_server.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/card_utilities/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/card_utilities/async_cards.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/card_utilities/extra_components.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/card_utilities/injector.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/fast_bakery/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_cli.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/kubernetes/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/nim/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/nim/card.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/nim/utilities.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/nvcf/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/nvcf/constants.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/nvcf/exceptions.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/nvcf/heartbeat_store.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_cli.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/nvcf/utils.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/ollama/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/ollama/ollama.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/perimeters.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/profilers/deco_injector.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/profilers/gpu_profile_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/secrets/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/secrets/secrets.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/snowflake/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/snowflake/snowflake.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/snowpark/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_cli.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_exceptions.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_service_spec.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/plugins/tensorboard/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/profilers/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/profilers/gpu.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/remote_config.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/toplevel/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/toplevel/plugins/azure/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/toplevel/plugins/gcp/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/toplevel/plugins/kubernetes/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/toplevel/plugins/ollama/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/metaflow_extensions/outerbounds/toplevel/plugins/snowflake/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/ob_metaflow_extensions.egg-info/dependency_links.txt +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/ob_metaflow_extensions.egg-info/requires.txt +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/ob_metaflow_extensions.egg-info/top_level.txt +0 -0
- {ob-metaflow-extensions-1.1.145 → ob-metaflow-extensions-1.1.147}/setup.cfg +0 -0
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
from metaflow.user_configs.config_decorators import (
|
|
2
|
+
MutableFlow,
|
|
3
|
+
MutableStep,
|
|
4
|
+
CustomFlowDecorator,
|
|
5
|
+
)
|
|
6
|
+
import os
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class coreweave_checkpoints(CustomFlowDecorator):
    """
    Flow-level decorator that sets the CoreWeave object store as the artifact
    store for checkpoints/models created by the flow.

    Parameters
    ----------
    secrets: list
        A list of secret sources to be added to every step. These should
        include any secrets that are required globally plus the secret for the
        CoreWeave object store. That secret should contain the following keys:
        - COREWEAVE_ACCESS_KEY
        - COREWEAVE_SECRET_KEY

    bucket_path: str
        The path to the bucket to store the checkpoints/models. Required, and
        must start with `s3://`.

    Usage
    -----
    ```python
    from metaflow import checkpoint, current, step, FlowSpec, coreweave_checkpoints

    @coreweave_checkpoints(
        secrets=["my-coreweave-secret"],
        bucket_path="s3://my-bucket/checkpoints",
    )
    class MyFlow(FlowSpec):
        @checkpoint
        @step
        def start(self):
            # Saves the checkpoint in the coreweave object store
            current.checkpoint.save("./foo.txt")
            self.next(self.end)

        @step
        def end(self):
            pass
    ```
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def init(self, *args, **kwargs):
        """
        Validate the keyword arguments and record them on the instance.

        Sets `bucket_path`, `secrets` and `coreweave_endpoint_url`.

        Raises
        ------
        ValueError
            If `bucket_path` is missing/None, if it does not start with
            `s3://`, or if `secrets` is explicitly passed as None.
        """
        self.bucket_path = kwargs.get("bucket_path", None)

        self.secrets = kwargs.get("secrets", [])
        if self.bucket_path is None:
            raise ValueError(
                "`bucket_path` keyword argument is required for the coreweave_datastore"
            )
        if not self.bucket_path.startswith("s3://"):
            raise ValueError(
                "`bucket_path` must start with `s3://` for the coreweave_datastore"
            )

        self.coreweave_endpoint_url = "https://cwobject.com"
        if self.secrets is None:
            raise ValueError(
                "`secrets` keyword argument is required for the coreweave_datastore"
            )

    def evaluate(self, mutable_flow: MutableFlow) -> None:
        """
        Attach `with_artifact_store` to the flow and a merged `secrets`
        decorator to every step of `mutable_flow`.
        """
        # Imported here rather than at module level, as in the original code.
        from metaflow import (
            checkpoint,
            model,
            huggingface_hub,
            secrets,
            with_artifact_store,
        )

        # Decorators that are removed and re-added after `secrets`.
        # NOTE(review): presumably this is so `secrets` is ordered ahead of
        # them on the step -- confirm against Metaflow's decorator ordering.
        swapping_decos = {
            "huggingface_hub": huggingface_hub,
            "model": model,
            "checkpoint": checkpoint,
        }

        def _add_secrets(step: MutableStep) -> None:
            """Give `step` one `secrets` decorator holding all sources."""
            decos_to_add = []
            already_has_secrets = False
            secrets_present_in_deco = []
            for d in step.decorators:
                if d.name in swapping_decos:
                    decos_to_add.append((d.name, d.attributes))
                elif d.name == "secrets":
                    already_has_secrets = True
                    secrets_present_in_deco.extend(d.attributes["sources"])

            # Build a fresh list: extending `self.secrets` in place would leak
            # one step's sources into every step processed after it. The
            # membership test (instead of `set`) keeps the order stable and
            # also works for unhashable sources such as dicts.
            secrets_to_add = []
            for source in [*self.secrets, *secrets_present_in_deco]:
                if source not in secrets_to_add:
                    secrets_to_add.append(source)

            # Drop the existing `secrets` decorator in every case, so the step
            # never ends up with two `secrets` decorators.
            if already_has_secrets:
                step.remove_decorator("secrets")
            for name, _ in decos_to_add:
                step.remove_decorator(name)

            step.add_decorator(
                secrets,
                sources=secrets_to_add,
            )
            for name, attrs in decos_to_add:
                step.add_decorator(swapping_decos[name], **attrs)

        def _coreweave_config():
            # Passed below as a callable, so the credentials are read from the
            # environment when it is invoked, not when the flow is decorated.
            return {
                "root": self.bucket_path,
                "client_params": {
                    "aws_access_key_id": os.environ.get("COREWEAVE_ACCESS_KEY"),
                    "aws_secret_access_key": os.environ.get("COREWEAVE_SECRET_KEY"),
                    "endpoint_url": self.coreweave_endpoint_url,
                    "config": dict(s3={"addressing_style": "virtual"}),
                },
            }

        mutable_flow.add_decorator(
            with_artifact_store,
            type="coreweave",
            config=_coreweave_config,
        )

        for _, step in mutable_flow.steps:
            _add_secrets(step)
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
from metaflow.user_configs.config_decorators import (
|
|
2
|
+
MutableFlow,
|
|
3
|
+
MutableStep,
|
|
4
|
+
CustomFlowDecorator,
|
|
5
|
+
)
|
|
6
|
+
import os
|
|
7
|
+
|
|
8
|
+
NEBIUS_ENDPOINT_URL = "https://storage.eu-north1.nebius.cloud:443"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class nebius_checkpoints(CustomFlowDecorator):
    """
    Flow-level decorator that sets Nebius's S3-compatible object store as the
    artifact store for checkpoints/models created by the flow.

    Parameters
    ----------
    secrets: list
        A list of secret sources to be added to every step. These should
        include any secrets that are required globally plus the secret for the
        Nebius object store. That secret should contain the following keys:
        - NEBIUS_ACCESS_KEY
        - NEBIUS_SECRET_KEY

    bucket_path: str
        The path to the bucket to store the checkpoints/models. Required, and
        must start with `s3://`.

    endpoint_url: str
        The endpoint url for the nebius object store. Defaults to `https://storage.eu-north1.nebius.cloud:443`

    Usage
    -----
    ```python
    from metaflow import checkpoint, current, step, FlowSpec, nebius_checkpoints

    @nebius_checkpoints(
        secrets=["my-nebius-secret"],
        bucket_path="s3://my-bucket/checkpoints",
    )
    class MyFlow(FlowSpec):
        @checkpoint
        @step
        def start(self):
            # Saves the checkpoint in the nebius object store
            current.checkpoint.save("./foo.txt")
            self.next(self.end)

        @step
        def end(self):
            pass
    ```
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def init(self, *args, **kwargs):
        """
        Validate the keyword arguments and record them on the instance.

        Sets `bucket_path`, `secrets` and `nebius_endpoint_url` (the latter
        falls back to the module-level `NEBIUS_ENDPOINT_URL`).

        Raises
        ------
        ValueError
            If `bucket_path` is missing/None, if it does not start with
            `s3://`, or if `secrets` is explicitly passed as None.
        """
        self.bucket_path = kwargs.get("bucket_path", None)

        self.secrets = kwargs.get("secrets", [])
        if self.bucket_path is None:
            raise ValueError(
                "`bucket_path` keyword argument is required for the nebius_datastore"
            )
        if not self.bucket_path.startswith("s3://"):
            raise ValueError(
                "`bucket_path` must start with `s3://` for the nebius_datastore"
            )

        self.nebius_endpoint_url = kwargs.get("endpoint_url", NEBIUS_ENDPOINT_URL)
        if self.secrets is None:
            raise ValueError(
                "`secrets` keyword argument is required for the nebius_datastore"
            )

    def evaluate(self, mutable_flow: MutableFlow) -> None:
        """
        Attach `with_artifact_store` to the flow and a merged `secrets`
        decorator to every step of `mutable_flow`.
        """
        # Imported here rather than at module level, as in the original code.
        from metaflow import (
            checkpoint,
            model,
            huggingface_hub,
            secrets,
            with_artifact_store,
        )

        # Decorators that are removed and re-added after `secrets`.
        # NOTE(review): presumably this is so `secrets` is ordered ahead of
        # them on the step -- confirm against Metaflow's decorator ordering.
        swapping_decos = {
            "huggingface_hub": huggingface_hub,
            "model": model,
            "checkpoint": checkpoint,
        }

        def _add_secrets(step: MutableStep) -> None:
            """Give `step` one `secrets` decorator holding all sources."""
            decos_to_add = []
            already_has_secrets = False
            secrets_present_in_deco = []
            for d in step.decorators:
                if d.name in swapping_decos:
                    decos_to_add.append((d.name, d.attributes))
                elif d.name == "secrets":
                    already_has_secrets = True
                    secrets_present_in_deco.extend(d.attributes["sources"])

            # Build a fresh list: extending `self.secrets` in place would leak
            # one step's sources into every step processed after it. The
            # membership test (instead of `set`) keeps the order stable and
            # also works for unhashable sources such as dicts.
            secrets_to_add = []
            for source in [*self.secrets, *secrets_present_in_deco]:
                if source not in secrets_to_add:
                    secrets_to_add.append(source)

            # Drop the existing `secrets` decorator in every case, so the step
            # never ends up with two `secrets` decorators.
            if already_has_secrets:
                step.remove_decorator("secrets")
            for name, _ in decos_to_add:
                step.remove_decorator(name)

            step.add_decorator(
                secrets,
                sources=secrets_to_add,
            )
            for name, attrs in decos_to_add:
                step.add_decorator(swapping_decos[name], **attrs)

        def _nebius_config():
            # Passed below as a callable, so the credentials are read from the
            # environment when it is invoked, not when the flow is decorated.
            return {
                "root": self.bucket_path,
                "client_params": {
                    "aws_access_key_id": os.environ.get("NEBIUS_ACCESS_KEY"),
                    "aws_secret_access_key": os.environ.get("NEBIUS_SECRET_KEY"),
                    "endpoint_url": self.nebius_endpoint_url,
                },
            }

        mutable_flow.add_decorator(
            with_artifact_store,
            type="s3",
            config=_nebius_config,
        )

        for _, step in mutable_flow.steps:
            _add_secrets(step)
|
|
@@ -153,7 +153,7 @@ class Nvcf(object):
|
|
|
153
153
|
|
|
154
154
|
def wait(self, stdout_location, stderr_location, echo=None):
|
|
155
155
|
def wait_for_launch(job):
|
|
156
|
-
status = job.
|
|
156
|
+
status = job._status
|
|
157
157
|
echo(
|
|
158
158
|
"Task status: %s..." % status,
|
|
159
159
|
"stderr",
|
|
@@ -215,7 +215,7 @@ class Job(object):
|
|
|
215
215
|
self._function_id = function_id
|
|
216
216
|
self._ngc_api_key = ngc_api_key
|
|
217
217
|
self._queue_timeout = queue_timeout
|
|
218
|
-
self._poll_seconds = "
|
|
218
|
+
self._poll_seconds = "3600"
|
|
219
219
|
|
|
220
220
|
# Initialize status and tracking variables
|
|
221
221
|
self._status = JobStatus.CREATED
|
|
@@ -299,7 +299,7 @@ class Job(object):
|
|
|
299
299
|
polling_thread.start()
|
|
300
300
|
|
|
301
301
|
def _long_poll_loop(self):
|
|
302
|
-
while self._long_polling_active and self.
|
|
302
|
+
while self._long_polling_active and self._status not in terminal_states:
|
|
303
303
|
try:
|
|
304
304
|
self._poll()
|
|
305
305
|
# No sleep needed - the request itself will block for up to self._poll_seconds
|
|
@@ -310,16 +310,6 @@ class Job(object):
|
|
|
310
310
|
|
|
311
311
|
self._long_polling_active = False
|
|
312
312
|
|
|
313
|
-
@property
|
|
314
|
-
def status(self):
|
|
315
|
-
# If status is already terminal, don't poll again
|
|
316
|
-
if self._status in terminal_states:
|
|
317
|
-
return self._status
|
|
318
|
-
|
|
319
|
-
# Return cached status - no need to poll
|
|
320
|
-
# Long polling loop will update the status
|
|
321
|
-
return self._status
|
|
322
|
-
|
|
323
313
|
@property
|
|
324
314
|
def id(self):
|
|
325
315
|
return self._invocation_id
|
|
@@ -327,11 +317,11 @@ class Job(object):
|
|
|
327
317
|
@property
|
|
328
318
|
def is_running(self):
|
|
329
319
|
# Job is running if it's in SUBMITTED or POLLED state
|
|
330
|
-
return self.
|
|
320
|
+
return self._status in [JobStatus.SUBMITTED, JobStatus.POLLED]
|
|
331
321
|
|
|
332
322
|
@property
|
|
333
323
|
def has_failed(self):
|
|
334
|
-
return self.
|
|
324
|
+
return self._status == JobStatus.FAILED
|
|
335
325
|
|
|
336
326
|
@property
|
|
337
327
|
def result(self):
|
|
@@ -416,6 +406,9 @@ class Job(object):
|
|
|
416
406
|
)
|
|
417
407
|
self._status = JobStatus.FAILED
|
|
418
408
|
raise NvcfPollingConnectionError(e)
|
|
409
|
+
elif e.code in [500, 504]:
|
|
410
|
+
# Don't set status to FAILED, just re-raise for retry decorator
|
|
411
|
+
raise
|
|
419
412
|
else:
|
|
420
413
|
self._status = JobStatus.FAILED
|
|
421
414
|
raise NvcfPollingConnectionError(e)
|
|
@@ -198,7 +198,12 @@ class NvcfDecorator(StepDecorator):
|
|
|
198
198
|
meta["nvcf-nspectid"] = os.environ.get("NVCF_NSPECTID")
|
|
199
199
|
|
|
200
200
|
entries = [
|
|
201
|
-
MetaDatum(
|
|
201
|
+
MetaDatum(
|
|
202
|
+
field=k,
|
|
203
|
+
value=v,
|
|
204
|
+
type=k,
|
|
205
|
+
tags=["attempt_id:{0}".format(retry_count)],
|
|
206
|
+
)
|
|
202
207
|
for k, v in meta.items()
|
|
203
208
|
if v is not None
|
|
204
209
|
]
|
|
@@ -15,6 +15,9 @@ metaflow_extensions/outerbounds/plugins/card_utilities/__init__.py
|
|
|
15
15
|
metaflow_extensions/outerbounds/plugins/card_utilities/async_cards.py
|
|
16
16
|
metaflow_extensions/outerbounds/plugins/card_utilities/extra_components.py
|
|
17
17
|
metaflow_extensions/outerbounds/plugins/card_utilities/injector.py
|
|
18
|
+
metaflow_extensions/outerbounds/plugins/checkpoint_datastores/__init__.py
|
|
19
|
+
metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py
|
|
20
|
+
metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py
|
|
18
21
|
metaflow_extensions/outerbounds/plugins/fast_bakery/__init__.py
|
|
19
22
|
metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py
|
|
20
23
|
metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|