ob-metaflow-extensions 1.1.146__tar.gz → 1.1.148__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow-extensions might be problematic. Click here for more details.

Files changed (73) hide show
  1. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/PKG-INFO +1 -1
  2. ob-metaflow-extensions-1.1.148/metaflow_extensions/outerbounds/plugins/checkpoint_datastores/__init__.py +2 -0
  3. ob-metaflow-extensions-1.1.148/metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py +139 -0
  4. ob-metaflow-extensions-1.1.148/metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py +144 -0
  5. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_decorator.py +6 -1
  6. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/snowflake/snowflake.py +44 -4
  7. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +1 -0
  8. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/ob_metaflow_extensions.egg-info/PKG-INFO +1 -1
  9. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/ob_metaflow_extensions.egg-info/SOURCES.txt +3 -0
  10. ob-metaflow-extensions-1.1.148/ob_metaflow_extensions.egg-info/requires.txt +3 -0
  11. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/setup.py +2 -2
  12. ob-metaflow-extensions-1.1.146/ob_metaflow_extensions.egg-info/requires.txt +0 -3
  13. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/README.md +0 -0
  14. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/__init__.py +0 -0
  15. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/config/__init__.py +0 -0
  16. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/__init__.py +0 -0
  17. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/apps/__init__.py +0 -0
  18. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/apps/app_utils.py +0 -0
  19. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/apps/consts.py +0 -0
  20. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/apps/deploy_decorator.py +0 -0
  21. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/apps/supervisord_utils.py +0 -0
  22. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/auth_server.py +0 -0
  23. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/card_utilities/__init__.py +0 -0
  24. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/card_utilities/async_cards.py +0 -0
  25. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/card_utilities/extra_components.py +0 -0
  26. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/card_utilities/injector.py +0 -0
  27. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/fast_bakery/__init__.py +0 -0
  28. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +0 -0
  29. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +0 -0
  30. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_cli.py +0 -0
  31. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_decorator.py +0 -0
  32. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/kubernetes/__init__.py +0 -0
  33. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +0 -0
  34. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nim/__init__.py +0 -0
  35. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nim/card.py +0 -0
  36. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +0 -0
  37. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nim/utilities.py +0 -0
  38. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nvcf/__init__.py +0 -0
  39. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nvcf/constants.py +0 -0
  40. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nvcf/exceptions.py +0 -0
  41. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nvcf/heartbeat_store.py +0 -0
  42. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf.py +0 -0
  43. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_cli.py +0 -0
  44. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/nvcf/utils.py +0 -0
  45. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/ollama/__init__.py +0 -0
  46. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/ollama/ollama.py +0 -0
  47. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/perimeters.py +0 -0
  48. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/profilers/deco_injector.py +0 -0
  49. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/profilers/gpu_profile_decorator.py +0 -0
  50. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/secrets/__init__.py +0 -0
  51. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/secrets/secrets.py +0 -0
  52. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/snowflake/__init__.py +0 -0
  53. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/snowpark/__init__.py +0 -0
  54. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark.py +0 -0
  55. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_cli.py +0 -0
  56. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +0 -0
  57. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +0 -0
  58. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_exceptions.py +0 -0
  59. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +0 -0
  60. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_service_spec.py +0 -0
  61. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/plugins/tensorboard/__init__.py +0 -0
  62. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/profilers/__init__.py +0 -0
  63. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/profilers/gpu.py +0 -0
  64. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/remote_config.py +0 -0
  65. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/toplevel/__init__.py +0 -0
  66. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/toplevel/plugins/azure/__init__.py +0 -0
  67. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/toplevel/plugins/gcp/__init__.py +0 -0
  68. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/toplevel/plugins/kubernetes/__init__.py +0 -0
  69. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/toplevel/plugins/ollama/__init__.py +0 -0
  70. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/metaflow_extensions/outerbounds/toplevel/plugins/snowflake/__init__.py +0 -0
  71. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/ob_metaflow_extensions.egg-info/dependency_links.txt +0 -0
  72. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/ob_metaflow_extensions.egg-info/top_level.txt +0 -0
  73. {ob-metaflow-extensions-1.1.146 → ob-metaflow-extensions-1.1.148}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ob-metaflow-extensions
3
- Version: 1.1.146
3
+ Version: 1.1.148
4
4
  Summary: Outerbounds Platform Extensions for Metaflow
5
5
  Author: Outerbounds, Inc.
6
6
  License: Commercial
@@ -0,0 +1,2 @@
1
+ from .nebius import nebius_checkpoints
2
+ from .coreweave import coreweave_checkpoints
@@ -0,0 +1,139 @@
1
+ from metaflow.user_configs.config_decorators import (
2
+ MutableFlow,
3
+ MutableStep,
4
+ CustomFlowDecorator,
5
+ )
6
+ import os
7
+
8
+
9
class coreweave_checkpoints(CustomFlowDecorator):
    """
    Use the CoreWeave object store as the artifact store for the
    checkpoints/models created by the flow.

    Parameters
    ----------
    secrets: list
        A list of secrets to be added to every step. These should contain any
        secrets that are required globally and the secret for the CoreWeave
        object store. The secret should contain the following keys:
        - COREWEAVE_ACCESS_KEY
        - COREWEAVE_SECRET_KEY

    bucket_path: str
        The path to the bucket to store the checkpoints/models. Required, and
        must start with `s3://`.

    Usage
    -----
    ```python
    from metaflow import checkpoint, current, step, FlowSpec, coreweave_checkpoints

    # The secret name and bucket path below are placeholders.
    @coreweave_checkpoints(
        secrets=["my-coreweave-secret"],
        bucket_path="s3://my-bucket/checkpoints",
    )
    class MyFlow(FlowSpec):
        @checkpoint
        @step
        def start(self):
            # Saves the checkpoint in the coreweave object store
            current.checkpoint.save("./foo.txt")
            self.next(self.end)

        @step
        def end(self):
            pass
    ```
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def init(self, *args, **kwargs):
        """
        Read and validate the decorator's keyword arguments.

        Raises
        ------
        ValueError
            If `bucket_path` is missing or does not start with `s3://`, or if
            `secrets` is explicitly passed as None.
        """
        self.bucket_path = kwargs.get("bucket_path", None)
        self.secrets = kwargs.get("secrets", [])

        if self.bucket_path is None:
            raise ValueError(
                "`bucket_path` keyword argument is required for the coreweave_datastore"
            )
        if not self.bucket_path.startswith("s3://"):
            raise ValueError(
                "`bucket_path` must start with `s3://` for the coreweave_datastore"
            )

        self.coreweave_endpoint_url = "https://cwobject.com"
        if self.secrets is None:
            raise ValueError(
                "`secrets` keyword argument is required for the coreweave_datastore"
            )

    def evaluate(self, mutable_flow: MutableFlow) -> None:
        """
        Attach the CoreWeave artifact store to the flow and make sure every
        step carries a `secrets` decorator with the merged secret sources.
        """
        from metaflow import (
            checkpoint,
            model,
            huggingface_hub,
            secrets,
            with_artifact_store,
        )

        # Decorators that are removed and re-added *after* `secrets`.
        # NOTE(review): presumably so the secrets are in place before these
        # decorators run -- confirm against Metaflow's decorator ordering.
        swapping_decos = {
            "huggingface_hub": huggingface_hub,
            "model": model,
            "checkpoint": checkpoint,
        }

        def _add_secrets(step: MutableStep) -> None:
            decos_to_add = []
            already_has_secrets = False
            secrets_present_in_deco = []
            for d in step.decorators:
                if d.name in swapping_decos:
                    decos_to_add.append((d.name, d.attributes))
                elif d.name == "secrets":
                    already_has_secrets = True
                    secrets_present_in_deco.extend(d.attributes["sources"])

            # Build a fresh list for every step. Extending `self.secrets` in
            # place would leak one step's sources into all subsequent steps
            # (and into the list object the caller passed in).
            # `dict.fromkeys` de-duplicates while keeping a stable order.
            secrets_to_add = list(
                dict.fromkeys([*self.secrets, *secrets_present_in_deco])
            )

            for name, _ in decos_to_add:
                step.remove_decorator(name)

            # The existing sources are already merged into `secrets_to_add`,
            # so the old decorator is always replaced rather than duplicated.
            if already_has_secrets:
                step.remove_decorator("secrets")

            step.add_decorator(
                secrets,
                sources=secrets_to_add,
            )
            for name, attrs in decos_to_add:
                step.add_decorator(swapping_decos[name], **attrs)

        def _coreweave_config():
            # Credentials are read from the environment each time this
            # callable is invoked, not when the decorator is constructed.
            return {
                "root": self.bucket_path,
                "client_params": {
                    "aws_access_key_id": os.environ.get("COREWEAVE_ACCESS_KEY"),
                    "aws_secret_access_key": os.environ.get("COREWEAVE_SECRET_KEY"),
                    "endpoint_url": self.coreweave_endpoint_url,
                    "config": dict(s3={"addressing_style": "virtual"}),
                },
            }

        mutable_flow.add_decorator(
            with_artifact_store,
            type="coreweave",
            config=_coreweave_config,
        )

        for _step_name, step in mutable_flow.steps:
            _add_secrets(step)
@@ -0,0 +1,144 @@
1
+ from metaflow.user_configs.config_decorators import (
2
+ MutableFlow,
3
+ MutableStep,
4
+ CustomFlowDecorator,
5
+ )
6
+ import os
7
+
8
+ NEBIUS_ENDPOINT_URL = "https://storage.eu-north1.nebius.cloud:443"
9
+
10
+
11
class nebius_checkpoints(CustomFlowDecorator):
    """
    Use Nebius's S3 compatible object store as the artifact store for the
    checkpoints/models created by the flow.

    Parameters
    ----------
    secrets: list
        A list of secrets to be added to every step. These should contain any
        secrets that are required globally and the secret for the Nebius
        object store. The secret should contain the following keys:
        - NEBIUS_ACCESS_KEY
        - NEBIUS_SECRET_KEY

    bucket_path: str
        The path to the bucket to store the checkpoints/models. Required, and
        must start with `s3://`.

    endpoint_url: str
        The endpoint url for the nebius object store. Defaults to
        `https://storage.eu-north1.nebius.cloud:443`

    Usage
    -----
    ```python
    from metaflow import checkpoint, current, step, FlowSpec, nebius_checkpoints

    # The secret name and bucket path below are placeholders.
    @nebius_checkpoints(
        secrets=["my-nebius-secret"],
        bucket_path="s3://my-bucket/checkpoints",
    )
    class MyFlow(FlowSpec):
        @checkpoint
        @step
        def start(self):
            # Saves the checkpoint in the nebius object store
            current.checkpoint.save("./foo.txt")
            self.next(self.end)

        @step
        def end(self):
            pass
    ```
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def init(self, *args, **kwargs):
        """
        Read and validate the decorator's keyword arguments.

        Raises
        ------
        ValueError
            If `bucket_path` is missing or does not start with `s3://`, or if
            `secrets` is explicitly passed as None.
        """
        self.bucket_path = kwargs.get("bucket_path", None)
        self.secrets = kwargs.get("secrets", [])

        # The messages name this decorator; they previously said
        # "coreweave_datastore", copied over from the CoreWeave variant.
        if self.bucket_path is None:
            raise ValueError(
                "`bucket_path` keyword argument is required for nebius_checkpoints"
            )
        if not self.bucket_path.startswith("s3://"):
            raise ValueError(
                "`bucket_path` must start with `s3://` for nebius_checkpoints"
            )

        # An explicit `endpoint_url=None` falls back to the documented default
        # instead of handing a missing endpoint to the S3 client.
        endpoint_url = kwargs.get("endpoint_url", NEBIUS_ENDPOINT_URL)
        if endpoint_url is None:
            endpoint_url = NEBIUS_ENDPOINT_URL
        self.nebius_endpoint_url = endpoint_url

        if self.secrets is None:
            raise ValueError(
                "`secrets` keyword argument is required for nebius_checkpoints"
            )

    def evaluate(self, mutable_flow: MutableFlow) -> None:
        """
        Attach the Nebius artifact store to the flow and make sure every step
        carries a `secrets` decorator with the merged secret sources.
        """
        from metaflow import (
            checkpoint,
            model,
            huggingface_hub,
            secrets,
            with_artifact_store,
        )

        # Decorators that are removed and re-added *after* `secrets`.
        # NOTE(review): presumably so the secrets are in place before these
        # decorators run -- confirm against Metaflow's decorator ordering.
        swapping_decos = {
            "huggingface_hub": huggingface_hub,
            "model": model,
            "checkpoint": checkpoint,
        }

        def _add_secrets(step: MutableStep) -> None:
            decos_to_add = []
            already_has_secrets = False
            secrets_present_in_deco = []
            for d in step.decorators:
                if d.name in swapping_decos:
                    decos_to_add.append((d.name, d.attributes))
                elif d.name == "secrets":
                    already_has_secrets = True
                    secrets_present_in_deco.extend(d.attributes["sources"])

            # Build a fresh list for every step. Extending `self.secrets` in
            # place would leak one step's sources into all subsequent steps
            # (and into the list object the caller passed in).
            # `dict.fromkeys` de-duplicates while keeping a stable order.
            secrets_to_add = list(
                dict.fromkeys([*self.secrets, *secrets_present_in_deco])
            )

            for name, _ in decos_to_add:
                step.remove_decorator(name)

            # The existing sources are already merged into `secrets_to_add`,
            # so the old decorator is always replaced rather than duplicated.
            if already_has_secrets:
                step.remove_decorator("secrets")

            step.add_decorator(
                secrets,
                sources=secrets_to_add,
            )
            for name, attrs in decos_to_add:
                step.add_decorator(swapping_decos[name], **attrs)

        def _nebius_config():
            # Credentials are read from the environment each time this
            # callable is invoked, not when the decorator is constructed.
            return {
                "root": self.bucket_path,
                "client_params": {
                    "aws_access_key_id": os.environ.get("NEBIUS_ACCESS_KEY"),
                    "aws_secret_access_key": os.environ.get("NEBIUS_SECRET_KEY"),
                    "endpoint_url": self.nebius_endpoint_url,
                },
            }

        mutable_flow.add_decorator(
            with_artifact_store,
            type="s3",
            config=_nebius_config,
        )

        for _step_name, step in mutable_flow.steps:
            _add_secrets(step)
@@ -198,7 +198,12 @@ class NvcfDecorator(StepDecorator):
198
198
  meta["nvcf-nspectid"] = os.environ.get("NVCF_NSPECTID")
199
199
 
200
200
  entries = [
201
- MetaDatum(field=k, value=v, type=k, tags=[])
201
+ MetaDatum(
202
+ field=k,
203
+ value=v,
204
+ type=k,
205
+ tags=["attempt_id:{0}".format(retry_count)],
206
+ )
202
207
  for k, v in meta.items()
203
208
  if v is not None
204
209
  ]
@@ -7,9 +7,10 @@ from metaflow.metaflow_config import SERVICE_URL
7
7
  from metaflow.metaflow_config_funcs import init_config
8
8
  from typing import Dict
9
9
  from os import environ
10
-
10
+ import sys
11
11
  import json
12
12
  import requests
13
+ import random
13
14
  import time
14
15
 
15
16
 
@@ -75,7 +76,9 @@ def get_snowflake_token(user: str = "", role: str = "", integration: str = "") -
75
76
  }
76
77
  json_payload = json.dumps(payload)
77
78
  headers = provisioner.get_service_auth_header()
78
- response = requests.get(snowflake_token_url, data=json_payload, headers=headers)
79
+ response = _api_server_get(
80
+ snowflake_token_url, data=json_payload, headers=headers, conn_error_retries=5
81
+ )
79
82
  response.raise_for_status()
80
83
  return response.json()["token"]
81
84
 
@@ -150,6 +153,39 @@ def connect(user: str = "", role: str = "", integration: str = "", **kwargs):
150
153
  )
151
154
 
152
155
 
156
def _api_server_get(*args, conn_error_retries=2, **kwargs):
    """
    `requests.get` wrapper that retries when the API server is unreachable.

    There are two categories of errors that we need to handle when dealing
    with any API server.
    1. HTTP errors. These are errors that are returned from the API server.
        - How to handle retries for this case is application specific, so the
          response is returned to the caller untouched.
    2. Errors when the API server may not be reachable (DNS resolution /
       network issues).
        - Something external to the API server is going wrong. Failing
          prematurely might crash critical user jobs on intermittent issues,
          so in this case we plainly retry the request.

    This function handles the second case only.

    Parameters
    ----------
    *args, **kwargs
        Forwarded unchanged to `requests.get`.
    conn_error_retries : int, default 2
        Number of retries after the initial attempt. Retry `i` (1-based) is
        preceded by a sleep of roughly `2**i` seconds (plus up to half a
        second of jitter either way), so with a value of 5 the last retry
        will have waited about 32 seconds. Values below 0 are treated as 0.

    Returns
    -------
    requests.Response
        The response of the first attempt that did not raise a connection
        error.

    Raises
    ------
    requests.exceptions.ConnectionError
        Re-raised from the final attempt once all retries are exhausted.
        (Previously the function fell out of its loop and returned None,
        which surfaced in callers as an AttributeError on the response.)
    """
    max_retries = max(conn_error_retries, 0)
    for attempt in range(max_retries + 1):
        try:
            return requests.get(*args, **kwargs)
        except requests.exceptions.ConnectionError:
            if attempt >= max_retries:
                print(
                    "[@snowflake] Failed to connect to the API server. ",
                    file=sys.stderr,
                )
                raise
            # Exponential backoff of 2^(attempt+1) seconds; the jitter keeps
            # concurrent clients from retrying in lockstep.
            time.sleep((2 ** (attempt + 1)) + random.uniform(-0.5, 0.5))
187
+
188
+
153
189
  class Snowflake:
154
190
  def __init__(
155
191
  self, user: str = "", role: str = "", integration: str = "", **kwargs
@@ -273,7 +309,9 @@ class SnowflakeIntegrationProvisioner:
273
309
  retryable_status_codes = [409]
274
310
  json_payload = json.dumps(payload)
275
311
  for attempt in range(2): # 0 = initial attempt, 1-2 = retries
276
- response = requests.get(url, data=json_payload, headers=request_headers)
312
+ response = _api_server_get(
313
+ url, data=json_payload, headers=request_headers, conn_error_retries=5
314
+ )
277
315
  if response.status_code not in retryable_status_codes:
278
316
  break
279
317
 
@@ -281,7 +319,9 @@ class SnowflakeIntegrationProvisioner:
281
319
  sleep_time = 0.5 * (attempt + 1)
282
320
  time.sleep(sleep_time)
283
321
 
284
- response = requests.get(url, data=json_payload, headers=request_headers)
322
+ response = _api_server_get(
323
+ url, data=json_payload, headers=request_headers, conn_error_retries=5
324
+ )
285
325
  self._handle_error_response(response)
286
326
  return response.json()
287
327
 
@@ -52,3 +52,4 @@ def S3(*args, **kwargs):
52
52
 
53
53
  from .. import profilers
54
54
  from ..plugins.snowflake import Snowflake
55
+ from ..plugins.checkpoint_datastores import nebius_checkpoints, coreweave_checkpoints
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ob-metaflow-extensions
3
- Version: 1.1.146
3
+ Version: 1.1.148
4
4
  Summary: Outerbounds Platform Extensions for Metaflow
5
5
  Author: Outerbounds, Inc.
6
6
  License: Commercial
@@ -15,6 +15,9 @@ metaflow_extensions/outerbounds/plugins/card_utilities/__init__.py
15
15
  metaflow_extensions/outerbounds/plugins/card_utilities/async_cards.py
16
16
  metaflow_extensions/outerbounds/plugins/card_utilities/extra_components.py
17
17
  metaflow_extensions/outerbounds/plugins/card_utilities/injector.py
18
+ metaflow_extensions/outerbounds/plugins/checkpoint_datastores/__init__.py
19
+ metaflow_extensions/outerbounds/plugins/checkpoint_datastores/coreweave.py
20
+ metaflow_extensions/outerbounds/plugins/checkpoint_datastores/nebius.py
18
21
  metaflow_extensions/outerbounds/plugins/fast_bakery/__init__.py
19
22
  metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py
20
23
  metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py
@@ -0,0 +1,3 @@
1
+ boto3
2
+ kubernetes
3
+ ob-metaflow==2.15.10.1
@@ -2,7 +2,7 @@ from setuptools import setup, find_namespace_packages
2
2
  from pathlib import Path
3
3
 
4
4
 
5
- version = "1.1.146"
5
+ version = "1.1.148"
6
6
  this_directory = Path(__file__).parent
7
7
  long_description = (this_directory / "README.md").read_text()
8
8
 
@@ -18,6 +18,6 @@ setup(
18
18
  install_requires=[
19
19
  "boto3",
20
20
  "kubernetes",
21
- "ob-metaflow == 2.15.7.2",
21
+ "ob-metaflow == 2.15.10.1",
22
22
  ],
23
23
  )
@@ -1,3 +0,0 @@
1
- boto3
2
- kubernetes
3
- ob-metaflow==2.15.7.2