ob-metaflow-extensions 1.1.70__tar.gz → 1.1.72__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow-extensions might be problematic. Click here for more details.

Files changed (30)
  1. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/PKG-INFO +1 -1
  2. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +60 -24
  3. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/plugins/perimeters.py +14 -3
  4. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/ob_metaflow_extensions.egg-info/PKG-INFO +1 -1
  5. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/setup.py +1 -1
  6. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/README.md +0 -0
  7. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/__init__.py +0 -0
  8. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/config/__init__.py +0 -0
  9. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/plugins/__init__.py +0 -0
  10. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/plugins/auth_server.py +0 -0
  11. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/plugins/kubernetes/__init__.py +0 -0
  12. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +0 -0
  13. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/plugins/nim/__init__.py +0 -0
  14. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/plugins/nvcf/__init__.py +0 -0
  15. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf.py +0 -0
  16. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_cli.py +0 -0
  17. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_decorator.py +0 -0
  18. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/profilers/__init__.py +0 -0
  19. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/profilers/gpu.py +0 -0
  20. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/remote_config.py +4 -4
  21. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/toplevel/__init__.py +0 -0
  22. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +0 -0
  23. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/toplevel/plugins/azure/__init__.py +0 -0
  24. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/toplevel/plugins/gcp/__init__.py +0 -0
  25. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/toplevel/plugins/kubernetes/__init__.py +0 -0
  26. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/ob_metaflow_extensions.egg-info/SOURCES.txt +0 -0
  27. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/ob_metaflow_extensions.egg-info/dependency_links.txt +0 -0
  28. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/ob_metaflow_extensions.egg-info/requires.txt +0 -0
  29. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/ob_metaflow_extensions.egg-info/top_level.txt +0 -0
  30. {ob-metaflow-extensions-1.1.70 → ob-metaflow-extensions-1.1.72}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ob-metaflow-extensions
3
- Version: 1.1.70
3
+ Version: 1.1.72
4
4
  Summary: Outerbounds Platform Extensions for Metaflow
5
5
  Author: Outerbounds, Inc.
6
6
  License: Commercial
@@ -5,6 +5,8 @@ import requests
5
5
  from urllib.parse import urlparse
6
6
  from metaflow.metaflow_config import SERVICE_URL
7
7
  from metaflow.metaflow_config_funcs import init_config
8
+ import sys
9
+ import random
8
10
 
9
11
  NVCF_URL = "https://api.nvcf.nvidia.com"
10
12
  NVCF_SUBMIT_ENDPOINT = f"{NVCF_URL}/v2/nvcf/pexec/functions"
@@ -113,6 +115,9 @@ class NimChatCompletion(object):
113
115
  self._nim_metadata = nim_metadata
114
116
  self.compute_provider = provider
115
117
  self.invocations = []
118
+ self.max_request_retries = int(
119
+ os.environ.get("METAFLOW_EXT_HTTP_MAX_RETRIES", "10")
120
+ )
116
121
 
117
122
  if self.compute_provider == "CoreWeave":
118
123
  cw_model_names = [
@@ -154,33 +159,64 @@ class NimChatCompletion(object):
154
159
  request_data = {"model": self.model, **kwargs}
155
160
  request_url = f"{NVCF_SUBMIT_ENDPOINT}/{self.function_id}"
156
161
 
157
- response = requests.post(
158
- request_url,
159
- headers=self._nim_metadata.get_headers_for_nvcf_request(),
160
- json=request_data,
161
- )
162
- response.raise_for_status()
163
- if response.status_code == 202:
164
- invocation_id = response.headers.get("NVCF-REQID")
165
- self.invocations.append(invocation_id)
166
- elif response.status_code == 200:
167
- return response.json()
162
+ attempts = 0
163
+ while attempts < self.max_request_retries:
164
+ try:
165
+ attempts += 1
166
+ response = requests.post(
167
+ request_url,
168
+ headers=self._nim_metadata.get_headers_for_nvcf_request(),
169
+ json=request_data,
170
+ )
171
+ response.raise_for_status()
172
+ if response.status_code == 202:
173
+ invocation_id = response.headers.get("NVCF-REQID")
174
+ self.invocations.append(invocation_id)
175
+ elif response.status_code == 200:
176
+ return response.json()
177
+ except requests.exceptions.ConnectionError as e:
178
+ # ConnectionErrors are generally temporary errors like DNS resolution failures,
179
+ # timeouts etc.
180
+ print(
181
+ "Encountered connection error. Retrying...", e, file=sys.stderr
182
+ )
183
+ time.sleep(retry_delay)
184
+ retry_delay *= 2 # Double the delay for the next attempt
185
+ retry_delay += random.uniform(0, 1) # Add jitter
186
+ retry_delay = min(retry_delay, 10)
168
187
 
169
188
  def _poll():
170
189
  poll_request_url = f"{NVCF_RESULT_ENDPOINT}/{invocation_id}"
171
- poll_response = requests.get(
172
- poll_request_url,
173
- headers=self._nim_metadata.get_headers_for_nvcf_request(),
174
- )
175
- poll_response.raise_for_status()
176
- if poll_response.status_code == 200:
177
- return poll_response.json()
178
- elif poll_response.status_code == 202:
179
- return 202
180
- else:
181
- raise Exception(
182
- f"NVCF returned {poll_response.status_code} status code. Please contact Outerbounds."
183
- )
190
+ attempts = 0
191
+
192
+ while attempts < self.max_request_retries:
193
+ try:
194
+ attempts += 1
195
+ poll_response = requests.get(
196
+ poll_request_url,
197
+ headers=self._nim_metadata.get_headers_for_nvcf_request(),
198
+ )
199
+ poll_response.raise_for_status()
200
+ if poll_response.status_code == 200:
201
+ return poll_response.json()
202
+ elif poll_response.status_code == 202:
203
+ return 202
204
+ else:
205
+ raise Exception(
206
+ f"NVCF returned {poll_response.status_code} status code. Please contact Outerbounds."
207
+ )
208
+ except requests.exceptions.ConnectionError as e:
209
+ # ConnectionErrors are generally temporary errors like DNS resolution failures,
210
+ # timeouts etc.
211
+ print(
212
+ "Encountered connection error. Retrying...",
213
+ e,
214
+ file=sys.stderr,
215
+ )
216
+ time.sleep(retry_delay)
217
+ retry_delay *= 2 # Double the delay for the next attempt
218
+ retry_delay += random.uniform(0, 1) # Add jitter
219
+ retry_delay = min(retry_delay, 10)
184
220
 
185
221
  while True:
186
222
  data = _poll()
@@ -7,6 +7,9 @@ from typing import Union
7
7
 
8
8
  CURRENT_PERIMETER_KEY = "OB_CURRENT_PERIMETER"
9
9
  CURRENT_PERIMETER_URL = "OB_CURRENT_PERIMETER_MF_CONFIG_URL"
10
+ CURRENT_PERIMETER_URL_LEGACY_KEY = (
11
+ "OB_CURRENT_PERIMETER_URL" # For backwards compatibility with workstations.
12
+ )
10
13
 
11
14
 
12
15
  def get_perimeter_config_url_if_set_in_ob_config() -> Union[str, None]:
@@ -37,10 +40,18 @@ def get_perimeter_config_url_if_set_in_ob_config() -> Union[str, None]:
37
40
  with open(file_path, "r") as f:
38
41
  ob_config = json.loads(f.read())
39
42
 
40
- if CURRENT_PERIMETER_KEY in ob_config and CURRENT_PERIMETER_URL in ob_config:
43
+ if CURRENT_PERIMETER_KEY in ob_config and (
44
+ CURRENT_PERIMETER_URL in ob_config
45
+ or CURRENT_PERIMETER_URL_LEGACY_KEY in ob_config
46
+ ):
41
47
  os.environ[CURRENT_PERIMETER_KEY] = ob_config[CURRENT_PERIMETER_KEY]
42
- os.environ[CURRENT_PERIMETER_URL] = ob_config[CURRENT_PERIMETER_URL]
43
- return ob_config[CURRENT_PERIMETER_URL]
48
+ if CURRENT_PERIMETER_URL in ob_config:
49
+ os.environ[CURRENT_PERIMETER_URL] = ob_config[CURRENT_PERIMETER_URL]
50
+ elif CURRENT_PERIMETER_URL_LEGACY_KEY in ob_config:
51
+ os.environ[CURRENT_PERIMETER_URL] = ob_config[
52
+ CURRENT_PERIMETER_URL_LEGACY_KEY
53
+ ]
54
+ return os.environ[CURRENT_PERIMETER_URL]
44
55
  else:
45
56
  raise MetaflowException(
46
57
  "{} does not contain the key {}".format(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ob-metaflow-extensions
3
- Version: 1.1.70
3
+ Version: 1.1.72
4
4
  Summary: Outerbounds Platform Extensions for Metaflow
5
5
  Author: Outerbounds, Inc.
6
6
  License: Commercial
@@ -2,7 +2,7 @@ from setuptools import setup, find_namespace_packages
2
2
  from pathlib import Path
3
3
 
4
4
 
5
- version = "1.1.70"
5
+ version = "1.1.72"
6
6
  this_directory = Path(__file__).parent
7
7
  long_description = (this_directory / "README.md").read_text()
8
8
 
@@ -86,14 +86,14 @@ def init_config() -> Dict[str, str]:
86
86
  command with the string provided in the Outerbounds dashboard"
87
87
  )
88
88
 
89
- # users still have a legacy format and that's ok.
90
- if OBP_REMOTE_CONFIG_KEY not in remote_config:
91
- return remote_config
92
-
93
89
  perimeter_config_url = get_perimeter_config_url_if_set_in_ob_config()
94
90
  if perimeter_config_url:
95
91
  remote_config[OBP_REMOTE_CONFIG_KEY] = perimeter_config_url
96
92
 
93
+ # users still have a legacy format and that's ok.
94
+ if OBP_REMOTE_CONFIG_KEY not in remote_config:
95
+ return remote_config
96
+
97
97
  metaflow_config = resolve_config_from_remote(
98
98
  remote_url=remote_config[OBP_REMOTE_CONFIG_KEY],
99
99
  auth_token=remote_config[AUTH_KEY],