ob-metaflow-extensions 1.1.71__py2.py3-none-any.whl → 1.1.73__py2.py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow-extensions might be problematic. Click here for more details.

@@ -52,10 +52,17 @@ def get_token(url_path):
52
52
  return token_info
53
53
  except requests.exceptions.HTTPError as e:
54
54
  raise MetaflowException(repr(e))
55
- except requests.exceptions.ConnectionError as e:
55
+ except (
56
+ requests.exceptions.ConnectionError,
57
+ requests.exceptions.ReadTimeout,
58
+ ) as e:
56
59
  # ConnectionErrors are generally temporary errors like DNS resolution failures,
57
60
  # timeouts etc.
58
- print("received connection error. Retrying...", e, file=sys.stderr)
61
+ print(
62
+ "received error of type {}. Retrying...".format(type(e)),
63
+ e,
64
+ file=sys.stderr,
65
+ )
59
66
  time.sleep(retry_delay)
60
67
  retry_delay *= 2 # Double the delay for the next attempt
61
68
  retry_delay += random.uniform(0, 1) # Add jitter
@@ -5,6 +5,8 @@ import requests
5
5
  from urllib.parse import urlparse
6
6
  from metaflow.metaflow_config import SERVICE_URL
7
7
  from metaflow.metaflow_config_funcs import init_config
8
+ import sys
9
+ import random
8
10
 
9
11
  NVCF_URL = "https://api.nvcf.nvidia.com"
10
12
  NVCF_SUBMIT_ENDPOINT = f"{NVCF_URL}/v2/nvcf/pexec/functions"
@@ -113,6 +115,9 @@ class NimChatCompletion(object):
113
115
  self._nim_metadata = nim_metadata
114
116
  self.compute_provider = provider
115
117
  self.invocations = []
118
+ self.max_request_retries = int(
119
+ os.environ.get("METAFLOW_EXT_HTTP_MAX_RETRIES", "10")
120
+ )
116
121
 
117
122
  if self.compute_provider == "CoreWeave":
118
123
  cw_model_names = [
@@ -154,33 +159,72 @@ class NimChatCompletion(object):
154
159
  request_data = {"model": self.model, **kwargs}
155
160
  request_url = f"{NVCF_SUBMIT_ENDPOINT}/{self.function_id}"
156
161
 
157
- response = requests.post(
158
- request_url,
159
- headers=self._nim_metadata.get_headers_for_nvcf_request(),
160
- json=request_data,
161
- )
162
- response.raise_for_status()
163
- if response.status_code == 202:
164
- invocation_id = response.headers.get("NVCF-REQID")
165
- self.invocations.append(invocation_id)
166
- elif response.status_code == 200:
167
- return response.json()
162
+ attempts = 0
163
+ while attempts < self.max_request_retries:
164
+ try:
165
+ attempts += 1
166
+ response = requests.post(
167
+ request_url,
168
+ headers=self._nim_metadata.get_headers_for_nvcf_request(),
169
+ json=request_data,
170
+ )
171
+ response.raise_for_status()
172
+ if response.status_code == 202:
173
+ invocation_id = response.headers.get("NVCF-REQID")
174
+ self.invocations.append(invocation_id)
175
+ elif response.status_code == 200:
176
+ return response.json()
177
+ except (
178
+ requests.exceptions.ConnectionError,
179
+ requests.exceptions.ReadTimeout,
180
+ ) as e:
181
+ # ConnectionErrors are generally temporary errors like DNS resolution failures,
182
+ # timeouts etc.
183
+ print(
184
+ "received error of type {}. Retrying...".format(type(e)),
185
+ e,
186
+ file=sys.stderr,
187
+ )
188
+ time.sleep(retry_delay)
189
+ retry_delay *= 2 # Double the delay for the next attempt
190
+ retry_delay += random.uniform(0, 1) # Add jitter
191
+ retry_delay = min(retry_delay, 10)
168
192
 
169
193
  def _poll():
170
194
  poll_request_url = f"{NVCF_RESULT_ENDPOINT}/{invocation_id}"
171
- poll_response = requests.get(
172
- poll_request_url,
173
- headers=self._nim_metadata.get_headers_for_nvcf_request(),
174
- )
175
- poll_response.raise_for_status()
176
- if poll_response.status_code == 200:
177
- return poll_response.json()
178
- elif poll_response.status_code == 202:
179
- return 202
180
- else:
181
- raise Exception(
182
- f"NVCF returned {poll_response.status_code} status code. Please contact Outerbounds."
183
- )
195
+ attempts = 0
196
+
197
+ while attempts < self.max_request_retries:
198
+ try:
199
+ attempts += 1
200
+ poll_response = requests.get(
201
+ poll_request_url,
202
+ headers=self._nim_metadata.get_headers_for_nvcf_request(),
203
+ )
204
+ poll_response.raise_for_status()
205
+ if poll_response.status_code == 200:
206
+ return poll_response.json()
207
+ elif poll_response.status_code == 202:
208
+ return 202
209
+ else:
210
+ raise Exception(
211
+ f"NVCF returned {poll_response.status_code} status code. Please contact Outerbounds."
212
+ )
213
+ except (
214
+ requests.exceptions.ConnectionError,
215
+ requests.exceptions.ReadTimeout,
216
+ ) as e:
217
+ # ConnectionErrors are generally temporary errors like DNS resolution failures,
218
+ # timeouts etc.
219
+ print(
220
+ "received error of type {}. Retrying...".format(type(e)),
221
+ e,
222
+ file=sys.stderr,
223
+ )
224
+ time.sleep(retry_delay)
225
+ retry_delay *= 2 # Double the delay for the next attempt
226
+ retry_delay += random.uniform(0, 1) # Add jitter
227
+ retry_delay = min(retry_delay, 10)
184
228
 
185
229
  while True:
186
230
  data = _poll()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ob-metaflow-extensions
3
- Version: 1.1.71
3
+ Version: 1.1.73
4
4
  Summary: Outerbounds Platform Extensions for Metaflow
5
5
  Author: Outerbounds, Inc.
6
6
  License: Commercial
@@ -2,12 +2,12 @@ metaflow_extensions/outerbounds/__init__.py,sha256=TRGvIUMjkfneWtYUFSWoubu_Kf2ek
2
2
  metaflow_extensions/outerbounds/remote_config.py,sha256=Zpfpjgz68_ZgxlXezjzlsDLo4840rkWuZgwDB_5H57U,4059
3
3
  metaflow_extensions/outerbounds/config/__init__.py,sha256=mYo95obHU1IE1wbPkeVz_pfTzNqlNabp1QBEMTGllbE,112
4
4
  metaflow_extensions/outerbounds/plugins/__init__.py,sha256=46NgbJBhVowDR6FyQrZPF2jHHqRTSyCBCYIQAyQ4Ryo,9516
5
- metaflow_extensions/outerbounds/plugins/auth_server.py,sha256=JhlMFcR7SPSfR1C9w6GlqJq-NYNhOfISmHl2PdkYUok,2212
5
+ metaflow_extensions/outerbounds/plugins/auth_server.py,sha256=1v2GBqoMBxp5E7Lejz139w-jxJtPnLDvvHXP0HhEIHI,2361
6
6
  metaflow_extensions/outerbounds/plugins/perimeters.py,sha256=QXh3SFP7GQbS-RAIxUOPbhPzQ7KDFVxZkTdKqFKgXjI,2697
7
7
  metaflow_extensions/outerbounds/plugins/kubernetes/__init__.py,sha256=5zG8gShSj8m7rgF4xgWBZFuY3GDP5n1T0ktjRpGJLHA,69
8
8
  metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py,sha256=gj6Iaz26bGbZm3aQuNS18Mqh_80iJp5PgFwFSlJRcn8,1968
9
9
  metaflow_extensions/outerbounds/plugins/nim/__init__.py,sha256=GVnvSTjqYVj5oG2yh8KJFt7iZ33cEadDD5HbdmC9hJ0,1457
10
- metaflow_extensions/outerbounds/plugins/nim/nim_manager.py,sha256=l8WDfVtsMt7aZaOaeIPT5ySidxfxXU8gmwLoKUP3f04,7044
10
+ metaflow_extensions/outerbounds/plugins/nim/nim_manager.py,sha256=SWieODDxtIaeZwdMYtObDi57Kjyfw2DUuE6pJtU750w,9206
11
11
  metaflow_extensions/outerbounds/plugins/nvcf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  metaflow_extensions/outerbounds/plugins/nvcf/nvcf.py,sha256=ftxC5SCo64P5Ycpv5vudluTnQi3-VCZW0umdsPP326A,7926
13
13
  metaflow_extensions/outerbounds/plugins/nvcf/nvcf_cli.py,sha256=ow3lonclEDoZEUQCDV_L8lEr6HopXqjNXzubRrfdIm4,7219
@@ -19,7 +19,7 @@ metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py,
19
19
  metaflow_extensions/outerbounds/toplevel/plugins/azure/__init__.py,sha256=WUuhz2YQfI4fz7nIcipwwWq781eaoHEk7n4GAn1npDg,63
20
20
  metaflow_extensions/outerbounds/toplevel/plugins/gcp/__init__.py,sha256=BbZiaH3uILlEZ6ntBLKeNyqn3If8nIXZFq_Apd7Dhco,70
21
21
  metaflow_extensions/outerbounds/toplevel/plugins/kubernetes/__init__.py,sha256=5zG8gShSj8m7rgF4xgWBZFuY3GDP5n1T0ktjRpGJLHA,69
22
- ob_metaflow_extensions-1.1.71.dist-info/METADATA,sha256=GvyeQsYpp05xKshOovytjvOPBo1JbfmfyzhcXjBIAu0,519
23
- ob_metaflow_extensions-1.1.71.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
24
- ob_metaflow_extensions-1.1.71.dist-info/top_level.txt,sha256=NwG0ukwjygtanDETyp_BUdtYtqIA_lOjzFFh1TsnxvI,20
25
- ob_metaflow_extensions-1.1.71.dist-info/RECORD,,
22
+ ob_metaflow_extensions-1.1.73.dist-info/METADATA,sha256=d45Pkzj0dMiznAUc91auqJVpgAp7o7QD_R5DHIKqwgg,519
23
+ ob_metaflow_extensions-1.1.73.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
24
+ ob_metaflow_extensions-1.1.73.dist-info/top_level.txt,sha256=NwG0ukwjygtanDETyp_BUdtYtqIA_lOjzFFh1TsnxvI,20
25
+ ob_metaflow_extensions-1.1.73.dist-info/RECORD,,