ob-metaflow-extensions 1.1.71__tar.gz → 1.1.72__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ob-metaflow-extensions might be problematic. Click here for more details.
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/PKG-INFO +1 -1
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +60 -24
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/ob_metaflow_extensions.egg-info/PKG-INFO +1 -1
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/setup.py +1 -1
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/README.md +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/config/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/plugins/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/plugins/auth_server.py +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/plugins/kubernetes/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/plugins/nim/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/plugins/nvcf/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf.py +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_cli.py +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/plugins/perimeters.py +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/profilers/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/profilers/gpu.py +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/remote_config.py +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/toplevel/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/toplevel/plugins/azure/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/toplevel/plugins/gcp/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/metaflow_extensions/outerbounds/toplevel/plugins/kubernetes/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/ob_metaflow_extensions.egg-info/SOURCES.txt +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/ob_metaflow_extensions.egg-info/dependency_links.txt +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/ob_metaflow_extensions.egg-info/requires.txt +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/ob_metaflow_extensions.egg-info/top_level.txt +0 -0
- {ob-metaflow-extensions-1.1.71 → ob-metaflow-extensions-1.1.72}/setup.cfg +0 -0
|
@@ -5,6 +5,8 @@ import requests
|
|
|
5
5
|
from urllib.parse import urlparse
|
|
6
6
|
from metaflow.metaflow_config import SERVICE_URL
|
|
7
7
|
from metaflow.metaflow_config_funcs import init_config
|
|
8
|
+
import sys
|
|
9
|
+
import random
|
|
8
10
|
|
|
9
11
|
NVCF_URL = "https://api.nvcf.nvidia.com"
|
|
10
12
|
NVCF_SUBMIT_ENDPOINT = f"{NVCF_URL}/v2/nvcf/pexec/functions"
|
|
@@ -113,6 +115,9 @@ class NimChatCompletion(object):
|
|
|
113
115
|
self._nim_metadata = nim_metadata
|
|
114
116
|
self.compute_provider = provider
|
|
115
117
|
self.invocations = []
|
|
118
|
+
self.max_request_retries = int(
|
|
119
|
+
os.environ.get("METAFLOW_EXT_HTTP_MAX_RETRIES", "10")
|
|
120
|
+
)
|
|
116
121
|
|
|
117
122
|
if self.compute_provider == "CoreWeave":
|
|
118
123
|
cw_model_names = [
|
|
@@ -154,33 +159,64 @@ class NimChatCompletion(object):
|
|
|
154
159
|
request_data = {"model": self.model, **kwargs}
|
|
155
160
|
request_url = f"{NVCF_SUBMIT_ENDPOINT}/{self.function_id}"
|
|
156
161
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
162
|
+
attempts = 0
|
|
163
|
+
while attempts < self.max_request_retries:
|
|
164
|
+
try:
|
|
165
|
+
attempts += 1
|
|
166
|
+
response = requests.post(
|
|
167
|
+
request_url,
|
|
168
|
+
headers=self._nim_metadata.get_headers_for_nvcf_request(),
|
|
169
|
+
json=request_data,
|
|
170
|
+
)
|
|
171
|
+
response.raise_for_status()
|
|
172
|
+
if response.status_code == 202:
|
|
173
|
+
invocation_id = response.headers.get("NVCF-REQID")
|
|
174
|
+
self.invocations.append(invocation_id)
|
|
175
|
+
elif response.status_code == 200:
|
|
176
|
+
return response.json()
|
|
177
|
+
except requests.exceptions.ConnectionError as e:
|
|
178
|
+
# ConnectionErrors are generally temporary errors like DNS resolution failures,
|
|
179
|
+
# timeouts etc.
|
|
180
|
+
print(
|
|
181
|
+
"Encountered connection error. Retrying...", e, file=sys.stderr
|
|
182
|
+
)
|
|
183
|
+
time.sleep(retry_delay)
|
|
184
|
+
retry_delay *= 2 # Double the delay for the next attempt
|
|
185
|
+
retry_delay += random.uniform(0, 1) # Add jitter
|
|
186
|
+
retry_delay = min(retry_delay, 10)
|
|
168
187
|
|
|
169
188
|
def _poll():
|
|
170
189
|
poll_request_url = f"{NVCF_RESULT_ENDPOINT}/{invocation_id}"
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
190
|
+
attempts = 0
|
|
191
|
+
|
|
192
|
+
while attempts < self.max_request_retries:
|
|
193
|
+
try:
|
|
194
|
+
attempts += 1
|
|
195
|
+
poll_response = requests.get(
|
|
196
|
+
poll_request_url,
|
|
197
|
+
headers=self._nim_metadata.get_headers_for_nvcf_request(),
|
|
198
|
+
)
|
|
199
|
+
poll_response.raise_for_status()
|
|
200
|
+
if poll_response.status_code == 200:
|
|
201
|
+
return poll_response.json()
|
|
202
|
+
elif poll_response.status_code == 202:
|
|
203
|
+
return 202
|
|
204
|
+
else:
|
|
205
|
+
raise Exception(
|
|
206
|
+
f"NVCF returned {poll_response.status_code} status code. Please contact Outerbounds."
|
|
207
|
+
)
|
|
208
|
+
except requests.exceptions.ConnectionError as e:
|
|
209
|
+
# ConnectionErrors are generally temporary errors like DNS resolution failures,
|
|
210
|
+
# timeouts etc.
|
|
211
|
+
print(
|
|
212
|
+
"Encountered connection error. Retrying...",
|
|
213
|
+
e,
|
|
214
|
+
file=sys.stderr,
|
|
215
|
+
)
|
|
216
|
+
time.sleep(retry_delay)
|
|
217
|
+
retry_delay *= 2 # Double the delay for the next attempt
|
|
218
|
+
retry_delay += random.uniform(0, 1) # Add jitter
|
|
219
|
+
retry_delay = min(retry_delay, 10)
|
|
184
220
|
|
|
185
221
|
while True:
|
|
186
222
|
data = _poll()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|