ob-metaflow-extensions 1.1.142__tar.gz → 1.1.144__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ob-metaflow-extensions might be problematic. Click here for more details.
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/PKG-INFO +1 -1
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/__init__.py +7 -2
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf.py +59 -8
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/ob_metaflow_extensions.egg-info/PKG-INFO +1 -1
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/setup.py +1 -1
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/README.md +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/config/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/apps/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/apps/app_utils.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/apps/consts.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/apps/deploy_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/apps/supervisord_utils.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/auth_server.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/card_utilities/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/card_utilities/async_cards.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/card_utilities/extra_components.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/card_utilities/injector.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/fast_bakery/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_cli.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/kubernetes/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nim/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nim/card.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nim/utilities.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nvcf/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nvcf/constants.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nvcf/exceptions.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nvcf/heartbeat_store.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_cli.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nvcf/utils.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/ollama/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/ollama/ollama.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/perimeters.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/profilers/deco_injector.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/profilers/gpu_profile_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/secrets/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/secrets/secrets.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/snowflake/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/snowflake/snowflake.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/snowpark/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_cli.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_exceptions.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_service_spec.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/tensorboard/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/profilers/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/profilers/gpu.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/remote_config.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/toplevel/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/toplevel/plugins/azure/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/toplevel/plugins/gcp/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/toplevel/plugins/kubernetes/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/toplevel/plugins/ollama/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/toplevel/plugins/snowflake/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/ob_metaflow_extensions.egg-info/SOURCES.txt +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/ob_metaflow_extensions.egg-info/dependency_links.txt +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/ob_metaflow_extensions.egg-info/requires.txt +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/ob_metaflow_extensions.egg-info/top_level.txt +0 -0
- {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/setup.cfg +0 -0
|
@@ -162,13 +162,18 @@ class ObpAuthProvider(object):
|
|
|
162
162
|
client_params = {}
|
|
163
163
|
|
|
164
164
|
from botocore.exceptions import ClientError
|
|
165
|
+
from botocore.config import Config
|
|
165
166
|
|
|
166
167
|
with hide_access_keys():
|
|
167
168
|
session = get_boto3_session(role_arn, session_vars)
|
|
169
|
+
_client_params = client_params.copy()
|
|
170
|
+
if _client_params.get("config") and type(_client_params["config"]) == dict:
|
|
171
|
+
_client_params["config"] = Config(**_client_params["config"])
|
|
172
|
+
|
|
168
173
|
if with_error:
|
|
169
|
-
return session.client(module, **client_params), ClientError
|
|
174
|
+
return session.client(module, **_client_params), ClientError
|
|
170
175
|
else:
|
|
171
|
-
return session.client(module, **client_params)
|
|
176
|
+
return session.client(module, **_client_params)
|
|
172
177
|
|
|
173
178
|
|
|
174
179
|
AWS_CLIENT_PROVIDERS_DESC = [("obp", ".ObpAuthProvider")]
|
|
@@ -188,10 +188,12 @@ class Nvcf(object):
|
|
|
188
188
|
|
|
189
189
|
|
|
190
190
|
class JobStatus(object):
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
191
|
+
CREATED = "CREATED" # Job object created but not submitted
|
|
192
|
+
SUBMITTED = "SUBMITTED" # Job submitted to NVCF
|
|
193
|
+
POLLED = "POLLED" # Job has been successfully polled at least once
|
|
194
|
+
SUCCESSFUL = "SUCCESSFUL" # Job completed successfully
|
|
195
|
+
FAILED = "FAILED" # Job failed
|
|
196
|
+
DISAPPEARED = "DISAPPEARED" # Job disappeared from NVCF but was previously polled (likely successful)
|
|
195
197
|
|
|
196
198
|
|
|
197
199
|
nvcf_url = "https://api.nvcf.nvidia.com"
|
|
@@ -213,6 +215,11 @@ class Job(object):
|
|
|
213
215
|
self._queue_timeout = queue_timeout
|
|
214
216
|
self._poll_seconds = "3600"
|
|
215
217
|
|
|
218
|
+
# Initialize status and tracking variables
|
|
219
|
+
self._status = JobStatus.CREATED
|
|
220
|
+
self._last_poll_time = time.time()
|
|
221
|
+
self._force_poll_interval = 30
|
|
222
|
+
|
|
216
223
|
flow_name = task_spec.get("flow_name")
|
|
217
224
|
run_id = task_spec.get("run_id")
|
|
218
225
|
step_name = task_spec.get("step_name")
|
|
@@ -280,10 +287,51 @@ class Job(object):
|
|
|
280
287
|
|
|
281
288
|
@property
|
|
282
289
|
def status(self):
|
|
283
|
-
|
|
290
|
+
terminal_states = [
|
|
291
|
+
JobStatus.SUCCESSFUL,
|
|
292
|
+
JobStatus.FAILED,
|
|
293
|
+
JobStatus.DISAPPEARED,
|
|
294
|
+
]
|
|
295
|
+
|
|
296
|
+
# If status is already terminal, don't poll again
|
|
297
|
+
if self._status in terminal_states:
|
|
298
|
+
return self._status
|
|
299
|
+
|
|
300
|
+
current_time = time.time()
|
|
301
|
+
if (
|
|
302
|
+
current_time - self._last_poll_time > self._force_poll_interval
|
|
303
|
+
or self._status not in terminal_states
|
|
304
|
+
):
|
|
284
305
|
try:
|
|
285
306
|
self._poll()
|
|
286
|
-
|
|
307
|
+
self._last_poll_time = current_time
|
|
308
|
+
|
|
309
|
+
# Update job status to POLLED if this is our first successful poll
|
|
310
|
+
if self._status == JobStatus.SUBMITTED:
|
|
311
|
+
self._status = JobStatus.POLLED
|
|
312
|
+
|
|
313
|
+
if self._status == JobStatus.SUCCESSFUL:
|
|
314
|
+
return self._status
|
|
315
|
+
|
|
316
|
+
except HTTPError as e:
|
|
317
|
+
if e.code == 404:
|
|
318
|
+
# 404 interpretation depends on job lifecycle
|
|
319
|
+
if self._status in [JobStatus.POLLED, JobStatus.SUBMITTED]:
|
|
320
|
+
# We've submitted or successfully polled this job before,
|
|
321
|
+
# so a 404 likely means it completed and was removed
|
|
322
|
+
self._status = JobStatus.DISAPPEARED
|
|
323
|
+
self._result = {"exit_code": 0}
|
|
324
|
+
else:
|
|
325
|
+
# Job was never successfully tracked
|
|
326
|
+
print(
|
|
327
|
+
f"[@nvidia] 404 received for job that was never successfully tracked - treating as failure"
|
|
328
|
+
)
|
|
329
|
+
self._status = JobStatus.FAILED
|
|
330
|
+
raise NvcfPollingConnectionError(e)
|
|
331
|
+
else:
|
|
332
|
+
self._status = JobStatus.FAILED
|
|
333
|
+
raise NvcfPollingConnectionError(e)
|
|
334
|
+
except URLError as e:
|
|
287
335
|
self._status = JobStatus.FAILED
|
|
288
336
|
raise NvcfPollingConnectionError(e)
|
|
289
337
|
return self._status
|
|
@@ -294,7 +342,8 @@ class Job(object):
|
|
|
294
342
|
|
|
295
343
|
@property
|
|
296
344
|
def is_running(self):
|
|
297
|
-
|
|
345
|
+
# Job is running if it's in SUBMITTED or POLLED state
|
|
346
|
+
return self.status in [JobStatus.SUBMITTED, JobStatus.POLLED]
|
|
298
347
|
|
|
299
348
|
@property
|
|
300
349
|
def has_failed(self):
|
|
@@ -318,8 +367,10 @@ class Job(object):
|
|
|
318
367
|
f"{result_endpoint}/{self._invocation_id}", headers=headers
|
|
319
368
|
)
|
|
320
369
|
response = urlopen(request)
|
|
370
|
+
body = response.read()
|
|
371
|
+
print(f"[@nvidia] polling response: {body}")
|
|
321
372
|
if response.getcode() == 200:
|
|
322
|
-
data = json.loads(response.read())
|
|
373
|
+
data = json.loads(body)
|
|
323
374
|
# TODO: Propagate the internal error forward
|
|
324
375
|
if data.get("exit_code") == 0:
|
|
325
376
|
self._status = JobStatus.SUCCESSFUL
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|