ob-metaflow-extensions 1.1.144__tar.gz → 1.1.145__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow-extensions might be problematic. Click here for more details.

Files changed (69)
  1. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/PKG-INFO +1 -1
  2. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf.py +87 -50
  3. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/ob_metaflow_extensions.egg-info/PKG-INFO +1 -1
  4. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/setup.py +1 -1
  5. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/README.md +0 -0
  6. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/__init__.py +0 -0
  7. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/config/__init__.py +0 -0
  8. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/__init__.py +0 -0
  9. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/apps/__init__.py +0 -0
  10. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/apps/app_utils.py +0 -0
  11. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/apps/consts.py +0 -0
  12. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/apps/deploy_decorator.py +0 -0
  13. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/apps/supervisord_utils.py +0 -0
  14. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/auth_server.py +0 -0
  15. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/card_utilities/__init__.py +0 -0
  16. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/card_utilities/async_cards.py +0 -0
  17. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/card_utilities/extra_components.py +0 -0
  18. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/card_utilities/injector.py +0 -0
  19. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/fast_bakery/__init__.py +0 -0
  20. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +0 -0
  21. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +0 -0
  22. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_cli.py +0 -0
  23. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_decorator.py +0 -0
  24. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/kubernetes/__init__.py +0 -0
  25. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +0 -0
  26. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/nim/__init__.py +0 -0
  27. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/nim/card.py +0 -0
  28. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +0 -0
  29. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/nim/utilities.py +0 -0
  30. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/nvcf/__init__.py +0 -0
  31. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/nvcf/constants.py +0 -0
  32. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/nvcf/exceptions.py +0 -0
  33. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/nvcf/heartbeat_store.py +0 -0
  34. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_cli.py +0 -0
  35. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_decorator.py +0 -0
  36. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/nvcf/utils.py +0 -0
  37. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/ollama/__init__.py +0 -0
  38. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/ollama/ollama.py +0 -0
  39. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/perimeters.py +0 -0
  40. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/profilers/deco_injector.py +0 -0
  41. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/profilers/gpu_profile_decorator.py +0 -0
  42. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/secrets/__init__.py +0 -0
  43. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/secrets/secrets.py +0 -0
  44. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/snowflake/__init__.py +0 -0
  45. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/snowflake/snowflake.py +0 -0
  46. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/snowpark/__init__.py +0 -0
  47. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark.py +0 -0
  48. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_cli.py +0 -0
  49. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +0 -0
  50. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +0 -0
  51. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_exceptions.py +0 -0
  52. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +0 -0
  53. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_service_spec.py +0 -0
  54. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/plugins/tensorboard/__init__.py +0 -0
  55. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/profilers/__init__.py +0 -0
  56. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/profilers/gpu.py +0 -0
  57. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/remote_config.py +0 -0
  58. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/toplevel/__init__.py +0 -0
  59. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +0 -0
  60. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/toplevel/plugins/azure/__init__.py +0 -0
  61. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/toplevel/plugins/gcp/__init__.py +0 -0
  62. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/toplevel/plugins/kubernetes/__init__.py +0 -0
  63. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/toplevel/plugins/ollama/__init__.py +0 -0
  64. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/metaflow_extensions/outerbounds/toplevel/plugins/snowflake/__init__.py +0 -0
  65. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/ob_metaflow_extensions.egg-info/SOURCES.txt +0 -0
  66. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/ob_metaflow_extensions.egg-info/dependency_links.txt +0 -0
  67. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/ob_metaflow_extensions.egg-info/requires.txt +0 -0
  68. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/ob_metaflow_extensions.egg-info/top_level.txt +0 -0
  69. {ob-metaflow-extensions-1.1.144 → ob-metaflow-extensions-1.1.145}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ob-metaflow-extensions
3
- Version: 1.1.144
3
+ Version: 1.1.145
4
4
  Summary: Outerbounds Platform Extensions for Metaflow
5
5
  Author: Outerbounds, Inc.
6
6
  License: Commercial
@@ -196,6 +196,8 @@ class JobStatus(object):
196
196
  DISAPPEARED = "DISAPPEARED" # Job disappeared from NVCF but was previously polled (likely successful)
197
197
 
198
198
 
199
+ terminal_states = [JobStatus.SUCCESSFUL, JobStatus.FAILED, JobStatus.DISAPPEARED]
200
+
199
201
  nvcf_url = "https://api.nvcf.nvidia.com"
200
202
  submit_endpoint = f"{nvcf_url}/v2/nvcf/pexec/functions"
201
203
  result_endpoint = f"{nvcf_url}/v2/nvcf/pexec/status"
@@ -213,12 +215,15 @@ class Job(object):
213
215
  self._function_id = function_id
214
216
  self._ngc_api_key = ngc_api_key
215
217
  self._queue_timeout = queue_timeout
216
- self._poll_seconds = "3600"
218
+ self._poll_seconds = "300"
217
219
 
218
220
  # Initialize status and tracking variables
219
221
  self._status = JobStatus.CREATED
220
222
  self._last_poll_time = time.time()
221
- self._force_poll_interval = 30
223
+
224
+ # State tracking for long polling
225
+ self._long_polling_active = False
226
+ self._poll_response = None
222
227
 
223
228
  flow_name = task_spec.get("flow_name")
224
229
  run_id = task_spec.get("run_id")
@@ -279,61 +284,40 @@ class Job(object):
279
284
  self._result = data
280
285
  elif response.getcode() == 202:
281
286
  self._status = JobStatus.SUBMITTED
287
+ # Start long polling immediately after receiving 202
288
+ self._start_long_polling()
282
289
  else:
283
290
  self._status = JobStatus.FAILED
284
291
  except URLError:
285
292
  self._status = JobStatus.FAILED
286
293
  raise
287
294
 
295
+ def _start_long_polling(self):
296
+ if not self._long_polling_active:
297
+ self._long_polling_active = True
298
+ polling_thread = threading.Thread(target=self._long_poll_loop, daemon=True)
299
+ polling_thread.start()
300
+
301
+ def _long_poll_loop(self):
302
+ while self._long_polling_active and self.status not in terminal_states:
303
+ try:
304
+ self._poll()
305
+ # No sleep needed - the request itself will block for up to self._poll_seconds
306
+ except Exception as e:
307
+ print(f"[@nvidia] Long polling error: {e}")
308
+ # Brief pause before retry on error
309
+ time.sleep(1)
310
+
311
+ self._long_polling_active = False
312
+
288
313
  @property
289
314
  def status(self):
290
- terminal_states = [
291
- JobStatus.SUCCESSFUL,
292
- JobStatus.FAILED,
293
- JobStatus.DISAPPEARED,
294
- ]
295
-
296
315
  # If status is already terminal, don't poll again
297
316
  if self._status in terminal_states:
298
317
  return self._status
299
318
 
300
- current_time = time.time()
301
- if (
302
- current_time - self._last_poll_time > self._force_poll_interval
303
- or self._status not in terminal_states
304
- ):
305
- try:
306
- self._poll()
307
- self._last_poll_time = current_time
308
-
309
- # Update job status to POLLED if this is our first successful poll
310
- if self._status == JobStatus.SUBMITTED:
311
- self._status = JobStatus.POLLED
312
-
313
- if self._status == JobStatus.SUCCESSFUL:
314
- return self._status
315
-
316
- except HTTPError as e:
317
- if e.code == 404:
318
- # 404 interpretation depends on job lifecycle
319
- if self._status in [JobStatus.POLLED, JobStatus.SUBMITTED]:
320
- # We've submitted or successfully polled this job before,
321
- # so a 404 likely means it completed and was removed
322
- self._status = JobStatus.DISAPPEARED
323
- self._result = {"exit_code": 0}
324
- else:
325
- # Job was never successfully tracked
326
- print(
327
- f"[@nvidia] 404 received for job that was never successfully tracked - treating as failure"
328
- )
329
- self._status = JobStatus.FAILED
330
- raise NvcfPollingConnectionError(e)
331
- else:
332
- self._status = JobStatus.FAILED
333
- raise NvcfPollingConnectionError(e)
334
- except URLError as e:
335
- self._status = JobStatus.FAILED
336
- raise NvcfPollingConnectionError(e)
319
+ # Return cached status - no need to poll
320
+ # Long polling loop will update the status
337
321
  return self._status
338
322
 
339
323
  @property
@@ -357,6 +341,14 @@ class Job(object):
357
341
  @retry_on_status(status_codes=[504])
358
342
  def _poll(self):
359
343
  try:
344
+ # Implement rate limiting to prevent more than 1 request per second
345
+ current_time = time.time()
346
+ if (
347
+ hasattr(self, "_last_poll_time")
348
+ and current_time - self._last_poll_time < 1
349
+ ):
350
+ time.sleep(1 - (current_time - self._last_poll_time))
351
+
360
352
  headers = {
361
353
  "Authorization": f"Bearer {self._ngc_api_key}",
362
354
  "Content-Type": "application/json",
@@ -366,22 +358,67 @@ class Job(object):
366
358
  request = Request(
367
359
  f"{result_endpoint}/{self._invocation_id}", headers=headers
368
360
  )
361
+
362
+ # Record time before making the request
363
+ self._last_poll_time = time.time()
364
+
369
365
  response = urlopen(request)
370
366
  body = response.read()
371
- print(f"[@nvidia] polling response: {body}")
367
+ print(f"[@nvidia] polling status code: {response.getcode()}")
368
+
372
369
  if response.getcode() == 200:
373
370
  data = json.loads(body)
374
- # TODO: Propagate the internal error forward
375
371
  if data.get("exit_code") == 0:
376
372
  self._status = JobStatus.SUCCESSFUL
377
373
  else:
378
374
  self._status = JobStatus.FAILED
379
375
  self._result = data
380
- elif response.getcode() != 202:
376
+ self._long_polling_active = False # Stop polling once job completes
377
+ elif response.getcode() == 202:
378
+ # Job is still running - status remains SUBMITTED or POLLED
379
+ if self._status == JobStatus.SUBMITTED:
380
+ self._status = JobStatus.POLLED
381
+ elif response.getcode() == 302:
382
+ # Handle redirects for large responses or requests in different regions
383
+ redirect_location = response.headers.get("Location")
384
+ if redirect_location:
385
+ redirect_request = Request(redirect_location, headers=headers)
386
+ redirect_response = urlopen(redirect_request)
387
+ if redirect_response.getcode() == 200:
388
+ data = json.loads(redirect_response.read())
389
+ if data.get("exit_code") == 0:
390
+ self._status = JobStatus.SUCCESSFUL
391
+ else:
392
+ self._status = JobStatus.FAILED
393
+ self._result = data
394
+ self._long_polling_active = False
395
+ else:
381
396
  print(
382
397
  f"[@nvidia] Unexpected response code: {response.getcode()}. Please notify an Outerbounds support engineer if this error persists."
383
398
  )
384
399
  self._status = JobStatus.FAILED
385
- except URLError:
400
+
401
+ except HTTPError as e:
402
+ if e.code == 404:
403
+ # 404 interpretation depends on job lifecycle
404
+ if self._status in [JobStatus.POLLED, JobStatus.SUBMITTED]:
405
+ # We've submitted or successfully polled this job before,
406
+ # so a 404 likely means it completed and was removed
407
+ self._status = JobStatus.DISAPPEARED
408
+ self._result = {"exit_code": 0}
409
+ print(
410
+ f"[@nvidia] 404 received for job that was previously tracked - assuming job completed"
411
+ )
412
+ else:
413
+ # Job was never successfully tracked
414
+ print(
415
+ f"[@nvidia] 404 received for job that was never successfully tracked - treating as failure"
416
+ )
417
+ self._status = JobStatus.FAILED
418
+ raise NvcfPollingConnectionError(e)
419
+ else:
420
+ self._status = JobStatus.FAILED
421
+ raise NvcfPollingConnectionError(e)
422
+ except URLError as e:
386
423
  self._status = JobStatus.FAILED
387
- raise
424
+ raise NvcfPollingConnectionError(e)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ob-metaflow-extensions
3
- Version: 1.1.144
3
+ Version: 1.1.145
4
4
  Summary: Outerbounds Platform Extensions for Metaflow
5
5
  Author: Outerbounds, Inc.
6
6
  License: Commercial
@@ -2,7 +2,7 @@ from setuptools import setup, find_namespace_packages
2
2
  from pathlib import Path
3
3
 
4
4
 
5
- version = "1.1.144"
5
+ version = "1.1.145"
6
6
  this_directory = Path(__file__).parent
7
7
  long_description = (this_directory / "README.md").read_text()
8
8