ob-metaflow-extensions 1.1.142__tar.gz → 1.1.144__tar.gz

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow-extensions might be problematic. Click here for more details.

Files changed (69)
  1. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/PKG-INFO +1 -1
  2. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/__init__.py +7 -2
  3. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf.py +59 -8
  4. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/ob_metaflow_extensions.egg-info/PKG-INFO +1 -1
  5. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/setup.py +1 -1
  6. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/README.md +0 -0
  7. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/__init__.py +0 -0
  8. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/config/__init__.py +0 -0
  9. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/apps/__init__.py +0 -0
  10. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/apps/app_utils.py +0 -0
  11. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/apps/consts.py +0 -0
  12. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/apps/deploy_decorator.py +0 -0
  13. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/apps/supervisord_utils.py +0 -0
  14. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/auth_server.py +0 -0
  15. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/card_utilities/__init__.py +0 -0
  16. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/card_utilities/async_cards.py +0 -0
  17. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/card_utilities/extra_components.py +0 -0
  18. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/card_utilities/injector.py +0 -0
  19. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/fast_bakery/__init__.py +0 -0
  20. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +0 -0
  21. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +0 -0
  22. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_cli.py +0 -0
  23. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_decorator.py +0 -0
  24. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/kubernetes/__init__.py +0 -0
  25. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +0 -0
  26. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nim/__init__.py +0 -0
  27. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nim/card.py +0 -0
  28. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +0 -0
  29. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nim/utilities.py +0 -0
  30. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nvcf/__init__.py +0 -0
  31. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nvcf/constants.py +0 -0
  32. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nvcf/exceptions.py +0 -0
  33. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nvcf/heartbeat_store.py +0 -0
  34. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_cli.py +0 -0
  35. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_decorator.py +0 -0
  36. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/nvcf/utils.py +0 -0
  37. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/ollama/__init__.py +0 -0
  38. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/ollama/ollama.py +0 -0
  39. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/perimeters.py +0 -0
  40. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/profilers/deco_injector.py +0 -0
  41. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/profilers/gpu_profile_decorator.py +0 -0
  42. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/secrets/__init__.py +0 -0
  43. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/secrets/secrets.py +0 -0
  44. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/snowflake/__init__.py +0 -0
  45. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/snowflake/snowflake.py +0 -0
  46. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/snowpark/__init__.py +0 -0
  47. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark.py +0 -0
  48. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_cli.py +0 -0
  49. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +0 -0
  50. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +0 -0
  51. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_exceptions.py +0 -0
  52. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +0 -0
  53. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_service_spec.py +0 -0
  54. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/plugins/tensorboard/__init__.py +0 -0
  55. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/profilers/__init__.py +0 -0
  56. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/profilers/gpu.py +0 -0
  57. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/remote_config.py +0 -0
  58. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/toplevel/__init__.py +0 -0
  59. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +0 -0
  60. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/toplevel/plugins/azure/__init__.py +0 -0
  61. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/toplevel/plugins/gcp/__init__.py +0 -0
  62. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/toplevel/plugins/kubernetes/__init__.py +0 -0
  63. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/toplevel/plugins/ollama/__init__.py +0 -0
  64. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/metaflow_extensions/outerbounds/toplevel/plugins/snowflake/__init__.py +0 -0
  65. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/ob_metaflow_extensions.egg-info/SOURCES.txt +0 -0
  66. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/ob_metaflow_extensions.egg-info/dependency_links.txt +0 -0
  67. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/ob_metaflow_extensions.egg-info/requires.txt +0 -0
  68. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/ob_metaflow_extensions.egg-info/top_level.txt +0 -0
  69. {ob-metaflow-extensions-1.1.142 → ob-metaflow-extensions-1.1.144}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ob-metaflow-extensions
3
- Version: 1.1.142
3
+ Version: 1.1.144
4
4
  Summary: Outerbounds Platform Extensions for Metaflow
5
5
  Author: Outerbounds, Inc.
6
6
  License: Commercial
@@ -162,13 +162,18 @@ class ObpAuthProvider(object):
162
162
  client_params = {}
163
163
 
164
164
  from botocore.exceptions import ClientError
165
+ from botocore.config import Config
165
166
 
166
167
  with hide_access_keys():
167
168
  session = get_boto3_session(role_arn, session_vars)
169
+ _client_params = client_params.copy()
170
+ if _client_params.get("config") and type(_client_params["config"]) == dict:
171
+ _client_params["config"] = Config(**_client_params["config"])
172
+
168
173
  if with_error:
169
- return session.client(module, **client_params), ClientError
174
+ return session.client(module, **_client_params), ClientError
170
175
  else:
171
- return session.client(module, **client_params)
176
+ return session.client(module, **_client_params)
172
177
 
173
178
 
174
179
  AWS_CLIENT_PROVIDERS_DESC = [("obp", ".ObpAuthProvider")]
@@ -188,10 +188,12 @@ class Nvcf(object):
188
188
 
189
189
 
190
190
  class JobStatus(object):
191
- SUBMITTED = "SUBMITTED"
192
- RUNNING = "RUNNING"
193
- SUCCESSFUL = "SUCCESSFUL"
194
- FAILED = "FAILED"
191
+ CREATED = "CREATED" # Job object created but not submitted
192
+ SUBMITTED = "SUBMITTED" # Job submitted to NVCF
193
+ POLLED = "POLLED" # Job has been successfully polled at least once
194
+ SUCCESSFUL = "SUCCESSFUL" # Job completed successfully
195
+ FAILED = "FAILED" # Job failed
196
+ DISAPPEARED = "DISAPPEARED" # Job disappeared from NVCF but was previously polled (likely successful)
195
197
 
196
198
 
197
199
  nvcf_url = "https://api.nvcf.nvidia.com"
@@ -213,6 +215,11 @@ class Job(object):
213
215
  self._queue_timeout = queue_timeout
214
216
  self._poll_seconds = "3600"
215
217
 
218
+ # Initialize status and tracking variables
219
+ self._status = JobStatus.CREATED
220
+ self._last_poll_time = time.time()
221
+ self._force_poll_interval = 30
222
+
216
223
  flow_name = task_spec.get("flow_name")
217
224
  run_id = task_spec.get("run_id")
218
225
  step_name = task_spec.get("step_name")
@@ -280,10 +287,51 @@ class Job(object):
280
287
 
281
288
  @property
282
289
  def status(self):
283
- if self._status not in [JobStatus.SUCCESSFUL, JobStatus.FAILED]:
290
+ terminal_states = [
291
+ JobStatus.SUCCESSFUL,
292
+ JobStatus.FAILED,
293
+ JobStatus.DISAPPEARED,
294
+ ]
295
+
296
+ # If status is already terminal, don't poll again
297
+ if self._status in terminal_states:
298
+ return self._status
299
+
300
+ current_time = time.time()
301
+ if (
302
+ current_time - self._last_poll_time > self._force_poll_interval
303
+ or self._status not in terminal_states
304
+ ):
284
305
  try:
285
306
  self._poll()
286
- except (HTTPError, URLError) as e:
307
+ self._last_poll_time = current_time
308
+
309
+ # Update job status to POLLED if this is our first successful poll
310
+ if self._status == JobStatus.SUBMITTED:
311
+ self._status = JobStatus.POLLED
312
+
313
+ if self._status == JobStatus.SUCCESSFUL:
314
+ return self._status
315
+
316
+ except HTTPError as e:
317
+ if e.code == 404:
318
+ # 404 interpretation depends on job lifecycle
319
+ if self._status in [JobStatus.POLLED, JobStatus.SUBMITTED]:
320
+ # We've submitted or successfully polled this job before,
321
+ # so a 404 likely means it completed and was removed
322
+ self._status = JobStatus.DISAPPEARED
323
+ self._result = {"exit_code": 0}
324
+ else:
325
+ # Job was never successfully tracked
326
+ print(
327
+ f"[@nvidia] 404 received for job that was never successfully tracked - treating as failure"
328
+ )
329
+ self._status = JobStatus.FAILED
330
+ raise NvcfPollingConnectionError(e)
331
+ else:
332
+ self._status = JobStatus.FAILED
333
+ raise NvcfPollingConnectionError(e)
334
+ except URLError as e:
287
335
  self._status = JobStatus.FAILED
288
336
  raise NvcfPollingConnectionError(e)
289
337
  return self._status
@@ -294,7 +342,8 @@ class Job(object):
294
342
 
295
343
  @property
296
344
  def is_running(self):
297
- return self.status == JobStatus.SUBMITTED
345
+ # Job is running if it's in SUBMITTED or POLLED state
346
+ return self.status in [JobStatus.SUBMITTED, JobStatus.POLLED]
298
347
 
299
348
  @property
300
349
  def has_failed(self):
@@ -318,8 +367,10 @@ class Job(object):
318
367
  f"{result_endpoint}/{self._invocation_id}", headers=headers
319
368
  )
320
369
  response = urlopen(request)
370
+ body = response.read()
371
+ print(f"[@nvidia] polling response: {body}")
321
372
  if response.getcode() == 200:
322
- data = json.loads(response.read())
373
+ data = json.loads(body)
323
374
  # TODO: Propagate the internal error forward
324
375
  if data.get("exit_code") == 0:
325
376
  self._status = JobStatus.SUCCESSFUL
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ob-metaflow-extensions
3
- Version: 1.1.142
3
+ Version: 1.1.144
4
4
  Summary: Outerbounds Platform Extensions for Metaflow
5
5
  Author: Outerbounds, Inc.
6
6
  License: Commercial
@@ -2,7 +2,7 @@ from setuptools import setup, find_namespace_packages
2
2
  from pathlib import Path
3
3
 
4
4
 
5
- version = "1.1.142"
5
+ version = "1.1.144"
6
6
  this_directory = Path(__file__).parent
7
7
  long_description = (this_directory / "README.md").read_text()
8
8