ob-metaflow-extensions 1.1.99__tar.gz → 1.1.100rc0__tar.gz
This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ob-metaflow-extensions might be problematic. Click here for more details.
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/PKG-INFO +1 -1
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/profilers/gpu.py +31 -5
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/ob_metaflow_extensions.egg-info/PKG-INFO +1 -1
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/setup.py +1 -1
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/README.md +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/config/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/auth_server.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/fast_bakery/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/fast_bakery/docker_environment.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_cli.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/fast_bakery/fast_bakery_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/kubernetes/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/kubernetes/kubernetes_client.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/nim/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/nim/nim_manager.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/nvcf/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/nvcf/heartbeat_store.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_cli.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/nvcf/nvcf_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/perimeters.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/snowpark/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_cli.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_client.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_decorator.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_exceptions.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/snowpark/snowpark_service_spec.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/plugins/tensorboard/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/profilers/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/remote_config.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/toplevel/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/toplevel/plugins/azure/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/toplevel/plugins/gcp/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/metaflow_extensions/outerbounds/toplevel/plugins/kubernetes/__init__.py +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/ob_metaflow_extensions.egg-info/SOURCES.txt +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/ob_metaflow_extensions.egg-info/dependency_links.txt +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/ob_metaflow_extensions.egg-info/requires.txt +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/ob_metaflow_extensions.egg-info/top_level.txt +0 -0
- {ob-metaflow-extensions-1.1.99 → ob-metaflow-extensions-1.1.100rc0}/setup.cfg +0 -0
|
@@ -315,7 +315,11 @@ def _update_charts(results, md_dict):
|
|
|
315
315
|
|
|
316
316
|
# This code is adapted from: https://github.com/outerbounds/monitorbench
|
|
317
317
|
class GPUProfiler:
|
|
318
|
-
def __init__(self, interval=1, monitor_batch_duration=200):
|
|
318
|
+
def __init__(
|
|
319
|
+
self, interval=1, monitor_batch_duration=200, artifact_name="gpu_profile_data"
|
|
320
|
+
):
|
|
321
|
+
self.artifact_name = artifact_name
|
|
322
|
+
self._card_setup_finished = False
|
|
319
323
|
self.driver_ver, self.cuda_ver, self.error = self._read_versions()
|
|
320
324
|
(
|
|
321
325
|
self.interconnect_data,
|
|
@@ -389,16 +393,35 @@ class GPUProfiler:
|
|
|
389
393
|
print("GPU Profiler readings are none", file=sys.stderr)
|
|
390
394
|
time.sleep(self._interval)
|
|
391
395
|
continue
|
|
396
|
+
if not self._card_setup_finished:
|
|
397
|
+
self._setup_card()
|
|
398
|
+
time.sleep(self._interval)
|
|
399
|
+
continue
|
|
400
|
+
|
|
392
401
|
_update_utilization(readings, self._card_comps["max_utilization"])
|
|
393
402
|
_update_charts(readings, self._card_comps["charts"])
|
|
394
403
|
current.card["gpu_profile"].refresh()
|
|
395
404
|
time.sleep(self._interval)
|
|
396
405
|
|
|
397
|
-
def _setup_card(self, artifact_name):
|
|
406
|
+
def _setup_card(self):
|
|
398
407
|
from metaflow import current
|
|
399
408
|
|
|
400
409
|
results = self._make_reading()
|
|
410
|
+
if "profile" not in results:
|
|
411
|
+
return
|
|
412
|
+
|
|
401
413
|
els = current.card["gpu_profile"]
|
|
414
|
+
els.clear()
|
|
415
|
+
|
|
416
|
+
current.card["gpu_profile"].append(
|
|
417
|
+
Markdown("# GPU profile for `%s`" % current.pathspec)
|
|
418
|
+
)
|
|
419
|
+
current.card["gpu_profile"].append(
|
|
420
|
+
Markdown(
|
|
421
|
+
"_Started at: %s_"
|
|
422
|
+
% datetime.now().astimezone().strftime("%Y-%m-%dT%H:%M:%S %z")
|
|
423
|
+
)
|
|
424
|
+
)
|
|
402
425
|
|
|
403
426
|
def _drivers():
|
|
404
427
|
els.append(Markdown("## Drivers"))
|
|
@@ -444,7 +467,7 @@ class GPUProfiler:
|
|
|
444
467
|
Table(data=_rows, headers=["Device ID", "Max GPU %", "Max memory"])
|
|
445
468
|
)
|
|
446
469
|
els.append(
|
|
447
|
-
Markdown(f"Detailed data saved in an artifact `{artifact_name}`")
|
|
470
|
+
Markdown(f"Detailed data saved in an artifact `{self.artifact_name}`")
|
|
448
471
|
)
|
|
449
472
|
return rows
|
|
450
473
|
|
|
@@ -479,6 +502,7 @@ class GPUProfiler:
|
|
|
479
502
|
_interconnect()
|
|
480
503
|
self._card_comps["max_utilization"] = _utilization()
|
|
481
504
|
self._card_comps["charts"] = _plots()
|
|
505
|
+
self._card_setup_finished = True
|
|
482
506
|
|
|
483
507
|
def _read_versions(self):
|
|
484
508
|
def parse(r, s):
|
|
@@ -572,7 +596,9 @@ class gpu_profile:
|
|
|
572
596
|
def __call__(self, f):
|
|
573
597
|
@wraps(f)
|
|
574
598
|
def func(s):
|
|
575
|
-
prof = GPUProfiler(interval=self.interval)
|
|
599
|
+
prof = GPUProfiler(
|
|
600
|
+
interval=self.interval, artifact_name=self.artifact_prefix + "data"
|
|
601
|
+
)
|
|
576
602
|
if self.include_artifacts:
|
|
577
603
|
setattr(s, self.artifact_prefix + "num_gpus", len(prof.devices))
|
|
578
604
|
|
|
@@ -585,7 +611,7 @@ class gpu_profile:
|
|
|
585
611
|
% datetime.now().astimezone().strftime("%Y-%m-%dT%H:%M:%S %z")
|
|
586
612
|
)
|
|
587
613
|
)
|
|
588
|
-
prof._setup_card(self.artifact_prefix + "data")
|
|
614
|
+
prof._setup_card()
|
|
589
615
|
current.card["gpu_profile"].refresh()
|
|
590
616
|
update_thread = threading.Thread(target=prof._update_card, daemon=True)
|
|
591
617
|
update_thread.start()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|