ob-metaflow-extensions 1.1.98__py2.py3-none-any.whl → 1.1.100rc0__py2.py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ob-metaflow-extensions might be problematic. Click here for more details.

@@ -315,7 +315,11 @@ def _update_charts(results, md_dict):
315
315
 
316
316
  # This code is adapted from: https://github.com/outerbounds/monitorbench
317
317
  class GPUProfiler:
318
- def __init__(self, interval=1, monitor_batch_duration=200):
318
+ def __init__(
319
+ self, interval=1, monitor_batch_duration=200, artifact_name="gpu_profile_data"
320
+ ):
321
+ self.artifact_name = artifact_name
322
+ self._card_setup_finished = False
319
323
  self.driver_ver, self.cuda_ver, self.error = self._read_versions()
320
324
  (
321
325
  self.interconnect_data,
@@ -389,16 +393,35 @@ class GPUProfiler:
389
393
  print("GPU Profiler readings are none", file=sys.stderr)
390
394
  time.sleep(self._interval)
391
395
  continue
396
+ if not self._card_setup_finished:
397
+ self._setup_card()
398
+ time.sleep(self._interval)
399
+ continue
400
+
392
401
  _update_utilization(readings, self._card_comps["max_utilization"])
393
402
  _update_charts(readings, self._card_comps["charts"])
394
403
  current.card["gpu_profile"].refresh()
395
404
  time.sleep(self._interval)
396
405
 
397
- def _setup_card(self, artifact_name):
406
+ def _setup_card(self):
398
407
  from metaflow import current
399
408
 
400
409
  results = self._make_reading()
410
+ if "profile" not in results:
411
+ return
412
+
401
413
  els = current.card["gpu_profile"]
414
+ els.clear()
415
+
416
+ current.card["gpu_profile"].append(
417
+ Markdown("# GPU profile for `%s`" % current.pathspec)
418
+ )
419
+ current.card["gpu_profile"].append(
420
+ Markdown(
421
+ "_Started at: %s_"
422
+ % datetime.now().astimezone().strftime("%Y-%m-%dT%H:%M:%S %z")
423
+ )
424
+ )
402
425
 
403
426
  def _drivers():
404
427
  els.append(Markdown("## Drivers"))
@@ -444,7 +467,7 @@ class GPUProfiler:
444
467
  Table(data=_rows, headers=["Device ID", "Max GPU %", "Max memory"])
445
468
  )
446
469
  els.append(
447
- Markdown(f"Detailed data saved in an artifact `{artifact_name}`")
470
+ Markdown(f"Detailed data saved in an artifact `{self.artifact_name}`")
448
471
  )
449
472
  return rows
450
473
 
@@ -479,6 +502,7 @@ class GPUProfiler:
479
502
  _interconnect()
480
503
  self._card_comps["max_utilization"] = _utilization()
481
504
  self._card_comps["charts"] = _plots()
505
+ self._card_setup_finished = True
482
506
 
483
507
  def _read_versions(self):
484
508
  def parse(r, s):
@@ -572,7 +596,9 @@ class gpu_profile:
572
596
  def __call__(self, f):
573
597
  @wraps(f)
574
598
  def func(s):
575
- prof = GPUProfiler(interval=self.interval)
599
+ prof = GPUProfiler(
600
+ interval=self.interval, artifact_name=self.artifact_prefix + "data"
601
+ )
576
602
  if self.include_artifacts:
577
603
  setattr(s, self.artifact_prefix + "num_gpus", len(prof.devices))
578
604
 
@@ -585,7 +611,7 @@ class gpu_profile:
585
611
  % datetime.now().astimezone().strftime("%Y-%m-%dT%H:%M:%S %z")
586
612
  )
587
613
  )
588
- prof._setup_card(self.artifact_prefix + "data")
614
+ prof._setup_card()
589
615
  current.card["gpu_profile"].refresh()
590
616
  update_thread = threading.Thread(target=prof._update_card, daemon=True)
591
617
  update_thread.start()
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ob-metaflow-extensions
3
- Version: 1.1.98
3
+ Version: 1.1.100rc0
4
4
  Summary: Outerbounds Platform Extensions for Metaflow
5
5
  Author: Outerbounds, Inc.
6
6
  License: Commercial
7
7
  Description-Content-Type: text/markdown
8
8
  Requires-Dist: boto3
9
9
  Requires-Dist: kubernetes
10
- Requires-Dist: ob-metaflow (==2.12.25.1)
10
+ Requires-Dist: ob-metaflow (==2.12.25.2)
11
11
 
12
12
  # Outerbounds platform package
13
13
 
@@ -28,13 +28,13 @@ metaflow_extensions/outerbounds/plugins/snowpark/snowpark_job.py,sha256=d_5UhXqZ
28
28
  metaflow_extensions/outerbounds/plugins/snowpark/snowpark_service_spec.py,sha256=AI_kcm1hZV3JRxJkookcH6twiGnAYjk9Dx-MeoYz60Y,8511
29
29
  metaflow_extensions/outerbounds/plugins/tensorboard/__init__.py,sha256=9lUM4Cqi5RjrHBRfG6AQMRz8-R96eZC8Ih0KD2lv22Y,1858
30
30
  metaflow_extensions/outerbounds/profilers/__init__.py,sha256=wa_jhnCBr82TBxoS0e8b6_6sLyZX0fdHicuGJZNTqKw,29
31
- metaflow_extensions/outerbounds/profilers/gpu.py,sha256=a5YZAepujuP0uDqG9UpXBlZS3wjUt4Yv8CjybXqeT2c,24342
31
+ metaflow_extensions/outerbounds/profilers/gpu.py,sha256=JYu8tQl0p5CIq1xSVUHjK3ldeqoLaZzqLBu86OKr5rg,25104
32
32
  metaflow_extensions/outerbounds/toplevel/__init__.py,sha256=qWUJSv_r5hXJ7jV_On4nEasKIfUCm6_UjkjXWA_A1Ts,90
33
33
  metaflow_extensions/outerbounds/toplevel/global_aliases_for_metaflow_package.py,sha256=Zq3OuL1bOod8KJra-Zk8B3gNhSHoWEGteM9T7g0pp6E,1881
34
34
  metaflow_extensions/outerbounds/toplevel/plugins/azure/__init__.py,sha256=WUuhz2YQfI4fz7nIcipwwWq781eaoHEk7n4GAn1npDg,63
35
35
  metaflow_extensions/outerbounds/toplevel/plugins/gcp/__init__.py,sha256=BbZiaH3uILlEZ6ntBLKeNyqn3If8nIXZFq_Apd7Dhco,70
36
36
  metaflow_extensions/outerbounds/toplevel/plugins/kubernetes/__init__.py,sha256=5zG8gShSj8m7rgF4xgWBZFuY3GDP5n1T0ktjRpGJLHA,69
37
- ob_metaflow_extensions-1.1.98.dist-info/METADATA,sha256=4Qixz_AtC57tc30Z4u2iDj4s7K0oY0zJK7qVaznZ8J0,520
38
- ob_metaflow_extensions-1.1.98.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
39
- ob_metaflow_extensions-1.1.98.dist-info/top_level.txt,sha256=NwG0ukwjygtanDETyp_BUdtYtqIA_lOjzFFh1TsnxvI,20
40
- ob_metaflow_extensions-1.1.98.dist-info/RECORD,,
37
+ ob_metaflow_extensions-1.1.100rc0.dist-info/METADATA,sha256=y93pSmIa-j7KdnaJMxeq9Fie5dWrjFqxncdS9zfVtww,524
38
+ ob_metaflow_extensions-1.1.100rc0.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
39
+ ob_metaflow_extensions-1.1.100rc0.dist-info/top_level.txt,sha256=NwG0ukwjygtanDETyp_BUdtYtqIA_lOjzFFh1TsnxvI,20
40
+ ob_metaflow_extensions-1.1.100rc0.dist-info/RECORD,,