naeural-core 7.7.238__py3-none-any.whl → 7.7.240__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
naeural_core/core_logging/logger_mixins/gpu_mixin.py CHANGED
@@ -1,6 +1,8 @@
  import gc
  import traceback
  import platform
+ import subprocess
+ import shutil

  class _GPUMixin(object):
    """
@@ -71,6 +73,91 @@ class _GPUMixin(object):
    def skip_gpu_info_check(self):
      return vars(self).get('_GPUMixin__no_gpu_avail', False)

+   def _get_processes_by_uuid_via_nvidia_smi(self, mb: bool):
+     """
+     Returns:
+       processes_by_uuid: dict[str, list[dict]]  # uuid -> [{'PID': ..., 'ALLOCATED_MEM': ...}, ...]
+     Notes:
+       With `nounits` in CSV output, nvidia-smi reports used_gpu_memory in MiB.
+     """
+     processes_by_uuid = {}
+
+     smi = shutil.which("nvidia-smi")
+     if not smi:
+       return processes_by_uuid  # keep prior behavior: processes stay empty if nvidia-smi is unavailable
+
+     try:
+       cmd = [
+         smi,
+         "--query-compute-apps=gpu_uuid,pid,used_gpu_memory",
+         "--format=csv,noheader,nounits",
+       ]
+       out = subprocess.run(cmd, capture_output=True, text=True, timeout=1.5)
+       if out.returncode != 0:
+         return processes_by_uuid
+
+       for line in out.stdout.splitlines():
+         line = line.strip()
+         if not line:
+           continue
+         # Expect: "<uuid>, <pid>, <used_gpu_memory>"
+         parts = [p.strip() for p in line.split(",")]
+         if len(parts) < 3:
+           continue
+
+         uuid = parts[0]
+         try:
+           pid = int(parts[1])
+         except Exception:
+           continue
+
+         used_mib = None
+         try:
+           # used_gpu_memory is in MiB; `nounits` makes it plain numeric
+           used_mib = float(parts[2])
+         except Exception:
+           used_mib = None
+
+         # Convert to bytes to reuse the existing MB/GB conversion logic
+         used_bytes = None if used_mib is None else int(used_mib * 1024 * 1024)
+
+         dct_proc_info = {"PID": pid}
+         dct_proc_info["ALLOCATED_MEM"] = round(
+           (used_bytes / 1024 ** (2 if mb else 3)) if used_bytes is not None else 0.0,
+           2
+         )
+
+         processes_by_uuid.setdefault(uuid, []).append(dct_proc_info)
+       # endfor lines
+
+     except Exception:
+       # Any failure here should degrade gracefully to empty processes
+       return {}
+
+     return processes_by_uuid
+
+   def _get_uuid_by_index(self, timeout=1.5):
+     import subprocess, shutil
+     smi = shutil.which("nvidia-smi")
+     if not smi:
+       return {}
+
+     out = subprocess.run(
+       [smi, "--query-gpu=index,uuid", "--format=csv,noheader,nounits"],
+       capture_output=True, text=True, timeout=timeout
+     )
+     if out.returncode != 0:
+       return {}
+
+     d = {}
+     for line in out.stdout.splitlines():
+       line = line.strip()
+       if not line:
+         continue
+       idx_s, uuid = [p.strip() for p in line.split(",", 1)]
+       d[int(idx_s)] = uuid
+     return d
+
    def gpu_info(self, show=False, mb=False, current_pid=False):
      """
      Collects GPU info. Must have torch installed & non-mandatory nvidia-smi
@@ -101,6 +188,7 @@ class _GPUMixin(object):
        # first get name
        import torch as th
        import os
+       processes_by_uuid = self._get_processes_by_uuid_via_nvidia_smi(mb=mb)
      except:
        self.P("ERROR: `gpu_info` call failed - PyTorch probably is not installed:\n{}".format(
          traceback.format_exc())
@@ -149,20 +237,12 @@ class _GPUMixin(object):
        fan_speed, fan_speed_unit = -1, "N/A"
        if pynvml_avail:
          # --- get an NVML handle that matches torch's CUDA device ordering when possible ---
-         handle = None
-         try:
-           # This helps when CUDA_VISIBLE_DEVICES remaps indices:
-           # torch device 0 may not be NVML index 0.
-           pci_bus_id = getattr(device_props, "pci_bus_id", None)
-           if pci_bus_id:
-             if hasattr(pynvml, "nvmlDeviceGetHandleByPciBusId_v2"):
-               handle = pynvml.nvmlDeviceGetHandleByPciBusId_v2(pci_bus_id)
-             elif hasattr(pynvml, "nvmlDeviceGetHandleByPciBusId"):
-               handle = pynvml.nvmlDeviceGetHandleByPciBusId(pci_bus_id)
-         except Exception:
-           handle = None
-
-         if handle is None:
+         uuid_by_index = self._get_uuid_by_index()
+         # device_id is the loop variable of the enclosing `for device_id in range(n_gpus)` loop
+         uuid = uuid_by_index.get(device_id)
+         if uuid:
+           handle = pynvml.nvmlDeviceGetHandleByUUID(uuid)
+         else:
            handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)

          # --- memory (NVML returns bytes) ---
@@ -214,25 +294,22 @@
        if True:
          processes = []
          try:
-           nvml_na = getattr(pynvml, "NVML_VALUE_NOT_AVAILABLE", None)
-           for proc in pynvml.nvmlDeviceGetComputeRunningProcesses(handle):
-             dct_proc_info = {k.upper(): v for k, v in proc.__dict__.items()}
-             used_mem = dct_proc_info.pop("USEDGPUMEMORY", None)
-
-             if used_mem in (None, nvml_na) or (isinstance(used_mem, int) and used_mem < 0):
-               used_mem = None
-
-             dct_proc_info["ALLOCATED_MEM"] = round(
-               used_mem / 1024 ** (2 if mb else 3) if used_mem is not None else 0.0,
-               2
-             )
-             processes.append(dct_proc_info)
-
-             if dct_proc_info.get("PID") == os.getpid():
-               current_pid_has_usage = True
-               current_pid_gpus.append(device_id)
+           # Use NVML only to get the UUID for this device, then map from the nvidia-smi results.
+           uuid = None
+           try:
+             uuid = pynvml.nvmlDeviceGetUUID(handle)
+           except Exception:
+             uuid = None
+
+           if uuid and uuid in processes_by_uuid:
+             processes = processes_by_uuid[uuid]
+
+           # Preserve the existing "current pid GPU usage" behavior
+           for p in processes:
+             if p.get("PID") == os.getpid():
+               current_pid_has_usage = True
+               current_pid_gpus.append(device_id)
          except Exception:
-           # if this fails, keep empty list like before
            processes = []
        # endtry processes
        # endif processes
naeural_core/main/ver.py CHANGED
@@ -1,4 +1,4 @@
- __VER__ = '7.7.238'
+ __VER__ = '7.7.240'



naeural_core-7.7.240.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: naeural_core
- Version: 7.7.238
+ Version: 7.7.240
  Summary: Ratio1 Core is the backbone of the Ratio1 Edge Protocol.
  Project-URL: Homepage, https://github.com/Ratio1/naeural_core
  Project-URL: Bug Tracker, https://github.com/Ratio1/naeural_core/issues
naeural_core-7.7.240.dist-info/RECORD CHANGED
@@ -137,7 +137,7 @@ naeural_core/core_logging/logger_mixins/confusion_matrix_mixin.py,sha256=fLJOeyp
  naeural_core/core_logging/logger_mixins/dataframe_mixin.py,sha256=hkOtoTzoBDacpagdFYp2kawsw7rzbgLw2-_pzXLBU6Q,11491
  naeural_core/core_logging/logger_mixins/deploy_models_in_production_mixin.py,sha256=J2j1tnt0Cd2qD31rL8Sov0sz9_T5-h6Ukd-4sl1ITcQ,5986
  naeural_core/core_logging/logger_mixins/fit_debug_tfkeras_mixin.py,sha256=6efE5W59a3VWWR1UbPd9iNfQl0nuse7SW3IV0RFpHOc,9344
- naeural_core/core_logging/logger_mixins/gpu_mixin.py,sha256=Fy2xnyp3auZK3tMMFwYLSAU65YYJbsAXfvUXgaCypnc,12290
+ naeural_core/core_logging/logger_mixins/gpu_mixin.py,sha256=1gh83RB8ziZJG-UdF1BnOPJWWFAq4xbbhGuQvUPUn1E,14193
  naeural_core/core_logging/logger_mixins/grid_search_mixin.py,sha256=lo3bTyEmcsk03ttTLs0lC_N0beAC1eTiGI9kZX2ib-A,5961
  naeural_core/core_logging/logger_mixins/histogram_mixin.py,sha256=ro5q99VXdcLSBi6XMZK4fJIXNX8n4wBzay-0SXuj7qc,6413
  naeural_core/core_logging/logger_mixins/keras_callbacks_mixin.py,sha256=ELlTb8TycdFnuO2dMDAherlzRd1rfHWogIDq-svnZ7w,3940
@@ -345,7 +345,7 @@ naeural_core/main/geoloc.py,sha256=TEqyuNzpVqZSBCo0OOrpHYncIsHSClvRt28hgvxJ35o,2
  naeural_core/main/main_loop_data_handler.py,sha256=hABB65OUBhtur3rd2mYsEhdAc54jVILzybrvxml5h0s,13815
  naeural_core/main/net_mon.py,sha256=qlyo1fqTeQy_M9VfJOxon_PBbQat0QO9Zbu_93FMbLc,88144
  naeural_core/main/orchestrator.py,sha256=SKnW5jWksBm2-fZcfeT5dddDmFSTSyA-CcODKX664KI,69657
- naeural_core/main/ver.py,sha256=VXAOU8OhhU9z_rNR0TSicHTPcMP_A6If94K8YHqCTWo,335
+ naeural_core/main/ver.py,sha256=PyxIMv29XFE2KQRDl_o3fut65G_P9i8u849nGECYtVQ,335
  naeural_core/main/orchestrator_mixins/__init__.py,sha256=MNleg48vdlqsyAR8Vamjl4ahG2jwCH5kLbQN5CfU57E,149
  naeural_core/main/orchestrator_mixins/managers_init.py,sha256=sQVqpr99a5WP9HCloYCyaWDW5J3IypEImlf703bqTF4,6692
  naeural_core/main/orchestrator_mixins/utils.py,sha256=jMa0uStVNLQmp0VhNMRvfBDjo387ORLlUVLthRNBKqc,1866
@@ -555,7 +555,7 @@ naeural_core/utils/tracing/onnx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
  naeural_core/utils/tracing/onnx/base_trt_scripter.py,sha256=1FelEBo7JGsc8hbJ3sevzxnM-J61nvBHz6L1VLpZrVc,2043
  naeural_core/utils/tracing/onnx/utils.py,sha256=IKmqUWakrMWn34uJvbRjNLacdszD8jkkQBFPUhgJtOQ,5618
  naeural_core/utils/web_app/favicon.ico,sha256=zU6-Jxx4ol1A9FJvcQELYV9DiqwqyvjPS89xQybZE74,15406
- naeural_core-7.7.238.dist-info/METADATA,sha256=lEfaX5VSJDt2U-BhSs2OYrp_MYzEFdLCuB3XeLlZOa4,6522
- naeural_core-7.7.238.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
- naeural_core-7.7.238.dist-info/licenses/LICENSE,sha256=SPHPWjOdAUUUUI020nI5VNCtFjmTOlJpi1cZxyB3gKo,11339
- naeural_core-7.7.238.dist-info/RECORD,,
+ naeural_core-7.7.240.dist-info/METADATA,sha256=qjN4dhGCJ6yfpUtaJ5rX-XOF3-zeS9PurV7WHEGK3ws,6522
+ naeural_core-7.7.240.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ naeural_core-7.7.240.dist-info/licenses/LICENSE,sha256=SPHPWjOdAUUUUI020nI5VNCtFjmTOlJpi1cZxyB3gKo,11339
+ naeural_core-7.7.240.dist-info/RECORD,,