naeural-core 7.7.238__py3-none-any.whl → 7.7.239__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,8 @@
1
1
  import gc
2
2
  import traceback
3
3
  import platform
4
+ import subprocess
5
+ import shutil
4
6
 
5
7
  class _GPUMixin(object):
6
8
  """
@@ -71,6 +73,69 @@ class _GPUMixin(object):
71
73
  def skip_gpu_info_check(self):
72
74
  return vars(self).get('_GPUMixin__no_gpu_avail', False)
73
75
 
76
+ def _get_processes_by_uuid_via_nvidia_smi(self, mb: bool):
77
+ """
78
+ Returns:
79
+ processes_by_uuid: dict[str, list[dict]] # uuid -> [{'PID':..., 'ALLOCATED_MEM':...}, ...]
80
+ Notes:
81
+ nvidia-smi used_gpu_memory is in MiB when nounits is used in CSV output (common behavior).
82
+ """
83
+ processes_by_uuid = {}
84
+
85
+ smi = shutil.which("nvidia-smi")
86
+ if not smi:
87
+ return processes_by_uuid # keep behavior: processes empty if unavailable
88
+
89
+ try:
90
+ cmd = [
91
+ smi,
92
+ "--query-compute-apps=gpu_uuid,pid,used_gpu_memory",
93
+ "--format=csv,noheader,nounits",
94
+ ]
95
+ out = subprocess.run(cmd, capture_output=True, text=True, timeout=1.5)
96
+ if out.returncode != 0:
97
+ return processes_by_uuid
98
+
99
+ for line in out.stdout.splitlines():
100
+ line = line.strip()
101
+ if not line:
102
+ continue
103
+ # Expect: "<uuid>, <pid>, <used_gpu_memory>"
104
+ parts = [p.strip() for p in line.split(",")]
105
+ if len(parts) < 3:
106
+ continue
107
+
108
+ uuid = parts[0]
109
+ try:
110
+ pid = int(parts[1])
111
+ except Exception:
112
+ continue
113
+
114
+ used_mib = None
115
+ try:
116
+ # used_gpu_memory in MiB, nounits -> numeric
117
+ used_mib = float(parts[2])
118
+ except Exception:
119
+ used_mib = None
120
+
121
+ # Convert to bytes to reuse your existing MB/GB conversion logic
122
+ used_bytes = None if used_mib is None else int(used_mib * 1024 * 1024)
123
+
124
+ dct_proc_info = {"PID": pid}
125
+ dct_proc_info["ALLOCATED_MEM"] = round(
126
+ (used_bytes / 1024 ** (2 if mb else 3)) if used_bytes is not None else 0.0,
127
+ 2
128
+ )
129
+
130
+ processes_by_uuid.setdefault(uuid, []).append(dct_proc_info)
131
+ # endfor lines
132
+
133
+ except Exception:
134
+ # Any failure here should degrade gracefully to empty processes
135
+ return {}
136
+
137
+ return processes_by_uuid
138
+
74
139
  def gpu_info(self, show=False, mb=False, current_pid=False):
75
140
  """
76
141
  Collects GPU info. Must have torch installed & non-mandatory nvidia-smi
@@ -101,6 +166,7 @@ class _GPUMixin(object):
101
166
  # first get name
102
167
  import torch as th
103
168
  import os
169
+ processes_by_uuid = self._get_processes_by_uuid_via_nvidia_smi(mb=mb)
104
170
  except:
105
171
  self.P("ERROR: `gpu_info` call failed - PyTorch probably is not installed:\n{}".format(
106
172
  traceback.format_exc())
@@ -214,25 +280,22 @@ class _GPUMixin(object):
214
280
  if True:
215
281
  processes = []
216
282
  try:
217
- nvml_na = getattr(pynvml, "NVML_VALUE_NOT_AVAILABLE", None)
218
- for proc in pynvml.nvmlDeviceGetComputeRunningProcesses(handle):
219
- dct_proc_info = {k.upper(): v for k, v in proc.__dict__.items()}
220
- used_mem = dct_proc_info.pop("USEDGPUMEMORY", None)
221
-
222
- if used_mem in (None, nvml_na) or (isinstance(used_mem, int) and used_mem < 0):
223
- used_mem = None
224
-
225
- dct_proc_info["ALLOCATED_MEM"] = round(
226
- used_mem / 1024 ** (2 if mb else 3) if used_mem is not None else 0.0,
227
- 2
228
- )
229
- processes.append(dct_proc_info)
230
-
231
- if dct_proc_info.get("PID") == os.getpid():
232
- current_pid_has_usage = True
233
- current_pid_gpus.append(device_id)
283
+ # Use NVML only to get the UUID for this device, then map from nvidia-smi results.
284
+ uuid = None
285
+ try:
286
+ uuid = pynvml.nvmlDeviceGetUUID(handle)
287
+ except Exception:
288
+ uuid = None
289
+
290
+ if uuid and uuid in processes_by_uuid:
291
+ processes = processes_by_uuid[uuid]
292
+
293
+ # Preserve your existing "current pid GPU usage" behavior
294
+ for p in processes:
295
+ if p.get("PID") == os.getpid():
296
+ current_pid_has_usage = True
297
+ current_pid_gpus.append(device_id)
234
298
  except Exception:
235
- # if this fails, keep empty list like before
236
299
  processes = []
237
300
  # endtry processes
238
301
  # endif processes
naeural_core/main/ver.py CHANGED
@@ -1,4 +1,4 @@
1
- __VER__ = '7.7.238'
1
+ __VER__ = '7.7.239'
2
2
 
3
3
 
4
4
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: naeural_core
3
- Version: 7.7.238
3
+ Version: 7.7.239
4
4
  Summary: Ratio1 Core is the backbone of the Ratio1 Edge Protocol.
5
5
  Project-URL: Homepage, https://github.com/Ratio1/naeural_core
6
6
  Project-URL: Bug Tracker, https://github.com/Ratio1/naeural_core/issues
@@ -137,7 +137,7 @@ naeural_core/core_logging/logger_mixins/confusion_matrix_mixin.py,sha256=fLJOeyp
137
137
  naeural_core/core_logging/logger_mixins/dataframe_mixin.py,sha256=hkOtoTzoBDacpagdFYp2kawsw7rzbgLw2-_pzXLBU6Q,11491
138
138
  naeural_core/core_logging/logger_mixins/deploy_models_in_production_mixin.py,sha256=J2j1tnt0Cd2qD31rL8Sov0sz9_T5-h6Ukd-4sl1ITcQ,5986
139
139
  naeural_core/core_logging/logger_mixins/fit_debug_tfkeras_mixin.py,sha256=6efE5W59a3VWWR1UbPd9iNfQl0nuse7SW3IV0RFpHOc,9344
140
- naeural_core/core_logging/logger_mixins/gpu_mixin.py,sha256=Fy2xnyp3auZK3tMMFwYLSAU65YYJbsAXfvUXgaCypnc,12290
140
+ naeural_core/core_logging/logger_mixins/gpu_mixin.py,sha256=sgtEfuROphSL03Id8n4Wd8SbxGoSuBMVx_v-N_cKvXc,14023
141
141
  naeural_core/core_logging/logger_mixins/grid_search_mixin.py,sha256=lo3bTyEmcsk03ttTLs0lC_N0beAC1eTiGI9kZX2ib-A,5961
142
142
  naeural_core/core_logging/logger_mixins/histogram_mixin.py,sha256=ro5q99VXdcLSBi6XMZK4fJIXNX8n4wBzay-0SXuj7qc,6413
143
143
  naeural_core/core_logging/logger_mixins/keras_callbacks_mixin.py,sha256=ELlTb8TycdFnuO2dMDAherlzRd1rfHWogIDq-svnZ7w,3940
@@ -345,7 +345,7 @@ naeural_core/main/geoloc.py,sha256=TEqyuNzpVqZSBCo0OOrpHYncIsHSClvRt28hgvxJ35o,2
345
345
  naeural_core/main/main_loop_data_handler.py,sha256=hABB65OUBhtur3rd2mYsEhdAc54jVILzybrvxml5h0s,13815
346
346
  naeural_core/main/net_mon.py,sha256=qlyo1fqTeQy_M9VfJOxon_PBbQat0QO9Zbu_93FMbLc,88144
347
347
  naeural_core/main/orchestrator.py,sha256=SKnW5jWksBm2-fZcfeT5dddDmFSTSyA-CcODKX664KI,69657
348
- naeural_core/main/ver.py,sha256=VXAOU8OhhU9z_rNR0TSicHTPcMP_A6If94K8YHqCTWo,335
348
+ naeural_core/main/ver.py,sha256=NCXuXm5fEgpFjFfI3Rlr_co4S3oap5g9XZOHDtGhUPM,335
349
349
  naeural_core/main/orchestrator_mixins/__init__.py,sha256=MNleg48vdlqsyAR8Vamjl4ahG2jwCH5kLbQN5CfU57E,149
350
350
  naeural_core/main/orchestrator_mixins/managers_init.py,sha256=sQVqpr99a5WP9HCloYCyaWDW5J3IypEImlf703bqTF4,6692
351
351
  naeural_core/main/orchestrator_mixins/utils.py,sha256=jMa0uStVNLQmp0VhNMRvfBDjo387ORLlUVLthRNBKqc,1866
@@ -555,7 +555,7 @@ naeural_core/utils/tracing/onnx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
555
555
  naeural_core/utils/tracing/onnx/base_trt_scripter.py,sha256=1FelEBo7JGsc8hbJ3sevzxnM-J61nvBHz6L1VLpZrVc,2043
556
556
  naeural_core/utils/tracing/onnx/utils.py,sha256=IKmqUWakrMWn34uJvbRjNLacdszD8jkkQBFPUhgJtOQ,5618
557
557
  naeural_core/utils/web_app/favicon.ico,sha256=zU6-Jxx4ol1A9FJvcQELYV9DiqwqyvjPS89xQybZE74,15406
558
- naeural_core-7.7.238.dist-info/METADATA,sha256=lEfaX5VSJDt2U-BhSs2OYrp_MYzEFdLCuB3XeLlZOa4,6522
559
- naeural_core-7.7.238.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
560
- naeural_core-7.7.238.dist-info/licenses/LICENSE,sha256=SPHPWjOdAUUUUI020nI5VNCtFjmTOlJpi1cZxyB3gKo,11339
561
- naeural_core-7.7.238.dist-info/RECORD,,
558
+ naeural_core-7.7.239.dist-info/METADATA,sha256=jWc9x_Znx1X-FNdp69ujFdvVHKIH5uiEqsS55f673E0,6522
559
+ naeural_core-7.7.239.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
560
+ naeural_core-7.7.239.dist-info/licenses/LICENSE,sha256=SPHPWjOdAUUUUI020nI5VNCtFjmTOlJpi1cZxyB3gKo,11339
561
+ naeural_core-7.7.239.dist-info/RECORD,,