naeural-core 7.7.238__py3-none-any.whl → 7.7.240__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
naeural_core/core_logging/logger_mixins/gpu_mixin.py CHANGED
@@ -1,6 +1,8 @@
  import gc
  import traceback
  import platform
+ import subprocess
+ import shutil

  class _GPUMixin(object):
    """
@@ -71,6 +73,91 @@ class _GPUMixin(object):
    def skip_gpu_info_check(self):
      return vars(self).get('_GPUMixin__no_gpu_avail', False)

+   def _get_processes_by_uuid_via_nvidia_smi(self, mb: bool):
+     """
+     Returns:
+       processes_by_uuid: dict[str, list[dict]]  # uuid -> [{'PID': ..., 'ALLOCATED_MEM': ...}, ...]
+     Notes:
+       With `nounits` in CSV output, nvidia-smi reports used_gpu_memory in MiB.
+     """
+     processes_by_uuid = {}
+
+     smi = shutil.which("nvidia-smi")
+     if not smi:
+       return processes_by_uuid  # keep prior behavior: processes stay empty if nvidia-smi is unavailable
+
+     try:
+       cmd = [
+         smi,
+         "--query-compute-apps=gpu_uuid,pid,used_gpu_memory",
+         "--format=csv,noheader,nounits",
+       ]
+       out = subprocess.run(cmd, capture_output=True, text=True, timeout=1.5)
+       if out.returncode != 0:
+         return processes_by_uuid
+
+       for line in out.stdout.splitlines():
+         line = line.strip()
+         if not line:
+           continue
+         # Expect: "<uuid>, <pid>, <used_gpu_memory>"
+         parts = [p.strip() for p in line.split(",")]
+         if len(parts) < 3:
+           continue
+
+         uuid = parts[0]
+         try:
+           pid = int(parts[1])
+         except Exception:
+           continue
+
+         used_mib = None
+         try:
+           # used_gpu_memory is in MiB; `nounits` makes it plain numeric
+           used_mib = float(parts[2])
+         except Exception:
+           used_mib = None
+
+         # Convert to bytes to reuse the existing MB/GB conversion logic
+         used_bytes = None if used_mib is None else int(used_mib * 1024 * 1024)
+
+         dct_proc_info = {"PID": pid}
+         dct_proc_info["ALLOCATED_MEM"] = round(
+           (used_bytes / 1024 ** (2 if mb else 3)) if used_bytes is not None else 0.0,
+           2
+         )
+
+         processes_by_uuid.setdefault(uuid, []).append(dct_proc_info)
+       # endfor lines
+
+     except Exception:
+       # Any failure here should degrade gracefully to empty processes
+       return {}
+
+     return processes_by_uuid
+
+   def _get_uuid_by_index(self, timeout=1.5):
+     import subprocess, shutil
+     smi = shutil.which("nvidia-smi")
+     if not smi:
+       return {}
+
+     out = subprocess.run(
+       [smi, "--query-gpu=index,uuid", "--format=csv,noheader,nounits"],
+       capture_output=True, text=True, timeout=timeout
+     )
+     if out.returncode != 0:
+       return {}
+
+     d = {}
+     for line in out.stdout.splitlines():
+       line = line.strip()
+       if not line:
+         continue
+       idx_s, uuid = [p.strip() for p in line.split(",", 1)]
+       d[int(idx_s)] = uuid
+     return d
+
    def gpu_info(self, show=False, mb=False, current_pid=False):
      """
      Collects GPU info. Must have torch installed & non-mandatory nvidia-smi
@@ -101,6 +188,7 @@ class _GPUMixin(object):
        # first get name
        import torch as th
        import os
+       processes_by_uuid = self._get_processes_by_uuid_via_nvidia_smi(mb=mb)
      except:
        self.P("ERROR: `gpu_info` call failed - PyTorch probably is not installed:\n{}".format(
          traceback.format_exc())
@@ -149,20 +237,12 @@ class _GPUMixin(object):
        fan_speed, fan_speed_unit = -1, "N/A"
        if pynvml_avail:
          # --- get an NVML handle that matches torch's CUDA device ordering when possible ---
-         handle = None
-         try:
-           # This helps when CUDA_VISIBLE_DEVICES remaps indices:
-           # torch device 0 may not be NVML index 0.
-           pci_bus_id = getattr(device_props, "pci_bus_id", None)
-           if pci_bus_id:
-             if hasattr(pynvml, "nvmlDeviceGetHandleByPciBusId_v2"):
-               handle = pynvml.nvmlDeviceGetHandleByPciBusId_v2(pci_bus_id)
-             elif hasattr(pynvml, "nvmlDeviceGetHandleByPciBusId"):
-               handle = pynvml.nvmlDeviceGetHandleByPciBusId(pci_bus_id)
-         except Exception:
-           handle = None
-
-         if handle is None:
+         uuid_by_index = self._get_uuid_by_index()
+         # device_id is the loop variable of the enclosing `for device_id in range(n_gpus)` loop
+         uuid = uuid_by_index.get(device_id)
+         if uuid:
+           handle = pynvml.nvmlDeviceGetHandleByUUID(uuid)
+         else:
            handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)

          # --- memory (NVML returns bytes) ---
@@ -214,25 +294,22 @@
        if True:
          processes = []
          try:
-           nvml_na = getattr(pynvml, "NVML_VALUE_NOT_AVAILABLE", None)
-           for proc in pynvml.nvmlDeviceGetComputeRunningProcesses(handle):
-             dct_proc_info = {k.upper(): v for k, v in proc.__dict__.items()}
-             used_mem = dct_proc_info.pop("USEDGPUMEMORY", None)
-
-             if used_mem in (None, nvml_na) or (isinstance(used_mem, int) and used_mem < 0):
-               used_mem = None
-
-             dct_proc_info["ALLOCATED_MEM"] = round(
-               used_mem / 1024 ** (2 if mb else 3) if used_mem is not None else 0.0,
-               2
-             )
-             processes.append(dct_proc_info)
-
-             if dct_proc_info.get("PID") == os.getpid():
-               current_pid_has_usage = True
-               current_pid_gpus.append(device_id)
+           # Use NVML only to get the UUID for this device, then map from the nvidia-smi results.
+           uuid = None
+           try:
+             uuid = pynvml.nvmlDeviceGetUUID(handle)
+           except Exception:
+             uuid = None
+
+           if uuid and uuid in processes_by_uuid:
+             processes = processes_by_uuid[uuid]
+
+           # Preserve the existing "current pid GPU usage" behavior
+           for p in processes:
+             if p.get("PID") == os.getpid():
+               current_pid_has_usage = True
+               current_pid_gpus.append(device_id)
          except Exception:
-           # if this fails, keep empty list like before
            processes = []
        # endtry processes
        # endif processes
naeural_core/main/ver.py CHANGED
@@ -1,4 +1,4 @@
- __VER__ = '7.7.238'
+ __VER__ = '7.7.240'



naeural_core-7.7.240.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: naeural_core
- Version: 7.7.238
+ Version: 7.7.240
  Summary: Ratio1 Core is the backbone of the Ratio1 Edge Protocol.
  Project-URL: Homepage, https://github.com/Ratio1/naeural_core
  Project-URL: Bug Tracker, https://github.com/Ratio1/naeural_core/issues
naeural_core-7.7.240.dist-info/RECORD CHANGED
@@ -137,7 +137,7 @@ naeural_core/core_logging/logger_mixins/confusion_matrix_mixin.py,sha256=fLJOeyp
  naeural_core/core_logging/logger_mixins/dataframe_mixin.py,sha256=hkOtoTzoBDacpagdFYp2kawsw7rzbgLw2-_pzXLBU6Q,11491
  naeural_core/core_logging/logger_mixins/deploy_models_in_production_mixin.py,sha256=J2j1tnt0Cd2qD31rL8Sov0sz9_T5-h6Ukd-4sl1ITcQ,5986
  naeural_core/core_logging/logger_mixins/fit_debug_tfkeras_mixin.py,sha256=6efE5W59a3VWWR1UbPd9iNfQl0nuse7SW3IV0RFpHOc,9344
- naeural_core/core_logging/logger_mixins/gpu_mixin.py,sha256=Fy2xnyp3auZK3tMMFwYLSAU65YYJbsAXfvUXgaCypnc,12290
+ naeural_core/core_logging/logger_mixins/gpu_mixin.py,sha256=1gh83RB8ziZJG-UdF1BnOPJWWFAq4xbbhGuQvUPUn1E,14193
  naeural_core/core_logging/logger_mixins/grid_search_mixin.py,sha256=lo3bTyEmcsk03ttTLs0lC_N0beAC1eTiGI9kZX2ib-A,5961
  naeural_core/core_logging/logger_mixins/histogram_mixin.py,sha256=ro5q99VXdcLSBi6XMZK4fJIXNX8n4wBzay-0SXuj7qc,6413
  naeural_core/core_logging/logger_mixins/keras_callbacks_mixin.py,sha256=ELlTb8TycdFnuO2dMDAherlzRd1rfHWogIDq-svnZ7w,3940
@@ -345,7 +345,7 @@ naeural_core/main/geoloc.py,sha256=TEqyuNzpVqZSBCo0OOrpHYncIsHSClvRt28hgvxJ35o,2
  naeural_core/main/main_loop_data_handler.py,sha256=hABB65OUBhtur3rd2mYsEhdAc54jVILzybrvxml5h0s,13815
  naeural_core/main/net_mon.py,sha256=qlyo1fqTeQy_M9VfJOxon_PBbQat0QO9Zbu_93FMbLc,88144
  naeural_core/main/orchestrator.py,sha256=SKnW5jWksBm2-fZcfeT5dddDmFSTSyA-CcODKX664KI,69657
- naeural_core/main/ver.py,sha256=VXAOU8OhhU9z_rNR0TSicHTPcMP_A6If94K8YHqCTWo,335
+ naeural_core/main/ver.py,sha256=PyxIMv29XFE2KQRDl_o3fut65G_P9i8u849nGECYtVQ,335
  naeural_core/main/orchestrator_mixins/__init__.py,sha256=MNleg48vdlqsyAR8Vamjl4ahG2jwCH5kLbQN5CfU57E,149
  naeural_core/main/orchestrator_mixins/managers_init.py,sha256=sQVqpr99a5WP9HCloYCyaWDW5J3IypEImlf703bqTF4,6692
  naeural_core/main/orchestrator_mixins/utils.py,sha256=jMa0uStVNLQmp0VhNMRvfBDjo387ORLlUVLthRNBKqc,1866
@@ -555,7 +555,7 @@ naeural_core/utils/tracing/onnx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
  naeural_core/utils/tracing/onnx/base_trt_scripter.py,sha256=1FelEBo7JGsc8hbJ3sevzxnM-J61nvBHz6L1VLpZrVc,2043
  naeural_core/utils/tracing/onnx/utils.py,sha256=IKmqUWakrMWn34uJvbRjNLacdszD8jkkQBFPUhgJtOQ,5618
  naeural_core/utils/web_app/favicon.ico,sha256=zU6-Jxx4ol1A9FJvcQELYV9DiqwqyvjPS89xQybZE74,15406
- naeural_core-7.7.238.dist-info/METADATA,sha256=lEfaX5VSJDt2U-BhSs2OYrp_MYzEFdLCuB3XeLlZOa4,6522
- naeural_core-7.7.238.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
- naeural_core-7.7.238.dist-info/licenses/LICENSE,sha256=SPHPWjOdAUUUUI020nI5VNCtFjmTOlJpi1cZxyB3gKo,11339
- naeural_core-7.7.238.dist-info/RECORD,,
+ naeural_core-7.7.240.dist-info/METADATA,sha256=qjN4dhGCJ6yfpUtaJ5rX-XOF3-zeS9PurV7WHEGK3ws,6522
+ naeural_core-7.7.240.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ naeural_core-7.7.240.dist-info/licenses/LICENSE,sha256=SPHPWjOdAUUUUI020nI5VNCtFjmTOlJpi1cZxyB3gKo,11339
+ naeural_core-7.7.240.dist-info/RECORD,,