naeural-core 7.7.238__py3-none-any.whl → 7.7.240__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- naeural_core/core_logging/logger_mixins/gpu_mixin.py +109 -32
- naeural_core/main/ver.py +1 -1
- {naeural_core-7.7.238.dist-info → naeural_core-7.7.240.dist-info}/METADATA +1 -1
- {naeural_core-7.7.238.dist-info → naeural_core-7.7.240.dist-info}/RECORD +6 -6
- {naeural_core-7.7.238.dist-info → naeural_core-7.7.240.dist-info}/WHEEL +0 -0
- {naeural_core-7.7.238.dist-info → naeural_core-7.7.240.dist-info}/licenses/LICENSE +0 -0
naeural_core/core_logging/logger_mixins/gpu_mixin.py CHANGED

@@ -1,6 +1,8 @@
 import gc
 import traceback
 import platform
+import subprocess
+import shutil

 class _GPUMixin(object):
   """
@@ -71,6 +73,91 @@ class _GPUMixin(object):
   def skip_gpu_info_check(self):
     return vars(self).get('_GPUMixin__no_gpu_avail', False)

+  def _get_processes_by_uuid_via_nvidia_smi(self, mb: bool):
+    """
+    Returns:
+      processes_by_uuid: dict[str, list[dict]]  # uuid -> [{'PID':..., 'ALLOCATED_MEM':...}, ...]
+    Notes:
+      nvidia-smi used_gpu_memory is in MiB when nounits is used in CSV output (common behavior).
+    """
+    processes_by_uuid = {}
+
+    smi = shutil.which("nvidia-smi")
+    if not smi:
+      return processes_by_uuid  # keep behavior: processes empty if unavailable
+
+    try:
+      cmd = [
+        smi,
+        "--query-compute-apps=gpu_uuid,pid,used_gpu_memory",
+        "--format=csv,noheader,nounits",
+      ]
+      out = subprocess.run(cmd, capture_output=True, text=True, timeout=1.5)
+      if out.returncode != 0:
+        return processes_by_uuid
+
+      for line in out.stdout.splitlines():
+        line = line.strip()
+        if not line:
+          continue
+        # Expect: "<uuid>, <pid>, <used_gpu_memory>"
+        parts = [p.strip() for p in line.split(",")]
+        if len(parts) < 3:
+          continue
+
+        uuid = parts[0]
+        try:
+          pid = int(parts[1])
+        except Exception:
+          continue
+
+        used_mib = None
+        try:
+          # used_gpu_memory in MiB, nounits -> numeric
+          used_mib = float(parts[2])
+        except Exception:
+          used_mib = None
+
+        # Convert to bytes to reuse your existing MB/GB conversion logic
+        used_bytes = None if used_mib is None else int(used_mib * 1024 * 1024)
+
+        dct_proc_info = {"PID": pid}
+        dct_proc_info["ALLOCATED_MEM"] = round(
+          (used_bytes / 1024 ** (2 if mb else 3)) if used_bytes is not None else 0.0,
+          2
+        )
+
+        processes_by_uuid.setdefault(uuid, []).append(dct_proc_info)
+      # endfor lines
+
+    except Exception:
+      # Any failure here should degrade gracefully to empty processes
+      return {}
+
+    return processes_by_uuid
+
+  def _get_uuid_by_index(self, timeout=1.5):
+    import subprocess, shutil
+    smi = shutil.which("nvidia-smi")
+    if not smi:
+      return {}
+
+    out = subprocess.run(
+      [smi, "--query-gpu=index,uuid", "--format=csv,noheader,nounits"],
+      capture_output=True, text=True, timeout=timeout
+    )
+    if out.returncode != 0:
+      return {}
+
+    d = {}
+    for line in out.stdout.splitlines():
+      line = line.strip()
+      if not line:
+        continue
+      idx_s, uuid = [p.strip() for p in line.split(",", 1)]
+      d[int(idx_s)] = uuid
+    return d
+
   def gpu_info(self, show=False, mb=False, current_pid=False):
     """
     Collects GPU info. Must have torch installed & non-mandatory nvidia-smi
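
As a reading aid (not part of the package): the helper added above groups the CSV rows emitted by `nvidia-smi --query-compute-apps=gpu_uuid,pid,used_gpu_memory --format=csv,noheader,nounits` into a `uuid -> process list` dict, with memory reported in MiB when `mb=True` and GiB otherwise. A minimal standalone sketch of that parsing, using fabricated UUIDs, PIDs, and memory values and a hypothetical `parse_compute_apps` function name:

# Illustration only; sample data is made up, parse_compute_apps is not part of naeural_core.
sample_output = """\
GPU-11111111-2222-3333-4444-555555555555, 1234, 2048
GPU-11111111-2222-3333-4444-555555555555, 5678, 512
GPU-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee, 4321, 10240
"""

def parse_compute_apps(csv_text: str, mb: bool = True) -> dict:
  processes_by_uuid = {}
  for line in csv_text.splitlines():
    line = line.strip()
    if not line:
      continue
    uuid, pid_s, used_mib_s = [p.strip() for p in line.split(",")]
    used_bytes = int(float(used_mib_s) * 1024 * 1024)              # MiB -> bytes
    allocated = round(used_bytes / 1024 ** (2 if mb else 3), 2)    # bytes -> MiB or GiB
    processes_by_uuid.setdefault(uuid, []).append(
      {"PID": int(pid_s), "ALLOCATED_MEM": allocated}
    )
  return processes_by_uuid

print(parse_compute_apps(sample_output))
# {'GPU-1111...': [{'PID': 1234, 'ALLOCATED_MEM': 2048.0}, {'PID': 5678, 'ALLOCATED_MEM': 512.0}],
#  'GPU-aaaa...': [{'PID': 4321, 'ALLOCATED_MEM': 10240.0}]}
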
@@ -101,6 +188,7 @@ class _GPUMixin(object):
       # first get name
       import torch as th
       import os
+      processes_by_uuid = self._get_processes_by_uuid_via_nvidia_smi(mb=mb)
     except:
       self.P("ERROR: `gpu_info` call failed - PyTorch probably is not installed:\n{}".format(
         traceback.format_exc())
@@ -149,20 +237,12 @@ class _GPUMixin(object):
         fan_speed, fan_speed_unit = -1, "N/A"
         if pynvml_avail:
           # --- get an NVML handle that matches torch's CUDA device ordering when possible ---
-
-
-
-
-
-
-          if hasattr(pynvml, "nvmlDeviceGetHandleByPciBusId_v2"):
-            handle = pynvml.nvmlDeviceGetHandleByPciBusId_v2(pci_bus_id)
-          elif hasattr(pynvml, "nvmlDeviceGetHandleByPciBusId"):
-            handle = pynvml.nvmlDeviceGetHandleByPciBusId(pci_bus_id)
-          except Exception:
-            handle = None
-
-          if handle is None:
+          uuid_by_index = self._get_uuid_by_index()
+          # inside your for device_id in range(n_gpus):
+          uuid = uuid_by_index.get(device_id)
+          if uuid:
+            handle = pynvml.nvmlDeviceGetHandleByUUID(uuid)
+          else:
             handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)

           # --- memory (NVML returns bytes) ---
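
For orientation (not the package's code): the hunk above switches handle resolution from PCI-bus-ID lookups to "UUID first, index as fallback". A minimal sketch of that preference order, assuming the nvidia-ml-py/pynvml package and an NVIDIA driver are available; `get_handle` is a hypothetical helper, and the str-vs-bytes handling of UUIDs varies across pynvml versions, hence the defensive retries:

# Sketch under stated assumptions; requires pynvml and an NVIDIA driver to actually run.
from typing import Optional
import pynvml

def get_handle(device_id: int, uuid: Optional[str]):
  if uuid:
    try:
      return pynvml.nvmlDeviceGetHandleByUUID(uuid)        # newer pynvml accepts str
    except Exception:
      try:
        return pynvml.nvmlDeviceGetHandleByUUID(uuid.encode())  # older versions want bytes
      except Exception:
        pass
  return pynvml.nvmlDeviceGetHandleByIndex(device_id)      # fallback: plain index lookup

if __name__ == "__main__":
  pynvml.nvmlInit()
  try:
    handle = get_handle(0, None)  # exercises the index-based fallback path
    print(pynvml.nvmlDeviceGetName(handle))
  finally:
    pynvml.nvmlShutdown()
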
@@ -214,25 +294,22 @@ class _GPUMixin(object):
         if True:
           processes = []
           try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            current_pid_has_usage = True
-            current_pid_gpus.append(device_id)
+            # Use NVML only to get the UUID for this device, then map from nvidia-smi results.
+            uuid = None
+            try:
+              uuid = pynvml.nvmlDeviceGetUUID(handle)
+            except Exception:
+              uuid = None
+
+            if uuid and uuid in processes_by_uuid:
+              processes = processes_by_uuid[uuid]
+
+            # Preserve your existing "current pid GPU usage" behavior
+            for p in processes:
+              if p.get("PID") == os.getpid():
+                current_pid_has_usage = True
+                current_pid_gpus.append(device_id)
           except Exception:
-            # if this fails, keep empty list like before
            processes = []
          # endtry processes
        # endif processes
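
To summarize the gpu_mixin.py change as a whole (an illustration, not code from the package): per-process GPU memory now comes from one nvidia-smi call keyed by GPU UUID, and each device index is mapped to its UUID to pick the right process list and to flag GPUs used by the current process. The sketch below reproduces that loop with fabricated `uuid_by_index` and `processes_by_uuid` tables, so it runs without a GPU:

# Illustration only; the lookup tables below are made up stand-ins for the nvidia-smi results.
import os

uuid_by_index = {0: "GPU-1111", 1: "GPU-2222"}
processes_by_uuid = {
  "GPU-1111": [{"PID": os.getpid(), "ALLOCATED_MEM": 512.0}],
  "GPU-2222": [{"PID": 4242, "ALLOCATED_MEM": 1024.0}],
}

current_pid_gpus = []
for device_id in sorted(uuid_by_index):
  uuid = uuid_by_index.get(device_id)
  processes = processes_by_uuid.get(uuid, []) if uuid else []
  if any(p.get("PID") == os.getpid() for p in processes):
    current_pid_gpus.append(device_id)     # this device is used by the current process
  print(device_id, uuid, processes)

print("GPUs used by this process:", current_pid_gpus)   # -> [0]
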
naeural_core/main/ver.py CHANGED

{naeural_core-7.7.238.dist-info → naeural_core-7.7.240.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: naeural_core
-Version: 7.7.238
+Version: 7.7.240
 Summary: Ratio1 Core is the backbone of the Ratio1 Edge Protocol.
 Project-URL: Homepage, https://github.com/Ratio1/naeural_core
 Project-URL: Bug Tracker, https://github.com/Ratio1/naeural_core/issues
{naeural_core-7.7.238.dist-info → naeural_core-7.7.240.dist-info}/RECORD CHANGED

@@ -137,7 +137,7 @@ naeural_core/core_logging/logger_mixins/confusion_matrix_mixin.py,sha256=fLJOeyp
 naeural_core/core_logging/logger_mixins/dataframe_mixin.py,sha256=hkOtoTzoBDacpagdFYp2kawsw7rzbgLw2-_pzXLBU6Q,11491
 naeural_core/core_logging/logger_mixins/deploy_models_in_production_mixin.py,sha256=J2j1tnt0Cd2qD31rL8Sov0sz9_T5-h6Ukd-4sl1ITcQ,5986
 naeural_core/core_logging/logger_mixins/fit_debug_tfkeras_mixin.py,sha256=6efE5W59a3VWWR1UbPd9iNfQl0nuse7SW3IV0RFpHOc,9344
-naeural_core/core_logging/logger_mixins/gpu_mixin.py,sha256=
+naeural_core/core_logging/logger_mixins/gpu_mixin.py,sha256=1gh83RB8ziZJG-UdF1BnOPJWWFAq4xbbhGuQvUPUn1E,14193
 naeural_core/core_logging/logger_mixins/grid_search_mixin.py,sha256=lo3bTyEmcsk03ttTLs0lC_N0beAC1eTiGI9kZX2ib-A,5961
 naeural_core/core_logging/logger_mixins/histogram_mixin.py,sha256=ro5q99VXdcLSBi6XMZK4fJIXNX8n4wBzay-0SXuj7qc,6413
 naeural_core/core_logging/logger_mixins/keras_callbacks_mixin.py,sha256=ELlTb8TycdFnuO2dMDAherlzRd1rfHWogIDq-svnZ7w,3940
@@ -345,7 +345,7 @@ naeural_core/main/geoloc.py,sha256=TEqyuNzpVqZSBCo0OOrpHYncIsHSClvRt28hgvxJ35o,2
 naeural_core/main/main_loop_data_handler.py,sha256=hABB65OUBhtur3rd2mYsEhdAc54jVILzybrvxml5h0s,13815
 naeural_core/main/net_mon.py,sha256=qlyo1fqTeQy_M9VfJOxon_PBbQat0QO9Zbu_93FMbLc,88144
 naeural_core/main/orchestrator.py,sha256=SKnW5jWksBm2-fZcfeT5dddDmFSTSyA-CcODKX664KI,69657
-naeural_core/main/ver.py,sha256=
+naeural_core/main/ver.py,sha256=PyxIMv29XFE2KQRDl_o3fut65G_P9i8u849nGECYtVQ,335
 naeural_core/main/orchestrator_mixins/__init__.py,sha256=MNleg48vdlqsyAR8Vamjl4ahG2jwCH5kLbQN5CfU57E,149
 naeural_core/main/orchestrator_mixins/managers_init.py,sha256=sQVqpr99a5WP9HCloYCyaWDW5J3IypEImlf703bqTF4,6692
 naeural_core/main/orchestrator_mixins/utils.py,sha256=jMa0uStVNLQmp0VhNMRvfBDjo387ORLlUVLthRNBKqc,1866
@@ -555,7 +555,7 @@ naeural_core/utils/tracing/onnx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
 naeural_core/utils/tracing/onnx/base_trt_scripter.py,sha256=1FelEBo7JGsc8hbJ3sevzxnM-J61nvBHz6L1VLpZrVc,2043
 naeural_core/utils/tracing/onnx/utils.py,sha256=IKmqUWakrMWn34uJvbRjNLacdszD8jkkQBFPUhgJtOQ,5618
 naeural_core/utils/web_app/favicon.ico,sha256=zU6-Jxx4ol1A9FJvcQELYV9DiqwqyvjPS89xQybZE74,15406
-naeural_core-7.7.
-naeural_core-7.7.
-naeural_core-7.7.
-naeural_core-7.7.
+naeural_core-7.7.240.dist-info/METADATA,sha256=qjN4dhGCJ6yfpUtaJ5rX-XOF3-zeS9PurV7WHEGK3ws,6522
+naeural_core-7.7.240.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+naeural_core-7.7.240.dist-info/licenses/LICENSE,sha256=SPHPWjOdAUUUUI020nI5VNCtFjmTOlJpi1cZxyB3gKo,11339
+naeural_core-7.7.240.dist-info/RECORD,,

{naeural_core-7.7.238.dist-info → naeural_core-7.7.240.dist-info}/WHEEL: File without changes
{naeural_core-7.7.238.dist-info → naeural_core-7.7.240.dist-info}/licenses/LICENSE: File without changes