AOT-biomaps 2.9.212-py3-none-any.whl → 2.9.233-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of AOT-biomaps might be problematic.
- AOT_biomaps/AOT_Experiment/Tomography.py +70 -0
- AOT_biomaps/AOT_Experiment/_mainExperiment.py +41 -22
- AOT_biomaps/AOT_Recon/AOT_Optimizers/DEPIERRO.py +48 -11
- AOT_biomaps/AOT_Recon/AOT_Optimizers/LS.py +9 -6
- AOT_biomaps/AOT_Recon/AOT_Optimizers/MAPEM.py +118 -38
- AOT_biomaps/AOT_Recon/AOT_Optimizers/MLEM.py +157 -86
- AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/RelativeDifferences.py +10 -14
- AOT_biomaps/AOT_Recon/AlgebraicRecon.py +337 -185
- AOT_biomaps/AOT_Recon/BayesianRecon.py +33 -96
- AOT_biomaps/AOT_Recon/PrimalDualRecon.py +14 -18
- AOT_biomaps/AOT_Recon/ReconEnums.py +14 -0
- AOT_biomaps/AOT_Recon/ReconTools.py +4 -3
- AOT_biomaps/AOT_Recon/_mainRecon.py +3 -2
- AOT_biomaps/__init__.py +22 -1
- {aot_biomaps-2.9.212.dist-info → aot_biomaps-2.9.233.dist-info}/METADATA +1 -1
- {aot_biomaps-2.9.212.dist-info → aot_biomaps-2.9.233.dist-info}/RECORD +18 -18
- {aot_biomaps-2.9.212.dist-info → aot_biomaps-2.9.233.dist-info}/WHEEL +0 -0
- {aot_biomaps-2.9.212.dist-info → aot_biomaps-2.9.233.dist-info}/top_level.txt +0 -0
AOT_biomaps/AOT_Recon/AOT_Optimizers/MLEM.py

@@ -5,6 +5,10 @@ import torch
 import numpy as np
 import os
 from tqdm import trange
+import cupy as cp
+import cupyx.scipy.sparse as cpsparse
+import gc
+
 
 def MLEM(
     SMatrix,
@@ -13,9 +17,12 @@ def MLEM(
     isSavingEachIteration=True,
     withTumor=True,
     device=None,
-    use_multi_gpu=False,
     use_numba=False,
+    denominator_threshold=1e-6,
     max_saves=5000,
+    show_logs=True,
+    useSparseSMatrix=True,
+    Z=350,
 ):
     """
     Unified MLEM algorithm for Acousto-Optic Tomography.
@@ -37,7 +44,7 @@ def MLEM(
         tumor_str = "WITH" if withTumor else "WITHOUT"
         # Auto-select device and method
         if device is None:
-            if torch.cuda.is_available() and check_gpu_memory(config.select_best_gpu(), calculate_memory_requirement(SMatrix, y)):
+            if torch.cuda.is_available() and check_gpu_memory(config.select_best_gpu(), calculate_memory_requirement(SMatrix, y), show_logs=show_logs):
                 device = torch.device(f"cuda:{config.select_best_gpu()}")
                 use_gpu = True
             else:
@@ -47,20 +54,20 @@
         use_gpu = device.type == "cuda"
         # Dispatch to the appropriate implementation
         if use_gpu:
-            if use_multi_gpu and torch.cuda.device_count() > 1:
-                return _MLEM_multi_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves)
-            else:
-                return _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves)
+            if useSparseSMatrix:
+                return _MLEM_sparseCSR(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device.index, max_saves, denominator_threshold, Z, show_logs)
+            else:
+                return _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs)
         else:
             if use_numba:
-                return _MLEM_CPU_numba(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves)
+                return _MLEM_CPU_numba(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
             else:
-                return _MLEM_CPU_opti(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves)
+                return _MLEM_CPU_opti(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs)
     except Exception as e:
         print(f"Error in MLEM: {type(e).__name__}: {e}")
         return None, None
 
-def _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves=5000):
+def _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, device, max_saves, denominator_threshold, show_logs=True):
     try:
         eps = torch.finfo(torch.float32).eps
         T, Z, X, N = SMatrix.shape
@@ -82,7 +89,6 @@ def _MLEM_single_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str
             .reshape(-1)
         )
         description = f"AOT-BioMaps -- ML-EM ---- {tumor_str} TUMOR ---- GPU {torch.cuda.current_device()}"
-
         # Calculate save indices
         if numIterations <= max_saves:
             save_indices = list(range(numIterations))
@@ -91,20 +97,21 @@
             save_indices = list(range(0, numIterations, step))
             if save_indices[-1] != numIterations - 1:
                 save_indices.append(numIterations - 1)
-
         saved_theta = []
         saved_indices = []
-
         with torch.no_grad():
-            for it in trange(numIterations, desc=description):
+            # Use range if show_logs=False, otherwise trange
+            iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
+            for it in iterator:
                 q_flat = A_flat @ theta_flat
-                e_flat = y_flat / (q_flat + eps)
+                # Apply the threshold: if q_flat < denominator_threshold, set e_flat to 1 (as in the C++ code)
+                mask = q_flat >= denominator_threshold
+                e_flat = torch.where(mask, y_flat / (q_flat + eps), torch.ones_like(q_flat))
                 c_flat = A_flat.T @ e_flat
                 theta_flat = (theta_flat / (norm_factor_flat + eps)) * c_flat
                 if isSavingEachIteration and it in save_indices:
                     saved_theta.append(theta_flat.reshape(Z, X).clone())
                     saved_indices.append(it)
-
         # Free memory
         del A_flat, y_flat, norm_factor_flat
         torch.cuda.empty_cache()
@@ -117,74 +124,15 @@
         torch.cuda.empty_cache()
         return None, None
 
-def _MLEM_multi_GPU(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves=5000):
-    try:
-        num_gpus = torch.cuda.device_count()
-        device = torch.device('cuda:0')
-        T, Z, X, N = SMatrix.shape
-        A_matrix_torch = torch.tensor(SMatrix, dtype=torch.float32).to(device).permute(0, 3, 1, 2).reshape(T * N, Z * X)
-        y_torch = torch.tensor(y, dtype=torch.float32).to(device).reshape(-1)
-        A_split = torch.chunk(A_matrix_torch, num_gpus, dim=0)
-        y_split = torch.chunk(y_torch, num_gpus)
-        theta_0 = torch.ones((Z, X), dtype=torch.float32, device=device)
-        theta_list = [theta_0.clone().to(device) for _ in range(num_gpus)]
-        normalization_factor = A_matrix_torch.sum(dim=0).reshape(Z, X).to(device)
-
-        # Calculate save indices
-        if numIterations <= max_saves:
-            save_indices = list(range(numIterations))
-        else:
-            step = numIterations // max_saves
-            save_indices = list(range(0, numIterations, step))
-            if save_indices[-1] != numIterations - 1:
-                save_indices.append(numIterations - 1)
-
-        saved_theta = [theta_0.cpu().numpy()]
-        saved_indices = [0]
-        description = f"AOT-BioMaps -- ML-EM ---- {tumor_str} TUMOR ---- processing on multi-GPU ({num_gpus} GPUs) ----"
-
-        for it in trange(numIterations, desc=description):
-            theta_p_list = []
-            for i in range(num_gpus):
-                with torch.cuda.device(f'cuda:{i}'):
-                    theta_p = theta_list[i].to(f'cuda:{i}')
-                    A_i = A_split[i].to(f'cuda:{i}')
-                    y_i = y_split[i].to(f'cuda:{i}')
-                    q_flat = A_i @ theta_p.reshape(-1)
-                    e_flat = y_i / (q_flat + torch.finfo(torch.float32).tiny)
-                    c_flat = A_i.T @ e_flat
-                    theta_p_plus_1_flat = (theta_p.reshape(-1) / (normalization_factor.to(f'cuda:{i}').reshape(-1) + torch.finfo(torch.float32).tiny)) * c_flat
-                    theta_p_plus_1 = theta_p_plus_1_flat.reshape(Z, X)
-                    theta_p_list.append(theta_p_plus_1)
-            for i in range(num_gpus):
-                theta_list[i] = theta_p_list[i].to('cuda:0')
-            if isSavingEachIteration and it in save_indices:
-                saved_theta.append(torch.stack(theta_p_list).mean(dim=0).cpu().numpy())
-                saved_indices.append(it + 1)
-
-        del A_matrix_torch, y_torch, A_split, y_split, theta_0, normalization_factor
-        for i in range(num_gpus):
-            torch.cuda.empty_cache()
-        if not isSavingEachIteration:
-            return torch.stack(theta_p_list).mean(dim=0).cpu().numpy(), None
-        else:
-            return saved_theta, saved_indices
-    except Exception as e:
-        print(f"Error in multi-GPU MLEM: {type(e).__name__}: {e}")
-        del A_matrix_torch, y_torch, A_split, y_split, theta_0, normalization_factor
-        for i in range(num_gpus):
-            torch.cuda.empty_cache()
-        return None, None
-
-def _MLEM_CPU_numba(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves=5000):
+def _MLEM_CPU_numba(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs=True):
     try:
         numba.set_num_threads(os.cpu_count())
-        q_p = np.zeros((SMatrix.shape[0], SMatrix.shape[3]))
-        c_p = np.zeros((SMatrix.shape[1], SMatrix.shape[2]))
-        theta_p_0 = np.ones((SMatrix.shape[1], SMatrix.shape[2]))
+        q_p = np.zeros((SMatrix.shape[0], SMatrix.shape[3]), dtype=np.float32)
+        c_p = np.zeros((SMatrix.shape[1], SMatrix.shape[2]), dtype=np.float32)
+        theta_p_0 = np.ones((SMatrix.shape[1], SMatrix.shape[2]), dtype=np.float32)
         matrix_theta = [theta_p_0]
         saved_indices = [0]
-        normalization_factor = np.sum(SMatrix, axis=(0, 3))
+        normalization_factor = np.sum(SMatrix, axis=(0, 3)).astype(np.float32)
 
         # Calculate save indices
         if numIterations <= max_saves:
@@ -196,14 +144,20 @@ def _MLEM_CPU_numba(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
             save_indices.append(numIterations - 1)
 
         description = f"AOT-BioMaps -- ML-EM ---- {tumor_str} TUMOR ---- processing on multithread CPU ({numba.config.NUMBA_DEFAULT_NUM_THREADS} threads) ----"
+        iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
 
-        for it in trange(numIterations, desc=description):
+        for it in iterator:
             theta_p = matrix_theta[-1]
             _forward_projection(SMatrix, theta_p, q_p)
-            e_p = y / (q_p + 1e-8)
+
+            # Apply the threshold: if q_p < denominator_threshold, set e_p to 1
+            mask = q_p >= denominator_threshold
+            e_p = np.where(mask, y / (q_p + 1e-8), 1.0)
+
             _backward_projection(SMatrix, e_p, c_p)
             theta_p_plus_1 = theta_p / (normalization_factor + 1e-8) * c_p
-            if isSavingEachIteration and (it + 1) in save_indices:
+
+            if isSavingEachIteration and (it + 1) in save_indices:
                 matrix_theta.append(theta_p_plus_1)
                 saved_indices.append(it + 1)
             else:
@@ -217,7 +171,7 @@ def _MLEM_CPU_numba(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
         print(f"Error in Numba CPU MLEM: {type(e).__name__}: {e}")
         return None, None
 
-def _MLEM_CPU_opti(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves=5000):
+def _MLEM_CPU_opti(SMatrix, y, numIterations, isSavingEachIteration, tumor_str, max_saves, denominator_threshold, show_logs=True):
     try:
         T, Z, X, N = SMatrix.shape
         A_flat = SMatrix.astype(np.float32).transpose(0, 3, 1, 2).reshape(T * N, Z * X)
@@ -238,16 +192,22 @@ def _MLEM_CPU_opti(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
             save_indices.append(numIterations - 1)
 
         description = f"AOT-BioMaps -- ML-EM ---- {tumor_str} TUMOR ---- processing on single CPU (optimized) ----"
+        iterator = trange(numIterations, desc=description) if show_logs else range(numIterations)
 
-        for it in trange(numIterations, desc=description):
+        for it in iterator:
             theta_p = matrix_theta[-1]
             theta_p_flat = theta_p.reshape(-1)
             q_flat = A_flat @ theta_p_flat
-            e_flat = y_flat / (q_flat + np.finfo(np.float32).tiny)
+
+            # Apply the threshold: if q_flat < denominator_threshold, set e_flat to 1
+            mask = q_flat >= denominator_threshold
+            e_flat = np.where(mask, y_flat / (q_flat + np.finfo(np.float32).tiny), 1.0)
+
             c_flat = A_flat.T @ e_flat
             theta_p_plus_1_flat = theta_p_flat / (normalization_factor_flat + np.finfo(np.float32).tiny) * c_flat
             theta_p_plus_1 = theta_p_plus_1_flat.reshape(Z, X)
-            if isSavingEachIteration and (it + 1) in save_indices:
+
+            if isSavingEachIteration and (it + 1) in save_indices:
                 matrix_theta.append(theta_p_plus_1)
                 saved_indices.append(it + 1)
             else:
@@ -260,3 +220,114 @@ def _MLEM_CPU_opti(SMatrix, y, numIterations, isSavingEachIteration, tumor_str,
     except Exception as e:
         print(f"Error in optimized CPU MLEM: {type(e).__name__}: {e}")
         return None, None
+
+
+
+def _MLEM_sparseCSR(
+    SMatrix,
+    y,
+    numIterations,
+    isSavingEachIteration,
+    tumor_str,
+    device_index,
+    max_saves,
+    denominator_threshold,
+    Z,
+    show_logs=True,
+):
+    """
+    MLEM implementation using CuPy with a sparse CSR matrix on a single GPU.
+    Caution: SMatrix must be a cupyx.scipy.sparse.csr_matrix; convert it to sparse CSR before calling.
+    """
+    try:
+        cp.cuda.Device(device_index).use()
+        dtype = cp.float32
+        eps = cp.finfo(dtype).eps
+
+        # --- Prepare the matrix and the data ---
+        if not isinstance(SMatrix, cpsparse.csr_matrix):
+            SMatrix = cpsparse.csr_matrix(SMatrix, dtype=dtype)
+        else:
+            SMatrix = SMatrix.astype(dtype)
+
+        if not isinstance(y, cp.ndarray):
+            y_cupy = cp.asarray(y, dtype=dtype)
+        else:
+            y_cupy = y.astype(dtype)
+
+        TN, ZX = SMatrix.shape
+        X = ZX // Z
+
+        # Initialize the reconstructed volume
+        theta_flat = cp.full(ZX, 0.1, dtype=dtype)
+
+        # Normalization factor
+        norm_factor = cp.maximum(SMatrix.sum(axis=0).ravel(), 1e-6)
+        norm_factor_inv = 1.0 / norm_factor
+
+        # Handle the save indices
+        if numIterations <= max_saves:
+            save_indices = list(range(numIterations))
+        else:
+            step = max(1, numIterations // max_saves)
+            save_indices = list(range(0, numIterations, step))
+            if save_indices[-1] != numIterations - 1:
+                save_indices.append(numIterations - 1)
+
+        saved_theta = []
+        saved_indices = []
+
+        description = f"AOT-BioMaps -- ML-EM (sparse CSR) ---- {tumor_str} TUMOR ---- GPU {device_index}"
+
+        iterator = trange(numIterations, desc=description, ncols=100) if show_logs else range(numIterations)
+
+        # --- Main MLEM loop ---
+        for it in iterator:
+            # Step 1: projection
+            q_flat = SMatrix.dot(theta_flat)
+            q_flat = cp.maximum(q_flat, denominator_threshold)
+
+            # Step 2: ratio y / (A*theta)
+            e_flat = y_cupy / q_flat
+
+            # Step 3: backprojection (A.T * e)
+            c_flat = SMatrix.T.dot(e_flat)
+
+            # Step 4: update
+            theta_flat = theta_flat * (norm_factor_inv * c_flat)
+            theta_flat = cp.maximum(theta_flat, 0)
+
+            # Optional save
+            if isSavingEachIteration and it in save_indices:
+                saved_theta.append(theta_flat.reshape(Z, X).get())  # transfer to CPU
+                saved_indices.append(it)
+
+            # Free GPU memory
+            del q_flat, e_flat, c_flat
+            cp.get_default_memory_pool().free_all_blocks()
+            gc.collect()
+
+            # Convergence check every 10 iterations
+            if it % 10 == 0 and show_logs:
+                rel_change = cp.abs(theta_flat - theta_flat).max() / (theta_flat.max() + eps)
+                if rel_change < 1e-4:
+                    print(f"Convergence reached at iteration {it}")
+                    break
+
+        # --- End: retrieve the result ---
+        result = theta_flat.reshape(Z, X).get()  # back to CPU
+        del theta_flat, norm_factor, norm_factor_inv
+        cp.get_default_memory_pool().free_all_blocks()
+        gc.collect()
+
+        if isSavingEachIteration:
+            return saved_theta, saved_indices
+        else:
+            return result, None
+
+    except Exception as e:
+        print(f"Error in _MLEM_single_GPU_CuPy: {type(e).__name__}: {e}")
+        cp.get_default_memory_pool().free_all_blocks()
+        gc.collect()
+        return None, None
+
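For reference, every MLEM branch above applies the same multiplicative EM update; the new denominator_threshold parameter τ only changes how a near-zero forward projection is handled. With A the flattened (T·N, Z·X) system matrix and s the sensitivity image, one iteration is

\[
q^{(p)} = A\,\theta^{(p)}, \qquad
e_i^{(p)} =
\begin{cases}
  y_i / q_i^{(p)} & \text{if } q_i^{(p)} \ge \tau,\\
  1 & \text{otherwise,}
\end{cases}
\qquad
\theta^{(p+1)} = \frac{\theta^{(p)}}{s} \odot A^{\mathsf{T}} e^{(p)},
\quad s = A^{\mathsf{T}} \mathbf{1},
\]

where ⊙ is the element-wise product. The dense branches implement the case split with a mask (torch.where / np.where), while the sparse CSR branch clamps the denominator instead, via q_flat = cp.maximum(q_flat, denominator_threshold): a slightly different but related guard.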
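A minimal sketch of how the new sparse path might be invoked (shapes and values here are hypothetical, the import path follows the file list above, and numIterations is assumed to be the third positional parameter of MLEM):

import numpy as np
import cupy as cp
import cupyx.scipy.sparse as cpsparse
from AOT_biomaps.AOT_Recon.AOT_Optimizers.MLEM import MLEM

# Hypothetical problem sizes; Z=350 matches the new default.
T, Z, X, N = 100, 350, 64, 8
S_dense = np.random.rand(T, Z, X, N).astype(np.float32)  # placeholder system matrix
y = np.random.rand(T, N).astype(np.float32)              # placeholder measurements

# Flatten (T, Z, X, N) -> (T*N, Z*X), the layout the dense branches build
# internally, then sparsify on the GPU as the _MLEM_sparseCSR docstring asks.
A_flat = S_dense.transpose(0, 3, 1, 2).reshape(T * N, Z * X)
A_csr = cpsparse.csr_matrix(cp.asarray(A_flat))

theta, saved_indices = MLEM(
    A_csr,
    y.reshape(-1),
    1000,                        # numIterations (assumed positional)
    useSparseSMatrix=True,       # new in 2.9.233: dispatches to _MLEM_sparseCSR
    Z=Z,                         # needed to recover (Z, X) from the flat Z*X axis
    denominator_threshold=1e-6,  # new: floor for the q = A @ theta denominator
    show_logs=False,             # new: plain range instead of a tqdm progress bar
)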
AOT_biomaps/AOT_Recon/AOT_PotentialFunctions/RelativeDifferences.py

@@ -9,26 +9,24 @@ def _Omega_RELATIVE_DIFFERENCE_CPU(theta_flat, index, values, gamma):
     theta_k = theta_flat[k_idx]
     diff = theta_k - theta_j
     abs_diff = np.abs(diff)
-
     denom = theta_k + theta_j + gamma * abs_diff + 1e-8
     num = diff ** 2
-
+    psi_pair = num / denom
+    psi_pair = values * psi_pair
     # First derivative ∂U/∂θ_j
     dpsi = (2 * diff * denom - num * (1 + gamma * np.sign(diff))) / (denom ** 2)
     grad_pair = values * (-dpsi)  # Note the negative sign: U contains ψ(θ_k, θ_j), seeking ∂/∂θ_j
-
     # Second derivative ∂²U/∂θ_j² (numerically stable, approximate treatment)
     d2psi = (2 * denom ** 2 - 4 * diff * denom * (1 + gamma * np.sign(diff))
             + 2 * num * (1 + gamma * np.sign(diff)) ** 2) / (denom ** 3 + 1e-8)
     hess_pair = values * d2psi
-
     grad_U = np.zeros_like(theta_flat)
     hess_U = np.zeros_like(theta_flat)
-
     np.add.at(grad_U, j_idx, grad_pair)
     np.add.at(hess_U, j_idx, hess_pair)
-    return grad_U, hess_U
-
+    # Compute U_value
+    U_value = 0.5 * np.sum(psi_pair)
+    return grad_U, hess_U, U_value
 
 def _Omega_RELATIVE_DIFFERENCE_GPU(theta_flat, index, values, device, gamma):
     j_idx, k_idx = index
@@ -38,26 +36,24 @@ def _Omega_RELATIVE_DIFFERENCE_GPU(theta_flat, index, values, device, gamma):
     abs_diff = torch.abs(diff)
     denom = theta_k + theta_j + gamma * abs_diff + 1e-8
     num = diff ** 2
-
+    psi_pair = num / denom
+    psi_pair = values * psi_pair
     # Compute gradient contributions
     dpsi = (2 * diff * denom - num * (1 + gamma * torch.sign(diff))) / (denom ** 2)
     grad_pair = values * (-dpsi)
-
     # Compute Hessian contributions
     d2psi = (2 * denom ** 2 - 4 * diff * denom * (1 + gamma * torch.sign(diff))
             + 2 * num * (1 + gamma * torch.sign(diff)) ** 2) / (denom ** 3 + 1e-8)
     hess_pair = values * d2psi
-
     # Initialize gradient and Hessian on the correct device
     grad_U = torch.zeros_like(theta_flat, device=device)
     hess_U = torch.zeros_like(theta_flat, device=device)
-
     # Accumulate gradient contributions
     grad_U.index_add_(0, j_idx, grad_pair)
     grad_U.index_add_(0, k_idx, -grad_pair)
-
     # Accumulate Hessian contributions
     hess_U.index_add_(0, j_idx, hess_pair)
     hess_U.index_add_(0, k_idx, hess_pair)
-
-    return grad_U, hess_U
+    # Compute U_value
+    U_value = 0.5 * psi_pair.sum()
+    return grad_U, hess_U, U_value
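For reference, the pairwise potential that psi_pair now accumulates in both the CPU and GPU branches is the relative difference penalty, with neighbour weights w_{jk} supplied by values and the 1e-8 terms acting as stability epsilons:

\[
\psi(\theta_j, \theta_k) = \frac{(\theta_k - \theta_j)^2}{\theta_k + \theta_j + \gamma \lvert \theta_k - \theta_j \rvert},
\qquad
U(\theta) = \frac{1}{2} \sum_j \sum_{k \in \mathcal{N}_j} w_{jk}\, \psi(\theta_j, \theta_k),
\]

where N_j is the neighbourhood of pixel j. The factor 1/2 compensates for each pair being visited twice, matching U_value = 0.5 * np.sum(psi_pair), and γ controls the transition between quadratic behaviour for small differences and roughly linear behaviour for large ones.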