M3Drop: m3drop-0.4.38-py3-none-any.whl → m3drop-0.4.40-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- m3Drop/diagnosticsGPU.py +142 -104
- m3Drop/normalizationGPU.py +25 -21
- {m3drop-0.4.38.dist-info → m3drop-0.4.40.dist-info}/METADATA +1 -1
- m3drop-0.4.40.dist-info/RECORD +12 -0
- m3drop-0.4.38.dist-info/RECORD +0 -12
- {m3drop-0.4.38.dist-info → m3drop-0.4.40.dist-info}/WHEEL +0 -0
- {m3drop-0.4.38.dist-info → m3drop-0.4.40.dist-info}/licenses/LICENSE +0 -0
- {m3drop-0.4.38.dist-info → m3drop-0.4.40.dist-info}/top_level.txt +0 -0
m3Drop/diagnosticsGPU.py
CHANGED
@@ -12,12 +12,15 @@ from scipy import sparse
 from scipy import stats
 import anndata
 
-# [GOVERNOR INTEGRATION]
+# [GOVERNOR INTEGRATION]
 from .coreGPU import hidden_calc_valsGPU, NBumiFitModelGPU, NBumiFitDispVsMeanGPU, get_optimal_chunk_size
 from cupy.sparse import csr_matrix as cp_csr_matrix
 import scipy.sparse as sp
 from scipy.sparse import csr_matrix as sp_csr_matrix
+
 import statsmodels.api as sm
+from scipy.stats import norm
+from statsmodels.stats.multitest import multipletests
 
 def NBumiFitBasicModelGPU(
     cleaned_filename: str,
@@ -137,7 +140,7 @@ def NBumiCheckFitFSGPU(
     plot_filename=None
 ) -> dict:
     """
-
+    FIXED VERSION - No cupy.errstate, proper GPU computation.
     """
     start_time = time.perf_counter()
     print(f"FUNCTION: NBumiCheckFitFS() | FILE: {cleaned_filename}")
@@ -168,10 +171,14 @@ def NBumiCheckFitFSGPU(
     print("Phase [1/2]: COMPLETE")
 
     # --- Phase 2: Calculate Expected Dropouts ---
-    print(
+    print("Phase [2/2]: Calculating expected dropouts from data chunks...")
+
+    # [GOVERNOR INTEGRATION] Removed naive calculation, utilizing Governor's chunk_size
+    optimal_chunk = chunk_size
+    print(f" Using governor chunk size: {optimal_chunk}")
 
-    for i in range(0, nc,
-        end_col = min(i +
+    for i in range(0, nc, optimal_chunk):
+        end_col = min(i + optimal_chunk, nc)
         print(f"Phase [2/2]: Processing: {end_col} of {nc} cells.", end='\r')
 
         tis_chunk_gpu = tis_gpu[i:end_col]
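For readers following the math: the loop above accumulates the expected number of dropouts per gene. Below is a minimal NumPy sketch of the per-chunk computation, assuming the standard NBumi zero probability P(X = 0) = (1 + mu/size)^(-size); all values are illustrative and none come from the package.

    import numpy as np

    tjs = np.array([10.0, 50.0])                 # per-gene totals (illustrative)
    tis_chunk = np.array([100.0, 200.0, 150.0])  # per-cell totals for one chunk
    total = 1000.0                               # grand total of counts
    sizes = np.array([0.5, 1.2])                 # fitted NB size per gene

    # mu_ij = tj_j * ti_i / total, then P(X_ij = 0) under NB(mu, size)
    mu_chunk = tjs[np.newaxis, :] * tis_chunk[:, np.newaxis] / total
    p_zero = (1.0 + mu_chunk / sizes[np.newaxis, :]) ** (-sizes[np.newaxis, :])

    # The real code accumulates this across chunks to get expected dropouts per gene
    expected_dropouts = p_zero.sum(axis=0)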
@@ -192,7 +199,11 @@ def NBumiCheckFitFSGPU(
 
         # Clean up
         del mu_chunk_gpu, p_is_chunk_gpu, base, tis_chunk_gpu
-
+
+        # Periodic memory cleanup
+        mempool = cp.get_default_memory_pool()
+        if (i // optimal_chunk) % 10 == 0:
+            mempool.free_all_blocks()
 
     print(f"Phase [2/2]: COMPLETE{' ' * 50}")
 
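The periodic cleanup added here is a common CuPy pattern: freeing the pool on every iteration thrashes the allocator, while freeing every Nth chunk bounds VRAM growth. A minimal sketch of the pattern (the per-chunk work is a stand-in, and a CUDA-capable GPU is assumed):

    import cupy as cp

    def process_in_chunks(nc: int, chunk: int) -> None:
        mempool = cp.get_default_memory_pool()
        for i in range(0, nc, chunk):
            end = min(i + chunk, nc)
            buf = cp.ones(end - i, dtype=cp.float32)  # stand-in for real per-chunk work
            del buf
            # Freeing on every 10th chunk amortizes the allocator cost
            # while keeping pooled VRAM from growing unbounded.
            if (i // chunk) % 10 == 0:
                mempool.free_all_blocks()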
@@ -245,7 +256,7 @@ def NBumiCheckFitFSGPU(
     }
 
 def NBumiCompareModelsGPU(
-    raw_filename: str,
+    raw_filename: str,
     cleaned_filename: str,
     stats: dict,
     fit_adjust: dict,
@@ -254,118 +265,132 @@ def NBumiCompareModelsGPU(
     plot_filename=None
 ) -> dict:
     """
-    OPTIMIZED VERSION
-    - Eliminates the 46GB '_basic_norm.h5ad' temporary file.
-    - Performs depth normalization and variance calculation on-the-fly in GPU VRAM.
-    - PRESERVED SCIENTIFIC LOGIC: Var(X) = E[X^2] - (E[X])^2 on normalized data.
+    OPTIMIZED VERSION - Faster normalization and sparse matrix writing.
     """
     pipeline_start_time = time.time()
     print(f"FUNCTION: NBumiCompareModels() | Comparing models for {cleaned_filename}")
 
-    # [GOVERNOR]
+    # [GOVERNOR INTEGRATION] Calculate chunk size for normalization phase (heavy IO)
     if chunk_size is None:
-        … (9 removed lines not captured in this view)
+        # Multiplier 10.0 for safety during normalization of massive dense expansion
+        chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=10.0, is_dense=True)
+
+    # --- Phase 1: OPTIMIZED Normalization ---
+    print("Phase [1/4]: Creating temporary 'basic' normalized data file...")
+    basic_norm_filename = cleaned_filename.replace('.h5ad', '_basic_norm.h5ad')
+
+    # Read metadata. In 'backed' mode, this keeps a file handle open.
+    adata_meta = anndata.read_h5ad(cleaned_filename, backed='r')
+    nc, ng = adata_meta.shape
+    obs_df = adata_meta.obs.copy()
+    var_df = adata_meta.var.copy()
 
-    … (2 removed lines not captured in this view)
-    non_zero_mask = tis > 0
-    size_factors[non_zero_mask] = tis[non_zero_mask] / median_sum
+    cell_sums = stats['tis'].values
+    median_sum = np.median(cell_sums[cell_sums > 0])
 
-    #
-    … (9 removed lines not captured in this view)
+    # Avoid division by zero for cells with zero counts
+    size_factors = np.ones_like(cell_sums, dtype=np.float32)
+    non_zero_mask = cell_sums > 0
+    size_factors[non_zero_mask] = cell_sums[non_zero_mask] / median_sum
+
+    adata_out = anndata.AnnData(obs=obs_df, var=var_df)
+    # [OPTION 2 CHANGE] Removed compression="gzip" to speed up I/O
+    adata_out.write_h5ad(basic_norm_filename)
+
+    with h5py.File(basic_norm_filename, 'a') as f_out:
+        if 'X' in f_out:
+            del f_out['X']
+        x_group_out = f_out.create_group('X')
+        x_group_out.attrs['encoding-type'] = 'csr_matrix'
+        x_group_out.attrs['encoding-version'] = '0.1.0'
+        x_group_out.attrs['shape'] = np.array([nc, ng], dtype='int64')
+
+        out_data = x_group_out.create_dataset('data', shape=(0,), maxshape=(None,), dtype='float32')
+        out_indices = x_group_out.create_dataset('indices', shape=(0,), maxshape=(None,), dtype='int32')
+        out_indptr = x_group_out.create_dataset('indptr', shape=(nc + 1,), dtype='int64')
+        out_indptr[0] = 0
+        current_nnz = 0
+
+        with h5py.File(cleaned_filename, 'r') as f_in:
+            h5_indptr = f_in['X']['indptr']
+            h5_data = f_in['X']['data']
+            h5_indices = f_in['X']['indices']
+
+            for i in range(0, nc, chunk_size):
+                end_row = min(i + chunk_size, nc)
+                print(f"Phase [1/4]: Normalizing: {end_row} of {nc} cells.", end='\r')
+
+                start_idx, end_idx = h5_indptr[i], h5_indptr[end_row]
+                if start_idx == end_idx:
+                    out_indptr[i + 1 : end_row + 1] = current_nnz
+                    continue
 
-        … (20 removed lines not captured in this view)
-        # Get size factors for this chunk
-        sf_chunk = cp.asarray(size_factors[i:end_row])
-
-        # --- THE MAGIC: On-the-Fly Normalization ---
-        # data_norm = data_raw / size_factor
-        data_gpu /= sf_chunk[row_indices]
-
-        # Accumulate for Variance: E[X^2] and E[X]
-        cp.add.at(sum_x_sq_gpu, indices_gpu, data_gpu**2)
-        cp.add.at(sum_x_gpu, indices_gpu, data_gpu)
-
-        # Clean up VRAM
-        del data_gpu, indices_gpu, indptr_gpu, row_indices, sf_chunk, cell_boundary_markers
-        cp.get_default_memory_pool().free_all_blocks()
+                # Read data for the chunk
+                data_slice = h5_data[start_idx:end_idx]
+                indices_slice = h5_indices[start_idx:end_idx]
+                indptr_slice = h5_indptr[i:end_row + 1] - start_idx
+
+                # Move to GPU for fast normalization
+                data_gpu = cp.asarray(data_slice.copy(), dtype=cp.float32)
+
+                indptr_gpu = cp.asarray(indptr_slice.copy())
+                nnz_in_chunk = indptr_gpu[-1].item()
+                cell_boundary_markers = cp.zeros(nnz_in_chunk, dtype=cp.int32)
+                if len(indptr_gpu) > 1:
+                    cell_boundary_markers[indptr_gpu[:-1]] = 1
+                row_indices = cp.cumsum(cell_boundary_markers, axis=0) - 1
+
+                size_factors_for_chunk = cp.asarray(size_factors[i:end_row])
+
+                data_gpu /= size_factors_for_chunk[row_indices]
+
+                data_cpu = np.round(data_gpu.get())
 
-
+                num_cells_in_chunk = end_row - i
+                chunk_sp = sp_csr_matrix((data_cpu, indices_slice, indptr_slice),
+                                         shape=(num_cells_in_chunk, ng))
 
-        … (8 removed lines not captured in this view)
+                nnz_chunk = chunk_sp.nnz
+                out_data.resize(current_nnz + nnz_chunk, axis=0)
+                out_data[current_nnz:] = chunk_sp.data
+
+                out_indices.resize(current_nnz + nnz_chunk, axis=0)
+                out_indices[current_nnz:] = chunk_sp.indices
+
+                new_indptr_list = chunk_sp.indptr[1:].astype(np.int64) + current_nnz
+                out_indptr[i + 1 : end_row + 1] = new_indptr_list
+
+                current_nnz += nnz_chunk
+
+                del data_gpu, row_indices, size_factors_for_chunk, indptr_gpu
+                cp.get_default_memory_pool().free_all_blocks()
+
+    print(f"Phase [1/4]: COMPLETE{' '*50}")
+
+    print("Phase [2/4]: Fitting Basic Model on normalized data...")
 
-    #
-
-    if cp.isnan(max_size_val): max_size_val = 1000
-    size_gpu[cp.isnan(size_gpu) | (size_gpu <= 0)] = max_size_val
-    size_gpu[size_gpu < 1e-10] = 1e-10
+    # [GOVERNOR INTEGRATION] Calculate chunk size for basic fit on the heavy normalized file
+    chunk_size_basic = get_optimal_chunk_size(basic_norm_filename, multiplier=10.0, is_dense=True)
 
-    #
-    fit_basic =
-        … (1 removed line not captured in this view)
-        'vals': stats,
-        'var_obs': pd.Series(my_rowvar_gpu.get(), index=stats['tjs'].index)
-    }
+    stats_basic = hidden_calc_valsGPU(basic_norm_filename)  # hidden_calc uses its own governor internally
+    fit_basic = NBumiFitBasicModelGPU(basic_norm_filename, stats_basic, chunk_size=chunk_size_basic)
+    print("Phase [2/4]: COMPLETE")
 
-    … (2 removed lines not captured in this view)
+    print("Phase [3/4]: Evaluating fits of both models on ORIGINAL data...")
+    # [GOVERNOR INTEGRATION] Chunk size for check fit
+    chunk_size_check = get_optimal_chunk_size(cleaned_filename, multiplier=5.0, is_dense=True)
 
-
-    check_adjust = NBumiCheckFitFSGPU(
-        cleaned_filename, fit_adjust, suppress_plot=True
-    )
+    check_adjust = NBumiCheckFitFSGPU(cleaned_filename, fit_adjust, suppress_plot=True, chunk_size=chunk_size_check)
 
-    … (5 removed lines not captured in this view)
+    fit_basic_for_eval = {
+        'sizes': fit_basic['sizes'],
+        'vals': stats,
+        'var_obs': fit_basic['var_obs']
+    }
+    check_basic = NBumiCheckFitFSGPU(cleaned_filename, fit_basic_for_eval, suppress_plot=True, chunk_size=chunk_size_check)
+    print("Phase [3/4]: COMPLETE")
 
-
-    print("Phase [3/3]: Generating comparison...")
-
+    print("Phase [4/4]: Generating final comparison...")
     nc_data = stats['nc']
     mean_expr = stats['tjs'] / nc_data
     observed_dropout = stats['djs'] / nc_data
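The core trick in the new normalization loop is expanding a CSR indptr into a per-element row index, so every stored value can be divided by its cell's size factor in one vectorized step. A CPU sketch with NumPy/SciPy on an illustrative matrix; note the marker trick assumes every row in the slice has at least one stored element, which is why the real code skips fully empty slices:

    import numpy as np
    from scipy.sparse import csr_matrix

    X = csr_matrix(np.array([[2.0, 0.0, 4.0],
                             [0.0, 3.0, 0.0],
                             [1.0, 1.0, 0.0]]))
    size_factors = np.array([2.0, 3.0, 0.5])

    # Mark the first stored element of each row, then cumsum to get row ids.
    markers = np.zeros(X.nnz, dtype=np.int32)
    markers[X.indptr[:-1]] = 1
    row_ids = np.cumsum(markers) - 1      # [0, 0, 1, 2, 2]

    X.data /= size_factors[row_ids]       # divide each element by its row's factor

    # np.repeat(np.arange(X.shape[0]), np.diff(X.indptr)) is the general
    # alternative that also handles rows with no stored elements.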
@@ -408,8 +433,15 @@ def NBumiCompareModelsGPU(
         plt.show()
 
     plt.close()
-
+    print("Phase [4/4]: COMPLETE")
+
     pipeline_end_time = time.time()
+
+    # --- ADD THIS LINE TO FIX THE ERROR ---
+    adata_meta.file.close()  # Explicitly close the file handle
+
+    os.remove(basic_norm_filename)
+    print(f"STATUS: Temporary file '{basic_norm_filename}' removed.")
     print(f"Total time: {pipeline_end_time - pipeline_start_time:.2f} seconds.\n")
 
     return {
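The fix above closes the backed AnnData handle before os.remove; removing an HDF5 file that still has an open handle fails on some platforms (Windows in particular). A hedged sketch of the same cleanup expressed with try/finally, which also covers the error path (function name and body are illustrative, not the package's code):

    import os
    import anndata

    def run_with_temp(cleaned_filename: str) -> None:
        tmp = cleaned_filename.replace('.h5ad', '_basic_norm.h5ad')
        adata_meta = anndata.read_h5ad(cleaned_filename, backed='r')
        try:
            pass  # ... build and consume the temporary normalized file ...
        finally:
            if adata_meta.file is not None:
                adata_meta.file.close()   # release the backed HDF5 handle
            if os.path.exists(tmp):
                os.remove(tmp)            # temp file is removed even on error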
@@ -424,6 +456,11 @@ def NBumiPlotDispVsMeanGPU(
 ):
     """
     Generates a diagnostic plot of the dispersion vs. mean expression.
+
+    Args:
+        fit (dict): The 'fit' object from NBumiFitModelGPU.
+        suppress_plot (bool): If True, the plot will not be displayed on screen.
+        plot_filename (str, optional): Path to save the plot. If None, not saved.
     """
     print("FUNCTION: NBumiPlotDispVsMean()")
 
@@ -434,6 +471,7 @@ def NBumiPlotDispVsMeanGPU(
     intercept, slope = coeffs[0], coeffs[1]
 
     # --- 2. Calculate the fitted line for plotting ---
+    # Create a smooth, continuous line using the regression coefficients
     log_mean_expr_range = np.linspace(
         np.log(mean_expression[mean_expression > 0].min()),
         np.log(mean_expression.max()),
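For context, the fitted line drawn here comes from a linear regression in log-log space. A self-contained sketch with synthetic values; the package obtains its coefficients from NBumiFitDispVsMeanGPU, so the polyfit call below is only a stand-in:

    import numpy as np

    # Synthetic dispersion-vs-mean data (illustrative only)
    mean_expression = np.array([0.1, 1.0, 5.0, 20.0])
    dispersion = np.array([2.0, 1.1, 0.7, 0.5])

    # Fit log(dispersion) = intercept + slope * log(mean)
    slope, intercept = np.polyfit(np.log(mean_expression), np.log(dispersion), deg=1)

    # Smooth fitted curve across the observed range, as the plot code does
    log_range = np.linspace(np.log(mean_expression.min()),
                            np.log(mean_expression.max()), 100)
    fitted = np.exp(intercept + slope * log_range)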
m3Drop/normalizationGPU.py
CHANGED
@@ -24,9 +24,7 @@ def NBumiPearsonResidualsGPU(
     start_time = time.perf_counter()
     print(f"FUNCTION: NBumiPearsonResiduals() | FILE: {cleaned_filename}")
 
-    #
-    # Multiplier 10.0 (Was 6.0): Accounts for Float64 precision (8 bytes) vs Governor default (4 bytes).
-    # 4 matrices * 8 bytes = 32 bytes/cell. Governor 10 * 4 = 40 bytes. Safe buffer established.
+    # Governor for Processing (RAM/VRAM)
     chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=10.0, is_dense=True)
 
     # --- Phase 1: Initialization ---
@@ -45,16 +43,29 @@ def NBumiPearsonResidualsGPU(
     tis_gpu = cupy.asarray(tis, dtype=cupy.float64)
     sizes_gpu = cupy.asarray(sizes, dtype=cupy.float64)
 
-    # Create Output H5
+    # Create Output H5
     adata_in = anndata.read_h5ad(cleaned_filename, backed='r')
     adata_out = anndata.AnnData(obs=adata_in.obs, var=adata_in.var)
     adata_out.write_h5ad(output_filename, compression="gzip")
 
+    # [FIX] Calculate Safe Storage Chunk Size (~1GB)
+    # HDF5 limit is 4GB. You requested 1GB for optimal speed.
+    bytes_per_row = ng * 4  # float32
+    target_bytes = 1_000_000_000  # 1GB
+    storage_chunk_rows = int(target_bytes / bytes_per_row)
+
+    if storage_chunk_rows < 1: storage_chunk_rows = 1
+    # Note: It is okay if storage_chunk > processing_chunk (HDF5 handles this),
+    # but strictly it must be < 4GB total size.
+
+    print(f" > Processing Chunk: {chunk_size} rows (RAM)")
+    print(f" > Storage Chunk: {storage_chunk_rows} rows (Disk - 1GB Target)")
+
     with h5py.File(output_filename, 'a') as f_out:
         if 'X' in f_out:
             del f_out['X']
-        # Create dataset
-        out_x = f_out.create_dataset('X', shape=(nc, ng), chunks=(
+        # Create dataset with SAFE chunks (Fixes the ValueError)
+        out_x = f_out.create_dataset('X', shape=(nc, ng), chunks=(storage_chunk_rows, ng), dtype='float32')
 
     print("Phase [1/2]: COMPLETE")
 
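The ValueError being fixed is h5py's hard cap of 4 GB per chunk, plus its requirement that a chunk not exceed the dataset shape in any dimension. A standalone sketch of the sizing rule, with illustrative dimensions and a hypothetical output filename; the min() guard covers the case the comment above alludes to, where the 1 GB row block would exceed the cell count:

    import h5py

    nc, ng = 100_000, 30_000              # illustrative cell/gene counts
    bytes_per_row = ng * 4                # float32
    storage_chunk_rows = max(1, 1_000_000_000 // bytes_per_row)  # ~1GB of rows

    with h5py.File('example.h5', 'w') as f:
        # Clamp chunk rows to nc: h5py rejects chunks larger than the dataset
        # in any dimension, and a ~1GB row block stays far below the 4GB
        # per-chunk ceiling HDF5 enforces.
        f.create_dataset('X', shape=(nc, ng),
                         chunks=(min(storage_chunk_rows, nc), ng),
                         dtype='float32')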
@@ -77,7 +88,6 @@ def NBumiPearsonResidualsGPU(
         indptr_slice = h5_indptr[i:end_row+1] - h5_indptr[i]
 
         # Convert to Dense GPU Matrix
-        # We construct sparse first, then densify on GPU to save bandwidth
         counts_chunk_sparse_gpu = cp_csr_matrix((
             cupy.asarray(data_slice, dtype=cupy.float64),
             cupy.asarray(indices_slice),
@@ -91,24 +101,18 @@ def NBumiPearsonResidualsGPU(
         mus_chunk_gpu = tjs_gpu[cupy.newaxis, :] * tis_chunk_gpu[:, cupy.newaxis] / total
 
         denominator_gpu = cupy.sqrt(mus_chunk_gpu + mus_chunk_gpu**2 / sizes_gpu[cupy.newaxis, :])
-
-        # --- LOGIC RESTORED: Prevent Division by Zero ---
         denominator_gpu = cupy.where(denominator_gpu == 0, 1, denominator_gpu)
 
-        # (Counts - Mu) / Sqrt(V)
         pearson_chunk_gpu = (counts_chunk_dense_gpu - mus_chunk_gpu) / denominator_gpu
 
         # Write to Disk
-        # [OPTIMIZATION] Cast to float32 on GPU to halve PCIe transfer time
         out_x[i:end_row, :] = pearson_chunk_gpu.astype(cupy.float32).get()
 
-        # Cleanup
         del counts_chunk_dense_gpu, counts_chunk_sparse_gpu, mus_chunk_gpu, pearson_chunk_gpu, denominator_gpu
         cupy.get_default_memory_pool().free_all_blocks()
 
     print(f"Phase [2/2]: COMPLETE{' '*50}")
 
-    # --- LOGIC RESTORED: Explicit File Cleanup ---
     if hasattr(adata_in, "file") and adata_in.file is not None:
         adata_in.file.close()
 
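The chunk math above is the standard negative-binomial Pearson residual r = (x - mu) / sqrt(mu + mu^2/size), with mu_ij = tj_j * ti_i / total. A small NumPy sketch on a toy matrix, with illustrative size parameters:

    import numpy as np

    counts = np.array([[0.0, 3.0],
                       [5.0, 1.0]])
    tjs = counts.sum(axis=0)            # per-gene totals
    tis = counts.sum(axis=1)            # per-cell totals
    total = counts.sum()
    sizes = np.array([1.5, 2.0])        # fitted NB size per gene (illustrative)

    mus = tjs[np.newaxis, :] * tis[:, np.newaxis] / total
    denom = np.sqrt(mus + mus**2 / sizes[np.newaxis, :])
    denom = np.where(denom == 0, 1, denom)   # same division-by-zero guard
    residuals = (counts - mus) / denom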
@@ -127,8 +131,6 @@ def NBumiPearsonResidualsApproxGPU(
     start_time = time.perf_counter()
     print(f"FUNCTION: NBumiPearsonResidualsApprox() | FILE: {cleaned_filename}")
 
-    # --- HANDSHAKE ---
-    # Multiplier 10.0: Same safety logic as Full residuals.
     chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=10.0, is_dense=True)
 
     # --- Phase 1: Initialization ---
@@ -150,10 +152,17 @@ def NBumiPearsonResidualsApproxGPU(
     adata_out = anndata.AnnData(obs=adata_in.obs, var=adata_in.var)
     adata_out.write_h5ad(output_filename, compression="gzip")
 
+    # [FIX] Calculate Safe Storage Chunk Size (~1GB)
+    bytes_per_row = ng * 4
+    target_bytes = 1_000_000_000  # 1GB
+    storage_chunk_rows = int(target_bytes / bytes_per_row)
+    if storage_chunk_rows < 1: storage_chunk_rows = 1
+
     with h5py.File(output_filename, 'a') as f_out:
         if 'X' in f_out:
             del f_out['X']
-
+        # Create dataset with SAFE chunks
+        out_x = f_out.create_dataset('X', shape=(nc, ng), chunks=(storage_chunk_rows, ng), dtype='float32')
 
     print("Phase [1/2]: COMPLETE")
 
@@ -185,15 +194,11 @@ def NBumiPearsonResidualsApproxGPU(
         tis_chunk_gpu = tis_gpu[i:end_row]
         mus_chunk_gpu = tjs_gpu[cupy.newaxis, :] * tis_chunk_gpu[:, cupy.newaxis] / total
 
-        # Approx: Denom = Sqrt(Mu)
         denominator_gpu = cupy.sqrt(mus_chunk_gpu)
-
-        # --- LOGIC RESTORED: Prevent Division by Zero ---
         denominator_gpu = cupy.where(denominator_gpu == 0, 1, denominator_gpu)
 
         pearson_chunk_gpu = (counts_chunk_dense_gpu - mus_chunk_gpu) / denominator_gpu
 
-        # [OPTIMIZATION] Cast to float32 on GPU to halve PCIe transfer time
         out_x[i:end_row, :] = pearson_chunk_gpu.astype(cupy.float32).get()
 
         del counts_chunk_dense_gpu, counts_chunk_sparse_gpu, mus_chunk_gpu, pearson_chunk_gpu, denominator_gpu
@@ -201,7 +206,6 @@ def NBumiPearsonResidualsApproxGPU(
 
     print(f"Phase [2/2]: COMPLETE{' '*50}")
 
-    # --- LOGIC RESTORED: Explicit File Cleanup ---
     if hasattr(adata_in, "file") and adata_in.file is not None:
         adata_in.file.close()
 
m3drop-0.4.40.dist-info/RECORD
ADDED
@@ -0,0 +1,12 @@
+m3Drop/__init__.py,sha256=yaUXhUArnwgLf01Zlpqa5qm9K1aByGqQupIoCaLYiDw,2462
+m3Drop/coreCPU.py,sha256=3kPYlSVlYrJEhRUCIoVzmR8CYBaHpxVM5nx-3YQI4d4,17204
+m3Drop/coreGPU.py,sha256=k7A06VNgfJ59J8g1VpfKxhTIKrEbW7Bj8pTbQqHaQL8,24571
+m3Drop/diagnosticsCPU.py,sha256=BecOKTz2GDjzjs9ycXYsyrSHi2UVgsM58RBuNE62vmU,14273
+m3Drop/diagnosticsGPU.py,sha256=m_r7mZ4s0h-YwJEriucrACNeDQxWPZHvd8RAmsXQYXE,19980
+m3Drop/normalizationCPU.py,sha256=4ulCrDZZjxVFh2y0i4ayPkNCsZYaOP-Xq2Dnzu9WXtg,5697
+m3Drop/normalizationGPU.py,sha256=r5gvJFkabEfCfIsVdpJzWGqve_Iy57EYsEyiLfDo8Mo,8539
+m3drop-0.4.40.dist-info/licenses/LICENSE,sha256=44Iqpp8Fc10Xzd5T7cT9UhO31Qftk3gBiCjtpwilP_k,1074
+m3drop-0.4.40.dist-info/METADATA,sha256=rL09G9SvQ_JP_OEw-xrMNle-92XQFzn2-cHDyXkPpH4,5161
+m3drop-0.4.40.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+m3drop-0.4.40.dist-info/top_level.txt,sha256=AEULFEFIgFtAwS-KBlIFoYXrqczX_rwqrEcdK46GIrA,7
+m3drop-0.4.40.dist-info/RECORD,,
m3drop-0.4.38.dist-info/RECORD
DELETED
@@ -1,12 +0,0 @@
-m3Drop/__init__.py,sha256=yaUXhUArnwgLf01Zlpqa5qm9K1aByGqQupIoCaLYiDw,2462
-m3Drop/coreCPU.py,sha256=3kPYlSVlYrJEhRUCIoVzmR8CYBaHpxVM5nx-3YQI4d4,17204
-m3Drop/coreGPU.py,sha256=k7A06VNgfJ59J8g1VpfKxhTIKrEbW7Bj8pTbQqHaQL8,24571
-m3Drop/diagnosticsCPU.py,sha256=BecOKTz2GDjzjs9ycXYsyrSHi2UVgsM58RBuNE62vmU,14273
-m3Drop/diagnosticsGPU.py,sha256=pg_G6VCk6yvSfRzISHZhTJBVvhFA07MQrJqzQ0fehtc,17893
-m3Drop/normalizationCPU.py,sha256=4ulCrDZZjxVFh2y0i4ayPkNCsZYaOP-Xq2Dnzu9WXtg,5697
-m3Drop/normalizationGPU.py,sha256=mHu_Or4ma6qzujGQQQ0oN3D-yoEngLAN4UTknkArRAY,8596
-m3drop-0.4.38.dist-info/licenses/LICENSE,sha256=44Iqpp8Fc10Xzd5T7cT9UhO31Qftk3gBiCjtpwilP_k,1074
-m3drop-0.4.38.dist-info/METADATA,sha256=Pt-cgrYSzffqxPKzzIBEURzG3SSAzeArprgfQjrX1z8,5161
-m3drop-0.4.38.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-m3drop-0.4.38.dist-info/top_level.txt,sha256=AEULFEFIgFtAwS-KBlIFoYXrqczX_rwqrEcdK46GIrA,7
-m3drop-0.4.38.dist-info/RECORD,,
{m3drop-0.4.38.dist-info → m3drop-0.4.40.dist-info}/WHEEL
File without changes

{m3drop-0.4.38.dist-info → m3drop-0.4.40.dist-info}/licenses/LICENSE
File without changes

{m3drop-0.4.38.dist-info → m3drop-0.4.40.dist-info}/top_level.txt
File without changes