M3Drop 0.4.38.tar.gz → 0.4.39.tar.gz
This diff shows the changes between publicly available package versions as they appear in their respective public registries; it is provided for informational purposes only.
- {m3drop-0.4.38 → m3drop-0.4.39/M3Drop.egg-info}/PKG-INFO +1 -1
- {m3drop-0.4.38/M3Drop.egg-info → m3drop-0.4.39}/PKG-INFO +1 -1
- {m3drop-0.4.38 → m3drop-0.4.39}/m3Drop/diagnosticsGPU.py +38 -146
- {m3drop-0.4.38 → m3drop-0.4.39}/m3Drop/normalizationGPU.py +25 -21
- {m3drop-0.4.38 → m3drop-0.4.39}/setup.py +1 -1
- {m3drop-0.4.38 → m3drop-0.4.39}/LICENSE +0 -0
- {m3drop-0.4.38 → m3drop-0.4.39}/M3Drop.egg-info/SOURCES.txt +0 -0
- {m3drop-0.4.38 → m3drop-0.4.39}/M3Drop.egg-info/dependency_links.txt +0 -0
- {m3drop-0.4.38 → m3drop-0.4.39}/M3Drop.egg-info/requires.txt +0 -0
- {m3drop-0.4.38 → m3drop-0.4.39}/M3Drop.egg-info/top_level.txt +0 -0
- {m3drop-0.4.38 → m3drop-0.4.39}/README.md +0 -0
- {m3drop-0.4.38 → m3drop-0.4.39}/m3Drop/__init__.py +0 -0
- {m3drop-0.4.38 → m3drop-0.4.39}/m3Drop/coreCPU.py +0 -0
- {m3drop-0.4.38 → m3drop-0.4.39}/m3Drop/coreGPU.py +0 -0
- {m3drop-0.4.38 → m3drop-0.4.39}/m3Drop/diagnosticsCPU.py +0 -0
- {m3drop-0.4.38 → m3drop-0.4.39}/m3Drop/normalizationCPU.py +0 -0
- {m3drop-0.4.38 → m3drop-0.4.39}/pyproject.toml +0 -0
- {m3drop-0.4.38 → m3drop-0.4.39}/setup.cfg +0 -0
m3Drop/diagnosticsGPU.py

@@ -12,7 +12,7 @@ from scipy import sparse
 from scipy import stats
 import anndata
 
-# [GOVERNOR INTEGRATION]
+# [GOVERNOR INTEGRATION]
 from .coreGPU import hidden_calc_valsGPU, NBumiFitModelGPU, NBumiFitDispVsMeanGPU, get_optimal_chunk_size
 from cupy.sparse import csr_matrix as cp_csr_matrix
 import scipy.sparse as sp
@@ -26,27 +26,22 @@ def NBumiFitBasicModelGPU(
     chunk_size: int = None
 ) -> dict:
     """
-    Fits a simpler, unadjusted NB model out-of-core
-    algorithm. Designed to work with a standard (cell, gene) sparse matrix.
+    Fits a simpler, unadjusted NB model out-of-core.
     """
     start_time = time.perf_counter()
     print(f"FUNCTION: NBumiFitBasicModel() | FILE: {cleaned_filename}")
 
-    # [GOVERNOR INTEGRATION] Calculate optimal chunk size if not provided
     if chunk_size is None:
         chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=3.0, is_dense=True)
 
     # --- Phase 1: Initialization ---
-    print("Phase [1/2]: Initializing parameters and arrays on GPU...")
     tjs = stats['tjs'].values
     nc, ng = stats['nc'], stats['ng']
 
     tjs_gpu = cp.asarray(tjs, dtype=cp.float64)
     sum_x_sq_gpu = cp.zeros(ng, dtype=cp.float64)
-    print("Phase [1/2]: COMPLETE")
 
     # --- Phase 2: Calculate Variance from Data Chunks ---
-    print("Phase [2/2]: Calculating variance from data chunks...")
     with h5py.File(cleaned_filename, 'r') as f_in:
         x_group = f_in['X']
         h5_indptr = x_group['indptr']
@@ -58,65 +53,32 @@ def NBumiFitBasicModelGPU(
             print(f"Phase [2/2]: Processing: {end_row} of {nc} cells.", end='\r')
 
             start_idx, end_idx = h5_indptr[i], h5_indptr[end_row]
-            if start_idx == end_idx:
-                continue
+            if start_idx == end_idx: continue
 
-            #
-
+            # Original processing
+            data_slice = h5_data[start_idx:end_idx]
+            indices_slice = h5_indices[start_idx:end_idx]
+
+            data_gpu = cp.asarray(data_slice, dtype=cp.float64)
+            indices_gpu = cp.asarray(indices_slice)
+
+            cp.add.at(sum_x_sq_gpu, indices_gpu, data_gpu**2)
 
-
-
-            for sub_start in range(start_idx, end_idx, max_elements):
-                sub_end = min(sub_start + max_elements, end_idx)
-
-                data_slice = h5_data[sub_start:sub_end]
-                indices_slice = h5_indices[sub_start:sub_end]
-
-                data_gpu = cp.asarray(data_slice, dtype=cp.float64)
-                indices_gpu = cp.asarray(indices_slice)
-
-                # Accumulate the sum of squares for each gene
-                cp.add.at(sum_x_sq_gpu, indices_gpu, data_gpu**2)
-
-                # Free GPU memory
-                del data_gpu, indices_gpu
-                cp.get_default_memory_pool().free_all_blocks()
-            else:
-                # Original processing for smaller chunks
-                data_slice = h5_data[start_idx:end_idx]
-                indices_slice = h5_indices[start_idx:end_idx]
-
-                data_gpu = cp.asarray(data_slice, dtype=cp.float64)
-                indices_gpu = cp.asarray(indices_slice)
-
-                # Accumulate the sum of squares for each gene
-                cp.add.at(sum_x_sq_gpu, indices_gpu, data_gpu**2)
-
-                # Clean up
-                del data_gpu, indices_gpu
-                cp.get_default_memory_pool().free_all_blocks()
+            del data_gpu, indices_gpu
+            cp.get_default_memory_pool().free_all_blocks()
 
-    print(f"Phase [2/2]: COMPLETE
-
-
-
-
-
-    # Variance of raw data: Var(X) = E[X^2] - E[X]^2
-    mean_x_sq_gpu = sum_x_sq_gpu / nc
-    mean_mu_gpu = tjs_gpu / nc
-    my_rowvar_gpu = mean_x_sq_gpu - mean_mu_gpu**2
-
-    # Calculate dispersion ('size')
-    size_gpu = mean_mu_gpu**2 / (my_rowvar_gpu - mean_mu_gpu)
+    print(f"Phase [2/2]: COMPLETE{' '*50}")
+
+    mean_x_sq_gpu = sum_x_sq_gpu / nc
+    mean_mu_gpu = tjs_gpu / nc
+    my_rowvar_gpu = mean_x_sq_gpu - mean_mu_gpu**2
+    size_gpu = mean_mu_gpu**2 / (my_rowvar_gpu - mean_mu_gpu)
 
     max_size_val = cp.nanmax(size_gpu) * 10
-    if cp.isnan(max_size_val):
-        max_size_val = 1000
+    if cp.isnan(max_size_val): max_size_val = 1000
     size_gpu[cp.isnan(size_gpu) | (size_gpu <= 0)] = max_size_val
     size_gpu[size_gpu < 1e-10] = 1e-10
 
-    # Move results to CPU
     my_rowvar_cpu = my_rowvar_gpu.get()
     sizes_cpu = size_gpu.get()
 
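For context on the arithmetic this hunk streams: only per-gene totals and per-gene sums of squares are needed, because Var(X) = E[X^2] - (E[X])^2 and the method-of-moments NB dispersion is mu^2 / (Var - mu). A minimal NumPy sketch of the same estimate with the patch's clamping (the function name is illustrative, not part of M3Drop's API):

```python
import numpy as np

def nb_size_from_sums(sum_x: np.ndarray, sum_x_sq: np.ndarray, n_cells: int) -> np.ndarray:
    """Method-of-moments NB dispersion ('size') per gene from streamed sums."""
    mean = sum_x / n_cells                   # E[X]
    var = sum_x_sq / n_cells - mean**2       # Var(X) = E[X^2] - (E[X])^2
    with np.errstate(divide='ignore', invalid='ignore'):
        size = mean**2 / (var - mean)        # NB parameterization: Var = mu + mu^2/size
    # Genes with Var <= Mean have no finite positive NB size; clamp as the patch does
    max_size = 1000.0 if np.all(np.isnan(size)) else float(np.nanmax(size)) * 10
    size[np.isnan(size) | (size <= 0)] = max_size
    return np.maximum(size, 1e-10)
```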
@@ -137,21 +99,17 @@ def NBumiCheckFitFSGPU(
     plot_filename=None
 ) -> dict:
     """
-    Calculates
+    Calculates fit errors. [FIXED] Added clamps to prevent >1.0 probability errors.
     """
     start_time = time.perf_counter()
     print(f"FUNCTION: NBumiCheckFitFS() | FILE: {cleaned_filename}")
 
-    # [GOVERNOR INTEGRATION] Adaptive chunk sizing
     if chunk_size is None:
         chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=5.0, is_dense=True)
 
-    # --- Phase 1: Initialization ---
-    print("Phase [1/2]: Initializing parameters and arrays on GPU...")
     vals = fit['vals']
     size_coeffs = NBumiFitDispVsMeanGPU(fit, suppress_plot=True)
 
-    # Must use float64 for precision
     tjs_gpu = cp.asarray(vals['tjs'].values, dtype=cp.float64)
     tis_gpu = cp.asarray(vals['tis'].values, dtype=cp.float64)
     total = vals['total']
@@ -161,76 +119,41 @@ def NBumiCheckFitFSGPU(
     mean_expression_gpu = tjs_gpu / nc
     log_mean_expression_gpu = cp.log(mean_expression_gpu)
     smoothed_size_gpu = cp.exp(size_coeffs[0] + size_coeffs[1] * log_mean_expression_gpu)
+
+    # [FIX] Clamp smoothed size to prevent instability
+    smoothed_size_gpu = cp.maximum(smoothed_size_gpu, 1e-8)
 
-    # Initialize result arrays
     row_ps_gpu = cp.zeros(ng, dtype=cp.float64)
     col_ps_gpu = cp.zeros(nc, dtype=cp.float64)
-    print("Phase [1/2]: COMPLETE")
 
-    # --- Phase 2: Calculate Expected Dropouts ---
-    print(f"Phase [2/2]: Calculating expected dropouts (Chunk: {chunk_size})...")
-
     for i in range(0, nc, chunk_size):
         end_col = min(i + chunk_size, nc)
         print(f"Phase [2/2]: Processing: {end_col} of {nc} cells.", end='\r')
 
         tis_chunk_gpu = tis_gpu[i:end_col]
-
-        # Standard calculation without errstate
         mu_chunk_gpu = tjs_gpu[:, cp.newaxis] * tis_chunk_gpu[cp.newaxis, :] / total
 
-        #
+        # [FIX] Safer power calculation
         base = 1 + mu_chunk_gpu / smoothed_size_gpu[:, cp.newaxis]
         p_is_chunk_gpu = cp.power(base, -smoothed_size_gpu[:, cp.newaxis])
 
-        #
-        p_is_chunk_gpu = cp.
+        # [FIX] Clamp probabilities to valid range [0, 1]
+        p_is_chunk_gpu = cp.clip(p_is_chunk_gpu, 0.0, 1.0)
+        p_is_chunk_gpu = cp.nan_to_num(p_is_chunk_gpu, nan=0.0)
 
-        # Sum results
         row_ps_gpu += p_is_chunk_gpu.sum(axis=1)
         col_ps_gpu[i:end_col] = p_is_chunk_gpu.sum(axis=0)
 
-        # Clean up
         del mu_chunk_gpu, p_is_chunk_gpu, base, tis_chunk_gpu
         cp.get_default_memory_pool().free_all_blocks()
 
     print(f"Phase [2/2]: COMPLETE{' ' * 50}")
 
-    # Move results to CPU
     row_ps_cpu = row_ps_gpu.get()
     col_ps_cpu = col_ps_gpu.get()
     djs_cpu = vals['djs'].values
     dis_cpu = vals['dis'].values
 
-    # Plotting
-    if not suppress_plot:
-        plt.figure(figsize=(12, 5))
-        plt.subplot(1, 2, 1)
-        plt.scatter(djs_cpu, row_ps_cpu, alpha=0.5, s=10)
-        plt.title("Gene-specific Dropouts (Smoothed)")
-        plt.xlabel("Observed")
-        plt.ylabel("Fit")
-        lims = [min(plt.xlim()[0], plt.ylim()[0]), max(plt.xlim()[1], plt.ylim()[1])]
-        plt.plot(lims, lims, 'r-', alpha=0.75, zorder=0, label="y=x line")
-        plt.grid(True); plt.legend()
-
-        plt.subplot(1, 2, 2)
-        plt.scatter(dis_cpu, col_ps_cpu, alpha=0.5, s=10)
-        plt.title("Cell-specific Dropouts (Smoothed)")
-        plt.xlabel("Observed")
-        plt.ylabel("Expected")
-        lims = [min(plt.xlim()[0], plt.ylim()[0]), max(plt.xlim()[1], plt.ylim()[1])]
-        plt.plot(lims, lims, 'r-', alpha=0.75, zorder=0, label="y=x line")
-        plt.grid(True); plt.legend()
-
-        plt.tight_layout()
-        if plot_filename:
-            plt.savefig(plot_filename, dpi=300, bbox_inches='tight')
-            print(f"STATUS: Diagnostic plot saved to '{plot_filename}'")
-        plt.show()
-        plt.close()
-
-    # Calculate errors
     gene_error = np.sum((djs_cpu - row_ps_cpu)**2)
     cell_error = np.sum((dis_cpu - col_ps_cpu)**2)
 
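The clamps introduced above guard the NB zero probability P(X=0) = (1 + mu/size)^(-size), which extreme mu/size ratios can push past 1.0 or into NaN in floating point. A standalone sketch of the guarded calculation (CuPy, as in the source; the helper name is hypothetical):

```python
import cupy as cp

def expected_dropout_prob(mu: cp.ndarray, size: cp.ndarray) -> cp.ndarray:
    """NB zero probability P(X=0) = (1 + mu/size)**(-size), kept inside [0, 1]."""
    size = cp.maximum(size, 1e-8)        # clamp tiny sizes so the power cannot blow up
    p0 = cp.power(1 + mu / size, -size)
    p0 = cp.clip(p0, 0.0, 1.0)           # floating-point error can push p0 past 1.0
    return cp.nan_to_num(p0, nan=0.0)    # replace any residual NaNs from overflow
```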
@@ -245,7 +168,7 @@ def NBumiCheckFitFSGPU(
     }
 
 def NBumiCompareModelsGPU(
-    raw_filename: str,
+    raw_filename: str,
     cleaned_filename: str,
     stats: dict,
     fit_adjust: dict,
@@ -255,23 +178,19 @@ def NBumiCompareModelsGPU(
 ) -> dict:
     """
     OPTIMIZED VERSION (IN-MEMORY):
-    -
-    - Performs depth normalization and variance calculation on-the-fly in GPU VRAM.
-    - PRESERVED SCIENTIFIC LOGIC: Var(X) = E[X^2] - (E[X])^2 on normalized data.
+    - Calculates Basic Fit without writing 46GB file.
     """
     pipeline_start_time = time.time()
     print(f"FUNCTION: NBumiCompareModels() | Comparing models for {cleaned_filename}")
 
-    # [GOVERNOR] High multiplier (12.0) because we hold Raw + Norm + Square in VRAM
     if chunk_size is None:
         chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=12.0, is_dense=False)
 
-    # --- Phase 1: In-Memory "Basic Fit" (Normalization + Variance) ---
     print("Phase [1/3]: Calculating Basic Model (Depth-Normalized) variance on-the-fly...")
 
-    # 1. Prepare Size Factors
-    tjs = stats['tjs'].values
-    tis = stats['tis'].values
+    # 1. Prepare Size Factors
+    tjs = stats['tjs'].values
+    tis = stats['tis'].values
     nc, ng = stats['nc'], stats['ng']
 
     median_sum = np.median(tis[tis > 0])
@@ -279,12 +198,9 @@ def NBumiCompareModelsGPU(
     non_zero_mask = tis > 0
     size_factors[non_zero_mask] = tis[non_zero_mask] / median_sum
 
-    # 2. Prepare GPU Arrays
     sum_x_sq_gpu = cp.zeros(ng, dtype=cp.float64)
-    sum_x_gpu = cp.zeros(ng, dtype=cp.float64)
+    sum_x_gpu = cp.zeros(ng, dtype=cp.float64)
 
-    # 3. GPU Loop (Raw Data -> Normalize -> Accumulate)
-    # CRITICAL: We read CLEANED_FILENAME to ensure indices match 'stats'
     with h5py.File(cleaned_filename, 'r') as f_in:
         h5_indptr = f_in['X']['indptr']
         h5_data = f_in['X']['data']
@@ -297,73 +213,52 @@ def NBumiCompareModelsGPU(
             start_idx, end_idx = h5_indptr[i], h5_indptr[end_row]
             if start_idx == end_idx: continue
 
-            # Load Raw Chunk
             data_gpu = cp.asarray(h5_data[start_idx:end_idx], dtype=cp.float32)
             indices_gpu = cp.asarray(h5_indices[start_idx:end_idx])
             indptr_gpu = cp.asarray(h5_indptr[i:end_row + 1] - start_idx)
 
-            # Expand Size Factors to match Data Structure
             nnz_in_chunk = indptr_gpu[-1].item()
             cell_boundary_markers = cp.zeros(nnz_in_chunk, dtype=cp.int32)
             if len(indptr_gpu) > 1:
                 cell_boundary_markers[indptr_gpu[:-1]] = 1
-            # row_indices maps every data point to its cell index (0 to chunk_size)
             row_indices = cp.cumsum(cell_boundary_markers, axis=0) - 1
 
-            # Get size factors for this chunk
             sf_chunk = cp.asarray(size_factors[i:end_row])
 
-            #
-            # data_norm = data_raw / size_factor
+            # Normalize
             data_gpu /= sf_chunk[row_indices]
 
-            # Accumulate
+            # Accumulate
             cp.add.at(sum_x_sq_gpu, indices_gpu, data_gpu**2)
             cp.add.at(sum_x_gpu, indices_gpu, data_gpu)
 
-            # Clean up VRAM
             del data_gpu, indices_gpu, indptr_gpu, row_indices, sf_chunk, cell_boundary_markers
             cp.get_default_memory_pool().free_all_blocks()
 
     print(f"Phase [1/3]: COMPLETE{' '*50}")
 
-    # 4. Finalize Basic Statistics
-    # Var(X) = E[X^2] - (E[X])^2
     mean_x_sq_gpu = sum_x_sq_gpu / nc
     mean_mu_gpu = sum_x_gpu / nc
     my_rowvar_gpu = mean_x_sq_gpu - mean_mu_gpu**2
 
-    # Dispersion = Mean^2 / (Var - Mean)
     size_gpu = mean_mu_gpu**2 / (my_rowvar_gpu - mean_mu_gpu)
 
-    # Safety Clamping
     max_size_val = cp.nanmax(size_gpu) * 10
     if cp.isnan(max_size_val): max_size_val = 1000
     size_gpu[cp.isnan(size_gpu) | (size_gpu <= 0)] = max_size_val
     size_gpu[size_gpu < 1e-10] = 1e-10
 
-    # Construct "Basic Fit" Object
     fit_basic = {
         'sizes': pd.Series(size_gpu.get(), index=stats['tjs'].index),
         'vals': stats,
         'var_obs': pd.Series(my_rowvar_gpu.get(), index=stats['tjs'].index)
     }
 
-    # --- Phase 2: Check Fit (Calculate Errors) ---
     print("Phase [2/3]: Evaluating fit errors on ORIGINAL data...")
-
-
-    check_adjust = NBumiCheckFitFSGPU(
-        cleaned_filename, fit_adjust, suppress_plot=True
-    )
-
-    # Check Basic (Depth-Norm) - uses its own governor
-    check_basic = NBumiCheckFitFSGPU(
-        cleaned_filename, fit_basic, suppress_plot=True
-    )
+    check_adjust = NBumiCheckFitFSGPU(cleaned_filename, fit_adjust, suppress_plot=True)
+    check_basic = NBumiCheckFitFSGPU(cleaned_filename, fit_basic, suppress_plot=True)
     print("Phase [2/3]: COMPLETE")
 
-    # --- Phase 3: Plotting & Comparison ---
     print("Phase [3/3]: Generating comparison...")
 
     nc_data = stats['nc']
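The normalization loop in the hunk above broadcasts per-cell size factors onto the CSR `data` array with a cumulative-sum trick: mark the first stored element of each row with a 1, then cumsum - 1 yields every element's row index. A small NumPy illustration of the idea (CuPy behaves the same; it assumes each row in the chunk holds at least one value, since markers from empty rows would collapse onto the same position):

```python
import numpy as np

# A CSR chunk of 3 cells; indptr is relative to the chunk start
indptr = np.array([0, 2, 3, 5])
data = np.array([4.0, 1.0, 2.0, 6.0, 3.0])
size_factors = np.array([2.0, 0.5, 1.0])   # one per cell

markers = np.zeros(len(data), dtype=np.int32)
markers[indptr[:-1]] = 1                    # 1 at each row's first element
row_indices = np.cumsum(markers) - 1        # -> [0, 0, 1, 2, 2]
data /= size_factors[row_indices]           # depth-normalize in place
print(data)                                 # [2.  0.5 4.  6.  3. ]
```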
@@ -427,13 +322,11 @@ def NBumiPlotDispVsMeanGPU(
     """
     print("FUNCTION: NBumiPlotDispVsMean()")
 
-    # --- 1. Extract data and regression coefficients ---
     mean_expression = fit['vals']['tjs'].values / fit['vals']['nc']
     sizes = fit['sizes'].values
     coeffs = NBumiFitDispVsMeanGPU(fit, suppress_plot=True)
     intercept, slope = coeffs[0], coeffs[1]
 
-    # --- 2. Calculate the fitted line for plotting ---
     log_mean_expr_range = np.linspace(
         np.log(mean_expression[mean_expression > 0].min()),
         np.log(mean_expression.max()),

@@ -442,7 +335,6 @@ def NBumiPlotDispVsMeanGPU(
     log_fitted_sizes = intercept + slope * log_mean_expr_range
     fitted_sizes = np.exp(log_fitted_sizes)
 
-    # --- 3. Create the plot ---
     plt.figure(figsize=(8, 6))
     plt.scatter(mean_expression, sizes, label='Observed Dispersion', alpha=0.5, s=8)
     plt.plot(np.exp(log_mean_expr_range), fitted_sizes, color='red', label='Regression Fit', linewidth=2)
m3Drop/normalizationGPU.py

@@ -24,9 +24,7 @@ def NBumiPearsonResidualsGPU(
     start_time = time.perf_counter()
     print(f"FUNCTION: NBumiPearsonResiduals() | FILE: {cleaned_filename}")
 
-    #
-    # Multiplier 10.0 (Was 6.0): Accounts for Float64 precision (8 bytes) vs Governor default (4 bytes).
-    # 4 matrices * 8 bytes = 32 bytes/cell. Governor 10 * 4 = 40 bytes. Safe buffer established.
+    # Governor for Processing (RAM/VRAM)
     chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=10.0, is_dense=True)
 
     # --- Phase 1: Initialization ---
@@ -45,16 +43,29 @@ def NBumiPearsonResidualsGPU(
     tis_gpu = cupy.asarray(tis, dtype=cupy.float64)
     sizes_gpu = cupy.asarray(sizes, dtype=cupy.float64)
 
-    # Create Output H5
+    # Create Output H5
     adata_in = anndata.read_h5ad(cleaned_filename, backed='r')
     adata_out = anndata.AnnData(obs=adata_in.obs, var=adata_in.var)
     adata_out.write_h5ad(output_filename, compression="gzip")
 
+    # [FIX] Calculate Safe Storage Chunk Size (~1GB)
+    # HDF5 limit is 4GB. You requested 1GB for optimal speed.
+    bytes_per_row = ng * 4  # float32
+    target_bytes = 1_000_000_000  # 1GB
+    storage_chunk_rows = int(target_bytes / bytes_per_row)
+
+    if storage_chunk_rows < 1: storage_chunk_rows = 1
+    # Note: It is okay if storage_chunk > processing_chunk (HDF5 handles this),
+    # but strictly it must be < 4GB total size.
+
+    print(f" > Processing Chunk: {chunk_size} rows (RAM)")
+    print(f" > Storage Chunk: {storage_chunk_rows} rows (Disk - 1GB Target)")
+
     with h5py.File(output_filename, 'a') as f_out:
         if 'X' in f_out:
             del f_out['X']
-        # Create dataset
-        out_x = f_out.create_dataset('X', shape=(nc, ng), chunks=(
+        # Create dataset with SAFE chunks (Fixes the ValueError)
+        out_x = f_out.create_dataset('X', shape=(nc, ng), chunks=(storage_chunk_rows, ng), dtype='float32')
 
     print("Phase [1/2]: COMPLETE")
 
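The ValueError this hunk fixes comes from HDF5's 4 GB-per-chunk limit, which h5py enforces when the chunk shape is created; a full-height (nc, ng) chunk easily exceeds it for large matrices, so the patch targets ~1 GB row-chunks instead. A standalone sketch of the sizing arithmetic, with illustrative shapes and a hypothetical file name:

```python
import h5py

nc, ng = 500_000, 30_000                 # cells x genes (illustrative)
bytes_per_row = ng * 4                   # float32
target_bytes = 1_000_000_000             # ~1 GB per chunk, well under the 4 GB cap
rows_per_chunk = max(1, min(target_bytes // bytes_per_row, nc))

with h5py.File("residuals_demo.h5", "w") as f:   # hypothetical output file
    # Chunked layout lets rows be written incrementally, no 60 GB allocation up front
    f.create_dataset("X", shape=(nc, ng), chunks=(rows_per_chunk, ng), dtype="float32")
```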
@@ -77,7 +88,6 @@ def NBumiPearsonResidualsGPU(
         indptr_slice = h5_indptr[i:end_row+1] - h5_indptr[i]
 
         # Convert to Dense GPU Matrix
-        # We construct sparse first, then densify on GPU to save bandwidth
         counts_chunk_sparse_gpu = cp_csr_matrix((
             cupy.asarray(data_slice, dtype=cupy.float64),
             cupy.asarray(indices_slice),
@@ -91,24 +101,18 @@ def NBumiPearsonResidualsGPU(
         mus_chunk_gpu = tjs_gpu[cupy.newaxis, :] * tis_chunk_gpu[:, cupy.newaxis] / total
 
         denominator_gpu = cupy.sqrt(mus_chunk_gpu + mus_chunk_gpu**2 / sizes_gpu[cupy.newaxis, :])
-
-        # --- LOGIC RESTORED: Prevent Division by Zero ---
         denominator_gpu = cupy.where(denominator_gpu == 0, 1, denominator_gpu)
 
-        # (Counts - Mu) / Sqrt(V)
         pearson_chunk_gpu = (counts_chunk_dense_gpu - mus_chunk_gpu) / denominator_gpu
 
         # Write to Disk
-        # [OPTIMIZATION] Cast to float32 on GPU to halve PCIe transfer time
         out_x[i:end_row, :] = pearson_chunk_gpu.astype(cupy.float32).get()
 
-        # Cleanup
         del counts_chunk_dense_gpu, counts_chunk_sparse_gpu, mus_chunk_gpu, pearson_chunk_gpu, denominator_gpu
         cupy.get_default_memory_pool().free_all_blocks()
 
     print(f"Phase [2/2]: COMPLETE{' '*50}")
 
-    # --- LOGIC RESTORED: Explicit File Cleanup ---
     if hasattr(adata_in, "file") and adata_in.file is not None:
         adata_in.file.close()
 
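For reference, the per-chunk quantity above is the negative-binomial Pearson residual (X - mu) / sqrt(mu + mu^2/size) with mu_cg = t_c * t_g / T; the "Approx" variant later in this diff divides by sqrt(mu) alone (Poisson variance). A compact NumPy sketch covering both cases (names are illustrative, not M3Drop's API):

```python
import numpy as np

def pearson_residuals(counts, tjs, tis, sizes, approx=False):
    """NB Pearson residuals for a dense (cells x genes) count block."""
    total = tjs.sum()
    mu = np.outer(tis, tjs) / total              # expected count mu_cg = t_c * t_g / T
    var = mu if approx else mu + mu**2 / sizes   # Poisson vs. NB variance
    denom = np.sqrt(var)
    denom[denom == 0] = 1                        # guard against division by zero
    return (counts - mu) / denom
```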
@@ -127,8 +131,6 @@ def NBumiPearsonResidualsApproxGPU(
     start_time = time.perf_counter()
     print(f"FUNCTION: NBumiPearsonResidualsApprox() | FILE: {cleaned_filename}")
 
-    # --- HANDSHAKE ---
-    # Multiplier 10.0: Same safety logic as Full residuals.
     chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=10.0, is_dense=True)
 
     # --- Phase 1: Initialization ---
@@ -150,10 +152,17 @@ def NBumiPearsonResidualsApproxGPU(
     adata_out = anndata.AnnData(obs=adata_in.obs, var=adata_in.var)
     adata_out.write_h5ad(output_filename, compression="gzip")
 
+    # [FIX] Calculate Safe Storage Chunk Size (~1GB)
+    bytes_per_row = ng * 4
+    target_bytes = 1_000_000_000  # 1GB
+    storage_chunk_rows = int(target_bytes / bytes_per_row)
+    if storage_chunk_rows < 1: storage_chunk_rows = 1
+
     with h5py.File(output_filename, 'a') as f_out:
         if 'X' in f_out:
             del f_out['X']
-
+        # Create dataset with SAFE chunks
+        out_x = f_out.create_dataset('X', shape=(nc, ng), chunks=(storage_chunk_rows, ng), dtype='float32')
 
     print("Phase [1/2]: COMPLETE")
 
@@ -185,15 +194,11 @@ def NBumiPearsonResidualsApproxGPU(
         tis_chunk_gpu = tis_gpu[i:end_row]
         mus_chunk_gpu = tjs_gpu[cupy.newaxis, :] * tis_chunk_gpu[:, cupy.newaxis] / total
 
-        # Approx: Denom = Sqrt(Mu)
         denominator_gpu = cupy.sqrt(mus_chunk_gpu)
-
-        # --- LOGIC RESTORED: Prevent Division by Zero ---
         denominator_gpu = cupy.where(denominator_gpu == 0, 1, denominator_gpu)
 
         pearson_chunk_gpu = (counts_chunk_dense_gpu - mus_chunk_gpu) / denominator_gpu
 
-        # [OPTIMIZATION] Cast to float32 on GPU to halve PCIe transfer time
         out_x[i:end_row, :] = pearson_chunk_gpu.astype(cupy.float32).get()
 
         del counts_chunk_dense_gpu, counts_chunk_sparse_gpu, mus_chunk_gpu, pearson_chunk_gpu, denominator_gpu
@@ -201,7 +206,6 @@ def NBumiPearsonResidualsApproxGPU(
 
     print(f"Phase [2/2]: COMPLETE{' '*50}")
 
-    # --- LOGIC RESTORED: Explicit File Cleanup ---
     if hasattr(adata_in, "file") and adata_in.file is not None:
         adata_in.file.close()
 
setup.py

@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
 
 setuptools.setup(
     name="M3Drop",
-    version="0.4.38",
+    version="0.4.39",  # Version bump
     author="Tallulah Andrews",
     author_email="tandrew6@uwo.ca",
     description="A Python implementation of the M3Drop single-cell RNA-seq analysis tool.",