M3Drop 0.4.38.tar.gz → 0.4.39.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: M3Drop
- Version: 0.4.38
+ Version: 0.4.39
  Summary: A Python implementation of the M3Drop single-cell RNA-seq analysis tool.
  Home-page: https://github.com/PragalvhaSharma/m3DropNew
  Author: Tallulah Andrews
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: M3Drop
- Version: 0.4.38
+ Version: 0.4.39
  Summary: A Python implementation of the M3Drop single-cell RNA-seq analysis tool.
  Home-page: https://github.com/PragalvhaSharma/m3DropNew
  Author: Tallulah Andrews
@@ -12,7 +12,7 @@ from scipy import sparse
  from scipy import stats
  import anndata

- # [GOVERNOR INTEGRATION]
+ # [GOVERNOR INTEGRATION]
  from .coreGPU import hidden_calc_valsGPU, NBumiFitModelGPU, NBumiFitDispVsMeanGPU, get_optimal_chunk_size
  from cupy.sparse import csr_matrix as cp_csr_matrix
  import scipy.sparse as sp
@@ -26,27 +26,22 @@ def NBumiFitBasicModelGPU(
  chunk_size: int = None
  ) -> dict:
  """
- Fits a simpler, unadjusted NB model out-of-core using a GPU-accelerated
- algorithm. Designed to work with a standard (cell, gene) sparse matrix.
+ Fits a simpler, unadjusted NB model out-of-core.
  """
  start_time = time.perf_counter()
  print(f"FUNCTION: NBumiFitBasicModel() | FILE: {cleaned_filename}")

- # [GOVERNOR INTEGRATION] Calculate optimal chunk size if not provided
  if chunk_size is None:
  chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=3.0, is_dense=True)

  # --- Phase 1: Initialization ---
- print("Phase [1/2]: Initializing parameters and arrays on GPU...")
  tjs = stats['tjs'].values
  nc, ng = stats['nc'], stats['ng']

  tjs_gpu = cp.asarray(tjs, dtype=cp.float64)
  sum_x_sq_gpu = cp.zeros(ng, dtype=cp.float64)
- print("Phase [1/2]: COMPLETE")

  # --- Phase 2: Calculate Variance from Data Chunks ---
- print("Phase [2/2]: Calculating variance from data chunks...")
  with h5py.File(cleaned_filename, 'r') as f_in:
  x_group = f_in['X']
  h5_indptr = x_group['indptr']
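Throughout this release the "[GOVERNOR INTEGRATION]" helper get_optimal_chunk_size() (imported from .coreGPU above) decides how many cells are streamed per iteration; its implementation is not part of this diff. Purely as a hypothetical illustration of what such a governor computes, the sketch below sizes a row chunk from free GPU memory and a per-row byte estimate; the function name, the 0.8 safety margin, and the byte accounting are invented for the example and are not the package's API.

    import cupy as cp

    def sketch_optimal_chunk_size(n_genes: int, multiplier: float = 3.0,
                                  bytes_per_value: int = 8) -> int:
        # Hypothetical governor: assume `multiplier` dense float64 copies of a
        # (chunk_rows, n_genes) block must fit in free VRAM at the same time.
        free_bytes, _total_bytes = cp.cuda.Device().mem_info  # (free, total) in bytes
        bytes_per_row = n_genes * bytes_per_value * multiplier
        budget = int(free_bytes * 0.8)  # keep a margin for allocator overhead
        return max(1, int(budget // bytes_per_row))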
@@ -58,65 +53,32 @@ def NBumiFitBasicModelGPU(
  print(f"Phase [2/2]: Processing: {end_row} of {nc} cells.", end='\r')

  start_idx, end_idx = h5_indptr[i], h5_indptr[end_row]
- if start_idx == end_idx:
- continue
+ if start_idx == end_idx: continue

- # Process in smaller sub-chunks if needed
- max_elements = 5_000_000 # Process max 5M elements at a time
+ # Original processing
+ data_slice = h5_data[start_idx:end_idx]
+ indices_slice = h5_indices[start_idx:end_idx]
+
+ data_gpu = cp.asarray(data_slice, dtype=cp.float64)
+ indices_gpu = cp.asarray(indices_slice)
+
+ cp.add.at(sum_x_sq_gpu, indices_gpu, data_gpu**2)

- if end_idx - start_idx > max_elements:
- # Process in sub-chunks
- for sub_start in range(start_idx, end_idx, max_elements):
- sub_end = min(sub_start + max_elements, end_idx)
-
- data_slice = h5_data[sub_start:sub_end]
- indices_slice = h5_indices[sub_start:sub_end]
-
- data_gpu = cp.asarray(data_slice, dtype=cp.float64)
- indices_gpu = cp.asarray(indices_slice)
-
- # Accumulate the sum of squares for each gene
- cp.add.at(sum_x_sq_gpu, indices_gpu, data_gpu**2)
-
- # Free GPU memory
- del data_gpu, indices_gpu
- cp.get_default_memory_pool().free_all_blocks()
- else:
- # Original processing for smaller chunks
- data_slice = h5_data[start_idx:end_idx]
- indices_slice = h5_indices[start_idx:end_idx]
-
- data_gpu = cp.asarray(data_slice, dtype=cp.float64)
- indices_gpu = cp.asarray(indices_slice)
-
- # Accumulate the sum of squares for each gene
- cp.add.at(sum_x_sq_gpu, indices_gpu, data_gpu**2)
-
- # Clean up
- del data_gpu, indices_gpu
- cp.get_default_memory_pool().free_all_blocks()
+ del data_gpu, indices_gpu
+ cp.get_default_memory_pool().free_all_blocks()

- print(f"Phase [2/2]: COMPLETE ")
-
- # --- Final calculations on GPU ---
- if is_logged:
- raise NotImplementedError("Logged data variance calculation is not implemented for out-of-core.")
- else:
- # Variance of raw data: Var(X) = E[X^2] - E[X]^2
- mean_x_sq_gpu = sum_x_sq_gpu / nc
- mean_mu_gpu = tjs_gpu / nc
- my_rowvar_gpu = mean_x_sq_gpu - mean_mu_gpu**2
-
- # Calculate dispersion ('size')
- size_gpu = mean_mu_gpu**2 / (my_rowvar_gpu - mean_mu_gpu)
+ print(f"Phase [2/2]: COMPLETE{' '*50}")
+
+ mean_x_sq_gpu = sum_x_sq_gpu / nc
+ mean_mu_gpu = tjs_gpu / nc
+ my_rowvar_gpu = mean_x_sq_gpu - mean_mu_gpu**2
+ size_gpu = mean_mu_gpu**2 / (my_rowvar_gpu - mean_mu_gpu)

  max_size_val = cp.nanmax(size_gpu) * 10
- if cp.isnan(max_size_val):
- max_size_val = 1000
+ if cp.isnan(max_size_val): max_size_val = 1000
  size_gpu[cp.isnan(size_gpu) | (size_gpu <= 0)] = max_size_val
  size_gpu[size_gpu < 1e-10] = 1e-10

- # Move results to CPU
  my_rowvar_cpu = my_rowvar_gpu.get()
  sizes_cpu = size_gpu.get()
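The slimmed-down loop above keeps the same moment math as 0.4.38: per-gene variance via Var(X) = E[X^2] - E[X]^2 and a negative binomial 'size' of mean^2 / (var - mean), with invalid sizes clamped. A minimal NumPy sketch of that finalization step (a CPU stand-in for the CuPy arrays; names are illustrative):

    import numpy as np

    def basic_nb_moments(sum_x, sum_x_sq, n_cells):
        # Per-gene mean and variance from accumulated sums: Var(X) = E[X^2] - E[X]^2
        mean = sum_x / n_cells
        var = sum_x_sq / n_cells - mean**2
        with np.errstate(divide="ignore", invalid="ignore"):
            size = mean**2 / (var - mean)  # negative binomial 'size' (dispersion)
        # Clamp invalid or non-positive sizes, mirroring the GPU code's safety step
        max_size = np.nanmax(size) * 10
        if np.isnan(max_size):
            max_size = 1000.0
        size = np.where(np.isnan(size) | (size <= 0), max_size, size)
        size = np.maximum(size, 1e-10)
        return var, size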
@@ -137,21 +99,17 @@ def NBumiCheckFitFSGPU(
  plot_filename=None
  ) -> dict:
  """
- Calculates the fit errors (gene_error, cell_error) for a given model.
+ Calculates fit errors. [FIXED] Added clamps to prevent >1.0 probability errors.
  """
  start_time = time.perf_counter()
  print(f"FUNCTION: NBumiCheckFitFS() | FILE: {cleaned_filename}")

- # [GOVERNOR INTEGRATION] Adaptive chunk sizing
  if chunk_size is None:
  chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=5.0, is_dense=True)

- # --- Phase 1: Initialization ---
- print("Phase [1/2]: Initializing parameters and arrays on GPU...")
  vals = fit['vals']
  size_coeffs = NBumiFitDispVsMeanGPU(fit, suppress_plot=True)

- # Must use float64 for precision
  tjs_gpu = cp.asarray(vals['tjs'].values, dtype=cp.float64)
  tis_gpu = cp.asarray(vals['tis'].values, dtype=cp.float64)
  total = vals['total']
@@ -161,76 +119,41 @@ def NBumiCheckFitFSGPU(
  mean_expression_gpu = tjs_gpu / nc
  log_mean_expression_gpu = cp.log(mean_expression_gpu)
  smoothed_size_gpu = cp.exp(size_coeffs[0] + size_coeffs[1] * log_mean_expression_gpu)
+
+ # [FIX] Clamp smoothed size to prevent instability
+ smoothed_size_gpu = cp.maximum(smoothed_size_gpu, 1e-8)

- # Initialize result arrays
  row_ps_gpu = cp.zeros(ng, dtype=cp.float64)
  col_ps_gpu = cp.zeros(nc, dtype=cp.float64)
- print("Phase [1/2]: COMPLETE")

- # --- Phase 2: Calculate Expected Dropouts ---
- print(f"Phase [2/2]: Calculating expected dropouts (Chunk: {chunk_size})...")
-
  for i in range(0, nc, chunk_size):
  end_col = min(i + chunk_size, nc)
  print(f"Phase [2/2]: Processing: {end_col} of {nc} cells.", end='\r')

  tis_chunk_gpu = tis_gpu[i:end_col]
-
- # Standard calculation without errstate
  mu_chunk_gpu = tjs_gpu[:, cp.newaxis] * tis_chunk_gpu[cp.newaxis, :] / total

- # Calculate p_is directly - CuPy handles overflow internally
+ # [FIX] Safer power calculation
  base = 1 + mu_chunk_gpu / smoothed_size_gpu[:, cp.newaxis]
  p_is_chunk_gpu = cp.power(base, -smoothed_size_gpu[:, cp.newaxis])

- # Handle any inf/nan values that might have occurred
- p_is_chunk_gpu = cp.nan_to_num(p_is_chunk_gpu, nan=0.0, posinf=1.0, neginf=0.0)
+ # [FIX] Clamp probabilities to valid range [0, 1]
+ p_is_chunk_gpu = cp.clip(p_is_chunk_gpu, 0.0, 1.0)
+ p_is_chunk_gpu = cp.nan_to_num(p_is_chunk_gpu, nan=0.0)

- # Sum results
  row_ps_gpu += p_is_chunk_gpu.sum(axis=1)
  col_ps_gpu[i:end_col] = p_is_chunk_gpu.sum(axis=0)

- # Clean up
  del mu_chunk_gpu, p_is_chunk_gpu, base, tis_chunk_gpu
  cp.get_default_memory_pool().free_all_blocks()

  print(f"Phase [2/2]: COMPLETE{' ' * 50}")

- # Move results to CPU
  row_ps_cpu = row_ps_gpu.get()
  col_ps_cpu = col_ps_gpu.get()
  djs_cpu = vals['djs'].values
  dis_cpu = vals['dis'].values

- # Plotting
- if not suppress_plot:
- plt.figure(figsize=(12, 5))
- plt.subplot(1, 2, 1)
- plt.scatter(djs_cpu, row_ps_cpu, alpha=0.5, s=10)
- plt.title("Gene-specific Dropouts (Smoothed)")
- plt.xlabel("Observed")
- plt.ylabel("Fit")
- lims = [min(plt.xlim()[0], plt.ylim()[0]), max(plt.xlim()[1], plt.ylim()[1])]
- plt.plot(lims, lims, 'r-', alpha=0.75, zorder=0, label="y=x line")
- plt.grid(True); plt.legend()
-
- plt.subplot(1, 2, 2)
- plt.scatter(dis_cpu, col_ps_cpu, alpha=0.5, s=10)
- plt.title("Cell-specific Dropouts (Smoothed)")
- plt.xlabel("Observed")
- plt.ylabel("Expected")
- lims = [min(plt.xlim()[0], plt.ylim()[0]), max(plt.xlim()[1], plt.ylim()[1])]
- plt.plot(lims, lims, 'r-', alpha=0.75, zorder=0, label="y=x line")
- plt.grid(True); plt.legend()
-
- plt.tight_layout()
- if plot_filename:
- plt.savefig(plot_filename, dpi=300, bbox_inches='tight')
- print(f"STATUS: Diagnostic plot saved to '{plot_filename}'")
- plt.show()
- plt.close()
-
- # Calculate errors
  gene_error = np.sum((djs_cpu - row_ps_cpu)**2)
  cell_error = np.sum((dis_cpu - col_ps_cpu)**2)
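The substantive 0.4.39 change in NBumiCheckFitFSGPU is numerical guarding: the smoothed size is floored at 1e-8 and each dropout probability P(X=0) = (1 + mu/size)^(-size) is clipped into [0, 1] before being summed, so the accumulated expected dropouts can no longer drift above the observed counts. A small NumPy sketch of the guarded calculation (a CPU stand-in for the chunked CuPy code; names are illustrative):

    import numpy as np

    def expected_dropouts(mu, size, eps=1e-8):
        # NB dropout probability P(X=0) = (1 + mu/size)**(-size), with guards
        size = np.maximum(size, eps)            # floor the smoothed size
        p0 = np.power(1.0 + mu / size, -size)
        p0 = np.clip(p0, 0.0, 1.0)              # keep probabilities in [0, 1]
        return np.nan_to_num(p0, nan=0.0)

    # Example: a small mean with a near-zero size, and a typical gene
    print(expected_dropouts(np.array([0.1, 50.0]), np.array([1e-12, 0.5])))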
@@ -245,7 +168,7 @@ def NBumiCheckFitFSGPU(
  }

  def NBumiCompareModelsGPU(
- raw_filename: str, # Kept for API compatibility, but functionally we use cleaned_filename for indices
+ raw_filename: str,
  cleaned_filename: str,
  stats: dict,
  fit_adjust: dict,
@@ -255,23 +178,19 @@ def NBumiCompareModelsGPU(
  ) -> dict:
  """
  OPTIMIZED VERSION (IN-MEMORY):
- - Eliminates the 46GB '_basic_norm.h5ad' temporary file.
- - Performs depth normalization and variance calculation on-the-fly in GPU VRAM.
- - PRESERVED SCIENTIFIC LOGIC: Var(X) = E[X^2] - (E[X])^2 on normalized data.
+ - Calculates Basic Fit without writing 46GB file.
  """
  pipeline_start_time = time.time()
  print(f"FUNCTION: NBumiCompareModels() | Comparing models for {cleaned_filename}")

- # [GOVERNOR] High multiplier (12.0) because we hold Raw + Norm + Square in VRAM
  if chunk_size is None:
  chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=12.0, is_dense=False)

- # --- Phase 1: In-Memory "Basic Fit" (Normalization + Variance) ---
  print("Phase [1/3]: Calculating Basic Model (Depth-Normalized) variance on-the-fly...")

- # 1. Prepare Size Factors (CPU)
- tjs = stats['tjs'].values # Gene sums
- tis = stats['tis'].values # Cell sums
+ # 1. Prepare Size Factors
+ tjs = stats['tjs'].values
+ tis = stats['tis'].values
  nc, ng = stats['nc'], stats['ng']

  median_sum = np.median(tis[tis > 0])
@@ -279,12 +198,9 @@ def NBumiCompareModelsGPU(
  non_zero_mask = tis > 0
  size_factors[non_zero_mask] = tis[non_zero_mask] / median_sum

- # 2. Prepare GPU Arrays
  sum_x_sq_gpu = cp.zeros(ng, dtype=cp.float64)
- sum_x_gpu = cp.zeros(ng, dtype=cp.float64) # Need sum(x) to calc mean(x) for variance
+ sum_x_gpu = cp.zeros(ng, dtype=cp.float64)

- # 3. GPU Loop (Raw Data -> Normalize -> Accumulate)
- # CRITICAL: We read CLEANED_FILENAME to ensure indices match 'stats'
  with h5py.File(cleaned_filename, 'r') as f_in:
  h5_indptr = f_in['X']['indptr']
  h5_data = f_in['X']['data']
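Phase 1 of NBumiCompareModelsGPU builds per-cell size factors from the cell totals (tis): each non-empty cell's total divided by the median of the non-zero totals. A short NumPy sketch of that step; the diff does not show how size_factors is initialized, so the fallback factor of 1.0 for empty cells below is an assumption:

    import numpy as np

    def depth_size_factors(tis):
        # Per-cell depth factors: cell total / median of non-zero cell totals
        median_sum = np.median(tis[tis > 0])
        size_factors = np.ones_like(tis, dtype=np.float64)  # assumed fallback for empty cells
        nonzero = tis > 0
        size_factors[nonzero] = tis[nonzero] / median_sum
        return size_factors

    print(depth_size_factors(np.array([1000.0, 2000.0, 0.0, 4000.0])))  # [0.5, 1.0, 1.0, 2.0]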
@@ -297,73 +213,52 @@ def NBumiCompareModelsGPU(
  start_idx, end_idx = h5_indptr[i], h5_indptr[end_row]
  if start_idx == end_idx: continue

- # Load Raw Chunk
  data_gpu = cp.asarray(h5_data[start_idx:end_idx], dtype=cp.float32)
  indices_gpu = cp.asarray(h5_indices[start_idx:end_idx])
  indptr_gpu = cp.asarray(h5_indptr[i:end_row + 1] - start_idx)

- # Expand Size Factors to match Data Structure
  nnz_in_chunk = indptr_gpu[-1].item()
  cell_boundary_markers = cp.zeros(nnz_in_chunk, dtype=cp.int32)
  if len(indptr_gpu) > 1:
  cell_boundary_markers[indptr_gpu[:-1]] = 1
- # row_indices maps every data point to its cell index (0 to chunk_size)
  row_indices = cp.cumsum(cell_boundary_markers, axis=0) - 1

- # Get size factors for this chunk
  sf_chunk = cp.asarray(size_factors[i:end_row])

- # --- THE MAGIC: On-the-Fly Normalization ---
- # data_norm = data_raw / size_factor
+ # Normalize
  data_gpu /= sf_chunk[row_indices]

- # Accumulate for Variance: E[X^2] and E[X]
+ # Accumulate
  cp.add.at(sum_x_sq_gpu, indices_gpu, data_gpu**2)
  cp.add.at(sum_x_gpu, indices_gpu, data_gpu)

- # Clean up VRAM
  del data_gpu, indices_gpu, indptr_gpu, row_indices, sf_chunk, cell_boundary_markers
  cp.get_default_memory_pool().free_all_blocks()

  print(f"Phase [1/3]: COMPLETE{' '*50}")

- # 4. Finalize Basic Statistics
- # Var(X) = E[X^2] - (E[X])^2
  mean_x_sq_gpu = sum_x_sq_gpu / nc
  mean_mu_gpu = sum_x_gpu / nc
  my_rowvar_gpu = mean_x_sq_gpu - mean_mu_gpu**2

- # Dispersion = Mean^2 / (Var - Mean)
  size_gpu = mean_mu_gpu**2 / (my_rowvar_gpu - mean_mu_gpu)

- # Safety Clamping
  max_size_val = cp.nanmax(size_gpu) * 10
  if cp.isnan(max_size_val): max_size_val = 1000
  size_gpu[cp.isnan(size_gpu) | (size_gpu <= 0)] = max_size_val
  size_gpu[size_gpu < 1e-10] = 1e-10

- # Construct "Basic Fit" Object
  fit_basic = {
  'sizes': pd.Series(size_gpu.get(), index=stats['tjs'].index),
  'vals': stats,
  'var_obs': pd.Series(my_rowvar_gpu.get(), index=stats['tjs'].index)
  }

- # --- Phase 2: Check Fit (Calculate Errors) ---
  print("Phase [2/3]: Evaluating fit errors on ORIGINAL data...")
-
- # Check Adjust (M3Drop) - uses its own governor
- check_adjust = NBumiCheckFitFSGPU(
- cleaned_filename, fit_adjust, suppress_plot=True
- )
-
- # Check Basic (Depth-Norm) - uses its own governor
- check_basic = NBumiCheckFitFSGPU(
- cleaned_filename, fit_basic, suppress_plot=True
- )
+ check_adjust = NBumiCheckFitFSGPU(cleaned_filename, fit_adjust, suppress_plot=True)
+ check_basic = NBumiCheckFitFSGPU(cleaned_filename, fit_basic, suppress_plot=True)
  print("Phase [2/3]: COMPLETE")

- # --- Phase 3: Plotting & Comparison ---
  print("Phase [3/3]: Generating comparison...")

  nc_data = stats['nc']
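The loop above normalizes each CSR chunk in place without densifying it: every stored value is mapped back to its owning cell (the diff uses a cumulative sum over cell-boundary markers built from indptr) and divided by that cell's size factor. Here is a compact CPU sketch of the same value-to-row mapping, using np.repeat over the per-row counts rather than the marker/cumsum trick; it is equivalent for this purpose and the names are illustrative:

    import numpy as np
    from scipy import sparse

    def normalize_csr_chunk_inplace(data, indptr, size_factors_chunk):
        # Map every stored value back to its owning cell, then divide in place
        nnz_per_cell = np.diff(indptr)
        row_of_value = np.repeat(np.arange(len(nnz_per_cell)), nnz_per_cell)
        data /= size_factors_chunk[row_of_value]
        return data

    # Tiny example: 2 cells x 3 genes
    m = sparse.csr_matrix(np.array([[2.0, 0.0, 4.0], [0.0, 3.0, 0.0]]))
    normalize_csr_chunk_inplace(m.data, m.indptr, np.array([2.0, 3.0]))
    print(m.toarray())  # [[1. 0. 2.] [0. 1. 0.]]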
@@ -427,13 +322,11 @@ def NBumiPlotDispVsMeanGPU(
  """
  print("FUNCTION: NBumiPlotDispVsMean()")

- # --- 1. Extract data and regression coefficients ---
  mean_expression = fit['vals']['tjs'].values / fit['vals']['nc']
  sizes = fit['sizes'].values
  coeffs = NBumiFitDispVsMeanGPU(fit, suppress_plot=True)
  intercept, slope = coeffs[0], coeffs[1]

- # --- 2. Calculate the fitted line for plotting ---
  log_mean_expr_range = np.linspace(
  np.log(mean_expression[mean_expression > 0].min()),
  np.log(mean_expression.max()),
@@ -442,7 +335,6 @@ def NBumiPlotDispVsMeanGPU(
  log_fitted_sizes = intercept + slope * log_mean_expr_range
  fitted_sizes = np.exp(log_fitted_sizes)

- # --- 3. Create the plot ---
  plt.figure(figsize=(8, 6))
  plt.scatter(mean_expression, sizes, label='Observed Dispersion', alpha=0.5, s=8)
  plt.plot(np.exp(log_mean_expr_range), fitted_sizes, color='red', label='Regression Fit', linewidth=2)
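NBumiPlotDispVsMeanGPU overlays the observed per-gene sizes with the regression returned by NBumiFitDispVsMeanGPU, which is linear in log space: log(size) ≈ intercept + slope * log(mean expression). A self-contained sketch of evaluating and plotting that fitted curve, assuming the coefficients are already available (names are illustrative):

    import numpy as np
    import matplotlib.pyplot as plt

    def plot_disp_vs_mean(mean_expression, sizes, intercept, slope, n_points=100):
        # Evaluate exp(intercept + slope * log(mean)) over the observed mean range
        log_range = np.linspace(np.log(mean_expression[mean_expression > 0].min()),
                                np.log(mean_expression.max()), n_points)
        fitted_sizes = np.exp(intercept + slope * log_range)

        plt.figure(figsize=(8, 6))
        plt.scatter(mean_expression, sizes, s=8, alpha=0.5, label="Observed Dispersion")
        plt.plot(np.exp(log_range), fitted_sizes, color="red", linewidth=2, label="Regression Fit")
        plt.xlabel("Mean expression")
        plt.ylabel("Size (dispersion)")
        plt.legend()
        plt.show()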
@@ -24,9 +24,7 @@ def NBumiPearsonResidualsGPU(
  start_time = time.perf_counter()
  print(f"FUNCTION: NBumiPearsonResiduals() | FILE: {cleaned_filename}")

- # --- SAFETY UPDATE ---
- # Multiplier 10.0 (Was 6.0): Accounts for Float64 precision (8 bytes) vs Governor default (4 bytes).
- # 4 matrices * 8 bytes = 32 bytes/cell. Governor 10 * 4 = 40 bytes. Safe buffer established.
+ # Governor for Processing (RAM/VRAM)
  chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=10.0, is_dense=True)

  # --- Phase 1: Initialization ---
@@ -45,16 +43,29 @@ def NBumiPearsonResidualsGPU(
  tis_gpu = cupy.asarray(tis, dtype=cupy.float64)
  sizes_gpu = cupy.asarray(sizes, dtype=cupy.float64)

- # Create Output H5 (Identical structure to cleaned input)
+ # Create Output H5
  adata_in = anndata.read_h5ad(cleaned_filename, backed='r')
  adata_out = anndata.AnnData(obs=adata_in.obs, var=adata_in.var)
  adata_out.write_h5ad(output_filename, compression="gzip")

+ # [FIX] Calculate Safe Storage Chunk Size (~1GB)
+ # HDF5 limit is 4GB. You requested 1GB for optimal speed.
+ bytes_per_row = ng * 4 # float32
+ target_bytes = 1_000_000_000 # 1GB
+ storage_chunk_rows = int(target_bytes / bytes_per_row)
+
+ if storage_chunk_rows < 1: storage_chunk_rows = 1
+ # Note: It is okay if storage_chunk > processing_chunk (HDF5 handles this),
+ # but strictly it must be < 4GB total size.
+
+ print(f" > Processing Chunk: {chunk_size} rows (RAM)")
+ print(f" > Storage Chunk: {storage_chunk_rows} rows (Disk - 1GB Target)")
+
  with h5py.File(output_filename, 'a') as f_out:
  if 'X' in f_out:
  del f_out['X']
- # Create dataset for dense matrix output (float32)
- out_x = f_out.create_dataset('X', shape=(nc, ng), chunks=(chunk_size, ng), dtype='float32')
+ # Create dataset with SAFE chunks (Fixes the ValueError)
+ out_x = f_out.create_dataset('X', shape=(nc, ng), chunks=(storage_chunk_rows, ng), dtype='float32')

  print("Phase [1/2]: COMPLETE")
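The storage-chunk change matters because HDF5 caps a single chunk at 4 GiB; with chunks=(chunk_size, ng) a large governor chunk on a wide matrix could exceed that limit and make create_dataset raise a ValueError. The new code sizes the on-disk chunk independently of the processing chunk, targeting roughly 1 GB. A minimal sketch of that sizing rule (the file name and shapes below are illustrative):

    import h5py

    def safe_row_chunk(n_genes: int, itemsize: int = 4,
                       target_bytes: int = 1_000_000_000) -> int:
        # Rows per HDF5 chunk so a single chunk stays near 1 GB,
        # comfortably below the 4 GiB per-chunk limit.
        return max(1, target_bytes // (n_genes * itemsize))

    nc, ng = 100_000, 30_000
    rows = safe_row_chunk(ng)  # 8333 rows * 30000 genes * 4 B ~= 1 GB per chunk
    with h5py.File("chunk_demo.h5", "w") as f:
        f.create_dataset("X", shape=(nc, ng), chunks=(rows, ng), dtype="float32")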
@@ -77,7 +88,6 @@ def NBumiPearsonResidualsGPU(
  indptr_slice = h5_indptr[i:end_row+1] - h5_indptr[i]

  # Convert to Dense GPU Matrix
- # We construct sparse first, then densify on GPU to save bandwidth
  counts_chunk_sparse_gpu = cp_csr_matrix((
  cupy.asarray(data_slice, dtype=cupy.float64),
  cupy.asarray(indices_slice),
@@ -91,24 +101,18 @@ def NBumiPearsonResidualsGPU(
  mus_chunk_gpu = tjs_gpu[cupy.newaxis, :] * tis_chunk_gpu[:, cupy.newaxis] / total

  denominator_gpu = cupy.sqrt(mus_chunk_gpu + mus_chunk_gpu**2 / sizes_gpu[cupy.newaxis, :])
-
- # --- LOGIC RESTORED: Prevent Division by Zero ---
  denominator_gpu = cupy.where(denominator_gpu == 0, 1, denominator_gpu)

- # (Counts - Mu) / Sqrt(V)
  pearson_chunk_gpu = (counts_chunk_dense_gpu - mus_chunk_gpu) / denominator_gpu

  # Write to Disk
- # [OPTIMIZATION] Cast to float32 on GPU to halve PCIe transfer time
  out_x[i:end_row, :] = pearson_chunk_gpu.astype(cupy.float32).get()

- # Cleanup
  del counts_chunk_dense_gpu, counts_chunk_sparse_gpu, mus_chunk_gpu, pearson_chunk_gpu, denominator_gpu
  cupy.get_default_memory_pool().free_all_blocks()

  print(f"Phase [2/2]: COMPLETE{' '*50}")

- # --- LOGIC RESTORED: Explicit File Cleanup ---
  if hasattr(adata_in, "file") and adata_in.file is not None:
  adata_in.file.close()
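The residual math itself is unchanged in 0.4.39: mu = (gene total * cell total) / grand total, the full Pearson residual divides by sqrt(mu + mu^2/size), the "Approx" variant divides by sqrt(mu), and zero denominators are replaced by 1. A compact NumPy sketch covering both forms (a CPU stand-in for the chunked CuPy code; names are illustrative):

    import numpy as np

    def nb_pearson_residuals(counts, tjs, tis, total, sizes, approx=False):
        # counts: dense (cells, genes) block; tjs: gene totals; tis: cell totals
        mus = np.outer(tis, tjs) / total                       # expected counts
        if approx:
            denom = np.sqrt(mus)                               # 'Approx' variant
        else:
            denom = np.sqrt(mus + mus**2 / sizes[np.newaxis, :])
        denom = np.where(denom == 0, 1.0, denom)               # guard division by zero
        return (counts - mus) / denom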
@@ -127,8 +131,6 @@ def NBumiPearsonResidualsApproxGPU(
  start_time = time.perf_counter()
  print(f"FUNCTION: NBumiPearsonResidualsApprox() | FILE: {cleaned_filename}")

- # --- HANDSHAKE ---
- # Multiplier 10.0: Same safety logic as Full residuals.
  chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=10.0, is_dense=True)

  # --- Phase 1: Initialization ---
@@ -150,10 +152,17 @@ def NBumiPearsonResidualsApproxGPU(
  adata_out = anndata.AnnData(obs=adata_in.obs, var=adata_in.var)
  adata_out.write_h5ad(output_filename, compression="gzip")

+ # [FIX] Calculate Safe Storage Chunk Size (~1GB)
+ bytes_per_row = ng * 4
+ target_bytes = 1_000_000_000 # 1GB
+ storage_chunk_rows = int(target_bytes / bytes_per_row)
+ if storage_chunk_rows < 1: storage_chunk_rows = 1
+
  with h5py.File(output_filename, 'a') as f_out:
  if 'X' in f_out:
  del f_out['X']
- out_x = f_out.create_dataset('X', shape=(nc, ng), chunks=(chunk_size, ng), dtype='float32')
+ # Create dataset with SAFE chunks
+ out_x = f_out.create_dataset('X', shape=(nc, ng), chunks=(storage_chunk_rows, ng), dtype='float32')

  print("Phase [1/2]: COMPLETE")

@@ -185,15 +194,11 @@ def NBumiPearsonResidualsApproxGPU(
  tis_chunk_gpu = tis_gpu[i:end_row]
  mus_chunk_gpu = tjs_gpu[cupy.newaxis, :] * tis_chunk_gpu[:, cupy.newaxis] / total

- # Approx: Denom = Sqrt(Mu)
  denominator_gpu = cupy.sqrt(mus_chunk_gpu)
-
- # --- LOGIC RESTORED: Prevent Division by Zero ---
  denominator_gpu = cupy.where(denominator_gpu == 0, 1, denominator_gpu)

  pearson_chunk_gpu = (counts_chunk_dense_gpu - mus_chunk_gpu) / denominator_gpu

- # [OPTIMIZATION] Cast to float32 on GPU to halve PCIe transfer time
  out_x[i:end_row, :] = pearson_chunk_gpu.astype(cupy.float32).get()

  del counts_chunk_dense_gpu, counts_chunk_sparse_gpu, mus_chunk_gpu, pearson_chunk_gpu, denominator_gpu
@@ -201,7 +206,6 @@ def NBumiPearsonResidualsApproxGPU(

  print(f"Phase [2/2]: COMPLETE{' '*50}")

- # --- LOGIC RESTORED: Explicit File Cleanup ---
  if hasattr(adata_in, "file") and adata_in.file is not None:
  adata_in.file.close()

@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:

  setuptools.setup(
  name="M3Drop",
- version="0.4.38", # Version bump
+ version="0.4.39", # Version bump
  author="Tallulah Andrews",
  author_email="tandrew6@uwo.ca",
  description="A Python implementation of the M3Drop single-cell RNA-seq analysis tool.",
7 files without changes