M3Drop 0.4.39-py3-none-any.whl → 0.4.40-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
m3Drop/diagnosticsGPU.py CHANGED
@@ -17,7 +17,10 @@ from .coreGPU import hidden_calc_valsGPU, NBumiFitModelGPU, NBumiFitDispVsMeanGP
 from cupy.sparse import csr_matrix as cp_csr_matrix
 import scipy.sparse as sp
 from scipy.sparse import csr_matrix as sp_csr_matrix
+
 import statsmodels.api as sm
+from scipy.stats import norm
+from statsmodels.stats.multitest import multipletests
 
 def NBumiFitBasicModelGPU(
     cleaned_filename: str,
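Note on the two new imports: `scipy.stats.norm` turns a test statistic into a p-value and `statsmodels.stats.multitest.multipletests` applies a multiple-testing correction. How this module actually uses them is not visible in this hunk, so the following is only an illustrative sketch of the usual pattern (the array values are invented):

import numpy as np
from scipy.stats import norm
from statsmodels.stats.multitest import multipletests

# Hypothetical per-gene z-scores (illustrative values only)
z_scores = np.array([0.5, 2.1, 3.4, -1.2])

# Two-sided p-values from the standard normal survival function
p_values = 2.0 * norm.sf(np.abs(z_scores))

# Benjamini-Hochberg FDR correction; 'reject' flags genes below the 5% FDR threshold
reject, q_values, _, _ = multipletests(p_values, alpha=0.05, method='fdr_bh')
print(q_values)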
@@ -26,22 +29,27 @@ def NBumiFitBasicModelGPU(
     chunk_size: int = None
 ) -> dict:
     """
-    Fits a simpler, unadjusted NB model out-of-core.
+    Fits a simpler, unadjusted NB model out-of-core using a GPU-accelerated
+    algorithm. Designed to work with a standard (cell, gene) sparse matrix.
     """
     start_time = time.perf_counter()
     print(f"FUNCTION: NBumiFitBasicModel() | FILE: {cleaned_filename}")
 
+    # [GOVERNOR INTEGRATION] Calculate optimal chunk size if not provided
     if chunk_size is None:
         chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=3.0, is_dense=True)
 
     # --- Phase 1: Initialization ---
+    print("Phase [1/2]: Initializing parameters and arrays on GPU...")
     tjs = stats['tjs'].values
     nc, ng = stats['nc'], stats['ng']
 
     tjs_gpu = cp.asarray(tjs, dtype=cp.float64)
     sum_x_sq_gpu = cp.zeros(ng, dtype=cp.float64)
+    print("Phase [1/2]: COMPLETE")
 
     # --- Phase 2: Calculate Variance from Data Chunks ---
+    print("Phase [2/2]: Calculating variance from data chunks...")
     with h5py.File(cleaned_filename, 'r') as f_in:
         x_group = f_in['X']
         h5_indptr = x_group['indptr']
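For readers unfamiliar with the out-of-core layout used throughout this file: an .h5ad written in CSR form keeps three datasets under X (data, indices, indptr), and a block of cells [i, end_row) can be sliced by offsets taken from indptr without loading the full matrix. A minimal CPU-only sketch of that access pattern (the file name and chunk size are placeholders; the module itself sizes chunks with get_optimal_chunk_size as shown above):

import h5py
import numpy as np

chunk_size = 10_000  # placeholder; the module derives this from get_optimal_chunk_size()

with h5py.File("cleaned.h5ad", "r") as f:       # hypothetical file name
    indptr = f["X"]["indptr"][:]                # one entry per cell, plus one
    n_cells = len(indptr) - 1
    for i in range(0, n_cells, chunk_size):
        end_row = min(i + chunk_size, n_cells)
        start, stop = indptr[i], indptr[end_row]
        data = f["X"]["data"][start:stop]       # nonzero values for these cells
        genes = f["X"]["indices"][start:stop]   # column (gene) index of each value
        # ... accumulate per-gene statistics here, as the GPU code does with cp.add.at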
@@ -53,32 +61,65 @@ def NBumiFitBasicModelGPU(
             print(f"Phase [2/2]: Processing: {end_row} of {nc} cells.", end='\r')
 
             start_idx, end_idx = h5_indptr[i], h5_indptr[end_row]
-            if start_idx == end_idx: continue
+            if start_idx == end_idx:
+                continue
 
-            # Original processing
-            data_slice = h5_data[start_idx:end_idx]
-            indices_slice = h5_indices[start_idx:end_idx]
-
-            data_gpu = cp.asarray(data_slice, dtype=cp.float64)
-            indices_gpu = cp.asarray(indices_slice)
-
-            cp.add.at(sum_x_sq_gpu, indices_gpu, data_gpu**2)
+            # Process in smaller sub-chunks if needed
+            max_elements = 5_000_000  # Process max 5M elements at a time
 
-            del data_gpu, indices_gpu
-            cp.get_default_memory_pool().free_all_blocks()
+            if end_idx - start_idx > max_elements:
+                # Process in sub-chunks
+                for sub_start in range(start_idx, end_idx, max_elements):
+                    sub_end = min(sub_start + max_elements, end_idx)
+
+                    data_slice = h5_data[sub_start:sub_end]
+                    indices_slice = h5_indices[sub_start:sub_end]
+
+                    data_gpu = cp.asarray(data_slice, dtype=cp.float64)
+                    indices_gpu = cp.asarray(indices_slice)
+
+                    # Accumulate the sum of squares for each gene
+                    cp.add.at(sum_x_sq_gpu, indices_gpu, data_gpu**2)
+
+                    # Free GPU memory
+                    del data_gpu, indices_gpu
+                    cp.get_default_memory_pool().free_all_blocks()
+            else:
+                # Original processing for smaller chunks
+                data_slice = h5_data[start_idx:end_idx]
+                indices_slice = h5_indices[start_idx:end_idx]
+
+                data_gpu = cp.asarray(data_slice, dtype=cp.float64)
+                indices_gpu = cp.asarray(indices_slice)
+
+                # Accumulate the sum of squares for each gene
+                cp.add.at(sum_x_sq_gpu, indices_gpu, data_gpu**2)
+
+                # Clean up
+                del data_gpu, indices_gpu
+                cp.get_default_memory_pool().free_all_blocks()
 
-    print(f"Phase [2/2]: COMPLETE{' '*50}")
-
-    mean_x_sq_gpu = sum_x_sq_gpu / nc
-    mean_mu_gpu = tjs_gpu / nc
-    my_rowvar_gpu = mean_x_sq_gpu - mean_mu_gpu**2
-    size_gpu = mean_mu_gpu**2 / (my_rowvar_gpu - mean_mu_gpu)
+    print(f"Phase [2/2]: COMPLETE{' ' * 50}")
+
+    # --- Final calculations on GPU ---
+    if is_logged:
+        raise NotImplementedError("Logged data variance calculation is not implemented for out-of-core.")
+    else:
+        # Variance of raw data: Var(X) = E[X^2] - E[X]^2
+        mean_x_sq_gpu = sum_x_sq_gpu / nc
+        mean_mu_gpu = tjs_gpu / nc
+        my_rowvar_gpu = mean_x_sq_gpu - mean_mu_gpu**2
+
+        # Calculate dispersion ('size')
+        size_gpu = mean_mu_gpu**2 / (my_rowvar_gpu - mean_mu_gpu)
 
     max_size_val = cp.nanmax(size_gpu) * 10
-    if cp.isnan(max_size_val): max_size_val = 1000
+    if cp.isnan(max_size_val):
+        max_size_val = 1000
     size_gpu[cp.isnan(size_gpu) | (size_gpu <= 0)] = max_size_val
     size_gpu[size_gpu < 1e-10] = 1e-10
 
+    # Move results to CPU
     my_rowvar_cpu = my_rowvar_gpu.get()
    sizes_cpu = size_gpu.get()
 
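The closing block is a method-of-moments fit: with per-gene mean mu = tjs / nc and raw variance Var(X) = E[X^2] - E[X]^2, the negative-binomial size is mu^2 / (Var(X) - mu), with invalid values replaced by a large finite cap. The same arithmetic in plain NumPy, on toy numbers:

import numpy as np

nc = 4  # number of cells (toy value)
sum_x = np.array([8.0, 2.0, 40.0])       # per-gene totals (tjs)
sum_x_sq = np.array([20.0, 2.0, 520.0])  # per-gene sums of squared counts

mean_mu = sum_x / nc                  # E[X]
var = sum_x_sq / nc - mean_mu**2      # E[X^2] - E[X]^2
size = mean_mu**2 / (var - mean_mu)   # NB dispersion ('size'); negative where var <= mean

# Mirror the clean-up in the GPU code: replace invalid sizes with a large finite value
max_size = np.nanmax(size) * 10
if np.isnan(max_size):
    max_size = 1000
size[np.isnan(size) | (size <= 0)] = max_size
size[size < 1e-10] = 1e-10
print(size)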
@@ -99,17 +140,21 @@ def NBumiCheckFitFSGPU(
     plot_filename=None
 ) -> dict:
     """
-    Calculates fit errors. [FIXED] Added clamps to prevent >1.0 probability errors.
+    FIXED VERSION - No cupy.errstate, proper GPU computation.
     """
     start_time = time.perf_counter()
     print(f"FUNCTION: NBumiCheckFitFS() | FILE: {cleaned_filename}")
 
+    # [GOVERNOR INTEGRATION] Adaptive chunk sizing
     if chunk_size is None:
         chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=5.0, is_dense=True)
 
+    # --- Phase 1: Initialization ---
+    print("Phase [1/2]: Initializing parameters and arrays on GPU...")
    vals = fit['vals']
     size_coeffs = NBumiFitDispVsMeanGPU(fit, suppress_plot=True)
 
+    # Must use float64 for precision
     tjs_gpu = cp.asarray(vals['tjs'].values, dtype=cp.float64)
     tis_gpu = cp.asarray(vals['tis'].values, dtype=cp.float64)
     total = vals['total']
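NBumiFitDispVsMeanGPU supplies the intercept and slope of a regression of log(size) on log(mean expression); NBumiCheckFitFSGPU then evaluates that line to obtain a smoothed per-gene dispersion. In isolation the step looks like this (the coefficients here are invented for illustration):

import numpy as np

intercept, slope = -0.5, 0.8             # illustrative regression coefficients
mean_expression = np.array([0.1, 1.0, 10.0])

# Smoothed dispersion: exp(intercept + slope * log(mean))
smoothed_size = np.exp(intercept + slope * np.log(mean_expression))
print(smoothed_size)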
@@ -119,41 +164,84 @@ def NBumiCheckFitFSGPU(
     mean_expression_gpu = tjs_gpu / nc
     log_mean_expression_gpu = cp.log(mean_expression_gpu)
     smoothed_size_gpu = cp.exp(size_coeffs[0] + size_coeffs[1] * log_mean_expression_gpu)
-
-    # [FIX] Clamp smoothed size to prevent instability
-    smoothed_size_gpu = cp.maximum(smoothed_size_gpu, 1e-8)
 
+    # Initialize result arrays
     row_ps_gpu = cp.zeros(ng, dtype=cp.float64)
     col_ps_gpu = cp.zeros(nc, dtype=cp.float64)
+    print("Phase [1/2]: COMPLETE")
 
-    for i in range(0, nc, chunk_size):
-        end_col = min(i + chunk_size, nc)
+    # --- Phase 2: Calculate Expected Dropouts ---
+    print("Phase [2/2]: Calculating expected dropouts from data chunks...")
+
+    # [GOVERNOR INTEGRATION] Removed naive calculation, utilizing Governor's chunk_size
+    optimal_chunk = chunk_size
+    print(f" Using governor chunk size: {optimal_chunk}")
+
+    for i in range(0, nc, optimal_chunk):
+        end_col = min(i + optimal_chunk, nc)
         print(f"Phase [2/2]: Processing: {end_col} of {nc} cells.", end='\r')
 
         tis_chunk_gpu = tis_gpu[i:end_col]
+
+        # Standard calculation without errstate
         mu_chunk_gpu = tjs_gpu[:, cp.newaxis] * tis_chunk_gpu[cp.newaxis, :] / total
 
-        # [FIX] Safer power calculation
+        # Calculate p_is directly - CuPy handles overflow internally
         base = 1 + mu_chunk_gpu / smoothed_size_gpu[:, cp.newaxis]
         p_is_chunk_gpu = cp.power(base, -smoothed_size_gpu[:, cp.newaxis])
 
-        # [FIX] Clamp probabilities to valid range [0, 1]
-        p_is_chunk_gpu = cp.clip(p_is_chunk_gpu, 0.0, 1.0)
-        p_is_chunk_gpu = cp.nan_to_num(p_is_chunk_gpu, nan=0.0)
+        # Handle any inf/nan values that might have occurred
+        p_is_chunk_gpu = cp.nan_to_num(p_is_chunk_gpu, nan=0.0, posinf=1.0, neginf=0.0)
 
+        # Sum results
         row_ps_gpu += p_is_chunk_gpu.sum(axis=1)
         col_ps_gpu[i:end_col] = p_is_chunk_gpu.sum(axis=0)
 
+        # Clean up
         del mu_chunk_gpu, p_is_chunk_gpu, base, tis_chunk_gpu
-        cp.get_default_memory_pool().free_all_blocks()
+
+        # Periodic memory cleanup
+        mempool = cp.get_default_memory_pool()
+        if (i // optimal_chunk) % 10 == 0:
+            mempool.free_all_blocks()
 
     print(f"Phase [2/2]: COMPLETE{' ' * 50}")
 
+    # Move results to CPU
     row_ps_cpu = row_ps_gpu.get()
     col_ps_cpu = col_ps_gpu.get()
     djs_cpu = vals['djs'].values
     dis_cpu = vals['dis'].values
 
+    # Plotting
+    if not suppress_plot:
+        plt.figure(figsize=(12, 5))
+        plt.subplot(1, 2, 1)
+        plt.scatter(djs_cpu, row_ps_cpu, alpha=0.5, s=10)
+        plt.title("Gene-specific Dropouts (Smoothed)")
+        plt.xlabel("Observed")
+        plt.ylabel("Fit")
+        lims = [min(plt.xlim()[0], plt.ylim()[0]), max(plt.xlim()[1], plt.ylim()[1])]
+        plt.plot(lims, lims, 'r-', alpha=0.75, zorder=0, label="y=x line")
+        plt.grid(True); plt.legend()
+
+        plt.subplot(1, 2, 2)
+        plt.scatter(dis_cpu, col_ps_cpu, alpha=0.5, s=10)
+        plt.title("Cell-specific Dropouts (Smoothed)")
+        plt.xlabel("Observed")
+        plt.ylabel("Expected")
+        lims = [min(plt.xlim()[0], plt.ylim()[0]), max(plt.xlim()[1], plt.ylim()[1])]
+        plt.plot(lims, lims, 'r-', alpha=0.75, zorder=0, label="y=x line")
+        plt.grid(True); plt.legend()
+
+        plt.tight_layout()
+        if plot_filename:
+            plt.savefig(plot_filename, dpi=300, bbox_inches='tight')
+            print(f"STATUS: Diagnostic plot saved to '{plot_filename}'")
+        plt.show()
+        plt.close()
+
+    # Calculate errors
     gene_error = np.sum((djs_cpu - row_ps_cpu)**2)
     cell_error = np.sum((dis_cpu - col_ps_cpu)**2)
 
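The chunked loop above computes, for every gene j and cell i, the expected count mu_ij = tjs[j] * tis[i] / total and the negative-binomial zero probability p0 = (1 + mu/size)^(-size); summing p0 across cells gives the expected dropouts per gene (row_ps) and across genes per cell (col_ps), which are then compared with the observed djs/dis by a sum of squared differences. A toy NumPy version of one such chunk:

import numpy as np

tjs = np.array([100.0, 10.0])        # per-gene totals
tis = np.array([50.0, 30.0, 20.0])   # per-cell totals
total = tis.sum()
size = np.array([2.0, 0.5])          # smoothed per-gene dispersion

# Expected count for every (gene, cell) pair
mu = tjs[:, None] * tis[None, :] / total

# NB probability that the observed count is zero
p0 = np.power(1.0 + mu / size[:, None], -size[:, None])

row_ps = p0.sum(axis=1)   # expected dropouts per gene
col_ps = p0.sum(axis=0)   # expected dropouts per cell

# Fit error as used in the function: squared difference from observed dropouts
observed_djs = np.array([1.0, 2.0])  # toy observed per-gene dropout counts
gene_error = np.sum((observed_djs - row_ps) ** 2)
print(row_ps, col_ps, gene_error)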
@@ -177,90 +265,132 @@ def NBumiCompareModelsGPU(
     plot_filename=None
 ) -> dict:
     """
-    OPTIMIZED VERSION (IN-MEMORY):
-    - Calculates Basic Fit without writing 46GB file.
+    OPTIMIZED VERSION - Faster normalization and sparse matrix writing.
     """
     pipeline_start_time = time.time()
     print(f"FUNCTION: NBumiCompareModels() | Comparing models for {cleaned_filename}")
 
+    # [GOVERNOR INTEGRATION] Calculate chunk size for normalization phase (heavy IO)
     if chunk_size is None:
-        chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=12.0, is_dense=False)
-
-    print("Phase [1/3]: Calculating Basic Model (Depth-Normalized) variance on-the-fly...")
+        # Multiplier 10.0 for safety during normalization of massive dense expansion
+        chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=10.0, is_dense=True)
+
+    # --- Phase 1: OPTIMIZED Normalization ---
+    print("Phase [1/4]: Creating temporary 'basic' normalized data file...")
+    basic_norm_filename = cleaned_filename.replace('.h5ad', '_basic_norm.h5ad')
+
+    # Read metadata. In 'backed' mode, this keeps a file handle open.
+    adata_meta = anndata.read_h5ad(cleaned_filename, backed='r')
+    nc, ng = adata_meta.shape
+    obs_df = adata_meta.obs.copy()
+    var_df = adata_meta.var.copy()
 
-    # 1. Prepare Size Factors
-    tjs = stats['tjs'].values
-    tis = stats['tis'].values
-    nc, ng = stats['nc'], stats['ng']
+    cell_sums = stats['tis'].values
+    median_sum = np.median(cell_sums[cell_sums > 0])
 
-    median_sum = np.median(tis[tis > 0])
-    size_factors = np.ones_like(tis, dtype=np.float32)
-    non_zero_mask = tis > 0
-    size_factors[non_zero_mask] = tis[non_zero_mask] / median_sum
+    # Avoid division by zero for cells with zero counts
+    size_factors = np.ones_like(cell_sums, dtype=np.float32)
+    non_zero_mask = cell_sums > 0
+    size_factors[non_zero_mask] = cell_sums[non_zero_mask] / median_sum
+
+    adata_out = anndata.AnnData(obs=obs_df, var=var_df)
+    # [OPTION 2 CHANGE] Removed compression="gzip" to speed up I/O
+    adata_out.write_h5ad(basic_norm_filename)
+
+    with h5py.File(basic_norm_filename, 'a') as f_out:
+        if 'X' in f_out:
+            del f_out['X']
+        x_group_out = f_out.create_group('X')
+        x_group_out.attrs['encoding-type'] = 'csr_matrix'
+        x_group_out.attrs['encoding-version'] = '0.1.0'
+        x_group_out.attrs['shape'] = np.array([nc, ng], dtype='int64')
+
+        out_data = x_group_out.create_dataset('data', shape=(0,), maxshape=(None,), dtype='float32')
+        out_indices = x_group_out.create_dataset('indices', shape=(0,), maxshape=(None,), dtype='int32')
+        out_indptr = x_group_out.create_dataset('indptr', shape=(nc + 1,), dtype='int64')
+        out_indptr[0] = 0
+        current_nnz = 0
+
+        with h5py.File(cleaned_filename, 'r') as f_in:
+            h5_indptr = f_in['X']['indptr']
+            h5_data = f_in['X']['data']
+            h5_indices = f_in['X']['indices']
+
+            for i in range(0, nc, chunk_size):
+                end_row = min(i + chunk_size, nc)
+                print(f"Phase [1/4]: Normalizing: {end_row} of {nc} cells.", end='\r')
+
+                start_idx, end_idx = h5_indptr[i], h5_indptr[end_row]
+                if start_idx == end_idx:
+                    out_indptr[i + 1 : end_row + 1] = current_nnz
+                    continue
+
+                # Read data for the chunk
+                data_slice = h5_data[start_idx:end_idx]
+                indices_slice = h5_indices[start_idx:end_idx]
+                indptr_slice = h5_indptr[i:end_row + 1] - start_idx
+
+                # Move to GPU for fast normalization
+                data_gpu = cp.asarray(data_slice.copy(), dtype=cp.float32)
+
+                indptr_gpu = cp.asarray(indptr_slice.copy())
+                nnz_in_chunk = indptr_gpu[-1].item()
+                cell_boundary_markers = cp.zeros(nnz_in_chunk, dtype=cp.int32)
+                if len(indptr_gpu) > 1:
+                    cell_boundary_markers[indptr_gpu[:-1]] = 1
+                row_indices = cp.cumsum(cell_boundary_markers, axis=0) - 1
+
+                size_factors_for_chunk = cp.asarray(size_factors[i:end_row])
+
+                data_gpu /= size_factors_for_chunk[row_indices]
+
+                data_cpu = np.round(data_gpu.get())
+
+                num_cells_in_chunk = end_row - i
+                chunk_sp = sp_csr_matrix((data_cpu, indices_slice, indptr_slice),
+                                         shape=(num_cells_in_chunk, ng))
+
+                nnz_chunk = chunk_sp.nnz
+                out_data.resize(current_nnz + nnz_chunk, axis=0)
+                out_data[current_nnz:] = chunk_sp.data
+
+                out_indices.resize(current_nnz + nnz_chunk, axis=0)
+                out_indices[current_nnz:] = chunk_sp.indices
+
+                new_indptr_list = chunk_sp.indptr[1:].astype(np.int64) + current_nnz
+                out_indptr[i + 1 : end_row + 1] = new_indptr_list
+
+                current_nnz += nnz_chunk
+
+                del data_gpu, row_indices, size_factors_for_chunk, indptr_gpu
+                cp.get_default_memory_pool().free_all_blocks()
+
+    print(f"Phase [1/4]: COMPLETE{' '*50}")
+
+    print("Phase [2/4]: Fitting Basic Model on normalized data...")
 
-    sum_x_sq_gpu = cp.zeros(ng, dtype=cp.float64)
-    sum_x_gpu = cp.zeros(ng, dtype=cp.float64)
+    # [GOVERNOR INTEGRATION] Calculate chunk size for basic fit on the heavy normalized file
+    chunk_size_basic = get_optimal_chunk_size(basic_norm_filename, multiplier=10.0, is_dense=True)
 
-    with h5py.File(cleaned_filename, 'r') as f_in:
-        h5_indptr = f_in['X']['indptr']
-        h5_data = f_in['X']['data']
-        h5_indices = f_in['X']['indices']
-
-        for i in range(0, nc, chunk_size):
-            end_row = min(i + chunk_size, nc)
-            print(f"Phase [1/3]: Processing: {end_row} of {nc} cells.", end='\r')
-
-            start_idx, end_idx = h5_indptr[i], h5_indptr[end_row]
-            if start_idx == end_idx: continue
-
-            data_gpu = cp.asarray(h5_data[start_idx:end_idx], dtype=cp.float32)
-            indices_gpu = cp.asarray(h5_indices[start_idx:end_idx])
-            indptr_gpu = cp.asarray(h5_indptr[i:end_row + 1] - start_idx)
-
-            nnz_in_chunk = indptr_gpu[-1].item()
-            cell_boundary_markers = cp.zeros(nnz_in_chunk, dtype=cp.int32)
-            if len(indptr_gpu) > 1:
-                cell_boundary_markers[indptr_gpu[:-1]] = 1
-            row_indices = cp.cumsum(cell_boundary_markers, axis=0) - 1
-
-            sf_chunk = cp.asarray(size_factors[i:end_row])
-
-            # Normalize
-            data_gpu /= sf_chunk[row_indices]
-
-            # Accumulate
-            cp.add.at(sum_x_sq_gpu, indices_gpu, data_gpu**2)
-            cp.add.at(sum_x_gpu, indices_gpu, data_gpu)
-
-            del data_gpu, indices_gpu, indptr_gpu, row_indices, sf_chunk, cell_boundary_markers
-            cp.get_default_memory_pool().free_all_blocks()
-
-    print(f"Phase [1/3]: COMPLETE{' '*50}")
-
-    mean_x_sq_gpu = sum_x_sq_gpu / nc
-    mean_mu_gpu = sum_x_gpu / nc
-    my_rowvar_gpu = mean_x_sq_gpu - mean_mu_gpu**2
+    stats_basic = hidden_calc_valsGPU(basic_norm_filename)  # hidden_calc uses its own governor internally
+    fit_basic = NBumiFitBasicModelGPU(basic_norm_filename, stats_basic, chunk_size=chunk_size_basic)
+    print("Phase [2/4]: COMPLETE")
 
-    size_gpu = mean_mu_gpu**2 / (my_rowvar_gpu - mean_mu_gpu)
+    print("Phase [3/4]: Evaluating fits of both models on ORIGINAL data...")
+    # [GOVERNOR INTEGRATION] Chunk size for check fit
+    chunk_size_check = get_optimal_chunk_size(cleaned_filename, multiplier=5.0, is_dense=True)
 
-    max_size_val = cp.nanmax(size_gpu) * 10
-    if cp.isnan(max_size_val): max_size_val = 1000
-    size_gpu[cp.isnan(size_gpu) | (size_gpu <= 0)] = max_size_val
-    size_gpu[size_gpu < 1e-10] = 1e-10
+    check_adjust = NBumiCheckFitFSGPU(cleaned_filename, fit_adjust, suppress_plot=True, chunk_size=chunk_size_check)
 
-    fit_basic = {
-        'sizes': pd.Series(size_gpu.get(), index=stats['tjs'].index),
+    fit_basic_for_eval = {
+        'sizes': fit_basic['sizes'],
         'vals': stats,
-        'var_obs': pd.Series(my_rowvar_gpu.get(), index=stats['tjs'].index)
+        'var_obs': fit_basic['var_obs']
     }
-
-    print("Phase [2/3]: Evaluating fit errors on ORIGINAL data...")
-    check_adjust = NBumiCheckFitFSGPU(cleaned_filename, fit_adjust, suppress_plot=True)
-    check_basic = NBumiCheckFitFSGPU(cleaned_filename, fit_basic, suppress_plot=True)
-    print("Phase [2/3]: COMPLETE")
+    check_basic = NBumiCheckFitFSGPU(cleaned_filename, fit_basic_for_eval, suppress_plot=True, chunk_size=chunk_size_check)
+    print("Phase [3/4]: COMPLETE")
 
-    print("Phase [3/3]: Generating comparison...")
-
+    print("Phase [4/4]: Generating final comparison...")
     nc_data = stats['nc']
     mean_expr = stats['tjs'] / nc_data
     observed_dropout = stats['djs'] / nc_data
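The least obvious move in the normalization loop is expanding the chunk-local indptr into a per-nonzero row index so that every value can be divided by its own cell's size factor in a single vectorized step: set a 1 at the first nonzero of each cell, take a cumulative sum, subtract 1. A NumPy rendering of that trick on a toy CSR chunk (note it assumes every cell in the chunk has at least one nonzero entry, which is what the GPU code relies on as well):

import numpy as np

# Toy CSR chunk: 3 cells, their nonzero values, and the indptr delimiting each cell's run
data = np.array([4.0, 2.0, 6.0, 8.0, 10.0], dtype=np.float32)
indptr = np.array([0, 2, 3, 5])           # cell 0 owns data[0:2], cell 1 data[2:3], cell 2 data[3:5]
size_factors = np.array([2.0, 1.0, 0.5], dtype=np.float32)

# Mark the first nonzero of each cell, then cumulative-sum to get a row index per nonzero
markers = np.zeros(len(data), dtype=np.int32)
markers[indptr[:-1]] = 1
row_indices = np.cumsum(markers) - 1      # -> [0, 0, 1, 2, 2]

# One vectorized division normalizes every value by its own cell's size factor
data /= size_factors[row_indices]
print(data)                               # [ 2.  1.  6. 16. 20.]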
@@ -303,8 +433,15 @@ def NBumiCompareModelsGPU(
         plt.show()
 
     plt.close()
-
+    print("Phase [4/4]: COMPLETE")
+
     pipeline_end_time = time.time()
+
+    # --- ADD THIS LINE TO FIX THE ERROR ---
+    adata_meta.file.close()  # Explicitly close the file handle
+
+    os.remove(basic_norm_filename)
+    print(f"STATUS: Temporary file '{basic_norm_filename}' removed.")
     print(f"Total time: {pipeline_end_time - pipeline_start_time:.2f} seconds.\n")
 
     return {
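The explicitly added adata_meta.file.close() matters because anndata.read_h5ad(..., backed='r') keeps an HDF5 handle open for the lifetime of the object, and leaving handles open while files are rewritten or deleted is a common source of errors (particularly on Windows). A minimal sketch of the safe ordering, with placeholder file names:

import os
import anndata

adata = anndata.read_h5ad("cleaned.h5ad", backed="r")   # hypothetical input file
obs, var = adata.obs.copy(), adata.var.copy()           # grab what is needed while backed

# ... build and use a temporary derived file here ...

adata.file.close()   # release the HDF5 handle before touching files on disk
if os.path.exists("cleaned_basic_norm.h5ad"):
    os.remove("cleaned_basic_norm.h5ad")                # now the temporary file can be removed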
@@ -319,14 +456,22 @@ def NBumiPlotDispVsMeanGPU(
 ):
     """
     Generates a diagnostic plot of the dispersion vs. mean expression.
+
+    Args:
+        fit (dict): The 'fit' object from NBumiFitModelGPU.
+        suppress_plot (bool): If True, the plot will not be displayed on screen.
+        plot_filename (str, optional): Path to save the plot. If None, not saved.
     """
     print("FUNCTION: NBumiPlotDispVsMean()")
 
+    # --- 1. Extract data and regression coefficients ---
     mean_expression = fit['vals']['tjs'].values / fit['vals']['nc']
     sizes = fit['sizes'].values
     coeffs = NBumiFitDispVsMeanGPU(fit, suppress_plot=True)
     intercept, slope = coeffs[0], coeffs[1]
 
+    # --- 2. Calculate the fitted line for plotting ---
+    # Create a smooth, continuous line using the regression coefficients
     log_mean_expr_range = np.linspace(
         np.log(mean_expression[mean_expression > 0].min()),
         np.log(mean_expression.max()),
@@ -335,6 +480,7 @@ def NBumiPlotDispVsMeanGPU(
     log_fitted_sizes = intercept + slope * log_mean_expr_range
     fitted_sizes = np.exp(log_fitted_sizes)
 
+    # --- 3. Create the plot ---
     plt.figure(figsize=(8, 6))
     plt.scatter(mean_expression, sizes, label='Observed Dispersion', alpha=0.5, s=8)
     plt.plot(np.exp(log_mean_expr_range), fitted_sizes, color='red', label='Regression Fit', linewidth=2)
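The plotting hunks above rebuild the regression line in the original coordinates: evaluate intercept + slope over a grid of log-mean values, exponentiate, and plot against mean expression. A standalone NumPy/Matplotlib sketch of the same construction (all numbers invented):

import numpy as np
import matplotlib.pyplot as plt

mean_expression = np.array([0.05, 0.2, 1.0, 5.0, 20.0])   # toy per-gene means
sizes = np.array([0.3, 0.6, 1.5, 4.0, 9.0])               # toy per-gene dispersions
intercept, slope = 0.2, 0.9                                # toy regression coefficients

# Evaluate the fit over a smooth grid in log space, then map back with exp()
log_range = np.linspace(np.log(mean_expression.min()), np.log(mean_expression.max()), 100)
fitted_sizes = np.exp(intercept + slope * log_range)

plt.scatter(mean_expression, sizes, s=8, alpha=0.5, label="Observed Dispersion")
plt.plot(np.exp(log_range), fitted_sizes, color="red", label="Regression Fit")
plt.xscale("log"); plt.yscale("log")
plt.legend(); plt.show()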
m3drop-0.4.40.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: M3Drop
-Version: 0.4.39
+Version: 0.4.40
 Summary: A Python implementation of the M3Drop single-cell RNA-seq analysis tool.
 Home-page: https://github.com/PragalvhaSharma/m3DropNew
 Author: Tallulah Andrews
m3drop-0.4.40.dist-info/RECORD CHANGED
@@ -2,11 +2,11 @@ m3Drop/__init__.py,sha256=yaUXhUArnwgLf01Zlpqa5qm9K1aByGqQupIoCaLYiDw,2462
 m3Drop/coreCPU.py,sha256=3kPYlSVlYrJEhRUCIoVzmR8CYBaHpxVM5nx-3YQI4d4,17204
 m3Drop/coreGPU.py,sha256=k7A06VNgfJ59J8g1VpfKxhTIKrEbW7Bj8pTbQqHaQL8,24571
 m3Drop/diagnosticsCPU.py,sha256=BecOKTz2GDjzjs9ycXYsyrSHi2UVgsM58RBuNE62vmU,14273
-m3Drop/diagnosticsGPU.py,sha256=9yGsPOAjxVZxh_J2uL2pUwGyjPl0wXAkJ7f69qKLeOA,12814
+m3Drop/diagnosticsGPU.py,sha256=m_r7mZ4s0h-YwJEriucrACNeDQxWPZHvd8RAmsXQYXE,19980
 m3Drop/normalizationCPU.py,sha256=4ulCrDZZjxVFh2y0i4ayPkNCsZYaOP-Xq2Dnzu9WXtg,5697
 m3Drop/normalizationGPU.py,sha256=r5gvJFkabEfCfIsVdpJzWGqve_Iy57EYsEyiLfDo8Mo,8539
-m3drop-0.4.39.dist-info/licenses/LICENSE,sha256=44Iqpp8Fc10Xzd5T7cT9UhO31Qftk3gBiCjtpwilP_k,1074
-m3drop-0.4.39.dist-info/METADATA,sha256=YdxFQNg4hI07uRADUE3JQ8i7AmAjpHGSLJsHmBLSvxM,5161
-m3drop-0.4.39.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-m3drop-0.4.39.dist-info/top_level.txt,sha256=AEULFEFIgFtAwS-KBlIFoYXrqczX_rwqrEcdK46GIrA,7
-m3drop-0.4.39.dist-info/RECORD,,
+m3drop-0.4.40.dist-info/licenses/LICENSE,sha256=44Iqpp8Fc10Xzd5T7cT9UhO31Qftk3gBiCjtpwilP_k,1074
+m3drop-0.4.40.dist-info/METADATA,sha256=rL09G9SvQ_JP_OEw-xrMNle-92XQFzn2-cHDyXkPpH4,5161
+m3drop-0.4.40.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+m3drop-0.4.40.dist-info/top_level.txt,sha256=AEULFEFIgFtAwS-KBlIFoYXrqczX_rwqrEcdK46GIrA,7
+m3drop-0.4.40.dist-info/RECORD,,