M3Drop 0.4.39__py3-none-any.whl → 0.4.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
m3Drop/diagnosticsGPU.py CHANGED
@@ -17,7 +17,10 @@ from .coreGPU import hidden_calc_valsGPU, NBumiFitModelGPU, NBumiFitDispVsMeanGPU
 from cupy.sparse import csr_matrix as cp_csr_matrix
 import scipy.sparse as sp
 from scipy.sparse import csr_matrix as sp_csr_matrix
+
 import statsmodels.api as sm
+from scipy.stats import norm
+from statsmodels.stats.multitest import multipletests
 
 def NBumiFitBasicModelGPU(
     cleaned_filename: str,
@@ -26,22 +29,27 @@ def NBumiFitBasicModelGPU(
     chunk_size: int = None
 ) -> dict:
     """
-    Fits a simpler, unadjusted NB model out-of-core.
+    Fits a simpler, unadjusted NB model out-of-core using a GPU-accelerated
+    algorithm. Designed to work with a standard (cell, gene) sparse matrix.
     """
     start_time = time.perf_counter()
     print(f"FUNCTION: NBumiFitBasicModel() | FILE: {cleaned_filename}")
 
+    # [GOVERNOR INTEGRATION] Calculate optimal chunk size if not provided
     if chunk_size is None:
         chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=3.0, is_dense=True)
 
     # --- Phase 1: Initialization ---
+    print("Phase [1/2]: Initializing parameters and arrays on GPU...")
     tjs = stats['tjs'].values
     nc, ng = stats['nc'], stats['ng']
 
     tjs_gpu = cp.asarray(tjs, dtype=cp.float64)
     sum_x_sq_gpu = cp.zeros(ng, dtype=cp.float64)
+    print("Phase [1/2]: COMPLETE")
 
     # --- Phase 2: Calculate Variance from Data Chunks ---
+    print("Phase [2/2]: Calculating variance from data chunks...")
     with h5py.File(cleaned_filename, 'r') as f_in:
         x_group = f_in['X']
         h5_indptr = x_group['indptr']
@@ -53,32 +61,44 @@ def NBumiFitBasicModelGPU(
             print(f"Phase [2/2]: Processing: {end_row} of {nc} cells.", end='\r')
 
             start_idx, end_idx = h5_indptr[i], h5_indptr[end_row]
-            if start_idx == end_idx: continue
+            if start_idx == end_idx:
+                continue
 
-            # Original processing
+            # Original processing for smaller chunks
             data_slice = h5_data[start_idx:end_idx]
             indices_slice = h5_indices[start_idx:end_idx]
 
             data_gpu = cp.asarray(data_slice, dtype=cp.float64)
             indices_gpu = cp.asarray(indices_slice)
 
+            # Accumulate the sum of squares for each gene
            cp.add.at(sum_x_sq_gpu, indices_gpu, data_gpu**2)
 
+            # Clean up
             del data_gpu, indices_gpu
             cp.get_default_memory_pool().free_all_blocks()
 
-    print(f"Phase [2/2]: COMPLETE{' '*50}")
-
-    mean_x_sq_gpu = sum_x_sq_gpu / nc
-    mean_mu_gpu = tjs_gpu / nc
-    my_rowvar_gpu = mean_x_sq_gpu - mean_mu_gpu**2
-    size_gpu = mean_mu_gpu**2 / (my_rowvar_gpu - mean_mu_gpu)
+    print(f"Phase [2/2]: COMPLETE ")
+
+    # --- Final calculations on GPU ---
+    if is_logged:
+        raise NotImplementedError("Logged data variance calculation is not implemented for out-of-core.")
+    else:
+        # Variance of raw data: Var(X) = E[X^2] - E[X]^2
+        mean_x_sq_gpu = sum_x_sq_gpu / nc
+        mean_mu_gpu = tjs_gpu / nc
+        my_rowvar_gpu = mean_x_sq_gpu - mean_mu_gpu**2
+
+        # Calculate dispersion ('size')
+        size_gpu = mean_mu_gpu**2 / (my_rowvar_gpu - mean_mu_gpu)
 
     max_size_val = cp.nanmax(size_gpu) * 10
-    if cp.isnan(max_size_val): max_size_val = 1000
+    if cp.isnan(max_size_val):
+        max_size_val = 1000
     size_gpu[cp.isnan(size_gpu) | (size_gpu <= 0)] = max_size_val
     size_gpu[size_gpu < 1e-10] = 1e-10
 
+    # Move results to CPU
     my_rowvar_cpu = my_rowvar_gpu.get()
     sizes_cpu = size_gpu.get()
 
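Note (not part of the diff): the new else-branch above estimates the negative binomial dispersion by moment matching, Var(X) = E[X^2] - E[X]^2 and size = mu^2 / (Var - mu). A minimal NumPy sketch of that step, using made-up toy numbers rather than anything from the package:

    import numpy as np

    nc = 4                                        # hypothetical number of cells
    tjs = np.array([8.0, 2.0, 40.0])              # per-gene total counts (toy values)
    sum_x_sq = np.array([30.0, 4.0, 500.0])       # per-gene accumulated sum of squares (toy values)
    mean_mu = tjs / nc                            # E[X] per gene
    my_rowvar = sum_x_sq / nc - mean_mu**2        # Var(X) = E[X^2] - E[X]^2
    size = mean_mu**2 / (my_rowvar - mean_mu)     # NB dispersion ('size')
    # Genes with Var <= mean give negative or NaN sizes; the diff clamps those
    # to 10x the largest finite size and floors everything at 1e-10.
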
@@ -99,17 +119,22 @@ def NBumiCheckFitFSGPU(
     plot_filename=None
 ) -> dict:
     """
-    Calculates fit errors. [FIXED] Added clamps to prevent >1.0 probability errors.
+    FIXED VERSION - No cupy.errstate, proper GPU computation.
     """
     start_time = time.perf_counter()
     print(f"FUNCTION: NBumiCheckFitFS() | FILE: {cleaned_filename}")
 
+    # [GOVERNOR INTEGRATION] Adaptive chunk sizing
+    # [CRITICAL FIX] Increased multiplier to 20.0 to prevent VRAM overflow during dense expansion
     if chunk_size is None:
-        chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=5.0, is_dense=True)
+        chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=20.0, is_dense=True)
 
+    # --- Phase 1: Initialization ---
+    print("Phase [1/2]: Initializing parameters and arrays on GPU...")
     vals = fit['vals']
     size_coeffs = NBumiFitDispVsMeanGPU(fit, suppress_plot=True)
 
+    # Must use float64 for precision
     tjs_gpu = cp.asarray(vals['tjs'].values, dtype=cp.float64)
     tis_gpu = cp.asarray(vals['tis'].values, dtype=cp.float64)
     total = vals['total']
@@ -119,41 +144,84 @@ def NBumiCheckFitFSGPU(
     mean_expression_gpu = tjs_gpu / nc
     log_mean_expression_gpu = cp.log(mean_expression_gpu)
     smoothed_size_gpu = cp.exp(size_coeffs[0] + size_coeffs[1] * log_mean_expression_gpu)
-
-    # [FIX] Clamp smoothed size to prevent instability
-    smoothed_size_gpu = cp.maximum(smoothed_size_gpu, 1e-8)
 
+    # Initialize result arrays
     row_ps_gpu = cp.zeros(ng, dtype=cp.float64)
     col_ps_gpu = cp.zeros(nc, dtype=cp.float64)
+    print("Phase [1/2]: COMPLETE")
 
-    for i in range(0, nc, chunk_size):
-        end_col = min(i + chunk_size, nc)
+    # --- Phase 2: Calculate Expected Dropouts ---
+    print("Phase [2/2]: Calculating expected dropouts from data chunks...")
+
+    # [GOVERNOR INTEGRATION] Removed naive calculation, utilizing Governor's chunk_size
+    optimal_chunk = chunk_size
+    print(f" Using governor chunk size: {optimal_chunk}")
+
+    for i in range(0, nc, optimal_chunk):
+        end_col = min(i + optimal_chunk, nc)
         print(f"Phase [2/2]: Processing: {end_col} of {nc} cells.", end='\r')
 
         tis_chunk_gpu = tis_gpu[i:end_col]
+
+        # Standard calculation without errstate
         mu_chunk_gpu = tjs_gpu[:, cp.newaxis] * tis_chunk_gpu[cp.newaxis, :] / total
 
-        # [FIX] Safer power calculation
+        # Calculate p_is directly - CuPy handles overflow internally
         base = 1 + mu_chunk_gpu / smoothed_size_gpu[:, cp.newaxis]
         p_is_chunk_gpu = cp.power(base, -smoothed_size_gpu[:, cp.newaxis])
 
-        # [FIX] Clamp probabilities to valid range [0, 1]
-        p_is_chunk_gpu = cp.clip(p_is_chunk_gpu, 0.0, 1.0)
-        p_is_chunk_gpu = cp.nan_to_num(p_is_chunk_gpu, nan=0.0)
+        # Handle any inf/nan values that might have occurred
+        p_is_chunk_gpu = cp.nan_to_num(p_is_chunk_gpu, nan=0.0, posinf=1.0, neginf=0.0)
 
+        # Sum results
         row_ps_gpu += p_is_chunk_gpu.sum(axis=1)
         col_ps_gpu[i:end_col] = p_is_chunk_gpu.sum(axis=0)
 
+        # Clean up
         del mu_chunk_gpu, p_is_chunk_gpu, base, tis_chunk_gpu
-        cp.get_default_memory_pool().free_all_blocks()
+
+        # Periodic memory cleanup
+        mempool = cp.get_default_memory_pool()
+        if (i // optimal_chunk) % 10 == 0:
+            mempool.free_all_blocks()
 
     print(f"Phase [2/2]: COMPLETE{' ' * 50}")
 
+    # Move results to CPU
     row_ps_cpu = row_ps_gpu.get()
     col_ps_cpu = col_ps_gpu.get()
     djs_cpu = vals['djs'].values
     dis_cpu = vals['dis'].values
 
+    # Plotting
+    if not suppress_plot:
+        plt.figure(figsize=(12, 5))
+        plt.subplot(1, 2, 1)
+        plt.scatter(djs_cpu, row_ps_cpu, alpha=0.5, s=10)
+        plt.title("Gene-specific Dropouts (Smoothed)")
+        plt.xlabel("Observed")
+        plt.ylabel("Fit")
+        lims = [min(plt.xlim()[0], plt.ylim()[0]), max(plt.xlim()[1], plt.ylim()[1])]
+        plt.plot(lims, lims, 'r-', alpha=0.75, zorder=0, label="y=x line")
+        plt.grid(True); plt.legend()
+
+        plt.subplot(1, 2, 2)
+        plt.scatter(dis_cpu, col_ps_cpu, alpha=0.5, s=10)
+        plt.title("Cell-specific Dropouts (Smoothed)")
+        plt.xlabel("Observed")
+        plt.ylabel("Expected")
+        lims = [min(plt.xlim()[0], plt.ylim()[0]), max(plt.xlim()[1], plt.ylim()[1])]
+        plt.plot(lims, lims, 'r-', alpha=0.75, zorder=0, label="y=x line")
+        plt.grid(True); plt.legend()
+
+        plt.tight_layout()
+        if plot_filename:
+            plt.savefig(plot_filename, dpi=300, bbox_inches='tight')
+            print(f"STATUS: Diagnostic plot saved to '{plot_filename}'")
+        plt.show()
+        plt.close()
+
+    # Calculate errors
     gene_error = np.sum((djs_cpu - row_ps_cpu)**2)
     cell_error = np.sum((dis_cpu - col_ps_cpu)**2)
 
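Note (not part of the diff): the reworked chunk loop above computes each gene/cell pair's expected dropout probability under the fitted NB model, P(X = 0) = (1 + mu/size)^(-size) with mu[g, j] = tjs[g] * tis[j] / total, and accumulates it into per-gene and per-cell sums. A small CPU-side NumPy sketch of the same accumulation, with toy values not taken from the package:

    import numpy as np

    tjs = np.array([50.0, 5.0])           # per-gene totals (toy)
    tis = np.array([30.0, 20.0, 25.0])    # per-cell totals (toy)
    total = tis.sum()
    smoothed_size = np.array([1.5, 0.8])  # dispersions from the mean-vs-dispersion regression (toy)
    row_ps = np.zeros(len(tjs))           # expected dropouts per gene
    col_ps = np.zeros(len(tis))           # expected dropouts per cell
    chunk = 2
    for i in range(0, len(tis), chunk):
        tis_chunk = tis[i:i + chunk]
        mu = tjs[:, None] * tis_chunk[None, :] / total          # expected count per gene/cell
        p = np.power(1 + mu / smoothed_size[:, None], -smoothed_size[:, None])  # NB P(X = 0)
        row_ps += p.sum(axis=1)
        col_ps[i:i + chunk] = p.sum(axis=0)
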
@@ -177,90 +245,134 @@ def NBumiCompareModelsGPU(
     plot_filename=None
 ) -> dict:
     """
-    OPTIMIZED VERSION (IN-MEMORY):
-    - Calculates Basic Fit without writing 46GB file.
+    OPTIMIZED VERSION - Faster normalization and sparse matrix writing.
     """
     pipeline_start_time = time.time()
     print(f"FUNCTION: NBumiCompareModels() | Comparing models for {cleaned_filename}")
 
+    # [GOVERNOR INTEGRATION] Calculate chunk size for normalization phase (heavy IO)
     if chunk_size is None:
-        chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=12.0, is_dense=False)
-
-    print("Phase [1/3]: Calculating Basic Model (Depth-Normalized) variance on-the-fly...")
+        # Multiplier 10.0 for safety during normalization of massive dense expansion
+        chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=10.0, is_dense=True)
+
+    # --- Phase 1: OPTIMIZED Normalization ---
+    print("Phase [1/4]: Creating temporary 'basic' normalized data file...")
+    basic_norm_filename = cleaned_filename.replace('.h5ad', '_basic_norm.h5ad')
+
+    # Read metadata. In 'backed' mode, this keeps a file handle open.
+    adata_meta = anndata.read_h5ad(cleaned_filename, backed='r')
+    nc, ng = adata_meta.shape
+    obs_df = adata_meta.obs.copy()
+    var_df = adata_meta.var.copy()
 
-    # 1. Prepare Size Factors
-    tjs = stats['tjs'].values
-    tis = stats['tis'].values
-    nc, ng = stats['nc'], stats['ng']
+    cell_sums = stats['tis'].values
+    median_sum = np.median(cell_sums[cell_sums > 0])
 
-    median_sum = np.median(tis[tis > 0])
-    size_factors = np.ones_like(tis, dtype=np.float32)
-    non_zero_mask = tis > 0
-    size_factors[non_zero_mask] = tis[non_zero_mask] / median_sum
+    # Avoid division by zero for cells with zero counts
+    size_factors = np.ones_like(cell_sums, dtype=np.float32)
+    non_zero_mask = cell_sums > 0
+    size_factors[non_zero_mask] = cell_sums[non_zero_mask] / median_sum
+
+    adata_out = anndata.AnnData(obs=obs_df, var=var_df)
+    # [OPTION 2 CHANGE] Removed compression="gzip" to speed up I/O
+    adata_out.write_h5ad(basic_norm_filename)
+
+    with h5py.File(basic_norm_filename, 'a') as f_out:
+        if 'X' in f_out:
+            del f_out['X']
+        x_group_out = f_out.create_group('X')
+        x_group_out.attrs['encoding-type'] = 'csr_matrix'
+        x_group_out.attrs['encoding-version'] = '0.1.0'
+        x_group_out.attrs['shape'] = np.array([nc, ng], dtype='int64')
+
+        out_data = x_group_out.create_dataset('data', shape=(0,), maxshape=(None,), dtype='float32')
+        out_indices = x_group_out.create_dataset('indices', shape=(0,), maxshape=(None,), dtype='int32')
+        out_indptr = x_group_out.create_dataset('indptr', shape=(nc + 1,), dtype='int64')
+        out_indptr[0] = 0
+        current_nnz = 0
+
+        with h5py.File(cleaned_filename, 'r') as f_in:
+            h5_indptr = f_in['X']['indptr']
+            h5_data = f_in['X']['data']
+            h5_indices = f_in['X']['indices']
+
+            for i in range(0, nc, chunk_size):
+                end_row = min(i + chunk_size, nc)
+                print(f"Phase [1/4]: Normalizing: {end_row} of {nc} cells.", end='\r')
+
+                start_idx, end_idx = h5_indptr[i], h5_indptr[end_row]
+                if start_idx == end_idx:
+                    out_indptr[i + 1 : end_row + 1] = current_nnz
+                    continue
+
+                # Read data for the chunk
+                data_slice = h5_data[start_idx:end_idx]
+                indices_slice = h5_indices[start_idx:end_idx]
+                indptr_slice = h5_indptr[i:end_row + 1] - start_idx
+
+                # Move to GPU for fast normalization
+                data_gpu = cp.asarray(data_slice.copy(), dtype=cp.float32)
+
+                indptr_gpu = cp.asarray(indptr_slice.copy())
+                nnz_in_chunk = indptr_gpu[-1].item()
+                cell_boundary_markers = cp.zeros(nnz_in_chunk, dtype=cp.int32)
+                if len(indptr_gpu) > 1:
+                    cell_boundary_markers[indptr_gpu[:-1]] = 1
+                row_indices = cp.cumsum(cell_boundary_markers, axis=0) - 1
+
+                size_factors_for_chunk = cp.asarray(size_factors[i:end_row])
+
+                data_gpu /= size_factors_for_chunk[row_indices]
+
+                # [RESTORED LEGACY LOGIC] Rounding matches original file.
+                data_cpu = np.round(data_gpu.get())
+
+                num_cells_in_chunk = end_row - i
+                chunk_sp = sp_csr_matrix((data_cpu, indices_slice, indptr_slice),
+                                         shape=(num_cells_in_chunk, ng))
+
+                nnz_chunk = chunk_sp.nnz
+                out_data.resize(current_nnz + nnz_chunk, axis=0)
+                out_data[current_nnz:] = chunk_sp.data
+
+                out_indices.resize(current_nnz + nnz_chunk, axis=0)
+                out_indices[current_nnz:] = chunk_sp.indices
+
+                new_indptr_list = chunk_sp.indptr[1:].astype(np.int64) + current_nnz
+                out_indptr[i + 1 : end_row + 1] = new_indptr_list
+
+                current_nnz += nnz_chunk
+
+                del data_gpu, row_indices, size_factors_for_chunk, indptr_gpu
+                cp.get_default_memory_pool().free_all_blocks()
+
+    print(f"Phase [1/4]: COMPLETE{' '*50}")
+
+    print("Phase [2/4]: Fitting Basic Model on normalized data...")
 
-    sum_x_sq_gpu = cp.zeros(ng, dtype=cp.float64)
-    sum_x_gpu = cp.zeros(ng, dtype=cp.float64)
+    # [GOVERNOR INTEGRATION] Calculate chunk size for basic fit on the heavy normalized file
+    chunk_size_basic = get_optimal_chunk_size(basic_norm_filename, multiplier=10.0, is_dense=True)
 
-    with h5py.File(cleaned_filename, 'r') as f_in:
-        h5_indptr = f_in['X']['indptr']
-        h5_data = f_in['X']['data']
-        h5_indices = f_in['X']['indices']
-
-        for i in range(0, nc, chunk_size):
-            end_row = min(i + chunk_size, nc)
-            print(f"Phase [1/3]: Processing: {end_row} of {nc} cells.", end='\r')
-
-            start_idx, end_idx = h5_indptr[i], h5_indptr[end_row]
-            if start_idx == end_idx: continue
-
-            data_gpu = cp.asarray(h5_data[start_idx:end_idx], dtype=cp.float32)
-            indices_gpu = cp.asarray(h5_indices[start_idx:end_idx])
-            indptr_gpu = cp.asarray(h5_indptr[i:end_row + 1] - start_idx)
-
-            nnz_in_chunk = indptr_gpu[-1].item()
-            cell_boundary_markers = cp.zeros(nnz_in_chunk, dtype=cp.int32)
-            if len(indptr_gpu) > 1:
-                cell_boundary_markers[indptr_gpu[:-1]] = 1
-            row_indices = cp.cumsum(cell_boundary_markers, axis=0) - 1
-
-            sf_chunk = cp.asarray(size_factors[i:end_row])
-
-            # Normalize
-            data_gpu /= sf_chunk[row_indices]
-
-            # Accumulate
-            cp.add.at(sum_x_sq_gpu, indices_gpu, data_gpu**2)
-            cp.add.at(sum_x_gpu, indices_gpu, data_gpu)
-
-            del data_gpu, indices_gpu, indptr_gpu, row_indices, sf_chunk, cell_boundary_markers
-            cp.get_default_memory_pool().free_all_blocks()
-
-    print(f"Phase [1/3]: COMPLETE{' '*50}")
-
-    mean_x_sq_gpu = sum_x_sq_gpu / nc
-    mean_mu_gpu = sum_x_gpu / nc
-    my_rowvar_gpu = mean_x_sq_gpu - mean_mu_gpu**2
+    stats_basic = hidden_calc_valsGPU(basic_norm_filename)  # hidden_calc uses its own governor internally
+    fit_basic = NBumiFitBasicModelGPU(basic_norm_filename, stats_basic, chunk_size=chunk_size_basic)
+    print("Phase [2/4]: COMPLETE")
 
-    size_gpu = mean_mu_gpu**2 / (my_rowvar_gpu - mean_mu_gpu)
+    print("Phase [3/4]: Evaluating fits of both models on ORIGINAL data...")
+    # [GOVERNOR INTEGRATION] Chunk size for check fit
+    # [CRITICAL FIX] Multiplier 20.0 prevents VRAM overflow
+    chunk_size_check = get_optimal_chunk_size(cleaned_filename, multiplier=20.0, is_dense=True)
 
-    max_size_val = cp.nanmax(size_gpu) * 10
-    if cp.isnan(max_size_val): max_size_val = 1000
-    size_gpu[cp.isnan(size_gpu) | (size_gpu <= 0)] = max_size_val
-    size_gpu[size_gpu < 1e-10] = 1e-10
+    check_adjust = NBumiCheckFitFSGPU(cleaned_filename, fit_adjust, suppress_plot=True, chunk_size=chunk_size_check)
 
-    fit_basic = {
-        'sizes': pd.Series(size_gpu.get(), index=stats['tjs'].index),
+    fit_basic_for_eval = {
+        'sizes': fit_basic['sizes'],
         'vals': stats,
-        'var_obs': pd.Series(my_rowvar_gpu.get(), index=stats['tjs'].index)
+        'var_obs': fit_basic['var_obs']
     }
-
-    print("Phase [2/3]: Evaluating fit errors on ORIGINAL data...")
-    check_adjust = NBumiCheckFitFSGPU(cleaned_filename, fit_adjust, suppress_plot=True)
-    check_basic = NBumiCheckFitFSGPU(cleaned_filename, fit_basic, suppress_plot=True)
-    print("Phase [2/3]: COMPLETE")
+    check_basic = NBumiCheckFitFSGPU(cleaned_filename, fit_basic_for_eval, suppress_plot=True, chunk_size=chunk_size_check)
+    print("Phase [3/4]: COMPLETE")
 
-    print("Phase [3/3]: Generating comparison...")
-
+    print("Phase [4/4]: Generating final comparison...")
     nc_data = stats['nc']
     mean_expr = stats['tjs'] / nc_data
     observed_dropout = stats['djs'] / nc_data
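Note (not part of the diff): Phase [1/4] above normalizes each cell by a median-based size factor (cell total divided by the median non-zero cell total, with zero-count cells left at 1) before rounding and re-writing the counts chunk by chunk as a CSR matrix. A standalone NumPy sketch of just the size-factor step, with invented numbers:

    import numpy as np

    cell_sums = np.array([100.0, 0.0, 400.0, 200.0])   # per-cell totals ('tis', toy values)
    median_sum = np.median(cell_sums[cell_sums > 0])    # 200.0
    size_factors = np.ones_like(cell_sums, dtype=np.float32)
    nz = cell_sums > 0
    size_factors[nz] = cell_sums[nz] / median_sum       # [0.5, 1.0, 2.0, 1.0]; zero-count cell stays 1.0
    counts_for_first_cell = np.array([3.0, 1.0, 0.0])   # toy counts for one cell
    normalized = np.round(counts_for_first_cell / size_factors[0])  # rounding matches the diff
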
@@ -303,8 +415,15 @@ def NBumiCompareModelsGPU(
     plt.show()
 
     plt.close()
-
+    print("Phase [4/4]: COMPLETE")
+
     pipeline_end_time = time.time()
+
+    # --- ADD THIS LINE TO FIX THE ERROR ---
+    adata_meta.file.close()  # Explicitly close the file handle
+
+    os.remove(basic_norm_filename)
+    print(f"STATUS: Temporary file '{basic_norm_filename}' removed.")
     print(f"Total time: {pipeline_end_time - pipeline_start_time:.2f} seconds.\n")
 
     return {
@@ -322,11 +441,14 @@ def NBumiPlotDispVsMeanGPU(
     """
     print("FUNCTION: NBumiPlotDispVsMean()")
 
+    # --- 1. Extract data and regression coefficients ---
     mean_expression = fit['vals']['tjs'].values / fit['vals']['nc']
     sizes = fit['sizes'].values
     coeffs = NBumiFitDispVsMeanGPU(fit, suppress_plot=True)
     intercept, slope = coeffs[0], coeffs[1]
 
+    # --- 2. Calculate the fitted line for plotting ---
+    # Create a smooth, continuous line using the regression coefficients
     log_mean_expr_range = np.linspace(
         np.log(mean_expression[mean_expression > 0].min()),
         np.log(mean_expression.max()),
@@ -335,6 +457,7 @@ def NBumiPlotDispVsMeanGPU(
     log_fitted_sizes = intercept + slope * log_mean_expr_range
     fitted_sizes = np.exp(log_fitted_sizes)
 
+    # --- 3. Create the plot ---
     plt.figure(figsize=(8, 6))
     plt.scatter(mean_expression, sizes, label='Observed Dispersion', alpha=0.5, s=8)
     plt.plot(np.exp(log_mean_expr_range), fitted_sizes, color='red', label='Regression Fit', linewidth=2)
m3drop-0.4.41.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: M3Drop
-Version: 0.4.39
+Version: 0.4.41
 Summary: A Python implementation of the M3Drop single-cell RNA-seq analysis tool.
 Home-page: https://github.com/PragalvhaSharma/m3DropNew
 Author: Tallulah Andrews
m3drop-0.4.41.dist-info/RECORD CHANGED
@@ -2,11 +2,11 @@ m3Drop/__init__.py,sha256=yaUXhUArnwgLf01Zlpqa5qm9K1aByGqQupIoCaLYiDw,2462
 m3Drop/coreCPU.py,sha256=3kPYlSVlYrJEhRUCIoVzmR8CYBaHpxVM5nx-3YQI4d4,17204
 m3Drop/coreGPU.py,sha256=k7A06VNgfJ59J8g1VpfKxhTIKrEbW7Bj8pTbQqHaQL8,24571
 m3Drop/diagnosticsCPU.py,sha256=BecOKTz2GDjzjs9ycXYsyrSHi2UVgsM58RBuNE62vmU,14273
-m3Drop/diagnosticsGPU.py,sha256=9yGsPOAjxVZxh_J2uL2pUwGyjPl0wXAkJ7f69qKLeOA,12814
+m3Drop/diagnosticsGPU.py,sha256=0tDHZHVS14qg46p1AZcdX8DOnGmbYJ7ha0FFfKtmENg,18891
 m3Drop/normalizationCPU.py,sha256=4ulCrDZZjxVFh2y0i4ayPkNCsZYaOP-Xq2Dnzu9WXtg,5697
 m3Drop/normalizationGPU.py,sha256=r5gvJFkabEfCfIsVdpJzWGqve_Iy57EYsEyiLfDo8Mo,8539
-m3drop-0.4.39.dist-info/licenses/LICENSE,sha256=44Iqpp8Fc10Xzd5T7cT9UhO31Qftk3gBiCjtpwilP_k,1074
-m3drop-0.4.39.dist-info/METADATA,sha256=YdxFQNg4hI07uRADUE3JQ8i7AmAjpHGSLJsHmBLSvxM,5161
-m3drop-0.4.39.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-m3drop-0.4.39.dist-info/top_level.txt,sha256=AEULFEFIgFtAwS-KBlIFoYXrqczX_rwqrEcdK46GIrA,7
-m3drop-0.4.39.dist-info/RECORD,,
+m3drop-0.4.41.dist-info/licenses/LICENSE,sha256=44Iqpp8Fc10Xzd5T7cT9UhO31Qftk3gBiCjtpwilP_k,1074
+m3drop-0.4.41.dist-info/METADATA,sha256=5jDbZa9PGiqBAv-TBPgGPqz3nCjMHiEDWdNw9qwPSyA,5161
+m3drop-0.4.41.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+m3drop-0.4.41.dist-info/top_level.txt,sha256=AEULFEFIgFtAwS-KBlIFoYXrqczX_rwqrEcdK46GIrA,7
+m3drop-0.4.41.dist-info/RECORD,,