M3Drop 0.4.34__py3-none-any.whl → 0.4.36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
m3Drop/diagnosticsGPU.py CHANGED
@@ -1,395 +1,124 @@
- import numpy as np
- import pandas as pd
- import cupy as cp
- import cupyx.scipy.sparse as csp
- import matplotlib.pyplot as plt
- import h5py
- import os
- import time
- import psutil
- import gc
- from scipy import sparse
- from scipy import stats
- import anndata # <--- FIXED: Added missing import
-
- # [GOVERNOR INTEGRATION] Added get_optimal_chunk_size
- from .coreGPU import hidden_calc_valsGPU, NBumiFitModelGPU, NBumiFitDispVsMeanGPU, get_optimal_chunk_size
- from cupy.sparse import csr_matrix as cp_csr_matrix
- import scipy.sparse as sp
- from scipy.sparse import csr_matrix as sp_csr_matrix
-
- import statsmodels.api as sm
- from scipy.stats import norm
- from statsmodels.stats.multitest import multipletests
-
- def NBumiFitBasicModelGPU(
+ def NBumiCompareModelsGPU(
+     raw_filename: str,
      cleaned_filename: str,
      stats: dict,
-     is_logged=False,
-     chunk_size: int = None
+     fit_adjust: dict,
+     chunk_size: int = None,
+     suppress_plot=False,
+     plot_filename=None
  ) -> dict:
      """
-     Fits a simpler, unadjusted NB model out-of-core using a GPU-accelerated
-     algorithm. Designed to work with a standard (cell, gene) sparse matrix.
+     OPTIMIZED VERSION (IN-MEMORY):
+     - Eliminates the 46GB '_basic_norm.h5ad' temporary file.
+     - Performs depth normalization and variance calculation on-the-fly in GPU VRAM.
      """
-     start_time = time.perf_counter()
-     print(f"FUNCTION: NBumiFitBasicModel() | FILE: {cleaned_filename}")
+     pipeline_start_time = time.time()
+     print(f"FUNCTION: NBumiCompareModels() | Comparing models for {cleaned_filename}")

-     # [GOVERNOR INTEGRATION] Calculate optimal chunk size if not provided
+     # [GOVERNOR] High multiplier (12.0) because we hold Raw + Norm + Square in VRAM
      if chunk_size is None:
-         chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=3.0, is_dense=True)
+         chunk_size = get_optimal_chunk_size(raw_filename, multiplier=12.0, is_dense=False)

-     # --- Phase 1: Initialization ---
-     print("Phase [1/2]: Initializing parameters and arrays on GPU...")
-     tjs = stats['tjs'].values
+     # --- Phase 1: In-Memory "Basic Fit" (Normalization + Variance) ---
+     print("Phase [1/3]: Calculating Basic Model (Depth-Normalized) variance on-the-fly...")
+
+     # 1. Prepare Size Factors (CPU)
+     tjs = stats['tjs'].values # Gene sums (needed for final dataframe)
+     tis = stats['tis'].values # Cell sums (needed for size factors)
      nc, ng = stats['nc'], stats['ng']
-
-     tjs_gpu = cp.asarray(tjs, dtype=cp.float64)
+
+     median_sum = np.median(tis[tis > 0])
+     size_factors = np.ones_like(tis, dtype=np.float32)
+     non_zero_mask = tis > 0
+     size_factors[non_zero_mask] = tis[non_zero_mask] / median_sum
+
+     # 2. Prepare GPU Arrays
      sum_x_sq_gpu = cp.zeros(ng, dtype=cp.float64)
-     print("Phase [1/2]: COMPLETE")
-
-     # --- Phase 2: Calculate Variance from Data Chunks ---
-     print("Phase [2/2]: Calculating variance from data chunks...")
-     with h5py.File(cleaned_filename, 'r') as f_in:
-         x_group = f_in['X']
-         h5_indptr = x_group['indptr']
-         h5_data = x_group['data']
-         h5_indices = x_group['indices']
+     sum_x_gpu = cp.zeros(ng, dtype=cp.float64) # Need sum(x) to calc mean(x) for variance
+
+     # 3. GPU Loop (Raw Data -> Normalize -> Accumulate)
+     with h5py.File(raw_filename, 'r') as f_in:
+         h5_indptr = f_in['X']['indptr']
+         h5_data = f_in['X']['data']
+         h5_indices = f_in['X']['indices']

          for i in range(0, nc, chunk_size):
              end_row = min(i + chunk_size, nc)
-             print(f"Phase [2/2]: Processing: {end_row} of {nc} cells.", end='\r')
-
+             print(f"Phase [1/3]: Processing: {end_row} of {nc} cells.", end='\r')
+
              start_idx, end_idx = h5_indptr[i], h5_indptr[end_row]
-             if start_idx == end_idx:
-                 continue
+             if start_idx == end_idx: continue
+
+             # Load Raw Chunk
+             data_gpu = cp.asarray(h5_data[start_idx:end_idx], dtype=cp.float32)
+             indices_gpu = cp.asarray(h5_indices[start_idx:end_idx])
+             indptr_gpu = cp.asarray(h5_indptr[i:end_row + 1] - start_idx)
+
+             # Expand Size Factors to match Data Structure
+             # (Map cell's size factor to every non-zero gene in that cell)
+             nnz_in_chunk = indptr_gpu[-1].item()
+             cell_boundary_markers = cp.zeros(nnz_in_chunk, dtype=cp.int32)
+             if len(indptr_gpu) > 1:
+                 cell_boundary_markers[indptr_gpu[:-1]] = 1
+             # row_indices maps every data point to its cell index (0 to chunk_size)
+             row_indices = cp.cumsum(cell_boundary_markers, axis=0) - 1

-             # Process in smaller sub-chunks if needed
-             max_elements = 5_000_000 # Process max 5M elements at a time
+             # Get size factors for this chunk
+             sf_chunk = cp.asarray(size_factors[i:end_row])

-             if end_idx - start_idx > max_elements:
-                 # Process in sub-chunks
-                 for sub_start in range(start_idx, end_idx, max_elements):
-                     sub_end = min(sub_start + max_elements, end_idx)
-
-                     data_slice = h5_data[sub_start:sub_end]
-                     indices_slice = h5_indices[sub_start:sub_end]
-
-                     data_gpu = cp.asarray(data_slice, dtype=cp.float64)
-                     indices_gpu = cp.asarray(indices_slice)
-
-                     # Accumulate the sum of squares for each gene
-                     cp.add.at(sum_x_sq_gpu, indices_gpu, data_gpu**2)
-
-                     # Free GPU memory
-                     del data_gpu, indices_gpu
-                     cp.get_default_memory_pool().free_all_blocks()
-             else:
-                 # Original processing for smaller chunks
-                 data_slice = h5_data[start_idx:end_idx]
-                 indices_slice = h5_indices[start_idx:end_idx]
+             # --- THE MAGIC: On-the-Fly Normalization ---
+             # data_norm = data_raw / size_factor
+             data_gpu /= sf_chunk[row_indices]
+
+             # Accumulate for Variance: E[X^2] and E[X]
+             cp.add.at(sum_x_sq_gpu, indices_gpu, data_gpu**2)
+             cp.add.at(sum_x_gpu, indices_gpu, data_gpu)
+
+             # Clean up VRAM
+             del data_gpu, indices_gpu, indptr_gpu, row_indices, sf_chunk, cell_boundary_markers
+             cp.get_default_memory_pool().free_all_blocks()

-                 data_gpu = cp.asarray(data_slice, dtype=cp.float64)
-                 indices_gpu = cp.asarray(indices_slice)
+     print(f"Phase [1/3]: COMPLETE{' '*50}")

-                 # Accumulate the sum of squares for each gene
-                 cp.add.at(sum_x_sq_gpu, indices_gpu, data_gpu**2)
-
-                 # Clean up
-                 del data_gpu, indices_gpu
-                 cp.get_default_memory_pool().free_all_blocks()
+     # 4. Finalize Basic Statistics
+     # Var(X) = E[X^2] - (E[X])^2
+     mean_x_sq_gpu = sum_x_sq_gpu / nc
+     mean_mu_gpu = sum_x_gpu / nc
+     my_rowvar_gpu = mean_x_sq_gpu - mean_mu_gpu**2

-     print(f"Phase [2/2]: COMPLETE ")
-
-     # --- Final calculations on GPU ---
-     if is_logged:
-         raise NotImplementedError("Logged data variance calculation is not implemented for out-of-core.")
-     else:
-         # Variance of raw data: Var(X) = E[X^2] - E[X]^2
-         mean_x_sq_gpu = sum_x_sq_gpu / nc
-         mean_mu_gpu = tjs_gpu / nc
-         my_rowvar_gpu = mean_x_sq_gpu - mean_mu_gpu**2
-
-         # Calculate dispersion ('size')
-         size_gpu = mean_mu_gpu**2 / (my_rowvar_gpu - mean_mu_gpu)
+     # Dispersion = Mean^2 / (Var - Mean)
+     size_gpu = mean_mu_gpu**2 / (my_rowvar_gpu - mean_mu_gpu)

+     # Safety Clamping (Same as original)
      max_size_val = cp.nanmax(size_gpu) * 10
-     if cp.isnan(max_size_val):
-         max_size_val = 1000
+     if cp.isnan(max_size_val): max_size_val = 1000
      size_gpu[cp.isnan(size_gpu) | (size_gpu <= 0)] = max_size_val
      size_gpu[size_gpu < 1e-10] = 1e-10

-     # Move results to CPU
-     my_rowvar_cpu = my_rowvar_gpu.get()
-     sizes_cpu = size_gpu.get()
-
-     end_time = time.perf_counter()
-     print(f"Total time: {end_time - start_time:.2f} seconds.\n")
-
-     return {
-         'var_obs': pd.Series(my_rowvar_cpu, index=stats['tjs'].index),
-         'sizes': pd.Series(sizes_cpu, index=stats['tjs'].index),
-         'vals': stats
+     # Construct "Basic Fit" Object
+     fit_basic = {
+         'sizes': pd.Series(size_gpu.get(), index=stats['tjs'].index),
+         'vals': stats,
+         'var_obs': pd.Series(my_rowvar_gpu.get(), index=stats['tjs'].index)
      }
-
- def NBumiCheckFitFSGPU(
-     cleaned_filename: str,
-     fit: dict,
-     chunk_size: int = None,
-     suppress_plot=False,
-     plot_filename=None
- ) -> dict:
-     """
-     FIXED VERSION - No cupy.errstate, proper GPU computation.
-     """
-     start_time = time.perf_counter()
-     print(f"FUNCTION: NBumiCheckFitFS() | FILE: {cleaned_filename}")
-
-     # [GOVERNOR INTEGRATION] Adaptive chunk sizing
-     if chunk_size is None:
-         chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=5.0, is_dense=True)
-
-     # --- Phase 1: Initialization ---
-     print("Phase [1/2]: Initializing parameters and arrays on GPU...")
-     vals = fit['vals']
-     size_coeffs = NBumiFitDispVsMeanGPU(fit, suppress_plot=True)
-
-     # Must use float64 for precision
-     tjs_gpu = cp.asarray(vals['tjs'].values, dtype=cp.float64)
-     tis_gpu = cp.asarray(vals['tis'].values, dtype=cp.float64)
-     total = vals['total']
-     nc, ng = vals['nc'], vals['ng']
-
-     # Calculate smoothed size
-     mean_expression_gpu = tjs_gpu / nc
-     log_mean_expression_gpu = cp.log(mean_expression_gpu)
-     smoothed_size_gpu = cp.exp(size_coeffs[0] + size_coeffs[1] * log_mean_expression_gpu)
-
-     # Initialize result arrays
-     row_ps_gpu = cp.zeros(ng, dtype=cp.float64)
-     col_ps_gpu = cp.zeros(nc, dtype=cp.float64)
-     print("Phase [1/2]: COMPLETE")
-
-     # --- Phase 2: Calculate Expected Dropouts ---
-     print("Phase [2/2]: Calculating expected dropouts from data chunks...")

-     # [GOVERNOR INTEGRATION] Removed naive calculation, utilizing Governor's chunk_size
-     optimal_chunk = chunk_size
-     print(f" Using governor chunk size: {optimal_chunk}")
+     # --- Phase 2: Check Fit (Calculate Errors) ---
+     print("Phase [2/3]: Evaluating fit errors on ORIGINAL data...")

-     for i in range(0, nc, optimal_chunk):
-         end_col = min(i + optimal_chunk, nc)
-         print(f"Phase [2/2]: Processing: {end_col} of {nc} cells.", end='\r')
-
-         tis_chunk_gpu = tis_gpu[i:end_col]
-
-         # Standard calculation without errstate
-         mu_chunk_gpu = tjs_gpu[:, cp.newaxis] * tis_chunk_gpu[cp.newaxis, :] / total
-
-         # Calculate p_is directly - CuPy handles overflow internally
-         base = 1 + mu_chunk_gpu / smoothed_size_gpu[:, cp.newaxis]
-         p_is_chunk_gpu = cp.power(base, -smoothed_size_gpu[:, cp.newaxis])
-
-         # Handle any inf/nan values that might have occurred
-         p_is_chunk_gpu = cp.nan_to_num(p_is_chunk_gpu, nan=0.0, posinf=1.0, neginf=0.0)
-
-         # Sum results
-         row_ps_gpu += p_is_chunk_gpu.sum(axis=1)
-         col_ps_gpu[i:end_col] = p_is_chunk_gpu.sum(axis=0)
-
-         # Clean up
-         del mu_chunk_gpu, p_is_chunk_gpu, base, tis_chunk_gpu
-
-         # Periodic memory cleanup
-         mempool = cp.get_default_memory_pool()
-         if (i // optimal_chunk) % 10 == 0:
-             mempool.free_all_blocks()
-
-     print(f"Phase [2/2]: COMPLETE{' ' * 50}")
-
-     # Move results to CPU
-     row_ps_cpu = row_ps_gpu.get()
-     col_ps_cpu = col_ps_gpu.get()
-     djs_cpu = vals['djs'].values
-     dis_cpu = vals['dis'].values
-
-     # Plotting
-     if not suppress_plot:
-         plt.figure(figsize=(12, 5))
-         plt.subplot(1, 2, 1)
-         plt.scatter(djs_cpu, row_ps_cpu, alpha=0.5, s=10)
-         plt.title("Gene-specific Dropouts (Smoothed)")
-         plt.xlabel("Observed")
-         plt.ylabel("Fit")
-         lims = [min(plt.xlim()[0], plt.ylim()[0]), max(plt.xlim()[1], plt.ylim()[1])]
-         plt.plot(lims, lims, 'r-', alpha=0.75, zorder=0, label="y=x line")
-         plt.grid(True); plt.legend()
-
-         plt.subplot(1, 2, 2)
-         plt.scatter(dis_cpu, col_ps_cpu, alpha=0.5, s=10)
-         plt.title("Cell-specific Dropouts (Smoothed)")
-         plt.xlabel("Observed")
-         plt.ylabel("Expected")
-         lims = [min(plt.xlim()[0], plt.ylim()[0]), max(plt.xlim()[1], plt.ylim()[1])]
-         plt.plot(lims, lims, 'r-', alpha=0.75, zorder=0, label="y=x line")
-         plt.grid(True); plt.legend()
-
-         plt.tight_layout()
-         if plot_filename:
-             plt.savefig(plot_filename, dpi=300, bbox_inches='tight')
-             print(f"STATUS: Diagnostic plot saved to '{plot_filename}'")
-         plt.show()
-         plt.close()
-
-     # Calculate errors
-     gene_error = np.sum((djs_cpu - row_ps_cpu)**2)
-     cell_error = np.sum((dis_cpu - col_ps_cpu)**2)
-
-     end_time = time.perf_counter()
-     print(f"Total time: {end_time - start_time:.2f} seconds.\n")
-
-     return {
-         'gene_error': gene_error,
-         'cell_error': cell_error,
-         'rowPs': pd.Series(row_ps_cpu, index=fit['vals']['tjs'].index),
-         'colPs': pd.Series(col_ps_cpu, index=fit['vals']['tis'].index)
-     }
-
- def NBumiCompareModelsGPU(
-     raw_filename: str,
-     cleaned_filename: str,
-     stats: dict,
-     fit_adjust: dict,
-     chunk_size: int = None,
-     suppress_plot=False,
-     plot_filename=None
- ) -> dict:
-     """
-     OPTIMIZED VERSION - Faster normalization and sparse matrix writing.
-     """
-     pipeline_start_time = time.time()
-     print(f"FUNCTION: NBumiCompareModels() | Comparing models for {cleaned_filename}")
-
-     # [GOVERNOR INTEGRATION] Calculate chunk size for normalization phase (heavy IO)
-     if chunk_size is None:
-         # Multiplier 10.0 for safety during normalization of massive dense expansion
-         chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=10.0, is_dense=True)
-
-     # --- Phase 1: OPTIMIZED Normalization ---
-     print("Phase [1/4]: Creating temporary 'basic' normalized data file...")
-     basic_norm_filename = cleaned_filename.replace('.h5ad', '_basic_norm.h5ad')
-
-     # Read metadata. In 'backed' mode, this keeps a file handle open.
-     adata_meta = anndata.read_h5ad(cleaned_filename, backed='r')
-     nc, ng = adata_meta.shape
-     obs_df = adata_meta.obs.copy()
-     var_df = adata_meta.var.copy()
-
-     cell_sums = stats['tis'].values
-     median_sum = np.median(cell_sums[cell_sums > 0])
+     # Check Adjust (M3Drop)
+     check_adjust = NBumiCheckFitFSGPU(
+         cleaned_filename, fit_adjust, suppress_plot=True, chunk_size=chunk_size
+     )

-     # Avoid division by zero for cells with zero counts
-     size_factors = np.ones_like(cell_sums, dtype=np.float32)
-     non_zero_mask = cell_sums > 0
-     size_factors[non_zero_mask] = cell_sums[non_zero_mask] / median_sum
-
-     adata_out = anndata.AnnData(obs=obs_df, var=var_df)
-     adata_out.write_h5ad(basic_norm_filename, compression="gzip")
-
-     with h5py.File(basic_norm_filename, 'a') as f_out:
-         if 'X' in f_out:
-             del f_out['X']
-         x_group_out = f_out.create_group('X')
-         x_group_out.attrs['encoding-type'] = 'csr_matrix'
-         x_group_out.attrs['encoding-version'] = '0.1.0'
-         x_group_out.attrs['shape'] = np.array([nc, ng], dtype='int64')
-
-         out_data = x_group_out.create_dataset('data', shape=(0,), maxshape=(None,), dtype='float32')
-         out_indices = x_group_out.create_dataset('indices', shape=(0,), maxshape=(None,), dtype='int32')
-         out_indptr = x_group_out.create_dataset('indptr', shape=(nc + 1,), dtype='int64')
-         out_indptr[0] = 0
-         current_nnz = 0
-
-         with h5py.File(cleaned_filename, 'r') as f_in:
-             h5_indptr = f_in['X']['indptr']
-             h5_data = f_in['X']['data']
-             h5_indices = f_in['X']['indices']
-
-             for i in range(0, nc, chunk_size):
-                 end_row = min(i + chunk_size, nc)
-                 print(f"Phase [1/4]: Normalizing: {end_row} of {nc} cells.", end='\r')
-
-                 start_idx, end_idx = h5_indptr[i], h5_indptr[end_row]
-                 if start_idx == end_idx:
-                     out_indptr[i + 1 : end_row + 1] = current_nnz
-                     continue
-
-                 # Read data for the chunk
-                 data_slice = h5_data[start_idx:end_idx]
-                 indices_slice = h5_indices[start_idx:end_idx]
-                 indptr_slice = h5_indptr[i:end_row + 1] - start_idx
-
-                 # Move to GPU for fast normalization
-                 data_gpu = cp.asarray(data_slice.copy(), dtype=cp.float32)
-
-                 indptr_gpu = cp.asarray(indptr_slice.copy())
-                 nnz_in_chunk = indptr_gpu[-1].item()
-                 cell_boundary_markers = cp.zeros(nnz_in_chunk, dtype=cp.int32)
-                 if len(indptr_gpu) > 1:
-                     cell_boundary_markers[indptr_gpu[:-1]] = 1
-                 row_indices = cp.cumsum(cell_boundary_markers, axis=0) - 1
-
-                 size_factors_for_chunk = cp.asarray(size_factors[i:end_row])
-
-                 data_gpu /= size_factors_for_chunk[row_indices]
-
-                 data_cpu = np.round(data_gpu.get())
-
-                 num_cells_in_chunk = end_row - i
-                 chunk_sp = sp_csr_matrix((data_cpu, indices_slice, indptr_slice),
-                                          shape=(num_cells_in_chunk, ng))
-
-                 nnz_chunk = chunk_sp.nnz
-                 out_data.resize(current_nnz + nnz_chunk, axis=0)
-                 out_data[current_nnz:] = chunk_sp.data
-
-                 out_indices.resize(current_nnz + nnz_chunk, axis=0)
-                 out_indices[current_nnz:] = chunk_sp.indices
-
-                 new_indptr_list = chunk_sp.indptr[1:].astype(np.int64) + current_nnz
-                 out_indptr[i + 1 : end_row + 1] = new_indptr_list
-
-                 current_nnz += nnz_chunk
-
-                 del data_gpu, row_indices, size_factors_for_chunk, indptr_gpu
-                 cp.get_default_memory_pool().free_all_blocks()
-
-     print(f"Phase [1/4]: COMPLETE{' '*50}")
+     # Check Basic (Depth-Norm)
+     check_basic = NBumiCheckFitFSGPU(
+         cleaned_filename, fit_basic, suppress_plot=True, chunk_size=chunk_size
+     )
+     print("Phase [2/3]: COMPLETE")

-     print("Phase [2/4]: Fitting Basic Model on normalized data...")
-
-     # [GOVERNOR INTEGRATION] Calculate chunk size for basic fit on the heavy normalized file
-     chunk_size_basic = get_optimal_chunk_size(basic_norm_filename, multiplier=10.0, is_dense=True)
+     # --- Phase 3: Plotting & Comparison ---
+     print("Phase [3/3]: Generating comparison...")

-     stats_basic = hidden_calc_valsGPU(basic_norm_filename) # hidden_calc uses its own governor internally
-     fit_basic = NBumiFitBasicModelGPU(basic_norm_filename, stats_basic, chunk_size=chunk_size_basic)
-     print("Phase [2/4]: COMPLETE")
-
-     print("Phase [3/4]: Evaluating fits of both models on ORIGINAL data...")
-     # [GOVERNOR INTEGRATION] Chunk size for check fit
-     chunk_size_check = get_optimal_chunk_size(cleaned_filename, multiplier=5.0, is_dense=True)
-
-     check_adjust = NBumiCheckFitFSGPU(cleaned_filename, fit_adjust, suppress_plot=True, chunk_size=chunk_size_check)
-
-     fit_basic_for_eval = {
-         'sizes': fit_basic['sizes'],
-         'vals': stats,
-         'var_obs': fit_basic['var_obs']
-     }
-     check_basic = NBumiCheckFitFSGPU(cleaned_filename, fit_basic_for_eval, suppress_plot=True, chunk_size=chunk_size_check)
-     print("Phase [3/4]: COMPLETE")
-
-     print("Phase [4/4]: Generating final comparison...")
      nc_data = stats['nc']
      mean_expr = stats['tjs'] / nc_data
      observed_dropout = stats['djs'] / nc_data
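
The new Phase 1 above avoids the temporary `_basic_norm.h5ad` round trip: in a single pass over the raw CSR matrix it divides each cell's counts by a median-based size factor, maps every stored value to its cell via the cumsum-of-boundary-markers trick, and accumulates per-gene sums of x and x², so Var(X) = E[X²] - (E[X])² and the method-of-moments dispersion mean²/(var - mean) can be formed at the end. Below is a minimal CPU sketch of the same arithmetic, assuming an in-memory (cells x genes) `scipy.sparse` CSR count matrix; the helper name `basic_fit_sketch` and the default chunk size are illustrative, not package API.

```python
import numpy as np
import scipy.sparse as sp

def basic_fit_sketch(X: sp.csr_matrix, chunk_size: int = 1024) -> dict:
    """Illustrative CPU rendering of the streaming 'basic fit' in the diff above.

    X is a (cells x genes) CSR count matrix. Mirrors the logic: median-of-depths
    size factors, per-nonzero normalization, streaming Var(X) = E[X^2] - E[X]^2,
    and the same dispersion clamping.
    """
    nc, ng = X.shape
    tis = np.asarray(X.sum(axis=1)).ravel()                 # cell depths
    size_factors = np.ones(nc, dtype=np.float32)
    nz = tis > 0
    size_factors[nz] = tis[nz] / np.median(tis[nz])

    sum_x = np.zeros(ng, dtype=np.float64)
    sum_x_sq = np.zeros(ng, dtype=np.float64)

    for start in range(0, nc, chunk_size):
        end = min(start + chunk_size, nc)
        chunk = X[start:end]
        # Row index of every stored value, the CPU analogue of the
        # cumsum-of-boundary-markers trick used on the GPU.
        rows = np.repeat(np.arange(end - start), np.diff(chunk.indptr))
        data = chunk.data / size_factors[start + rows]      # on-the-fly normalization
        np.add.at(sum_x, chunk.indices, data)
        np.add.at(sum_x_sq, chunk.indices, data ** 2)

    mean = sum_x / nc
    var = sum_x_sq / nc - mean ** 2
    size = mean ** 2 / (var - mean)                         # NB dispersion (method of moments)
    max_size = np.nanmax(size)
    cap = 1000.0 if np.isnan(max_size) else max_size * 10
    size[np.isnan(size) | (size <= 0)] = cap
    size[size < 1e-10] = 1e-10
    return {"mean": mean, "var_obs": var, "sizes": size}
```

The package version performs the same accumulation chunk-by-chunk from HDF5 into CuPy arrays; only the storage and device differ.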
@@ -432,72 +161,11 @@ def NBumiCompareModelsGPU(
          plt.show()

      plt.close()
-     print("Phase [4/4]: COMPLETE")
-
-     pipeline_end_time = time.time()

-     # --- ADD THIS LINE TO FIX THE ERROR ---
-     adata_meta.file.close() # Explicitly close the file handle
-
-     os.remove(basic_norm_filename)
-     print(f"STATUS: Temporary file '{basic_norm_filename}' removed.")
+     pipeline_end_time = time.time()
      print(f"Total time: {pipeline_end_time - pipeline_start_time:.2f} seconds.\n")

      return {
          "errors": {"Depth-Adjusted": err_adj, "Basic": err_bas},
          "comparison_df": comparison_df
      }
-
- def NBumiPlotDispVsMeanGPU(
-     fit: dict,
-     suppress_plot: bool = False,
-     plot_filename: str = None
- ):
-     """
-     Generates a diagnostic plot of the dispersion vs. mean expression.
-
-     Args:
-         fit (dict): The 'fit' object from NBumiFitModelGPU.
-         suppress_plot (bool): If True, the plot will not be displayed on screen.
-         plot_filename (str, optional): Path to save the plot. If None, not saved.
-     """
-     print("FUNCTION: NBumiPlotDispVsMean()")
-
-     # --- 1. Extract data and regression coefficients ---
-     mean_expression = fit['vals']['tjs'].values / fit['vals']['nc']
-     sizes = fit['sizes'].values
-     coeffs = NBumiFitDispVsMeanGPU(fit, suppress_plot=True)
-     intercept, slope = coeffs[0], coeffs[1]
-
-     # --- 2. Calculate the fitted line for plotting ---
-     # Create a smooth, continuous line using the regression coefficients
-     log_mean_expr_range = np.linspace(
-         np.log(mean_expression[mean_expression > 0].min()),
-         np.log(mean_expression.max()),
-         100
-     )
-     log_fitted_sizes = intercept + slope * log_mean_expr_range
-     fitted_sizes = np.exp(log_fitted_sizes)
-
-     # --- 3. Create the plot ---
-     plt.figure(figsize=(8, 6))
-     plt.scatter(mean_expression, sizes, label='Observed Dispersion', alpha=0.5, s=8)
-     plt.plot(np.exp(log_mean_expr_range), fitted_sizes, color='red', label='Regression Fit', linewidth=2)
-
-     plt.xscale('log')
-     plt.yscale('log')
-     plt.xlabel('Mean Expression')
-     plt.ylabel('Dispersion Parameter (Sizes)')
-     plt.title('Dispersion vs. Mean Expression')
-     plt.legend()
-     plt.grid(True, which="both", linestyle='--', alpha=0.6)
-
-     if plot_filename:
-         plt.savefig(plot_filename, dpi=300, bbox_inches='tight')
-         print(f"STATUS: Diagnostic plot saved to '{plot_filename}'")
-
-     if not suppress_plot:
-         plt.show()
-
-     plt.close()
-     print("FUNCTION: NBumiPlotDispVsMean() COMPLETE\n")
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: M3Drop
- Version: 0.4.34
+ Version: 0.4.36
  Summary: A Python implementation of the M3Drop single-cell RNA-seq analysis tool.
  Home-page: https://github.com/PragalvhaSharma/m3DropNew
  Author: Tallulah Andrews
@@ -2,11 +2,11 @@ m3Drop/__init__.py,sha256=yaUXhUArnwgLf01Zlpqa5qm9K1aByGqQupIoCaLYiDw,2462
  m3Drop/coreCPU.py,sha256=3kPYlSVlYrJEhRUCIoVzmR8CYBaHpxVM5nx-3YQI4d4,17204
  m3Drop/coreGPU.py,sha256=k7A06VNgfJ59J8g1VpfKxhTIKrEbW7Bj8pTbQqHaQL8,24571
  m3Drop/diagnosticsCPU.py,sha256=BecOKTz2GDjzjs9ycXYsyrSHi2UVgsM58RBuNE62vmU,14273
- m3Drop/diagnosticsGPU.py,sha256=SMJKsCttbI5feOIQC7w7eCe3kpxw1RzHBsWy-OtQh2M,19996
+ m3Drop/diagnosticsGPU.py,sha256=jmTEN1IkxecPylAw_4zBjYrWj3MFfTGu-m9bowYsVBk,6797
  m3Drop/normalizationCPU.py,sha256=4ulCrDZZjxVFh2y0i4ayPkNCsZYaOP-Xq2Dnzu9WXtg,5697
  m3Drop/normalizationGPU.py,sha256=mHu_Or4ma6qzujGQQQ0oN3D-yoEngLAN4UTknkArRAY,8596
- m3drop-0.4.34.dist-info/licenses/LICENSE,sha256=44Iqpp8Fc10Xzd5T7cT9UhO31Qftk3gBiCjtpwilP_k,1074
- m3drop-0.4.34.dist-info/METADATA,sha256=gAefE3nHHVo4UzTL-0hbMiJTuOcFOxs75kQZ6-d7bJ8,5161
- m3drop-0.4.34.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- m3drop-0.4.34.dist-info/top_level.txt,sha256=AEULFEFIgFtAwS-KBlIFoYXrqczX_rwqrEcdK46GIrA,7
- m3drop-0.4.34.dist-info/RECORD,,
+ m3drop-0.4.36.dist-info/licenses/LICENSE,sha256=44Iqpp8Fc10Xzd5T7cT9UhO31Qftk3gBiCjtpwilP_k,1074
+ m3drop-0.4.36.dist-info/METADATA,sha256=P6wHTiOQHkAGLsF8sUaW4Dws0hpsK13j6mrOtaczj5M,5161
+ m3drop-0.4.36.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ m3drop-0.4.36.dist-info/top_level.txt,sha256=AEULFEFIgFtAwS-KBlIFoYXrqczX_rwqrEcdK46GIrA,7
+ m3drop-0.4.36.dist-info/RECORD,,
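
The RECORD entries above follow the standard wheel convention: each row is `path,sha256=<urlsafe-base64 digest with padding stripped>,size-in-bytes`, and the RECORD file lists itself with empty hash and size fields. A minimal sketch for checking an unpacked wheel against such a RECORD; the directory layout and helper name are assumptions for the example, not part of M3Drop.

```python
import base64
import csv
import hashlib
from pathlib import Path

def verify_record(wheel_dir: str, record_path: str) -> list[str]:
    """Return the paths whose hash or size does not match the RECORD file.

    RECORD rows look like 'm3Drop/coreGPU.py,sha256=<digest>,24571'; the
    digest is a urlsafe base64 encoding of the raw sha256 with '=' stripped.
    """
    bad = []
    root = Path(wheel_dir)
    with open(record_path, newline="") as fh:
        for path, digest, size in csv.reader(fh):
            if not digest:                       # RECORD lists itself without a hash
                continue
            data = (root / path).read_bytes()
            algo, _, expected = digest.partition("=")
            actual = base64.urlsafe_b64encode(
                hashlib.new(algo, data).digest()
            ).rstrip(b"=").decode()
            if actual != expected or int(size) != len(data):
                bad.append(path)
    return bad

# Hypothetical usage against an unpacked 0.4.36 wheel:
# verify_record("m3drop_unpacked", "m3drop_unpacked/m3drop-0.4.36.dist-info/RECORD")
```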