M3Drop 0.4.48__tar.gz → 0.4.50__tar.gz

This diff shows the changes between publicly released versions of the package, as they appear in the supported public registry. It is provided for informational purposes only.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: M3Drop
-Version: 0.4.48
+Version: 0.4.50
 Summary: A Python implementation of the M3Drop single-cell RNA-seq analysis tool.
 Home-page: https://github.com/PragalvhaSharma/m3DropNew
 Author: Tallulah Andrews
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: M3Drop
-Version: 0.4.48
+Version: 0.4.50
 Summary: A Python implementation of the M3Drop single-cell RNA-seq analysis tool.
 Home-page: https://github.com/PragalvhaSharma/m3DropNew
 Author: Tallulah Andrews
@@ -122,9 +122,17 @@ def NBumiPearsonResidualsCombinedCPU(
     adata_out_approx = anndata.AnnData(obs=adata_in.obs, var=filtered_var)
     adata_out_approx.write_h5ad(output_filename_approx, compression=None)
 
+    # --- CHUNK SIZE FIX ---
     # Calculate appropriate H5 storage chunks
     storage_chunk_rows = int(1_000_000_000 / (ng_filtered * 8))
-    if storage_chunk_rows < 1: storage_chunk_rows = 1
+
+    # [CRITICAL FIX] Clamp chunk size to total rows (nc)
+    if storage_chunk_rows > nc:
+        storage_chunk_rows = nc
+
+    if storage_chunk_rows < 1:
+        storage_chunk_rows = 1
+    # ----------------------
 
     # Open both files for writing simultaneously
     with h5py.File(output_filename_full, 'a') as f_full, h5py.File(output_filename_approx, 'a') as f_approx:
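The clamp above matters because h5py rejects chunked datasets whose chunk shape exceeds the dataset shape in any dimension, so the old code failed whenever the input had fewer rows than the ~1 GB chunk budget allows. A minimal sketch of the corrected sizing, not part of the package source (names mirror the diff; the 1 GB budget and 8-byte float64 width are taken from it):

```python
# Sketch of the fixed chunk sizing; names mirror the diff.
def clamp_chunk_rows(nc: int, ng_filtered: int, budget_bytes: int = 1_000_000_000) -> int:
    rows = budget_bytes // (ng_filtered * 8)  # rows per ~1 GB of float64
    rows = min(rows, nc)   # the fix: a chunk may not exceed the dataset's rows
    return max(rows, 1)    # very wide matrices still get at least one row
```

The same chunk shape is passed to `create_dataset` in the `with` block that follows, so the row-slab writes of the form `out_x[current_row:end_row, :]` land on whole chunks.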
@@ -0,0 +1,355 @@
+import pickle
+import time
+import sys
+import numpy as np
+import h5py
+import anndata
+import pandas as pd
+import os
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+try:
+    import cupy
+    from cupy.sparse import csr_matrix as cp_csr_matrix
+    import cupyx
+    HAS_GPU = True
+except ImportError:
+    cupy = None
+    HAS_GPU = False
+
+# Package-compatible import
+try:
+    from .ControlDeviceGPU import ControlDevice
+except ImportError:
+    try:
+        from ControlDeviceGPU import ControlDevice
+    except ImportError:
+        print("CRITICAL ERROR: 'ControlDeviceGPU.py' not found.")
+        sys.exit(1)
+
+# ==========================================
+# KERNELS
+# ==========================================
+
+pearson_residual_kernel = cupy.ElementwiseKernel(
+    'float64 count, float64 tj, float64 ti, float64 theta, float64 total', 'float64 out',
+    '''
+    double mu = (tj * ti) / total;
+    double denom_sq = mu + ( (mu * mu) / theta );
+    double denom = sqrt(denom_sq);
+    if (denom < 1e-12) { out = (count == 0.0) ? 0.0 : 0.0; } else { out = (count - mu) / denom; }
+    ''',
+    'pearson_residual_kernel'
+)
+
+pearson_approx_kernel = cupy.ElementwiseKernel(
+    'float64 count, float64 tj, float64 ti, float64 total', 'float64 out',
+    '''
+    double mu = (tj * ti) / total;
+    double denom = sqrt(mu);
+    if (denom < 1e-12) { out = 0.0; } else { out = (count - mu) / denom; }
+    ''',
+    'pearson_approx_kernel'
+)
+
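For reference, the two kernels compute negative-binomial and Poisson Pearson residuals for an expected count mu = (tj * ti) / total: the full kernel divides by the NB standard deviation sqrt(mu + mu^2/theta), the approximate one by the Poisson standard deviation sqrt(mu), and both return 0 where the denominator underflows. (Note that the kernels are built at module import time, so despite the HAS_GPU guard this module still needs cupy in order to import.) A NumPy restatement of the same math, not part of the package:

```python
# NumPy sketch of the two ElementwiseKernels above; argument names
# mirror the kernel signatures.
import numpy as np

def pearson_residual_np(count, tj, ti, theta, total):
    mu = (tj * ti) / total                   # expected count under the fit
    denom = np.sqrt(mu + mu**2 / theta)      # NB standard deviation
    with np.errstate(divide='ignore', invalid='ignore'):
        r = (count - mu) / denom
    return np.where(denom < 1e-12, 0.0, r)  # underflow guard, as in the kernel

def pearson_approx_np(count, tj, ti, total):
    mu = (tj * ti) / total
    denom = np.sqrt(mu)                      # Poisson standard deviation
    with np.errstate(divide='ignore', invalid='ignore'):
        r = (count - mu) / denom
    return np.where(denom < 1e-12, 0.0, r)
```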
+def NBumiPearsonResidualsCombinedGPU(
+    raw_filename: str,
+    mask_filename: str,
+    fit_filename: str,
+    stats_filename: str,
+    output_filename_full: str,
+    output_filename_approx: str,
+    plot_summary_filename: str = None,
+    plot_detail_filename: str = None,
+    mode: str = "auto",
+    manual_target: int = 3000
+):
+    """
+    Calculates Full and Approximate residuals in a SINGLE PASS.
+    Includes "Sidecar" Visualization logic (Streaming Stats + Subsampling).
+    """
+    start_time = time.perf_counter()
+    print(f"FUNCTION: NBumiPearsonResidualsCombined() | FILE: {raw_filename}")
+
+    # 1. Load Mask
+    with open(mask_filename, 'rb') as f: mask_cpu = pickle.load(f)
+    mask_gpu = cupy.asarray(mask_cpu)
+    ng_filtered = int(cupy.sum(mask_gpu))
+
+    # 2. Manual Init
+    with h5py.File(raw_filename, 'r') as f:
+        indptr_cpu = f['X']['indptr'][:]
+        total_rows = len(indptr_cpu) - 1
+
+    device = ControlDevice(indptr=indptr_cpu, total_rows=total_rows, n_genes=ng_filtered, mode=mode, manual_target=manual_target)
+    nc = device.total_rows
+
+    print("Phase [1/2]: Initializing parameters...")
+    # Load parameters
+    with open(fit_filename, 'rb') as f: fit = pickle.load(f)
+
+    # Common params
+    total = fit['vals']['total']
+    tjs_gpu = cupy.asarray(fit['vals']['tjs'].values, dtype=cupy.float64)
+    tis_gpu = cupy.asarray(fit['vals']['tis'].values, dtype=cupy.float64)
+    sizes_gpu = cupy.asarray(fit['sizes'].values, dtype=cupy.float64)
+
+    # Setup Output Files
+    adata_in = anndata.read_h5ad(raw_filename, backed='r')
+    filtered_var = adata_in.var[mask_cpu]
+
+    # Create skeletons
+    adata_out_full = anndata.AnnData(obs=adata_in.obs, var=filtered_var)
+    adata_out_full.write_h5ad(output_filename_full, compression=None)
+
+    adata_out_approx = anndata.AnnData(obs=adata_in.obs, var=filtered_var)
+    adata_out_approx.write_h5ad(output_filename_approx, compression=None)
+
+    # --- VISUALIZATION SETUP (THE SIDECAR) ---
+    # 1. Sampling Rate (Target 5 Million Max)
+    TARGET_SAMPLES = 5_000_000
+    total_points = nc * ng_filtered
+
+    if total_points <= TARGET_SAMPLES:
+        sampling_rate = 1.0  # Take everything
+    else:
+        sampling_rate = TARGET_SAMPLES / total_points
+
+    print(f" > Visualization Sampling Rate: {sampling_rate*100:.4f}% (Target: {TARGET_SAMPLES:,} points)")
+
+    # 2. Accumulators for Plot 1 (Variance) - EXACT MATH
+    # We need Sum(x) and Sum(x^2) for: Raw, Approx, Full
+    acc_raw_sum = cupy.zeros(ng_filtered, dtype=cupy.float64)
+    # acc_raw_sq = cupy.zeros(ng_filtered, dtype=cupy.float64) # Not strictly needed for Mean X-axis, but good for completeness. Skipping to save VRAM.
+
+    acc_approx_sum = cupy.zeros(ng_filtered, dtype=cupy.float64)
+    acc_approx_sq = cupy.zeros(ng_filtered, dtype=cupy.float64)
+
+    acc_full_sum = cupy.zeros(ng_filtered, dtype=cupy.float64)
+    acc_full_sq = cupy.zeros(ng_filtered, dtype=cupy.float64)
+
+    # 3. Lists for Plots 2 & 3 (Scatter/KDE) - SAMPLED
+    viz_approx_samples = []
+    viz_full_samples = []
+    # -----------------------------------------
+
+    # Storage Chunk Calc
+    storage_chunk_rows = int(1_000_000_000 / (ng_filtered * 8))
+    if storage_chunk_rows > nc: storage_chunk_rows = nc
+    if storage_chunk_rows < 1: storage_chunk_rows = 1
+
+    # Open files
+    with h5py.File(output_filename_full, 'a') as f_full, h5py.File(output_filename_approx, 'a') as f_approx:
+        if 'X' in f_full: del f_full['X']
+        if 'X' in f_approx: del f_approx['X']
+
+        out_x_full = f_full.create_dataset('X', shape=(nc, ng_filtered), chunks=(storage_chunk_rows, ng_filtered), dtype='float64')
+        out_x_approx = f_approx.create_dataset('X', shape=(nc, ng_filtered), chunks=(storage_chunk_rows, ng_filtered), dtype='float64')
+
+        with h5py.File(raw_filename, 'r') as f_in:
+            h5_indptr = f_in['X']['indptr']
+            h5_data = f_in['X']['data']
+            h5_indices = f_in['X']['indices']
+
+            current_row = 0
+            while current_row < nc:
+                # [SAFE MODE] Multiplier 3.0 is safe for Index Sampling
+                end_row = device.get_next_chunk(current_row, mode='dense', overhead_multiplier=3.0)
+                if end_row is None or end_row <= current_row: break
+
+                chunk_size = end_row - current_row
+                print(f"Phase [2/2]: Processing rows {end_row} of {nc} | Chunk: {chunk_size}", end='\r')
+
+                start_idx, end_idx = h5_indptr[current_row], h5_indptr[end_row]
+
+                # Load Raw
+                data_gpu_raw = cupy.asarray(h5_data[start_idx:end_idx], dtype=cupy.float64)
+                indices_gpu_raw = cupy.asarray(h5_indices[start_idx:end_idx])
+                indptr_gpu_raw = cupy.asarray(h5_indptr[current_row:end_row+1] - h5_indptr[current_row])
+
+                chunk_gpu = cp_csr_matrix((data_gpu_raw, indices_gpu_raw, indptr_gpu_raw), shape=(chunk_size, len(mask_cpu)))
+                chunk_gpu = chunk_gpu[:, mask_gpu]
+                chunk_gpu.data = cupy.ceil(chunk_gpu.data)
+
+                # Dense Conversion
+                counts_dense = chunk_gpu.todense()
+                del chunk_gpu, data_gpu_raw, indices_gpu_raw, indptr_gpu_raw
+                cupy.get_default_memory_pool().free_all_blocks()
+
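The load step above is the standard trick for streaming a row range out of an on-disk CSR matrix: slice `data` and `indices` by the `indptr` offsets, then rebase the sliced `indptr` to zero. A CPU-side sketch with scipy standing in for cupy (the `X` group layout follows the AnnData CSR convention used here):

```python
# CPU sketch of the CSR row-slice load above; scipy stands in for cupy.
import h5py
from scipy.sparse import csr_matrix

def load_csr_rows(f: h5py.File, r0: int, r1: int, n_cols: int) -> csr_matrix:
    indptr = f['X']['indptr'][r0:r1 + 1]    # row pointers for the slice
    s, e = indptr[0], indptr[-1]            # nonzero range on disk
    return csr_matrix(
        (f['X']['data'][s:e], f['X']['indices'][s:e], indptr - s),
        shape=(r1 - r0, n_cols),
    )
```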
+                # --- VIZ ACCUMULATION 1: RAW MEAN ---
+                # Add raw sums to accumulator (column-wise sum)
+                acc_raw_sum += cupy.sum(counts_dense, axis=0)
+
+                # --- VIZ SAMPLING: GENERATE INDICES ---
+                # We pick indices NOW so we can grab the same points from both Approx and Full
+                chunk_total_items = chunk_size * ng_filtered
+                n_samples_chunk = int(chunk_total_items * sampling_rate)
+
+                if n_samples_chunk > 0:
+                    # Index Sampling: Zero VRAM overhead compared to Masking
+                    # Use flatten indices
+                    sample_indices = cupy.random.choice(chunk_total_items, size=n_samples_chunk, replace=False)
+                else:
+                    sample_indices = None
+
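Drawing flat indices once per chunk costs only `n_samples_chunk` integers of device memory (versus a full boolean mask), and it lets the very same positions be read out of both the Approx and Full matrices below, so every point in the shrinkage scatter plot is a paired observation. In miniature, with NumPy standing in for cupy:

```python
# Paired index sampling in miniature; NumPy stands in for cupy.
import numpy as np

rng = np.random.default_rng(0)
approx = rng.normal(size=(1000, 50))       # stand-in: approx residual chunk
full = approx * 0.8                        # stand-in: full residual chunk
n = int(approx.size * 0.01)                # e.g. a 1% sampling rate
idx = rng.choice(approx.size, size=n, replace=False)  # drawn once per chunk
pairs = approx.ravel()[idx], full.ravel()[idx]        # same positions in both
```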
+                # --- CALC 1: APPROX ---
+                approx_out = cupy.empty_like(counts_dense)
+                pearson_approx_kernel(
+                    counts_dense,
+                    tjs_gpu,
+                    tis_gpu[current_row:end_row][:, cupy.newaxis],
+                    total,
+                    approx_out
+                )
+
+                # [VIZ UPDATE: APPROX]
+                acc_approx_sum += cupy.sum(approx_out, axis=0)
+                acc_approx_sq += cupy.sum(approx_out**2, axis=0)
+
+                if sample_indices is not None:
+                    # Flatten temporarily to sample, then return to CPU
+                    # Note: take() returns a new array, small size
+                    sampled_vals = approx_out.ravel().take(sample_indices)
+                    viz_approx_samples.append(cupy.asnumpy(sampled_vals))
+
+                # [DISK WRITE: APPROX]
+                out_x_approx[current_row:end_row, :] = approx_out.get()
+                del approx_out
+
+                # --- CALC 2: FULL (In-place) ---
+                pearson_residual_kernel(
+                    counts_dense,
+                    tjs_gpu,
+                    tis_gpu[current_row:end_row][:, cupy.newaxis],
+                    sizes_gpu,
+                    total,
+                    counts_dense  # Overwrite input
+                )
+
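Passing `counts_dense` as both the input and the output argument works because the kernel is purely elementwise, so each element is read before it is overwritten; this halves the dense VRAM footprint relative to allocating a second matrix as the Approx pass does. A tiny standalone illustration of the same pattern (assumes a CUDA device is available):

```python
# In-place ElementwiseKernel output: the input buffer is reused, so no
# second dense array is allocated.
import cupy

square = cupy.ElementwiseKernel('float64 x', 'float64 y', 'y = x * x', 'square')
a = cupy.arange(4, dtype=cupy.float64)
square(a, a)   # a is both input and output; a now holds [0, 1, 4, 9]
```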
+                # [VIZ UPDATE: FULL]
+                acc_full_sum += cupy.sum(counts_dense, axis=0)
+                acc_full_sq += cupy.sum(counts_dense**2, axis=0)
+
+                if sample_indices is not None:
+                    sampled_vals = counts_dense.ravel().take(sample_indices)
+                    viz_full_samples.append(cupy.asnumpy(sampled_vals))
+
+                # [DISK WRITE: FULL]
+                out_x_full[current_row:end_row, :] = counts_dense.get()
+
+                del counts_dense, sample_indices
+                cupy.get_default_memory_pool().free_all_blocks()
+                current_row = end_row
+
+    print(f"\nPhase [2/2]: COMPLETE{' '*50}")
+
+    # ==========================================
+    # VIZ GENERATION (POST-PROCESS)
+    # ==========================================
+    if plot_summary_filename and plot_detail_filename:
+        print("Phase [Viz]: Generating Diagnostics...")
+
+        # 1. Finalize Variance Stats (GPU -> CPU)
+        # Var = E[X^2] - (E[X])^2
+        # Mean = Sum / N
+
+        # Pull everything to CPU once
+        raw_sum = cupy.asnumpy(acc_raw_sum)
+
+        approx_sum = cupy.asnumpy(acc_approx_sum)
+        approx_sq = cupy.asnumpy(acc_approx_sq)
+
+        full_sum = cupy.asnumpy(acc_full_sum)
+        full_sq = cupy.asnumpy(acc_full_sq)
+
+        # Calculate
+        mean_raw = raw_sum / nc
+
+        mean_approx = approx_sum / nc
+        mean_sq_approx = approx_sq / nc
+        var_approx = mean_sq_approx - (mean_approx**2)
+
+        mean_full = full_sum / nc
+        mean_sq_full = full_sq / nc
+        var_full = mean_sq_full - (mean_full**2)
+
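The means and variances above come straight out of the streaming accumulators via Var[X] = E[X^2] - (E[X])^2, so no second pass over the data is needed. One caveat: this identity can lose precision when the mean dwarfs the variance, which is tolerable for a diagnostic plot. A compact restatement, not part of the package:

```python
# Streaming per-column variance from sum and sum-of-squares accumulators.
import numpy as np

def column_variance(chunks, n_cols):
    s, sq, n = np.zeros(n_cols), np.zeros(n_cols), 0
    for c in chunks:                # each chunk has shape (rows, n_cols)
        s += c.sum(axis=0)
        sq += (c ** 2).sum(axis=0)
        n += c.shape[0]
    mean = s / n
    return sq / n - mean ** 2       # Var[X] = E[X^2] - (E[X])^2
```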
+        # 2. Finalize Samples
+        if viz_approx_samples:
+            flat_approx = np.concatenate(viz_approx_samples)
+            flat_full = np.concatenate(viz_full_samples)
+        else:
+            flat_approx = np.array([])
+            flat_full = np.array([])
+
+        print(f" > Samples Collected: {len(flat_approx):,} points")
+
+        # --- FILE 1: SUMMARY (1080p) ---
+        print(f" > Saving Summary Plot: {plot_summary_filename}")
+        fig1, ax1 = plt.subplots(1, 2, figsize=(16, 7))  # 16x7 inches ~ 1080p aspect
+
+        # Plot 1: Variance Stabilization
+        ax = ax1[0]
+        ax.scatter(mean_raw, var_approx, s=2, alpha=0.5, color='red', label='Approx (Poisson)')
+        ax.scatter(mean_raw, var_full, s=2, alpha=0.5, color='blue', label='Full (NB Pearson)')
+        ax.axhline(1.0, color='black', linestyle='--', linewidth=1)
+        ax.set_xscale('log')
+        ax.set_yscale('log')
+        ax.set_title("Variance Stabilization Check")
+        ax.set_xlabel("Mean Raw Expression (log)")
+        ax.set_ylabel("Variance of Residuals (log)")
+        ax.legend()
+        ax.grid(True, alpha=0.3)
+        ax.text(0.5, -0.15, "Goal: Blue dots should form a flat line at y=1",
+                transform=ax.transAxes, ha='center', fontsize=9,
+                bbox=dict(facecolor='#f0f0f0', edgecolor='black', alpha=0.7))
+
+        # Plot 3: Distribution
+        ax = ax1[1]
+        if len(flat_approx) > 100:
+            # Clip for cleaner KDE
+            mask_kde = (flat_approx > -10) & (flat_approx < 10)
+            sns.kdeplot(flat_approx[mask_kde], fill=True, color='red', alpha=0.3, label='Approx', ax=ax, warn_singular=False)
+            sns.kdeplot(flat_full[mask_kde], fill=True, color='blue', alpha=0.3, label='Full', ax=ax, warn_singular=False)
+        ax.set_xlim(-5, 5)
+        ax.set_title("Distribution of Residuals")
+        ax.set_xlabel("Residual Value")
+        ax.legend()
+        ax.grid(True, alpha=0.3)
+        ax.text(0.5, -0.15, "Goal: Blue curve should be tighter (narrower) than Red",
+                transform=ax.transAxes, ha='center', fontsize=9,
+                bbox=dict(facecolor='#f0f0f0', edgecolor='black', alpha=0.7))
+
+        plt.tight_layout()
+        plt.savefig(plot_summary_filename, dpi=120)  # 120 DPI * 16 inch = 1920 width
+        plt.close()
+
+        # --- FILE 2: DETAIL (4K) ---
+        print(f" > Saving Detail Plot: {plot_detail_filename}")
+        fig2, ax2 = plt.subplots(figsize=(20, 11))  # 20x11 inches ~ 4K aspect
+
+        if len(flat_approx) > 0:
+            ax2.scatter(flat_approx, flat_full, s=1, alpha=0.5, color='purple')
+
+            # Diagonal line
+            lims = [
+                np.min([ax2.get_xlim(), ax2.get_ylim()]),
+                np.max([ax2.get_xlim(), ax2.get_ylim()]),
+            ]
+            ax2.plot(lims, lims, 'k-', alpha=0.75, zorder=0)
+
+        ax2.set_title("Residual Shrinkage (Sampled)")
+        ax2.set_xlabel("Approx Residuals")
+        ax2.set_ylabel("Full Residuals")
+        ax2.grid(True, alpha=0.3)
+        ax2.text(0.5, -0.1, "Goal: Points below diagonal = Dispersion Penalty Working",
+                 transform=ax2.transAxes, ha='center', fontsize=12,
+                 bbox=dict(facecolor='#f0f0f0', edgecolor='black', alpha=0.7))
+
+        plt.tight_layout()
+        plt.savefig(plot_detail_filename, dpi=200)  # 200 DPI * 20 inch = 4000 width (4Kish)
+        plt.close()
+
+
+    if hasattr(adata_in, "file") and adata_in.file is not None: adata_in.file.close()
+    print(f"Total time: {time.perf_counter() - start_time:.2f} seconds.\n")
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
 
 setuptools.setup(
     name="M3Drop", # Name for pip (pip install M3Drop)
-    version="0.4.48",
+    version="0.4.50",
     author="Tallulah Andrews",
     author_email="tandrew6@uwo.ca",
     description="A Python implementation of the M3Drop single-cell RNA-seq analysis tool.",
@@ -1,178 +0,0 @@
-import pickle
-import time
-import sys
-import numpy as np
-import h5py
-import anndata
-import pandas as pd
-import os
-
-try:
-    import cupy
-    from cupy.sparse import csr_matrix as cp_csr_matrix
-    HAS_GPU = True
-except ImportError:
-    cupy = None
-    HAS_GPU = False
-
-# Package-compatible import
-try:
-    from .ControlDeviceGPU import ControlDevice
-except ImportError:
-    # Fallback for direct script execution (debugging)
-    try:
-        from ControlDeviceGPU import ControlDevice
-    except ImportError:
-        print("CRITICAL ERROR: 'ControlDeviceGPU.py' not found.")
-        sys.exit(1)
-
-# ==========================================
-# KERNELS
-# ==========================================
-
-pearson_residual_kernel = cupy.ElementwiseKernel(
-    'float64 count, float64 tj, float64 ti, float64 theta, float64 total', 'float64 out',
-    '''
-    double mu = (tj * ti) / total;
-    double denom_sq = mu + ( (mu * mu) / theta );
-    double denom = sqrt(denom_sq);
-    if (denom < 1e-12) { out = (count == 0.0) ? 0.0 : 0.0; } else { out = (count - mu) / denom; }
-    ''',
-    'pearson_residual_kernel'
-)
-
-pearson_approx_kernel = cupy.ElementwiseKernel(
-    'float64 count, float64 tj, float64 ti, float64 total', 'float64 out',
-    '''
-    double mu = (tj * ti) / total;
-    double denom = sqrt(mu);
-    if (denom < 1e-12) { out = 0.0; } else { out = (count - mu) / denom; }
-    ''',
-    'pearson_approx_kernel'
-)
-
-def NBumiPearsonResidualsCombinedGPU(
-    raw_filename: str,
-    mask_filename: str,
-    fit_filename: str,
-    stats_filename: str,
-    output_filename_full: str,
-    output_filename_approx: str,
-    mode: str = "auto",
-    manual_target: int = 3000
-):
-    """
-    UPGRADED: Calculates Full and Approximate residuals in a SINGLE PASS.
-    """
-    start_time = time.perf_counter()
-    print(f"FUNCTION: NBumiPearsonResidualsCombined() | FILE: {raw_filename}")
-
-    # 1. Load Mask
-    with open(mask_filename, 'rb') as f: mask_cpu = pickle.load(f)
-    mask_gpu = cupy.asarray(mask_cpu)
-    ng_filtered = int(cupy.sum(mask_gpu))
-
-    # 2. Manual Init
-    with h5py.File(raw_filename, 'r') as f: indptr_cpu = f['X']['indptr'][:]; total_rows = len(indptr_cpu) - 1
-    device = ControlDevice(indptr=indptr_cpu, total_rows=total_rows, n_genes=ng_filtered, mode=mode, manual_target=manual_target)
-    nc = device.total_rows
-
-    print("Phase [1/2]: Initializing parameters...")
-    # Load parameters for both calculations
-    with open(fit_filename, 'rb') as f: fit = pickle.load(f)
-    with open(stats_filename, 'rb') as f: stats = pickle.load(f)
-
-    # Common params
-    total = fit['vals']['total']
-    tjs_gpu = cupy.asarray(fit['vals']['tjs'].values, dtype=cupy.float64)
-    tis_gpu = cupy.asarray(fit['vals']['tis'].values, dtype=cupy.float64)
-
-    # Specific params
-    sizes_gpu = cupy.asarray(fit['sizes'].values, dtype=cupy.float64) # For Full
-
-    # Setup Output Files
-    adata_in = anndata.read_h5ad(raw_filename, backed='r')
-    filtered_var = adata_in.var[mask_cpu]
-
-    # Create skeletons
-    adata_out_full = anndata.AnnData(obs=adata_in.obs, var=filtered_var)
-    adata_out_full.write_h5ad(output_filename_full, compression=None)
-
-    adata_out_approx = anndata.AnnData(obs=adata_in.obs, var=filtered_var)
-    adata_out_approx.write_h5ad(output_filename_approx, compression=None)
-
-    storage_chunk_rows = int(1_000_000_000 / (ng_filtered * 8))
-    if storage_chunk_rows < 1: storage_chunk_rows = 1
-
-    # Open both files for writing simultaneously
-    with h5py.File(output_filename_full, 'a') as f_full, h5py.File(output_filename_approx, 'a') as f_approx:
-        if 'X' in f_full: del f_full['X']
-        if 'X' in f_approx: del f_approx['X']
-
-        out_x_full = f_full.create_dataset(
-            'X', shape=(nc, ng_filtered), chunks=(storage_chunk_rows, ng_filtered), dtype='float64'
-        )
-        out_x_approx = f_approx.create_dataset(
-            'X', shape=(nc, ng_filtered), chunks=(storage_chunk_rows, ng_filtered), dtype='float64'
-        )
-
-        with h5py.File(raw_filename, 'r') as f_in:
-            h5_indptr = f_in['X']['indptr']
-            h5_data = f_in['X']['data']
-            h5_indices = f_in['X']['indices']
-
-            current_row = 0
-            while current_row < nc:
-                end_row = device.get_next_chunk(current_row, mode='dense', overhead_multiplier=3.0) # Higher overhead for double write
-                if end_row is None or end_row <= current_row: break
-
-                chunk_size = end_row - current_row
-                print(f"Phase [2/2]: Processing rows {end_row} of {nc} | Chunk: {chunk_size}", end='\r')
-
-                start_idx, end_idx = h5_indptr[current_row], h5_indptr[end_row]
-
-                # Load & Filter
-                data_gpu_raw = cupy.asarray(h5_data[start_idx:end_idx], dtype=cupy.float64)
-                indices_gpu_raw = cupy.asarray(h5_indices[start_idx:end_idx])
-                indptr_gpu_raw = cupy.asarray(h5_indptr[current_row:end_row+1] - h5_indptr[current_row])
-
-                chunk_gpu = cp_csr_matrix((data_gpu_raw, indices_gpu_raw, indptr_gpu_raw), shape=(chunk_size, len(mask_cpu)))
-                chunk_gpu = chunk_gpu[:, mask_gpu]
-                chunk_gpu.data = cupy.ceil(chunk_gpu.data)
-
-                # Dense Conversion
-                counts_dense = chunk_gpu.todense()
-                del chunk_gpu, data_gpu_raw, indices_gpu_raw, indptr_gpu_raw
-                cupy.get_default_memory_pool().free_all_blocks()
-
-                # --- CALC 1: APPROX (Cheaper, do first) ---
-                approx_out = cupy.empty_like(counts_dense)
-                pearson_approx_kernel(
-                    counts_dense,
-                    tjs_gpu,
-                    tis_gpu[current_row:end_row][:, cupy.newaxis],
-                    total,
-                    approx_out
-                )
-                out_x_approx[current_row:end_row, :] = approx_out.get()
-                del approx_out
-
-                # --- CALC 2: FULL (In-place on counts_dense to save VRAM) ---
-                pearson_residual_kernel(
-                    counts_dense,
-                    tjs_gpu,
-                    tis_gpu[current_row:end_row][:, cupy.newaxis],
-                    sizes_gpu,
-                    total,
-                    counts_dense # Overwrite input
-                )
-                out_x_full[current_row:end_row, :] = counts_dense.get()
-
-                del counts_dense
-                cupy.get_default_memory_pool().free_all_blocks()
-                current_row = end_row
-
-    print(f"\nPhase [2/2]: COMPLETE{' '*50}")
-
-    if hasattr(adata_in, "file") and adata_in.file is not None: adata_in.file.close()
-    print(f"Total time: {time.perf_counter() - start_time:.2f} seconds.\n")
7 files without changes