M3Drop 0.4.52__tar.gz → 0.4.53__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: M3Drop
3
- Version: 0.4.52
3
+ Version: 0.4.53
4
4
  Summary: A Python implementation of the M3Drop single-cell RNA-seq analysis tool.
5
5
  Home-page: https://github.com/PragalvhaSharma/m3DropNew
6
6
  Author: Tallulah Andrews
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: M3Drop
3
- Version: 0.4.52
3
+ Version: 0.4.53
4
4
  Summary: A Python implementation of the M3Drop single-cell RNA-seq analysis tool.
5
5
  Home-page: https://github.com/PragalvhaSharma/m3DropNew
6
6
  Author: Tallulah Andrews
@@ -119,9 +119,7 @@ def NBumiPearsonResidualsCombinedGPU(
119
119
  print(f" > Visualization Sampling Rate: {sampling_rate*100:.4f}% (Target: {TARGET_SAMPLES:,} points)")
120
120
 
121
121
  # 2. Accumulators for Plot 1 (Variance) - EXACT MATH
122
- # We need Sum(x) and Sum(x^2) for: Raw, Approx, Full
123
122
  acc_raw_sum = cupy.zeros(ng_filtered, dtype=cupy.float64)
124
- # acc_raw_sq = cupy.zeros(ng_filtered, dtype=cupy.float64) # Not strictly needed for Mean X-axis, but good for completeness. Skipping to save VRAM.
125
123
 
126
124
  acc_approx_sum = cupy.zeros(ng_filtered, dtype=cupy.float64)
127
125
  acc_approx_sq = cupy.zeros(ng_filtered, dtype=cupy.float64)
@@ -154,7 +152,7 @@ def NBumiPearsonResidualsCombinedGPU(
154
152
 
155
153
  current_row = 0
156
154
  while current_row < nc:
157
- # [SAFE MODE] Multiplier 3.0 is safe for Index Sampling
155
+ # [SAFE MODE RESTORED] Multiplier 3.0 is efficient because we use IN-PLACE ops below.
158
156
  end_row = device.get_next_chunk(current_row, mode='dense', overhead_multiplier=3.0)
159
157
  if end_row is None or end_row <= current_row: break
160
158
 
@@ -178,24 +176,21 @@ def NBumiPearsonResidualsCombinedGPU(
178
176
  cupy.get_default_memory_pool().free_all_blocks()
179
177
 
180
178
  # --- VIZ ACCUMULATION 1: RAW MEAN ---
181
- # Add raw sums to accumulator (column-wise sum)
182
179
  acc_raw_sum += cupy.sum(counts_dense, axis=0)
183
180
 
184
181
  # --- VIZ SAMPLING: GENERATE INDICES ---
185
- # We pick indices NOW so we can grab the same points from both Approx and Full
186
182
  chunk_total_items = chunk_size * ng_filtered
187
183
  n_samples_chunk = int(chunk_total_items * sampling_rate)
188
184
 
189
185
  if n_samples_chunk > 0:
190
- # [CRITICAL FIX] Use randint (with replacement) instead of choice(replace=False).
191
- # 'choice' with replace=False tries to allocate a permutation of the ENTIRE chunk (3GB+).
192
- # 'randint' only allocates the indices we need (KB).
193
- # Given the huge population (300M+) and small sample (100k+), collisions are statistically negligible.
186
+ # [SAFE] Use randint (with replacement) to avoid VRAM spike
194
187
  sample_indices = cupy.random.randint(0, int(chunk_total_items), size=n_samples_chunk)
195
188
  else:
196
189
  sample_indices = None
197
190
 
198
- # --- CALC 1: APPROX ---
191
+ # ============================================
192
+ # CALC 1: APPROX (Optimize Order of Ops)
193
+ # ============================================
199
194
  approx_out = cupy.empty_like(counts_dense)
200
195
  pearson_approx_kernel(
201
196
  counts_dense,
@@ -205,40 +200,53 @@ def NBumiPearsonResidualsCombinedGPU(
205
200
  approx_out
206
201
  )
207
202
 
208
- # [VIZ UPDATE: APPROX]
203
+ # 1. Accumulate Sum (First Moment)
209
204
  acc_approx_sum += cupy.sum(approx_out, axis=0)
210
- acc_approx_sq += cupy.sum(approx_out**2, axis=0)
211
205
 
206
+ # 2. Sample (Before we destroy the data)
212
207
  if sample_indices is not None:
213
- # Flatten temporarily to sample, then return to CPU
214
- # Note: take() returns a new array, small size
215
208
  sampled_vals = approx_out.ravel().take(sample_indices)
216
209
  viz_approx_samples.append(cupy.asnumpy(sampled_vals))
217
210
 
218
- # [DISK WRITE: APPROX]
211
+ # 3. Write to Disk (Save the clean residuals)
219
212
  out_x_approx[current_row:end_row, :] = approx_out.get()
213
+
214
+ # 4. Square IN-PLACE (Destroying VRAM copy to create squares without allocation)
215
+ approx_out *= approx_out
216
+
217
+ # 5. Accumulate Sum of Squares (Second Moment)
218
+ acc_approx_sq += cupy.sum(approx_out, axis=0)
219
+
220
220
  del approx_out
221
221
 
222
- # --- CALC 2: FULL (In-place) ---
222
+ # ============================================
223
+ # CALC 2: FULL (Optimize Order of Ops)
224
+ # ============================================
223
225
  pearson_residual_kernel(
224
226
  counts_dense,
225
227
  tjs_gpu,
226
228
  tis_gpu[current_row:end_row][:, cupy.newaxis],
227
229
  sizes_gpu,
228
230
  total,
229
- counts_dense # Overwrite input
231
+ counts_dense # Overwrite input with Residuals
230
232
  )
231
233
 
232
- # [VIZ UPDATE: FULL]
234
+ # 1. Accumulate Sum
233
235
  acc_full_sum += cupy.sum(counts_dense, axis=0)
234
- acc_full_sq += cupy.sum(counts_dense**2, axis=0)
235
236
 
237
+ # 2. Sample
236
238
  if sample_indices is not None:
237
239
  sampled_vals = counts_dense.ravel().take(sample_indices)
238
240
  viz_full_samples.append(cupy.asnumpy(sampled_vals))
239
241
 
240
- # [DISK WRITE: FULL]
242
+ # 3. Write to Disk
241
243
  out_x_full[current_row:end_row, :] = counts_dense.get()
244
+
245
+ # 4. Square IN-PLACE
246
+ counts_dense *= counts_dense
247
+
248
+ # 5. Accumulate Sum of Squares
249
+ acc_full_sq += cupy.sum(counts_dense, axis=0)
242
250
 
243
251
  del counts_dense, sample_indices
244
252
  cupy.get_default_memory_pool().free_all_blocks()
@@ -253,10 +261,6 @@ def NBumiPearsonResidualsCombinedGPU(
253
261
  print("Phase [Viz]: Generating Diagnostics...")
254
262
 
255
263
  # 1. Finalize Variance Stats (GPU -> CPU)
256
- # Var = E[X^2] - (E[X])^2
257
- # Mean = Sum / N
258
-
259
- # Pull everything to CPU once
260
264
  raw_sum = cupy.asnumpy(acc_raw_sum)
261
265
 
262
266
  approx_sum = cupy.asnumpy(acc_approx_sum)
@@ -265,7 +269,7 @@ def NBumiPearsonResidualsCombinedGPU(
265
269
  full_sum = cupy.asnumpy(acc_full_sum)
266
270
  full_sq = cupy.asnumpy(acc_full_sq)
267
271
 
268
- # Calculate
272
+ # Calculate Variance: E[X^2] - (E[X])^2
269
273
  mean_raw = raw_sum / nc
270
274
 
271
275
  mean_approx = approx_sum / nc
@@ -288,7 +292,7 @@ def NBumiPearsonResidualsCombinedGPU(
288
292
 
289
293
  # --- FILE 1: SUMMARY (1080p) ---
290
294
  print(f" > Saving Summary Plot: {plot_summary_filename}")
291
- fig1, ax1 = plt.subplots(1, 2, figsize=(16, 7)) # 16x7 inches ~ 1080p aspect
295
+ fig1, ax1 = plt.subplots(1, 2, figsize=(16, 7))
292
296
 
293
297
  # Plot 1: Variance Stabilization
294
298
  ax = ax1[0]
@@ -309,7 +313,6 @@ def NBumiPearsonResidualsCombinedGPU(
309
313
  # Plot 3: Distribution
310
314
  ax = ax1[1]
311
315
  if len(flat_approx) > 100:
312
- # Clip for cleaner KDE
313
316
  mask_kde = (flat_approx > -10) & (flat_approx < 10)
314
317
  sns.kdeplot(flat_approx[mask_kde], fill=True, color='red', alpha=0.3, label='Approx', ax=ax, warn_singular=False)
315
318
  sns.kdeplot(flat_full[mask_kde], fill=True, color='blue', alpha=0.3, label='Full', ax=ax, warn_singular=False)
@@ -323,17 +326,15 @@ def NBumiPearsonResidualsCombinedGPU(
323
326
  bbox=dict(facecolor='#f0f0f0', edgecolor='black', alpha=0.7))
324
327
 
325
328
  plt.tight_layout()
326
- plt.savefig(plot_summary_filename, dpi=120) # 120 DPI * 16 inch = 1920 width
329
+ plt.savefig(plot_summary_filename, dpi=120)
327
330
  plt.close()
328
331
 
329
332
  # --- FILE 2: DETAIL (4K) ---
330
333
  print(f" > Saving Detail Plot: {plot_detail_filename}")
331
- fig2, ax2 = plt.subplots(figsize=(20, 11)) # 20x11 inches ~ 4K aspect
334
+ fig2, ax2 = plt.subplots(figsize=(20, 11))
332
335
 
333
336
  if len(flat_approx) > 0:
334
337
  ax2.scatter(flat_approx, flat_full, s=1, alpha=0.5, color='purple')
335
-
336
- # Diagonal line
337
338
  lims = [
338
339
  np.min([ax2.get_xlim(), ax2.get_ylim()]),
339
340
  np.max([ax2.get_xlim(), ax2.get_ylim()]),
@@ -349,9 +350,8 @@ def NBumiPearsonResidualsCombinedGPU(
349
350
  bbox=dict(facecolor='#f0f0f0', edgecolor='black', alpha=0.7))
350
351
 
351
352
  plt.tight_layout()
352
- plt.savefig(plot_detail_filename, dpi=200) # 200 DPI * 20 inch = 4000 width (4Kish)
353
+ plt.savefig(plot_detail_filename, dpi=200)
353
354
  plt.close()
354
355
 
355
-
356
356
  if hasattr(adata_in, "file") and adata_in.file is not None: adata_in.file.close()
357
357
  print(f"Total time: {time.perf_counter() - start_time:.2f} seconds.\n")
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
5
5
 
6
6
  setuptools.setup(
7
7
  name="M3Drop", # Name for pip (pip install M3Drop)
8
- version="0.4.52",
8
+ version="0.4.53",
9
9
  author="Tallulah Andrews",
10
10
  author_email="tandrew6@uwo.ca",
11
11
  description="A Python implementation of the M3Drop single-cell RNA-seq analysis tool.",
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes