M3Drop 0.4.51__tar.gz → 0.4.53__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: M3Drop
3
- Version: 0.4.51
3
+ Version: 0.4.53
4
4
  Summary: A Python implementation of the M3Drop single-cell RNA-seq analysis tool.
5
5
  Home-page: https://github.com/PragalvhaSharma/m3DropNew
6
6
  Author: Tallulah Andrews
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: M3Drop
3
- Version: 0.4.51
3
+ Version: 0.4.53
4
4
  Summary: A Python implementation of the M3Drop single-cell RNA-seq analysis tool.
5
5
  Home-page: https://github.com/PragalvhaSharma/m3DropNew
6
6
  Author: Tallulah Andrews
@@ -119,9 +119,7 @@ def NBumiPearsonResidualsCombinedGPU(
119
119
  print(f" > Visualization Sampling Rate: {sampling_rate*100:.4f}% (Target: {TARGET_SAMPLES:,} points)")
120
120
 
121
121
  # 2. Accumulators for Plot 1 (Variance) - EXACT MATH
122
- # We need Sum(x) and Sum(x^2) for: Raw, Approx, Full
123
122
  acc_raw_sum = cupy.zeros(ng_filtered, dtype=cupy.float64)
124
- # acc_raw_sq = cupy.zeros(ng_filtered, dtype=cupy.float64) # Not strictly needed for Mean X-axis, but good for completeness. Skipping to save VRAM.
125
123
 
126
124
  acc_approx_sum = cupy.zeros(ng_filtered, dtype=cupy.float64)
127
125
  acc_approx_sq = cupy.zeros(ng_filtered, dtype=cupy.float64)
@@ -154,7 +152,7 @@ def NBumiPearsonResidualsCombinedGPU(
154
152
 
155
153
  current_row = 0
156
154
  while current_row < nc:
157
- # [SAFE MODE] Multiplier 3.0 is safe for Index Sampling
155
+ # [SAFE MODE RESTORED] Multiplier 3.0 is efficient because we use IN-PLACE ops below.
158
156
  end_row = device.get_next_chunk(current_row, mode='dense', overhead_multiplier=3.0)
159
157
  if end_row is None or end_row <= current_row: break
160
158
 
@@ -178,23 +176,21 @@ def NBumiPearsonResidualsCombinedGPU(
178
176
  cupy.get_default_memory_pool().free_all_blocks()
179
177
 
180
178
  # --- VIZ ACCUMULATION 1: RAW MEAN ---
181
- # Add raw sums to accumulator (column-wise sum)
182
179
  acc_raw_sum += cupy.sum(counts_dense, axis=0)
183
180
 
184
181
  # --- VIZ SAMPLING: GENERATE INDICES ---
185
- # We pick indices NOW so we can grab the same points from both Approx and Full
186
182
  chunk_total_items = chunk_size * ng_filtered
187
183
  n_samples_chunk = int(chunk_total_items * sampling_rate)
188
184
 
189
185
  if n_samples_chunk > 0:
190
- # Index Sampling: Zero VRAM overhead compared to Masking
191
- # Use flatten indices
192
- # [FIXED LINE BELOW] Added int() cast for safety
193
- sample_indices = cupy.random.choice(int(chunk_total_items), size=n_samples_chunk, replace=False)
186
+ # [SAFE] Use randint (with replacement) to avoid VRAM spike
187
+ sample_indices = cupy.random.randint(0, int(chunk_total_items), size=n_samples_chunk)
194
188
  else:
195
189
  sample_indices = None
196
190
 
197
- # --- CALC 1: APPROX ---
191
+ # ============================================
192
+ # CALC 1: APPROX (Optimize Order of Ops)
193
+ # ============================================
198
194
  approx_out = cupy.empty_like(counts_dense)
199
195
  pearson_approx_kernel(
200
196
  counts_dense,
@@ -204,40 +200,53 @@ def NBumiPearsonResidualsCombinedGPU(
204
200
  approx_out
205
201
  )
206
202
 
207
- # [VIZ UPDATE: APPROX]
203
+ # 1. Accumulate Sum (First Moment)
208
204
  acc_approx_sum += cupy.sum(approx_out, axis=0)
209
- acc_approx_sq += cupy.sum(approx_out**2, axis=0)
210
205
 
206
+ # 2. Sample (Before we destroy the data)
211
207
  if sample_indices is not None:
212
- # Flatten temporarily to sample, then return to CPU
213
- # Note: take() returns a new array, small size
214
208
  sampled_vals = approx_out.ravel().take(sample_indices)
215
209
  viz_approx_samples.append(cupy.asnumpy(sampled_vals))
216
210
 
217
- # [DISK WRITE: APPROX]
211
+ # 3. Write to Disk (Save the clean residuals)
218
212
  out_x_approx[current_row:end_row, :] = approx_out.get()
213
+
214
+ # 4. Square IN-PLACE (Destroying VRAM copy to create squares without allocation)
215
+ approx_out *= approx_out
216
+
217
+ # 5. Accumulate Sum of Squares (Second Moment)
218
+ acc_approx_sq += cupy.sum(approx_out, axis=0)
219
+
219
220
  del approx_out
220
221
 
221
- # --- CALC 2: FULL (In-place) ---
222
+ # ============================================
223
+ # CALC 2: FULL (Optimize Order of Ops)
224
+ # ============================================
222
225
  pearson_residual_kernel(
223
226
  counts_dense,
224
227
  tjs_gpu,
225
228
  tis_gpu[current_row:end_row][:, cupy.newaxis],
226
229
  sizes_gpu,
227
230
  total,
228
- counts_dense # Overwrite input
231
+ counts_dense # Overwrite input with Residuals
229
232
  )
230
233
 
231
- # [VIZ UPDATE: FULL]
234
+ # 1. Accumulate Sum
232
235
  acc_full_sum += cupy.sum(counts_dense, axis=0)
233
- acc_full_sq += cupy.sum(counts_dense**2, axis=0)
234
236
 
237
+ # 2. Sample
235
238
  if sample_indices is not None:
236
239
  sampled_vals = counts_dense.ravel().take(sample_indices)
237
240
  viz_full_samples.append(cupy.asnumpy(sampled_vals))
238
241
 
239
- # [DISK WRITE: FULL]
242
+ # 3. Write to Disk
240
243
  out_x_full[current_row:end_row, :] = counts_dense.get()
244
+
245
+ # 4. Square IN-PLACE
246
+ counts_dense *= counts_dense
247
+
248
+ # 5. Accumulate Sum of Squares
249
+ acc_full_sq += cupy.sum(counts_dense, axis=0)
241
250
 
242
251
  del counts_dense, sample_indices
243
252
  cupy.get_default_memory_pool().free_all_blocks()
@@ -252,10 +261,6 @@ def NBumiPearsonResidualsCombinedGPU(
252
261
  print("Phase [Viz]: Generating Diagnostics...")
253
262
 
254
263
  # 1. Finalize Variance Stats (GPU -> CPU)
255
- # Var = E[X^2] - (E[X])^2
256
- # Mean = Sum / N
257
-
258
- # Pull everything to CPU once
259
264
  raw_sum = cupy.asnumpy(acc_raw_sum)
260
265
 
261
266
  approx_sum = cupy.asnumpy(acc_approx_sum)
@@ -264,7 +269,7 @@ def NBumiPearsonResidualsCombinedGPU(
264
269
  full_sum = cupy.asnumpy(acc_full_sum)
265
270
  full_sq = cupy.asnumpy(acc_full_sq)
266
271
 
267
- # Calculate
272
+ # Calculate Variance: E[X^2] - (E[X])^2
268
273
  mean_raw = raw_sum / nc
269
274
 
270
275
  mean_approx = approx_sum / nc
@@ -287,7 +292,7 @@ def NBumiPearsonResidualsCombinedGPU(
287
292
 
288
293
  # --- FILE 1: SUMMARY (1080p) ---
289
294
  print(f" > Saving Summary Plot: {plot_summary_filename}")
290
- fig1, ax1 = plt.subplots(1, 2, figsize=(16, 7)) # 16x7 inches ~ 1080p aspect
295
+ fig1, ax1 = plt.subplots(1, 2, figsize=(16, 7))
291
296
 
292
297
  # Plot 1: Variance Stabilization
293
298
  ax = ax1[0]
@@ -308,7 +313,6 @@ def NBumiPearsonResidualsCombinedGPU(
308
313
  # Plot 3: Distribution
309
314
  ax = ax1[1]
310
315
  if len(flat_approx) > 100:
311
- # Clip for cleaner KDE
312
316
  mask_kde = (flat_approx > -10) & (flat_approx < 10)
313
317
  sns.kdeplot(flat_approx[mask_kde], fill=True, color='red', alpha=0.3, label='Approx', ax=ax, warn_singular=False)
314
318
  sns.kdeplot(flat_full[mask_kde], fill=True, color='blue', alpha=0.3, label='Full', ax=ax, warn_singular=False)
@@ -322,17 +326,15 @@ def NBumiPearsonResidualsCombinedGPU(
322
326
  bbox=dict(facecolor='#f0f0f0', edgecolor='black', alpha=0.7))
323
327
 
324
328
  plt.tight_layout()
325
- plt.savefig(plot_summary_filename, dpi=120) # 120 DPI * 16 inch = 1920 width
329
+ plt.savefig(plot_summary_filename, dpi=120)
326
330
  plt.close()
327
331
 
328
332
  # --- FILE 2: DETAIL (4K) ---
329
333
  print(f" > Saving Detail Plot: {plot_detail_filename}")
330
- fig2, ax2 = plt.subplots(figsize=(20, 11)) # 20x11 inches ~ 4K aspect
334
+ fig2, ax2 = plt.subplots(figsize=(20, 11))
331
335
 
332
336
  if len(flat_approx) > 0:
333
337
  ax2.scatter(flat_approx, flat_full, s=1, alpha=0.5, color='purple')
334
-
335
- # Diagonal line
336
338
  lims = [
337
339
  np.min([ax2.get_xlim(), ax2.get_ylim()]),
338
340
  np.max([ax2.get_xlim(), ax2.get_ylim()]),
@@ -348,9 +350,8 @@ def NBumiPearsonResidualsCombinedGPU(
348
350
  bbox=dict(facecolor='#f0f0f0', edgecolor='black', alpha=0.7))
349
351
 
350
352
  plt.tight_layout()
351
- plt.savefig(plot_detail_filename, dpi=200) # 200 DPI * 20 inch = 4000 width (4Kish)
353
+ plt.savefig(plot_detail_filename, dpi=200)
352
354
  plt.close()
353
355
 
354
-
355
356
  if hasattr(adata_in, "file") and adata_in.file is not None: adata_in.file.close()
356
357
  print(f"Total time: {time.perf_counter() - start_time:.2f} seconds.\n")
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
5
5
 
6
6
  setuptools.setup(
7
7
  name="M3Drop", # Name for pip (pip install M3Drop)
8
- version="0.4.51",
8
+ version="0.4.53",
9
9
  author="Tallulah Andrews",
10
10
  author_email="tandrew6@uwo.ca",
11
11
  description="A Python implementation of the M3Drop single-cell RNA-seq analysis tool.",
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes