M3Drop 0.4.52__tar.gz → 0.4.54__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: M3Drop
3
- Version: 0.4.52
3
+ Version: 0.4.54
4
4
  Summary: A Python implementation of the M3Drop single-cell RNA-seq analysis tool.
5
5
  Home-page: https://github.com/PragalvhaSharma/m3DropNew
6
6
  Author: Tallulah Andrews
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: M3Drop
3
- Version: 0.4.52
3
+ Version: 0.4.54
4
4
  Summary: A Python implementation of the M3Drop single-cell RNA-seq analysis tool.
5
5
  Home-page: https://github.com/PragalvhaSharma/m3DropNew
6
6
  Author: Tallulah Andrews
@@ -119,9 +119,7 @@ def NBumiPearsonResidualsCombinedGPU(
119
119
  print(f" > Visualization Sampling Rate: {sampling_rate*100:.4f}% (Target: {TARGET_SAMPLES:,} points)")
120
120
 
121
121
  # 2. Accumulators for Plot 1 (Variance) - EXACT MATH
122
- # We need Sum(x) and Sum(x^2) for: Raw, Approx, Full
123
122
  acc_raw_sum = cupy.zeros(ng_filtered, dtype=cupy.float64)
124
- # acc_raw_sq = cupy.zeros(ng_filtered, dtype=cupy.float64) # Not strictly needed for Mean X-axis, but good for completeness. Skipping to save VRAM.
125
123
 
126
124
  acc_approx_sum = cupy.zeros(ng_filtered, dtype=cupy.float64)
127
125
  acc_approx_sq = cupy.zeros(ng_filtered, dtype=cupy.float64)
@@ -154,7 +152,7 @@ def NBumiPearsonResidualsCombinedGPU(
154
152
 
155
153
  current_row = 0
156
154
  while current_row < nc:
157
- # [SAFE MODE] Multiplier 3.0 is safe for Index Sampling
155
+ # [SAFE MODE RESTORED] Multiplier 3.0 is efficient because we use IN-PLACE ops below.
158
156
  end_row = device.get_next_chunk(current_row, mode='dense', overhead_multiplier=3.0)
159
157
  if end_row is None or end_row <= current_row: break
160
158
 
@@ -182,20 +180,18 @@ def NBumiPearsonResidualsCombinedGPU(
182
180
  acc_raw_sum += cupy.sum(counts_dense, axis=0)
183
181
 
184
182
  # --- VIZ SAMPLING: GENERATE INDICES ---
185
- # We pick indices NOW so we can grab the same points from both Approx and Full
186
183
  chunk_total_items = chunk_size * ng_filtered
187
184
  n_samples_chunk = int(chunk_total_items * sampling_rate)
188
185
 
189
186
  if n_samples_chunk > 0:
190
- # [CRITICAL FIX] Use randint (with replacement) instead of choice(replace=False).
191
- # 'choice' with replace=False tries to allocate a permutation of the ENTIRE chunk (3GB+).
192
- # 'randint' only allocates the indices we need (KB).
193
- # Given the huge population (300M+) and small sample (100k+), collisions are statistically negligible.
187
+ # [SAFE] Use randint (with replacement) to avoid VRAM spike
194
188
  sample_indices = cupy.random.randint(0, int(chunk_total_items), size=n_samples_chunk)
195
189
  else:
196
190
  sample_indices = None
197
191
 
198
- # --- CALC 1: APPROX ---
192
+ # ============================================
193
+ # CALC 1: APPROX (Optimize Order of Ops)
194
+ # ============================================
199
195
  approx_out = cupy.empty_like(counts_dense)
200
196
  pearson_approx_kernel(
201
197
  counts_dense,
@@ -205,40 +201,53 @@ def NBumiPearsonResidualsCombinedGPU(
205
201
  approx_out
206
202
  )
207
203
 
208
- # [VIZ UPDATE: APPROX]
204
+ # 1. Accumulate Sum (First Moment)
209
205
  acc_approx_sum += cupy.sum(approx_out, axis=0)
210
- acc_approx_sq += cupy.sum(approx_out**2, axis=0)
211
206
 
207
+ # 2. Sample (Before we destroy the data)
212
208
  if sample_indices is not None:
213
- # Flatten temporarily to sample, then return to CPU
214
- # Note: take() returns a new array, small size
215
209
  sampled_vals = approx_out.ravel().take(sample_indices)
216
210
  viz_approx_samples.append(cupy.asnumpy(sampled_vals))
217
211
 
218
- # [DISK WRITE: APPROX]
212
+ # 3. Write to Disk (Save the clean residuals)
219
213
  out_x_approx[current_row:end_row, :] = approx_out.get()
214
+
215
+ # 4. Square IN-PLACE (Destroying VRAM copy to create squares without allocation)
216
+ approx_out *= approx_out
217
+
218
+ # 5. Accumulate Sum of Squares (Second Moment)
219
+ acc_approx_sq += cupy.sum(approx_out, axis=0)
220
+
220
221
  del approx_out
221
222
 
222
- # --- CALC 2: FULL (In-place) ---
223
+ # ============================================
224
+ # CALC 2: FULL (Optimize Order of Ops)
225
+ # ============================================
223
226
  pearson_residual_kernel(
224
227
  counts_dense,
225
228
  tjs_gpu,
226
229
  tis_gpu[current_row:end_row][:, cupy.newaxis],
227
230
  sizes_gpu,
228
231
  total,
229
- counts_dense # Overwrite input
232
+ counts_dense # Overwrite input with Residuals
230
233
  )
231
234
 
232
- # [VIZ UPDATE: FULL]
235
+ # 1. Accumulate Sum
233
236
  acc_full_sum += cupy.sum(counts_dense, axis=0)
234
- acc_full_sq += cupy.sum(counts_dense**2, axis=0)
235
237
 
238
+ # 2. Sample
236
239
  if sample_indices is not None:
237
240
  sampled_vals = counts_dense.ravel().take(sample_indices)
238
241
  viz_full_samples.append(cupy.asnumpy(sampled_vals))
239
242
 
240
- # [DISK WRITE: FULL]
243
+ # 3. Write to Disk
241
244
  out_x_full[current_row:end_row, :] = counts_dense.get()
245
+
246
+ # 4. Square IN-PLACE
247
+ counts_dense *= counts_dense
248
+
249
+ # 5. Accumulate Sum of Squares
250
+ acc_full_sq += cupy.sum(counts_dense, axis=0)
242
251
 
243
252
  del counts_dense, sample_indices
244
253
  cupy.get_default_memory_pool().free_all_blocks()
@@ -253,10 +262,6 @@ def NBumiPearsonResidualsCombinedGPU(
253
262
  print("Phase [Viz]: Generating Diagnostics...")
254
263
 
255
264
  # 1. Finalize Variance Stats (GPU -> CPU)
256
- # Var = E[X^2] - (E[X])^2
257
- # Mean = Sum / N
258
-
259
- # Pull everything to CPU once
260
265
  raw_sum = cupy.asnumpy(acc_raw_sum)
261
266
 
262
267
  approx_sum = cupy.asnumpy(acc_approx_sum)
@@ -265,7 +270,7 @@ def NBumiPearsonResidualsCombinedGPU(
265
270
  full_sum = cupy.asnumpy(acc_full_sum)
266
271
  full_sq = cupy.asnumpy(acc_full_sq)
267
272
 
268
- # Calculate
273
+ # Calculate Variance: E[X^2] - (E[X])^2
269
274
  mean_raw = raw_sum / nc
270
275
 
271
276
  mean_approx = approx_sum / nc
@@ -288,7 +293,7 @@ def NBumiPearsonResidualsCombinedGPU(
288
293
 
289
294
  # --- FILE 1: SUMMARY (1080p) ---
290
295
  print(f" > Saving Summary Plot: {plot_summary_filename}")
291
- fig1, ax1 = plt.subplots(1, 2, figsize=(16, 7)) # 16x7 inches ~ 1080p aspect
296
+ fig1, ax1 = plt.subplots(1, 2, figsize=(16, 7))
292
297
 
293
298
  # Plot 1: Variance Stabilization
294
299
  ax = ax1[0]
@@ -301,18 +306,25 @@ def NBumiPearsonResidualsCombinedGPU(
301
306
  ax.set_xlabel("Mean Raw Expression (log)")
302
307
  ax.set_ylabel("Variance of Residuals (log)")
303
308
  ax.legend()
304
- ax.grid(True, alpha=0.3)
309
+ ax.grid(True, which='both', linestyle='--', alpha=0.5) # Enhanced Grid
305
310
  ax.text(0.5, -0.15, "Goal: Blue dots should form a flat line at y=1",
306
311
  transform=ax.transAxes, ha='center', fontsize=9,
307
312
  bbox=dict(facecolor='#f0f0f0', edgecolor='black', alpha=0.7))
308
313
 
309
- # Plot 3: Distribution
314
+ # Plot 3: Distribution (Histogram + KDE Overlay)
310
315
  ax = ax1[1]
311
316
  if len(flat_approx) > 100:
312
- # Clip for cleaner KDE
313
317
  mask_kde = (flat_approx > -10) & (flat_approx < 10)
314
- sns.kdeplot(flat_approx[mask_kde], fill=True, color='red', alpha=0.3, label='Approx', ax=ax, warn_singular=False)
315
- sns.kdeplot(flat_full[mask_kde], fill=True, color='blue', alpha=0.3, label='Full', ax=ax, warn_singular=False)
318
+
319
+ # Histograms (The Truth)
320
+ bins = np.linspace(-5, 5, 100)
321
+ ax.hist(flat_approx[mask_kde], bins=bins, color='red', alpha=0.2, density=True, label='_nolegend_')
322
+ ax.hist(flat_full[mask_kde], bins=bins, color='blue', alpha=0.2, density=True, label='_nolegend_')
323
+
324
+ # KDEs (The Trend)
325
+ sns.kdeplot(flat_approx[mask_kde], fill=False, color='red', linewidth=2, label='Approx', ax=ax, warn_singular=False)
326
+ sns.kdeplot(flat_full[mask_kde], fill=False, color='blue', linewidth=2, label='Full', ax=ax, warn_singular=False)
327
+
316
328
  ax.set_xlim(-5, 5)
317
329
  ax.set_title("Distribution of Residuals")
318
330
  ax.set_xlabel("Residual Value")
@@ -323,17 +335,16 @@ def NBumiPearsonResidualsCombinedGPU(
323
335
  bbox=dict(facecolor='#f0f0f0', edgecolor='black', alpha=0.7))
324
336
 
325
337
  plt.tight_layout()
326
- plt.savefig(plot_summary_filename, dpi=120) # 120 DPI * 16 inch = 1920 width
338
+ plt.savefig(plot_summary_filename, dpi=120)
327
339
  plt.close()
328
340
 
329
341
  # --- FILE 2: DETAIL (4K) ---
330
342
  print(f" > Saving Detail Plot: {plot_detail_filename}")
331
- fig2, ax2 = plt.subplots(figsize=(20, 11)) # 20x11 inches ~ 4K aspect
343
+ fig2, ax2 = plt.subplots(figsize=(20, 11))
332
344
 
333
345
  if len(flat_approx) > 0:
334
346
  ax2.scatter(flat_approx, flat_full, s=1, alpha=0.5, color='purple')
335
347
 
336
- # Diagonal line
337
348
  lims = [
338
349
  np.min([ax2.get_xlim(), ax2.get_ylim()]),
339
350
  np.max([ax2.get_xlim(), ax2.get_ylim()]),
@@ -349,9 +360,8 @@ def NBumiPearsonResidualsCombinedGPU(
349
360
  bbox=dict(facecolor='#f0f0f0', edgecolor='black', alpha=0.7))
350
361
 
351
362
  plt.tight_layout()
352
- plt.savefig(plot_detail_filename, dpi=200) # 200 DPI * 20 inch = 4000 width (4Kish)
363
+ plt.savefig(plot_detail_filename, dpi=200)
353
364
  plt.close()
354
365
 
355
-
356
366
  if hasattr(adata_in, "file") and adata_in.file is not None: adata_in.file.close()
357
367
  print(f"Total time: {time.perf_counter() - start_time:.2f} seconds.\n")
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
5
5
 
6
6
  setuptools.setup(
7
7
  name="M3Drop", # Name for pip (pip install M3Drop)
8
- version="0.4.52",
8
+ version="0.4.54",
9
9
  author="Tallulah Andrews",
10
10
  author_email="tandrew6@uwo.ca",
11
11
  description="A Python implementation of the M3Drop single-cell RNA-seq analysis tool.",
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes