M3Drop 0.4.52 (py3-none-any.whl) → 0.4.53 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- m3Drop/NormalizationGPU.py +33 -33
- {m3drop-0.4.52.dist-info → m3drop-0.4.53.dist-info}/METADATA +1 -1
- {m3drop-0.4.52.dist-info → m3drop-0.4.53.dist-info}/RECORD +6 -6
- {m3drop-0.4.52.dist-info → m3drop-0.4.53.dist-info}/WHEEL +0 -0
- {m3drop-0.4.52.dist-info → m3drop-0.4.53.dist-info}/licenses/LICENSE +0 -0
- {m3drop-0.4.52.dist-info → m3drop-0.4.53.dist-info}/top_level.txt +0 -0
m3Drop/NormalizationGPU.py
CHANGED
|
@@ -119,9 +119,7 @@ def NBumiPearsonResidualsCombinedGPU(
|
|
|
119
119
|
print(f" > Visualization Sampling Rate: {sampling_rate*100:.4f}% (Target: {TARGET_SAMPLES:,} points)")
|
|
120
120
|
|
|
121
121
|
# 2. Accumulators for Plot 1 (Variance) - EXACT MATH
|
|
122
|
-
# We need Sum(x) and Sum(x^2) for: Raw, Approx, Full
|
|
123
122
|
acc_raw_sum = cupy.zeros(ng_filtered, dtype=cupy.float64)
|
|
124
|
-
# acc_raw_sq = cupy.zeros(ng_filtered, dtype=cupy.float64) # Not strictly needed for Mean X-axis, but good for completeness. Skipping to save VRAM.
|
|
125
123
|
|
|
126
124
|
acc_approx_sum = cupy.zeros(ng_filtered, dtype=cupy.float64)
|
|
127
125
|
acc_approx_sq = cupy.zeros(ng_filtered, dtype=cupy.float64)
|
|
@@ -154,7 +152,7 @@ def NBumiPearsonResidualsCombinedGPU(
|
|
|
154
152
|
|
|
155
153
|
current_row = 0
|
|
156
154
|
while current_row < nc:
|
|
157
|
-
# [SAFE MODE] Multiplier 3.0 is
|
|
155
|
+
# [SAFE MODE RESTORED] Multiplier 3.0 is efficient because we use IN-PLACE ops below.
|
|
158
156
|
end_row = device.get_next_chunk(current_row, mode='dense', overhead_multiplier=3.0)
|
|
159
157
|
if end_row is None or end_row <= current_row: break
|
|
160
158
|
|
|
@@ -178,24 +176,21 @@ def NBumiPearsonResidualsCombinedGPU(
|
|
|
178
176
|
cupy.get_default_memory_pool().free_all_blocks()
|
|
179
177
|
|
|
180
178
|
# --- VIZ ACCUMULATION 1: RAW MEAN ---
|
|
181
|
-
# Add raw sums to accumulator (column-wise sum)
|
|
182
179
|
acc_raw_sum += cupy.sum(counts_dense, axis=0)
|
|
183
180
|
|
|
184
181
|
# --- VIZ SAMPLING: GENERATE INDICES ---
|
|
185
|
-
# We pick indices NOW so we can grab the same points from both Approx and Full
|
|
186
182
|
chunk_total_items = chunk_size * ng_filtered
|
|
187
183
|
n_samples_chunk = int(chunk_total_items * sampling_rate)
|
|
188
184
|
|
|
189
185
|
if n_samples_chunk > 0:
|
|
190
|
-
# [
|
|
191
|
-
# 'choice' with replace=False tries to allocate a permutation of the ENTIRE chunk (3GB+).
|
|
192
|
-
# 'randint' only allocates the indices we need (KB).
|
|
193
|
-
# Given the huge population (300M+) and small sample (100k+), collisions are statistically negligible.
|
|
186
|
+
# [SAFE] Use randint (with replacement) to avoid VRAM spike
|
|
194
187
|
sample_indices = cupy.random.randint(0, int(chunk_total_items), size=n_samples_chunk)
|
|
195
188
|
else:
|
|
196
189
|
sample_indices = None
|
|
197
190
|
|
|
198
|
-
#
|
|
191
|
+
# ============================================
|
|
192
|
+
# CALC 1: APPROX (Optimize Order of Ops)
|
|
193
|
+
# ============================================
|
|
199
194
|
approx_out = cupy.empty_like(counts_dense)
|
|
200
195
|
pearson_approx_kernel(
|
|
201
196
|
counts_dense,
|
|
@@ -205,40 +200,53 @@ def NBumiPearsonResidualsCombinedGPU(
|
|
|
205
200
|
approx_out
|
|
206
201
|
)
|
|
207
202
|
|
|
208
|
-
#
|
|
203
|
+
# 1. Accumulate Sum (First Moment)
|
|
209
204
|
acc_approx_sum += cupy.sum(approx_out, axis=0)
|
|
210
|
-
acc_approx_sq += cupy.sum(approx_out**2, axis=0)
|
|
211
205
|
|
|
206
|
+
# 2. Sample (Before we destroy the data)
|
|
212
207
|
if sample_indices is not None:
|
|
213
|
-
# Flatten temporarily to sample, then return to CPU
|
|
214
|
-
# Note: take() returns a new array, small size
|
|
215
208
|
sampled_vals = approx_out.ravel().take(sample_indices)
|
|
216
209
|
viz_approx_samples.append(cupy.asnumpy(sampled_vals))
|
|
217
210
|
|
|
218
|
-
#
|
|
211
|
+
# 3. Write to Disk (Save the clean residuals)
|
|
219
212
|
out_x_approx[current_row:end_row, :] = approx_out.get()
|
|
213
|
+
|
|
214
|
+
# 4. Square IN-PLACE (Destroying VRAM copy to create squares without allocation)
|
|
215
|
+
approx_out *= approx_out
|
|
216
|
+
|
|
217
|
+
# 5. Accumulate Sum of Squares (Second Moment)
|
|
218
|
+
acc_approx_sq += cupy.sum(approx_out, axis=0)
|
|
219
|
+
|
|
220
220
|
del approx_out
|
|
221
221
|
|
|
222
|
-
#
|
|
222
|
+
# ============================================
|
|
223
|
+
# CALC 2: FULL (Optimize Order of Ops)
|
|
224
|
+
# ============================================
|
|
223
225
|
pearson_residual_kernel(
|
|
224
226
|
counts_dense,
|
|
225
227
|
tjs_gpu,
|
|
226
228
|
tis_gpu[current_row:end_row][:, cupy.newaxis],
|
|
227
229
|
sizes_gpu,
|
|
228
230
|
total,
|
|
229
|
-
counts_dense # Overwrite input
|
|
231
|
+
counts_dense # Overwrite input with Residuals
|
|
230
232
|
)
|
|
231
233
|
|
|
232
|
-
#
|
|
234
|
+
# 1. Accumulate Sum
|
|
233
235
|
acc_full_sum += cupy.sum(counts_dense, axis=0)
|
|
234
|
-
acc_full_sq += cupy.sum(counts_dense**2, axis=0)
|
|
235
236
|
|
|
237
|
+
# 2. Sample
|
|
236
238
|
if sample_indices is not None:
|
|
237
239
|
sampled_vals = counts_dense.ravel().take(sample_indices)
|
|
238
240
|
viz_full_samples.append(cupy.asnumpy(sampled_vals))
|
|
239
241
|
|
|
240
|
-
#
|
|
242
|
+
# 3. Write to Disk
|
|
241
243
|
out_x_full[current_row:end_row, :] = counts_dense.get()
|
|
244
|
+
|
|
245
|
+
# 4. Square IN-PLACE
|
|
246
|
+
counts_dense *= counts_dense
|
|
247
|
+
|
|
248
|
+
# 5. Accumulate Sum of Squares
|
|
249
|
+
acc_full_sq += cupy.sum(counts_dense, axis=0)
|
|
242
250
|
|
|
243
251
|
del counts_dense, sample_indices
|
|
244
252
|
cupy.get_default_memory_pool().free_all_blocks()
|
|
@@ -253,10 +261,6 @@ def NBumiPearsonResidualsCombinedGPU(
|
|
|
253
261
|
print("Phase [Viz]: Generating Diagnostics...")
|
|
254
262
|
|
|
255
263
|
# 1. Finalize Variance Stats (GPU -> CPU)
|
|
256
|
-
# Var = E[X^2] - (E[X])^2
|
|
257
|
-
# Mean = Sum / N
|
|
258
|
-
|
|
259
|
-
# Pull everything to CPU once
|
|
260
264
|
raw_sum = cupy.asnumpy(acc_raw_sum)
|
|
261
265
|
|
|
262
266
|
approx_sum = cupy.asnumpy(acc_approx_sum)
|
|
@@ -265,7 +269,7 @@ def NBumiPearsonResidualsCombinedGPU(
|
|
|
265
269
|
full_sum = cupy.asnumpy(acc_full_sum)
|
|
266
270
|
full_sq = cupy.asnumpy(acc_full_sq)
|
|
267
271
|
|
|
268
|
-
# Calculate
|
|
272
|
+
# Calculate Variance: E[X^2] - (E[X])^2
|
|
269
273
|
mean_raw = raw_sum / nc
|
|
270
274
|
|
|
271
275
|
mean_approx = approx_sum / nc
|
|
@@ -288,7 +292,7 @@ def NBumiPearsonResidualsCombinedGPU(
|
|
|
288
292
|
|
|
289
293
|
# --- FILE 1: SUMMARY (1080p) ---
|
|
290
294
|
print(f" > Saving Summary Plot: {plot_summary_filename}")
|
|
291
|
-
fig1, ax1 = plt.subplots(1, 2, figsize=(16, 7))
|
|
295
|
+
fig1, ax1 = plt.subplots(1, 2, figsize=(16, 7))
|
|
292
296
|
|
|
293
297
|
# Plot 1: Variance Stabilization
|
|
294
298
|
ax = ax1[0]
|
|
@@ -309,7 +313,6 @@ def NBumiPearsonResidualsCombinedGPU(
|
|
|
309
313
|
# Plot 3: Distribution
|
|
310
314
|
ax = ax1[1]
|
|
311
315
|
if len(flat_approx) > 100:
|
|
312
|
-
# Clip for cleaner KDE
|
|
313
316
|
mask_kde = (flat_approx > -10) & (flat_approx < 10)
|
|
314
317
|
sns.kdeplot(flat_approx[mask_kde], fill=True, color='red', alpha=0.3, label='Approx', ax=ax, warn_singular=False)
|
|
315
318
|
sns.kdeplot(flat_full[mask_kde], fill=True, color='blue', alpha=0.3, label='Full', ax=ax, warn_singular=False)
|
|
@@ -323,17 +326,15 @@ def NBumiPearsonResidualsCombinedGPU(
|
|
|
323
326
|
bbox=dict(facecolor='#f0f0f0', edgecolor='black', alpha=0.7))
|
|
324
327
|
|
|
325
328
|
plt.tight_layout()
|
|
326
|
-
plt.savefig(plot_summary_filename, dpi=120)
|
|
329
|
+
plt.savefig(plot_summary_filename, dpi=120)
|
|
327
330
|
plt.close()
|
|
328
331
|
|
|
329
332
|
# --- FILE 2: DETAIL (4K) ---
|
|
330
333
|
print(f" > Saving Detail Plot: {plot_detail_filename}")
|
|
331
|
-
fig2, ax2 = plt.subplots(figsize=(20, 11))
|
|
334
|
+
fig2, ax2 = plt.subplots(figsize=(20, 11))
|
|
332
335
|
|
|
333
336
|
if len(flat_approx) > 0:
|
|
334
337
|
ax2.scatter(flat_approx, flat_full, s=1, alpha=0.5, color='purple')
|
|
335
|
-
|
|
336
|
-
# Diagonal line
|
|
337
338
|
lims = [
|
|
338
339
|
np.min([ax2.get_xlim(), ax2.get_ylim()]),
|
|
339
340
|
np.max([ax2.get_xlim(), ax2.get_ylim()]),
|
|
@@ -349,9 +350,8 @@ def NBumiPearsonResidualsCombinedGPU(
|
|
|
349
350
|
bbox=dict(facecolor='#f0f0f0', edgecolor='black', alpha=0.7))
|
|
350
351
|
|
|
351
352
|
plt.tight_layout()
|
|
352
|
-
plt.savefig(plot_detail_filename, dpi=200)
|
|
353
|
+
plt.savefig(plot_detail_filename, dpi=200)
|
|
353
354
|
plt.close()
|
|
354
355
|
|
|
355
|
-
|
|
356
356
|
if hasattr(adata_in, "file") and adata_in.file is not None: adata_in.file.close()
|
|
357
357
|
print(f"Total time: {time.perf_counter() - start_time:.2f} seconds.\n")
|
|
@@ -5,10 +5,10 @@ m3Drop/CoreGPU.py,sha256=6LToLuWyHxX_7sC2z0Xnvy_qqgmpew5DmnCV0PxmTZQ,19785
|
|
|
5
5
|
m3Drop/DiagnosticsCPU.py,sha256=l0Imkh3F3zo4ovihUjx7cYWYgzPdztWCN1hcBFO43nY,12943
|
|
6
6
|
m3Drop/DiagnosticsGPU.py,sha256=bsatHyHszgbufneeJvFvHBTLzDuY006nP2yHPHs8s7M,14389
|
|
7
7
|
m3Drop/NormalizationCPU.py,sha256=DmqvjcpHwkNZicEb2GBqTDBVyvtBeUSLmFRwRFDk0ms,7458
|
|
8
|
-
m3Drop/NormalizationGPU.py,sha256=
|
|
8
|
+
m3Drop/NormalizationGPU.py,sha256=M9n1rkTYS4KRAtx5D5Ld0LmOQWW_jJbgkT37jqUP7Dc,14736
|
|
9
9
|
m3Drop/__init__.py,sha256=W_TQ9P8_7Tdsa6kDZ6IJKT0FMkX_JFvBqiP821CZIrk,2180
|
|
10
|
-
m3drop-0.4.
|
|
11
|
-
m3drop-0.4.
|
|
12
|
-
m3drop-0.4.
|
|
13
|
-
m3drop-0.4.
|
|
14
|
-
m3drop-0.4.
|
|
10
|
+
m3drop-0.4.53.dist-info/licenses/LICENSE,sha256=44Iqpp8Fc10Xzd5T7cT9UhO31Qftk3gBiCjtpwilP_k,1074
|
|
11
|
+
m3drop-0.4.53.dist-info/METADATA,sha256=XJpT_P5K-z-BQGnSrX8mJotRywANnWy-PofCviSOElY,5248
|
|
12
|
+
m3drop-0.4.53.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
13
|
+
m3drop-0.4.53.dist-info/top_level.txt,sha256=AEULFEFIgFtAwS-KBlIFoYXrqczX_rwqrEcdK46GIrA,7
|
|
14
|
+
m3drop-0.4.53.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|