M3Drop 0.4.46__py3-none-any.whl → 0.4.47__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- m3Drop/NormalizationGPU.py +149 -182
- {m3drop-0.4.46.dist-info → m3drop-0.4.47.dist-info}/METADATA +1 -1
- {m3drop-0.4.46.dist-info → m3drop-0.4.47.dist-info}/RECORD +6 -6
- {m3drop-0.4.46.dist-info → m3drop-0.4.47.dist-info}/WHEEL +0 -0
- {m3drop-0.4.46.dist-info → m3drop-0.4.47.dist-info}/licenses/LICENSE +0 -0
- {m3drop-0.4.46.dist-info → m3drop-0.4.47.dist-info}/top_level.txt +0 -0
m3Drop/NormalizationGPU.py
CHANGED
|
@@ -1,211 +1,178 @@
|
|
|
1
1
|
import pickle
|
|
2
2
|
import time
|
|
3
|
-
import
|
|
3
|
+
import sys
|
|
4
4
|
import numpy as np
|
|
5
5
|
import h5py
|
|
6
6
|
import anndata
|
|
7
7
|
import pandas as pd
|
|
8
|
-
from cupy.sparse import csr_matrix as cp_csr_matrix
|
|
9
8
|
import os
|
|
10
9
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
10
|
+
try:
|
|
11
|
+
import cupy
|
|
12
|
+
from cupy.sparse import csr_matrix as cp_csr_matrix
|
|
13
|
+
HAS_GPU = True
|
|
14
|
+
except ImportError:
|
|
15
|
+
cupy = None
|
|
16
|
+
HAS_GPU = False
|
|
17
|
+
|
|
18
|
+
# Package-compatible import
|
|
19
|
+
try:
|
|
20
|
+
from .ControlDeviceGPU import ControlDevice
|
|
21
|
+
except ImportError:
|
|
22
|
+
# Fallback for direct script execution (debugging)
|
|
23
|
+
try:
|
|
24
|
+
from ControlDeviceGPU import ControlDevice
|
|
25
|
+
except ImportError:
|
|
26
|
+
print("CRITICAL ERROR: 'ControlDeviceGPU.py' not found.")
|
|
27
|
+
sys.exit(1)
|
|
28
|
+
|
|
29
|
+
# ==========================================
|
|
30
|
+
# KERNELS
|
|
31
|
+
# ==========================================
|
|
32
|
+
|
|
33
|
+
# Element-wise kernel for the "full" Pearson residual.
#
# For every (count, tj, ti, theta, total) tuple it computes
#     mu    = tj * ti / total
#     denom = sqrt(mu + mu^2 / theta)
#     out   = (count - mu) / denom
# and writes 0 when denom is numerically zero (< 1e-12), so an all-zero
# gene or cell never produces a division by zero.
#
# The kernel is only built when CuPy was imported successfully. The import
# block at the top of this module sets `cupy = None` on ImportError, and an
# unconditional `cupy.ElementwiseKernel(...)` would then raise AttributeError
# at import time, defeating the optional-import fallback.
if cupy is not None:
    pearson_residual_kernel = cupy.ElementwiseKernel(
        'float64 count, float64 tj, float64 ti, float64 theta, float64 total',
        'float64 out',
        '''
        double mu = (tj * ti) / total;
        double denom_sq = mu + ( (mu * mu) / theta );
        double denom = sqrt(denom_sq);
        if (denom < 1e-12) { out = 0.0; } else { out = (count - mu) / denom; }
        ''',
        'pearson_residual_kernel'
    )
else:
    # No GPU stack available: keep the name defined so the module still imports.
    pearson_residual_kernel = None
|
|
43
|
+
|
|
44
|
+
# Element-wise kernel for the "approximate" Pearson residual.
#
# Same expected value as the full kernel (mu = tj * ti / total) but the
# denominator is sqrt(mu) only, i.e. no per-gene size/theta term:
#     out = (count - mu) / sqrt(mu)
# A numerically-zero denominator (< 1e-12) yields 0 instead of inf/NaN.
#
# Built only when CuPy is importable: the import block at the top of this
# module sets `cupy = None` on ImportError, so an unconditional call would
# raise AttributeError at import time.
if cupy is not None:
    pearson_approx_kernel = cupy.ElementwiseKernel(
        'float64 count, float64 tj, float64 ti, float64 total',
        'float64 out',
        '''
        double mu = (tj * ti) / total;
        double denom = sqrt(mu);
        if (denom < 1e-12) { out = 0.0; } else { out = (count - mu) / denom; }
        ''',
        'pearson_approx_kernel'
    )
else:
    # No GPU stack available: keep the name defined so the module still imports.
    pearson_approx_kernel = None
|
|
53
|
+
|
|
54
|
+
def NBumiPearsonResidualsCombinedGPU(
    raw_filename: str,
    mask_filename: str,
    fit_filename: str,
    stats_filename: str,
    output_filename_full: str,
    output_filename_approx: str,
    mode: str = "auto",
    manual_target: int = 3000
):
    """
    Compute full and approximate Pearson residuals in a single pass.

    The raw CSR matrix stored under ``X`` in ``raw_filename`` is streamed in
    row chunks, restricted to the columns selected by the pickled mask,
    densified on the GPU and pushed through ``pearson_approx_kernel`` and
    ``pearson_residual_kernel``. Each result is written as a dense float64
    ``X`` dataset into its own output ``.h5ad`` file.

    Parameters
    ----------
    raw_filename : str
        Input ``.h5ad`` file; ``X`` must be stored with ``indptr``,
        ``indices`` and ``data`` datasets.
    mask_filename : str
        Pickle file holding the column mask applied to ``X`` and ``var``.
    fit_filename : str
        Pickle file holding a mapping with ``fit['vals']['total']``,
        ``fit['vals']['tjs']``, ``fit['vals']['tis']`` and ``fit['sizes']``
        (the last three expose ``.values``).
    stats_filename : str
        Unused by the computation; kept so existing callers keep working.
    output_filename_full : str
        Destination ``.h5ad`` for the full residuals.
    output_filename_approx : str
        Destination ``.h5ad`` for the approximate residuals.
    mode : str
        Forwarded unchanged to ``ControlDevice``.
    manual_target : int
        Forwarded unchanged to ``ControlDevice``.

    Raises
    ------
    RuntimeError
        If CuPy could not be imported.
    ValueError
        If the mask selects no columns or the input has no rows.

    Notes
    -----
    The mask and fit files are read with ``pickle.load``, which can execute
    arbitrary code; only pass files from a trusted source.
    """
    if cupy is None:
        raise RuntimeError(
            "NBumiPearsonResidualsCombinedGPU requires CuPy, which could not be imported."
        )

    start_time = time.perf_counter()
    print(f"FUNCTION: NBumiPearsonResidualsCombined() | FILE: {raw_filename}")

    # 1. Load Mask
    with open(mask_filename, 'rb') as f:
        mask_cpu = pickle.load(f)
    mask_gpu = cupy.asarray(mask_cpu)
    ng_filtered = int(cupy.sum(mask_gpu))
    if ng_filtered < 1:
        # Previously surfaced later as an opaque ZeroDivisionError.
        raise ValueError(f"Mask in '{mask_filename}' selects no genes.")

    # 2. Manual Init
    with h5py.File(raw_filename, 'r') as f:
        indptr_cpu = f['X']['indptr'][:]
    total_rows = len(indptr_cpu) - 1
    device = ControlDevice(
        indptr=indptr_cpu,
        total_rows=total_rows,
        n_genes=ng_filtered,
        mode=mode,
        manual_target=manual_target,
    )
    nc = device.total_rows
    if nc < 1:
        raise ValueError(f"'{raw_filename}' contains no rows to process.")

    print("Phase [1/2]: Initializing parameters...")
    # Load parameters for both calculations.
    # NOTE: the stats pickle used to be loaded here but was never read, so it
    # is no longer opened; `stats_filename` stays in the signature only.
    with open(fit_filename, 'rb') as f:
        fit = pickle.load(f)

    # Common params
    total = fit['vals']['total']
    tjs_gpu = cupy.asarray(fit['vals']['tjs'].values, dtype=cupy.float64)
    tis_gpu = cupy.asarray(fit['vals']['tis'].values, dtype=cupy.float64)

    # Specific params (only the full residual uses the per-gene sizes)
    sizes_gpu = cupy.asarray(fit['sizes'].values, dtype=cupy.float64)

    # Setup Output Files
    adata_in = anndata.read_h5ad(raw_filename, backed='r')
    try:
        filtered_var = adata_in.var[mask_cpu]

        # Create skeletons (obs/var only); X is added below through h5py.
        adata_out_full = anndata.AnnData(obs=adata_in.obs, var=filtered_var)
        adata_out_full.write_h5ad(output_filename_full, compression=None)

        adata_out_approx = anndata.AnnData(obs=adata_in.obs, var=filtered_var)
        adata_out_approx.write_h5ad(output_filename_approx, compression=None)

        # Aim for ~1 GB float64 storage chunks, but never taller than the
        # dataset itself: h5py rejects a chunk shape that exceeds the data
        # shape, which made small inputs fail.
        storage_chunk_rows = max(1, min(1_000_000_000 // (ng_filtered * 8), nc))
        dataset_kwargs = dict(
            shape=(nc, ng_filtered),
            chunks=(storage_chunk_rows, ng_filtered),
            dtype='float64',
        )

        # Open both files for writing simultaneously
        with h5py.File(output_filename_full, 'a') as f_full, \
                h5py.File(output_filename_approx, 'a') as f_approx:
            if 'X' in f_full:
                del f_full['X']
            if 'X' in f_approx:
                del f_approx['X']

            out_x_full = f_full.create_dataset('X', **dataset_kwargs)
            out_x_approx = f_approx.create_dataset('X', **dataset_kwargs)

            with h5py.File(raw_filename, 'r') as f_in:
                h5_data = f_in['X']['data']
                h5_indices = f_in['X']['indices']

                current_row = 0
                while current_row < nc:
                    # Higher overhead for double write
                    end_row = device.get_next_chunk(
                        current_row, mode='dense', overhead_multiplier=3.0
                    )
                    if end_row is None or end_row <= current_row:
                        # Scheduler made no progress; stop instead of looping forever.
                        break

                    chunk_size = end_row - current_row
                    print(f"Phase [2/2]: Processing rows {end_row} of {nc} | Chunk: {chunk_size}", end='\r')

                    # indptr is already in memory, so use it instead of
                    # re-reading the same values from HDF5 on every chunk.
                    start_idx = indptr_cpu[current_row]
                    end_idx = indptr_cpu[end_row]

                    # Load & Filter
                    data_gpu_raw = cupy.asarray(h5_data[start_idx:end_idx], dtype=cupy.float64)
                    indices_gpu_raw = cupy.asarray(h5_indices[start_idx:end_idx])
                    indptr_gpu_raw = cupy.asarray(
                        indptr_cpu[current_row:end_row + 1] - indptr_cpu[current_row]
                    )

                    chunk_gpu = cp_csr_matrix(
                        (data_gpu_raw, indices_gpu_raw, indptr_gpu_raw),
                        shape=(chunk_size, len(mask_cpu)),
                    )
                    chunk_gpu = chunk_gpu[:, mask_gpu]
                    chunk_gpu.data = cupy.ceil(chunk_gpu.data)

                    # Dense Conversion
                    counts_dense = chunk_gpu.todense()
                    del chunk_gpu, data_gpu_raw, indices_gpu_raw, indptr_gpu_raw
                    cupy.get_default_memory_pool().free_all_blocks()

                    # Column vector of per-row totals, broadcast against the
                    # per-gene vectors inside both kernels.
                    tis_col = tis_gpu[current_row:end_row][:, cupy.newaxis]

                    # --- CALC 1: APPROX (Cheaper, do first) ---
                    approx_out = cupy.empty_like(counts_dense)
                    pearson_approx_kernel(counts_dense, tjs_gpu, tis_col, total, approx_out)
                    out_x_approx[current_row:end_row, :] = approx_out.get()
                    del approx_out

                    # --- CALC 2: FULL (In-place on counts_dense to save VRAM) ---
                    pearson_residual_kernel(
                        counts_dense, tjs_gpu, tis_col, sizes_gpu, total,
                        counts_dense  # Overwrite input
                    )
                    out_x_full[current_row:end_row, :] = counts_dense.get()

                    del counts_dense, tis_col
                    cupy.get_default_memory_pool().free_all_blocks()
                    current_row = end_row

            if current_row < nc:
                # Do not claim completion when the loop bailed out early:
                # the remaining rows of both outputs were never written.
                print(
                    f"\nWARNING: chunking stopped at row {current_row} of {nc}; "
                    "remaining rows were not written."
                )
            else:
                print(f"\nPhase [2/2]: COMPLETE{' '*50}")
    finally:
        # Release the backed input file even when a step above raised.
        if hasattr(adata_in, "file") and adata_in.file is not None:
            adata_in.file.close()

    print(f"Total time: {time.perf_counter() - start_time:.2f} seconds.\n")
|
|
@@ -5,10 +5,10 @@ m3Drop/CoreGPU.py,sha256=6LToLuWyHxX_7sC2z0Xnvy_qqgmpew5DmnCV0PxmTZQ,19785
|
|
|
5
5
|
m3Drop/DiagnosticsCPU.py,sha256=l0Imkh3F3zo4ovihUjx7cYWYgzPdztWCN1hcBFO43nY,12943
|
|
6
6
|
m3Drop/DiagnosticsGPU.py,sha256=z5BMOZNo_ruMBaDWJIE6zWhMUtf2ItY5Vcgu4N9lbok,14321
|
|
7
7
|
m3Drop/NormalizationCPU.py,sha256=Mm8VzWDu-NONbp-ngAt4PLjCKAGc7gJZKf-Yd-U95r0,7255
|
|
8
|
-
m3Drop/NormalizationGPU.py,sha256=
|
|
8
|
+
m3Drop/NormalizationGPU.py,sha256=1XRDZhNVkIbQMv_ggNoNEnIxRMY1NHDjOtOq4QGVRwY,7011
|
|
9
9
|
m3Drop/__init__.py,sha256=W_TQ9P8_7Tdsa6kDZ6IJKT0FMkX_JFvBqiP821CZIrk,2180
|
|
10
|
-
m3drop-0.4.
|
|
11
|
-
m3drop-0.4.
|
|
12
|
-
m3drop-0.4.
|
|
13
|
-
m3drop-0.4.
|
|
14
|
-
m3drop-0.4.
|
|
10
|
+
m3drop-0.4.47.dist-info/licenses/LICENSE,sha256=44Iqpp8Fc10Xzd5T7cT9UhO31Qftk3gBiCjtpwilP_k,1074
|
|
11
|
+
m3drop-0.4.47.dist-info/METADATA,sha256=z1THnYFMNjSObhg-QIGn61c1-lyqlXZiyAtcSLfE-fc,5248
|
|
12
|
+
m3drop-0.4.47.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
13
|
+
m3drop-0.4.47.dist-info/top_level.txt,sha256=AEULFEFIgFtAwS-KBlIFoYXrqczX_rwqrEcdK46GIrA,7
|
|
14
|
+
m3drop-0.4.47.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|