M3Drop: m3drop-0.4.41-py3-none-any.whl → m3drop-0.4.44-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- m3Drop/CoreCPU.py +510 -0
- m3Drop/CoreGPU.py +506 -0
- m3Drop/DiagnosticsCPU.py +407 -0
- m3Drop/DiagnosticsGPU.py +420 -0
- m3Drop/NormalizationCPU.py +202 -0
- m3Drop/{normalizationGPU.py → NormalizationGPU.py} +3 -5
- m3Drop/__init__.py +42 -51
- {m3drop-0.4.41.dist-info → m3drop-0.4.44.dist-info}/METADATA +4 -1
- m3drop-0.4.44.dist-info/RECORD +12 -0
- {m3drop-0.4.41.dist-info → m3drop-0.4.44.dist-info}/WHEEL +1 -1
- m3Drop/coreCPU.py +0 -477
- m3Drop/coreGPU.py +0 -591
- m3Drop/diagnosticsCPU.py +0 -391
- m3Drop/diagnosticsGPU.py +0 -481
- m3Drop/normalizationCPU.py +0 -146
- m3drop-0.4.41.dist-info/RECORD +0 -12
- {m3drop-0.4.41.dist-info → m3drop-0.4.44.dist-info}/licenses/LICENSE +0 -0
- {m3drop-0.4.41.dist-info → m3drop-0.4.44.dist-info}/top_level.txt +0 -0
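Note the module-name case changes (coreCPU.py → CoreCPU.py and so on): code that imports the old lower-case module paths directly will raise ImportError on 0.4.44. A minimal migration sketch, assuming the public function names themselves carried over unchanged (this diff confirms that for the CoreGPU symbols, e.g. NBumiFitModelGPU):

# 0.4.41 (old, lower-case module path):
#   from m3Drop.coreGPU import NBumiFitModelGPU
# 0.4.44 (new, CamelCase module path):
from m3Drop.CoreGPU import NBumiFitModelGPU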
m3Drop/DiagnosticsGPU.py
ADDED
@@ -0,0 +1,420 @@
+import numpy as np
+import pandas as pd
+import cupy as cp
+import cupyx.scipy.sparse as csp
+import matplotlib.pyplot as plt
+import h5py
+import os
+import time
+import pickle
+import psutil
+import gc
+from scipy import sparse
+from scipy import stats
+import anndata
+
+from .ControlDeviceGPU import ControlDevice
+from .CoreGPU import (
+    hidden_calc_valsGPU,
+    NBumiFitModelGPU,
+    NBumiFitDispVsMeanGPU,
+    dropout_prob_kernel
+)
+
+from cupy.sparse import csr_matrix as cp_csr_matrix
+import scipy.sparse as sp
+from scipy.sparse import csr_matrix as sp_csr_matrix
+
+import statsmodels.api as sm
+from scipy.stats import norm
+from statsmodels.stats.multitest import multipletests
+
+# ==========================================
+# DIAGNOSTICS & COMPARISON
+# ==========================================
+
+def NBumiFitBasicModelGPU(
+    filename: str,
+    stats: dict,
+    mask_filename: str = None,
+    mode: str = "auto",
+    manual_target: int = 3000,
+    phase_label: str = "Phase [1/1]",
+    desc_label: str = None  # [UI FIX] Added for delayed printing
+) -> dict:
+    """
+    Fits the Basic Model by calculating Normalized Variance ON-THE-FLY.
+    STRICT FLOAT64 ENFORCEMENT.
+    """
+    # 1. Get Raw Dimensions & Setup ControlDevice
+    with h5py.File(filename, 'r') as f:
+        indptr_cpu = f['X']['indptr'][:]
+        total_rows = len(indptr_cpu) - 1
+        raw_ng = f['X'].attrs['shape'][1]
+
+    device = ControlDevice(
+        indptr=indptr_cpu,
+        total_rows=total_rows,
+        n_genes=raw_ng,
+        mode=mode,
+        manual_target=manual_target
+    )
+    nc = device.total_rows
+
+    # [UI FIX] Print description AFTER ControlDevice box
+    if desc_label:
+        print(f"{phase_label}: {desc_label}")
+
+    # 2. Load Mask
+    if mask_filename and os.path.exists(mask_filename):
+        with open(mask_filename, 'rb') as f:
+            mask_cpu = pickle.load(f)
+    else:
+        mask_cpu = np.ones(raw_ng, dtype=bool)
+
+    filtered_ng = int(np.sum(mask_cpu))
+
+    # 3. Pre-calculate Size Factors
+    cell_sums = stats['tis'].values
+    median_sum = np.median(cell_sums[cell_sums > 0])
+    # [FLOAT64] Explicitly utilizing float64 for size factors
+    size_factors = np.ones_like(cell_sums, dtype=np.float64)
+    non_zero_mask = cell_sums > 0
+    size_factors[non_zero_mask] = cell_sums[non_zero_mask] / median_sum
+
+    # 4. Init GPU Arrays
+    sum_norm_x_gpu = cp.zeros(filtered_ng, dtype=cp.float64)
+    sum_norm_sq_gpu = cp.zeros(filtered_ng, dtype=cp.float64)
+
+    with h5py.File(filename, 'r') as f_in:
+        h5_indptr = f_in['X']['indptr']
+        h5_data = f_in['X']['data']
+        h5_indices = f_in['X']['indices']
+
+        current_row = 0
+        while current_row < nc:
+            end_row = device.get_next_chunk(current_row, mode='dense', overhead_multiplier=1.5)
+            if end_row is None or end_row <= current_row: break
+
+            chunk_size = end_row - current_row
+            # [UI] Phase-aware progress bar
+            print(f"{phase_label}: Processing {end_row} of {nc} | Chunk: {chunk_size}", end='\r')
+
+            start_idx, end_idx = h5_indptr[current_row], h5_indptr[end_row]
+            if start_idx == end_idx:
+                current_row = end_row
+                continue
+
+            # [FLOAT64] Load Raw Chunk as float64
+            data_gpu = cp.asarray(h5_data[start_idx:end_idx], dtype=cp.float64)
+            indices_gpu = cp.asarray(h5_indices[start_idx:end_idx])
+            indptr_gpu = cp.asarray(h5_indptr[current_row:end_row+1] - h5_indptr[current_row])
+
+            # Reconstruct CSR & Filter
+            raw_chunk = cp_csr_matrix((data_gpu, indices_gpu, indptr_gpu), shape=(chunk_size, raw_ng))
+            mask_gpu = cp.asarray(mask_cpu)
+            filtered_chunk = raw_chunk[:, mask_gpu]
+
+            # Fused Normalization
+            # [FLOAT64] Size factors are already float64
+            sf_chunk = cp.asarray(size_factors[current_row:end_row], dtype=cp.float64)
+            recip_sf = 1.0 / sf_chunk
+            D = csp.diags(recip_sf)
+            norm_chunk = D.dot(filtered_chunk)
+            norm_chunk.data = cp.round(norm_chunk.data)
+
+            # Accumulate
+            sum_norm_x_gpu += norm_chunk.sum(axis=0).ravel()
+            norm_chunk.data **= 2
+            sum_norm_sq_gpu += norm_chunk.sum(axis=0).ravel()
+
+            del data_gpu, indices_gpu, raw_chunk, filtered_chunk, norm_chunk, D, sf_chunk, mask_gpu
+            cp.get_default_memory_pool().free_all_blocks()
+            current_row = end_row
+
+    # Final Calculations
+    mean_norm_gpu = sum_norm_x_gpu / nc
+    mean_sq_norm_gpu = sum_norm_sq_gpu / nc
+    var_norm_gpu = mean_sq_norm_gpu - (mean_norm_gpu ** 2)
+
+    denom_gpu = var_norm_gpu - mean_norm_gpu
+    size_gpu = cp.full(filtered_ng, 1000.0, dtype=cp.float64)
+    valid_mask = denom_gpu > 1e-6
+    size_gpu[valid_mask] = mean_norm_gpu[valid_mask]**2 / denom_gpu[valid_mask]
+
+    max_size_val = cp.nanmax(size_gpu[size_gpu < 1e6]) * 10
+    if cp.isnan(max_size_val) or max_size_val == 0: max_size_val = 1000.0
+    size_gpu[cp.isnan(size_gpu) | (size_gpu <= 0)] = max_size_val
+    size_gpu[size_gpu < 1e-10] = 1e-10
+
+    # [UI] Clean completion - Force Newline
+    print("")
+    print(f"{phase_label}: COMPLETE")
+
+    return {
+        'var_obs': pd.Series(var_norm_gpu.get(), index=stats['tjs'].index),
+        'sizes': pd.Series(size_gpu.get(), index=stats['tjs'].index),
+        'vals': stats
+    }
+
+def NBumiCheckFitFSGPU(
+    filename: str,
+    fit: dict,
+    mode: str = "auto",
+    manual_target: int = 3000,
+    suppress_plot=False,
+    plot_filename=None,
+    phase_label="Phase [1/1]",
+    desc_label: str = None  # [UI FIX] Added for delayed printing
+) -> dict:
+    """
+    Calculates expected dropouts. Handles Real and Virtual Populations.
+    Uses FUSED KERNEL to prevent OOM on large chunks.
+    """
+    vals = fit['vals']
+    ng = vals['ng']
+
+    with h5py.File(filename, 'r') as f:
+        indptr_cpu = f['X']['indptr'][:]
+        total_rows = len(indptr_cpu) - 1
+
+    device = ControlDevice(
+        indptr=indptr_cpu,
+        total_rows=total_rows,
+        n_genes=ng,
+        mode=mode,
+        manual_target=manual_target
+    )
+    nc = device.total_rows
+
+    # [UI FIX] Print description AFTER ControlDevice box
+    if desc_label:
+        print(f"{phase_label}: {desc_label}")
+
+    size_coeffs = NBumiFitDispVsMeanGPU(fit, suppress_plot=True)
+
+    tjs_gpu = cp.asarray(vals['tjs'].values, dtype=cp.float64)
+    tis_gpu = cp.asarray(vals['tis'].values, dtype=cp.float64)
+    total = vals['total']
+
+    mean_expression_gpu = tjs_gpu / nc
+    log_mean_expression_gpu = cp.zeros_like(mean_expression_gpu)
+    valid_means = mean_expression_gpu > 0
+    log_mean_expression_gpu[valid_means] = cp.log(mean_expression_gpu[valid_means])
+    smoothed_size_gpu = cp.exp(size_coeffs[0] + size_coeffs[1] * log_mean_expression_gpu)
+
+    row_ps_gpu = cp.zeros(ng, dtype=cp.float64)
+    col_ps_gpu = cp.zeros(nc, dtype=cp.float64)
+
+    current_row = 0
+    while current_row < nc:
+        # [FIX] Keep overhead low (1.1) because we are using Fused Kernel
+        end_row = device.get_next_chunk(current_row, mode='dense', overhead_multiplier=1.1)
+        if end_row is None or end_row <= current_row: break
+
+        chunk_size = end_row - current_row
+
+        # [UI] Phase-aware progress bar
+        print(f"{phase_label}: Processing {end_row} of {nc} | Chunk: {chunk_size}", end='\r')
+
+        tis_chunk_gpu = tis_gpu[current_row:end_row]
+
+        # [CRITICAL] FUSED KERNEL PRESERVED (Supercomputer Fix)
+        # Explicit float64 for the output buffer
+        p_is_chunk_gpu = cp.empty((chunk_size, ng), dtype=cp.float64)
+
+        dropout_prob_kernel(
+            tjs_gpu,                 # Gene totals
+            tis_chunk_gpu[:, None],  # Cell totals
+            total,                   # Grand total
+            smoothed_size_gpu,       # Exp size
+            p_is_chunk_gpu           # Output
+        )
+
+        p_is_chunk_gpu = cp.nan_to_num(p_is_chunk_gpu, nan=0.0, posinf=1.0, neginf=0.0)
+
+        row_ps_gpu += p_is_chunk_gpu.sum(axis=0)
+        col_ps_gpu[current_row:end_row] = p_is_chunk_gpu.sum(axis=1)
+
+        del p_is_chunk_gpu, tis_chunk_gpu
+        cp.get_default_memory_pool().free_all_blocks()
+        current_row = end_row
+
+    # [UI] Clean completion - Force Newline
+    print("")
+    print(f"{phase_label}: COMPLETE")
+
+    row_ps_cpu = row_ps_gpu.get()
+    col_ps_cpu = col_ps_gpu.get()
+
+    return {
+        'rowPs': pd.Series(row_ps_cpu, index=fit['vals']['tjs'].index),
+        'colPs': pd.Series(col_ps_cpu, index=fit['vals']['tis'].index)
+    }
+
+def NBumiCompareModelsGPU(
+    raw_filename: str,
+    stats: dict,
+    fit_adjust: dict,
+    mask_filename: str = None,
+    mode: str = "auto",
+    manual_target: int = 3000,
+    suppress_plot=False,
+    plot_filename=None
+) -> dict:
+    """
+    Orchestrates the Comparison Pipeline with standardized UI.
+    """
+    print(f"FUNCTION: NBumiCompareModelsGPU()")
+    pipeline_start_time = time.time()
+
+    # STEP 1: Fit Basic Model
+    # [UI FIX] Removed early print, passed as desc_label
+    fit_basic = NBumiFitBasicModelGPU(
+        raw_filename,
+        stats,
+        mask_filename=mask_filename,
+        mode=mode,
+        manual_target=manual_target,
+        phase_label="Phase [1/3]",
+        desc_label="Fitting Basic Model (Virtual)..."
+    )
+
+    # STEP 2: Depth-Adjusted Dropout
+    # [UI FIX] Removed early print, passed as desc_label
+    check_adjust = NBumiCheckFitFSGPU(
+        raw_filename,
+        fit_adjust,
+        mode=mode,
+        manual_target=manual_target,
+        suppress_plot=True,
+        phase_label="Phase [2/3]",
+        desc_label="Calculating Depth-Adjusted Dropouts..."
+    )
+
+    # STEP 3: Basic Dropout
+    # [UI FIX] Removed early print, passed as desc_label
+    stats_virtual = stats.copy()
+    mean_depth = stats['total'] / stats['nc']
+    stats_virtual['tis'] = pd.Series(
+        np.full(stats['nc'], mean_depth),
+        index=stats['tis'].index
+    )
+
+    fit_basic_for_eval = {
+        'sizes': fit_basic['sizes'],
+        'vals': stats_virtual,
+        'var_obs': fit_basic['var_obs']
+    }
+
+    check_basic = NBumiCheckFitFSGPU(
+        raw_filename,
+        fit_basic_for_eval,
+        mode=mode,
+        manual_target=manual_target,
+        suppress_plot=True,
+        phase_label="Phase [3/3]",
+        desc_label="Calculating Basic Dropouts..."
+    )
+
+    # Calculation & Plotting
+    nc_data = stats['nc']
+    mean_expr = stats['tjs'] / nc_data
+    observed_dropout = stats['djs'] / nc_data
+
+    adj_dropout_fit = check_adjust['rowPs'] / nc_data
+    bas_dropout_fit = check_basic['rowPs'] / nc_data
+
+    err_adj = np.sum(np.abs(adj_dropout_fit - observed_dropout))
+    err_bas = np.sum(np.abs(bas_dropout_fit - observed_dropout))
+
+    comparison_df = pd.DataFrame({
+        'mean_expr': mean_expr,
+        'observed': observed_dropout,
+        'adj_fit': adj_dropout_fit,
+        'bas_fit': bas_dropout_fit
+    })
+
+    # Plotting Logic
+    plt.figure(figsize=(10, 6))
+    sorted_idx = np.argsort(mean_expr.values)
+    plot_idx = sorted_idx[::2] if len(mean_expr) > 20000 else sorted_idx
+
+    plt.scatter(mean_expr.iloc[plot_idx], observed_dropout.iloc[plot_idx],
+                c='black', s=3, alpha=0.5, label='Observed')
+
+    plt.scatter(mean_expr.iloc[plot_idx], bas_dropout_fit.iloc[plot_idx],
+                c='purple', s=3, alpha=0.6, label=f'Basic Fit (Error: {err_bas:.2f})')
+
+    plt.scatter(mean_expr.iloc[plot_idx], adj_dropout_fit.iloc[plot_idx],
+                c='goldenrod', s=3, alpha=0.7, label=f'Depth-Adjusted Fit (Error: {err_adj:.2f})')
+
+    plt.xscale('log')
+    plt.xlabel("Mean Expression")
+    plt.ylabel("Dropout Rate")
+    plt.title("M3Drop Model Comparison")
+    plt.legend()
+    plt.grid(True, linestyle='--', alpha=0.3)
+
+    if plot_filename:
+        plt.savefig(plot_filename, dpi=300, bbox_inches='tight')
+        print(f"Saving plot to: {plot_filename}")
+
+    if not suppress_plot:
+        plt.show()
+
+    plt.close()
+
+    pipeline_end_time = time.time()
+    print(f"Total time: {pipeline_end_time - pipeline_start_time:.2f} seconds.\n")
+
+    return {
+        "errors": {"Depth-Adjusted": err_adj, "Basic": err_bas},
+        "comparison_df": comparison_df
+    }
+
+def NBumiPlotDispVsMeanGPU(
+    fit: dict,
+    suppress_plot: bool = False,
+    plot_filename: str = None
+):
+    print("FUNCTION: NBumiPlotDispVsMean()")
+    start_time = time.time()
+
+    mean_expression = fit['vals']['tjs'].values / fit['vals']['nc']
+    sizes = fit['sizes'].values
+
+    coeffs = NBumiFitDispVsMeanGPU(fit, suppress_plot=True)
+    intercept, slope = coeffs[0], coeffs[1]
+
+    log_mean_expr_range = np.linspace(
+        np.log(mean_expression[mean_expression > 0].min()),
+        np.log(mean_expression.max()),
+        100
+    )
+    log_fitted_sizes = intercept + slope * log_mean_expr_range
+    fitted_sizes = np.exp(log_fitted_sizes)
+
+    plt.figure(figsize=(8, 6))
+    plt.scatter(mean_expression, sizes, label='Observed Dispersion', alpha=0.5, s=8)
+    plt.plot(np.exp(log_mean_expr_range), fitted_sizes, color='red', label='Regression Fit', linewidth=2)
+
+    plt.xscale('log')
+    plt.yscale('log')
+    plt.xlabel('Mean Expression')
+    plt.ylabel('Dispersion Parameter (Sizes)')
+    plt.title('Dispersion vs. Mean Expression')
+    plt.legend()
+    plt.grid(True, which="both", linestyle='--', alpha=0.6)
+
+    if plot_filename:
+        plt.savefig(plot_filename, dpi=300, bbox_inches='tight')
+        print(f"Saving plot to: {plot_filename}")
+
+    if not suppress_plot:
+        plt.show()
+
+    plt.close()
+
+    end_time = time.time()
+    print(f"Total time: {end_time - start_time:.2f} seconds.\n")
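For orientation, a sketch of how the new comparison entry point above might be driven. The file path is a placeholder, and the stats/fit dictionaries are left abstract because the CoreGPU helpers that produce them (hidden_calc_valsGPU, NBumiFitModelGPU) are not part of this diff:

from m3Drop.DiagnosticsGPU import NBumiCompareModelsGPU

# 'stats' and 'fit_adjust' come from the CoreGPU helpers; their exact call
# signatures are not shown in this diff, so they are left abstract here.
stats = ...        # dict with 'tjs', 'tis', 'djs', 'total', 'nc', 'ng'
fit_adjust = ...   # dict with 'vals' and 'sizes'

result = NBumiCompareModelsGPU(
    "counts.h5ad",                 # placeholder: CSR-encoded .h5ad file
    stats,
    fit_adjust,
    mode="auto",                   # let ControlDevice size the chunks
    plot_filename="model_comparison.png",
    suppress_plot=True,
)
print(result["errors"])            # {'Depth-Adjusted': ..., 'Basic': ...}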
m3Drop/NormalizationCPU.py
ADDED
@@ -0,0 +1,202 @@
+import pickle
+import time
+import sys
+import numpy as np
+import h5py
+import anndata
+import pandas as pd
+import os
+from scipy import sparse
+
+try:
+    from numba import jit, prange
+except ImportError:
+    print("CRITICAL ERROR: 'numba' not found. Please install it (pip install numba).")
+    sys.exit(1)
+
+# [REFACTOR] Relative Import
+try:
+    from .ControlDeviceCPU import ControlDevice
+except ImportError:
+    from ControlDeviceCPU import ControlDevice
+
+# ==========================================
+# NUMBA KERNELS (CPU)
+# ==========================================
+
+@jit(nopython=True, parallel=True, fastmath=True)
+def pearson_residual_kernel_cpu(counts, tj, ti, theta, total, out_matrix):
+    """
+    Calculates Pearson residuals using Negative Binomial logic.
+    Parallelized across CPU cores.
+    """
+    rows = counts.shape[0]
+    cols = counts.shape[1]
+
+    for r in prange(rows):
+        ti_val = ti[r]
+        for c in range(cols):
+            count_val = counts[r, c]
+            mu = (tj[c] * ti_val) / total
+
+            # theta is vector of size cols (genes)
+            theta_val = theta[c]
+
+            denom_sq = mu + ((mu * mu) / theta_val)
+            denom = np.sqrt(denom_sq)
+
+            if denom < 1e-12:
+                out_matrix[r, c] = 0.0
+            else:
+                out_matrix[r, c] = (count_val - mu) / denom
+
+@jit(nopython=True, parallel=True, fastmath=True)
+def pearson_approx_kernel_cpu(counts, tj, ti, total, out_matrix):
+    """
+    Calculates Approximate Pearson residuals (Poisson limit).
+    """
+    rows = counts.shape[0]
+    cols = counts.shape[1]
+
+    for r in prange(rows):
+        ti_val = ti[r]
+        for c in range(cols):
+            count_val = counts[r, c]
+            mu = (tj[c] * ti_val) / total
+
+            denom = np.sqrt(mu)
+
+            if denom < 1e-12:
+                out_matrix[r, c] = 0.0
+            else:
+                out_matrix[r, c] = (count_val - mu) / denom
+
+# ==========================================
+# NORMALIZATION FUNCTION
+# ==========================================
+
+def NBumiPearsonResidualsCombinedCPU(
+    raw_filename: str,
+    mask_filename: str,
+    fit_filename: str,
+    stats_filename: str,
+    output_filename_full: str,
+    output_filename_approx: str,
+    mode: str = "auto",
+    manual_target: int = 3000
+):
+    """
+    CPU-Optimized: Calculates Full and Approximate residuals in a SINGLE PASS.
+    Uses Numba for acceleration on L3-sized dense chunks.
+    """
+    start_time = time.perf_counter()
+    print(f"FUNCTION: NBumiPearsonResidualsCombinedCPU() | FILE: {raw_filename}")
+
+    # 1. Load Mask
+    with open(mask_filename, 'rb') as f: mask = pickle.load(f)
+    ng_filtered = int(np.sum(mask))
+
+    # 2. Init Device
+    with h5py.File(raw_filename, 'r') as f: indptr_cpu = f['X']['indptr'][:]; total_rows = len(indptr_cpu) - 1
+    device = ControlDevice(indptr=indptr_cpu, total_rows=total_rows, n_genes=ng_filtered, mode=mode, manual_target=manual_target)
+    nc = device.total_rows
+
+    print("Phase [1/2]: Initializing parameters...")
+    # Load parameters
+    with open(fit_filename, 'rb') as f: fit = pickle.load(f)
+    with open(stats_filename, 'rb') as f: stats = pickle.load(f)
+
+    # Common params (Numpy Arrays)
+    total = fit['vals']['total']
+    tjs = fit['vals']['tjs'].values.astype(np.float64)
+    tis = fit['vals']['tis'].values.astype(np.float64)
+
+    # Specific params
+    sizes = fit['sizes'].values.astype(np.float64)  # For Full
+
+    # Setup Output Files
+    adata_in = anndata.read_h5ad(raw_filename, backed='r')
+    filtered_var = adata_in.var[mask]
+
+    # Create skeletons
+    adata_out_full = anndata.AnnData(obs=adata_in.obs, var=filtered_var)
+    adata_out_full.write_h5ad(output_filename_full, compression=None)
+
+    adata_out_approx = anndata.AnnData(obs=adata_in.obs, var=filtered_var)
+    adata_out_approx.write_h5ad(output_filename_approx, compression=None)
+
+    # Calculate appropriate H5 storage chunks
+    storage_chunk_rows = int(1_000_000_000 / (ng_filtered * 8))
+    if storage_chunk_rows < 1: storage_chunk_rows = 1
+
+    # Open both files for writing simultaneously
+    with h5py.File(output_filename_full, 'a') as f_full, h5py.File(output_filename_approx, 'a') as f_approx:
+        if 'X' in f_full: del f_full['X']
+        if 'X' in f_approx: del f_approx['X']
+
+        # Float64 output
+        out_x_full = f_full.create_dataset(
+            'X', shape=(nc, ng_filtered), chunks=(storage_chunk_rows, ng_filtered), dtype='float64'
+        )
+        out_x_approx = f_approx.create_dataset(
+            'X', shape=(nc, ng_filtered), chunks=(storage_chunk_rows, ng_filtered), dtype='float64'
+        )
+
+        with h5py.File(raw_filename, 'r') as f_in:
+            h5_indptr = f_in['X']['indptr']
+            h5_data = f_in['X']['data']
+            h5_indices = f_in['X']['indices']
+
+            current_row = 0
+            while current_row < nc:
+                # Dense mode is faster for Numba
+                end_row = device.get_next_chunk(current_row, mode='dense', overhead_multiplier=3.0)
+                if end_row is None or end_row <= current_row: break
+
+                chunk_size = end_row - current_row
+                print(f"Phase [2/2]: Processing rows {end_row} of {nc} | Chunk: {chunk_size}", end='\r')
+
+                start_idx, end_idx = h5_indptr[current_row], h5_indptr[end_row]
+
+                # Load & Filter
+                data = np.array(h5_data[start_idx:end_idx], dtype=np.float64)
+                indices = np.array(h5_indices[start_idx:end_idx])
+                indptr = np.array(h5_indptr[current_row:end_row+1] - h5_indptr[current_row])
+
+                chunk_csr = sparse.csr_matrix((data, indices, indptr), shape=(chunk_size, len(mask)))
+                chunk_csr = chunk_csr[:, mask]
+                chunk_csr.data = np.ceil(chunk_csr.data)
+
+                # Convert to Dense for Numba (faster than sparse iteration for dense ops)
+                counts_dense = chunk_csr.toarray()
+
+                # --- CALC 1: APPROX ---
+                approx_out = np.empty_like(counts_dense)
+                pearson_approx_kernel_cpu(
+                    counts_dense,
+                    tjs,
+                    tis[current_row:end_row],
+                    total,
+                    approx_out
+                )
+                out_x_approx[current_row:end_row, :] = approx_out
+                del approx_out
+
+                # --- CALC 2: FULL (In-place on counts_dense) ---
+                # We can reuse the counts_dense buffer for output to save RAM
+                pearson_residual_kernel_cpu(
+                    counts_dense,
+                    tjs,
+                    tis[current_row:end_row],
+                    sizes,
+                    total,
+                    counts_dense  # Overwrite input
+                )
+                out_x_full[current_row:end_row, :] = counts_dense
+
+                current_row = end_row
+
+    print(f"\nPhase [2/2]: COMPLETE{' '*50}")
+
+    if hasattr(adata_in, "file") and adata_in.file is not None: adata_in.file.close()
+    print(f"Total time: {time.perf_counter() - start_time:.2f} seconds.\n")
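The two Numba kernels above implement the standard negative-binomial Pearson residual r = (x − μ) / √(μ + μ²/θ) with μ = tⱼ·tᵢ/T, and its Poisson-limit approximation r ≈ (x − μ)/√μ (θ → ∞). A self-contained NumPy sketch of the same arithmetic, useful for spot-checking the kernels on toy data:

import numpy as np

# Toy data: 2 cells x 3 genes
counts = np.array([[0., 2., 5.],
                   [1., 0., 3.]])
tj = counts.sum(axis=0)             # per-gene totals
ti = counts.sum(axis=1)             # per-cell totals
T = counts.sum()                    # grand total
theta = np.array([10., 10., 10.])   # per-gene dispersion ('sizes')

mu = np.outer(ti, tj) / T           # expected counts under the depth model
full = (counts - mu) / np.sqrt(mu + mu ** 2 / theta)  # NB Pearson residual
approx = (counts - mu) / np.sqrt(mu)                  # Poisson limit

And a call sketch for the combined single-pass function, with placeholder paths (the pickles are the mask/fit/stats artifacts produced by earlier pipeline steps):

from m3Drop.NormalizationCPU import NBumiPearsonResidualsCombinedCPU

NBumiPearsonResidualsCombinedCPU(
    raw_filename="counts.h5ad",
    mask_filename="mask.pkl",
    fit_filename="fit.pkl",
    stats_filename="stats.pkl",
    output_filename_full="residuals_full.h5ad",
    output_filename_approx="residuals_approx.h5ad",
    mode="auto",
)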
m3Drop/{normalizationGPU.py → NormalizationGPU.py}
@@ -1,8 +1,3 @@
-try:
-    from .coreGPU import get_optimal_chunk_size
-except ImportError:
-    from coreGPU import get_optimal_chunk_size
-
 import pickle
 import time
 import cupy
@@ -13,6 +8,8 @@ import pandas as pd
 from cupy.sparse import csr_matrix as cp_csr_matrix
 import os
 
+from .ControlDeviceGPU import ControlDevice
+
 def NBumiPearsonResidualsGPU(
     cleaned_filename: str,
     fit_filename: str,
@@ -211,3 +208,4 @@ def NBumiPearsonResidualsApproxGPU(
 
     end_time = time.perf_counter()
     print(f"Total time: {end_time - start_time:.2f} seconds.\n")
+
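Across the new and renamed modules in this release, chunked streaming follows one pattern: the old per-module get_optimal_chunk_size helper is replaced by the shared ControlDevice, which is asked for the next row range that fits the memory budget. A sketch of that loop shape, based only on the calls visible in this diff (ControlDevice internals are not shown here):

from m3Drop.ControlDeviceGPU import ControlDevice

def stream_rows(indptr, n_cells, n_genes, process_chunk):
    # ControlDevice picks chunk boundaries from the CSR indptr and a memory target.
    device = ControlDevice(indptr=indptr, total_rows=n_cells, n_genes=n_genes,
                           mode="auto", manual_target=3000)
    row = 0
    while row < device.total_rows:
        end = device.get_next_chunk(row, mode='dense', overhead_multiplier=1.5)
        if end is None or end <= row:
            break
        process_chunk(row, end)   # operate on rows [row, end)
        row = end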