M3Drop 0.4.38.tar.gz → 0.4.39.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: M3Drop
- Version: 0.4.38
+ Version: 0.4.39
  Summary: A Python implementation of the M3Drop single-cell RNA-seq analysis tool.
  Home-page: https://github.com/PragalvhaSharma/m3DropNew
  Author: Tallulah Andrews
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: M3Drop
- Version: 0.4.38
+ Version: 0.4.39
  Summary: A Python implementation of the M3Drop single-cell RNA-seq analysis tool.
  Home-page: https://github.com/PragalvhaSharma/m3DropNew
  Author: Tallulah Andrews
@@ -12,7 +12,7 @@ from scipy import sparse
  from scipy import stats
  import anndata

- # [GOVERNOR INTEGRATION]
+ # [GOVERNOR INTEGRATION]
  from .coreGPU import hidden_calc_valsGPU, NBumiFitModelGPU, NBumiFitDispVsMeanGPU, get_optimal_chunk_size
  from cupy.sparse import csr_matrix as cp_csr_matrix
  import scipy.sparse as sp
@@ -26,27 +26,22 @@ def NBumiFitBasicModelGPU(
  chunk_size: int = None
  ) -> dict:
  """
- Fits a simpler, unadjusted NB model out-of-core using a GPU-accelerated
- algorithm. Designed to work with a standard (cell, gene) sparse matrix.
+ Fits a simpler, unadjusted NB model out-of-core.
  """
  start_time = time.perf_counter()
  print(f"FUNCTION: NBumiFitBasicModel() | FILE: {cleaned_filename}")

- # [GOVERNOR INTEGRATION] Calculate optimal chunk size if not provided
  if chunk_size is None:
  chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=3.0, is_dense=True)

  # --- Phase 1: Initialization ---
- print("Phase [1/2]: Initializing parameters and arrays on GPU...")
  tjs = stats['tjs'].values
  nc, ng = stats['nc'], stats['ng']

  tjs_gpu = cp.asarray(tjs, dtype=cp.float64)
  sum_x_sq_gpu = cp.zeros(ng, dtype=cp.float64)
- print("Phase [1/2]: COMPLETE")

  # --- Phase 2: Calculate Variance from Data Chunks ---
- print("Phase [2/2]: Calculating variance from data chunks...")
  with h5py.File(cleaned_filename, 'r') as f_in:
  x_group = f_in['X']
  h5_indptr = x_group['indptr']
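Throughout this release the "[GOVERNOR INTEGRATION]" helper get_optimal_chunk_size() (imported from .coreGPU above) decides how many cells are streamed per iteration; its implementation is not part of this diff. Purely as a hypothetical illustration of what such a governor computes, the sketch below sizes a row chunk from free GPU memory and a per-row byte estimate; the function name, the 0.8 safety margin, and the byte accounting are invented for the example and are not the package's API.

    import cupy as cp

    def sketch_optimal_chunk_size(n_genes: int, multiplier: float = 3.0,
                                  bytes_per_value: int = 8) -> int:
        # Hypothetical governor: assume `multiplier` dense float64 copies of a
        # (chunk_rows, n_genes) block must fit in free VRAM at the same time.
        free_bytes, _total_bytes = cp.cuda.Device().mem_info  # (free, total) in bytes
        bytes_per_row = n_genes * bytes_per_value * multiplier
        budget = int(free_bytes * 0.8)  # keep a margin for allocator overhead
        return max(1, int(budget // bytes_per_row))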
@@ -58,65 +53,32 @@ def NBumiFitBasicModelGPU(
  print(f"Phase [2/2]: Processing: {end_row} of {nc} cells.", end='\r')

  start_idx, end_idx = h5_indptr[i], h5_indptr[end_row]
- if start_idx == end_idx:
- continue
+ if start_idx == end_idx: continue

- # Process in smaller sub-chunks if needed
- max_elements = 5_000_000 # Process max 5M elements at a time
+ # Original processing
+ data_slice = h5_data[start_idx:end_idx]
+ indices_slice = h5_indices[start_idx:end_idx]
+
+ data_gpu = cp.asarray(data_slice, dtype=cp.float64)
+ indices_gpu = cp.asarray(indices_slice)
+
+ cp.add.at(sum_x_sq_gpu, indices_gpu, data_gpu**2)

- if end_idx - start_idx > max_elements:
- # Process in sub-chunks
- for sub_start in range(start_idx, end_idx, max_elements):
- sub_end = min(sub_start + max_elements, end_idx)
-
- data_slice = h5_data[sub_start:sub_end]
- indices_slice = h5_indices[sub_start:sub_end]
-
- data_gpu = cp.asarray(data_slice, dtype=cp.float64)
- indices_gpu = cp.asarray(indices_slice)
-
- # Accumulate the sum of squares for each gene
- cp.add.at(sum_x_sq_gpu, indices_gpu, data_gpu**2)
-
- # Free GPU memory
- del data_gpu, indices_gpu
- cp.get_default_memory_pool().free_all_blocks()
- else:
- # Original processing for smaller chunks
- data_slice = h5_data[start_idx:end_idx]
- indices_slice = h5_indices[start_idx:end_idx]
-
- data_gpu = cp.asarray(data_slice, dtype=cp.float64)
- indices_gpu = cp.asarray(indices_slice)
-
- # Accumulate the sum of squares for each gene
- cp.add.at(sum_x_sq_gpu, indices_gpu, data_gpu**2)
-
- # Clean up
- del data_gpu, indices_gpu
- cp.get_default_memory_pool().free_all_blocks()
+ del data_gpu, indices_gpu
+ cp.get_default_memory_pool().free_all_blocks()

- print(f"Phase [2/2]: COMPLETE ")
-
- # --- Final calculations on GPU ---
- if is_logged:
- raise NotImplementedError("Logged data variance calculation is not implemented for out-of-core.")
- else:
- # Variance of raw data: Var(X) = E[X^2] - E[X]^2
- mean_x_sq_gpu = sum_x_sq_gpu / nc
- mean_mu_gpu = tjs_gpu / nc
- my_rowvar_gpu = mean_x_sq_gpu - mean_mu_gpu**2
-
- # Calculate dispersion ('size')
- size_gpu = mean_mu_gpu**2 / (my_rowvar_gpu - mean_mu_gpu)
+ print(f"Phase [2/2]: COMPLETE{' '*50}")
+
+ mean_x_sq_gpu = sum_x_sq_gpu / nc
+ mean_mu_gpu = tjs_gpu / nc
+ my_rowvar_gpu = mean_x_sq_gpu - mean_mu_gpu**2
+ size_gpu = mean_mu_gpu**2 / (my_rowvar_gpu - mean_mu_gpu)

  max_size_val = cp.nanmax(size_gpu) * 10
- if cp.isnan(max_size_val):
- max_size_val = 1000
+ if cp.isnan(max_size_val): max_size_val = 1000
  size_gpu[cp.isnan(size_gpu) | (size_gpu <= 0)] = max_size_val
  size_gpu[size_gpu < 1e-10] = 1e-10

- # Move results to CPU
  my_rowvar_cpu = my_rowvar_gpu.get()
  sizes_cpu = size_gpu.get()
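The slimmed-down loop above keeps the same moment math as 0.4.38: per-gene variance via Var(X) = E[X^2] - E[X]^2 and a negative binomial 'size' of mean^2 / (var - mean), with invalid sizes clamped. A minimal NumPy sketch of that finalization step (a CPU stand-in for the CuPy arrays; names are illustrative):

    import numpy as np

    def basic_nb_moments(sum_x, sum_x_sq, n_cells):
        # Per-gene mean and variance from accumulated sums: Var(X) = E[X^2] - E[X]^2
        mean = sum_x / n_cells
        var = sum_x_sq / n_cells - mean**2
        with np.errstate(divide="ignore", invalid="ignore"):
            size = mean**2 / (var - mean)  # negative binomial 'size' (dispersion)
        # Clamp invalid or non-positive sizes, mirroring the GPU code's safety step
        max_size = np.nanmax(size) * 10
        if np.isnan(max_size):
            max_size = 1000.0
        size = np.where(np.isnan(size) | (size <= 0), max_size, size)
        size = np.maximum(size, 1e-10)
        return var, size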
@@ -137,21 +99,17 @@ def NBumiCheckFitFSGPU(
  plot_filename=None
  ) -> dict:
  """
- Calculates the fit errors (gene_error, cell_error) for a given model.
+ Calculates fit errors. [FIXED] Added clamps to prevent >1.0 probability errors.
  """
  start_time = time.perf_counter()
  print(f"FUNCTION: NBumiCheckFitFS() | FILE: {cleaned_filename}")

- # [GOVERNOR INTEGRATION] Adaptive chunk sizing
  if chunk_size is None:
  chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=5.0, is_dense=True)

- # --- Phase 1: Initialization ---
- print("Phase [1/2]: Initializing parameters and arrays on GPU...")
  vals = fit['vals']
  size_coeffs = NBumiFitDispVsMeanGPU(fit, suppress_plot=True)

- # Must use float64 for precision
  tjs_gpu = cp.asarray(vals['tjs'].values, dtype=cp.float64)
  tis_gpu = cp.asarray(vals['tis'].values, dtype=cp.float64)
  total = vals['total']
@@ -161,76 +119,41 @@ def NBumiCheckFitFSGPU(
  mean_expression_gpu = tjs_gpu / nc
  log_mean_expression_gpu = cp.log(mean_expression_gpu)
  smoothed_size_gpu = cp.exp(size_coeffs[0] + size_coeffs[1] * log_mean_expression_gpu)
+
+ # [FIX] Clamp smoothed size to prevent instability
+ smoothed_size_gpu = cp.maximum(smoothed_size_gpu, 1e-8)

- # Initialize result arrays
  row_ps_gpu = cp.zeros(ng, dtype=cp.float64)
  col_ps_gpu = cp.zeros(nc, dtype=cp.float64)
- print("Phase [1/2]: COMPLETE")

- # --- Phase 2: Calculate Expected Dropouts ---
- print(f"Phase [2/2]: Calculating expected dropouts (Chunk: {chunk_size})...")
-
  for i in range(0, nc, chunk_size):
  end_col = min(i + chunk_size, nc)
  print(f"Phase [2/2]: Processing: {end_col} of {nc} cells.", end='\r')

  tis_chunk_gpu = tis_gpu[i:end_col]
-
- # Standard calculation without errstate
  mu_chunk_gpu = tjs_gpu[:, cp.newaxis] * tis_chunk_gpu[cp.newaxis, :] / total

- # Calculate p_is directly - CuPy handles overflow internally
+ # [FIX] Safer power calculation
  base = 1 + mu_chunk_gpu / smoothed_size_gpu[:, cp.newaxis]
  p_is_chunk_gpu = cp.power(base, -smoothed_size_gpu[:, cp.newaxis])

- # Handle any inf/nan values that might have occurred
- p_is_chunk_gpu = cp.nan_to_num(p_is_chunk_gpu, nan=0.0, posinf=1.0, neginf=0.0)
+ # [FIX] Clamp probabilities to valid range [0, 1]
+ p_is_chunk_gpu = cp.clip(p_is_chunk_gpu, 0.0, 1.0)
+ p_is_chunk_gpu = cp.nan_to_num(p_is_chunk_gpu, nan=0.0)

- # Sum results
  row_ps_gpu += p_is_chunk_gpu.sum(axis=1)
  col_ps_gpu[i:end_col] = p_is_chunk_gpu.sum(axis=0)

- # Clean up
  del mu_chunk_gpu, p_is_chunk_gpu, base, tis_chunk_gpu
  cp.get_default_memory_pool().free_all_blocks()

  print(f"Phase [2/2]: COMPLETE{' ' * 50}")

- # Move results to CPU
  row_ps_cpu = row_ps_gpu.get()
  col_ps_cpu = col_ps_gpu.get()
  djs_cpu = vals['djs'].values
  dis_cpu = vals['dis'].values

- # Plotting
- if not suppress_plot:
- plt.figure(figsize=(12, 5))
- plt.subplot(1, 2, 1)
- plt.scatter(djs_cpu, row_ps_cpu, alpha=0.5, s=10)
- plt.title("Gene-specific Dropouts (Smoothed)")
- plt.xlabel("Observed")
- plt.ylabel("Fit")
- lims = [min(plt.xlim()[0], plt.ylim()[0]), max(plt.xlim()[1], plt.ylim()[1])]
- plt.plot(lims, lims, 'r-', alpha=0.75, zorder=0, label="y=x line")
- plt.grid(True); plt.legend()
-
- plt.subplot(1, 2, 2)
- plt.scatter(dis_cpu, col_ps_cpu, alpha=0.5, s=10)
- plt.title("Cell-specific Dropouts (Smoothed)")
- plt.xlabel("Observed")
- plt.ylabel("Expected")
- lims = [min(plt.xlim()[0], plt.ylim()[0]), max(plt.xlim()[1], plt.ylim()[1])]
- plt.plot(lims, lims, 'r-', alpha=0.75, zorder=0, label="y=x line")
- plt.grid(True); plt.legend()
-
- plt.tight_layout()
- if plot_filename:
- plt.savefig(plot_filename, dpi=300, bbox_inches='tight')
- print(f"STATUS: Diagnostic plot saved to '{plot_filename}'")
- plt.show()
- plt.close()
-
- # Calculate errors
  gene_error = np.sum((djs_cpu - row_ps_cpu)**2)
  cell_error = np.sum((dis_cpu - col_ps_cpu)**2)
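The substantive 0.4.39 change in NBumiCheckFitFSGPU is numerical guarding: the smoothed size is floored at 1e-8 and each dropout probability P(X=0) = (1 + mu/size)^(-size) is clipped into [0, 1] before being summed, so the accumulated expected dropouts can no longer drift above the observed counts. A small NumPy sketch of the guarded calculation (a CPU stand-in for the chunked CuPy code; names are illustrative):

    import numpy as np

    def expected_dropouts(mu, size, eps=1e-8):
        # NB dropout probability P(X=0) = (1 + mu/size)**(-size), with guards
        size = np.maximum(size, eps)            # floor the smoothed size
        p0 = np.power(1.0 + mu / size, -size)
        p0 = np.clip(p0, 0.0, 1.0)              # keep probabilities in [0, 1]
        return np.nan_to_num(p0, nan=0.0)

    # Example: a small mean with a near-zero size, and a typical gene
    print(expected_dropouts(np.array([0.1, 50.0]), np.array([1e-12, 0.5])))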
@@ -245,7 +168,7 @@ def NBumiCheckFitFSGPU(
  }

  def NBumiCompareModelsGPU(
- raw_filename: str, # Kept for API compatibility, but functionally we use cleaned_filename for indices
+ raw_filename: str,
  cleaned_filename: str,
  stats: dict,
  fit_adjust: dict,
@@ -255,23 +178,19 @@ def NBumiCompareModelsGPU(
  ) -> dict:
  """
  OPTIMIZED VERSION (IN-MEMORY):
- - Eliminates the 46GB '_basic_norm.h5ad' temporary file.
- - Performs depth normalization and variance calculation on-the-fly in GPU VRAM.
- - PRESERVED SCIENTIFIC LOGIC: Var(X) = E[X^2] - (E[X])^2 on normalized data.
+ - Calculates Basic Fit without writing 46GB file.
  """
  pipeline_start_time = time.time()
  print(f"FUNCTION: NBumiCompareModels() | Comparing models for {cleaned_filename}")

- # [GOVERNOR] High multiplier (12.0) because we hold Raw + Norm + Square in VRAM
  if chunk_size is None:
  chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=12.0, is_dense=False)

- # --- Phase 1: In-Memory "Basic Fit" (Normalization + Variance) ---
  print("Phase [1/3]: Calculating Basic Model (Depth-Normalized) variance on-the-fly...")

- # 1. Prepare Size Factors (CPU)
- tjs = stats['tjs'].values # Gene sums
- tis = stats['tis'].values # Cell sums
+ # 1. Prepare Size Factors
+ tjs = stats['tjs'].values
+ tis = stats['tis'].values
  nc, ng = stats['nc'], stats['ng']

  median_sum = np.median(tis[tis > 0])
@@ -279,12 +198,9 @@ def NBumiCompareModelsGPU(
  non_zero_mask = tis > 0
  size_factors[non_zero_mask] = tis[non_zero_mask] / median_sum

- # 2. Prepare GPU Arrays
  sum_x_sq_gpu = cp.zeros(ng, dtype=cp.float64)
- sum_x_gpu = cp.zeros(ng, dtype=cp.float64) # Need sum(x) to calc mean(x) for variance
+ sum_x_gpu = cp.zeros(ng, dtype=cp.float64)

- # 3. GPU Loop (Raw Data -> Normalize -> Accumulate)
- # CRITICAL: We read CLEANED_FILENAME to ensure indices match 'stats'
  with h5py.File(cleaned_filename, 'r') as f_in:
  h5_indptr = f_in['X']['indptr']
  h5_data = f_in['X']['data']
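Phase 1 of NBumiCompareModelsGPU builds per-cell size factors from the cell totals (tis): each non-empty cell's total divided by the median of the non-zero totals. A short NumPy sketch of that step; the diff does not show how size_factors is initialized, so the fallback factor of 1.0 for empty cells below is an assumption:

    import numpy as np

    def depth_size_factors(tis):
        # Per-cell depth factors: cell total / median of non-zero cell totals
        median_sum = np.median(tis[tis > 0])
        size_factors = np.ones_like(tis, dtype=np.float64)  # assumed fallback for empty cells
        nonzero = tis > 0
        size_factors[nonzero] = tis[nonzero] / median_sum
        return size_factors

    print(depth_size_factors(np.array([1000.0, 2000.0, 0.0, 4000.0])))  # [0.5, 1.0, 1.0, 2.0]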
@@ -297,73 +213,52 @@ def NBumiCompareModelsGPU(
  start_idx, end_idx = h5_indptr[i], h5_indptr[end_row]
  if start_idx == end_idx: continue

- # Load Raw Chunk
  data_gpu = cp.asarray(h5_data[start_idx:end_idx], dtype=cp.float32)
  indices_gpu = cp.asarray(h5_indices[start_idx:end_idx])
  indptr_gpu = cp.asarray(h5_indptr[i:end_row + 1] - start_idx)

- # Expand Size Factors to match Data Structure
  nnz_in_chunk = indptr_gpu[-1].item()
  cell_boundary_markers = cp.zeros(nnz_in_chunk, dtype=cp.int32)
  if len(indptr_gpu) > 1:
  cell_boundary_markers[indptr_gpu[:-1]] = 1
- # row_indices maps every data point to its cell index (0 to chunk_size)
  row_indices = cp.cumsum(cell_boundary_markers, axis=0) - 1

- # Get size factors for this chunk
  sf_chunk = cp.asarray(size_factors[i:end_row])

- # --- THE MAGIC: On-the-Fly Normalization ---
- # data_norm = data_raw / size_factor
+ # Normalize
  data_gpu /= sf_chunk[row_indices]

- # Accumulate for Variance: E[X^2] and E[X]
+ # Accumulate
  cp.add.at(sum_x_sq_gpu, indices_gpu, data_gpu**2)
  cp.add.at(sum_x_gpu, indices_gpu, data_gpu)

- # Clean up VRAM
  del data_gpu, indices_gpu, indptr_gpu, row_indices, sf_chunk, cell_boundary_markers
  cp.get_default_memory_pool().free_all_blocks()

  print(f"Phase [1/3]: COMPLETE{' '*50}")

- # 4. Finalize Basic Statistics
- # Var(X) = E[X^2] - (E[X])^2
  mean_x_sq_gpu = sum_x_sq_gpu / nc
  mean_mu_gpu = sum_x_gpu / nc
  my_rowvar_gpu = mean_x_sq_gpu - mean_mu_gpu**2

- # Dispersion = Mean^2 / (Var - Mean)
  size_gpu = mean_mu_gpu**2 / (my_rowvar_gpu - mean_mu_gpu)

- # Safety Clamping
  max_size_val = cp.nanmax(size_gpu) * 10
  if cp.isnan(max_size_val): max_size_val = 1000
  size_gpu[cp.isnan(size_gpu) | (size_gpu <= 0)] = max_size_val
  size_gpu[size_gpu < 1e-10] = 1e-10

- # Construct "Basic Fit" Object
  fit_basic = {
  'sizes': pd.Series(size_gpu.get(), index=stats['tjs'].index),
  'vals': stats,
  'var_obs': pd.Series(my_rowvar_gpu.get(), index=stats['tjs'].index)
  }

- # --- Phase 2: Check Fit (Calculate Errors) ---
  print("Phase [2/3]: Evaluating fit errors on ORIGINAL data...")
-
- # Check Adjust (M3Drop) - uses its own governor
- check_adjust = NBumiCheckFitFSGPU(
- cleaned_filename, fit_adjust, suppress_plot=True
- )
-
- # Check Basic (Depth-Norm) - uses its own governor
- check_basic = NBumiCheckFitFSGPU(
- cleaned_filename, fit_basic, suppress_plot=True
- )
+ check_adjust = NBumiCheckFitFSGPU(cleaned_filename, fit_adjust, suppress_plot=True)
+ check_basic = NBumiCheckFitFSGPU(cleaned_filename, fit_basic, suppress_plot=True)
  print("Phase [2/3]: COMPLETE")

- # --- Phase 3: Plotting & Comparison ---
  print("Phase [3/3]: Generating comparison...")

  nc_data = stats['nc']
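The loop above normalizes each CSR chunk in place without densifying it: every stored value is mapped back to its owning cell (the diff uses a cumulative sum over cell-boundary markers built from indptr) and divided by that cell's size factor. Here is a compact CPU sketch of the same value-to-row mapping, using np.repeat over the per-row counts rather than the marker/cumsum trick; it is equivalent for this purpose and the names are illustrative:

    import numpy as np
    from scipy import sparse

    def normalize_csr_chunk_inplace(data, indptr, size_factors_chunk):
        # Map every stored value back to its owning cell, then divide in place
        nnz_per_cell = np.diff(indptr)
        row_of_value = np.repeat(np.arange(len(nnz_per_cell)), nnz_per_cell)
        data /= size_factors_chunk[row_of_value]
        return data

    # Tiny example: 2 cells x 3 genes
    m = sparse.csr_matrix(np.array([[2.0, 0.0, 4.0], [0.0, 3.0, 0.0]]))
    normalize_csr_chunk_inplace(m.data, m.indptr, np.array([2.0, 3.0]))
    print(m.toarray())  # [[1. 0. 2.] [0. 1. 0.]]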
@@ -427,13 +322,11 @@ def NBumiPlotDispVsMeanGPU(
  """
  print("FUNCTION: NBumiPlotDispVsMean()")

- # --- 1. Extract data and regression coefficients ---
  mean_expression = fit['vals']['tjs'].values / fit['vals']['nc']
  sizes = fit['sizes'].values
  coeffs = NBumiFitDispVsMeanGPU(fit, suppress_plot=True)
  intercept, slope = coeffs[0], coeffs[1]

- # --- 2. Calculate the fitted line for plotting ---
  log_mean_expr_range = np.linspace(
  np.log(mean_expression[mean_expression > 0].min()),
  np.log(mean_expression.max()),
@@ -442,7 +335,6 @@ def NBumiPlotDispVsMeanGPU(
  log_fitted_sizes = intercept + slope * log_mean_expr_range
  fitted_sizes = np.exp(log_fitted_sizes)

- # --- 3. Create the plot ---
  plt.figure(figsize=(8, 6))
  plt.scatter(mean_expression, sizes, label='Observed Dispersion', alpha=0.5, s=8)
  plt.plot(np.exp(log_mean_expr_range), fitted_sizes, color='red', label='Regression Fit', linewidth=2)
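NBumiPlotDispVsMeanGPU overlays the observed per-gene sizes with the regression returned by NBumiFitDispVsMeanGPU, which is linear in log space: log(size) ≈ intercept + slope * log(mean expression). A self-contained sketch of evaluating and plotting that fitted curve, assuming the coefficients are already available (names are illustrative):

    import numpy as np
    import matplotlib.pyplot as plt

    def plot_disp_vs_mean(mean_expression, sizes, intercept, slope, n_points=100):
        # Evaluate exp(intercept + slope * log(mean)) over the observed mean range
        log_range = np.linspace(np.log(mean_expression[mean_expression > 0].min()),
                                np.log(mean_expression.max()), n_points)
        fitted_sizes = np.exp(intercept + slope * log_range)

        plt.figure(figsize=(8, 6))
        plt.scatter(mean_expression, sizes, s=8, alpha=0.5, label="Observed Dispersion")
        plt.plot(np.exp(log_range), fitted_sizes, color="red", linewidth=2, label="Regression Fit")
        plt.xlabel("Mean expression")
        plt.ylabel("Size (dispersion)")
        plt.legend()
        plt.show()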
@@ -24,9 +24,7 @@ def NBumiPearsonResidualsGPU(
  start_time = time.perf_counter()
  print(f"FUNCTION: NBumiPearsonResiduals() | FILE: {cleaned_filename}")

- # --- SAFETY UPDATE ---
- # Multiplier 10.0 (Was 6.0): Accounts for Float64 precision (8 bytes) vs Governor default (4 bytes).
- # 4 matrices * 8 bytes = 32 bytes/cell. Governor 10 * 4 = 40 bytes. Safe buffer established.
+ # Governor for Processing (RAM/VRAM)
  chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=10.0, is_dense=True)

  # --- Phase 1: Initialization ---
@@ -45,16 +43,29 @@ def NBumiPearsonResidualsGPU(
  tis_gpu = cupy.asarray(tis, dtype=cupy.float64)
  sizes_gpu = cupy.asarray(sizes, dtype=cupy.float64)

- # Create Output H5 (Identical structure to cleaned input)
+ # Create Output H5
  adata_in = anndata.read_h5ad(cleaned_filename, backed='r')
  adata_out = anndata.AnnData(obs=adata_in.obs, var=adata_in.var)
  adata_out.write_h5ad(output_filename, compression="gzip")

+ # [FIX] Calculate Safe Storage Chunk Size (~1GB)
+ # HDF5 limit is 4GB. You requested 1GB for optimal speed.
+ bytes_per_row = ng * 4 # float32
+ target_bytes = 1_000_000_000 # 1GB
+ storage_chunk_rows = int(target_bytes / bytes_per_row)
+
+ if storage_chunk_rows < 1: storage_chunk_rows = 1
+ # Note: It is okay if storage_chunk > processing_chunk (HDF5 handles this),
+ # but strictly it must be < 4GB total size.
+
+ print(f" > Processing Chunk: {chunk_size} rows (RAM)")
+ print(f" > Storage Chunk: {storage_chunk_rows} rows (Disk - 1GB Target)")
+
  with h5py.File(output_filename, 'a') as f_out:
  if 'X' in f_out:
  del f_out['X']
- # Create dataset for dense matrix output (float32)
- out_x = f_out.create_dataset('X', shape=(nc, ng), chunks=(chunk_size, ng), dtype='float32')
+ # Create dataset with SAFE chunks (Fixes the ValueError)
+ out_x = f_out.create_dataset('X', shape=(nc, ng), chunks=(storage_chunk_rows, ng), dtype='float32')

  print("Phase [1/2]: COMPLETE")
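The storage-chunk change matters because HDF5 caps a single chunk at 4 GiB; with chunks=(chunk_size, ng) a large governor chunk on a wide matrix could exceed that limit and make create_dataset raise a ValueError. The new code sizes the on-disk chunk independently of the processing chunk, targeting roughly 1 GB. A minimal sketch of that sizing rule (the file name and shapes below are illustrative):

    import h5py

    def safe_row_chunk(n_genes: int, itemsize: int = 4,
                       target_bytes: int = 1_000_000_000) -> int:
        # Rows per HDF5 chunk so a single chunk stays near 1 GB,
        # comfortably below the 4 GiB per-chunk limit.
        return max(1, target_bytes // (n_genes * itemsize))

    nc, ng = 100_000, 30_000
    rows = safe_row_chunk(ng)  # 8333 rows * 30000 genes * 4 B ~= 1 GB per chunk
    with h5py.File("chunk_demo.h5", "w") as f:
        f.create_dataset("X", shape=(nc, ng), chunks=(rows, ng), dtype="float32")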
@@ -77,7 +88,6 @@ def NBumiPearsonResidualsGPU(
  indptr_slice = h5_indptr[i:end_row+1] - h5_indptr[i]

  # Convert to Dense GPU Matrix
- # We construct sparse first, then densify on GPU to save bandwidth
  counts_chunk_sparse_gpu = cp_csr_matrix((
  cupy.asarray(data_slice, dtype=cupy.float64),
  cupy.asarray(indices_slice),
@@ -91,24 +101,18 @@ def NBumiPearsonResidualsGPU(
  mus_chunk_gpu = tjs_gpu[cupy.newaxis, :] * tis_chunk_gpu[:, cupy.newaxis] / total

  denominator_gpu = cupy.sqrt(mus_chunk_gpu + mus_chunk_gpu**2 / sizes_gpu[cupy.newaxis, :])
-
- # --- LOGIC RESTORED: Prevent Division by Zero ---
  denominator_gpu = cupy.where(denominator_gpu == 0, 1, denominator_gpu)

- # (Counts - Mu) / Sqrt(V)
  pearson_chunk_gpu = (counts_chunk_dense_gpu - mus_chunk_gpu) / denominator_gpu

  # Write to Disk
- # [OPTIMIZATION] Cast to float32 on GPU to halve PCIe transfer time
  out_x[i:end_row, :] = pearson_chunk_gpu.astype(cupy.float32).get()

- # Cleanup
  del counts_chunk_dense_gpu, counts_chunk_sparse_gpu, mus_chunk_gpu, pearson_chunk_gpu, denominator_gpu
  cupy.get_default_memory_pool().free_all_blocks()

  print(f"Phase [2/2]: COMPLETE{' '*50}")

- # --- LOGIC RESTORED: Explicit File Cleanup ---
  if hasattr(adata_in, "file") and adata_in.file is not None:
  adata_in.file.close()
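The residual math itself is unchanged in 0.4.39: mu = (gene total * cell total) / grand total, the full Pearson residual divides by sqrt(mu + mu^2/size), the "Approx" variant divides by sqrt(mu), and zero denominators are replaced by 1. A compact NumPy sketch covering both forms (a CPU stand-in for the chunked CuPy code; names are illustrative):

    import numpy as np

    def nb_pearson_residuals(counts, tjs, tis, total, sizes, approx=False):
        # counts: dense (cells, genes) block; tjs: gene totals; tis: cell totals
        mus = np.outer(tis, tjs) / total                       # expected counts
        if approx:
            denom = np.sqrt(mus)                               # 'Approx' variant
        else:
            denom = np.sqrt(mus + mus**2 / sizes[np.newaxis, :])
        denom = np.where(denom == 0, 1.0, denom)               # guard division by zero
        return (counts - mus) / denom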
@@ -127,8 +131,6 @@ def NBumiPearsonResidualsApproxGPU(
  start_time = time.perf_counter()
  print(f"FUNCTION: NBumiPearsonResidualsApprox() | FILE: {cleaned_filename}")

- # --- HANDSHAKE ---
- # Multiplier 10.0: Same safety logic as Full residuals.
  chunk_size = get_optimal_chunk_size(cleaned_filename, multiplier=10.0, is_dense=True)

  # --- Phase 1: Initialization ---
@@ -150,10 +152,17 @@ def NBumiPearsonResidualsApproxGPU(
  adata_out = anndata.AnnData(obs=adata_in.obs, var=adata_in.var)
  adata_out.write_h5ad(output_filename, compression="gzip")

+ # [FIX] Calculate Safe Storage Chunk Size (~1GB)
+ bytes_per_row = ng * 4
+ target_bytes = 1_000_000_000 # 1GB
+ storage_chunk_rows = int(target_bytes / bytes_per_row)
+ if storage_chunk_rows < 1: storage_chunk_rows = 1
+
  with h5py.File(output_filename, 'a') as f_out:
  if 'X' in f_out:
  del f_out['X']
- out_x = f_out.create_dataset('X', shape=(nc, ng), chunks=(chunk_size, ng), dtype='float32')
+ # Create dataset with SAFE chunks
+ out_x = f_out.create_dataset('X', shape=(nc, ng), chunks=(storage_chunk_rows, ng), dtype='float32')

  print("Phase [1/2]: COMPLETE")

@@ -185,15 +194,11 @@ def NBumiPearsonResidualsApproxGPU(
  tis_chunk_gpu = tis_gpu[i:end_row]
  mus_chunk_gpu = tjs_gpu[cupy.newaxis, :] * tis_chunk_gpu[:, cupy.newaxis] / total

- # Approx: Denom = Sqrt(Mu)
  denominator_gpu = cupy.sqrt(mus_chunk_gpu)
-
- # --- LOGIC RESTORED: Prevent Division by Zero ---
  denominator_gpu = cupy.where(denominator_gpu == 0, 1, denominator_gpu)

  pearson_chunk_gpu = (counts_chunk_dense_gpu - mus_chunk_gpu) / denominator_gpu

- # [OPTIMIZATION] Cast to float32 on GPU to halve PCIe transfer time
  out_x[i:end_row, :] = pearson_chunk_gpu.astype(cupy.float32).get()

  del counts_chunk_dense_gpu, counts_chunk_sparse_gpu, mus_chunk_gpu, pearson_chunk_gpu, denominator_gpu
@@ -201,7 +206,6 @@ def NBumiPearsonResidualsApproxGPU(

  print(f"Phase [2/2]: COMPLETE{' '*50}")

- # --- LOGIC RESTORED: Explicit File Cleanup ---
  if hasattr(adata_in, "file") and adata_in.file is not None:
  adata_in.file.close()

@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:

  setuptools.setup(
  name="M3Drop",
- version="0.4.38", # Version bump
+ version="0.4.39", # Version bump
  author="Tallulah Andrews",
  author_email="tandrew6@uwo.ca",
  description="A Python implementation of the M3Drop single-cell RNA-seq analysis tool.",
7 files without changes