microarray 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. microarray/__init__.py +15 -0
  2. microarray/_version.py +3 -0
  3. microarray/datasets/__init__.py +3 -0
  4. microarray/datasets/_arrayexpress.py +1 -0
  5. microarray/datasets/_cdf_files.py +35 -0
  6. microarray/datasets/_geo.py +1 -0
  7. microarray/datasets/_utils.py +143 -0
  8. microarray/io/__init__.py +17 -0
  9. microarray/io/_anndata_converter.py +198 -0
  10. microarray/io/_cdf.py +575 -0
  11. microarray/io/_cel.py +591 -0
  12. microarray/io/_read.py +127 -0
  13. microarray/plotting/__init__.py +28 -0
  14. microarray/plotting/_base.py +253 -0
  15. microarray/plotting/_cel.py +75 -0
  16. microarray/plotting/_de_plots.py +239 -0
  17. microarray/plotting/_diagnostic_plots.py +268 -0
  18. microarray/plotting/_heatmap.py +279 -0
  19. microarray/plotting/_ma_plots.py +136 -0
  20. microarray/plotting/_pca.py +320 -0
  21. microarray/plotting/_qc_plots.py +335 -0
  22. microarray/plotting/_score.py +38 -0
  23. microarray/plotting/_top_table_heatmap.py +98 -0
  24. microarray/plotting/_utils.py +280 -0
  25. microarray/preprocessing/__init__.py +39 -0
  26. microarray/preprocessing/_background.py +862 -0
  27. microarray/preprocessing/_log2.py +77 -0
  28. microarray/preprocessing/_normalize.py +1292 -0
  29. microarray/preprocessing/_rma.py +243 -0
  30. microarray/preprocessing/_robust.py +170 -0
  31. microarray/preprocessing/_summarize.py +318 -0
  32. microarray/py.typed +0 -0
  33. microarray/tools/__init__.py +26 -0
  34. microarray/tools/_biomart.py +416 -0
  35. microarray/tools/_empirical_bayes.py +401 -0
  36. microarray/tools/_fdist.py +171 -0
  37. microarray/tools/_linear_models.py +387 -0
  38. microarray/tools/_mds.py +101 -0
  39. microarray/tools/_pca.py +88 -0
  40. microarray/tools/_score.py +86 -0
  41. microarray/tools/_toptable.py +360 -0
  42. microarray-0.1.0.dist-info/METADATA +75 -0
  43. microarray-0.1.0.dist-info/RECORD +44 -0
  44. microarray-0.1.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,862 @@
1
+ """Background correction for microarray data.
2
+
3
+ Implements multiple background correction methods including:
4
+ - Basic methods: none, subtract, half, minimum
5
+ - Advanced methods: edwards, normexp (normal+exponential convolution model)
6
+
7
+ The normexp model is based on Ritchie et al. (2007) and Irizarry et al. (2003).
8
+
9
+ References:
10
+ Ritchie, M.E., Silver, J., Oshlack, A., Holmes, M., Diyagama, D., Holloway, A., and Smyth, G.K. (2007).
11
+ A comparison of background correction methods for two-colour microarrays.
12
+ Bioinformatics 23, 2700-2707.
13
+
14
+ Irizarry, R.A., Hobbs, B., Collin, F., et al. (2003).
15
+ Exploration, normalization, and summaries of high density oligonucleotide array probe level data.
16
+ Biostatistics, 4(2), 249-264.
17
+
18
+ Edwards, D. (2003). Non-linear normalization and background correction in one-channel cDNA microarray studies.
19
+ Bioinformatics 19, 825-833.
20
+ """
21
+
22
+ import warnings
23
+ from typing import Literal
24
+
25
+ import numpy as np
26
+ from anndata import AnnData
27
+ from numpy.typing import NDArray
28
+ from scipy.stats import norm
29
+
30
+
31
def background_correct(
    adata: AnnData,
    method: Literal["none", "subtract", "half", "minimum", "edwards", "normexp", "mas"] = "normexp",
    offset: float = 0,
    normexp_method: Literal["saddle", "mle", "rma", "rma75"] = "saddle",
    edwards_offset: float = 0.1,
    mas_grid_dim: int = 16,
    copy: bool = False,
) -> AnnData | None:
    """Apply background correction to microarray intensity data.

    Dispatches to one of several background-correction strategies ported
    from limma/affy-style pipelines.

    Parameters
    ----------
    adata
        AnnData of shape (n_samples, n_probes) with raw intensities in `.X`.
        Methods 'subtract', 'half', 'minimum' and 'edwards' additionally
        require background intensities in `.layers['background']`.
    method
        Correction strategy:

        - 'none': no correction applied
        - 'subtract': simple subtraction ``E - Eb`` (may go negative)
        - 'half': ``max(E - Eb, 0.5)``
        - 'minimum': subtract, then replace negatives with half the
          per-array minimum positive value
        - 'edwards': log-linear interpolation for dull spots (Edwards 2003)
        - 'normexp': normal+exponential convolution model (default,
          recommended for Affymetrix-like data)
        - 'mas': MAS5 spatial background correction with grid-based smoothing
    offset
        Constant added to the corrected intensities. Default is 0.
    normexp_method
        Parameter-estimation variant for 'normexp': 'saddle' (default),
        'mle', 'rma' or 'rma75' (McGee & Chen 2006).
    edwards_offset
        Threshold fraction for the 'edwards' method. Default is 0.1 (10%).
    mas_grid_dim
        Number of grid regions for 'mas' (default 16); must be a perfect
        square.
    copy
        If True, operate on and return a copy; otherwise modify in place.

    Returns
    -------
    AnnData or None
        The corrected AnnData when ``copy=True``, otherwise None. Corrected
        intensities are stored in `.X` and the correction parameters are
        recorded in `.uns['background_correction']`.

    Examples
    --------
    >>> import microarray as ma
    >>> adata = ma.io.cel_batch_to_anndata(cel_files, cdf_path, annotation_db)
    >>> adata_corrected = ma.pp.background_correct(adata, method="normexp", copy=True)
    """
    adata = adata.copy() if copy else adata

    if adata.X is None:
        raise ValueError("AnnData object must have .X attribute with intensity values")
    if adata.n_obs < 1:
        raise ValueError("AnnData must contain at least one sample")

    # These methods subtract an explicitly measured background layer;
    # 'none'/'normexp' work on .X alone and 'mas' uses spatial coordinates.
    if method in ("subtract", "half", "minimum", "edwards") and "background" not in adata.layers:
        raise ValueError(f"Method '{method}' requires background intensities in adata.layers['background']")

    # Dispatch table: method name -> zero-argument corrector closure.
    dispatch = {
        "none": lambda: _background_none(adata),
        "subtract": lambda: _background_subtract(adata),
        "half": lambda: _background_half(adata),
        "minimum": lambda: _background_minimum(adata),
        "edwards": lambda: _background_edwards(adata, offset=edwards_offset),
        "normexp": lambda: _background_normexp(adata, method=normexp_method),
        "mas": lambda: _background_mas(adata, grid_dim=mas_grid_dim),
    }
    try:
        corrector = dispatch[method]
    except KeyError:
        raise ValueError(f"Unknown background correction method: {method}") from None
    corrector()

    if offset != 0:
        adata.X = adata.X + offset

    # Record what was done so downstream steps can introspect the pipeline.
    metadata = {"method": method, "offset": offset}
    if method == "normexp":
        metadata["normexp_method"] = normexp_method
    elif method == "mas":
        metadata["mas_grid_dim"] = mas_grid_dim
    elif method == "edwards":
        metadata["edwards_offset"] = edwards_offset
    adata.uns["background_correction"] = metadata

    return adata if copy else None
182
+
183
+
184
def _background_none(adata: AnnData) -> None:
    """No-op background correction; intensities are left untouched."""
    return None
188
+
189
+
190
def _background_subtract(adata: AnnData) -> None:
    """Subtract the background layer from the foreground: E = E - Eb.

    Negative corrected values are possible with this method.
    """
    adata.X = adata.X - adata.layers["background"]
197
+
198
+
199
def _background_half(adata: AnnData) -> None:
    """Background subtraction floored at 0.5: E = max(E - Eb, 0.5).

    Guarantees no corrected value falls below 0.5.
    """
    background = adata.layers["background"]
    adata.X = np.maximum(adata.X - background, 0.5)
206
+
207
+
208
def _background_minimum(adata: AnnData) -> None:
    """Background subtraction with per-array replacement of negatives.

    After subtracting the background layer, any value below 1e-18 in an
    array is replaced by half of that array's smallest remaining value; if
    an array has no values >= 1e-18 at all, 1e-6 is used instead.
    """
    corrected = adata.X - adata.layers["background"]

    for row in range(adata.n_obs):
        values = corrected[row, :]
        too_small = values < 1e-18
        if not np.any(too_small):
            continue

        kept = values[~too_small]
        # Half the smallest surviving value, or a small constant if the
        # whole array was effectively non-positive.
        replacement = np.min(kept) / 2 if len(kept) > 0 else 1e-6
        corrected[row, too_small] = replacement

    adata.X = corrected
235
+
236
+
237
def _background_edwards(adata: AnnData, offset: float = 0.1) -> None:
    """Edwards log-linear interpolation for dull spots.

    Applies log-linear interpolation for spots where E - Eb < delta.
    The threshold delta is chosen per array such that the number of spots
    with 0 < E - Eb < delta is approximately offset * (number of spots
    with E - Eb <= 0).

    Parameters
    ----------
    adata
        AnnData object with foreground in .X and background in .layers['background']
    offset
        Fraction controlling threshold (default 0.1 = 10%)

    References
    ----------
    Edwards, D. (2003). Non-linear normalization and background correction
    in one-channel cDNA microarray studies. Bioinformatics 19, 825-833.
    """
    E = adata.X.copy()
    Eb = adata.layers["background"]
    sub = E - Eb

    # Compute delta threshold for each array (sample)
    n_samples = adata.n_obs
    delta = np.zeros((n_samples, 1))

    for i in range(n_samples):
        sub_sample = sub[i, :]
        # Fraction of negative/zero background-subtracted values
        neg_frac = np.mean(sub_sample < 1e-16)
        # Quantile at which interpolation kicks in. Clamp to 1.0: when
        # most spots are non-positive, neg_frac * (1 + offset) exceeds 1
        # and np.quantile would raise ValueError.
        quantile_val = min(neg_frac * (1 + offset), 1.0)
        delta[i, 0] = np.quantile(sub_sample, quantile_val)

    # Apply correction
    # where sub < delta: E = delta * exp(1 - (Eb + delta) / E)
    # where sub >= delta: E = sub
    corrected = np.where(sub < delta, delta * np.exp(1 - (Eb + delta) / E), sub)

    adata.X = corrected
278
+
279
+
280
def _background_normexp(adata: AnnData, method: str = "saddle") -> None:
    """Apply normexp (normal+exponential convolution) background correction.

    For each sample, fits the model parameters (mu, sigma, alpha) and
    replaces the observed intensities with the conditional expectation
    E[signal | observed]. The per-sample parameters are stored in
    ``adata.uns['normexp_params']``.

    Parameters
    ----------
    adata
        AnnData object with intensities in .X
    method
        Parameter estimation method: 'saddle', 'mle', 'rma', or 'rma75'
    """
    intensities = adata.X.copy()
    n_samples = intensities.shape[0]

    # Fitter dispatch. 'saddle' and 'mle' currently share the simplified
    # estimator (full optimization planned in step 6).
    fitters = {
        "rma": _normexp_fit_rma,
        "rma75": _normexp_fit_rma75,
        "saddle": _normexp_fit_simple,
        "mle": _normexp_fit_simple,
    }
    if method not in fitters:
        raise ValueError(f"Unknown normexp method: {method}")
    fit_params = fitters[method]

    # Per-sample parameter record.
    correction_params = {
        "mu": np.zeros(n_samples),
        "sigma": np.zeros(n_samples),
        "alpha": np.zeros(n_samples),
        "method": method,
    }

    for idx in range(n_samples):
        sample = intensities[idx, :]
        fitted = fit_params(sample)

        correction_params["mu"][idx] = fitted["mu"]
        correction_params["sigma"][idx] = fitted["sigma"]
        correction_params["alpha"][idx] = fitted["alpha"]

        intensities[idx, :] = _normexp_signal(fitted, sample)

    adata.X = intensities
    adata.uns["normexp_params"] = correction_params
331
+
332
+
333
+ def _normexp_fit_simple(x: np.ndarray) -> dict[str, float]:
334
+ """Simple parameter estimation for normexp (current RMA implementation).
335
+
336
+ Parameters
337
+ ----------
338
+ x
339
+ Intensity values for one sample
340
+
341
+ Returns:
342
+ -------
343
+ dict
344
+ Dictionary with keys 'mu', 'sigma', 'alpha'
345
+ """
346
+ # Estimate μ from histogram mode (0 to 75th percentile)
347
+ q75 = np.percentile(x, 75)
348
+ x_subset = x[x <= q75]
349
+
350
+ hist, bin_edges = np.histogram(x_subset, bins=100)
351
+ mode_idx = np.argmax(hist)
352
+ mu = (bin_edges[mode_idx] + bin_edges[mode_idx + 1]) / 2
353
+
354
+ # Estimate σ from probes with intensity < mu
355
+ x_low = x[x < mu]
356
+ if len(x_low) > 0:
357
+ sigma = np.std(x_low - mu) * np.sqrt(2)
358
+ else:
359
+ sigma = np.std(x) * 0.5
360
+ warnings.warn(
361
+ f"No probes below estimated μ={mu:.2f}. Using fallback σ estimation.",
362
+ UserWarning,
363
+ stacklevel=3,
364
+ )
365
+
366
+ # Ensure sigma is not too small
367
+ sigma = max(sigma, 1e-6)
368
+
369
+ # Fixed alpha for simple method
370
+ alpha = 0.03
371
+
372
+ return {"mu": mu, "sigma": sigma, "alpha": alpha}
373
+
374
+
375
+ def _normexp_fit_rma(x: np.ndarray) -> dict[str, float]:
376
+ """RMA-style parameter estimation for normexp.
377
+
378
+ Based on the original RMA algorithm from Affymetrix.
379
+ Uses mode-based estimation without mode correction.
380
+
381
+ Parameters
382
+ ----------
383
+ x
384
+ Intensity values for one sample
385
+
386
+ Returns:
387
+ -------
388
+ dict
389
+ Dictionary with keys 'mu', 'sigma', 'alpha'
390
+ """
391
+ # Get mode estimate using 5th percentile as rough approximation
392
+ mu = np.percentile(x, 5)
393
+
394
+ # Estimate sigma from background probes
395
+ bg_data = x[x < mu]
396
+ if len(bg_data) > 1:
397
+ sigma = np.std(bg_data - mu) * np.sqrt(2)
398
+ else:
399
+ # Fallback
400
+ sigma = np.std(x) * 0.5
401
+
402
+ # Ensure sigma > 0
403
+ sigma = max(sigma, 1e-6)
404
+
405
+ # Estimate alpha (mean signal)
406
+ alpha = np.mean(x) - mu
407
+ alpha = max(alpha, 1e-6)
408
+
409
+ return {"mu": mu, "sigma": sigma, "alpha": alpha}
410
+
411
+
412
def _normexp_fit_rma75(x: np.ndarray, n_pts: int = 2**14) -> dict[str, float]:
    """RMA-75 parameter estimation with mode correction.

    Implements the improved RMA-75 method from McGee & Chen (2006),
    which includes mode correction and uses 75th quantile for alpha estimation.

    Parameters
    ----------
    x
        Intensity values for one sample
    n_pts
        Number of points for density estimation (default 16384)

    Returns:
    -------
    dict
        Dictionary with keys 'mu', 'sigma', 'alpha'

    References:
    ----------
    McGee, M. and Chen, Z. (2006). Parameter estimation for the
    exponential-normal convolution model for background correction
    of Affymetrix GeneChip data. Stat Appl Genet Mol Biol, 5(1), Article 24.
    """
    # Local imports keep scipy.optimize off the module import path.
    from scipy.optimize import brentq
    from scipy.stats import gaussian_kde

    def max_density(data, n_pts):
        """Find mode using kernel density estimation."""
        # Degenerate input: KDE needs at least two points.
        if len(data) < 2:
            return np.median(data)
        # Use Epanechnikov kernel equivalent
        # NOTE(review): gaussian_kde uses a Gaussian kernel with Scott's
        # bandwidth; "Epanechnikov equivalent" presumably refers to the R
        # reference implementation — confirm the kernels agree closely enough.
        kde = gaussian_kde(data, bw_method="scott")
        x_range = np.linspace(data.min(), data.max(), n_pts)
        density = kde(x_range)
        # Mode = grid point with the highest estimated density.
        return x_range[np.argmax(density)]

    def mu_est_correct(m, s, a):
        """Mode correction function."""

        # Root of f gives the standardized shift between the observed mode
        # m and the corrected background mean (McGee & Chen 2006, eq. for
        # the mode of the normexp density). Here a is a rate (1/alpha3).
        def f(t):
            z1 = t - s * a
            z2 = m / s + s * a
            return norm.pdf(z1) - s * a * (norm.cdf(z1) + norm.cdf(z2) - 1)

        try:
            # Bracketing interval [-5, 10] in standardized units.
            t = brentq(f, -5, 10, xtol=1e-12)
            return m - s * t
        except ValueError:
            # If root finding fails, return original mode
            return m

    # Get initial mode estimate
    pmbg = max_density(x, min(n_pts, len(x) // 2))
    bg_data = x[x < pmbg]

    # Refine the mode using only the sub-mode (background-dominated) probes.
    if len(bg_data) > 0:
        pmbg = max_density(bg_data, min(n_pts, len(bg_data) // 2))

    mubg = pmbg  # Initial mode

    # Estimate sigma from background
    # RMS deviation about the mode, scaled by sqrt(2) because only the
    # lower half of the background distribution is observed below the mode.
    bg_data = x[x < pmbg]
    if len(bg_data) > 1:
        bg_data_centered = bg_data - pmbg
        bgsd = np.sqrt(np.sum(bg_data_centered**2) / (len(bg_data) - 1)) * np.sqrt(2)
    else:
        bgsd = np.std(x) * 0.5

    # Estimate alpha from 75th quantile
    # Exponential-tail estimate: for Exp(mean theta), q-quantile above mu
    # is -theta * log(1 - q).
    q75 = 0.75
    alpha3 = -(np.quantile(x, q75) - pmbg) / np.log(1 - q75)

    # Apply mode correction
    mu3 = mu_est_correct(m=mubg, s=bgsd, a=1 / alpha3)
    mu3 = (mu3 + mubg) / 2  # Average with original mode

    # Re-estimate sigma with corrected mode
    bg_data3 = x[x < mu3]
    if len(bg_data3) > 1:
        bg_data3_centered = bg_data3 - mu3
        bgsd3 = np.sqrt(np.sum(bg_data3_centered**2) / (len(bg_data3) - 1)) * np.sqrt(2)
    else:
        bgsd3 = bgsd

    # Re-estimate alpha
    alpha3 = -(np.quantile(x, q75) - mu3) / np.log(1 - q75)

    # Returned alpha is 1/alpha3, i.e. a rate-like quantity; sigma and
    # alpha are floored at 1e-6 to keep _normexp_signal's validation happy.
    return {"mu": mu3, "sigma": max(bgsd3, 1e-6), "alpha": max(1 / alpha3, 1e-6)}
501
+
502
+
503
+ def _normexp_signal(params: dict[str, float], x: np.ndarray) -> np.ndarray:
504
+ """Compute expected signal given observed intensity in normexp model.
505
+
506
+ Computes E[signal | observed] using the normal+exponential convolution model.
507
+
508
+ Parameters
509
+ ----------
510
+ params
511
+ Dictionary with keys 'mu', 'sigma', 'alpha'
512
+ x
513
+ Observed intensity values
514
+
515
+ Returns:
516
+ -------
517
+ np.ndarray
518
+ Corrected signal values
519
+
520
+ References:
521
+ ----------
522
+ Ritchie et al. (2007). A comparison of background correction methods
523
+ for two-colour microarrays. Bioinformatics 23, 2700-2707.
524
+ """
525
+ mu = params["mu"]
526
+ sigma = params["sigma"]
527
+ alpha = params["alpha"]
528
+
529
+ if alpha <= 0:
530
+ raise ValueError("alpha must be positive")
531
+ if sigma <= 0:
532
+ raise ValueError("sigma must be positive")
533
+
534
+ # Compute mu.sf = x - mu - sigma²/alpha
535
+ sigma2 = sigma * sigma
536
+ mu_sf = x - mu - sigma2 / alpha
537
+
538
+ # Compute signal = mu_sf + sigma² * exp(log_pdf - log_cdf)
539
+ # Using log-space for numerical stability
540
+ a_std = mu_sf / sigma
541
+
542
+ log_pdf = norm.logpdf(a_std)
543
+ log_cdf = norm.logcdf(a_std)
544
+
545
+ # Handle numerical issues
546
+ with warnings.catch_warnings():
547
+ warnings.filterwarnings("ignore", category=RuntimeWarning)
548
+ correction_term = sigma * np.exp(log_pdf - log_cdf)
549
+ correction_term = np.nan_to_num(correction_term, nan=0.0, posinf=0.0, neginf=0.0)
550
+
551
+ signal = mu_sf + correction_term
552
+
553
+ # Ensure non-negative values
554
+ if np.any(signal < 0):
555
+ warnings.warn(
556
+ "Numerical accuracy limit reached with very low intensity or high background. "
557
+ "Setting adjusted intensities to small positive value.",
558
+ UserWarning,
559
+ stacklevel=3,
560
+ )
561
+ signal = np.maximum(signal, 1e-6)
562
+
563
+ return signal
564
+
565
+
566
def rma_background_correct(
    adata: AnnData,
    copy: bool = False,
) -> AnnData | None:
    """RMA background correction (backward-compatible wrapper).

    Thin wrapper around :func:`background_correct` with ``method='normexp'``
    and ``normexp_method='saddle'``; kept for compatibility with earlier
    releases.

    The convolution model treats each observed intensity as
    normal(mu, sigma^2) background plus exponential signal and replaces it
    with the conditional expectation E[signal | observed]. Per sample:

    1. mu is estimated from the mode of the intensity distribution
    2. sigma is estimated from the spread of low-intensity probes
    3. intensities are replaced by E[signal | observed]

    Parameters
    ----------
    adata
        AnnData object with shape (n_samples, n_probes).
        Must contain raw intensity values in `.X`.
    copy
        If True, return a copy of the AnnData object. If False, modify in place.

    Returns
    -------
    AnnData or None
        Corrected AnnData when ``copy=True``, else None. Parameters are
        stored in `.uns['background_correction']`, `.uns['normexp_params']`
        and, for backward compatibility, `.uns['rma_background']`.

    Examples
    --------
    >>> import microarray as ma
    >>> adata = ma.io.cel_batch_to_anndata(cel_files, cdf_path, annotation_db)
    >>> adata_corrected = ma.pp.rma_background_correct(adata, copy=True)

    See Also
    --------
    background_correct : Unified interface with multiple background correction methods
    """
    result = background_correct(adata, method="normexp", normexp_method="saddle", copy=copy)

    # Mirror the fitted parameters under the legacy key on whichever
    # object actually holds the corrected data.
    target = result if copy else adata
    if "normexp_params" in target.uns:
        target.uns["rma_background"] = target.uns["normexp_params"].copy()
        target.uns["rma_background"]["method"] = "saddle"

    return result if copy else None
638
+
639
+
640
def _background_mas(adata: AnnData, grid_dim: int = 16) -> None:
    """MAS5 spatial background correction with grid-based smoothing.

    Divides the chip into ``grid_dim`` regions, estimates each region's
    background as the mean (and noise as the standard deviation) of its
    lowest 2% of intensities, then corrects every probe using an
    inverse-squared-distance weighted combination of the regional values:
    ``corrected = max(intensity - bg_weighted, 0.5 * noise_weighted)``.

    Parameters
    ----------
    adata : AnnData
        AnnData object with probe intensities. Must have 'x' and 'y' columns in
        `.var` containing probe spatial coordinates.
    grid_dim : int, default=16
        Number of grid regions (must be a perfect square).

    Raises
    ------
    ValueError
        If ``grid_dim`` is not a perfect square or spatial coordinates are
        missing from ``adata.var``.

    References
    ----------
    Affymetrix (2002). Statistical Algorithms Description Document.
    """
    # Validate grid_dim is a perfect square
    grid_dim_1d = int(np.sqrt(grid_dim))
    if grid_dim_1d**2 != grid_dim:
        raise ValueError(f"grid_dim must be a perfect square, got {grid_dim}")

    # Check for spatial coordinates
    if "x" not in adata.var.columns or "y" not in adata.var.columns:
        raise ValueError(
            "MAS5 background correction requires probe spatial coordinates in adata.var['x'] and adata.var['y']"
        )

    x = adata.var["x"].values
    y = adata.var["y"].values

    # Infer chip dimensions from max coordinates
    rows = int(np.max(x)) + 1
    cols = int(np.max(y)) + 1

    n_samples, n_probes = adata.X.shape

    # Grid geometry: centroids, boundaries, per-probe region assignment.
    centroidx, centroidy = _get_centroids(rows, cols, grid_dim_1d, grid_dim_1d)
    gridpt_x, gridpt_y = _get_gridpts(rows, cols, grid_dim)
    whichgrid = _compute_grids(x, y, rows, cols, n_probes, gridpt_x, gridpt_y)

    # (n_probes, grid_dim) inverse-distance weights. The smoothing constant
    # in _compute_weights keeps every weight strictly positive, so the row
    # sums are safe to divide by.
    weights = _compute_weights(x, y, n_probes, grid_dim, centroidx, centroidy)
    weight_sums = weights.sum(axis=1)

    corrected = np.zeros_like(adata.X)

    # Process each sample independently
    for j in range(n_samples):
        probe_intensity = adata.X[j, :]

        # Background and noise per grid region for this sample.
        bg_grid, noise_grid = _compute_background_quadrant(probe_intensity, n_probes, grid_dim, whichgrid)

        # Weighted averages over regions for all probes at once (replaces
        # a per-probe Python loop over _background_correct_probe).
        bg_weighted = (weights @ bg_grid) / weight_sums
        noise_weighted = (weights @ noise_grid) / weight_sums

        # Corrected value = max(intensity - bg, 0.5 * noise)
        corrected[j, :] = np.maximum(probe_intensity - bg_weighted, 0.5 * noise_weighted)

    adata.X = corrected
722
+
723
+
724
+ def _get_centroids(
725
+ rows: int, cols: int, grid_dim_rows: int, grid_dim_cols: int
726
+ ) -> tuple[NDArray[np.floating], NDArray[np.floating]]:
727
+ """Compute centroids of grid regions."""
728
+ grid_dim = grid_dim_rows * grid_dim_cols
729
+
730
+ cuts_x = np.array([(i + 1) * rows / grid_dim_rows - rows / (2.0 * grid_dim_rows) for i in range(grid_dim_rows)])
731
+ cuts_y = np.array([(j + 1) * cols / grid_dim_cols - cols / (2.0 * grid_dim_cols) for j in range(grid_dim_cols)])
732
+
733
+ centroidx = np.zeros(grid_dim)
734
+ centroidy = np.zeros(grid_dim)
735
+
736
+ for j in range(grid_dim_cols):
737
+ for i in range(grid_dim_rows):
738
+ idx = j * grid_dim_rows + i
739
+ centroidx[idx] = cuts_x[idx // grid_dim_rows] + 0.5
740
+ centroidy[idx] = cuts_y[idx % grid_dim_rows] + 0.5
741
+
742
+ return centroidx, centroidy
743
+
744
+
745
+ def _get_gridpts(rows: int, cols: int, grid_dim: int) -> tuple[NDArray[np.int_], NDArray[np.int_]]:
746
+ """Compute grid boundary points."""
747
+ grid_dim_1d = int(np.sqrt(grid_dim))
748
+
749
+ gridpt_x = np.array([(i + 1) * cols // grid_dim_1d for i in range(grid_dim_1d - 1)])
750
+ gridpt_y = np.array([(i + 1) * rows // grid_dim_1d for i in range(grid_dim_1d - 1)])
751
+
752
+ return gridpt_x, gridpt_y
753
+
754
+
755
+ def _compute_grids(
756
+ x: NDArray[np.int_],
757
+ y: NDArray[np.int_],
758
+ rows: int,
759
+ cols: int,
760
+ n_probes: int,
761
+ gridpt_x: NDArray[np.int_],
762
+ gridpt_y: NDArray[np.int_],
763
+ ) -> NDArray[np.int_]:
764
+ """Assign each probe to a grid region."""
765
+ grid_dim_1d = len(gridpt_x) + 1
766
+ whichgrid = np.zeros(n_probes, dtype=np.int32)
767
+
768
+ for i in range(n_probes):
769
+ # Find x grid
770
+ x_grid = 0
771
+ for j in range(len(gridpt_x)):
772
+ if x[i] <= gridpt_x[j]:
773
+ x_grid = j
774
+ break
775
+ else:
776
+ x_grid = len(gridpt_x)
777
+
778
+ # Find y grid
779
+ y_grid = 0
780
+ for j in range(len(gridpt_y)):
781
+ if y[i] <= gridpt_y[j]:
782
+ y_grid = j
783
+ break
784
+ else:
785
+ y_grid = len(gridpt_y)
786
+
787
+ # Grid index (1-based)
788
+ whichgrid[i] = x_grid * grid_dim_1d + y_grid + 1
789
+
790
+ return whichgrid
791
+
792
+
793
+ def _compute_weights(
794
+ x: NDArray[np.int_],
795
+ y: NDArray[np.int_],
796
+ n_probes: int,
797
+ grid_dim: int,
798
+ centroidx: NDArray[np.floating],
799
+ centroidy: NDArray[np.floating],
800
+ ) -> NDArray[np.floating]:
801
+ """Compute inverse distance weights for each probe to each grid centroid."""
802
+ smooth = 100.0
803
+ weights = np.zeros((n_probes, grid_dim), dtype=np.float64)
804
+
805
+ for i in range(n_probes):
806
+ # Compute squared distances to all centroids
807
+ distances_sq = (x[i] - centroidx) ** 2 + (y[i] - centroidy) ** 2
808
+
809
+ # Inverse distance weights
810
+ weights[i, :] = 1.0 / (distances_sq + smooth)
811
+
812
+ return weights
813
+
814
+
815
+ def _compute_background_quadrant(
816
+ probe_intensity: NDArray[np.floating],
817
+ n_probes: int,
818
+ grid_dim: int,
819
+ whichgrid: NDArray[np.int_],
820
+ ) -> tuple[NDArray[np.floating], NDArray[np.floating]]:
821
+ """Compute background and noise for each grid region.
822
+
823
+ Background is mean of lowest 2% of intensities.
824
+ Noise is standard deviation of lowest 2%.
825
+ """
826
+ bg_grid = np.zeros(grid_dim, dtype=np.float64)
827
+ noise_grid = np.zeros(grid_dim, dtype=np.float64)
828
+
829
+ for j in range(grid_dim):
830
+ # Get probes in this grid (whichgrid is 1-based)
831
+ mask = whichgrid == (j + 1)
832
+ if not np.any(mask):
833
+ bg_grid[j] = 0.0
834
+ noise_grid[j] = 1.0
835
+ continue
836
+
837
+ grid_intensities = probe_intensity[mask]
838
+
839
+ # Sort to find lowest 2%
840
+ grid_sorted = np.sort(grid_intensities)
841
+ lower_2pc = max(1, int(0.02 * len(grid_sorted)))
842
+
843
+ # Mean and std of lowest 2%
844
+ lowest = grid_sorted[:lower_2pc]
845
+ bg_grid[j] = np.mean(lowest)
846
+ noise_grid[j] = np.std(lowest, ddof=1) if len(lowest) > 1 else 1.0
847
+
848
+ return bg_grid, noise_grid
849
+
850
+
851
+ def _background_correct_probe(
852
+ x: int,
853
+ y: int,
854
+ grid_dim: int,
855
+ weights: NDArray[np.floating],
856
+ centroid_values: NDArray[np.floating],
857
+ ) -> float:
858
+ """Compute weighted background value for a single probe."""
859
+ weighted_sum = np.sum(weights * centroid_values)
860
+ sum_weights = np.sum(weights)
861
+
862
+ return weighted_sum / sum_weights if sum_weights > 0 else 0.0