PyPI - microarray - Versions diffs - 0.1.0__py3-none-any.whl - Mend

microarray 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

microarray/__init__.py +15 -0
microarray/_version.py +3 -0
microarray/datasets/__init__.py +3 -0
microarray/datasets/_arrayexpress.py +1 -0
microarray/datasets/_cdf_files.py +35 -0
microarray/datasets/_geo.py +1 -0
microarray/datasets/_utils.py +143 -0
microarray/io/__init__.py +17 -0
microarray/io/_anndata_converter.py +198 -0
microarray/io/_cdf.py +575 -0
microarray/io/_cel.py +591 -0
microarray/io/_read.py +127 -0
microarray/plotting/__init__.py +28 -0
microarray/plotting/_base.py +253 -0
microarray/plotting/_cel.py +75 -0
microarray/plotting/_de_plots.py +239 -0
microarray/plotting/_diagnostic_plots.py +268 -0
microarray/plotting/_heatmap.py +279 -0
microarray/plotting/_ma_plots.py +136 -0
microarray/plotting/_pca.py +320 -0
microarray/plotting/_qc_plots.py +335 -0
microarray/plotting/_score.py +38 -0
microarray/plotting/_top_table_heatmap.py +98 -0
microarray/plotting/_utils.py +280 -0
microarray/preprocessing/__init__.py +39 -0
microarray/preprocessing/_background.py +862 -0
microarray/preprocessing/_log2.py +77 -0
microarray/preprocessing/_normalize.py +1292 -0
microarray/preprocessing/_rma.py +243 -0
microarray/preprocessing/_robust.py +170 -0
microarray/preprocessing/_summarize.py +318 -0
microarray/py.typed +0 -0
microarray/tools/__init__.py +26 -0
microarray/tools/_biomart.py +416 -0
microarray/tools/_empirical_bayes.py +401 -0
microarray/tools/_fdist.py +171 -0
microarray/tools/_linear_models.py +387 -0
microarray/tools/_mds.py +101 -0
microarray/tools/_pca.py +88 -0
microarray/tools/_score.py +86 -0
microarray/tools/_toptable.py +360 -0
microarray-0.1.0.dist-info/METADATA +75 -0
microarray-0.1.0.dist-info/RECORD +44 -0
microarray-0.1.0.dist-info/WHEEL +4 -0

microarray/plotting/_top_table_heatmap.py ADDED Viewed

@@ -0,0 +1,98 @@
+from __future__ import annotations
+import inspect
+from typing import Any
+from anndata import AnnData
+from matplotlib.axes import Axes
+from matplotlib.figure import Figure
+from microarray.plotting._heatmap import heatmap
+from microarray.tools._toptable import top_table
# Keyword names that may be forwarded through ``top_table_heatmap(**kwargs)``.
# Each set is the wrapped callable's parameter names minus the arguments that
# ``top_table_heatmap`` always supplies itself.
_TOP_TABLE_KWARGS = set(inspect.signature(top_table).parameters).difference(
    ("data", "group", "number")
)
_HEATMAP_KWARGS = set(inspect.signature(heatmap).parameters).difference(
    ("adata", "genes", "groupby", "title", "show")
)
def top_table_heatmap(
    adata: AnnData,
    n_top: int = 10,
    groupby: str | None = None,
    title: str | None = None,
    show: bool = True,
    **kwargs: Any,
) -> tuple[Figure, dict[str, Axes | None]]:
    """Draw a heatmap of the top-ranked genes of every condition.

    For each group, :func:`microarray.tools.top_table` is asked for the
    ``n_top`` best genes. The per-group results are merged (first occurrence
    wins, order preserved) and handed to :func:`microarray.plotting.heatmap`.

    Args:
        adata: AnnData object whose ``adata.uns['lm_fit']`` holds the fit.
        n_top: Number of genes requested from ``top_table`` per group.
        groupby: Column of ``adata.obs`` used for grouping. When ``None`` the
            value stored under ``adata.uns['lm_fit']['groupby']`` is used.
        title: Heatmap title. When ``None`` a default title mentioning
            ``n_top`` and ``groupby`` is generated.
        show: Forwarded to ``heatmap`` as ``show``.
        **kwargs: Extra options. Each key is forwarded to ``top_table``
            and/or ``heatmap`` depending on which of the two accepts a
            parameter of that name; a key accepted by both goes to both.

    Returns:
        Whatever :func:`heatmap` returns (annotated as a figure and a
        dictionary of axes).

    Raises:
        ValueError: If ``n_top`` is not positive, the fit is missing, no
            usable ``groupby`` column can be determined, or no genes were
            collected.
        TypeError: If ``kwargs`` contains a key that neither ``top_table``
            nor ``heatmap`` accepts.
    """
    if n_top <= 0:
        raise ValueError("n_top must be a positive integer")

    fit = adata.uns.get("lm_fit")
    if fit is None:
        raise ValueError("No fit object found in adata.uns['lm_fit']. Run lm_fit and ebayes first.")

    # Explicit argument wins; otherwise fall back to what the fit recorded.
    groupby = fit.get("groupby") if groupby is None else groupby
    if not groupby:
        raise ValueError("groupby must be provided or available in adata.uns['lm_fit']['groupby']")
    if groupby not in adata.obs:
        raise ValueError(f"Column '{groupby}' not found in adata.obs")

    # Prefer the groups recorded in the fit; otherwise use the distinct
    # values of the grouping column in order of first appearance.
    column_map = fit.get("group_to_column")
    if isinstance(column_map, dict) and column_map:
        groups = list(column_map)
    else:
        observed = adata.obs[groupby].astype(str).tolist()
        groups = list(dict.fromkeys(observed))

    # Route every extra keyword to the callable(s) that understand it.
    top_kwargs: dict[str, Any] = {k: v for k, v in kwargs.items() if k in _TOP_TABLE_KWARGS}
    heatmap_kwargs: dict[str, Any] = {k: v for k, v in kwargs.items() if k in _HEATMAP_KWARGS}
    rejected = sorted(k for k in kwargs if k not in top_kwargs and k not in heatmap_kwargs)
    if rejected:
        unknown_str = ", ".join(rejected)
        raise TypeError(f"Unknown keyword argument(s): {unknown_str}")

    # A dict doubles as an insertion-ordered set: re-inserting a known gene
    # keeps its original position.
    ordered_genes: dict[str, None] = {}
    for group in groups:
        table = top_table(adata, group=str(group), number=n_top, **top_kwargs)
        ordered_genes.update(dict.fromkeys(table.index.astype(str)))
    marker_genes = list(ordered_genes)

    if not marker_genes:
        raise ValueError("No marker genes found. Adjust filtering parameters for top_table.")

    if title is None:
        heatmap_title = f"Top {n_top} marker genes per {groupby}"
    else:
        heatmap_title = title
    return heatmap(
        adata,
        genes=marker_genes,
        groupby=groupby,
        title=heatmap_title,
        show=show,
        **heatmap_kwargs,
    )

microarray/plotting/_utils.py ADDED Viewed

@@ -0,0 +1,280 @@
+"""Utility functions for plotting."""
+from typing import Any
+from warnings import warn
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib.axes import Axes
def _status_lookup(mapping: dict[Any, Any], status_value: Any, default: Any) -> Any:
    """Look up ``status_value`` in ``mapping``, retrying with its ``str()`` form.

    The per-status dictionaries are keyed by strings, but a numeric status
    array yields NumPy scalars; the string fallback lets e.g. ``1`` find the
    entry stored under ``"1"``.
    """
    if status_value in mapping:
        return mapping[status_value]
    return mapping.get(str(status_value), default)


def with_highlights(
    x: np.ndarray,
    y: np.ndarray,
    status: np.ndarray | None = None,
    colors: dict[str, str] | None = None,
    labels: dict[str, str] | None = None,
    pch: int | dict[str, str] = 16,
    cex: float | dict[str, float] = 1.0,
    alpha: float = 0.6,
    xlab: str = "",
    ylab: str = "",
    title: str = "",
    legend: bool | str = "best",
    ax: Axes | None = None,
    **kwargs: Any,
) -> Axes:
    """Create scatter plot with status-based highlighting.

    Core plotting utility inspired by limma's plotWithHighlights.
    Supports color coding and symbol customization based on status groups.

    Args:
        x: X-axis values (any array-like; converted with ``np.asarray``).
        y: Y-axis values (any array-like; converted with ``np.asarray``).
        status: Status labels for each point. If None, all points have same
            appearance. Numeric labels are matched against the string keys of
            ``colors``, ``labels``, ``pch`` and ``cex`` via their ``str()``
            form, so an integer ``1`` uses the entry for ``"1"``.
        colors: Dictionary mapping status values to colors. Statuses missing
            here fall back to a built-in mapping and then to a cyclic palette.
        labels: Dictionary mapping status values to legend labels. Defaults to
            the string form of each status value.
        pch: Point marker style. Either one R-style integer code (15-19) or a
            dict mapping status to a code or to a matplotlib marker string.
        cex: Point size multiplier (base size 20). Can be single value or dict
            mapping status to size.
        alpha: Point transparency (0-1).
        xlab: X-axis label; not set when empty.
        ylab: Y-axis label; not set when empty.
        title: Plot title; not set when empty.
        legend: Legend position ('best', 'upper right', etc.) or False to
            disable. ``True`` is treated as ``'best'``. The legend is only
            drawn when more than one status is present.
        ax: Existing Axes object. If None, creates new figure.
        **kwargs: Additional arguments passed to ax.scatter().

    Returns:
        Axes object with the plot.

    Examples:
        >>> import numpy as np
        >>> from microarray.plotting import with_highlights
        >>> x = np.random.randn(100)
        >>> y = np.random.randn(100)
        >>> status = np.where(np.abs(y) > 1, "significant", "not-significant")
        >>> ax = with_highlights(x, y, status=status)
    """
    if ax is None:
        _, ax = plt.subplots(figsize=(8, 6))

    # Boolean-mask indexing below requires real arrays, not lists.
    x = np.asarray(x)
    y = np.asarray(y)

    # Convert marker codes (R-style pch values)
    marker_map = {
        15: "s",  # square
        16: "o",  # circle
        17: "^",  # triangle up
        18: "D",  # diamond
        19: "o",  # filled circle
    }
    # Marker used whenever ``pch`` is a single value rather than a dict.
    scalar_marker = marker_map.get(pch, "o") if isinstance(pch, int) else "o"

    if status is None:
        # Single group: plot all points with same style
        size = cex * 20 if isinstance(cex, (int, float)) else 20
        ax.scatter(x, y, marker=scalar_marker, s=size, alpha=alpha, **kwargs)
    else:
        # Multiple groups: plot by status
        status = np.asarray(status)
        unique_statuses = np.unique(status)

        # Default color palette (similar to R's default colors)
        default_colors = {
            "up": "#E41A1C",
            "down": "#377EB8",
            "not-significant": "#999999",
            "NotSig": "#999999",
            "Sig": "#E41A1C",
            "-1": "#377EB8",
            "0": "#999999",
            "1": "#E41A1C",
        }
        user_colors = {} if colors is None else colors
        user_labels = {} if labels is None else labels
        color_palette = ["#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#FFFF33"]
        missing = object()  # sentinel: distinguishes "no entry" from any stored value

        for i, stat in enumerate(unique_statuses):
            # Color precedence: caller's mapping, built-in mapping, cyclic palette.
            color = _status_lookup(user_colors, stat, missing)
            if color is missing:
                color = _status_lookup(default_colors, stat, missing)
            if color is missing:
                color = color_palette[i % len(color_palette)]

            label = _status_lookup(user_labels, stat, str(stat))

            # Get marker and size for this status
            if isinstance(pch, dict):
                marker_code = _status_lookup(pch, stat, 16)
                # Integer codes are translated; anything else is used as a
                # matplotlib marker as-is.
                marker = marker_map.get(marker_code, "o") if isinstance(marker_code, int) else marker_code
            else:
                marker = scalar_marker

            if isinstance(cex, dict):
                size = _status_lookup(cex, stat, 1.0) * 20
            else:
                size = cex * 20

            mask = status == stat
            ax.scatter(x[mask], y[mask], c=color, marker=marker, s=size, alpha=alpha, label=label, **kwargs)

        # Add legend if requested
        if legend and len(unique_statuses) > 1:
            ax.legend(loc=legend if isinstance(legend, str) else "best", frameon=True)

    # Set labels and title
    if xlab:
        ax.set_xlabel(xlab)
    if ylab:
        ax.set_ylabel(ylab)
    if title:
        ax.set_title(title)
    ax.grid(True, alpha=0.3, linestyle="--")
    return ax
def add_loess_curve(
    ax: Axes,
    x: np.ndarray,
    y: np.ndarray,
    span: float = 0.3,
    color: str = "blue",
    linewidth: float = 2,
    linestyle: str = "-",
    label: str | None = None,
) -> Axes:
    """Add LOESS (locally weighted scatterplot smoothing) curve to existing plot.

    Points where ``x`` or ``y`` is NaN are dropped first. If fewer than three
    points remain, nothing is drawn. When statsmodels is not installed, a
    centered moving average over the x-sorted data is drawn instead.

    Args:
        ax: Axes object to add curve to.
        x: X-axis values (array-like, converted to a float array).
        y: Y-axis values (array-like, converted to a float array).
        span: Smoothing span (fraction of data to use for smoothing).
            Default 0.3.
        color: Line color.
        linewidth: Line width.
        linestyle: Line style ('-', '--', '-.', ':').
        label: Legend label for the curve.

    Returns:
        Axes object with added curve.

    Examples:
        >>> import numpy as np
        >>> import matplotlib.pyplot as plt
        >>> from microarray.plotting._utils import add_loess_curve
        >>> fig, ax = plt.subplots()
        >>> x = np.linspace(0, 10, 100)
        >>> y = np.sin(x) + np.random.randn(100) * 0.1
        >>> ax.scatter(x, y, alpha=0.5)
        >>> add_loess_curve(ax, x, y, span=0.2)
    """
    x_arr = np.asarray(x, dtype=float)
    y_arr = np.asarray(y, dtype=float)

    # Remove NaN values up front so both smoothers see the same clean data;
    # a single NaN would otherwise spread through the moving-average window.
    keep = ~(np.isnan(x_arr) | np.isnan(y_arr))
    x_clean = x_arr[keep]
    y_clean = y_arr[keep]
    if len(x_clean) < 3:
        return ax  # Need at least 3 points

    try:
        from statsmodels.nonparametric.smoothers_lowess import lowess
    except ImportError:
        lowess = None

    if lowess is None:
        # Fallback: use simple moving average if statsmodels not available.
        # NOTE: ImportWarning is hidden by Python's default warning filters;
        # the category is kept unchanged for backward compatibility.
        warn(
            "statsmodels not available, using moving average instead of LOESS",
            ImportWarning,
            stacklevel=2,
        )
        from scipy.ndimage import uniform_filter1d

        order = np.argsort(x_clean)
        x_line = x_clean[order]
        window = max(3, int(len(x_clean) * span))
        if window % 2 == 0:
            window += 1  # Make odd for symmetry
        y_line = uniform_filter1d(y_clean[order], size=window, mode="nearest")
    else:
        # lowess returns (x, y) pairs already sorted by x
        smoothed = lowess(y_clean, x_clean, frac=span, return_sorted=True)
        x_line = smoothed[:, 0]
        y_line = smoothed[:, 1]

    ax.plot(x_line, y_line, color=color, linewidth=linewidth, linestyle=linestyle, label=label)
    return ax
def add_reference_line(
    ax: Axes,
    y: float = 0,
    x: float | None = None,
    color: str = "gray",
    linewidth: float = 1,
    linestyle: str = "--",
    alpha: float = 0.7,
) -> Axes:
    """Draw one straight reference line across the axes.

    A vertical line at ``x`` is drawn when ``x`` is given (including ``0``);
    otherwise a horizontal line at ``y`` is drawn.

    Args:
        ax: Axes object to draw on.
        y: Y-coordinate of the horizontal line; ignored when ``x`` is given.
        x: X-coordinate of the vertical line, or None for a horizontal line.
        color: Line color.
        linewidth: Line width.
        linestyle: Line style.
        alpha: Line transparency.

    Returns:
        The same Axes object that was passed in.
    """
    line_style = {
        "color": color,
        "linewidth": linewidth,
        "linestyle": linestyle,
        "alpha": alpha,
    }
    if x is None:
        ax.axhline(y=y, **line_style)
    else:
        ax.axvline(x=x, **line_style)
    return ax
def get_default_colors(n: int) -> list[str]:
    """Get default color palette for n categories.

    The palette has eight entries; when more are requested the palette is
    repeated from the start.

    Args:
        n: Number of colors needed. Zero or a negative number yields an
            empty list.

    Returns:
        List of ``n`` color hex codes (empty when ``n <= 0``).
    """
    # R-like default colors
    palette = (
        "#E41A1C",  # red
        "#377EB8",  # blue
        "#4DAF4A",  # green
        "#984EA3",  # purple
        "#FF7F00",  # orange
        "#FFFF33",  # yellow
        "#A65628",  # brown
        "#F781BF",  # pink
    )
    # Build from range(n) instead of slicing: a slice with negative n would
    # return all but the last |n| colors rather than none.
    return [palette[i % len(palette)] for i in range(n)]

microarray/preprocessing/__init__.py ADDED Viewed

@@ -0,0 +1,39 @@
+"""Preprocessing functions for microarray data analysis.
+This module provides preprocessing methods including RMA (Robust Multi-array Average),
+MAS5 (MicroArray Suite 5.0), Li-Wong (dChip), and a flexible expresso pipeline for
+custom preprocessing workflows.
+"""
+from ._background import background_correct, rma_background_correct
+from ._log2 import log2
+from ._normalize import (
+    normalize_constant,
+    normalize_contrasts,
+    normalize_invariantset,
+    normalize_loess,
+    normalize_qspline,
+    normalize_quantile,
+    normalize_quantile_robust,
+)
+from ._rma import rma
+from ._robust import tukey_biweight, tukey_biweight_summary
+from ._summarize import median_polish, summarize_probesets
# Public names re-exported by this package, grouped by the submodule they
# are imported from.
__all__ = [
    # _background
    "background_correct",
    "rma_background_correct",
    # _log2
    "log2",
    # _normalize
    "normalize_constant",
    "normalize_contrasts",
    "normalize_invariantset",
    "normalize_loess",
    "normalize_qspline",
    "normalize_quantile",
    "normalize_quantile_robust",
    # _rma
    "rma",
    # _robust
    "tukey_biweight",
    "tukey_biweight_summary",
    # _summarize
    "median_polish",
    "summarize_probesets",
]