PyPI - pylocuszoom - Versions diffs - 1.2.0__py3-none-any.whl → 1.3.1__py3-none-any.whl - Mend

pylocuszoom-1.2.0-py3-none-any.whl → pylocuszoom-1.3.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

pylocuszoom/__init__.py +16 -2
pylocuszoom/backends/base.py +94 -2
pylocuszoom/backends/bokeh_backend.py +160 -6
pylocuszoom/backends/matplotlib_backend.py +142 -2
pylocuszoom/backends/plotly_backend.py +101 -1
pylocuszoom/coloc.py +82 -0
pylocuszoom/coloc_plotter.py +390 -0
pylocuszoom/colors.py +26 -0
pylocuszoom/config.py +61 -0
pylocuszoom/labels.py +41 -16
pylocuszoom/ld.py +239 -0
pylocuszoom/ld_heatmap_plotter.py +252 -0
pylocuszoom/miami_plotter.py +490 -0
pylocuszoom/plotter.py +472 -6
{pylocuszoom-1.2.0.dist-info → pylocuszoom-1.3.1.dist-info}/METADATA +166 -21
{pylocuszoom-1.2.0.dist-info → pylocuszoom-1.3.1.dist-info}/RECORD +18 -14
pylocuszoom-1.3.1.dist-info/licenses/LICENSE.md +595 -0
pylocuszoom-1.2.0.dist-info/licenses/LICENSE.md +0 -17
{pylocuszoom-1.2.0.dist-info → pylocuszoom-1.3.1.dist-info}/WHEEL +0 -0

pylocuszoom/ld.py CHANGED Viewed

@@ -16,6 +16,72 @@ from .logging import logger
 from .utils import validate_plink_files
def build_pairwise_ld_command(
    plink_path: str,
    bfile_path: str,
    output_path: str,
    snp_list_file: Optional[str] = None,
    chrom: Optional[int] = None,
    start: Optional[int] = None,
    end: Optional[int] = None,
    species: Optional[str] = "canine",
    metric: str = "r2",
) -> list:
    """Assemble the PLINK argument list for an N x N pairwise LD matrix.

    The command asks PLINK for a square matrix (``--r2 square``, or
    ``--r dprime square`` when ``metric == "dprime"``) and adds
    ``--write-snplist`` so the variant order of the matrix is written
    next to it.

    NOTE(review): the PLINK 1.9 LD documentation appears to describe the
    ``dprime`` modifier for table-format reports only -- confirm that it is
    accepted together with ``square`` and that the matrix then holds D'.

    Args:
        plink_path: Path to the PLINK executable (first element of the result).
        bfile_path: Input binary fileset prefix (.bed/.bim/.fam).
        output_path: Output prefix passed to ``--out``.
        snp_list_file: File of SNP IDs for ``--extract``; skipped when falsy.
        chrom: Chromosome for ``--chr``; skipped when None.
        start: Start position (bp) for ``--from-bp``; skipped when None.
        end: End position (bp) for ``--to-bp``; skipped when None.
        species: 'canine' adds ``--dog``, 'feline' adds ``--chr-set 18``;
            any other value (including None) adds no species flag.
        metric: 'dprime' selects ``--r dprime square``; every other value
            selects ``--r2 square``.

    Returns:
        List of command arguments suitable for ``subprocess``.
    """
    species_flags = {
        "canine": ["--dog"],
        "feline": ["--chr-set", "18"],
    }
    if metric == "dprime":
        ld_flags = ["--r", "dprime", "square"]
    else:
        ld_flags = ["--r2", "square"]

    args = [
        plink_path,
        *species_flags.get(species, []),
        "--bfile",
        bfile_path,
        "--out",
        output_path,
        *ld_flags,
        # Records the SNP order that the matrix rows/columns follow.
        "--write-snplist",
    ]

    if snp_list_file:
        args += ["--extract", snp_list_file]

    # Region filters are independent of each other; 0 is a valid value, so
    # test against None rather than truthiness.
    region_options = (("--chr", chrom), ("--from-bp", start), ("--to-bp", end))
    for flag, value in region_options:
        if value is not None:
            args += [flag, str(value)]

    return args
 def find_plink() -> Optional[str]:
     """Find PLINK executable on PATH.
@@ -84,6 +150,51 @@ def build_ld_command(
     return cmd
def parse_pairwise_ld_output(
    ld_file: str, snplist_file: str
) -> tuple[pd.DataFrame, list[str]]:
    """Load a PLINK square LD matrix together with its SNP labels.

    Two files are read:

    - ``ld_file``: whitespace-separated matrix without a header row
      (values may be numbers or 'nan').
    - ``snplist_file``: one SNP ID per line; blank lines are ignored.

    Args:
        ld_file: Path to the .ld output file (square matrix).
        snplist_file: Path to the .snplist output file (SNP IDs).

    Returns:
        Tuple of (matrix DataFrame, list of SNP IDs). The SNP IDs label both
        the columns and the index of the DataFrame. An empty DataFrame and an
        empty list are returned when either file is missing or the SNP list
        contains no IDs.
    """
    required_paths = (ld_file, snplist_file)
    if not all(os.path.exists(path) for path in required_paths):
        return pd.DataFrame(), []

    with open(snplist_file) as handle:
        stripped_lines = (raw.strip() for raw in handle)
        snp_ids = [name for name in stripped_lines if name]

    if not snp_ids:
        return pd.DataFrame(), []

    # The matrix has no header or row labels, so column names come from the
    # SNP list and index_col=False keeps the first column as data.
    read_options = {
        "sep": r"\s+",
        "header": None,
        "names": snp_ids,
        "index_col": False,
    }
    matrix = pd.read_csv(ld_file, **read_options)

    # Rows follow the same SNP order as the columns.
    matrix.index = snp_ids
    return matrix, snp_ids
 def parse_ld_output(ld_file: str, lead_snp: str) -> pd.DataFrame:
     """Parse PLINK .ld output file.
@@ -208,3 +319,131 @@ def calculate_ld(
         # Clean up temp directory
         if cleanup_working_dir and os.path.exists(working_dir):
             shutil.rmtree(working_dir, ignore_errors=True)
def calculate_pairwise_ld(
    bfile_path: str,
    snp_list: list[str] | None = None,
    chrom: int | None = None,
    start: int | None = None,
    end: int | None = None,
    plink_path: str | None = None,
    working_dir: str | None = None,
    species: str | None = "canine",
    metric: str = "r2",
) -> tuple[pd.DataFrame, list[str]]:
    """Calculate a pairwise LD matrix for a set of variants.

    Builds a PLINK square-matrix command with ``build_pairwise_ld_command``,
    runs it, and parses the resulting ``.ld`` / ``.snplist`` files with
    ``parse_pairwise_ld_output``.

    Args:
        bfile_path: Path to PLINK binary fileset (.bed/.bim/.fam prefix).
            May be relative to the caller's current directory.
        snp_list: SNP IDs to compute pairwise LD between. When falsy, no
            ``--extract`` filter is applied.
        chrom: Chromosome number for region-based extraction.
        start: Start position (bp) for region-based extraction.
        end: End position (bp) for region-based extraction.
        plink_path: Path to PLINK executable. Auto-detected via
            ``find_plink`` if None.
        working_dir: Directory for PLINK output files. A temporary directory
            is created (and removed afterwards) if None; a caller-supplied
            directory is created if needed and left in place.
        species: Species flag ('canine', 'feline', or None for human).
        metric: LD metric ('r2' or 'dprime').

    Returns:
        Tuple of (LD matrix DataFrame, list of SNP IDs) as produced by
        ``parse_pairwise_ld_output``. The SNP IDs are those PLINK wrote to
        its ``.snplist`` file, not necessarily in the order of ``snp_list``.
        Returns (empty DataFrame, empty list) if PLINK exits with a non-zero
        status.

    Raises:
        FileNotFoundError: If PLINK executable not found.
        ValidationError: If any SNP in ``snp_list`` is absent from PLINK's
            output SNP list.

    Example:
        >>> matrix, snp_ids = calculate_pairwise_ld(
        ...     bfile_path="/path/to/genotypes",
        ...     snp_list=["rs1", "rs2", "rs3"],
        ... )
        >>> matrix.loc["rs1", "rs2"]  # LD between rs1 and rs2
    """
    from .utils import ValidationError

    # Find PLINK
    if plink_path is None:
        plink_path = find_plink()
    if plink_path is None:
        raise FileNotFoundError(
            "PLINK not found. Install PLINK 1.9 or specify plink_path."
        )
    logger.debug(f"Using PLINK at {plink_path}")

    # Validate PLINK files exist (checked relative to the caller's cwd)
    validate_plink_files(bfile_path)

    # PLINK runs with cwd=working_dir, so every path handed to it must be
    # absolute; otherwise a relative bfile_path that just passed validation
    # would be looked up inside working_dir and PLINK would fail.
    bfile_path = os.path.abspath(bfile_path)

    # Use temp directory if working_dir not specified
    cleanup_working_dir = False
    if working_dir is None:
        working_dir = tempfile.mkdtemp(prefix="snp_scope_pairwise_ld_")
        cleanup_working_dir = True
    else:
        # Same reason as above: --out / --extract are derived from this
        # directory and would otherwise resolve to working_dir/working_dir/...
        working_dir = os.path.abspath(working_dir)

    try:
        os.makedirs(working_dir, exist_ok=True)
        output_prefix = os.path.join(working_dir, "pairwise_ld")

        # Write SNP list to file if provided
        snp_list_file = None
        if snp_list:
            snp_list_file = os.path.join(working_dir, "snp_list.txt")
            with open(snp_list_file, "w") as f:
                f.writelines(f"{snp}\n" for snp in snp_list)

        # Build and run PLINK command
        cmd = build_pairwise_ld_command(
            plink_path=plink_path,
            bfile_path=bfile_path,
            output_path=output_prefix,
            snp_list_file=snp_list_file,
            chrom=chrom,
            start=start,
            end=end,
            species=species,
            metric=metric,
        )
        logger.debug(f"Running PLINK command: {' '.join(cmd)}")
        result = subprocess.run(
            cmd,
            cwd=working_dir,
            capture_output=True,
            text=True,
        )
        if result.returncode != 0:
            # Best-effort contract: a failed PLINK run yields an empty result
            # plus a warning rather than an exception.
            logger.warning(
                f"PLINK pairwise LD calculation failed: {result.stderr[:200]}"
            )
            return pd.DataFrame(), []

        # Parse output
        ld_file = f"{output_prefix}.ld"
        snplist_file = f"{output_prefix}.snplist"
        matrix, found_snps = parse_pairwise_ld_output(ld_file, snplist_file)

        # Validate all requested SNPs were found
        if snp_list:
            missing_snps = set(snp_list) - set(found_snps)
            if missing_snps:
                raise ValidationError(
                    f"SNPs not found in reference panel: {', '.join(sorted(missing_snps))}"
                )
        return matrix, found_snps
    finally:
        # Only remove directories this function created itself
        if cleanup_working_dir and os.path.exists(working_dir):
            shutil.rmtree(working_dir, ignore_errors=True)

pylocuszoom/ld_heatmap_plotter.py ADDED Viewed

@@ -0,0 +1,252 @@
+"""LD heatmap generator for pairwise linkage disequilibrium visualization.
+Provides triangular heatmap display of pairwise LD values (R² or D')
+with colorbar legend and SNP highlighting support.
+"""
+from typing import Any, List, Optional, Tuple, Union
+import numpy as np
+import pandas as pd
+from .backends import BackendType, get_backend
+from .colors import (
+    LD_HEATMAP_COLORS,
+    LEAD_SNP_HIGHLIGHT_COLOR,
+    SECONDARY_HIGHLIGHT_COLOR,
+)
class LDHeatmapPlotter:
    """LD heatmap generator for pairwise LD visualization.

    Draws a heatmap of pairwise linkage disequilibrium between variants
    (upper triangle masked), with optional highlighting of a lead SNP and
    further SNPs, a colorbar, and SNP tick labels.

    Rendering is delegated to the backend returned by ``get_backend``;
    SNP highlighting is implemented here for the 'matplotlib', 'plotly'
    and 'bokeh' backends.

    Args:
        species: Species name ('canine', 'feline', 'human', or None).
            Currently unused but kept for API consistency.
        backend: Plotting backend ('matplotlib', 'plotly', or 'bokeh').

    Example:
        >>> plotter = LDHeatmapPlotter()
        >>> fig = plotter.plot_ld_heatmap(ld_matrix, lead_snp="rs12345")
        >>> fig.savefig("ld_heatmap.png", dpi=150)
    """

    def __init__(
        self,
        species: Optional[str] = "canine",
        backend: BackendType = "matplotlib",
    ):
        """Initialize the LD heatmap plotter."""
        self.species = species  # Kept for backward compatibility, currently unused
        self._backend = get_backend(backend)
        self.backend_name = backend

    def plot_ld_heatmap(
        self,
        ld_matrix: Union[pd.DataFrame, np.ndarray],
        snp_ids: Optional[List[str]] = None,
        lead_snp: Optional[str] = None,
        highlight_snps: Optional[List[str]] = None,
        metric: str = "r2",
        figsize: Tuple[float, float] = (8, 8),
        title: Optional[str] = None,
        show_colorbar: bool = True,
    ) -> Any:
        """Create triangular LD heatmap.

        Args:
            ld_matrix: Square DataFrame or array-like with pairwise LD values.
                NaN entries represent missing data; how they are drawn is up
                to the backend's ``add_heatmap``.
            snp_ids: SNP IDs for axis labels (any sequence; it is copied to a
                list). If None, a DataFrame's index (as strings) is used, or
                "0".."N-1" for array input.
            lead_snp: SNP ID to highlight with ``LEAD_SNP_HIGHLIGHT_COLOR``.
            highlight_snps: Additional SNP IDs to highlight with
                ``SECONDARY_HIGHLIGHT_COLOR``.
            metric: LD metric label for colorbar ("r2" gives "R²", anything
                else gives "D'").
            figsize: Figure size as (width, height).
            title: Plot title; omitted when falsy.
            show_colorbar: Whether to show colorbar legend.

        Returns:
            Figure object (type depends on backend).

        Raises:
            ValueError: If ld_matrix is not a square 2-D matrix.
            ValueError: If snp_ids length differs from the matrix dimension.
            ValueError: If lead_snp not found in snp_ids.
            ValueError: If any highlight_snps not found in snp_ids.

        Example:
            >>> fig = plotter.plot_ld_heatmap(
            ...     ld_matrix,
            ...     snp_ids=["rs1", "rs2", "rs3"],
            ...     lead_snp="rs1",
            ...     metric="r2",
            ... )
        """
        is_frame = isinstance(ld_matrix, pd.DataFrame)
        data = ld_matrix.values if is_frame else np.asarray(ld_matrix)

        # Check the shape before touching data.shape[0], so that scalar / 0-d
        # input raises the documented ValueError instead of an IndexError.
        if data.ndim != 2 or data.shape[0] != data.shape[1]:
            raise ValueError(f"ld_matrix must be square, got shape {data.shape}")

        if snp_ids is None:
            if is_frame:
                snp_ids = list(ld_matrix.index.astype(str))
            else:
                snp_ids = [str(i) for i in range(data.shape[0])]
        else:
            # Accept tuples, NumPy arrays or pandas Index objects as well:
            # the lookups below rely on list.index().
            snp_ids = list(snp_ids)

        n_snps = len(snp_ids)
        if data.shape[0] != n_snps:
            raise ValueError(
                f"snp_ids length ({n_snps}) does not match matrix dimension ({data.shape[0]})"
            )

        # Validate lead_snp
        lead_idx = None
        if lead_snp is not None:
            if lead_snp not in snp_ids:
                raise ValueError(f"lead_snp '{lead_snp}' not found in snp_ids")
            lead_idx = snp_ids.index(lead_snp)

        # Validate highlight_snps (all of them, before anything is drawn)
        highlight_indices = []
        if highlight_snps:
            for snp in highlight_snps:
                if snp not in snp_ids:
                    raise ValueError(f"highlight_snp '{snp}' not found in snp_ids")
                highlight_indices.append(snp_ids.index(snp))

        # Create figure with single panel
        fig, axes = self._backend.create_figure(
            n_panels=1,
            height_ratios=[1.0],
            figsize=figsize,
            sharex=False,
        )
        ax = axes[0]

        # Render triangular heatmap; cells are addressed by integer position
        mappable = self._backend.add_heatmap(
            ax,
            data=data,
            x_coords=list(range(n_snps)),
            y_coords=list(range(n_snps)),
            cmap_colors=LD_HEATMAP_COLORS,
            vmin=0.0,
            vmax=1.0,
            mask_upper=True,
        )

        # Add colorbar
        if show_colorbar:
            label = "R²" if metric == "r2" else "D'"
            self._backend.add_colorbar(ax, mappable, label=label)

        # Highlight lead SNP
        if lead_idx is not None:
            self._highlight_snp(
                ax=ax,
                fig=fig,
                snp_idx=lead_idx,
                n_snps=n_snps,
                color=LEAD_SNP_HIGHLIGHT_COLOR,
            )

        # Highlight additional SNPs
        for idx in highlight_indices:
            self._highlight_snp(
                ax=ax,
                fig=fig,
                snp_idx=idx,
                n_snps=n_snps,
                color=SECONDARY_HIGHLIGHT_COLOR,
            )

        # Set axis ticks with SNP labels
        tick_positions = list(range(n_snps))
        self._backend.set_xticks(ax, tick_positions, snp_ids, rotation=90)
        self._backend.set_yticks(ax, tick_positions, snp_ids)

        # Set title
        if title:
            self._backend.set_title(ax, title)

        # Finalize layout
        self._backend.finalize_layout(fig)
        return fig

    def _highlight_snp(
        self,
        ax: Any,
        fig: Any,
        snp_idx: int,
        n_snps: int,
        color: str,
    ) -> None:
        """Add visual highlight for a SNP's row/column in the heatmap.

        Outlines every cell (x, y) with y >= x that lies in the SNP's row or
        column, i.e. the cells on the unmasked side of the diagonal. Nothing
        is drawn when ``self.backend_name`` is not one of 'matplotlib',
        'plotly' or 'bokeh'.

        Args:
            ax: Axes object from backend (used by matplotlib and bokeh).
            fig: Figure object from backend (used by plotly).
            snp_idx: Index of the SNP to highlight.
            n_snps: Total number of SNPs in the matrix.
            color: Highlight color.
        """
        # Row cells: columns 0..snp_idx (diagonal included), row = snp_idx
        row_cells = [(j, snp_idx) for j in range(snp_idx + 1)]
        # Column cells: column = snp_idx, rows below the diagonal
        col_cells = [(snp_idx, i) for i in range(snp_idx + 1, n_snps)]
        all_cells = row_cells + col_cells

        if self.backend_name == "matplotlib":
            from matplotlib.patches import Rectangle

            for x, y in all_cells:
                # Cells are centred on integer coordinates, hence the -0.5
                rect = Rectangle(
                    (x - 0.5, y - 0.5),
                    1.0,
                    1.0,
                    fill=False,
                    edgecolor=color,
                    linewidth=2,
                    zorder=10,
                )
                ax.add_patch(rect)
        elif self.backend_name == "plotly":
            for x, y in all_cells:
                fig.add_shape(
                    type="rect",
                    x0=x - 0.5,
                    x1=x + 0.5,
                    y0=y - 0.5,
                    y1=y + 0.5,
                    line=dict(color=color, width=2),
                    fillcolor="rgba(0,0,0,0)",
                )
        elif self.backend_name == "bokeh":
            for x, y in all_cells:
                # bokeh rect glyphs are specified by centre and size
                ax.rect(
                    x=x,
                    y=y,
                    width=1,
                    height=1,
                    fill_alpha=0,
                    line_color=color,
                    line_width=2,
                )

pylocuszoom 1.2.0__py3-none-any.whl → 1.3.1__py3-none-any.whl

pylocuszoom-1.2.0-py3-none-any.whl → pylocuszoom-1.3.1-py3-none-any.whl