PyPI - pylocuszoom - Versions diffs - 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

pylocuszoom 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

pylocuszoom/__init__.py +39 -20
pylocuszoom/backends/__init__.py +1 -5
pylocuszoom/backends/base.py +3 -1
pylocuszoom/backends/bokeh_backend.py +220 -51
pylocuszoom/backends/matplotlib_backend.py +35 -8
pylocuszoom/backends/plotly_backend.py +273 -32
pylocuszoom/colors.py +132 -0
pylocuszoom/eqtl.py +3 -2
pylocuszoom/finemapping.py +223 -0
pylocuszoom/gene_track.py +259 -38
pylocuszoom/labels.py +32 -33
pylocuszoom/ld.py +8 -7
pylocuszoom/plotter.py +615 -162
pylocuszoom/recombination.py +14 -14
pylocuszoom/utils.py +3 -1
{pylocuszoom-0.1.0.dist-info → pylocuszoom-0.3.0.dist-info}/METADATA +36 -27
pylocuszoom-0.3.0.dist-info/RECORD +21 -0
pylocuszoom-0.1.0.dist-info/RECORD +0 -20
{pylocuszoom-0.1.0.dist-info → pylocuszoom-0.3.0.dist-info}/WHEEL +0 -0
{pylocuszoom-0.1.0.dist-info → pylocuszoom-0.3.0.dist-info}/licenses/LICENSE.md +0 -0

pylocuszoom/finemapping.py ADDED Viewed

@@ -0,0 +1,223 @@
+"""Fine-mapping/SuSiE data handling for pyLocusZoom.
+Provides utilities for loading, validating, and preparing statistical
+fine-mapping results (SuSiE, FINEMAP, etc.) for visualization.
+"""
+from typing import List, Optional
+import pandas as pd
+from .logging import logger
+# Column names a fine-mapping table must carry; they match the default
+# pos_col / pip_col arguments of the functions in this module.
+REQUIRED_FINEMAPPING_COLS = ["pos", "pip"]
+# Column names that may additionally be present. Presumably variant ID,
+# credible-set assignment / ID, effect size and standard error -- TODO confirm.
+OPTIONAL_FINEMAPPING_COLS = ["rs", "cs", "cs_id", "effect", "se"]
+class FinemappingValidationError(ValueError):
+    """Signal that a fine-mapping DataFrame did not pass validation.
+    Derives from ``ValueError``, so handlers written for ``ValueError``
+    also catch it.
+    """
+def validate_finemapping_df(
+    df: pd.DataFrame,
+    pos_col: str = "pos",
+    pip_col: str = "pip",
+) -> None:
+    """Check that a fine-mapping DataFrame can be used.
+    Two checks run in order: both required columns must exist, and every
+    PIP value must lie in the closed interval [0, 1].
+    Args:
+        df: Fine-mapping DataFrame to check.
+        pos_col: Name of the genomic position column.
+        pip_col: Name of the posterior inclusion probability column.
+    Raises:
+        FinemappingValidationError: If a required column is absent or any
+            PIP value falls outside [0, 1].
+    """
+    absent = [name for name in (pos_col, pip_col) if name not in df.columns]
+    if absent:
+        raise FinemappingValidationError(
+            f"Fine-mapping DataFrame missing required columns: {absent}. "
+            f"Required: {pos_col} (position), {pip_col} (posterior inclusion probability)"
+        )
+    # between() is inclusive at both ends; NaN compares False, so a missing
+    # PIP is counted as invalid.
+    out_of_range = ~df[pip_col].between(0, 1)
+    if out_of_range.any():
+        invalid_count = out_of_range.sum()
+        raise FinemappingValidationError(
+            f"PIP values must be between 0 and 1. Found {invalid_count} invalid values."
+        )
+def filter_finemapping_by_region(
+    df: pd.DataFrame,
+    chrom: int,
+    start: int,
+    end: int,
+    pos_col: str = "pos",
+    chrom_col: Optional[str] = "chr",
+) -> pd.DataFrame:
+    """Filter fine-mapping data to a genomic region.
+    Args:
+        df: Fine-mapping DataFrame.
+        chrom: Chromosome. A number or a string, with or without a "chr"
+            prefix; the prefix is stripped before comparison.
+        start: Start position (inclusive).
+        end: End position (inclusive).
+        pos_col: Column name for position.
+        chrom_col: Column name for chromosome. The chromosome filter is
+            applied only when this is set and the column exists in ``df``;
+            otherwise rows are selected by position alone.
+    Returns:
+        Copy of the rows of ``df`` that fall inside the region.
+    """
+    # Normalise once so the comparison and the log message agree.
+    chrom_str = str(chrom).replace("chr", "")
+    mask = (df[pos_col] >= start) & (df[pos_col] <= end)
+    # Filter by chromosome if column exists
+    if chrom_col and chrom_col in df.columns:
+        df_chrom = df[chrom_col].astype(str).str.replace("chr", "", regex=False)
+        mask = mask & (df_chrom == chrom_str)
+    filtered = df[mask].copy()
+    # Log the normalised name: interpolating the raw argument printed
+    # "chrchr1:..." whenever the caller passed chrom="chr1".
+    logger.debug(
+        f"Filtered fine-mapping data to {len(filtered)} variants in region "
+        f"chr{chrom_str}:{start}-{end}"
+    )
+    return filtered
+def get_credible_sets(
+    df: pd.DataFrame,
+    cs_col: str = "cs",
+) -> List[int]:
+    """List the distinct credible set IDs present in the data.
+    Args:
+        df: Fine-mapping DataFrame.
+        cs_col: Name of the credible set assignment column.
+    Returns:
+        Unique credible set IDs in ascending order. Rows whose assignment
+        is NA or 0 are ignored. Empty list when ``cs_col`` is absent.
+    """
+    if cs_col not in df.columns:
+        return []
+    # NA and 0 both mean "not assigned to any credible set".
+    assigned = df[cs_col].dropna()
+    unique_ids = assigned.loc[assigned != 0].unique().tolist()
+    unique_ids.sort()
+    return unique_ids
+def filter_by_credible_set(
+    df: pd.DataFrame,
+    cs_id: int,
+    cs_col: str = "cs",
+) -> pd.DataFrame:
+    """Select the variants assigned to one credible set.
+    Args:
+        df: Fine-mapping DataFrame.
+        cs_id: ID of the credible set to keep.
+        cs_col: Name of the credible set assignment column.
+    Returns:
+        Copy of the rows whose ``cs_col`` value equals ``cs_id``.
+    Raises:
+        FinemappingValidationError: If ``cs_col`` is not a column of ``df``.
+    """
+    if cs_col in df.columns:
+        in_set = df[cs_col] == cs_id
+        return df.loc[in_set].copy()
+    raise FinemappingValidationError(
+        f"Cannot filter by credible set: column '{cs_col}' not found. "
+        f"Available columns: {list(df.columns)}"
+    )
+def prepare_finemapping_for_plotting(
+    df: pd.DataFrame,
+    pos_col: str = "pos",
+    pip_col: str = "pip",
+    chrom: Optional[int] = None,
+    start: Optional[int] = None,
+    end: Optional[int] = None,
+) -> pd.DataFrame:
+    """Validate, optionally restrict, and order fine-mapping data for plotting.
+    Args:
+        df: Raw fine-mapping DataFrame.
+        pos_col: Name of the position column.
+        pip_col: Name of the PIP column.
+        chrom: Chromosome of the region to keep.
+        start: Start position of the region to keep.
+        end: End position of the region to keep.
+    Returns:
+        New DataFrame sorted by ``pos_col``. Region filtering happens only
+        when ``chrom``, ``start`` and ``end`` are all provided.
+    """
+    validate_finemapping_df(df, pos_col=pos_col, pip_col=pip_col)
+    prepared = df.copy()
+    # A partial region specification is ignored rather than partly applied.
+    region_given = all(bound is not None for bound in (chrom, start, end))
+    if region_given:
+        prepared = filter_finemapping_by_region(
+            prepared, chrom, start, end, pos_col=pos_col
+        )
+    # Ascending position order is what a line plot needs.
+    return prepared.sort_values(pos_col)
+def get_top_pip_variants(
+    df: pd.DataFrame,
+    n: int = 5,
+    pip_col: str = "pip",
+    pip_threshold: float = 0.0,
+) -> pd.DataFrame:
+    """Return the variants with the highest posterior inclusion probability.
+    Args:
+        df: Fine-mapping DataFrame.
+        n: Maximum number of variants to return.
+        pip_col: Name of the PIP column.
+        pip_threshold: Variants with a PIP below this value are dropped
+            before ranking.
+    Returns:
+        Up to ``n`` rows, ordered from highest to lowest PIP.
+    """
+    meets_threshold = df[pip_col] >= pip_threshold
+    candidates = df[meets_threshold]
+    return candidates.nlargest(n, pip_col)
+def calculate_credible_set_coverage(
+    df: pd.DataFrame,
+    cs_col: str = "cs",
+    pip_col: str = "pip",
+) -> dict:
+    """Sum the PIP of the variants in each credible set.
+    Args:
+        df: Fine-mapping DataFrame.
+        cs_col: Name of the credible set assignment column.
+        pip_col: Name of the PIP column.
+    Returns:
+        Dictionary keyed by credible set ID whose values are the summed
+        PIP of that set. Empty when ``cs_col`` is absent.
+    """
+    if cs_col not in df.columns:
+        return {}
+    return {
+        cs_id: filter_by_credible_set(df, cs_id, cs_col)[pip_col].sum()
+        for cs_id in get_credible_sets(df, cs_col)
+    }

pylocuszoom/gene_track.py CHANGED Viewed

@@ -7,7 +7,7 @@ Provides LocusZoom-style gene track plotting with:
 - Gene name labels
 """
-from typing import List, Optional, Union
+from typing import Any, List, Optional, Union
 import pandas as pd
 from matplotlib.axes import Axes
@@ -15,17 +15,17 @@ from matplotlib.patches import Polygon, Rectangle
 from .utils import normalize_chrom
-# Strand-specific colors (bold, distinct)
+# Strand-specific colors (distinct from LD palette)
 STRAND_COLORS: dict[Optional[str], str] = {
-    "+": "#6A3D9A",  # Bold purple for forward strand
-    "-": "#1F78B4",  # Bold teal/blue for reverse strand
-    None: "#666666",  # Grey if no strand info
+    "+": "#DAA520",  # Goldenrod for forward strand
+    "-": "#6BB3FF",  # Light blue for reverse strand
+    None: "#999999",  # Light grey if no strand info
 }
 # Layout constants
-ROW_HEIGHT = 0.40  # Total height per row
-GENE_AREA = 0.28  # Bottom portion for gene drawing
-EXON_HEIGHT = 0.22  # Exon rectangle height
+ROW_HEIGHT = 0.35  # Total height per row (reduced for tighter spacing)
+GENE_AREA = 0.25  # Bottom portion for gene drawing
+EXON_HEIGHT = 0.20  # Exon rectangle height
 INTRON_HEIGHT = 0.02  # Thin intron line
@@ -145,7 +145,7 @@ def plot_gene_track(
     ].copy()
     ax.set_xlim(start, end)
-    ax.set_ylabel("Genes", fontsize=10)
+    ax.set_ylabel("")
     ax.set_yticks([])
     # theme_classic: only bottom spine
@@ -175,7 +175,7 @@ def plot_gene_track(
     # Set y-axis limits - small bottom margin for gene body, tight top
     max_row = max(positions) if positions else 0
     bottom_margin = EXON_HEIGHT / 2 + 0.02  # Room for bottom gene
-    top_margin = 0.15  # Small space above top label
+    top_margin = 0.05  # Minimal space above top label
     ax.set_ylim(
         -bottom_margin,
         (max_row + 1) * ROW_HEIGHT - ROW_HEIGHT + GENE_AREA + top_margin,
@@ -193,6 +193,8 @@ def plot_gene_track(
             & (exons_df["start"] <= end)
         ].copy()
+    region_width = end - start
     for idx, (_, gene) in enumerate(region_genes.iterrows()):
         gene_start = max(int(gene["start"]), start)
         gene_end = min(int(gene["end"]), end)
@@ -255,43 +257,59 @@ def plot_gene_track(
                 )
             )
-        # Add strand direction triangle at gene tip
+        # Add strand direction triangles (tip, center, tail)
         if "strand" in gene.index:
             strand = gene["strand"]
-            region_width = end - start
             arrow_dir = 1 if strand == "+" else -1
-            # Triangle dimensions - whole arrow past gene end
+            # Triangle dimensions
             tri_height = EXON_HEIGHT * 0.35
             tri_width = region_width * 0.006
-            # Triangle entirely past gene tip
-            if arrow_dir == 1:  # Forward strand: arrow starts at gene end
-                base_x = gene_end
-                tip_x = base_x + tri_width
-                tri_points = [
-                    [tip_x, y_gene],  # Tip pointing right
-                    [base_x, y_gene + tri_height],
-                    [base_x, y_gene - tri_height],
+            # Arrow positions: front, middle, back (tip positions)
+            tip_offset = tri_width / 2  # Tiny offset to keep tip inside gene
+            tail_offset = tri_width * 1.5  # Offset for tail arrow from gene start/end
+            gene_center = (gene_start + gene_end) / 2
+            if arrow_dir == 1:  # Forward strand
+                arrow_tip_positions = [
+                    gene_start + tail_offset,  # Tail (tip inside gene)
+                    gene_center + tri_width / 2,  # Middle (arrow center at gene center)
+                    gene_end - tip_offset,  # Tip (near gene end)
                 ]
-            else:  # Reverse strand: arrow starts at gene start
-                base_x = gene_start
-                tip_x = base_x - tri_width
-                tri_points = [
-                    [tip_x, y_gene],  # Tip pointing left
-                    [base_x, y_gene + tri_height],
-                    [base_x, y_gene - tri_height],
+                arrow_color = "#000000"  # Black for forward
+            else:  # Reverse strand
+                arrow_tip_positions = [
+                    gene_end - tail_offset,  # Tail (tip inside gene)
+                    gene_center - tri_width / 2,  # Middle (arrow center at gene center)
+                    gene_start + tip_offset,  # Tip (near gene start)
                 ]
-            triangle = Polygon(
-                tri_points,
-                closed=True,
-                facecolor="black",
-                edgecolor="black",
-                linewidth=0.5,
-                zorder=5,
-            )
-            ax.add_patch(triangle)
+                arrow_color = "#333333"  # Dark grey for reverse
+            for tip_x in arrow_tip_positions:
+                if arrow_dir == 1:
+                    base_x = tip_x - tri_width
+                    tri_points = [
+                        [tip_x, y_gene],  # Tip pointing right
+                        [base_x, y_gene + tri_height],
+                        [base_x, y_gene - tri_height],
+                    ]
+                else:
+                    base_x = tip_x + tri_width
+                    tri_points = [
+                        [tip_x, y_gene],  # Tip pointing left
+                        [base_x, y_gene + tri_height],
+                        [base_x, y_gene - tri_height],
+                    ]
+                triangle = Polygon(
+                    tri_points,
+                    closed=True,
+                    facecolor=arrow_color,
+                    edgecolor=arrow_color,
+                    linewidth=0.5,
+                    zorder=5,
+                )
+                ax.add_patch(triangle)
         # Add gene name label in the gap above gene
         if gene_name:
@@ -309,3 +327,206 @@ def plot_gene_track(
                 zorder=4,
                 clip_on=True,
             )
+def _strand_arrow_triangles(
+    gene_start: float,
+    gene_end: float,
+    y_gene: float,
+    region_width: float,
+    forward: bool,
+) -> List[List[List[float]]]:
+    """Build the three strand-direction triangles (tail, centre, tip) of a gene.
+    Args:
+        gene_start: Left edge of the drawn gene body.
+        gene_end: Right edge of the drawn gene body.
+        y_gene: Vertical centre of the gene body.
+        region_width: Width of the plotted region; the triangle width is a
+            fixed fraction of it.
+        forward: True for right-pointing arrows, False for left-pointing.
+    Returns:
+        Three triangles, each a list of ``[x, y]`` vertices with the
+        pointing tip first.
+    """
+    tri_height = EXON_HEIGHT * 0.35
+    tri_width = region_width * 0.006
+    tip_offset = tri_width / 2  # Keeps the leading arrow's tip inside the gene
+    tail_offset = tri_width * 1.5  # Distance of the trailing arrow's tip from the gene edge
+    gene_center = (gene_start + gene_end) / 2
+    if forward:
+        tip_positions = [
+            gene_start + tail_offset,  # Tail
+            gene_center + tri_width / 2,  # Middle (arrow centred on gene centre)
+            gene_end - tip_offset,  # Tip (near gene end)
+        ]
+        base_shift = -tri_width  # Base lies left of a right-pointing tip
+    else:
+        tip_positions = [
+            gene_end - tail_offset,  # Tail
+            gene_center - tri_width / 2,  # Middle (arrow centred on gene centre)
+            gene_start + tip_offset,  # Tip (near gene start)
+        ]
+        base_shift = tri_width  # Base lies right of a left-pointing tip
+    return [
+        [
+            [tip_x, y_gene],
+            [tip_x + base_shift, y_gene + tri_height],
+            [tip_x + base_shift, y_gene - tri_height],
+        ]
+        for tip_x in tip_positions
+    ]
+def plot_gene_track_generic(
+    ax: Any,
+    backend: Any,
+    genes_df: pd.DataFrame,
+    chrom: Union[int, str],
+    start: int,
+    end: int,
+    exons_df: Optional[pd.DataFrame] = None,
+) -> None:
+    """Plot gene annotations using a backend-agnostic approach.
+    All drawing goes through ``backend`` (``set_xlim``, ``set_ylim``,
+    ``set_ylabel``, ``add_rectangle``, ``add_polygon``, ``add_text``), so
+    the same layout code serves any backend that provides those methods.
+    Args:
+        ax: Axes object (format depends on backend).
+        backend: Backend instance with drawing methods.
+        genes_df: Gene annotations with chr, start, end, gene_name,
+            and optionally strand (+/-) column.
+        chrom: Chromosome number or string.
+        start: Region start position.
+        end: Region end position.
+        exons_df: Exon annotations with chr, start, end, gene_name
+            columns for drawing exon structure. Optional.
+    Notes:
+        Direction arrows are drawn only for genes whose strand is exactly
+        "+" or "-". Genes with a missing or unrecognised strand get the
+        neutral colour and no arrows.
+    """
+    chrom_str = normalize_chrom(chrom)
+    region_genes = genes_df[
+        (genes_df["chr"].astype(str).str.replace("chr", "", regex=False) == chrom_str)
+        & (genes_df["end"] >= start)
+        & (genes_df["start"] <= end)
+    ].copy()
+    backend.set_xlim(ax, start, end)
+    backend.set_ylabel(ax, "", fontsize=10)
+    if region_genes.empty:
+        backend.set_ylim(ax, 0, 1)
+        backend.add_text(
+            ax,
+            (start + end) / 2,
+            0.5,
+            "No genes",
+            fontsize=9,
+            ha="center",
+            va="center",
+            color="grey",
+        )
+        return
+    # Assign vertical positions to avoid overlap
+    region_genes = region_genes.sort_values("start")
+    positions = assign_gene_positions(region_genes, start, end)
+    # Set y-axis limits - small bottom margin for gene body, tight top
+    max_row = max(positions) if positions else 0
+    bottom_margin = EXON_HEIGHT / 2 + 0.02  # Room for bottom gene
+    top_margin = 0.05  # Minimal space above top label
+    backend.set_ylim(
+        ax,
+        -bottom_margin,
+        (max_row + 1) * ROW_HEIGHT - ROW_HEIGHT + GENE_AREA + top_margin,
+    )
+    # Filter exons for this region if available
+    region_exons = None
+    if exons_df is not None and not exons_df.empty:
+        region_exons = exons_df[
+            (
+                exons_df["chr"].astype(str).str.replace("chr", "", regex=False)
+                == chrom_str
+            )
+            & (exons_df["end"] >= start)
+            & (exons_df["start"] <= end)
+        ].copy()
+    region_width = end - start
+    for idx, (_, gene) in enumerate(region_genes.iterrows()):
+        # Clip the gene to the visible region
+        gene_start = max(int(gene["start"]), start)
+        gene_end = min(int(gene["end"]), end)
+        row = positions[idx]
+        gene_name = gene.get("gene_name", "")
+        if pd.isna(gene_name):
+            # NaN is truthy and would otherwise be drawn as a "nan" label
+            gene_name = ""
+        # Get strand-specific color (neutral colour when strand is unknown)
+        strand = gene.get("strand")
+        gene_col = STRAND_COLORS.get(strand, STRAND_COLORS[None])
+        # Y position: bottom of row + offset for gene area
+        y_gene = row * ROW_HEIGHT + 0.05
+        y_label = y_gene + EXON_HEIGHT / 2 + 0.01  # Just above gene top
+        # Check if we have exon data for this gene
+        gene_exons = None
+        if region_exons is not None and not region_exons.empty and gene_name:
+            gene_exons = region_exons[region_exons["gene_name"] == gene_name].copy()
+        if gene_exons is not None and not gene_exons.empty:
+            # Draw intron line (thin horizontal line spanning gene)
+            backend.add_rectangle(
+                ax,
+                (gene_start, y_gene - INTRON_HEIGHT / 2),
+                gene_end - gene_start,
+                INTRON_HEIGHT,
+                facecolor=gene_col,
+                edgecolor=gene_col,
+                linewidth=0.5,
+                zorder=1,
+            )
+            # Draw exons (thick rectangles)
+            for _, exon in gene_exons.iterrows():
+                exon_start = max(int(exon["start"]), start)
+                exon_end = min(int(exon["end"]), end)
+                backend.add_rectangle(
+                    ax,
+                    (exon_start, y_gene - EXON_HEIGHT / 2),
+                    exon_end - exon_start,
+                    EXON_HEIGHT,
+                    facecolor=gene_col,
+                    edgecolor=gene_col,
+                    linewidth=0.5,
+                    zorder=2,
+                )
+        else:
+            # No exon data - draw full gene body as rectangle (fallback)
+            backend.add_rectangle(
+                ax,
+                (gene_start, y_gene - EXON_HEIGHT / 2),
+                gene_end - gene_start,
+                EXON_HEIGHT,
+                facecolor=gene_col,
+                edgecolor=gene_col,
+                linewidth=0.5,
+                zorder=2,
+            )
+        # Add strand direction triangles (tip, center, tail). Only an explicit
+        # "+" or "-" has a known direction; anything else used to fall through
+        # to the reverse-strand branch and was drawn pointing left.
+        if isinstance(strand, str) and strand in ("+", "-"):
+            forward = strand == "+"
+            # Black for forward, dark grey for reverse
+            arrow_color = "#000000" if forward else "#333333"
+            for tri_points in _strand_arrow_triangles(
+                gene_start, gene_end, y_gene, region_width, forward
+            ):
+                backend.add_polygon(
+                    ax,
+                    tri_points,
+                    facecolor=arrow_color,
+                    edgecolor=arrow_color,
+                    linewidth=0.5,
+                    zorder=5,
+                )
+        # Add gene name label in the gap above gene
+        if gene_name:
+            label_pos = (gene_start + gene_end) / 2
+            backend.add_text(
+                ax,
+                label_pos,
+                y_label,
+                gene_name,
+                fontsize=6,
+                ha="center",
+                va="bottom",
+                color="#000000",
+            )

pylocuszoom 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

pylocuszoom 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl