PyPI - pylocuszoom - Versions diffs - 0.5.0__py3-none-any.whl → 0.8.0__py3-none-any.whl - Mend

pylocuszoom 0.5.0py3-none-any.whl → 0.8.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

pylocuszoom/__init__.py +38 -2
pylocuszoom/backends/__init__.py +116 -17
pylocuszoom/backends/base.py +424 -35
pylocuszoom/backends/bokeh_backend.py +192 -34
pylocuszoom/backends/hover.py +198 -0
pylocuszoom/backends/matplotlib_backend.py +332 -3
pylocuszoom/backends/plotly_backend.py +187 -38
pylocuszoom/colors.py +41 -0
pylocuszoom/ensembl.py +476 -0
pylocuszoom/eqtl.py +15 -19
pylocuszoom/finemapping.py +17 -26
pylocuszoom/forest.py +35 -0
pylocuszoom/gene_track.py +161 -135
pylocuszoom/loaders.py +38 -18
pylocuszoom/phewas.py +34 -0
pylocuszoom/plotter.py +370 -190
pylocuszoom/recombination.py +64 -34
pylocuszoom/schemas.py +37 -26
pylocuszoom/utils.py +52 -0
pylocuszoom/validation.py +172 -0
{pylocuszoom-0.5.0.dist-info → pylocuszoom-0.8.0.dist-info}/METADATA +97 -28
pylocuszoom-0.8.0.dist-info/RECORD +29 -0
pylocuszoom-0.5.0.dist-info/RECORD +0 -24
{pylocuszoom-0.5.0.dist-info → pylocuszoom-0.8.0.dist-info}/WHEEL +0 -0
{pylocuszoom-0.5.0.dist-info → pylocuszoom-0.8.0.dist-info}/licenses/LICENSE.md +0 -0

pylocuszoom/gene_track.py CHANGED Viewed

@@ -28,6 +28,10 @@ GENE_AREA = 0.25  # Bottom portion for gene drawing
 EXON_HEIGHT = 0.20  # Exon rectangle height
 INTRON_HEIGHT = 0.02  # Thin intron line
+# Arrow dimensions as fractions of EXON_HEIGHT and of the plotted region width
+ARROW_HEIGHT_RATIO = 0.2625  # 0.35 * 0.75; multiplied by EXON_HEIGHT to get the triangle height offset (75% of the original 0.35)
+ARROW_WIDTH_RATIO = 0.0066  # 0.006 * 1.1; multiplied by region_width to get the triangle width (10% wider than the original 0.006)
 def assign_gene_positions(genes_df: pd.DataFrame, start: int, end: int) -> List[int]:
     """Assign row indices to genes to minimize overlap.
@@ -111,6 +115,147 @@ def get_nearest_gene(
     return nearby.loc[nearby["dist"].idxmin(), "gene_name"]
+def _filter_genes_by_region(
+    df: pd.DataFrame, chrom: Union[int, str], start: int, end: int
+) -> pd.DataFrame:
+    """Filter a DataFrame to genes/exons within a genomic region."""
+    chrom_str = normalize_chrom(chrom)  # presumably the chromosome label without a "chr" prefix — confirm in normalize_chrom
+    return df[
+        (df["chr"].astype(str).str.replace("chr", "", regex=False) == chrom_str)  # literal removal of every "chr" substring before comparing
+        & (df["end"] >= start)  # feature ends at or after the region start...
+        & (df["start"] <= end)  # ...and starts at or before the region end, i.e. it overlaps [start, end]
+    ].copy()  # return an independent copy of the selected rows
+def _compute_arrow_geometry(
+    gene_start: int, gene_end: int, region_width: int, strand: str
+) -> tuple[list[float], float, float, str]:
+    """Compute arrow tip positions and dimensions for strand arrows.
+    Returns:
+        Tuple of (arrow_tip_positions, tri_height, tri_width, arrow_color).
+    """
+    tri_height = EXON_HEIGHT * ARROW_HEIGHT_RATIO  # callers offset the triangle base corners by +/- this amount from the gene line
+    tri_width = region_width * ARROW_WIDTH_RATIO  # tip-to-base length, scaled to the plotted region width
+    tip_offset = tri_width / 2  # inset of the leading tip from the gene boundary it points at
+    tail_offset = tri_width * 1.5  # inset of the trailing tip from the opposite gene boundary
+    gene_center = (gene_start + gene_end) / 2
+    if strand == "+":
+        arrow_tip_positions = [
+            gene_start + tail_offset,  # trailing arrow
+            gene_center + tri_width / 2,  # middle arrow: tip half a width past the gene center
+            gene_end - tip_offset,  # leading arrow
+        ]
+        arrow_color = "#000000"  # forward strand
+    else:  # any strand value other than "+" is handled as reverse
+        arrow_tip_positions = [
+            gene_end - tail_offset,  # trailing arrow
+            gene_center - tri_width / 2,  # middle arrow: tip half a width before the gene center
+            gene_start + tip_offset,  # leading arrow
+        ]
+        arrow_color = "#333333"  # reverse strand
+    return arrow_tip_positions, tri_height, tri_width, arrow_color  # NOTE(review): positions are not clamped; a gene shorter than 1.5 * tri_width gets a trailing tip outside its extent
+def _draw_strand_arrows_matplotlib(
+    ax: Axes,
+    gene: pd.Series,
+    gene_start: int,
+    gene_end: int,
+    y_gene: float,
+    region_width: int,
+) -> None:
+    """Draw strand direction arrows using matplotlib."""
+    strand = gene["strand"]
+    arrow_tip_positions, tri_height, tri_width, arrow_color = _compute_arrow_geometry(
+        gene_start, gene_end, region_width, strand
+    )
+    # Draw a horizontal line at y_gene from the first to the last arrow tip position
+    if len(arrow_tip_positions) > 1:  # _compute_arrow_geometry returns three positions, so this holds
+        ax.plot(
+            [arrow_tip_positions[0], arrow_tip_positions[-1]],
+            [y_gene, y_gene],
+            color=arrow_color,
+            linewidth=1.0,
+            zorder=4,  # below the triangles, which use zorder=5
+            solid_capstyle="butt",
+        )
+    for tip_x in arrow_tip_positions:  # one triangle per tip position
+        if strand == "+":
+            base_x = tip_x - tri_width  # base left of the tip: triangle points right
+        else:
+            base_x = tip_x + tri_width  # base right of the tip: triangle points left
+        tri_points = [
+            [tip_x, y_gene],  # tip on the gene line
+            [base_x, y_gene + tri_height],  # base corner above the gene line
+            [base_x, y_gene - tri_height],  # base corner below the gene line
+        ]
+        triangle = Polygon(
+            tri_points,
+            closed=True,
+            facecolor=arrow_color,
+            edgecolor=arrow_color,
+            linewidth=0.5,
+            zorder=5,
+        )
+        ax.add_patch(triangle)
+def _draw_strand_arrows_generic(
+    ax: Any,
+    backend: Any,
+    gene: pd.Series,
+    gene_start: int,
+    gene_end: int,
+    y_gene: float,
+    region_width: int,
+) -> None:
+    """Draw strand direction arrows using a generic backend."""
+    strand = gene["strand"]
+    arrow_tip_positions, tri_height, tri_width, arrow_color = _compute_arrow_geometry(
+        gene_start, gene_end, region_width, strand
+    )
+    # Draw a horizontal line at y_gene from the first to the last arrow tip position
+    if len(arrow_tip_positions) > 1:  # _compute_arrow_geometry returns three positions, so this holds
+        backend.line(
+            ax,
+            x=pd.Series([arrow_tip_positions[0], arrow_tip_positions[-1]]),
+            y=pd.Series([y_gene, y_gene]),
+            color=arrow_color,
+            linewidth=1.0,
+            zorder=4,  # lower zorder than the triangles (zorder=5)
+        )
+    for tip_x in arrow_tip_positions:  # one triangle per tip position
+        if strand == "+":
+            base_x = tip_x - tri_width  # base left of the tip: triangle points right
+        else:
+            base_x = tip_x + tri_width  # base right of the tip: triangle points left
+        tri_points = [
+            [tip_x, y_gene],  # tip on the gene line
+            [base_x, y_gene + tri_height],  # base corner above the gene line
+            [base_x, y_gene - tri_height],  # base corner below the gene line
+        ]
+        backend.add_polygon(
+            ax,
+            tri_points,
+            facecolor=arrow_color,
+            edgecolor=arrow_color,
+            linewidth=0.5,
+            zorder=5,
+        )
 def plot_gene_track(
     ax: Axes,
     genes_df: pd.DataFrame,
@@ -137,12 +282,7 @@ def plot_gene_track(
         exons_df: Exon annotations with chr, start, end, gene_name
             columns for drawing exon structure. Optional.
     """
-    chrom_str = normalize_chrom(chrom)
-    region_genes = genes_df[
-        (genes_df["chr"].astype(str).str.replace("chr", "", regex=False) == chrom_str)
-        & (genes_df["end"] >= start)
-        & (genes_df["start"] <= end)
-    ].copy()
+    region_genes = _filter_genes_by_region(genes_df, chrom, start, end)
     ax.set_xlim(start, end)
     ax.set_ylabel("")
@@ -178,20 +318,13 @@ def plot_gene_track(
     top_margin = 0.05  # Minimal space above top label
     ax.set_ylim(
         -bottom_margin,
-        (max_row + 1) * ROW_HEIGHT - ROW_HEIGHT + GENE_AREA + top_margin,
+        max_row * ROW_HEIGHT + GENE_AREA + top_margin,
     )
     # Filter exons for this region if available
     region_exons = None
     if exons_df is not None and not exons_df.empty:
-        region_exons = exons_df[
-            (
-                exons_df["chr"].astype(str).str.replace("chr", "", regex=False)
-                == chrom_str
-            )
-            & (exons_df["end"] >= start)
-            & (exons_df["start"] <= end)
-        ].copy()
+        region_exons = _filter_genes_by_region(exons_df, chrom, start, end)
     region_width = end - start
@@ -257,59 +390,11 @@ def plot_gene_track(
                 )
             )
-        # Add strand direction triangles (tip, center, tail)
+        # Add strand direction triangles
         if "strand" in gene.index:
-            strand = gene["strand"]
-            arrow_dir = 1 if strand == "+" else -1
-            # Triangle dimensions
-            tri_height = EXON_HEIGHT * 0.35
-            tri_width = region_width * 0.006
-            # Arrow positions: front, middle, back (tip positions)
-            tip_offset = tri_width / 2  # Tiny offset to keep tip inside gene
-            tail_offset = tri_width * 1.5  # Offset for tail arrow from gene start/end
-            gene_center = (gene_start + gene_end) / 2
-            if arrow_dir == 1:  # Forward strand
-                arrow_tip_positions = [
-                    gene_start + tail_offset,  # Tail (tip inside gene)
-                    gene_center + tri_width / 2,  # Middle (arrow center at gene center)
-                    gene_end - tip_offset,  # Tip (near gene end)
-                ]
-                arrow_color = "#000000"  # Black for forward
-            else:  # Reverse strand
-                arrow_tip_positions = [
-                    gene_end - tail_offset,  # Tail (tip inside gene)
-                    gene_center - tri_width / 2,  # Middle (arrow center at gene center)
-                    gene_start + tip_offset,  # Tip (near gene start)
-                ]
-                arrow_color = "#333333"  # Dark grey for reverse
-            for tip_x in arrow_tip_positions:
-                if arrow_dir == 1:
-                    base_x = tip_x - tri_width
-                    tri_points = [
-                        [tip_x, y_gene],  # Tip pointing right
-                        [base_x, y_gene + tri_height],
-                        [base_x, y_gene - tri_height],
-                    ]
-                else:
-                    base_x = tip_x + tri_width
-                    tri_points = [
-                        [tip_x, y_gene],  # Tip pointing left
-                        [base_x, y_gene + tri_height],
-                        [base_x, y_gene - tri_height],
-                    ]
-                triangle = Polygon(
-                    tri_points,
-                    closed=True,
-                    facecolor=arrow_color,
-                    edgecolor=arrow_color,
-                    linewidth=0.5,
-                    zorder=5,
-                )
-                ax.add_patch(triangle)
+            _draw_strand_arrows_matplotlib(
+                ax, gene, gene_start, gene_end, y_gene, region_width
+            )
         # Add gene name label in the gap above gene
         if gene_name:
@@ -320,7 +405,7 @@ def plot_gene_track(
                 gene_name,
                 ha="center",
                 va="bottom",
-                fontsize=5.5,
+                fontsize=7,
                 color="#000000",
                 fontweight="medium",
                 style="italic",
@@ -353,12 +438,7 @@ def plot_gene_track_generic(
         exons_df: Exon annotations with chr, start, end, gene_name
             columns for drawing exon structure. Optional.
     """
-    chrom_str = normalize_chrom(chrom)
-    region_genes = genes_df[
-        (genes_df["chr"].astype(str).str.replace("chr", "", regex=False) == chrom_str)
-        & (genes_df["end"] >= start)
-        & (genes_df["start"] <= end)
-    ].copy()
+    region_genes = _filter_genes_by_region(genes_df, chrom, start, end)
     backend.set_xlim(ax, start, end)
     backend.set_ylabel(ax, "", fontsize=10)
@@ -389,20 +469,13 @@ def plot_gene_track_generic(
     backend.set_ylim(
         ax,
         -bottom_margin,
-        (max_row + 1) * ROW_HEIGHT - ROW_HEIGHT + GENE_AREA + top_margin,
+        max_row * ROW_HEIGHT + GENE_AREA + top_margin,
     )
     # Filter exons for this region if available
     region_exons = None
     if exons_df is not None and not exons_df.empty:
-        region_exons = exons_df[
-            (
-                exons_df["chr"].astype(str).str.replace("chr", "", regex=False)
-                == chrom_str
-            )
-            & (exons_df["end"] >= start)
-            & (exons_df["start"] <= end)
-        ].copy()
+        region_exons = _filter_genes_by_region(exons_df, chrom, start, end)
     region_width = end - start
@@ -465,58 +538,11 @@ def plot_gene_track_generic(
                 zorder=2,
             )
-        # Add strand direction triangles (tip, center, tail)
+        # Add strand direction triangles
         if "strand" in gene.index:
-            strand = gene["strand"]
-            arrow_dir = 1 if strand == "+" else -1
-            # Triangle dimensions
-            tri_height = EXON_HEIGHT * 0.35
-            tri_width = region_width * 0.006
-            # Arrow positions: front, middle, back (tip positions)
-            tip_offset = tri_width / 2  # Tiny offset to keep tip inside gene
-            tail_offset = tri_width * 1.5  # Offset for tail arrow from gene start/end
-            gene_center = (gene_start + gene_end) / 2
-            if arrow_dir == 1:  # Forward strand
-                arrow_tip_positions = [
-                    gene_start + tail_offset,  # Tail (tip inside gene)
-                    gene_center + tri_width / 2,  # Middle (arrow center at gene center)
-                    gene_end - tip_offset,  # Tip (near gene end)
-                ]
-                arrow_color = "#000000"  # Black for forward
-            else:  # Reverse strand
-                arrow_tip_positions = [
-                    gene_end - tail_offset,  # Tail (tip inside gene)
-                    gene_center - tri_width / 2,  # Middle (arrow center at gene center)
-                    gene_start + tip_offset,  # Tip (near gene start)
-                ]
-                arrow_color = "#333333"  # Dark grey for reverse
-            for tip_x in arrow_tip_positions:
-                if arrow_dir == 1:
-                    base_x = tip_x - tri_width
-                    tri_points = [
-                        [tip_x, y_gene],  # Tip pointing right
-                        [base_x, y_gene + tri_height],
-                        [base_x, y_gene - tri_height],
-                    ]
-                else:
-                    base_x = tip_x + tri_width
-                    tri_points = [
-                        [tip_x, y_gene],  # Tip pointing left
-                        [base_x, y_gene + tri_height],
-                        [base_x, y_gene - tri_height],
-                    ]
-                backend.add_polygon(
-                    ax,
-                    tri_points,
-                    facecolor=arrow_color,
-                    edgecolor=arrow_color,
-                    linewidth=0.5,
-                    zorder=5,
-                )
+            _draw_strand_arrows_generic(
+                ax, backend, gene, gene_start, gene_end, y_gene, region_width
+            )
         # Add gene name label in the gap above gene
         if gene_name:
@@ -526,7 +552,7 @@ def plot_gene_track_generic(
                 label_pos,
                 y_label,
                 gene_name,
-                fontsize=6,
+                fontsize=7,
                 ha="center",
                 va="bottom",
                 color="#000000",

pylocuszoom/loaders.py CHANGED Viewed

@@ -260,10 +260,14 @@ def load_saige(
         "POS": pos_col,
         "MarkerID": rs_col,
         "CHR": "chr",
-        "p.value": p_col,
-        "p.value.NA": p_col,  # SPA-adjusted
     }
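+    # Prefer p.value.NA over p.value when both are present. NOTE(review): SAIGE's output documentation describes p.value.NA as the p-value computed *without* SPA — confirm which column is intended here.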
+    if "p.value.NA" in df.columns:
+        col_map["p.value.NA"] = p_col
+    elif "p.value" in df.columns:
+        col_map["p.value"] = p_col
     df = df.rename(columns=col_map)
     logger.debug(f"Loaded SAIGE file with {len(df)} variants")
     validate_gwas_dataframe(df, pos_col=pos_col, p_col=p_col, rs_col=rs_col)
@@ -318,7 +322,7 @@ def load_gtex_eqtl(
         gene: Optional gene to filter to (ENSG ID or gene symbol).
     Returns:
-        DataFrame with columns: pos, p_value, gene, effect.
+        DataFrame with columns: pos, p_value, gene, effect_size.
     Example:
         >>> eqtl_df = load_gtex_eqtl("GTEx_Analysis.signif_pairs.txt.gz", gene="BRCA1")
@@ -351,10 +355,10 @@ def load_gtex_eqtl(
             col_map[col] = "gene"
             break
-    # Effect size (slope)
+    # Effect size (slope) - standardize to effect_size for plotting compatibility
     for col in ["slope", "beta", "effect_size"]:
         if col in df.columns:
-            col_map[col] = "effect"
+            col_map[col] = "effect_size"
             break
     df = df.rename(columns=col_map)
@@ -385,7 +389,7 @@ def load_eqtl_catalogue(
         gene: Optional gene to filter to.
     Returns:
-        DataFrame with columns: pos, p_value, gene, effect.
+        DataFrame with columns: pos, p_value, gene, effect_size.
     """
     df = pd.read_csv(filepath, sep="\t")
@@ -393,7 +397,7 @@ def load_eqtl_catalogue(
         "position": "pos",
         "pvalue": "p_value",
         "gene_id": "gene",
-        "beta": "effect",
+        "beta": "effect_size",  # Standardize to effect_size for plotter
         "chromosome": "chr",
     }
@@ -422,7 +426,7 @@ def load_matrixeqtl(
         gene: Optional gene to filter to.
     Returns:
-        DataFrame with columns: pos, p_value, gene, effect.
+        DataFrame with columns: pos, p_value, gene, effect_size.
     Note:
         MatrixEQTL output doesn't include position by default.
@@ -435,7 +439,7 @@ def load_matrixeqtl(
         "gene": "gene",
         "p-value": "p_value",
         "pvalue": "p_value",
-        "beta": "effect",
+        "beta": "effect_size",  # Standardize to effect_size for plotter
         "t-stat": "t_stat",
     }
@@ -725,14 +729,28 @@ def load_bed(
     # Assign column names if no header
     if not has_header:
         n_cols = len(df.columns)
-        col_names = ["chr", "start", "end"]
-        if n_cols >= 4:
-            col_names.append("gene_name")
-        if n_cols >= 5:
-            col_names.append("score")
-        if n_cols >= 6:
-            col_names.append("strand")
-        df.columns = col_names[:n_cols]
+        # Standard BED column names (up to BED12)
+        bed_col_names = [
+            "chr",
+            "start",
+            "end",
+            "gene_name",
+            "score",
+            "strand",
+            "thickStart",
+            "thickEnd",
+            "itemRgb",
+            "blockCount",
+            "blockSizes",
+            "blockStarts",
+        ]
+        # Use standard names for known columns, generic for extras
+        if n_cols <= len(bed_col_names):
+            df.columns = bed_col_names[:n_cols]
+        else:
+            # More columns than BED12 - use known names + generic
+            extra_cols = [f"col{i}" for i in range(len(bed_col_names), n_cols)]
+            df.columns = bed_col_names + extra_cols
     # Standardize column names if header was present
     col_map = {
@@ -859,4 +877,6 @@ def load_gwas(
     if format not in loaders:
         raise ValueError(f"Unknown format '{format}'. Options: {list(loaders.keys())}")
-    return loaders[format](filepath, pos_col=pos_col, p_col=p_col, rs_col=rs_col)
+    return loaders[format](
+        filepath, pos_col=pos_col, p_col=p_col, rs_col=rs_col, **kwargs
+    )

pylocuszoom/phewas.py ADDED Viewed

@@ -0,0 +1,34 @@
+"""PheWAS data validation and preparation.
+Validates and prepares phenome-wide association study data for plotting.
+"""
+import pandas as pd
+from .validation import DataFrameValidator
+def validate_phewas_df(
+    df: pd.DataFrame,
+    phenotype_col: str = "phenotype",
+    p_col: str = "p_value",
+    category_col: str = "category",  # accepted but never referenced in the body below
+) -> None:
+    """Validate PheWAS DataFrame has required columns and types.
+    Args:
+        df: PheWAS results DataFrame.
+        phenotype_col: Column name for phenotype names.
+        p_col: Column name for p-values.
+        category_col: Column name for phenotype categories (optional).
+    Raises:
+        ValidationError: If required columns are missing or have invalid types.
+    """
+    (
+        DataFrameValidator(df, "PheWAS DataFrame")
+        .require_columns([phenotype_col, p_col])  # only these two columns are required; category_col is not checked
+        .require_numeric([p_col])
+        .require_range(p_col, min_val=0, max_val=1, exclusive_min=True)  # presumably enforces 0 < p <= 1 — confirm in DataFrameValidator
+        .validate()  # presumably the point where ValidationError is raised — confirm in DataFrameValidator
+    )

pylocuszoom 0.5.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

pylocuszoom 0.5.0py3-none-any.whl → 0.8.0py3-none-any.whl