PyPI - pylocuszoom - Versions diffs - 0.3.0__py3-none-any.whl → 0.6.0__py3-none-any.whl - Mend

pylocuszoom 0.3.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

pylocuszoom/__init__.py +74 -2
pylocuszoom/backends/base.py +131 -0
pylocuszoom/backends/bokeh_backend.py +254 -68
pylocuszoom/backends/matplotlib_backend.py +173 -0
pylocuszoom/backends/plotly_backend.py +327 -87
pylocuszoom/colors.py +44 -1
pylocuszoom/forest.py +37 -0
pylocuszoom/gene_track.py +1 -0
pylocuszoom/loaders.py +880 -0
pylocuszoom/phewas.py +35 -0
pylocuszoom/plotter.py +342 -117
pylocuszoom/py.typed +0 -0
pylocuszoom/recombination.py +49 -35
pylocuszoom/schemas.py +406 -0
{pylocuszoom-0.3.0.dist-info → pylocuszoom-0.6.0.dist-info}/METADATA +153 -25
pylocuszoom-0.6.0.dist-info/RECORD +26 -0
pylocuszoom-0.3.0.dist-info/RECORD +0 -21
{pylocuszoom-0.3.0.dist-info → pylocuszoom-0.6.0.dist-info}/WHEEL +0 -0
{pylocuszoom-0.3.0.dist-info → pylocuszoom-0.6.0.dist-info}/licenses/LICENSE.md +0 -0

pylocuszoom/phewas.py ADDED Viewed

@@ -0,0 +1,35 @@
+"""PheWAS data validation and preparation.
+Validates and prepares phenome-wide association study data for plotting.
+"""
+import pandas as pd
+from .utils import ValidationError
+def validate_phewas_df(
+    df: pd.DataFrame,
+    phenotype_col: str = "phenotype",
+    p_col: str = "p_value",
+    category_col: str = "category",
+) -> None:
+    """Check that a PheWAS DataFrame contains the required columns.
+    Only the presence of ``phenotype_col`` and ``p_col`` in ``df.columns`` is
+    checked; column values are not inspected.
+    Args:
+        df: PheWAS results DataFrame.
+        phenotype_col: Column name for phenotype names (required).
+        p_col: Column name for p-values (required).
+        category_col: Column name for phenotype categories (optional). It is
+            accepted as an argument but is not checked by this function.
+    Raises:
+        ValidationError: If ``phenotype_col`` or ``p_col`` is missing. The
+            message lists the missing, the required and the found columns.
+    """
+    # category_col is not part of the required list because it is optional.
+    required = [phenotype_col, p_col]
+    missing = [col for col in required if col not in df.columns]
+    if missing:
+        raise ValidationError(
+            f"PheWAS DataFrame missing required columns: {missing}. "
+            f"Required: {required}. Found: {list(df.columns)}"
+        )

pylocuszoom/plotter.py CHANGED Viewed

@@ -31,12 +31,14 @@ from .colors import (
     get_eqtl_color,
     get_ld_bin,
     get_ld_color_palette,
+    get_phewas_category_palette,
 )
 from .eqtl import validate_eqtl_df
 from .finemapping import (
     get_credible_sets,
     prepare_finemapping_for_plotting,
 )
+from .forest import validate_forest_df
 from .gene_track import (
     assign_gene_positions,
     plot_gene_track,
@@ -45,6 +47,7 @@ from .gene_track import (
 from .labels import add_snp_labels
 from .ld import calculate_ld, find_plink
 from .logging import enable_logging, logger
+from .phewas import validate_phewas_df
 from .recombination import (
     RECOMB_COLOR,
     add_recombination_overlay,
@@ -139,11 +142,8 @@ class LocusZoomPlotter:
     @staticmethod
     def _default_build(species: str) -> Optional[str]:
         """Get default genome build for species."""
-        if species == "canine":
-            return "canfam3.1"
-        if species == "feline":
-            return "felCat9"
-        return None
+        builds = {"canine": "canfam3.1", "feline": "felCat9"}
+        return builds.get(species)
     def _ensure_recomb_maps(self) -> Optional[Path]:
         """Ensure recombination maps are downloaded.
@@ -157,8 +157,8 @@ class LocusZoomPlotter:
             default_dir = get_default_data_dir()
             if (
                 default_dir.exists()
-                and len(list(default_dir.glob("chr*_recomb.tsv"))) >= 38
-            ):
+                and len(list(default_dir.glob("chr*_recomb.tsv"))) >= 39
+            ):  # 38 autosomes + X
                 return default_dir
             # Download
             try:
@@ -215,7 +215,7 @@ class LocusZoomPlotter:
         p_col: str = "p_wald",
         rs_col: str = "rs",
         figsize: Tuple[int, int] = (12, 8),
-    ) -> Figure:
+    ) -> Any:
         """Create a regional association plot.
         Args:
@@ -589,53 +589,6 @@ class LocusZoomPlotter:
             yaxis_name=secondary_y,
         )
-    def _add_eqtl_legend(self, ax: Axes) -> None:
-        """Add eQTL effect size legend to plot."""
-        legend_elements = []
-        # Positive effects (upward triangles)
-        for _, _, label, color in EQTL_POSITIVE_BINS:
-            legend_elements.append(
-                Line2D(
-                    [0],
-                    [0],
-                    marker="^",
-                    color="w",
-                    markerfacecolor=color,
-                    markeredgecolor="black",
-                    markersize=7,
-                    label=label,
-                )
-            )
-        # Negative effects (downward triangles)
-        for _, _, label, color in EQTL_NEGATIVE_BINS:
-            legend_elements.append(
-                Line2D(
-                    [0],
-                    [0],
-                    marker="v",
-                    color="w",
-                    markerfacecolor=color,
-                    markeredgecolor="black",
-                    markersize=7,
-                    label=label,
-                )
-            )
-        ax.legend(
-            handles=legend_elements,
-            loc="upper right",
-            fontsize=8,
-            frameon=True,
-            framealpha=0.9,
-            title="eQTL effect",
-            title_fontsize=9,
-            handlelength=1.2,
-            handleheight=1.0,
-            labelspacing=0.3,
-        )
     def _plot_finemapping(
         self,
         ax: Axes,
@@ -657,6 +610,23 @@ class LocusZoomPlotter:
             show_credible_sets: Whether to color points by credible set.
             pip_threshold: Minimum PIP to display as scatter point.
         """
+        def _build_finemapping_hover_data(
+            subset_df: pd.DataFrame,
+        ) -> Optional[pd.DataFrame]:
+            """Build hover data for interactive backends.
+            Reads ``pos_col``, ``pip_col`` and ``cs_col`` from the enclosing
+            method's scope; a column that is absent from ``subset_df`` is
+            simply left out of the result.
+            Args:
+                subset_df: The rows that are passed to one scatter call.
+            Returns:
+                DataFrame holding whichever of the columns "Position", "PIP"
+                and "Credible Set" could be filled, or None if none could.
+            """
+            hover_cols = {}
+            # Position
+            if pos_col in subset_df.columns:
+                hover_cols["Position"] = subset_df[pos_col].values
+            # PIP
+            if pip_col in subset_df.columns:
+                hover_cols["PIP"] = subset_df[pip_col].values
+            # Credible set (cs_col may be falsy, in which case it is skipped)
+            if cs_col and cs_col in subset_df.columns:
+                hover_cols["Credible Set"] = subset_df[cs_col].values
+            # An empty dict means no hover column was available: return None
+            return pd.DataFrame(hover_cols) if hover_cols else None
         # Sort by position for line plotting
         df = df.sort_values(pos_col)
@@ -690,7 +660,7 @@ class LocusZoomPlotter:
                     edgecolor="black",
                     linewidth=0.5,
                     zorder=3,
-                    label=f"CS{cs_id}",
+                    hover_data=_build_finemapping_hover_data(cs_data),
                 )
             # Plot variants not in any credible set
             non_cs_data = df[(df[cs_col].isna()) | (df[cs_col] == 0)]
@@ -707,6 +677,7 @@ class LocusZoomPlotter:
                         edgecolor="black",
                         linewidth=0.3,
                         zorder=2,
+                        hover_data=_build_finemapping_hover_data(non_cs_data),
                     )
         else:
             # No credible sets - show all points above threshold
@@ -723,51 +694,9 @@ class LocusZoomPlotter:
                         edgecolor="black",
                         linewidth=0.5,
                         zorder=3,
+                        hover_data=_build_finemapping_hover_data(high_pip),
                     )
-    def _add_finemapping_legend(
-        self,
-        ax: Axes,
-        credible_sets: List[int],
-    ) -> None:
-        """Add fine-mapping legend showing credible sets.
-        Args:
-            ax: Matplotlib axes object.
-            credible_sets: List of credible set IDs to include.
-        """
-        if not credible_sets:
-            return
-        legend_elements = []
-        for cs_id in credible_sets:
-            color = get_credible_set_color(cs_id)
-            legend_elements.append(
-                Line2D(
-                    [0],
-                    [0],
-                    marker="o",
-                    color="w",
-                    markerfacecolor=color,
-                    markeredgecolor="black",
-                    markersize=7,
-                    label=f"CS{cs_id}",
-                )
-            )
-        ax.legend(
-            handles=legend_elements,
-            loc="upper right",
-            fontsize=8,
-            frameon=True,
-            framealpha=0.9,
-            title="Credible sets",
-            title_fontsize=9,
-            handlelength=1.2,
-            handleheight=1.0,
-            labelspacing=0.3,
-        )
     def plot_stacked(
         self,
         gwas_dfs: List[pd.DataFrame],
@@ -968,7 +897,9 @@ class LocusZoomPlotter:
                         panel_ld_col = "R2"
             # Plot association
-            self._plot_association(ax, df, pos_col, panel_ld_col, lead_pos, rs_col, p_col)
+            self._plot_association(
+                ax, df, pos_col, panel_ld_col, lead_pos, rs_col, p_col
+            )
             # Add significance line
             self._backend.axhline(
@@ -1080,10 +1011,12 @@ class LocusZoomPlotter:
                     pip_threshold=0.01,
                 )
-                # Add legend for credible sets
+                # Add legend for credible sets (all backends)
                 credible_sets = get_credible_sets(fm_data, finemapping_cs_col)
                 if credible_sets:
-                    self._add_finemapping_legend(ax, credible_sets)
+                    self._backend.add_finemapping_legend(
+                        ax, credible_sets, get_credible_set_color
+                    )
             self._backend.set_ylabel(ax, "PIP")
             self._backend.set_ylim(ax, -0.05, 1.05)
@@ -1100,41 +1033,87 @@ class LocusZoomPlotter:
             if eqtl_gene and "gene" in eqtl_data.columns:
                 eqtl_data = eqtl_data[eqtl_data["gene"] == eqtl_gene]
-            # Filter by region
+            # Filter by region (position and chromosome)
             if "pos" in eqtl_data.columns:
-                eqtl_data = eqtl_data[
-                    (eqtl_data["pos"] >= start) & (eqtl_data["pos"] <= end)
-                ]
+                mask = (eqtl_data["pos"] >= start) & (eqtl_data["pos"] <= end)
+                # Also filter by chromosome if column exists
+                if "chr" in eqtl_data.columns:
+                    chrom_str = str(chrom).replace("chr", "")
+                    eqtl_chrom = (
+                        eqtl_data["chr"].astype(str).str.replace("chr", "", regex=False)
+                    )
+                    mask = mask & (eqtl_chrom == chrom_str)
+                eqtl_data = eqtl_data[mask]
             if not eqtl_data.empty:
                 eqtl_data["neglog10p"] = -np.log10(
                     eqtl_data["p_value"].clip(lower=1e-300)
                 )
+                def _build_eqtl_hover_data(
+                    subset_df: pd.DataFrame,
+                ) -> Optional[pd.DataFrame]:
+                    """Build hover data for eQTL interactive backends.
+                    Looks for the fixed column names "pos", "p_value",
+                    "effect_size" and "gene"; a column that is absent from
+                    ``subset_df`` is left out of the result.
+                    Args:
+                        subset_df: The rows that are passed to one scatter call.
+                    Returns:
+                        DataFrame holding whichever of the columns "Position",
+                        "P-value", "Effect" and "Gene" could be filled, or
+                        None if none could.
+                    """
+                    hover_cols = {}
+                    # Position
+                    if "pos" in subset_df.columns:
+                        hover_cols["Position"] = subset_df["pos"].values
+                    # P-value
+                    if "p_value" in subset_df.columns:
+                        hover_cols["P-value"] = subset_df["p_value"].values
+                    # Effect size
+                    if "effect_size" in subset_df.columns:
+                        hover_cols["Effect"] = subset_df["effect_size"].values
+                    # Gene
+                    if "gene" in subset_df.columns:
+                        hover_cols["Gene"] = subset_df["gene"].values
+                    # An empty dict means no hover column was available
+                    return pd.DataFrame(hover_cols) if hover_cols else None
                 # Check if effect_size column exists for directional coloring
                 has_effect = "effect_size" in eqtl_data.columns
                 if has_effect:
-                    # Plot triangles by effect direction with color by magnitude
-                    for _, row in eqtl_data.iterrows():
-                        effect = row["effect_size"]
-                        color = get_eqtl_color(effect)
-                        marker = "^" if effect >= 0 else "v"
+                    # Plot triangles by effect direction (rows split by sign; one scatter call per row)
+                    pos_effects = eqtl_data[eqtl_data["effect_size"] >= 0]
+                    neg_effects = eqtl_data[eqtl_data["effect_size"] < 0]
+                    # Plot positive effects (up triangles)
+                    for _, row in pos_effects.iterrows():
+                        row_df = pd.DataFrame([row])
                         self._backend.scatter(
                             ax,
                             pd.Series([row["pos"]]),
                             pd.Series([row["neglog10p"]]),
-                            colors=color,
+                            colors=get_eqtl_color(row["effect_size"]),
                             sizes=50,
-                            marker=marker,
+                            marker="^",
                             edgecolor="black",
                             linewidth=0.5,
                             zorder=2,
+                            hover_data=_build_eqtl_hover_data(row_df),
                         )
-                    # Add eQTL effect legend
-                    self._add_eqtl_legend(ax)
+                    # Plot negative effects (down triangles)
+                    for _, row in neg_effects.iterrows():
+                        row_df = pd.DataFrame([row])
+                        self._backend.scatter(
+                            ax,
+                            pd.Series([row["pos"]]),
+                            pd.Series([row["neglog10p"]]),
+                            colors=get_eqtl_color(row["effect_size"]),
+                            sizes=50,
+                            marker="v",
+                            edgecolor="black",
+                            linewidth=0.5,
+                            zorder=2,
+                            hover_data=_build_eqtl_hover_data(row_df),
+                        )
+                    # Add eQTL effect legend (all backends)
+                    self._backend.add_eqtl_legend(
+                        ax, EQTL_POSITIVE_BINS, EQTL_NEGATIVE_BINS
+                    )
                 else:
                     # No effect sizes - plot as diamonds
+                    label = f"eQTL ({eqtl_gene})" if eqtl_gene else "eQTL"
                     self._backend.scatter(
                         ax,
                         eqtl_data["pos"],
@@ -1145,9 +1124,10 @@ class LocusZoomPlotter:
                         edgecolor="black",
                         linewidth=0.5,
                         zorder=2,
-                        label=f"eQTL ({eqtl_gene})" if eqtl_gene else "eQTL",
+                        label=label,
+                        hover_data=_build_eqtl_hover_data(eqtl_data),
                     )
-                    ax.legend(loc="upper right", fontsize=9)
+                    self._backend.add_simple_legend(ax, label, loc="upper right")
             self._backend.set_ylabel(ax, r"$-\log_{10}$ P (eQTL)")
             self._backend.axhline(
@@ -1184,3 +1164,248 @@ class LocusZoomPlotter:
         self._backend.finalize_layout(fig, hspace=0.1)
         return fig
+    def plot_phewas(
+        self,
+        phewas_df: pd.DataFrame,
+        variant_id: str,
+        phenotype_col: str = "phenotype",
+        p_col: str = "p_value",
+        category_col: str = "category",
+        effect_col: Optional[str] = None,
+        significance_threshold: float = 5e-8,
+        figsize: Tuple[float, float] = (10, 8),
+    ) -> Any:
+        """Create a PheWAS (Phenome-Wide Association Study) plot.
+        Shows associations of a single variant across multiple phenotypes,
+        with phenotypes grouped by category and colored accordingly.
+        -log10(p) is drawn on the x-axis and each phenotype occupies one row
+        on the y-axis. The input DataFrame is copied, not modified.
+        Args:
+            phewas_df: DataFrame with phenotype associations.
+            variant_id: Variant identifier (e.g., "rs12345") for plot title.
+            phenotype_col: Column name for phenotype names.
+            p_col: Column name for p-values. Values are clipped to a minimum
+                of 1e-300 before the log transform.
+            category_col: Column name for phenotype categories. If the column
+                is not in the DataFrame, rows are sorted by p-value only and
+                all points share one color.
+            effect_col: Optional column name for effect direction (beta/OR).
+                When given and present, points are drawn as "^" for values
+                >= 0 and "v" otherwise; it is only consulted when the
+                category column is present.
+            significance_threshold: P-value threshold for significance line.
+            figsize: Figure size as (width, height).
+        Returns:
+            Figure object (type depends on backend).
+        Raises:
+            ValidationError: If ``phenotype_col`` or ``p_col`` is missing
+                (raised by ``validate_phewas_df``).
+        Example:
+            >>> fig = plotter.plot_phewas(
+            ...     phewas_df,
+            ...     variant_id="rs12345",
+            ...     category_col="category",
+            ... )
+        """
+        validate_phewas_df(phewas_df, phenotype_col, p_col, category_col)
+        df = phewas_df.copy()
+        # Clip so that p == 0 does not produce an infinite -log10 value
+        df["neglog10p"] = -np.log10(df[p_col].clip(lower=1e-300))
+        # Sort by category then by p-value for consistent ordering
+        if category_col in df.columns:
+            df = df.sort_values([category_col, p_col])
+            categories = df[category_col].unique().tolist()
+            palette = get_phewas_category_palette(categories)
+        else:
+            df = df.sort_values(p_col)
+            categories = []
+            palette = {}
+        # Create figure
+        fig, axes = self._backend.create_figure(
+            n_panels=1,
+            height_ratios=[1.0],
+            figsize=figsize,
+        )
+        ax = axes[0]
+        # Assign y-positions (one per phenotype), following the sort order above
+        df["y_pos"] = range(len(df))
+        # Plot points by category
+        if categories:
+            for cat in categories:
+                cat_data = df[df[category_col] == cat]
+                # Use up/down triangles by effect sign when effect_col is
+                # available, circles otherwise
+                if effect_col and effect_col in cat_data.columns:
+                    for _, row in cat_data.iterrows():
+                        # NOTE(review): odds ratios are positive, so this test
+                        # always picks "^" for OR input - confirm whether OR
+                        # values are expected on a log scale here.
+                        marker = "^" if row[effect_col] >= 0 else "v"
+                        self._backend.scatter(
+                            ax,
+                            pd.Series([row["neglog10p"]]),
+                            pd.Series([row["y_pos"]]),
+                            colors=palette[cat],
+                            sizes=60,
+                            marker=marker,
+                            edgecolor="black",
+                            linewidth=0.5,
+                            zorder=2,
+                        )
+                else:
+                    self._backend.scatter(
+                        ax,
+                        cat_data["neglog10p"],
+                        cat_data["y_pos"],
+                        colors=palette[cat],
+                        sizes=60,
+                        marker="o",
+                        edgecolor="black",
+                        linewidth=0.5,
+                        zorder=2,
+                    )
+        else:
+            # No category column: single color, effect_col is not used
+            self._backend.scatter(
+                ax,
+                df["neglog10p"],
+                df["y_pos"],
+                colors="#4169E1",
+                sizes=60,
+                edgecolor="black",
+                linewidth=0.5,
+                zorder=2,
+            )
+        # Add significance threshold line (vertical, because p is on the x-axis)
+        sig_line = -np.log10(significance_threshold)
+        self._backend.axvline(
+            ax, x=sig_line, color="red", linestyle="--", linewidth=1, alpha=0.7
+        )
+        # Set axis labels and limits
+        self._backend.set_xlabel(ax, r"$-\log_{10}$ P")
+        self._backend.set_ylabel(ax, "Phenotype")
+        self._backend.set_ylim(ax, -0.5, len(df) - 0.5)
+        # Set y-tick labels to phenotype names (matplotlib only)
+        if self.backend_name == "matplotlib":
+            ax.set_yticks(df["y_pos"])
+            ax.set_yticklabels(df[phenotype_col], fontsize=8)
+        self._backend.set_title(ax, f"PheWAS: {variant_id}")
+        self._backend.hide_spines(ax, ["top", "right"])
+        self._backend.finalize_layout(fig)
+        return fig
+    def plot_forest(
+        self,
+        forest_df: pd.DataFrame,
+        variant_id: str,
+        study_col: str = "study",
+        effect_col: str = "effect",
+        ci_lower_col: str = "ci_lower",
+        ci_upper_col: str = "ci_upper",
+        weight_col: Optional[str] = None,
+        null_value: float = 0.0,
+        effect_label: str = "Effect Size",
+        figsize: Tuple[float, float] = (8, 6),
+    ) -> Any:
+        """Create a forest plot showing effect sizes with confidence intervals.
+        Each row of the DataFrame becomes one horizontal error bar with a
+        square marker at the effect size; the first row is drawn at the top.
+        The input DataFrame is copied, not modified.
+        Args:
+            forest_df: DataFrame with effect sizes and confidence intervals.
+            variant_id: Variant identifier for plot title.
+            study_col: Column name for study/phenotype names.
+            effect_col: Column name for effect sizes.
+            ci_lower_col: Column name for lower confidence interval.
+            ci_upper_col: Column name for upper confidence interval.
+            weight_col: Optional column for study weights (affects marker size).
+                Weights are scaled linearly to marker sizes between 40 and
+                200; if all weights are equal the midpoint (120) is used, and
+                without a weight column every marker has size 80.
+            null_value: Reference value for null effect (0 for beta, 1 for OR).
+            effect_label: X-axis label.
+            figsize: Figure size as (width, height).
+        Returns:
+            Figure object (type depends on backend).
+        Example:
+            >>> fig = plotter.plot_forest(
+            ...     forest_df,
+            ...     variant_id="rs12345",
+            ...     effect_label="Odds Ratio",
+            ...     null_value=1.0,
+            ... )
+        """
+        # NOTE(review): validate_forest_df comes from .forest and is not shown
+        # here; presumably it raises when a required column is missing - confirm.
+        validate_forest_df(forest_df, study_col, effect_col, ci_lower_col, ci_upper_col)
+        df = forest_df.copy()
+        # Create figure
+        fig, axes = self._backend.create_figure(
+            n_panels=1,
+            height_ratios=[1.0],
+            figsize=figsize,
+        )
+        ax = axes[0]
+        # Assign y-positions (reverse so first study is at top)
+        df["y_pos"] = range(len(df) - 1, -1, -1)
+        # Calculate marker sizes from weights
+        if weight_col and weight_col in df.columns:
+            # Scale weights to marker sizes (min 40, max 200)
+            weights = df[weight_col]
+            min_size, max_size = 40, 200
+            weight_range = weights.max() - weights.min()
+            if weight_range > 0:
+                sizes = min_size + (weights - weights.min()) / weight_range * (
+                    max_size - min_size
+                )
+            else:
+                # All weights equal: avoid dividing by zero, use the midpoint
+                sizes = (min_size + max_size) / 2
+        else:
+            sizes = 80
+        # Calculate error bar extents (distances from the effect to each bound)
+        xerr_lower = df[effect_col] - df[ci_lower_col]
+        xerr_upper = df[ci_upper_col] - df[effect_col]
+        # Plot error bars (confidence intervals)
+        self._backend.errorbar_h(
+            ax,
+            x=df[effect_col],
+            y=df["y_pos"],
+            xerr_lower=xerr_lower,
+            xerr_upper=xerr_upper,
+            color="black",
+            linewidth=1.5,
+            capsize=3,
+            zorder=2,
+        )
+        # Plot effect size markers (zorder 3 puts them above the error bars)
+        self._backend.scatter(
+            ax,
+            df[effect_col],
+            df["y_pos"],
+            colors="#4169E1",
+            sizes=sizes,
+            marker="s",  # square markers typical for forest plots
+            edgecolor="black",
+            linewidth=0.5,
+            zorder=3,
+        )
+        # Add null effect line
+        self._backend.axvline(
+            ax, x=null_value, color="grey", linestyle="--", linewidth=1, alpha=0.7
+        )
+        # Set axis labels and limits
+        self._backend.set_xlabel(ax, effect_label)
+        self._backend.set_ylim(ax, -0.5, len(df) - 0.5)
+        # Set y-tick labels to study names (matplotlib only)
+        if self.backend_name == "matplotlib":
+            ax.set_yticks(df["y_pos"])
+            ax.set_yticklabels(df[study_col], fontsize=10)
+        self._backend.set_title(ax, f"Forest Plot: {variant_id}")
+        self._backend.hide_spines(ax, ["top", "right"])
+        self._backend.finalize_layout(fig)
+        return fig

pylocuszoom/py.typed ADDED Viewed

File without changes

pylocuszoom 0.3.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

pylocuszoom 0.3.0__py3-none-any.whl → 0.6.0__py3-none-any.whl