PyPI - pylocuszoom - Versions diffs - 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl - Mend

pylocuszoom 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

pylocuszoom/__init__.py +15 -0
pylocuszoom/backends/__init__.py +116 -17
pylocuszoom/backends/base.py +363 -60
pylocuszoom/backends/bokeh_backend.py +77 -15
pylocuszoom/backends/hover.py +198 -0
pylocuszoom/backends/matplotlib_backend.py +263 -3
pylocuszoom/backends/plotly_backend.py +73 -16
pylocuszoom/ensembl.py +476 -0
pylocuszoom/eqtl.py +15 -19
pylocuszoom/finemapping.py +17 -26
pylocuszoom/forest.py +9 -11
pylocuszoom/gene_track.py +161 -135
pylocuszoom/loaders.py +3 -1
pylocuszoom/phewas.py +10 -11
pylocuszoom/plotter.py +120 -194
pylocuszoom/recombination.py +19 -3
pylocuszoom/utils.py +52 -0
pylocuszoom/validation.py +172 -0
{pylocuszoom-0.6.0.dist-info → pylocuszoom-0.8.0.dist-info}/METADATA +46 -25
pylocuszoom-0.8.0.dist-info/RECORD +29 -0
pylocuszoom-0.6.0.dist-info/RECORD +0 -26
{pylocuszoom-0.6.0.dist-info → pylocuszoom-0.8.0.dist-info}/WHEEL +0 -0
{pylocuszoom-0.6.0.dist-info → pylocuszoom-0.8.0.dist-info}/licenses/LICENSE.md +0 -0

pylocuszoom/plotter.py CHANGED Viewed

@@ -15,12 +15,9 @@ from typing import Any, List, Optional, Tuple
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-from matplotlib.axes import Axes
-from matplotlib.figure import Figure
-from matplotlib.lines import Line2D
-from matplotlib.patches import Patch
 from .backends import BackendType, get_backend
+from .backends.hover import HoverConfig, HoverDataBuilder
 from .colors import (
     EQTL_NEGATIVE_BINS,
     EQTL_POSITIVE_BINS,
@@ -33,6 +30,7 @@ from .colors import (
     get_ld_color_palette,
     get_phewas_category_palette,
 )
+from .ensembl import get_genes_for_region
 from .eqtl import validate_eqtl_df
 from .finemapping import (
     get_credible_sets,
@@ -41,16 +39,13 @@ from .finemapping import (
 from .forest import validate_forest_df
 from .gene_track import (
     assign_gene_positions,
-    plot_gene_track,
     plot_gene_track_generic,
 )
-from .labels import add_snp_labels
 from .ld import calculate_ld, find_plink
 from .logging import enable_logging, logger
 from .phewas import validate_phewas_df
 from .recombination import (
     RECOMB_COLOR,
-    add_recombination_overlay,
     download_canine_recombination_maps,
     get_default_data_dir,
     get_recombination_rate_for_region,
@@ -119,8 +114,21 @@ class LocusZoomPlotter:
         recomb_data_dir: Optional[str] = None,
         genomewide_threshold: float = DEFAULT_GENOMEWIDE_THRESHOLD,
         log_level: Optional[str] = "INFO",
+        auto_genes: bool = False,
     ):
-        """Initialize the plotter."""
+        """Initialize the plotter.
+        Args:
+            species: Species name ('canine', 'feline', or None for custom).
+            genome_build: Genome build for coordinate system.
+            backend: Plotting backend ('matplotlib', 'plotly', or 'bokeh').
+            plink_path: Path to PLINK executable for LD calculation.
+            recomb_data_dir: Directory containing recombination maps.
+            genomewide_threshold: P-value threshold for significance line.
+            log_level: Logging level.
+            auto_genes: If True, automatically fetch genes from Ensembl when
+                genes_df is not provided. Default False for backward compatibility.
+        """
         # Configure logging
         if log_level is not None:
             enable_logging(log_level)
@@ -129,12 +137,12 @@ class LocusZoomPlotter:
         self.genome_build = (
             genome_build if genome_build else self._default_build(species)
         )
-        self.backend_name = backend
         self._backend = get_backend(backend)
         self.plink_path = plink_path or find_plink()
         self.recomb_data_dir = recomb_data_dir
         self.genomewide_threshold = genomewide_threshold
         self._genomewide_line = -np.log10(genomewide_threshold)
+        self._auto_genes = auto_genes
         # Cache for loaded data
         self._recomb_cache = {}
@@ -248,6 +256,22 @@ class LocusZoomPlotter:
         """
         # Validate inputs
         validate_gwas_df(gwas_df, pos_col=pos_col, p_col=p_col)
+        # Auto-fetch genes if enabled and not provided
+        if genes_df is None and self._auto_genes:
+            logger.debug(
+                f"auto_genes enabled, fetching genes for chr{chrom}:{start}-{end}"
+            )
+            genes_df = get_genes_for_region(
+                species=self.species,
+                chrom=chrom,
+                start=start,
+                end=end,
+            )
+            if genes_df.empty:
+                logger.debug("No genes found in region from Ensembl")
+                genes_df = None
         if genes_df is not None:
             validate_genes_df(genes_df)
@@ -305,10 +329,10 @@ class LocusZoomPlotter:
             zorder=1,
         )
-        # Add SNP labels (matplotlib only - interactive backends use hover tooltips)
+        # Add SNP labels (capability check - interactive backends use hover tooltips)
         if snp_labels and rs_col in df.columns and label_top_n > 0 and not df.empty:
-            if self.backend_name == "matplotlib":
-                add_snp_labels(
+            if self._backend.supports_snp_labels:
+                self._backend.add_snp_labels(
                     ax,
                     df,
                     pos_col=pos_col,
@@ -319,12 +343,10 @@ class LocusZoomPlotter:
                     chrom=chrom,
                 )
-        # Add recombination overlay (all backends)
+        # Add recombination overlay (all backends with secondary axis support)
         if recomb_df is not None and not recomb_df.empty:
-            if self.backend_name == "matplotlib":
-                add_recombination_overlay(ax, recomb_df, start, end)
-            else:
-                self._add_recombination_overlay_generic(ax, recomb_df, start, end)
+            if self._backend.supports_secondary_axis:
+                self._add_recombination_overlay(ax, recomb_df, start, end)
         # Format axes
         self._backend.set_ylabel(ax, r"$-\log_{10}$ P")
@@ -333,19 +355,13 @@ class LocusZoomPlotter:
         # Add LD legend (all backends)
         if ld_col is not None and ld_col in df.columns:
-            if self.backend_name == "matplotlib":
-                self._add_ld_legend(ax)
-            else:
-                self._backend.add_ld_legend(ax, LD_BINS, LEAD_SNP_COLOR)
+            self._backend.add_ld_legend(ax, LD_BINS, LEAD_SNP_COLOR)
-        # Plot gene track (all backends)
+        # Plot gene track (all backends use generic function)
         if genes_df is not None and gene_ax is not None:
-            if self.backend_name == "matplotlib":
-                plot_gene_track(gene_ax, genes_df, chrom, start, end, exons_df)
-            else:
-                plot_gene_track_generic(
-                    gene_ax, self._backend, genes_df, chrom, start, end, exons_df
-                )
+            plot_gene_track_generic(
+                gene_ax, self._backend, genes_df, chrom, start, end, exons_df
+            )
             self._backend.set_xlabel(gene_ax, f"Chromosome {chrom} (Mb)")
             self._backend.hide_spines(gene_ax, ["top", "right", "left"])
         else:
@@ -366,7 +382,7 @@ class LocusZoomPlotter:
         start: int,
         end: int,
         figsize: Tuple[int, int],
-    ) -> Tuple[Figure, Axes, Optional[Axes]]:
+    ) -> Tuple[Any, Any, Optional[Any]]:
         """Create figure with optional gene track."""
         if genes_df is not None:
             # Calculate dynamic height based on gene rows
@@ -410,7 +426,7 @@ class LocusZoomPlotter:
     def _plot_association(
         self,
-        ax: Axes,
+        ax: Any,
         df: pd.DataFrame,
         pos_col: str,
         ld_col: Optional[str],
@@ -419,23 +435,14 @@ class LocusZoomPlotter:
         p_col: Optional[str] = None,
     ) -> None:
         """Plot association scatter with LD coloring."""
-        def _build_hover_data(subset_df: pd.DataFrame) -> Optional[pd.DataFrame]:
-            """Build hover data for interactive backends."""
-            hover_cols = {}
-            # RS ID first (will be bold in hover)
-            if rs_col and rs_col in subset_df.columns:
-                hover_cols["SNP"] = subset_df[rs_col].values
-            # Position
-            if pos_col in subset_df.columns:
-                hover_cols["Position"] = subset_df[pos_col].values
-            # P-value
-            if p_col and p_col in subset_df.columns:
-                hover_cols["P-value"] = subset_df[p_col].values
-            # LD
-            if ld_col and ld_col in subset_df.columns:
-                hover_cols["R²"] = subset_df[ld_col].values
-            return pd.DataFrame(hover_cols) if hover_cols else None
+        # Build hover data using HoverDataBuilder
+        hover_config = HoverConfig(
+            snp_col=rs_col if rs_col and rs_col in df.columns else None,
+            pos_col=pos_col if pos_col in df.columns else None,
+            p_col=p_col if p_col and p_col in df.columns else None,
+            ld_col=ld_col if ld_col and ld_col in df.columns else None,
+        )
+        hover_builder = HoverDataBuilder(hover_config)
         # LD-based coloring
         if ld_col is not None and ld_col in df.columns:
@@ -454,7 +461,7 @@ class LocusZoomPlotter:
                     edgecolor="black",
                     linewidth=0.5,
                     zorder=2,
-                    hover_data=_build_hover_data(bin_data),
+                    hover_data=hover_builder.build_dataframe(bin_data),
                 )
         else:
             # Default: grey points
@@ -467,7 +474,7 @@ class LocusZoomPlotter:
                 edgecolor="black",
                 linewidth=0.5,
                 zorder=2,
-                hover_data=_build_hover_data(df),
+                hover_data=hover_builder.build_dataframe(df),
             )
         # Highlight lead SNP with larger, more prominent marker
@@ -484,57 +491,21 @@ class LocusZoomPlotter:
                     edgecolor="black",
                     linewidth=1.5,
                     zorder=10,
-                    hover_data=_build_hover_data(lead_snp),
+                    hover_data=hover_builder.build_dataframe(lead_snp),
                 )
-    def _add_ld_legend(self, ax: Axes) -> None:
-        """Add LD color legend to plot."""
-        palette = get_ld_color_palette()
-        legend_elements = [
-            Line2D(
-                [0],
-                [0],
-                marker="D",
-                color="w",
-                markerfacecolor=LEAD_SNP_COLOR,
-                markeredgecolor="black",
-                markersize=6,
-                label="Lead SNP",
-            ),
-        ]
-        for threshold, label, _ in LD_BINS:
-            legend_elements.append(
-                Patch(
-                    facecolor=palette[label],
-                    edgecolor="black",
-                    label=label,
-                )
-            )
-        ax.legend(
-            handles=legend_elements,
-            loc="upper right",
-            fontsize=9,
-            frameon=True,
-            framealpha=0.9,
-            title=r"$r^2$",
-            title_fontsize=10,
-            handlelength=1.5,
-            handleheight=1.0,
-            labelspacing=0.4,
-        )
-    def _add_recombination_overlay_generic(
+    def _add_recombination_overlay(
         self,
         ax: Any,
         recomb_df: pd.DataFrame,
         start: int,
         end: int,
     ) -> None:
-        """Add recombination overlay for interactive backends (plotly/bokeh).
+        """Add recombination overlay for all backends.
         Creates a secondary y-axis with recombination rate line and fill.
+        Uses backend-agnostic secondary axis methods that work across
+        matplotlib, plotly, and bokeh.
         """
         # Filter to region
         region_recomb = recomb_df[
@@ -591,7 +562,7 @@ class LocusZoomPlotter:
     def _plot_finemapping(
         self,
-        ax: Axes,
+        ax: Any,
         df: pd.DataFrame,
         pos_col: str = "pos",
         pip_col: str = "pip",
@@ -610,22 +581,15 @@ class LocusZoomPlotter:
             show_credible_sets: Whether to color points by credible set.
             pip_threshold: Minimum PIP to display as scatter point.
         """
-        def _build_finemapping_hover_data(
-            subset_df: pd.DataFrame,
-        ) -> Optional[pd.DataFrame]:
-            """Build hover data for interactive backends."""
-            hover_cols = {}
-            # Position
-            if pos_col in subset_df.columns:
-                hover_cols["Position"] = subset_df[pos_col].values
-            # PIP
-            if pip_col in subset_df.columns:
-                hover_cols["PIP"] = subset_df[pip_col].values
-            # Credible set
-            if cs_col and cs_col in subset_df.columns:
-                hover_cols["Credible Set"] = subset_df[cs_col].values
-            return pd.DataFrame(hover_cols) if hover_cols else None
+        # Build hover data using HoverDataBuilder
+        extra_cols = {pip_col: "PIP"}
+        if cs_col and cs_col in df.columns:
+            extra_cols[cs_col] = "Credible Set"
+        hover_config = HoverConfig(
+            pos_col=pos_col if pos_col in df.columns else None,
+            extra_cols=extra_cols,
+        )
+        hover_builder = HoverDataBuilder(hover_config)
         # Sort by position for line plotting
         df = df.sort_values(pos_col)
@@ -660,7 +624,7 @@ class LocusZoomPlotter:
                     edgecolor="black",
                     linewidth=0.5,
                     zorder=3,
-                    hover_data=_build_finemapping_hover_data(cs_data),
+                    hover_data=hover_builder.build_dataframe(cs_data),
                 )
             # Plot variants not in any credible set
             non_cs_data = df[(df[cs_col].isna()) | (df[cs_col] == 0)]
@@ -677,7 +641,7 @@ class LocusZoomPlotter:
                         edgecolor="black",
                         linewidth=0.3,
                         zorder=2,
-                        hover_data=_build_finemapping_hover_data(non_cs_data),
+                        hover_data=hover_builder.build_dataframe(non_cs_data),
                     )
         else:
             # No credible sets - show all points above threshold
@@ -694,7 +658,7 @@ class LocusZoomPlotter:
                         edgecolor="black",
                         linewidth=0.5,
                         zorder=3,
-                        hover_data=_build_finemapping_hover_data(high_pip),
+                        hover_data=hover_builder.build_dataframe(high_pip),
                     )
     def plot_stacked(
@@ -912,10 +876,10 @@ class LocusZoomPlotter:
                 zorder=1,
             )
-            # Add SNP labels (matplotlib only - interactive backends use hover tooltips)
+            # Add SNP labels (capability check - interactive backends use hover tooltips)
             if snp_labels and rs_col in df.columns and label_top_n > 0 and not df.empty:
-                if self.backend_name == "matplotlib":
-                    add_snp_labels(
+                if self._backend.supports_snp_labels:
+                    self._backend.add_snp_labels(
                         ax,
                         df,
                         pos_col=pos_col,
@@ -928,10 +892,8 @@ class LocusZoomPlotter:
             # Add recombination overlay (only on first panel, all backends)
             if i == 0 and recomb_df is not None and not recomb_df.empty:
-                if self.backend_name == "matplotlib":
-                    add_recombination_overlay(ax, recomb_df, start, end)
-                else:
-                    self._add_recombination_overlay_generic(ax, recomb_df, start, end)
+                if self._backend.supports_secondary_axis:
+                    self._add_recombination_overlay(ax, recomb_df, start, end)
             # Format axes
             self._backend.set_ylabel(ax, r"$-\log_{10}$ P")
@@ -940,50 +902,11 @@ class LocusZoomPlotter:
             # Add panel label
             if panel_labels and i < len(panel_labels):
-                if self.backend_name == "matplotlib":
-                    ax.annotate(
-                        panel_labels[i],
-                        xy=(0.02, 0.95),
-                        xycoords="axes fraction",
-                        fontsize=11,
-                        fontweight="bold",
-                        va="top",
-                        ha="left",
-                    )
-                elif self.backend_name == "plotly":
-                    fig, row = ax
-                    fig.add_annotation(
-                        text=f"<b>{panel_labels[i]}</b>",
-                        xref=f"x{row} domain" if row > 1 else "x domain",
-                        yref=f"y{row} domain" if row > 1 else "y domain",
-                        x=0.02,
-                        y=0.95,
-                        showarrow=False,
-                        font=dict(size=11),
-                        xanchor="left",
-                        yanchor="top",
-                    )
-                elif self.backend_name == "bokeh":
-                    from bokeh.models import Label
-                    # Get y-axis range for positioning
-                    y_max = ax.y_range.end if ax.y_range.end else 10
-                    x_min = ax.x_range.start if ax.x_range.start else start
-                    label = Label(
-                        x=x_min + (end - start) * 0.02,
-                        y=y_max * 0.95,
-                        text=panel_labels[i],
-                        text_font_size="11pt",
-                        text_font_style="bold",
-                    )
-                    ax.add_layout(label)
+                self._backend.add_panel_label(ax, panel_labels[i])
             # Add LD legend (only on first panel, all backends)
             if i == 0 and panel_ld_col is not None and panel_ld_col in df.columns:
-                if self.backend_name == "matplotlib":
-                    self._add_ld_legend(ax)
-                else:
-                    self._backend.add_ld_legend(ax, LD_BINS, LEAD_SNP_COLOR)
+                self._backend.add_ld_legend(ax, LD_BINS, LEAD_SNP_COLOR)
         # Track current panel index
         panel_idx = n_gwas
@@ -1050,24 +973,18 @@ class LocusZoomPlotter:
                     eqtl_data["p_value"].clip(lower=1e-300)
                 )
-                def _build_eqtl_hover_data(
-                    subset_df: pd.DataFrame,
-                ) -> Optional[pd.DataFrame]:
-                    """Build hover data for eQTL interactive backends."""
-                    hover_cols = {}
-                    # Position
-                    if "pos" in subset_df.columns:
-                        hover_cols["Position"] = subset_df["pos"].values
-                    # P-value
-                    if "p_value" in subset_df.columns:
-                        hover_cols["P-value"] = subset_df["p_value"].values
-                    # Effect size
-                    if "effect_size" in subset_df.columns:
-                        hover_cols["Effect"] = subset_df["effect_size"].values
-                    # Gene
-                    if "gene" in subset_df.columns:
-                        hover_cols["Gene"] = subset_df["gene"].values
-                    return pd.DataFrame(hover_cols) if hover_cols else None
+                # Build hover data using HoverDataBuilder
+                eqtl_extra_cols = {}
+                if "effect_size" in eqtl_data.columns:
+                    eqtl_extra_cols["effect_size"] = "Effect"
+                if "gene" in eqtl_data.columns:
+                    eqtl_extra_cols["gene"] = "Gene"
+                eqtl_hover_config = HoverConfig(
+                    pos_col="pos" if "pos" in eqtl_data.columns else None,
+                    p_col="p_value" if "p_value" in eqtl_data.columns else None,
+                    extra_cols=eqtl_extra_cols,
+                )
+                eqtl_hover_builder = HoverDataBuilder(eqtl_hover_config)
                 # Check if effect_size column exists for directional coloring
                 has_effect = "effect_size" in eqtl_data.columns
@@ -1090,7 +1007,7 @@ class LocusZoomPlotter:
                             edgecolor="black",
                             linewidth=0.5,
                             zorder=2,
-                            hover_data=_build_eqtl_hover_data(row_df),
+                            hover_data=eqtl_hover_builder.build_dataframe(row_df),
                         )
                     # Plot negative effects (down triangles)
                     for _, row in neg_effects.iterrows():
@@ -1105,7 +1022,7 @@ class LocusZoomPlotter:
                             edgecolor="black",
                             linewidth=0.5,
                             zorder=2,
-                            hover_data=_build_eqtl_hover_data(row_df),
+                            hover_data=eqtl_hover_builder.build_dataframe(row_df),
                         )
                     # Add eQTL effect legend (all backends)
                     self._backend.add_eqtl_legend(
@@ -1125,7 +1042,7 @@ class LocusZoomPlotter:
                         linewidth=0.5,
                         zorder=2,
                         label=label,
-                        hover_data=_build_eqtl_hover_data(eqtl_data),
+                        hover_data=eqtl_hover_builder.build_dataframe(eqtl_data),
                     )
                     self._backend.add_simple_legend(ax, label, loc="upper right")
@@ -1141,15 +1058,12 @@ class LocusZoomPlotter:
             self._backend.hide_spines(ax, ["top", "right"])
             panel_idx += 1
-        # Plot gene track (all backends)
+        # Plot gene track (all backends use generic function)
         if genes_df is not None:
             gene_ax = axes[panel_idx]
-            if self.backend_name == "matplotlib":
-                plot_gene_track(gene_ax, genes_df, chrom, start, end, exons_df)
-            else:
-                plot_gene_track_generic(
-                    gene_ax, self._backend, genes_df, chrom, start, end, exons_df
-                )
+            plot_gene_track_generic(
+                gene_ax, self._backend, genes_df, chrom, start, end, exons_df
+            )
             self._backend.set_xlabel(gene_ax, f"Chromosome {chrom} (Mb)")
             self._backend.hide_spines(gene_ax, ["top", "right", "left"])
         else:
@@ -1281,10 +1195,13 @@ class LocusZoomPlotter:
         self._backend.set_ylabel(ax, "Phenotype")
         self._backend.set_ylim(ax, -0.5, len(df) - 0.5)
-        # Set y-tick labels to phenotype names (matplotlib only)
-        if self.backend_name == "matplotlib":
-            ax.set_yticks(df["y_pos"])
-            ax.set_yticklabels(df[phenotype_col], fontsize=8)
+        # Set y-tick labels to phenotype names
+        self._backend.set_yticks(
+            ax,
+            positions=df["y_pos"].tolist(),
+            labels=df[phenotype_col].tolist(),
+            fontsize=8,
+        )
         self._backend.set_title(ax, f"PheWAS: {variant_id}")
         self._backend.hide_spines(ax, ["top", "right"])
@@ -1399,10 +1316,19 @@ class LocusZoomPlotter:
         self._backend.set_xlabel(ax, effect_label)
         self._backend.set_ylim(ax, -0.5, len(df) - 0.5)
-        # Set y-tick labels to study names (matplotlib only)
-        if self.backend_name == "matplotlib":
-            ax.set_yticks(df["y_pos"])
-            ax.set_yticklabels(df[study_col], fontsize=10)
+        # Ensure x-axis includes the null value with some padding
+        x_min = min(df[ci_lower_col].min(), null_value)
+        x_max = max(df[ci_upper_col].max(), null_value)
+        x_padding = (x_max - x_min) * 0.1
+        self._backend.set_xlim(ax, x_min - x_padding, x_max + x_padding)
+        # Set y-tick labels to study names
+        self._backend.set_yticks(
+            ax,
+            positions=df["y_pos"].tolist(),
+            labels=df[study_col].tolist(),
+            fontsize=10,
+        )
         self._backend.set_title(ax, f"Forest Plot: {variant_id}")
         self._backend.hide_spines(ax, ["top", "right"])

pylocuszoom/recombination.py CHANGED Viewed

@@ -18,6 +18,7 @@ from matplotlib.axes import Axes
 from tqdm import tqdm
 from .logging import logger
+from .utils import filter_by_region
 # Recombination overlay color
 RECOMB_COLOR = "#7FCDFF"  # Light blue
@@ -252,10 +253,20 @@ def download_canine_recombination_maps(
         logger.debug(f"Downloaded {tar_path.stat().st_size / 1024:.1f} KB")
-        # Extract tar.gz
+        # Extract tar.gz with path traversal protection
         logger.debug("Extracting genetic maps...")
         with tarfile.open(tar_path, "r:gz") as tar:
-            tar.extractall(tmpdir)
+            # Filter to prevent path traversal attacks
+            safe_members = []
+            for member in tar.getmembers():
+                # Resolve the path and ensure it stays within tmpdir
+                member_path = Path(tmpdir) / member.name
+                try:
+                    member_path.resolve().relative_to(Path(tmpdir).resolve())
+                    safe_members.append(member)
+                except ValueError:
+                    logger.warning(f"Skipping unsafe path in archive: {member.name}")
+            tar.extractall(tmpdir, members=safe_members)
         # Find and process the extracted files
         extracted_dir = Path(tmpdir)
@@ -374,7 +385,12 @@ def get_recombination_rate_for_region(
         )
     # Filter to region
-    region_df = df[(df["pos"] >= start) & (df["pos"] <= end)].copy()
+    region_df = filter_by_region(
+        df,
+        region=(chrom, start, end),
+        chrom_col="",  # Recomb maps don't have chromosome column
+        pos_col="pos",
+    )
     return region_df[["pos", "rate"]]

pylocuszoom/utils.py CHANGED Viewed

@@ -106,6 +106,58 @@ def normalize_chrom(chrom: Union[int, str]) -> str:
     return str(chrom).replace("chr", "")
+def filter_by_region(
+    df: pd.DataFrame,
+    region: tuple,
+    chrom_col: str = "chrom",
+    pos_col: str = "pos",
+) -> pd.DataFrame:
+    """Filter DataFrame to genomic region with inclusive bounds.
+    Filters rows where position is within [start, end] (inclusive).
+    If chrom_col exists in DataFrame, also filters by chromosome.
+    Chromosome comparison normalizes types (int/str, chr prefix).
+    Args:
+        df: DataFrame to filter.
+        region: Tuple of (chrom, start, end) defining the region.
+        chrom_col: Column name for chromosome (default: "chrom").
+            If column doesn't exist, filters by position only.
+        pos_col: Column name for position (default: "pos").
+    Returns:
+        Filtered DataFrame (copy, not view).
+    Raises:
+        KeyError: If pos_col is not found in DataFrame.
+    Example:
+        >>> filtered = filter_by_region(df, region=(1, 1000000, 2000000))
+        >>> filtered = filter_by_region(df, region=("chr1", 1e6, 2e6), pos_col="position")
+    """
+    chrom, start, end = region
+    # Validate position column exists
+    if pos_col not in df.columns:
+        raise KeyError(
+            f"Position column '{pos_col}' not found in DataFrame. "
+            f"Available columns: {list(df.columns)}"
+        )
+    # Position filtering (inclusive bounds)
+    mask = (df[pos_col] >= start) & (df[pos_col] <= end)
+    # Chromosome filtering (if column exists)
+    if chrom_col in df.columns:
+        chrom_normalized = normalize_chrom(chrom)
+        df_chrom_normalized = (
+            df[chrom_col].astype(str).str.replace("chr", "", regex=False)
+        )
+        mask = mask & (df_chrom_normalized == chrom_normalized)
+    return df[mask].copy()
 def validate_dataframe(
     df: pd.DataFrame,
     required_cols: List[str],

pylocuszoom 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

pylocuszoom 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl