pylocuszoom 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pylocuszoom/__init__.py CHANGED
@@ -3,20 +3,21 @@
3
3
  This package provides LocusZoom-style regional association plots with:
4
4
  - LD coloring based on R² with lead variant
5
5
  - Gene and exon tracks
6
- - Recombination rate overlays (dog built-in, or user-provided)
6
+ - Recombination rate overlays (canine built-in, or user-provided)
7
7
  - Automatic SNP labeling
8
8
  - Multiple backends: matplotlib (static), plotly (interactive), bokeh (dashboards)
9
9
  - eQTL overlay support
10
+ - Fine-mapping/SuSiE visualization (PIP line with credible set coloring)
10
11
  - PySpark DataFrame support for large-scale data
11
12
 
12
13
  Example:
13
14
  >>> from pylocuszoom import LocusZoomPlotter
14
- >>> plotter = LocusZoomPlotter(species="dog")
15
+ >>> plotter = LocusZoomPlotter(species="canine")
15
16
  >>> fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000)
16
17
  >>> fig.savefig("regional_plot.png", dpi=150)
17
18
 
18
19
  Interactive example:
19
- >>> plotter = LocusZoomPlotter(species="dog", backend="plotly")
20
+ >>> plotter = LocusZoomPlotter(species="canine", backend="plotly")
20
21
  >>> fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000)
21
22
  >>> fig.write_html("regional_plot.html")
22
23
 
@@ -28,22 +29,42 @@ Stacked plots:
28
29
  ... )
29
30
 
30
31
  Species Support:
31
- - Dog (Canis lupus familiaris): Full features including built-in recombination maps
32
- - Cat (Felis catus): LD coloring and gene tracks (user provides recombination data)
32
+ - Canine (Canis lupus familiaris): Full features including built-in recombination maps
33
+ - Feline (Felis catus): LD coloring and gene tracks (user provides recombination data)
33
34
  - Custom: User provides all reference data
34
35
  """
35
36
 
36
37
  __version__ = "0.1.0"
37
38
 
38
39
  # Main plotter class
39
- from .plotter import LocusZoomPlotter
40
-
41
40
  # Backend types
42
41
  from .backends import BackendType, get_backend
43
42
 
44
43
  # Colors and LD
45
44
  from .colors import LEAD_SNP_COLOR, get_ld_bin, get_ld_color, get_ld_color_palette
46
45
 
46
+ # eQTL support
47
+ from .eqtl import (
48
+ EQTLValidationError,
49
+ calculate_colocalization_overlap,
50
+ filter_eqtl_by_gene,
51
+ filter_eqtl_by_region,
52
+ get_eqtl_genes,
53
+ prepare_eqtl_for_plotting,
54
+ validate_eqtl_df,
55
+ )
56
+
57
+ # Fine-mapping/SuSiE support
58
+ from .finemapping import (
59
+ FinemappingValidationError,
60
+ filter_by_credible_set,
61
+ filter_finemapping_by_region,
62
+ get_credible_sets,
63
+ get_top_pip_variants,
64
+ prepare_finemapping_for_plotting,
65
+ validate_finemapping_df,
66
+ )
67
+
47
68
  # Gene track
48
69
  from .gene_track import get_nearest_gene, plot_gene_track
49
70
 
@@ -55,26 +76,16 @@ from .ld import calculate_ld
55
76
 
56
77
  # Logging configuration
57
78
  from .logging import disable_logging, enable_logging
79
+ from .plotter import LocusZoomPlotter
58
80
 
59
81
  # Reference data management
60
82
  from .recombination import (
61
83
  add_recombination_overlay,
62
- download_dog_recombination_maps,
84
+ download_canine_recombination_maps,
63
85
  get_recombination_rate_for_region,
64
86
  load_recombination_map,
65
87
  )
66
88
 
67
- # eQTL support
68
- from .eqtl import (
69
- EQTLValidationError,
70
- calculate_colocalization_overlap,
71
- filter_eqtl_by_gene,
72
- filter_eqtl_by_region,
73
- get_eqtl_genes,
74
- prepare_eqtl_for_plotting,
75
- validate_eqtl_df,
76
- )
77
-
78
89
  # Validation utilities
79
90
  from .utils import ValidationError, to_pandas
80
91
 
@@ -86,7 +97,7 @@ __all__ = [
86
97
  "BackendType",
87
98
  "get_backend",
88
99
  # Reference data
89
- "download_dog_recombination_maps",
100
+ "download_canine_recombination_maps",
90
101
  # Colors
91
102
  "get_ld_color",
92
103
  "get_ld_bin",
@@ -111,6 +122,14 @@ __all__ = [
111
122
  "get_eqtl_genes",
112
123
  "calculate_colocalization_overlap",
113
124
  "EQTLValidationError",
125
+ # Fine-mapping/SuSiE
126
+ "validate_finemapping_df",
127
+ "filter_finemapping_by_region",
128
+ "filter_by_credible_set",
129
+ "get_credible_sets",
130
+ "get_top_pip_variants",
131
+ "prepare_finemapping_for_plotting",
132
+ "FinemappingValidationError",
114
133
  # Logging
115
134
  "enable_logging",
116
135
  "disable_logging",
@@ -3,15 +3,11 @@
3
3
  Supports matplotlib (default), plotly, and bokeh backends.
4
4
  """
5
5
 
6
- from typing import TYPE_CHECKING, Literal
6
+ from typing import Literal
7
7
 
8
8
  from .base import PlotBackend
9
9
  from .matplotlib_backend import MatplotlibBackend
10
10
 
11
- if TYPE_CHECKING:
12
- from .bokeh_backend import BokehBackend
13
- from .plotly_backend import PlotlyBackend
14
-
15
11
  BackendType = Literal["matplotlib", "plotly", "bokeh"]
16
12
 
17
13
  _BACKENDS: dict[str, type[PlotBackend]] = {
@@ -3,7 +3,7 @@
3
3
  Defines the interface that matplotlib, plotly, and bokeh backends must implement.
4
4
  """
5
5
 
6
- from typing import Any, Dict, List, Optional, Protocol, Tuple, Union
6
+ from typing import Any, List, Optional, Protocol, Tuple, Union
7
7
 
8
8
  import pandas as pd
9
9
 
@@ -8,7 +8,7 @@ from typing import Any, List, Optional, Tuple, Union
8
8
  import pandas as pd
9
9
  from bokeh.io import export_png, export_svgs, output_file, save, show
10
10
  from bokeh.layouts import column
11
- from bokeh.models import ColumnDataSource, HoverTool, Legend, LegendItem, Span
11
+ from bokeh.models import ColumnDataSource, HoverTool, Span
12
12
  from bokeh.plotting import figure
13
13
 
14
14
 
@@ -108,10 +108,10 @@ class BokehBackend:
108
108
 
109
109
  # Handle sizes (convert from area to diameter)
110
110
  if isinstance(sizes, (int, float)):
111
- bokeh_size = max(6, sizes ** 0.5)
111
+ bokeh_size = max(6, sizes**0.5)
112
112
  data["size"] = [bokeh_size] * len(x)
113
113
  else:
114
- data["size"] = [max(6, s ** 0.5) for s in sizes]
114
+ data["size"] = [max(6, s**0.5) for s in sizes]
115
115
 
116
116
  # Add hover data
117
117
  tooltips = []
@@ -289,7 +289,6 @@ class BokehBackend:
289
289
  zorder: int = 2,
290
290
  ) -> Any:
291
291
  """Add a rectangle to the figure."""
292
- from bokeh.models import Rect
293
292
 
294
293
  x_center = xy[0] + width / 2
295
294
  y_center = xy[1] + height / 2
@@ -389,9 +388,7 @@ class BokehBackend:
389
388
  # For now, assume values are already in bp and need /1e6
390
389
  from bokeh.models import FuncTickFormatter
391
390
 
392
- ax.xaxis.formatter = FuncTickFormatter(
393
- code="return (tick / 1e6).toFixed(2);"
394
- )
391
+ ax.xaxis.formatter = FuncTickFormatter(code="return (tick / 1e6).toFixed(2);")
395
392
 
396
393
  def save(
397
394
  self,
@@ -205,7 +205,12 @@ class MatplotlibBackend:
205
205
 
206
206
  def set_title(self, ax: Axes, title: str, fontsize: int = 14) -> None:
207
207
  """Set panel title."""
208
- ax.set_title(title, fontsize=fontsize)
208
+ ax.set_title(
209
+ title,
210
+ fontsize=fontsize,
211
+ fontweight="bold",
212
+ fontfamily="sans-serif",
213
+ )
209
214
 
210
215
  def create_twin_axis(self, ax: Axes) -> Axes:
211
216
  """Create a secondary y-axis sharing the same x-axis."""
@@ -100,9 +100,9 @@ class PlotlyBackend:
100
100
 
101
101
  # Convert size (matplotlib uses area, plotly uses diameter)
102
102
  if isinstance(sizes, (int, float)):
103
- size = max(6, sizes ** 0.5) # Approximate conversion
103
+ size = max(6, sizes**0.5) # Approximate conversion
104
104
  else:
105
- size = [max(6, s ** 0.5) for s in sizes]
105
+ size = [max(6, s**0.5) for s in sizes]
106
106
 
107
107
  # Build hover template
108
108
  if hover_data is not None:
@@ -317,7 +317,9 @@ class PlotlyBackend:
317
317
  """Set x-axis label."""
318
318
  fig, row = ax
319
319
  xaxis = f"xaxis{row}" if row > 1 else "xaxis"
320
- fig.update_layout(**{xaxis: dict(title=dict(text=label, font=dict(size=fontsize)))})
320
+ fig.update_layout(
321
+ **{xaxis: dict(title=dict(text=label, font=dict(size=fontsize)))}
322
+ )
321
323
 
322
324
  def set_ylabel(
323
325
  self, ax: Tuple[go.Figure, int], label: str, fontsize: int = 12
@@ -325,7 +327,9 @@ class PlotlyBackend:
325
327
  """Set y-axis label."""
326
328
  fig, row = ax
327
329
  yaxis = f"yaxis{row}" if row > 1 else "yaxis"
328
- fig.update_layout(**{yaxis: dict(title=dict(text=label, font=dict(size=fontsize)))})
330
+ fig.update_layout(
331
+ **{yaxis: dict(title=dict(text=label, font=dict(size=fontsize)))}
332
+ )
329
333
 
330
334
  def set_title(
331
335
  self, ax: Tuple[go.Figure, int], title: str, fontsize: int = 14
@@ -395,14 +399,9 @@ class PlotlyBackend:
395
399
 
396
400
  Plotly doesn't have spines, but we can hide axis lines.
397
401
  """
398
- fig, row = ax
399
-
400
- xaxis = f"xaxis{row}" if row > 1 else "xaxis"
401
- yaxis = f"yaxis{row}" if row > 1 else "yaxis"
402
-
403
- if "top" in spines or "right" in spines:
404
- # Plotly's template "plotly_white" already hides these
405
- pass
402
+ # Plotly's template "plotly_white" already hides top/right lines
403
+ # No action needed - method exists for API compatibility
404
+ pass
406
405
 
407
406
  def format_xaxis_mb(self, ax: Tuple[go.Figure, int]) -> None:
408
407
  """Format x-axis to show megabase values."""
pylocuszoom/colors.py CHANGED
@@ -29,6 +29,101 @@ LD_NA_LABEL = "NA"
29
29
  # Lead SNP color (purple diamond)
30
30
  LEAD_SNP_COLOR = "#7D26CD" # purple3
31
31
 
32
# Fine-mapping/SuSiE credible set colors
# Colors for up to 10 credible sets, matching locuszoomr style.
# List is 0-indexed; credible set IDs are 1-indexed (CS1 -> index 0).
CREDIBLE_SET_COLORS: List[str] = [
    "#FF7F00",  # orange (CS1)
    "#1F78B4",  # blue (CS2)
    "#33A02C",  # green (CS3)
    "#E31A1C",  # red (CS4)
    "#6A3D9A",  # purple (CS5)
    "#B15928",  # brown (CS6)
    "#FB9A99",  # pink (CS7)
    "#A6CEE3",  # light blue (CS8)
    "#B2DF8A",  # light green (CS9)
    "#FDBF6F",  # light orange (CS10)
]

# PIP line color (when not showing credible sets)
PIP_LINE_COLOR = "#FF7F00"  # orange

# eQTL effect size bins - matches locuszoomr color scheme
# Format: (min_threshold, max_threshold, label, color)
# Positive effects (upward triangles), ordered strongest to weakest
EQTL_POSITIVE_BINS: List[Tuple[float, float, str, str]] = [
    (0.3, 0.4, "0.3 : 0.4", "#8B1A1A"),  # dark red/maroon
    (0.2, 0.3, "0.2 : 0.3", "#FF6600"),  # orange
    (0.1, 0.2, "0.1 : 0.2", "#FFB347"),  # light orange
]
# Negative effects (downward triangles), ordered weakest to strongest
EQTL_NEGATIVE_BINS: List[Tuple[float, float, str, str]] = [
    (-0.2, -0.1, "-0.2 : -0.1", "#66CDAA"),  # medium aquamarine
    (-0.3, -0.2, "-0.3 : -0.2", "#4682B4"),  # steel blue
    (-0.4, -0.3, "-0.4 : -0.3", "#00008B"),  # dark blue
]
64
+
65
+
66
def get_eqtl_color(effect: Optional[float]) -> str:
    """Get color based on eQTL effect size.

    Effects beyond the outermost thresholds (|effect| >= 0.4) map to the
    strongest color for their sign; effects closer to zero than the innermost
    threshold (|effect| < 0.1) map to the weakest color for their sign.

    Args:
        effect: Effect size (beta coefficient). None/NaN yields the NA color.

    Returns:
        Hex color code string.
    """
    if _is_missing(effect):
        return LD_NA_COLOR

    if effect >= 0:
        # Positive bins are ordered from largest to smallest threshold, so the
        # first bin whose lower bound is reached is the correct one (this also
        # handles effect >= 0.4 without a hard-coded constant).
        for min_t, _max_t, _, color in EQTL_POSITIVE_BINS:
            if effect >= min_t:
                return color
        # 0 <= effect < smallest positive threshold: weakest positive color.
        return EQTL_POSITIVE_BINS[-1][3]

    # Negative bins are ordered from closest-to-zero to most negative. This
    # also fixes the previous fall-through, which gave weak negative effects
    # (-0.1 < effect < 0) the strongest negative color instead of the weakest.
    for min_t, _max_t, _, color in EQTL_NEGATIVE_BINS:
        if effect > min_t:
            return color
    # effect at or beyond the most negative threshold: strongest color.
    return EQTL_NEGATIVE_BINS[-1][3]
88
+
89
+
90
def get_eqtl_bin(effect: Optional[float]) -> str:
    """Get eQTL effect bin label.

    Effects beyond the outermost thresholds map to the outermost bin for
    their sign; effects closer to zero than the innermost threshold map to
    the innermost (weakest) bin for their sign.

    Args:
        effect: Effect size (beta coefficient). None/NaN yields the NA label.

    Returns:
        Bin label string (e.g. "0.1 : 0.2").
    """
    if _is_missing(effect):
        return LD_NA_LABEL

    if effect >= 0:
        # Positive bins are ordered from largest to smallest threshold, so the
        # first bin whose lower bound is reached is the correct one (this also
        # handles effect >= 0.4 without a hard-coded constant).
        for min_t, _max_t, label, _ in EQTL_POSITIVE_BINS:
            if effect >= min_t:
                return label
        # 0 <= effect < smallest positive threshold: weakest positive bin.
        return EQTL_POSITIVE_BINS[-1][2]

    # Negative bins are ordered from closest-to-zero to most negative. This
    # also fixes the previous fall-through, which labeled weak negative
    # effects (-0.1 < effect < 0) with the strongest bin instead of the
    # weakest.
    for min_t, _max_t, label, _ in EQTL_NEGATIVE_BINS:
        if effect > min_t:
            return label
    # effect at or beyond the most negative threshold: strongest bin.
    return EQTL_NEGATIVE_BINS[-1][2]
112
+
113
+
114
def get_eqtl_color_palette() -> dict[str, str]:
    """Get color palette for eQTL effect bins.

    Returns:
        Dictionary mapping bin labels to hex colors, positive bins first.
    """
    all_bins = EQTL_POSITIVE_BINS + EQTL_NEGATIVE_BINS
    return {label: color for _, _, label, color in all_bins}
126
+
32
127
 
33
128
  def get_ld_color(r2: Optional[float]) -> str:
34
129
  """Get LocusZoom-style color based on LD R² value.
@@ -105,3 +200,40 @@ def get_ld_color_palette() -> dict[str, str]:
105
200
  palette = {label: color for _, label, color in LD_BINS}
106
201
  palette[LD_NA_LABEL] = LD_NA_COLOR
107
202
  return palette
203
+
204
+
205
+ def get_credible_set_color(cs_id: int) -> str:
206
+ """Get color for a credible set.
207
+
208
+ Args:
209
+ cs_id: Credible set ID (1-indexed).
210
+
211
+ Returns:
212
+ Hex color code string.
213
+
214
+ Example:
215
+ >>> get_credible_set_color(1)
216
+ '#FF7F00'
217
+ """
218
+ if cs_id < 1:
219
+ return LD_NA_COLOR
220
+ # Use modulo to cycle through colors if more than 10 credible sets
221
+ idx = (cs_id - 1) % len(CREDIBLE_SET_COLORS)
222
+ return CREDIBLE_SET_COLORS[idx]
223
+
224
+
225
+ def get_credible_set_color_palette(n_sets: int = 10) -> dict[int, str]:
226
+ """Get color palette for credible sets.
227
+
228
+ Args:
229
+ n_sets: Number of credible sets to include.
230
+
231
+ Returns:
232
+ Dictionary mapping credible set IDs (1-indexed) to hex colors.
233
+
234
+ Example:
235
+ >>> palette = get_credible_set_color_palette(3)
236
+ >>> palette[1]
237
+ '#FF7F00'
238
+ """
239
+ return {i + 1: CREDIBLE_SET_COLORS[i % len(CREDIBLE_SET_COLORS)] for i in range(n_sets)}
pylocuszoom/eqtl.py CHANGED
@@ -11,7 +11,6 @@ import pandas as pd
11
11
 
12
12
  from .logging import logger
13
13
 
14
-
15
14
  REQUIRED_EQTL_COLS = ["pos", "p_value"]
16
15
  OPTIONAL_EQTL_COLS = ["gene", "effect_size", "rs", "se"]
17
16
 
@@ -109,7 +108,9 @@ def filter_eqtl_by_region(
109
108
  mask = mask & (df_chrom == chrom_str)
110
109
 
111
110
  filtered = df[mask].copy()
112
- logger.debug(f"Filtered eQTL data to {len(filtered)} variants in region chr{chrom}:{start}-{end}")
111
+ logger.debug(
112
+ f"Filtered eQTL data to {len(filtered)} variants in region chr{chrom}:{start}-{end}"
113
+ )
113
114
  return filtered
114
115
 
115
116
 
@@ -0,0 +1,224 @@
1
+ """Fine-mapping/SuSiE data handling for pyLocusZoom.
2
+
3
+ Provides utilities for loading, validating, and preparing statistical
4
+ fine-mapping results (SuSiE, FINEMAP, etc.) for visualization.
5
+ """
6
+
7
+ from typing import List, Optional
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+
12
+ from .logging import logger
13
+
14
+ # Required columns for fine-mapping data
15
+ REQUIRED_FINEMAPPING_COLS = ["pos", "pip"]
16
+ OPTIONAL_FINEMAPPING_COLS = ["rs", "cs", "cs_id", "effect", "se"]
17
+
18
+
19
class FinemappingValidationError(ValueError):
    """Raised when fine-mapping DataFrame validation fails.

    Subclasses ValueError so callers catching ValueError keep working.
    (Redundant ``pass`` removed — the docstring alone is a valid body.)
    """
23
+
24
+
25
def validate_finemapping_df(
    df: pd.DataFrame,
    pos_col: str = "pos",
    pip_col: str = "pip",
) -> None:
    """Check that a fine-mapping DataFrame is usable for plotting.

    Args:
        df: Fine-mapping DataFrame to validate.
        pos_col: Column name for genomic position.
        pip_col: Column name for posterior inclusion probability.

    Raises:
        FinemappingValidationError: If a required column is absent, or any
            PIP value falls outside [0, 1].
    """
    missing = [col for col in (pos_col, pip_col) if col not in df.columns]
    if missing:
        raise FinemappingValidationError(
            f"Fine-mapping DataFrame missing required columns: {missing}. "
            f"Required: {pos_col} (position), {pip_col} (posterior inclusion probability)"
        )

    # NaN fails `between`, so missing PIP values are also rejected here.
    in_range = df[pip_col].between(0, 1)
    if not in_range.all():
        invalid_count = (~in_range).sum()
        raise FinemappingValidationError(
            f"PIP values must be between 0 and 1. Found {invalid_count} invalid values."
        )
+
59
+
60
def filter_finemapping_by_region(
    df: pd.DataFrame,
    chrom: int,
    start: int,
    end: int,
    pos_col: str = "pos",
    chrom_col: Optional[str] = "chr",
) -> pd.DataFrame:
    """Restrict fine-mapping results to one genomic window.

    Args:
        df: Fine-mapping DataFrame.
        chrom: Chromosome number.
        start: Start position (inclusive).
        end: End position (inclusive).
        pos_col: Column name for position.
        chrom_col: Column name for chromosome (ignored when absent).

    Returns:
        Copy of the rows whose position lies in [start, end] (and whose
        chromosome matches, when a chromosome column is present).
    """
    in_window = df[pos_col].between(start, end)

    # Compare chromosomes as strings with any "chr" prefix stripped, so
    # "chr1", "1", and 1 all match each other.
    if chrom_col and chrom_col in df.columns:
        target = str(chrom).replace("chr", "")
        observed = df[chrom_col].astype(str).str.replace("chr", "", regex=False)
        in_window &= observed == target

    result = df[in_window].copy()
    logger.debug(
        f"Filtered fine-mapping data to {len(result)} variants in region "
        f"chr{chrom}:{start}-{end}"
    )
    return result
+ return filtered
95
+
96
+
97
def get_credible_sets(
    df: pd.DataFrame,
    cs_col: str = "cs",
) -> List[int]:
    """Get list of unique credible set IDs.

    Args:
        df: Fine-mapping DataFrame.
        cs_col: Column containing credible set assignments.

    Returns:
        Sorted list of unique credible set IDs. Variants outside any
        credible set (cs of 0 or NA) are excluded; a missing column
        yields an empty list.
    """
    if cs_col not in df.columns:
        return []
    assigned = df[cs_col].dropna()
    members = assigned[assigned != 0]
    return sorted(members.unique().tolist())
+ return sorted(cs_values.unique().tolist())
116
+
117
+
118
def filter_by_credible_set(
    df: pd.DataFrame,
    cs_id: int,
    cs_col: str = "cs",
) -> pd.DataFrame:
    """Filter to variants in a specific credible set.

    Args:
        df: Fine-mapping DataFrame.
        cs_id: Credible set ID to filter for.
        cs_col: Column containing credible set assignments.

    Returns:
        Copy of the rows assigned to the requested credible set.

    Raises:
        FinemappingValidationError: If the credible-set column is absent.
    """
    if cs_col not in df.columns:
        raise FinemappingValidationError(
            f"Cannot filter by credible set: column '{cs_col}' not found. "
            f"Available columns: {list(df.columns)}"
        )
    in_set = df[cs_col] == cs_id
    return df[in_set].copy()
139
+
140
+
141
def prepare_finemapping_for_plotting(
    df: pd.DataFrame,
    pos_col: str = "pos",
    pip_col: str = "pip",
    chrom: Optional[int] = None,
    start: Optional[int] = None,
    end: Optional[int] = None,
) -> pd.DataFrame:
    """Validate, optionally region-filter, and position-sort fine-mapping data.

    Args:
        df: Raw fine-mapping DataFrame.
        pos_col: Column name for position.
        pip_col: Column name for PIP.
        chrom: Optional chromosome for region filtering.
        start: Optional start position for region filtering.
        end: Optional end position for region filtering.

    Returns:
        Prepared copy of the data, sorted by position (ready for line plots).

    Raises:
        FinemappingValidationError: If the input fails validation.
    """
    validate_finemapping_df(df, pos_col=pos_col, pip_col=pip_col)

    prepared = df.copy()

    # Region filtering happens only when the full region is specified; a
    # partial chrom/start/end specification is silently ignored.
    region_given = chrom is not None and start is not None and end is not None
    if region_given:
        prepared = filter_finemapping_by_region(
            prepared, chrom, start, end, pos_col=pos_col
        )

    return prepared.sort_values(pos_col)
178
+
179
+
180
def get_top_pip_variants(
    df: pd.DataFrame,
    n: int = 5,
    pip_col: str = "pip",
    pip_threshold: float = 0.0,
) -> pd.DataFrame:
    """Get top variants by posterior inclusion probability.

    Args:
        df: Fine-mapping DataFrame.
        n: Maximum number of top variants to return.
        pip_col: Column containing PIP values.
        pip_threshold: Minimum PIP a variant must reach to be considered.

    Returns:
        Up to n rows with the highest PIP, in descending PIP order
        (ties resolved by first occurrence).
    """
    candidates = df[df[pip_col] >= pip_threshold]
    return candidates.nlargest(n, pip_col)
199
+
200
+
201
def calculate_credible_set_coverage(
    df: pd.DataFrame,
    cs_col: str = "cs",
    pip_col: str = "pip",
) -> dict:
    """Calculate cumulative PIP (coverage) for each credible set.

    Uses a single groupby pass instead of re-filtering the frame once per
    credible set, so the cost is O(n) rather than O(n * n_sets).

    Args:
        df: Fine-mapping DataFrame.
        cs_col: Column containing credible set assignments.
        pip_col: Column containing PIP values.

    Returns:
        Dictionary mapping credible set ID to cumulative PIP, in ascending
        ID order. Variants with cs of 0 or NA are excluded; an absent
        credible-set column yields an empty dict.
    """
    if cs_col not in df.columns:
        return {}

    # Drop variants not assigned to a credible set (cs == 0 or NA), then
    # sum PIP per set in one pass.
    in_cs = df[cs_col].notna() & (df[cs_col] != 0)
    totals = df.loc[in_cs].groupby(cs_col)[pip_col].sum()
    return {cs_id: total for cs_id, total in sorted(totals.items())}