pylocuszoom 1.1.2__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,490 @@
1
+ """Miami plot generator for comparing two GWAS datasets.
2
+
3
+ Provides visualization of GWAS comparisons with mirrored y-axes:
4
+ - Top panel shows -log10(p) ascending (standard Manhattan)
5
+ - Bottom panel shows -log10(p) descending (inverted y-axis)
6
+ - Both panels share x-axis with consistent chromosome alignment
7
+ """
8
+
9
+ from typing import Any, List, Optional, Tuple
10
+
11
+ import pandas as pd
12
+
13
+ from ._plotter_utils import (
14
+ DEFAULT_GENOMEWIDE_THRESHOLD,
15
+ MANHATTAN_EDGE_WIDTH,
16
+ MANHATTAN_POINT_SIZE,
17
+ POINT_EDGE_COLOR,
18
+ add_significance_line,
19
+ )
20
+ from .backends import BackendType, get_backend
21
+ from .backends.hover import HoverConfig, HoverDataBuilder
22
+ from .manhattan import prepare_manhattan_data
23
+
24
+
25
class MiamiPlotter:
    """Miami plot generator for comparing two GWAS datasets.

    Draws two stacked Manhattan panels that share one x-axis: the top panel
    shows -log10(p) increasing upwards, the bottom panel shows -log10(p)
    increasing downwards (inverted y-axis), so the two results mirror each
    other around the chromosome axis.

    Supports multiple rendering backends:
    - matplotlib (default): Static publication-quality plots
    - plotly: Interactive HTML with hover tooltips
    - bokeh: Interactive HTML for dashboards

    Args:
        species: Species name ('canine', 'feline', 'human', or None).
            Forwarded to ``prepare_manhattan_data``.
        backend: Plotting backend ('matplotlib', 'plotly', or 'bokeh').
        genomewide_threshold: P-value threshold stored on the instance as
            ``self.genomewide_threshold``. Note that ``plot_miami`` does not
            read it: the per-panel ``top_threshold`` / ``bottom_threshold``
            arguments decide which significance lines are drawn.

    Example:
        >>> plotter = MiamiPlotter(species="human")
        >>> fig = plotter.plot_miami(discovery_df, replication_df)
        >>> fig.savefig("miami_plot.png", dpi=150)
    """

    def __init__(
        self,
        species: Optional[str] = "canine",
        backend: BackendType = "matplotlib",
        genomewide_threshold: float = DEFAULT_GENOMEWIDE_THRESHOLD,
    ):
        """Initialize the Miami plotter and resolve the rendering backend."""
        self.species = species
        self._backend = get_backend(backend)
        # Kept alongside the backend object because region highlighting
        # dispatches on the backend *name* (see _draw_region_highlight).
        self.backend_name = backend
        self.genomewide_threshold = genomewide_threshold

    def plot_miami(
        self,
        top_df: pd.DataFrame,
        bottom_df: pd.DataFrame,
        chrom_col: str = "chrom",
        pos_col: str = "pos",
        p_col: str = "p",
        rs_col: Optional[str] = None,
        custom_chrom_order: Optional[List[str]] = None,
        top_threshold: Optional[float] = DEFAULT_GENOMEWIDE_THRESHOLD,
        bottom_threshold: Optional[float] = DEFAULT_GENOMEWIDE_THRESHOLD,
        top_label: Optional[str] = None,
        bottom_label: Optional[str] = None,
        top_snp_annotations: Optional[List[str]] = None,
        bottom_snp_annotations: Optional[List[str]] = None,
        highlight_regions: Optional[List[Tuple[str, int, int]]] = None,
        highlight_color: str = "yellow",
        highlight_alpha: float = 0.3,
        figsize: Tuple[float, float] = (12, 8),
        title: Optional[str] = None,
    ) -> Any:
        """Create a Miami plot comparing two GWAS datasets.

        The top panel displays -log10(p) values ascending (standard Manhattan),
        while the bottom panel displays -log10(p) descending (inverted), creating
        a mirrored comparison.

        Args:
            top_df: GWAS results DataFrame for top panel.
            bottom_df: GWAS results DataFrame for bottom panel.
            chrom_col: Column name for chromosome.
            pos_col: Column name for position.
            p_col: Column name for p-value.
            rs_col: Column name for SNP RS ID (for hover tooltips and annotations).
            custom_chrom_order: Custom chromosome order. When omitted, the
                sorted union of the chromosomes found in both DataFrames is
                used so that both panels are laid out identically.
            top_threshold: Significance threshold for top panel. None to skip.
            bottom_threshold: Significance threshold for bottom panel. None to skip.
            top_label: Label for top panel (e.g., "Discovery").
            bottom_label: Label for bottom panel (e.g., "Replication").
            top_snp_annotations: List of SNP IDs to annotate on top panel.
                Ignored unless rs_col is set. Basic text labels (no collision
                avoidance).
            bottom_snp_annotations: List of SNP IDs to annotate on bottom panel.
                Ignored unless rs_col is set. Basic text labels (no collision
                avoidance).
            highlight_regions: List of (chrom, start, end) tuples to highlight.
                Regions are drawn as vertical spans across both panels; a
                region whose chromosome appears in neither dataset is skipped.
            highlight_color: Color for highlighted regions.
            highlight_alpha: Transparency for highlighted regions (0-1).
            figsize: Figure size as (width, height).
            title: Overall plot title.

        Returns:
            Figure object (type depends on backend).

        Raises:
            ValueError: If required columns are missing from either DataFrame.

        Example:
            >>> fig = plotter.plot_miami(
            ...     discovery_df,
            ...     replication_df,
            ...     top_label="Discovery",
            ...     bottom_label="Replication",
            ... )
        """
        # Validate before touching any column so that a missing column raises
        # the documented ValueError rather than a KeyError from a raw lookup.
        required_cols = (chrom_col, pos_col, p_col)
        self._validate_columns(top_df, "top_df", required_cols)
        self._validate_columns(bottom_df, "bottom_df", required_cols)

        # Both panels must be prepared with the same chromosome order or the
        # shared x-axis would place the same chromosome at different offsets.
        # The union is only needed when the caller did not supply an order.
        if custom_chrom_order is None:
            custom_chrom_order = self._get_chromosome_union(
                top_df, bottom_df, chrom_col
            )

        top_prepared, bottom_prepared = (
            prepare_manhattan_data(
                df=frame,
                chrom_col=chrom_col,
                pos_col=pos_col,
                p_col=p_col,
                species=self.species,
                custom_order=custom_chrom_order,
            )
            for frame in (top_df, bottom_df)
        )

        # Two equally tall panels sharing the x-axis.
        fig, axes = self._backend.create_figure(
            n_panels=2,
            height_ratios=[1.0, 1.0],
            figsize=figsize,
            sharex=True,
        )
        top_ax, bottom_ax = axes

        # Layout metadata is taken from the top dataset and reused for both
        # panels so tick positions agree.
        chrom_order = top_prepared.attrs["chrom_order"]
        chrom_centers = top_prepared.attrs["chrom_centers"]

        # Scatter + significance line, top panel first, then bottom panel.
        panels = (
            (top_ax, top_prepared, top_threshold),
            (bottom_ax, bottom_prepared, bottom_threshold),
        )
        for ax, prepared, threshold in panels:
            self._render_manhattan_points(
                ax=ax,
                prepared_df=prepared,
                chrom_order=chrom_order,
                pos_col=pos_col,
                p_col=p_col,
                rs_col=rs_col,
            )
            add_significance_line(self._backend, ax, threshold)

        # Identical x limits on both panels, padded by 1% of the data span.
        x_extent = self._combined_extent(
            top_prepared["_cumulative_pos"],
            bottom_prepared["_cumulative_pos"],
        )
        if x_extent is not None:
            x_min, x_max = x_extent
            x_padding = (x_max - x_min) * 0.01
            if x_padding == 0:
                # All points share one x position: use a nominal padding so
                # the limits are not identical.
                x_padding = 1.0
            for ax in (top_ax, bottom_ax):
                self._backend.set_xlim(ax, x_min - x_padding, x_max + x_padding)

        # Y limits carry the Miami layout: each panel gets 10% headroom above
        # its own maximum.
        top_y_max = self._padded_y_max(top_prepared["_neg_log_p"])
        bottom_y_max = self._padded_y_max(bottom_prepared["_neg_log_p"])

        # Top panel: normal y-axis (0 at bottom, max at top).
        self._backend.set_ylim(top_ax, 0, top_y_max)

        # Bottom panel: inverted y-axis (max at bottom, 0 at top).
        # For matplotlib, passing (max, 0) inverts the axis.
        self._backend.set_ylim(bottom_ax, bottom_y_max, 0)

        # Chromosome tick labels on both panels (interactive backends need
        # them set per panel).
        valid_chroms = [c for c in chrom_order if c in chrom_centers]
        positions = [chrom_centers[c] for c in valid_chroms]
        labels = [str(c) for c in valid_chroms]
        for ax in (top_ax, bottom_ax):
            self._backend.set_xticks(ax, positions, labels, fontsize=8)

        # Y-axis labels on both panels; x-axis label only on the bottom one.
        self._backend.set_ylabel(top_ax, r"$-\log_{10}(p)$", fontsize=12)
        self._backend.set_ylabel(bottom_ax, r"$-\log_{10}(p)$", fontsize=12)
        self._backend.set_xlabel(bottom_ax, "Chromosome", fontsize=12)

        # Panel labels sit at the outer edge of each panel: near the top of
        # the top panel and near the bottom of the bottom panel.
        if top_label:
            self._backend.add_panel_label(top_ax, top_label, y_frac=0.95)
        if bottom_label:
            self._backend.add_panel_label(bottom_ax, bottom_label, y_frac=0.05)

        # SNP annotations need rs_col to look the IDs up.
        if top_snp_annotations and rs_col:
            self._add_snp_annotations(
                ax=top_ax,
                prepared_df=top_prepared,
                rs_col=rs_col,
                snp_ids=top_snp_annotations,
            )
        if bottom_snp_annotations and rs_col:
            self._add_snp_annotations(
                ax=bottom_ax,
                prepared_df=bottom_prepared,
                rs_col=rs_col,
                snp_ids=bottom_snp_annotations,
            )

        # Region highlighting.
        if highlight_regions:
            # Offsets convert base-pair positions to cumulative x positions.
            # Top-panel offsets take precedence; bottom-panel offsets fill in
            # chromosomes that only the bottom dataset contains, so those
            # regions are no longer silently skipped. This relies on both
            # panels sharing one coordinate system, which the shared x-axis
            # already assumes.
            chrom_offsets = {
                **self._get_chrom_offsets(bottom_prepared, pos_col),
                **self._get_chrom_offsets(top_prepared, pos_col),
            }
            for chrom, start, end in highlight_regions:
                self._draw_region_highlight(
                    fig=fig,
                    top_ax=top_ax,
                    bottom_ax=bottom_ax,
                    chrom=str(chrom),
                    start=start,
                    end=end,
                    chrom_offsets=chrom_offsets,
                    color=highlight_color,
                    alpha=highlight_alpha,
                )

        # Hide spines for clean appearance.
        self._backend.hide_spines(top_ax, ["top", "right"])
        self._backend.hide_spines(bottom_ax, ["top", "right"])

        # Overall title; leave room for it at the top when present.
        if title:
            self._backend.set_suptitle(fig, title, fontsize=14)
            self._backend.finalize_layout(fig, top=0.92, hspace=0.05)
        else:
            self._backend.finalize_layout(fig, hspace=0.05)

        return fig

    @staticmethod
    def _validate_columns(
        df: pd.DataFrame,
        frame_name: str,
        required: Tuple[str, ...],
    ) -> None:
        """Raise ValueError if any of *required* is not a column of *df*.

        Args:
            df: DataFrame to check.
            frame_name: Argument name used in the error message.
            required: Column names that must be present.

        Raises:
            ValueError: If one or more required columns are missing.
        """
        missing = [col for col in required if col not in df.columns]
        if missing:
            raise ValueError(
                f"{frame_name} is missing required column(s): {missing}"
            )

    @staticmethod
    def _combined_extent(*columns: pd.Series) -> Optional[Tuple[float, float]]:
        """Return (min, max) across several columns, ignoring NaN extrema.

        An empty (or all-NaN) column yields NaN from ``min()``/``max()``; such
        columns are skipped so one empty dataset does not poison the limits.

        Returns:
            ``(lowest, highest)``, or None when no column has a usable value.
        """
        lows = [low for low in (col.min() for col in columns) if pd.notna(low)]
        highs = [high for high in (col.max() for col in columns) if pd.notna(high)]
        if not lows or not highs:
            return None
        return min(lows), max(highs)

    @staticmethod
    def _padded_y_max(neg_log_p: pd.Series) -> float:
        """Return the panel's upper y limit: the column maximum plus 10%.

        Falls back to 1.0 when the maximum is NaN (no data) or not positive,
        since a NaN or zero-height limit cannot describe a usable axis.
        """
        peak = neg_log_p.max()
        if pd.isna(peak) or peak <= 0:
            return 1.0
        return peak * 1.1

    def _get_chromosome_union(
        self,
        top_df: pd.DataFrame,
        bottom_df: pd.DataFrame,
        chrom_col: str,
    ) -> List[str]:
        """Get union of chromosomes from both DataFrames.

        Ensures consistent chromosome ordering across both panels,
        which is critical for x-axis alignment in Miami plots.

        Args:
            top_df: Top panel DataFrame.
            bottom_df: Bottom panel DataFrame.
            chrom_col: Chromosome column name.

        Returns:
            All unique chromosome names (as strings): numeric names first in
            numeric order, then the remaining names alphabetically. A leading
            "chr" prefix (any case) is ignored when deciding whether a name is
            numeric, so "chr2" sorts before "chr10".
        """
        all_chroms = set(top_df[chrom_col].astype(str).unique())
        all_chroms |= set(bottom_df[chrom_col].astype(str).unique())

        def sort_key(chrom: str) -> tuple:
            # Strip an optional "chr" prefix before the numeric test so that
            # prefixed names are ordered by value, not lexicographically.
            core = chrom[3:] if chrom[:3].lower() == "chr" else chrom
            try:
                # The name itself breaks ties (e.g. "1" vs "chr1") so the
                # result does not depend on set iteration order.
                return (0, int(core), chrom)
            except ValueError:
                return (1, 0, chrom)

        return sorted(all_chroms, key=sort_key)

    def _render_manhattan_points(
        self,
        ax: Any,
        prepared_df: pd.DataFrame,
        chrom_order: List[str],
        pos_col: str,
        p_col: str,
        rs_col: Optional[str] = None,
        point_size: int = MANHATTAN_POINT_SIZE,
    ) -> None:
        """Render Manhattan plot scatter points grouped by chromosome.

        One scatter call is issued per chromosome that has rows, using the
        colour stored in the first row of that chromosome's ``_color`` column.

        Args:
            ax: Axes object from backend.
            prepared_df: DataFrame with _chrom_str, _cumulative_pos, _neg_log_p, _color.
            chrom_order: List of chromosome names in display order.
            pos_col: Original position column name.
            p_col: Original p-value column name.
            rs_col: RS ID column name for hover data.
            point_size: Size of scatter points.
        """
        # The hover builder does not depend on the chromosome, so build it
        # once. It is only needed when the backend supports hover and an RS
        # column is available.
        hover_builder = None
        if self._backend.supports_hover and rs_col is not None:
            hover_builder = HoverDataBuilder(
                HoverConfig(snp_col=rs_col, pos_col=pos_col, p_col=p_col)
            )

        for chrom in chrom_order:
            chrom_data = prepared_df[prepared_df["_chrom_str"] == chrom]
            if chrom_data.empty:
                continue

            hover_df = None
            if hover_builder is not None:
                hover_df = hover_builder.build_dataframe(chrom_data)

            self._backend.scatter(
                ax,
                chrom_data["_cumulative_pos"],
                chrom_data["_neg_log_p"],
                colors=chrom_data["_color"].iloc[0],
                sizes=point_size,
                marker="o",
                edgecolor=POINT_EDGE_COLOR,
                linewidth=MANHATTAN_EDGE_WIDTH,
                zorder=2,
                hover_data=hover_df,
            )

    def _add_snp_annotations(
        self,
        ax: Any,
        prepared_df: pd.DataFrame,
        rs_col: str,
        snp_ids: List[str],
    ) -> None:
        """Add text annotations for specified SNPs.

        Basic annotation without collision avoidance. For matplotlib,
        use add_snp_labels() in LocusZoomPlotter for adjustText support.

        Args:
            ax: Axes object from backend.
            prepared_df: DataFrame with _cumulative_pos, _neg_log_p, and RS column.
            rs_col: Column name containing SNP IDs.
            snp_ids: List of SNP IDs to annotate.
        """
        # Keep only the rows whose SNP ID was requested.
        selected = prepared_df[prepared_df[rs_col].isin(snp_ids)]

        labelled_points = zip(
            selected["_cumulative_pos"],
            selected["_neg_log_p"],
            selected[rs_col],
        )
        for x, y, label in labelled_points:
            # va="bottom" anchors the bottom edge of the text at the point.
            self._backend.add_text(
                ax,
                x=x,
                y=y,
                text=str(label),
                fontsize=8,
                ha="center",
                va="bottom",
            )

    def _get_chrom_offsets(
        self, prepared_df: pd.DataFrame, pos_col: str
    ) -> dict[str, float]:
        """Calculate cumulative position offset for each chromosome.

        The offset is the difference between cumulative position and original
        position for the first SNP on each chromosome.

        Args:
            prepared_df: DataFrame with _chrom_str, _cumulative_pos columns
                and a "chrom_order" entry in ``attrs``.
            pos_col: Original position column name.

        Returns:
            Dict mapping chromosome string to its cumulative offset.
            Chromosomes listed in "chrom_order" that have no rows are omitted.
        """
        offsets: dict[str, float] = {}
        for chrom in prepared_df.attrs.get("chrom_order", []):
            key = str(chrom)
            rows = prepared_df[prepared_df["_chrom_str"] == key]
            if rows.empty:
                continue
            first_row = rows.iloc[0]
            offsets[key] = first_row["_cumulative_pos"] - first_row[pos_col]
        return offsets

    def _draw_region_highlight(
        self,
        fig: Any,
        top_ax: Any,
        bottom_ax: Any,
        chrom: str,
        start: int,
        end: int,
        chrom_offsets: dict[str, float],
        color: str,
        alpha: float,
    ) -> None:
        """Draw highlighted region across both panels.

        Uses backend-specific implementations since region highlighting
        is not part of the PlotBackend protocol. Nothing is drawn when the
        chromosome has no known offset or the backend name is not one of
        'matplotlib', 'plotly' or 'bokeh'.

        Args:
            fig: Figure object from backend.
            top_ax: Top panel axes.
            bottom_ax: Bottom panel axes.
            chrom: Chromosome name (as string).
            start: Region start position (bp).
            end: Region end position (bp).
            chrom_offsets: Dict mapping chromosome to cumulative offset.
            color: Highlight color.
            alpha: Highlight transparency.
        """
        offset = chrom_offsets.get(chrom)
        if offset is None:
            return  # Chromosome not in data

        # Translate base-pair coordinates into cumulative x coordinates.
        x_start = offset + start
        x_end = offset + end

        if self.backend_name == "matplotlib":
            for ax in (top_ax, bottom_ax):
                ax.axvspan(x_start, x_end, color=color, alpha=alpha, zorder=0)

        elif self.backend_name == "plotly":
            # For plotly the figure owns both subplots; row 1 is the top
            # panel and row 2 the bottom panel.
            for row in (1, 2):
                fig.add_vrect(
                    x0=x_start,
                    x1=x_end,
                    fillcolor=color,
                    opacity=alpha,
                    layer="below",
                    line_width=0,
                    row=row,
                    col=1,
                )

        elif self.backend_name == "bokeh":
            from bokeh.models import BoxAnnotation

            # For bokeh, axes are figure objects; each needs its own
            # annotation instance.
            for ax in (top_ax, bottom_ax):
                box = BoxAnnotation(
                    left=x_start,
                    right=x_end,
                    fill_color=color,
                    fill_alpha=alpha,
                )
                ax.add_layout(box)