pylocuszoom 0.2.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylocuszoom/__init__.py +52 -1
- pylocuszoom/backends/base.py +47 -0
- pylocuszoom/backends/bokeh_backend.py +323 -61
- pylocuszoom/backends/matplotlib_backend.py +133 -7
- pylocuszoom/backends/plotly_backend.py +423 -33
- pylocuszoom/colors.py +3 -1
- pylocuszoom/finemapping.py +0 -1
- pylocuszoom/gene_track.py +232 -23
- pylocuszoom/loaders.py +862 -0
- pylocuszoom/plotter.py +354 -245
- pylocuszoom/py.typed +0 -0
- pylocuszoom/recombination.py +4 -4
- pylocuszoom/schemas.py +395 -0
- {pylocuszoom-0.2.0.dist-info → pylocuszoom-0.5.0.dist-info}/METADATA +125 -31
- pylocuszoom-0.5.0.dist-info/RECORD +24 -0
- pylocuszoom-0.2.0.dist-info/RECORD +0 -21
- {pylocuszoom-0.2.0.dist-info → pylocuszoom-0.5.0.dist-info}/WHEEL +0 -0
- {pylocuszoom-0.2.0.dist-info → pylocuszoom-0.5.0.dist-info}/licenses/LICENSE.md +0 -0
pylocuszoom/plotter.py
CHANGED
|
@@ -19,7 +19,6 @@ from matplotlib.axes import Axes
|
|
|
19
19
|
from matplotlib.figure import Figure
|
|
20
20
|
from matplotlib.lines import Line2D
|
|
21
21
|
from matplotlib.patches import Patch
|
|
22
|
-
from matplotlib.ticker import FuncFormatter, MaxNLocator
|
|
23
22
|
|
|
24
23
|
from .backends import BackendType, get_backend
|
|
25
24
|
from .colors import (
|
|
@@ -38,11 +37,16 @@ from .finemapping import (
|
|
|
38
37
|
get_credible_sets,
|
|
39
38
|
prepare_finemapping_for_plotting,
|
|
40
39
|
)
|
|
41
|
-
from .gene_track import
|
|
40
|
+
from .gene_track import (
|
|
41
|
+
assign_gene_positions,
|
|
42
|
+
plot_gene_track,
|
|
43
|
+
plot_gene_track_generic,
|
|
44
|
+
)
|
|
42
45
|
from .labels import add_snp_labels
|
|
43
46
|
from .ld import calculate_ld, find_plink
|
|
44
47
|
from .logging import enable_logging, logger
|
|
45
48
|
from .recombination import (
|
|
49
|
+
RECOMB_COLOR,
|
|
46
50
|
add_recombination_overlay,
|
|
47
51
|
download_canine_recombination_maps,
|
|
48
52
|
get_default_data_dir,
|
|
@@ -50,8 +54,8 @@ from .recombination import (
|
|
|
50
54
|
)
|
|
51
55
|
from .utils import normalize_chrom, validate_genes_df, validate_gwas_df
|
|
52
56
|
|
|
53
|
-
# Default significance threshold: 5e-8
|
|
54
|
-
DEFAULT_GENOMEWIDE_THRESHOLD = 5e-
|
|
57
|
+
# Default significance threshold: 5e-8 (genome-wide significance)
|
|
58
|
+
DEFAULT_GENOMEWIDE_THRESHOLD = 5e-8
|
|
55
59
|
DEFAULT_GENOMEWIDE_LINE = -np.log10(DEFAULT_GENOMEWIDE_THRESHOLD)
|
|
56
60
|
|
|
57
61
|
|
|
@@ -135,11 +139,8 @@ class LocusZoomPlotter:
|
|
|
135
139
|
@staticmethod
|
|
136
140
|
def _default_build(species: str) -> Optional[str]:
|
|
137
141
|
"""Get default genome build for species."""
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
if species == "feline":
|
|
141
|
-
return "felCat9"
|
|
142
|
-
return None
|
|
142
|
+
builds = {"canine": "canfam3.1", "feline": "felCat9"}
|
|
143
|
+
return builds.get(species)
|
|
143
144
|
|
|
144
145
|
def _ensure_recomb_maps(self) -> Optional[Path]:
|
|
145
146
|
"""Ensure recombination maps are downloaded.
|
|
@@ -153,8 +154,8 @@ class LocusZoomPlotter:
|
|
|
153
154
|
default_dir = get_default_data_dir()
|
|
154
155
|
if (
|
|
155
156
|
default_dir.exists()
|
|
156
|
-
and len(list(default_dir.glob("chr*_recomb.tsv"))) >=
|
|
157
|
-
):
|
|
157
|
+
and len(list(default_dir.glob("chr*_recomb.tsv"))) >= 39
|
|
158
|
+
): # 38 autosomes + X
|
|
158
159
|
return default_dir
|
|
159
160
|
# Download
|
|
160
161
|
try:
|
|
@@ -211,7 +212,7 @@ class LocusZoomPlotter:
|
|
|
211
212
|
p_col: str = "p_wald",
|
|
212
213
|
rs_col: str = "rs",
|
|
213
214
|
figsize: Tuple[int, int] = (12, 8),
|
|
214
|
-
) ->
|
|
215
|
+
) -> Any:
|
|
215
216
|
"""Create a regional association plot.
|
|
216
217
|
|
|
217
218
|
Args:
|
|
@@ -288,62 +289,70 @@ class LocusZoomPlotter:
|
|
|
288
289
|
fig, ax, gene_ax = self._create_figure(genes_df, chrom, start, end, figsize)
|
|
289
290
|
|
|
290
291
|
# Plot association data
|
|
291
|
-
self._plot_association(ax, df, pos_col, ld_col, lead_pos)
|
|
292
|
+
self._plot_association(ax, df, pos_col, ld_col, lead_pos, rs_col, p_col)
|
|
292
293
|
|
|
293
294
|
# Add significance line
|
|
294
|
-
|
|
295
|
+
self._backend.axhline(
|
|
296
|
+
ax,
|
|
295
297
|
y=self._genomewide_line,
|
|
296
298
|
color="red",
|
|
297
|
-
linestyle=
|
|
299
|
+
linestyle="--",
|
|
298
300
|
linewidth=1,
|
|
299
|
-
alpha=0.
|
|
301
|
+
alpha=0.65,
|
|
300
302
|
zorder=1,
|
|
301
303
|
)
|
|
302
304
|
|
|
303
|
-
# Add SNP labels
|
|
305
|
+
# Add SNP labels (matplotlib only - interactive backends use hover tooltips)
|
|
304
306
|
if snp_labels and rs_col in df.columns and label_top_n > 0 and not df.empty:
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
307
|
+
if self.backend_name == "matplotlib":
|
|
308
|
+
add_snp_labels(
|
|
309
|
+
ax,
|
|
310
|
+
df,
|
|
311
|
+
pos_col=pos_col,
|
|
312
|
+
neglog10p_col="neglog10p",
|
|
313
|
+
rs_col=rs_col,
|
|
314
|
+
label_top_n=label_top_n,
|
|
315
|
+
genes_df=genes_df,
|
|
316
|
+
chrom=chrom,
|
|
317
|
+
)
|
|
315
318
|
|
|
316
|
-
# Add recombination overlay
|
|
319
|
+
# Add recombination overlay (all backends)
|
|
317
320
|
if recomb_df is not None and not recomb_df.empty:
|
|
318
|
-
|
|
321
|
+
if self.backend_name == "matplotlib":
|
|
322
|
+
add_recombination_overlay(ax, recomb_df, start, end)
|
|
323
|
+
else:
|
|
324
|
+
self._add_recombination_overlay_generic(ax, recomb_df, start, end)
|
|
319
325
|
|
|
320
326
|
# Format axes
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
ax
|
|
324
|
-
ax.spines["right"].set_visible(False)
|
|
327
|
+
self._backend.set_ylabel(ax, r"$-\log_{10}$ P")
|
|
328
|
+
self._backend.set_xlim(ax, start, end)
|
|
329
|
+
self._backend.hide_spines(ax, ["top", "right"])
|
|
325
330
|
|
|
326
|
-
# Add LD legend
|
|
331
|
+
# Add LD legend (all backends)
|
|
327
332
|
if ld_col is not None and ld_col in df.columns:
|
|
328
|
-
self.
|
|
333
|
+
if self.backend_name == "matplotlib":
|
|
334
|
+
self._add_ld_legend(ax)
|
|
335
|
+
else:
|
|
336
|
+
self._backend.add_ld_legend(ax, LD_BINS, LEAD_SNP_COLOR)
|
|
329
337
|
|
|
330
|
-
# Plot gene track
|
|
338
|
+
# Plot gene track (all backends)
|
|
331
339
|
if genes_df is not None and gene_ax is not None:
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
340
|
+
if self.backend_name == "matplotlib":
|
|
341
|
+
plot_gene_track(gene_ax, genes_df, chrom, start, end, exons_df)
|
|
342
|
+
else:
|
|
343
|
+
plot_gene_track_generic(
|
|
344
|
+
gene_ax, self._backend, genes_df, chrom, start, end, exons_df
|
|
345
|
+
)
|
|
346
|
+
self._backend.set_xlabel(gene_ax, f"Chromosome {chrom} (Mb)")
|
|
347
|
+
self._backend.hide_spines(gene_ax, ["top", "right", "left"])
|
|
337
348
|
else:
|
|
338
|
-
|
|
349
|
+
self._backend.set_xlabel(ax, f"Chromosome {chrom} (Mb)")
|
|
339
350
|
|
|
340
351
|
# Format x-axis with Mb labels
|
|
341
|
-
|
|
342
|
-
ax.xaxis.set_major_locator(MaxNLocator(nbins=6))
|
|
352
|
+
self._backend.format_xaxis_mb(ax)
|
|
343
353
|
|
|
344
354
|
# Adjust layout
|
|
345
|
-
|
|
346
|
-
plt.ion()
|
|
355
|
+
self._backend.finalize_layout(fig, hspace=0.1)
|
|
347
356
|
|
|
348
357
|
return fig
|
|
349
358
|
|
|
@@ -381,18 +390,20 @@ class LocusZoomPlotter:
|
|
|
381
390
|
assoc_height = figsize[1] * 0.6
|
|
382
391
|
total_height = assoc_height + gene_track_height
|
|
383
392
|
|
|
384
|
-
fig, axes =
|
|
385
|
-
2,
|
|
386
|
-
1,
|
|
387
|
-
figsize=(figsize[0], total_height),
|
|
393
|
+
fig, axes = self._backend.create_figure(
|
|
394
|
+
n_panels=2,
|
|
388
395
|
height_ratios=[assoc_height, gene_track_height],
|
|
396
|
+
figsize=(figsize[0], total_height),
|
|
389
397
|
sharex=True,
|
|
390
|
-
gridspec_kw={"hspace": 0},
|
|
391
398
|
)
|
|
392
399
|
return fig, axes[0], axes[1]
|
|
393
400
|
else:
|
|
394
|
-
fig,
|
|
395
|
-
|
|
401
|
+
fig, axes = self._backend.create_figure(
|
|
402
|
+
n_panels=1,
|
|
403
|
+
height_ratios=[1.0],
|
|
404
|
+
figsize=(figsize[0], figsize[1] * 0.75),
|
|
405
|
+
)
|
|
406
|
+
return fig, axes[0], None
|
|
396
407
|
|
|
397
408
|
def _plot_association(
|
|
398
409
|
self,
|
|
@@ -401,8 +412,28 @@ class LocusZoomPlotter:
|
|
|
401
412
|
pos_col: str,
|
|
402
413
|
ld_col: Optional[str],
|
|
403
414
|
lead_pos: Optional[int],
|
|
415
|
+
rs_col: Optional[str] = None,
|
|
416
|
+
p_col: Optional[str] = None,
|
|
404
417
|
) -> None:
|
|
405
418
|
"""Plot association scatter with LD coloring."""
|
|
419
|
+
|
|
420
|
+
def _build_hover_data(subset_df: pd.DataFrame) -> Optional[pd.DataFrame]:
|
|
421
|
+
"""Build hover data for interactive backends."""
|
|
422
|
+
hover_cols = {}
|
|
423
|
+
# RS ID first (will be bold in hover)
|
|
424
|
+
if rs_col and rs_col in subset_df.columns:
|
|
425
|
+
hover_cols["SNP"] = subset_df[rs_col].values
|
|
426
|
+
# Position
|
|
427
|
+
if pos_col in subset_df.columns:
|
|
428
|
+
hover_cols["Position"] = subset_df[pos_col].values
|
|
429
|
+
# P-value
|
|
430
|
+
if p_col and p_col in subset_df.columns:
|
|
431
|
+
hover_cols["P-value"] = subset_df[p_col].values
|
|
432
|
+
# LD
|
|
433
|
+
if ld_col and ld_col in subset_df.columns:
|
|
434
|
+
hover_cols["R²"] = subset_df[ld_col].values
|
|
435
|
+
return pd.DataFrame(hover_cols) if hover_cols else None
|
|
436
|
+
|
|
406
437
|
# LD-based coloring
|
|
407
438
|
if ld_col is not None and ld_col in df.columns:
|
|
408
439
|
df["ld_bin"] = df[ld_col].apply(get_ld_bin)
|
|
@@ -411,40 +442,46 @@ class LocusZoomPlotter:
|
|
|
411
442
|
palette = get_ld_color_palette()
|
|
412
443
|
for bin_label in df["ld_bin"].unique():
|
|
413
444
|
bin_data = df[df["ld_bin"] == bin_label]
|
|
414
|
-
|
|
445
|
+
self._backend.scatter(
|
|
446
|
+
ax,
|
|
415
447
|
bin_data[pos_col],
|
|
416
448
|
bin_data["neglog10p"],
|
|
417
|
-
|
|
418
|
-
|
|
449
|
+
colors=palette.get(bin_label, "#BEBEBE"),
|
|
450
|
+
sizes=60,
|
|
419
451
|
edgecolor="black",
|
|
420
452
|
linewidth=0.5,
|
|
421
453
|
zorder=2,
|
|
454
|
+
hover_data=_build_hover_data(bin_data),
|
|
422
455
|
)
|
|
423
456
|
else:
|
|
424
457
|
# Default: grey points
|
|
425
|
-
|
|
458
|
+
self._backend.scatter(
|
|
459
|
+
ax,
|
|
426
460
|
df[pos_col],
|
|
427
461
|
df["neglog10p"],
|
|
428
|
-
|
|
429
|
-
|
|
462
|
+
colors="#BEBEBE",
|
|
463
|
+
sizes=60,
|
|
430
464
|
edgecolor="black",
|
|
431
465
|
linewidth=0.5,
|
|
432
466
|
zorder=2,
|
|
467
|
+
hover_data=_build_hover_data(df),
|
|
433
468
|
)
|
|
434
469
|
|
|
435
|
-
# Highlight lead SNP
|
|
470
|
+
# Highlight lead SNP with larger, more prominent marker
|
|
436
471
|
if lead_pos is not None:
|
|
437
472
|
lead_snp = df[df[pos_col] == lead_pos]
|
|
438
473
|
if not lead_snp.empty:
|
|
439
|
-
|
|
474
|
+
self._backend.scatter(
|
|
475
|
+
ax,
|
|
440
476
|
lead_snp[pos_col],
|
|
441
477
|
lead_snp["neglog10p"],
|
|
442
|
-
|
|
443
|
-
|
|
478
|
+
colors=LEAD_SNP_COLOR,
|
|
479
|
+
sizes=120, # Larger than regular points for visibility
|
|
444
480
|
marker="D",
|
|
445
|
-
|
|
446
|
-
|
|
481
|
+
edgecolor="black",
|
|
482
|
+
linewidth=1.5,
|
|
447
483
|
zorder=10,
|
|
484
|
+
hover_data=_build_hover_data(lead_snp),
|
|
448
485
|
)
|
|
449
486
|
|
|
450
487
|
def _add_ld_legend(self, ax: Axes) -> None:
|
|
@@ -485,51 +522,68 @@ class LocusZoomPlotter:
|
|
|
485
522
|
labelspacing=0.4,
|
|
486
523
|
)
|
|
487
524
|
|
|
488
|
-
def
|
|
489
|
-
|
|
490
|
-
|
|
525
|
+
def _add_recombination_overlay_generic(
|
|
526
|
+
self,
|
|
527
|
+
ax: Any,
|
|
528
|
+
recomb_df: pd.DataFrame,
|
|
529
|
+
start: int,
|
|
530
|
+
end: int,
|
|
531
|
+
) -> None:
|
|
532
|
+
"""Add recombination overlay for interactive backends (plotly/bokeh).
|
|
491
533
|
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
marker="^",
|
|
499
|
-
color="w",
|
|
500
|
-
markerfacecolor=color,
|
|
501
|
-
markeredgecolor="black",
|
|
502
|
-
markersize=7,
|
|
503
|
-
label=label,
|
|
504
|
-
)
|
|
505
|
-
)
|
|
534
|
+
Creates a secondary y-axis with recombination rate line and fill.
|
|
535
|
+
"""
|
|
536
|
+
# Filter to region
|
|
537
|
+
region_recomb = recomb_df[
|
|
538
|
+
(recomb_df["pos"] >= start) & (recomb_df["pos"] <= end)
|
|
539
|
+
].copy()
|
|
506
540
|
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
legend_elements.append(
|
|
510
|
-
Line2D(
|
|
511
|
-
[0],
|
|
512
|
-
[0],
|
|
513
|
-
marker="v",
|
|
514
|
-
color="w",
|
|
515
|
-
markerfacecolor=color,
|
|
516
|
-
markeredgecolor="black",
|
|
517
|
-
markersize=7,
|
|
518
|
-
label=label,
|
|
519
|
-
)
|
|
520
|
-
)
|
|
541
|
+
if region_recomb.empty:
|
|
542
|
+
return
|
|
521
543
|
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
544
|
+
# Create secondary y-axis
|
|
545
|
+
yaxis_name = self._backend.create_twin_axis(ax)
|
|
546
|
+
|
|
547
|
+
# For plotly, yaxis_name is a tuple (fig, row, secondary_y)
|
|
548
|
+
# For bokeh, yaxis_name is just a string
|
|
549
|
+
if isinstance(yaxis_name, tuple):
|
|
550
|
+
_, _, secondary_y = yaxis_name
|
|
551
|
+
else:
|
|
552
|
+
secondary_y = yaxis_name
|
|
553
|
+
|
|
554
|
+
# Plot fill under curve
|
|
555
|
+
self._backend.fill_between_secondary(
|
|
556
|
+
ax,
|
|
557
|
+
region_recomb["pos"],
|
|
558
|
+
0,
|
|
559
|
+
region_recomb["rate"],
|
|
560
|
+
color=RECOMB_COLOR,
|
|
561
|
+
alpha=0.15,
|
|
562
|
+
yaxis_name=secondary_y,
|
|
563
|
+
)
|
|
564
|
+
|
|
565
|
+
# Plot recombination rate line
|
|
566
|
+
self._backend.line_secondary(
|
|
567
|
+
ax,
|
|
568
|
+
region_recomb["pos"],
|
|
569
|
+
region_recomb["rate"],
|
|
570
|
+
color=RECOMB_COLOR,
|
|
571
|
+
linewidth=1.5,
|
|
572
|
+
alpha=0.7,
|
|
573
|
+
yaxis_name=secondary_y,
|
|
574
|
+
)
|
|
575
|
+
|
|
576
|
+
# Set y-axis limits and label
|
|
577
|
+
max_rate = region_recomb["rate"].max()
|
|
578
|
+
self._backend.set_secondary_ylim(
|
|
579
|
+
ax, 0, max(max_rate * 1.2, 20), yaxis_name=secondary_y
|
|
580
|
+
)
|
|
581
|
+
self._backend.set_secondary_ylabel(
|
|
582
|
+
ax,
|
|
583
|
+
"Recombination rate (cM/Mb)",
|
|
584
|
+
color=RECOMB_COLOR,
|
|
585
|
+
fontsize=9,
|
|
586
|
+
yaxis_name=secondary_y,
|
|
533
587
|
)
|
|
534
588
|
|
|
535
589
|
def _plot_finemapping(
|
|
@@ -553,11 +607,29 @@ class LocusZoomPlotter:
|
|
|
553
607
|
show_credible_sets: Whether to color points by credible set.
|
|
554
608
|
pip_threshold: Minimum PIP to display as scatter point.
|
|
555
609
|
"""
|
|
610
|
+
|
|
611
|
+
def _build_finemapping_hover_data(
|
|
612
|
+
subset_df: pd.DataFrame,
|
|
613
|
+
) -> Optional[pd.DataFrame]:
|
|
614
|
+
"""Build hover data for interactive backends."""
|
|
615
|
+
hover_cols = {}
|
|
616
|
+
# Position
|
|
617
|
+
if pos_col in subset_df.columns:
|
|
618
|
+
hover_cols["Position"] = subset_df[pos_col].values
|
|
619
|
+
# PIP
|
|
620
|
+
if pip_col in subset_df.columns:
|
|
621
|
+
hover_cols["PIP"] = subset_df[pip_col].values
|
|
622
|
+
# Credible set
|
|
623
|
+
if cs_col and cs_col in subset_df.columns:
|
|
624
|
+
hover_cols["Credible Set"] = subset_df[cs_col].values
|
|
625
|
+
return pd.DataFrame(hover_cols) if hover_cols else None
|
|
626
|
+
|
|
556
627
|
# Sort by position for line plotting
|
|
557
628
|
df = df.sort_values(pos_col)
|
|
558
629
|
|
|
559
630
|
# Plot PIP as line
|
|
560
|
-
|
|
631
|
+
self._backend.line(
|
|
632
|
+
ax,
|
|
561
633
|
df[pos_col],
|
|
562
634
|
df[pip_col],
|
|
563
635
|
color=PIP_LINE_COLOR,
|
|
@@ -575,92 +647,53 @@ class LocusZoomPlotter:
|
|
|
575
647
|
for cs_id in credible_sets:
|
|
576
648
|
cs_data = df[df[cs_col] == cs_id]
|
|
577
649
|
color = get_credible_set_color(cs_id)
|
|
578
|
-
|
|
650
|
+
self._backend.scatter(
|
|
651
|
+
ax,
|
|
579
652
|
cs_data[pos_col],
|
|
580
653
|
cs_data[pip_col],
|
|
581
|
-
|
|
582
|
-
|
|
654
|
+
colors=color,
|
|
655
|
+
sizes=50,
|
|
583
656
|
marker="o",
|
|
584
657
|
edgecolor="black",
|
|
585
658
|
linewidth=0.5,
|
|
586
659
|
zorder=3,
|
|
587
|
-
|
|
660
|
+
hover_data=_build_finemapping_hover_data(cs_data),
|
|
588
661
|
)
|
|
589
662
|
# Plot variants not in any credible set
|
|
590
663
|
non_cs_data = df[(df[cs_col].isna()) | (df[cs_col] == 0)]
|
|
591
664
|
if not non_cs_data.empty and pip_threshold > 0:
|
|
592
665
|
non_cs_data = non_cs_data[non_cs_data[pip_col] >= pip_threshold]
|
|
593
666
|
if not non_cs_data.empty:
|
|
594
|
-
|
|
667
|
+
self._backend.scatter(
|
|
668
|
+
ax,
|
|
595
669
|
non_cs_data[pos_col],
|
|
596
670
|
non_cs_data[pip_col],
|
|
597
|
-
|
|
598
|
-
|
|
671
|
+
colors="#BEBEBE",
|
|
672
|
+
sizes=30,
|
|
599
673
|
marker="o",
|
|
600
674
|
edgecolor="black",
|
|
601
675
|
linewidth=0.3,
|
|
602
676
|
zorder=2,
|
|
603
|
-
|
|
677
|
+
hover_data=_build_finemapping_hover_data(non_cs_data),
|
|
604
678
|
)
|
|
605
679
|
else:
|
|
606
680
|
# No credible sets - show all points above threshold
|
|
607
681
|
if pip_threshold > 0:
|
|
608
682
|
high_pip = df[df[pip_col] >= pip_threshold]
|
|
609
683
|
if not high_pip.empty:
|
|
610
|
-
|
|
684
|
+
self._backend.scatter(
|
|
685
|
+
ax,
|
|
611
686
|
high_pip[pos_col],
|
|
612
687
|
high_pip[pip_col],
|
|
613
|
-
|
|
614
|
-
|
|
688
|
+
colors=PIP_LINE_COLOR,
|
|
689
|
+
sizes=50,
|
|
615
690
|
marker="o",
|
|
616
691
|
edgecolor="black",
|
|
617
692
|
linewidth=0.5,
|
|
618
693
|
zorder=3,
|
|
694
|
+
hover_data=_build_finemapping_hover_data(high_pip),
|
|
619
695
|
)
|
|
620
696
|
|
|
621
|
-
def _add_finemapping_legend(
|
|
622
|
-
self,
|
|
623
|
-
ax: Axes,
|
|
624
|
-
credible_sets: List[int],
|
|
625
|
-
) -> None:
|
|
626
|
-
"""Add fine-mapping legend showing credible sets.
|
|
627
|
-
|
|
628
|
-
Args:
|
|
629
|
-
ax: Matplotlib axes object.
|
|
630
|
-
credible_sets: List of credible set IDs to include.
|
|
631
|
-
"""
|
|
632
|
-
if not credible_sets:
|
|
633
|
-
return
|
|
634
|
-
|
|
635
|
-
legend_elements = []
|
|
636
|
-
for cs_id in credible_sets:
|
|
637
|
-
color = get_credible_set_color(cs_id)
|
|
638
|
-
legend_elements.append(
|
|
639
|
-
Line2D(
|
|
640
|
-
[0],
|
|
641
|
-
[0],
|
|
642
|
-
marker="o",
|
|
643
|
-
color="w",
|
|
644
|
-
markerfacecolor=color,
|
|
645
|
-
markeredgecolor="black",
|
|
646
|
-
markersize=7,
|
|
647
|
-
label=f"CS{cs_id}",
|
|
648
|
-
)
|
|
649
|
-
)
|
|
650
|
-
|
|
651
|
-
ax.legend(
|
|
652
|
-
handles=legend_elements,
|
|
653
|
-
loc="upper right",
|
|
654
|
-
fontsize=8,
|
|
655
|
-
frameon=True,
|
|
656
|
-
framealpha=0.9,
|
|
657
|
-
title="Credible sets",
|
|
658
|
-
title_fontsize=9,
|
|
659
|
-
handlelength=1.2,
|
|
660
|
-
handleheight=1.0,
|
|
661
|
-
labelspacing=0.3,
|
|
662
|
-
)
|
|
663
|
-
|
|
664
697
|
def plot_stacked(
|
|
665
698
|
self,
|
|
666
699
|
gwas_dfs: List[pd.DataFrame],
|
|
@@ -825,24 +858,17 @@ class LocusZoomPlotter:
|
|
|
825
858
|
f"Creating stacked plot with {n_panels} panels for chr{chrom}:{start}-{end}"
|
|
826
859
|
)
|
|
827
860
|
|
|
828
|
-
# Prevent auto-display in interactive environments
|
|
829
|
-
plt.ioff()
|
|
830
|
-
|
|
831
861
|
# Load recombination data if needed
|
|
832
862
|
if show_recombination and recomb_df is None:
|
|
833
863
|
recomb_df = self._get_recomb_for_region(chrom, start, end)
|
|
834
864
|
|
|
835
|
-
# Create figure
|
|
836
|
-
fig, axes =
|
|
837
|
-
n_panels,
|
|
838
|
-
1,
|
|
839
|
-
figsize=actual_figsize,
|
|
865
|
+
# Create figure using backend
|
|
866
|
+
fig, axes = self._backend.create_figure(
|
|
867
|
+
n_panels=n_panels,
|
|
840
868
|
height_ratios=height_ratios,
|
|
869
|
+
figsize=actual_figsize,
|
|
841
870
|
sharex=True,
|
|
842
|
-
gridspec_kw={"hspace": 0.05},
|
|
843
871
|
)
|
|
844
|
-
if n_panels == 1:
|
|
845
|
-
axes = [axes]
|
|
846
872
|
|
|
847
873
|
# Plot each GWAS panel
|
|
848
874
|
for i, (gwas_df, lead_pos) in enumerate(zip(gwas_dfs, lead_positions)):
|
|
@@ -868,56 +894,93 @@ class LocusZoomPlotter:
|
|
|
868
894
|
panel_ld_col = "R2"
|
|
869
895
|
|
|
870
896
|
# Plot association
|
|
871
|
-
self._plot_association(
|
|
897
|
+
self._plot_association(
|
|
898
|
+
ax, df, pos_col, panel_ld_col, lead_pos, rs_col, p_col
|
|
899
|
+
)
|
|
872
900
|
|
|
873
901
|
# Add significance line
|
|
874
|
-
|
|
902
|
+
self._backend.axhline(
|
|
903
|
+
ax,
|
|
875
904
|
y=self._genomewide_line,
|
|
876
905
|
color="red",
|
|
877
906
|
linestyle="--",
|
|
878
907
|
linewidth=1,
|
|
879
|
-
alpha=0.
|
|
908
|
+
alpha=0.65,
|
|
880
909
|
zorder=1,
|
|
881
910
|
)
|
|
882
911
|
|
|
883
|
-
# Add SNP labels
|
|
912
|
+
# Add SNP labels (matplotlib only - interactive backends use hover tooltips)
|
|
884
913
|
if snp_labels and rs_col in df.columns and label_top_n > 0 and not df.empty:
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
914
|
+
if self.backend_name == "matplotlib":
|
|
915
|
+
add_snp_labels(
|
|
916
|
+
ax,
|
|
917
|
+
df,
|
|
918
|
+
pos_col=pos_col,
|
|
919
|
+
neglog10p_col="neglog10p",
|
|
920
|
+
rs_col=rs_col,
|
|
921
|
+
label_top_n=label_top_n,
|
|
922
|
+
genes_df=genes_df,
|
|
923
|
+
chrom=chrom,
|
|
924
|
+
)
|
|
895
925
|
|
|
896
|
-
# Add recombination overlay (only on first panel)
|
|
926
|
+
# Add recombination overlay (only on first panel, all backends)
|
|
897
927
|
if i == 0 and recomb_df is not None and not recomb_df.empty:
|
|
898
|
-
|
|
928
|
+
if self.backend_name == "matplotlib":
|
|
929
|
+
add_recombination_overlay(ax, recomb_df, start, end)
|
|
930
|
+
else:
|
|
931
|
+
self._add_recombination_overlay_generic(ax, recomb_df, start, end)
|
|
899
932
|
|
|
900
933
|
# Format axes
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
ax
|
|
904
|
-
ax.spines["right"].set_visible(False)
|
|
934
|
+
self._backend.set_ylabel(ax, r"$-\log_{10}$ P")
|
|
935
|
+
self._backend.set_xlim(ax, start, end)
|
|
936
|
+
self._backend.hide_spines(ax, ["top", "right"])
|
|
905
937
|
|
|
906
938
|
# Add panel label
|
|
907
939
|
if panel_labels and i < len(panel_labels):
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
940
|
+
if self.backend_name == "matplotlib":
|
|
941
|
+
ax.annotate(
|
|
942
|
+
panel_labels[i],
|
|
943
|
+
xy=(0.02, 0.95),
|
|
944
|
+
xycoords="axes fraction",
|
|
945
|
+
fontsize=11,
|
|
946
|
+
fontweight="bold",
|
|
947
|
+
va="top",
|
|
948
|
+
ha="left",
|
|
949
|
+
)
|
|
950
|
+
elif self.backend_name == "plotly":
|
|
951
|
+
fig, row = ax
|
|
952
|
+
fig.add_annotation(
|
|
953
|
+
text=f"<b>{panel_labels[i]}</b>",
|
|
954
|
+
xref=f"x{row} domain" if row > 1 else "x domain",
|
|
955
|
+
yref=f"y{row} domain" if row > 1 else "y domain",
|
|
956
|
+
x=0.02,
|
|
957
|
+
y=0.95,
|
|
958
|
+
showarrow=False,
|
|
959
|
+
font=dict(size=11),
|
|
960
|
+
xanchor="left",
|
|
961
|
+
yanchor="top",
|
|
962
|
+
)
|
|
963
|
+
elif self.backend_name == "bokeh":
|
|
964
|
+
from bokeh.models import Label
|
|
965
|
+
|
|
966
|
+
# Get y-axis range for positioning
|
|
967
|
+
y_max = ax.y_range.end if ax.y_range.end else 10
|
|
968
|
+
x_min = ax.x_range.start if ax.x_range.start else start
|
|
969
|
+
label = Label(
|
|
970
|
+
x=x_min + (end - start) * 0.02,
|
|
971
|
+
y=y_max * 0.95,
|
|
972
|
+
text=panel_labels[i],
|
|
973
|
+
text_font_size="11pt",
|
|
974
|
+
text_font_style="bold",
|
|
975
|
+
)
|
|
976
|
+
ax.add_layout(label)
|
|
917
977
|
|
|
918
|
-
# Add LD legend (only on first panel)
|
|
978
|
+
# Add LD legend (only on first panel, all backends)
|
|
919
979
|
if i == 0 and panel_ld_col is not None and panel_ld_col in df.columns:
|
|
920
|
-
self.
|
|
980
|
+
if self.backend_name == "matplotlib":
|
|
981
|
+
self._add_ld_legend(ax)
|
|
982
|
+
else:
|
|
983
|
+
self._backend.add_ld_legend(ax, LD_BINS, LEAD_SNP_COLOR)
|
|
921
984
|
|
|
922
985
|
# Track current panel index
|
|
923
986
|
panel_idx = n_gwas
|
|
@@ -945,15 +1008,16 @@ class LocusZoomPlotter:
|
|
|
945
1008
|
pip_threshold=0.01,
|
|
946
1009
|
)
|
|
947
1010
|
|
|
948
|
-
# Add legend for credible sets
|
|
1011
|
+
# Add legend for credible sets (all backends)
|
|
949
1012
|
credible_sets = get_credible_sets(fm_data, finemapping_cs_col)
|
|
950
1013
|
if credible_sets:
|
|
951
|
-
self.
|
|
1014
|
+
self._backend.add_finemapping_legend(
|
|
1015
|
+
ax, credible_sets, get_credible_set_color
|
|
1016
|
+
)
|
|
952
1017
|
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
ax
|
|
956
|
-
ax.spines["right"].set_visible(False)
|
|
1018
|
+
self._backend.set_ylabel(ax, "PIP")
|
|
1019
|
+
self._backend.set_ylim(ax, -0.05, 1.05)
|
|
1020
|
+
self._backend.hide_spines(ax, ["top", "right"])
|
|
957
1021
|
panel_idx += 1
|
|
958
1022
|
|
|
959
1023
|
# Plot eQTL panel if provided
|
|
@@ -977,72 +1041,117 @@ class LocusZoomPlotter:
|
|
|
977
1041
|
eqtl_data["p_value"].clip(lower=1e-300)
|
|
978
1042
|
)
|
|
979
1043
|
|
|
1044
|
+
def _build_eqtl_hover_data(
|
|
1045
|
+
subset_df: pd.DataFrame,
|
|
1046
|
+
) -> Optional[pd.DataFrame]:
|
|
1047
|
+
"""Build hover data for eQTL interactive backends."""
|
|
1048
|
+
hover_cols = {}
|
|
1049
|
+
# Position
|
|
1050
|
+
if "pos" in subset_df.columns:
|
|
1051
|
+
hover_cols["Position"] = subset_df["pos"].values
|
|
1052
|
+
# P-value
|
|
1053
|
+
if "p_value" in subset_df.columns:
|
|
1054
|
+
hover_cols["P-value"] = subset_df["p_value"].values
|
|
1055
|
+
# Effect size
|
|
1056
|
+
if "effect_size" in subset_df.columns:
|
|
1057
|
+
hover_cols["Effect"] = subset_df["effect_size"].values
|
|
1058
|
+
# Gene
|
|
1059
|
+
if "gene" in subset_df.columns:
|
|
1060
|
+
hover_cols["Gene"] = subset_df["gene"].values
|
|
1061
|
+
return pd.DataFrame(hover_cols) if hover_cols else None
|
|
1062
|
+
|
|
980
1063
|
# Check if effect_size column exists for directional coloring
|
|
981
1064
|
has_effect = "effect_size" in eqtl_data.columns
|
|
982
1065
|
|
|
983
1066
|
if has_effect:
|
|
984
|
-
# Plot triangles by effect direction
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
1067
|
+
# Plot triangles by effect direction (batch by sign for efficiency)
|
|
1068
|
+
pos_effects = eqtl_data[eqtl_data["effect_size"] >= 0]
|
|
1069
|
+
neg_effects = eqtl_data[eqtl_data["effect_size"] < 0]
|
|
1070
|
+
|
|
1071
|
+
# Plot positive effects (up triangles)
|
|
1072
|
+
for _, row in pos_effects.iterrows():
|
|
1073
|
+
row_df = pd.DataFrame([row])
|
|
1074
|
+
self._backend.scatter(
|
|
1075
|
+
ax,
|
|
1076
|
+
pd.Series([row["pos"]]),
|
|
1077
|
+
pd.Series([row["neglog10p"]]),
|
|
1078
|
+
colors=get_eqtl_color(row["effect_size"]),
|
|
1079
|
+
sizes=50,
|
|
1080
|
+
marker="^",
|
|
995
1081
|
edgecolor="black",
|
|
996
1082
|
linewidth=0.5,
|
|
997
1083
|
zorder=2,
|
|
1084
|
+
hover_data=_build_eqtl_hover_data(row_df),
|
|
998
1085
|
)
|
|
999
|
-
#
|
|
1000
|
-
|
|
1086
|
+
# Plot negative effects (down triangles)
|
|
1087
|
+
for _, row in neg_effects.iterrows():
|
|
1088
|
+
row_df = pd.DataFrame([row])
|
|
1089
|
+
self._backend.scatter(
|
|
1090
|
+
ax,
|
|
1091
|
+
pd.Series([row["pos"]]),
|
|
1092
|
+
pd.Series([row["neglog10p"]]),
|
|
1093
|
+
colors=get_eqtl_color(row["effect_size"]),
|
|
1094
|
+
sizes=50,
|
|
1095
|
+
marker="v",
|
|
1096
|
+
edgecolor="black",
|
|
1097
|
+
linewidth=0.5,
|
|
1098
|
+
zorder=2,
|
|
1099
|
+
hover_data=_build_eqtl_hover_data(row_df),
|
|
1100
|
+
)
|
|
1101
|
+
# Add eQTL effect legend (all backends)
|
|
1102
|
+
self._backend.add_eqtl_legend(
|
|
1103
|
+
ax, EQTL_POSITIVE_BINS, EQTL_NEGATIVE_BINS
|
|
1104
|
+
)
|
|
1001
1105
|
else:
|
|
1002
1106
|
# No effect sizes - plot as diamonds
|
|
1003
|
-
|
|
1107
|
+
label = f"eQTL ({eqtl_gene})" if eqtl_gene else "eQTL"
|
|
1108
|
+
self._backend.scatter(
|
|
1109
|
+
ax,
|
|
1004
1110
|
eqtl_data["pos"],
|
|
1005
1111
|
eqtl_data["neglog10p"],
|
|
1006
|
-
|
|
1007
|
-
|
|
1112
|
+
colors="#FF6B6B",
|
|
1113
|
+
sizes=60,
|
|
1008
1114
|
marker="D",
|
|
1009
1115
|
edgecolor="black",
|
|
1010
1116
|
linewidth=0.5,
|
|
1011
1117
|
zorder=2,
|
|
1012
|
-
label=
|
|
1118
|
+
label=label,
|
|
1119
|
+
hover_data=_build_eqtl_hover_data(eqtl_data),
|
|
1013
1120
|
)
|
|
1014
|
-
|
|
1121
|
+
self._backend.add_simple_legend(ax, label, loc="upper right")
|
|
1015
1122
|
|
|
1016
|
-
|
|
1017
|
-
|
|
1123
|
+
self._backend.set_ylabel(ax, r"$-\log_{10}$ P (eQTL)")
|
|
1124
|
+
self._backend.axhline(
|
|
1125
|
+
ax,
|
|
1018
1126
|
y=self._genomewide_line,
|
|
1019
1127
|
color="red",
|
|
1020
1128
|
linestyle="--",
|
|
1021
1129
|
linewidth=1,
|
|
1022
|
-
alpha=0.
|
|
1130
|
+
alpha=0.65,
|
|
1023
1131
|
)
|
|
1024
|
-
ax
|
|
1025
|
-
ax.spines["right"].set_visible(False)
|
|
1132
|
+
self._backend.hide_spines(ax, ["top", "right"])
|
|
1026
1133
|
panel_idx += 1
|
|
1027
1134
|
|
|
1028
|
-
# Plot gene track
|
|
1135
|
+
# Plot gene track (all backends)
|
|
1029
1136
|
if genes_df is not None:
|
|
1030
1137
|
gene_ax = axes[panel_idx]
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1138
|
+
if self.backend_name == "matplotlib":
|
|
1139
|
+
plot_gene_track(gene_ax, genes_df, chrom, start, end, exons_df)
|
|
1140
|
+
else:
|
|
1141
|
+
plot_gene_track_generic(
|
|
1142
|
+
gene_ax, self._backend, genes_df, chrom, start, end, exons_df
|
|
1143
|
+
)
|
|
1144
|
+
self._backend.set_xlabel(gene_ax, f"Chromosome {chrom} (Mb)")
|
|
1145
|
+
self._backend.hide_spines(gene_ax, ["top", "right", "left"])
|
|
1036
1146
|
else:
|
|
1037
1147
|
# Set x-label on bottom panel
|
|
1038
|
-
axes[-1]
|
|
1148
|
+
self._backend.set_xlabel(axes[-1], f"Chromosome {chrom} (Mb)")
|
|
1039
1149
|
|
|
1040
|
-
# Format x-axis
|
|
1041
|
-
|
|
1042
|
-
|
|
1150
|
+
# Format x-axis (call for all axes - Plotly needs each subplot formatted)
|
|
1151
|
+
for ax in axes:
|
|
1152
|
+
self._backend.format_xaxis_mb(ax)
|
|
1043
1153
|
|
|
1044
1154
|
# Adjust layout
|
|
1045
|
-
|
|
1046
|
-
plt.ion()
|
|
1155
|
+
self._backend.finalize_layout(fig, hspace=0.1)
|
|
1047
1156
|
|
|
1048
1157
|
return fig
|