pylocuszoom 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylocuszoom/__init__.py +39 -20
- pylocuszoom/backends/__init__.py +1 -5
- pylocuszoom/backends/base.py +1 -1
- pylocuszoom/backends/bokeh_backend.py +4 -7
- pylocuszoom/backends/matplotlib_backend.py +6 -1
- pylocuszoom/backends/plotly_backend.py +11 -12
- pylocuszoom/colors.py +132 -0
- pylocuszoom/eqtl.py +3 -2
- pylocuszoom/finemapping.py +224 -0
- pylocuszoom/gene_track.py +44 -31
- pylocuszoom/labels.py +32 -33
- pylocuszoom/ld.py +8 -7
- pylocuszoom/plotter.py +381 -66
- pylocuszoom/recombination.py +14 -14
- pylocuszoom/utils.py +3 -1
- {pylocuszoom-0.1.0.dist-info → pylocuszoom-0.2.0.dist-info}/METADATA +20 -25
- pylocuszoom-0.2.0.dist-info/RECORD +21 -0
- pylocuszoom-0.1.0.dist-info/RECORD +0 -20
- {pylocuszoom-0.1.0.dist-info → pylocuszoom-0.2.0.dist-info}/WHEEL +0 -0
- {pylocuszoom-0.1.0.dist-info → pylocuszoom-0.2.0.dist-info}/licenses/LICENSE.md +0 -0
pylocuszoom/plotter.py
CHANGED
|
@@ -10,7 +10,7 @@ Supports multiple backends:
|
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
12
|
from pathlib import Path
|
|
13
|
-
from typing import Any, List, Optional, Tuple
|
|
13
|
+
from typing import Any, List, Optional, Tuple
|
|
14
14
|
|
|
15
15
|
import matplotlib.pyplot as plt
|
|
16
16
|
import numpy as np
|
|
@@ -21,27 +21,36 @@ from matplotlib.lines import Line2D
|
|
|
21
21
|
from matplotlib.patches import Patch
|
|
22
22
|
from matplotlib.ticker import FuncFormatter, MaxNLocator
|
|
23
23
|
|
|
24
|
-
from .backends import BackendType,
|
|
25
|
-
|
|
24
|
+
from .backends import BackendType, get_backend
|
|
26
25
|
from .colors import (
|
|
26
|
+
EQTL_NEGATIVE_BINS,
|
|
27
|
+
EQTL_POSITIVE_BINS,
|
|
27
28
|
LD_BINS,
|
|
28
29
|
LEAD_SNP_COLOR,
|
|
30
|
+
PIP_LINE_COLOR,
|
|
31
|
+
get_credible_set_color,
|
|
32
|
+
get_eqtl_color,
|
|
29
33
|
get_ld_bin,
|
|
30
34
|
get_ld_color_palette,
|
|
31
35
|
)
|
|
36
|
+
from .eqtl import validate_eqtl_df
|
|
37
|
+
from .finemapping import (
|
|
38
|
+
get_credible_sets,
|
|
39
|
+
prepare_finemapping_for_plotting,
|
|
40
|
+
)
|
|
32
41
|
from .gene_track import assign_gene_positions, plot_gene_track
|
|
33
42
|
from .labels import add_snp_labels
|
|
34
43
|
from .ld import calculate_ld, find_plink
|
|
35
44
|
from .logging import enable_logging, logger
|
|
36
45
|
from .recombination import (
|
|
37
46
|
add_recombination_overlay,
|
|
38
|
-
|
|
47
|
+
download_canine_recombination_maps,
|
|
39
48
|
get_default_data_dir,
|
|
40
49
|
get_recombination_rate_for_region,
|
|
41
50
|
)
|
|
42
51
|
from .utils import normalize_chrom, validate_genes_df, validate_gwas_df
|
|
43
52
|
|
|
44
|
-
# Default significance threshold: 5e-8 for human, 5e-7 for
|
|
53
|
+
# Default significance threshold: 5e-8 for human, 5e-7 for canine
|
|
45
54
|
DEFAULT_GENOMEWIDE_THRESHOLD = 5e-7
|
|
46
55
|
DEFAULT_GENOMEWIDE_LINE = -np.log10(DEFAULT_GENOMEWIDE_THRESHOLD)
|
|
47
56
|
|
|
@@ -52,7 +61,7 @@ class LocusZoomPlotter:
|
|
|
52
61
|
Creates LocusZoom-style regional plots with:
|
|
53
62
|
- LD coloring based on R² with lead variant
|
|
54
63
|
- Gene and exon tracks
|
|
55
|
-
- Recombination rate overlays (
|
|
64
|
+
- Recombination rate overlays (canine built-in, or user-provided)
|
|
56
65
|
- Automatic SNP labeling
|
|
57
66
|
|
|
58
67
|
Supports multiple rendering backends:
|
|
@@ -61,9 +70,9 @@ class LocusZoomPlotter:
|
|
|
61
70
|
- bokeh: Interactive HTML for dashboards
|
|
62
71
|
|
|
63
72
|
Args:
|
|
64
|
-
species: Species name ('
|
|
65
|
-
|
|
66
|
-
genome_build: Genome build for coordinate system. For
|
|
73
|
+
species: Species name ('canine', 'feline', or None for custom).
|
|
74
|
+
Canine has built-in recombination maps.
|
|
75
|
+
genome_build: Genome build for coordinate system. For canine:
|
|
67
76
|
"canfam3.1" (default) or "canfam4". If "canfam4", recombination
|
|
68
77
|
maps are automatically lifted over from CanFam3.1.
|
|
69
78
|
backend: Plotting backend ('matplotlib', 'plotly', or 'bokeh').
|
|
@@ -78,10 +87,10 @@ class LocusZoomPlotter:
|
|
|
78
87
|
|
|
79
88
|
Example:
|
|
80
89
|
>>> # Static plot (default)
|
|
81
|
-
>>> plotter = LocusZoomPlotter(species="
|
|
90
|
+
>>> plotter = LocusZoomPlotter(species="canine")
|
|
82
91
|
>>>
|
|
83
92
|
>>> # Interactive plot with plotly
|
|
84
|
-
>>> plotter = LocusZoomPlotter(species="
|
|
93
|
+
>>> plotter = LocusZoomPlotter(species="canine", backend="plotly")
|
|
85
94
|
>>>
|
|
86
95
|
>>> fig = plotter.plot(
|
|
87
96
|
... gwas_df,
|
|
@@ -96,7 +105,7 @@ class LocusZoomPlotter:
|
|
|
96
105
|
|
|
97
106
|
def __init__(
|
|
98
107
|
self,
|
|
99
|
-
species: str = "
|
|
108
|
+
species: str = "canine",
|
|
100
109
|
genome_build: Optional[str] = None,
|
|
101
110
|
backend: BackendType = "matplotlib",
|
|
102
111
|
plink_path: Optional[str] = None,
|
|
@@ -126,9 +135,9 @@ class LocusZoomPlotter:
|
|
|
126
135
|
@staticmethod
|
|
127
136
|
def _default_build(species: str) -> Optional[str]:
|
|
128
137
|
"""Get default genome build for species."""
|
|
129
|
-
if species == "
|
|
138
|
+
if species == "canine":
|
|
130
139
|
return "canfam3.1"
|
|
131
|
-
if species == "
|
|
140
|
+
if species == "feline":
|
|
132
141
|
return "felCat9"
|
|
133
142
|
return None
|
|
134
143
|
|
|
@@ -137,7 +146,7 @@ class LocusZoomPlotter:
|
|
|
137
146
|
|
|
138
147
|
Returns path to recombination map directory, or None if not available.
|
|
139
148
|
"""
|
|
140
|
-
if self.species == "
|
|
149
|
+
if self.species == "canine":
|
|
141
150
|
if self.recomb_data_dir:
|
|
142
151
|
return Path(self.recomb_data_dir)
|
|
143
152
|
# Check if already downloaded
|
|
@@ -149,7 +158,7 @@ class LocusZoomPlotter:
|
|
|
149
158
|
return default_dir
|
|
150
159
|
# Download
|
|
151
160
|
try:
|
|
152
|
-
return
|
|
161
|
+
return download_canine_recombination_maps()
|
|
153
162
|
except Exception as e:
|
|
154
163
|
logger.warning(f"Could not download recombination maps: {e}")
|
|
155
164
|
return None
|
|
@@ -249,20 +258,27 @@ class LocusZoomPlotter:
|
|
|
249
258
|
|
|
250
259
|
# Calculate LD if reference file provided
|
|
251
260
|
if ld_reference_file and lead_pos and ld_col is None:
|
|
252
|
-
|
|
253
|
-
if not
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
bfile_path=ld_reference_file,
|
|
258
|
-
lead_snp=lead_snp_id,
|
|
259
|
-
window_kb=max((end - start) // 1000, 500),
|
|
260
|
-
plink_path=self.plink_path,
|
|
261
|
-
species=self.species,
|
|
261
|
+
# Check if rs_col exists before attempting LD calculation
|
|
262
|
+
if rs_col not in df.columns:
|
|
263
|
+
logger.warning(
|
|
264
|
+
f"Cannot calculate LD: column '{rs_col}' not found in GWAS data. "
|
|
265
|
+
f"Provide rs_col parameter or add SNP IDs to DataFrame."
|
|
262
266
|
)
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
267
|
+
else:
|
|
268
|
+
lead_snp_row = df[df[pos_col] == lead_pos]
|
|
269
|
+
if not lead_snp_row.empty:
|
|
270
|
+
lead_snp_id = lead_snp_row[rs_col].iloc[0]
|
|
271
|
+
logger.debug(f"Calculating LD for lead SNP {lead_snp_id}")
|
|
272
|
+
ld_df = calculate_ld(
|
|
273
|
+
bfile_path=ld_reference_file,
|
|
274
|
+
lead_snp=lead_snp_id,
|
|
275
|
+
window_kb=max((end - start) // 1000, 500),
|
|
276
|
+
plink_path=self.plink_path,
|
|
277
|
+
species=self.species,
|
|
278
|
+
)
|
|
279
|
+
if not ld_df.empty:
|
|
280
|
+
df = df.merge(ld_df, left_on=rs_col, right_on="SNP", how="left")
|
|
281
|
+
ld_col = "R2"
|
|
266
282
|
|
|
267
283
|
# Load recombination data if needed
|
|
268
284
|
if show_recombination and recomb_df is None:
|
|
@@ -277,9 +293,10 @@ class LocusZoomPlotter:
|
|
|
277
293
|
# Add significance line
|
|
278
294
|
ax.axhline(
|
|
279
295
|
y=self._genomewide_line,
|
|
280
|
-
color="
|
|
281
|
-
linestyle=
|
|
296
|
+
color="red",
|
|
297
|
+
linestyle=(0, (5, 10)),
|
|
282
298
|
linewidth=1,
|
|
299
|
+
alpha=0.8,
|
|
283
300
|
zorder=1,
|
|
284
301
|
)
|
|
285
302
|
|
|
@@ -423,10 +440,10 @@ class LocusZoomPlotter:
|
|
|
423
440
|
lead_snp[pos_col],
|
|
424
441
|
lead_snp["neglog10p"],
|
|
425
442
|
c=LEAD_SNP_COLOR,
|
|
426
|
-
s=
|
|
443
|
+
s=60,
|
|
427
444
|
marker="D",
|
|
428
445
|
edgecolors="black",
|
|
429
|
-
linewidths=1,
|
|
446
|
+
linewidths=1.5,
|
|
430
447
|
zorder=10,
|
|
431
448
|
)
|
|
432
449
|
|
|
@@ -441,8 +458,8 @@ class LocusZoomPlotter:
|
|
|
441
458
|
color="w",
|
|
442
459
|
markerfacecolor=LEAD_SNP_COLOR,
|
|
443
460
|
markeredgecolor="black",
|
|
444
|
-
markersize=
|
|
445
|
-
label="
|
|
461
|
+
markersize=6,
|
|
462
|
+
label="Lead SNP",
|
|
446
463
|
),
|
|
447
464
|
]
|
|
448
465
|
|
|
@@ -457,7 +474,7 @@ class LocusZoomPlotter:
|
|
|
457
474
|
|
|
458
475
|
ax.legend(
|
|
459
476
|
handles=legend_elements,
|
|
460
|
-
loc="upper
|
|
477
|
+
loc="upper right",
|
|
461
478
|
fontsize=9,
|
|
462
479
|
frameon=True,
|
|
463
480
|
framealpha=0.9,
|
|
@@ -468,6 +485,182 @@ class LocusZoomPlotter:
|
|
|
468
485
|
labelspacing=0.4,
|
|
469
486
|
)
|
|
470
487
|
|
|
488
|
+
def _add_eqtl_legend(self, ax: Axes) -> None:
    """Attach the eQTL effect-size legend to ``ax``.

    One legend handle is created per entry of ``EQTL_POSITIVE_BINS``
    (upward triangles), followed by one per entry of
    ``EQTL_NEGATIVE_BINS`` (downward triangles). The third and fourth
    fields of each bin entry are used as the handle's label and fill
    color; the first two fields are ignored here.

    Args:
        ax: Matplotlib axes object that receives the legend.
    """
    # Marker shape encodes effect direction. Positive bins are listed
    # first so they appear above the negative bins in the legend.
    direction_bins = (
        ("^", EQTL_POSITIVE_BINS),
        ("v", EQTL_NEGATIVE_BINS),
    )

    handles = []
    for shape, bins in direction_bins:
        handles.extend(
            Line2D(
                [0],
                [0],
                marker=shape,
                color="w",  # white line so only the marker is visible
                markerfacecolor=fill,
                markeredgecolor="black",
                markersize=7,
                label=text,
            )
            for _, _, text, fill in bins
        )

    ax.legend(
        handles=handles,
        loc="upper right",
        fontsize=8,
        frameon=True,
        framealpha=0.9,
        title="eQTL effect",
        title_fontsize=9,
        handlelength=1.2,
        handleheight=1.0,
        labelspacing=0.3,
    )
|
|
534
|
+
|
|
535
|
+
def _plot_finemapping(
    self,
    ax: Axes,
    df: pd.DataFrame,
    pos_col: str = "pos",
    pip_col: str = "pip",
    cs_col: Optional[str] = "cs",
    show_credible_sets: bool = True,
    pip_threshold: float = 0.0,
) -> None:
    """Draw a fine-mapping panel: a PIP trace plus highlighted variants.

    The data are sorted by position and the PIP values are drawn as a
    continuous line. Scatter points are then layered on top:

    * If credible sets are requested and found, every credible set is
      drawn in its own color, and variants whose ``cs_col`` value is NaN
      or 0 are drawn in grey (restricted to ``pip >= pip_threshold``
      when ``pip_threshold`` is positive).
    * Otherwise, variants with ``pip >= pip_threshold`` are drawn in the
      line color, but only when ``pip_threshold`` is positive; with a
      threshold of 0 or less only the line is drawn.

    Args:
        ax: Matplotlib axes object.
        df: Fine-mapping DataFrame containing ``pos_col`` and ``pip_col``.
        pos_col: Column name for position.
        pip_col: Column name for posterior inclusion probability.
        cs_col: Column name for credible set assignment, or None to
            disable credible-set coloring.
        show_credible_sets: Whether to color points by credible set.
        pip_threshold: Minimum PIP for a non-credible-set variant to be
            drawn as a scatter point (see the branch notes above).
    """
    # The line plot needs monotonically ordered x values.
    ordered = df.sort_values(pos_col)

    ax.plot(
        ordered[pos_col],
        ordered[pip_col],
        color=PIP_LINE_COLOR,
        linewidth=1.5,
        alpha=0.8,
        zorder=1,
    )

    # Credible sets are only looked up when the column is named, present
    # in the data, and the caller asked for them.
    cs_ids = []
    if cs_col is not None and cs_col in ordered.columns and show_credible_sets:
        cs_ids = get_credible_sets(ordered, cs_col)

    if not cs_ids:
        # No credible sets: highlight variants at or above the threshold,
        # and only when a positive threshold was given.
        if pip_threshold > 0:
            above = ordered[ordered[pip_col] >= pip_threshold]
            if not above.empty:
                ax.scatter(
                    above[pos_col],
                    above[pip_col],
                    c=PIP_LINE_COLOR,
                    s=50,
                    marker="o",
                    edgecolor="black",
                    linewidth=0.5,
                    zorder=3,
                )
        return

    # One scatter call per credible set so each gets its own color/label.
    for cs_id in cs_ids:
        members = ordered[ordered[cs_col] == cs_id]
        ax.scatter(
            members[pos_col],
            members[pip_col],
            c=get_credible_set_color(cs_id),
            s=50,
            marker="o",
            edgecolor="black",
            linewidth=0.5,
            zorder=3,
            label=f"CS{cs_id}",
        )

    # Variants outside every credible set are marked by NaN or 0.
    outside = ordered[(ordered[cs_col].isna()) | (ordered[cs_col] == 0)]
    if outside.empty:
        return
    if pip_threshold > 0:
        outside = outside[outside[pip_col] >= pip_threshold]
        if outside.empty:
            return

    # Drawn smaller, greyer and beneath the credible-set points.
    ax.scatter(
        outside[pos_col],
        outside[pip_col],
        c="#BEBEBE",
        s=30,
        marker="o",
        edgecolor="black",
        linewidth=0.3,
        zorder=2,
        alpha=0.6,
    )
|
|
620
|
+
|
|
621
|
+
def _add_finemapping_legend(
    self,
    ax: Axes,
    credible_sets: List[int],
) -> None:
    """Attach a legend with one colored marker per credible set.

    Nothing is drawn when ``credible_sets`` is empty.

    Args:
        ax: Matplotlib axes object that receives the legend.
        credible_sets: Credible set IDs to list; each is labelled
            ``CS<id>`` and colored via ``get_credible_set_color``.
    """
    if not credible_sets:
        return

    handles = [
        Line2D(
            [0],
            [0],
            marker="o",
            color="w",  # white line so only the marker is visible
            markerfacecolor=get_credible_set_color(cs_id),
            markeredgecolor="black",
            markersize=7,
            label=f"CS{cs_id}",
        )
        for cs_id in credible_sets
    ]

    ax.legend(
        handles=handles,
        loc="upper right",
        fontsize=8,
        frameon=True,
        framealpha=0.9,
        title="Credible sets",
        title_fontsize=9,
        handlelength=1.2,
        handleheight=1.0,
        labelspacing=0.3,
    )
|
|
663
|
+
|
|
471
664
|
def plot_stacked(
|
|
472
665
|
self,
|
|
473
666
|
gwas_dfs: List[pd.DataFrame],
|
|
@@ -478,10 +671,13 @@ class LocusZoomPlotter:
|
|
|
478
671
|
panel_labels: Optional[List[str]] = None,
|
|
479
672
|
ld_reference_file: Optional[str] = None,
|
|
480
673
|
ld_reference_files: Optional[List[str]] = None,
|
|
674
|
+
ld_col: Optional[str] = None,
|
|
481
675
|
genes_df: Optional[pd.DataFrame] = None,
|
|
482
676
|
exons_df: Optional[pd.DataFrame] = None,
|
|
483
677
|
eqtl_df: Optional[pd.DataFrame] = None,
|
|
484
678
|
eqtl_gene: Optional[str] = None,
|
|
679
|
+
finemapping_df: Optional[pd.DataFrame] = None,
|
|
680
|
+
finemapping_cs_col: Optional[str] = "cs",
|
|
485
681
|
recomb_df: Optional[pd.DataFrame] = None,
|
|
486
682
|
show_recombination: bool = True,
|
|
487
683
|
snp_labels: bool = True,
|
|
@@ -506,10 +702,15 @@ class LocusZoomPlotter:
|
|
|
506
702
|
panel_labels: Labels for each panel (e.g., phenotype names).
|
|
507
703
|
ld_reference_file: Single PLINK fileset for all panels.
|
|
508
704
|
ld_reference_files: List of PLINK filesets (one per panel).
|
|
705
|
+
ld_col: Column name for pre-computed LD (R²) values in each DataFrame.
|
|
706
|
+
Use this if LD was calculated externally.
|
|
509
707
|
genes_df: Gene annotations for bottom track.
|
|
510
708
|
exons_df: Exon annotations for gene track.
|
|
511
709
|
eqtl_df: eQTL data to display as additional panel.
|
|
512
710
|
eqtl_gene: Filter eQTL data to this target gene.
|
|
711
|
+
finemapping_df: Fine-mapping/SuSiE results with pos and pip columns.
|
|
712
|
+
Displayed as PIP line with optional credible set coloring.
|
|
713
|
+
finemapping_cs_col: Column name for credible set assignment in finemapping_df.
|
|
513
714
|
recomb_df: Pre-loaded recombination rate data.
|
|
514
715
|
show_recombination: Whether to show recombination overlay.
|
|
515
716
|
snp_labels: Whether to label top SNPs.
|
|
@@ -534,11 +735,30 @@ class LocusZoomPlotter:
|
|
|
534
735
|
if n_gwas == 0:
|
|
535
736
|
raise ValueError("At least one GWAS DataFrame required")
|
|
536
737
|
|
|
738
|
+
# Validate list lengths match
|
|
739
|
+
if lead_positions is not None and len(lead_positions) != n_gwas:
|
|
740
|
+
raise ValueError(
|
|
741
|
+
f"lead_positions length ({len(lead_positions)}) must match "
|
|
742
|
+
f"number of GWAS DataFrames ({n_gwas})"
|
|
743
|
+
)
|
|
744
|
+
if panel_labels is not None and len(panel_labels) != n_gwas:
|
|
745
|
+
raise ValueError(
|
|
746
|
+
f"panel_labels length ({len(panel_labels)}) must match "
|
|
747
|
+
f"number of GWAS DataFrames ({n_gwas})"
|
|
748
|
+
)
|
|
749
|
+
if ld_reference_files is not None and len(ld_reference_files) != n_gwas:
|
|
750
|
+
raise ValueError(
|
|
751
|
+
f"ld_reference_files length ({len(ld_reference_files)}) must match "
|
|
752
|
+
f"number of GWAS DataFrames ({n_gwas})"
|
|
753
|
+
)
|
|
754
|
+
|
|
537
755
|
# Validate inputs
|
|
538
756
|
for i, df in enumerate(gwas_dfs):
|
|
539
757
|
validate_gwas_df(df, pos_col=pos_col, p_col=p_col)
|
|
540
758
|
if genes_df is not None:
|
|
541
759
|
validate_genes_df(genes_df)
|
|
760
|
+
if eqtl_df is not None:
|
|
761
|
+
validate_eqtl_df(eqtl_df)
|
|
542
762
|
|
|
543
763
|
# Handle lead positions
|
|
544
764
|
if lead_positions is None:
|
|
@@ -558,12 +778,16 @@ class LocusZoomPlotter:
|
|
|
558
778
|
# Calculate panel layout
|
|
559
779
|
panel_height = 2.5 # inches per GWAS panel
|
|
560
780
|
eqtl_height = 2.0 if eqtl_df is not None else 0
|
|
781
|
+
finemapping_height = 1.5 if finemapping_df is not None else 0
|
|
561
782
|
|
|
562
783
|
# Gene track height
|
|
563
784
|
if genes_df is not None:
|
|
564
785
|
chrom_str = normalize_chrom(chrom)
|
|
565
786
|
region_genes = genes_df[
|
|
566
|
-
(
|
|
787
|
+
(
|
|
788
|
+
genes_df["chr"].astype(str).str.replace("chr", "", regex=False)
|
|
789
|
+
== chrom_str
|
|
790
|
+
)
|
|
567
791
|
& (genes_df["end"] >= start)
|
|
568
792
|
& (genes_df["start"] <= end)
|
|
569
793
|
]
|
|
@@ -579,8 +803,15 @@ class LocusZoomPlotter:
|
|
|
579
803
|
gene_track_height = 0
|
|
580
804
|
|
|
581
805
|
# Calculate total panels and heights
|
|
582
|
-
n_panels =
|
|
806
|
+
n_panels = (
|
|
807
|
+
n_gwas
|
|
808
|
+
+ (1 if finemapping_df is not None else 0)
|
|
809
|
+
+ (1 if eqtl_df is not None else 0)
|
|
810
|
+
+ (1 if genes_df is not None else 0)
|
|
811
|
+
)
|
|
583
812
|
height_ratios = [panel_height] * n_gwas
|
|
813
|
+
if finemapping_df is not None:
|
|
814
|
+
height_ratios.append(finemapping_height)
|
|
584
815
|
if eqtl_df is not None:
|
|
585
816
|
height_ratios.append(eqtl_height)
|
|
586
817
|
if genes_df is not None:
|
|
@@ -590,7 +821,9 @@ class LocusZoomPlotter:
|
|
|
590
821
|
total_height = figsize[1] if figsize[1] else sum(height_ratios)
|
|
591
822
|
actual_figsize = (figsize[0], total_height)
|
|
592
823
|
|
|
593
|
-
logger.debug(
|
|
824
|
+
logger.debug(
|
|
825
|
+
f"Creating stacked plot with {n_panels} panels for chr{chrom}:{start}-{end}"
|
|
826
|
+
)
|
|
594
827
|
|
|
595
828
|
# Prevent auto-display in interactive environments
|
|
596
829
|
plt.ioff()
|
|
@@ -617,9 +850,9 @@ class LocusZoomPlotter:
|
|
|
617
850
|
df = gwas_df.copy()
|
|
618
851
|
df["neglog10p"] = -np.log10(df[p_col].clip(lower=1e-300))
|
|
619
852
|
|
|
620
|
-
#
|
|
621
|
-
|
|
622
|
-
if ld_reference_files and ld_reference_files[i] and lead_pos:
|
|
853
|
+
# Use pre-computed LD or calculate from reference
|
|
854
|
+
panel_ld_col = ld_col
|
|
855
|
+
if ld_reference_files and ld_reference_files[i] and lead_pos and not ld_col:
|
|
623
856
|
lead_snp_row = df[df[pos_col] == lead_pos]
|
|
624
857
|
if not lead_snp_row.empty and rs_col in df.columns:
|
|
625
858
|
lead_snp_id = lead_snp_row[rs_col].iloc[0]
|
|
@@ -632,19 +865,32 @@ class LocusZoomPlotter:
|
|
|
632
865
|
)
|
|
633
866
|
if not ld_df.empty:
|
|
634
867
|
df = df.merge(ld_df, left_on=rs_col, right_on="SNP", how="left")
|
|
635
|
-
|
|
868
|
+
panel_ld_col = "R2"
|
|
636
869
|
|
|
637
870
|
# Plot association
|
|
638
|
-
self._plot_association(ax, df, pos_col,
|
|
871
|
+
self._plot_association(ax, df, pos_col, panel_ld_col, lead_pos)
|
|
639
872
|
|
|
640
873
|
# Add significance line
|
|
641
|
-
ax.axhline(
|
|
874
|
+
ax.axhline(
|
|
875
|
+
y=self._genomewide_line,
|
|
876
|
+
color="red",
|
|
877
|
+
linestyle="--",
|
|
878
|
+
linewidth=1,
|
|
879
|
+
alpha=0.8,
|
|
880
|
+
zorder=1,
|
|
881
|
+
)
|
|
642
882
|
|
|
643
883
|
# Add SNP labels
|
|
644
884
|
if snp_labels and rs_col in df.columns and label_top_n > 0 and not df.empty:
|
|
645
885
|
add_snp_labels(
|
|
646
|
-
ax,
|
|
647
|
-
|
|
886
|
+
ax,
|
|
887
|
+
df,
|
|
888
|
+
pos_col=pos_col,
|
|
889
|
+
neglog10p_col="neglog10p",
|
|
890
|
+
rs_col=rs_col,
|
|
891
|
+
label_top_n=label_top_n,
|
|
892
|
+
genes_df=genes_df,
|
|
893
|
+
chrom=chrom,
|
|
648
894
|
)
|
|
649
895
|
|
|
650
896
|
# Add recombination overlay (only on first panel)
|
|
@@ -670,13 +916,50 @@ class LocusZoomPlotter:
|
|
|
670
916
|
)
|
|
671
917
|
|
|
672
918
|
# Add LD legend (only on first panel)
|
|
673
|
-
if i == 0 and
|
|
919
|
+
if i == 0 and panel_ld_col is not None and panel_ld_col in df.columns:
|
|
674
920
|
self._add_ld_legend(ax)
|
|
675
921
|
|
|
676
|
-
#
|
|
922
|
+
# Track current panel index
|
|
677
923
|
panel_idx = n_gwas
|
|
678
|
-
|
|
924
|
+
|
|
925
|
+
# Plot fine-mapping panel if provided
|
|
926
|
+
if finemapping_df is not None:
|
|
679
927
|
ax = axes[panel_idx]
|
|
928
|
+
fm_data = prepare_finemapping_for_plotting(
|
|
929
|
+
finemapping_df,
|
|
930
|
+
pos_col="pos",
|
|
931
|
+
pip_col="pip",
|
|
932
|
+
chrom=chrom,
|
|
933
|
+
start=start,
|
|
934
|
+
end=end,
|
|
935
|
+
)
|
|
936
|
+
|
|
937
|
+
if not fm_data.empty:
|
|
938
|
+
self._plot_finemapping(
|
|
939
|
+
ax,
|
|
940
|
+
fm_data,
|
|
941
|
+
pos_col="pos",
|
|
942
|
+
pip_col="pip",
|
|
943
|
+
cs_col=finemapping_cs_col,
|
|
944
|
+
show_credible_sets=True,
|
|
945
|
+
pip_threshold=0.01,
|
|
946
|
+
)
|
|
947
|
+
|
|
948
|
+
# Add legend for credible sets
|
|
949
|
+
credible_sets = get_credible_sets(fm_data, finemapping_cs_col)
|
|
950
|
+
if credible_sets:
|
|
951
|
+
self._add_finemapping_legend(ax, credible_sets)
|
|
952
|
+
|
|
953
|
+
ax.set_ylabel("PIP")
|
|
954
|
+
ax.set_ylim(-0.05, 1.05)
|
|
955
|
+
ax.spines["top"].set_visible(False)
|
|
956
|
+
ax.spines["right"].set_visible(False)
|
|
957
|
+
panel_idx += 1
|
|
958
|
+
|
|
959
|
+
# Plot eQTL panel if provided
|
|
960
|
+
eqtl_panel_idx = panel_idx
|
|
961
|
+
if eqtl_df is not None:
|
|
962
|
+
ax = axes[eqtl_panel_idx]
|
|
680
963
|
eqtl_data = eqtl_df.copy()
|
|
681
964
|
|
|
682
965
|
# Filter by gene if specified
|
|
@@ -685,27 +968,59 @@ class LocusZoomPlotter:
|
|
|
685
968
|
|
|
686
969
|
# Filter by region
|
|
687
970
|
if "pos" in eqtl_data.columns:
|
|
688
|
-
eqtl_data = eqtl_data[
|
|
971
|
+
eqtl_data = eqtl_data[
|
|
972
|
+
(eqtl_data["pos"] >= start) & (eqtl_data["pos"] <= end)
|
|
973
|
+
]
|
|
689
974
|
|
|
690
975
|
if not eqtl_data.empty:
|
|
691
|
-
eqtl_data["neglog10p"] = -np.log10(
|
|
692
|
-
|
|
693
|
-
# Plot as diamonds (different from GWAS circles)
|
|
694
|
-
ax.scatter(
|
|
695
|
-
eqtl_data["pos"],
|
|
696
|
-
eqtl_data["neglog10p"],
|
|
697
|
-
c="#FF6B6B",
|
|
698
|
-
s=60,
|
|
699
|
-
marker="D",
|
|
700
|
-
edgecolor="black",
|
|
701
|
-
linewidth=0.5,
|
|
702
|
-
zorder=2,
|
|
703
|
-
label=f"eQTL ({eqtl_gene})" if eqtl_gene else "eQTL",
|
|
976
|
+
eqtl_data["neglog10p"] = -np.log10(
|
|
977
|
+
eqtl_data["p_value"].clip(lower=1e-300)
|
|
704
978
|
)
|
|
705
|
-
|
|
979
|
+
|
|
980
|
+
# Check if effect_size column exists for directional coloring
|
|
981
|
+
has_effect = "effect_size" in eqtl_data.columns
|
|
982
|
+
|
|
983
|
+
if has_effect:
|
|
984
|
+
# Plot triangles by effect direction with color by magnitude
|
|
985
|
+
for _, row in eqtl_data.iterrows():
|
|
986
|
+
effect = row["effect_size"]
|
|
987
|
+
color = get_eqtl_color(effect)
|
|
988
|
+
marker = "^" if effect >= 0 else "v"
|
|
989
|
+
ax.scatter(
|
|
990
|
+
row["pos"],
|
|
991
|
+
row["neglog10p"],
|
|
992
|
+
c=color,
|
|
993
|
+
s=50,
|
|
994
|
+
marker=marker,
|
|
995
|
+
edgecolor="black",
|
|
996
|
+
linewidth=0.5,
|
|
997
|
+
zorder=2,
|
|
998
|
+
)
|
|
999
|
+
# Add eQTL effect legend
|
|
1000
|
+
self._add_eqtl_legend(ax)
|
|
1001
|
+
else:
|
|
1002
|
+
# No effect sizes - plot as diamonds
|
|
1003
|
+
ax.scatter(
|
|
1004
|
+
eqtl_data["pos"],
|
|
1005
|
+
eqtl_data["neglog10p"],
|
|
1006
|
+
c="#FF6B6B",
|
|
1007
|
+
s=60,
|
|
1008
|
+
marker="D",
|
|
1009
|
+
edgecolor="black",
|
|
1010
|
+
linewidth=0.5,
|
|
1011
|
+
zorder=2,
|
|
1012
|
+
label=f"eQTL ({eqtl_gene})" if eqtl_gene else "eQTL",
|
|
1013
|
+
)
|
|
1014
|
+
ax.legend(loc="upper right", fontsize=9)
|
|
706
1015
|
|
|
707
1016
|
ax.set_ylabel(r"$-\log_{10}$ P (eQTL)")
|
|
708
|
-
ax.axhline(
|
|
1017
|
+
ax.axhline(
|
|
1018
|
+
y=self._genomewide_line,
|
|
1019
|
+
color="red",
|
|
1020
|
+
linestyle="--",
|
|
1021
|
+
linewidth=1,
|
|
1022
|
+
alpha=0.8,
|
|
1023
|
+
)
|
|
709
1024
|
ax.spines["top"].set_visible(False)
|
|
710
1025
|
ax.spines["right"].set_visible(False)
|
|
711
1026
|
panel_idx += 1
|