pylocuszoom 0.3.0__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylocuszoom/__init__.py +74 -2
- pylocuszoom/backends/base.py +131 -0
- pylocuszoom/backends/bokeh_backend.py +254 -68
- pylocuszoom/backends/matplotlib_backend.py +173 -0
- pylocuszoom/backends/plotly_backend.py +327 -87
- pylocuszoom/colors.py +44 -1
- pylocuszoom/forest.py +37 -0
- pylocuszoom/gene_track.py +1 -0
- pylocuszoom/loaders.py +880 -0
- pylocuszoom/phewas.py +35 -0
- pylocuszoom/plotter.py +342 -117
- pylocuszoom/py.typed +0 -0
- pylocuszoom/recombination.py +49 -35
- pylocuszoom/schemas.py +406 -0
- {pylocuszoom-0.3.0.dist-info → pylocuszoom-0.6.0.dist-info}/METADATA +153 -25
- pylocuszoom-0.6.0.dist-info/RECORD +26 -0
- pylocuszoom-0.3.0.dist-info/RECORD +0 -21
- {pylocuszoom-0.3.0.dist-info → pylocuszoom-0.6.0.dist-info}/WHEEL +0 -0
- {pylocuszoom-0.3.0.dist-info → pylocuszoom-0.6.0.dist-info}/licenses/LICENSE.md +0 -0
pylocuszoom/phewas.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""PheWAS data validation and preparation.
|
|
2
|
+
|
|
3
|
+
Validates and prepares phenome-wide association study data for plotting.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import pandas as pd
|
|
7
|
+
|
|
8
|
+
from .utils import ValidationError
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def validate_phewas_df(
    df: pd.DataFrame,
    phenotype_col: str = "phenotype",
    p_col: str = "p_value",
    category_col: str = "category",
) -> None:
    """Check that a PheWAS results table carries the columns needed to plot it.

    Only the phenotype and p-value columns are mandatory. ``category_col`` is
    part of the signature but is not checked here, so a table without a
    category column passes validation.

    Args:
        df: PheWAS results DataFrame.
        phenotype_col: Name of the column holding phenotype names.
        p_col: Name of the column holding p-values.
        category_col: Name of the (optional) phenotype category column.

    Raises:
        ValidationError: If ``phenotype_col`` or ``p_col`` is absent from
            ``df``. The message lists the missing, the required and the
            available column names.
    """
    required = [phenotype_col, p_col]

    missing = []
    for name in required:
        if name not in df.columns:
            missing.append(name)

    # Guard clause: nothing missing means the frame is acceptable.
    if not missing:
        return

    raise ValidationError(
        f"PheWAS DataFrame missing required columns: {missing}. "
        f"Required: {required}. Found: {list(df.columns)}"
    )
|
pylocuszoom/plotter.py
CHANGED
|
@@ -31,12 +31,14 @@ from .colors import (
|
|
|
31
31
|
get_eqtl_color,
|
|
32
32
|
get_ld_bin,
|
|
33
33
|
get_ld_color_palette,
|
|
34
|
+
get_phewas_category_palette,
|
|
34
35
|
)
|
|
35
36
|
from .eqtl import validate_eqtl_df
|
|
36
37
|
from .finemapping import (
|
|
37
38
|
get_credible_sets,
|
|
38
39
|
prepare_finemapping_for_plotting,
|
|
39
40
|
)
|
|
41
|
+
from .forest import validate_forest_df
|
|
40
42
|
from .gene_track import (
|
|
41
43
|
assign_gene_positions,
|
|
42
44
|
plot_gene_track,
|
|
@@ -45,6 +47,7 @@ from .gene_track import (
|
|
|
45
47
|
from .labels import add_snp_labels
|
|
46
48
|
from .ld import calculate_ld, find_plink
|
|
47
49
|
from .logging import enable_logging, logger
|
|
50
|
+
from .phewas import validate_phewas_df
|
|
48
51
|
from .recombination import (
|
|
49
52
|
RECOMB_COLOR,
|
|
50
53
|
add_recombination_overlay,
|
|
@@ -139,11 +142,8 @@ class LocusZoomPlotter:
|
|
|
139
142
|
@staticmethod
def _default_build(species: str) -> Optional[str]:
    """Look up the default genome build name for a species.

    Args:
        species: Species key; "canine" and "feline" are the known keys.

    Returns:
        The build name for a known species, or None for any other key.
    """
    default_builds = {
        "canine": "canfam3.1",
        "feline": "felCat9",
    }
    try:
        return default_builds[species]
    except KeyError:
        # Unknown species have no default build.
        return None
|
|
147
147
|
|
|
148
148
|
def _ensure_recomb_maps(self) -> Optional[Path]:
|
|
149
149
|
"""Ensure recombination maps are downloaded.
|
|
@@ -157,8 +157,8 @@ class LocusZoomPlotter:
|
|
|
157
157
|
default_dir = get_default_data_dir()
|
|
158
158
|
if (
|
|
159
159
|
default_dir.exists()
|
|
160
|
-
and len(list(default_dir.glob("chr*_recomb.tsv"))) >=
|
|
161
|
-
):
|
|
160
|
+
and len(list(default_dir.glob("chr*_recomb.tsv"))) >= 39
|
|
161
|
+
): # 38 autosomes + X
|
|
162
162
|
return default_dir
|
|
163
163
|
# Download
|
|
164
164
|
try:
|
|
@@ -215,7 +215,7 @@ class LocusZoomPlotter:
|
|
|
215
215
|
p_col: str = "p_wald",
|
|
216
216
|
rs_col: str = "rs",
|
|
217
217
|
figsize: Tuple[int, int] = (12, 8),
|
|
218
|
-
) ->
|
|
218
|
+
) -> Any:
|
|
219
219
|
"""Create a regional association plot.
|
|
220
220
|
|
|
221
221
|
Args:
|
|
@@ -589,53 +589,6 @@ class LocusZoomPlotter:
|
|
|
589
589
|
yaxis_name=secondary_y,
|
|
590
590
|
)
|
|
591
591
|
|
|
592
|
-
def _add_eqtl_legend(self, ax: Axes) -> None:
|
|
593
|
-
"""Add eQTL effect size legend to plot."""
|
|
594
|
-
legend_elements = []
|
|
595
|
-
|
|
596
|
-
# Positive effects (upward triangles)
|
|
597
|
-
for _, _, label, color in EQTL_POSITIVE_BINS:
|
|
598
|
-
legend_elements.append(
|
|
599
|
-
Line2D(
|
|
600
|
-
[0],
|
|
601
|
-
[0],
|
|
602
|
-
marker="^",
|
|
603
|
-
color="w",
|
|
604
|
-
markerfacecolor=color,
|
|
605
|
-
markeredgecolor="black",
|
|
606
|
-
markersize=7,
|
|
607
|
-
label=label,
|
|
608
|
-
)
|
|
609
|
-
)
|
|
610
|
-
|
|
611
|
-
# Negative effects (downward triangles)
|
|
612
|
-
for _, _, label, color in EQTL_NEGATIVE_BINS:
|
|
613
|
-
legend_elements.append(
|
|
614
|
-
Line2D(
|
|
615
|
-
[0],
|
|
616
|
-
[0],
|
|
617
|
-
marker="v",
|
|
618
|
-
color="w",
|
|
619
|
-
markerfacecolor=color,
|
|
620
|
-
markeredgecolor="black",
|
|
621
|
-
markersize=7,
|
|
622
|
-
label=label,
|
|
623
|
-
)
|
|
624
|
-
)
|
|
625
|
-
|
|
626
|
-
ax.legend(
|
|
627
|
-
handles=legend_elements,
|
|
628
|
-
loc="upper right",
|
|
629
|
-
fontsize=8,
|
|
630
|
-
frameon=True,
|
|
631
|
-
framealpha=0.9,
|
|
632
|
-
title="eQTL effect",
|
|
633
|
-
title_fontsize=9,
|
|
634
|
-
handlelength=1.2,
|
|
635
|
-
handleheight=1.0,
|
|
636
|
-
labelspacing=0.3,
|
|
637
|
-
)
|
|
638
|
-
|
|
639
592
|
def _plot_finemapping(
|
|
640
593
|
self,
|
|
641
594
|
ax: Axes,
|
|
@@ -657,6 +610,23 @@ class LocusZoomPlotter:
|
|
|
657
610
|
show_credible_sets: Whether to color points by credible set.
|
|
658
611
|
pip_threshold: Minimum PIP to display as scatter point.
|
|
659
612
|
"""
|
|
613
|
+
|
|
614
|
+
def _build_finemapping_hover_data(
    subset_df: pd.DataFrame,
) -> Optional[pd.DataFrame]:
    """Assemble the hover table handed to the backend for interactive plots.

    The column names ``pos_col``, ``pip_col`` and ``cs_col`` are read from
    the enclosing scope. Each one that is present in ``subset_df`` is copied
    (as a plain array) under a display label.

    Args:
        subset_df: The rows about to be drawn by one scatter call.

    Returns:
        A DataFrame with any of the columns "Position", "PIP" and
        "Credible Set", or None when none of the source columns exist.
    """
    available = subset_df.columns
    hover_cols = {}

    # (display label, source column, skip outright when the name is falsy)
    sources = (
        ("Position", pos_col, False),
        ("PIP", pip_col, False),
        ("Credible Set", cs_col, True),
    )
    for label, column, needs_name in sources:
        if needs_name and not column:
            continue
        if column in available:
            hover_cols[label] = subset_df[column].values

    if not hover_cols:
        return None
    return pd.DataFrame(hover_cols)
|
|
629
|
+
|
|
660
630
|
# Sort by position for line plotting
|
|
661
631
|
df = df.sort_values(pos_col)
|
|
662
632
|
|
|
@@ -690,7 +660,7 @@ class LocusZoomPlotter:
|
|
|
690
660
|
edgecolor="black",
|
|
691
661
|
linewidth=0.5,
|
|
692
662
|
zorder=3,
|
|
693
|
-
|
|
663
|
+
hover_data=_build_finemapping_hover_data(cs_data),
|
|
694
664
|
)
|
|
695
665
|
# Plot variants not in any credible set
|
|
696
666
|
non_cs_data = df[(df[cs_col].isna()) | (df[cs_col] == 0)]
|
|
@@ -707,6 +677,7 @@ class LocusZoomPlotter:
|
|
|
707
677
|
edgecolor="black",
|
|
708
678
|
linewidth=0.3,
|
|
709
679
|
zorder=2,
|
|
680
|
+
hover_data=_build_finemapping_hover_data(non_cs_data),
|
|
710
681
|
)
|
|
711
682
|
else:
|
|
712
683
|
# No credible sets - show all points above threshold
|
|
@@ -723,51 +694,9 @@ class LocusZoomPlotter:
|
|
|
723
694
|
edgecolor="black",
|
|
724
695
|
linewidth=0.5,
|
|
725
696
|
zorder=3,
|
|
697
|
+
hover_data=_build_finemapping_hover_data(high_pip),
|
|
726
698
|
)
|
|
727
699
|
|
|
728
|
-
def _add_finemapping_legend(
|
|
729
|
-
self,
|
|
730
|
-
ax: Axes,
|
|
731
|
-
credible_sets: List[int],
|
|
732
|
-
) -> None:
|
|
733
|
-
"""Add fine-mapping legend showing credible sets.
|
|
734
|
-
|
|
735
|
-
Args:
|
|
736
|
-
ax: Matplotlib axes object.
|
|
737
|
-
credible_sets: List of credible set IDs to include.
|
|
738
|
-
"""
|
|
739
|
-
if not credible_sets:
|
|
740
|
-
return
|
|
741
|
-
|
|
742
|
-
legend_elements = []
|
|
743
|
-
for cs_id in credible_sets:
|
|
744
|
-
color = get_credible_set_color(cs_id)
|
|
745
|
-
legend_elements.append(
|
|
746
|
-
Line2D(
|
|
747
|
-
[0],
|
|
748
|
-
[0],
|
|
749
|
-
marker="o",
|
|
750
|
-
color="w",
|
|
751
|
-
markerfacecolor=color,
|
|
752
|
-
markeredgecolor="black",
|
|
753
|
-
markersize=7,
|
|
754
|
-
label=f"CS{cs_id}",
|
|
755
|
-
)
|
|
756
|
-
)
|
|
757
|
-
|
|
758
|
-
ax.legend(
|
|
759
|
-
handles=legend_elements,
|
|
760
|
-
loc="upper right",
|
|
761
|
-
fontsize=8,
|
|
762
|
-
frameon=True,
|
|
763
|
-
framealpha=0.9,
|
|
764
|
-
title="Credible sets",
|
|
765
|
-
title_fontsize=9,
|
|
766
|
-
handlelength=1.2,
|
|
767
|
-
handleheight=1.0,
|
|
768
|
-
labelspacing=0.3,
|
|
769
|
-
)
|
|
770
|
-
|
|
771
700
|
def plot_stacked(
|
|
772
701
|
self,
|
|
773
702
|
gwas_dfs: List[pd.DataFrame],
|
|
@@ -968,7 +897,9 @@ class LocusZoomPlotter:
|
|
|
968
897
|
panel_ld_col = "R2"
|
|
969
898
|
|
|
970
899
|
# Plot association
|
|
971
|
-
self._plot_association(
|
|
900
|
+
self._plot_association(
|
|
901
|
+
ax, df, pos_col, panel_ld_col, lead_pos, rs_col, p_col
|
|
902
|
+
)
|
|
972
903
|
|
|
973
904
|
# Add significance line
|
|
974
905
|
self._backend.axhline(
|
|
@@ -1080,10 +1011,12 @@ class LocusZoomPlotter:
|
|
|
1080
1011
|
pip_threshold=0.01,
|
|
1081
1012
|
)
|
|
1082
1013
|
|
|
1083
|
-
# Add legend for credible sets
|
|
1014
|
+
# Add legend for credible sets (all backends)
|
|
1084
1015
|
credible_sets = get_credible_sets(fm_data, finemapping_cs_col)
|
|
1085
1016
|
if credible_sets:
|
|
1086
|
-
self.
|
|
1017
|
+
self._backend.add_finemapping_legend(
|
|
1018
|
+
ax, credible_sets, get_credible_set_color
|
|
1019
|
+
)
|
|
1087
1020
|
|
|
1088
1021
|
self._backend.set_ylabel(ax, "PIP")
|
|
1089
1022
|
self._backend.set_ylim(ax, -0.05, 1.05)
|
|
@@ -1100,41 +1033,87 @@ class LocusZoomPlotter:
|
|
|
1100
1033
|
if eqtl_gene and "gene" in eqtl_data.columns:
|
|
1101
1034
|
eqtl_data = eqtl_data[eqtl_data["gene"] == eqtl_gene]
|
|
1102
1035
|
|
|
1103
|
-
# Filter by region
|
|
1036
|
+
# Filter by region (position and chromosome)
|
|
1104
1037
|
if "pos" in eqtl_data.columns:
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1038
|
+
mask = (eqtl_data["pos"] >= start) & (eqtl_data["pos"] <= end)
|
|
1039
|
+
# Also filter by chromosome if column exists
|
|
1040
|
+
if "chr" in eqtl_data.columns:
|
|
1041
|
+
chrom_str = str(chrom).replace("chr", "")
|
|
1042
|
+
eqtl_chrom = (
|
|
1043
|
+
eqtl_data["chr"].astype(str).str.replace("chr", "", regex=False)
|
|
1044
|
+
)
|
|
1045
|
+
mask = mask & (eqtl_chrom == chrom_str)
|
|
1046
|
+
eqtl_data = eqtl_data[mask]
|
|
1108
1047
|
|
|
1109
1048
|
if not eqtl_data.empty:
|
|
1110
1049
|
eqtl_data["neglog10p"] = -np.log10(
|
|
1111
1050
|
eqtl_data["p_value"].clip(lower=1e-300)
|
|
1112
1051
|
)
|
|
1113
1052
|
|
|
1053
|
+
def _build_eqtl_hover_data(
    subset_df: pd.DataFrame,
) -> Optional[pd.DataFrame]:
    """Assemble the eQTL hover table handed to the backend for interactive plots.

    Args:
        subset_df: The eQTL rows about to be drawn by one scatter call.

    Returns:
        A DataFrame whose columns are the display labels "Position",
        "P-value", "Effect" and "Gene" for whichever of the source columns
        "pos", "p_value", "effect_size" and "gene" exist in ``subset_df``,
        or None when none of them exist.
    """
    # Display label -> source column, in the order the labels should appear.
    label_to_column = (
        ("Position", "pos"),
        ("P-value", "p_value"),
        ("Effect", "effect_size"),
        ("Gene", "gene"),
    )
    hover_cols = {
        label: subset_df[column].values
        for label, column in label_to_column
        if column in subset_df.columns
    }
    if not hover_cols:
        return None
    return pd.DataFrame(hover_cols)
|
|
1071
|
+
|
|
1114
1072
|
# Check if effect_size column exists for directional coloring
|
|
1115
1073
|
has_effect = "effect_size" in eqtl_data.columns
|
|
1116
1074
|
|
|
1117
1075
|
if has_effect:
|
|
1118
|
-
# Plot triangles by effect direction
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1076
|
+
# Plot triangles by effect direction (batch by sign for efficiency)
|
|
1077
|
+
pos_effects = eqtl_data[eqtl_data["effect_size"] >= 0]
|
|
1078
|
+
neg_effects = eqtl_data[eqtl_data["effect_size"] < 0]
|
|
1079
|
+
|
|
1080
|
+
# Plot positive effects (up triangles)
|
|
1081
|
+
for _, row in pos_effects.iterrows():
|
|
1082
|
+
row_df = pd.DataFrame([row])
|
|
1123
1083
|
self._backend.scatter(
|
|
1124
1084
|
ax,
|
|
1125
1085
|
pd.Series([row["pos"]]),
|
|
1126
1086
|
pd.Series([row["neglog10p"]]),
|
|
1127
|
-
colors=
|
|
1087
|
+
colors=get_eqtl_color(row["effect_size"]),
|
|
1128
1088
|
sizes=50,
|
|
1129
|
-
marker=
|
|
1089
|
+
marker="^",
|
|
1130
1090
|
edgecolor="black",
|
|
1131
1091
|
linewidth=0.5,
|
|
1132
1092
|
zorder=2,
|
|
1093
|
+
hover_data=_build_eqtl_hover_data(row_df),
|
|
1133
1094
|
)
|
|
1134
|
-
#
|
|
1135
|
-
|
|
1095
|
+
# Plot negative effects (down triangles)
|
|
1096
|
+
for _, row in neg_effects.iterrows():
|
|
1097
|
+
row_df = pd.DataFrame([row])
|
|
1098
|
+
self._backend.scatter(
|
|
1099
|
+
ax,
|
|
1100
|
+
pd.Series([row["pos"]]),
|
|
1101
|
+
pd.Series([row["neglog10p"]]),
|
|
1102
|
+
colors=get_eqtl_color(row["effect_size"]),
|
|
1103
|
+
sizes=50,
|
|
1104
|
+
marker="v",
|
|
1105
|
+
edgecolor="black",
|
|
1106
|
+
linewidth=0.5,
|
|
1107
|
+
zorder=2,
|
|
1108
|
+
hover_data=_build_eqtl_hover_data(row_df),
|
|
1109
|
+
)
|
|
1110
|
+
# Add eQTL effect legend (all backends)
|
|
1111
|
+
self._backend.add_eqtl_legend(
|
|
1112
|
+
ax, EQTL_POSITIVE_BINS, EQTL_NEGATIVE_BINS
|
|
1113
|
+
)
|
|
1136
1114
|
else:
|
|
1137
1115
|
# No effect sizes - plot as diamonds
|
|
1116
|
+
label = f"eQTL ({eqtl_gene})" if eqtl_gene else "eQTL"
|
|
1138
1117
|
self._backend.scatter(
|
|
1139
1118
|
ax,
|
|
1140
1119
|
eqtl_data["pos"],
|
|
@@ -1145,9 +1124,10 @@ class LocusZoomPlotter:
|
|
|
1145
1124
|
edgecolor="black",
|
|
1146
1125
|
linewidth=0.5,
|
|
1147
1126
|
zorder=2,
|
|
1148
|
-
label=
|
|
1127
|
+
label=label,
|
|
1128
|
+
hover_data=_build_eqtl_hover_data(eqtl_data),
|
|
1149
1129
|
)
|
|
1150
|
-
|
|
1130
|
+
self._backend.add_simple_legend(ax, label, loc="upper right")
|
|
1151
1131
|
|
|
1152
1132
|
self._backend.set_ylabel(ax, r"$-\log_{10}$ P (eQTL)")
|
|
1153
1133
|
self._backend.axhline(
|
|
@@ -1184,3 +1164,248 @@ class LocusZoomPlotter:
|
|
|
1184
1164
|
self._backend.finalize_layout(fig, hspace=0.1)
|
|
1185
1165
|
|
|
1186
1166
|
return fig
|
|
1167
|
+
|
|
1168
|
+
def plot_phewas(
    self,
    phewas_df: pd.DataFrame,
    variant_id: str,
    phenotype_col: str = "phenotype",
    p_col: str = "p_value",
    category_col: str = "category",
    effect_col: Optional[str] = None,
    significance_threshold: float = 5e-8,
    figsize: Tuple[float, float] = (10, 8),
) -> Any:
    """Create a PheWAS (Phenome-Wide Association Study) plot.

    Shows associations of a single variant across multiple phenotypes.
    Each phenotype occupies one row on the y-axis and is placed at its
    -log10(p) on the x-axis. When a category column is present, points are
    colored per category; rows whose category is missing are still drawn,
    using the default color. When an effect column is present, the marker
    encodes effect direction: "^" for effects >= 0, "v" for effects < 0 and
    "o" for rows whose effect is missing.

    Args:
        phewas_df: DataFrame with phenotype associations. It is not
            modified; the method works on a copy.
        variant_id: Variant identifier (e.g., "rs12345") for plot title.
        phenotype_col: Column name for phenotype names.
        p_col: Column name for p-values. Values are clipped to a minimum of
            1e-300 before the log transform.
        category_col: Column name for phenotype categories. The column is
            optional; without it all points share the default color.
        effect_col: Optional column name for effect direction (beta/OR).
            Ignored when None or when the column is absent.
        significance_threshold: P-value threshold for significance line.
        figsize: Figure size as (width, height).

    Returns:
        Figure object (type depends on backend).

    Raises:
        ValidationError: If ``phenotype_col`` or ``p_col`` is missing from
            ``phewas_df`` (raised by ``validate_phewas_df``).

    Example:
        >>> fig = plotter.plot_phewas(
        ...     phewas_df,
        ...     variant_id="rs12345",
        ...     category_col="category",
        ... )
    """
    validate_phewas_df(phewas_df, phenotype_col, p_col, category_col)

    df = phewas_df.copy()
    df["neglog10p"] = -np.log10(df[p_col].clip(lower=1e-300))

    has_category = category_col in df.columns
    has_effect = bool(effect_col) and effect_col in df.columns

    # Sort by category then by p-value for consistent ordering
    if has_category:
        df = df.sort_values([category_col, p_col])
        # Missing categories are excluded here: NaN never compares equal to
        # itself, so such rows need their own selection further down.
        categories = df[category_col].dropna().unique().tolist()
        palette = get_phewas_category_palette(categories)
    else:
        df = df.sort_values(p_col)
        categories = []
        palette = {}

    # Create figure
    fig, axes = self._backend.create_figure(
        n_panels=1,
        height_ratios=[1.0],
        figsize=figsize,
    )
    ax = axes[0]

    # Assign y-positions (one per phenotype)
    df["y_pos"] = range(len(df))

    default_color = "#4169E1"

    def _build_phewas_hover_data(subset_df: pd.DataFrame) -> pd.DataFrame:
        """Build hover data for interactive backends."""
        hover_cols = {
            "Phenotype": subset_df[phenotype_col].values,
            "P-value": subset_df[p_col].values,
        }
        if has_category:
            hover_cols["Category"] = subset_df[category_col].values
        if has_effect:
            hover_cols["Effect"] = subset_df[effect_col].values
        return pd.DataFrame(hover_cols)

    # Split the rows into (rows, color) groups: one per category, plus one
    # for rows without a category so that they are not silently dropped.
    if categories:
        groups = [
            (df[df[category_col] == cat], palette[cat]) for cat in categories
        ]
        uncategorized = df[df[category_col].isna()]
        if not uncategorized.empty:
            groups.append((uncategorized, default_color))
    else:
        groups = [(df, default_color)]

    for group_df, color in groups:
        if has_effect:
            # One scatter call per direction instead of one per row.
            effects = group_df[effect_col]
            candidates = [
                ("^", group_df[effects >= 0]),
                ("v", group_df[effects < 0]),
                ("o", group_df[effects.isna()]),
            ]
            parts = [(mk, part) for mk, part in candidates if not part.empty]
        else:
            parts = [("o", group_df)]

        for marker, part in parts:
            self._backend.scatter(
                ax,
                part["neglog10p"],
                part["y_pos"],
                colors=color,
                sizes=60,
                marker=marker,
                edgecolor="black",
                linewidth=0.5,
                zorder=2,
                hover_data=_build_phewas_hover_data(part),
            )

    # Add significance threshold line
    sig_line = -np.log10(significance_threshold)
    self._backend.axvline(
        ax, x=sig_line, color="red", linestyle="--", linewidth=1, alpha=0.7
    )

    # Set axis labels and limits
    self._backend.set_xlabel(ax, r"$-\log_{10}$ P")
    self._backend.set_ylabel(ax, "Phenotype")
    self._backend.set_ylim(ax, -0.5, len(df) - 0.5)

    # Set y-tick labels to phenotype names (matplotlib only)
    if self.backend_name == "matplotlib":
        ax.set_yticks(df["y_pos"])
        ax.set_yticklabels(df[phenotype_col], fontsize=8)

    self._backend.set_title(ax, f"PheWAS: {variant_id}")
    self._backend.hide_spines(ax, ["top", "right"])
    self._backend.finalize_layout(fig)

    return fig
|
|
1294
|
+
|
|
1295
|
+
def plot_forest(
    self,
    forest_df: pd.DataFrame,
    variant_id: str,
    study_col: str = "study",
    effect_col: str = "effect",
    ci_lower_col: str = "ci_lower",
    ci_upper_col: str = "ci_upper",
    weight_col: Optional[str] = None,
    null_value: float = 0.0,
    effect_label: str = "Effect Size",
    figsize: Tuple[float, float] = (8, 6),
) -> Any:
    """Draw a forest plot: one effect estimate with its interval per study.

    Rows are laid out top to bottom in the order they appear in
    ``forest_df``. Each row gets a horizontal error bar spanning the
    confidence interval and a square marker at the effect size. A dashed
    vertical line marks ``null_value``.

    Args:
        forest_df: DataFrame with effect sizes and confidence intervals.
            It is not modified; the method works on a copy.
        variant_id: Variant identifier used in the plot title.
        study_col: Column name for study/phenotype names.
        effect_col: Column name for effect sizes.
        ci_lower_col: Column name for the lower confidence bound.
        ci_upper_col: Column name for the upper confidence bound.
        weight_col: Optional column of study weights. When given and
            present, weights are scaled linearly to marker sizes between
            40 and 200; if the weights have no positive spread, every
            marker gets size 120. Otherwise a fixed size of 80 is used.
        null_value: Reference value for null effect (0 for beta, 1 for OR).
        effect_label: X-axis label.
        figsize: Figure size as (width, height).

    Returns:
        Figure object (type depends on backend).

    Example:
        >>> fig = plotter.plot_forest(
        ...     forest_df,
        ...     variant_id="rs12345",
        ...     effect_label="Odds Ratio",
        ...     null_value=1.0,
        ... )
    """
    validate_forest_df(forest_df, study_col, effect_col, ci_lower_col, ci_upper_col)

    data = forest_df.copy()
    n_rows = len(data)

    # Single-panel figure
    fig, panels = self._backend.create_figure(
        n_panels=1,
        height_ratios=[1.0],
        figsize=figsize,
    )
    ax = panels[0]

    # Count down from the top so the first study ends up on the top row.
    data["y_pos"] = range(n_rows - 1, -1, -1)

    # Marker sizes: fixed unless a usable weight column was supplied.
    marker_sizes = 80
    if weight_col and weight_col in data.columns:
        smallest, largest = 40, 200
        study_weights = data[weight_col]
        spread = study_weights.max() - study_weights.min()
        if spread > 0:
            fraction = (study_weights - study_weights.min()) / spread
            marker_sizes = smallest + fraction * (largest - smallest)
        else:
            # No spread to scale by: use the midpoint for every marker.
            marker_sizes = (smallest + largest) / 2

    effects = data[effect_col]
    below = effects - data[ci_lower_col]
    above = data[ci_upper_col] - effects

    # Confidence intervals as horizontal error bars
    self._backend.errorbar_h(
        ax,
        x=data[effect_col],
        y=data["y_pos"],
        xerr_lower=below,
        xerr_upper=above,
        color="black",
        linewidth=1.5,
        capsize=3,
        zorder=2,
    )

    # Point estimates, drawn above the error bars
    self._backend.scatter(
        ax,
        data[effect_col],
        data["y_pos"],
        colors="#4169E1",
        sizes=marker_sizes,
        marker="s",  # square markers typical for forest plots
        edgecolor="black",
        linewidth=0.5,
        zorder=3,
    )

    # Reference line at the null effect
    self._backend.axvline(
        ax, x=null_value, color="grey", linestyle="--", linewidth=1, alpha=0.7
    )

    # Axis label and vertical extent (half a row of padding on each side)
    self._backend.set_xlabel(ax, effect_label)
    self._backend.set_ylim(ax, -0.5, n_rows - 0.5)

    # Study names as y-tick labels (matplotlib only)
    if self.backend_name == "matplotlib":
        ax.set_yticks(data["y_pos"])
        ax.set_yticklabels(data[study_col], fontsize=10)

    self._backend.set_title(ax, f"Forest Plot: {variant_id}")
    self._backend.hide_spines(ax, ["top", "right"])
    self._backend.finalize_layout(fig)

    return fig
|
pylocuszoom/py.typed
ADDED
|
File without changes
|