pylocuszoom 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylocuszoom/__init__.py +9 -1
- pylocuszoom/_plotter_utils.py +66 -0
- pylocuszoom/backends/base.py +56 -0
- pylocuszoom/backends/bokeh_backend.py +141 -29
- pylocuszoom/backends/matplotlib_backend.py +60 -0
- pylocuszoom/backends/plotly_backend.py +297 -88
- pylocuszoom/ensembl.py +6 -11
- pylocuszoom/gene_track.py +2 -24
- pylocuszoom/labels.py +6 -2
- pylocuszoom/manhattan.py +246 -0
- pylocuszoom/manhattan_plotter.py +760 -0
- pylocuszoom/plotter.py +236 -270
- pylocuszoom/qq.py +123 -0
- pylocuszoom/recombination.py +7 -7
- pylocuszoom/stats_plotter.py +319 -0
- {pylocuszoom-1.0.0.dist-info → pylocuszoom-1.1.0.dist-info}/METADATA +124 -14
- pylocuszoom-1.1.0.dist-info/RECORD +36 -0
- pylocuszoom-1.0.0.dist-info/RECORD +0 -31
- {pylocuszoom-1.0.0.dist-info → pylocuszoom-1.1.0.dist-info}/WHEEL +0 -0
- {pylocuszoom-1.0.0.dist-info → pylocuszoom-1.1.0.dist-info}/licenses/LICENSE.md +0 -0
pylocuszoom/plotter.py
CHANGED
|
@@ -17,6 +17,7 @@ import numpy as np
|
|
|
17
17
|
import pandas as pd
|
|
18
18
|
import requests
|
|
19
19
|
|
|
20
|
+
from ._plotter_utils import DEFAULT_GENOMEWIDE_THRESHOLD
|
|
20
21
|
from .backends import BackendType, get_backend
|
|
21
22
|
from .backends.hover import HoverConfig, HoverDataBuilder
|
|
22
23
|
from .colors import (
|
|
@@ -29,7 +30,6 @@ from .colors import (
|
|
|
29
30
|
get_eqtl_color,
|
|
30
31
|
get_ld_bin,
|
|
31
32
|
get_ld_color_palette,
|
|
32
|
-
get_phewas_category_palette,
|
|
33
33
|
)
|
|
34
34
|
from .config import PlotConfig, StackedPlotConfig
|
|
35
35
|
from .ensembl import get_genes_for_region
|
|
@@ -38,24 +38,23 @@ from .finemapping import (
|
|
|
38
38
|
get_credible_sets,
|
|
39
39
|
prepare_finemapping_for_plotting,
|
|
40
40
|
)
|
|
41
|
-
from .forest import validate_forest_df
|
|
42
41
|
from .gene_track import (
|
|
43
42
|
assign_gene_positions,
|
|
44
43
|
plot_gene_track_generic,
|
|
45
44
|
)
|
|
46
45
|
from .ld import calculate_ld, find_plink
|
|
47
46
|
from .logging import enable_logging, logger
|
|
48
|
-
from .
|
|
47
|
+
from .manhattan_plotter import ManhattanPlotter
|
|
49
48
|
from .recombination import (
|
|
50
49
|
RECOMB_COLOR,
|
|
51
50
|
download_canine_recombination_maps,
|
|
52
51
|
get_default_data_dir,
|
|
53
52
|
get_recombination_rate_for_region,
|
|
54
53
|
)
|
|
54
|
+
from .stats_plotter import StatsPlotter
|
|
55
55
|
from .utils import normalize_chrom, validate_genes_df, validate_gwas_df
|
|
56
56
|
|
|
57
|
-
#
|
|
58
|
-
DEFAULT_GENOMEWIDE_THRESHOLD = 5e-8
|
|
57
|
+
# Precomputed significance line value (used for plotting)
|
|
59
58
|
DEFAULT_GENOMEWIDE_LINE = -np.log10(DEFAULT_GENOMEWIDE_THRESHOLD)
|
|
60
59
|
|
|
61
60
|
|
|
@@ -140,6 +139,7 @@ class LocusZoomPlotter:
|
|
|
140
139
|
genome_build if genome_build else self._default_build(species)
|
|
141
140
|
)
|
|
142
141
|
self._backend = get_backend(backend)
|
|
142
|
+
self._backend_name = backend # Store for delegation to child plotters
|
|
143
143
|
self.plink_path = plink_path or find_plink()
|
|
144
144
|
self.recomb_data_dir = recomb_data_dir
|
|
145
145
|
self.genomewide_threshold = genomewide_threshold
|
|
@@ -149,6 +149,27 @@ class LocusZoomPlotter:
|
|
|
149
149
|
# Cache for loaded data
|
|
150
150
|
self._recomb_cache = {}
|
|
151
151
|
|
|
152
|
+
@property
|
|
153
|
+
def _manhattan_plotter(self) -> ManhattanPlotter:
|
|
154
|
+
"""Lazy-load ManhattanPlotter with shared configuration."""
|
|
155
|
+
if not hasattr(self, "_manhattan_plotter_instance"):
|
|
156
|
+
self._manhattan_plotter_instance = ManhattanPlotter(
|
|
157
|
+
species=self.species,
|
|
158
|
+
backend=self._backend_name,
|
|
159
|
+
genomewide_threshold=self.genomewide_threshold,
|
|
160
|
+
)
|
|
161
|
+
return self._manhattan_plotter_instance
|
|
162
|
+
|
|
163
|
+
@property
|
|
164
|
+
def _stats_plotter(self) -> StatsPlotter:
|
|
165
|
+
"""Lazy-load StatsPlotter with shared configuration."""
|
|
166
|
+
if not hasattr(self, "_stats_plotter_instance"):
|
|
167
|
+
self._stats_plotter_instance = StatsPlotter(
|
|
168
|
+
backend=self._backend_name,
|
|
169
|
+
genomewide_threshold=self.genomewide_threshold,
|
|
170
|
+
)
|
|
171
|
+
return self._stats_plotter_instance
|
|
172
|
+
|
|
152
173
|
@staticmethod
|
|
153
174
|
def _default_build(species: str) -> Optional[str]:
|
|
154
175
|
"""Get default genome build for species."""
|
|
@@ -214,6 +235,22 @@ class LocusZoomPlotter:
|
|
|
214
235
|
except FileNotFoundError:
|
|
215
236
|
return None
|
|
216
237
|
|
|
238
|
+
def _transform_pvalues(self, df: pd.DataFrame, p_col: str) -> pd.DataFrame:
|
|
239
|
+
"""Add neglog10p column with -log10 transformed p-values.
|
|
240
|
+
|
|
241
|
+
Delegates to shared utility function. Assumes df is already a copy.
|
|
242
|
+
|
|
243
|
+
Args:
|
|
244
|
+
df: DataFrame with p-value column (should be a copy).
|
|
245
|
+
p_col: Name of p-value column.
|
|
246
|
+
|
|
247
|
+
Returns:
|
|
248
|
+
DataFrame with neglog10p column added.
|
|
249
|
+
"""
|
|
250
|
+
# Use shared utility - note: df should already be a copy at call sites
|
|
251
|
+
df["neglog10p"] = -np.log10(df[p_col].clip(lower=1e-300))
|
|
252
|
+
return df
|
|
253
|
+
|
|
217
254
|
def plot(
|
|
218
255
|
self,
|
|
219
256
|
gwas_df: pd.DataFrame,
|
|
@@ -249,7 +286,8 @@ class LocusZoomPlotter:
|
|
|
249
286
|
label_top_n: Number of top SNPs to label.
|
|
250
287
|
show_recombination: Whether to show recombination rate overlay.
|
|
251
288
|
figsize: Figure size as (width, height) in inches.
|
|
252
|
-
lead_pos: Position of lead
|
|
289
|
+
lead_pos: Position of lead SNP to highlight. For stacked plots with
|
|
290
|
+
multiple regions, use plot_stacked() with lead_positions (plural).
|
|
253
291
|
ld_reference_file: Path to PLINK binary fileset for LD calculation.
|
|
254
292
|
ld_col: Column name for pre-computed LD (R^2) values.
|
|
255
293
|
genes_df: Gene annotations with chr, start, end, gene_name.
|
|
@@ -332,7 +370,7 @@ class LocusZoomPlotter:
|
|
|
332
370
|
if clipped_count > 0:
|
|
333
371
|
logger.debug(f"Clipping {clipped_count} p-values below 1e-300 to 1e-300")
|
|
334
372
|
|
|
335
|
-
df
|
|
373
|
+
df = self._transform_pvalues(df, p_col)
|
|
336
374
|
|
|
337
375
|
# Calculate LD if reference file provided
|
|
338
376
|
if ld_reference_file and lead_pos and ld_col is None:
|
|
@@ -401,7 +439,12 @@ class LocusZoomPlotter:
|
|
|
401
439
|
# Format axes
|
|
402
440
|
self._backend.set_ylabel(ax, r"$-\log_{10}$ P")
|
|
403
441
|
self._backend.set_xlim(ax, start, end)
|
|
404
|
-
|
|
442
|
+
# When recombination overlay is present, keep right spine for secondary y-axis
|
|
443
|
+
has_recomb = recomb_df is not None and not recomb_df.empty
|
|
444
|
+
if has_recomb and self._backend.supports_secondary_axis:
|
|
445
|
+
self._backend.hide_spines(ax, ["top"])
|
|
446
|
+
else:
|
|
447
|
+
self._backend.hide_spines(ax, ["top", "right"])
|
|
405
448
|
|
|
406
449
|
# Add LD legend (all backends)
|
|
407
450
|
if ld_col is not None and ld_col in df.columns:
|
|
@@ -605,24 +648,28 @@ class LocusZoomPlotter:
|
|
|
605
648
|
region_recomb["pos"],
|
|
606
649
|
region_recomb["rate"],
|
|
607
650
|
color=RECOMB_COLOR,
|
|
608
|
-
linewidth=
|
|
609
|
-
alpha=0.
|
|
651
|
+
linewidth=2.5,
|
|
652
|
+
alpha=0.8,
|
|
610
653
|
yaxis_name=secondary_y,
|
|
611
654
|
)
|
|
612
655
|
|
|
613
|
-
# Set y-axis limits and label
|
|
656
|
+
# Set y-axis limits and label - scale to fit data with headroom
|
|
614
657
|
max_rate = region_recomb["rate"].max()
|
|
615
658
|
self._backend.set_secondary_ylim(
|
|
616
|
-
secondary_ax, 0, max(max_rate * 1.
|
|
659
|
+
secondary_ax, 0, max(max_rate * 1.3, 10), yaxis_name=secondary_y
|
|
617
660
|
)
|
|
618
661
|
self._backend.set_secondary_ylabel(
|
|
619
662
|
secondary_ax,
|
|
620
663
|
"Recombination rate (cM/Mb)",
|
|
621
|
-
color=
|
|
664
|
+
color="black", # Use black for readability (line/fill color remains light blue)
|
|
622
665
|
fontsize=9,
|
|
623
666
|
yaxis_name=secondary_y,
|
|
624
667
|
)
|
|
625
668
|
|
|
669
|
+
# Hide top spine on the secondary axis (matplotlib twin axis has its own frame)
|
|
670
|
+
if isinstance(twin_result, Axes):
|
|
671
|
+
secondary_ax.spines["top"].set_visible(False)
|
|
672
|
+
|
|
626
673
|
def _plot_finemapping(
|
|
627
674
|
self,
|
|
628
675
|
ax: Any,
|
|
@@ -770,7 +817,8 @@ class LocusZoomPlotter:
|
|
|
770
817
|
figsize: Figure size as (width, height) in inches.
|
|
771
818
|
ld_reference_file: Single PLINK fileset (broadcast to all panels).
|
|
772
819
|
ld_col: Column name for pre-computed LD (R^2) values.
|
|
773
|
-
lead_positions: List of lead SNP positions
|
|
820
|
+
lead_positions: List of lead SNP positions, one per region. For single
|
|
821
|
+
region plots, use plot() with lead_pos (singular).
|
|
774
822
|
panel_labels: List of panel labels (one per panel).
|
|
775
823
|
ld_reference_files: List of PLINK filesets (one per panel).
|
|
776
824
|
genes_df: Gene annotations for bottom track.
|
|
@@ -929,24 +977,34 @@ class LocusZoomPlotter:
|
|
|
929
977
|
for i, (gwas_df, lead_pos) in enumerate(zip(gwas_dfs, lead_positions)):
|
|
930
978
|
ax = axes[i]
|
|
931
979
|
df = gwas_df.copy()
|
|
932
|
-
df
|
|
980
|
+
df = self._transform_pvalues(df, p_col)
|
|
933
981
|
|
|
934
982
|
# Use pre-computed LD or calculate from reference
|
|
935
983
|
panel_ld_col = ld_col
|
|
936
984
|
if ld_reference_files and ld_reference_files[i] and lead_pos and not ld_col:
|
|
937
|
-
|
|
938
|
-
if not
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
window_kb=max((end - start) // 1000, 500),
|
|
944
|
-
plink_path=self.plink_path,
|
|
945
|
-
species=self.species,
|
|
985
|
+
# Check if rs_col exists before attempting LD calculation
|
|
986
|
+
if rs_col not in df.columns:
|
|
987
|
+
logger.warning(
|
|
988
|
+
f"Cannot calculate LD for panel {i + 1}: column '{rs_col}' "
|
|
989
|
+
f"not found in GWAS data. "
|
|
990
|
+
f"Provide rs_col parameter or add SNP IDs to DataFrame."
|
|
946
991
|
)
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
992
|
+
else:
|
|
993
|
+
lead_snp_row = df[df[pos_col] == lead_pos]
|
|
994
|
+
if not lead_snp_row.empty:
|
|
995
|
+
lead_snp_id = lead_snp_row[rs_col].iloc[0]
|
|
996
|
+
ld_df = calculate_ld(
|
|
997
|
+
bfile_path=ld_reference_files[i],
|
|
998
|
+
lead_snp=lead_snp_id,
|
|
999
|
+
window_kb=max((end - start) // 1000, 500),
|
|
1000
|
+
plink_path=self.plink_path,
|
|
1001
|
+
species=self.species,
|
|
1002
|
+
)
|
|
1003
|
+
if not ld_df.empty:
|
|
1004
|
+
df = df.merge(
|
|
1005
|
+
ld_df, left_on=rs_col, right_on="SNP", how="left"
|
|
1006
|
+
)
|
|
1007
|
+
panel_ld_col = "R2"
|
|
950
1008
|
|
|
951
1009
|
# Plot association
|
|
952
1010
|
self._plot_association(
|
|
@@ -1041,8 +1099,16 @@ class LocusZoomPlotter:
|
|
|
1041
1099
|
eqtl_data = eqtl_df.copy()
|
|
1042
1100
|
|
|
1043
1101
|
# Filter by gene if specified
|
|
1044
|
-
|
|
1045
|
-
|
|
1102
|
+
eqtl_gene_filtered = False
|
|
1103
|
+
if eqtl_gene:
|
|
1104
|
+
if "gene" in eqtl_data.columns:
|
|
1105
|
+
eqtl_data = eqtl_data[eqtl_data["gene"] == eqtl_gene]
|
|
1106
|
+
eqtl_gene_filtered = True
|
|
1107
|
+
else:
|
|
1108
|
+
logger.warning(
|
|
1109
|
+
f"eqtl_gene='{eqtl_gene}' specified but eQTL data has no 'gene' column; "
|
|
1110
|
+
"showing all eQTL data unfiltered"
|
|
1111
|
+
)
|
|
1046
1112
|
|
|
1047
1113
|
# Filter by region (position and chromosome)
|
|
1048
1114
|
if "pos" in eqtl_data.columns:
|
|
@@ -1057,9 +1123,7 @@ class LocusZoomPlotter:
|
|
|
1057
1123
|
eqtl_data = eqtl_data[mask]
|
|
1058
1124
|
|
|
1059
1125
|
if not eqtl_data.empty:
|
|
1060
|
-
eqtl_data
|
|
1061
|
-
eqtl_data["p_value"].clip(lower=1e-300)
|
|
1062
|
-
)
|
|
1126
|
+
eqtl_data = self._transform_pvalues(eqtl_data, "p_value")
|
|
1063
1127
|
|
|
1064
1128
|
# Build hover data using HoverDataBuilder
|
|
1065
1129
|
eqtl_extra_cols = {}
|
|
@@ -1119,7 +1183,8 @@ class LocusZoomPlotter:
|
|
|
1119
1183
|
)
|
|
1120
1184
|
else:
|
|
1121
1185
|
# No effect sizes - plot as diamonds
|
|
1122
|
-
|
|
1186
|
+
# Only show gene in label if filtering was actually applied
|
|
1187
|
+
label = f"eQTL ({eqtl_gene})" if eqtl_gene_filtered else "eQTL"
|
|
1123
1188
|
self._backend.scatter(
|
|
1124
1189
|
ax,
|
|
1125
1190
|
eqtl_data["pos"],
|
|
@@ -1179,143 +1244,17 @@ class LocusZoomPlotter:
|
|
|
1179
1244
|
significance_threshold: float = 5e-8,
|
|
1180
1245
|
figsize: Tuple[float, float] = (10, 8),
|
|
1181
1246
|
) -> Any:
|
|
1182
|
-
"""Create a PheWAS
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
p_col: Column name for p-values.
|
|
1192
|
-
category_col: Column name for phenotype categories.
|
|
1193
|
-
effect_col: Optional column name for effect direction (beta/OR).
|
|
1194
|
-
significance_threshold: P-value threshold for significance line.
|
|
1195
|
-
figsize: Figure size as (width, height).
|
|
1196
|
-
|
|
1197
|
-
Returns:
|
|
1198
|
-
Figure object (type depends on backend).
|
|
1199
|
-
|
|
1200
|
-
Example:
|
|
1201
|
-
>>> fig = plotter.plot_phewas(
|
|
1202
|
-
... phewas_df,
|
|
1203
|
-
... variant_id="rs12345",
|
|
1204
|
-
... category_col="category",
|
|
1205
|
-
... )
|
|
1206
|
-
"""
|
|
1207
|
-
validate_phewas_df(phewas_df, phenotype_col, p_col, category_col)
|
|
1208
|
-
|
|
1209
|
-
df = phewas_df.copy()
|
|
1210
|
-
df["neglog10p"] = -np.log10(df[p_col].clip(lower=1e-300))
|
|
1211
|
-
|
|
1212
|
-
# Sort by category then by p-value for consistent ordering
|
|
1213
|
-
if category_col in df.columns:
|
|
1214
|
-
df = df.sort_values([category_col, p_col])
|
|
1215
|
-
categories = df[category_col].unique().tolist()
|
|
1216
|
-
palette = get_phewas_category_palette(categories)
|
|
1217
|
-
else:
|
|
1218
|
-
df = df.sort_values(p_col)
|
|
1219
|
-
categories = []
|
|
1220
|
-
palette = {}
|
|
1221
|
-
|
|
1222
|
-
# Create figure
|
|
1223
|
-
fig, axes = self._backend.create_figure(
|
|
1224
|
-
n_panels=1,
|
|
1225
|
-
height_ratios=[1.0],
|
|
1247
|
+
"""Create a PheWAS plot. See StatsPlotter.plot_phewas for docs."""
|
|
1248
|
+
return self._stats_plotter.plot_phewas(
|
|
1249
|
+
phewas_df=phewas_df,
|
|
1250
|
+
variant_id=variant_id,
|
|
1251
|
+
phenotype_col=phenotype_col,
|
|
1252
|
+
p_col=p_col,
|
|
1253
|
+
category_col=category_col,
|
|
1254
|
+
effect_col=effect_col,
|
|
1255
|
+
significance_threshold=significance_threshold,
|
|
1226
1256
|
figsize=figsize,
|
|
1227
1257
|
)
|
|
1228
|
-
ax = axes[0]
|
|
1229
|
-
|
|
1230
|
-
# Assign y-positions (one per phenotype)
|
|
1231
|
-
df["y_pos"] = range(len(df))
|
|
1232
|
-
|
|
1233
|
-
# Plot points by category
|
|
1234
|
-
if categories:
|
|
1235
|
-
for cat in categories:
|
|
1236
|
-
# Handle NaN category: NaN == NaN is False in pandas
|
|
1237
|
-
if pd.isna(cat):
|
|
1238
|
-
cat_data = df[df[category_col].isna()]
|
|
1239
|
-
else:
|
|
1240
|
-
cat_data = df[df[category_col] == cat]
|
|
1241
|
-
# Use upward triangles for positive effects, circles otherwise
|
|
1242
|
-
if effect_col and effect_col in cat_data.columns:
|
|
1243
|
-
# Vectorized: split by effect sign, 2 scatter calls per category
|
|
1244
|
-
pos_data = cat_data[cat_data[effect_col] >= 0]
|
|
1245
|
-
neg_data = cat_data[cat_data[effect_col] < 0]
|
|
1246
|
-
|
|
1247
|
-
if not pos_data.empty:
|
|
1248
|
-
self._backend.scatter(
|
|
1249
|
-
ax,
|
|
1250
|
-
pos_data["neglog10p"],
|
|
1251
|
-
pos_data["y_pos"],
|
|
1252
|
-
colors=palette[cat],
|
|
1253
|
-
sizes=60,
|
|
1254
|
-
marker="^",
|
|
1255
|
-
edgecolor="black",
|
|
1256
|
-
linewidth=0.5,
|
|
1257
|
-
zorder=2,
|
|
1258
|
-
)
|
|
1259
|
-
if not neg_data.empty:
|
|
1260
|
-
self._backend.scatter(
|
|
1261
|
-
ax,
|
|
1262
|
-
neg_data["neglog10p"],
|
|
1263
|
-
neg_data["y_pos"],
|
|
1264
|
-
colors=palette[cat],
|
|
1265
|
-
sizes=60,
|
|
1266
|
-
marker="v",
|
|
1267
|
-
edgecolor="black",
|
|
1268
|
-
linewidth=0.5,
|
|
1269
|
-
zorder=2,
|
|
1270
|
-
)
|
|
1271
|
-
else:
|
|
1272
|
-
self._backend.scatter(
|
|
1273
|
-
ax,
|
|
1274
|
-
cat_data["neglog10p"],
|
|
1275
|
-
cat_data["y_pos"],
|
|
1276
|
-
colors=palette[cat],
|
|
1277
|
-
sizes=60,
|
|
1278
|
-
marker="o",
|
|
1279
|
-
edgecolor="black",
|
|
1280
|
-
linewidth=0.5,
|
|
1281
|
-
zorder=2,
|
|
1282
|
-
)
|
|
1283
|
-
else:
|
|
1284
|
-
self._backend.scatter(
|
|
1285
|
-
ax,
|
|
1286
|
-
df["neglog10p"],
|
|
1287
|
-
df["y_pos"],
|
|
1288
|
-
colors="#4169E1",
|
|
1289
|
-
sizes=60,
|
|
1290
|
-
edgecolor="black",
|
|
1291
|
-
linewidth=0.5,
|
|
1292
|
-
zorder=2,
|
|
1293
|
-
)
|
|
1294
|
-
|
|
1295
|
-
# Add significance threshold line
|
|
1296
|
-
sig_line = -np.log10(significance_threshold)
|
|
1297
|
-
self._backend.axvline(
|
|
1298
|
-
ax, x=sig_line, color="red", linestyle="--", linewidth=1, alpha=0.7
|
|
1299
|
-
)
|
|
1300
|
-
|
|
1301
|
-
# Set axis labels and limits
|
|
1302
|
-
self._backend.set_xlabel(ax, r"$-\log_{10}$ P")
|
|
1303
|
-
self._backend.set_ylabel(ax, "Phenotype")
|
|
1304
|
-
self._backend.set_ylim(ax, -0.5, len(df) - 0.5)
|
|
1305
|
-
|
|
1306
|
-
# Set y-tick labels to phenotype names
|
|
1307
|
-
self._backend.set_yticks(
|
|
1308
|
-
ax,
|
|
1309
|
-
positions=df["y_pos"].tolist(),
|
|
1310
|
-
labels=df[phenotype_col].tolist(),
|
|
1311
|
-
fontsize=8,
|
|
1312
|
-
)
|
|
1313
|
-
|
|
1314
|
-
self._backend.set_title(ax, f"PheWAS: {variant_id}")
|
|
1315
|
-
self._backend.hide_spines(ax, ["top", "right"])
|
|
1316
|
-
self._backend.finalize_layout(fig)
|
|
1317
|
-
|
|
1318
|
-
return fig
|
|
1319
1258
|
|
|
1320
1259
|
def plot_forest(
|
|
1321
1260
|
self,
|
|
@@ -1330,116 +1269,143 @@ class LocusZoomPlotter:
|
|
|
1330
1269
|
effect_label: str = "Effect Size",
|
|
1331
1270
|
figsize: Tuple[float, float] = (8, 6),
|
|
1332
1271
|
) -> Any:
|
|
1333
|
-
"""Create a forest plot
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
effect_label: X-axis label.
|
|
1345
|
-
figsize: Figure size as (width, height).
|
|
1346
|
-
|
|
1347
|
-
Returns:
|
|
1348
|
-
Figure object (type depends on backend).
|
|
1349
|
-
|
|
1350
|
-
Example:
|
|
1351
|
-
>>> fig = plotter.plot_forest(
|
|
1352
|
-
... forest_df,
|
|
1353
|
-
... variant_id="rs12345",
|
|
1354
|
-
... effect_label="Odds Ratio",
|
|
1355
|
-
... null_value=1.0,
|
|
1356
|
-
... )
|
|
1357
|
-
"""
|
|
1358
|
-
validate_forest_df(forest_df, study_col, effect_col, ci_lower_col, ci_upper_col)
|
|
1359
|
-
|
|
1360
|
-
df = forest_df.copy()
|
|
1361
|
-
|
|
1362
|
-
# Create figure
|
|
1363
|
-
fig, axes = self._backend.create_figure(
|
|
1364
|
-
n_panels=1,
|
|
1365
|
-
height_ratios=[1.0],
|
|
1272
|
+
"""Create a forest plot. See StatsPlotter.plot_forest for docs."""
|
|
1273
|
+
return self._stats_plotter.plot_forest(
|
|
1274
|
+
forest_df=forest_df,
|
|
1275
|
+
variant_id=variant_id,
|
|
1276
|
+
study_col=study_col,
|
|
1277
|
+
effect_col=effect_col,
|
|
1278
|
+
ci_lower_col=ci_lower_col,
|
|
1279
|
+
ci_upper_col=ci_upper_col,
|
|
1280
|
+
weight_col=weight_col,
|
|
1281
|
+
null_value=null_value,
|
|
1282
|
+
effect_label=effect_label,
|
|
1366
1283
|
figsize=figsize,
|
|
1367
1284
|
)
|
|
1368
|
-
ax = axes[0]
|
|
1369
|
-
|
|
1370
|
-
# Assign y-positions (reverse so first study is at top)
|
|
1371
|
-
df["y_pos"] = range(len(df) - 1, -1, -1)
|
|
1372
|
-
|
|
1373
|
-
# Calculate marker sizes from weights
|
|
1374
|
-
if weight_col and weight_col in df.columns:
|
|
1375
|
-
# Scale weights to marker sizes (min 40, max 200)
|
|
1376
|
-
weights = df[weight_col]
|
|
1377
|
-
min_size, max_size = 40, 200
|
|
1378
|
-
weight_range = weights.max() - weights.min()
|
|
1379
|
-
if weight_range > 0:
|
|
1380
|
-
sizes = min_size + (weights - weights.min()) / weight_range * (
|
|
1381
|
-
max_size - min_size
|
|
1382
|
-
)
|
|
1383
|
-
else:
|
|
1384
|
-
sizes = (min_size + max_size) / 2
|
|
1385
|
-
else:
|
|
1386
|
-
sizes = 80
|
|
1387
|
-
|
|
1388
|
-
# Calculate error bar extents
|
|
1389
|
-
xerr_lower = df[effect_col] - df[ci_lower_col]
|
|
1390
|
-
xerr_upper = df[ci_upper_col] - df[effect_col]
|
|
1391
1285
|
|
|
1392
|
-
|
|
1393
|
-
self
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1286
|
+
def plot_manhattan(
|
|
1287
|
+
self,
|
|
1288
|
+
df: pd.DataFrame,
|
|
1289
|
+
chrom_col: str = "chrom",
|
|
1290
|
+
pos_col: str = "pos",
|
|
1291
|
+
p_col: str = "p",
|
|
1292
|
+
custom_chrom_order: Optional[List[str]] = None,
|
|
1293
|
+
category_col: Optional[str] = None,
|
|
1294
|
+
category_order: Optional[List[str]] = None,
|
|
1295
|
+
significance_threshold: Optional[float] = DEFAULT_GENOMEWIDE_THRESHOLD,
|
|
1296
|
+
figsize: Tuple[float, float] = (12, 5),
|
|
1297
|
+
title: Optional[str] = None,
|
|
1298
|
+
) -> Any:
|
|
1299
|
+
"""Create a Manhattan plot. See ManhattanPlotter.plot_manhattan for docs."""
|
|
1300
|
+
return self._manhattan_plotter.plot_manhattan(
|
|
1301
|
+
df=df,
|
|
1302
|
+
chrom_col=chrom_col,
|
|
1303
|
+
pos_col=pos_col,
|
|
1304
|
+
p_col=p_col,
|
|
1305
|
+
custom_chrom_order=custom_chrom_order,
|
|
1306
|
+
category_col=category_col,
|
|
1307
|
+
category_order=category_order,
|
|
1308
|
+
significance_threshold=significance_threshold,
|
|
1309
|
+
figsize=figsize,
|
|
1310
|
+
title=title,
|
|
1403
1311
|
)
|
|
1404
1312
|
|
|
1405
|
-
|
|
1406
|
-
self
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
|
|
1313
|
+
def plot_qq(
|
|
1314
|
+
self,
|
|
1315
|
+
df: pd.DataFrame,
|
|
1316
|
+
p_col: str = "p",
|
|
1317
|
+
show_confidence_band: bool = True,
|
|
1318
|
+
show_lambda: bool = True,
|
|
1319
|
+
figsize: Tuple[float, float] = (6, 6),
|
|
1320
|
+
title: Optional[str] = None,
|
|
1321
|
+
) -> Any:
|
|
1322
|
+
"""Create a QQ plot. See ManhattanPlotter.plot_qq for docs."""
|
|
1323
|
+
return self._manhattan_plotter.plot_qq(
|
|
1324
|
+
df=df,
|
|
1325
|
+
p_col=p_col,
|
|
1326
|
+
show_confidence_band=show_confidence_band,
|
|
1327
|
+
show_lambda=show_lambda,
|
|
1328
|
+
figsize=figsize,
|
|
1329
|
+
title=title,
|
|
1416
1330
|
)
|
|
1417
1331
|
|
|
1418
|
-
|
|
1419
|
-
self
|
|
1420
|
-
|
|
1332
|
+
def plot_manhattan_stacked(
|
|
1333
|
+
self,
|
|
1334
|
+
gwas_dfs: List[pd.DataFrame],
|
|
1335
|
+
chrom_col: str = "chrom",
|
|
1336
|
+
pos_col: str = "pos",
|
|
1337
|
+
p_col: str = "p",
|
|
1338
|
+
custom_chrom_order: Optional[List[str]] = None,
|
|
1339
|
+
significance_threshold: Optional[float] = DEFAULT_GENOMEWIDE_THRESHOLD,
|
|
1340
|
+
panel_labels: Optional[List[str]] = None,
|
|
1341
|
+
figsize: Tuple[float, float] = (12, 8),
|
|
1342
|
+
title: Optional[str] = None,
|
|
1343
|
+
) -> Any:
|
|
1344
|
+
"""Create stacked Manhattan plots. See ManhattanPlotter.plot_manhattan_stacked for docs."""
|
|
1345
|
+
return self._manhattan_plotter.plot_manhattan_stacked(
|
|
1346
|
+
gwas_dfs=gwas_dfs,
|
|
1347
|
+
chrom_col=chrom_col,
|
|
1348
|
+
pos_col=pos_col,
|
|
1349
|
+
p_col=p_col,
|
|
1350
|
+
custom_chrom_order=custom_chrom_order,
|
|
1351
|
+
significance_threshold=significance_threshold,
|
|
1352
|
+
panel_labels=panel_labels,
|
|
1353
|
+
figsize=figsize,
|
|
1354
|
+
title=title,
|
|
1421
1355
|
)
|
|
1422
1356
|
|
|
1423
|
-
|
|
1424
|
-
self
|
|
1425
|
-
|
|
1426
|
-
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
|
|
1434
|
-
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
|
|
1357
|
+
def plot_manhattan_qq(
|
|
1358
|
+
self,
|
|
1359
|
+
df: pd.DataFrame,
|
|
1360
|
+
chrom_col: str = "chrom",
|
|
1361
|
+
pos_col: str = "pos",
|
|
1362
|
+
p_col: str = "p",
|
|
1363
|
+
custom_chrom_order: Optional[List[str]] = None,
|
|
1364
|
+
significance_threshold: Optional[float] = DEFAULT_GENOMEWIDE_THRESHOLD,
|
|
1365
|
+
show_confidence_band: bool = True,
|
|
1366
|
+
show_lambda: bool = True,
|
|
1367
|
+
figsize: Tuple[float, float] = (14, 5),
|
|
1368
|
+
title: Optional[str] = None,
|
|
1369
|
+
) -> Any:
|
|
1370
|
+
"""Create side-by-side Manhattan and QQ plots. See ManhattanPlotter.plot_manhattan_qq for docs."""
|
|
1371
|
+
return self._manhattan_plotter.plot_manhattan_qq(
|
|
1372
|
+
df=df,
|
|
1373
|
+
chrom_col=chrom_col,
|
|
1374
|
+
pos_col=pos_col,
|
|
1375
|
+
p_col=p_col,
|
|
1376
|
+
custom_chrom_order=custom_chrom_order,
|
|
1377
|
+
significance_threshold=significance_threshold,
|
|
1378
|
+
show_confidence_band=show_confidence_band,
|
|
1379
|
+
show_lambda=show_lambda,
|
|
1380
|
+
figsize=figsize,
|
|
1381
|
+
title=title,
|
|
1439
1382
|
)
|
|
1440
1383
|
|
|
1441
|
-
|
|
1442
|
-
self
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1384
|
+
def plot_manhattan_qq_stacked(
|
|
1385
|
+
self,
|
|
1386
|
+
gwas_dfs: List[pd.DataFrame],
|
|
1387
|
+
chrom_col: str = "chrom",
|
|
1388
|
+
pos_col: str = "pos",
|
|
1389
|
+
p_col: str = "p",
|
|
1390
|
+
custom_chrom_order: Optional[List[str]] = None,
|
|
1391
|
+
significance_threshold: Optional[float] = DEFAULT_GENOMEWIDE_THRESHOLD,
|
|
1392
|
+
show_confidence_band: bool = True,
|
|
1393
|
+
show_lambda: bool = True,
|
|
1394
|
+
panel_labels: Optional[List[str]] = None,
|
|
1395
|
+
figsize: Tuple[float, float] = (14, 8),
|
|
1396
|
+
title: Optional[str] = None,
|
|
1397
|
+
) -> Any:
|
|
1398
|
+
"""Create stacked Manhattan+QQ plots. See ManhattanPlotter.plot_manhattan_qq_stacked for docs."""
|
|
1399
|
+
return self._manhattan_plotter.plot_manhattan_qq_stacked(
|
|
1400
|
+
gwas_dfs=gwas_dfs,
|
|
1401
|
+
chrom_col=chrom_col,
|
|
1402
|
+
pos_col=pos_col,
|
|
1403
|
+
p_col=p_col,
|
|
1404
|
+
custom_chrom_order=custom_chrom_order,
|
|
1405
|
+
significance_threshold=significance_threshold,
|
|
1406
|
+
show_confidence_band=show_confidence_band,
|
|
1407
|
+
show_lambda=show_lambda,
|
|
1408
|
+
panel_labels=panel_labels,
|
|
1409
|
+
figsize=figsize,
|
|
1410
|
+
title=title,
|
|
1411
|
+
)
|