pylocuszoom 0.8.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylocuszoom/__init__.py +27 -7
- pylocuszoom/_plotter_utils.py +66 -0
- pylocuszoom/backends/base.py +56 -0
- pylocuszoom/backends/bokeh_backend.py +141 -29
- pylocuszoom/backends/matplotlib_backend.py +60 -0
- pylocuszoom/backends/plotly_backend.py +297 -88
- pylocuszoom/config.py +365 -0
- pylocuszoom/ensembl.py +6 -11
- pylocuszoom/eqtl.py +3 -7
- pylocuszoom/exceptions.py +33 -0
- pylocuszoom/finemapping.py +2 -7
- pylocuszoom/forest.py +1 -0
- pylocuszoom/gene_track.py +10 -31
- pylocuszoom/labels.py +6 -2
- pylocuszoom/manhattan.py +246 -0
- pylocuszoom/manhattan_plotter.py +760 -0
- pylocuszoom/plotter.py +401 -327
- pylocuszoom/qq.py +123 -0
- pylocuszoom/recombination.py +7 -7
- pylocuszoom/schemas.py +1 -6
- pylocuszoom/stats_plotter.py +319 -0
- pylocuszoom/utils.py +2 -4
- pylocuszoom/validation.py +51 -0
- {pylocuszoom-0.8.0.dist-info → pylocuszoom-1.1.0.dist-info}/METADATA +159 -25
- pylocuszoom-1.1.0.dist-info/RECORD +36 -0
- pylocuszoom-0.8.0.dist-info/RECORD +0 -29
- {pylocuszoom-0.8.0.dist-info → pylocuszoom-1.1.0.dist-info}/WHEEL +0 -0
- {pylocuszoom-0.8.0.dist-info → pylocuszoom-1.1.0.dist-info}/licenses/LICENSE.md +0 -0
pylocuszoom/manhattan.py
ADDED
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
"""Manhattan plot data preparation and chromosome ordering."""
|
|
2
|
+
|
|
3
|
+
from typing import Literal
|
|
4
|
+
|
|
5
|
+
import colorcet as cc
|
|
6
|
+
import numpy as np
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
# Alternate species names, mapped to the canonical keys of CHROMOSOME_ORDERS
SPECIES_ALIASES: dict[str, str] = {"dog": "canine", "cat": "feline"}

# Display order of chromosomes for each canonical species name.
# Feline autosomes are named by letter group plus a 1-based number
# (A1-A3, B1-B4, C1-C2, D1-D4, E1-E3).
CHROMOSOME_ORDERS: dict[str, list[str]] = {
    "canine": [*map(str, range(1, 39)), "X", "Y", "MT"],
    "feline": [
        f"{group}{number}"
        for group, count in (("A", 3), ("B", 4), ("C", 2), ("D", 4), ("E", 3))
        for number in range(1, count + 1)
    ]
    + ["X", "Y", "MT"],
    "human": [*map(str, range(1, 23)), "X", "Y", "MT"],
}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get_chromosome_order(
|
|
44
|
+
species: Literal["canine", "feline", "human", "dog", "cat"] | None = None,
|
|
45
|
+
custom_order: list[str] | None = None,
|
|
46
|
+
) -> list[str]:
|
|
47
|
+
"""Get chromosome order for a species.
|
|
48
|
+
|
|
49
|
+
Args:
|
|
50
|
+
species: Species name for built-in order. Supports aliases:
|
|
51
|
+
'dog' -> 'canine', 'cat' -> 'feline'.
|
|
52
|
+
custom_order: Custom chromosome order (overrides species).
|
|
53
|
+
|
|
54
|
+
Returns:
|
|
55
|
+
List of chromosome names in display order.
|
|
56
|
+
|
|
57
|
+
Raises:
|
|
58
|
+
ValueError: If neither species nor custom_order provided,
|
|
59
|
+
or if species is unknown.
|
|
60
|
+
"""
|
|
61
|
+
if custom_order is not None:
|
|
62
|
+
return custom_order
|
|
63
|
+
if species is not None:
|
|
64
|
+
# Resolve aliases
|
|
65
|
+
resolved_species = SPECIES_ALIASES.get(species, species)
|
|
66
|
+
if resolved_species not in CHROMOSOME_ORDERS:
|
|
67
|
+
raise ValueError(
|
|
68
|
+
f"Unknown species '{species}'. "
|
|
69
|
+
f"Use one of {list(CHROMOSOME_ORDERS.keys())} "
|
|
70
|
+
f"(or aliases: {list(SPECIES_ALIASES.keys())}) "
|
|
71
|
+
f"or provide custom_order."
|
|
72
|
+
)
|
|
73
|
+
return CHROMOSOME_ORDERS[resolved_species]
|
|
74
|
+
raise ValueError("Must provide either species or custom_order")
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def get_chromosome_colors(n_chromosomes: int) -> list[str]:
    """Return one palette color per chromosome.

    Colors are taken in order from colorcet's
    ``b_glasbey_bw_minc_20_maxl_70`` palette; when more colors are
    requested than the palette holds, selection wraps around to the
    start of the palette.

    Args:
        n_chromosomes: Number of chromosomes to color.

    Returns:
        List of hex color strings, one per chromosome.
    """
    palette = cc.b_glasbey_bw_minc_20_maxl_70
    palette_size = len(palette)

    colors = []
    for index in range(n_chromosomes):
        # Modulo makes the palette repeat once it is exhausted.
        colors.append(palette[index % palette_size])
    return colors
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def prepare_manhattan_data(
    df: pd.DataFrame,
    chrom_col: str = "chrom",
    pos_col: str = "pos",
    p_col: str = "p",
    species: Literal["canine", "feline", "human", "dog", "cat"] | None = None,
    custom_order: list[str] | None = None,
    chrom_gap: int = 1_000_000,
) -> pd.DataFrame:
    """Prepare DataFrame for Manhattan plot rendering.

    Computes cumulative positions for the x-axis and assigns chromosome
    colors. Chromosome values are compared as strings. Chromosomes present
    in the data but missing from the requested order are appended after the
    ordered ones, sorted by name, and each gets its own index and color.

    Args:
        df: GWAS results DataFrame. It is not modified; a copy is returned.
        chrom_col: Column name for chromosome.
        pos_col: Column name for position.
        p_col: Column name for p-value.
        species: Species for chromosome ordering.
        custom_order: Custom chromosome order (overrides species).
        chrom_gap: Spacing added after each chromosome's largest position
            when laying chromosomes out along the x-axis (default 1 Mb).

    Returns:
        Copy of ``df`` sorted by chromosome index then position, with
        additional columns:
        - _chrom_str: Chromosome value converted to string
        - _chrom_idx: Integer index for chromosome (position in
          ``attrs["chrom_order"]``)
        - _cumulative_pos: X-axis position
        - _neg_log_p: -log10(p-value), with p clipped below at 1e-300
        - _color: Hex color for chromosome

        ``attrs["chrom_order"]`` holds the full chromosome order (requested
        order followed by unlisted chromosomes) and ``attrs["chrom_centers"]``
        maps each chromosome present in the data to the mean of its
        cumulative positions, for x-axis labels.

    Raises:
        ValueError: If a required column is missing, or if the chromosome
            order cannot be resolved from species / custom_order.
    """
    # Validate required columns
    for col, name in [(chrom_col, "chrom"), (pos_col, "pos"), (p_col, "p")]:
        if col not in df.columns:
            raise ValueError(f"Column '{col}' not found in DataFrame (for {name})")

    # Copy so list concatenation below works for any sequence and never
    # touches shared state.
    chrom_order = list(get_chromosome_order(species, custom_order))

    result = df.copy()

    # Normalize chromosome names (handle int vs str)
    result["_chrom_str"] = result[chrom_col].astype(str)

    # Chromosomes absent from the requested order go last, sorted by name.
    unknown_chroms = sorted(set(result["_chrom_str"]) - set(chrom_order))
    all_chroms = chrom_order + unknown_chroms

    # Index every chromosome by its place in the full order so that each
    # unlisted chromosome gets a distinct index and its rows stay contiguous
    # after sorting.
    chrom_to_idx = {chrom: i for i, chrom in enumerate(all_chroms)}
    result["_chrom_idx"] = result["_chrom_str"].map(chrom_to_idx)

    # Sort by chromosome index then position
    result = result.sort_values(["_chrom_idx", pos_col])

    # Largest position per chromosome, computed in one pass over the data.
    max_pos = result.groupby("_chrom_str", sort=False)[pos_col].max().to_dict()

    # Each chromosome present in the data starts where the previous one
    # ended plus the gap.
    chrom_offsets = {}
    cumulative = 0
    for chrom in all_chroms:
        if chrom in max_pos:
            chrom_offsets[chrom] = cumulative
            cumulative += max_pos[chrom] + chrom_gap

    # Vectorised lookup; unlike a row-wise apply this also yields a proper
    # (empty) column when the input has no rows.
    result["_cumulative_pos"] = (
        result["_chrom_str"].map(chrom_offsets) + result[pos_col]
    )

    # Calculate -log10(p); the clip keeps p == 0 finite.
    result["_neg_log_p"] = -np.log10(result[p_col].clip(lower=1e-300))

    # Assign colors: one per chromosome in the full order.
    colors = get_chromosome_colors(len(all_chroms))
    chrom_to_color = dict(zip(all_chroms, colors))
    result["_color"] = result["_chrom_str"].map(chrom_to_color)

    # Chromosome centers for x-axis labels, kept in display order and
    # limited to chromosomes that actually have rows.
    mean_pos = result.groupby("_chrom_str", sort=False)["_cumulative_pos"].mean()
    chrom_centers = {
        chrom: mean_pos[chrom] for chrom in all_chroms if chrom in mean_pos.index
    }

    result.attrs["chrom_centers"] = chrom_centers
    result.attrs["chrom_order"] = all_chroms

    return result
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def prepare_categorical_data(
    df: pd.DataFrame,
    category_col: str,
    p_col: str = "p",
    category_order: list[str] | None = None,
) -> pd.DataFrame:
    """Prepare DataFrame for categorical Manhattan plot (PheWAS-style).

    Category values are compared as strings. Rows whose category is not in
    the category order receive the index ``len(category_order)`` and no
    color.

    Args:
        df: Results DataFrame with categories and p-values. It is not
            modified; a copy is returned.
        category_col: Column name for category.
        p_col: Column name for p-value.
        category_order: Custom category order. Labels are converted to
            strings so they match the stringified category values. When
            omitted, the sorted unique non-null category values are used.

    Returns:
        Copy of ``df`` with additional columns:
        - _cat_str: Category value converted to string
        - _cat_idx: Integer index of the category in the category order
        - _x_pos: X-axis position (category index plus jitter in
          [-0.3, 0.3))
        - _neg_log_p: -log10(p-value), with p clipped below at 1e-300
        - _color: Hex color for category

        ``attrs["category_order"]`` holds the category order and
        ``attrs["category_centers"]`` maps each category to its index.

    Raises:
        ValueError: If ``category_col`` or ``p_col`` is not a column of
            ``df``.
    """
    # Validate required columns
    if category_col not in df.columns:
        raise ValueError(f"Column '{category_col}' not found in DataFrame")
    if p_col not in df.columns:
        raise ValueError(f"Column '{p_col}' not found in DataFrame")

    result = df.copy()

    if category_order is None:
        # Stringify before sorting so mixed-type categories sort safely.
        unique_vals = result[category_col].dropna().unique()
        category_order = sorted(str(v) for v in unique_vals)
    else:
        # Lookups below use string keys, so non-string labels (e.g. ints)
        # would otherwise never match any row.
        category_order = [str(cat) for cat in category_order]

    # Convert category column to string for consistent handling
    result["_cat_str"] = result[category_col].astype(str)

    # Map categories to index; anything not in the order goes one past the end.
    cat_to_idx = {cat: i for i, cat in enumerate(category_order)}
    fallback_idx = len(category_order)
    result["_cat_idx"] = result["_cat_str"].map(
        lambda x: cat_to_idx.get(x, fallback_idx)
    )

    # Jitter spreads multiple points within a category. A private, fixed-seed
    # generator gives the same values as seeding the global NumPy RNG with 42
    # without disturbing the caller's random state.
    jitter = np.random.RandomState(42).uniform(-0.3, 0.3, size=len(result))
    result["_x_pos"] = result["_cat_idx"] + jitter

    # Calculate -log10(p); the clip keeps p == 0 finite.
    result["_neg_log_p"] = -np.log10(result[p_col].clip(lower=1e-300))

    # Assign colors (use string values for lookup)
    colors = get_chromosome_colors(len(category_order))
    cat_to_color = dict(zip(category_order, colors))
    result["_color"] = result["_cat_str"].map(cat_to_color)

    result.attrs["category_order"] = category_order
    result.attrs["category_centers"] = {cat: i for i, cat in enumerate(category_order)}

    return result
|