pylocuszoom 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylocuszoom/__init__.py +15 -0
- pylocuszoom/backends/__init__.py +116 -17
- pylocuszoom/backends/base.py +363 -60
- pylocuszoom/backends/bokeh_backend.py +77 -15
- pylocuszoom/backends/hover.py +198 -0
- pylocuszoom/backends/matplotlib_backend.py +263 -3
- pylocuszoom/backends/plotly_backend.py +73 -16
- pylocuszoom/ensembl.py +476 -0
- pylocuszoom/eqtl.py +15 -19
- pylocuszoom/finemapping.py +17 -26
- pylocuszoom/forest.py +9 -11
- pylocuszoom/gene_track.py +161 -135
- pylocuszoom/loaders.py +3 -1
- pylocuszoom/phewas.py +10 -11
- pylocuszoom/plotter.py +120 -194
- pylocuszoom/recombination.py +19 -3
- pylocuszoom/utils.py +52 -0
- pylocuszoom/validation.py +172 -0
- {pylocuszoom-0.6.0.dist-info → pylocuszoom-0.8.0.dist-info}/METADATA +46 -25
- pylocuszoom-0.8.0.dist-info/RECORD +29 -0
- pylocuszoom-0.6.0.dist-info/RECORD +0 -26
- {pylocuszoom-0.6.0.dist-info → pylocuszoom-0.8.0.dist-info}/WHEEL +0 -0
- {pylocuszoom-0.6.0.dist-info → pylocuszoom-0.8.0.dist-info}/licenses/LICENSE.md +0 -0
pylocuszoom/gene_track.py
CHANGED
|
@@ -28,6 +28,10 @@ GENE_AREA = 0.25 # Bottom portion for gene drawing
|
|
|
28
28
|
EXON_HEIGHT = 0.20 # Exon rectangle height
|
|
29
29
|
INTRON_HEIGHT = 0.02 # Thin intron line
|
|
30
30
|
|
|
31
|
+
# Arrow dimensions (pre-computed for clarity)
|
|
32
|
+
ARROW_HEIGHT_RATIO = 0.2625 # Fraction of EXON_HEIGHT: 0.35 * 0.75 (75% of original arrow height)
|
|
33
|
+
ARROW_WIDTH_RATIO = 0.0066 # Fraction of region_width: 0.006 * 1.1 (10% wider than original)
|
|
34
|
+
|
|
31
35
|
|
|
32
36
|
def assign_gene_positions(genes_df: pd.DataFrame, start: int, end: int) -> List[int]:
|
|
33
37
|
"""Assign row indices to genes to minimize overlap.
|
|
@@ -111,6 +115,147 @@ def get_nearest_gene(
|
|
|
111
115
|
return nearby.loc[nearby["dist"].idxmin(), "gene_name"]
|
|
112
116
|
|
|
113
117
|
|
|
118
|
+
def _filter_genes_by_region(
|
|
119
|
+
df: pd.DataFrame, chrom: Union[int, str], start: int, end: int
|
|
120
|
+
) -> pd.DataFrame:
|
|
121
|
+
"""Filter a DataFrame to genes/exons within a genomic region."""
|
|
122
|
+
chrom_str = normalize_chrom(chrom)
|
|
123
|
+
return df[
|
|
124
|
+
(df["chr"].astype(str).str.replace("chr", "", regex=False) == chrom_str)
|
|
125
|
+
& (df["end"] >= start)
|
|
126
|
+
& (df["start"] <= end)
|
|
127
|
+
].copy()
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _compute_arrow_geometry(
|
|
131
|
+
gene_start: int, gene_end: int, region_width: int, strand: str
|
|
132
|
+
) -> tuple[list[float], float, float, str]:
|
|
133
|
+
"""Compute arrow tip positions and dimensions for strand arrows.
|
|
134
|
+
|
|
135
|
+
Returns:
|
|
136
|
+
Tuple of (arrow_tip_positions, tri_height, tri_width, arrow_color).
|
|
137
|
+
"""
|
|
138
|
+
tri_height = EXON_HEIGHT * ARROW_HEIGHT_RATIO
|
|
139
|
+
tri_width = region_width * ARROW_WIDTH_RATIO
|
|
140
|
+
|
|
141
|
+
tip_offset = tri_width / 2
|
|
142
|
+
tail_offset = tri_width * 1.5
|
|
143
|
+
gene_center = (gene_start + gene_end) / 2
|
|
144
|
+
|
|
145
|
+
if strand == "+":
|
|
146
|
+
arrow_tip_positions = [
|
|
147
|
+
gene_start + tail_offset,
|
|
148
|
+
gene_center + tri_width / 2,
|
|
149
|
+
gene_end - tip_offset,
|
|
150
|
+
]
|
|
151
|
+
arrow_color = "#000000"
|
|
152
|
+
else:
|
|
153
|
+
arrow_tip_positions = [
|
|
154
|
+
gene_end - tail_offset,
|
|
155
|
+
gene_center - tri_width / 2,
|
|
156
|
+
gene_start + tip_offset,
|
|
157
|
+
]
|
|
158
|
+
arrow_color = "#333333"
|
|
159
|
+
|
|
160
|
+
return arrow_tip_positions, tri_height, tri_width, arrow_color
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _draw_strand_arrows_matplotlib(
|
|
164
|
+
ax: Axes,
|
|
165
|
+
gene: pd.Series,
|
|
166
|
+
gene_start: int,
|
|
167
|
+
gene_end: int,
|
|
168
|
+
y_gene: float,
|
|
169
|
+
region_width: int,
|
|
170
|
+
) -> None:
|
|
171
|
+
"""Draw strand direction arrows using matplotlib."""
|
|
172
|
+
strand = gene["strand"]
|
|
173
|
+
arrow_tip_positions, tri_height, tri_width, arrow_color = _compute_arrow_geometry(
|
|
174
|
+
gene_start, gene_end, region_width, strand
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
# Draw connecting line from the first to the last arrow tip
|
|
178
|
+
if len(arrow_tip_positions) > 1:
|
|
179
|
+
ax.plot(
|
|
180
|
+
[arrow_tip_positions[0], arrow_tip_positions[-1]],
|
|
181
|
+
[y_gene, y_gene],
|
|
182
|
+
color=arrow_color,
|
|
183
|
+
linewidth=1.0,
|
|
184
|
+
zorder=4,
|
|
185
|
+
solid_capstyle="butt",
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
for tip_x in arrow_tip_positions:
|
|
189
|
+
if strand == "+":
|
|
190
|
+
base_x = tip_x - tri_width
|
|
191
|
+
else:
|
|
192
|
+
base_x = tip_x + tri_width
|
|
193
|
+
|
|
194
|
+
tri_points = [
|
|
195
|
+
[tip_x, y_gene],
|
|
196
|
+
[base_x, y_gene + tri_height],
|
|
197
|
+
[base_x, y_gene - tri_height],
|
|
198
|
+
]
|
|
199
|
+
|
|
200
|
+
triangle = Polygon(
|
|
201
|
+
tri_points,
|
|
202
|
+
closed=True,
|
|
203
|
+
facecolor=arrow_color,
|
|
204
|
+
edgecolor=arrow_color,
|
|
205
|
+
linewidth=0.5,
|
|
206
|
+
zorder=5,
|
|
207
|
+
)
|
|
208
|
+
ax.add_patch(triangle)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _draw_strand_arrows_generic(
|
|
212
|
+
ax: Any,
|
|
213
|
+
backend: Any,
|
|
214
|
+
gene: pd.Series,
|
|
215
|
+
gene_start: int,
|
|
216
|
+
gene_end: int,
|
|
217
|
+
y_gene: float,
|
|
218
|
+
region_width: int,
|
|
219
|
+
) -> None:
|
|
220
|
+
"""Draw strand direction arrows using a generic backend."""
|
|
221
|
+
strand = gene["strand"]
|
|
222
|
+
arrow_tip_positions, tri_height, tri_width, arrow_color = _compute_arrow_geometry(
|
|
223
|
+
gene_start, gene_end, region_width, strand
|
|
224
|
+
)
|
|
225
|
+
|
|
226
|
+
# Draw connecting line from the first to the last arrow tip
|
|
227
|
+
if len(arrow_tip_positions) > 1:
|
|
228
|
+
backend.line(
|
|
229
|
+
ax,
|
|
230
|
+
x=pd.Series([arrow_tip_positions[0], arrow_tip_positions[-1]]),
|
|
231
|
+
y=pd.Series([y_gene, y_gene]),
|
|
232
|
+
color=arrow_color,
|
|
233
|
+
linewidth=1.0,
|
|
234
|
+
zorder=4,
|
|
235
|
+
)
|
|
236
|
+
|
|
237
|
+
for tip_x in arrow_tip_positions:
|
|
238
|
+
if strand == "+":
|
|
239
|
+
base_x = tip_x - tri_width
|
|
240
|
+
else:
|
|
241
|
+
base_x = tip_x + tri_width
|
|
242
|
+
|
|
243
|
+
tri_points = [
|
|
244
|
+
[tip_x, y_gene],
|
|
245
|
+
[base_x, y_gene + tri_height],
|
|
246
|
+
[base_x, y_gene - tri_height],
|
|
247
|
+
]
|
|
248
|
+
|
|
249
|
+
backend.add_polygon(
|
|
250
|
+
ax,
|
|
251
|
+
tri_points,
|
|
252
|
+
facecolor=arrow_color,
|
|
253
|
+
edgecolor=arrow_color,
|
|
254
|
+
linewidth=0.5,
|
|
255
|
+
zorder=5,
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
|
|
114
259
|
def plot_gene_track(
|
|
115
260
|
ax: Axes,
|
|
116
261
|
genes_df: pd.DataFrame,
|
|
@@ -137,12 +282,7 @@ def plot_gene_track(
|
|
|
137
282
|
exons_df: Exon annotations with chr, start, end, gene_name
|
|
138
283
|
columns for drawing exon structure. Optional.
|
|
139
284
|
"""
|
|
140
|
-
|
|
141
|
-
region_genes = genes_df[
|
|
142
|
-
(genes_df["chr"].astype(str).str.replace("chr", "", regex=False) == chrom_str)
|
|
143
|
-
& (genes_df["end"] >= start)
|
|
144
|
-
& (genes_df["start"] <= end)
|
|
145
|
-
].copy()
|
|
285
|
+
region_genes = _filter_genes_by_region(genes_df, chrom, start, end)
|
|
146
286
|
|
|
147
287
|
ax.set_xlim(start, end)
|
|
148
288
|
ax.set_ylabel("")
|
|
@@ -178,20 +318,13 @@ def plot_gene_track(
|
|
|
178
318
|
top_margin = 0.05 # Minimal space above top label
|
|
179
319
|
ax.set_ylim(
|
|
180
320
|
-bottom_margin,
|
|
181
|
-
|
|
321
|
+
max_row * ROW_HEIGHT + GENE_AREA + top_margin,
|
|
182
322
|
)
|
|
183
323
|
|
|
184
324
|
# Filter exons for this region if available
|
|
185
325
|
region_exons = None
|
|
186
326
|
if exons_df is not None and not exons_df.empty:
|
|
187
|
-
region_exons = exons_df
|
|
188
|
-
(
|
|
189
|
-
exons_df["chr"].astype(str).str.replace("chr", "", regex=False)
|
|
190
|
-
== chrom_str
|
|
191
|
-
)
|
|
192
|
-
& (exons_df["end"] >= start)
|
|
193
|
-
& (exons_df["start"] <= end)
|
|
194
|
-
].copy()
|
|
327
|
+
region_exons = _filter_genes_by_region(exons_df, chrom, start, end)
|
|
195
328
|
|
|
196
329
|
region_width = end - start
|
|
197
330
|
|
|
@@ -257,59 +390,11 @@ def plot_gene_track(
|
|
|
257
390
|
)
|
|
258
391
|
)
|
|
259
392
|
|
|
260
|
-
# Add strand direction triangles
|
|
393
|
+
# Add strand direction triangles
|
|
261
394
|
if "strand" in gene.index:
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
# Triangle dimensions
|
|
266
|
-
tri_height = EXON_HEIGHT * 0.35
|
|
267
|
-
tri_width = region_width * 0.006
|
|
268
|
-
|
|
269
|
-
# Arrow positions: front, middle, back (tip positions)
|
|
270
|
-
tip_offset = tri_width / 2 # Tiny offset to keep tip inside gene
|
|
271
|
-
tail_offset = tri_width * 1.5 # Offset for tail arrow from gene start/end
|
|
272
|
-
gene_center = (gene_start + gene_end) / 2
|
|
273
|
-
if arrow_dir == 1: # Forward strand
|
|
274
|
-
arrow_tip_positions = [
|
|
275
|
-
gene_start + tail_offset, # Tail (tip inside gene)
|
|
276
|
-
gene_center + tri_width / 2, # Middle (arrow center at gene center)
|
|
277
|
-
gene_end - tip_offset, # Tip (near gene end)
|
|
278
|
-
]
|
|
279
|
-
arrow_color = "#000000" # Black for forward
|
|
280
|
-
else: # Reverse strand
|
|
281
|
-
arrow_tip_positions = [
|
|
282
|
-
gene_end - tail_offset, # Tail (tip inside gene)
|
|
283
|
-
gene_center - tri_width / 2, # Middle (arrow center at gene center)
|
|
284
|
-
gene_start + tip_offset, # Tip (near gene start)
|
|
285
|
-
]
|
|
286
|
-
arrow_color = "#333333" # Dark grey for reverse
|
|
287
|
-
|
|
288
|
-
for tip_x in arrow_tip_positions:
|
|
289
|
-
if arrow_dir == 1:
|
|
290
|
-
base_x = tip_x - tri_width
|
|
291
|
-
tri_points = [
|
|
292
|
-
[tip_x, y_gene], # Tip pointing right
|
|
293
|
-
[base_x, y_gene + tri_height],
|
|
294
|
-
[base_x, y_gene - tri_height],
|
|
295
|
-
]
|
|
296
|
-
else:
|
|
297
|
-
base_x = tip_x + tri_width
|
|
298
|
-
tri_points = [
|
|
299
|
-
[tip_x, y_gene], # Tip pointing left
|
|
300
|
-
[base_x, y_gene + tri_height],
|
|
301
|
-
[base_x, y_gene - tri_height],
|
|
302
|
-
]
|
|
303
|
-
|
|
304
|
-
triangle = Polygon(
|
|
305
|
-
tri_points,
|
|
306
|
-
closed=True,
|
|
307
|
-
facecolor=arrow_color,
|
|
308
|
-
edgecolor=arrow_color,
|
|
309
|
-
linewidth=0.5,
|
|
310
|
-
zorder=5,
|
|
311
|
-
)
|
|
312
|
-
ax.add_patch(triangle)
|
|
395
|
+
_draw_strand_arrows_matplotlib(
|
|
396
|
+
ax, gene, gene_start, gene_end, y_gene, region_width
|
|
397
|
+
)
|
|
313
398
|
|
|
314
399
|
# Add gene name label in the gap above gene
|
|
315
400
|
if gene_name:
|
|
@@ -320,7 +405,7 @@ def plot_gene_track(
|
|
|
320
405
|
gene_name,
|
|
321
406
|
ha="center",
|
|
322
407
|
va="bottom",
|
|
323
|
-
fontsize=
|
|
408
|
+
fontsize=7,
|
|
324
409
|
color="#000000",
|
|
325
410
|
fontweight="medium",
|
|
326
411
|
style="italic",
|
|
@@ -353,12 +438,7 @@ def plot_gene_track_generic(
|
|
|
353
438
|
exons_df: Exon annotations with chr, start, end, gene_name
|
|
354
439
|
columns for drawing exon structure. Optional.
|
|
355
440
|
"""
|
|
356
|
-
|
|
357
|
-
region_genes = genes_df[
|
|
358
|
-
(genes_df["chr"].astype(str).str.replace("chr", "", regex=False) == chrom_str)
|
|
359
|
-
& (genes_df["end"] >= start)
|
|
360
|
-
& (genes_df["start"] <= end)
|
|
361
|
-
].copy()
|
|
441
|
+
region_genes = _filter_genes_by_region(genes_df, chrom, start, end)
|
|
362
442
|
|
|
363
443
|
backend.set_xlim(ax, start, end)
|
|
364
444
|
backend.set_ylabel(ax, "", fontsize=10)
|
|
@@ -389,20 +469,13 @@ def plot_gene_track_generic(
|
|
|
389
469
|
backend.set_ylim(
|
|
390
470
|
ax,
|
|
391
471
|
-bottom_margin,
|
|
392
|
-
|
|
472
|
+
max_row * ROW_HEIGHT + GENE_AREA + top_margin,
|
|
393
473
|
)
|
|
394
474
|
|
|
395
475
|
# Filter exons for this region if available
|
|
396
476
|
region_exons = None
|
|
397
477
|
if exons_df is not None and not exons_df.empty:
|
|
398
|
-
region_exons = exons_df
|
|
399
|
-
(
|
|
400
|
-
exons_df["chr"].astype(str).str.replace("chr", "", regex=False)
|
|
401
|
-
== chrom_str
|
|
402
|
-
)
|
|
403
|
-
& (exons_df["end"] >= start)
|
|
404
|
-
& (exons_df["start"] <= end)
|
|
405
|
-
].copy()
|
|
478
|
+
region_exons = _filter_genes_by_region(exons_df, chrom, start, end)
|
|
406
479
|
|
|
407
480
|
region_width = end - start
|
|
408
481
|
|
|
@@ -465,58 +538,11 @@ def plot_gene_track_generic(
|
|
|
465
538
|
zorder=2,
|
|
466
539
|
)
|
|
467
540
|
|
|
468
|
-
# Add strand direction triangles
|
|
541
|
+
# Add strand direction triangles
|
|
469
542
|
if "strand" in gene.index:
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
# Triangle dimensions
|
|
474
|
-
tri_height = EXON_HEIGHT * 0.35
|
|
475
|
-
tri_width = region_width * 0.006
|
|
476
|
-
|
|
477
|
-
# Arrow positions: front, middle, back (tip positions)
|
|
478
|
-
tip_offset = tri_width / 2 # Tiny offset to keep tip inside gene
|
|
479
|
-
tail_offset = tri_width * 1.5 # Offset for tail arrow from gene start/end
|
|
480
|
-
gene_center = (gene_start + gene_end) / 2
|
|
481
|
-
if arrow_dir == 1: # Forward strand
|
|
482
|
-
arrow_tip_positions = [
|
|
483
|
-
gene_start + tail_offset, # Tail (tip inside gene)
|
|
484
|
-
gene_center + tri_width / 2, # Middle (arrow center at gene center)
|
|
485
|
-
gene_end - tip_offset, # Tip (near gene end)
|
|
486
|
-
]
|
|
487
|
-
arrow_color = "#000000" # Black for forward
|
|
488
|
-
else: # Reverse strand
|
|
489
|
-
arrow_tip_positions = [
|
|
490
|
-
gene_end - tail_offset, # Tail (tip inside gene)
|
|
491
|
-
gene_center - tri_width / 2, # Middle (arrow center at gene center)
|
|
492
|
-
gene_start + tip_offset, # Tip (near gene start)
|
|
493
|
-
]
|
|
494
|
-
arrow_color = "#333333" # Dark grey for reverse
|
|
495
|
-
|
|
496
|
-
for tip_x in arrow_tip_positions:
|
|
497
|
-
if arrow_dir == 1:
|
|
498
|
-
base_x = tip_x - tri_width
|
|
499
|
-
tri_points = [
|
|
500
|
-
[tip_x, y_gene], # Tip pointing right
|
|
501
|
-
[base_x, y_gene + tri_height],
|
|
502
|
-
[base_x, y_gene - tri_height],
|
|
503
|
-
]
|
|
504
|
-
else:
|
|
505
|
-
base_x = tip_x + tri_width
|
|
506
|
-
tri_points = [
|
|
507
|
-
[tip_x, y_gene], # Tip pointing left
|
|
508
|
-
[base_x, y_gene + tri_height],
|
|
509
|
-
[base_x, y_gene - tri_height],
|
|
510
|
-
]
|
|
511
|
-
|
|
512
|
-
backend.add_polygon(
|
|
513
|
-
ax,
|
|
514
|
-
tri_points,
|
|
515
|
-
facecolor=arrow_color,
|
|
516
|
-
edgecolor=arrow_color,
|
|
517
|
-
linewidth=0.5,
|
|
518
|
-
zorder=5,
|
|
519
|
-
)
|
|
543
|
+
_draw_strand_arrows_generic(
|
|
544
|
+
ax, backend, gene, gene_start, gene_end, y_gene, region_width
|
|
545
|
+
)
|
|
520
546
|
|
|
521
547
|
# Add gene name label in the gap above gene
|
|
522
548
|
if gene_name:
|
|
@@ -526,7 +552,7 @@ def plot_gene_track_generic(
|
|
|
526
552
|
label_pos,
|
|
527
553
|
y_label,
|
|
528
554
|
gene_name,
|
|
529
|
-
fontsize=
|
|
555
|
+
fontsize=7,
|
|
530
556
|
ha="center",
|
|
531
557
|
va="bottom",
|
|
532
558
|
color="#000000",
|
pylocuszoom/loaders.py
CHANGED
|
@@ -877,4 +877,6 @@ def load_gwas(
|
|
|
877
877
|
if format not in loaders:
|
|
878
878
|
raise ValueError(f"Unknown format '{format}'. Options: {list(loaders.keys())}")
|
|
879
879
|
|
|
880
|
-
return loaders[format](
|
|
880
|
+
return loaders[format](
|
|
881
|
+
filepath, pos_col=pos_col, p_col=p_col, rs_col=rs_col, **kwargs
|
|
882
|
+
)
|
pylocuszoom/phewas.py
CHANGED
|
@@ -5,7 +5,7 @@ Validates and prepares phenome-wide association study data for plotting.
|
|
|
5
5
|
|
|
6
6
|
import pandas as pd
|
|
7
7
|
|
|
8
|
-
from .
|
|
8
|
+
from .validation import DataFrameValidator
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
def validate_phewas_df(
|
|
@@ -14,7 +14,7 @@ def validate_phewas_df(
|
|
|
14
14
|
p_col: str = "p_value",
|
|
15
15
|
category_col: str = "category",
|
|
16
16
|
) -> None:
|
|
17
|
-
"""Validate PheWAS DataFrame has required columns.
|
|
17
|
+
"""Validate PheWAS DataFrame has required columns and types.
|
|
18
18
|
|
|
19
19
|
Args:
|
|
20
20
|
df: PheWAS results DataFrame.
|
|
@@ -23,13 +23,12 @@ def validate_phewas_df(
|
|
|
23
23
|
category_col: Column name for phenotype categories (optional).
|
|
24
24
|
|
|
25
25
|
Raises:
|
|
26
|
-
ValidationError: If required columns are missing.
|
|
26
|
+
ValidationError: If required columns are missing or have invalid types.
|
|
27
27
|
"""
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
)
|
|
28
|
+
(
|
|
29
|
+
DataFrameValidator(df, "PheWAS DataFrame")
|
|
30
|
+
.require_columns([phenotype_col, p_col])
|
|
31
|
+
.require_numeric([p_col])
|
|
32
|
+
.require_range(p_col, min_val=0, max_val=1, exclusive_min=True)
|
|
33
|
+
.validate()
|
|
34
|
+
)
|