pylocuszoom 0.2.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylocuszoom/__init__.py +52 -1
- pylocuszoom/backends/base.py +47 -0
- pylocuszoom/backends/bokeh_backend.py +323 -61
- pylocuszoom/backends/matplotlib_backend.py +133 -7
- pylocuszoom/backends/plotly_backend.py +423 -33
- pylocuszoom/colors.py +3 -1
- pylocuszoom/finemapping.py +0 -1
- pylocuszoom/gene_track.py +232 -23
- pylocuszoom/loaders.py +862 -0
- pylocuszoom/plotter.py +354 -245
- pylocuszoom/py.typed +0 -0
- pylocuszoom/recombination.py +4 -4
- pylocuszoom/schemas.py +395 -0
- {pylocuszoom-0.2.0.dist-info → pylocuszoom-0.5.0.dist-info}/METADATA +125 -31
- pylocuszoom-0.5.0.dist-info/RECORD +24 -0
- pylocuszoom-0.2.0.dist-info/RECORD +0 -21
- {pylocuszoom-0.2.0.dist-info → pylocuszoom-0.5.0.dist-info}/WHEEL +0 -0
- {pylocuszoom-0.2.0.dist-info → pylocuszoom-0.5.0.dist-info}/licenses/LICENSE.md +0 -0
pylocuszoom/gene_track.py
CHANGED
|
@@ -7,7 +7,7 @@ Provides LocusZoom-style gene track plotting with:
|
|
|
7
7
|
- Gene name labels
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
-
from typing import List, Optional, Union
|
|
10
|
+
from typing import Any, List, Optional, Union
|
|
11
11
|
|
|
12
12
|
import pandas as pd
|
|
13
13
|
from matplotlib.axes import Axes
|
|
@@ -17,15 +17,15 @@ from .utils import normalize_chrom
|
|
|
17
17
|
|
|
18
18
|
# Strand-specific colors (distinct from LD palette)
|
|
19
19
|
STRAND_COLORS: dict[Optional[str], str] = {
|
|
20
|
-
"+": "#
|
|
21
|
-
"-": "#
|
|
20
|
+
"+": "#DAA520", # Goldenrod for forward strand
|
|
21
|
+
"-": "#6BB3FF", # Light blue for reverse strand
|
|
22
22
|
None: "#999999", # Light grey if no strand info
|
|
23
23
|
}
|
|
24
24
|
|
|
25
25
|
# Layout constants
|
|
26
|
-
ROW_HEIGHT = 0.
|
|
27
|
-
GENE_AREA = 0.
|
|
28
|
-
EXON_HEIGHT = 0.
|
|
26
|
+
ROW_HEIGHT = 0.35 # Total height per row (reduced for tighter spacing)
|
|
27
|
+
GENE_AREA = 0.25 # Bottom portion for gene drawing
|
|
28
|
+
EXON_HEIGHT = 0.20 # Exon rectangle height
|
|
29
29
|
INTRON_HEIGHT = 0.02 # Thin intron line
|
|
30
30
|
|
|
31
31
|
|
|
@@ -175,7 +175,7 @@ def plot_gene_track(
|
|
|
175
175
|
# Set y-axis limits - small bottom margin for gene body, tight top
|
|
176
176
|
max_row = max(positions) if positions else 0
|
|
177
177
|
bottom_margin = EXON_HEIGHT / 2 + 0.02 # Room for bottom gene
|
|
178
|
-
top_margin = 0.
|
|
178
|
+
top_margin = 0.05 # Minimal space above top label
|
|
179
179
|
ax.set_ylim(
|
|
180
180
|
-bottom_margin,
|
|
181
181
|
(max_row + 1) * ROW_HEIGHT - ROW_HEIGHT + GENE_AREA + top_margin,
|
|
@@ -193,6 +193,8 @@ def plot_gene_track(
|
|
|
193
193
|
& (exons_df["start"] <= end)
|
|
194
194
|
].copy()
|
|
195
195
|
|
|
196
|
+
region_width = end - start
|
|
197
|
+
|
|
196
198
|
for idx, (_, gene) in enumerate(region_genes.iterrows()):
|
|
197
199
|
gene_start = max(int(gene["start"]), start)
|
|
198
200
|
gene_end = min(int(gene["end"]), end)
|
|
@@ -258,38 +260,41 @@ def plot_gene_track(
|
|
|
258
260
|
# Add strand direction triangles (tip, center, tail)
|
|
259
261
|
if "strand" in gene.index:
|
|
260
262
|
strand = gene["strand"]
|
|
261
|
-
region_width = end - start
|
|
262
|
-
gene_width = gene_end - gene_start
|
|
263
263
|
arrow_dir = 1 if strand == "+" else -1
|
|
264
264
|
|
|
265
265
|
# Triangle dimensions
|
|
266
266
|
tri_height = EXON_HEIGHT * 0.35
|
|
267
267
|
tri_width = region_width * 0.006
|
|
268
268
|
|
|
269
|
-
# Arrow positions: front, middle, back
|
|
269
|
+
# Arrow positions: front, middle, back (tip positions)
|
|
270
|
+
tip_offset = tri_width / 2 # Tiny offset to keep tip inside gene
|
|
271
|
+
tail_offset = tri_width * 1.5 # Offset for tail arrow from gene start/end
|
|
272
|
+
gene_center = (gene_start + gene_end) / 2
|
|
270
273
|
if arrow_dir == 1: # Forward strand
|
|
271
|
-
|
|
272
|
-
gene_start, #
|
|
273
|
-
|
|
274
|
-
gene_end, #
|
|
274
|
+
arrow_tip_positions = [
|
|
275
|
+
gene_start + tail_offset, # Tail (tip inside gene)
|
|
276
|
+
gene_center + tri_width / 2, # Middle (arrow center at gene center)
|
|
277
|
+
gene_end - tip_offset, # Tip (near gene end)
|
|
275
278
|
]
|
|
279
|
+
arrow_color = "#000000" # Black for forward
|
|
276
280
|
else: # Reverse strand
|
|
277
|
-
|
|
278
|
-
gene_end, #
|
|
279
|
-
|
|
280
|
-
gene_start, #
|
|
281
|
+
arrow_tip_positions = [
|
|
282
|
+
gene_end - tail_offset, # Tail (tip inside gene)
|
|
283
|
+
gene_center - tri_width / 2, # Middle (arrow center at gene center)
|
|
284
|
+
gene_start + tip_offset, # Tip (near gene start)
|
|
281
285
|
]
|
|
286
|
+
arrow_color = "#333333" # Dark grey for reverse
|
|
282
287
|
|
|
283
|
-
for
|
|
288
|
+
for tip_x in arrow_tip_positions:
|
|
284
289
|
if arrow_dir == 1:
|
|
285
|
-
|
|
290
|
+
base_x = tip_x - tri_width
|
|
286
291
|
tri_points = [
|
|
287
292
|
[tip_x, y_gene], # Tip pointing right
|
|
288
293
|
[base_x, y_gene + tri_height],
|
|
289
294
|
[base_x, y_gene - tri_height],
|
|
290
295
|
]
|
|
291
296
|
else:
|
|
292
|
-
|
|
297
|
+
base_x = tip_x + tri_width
|
|
293
298
|
tri_points = [
|
|
294
299
|
[tip_x, y_gene], # Tip pointing left
|
|
295
300
|
[base_x, y_gene + tri_height],
|
|
@@ -299,8 +304,8 @@ def plot_gene_track(
|
|
|
299
304
|
triangle = Polygon(
|
|
300
305
|
tri_points,
|
|
301
306
|
closed=True,
|
|
302
|
-
facecolor=
|
|
303
|
-
edgecolor=
|
|
307
|
+
facecolor=arrow_color,
|
|
308
|
+
edgecolor=arrow_color,
|
|
304
309
|
linewidth=0.5,
|
|
305
310
|
zorder=5,
|
|
306
311
|
)
|
|
@@ -322,3 +327,207 @@ def plot_gene_track(
|
|
|
322
327
|
zorder=4,
|
|
323
328
|
clip_on=True,
|
|
324
329
|
)
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def plot_gene_track_generic(
    ax: Any,
    backend: Any,
    genes_df: pd.DataFrame,
    chrom: Union[int, str],
    start: int,
    end: int,
    exons_df: Optional[pd.DataFrame] = None,
) -> None:
    """Plot gene annotations using a backend-agnostic approach.

    All drawing goes through ``backend`` (``set_xlim``, ``set_ylim``,
    ``set_ylabel``, ``hide_yaxis``, ``add_text``, ``add_rectangle`` and
    ``add_polygon``), so the same layout logic is shared by every backend
    that implements those methods.

    Genes overlapping ``[start, end]`` on ``chrom`` are stacked into rows by
    ``assign_gene_positions``. Each gene is drawn either as a thin intron
    line plus exon rectangles (when ``exons_df`` has rows for it) or as a
    single rectangle, with three strand-direction triangles and a name
    label above it.

    Args:
        ax: Axes object (format depends on backend).
        backend: Backend instance with drawing methods.
        genes_df: Gene annotations with chr, start, end, gene_name,
            and optionally strand (+/-) column.
        chrom: Chromosome number or string.
        start: Region start position.
        end: Region end position.
        exons_df: Exon annotations with chr, start, end, gene_name
            columns for drawing exon structure. Optional.

    Returns:
        None. The track is drawn onto ``ax`` as a side effect.

    Note:
        Direction triangles are only drawn for genes whose strand is
        exactly ``"+"`` or ``"-"``. Genes with a missing or unrecognised
        strand are drawn in the neutral colour without arrows, instead of
        being shown as reverse-strand genes.
    """
    chrom_str = normalize_chrom(chrom)
    region_genes = genes_df[
        (genes_df["chr"].astype(str).str.replace("chr", "", regex=False) == chrom_str)
        & (genes_df["end"] >= start)
        & (genes_df["start"] <= end)
    ].copy()

    backend.set_xlim(ax, start, end)
    backend.set_ylabel(ax, "", fontsize=10)
    backend.hide_yaxis(ax)

    if region_genes.empty:
        # Nothing to draw: show a centred placeholder instead of a blank panel.
        backend.set_ylim(ax, 0, 1)
        backend.add_text(
            ax,
            (start + end) / 2,
            0.5,
            "No genes",
            fontsize=9,
            ha="center",
            va="center",
            color="grey",
        )
        return

    # Assign vertical positions to avoid overlap
    region_genes = region_genes.sort_values("start")
    positions = assign_gene_positions(region_genes, start, end)

    # Set y-axis limits - small bottom margin for gene body, tight top
    max_row = max(positions) if positions else 0
    bottom_margin = EXON_HEIGHT / 2 + 0.02  # Room for bottom gene
    top_margin = 0.05  # Minimal space above top label
    backend.set_ylim(
        ax,
        -bottom_margin,
        (max_row + 1) * ROW_HEIGHT - ROW_HEIGHT + GENE_AREA + top_margin,
    )

    # Filter exons for this region if available
    region_exons = None
    if exons_df is not None and not exons_df.empty:
        region_exons = exons_df[
            (
                exons_df["chr"].astype(str).str.replace("chr", "", regex=False)
                == chrom_str
            )
            & (exons_df["end"] >= start)
            & (exons_df["start"] <= end)
        ].copy()

    # Triangle geometry depends only on the region, so compute it once.
    region_width = end - start
    tri_height = EXON_HEIGHT * 0.35
    tri_width = region_width * 0.006
    tip_offset = tri_width / 2  # Tiny offset to keep tip inside gene
    tail_offset = tri_width * 1.5  # Offset for tail arrow from gene start/end

    for idx, (_, gene) in enumerate(region_genes.iterrows()):
        # Clip the gene to the plotted window.
        gene_start = max(int(gene["start"]), start)
        gene_end = min(int(gene["end"]), end)
        row = positions[idx]

        # A missing name (NaN/None/NA) must not be rendered as a label or
        # used to match exons, so normalise it to the empty string.
        gene_name = gene.get("gene_name", "")
        if pd.isna(gene_name):
            gene_name = ""

        # Get strand-specific color (falls back to the "no strand" entry)
        strand = gene.get("strand")
        gene_col = STRAND_COLORS.get(strand, STRAND_COLORS[None])

        # Y position: bottom of row + offset for gene area
        y_gene = row * ROW_HEIGHT + 0.05
        y_label = y_gene + EXON_HEIGHT / 2 + 0.01  # Just above gene top

        # Check if we have exon data for this gene
        gene_exons = None
        if region_exons is not None and not region_exons.empty and gene_name:
            gene_exons = region_exons[region_exons["gene_name"] == gene_name].copy()

        if gene_exons is not None and not gene_exons.empty:
            # Draw intron line (thin horizontal line spanning gene)
            backend.add_rectangle(
                ax,
                (gene_start, y_gene - INTRON_HEIGHT / 2),
                gene_end - gene_start,
                INTRON_HEIGHT,
                facecolor=gene_col,
                edgecolor=gene_col,
                linewidth=0.5,
                zorder=1,
            )

            # Draw exons (thick rectangles), clipped to the plotted window
            for _, exon in gene_exons.iterrows():
                exon_start = max(int(exon["start"]), start)
                exon_end = min(int(exon["end"]), end)
                backend.add_rectangle(
                    ax,
                    (exon_start, y_gene - EXON_HEIGHT / 2),
                    exon_end - exon_start,
                    EXON_HEIGHT,
                    facecolor=gene_col,
                    edgecolor=gene_col,
                    linewidth=0.5,
                    zorder=2,
                )
        else:
            # No exon data - draw full gene body as rectangle (fallback)
            backend.add_rectangle(
                ax,
                (gene_start, y_gene - EXON_HEIGHT / 2),
                gene_end - gene_start,
                EXON_HEIGHT,
                facecolor=gene_col,
                edgecolor=gene_col,
                linewidth=0.5,
                zorder=2,
            )

        # Add strand direction triangles (tail, center, tip). Only a known
        # strand gets arrows; the isinstance guard also keeps NaN/pd.NA out
        # of the membership test.
        if isinstance(strand, str) and strand in ("+", "-"):
            gene_center = (gene_start + gene_end) / 2
            if strand == "+":  # Forward strand: tips point right
                arrow_tip_positions = [
                    gene_start + tail_offset,  # Tail (tip inside gene)
                    gene_center + tri_width / 2,  # Middle (centered on gene)
                    gene_end - tip_offset,  # Tip (near gene end)
                ]
                base_shift = -tri_width  # Base sits to the left of the tip
                arrow_color = "#000000"  # Black for forward
            else:  # Reverse strand: tips point left
                arrow_tip_positions = [
                    gene_end - tail_offset,  # Tail (tip inside gene)
                    gene_center - tri_width / 2,  # Middle (centered on gene)
                    gene_start + tip_offset,  # Tip (near gene start)
                ]
                base_shift = tri_width  # Base sits to the right of the tip
                arrow_color = "#333333"  # Dark grey for reverse

            for tip_x in arrow_tip_positions:
                base_x = tip_x + base_shift
                tri_points = [
                    [tip_x, y_gene],
                    [base_x, y_gene + tri_height],
                    [base_x, y_gene - tri_height],
                ]
                backend.add_polygon(
                    ax,
                    tri_points,
                    facecolor=arrow_color,
                    edgecolor=arrow_color,
                    linewidth=0.5,
                    zorder=5,
                )

        # Add gene name label in the gap above gene
        if gene_name:
            label_pos = (gene_start + gene_end) / 2
            backend.add_text(
                ax,
                label_pos,
                y_label,
                gene_name,
                fontsize=6,
                ha="center",
                va="bottom",
                color="#000000",
            )
|