pylocuszoom 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylocuszoom/__init__.py +39 -20
- pylocuszoom/backends/__init__.py +1 -5
- pylocuszoom/backends/base.py +1 -1
- pylocuszoom/backends/bokeh_backend.py +4 -7
- pylocuszoom/backends/matplotlib_backend.py +6 -1
- pylocuszoom/backends/plotly_backend.py +11 -12
- pylocuszoom/colors.py +132 -0
- pylocuszoom/eqtl.py +3 -2
- pylocuszoom/finemapping.py +224 -0
- pylocuszoom/gene_track.py +44 -31
- pylocuszoom/labels.py +32 -33
- pylocuszoom/ld.py +8 -7
- pylocuszoom/plotter.py +381 -66
- pylocuszoom/recombination.py +14 -14
- pylocuszoom/utils.py +3 -1
- {pylocuszoom-0.1.0.dist-info → pylocuszoom-0.2.0.dist-info}/METADATA +20 -25
- pylocuszoom-0.2.0.dist-info/RECORD +21 -0
- pylocuszoom-0.1.0.dist-info/RECORD +0 -20
- {pylocuszoom-0.1.0.dist-info → pylocuszoom-0.2.0.dist-info}/WHEEL +0 -0
- {pylocuszoom-0.1.0.dist-info → pylocuszoom-0.2.0.dist-info}/licenses/LICENSE.md +0 -0
pylocuszoom/gene_track.py
CHANGED
|
@@ -15,11 +15,11 @@ from matplotlib.patches import Polygon, Rectangle
|
|
|
15
15
|
|
|
16
16
|
from .utils import normalize_chrom
|
|
17
17
|
|
|
18
|
-
# Strand-specific colors (
|
|
18
|
+
# Strand-specific colors (distinct from LD palette)
|
|
19
19
|
STRAND_COLORS: dict[Optional[str], str] = {
|
|
20
|
-
"+": "#
|
|
21
|
-
"-": "#
|
|
22
|
-
None: "#
|
|
20
|
+
"+": "#FFD700", # Gold/bright yellow for forward strand
|
|
21
|
+
"-": "#DDA0DD", # Plum/light purple for reverse strand
|
|
22
|
+
None: "#999999", # Light grey if no strand info
|
|
23
23
|
}
|
|
24
24
|
|
|
25
25
|
# Layout constants
|
|
@@ -145,7 +145,7 @@ def plot_gene_track(
|
|
|
145
145
|
].copy()
|
|
146
146
|
|
|
147
147
|
ax.set_xlim(start, end)
|
|
148
|
-
ax.set_ylabel("
|
|
148
|
+
ax.set_ylabel("")
|
|
149
149
|
ax.set_yticks([])
|
|
150
150
|
|
|
151
151
|
# theme_classic: only bottom spine
|
|
@@ -255,43 +255,56 @@ def plot_gene_track(
|
|
|
255
255
|
)
|
|
256
256
|
)
|
|
257
257
|
|
|
258
|
-
# Add strand direction
|
|
258
|
+
# Add strand direction triangles (tip, center, tail)
|
|
259
259
|
if "strand" in gene.index:
|
|
260
260
|
strand = gene["strand"]
|
|
261
261
|
region_width = end - start
|
|
262
|
+
gene_width = gene_end - gene_start
|
|
262
263
|
arrow_dir = 1 if strand == "+" else -1
|
|
263
264
|
|
|
264
|
-
# Triangle dimensions
|
|
265
|
+
# Triangle dimensions
|
|
265
266
|
tri_height = EXON_HEIGHT * 0.35
|
|
266
267
|
tri_width = region_width * 0.006
|
|
267
268
|
|
|
268
|
-
#
|
|
269
|
-
if arrow_dir == 1: # Forward strand
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
[base_x, y_gene + tri_height],
|
|
275
|
-
[base_x, y_gene - tri_height],
|
|
269
|
+
# Arrow positions: front, middle, back
|
|
270
|
+
if arrow_dir == 1: # Forward strand
|
|
271
|
+
arrow_positions = [
|
|
272
|
+
gene_start, # Front
|
|
273
|
+
(gene_start + gene_end) / 2, # Middle
|
|
274
|
+
gene_end, # Back (tip past gene end)
|
|
276
275
|
]
|
|
277
|
-
else: # Reverse strand
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
[base_x, y_gene + tri_height],
|
|
283
|
-
[base_x, y_gene - tri_height],
|
|
276
|
+
else: # Reverse strand
|
|
277
|
+
arrow_positions = [
|
|
278
|
+
gene_end, # Front (arrows point left, so start from right)
|
|
279
|
+
(gene_start + gene_end) / 2, # Middle
|
|
280
|
+
gene_start, # Back (tip past gene start)
|
|
284
281
|
]
|
|
285
282
|
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
283
|
+
for base_x in arrow_positions:
|
|
284
|
+
if arrow_dir == 1:
|
|
285
|
+
tip_x = base_x + tri_width
|
|
286
|
+
tri_points = [
|
|
287
|
+
[tip_x, y_gene], # Tip pointing right
|
|
288
|
+
[base_x, y_gene + tri_height],
|
|
289
|
+
[base_x, y_gene - tri_height],
|
|
290
|
+
]
|
|
291
|
+
else:
|
|
292
|
+
tip_x = base_x - tri_width
|
|
293
|
+
tri_points = [
|
|
294
|
+
[tip_x, y_gene], # Tip pointing left
|
|
295
|
+
[base_x, y_gene + tri_height],
|
|
296
|
+
[base_x, y_gene - tri_height],
|
|
297
|
+
]
|
|
298
|
+
|
|
299
|
+
triangle = Polygon(
|
|
300
|
+
tri_points,
|
|
301
|
+
closed=True,
|
|
302
|
+
facecolor="#000000",
|
|
303
|
+
edgecolor="#000000",
|
|
304
|
+
linewidth=0.5,
|
|
305
|
+
zorder=5,
|
|
306
|
+
)
|
|
307
|
+
ax.add_patch(triangle)
|
|
295
308
|
|
|
296
309
|
# Add gene name label in the gap above gene
|
|
297
310
|
if gene_name:
|
pylocuszoom/labels.py
CHANGED
|
@@ -2,18 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
Provides automatic labeling of top significant SNPs with:
|
|
4
4
|
- SNP ID (rs number)
|
|
5
|
-
- Nearest gene name (if gene annotations provided)
|
|
6
5
|
- Automatic overlap avoidance (if adjustText installed)
|
|
7
6
|
"""
|
|
8
7
|
|
|
9
|
-
from typing import List, Optional, Union
|
|
8
|
+
from typing import Any, List, Optional, Union
|
|
10
9
|
|
|
11
10
|
import pandas as pd
|
|
12
11
|
from matplotlib.axes import Axes
|
|
13
12
|
from matplotlib.text import Annotation
|
|
14
13
|
|
|
15
|
-
from .gene_track import get_nearest_gene
|
|
16
|
-
|
|
17
14
|
|
|
18
15
|
def add_snp_labels(
|
|
19
16
|
ax: Axes,
|
|
@@ -25,11 +22,11 @@ def add_snp_labels(
|
|
|
25
22
|
genes_df: Optional[pd.DataFrame] = None,
|
|
26
23
|
chrom: Optional[Union[int, str]] = None,
|
|
27
24
|
max_label_length: int = 15,
|
|
25
|
+
**kwargs: Any,
|
|
28
26
|
) -> List[Annotation]:
|
|
29
27
|
"""Add text labels to top SNPs in the regional plot.
|
|
30
28
|
|
|
31
|
-
Labels the most significant SNPs with
|
|
32
|
-
or the nearest gene name (if genes_df provided).
|
|
29
|
+
Labels the most significant SNPs with their SNP ID (rs number).
|
|
33
30
|
|
|
34
31
|
Args:
|
|
35
32
|
ax: Matplotlib axes object.
|
|
@@ -39,10 +36,8 @@ def add_snp_labels(
|
|
|
39
36
|
neglog10p_col: Column name for -log10(p-value).
|
|
40
37
|
rs_col: Column name for SNP ID.
|
|
41
38
|
label_top_n: Number of top SNPs to label.
|
|
42
|
-
genes_df:
|
|
43
|
-
|
|
44
|
-
instead of SNP ID.
|
|
45
|
-
chrom: Chromosome number. Required if genes_df is provided.
|
|
39
|
+
genes_df: Unused, kept for backward compatibility.
|
|
40
|
+
chrom: Unused, kept for backward compatibility.
|
|
46
41
|
max_label_length: Maximum label length before truncation.
|
|
47
42
|
|
|
48
43
|
Returns:
|
|
@@ -53,6 +48,8 @@ def add_snp_labels(
|
|
|
53
48
|
>>> # ... plot your data ...
|
|
54
49
|
>>> texts = add_snp_labels(ax, df, label_top_n=5)
|
|
55
50
|
"""
|
|
51
|
+
# genes_df and chrom are unused but kept for backward compatibility
|
|
52
|
+
del genes_df, chrom, kwargs
|
|
56
53
|
if neglog10p_col not in df.columns:
|
|
57
54
|
raise ValueError(
|
|
58
55
|
f"Column '{neglog10p_col}' not found in DataFrame. "
|
|
@@ -63,33 +60,34 @@ def add_snp_labels(
|
|
|
63
60
|
top_snps = df.nlargest(label_top_n, neglog10p_col)
|
|
64
61
|
|
|
65
62
|
texts = []
|
|
63
|
+
used_labels = set() # Track used labels to avoid duplicates
|
|
64
|
+
|
|
66
65
|
for _, snp in top_snps.iterrows():
|
|
67
66
|
x = snp[pos_col]
|
|
68
67
|
y = snp[neglog10p_col]
|
|
69
68
|
|
|
70
|
-
#
|
|
69
|
+
# Use SNP ID as label
|
|
71
70
|
label = str(snp[rs_col])
|
|
72
71
|
|
|
73
|
-
#
|
|
74
|
-
if
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
label = nearest_gene
|
|
72
|
+
# Skip duplicate labels
|
|
73
|
+
if label in used_labels:
|
|
74
|
+
continue
|
|
75
|
+
used_labels.add(label)
|
|
78
76
|
|
|
79
77
|
# Truncate long labels
|
|
80
78
|
if len(label) > max_label_length:
|
|
81
79
|
label = label[: max_label_length - 3] + "..."
|
|
82
80
|
|
|
83
|
-
# Add text annotation
|
|
81
|
+
# Add text annotation centered above marker
|
|
84
82
|
text = ax.annotate(
|
|
85
83
|
label,
|
|
86
84
|
xy=(x, y),
|
|
87
|
-
xytext=(
|
|
85
|
+
xytext=(0, 7),
|
|
88
86
|
textcoords="offset points",
|
|
89
|
-
fontsize=
|
|
87
|
+
fontsize=6,
|
|
90
88
|
fontweight="bold",
|
|
91
89
|
color="#333333",
|
|
92
|
-
ha="
|
|
90
|
+
ha="center",
|
|
93
91
|
va="bottom",
|
|
94
92
|
zorder=15,
|
|
95
93
|
bbox=dict(
|
|
@@ -101,18 +99,19 @@ def add_snp_labels(
|
|
|
101
99
|
)
|
|
102
100
|
texts.append(text)
|
|
103
101
|
|
|
104
|
-
#
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
102
|
+
# Only use adjustText when there are multiple labels to avoid overlap
|
|
103
|
+
if len(texts) > 1:
|
|
104
|
+
try:
|
|
105
|
+
from adjustText import adjust_text
|
|
106
|
+
|
|
107
|
+
adjust_text(
|
|
108
|
+
texts,
|
|
109
|
+
ax=ax,
|
|
110
|
+
arrowprops=dict(arrowstyle="-", color="gray", lw=0.5),
|
|
111
|
+
expand_points=(1.5, 1.5),
|
|
112
|
+
)
|
|
113
|
+
except ImportError:
|
|
114
|
+
# adjustText not installed, labels may overlap
|
|
115
|
+
pass
|
|
117
116
|
|
|
118
117
|
return texts
|
pylocuszoom/ld.py
CHANGED
|
@@ -38,7 +38,7 @@ def build_ld_command(
|
|
|
38
38
|
output_path: str,
|
|
39
39
|
window_kb: int = 500,
|
|
40
40
|
ld_window_r2: float = 0.0,
|
|
41
|
-
species: str = "
|
|
41
|
+
species: str = "canine",
|
|
42
42
|
threads: Optional[int] = None,
|
|
43
43
|
) -> list:
|
|
44
44
|
"""Build PLINK command for LD calculation.
|
|
@@ -50,7 +50,7 @@ def build_ld_command(
|
|
|
50
50
|
output_path: Output prefix (creates .ld file).
|
|
51
51
|
window_kb: Window size in kilobases.
|
|
52
52
|
ld_window_r2: Minimum R² to report (0.0 reports all).
|
|
53
|
-
species: Species flag for PLINK ('
|
|
53
|
+
species: Species flag for PLINK ('canine', 'feline', or None for human).
|
|
54
54
|
threads: Number of threads (auto-detect if None).
|
|
55
55
|
|
|
56
56
|
Returns:
|
|
@@ -58,10 +58,10 @@ def build_ld_command(
|
|
|
58
58
|
"""
|
|
59
59
|
cmd = [plink_path]
|
|
60
60
|
|
|
61
|
-
# Species flag
|
|
62
|
-
if species == "
|
|
61
|
+
# Species flag (maps to PLINK's --dog flag)
|
|
62
|
+
if species == "canine":
|
|
63
63
|
cmd.append("--dog")
|
|
64
|
-
elif species == "
|
|
64
|
+
elif species == "feline":
|
|
65
65
|
# PLINK doesn't have --cat, use --chr-set for 18 autosomes + X
|
|
66
66
|
cmd.extend(["--chr-set", "18"])
|
|
67
67
|
|
|
@@ -119,7 +119,7 @@ def calculate_ld(
|
|
|
119
119
|
window_kb: int = 500,
|
|
120
120
|
plink_path: Optional[str] = None,
|
|
121
121
|
working_dir: Optional[str] = None,
|
|
122
|
-
species: str = "
|
|
122
|
+
species: str = "canine",
|
|
123
123
|
threads: Optional[int] = None,
|
|
124
124
|
) -> pd.DataFrame:
|
|
125
125
|
"""Calculate LD (R²) between a lead SNP and all SNPs in a region.
|
|
@@ -133,7 +133,7 @@ def calculate_ld(
|
|
|
133
133
|
window_kb: Window size in kilobases around lead SNP.
|
|
134
134
|
plink_path: Path to PLINK executable. Auto-detects if None.
|
|
135
135
|
working_dir: Directory for PLINK output files. Uses temp dir if None.
|
|
136
|
-
species: Species flag ('
|
|
136
|
+
species: Species flag ('canine', 'feline', or None for human).
|
|
137
137
|
threads: Number of threads for PLINK.
|
|
138
138
|
|
|
139
139
|
Returns:
|
|
@@ -142,6 +142,7 @@ def calculate_ld(
|
|
|
142
142
|
|
|
143
143
|
Raises:
|
|
144
144
|
FileNotFoundError: If PLINK executable not found.
|
|
145
|
+
ValidationError: If PLINK binary files (.bed/.bim/.fam) are missing.
|
|
145
146
|
|
|
146
147
|
Example:
|
|
147
148
|
>>> ld_df = calculate_ld(
|