pylocuszoom 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pylocuszoom/gene_track.py CHANGED
@@ -15,11 +15,11 @@ from matplotlib.patches import Polygon, Rectangle
15
15
 
16
16
  from .utils import normalize_chrom
17
17
 
18
- # Strand-specific colors (bold, distinct)
18
+ # Strand-specific colors (distinct from LD palette)
19
19
  STRAND_COLORS: dict[Optional[str], str] = {
20
- "+": "#6A3D9A", # Bold purple for forward strand
21
- "-": "#1F78B4", # Bold teal/blue for reverse strand
22
- None: "#666666", # Grey if no strand info
20
+ "+": "#FFD700", # Gold/bright yellow for forward strand
21
+ "-": "#DDA0DD", # Plum/light purple for reverse strand
22
+ None: "#999999", # Light grey if no strand info
23
23
  }
24
24
 
25
25
  # Layout constants
@@ -145,7 +145,7 @@ def plot_gene_track(
145
145
  ].copy()
146
146
 
147
147
  ax.set_xlim(start, end)
148
- ax.set_ylabel("Genes", fontsize=10)
148
+ ax.set_ylabel("")
149
149
  ax.set_yticks([])
150
150
 
151
151
  # theme_classic: only bottom spine
@@ -255,43 +255,56 @@ def plot_gene_track(
255
255
  )
256
256
  )
257
257
 
258
- # Add strand direction triangle at gene tip
258
+ # Add strand direction triangles (tip, center, tail)
259
259
  if "strand" in gene.index:
260
260
  strand = gene["strand"]
261
261
  region_width = end - start
262
+ gene_width = gene_end - gene_start
262
263
  arrow_dir = 1 if strand == "+" else -1
263
264
 
264
- # Triangle dimensions - whole arrow past gene end
265
+ # Triangle dimensions
265
266
  tri_height = EXON_HEIGHT * 0.35
266
267
  tri_width = region_width * 0.006
267
268
 
268
- # Triangle entirely past gene tip
269
- if arrow_dir == 1: # Forward strand: arrow starts at gene end
270
- base_x = gene_end
271
- tip_x = base_x + tri_width
272
- tri_points = [
273
- [tip_x, y_gene], # Tip pointing right
274
- [base_x, y_gene + tri_height],
275
- [base_x, y_gene - tri_height],
269
+ # Arrow positions: front, middle, back
270
+ if arrow_dir == 1: # Forward strand
271
+ arrow_positions = [
272
+ gene_start, # Front
273
+ (gene_start + gene_end) / 2, # Middle
274
+ gene_end, # Back (tip past gene end)
276
275
  ]
277
- else: # Reverse strand: arrow starts at gene start
278
- base_x = gene_start
279
- tip_x = base_x - tri_width
280
- tri_points = [
281
- [tip_x, y_gene], # Tip pointing left
282
- [base_x, y_gene + tri_height],
283
- [base_x, y_gene - tri_height],
276
+ else: # Reverse strand
277
+ arrow_positions = [
278
+ gene_end, # Front (arrows point left, so start from right)
279
+ (gene_start + gene_end) / 2, # Middle
280
+ gene_start, # Back (tip past gene start)
284
281
  ]
285
282
 
286
- triangle = Polygon(
287
- tri_points,
288
- closed=True,
289
- facecolor="black",
290
- edgecolor="black",
291
- linewidth=0.5,
292
- zorder=5,
293
- )
294
- ax.add_patch(triangle)
283
+ for base_x in arrow_positions:
284
+ if arrow_dir == 1:
285
+ tip_x = base_x + tri_width
286
+ tri_points = [
287
+ [tip_x, y_gene], # Tip pointing right
288
+ [base_x, y_gene + tri_height],
289
+ [base_x, y_gene - tri_height],
290
+ ]
291
+ else:
292
+ tip_x = base_x - tri_width
293
+ tri_points = [
294
+ [tip_x, y_gene], # Tip pointing left
295
+ [base_x, y_gene + tri_height],
296
+ [base_x, y_gene - tri_height],
297
+ ]
298
+
299
+ triangle = Polygon(
300
+ tri_points,
301
+ closed=True,
302
+ facecolor="#000000",
303
+ edgecolor="#000000",
304
+ linewidth=0.5,
305
+ zorder=5,
306
+ )
307
+ ax.add_patch(triangle)
295
308
 
296
309
  # Add gene name label in the gap above gene
297
310
  if gene_name:
pylocuszoom/labels.py CHANGED
@@ -2,18 +2,15 @@
2
2
 
3
3
  Provides automatic labeling of top significant SNPs with:
4
4
  - SNP ID (rs number)
5
- - Nearest gene name (if gene annotations provided)
6
5
  - Automatic overlap avoidance (if adjustText installed)
7
6
  """
8
7
 
9
- from typing import List, Optional, Union
8
+ from typing import Any, List, Optional, Union
10
9
 
11
10
  import pandas as pd
12
11
  from matplotlib.axes import Axes
13
12
  from matplotlib.text import Annotation
14
13
 
15
- from .gene_track import get_nearest_gene
16
-
17
14
 
18
15
  def add_snp_labels(
19
16
  ax: Axes,
@@ -25,11 +22,11 @@ def add_snp_labels(
25
22
  genes_df: Optional[pd.DataFrame] = None,
26
23
  chrom: Optional[Union[int, str]] = None,
27
24
  max_label_length: int = 15,
25
+ **kwargs: Any,
28
26
  ) -> List[Annotation]:
29
27
  """Add text labels to top SNPs in the regional plot.
30
28
 
31
- Labels the most significant SNPs with either their SNP ID
32
- or the nearest gene name (if genes_df provided).
29
+ Labels the most significant SNPs with their SNP ID (rs number).
33
30
 
34
31
  Args:
35
32
  ax: Matplotlib axes object.
@@ -39,10 +36,8 @@ def add_snp_labels(
39
36
  neglog10p_col: Column name for -log10(p-value).
40
37
  rs_col: Column name for SNP ID.
41
38
  label_top_n: Number of top SNPs to label.
42
- genes_df: Optional gene annotations for gene-based labels.
43
- If provided with chrom, labels will show nearest gene name
44
- instead of SNP ID.
45
- chrom: Chromosome number. Required if genes_df is provided.
39
+ genes_df: Unused, kept for backward compatibility.
40
+ chrom: Unused, kept for backward compatibility.
46
41
  max_label_length: Maximum label length before truncation.
47
42
 
48
43
  Returns:
@@ -53,6 +48,8 @@ def add_snp_labels(
53
48
  >>> # ... plot your data ...
54
49
  >>> texts = add_snp_labels(ax, df, label_top_n=5)
55
50
  """
51
+ # genes_df and chrom are unused but kept for backward compatibility
52
+ del genes_df, chrom, kwargs
56
53
  if neglog10p_col not in df.columns:
57
54
  raise ValueError(
58
55
  f"Column '{neglog10p_col}' not found in DataFrame. "
@@ -63,33 +60,34 @@ def add_snp_labels(
63
60
  top_snps = df.nlargest(label_top_n, neglog10p_col)
64
61
 
65
62
  texts = []
63
+ used_labels = set() # Track used labels to avoid duplicates
64
+
66
65
  for _, snp in top_snps.iterrows():
67
66
  x = snp[pos_col]
68
67
  y = snp[neglog10p_col]
69
68
 
70
- # Determine label text
69
+ # Use SNP ID as label
71
70
  label = str(snp[rs_col])
72
71
 
73
- # Try to get gene name if genes_df provided
74
- if genes_df is not None and chrom is not None:
75
- nearest_gene = get_nearest_gene(genes_df, chrom, int(x))
76
- if nearest_gene:
77
- label = nearest_gene
72
+ # Skip duplicate labels
73
+ if label in used_labels:
74
+ continue
75
+ used_labels.add(label)
78
76
 
79
77
  # Truncate long labels
80
78
  if len(label) > max_label_length:
81
79
  label = label[: max_label_length - 3] + "..."
82
80
 
83
- # Add text annotation with offset
81
+ # Add text annotation centered above marker
84
82
  text = ax.annotate(
85
83
  label,
86
84
  xy=(x, y),
87
- xytext=(5, 5),
85
+ xytext=(0, 7),
88
86
  textcoords="offset points",
89
- fontsize=8,
87
+ fontsize=6,
90
88
  fontweight="bold",
91
89
  color="#333333",
92
- ha="left",
90
+ ha="center",
93
91
  va="bottom",
94
92
  zorder=15,
95
93
  bbox=dict(
@@ -101,18 +99,19 @@ def add_snp_labels(
101
99
  )
102
100
  texts.append(text)
103
101
 
104
- # Try to adjust text positions to avoid overlap
105
- try:
106
- from adjustText import adjust_text
107
-
108
- adjust_text(
109
- texts,
110
- ax=ax,
111
- arrowprops=dict(arrowstyle="-", color="gray", lw=0.5),
112
- expand_points=(1.5, 1.5),
113
- )
114
- except ImportError:
115
- # adjustText not installed, labels may overlap
116
- pass
102
+ # Only use adjustText when there are multiple labels to avoid overlap
103
+ if len(texts) > 1:
104
+ try:
105
+ from adjustText import adjust_text
106
+
107
+ adjust_text(
108
+ texts,
109
+ ax=ax,
110
+ arrowprops=dict(arrowstyle="-", color="gray", lw=0.5),
111
+ expand_points=(1.5, 1.5),
112
+ )
113
+ except ImportError:
114
+ # adjustText not installed, labels may overlap
115
+ pass
117
116
 
118
117
  return texts
pylocuszoom/ld.py CHANGED
@@ -38,7 +38,7 @@ def build_ld_command(
38
38
  output_path: str,
39
39
  window_kb: int = 500,
40
40
  ld_window_r2: float = 0.0,
41
- species: str = "dog",
41
+ species: str = "canine",
42
42
  threads: Optional[int] = None,
43
43
  ) -> list:
44
44
  """Build PLINK command for LD calculation.
@@ -50,7 +50,7 @@ def build_ld_command(
50
50
  output_path: Output prefix (creates .ld file).
51
51
  window_kb: Window size in kilobases.
52
52
  ld_window_r2: Minimum R² to report (0.0 reports all).
53
- species: Species flag for PLINK ('dog', 'cat', or None for human).
53
+ species: Species flag for PLINK ('canine', 'feline', or None for human).
54
54
  threads: Number of threads (auto-detect if None).
55
55
 
56
56
  Returns:
@@ -58,10 +58,10 @@ def build_ld_command(
58
58
  """
59
59
  cmd = [plink_path]
60
60
 
61
- # Species flag
62
- if species == "dog":
61
+ # Species flag (maps to PLINK's --dog flag)
62
+ if species == "canine":
63
63
  cmd.append("--dog")
64
- elif species == "cat":
64
+ elif species == "feline":
65
65
  # PLINK doesn't have --cat, use --chr-set for 18 autosomes + X
66
66
  cmd.extend(["--chr-set", "18"])
67
67
 
@@ -119,7 +119,7 @@ def calculate_ld(
119
119
  window_kb: int = 500,
120
120
  plink_path: Optional[str] = None,
121
121
  working_dir: Optional[str] = None,
122
- species: str = "dog",
122
+ species: str = "canine",
123
123
  threads: Optional[int] = None,
124
124
  ) -> pd.DataFrame:
125
125
  """Calculate LD (R²) between a lead SNP and all SNPs in a region.
@@ -133,7 +133,7 @@ def calculate_ld(
133
133
  window_kb: Window size in kilobases around lead SNP.
134
134
  plink_path: Path to PLINK executable. Auto-detects if None.
135
135
  working_dir: Directory for PLINK output files. Uses temp dir if None.
136
- species: Species flag ('dog', 'cat', or None for human).
136
+ species: Species flag ('canine', 'feline', or None for human).
137
137
  threads: Number of threads for PLINK.
138
138
 
139
139
  Returns:
@@ -142,6 +142,7 @@ def calculate_ld(
142
142
 
143
143
  Raises:
144
144
  FileNotFoundError: If PLINK executable not found.
145
+ ValidationError: If PLINK binary files (.bed/.bim/.fam) are missing.
145
146
 
146
147
  Example:
147
148
  >>> ld_df = calculate_ld(