pylocuszoom 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
pylocuszoom/labels.py CHANGED
@@ -2,18 +2,15 @@
2
2
 
3
3
  Provides automatic labeling of top significant SNPs with:
4
4
  - SNP ID (rs number)
5
- - Nearest gene name (if gene annotations provided)
6
5
  - Automatic overlap avoidance (if adjustText installed)
7
6
  """
8
7
 
9
- from typing import List, Optional, Union
8
+ from typing import Any, List, Optional, Union
10
9
 
11
10
  import pandas as pd
12
11
  from matplotlib.axes import Axes
13
12
  from matplotlib.text import Annotation
14
13
 
15
- from .gene_track import get_nearest_gene
16
-
17
14
 
18
15
  def add_snp_labels(
19
16
  ax: Axes,
@@ -25,11 +22,11 @@ def add_snp_labels(
25
22
  genes_df: Optional[pd.DataFrame] = None,
26
23
  chrom: Optional[Union[int, str]] = None,
27
24
  max_label_length: int = 15,
25
+ **kwargs: Any,
28
26
  ) -> List[Annotation]:
29
27
  """Add text labels to top SNPs in the regional plot.
30
28
 
31
- Labels the most significant SNPs with either their SNP ID
32
- or the nearest gene name (if genes_df provided).
29
+ Labels the most significant SNPs with their SNP ID (rs number).
33
30
 
34
31
  Args:
35
32
  ax: Matplotlib axes object.
@@ -39,10 +36,8 @@ def add_snp_labels(
39
36
  neglog10p_col: Column name for -log10(p-value).
40
37
  rs_col: Column name for SNP ID.
41
38
  label_top_n: Number of top SNPs to label.
42
- genes_df: Optional gene annotations for gene-based labels.
43
- If provided with chrom, labels will show nearest gene name
44
- instead of SNP ID.
45
- chrom: Chromosome number. Required if genes_df is provided.
39
+ genes_df: Unused, kept for backward compatibility.
40
+ chrom: Unused, kept for backward compatibility.
46
41
  max_label_length: Maximum label length before truncation.
47
42
 
48
43
  Returns:
@@ -53,6 +48,8 @@ def add_snp_labels(
53
48
  >>> # ... plot your data ...
54
49
  >>> texts = add_snp_labels(ax, df, label_top_n=5)
55
50
  """
51
+ # genes_df and chrom are unused but kept for backward compatibility
52
+ del genes_df, chrom, kwargs
56
53
  if neglog10p_col not in df.columns:
57
54
  raise ValueError(
58
55
  f"Column '{neglog10p_col}' not found in DataFrame. "
@@ -63,33 +60,34 @@ def add_snp_labels(
63
60
  top_snps = df.nlargest(label_top_n, neglog10p_col)
64
61
 
65
62
  texts = []
63
+ used_labels = set() # Track used labels to avoid duplicates
64
+
66
65
  for _, snp in top_snps.iterrows():
67
66
  x = snp[pos_col]
68
67
  y = snp[neglog10p_col]
69
68
 
70
- # Determine label text
69
+ # Use SNP ID as label
71
70
  label = str(snp[rs_col])
72
71
 
73
- # Try to get gene name if genes_df provided
74
- if genes_df is not None and chrom is not None:
75
- nearest_gene = get_nearest_gene(genes_df, chrom, int(x))
76
- if nearest_gene:
77
- label = nearest_gene
72
+ # Skip duplicate labels
73
+ if label in used_labels:
74
+ continue
75
+ used_labels.add(label)
78
76
 
79
77
  # Truncate long labels
80
78
  if len(label) > max_label_length:
81
79
  label = label[: max_label_length - 3] + "..."
82
80
 
83
- # Add text annotation with offset
81
+ # Add text annotation centered above marker
84
82
  text = ax.annotate(
85
83
  label,
86
84
  xy=(x, y),
87
- xytext=(5, 5),
85
+ xytext=(0, 7),
88
86
  textcoords="offset points",
89
- fontsize=8,
87
+ fontsize=6,
90
88
  fontweight="bold",
91
89
  color="#333333",
92
- ha="left",
90
+ ha="center",
93
91
  va="bottom",
94
92
  zorder=15,
95
93
  bbox=dict(
@@ -101,18 +99,19 @@ def add_snp_labels(
101
99
  )
102
100
  texts.append(text)
103
101
 
104
- # Try to adjust text positions to avoid overlap
105
- try:
106
- from adjustText import adjust_text
107
-
108
- adjust_text(
109
- texts,
110
- ax=ax,
111
- arrowprops=dict(arrowstyle="-", color="gray", lw=0.5),
112
- expand_points=(1.5, 1.5),
113
- )
114
- except ImportError:
115
- # adjustText not installed, labels may overlap
116
- pass
102
+ # Only use adjustText when there are multiple labels to avoid overlap
103
+ if len(texts) > 1:
104
+ try:
105
+ from adjustText import adjust_text
106
+
107
+ adjust_text(
108
+ texts,
109
+ ax=ax,
110
+ arrowprops=dict(arrowstyle="-", color="gray", lw=0.5),
111
+ expand_points=(1.5, 1.5),
112
+ )
113
+ except ImportError:
114
+ # adjustText not installed, labels may overlap
115
+ pass
117
116
 
118
117
  return texts
pylocuszoom/ld.py CHANGED
@@ -38,7 +38,7 @@ def build_ld_command(
38
38
  output_path: str,
39
39
  window_kb: int = 500,
40
40
  ld_window_r2: float = 0.0,
41
- species: str = "dog",
41
+ species: str = "canine",
42
42
  threads: Optional[int] = None,
43
43
  ) -> list:
44
44
  """Build PLINK command for LD calculation.
@@ -50,7 +50,7 @@ def build_ld_command(
50
50
  output_path: Output prefix (creates .ld file).
51
51
  window_kb: Window size in kilobases.
52
52
  ld_window_r2: Minimum R² to report (0.0 reports all).
53
- species: Species flag for PLINK ('dog', 'cat', or None for human).
53
+ species: Species flag for PLINK ('canine', 'feline', or None for human).
54
54
  threads: Number of threads (auto-detect if None).
55
55
 
56
56
  Returns:
@@ -58,10 +58,10 @@ def build_ld_command(
58
58
  """
59
59
  cmd = [plink_path]
60
60
 
61
- # Species flag
62
- if species == "dog":
61
+ # Species flag (maps to PLINK's --dog flag)
62
+ if species == "canine":
63
63
  cmd.append("--dog")
64
- elif species == "cat":
64
+ elif species == "feline":
65
65
  # PLINK doesn't have --cat, use --chr-set for 18 autosomes + X
66
66
  cmd.extend(["--chr-set", "18"])
67
67
 
@@ -119,7 +119,7 @@ def calculate_ld(
119
119
  window_kb: int = 500,
120
120
  plink_path: Optional[str] = None,
121
121
  working_dir: Optional[str] = None,
122
- species: str = "dog",
122
+ species: str = "canine",
123
123
  threads: Optional[int] = None,
124
124
  ) -> pd.DataFrame:
125
125
  """Calculate LD (R²) between a lead SNP and all SNPs in a region.
@@ -133,7 +133,7 @@ def calculate_ld(
133
133
  window_kb: Window size in kilobases around lead SNP.
134
134
  plink_path: Path to PLINK executable. Auto-detects if None.
135
135
  working_dir: Directory for PLINK output files. Uses temp dir if None.
136
- species: Species flag ('dog', 'cat', or None for human).
136
+ species: Species flag ('canine', 'feline', or None for human).
137
137
  threads: Number of threads for PLINK.
138
138
 
139
139
  Returns:
@@ -142,6 +142,7 @@ def calculate_ld(
142
142
 
143
143
  Raises:
144
144
  FileNotFoundError: If PLINK executable not found.
145
+ ValidationError: If PLINK binary files (.bed/.bim/.fam) are missing.
145
146
 
146
147
  Example:
147
148
  >>> ld_df = calculate_ld(