pylocuszoom 0.6.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pylocuszoom/gene_track.py CHANGED
@@ -28,6 +28,10 @@ GENE_AREA = 0.25 # Bottom portion for gene drawing
28
28
  EXON_HEIGHT = 0.20 # Exon rectangle height
29
29
  INTRON_HEIGHT = 0.02 # Thin intron line
30
30
 
31
+ # Arrow dimensions (pre-computed for clarity)
32
+ ARROW_HEIGHT_RATIO = 0.2625 # 0.35 * 0.75 (75% of original height ratio); multiplied by EXON_HEIGHT at use
33
+ ARROW_WIDTH_RATIO = 0.0066 # 0.006 * 1.1 (10% wider than original ratio); multiplied by region_width at use
34
+
31
35
 
32
36
  def assign_gene_positions(genes_df: pd.DataFrame, start: int, end: int) -> List[int]:
33
37
  """Assign row indices to genes to minimize overlap.
@@ -44,22 +48,23 @@ def assign_gene_positions(genes_df: pd.DataFrame, start: int, end: int) -> List[
44
48
  List of integer row indices (0, 1, 2, ...) for each gene.
45
49
  """
46
50
  positions = []
47
- occupied = [] # List of (end_pos, row)
51
+ # Track the rightmost gene end position for each row (label buffer is applied at comparison time)
52
+ row_ends: dict[int, int] = {} # row -> rightmost end position
48
53
  region_width = end - start
54
+ label_buffer = region_width * 0.08 # Extra space for labels
49
55
 
50
56
  for _, gene in genes_df.iterrows():
51
57
  gene_start = max(gene["start"], start)
52
58
  gene_end = min(gene["end"], end)
53
59
 
54
- # Find first available row with buffer for label spacing
60
+ # Find first available row where gene doesn't overlap
55
61
  row = 0
56
- label_buffer = region_width * 0.08 # Extra space for labels
57
- for occ_end, occ_row in occupied:
58
- if occ_row == row and occ_end > gene_start - label_buffer:
59
- row = occ_row + 1
62
+ while row in row_ends and row_ends[row] > gene_start - label_buffer:
63
+ row += 1
60
64
 
61
65
  positions.append(row)
62
- occupied.append((gene_end, row))
66
+ # Update the row's end position (the label buffer is applied when the next gene is checked)
67
+ row_ends[row] = gene_end
63
68
 
64
69
  return positions
65
70
 
@@ -111,6 +116,147 @@ def get_nearest_gene(
111
116
  return nearby.loc[nearby["dist"].idxmin(), "gene_name"]
112
117
 
113
118
 
119
+ def _filter_genes_by_region(
120
+ df: pd.DataFrame, chrom: Union[int, str], start: int, end: int
121
+ ) -> pd.DataFrame:
122
+ """Filter a DataFrame to genes/exons within a genomic region."""
123
+ chrom_str = normalize_chrom(chrom)
124
+ return df[
125
+ (df["chr"].astype(str).str.replace("chr", "", regex=False) == chrom_str)
126
+ & (df["end"] >= start)
127
+ & (df["start"] <= end)
128
+ ].copy()
129
+
130
+
131
+ def _compute_arrow_geometry(
132
+ gene_start: int, gene_end: int, region_width: int, strand: str
133
+ ) -> tuple[list[float], float, float, str]:
134
+ """Compute arrow tip positions and dimensions for strand arrows.
135
+
136
+ Returns:
137
+ Tuple of (arrow_tip_positions, tri_height, tri_width, arrow_color).
138
+ """
139
+ tri_height = EXON_HEIGHT * ARROW_HEIGHT_RATIO
140
+ tri_width = region_width * ARROW_WIDTH_RATIO
141
+
142
+ tip_offset = tri_width / 2
143
+ tail_offset = tri_width * 1.5
144
+ gene_center = (gene_start + gene_end) / 2
145
+
146
+ if strand == "+":
147
+ arrow_tip_positions = [
148
+ gene_start + tail_offset,
149
+ gene_center + tri_width / 2,
150
+ gene_end - tip_offset,
151
+ ]
152
+ arrow_color = "#000000"
153
+ else:
154
+ arrow_tip_positions = [
155
+ gene_end - tail_offset,
156
+ gene_center - tri_width / 2,
157
+ gene_start + tip_offset,
158
+ ]
159
+ arrow_color = "#333333"
160
+
161
+ return arrow_tip_positions, tri_height, tri_width, arrow_color
162
+
163
+
164
+ def _draw_strand_arrows_matplotlib(
165
+ ax: Axes,
166
+ gene: pd.Series,
167
+ gene_start: int,
168
+ gene_end: int,
169
+ y_gene: float,
170
+ region_width: int,
171
+ ) -> None:
172
+ """Draw strand direction arrows using matplotlib."""
173
+ strand = gene["strand"]
174
+ arrow_tip_positions, tri_height, tri_width, arrow_color = _compute_arrow_geometry(
175
+ gene_start, gene_end, region_width, strand
176
+ )
177
+
178
+ # Draw connecting line from the first to the last arrow tip position
179
+ if len(arrow_tip_positions) > 1:
180
+ ax.plot(
181
+ [arrow_tip_positions[0], arrow_tip_positions[-1]],
182
+ [y_gene, y_gene],
183
+ color=arrow_color,
184
+ linewidth=1.0,
185
+ zorder=4,
186
+ solid_capstyle="butt",
187
+ )
188
+
189
+ for tip_x in arrow_tip_positions:
190
+ if strand == "+":
191
+ base_x = tip_x - tri_width
192
+ else:
193
+ base_x = tip_x + tri_width
194
+
195
+ tri_points = [
196
+ [tip_x, y_gene],
197
+ [base_x, y_gene + tri_height],
198
+ [base_x, y_gene - tri_height],
199
+ ]
200
+
201
+ triangle = Polygon(
202
+ tri_points,
203
+ closed=True,
204
+ facecolor=arrow_color,
205
+ edgecolor=arrow_color,
206
+ linewidth=0.5,
207
+ zorder=5,
208
+ )
209
+ ax.add_patch(triangle)
210
+
211
+
212
+ def _draw_strand_arrows_generic(
213
+ ax: Any,
214
+ backend: Any,
215
+ gene: pd.Series,
216
+ gene_start: int,
217
+ gene_end: int,
218
+ y_gene: float,
219
+ region_width: int,
220
+ ) -> None:
221
+ """Draw strand direction arrows using a generic backend."""
222
+ strand = gene["strand"]
223
+ arrow_tip_positions, tri_height, tri_width, arrow_color = _compute_arrow_geometry(
224
+ gene_start, gene_end, region_width, strand
225
+ )
226
+
227
+ # Draw connecting line from the first to the last arrow tip position
228
+ if len(arrow_tip_positions) > 1:
229
+ backend.line(
230
+ ax,
231
+ x=pd.Series([arrow_tip_positions[0], arrow_tip_positions[-1]]),
232
+ y=pd.Series([y_gene, y_gene]),
233
+ color=arrow_color,
234
+ linewidth=1.0,
235
+ zorder=4,
236
+ )
237
+
238
+ for tip_x in arrow_tip_positions:
239
+ if strand == "+":
240
+ base_x = tip_x - tri_width
241
+ else:
242
+ base_x = tip_x + tri_width
243
+
244
+ tri_points = [
245
+ [tip_x, y_gene],
246
+ [base_x, y_gene + tri_height],
247
+ [base_x, y_gene - tri_height],
248
+ ]
249
+
250
+ backend.add_polygon(
251
+ ax,
252
+ tri_points,
253
+ facecolor=arrow_color,
254
+ edgecolor=arrow_color,
255
+ linewidth=0.5,
256
+ zorder=5,
257
+ )
258
+
259
+
114
260
  def plot_gene_track(
115
261
  ax: Axes,
116
262
  genes_df: pd.DataFrame,
@@ -137,12 +283,7 @@ def plot_gene_track(
137
283
  exons_df: Exon annotations with chr, start, end, gene_name
138
284
  columns for drawing exon structure. Optional.
139
285
  """
140
- chrom_str = normalize_chrom(chrom)
141
- region_genes = genes_df[
142
- (genes_df["chr"].astype(str).str.replace("chr", "", regex=False) == chrom_str)
143
- & (genes_df["end"] >= start)
144
- & (genes_df["start"] <= end)
145
- ].copy()
286
+ region_genes = _filter_genes_by_region(genes_df, chrom, start, end)
146
287
 
147
288
  ax.set_xlim(start, end)
148
289
  ax.set_ylabel("")
@@ -178,20 +319,13 @@ def plot_gene_track(
178
319
  top_margin = 0.05 # Minimal space above top label
179
320
  ax.set_ylim(
180
321
  -bottom_margin,
181
- (max_row + 1) * ROW_HEIGHT - ROW_HEIGHT + GENE_AREA + top_margin,
322
+ max_row * ROW_HEIGHT + GENE_AREA + top_margin,
182
323
  )
183
324
 
184
325
  # Filter exons for this region if available
185
326
  region_exons = None
186
327
  if exons_df is not None and not exons_df.empty:
187
- region_exons = exons_df[
188
- (
189
- exons_df["chr"].astype(str).str.replace("chr", "", regex=False)
190
- == chrom_str
191
- )
192
- & (exons_df["end"] >= start)
193
- & (exons_df["start"] <= end)
194
- ].copy()
328
+ region_exons = _filter_genes_by_region(exons_df, chrom, start, end)
195
329
 
196
330
  region_width = end - start
197
331
 
@@ -257,59 +391,11 @@ def plot_gene_track(
257
391
  )
258
392
  )
259
393
 
260
- # Add strand direction triangles (tip, center, tail)
394
+ # Add strand direction triangles
261
395
  if "strand" in gene.index:
262
- strand = gene["strand"]
263
- arrow_dir = 1 if strand == "+" else -1
264
-
265
- # Triangle dimensions
266
- tri_height = EXON_HEIGHT * 0.35
267
- tri_width = region_width * 0.006
268
-
269
- # Arrow positions: front, middle, back (tip positions)
270
- tip_offset = tri_width / 2 # Tiny offset to keep tip inside gene
271
- tail_offset = tri_width * 1.5 # Offset for tail arrow from gene start/end
272
- gene_center = (gene_start + gene_end) / 2
273
- if arrow_dir == 1: # Forward strand
274
- arrow_tip_positions = [
275
- gene_start + tail_offset, # Tail (tip inside gene)
276
- gene_center + tri_width / 2, # Middle (arrow center at gene center)
277
- gene_end - tip_offset, # Tip (near gene end)
278
- ]
279
- arrow_color = "#000000" # Black for forward
280
- else: # Reverse strand
281
- arrow_tip_positions = [
282
- gene_end - tail_offset, # Tail (tip inside gene)
283
- gene_center - tri_width / 2, # Middle (arrow center at gene center)
284
- gene_start + tip_offset, # Tip (near gene start)
285
- ]
286
- arrow_color = "#333333" # Dark grey for reverse
287
-
288
- for tip_x in arrow_tip_positions:
289
- if arrow_dir == 1:
290
- base_x = tip_x - tri_width
291
- tri_points = [
292
- [tip_x, y_gene], # Tip pointing right
293
- [base_x, y_gene + tri_height],
294
- [base_x, y_gene - tri_height],
295
- ]
296
- else:
297
- base_x = tip_x + tri_width
298
- tri_points = [
299
- [tip_x, y_gene], # Tip pointing left
300
- [base_x, y_gene + tri_height],
301
- [base_x, y_gene - tri_height],
302
- ]
303
-
304
- triangle = Polygon(
305
- tri_points,
306
- closed=True,
307
- facecolor=arrow_color,
308
- edgecolor=arrow_color,
309
- linewidth=0.5,
310
- zorder=5,
311
- )
312
- ax.add_patch(triangle)
396
+ _draw_strand_arrows_matplotlib(
397
+ ax, gene, gene_start, gene_end, y_gene, region_width
398
+ )
313
399
 
314
400
  # Add gene name label in the gap above gene
315
401
  if gene_name:
@@ -320,7 +406,7 @@ def plot_gene_track(
320
406
  gene_name,
321
407
  ha="center",
322
408
  va="bottom",
323
- fontsize=5.5,
409
+ fontsize=7,
324
410
  color="#000000",
325
411
  fontweight="medium",
326
412
  style="italic",
@@ -353,12 +439,7 @@ def plot_gene_track_generic(
353
439
  exons_df: Exon annotations with chr, start, end, gene_name
354
440
  columns for drawing exon structure. Optional.
355
441
  """
356
- chrom_str = normalize_chrom(chrom)
357
- region_genes = genes_df[
358
- (genes_df["chr"].astype(str).str.replace("chr", "", regex=False) == chrom_str)
359
- & (genes_df["end"] >= start)
360
- & (genes_df["start"] <= end)
361
- ].copy()
442
+ region_genes = _filter_genes_by_region(genes_df, chrom, start, end)
362
443
 
363
444
  backend.set_xlim(ax, start, end)
364
445
  backend.set_ylabel(ax, "", fontsize=10)
@@ -389,20 +470,13 @@ def plot_gene_track_generic(
389
470
  backend.set_ylim(
390
471
  ax,
391
472
  -bottom_margin,
392
- (max_row + 1) * ROW_HEIGHT - ROW_HEIGHT + GENE_AREA + top_margin,
473
+ max_row * ROW_HEIGHT + GENE_AREA + top_margin,
393
474
  )
394
475
 
395
476
  # Filter exons for this region if available
396
477
  region_exons = None
397
478
  if exons_df is not None and not exons_df.empty:
398
- region_exons = exons_df[
399
- (
400
- exons_df["chr"].astype(str).str.replace("chr", "", regex=False)
401
- == chrom_str
402
- )
403
- & (exons_df["end"] >= start)
404
- & (exons_df["start"] <= end)
405
- ].copy()
479
+ region_exons = _filter_genes_by_region(exons_df, chrom, start, end)
406
480
 
407
481
  region_width = end - start
408
482
 
@@ -465,58 +539,11 @@ def plot_gene_track_generic(
465
539
  zorder=2,
466
540
  )
467
541
 
468
- # Add strand direction triangles (tip, center, tail)
542
+ # Add strand direction triangles
469
543
  if "strand" in gene.index:
470
- strand = gene["strand"]
471
- arrow_dir = 1 if strand == "+" else -1
472
-
473
- # Triangle dimensions
474
- tri_height = EXON_HEIGHT * 0.35
475
- tri_width = region_width * 0.006
476
-
477
- # Arrow positions: front, middle, back (tip positions)
478
- tip_offset = tri_width / 2 # Tiny offset to keep tip inside gene
479
- tail_offset = tri_width * 1.5 # Offset for tail arrow from gene start/end
480
- gene_center = (gene_start + gene_end) / 2
481
- if arrow_dir == 1: # Forward strand
482
- arrow_tip_positions = [
483
- gene_start + tail_offset, # Tail (tip inside gene)
484
- gene_center + tri_width / 2, # Middle (arrow center at gene center)
485
- gene_end - tip_offset, # Tip (near gene end)
486
- ]
487
- arrow_color = "#000000" # Black for forward
488
- else: # Reverse strand
489
- arrow_tip_positions = [
490
- gene_end - tail_offset, # Tail (tip inside gene)
491
- gene_center - tri_width / 2, # Middle (arrow center at gene center)
492
- gene_start + tip_offset, # Tip (near gene start)
493
- ]
494
- arrow_color = "#333333" # Dark grey for reverse
495
-
496
- for tip_x in arrow_tip_positions:
497
- if arrow_dir == 1:
498
- base_x = tip_x - tri_width
499
- tri_points = [
500
- [tip_x, y_gene], # Tip pointing right
501
- [base_x, y_gene + tri_height],
502
- [base_x, y_gene - tri_height],
503
- ]
504
- else:
505
- base_x = tip_x + tri_width
506
- tri_points = [
507
- [tip_x, y_gene], # Tip pointing left
508
- [base_x, y_gene + tri_height],
509
- [base_x, y_gene - tri_height],
510
- ]
511
-
512
- backend.add_polygon(
513
- ax,
514
- tri_points,
515
- facecolor=arrow_color,
516
- edgecolor=arrow_color,
517
- linewidth=0.5,
518
- zorder=5,
519
- )
544
+ _draw_strand_arrows_generic(
545
+ ax, backend, gene, gene_start, gene_end, y_gene, region_width
546
+ )
520
547
 
521
548
  # Add gene name label in the gap above gene
522
549
  if gene_name:
@@ -526,7 +553,7 @@ def plot_gene_track_generic(
526
553
  label_pos,
527
554
  y_label,
528
555
  gene_name,
529
- fontsize=6,
556
+ fontsize=7,
530
557
  ha="center",
531
558
  va="bottom",
532
559
  color="#000000",
pylocuszoom/loaders.py CHANGED
@@ -877,4 +877,6 @@ def load_gwas(
877
877
  if format not in loaders:
878
878
  raise ValueError(f"Unknown format '{format}'. Options: {list(loaders.keys())}")
879
879
 
880
- return loaders[format](filepath, pos_col=pos_col, p_col=p_col, rs_col=rs_col)
880
+ return loaders[format](
881
+ filepath, pos_col=pos_col, p_col=p_col, rs_col=rs_col, **kwargs
882
+ )
pylocuszoom/phewas.py CHANGED
@@ -5,7 +5,7 @@ Validates and prepares phenome-wide association study data for plotting.
5
5
 
6
6
  import pandas as pd
7
7
 
8
- from .utils import ValidationError
8
+ from .validation import DataFrameValidator
9
9
 
10
10
 
11
11
  def validate_phewas_df(
@@ -14,7 +14,7 @@ def validate_phewas_df(
14
14
  p_col: str = "p_value",
15
15
  category_col: str = "category",
16
16
  ) -> None:
17
- """Validate PheWAS DataFrame has required columns.
17
+ """Validate PheWAS DataFrame has required columns and types.
18
18
 
19
19
  Args:
20
20
  df: PheWAS results DataFrame.
@@ -23,13 +23,12 @@ def validate_phewas_df(
23
23
  category_col: Column name for phenotype categories (optional).
24
24
 
25
25
  Raises:
26
- ValidationError: If required columns are missing.
26
+ ValidationError: If required columns are missing or have invalid types.
27
27
  """
28
- required = [phenotype_col, p_col]
29
- missing = [col for col in required if col not in df.columns]
30
-
31
- if missing:
32
- raise ValidationError(
33
- f"PheWAS DataFrame missing required columns: {missing}. "
34
- f"Required: {required}. Found: {list(df.columns)}"
35
- )
28
+ (
29
+ DataFrameValidator(df, "PheWAS DataFrame")
30
+ .require_columns([phenotype_col, p_col])
31
+ .require_numeric([p_col])
32
+ .require_range(p_col, min_val=0, max_val=1, exclusive_min=True)
33
+ .validate()
34
+ )