pylocuszoom 0.6.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylocuszoom/__init__.py +34 -7
- pylocuszoom/backends/__init__.py +116 -17
- pylocuszoom/backends/base.py +363 -60
- pylocuszoom/backends/bokeh_backend.py +77 -15
- pylocuszoom/backends/hover.py +198 -0
- pylocuszoom/backends/matplotlib_backend.py +263 -3
- pylocuszoom/backends/plotly_backend.py +73 -16
- pylocuszoom/config.py +365 -0
- pylocuszoom/ensembl.py +476 -0
- pylocuszoom/eqtl.py +17 -25
- pylocuszoom/exceptions.py +33 -0
- pylocuszoom/finemapping.py +18 -32
- pylocuszoom/forest.py +10 -11
- pylocuszoom/gene_track.py +169 -142
- pylocuszoom/loaders.py +3 -1
- pylocuszoom/phewas.py +10 -11
- pylocuszoom/plotter.py +311 -277
- pylocuszoom/recombination.py +19 -3
- pylocuszoom/schemas.py +1 -6
- pylocuszoom/utils.py +54 -4
- pylocuszoom/validation.py +223 -0
- {pylocuszoom-0.6.0.dist-info → pylocuszoom-1.0.0.dist-info}/METADATA +82 -37
- pylocuszoom-1.0.0.dist-info/RECORD +31 -0
- pylocuszoom-0.6.0.dist-info/RECORD +0 -26
- {pylocuszoom-0.6.0.dist-info → pylocuszoom-1.0.0.dist-info}/WHEEL +0 -0
- {pylocuszoom-0.6.0.dist-info → pylocuszoom-1.0.0.dist-info}/licenses/LICENSE.md +0 -0
pylocuszoom/gene_track.py
CHANGED
|
@@ -28,6 +28,10 @@ GENE_AREA = 0.25 # Bottom portion for gene drawing
|
|
|
28
28
|
EXON_HEIGHT = 0.20 # Exon rectangle height
|
|
29
29
|
INTRON_HEIGHT = 0.02 # Thin intron line
|
|
30
30
|
|
|
31
|
+
# Arrow dimensions (pre-computed for clarity)
|
|
32
|
+
ARROW_HEIGHT_RATIO = 0.2625 # 0.35 * 0.75 (75% of the original 0.35 ratio); multiplied by EXON_HEIGHT at use
|
|
33
|
+
ARROW_WIDTH_RATIO = 0.0066 # 0.006 * 1.1 (10% wider than the original 0.006 ratio); multiplied by region_width at use
|
|
34
|
+
|
|
31
35
|
|
|
32
36
|
def assign_gene_positions(genes_df: pd.DataFrame, start: int, end: int) -> List[int]:
|
|
33
37
|
"""Assign row indices to genes to minimize overlap.
|
|
@@ -44,22 +48,23 @@ def assign_gene_positions(genes_df: pd.DataFrame, start: int, end: int) -> List[
|
|
|
44
48
|
List of integer row indices (0, 1, 2, ...) for each gene.
|
|
45
49
|
"""
|
|
46
50
|
positions = []
|
|
47
|
-
|
|
51
|
+
# Track the rightmost gene end position for each row (the label buffer is applied at comparison time)
|
|
52
|
+
row_ends: dict[int, int] = {} # row -> rightmost end position
|
|
48
53
|
region_width = end - start
|
|
54
|
+
label_buffer = region_width * 0.08 # Extra space for labels
|
|
49
55
|
|
|
50
56
|
for _, gene in genes_df.iterrows():
|
|
51
57
|
gene_start = max(gene["start"], start)
|
|
52
58
|
gene_end = min(gene["end"], end)
|
|
53
59
|
|
|
54
|
-
# Find first available row
|
|
60
|
+
# Find first available row where gene doesn't overlap
|
|
55
61
|
row = 0
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
if occ_row == row and occ_end > gene_start - label_buffer:
|
|
59
|
-
row = occ_row + 1
|
|
62
|
+
while row in row_ends and row_ends[row] > gene_start - label_buffer:
|
|
63
|
+
row += 1
|
|
60
64
|
|
|
61
65
|
positions.append(row)
|
|
62
|
-
|
|
66
|
+
# Update the row's end position (the label buffer is applied during the next gene's overlap check)
|
|
67
|
+
row_ends[row] = gene_end
|
|
63
68
|
|
|
64
69
|
return positions
|
|
65
70
|
|
|
@@ -111,6 +116,147 @@ def get_nearest_gene(
|
|
|
111
116
|
return nearby.loc[nearby["dist"].idxmin(), "gene_name"]
|
|
112
117
|
|
|
113
118
|
|
|
119
|
+
def _filter_genes_by_region(
|
|
120
|
+
df: pd.DataFrame, chrom: Union[int, str], start: int, end: int
|
|
121
|
+
) -> pd.DataFrame:
|
|
122
|
+
"""Filter a DataFrame to genes/exons within a genomic region."""
|
|
123
|
+
chrom_str = normalize_chrom(chrom)
|
|
124
|
+
return df[
|
|
125
|
+
(df["chr"].astype(str).str.replace("chr", "", regex=False) == chrom_str)
|
|
126
|
+
& (df["end"] >= start)
|
|
127
|
+
& (df["start"] <= end)
|
|
128
|
+
].copy()
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _compute_arrow_geometry(
|
|
132
|
+
gene_start: int, gene_end: int, region_width: int, strand: str
|
|
133
|
+
) -> tuple[list[float], float, float, str]:
|
|
134
|
+
"""Compute arrow tip positions and dimensions for strand arrows.
|
|
135
|
+
|
|
136
|
+
Returns:
|
|
137
|
+
Tuple of (arrow_tip_positions, tri_height, tri_width, arrow_color).
|
|
138
|
+
"""
|
|
139
|
+
tri_height = EXON_HEIGHT * ARROW_HEIGHT_RATIO
|
|
140
|
+
tri_width = region_width * ARROW_WIDTH_RATIO
|
|
141
|
+
|
|
142
|
+
tip_offset = tri_width / 2
|
|
143
|
+
tail_offset = tri_width * 1.5
|
|
144
|
+
gene_center = (gene_start + gene_end) / 2
|
|
145
|
+
|
|
146
|
+
if strand == "+":
|
|
147
|
+
arrow_tip_positions = [
|
|
148
|
+
gene_start + tail_offset,
|
|
149
|
+
gene_center + tri_width / 2,
|
|
150
|
+
gene_end - tip_offset,
|
|
151
|
+
]
|
|
152
|
+
arrow_color = "#000000"
|
|
153
|
+
else:
|
|
154
|
+
arrow_tip_positions = [
|
|
155
|
+
gene_end - tail_offset,
|
|
156
|
+
gene_center - tri_width / 2,
|
|
157
|
+
gene_start + tip_offset,
|
|
158
|
+
]
|
|
159
|
+
arrow_color = "#333333"
|
|
160
|
+
|
|
161
|
+
return arrow_tip_positions, tri_height, tri_width, arrow_color
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _draw_strand_arrows_matplotlib(
|
|
165
|
+
ax: Axes,
|
|
166
|
+
gene: pd.Series,
|
|
167
|
+
gene_start: int,
|
|
168
|
+
gene_end: int,
|
|
169
|
+
y_gene: float,
|
|
170
|
+
region_width: int,
|
|
171
|
+
) -> None:
|
|
172
|
+
"""Draw strand direction arrows using matplotlib."""
|
|
173
|
+
strand = gene["strand"]
|
|
174
|
+
arrow_tip_positions, tri_height, tri_width, arrow_color = _compute_arrow_geometry(
|
|
175
|
+
gene_start, gene_end, region_width, strand
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
# Draw connecting line from the first to the last arrow tip position
|
|
179
|
+
if len(arrow_tip_positions) > 1:
|
|
180
|
+
ax.plot(
|
|
181
|
+
[arrow_tip_positions[0], arrow_tip_positions[-1]],
|
|
182
|
+
[y_gene, y_gene],
|
|
183
|
+
color=arrow_color,
|
|
184
|
+
linewidth=1.0,
|
|
185
|
+
zorder=4,
|
|
186
|
+
solid_capstyle="butt",
|
|
187
|
+
)
|
|
188
|
+
|
|
189
|
+
for tip_x in arrow_tip_positions:
|
|
190
|
+
if strand == "+":
|
|
191
|
+
base_x = tip_x - tri_width
|
|
192
|
+
else:
|
|
193
|
+
base_x = tip_x + tri_width
|
|
194
|
+
|
|
195
|
+
tri_points = [
|
|
196
|
+
[tip_x, y_gene],
|
|
197
|
+
[base_x, y_gene + tri_height],
|
|
198
|
+
[base_x, y_gene - tri_height],
|
|
199
|
+
]
|
|
200
|
+
|
|
201
|
+
triangle = Polygon(
|
|
202
|
+
tri_points,
|
|
203
|
+
closed=True,
|
|
204
|
+
facecolor=arrow_color,
|
|
205
|
+
edgecolor=arrow_color,
|
|
206
|
+
linewidth=0.5,
|
|
207
|
+
zorder=5,
|
|
208
|
+
)
|
|
209
|
+
ax.add_patch(triangle)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _draw_strand_arrows_generic(
|
|
213
|
+
ax: Any,
|
|
214
|
+
backend: Any,
|
|
215
|
+
gene: pd.Series,
|
|
216
|
+
gene_start: int,
|
|
217
|
+
gene_end: int,
|
|
218
|
+
y_gene: float,
|
|
219
|
+
region_width: int,
|
|
220
|
+
) -> None:
|
|
221
|
+
"""Draw strand direction arrows using a generic backend."""
|
|
222
|
+
strand = gene["strand"]
|
|
223
|
+
arrow_tip_positions, tri_height, tri_width, arrow_color = _compute_arrow_geometry(
|
|
224
|
+
gene_start, gene_end, region_width, strand
|
|
225
|
+
)
|
|
226
|
+
|
|
227
|
+
# Draw connecting line from the first to the last arrow tip position
|
|
228
|
+
if len(arrow_tip_positions) > 1:
|
|
229
|
+
backend.line(
|
|
230
|
+
ax,
|
|
231
|
+
x=pd.Series([arrow_tip_positions[0], arrow_tip_positions[-1]]),
|
|
232
|
+
y=pd.Series([y_gene, y_gene]),
|
|
233
|
+
color=arrow_color,
|
|
234
|
+
linewidth=1.0,
|
|
235
|
+
zorder=4,
|
|
236
|
+
)
|
|
237
|
+
|
|
238
|
+
for tip_x in arrow_tip_positions:
|
|
239
|
+
if strand == "+":
|
|
240
|
+
base_x = tip_x - tri_width
|
|
241
|
+
else:
|
|
242
|
+
base_x = tip_x + tri_width
|
|
243
|
+
|
|
244
|
+
tri_points = [
|
|
245
|
+
[tip_x, y_gene],
|
|
246
|
+
[base_x, y_gene + tri_height],
|
|
247
|
+
[base_x, y_gene - tri_height],
|
|
248
|
+
]
|
|
249
|
+
|
|
250
|
+
backend.add_polygon(
|
|
251
|
+
ax,
|
|
252
|
+
tri_points,
|
|
253
|
+
facecolor=arrow_color,
|
|
254
|
+
edgecolor=arrow_color,
|
|
255
|
+
linewidth=0.5,
|
|
256
|
+
zorder=5,
|
|
257
|
+
)
|
|
258
|
+
|
|
259
|
+
|
|
114
260
|
def plot_gene_track(
|
|
115
261
|
ax: Axes,
|
|
116
262
|
genes_df: pd.DataFrame,
|
|
@@ -137,12 +283,7 @@ def plot_gene_track(
|
|
|
137
283
|
exons_df: Exon annotations with chr, start, end, gene_name
|
|
138
284
|
columns for drawing exon structure. Optional.
|
|
139
285
|
"""
|
|
140
|
-
|
|
141
|
-
region_genes = genes_df[
|
|
142
|
-
(genes_df["chr"].astype(str).str.replace("chr", "", regex=False) == chrom_str)
|
|
143
|
-
& (genes_df["end"] >= start)
|
|
144
|
-
& (genes_df["start"] <= end)
|
|
145
|
-
].copy()
|
|
286
|
+
region_genes = _filter_genes_by_region(genes_df, chrom, start, end)
|
|
146
287
|
|
|
147
288
|
ax.set_xlim(start, end)
|
|
148
289
|
ax.set_ylabel("")
|
|
@@ -178,20 +319,13 @@ def plot_gene_track(
|
|
|
178
319
|
top_margin = 0.05 # Minimal space above top label
|
|
179
320
|
ax.set_ylim(
|
|
180
321
|
-bottom_margin,
|
|
181
|
-
|
|
322
|
+
max_row * ROW_HEIGHT + GENE_AREA + top_margin,
|
|
182
323
|
)
|
|
183
324
|
|
|
184
325
|
# Filter exons for this region if available
|
|
185
326
|
region_exons = None
|
|
186
327
|
if exons_df is not None and not exons_df.empty:
|
|
187
|
-
region_exons = exons_df
|
|
188
|
-
(
|
|
189
|
-
exons_df["chr"].astype(str).str.replace("chr", "", regex=False)
|
|
190
|
-
== chrom_str
|
|
191
|
-
)
|
|
192
|
-
& (exons_df["end"] >= start)
|
|
193
|
-
& (exons_df["start"] <= end)
|
|
194
|
-
].copy()
|
|
328
|
+
region_exons = _filter_genes_by_region(exons_df, chrom, start, end)
|
|
195
329
|
|
|
196
330
|
region_width = end - start
|
|
197
331
|
|
|
@@ -257,59 +391,11 @@ def plot_gene_track(
|
|
|
257
391
|
)
|
|
258
392
|
)
|
|
259
393
|
|
|
260
|
-
# Add strand direction triangles
|
|
394
|
+
# Add strand direction triangles
|
|
261
395
|
if "strand" in gene.index:
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
# Triangle dimensions
|
|
266
|
-
tri_height = EXON_HEIGHT * 0.35
|
|
267
|
-
tri_width = region_width * 0.006
|
|
268
|
-
|
|
269
|
-
# Arrow positions: front, middle, back (tip positions)
|
|
270
|
-
tip_offset = tri_width / 2 # Tiny offset to keep tip inside gene
|
|
271
|
-
tail_offset = tri_width * 1.5 # Offset for tail arrow from gene start/end
|
|
272
|
-
gene_center = (gene_start + gene_end) / 2
|
|
273
|
-
if arrow_dir == 1: # Forward strand
|
|
274
|
-
arrow_tip_positions = [
|
|
275
|
-
gene_start + tail_offset, # Tail (tip inside gene)
|
|
276
|
-
gene_center + tri_width / 2, # Middle (arrow center at gene center)
|
|
277
|
-
gene_end - tip_offset, # Tip (near gene end)
|
|
278
|
-
]
|
|
279
|
-
arrow_color = "#000000" # Black for forward
|
|
280
|
-
else: # Reverse strand
|
|
281
|
-
arrow_tip_positions = [
|
|
282
|
-
gene_end - tail_offset, # Tail (tip inside gene)
|
|
283
|
-
gene_center - tri_width / 2, # Middle (arrow center at gene center)
|
|
284
|
-
gene_start + tip_offset, # Tip (near gene start)
|
|
285
|
-
]
|
|
286
|
-
arrow_color = "#333333" # Dark grey for reverse
|
|
287
|
-
|
|
288
|
-
for tip_x in arrow_tip_positions:
|
|
289
|
-
if arrow_dir == 1:
|
|
290
|
-
base_x = tip_x - tri_width
|
|
291
|
-
tri_points = [
|
|
292
|
-
[tip_x, y_gene], # Tip pointing right
|
|
293
|
-
[base_x, y_gene + tri_height],
|
|
294
|
-
[base_x, y_gene - tri_height],
|
|
295
|
-
]
|
|
296
|
-
else:
|
|
297
|
-
base_x = tip_x + tri_width
|
|
298
|
-
tri_points = [
|
|
299
|
-
[tip_x, y_gene], # Tip pointing left
|
|
300
|
-
[base_x, y_gene + tri_height],
|
|
301
|
-
[base_x, y_gene - tri_height],
|
|
302
|
-
]
|
|
303
|
-
|
|
304
|
-
triangle = Polygon(
|
|
305
|
-
tri_points,
|
|
306
|
-
closed=True,
|
|
307
|
-
facecolor=arrow_color,
|
|
308
|
-
edgecolor=arrow_color,
|
|
309
|
-
linewidth=0.5,
|
|
310
|
-
zorder=5,
|
|
311
|
-
)
|
|
312
|
-
ax.add_patch(triangle)
|
|
396
|
+
_draw_strand_arrows_matplotlib(
|
|
397
|
+
ax, gene, gene_start, gene_end, y_gene, region_width
|
|
398
|
+
)
|
|
313
399
|
|
|
314
400
|
# Add gene name label in the gap above gene
|
|
315
401
|
if gene_name:
|
|
@@ -320,7 +406,7 @@ def plot_gene_track(
|
|
|
320
406
|
gene_name,
|
|
321
407
|
ha="center",
|
|
322
408
|
va="bottom",
|
|
323
|
-
fontsize=
|
|
409
|
+
fontsize=7,
|
|
324
410
|
color="#000000",
|
|
325
411
|
fontweight="medium",
|
|
326
412
|
style="italic",
|
|
@@ -353,12 +439,7 @@ def plot_gene_track_generic(
|
|
|
353
439
|
exons_df: Exon annotations with chr, start, end, gene_name
|
|
354
440
|
columns for drawing exon structure. Optional.
|
|
355
441
|
"""
|
|
356
|
-
|
|
357
|
-
region_genes = genes_df[
|
|
358
|
-
(genes_df["chr"].astype(str).str.replace("chr", "", regex=False) == chrom_str)
|
|
359
|
-
& (genes_df["end"] >= start)
|
|
360
|
-
& (genes_df["start"] <= end)
|
|
361
|
-
].copy()
|
|
442
|
+
region_genes = _filter_genes_by_region(genes_df, chrom, start, end)
|
|
362
443
|
|
|
363
444
|
backend.set_xlim(ax, start, end)
|
|
364
445
|
backend.set_ylabel(ax, "", fontsize=10)
|
|
@@ -389,20 +470,13 @@ def plot_gene_track_generic(
|
|
|
389
470
|
backend.set_ylim(
|
|
390
471
|
ax,
|
|
391
472
|
-bottom_margin,
|
|
392
|
-
|
|
473
|
+
max_row * ROW_HEIGHT + GENE_AREA + top_margin,
|
|
393
474
|
)
|
|
394
475
|
|
|
395
476
|
# Filter exons for this region if available
|
|
396
477
|
region_exons = None
|
|
397
478
|
if exons_df is not None and not exons_df.empty:
|
|
398
|
-
region_exons = exons_df
|
|
399
|
-
(
|
|
400
|
-
exons_df["chr"].astype(str).str.replace("chr", "", regex=False)
|
|
401
|
-
== chrom_str
|
|
402
|
-
)
|
|
403
|
-
& (exons_df["end"] >= start)
|
|
404
|
-
& (exons_df["start"] <= end)
|
|
405
|
-
].copy()
|
|
479
|
+
region_exons = _filter_genes_by_region(exons_df, chrom, start, end)
|
|
406
480
|
|
|
407
481
|
region_width = end - start
|
|
408
482
|
|
|
@@ -465,58 +539,11 @@ def plot_gene_track_generic(
|
|
|
465
539
|
zorder=2,
|
|
466
540
|
)
|
|
467
541
|
|
|
468
|
-
# Add strand direction triangles
|
|
542
|
+
# Add strand direction triangles
|
|
469
543
|
if "strand" in gene.index:
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
# Triangle dimensions
|
|
474
|
-
tri_height = EXON_HEIGHT * 0.35
|
|
475
|
-
tri_width = region_width * 0.006
|
|
476
|
-
|
|
477
|
-
# Arrow positions: front, middle, back (tip positions)
|
|
478
|
-
tip_offset = tri_width / 2 # Tiny offset to keep tip inside gene
|
|
479
|
-
tail_offset = tri_width * 1.5 # Offset for tail arrow from gene start/end
|
|
480
|
-
gene_center = (gene_start + gene_end) / 2
|
|
481
|
-
if arrow_dir == 1: # Forward strand
|
|
482
|
-
arrow_tip_positions = [
|
|
483
|
-
gene_start + tail_offset, # Tail (tip inside gene)
|
|
484
|
-
gene_center + tri_width / 2, # Middle (arrow center at gene center)
|
|
485
|
-
gene_end - tip_offset, # Tip (near gene end)
|
|
486
|
-
]
|
|
487
|
-
arrow_color = "#000000" # Black for forward
|
|
488
|
-
else: # Reverse strand
|
|
489
|
-
arrow_tip_positions = [
|
|
490
|
-
gene_end - tail_offset, # Tail (tip inside gene)
|
|
491
|
-
gene_center - tri_width / 2, # Middle (arrow center at gene center)
|
|
492
|
-
gene_start + tip_offset, # Tip (near gene start)
|
|
493
|
-
]
|
|
494
|
-
arrow_color = "#333333" # Dark grey for reverse
|
|
495
|
-
|
|
496
|
-
for tip_x in arrow_tip_positions:
|
|
497
|
-
if arrow_dir == 1:
|
|
498
|
-
base_x = tip_x - tri_width
|
|
499
|
-
tri_points = [
|
|
500
|
-
[tip_x, y_gene], # Tip pointing right
|
|
501
|
-
[base_x, y_gene + tri_height],
|
|
502
|
-
[base_x, y_gene - tri_height],
|
|
503
|
-
]
|
|
504
|
-
else:
|
|
505
|
-
base_x = tip_x + tri_width
|
|
506
|
-
tri_points = [
|
|
507
|
-
[tip_x, y_gene], # Tip pointing left
|
|
508
|
-
[base_x, y_gene + tri_height],
|
|
509
|
-
[base_x, y_gene - tri_height],
|
|
510
|
-
]
|
|
511
|
-
|
|
512
|
-
backend.add_polygon(
|
|
513
|
-
ax,
|
|
514
|
-
tri_points,
|
|
515
|
-
facecolor=arrow_color,
|
|
516
|
-
edgecolor=arrow_color,
|
|
517
|
-
linewidth=0.5,
|
|
518
|
-
zorder=5,
|
|
519
|
-
)
|
|
544
|
+
_draw_strand_arrows_generic(
|
|
545
|
+
ax, backend, gene, gene_start, gene_end, y_gene, region_width
|
|
546
|
+
)
|
|
520
547
|
|
|
521
548
|
# Add gene name label in the gap above gene
|
|
522
549
|
if gene_name:
|
|
@@ -526,7 +553,7 @@ def plot_gene_track_generic(
|
|
|
526
553
|
label_pos,
|
|
527
554
|
y_label,
|
|
528
555
|
gene_name,
|
|
529
|
-
fontsize=
|
|
556
|
+
fontsize=7,
|
|
530
557
|
ha="center",
|
|
531
558
|
va="bottom",
|
|
532
559
|
color="#000000",
|
pylocuszoom/loaders.py
CHANGED
|
@@ -877,4 +877,6 @@ def load_gwas(
|
|
|
877
877
|
if format not in loaders:
|
|
878
878
|
raise ValueError(f"Unknown format '{format}'. Options: {list(loaders.keys())}")
|
|
879
879
|
|
|
880
|
-
return loaders[format](
|
|
880
|
+
return loaders[format](
|
|
881
|
+
filepath, pos_col=pos_col, p_col=p_col, rs_col=rs_col, **kwargs
|
|
882
|
+
)
|
pylocuszoom/phewas.py
CHANGED
|
@@ -5,7 +5,7 @@ Validates and prepares phenome-wide association study data for plotting.
|
|
|
5
5
|
|
|
6
6
|
import pandas as pd
|
|
7
7
|
|
|
8
|
-
from .
|
|
8
|
+
from .validation import DataFrameValidator
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
def validate_phewas_df(
|
|
@@ -14,7 +14,7 @@ def validate_phewas_df(
|
|
|
14
14
|
p_col: str = "p_value",
|
|
15
15
|
category_col: str = "category",
|
|
16
16
|
) -> None:
|
|
17
|
-
"""Validate PheWAS DataFrame has required columns.
|
|
17
|
+
"""Validate PheWAS DataFrame has required columns and types.
|
|
18
18
|
|
|
19
19
|
Args:
|
|
20
20
|
df: PheWAS results DataFrame.
|
|
@@ -23,13 +23,12 @@ def validate_phewas_df(
|
|
|
23
23
|
category_col: Column name for phenotype categories (optional).
|
|
24
24
|
|
|
25
25
|
Raises:
|
|
26
|
-
ValidationError: If required columns are missing.
|
|
26
|
+
ValidationError: If required columns are missing or have invalid types.
|
|
27
27
|
"""
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
)
|
|
28
|
+
(
|
|
29
|
+
DataFrameValidator(df, "PheWAS DataFrame")
|
|
30
|
+
.require_columns([phenotype_col, p_col])
|
|
31
|
+
.require_numeric([p_col])
|
|
32
|
+
.require_range(p_col, min_val=0, max_val=1, exclusive_min=True)
|
|
33
|
+
.validate()
|
|
34
|
+
)
|