pycmplot 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pycmplot/__init__.py +1 -1
- pycmplot/_core.py +99 -24
- pycmplot/annotation.py +48 -45
- pycmplot/cli.py +63 -20
- pycmplot/constants.py +2 -2
- pycmplot/io.py +144 -63
- pycmplot/liftover.py +8 -8
- pycmplot/plotting/circular.py +59 -46
- pycmplot/plotting/linear.py +378 -46
- pycmplot/plotting/qq.py +643 -0
- pycmplot/resources.py +6 -6
- pycmplot/stats.py +6 -6
- {pycmplot-0.2.0.dist-info → pycmplot-0.2.2.dist-info}/METADATA +8 -4
- pycmplot-0.2.2.dist-info/RECORD +22 -0
- {pycmplot-0.2.0.dist-info → pycmplot-0.2.2.dist-info}/licenses/LICENSE +1 -1
- pycmplot-0.2.0.dist-info/RECORD +0 -21
- {pycmplot-0.2.0.dist-info → pycmplot-0.2.2.dist-info}/WHEEL +0 -0
- {pycmplot-0.2.0.dist-info → pycmplot-0.2.2.dist-info}/entry_points.txt +0 -0
- {pycmplot-0.2.0.dist-info → pycmplot-0.2.2.dist-info}/top_level.txt +0 -0
pycmplot/__init__.py
CHANGED
pycmplot/_core.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
CORE_MODULE =
|
|
3
|
+
CORE_MODULE = """
|
|
4
4
|
pycmplot._core
|
|
5
5
|
==============
|
|
6
6
|
|
|
@@ -12,7 +12,7 @@ work to :mod:`pycmplot.io`, :mod:`pycmplot.plotting.linear`, and
|
|
|
12
12
|
All imports are deferred inside :func:`main` so that
|
|
13
13
|
``import pycmplot`` remains fast regardless of the size of the dependency
|
|
14
14
|
tree.
|
|
15
|
-
"""
|
|
15
|
+
"""
|
|
16
16
|
|
|
17
17
|
import logging
|
|
18
18
|
import warnings
|
|
@@ -26,7 +26,7 @@ logger = logging.getLogger(__name__)
|
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
def main() -> None:
|
|
29
|
-
MAIN =
|
|
29
|
+
MAIN = """Orchestrate the full pycmplot pipeline from the command line.
|
|
30
30
|
|
|
31
31
|
This function is registered as the ``pycmplot`` console-script entry point
|
|
32
32
|
in ``pyproject.toml`` / ``setup.cfg``. It performs the following steps in
|
|
@@ -75,7 +75,7 @@ def main() -> None:
|
|
|
75
75
|
Linear Manhattan plotter called for ``--mode lm`` (default).
|
|
76
76
|
pycmplot.plotting.circular.plot_circular :
|
|
77
77
|
Circular Manhattan plotter called for ``--mode cm``.
|
|
78
|
-
"""
|
|
78
|
+
"""
|
|
79
79
|
|
|
80
80
|
# ------------------------------------------------------------------
|
|
81
81
|
# Deferred imports so ``import pycmplot`` remains fast
|
|
@@ -92,6 +92,7 @@ def main() -> None:
|
|
|
92
92
|
)
|
|
93
93
|
from pycmplot.plotting.linear import plot_linear
|
|
94
94
|
from pycmplot.plotting.circular import plot_circular
|
|
95
|
+
from pycmplot.plotting.qq import plot_qq_combined, plot_qq_separate, plot_qq_overlay
|
|
95
96
|
from pycmplot.resources import ResourceConfig
|
|
96
97
|
|
|
97
98
|
# ------------------------------------------------------------------
|
|
@@ -105,10 +106,18 @@ def main() -> None:
|
|
|
105
106
|
chrom_arg = args.chrom_column
|
|
106
107
|
pos_arg = args.pos_column
|
|
107
108
|
snp_arg = args.snp_column
|
|
108
|
-
build_arg = args.
|
|
109
|
+
build_arg = args.build
|
|
110
|
+
buildc_arg = args.build_column
|
|
109
111
|
labels_raw = args.labels
|
|
110
112
|
pcol_arg = args.pval_column
|
|
111
113
|
logp = args.logp
|
|
114
|
+
qq = args.qq_plot
|
|
115
|
+
qq_separate = args.qq_separate
|
|
116
|
+
qq_ncols = args.qq_ncols
|
|
117
|
+
qq_thin = args.qq_thin
|
|
118
|
+
thin_below = args.thin_below
|
|
119
|
+
qq_max_points = args.qq_max_points
|
|
120
|
+
qq_overlay = args.qq_overlay
|
|
112
121
|
chrom_label_size = args.chrom_label_size
|
|
113
122
|
chrom_label_side = args.chrom_label_side
|
|
114
123
|
track_label_size = args.track_label_size
|
|
@@ -123,13 +132,13 @@ def main() -> None:
|
|
|
123
132
|
point_size = args.point_size
|
|
124
133
|
highlight = args.highlight
|
|
125
134
|
highlight_thresh = args.highlight_thresh
|
|
126
|
-
|
|
135
|
+
highlight_color = args.highlight_color
|
|
127
136
|
highlight_line = args.highlight_line
|
|
128
|
-
|
|
137
|
+
highlight_line_color = args.highlight_line_color
|
|
129
138
|
colors_raw = args.colors
|
|
130
|
-
r_min = args.
|
|
131
|
-
r_max = args.
|
|
132
|
-
pad = args.
|
|
139
|
+
r_min = args.min_radius
|
|
140
|
+
r_max = args.max_radius
|
|
141
|
+
pad = args.circular_track_spacing
|
|
133
142
|
output_format = args.output_format
|
|
134
143
|
output_dir = args.output_dir
|
|
135
144
|
dpi = args.dpi
|
|
@@ -142,18 +151,20 @@ def main() -> None:
|
|
|
142
151
|
|
|
143
152
|
|
|
144
153
|
# ------------------------------------------------------------------
|
|
145
|
-
# Sumstat, labels, colours, track heights str to list
|
|
154
|
+
# Sumstat, labels, colours, track heights [build] str to list
|
|
146
155
|
# ------------------------------------------------------------------
|
|
147
156
|
(
|
|
148
157
|
sum_stats,
|
|
149
158
|
labels,
|
|
150
159
|
colors,
|
|
151
|
-
t_heights
|
|
160
|
+
t_heights,
|
|
161
|
+
builds
|
|
152
162
|
) = strip_comma_separated_input_streams(
|
|
153
163
|
sum_stats = sum_stats_raw,
|
|
154
164
|
labels = labels_raw,
|
|
155
165
|
colors_raw = colors_raw,
|
|
156
166
|
track_heights = track_heights,
|
|
167
|
+
builds = build_arg if build_arg else None,
|
|
157
168
|
)
|
|
158
169
|
|
|
159
170
|
# ------------------------------------------------------------------
|
|
@@ -161,7 +172,8 @@ def main() -> None:
|
|
|
161
172
|
# ------------------------------------------------------------------
|
|
162
173
|
(
|
|
163
174
|
plt_name,
|
|
164
|
-
table_out
|
|
175
|
+
table_out,
|
|
176
|
+
plt_base,
|
|
165
177
|
) = get_output_paths(
|
|
166
178
|
labels,
|
|
167
179
|
mode = mode,
|
|
@@ -182,7 +194,8 @@ def main() -> None:
|
|
|
182
194
|
pos = pos_arg,
|
|
183
195
|
snp = snp_arg,
|
|
184
196
|
pcol = pcol_arg,
|
|
185
|
-
|
|
197
|
+
buildc = buildc_arg,
|
|
198
|
+
build = builds
|
|
186
199
|
)
|
|
187
200
|
|
|
188
201
|
# ------------------------------------------------------------------
|
|
@@ -198,6 +211,7 @@ def main() -> None:
|
|
|
198
211
|
sumstats_loaded,
|
|
199
212
|
hits_table,
|
|
200
213
|
signif_lines,
|
|
214
|
+
pval_dict,
|
|
201
215
|
) = get_sumstats_and_merged_sector_list(
|
|
202
216
|
sum_stats=sum_stats,
|
|
203
217
|
labels=labels,
|
|
@@ -212,6 +226,19 @@ def main() -> None:
|
|
|
212
226
|
resources=resources,
|
|
213
227
|
)
|
|
214
228
|
|
|
229
|
+
# ------------------------------------------------------------------
|
|
230
|
+
# ANNOTATE BY
|
|
231
|
+
# ------------------------------------------------------------------
|
|
232
|
+
if annotate:
|
|
233
|
+
if str(annotate).upper() == "GENE":
|
|
234
|
+
label_col = 'top_gene'
|
|
235
|
+
elif str(annotate).upper() == "SNP":
|
|
236
|
+
label_col = 'SNP'
|
|
237
|
+
else:
|
|
238
|
+
label_col = annotate
|
|
239
|
+
|
|
240
|
+
logger.info(f"Annotate by: {label_col}")
|
|
241
|
+
|
|
215
242
|
# ------------------------------------------------------------------
|
|
216
243
|
# CIRCULAR MANHATTAN
|
|
217
244
|
# ------------------------------------------------------------------
|
|
@@ -224,15 +251,16 @@ def main() -> None:
|
|
|
224
251
|
signif_lines = signif_lines,
|
|
225
252
|
highlight = highlight,
|
|
226
253
|
highlight_thresh = highlight_thresh,
|
|
227
|
-
|
|
254
|
+
highlight_color = highlight_color,
|
|
228
255
|
highlight_line = highlight_line,
|
|
229
|
-
|
|
256
|
+
highlight_line_color = highlight_line_color,
|
|
230
257
|
colors = colors,
|
|
231
258
|
chrom_label_side = chrom_label_side,
|
|
232
259
|
chrom_label_size = chrom_label_size,
|
|
233
260
|
track_label_size = track_label_size,
|
|
234
261
|
track_label_orientation = track_label_orientation,
|
|
235
262
|
annotate = annotate,
|
|
263
|
+
label_col = label_col if annotate else None,
|
|
236
264
|
annotation_size = annotation_size,
|
|
237
265
|
hits_table = hits_table,
|
|
238
266
|
sector_sizes = merged_assoc_sector_sizes,
|
|
@@ -253,30 +281,77 @@ def main() -> None:
|
|
|
253
281
|
else:
|
|
254
282
|
logger.info("Generating LINEAR MANHATTAN Plot ...")
|
|
255
283
|
plot_linear(
|
|
256
|
-
sumstats_loaded
|
|
257
|
-
track_heights
|
|
284
|
+
sumstats_loaded=sumstats_loaded,
|
|
285
|
+
track_heights=t_heights,
|
|
258
286
|
trim_pval=trim_pval,
|
|
259
287
|
logp=True if logp else False,
|
|
260
288
|
point_size=point_size,
|
|
261
289
|
highlight=highlight,
|
|
262
290
|
highlight_thresh=highlight_thresh,
|
|
263
|
-
|
|
264
|
-
highlight_line
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
291
|
+
highlight_color=highlight_color,
|
|
292
|
+
highlight_line=highlight_line,
|
|
293
|
+
highlight_line_color=highlight_line_color,
|
|
294
|
+
annotate=annotate,
|
|
295
|
+
hits_table=hits_table if not hits_table.empty else None,
|
|
296
|
+
label_col=label_col if annotate else None,
|
|
268
297
|
chr_spacing=chr_spacing,
|
|
269
298
|
linear_track_spacing=linear_track_spacing,
|
|
270
299
|
colors=colors,
|
|
271
300
|
signif_lines=signif_lines,
|
|
272
301
|
plot_title=plot_title,
|
|
273
|
-
no_track_labels
|
|
302
|
+
no_track_labels=no_track_labels,
|
|
274
303
|
dpi=dpi,
|
|
275
304
|
output_format=output_format,
|
|
276
305
|
output_dir=output_dir,
|
|
277
306
|
figsize=(15, 9)
|
|
278
307
|
)
|
|
279
308
|
|
|
309
|
+
# ------------------------------------------------------------------
|
|
310
|
+
# QQ PLOT
|
|
311
|
+
# ------------------------------------------------------------------
|
|
312
|
+
if qq and sumstats_loaded:
|
|
313
|
+
logger.info("Generating QQ Plot(s) ...")
|
|
314
|
+
qq_stem = f"{plt_base}_qq"
|
|
315
|
+
|
|
316
|
+
if qq_separate:
|
|
317
|
+
plot_qq_separate(
|
|
318
|
+
pval_dict=pval_dict,
|
|
319
|
+
thin=qq_thin,
|
|
320
|
+
thin_below=thin_below,
|
|
321
|
+
max_points=qq_max_points,
|
|
322
|
+
output_path=qq_stem,
|
|
323
|
+
colors=colors,
|
|
324
|
+
signif_threshold=signif_threshold or 5e-8,
|
|
325
|
+
dpi=dpi,
|
|
326
|
+
fig_format=output_format,
|
|
327
|
+
)
|
|
328
|
+
elif qq_overlay:
|
|
329
|
+
plot_qq_overlay(
|
|
330
|
+
pval_dict=pval_dict,
|
|
331
|
+
thin=qq_thin,
|
|
332
|
+
thin_below=thin_below,
|
|
333
|
+
max_points=qq_max_points,
|
|
334
|
+
colors=colors,
|
|
335
|
+
signif_threshold=signif_threshold or 5e-8,
|
|
336
|
+
dpi=dpi,
|
|
337
|
+
title=plot_title,
|
|
338
|
+
output_path=f"{qq_stem}_overlay",
|
|
339
|
+
fig_format=output_format,
|
|
340
|
+
)
|
|
341
|
+
else:
|
|
342
|
+
plot_qq_combined(
|
|
343
|
+
pval_dict=pval_dict,
|
|
344
|
+
thin=qq_thin,
|
|
345
|
+
thin_below=thin_below,
|
|
346
|
+
max_points=qq_max_points,
|
|
347
|
+
colors=colors,
|
|
348
|
+
ncols=qq_ncols,
|
|
349
|
+
signif_threshold=signif_threshold or 5e-8,
|
|
350
|
+
dpi=dpi,
|
|
351
|
+
title=plot_title,
|
|
352
|
+
output_path=f"{qq_stem}_combined",
|
|
353
|
+
fig_format=output_format,
|
|
354
|
+
)
|
|
280
355
|
|
|
281
356
|
if __name__ == "__main__":
|
|
282
357
|
main()
|
pycmplot/annotation.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
MODULE_DOCSTRING =
|
|
3
|
+
MODULE_DOCSTRING = """
|
|
4
4
|
pycmplot.annotation
|
|
5
5
|
====================
|
|
6
6
|
|
|
@@ -20,7 +20,7 @@ Annotation relies on a bundled Ensembl gene-info TSV (hg38 or hg19). The
|
|
|
20
20
|
file is resolved through :class:`~pycmplot.resources.ResourceConfig`; custom
|
|
21
21
|
paths can be supplied via the ``PYCMPLOT_GENEINFO_HG38`` /
|
|
22
22
|
``PYCMPLOT_GENEINFO_HG19`` environment variables.
|
|
23
|
-
"""
|
|
23
|
+
"""
|
|
24
24
|
|
|
25
25
|
import bisect
|
|
26
26
|
import logging
|
|
@@ -41,7 +41,7 @@ logger = logging.getLogger(__name__)
|
|
|
41
41
|
# ---------------------------------------------------------------------------
|
|
42
42
|
|
|
43
43
|
def _build_genes_dict(genes_df: pd.DataFrame) -> dict:
|
|
44
|
-
BUILD_GENES_DICT =
|
|
44
|
+
BUILD_GENES_DICT = """Build a chromosome-keyed interval dictionary with sorted start positions.
|
|
45
45
|
|
|
46
46
|
Pre-processes the gene reference DataFrame into a structure that supports
|
|
47
47
|
efficient O(log N) binary-search lookup of genes near a query position.
|
|
@@ -67,7 +67,7 @@ def _build_genes_dict(genes_df: pd.DataFrame) -> dict:
|
|
|
67
67
|
-----
|
|
68
68
|
This function is called once per :func:`get_hits_summary_table` invocation;
|
|
69
69
|
the result is passed to :func:`_annotate_variant` for each lead SNP.
|
|
70
|
-
"""
|
|
70
|
+
"""
|
|
71
71
|
|
|
72
72
|
genes_df = genes_df.sort_values(["CHR", "START"])
|
|
73
73
|
genes_dict: dict = {}
|
|
@@ -98,7 +98,7 @@ def _annotate_variant(
|
|
|
98
98
|
window: int = 500_000,
|
|
99
99
|
promoter_window: int = 2_000,
|
|
100
100
|
) -> dict:
|
|
101
|
-
ANNOTATE_VARIANT =
|
|
101
|
+
ANNOTATE_VARIANT = """Return strand-aware nearest-gene annotation for a single variant.
|
|
102
102
|
|
|
103
103
|
Searches the pre-built *genes_dict* within *window* bp of *pos* on
|
|
104
104
|
*chrom*. Reports the nearest upstream and downstream genes (relative to
|
|
@@ -138,7 +138,7 @@ def _annotate_variant(
|
|
|
138
138
|
within *promoter_window* bp upstream of any TSS.
|
|
139
139
|
* ``gene_density`` (int) – number of genes with any overlap in the
|
|
140
140
|
search window.
|
|
141
|
-
"""
|
|
141
|
+
"""
|
|
142
142
|
|
|
143
143
|
_empty = {
|
|
144
144
|
"genic": False,
|
|
@@ -238,7 +238,7 @@ def _annotate_and_prioritize_variant(
|
|
|
238
238
|
promoter_window: int = 2_000,
|
|
239
239
|
biotype_weights: Optional[dict] = None,
|
|
240
240
|
) -> Optional[dict]:
|
|
241
|
-
ANNOTATE_PRIORITIZE =
|
|
241
|
+
ANNOTATE_PRIORITIZE = """Score and rank candidate genes for a single variant using a composite
|
|
242
242
|
priority metric.
|
|
243
243
|
|
|
244
244
|
Builds a candidate gene set within *window* bp of *pos* on *chrom*, then
|
|
@@ -287,7 +287,7 @@ def _annotate_and_prioritize_variant(
|
|
|
287
287
|
For intergenic variants, ``top_gene`` contains the two nearest flanking
|
|
288
288
|
gene symbols joined by ``'-'`` (e.g. ``'HBB-HBD'``) and ``biotype``
|
|
289
289
|
is set to ``'intergenic'``.
|
|
290
|
-
"""
|
|
290
|
+
"""
|
|
291
291
|
|
|
292
292
|
if biotype_weights is None:
|
|
293
293
|
biotype_weights = BIOTYPE_WEIGHTS
|
|
@@ -386,7 +386,7 @@ def _annotate_and_prioritize_variant(
|
|
|
386
386
|
# ---------------------------------------------------------------------------
|
|
387
387
|
|
|
388
388
|
def _clump_by_distance(df: pd.DataFrame, window_kb: int = 500) -> pd.DataFrame:
|
|
389
|
-
CLUMP_BY_DISTANCE =
|
|
389
|
+
CLUMP_BY_DISTANCE = """Reduce a lead-SNP table to one representative SNP per locus.
|
|
390
390
|
|
|
391
391
|
Applies greedy distance-based clumping within each chromosome group,
|
|
392
392
|
starting from the most significant SNP (lowest ``P`` or highest ``logP``).
|
|
@@ -406,7 +406,7 @@ def _clump_by_distance(df: pd.DataFrame, window_kb: int = 500) -> pd.DataFrame:
|
|
|
406
406
|
pandas.DataFrame
|
|
407
407
|
Deduplicated locus representatives sorted by chromosome and position
|
|
408
408
|
(natural sort order).
|
|
409
|
-
"""
|
|
409
|
+
"""
|
|
410
410
|
|
|
411
411
|
window = window_kb * 1000
|
|
412
412
|
clumped: list[pd.Series] = []
|
|
@@ -438,7 +438,7 @@ def get_hits_summary_table(
|
|
|
438
438
|
table_out: Optional[str] = None,
|
|
439
439
|
resources: Optional[ResourceConfig] = None,
|
|
440
440
|
) -> pd.DataFrame:
|
|
441
|
-
GET_HITS_SUMMARY_TABLE =
|
|
441
|
+
GET_HITS_SUMMARY_TABLE = """Annotate lead SNPs with nearest genes and write the locus summary table.
|
|
442
442
|
|
|
443
443
|
For each lead SNP in *leads_df*, runs two complementary annotation passes:
|
|
444
444
|
|
|
@@ -528,51 +528,54 @@ def get_hits_summary_table(
|
|
|
528
528
|
SNP CHR POS top_gene biotype
|
|
529
529
|
0 rs123456 2 60718043 BCL11A protein_coding
|
|
530
530
|
1 rs789012 11 5246696 HBB protein_coding
|
|
531
|
-
"""
|
|
531
|
+
"""
|
|
532
532
|
|
|
533
533
|
if resources is None:
|
|
534
534
|
resources = default_resources
|
|
535
535
|
|
|
536
536
|
# Choose gene info file based on build
|
|
537
|
-
if
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
537
|
+
if 'BUILD' in leads_df.columns:
|
|
538
|
+
if "OLD_POS" not in leads_df.columns and list(set(leads_df["BUILD"])) == ["hg19"]:
|
|
539
|
+
geneinfo_path = resources.require("geneinfo_hg19")
|
|
540
|
+
else:
|
|
541
|
+
geneinfo_path = resources.require("geneinfo_hg38")
|
|
541
542
|
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
543
|
+
logger.info("Loading gene info from: %s", geneinfo_path)
|
|
544
|
+
geneinfo = pd.read_csv(geneinfo_path, header=0, sep="\t")
|
|
545
|
+
genes_dict = _build_genes_dict(geneinfo)
|
|
545
546
|
|
|
546
|
-
|
|
547
|
-
|
|
547
|
+
window = window_kb * 1_000
|
|
548
|
+
records: list[dict] = []
|
|
548
549
|
|
|
549
550
|
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
551
|
+
logger.info("Annotating lead variants and generating hits summary table ...")
|
|
552
|
+
for _, row in leads_df.iterrows():
|
|
553
|
+
annotation = _annotate_variant(
|
|
554
|
+
chrom=row["CHR"],
|
|
555
|
+
pos=row["POS"],
|
|
556
|
+
genes_dict=genes_dict,
|
|
557
|
+
window=window,
|
|
558
|
+
)
|
|
559
|
+
prioritized = _annotate_and_prioritize_variant(
|
|
560
|
+
chrom=row["CHR"],
|
|
561
|
+
pos=row["POS"],
|
|
562
|
+
genes_df=geneinfo,
|
|
563
|
+
lead_snps_df=leads_df,
|
|
564
|
+
window=window,
|
|
565
|
+
)
|
|
565
566
|
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
567
|
+
record = {
|
|
568
|
+
**(row.to_dict()),
|
|
569
|
+
**(annotation if annotation is not None else {}),
|
|
570
|
+
**(prioritized if prioritized is not None else {}),
|
|
571
|
+
}
|
|
572
|
+
records.append(record)
|
|
572
573
|
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
574
|
+
locus_table = pd.DataFrame(records).sort_values(
|
|
575
|
+
["CHR", "POS"], key=natsort.natsort_keygen()
|
|
576
|
+
)
|
|
577
|
+
else:
|
|
578
|
+
locus_table = leads_df
|
|
576
579
|
|
|
577
580
|
if table_out is not None:
|
|
578
581
|
locus_table.to_csv(table_out, index=False, sep="\t", na_rep="None")
|
pycmplot/cli.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
CLI_MODULE =
|
|
3
|
+
CLI_MODULE = """
|
|
4
4
|
pycmplot.cli
|
|
5
5
|
============
|
|
6
6
|
|
|
@@ -15,7 +15,7 @@ Arguments are organised into four groups:
|
|
|
15
15
|
colours, and output format (apply to both plot modes).
|
|
16
16
|
* **Circular Only** — arguments specific to ``--mode cm``.
|
|
17
17
|
* **Linear Only** — arguments specific to ``--mode lm`` (default).
|
|
18
|
-
"""
|
|
18
|
+
"""
|
|
19
19
|
|
|
20
20
|
import argparse
|
|
21
21
|
from pathlib import Path
|
|
@@ -30,7 +30,7 @@ DESCMSG = """
|
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
|
|
33
|
-
GET_ARGUMENTS =
|
|
33
|
+
GET_ARGUMENTS = """Parse and return command-line arguments for the pycmplot entry point.
|
|
34
34
|
|
|
35
35
|
Parameters
|
|
36
36
|
----------
|
|
@@ -146,8 +146,10 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
|
|
|
146
146
|
- Description
|
|
147
147
|
* - ``annotate``
|
|
148
148
|
- str
|
|
149
|
-
- Annotation content:
|
|
150
|
-
|
|
149
|
+
- Annotation content: Annotate loci by column in hits table
|
|
150
|
+
``'snp'`` (rsID), ``top_gene``, ``nearest_upstream_gene``, ``nearest_downstream_gene``, etc,
|
|
151
|
+
or ``'gene'`` (let the package decide one of ``top_gene`` or ``nearest_upstream_gene``).
|
|
152
|
+
Defaults to ``'snp'`` when the flag is given without a value; unset (``None``) when the flag is omitted.
|
|
151
153
|
* - ``annotation_size``
|
|
152
154
|
- float
|
|
153
155
|
- Font size for annotation labels. Default ``6``.
|
|
@@ -263,7 +265,7 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
|
|
|
263
265
|
--------
|
|
264
266
|
pycmplot._core.main :
|
|
265
267
|
Consumes the :class:`~argparse.Namespace` returned by this function.
|
|
266
|
-
"""
|
|
268
|
+
"""
|
|
267
269
|
|
|
268
270
|
parser = argparse.ArgumentParser(
|
|
269
271
|
prog="pycmplot",
|
|
@@ -293,10 +295,7 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
|
|
|
293
295
|
),
|
|
294
296
|
required=True, type=str, metavar="str",
|
|
295
297
|
)
|
|
296
|
-
|
|
297
|
-
"-b", "--build_column", required=True, type=str, metavar="str",
|
|
298
|
-
help="Genome build column name (containing hg18/hg19/hg38)."
|
|
299
|
-
)
|
|
298
|
+
|
|
300
299
|
|
|
301
300
|
# ------------------------------------------------------------------
|
|
302
301
|
# Optional
|
|
@@ -329,13 +328,51 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
|
|
|
329
328
|
help="File delimiter (autodetected if omitted)."
|
|
330
329
|
)
|
|
331
330
|
opt.add_argument(
|
|
332
|
-
"--
|
|
333
|
-
|
|
331
|
+
"-bc", "--build_column", required=False, type=str, metavar="str",
|
|
332
|
+
help=("Name of column containing genome build (hg18/hg19/hg38). "
|
|
333
|
+
"Or use ``--build`` below to supply genome builds per summary stat file."
|
|
334
|
+
))
|
|
335
|
+
opt.add_argument(
|
|
336
|
+
"-b","--build", required=False, type=str, metavar='str',
|
|
337
|
+
help=
|
|
338
|
+
"""Comma-separated list of genome builds of summary stats file(s) listed
|
|
339
|
+
in the same order as sumstats files. e.g. hg19,hg38,hg38,hg19 means:
|
|
340
|
+
file1.txt.gz --> hg19
|
|
341
|
+
file2.txt.gz --> hg38
|
|
342
|
+
file3.tsv --> hg38 ... etc
|
|
343
|
+
"""
|
|
334
344
|
)
|
|
335
345
|
opt.add_argument(
|
|
336
|
-
"
|
|
337
|
-
help="
|
|
346
|
+
"--logp", action="store_true",
|
|
347
|
+
help="Plot −log₁₀(p) instead of raw p-values."
|
|
338
348
|
)
|
|
349
|
+
opt.add_argument("-qq", "--qq_plot", action="store_true",
|
|
350
|
+
help="Generate QQ-plot(s) alongside the Manhattan plot.")
|
|
351
|
+
opt.add_argument("-qq_sep", "--qq_separate", action="store_true",
|
|
352
|
+
help=(
|
|
353
|
+
"Save one QQ-plot file per sumstat instead of a "
|
|
354
|
+
"combined multi-panel figure. Only used when -qq is set."
|
|
355
|
+
))
|
|
356
|
+
opt.add_argument("-qq_cols", "--qq_ncols", default=3, type=int, metavar="int",
|
|
357
|
+
help="Number of columns in the combined QQ-plot grid (default: 3).")
|
|
358
|
+
opt.add_argument("-qq_thin", "--qq_thin", action="store_true", default=False,
|
|
359
|
+
help=(
|
|
360
|
+
"Thin null-like p-values before QQ plotting for speed (default: off). "
|
|
361
|
+
"Include this flag to turn on for speed."
|
|
362
|
+
))
|
|
363
|
+
opt.add_argument("-thin_below", "--thin_below", type=float, metavar="float", default=0.01,
|
|
364
|
+
help=(
|
|
365
|
+
"P-value threshold below which all points are always kept. "
|
|
366
|
+
"Points above this threshold are downsampled (default: 0.01)."
|
|
367
|
+
))
|
|
368
|
+
opt.add_argument("-qq_max_pts", "--qq_max_points", default=50000, type=int, metavar="int",
|
|
369
|
+
help="Max points to plot per QQ track after thinning (default: 50000).")
|
|
370
|
+
opt.add_argument("-qq_ov", "--qq_overlay", action="store_true",
|
|
371
|
+
help=(
|
|
372
|
+
"Plot all sumstats on a single overlaid QQ-plot, "
|
|
373
|
+
"each coloured by label with lambda in the legend. "
|
|
374
|
+
"Only used when -qq is set."
|
|
375
|
+
))
|
|
339
376
|
opt.add_argument(
|
|
340
377
|
"-tp", "--trim_pval", type=float, metavar="float",
|
|
341
378
|
help="Trim variants with p > this value before plotting."
|
|
@@ -355,11 +392,17 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
|
|
|
355
392
|
default=None, const=1e-5, nargs="?", type=float, metavar="float",
|
|
356
393
|
help="Suggestive significance threshold (default: 1e-5)."
|
|
357
394
|
)
|
|
395
|
+
|
|
396
|
+
# CLASS TO HANDLE ANNOTATION VALUES NOT IN CHOICE LIST
|
|
397
|
+
class AllowAll(list):
|
|
398
|
+
def __contains__(self, item):
|
|
399
|
+
return True
|
|
400
|
+
|
|
358
401
|
opt.add_argument(
|
|
359
402
|
"-a", "--annotate",
|
|
360
|
-
choices=["
|
|
361
|
-
default=
|
|
362
|
-
help="Annotate
|
|
403
|
+
choices=AllowAll(["snp", "gene", "top_gene", "nearest_upstream_gene", "nearest_downstream_gene"]), nargs="?",
|
|
404
|
+
default=None, type=str, metavar="{snp,gene,top_gene,nearest_upstream_gene,nearest_downstream_gene,...}", const="SNP",
|
|
405
|
+
help="Annotate loci by column name in hits table (defaults to 'snp' if provided and no value set)."
|
|
363
406
|
)
|
|
364
407
|
opt.add_argument(
|
|
365
408
|
"-p_size", "--point_size", default=6, type=float, metavar="float",
|
|
@@ -378,7 +421,7 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
|
|
|
378
421
|
help="P-value threshold for highlighting (default: 5e-8)."
|
|
379
422
|
)
|
|
380
423
|
opt.add_argument(
|
|
381
|
-
"-hc", "--
|
|
424
|
+
"-hc", "--highlight_color", default="brown", type=str, metavar="str",
|
|
382
425
|
help="Color of highlighted positions (default: brown)."
|
|
383
426
|
)
|
|
384
427
|
opt.add_argument(
|
|
@@ -386,7 +429,7 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
|
|
|
386
429
|
help="Draw vertical dashed lines through highlighted positions."
|
|
387
430
|
)
|
|
388
431
|
opt.add_argument(
|
|
389
|
-
"-hlc", "--
|
|
432
|
+
"-hlc", "--highlight_line_color", default="grey", type=str, metavar="str",
|
|
390
433
|
help="Color of highlight line (default: grey)."
|
|
391
434
|
)
|
|
392
435
|
opt.add_argument(
|
|
@@ -444,7 +487,7 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
|
|
|
444
487
|
)
|
|
445
488
|
cio.add_argument(
|
|
446
489
|
"-cl_side", "--chrom_label_side", choices=["inside", "outside"],
|
|
447
|
-
nargs="?", default=
|
|
490
|
+
nargs="?", default=None, const="inside", type=str,
|
|
448
491
|
help="Chromosome label placement (default: inside)."
|
|
449
492
|
)
|
|
450
493
|
cio.add_argument(
|
pycmplot/constants.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
CONSTANTS_MODULE =
|
|
1
|
+
CONSTANTS_MODULE = """
|
|
2
2
|
pycmplot.constants
|
|
3
3
|
==================
|
|
4
4
|
|
|
@@ -27,7 +27,7 @@ Notes
|
|
|
27
27
|
``hg38_chr_lengths`` reflects the GRCh38 primary assembly (GCA_000001405).
|
|
28
28
|
Values may differ slightly from builds that include alternate contigs or
|
|
29
29
|
patches.
|
|
30
|
-
"""
|
|
30
|
+
"""
|
|
31
31
|
|
|
32
32
|
# ---------------------------------------------------------------------------
|
|
33
33
|
# hg38 chromosome lengths (GRCh38)
|