pycmplot 0.1.6__tar.gz → 0.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pycmplot-0.1.6/pycmplot.egg-info → pycmplot-0.1.7}/PKG-INFO +1 -1
- {pycmplot-0.1.6 → pycmplot-0.1.7}/pycmplot/__init__.py +4 -2
- pycmplot-0.1.7/pycmplot/_core.py +218 -0
- {pycmplot-0.1.6 → pycmplot-0.1.7}/pycmplot/cli.py +7 -0
- {pycmplot-0.1.6 → pycmplot-0.1.7}/pycmplot/io.py +203 -19
- pycmplot-0.1.7/pycmplot/plotting/circular.py +489 -0
- {pycmplot-0.1.6 → pycmplot-0.1.7}/pycmplot/plotting/linear.py +125 -42
- {pycmplot-0.1.6 → pycmplot-0.1.7}/pycmplot/stats.py +4 -4
- {pycmplot-0.1.6 → pycmplot-0.1.7/pycmplot.egg-info}/PKG-INFO +1 -1
- {pycmplot-0.1.6 → pycmplot-0.1.7}/pyproject.toml +1 -1
- {pycmplot-0.1.6 → pycmplot-0.1.7}/setup.cfg +1 -1
- pycmplot-0.1.6/pycmplot/_core.py +0 -419
- pycmplot-0.1.6/pycmplot/plotting/circular.py +0 -261
- {pycmplot-0.1.6 → pycmplot-0.1.7}/LICENSE +0 -0
- {pycmplot-0.1.6 → pycmplot-0.1.7}/LICENSE.mit +0 -0
- {pycmplot-0.1.6 → pycmplot-0.1.7}/README.md +0 -0
- {pycmplot-0.1.6 → pycmplot-0.1.7}/pycmplot/annotation.py +0 -0
- {pycmplot-0.1.6 → pycmplot-0.1.7}/pycmplot/constants.py +0 -0
- {pycmplot-0.1.6 → pycmplot-0.1.7}/pycmplot/data/Homo_sapiens.GRCh37.geneinfo.tsv.gz +0 -0
- {pycmplot-0.1.6 → pycmplot-0.1.7}/pycmplot/data/Homo_sapiens.GRCh38.geneinfo.tsv.gz +0 -0
- {pycmplot-0.1.6 → pycmplot-0.1.7}/pycmplot/data/hg19ToHg38.over.chain +0 -0
- {pycmplot-0.1.6 → pycmplot-0.1.7}/pycmplot/liftover.py +0 -0
- {pycmplot-0.1.6 → pycmplot-0.1.7}/pycmplot/resources.py +0 -0
- {pycmplot-0.1.6 → pycmplot-0.1.7}/pycmplot.egg-info/SOURCES.txt +0 -0
- {pycmplot-0.1.6 → pycmplot-0.1.7}/pycmplot.egg-info/dependency_links.txt +0 -0
- {pycmplot-0.1.6 → pycmplot-0.1.7}/pycmplot.egg-info/entry_points.txt +0 -0
- {pycmplot-0.1.6 → pycmplot-0.1.7}/pycmplot.egg-info/requires.txt +0 -0
- {pycmplot-0.1.6 → pycmplot-0.1.7}/pycmplot.egg-info/top_level.txt +0 -0
- {pycmplot-0.1.6 → pycmplot-0.1.7}/setup.py +0 -0
|
@@ -11,6 +11,7 @@ Command-line::
|
|
|
11
11
|
|
|
12
12
|
Python API::
|
|
13
13
|
|
|
14
|
+
from pycmplot.io import prep_pycmplot_input_info, get_sumstats_and_merged_sector_list
|
|
14
15
|
from pycmplot.plotting import plot_linear, plot_circular
|
|
15
16
|
from pycmplot.stats import get_lead_snps
|
|
16
17
|
from pycmplot.annotation import get_hits_summary_table
|
|
@@ -22,7 +23,7 @@ Public surface
|
|
|
22
23
|
from pycmplot.plotting.linear import plot_linear
|
|
23
24
|
from pycmplot.plotting.circular import plot_circular, compute_track_radii_dict
|
|
24
25
|
from pycmplot.stats import get_lead_snps, get_highlight_snps
|
|
25
|
-
from pycmplot.io import get_sumstats_and_merged_sector_list
|
|
26
|
+
from pycmplot.io import prep_pycmplot_input_info, get_sumstats_and_merged_sector_list
|
|
26
27
|
from pycmplot.annotation import get_hits_summary_table
|
|
27
28
|
from pycmplot.constants import hg38_chr_lengths, BIOTYPE_WEIGHTS
|
|
28
29
|
from pycmplot.resources import ResourceConfig
|
|
@@ -33,6 +34,7 @@ __all__ = [
|
|
|
33
34
|
"compute_track_radii_dict",
|
|
34
35
|
"get_lead_snps",
|
|
35
36
|
"get_highlight_snps",
|
|
37
|
+
"prep_pycmplot_input_info",
|
|
36
38
|
"get_sumstats_and_merged_sector_list",
|
|
37
39
|
"get_hits_summary_table",
|
|
38
40
|
"hg38_chr_lengths",
|
|
@@ -40,4 +42,4 @@ __all__ = [
|
|
|
40
42
|
"ResourceConfig",
|
|
41
43
|
]
|
|
42
44
|
|
|
43
|
-
__version__ = "0.1.
|
|
45
|
+
__version__ = "0.1.7"
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
"""
|
|
2
|
+
pycmplot._core
|
|
3
|
+
==============
|
|
4
|
+
Main entry point — orchestrates CLI parsing, data loading, and plotting.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
import warnings
|
|
11
|
+
|
|
12
|
+
# Suppress noisy font-manager warnings before any matplotlib import
|
|
13
|
+
logging.getLogger("matplotlib.font_manager").setLevel(logging.ERROR)
|
|
14
|
+
warnings.filterwarnings("ignore")
|
|
15
|
+
|
|
16
|
+
logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def main() -> None:
    """Run the ``pycmplot`` console script.

    Parses the command line, normalises the comma-separated inputs, resolves
    per-file column names, loads the summary statistics and finally draws
    either the circular (mode ``CM``) or the linear Manhattan plot.
    """
    # Imports are deferred so that ``import pycmplot`` stays cheap.
    from pycmplot.cli import get_arguments, DESCMSG
    from pycmplot.io import (
        get_sumstats_and_merged_sector_list,
        prep_pycmplot_input_info,
        get_output_paths,
        strip_comma_separated_input_streams,
    )
    from pycmplot.plotting.linear import plot_linear
    from pycmplot.plotting.circular import plot_circular
    from pycmplot.resources import ResourceConfig

    # --- Command line ------------------------------------------------------
    args = get_arguments(DESCMSG)
    print(DESCMSG)

    # Every option is read up front, so a missing attribute fails before any
    # data is loaded.
    mode = args.mode
    sum_stats_raw = args.sum_stats
    chrom_arg = args.chrom_column
    pos_arg = args.pos_column
    snp_arg = args.snp_column
    build_arg = args.build_column
    labels_raw = args.labels
    pcol_arg = args.pval_column
    logp = args.logp
    chrom_label_size = args.chrom_label_size
    chrom_label_side = args.chrom_label_side
    track_label_size = args.track_label_size
    track_label_orientation = args.track_label_orientation
    sort_track = args.sort_track
    trim_pval = args.trim_pval
    signif_threshold = args.signif_threshold
    signif_line = args.signif_line
    suggest_threshold = args.suggest_threshold
    annotate = args.annotate
    annotation_size = args.annotation_size
    point_size = args.point_size
    highlight = args.highlight
    highlight_thresh = args.highlight_thresh
    highlight_line = args.highlight_line
    colors_raw = args.colors
    r_min = args.r_min
    r_max = args.r_max
    pad = args.pad
    output_format = args.output_format
    output_dir = args.output_dir
    dpi = args.dpi
    plot_title = args.plot_title
    plot_title_size = args.plot_title_size
    track_heights = args.track_heights
    track_spacing = args.track_spacing
    no_track_labels = args.no_track_labels
    chr_spacing = args.chr_spacing

    # --- Comma-separated strings -> lists ----------------------------------
    sum_stats, labels, colors, t_heights = strip_comma_separated_input_streams(
        sum_stats=sum_stats_raw,
        labels=labels_raw,
        colors_raw=colors_raw,
        track_heights=track_heights,
    )

    # --- Output paths ------------------------------------------------------
    # Only the table path is consumed below; the plot path is not used here.
    _plt_name, table_out = get_output_paths(
        labels,
        mode=mode,
        logp=logp,
        output_dir=output_dir,
        plot_title=plot_title,
        output_format=output_format,
    )

    # --- Column names per input file ---------------------------------------
    sumstats_hdr_dic = prep_pycmplot_input_info(
        sum_stats=sum_stats,
        labels=labels,
        delim=args.delim,
        chrom=chrom_arg,
        pos=pos_arg,
        snp=snp_arg,
        pcol=pcol_arg,
        build=build_arg,
    )

    # --- Resources ---------------------------------------------------------
    resources = ResourceConfig()

    # --- Load data, compute sectors, build the hits table -------------------
    loaded = get_sumstats_and_merged_sector_list(
        sum_stats=sum_stats,
        labels=labels,
        trim_pval=trim_pval,
        logp=logp,
        file_info=sumstats_hdr_dic,
        sort_tracks=sort_track,
        table_out=table_out,
        signif_threshold=signif_threshold,
        signif_line=signif_line,
        suggest_threshold=suggest_threshold,
        resources=resources,
    )
    merged_assoc_sector_sizes, sumstats_loaded, hits_table, signif_lines = loaded

    # --- Plot --------------------------------------------------------------
    if mode.upper() == "CM":
        logger.info("Generating CIRCULAR MANHATTAN Plot ...")
        plot_circular(
            sumstats_loaded=sumstats_loaded,
            logp=logp,
            signif_line=signif_line,
            signif_lines=signif_lines,
            highlight=highlight,
            highlight_thresh=highlight_thresh,
            highlight_line=highlight_line,
            colors=colors,
            chrom_label_side=chrom_label_side,
            chrom_label_size=chrom_label_size,
            track_label_size=track_label_size,
            track_label_orientation=track_label_orientation,
            annotate=annotate,
            annotation_size=annotation_size,
            hits_table=hits_table,
            sector_sizes=merged_assoc_sector_sizes,
            pad=pad,
            r_min=r_min,
            r_max=r_max,
            plot_title=plot_title,
            plot_title_size=plot_title_size,
            no_track_labels=no_track_labels,
            dpi=dpi,
            output_format=output_format,
            output_dir=output_dir,
        )
    else:
        logger.info("Generating LINEAR MANHATTAN Plot ...")
        # An empty hits table is passed on as "no annotations".
        annot_df = None if hits_table.empty else hits_table
        plot_linear(
            sumstats_loaded=sumstats_loaded,
            track_heights=t_heights,
            trim_pval=trim_pval,
            logp=bool(logp),
            point_size=point_size,
            highlight=highlight,
            highlight_thresh=highlight_thresh,
            annot_df=annot_df,
            label_col="top_gene",
            chr_spacing=chr_spacing,
            track_spacing=track_spacing,
            colors=colors,
            signif_lines=signif_lines,
            plot_title=plot_title,
            no_track_labels=no_track_labels,
            dpi=dpi,
            output_format=output_format,
            output_dir=output_dir,
            figsize=(15, 9),
        )


if __name__ == "__main__":
    main()
|
|
@@ -147,6 +147,13 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
|
|
|
147
147
|
const="chrom_len", default=None, type=str, #metavar="str",
|
|
148
148
|
help="Sort tracks by chromosome count or label."
|
|
149
149
|
)
|
|
150
|
+
opt.add_argument(
|
|
151
|
+
"-ntl", "--no_track_labels",
|
|
152
|
+
help=(
|
|
153
|
+
"Exclude track labels from plot. (default: False)"
|
|
154
|
+
),
|
|
155
|
+
action="store_true"
|
|
156
|
+
)
|
|
150
157
|
opt.add_argument(
|
|
151
158
|
"-plt", "--plot_title", default="MyCMplot", type=str, metavar="str",
|
|
152
159
|
help="Plot plot_title / output file stem."
|
|
@@ -8,6 +8,8 @@ from __future__ import annotations
|
|
|
8
8
|
|
|
9
9
|
import csv
|
|
10
10
|
import gzip
|
|
11
|
+
import sys
|
|
12
|
+
import re
|
|
11
13
|
import logging
|
|
12
14
|
from collections import defaultdict
|
|
13
15
|
from pathlib import Path
|
|
@@ -109,6 +111,198 @@ def get_file_header(
|
|
|
109
111
|
return list(hdr)
|
|
110
112
|
|
|
111
113
|
|
|
114
|
+
|
|
115
|
+
def strip_comma_separated_input_streams(
    sum_stats,
    labels,
    colors_raw = 'steelblue,grey',
    track_heights = None,
):
    """Split the comma-separated CLI strings into clean Python lists.

    Parameters
    ----------
    sum_stats:
        Comma-separated summary-statistics file paths.
    labels:
        Comma-separated track labels, one per file in *sum_stats*.
    colors_raw:
        Comma-separated colour names.
    track_heights:
        Comma-separated numeric track heights, or ``None`` when no heights
        were given.

    Returns
    -------
    (sum_stats, labels, colors, t_heights)
        Three lists of whitespace-stripped strings plus a list of floats;
        ``t_heights`` is ``None`` when *track_heights* is ``None``.

    Exits via ``sys.exit`` when the number of files and labels differ.
    """
    # Split BEFORE validating: the check must compare item counts, not the
    # character lengths of the raw strings.
    sum_stats = [s.strip() for s in sum_stats.strip().split(",")]
    labels = [lbl.strip() for lbl in labels.strip().split(",")]

    if len(sum_stats) != len(labels):
        sys.exit(
            "Error: number of summary stats files and labels must match.\n"
            f" Files: {sum_stats}\n"
            f" Labels: {labels}"
        )

    colors = [c.strip() for c in colors_raw.strip().split(",")]

    # The signature advertises ``None`` as the default, so it must not crash.
    if track_heights is None:
        t_heights = None
    else:
        t_heights = [float(x) for x in track_heights.strip().split(",")]

    return sum_stats, labels, colors, t_heights
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# ------------------------------------------------------------------
|
|
150
|
+
# Random string for output paths
|
|
151
|
+
# ------------------------------------------------------------------
|
|
152
|
+
def generate_random_string(length):
    """Return *length* random characters drawn from ASCII letters and digits."""
    import random
    import string

    alphabet = string.ascii_letters + string.digits
    # Sampling is with replacement, so characters may repeat.
    picked = random.choices(alphabet, k=length)
    return ''.join(picked)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# ------------------------------------------------------------------
|
|
162
|
+
# Output paths
|
|
163
|
+
# ------------------------------------------------------------------
|
|
164
|
+
def get_output_paths(
    labels,
    mode: Optional[str] = 'lm',
    logp: bool = False,
    output_dir: Optional[str] = '.',
    plot_title: Optional[str] = None,
    output_format: Optional[str] = 'png'
):
    """Build the plot and locus-summary-table output paths.

    The output directory is created if it does not exist yet.

    Parameters
    ----------
    labels:
        Track labels; joined with ``_`` into the file stem.
    mode:
        Plot mode, lower-cased into the stem. ``None`` falls back to ``'lm'``.
    logp:
        Selects the ``_logp`` (true) or ``_pval`` (false) suffix.
    output_dir:
        Target directory. ``None`` falls back to ``'.'``.
    plot_title:
        Title used as stem prefix after removing every character that is not
        an ASCII letter, digit or whitespace and turning spaces into ``_``.
        A random 10-character prefix is used when empty or ``None``.
    output_format:
        Plot file extension, lower-cased. ``None`` falls back to ``'png'``.

    Returns
    -------
    (plt_name, table_out)
        Absolute plot path and absolute summary-table path, as strings.
    """
    # The parameters are declared Optional, so honour None by using the
    # signature defaults instead of failing on ``.lower()`` / ``Path(None)``.
    if mode is None:
        mode = 'lm'
    if output_dir is None:
        output_dir = '.'
    if output_format is None:
        output_format = 'png'

    out_path = Path(output_dir).resolve()
    out_path.mkdir(parents=True, exist_ok=True)

    if plot_title:
        pltitle = re.sub(r"[^a-zA-Z0-9\s]", "", plot_title).replace(" ", "_")
    else:
        pltitle = generate_random_string(10)

    stem = f"{pltitle}_{'_'.join(labels)}_{mode.lower()}"
    suffix = "_logp" if logp else "_pval"
    plt_base = f"{out_path / stem}{suffix}"

    plt_name = f"{plt_base}.{output_format.lower()}"
    table_out = f"{plt_base}_locus_summary_table.tsv"

    return plt_name, table_out
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
# ---------------------------------------------------------------------------
|
|
195
|
+
# input formatter
|
|
196
|
+
# ---------------------------------------------------------------------------
|
|
197
|
+
def _pick_column(header, preferred, aliases):
    """Return the header entry to use for one role, or ``None`` if absent.

    An explicitly requested column wins whenever it is present; otherwise the
    first header entry that matches a known alias is used.
    """
    if preferred and preferred in header:
        return preferred
    return next((col for col in header if col in aliases), None)


def prep_pycmplot_input_info(
    sum_stats: list[str],
    labels: list[str],
    delim: Optional[str] = None,
    chrom: Optional[str] = None,
    pos: Optional[str] = None,
    snp: Optional[str] = None,
    pcol: Optional[str] = None,
    build: Optional[str] = None
):
    """Resolve the delimiter and the required column names of every input file.

    Parameters
    ----------
    sum_stats:
        List of file paths to GWAS summary statistics.
    labels:
        Track labels in the same order as *sum_stats*.
    delim:
        File delimiter (autodetected per file if omitted).
    chrom:
        Chromosome column.
    pos:
        Position column.
    snp:
        SNP or Marker ID column.
    pcol:
        P-value column.
    build:
        Build version column.

    Each column argument takes priority over the built-in aliases when that
    column exists in the file header; otherwise the aliases are tried.

    Returns
    -------
    dict
        Maps each label to ``[old_columns, column_dtypes, new_columns, delim]``
        where ``old_columns`` lists the resolved header names,
        ``column_dtypes`` maps them to dtypes, ``new_columns`` maps them to
        ``CHR``/``POS``/``SNP``/``P``/``BUILD`` and ``delim`` is the delimiter
        used for that file.

    Exits via ``sys.exit`` when any of the five columns cannot be found.
    """
    # ------------------------------------------------------------------
    # Resolve delimiter (None means: autodetect per file)
    # ------------------------------------------------------------------
    sep = resolve_delimiter(delim) if delim else None

    # ------------------------------------------------------------------
    # (role, user-requested name, known aliases) — alias sets are built once
    # ------------------------------------------------------------------
    roles = (
        ("chromosome", chrom, frozenset({
            "CHR", "CHROM", "Chromosome", "#CHROM", "#CHR",
            "Chrom", "chrom", "chr", "chromosome", "#chr", "#chrom",
        })),
        ("position", pos, frozenset({"BP", "POS", "bp", "pos", "Basepair"})),
        ("SNP", snp, frozenset({
            "SNP", "RSID", "rsID", "MarkerName", "MarkerID",
            "Predictor", "Marker", "SNPID", "ID",
        })),
        ("p-value", pcol, frozenset({
            "P", "P-value", "Wald_P", "pvalue", "p_val", "pval",
        })),
        ("build", build, frozenset({
            "BUILD", "Genome", "Genome_Build", "Genome-build",
        })),
    )

    # ------------------------------------------------------------------
    # Resolve column names per file
    # ------------------------------------------------------------------
    sumstats_hdr_dic: dict = {}

    for name, fpath in zip(labels, sum_stats):
        if sep:
            file_sep, dialect = sep, None
        else:
            file_sep, dialect = detect_delimiter(fpath, sample_size=5_000)

        hdr = get_file_header(fpath, delim=file_sep, dialect=dialect)

        resolved = [
            _pick_column(hdr, preferred, aliases)
            for _, preferred, aliases in roles
        ]
        missing = [
            role for (role, _, _), col in zip(roles, resolved) if col is None
        ]
        if missing:
            # Name the missing role(s) so the user knows which option to set.
            sys.exit(
                f"Error: could not find a required column in {fpath}.\n"
                f" Header: {hdr}\n"
                f" Details: no column found for: {', '.join(missing)}"
            )

        chrom_col, pos_col, snp_col, pval_col, build_col = resolved

        old_cols = [chrom_col, pos_col, snp_col, pval_col, build_col]
        new_cols = {
            chrom_col: "CHR",
            pos_col: "POS",
            snp_col: "SNP",
            pval_col: "P",
            build_col: "BUILD",
        }
        col_dtypes = {
            chrom_col: str,
            pos_col: object,
            snp_col: str,
            pval_col: float,
            build_col: str,
        }

        sumstats_hdr_dic[name] = [old_cols, col_dtypes, new_cols, file_sep]

    return sumstats_hdr_dic
|
|
304
|
+
|
|
305
|
+
|
|
112
306
|
# ---------------------------------------------------------------------------
|
|
113
307
|
# Sector-size helpers
|
|
114
308
|
# ---------------------------------------------------------------------------
|
|
@@ -134,8 +328,6 @@ def get_sumstats_and_merged_sector_list(
|
|
|
134
328
|
file_info: Optional[dict] = None,
|
|
135
329
|
sort_tracks: Optional[str] = "chrom_len",
|
|
136
330
|
table_out: Optional[str] = None,
|
|
137
|
-
highlight: bool = False,
|
|
138
|
-
highlight_thresh: float = 5e-8,
|
|
139
331
|
signif_threshold: Optional[float] = None,
|
|
140
332
|
signif_line: Optional[float] = None,
|
|
141
333
|
suggest_threshold: Optional[float] = None,
|
|
@@ -156,8 +348,8 @@ def get_sumstats_and_merged_sector_list(
|
|
|
156
348
|
``'label'`` — sort tracks alphabetically by label.
|
|
157
349
|
``'chrom_len'`` — sort by number of chromosomes (default).
|
|
158
350
|
``None`` — preserve input order.
|
|
159
|
-
|
|
160
|
-
|
|
351
|
+
signif_threshold:
|
|
352
|
+
Threshold of significance to create hits table.
|
|
161
353
|
resources:
|
|
162
354
|
:class:`~pycmplot.resources.ResourceConfig` instance.
|
|
163
355
|
|
|
@@ -225,21 +417,13 @@ def get_sumstats_and_merged_sector_list(
|
|
|
225
417
|
logger.info("Converting hg19 coordinates to hg38 ...")
|
|
226
418
|
sumstats_loaded[label][0] = liftover_position(df, resources=resources)
|
|
227
419
|
|
|
228
|
-
# Lead SNPs
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
sumstats_loaded[label][0],
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
logp=True,
|
|
236
|
-
)
|
|
237
|
-
else:
|
|
238
|
-
leads = get_lead_snps(
|
|
239
|
-
df=sumstats_loaded[label][0],
|
|
240
|
-
highlight_thresh=signif_threshold or 5e-8,
|
|
241
|
-
logp=True,
|
|
242
|
-
)
|
|
420
|
+
# Lead SNPs
|
|
421
|
+
logger.info("Extracting variants to highlight ...")
|
|
422
|
+
leads = get_lead_snps(
|
|
423
|
+
df=sumstats_loaded[label][0],
|
|
424
|
+
signif_threshold=signif_threshold or 5e-8,
|
|
425
|
+
logp=True,
|
|
426
|
+
)
|
|
243
427
|
|
|
244
428
|
all_lead_snps.append(leads)
|
|
245
429
|
|