pycmplot 0.2.1__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pycmplot-0.2.1/pycmplot.egg-info → pycmplot-0.2.3}/PKG-INFO +3 -2
- {pycmplot-0.2.1 → pycmplot-0.2.3}/README.md +2 -1
- {pycmplot-0.2.1 → pycmplot-0.2.3}/docs/conf.py +1 -1
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/__init__.py +1 -1
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/_core.py +61 -5
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/cli.py +38 -17
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/io.py +50 -20
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/plotting/circular.py +11 -7
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/plotting/linear.py +140 -9
- pycmplot-0.2.3/pycmplot/plotting/qq.py +643 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3/pycmplot.egg-info}/PKG-INFO +3 -2
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot.egg-info/SOURCES.txt +1 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pyproject.toml +2 -2
- {pycmplot-0.2.1 → pycmplot-0.2.3}/setup.cfg +1 -1
- {pycmplot-0.2.1 → pycmplot-0.2.3}/LICENSE +0 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/annotation.py +0 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/constants.py +0 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/data/Homo_sapiens.GRCh37.geneinfo.tsv.gz +0 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/data/Homo_sapiens.GRCh38.geneinfo.tsv.gz +0 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/data/hg19ToHg38.over.chain +0 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/liftover.py +0 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/resources.py +0 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot/stats.py +0 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot.egg-info/dependency_links.txt +0 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot.egg-info/entry_points.txt +0 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot.egg-info/requires.txt +0 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot.egg-info/top_level.txt +0 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot_docs/docs/conf.py +0 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot_docs/docstrings_annotation.py +0 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot_docs/docstrings_core_cli.py +0 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot_docs/docstrings_io.py +0 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot_docs/docstrings_liftover.py +0 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot_docs/docstrings_plotting.py +0 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot_docs/docstrings_resources_constants.py +0 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3}/pycmplot_docs/docstrings_stats.py +0 -0
- {pycmplot-0.2.1 → pycmplot-0.2.3}/setup.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pycmplot
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: Multi-track circular and linear Manhattan plot generation for GWAS summary statistics
|
|
5
5
|
Author: Kevin Esoh
|
|
6
6
|
Author-email: Kevin Esoh <kesohku1@jh.edu>
|
|
@@ -183,7 +183,8 @@ pycmplot \
|
|
|
183
183
|
| `-b, --build` | Comma-separated genome builds of sumstats | off |
|
|
184
184
|
| `-bc, --build_column` | Genome build column name (containing hg18/hg19/hg38) | off |
|
|
185
185
|
| `-m, --mode` | `lm` linear or `cm` circular | `lm` |
|
|
186
|
-
| `-qq, --qq_plot` | Also generate a QQ-plot | off
|
|
186
|
+
| `-qq, --qq_plot` | Also generate a QQ-plot | off |
|
|
187
|
+
| `-qq_thin, --qq_thin` | Thin p-values for faster QQ-plotting | off |
|
|
187
188
|
| `--logp` | Plot -log10(p) | off |
|
|
188
189
|
| `-sig, --signif_threshold` | Genome-wide significance threshold | off (auto 0.05/N) |
|
|
189
190
|
| `-sigl, --signif_line` | Value for genome-wide significance line if different from `-sig` | 5e-8 |
|
|
@@ -157,7 +157,8 @@ pycmplot \
|
|
|
157
157
|
| `-b, --build` | Comma-separated genome builds of sumstats | off |
|
|
158
158
|
| `-bc, --build_column` | Genome build column name (containing hg18/hg19/hg38) | off |
|
|
159
159
|
| `-m, --mode` | `lm` linear or `cm` circular | `lm` |
|
|
160
|
-
| `-qq, --qq_plot` | Also generate a QQ-plot | off
|
|
160
|
+
| `-qq, --qq_plot` | Also generate a QQ-plot | off |
|
|
161
|
+
| `-qq_thin, --qq_thin` | Thin p-values for faster QQ-plotting | off |
|
|
161
162
|
| `--logp` | Plot -log10(p) | off |
|
|
162
163
|
| `-sig, --signif_threshold` | Genome-wide significance threshold | off (auto 0.05/N) |
|
|
163
164
|
| `-sigl, --signif_line` | Value for genome-wide significance line if different from `-sig` | 5e-8 |
|
|
@@ -12,7 +12,7 @@ sys.path.insert(0, os.path.abspath(".."))
|
|
|
12
12
|
project = "pycmplot"
|
|
13
13
|
copyright = "2026, Kevin Esoh"
|
|
14
14
|
author = "Kevin Esoh"
|
|
15
|
-
release = "0.2.
|
|
15
|
+
release = "0.2.3" # update to match PyPI version
|
|
16
16
|
|
|
17
17
|
# -- General configuration -----------------------------------------------------
|
|
18
18
|
extensions = [
|
|
@@ -92,6 +92,7 @@ def main() -> None:
|
|
|
92
92
|
)
|
|
93
93
|
from pycmplot.plotting.linear import plot_linear
|
|
94
94
|
from pycmplot.plotting.circular import plot_circular
|
|
95
|
+
from pycmplot.plotting.qq import plot_qq_combined, plot_qq_separate, plot_qq_overlay
|
|
95
96
|
from pycmplot.resources import ResourceConfig
|
|
96
97
|
|
|
97
98
|
# ------------------------------------------------------------------
|
|
@@ -110,6 +111,13 @@ def main() -> None:
|
|
|
110
111
|
labels_raw = args.labels
|
|
111
112
|
pcol_arg = args.pval_column
|
|
112
113
|
logp = args.logp
|
|
114
|
+
qq = args.qq_plot
|
|
115
|
+
qq_separate = args.qq_separate
|
|
116
|
+
qq_ncols = args.qq_ncols
|
|
117
|
+
qq_thin = args.qq_thin
|
|
118
|
+
thin_below = args.thin_below
|
|
119
|
+
qq_max_points = args.qq_max_points
|
|
120
|
+
qq_overlay = args.qq_overlay
|
|
113
121
|
chrom_label_size = args.chrom_label_size
|
|
114
122
|
chrom_label_side = args.chrom_label_side
|
|
115
123
|
track_label_size = args.track_label_size
|
|
@@ -164,7 +172,8 @@ def main() -> None:
|
|
|
164
172
|
# ------------------------------------------------------------------
|
|
165
173
|
(
|
|
166
174
|
plt_name,
|
|
167
|
-
table_out
|
|
175
|
+
table_out,
|
|
176
|
+
plt_base,
|
|
168
177
|
) = get_output_paths(
|
|
169
178
|
labels,
|
|
170
179
|
mode = mode,
|
|
@@ -202,6 +211,7 @@ def main() -> None:
|
|
|
202
211
|
sumstats_loaded,
|
|
203
212
|
hits_table,
|
|
204
213
|
signif_lines,
|
|
214
|
+
pval_dict,
|
|
205
215
|
) = get_sumstats_and_merged_sector_list(
|
|
206
216
|
sum_stats=sum_stats,
|
|
207
217
|
labels=labels,
|
|
@@ -219,13 +229,13 @@ def main() -> None:
|
|
|
219
229
|
# ------------------------------------------------------------------
|
|
220
230
|
# ANNOTATE BY
|
|
221
231
|
# ------------------------------------------------------------------
|
|
232
|
+
label_col = 'SNP'
|
|
222
233
|
if annotate:
|
|
223
|
-
if str(annotate).upper() == "GENE":
|
|
234
|
+
if str(annotate).upper() == "GENE" and 'top_gene' in hits_table.columns:
|
|
224
235
|
label_col = 'top_gene'
|
|
225
|
-
elif
|
|
226
|
-
label_col = 'SNP'
|
|
227
|
-
else:
|
|
236
|
+
elif label_col in hits_table.columns:
|
|
228
237
|
label_col = annotate
|
|
238
|
+
|
|
229
239
|
|
|
230
240
|
logger.info(f"Anotate by: {label_col}")
|
|
231
241
|
|
|
@@ -296,6 +306,52 @@ def main() -> None:
|
|
|
296
306
|
figsize=(15, 9)
|
|
297
307
|
)
|
|
298
308
|
|
|
309
|
+
# ------------------------------------------------------------------
|
|
310
|
+
# QQ PLOT
|
|
311
|
+
# ------------------------------------------------------------------
|
|
312
|
+
if qq and sumstats_loaded:
|
|
313
|
+
logger.info("Generating QQ Plot(s) ...")
|
|
314
|
+
qq_stem = f"{plt_base}_qq"
|
|
315
|
+
|
|
316
|
+
if qq_separate:
|
|
317
|
+
plot_qq_separate(
|
|
318
|
+
pval_dict=pval_dict,
|
|
319
|
+
thin=qq_thin,
|
|
320
|
+
thin_below=thin_below,
|
|
321
|
+
max_points=qq_max_points,
|
|
322
|
+
output_path=qq_stem,
|
|
323
|
+
colors=colors,
|
|
324
|
+
signif_threshold=signif_threshold or 5e-8,
|
|
325
|
+
dpi=dpi,
|
|
326
|
+
fig_format=output_format,
|
|
327
|
+
)
|
|
328
|
+
elif qq_overlay:
|
|
329
|
+
plot_qq_overlay(
|
|
330
|
+
pval_dict=pval_dict,
|
|
331
|
+
thin=qq_thin,
|
|
332
|
+
thin_below=thin_below,
|
|
333
|
+
max_points=qq_max_points,
|
|
334
|
+
colors=colors,
|
|
335
|
+
signif_threshold=signif_threshold or 5e-8,
|
|
336
|
+
dpi=dpi,
|
|
337
|
+
title=plot_title,
|
|
338
|
+
output_path=f"{qq_stem}_overlay",
|
|
339
|
+
fig_format=output_format,
|
|
340
|
+
)
|
|
341
|
+
else:
|
|
342
|
+
plot_qq_combined(
|
|
343
|
+
pval_dict=pval_dict,
|
|
344
|
+
thin=qq_thin,
|
|
345
|
+
thin_below=thin_below,
|
|
346
|
+
max_points=qq_max_points,
|
|
347
|
+
colors=colors,
|
|
348
|
+
ncols=qq_ncols,
|
|
349
|
+
signif_threshold=signif_threshold or 5e-8,
|
|
350
|
+
dpi=dpi,
|
|
351
|
+
title=plot_title,
|
|
352
|
+
output_path=f"{qq_stem}_combined",
|
|
353
|
+
fig_format=output_format,
|
|
354
|
+
)
|
|
299
355
|
|
|
300
356
|
if __name__ == "__main__":
|
|
301
357
|
main()
|
|
@@ -329,29 +329,50 @@ def get_arguments(descmsg: str = DESCMSG) -> argparse.Namespace:
|
|
|
329
329
|
)
|
|
330
330
|
opt.add_argument(
|
|
331
331
|
"-bc", "--build_column", required=False, type=str, metavar="str",
|
|
332
|
-
|
|
333
|
-
|
|
332
|
+
help=("Name of column containing genome build (hg18/hg19/hg38)."
|
|
333
|
+
"Or use ``--build`` below to supply genome builds per summary stat file."
|
|
334
|
+
))
|
|
334
335
|
opt.add_argument(
|
|
335
|
-
"-b","--build",
|
|
336
|
-
help=
|
|
337
|
-
Comma-sperated list of genome build of summary stats file(s) listed
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
"""
|
|
343
|
-
required=False,
|
|
344
|
-
type=str,
|
|
345
|
-
metavar='str'
|
|
336
|
+
"-b","--build", required=False, type=str, metavar='str',
|
|
337
|
+
help=
|
|
338
|
+
"""Comma-sperated list of genome build of summary stats file(s) listed
|
|
339
|
+
in the same order as sumstats files. e.g. hg19,hg38,hg38,hg19 means:
|
|
340
|
+
file1.txt.gz --> hg19
|
|
341
|
+
file2.txt.gz --> hg38
|
|
342
|
+
file3.tsv --> hg38 ... etc
|
|
343
|
+
"""
|
|
346
344
|
)
|
|
347
345
|
opt.add_argument(
|
|
348
346
|
"--logp", action="store_true",
|
|
349
347
|
help="Plot −log₁₀(p) instead of raw p-values."
|
|
350
348
|
)
|
|
351
|
-
opt.add_argument(
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
349
|
+
opt.add_argument("-qq", "--qq_plot", action="store_true",
|
|
350
|
+
help="Generate QQ-plot(s) alongside the Manhattan plot.")
|
|
351
|
+
opt.add_argument("-qq_sep", "--qq_separate", action="store_true",
|
|
352
|
+
help=(
|
|
353
|
+
"Save one QQ-plot file per sumstat instead of a "
|
|
354
|
+
"combined multi-panel figure. Only used when -qq is set."
|
|
355
|
+
))
|
|
356
|
+
opt.add_argument("-qq_cols", "--qq_ncols", default=3, type=int, metavar="int",
|
|
357
|
+
help="Number of columns in the combined QQ-plot grid (default: 3).")
|
|
358
|
+
opt.add_argument("-qq_thin", "--qq_thin", action="store_true", default=False,
|
|
359
|
+
help=(
|
|
360
|
+
"Thin null-like p-values before QQ plotting for speed (default: off)."
|
|
361
|
+
"Include this flag to turn on for speed."
|
|
362
|
+
))
|
|
363
|
+
opt.add_argument("-thin_below", "--thin_below", type=float, metavar="float", default=0.01,
|
|
364
|
+
help=(
|
|
365
|
+
"P-value threshold below which all points are always kept."
|
|
366
|
+
"Points above this threshold are downsampled (default: 0.01)."
|
|
367
|
+
))
|
|
368
|
+
opt.add_argument("-qq_max_pts", "--qq_max_points", default=50000, type=int, metavar="int",
|
|
369
|
+
help="Max points to plot per QQ track after thinning (default: 50000).")
|
|
370
|
+
opt.add_argument("-qq_ov", "--qq_overlay", action="store_true",
|
|
371
|
+
help=(
|
|
372
|
+
"Plot all sumstats on a single overlaid QQ-plot, "
|
|
373
|
+
"each coloured by label with lambda in the legend. "
|
|
374
|
+
"Only used when -qq is set."
|
|
375
|
+
))
|
|
355
376
|
opt.add_argument(
|
|
356
377
|
"-tp", "--trim_pval", type=float, metavar="float",
|
|
357
378
|
help="Trim variants with p > this value before plotting."
|
|
@@ -39,6 +39,7 @@ import pandas as pd
|
|
|
39
39
|
from pycmplot.stats import get_lead_snps, get_highlight_snps
|
|
40
40
|
from pycmplot.annotation import get_hits_summary_table
|
|
41
41
|
from pycmplot.resources import ResourceConfig, default_resources
|
|
42
|
+
from pycmplot.constants import hg38_chr_lengths
|
|
42
43
|
|
|
43
44
|
logger = logging.getLogger(__name__)
|
|
44
45
|
|
|
@@ -291,7 +292,9 @@ def strip_comma_separated_input_streams(
|
|
|
291
292
|
|
|
292
293
|
if builds:
|
|
293
294
|
builds = [s.strip() for s in builds.strip().split(",")]
|
|
294
|
-
if len(sum_stats)
|
|
295
|
+
if len(sum_stats) == len(labels) == len(builds):
|
|
296
|
+
pass
|
|
297
|
+
else:
|
|
295
298
|
sys.exit(
|
|
296
299
|
"Error: number of summary stats files, labels, and builds must match.\n"
|
|
297
300
|
f" Files: {sum_stats}\n"
|
|
@@ -429,16 +432,16 @@ def get_output_paths(
|
|
|
429
432
|
|
|
430
433
|
labels = [re.sub(r"[^a-zA-Z0-9\s]", "", x).replace(" ", "_") for x in labels]
|
|
431
434
|
|
|
432
|
-
plt_base = str(out_path / f"{pltitle}_{'_'.join(labels)}_{mode.lower()}")
|
|
433
|
-
|
|
434
435
|
suffix = "_logp" if logp else "_pval"
|
|
435
436
|
|
|
436
|
-
|
|
437
|
+
plt_base = str(out_path / f"{pltitle}_{'_'.join(labels)}_{mode.lower()}{suffix}")
|
|
438
|
+
|
|
439
|
+
plt_name = f"{plt_base}.{output_format.lower()}"
|
|
437
440
|
|
|
438
|
-
table_out = f"{plt_base}
|
|
441
|
+
table_out = f"{plt_base}_locus_summary_table.tsv"
|
|
439
442
|
|
|
440
443
|
|
|
441
|
-
return plt_name, table_out
|
|
444
|
+
return plt_name, table_out, plt_base
|
|
442
445
|
|
|
443
446
|
|
|
444
447
|
|
|
@@ -561,11 +564,14 @@ def prep_pycmplot_input_info(
|
|
|
561
564
|
snp_candidates = [c for c in snp_candidates if c]
|
|
562
565
|
pvl_candidates = [c for c in pvl_candidates if c]
|
|
563
566
|
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
567
|
+
#if buildc:
|
|
568
|
+
bld_candidates = buildc
|
|
569
|
+
if not bld_candidates:
|
|
570
|
+
bld_candidates = ["BUILD", "Genome", "Genome_Build", "Genome-build"]
|
|
571
|
+
bld_candidates_l = [x.lower() for x in bld_candidates]
|
|
572
|
+
bld_candidates_u = [x.upper() for x in bld_candidates]
|
|
573
|
+
bld_candidates = [buildc] + bld_candidates + bld_candidates_l + bld_candidates_u
|
|
574
|
+
bld_candidates = [c for c in bld_candidates if c]
|
|
569
575
|
|
|
570
576
|
# ------------------------------------------------------------------
|
|
571
577
|
# Resolve column names per file
|
|
@@ -631,7 +637,7 @@ def prep_pycmplot_input_info(
|
|
|
631
637
|
}
|
|
632
638
|
col_dtypes = {
|
|
633
639
|
chrom_col: str,
|
|
634
|
-
pos_col:
|
|
640
|
+
pos_col: object,
|
|
635
641
|
snp_col: str,
|
|
636
642
|
pcol: float,
|
|
637
643
|
bcol: str,
|
|
@@ -678,6 +684,7 @@ def get_sumstats_and_merged_sector_list(
|
|
|
678
684
|
signif_line: Optional[float] = None,
|
|
679
685
|
suggest_threshold: Optional[float] = None,
|
|
680
686
|
resources: Optional[ResourceConfig] = None,
|
|
687
|
+
hg38_chr_lengths = hg38_chr_lengths,
|
|
681
688
|
):
|
|
682
689
|
GET_SUMSTATS = """Load summary statistics, run liftover, extract lead SNPs, and compute
|
|
683
690
|
merged Circos sector sizes.
|
|
@@ -795,6 +802,7 @@ def get_sumstats_and_merged_sector_list(
|
|
|
795
802
|
}
|
|
796
803
|
|
|
797
804
|
sumstats_loaded: dict[str, list] = {}
|
|
805
|
+
pval_dict: dict[str, np.ndarray | pd.Series] = {}
|
|
798
806
|
all_lead_snps: list[pd.DataFrame] = []
|
|
799
807
|
|
|
800
808
|
for label in sumstats.keys() & (file_info or {}).keys():
|
|
@@ -818,13 +826,18 @@ def get_sumstats_and_merged_sector_list(
|
|
|
818
826
|
dtype=sumstat_dtypes,
|
|
819
827
|
).rename(columns=sumstat_newcols)
|
|
820
828
|
|
|
829
|
+
# Get dict of p-values for qq-plotting before applying trim_pval
|
|
830
|
+
logger.info("Extracting raw p-values for QQ-plotting ...")
|
|
831
|
+
pval_dict[label] = df["P"].dropna().astype(float).values
|
|
832
|
+
|
|
833
|
+
|
|
821
834
|
# Add build column if not exist and build supplied
|
|
822
835
|
if build:
|
|
823
836
|
df['BUILD'] = build
|
|
824
837
|
|
|
825
838
|
# Trim insignificant variants for faster plotting
|
|
826
839
|
if trim_pval:
|
|
827
|
-
logger.info("Excluding variants with p-value less than %s ...", trim_pval)
|
|
840
|
+
logger.info("Excluding variants with p-value less than %s to speed up Manhattan plotting ...", trim_pval)
|
|
828
841
|
df = df[df["P"].astype(float) <= float(trim_pval)]
|
|
829
842
|
else:
|
|
830
843
|
df = df[df["P"].astype(float) <= 1]
|
|
@@ -853,6 +866,7 @@ def get_sumstats_and_merged_sector_list(
|
|
|
853
866
|
if "BUILD" in df.columns and "hg19" in df["BUILD"].unique():
|
|
854
867
|
logger.info("Converting hg19 coordinates to hg38 ...")
|
|
855
868
|
sumstats_loaded[label][0] = liftover_position(df, resources=resources)
|
|
869
|
+
liftover = True
|
|
856
870
|
|
|
857
871
|
# Lead SNPs
|
|
858
872
|
logger.info("Extracting variants to highlight ...")
|
|
@@ -917,7 +931,13 @@ def get_sumstats_and_merged_sector_list(
|
|
|
917
931
|
for _ in sumstats
|
|
918
932
|
]
|
|
919
933
|
|
|
920
|
-
|
|
934
|
+
|
|
935
|
+
# sort dicts by user-supplied order
|
|
936
|
+
sumstats_loaded = {key: sumstats_loaded[key] for key in labels if key in sumstats_loaded}
|
|
937
|
+
pval_dict = {key: pval_dict[key] for key in labels if key in pval_dict}
|
|
938
|
+
|
|
939
|
+
|
|
940
|
+
# or sort by user option
|
|
921
941
|
if sort_tracks is not None:
|
|
922
942
|
if sort_tracks.lower() == "label":
|
|
923
943
|
sumstats_loaded = dict(sorted(sumstats_loaded.items()))
|
|
@@ -928,6 +948,7 @@ def get_sumstats_and_merged_sector_list(
|
|
|
928
948
|
key=lambda item: (item[0], natsort.natsort_keygen()(item[1][1])),
|
|
929
949
|
)
|
|
930
950
|
)
|
|
951
|
+
|
|
931
952
|
|
|
932
953
|
# Compute per-sumstat sector sizes (chrom → [min_pos, max_pos])
|
|
933
954
|
assoc_sector_sizes_list: list[dict] = []
|
|
@@ -941,7 +962,16 @@ def get_sumstats_and_merged_sector_list(
|
|
|
941
962
|
for chrom in assoc["CHR"].unique():
|
|
942
963
|
sub = assoc[assoc["CHR"] == chrom]
|
|
943
964
|
lo_val = max(sub["POS"].min() - 1_000_000, 0)
|
|
944
|
-
hi_val = sub["POS"].max()
|
|
965
|
+
hi_val = sub["POS"].max()
|
|
966
|
+
chrom_max = hi_val
|
|
967
|
+
|
|
968
|
+
# Ensure sector sizes are within chrom ranges if liftover
|
|
969
|
+
if liftover:
|
|
970
|
+
logger.info("Limiting sector sizes to chromosome ranges for liftover sumstats ...")
|
|
971
|
+
hg38_chr_lengths = {k.replace("chr",""): v for k, v in hg38_chr_lengths.items()}
|
|
972
|
+
chrom_max = hg38_chr_lengths[chrom]
|
|
973
|
+
|
|
974
|
+
hi_val = min(hi_val, chrom_max)
|
|
945
975
|
assoc_dic[str(chrom)] = [lo_val, hi_val]
|
|
946
976
|
|
|
947
977
|
min_dic_val = min(assoc_dic.values())
|
|
@@ -955,9 +985,9 @@ def get_sumstats_and_merged_sector_list(
|
|
|
955
985
|
|
|
956
986
|
# Add spacer sector for y-axis labelling
|
|
957
987
|
if min_dic_val is not None:
|
|
958
|
-
if len(labels) <= 5:
|
|
959
|
-
|
|
960
|
-
else:
|
|
961
|
-
|
|
988
|
+
#if len(labels) <= 5:
|
|
989
|
+
# merged["Spacer1"] = [x + x / 2 for x in min_dic_val]
|
|
990
|
+
#else:
|
|
991
|
+
merged["Spacer1"] = [x * 2 for x in min_dic_val]
|
|
962
992
|
|
|
963
|
-
return merged, sumstats_loaded, hits_table, signif_lines
|
|
993
|
+
return merged, sumstats_loaded, hits_table, signif_lines, pval_dict
|
|
@@ -380,6 +380,7 @@ def plot_circular(
|
|
|
380
380
|
annotate: str = None,
|
|
381
381
|
label_col: str = None,
|
|
382
382
|
chrom_label_side: str = 'inside',
|
|
383
|
+
chrom_label_size: float = 6,
|
|
383
384
|
signif_line: float = 5e-8,
|
|
384
385
|
highlight: bool = False,
|
|
385
386
|
highlight_thresh: float = 5e-8,
|
|
@@ -387,7 +388,6 @@ def plot_circular(
|
|
|
387
388
|
highlight_line: bool = False,
|
|
388
389
|
highlight_line_color: str = 'grey',
|
|
389
390
|
colors: list[str] = ['steelblue', 'grey'],
|
|
390
|
-
chrom_label_size: float = 6,
|
|
391
391
|
track_label_size: float = 6,
|
|
392
392
|
track_label_orientation: str = 'vertical',
|
|
393
393
|
hits_table: pd.DataFrame = None,
|
|
@@ -526,7 +526,8 @@ def plot_circular(
|
|
|
526
526
|
labels = list(sumstats_loaded.keys())
|
|
527
527
|
(
|
|
528
528
|
plt_name,
|
|
529
|
-
table_out
|
|
529
|
+
table_out,
|
|
530
|
+
plt_base,
|
|
530
531
|
) = get_output_paths(
|
|
531
532
|
labels,
|
|
532
533
|
mode='cm',
|
|
@@ -558,14 +559,16 @@ def plot_circular(
|
|
|
558
559
|
radii_reversed = dict(reversed(list(radii.items())))
|
|
559
560
|
|
|
560
561
|
inside_loc = r_min - 3
|
|
561
|
-
outside_loc =
|
|
562
|
-
chrom_label_loc = outside_loc if chrom_label_side == "outside" else inside_loc
|
|
562
|
+
outside_loc = r_max + 4
|
|
563
563
|
|
|
564
564
|
if annotate:
|
|
565
565
|
annot_key = next(iter(radii_reversed))
|
|
566
566
|
annot_r = radii_reversed.pop(annot_key)
|
|
567
|
+
outside_loc = max(list(radii_reversed.values())[0]) + 2
|
|
567
568
|
radii_reversed["annot_track_r"] = annot_r
|
|
568
569
|
|
|
570
|
+
chrom_label_loc = outside_loc if chrom_label_side == "outside" else inside_loc
|
|
571
|
+
|
|
569
572
|
for index, (sector_radius, sumstats_key, sumstats_value, signif_dict) in enumerate(
|
|
570
573
|
zip(
|
|
571
574
|
radii_reversed.values(),
|
|
@@ -647,7 +650,7 @@ def plot_circular(
|
|
|
647
650
|
x=pos,
|
|
648
651
|
label=str(label),
|
|
649
652
|
min_r=r_low,
|
|
650
|
-
max_r=r_low +
|
|
653
|
+
max_r=r_low + 6,
|
|
651
654
|
label_size=annotation_size,
|
|
652
655
|
text_kws={
|
|
653
656
|
"size": "large",
|
|
@@ -668,8 +671,9 @@ def plot_circular(
|
|
|
668
671
|
r=[sector_min_r, r_low],
|
|
669
672
|
start=pos,
|
|
670
673
|
end=pos,
|
|
674
|
+
alpha=0.4,
|
|
671
675
|
color=highlight_line_color,
|
|
672
|
-
lw=0.
|
|
676
|
+
lw=0.4,
|
|
673
677
|
ls="--",
|
|
674
678
|
)
|
|
675
679
|
|
|
@@ -709,6 +713,6 @@ def plot_circular(
|
|
|
709
713
|
|
|
710
714
|
if plt_name:
|
|
711
715
|
fig.savefig(fname=plt_name.lower(), dpi=dpi)
|
|
712
|
-
logger.info("Saved circular Manhattan plot: %s", plt_name)
|
|
716
|
+
logger.info("Saved circular Manhattan plot: %s", plt_name.lower())
|
|
713
717
|
|
|
714
718
|
return fig
|
|
@@ -100,7 +100,7 @@ def _cluster_annotations_by_chr(
|
|
|
100
100
|
return clusters
|
|
101
101
|
|
|
102
102
|
|
|
103
|
-
def _draw_annotation_arrows_2(
|
|
103
|
+
def _draw_annotation_arrows(
|
|
104
104
|
ax,
|
|
105
105
|
annot_df,
|
|
106
106
|
chr_col: str,
|
|
@@ -212,7 +212,7 @@ def _draw_annotation_arrows_2(
|
|
|
212
212
|
|
|
213
213
|
|
|
214
214
|
|
|
215
|
-
def _draw_annotation_arrows(
|
|
215
|
+
def _draw_annotation_arrows_2(
|
|
216
216
|
ax,
|
|
217
217
|
annot_df,
|
|
218
218
|
chr_col: str,
|
|
@@ -361,6 +361,118 @@ def _draw_annotation_arrows(
|
|
|
361
361
|
|
|
362
362
|
last_xtext = max(x_texts)
|
|
363
363
|
|
|
364
|
+
|
|
365
|
+
# Using cumulative distance for annotations and separating clusters
|
|
366
|
+
def _draw_annotation_arrows_3(
|
|
367
|
+
ax,
|
|
368
|
+
annot_df,
|
|
369
|
+
chr_col: str,
|
|
370
|
+
label_col: str,
|
|
371
|
+
offsets: dict,
|
|
372
|
+
chr_max: dict,
|
|
373
|
+
spread_width: float = 60e6,
|
|
374
|
+
isolation_threshold: float = 80e6,
|
|
375
|
+
stack_threshold: float = 10e6,
|
|
376
|
+
y_text_base: float = 0.55,
|
|
377
|
+
y_stack_step: float = 0.02,
|
|
378
|
+
max_rad: float = 0.35,
|
|
379
|
+
y_tip: float = 0.0,
|
|
380
|
+
) -> None:
|
|
381
|
+
|
|
382
|
+
annot_df = annot_df.sort_values(by=[chr_col, "x"], key=natsort_keygen())
|
|
383
|
+
last_xtext = 0 - spread_width
|
|
384
|
+
|
|
385
|
+
for chr_name, df_chr in annot_df.groupby(chr_col, sort=False):
|
|
386
|
+
df_chr = df_chr.sort_values("x")
|
|
387
|
+
chr_start = offsets[chr_name]
|
|
388
|
+
chr_end = offsets[chr_name] + chr_max[chr_name]
|
|
389
|
+
chr_range = chr_end - chr_start
|
|
390
|
+
|
|
391
|
+
x_signals = df_chr["x"].values
|
|
392
|
+
labels = df_chr[label_col].values
|
|
393
|
+
n = len(x_signals)
|
|
394
|
+
|
|
395
|
+
# ------------------------------------------------------------------
|
|
396
|
+
# Compute label x positions (spread or straight)
|
|
397
|
+
# ------------------------------------------------------------------
|
|
398
|
+
x_texts = []
|
|
399
|
+
for k, x_sig in enumerate(x_signals):
|
|
400
|
+
neighbours = np.delete(x_signals, k)
|
|
401
|
+
min_dist = np.min(np.abs(neighbours - x_sig)) if len(neighbours) else np.inf
|
|
402
|
+
|
|
403
|
+
if min_dist >= isolation_threshold:
|
|
404
|
+
x_texts.append(x_sig) # Tier 1: sit directly above
|
|
405
|
+
else:
|
|
406
|
+
x_texts.append(None) # Tier 2: needs spreading
|
|
407
|
+
|
|
408
|
+
spread_indices = [k for k, v in enumerate(x_texts) if v is None]
|
|
409
|
+
if spread_indices:
|
|
410
|
+
sw = spread_width
|
|
411
|
+
pad = sw / int(str(sw)[:2]) / 2
|
|
412
|
+
while sw > chr_range and sw > pad:
|
|
413
|
+
sw -= pad
|
|
414
|
+
|
|
415
|
+
sig_start = x_signals[spread_indices[0]]
|
|
416
|
+
xmin = sig_start - sw
|
|
417
|
+
positions = np.arange(xmin, xmin + len(spread_indices) * sw, sw)
|
|
418
|
+
|
|
419
|
+
while positions[0] <= last_xtext:
|
|
420
|
+
positions = positions + sw
|
|
421
|
+
|
|
422
|
+
for j, k in enumerate(spread_indices):
|
|
423
|
+
x_texts[k] = positions[j]
|
|
424
|
+
|
|
425
|
+
# ------------------------------------------------------------------
|
|
426
|
+
# Compute label y positions using cumulative x distance
|
|
427
|
+
# ------------------------------------------------------------------
|
|
428
|
+
y_texts = [y_text_base] * n
|
|
429
|
+
|
|
430
|
+
for k in range(1, n):
|
|
431
|
+
cum_dist = abs(x_texts[k] - x_texts[k - 1])
|
|
432
|
+
if cum_dist <= stack_threshold:
|
|
433
|
+
# too close to previous label — stack upward adaptively
|
|
434
|
+
y_texts[k] = y_texts[k - 1] + y_stack_step + (
|
|
435
|
+
y_stack_step * (1 - cum_dist / stack_threshold)
|
|
436
|
+
)
|
|
437
|
+
else:
|
|
438
|
+
y_texts[k] = y_text_base # far enough — reset to baseline
|
|
439
|
+
|
|
440
|
+
# ------------------------------------------------------------------
|
|
441
|
+
# Draw arrows and labels
|
|
442
|
+
# ------------------------------------------------------------------
|
|
443
|
+
for x_sig, x_txt, y_txt, label in zip(x_signals, x_texts, y_texts, labels):
|
|
444
|
+
dx = x_txt - x_sig
|
|
445
|
+
rad = np.clip(dx / (spread_width * 2), -max_rad, max_rad)
|
|
446
|
+
|
|
447
|
+
arrow = FancyArrowPatch(
|
|
448
|
+
(x_txt, y_txt),
|
|
449
|
+
(x_sig, y_tip - 0.05),
|
|
450
|
+
arrowstyle="-|>",
|
|
451
|
+
mutation_scale=12,
|
|
452
|
+
lw=0.6,
|
|
453
|
+
color="grey",
|
|
454
|
+
alpha=0.5,
|
|
455
|
+
connectionstyle=f"arc3,rad={rad}",
|
|
456
|
+
transform=ax.transData,
|
|
457
|
+
)
|
|
458
|
+
ax.add_patch(arrow)
|
|
459
|
+
|
|
460
|
+
ax.text(
|
|
461
|
+
x_txt,
|
|
462
|
+
y_txt + 0.02,
|
|
463
|
+
str(label),
|
|
464
|
+
rotation=45,
|
|
465
|
+
ha="left",
|
|
466
|
+
va="bottom",
|
|
467
|
+
fontsize=10,
|
|
468
|
+
clip_on=False,
|
|
469
|
+
color="black",
|
|
470
|
+
fontstyle="italic",
|
|
471
|
+
fontweight="regular",
|
|
472
|
+
)
|
|
473
|
+
|
|
474
|
+
last_xtext = max(x_texts)
|
|
475
|
+
|
|
364
476
|
# ---------------------------------------------------------------------------
|
|
365
477
|
# Public function
|
|
366
478
|
# ---------------------------------------------------------------------------
|
|
@@ -647,7 +759,8 @@ def plot_linearm(
|
|
|
647
759
|
# Annotation track
|
|
648
760
|
# ------------------------------------------------------------------
|
|
649
761
|
if annotate and annot_df is not None:
|
|
650
|
-
|
|
762
|
+
|
|
763
|
+
|
|
651
764
|
_draw_annotation_arrows(
|
|
652
765
|
ax_annot,
|
|
653
766
|
annot_df,
|
|
@@ -657,9 +770,10 @@ def plot_linearm(
|
|
|
657
770
|
chr_max=chr_max,
|
|
658
771
|
spread_width=60e6,
|
|
659
772
|
)
|
|
660
|
-
|
|
773
|
+
|
|
661
774
|
|
|
662
|
-
|
|
775
|
+
"""
|
|
776
|
+
_draw_annotation_arrows_2(
|
|
663
777
|
ax=ax_annot,
|
|
664
778
|
annot_df=annot_df,
|
|
665
779
|
chr_col=chr_col,
|
|
@@ -667,13 +781,31 @@ def plot_linearm(
|
|
|
667
781
|
offsets=offsets,
|
|
668
782
|
chr_max=chr_max,
|
|
669
783
|
spread_width=60e6,
|
|
670
|
-
isolation_threshold=
|
|
784
|
+
isolation_threshold=40e6, # above this → straight (Tier 1)
|
|
671
785
|
stack_threshold=10e6, # below this → stack (Tier 3)
|
|
672
786
|
max_tilt=45, # max angleA departure from vertical
|
|
673
787
|
y_tip=0.0,
|
|
674
788
|
y_text=0.55,
|
|
675
789
|
y_stack_step=0.12, # vertical gap between stacked labels
|
|
676
790
|
)
|
|
791
|
+
|
|
792
|
+
|
|
793
|
+
_draw_annotation_arrows_3(
|
|
794
|
+
ax=ax_annot,
|
|
795
|
+
annot_df=annot_df,
|
|
796
|
+
chr_col=chr_col,
|
|
797
|
+
label_col=label_col,
|
|
798
|
+
offsets=offsets,
|
|
799
|
+
chr_max=chr_max,
|
|
800
|
+
spread_width=60e6,
|
|
801
|
+
isolation_threshold=80e6,
|
|
802
|
+
stack_threshold=90e6,
|
|
803
|
+
y_text_base=0.55,
|
|
804
|
+
y_stack_step=0.03,
|
|
805
|
+
max_rad=0.35,
|
|
806
|
+
y_tip=0.0,
|
|
807
|
+
)
|
|
808
|
+
"""
|
|
677
809
|
|
|
678
810
|
ax_annot.set_ylim(0, 1)
|
|
679
811
|
ax_annot.axis("off")
|
|
@@ -859,12 +991,11 @@ def plot_linear(
|
|
|
859
991
|
logger.info("'SNP' column is used for annotation since '%s' column could not be resolved in hits table.", label_col)
|
|
860
992
|
pass
|
|
861
993
|
|
|
862
|
-
logger.info(f"LABEL COL: {label}")
|
|
863
|
-
|
|
864
994
|
# plot name
|
|
865
995
|
(
|
|
866
996
|
plt_name,
|
|
867
|
-
table_out
|
|
997
|
+
table_out,
|
|
998
|
+
plt_base,
|
|
868
999
|
) = get_output_paths(
|
|
869
1000
|
labels = t_labels,
|
|
870
1001
|
mode='lm',
|