pycmplot 0.1.9__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pycmplot-0.2.0/PKG-INFO +228 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/README.md +3 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/docs/conf.py +1 -1
- pycmplot-0.2.0/pycmplot.egg-info/PKG-INFO +228 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot.egg-info/entry_points.txt +0 -1
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot.egg-info/requires.txt +7 -0
- pycmplot-0.2.0/pycmplot.egg-info/top_level.txt +3 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pyproject.toml +8 -2
- {pycmplot-0.1.9 → pycmplot-0.2.0}/setup.cfg +2 -2
- pycmplot-0.1.9/PKG-INFO +0 -14
- pycmplot-0.1.9/pycmplot.egg-info/PKG-INFO +0 -14
- pycmplot-0.1.9/pycmplot.egg-info/top_level.txt +0 -1
- {pycmplot-0.1.9 → pycmplot-0.2.0}/LICENSE +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot/__init__.py +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot/_core.py +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot/annotation.py +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot/cli.py +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot/constants.py +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot/data/Homo_sapiens.GRCh37.geneinfo.tsv.gz +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot/data/Homo_sapiens.GRCh38.geneinfo.tsv.gz +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot/data/hg19ToHg38.over.chain +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot/io.py +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot/liftover.py +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot/plotting/circular.py +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot/plotting/linear.py +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot/resources.py +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot/stats.py +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot.egg-info/SOURCES.txt +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot.egg-info/dependency_links.txt +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot_docs/docs/conf.py +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot_docs/docstrings_annotation.py +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot_docs/docstrings_core_cli.py +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot_docs/docstrings_io.py +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot_docs/docstrings_liftover.py +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot_docs/docstrings_plotting.py +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot_docs/docstrings_resources_constants.py +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/pycmplot_docs/docstrings_stats.py +0 -0
- {pycmplot-0.1.9 → pycmplot-0.2.0}/setup.py +0 -0
pycmplot-0.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pycmplot
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Multi-track circular and linear Manhattan plot generation for GWAS summary statistics
|
|
5
|
+
Author: Kevin Esoh
|
|
6
|
+
Author-email: Kevin Esoh <kesohku1@jh.edu>
|
|
7
|
+
License-Expression: CC-BY-NC-SA-4.0
|
|
8
|
+
Requires-Python: >=3.9
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Requires-Dist: pandas>=1.5
|
|
12
|
+
Requires-Dist: numpy>=1.23
|
|
13
|
+
Requires-Dist: matplotlib>=3.6
|
|
14
|
+
Requires-Dist: pillow>=9.0
|
|
15
|
+
Requires-Dist: pycirclize>=0.6
|
|
16
|
+
Requires-Dist: natsort>=8.0
|
|
17
|
+
Requires-Dist: adjustText>=0.8
|
|
18
|
+
Requires-Dist: pyliftover>=0.4
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: pytest; extra == "dev"
|
|
21
|
+
Requires-Dist: black; extra == "dev"
|
|
22
|
+
Requires-Dist: ruff; extra == "dev"
|
|
23
|
+
Requires-Dist: towncrier; extra == "dev"
|
|
24
|
+
Requires-Dist: sphinx; extra == "dev"
|
|
25
|
+
Dynamic: license-file
|
|
26
|
+
|
|
27
|
+
# pycmplot
|
|
28
|
+
|
|
29
|
+
Multi-track **circular** and **linear** Manhattan plot generation for GWAS summary statistics.
|
|
30
|
+
|
|
31
|
+
```
|
|
32
|
+
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
|
|
33
|
+
| PACKAGE FOR CIRCULAR AND LINEAR MANHATTAN PLOTTING |
|
|
34
|
+
| Kevin Esoh, 2026 |
|
|
35
|
+
| kesohku1@jh.edu |
|
|
36
|
+
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
This package will take any number of per SNP/variant summary statistics, be it GWAS,
|
|
40
|
+
selection scans (e.g. iHS, EHH, FST), etc and generate Manhattan plots. If given a single
|
|
41
|
+
file, a single one-track Manhattan plot will be generated. Multiple files will result in
|
|
42
|
+
the generation of a multi-track stacked Manhattan plot.
|
|
43
|
+
|
|
44
|
+
In the process, the package will generate a **hits summary table** for variants with p-value
|
|
45
|
+
(or whatever statistic for significance is used) below the user-specified significance threshold.
|
|
46
|
+
This hits summary table will contain annotated gene names, in addition to other annotations, that
|
|
47
|
+
would then be used to annotate the plots.
|
|
48
|
+
|
|
49
|
+
Importantly, the package allows for conversion of hg19 genomic coordinates to hg38 coordinates.
|
|
50
|
+
This ensures that summary stats obtained using different imputation panels, for instance, can be
|
|
51
|
+
processed in the same run. That is, users can simply concatenate multiple summary stats files together,
|
|
52
|
+
such as those for the same trait but analysed using different imputation panels. Users only need to
|
|
53
|
+
add a new column specifying the genome build (hg19 or hg38) of the variants. Then the `--build_column`
|
|
54
|
+
option of the package should be used to indicate the column and then the package will liftover all
|
|
55
|
+
positions in hg19 to hg38 ensuring that hits table generation and plotting are done with one unified
|
|
56
|
+
coordinate system.
|
|
57
|
+
|
|
58
|
+
A key functionality of the package is its ability to auto-detect certain columns if omitted on the
|
|
59
|
+
command-line or python API:
|
|
60
|
+
- Chromosome column: `-chr, --chrom_column` or omitted
|
|
61
|
+
- Basepair position column: `-pos, --pos_column` or omitted
|
|
62
|
+
- SNP or Marker ID column: `-snp, --snp_column` or omitted
|
|
63
|
+
- P-value (or whatever value) column: `-p, --pval_column` or omitted
|
|
64
|
+
- Build version column: `-b, --build_column` or omitted
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
Candidate names for each of the columns are shown below.
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
# Resolve column names
|
|
71
|
+
chr_candidates = [chrom, 'CHR', 'CHROM', 'Chromosome', '#CHROM', '#CHR', 'Chrom', 'chrom', 'chr', 'chromosome', '#chr', '#chrom']
|
|
72
|
+
pos_candidates = [pos, 'BP', 'POS', 'bp', 'pos', 'Basepair']
|
|
73
|
+
snp_candidates = [snp, 'SNP', 'RSID', 'rsID', 'MarkerName', 'MarkerID', 'Predictor', 'Marker', 'SNPID', 'ID']
|
|
74
|
+
pvl_candidates = [pcol, 'P', 'P-value', 'Wald_P', 'pvalue', 'p_val', 'pval']
|
|
75
|
+
bld_candidates = [build, 'BUILD', 'Genome', 'Genome_Build', 'Genome-build']
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
> NB: Upper- and lower-case forms of the candidates are also considered, so each candidate is expanded into three forms.
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
Since GWAS summary stats files can be very large, to improve speed and memory efficiency, it is
|
|
82
|
+
**highly recommended** to use `-tp, --trim_pval` with a value to exclude variants with p-value above a
|
|
83
|
+
certain threshold, e.g. `0.01 (1e-2)` or `0.001 (1e-3)`.
|
|
84
|
+
|
|
85
|
+
A potentially useful application is **comparative visualization** of results from multiple imputation panels,
|
|
86
|
+
multiple populations, or multiple traits to observe shared genetic architecture.
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## Installation
|
|
91
|
+
|
|
92
|
+
### From PyPI
|
|
93
|
+
```bash
|
|
94
|
+
pip install pycmplot
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
### From GitHub
|
|
99
|
+
```bash
|
|
100
|
+
git clone https://github.com/esohkevin/pycmplot.git
|
|
101
|
+
|
|
102
|
+
cd pycmplot
|
|
103
|
+
|
|
104
|
+
pip install -e .
|
|
105
|
+
|
|
106
|
+
# or
|
|
107
|
+
|
|
108
|
+
pip install -e . --break-system-packages
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
### Use python virtual environment if local installation is not possible
|
|
113
|
+
```bash
|
|
114
|
+
python -m venv ~/bin/pycmplot
|
|
115
|
+
|
|
116
|
+
source ~/bin/pycmplot/bin/activate
|
|
117
|
+
|
|
118
|
+
pip install --upgrade pip setuptools wheel
|
|
119
|
+
|
|
120
|
+
# then follow any of the installation steps above
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
### Test the installation
|
|
125
|
+
```bash
|
|
126
|
+
pycmplot -h
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### Dependencies
|
|
130
|
+
|
|
131
|
+
| Package | Purpose |
|
|
132
|
+
|---------|---------|
|
|
133
|
+
| pandas, numpy | Data loading & statistics |
|
|
134
|
+
| matplotlib | Plotting backend |
|
|
135
|
+
| pycirclize | Circular (Circos-style) tracks |
|
|
136
|
+
| natsort | Natural chromosome sorting |
|
|
137
|
+
| adjustText | Label collision avoidance |
|
|
138
|
+
| pyliftover | hg19 to hg38 coordinate conversion |
|
|
139
|
+
| Pillow | Image utilities |
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
## Command-line usage
|
|
145
|
+
|
|
146
|
+
### Linear Manhattan (default)
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
pycmplot \
|
|
150
|
+
--sum_stats HbF.tsv.gz,MCV.txt.gz,MCH.tsv.gz \
|
|
151
|
+
--labels HbF,MCV,MCH \
|
|
152
|
+
--logp \
|
|
153
|
+
--signif_line \
|
|
154
|
+
--highlight \
|
|
155
|
+
--annotate GENE \
|
|
156
|
+
--output_dir ./results \
|
|
157
|
+
--output_format png \
|
|
158
|
+
--dpi 300
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
### Circular Manhattan
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
pycmplot \
|
|
165
|
+
--sum_stats HbF.tsv.gz,MCV.tsv.gz \
|
|
166
|
+
--labels HbF,MCV \
|
|
167
|
+
--mode cm \
|
|
168
|
+
--trim_pval 0.01 \
|
|
169
|
+
--logp \
|
|
170
|
+
--signif_threshold \
|
|
171
|
+
--plot_title "RBC Traits" \
|
|
172
|
+
--output_dir ./results
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Key options
|
|
176
|
+
|
|
177
|
+
| Flag | Description | Default |
|
|
178
|
+
|------|-------------|---------|
|
|
179
|
+
| `-s, --sum_stats` | Comma-separated sumstats files | **required** |
|
|
180
|
+
| `-l, --labels` | Comma-separated track labels | **required** |
|
|
181
|
+
| `-b, --build_column` | Genome build column name (containing hg18/hg19/hg38) | **required** |
|
|
182
|
+
| `-m, --mode` | `lm` linear or `cm` circular | `lm` |
|
|
183
|
+
| `-qq, --qq_plot` | Also generate a QQ-plot | off (coming soon...) |
|
|
184
|
+
| `--logp` | Plot -log10(p) | off |
|
|
185
|
+
| `-sig, --signif_threshold` | Genome-wide significance threshold | off (auto 0.05/N) |
|
|
186
|
+
| `-sigl, --signif_line` | Value for genome-wide significance line if different from `-sig` | 5e-8 |
|
|
187
|
+
| `-sug, --suggest_threshold` | Threshold for suggestive signals | off |
|
|
188
|
+
| `-hl, --highlight` | Highlight significant loci | off |
|
|
189
|
+
| `-a, --annotate` | Annotate with `SNP` or `GENE` | `SNP` |
|
|
190
|
+
| `-tp, --trim_pval` | Trim variants above this p-value for speed | off |
|
|
191
|
+
| `-st, --sort_track` | Sort tracks by `label` or `chrom_len` | input order |
|
|
192
|
+
| `-od, --output_dir` | Output directory | `.` |
|
|
193
|
+
| `-of, --output_format` | Output format (`png`, `pdf`, `svg`, `jpg`) | `png` |
|
|
194
|
+
|
|
195
|
+
Run `pycmplot -h` for the full option list.
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## Python API
|
|
200
|
+
|
|
201
|
+
A demonstration of how to use the python API is provided in this notebook: https://github.com/esohkevin/pycmplot/blob/main/pycmplot_python_api.ipynb
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
---
|
|
205
|
+
|
|
206
|
+
## Package structure
|
|
207
|
+
|
|
208
|
+
```
|
|
209
|
+
pycmplot/
|
|
210
|
+
├── pyproject.toml
|
|
211
|
+
├── setup.py
|
|
212
|
+
├── setup.cfg
|
|
213
|
+
├── README.md
|
|
214
|
+
└── pycmplot/
|
|
215
|
+
├── __init__.py # public API exports
|
|
216
|
+
├── __main__.py # python -m pycmplot
|
|
217
|
+
├── _core.py # main() orchestration
|
|
218
|
+
├── cli.py # argparse definitions
|
|
219
|
+
├── constants.py # chromosome lengths, biotype weights
|
|
220
|
+
├── resources.py # external resource path config
|
|
221
|
+
├── io.py # sumstat loading, delimiter detection
|
|
222
|
+
├── stats.py # get_lead_snps, get_highlight_snps
|
|
223
|
+
├── liftover.py # lazy hg19→hg38 liftover
|
|
224
|
+
├── annotation.py # nearest-gene annotation, hits table
|
|
225
|
+
└── plotting/
|
|
226
|
+
├── __init__.py
|
|
227
|
+
├── linear.py # plot_linear
|
|
228
|
+
└── circular.py # plot_circular, compute_track_radii_dict
|
|
@@ -49,6 +49,9 @@ pvl_candidates = [pcol, 'P', 'P-value', 'Wald_P', 'pvalue', 'p_val', 'pval']
|
|
|
49
49
|
bld_candidates = [build, 'BUILD', 'Genome', 'Genome_Build', 'Genome-build']
|
|
50
50
|
```
|
|
51
51
|
|
|
52
|
+
> NB: Upper- and lower-case forms of the candidates are also considered, so each candidate is expanded into three forms.
|
|
53
|
+
|
|
54
|
+
|
|
52
55
|
Since GWAS summary stats files can be very large, to improve speed and memory efficiency, it is
|
|
53
56
|
**highly recommended** to use `-tp, --trim_pval` with a value to exclude variants with p-value above a
|
|
54
57
|
certain threshold, e.g. `0.01 (1e-2)` or `0.001 (1e-3)`.
|
|
@@ -12,7 +12,7 @@ sys.path.insert(0, os.path.abspath(".."))
|
|
|
12
12
|
project = "pycmplot"
|
|
13
13
|
copyright = "2026, Kevin Esoh"
|
|
14
14
|
author = "Kevin Esoh"
|
|
15
|
-
release = "0.
|
|
15
|
+
release = "0.2.0" # update to match your PyPI version
|
|
16
16
|
|
|
17
17
|
# -- General configuration -----------------------------------------------------
|
|
18
18
|
extensions = [
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pycmplot
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Multi-track circular and linear Manhattan plot generation for GWAS summary statistics
|
|
5
|
+
Author: Kevin Esoh
|
|
6
|
+
Author-email: Kevin Esoh <kesohku1@jh.edu>
|
|
7
|
+
License-Expression: CC-BY-NC-SA-4.0
|
|
8
|
+
Requires-Python: >=3.9
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Requires-Dist: pandas>=1.5
|
|
12
|
+
Requires-Dist: numpy>=1.23
|
|
13
|
+
Requires-Dist: matplotlib>=3.6
|
|
14
|
+
Requires-Dist: pillow>=9.0
|
|
15
|
+
Requires-Dist: pycirclize>=0.6
|
|
16
|
+
Requires-Dist: natsort>=8.0
|
|
17
|
+
Requires-Dist: adjustText>=0.8
|
|
18
|
+
Requires-Dist: pyliftover>=0.4
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: pytest; extra == "dev"
|
|
21
|
+
Requires-Dist: black; extra == "dev"
|
|
22
|
+
Requires-Dist: ruff; extra == "dev"
|
|
23
|
+
Requires-Dist: towncrier; extra == "dev"
|
|
24
|
+
Requires-Dist: sphinx; extra == "dev"
|
|
25
|
+
Dynamic: license-file
|
|
26
|
+
|
|
27
|
+
# pycmplot
|
|
28
|
+
|
|
29
|
+
Multi-track **circular** and **linear** Manhattan plot generation for GWAS summary statistics.
|
|
30
|
+
|
|
31
|
+
```
|
|
32
|
+
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
|
|
33
|
+
| PACKAGE FOR CIRCULAR AND LINEAR MANHATTAN PLOTTING |
|
|
34
|
+
| Kevin Esoh, 2026 |
|
|
35
|
+
| kesohku1@jh.edu |
|
|
36
|
+
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
This package will take any number of per SNP/variant summary statistics, be it GWAS,
|
|
40
|
+
selection scans (e.g. iHS, EHH, FST), etc and generate Manhattan plots. If given a single
|
|
41
|
+
file, a single one-track Manhattan plot will be generated. Multiple files will result in
|
|
42
|
+
the generation of a multi-track stacked Manhattan plot.
|
|
43
|
+
|
|
44
|
+
In the process, the package will generate a **hits summary table** for variants with p-value
|
|
45
|
+
(or whatever statistic for significance is used) below the user-specified significance threshold.
|
|
46
|
+
This hits summary table will contain annotated gene names, in addition to other annotations, that
|
|
47
|
+
would then be used to annotate the plots.
|
|
48
|
+
|
|
49
|
+
Importantly, the package allows for conversion of hg19 genomic coordinates to hg38 coordinates.
|
|
50
|
+
This ensures that summary stats obtained using different imputation panels, for instance, can be
|
|
51
|
+
processed in the same run. That is, users can simply concatenate multiple summary stats files together,
|
|
52
|
+
such as those for the same trait but analysed using different imputation panels. Users only need to
|
|
53
|
+
add a new column specifying the genome build (hg19 or hg38) of the variants. Then the `--build_column`
|
|
54
|
+
option of the package should be used to indicate the column and then the package will liftover all
|
|
55
|
+
positions in hg19 to hg38 ensuring that hits table generation and plotting are done with one unified
|
|
56
|
+
coordinate system.
|
|
57
|
+
|
|
58
|
+
A key functionality of the package is its ability to auto-detect certain columns if omitted on the
|
|
59
|
+
command-line or python API:
|
|
60
|
+
- Chromosome column: `-chr, --chrom_column` or omitted
|
|
61
|
+
- Basepair position column: `-pos, --pos_column` or omitted
|
|
62
|
+
- SNP or Marker ID column: `-snp, --snp_column` or omitted
|
|
63
|
+
- P-value (or whatever value) column: `-p, --pval_column` or omitted
|
|
64
|
+
- Build version column: `-b, --build_column` or omitted
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
Candidate names for each of the columns are shown below.
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
# Resolve column names
|
|
71
|
+
chr_candidates = [chrom, 'CHR', 'CHROM', 'Chromosome', '#CHROM', '#CHR', 'Chrom', 'chrom', 'chr', 'chromosome', '#chr', '#chrom']
|
|
72
|
+
pos_candidates = [pos, 'BP', 'POS', 'bp', 'pos', 'Basepair']
|
|
73
|
+
snp_candidates = [snp, 'SNP', 'RSID', 'rsID', 'MarkerName', 'MarkerID', 'Predictor', 'Marker', 'SNPID', 'ID']
|
|
74
|
+
pvl_candidates = [pcol, 'P', 'P-value', 'Wald_P', 'pvalue', 'p_val', 'pval']
|
|
75
|
+
bld_candidates = [build, 'BUILD', 'Genome', 'Genome_Build', 'Genome-build']
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
> NB: Upper- and lower-case forms of the candidates are also considered, so each candidate is expanded into three forms.
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
Since GWAS summary stats files can be very large, to improve speed and memory efficiency, it is
|
|
82
|
+
**highly recommended** to use `-tp, --trim_pval` with a value to exclude variants with p-value above a
|
|
83
|
+
certain threshold, e.g. `0.01 (1e-2)` or `0.001 (1e-3)`.
|
|
84
|
+
|
|
85
|
+
A potentially useful application is **comparative visualization** of results from multiple imputation panels,
|
|
86
|
+
multiple populations, or multiple traits to observe shared genetic architecture.
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## Installation
|
|
91
|
+
|
|
92
|
+
### From PyPI
|
|
93
|
+
```bash
|
|
94
|
+
pip install pycmplot
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
### From GitHub
|
|
99
|
+
```bash
|
|
100
|
+
git clone https://github.com/esohkevin/pycmplot.git
|
|
101
|
+
|
|
102
|
+
cd pycmplot
|
|
103
|
+
|
|
104
|
+
pip install -e .
|
|
105
|
+
|
|
106
|
+
# or
|
|
107
|
+
|
|
108
|
+
pip install -e . --break-system-packages
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
### Use python virtual environment if local installation is not possible
|
|
113
|
+
```bash
|
|
114
|
+
python -m venv ~/bin/pycmplot
|
|
115
|
+
|
|
116
|
+
source ~/bin/pycmplot/bin/activate
|
|
117
|
+
|
|
118
|
+
pip install --upgrade pip setuptools wheel
|
|
119
|
+
|
|
120
|
+
# then follow any of the installation steps above
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
### Test the installation
|
|
125
|
+
```bash
|
|
126
|
+
pycmplot -h
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### Dependencies
|
|
130
|
+
|
|
131
|
+
| Package | Purpose |
|
|
132
|
+
|---------|---------|
|
|
133
|
+
| pandas, numpy | Data loading & statistics |
|
|
134
|
+
| matplotlib | Plotting backend |
|
|
135
|
+
| pycirclize | Circular (Circos-style) tracks |
|
|
136
|
+
| natsort | Natural chromosome sorting |
|
|
137
|
+
| adjustText | Label collision avoidance |
|
|
138
|
+
| pyliftover | hg19 to hg38 coordinate conversion |
|
|
139
|
+
| Pillow | Image utilities |
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
## Command-line usage
|
|
145
|
+
|
|
146
|
+
### Linear Manhattan (default)
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
pycmplot \
|
|
150
|
+
--sum_stats HbF.tsv.gz,MCV.txt.gz,MCH.tsv.gz \
|
|
151
|
+
--labels HbF,MCV,MCH \
|
|
152
|
+
--logp \
|
|
153
|
+
--signif_line \
|
|
154
|
+
--highlight \
|
|
155
|
+
--annotate GENE \
|
|
156
|
+
--output_dir ./results \
|
|
157
|
+
--output_format png \
|
|
158
|
+
--dpi 300
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
### Circular Manhattan
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
pycmplot \
|
|
165
|
+
--sum_stats HbF.tsv.gz,MCV.tsv.gz \
|
|
166
|
+
--labels HbF,MCV \
|
|
167
|
+
--mode cm \
|
|
168
|
+
--trim_pval 0.01 \
|
|
169
|
+
--logp \
|
|
170
|
+
--signif_threshold \
|
|
171
|
+
--plot_title "RBC Traits" \
|
|
172
|
+
--output_dir ./results
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Key options
|
|
176
|
+
|
|
177
|
+
| Flag | Description | Default |
|
|
178
|
+
|------|-------------|---------|
|
|
179
|
+
| `-s, --sum_stats` | Comma-separated sumstats files | **required** |
|
|
180
|
+
| `-l, --labels` | Comma-separated track labels | **required** |
|
|
181
|
+
| `-b, --build_column` | Genome build column name (containing hg18/hg19/hg38) | **required** |
|
|
182
|
+
| `-m, --mode` | `lm` linear or `cm` circular | `lm` |
|
|
183
|
+
| `-qq, --qq_plot` | Also generate a QQ-plot | off (coming soon...) |
|
|
184
|
+
| `--logp` | Plot -log10(p) | off |
|
|
185
|
+
| `-sig, --signif_threshold` | Genome-wide significance threshold | off (auto 0.05/N) |
|
|
186
|
+
| `-sigl, --signif_line` | Value for genome-wide significance line if different from `-sig` | 5e-8 |
|
|
187
|
+
| `-sug, --suggest_threshold` | Threshold for suggestive signals | off |
|
|
188
|
+
| `-hl, --highlight` | Highlight significant loci | off |
|
|
189
|
+
| `-a, --annotate` | Annotate with `SNP` or `GENE` | `SNP` |
|
|
190
|
+
| `-tp, --trim_pval` | Trim variants above this p-value for speed | off |
|
|
191
|
+
| `-st, --sort_track` | Sort tracks by `label` or `chrom_len` | input order |
|
|
192
|
+
| `-od, --output_dir` | Output directory | `.` |
|
|
193
|
+
| `-of, --output_format` | Output format (`png`, `pdf`, `svg`, `jpg`) | `png` |
|
|
194
|
+
|
|
195
|
+
Run `pycmplot -h` for the full option list.
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## Python API
|
|
200
|
+
|
|
201
|
+
A demonstration of how to use the python API is provided in this notebook: https://github.com/esohkevin/pycmplot/blob/main/pycmplot_python_api.ipynb
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
---
|
|
205
|
+
|
|
206
|
+
## Package structure
|
|
207
|
+
|
|
208
|
+
```
|
|
209
|
+
pycmplot/
|
|
210
|
+
├── pyproject.toml
|
|
211
|
+
├── setup.py
|
|
212
|
+
├── setup.cfg
|
|
213
|
+
├── README.md
|
|
214
|
+
└── pycmplot/
|
|
215
|
+
├── __init__.py # public API exports
|
|
216
|
+
├── __main__.py # python -m pycmplot
|
|
217
|
+
├── _core.py # main() orchestration
|
|
218
|
+
├── cli.py # argparse definitions
|
|
219
|
+
├── constants.py # chromosome lengths, biotype weights
|
|
220
|
+
├── resources.py # external resource path config
|
|
221
|
+
├── io.py # sumstat loading, delimiter detection
|
|
222
|
+
├── stats.py # get_lead_snps, get_highlight_snps
|
|
223
|
+
├── liftover.py # lazy hg19→hg38 liftover
|
|
224
|
+
├── annotation.py # nearest-gene annotation, hits table
|
|
225
|
+
└── plotting/
|
|
226
|
+
├── __init__.py
|
|
227
|
+
├── linear.py # plot_linear
|
|
228
|
+
└── circular.py # plot_circular, compute_track_radii_dict
|
|
@@ -4,10 +4,10 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "pycmplot"
|
|
7
|
-
version = "0.
|
|
7
|
+
version = "0.2.0"
|
|
8
8
|
description = "Multi-track circular and linear Manhattan plot generation for GWAS summary statistics"
|
|
9
9
|
readme = "README.md"
|
|
10
|
-
license =
|
|
10
|
+
license = "CC-BY-NC-SA-4.0"
|
|
11
11
|
authors = [{ name = "Kevin Esoh", email = "kesohku1@jh.edu" }]
|
|
12
12
|
requires-python = ">=3.9"
|
|
13
13
|
|
|
@@ -33,6 +33,12 @@ pycmplot = "pycmplot._core:main"
|
|
|
33
33
|
|
|
34
34
|
[tool.setuptools.packages.find]
|
|
35
35
|
where = ["."]
|
|
36
|
+
exclude = [
|
|
37
|
+
"dist*", "changelog.d*", "build-pypi-package.md",
|
|
38
|
+
"pycmplot_python_api.ipynb", "mit-license.txt",
|
|
39
|
+
"pycmplot_docs", "docs"
|
|
40
|
+
]
|
|
41
|
+
|
|
36
42
|
|
|
37
43
|
[tool.towncrier]
|
|
38
44
|
package = "pycmplot"
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
[metadata]
|
|
2
2
|
name = pycmplot
|
|
3
|
-
version = 0.
|
|
3
|
+
version = 0.2.0
|
|
4
4
|
author = Kevin Esoh
|
|
5
|
-
author_email = kesohku1@
|
|
5
|
+
author_email = kesohku1@jh.edu
|
|
6
6
|
description = Multi-track circular and linear Manhattan plot generation for GWAS summary statistics
|
|
7
7
|
|
|
8
8
|
[options]
|
pycmplot-0.1.9/PKG-INFO
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: pycmplot
|
|
3
|
-
Version: 0.1.9
|
|
4
|
-
Summary: Multi-track circular and linear Manhattan plot generation for GWAS summary statistics
|
|
5
|
-
Home-page: UNKNOWN
|
|
6
|
-
Author: Kevin Esoh
|
|
7
|
-
Author-email: kesohku1@jhmi.edu
|
|
8
|
-
License: UNKNOWN
|
|
9
|
-
Platform: UNKNOWN
|
|
10
|
-
Requires-Python: >=3.9
|
|
11
|
-
License-File: LICENSE
|
|
12
|
-
|
|
13
|
-
UNKNOWN
|
|
14
|
-
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: pycmplot
|
|
3
|
-
Version: 0.1.9
|
|
4
|
-
Summary: Multi-track circular and linear Manhattan plot generation for GWAS summary statistics
|
|
5
|
-
Home-page: UNKNOWN
|
|
6
|
-
Author: Kevin Esoh
|
|
7
|
-
Author-email: kesohku1@jhmi.edu
|
|
8
|
-
License: UNKNOWN
|
|
9
|
-
Platform: UNKNOWN
|
|
10
|
-
Requires-Python: >=3.9
|
|
11
|
-
License-File: LICENSE
|
|
12
|
-
|
|
13
|
-
UNKNOWN
|
|
14
|
-
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
pycmplot
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|