pylocuszoom 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylocuszoom-0.1.0/.github/workflows/ci.yml +71 -0
- pylocuszoom-0.1.0/.github/workflows/publish.yml +23 -0
- pylocuszoom-0.1.0/.gitignore +25 -0
- pylocuszoom-0.1.0/LICENSE.md +17 -0
- pylocuszoom-0.1.0/PKG-INFO +367 -0
- pylocuszoom-0.1.0/README.md +325 -0
- pylocuszoom-0.1.0/docs/ARCHITECTURE.md +174 -0
- pylocuszoom-0.1.0/examples/getting_started.ipynb +351 -0
- pylocuszoom-0.1.0/logo.svg +56 -0
- pylocuszoom-0.1.0/pyproject.toml +73 -0
- pylocuszoom-0.1.0/src/pylocuszoom/__init__.py +120 -0
- pylocuszoom-0.1.0/src/pylocuszoom/backends/__init__.py +52 -0
- pylocuszoom-0.1.0/src/pylocuszoom/backends/base.py +341 -0
- pylocuszoom-0.1.0/src/pylocuszoom/backends/bokeh_backend.py +441 -0
- pylocuszoom-0.1.0/src/pylocuszoom/backends/matplotlib_backend.py +288 -0
- pylocuszoom-0.1.0/src/pylocuszoom/backends/plotly_backend.py +474 -0
- pylocuszoom-0.1.0/src/pylocuszoom/colors.py +107 -0
- pylocuszoom-0.1.0/src/pylocuszoom/eqtl.py +218 -0
- pylocuszoom-0.1.0/src/pylocuszoom/gene_track.py +311 -0
- pylocuszoom-0.1.0/src/pylocuszoom/labels.py +118 -0
- pylocuszoom-0.1.0/src/pylocuszoom/ld.py +209 -0
- pylocuszoom-0.1.0/src/pylocuszoom/logging.py +153 -0
- pylocuszoom-0.1.0/src/pylocuszoom/plotter.py +733 -0
- pylocuszoom-0.1.0/src/pylocuszoom/recombination.py +432 -0
- pylocuszoom-0.1.0/src/pylocuszoom/reference_data/__init__.py +4 -0
- pylocuszoom-0.1.0/src/pylocuszoom/utils.py +194 -0
- pylocuszoom-0.1.0/tests/conftest.py +60 -0
- pylocuszoom-0.1.0/tests/test_colors.py +110 -0
- pylocuszoom-0.1.0/tests/test_gene_track.py +200 -0
- pylocuszoom-0.1.0/tests/test_labels.py +167 -0
- pylocuszoom-0.1.0/tests/test_ld.py +276 -0
- pylocuszoom-0.1.0/tests/test_logging.py +52 -0
- pylocuszoom-0.1.0/tests/test_plotter.py +257 -0
- pylocuszoom-0.1.0/tests/test_recombination.py +177 -0
- pylocuszoom-0.1.0/uv.lock +1817 -0
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
lint:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
|
|
15
|
+
- name: Install uv
|
|
16
|
+
uses: astral-sh/setup-uv@v5
|
|
17
|
+
|
|
18
|
+
- name: Set up Python
|
|
19
|
+
run: uv python install 3.11
|
|
20
|
+
|
|
21
|
+
- name: Run ruff check
|
|
22
|
+
run: uvx ruff check src tests
|
|
23
|
+
|
|
24
|
+
- name: Run ruff format check
|
|
25
|
+
run: uvx ruff format --check src tests
|
|
26
|
+
|
|
27
|
+
test:
|
|
28
|
+
runs-on: ubuntu-latest
|
|
29
|
+
strategy:
|
|
30
|
+
fail-fast: false
|
|
31
|
+
matrix:
|
|
32
|
+
python-version: ["3.10", "3.11", "3.12"]
|
|
33
|
+
|
|
34
|
+
steps:
|
|
35
|
+
- uses: actions/checkout@v4
|
|
36
|
+
|
|
37
|
+
- name: Install uv
|
|
38
|
+
uses: astral-sh/setup-uv@v5
|
|
39
|
+
|
|
40
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
41
|
+
run: uv python install ${{ matrix.python-version }}
|
|
42
|
+
|
|
43
|
+
- name: Install dependencies
|
|
44
|
+
run: uv sync --extra dev --extra all
|
|
45
|
+
|
|
46
|
+
- name: Run tests
|
|
47
|
+
run: uv run pytest --cov=pylocuszoom --cov-report=xml
|
|
48
|
+
|
|
49
|
+
- name: Upload coverage
|
|
50
|
+
uses: codecov/codecov-action@v4
|
|
51
|
+
if: matrix.python-version == '3.11'
|
|
52
|
+
with:
|
|
53
|
+
files: ./coverage.xml
|
|
54
|
+
fail_ci_if_error: false
|
|
55
|
+
|
|
56
|
+
build:
|
|
57
|
+
runs-on: ubuntu-latest
|
|
58
|
+
steps:
|
|
59
|
+
- uses: actions/checkout@v4
|
|
60
|
+
|
|
61
|
+
- name: Install uv
|
|
62
|
+
uses: astral-sh/setup-uv@v5
|
|
63
|
+
|
|
64
|
+
- name: Build package
|
|
65
|
+
run: uv build
|
|
66
|
+
|
|
67
|
+
- name: Upload artifacts
|
|
68
|
+
uses: actions/upload-artifact@v4
|
|
69
|
+
with:
|
|
70
|
+
name: dist
|
|
71
|
+
path: dist/
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
publish:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
environment: pypi
|
|
11
|
+
permissions:
|
|
12
|
+
id-token: write
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
|
|
16
|
+
- name: Install uv
|
|
17
|
+
uses: astral-sh/setup-uv@v5
|
|
18
|
+
|
|
19
|
+
- name: Build package
|
|
20
|
+
run: uv build
|
|
21
|
+
|
|
22
|
+
- name: Publish to PyPI
|
|
23
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
dist/
|
|
6
|
+
build/
|
|
7
|
+
|
|
8
|
+
# Virtual environments
|
|
9
|
+
.venv/
|
|
10
|
+
|
|
11
|
+
# Testing/linting caches
|
|
12
|
+
.pytest_cache/
|
|
13
|
+
.ruff_cache/
|
|
14
|
+
.benchmarks/
|
|
15
|
+
.coverage
|
|
16
|
+
htmlcov/
|
|
17
|
+
|
|
18
|
+
# IDE
|
|
19
|
+
.idea/
|
|
20
|
+
.vscode/
|
|
21
|
+
*.swp
|
|
22
|
+
.claude/
|
|
23
|
+
|
|
24
|
+
# Project instructions (private)
|
|
25
|
+
CLAUDE.md
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
GNU GENERAL PUBLIC LICENSE
|
|
2
|
+
Version 3, 29 June 2007
|
|
3
|
+
|
|
4
|
+
Copyright (C) 2026 Michael Denyer
|
|
5
|
+
|
|
6
|
+
This program is free software: you can redistribute it and/or modify
|
|
7
|
+
it under the terms of the GNU General Public License as published by
|
|
8
|
+
the Free Software Foundation, either version 3 of the License, or
|
|
9
|
+
(at your option) any later version.
|
|
10
|
+
|
|
11
|
+
This program is distributed in the hope that it will be useful,
|
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14
|
+
GNU General Public License for more details.
|
|
15
|
+
|
|
16
|
+
You should have received a copy of the GNU General Public License
|
|
17
|
+
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
@@ -0,0 +1,367 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pylocuszoom
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Regional association plots for GWAS results with LD coloring, gene tracks, and recombination rate overlays
|
|
5
|
+
Project-URL: Homepage, https://github.com/michael-denyer/pylocuszoom
|
|
6
|
+
Project-URL: Documentation, https://github.com/michael-denyer/pylocuszoom#readme
|
|
7
|
+
Project-URL: Repository, https://github.com/michael-denyer/pylocuszoom
|
|
8
|
+
Author: Michael Denyer
|
|
9
|
+
License-Expression: GPL-3.0-or-later
|
|
10
|
+
License-File: LICENSE.md
|
|
11
|
+
Keywords: genetics,gwas,locus-zoom,locuszoom,regional-plot,visualization
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Visualization
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Requires-Dist: bokeh>=3.8.2
|
|
23
|
+
Requires-Dist: kaleido>=0.2.0
|
|
24
|
+
Requires-Dist: loguru>=0.7.0
|
|
25
|
+
Requires-Dist: matplotlib>=3.5.0
|
|
26
|
+
Requires-Dist: numpy>=1.21.0
|
|
27
|
+
Requires-Dist: pandas>=1.4.0
|
|
28
|
+
Requires-Dist: plotly>=5.0.0
|
|
29
|
+
Requires-Dist: pyliftover>=0.4
|
|
30
|
+
Provides-Extra: all
|
|
31
|
+
Requires-Dist: adjusttext>=0.8; extra == 'all'
|
|
32
|
+
Requires-Dist: pyspark>=3.0.0; extra == 'all'
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
35
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
36
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
37
|
+
Provides-Extra: labels
|
|
38
|
+
Requires-Dist: adjusttext>=0.8; extra == 'labels'
|
|
39
|
+
Provides-Extra: spark
|
|
40
|
+
Requires-Dist: pyspark>=3.0.0; extra == 'spark'
|
|
41
|
+
Description-Content-Type: text/markdown
|
|
42
|
+
|
|
43
|
+
# pyLocusZoom
|
|
44
|
+
|
|
45
|
+
[CI status](https://github.com/michael-denyer/pyLocusZoom/actions/workflows/ci.yml)
|
|
46
|
+
[License: GPL v3](https://www.gnu.org/licenses/gpl-3.0)
|
|
47
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
48
|
+
[Code style: Ruff](https://github.com/astral-sh/ruff)
|
|
49
|
+
|
|
50
|
+
[Matplotlib](https://matplotlib.org/)
|
|
51
|
+
[Plotly](https://plotly.com/python/)
|
|
52
|
+
[Bokeh](https://bokeh.org/)
|
|
53
|
+
[pandas](https://pandas.pydata.org/)
|
|
54
|
+
|
|
55
|
+
<img src="logo.svg" alt="pyLocusZoom logo" width="120" align="right">
|
|
56
|
+
|
|
57
|
+
Regional association plots for GWAS results with LD coloring, gene tracks, and recombination rate overlays.
|
|
58
|
+
|
|
59
|
+
Inspired by [LocusZoom](http://locuszoom.org/) and [locuszoomr](https://github.com/myles-lewis/locuszoomr).
|
|
60
|
+
|
|
61
|
+
## Features
|
|
62
|
+
|
|
63
|
+
- **LD coloring**: SNPs colored by linkage disequilibrium (R²) with lead variant
|
|
64
|
+
- **Gene track**: Annotated gene/exon positions below the association plot
|
|
65
|
+
- **Recombination rate**: Overlay showing recombination rate across the region (built-in maps for *Canis lupus familiaris* only; for other species, supply your own via `recomb_df` or `recomb_data_dir`)
|
|
66
|
+
- **SNP labels**: Automatic labeling of top SNPs with RS ID or nearest gene
|
|
67
|
+
- **Species support**: Built-in *Canis lupus familiaris* (CanFam3.1/CanFam4), *Felis catus* (FelCat9), or custom species
|
|
68
|
+
- **CanFam4 support**: Automatic coordinate liftover for recombination maps
|
|
69
|
+
- **Multiple backends**: matplotlib (static), plotly (interactive), bokeh (dashboards)
|
|
70
|
+
- **Stacked plots**: Compare multiple GWAS/phenotypes vertically
|
|
71
|
+
- **eQTL overlay**: Expression QTL data as separate panel
|
|
72
|
+
- **PySpark support**: Handles large-scale genomics DataFrames
|
|
73
|
+
|
|
74
|
+
## Installation
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
uv add pylocuszoom
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Or with pip:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
pip install pylocuszoom
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Quick Start
|
|
87
|
+
|
|
88
|
+
```python
|
|
89
|
+
from pylocuszoom import LocusZoomPlotter
|
|
90
|
+
|
|
91
|
+
# Initialize plotter (loads reference data for dog)
|
|
92
|
+
plotter = LocusZoomPlotter(species="dog")
|
|
93
|
+
|
|
94
|
+
# Create regional plot
|
|
95
|
+
fig = plotter.plot(
|
|
96
|
+
gwas_df, # DataFrame with ps, p_wald, rs columns
|
|
97
|
+
chrom=1,
|
|
98
|
+
start=1000000,
|
|
99
|
+
end=2000000,
|
|
100
|
+
lead_pos=1500000, # Highlight lead SNP
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
fig.savefig("regional_plot.png", dpi=150)
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Full Example
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
from pylocuszoom import LocusZoomPlotter
|
|
110
|
+
|
|
111
|
+
plotter = LocusZoomPlotter(
|
|
112
|
+
species="dog", # or "cat", or None for custom
|
|
113
|
+
plink_path="/path/to/plink", # Optional, auto-detects if on PATH
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
fig = plotter.plot(
|
|
117
|
+
gwas_df,
|
|
118
|
+
chrom=1,
|
|
119
|
+
start=1000000,
|
|
120
|
+
end=2000000,
|
|
121
|
+
lead_pos=1500000,
|
|
122
|
+
ld_reference_file="genotypes.bed", # For LD calculation
|
|
123
|
+
genes_df=genes_df, # Gene annotations
|
|
124
|
+
exons_df=exons_df, # Exon annotations
|
|
125
|
+
show_recombination=True, # Overlay recombination rate
|
|
126
|
+
snp_labels=True, # Label top SNPs
|
|
127
|
+
label_top_n=5, # How many to label
|
|
128
|
+
pos_col="ps", # Column name for position
|
|
129
|
+
p_col="p_wald", # Column name for p-value
|
|
130
|
+
rs_col="rs", # Column name for SNP ID
|
|
131
|
+
figsize=(12, 8),
|
|
132
|
+
)
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## Genome Builds
|
|
136
|
+
|
|
137
|
+
The default genome build for dog is CanFam3.1. For CanFam4 data:
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
plotter = LocusZoomPlotter(species="dog", genome_build="canfam4")
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
Recombination maps are automatically lifted over from CanFam3.1 to CanFam4 coordinates using the UCSC liftOver chain file.
|
|
144
|
+
|
|
145
|
+
## Using with Other Species
|
|
146
|
+
|
|
147
|
+
```python
|
|
148
|
+
# Cat (LD and gene tracks, user provides recombination data)
|
|
149
|
+
plotter = LocusZoomPlotter(species="cat")
|
|
150
|
+
|
|
151
|
+
# Custom species (provide all reference data)
|
|
152
|
+
plotter = LocusZoomPlotter(
|
|
153
|
+
species=None,
|
|
154
|
+
recomb_data_dir="/path/to/recomb_maps/",
|
|
155
|
+
)
|
|
156
|
+
|
|
157
|
+
# Or provide data per-plot
|
|
158
|
+
fig = plotter.plot(
|
|
159
|
+
gwas_df,
|
|
160
|
+
chrom=1, start=1000000, end=2000000,
|
|
161
|
+
recomb_df=my_recomb_dataframe,
|
|
162
|
+
genes_df=my_genes_df,
|
|
163
|
+
)
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
## Interactive Backends
|
|
167
|
+
|
|
168
|
+
Choose between static (matplotlib) and interactive (plotly, bokeh) outputs:
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
# Static publication-quality plot (default)
|
|
172
|
+
plotter = LocusZoomPlotter(species="dog", backend="matplotlib")
|
|
173
|
+
fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000)
|
|
174
|
+
fig.savefig("plot.png", dpi=150)
|
|
175
|
+
|
|
176
|
+
# Interactive with plotly (hover tooltips, zoom/pan)
|
|
177
|
+
plotter = LocusZoomPlotter(species="dog", backend="plotly")
|
|
178
|
+
fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000)
|
|
179
|
+
fig.write_html("plot.html")
|
|
180
|
+
|
|
181
|
+
# Interactive with bokeh (dashboard-friendly)
|
|
182
|
+
plotter = LocusZoomPlotter(species="dog", backend="bokeh")
|
|
183
|
+
fig = plotter.plot(gwas_df, chrom=1, start=1000000, end=2000000)
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
Interactive plots show SNP details (RS ID, p-value, R²) on hover.
|
|
187
|
+
|
|
188
|
+
## Stacked Plots
|
|
189
|
+
|
|
190
|
+
Compare multiple GWAS results vertically with shared x-axis:
|
|
191
|
+
|
|
192
|
+
```python
|
|
193
|
+
fig = plotter.plot_stacked(
|
|
194
|
+
[gwas_height, gwas_bmi, gwas_whr],
|
|
195
|
+
chrom=1,
|
|
196
|
+
start=1000000,
|
|
197
|
+
end=2000000,
|
|
198
|
+
panel_labels=["Height", "BMI", "WHR"],
|
|
199
|
+
genes_df=genes_df,
|
|
200
|
+
)
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
## eQTL Overlay
|
|
204
|
+
|
|
205
|
+
Add expression QTL data as a separate panel:
|
|
206
|
+
|
|
207
|
+
```python
|
|
208
|
+
eqtl_df = pd.DataFrame({
|
|
209
|
+
"pos": [1000500, 1001200, 1002000],
|
|
210
|
+
"p_value": [1e-6, 1e-4, 0.01],
|
|
211
|
+
"gene": ["BRCA1", "BRCA1", "BRCA1"],
|
|
212
|
+
})
|
|
213
|
+
|
|
214
|
+
fig = plotter.plot_stacked(
|
|
215
|
+
[gwas_df],
|
|
216
|
+
chrom=1, start=1000000, end=2000000,
|
|
217
|
+
eqtl_df=eqtl_df,
|
|
218
|
+
eqtl_gene="BRCA1",
|
|
219
|
+
genes_df=genes_df,
|
|
220
|
+
)
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
## PySpark Support
|
|
224
|
+
|
|
225
|
+
For large-scale genomics data, pass PySpark DataFrames directly:
|
|
226
|
+
|
|
227
|
+
```python
|
|
228
|
+
from pylocuszoom import LocusZoomPlotter, to_pandas
|
|
229
|
+
|
|
230
|
+
# PySpark DataFrame (automatically converted)
|
|
231
|
+
fig = plotter.plot(spark_gwas_df, chrom=1, start=1000000, end=2000000)
|
|
232
|
+
|
|
233
|
+
# Or convert manually with sampling for very large data
|
|
234
|
+
pandas_df = to_pandas(spark_gwas_df, sample_size=100000)
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
Install PySpark support: `uv add pylocuszoom[spark]`
|
|
238
|
+
|
|
239
|
+
## Data Formats
|
|
240
|
+
|
|
241
|
+
### GWAS Results DataFrame
|
|
242
|
+
|
|
243
|
+
Expected columns (names configurable via `pos_col`, `p_col`, `rs_col`; see the Required column for which are mandatory):
|
|
244
|
+
|
|
245
|
+
| Column | Type | Required | Description |
|
|
246
|
+
|--------|------|----------|-------------|
|
|
247
|
+
| `ps` | int | Yes | Genomic position in base pairs (1-based). Must match coordinate system of genes/recombination data. |
|
|
248
|
+
| `p_wald` | float | Yes | Association p-value (0 < p ≤ 1). Values are -log10 transformed for plotting. |
|
|
249
|
+
| `rs` | str | No | SNP identifier (e.g., "rs12345" or "chr1:12345"). Used for labeling top SNPs if `snp_labels=True`. |
|
|
250
|
+
|
|
251
|
+
Example:
|
|
252
|
+
```python
|
|
253
|
+
gwas_df = pd.DataFrame({
|
|
254
|
+
"ps": [1000000, 1000500, 1001000],
|
|
255
|
+
"p_wald": [1e-8, 1e-6, 0.05],
|
|
256
|
+
"rs": ["rs123", "rs456", "rs789"],
|
|
257
|
+
})
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
### Genes DataFrame
|
|
261
|
+
|
|
262
|
+
| Column | Type | Required | Description |
|
|
263
|
+
|--------|------|----------|-------------|
|
|
264
|
+
| `chr` | str or int | Yes | Chromosome identifier. Accepts "1", "chr1", or 1. The "chr" prefix is stripped for matching. |
|
|
265
|
+
| `start` | int | Yes | Gene start position (bp, 1-based). Transcript start for strand-aware genes. |
|
|
266
|
+
| `end` | int | Yes | Gene end position (bp, 1-based). Must be ≥ start. |
|
|
267
|
+
| `gene_name` | str | Yes | Gene symbol displayed in track (e.g., "BRCA1", "TP53"). Keep short for readability. |
|
|
268
|
+
|
|
269
|
+
Example:
|
|
270
|
+
```python
|
|
271
|
+
genes_df = pd.DataFrame({
|
|
272
|
+
"chr": ["1", "1", "1"],
|
|
273
|
+
"start": [1000000, 1050000, 1100000],
|
|
274
|
+
"end": [1020000, 1080000, 1150000],
|
|
275
|
+
"gene_name": ["GENE1", "GENE2", "GENE3"],
|
|
276
|
+
})
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
### Exons DataFrame (optional)
|
|
280
|
+
|
|
281
|
+
Provides exon/intron structure. If omitted, genes are drawn as simple rectangles.
|
|
282
|
+
|
|
283
|
+
| Column | Type | Required | Description |
|
|
284
|
+
|--------|------|----------|-------------|
|
|
285
|
+
| `chr` | str or int | Yes | Chromosome identifier. |
|
|
286
|
+
| `start` | int | Yes | Exon start position (bp). |
|
|
287
|
+
| `end` | int | Yes | Exon end position (bp). |
|
|
288
|
+
| `gene_name` | str | Yes | Parent gene symbol. Must match `gene_name` in genes DataFrame. |
|
|
289
|
+
|
|
290
|
+
### Recombination DataFrame
|
|
291
|
+
|
|
292
|
+
| Column | Type | Required | Description |
|
|
293
|
+
|--------|------|----------|-------------|
|
|
294
|
+
| `pos` | int | Yes | Genomic position (bp). Should span the plotted region with reasonable density (every ~10kb). |
|
|
295
|
+
| `rate` | float | Yes | Recombination rate in centiMorgans per megabase (cM/Mb). Typical range: 0-50 cM/Mb. |
|
|
296
|
+
|
|
297
|
+
Example:
|
|
298
|
+
```python
|
|
299
|
+
recomb_df = pd.DataFrame({
|
|
300
|
+
"pos": [1000000, 1010000, 1020000],
|
|
301
|
+
"rate": [0.5, 2.3, 1.1],
|
|
302
|
+
})
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
### Recombination Map Files
|
|
306
|
+
|
|
307
|
+
When using `recomb_data_dir`, files must be named `chr{N}_recomb.tsv` (e.g., `chr1_recomb.tsv`, `chrX_recomb.tsv`).
|
|
308
|
+
|
|
309
|
+
Format: Tab-separated with header row:
|
|
310
|
+
|
|
311
|
+
| Column | Description |
|
|
312
|
+
|--------|-------------|
|
|
313
|
+
| `chr` | Chromosome name without the "chr" prefix (e.g., `1` or `X`) |
|
|
314
|
+
| `pos` | Position in base pairs |
|
|
315
|
+
| `rate` | Recombination rate (cM/Mb) |
|
|
316
|
+
| `cM` | Cumulative genetic distance (optional, not used for plotting) |
|
|
317
|
+
|
|
318
|
+
```
|
|
319
|
+
chr pos rate cM
|
|
320
|
+
1 10000 0.5 0.005
|
|
321
|
+
1 20000 1.2 0.017
|
|
322
|
+
1 30000 0.8 0.025
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
## Reference Data
|
|
326
|
+
|
|
327
|
+
Dog recombination maps are downloaded from [Campbell et al. 2016](https://github.com/cflerin/dog_recombination) on first use.
|
|
328
|
+
|
|
329
|
+
To manually download:
|
|
330
|
+
|
|
331
|
+
```python
|
|
332
|
+
from pylocuszoom import download_dog_recombination_maps
|
|
333
|
+
|
|
334
|
+
download_dog_recombination_maps()
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
## Logging
|
|
338
|
+
|
|
339
|
+
Logging uses [loguru](https://github.com/Delgan/loguru) and is configured via the `log_level` parameter (default: `"INFO"`):
|
|
340
|
+
|
|
341
|
+
```python
|
|
342
|
+
# Suppress logging
|
|
343
|
+
plotter = LocusZoomPlotter(log_level=None)
|
|
344
|
+
|
|
345
|
+
# Enable DEBUG level for troubleshooting
|
|
346
|
+
plotter = LocusZoomPlotter(log_level="DEBUG")
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
## Requirements
|
|
350
|
+
|
|
351
|
+
- Python >= 3.10
|
|
352
|
+
- matplotlib >= 3.5.0
|
|
353
|
+
- pandas >= 1.4.0
|
|
354
|
+
- numpy >= 1.21.0
|
|
355
|
+
- loguru >= 0.7.0
|
|
356
|
+
- plotly >= 5.0.0
|
|
357
|
+
- bokeh >= 3.8.2
|
|
358
|
+
- kaleido >= 0.2.0 (for plotly static export)
|
|
359
|
+
- pyliftover >= 0.4 (for CanFam4 coordinate liftover)
|
|
360
|
+
- [PLINK 1.9](https://www.cog-genomics.org/plink/) (for LD calculations) - must be on PATH, or its location given via `plink_path`
|
|
361
|
+
|
|
362
|
+
Optional:
|
|
363
|
+
- pyspark >= 3.0.0 (for PySpark DataFrame support) - `uv add pylocuszoom[spark]`
|
|
364
|
+
|
|
365
|
+
## License
|
|
366
|
+
|
|
367
|
+
GPL-3.0-or-later
|