quantnado 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quantnado-0.3.2/.github/workflows/build-container.yml +36 -0
- quantnado-0.3.2/.github/workflows/deploy-docs.yml +48 -0
- quantnado-0.3.2/.github/workflows/pypi.yml +31 -0
- quantnado-0.3.2/.github/workflows/python-tests.yml +25 -0
- quantnado-0.3.2/.gitignore +25 -0
- quantnado-0.3.2/Dockerfile +26 -0
- quantnado-0.3.2/LICENSE +21 -0
- quantnado-0.3.2/PKG-INFO +181 -0
- quantnado-0.3.2/README.md +141 -0
- quantnado-0.3.2/docs/api/bamstore.md +4 -0
- quantnado-0.3.2/docs/api/quantnado.md +4 -0
- quantnado-0.3.2/docs/assets/images/logo.png +0 -0
- quantnado-0.3.2/docs/basic_usage.md +134 -0
- quantnado-0.3.2/docs/cli/call_peaks.md +338 -0
- quantnado-0.3.2/docs/cli/create_dataset.md +249 -0
- quantnado-0.3.2/docs/cli/index.md +71 -0
- quantnado-0.3.2/docs/cli.md +291 -0
- quantnado-0.3.2/docs/examples.md +349 -0
- quantnado-0.3.2/docs/faq.md +365 -0
- quantnado-0.3.2/docs/index.md +63 -0
- quantnado-0.3.2/docs/installation.md +123 -0
- quantnado-0.3.2/docs/quick_start.md +71 -0
- quantnado-0.3.2/docs/troubleshooting.md +682 -0
- quantnado-0.3.2/environment.yaml +34 -0
- quantnado-0.3.2/example/create_dataset.ipynb +1967 -0
- quantnado-0.3.2/example/explore_dataset.ipynb +495 -0
- quantnado-0.3.2/explore_dataset.ipynb +1234 -0
- quantnado-0.3.2/mkdocs.yml +153 -0
- quantnado-0.3.2/pyproject.toml +78 -0
- quantnado-0.3.2/quantnado/__init__.py +13 -0
- quantnado-0.3.2/quantnado/_version.py +34 -0
- quantnado-0.3.2/quantnado/api.py +595 -0
- quantnado-0.3.2/quantnado/cli.py +164 -0
- quantnado-0.3.2/quantnado/dataset/__init__.py +19 -0
- quantnado-0.3.2/quantnado/dataset/bam.py +984 -0
- quantnado-0.3.2/quantnado/dataset/core.py +344 -0
- quantnado-0.3.2/quantnado/dataset/counts.py +236 -0
- quantnado-0.3.2/quantnado/dataset/enums.py +30 -0
- quantnado-0.3.2/quantnado/dataset/features.py +371 -0
- quantnado-0.3.2/quantnado/dataset/metadata.py +77 -0
- quantnado-0.3.2/quantnado/dataset/pca.py +242 -0
- quantnado-0.3.2/quantnado/dataset/ranges.py +97 -0
- quantnado-0.3.2/quantnado/dataset/reduce.py +947 -0
- quantnado-0.3.2/quantnado/peak_calling/call_quantile_peaks.py +152 -0
- quantnado-0.3.2/quantnado/utils.py +218 -0
- quantnado-0.3.2/quantnado.egg-info/PKG-INFO +181 -0
- quantnado-0.3.2/quantnado.egg-info/SOURCES.txt +66 -0
- quantnado-0.3.2/quantnado.egg-info/dependency_links.txt +1 -0
- quantnado-0.3.2/quantnado.egg-info/entry_points.txt +4 -0
- quantnado-0.3.2/quantnado.egg-info/requires.txt +29 -0
- quantnado-0.3.2/quantnado.egg-info/top_level.txt +5 -0
- quantnado-0.3.2/setup.cfg +4 -0
- quantnado-0.3.2/setup.py +3 -0
- quantnado-0.3.2/tests/cli/__init__.py +0 -0
- quantnado-0.3.2/tests/cli/test_cli.py +261 -0
- quantnado-0.3.2/tests/conftest.py +81 -0
- quantnado-0.3.2/tests/integration/__init__.py +0 -0
- quantnado-0.3.2/tests/integration/test_bam_store.py +340 -0
- quantnado-0.3.2/tests/integration/test_feature_counts.py +83 -0
- quantnado-0.3.2/tests/integration/test_peak_calling.py +69 -0
- quantnado-0.3.2/tests/integration/test_quantnado_api.py +190 -0
- quantnado-0.3.2/tests/integration/test_reduce.py +196 -0
- quantnado-0.3.2/tests/test_bam_store.py +399 -0
- quantnado-0.3.2/tests/test_dataset_flow.py +272 -0
- quantnado-0.3.2/tests/unit/__init__.py +0 -0
- quantnado-0.3.2/tests/unit/test_enums.py +83 -0
- quantnado-0.3.2/tests/unit/test_pca.py +105 -0
- quantnado-0.3.2/tests/unit/test_utils.py +111 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
name: Build CI Container
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
paths:
|
|
8
|
+
- 'pyproject.toml'
|
|
9
|
+
- 'Dockerfile'
|
|
10
|
+
- '.github/workflows/build-container.yml'
|
|
11
|
+
workflow_dispatch:
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
build:
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
permissions:
|
|
17
|
+
contents: read
|
|
18
|
+
packages: write
|
|
19
|
+
|
|
20
|
+
steps:
|
|
21
|
+
- name: Checkout repository
|
|
22
|
+
uses: actions/checkout@v4
|
|
23
|
+
|
|
24
|
+
- name: Log in to GitHub Container Registry
|
|
25
|
+
uses: docker/login-action@v3
|
|
26
|
+
with:
|
|
27
|
+
registry: ghcr.io
|
|
28
|
+
username: ${{ github.actor }}
|
|
29
|
+
password: ${{ secrets.GITHUB_TOKEN }}
|
|
30
|
+
|
|
31
|
+
- name: Build and push
|
|
32
|
+
uses: docker/build-push-action@v5
|
|
33
|
+
with:
|
|
34
|
+
context: .
|
|
35
|
+
push: true
|
|
36
|
+
tags: ghcr.io/milne-group/quantnado-ci:latest
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
name: Build and Deploy Documentation
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: write
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
deploy:
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
with:
|
|
17
|
+
fetch-depth: 0
|
|
18
|
+
|
|
19
|
+
- uses: actions/setup-python@v5
|
|
20
|
+
with:
|
|
21
|
+
python-version: "3.13"
|
|
22
|
+
|
|
23
|
+
- id: cache-id
|
|
24
|
+
run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_OUTPUT
|
|
25
|
+
|
|
26
|
+
- uses: actions/cache@v3
|
|
27
|
+
with:
|
|
28
|
+
key: mkdocs-material-${{ steps.cache-id.outputs.cache_id }}
|
|
29
|
+
path: .cache
|
|
30
|
+
restore-keys: |
|
|
31
|
+
mkdocs-material-
|
|
32
|
+
|
|
33
|
+
- name: Install OS dependencies
|
|
34
|
+
run: |
|
|
35
|
+
sudo apt-get update
|
|
36
|
+
sudo apt-get install -y gcc git cmake make libtool g++ perl coreutils \
|
|
37
|
+
libcurl4-openssl-dev libbz2-dev liblzma-dev zlib1g-dev
|
|
38
|
+
|
|
39
|
+
- name: Install package with docs dependencies
|
|
40
|
+
run: |
|
|
41
|
+
pip install mkdocs mkdocs-material mkdocstrings mkdocstrings-python pymdown-extensions
|
|
42
|
+
SETUPTOOLS_SCM_PRETEND_VERSION=0.0.0 pip install -e . --no-deps
|
|
43
|
+
|
|
44
|
+
- name: Build
|
|
45
|
+
run: mkdocs build
|
|
46
|
+
|
|
47
|
+
- name: Deploy
|
|
48
|
+
run: mkdocs gh-deploy --force
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
contents: read
|
|
9
|
+
id-token: write
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
deploy:
|
|
13
|
+
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
with:
|
|
19
|
+
fetch-depth: 0
|
|
20
|
+
- name: Set up Python
|
|
21
|
+
uses: actions/setup-python@v3
|
|
22
|
+
with:
|
|
23
|
+
python-version: '3.x'
|
|
24
|
+
- name: Install dependencies
|
|
25
|
+
run: |
|
|
26
|
+
python -m pip install --upgrade pip --no-cache-dir
|
|
27
|
+
pip install build --no-cache-dir
|
|
28
|
+
- name: Build package
|
|
29
|
+
run: python -m build
|
|
30
|
+
- name: Publish package
|
|
31
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
name: Run Python Tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push
|
|
5
|
+
|
|
6
|
+
jobs:
|
|
7
|
+
test:
|
|
8
|
+
runs-on: ubuntu-latest
|
|
9
|
+
|
|
10
|
+
steps:
|
|
11
|
+
- name: Checkout code
|
|
12
|
+
uses: actions/checkout@v4
|
|
13
|
+
|
|
14
|
+
- name: Set up Python
|
|
15
|
+
uses: actions/setup-python@v4
|
|
16
|
+
with:
|
|
17
|
+
python-version: '3.13'
|
|
18
|
+
|
|
19
|
+
- name: Install dependencies
|
|
20
|
+
run: |
|
|
21
|
+
python -m pip install --upgrade pip
|
|
22
|
+
pip install ".[dev]"
|
|
23
|
+
|
|
24
|
+
- name: Run tests
|
|
25
|
+
run: pytest tests
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
.DS_Store
|
|
2
|
+
.vscode
|
|
3
|
+
.vscode/
|
|
4
|
+
*__pycache__/
|
|
5
|
+
*.bed
|
|
6
|
+
*.egg-info/
|
|
7
|
+
*.log
|
|
8
|
+
*.sh
|
|
9
|
+
*.zarr
|
|
10
|
+
*/_version.py
|
|
11
|
+
build/
|
|
12
|
+
data/
|
|
13
|
+
epiquant/
|
|
14
|
+
example/*.log
|
|
15
|
+
example/combined_metadata.csv
|
|
16
|
+
example/figures/
|
|
17
|
+
logs/
|
|
18
|
+
modality
|
|
19
|
+
my_test/
|
|
20
|
+
notebooks/
|
|
21
|
+
site/
|
|
22
|
+
sps*
|
|
23
|
+
test_dataset.ipynb
|
|
24
|
+
test_output/
|
|
25
|
+
202*
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
FROM python:3.13-slim
|
|
2
|
+
|
|
3
|
+
WORKDIR /app
|
|
4
|
+
|
|
5
|
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
6
|
+
gcc \
|
|
7
|
+
g++ \
|
|
8
|
+
make \
|
|
9
|
+
zlib1g-dev \
|
|
10
|
+
libbz2-dev \
|
|
11
|
+
liblzma-dev \
|
|
12
|
+
libcurl4-openssl-dev \
|
|
13
|
+
libssl-dev \
|
|
14
|
+
git \
|
|
15
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
16
|
+
|
|
17
|
+
COPY pyproject.toml ./
|
|
18
|
+
# Dummy package so pip can resolve extras without the full source
|
|
19
|
+
RUN mkdir -p quantnado && touch quantnado/__init__.py
|
|
20
|
+
|
|
21
|
+
ENV SETUPTOOLS_SCM_PRETEND_VERSION=0.0.0
|
|
22
|
+
|
|
23
|
+
RUN pip install --no-cache-dir --upgrade pip && \
|
|
24
|
+
pip install --no-cache-dir ".[dev]"
|
|
25
|
+
|
|
26
|
+
WORKDIR /workspace
|
quantnado-0.3.2/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Catherine Chahrour
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
quantnado-0.3.2/PKG-INFO
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: quantnado
|
|
3
|
+
Version: 0.3.2
|
|
4
|
+
Summary: Dataset generation and peak calling for multi-modal Next-Generation Sequencing data
|
|
5
|
+
Author: Alastair Smith
|
|
6
|
+
Author-email: Catherine Chahrour <catherine.chahrour@imm.ox.ac.uk>
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
Requires-Python: <3.14,>=3.12
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Requires-Dist: bamnado
|
|
12
|
+
Requires-Dist: crested
|
|
13
|
+
Requires-Dist: dask-ml<2027,>=2025
|
|
14
|
+
Requires-Dist: dask>=2026
|
|
15
|
+
Requires-Dist: icechunk>=1.1
|
|
16
|
+
Requires-Dist: loguru>=0.7
|
|
17
|
+
Requires-Dist: numpy<3.0,>=2.0
|
|
18
|
+
Requires-Dist: pandas<4.0,>=3.0
|
|
19
|
+
Requires-Dist: pyBigWig>=0.3
|
|
20
|
+
Requires-Dist: pyranges==0.1.4
|
|
21
|
+
Requires-Dist: pysam>=0.23
|
|
22
|
+
Requires-Dist: seaborn>=0.13
|
|
23
|
+
Requires-Dist: sparse>=0.18
|
|
24
|
+
Requires-Dist: tqdm>=4.67
|
|
25
|
+
Requires-Dist: typer>=0.24
|
|
26
|
+
Requires-Dist: xarray>=2026
|
|
27
|
+
Requires-Dist: zarr<4.0,>=3.0
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: mkdocs-material; extra == "dev"
|
|
30
|
+
Requires-Dist: mkdocs>=1.6; extra == "dev"
|
|
31
|
+
Requires-Dist: mkdocstrings-python; extra == "dev"
|
|
32
|
+
Requires-Dist: mkdocstrings; extra == "dev"
|
|
33
|
+
Requires-Dist: pymdown-extensions; extra == "dev"
|
|
34
|
+
Requires-Dist: pytest; extra == "dev"
|
|
35
|
+
Requires-Dist: ruff; extra == "dev"
|
|
36
|
+
Requires-Dist: ipykernel; extra == "dev"
|
|
37
|
+
Requires-Dist: pysam; extra == "dev"
|
|
38
|
+
Requires-Dist: crested; extra == "dev"
|
|
39
|
+
Dynamic: license-file
|
|
40
|
+
|
|
41
|
+
# QuantNado
|
|
42
|
+
|
|
43
|
+
**Dataset generation and peak calling for multi-modal Next-Generation Sequencing data.**
|
|
44
|
+
|
|
45
|
+
[Tests](https://github.com/Milne-Group/QuantNado/actions/workflows/python-tests.yml)
|
|
46
|
+
[PyPI](https://pypi.org/project/quantnado)
|
|
47
|
+
[Documentation](https://milne-group.github.io/QuantNado/)
|
|
48
|
+
[License: MIT](LICENSE)
|
|
49
|
+
[Python versions](https://pypi.org/project/quantnado)
|
|
50
|
+
|
|
51
|
+
QuantNado provides efficient Zarr-backed storage and analysis of genomic signal from BAM and bigWig files, with support for signal reduction, feature counting, dimensionality reduction, and quantile-based peak calling.
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## Installation
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pip install quantnado
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Requires Python 3.12 or 3.13.
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## Quick Start
|
|
66
|
+
|
|
67
|
+
### Create a dataset from BAM files
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from quantnado import QuantNado
|
|
71
|
+
|
|
72
|
+
qn = QuantNado.from_bam_files(
|
|
73
|
+
bam_files=["sample1.bam", "sample2.bam", "sample3.bam"],
|
|
74
|
+
store_path="dataset.zarr",
|
|
75
|
+
metadata="samples.csv", # optional
|
|
76
|
+
)
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Load and analyse an existing dataset
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
from quantnado import QuantNado
|
|
83
|
+
|
|
84
|
+
qn = QuantNado.open("dataset.zarr")
|
|
85
|
+
|
|
86
|
+
# Aggregate signal over genomic ranges
|
|
87
|
+
promoter_signal = qn.reduce("promoters.bed", reduction="mean")
|
|
88
|
+
print(promoter_signal["mean"].shape) # (n_promoters, n_samples)
|
|
89
|
+
|
|
90
|
+
# PCA on reduced signal
|
|
91
|
+
pca_obj, transformed = qn.pca(promoter_signal["mean"], n_components=10)
|
|
92
|
+
print(transformed.shape) # (n_samples, 10)
|
|
93
|
+
|
|
94
|
+
# Generate a count matrix for DESeq2
|
|
95
|
+
counts, features = qn.feature_counts("genes.gtf", feature_type="gene")
|
|
96
|
+
counts.to_csv("counts.csv")
|
|
97
|
+
|
|
98
|
+
# Extract signal over a specific region
|
|
99
|
+
region = qn.extract_region("chr1:1000-5000")
|
|
100
|
+
print(region.shape) # (n_samples, 4000)
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
## Command-line Interface
|
|
106
|
+
|
|
107
|
+
QuantNado installs a `quantnado` command with two subcommands.
|
|
108
|
+
|
|
109
|
+
### `create-dataset` — build a Zarr dataset from BAM files
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
quantnado create-dataset sample1.bam sample2.bam sample3.bam \
|
|
113
|
+
--output dataset.zarr \
|
|
114
|
+
--chromsizes hg38.chrom.sizes \
|
|
115
|
+
--metadata samples.csv \
|
|
116
|
+
--max-workers 8
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### `call-peaks` — call quantile-based peaks from bigWig files
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
quantnado call-peaks \
|
|
123
|
+
--bigwig-dir path/to/bigwigs/ \
|
|
124
|
+
--output-dir peaks/ \
|
|
125
|
+
--chromsizes hg38.chrom.sizes \
|
|
126
|
+
--quantile 0.98
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Run `quantnado --help` or `quantnado <subcommand> --help` for full option listings.
|
|
130
|
+
|
|
131
|
+
---
|
|
132
|
+
|
|
133
|
+
## API Reference
|
|
134
|
+
|
|
135
|
+
Full documentation is available at [milne-group.github.io/QuantNado](https://milne-group.github.io/QuantNado/).
|
|
136
|
+
|
|
137
|
+
### `QuantNado`
|
|
138
|
+
|
|
139
|
+
| Method / Property | Description |
|
|
140
|
+
|---|---|
|
|
141
|
+
| `QuantNado.from_bam_files(bam_files, store_path, ...)` | Create a new dataset from BAM files |
|
|
142
|
+
| `QuantNado.open(store_path, read_only=True)` | Open an existing dataset |
|
|
143
|
+
| `.reduce(ranges, reduction="mean")` | Aggregate signal over genomic ranges (BED) |
|
|
144
|
+
| `.feature_counts(gtf_file, feature_type="gene")` | Generate a DESeq2-compatible count matrix |
|
|
145
|
+
| `.pca(data, n_components=10)` | Run PCA on a signal matrix |
|
|
146
|
+
| `.extract_region(region)` | Extract raw signal for a genomic region |
|
|
147
|
+
| `.to_xarray(chromosomes)` | Load dataset as lazy xarray DataArrays |
|
|
148
|
+
| `.samples` | List of sample names |
|
|
149
|
+
| `.metadata` | Sample metadata (DataFrame) |
|
|
150
|
+
| `.chromosomes` | Available chromosome names |
|
|
151
|
+
| `.chromsizes` | Chromosome sizes (dict) |
|
|
152
|
+
| `.store_path` | Path to the underlying Zarr store |
|
|
153
|
+
|
|
154
|
+
---
|
|
155
|
+
|
|
156
|
+
## Requirements
|
|
157
|
+
|
|
158
|
+
| Dependency | Purpose |
|
|
159
|
+
|---|---|
|
|
160
|
+
| `zarr`, `icechunk` | Zarr v3 storage backend |
|
|
161
|
+
| `xarray`, `dask` | Lazy array operations |
|
|
162
|
+
| `pandas`, `numpy` | Data structures |
|
|
163
|
+
| `pysam`, `bamnado` | BAM file I/O |
|
|
164
|
+
| `pyBigWig` | bigWig I/O |
|
|
165
|
+
| `pyranges` | Genomic range operations |
|
|
166
|
+
| `scikit-learn` (via `dask-ml`) | PCA |
|
|
167
|
+
| `typer`, `loguru` | CLI and logging |
|
|
168
|
+
|
|
169
|
+
Optional extras for deep learning-based peak calling:
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
pip install "quantnado[ml]"
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
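Installs `torch`, `modisco-lite`, and `crested`. Note: this release's package metadata declares only the `dev` extra and already lists `crested` as a core dependency, so confirm that the `ml` extra exists in your installed version before relying on it.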
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
## License
|
|
180
|
+
|
|
181
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# QuantNado
|
|
2
|
+
|
|
3
|
+
**Dataset generation and peak calling for multi-modal Next-Generation Sequencing data.**
|
|
4
|
+
|
|
5
|
+
[Tests](https://github.com/Milne-Group/QuantNado/actions/workflows/python-tests.yml)
|
|
6
|
+
[PyPI](https://pypi.org/project/quantnado)
|
|
7
|
+
[Documentation](https://milne-group.github.io/QuantNado/)
|
|
8
|
+
[License: MIT](LICENSE)
|
|
9
|
+
[Python versions](https://pypi.org/project/quantnado)
|
|
10
|
+
|
|
11
|
+
QuantNado provides efficient Zarr-backed storage and analysis of genomic signal from BAM and bigWig files, with support for signal reduction, feature counting, dimensionality reduction, and quantile-based peak calling.
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pip install quantnado
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
Requires Python 3.12 or 3.13.
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
### Create a dataset from BAM files
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
from quantnado import QuantNado
|
|
31
|
+
|
|
32
|
+
qn = QuantNado.from_bam_files(
|
|
33
|
+
bam_files=["sample1.bam", "sample2.bam", "sample3.bam"],
|
|
34
|
+
store_path="dataset.zarr",
|
|
35
|
+
metadata="samples.csv", # optional
|
|
36
|
+
)
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Load and analyse an existing dataset
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
from quantnado import QuantNado
|
|
43
|
+
|
|
44
|
+
qn = QuantNado.open("dataset.zarr")
|
|
45
|
+
|
|
46
|
+
# Aggregate signal over genomic ranges
|
|
47
|
+
promoter_signal = qn.reduce("promoters.bed", reduction="mean")
|
|
48
|
+
print(promoter_signal["mean"].shape) # (n_promoters, n_samples)
|
|
49
|
+
|
|
50
|
+
# PCA on reduced signal
|
|
51
|
+
pca_obj, transformed = qn.pca(promoter_signal["mean"], n_components=10)
|
|
52
|
+
print(transformed.shape) # (n_samples, 10)
|
|
53
|
+
|
|
54
|
+
# Generate a count matrix for DESeq2
|
|
55
|
+
counts, features = qn.feature_counts("genes.gtf", feature_type="gene")
|
|
56
|
+
counts.to_csv("counts.csv")
|
|
57
|
+
|
|
58
|
+
# Extract signal over a specific region
|
|
59
|
+
region = qn.extract_region("chr1:1000-5000")
|
|
60
|
+
print(region.shape) # (n_samples, 4000)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## Command-line Interface
|
|
66
|
+
|
|
67
|
+
QuantNado installs a `quantnado` command with two subcommands.
|
|
68
|
+
|
|
69
|
+
### `create-dataset` — build a Zarr dataset from BAM files
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
quantnado create-dataset sample1.bam sample2.bam sample3.bam \
|
|
73
|
+
--output dataset.zarr \
|
|
74
|
+
--chromsizes hg38.chrom.sizes \
|
|
75
|
+
--metadata samples.csv \
|
|
76
|
+
--max-workers 8
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### `call-peaks` — call quantile-based peaks from bigWig files
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
quantnado call-peaks \
|
|
83
|
+
--bigwig-dir path/to/bigwigs/ \
|
|
84
|
+
--output-dir peaks/ \
|
|
85
|
+
--chromsizes hg38.chrom.sizes \
|
|
86
|
+
--quantile 0.98
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Run `quantnado --help` or `quantnado <subcommand> --help` for full option listings.
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
## API Reference
|
|
94
|
+
|
|
95
|
+
Full documentation is available at [milne-group.github.io/QuantNado](https://milne-group.github.io/QuantNado/).
|
|
96
|
+
|
|
97
|
+
### `QuantNado`
|
|
98
|
+
|
|
99
|
+
| Method / Property | Description |
|
|
100
|
+
|---|---|
|
|
101
|
+
| `QuantNado.from_bam_files(bam_files, store_path, ...)` | Create a new dataset from BAM files |
|
|
102
|
+
| `QuantNado.open(store_path, read_only=True)` | Open an existing dataset |
|
|
103
|
+
| `.reduce(ranges, reduction="mean")` | Aggregate signal over genomic ranges (BED) |
|
|
104
|
+
| `.feature_counts(gtf_file, feature_type="gene")` | Generate a DESeq2-compatible count matrix |
|
|
105
|
+
| `.pca(data, n_components=10)` | Run PCA on a signal matrix |
|
|
106
|
+
| `.extract_region(region)` | Extract raw signal for a genomic region |
|
|
107
|
+
| `.to_xarray(chromosomes)` | Load dataset as lazy xarray DataArrays |
|
|
108
|
+
| `.samples` | List of sample names |
|
|
109
|
+
| `.metadata` | Sample metadata (DataFrame) |
|
|
110
|
+
| `.chromosomes` | Available chromosome names |
|
|
111
|
+
| `.chromsizes` | Chromosome sizes (dict) |
|
|
112
|
+
| `.store_path` | Path to the underlying Zarr store |
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
## Requirements
|
|
117
|
+
|
|
118
|
+
| Dependency | Purpose |
|
|
119
|
+
|---|---|
|
|
120
|
+
| `zarr`, `icechunk` | Zarr v3 storage backend |
|
|
121
|
+
| `xarray`, `dask` | Lazy array operations |
|
|
122
|
+
| `pandas`, `numpy` | Data structures |
|
|
123
|
+
| `pysam`, `bamnado` | BAM file I/O |
|
|
124
|
+
| `pyBigWig` | bigWig I/O |
|
|
125
|
+
| `pyranges` | Genomic range operations |
|
|
126
|
+
| `scikit-learn` (via `dask-ml`) | PCA |
|
|
127
|
+
| `typer`, `loguru` | CLI and logging |
|
|
128
|
+
|
|
129
|
+
Optional extras for deep learning-based peak calling:
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
pip install "quantnado[ml]"
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
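Installs `torch`, `modisco-lite`, and `crested`. Note: this release's package metadata declares only the `dev` extra and already lists `crested` as a core dependency, so confirm that the `ml` extra exists in your installed version before relying on it.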
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
## License
|
|
140
|
+
|
|
141
|
+
MIT — see [LICENSE](LICENSE).
|
|
Binary file
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
# Basic Usage
|
|
2
|
+
|
|
3
|
+
Common workflows and analysis patterns with QuantNado.
|
|
4
|
+
|
|
5
|
+
## Creating Datasets
|
|
6
|
+
|
|
7
|
+
### From Single BAM File
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
quantnado create-dataset sample.bam \
|
|
11
|
+
--output sample.zarr \
|
|
12
|
+
--chromsizes hg38.chrom.sizes
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
### From Multiple BAM Files
|
|
16
|
+
|
|
17
|
+
Process all BAM files in a directory:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
quantnado create-dataset results/aligned/*.bam \
|
|
21
|
+
--output my_cohort.zarr \
|
|
22
|
+
--chromsizes hg38.chrom.sizes \
|
|
23
|
+
--max-workers 8
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### With Sample Metadata
|
|
27
|
+
|
|
28
|
+
Create a CSV with sample information (`metadata.csv`):
|
|
29
|
+
|
|
30
|
+
```
|
|
31
|
+
sample_id,condition,replicate
|
|
32
|
+
sample1,control,1
|
|
33
|
+
sample2,control,2
|
|
34
|
+
sample3,treatment,1
|
|
35
|
+
sample4,treatment,2
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Then:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
quantnado create-dataset *.bam \
|
|
42
|
+
--output dataset.zarr \
|
|
43
|
+
--chromsizes hg38.chrom.sizes \
|
|
44
|
+
--metadata metadata.csv
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Peak Calling
|
|
48
|
+
|
|
49
|
+
### Basic Peak Calling
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
quantnado call-peaks \
|
|
53
|
+
--bigwig-dir ./bigwigs/ \
|
|
54
|
+
--output-dir ./peaks/ \
|
|
55
|
+
--chromsizes hg38.chrom.sizes \
|
|
56
|
+
--quantile 0.98
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Output: One BED file per sample in `./peaks/`
|
|
60
|
+
|
|
61
|
+
### Filtering and Merging
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
quantnado call-peaks \
|
|
65
|
+
--bigwig-dir ./bigwigs/ \
|
|
66
|
+
--output-dir ./peaks/ \
|
|
67
|
+
--chromsizes hg38.chrom.sizes \
|
|
68
|
+
--quantile 0.95 \
|
|
69
|
+
--merge # Merge overlapping peaks
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Python API
|
|
73
|
+
|
|
74
|
+
### Loading a Dataset
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from quantnado import QuantNado
|
|
78
|
+
|
|
79
|
+
# Open existing dataset
|
|
80
|
+
qn = QuantNado.open("my_dataset.zarr")
|
|
81
|
+
|
|
82
|
+
# Check samples
|
|
83
|
+
print(qn.samples)
|
|
84
|
+
print(qn.metadata)
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### Accessing Data
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
# Get data for a specific region (returns xarray DataArray)
|
|
91
|
+
region_data = qn.extract_region("chr1:1000000-2000000")
|
|
92
|
+
|
|
93
|
+
# Get all chromosomes as a dict of lazy xarray DataArrays
|
|
94
|
+
all_chroms = qn.to_xarray()
|
|
95
|
+
chr1_data = all_chroms["chr1"]
|
|
96
|
+
|
|
97
|
+
# Compute and convert to pandas DataFrame
|
|
98
|
+
df = chr1_data.to_dataframe(name="signal")
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### Creating Datasets Programmatically
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
from quantnado import BamStore
|
|
105
|
+
import pandas as pd
|
|
106
|
+
|
|
107
|
+
# Create from BAM files
|
|
108
|
+
store = BamStore.from_bam_files(
|
|
109
|
+
bam_files=["sample1.bam", "sample2.bam"],
|
|
110
|
+
chromsizes="hg38.chrom.sizes",
|
|
111
|
+
store_path="dataset.zarr",
|
|
112
|
+
max_workers=8
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
# Add metadata
|
|
116
|
+
metadata = pd.DataFrame({
|
|
117
|
+
"sample_id": ["sample1", "sample2"],
|
|
118
|
+
"condition": ["control", "treatment"]
|
|
119
|
+
})
|
|
120
|
+
store.set_metadata(metadata)
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Common Issues
|
|
124
|
+
|
|
125
|
+
**Q: How long does dataset creation take?**
|
|
126
|
+
A: It depends on BAM file size (typically 20 minutes to 2 hours for whole-genome data). Use `--max-workers` to parallelize.
|
|
127
|
+
|
|
128
|
+
**Q: Can I resume interrupted dataset creation?**
|
|
129
|
+
A: Yes. Pass `--resume` to the CLI (or `resume=True` in Python) and QuantNado will skip already-completed samples. Without this flag the default behaviour is `--overwrite`, which deletes any existing store at that path.
|
|
130
|
+
|
|
131
|
+
**Q: What genomic coordinates are supported?**
|
|
132
|
+
A: Any chromosomes and coordinates defined in your chromsizes file are supported, so QuantNado works with any organism (human, mouse, yeast, etc.).
|
|
133
|
+
|
|
134
|
+
See [Troubleshooting](troubleshooting.md) for more help.
|