quantnado 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. quantnado-0.3.2/.github/workflows/build-container.yml +36 -0
  2. quantnado-0.3.2/.github/workflows/deploy-docs.yml +48 -0
  3. quantnado-0.3.2/.github/workflows/pypi.yml +31 -0
  4. quantnado-0.3.2/.github/workflows/python-tests.yml +25 -0
  5. quantnado-0.3.2/.gitignore +25 -0
  6. quantnado-0.3.2/Dockerfile +26 -0
  7. quantnado-0.3.2/LICENSE +21 -0
  8. quantnado-0.3.2/PKG-INFO +181 -0
  9. quantnado-0.3.2/README.md +141 -0
  10. quantnado-0.3.2/docs/api/bamstore.md +4 -0
  11. quantnado-0.3.2/docs/api/quantnado.md +4 -0
  12. quantnado-0.3.2/docs/assets/images/logo.png +0 -0
  13. quantnado-0.3.2/docs/basic_usage.md +134 -0
  14. quantnado-0.3.2/docs/cli/call_peaks.md +338 -0
  15. quantnado-0.3.2/docs/cli/create_dataset.md +249 -0
  16. quantnado-0.3.2/docs/cli/index.md +71 -0
  17. quantnado-0.3.2/docs/cli.md +291 -0
  18. quantnado-0.3.2/docs/examples.md +349 -0
  19. quantnado-0.3.2/docs/faq.md +365 -0
  20. quantnado-0.3.2/docs/index.md +63 -0
  21. quantnado-0.3.2/docs/installation.md +123 -0
  22. quantnado-0.3.2/docs/quick_start.md +71 -0
  23. quantnado-0.3.2/docs/troubleshooting.md +682 -0
  24. quantnado-0.3.2/environment.yaml +34 -0
  25. quantnado-0.3.2/example/create_dataset.ipynb +1967 -0
  26. quantnado-0.3.2/example/explore_dataset.ipynb +495 -0
  27. quantnado-0.3.2/explore_dataset.ipynb +1234 -0
  28. quantnado-0.3.2/mkdocs.yml +153 -0
  29. quantnado-0.3.2/pyproject.toml +78 -0
  30. quantnado-0.3.2/quantnado/__init__.py +13 -0
  31. quantnado-0.3.2/quantnado/_version.py +34 -0
  32. quantnado-0.3.2/quantnado/api.py +595 -0
  33. quantnado-0.3.2/quantnado/cli.py +164 -0
  34. quantnado-0.3.2/quantnado/dataset/__init__.py +19 -0
  35. quantnado-0.3.2/quantnado/dataset/bam.py +984 -0
  36. quantnado-0.3.2/quantnado/dataset/core.py +344 -0
  37. quantnado-0.3.2/quantnado/dataset/counts.py +236 -0
  38. quantnado-0.3.2/quantnado/dataset/enums.py +30 -0
  39. quantnado-0.3.2/quantnado/dataset/features.py +371 -0
  40. quantnado-0.3.2/quantnado/dataset/metadata.py +77 -0
  41. quantnado-0.3.2/quantnado/dataset/pca.py +242 -0
  42. quantnado-0.3.2/quantnado/dataset/ranges.py +97 -0
  43. quantnado-0.3.2/quantnado/dataset/reduce.py +947 -0
  44. quantnado-0.3.2/quantnado/peak_calling/call_quantile_peaks.py +152 -0
  45. quantnado-0.3.2/quantnado/utils.py +218 -0
  46. quantnado-0.3.2/quantnado.egg-info/PKG-INFO +181 -0
  47. quantnado-0.3.2/quantnado.egg-info/SOURCES.txt +66 -0
  48. quantnado-0.3.2/quantnado.egg-info/dependency_links.txt +1 -0
  49. quantnado-0.3.2/quantnado.egg-info/entry_points.txt +4 -0
  50. quantnado-0.3.2/quantnado.egg-info/requires.txt +29 -0
  51. quantnado-0.3.2/quantnado.egg-info/top_level.txt +5 -0
  52. quantnado-0.3.2/setup.cfg +4 -0
  53. quantnado-0.3.2/setup.py +3 -0
  54. quantnado-0.3.2/tests/cli/__init__.py +0 -0
  55. quantnado-0.3.2/tests/cli/test_cli.py +261 -0
  56. quantnado-0.3.2/tests/conftest.py +81 -0
  57. quantnado-0.3.2/tests/integration/__init__.py +0 -0
  58. quantnado-0.3.2/tests/integration/test_bam_store.py +340 -0
  59. quantnado-0.3.2/tests/integration/test_feature_counts.py +83 -0
  60. quantnado-0.3.2/tests/integration/test_peak_calling.py +69 -0
  61. quantnado-0.3.2/tests/integration/test_quantnado_api.py +190 -0
  62. quantnado-0.3.2/tests/integration/test_reduce.py +196 -0
  63. quantnado-0.3.2/tests/test_bam_store.py +399 -0
  64. quantnado-0.3.2/tests/test_dataset_flow.py +272 -0
  65. quantnado-0.3.2/tests/unit/__init__.py +0 -0
  66. quantnado-0.3.2/tests/unit/test_enums.py +83 -0
  67. quantnado-0.3.2/tests/unit/test_pca.py +105 -0
  68. quantnado-0.3.2/tests/unit/test_utils.py +111 -0
@@ -0,0 +1,36 @@
1
+ name: Build CI Container
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ paths:
8
+ - 'pyproject.toml'
9
+ - 'Dockerfile'
10
+ - '.github/workflows/build-container.yml'
11
+ workflow_dispatch:
12
+
13
+ jobs:
14
+ build:
15
+ runs-on: ubuntu-latest
16
+ permissions:
17
+ contents: read
18
+ packages: write
19
+
20
+ steps:
21
+ - name: Checkout repository
22
+ uses: actions/checkout@v4
23
+
24
+ - name: Log in to GitHub Container Registry
25
+ uses: docker/login-action@v3
26
+ with:
27
+ registry: ghcr.io
28
+ username: ${{ github.actor }}
29
+ password: ${{ secrets.GITHUB_TOKEN }}
30
+
31
+ - name: Build and push
32
+ uses: docker/build-push-action@v5
33
+ with:
34
+ context: .
35
+ push: true
36
+ tags: ghcr.io/milne-group/quantnado-ci:latest
@@ -0,0 +1,48 @@
1
+ name: Build and Deploy Documentation
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+
8
+ permissions:
9
+ contents: write
10
+
11
+ jobs:
12
+ deploy:
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+ with:
17
+ fetch-depth: 0
18
+
19
+ - uses: actions/setup-python@v5
20
+ with:
21
+ python-version: "3.13"
22
+
23
+ - id: cache-id
24
+ run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_OUTPUT
25
+
26
+ - uses: actions/cache@v3
27
+ with:
28
+ key: mkdocs-material-${{ steps.cache-id.outputs.cache_id }}
29
+ path: .cache
30
+ restore-keys: |
31
+ mkdocs-material-
32
+
33
+ - name: Install OS dependencies
34
+ run: |
35
+ sudo apt-get update
36
+ sudo apt-get install -y gcc git cmake make libtool g++ perl coreutils \
37
+ libcurl4-openssl-dev libbz2-dev liblzma-dev zlib1g-dev
38
+
39
+ - name: Install package with docs dependencies
40
+ run: |
41
+ pip install mkdocs mkdocs-material mkdocstrings mkdocstrings-python pymdown-extensions
42
+ SETUPTOOLS_SCM_PRETEND_VERSION=0.0.0 pip install -e . --no-deps
43
+
44
+ - name: Build
45
+ run: mkdocs build
46
+
47
+ - name: Deploy
48
+ run: mkdocs gh-deploy --force
@@ -0,0 +1,31 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions:
8
+ contents: read
9
+ id-token: write
10
+
11
+ jobs:
12
+ deploy:
13
+
14
+ runs-on: ubuntu-latest
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+ with:
19
+ fetch-depth: 0
20
+ - name: Set up Python
21
+ uses: actions/setup-python@v3
22
+ with:
23
+ python-version: '3.x'
24
+ - name: Install dependencies
25
+ run: |
26
+ python -m pip install --upgrade pip --no-cache-dir
27
+ pip install build --no-cache-dir
28
+ - name: Build package
29
+ run: python -m build
30
+ - name: Publish package
31
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,25 @@
1
+ name: Run Python Tests
2
+
3
+ on:
4
+ push
5
+
6
+ jobs:
7
+ test:
8
+ runs-on: ubuntu-latest
9
+
10
+ steps:
11
+ - name: Checkout code
12
+ uses: actions/checkout@v4
13
+
14
+ - name: Set up Python
15
+ uses: actions/setup-python@v4
16
+ with:
17
+ python-version: '3.13'
18
+
19
+ - name: Install dependencies
20
+ run: |
21
+ python -m pip install --upgrade pip
22
+ pip install ".[dev]"
23
+
24
+ - name: Run tests
25
+ run: pytest tests
@@ -0,0 +1,25 @@
1
+ .DS_Store
2
+ .vscode
3
+ .vscode/
4
+ *__pycache__/
5
+ *.bed
6
+ *.egg-info/
7
+ *.log
8
+ *.sh
9
+ *.zarr
10
+ */_version.py
11
+ build/
12
+ data/
13
+ epiquant/
14
+ example/*.log
15
+ example/combined_metadata.csv
16
+ example/figures/
17
+ logs/
18
+ modality
19
+ my_test/
20
+ notebooks/
21
+ site/
22
+ sps*
23
+ test_dataset.ipynb
24
+ test_output/
25
+ 202*
@@ -0,0 +1,26 @@
1
+ FROM python:3.13-slim
2
+
3
+ WORKDIR /app
4
+
5
+ RUN apt-get update && apt-get install -y --no-install-recommends \
6
+ gcc \
7
+ g++ \
8
+ make \
9
+ zlib1g-dev \
10
+ libbz2-dev \
11
+ liblzma-dev \
12
+ libcurl4-openssl-dev \
13
+ libssl-dev \
14
+ git \
15
+ && rm -rf /var/lib/apt/lists/*
16
+
17
+ COPY pyproject.toml ./
18
+ # Dummy package so pip can resolve extras without the full source
19
+ RUN mkdir -p quantnado && touch quantnado/__init__.py
20
+
21
+ ENV SETUPTOOLS_SCM_PRETEND_VERSION=0.0.0
22
+
23
+ RUN pip install --no-cache-dir --upgrade pip && \
24
+ pip install --no-cache-dir ".[dev]"
25
+
26
+ WORKDIR /workspace
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Catherine Chahrour
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,181 @@
1
+ Metadata-Version: 2.4
2
+ Name: quantnado
3
+ Version: 0.3.2
4
+ Summary: Dataset generation and peak calling for multi-modal Next-Generation Sequencing data
5
+ Author: Alastair Smith
6
+ Author-email: Catherine Chahrour <catherine.chahrour@imm.ox.ac.uk>
7
+ License-Expression: MIT
8
+ Requires-Python: <3.14,>=3.12
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Requires-Dist: bamnado
12
+ Requires-Dist: crested
13
+ Requires-Dist: dask-ml<2027,>=2025
14
+ Requires-Dist: dask>=2026
15
+ Requires-Dist: icechunk>=1.1
16
+ Requires-Dist: loguru>=0.7
17
+ Requires-Dist: numpy<3.0,>=2.0
18
+ Requires-Dist: pandas<4.0,>=3.0
19
+ Requires-Dist: pyBigWig>=0.3
20
+ Requires-Dist: pyranges==0.1.4
21
+ Requires-Dist: pysam>=0.23
22
+ Requires-Dist: seaborn>=0.13
23
+ Requires-Dist: sparse>=0.18
24
+ Requires-Dist: tqdm>=4.67
25
+ Requires-Dist: typer>=0.24
26
+ Requires-Dist: xarray>=2026
27
+ Requires-Dist: zarr<4.0,>=3.0
28
+ Provides-Extra: dev
29
+ Requires-Dist: mkdocs-material; extra == "dev"
30
+ Requires-Dist: mkdocs>=1.6; extra == "dev"
31
+ Requires-Dist: mkdocstrings-python; extra == "dev"
32
+ Requires-Dist: mkdocstrings; extra == "dev"
33
+ Requires-Dist: pymdown-extensions; extra == "dev"
34
+ Requires-Dist: pytest; extra == "dev"
35
+ Requires-Dist: ruff; extra == "dev"
36
+ Requires-Dist: ipykernel; extra == "dev"
37
+ Requires-Dist: pysam; extra == "dev"
38
+ Requires-Dist: crested; extra == "dev"
39
+ Dynamic: license-file
40
+
41
+ # QuantNado
42
+
43
+ **Dataset generation and peak calling for multi-modal Next-Generation Sequencing data.**
44
+
45
+ [![CI](https://github.com/Milne-Group/QuantNado/actions/workflows/python-tests.yml/badge.svg)](https://github.com/Milne-Group/QuantNado/actions/workflows/python-tests.yml)
46
+ [![PyPI](https://img.shields.io/pypi/v/quantnado)](https://pypi.org/project/quantnado)
47
+ [![Docs](https://img.shields.io/badge/docs-milne--group.github.io-blue)](https://milne-group.github.io/QuantNado/)
48
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
49
+ [![Python](https://img.shields.io/badge/python-3.12%20%7C%203.13-blue)](https://pypi.org/project/quantnado)
50
+
51
+ QuantNado provides efficient Zarr-backed storage and analysis of genomic signal from BAM and bigWig files, with support for signal reduction, feature counting, dimensionality reduction, and quantile-based peak calling.
52
+
53
+ ---
54
+
55
+ ## Installation
56
+
57
+ ```bash
58
+ pip install quantnado
59
+ ```
60
+
61
+ Requires Python 3.12 or 3.13.
62
+
63
+ ---
64
+
65
+ ## Quick Start
66
+
67
+ ### Create a dataset from BAM files
68
+
69
+ ```python
70
+ from quantnado import QuantNado
71
+
72
+ qn = QuantNado.from_bam_files(
73
+ bam_files=["sample1.bam", "sample2.bam", "sample3.bam"],
74
+ store_path="dataset.zarr",
75
+ metadata="samples.csv", # optional
76
+ )
77
+ ```
78
+
79
+ ### Load and analyse an existing dataset
80
+
81
+ ```python
82
+ from quantnado import QuantNado
83
+
84
+ qn = QuantNado.open("dataset.zarr")
85
+
86
+ # Aggregate signal over genomic ranges
87
+ promoter_signal = qn.reduce("promoters.bed", reduction="mean")
88
+ print(promoter_signal["mean"].shape) # (n_promoters, n_samples)
89
+
90
+ # PCA on reduced signal
91
+ pca_obj, transformed = qn.pca(promoter_signal["mean"], n_components=10)
92
+ print(transformed.shape) # (n_samples, 10)
93
+
94
+ # Generate a count matrix for DESeq2
95
+ counts, features = qn.feature_counts("genes.gtf", feature_type="gene")
96
+ counts.to_csv("counts.csv")
97
+
98
+ # Extract signal over a specific region
99
+ region = qn.extract_region("chr1:1000-5000")
100
+ print(region.shape) # (n_samples, 4000)
101
+ ```
102
+
103
+ ---
104
+
105
+ ## Command-line Interface
106
+
107
+ QuantNado installs a `quantnado` command with two subcommands.
108
+
109
+ ### `create-dataset` — build a Zarr dataset from BAM files
110
+
111
+ ```bash
112
+ quantnado create-dataset sample1.bam sample2.bam sample3.bam \
113
+ --output dataset.zarr \
114
+ --chromsizes hg38.chrom.sizes \
115
+ --metadata samples.csv \
116
+ --max-workers 8
117
+ ```
118
+
119
+ ### `call-peaks` — call quantile-based peaks from bigWig files
120
+
121
+ ```bash
122
+ quantnado call-peaks \
123
+ --bigwig-dir path/to/bigwigs/ \
124
+ --output-dir peaks/ \
125
+ --chromsizes hg38.chrom.sizes \
126
+ --quantile 0.98
127
+ ```
128
+
129
+ Run `quantnado --help` or `quantnado <subcommand> --help` for full option listings.
130
+
131
+ ---
132
+
133
+ ## API Reference
134
+
135
+ Full documentation is available at [milne-group.github.io/QuantNado](https://milne-group.github.io/QuantNado/).
136
+
137
+ ### `QuantNado`
138
+
139
+ | Method / Property | Description |
140
+ |---|---|
141
+ | `QuantNado.from_bam_files(bam_files, store_path, ...)` | Create a new dataset from BAM files |
142
+ | `QuantNado.open(store_path, read_only=True)` | Open an existing dataset |
143
+ | `.reduce(ranges, reduction="mean")` | Aggregate signal over genomic ranges (BED) |
144
+ | `.feature_counts(gtf_file, feature_type="gene")` | Generate a DESeq2-compatible count matrix |
145
+ | `.pca(data, n_components=10)` | Run PCA on a signal matrix |
146
+ | `.extract_region(region)` | Extract raw signal for a genomic region |
147
+ | `.to_xarray(chromosomes)` | Load dataset as lazy xarray DataArrays |
148
+ | `.samples` | List of sample names |
149
+ | `.metadata` | Sample metadata (DataFrame) |
150
+ | `.chromosomes` | Available chromosome names |
151
+ | `.chromsizes` | Chromosome sizes (dict) |
152
+ | `.store_path` | Path to the underlying Zarr store |
153
+
154
+ ---
155
+
156
+ ## Requirements
157
+
158
+ | Dependency | Purpose |
159
+ |---|---|
160
+ | `zarr`, `icechunk` | Zarr v3 storage backend |
161
+ | `xarray`, `dask` | Lazy array operations |
162
+ | `pandas`, `numpy` | Data structures |
163
+ | `pysam`, `bamnado` | BAM file I/O |
164
+ | `pyBigWig` | bigWig I/O |
165
+ | `pyranges` | Genomic range operations |
166
+ | `scikit-learn` (via `dask-ml`) | PCA |
167
+ | `typer`, `loguru` | CLI and logging |
168
+
169
+ Optional extras for deep learning-based peak calling:
170
+
171
+ ```bash
172
+ pip install "quantnado[ml]"
173
+ ```
174
+
175
+ Installs `torch`, `modisco-lite`, and `crested`.
176
+
177
+ ---
178
+
179
+ ## License
180
+
181
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,141 @@
1
+ # QuantNado
2
+
3
+ **Dataset generation and peak calling for multi-modal Next-Generation Sequencing data.**
4
+
5
+ [![CI](https://github.com/Milne-Group/QuantNado/actions/workflows/python-tests.yml/badge.svg)](https://github.com/Milne-Group/QuantNado/actions/workflows/python-tests.yml)
6
+ [![PyPI](https://img.shields.io/pypi/v/quantnado)](https://pypi.org/project/quantnado)
7
+ [![Docs](https://img.shields.io/badge/docs-milne--group.github.io-blue)](https://milne-group.github.io/QuantNado/)
8
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
9
+ [![Python](https://img.shields.io/badge/python-3.12%20%7C%203.13-blue)](https://pypi.org/project/quantnado)
10
+
11
+ QuantNado provides efficient Zarr-backed storage and analysis of genomic signal from BAM and bigWig files, with support for signal reduction, feature counting, dimensionality reduction, and quantile-based peak calling.
12
+
13
+ ---
14
+
15
+ ## Installation
16
+
17
+ ```bash
18
+ pip install quantnado
19
+ ```
20
+
21
+ Requires Python 3.12 or 3.13.
22
+
23
+ ---
24
+
25
+ ## Quick Start
26
+
27
+ ### Create a dataset from BAM files
28
+
29
+ ```python
30
+ from quantnado import QuantNado
31
+
32
+ qn = QuantNado.from_bam_files(
33
+ bam_files=["sample1.bam", "sample2.bam", "sample3.bam"],
34
+ store_path="dataset.zarr",
35
+ metadata="samples.csv", # optional
36
+ )
37
+ ```
38
+
39
+ ### Load and analyse an existing dataset
40
+
41
+ ```python
42
+ from quantnado import QuantNado
43
+
44
+ qn = QuantNado.open("dataset.zarr")
45
+
46
+ # Aggregate signal over genomic ranges
47
+ promoter_signal = qn.reduce("promoters.bed", reduction="mean")
48
+ print(promoter_signal["mean"].shape) # (n_promoters, n_samples)
49
+
50
+ # PCA on reduced signal
51
+ pca_obj, transformed = qn.pca(promoter_signal["mean"], n_components=10)
52
+ print(transformed.shape) # (n_samples, 10)
53
+
54
+ # Generate a count matrix for DESeq2
55
+ counts, features = qn.feature_counts("genes.gtf", feature_type="gene")
56
+ counts.to_csv("counts.csv")
57
+
58
+ # Extract signal over a specific region
59
+ region = qn.extract_region("chr1:1000-5000")
60
+ print(region.shape) # (n_samples, 4000)
61
+ ```
62
+
63
+ ---
64
+
65
+ ## Command-line Interface
66
+
67
+ QuantNado installs a `quantnado` command with two subcommands.
68
+
69
+ ### `create-dataset` — build a Zarr dataset from BAM files
70
+
71
+ ```bash
72
+ quantnado create-dataset sample1.bam sample2.bam sample3.bam \
73
+ --output dataset.zarr \
74
+ --chromsizes hg38.chrom.sizes \
75
+ --metadata samples.csv \
76
+ --max-workers 8
77
+ ```
78
+
79
+ ### `call-peaks` — call quantile-based peaks from bigWig files
80
+
81
+ ```bash
82
+ quantnado call-peaks \
83
+ --bigwig-dir path/to/bigwigs/ \
84
+ --output-dir peaks/ \
85
+ --chromsizes hg38.chrom.sizes \
86
+ --quantile 0.98
87
+ ```
88
+
89
+ Run `quantnado --help` or `quantnado <subcommand> --help` for full option listings.
90
+
91
+ ---
92
+
93
+ ## API Reference
94
+
95
+ Full documentation is available at [milne-group.github.io/QuantNado](https://milne-group.github.io/QuantNado/).
96
+
97
+ ### `QuantNado`
98
+
99
+ | Method / Property | Description |
100
+ |---|---|
101
+ | `QuantNado.from_bam_files(bam_files, store_path, ...)` | Create a new dataset from BAM files |
102
+ | `QuantNado.open(store_path, read_only=True)` | Open an existing dataset |
103
+ | `.reduce(ranges, reduction="mean")` | Aggregate signal over genomic ranges (BED) |
104
+ | `.feature_counts(gtf_file, feature_type="gene")` | Generate a DESeq2-compatible count matrix |
105
+ | `.pca(data, n_components=10)` | Run PCA on a signal matrix |
106
+ | `.extract_region(region)` | Extract raw signal for a genomic region |
107
+ | `.to_xarray(chromosomes)` | Load dataset as lazy xarray DataArrays |
108
+ | `.samples` | List of sample names |
109
+ | `.metadata` | Sample metadata (DataFrame) |
110
+ | `.chromosomes` | Available chromosome names |
111
+ | `.chromsizes` | Chromosome sizes (dict) |
112
+ | `.store_path` | Path to the underlying Zarr store |
113
+
114
+ ---
115
+
116
+ ## Requirements
117
+
118
+ | Dependency | Purpose |
119
+ |---|---|
120
+ | `zarr`, `icechunk` | Zarr v3 storage backend |
121
+ | `xarray`, `dask` | Lazy array operations |
122
+ | `pandas`, `numpy` | Data structures |
123
+ | `pysam`, `bamnado` | BAM file I/O |
124
+ | `pyBigWig` | bigWig I/O |
125
+ | `pyranges` | Genomic range operations |
126
+ | `scikit-learn` (via `dask-ml`) | PCA |
127
+ | `typer`, `loguru` | CLI and logging |
128
+
129
+ Optional extras for deep learning-based peak calling:
130
+
131
+ ```bash
132
+ pip install "quantnado[ml]"
133
+ ```
134
+
135
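+ Installs `torch`, `modisco-lite`, and `crested`. <!-- NOTE(review): the 0.3.2 package metadata declares only the `dev` extra and already lists `crested` as a core requirement; confirm that the `ml` extra is actually published before relying on this command. -->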
136
+
137
+ ---
138
+
139
+ ## License
140
+
141
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,4 @@
1
+ ::: quantnado.dataset.bam.BamStore
2
+ options:
3
+ show_source: true
4
+ show_root_heading: true
@@ -0,0 +1,4 @@
1
+ ::: quantnado.api.QuantNado
2
+ options:
3
+ show_source: true
4
+ show_root_heading: true
@@ -0,0 +1,134 @@
1
+ # Basic Usage
2
+
3
+ Common workflows and analysis patterns with QuantNado.
4
+
5
+ ## Creating Datasets
6
+
7
+ ### From Single BAM File
8
+
9
+ ```bash
10
+ quantnado create-dataset sample.bam \
11
+ --output sample.zarr \
12
+ --chromsizes hg38.chrom.sizes
13
+ ```
14
+
15
+ ### From Multiple BAM Files
16
+
17
+ Process all BAM files in a directory:
18
+
19
+ ```bash
20
+ quantnado create-dataset results/aligned/*.bam \
21
+ --output my_cohort.zarr \
22
+ --chromsizes hg38.chrom.sizes \
23
+ --max-workers 8
24
+ ```
25
+
26
+ ### With Sample Metadata
27
+
28
+ Create a CSV with sample information (`metadata.csv`):
29
+
30
+ ```
31
+ sample_id,condition,replicate
32
+ sample1,control,1
33
+ sample2,control,2
34
+ sample3,treatment,1
35
+ sample4,treatment,2
36
+ ```
37
+
38
+ Then:
39
+
40
+ ```bash
41
+ quantnado create-dataset *.bam \
42
+ --output dataset.zarr \
43
+ --chromsizes hg38.chrom.sizes \
44
+ --metadata metadata.csv
45
+ ```
46
+
47
+ ## Peak Calling
48
+
49
+ ### Basic Peak Calling
50
+
51
+ ```bash
52
+ quantnado call-peaks \
53
+ --bigwig-dir ./bigwigs/ \
54
+ --output-dir ./peaks/ \
55
+ --chromsizes hg38.chrom.sizes \
56
+ --quantile 0.98
57
+ ```
58
+
59
+ Output: One BED file per sample in `./peaks/`
60
+
61
+ ### Filtering and Merging
62
+
63
+ ```bash
64
+ quantnado call-peaks \
65
+ --bigwig-dir ./bigwigs/ \
66
+ --output-dir ./peaks/ \
67
+ --chromsizes hg38.chrom.sizes \
68
+ --quantile 0.95 \
69
+ --merge # Merge overlapping peaks
70
+ ```
71
+
72
+ ## Python API
73
+
74
+ ### Loading a Dataset
75
+
76
+ ```python
77
+ from quantnado import QuantNado
78
+
79
+ # Open existing dataset
80
+ qn = QuantNado.open("my_dataset.zarr")
81
+
82
+ # Check samples
83
+ print(qn.samples)
84
+ print(qn.metadata)
85
+ ```
86
+
87
+ ### Accessing Data
88
+
89
+ ```python
90
+ # Get data for a specific region (returns xarray DataArray)
91
+ region_data = qn.extract_region("chr1:1000000-2000000")
92
+
93
+ # Get all chromosomes as a dict of lazy xarray DataArrays
94
+ all_chroms = qn.to_xarray()
95
+ chr1_data = all_chroms["chr1"]
96
+
97
+ # Compute and convert to pandas DataFrame
98
+ df = chr1_data.to_dataframe(name="signal")
99
+ ```
100
+
101
+ ### Creating Datasets Programmatically
102
+
103
+ ```python
104
+ from quantnado import BamStore
105
+ import pandas as pd
106
+
107
+ # Create from BAM files
108
+ store = BamStore.from_bam_files(
109
+ bam_files=["sample1.bam", "sample2.bam"],
110
+ chromsizes="hg38.chrom.sizes",
111
+ store_path="dataset.zarr",
112
+ max_workers=8
113
+ )
114
+
115
+ # Add metadata
116
+ metadata = pd.DataFrame({
117
+ "sample_id": ["sample1", "sample2"],
118
+ "condition": ["control", "treatment"]
119
+ })
120
+ store.set_metadata(metadata)
121
+ ```
122
+
123
+ ## Common Issues
124
+
125
+ **Q: How long does dataset creation take?**
126
+ A: It depends on the size of the BAM files (typically 20 minutes to 2 hours for whole-genome data). Use `--max-workers` to parallelize.
127
+
128
+ **Q: Can I resume interrupted dataset creation?**
129
+ A: Yes. Pass `--resume` to the CLI (or `resume=True` in Python) and QuantNado will skip already-completed samples. Without this flag the default behaviour is `--overwrite`, which deletes any existing store at that path.
130
+
131
+ **Q: What genomic coordinates are supported?**
132
+ A: Any coordinates in your chromsizes file are supported, so human, mouse, yeast, and other genomes all work.
133
+
134
+ See [Troubleshooting](troubleshooting.md) for more help.