py-gbcms 2.0.0__tar.gz → 2.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {py_gbcms-2.0.0 → py_gbcms-2.2.0}/LICENSE +49 -52
- py_gbcms-2.2.0/PKG-INFO +216 -0
- py_gbcms-2.2.0/README.md +180 -0
- {py_gbcms-2.0.0 → py_gbcms-2.2.0}/pyproject.toml +20 -31
- py_gbcms-2.2.0/rust/.gitignore +72 -0
- py_gbcms-2.2.0/rust/Cargo.lock +1395 -0
- py_gbcms-2.2.0/rust/Cargo.toml +20 -0
- py_gbcms-2.2.0/rust/src/counting.rs +663 -0
- py_gbcms-2.2.0/rust/src/lib.rs +16 -0
- py_gbcms-2.2.0/rust/src/stats.rs +79 -0
- py_gbcms-2.2.0/rust/src/types.rs +90 -0
- py_gbcms-2.2.0/src/gbcms/__init__.py +23 -0
- py_gbcms-2.2.0/src/gbcms/_rs.pyi +49 -0
- py_gbcms-2.2.0/src/gbcms/cli.py +204 -0
- py_gbcms-2.2.0/src/gbcms/core/__init__.py +9 -0
- py_gbcms-2.2.0/src/gbcms/core/kernel.py +128 -0
- py_gbcms-2.2.0/src/gbcms/io/__init__.py +18 -0
- py_gbcms-2.2.0/src/gbcms/io/input.py +227 -0
- py_gbcms-2.2.0/src/gbcms/io/output.py +354 -0
- py_gbcms-2.2.0/src/gbcms/models/__init__.py +27 -0
- py_gbcms-2.2.0/src/gbcms/models/core.py +172 -0
- py_gbcms-2.2.0/src/gbcms/pipeline.py +257 -0
- py_gbcms-2.2.0/src/gbcms/py.typed +0 -0
- py_gbcms-2.2.0/src/gbcms/utils/__init__.py +14 -0
- py_gbcms-2.2.0/src/gbcms/utils/logging.py +123 -0
- py_gbcms-2.0.0/.gitbook.yaml +0 -8
- py_gbcms-2.0.0/.github/workflows/release.yml +0 -130
- py_gbcms-2.0.0/.github/workflows/test.yml +0 -150
- py_gbcms-2.0.0/.gitignore +0 -65
- py_gbcms-2.0.0/.pre-commit-config.yaml +0 -31
- py_gbcms-2.0.0/CONTRIBUTING.md +0 -154
- py_gbcms-2.0.0/Dockerfile +0 -55
- py_gbcms-2.0.0/Dockerfile.test +0 -39
- py_gbcms-2.0.0/Makefile +0 -104
- py_gbcms-2.0.0/PKG-INFO +0 -506
- py_gbcms-2.0.0/README.md +0 -462
- py_gbcms-2.0.0/docker-compose.yml +0 -26
- py_gbcms-2.0.0/docs/ADVANCED_FEATURES.md +0 -747
- py_gbcms-2.0.0/docs/ARCHITECTURE.md +0 -631
- py_gbcms-2.0.0/docs/CLI_FEATURES.md +0 -393
- py_gbcms-2.0.0/docs/COMPLETE_FEATURES_SUMMARY.md +0 -600
- py_gbcms-2.0.0/docs/CPP_FEATURE_COMPARISON.md +0 -334
- py_gbcms-2.0.0/docs/CYVCF2_SUPPORT.md +0 -406
- py_gbcms-2.0.0/docs/DOCKER_GUIDE.md +0 -589
- py_gbcms-2.0.0/docs/DOCKER_SUMMARY.md +0 -394
- py_gbcms-2.0.0/docs/FAQ.md +0 -476
- py_gbcms-2.0.0/docs/INPUT_OUTPUT.md +0 -469
- py_gbcms-2.0.0/docs/INSTALLATION.md +0 -124
- py_gbcms-2.0.0/docs/PACKAGE_STRUCTURE.md +0 -299
- py_gbcms-2.0.0/docs/PARALLELIZATION_GUIDE.md +0 -185
- py_gbcms-2.0.0/docs/QUICKSTART.md +0 -329
- py_gbcms-2.0.0/docs/README.md +0 -100
- py_gbcms-2.0.0/docs/SUMMARY.md +0 -40
- py_gbcms-2.0.0/docs/TESTING_GUIDE.md +0 -261
- py_gbcms-2.0.0/git-flow-helper.sh +0 -118
- py_gbcms-2.0.0/scripts/setup_and_test.sh +0 -145
- py_gbcms-2.0.0/scripts/test_docker.sh +0 -156
- py_gbcms-2.0.0/scripts/test_maf_workflow.sh +0 -164
- py_gbcms-2.0.0/scripts/test_vcf_workflow.sh +0 -116
- py_gbcms-2.0.0/scripts/validate_against_cpp.sh +0 -272
- py_gbcms-2.0.0/scripts/verify_installation.py +0 -147
- py_gbcms-2.0.0/src/gbcms/__init__.py +0 -13
- py_gbcms-2.0.0/src/gbcms/cli.py +0 -745
- py_gbcms-2.0.0/src/gbcms/config.py +0 -98
- py_gbcms-2.0.0/src/gbcms/counter.py +0 -1074
- py_gbcms-2.0.0/src/gbcms/models.py +0 -295
- py_gbcms-2.0.0/src/gbcms/numba_counter.py +0 -394
- py_gbcms-2.0.0/src/gbcms/output.py +0 -573
- py_gbcms-2.0.0/src/gbcms/parallel.py +0 -129
- py_gbcms-2.0.0/src/gbcms/processor.py +0 -293
- py_gbcms-2.0.0/src/gbcms/reference.py +0 -86
- py_gbcms-2.0.0/src/gbcms/variant.py +0 -390
- py_gbcms-2.0.0/tests/__init__.py +0 -1
- py_gbcms-2.0.0/tests/conftest.py +0 -117
- py_gbcms-2.0.0/tests/test_cli.py +0 -235
- py_gbcms-2.0.0/tests/test_config.py +0 -142
- py_gbcms-2.0.0/tests/test_counter.py +0 -188
- py_gbcms-2.0.0/tests/test_output.py +0 -191
- py_gbcms-2.0.0/tests/test_reference.py +0 -84
- py_gbcms-2.0.0/tests/test_variant.py +0 -159
- py_gbcms-2.0.0/uv.lock +0 -1237
|
@@ -1,16 +1,11 @@
|
|
|
1
1
|
GNU AFFERO GENERAL PUBLIC LICENSE
|
|
2
2
|
Version 3, 19 November 2007
|
|
3
3
|
|
|
4
|
-
Copyright (C) 2007 Free Software Foundation, Inc. <
|
|
4
|
+
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
|
|
5
5
|
Everyone is permitted to copy and distribute verbatim copies
|
|
6
6
|
of this license document, but changing it is not allowed.
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
1. Definitions.
|
|
11
|
-
|
|
12
|
-
"License" shall mean the terms and conditions for use, reproduction,
|
|
13
|
-
and distribution as defined by Sections 1 through 9 of this document.
|
|
8
|
+
Preamble
|
|
14
9
|
|
|
15
10
|
The GNU Affero General Public License is a free, copyleft license for
|
|
16
11
|
software and other kinds of works, specifically designed to ensure
|
|
@@ -18,15 +13,16 @@ cooperation with the community in the case of network server software.
|
|
|
18
13
|
|
|
19
14
|
The licenses for most software and other practical works are designed
|
|
20
15
|
to take away your freedom to share and change the works. By contrast,
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
you can
|
|
16
|
+
our General Public Licenses are intended to guarantee your freedom to
|
|
17
|
+
share and change all versions of a program--to make sure it remains free
|
|
18
|
+
software for all its users.
|
|
19
|
+
|
|
20
|
+
When we speak of free software, we are referring to freedom, not
|
|
21
|
+
price. Our General Public Licenses are designed to make sure that you
|
|
22
|
+
have the freedom to distribute copies of free software (and charge for
|
|
23
|
+
them if you wish), that you receive source code or can get it if you
|
|
24
|
+
want it, that you can change the software or use pieces of it in new
|
|
25
|
+
free programs, and that you know you can do these things.
|
|
30
26
|
|
|
31
27
|
Developers that use our General Public Licenses protect your rights
|
|
32
28
|
with two steps: (1) assert copyright on the software, and (2) offer
|
|
@@ -39,23 +35,39 @@ receive widespread use, become available for other developers to
|
|
|
39
35
|
incorporate. Many developers of free software are heartened and
|
|
40
36
|
encouraged by the resulting cooperation. However, in the case of
|
|
41
37
|
software used on network servers, this result may fail to come about.
|
|
42
|
-
The GNU
|
|
43
|
-
|
|
44
|
-
|
|
38
|
+
The GNU General Public License permits making a modified version and
|
|
39
|
+
letting the public access it on a server without ever releasing its
|
|
40
|
+
source code to the public.
|
|
41
|
+
|
|
42
|
+
The GNU Affero General Public License is designed specifically to
|
|
43
|
+
ensure that, in such cases, the modified source code becomes available
|
|
44
|
+
to the community. It requires the operator of a network server to
|
|
45
|
+
provide the source code of the modified version running there to the
|
|
46
|
+
users of that server. Therefore, public use of a modified version, on
|
|
47
|
+
a publicly accessible server, gives the public access to the source
|
|
48
|
+
code of the modified version.
|
|
49
|
+
|
|
50
|
+
An older license, called the Affero General Public License and
|
|
51
|
+
published by Affero, was designed to accomplish similar goals. This is
|
|
52
|
+
a different license, not a version of the Affero GPL, but Affero has
|
|
53
|
+
released a new version of the Affero GPL which permits relicensing under
|
|
54
|
+
this license.
|
|
55
|
+
|
|
56
|
+
The precise terms and conditions for copying, distribution and
|
|
57
|
+
modification follow.
|
|
45
58
|
|
|
46
|
-
|
|
47
|
-
gratis or for a fee, and make the source code available to users so
|
|
48
|
-
they can modify the program while keeping the network server running,
|
|
49
|
-
you must offer the source code under the GNU Affero General Public
|
|
50
|
-
License.
|
|
59
|
+
TERMS AND CONDITIONS
|
|
51
60
|
|
|
52
|
-
|
|
53
|
-
Public License, but includes an additional permission and a requirement
|
|
54
|
-
regarding network server software that is different from the GNU GPL.
|
|
61
|
+
0. Definitions.
|
|
55
62
|
|
|
56
|
-
"
|
|
57
|
-
|
|
58
|
-
|
|
63
|
+
"This License" refers to version 3 of the GNU Affero General Public License.
|
|
64
|
+
|
|
65
|
+
"Copyright" also means copyright-like laws that apply to other kinds of
|
|
66
|
+
works, such as semiconductor masks.
|
|
67
|
+
|
|
68
|
+
"The Program" refers to any copyrightable work licensed under this
|
|
69
|
+
License. Each licensee is addressed as "you". "Licensees" and
|
|
70
|
+
"recipients" may be individuals or organizations.
|
|
59
71
|
|
|
60
72
|
To "modify" a work means to copy from or adapt all or part of the work
|
|
61
73
|
in a fashion requiring copyright permission, other than the making of an
|
|
@@ -262,9 +274,9 @@ in one of these ways:
|
|
|
262
274
|
available for as long as needed to satisfy these requirements.
|
|
263
275
|
|
|
264
276
|
e) Convey the object code using peer-to-peer transmission, provided
|
|
265
|
-
you inform other peers where the object and
|
|
266
|
-
are being offered to the general public at no
|
|
267
|
-
subsection 6d.
|
|
277
|
+
you inform other peers where the object code and Corresponding
|
|
278
|
+
Source of the work are being offered to the general public at no
|
|
279
|
+
charge under subsection 6d.
|
|
268
280
|
|
|
269
281
|
A separable portion of the object code, whose source code is excluded
|
|
270
282
|
from the Corresponding Source as a System Library, need not be
|
|
@@ -353,7 +365,7 @@ that material) supplement the terms of this License with terms:
|
|
|
353
365
|
authors of the material; or
|
|
354
366
|
|
|
355
367
|
e) Declining to grant rights under trademark law for use of some
|
|
356
|
-
trade names,
|
|
368
|
+
trade names, trademarks, or service marks; or
|
|
357
369
|
|
|
358
370
|
f) Requiring indemnification of licensors and authors of that
|
|
359
371
|
material by anyone who conveys the material (or modified versions of
|
|
@@ -533,7 +545,7 @@ interacting with it remotely through a computer network (if your version
|
|
|
533
545
|
supports such interaction) an opportunity to receive the Corresponding
|
|
534
546
|
Source of your version by providing access to the Corresponding Source
|
|
535
547
|
from a network server at no charge, through some standard or customary
|
|
536
|
-
means of facilitating copying of software. This
|
|
548
|
+
means of facilitating copying of software. This Corresponding Source
|
|
537
549
|
shall include the Corresponding Source for any work covered by version 3
|
|
538
550
|
of the GNU General Public License that is incorporated pursuant to the
|
|
539
551
|
following paragraph.
|
|
@@ -631,7 +643,7 @@ the "copyright" line and a pointer to where the full notice is found.
|
|
|
631
643
|
GNU Affero General Public License for more details.
|
|
632
644
|
|
|
633
645
|
You should have received a copy of the GNU Affero General Public License
|
|
634
|
-
along with this program. If not, see <
|
|
646
|
+
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
635
647
|
|
|
636
648
|
Also add information on how to contact you by electronic and paper mail.
|
|
637
649
|
|
|
@@ -646,19 +658,4 @@ specific requirements.
|
|
|
646
658
|
You should also get your employer (if you work as a programmer) or school,
|
|
647
659
|
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
|
648
660
|
For more information on this, and how to apply and follow the GNU AGPL, see
|
|
649
|
-
<
|
|
650
|
-
|
|
651
|
-
Copyright 2024 MSK-ACCESS Team
|
|
652
|
-
|
|
653
|
-
This program is free software: you can redistribute it and/or modify
|
|
654
|
-
it under the terms of the GNU Affero General Public License as published by
|
|
655
|
-
the Free Software Foundation, either version 3 of the License, or
|
|
656
|
-
(at your option) any later version.
|
|
657
|
-
|
|
658
|
-
This program is distributed in the hope that it will be useful,
|
|
659
|
-
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
660
|
-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
661
|
-
GNU Affero General Public License for more details.
|
|
662
|
-
|
|
663
|
-
You should have received a copy of the GNU Affero General Public License
|
|
664
|
-
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
661
|
+
<https://www.gnu.org/licenses/>.
|
py_gbcms-2.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: py-gbcms
|
|
3
|
+
Version: 2.2.0
|
|
4
|
+
Classifier: Development Status :: 4 - Beta
|
|
5
|
+
Classifier: Intended Audience :: Science/Research
|
|
6
|
+
Classifier: License :: OSI Approved :: GNU Affero General Public License v3
|
|
7
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
9
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
10
|
+
Requires-Dist: pysam>=0.21.0
|
|
11
|
+
Requires-Dist: typer>=0.9.0
|
|
12
|
+
Requires-Dist: rich>=13.0.0
|
|
13
|
+
Requires-Dist: pydantic>=2.0.0
|
|
14
|
+
Requires-Dist: pytest>=7.4.0 ; extra == 'dev'
|
|
15
|
+
Requires-Dist: pytest-cov>=4.1.0 ; extra == 'dev'
|
|
16
|
+
Requires-Dist: pytest-mock>=3.11.0 ; extra == 'dev'
|
|
17
|
+
Requires-Dist: black>=23.0.0 ; extra == 'dev'
|
|
18
|
+
Requires-Dist: ruff>=0.1.0 ; extra == 'dev'
|
|
19
|
+
Requires-Dist: mypy>=1.5.0 ; extra == 'dev'
|
|
20
|
+
Requires-Dist: types-pyyaml>=6.0.0 ; extra == 'dev'
|
|
21
|
+
Requires-Dist: mkdocs-material>=9.0.0 ; extra == 'dev'
|
|
22
|
+
Provides-Extra: all
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Provides-Extra: fast
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Summary: Python implementation of GetBaseCountsMultiSample (gbcms) for calculating base counts in BAM files
|
|
27
|
+
Keywords: bioinformatics,genomics,bam,vcf,maf,base-counts,gbcms
|
|
28
|
+
Author-email: MSK-ACCESS <shahr2@mskcc.org>
|
|
29
|
+
Requires-Python: >=3.10
|
|
30
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
31
|
+
Project-URL: Bug Tracker, https://github.com/msk-access/py-gbcms/issues
|
|
32
|
+
Project-URL: Documentation, https://github.com/msk-access/py-gbcms#readme
|
|
33
|
+
Project-URL: Homepage, https://github.com/msk-access/py-gbcms
|
|
34
|
+
Project-URL: Repository, https://github.com/msk-access/py-gbcms
|
|
35
|
+
|
|
36
|
+
# py-gbcms
|
|
37
|
+
|
|
38
|
+
**Complete orientation-aware counting system for genomic variants**
|
|
39
|
+
|
|
40
|
+
[GitHub Actions](https://github.com/msk-access/py-gbcms/actions)
|
|
41
|
+
[Python downloads](https://www.python.org/downloads/)
|
|
42
|
+
|
|
43
|
+
## Features
|
|
44
|
+
|
|
45
|
+
- 🚀 **High Performance**: Rust-powered core engine with multi-threading
|
|
46
|
+
- 🧬 **Complete Variant Support**: SNP, MNP, insertion, deletion, and complex variants (DelIns, SNP+Indel)
|
|
47
|
+
- 📊 **Orientation-Aware**: Forward and reverse strand analysis with fragment counting
|
|
48
|
+
- 🔬 **Statistical Analysis**: Fisher's exact test for strand bias
|
|
49
|
+
- 📁 **Flexible I/O**: VCF and MAF input/output formats
|
|
50
|
+
- 🎯 **Quality Filters**: 7 configurable read filtering options
|
|
51
|
+
|
|
52
|
+
## Installation
|
|
53
|
+
|
|
54
|
+
**Quick install:**
|
|
55
|
+
```bash
|
|
56
|
+
pip install py-gbcms
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
**From source (requires Rust):**
|
|
60
|
+
```bash
|
|
61
|
+
git clone https://github.com/msk-access/py-gbcms.git
|
|
62
|
+
cd py-gbcms
|
|
63
|
+
pip install .
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
**Docker:**
|
|
67
|
+
```bash
|
|
68
|
+
docker pull ghcr.io/msk-access/py-gbcms:2.1.0
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
📖 **Full documentation:** https://msk-access.github.io/py-gbcms/
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## Usage
|
|
76
|
+
|
|
77
|
+
`py-gbcms` can be used in two ways:
|
|
78
|
+
|
|
79
|
+
### 🔧 Option 1: Standalone CLI (1-10 samples)
|
|
80
|
+
|
|
81
|
+
**Best for:** Quick analysis, local processing, direct control
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
gbcms run \
|
|
85
|
+
--variants variants.vcf \
|
|
86
|
+
--bam sample1.bam \
|
|
87
|
+
--fasta reference.fa \
|
|
88
|
+
--output-dir results/
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
**Output:** `results/sample1.vcf`
|
|
92
|
+
|
|
93
|
+
**Learn more:**
|
|
94
|
+
- 📘 [CLI Quick Start](https://cmo-ci.gitbook.io/py-gbcms/quick-start)
|
|
95
|
+
- 📖 [CLI Reference](https://cmo-ci.gitbook.io/py-gbcms/cli_features)
|
|
96
|
+
|
|
97
|
+
---
|
|
98
|
+
|
|
99
|
+
### 🔄 Option 2: Nextflow Workflow (10+ samples, HPC)
|
|
100
|
+
|
|
101
|
+
**Best for:** Many samples, HPC clusters (SLURM), reproducible pipelines
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
nextflow run nextflow/main.nf \
|
|
105
|
+
--input samplesheet.csv \
|
|
106
|
+
--variants variants.vcf \
|
|
107
|
+
--fasta reference.fa \
|
|
108
|
+
-profile slurm
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
**Features:**
|
|
112
|
+
- ✅ Automatic parallelization across samples
|
|
113
|
+
- ✅ SLURM/HPC integration
|
|
114
|
+
- ✅ Container support (Docker/Singularity)
|
|
115
|
+
- ✅ Resume failed runs
|
|
116
|
+
|
|
117
|
+
**Learn more:**
|
|
118
|
+
- 🔄 [Nextflow Workflow Guide](https://cmo-ci.gitbook.io/py-gbcms/nextflow)
|
|
119
|
+
- 📋 [Usage Patterns Comparison](https://cmo-ci.gitbook.io/py-gbcms/workflows)
|
|
120
|
+
|
|
121
|
+
---
|
|
122
|
+
|
|
123
|
+
## Which Should I Use?
|
|
124
|
+
|
|
125
|
+
| Scenario | Recommendation |
|
|
126
|
+
|----------|----------------|
|
|
127
|
+
| 1-10 samples, local machine | **CLI** |
|
|
128
|
+
| 10+ samples, HPC cluster | **Nextflow** |
|
|
129
|
+
| Quick ad-hoc analysis | **CLI** |
|
|
130
|
+
| Production pipeline | **Nextflow** |
|
|
131
|
+
| Need auto-parallelization | **Nextflow** |
|
|
132
|
+
| Full manual control | **CLI** |
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## Quick Examples
|
|
137
|
+
|
|
138
|
+
### CLI: Single Sample
|
|
139
|
+
```bash
|
|
140
|
+
gbcms run \
|
|
141
|
+
--variants variants.vcf \
|
|
142
|
+
--bam tumor.bam \
|
|
143
|
+
--fasta hg19.fa \
|
|
144
|
+
--output-dir results/ \
|
|
145
|
+
--threads 4
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### CLI: Multiple Samples (Sequential)
|
|
149
|
+
```bash
|
|
150
|
+
gbcms run \
|
|
151
|
+
--variants variants.vcf \
|
|
152
|
+
--bam-list samples.txt \
|
|
153
|
+
--fasta hg19.fa \
|
|
154
|
+
--output-dir results/
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Nextflow: Many Samples (Parallel)
|
|
158
|
+
```bash
|
|
159
|
+
# samplesheet.csv:
|
|
160
|
+
# sample,bam,bai
|
|
161
|
+
# tumor1,/path/to/tumor1.bam,
|
|
162
|
+
# tumor2,/path/to/tumor2.bam,
|
|
163
|
+
|
|
164
|
+
nextflow run nextflow/main.nf \
|
|
165
|
+
--input samplesheet.csv \
|
|
166
|
+
--variants variants.vcf \
|
|
167
|
+
--fasta hg19.fa \
|
|
168
|
+
--outdir results \
|
|
169
|
+
-profile slurm
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## Documentation
|
|
175
|
+
|
|
176
|
+
📚 **Full Documentation:** https://cmo-ci.gitbook.io/py-gbcms/
|
|
177
|
+
|
|
178
|
+
**Quick Links:**
|
|
179
|
+
- [Installation](https://cmo-ci.gitbook.io/py-gbcms/installation)
|
|
180
|
+
- [CLI Quick Start](https://cmo-ci.gitbook.io/py-gbcms/quick-start)
|
|
181
|
+
- [Nextflow Workflow](https://cmo-ci.gitbook.io/py-gbcms/nextflow)
|
|
182
|
+
- [CLI Reference](https://cmo-ci.gitbook.io/py-gbcms/cli_features)
|
|
183
|
+
- [Input & Output Formats](https://cmo-ci.gitbook.io/py-gbcms/input_output)
|
|
184
|
+
- [Architecture](https://cmo-ci.gitbook.io/py-gbcms/architecture)
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## Contributing
|
|
189
|
+
|
|
190
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for development guidelines.
|
|
191
|
+
|
|
192
|
+
To contribute to documentation, see the [`gh-pages` branch](https://github.com/msk-access/py-gbcms/tree/gh-pages).
|
|
193
|
+
|
|
194
|
+
---
|
|
195
|
+
|
|
196
|
+
## Citation
|
|
197
|
+
|
|
198
|
+
If you use `py-gbcms` in your research, please cite:
|
|
199
|
+
|
|
200
|
+
```
|
|
201
|
+
[Citation to be added]
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
---
|
|
205
|
+
|
|
206
|
+
## License
|
|
207
|
+
|
|
208
|
+
AGPL-3.0 - see [LICENSE](LICENSE) for details.
|
|
209
|
+
|
|
210
|
+
---
|
|
211
|
+
|
|
212
|
+
## Support
|
|
213
|
+
|
|
214
|
+
- 🐛 **Issues:** https://github.com/msk-access/py-gbcms/issues
|
|
215
|
+
- 💬 **Discussions:** https://github.com/msk-access/py-gbcms/discussions
|
|
216
|
+
|
py_gbcms-2.2.0/README.md
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
# py-gbcms
|
|
2
|
+
|
|
3
|
+
**Complete orientation-aware counting system for genomic variants**
|
|
4
|
+
|
|
5
|
+
[GitHub Actions](https://github.com/msk-access/py-gbcms/actions)
|
|
6
|
+
[Python downloads](https://www.python.org/downloads/)
|
|
7
|
+
|
|
8
|
+
## Features
|
|
9
|
+
|
|
10
|
+
- 🚀 **High Performance**: Rust-powered core engine with multi-threading
|
|
11
|
+
- 🧬 **Complete Variant Support**: SNP, MNP, insertion, deletion, and complex variants (DelIns, SNP+Indel)
|
|
12
|
+
- 📊 **Orientation-Aware**: Forward and reverse strand analysis with fragment counting
|
|
13
|
+
- 🔬 **Statistical Analysis**: Fisher's exact test for strand bias
|
|
14
|
+
- 📁 **Flexible I/O**: VCF and MAF input/output formats
|
|
15
|
+
- 🎯 **Quality Filters**: 7 configurable read filtering options
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
**Quick install:**
|
|
20
|
+
```bash
|
|
21
|
+
pip install py-gbcms
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
**From source (requires Rust):**
|
|
25
|
+
```bash
|
|
26
|
+
git clone https://github.com/msk-access/py-gbcms.git
|
|
27
|
+
cd py-gbcms
|
|
28
|
+
pip install .
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
**Docker:**
|
|
32
|
+
```bash
|
|
33
|
+
docker pull ghcr.io/msk-access/py-gbcms:2.1.0
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
📖 **Full documentation:** https://msk-access.github.io/py-gbcms/
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## Usage
|
|
41
|
+
|
|
42
|
+
`py-gbcms` can be used in two ways:
|
|
43
|
+
|
|
44
|
+
### 🔧 Option 1: Standalone CLI (1-10 samples)
|
|
45
|
+
|
|
46
|
+
**Best for:** Quick analysis, local processing, direct control
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
gbcms run \
|
|
50
|
+
--variants variants.vcf \
|
|
51
|
+
--bam sample1.bam \
|
|
52
|
+
--fasta reference.fa \
|
|
53
|
+
--output-dir results/
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
**Output:** `results/sample1.vcf`
|
|
57
|
+
|
|
58
|
+
**Learn more:**
|
|
59
|
+
- 📘 [CLI Quick Start](https://cmo-ci.gitbook.io/py-gbcms/quick-start)
|
|
60
|
+
- 📖 [CLI Reference](https://cmo-ci.gitbook.io/py-gbcms/cli_features)
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
|
|
64
|
+
### 🔄 Option 2: Nextflow Workflow (10+ samples, HPC)
|
|
65
|
+
|
|
66
|
+
**Best for:** Many samples, HPC clusters (SLURM), reproducible pipelines
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
nextflow run nextflow/main.nf \
|
|
70
|
+
--input samplesheet.csv \
|
|
71
|
+
--variants variants.vcf \
|
|
72
|
+
--fasta reference.fa \
|
|
73
|
+
-profile slurm
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
**Features:**
|
|
77
|
+
- ✅ Automatic parallelization across samples
|
|
78
|
+
- ✅ SLURM/HPC integration
|
|
79
|
+
- ✅ Container support (Docker/Singularity)
|
|
80
|
+
- ✅ Resume failed runs
|
|
81
|
+
|
|
82
|
+
**Learn more:**
|
|
83
|
+
- 🔄 [Nextflow Workflow Guide](https://cmo-ci.gitbook.io/py-gbcms/nextflow)
|
|
84
|
+
- 📋 [Usage Patterns Comparison](https://cmo-ci.gitbook.io/py-gbcms/workflows)
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## Which Should I Use?
|
|
89
|
+
|
|
90
|
+
| Scenario | Recommendation |
|
|
91
|
+
|----------|----------------|
|
|
92
|
+
| 1-10 samples, local machine | **CLI** |
|
|
93
|
+
| 10+ samples, HPC cluster | **Nextflow** |
|
|
94
|
+
| Quick ad-hoc analysis | **CLI** |
|
|
95
|
+
| Production pipeline | **Nextflow** |
|
|
96
|
+
| Need auto-parallelization | **Nextflow** |
|
|
97
|
+
| Full manual control | **CLI** |
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## Quick Examples
|
|
102
|
+
|
|
103
|
+
### CLI: Single Sample
|
|
104
|
+
```bash
|
|
105
|
+
gbcms run \
|
|
106
|
+
--variants variants.vcf \
|
|
107
|
+
--bam tumor.bam \
|
|
108
|
+
--fasta hg19.fa \
|
|
109
|
+
--output-dir results/ \
|
|
110
|
+
--threads 4
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### CLI: Multiple Samples (Sequential)
|
|
114
|
+
```bash
|
|
115
|
+
gbcms run \
|
|
116
|
+
--variants variants.vcf \
|
|
117
|
+
--bam-list samples.txt \
|
|
118
|
+
--fasta hg19.fa \
|
|
119
|
+
--output-dir results/
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Nextflow: Many Samples (Parallel)
|
|
123
|
+
```bash
|
|
124
|
+
# samplesheet.csv:
|
|
125
|
+
# sample,bam,bai
|
|
126
|
+
# tumor1,/path/to/tumor1.bam,
|
|
127
|
+
# tumor2,/path/to/tumor2.bam,
|
|
128
|
+
|
|
129
|
+
nextflow run nextflow/main.nf \
|
|
130
|
+
--input samplesheet.csv \
|
|
131
|
+
--variants variants.vcf \
|
|
132
|
+
--fasta hg19.fa \
|
|
133
|
+
--outdir results \
|
|
134
|
+
-profile slurm
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
## Documentation
|
|
140
|
+
|
|
141
|
+
📚 **Full Documentation:** https://cmo-ci.gitbook.io/py-gbcms/
|
|
142
|
+
|
|
143
|
+
**Quick Links:**
|
|
144
|
+
- [Installation](https://cmo-ci.gitbook.io/py-gbcms/installation)
|
|
145
|
+
- [CLI Quick Start](https://cmo-ci.gitbook.io/py-gbcms/quick-start)
|
|
146
|
+
- [Nextflow Workflow](https://cmo-ci.gitbook.io/py-gbcms/nextflow)
|
|
147
|
+
- [CLI Reference](https://cmo-ci.gitbook.io/py-gbcms/cli_features)
|
|
148
|
+
- [Input & Output Formats](https://cmo-ci.gitbook.io/py-gbcms/input_output)
|
|
149
|
+
- [Architecture](https://cmo-ci.gitbook.io/py-gbcms/architecture)
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
## Contributing
|
|
154
|
+
|
|
155
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for development guidelines.
|
|
156
|
+
|
|
157
|
+
To contribute to documentation, see the [`gh-pages` branch](https://github.com/msk-access/py-gbcms/tree/gh-pages).
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
## Citation
|
|
162
|
+
|
|
163
|
+
If you use `py-gbcms` in your research, please cite:
|
|
164
|
+
|
|
165
|
+
```
|
|
166
|
+
[Citation to be added]
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
## License
|
|
172
|
+
|
|
173
|
+
AGPL-3.0 - see [LICENSE](LICENSE) for details.
|
|
174
|
+
|
|
175
|
+
---
|
|
176
|
+
|
|
177
|
+
## Support
|
|
178
|
+
|
|
179
|
+
- 🐛 **Issues:** https://github.com/msk-access/py-gbcms/issues
|
|
180
|
+
- 💬 **Discussions:** https://github.com/msk-access/py-gbcms/discussions
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "py-gbcms"
|
|
3
|
-
version = "2.
|
|
3
|
+
version = "2.2.0"
|
|
4
4
|
description = "Python implementation of GetBaseCountsMultiSample (gbcms) for calculating base counts in BAM files"
|
|
5
5
|
authors = [
|
|
6
6
|
{name = "MSK-ACCESS", email = "shahr2@mskcc.org"}
|
|
7
7
|
]
|
|
8
8
|
readme = "README.md"
|
|
9
|
-
requires-python = ">=3.
|
|
10
|
-
license = {
|
|
9
|
+
requires-python = ">=3.10"
|
|
10
|
+
license = {file = "LICENSE"}
|
|
11
11
|
keywords = ["bioinformatics", "genomics", "bam", "vcf", "maf", "base-counts", "gbcms"]
|
|
12
12
|
classifiers = [
|
|
13
13
|
"Development Status :: 4 - Beta",
|
|
@@ -19,27 +19,16 @@ classifiers = [
|
|
|
19
19
|
]
|
|
20
20
|
|
|
21
21
|
dependencies = [
|
|
22
|
-
"pysam>=0.
|
|
23
|
-
"numpy>=1.24.0",
|
|
22
|
+
"pysam>=0.21.0",
|
|
24
23
|
"typer>=0.9.0",
|
|
25
24
|
"rich>=13.0.0",
|
|
26
|
-
"pandas>=2.0.0",
|
|
27
25
|
"pydantic>=2.0.0",
|
|
28
|
-
"pydantic-settings>=2.0.0",
|
|
29
|
-
"numba>=0.58.0",
|
|
30
|
-
"joblib>=1.3.0",
|
|
31
|
-
"scipy>=1.11.0",
|
|
32
26
|
]
|
|
33
27
|
|
|
34
28
|
[project.optional-dependencies]
|
|
35
|
-
fast = [
|
|
36
|
-
"cyvcf2>=0.30.0",
|
|
37
|
-
]
|
|
38
|
-
|
|
29
|
+
fast = []
|
|
39
30
|
|
|
40
|
-
all = [
|
|
41
|
-
"cyvcf2>=0.30.0",
|
|
42
|
-
]
|
|
31
|
+
all = []
|
|
43
32
|
|
|
44
33
|
dev = [
|
|
45
34
|
"pytest>=7.4.0",
|
|
@@ -48,25 +37,28 @@ dev = [
|
|
|
48
37
|
"black>=23.0.0",
|
|
49
38
|
"ruff>=0.1.0",
|
|
50
39
|
"mypy>=1.5.0",
|
|
51
|
-
"pre-commit>=3.3.0",
|
|
52
40
|
"types-pyyaml>=6.0.0",
|
|
41
|
+
"mkdocs-material>=9.0.0",
|
|
53
42
|
]
|
|
54
43
|
|
|
55
44
|
[project.scripts]
|
|
56
45
|
gbcms = "gbcms.cli:app"
|
|
57
46
|
|
|
58
47
|
[project.urls]
|
|
59
|
-
Homepage = "https://github.com/msk-access/
|
|
60
|
-
Repository = "https://github.com/msk-access/
|
|
61
|
-
Documentation = "https://github.com/msk-access/
|
|
62
|
-
"Bug Tracker" = "https://github.com/msk-access/
|
|
48
|
+
Homepage = "https://github.com/msk-access/py-gbcms"
|
|
49
|
+
Repository = "https://github.com/msk-access/py-gbcms"
|
|
50
|
+
Documentation = "https://github.com/msk-access/py-gbcms#readme"
|
|
51
|
+
"Bug Tracker" = "https://github.com/msk-access/py-gbcms/issues"
|
|
63
52
|
|
|
64
53
|
[build-system]
|
|
65
|
-
requires = ["
|
|
66
|
-
build-backend = "
|
|
54
|
+
requires = ["maturin>=1.0,<2.0"]
|
|
55
|
+
build-backend = "maturin"
|
|
56
|
+
|
|
57
|
+
[tool.maturin]
|
|
58
|
+
python-source = "src"
|
|
59
|
+
manifest-path = "rust/Cargo.toml"
|
|
60
|
+
module-name = "gbcms._rs"
|
|
67
61
|
|
|
68
|
-
[tool.hatch.build.targets.wheel]
|
|
69
|
-
packages = ["src/gbcms"]
|
|
70
62
|
|
|
71
63
|
[tool.pytest.ini_options]
|
|
72
64
|
testpaths = ["tests"]
|
|
@@ -89,7 +81,7 @@ include = '\.pyi?$'
|
|
|
89
81
|
|
|
90
82
|
[tool.ruff]
|
|
91
83
|
line-length = 100
|
|
92
|
-
target-version = "
|
|
84
|
+
target-version = "py310"
|
|
93
85
|
|
|
94
86
|
[tool.ruff.lint]
|
|
95
87
|
select = [
|
|
@@ -127,9 +119,7 @@ disable_error_code = ["call-arg"]
|
|
|
127
119
|
[[tool.mypy.overrides]]
|
|
128
120
|
module = [
|
|
129
121
|
"pysam.*",
|
|
130
|
-
"
|
|
131
|
-
"joblib.*",
|
|
132
|
-
"cyvcf2.*",
|
|
122
|
+
"gbcms._rs",
|
|
133
123
|
]
|
|
134
124
|
ignore_missing_imports = true
|
|
135
125
|
|
|
@@ -152,7 +142,6 @@ exclude_lines = [
|
|
|
152
142
|
dev = [
|
|
153
143
|
"black>=25.9.0",
|
|
154
144
|
"mypy>=1.18.2",
|
|
155
|
-
"pre-commit>=4.3.0",
|
|
156
145
|
"pytest>=8.4.2",
|
|
157
146
|
"pytest-cov>=4.1.0",
|
|
158
147
|
"pytest-benchmark>=5.1.0",
|