py-gbcms 2.1.2__tar.gz → 2.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {py_gbcms-2.1.2 → py_gbcms-2.2.0}/LICENSE +49 -52
- {py_gbcms-2.1.2 → py_gbcms-2.2.0}/PKG-INFO +22 -22
- {py_gbcms-2.1.2 → py_gbcms-2.2.0}/pyproject.toml +12 -27
- {py_gbcms-2.1.2/src/gbcms_rs → py_gbcms-2.2.0/rust}/Cargo.lock +1 -1
- {py_gbcms-2.1.2/src/gbcms_rs → py_gbcms-2.2.0/rust}/Cargo.toml +3 -3
- {py_gbcms-2.1.2/src/gbcms_rs → py_gbcms-2.2.0/rust}/src/lib.rs +2 -2
- py_gbcms-2.2.0/src/gbcms/__init__.py +23 -0
- py_gbcms-2.2.0/src/gbcms/_rs.pyi +49 -0
- {py_gbcms-2.1.2 → py_gbcms-2.2.0}/src/gbcms/cli.py +104 -63
- py_gbcms-2.2.0/src/gbcms/core/__init__.py +9 -0
- {py_gbcms-2.1.2 → py_gbcms-2.2.0}/src/gbcms/core/kernel.py +2 -0
- py_gbcms-2.2.0/src/gbcms/io/__init__.py +18 -0
- {py_gbcms-2.1.2 → py_gbcms-2.2.0}/src/gbcms/io/input.py +6 -1
- {py_gbcms-2.1.2 → py_gbcms-2.2.0}/src/gbcms/io/output.py +1 -8
- py_gbcms-2.2.0/src/gbcms/models/__init__.py +27 -0
- py_gbcms-2.2.0/src/gbcms/models/core.py +172 -0
- py_gbcms-2.2.0/src/gbcms/pipeline.py +257 -0
- py_gbcms-2.2.0/src/gbcms/utils/__init__.py +14 -0
- py_gbcms-2.2.0/src/gbcms/utils/logging.py +123 -0
- py_gbcms-2.1.2/.gitignore +0 -84
- py_gbcms-2.1.2/CHANGELOG.md +0 -192
- py_gbcms-2.1.2/CONTRIBUTING.md +0 -147
- py_gbcms-2.1.2/src/gbcms/__init__.py +0 -1
- py_gbcms-2.1.2/src/gbcms/models/core.py +0 -133
- py_gbcms-2.1.2/src/gbcms/pipeline.py +0 -212
- py_gbcms-2.1.2/src/gbcms_rs/pyproject.toml +0 -13
- py_gbcms-2.1.2/src/gbcms_rs.pyi +0 -50
- py_gbcms-2.1.2/uv.lock +0 -1381
- {py_gbcms-2.1.2 → py_gbcms-2.2.0}/README.md +0 -0
- {py_gbcms-2.1.2/src/gbcms_rs → py_gbcms-2.2.0/rust}/.gitignore +0 -0
- {py_gbcms-2.1.2/src/gbcms_rs → py_gbcms-2.2.0/rust}/src/counting.rs +0 -0
- {py_gbcms-2.1.2/src/gbcms_rs → py_gbcms-2.2.0/rust}/src/stats.rs +0 -0
- {py_gbcms-2.1.2/src/gbcms_rs → py_gbcms-2.2.0/rust}/src/types.rs +0 -0
- {py_gbcms-2.1.2 → py_gbcms-2.2.0}/src/gbcms/py.typed +0 -0
|
@@ -1,16 +1,11 @@
|
|
|
1
1
|
GNU AFFERO GENERAL PUBLIC LICENSE
|
|
2
2
|
Version 3, 19 November 2007
|
|
3
3
|
|
|
4
|
-
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
|
4
|
+
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
|
|
5
5
|
Everyone is permitted to copy and distribute verbatim copies
|
|
6
6
|
of this license document, but changing it is not allowed.
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
1. Definitions.
|
|
11
|
-
|
|
12
|
-
"License" shall mean the terms and conditions for use, reproduction,
|
|
13
|
-
and distribution as defined by Sections 1 through 9 of this document.
|
|
8
|
+
Preamble
|
|
14
9
|
|
|
15
10
|
The GNU Affero General Public License is a free, copyleft license for
|
|
16
11
|
software and other kinds of works, specifically designed to ensure
|
|
@@ -18,15 +13,16 @@ cooperation with the community in the case of network server software.
|
|
|
18
13
|
|
|
19
14
|
The licenses for most software and other practical works are designed
|
|
20
15
|
to take away your freedom to share and change the works. By contrast,
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
you can
|
|
16
|
+
our General Public Licenses are intended to guarantee your freedom to
|
|
17
|
+
share and change all versions of a program--to make sure it remains free
|
|
18
|
+
software for all its users.
|
|
19
|
+
|
|
20
|
+
When we speak of free software, we are referring to freedom, not
|
|
21
|
+
price. Our General Public Licenses are designed to make sure that you
|
|
22
|
+
have the freedom to distribute copies of free software (and charge for
|
|
23
|
+
them if you wish), that you receive source code or can get it if you
|
|
24
|
+
want it, that you can change the software or use pieces of it in new
|
|
25
|
+
free programs, and that you know you can do these things.
|
|
30
26
|
|
|
31
27
|
Developers that use our General Public Licenses protect your rights
|
|
32
28
|
with two steps: (1) assert copyright on the software, and (2) offer
|
|
@@ -39,23 +35,39 @@ receive widespread use, become available for other developers to
|
|
|
39
35
|
incorporate. Many developers of free software are heartened and
|
|
40
36
|
encouraged by the resulting cooperation. However, in the case of
|
|
41
37
|
software used on network servers, this result may fail to come about.
|
|
42
|
-
The GNU
|
|
43
|
-
|
|
44
|
-
|
|
38
|
+
The GNU General Public License permits making a modified version and
|
|
39
|
+
letting the public access it on a server without ever releasing its
|
|
40
|
+
source code to the public.
|
|
41
|
+
|
|
42
|
+
The GNU Affero General Public License is designed specifically to
|
|
43
|
+
ensure that, in such cases, the modified source code becomes available
|
|
44
|
+
to the community. It requires the operator of a network server to
|
|
45
|
+
provide the source code of the modified version running there to the
|
|
46
|
+
users of that server. Therefore, public use of a modified version, on
|
|
47
|
+
a publicly accessible server, gives the public access to the source
|
|
48
|
+
code of the modified version.
|
|
49
|
+
|
|
50
|
+
An older license, called the Affero General Public License and
|
|
51
|
+
published by Affero, was designed to accomplish similar goals. This is
|
|
52
|
+
a different license, not a version of the Affero GPL, but Affero has
|
|
53
|
+
released a new version of the Affero GPL which permits relicensing under
|
|
54
|
+
this license.
|
|
55
|
+
|
|
56
|
+
The precise terms and conditions for copying, distribution and
|
|
57
|
+
modification follow.
|
|
45
58
|
|
|
46
|
-
|
|
47
|
-
gratis or for a fee, and make the source code available to users so
|
|
48
|
-
they can modify the program while keeping the network server running,
|
|
49
|
-
you must offer the source code under the GNU Affero General Public
|
|
50
|
-
License.
|
|
59
|
+
TERMS AND CONDITIONS
|
|
51
60
|
|
|
52
|
-
|
|
53
|
-
Public License, but includes an additional permission and a requirement
|
|
54
|
-
regarding network server software that is different from the GNU GPL.
|
|
61
|
+
0. Definitions.
|
|
55
62
|
|
|
56
|
-
"
|
|
57
|
-
|
|
58
|
-
|
|
63
|
+
"This License" refers to version 3 of the GNU Affero General Public License.
|
|
64
|
+
|
|
65
|
+
"Copyright" also means copyright-like laws that apply to other kinds of
|
|
66
|
+
works, such as semiconductor masks.
|
|
67
|
+
|
|
68
|
+
"The Program" refers to any copyrightable work licensed under this
|
|
69
|
+
License. Each licensee is addressed as "you". "Licensees" and
|
|
70
|
+
"recipients" may be individuals or organizations.
|
|
59
71
|
|
|
60
72
|
To "modify" a work means to copy from or adapt all or part of the work
|
|
61
73
|
in a fashion requiring copyright permission, other than the making of an
|
|
@@ -262,9 +274,9 @@ in one of these ways:
|
|
|
262
274
|
available for as long as needed to satisfy these requirements.
|
|
263
275
|
|
|
264
276
|
e) Convey the object code using peer-to-peer transmission, provided
|
|
265
|
-
you inform other peers where the object and
|
|
266
|
-
are being offered to the general public at no
|
|
267
|
-
subsection 6d.
|
|
277
|
+
you inform other peers where the object code and Corresponding
|
|
278
|
+
Source of the work are being offered to the general public at no
|
|
279
|
+
charge under subsection 6d.
|
|
268
280
|
|
|
269
281
|
A separable portion of the object code, whose source code is excluded
|
|
270
282
|
from the Corresponding Source as a System Library, need not be
|
|
@@ -353,7 +365,7 @@ that material) supplement the terms of this License with terms:
|
|
|
353
365
|
authors of the material; or
|
|
354
366
|
|
|
355
367
|
e) Declining to grant rights under trademark law for use of some
|
|
356
|
-
trade names,
|
|
368
|
+
trade names, trademarks, or service marks; or
|
|
357
369
|
|
|
358
370
|
f) Requiring indemnification of licensors and authors of that
|
|
359
371
|
material by anyone who conveys the material (or modified versions of
|
|
@@ -533,7 +545,7 @@ interacting with it remotely through a computer network (if your version
|
|
|
533
545
|
supports such interaction) an opportunity to receive the Corresponding
|
|
534
546
|
Source of your version by providing access to the Corresponding Source
|
|
535
547
|
from a network server at no charge, through some standard or customary
|
|
536
|
-
means of facilitating copying of software. This
|
|
548
|
+
means of facilitating copying of software. This Corresponding Source
|
|
537
549
|
shall include the Corresponding Source for any work covered by version 3
|
|
538
550
|
of the GNU General Public License that is incorporated pursuant to the
|
|
539
551
|
following paragraph.
|
|
@@ -631,7 +643,7 @@ the "copyright" line and a pointer to where the full notice is found.
|
|
|
631
643
|
GNU Affero General Public License for more details.
|
|
632
644
|
|
|
633
645
|
You should have received a copy of the GNU Affero General Public License
|
|
634
|
-
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
646
|
+
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
635
647
|
|
|
636
648
|
Also add information on how to contact you by electronic and paper mail.
|
|
637
649
|
|
|
@@ -646,19 +658,4 @@ specific requirements.
|
|
|
646
658
|
You should also get your employer (if you work as a programmer) or school,
|
|
647
659
|
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
|
648
660
|
For more information on this, and how to apply and follow the GNU AGPL, see
|
|
649
|
-
<http://www.gnu.org/licenses/>.
|
|
650
|
-
|
|
651
|
-
Copyright 2024 MSK-ACCESS Team
|
|
652
|
-
|
|
653
|
-
This program is free software: you can redistribute it and/or modify
|
|
654
|
-
it under the terms of the GNU Affero General Public License as published by
|
|
655
|
-
the Free Software Foundation, either version 3 of the License, or
|
|
656
|
-
(at your option) any later version.
|
|
657
|
-
|
|
658
|
-
This program is distributed in the hope that it will be useful,
|
|
659
|
-
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
660
|
-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
661
|
-
GNU Affero General Public License for more details.
|
|
662
|
-
|
|
663
|
-
You should have received a copy of the GNU Affero General Public License
|
|
664
|
-
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
661
|
+
<https://www.gnu.org/licenses/>.
|
|
@@ -1,38 +1,37 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: py-gbcms
|
|
3
|
-
Version: 2.1.2
|
|
4
|
-
Summary: Python implementation of GetBaseCountsMultiSample (gbcms) for calculating base counts in BAM files
|
|
5
|
-
Project-URL: Homepage, https://github.com/msk-access/py-gbcms
|
|
6
|
-
Project-URL: Repository, https://github.com/msk-access/py-gbcms
|
|
7
|
-
Project-URL: Documentation, https://github.com/msk-access/py-gbcms#readme
|
|
8
|
-
Project-URL: Bug Tracker, https://github.com/msk-access/py-gbcms/issues
|
|
9
|
-
Author-email: MSK-ACCESS <shahr2@mskcc.org>
|
|
10
|
-
License: AGPL-3.0
|
|
11
|
-
License-File: LICENSE
|
|
12
|
-
Keywords: bam,base-counts,bioinformatics,gbcms,genomics,maf,vcf
|
|
3
|
+
Version: 2.2.0
|
|
13
4
|
Classifier: Development Status :: 4 - Beta
|
|
14
5
|
Classifier: Intended Audience :: Science/Research
|
|
15
6
|
Classifier: License :: OSI Approved :: GNU Affero General Public License v3
|
|
16
7
|
Classifier: Programming Language :: Python :: 3.11
|
|
17
8
|
Classifier: Programming Language :: Python :: 3.12
|
|
18
9
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
19
|
-
Requires-Python: >=3.10
|
|
20
|
-
Requires-Dist: pydantic>=2.0.0
|
|
21
10
|
Requires-Dist: pysam>=0.21.0
|
|
22
|
-
Requires-Dist: rich>=13.0.0
|
|
23
11
|
Requires-Dist: typer>=0.9.0
|
|
12
|
+
Requires-Dist: rich>=13.0.0
|
|
13
|
+
Requires-Dist: pydantic>=2.0.0
|
|
14
|
+
Requires-Dist: pytest>=7.4.0 ; extra == 'dev'
|
|
15
|
+
Requires-Dist: pytest-cov>=4.1.0 ; extra == 'dev'
|
|
16
|
+
Requires-Dist: pytest-mock>=3.11.0 ; extra == 'dev'
|
|
17
|
+
Requires-Dist: black>=23.0.0 ; extra == 'dev'
|
|
18
|
+
Requires-Dist: ruff>=0.1.0 ; extra == 'dev'
|
|
19
|
+
Requires-Dist: mypy>=1.5.0 ; extra == 'dev'
|
|
20
|
+
Requires-Dist: types-pyyaml>=6.0.0 ; extra == 'dev'
|
|
21
|
+
Requires-Dist: mkdocs-material>=9.0.0 ; extra == 'dev'
|
|
24
22
|
Provides-Extra: all
|
|
25
23
|
Provides-Extra: dev
|
|
26
|
-
Requires-Dist: black>=23.0.0; extra == 'dev'
|
|
27
|
-
Requires-Dist: mkdocs-material>=9.0.0; extra == 'dev'
|
|
28
|
-
Requires-Dist: mypy>=1.5.0; extra == 'dev'
|
|
29
|
-
Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
|
|
30
|
-
Requires-Dist: pytest-mock>=3.11.0; extra == 'dev'
|
|
31
|
-
Requires-Dist: pytest>=7.4.0; extra == 'dev'
|
|
32
|
-
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
33
|
-
Requires-Dist: types-pyyaml>=6.0.0; extra == 'dev'
|
|
34
24
|
Provides-Extra: fast
|
|
35
|
-
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Summary: Python implementation of GetBaseCountsMultiSample (gbcms) for calculating base counts in BAM files
|
|
27
|
+
Keywords: bioinformatics,genomics,bam,vcf,maf,base-counts,gbcms
|
|
28
|
+
Author-email: MSK-ACCESS <shahr2@mskcc.org>
|
|
29
|
+
Requires-Python: >=3.10
|
|
30
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
31
|
+
Project-URL: Bug Tracker, https://github.com/msk-access/py-gbcms/issues
|
|
32
|
+
Project-URL: Documentation, https://github.com/msk-access/py-gbcms#readme
|
|
33
|
+
Project-URL: Homepage, https://github.com/msk-access/py-gbcms
|
|
34
|
+
Project-URL: Repository, https://github.com/msk-access/py-gbcms
|
|
36
35
|
|
|
37
36
|
# py-gbcms
|
|
38
37
|
|
|
@@ -214,3 +213,4 @@ AGPL-3.0 - see [LICENSE](LICENSE) for details.
|
|
|
214
213
|
|
|
215
214
|
- 🐛 **Issues:** https://github.com/msk-access/py-gbcms/issues
|
|
216
215
|
- 💬 **Discussions:** https://github.com/msk-access/py-gbcms/discussions
|
|
216
|
+
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "py-gbcms"
|
|
3
|
-
version = "2.1.2"
|
|
3
|
+
version = "2.2.0"
|
|
4
4
|
description = "Python implementation of GetBaseCountsMultiSample (gbcms) for calculating base counts in BAM files"
|
|
5
5
|
authors = [
|
|
6
6
|
{name = "MSK-ACCESS", email = "shahr2@mskcc.org"}
|
|
7
7
|
]
|
|
8
8
|
readme = "README.md"
|
|
9
9
|
requires-python = ">=3.10"
|
|
10
|
-
license = {text = "AGPL-3.0"}
|
|
10
|
+
license = {file = "LICENSE"}
|
|
11
11
|
keywords = ["bioinformatics", "genomics", "bam", "vcf", "maf", "base-counts", "gbcms"]
|
|
12
12
|
classifiers = [
|
|
13
13
|
"Development Status :: 4 - Beta",
|
|
@@ -51,29 +51,14 @@ Documentation = "https://github.com/msk-access/py-gbcms#readme"
|
|
|
51
51
|
"Bug Tracker" = "https://github.com/msk-access/py-gbcms/issues"
|
|
52
52
|
|
|
53
53
|
[build-system]
|
|
54
|
-
requires = ["
|
|
55
|
-
build-backend = "
|
|
56
|
-
|
|
57
|
-
[tool.
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
"/.github",
|
|
63
|
-
"/docs",
|
|
64
|
-
"/tests",
|
|
65
|
-
"/nextflow",
|
|
66
|
-
"/examples",
|
|
67
|
-
"/.gitignore",
|
|
68
|
-
"/.gitbook.yaml",
|
|
69
|
-
"/mkdocs.yml",
|
|
70
|
-
"/docker-compose.yml",
|
|
71
|
-
"/Dockerfile",
|
|
72
|
-
"/Makefile",
|
|
73
|
-
"/test_real_data.sh",
|
|
74
|
-
"/git-flow-helper.sh",
|
|
75
|
-
"/scripts",
|
|
76
|
-
]
|
|
54
|
+
requires = ["maturin>=1.0,<2.0"]
|
|
55
|
+
build-backend = "maturin"
|
|
56
|
+
|
|
57
|
+
[tool.maturin]
|
|
58
|
+
python-source = "src"
|
|
59
|
+
manifest-path = "rust/Cargo.toml"
|
|
60
|
+
module-name = "gbcms._rs"
|
|
61
|
+
|
|
77
62
|
|
|
78
63
|
[tool.pytest.ini_options]
|
|
79
64
|
testpaths = ["tests"]
|
|
@@ -96,7 +81,7 @@ include = '\.pyi?$'
|
|
|
96
81
|
|
|
97
82
|
[tool.ruff]
|
|
98
83
|
line-length = 100
|
|
99
|
-
target-version = "
|
|
84
|
+
target-version = "py310"
|
|
100
85
|
|
|
101
86
|
[tool.ruff.lint]
|
|
102
87
|
select = [
|
|
@@ -134,7 +119,7 @@ disable_error_code = ["call-arg"]
|
|
|
134
119
|
[[tool.mypy.overrides]]
|
|
135
120
|
module = [
|
|
136
121
|
"pysam.*",
|
|
137
|
-
"
|
|
122
|
+
"gbcms._rs",
|
|
138
123
|
]
|
|
139
124
|
ignore_missing_imports = true
|
|
140
125
|
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "gbcms_rs"
|
|
3
|
-
version = "2.1.2"
|
|
4
|
-
edition = "
|
|
3
|
+
version = "2.2.0"
|
|
4
|
+
edition = "2021"
|
|
5
5
|
|
|
6
6
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
|
7
7
|
[lib]
|
|
8
|
-
name = "gbcms_rs"
|
|
8
|
+
name = "_rs"
|
|
9
9
|
crate-type = ["cdylib"]
|
|
10
10
|
|
|
11
11
|
[dependencies]
|
|
@@ -5,9 +5,9 @@ mod counting;
|
|
|
5
5
|
mod stats;
|
|
6
6
|
mod types;
|
|
7
7
|
|
|
8
|
-
/// A Python module implemented in Rust.
|
|
8
|
+
/// A Python module implemented in Rust (bundled as gbcms._rs).
|
|
9
9
|
#[pymodule]
|
|
10
|
-
fn gbcms_rs(m: &Bound<'_, PyModule>) -> PyResult<()> {
|
|
10
|
+
fn _rs(m: &Bound<'_, PyModule>) -> PyResult<()> {
|
|
11
11
|
pyo3_log::init();
|
|
12
12
|
m.add_function(wrap_pyfunction!(counting::count_bam, m)?)?;
|
|
13
13
|
m.add_class::<types::Variant>()?;
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""
|
|
2
|
+
gbcms (Get Base Counts Multi-Sample) - A tool for counting bases at variant positions.
|
|
3
|
+
|
|
4
|
+
This package provides a command-line interface and Python API for genotyping
|
|
5
|
+
variants in BAM files using a high-performance Rust counting engine.
|
|
6
|
+
|
|
7
|
+
Example usage:
|
|
8
|
+
$ gbcms run -v variants.vcf -b sample.bam -f reference.fa -o output/
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
__version__ = "2.2.0"
|
|
12
|
+
|
|
13
|
+
from .models.core import GbcmsConfig, OutputFormat, Variant, VariantType
|
|
14
|
+
from .pipeline import Pipeline
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"__version__",
|
|
18
|
+
"GbcmsConfig",
|
|
19
|
+
"OutputFormat",
|
|
20
|
+
"Pipeline",
|
|
21
|
+
"Variant",
|
|
22
|
+
"VariantType",
|
|
23
|
+
]
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Type stubs for the Rust extension module (gbcms._rs)
|
|
2
|
+
# This file tells mypy about the types in the native extension
|
|
3
|
+
|
|
4
|
+
class Variant:
|
|
5
|
+
chrom: str
|
|
6
|
+
pos: int
|
|
7
|
+
ref_allele: str
|
|
8
|
+
alt_allele: str
|
|
9
|
+
variant_type: str
|
|
10
|
+
|
|
11
|
+
def __init__(
|
|
12
|
+
self,
|
|
13
|
+
chrom: str,
|
|
14
|
+
pos: int,
|
|
15
|
+
ref_allele: str,
|
|
16
|
+
alt_allele: str,
|
|
17
|
+
variant_type: str,
|
|
18
|
+
) -> None: ...
|
|
19
|
+
|
|
20
|
+
class BaseCounts:
|
|
21
|
+
chrom: str
|
|
22
|
+
pos: int
|
|
23
|
+
ref: str
|
|
24
|
+
alt: str
|
|
25
|
+
dp: int
|
|
26
|
+
rd: int
|
|
27
|
+
ad: int
|
|
28
|
+
rd_fwd: int
|
|
29
|
+
rd_rev: int
|
|
30
|
+
ad_fwd: int
|
|
31
|
+
ad_rev: int
|
|
32
|
+
dp_fragment: int
|
|
33
|
+
rd_fragment: int
|
|
34
|
+
ad_fragment: int
|
|
35
|
+
sb_pvalue: float
|
|
36
|
+
|
|
37
|
+
def count_bam(
|
|
38
|
+
bam_path: str,
|
|
39
|
+
variants: list[Variant],
|
|
40
|
+
min_mapq: int = 20,
|
|
41
|
+
min_baseq: int = 0,
|
|
42
|
+
filter_duplicates: bool = True,
|
|
43
|
+
filter_secondary: bool = False,
|
|
44
|
+
filter_supplementary: bool = False,
|
|
45
|
+
filter_qc_failed: bool = False,
|
|
46
|
+
filter_improper_pair: bool = False,
|
|
47
|
+
filter_indel: bool = False,
|
|
48
|
+
threads: int = 1,
|
|
49
|
+
) -> list[BaseCounts]: ...
|
|
@@ -2,12 +2,24 @@
|
|
|
2
2
|
CLI Entry Point: Exposes the gbcms functionality via command line.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
+
import logging
|
|
5
6
|
from pathlib import Path
|
|
6
7
|
|
|
7
8
|
import typer
|
|
8
9
|
|
|
9
|
-
from .models.core import
|
|
10
|
+
from .models.core import (
|
|
11
|
+
GbcmsConfig,
|
|
12
|
+
OutputConfig,
|
|
13
|
+
OutputFormat,
|
|
14
|
+
QualityThresholds,
|
|
15
|
+
ReadFilters,
|
|
16
|
+
)
|
|
10
17
|
from .pipeline import Pipeline
|
|
18
|
+
from .utils import setup_logging
|
|
19
|
+
|
|
20
|
+
__all__ = ["app", "run"]
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
11
23
|
|
|
12
24
|
app = typer.Typer(help="gbcms: Get Base Counts Multi-Sample")
|
|
13
25
|
|
|
@@ -22,6 +34,7 @@ def main():
|
|
|
22
34
|
|
|
23
35
|
@app.command()
|
|
24
36
|
def run(
|
|
37
|
+
# Input options
|
|
25
38
|
variant_file: Path = typer.Option(
|
|
26
39
|
..., "--variants", "-v", help="Path to VCF or MAF file containing variants"
|
|
27
40
|
),
|
|
@@ -32,6 +45,7 @@ def run(
|
|
|
32
45
|
None, "--bam-list", "-L", help="File containing list of BAM paths (one per line)"
|
|
33
46
|
),
|
|
34
47
|
reference: Path = typer.Option(..., "--fasta", "-f", help="Path to reference FASTA file"),
|
|
48
|
+
# Output options
|
|
35
49
|
output_dir: Path = typer.Option(
|
|
36
50
|
..., "--output-dir", "-o", help="Directory to write output files"
|
|
37
51
|
),
|
|
@@ -41,65 +55,106 @@ def run(
|
|
|
41
55
|
output_suffix: str = typer.Option(
|
|
42
56
|
"", "--suffix", "-S", help="Suffix to append to output filename (e.g. '.genotyped')"
|
|
43
57
|
),
|
|
58
|
+
# Quality thresholds
|
|
44
59
|
min_mapq: int = typer.Option(20, "--min-mapq", help="Minimum mapping quality"),
|
|
45
60
|
min_baseq: int = typer.Option(0, "--min-baseq", help="Minimum base quality"),
|
|
61
|
+
# Read filters
|
|
46
62
|
filter_duplicates: bool = typer.Option(True, help="Filter duplicate reads"),
|
|
47
63
|
filter_secondary: bool = typer.Option(False, help="Filter secondary alignments"),
|
|
48
64
|
filter_supplementary: bool = typer.Option(False, help="Filter supplementary alignments"),
|
|
49
65
|
filter_qc_failed: bool = typer.Option(False, help="Filter reads failing QC"),
|
|
50
66
|
filter_improper_pair: bool = typer.Option(False, help="Filter improperly paired reads"),
|
|
51
67
|
filter_indel: bool = typer.Option(False, help="Filter reads containing indels"),
|
|
68
|
+
# Performance
|
|
52
69
|
threads: int = typer.Option(
|
|
53
|
-
1, "--threads", "-t", help="Number of threads
|
|
70
|
+
1, "--threads", "-t", help="Number of threads for parallel processing"
|
|
54
71
|
),
|
|
55
72
|
verbose: bool = typer.Option(False, "--verbose", "-V", help="Enable verbose debug logging"),
|
|
56
73
|
):
|
|
57
74
|
"""
|
|
58
75
|
Run gbcms on one or more BAM files.
|
|
59
76
|
"""
|
|
60
|
-
|
|
77
|
+
# Configure logging
|
|
78
|
+
setup_logging(verbose=verbose)
|
|
61
79
|
|
|
62
|
-
|
|
63
|
-
|
|
80
|
+
# Parse BAM inputs
|
|
81
|
+
bams_dict = _parse_bam_inputs(bam_files, bam_list)
|
|
64
82
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
83
|
+
if not bams_dict:
|
|
84
|
+
logger.error("No valid BAM files provided via --bam or --bam-list")
|
|
85
|
+
raise typer.Exit(code=1)
|
|
86
|
+
|
|
87
|
+
logger.info("Found %d BAM file(s) to process", len(bams_dict))
|
|
88
|
+
|
|
89
|
+
try:
|
|
90
|
+
# Build nested config objects
|
|
91
|
+
output_config = OutputConfig(
|
|
92
|
+
directory=output_dir,
|
|
93
|
+
format=output_format,
|
|
94
|
+
suffix=output_suffix,
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
quality_config = QualityThresholds(
|
|
98
|
+
min_mapping_quality=min_mapq,
|
|
99
|
+
min_base_quality=min_baseq,
|
|
100
|
+
)
|
|
73
101
|
|
|
74
|
-
|
|
102
|
+
filter_config = ReadFilters(
|
|
103
|
+
duplicates=filter_duplicates,
|
|
104
|
+
secondary=filter_secondary,
|
|
105
|
+
supplementary=filter_supplementary,
|
|
106
|
+
qc_failed=filter_qc_failed,
|
|
107
|
+
improper_pair=filter_improper_pair,
|
|
108
|
+
indel=filter_indel,
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
config = GbcmsConfig(
|
|
112
|
+
variant_file=variant_file,
|
|
113
|
+
bam_files=bams_dict,
|
|
114
|
+
reference_fasta=reference,
|
|
115
|
+
output=output_config,
|
|
116
|
+
quality=quality_config,
|
|
117
|
+
filters=filter_config,
|
|
118
|
+
threads=threads,
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
pipeline = Pipeline(config)
|
|
122
|
+
pipeline.run()
|
|
123
|
+
|
|
124
|
+
except Exception as e:
|
|
125
|
+
logger.exception("Pipeline failed: %s", e)
|
|
126
|
+
raise typer.Exit(code=1) from e
|
|
75
127
|
|
|
76
|
-
|
|
77
|
-
|
|
128
|
+
|
|
129
|
+
def _parse_bam_inputs(bam_files: list[Path] | None, bam_list: Path | None) -> dict[str, Path]:
|
|
130
|
+
"""
|
|
131
|
+
Parse BAM inputs from direct arguments and/or BAM list file.
|
|
132
|
+
|
|
133
|
+
Args:
|
|
134
|
+
bam_files: List of BAM paths (optionally with sample_id:path format).
|
|
135
|
+
bam_list: Path to file containing BAM paths (one per line).
|
|
136
|
+
|
|
137
|
+
Returns:
|
|
138
|
+
Dictionary mapping sample names to BAM paths.
|
|
139
|
+
"""
|
|
140
|
+
bams_dict: dict[str, Path] = {}
|
|
78
141
|
|
|
79
142
|
# 1. Process direct BAM arguments
|
|
80
143
|
if bam_files:
|
|
81
144
|
for bam_arg in bam_files:
|
|
82
|
-
|
|
83
|
-
bam_str = str(bam_arg)
|
|
84
|
-
if ":" in bam_str:
|
|
85
|
-
parts = bam_str.split(":", 1)
|
|
86
|
-
sample_name = parts[0]
|
|
87
|
-
bam_path = Path(parts[1])
|
|
88
|
-
else:
|
|
89
|
-
bam_path = bam_arg
|
|
90
|
-
sample_name = bam_path.stem
|
|
145
|
+
sample_name, bam_path = _parse_bam_arg(bam_arg)
|
|
91
146
|
|
|
92
147
|
if not bam_path.exists():
|
|
93
|
-
|
|
94
|
-
|
|
148
|
+
logger.error("BAM file not found: %s", bam_path)
|
|
149
|
+
continue
|
|
95
150
|
|
|
96
151
|
bams_dict[sample_name] = bam_path
|
|
97
152
|
|
|
98
153
|
# 2. Process BAM list file
|
|
99
154
|
if bam_list:
|
|
100
155
|
if not bam_list.exists():
|
|
101
|
-
|
|
102
|
-
|
|
156
|
+
logger.error("BAM list file not found: %s", bam_list)
|
|
157
|
+
return bams_dict
|
|
103
158
|
|
|
104
159
|
try:
|
|
105
160
|
with open(bam_list) as f:
|
|
@@ -107,7 +162,7 @@ def run(
|
|
|
107
162
|
line = line.strip()
|
|
108
163
|
if not line or line.startswith("#"):
|
|
109
164
|
continue
|
|
110
|
-
|
|
165
|
+
|
|
111
166
|
parts = line.split()
|
|
112
167
|
if len(parts) >= 2:
|
|
113
168
|
sample_name = parts[0]
|
|
@@ -117,46 +172,32 @@ def run(
|
|
|
117
172
|
sample_name = bam_path.stem
|
|
118
173
|
|
|
119
174
|
if not bam_path.exists():
|
|
120
|
-
|
|
121
|
-
f"[yellow]Warning: BAM file from list not found: {bam_path}[/yellow]"
|
|
122
|
-
)
|
|
175
|
+
logger.warning("BAM file from list not found: %s", bam_path)
|
|
123
176
|
continue
|
|
177
|
+
|
|
124
178
|
bams_dict[sample_name] = bam_path
|
|
179
|
+
|
|
125
180
|
except Exception as e:
|
|
126
|
-
|
|
127
|
-
raise typer.Exit(code=1) from e
|
|
181
|
+
logger.error("Error reading BAM list file %s: %s", bam_list, e)
|
|
128
182
|
|
|
129
|
-
|
|
130
|
-
console.print(
|
|
131
|
-
"[bold red]Error: No valid BAM files provided via --bam or --bam-list[/bold red]"
|
|
132
|
-
)
|
|
133
|
-
raise typer.Exit(code=1)
|
|
183
|
+
return bams_dict
|
|
134
184
|
|
|
135
|
-
try:
|
|
136
|
-
config = GbcmsConfig(
|
|
137
|
-
variant_file=variant_file,
|
|
138
|
-
bam_files=bams_dict,
|
|
139
|
-
reference_fasta=reference,
|
|
140
|
-
output_dir=output_dir,
|
|
141
|
-
output_format=output_format,
|
|
142
|
-
output_suffix=output_suffix,
|
|
143
|
-
min_mapping_quality=min_mapq,
|
|
144
|
-
min_base_quality=min_baseq,
|
|
145
|
-
filter_duplicates=filter_duplicates,
|
|
146
|
-
filter_secondary=filter_secondary,
|
|
147
|
-
filter_supplementary=filter_supplementary,
|
|
148
|
-
filter_qc_failed=filter_qc_failed,
|
|
149
|
-
filter_improper_pair=filter_improper_pair,
|
|
150
|
-
filter_indel=filter_indel,
|
|
151
|
-
threads=threads,
|
|
152
|
-
)
|
|
153
185
|
|
|
154
|
-
|
|
155
|
-
|
|
186
|
+
def _parse_bam_arg(bam_arg: Path) -> tuple[str, Path]:
|
|
187
|
+
"""
|
|
188
|
+
Parse a BAM argument that may be in sample_id:path format.
|
|
156
189
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
190
|
+
Args:
|
|
191
|
+
bam_arg: Path object (may contain sample_id:path as string).
|
|
192
|
+
|
|
193
|
+
Returns:
|
|
194
|
+
Tuple of (sample_name, bam_path).
|
|
195
|
+
"""
|
|
196
|
+
bam_str = str(bam_arg)
|
|
197
|
+
if ":" in bam_str:
|
|
198
|
+
parts = bam_str.split(":", 1)
|
|
199
|
+
return parts[0], Path(parts[1])
|
|
200
|
+
return bam_arg.stem, bam_arg
|
|
160
201
|
|
|
161
202
|
|
|
162
203
|
if __name__ == "__main__":
|