py-gbcms 2.1.2__tar.gz → 2.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {py_gbcms-2.1.2 → py_gbcms-2.2.0}/LICENSE +49 -52
  2. {py_gbcms-2.1.2 → py_gbcms-2.2.0}/PKG-INFO +22 -22
  3. {py_gbcms-2.1.2 → py_gbcms-2.2.0}/pyproject.toml +12 -27
  4. {py_gbcms-2.1.2/src/gbcms_rs → py_gbcms-2.2.0/rust}/Cargo.lock +1 -1
  5. {py_gbcms-2.1.2/src/gbcms_rs → py_gbcms-2.2.0/rust}/Cargo.toml +3 -3
  6. {py_gbcms-2.1.2/src/gbcms_rs → py_gbcms-2.2.0/rust}/src/lib.rs +2 -2
  7. py_gbcms-2.2.0/src/gbcms/__init__.py +23 -0
  8. py_gbcms-2.2.0/src/gbcms/_rs.pyi +49 -0
  9. {py_gbcms-2.1.2 → py_gbcms-2.2.0}/src/gbcms/cli.py +104 -63
  10. py_gbcms-2.2.0/src/gbcms/core/__init__.py +9 -0
  11. {py_gbcms-2.1.2 → py_gbcms-2.2.0}/src/gbcms/core/kernel.py +2 -0
  12. py_gbcms-2.2.0/src/gbcms/io/__init__.py +18 -0
  13. {py_gbcms-2.1.2 → py_gbcms-2.2.0}/src/gbcms/io/input.py +6 -1
  14. {py_gbcms-2.1.2 → py_gbcms-2.2.0}/src/gbcms/io/output.py +1 -8
  15. py_gbcms-2.2.0/src/gbcms/models/__init__.py +27 -0
  16. py_gbcms-2.2.0/src/gbcms/models/core.py +172 -0
  17. py_gbcms-2.2.0/src/gbcms/pipeline.py +257 -0
  18. py_gbcms-2.2.0/src/gbcms/utils/__init__.py +14 -0
  19. py_gbcms-2.2.0/src/gbcms/utils/logging.py +123 -0
  20. py_gbcms-2.1.2/.gitignore +0 -84
  21. py_gbcms-2.1.2/CHANGELOG.md +0 -192
  22. py_gbcms-2.1.2/CONTRIBUTING.md +0 -147
  23. py_gbcms-2.1.2/src/gbcms/__init__.py +0 -1
  24. py_gbcms-2.1.2/src/gbcms/models/core.py +0 -133
  25. py_gbcms-2.1.2/src/gbcms/pipeline.py +0 -212
  26. py_gbcms-2.1.2/src/gbcms_rs/pyproject.toml +0 -13
  27. py_gbcms-2.1.2/src/gbcms_rs.pyi +0 -50
  28. py_gbcms-2.1.2/uv.lock +0 -1381
  29. {py_gbcms-2.1.2 → py_gbcms-2.2.0}/README.md +0 -0
  30. {py_gbcms-2.1.2/src/gbcms_rs → py_gbcms-2.2.0/rust}/.gitignore +0 -0
  31. {py_gbcms-2.1.2/src/gbcms_rs → py_gbcms-2.2.0/rust}/src/counting.rs +0 -0
  32. {py_gbcms-2.1.2/src/gbcms_rs → py_gbcms-2.2.0/rust}/src/stats.rs +0 -0
  33. {py_gbcms-2.1.2/src/gbcms_rs → py_gbcms-2.2.0/rust}/src/types.rs +0 -0
  34. {py_gbcms-2.1.2 → py_gbcms-2.2.0}/src/gbcms/py.typed +0 -0
@@ -1,16 +1,11 @@
1
1
  GNU AFFERO GENERAL PUBLIC LICENSE
2
2
  Version 3, 19 November 2007
3
3
 
4
- Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
4
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5
5
  Everyone is permitted to copy and distribute verbatim copies
6
6
  of this license document, but changing it is not allowed.
7
7
 
8
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
9
-
10
- 1. Definitions.
11
-
12
- "License" shall mean the terms and conditions for use, reproduction,
13
- and distribution as defined by Sections 1 through 9 of this document.
8
+ Preamble
14
9
 
15
10
  The GNU Affero General Public License is a free, copyleft license for
16
11
  software and other kinds of works, specifically designed to ensure
@@ -18,15 +13,16 @@ cooperation with the community in the case of network server software.
18
13
 
19
14
  The licenses for most software and other practical works are designed
20
15
  to take away your freedom to share and change the works. By contrast,
21
- the GNU Affero General Public License is intended to guarantee your
22
- freedom to share and change all versions of a program--to make sure it
23
- remains free software for all its users. When we speak of free software,
24
- we are referring to freedom, not price. Our General Public Licenses
25
- are designed to make sure that you have the freedom to distribute copies
26
- of free software (and charge for them if you wish), that you receive
27
- source code or can get it if you want it, that you can change the
28
- software or use pieces of it in new free programs, and that you know
29
- you can do these things.
16
+ our General Public Licenses are intended to guarantee your freedom to
17
+ share and change all versions of a program--to make sure it remains free
18
+ software for all its users.
19
+
20
+ When we speak of free software, we are referring to freedom, not
21
+ price. Our General Public Licenses are designed to make sure that you
22
+ have the freedom to distribute copies of free software (and charge for
23
+ them if you wish), that you receive source code or can get it if you
24
+ want it, that you can change the software or use pieces of it in new
25
+ free programs, and that you know you can do these things.
30
26
 
31
27
  Developers that use our General Public Licenses protect your rights
32
28
  with two steps: (1) assert copyright on the software, and (2) offer
@@ -39,23 +35,39 @@ receive widespread use, become available for other developers to
39
35
  incorporate. Many developers of free software are heartened and
40
36
  encouraged by the resulting cooperation. However, in the case of
41
37
  software used on network servers, this result may fail to come about.
42
- The GNU Affero General Public License includes a provision that helps
43
- developers of such software achieve the same cooperation we expect
44
- from other free software.
38
+ The GNU General Public License permits making a modified version and
39
+ letting the public access it on a server without ever releasing its
40
+ source code to the public.
41
+
42
+ The GNU Affero General Public License is designed specifically to
43
+ ensure that, in such cases, the modified source code becomes available
44
+ to the community. It requires the operator of a network server to
45
+ provide the source code of the modified version running there to the
46
+ users of that server. Therefore, public use of a modified version, on
47
+ a publicly accessible server, gives the public access to the source
48
+ code of the modified version.
49
+
50
+ An older license, called the Affero General Public License and
51
+ published by Affero, was designed to accomplish similar goals. This is
52
+ a different license, not a version of the Affero GPL, but Affero has
53
+ released a new version of the Affero GPL which permits relicensing under
54
+ this license.
55
+
56
+ The precise terms and conditions for copying, distribution and
57
+ modification follow.
45
58
 
46
- For example, if you distribute copies of such a program, whether
47
- gratis or for a fee, and make the source code available to users so
48
- they can modify the program while keeping the network server running,
49
- you must offer the source code under the GNU Affero General Public
50
- License.
59
+ TERMS AND CONDITIONS
51
60
 
52
- The GNU Affero General Public License is based on the GNU General
53
- Public License, but includes an additional permission and a requirement
54
- regarding network server software that is different from the GNU GPL.
61
+ 0. Definitions.
55
62
 
56
- "The Program" here refers to any copyrightable work licensed under
57
- the GNU Affero General Public License. Each licensee is addressed as
58
- "you". "Licensees" and "recipients" may be individuals or organizations.
63
+ "This License" refers to version 3 of the GNU Affero General Public License.
64
+
65
+ "Copyright" also means copyright-like laws that apply to other kinds of
66
+ works, such as semiconductor masks.
67
+
68
+ "The Program" refers to any copyrightable work licensed under this
69
+ License. Each licensee is addressed as "you". "Licensees" and
70
+ "recipients" may be individuals or organizations.
59
71
 
60
72
  To "modify" a work means to copy from or adapt all or part of the work
61
73
  in a fashion requiring copyright permission, other than the making of an
@@ -262,9 +274,9 @@ in one of these ways:
262
274
  available for as long as needed to satisfy these requirements.
263
275
 
264
276
  e) Convey the object code using peer-to-peer transmission, provided
265
- you inform other peers where the object and its Corresponding Source
266
- are being offered to the general public at no charge under
267
- subsection 6d.
277
+ you inform other peers where the object code and Corresponding
278
+ Source of the work are being offered to the general public at no
279
+ charge under subsection 6d.
268
280
 
269
281
  A separable portion of the object code, whose source code is excluded
270
282
  from the Corresponding Source as a System Library, need not be
@@ -353,7 +365,7 @@ that material) supplement the terms of this License with terms:
353
365
  authors of the material; or
354
366
 
355
367
  e) Declining to grant rights under trademark law for use of some
356
- trade names, marks, or slogans; or
368
+ trade names, trademarks, or service marks; or
357
369
 
358
370
  f) Requiring indemnification of licensors and authors of that
359
371
  material by anyone who conveys the material (or modified versions of
@@ -533,7 +545,7 @@ interacting with it remotely through a computer network (if your version
533
545
  supports such interaction) an opportunity to receive the Corresponding
534
546
  Source of your version by providing access to the Corresponding Source
535
547
  from a network server at no charge, through some standard or customary
536
- means of facilitating copying of software. This corresponding source
548
+ means of facilitating copying of software. This Corresponding Source
537
549
  shall include the Corresponding Source for any work covered by version 3
538
550
  of the GNU General Public License that is incorporated pursuant to the
539
551
  following paragraph.
@@ -631,7 +643,7 @@ the "copyright" line and a pointer to where the full notice is found.
631
643
  GNU Affero General Public License for more details.
632
644
 
633
645
  You should have received a copy of the GNU Affero General Public License
634
- along with this program. If not, see <http://www.gnu.org/licenses/>.
646
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
635
647
 
636
648
  Also add information on how to contact you by electronic and paper mail.
637
649
 
@@ -646,19 +658,4 @@ specific requirements.
646
658
  You should also get your employer (if you work as a programmer) or school,
647
659
  if any, to sign a "copyright disclaimer" for the program, if necessary.
648
660
  For more information on this, and how to apply and follow the GNU AGPL, see
649
- <http://www.gnu.org/licenses/>.
650
-
651
- Copyright 2024 MSK-ACCESS Team
652
-
653
- This program is free software: you can redistribute it and/or modify
654
- it under the terms of the GNU Affero General Public License as published by
655
- the Free Software Foundation, either version 3 of the License, or
656
- (at your option) any later version.
657
-
658
- This program is distributed in the hope that it will be useful,
659
- but WITHOUT ANY WARRANTY; without even the implied warranty of
660
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
661
- GNU Affero General Public License for more details.
662
-
663
- You should have received a copy of the GNU Affero General Public License
664
- along with this program. If not, see <http://www.gnu.org/licenses/>.
661
+ <https://www.gnu.org/licenses/>.
@@ -1,38 +1,37 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: py-gbcms
3
- Version: 2.1.2
4
- Summary: Python implementation of GetBaseCountsMultiSample (gbcms) for calculating base counts in BAM files
5
- Project-URL: Homepage, https://github.com/msk-access/py-gbcms
6
- Project-URL: Repository, https://github.com/msk-access/py-gbcms
7
- Project-URL: Documentation, https://github.com/msk-access/py-gbcms#readme
8
- Project-URL: Bug Tracker, https://github.com/msk-access/py-gbcms/issues
9
- Author-email: MSK-ACCESS <shahr2@mskcc.org>
10
- License: AGPL-3.0
11
- License-File: LICENSE
12
- Keywords: bam,base-counts,bioinformatics,gbcms,genomics,maf,vcf
3
+ Version: 2.2.0
13
4
  Classifier: Development Status :: 4 - Beta
14
5
  Classifier: Intended Audience :: Science/Research
15
6
  Classifier: License :: OSI Approved :: GNU Affero General Public License v3
16
7
  Classifier: Programming Language :: Python :: 3.11
17
8
  Classifier: Programming Language :: Python :: 3.12
18
9
  Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
19
- Requires-Python: >=3.10
20
- Requires-Dist: pydantic>=2.0.0
21
10
  Requires-Dist: pysam>=0.21.0
22
- Requires-Dist: rich>=13.0.0
23
11
  Requires-Dist: typer>=0.9.0
12
+ Requires-Dist: rich>=13.0.0
13
+ Requires-Dist: pydantic>=2.0.0
14
+ Requires-Dist: pytest>=7.4.0 ; extra == 'dev'
15
+ Requires-Dist: pytest-cov>=4.1.0 ; extra == 'dev'
16
+ Requires-Dist: pytest-mock>=3.11.0 ; extra == 'dev'
17
+ Requires-Dist: black>=23.0.0 ; extra == 'dev'
18
+ Requires-Dist: ruff>=0.1.0 ; extra == 'dev'
19
+ Requires-Dist: mypy>=1.5.0 ; extra == 'dev'
20
+ Requires-Dist: types-pyyaml>=6.0.0 ; extra == 'dev'
21
+ Requires-Dist: mkdocs-material>=9.0.0 ; extra == 'dev'
24
22
  Provides-Extra: all
25
23
  Provides-Extra: dev
26
- Requires-Dist: black>=23.0.0; extra == 'dev'
27
- Requires-Dist: mkdocs-material>=9.0.0; extra == 'dev'
28
- Requires-Dist: mypy>=1.5.0; extra == 'dev'
29
- Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
30
- Requires-Dist: pytest-mock>=3.11.0; extra == 'dev'
31
- Requires-Dist: pytest>=7.4.0; extra == 'dev'
32
- Requires-Dist: ruff>=0.1.0; extra == 'dev'
33
- Requires-Dist: types-pyyaml>=6.0.0; extra == 'dev'
34
24
  Provides-Extra: fast
35
- Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Summary: Python implementation of GetBaseCountsMultiSample (gbcms) for calculating base counts in BAM files
27
+ Keywords: bioinformatics,genomics,bam,vcf,maf,base-counts,gbcms
28
+ Author-email: MSK-ACCESS <shahr2@mskcc.org>
29
+ Requires-Python: >=3.10
30
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
31
+ Project-URL: Bug Tracker, https://github.com/msk-access/py-gbcms/issues
32
+ Project-URL: Documentation, https://github.com/msk-access/py-gbcms#readme
33
+ Project-URL: Homepage, https://github.com/msk-access/py-gbcms
34
+ Project-URL: Repository, https://github.com/msk-access/py-gbcms
36
35
 
37
36
  # py-gbcms
38
37
 
@@ -214,3 +213,4 @@ AGPL-3.0 - see [LICENSE](LICENSE) for details.
214
213
 
215
214
  - 🐛 **Issues:** https://github.com/msk-access/py-gbcms/issues
216
215
  - 💬 **Discussions:** https://github.com/msk-access/py-gbcms/discussions
216
+
@@ -1,13 +1,13 @@
1
1
  [project]
2
2
  name = "py-gbcms"
3
- version = "2.1.2"
3
+ version = "2.2.0"
4
4
  description = "Python implementation of GetBaseCountsMultiSample (gbcms) for calculating base counts in BAM files"
5
5
  authors = [
6
6
  {name = "MSK-ACCESS", email = "shahr2@mskcc.org"}
7
7
  ]
8
8
  readme = "README.md"
9
9
  requires-python = ">=3.10"
10
- license = {text = "AGPL-3.0"}
10
+ license = {file = "LICENSE"}
11
11
  keywords = ["bioinformatics", "genomics", "bam", "vcf", "maf", "base-counts", "gbcms"]
12
12
  classifiers = [
13
13
  "Development Status :: 4 - Beta",
@@ -51,29 +51,14 @@ Documentation = "https://github.com/msk-access/py-gbcms#readme"
51
51
  "Bug Tracker" = "https://github.com/msk-access/py-gbcms/issues"
52
52
 
53
53
  [build-system]
54
- requires = ["hatchling"]
55
- build-backend = "hatchling.build"
56
-
57
- [tool.hatch.build.targets.wheel]
58
- packages = ["src/gbcms"]
59
-
60
- [tool.hatch.build.targets.sdist]
61
- exclude = [
62
- "/.github",
63
- "/docs",
64
- "/tests",
65
- "/nextflow",
66
- "/examples",
67
- "/.gitignore",
68
- "/.gitbook.yaml",
69
- "/mkdocs.yml",
70
- "/docker-compose.yml",
71
- "/Dockerfile",
72
- "/Makefile",
73
- "/test_real_data.sh",
74
- "/git-flow-helper.sh",
75
- "/scripts",
76
- ]
54
+ requires = ["maturin>=1.0,<2.0"]
55
+ build-backend = "maturin"
56
+
57
+ [tool.maturin]
58
+ python-source = "src"
59
+ manifest-path = "rust/Cargo.toml"
60
+ module-name = "gbcms._rs"
61
+
77
62
 
78
63
  [tool.pytest.ini_options]
79
64
  testpaths = ["tests"]
@@ -96,7 +81,7 @@ include = '\.pyi?$'
96
81
 
97
82
  [tool.ruff]
98
83
  line-length = 100
99
- target-version = "py311"
84
+ target-version = "py310"
100
85
 
101
86
  [tool.ruff.lint]
102
87
  select = [
@@ -134,7 +119,7 @@ disable_error_code = ["call-arg"]
134
119
  [[tool.mypy.overrides]]
135
120
  module = [
136
121
  "pysam.*",
137
- "gbcms_rs",
122
+ "gbcms._rs",
138
123
  ]
139
124
  ignore_missing_imports = true
140
125
 
@@ -257,7 +257,7 @@ dependencies = [
257
257
 
258
258
  [[package]]
259
259
  name = "gbcms_rs"
260
- version = "2.0.0"
260
+ version = "2.2.0"
261
261
  dependencies = [
262
262
  "anyhow",
263
263
  "bio-types",
@@ -1,11 +1,11 @@
1
1
  [package]
2
2
  name = "gbcms_rs"
3
- version = "2.0.0"
4
- edition = "2024"
3
+ version = "2.2.0"
4
+ edition = "2021"
5
5
 
6
6
  # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
7
7
  [lib]
8
- name = "gbcms_rs"
8
+ name = "_rs"
9
9
  crate-type = ["cdylib"]
10
10
 
11
11
  [dependencies]
@@ -5,9 +5,9 @@ mod counting;
5
5
  mod stats;
6
6
  mod types;
7
7
 
8
- /// A Python module implemented in Rust.
8
+ /// A Python module implemented in Rust (bundled as gbcms._rs).
9
9
  #[pymodule]
10
- fn gbcms_rs(m: &Bound<'_, PyModule>) -> PyResult<()> {
10
+ fn _rs(m: &Bound<'_, PyModule>) -> PyResult<()> {
11
11
  pyo3_log::init();
12
12
  m.add_function(wrap_pyfunction!(counting::count_bam, m)?)?;
13
13
  m.add_class::<types::Variant>()?;
@@ -0,0 +1,23 @@
1
+ """
2
+ gbcms (Get Base Counts Multi-Sample) - A tool for counting bases at variant positions.
3
+
4
+ This package provides a command-line interface and Python API for genotyping
5
+ variants in BAM files using a high-performance Rust counting engine.
6
+
7
+ Example usage:
8
+ $ gbcms run -v variants.vcf -b sample.bam -f reference.fa -o output/
9
+ """
10
+
11
+ __version__ = "2.2.0"
12
+
13
+ from .models.core import GbcmsConfig, OutputFormat, Variant, VariantType
14
+ from .pipeline import Pipeline
15
+
16
+ __all__ = [
17
+ "__version__",
18
+ "GbcmsConfig",
19
+ "OutputFormat",
20
+ "Pipeline",
21
+ "Variant",
22
+ "VariantType",
23
+ ]
@@ -0,0 +1,49 @@
1
+ # Type stubs for the Rust extension module (gbcms._rs)
2
+ # This file tells mypy about the types in the native extension
3
+
4
+ class Variant:
5
+ chrom: str
6
+ pos: int
7
+ ref_allele: str
8
+ alt_allele: str
9
+ variant_type: str
10
+
11
+ def __init__(
12
+ self,
13
+ chrom: str,
14
+ pos: int,
15
+ ref_allele: str,
16
+ alt_allele: str,
17
+ variant_type: str,
18
+ ) -> None: ...
19
+
20
+ class BaseCounts:
21
+ chrom: str
22
+ pos: int
23
+ ref: str
24
+ alt: str
25
+ dp: int
26
+ rd: int
27
+ ad: int
28
+ rd_fwd: int
29
+ rd_rev: int
30
+ ad_fwd: int
31
+ ad_rev: int
32
+ dp_fragment: int
33
+ rd_fragment: int
34
+ ad_fragment: int
35
+ sb_pvalue: float
36
+
37
+ def count_bam(
38
+ bam_path: str,
39
+ variants: list[Variant],
40
+ min_mapq: int = 20,
41
+ min_baseq: int = 0,
42
+ filter_duplicates: bool = True,
43
+ filter_secondary: bool = False,
44
+ filter_supplementary: bool = False,
45
+ filter_qc_failed: bool = False,
46
+ filter_improper_pair: bool = False,
47
+ filter_indel: bool = False,
48
+ threads: int = 1,
49
+ ) -> list[BaseCounts]: ...
@@ -2,12 +2,24 @@
2
2
  CLI Entry Point: Exposes the gbcms functionality via command line.
3
3
  """
4
4
 
5
+ import logging
5
6
  from pathlib import Path
6
7
 
7
8
  import typer
8
9
 
9
- from .models.core import GbcmsConfig, OutputFormat
10
+ from .models.core import (
11
+ GbcmsConfig,
12
+ OutputConfig,
13
+ OutputFormat,
14
+ QualityThresholds,
15
+ ReadFilters,
16
+ )
10
17
  from .pipeline import Pipeline
18
+ from .utils import setup_logging
19
+
20
+ __all__ = ["app", "run"]
21
+
22
+ logger = logging.getLogger(__name__)
11
23
 
12
24
  app = typer.Typer(help="gbcms: Get Base Counts Multi-Sample")
13
25
 
@@ -22,6 +34,7 @@ def main():
22
34
 
23
35
  @app.command()
24
36
  def run(
37
+ # Input options
25
38
  variant_file: Path = typer.Option(
26
39
  ..., "--variants", "-v", help="Path to VCF or MAF file containing variants"
27
40
  ),
@@ -32,6 +45,7 @@ def run(
32
45
  None, "--bam-list", "-L", help="File containing list of BAM paths (one per line)"
33
46
  ),
34
47
  reference: Path = typer.Option(..., "--fasta", "-f", help="Path to reference FASTA file"),
48
+ # Output options
35
49
  output_dir: Path = typer.Option(
36
50
  ..., "--output-dir", "-o", help="Directory to write output files"
37
51
  ),
@@ -41,65 +55,106 @@ def run(
41
55
  output_suffix: str = typer.Option(
42
56
  "", "--suffix", "-S", help="Suffix to append to output filename (e.g. '.genotyped')"
43
57
  ),
58
+ # Quality thresholds
44
59
  min_mapq: int = typer.Option(20, "--min-mapq", help="Minimum mapping quality"),
45
60
  min_baseq: int = typer.Option(0, "--min-baseq", help="Minimum base quality"),
61
+ # Read filters
46
62
  filter_duplicates: bool = typer.Option(True, help="Filter duplicate reads"),
47
63
  filter_secondary: bool = typer.Option(False, help="Filter secondary alignments"),
48
64
  filter_supplementary: bool = typer.Option(False, help="Filter supplementary alignments"),
49
65
  filter_qc_failed: bool = typer.Option(False, help="Filter reads failing QC"),
50
66
  filter_improper_pair: bool = typer.Option(False, help="Filter improperly paired reads"),
51
67
  filter_indel: bool = typer.Option(False, help="Filter reads containing indels"),
68
+ # Performance
52
69
  threads: int = typer.Option(
53
- 1, "--threads", "-t", help="Number of threads (not yet implemented in v2 python layer)"
70
+ 1, "--threads", "-t", help="Number of threads for parallel processing"
54
71
  ),
55
72
  verbose: bool = typer.Option(False, "--verbose", "-V", help="Enable verbose debug logging"),
56
73
  ):
57
74
  """
58
75
  Run gbcms on one or more BAM files.
59
76
  """
60
- import logging
77
+ # Configure logging
78
+ setup_logging(verbose=verbose)
61
79
 
62
- from rich.console import Console
63
- from rich.logging import RichHandler
80
+ # Parse BAM inputs
81
+ bams_dict = _parse_bam_inputs(bam_files, bam_list)
64
82
 
65
- # Configure logging
66
- log_level = logging.DEBUG if verbose else logging.INFO
67
- logging.basicConfig(
68
- level=log_level,
69
- format="%(message)s",
70
- datefmt="[%X]",
71
- handlers=[RichHandler(rich_tracebacks=True, markup=True)],
72
- )
83
+ if not bams_dict:
84
+ logger.error("No valid BAM files provided via --bam or --bam-list")
85
+ raise typer.Exit(code=1)
86
+
87
+ logger.info("Found %d BAM file(s) to process", len(bams_dict))
88
+
89
+ try:
90
+ # Build nested config objects
91
+ output_config = OutputConfig(
92
+ directory=output_dir,
93
+ format=output_format,
94
+ suffix=output_suffix,
95
+ )
96
+
97
+ quality_config = QualityThresholds(
98
+ min_mapping_quality=min_mapq,
99
+ min_base_quality=min_baseq,
100
+ )
73
101
 
74
- console = Console()
102
+ filter_config = ReadFilters(
103
+ duplicates=filter_duplicates,
104
+ secondary=filter_secondary,
105
+ supplementary=filter_supplementary,
106
+ qc_failed=filter_qc_failed,
107
+ improper_pair=filter_improper_pair,
108
+ indel=filter_indel,
109
+ )
110
+
111
+ config = GbcmsConfig(
112
+ variant_file=variant_file,
113
+ bam_files=bams_dict,
114
+ reference_fasta=reference,
115
+ output=output_config,
116
+ quality=quality_config,
117
+ filters=filter_config,
118
+ threads=threads,
119
+ )
120
+
121
+ pipeline = Pipeline(config)
122
+ pipeline.run()
123
+
124
+ except Exception as e:
125
+ logger.exception("Pipeline failed: %s", e)
126
+ raise typer.Exit(code=1) from e
75
127
 
76
- # Map BAMs to sample names (filename stem for now)
77
- bams_dict = {}
128
+
129
+ def _parse_bam_inputs(bam_files: list[Path] | None, bam_list: Path | None) -> dict[str, Path]:
130
+ """
131
+ Parse BAM inputs from direct arguments and/or BAM list file.
132
+
133
+ Args:
134
+ bam_files: List of BAM paths (optionally with sample_id:path format).
135
+ bam_list: Path to file containing BAM paths (one per line).
136
+
137
+ Returns:
138
+ Dictionary mapping sample names to BAM paths.
139
+ """
140
+ bams_dict: dict[str, Path] = {}
78
141
 
79
142
  # 1. Process direct BAM arguments
80
143
  if bam_files:
81
144
  for bam_arg in bam_files:
82
- # Check for sample_id:path format
83
- bam_str = str(bam_arg)
84
- if ":" in bam_str:
85
- parts = bam_str.split(":", 1)
86
- sample_name = parts[0]
87
- bam_path = Path(parts[1])
88
- else:
89
- bam_path = bam_arg
90
- sample_name = bam_path.stem
145
+ sample_name, bam_path = _parse_bam_arg(bam_arg)
91
146
 
92
147
  if not bam_path.exists():
93
- console.print(f"[bold red]Error: BAM file not found: {bam_path}[/bold red]")
94
- raise typer.Exit(code=1)
148
+ logger.error("BAM file not found: %s", bam_path)
149
+ continue
95
150
 
96
151
  bams_dict[sample_name] = bam_path
97
152
 
98
153
  # 2. Process BAM list file
99
154
  if bam_list:
100
155
  if not bam_list.exists():
101
- console.print(f"[bold red]Error: BAM list file not found: {bam_list}[/bold red]")
102
- raise typer.Exit(code=1)
156
+ logger.error("BAM list file not found: %s", bam_list)
157
+ return bams_dict
103
158
 
104
159
  try:
105
160
  with open(bam_list) as f:
@@ -107,7 +162,7 @@ def run(
107
162
  line = line.strip()
108
163
  if not line or line.startswith("#"):
109
164
  continue
110
- # Check for 2 columns (sample_id path)
165
+
111
166
  parts = line.split()
112
167
  if len(parts) >= 2:
113
168
  sample_name = parts[0]
@@ -117,46 +172,32 @@ def run(
117
172
  sample_name = bam_path.stem
118
173
 
119
174
  if not bam_path.exists():
120
- console.print(
121
- f"[yellow]Warning: BAM file from list not found: {bam_path}[/yellow]"
122
- )
175
+ logger.warning("BAM file from list not found: %s", bam_path)
123
176
  continue
177
+
124
178
  bams_dict[sample_name] = bam_path
179
+
125
180
  except Exception as e:
126
- console.print(f"[bold red]Error reading BAM list file {bam_list}: {e}[/bold red]")
127
- raise typer.Exit(code=1) from e
181
+ logger.error("Error reading BAM list file %s: %s", bam_list, e)
128
182
 
129
- if not bams_dict:
130
- console.print(
131
- "[bold red]Error: No valid BAM files provided via --bam or --bam-list[/bold red]"
132
- )
133
- raise typer.Exit(code=1)
183
+ return bams_dict
134
184
 
135
- try:
136
- config = GbcmsConfig(
137
- variant_file=variant_file,
138
- bam_files=bams_dict,
139
- reference_fasta=reference,
140
- output_dir=output_dir,
141
- output_format=output_format,
142
- output_suffix=output_suffix,
143
- min_mapping_quality=min_mapq,
144
- min_base_quality=min_baseq,
145
- filter_duplicates=filter_duplicates,
146
- filter_secondary=filter_secondary,
147
- filter_supplementary=filter_supplementary,
148
- filter_qc_failed=filter_qc_failed,
149
- filter_improper_pair=filter_improper_pair,
150
- filter_indel=filter_indel,
151
- threads=threads,
152
- )
153
185
 
154
- pipeline = Pipeline(config)
155
- pipeline.run()
186
+ def _parse_bam_arg(bam_arg: Path) -> tuple[str, Path]:
187
+ """
188
+ Parse a BAM argument that may be in sample_id:path format.
156
189
 
157
- except Exception as e:
158
- console.print(f"[bold red]Error: {e}[/bold red]")
159
- raise typer.Exit(code=1) from e
190
+ Args:
191
+ bam_arg: Path object (may contain sample_id:path as string).
192
+
193
+ Returns:
194
+ Tuple of (sample_name, bam_path).
195
+ """
196
+ bam_str = str(bam_arg)
197
+ if ":" in bam_str:
198
+ parts = bam_str.split(":", 1)
199
+ return parts[0], Path(parts[1])
200
+ return bam_arg.stem, bam_arg
160
201
 
161
202
 
162
203
  if __name__ == "__main__":
@@ -0,0 +1,9 @@
1
+ """
2
+ Core module for gbcms.
3
+
4
+ Provides the coordinate transformation kernel for handling VCF/MAF coordinates.
5
+ """
6
+
7
+ from .kernel import CoordinateKernel
8
+
9
+ __all__ = ["CoordinateKernel"]