metaumbra 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,29 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2026, MetaUmbra contributors
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ 1. Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ 2. Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ 3. Neither the name of the copyright holder nor the names of its
17
+ contributors may be used to endorse or promote products derived from
18
+ this software without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,3 @@
1
+ include README.md
2
+ include LICENSE
3
+ recursive-include src/metaumbra/assets *.png
@@ -0,0 +1,133 @@
1
+ Metadata-Version: 2.4
2
+ Name: metaumbra
3
+ Version: 1.1.0
4
+ Summary: Genome-level presence inference from metaproteomic peptide lists.
5
+ License-Expression: BSD-3-Clause
6
+ Keywords: metaproteomics,proteomics,bioinformatics,genome inference,peptides
7
+ Classifier: Development Status :: 4 - Beta
8
+ Classifier: Intended Audience :: Science/Research
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3 :: Only
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Programming Language :: Python :: 3.14
17
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: rpg==2.0.5
22
+ Requires-Dist: numpy
23
+ Requires-Dist: pandas
24
+ Requires-Dist: tqdm
25
+ Provides-Extra: gui
26
+ Requires-Dist: PySide6; extra == "gui"
27
+ Provides-Extra: parquet
28
+ Requires-Dist: pyarrow; extra == "parquet"
29
+ Provides-Extra: dev
30
+ Requires-Dist: build; extra == "dev"
31
+ Requires-Dist: twine; extra == "dev"
32
+ Provides-Extra: all
33
+ Requires-Dist: PySide6; extra == "all"
34
+ Requires-Dist: pyarrow; extra == "all"
35
+ Dynamic: license-file
36
+
37
+ # MetaUmbra
38
+ [![MetaUmbra](src/metaumbra/assets/baner.png)](src/metaumbra/assets/baner.png)
39
+
40
+ ## Genome-level presence inference from metaproteomic peptides
41
+
42
+ MetaUmbra converts identified metaproteomic peptides into statistically supported genome presence calls. It evaluates each candidate genome using both unique and shared peptide evidence and reports genome-level p-values, BH-adjusted q-values, and presence scores.
43
+
44
+ ## Main features
45
+
46
+ - Evaluate candidate genome support from metaproteomic peptide tables
47
+ - Build genome-specific theoretical peptide references from protein FASTA files
48
+ - Support user-defined genome collections, including isolate genomes, strain panels, and MAG catalogs
49
+ - Use both unique and shared peptide evidence for genome presence inference
50
+ - Report genome-level p-values, BH-adjusted q-values, and presence scores
51
+ - Provide GUI, command-line, and Python workflow support
52
+ - Support peptide tables from common metaproteomics workflows such as DIA-NN and MaxQuant
53
+
54
+ ## Workflow overview
55
+ [![MetaUmbra](src/metaumbra/assets/workflow.png)](src/metaumbra/assets/workflow.png)
56
+
57
+
58
+ ## Installation
59
+
60
+ MetaUmbra requires Python 3.10 or newer.
61
+
62
+ ```bash
63
+ pip install ".[all]"
64
+ ```
65
+
66
+ ## Usage
67
+
68
+ MetaUmbra can be used through either the graphical interface or the command line.
69
+
70
+ ### Graphical interface
71
+
72
+ ```bash
73
+ metaumbra-gui
74
+ ```
75
+
76
+ The GUI supports FASTA digestion, peptide table loading, genome presence scoring, and result export.
77
+
78
+ ### Command line
79
+
80
+ MetaUmbra provides separate commands for the main workflow steps:
81
+
82
+ ```bash
83
+ metaumbra digest --help
84
+ metaumbra score --help
85
+ metaumbra extract-parquet --help
86
+ ```
87
+
88
+ A typical workflow is:
89
+
90
+ ```bash
91
+ metaumbra digest ...
92
+ metaumbra score ...
93
+ ```
94
+
95
+ Use `metaumbra extract-parquet ...` to convert DIA-NN parquet reports to peptide TSV files before scoring.
96
+
97
+ ## Input
98
+
99
+ MetaUmbra requires:
100
+
101
+ - Protein FASTA files, with one FASTA file per genome
102
+ - An observed peptide table containing peptide sequences
103
+
104
+ Optional inputs include peptide scores, peptide-level error values, decoy flags, and genome lineage annotations.
105
+
106
+ ## Output
107
+
108
+ The main output is a TSV table containing genome-level evidence and significance values.
109
+
110
+ Key output columns include:
111
+
112
+ | Column | Description |
113
+ | --- | --- |
114
+ | `genome_id` | Candidate genome identifier |
115
+ | `num_peptides_matched` | Number of observed peptides matched to the genome |
116
+ | `num_peptides_unique` | Number of matched peptides unique to the genome |
117
+ | `weighted_evidence` | Total degeneracy-weighted peptide evidence |
118
+ | `weighted_evidence_shared` | Weighted evidence from shared peptides |
119
+ | `p_presence` | Genome-level p-value |
120
+ | `q_presence` | BH-adjusted genome-level q-value |
121
+ | `presence_score` | Ranking score based on q-value |
122
+
123
+ ## Citation
124
+
125
+ If you use MetaUmbra, please cite:
126
+
127
+ > Wu Q, Ning Z, Zhang A, Cheng K, Figeys D. MetaUmbra: Statistically Controlled Genome-Level Presence Inference from Metaproteomic Peptides.
128
+
129
+ A formal citation will be added after publication.
130
+
131
+ ## Contact
132
+
133
+ For questions or issues, please use the GitHub issue tracker or contact the corresponding author listed in the associated manuscript.
@@ -0,0 +1,97 @@
1
+ # MetaUmbra
2
+ [![MetaUmbra](src/metaumbra/assets/baner.png)](src/metaumbra/assets/baner.png)
3
+
4
+ ## Genome-level presence inference from metaproteomic peptides
5
+
6
+ MetaUmbra converts identified metaproteomic peptides into statistically supported genome presence calls. It evaluates each candidate genome using both unique and shared peptide evidence and reports genome-level p-values, BH-adjusted q-values, and presence scores.
7
+
8
+ ## Main features
9
+
10
+ - Evaluate candidate genome support from metaproteomic peptide tables
11
+ - Build genome-specific theoretical peptide references from protein FASTA files
12
+ - Support user-defined genome collections, including isolate genomes, strain panels, and MAG catalogs
13
+ - Use both unique and shared peptide evidence for genome presence inference
14
+ - Report genome-level p-values, BH-adjusted q-values, and presence scores
15
+ - Provide GUI, command-line, and Python workflow support
16
+ - Support peptide tables from common metaproteomics workflows such as DIA-NN and MaxQuant
17
+
18
+ ## Workflow overview
19
+ [![MetaUmbra workflow overview](src/metaumbra/assets/workflow.png)](src/metaumbra/assets/workflow.png)
20
+
21
+
22
+ ## Installation
23
+
24
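+ MetaUmbra requires Python 3.10 or newer. To install it together with the optional GUI (PySide6) and parquet (pyarrow) extras, run the following from the root of a source checkout: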
25
+
26
+ ```bash
27
+ pip install ".[all]"
28
+ ```
29
+
30
+ ## Usage
31
+
32
+ MetaUmbra can be used through either the graphical interface or the command line.
33
+
34
+ ### Graphical interface
35
+
36
+ ```bash
37
+ metaumbra-gui
38
+ ```
39
+
40
+ The GUI supports FASTA digestion, peptide table loading, genome presence scoring, and result export.
41
+
42
+ ### Command line
43
+
44
+ MetaUmbra provides separate commands for the main workflow steps:
45
+
46
+ ```bash
47
+ metaumbra digest --help
48
+ metaumbra score --help
49
+ metaumbra extract-parquet --help
50
+ ```
51
+
52
+ A typical workflow is:
53
+
54
+ ```bash
55
+ metaumbra digest ...
56
+ metaumbra score ...
57
+ ```
58
+
59
+ Use `metaumbra extract-parquet ...` to convert DIA-NN parquet reports to peptide TSV files before scoring.
60
+
61
+ ## Input
62
+
63
+ MetaUmbra requires:
64
+
65
+ - Protein FASTA files, with one FASTA file per genome
66
+ - An observed peptide table containing peptide sequences
67
+
68
+ Optional inputs include peptide scores, peptide-level error values, decoy flags, and genome lineage annotations.
69
+
70
+ ## Output
71
+
72
+ The main output is a TSV table containing genome-level evidence and significance values.
73
+
74
+ Key output columns include:
75
+
76
+ | Column | Description |
77
+ | --- | --- |
78
+ | `genome_id` | Candidate genome identifier |
79
+ | `num_peptides_matched` | Number of observed peptides matched to the genome |
80
+ | `num_peptides_unique` | Number of matched peptides unique to the genome |
81
+ | `weighted_evidence` | Total degeneracy-weighted peptide evidence |
82
+ | `weighted_evidence_shared` | Weighted evidence from shared peptides |
83
+ | `p_presence` | Genome-level p-value |
84
+ | `q_presence` | BH-adjusted genome-level q-value |
85
+ | `presence_score` | Ranking score based on q-value |
86
+
87
+ ## Citation
88
+
89
+ If you use MetaUmbra, please cite:
90
+
91
+ > Wu Q, Ning Z, Zhang A, Cheng K, Figeys D. MetaUmbra: Statistically Controlled Genome-Level Presence Inference from Metaproteomic Peptides.
92
+
93
+ A formal citation will be added after publication.
94
+
95
+ ## Contact
96
+
97
+ For questions or issues, please use the GitHub issue tracker or contact the corresponding author listed in the associated manuscript.
@@ -0,0 +1,59 @@
1
+ [build-system]
2
+ requires = ["setuptools>=77", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "metaumbra"
7
+ dynamic = ["version"]
8
+ description = "Genome-level presence inference from metaproteomic peptide lists."
9
+ readme = "README.md"
10
+ license = "BSD-3-Clause"
11
+ requires-python = ">=3.10"
12
+ keywords = ["metaproteomics", "proteomics", "bioinformatics", "genome inference", "peptides"]
13
+ classifiers = [
14
+ "Development Status :: 4 - Beta",
15
+ "Intended Audience :: Science/Research",
16
+ "Operating System :: OS Independent",
17
+ "Programming Language :: Python :: 3",
18
+ "Programming Language :: Python :: 3 :: Only",
19
+ "Programming Language :: Python :: 3.10",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Programming Language :: Python :: 3.13",
23
+ "Programming Language :: Python :: 3.14",
24
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
25
+ ]
26
+ dependencies = [
27
+ "rpg==2.0.5",
28
+ "numpy",
29
+ "pandas",
30
+ "tqdm",
31
+ ]
32
+
33
+ [project.optional-dependencies]
34
+ gui = ["PySide6"]
35
+ parquet = ["pyarrow"]
36
+ dev = ["build", "twine"]
37
+ all = [
38
+ "PySide6",
39
+ "pyarrow",
40
+ ]
41
+
42
+ [project.scripts]
43
+ metaumbra = "metaumbra.cli:main"
44
+
45
+ [project.gui-scripts]
46
+ metaumbra-gui = "metaumbra.gui:main"
47
+
48
+ [tool.setuptools]
49
+ package-dir = {"" = "src"}
50
+ include-package-data = true
51
+
52
+ [tool.setuptools.dynamic]
53
+ version = {attr = "metaumbra.__version__.__version__"}
54
+
55
+ [tool.setuptools.packages.find]
56
+ where = ["src"]
57
+
58
+ [tool.setuptools.package-data]
59
+ metaumbra = ["assets/*.png"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,5 @@
1
+ """MetaUmbra package metadata."""
2
+
3
+ from .__version__ import __version__
4
+
5
+ __all__ = ["__version__"]
@@ -0,0 +1,14 @@
1
+ """Run the MetaUmbra command-line interface."""
2
+
3
+ import sys
4
+ from pathlib import Path
5
+
6
+ if __package__ in {None, ""}:
7
+ sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
8
+ from metaumbra.cli import main
9
+ else:
10
+ from .cli import main
11
+
12
+
13
+ if __name__ == "__main__":
14
+ raise SystemExit(main(sys.argv[1:]))
@@ -0,0 +1,5 @@
1
+ __version__ = "1.1.0"
2
+
3
+
4
+ if __name__ == "__main__":
5
+ print(__version__)
@@ -0,0 +1,268 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import sys
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ if __package__ in {None, ""}:
10
+ sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
11
+ from metaumbra import __version__
12
+ from metaumbra.workflows import (
13
+ DigestConfig,
14
+ ParquetExtractionConfig,
15
+ ScoringConfig,
16
+ run_digest_workflow,
17
+ run_parquet_extraction_workflow,
18
+ run_scoring_workflow,
19
+ )
20
+ else:
21
+ from . import __version__
22
+ from .workflows import (
23
+ DigestConfig,
24
+ ParquetExtractionConfig,
25
+ ScoringConfig,
26
+ run_digest_workflow,
27
+ run_parquet_extraction_workflow,
28
+ run_scoring_workflow,
29
+ )
30
+
31
+
32
def _print_result(payload: dict[str, Any]) -> None:
    """Write a workflow result mapping to stdout as pretty-printed JSON.

    The JSON is indented by two spaces and non-ASCII characters are emitted
    verbatim instead of being escaped.
    """
    rendered = json.dumps(payload, ensure_ascii=False, indent=2)
    print(rendered)
34
+
35
+
36
def _add_common_version_flag(parser: argparse.ArgumentParser) -> None:
    """Attach a ``--version`` option to *parser*.

    The option uses argparse's built-in ``version`` action, reporting the
    parser's program name followed by the package ``__version__``.
    """
    # ``%(prog)s`` is expanded by argparse when the version text is shown,
    # so each sub-parser reports its own program name.
    version_banner = f"%(prog)s {__version__}"
    parser.add_argument("--version", action="version", version=version_banner)
42
+
43
+
44
def _register_gui_command(subparsers) -> None:
    """Define the ``gui`` subcommand (it takes no options besides ``--version``)."""
    gui = subparsers.add_parser("gui", help="Launch the Qt GUI.")
    _add_common_version_flag(gui)


def _register_digest_command(subparsers) -> None:
    """Define the ``digest`` subcommand and its options."""
    digest = subparsers.add_parser("digest", help="Digest FASTA files into peptide tables.")
    _add_common_version_flag(digest)

    # Exactly one input source (single file or directory) must be supplied.
    source = digest.add_mutually_exclusive_group(required=True)
    source.add_argument("--input-file", help="Single FASTA file to digest.")
    source.add_argument("--input-dir", help="Directory of FASTA files to digest.")

    digest.add_argument("--output-file", help="Output TSV path for single-file mode.")
    digest.add_argument("--output-dir", help="Output directory for directory mode.")
    digest.add_argument("--enzyme-id", default="42", help="RPG enzyme ID. Default: 42 (Trypsin).")

    integer_options = (
        ("--min-length", 7, "Minimum peptide length."),
        ("--max-length", 30, "Maximum peptide length."),
        ("--max-miscleavages", 2, "Maximum missed cleavages."),
    )
    for flag, default, help_text in integer_options:
        digest.add_argument(flag, type=int, default=default, help=help_text)
    digest.add_argument("--processes", type=int, help="Worker process count.")

    switches = (
        ("--full-header", "Keep full FASTA headers instead of truncating at the first space."),
        ("--no-skip-existing", "Rebuild existing output files in directory mode."),
        ("--quiet", "Reduce runtime log output."),
    )
    for flag, help_text in switches:
        digest.add_argument(flag, action="store_true", help=help_text)


def _register_score_command(subparsers) -> None:
    """Define the ``score`` subcommand and its options."""
    score = subparsers.add_parser("score", help="Score genome presence from peptide observations.")
    _add_common_version_flag(score)

    # Mandatory inputs and output.
    score.add_argument("--peptide-table", required=True, help="Observed peptide TSV path.")
    score.add_argument(
        "--genome-digest-dir",
        required=True,
        action="append",
        help="Genome digest directory. Repeat for multiple directories.",
    )
    score.add_argument("--output", required=True, help="Output TSV path.")

    # Peptide table column names and filtering.
    score.add_argument("--peptide-seq-col", default="Sequence", help="Peptide sequence column name.")
    score.add_argument("--peptide-score-col", default="score", help="Peptide score column name.")
    score.add_argument("--peptide-error-col", default="Q.Value", help="Peptide error/FDR column name.")
    score.add_argument("--peptide-error-cutoff", type=float, default=0.05, help="Peptide error cutoff.")
    score.add_argument(
        "--peptide-decoy-flag-col",
        default="Reverse",
        help="Optional decoy flag column. Pass an empty string to disable it.",
    )
    score.add_argument("--decoy-flag-value", default="+", help="Decoy marker value.")
    score.add_argument("--num-workers", type=int, help="Worker process count.")

    # Repeatable genome ID filters; each option gets its own empty default list.
    repeatable_options = (
        ("--selected-genome-id", "Restrict scoring to specific genome IDs. Repeat as needed."),
        ("--exclude-genome-id", "Genome IDs to exclude. Repeat as needed."),
    )
    for flag, help_text in repeatable_options:
        score.add_argument(flag, action="append", default=[], help=help_text)

    # Optional string settings that default to the empty string.
    optional_text_options = (
        ("--lineage-table", "Optional genome lineage table."),
        ("--lineage-genome-id-col", "Genome ID column in the lineage table."),
        ("--lineage-lineage-col", "Lineage column in the lineage table."),
        ("--cache-path", "Optional matched peptide cache path."),
    )
    for flag, help_text in optional_text_options:
        score.add_argument(flag, default="", help=help_text)

    switches = (
        ("--use-cache-if-exists", "Reuse an existing matched peptide cache if available."),
        ("--no-save-cache", "Do not persist matched peptide cache output."),
        ("--no-compute-coverage", "Skip cumulative coverage calculations."),
        ("--no-export-temp", "Skip temporary artifact exports."),
        ("--return-full-table", "Return and write the full internal result table."),
    )
    for flag, help_text in switches:
        score.add_argument(flag, action="store_true", help=help_text)


def _register_parquet_command(subparsers) -> None:
    """Define the ``extract-parquet`` subcommand and its options."""
    extract = subparsers.add_parser(
        "extract-parquet",
        help="Extract selected columns from a parquet peptide table into TSV.",
    )
    _add_common_version_flag(extract)

    extract.add_argument("--input", required=True, help="Input parquet file.")
    extract.add_argument("--output", required=True, help="Output TSV file.")

    column_options = (
        ("--input-column", "Input column to extract. Repeat to control order."),
        ("--output-column", "Output column name. Repeat to match --input-column order."),
    )
    for flag, help_text in column_options:
        extract.add_argument(flag, action="append", default=[], help=help_text)

    extract.add_argument("--batch-size", type=int, default=65536, help="Parquet streaming batch size.")
    extract.add_argument("--force", action="store_true", help="Overwrite an existing TSV output.")


def build_parser() -> argparse.ArgumentParser:
    """Build the top-level ``metaumbra`` argument parser.

    The parser exposes ``--version`` plus four subcommands, registered in this
    order: ``gui``, ``digest``, ``score`` and ``extract-parquet``. The chosen
    subcommand name is stored on the parsed namespace as ``command``; it is
    ``None`` when no subcommand is given.

    Returns:
        The fully configured :class:`argparse.ArgumentParser`.
    """
    root = argparse.ArgumentParser(
        prog="metaumbra",
        description="MetaUmbra packaging-friendly command line interface.",
    )
    _add_common_version_flag(root)

    commands = root.add_subparsers(dest="command")
    _register_gui_command(commands)
    _register_digest_command(commands)
    _register_score_command(commands)
    _register_parquet_command(commands)
    return root
169
+
170
+
171
def _run_gui() -> int:
    """Launch the Qt GUI and return a process exit status of 0.

    The GUI module is imported lazily, inside this function, so the other
    subcommands do not import it.

    This module can also be executed as a plain script (see the
    ``__package__`` fallback next to the imports at the top of the file). In
    that mode a relative import fails with "attempted relative import with no
    known parent package", so the same package/script switch is applied here.

    Returns:
        ``0`` once the GUI entry point returns. Whatever the GUI entry point
        itself returns is ignored.
    """
    if __package__ in {None, ""}:
        # Script mode: the module-level fallback has already put the parent
        # directory of the package on sys.path, so an absolute import works.
        from metaumbra.gui import main as gui_main
    else:
        from .gui import main as gui_main

    gui_main()
    return 0
176
+
177
+
178
def _run_digest(args: argparse.Namespace) -> int:
    """Run the ``digest`` subcommand.

    File mode is selected when ``--input-file`` has a non-empty value;
    otherwise directory mode is used. Each mode requires its matching output
    option. The workflow result is printed as JSON.

    Args:
        args: Parsed namespace produced by the ``digest`` sub-parser.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: If the output option matching the input mode is missing.
    """
    single_file = bool(args.input_file)
    if single_file:
        if not args.output_file:
            raise SystemExit("--output-file is required when using --input-file.")
    elif not args.output_dir:
        raise SystemExit("--output-dir is required when using --input-dir.")

    settings = {
        "input_mode": "file" if single_file else "directory",
        # Unused path options arrive as None; the config receives "" instead.
        "input_file": args.input_file or "",
        "input_dir": args.input_dir or "",
        "output_file": args.output_file or "",
        "output_dir": args.output_dir or "",
        "enzyme_id": str(args.enzyme_id),
        "min_length": args.min_length,
        "max_length": args.max_length,
        "max_num_miscleavages": args.max_miscleavages,
        "processes": args.processes,
        # The CLI exposes the negated form of these three booleans.
        "short_header": not args.full_header,
        "verbose": not args.quiet,
        "skip_existing": not args.no_skip_existing,
    }
    summary = run_digest_workflow(DigestConfig(**settings))
    _print_result(summary)
    return 0
202
+
203
+
204
def _run_score(args: argparse.Namespace) -> int:
    """Run the ``score`` subcommand.

    Maps the parsed command-line options onto a ``ScoringConfig``, runs the
    scoring workflow and prints its result as JSON.

    Args:
        args: Parsed namespace produced by the ``score`` sub-parser.

    Returns:
        ``0`` on success.
    """
    paths = {
        "peptide_table_path": args.peptide_table,
        "genome_digest_dirs": args.genome_digest_dir,
        "output_tsv_path": args.output,
        "matched_peptides_cache_path": args.cache_path,
    }
    lineage = {
        "genome_lineage_table_path": args.lineage_table,
        "genome_lineage_genome_id_col": args.lineage_genome_id_col,
        "genome_lineage_lineage_col": args.lineage_lineage_col,
    }
    peptide_options = {
        "peptide_seq_col": args.peptide_seq_col,
        "peptide_score_col": args.peptide_score_col,
        "peptide_error_col": args.peptide_error_col,
        "peptide_error_cutoff": args.peptide_error_cutoff,
        "peptide_decoy_flag_col": args.peptide_decoy_flag_col,
        "decoy_flag_value": args.decoy_flag_value,
    }
    genome_filters = {
        "selected_genome_ids": args.selected_genome_id,
        "exclude_genome_ids": args.exclude_genome_id,
    }
    runtime = {
        "num_workers": args.num_workers,
        "use_cache_if_exists": args.use_cache_if_exists,
        "return_full_table": args.return_full_table,
        # These three are exposed on the CLI as negated ``--no-...`` switches.
        "save_matched_peptides_cache": not args.no_save_cache,
        "compute_coverage": not args.no_compute_coverage,
        "export_temp": not args.no_export_temp,
    }

    scoring_config = ScoringConfig(
        **paths,
        **lineage,
        **peptide_options,
        **genome_filters,
        **runtime,
    )
    summary = run_scoring_workflow(scoring_config)
    _print_result(summary)
    return 0
230
+
231
+
232
def _run_parquet_extraction(args: argparse.Namespace) -> int:
    """Run the ``extract-parquet`` subcommand.

    When ``--input-column`` / ``--output-column`` are omitted, each falls back
    independently to a built-in list of four column names. The two resolved
    lists must have the same length, because output names are matched to
    input columns by position. A mismatch is rejected here with a clear
    message rather than being passed on to the workflow.

    Args:
        args: Parsed namespace produced by the ``extract-parquet`` sub-parser.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: If the resolved input and output column lists differ in
            length.
    """
    input_columns = args.input_column or ["Run", "Stripped.Sequence", "Evidence", "Q.Value"]
    output_columns = args.output_column or ["Run", "Sequence", "score", "Q.Value"]

    # A custom list on one side paired with the 4-name default on the other
    # is only valid when the custom list also has four entries.
    if len(input_columns) != len(output_columns):
        raise SystemExit(
            f"Column count mismatch: {len(input_columns)} input column(s) but "
            f"{len(output_columns)} output column name(s). Repeat --input-column and "
            "--output-column the same number of times; an omitted option falls back "
            "to four default column names."
        )

    config = ParquetExtractionConfig(
        input_parquet_path=args.input,
        output_tsv_path=args.output,
        input_columns=input_columns,
        output_columns=output_columns,
        batch_size=args.batch_size,
        force=args.force,
    )
    _print_result(run_parquet_extraction_workflow(config))
    return 0
246
+
247
+
248
def main(argv: list[str] | None = None) -> int:
    """Parse command-line arguments and dispatch to the chosen subcommand.

    Args:
        argv: Argument list without the program name. ``None`` is passed
            straight to argparse, which then reads ``sys.argv[1:]``.

    Returns:
        The exit status of the executed subcommand, or ``0`` after printing
        the help text when no subcommand was given.

    Raises:
        SystemExit: If the parsed command name has no handler.
    """
    parser = build_parser()
    namespace = parser.parse_args(argv)
    command = namespace.command

    if command is None:
        parser.print_help()
        return 0

    # The GUI launcher is the only handler that takes no parsed arguments.
    if command == "gui":
        return _run_gui()

    handlers = {
        "digest": _run_digest,
        "score": _run_score,
        "extract-parquet": _run_parquet_extraction,
    }
    handler = handlers.get(command)
    if handler is None:
        raise SystemExit(f"Unknown command: {command}")
    return handler(namespace)
265
+
266
+
267
+ if __name__ == "__main__":
268
+ raise SystemExit(main(sys.argv[1:]))