pipeconcord 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. pipeconcord-0.2.0/LICENSE +21 -0
  2. pipeconcord-0.2.0/PKG-INFO +232 -0
  3. pipeconcord-0.2.0/README.md +205 -0
  4. pipeconcord-0.2.0/pipeconcord/__init__.py +8 -0
  5. pipeconcord-0.2.0/pipeconcord/__main__.py +5 -0
  6. pipeconcord-0.2.0/pipeconcord/_version.py +1 -0
  7. pipeconcord-0.2.0/pipeconcord/cli.py +147 -0
  8. pipeconcord-0.2.0/pipeconcord/comparators/__init__.py +25 -0
  9. pipeconcord-0.2.0/pipeconcord/comparators/bam_stats.py +229 -0
  10. pipeconcord-0.2.0/pipeconcord/comparators/base.py +21 -0
  11. pipeconcord-0.2.0/pipeconcord/comparators/bed.py +269 -0
  12. pipeconcord-0.2.0/pipeconcord/comparators/counts.py +342 -0
  13. pipeconcord-0.2.0/pipeconcord/comparators/deg.py +411 -0
  14. pipeconcord-0.2.0/pipeconcord/comparators/expression.py +228 -0
  15. pipeconcord-0.2.0/pipeconcord/comparators/fasta.py +232 -0
  16. pipeconcord-0.2.0/pipeconcord/comparators/table.py +266 -0
  17. pipeconcord-0.2.0/pipeconcord/comparators/vcf.py +393 -0
  18. pipeconcord-0.2.0/pipeconcord/core/__init__.py +5 -0
  19. pipeconcord-0.2.0/pipeconcord/core/batch.py +109 -0
  20. pipeconcord-0.2.0/pipeconcord/core/engine.py +36 -0
  21. pipeconcord-0.2.0/pipeconcord/core/registry.py +63 -0
  22. pipeconcord-0.2.0/pipeconcord/core/report.py +39 -0
  23. pipeconcord-0.2.0/pipeconcord/core/utils.py +113 -0
  24. pipeconcord-0.2.0/pipeconcord/detection/__init__.py +4 -0
  25. pipeconcord-0.2.0/pipeconcord/detection/dispatch.py +15 -0
  26. pipeconcord-0.2.0/pipeconcord/detection/filetype.py +88 -0
  27. pipeconcord-0.2.0/pipeconcord/io/__init__.py +23 -0
  28. pipeconcord-0.2.0/pipeconcord/io/report_writers.py +411 -0
  29. pipeconcord-0.2.0/pipeconcord.egg-info/PKG-INFO +232 -0
  30. pipeconcord-0.2.0/pipeconcord.egg-info/SOURCES.txt +47 -0
  31. pipeconcord-0.2.0/pipeconcord.egg-info/dependency_links.txt +1 -0
  32. pipeconcord-0.2.0/pipeconcord.egg-info/entry_points.txt +12 -0
  33. pipeconcord-0.2.0/pipeconcord.egg-info/requires.txt +5 -0
  34. pipeconcord-0.2.0/pipeconcord.egg-info/top_level.txt +1 -0
  35. pipeconcord-0.2.0/pyproject.toml +69 -0
  36. pipeconcord-0.2.0/setup.cfg +4 -0
  37. pipeconcord-0.2.0/tests/test_bam_stats_comparator.py +97 -0
  38. pipeconcord-0.2.0/tests/test_batch.py +98 -0
  39. pipeconcord-0.2.0/tests/test_bed_comparator.py +62 -0
  40. pipeconcord-0.2.0/tests/test_cli.py +68 -0
  41. pipeconcord-0.2.0/tests/test_counts_comparator.py +80 -0
  42. pipeconcord-0.2.0/tests/test_deg_comparator.py +69 -0
  43. pipeconcord-0.2.0/tests/test_engine_integration.py +22 -0
  44. pipeconcord-0.2.0/tests/test_expression_comparator.py +56 -0
  45. pipeconcord-0.2.0/tests/test_fasta_comparator.py +61 -0
  46. pipeconcord-0.2.0/tests/test_filetype.py +44 -0
  47. pipeconcord-0.2.0/tests/test_report_writers.py +58 -0
  48. pipeconcord-0.2.0/tests/test_table_comparator.py +50 -0
  49. pipeconcord-0.2.0/tests/test_vcf_comparator.py +140 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 JunhaoQiu
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,232 @@
1
+ Metadata-Version: 2.4
2
+ Name: pipeconcord
3
+ Version: 0.2.0
4
+ Summary: Format-aware concordance reports for bioinformatics pipeline outputs.
5
+ Author: PipeConcord contributors
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://qchiujunhao.github.io/pipeconcord/
8
+ Project-URL: Repository, https://github.com/qchiujunhao/pipeconcord
9
+ Project-URL: Issues, https://github.com/qchiujunhao/pipeconcord/issues
10
+ Project-URL: Documentation, https://qchiujunhao.github.io/pipeconcord/
11
+ Keywords: bioinformatics,testing,concordance,pipeline,comparison
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
19
+ Requires-Python: >=3.10
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Provides-Extra: dev
23
+ Requires-Dist: coverage[toml]>=7.0; extra == "dev"
24
+ Requires-Dist: mkdocs>=1.6; extra == "dev"
25
+ Requires-Dist: ruff>=0.4; extra == "dev"
26
+ Dynamic: license-file
27
+
28
+ # PipeConcord
29
+
30
+ `pipeconcord` is a Python toolkit for comparing bioinformatics pipeline outputs
31
+ with semantic, format-aware metrics. Instead of only checking whether files are
32
+ byte-for-byte identical, it measures whether two runs agree in biologically or
33
+ analytically meaningful ways.
34
+
35
+ Project website: <https://qchiujunhao.github.io/pipeconcord/>
36
+
37
+ Status: alpha. The core comparison model and initial comparators are usable, but
38
+ APIs and metrics may change as more bioinformatics formats and workflows are
39
+ validated.
40
+
41
+ Rename note: the first alpha release used the package name `biocompare`. Current
42
+ and future releases use `pipeconcord` to avoid confusion with an unrelated
43
+ life-science product directory.
44
+
45
+ This repository currently implements the Phase 1 vertical slice:
46
+
47
+ - a shared `ConcordanceReport` model
48
+ - comparator registry with plugin entry point support
49
+ - file type detection for common bioinformatics and tabular formats
50
+ - a differential expression result comparator
51
+ - a count/expression matrix comparator
52
+ - a normalized expression matrix comparator
53
+ - a BED interval comparator
54
+ - a FASTA/FASTQ sequence comparator
55
+ - a lightweight VCF comparator with ALT splitting and minimal allele trimming
56
+ - a `samtools flagstat`/`samtools stats` comparator
57
+ - a generic CSV/TSV table comparator
58
+ - JSON, text, and HTML report writers (plus TSV for batch reports)
59
+ - a command-line interface
60
+ - automated tests with `unittest`
61
+
62
+ ## Quickstart
63
+
64
+ Install from PyPI:
65
+
66
+ ```bash
67
+ python3 -m pip install pipeconcord
68
+ ```
69
+
70
+ The package is published at <https://pypi.org/project/pipeconcord/>.
71
+ PipeConcord requires Python 3.10 or newer.
72
+
73
+ To test unreleased changes from the default branch, install from GitHub:
74
+
75
+ ```bash
76
+ python3 -m pip install git+https://github.com/qchiujunhao/pipeconcord.git
77
+ ```
78
+
79
+ Create two small TSV files:
80
+
81
+ ```bash
82
+ cat > old.tsv <<'EOF'
83
+ gene_id value
84
+ A 1.0
85
+ B 2.0
86
+ EOF
87
+
88
+ cat > new.tsv <<'EOF'
89
+ gene_id value
90
+ A 1.1
91
+ B 2.0
92
+ EOF
93
+ ```
94
+
95
+ Compare them by `gene_id`:
96
+
97
+ ```bash
98
+ pipeconcord compare old.tsv new.tsv --key gene_id
99
+ ```
100
+
101
+ Write a report to disk:
102
+
103
+ ```bash
104
+ pipeconcord compare old.tsv new.tsv \
105
+ --key gene_id \
106
+ --output report.json
107
+ ```
108
+
109
+ Compare differential expression tables:
110
+
111
+ ```bash
112
+ pipeconcord compare old_de.tsv new_de.tsv \
113
+ --type deg \
114
+ --alpha 0.05
115
+ ```
116
+
117
+ Compare count matrices:
118
+
119
+ ```bash
120
+ pipeconcord compare old_counts.tsv new_counts.tsv \
121
+ --type counts
122
+ ```
123
+
124
+ Compare normalized expression matrices:
125
+
126
+ ```bash
127
+ pipeconcord compare old_tpm.tsv new_tpm.tsv \
128
+ --type expression
129
+ ```
130
+
131
+ Compare BED intervals:
132
+
133
+ ```bash
134
+ pipeconcord compare old_peaks.bed new_peaks.bed \
135
+ --type bed \
136
+ --min-reciprocal-overlap 0.5
137
+ ```
138
+
139
+ Compare FASTA sequences:
140
+
141
+ ```bash
142
+ pipeconcord compare old_sequences.fa new_sequences.fa \
143
+ --type fasta
144
+ ```
145
+
146
+ Compare VCF calls:
147
+
148
+ ```bash
149
+ pipeconcord compare old_calls.vcf new_calls.vcf \
150
+ --type vcf
151
+ ```
152
+
153
+ Optionally provide a reference FASTA for simple repeated-indel left alignment:
154
+
155
+ ```bash
156
+ pipeconcord compare calls_a.vcf calls_b.vcf \
157
+ --type vcf \
158
+ --reference-fasta reference.fa
159
+ ```
160
+
161
+ Compare alignment summary statistics:
162
+
163
+ ```bash
164
+ pipeconcord compare old_flagstat.txt new_flagstat.txt \
165
+ --type bam_stats
166
+ ```
167
+
168
+ Run a batch comparison from a manifest:
169
+
170
+ ```bash
171
+ pipeconcord batch manifest.tsv --format text
172
+ ```
173
+
174
+ The manifest must contain `file_a` and `file_b` columns. Optional columns are
175
+ `label` and `type`.
176
+
177
+ Use `--min-concordance` (a value between 0.0 and 1.0) in CI to exit with a
178
+ non-zero status when any comparison falls below the chosen threshold:
179
+
180
+ ```bash
181
+ pipeconcord batch manifest.tsv --min-concordance 0.95
182
+ ```
183
+
184
+ Write an HTML report:
185
+
186
+ ```bash
187
+ pipeconcord compare old_peaks.bed new_peaks.bed \
188
+ --type bed \
189
+ --format html \
190
+ --output report.html
191
+ ```
192
+
193
+ Batch reports also support `--format html`.
194
+
195
+ ## Development
196
+
197
+ Install the repository in editable mode with development tools:
198
+
199
+ ```bash
200
+ python3 -m pip install -e ".[dev]"
201
+ ```
202
+
203
+ Run the tests:
204
+
205
+ ```bash
206
+ python3 -m unittest discover -s tests
207
+ ```
208
+
209
+ Run lint and coverage:
210
+
211
+ ```bash
212
+ python3 -m ruff check .
213
+ python3 -m coverage run -m unittest discover -s tests
214
+ python3 -m coverage report
215
+ ```
216
+
217
+ ## Plugin Model
218
+
219
+ Comparators subclass `pipeconcord.comparators.base.Comparator` and return a
220
+ `pipeconcord.core.report.ConcordanceReport`. Third-party packages can register
221
+ comparators with the `pipeconcord.comparators` entry point group.
222
+
223
+ ## Documentation
224
+
225
+ Additional documentation is available on the project website and in `docs/`,
226
+ including API notes, design rationale, and tutorials for regression testing,
227
+ RNA-seq outputs, variant calls, and BED peak comparisons.
228
+
229
+ ## Citation and Paper Draft
230
+
231
+ Citation metadata is available in `CITATION.cff`. A draft JOSS-style paper is
232
+ available under `paper/`.
@@ -0,0 +1,205 @@
1
+ # PipeConcord
2
+
3
+ `pipeconcord` is a Python toolkit for comparing bioinformatics pipeline outputs
4
+ with semantic, format-aware metrics. Instead of only checking whether files are
5
+ byte-for-byte identical, it measures whether two runs agree in biologically or
6
+ analytically meaningful ways.
7
+
8
+ Project website: <https://qchiujunhao.github.io/pipeconcord/>
9
+
10
+ Status: alpha. The core comparison model and initial comparators are usable, but
11
+ APIs and metrics may change as more bioinformatics formats and workflows are
12
+ validated.
13
+
14
+ Rename note: the first alpha release used the package name `biocompare`. Current
15
+ and future releases use `pipeconcord` to avoid confusion with an unrelated
16
+ life-science product directory.
17
+
18
+ This repository currently implements the Phase 1 vertical slice:
19
+
20
+ - a shared `ConcordanceReport` model
21
+ - comparator registry with plugin entry point support
22
+ - file type detection for common bioinformatics and tabular formats
23
+ - a differential expression result comparator
24
+ - a count/expression matrix comparator
25
+ - a normalized expression matrix comparator
26
+ - a BED interval comparator
27
+ - a FASTA/FASTQ sequence comparator
28
+ - a lightweight VCF comparator with ALT splitting and minimal allele trimming
29
+ - a `samtools flagstat`/`samtools stats` comparator
30
+ - a generic CSV/TSV table comparator
31
+ - JSON, text, and HTML report writers (plus TSV for batch reports)
32
+ - a command-line interface
33
+ - automated tests with `unittest`
34
+
35
+ ## Quickstart
36
+
37
+ Install from PyPI:
38
+
39
+ ```bash
40
+ python3 -m pip install pipeconcord
41
+ ```
42
+
43
+ The package is published at <https://pypi.org/project/pipeconcord/>.
44
+ PipeConcord requires Python 3.10 or newer.
45
+
46
+ To test unreleased changes from the default branch, install from GitHub:
47
+
48
+ ```bash
49
+ python3 -m pip install git+https://github.com/qchiujunhao/pipeconcord.git
50
+ ```
51
+
52
+ Create two small TSV files:
53
+
54
+ ```bash
55
+ cat > old.tsv <<'EOF'
56
+ gene_id value
57
+ A 1.0
58
+ B 2.0
59
+ EOF
60
+
61
+ cat > new.tsv <<'EOF'
62
+ gene_id value
63
+ A 1.1
64
+ B 2.0
65
+ EOF
66
+ ```
67
+
68
+ Compare them by `gene_id`:
69
+
70
+ ```bash
71
+ pipeconcord compare old.tsv new.tsv --key gene_id
72
+ ```
73
+
74
+ Write a report to disk:
75
+
76
+ ```bash
77
+ pipeconcord compare old.tsv new.tsv \
78
+ --key gene_id \
79
+ --output report.json
80
+ ```
81
+
82
+ Compare differential expression tables:
83
+
84
+ ```bash
85
+ pipeconcord compare old_de.tsv new_de.tsv \
86
+ --type deg \
87
+ --alpha 0.05
88
+ ```
89
+
90
+ Compare count matrices:
91
+
92
+ ```bash
93
+ pipeconcord compare old_counts.tsv new_counts.tsv \
94
+ --type counts
95
+ ```
96
+
97
+ Compare normalized expression matrices:
98
+
99
+ ```bash
100
+ pipeconcord compare old_tpm.tsv new_tpm.tsv \
101
+ --type expression
102
+ ```
103
+
104
+ Compare BED intervals:
105
+
106
+ ```bash
107
+ pipeconcord compare old_peaks.bed new_peaks.bed \
108
+ --type bed \
109
+ --min-reciprocal-overlap 0.5
110
+ ```
111
+
112
+ Compare FASTA sequences:
113
+
114
+ ```bash
115
+ pipeconcord compare old_sequences.fa new_sequences.fa \
116
+ --type fasta
117
+ ```
118
+
119
+ Compare VCF calls:
120
+
121
+ ```bash
122
+ pipeconcord compare old_calls.vcf new_calls.vcf \
123
+ --type vcf
124
+ ```
125
+
126
+ Optionally provide a reference FASTA for simple repeated-indel left alignment:
127
+
128
+ ```bash
129
+ pipeconcord compare calls_a.vcf calls_b.vcf \
130
+ --type vcf \
131
+ --reference-fasta reference.fa
132
+ ```
133
+
134
+ Compare alignment summary statistics:
135
+
136
+ ```bash
137
+ pipeconcord compare old_flagstat.txt new_flagstat.txt \
138
+ --type bam_stats
139
+ ```
140
+
141
+ Run a batch comparison from a manifest:
142
+
143
+ ```bash
144
+ pipeconcord batch manifest.tsv --format text
145
+ ```
146
+
147
+ The manifest must contain `file_a` and `file_b` columns. Optional columns are
148
+ `label` and `type`.
149
+
150
+ Use `--min-concordance` (a value between 0.0 and 1.0) in CI to exit with a
151
+ non-zero status when any comparison falls below the chosen threshold:
152
+
153
+ ```bash
154
+ pipeconcord batch manifest.tsv --min-concordance 0.95
155
+ ```
156
+
157
+ Write an HTML report:
158
+
159
+ ```bash
160
+ pipeconcord compare old_peaks.bed new_peaks.bed \
161
+ --type bed \
162
+ --format html \
163
+ --output report.html
164
+ ```
165
+
166
+ Batch reports also support `--format html`.
167
+
168
+ ## Development
169
+
170
+ Install the repository in editable mode with development tools:
171
+
172
+ ```bash
173
+ python3 -m pip install -e ".[dev]"
174
+ ```
175
+
176
+ Run the tests:
177
+
178
+ ```bash
179
+ python3 -m unittest discover -s tests
180
+ ```
181
+
182
+ Run lint and coverage:
183
+
184
+ ```bash
185
+ python3 -m ruff check .
186
+ python3 -m coverage run -m unittest discover -s tests
187
+ python3 -m coverage report
188
+ ```
189
+
190
+ ## Plugin Model
191
+
192
+ Comparators subclass `pipeconcord.comparators.base.Comparator` and return a
193
+ `pipeconcord.core.report.ConcordanceReport`. Third-party packages can register
194
+ comparators with the `pipeconcord.comparators` entry point group.
195
+
196
+ ## Documentation
197
+
198
+ Additional documentation is available on the project website and in `docs/`,
199
+ including API notes, design rationale, and tutorials for regression testing,
200
+ RNA-seq outputs, variant calls, and BED peak comparisons.
201
+
202
+ ## Citation and Paper Draft
203
+
204
+ Citation metadata is available in `CITATION.cff`. A draft JOSS-style paper is
205
+ available under `paper/`.
@@ -0,0 +1,8 @@
1
+ """Semantic comparison of bioinformatics pipeline outputs."""
2
+
3
+ from pipeconcord._version import __version__
4
+ from pipeconcord.core.engine import ComparisonEngine
5
+ from pipeconcord.core.report import ConcordanceReport
6
+
7
+ __all__ = ["ComparisonEngine", "ConcordanceReport", "__version__"]
8
+
@@ -0,0 +1,5 @@
1
+ from pipeconcord.cli import main
2
+
3
+ if __name__ == "__main__":
4
+ raise SystemExit(main())
5
+
@@ -0,0 +1 @@
1
+ __version__ = "0.2.0"
@@ -0,0 +1,147 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import sys
5
+
6
+ from pipeconcord.core.batch import run_batch
7
+ from pipeconcord.core.engine import ComparisonEngine
8
+ from pipeconcord.io.report_writers import (
9
+ batch_to_html,
10
+ batch_to_json,
11
+ batch_to_text,
12
+ batch_to_tsv,
13
+ report_to_html,
14
+ report_to_json,
15
+ report_to_text,
16
+ write_batch,
17
+ write_report,
18
+ )
19
+
20
+
21
def build_parser() -> argparse.ArgumentParser:
    """Build the top-level ``pipeconcord`` argument parser.

    Two sub-commands are registered under the ``command`` destination:
    ``compare`` (one pair of files) and ``batch`` (pairs listed in a
    manifest). Each receives the shared comparator options from
    ``add_common_options`` before its own arguments are added.

    Returns:
        The configured parser. The sub-command is not marked as required,
        so ``command`` is ``None`` when the user supplies none.
    """
    root = argparse.ArgumentParser(
        prog="pipeconcord",
        description="Generate semantic concordance reports for bioinformatics outputs.",
    )
    commands = root.add_subparsers(dest="command")

    # "compare": shared options first, then the two positional inputs.
    compare = commands.add_parser("compare", help="Compare one pair of files.")
    add_common_options(compare)
    for positional, description in (
        ("file_a", "First output file to compare."),
        ("file_b", "Second output file to compare."),
    ):
        compare.add_argument(positional, help=description)
    compare.add_argument(
        "-o",
        "--output",
        help="Write the report to a file instead of stdout.",
    )
    compare.add_argument(
        "--format",
        choices=["html", "json", "text"],
        default="json",
        help="Output format.",
    )

    # "batch": shared options first, then the manifest and batch-only flags.
    batch = commands.add_parser(
        "batch",
        help="Compare file pairs listed in a CSV/TSV manifest.",
    )
    add_common_options(batch)
    batch.add_argument(
        "manifest",
        help="CSV/TSV manifest with file_a and file_b columns.",
    )
    batch.add_argument(
        "--min-concordance",
        type=float,
        help="Fail if any successful comparison is below this threshold.",
    )
    batch.add_argument(
        "--stop-on-error",
        action="store_true",
        help="Stop on the first failed comparison.",
    )
    batch.add_argument(
        "-o",
        "--output",
        help="Write the batch report to a file instead of stdout.",
    )
    batch.add_argument(
        "--format",
        choices=["html", "json", "tsv", "text"],
        default="tsv",
        help="Batch output format.",
    )

    return root
44
+
45
+
46
def add_common_options(parser: argparse.ArgumentParser) -> None:
    """Attach the comparator options shared by ``compare`` and ``batch``.

    Every option is optional and is registered without an explicit default,
    so an option that is not supplied parses to ``None``. The defaults quoted
    in the help strings are not set here.
    NOTE(review): presumably the comparators apply those defaults - confirm.

    Args:
        parser: The (sub)parser that receives the options; modified in place.
    """
    # (flags, add_argument keyword arguments), in the order shown by --help.
    option_table = (
        (
            ("-t", "--type"),
            {
                "dest": "file_type",
                "help": "Force a comparator/file type such as bam_stats, bed, counts, deg, expression, fasta, fastq, table, csv, tsv, or vcf.",
            },
        ),
        (("--key",), {"dest": "key_column", "help": "Column to use for row alignment."}),
        (("--delimiter",), {"help": "Force a delimiter for tabular files."}),
        (("--alpha",), {"type": float, "help": "DEG adjusted p-value threshold. Default: 0.05."}),
        (
            ("--lfc-threshold",),
            {"type": float, "help": "DEG absolute log-fold-change threshold. Default: 0.0."},
        ),
        (
            ("--top-n",),
            {"type": int, "help": "Number of top-ranked DEG genes to compare. Default: 50."},
        ),
        (
            ("--gene-column",),
            {"help": "Gene identifier column override for DEG/count matrices."},
        ),
        (
            ("--sample-columns",),
            {"help": "Counts comparator sample columns as a comma-separated list."},
        ),
        (
            ("--min-reciprocal-overlap",),
            {"type": float, "help": "BED interval match threshold. Default: 0.0 for any overlap."},
        ),
        (
            ("--reference-fasta",),
            {"help": "Reference FASTA for optional VCF indel left-alignment."},
        ),
        (("--logfc-column",), {"help": "DEG log-fold-change column override."}),
        (("--padj-column",), {"help": "DEG adjusted p-value column override."}),
        (
            ("--pvalue-column",),
            {"help": "DEG raw p-value column override when adjusted p-values are absent."},
        ),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)
60
+
61
+
62
def main(argv: list[str] | None = None) -> int:
    """Run the ``pipeconcord`` command-line interface.

    When the first argument is not ``compare``, ``batch``, ``-h`` or
    ``--help``, ``compare`` is inserted in front of the arguments, so
    ``pipeconcord a b`` behaves like ``pipeconcord compare a b``.

    Args:
        argv: Arguments to parse; ``sys.argv[1:]`` is used when ``None``.

    Returns:
        The sub-command's exit code, or ``2`` (after printing help) when no
        sub-command was given.

    Any ``Exception`` raised by a sub-command is reported through
    ``parser.exit(2, ...)``, which normally terminates with ``SystemExit``
    instead of returning.
    """
    raw_args = list(argv) if argv is not None else list(sys.argv[1:])
    recognised_first_tokens = {"compare", "batch", "-h", "--help"}
    if raw_args and raw_args[0] not in recognised_first_tokens:
        # Bare invocation: treat it as an implicit "compare".
        raw_args.insert(0, "compare")

    parser = build_parser()
    namespace = parser.parse_args(raw_args)
    if namespace.command is None:
        parser.print_help()
        return 2

    handler = run_batch_command if namespace.command == "batch" else run_compare_command
    try:
        return handler(namespace)
    except Exception as exc:
        parser.exit(2, f"pipeconcord: error: {exc}\n")
    # Only reached if parser.exit() returns instead of raising SystemExit.
    return 0
79
+
80
+
81
def run_compare_command(args: argparse.Namespace) -> int:
    """Compare ``args.file_a`` with ``args.file_b`` and emit the report.

    The report is written to ``args.output`` when that is set; otherwise it
    is printed to stdout as HTML, text, or (for any other format value) JSON.

    Args:
        args: Parsed ``compare`` arguments.

    Returns:
        Always ``0``; failures surface as exceptions.
    """
    report = ComparisonEngine().compare(
        args.file_a,
        args.file_b,
        **comparison_kwargs(args),
    )

    if args.output:
        write_report(report, args.output, fmt=args.format)
        return 0

    # JSON is the fallback renderer for every format other than html/text.
    stdout_renderers = {"html": report_to_html, "text": report_to_text}
    render = stdout_renderers.get(args.format, report_to_json)
    print(render(report))
    return 0
97
+
98
+
99
def run_batch_command(args: argparse.Namespace) -> int:
    """Run every comparison in ``args.manifest`` and emit the batch report.

    The ``--min-concordance`` value is validated before any comparison runs,
    so an out-of-range threshold fails immediately instead of after the whole
    batch has executed and its report has already been written or printed.

    Args:
        args: Parsed ``batch`` arguments.

    Returns:
        ``1`` if any result carries an error, or if a threshold is set and
        any result with a report has ``overall_concordance`` below it;
        otherwise ``0``.

    Raises:
        ValueError: If ``args.min_concordance`` is set and lies outside
            ``[0.0, 1.0]`` (NaN is rejected as well).
    """
    threshold = args.min_concordance
    if threshold is not None and not 0.0 <= threshold <= 1.0:
        raise ValueError("min-concordance must be between 0.0 and 1.0")

    results = run_batch(
        args.manifest,
        stop_on_error=args.stop_on_error,
        default_file_type=args.file_type,
        **comparison_kwargs(args, include_file_type=False),
    )

    if args.output:
        write_batch(results, args.output, fmt=args.format)
    elif args.format == "html":
        print(batch_to_html(results))
    elif args.format == "json":
        print(batch_to_json(results))
    elif args.format == "text":
        print(batch_to_text(results))
    else:
        print(batch_to_tsv(results))

    any_failed = any(result.error for result in results)
    # Results without a report are excluded from the threshold check.
    any_below_threshold = threshold is not None and any(
        result.report is not None and result.report.overall_concordance < threshold
        for result in results
    )
    return 1 if any_failed or any_below_threshold else 0
124
+
125
+
126
def comparison_kwargs(args: argparse.Namespace, *, include_file_type: bool = True) -> dict[str, object]:
    """Collect the shared comparator options from ``args`` into a dict.

    Args:
        args: Parsed arguments carrying the options added by
            ``add_common_options``.
        include_file_type: When true, also copy ``args.file_type`` under the
            ``"file_type"`` key (added last).

    Returns:
        A mapping from option name to the parsed value; values are copied
        as-is, including ``None``.
    """
    option_names = (
        "key_column",
        "delimiter",
        "alpha",
        "lfc_threshold",
        "top_n",
        "gene_column",
        "sample_columns",
        "min_reciprocal_overlap",
        "reference_fasta",
        "logfc_column",
        "padj_column",
        "pvalue_column",
    )
    kwargs: dict[str, object] = {name: getattr(args, name) for name in option_names}
    if include_file_type:
        kwargs["file_type"] = args.file_type
    return kwargs
144
+
145
+
146
+ if __name__ == "__main__":
147
+ raise SystemExit(main())
@@ -0,0 +1,25 @@
1
+ from __future__ import annotations
2
+
3
+ from pipeconcord.comparators.bam_stats import BAMStatsComparator
4
+ from pipeconcord.comparators.bed import BEDComparator
5
+ from pipeconcord.comparators.counts import CountsComparator
6
+ from pipeconcord.comparators.deg import DEGComparator
7
+ from pipeconcord.comparators.expression import ExpressionComparator
8
+ from pipeconcord.comparators.fasta import FASTAComparator
9
+ from pipeconcord.comparators.table import TableComparator
10
+ from pipeconcord.comparators.vcf import VCFComparator
11
+ from pipeconcord.core.registry import ComparatorRegistry
12
+
13
+
14
def register_builtin_comparators(registry: type[ComparatorRegistry] = ComparatorRegistry) -> None:
    """Register every built-in comparator class with ``registry``.

    Args:
        registry: Object exposing ``register``; defaults to the
            ``ComparatorRegistry`` class itself.

    The classes are registered in a fixed order, with ``TableComparator``
    last.
    NOTE(review): presumably the order sets lookup priority, with the
    generic table comparator as fallback - confirm against the registry.
    """
    builtin_comparators = (
        DEGComparator,
        ExpressionComparator,
        CountsComparator,
        BEDComparator,
        FASTAComparator,
        VCFComparator,
        BAMStatsComparator,
        TableComparator,
    )
    for comparator_class in builtin_comparators:
        registry.register(comparator_class)
23
+
24
+
25
+ __all__ = ["BAMStatsComparator", "BEDComparator", "CountsComparator", "DEGComparator", "ExpressionComparator", "FASTAComparator", "TableComparator", "VCFComparator", "register_builtin_comparators"]