metasheet-guard 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metasheet_guard-0.1.0/LICENSE +21 -0
- metasheet_guard-0.1.0/PKG-INFO +144 -0
- metasheet_guard-0.1.0/README.md +110 -0
- metasheet_guard-0.1.0/pyproject.toml +76 -0
- metasheet_guard-0.1.0/setup.cfg +4 -0
- metasheet_guard-0.1.0/src/metasheet_guard/__init__.py +84 -0
- metasheet_guard-0.1.0/src/metasheet_guard/cli.py +237 -0
- metasheet_guard-0.1.0/src/metasheet_guard/export/__init__.py +13 -0
- metasheet_guard-0.1.0/src/metasheet_guard/export/deseq2.py +26 -0
- metasheet_guard-0.1.0/src/metasheet_guard/export/generic.py +12 -0
- metasheet_guard-0.1.0/src/metasheet_guard/export/nfcore_rnaseq.py +28 -0
- metasheet_guard-0.1.0/src/metasheet_guard/export/snakemake.py +38 -0
- metasheet_guard-0.1.0/src/metasheet_guard/io/__init__.py +6 -0
- metasheet_guard-0.1.0/src/metasheet_guard/io/csv.py +140 -0
- metasheet_guard-0.1.0/src/metasheet_guard/io/sra.py +49 -0
- metasheet_guard-0.1.0/src/metasheet_guard/issue.py +29 -0
- metasheet_guard-0.1.0/src/metasheet_guard/model/__init__.py +15 -0
- metasheet_guard-0.1.0/src/metasheet_guard/model/design.py +16 -0
- metasheet_guard-0.1.0/src/metasheet_guard/model/file.py +18 -0
- metasheet_guard-0.1.0/src/metasheet_guard/model/project.py +134 -0
- metasheet_guard-0.1.0/src/metasheet_guard/model/run.py +18 -0
- metasheet_guard-0.1.0/src/metasheet_guard/model/sample.py +19 -0
- metasheet_guard-0.1.0/src/metasheet_guard/repair/__init__.py +6 -0
- metasheet_guard-0.1.0/src/metasheet_guard/repair/engine.py +127 -0
- metasheet_guard-0.1.0/src/metasheet_guard/repair/provenance.py +42 -0
- metasheet_guard-0.1.0/src/metasheet_guard/report/__init__.py +6 -0
- metasheet_guard-0.1.0/src/metasheet_guard/report/html.py +26 -0
- metasheet_guard-0.1.0/src/metasheet_guard/report/json.py +20 -0
- metasheet_guard-0.1.0/src/metasheet_guard/report/templates/report.html.j2 +67 -0
- metasheet_guard-0.1.0/src/metasheet_guard/result.py +63 -0
- metasheet_guard-0.1.0/src/metasheet_guard/schema/__init__.py +5 -0
- metasheet_guard-0.1.0/src/metasheet_guard/schema/loader.py +117 -0
- metasheet_guard-0.1.0/src/metasheet_guard/schemas/bulk-rnaseq.yaml +126 -0
- metasheet_guard-0.1.0/src/metasheet_guard/schemas/generic-ngs.yaml +77 -0
- metasheet_guard-0.1.0/src/metasheet_guard/validators/__init__.py +59 -0
- metasheet_guard-0.1.0/src/metasheet_guard/validators/base.py +16 -0
- metasheet_guard-0.1.0/src/metasheet_guard/validators/design.py +226 -0
- metasheet_guard-0.1.0/src/metasheet_guard/validators/export_readiness.py +76 -0
- metasheet_guard-0.1.0/src/metasheet_guard/validators/fastq.py +264 -0
- metasheet_guard-0.1.0/src/metasheet_guard/validators/metadata.py +218 -0
- metasheet_guard-0.1.0/src/metasheet_guard/validators/sample_run.py +201 -0
- metasheet_guard-0.1.0/src/metasheet_guard/validators/table.py +221 -0
- metasheet_guard-0.1.0/src/metasheet_guard.egg-info/PKG-INFO +144 -0
- metasheet_guard-0.1.0/src/metasheet_guard.egg-info/SOURCES.txt +61 -0
- metasheet_guard-0.1.0/src/metasheet_guard.egg-info/dependency_links.txt +1 -0
- metasheet_guard-0.1.0/src/metasheet_guard.egg-info/entry_points.txt +2 -0
- metasheet_guard-0.1.0/src/metasheet_guard.egg-info/requires.txt +10 -0
- metasheet_guard-0.1.0/src/metasheet_guard.egg-info/top_level.txt +1 -0
- metasheet_guard-0.1.0/tests/test_cli.py +163 -0
- metasheet_guard-0.1.0/tests/test_csv_reader.py +37 -0
- metasheet_guard-0.1.0/tests/test_design_validators.py +31 -0
- metasheet_guard-0.1.0/tests/test_export_deseq2.py +13 -0
- metasheet_guard-0.1.0/tests/test_export_nfcore.py +13 -0
- metasheet_guard-0.1.0/tests/test_export_snakemake.py +14 -0
- metasheet_guard-0.1.0/tests/test_fastq_validators.py +39 -0
- metasheet_guard-0.1.0/tests/test_metadata_validators.py +31 -0
- metasheet_guard-0.1.0/tests/test_model.py +21 -0
- metasheet_guard-0.1.0/tests/test_repair.py +33 -0
- metasheet_guard-0.1.0/tests/test_reports.py +53 -0
- metasheet_guard-0.1.0/tests/test_sample_run_validators.py +26 -0
- metasheet_guard-0.1.0/tests/test_schema_loader.py +44 -0
- metasheet_guard-0.1.0/tests/test_sra_import.py +13 -0
- metasheet_guard-0.1.0/tests/test_table_validators.py +70 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 MetaSheet-Guard contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: metasheet-guard
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Experimental-design-aware quality control for sequencing analysis sample sheets.
|
|
5
|
+
Author: MetaSheet-Guard contributors
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://qchiujunhao.github.io/metasheet-guard/
|
|
8
|
+
Project-URL: Documentation, https://qchiujunhao.github.io/metasheet-guard/
|
|
9
|
+
Project-URL: Repository, https://github.com/qchiujunhao/metasheet-guard
|
|
10
|
+
Project-URL: Issues, https://github.com/qchiujunhao/metasheet-guard/issues
|
|
11
|
+
Keywords: bioinformatics,metadata,sample sheet,sequencing,quality control
|
|
12
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: jinja2>=3.1
|
|
25
|
+
Requires-Dist: pyyaml>=6.0
|
|
26
|
+
Requires-Dist: rich>=13.0
|
|
27
|
+
Requires-Dist: typer>=0.12
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
30
|
+
Requires-Dist: ruff>=0.6.0; extra == "dev"
|
|
31
|
+
Requires-Dist: pre-commit>=3.5; extra == "dev"
|
|
32
|
+
Requires-Dist: mkdocs-material>=9.5; extra == "dev"
|
|
33
|
+
Dynamic: license-file
|
|
34
|
+
|
|
35
|
+
# MetaSheet-Guard
|
|
36
|
+
|
|
37
|
+
[Documentation website](https://qchiujunhao.github.io/metasheet-guard/) |
|
|
38
|
+
[Repository](https://github.com/qchiujunhao/metasheet-guard)
|
|
39
|
+
|
|
40
|
+
MetaSheet-Guard performs experimental-design-aware quality control for sequencing
|
|
41
|
+
analysis sample sheets. It targets the analysis-preparation stage: after FASTQ
|
|
42
|
+
generation or public metadata collection, but before running workflows such as
|
|
43
|
+
Nextflow, Snakemake, nf-core/rnaseq, or custom RNA-seq pipelines.
|
|
44
|
+
|
|
45
|
+
The current package includes CSV/TSV reading, bundled YAML schemas, validation,
|
|
46
|
+
repair provenance, workflow export, JSON/HTML reports, and a
|
|
47
|
+
`metasheet-guard` CLI.
|
|
48
|
+
|
|
49
|
+
## Scope
|
|
50
|
+
|
|
51
|
+
MetaSheet-Guard is being built to model relationships between biological
|
|
52
|
+
samples, sequencing runs, lanes, FASTQ files, replicates, conditions, batches,
|
|
53
|
+
and downstream workflow requirements. The current release is intentionally small
|
|
54
|
+
and supports these first-pass capabilities:
|
|
55
|
+
|
|
56
|
+
- required columns
|
|
57
|
+
- duplicate column names
|
|
58
|
+
- schema-defined column aliases
|
|
59
|
+
- empty values in required columns
|
|
60
|
+
- sample ID and metadata consistency checks
|
|
61
|
+
- FASTQ path, extension, gzip, pair, and duplication checks
|
|
62
|
+
- sample/run/lane relationship checks
|
|
63
|
+
- batch-condition and related design-risk checks
|
|
64
|
+
- safe repair with `changes.json` provenance
|
|
65
|
+
- nf-core/rnaseq, Snakemake, canonical CSV, and DESeq2 design exports
|
|
66
|
+
- bundled `generic-ngs` and `bulk-rnaseq` schemas
|
|
67
|
+
|
|
68
|
+
## Non-goals
|
|
69
|
+
|
|
70
|
+
MetaSheet-Guard is not an RNA-seq aligner, quantifier, differential expression
|
|
71
|
+
tool, SRA downloader, nf-core/fetchngs replacement, nf-schema replacement,
|
|
72
|
+
Illumina BCL Convert or bcl2fastq SampleSheet validator, single-cell object
|
|
73
|
+
validator, spatial image validator, or generic CSV validation framework.
|
|
74
|
+
|
|
75
|
+
## Installation
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
pip install -e ".[dev]"
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Quickstart
|
|
82
|
+
|
|
83
|
+
Validate a broken bulk RNA-seq sample sheet and write a JSON report:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
metasheet-guard check examples/broken/missing_required_column.csv \
|
|
87
|
+
--schema bulk-rnaseq \
|
|
88
|
+
--json report.json
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
The command exits with status code `1` when blocking validation errors are found.
|
|
92
|
+
For the example above, `report.json` contains a `REQUIRED_COLUMN_MISSING` issue
|
|
93
|
+
because the `bulk-rnaseq` schema requires a `condition` column.
|
|
94
|
+
|
|
95
|
+
Validate a minimal valid example:
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
metasheet-guard check examples/valid/bulk_rnaseq_paired.csv \
|
|
99
|
+
--schema bulk-rnaseq
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Repair safe metadata issues and record provenance:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
metasheet-guard repair examples/broken/condition_case_mixed.csv \
|
|
106
|
+
--schema bulk-rnaseq \
|
|
107
|
+
--out clean.csv \
|
|
108
|
+
--changes changes.json
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Only safe repairs are implemented. Suggested/inference-based repairs are
|
|
112
|
+
reserved for later milestones and currently fail clearly if requested.
|
|
113
|
+
|
|
114
|
+
Export a cleaned sheet:
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
metasheet-guard export examples/valid/bulk_rnaseq_paired.csv \
|
|
118
|
+
--target nf-core-rnaseq \
|
|
119
|
+
--out nfcore_samplesheet.csv
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## Python API
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
from metasheet_guard import read_sheet, validate
|
|
126
|
+
|
|
127
|
+
sheet = read_sheet("examples/broken/missing_required_column.csv")
|
|
128
|
+
result = validate(sheet, schema="bulk-rnaseq")
|
|
129
|
+
|
|
130
|
+
for issue in result.issues:
|
|
131
|
+
print(issue.severity, issue.code, issue.message)
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
## Development
|
|
135
|
+
|
|
136
|
+
Run tests and linting:
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
pytest
|
|
140
|
+
ruff check .
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
The project uses `src/` packaging, Typer for the command-line interface, PyYAML
|
|
144
|
+
for schemas, pytest for tests, and Ruff for linting.
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# MetaSheet-Guard
|
|
2
|
+
|
|
3
|
+
[Documentation website](https://qchiujunhao.github.io/metasheet-guard/) |
|
|
4
|
+
[Repository](https://github.com/qchiujunhao/metasheet-guard)
|
|
5
|
+
|
|
6
|
+
MetaSheet-Guard performs experimental-design-aware quality control for sequencing
|
|
7
|
+
analysis sample sheets. It targets the analysis-preparation stage: after FASTQ
|
|
8
|
+
generation or public metadata collection, but before running workflows such as
|
|
9
|
+
Nextflow, Snakemake, nf-core/rnaseq, or custom RNA-seq pipelines.
|
|
10
|
+
|
|
11
|
+
The current package includes CSV/TSV reading, bundled YAML schemas, validation,
|
|
12
|
+
repair provenance, workflow export, JSON/HTML reports, and a
|
|
13
|
+
`metasheet-guard` CLI.
|
|
14
|
+
|
|
15
|
+
## Scope
|
|
16
|
+
|
|
17
|
+
MetaSheet-Guard is being built to model relationships between biological
|
|
18
|
+
samples, sequencing runs, lanes, FASTQ files, replicates, conditions, batches,
|
|
19
|
+
and downstream workflow requirements. The current release is intentionally small
|
|
20
|
+
and supports these first-pass capabilities:
|
|
21
|
+
|
|
22
|
+
- required columns
|
|
23
|
+
- duplicate column names
|
|
24
|
+
- schema-defined column aliases
|
|
25
|
+
- empty values in required columns
|
|
26
|
+
- sample ID and metadata consistency checks
|
|
27
|
+
- FASTQ path, extension, gzip, pair, and duplication checks
|
|
28
|
+
- sample/run/lane relationship checks
|
|
29
|
+
- batch-condition and related design-risk checks
|
|
30
|
+
- safe repair with `changes.json` provenance
|
|
31
|
+
- nf-core/rnaseq, Snakemake, canonical CSV, and DESeq2 design exports
|
|
32
|
+
- bundled `generic-ngs` and `bulk-rnaseq` schemas
|
|
33
|
+
|
|
34
|
+
## Non-goals
|
|
35
|
+
|
|
36
|
+
MetaSheet-Guard is not an RNA-seq aligner, quantifier, differential expression
|
|
37
|
+
tool, SRA downloader, nf-core/fetchngs replacement, nf-schema replacement,
|
|
38
|
+
Illumina BCL Convert or bcl2fastq SampleSheet validator, single-cell object
|
|
39
|
+
validator, spatial image validator, or generic CSV validation framework.
|
|
40
|
+
|
|
41
|
+
## Installation
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
pip install -e ".[dev]"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Quickstart
|
|
48
|
+
|
|
49
|
+
Validate a broken bulk RNA-seq sample sheet and write a JSON report:
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
metasheet-guard check examples/broken/missing_required_column.csv \
|
|
53
|
+
--schema bulk-rnaseq \
|
|
54
|
+
--json report.json
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
The command exits with status code `1` when blocking validation errors are found.
|
|
58
|
+
For the example above, `report.json` contains a `REQUIRED_COLUMN_MISSING` issue
|
|
59
|
+
because the `bulk-rnaseq` schema requires a `condition` column.
|
|
60
|
+
|
|
61
|
+
Validate a minimal valid example:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
metasheet-guard check examples/valid/bulk_rnaseq_paired.csv \
|
|
65
|
+
--schema bulk-rnaseq
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Repair safe metadata issues and record provenance:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
metasheet-guard repair examples/broken/condition_case_mixed.csv \
|
|
72
|
+
--schema bulk-rnaseq \
|
|
73
|
+
--out clean.csv \
|
|
74
|
+
--changes changes.json
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Only safe repairs are implemented. Suggested/inference-based repairs are
|
|
78
|
+
reserved for later milestones and currently fail clearly if requested.
|
|
79
|
+
|
|
80
|
+
Export a cleaned sheet:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
metasheet-guard export examples/valid/bulk_rnaseq_paired.csv \
|
|
84
|
+
--target nf-core-rnaseq \
|
|
85
|
+
--out nfcore_samplesheet.csv
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Python API
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
from metasheet_guard import read_sheet, validate
|
|
92
|
+
|
|
93
|
+
sheet = read_sheet("examples/broken/missing_required_column.csv")
|
|
94
|
+
result = validate(sheet, schema="bulk-rnaseq")
|
|
95
|
+
|
|
96
|
+
for issue in result.issues:
|
|
97
|
+
print(issue.severity, issue.code, issue.message)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Development
|
|
101
|
+
|
|
102
|
+
Run tests and linting:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
pytest
|
|
106
|
+
ruff check .
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
The project uses `src/` packaging, Typer for the command-line interface, PyYAML
|
|
110
|
+
for schemas, pytest for tests, and Ruff for linting.
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
# `license = "MIT"` below is a PEP 639 SPDX expression string; setuptools only
# accepts that form from v77 onward, so older releases cannot build this project.
requires = ["setuptools>=77", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "metasheet-guard"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Experimental-design-aware quality control for sequencing analysis sample sheets."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "MetaSheet-Guard contributors" }
|
|
14
|
+
]
|
|
15
|
+
keywords = [
|
|
16
|
+
"bioinformatics",
|
|
17
|
+
"metadata",
|
|
18
|
+
"sample sheet",
|
|
19
|
+
"sequencing",
|
|
20
|
+
"quality control"
|
|
21
|
+
]
|
|
22
|
+
classifiers = [
|
|
23
|
+
"Development Status :: 2 - Pre-Alpha",
|
|
24
|
+
"Environment :: Console",
|
|
25
|
+
"Intended Audience :: Science/Research",
|
|
26
|
+
"Programming Language :: Python :: 3",
|
|
27
|
+
"Programming Language :: Python :: 3.10",
|
|
28
|
+
"Programming Language :: Python :: 3.11",
|
|
29
|
+
"Programming Language :: Python :: 3.12",
|
|
30
|
+
"Programming Language :: Python :: 3.13",
|
|
31
|
+
"Topic :: Scientific/Engineering :: Bio-Informatics"
|
|
32
|
+
]
|
|
33
|
+
dependencies = [
|
|
34
|
+
"jinja2>=3.1",
|
|
35
|
+
"pyyaml>=6.0",
|
|
36
|
+
"rich>=13.0",
|
|
37
|
+
"typer>=0.12"
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
[project.optional-dependencies]
|
|
41
|
+
dev = [
|
|
42
|
+
"pytest>=8.0",
|
|
43
|
+
"ruff>=0.6.0",
|
|
44
|
+
"pre-commit>=3.5",
|
|
45
|
+
"mkdocs-material>=9.5"
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
[project.scripts]
|
|
49
|
+
metasheet-guard = "metasheet_guard.cli:app"
|
|
50
|
+
|
|
51
|
+
[project.urls]
|
|
52
|
+
Homepage = "https://qchiujunhao.github.io/metasheet-guard/"
|
|
53
|
+
Documentation = "https://qchiujunhao.github.io/metasheet-guard/"
|
|
54
|
+
Repository = "https://github.com/qchiujunhao/metasheet-guard"
|
|
55
|
+
Issues = "https://github.com/qchiujunhao/metasheet-guard/issues"
|
|
56
|
+
|
|
57
|
+
[tool.setuptools.packages.find]
|
|
58
|
+
where = ["src"]
|
|
59
|
+
|
|
60
|
+
[tool.setuptools.package-data]
|
|
61
|
+
metasheet_guard = ["schemas/*.yaml", "report/templates/*.j2"]
|
|
62
|
+
|
|
63
|
+
[tool.pytest.ini_options]
|
|
64
|
+
testpaths = ["tests"]
|
|
65
|
+
addopts = "-ra"
|
|
66
|
+
|
|
67
|
+
[tool.ruff]
|
|
68
|
+
line-length = 88
|
|
69
|
+
target-version = "py310"
|
|
70
|
+
|
|
71
|
+
[tool.ruff.lint]
|
|
72
|
+
select = ["E", "F", "I", "UP", "B"]
|
|
73
|
+
|
|
74
|
+
[tool.ruff.format]
|
|
75
|
+
quote-style = "double"
|
|
76
|
+
indent-style = "space"
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Public API for MetaSheet-Guard."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from metasheet_guard.export import (
|
|
9
|
+
export_canonical,
|
|
10
|
+
export_deseq2_design,
|
|
11
|
+
export_nfcore_rnaseq,
|
|
12
|
+
export_snakemake,
|
|
13
|
+
)
|
|
14
|
+
from metasheet_guard.io.csv import SheetTable, read_table
|
|
15
|
+
from metasheet_guard.repair import repair_sheet
|
|
16
|
+
from metasheet_guard.report.json import write_json_report
|
|
17
|
+
from metasheet_guard.result import ValidationResult
|
|
18
|
+
from metasheet_guard.schema.loader import Schema, load_schema
|
|
19
|
+
from metasheet_guard.validators import run_validators
|
|
20
|
+
|
|
21
|
+
__version__ = "0.1.0"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def read_sheet(path: str | Path) -> SheetTable:
    """Load a CSV or TSV sample sheet from ``path`` as a ``SheetTable``.

    Public entry point that forwards directly to ``read_table``.
    """

    table = read_table(path)
    return table
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def validate(
    sheet: SheetTable | str | Path,
    schema: Schema | str | Path = "generic-ngs",
    root: str | Path | None = None,
    include_export_readiness: bool = True,
    **_: Any,
) -> ValidationResult:
    """Run the currently implemented validation rules on a sample sheet.

    Args:
        sheet: An already-loaded ``SheetTable``, or a ``str``/``Path`` that is
            read with ``read_table`` first.
        schema: A ``Schema`` instance, or any other value, which is passed to
            ``load_schema`` to obtain one.
        root: Forwarded unchanged to ``run_validators``.
        include_export_readiness: Forwarded unchanged to ``run_validators``.
        **_: Extra keyword arguments are accepted and ignored.

    Returns:
        A ``ValidationResult`` built from the collected issues, the table's
        row and header counts, and the readiness value that
        ``run_validators`` returns.
    """

    # Accept either a path-like value or a table that was read by the caller.
    if isinstance(sheet, str | Path):
        table = read_table(sheet)
    else:
        table = sheet

    # Only resolve the schema when the caller did not pass a Schema object.
    if isinstance(schema, Schema):
        schema_obj = schema
    else:
        schema_obj = load_schema(schema)

    found_issues, export_readiness = run_validators(
        table,
        schema_obj,
        root=root,
        include_export_readiness=include_export_readiness,
    )

    n_rows = len(table.rows)
    n_columns = len(table.headers)
    return ValidationResult.from_issues(
        issues=found_issues,
        row_count=n_rows,
        column_count=n_columns,
        export_readiness=export_readiness,
    )
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def export_sheet(
    sheet: SheetTable | str | Path,
    target: str,
    output: str | Path,
) -> None:
    """Write ``sheet`` to ``output`` in the format named by ``target``.

    Supported targets are ``"nf-core-rnaseq"``, ``"deseq2-design"``,
    ``"snakemake"``, and ``"canonical"`` (``"generic"`` is an alias for it).

    Raises:
        ValueError: If ``target`` is not one of the supported names.
    """

    # Pick the exporter first, then invoke it once at the end.
    if target == "nf-core-rnaseq":
        exporter = export_nfcore_rnaseq
    elif target == "deseq2-design":
        exporter = export_deseq2_design
    elif target == "snakemake":
        exporter = export_snakemake
    elif target in {"canonical", "generic"}:
        exporter = export_canonical
    else:
        raise ValueError(f"Unsupported export target: {target}")

    exporter(sheet, output)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
__all__ = [
|
|
75
|
+
"Schema",
|
|
76
|
+
"SheetTable",
|
|
77
|
+
"ValidationResult",
|
|
78
|
+
"__version__",
|
|
79
|
+
"read_sheet",
|
|
80
|
+
"export_sheet",
|
|
81
|
+
"repair_sheet",
|
|
82
|
+
"validate",
|
|
83
|
+
"write_json_report",
|
|
84
|
+
]
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
"""Command-line interface for MetaSheet-Guard."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Annotated
|
|
7
|
+
|
|
8
|
+
import typer
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
|
|
11
|
+
from metasheet_guard import __version__, export_sheet, validate
|
|
12
|
+
from metasheet_guard.io.csv import read_table
|
|
13
|
+
from metasheet_guard.io.sra import import_sra_runinfo
|
|
14
|
+
from metasheet_guard.repair import repair_sheet
|
|
15
|
+
from metasheet_guard.report.html import write_html_report
|
|
16
|
+
from metasheet_guard.report.json import write_json_report
|
|
17
|
+
from metasheet_guard.schema.loader import load_schema
|
|
18
|
+
|
|
19
|
+
app = typer.Typer(
|
|
20
|
+
add_completion=False,
|
|
21
|
+
help=(
|
|
22
|
+
"Experimental-design-aware quality control for sequencing analysis "
|
|
23
|
+
"sample sheets."
|
|
24
|
+
),
|
|
25
|
+
no_args_is_help=True,
|
|
26
|
+
)
|
|
27
|
+
console = Console()
|
|
28
|
+
schema_app = typer.Typer(help="Inspect bundled schemas.", no_args_is_help=True)
|
|
29
|
+
import_app = typer.Typer(help="Import external metadata tables.", no_args_is_help=True)
|
|
30
|
+
demo_app = typer.Typer(help="Create and run small local demos.", no_args_is_help=True)
|
|
31
|
+
app.add_typer(schema_app, name="schema")
|
|
32
|
+
app.add_typer(import_app, name="import")
|
|
33
|
+
app.add_typer(demo_app, name="demo")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _version_callback(value: bool) -> None:
    """Print the package version and exit when the ``--version`` flag is set."""
    if not value:
        return
    console.print(f"metasheet-guard {__version__}")
    raise typer.Exit()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Root callback for the Typer app. It only declares the global ``--version``
# option; the option's work is done in ``_version_callback``, so the function
# body is intentionally nothing more than its docstring.
@app.callback()
def main(
    version: Annotated[
        bool,
        typer.Option(
            "--version",
            callback=_version_callback,
            help="Show the installed MetaSheet-Guard version and exit.",
            is_eager=True,
        ),
    ] = False,
) -> None:
    """MetaSheet-Guard command group."""
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@app.command()
def check(
    path: Annotated[
        Path,
        typer.Argument(exists=True, file_okay=True, dir_okay=False, readable=True),
    ],
    schema: Annotated[
        str,
        typer.Option(
            "--schema",
            "-s",
            help="Bundled schema name or path to a YAML schema file.",
        ),
    ] = "generic-ngs",
    root: Annotated[
        Path | None,
        typer.Option(
            "--root",
            help=(
                "FASTQ root directory. Accepted for CLI compatibility; "
                "unused in Milestone 1."
            ),
        ),
    ] = None,
    json_path: Annotated[
        Path | None,
        typer.Option(
            "--json",
            help="Write a machine-readable JSON validation report.",
        ),
    ] = None,
    html_path: Annotated[
        Path | None,
        typer.Option(
            "--html",
            help="Write a static HTML validation report.",
        ),
    ] = None,
) -> None:
    """Validate a CSV or TSV sequencing analysis sample sheet."""

    # Load the schema first, then parse the sheet and validate it.
    schema_obj = load_schema(schema)
    result = validate(read_table(path), schema=schema_obj, root=root)

    # Each report is optional: write it only when its path was supplied,
    # JSON before HTML.
    report_writers = (
        (json_path, write_json_report),
        (html_path, write_html_report),
    )
    for destination, writer in report_writers:
        if destination is not None:
            writer(result, destination)

    counts = result.summary
    console.print(
        "[bold]Validation complete[/bold]: "
        f"{counts['errors']} error(s), "
        f"{counts['warnings']} warning(s), "
        f"{counts['infos']} info issue(s)."
    )

    # A non-zero exit status signals blocking errors to the calling shell.
    if not result.has_blocking_errors:
        return
    raise typer.Exit(code=1)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
@app.command()
def repair(
    path: Annotated[
        Path,
        typer.Argument(exists=True, file_okay=True, dir_okay=False, readable=True),
    ],
    schema: Annotated[
        str,
        typer.Option("--schema", "-s", help="Bundled schema name or YAML schema path."),
    ] = "generic-ngs",
    out: Annotated[
        Path, typer.Option("--out", help="Repaired output CSV path.")
    ] = Path("clean.csv"),
    changes: Annotated[
        Path,
        typer.Option("--changes", help="Repair provenance JSON path."),
    ] = Path("changes.json"),
    dry_run: Annotated[
        bool,
        typer.Option("--dry-run", help="Report repairs without changing output rows."),
    ] = False,
    safe_only: Annotated[
        bool,
        typer.Option(
            "--safe-only/--allow-suggestions", help="Apply only safe repairs."
        ),
    ] = True,
) -> None:
    """Apply conservative safe repairs and write changes.json provenance."""

    try:
        result = repair_sheet(path, schema=schema, safe_only=safe_only, dry_run=dry_run)
    except ValueError as exc:
        # Show the problem as a one-line message instead of a traceback.
        console.print(f"[red]Error:[/red] {exc}")
        raise typer.Exit(code=1) from exc
    else:
        # Both outputs are written only after the repair step succeeded.
        result.to_csv(out)
        result.write_changes(changes)
        change_count = len(result.changes)
        console.print(
            f"Wrote {out} and {changes} with {change_count} recorded change(s)."
        )
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
@app.command()
def export(
    path: Annotated[
        Path,
        typer.Argument(exists=True, file_okay=True, dir_okay=False, readable=True),
    ],
    target: Annotated[
        str,
        typer.Option(
            "--target",
            help="Export target: nf-core-rnaseq, deseq2-design, snakemake, canonical.",
        ),
    ],
    out: Annotated[Path, typer.Option("--out", help="Output path.")],
) -> None:
    """Export a cleaned sample sheet for a downstream workflow."""

    # export_sheet raises ValueError for an unsupported --target. Report it the
    # same way the repair command reports its ValueError: a one-line message
    # and exit status 1 rather than an uncaught traceback.
    try:
        export_sheet(path, target=target, output=out)
    except ValueError as exc:
        console.print(f"[red]Error:[/red] {exc}")
        raise typer.Exit(code=1) from exc
    console.print(f"Wrote {target} export to {out}.")
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
@import_app.command("sra-runinfo")
def import_sra_runinfo_command(
    path: Annotated[
        Path,
        typer.Argument(exists=True, file_okay=True, dir_okay=False, readable=True),
    ],
    out: Annotated[Path, typer.Option("--out", help="Canonical CSV output path.")],
    schema: Annotated[
        str,
        typer.Option(
            "--schema", help="Accepted for command symmetry; currently unused."
        ),
    ] = "generic-ngs",
) -> None:
    """Convert an SRA-like RunInfo CSV into a canonical sample sheet."""

    # --schema is accepted but has no effect; drop the local binding to make
    # that explicit.
    del schema

    import_sra_runinfo(path, out)
    confirmation = f"Wrote canonical sample sheet to {out}."
    console.print(confirmation)
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
@schema_app.command("show")
def schema_show(name: str) -> None:
    """Print a bundled or user-provided schema as loaded YAML-like data."""

    # Resolve the schema through load_schema and print the loaded object.
    console.print(load_schema(name))
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
@schema_app.command("list")
def schema_list() -> None:
    """List bundled schemas."""

    # The schema names are hard-coded here and printed one per line.
    for bundled_name in ("generic-ngs", "bulk-rnaseq"):
        console.print(bundled_name)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
@demo_app.command("init")
def demo_init() -> None:
    """Show where bundled examples live."""

    # Nothing is created on disk; the command only prints a pointer.
    hint = "Use examples/valid and examples/broken in this repository."
    console.print(hint)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
@demo_app.command("run")
def demo_run() -> None:
    """Run the missing-required-column demo."""

    # The demo input is addressed relative to the current working directory,
    # so it is only present when the command runs from a repository checkout.
    # Fail with a clear message instead of an error from deep inside
    # read_table.
    demo_path = Path("examples/broken/missing_required_column.csv")
    if not demo_path.is_file():
        console.print(
            f"[red]Error:[/red] demo input {demo_path} was not found; "
            "run this command from the root of a MetaSheet-Guard repository checkout."
        )
        raise typer.Exit(code=1)

    report_path = Path("report.json")
    schema_obj = load_schema("bulk-rnaseq")
    table = read_table(demo_path)
    result = validate(table, schema=schema_obj)
    write_json_report(result, report_path)
    console.print(f"Wrote demo report to {report_path}.")

    # Exit with status 1 when the result has blocking errors, matching the
    # behaviour of the check command.
    if result.has_blocking_errors:
        raise typer.Exit(code=1)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Workflow exporters."""
|
|
2
|
+
|
|
3
|
+
from metasheet_guard.export.deseq2 import export_deseq2_design
|
|
4
|
+
from metasheet_guard.export.generic import export_canonical
|
|
5
|
+
from metasheet_guard.export.nfcore_rnaseq import export_nfcore_rnaseq
|
|
6
|
+
from metasheet_guard.export.snakemake import export_snakemake
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"export_canonical",
|
|
10
|
+
"export_deseq2_design",
|
|
11
|
+
"export_nfcore_rnaseq",
|
|
12
|
+
"export_snakemake",
|
|
13
|
+
]
|