metasheet-guard 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. metasheet_guard-0.1.0/LICENSE +21 -0
  2. metasheet_guard-0.1.0/PKG-INFO +144 -0
  3. metasheet_guard-0.1.0/README.md +110 -0
  4. metasheet_guard-0.1.0/pyproject.toml +76 -0
  5. metasheet_guard-0.1.0/setup.cfg +4 -0
  6. metasheet_guard-0.1.0/src/metasheet_guard/__init__.py +84 -0
  7. metasheet_guard-0.1.0/src/metasheet_guard/cli.py +237 -0
  8. metasheet_guard-0.1.0/src/metasheet_guard/export/__init__.py +13 -0
  9. metasheet_guard-0.1.0/src/metasheet_guard/export/deseq2.py +26 -0
  10. metasheet_guard-0.1.0/src/metasheet_guard/export/generic.py +12 -0
  11. metasheet_guard-0.1.0/src/metasheet_guard/export/nfcore_rnaseq.py +28 -0
  12. metasheet_guard-0.1.0/src/metasheet_guard/export/snakemake.py +38 -0
  13. metasheet_guard-0.1.0/src/metasheet_guard/io/__init__.py +6 -0
  14. metasheet_guard-0.1.0/src/metasheet_guard/io/csv.py +140 -0
  15. metasheet_guard-0.1.0/src/metasheet_guard/io/sra.py +49 -0
  16. metasheet_guard-0.1.0/src/metasheet_guard/issue.py +29 -0
  17. metasheet_guard-0.1.0/src/metasheet_guard/model/__init__.py +15 -0
  18. metasheet_guard-0.1.0/src/metasheet_guard/model/design.py +16 -0
  19. metasheet_guard-0.1.0/src/metasheet_guard/model/file.py +18 -0
  20. metasheet_guard-0.1.0/src/metasheet_guard/model/project.py +134 -0
  21. metasheet_guard-0.1.0/src/metasheet_guard/model/run.py +18 -0
  22. metasheet_guard-0.1.0/src/metasheet_guard/model/sample.py +19 -0
  23. metasheet_guard-0.1.0/src/metasheet_guard/repair/__init__.py +6 -0
  24. metasheet_guard-0.1.0/src/metasheet_guard/repair/engine.py +127 -0
  25. metasheet_guard-0.1.0/src/metasheet_guard/repair/provenance.py +42 -0
  26. metasheet_guard-0.1.0/src/metasheet_guard/report/__init__.py +6 -0
  27. metasheet_guard-0.1.0/src/metasheet_guard/report/html.py +26 -0
  28. metasheet_guard-0.1.0/src/metasheet_guard/report/json.py +20 -0
  29. metasheet_guard-0.1.0/src/metasheet_guard/report/templates/report.html.j2 +67 -0
  30. metasheet_guard-0.1.0/src/metasheet_guard/result.py +63 -0
  31. metasheet_guard-0.1.0/src/metasheet_guard/schema/__init__.py +5 -0
  32. metasheet_guard-0.1.0/src/metasheet_guard/schema/loader.py +117 -0
  33. metasheet_guard-0.1.0/src/metasheet_guard/schemas/bulk-rnaseq.yaml +126 -0
  34. metasheet_guard-0.1.0/src/metasheet_guard/schemas/generic-ngs.yaml +77 -0
  35. metasheet_guard-0.1.0/src/metasheet_guard/validators/__init__.py +59 -0
  36. metasheet_guard-0.1.0/src/metasheet_guard/validators/base.py +16 -0
  37. metasheet_guard-0.1.0/src/metasheet_guard/validators/design.py +226 -0
  38. metasheet_guard-0.1.0/src/metasheet_guard/validators/export_readiness.py +76 -0
  39. metasheet_guard-0.1.0/src/metasheet_guard/validators/fastq.py +264 -0
  40. metasheet_guard-0.1.0/src/metasheet_guard/validators/metadata.py +218 -0
  41. metasheet_guard-0.1.0/src/metasheet_guard/validators/sample_run.py +201 -0
  42. metasheet_guard-0.1.0/src/metasheet_guard/validators/table.py +221 -0
  43. metasheet_guard-0.1.0/src/metasheet_guard.egg-info/PKG-INFO +144 -0
  44. metasheet_guard-0.1.0/src/metasheet_guard.egg-info/SOURCES.txt +61 -0
  45. metasheet_guard-0.1.0/src/metasheet_guard.egg-info/dependency_links.txt +1 -0
  46. metasheet_guard-0.1.0/src/metasheet_guard.egg-info/entry_points.txt +2 -0
  47. metasheet_guard-0.1.0/src/metasheet_guard.egg-info/requires.txt +10 -0
  48. metasheet_guard-0.1.0/src/metasheet_guard.egg-info/top_level.txt +1 -0
  49. metasheet_guard-0.1.0/tests/test_cli.py +163 -0
  50. metasheet_guard-0.1.0/tests/test_csv_reader.py +37 -0
  51. metasheet_guard-0.1.0/tests/test_design_validators.py +31 -0
  52. metasheet_guard-0.1.0/tests/test_export_deseq2.py +13 -0
  53. metasheet_guard-0.1.0/tests/test_export_nfcore.py +13 -0
  54. metasheet_guard-0.1.0/tests/test_export_snakemake.py +14 -0
  55. metasheet_guard-0.1.0/tests/test_fastq_validators.py +39 -0
  56. metasheet_guard-0.1.0/tests/test_metadata_validators.py +31 -0
  57. metasheet_guard-0.1.0/tests/test_model.py +21 -0
  58. metasheet_guard-0.1.0/tests/test_repair.py +33 -0
  59. metasheet_guard-0.1.0/tests/test_reports.py +53 -0
  60. metasheet_guard-0.1.0/tests/test_sample_run_validators.py +26 -0
  61. metasheet_guard-0.1.0/tests/test_schema_loader.py +44 -0
  62. metasheet_guard-0.1.0/tests/test_sra_import.py +13 -0
  63. metasheet_guard-0.1.0/tests/test_table_validators.py +70 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 MetaSheet-Guard contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,144 @@
1
+ Metadata-Version: 2.4
2
+ Name: metasheet-guard
3
+ Version: 0.1.0
4
+ Summary: Experimental-design-aware quality control for sequencing analysis sample sheets.
5
+ Author: MetaSheet-Guard contributors
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://qchiujunhao.github.io/metasheet-guard/
8
+ Project-URL: Documentation, https://qchiujunhao.github.io/metasheet-guard/
9
+ Project-URL: Repository, https://github.com/qchiujunhao/metasheet-guard
10
+ Project-URL: Issues, https://github.com/qchiujunhao/metasheet-guard/issues
11
+ Keywords: bioinformatics,metadata,sample sheet,sequencing,quality control
12
+ Classifier: Development Status :: 2 - Pre-Alpha
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
21
+ Requires-Python: >=3.10
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: jinja2>=3.1
25
+ Requires-Dist: pyyaml>=6.0
26
+ Requires-Dist: rich>=13.0
27
+ Requires-Dist: typer>=0.12
28
+ Provides-Extra: dev
29
+ Requires-Dist: pytest>=8.0; extra == "dev"
30
+ Requires-Dist: ruff>=0.6.0; extra == "dev"
31
+ Requires-Dist: pre-commit>=3.5; extra == "dev"
32
+ Requires-Dist: mkdocs-material>=9.5; extra == "dev"
33
+ Dynamic: license-file
34
+
35
+ # MetaSheet-Guard
36
+
37
+ [Documentation website](https://qchiujunhao.github.io/metasheet-guard/) |
38
+ [Repository](https://github.com/qchiujunhao/metasheet-guard)
39
+
40
+ MetaSheet-Guard performs experimental-design-aware quality control for sequencing
41
+ analysis sample sheets. It targets the analysis-preparation stage: after FASTQ
42
+ generation or public metadata collection, but before running workflows such as
43
+ Nextflow, Snakemake, nf-core/rnaseq, or custom RNA-seq pipelines.
44
+
45
+ The current package includes CSV/TSV reading, bundled YAML schemas, validation,
46
+ repair provenance, workflow export, JSON/HTML reports, and a
47
+ `metasheet-guard` CLI.
48
+
49
+ ## Scope
50
+
51
+ MetaSheet-Guard is being built to model relationships between biological
52
+ samples, sequencing runs, lanes, FASTQ files, replicates, conditions, batches,
53
+ and downstream workflow requirements. The current release is intentionally small
54
+ and supports these first-pass checks and features:
55
+
56
+ - required columns
57
+ - duplicate column names
58
+ - schema-defined column aliases
59
+ - empty values in required columns
60
+ - sample ID and metadata consistency checks
61
+ - FASTQ path, extension, gzip, pair, and duplication checks
62
+ - sample/run/lane relationship checks
63
+ - batch-condition and related design-risk checks
64
+ - safe repair with `changes.json` provenance
65
+ - nf-core/rnaseq, Snakemake, canonical CSV, and DESeq2 design exports
66
+ - bundled `generic-ngs` and `bulk-rnaseq` schemas
67
+
68
+ ## Non-goals
69
+
70
+ MetaSheet-Guard is not an RNA-seq aligner, quantifier, differential expression
71
+ tool, SRA downloader, nf-core/fetchngs replacement, nf-schema replacement,
72
+ Illumina BCL Convert or bcl2fastq SampleSheet validator, single-cell object
73
+ validator, spatial image validator, or generic CSV validation framework.
74
+
75
+ ## Installation
76
+
77
+ ```bash
78
+ pip install -e ".[dev]"
79
+ ```
80
+
81
+ ## Quickstart
82
+
83
+ Validate a broken bulk RNA-seq sample sheet and write a JSON report:
84
+
85
+ ```bash
86
+ metasheet-guard check examples/broken/missing_required_column.csv \
87
+ --schema bulk-rnaseq \
88
+ --json report.json
89
+ ```
90
+
91
+ The command exits with status code `1` when blocking validation errors are found.
92
+ For the example above, `report.json` contains a `REQUIRED_COLUMN_MISSING` issue
93
+ because the `bulk-rnaseq` schema requires a `condition` column.
94
+
95
+ Validate a minimal valid example:
96
+
97
+ ```bash
98
+ metasheet-guard check examples/valid/bulk_rnaseq_paired.csv \
99
+ --schema bulk-rnaseq
100
+ ```
101
+
102
+ Repair safe metadata issues and record provenance:
103
+
104
+ ```bash
105
+ metasheet-guard repair examples/broken/condition_case_mixed.csv \
106
+ --schema bulk-rnaseq \
107
+ --out clean.csv \
108
+ --changes changes.json
109
+ ```
110
+
111
+ Only safe repairs are implemented. Suggested/inference-based repairs are
112
+ reserved for later milestones and currently fail with a clear error if requested.
113
+
114
+ Export a cleaned sheet:
115
+
116
+ ```bash
117
+ metasheet-guard export examples/valid/bulk_rnaseq_paired.csv \
118
+ --target nf-core-rnaseq \
119
+ --out nfcore_samplesheet.csv
120
+ ```
121
+
122
+ ## Python API
123
+
124
+ ```python
125
+ from metasheet_guard import read_sheet, validate
126
+
127
+ sheet = read_sheet("examples/broken/missing_required_column.csv")
128
+ result = validate(sheet, schema="bulk-rnaseq")
129
+
130
+ for issue in result.issues:
131
+ print(issue.severity, issue.code, issue.message)
132
+ ```
133
+
134
+ ## Development
135
+
136
+ Run tests and linting:
137
+
138
+ ```bash
139
+ pytest
140
+ ruff check .
141
+ ```
142
+
143
+ The project uses `src/` packaging, Typer for the command-line interface, PyYAML
144
+ for schemas, pytest for tests, and Ruff for linting.
@@ -0,0 +1,110 @@
1
+ # MetaSheet-Guard
2
+
3
+ [Documentation website](https://qchiujunhao.github.io/metasheet-guard/) |
4
+ [Repository](https://github.com/qchiujunhao/metasheet-guard)
5
+
6
+ MetaSheet-Guard performs experimental-design-aware quality control for sequencing
7
+ analysis sample sheets. It targets the analysis-preparation stage: after FASTQ
8
+ generation or public metadata collection, but before running workflows such as
9
+ Nextflow, Snakemake, nf-core/rnaseq, or custom RNA-seq pipelines.
10
+
11
+ The current package includes CSV/TSV reading, bundled YAML schemas, validation,
12
+ repair provenance, workflow export, JSON/HTML reports, and a
13
+ `metasheet-guard` CLI.
14
+
15
+ ## Scope
16
+
17
+ MetaSheet-Guard is being built to model relationships between biological
18
+ samples, sequencing runs, lanes, FASTQ files, replicates, conditions, batches,
19
+ and downstream workflow requirements. The current release is intentionally small
20
+ and supports these first-pass checks and features:
21
+
22
+ - required columns
23
+ - duplicate column names
24
+ - schema-defined column aliases
25
+ - empty values in required columns
26
+ - sample ID and metadata consistency checks
27
+ - FASTQ path, extension, gzip, pair, and duplication checks
28
+ - sample/run/lane relationship checks
29
+ - batch-condition and related design-risk checks
30
+ - safe repair with `changes.json` provenance
31
+ - nf-core/rnaseq, Snakemake, canonical CSV, and DESeq2 design exports
32
+ - bundled `generic-ngs` and `bulk-rnaseq` schemas
33
+
34
+ ## Non-goals
35
+
36
+ MetaSheet-Guard is not an RNA-seq aligner, quantifier, differential expression
37
+ tool, SRA downloader, nf-core/fetchngs replacement, nf-schema replacement,
38
+ Illumina BCL Convert or bcl2fastq SampleSheet validator, single-cell object
39
+ validator, spatial image validator, or generic CSV validation framework.
40
+
41
+ ## Installation
42
+
43
+ ```bash
44
+ pip install -e ".[dev]"
45
+ ```
46
+
47
+ ## Quickstart
48
+
49
+ Validate a broken bulk RNA-seq sample sheet and write a JSON report:
50
+
51
+ ```bash
52
+ metasheet-guard check examples/broken/missing_required_column.csv \
53
+ --schema bulk-rnaseq \
54
+ --json report.json
55
+ ```
56
+
57
+ The command exits with status code `1` when blocking validation errors are found.
58
+ For the example above, `report.json` contains a `REQUIRED_COLUMN_MISSING` issue
59
+ because the `bulk-rnaseq` schema requires a `condition` column.
60
+
61
+ Validate a minimal valid example:
62
+
63
+ ```bash
64
+ metasheet-guard check examples/valid/bulk_rnaseq_paired.csv \
65
+ --schema bulk-rnaseq
66
+ ```
67
+
68
+ Repair safe metadata issues and record provenance:
69
+
70
+ ```bash
71
+ metasheet-guard repair examples/broken/condition_case_mixed.csv \
72
+ --schema bulk-rnaseq \
73
+ --out clean.csv \
74
+ --changes changes.json
75
+ ```
76
+
77
+ Only safe repairs are implemented. Suggested/inference-based repairs are
78
+ reserved for later milestones and currently fail with a clear error if requested.
79
+
80
+ Export a cleaned sheet:
81
+
82
+ ```bash
83
+ metasheet-guard export examples/valid/bulk_rnaseq_paired.csv \
84
+ --target nf-core-rnaseq \
85
+ --out nfcore_samplesheet.csv
86
+ ```
87
+
88
+ ## Python API
89
+
90
+ ```python
91
+ from metasheet_guard import read_sheet, validate
92
+
93
+ sheet = read_sheet("examples/broken/missing_required_column.csv")
94
+ result = validate(sheet, schema="bulk-rnaseq")
95
+
96
+ for issue in result.issues:
97
+ print(issue.severity, issue.code, issue.message)
98
+ ```
99
+
100
+ ## Development
101
+
102
+ Run tests and linting:
103
+
104
+ ```bash
105
+ pytest
106
+ ruff check .
107
+ ```
108
+
109
+ The project uses `src/` packaging, Typer for the command-line interface, PyYAML
110
+ for schemas, pytest for tests, and Ruff for linting.
@@ -0,0 +1,76 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "metasheet-guard"
7
+ version = "0.1.0"
8
+ description = "Experimental-design-aware quality control for sequencing analysis sample sheets."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = "MIT"
12
+ authors = [
13
+ { name = "MetaSheet-Guard contributors" }
14
+ ]
15
+ keywords = [
16
+ "bioinformatics",
17
+ "metadata",
18
+ "sample sheet",
19
+ "sequencing",
20
+ "quality control"
21
+ ]
22
+ classifiers = [
23
+ "Development Status :: 2 - Pre-Alpha",
24
+ "Environment :: Console",
25
+ "Intended Audience :: Science/Research",
26
+ "Programming Language :: Python :: 3",
27
+ "Programming Language :: Python :: 3.10",
28
+ "Programming Language :: Python :: 3.11",
29
+ "Programming Language :: Python :: 3.12",
30
+ "Programming Language :: Python :: 3.13",
31
+ "Topic :: Scientific/Engineering :: Bio-Informatics"
32
+ ]
33
+ dependencies = [
34
+ "jinja2>=3.1",
35
+ "pyyaml>=6.0",
36
+ "rich>=13.0",
37
+ "typer>=0.12"
38
+ ]
39
+
40
+ [project.optional-dependencies]
41
+ dev = [
42
+ "pytest>=8.0",
43
+ "ruff>=0.6.0",
44
+ "pre-commit>=3.5",
45
+ "mkdocs-material>=9.5"
46
+ ]
47
+
48
+ [project.scripts]
49
+ metasheet-guard = "metasheet_guard.cli:app"
50
+
51
+ [project.urls]
52
+ Homepage = "https://qchiujunhao.github.io/metasheet-guard/"
53
+ Documentation = "https://qchiujunhao.github.io/metasheet-guard/"
54
+ Repository = "https://github.com/qchiujunhao/metasheet-guard"
55
+ Issues = "https://github.com/qchiujunhao/metasheet-guard/issues"
56
+
57
+ [tool.setuptools.packages.find]
58
+ where = ["src"]
59
+
60
+ [tool.setuptools.package-data]
61
+ metasheet_guard = ["schemas/*.yaml", "report/templates/*.j2"]
62
+
63
+ [tool.pytest.ini_options]
64
+ testpaths = ["tests"]
65
+ addopts = "-ra"
66
+
67
+ [tool.ruff]
68
+ line-length = 88
69
+ target-version = "py310"
70
+
71
+ [tool.ruff.lint]
72
+ select = ["E", "F", "I", "UP", "B"]
73
+
74
+ [tool.ruff.format]
75
+ quote-style = "double"
76
+ indent-style = "space"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,84 @@
1
+ """Public API for MetaSheet-Guard."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from metasheet_guard.export import (
9
+ export_canonical,
10
+ export_deseq2_design,
11
+ export_nfcore_rnaseq,
12
+ export_snakemake,
13
+ )
14
+ from metasheet_guard.io.csv import SheetTable, read_table
15
+ from metasheet_guard.repair import repair_sheet
16
+ from metasheet_guard.report.json import write_json_report
17
+ from metasheet_guard.result import ValidationResult
18
+ from metasheet_guard.schema.loader import Schema, load_schema
19
+ from metasheet_guard.validators import run_validators
20
+
21
+ __version__ = "0.1.0"
22
+
23
+
24
def read_sheet(path: str | Path) -> SheetTable:
    """Load the sample sheet at ``path`` and return it as a ``SheetTable``.

    Thin public alias: all reading is delegated to ``read_table`` unchanged.
    """
    table = read_table(path)
    return table
28
+
29
+
30
def validate(
    sheet: SheetTable | str | Path,
    schema: Schema | str | Path = "generic-ngs",
    root: str | Path | None = None,
    include_export_readiness: bool = True,
    **_: Any,
) -> ValidationResult:
    """Run the implemented validators over a sample sheet.

    Args:
        sheet: An already-loaded table, or a ``str``/``Path`` that is read with
            ``read_table`` first.
        schema: A ``Schema`` instance, or any other value, which is handed to
            ``load_schema``.
        root: Forwarded unchanged to ``run_validators``.
        include_export_readiness: Forwarded unchanged to ``run_validators``.
        **_: Extra keyword arguments are accepted and ignored.

    Returns:
        A ``ValidationResult`` built from the collected issues, the table's row
        and column counts, and the export-readiness value.
    """
    # Resolve the table first, then the schema (same order as before).
    if isinstance(sheet, (str, Path)):
        table = read_table(sheet)
    else:
        table = sheet

    if isinstance(schema, Schema):
        schema_obj = schema
    else:
        schema_obj = load_schema(schema)

    issues, readiness = run_validators(
        table,
        schema_obj,
        root=root,
        include_export_readiness=include_export_readiness,
    )

    n_rows = len(table.rows)
    n_columns = len(table.headers)
    return ValidationResult.from_issues(
        issues=issues,
        row_count=n_rows,
        column_count=n_columns,
        export_readiness=readiness,
    )
53
+
54
+
55
+ def export_sheet(
56
+ sheet: SheetTable | str | Path,
57
+ target: str,
58
+ output: str | Path,
59
+ ) -> None:
60
+ """Export a sample sheet to a supported workflow target."""
61
+
62
+ if target == "nf-core-rnaseq":
63
+ export_nfcore_rnaseq(sheet, output)
64
+ elif target == "deseq2-design":
65
+ export_deseq2_design(sheet, output)
66
+ elif target == "snakemake":
67
+ export_snakemake(sheet, output)
68
+ elif target in {"canonical", "generic"}:
69
+ export_canonical(sheet, output)
70
+ else:
71
+ raise ValueError(f"Unsupported export target: {target}")
72
+
73
+
74
+ __all__ = [
75
+ "Schema",
76
+ "SheetTable",
77
+ "ValidationResult",
78
+ "__version__",
79
+ "read_sheet",
80
+ "export_sheet",
81
+ "repair_sheet",
82
+ "validate",
83
+ "write_json_report",
84
+ ]
@@ -0,0 +1,237 @@
1
+ """Command-line interface for MetaSheet-Guard."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Annotated
7
+
8
+ import typer
9
+ from rich.console import Console
10
+
11
+ from metasheet_guard import __version__, export_sheet, validate
12
+ from metasheet_guard.io.csv import read_table
13
+ from metasheet_guard.io.sra import import_sra_runinfo
14
+ from metasheet_guard.repair import repair_sheet
15
+ from metasheet_guard.report.html import write_html_report
16
+ from metasheet_guard.report.json import write_json_report
17
+ from metasheet_guard.schema.loader import load_schema
18
+
19
# Shared Rich console used by every command for terminal output.
console = Console()

# Sub-command groups; each shows its own help when invoked without arguments.
schema_app = typer.Typer(help="Inspect bundled schemas.", no_args_is_help=True)
import_app = typer.Typer(help="Import external metadata tables.", no_args_is_help=True)
demo_app = typer.Typer(help="Create and run small local demos.", no_args_is_help=True)

# Root command group (shell-completion installation options are disabled).
app = typer.Typer(
    help=(
        "Experimental-design-aware quality control for sequencing analysis "
        "sample sheets."
    ),
    add_completion=False,
    no_args_is_help=True,
)

# Registration order is kept: schema, import, demo.
for _group_name, _group in (
    ("schema", schema_app),
    ("import", import_app),
    ("demo", demo_app),
):
    app.add_typer(_group, name=_group_name)
34
+
35
+
36
+ def _version_callback(value: bool) -> None:
37
+ if value:
38
+ console.print(f"metasheet-guard {__version__}")
39
+ raise typer.Exit()
40
+
41
+
42
# Root callback: it only hosts the eager ``--version`` option, whose work is
# done by ``_version_callback``; the function body itself is empty.
@app.callback()
def main(
    version: Annotated[
        bool,
        typer.Option(
            "--version",
            is_eager=True,
            callback=_version_callback,
            help="Show the installed MetaSheet-Guard version and exit.",
        ),
    ] = False,
) -> None:
    """MetaSheet-Guard command group."""
55
+
56
+
57
# ``check``: load the schema and table, validate, optionally write JSON/HTML
# reports, print a one-line summary, and exit 1 on blocking errors.
@app.command()
def check(
    path: Annotated[
        Path,
        typer.Argument(exists=True, file_okay=True, dir_okay=False, readable=True),
    ],
    schema: Annotated[
        str,
        typer.Option(
            "--schema",
            "-s",
            help="Bundled schema name or path to a YAML schema file.",
        ),
    ] = "generic-ngs",
    # NOTE(review): the help text says --root is unused, yet the value is
    # forwarded to validate() below -- confirm whether the wording is stale.
    root: Annotated[
        Path | None,
        typer.Option(
            "--root",
            help=(
                "FASTQ root directory. Accepted for CLI compatibility; "
                "unused in Milestone 1."
            ),
        ),
    ] = None,
    json_path: Annotated[
        Path | None,
        typer.Option(
            "--json",
            help="Write a machine-readable JSON validation report.",
        ),
    ] = None,
    html_path: Annotated[
        Path | None,
        typer.Option(
            "--html",
            help="Write a static HTML validation report.",
        ),
    ] = None,
) -> None:
    """Validate a CSV or TSV sequencing analysis sample sheet."""

    loaded_schema = load_schema(schema)
    sheet_table = read_table(path)
    result = validate(sheet_table, schema=loaded_schema, root=root)

    # Optional report files are written (JSON first, then HTML) before the
    # summary line is printed.
    report_writers = (
        (json_path, write_json_report),
        (html_path, write_html_report),
    )
    for destination, writer in report_writers:
        if destination is not None:
            writer(result, destination)

    summary = result.summary
    console.print(
        "[bold]Validation complete[/bold]: "
        f"{summary['errors']} error(s), "
        f"{summary['warnings']} warning(s), "
        f"{summary['infos']} info issue(s)."
    )

    # A non-zero exit status signals blocking errors to calling scripts.
    if result.has_blocking_errors:
        raise typer.Exit(code=1)
116
+
117
+
118
# ``repair``: run repair_sheet, then write the repaired CSV and the provenance
# JSON. A ValueError from repair_sheet is reported and turned into exit code 1.
@app.command()
def repair(
    path: Annotated[
        Path,
        typer.Argument(exists=True, file_okay=True, dir_okay=False, readable=True),
    ],
    schema: Annotated[
        str,
        typer.Option("--schema", "-s", help="Bundled schema name or YAML schema path."),
    ] = "generic-ngs",
    out: Annotated[
        Path,
        typer.Option("--out", help="Repaired output CSV path."),
    ] = Path("clean.csv"),
    changes: Annotated[
        Path,
        typer.Option("--changes", help="Repair provenance JSON path."),
    ] = Path("changes.json"),
    dry_run: Annotated[
        bool,
        typer.Option("--dry-run", help="Report repairs without changing output rows."),
    ] = False,
    safe_only: Annotated[
        bool,
        typer.Option(
            "--safe-only/--allow-suggestions",
            help="Apply only safe repairs.",
        ),
    ] = True,
) -> None:
    """Apply conservative safe repairs and write changes.json provenance."""

    try:
        result = repair_sheet(path, schema=schema, safe_only=safe_only, dry_run=dry_run)
    except ValueError as exc:
        console.print(f"[red]Error:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # Both outputs are written even for --dry-run; only the row content differs.
    result.to_csv(out)
    result.write_changes(changes)

    change_count = len(result.changes)
    console.print(f"Wrote {out} and {changes} with {change_count} recorded change(s).")
158
+
159
+
160
# ``export``: hand the input path to export_sheet and report the result.
# export_sheet raises ValueError for an unsupported --target; that is reported
# as a one-line error with exit code 1, matching the ``repair`` command,
# instead of surfacing as a traceback.
@app.command()
def export(
    path: Annotated[
        Path,
        typer.Argument(exists=True, file_okay=True, dir_okay=False, readable=True),
    ],
    target: Annotated[
        str,
        typer.Option(
            "--target",
            help="Export target: nf-core-rnaseq, deseq2-design, snakemake, canonical.",
        ),
    ],
    out: Annotated[Path, typer.Option("--out", help="Output path.")],
) -> None:
    """Export a cleaned sample sheet for a downstream workflow."""

    try:
        export_sheet(path, target=target, output=out)
    except ValueError as exc:
        console.print(f"[red]Error:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # Only reached when the export completed without raising.
    console.print(f"Wrote {target} export to {out}.")
179
+
180
+
181
# ``import sra-runinfo``: convert the input file with import_sra_runinfo and
# report where the result was written.
@import_app.command("sra-runinfo")
def import_sra_runinfo_command(
    path: Annotated[
        Path,
        typer.Argument(exists=True, file_okay=True, dir_okay=False, readable=True),
    ],
    out: Annotated[
        Path,
        typer.Option("--out", help="Canonical CSV output path."),
    ],
    schema: Annotated[
        str,
        typer.Option(
            "--schema",
            help="Accepted for command symmetry; currently unused.",
        ),
    ] = "generic-ngs",
) -> None:
    """Convert an SRA-like RunInfo CSV into a canonical sample sheet."""

    # --schema is accepted but deliberately discarded.
    del schema

    import_sra_runinfo(path, out)
    console.print(f"Wrote canonical sample sheet to {out}.")
200
+
201
+
202
# ``schema show``: load the named schema with load_schema and print the
# resulting object through the shared console.
@schema_app.command("show")
def schema_show(name: str) -> None:
    """Print a bundled or user-provided schema as loaded YAML-like data."""

    console.print(load_schema(name))
208
+
209
+
210
# ``schema list``: print the bundled schema names, one per line. The names are
# hard-coded here rather than discovered at runtime.
@schema_app.command("list")
def schema_list() -> None:
    """List bundled schemas."""

    for bundled_name in ("generic-ngs", "bulk-rnaseq"):
        console.print(bundled_name)
216
+
217
+
218
# ``demo init``: informational only; prints a pointer to the example folders
# and creates nothing.
@demo_app.command("init")
def demo_init() -> None:
    """Show where bundled examples live."""

    hint = "Use examples/valid and examples/broken in this repository."
    console.print(hint)
223
+
224
+
225
# ``demo run``: validate the bundled broken example against the bulk-rnaseq
# schema, write report.json, and exit 1 when blocking errors are found.
@demo_app.command("run")
def demo_run() -> None:
    """Run the missing-required-column demo."""

    # Both paths are resolved against the current working directory.
    demo_path = Path("examples/broken/missing_required_column.csv")
    report_path = Path("report.json")

    # The demo input is a repository-relative file, so outside a checkout it is
    # absent; fail with a readable message instead of an error from the reader.
    if not demo_path.is_file():
        console.print(
            f"[red]Error:[/red] demo input {demo_path} not found; "
            "run this command from the repository root."
        )
        raise typer.Exit(code=1)

    schema_obj = load_schema("bulk-rnaseq")
    table = read_table(demo_path)
    result = validate(table, schema=schema_obj)
    write_json_report(result, report_path)
    console.print(f"Wrote demo report to {report_path}.")

    if result.has_blocking_errors:
        raise typer.Exit(code=1)
@@ -0,0 +1,13 @@
1
+ """Workflow exporters."""
2
+
3
+ from metasheet_guard.export.deseq2 import export_deseq2_design
4
+ from metasheet_guard.export.generic import export_canonical
5
+ from metasheet_guard.export.nfcore_rnaseq import export_nfcore_rnaseq
6
+ from metasheet_guard.export.snakemake import export_snakemake
7
+
8
+ __all__ = [
9
+ "export_canonical",
10
+ "export_deseq2_design",
11
+ "export_nfcore_rnaseq",
12
+ "export_snakemake",
13
+ ]