samplesheet-parser 0.3.2__tar.gz → 0.3.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/.github/workflows/ci.yml +15 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/.gitignore +2 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/CHANGELOG.md +54 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/CITATION.cff +14 -4
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/PKG-INFO +78 -3
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/README.md +77 -2
- samplesheet_parser-0.3.4/images/samplesheet_parser_arch_v03.png +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/pyproject.toml +1 -1
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/__init__.py +2 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/cli.py +105 -1
- samplesheet_parser-0.3.4/samplesheet_parser/index_utils.py +184 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/merger.py +12 -2
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/validators.py +9 -1
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_cli.py +155 -0
- samplesheet_parser-0.3.4/tests/test_index_utils.py +163 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_validators/test_validators.py +57 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/.github/workflows/copilot-instructions.md +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/.zenodo.json +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/CONTRIBUTING.md +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/LICENSE +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/demo_merger.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/parse_examples.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/ProjectAlpha_SampleSheet.csv +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/ProjectBeta_SampleSheet.csv +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/ProjectDelta_SampleSheet_collision.csv +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/ProjectGamma_SampleSheet.csv +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/README.md +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/combined_clean.csv +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/combined_collision_forced.csv +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/combined_mixed_formats.csv +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/v1_dual_index.csv +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/v1_multi_lane.csv +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/v1_single_index.csv +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/v1_with_lab_qc_settings.csv +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/v1_with_manifests.csv +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/v2_nextseq_single_index.csv +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/v2_novaseq_x_dual_index.csv +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/v2_with_cloud_settings.csv +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/v2_with_index_umi.csv +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/v2_with_pipeline_settings.csv +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/v2_with_read_umi.csv +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/images/samplesheet_parser_overview.png +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/converter.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/diff.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/enums.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/factory.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/parsers/__init__.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/parsers/v1.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/parsers/v2.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/writer.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/scripts/demo_converter.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/scripts/demo_diff.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/scripts/demo_writer.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/__init__.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/conftest.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/fixtures/SampleSheet_v1_dual_index.csv +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/fixtures/SampleSheet_v2_dual_index.csv +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/fixtures/SampleSheet_v2_modified.csv +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_converter.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_diff.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_factory.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_merger.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_parsers/__init__.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_parsers/test_v1.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_parsers/test_v2.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_validators/__init__.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_validators/test_hamming.py +0 -0
- {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_writer.py +0 -0
|
@@ -67,3 +67,18 @@ jobs:
|
|
|
67
67
|
uses: pypa/gh-action-pypi-publish@release/v1
|
|
68
68
|
with:
|
|
69
69
|
password: ${{ secrets.PYPI_API_TOKEN }}
|
|
70
|
+
|
|
71
|
+
release:
|
|
72
|
+
needs: publish
|
|
73
|
+
runs-on: ubuntu-latest
|
|
74
|
+
if: startsWith(github.ref, 'refs/tags/v')
|
|
75
|
+
permissions:
|
|
76
|
+
contents: write
|
|
77
|
+
|
|
78
|
+
steps:
|
|
79
|
+
- uses: actions/checkout@v4
|
|
80
|
+
|
|
81
|
+
- name: Create GitHub Release
|
|
82
|
+
uses: softprops/action-gh-release@v2
|
|
83
|
+
with:
|
|
84
|
+
generate_release_notes: true
|
|
@@ -6,6 +6,60 @@ The format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
|
|
6
6
|
|
|
7
7
|
---
|
|
8
8
|
|
|
9
|
+
## [0.3.4] - 2026-04-04
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
|
|
13
|
+
- **`samplesheet info` CLI command** — prints a concise summary of any V1 or
|
|
14
|
+
V2 sample sheet (format, sample count, lanes, index type, read lengths,
|
|
15
|
+
adapters, experiment name, instrument). Supports `--format json` for
|
|
16
|
+
machine-readable output; exits 0 on success, 2 on unreadable files.
|
|
17
|
+
|
|
18
|
+
- **Configurable Hamming distance threshold** — `SampleSheetValidator.validate()`
|
|
19
|
+
now accepts a `min_hamming_distance` keyword argument (default: 3) so labs
|
|
20
|
+
using longer indexes can enforce stricter thresholds without changing the
|
|
21
|
+
module-level constant.
|
|
22
|
+
- `SampleSheetMerger` accepts the same parameter in `__init__()` and applies
|
|
23
|
+
it to both the intra-sheet and cross-sheet Hamming checks as well as the
|
|
24
|
+
post-merge validation step.
|
|
25
|
+
- `samplesheet validate` exposes `--min-hamming N` (must be ≥ 1; exits 2 on
|
|
26
|
+
invalid input). The JSON output includes `min_hamming_distance` for
|
|
27
|
+
auditability.
|
|
28
|
+
|
|
29
|
+
- **`normalize_index_lengths()` utility** — normalizes index sequence lengths
|
|
30
|
+
across a list of sample dicts (output of `sheet.samples()`) to a consistent
|
|
31
|
+
length before merging sheets with mixed-length indexes.
|
|
32
|
+
- `strategy="trim"` — trims all indexes to the shortest sequence length.
|
|
33
|
+
- `strategy="pad"` — pads shorter indexes to the longest length using `"N"`
|
|
34
|
+
wildcard characters (supported by BCLConvert ≥ 3.9 and bcl2fastq ≥ 2.20).
|
|
35
|
+
- Auto-detects V1-style (`index`/`index2`) and V2-style (`Index`/`Index2`)
|
|
36
|
+
field names; explicit `index1_key`/`index2_key` overrides supported.
|
|
37
|
+
- Exported from the top-level package as `normalize_index_lengths`.
|
|
38
|
+
|
|
39
|
+
- **CI / pre-commit integration guide** in README — GitHub Actions workflow
|
|
40
|
+
and pre-commit hook configuration for automatic sample sheet validation on
|
|
41
|
+
every commit or pull request that touches a `SampleSheet.csv`.
|
|
42
|
+
|
|
43
|
+
### Fixed
|
|
44
|
+
|
|
45
|
+
- `_detect_key()` in `index_utils` now selects the key with at least one
|
|
46
|
+
non-empty value before falling back to key presence, preventing silent
|
|
47
|
+
normalization skip when a key exists but all its values are `None` or `""`.
|
|
48
|
+
|
|
49
|
+
### Changed
|
|
50
|
+
|
|
51
|
+
- `--min-hamming` CLI option default and help text are now derived from the
|
|
52
|
+
`MIN_HAMMING_DISTANCE` constant in `validators.py` to prevent drift.
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## [0.3.3] - 2026-03-13
|
|
57
|
+
|
|
58
|
+
### Documentation
|
|
59
|
+
- Add architecture diagram showing full library structure including CLI and SampleSheetMerger
|
|
60
|
+
- Update README with architecture overview, solid vs dashed line legend
|
|
61
|
+
- Add `[Custom_Sections*]` to V1 and V2 format descriptions
|
|
62
|
+
|
|
9
63
|
## [0.3.2] - 2026-03-12
|
|
10
64
|
|
|
11
65
|
### Added
|
|
@@ -1,23 +1,33 @@
|
|
|
1
1
|
cff-version: 1.2.0
|
|
2
|
-
message: "If you use this software, please cite it
|
|
2
|
+
message: "If you use this software, please cite it using the metadata below."
|
|
3
3
|
type: software
|
|
4
4
|
title: "samplesheet-parser"
|
|
5
|
+
abstract: >
|
|
6
|
+
A Python library for parsing, validating, converting, and merging
|
|
7
|
+
Illumina SampleSheet V1 and V2 files for BCLConvert and bcl2fastq.
|
|
8
|
+
Provides format auto-detection, bidirectional V1/V2 conversion,
|
|
9
|
+
structural and index validation, sheet diffing, and cross-project
|
|
10
|
+
merging with Hamming-distance collision detection.
|
|
5
11
|
version: 0.3.2
|
|
6
12
|
date-released: 2026-03-12
|
|
7
13
|
license: Apache-2.0
|
|
8
14
|
url: "https://github.com/chaitanyakasaraneni/samplesheet-parser"
|
|
9
15
|
repository-code: "https://github.com/chaitanyakasaraneni/samplesheet-parser"
|
|
10
|
-
|
|
11
|
-
A Python library for parsing, validating, converting, and merging
|
|
12
|
-
Illumina SampleSheet V1 and V2 files for BCLConvert and bcl2fastq.
|
|
16
|
+
repository-artifact: "https://pypi.org/project/samplesheet-parser/"
|
|
13
17
|
keywords:
|
|
14
18
|
- bioinformatics
|
|
15
19
|
- Illumina
|
|
16
20
|
- SampleSheet
|
|
17
21
|
- BCLConvert
|
|
22
|
+
- bcl2fastq
|
|
18
23
|
- demultiplexing
|
|
19
24
|
- genomics
|
|
25
|
+
- sequencing
|
|
20
26
|
- Python
|
|
27
|
+
identifiers:
|
|
28
|
+
- type: doi
|
|
29
|
+
value: 10.5281/zenodo.18989694
|
|
30
|
+
description: Concept DOI (all versions)
|
|
21
31
|
authors:
|
|
22
32
|
- family-names: Kasaraneni
|
|
23
33
|
given-names: Chaitanya Krishna
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: samplesheet-parser
|
|
3
|
-
Version: 0.3.2
|
|
3
|
+
Version: 0.3.4
|
|
4
4
|
Summary: Format-agnostic parser for Illumina SampleSheet.csv files — supports IEM V1 and BCLConvert V2
|
|
5
5
|
Project-URL: Homepage, https://github.com/chaitanyakasaraneni/samplesheet-parser
|
|
6
6
|
Project-URL: Documentation, https://illumina-samplesheet.readthedocs.io
|
|
@@ -60,8 +60,9 @@ Supports both the classic IEM V1 format (bcl2fastq era) and the modern BCLConver
|
|
|
60
60
|
[](https://opensource.org/licenses/Apache-2.0)
|
|
61
61
|
[](https://github.com/chaitanyakasaraneni/samplesheet-parser/actions)
|
|
62
62
|
[](https://codecov.io/gh/chaitanyakasaraneni/samplesheet-parser)
|
|
63
|
+
[](https://doi.org/10.5281/zenodo.18989694)
|
|
63
64
|
|
|
64
|
-

|
|
65
66
|
|
|
66
67
|
*`SampleSheetFactory` auto-detects the format and routes to the correct parser. Both formats share a common interface — `SampleSheetConverter` handles bidirectional conversion, `SampleSheetValidator` catches index and adapter issues, `SampleSheetDiff` compares two sheets across any combination of V1/V2 formats, `SampleSheetMerger` combines multiple per-project sheets into one, and `SampleSheetWriter` builds or edits sheets programmatically. The `samplesheet` CLI exposes all of this from the shell.*
|
|
67
68
|
|
|
@@ -556,6 +557,80 @@ sheet.get_read_structure() # → ReadStructure dataclass
|
|
|
556
557
|
|
|
557
558
|
---
|
|
558
559
|
|
|
560
|
+
## CI / pre-commit integration
|
|
561
|
+
|
|
562
|
+
The CLI exits with meaningful codes (`0` = clean, `1` = issues, `2` = error), making it easy to wire into automated pipelines.
|
|
563
|
+
|
|
564
|
+
### GitHub Actions
|
|
565
|
+
|
|
566
|
+
Add a validation step to any workflow that touches `SampleSheet.csv`:
|
|
567
|
+
|
|
568
|
+
```yaml
|
|
569
|
+
# .github/workflows/validate-samplesheet.yml
|
|
570
|
+
name: Validate SampleSheet
|
|
571
|
+
|
|
572
|
+
on:
|
|
573
|
+
push:
|
|
574
|
+
paths:
|
|
575
|
+
- '**/SampleSheet.csv'
|
|
576
|
+
pull_request:
|
|
577
|
+
paths:
|
|
578
|
+
- '**/SampleSheet.csv'
|
|
579
|
+
|
|
580
|
+
jobs:
|
|
581
|
+
validate:
|
|
582
|
+
runs-on: ubuntu-latest
|
|
583
|
+
steps:
|
|
584
|
+
- uses: actions/checkout@v4
|
|
585
|
+
|
|
586
|
+
- uses: actions/setup-python@v5
|
|
587
|
+
with:
|
|
588
|
+
python-version: '3.12'
|
|
589
|
+
|
|
590
|
+
- run: pip install "samplesheet-parser[cli]"
|
|
591
|
+
|
|
592
|
+
- name: Validate SampleSheet
|
|
593
|
+
run: samplesheet validate SampleSheet.csv --format json
|
|
594
|
+
```
|
|
595
|
+
|
|
596
|
+
### pre-commit hook
|
|
597
|
+
|
|
598
|
+
Gate commits that touch any `SampleSheet.csv` in the repository:
|
|
599
|
+
|
|
600
|
+
```yaml
|
|
601
|
+
# .pre-commit-config.yaml
|
|
602
|
+
repos:
|
|
603
|
+
- repo: local
|
|
604
|
+
hooks:
|
|
605
|
+
- id: samplesheet-validate
|
|
606
|
+
name: Validate SampleSheet.csv
|
|
607
|
+
entry: samplesheet validate
|
|
608
|
+
language: python
|
|
609
|
+
additional_dependencies: ["samplesheet-parser[cli]"]
|
|
610
|
+
files: SampleSheet\.csv$
|
|
611
|
+
pass_filenames: true
|
|
612
|
+
```
|
|
613
|
+
|
|
614
|
+
Install and run once to verify:
|
|
615
|
+
|
|
616
|
+
```bash
|
|
617
|
+
pip install pre-commit
|
|
618
|
+
pre-commit install
|
|
619
|
+
pre-commit run samplesheet-validate --all-files
|
|
620
|
+
```
|
|
621
|
+
|
|
622
|
+
### Stricter Hamming distance in CI
|
|
623
|
+
|
|
624
|
+
If your lab uses longer indexes (10 bp+), raise the minimum Hamming distance threshold to catch borderline cases earlier:
|
|
625
|
+
|
|
626
|
+
```bash
|
|
627
|
+
samplesheet validate SampleSheet.csv --min-hamming 4
|
|
628
|
+
```
|
|
629
|
+
|
|
630
|
+
This is especially useful in CI where you want to prevent runs that will likely fail demultiplexing.
|
|
631
|
+
|
|
632
|
+
---
|
|
633
|
+
|
|
559
634
|
## Contributing
|
|
560
635
|
|
|
561
636
|
```bash
|
|
@@ -584,7 +659,7 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for the full local testing guide and PR c
|
|
|
584
659
|
title = {samplesheet-parser: Format-agnostic parser for Illumina SampleSheet.csv},
|
|
585
660
|
year = {2026},
|
|
586
661
|
url = {https://github.com/chaitanyakasaraneni/samplesheet-parser},
|
|
587
|
-
version = {0.3.2}
|
|
662
|
+
version = {0.3.4}
|
|
588
663
|
}
|
|
589
664
|
```
|
|
590
665
|
|
|
@@ -9,8 +9,9 @@ Supports both the classic IEM V1 format (bcl2fastq era) and the modern BCLConver
|
|
|
9
9
|
[](https://opensource.org/licenses/Apache-2.0)
|
|
10
10
|
[](https://github.com/chaitanyakasaraneni/samplesheet-parser/actions)
|
|
11
11
|
[](https://codecov.io/gh/chaitanyakasaraneni/samplesheet-parser)
|
|
12
|
+
[](https://doi.org/10.5281/zenodo.18989694)
|
|
12
13
|
|
|
13
|
-

|
|
14
15
|
|
|
15
16
|
*`SampleSheetFactory` auto-detects the format and routes to the correct parser. Both formats share a common interface — `SampleSheetConverter` handles bidirectional conversion, `SampleSheetValidator` catches index and adapter issues, `SampleSheetDiff` compares two sheets across any combination of V1/V2 formats, `SampleSheetMerger` combines multiple per-project sheets into one, and `SampleSheetWriter` builds or edits sheets programmatically. The `samplesheet` CLI exposes all of this from the shell.*
|
|
16
17
|
|
|
@@ -505,6 +506,80 @@ sheet.get_read_structure() # → ReadStructure dataclass
|
|
|
505
506
|
|
|
506
507
|
---
|
|
507
508
|
|
|
509
|
+
## CI / pre-commit integration
|
|
510
|
+
|
|
511
|
+
The CLI exits with meaningful codes (`0` = clean, `1` = issues, `2` = error), making it easy to wire into automated pipelines.
|
|
512
|
+
|
|
513
|
+
### GitHub Actions
|
|
514
|
+
|
|
515
|
+
Add a validation step to any workflow that touches `SampleSheet.csv`:
|
|
516
|
+
|
|
517
|
+
```yaml
|
|
518
|
+
# .github/workflows/validate-samplesheet.yml
|
|
519
|
+
name: Validate SampleSheet
|
|
520
|
+
|
|
521
|
+
on:
|
|
522
|
+
push:
|
|
523
|
+
paths:
|
|
524
|
+
- '**/SampleSheet.csv'
|
|
525
|
+
pull_request:
|
|
526
|
+
paths:
|
|
527
|
+
- '**/SampleSheet.csv'
|
|
528
|
+
|
|
529
|
+
jobs:
|
|
530
|
+
validate:
|
|
531
|
+
runs-on: ubuntu-latest
|
|
532
|
+
steps:
|
|
533
|
+
- uses: actions/checkout@v4
|
|
534
|
+
|
|
535
|
+
- uses: actions/setup-python@v5
|
|
536
|
+
with:
|
|
537
|
+
python-version: '3.12'
|
|
538
|
+
|
|
539
|
+
- run: pip install "samplesheet-parser[cli]"
|
|
540
|
+
|
|
541
|
+
- name: Validate SampleSheet
|
|
542
|
+
run: samplesheet validate SampleSheet.csv --format json
|
|
543
|
+
```
|
|
544
|
+
|
|
545
|
+
### pre-commit hook
|
|
546
|
+
|
|
547
|
+
Gate commits that touch any `SampleSheet.csv` in the repository:
|
|
548
|
+
|
|
549
|
+
```yaml
|
|
550
|
+
# .pre-commit-config.yaml
|
|
551
|
+
repos:
|
|
552
|
+
- repo: local
|
|
553
|
+
hooks:
|
|
554
|
+
- id: samplesheet-validate
|
|
555
|
+
name: Validate SampleSheet.csv
|
|
556
|
+
entry: samplesheet validate
|
|
557
|
+
language: python
|
|
558
|
+
additional_dependencies: ["samplesheet-parser[cli]"]
|
|
559
|
+
files: SampleSheet\.csv$
|
|
560
|
+
pass_filenames: true
|
|
561
|
+
```
|
|
562
|
+
|
|
563
|
+
Install and run once to verify:
|
|
564
|
+
|
|
565
|
+
```bash
|
|
566
|
+
pip install pre-commit
|
|
567
|
+
pre-commit install
|
|
568
|
+
pre-commit run samplesheet-validate --all-files
|
|
569
|
+
```
|
|
570
|
+
|
|
571
|
+
### Stricter Hamming distance in CI
|
|
572
|
+
|
|
573
|
+
If your lab uses longer indexes (10 bp+), raise the minimum Hamming distance threshold to catch borderline cases earlier:
|
|
574
|
+
|
|
575
|
+
```bash
|
|
576
|
+
samplesheet validate SampleSheet.csv --min-hamming 4
|
|
577
|
+
```
|
|
578
|
+
|
|
579
|
+
This is especially useful in CI where you want to prevent runs that will likely fail demultiplexing.
|
|
580
|
+
|
|
581
|
+
---
|
|
582
|
+
|
|
508
583
|
## Contributing
|
|
509
584
|
|
|
510
585
|
```bash
|
|
@@ -533,7 +608,7 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for the full local testing guide and PR c
|
|
|
533
608
|
title = {samplesheet-parser: Format-agnostic parser for Illumina SampleSheet.csv},
|
|
534
609
|
year = {2026},
|
|
535
610
|
url = {https://github.com/chaitanyakasaraneni/samplesheet-parser},
|
|
536
|
-
version = {0.3.2}
|
|
611
|
+
version = {0.3.4}
|
|
537
612
|
}
|
|
538
613
|
```
|
|
539
614
|
|
|
Binary file
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "samplesheet-parser"
|
|
7
|
-
version = "0.3.2"
|
|
7
|
+
version = "0.3.4"
|
|
8
8
|
description = "Format-agnostic parser for Illumina SampleSheet.csv files — supports IEM V1 and BCLConvert V2"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -36,6 +36,7 @@ from samplesheet_parser.converter import SampleSheetConverter
|
|
|
36
36
|
from samplesheet_parser.diff import DiffResult, SampleSheetDiff
|
|
37
37
|
from samplesheet_parser.enums import IndexType, SampleSheetVersion
|
|
38
38
|
from samplesheet_parser.factory import SampleSheetFactory
|
|
39
|
+
from samplesheet_parser.index_utils import normalize_index_lengths
|
|
39
40
|
from samplesheet_parser.merger import MergeResult, SampleSheetMerger
|
|
40
41
|
from samplesheet_parser.parsers.v1 import SampleSheetV1
|
|
41
42
|
from samplesheet_parser.parsers.v2 import SampleSheetV2
|
|
@@ -56,5 +57,6 @@ __all__ = [
|
|
|
56
57
|
"SampleSheetWriter",
|
|
57
58
|
"SampleSheetMerger",
|
|
58
59
|
"MergeResult",
|
|
60
|
+
"normalize_index_lengths",
|
|
59
61
|
"__version__",
|
|
60
62
|
]
|
|
@@ -5,6 +5,7 @@ Entry point: ``samplesheet`` (configured in ``pyproject.toml``).
|
|
|
5
5
|
|
|
6
6
|
Commands
|
|
7
7
|
--------
|
|
8
|
+
info Show a quick summary of a sample sheet.
|
|
8
9
|
validate Validate a sheet — exit 0 if clean, exit 1 if errors.
|
|
9
10
|
convert Convert between V1 and V2 formats.
|
|
10
11
|
diff Diff two sheets — exit 1 if changes detected.
|
|
@@ -20,8 +21,12 @@ Usage
|
|
|
20
21
|
-----
|
|
21
22
|
::
|
|
22
23
|
|
|
24
|
+
samplesheet info SampleSheet.csv
|
|
25
|
+
samplesheet info SampleSheet.csv --format json
|
|
26
|
+
|
|
23
27
|
samplesheet validate SampleSheet.csv
|
|
24
28
|
samplesheet validate SampleSheet.csv --format json
|
|
29
|
+
samplesheet validate SampleSheet.csv --min-hamming 4
|
|
25
30
|
|
|
26
31
|
samplesheet convert SampleSheet_v1.csv --to v2 --output SampleSheet_v2.csv
|
|
27
32
|
samplesheet convert SampleSheet_v2.csv --to v1 --output SampleSheet_v1.csv
|
|
@@ -50,6 +55,7 @@ except ImportError: # pragma: no cover
|
|
|
50
55
|
_TYPER_AVAILABLE = False
|
|
51
56
|
|
|
52
57
|
from samplesheet_parser.enums import SampleSheetVersion
|
|
58
|
+
from samplesheet_parser.validators import MIN_HAMMING_DISTANCE as _MIN_HAMMING_DEFAULT
|
|
53
59
|
|
|
54
60
|
if _TYPER_AVAILABLE:
|
|
55
61
|
app = typer.Typer(
|
|
@@ -115,6 +121,87 @@ if _TYPER_AVAILABLE:
|
|
|
115
121
|
typer.echo(f"Error: unknown format '{fmt}'. Use 'text' or 'json'.", err=True)
|
|
116
122
|
raise typer.Exit(code=2)
|
|
117
123
|
|
|
124
|
+
# ---------------------------------------------------------------------------
|
|
125
|
+
# info
|
|
126
|
+
# ---------------------------------------------------------------------------
|
|
127
|
+
|
|
128
|
+
@app.command()
|
|
129
|
+
def info(
|
|
130
|
+
path: Annotated[Path, typer.Argument(help="Path to SampleSheet.csv.", metavar="FILE")],
|
|
131
|
+
fmt: _FormatOption = "text",
|
|
132
|
+
) -> None:
|
|
133
|
+
"""Display a quick summary of a sample sheet without full validation.
|
|
134
|
+
|
|
135
|
+
Shows format version, sample count, lanes, index type, read lengths,
|
|
136
|
+
and adapter sequences at a glance.
|
|
137
|
+
|
|
138
|
+
Exits 0 on success, 2 on unreadable files.
|
|
139
|
+
"""
|
|
140
|
+
from samplesheet_parser.factory import SampleSheetFactory
|
|
141
|
+
from samplesheet_parser.parsers.v1 import SampleSheetV1
|
|
142
|
+
|
|
143
|
+
_validate_fmt(fmt)
|
|
144
|
+
if not path.exists():
|
|
145
|
+
typer.echo(f"Error: file not found: {path}", err=True)
|
|
146
|
+
raise typer.Exit(code=2)
|
|
147
|
+
|
|
148
|
+
try:
|
|
149
|
+
factory = SampleSheetFactory()
|
|
150
|
+
sheet = factory.create_parser(str(path), parse=True, clean=False)
|
|
151
|
+
except Exception as exc:
|
|
152
|
+
typer.echo(f"Error: could not parse {path}: {exc}", err=True)
|
|
153
|
+
raise typer.Exit(code=2) from exc
|
|
154
|
+
|
|
155
|
+
if factory.version is None: # pragma: no cover
|
|
156
|
+
raise RuntimeError("SampleSheetFactory.version must be set after create_parser")
|
|
157
|
+
|
|
158
|
+
samples = sheet.samples()
|
|
159
|
+
lanes = sorted({str(s.get("lane") or "") for s in samples} - {""}) or ["(none)"]
|
|
160
|
+
index_type = sheet.index_type()
|
|
161
|
+
adapters: list[str] = getattr(sheet, "adapters", []) or []
|
|
162
|
+
experiment_name: str | None = getattr(sheet, "experiment_name", None)
|
|
163
|
+
|
|
164
|
+
if isinstance(sheet, SampleSheetV1):
|
|
165
|
+
read_lengths = [str(r) for r in (sheet.read_lengths or [])]
|
|
166
|
+
instrument = sheet.instrument_type
|
|
167
|
+
else:
|
|
168
|
+
reads_dict = sheet.reads or {}
|
|
169
|
+
read_lengths = [
|
|
170
|
+
str(reads_dict[k])
|
|
171
|
+
for k in ("Read1Cycles", "Read2Cycles")
|
|
172
|
+
if k in reads_dict
|
|
173
|
+
]
|
|
174
|
+
instrument = sheet.instrument_platform
|
|
175
|
+
|
|
176
|
+
if fmt == "json":
|
|
177
|
+
_print_json({
|
|
178
|
+
"file": str(path),
|
|
179
|
+
"format": factory.version.value,
|
|
180
|
+
"sample_count": len(samples),
|
|
181
|
+
"lanes": lanes,
|
|
182
|
+
"index_type": index_type,
|
|
183
|
+
"read_lengths": read_lengths,
|
|
184
|
+
"adapters": adapters,
|
|
185
|
+
"experiment_name": experiment_name,
|
|
186
|
+
"instrument": instrument,
|
|
187
|
+
})
|
|
188
|
+
else:
|
|
189
|
+
typer.echo(f"File: {path}")
|
|
190
|
+
typer.echo(f"Format: {factory.version.value}")
|
|
191
|
+
typer.echo(f"Samples: {len(samples)}")
|
|
192
|
+
typer.echo(f"Lanes: {', '.join(lanes)}")
|
|
193
|
+
typer.echo(f"Index type: {index_type}")
|
|
194
|
+
typer.echo(
|
|
195
|
+
f"Read lengths: {' + '.join(read_lengths) if read_lengths else '(not set)'}"
|
|
196
|
+
)
|
|
197
|
+
typer.echo(f"Adapters: {', '.join(adapters) if adapters else '(none)'}")
|
|
198
|
+
if experiment_name:
|
|
199
|
+
typer.echo(f"Experiment: {experiment_name}")
|
|
200
|
+
if instrument:
|
|
201
|
+
typer.echo(f"Instrument: {instrument}")
|
|
202
|
+
|
|
203
|
+
raise typer.Exit(code=0)
|
|
204
|
+
|
|
118
205
|
# ---------------------------------------------------------------------------
|
|
119
206
|
# validate
|
|
120
207
|
# ---------------------------------------------------------------------------
|
|
@@ -123,6 +210,17 @@ if _TYPER_AVAILABLE:
|
|
|
123
210
|
def validate(
|
|
124
211
|
path: Annotated[Path, typer.Argument(help="Path to SampleSheet.csv.", metavar="FILE")],
|
|
125
212
|
fmt: _FormatOption = "text",
|
|
213
|
+
min_hamming: Annotated[
|
|
214
|
+
int,
|
|
215
|
+
typer.Option(
|
|
216
|
+
"--min-hamming",
|
|
217
|
+
help=(
|
|
218
|
+
f"Minimum Hamming distance between indexes "
|
|
219
|
+
f"(default: {_MIN_HAMMING_DEFAULT}, must be >= 1)."
|
|
220
|
+
),
|
|
221
|
+
metavar="N",
|
|
222
|
+
),
|
|
223
|
+
] = _MIN_HAMMING_DEFAULT,
|
|
126
224
|
) -> None:
|
|
127
225
|
"""Validate a sample sheet for index, adapter, and structural issues.
|
|
128
226
|
|
|
@@ -134,6 +232,11 @@ if _TYPER_AVAILABLE:
|
|
|
134
232
|
from samplesheet_parser.validators import SampleSheetValidator
|
|
135
233
|
|
|
136
234
|
_validate_fmt(fmt)
|
|
235
|
+
if min_hamming < 1:
|
|
236
|
+
typer.echo(
|
|
237
|
+
f"Error: --min-hamming must be >= 1, got {min_hamming}.", err=True
|
|
238
|
+
)
|
|
239
|
+
raise typer.Exit(code=2)
|
|
137
240
|
if not path.exists():
|
|
138
241
|
typer.echo(f"Error: file not found: {path}", err=True)
|
|
139
242
|
raise typer.Exit(code=2)
|
|
@@ -149,13 +252,14 @@ if _TYPER_AVAILABLE:
|
|
|
149
252
|
raise RuntimeError("SampleSheetFactory.version must be set after create_parser")
|
|
150
253
|
version = factory.version
|
|
151
254
|
|
|
152
|
-
result = SampleSheetValidator().validate(sheet)
|
|
255
|
+
result = SampleSheetValidator().validate(sheet, min_hamming_distance=min_hamming)
|
|
153
256
|
|
|
154
257
|
if fmt == "json":
|
|
155
258
|
_print_json({
|
|
156
259
|
"file": str(path),
|
|
157
260
|
"version": version.value,
|
|
158
261
|
"is_valid": result.is_valid,
|
|
262
|
+
"min_hamming_distance": min_hamming,
|
|
159
263
|
"errors": [
|
|
160
264
|
{"code": e.code, "message": e.message, "context": e.context}
|
|
161
265
|
for e in result.errors
|